Skip to content

Commit

Permalink
Reduce distances_ size
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelS239 committed Dec 14, 2024
1 parent 38b7c52 commit cb0cbf9
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
18 changes: 11 additions & 7 deletions src/core/algorithms/dd/split/split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,9 @@ inline bool Split::CheckDFConstraint(DFConstraint const& dif_constraint,
std::pair<std::size_t, std::size_t> tuple_pair) {
ClusterIndex const first_cluster = plis_[column_index].GetInvertedIndex()[tuple_pair.first];
ClusterIndex const second_cluster = plis_[column_index].GetInvertedIndex()[tuple_pair.second];
double const dif = distances_[column_index][first_cluster][second_cluster];
ClusterIndex const min_cluster = std::min(first_cluster, second_cluster);
ClusterIndex const max_cluster = std::max(first_cluster, second_cluster);
double const dif = distances_[column_index][min_cluster][max_cluster - min_cluster];

if (type_ids_[column_index] == +model::TypeId::kDouble) {
if (!dif_constraint.Contains(dif)) {
Expand Down Expand Up @@ -381,31 +383,33 @@ bool Split::VerifyDD(DF const& lhs, DF const& rhs) {

// Builds, for every column, a DistancePositionListIndex and a table of
// distances between the column's clusters, and records the smallest and
// largest distance seen in min_max_dif_[column_index].
//
// NOTE(review): this text is a rendered diff without +/- markers, so a few
// statements below appear twice (old form immediately followed by new form).
// The pairs are called out individually; confirm against the real file.
void Split::CalculateAllDistances() {
plis_ = std::vector<DistancePositionListIndex>(num_columns_);
// NOTE(review): the next two lines look like the old/new pair for distances_
// setup (sized construction vs. reserve-only). With reserve-only, the
// emplace_back at the end of the column loop lands at index column_index only
// if distances_ is empty on entry -- TODO confirm callers clear it first.
distances_ = std::vector<std::vector<std::vector<double>>>(num_columns_);
distances_.reserve(num_columns_);
min_max_dif_ = std::vector<model::DFConstraint>(num_columns_, {0, 0});

for (model::ColumnIndex column_index = 0; column_index < num_columns_; column_index++) {
DistancePositionListIndex pli(typed_relation_->GetColumnData(column_index), num_rows_);
std::vector<ClusterInfo> const& clusters = pli.GetClusters();
std::size_t const num_clusters = clusters.size();
// NOTE(review): old/new pair. The first declaration allocates a full
// num_clusters x num_clusters matrix; the second starts empty and is grown
// row by row in the loop below.
std::vector<std::vector<double>> cur_column_distances = std::vector<std::vector<double>>(
num_clusters, std::vector<double>(num_clusters, 0));
std::vector<std::vector<double>> cur_column_distances;
cur_column_distances.reserve(num_clusters);

double max_dif = 0, min_dif = std::numeric_limits<double>::max();
for (ClusterIndex i = 0; i < num_clusters; i++) {
// Row i stores only the entries for clusters i..num_clusters-1:
// slot 0 is the distance of cluster i to itself (0) and slot k is the
// distance between clusters i and i + k.
cur_column_distances.emplace_back();
cur_column_distances[i].reserve(num_clusters - i);
cur_column_distances[i].push_back(0);
for (ClusterIndex j = i + 1; j < num_clusters; j++) {
// Each cluster is represented by its first tuple when measuring distance.
std::size_t const first_index = clusters[i].first_tuple_index;
std::size_t const second_index = clusters[j].first_tuple_index;
double const dif = CalculateDistance(column_index, {first_index, second_index});
max_dif = std::max(max_dif, dif);
min_dif = std::min(min_dif, dif);
// NOTE(review): the two indexed assignments look like the old symmetric
// full-matrix writes; the push_back is the new form that appends to the
// shortened row i in increasing j order.
cur_column_distances[i][j] = dif;
cur_column_distances[j][i] = dif;
cur_column_distances[i].push_back(dif);
}
// A cluster holding more than one tuple forces the minimum to 0
// (presumably because tuples in one cluster are at distance 0 -- verify).
if (clusters[i].size > 1) min_dif = 0;
}
min_max_dif_[column_index] = {min_dif, max_dif};
// NOTE(review): old/new pair -- assignment into a pre-sized slot vs. append.
distances_[column_index] = std::move(cur_column_distances);
distances_.emplace_back(std::move(cur_column_distances));
plis_[column_index] = std::move(pli);
}
}
Expand Down
1 change: 1 addition & 0 deletions src/core/algorithms/dd/split/split.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class Split : public Algorithm {
tuple_pairs_.clear();
non_empty_cols_.clear();
index_search_spaces_.clear();
distances_.clear();
}

double CalculateDistance(model::ColumnIndex column_index,
Expand Down

0 comments on commit cb0cbf9

Please sign in to comment.