From cb0cbf988436fcc1866a4df2e51847abc3f8171b Mon Sep 17 00:00:00 2001 From: Michael Sinelnikov Date: Mon, 11 Nov 2024 19:47:26 +0300 Subject: [PATCH] Reduce distances_ size --- src/core/algorithms/dd/split/split.cpp | 18 +++++++++++------- src/core/algorithms/dd/split/split.h | 1 + 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/core/algorithms/dd/split/split.cpp b/src/core/algorithms/dd/split/split.cpp index e44af3f8a..92d27dad9 100644 --- a/src/core/algorithms/dd/split/split.cpp +++ b/src/core/algorithms/dd/split/split.cpp @@ -348,7 +348,9 @@ inline bool Split::CheckDFConstraint(DFConstraint const& dif_constraint, std::pair tuple_pair) { ClusterIndex const first_cluster = plis_[column_index].GetInvertedIndex()[tuple_pair.first]; ClusterIndex const second_cluster = plis_[column_index].GetInvertedIndex()[tuple_pair.second]; - double const dif = distances_[column_index][first_cluster][second_cluster]; + ClusterIndex const min_cluster = std::min(first_cluster, second_cluster); + ClusterIndex const max_cluster = std::max(first_cluster, second_cluster); + double const dif = distances_[column_index][min_cluster][max_cluster - min_cluster]; if (type_ids_[column_index] == +model::TypeId::kDouble) { if (!dif_constraint.Contains(dif)) { @@ -381,31 +383,33 @@ bool Split::VerifyDD(DF const& lhs, DF const& rhs) { void Split::CalculateAllDistances() { plis_ = std::vector(num_columns_); - distances_ = std::vector>>(num_columns_); + distances_.reserve(num_columns_); min_max_dif_ = std::vector(num_columns_, {0, 0}); for (model::ColumnIndex column_index = 0; column_index < num_columns_; column_index++) { DistancePositionListIndex pli(typed_relation_->GetColumnData(column_index), num_rows_); std::vector const& clusters = pli.GetClusters(); std::size_t const num_clusters = clusters.size(); - std::vector> cur_column_distances = std::vector>( - num_clusters, std::vector(num_clusters, 0)); + std::vector> cur_column_distances; + cur_column_distances.reserve(num_clusters); double max_dif = 0, min_dif = std::numeric_limits::max(); for (ClusterIndex i = 0; i < num_clusters; i++) { + cur_column_distances.emplace_back(); + cur_column_distances[i].reserve(num_clusters - i); + cur_column_distances[i].push_back(0); for (ClusterIndex j = i + 1; j < num_clusters; j++) { std::size_t const first_index = clusters[i].first_tuple_index; std::size_t const second_index = clusters[j].first_tuple_index; double const dif = CalculateDistance(column_index, {first_index, second_index}); max_dif = std::max(max_dif, dif); min_dif = std::min(min_dif, dif); - cur_column_distances[i][j] = dif; - cur_column_distances[j][i] = dif; + cur_column_distances[i].push_back(dif); } if (clusters[i].size > 1) min_dif = 0; } min_max_dif_[column_index] = {min_dif, max_dif}; - distances_[column_index] = std::move(cur_column_distances); + distances_.emplace_back(std::move(cur_column_distances)); plis_[column_index] = std::move(pli); } } diff --git a/src/core/algorithms/dd/split/split.h b/src/core/algorithms/dd/split/split.h index fc5ae2b73..4d4933256 100644 --- a/src/core/algorithms/dd/split/split.h +++ b/src/core/algorithms/dd/split/split.h @@ -57,6 +57,7 @@ class Split : public Algorithm { tuple_pairs_.clear(); non_empty_cols_.clear(); index_search_spaces_.clear(); + distances_.clear(); } double CalculateDistance(model::ColumnIndex column_index,