From 056a15e0f76a4fab350602ea60806026c7a10dc9 Mon Sep 17 00:00:00 2001 From: Dmitriy Kozenko <8863314@gmail.com> Date: Sun, 22 Sep 2024 15:32:59 +0300 Subject: [PATCH] Changes after review --- src/core/algorithms/fd/eulerfd/cluster.cpp | 23 +++-- src/core/algorithms/fd/eulerfd/cluster.h | 6 +- src/core/algorithms/fd/eulerfd/eulerfd.cpp | 96 +++++++++---------- src/core/algorithms/fd/eulerfd/eulerfd.h | 28 +++--- src/core/algorithms/fd/eulerfd/mlfq.cpp | 21 ++-- src/core/algorithms/fd/eulerfd/mlfq.h | 15 +-- .../algorithms/fd/eulerfd/search_tree.cpp | 10 +- src/core/algorithms/fd/eulerfd/search_tree.h | 29 +++--- src/core/config/custom_random/option.cpp | 4 +- src/core/config/descriptions.h | 2 +- src/python_bindings/py_util/py_to_any.cpp | 3 +- src/tests/test_fd_util.h | 2 +- 12 files changed, 121 insertions(+), 118 deletions(-) diff --git a/src/core/algorithms/fd/eulerfd/cluster.cpp b/src/core/algorithms/fd/eulerfd/cluster.cpp index 5f3ec751d0..9416c42af4 100644 --- a/src/core/algorithms/fd/eulerfd/cluster.cpp +++ b/src/core/algorithms/fd/eulerfd/cluster.cpp @@ -1,19 +1,24 @@ #include "cluster.h" +#include + namespace algos { -void Cluster::ShuffleData(RandomStrategy rand) { - if (cluster_data_.size() == 0) { +// We want have same permutation of array on any platforms and compilers +// (it is necessary for consistent hash values in test). +// But implementation of std::shuffle depends on STL, so we can't use it +void Cluster::ShuffleData(RandomStrategy const& custom_rand) { + if (cluster_data_.empty()) { return; } for (size_t i = cluster_data_.size() - 1; i > 0; i--) { - std::swap(cluster_data_[i], cluster_data_[rand() % (i + 1)]); + std::swap(cluster_data_[i], cluster_data_[custom_rand() % (i + 1)]); } } -Cluster::Cluster(std::vector &&cluster_data, RandomStrategy rand) +Cluster::Cluster(std::vector&& cluster_data, RandomStrategy const& custom_rand) : cluster_data_(std::move(cluster_data)) { - ShuffleData(rand); + ShuffleData(custom_rand); hist_effects_.fill(1); } @@ -22,16 +27,18 @@ double Cluster::GetAverage() const { return sum / kInitialWindow; } -double Cluster::Sample(RegisterTuplesFunction handle_tuples) { +double Cluster::Sample(RegisterTuplesFunction const& handle_tuples) { new_tuples_pairs_ = new_non_fds_ = 0; window_++; - for (int i = 0; i < (int)cluster_data_.size() - (int)window_; i++) { + int64_t barrier = static_cast(cluster_data_.size()) - static_cast(window_); + for (int64_t i = 0; i < barrier; i++) { new_non_fds_ += handle_tuples(cluster_data_[i], cluster_data_[i + window_]); } new_tuples_pairs_ = cluster_data_.size() - window_; - double cur_eff = new_tuples_pairs_ == 0 ? 0 : (double)new_non_fds_ / new_tuples_pairs_; + double cur_eff = + new_tuples_pairs_ == 0 ? 0 : (static_cast(new_non_fds_) / new_tuples_pairs_); hist_effects_[sample_number_ % kInitialWindow] = cur_eff; sample_number_++; diff --git a/src/core/algorithms/fd/eulerfd/cluster.h b/src/core/algorithms/fd/eulerfd/cluster.h index 2515512f37..3703d20285 100644 --- a/src/core/algorithms/fd/eulerfd/cluster.h +++ b/src/core/algorithms/fd/eulerfd/cluster.h @@ -22,10 +22,10 @@ class Cluster { size_t new_tuples_pairs_ = 0; size_t new_non_fds_ = 0; - void ShuffleData(RandomStrategy rand); + void ShuffleData(RandomStrategy const &custom_rand); public: - Cluster(std::vector &&cluster_data, RandomStrategy rand); + Cluster(std::vector &&cluster_data, RandomStrategy const &custom_rand); Cluster(Cluster &other) = delete; Cluster &operator=(Cluster &other) = delete; @@ -33,7 +33,7 @@ class Cluster { Cluster(Cluster &&other) = default; Cluster &operator=(Cluster &&other) = default; - double Sample(RegisterTuplesFunction handle_tuples); + double Sample(RegisterTuplesFunction const &handle_tuples); double GetAverage() const; }; } // namespace algos diff --git a/src/core/algorithms/fd/eulerfd/eulerfd.cpp b/src/core/algorithms/fd/eulerfd/eulerfd.cpp index 19434b85bd..b94b114919 100644 --- a/src/core/algorithms/fd/eulerfd/eulerfd.cpp +++ b/src/core/algorithms/fd/eulerfd/eulerfd.cpp @@ -7,7 +7,7 @@ EulerFD::EulerFD() : FDAlgorithm({kDefaultPhaseName}), mlfq_(kQueuesNumber) { last_pcover_ratios_.fill(1); RegisterOption(config::kCustomRandomFlagOpt(&custom_random_opt_)); - // set configuration options + // Set configuration options RegisterOption(config::kTableOpt(&input_table_)); RegisterOption(config::kEqualNullsOpt(&is_null_equal_null_)); MakeOptionsAvailable({config::kTableOpt.GetName(), config::kEqualNullsOpt.GetName()}); @@ -31,8 +31,8 @@ void EulerFD::LoadDataInternal() { schema_->AppendColumn(column_name); } - // in each column mapping string values into integer values. - // using only hash isnt good idea because colisions dont processing + // In each column mapping string values into integer values. + // Using only hash isn't good idea because collisions don't processing. std::vector> columns(number_of_attributes_); std::vector current_id(number_of_attributes_, 0); @@ -43,15 +43,16 @@ void EulerFD::LoadDataInternal() { } tuples_.emplace_back(std::vector(number_of_attributes_)); + auto ¤t_tuple = tuples_.back(); for (size_t i = 0; i < number_of_attributes_; i++) { auto &values = columns[i]; auto it = values.find(line[i]); if (it != values.end()) { - tuples_.back()[i] = it->second; + current_tuple[i] = it->second; } else { size_t id = current_id[i]++; values[std::move(line[i])] = id; - tuples_.back()[i] = id; + current_tuple[i] = id; } } } @@ -59,21 +60,21 @@ void EulerFD::LoadDataInternal() { } void EulerFD::ResetStateFd() { - // data from sampling module + // Data from sampling module clusters_.clear(); constant_columns_.clear(); is_first_sample_ = true; mlfq_.Clear(); - effective_treshold_ = kInitialEffectiveTreshold; + effective_treshold_ = kInitialEffectiveThreshold; invalids_.clear(); new_invalids_.clear(); fd_num_ = old_invalid_size_ = 0; - // data from p/ncover consturction modules + // Data from p/ncover construction modules attribute_indexes_.clear(); - attribute_frequences_.clear(); + attribute_frequencies_.clear(); positive_cover_.clear(); negative_cover_.clear(); @@ -85,7 +86,7 @@ void EulerFD::ResetStateFd() { } void EulerFD::SaveAnswer() { - if (attribute_indexes_.size() == 0) { + if (attribute_indexes_.empty()) { std::cout << "attribute_indexes_ size is 0\n"; return; } @@ -96,7 +97,7 @@ void EulerFD::SaveAnswer() { } for (size_t rhs_attr = 0; rhs_attr < number_of_attributes_; rhs_attr++) { - // then tree filling we use inverse indexes, so to get correct tree we inverse again + // Then tree filling we use inverse indexes, so to get correct tree we inverse again size_t inv_rhs_attr = inv_indexes[rhs_attr]; auto &tree = positive_cover_[inv_rhs_attr]; auto rhs = *schema_->GetColumn(rhs_attr); @@ -111,18 +112,18 @@ void EulerFD::InitCovers() { positive_cover_.clear(); positive_cover_.reserve(number_of_attributes_); for (size_t i = 0; i < number_of_attributes_; i++) { - positive_cover_.push_back(SearchTreeEulerFD(number_of_attributes_)); + positive_cover_.emplace_back(SearchTreeEulerFD(number_of_attributes_)); positive_cover_.back().Add(Bitset(number_of_attributes_)); } negative_cover_.clear(); negative_cover_.reserve(number_of_attributes_); for (size_t i = 0; i < number_of_attributes_; i++) { - negative_cover_.push_back(SearchTreeEulerFD(number_of_attributes_)); + negative_cover_.emplace_back(SearchTreeEulerFD(number_of_attributes_)); } - attribute_frequences_.resize(number_of_attributes_); - std::fill(attribute_frequences_.begin(), attribute_frequences_.end(), 0); + attribute_frequencies_.resize(number_of_attributes_); + std::fill(attribute_frequencies_.begin(), attribute_frequencies_.end(), 0); attribute_indexes_.clear(); } @@ -133,8 +134,8 @@ void EulerFD::BuildPartition() { } constant_columns_ = Bitset(number_of_attributes_); - // build partition: same values in each column unite into clusters, - // then stripping it: clusters with 1 element cant get us new invalid fd + // Build partition: same values in each column unite into clusters, + // then stripping it: clusters with 1 element can`t get us new invalid fd for (size_t attr_num = 0; attr_num < number_of_attributes_; attr_num++) { std::unordered_map> values; for (size_t tuple_num = 0; tuple_num < number_of_tuples_; tuple_num++) { @@ -143,11 +144,11 @@ void EulerFD::BuildPartition() { similar_values.push_back(tuple_num); } if (values.size() == 1) { - // its mean in this column only one value + // This mean in this column only one value constant_columns_[attr_num] = true; continue; } else if (values.size() == number_of_tuples_) { - // its mean all clusters' sizes are 1, so we can pass cycle below + // This means that the sizes of all clusters are 1, so we can pass the cycle below continue; } else { for (auto &&[_, cluster] : values) { @@ -169,12 +170,12 @@ EulerFD::Bitset EulerFD::BuildAgreeSet(size_t t1, size_t t2) const { return equal_attr; } -double EulerFD::SamlingInCluster(Cluster *cluster) { +double EulerFD::SamplingInCluster(Cluster *cluster) { return cluster->Sample([this](size_t t1, size_t t2) -> size_t { Bitset agree_set = BuildAgreeSet(t1, t2); auto &&[_, result] = invalids_.insert(agree_set); - // check is discovered fd new + // Check that this is a new FD if (result) { new_invalids_.insert(agree_set); return agree_set.size() - agree_set.count(); @@ -186,13 +187,13 @@ double EulerFD::SamlingInCluster(Cluster *cluster) { void EulerFD::Sampling() { if (is_first_sample_) { - // in first sampling mlfq is empty, we fiil it. - // ww put all clusters in mlfq, even if effective was 0 + // In first sampling mlfq is empty, we fill it. + // We put all clusters in mlfq, even if effective coefficient was 0 is_first_sample_ = false; - for (size_t i = 0; i < clusters_.size(); i++) { // TODO: maybe ranged for - double eff = SamlingInCluster(&clusters_[i]); - mlfq_.Add(&clusters_[i], eff, true); + for (auto &cluster : clusters_) { + double eff = SamplingInCluster(&cluster); + mlfq_.Add(&cluster, eff, true); } if (mlfq_.GetLastQueueSize() > 0) { @@ -202,11 +203,11 @@ void EulerFD::Sampling() { return; } - // sampling in first queues of mlfq; + // Sampling in first queues of mlfq new_invalids_.clear(); while (mlfq_.GetEffectiveSize() > 0) { Cluster *cluster = mlfq_.Get(); - SamlingInCluster(cluster); + SamplingInCluster(cluster); mlfq_.Add(cluster, cluster->GetAverage()); } @@ -214,10 +215,10 @@ void EulerFD::Sampling() { effective_treshold_ = std::min(effective_treshold_ / 4, mlfq_.MaxEffectInLastQueue() / 2); } - // sampling in last queues (it is priority queue) of mlfq; + // Sampling in last queues (it is priority queues) of mlfq while (mlfq_.GetLastQueueSize() > 0 && mlfq_.MaxEffectInLastQueue() >= effective_treshold_) { Cluster *cluster = mlfq_.Get(); - SamlingInCluster(cluster); + SamplingInCluster(cluster); mlfq_.AddAtLast(cluster); } } @@ -225,29 +226,29 @@ void EulerFD::Sampling() { bool EulerFD::IsNCoverGrowthSmall() const { double sum = std::accumulate(last_ncover_ratios_.begin(), last_ncover_ratios_.end(), 0.0); double grow_rate = sum / kWindow; - return grow_rate < kNegCoverGrowthTreshold; + return grow_rate < kNegCoverGrowthThreshold; } bool EulerFD::IsPCoverGrowthSmall() const { double sum = std::accumulate(last_pcover_ratios_.begin(), last_pcover_ratios_.end(), 0.0); double grow_rate = sum / kWindow; - return grow_rate < kPosCoverGrowthTreshold; + return grow_rate < kPosCoverGrowthThreshold; } std::vector EulerFD::GetAttributesSortedByFrequency( std::vector const &neg_cover_vector) { for (auto const &bitset : neg_cover_vector) { for (size_t attr_num = 0; attr_num < number_of_attributes_; ++attr_num) { - attribute_frequences_[attr_num] += bitset[attr_num]; + attribute_frequencies_[attr_num] += bitset[attr_num]; } } std::vector attr_indices(number_of_attributes_); std::iota(attr_indices.begin(), attr_indices.end(), 0); - // after sorting in indexes[0] is number of column with the largest number of 1 + // After sorting in indexes[0] is number of column with the largest number of 1 std::sort(attr_indices.begin(), attr_indices.end(), [&](size_t lhs, size_t rhs) { - return attribute_frequences_[lhs] < attribute_frequences_[rhs]; + return attribute_frequencies_[lhs] < attribute_frequencies_[rhs]; }); return attr_indices; @@ -310,7 +311,7 @@ size_t EulerFD::Invert(size_t rhs, std::vector const &neg) { Bitset copy = removed; copy.set(i); if (!tree.ContainsAnySubsetOf(copy)) { - tree.Add(copy); + tree.Add(std::move(copy)); } } } @@ -319,7 +320,7 @@ size_t EulerFD::Invert(size_t rhs, std::vector const &neg) { } size_t EulerFD::GenerateResults() { - // check is new non fd discovered + // Check is new non fd discovered if (old_invalid_size_ == invalids_.size()) { return fd_num_; } @@ -328,7 +329,7 @@ size_t EulerFD::GenerateResults() { std::vector neg_cover_vector(new_invalids_.begin(), new_invalids_.end()); auto new_indexes = GetAttributesSortedByFrequency(neg_cover_vector); - // check is rebuild of covers neccesary + // Check is rebuild of covers neccesary if (attribute_indexes_ != new_indexes) { if (old_invalid_size_ != 0) { InitCovers(); @@ -343,16 +344,16 @@ size_t EulerFD::GenerateResults() { inv_indexes[attribute_indexes_[i]] = i; } - // change attribute order, in first index the largset number of 1 + // Change attribute order, in first index set with the largest number of 1 for (auto &neg_cover_el : neg_cover_vector) { neg_cover_el = ChangeAttributesOrder(neg_cover_el, inv_indexes); } - // sorting all non fd by number of 1 + // Sorting all non fd by number of 1 std::sort(neg_cover_vector.begin(), neg_cover_vector.end(), [](Bitset const &left, Bitset const &right) { return left.count() > right.count(); }); - // creating ncover and pcover trees for each rhs + // Creating ncover and pcover trees for each rhs size_t fd_num = 0; for (size_t rhs = 0; rhs < number_of_attributes_; rhs++) { if (constant_columns_[rhs]) { @@ -361,7 +362,7 @@ size_t EulerFD::GenerateResults() { size_t real_rhs = inv_indexes[rhs]; auto neg = CreateNegativeCover(real_rhs, neg_cover_vector); - // sorting all non fd by number of 1 + // Sorting all non fd by number of 1 std::sort(neg.begin(), neg.end(), [](Bitset const &left, Bitset const &right) { return left.count() > right.count(); }); @@ -375,7 +376,7 @@ unsigned long long EulerFD::ExecuteInternal() { return 0; } - // choose random strategy (it is neccesary for stable unit tests) + // Choose random strategy (it is necessary for stable unit tests) if (custom_random_opt_.first) { random_ = std::make_unique(custom_random_opt_.second); rand_function_ = [&]() { return random_->NextInt(kRandomUpperBound); }; @@ -387,8 +388,8 @@ unsigned long long EulerFD::ExecuteInternal() { auto start_time = std::chrono::system_clock::now(); BuildPartition(); - if (clusters_.size() == 0) { - // in small datasets sometimes after clusters stripping there are no clusters for sampling + if (clusters_.empty()) { + // In small datasets sometimes after clusters stripping there are no clusters for sampling std::cout << "number of clusters is 0*\n"; return 0; } @@ -400,10 +401,9 @@ unsigned long long EulerFD::ExecuteInternal() { Sampling(); last_ncover_ratios_[iteration_number % kWindow] = - invalids_.size() == 0 ? 0 - : (double)(invalids_.size() - ncover_size) / invalids_.size(); + invalids_.empty() ? 0 : (double)(invalids_.size() - ncover_size) / invalids_.size(); - // check criterion for enter second EulerFD cycle + // Check criterion for enter second EulerFD cycle if (IsNCoverGrowthSmall()) { size_t pcover_size = fd_num_; fd_num_ = GenerateResults(); diff --git a/src/core/algorithms/fd/eulerfd/eulerfd.h b/src/core/algorithms/fd/eulerfd/eulerfd.h index ef7b283617..34605bc5be 100644 --- a/src/core/algorithms/fd/eulerfd/eulerfd.h +++ b/src/core/algorithms/fd/eulerfd/eulerfd.h @@ -32,13 +32,13 @@ class EulerFD : public FDAlgorithm { using Bitset = boost::dynamic_bitset<>; using RandomStrategy = Cluster::RandomStrategy; - // random strategy for unit tests + // Random strategy for unit tests config::CustomRandomFlagType custom_random_opt_; RandomStrategy rand_function_; std::unique_ptr random_{}; constexpr static std::size_t kRandomUpperBound = 3047102; - // data from load data + // Data from load data size_t number_of_attributes_{}; size_t number_of_tuples_{}; config::InputTable input_table_; @@ -47,14 +47,14 @@ class EulerFD : public FDAlgorithm { config::EqNullsType is_null_equal_null_{}; - // thresholds to checking criterion of euler fd cycles - constexpr static double kPosCoverGrowthTreshold = 0.01; - constexpr static double kNegCoverGrowthTreshold = 0.01; + // Thresholds to checking criterion of EulerFD cycles + constexpr static double kPosCoverGrowthThreshold = 0.01; + constexpr static double kNegCoverGrowthThreshold = 0.01; constexpr static size_t kWindow = 3; std::array last_ncover_ratios_; std::array last_pcover_ratios_; - // clusters from partition + // Clusters from partition std::vector clusters_; Bitset constant_columns_; @@ -62,22 +62,22 @@ class EulerFD : public FDAlgorithm { bool is_first_sample_ = true; constexpr static size_t const kQueuesNumber = 5; MLFQ mlfq_; - constexpr static double kInitialEffectiveTreshold = 0.01; - double effective_treshold_ = kInitialEffectiveTreshold; + constexpr static double kInitialEffectiveThreshold = 0.01; + double effective_treshold_ = kInitialEffectiveThreshold; - // invalid fds storages + // Invalid fds storages std::unordered_set invalids_; std::unordered_set new_invalids_; size_t fd_num_ = 0; size_t old_invalid_size_ = 0; - // covers of fds + // Covers of fds std::vector negative_cover_; std::vector positive_cover_; - // data for sorting columns by number of 1 + // Data for sorting columns by number of 1 std::vector attribute_indexes_; - std::vector attribute_frequences_; + std::vector attribute_frequencies_; void LoadDataInternal() final; unsigned long long ExecuteInternal() final; @@ -89,7 +89,7 @@ class EulerFD : public FDAlgorithm { void InitCovers(); void BuildPartition(); - double SamlingInCluster(Cluster *cluster); + double SamplingInCluster(Cluster *cluster); void Sampling(); size_t GenerateResults(); @@ -109,8 +109,6 @@ class EulerFD : public FDAlgorithm { bool IsNCoverGrowthSmall() const; bool IsPCoverGrowthSmall() const; - void HandleInvalid(); - void SaveAnswer(); public: diff --git a/src/core/algorithms/fd/eulerfd/mlfq.cpp b/src/core/algorithms/fd/eulerfd/mlfq.cpp index 733ad146d3..5c40795551 100644 --- a/src/core/algorithms/fd/eulerfd/mlfq.cpp +++ b/src/core/algorithms/fd/eulerfd/mlfq.cpp @@ -1,12 +1,6 @@ #include "mlfq.h" -#include #include -#include -#include -#include -#include -#include namespace algos { @@ -15,22 +9,23 @@ bool MLFQ::LastQueueElement::operator<(LastQueueElement const &other) const { } MLFQ::MLFQ(size_t queues_number) { - double range = 0.001; + double range = kLastQueueRangeBarrier; + queues_.reserve(queues_number); for (size_t i = 0; i < queues_number; i++) { queues_.push_back({{}, range}); range *= 10; } } -void MLFQ::Add(Cluster *cluster, double prioritet, bool add_if_zero) { - if (prioritet == 0 && !add_if_zero) { +void MLFQ::Add(Cluster *cluster, double priority, bool add_if_zero) { + if (priority == 0 && !add_if_zero) { return; } - if (prioritet < 0.001) { + if (priority < kLastQueueRangeBarrier) { AddAtLast(cluster); } else { - int queue = (int)std::floor(std::log10(prioritet)) + 3; - // evaluating index of queue + // Evaluating index of queue + int queue = static_cast(std::floor(std::log10(priority))) + 3; queue = queue > 4 ? 4 : queue; actual_queue_ = std::max(actual_queue_, queue); queues_[queue].first.push(cluster); @@ -49,7 +44,7 @@ Cluster *MLFQ::Get() { Cluster *save = queues_[actual_queue_].first.front(); queues_[actual_queue_].first.pop(); effective_size_--; - while (actual_queue_ >= 0 && queues_[actual_queue_].first.size() == 0) { + while (actual_queue_ >= 0 && queues_[actual_queue_].first.empty()) { actual_queue_--; } return save; diff --git a/src/core/algorithms/fd/eulerfd/mlfq.h b/src/core/algorithms/fd/eulerfd/mlfq.h index 18c1bf48a8..3b3e86f3fa 100644 --- a/src/core/algorithms/fd/eulerfd/mlfq.h +++ b/src/core/algorithms/fd/eulerfd/mlfq.h @@ -13,6 +13,8 @@ class MLFQ { private: using Queue = std::pair, double>; + constexpr static double kLastQueueRangeBarrier = 0.001; + struct LastQueueElement { Cluster *cluster{}; bool operator<(LastQueueElement const &other) const; @@ -22,20 +24,19 @@ class MLFQ { size_t effective_size_ = 0; std::priority_queue last_queue_{}; - // actual_queue is index of not empty queue with the highest prioritet + // Index of not empty queue with the highest priority int actual_queue_ = -1; public: - MLFQ(size_t queues_number); + explicit MLFQ(size_t queues_number); - void Add(Cluster *cluster, double prioritet, bool add_if_zero = false); + void Add(Cluster *cluster, double priority, bool add_if_zero = false); void AddAtLast(Cluster *cluster); [[nodiscard]] Cluster *Get(); - [[nodiscard]] Cluster *GetFromLast(); - size_t GetEffectiveSize() const; - size_t GetLastQueueSize() const; - double MaxEffectInLastQueue() const; + [[nodiscard]] size_t GetEffectiveSize() const; + [[nodiscard]] size_t GetLastQueueSize() const; + [[nodiscard]] double MaxEffectInLastQueue() const; void Clear(); }; } // namespace algos diff --git a/src/core/algorithms/fd/eulerfd/search_tree.cpp b/src/core/algorithms/fd/eulerfd/search_tree.cpp index 322e87f303..4482d1b978 100644 --- a/src/core/algorithms/fd/eulerfd/search_tree.cpp +++ b/src/core/algorithms/fd/eulerfd/search_tree.cpp @@ -252,8 +252,8 @@ void SearchTreeEulerFD::InsertLeafIntoEnd(std::shared_ptr const& current_n current_node->bit_ = set_bit; } - current_node->left_ = new_left; - current_node->right_ = new_right; + current_node->left_ = std::move(new_left); + current_node->right_ = std::move(new_right); UpdateInterAndUnion(current_node); } @@ -274,8 +274,8 @@ void SearchTreeEulerFD::InsertLeafIntoMiddle(std::shared_ptr const& curren current_node->right_->parent_ = replacing_node; current_node->bit_ = set_bit; - current_node->left_ = new_left; - current_node->right_ = new_right; + current_node->left_ = std::move(new_left); + current_node->right_ = std::move(new_right); UpdateInterAndUnion(current_node); } @@ -293,7 +293,7 @@ bool SearchTreeEulerFD::SupersetsTraverse(Bitset const& set, } bool SearchTreeEulerFD::ContainsAnySupersetOf(Bitset const& set) const { - return root_ ? SupersetsTraverse(set, root_) : false; + return root_ && SupersetsTraverse(set, root_); } } // namespace algos diff --git a/src/core/algorithms/fd/eulerfd/search_tree.h b/src/core/algorithms/fd/eulerfd/search_tree.h index c54305490f..c1727ebb52 100644 --- a/src/core/algorithms/fd/eulerfd/search_tree.h +++ b/src/core/algorithms/fd/eulerfd/search_tree.h @@ -16,9 +16,9 @@ class SearchTreeEulerFD { struct Node { size_t bit_{}; - Bitset set_; - Bitset union_; - Bitset inter_; + Bitset set_; // Set of attributes which corresponds this node + Bitset union_; // Union of children of this node + Bitset inter_; // Intersection of children of this node std::shared_ptr left_{}; std::shared_ptr right_{}; @@ -29,14 +29,14 @@ class SearchTreeEulerFD { [[nodiscard]] Bitset const& GetUnion() const; [[nodiscard]] Bitset const& GetInter() const; - // for inner nodes + // For inner nodes Node(size_t bit, Bitset sets_union, Bitset sets_inter, std::shared_ptr const& parent, std::shared_ptr left = nullptr, std::shared_ptr right = nullptr); - // for leaves + // For leaves Node(size_t bit, Bitset set, std::shared_ptr const& parent); - // for both types of nodes + // For both types of nodes Node(size_t bit, Bitset set, Bitset sets_union, Bitset sets_inter, std::shared_ptr const& parent, std::shared_ptr left = nullptr, std::shared_ptr right = nullptr); @@ -50,20 +50,23 @@ class SearchTreeEulerFD { void CollectSubsets(Bitset const& set, std::shared_ptr const& current_node, BitsetConsumer const& collect, bool& go_further) const; - bool SupersetsTraverse(Bitset const& set, std::shared_ptr const& current_node) const; + [[nodiscard]] bool SupersetsTraverse(Bitset const& set, + std::shared_ptr const& current_node) const; void ForEach(std::shared_ptr const& current_node, BitsetConsumer const& collect) const; - std::shared_ptr FindNode(Bitset const& set); + [[nodiscard]] std::shared_ptr FindNode(Bitset const& set); void CutLeaf(std::shared_ptr const& node_to_remove); - void InsertLeafIntoEnd(std::shared_ptr const& current_node, Bitset const& set, - size_t node_bit, size_t set_bit); - void InsertLeafIntoMiddle(std::shared_ptr const& current_node, Bitset const& set, - size_t set_bit); + + static void InsertLeafIntoEnd(std::shared_ptr const& current_node, Bitset const& set, + size_t node_bit, size_t set_bit); + static void InsertLeafIntoMiddle(std::shared_ptr const& current_node, Bitset const& set, + size_t set_bit); static void UpdateInterAndUnion(std::shared_ptr const& node); - static std::pair FindNodeAndSetBits(Bitset const& node_set, Bitset const& set); + [[nodiscard]] static std::pair FindNodeAndSetBits(Bitset const& node_set, + Bitset const& set); public: explicit SearchTreeEulerFD(size_t number_of_attributes); diff --git a/src/core/config/custom_random/option.cpp b/src/core/config/custom_random/option.cpp index 317ad0ceb5..d6aeca7975 100644 --- a/src/core/config/custom_random/option.cpp +++ b/src/core/config/custom_random/option.cpp @@ -6,7 +6,5 @@ namespace config { extern CommonOption const kCustomRandomFlagOpt{ - names::kCustomRandom, descriptions::kDCustomRandom, std::make_pair(false, 47) - // names::kCustomRandom, descriptions::kDCustomRandom, {false, 47} -}; + names::kCustomRandom, descriptions::kDCustomRandom, std::make_pair(false, 47)}; } // namespace config diff --git a/src/core/config/descriptions.h b/src/core/config/descriptions.h index 11f244e6f9..a55548c37a 100644 --- a/src/core/config/descriptions.h +++ b/src/core/config/descriptions.h @@ -42,7 +42,7 @@ constexpr auto kDThreads = "number of threads to use. If 0, then as many threads are used as the " "hardware can handle concurrently."; constexpr auto kDCustomRandom = - "for same answers in tests of approximate algs in different platforms.\n" + "for the same answers in tests of approximate algorithms on different platforms:\n" "first field is flag is enable, second seed of custom random"; constexpr auto kDError = "error threshold value for Approximate FD algorithms"; auto const kDPfdErrorMeasure = details::kDPfdErrorMeasureString.c_str(); diff --git a/src/python_bindings/py_util/py_to_any.cpp b/src/python_bindings/py_util/py_to_any.cpp index 1c21cbd44b..c2ba881fc4 100644 --- a/src/python_bindings/py_util/py_to_any.cpp +++ b/src/python_bindings/py_util/py_to_any.cpp @@ -118,7 +118,8 @@ boost::any CustomRandomFlagToAny(std::string_view option_name, py::handle obj) { } auto tup = py::cast(obj); if (py::len(tup) != 2) { - throw config::ConfigurationError("Tuple for converting into std::pair must get 2 fields."); + throw config::ConfigurationError( + "Tuple for converting into std::pair must contain 2 fields."); } bool flag = CastAndReplaceCastError(option_name, tup[0]); int seed = CastAndReplaceCastError(option_name, tup[1]); diff --git a/src/tests/test_fd_util.h b/src/tests/test_fd_util.h index 37e6dd7ea3..db6ce74183 100644 --- a/src/tests/test_fd_util.h +++ b/src/tests/test_fd_util.h @@ -148,7 +148,7 @@ struct ApproximateDatasets { {tests::kCIPublicHighway, 13035}, {tests::kEpicMeds, 26201}, // answer is 15 / 16 {tests::kEpicVitals, 2083}, - {tests::kIowa1kk, 57837}, // answer is 2531 / 1584 (average 2k, it is bad seed :( + {tests::kIowa1kk, 57837}, // answer is 2531 / 1584 (average 2k, it is bad seed) :( {tests::kLegacyPayors, 43612}, }}; };