From b598f0aaf5916e19bc3760bfcccf5a4957d8c9ac Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Mon, 25 Nov 2024 11:24:42 +0300 Subject: [PATCH 01/32] Minor changes in build.sh Add `set -e` in build.sh to stop build process on error --- build.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index c89d854cab..c031e1ac3a 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,8 @@ #!/bin/bash +# Stop on error: +set -e + function print_help() { cat << EOF Usage: ./build.sh [options] @@ -116,5 +119,5 @@ fi cd .. mkdir -p build cd build -rm CMakeCache.txt +rm -f CMakeCache.txt cmake $PREFIX .. && make $JOBS_OPTION From 3f00f3d8c9bddf28304dfada0f9408c0f75b4116 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Tue, 26 Nov 2024 21:01:43 +0300 Subject: [PATCH 02/32] Add goggletest as SYSTEM subdirectory Add SYSTEM flag to add_subdirectory(".../googletest") in CMakeLists.txt to supress warnings from the inside of googletest --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dd04905570..d70fbf615f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,7 +122,7 @@ include_directories(SYSTEM "lib/easyloggingpp/src" "lib/better-enums/" "lib/emha # adding submodules if (COMPILE_TESTS) - add_subdirectory("lib/googletest") + add_subdirectory("lib/googletest" SYSTEM) endif() set( CMAKE_BUILD_TYPE_COPY "${CMAKE_BUILD_TYPE}" ) From 24780b7e7b86abaf189b2bddc8f996169f9ce0da Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Mon, 25 Nov 2024 11:25:20 +0300 Subject: [PATCH 03/32] Replace INSTANTIATE_TEST_CASE_P with INSTANTIATE_TEST_SUITE_P Replace INSTANTIATE_TEST_CASE_P, which is deprecated, with INSTANTIATE_TEST_SUITE_P in NDVerifier tests --- src/tests/test_nd_verifier.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/test_nd_verifier.cpp b/src/tests/test_nd_verifier.cpp index 9a7884d366..de9e2e3f87 100644 --- a/src/tests/test_nd_verifier.cpp +++ b/src/tests/test_nd_verifier.cpp @@ -51,14 +51,14 @@ INSTANTIATE_TEST_SUITE_P( NDVerifyingParams({1, 2, 3}, {6}, 2) )); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( NDVerifierHeavyDatasets, TestNDVerifying, ::testing::Values( NDVerifyingParams({5}, {6}, 1000000, kIowa1kk), // I just want to see execution time. Real weight doesn't matter (but it shouldn't be very big) NDVerifyingParams({16, 17, 18}, {20, 23}, 1000000, kIowa1kk) // Also, I want to see how execution time depends on number of columns )); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( NDVerifierTestNullEqualNull, TestNDVerifying, ::testing::Values( // 6-th column contains 2 values and 7 empty cells NDVerifyingParams({0}, {6}, 3, kTestND, true), From 096bfb182da317c3531498e4651022b5029a31e6 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Wed, 27 Nov 2024 10:33:28 +0300 Subject: [PATCH 04/32] Mark unused variables as [[maybe_unused]] Mark unused fields as [[maybe_unused]], because clang produces warning that becomes an error in Debug mode. Use MAYBE_UNUSED macro, because g++ produces warning when [[maybe_unused]] macro is present, and clang -- when it isn't. --- .../fd/pyrocommon/model/pli_cache.h | 20 +++++++++++-------- src/core/model/table/agree_set_factory.cpp | 4 ++-- src/core/model/table/vertical_map.cpp | 8 ++++---- src/core/util/maybe_unused.h | 12 +++++++++++ 4 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 src/core/util/maybe_unused.h diff --git a/src/core/algorithms/fd/pyrocommon/model/pli_cache.h b/src/core/algorithms/fd/pyrocommon/model/pli_cache.h index d2933337e0..59f948bf9f 100644 --- a/src/core/algorithms/fd/pyrocommon/model/pli_cache.h +++ b/src/core/algorithms/fd/pyrocommon/model/pli_cache.h @@ -8,6 +8,7 @@ class ProfilingContext; #include "cache_eviction_method.h" #include "caching_method.h" #include "model/table/column_layout_relation_data.h" +#include "util/maybe_unused.h" namespace model { @@ -29,20 +30,23 @@ class PLICache { std::unique_ptr> index_; // usageCounter - for parallelism - int saved_intersections_ = 0; + // All these MAYBE_UNUSED variables are required to support Pyro's caching strategies from our + // ADBIS paper: https://link.springer.com/chapter/10.1007/978-3-030-30278-8_7 + + MAYBE_UNUSED int saved_intersections_ = 0; mutable std::mutex getting_pli_mutex_; CachingMethod caching_method_; - CacheEvictionMethod eviction_method_; - double caching_method_value_; + MAYBE_UNUSED CacheEvictionMethod eviction_method_; + MAYBE_UNUSED double caching_method_value_; // long long maximumAvailableMemory_ = 0; double maximum_entropy_; - double mean_entropy_; - double min_entropy_; - double median_entropy_; - double median_gini_; - double median_inverted_entropy_; + MAYBE_UNUSED double mean_entropy_; + MAYBE_UNUSED double min_entropy_; + MAYBE_UNUSED double median_entropy_; + MAYBE_UNUSED double median_gini_; + MAYBE_UNUSED double median_inverted_entropy_; std::variant> CachingProcess( Vertical const& vertical, std::unique_ptr pli, diff --git a/src/core/model/table/agree_set_factory.cpp b/src/core/model/table/agree_set_factory.cpp index f48cccdb78..73a27fc8e5 100644 --- a/src/core/model/table/agree_set_factory.cpp +++ b/src/core/model/table/agree_set_factory.cpp @@ -159,8 +159,8 @@ AgreeSetFactory::SetOfAgreeSets AgreeSetFactory::GenAsUsingMapOfIdSets() const { */ unsigned short const actual_threads_num = std::min(max_representation.size(), (size_t)config_.threads_num); - auto task = [&identifier_sets, &agree_sets, percent_per_cluster, actual_threads_num, - &map_init_mutex, this, &threads_agree_sets, &map_init_cv, + auto task = [&identifier_sets, percent_per_cluster, actual_threads_num, &map_init_mutex, + this, &threads_agree_sets, &map_init_cv, &map_initialized](SetOfVectors::value_type const& cluster) { std::thread::id const thread_id = std::this_thread::get_id(); diff --git a/src/core/model/table/vertical_map.cpp b/src/core/model/table/vertical_map.cpp index 1e57a1ac6a..6eb8a48a78 100644 --- a/src/core/model/table/vertical_map.cpp +++ b/src/core/model/table/vertical_map.cpp @@ -425,7 +425,7 @@ void VerticalMap::Shrink(double factor, std::function key_queue.push(entry); } }); - unsigned int num_of_removed = 0; + // unsigned int num_of_removed = 0; unsigned int target_size = size_ * factor; while (!key_queue.empty() && size_ > target_size) { auto key = key_queue.top().first; @@ -433,7 +433,7 @@ void VerticalMap::Shrink(double factor, std::function // insert additional logging - num_of_removed++; + // num_of_removed++; Remove(key); } shrink_invocations_++; @@ -467,14 +467,14 @@ void VerticalMap::Shrink(std::unordered_map& usag key_queue.push(entry); } }); - unsigned int num_of_removed = 0; + // unsigned int num_of_removed = 0; while (!key_queue.empty()) { auto key = key_queue.front().first; key_queue.pop(); // insert additional logging - num_of_removed++; + // num_of_removed++; Remove(key); RemoveFromUsageCounter(usage_counter, key); } diff --git a/src/core/util/maybe_unused.h b/src/core/util/maybe_unused.h new file mode 100644 index 0000000000..11c06d619d --- /dev/null +++ b/src/core/util/maybe_unused.h @@ -0,0 +1,12 @@ +#pragma once + +// clang produces warning on unused private fields, so they need to be marked as [[maybe_unused]], +// but g++ doesn't recognize [[maybe_unused]] on class fields and produces warning. +// This macro expands to [[maybe_unused]], when compiler is clang, nop otherwise +// (see https://stackoverflow.com/questions/50646334/maybe-unused-on-member-variable-gcc-warns- +// incorrectly-that-attribute-is and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72789) +#ifdef __clang__ +#define MAYBE_UNUSED [[maybe_unused]] +#else +#define MAYBE_UNUSED /* Ignore */ +#endif From cdd1263d1ca5bcd497d7d11eb6c9fb1b161523c3 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Wed, 27 Nov 2024 10:35:05 +0300 Subject: [PATCH 05/32] Convert values explicitly in tests/test_types.cpp Convert double literals to model::DoubleType and model::IntType explicitly, because otherwise clang produces a warning, that becomes an error in debug mode --- src/tests/test_types.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/tests/test_types.cpp b/src/tests/test_types.cpp index c0ccf2917f..7e11b6b55f 100644 --- a/src/tests/test_types.cpp +++ b/src/tests/test_types.cpp @@ -97,8 +97,8 @@ TYPED_TEST(TestNumeric, Negate) { }; test(0); - test(-123.5); - test(321.4); + test(typename TypeParam::UnderlyingType(-123.5)); + test(typename TypeParam::UnderlyingType(321.4)); } TYPED_TEST(TestNumeric, Abs) { @@ -108,8 +108,8 @@ TYPED_TEST(TestNumeric, Abs) { }; test(0); - test(-123.5); - test(321.4); + test(typename TypeParam::UnderlyingType(-123.5)); + test(typename TypeParam::UnderlyingType(321.4)); } TYPED_TEST(TestNumeric, Add) { @@ -135,7 +135,7 @@ TYPED_TEST(TestNumeric, Div) { test(0, 100); test(22, 1); test(123, 321); - test(11.4, 3.14); + test(Type(11.4), Type(3.14)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -150,7 +150,7 @@ TYPED_TEST(TestNumeric, Sub) { test(0, 100); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -166,7 +166,7 @@ TYPED_TEST(TestNumeric, Mul) { test(100, 0); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -183,7 +183,7 @@ TYPED_TEST(TestNumeric, Pow) { test(0, 100); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -200,7 +200,7 @@ TYPED_TEST(TestNumeric, Dist) { test(0, 100); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -214,8 +214,8 @@ TYPED_TEST(TestNumeric, ValueToString) { test(0); test(123); - test(3.14123123182387); - test(-1231.123456678987654321); + test(typename TypeParam::UnderlyingType(3.14123123182387)); + test(typename TypeParam::UnderlyingType(-1231.123456678987654321)); } struct TestStringParam { From 7894ecf18a51ffedbe9d3660f0af0150f77f1204 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Wed, 27 Nov 2024 10:57:09 +0300 Subject: [PATCH 06/32] Biuld and test with Clang in CI Add run_tests_clang action in core_tests.yml Also, work around clang-18 bug --- .../download-libraries/action.yml | 19 ++++++++++- .github/workflows/core-tests.yml | 34 ++++++++++++++++++- CMakeLists.txt | 10 ++++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/.github/composite-actions/download-libraries/action.yml b/.github/composite-actions/download-libraries/action.yml index 0cc80b53a1..c4bc62bd17 100644 --- a/.github/composite-actions/download-libraries/action.yml +++ b/.github/composite-actions/download-libraries/action.yml @@ -16,6 +16,11 @@ inputs: description: 'Install boost' default: true + install-clang: + type: boolean + description: 'Install clang' + default: false + runs: using: 'composite' steps: @@ -25,6 +30,18 @@ runs: sudo apt-get update -y sudo apt-get install gcc-10 g++-10 cmake build-essential -y shell: bash + if: inputs.install-clang != 'true' + + - name: Install clang + # llvm.sh installs all needed libraries, no need in build-essential + run: | + sudo apt-get update -y + sudo apt-get install cmake make -y + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 17 + shell: bash + if: inputs.install-clang != 'false' - name: Make lib directory run: | @@ -35,7 +52,7 @@ runs: uses: ./.github/composite-actions/download-library with: directory: googletest - download-command: git clone https://github.com/google/googletest/ --branch release-1.12.1 --depth 1 + download-command: git clone https://github.com/google/googletest/ --branch release-1.14.0 --depth 1 if: inputs.download-googletest != 'false' - name: Download easyloggingpp diff --git a/.github/workflows/core-tests.yml b/.github/workflows/core-tests.yml index 058ca3d2fe..e4ec897e22 100644 --- a/.github/workflows/core-tests.yml +++ b/.github/workflows/core-tests.yml @@ -27,7 +27,7 @@ on: #- examples/** workflow_dispatch: jobs: - run_tests: + run_tests_gcc: runs-on: ubuntu-latest strategy: matrix: @@ -44,6 +44,38 @@ jobs: uses: ./.github/composite-actions/download-datasets - name: Build run: | + export CC=gcc-10 + export CXX=g++-10 + if [[ "${{matrix.cfg.BUILD_TYPE}}" == "Debug" ]]; then + ./build.sh --debug --sanitizer=${{ matrix.cfg.SANITIZER }} + else + ./build.sh + fi + - name: Test + working-directory: ${{github.workspace}}/build/target + shell: bash + run: ./Desbordante_test --gtest_filter='*:-*HeavyDatasets*' + run_tests_clang: + runs-on: ubuntu-latest + strategy: + matrix: + cfg: + - { BUILD_TYPE: Release } + - { BUILD_TYPE: Debug } + - { BUILD_TYPE: Debug, SANITIZER : ADDRESS } + - { BUILD_TYPE: Debug, SANITIZER : UB } + steps: + - uses: actions/checkout@v3 + - name: Download libraries + uses: ./.github/composite-actions/download-libraries + with: + install-clang: true + - name: Download datasets + uses: ./.github/composite-actions/download-datasets + - name: Build + run: | + export CC=clang-17 + export CXX=clang++-17 if [[ "${{matrix.cfg.BUILD_TYPE}}" == "Debug" ]]; then ./build.sh --debug --sanitizer=${{ matrix.cfg.SANITIZER }} else diff --git a/CMakeLists.txt b/CMakeLists.txt index d70fbf615f..2ed773246d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,6 +95,16 @@ else() add_compile_options(-ggdb3) endif() + # Workaround clang-18 bug: + # https://github.com/llvm/llvm-project/issues/76515?ysclid=m406q4it5k674680045 + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + string(FIND "${CMAKE_CXX_COMPILER_VERSION}" "18" IDX) + if (IDX EQUAL 0) # clang major version is 18 + message(WARNING "C++ compiler is Clang++-18. Supressing deprecated declaration warnings. Consider using another version of Clang") + string(JOIN ";" DEBUG_BUILD_OPTS "${DEBUG_BUILD_OPTS}" "-Wno-deprecated-declarations") + endif() + endif() + add_compile_options("$<$:${DEBUG_BUILD_OPTS}>") add_link_options("$<$:${DEBUG_LINK_OPTS}>") From 862481ad25ad460c9d782d6c0e499b54eee090da Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Thu, 28 Nov 2024 12:17:04 +0300 Subject: [PATCH 07/32] Fix UB found by clang UB sanitizer - Check that column is numeric *before* casting it to INumericType in DataStats::GetMedianAD - Don't process empty vector in GetPartition in gfd_validation.cpp - Disable tests causing float-to-long cast when result is infinity --- src/core/algorithms/gfd/gfd_validation.cpp | 4 ++++ src/core/algorithms/statistics/data_stats.cpp | 2 +- src/tests/test_types.cpp | 14 ++++++++++---- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/core/algorithms/gfd/gfd_validation.cpp b/src/core/algorithms/gfd/gfd_validation.cpp index 1f9bf64dec..39a3e4b0c9 100644 --- a/src/core/algorithms/gfd/gfd_validation.cpp +++ b/src/core/algorithms/gfd/gfd_validation.cpp @@ -25,6 +25,10 @@ std::vector> GetPartition(std::vector const& can config::ThreadNumType const& threads_num) { std::vector> result = {}; + if (candidates.empty()) { + return {}; + } + int musthave = candidates.size() / threads_num; int oversized_num = candidates.size() % threads_num; diff --git a/src/core/algorithms/statistics/data_stats.cpp b/src/core/algorithms/statistics/data_stats.cpp index 809cf42863..121b044c8a 100644 --- a/src/core/algorithms/statistics/data_stats.cpp +++ b/src/core/algorithms/statistics/data_stats.cpp @@ -464,8 +464,8 @@ Statistic DataStats::GetMedianAD(size_t index) const { return all_stats_[index].median_ad; } mo::TypedColumnData const& col = col_data_[index]; - auto const& type = static_cast(col.GetType()); if (!col.IsNumeric()) return {}; + auto const& type = static_cast(col.GetType()); std::vector data = DeleteNullAndEmpties(index); std::byte* median = MedianOfNumericVector(data, type); diff --git a/src/tests/test_types.cpp b/src/tests/test_types.cpp index 7e11b6b55f..38fcb95e18 100644 --- a/src/tests/test_types.cpp +++ b/src/tests/test_types.cpp @@ -182,10 +182,16 @@ TYPED_TEST(TestNumeric, Pow) { test(0, 100); test(22, 12); - test(123, 321); - test(Type(2.72), Type(1.3123141)); - test(-102, 11); - test(-123, 123); + // 123^321 won't fit into long (i. e. IntType) -- it's UB + if constexpr (!std::is_same_v) { + test(123, 321); + } + test(Type(2.72), 1.3123141); + // -102^11 and -123^123 won't fit into long (i. e. IntType) -- it's UB + if constexpr (!std::is_same_v) { + test(-102, 11); + test(-123, 123); + } test(-21, -7); } From f43886fe0a829a181eb029d4170c9d85c3b7d13a Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sun, 1 Dec 2024 10:51:09 +0300 Subject: [PATCH 08/32] Update README Add Clang installation guide to README.md --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 375a2153fd..f6f75346d8 100644 --- a/README.md +++ b/README.md @@ -245,7 +245,7 @@ The following instructions were tested on Ubuntu 20.04+ LTS and macOS Sonoma 14. ### Dependencies Prior to cloning the repository and attempting to build the project, ensure that you have the following software: -- GNU GCC, version 10+ +- GNU GCC, version 10+ or Clang, version 16+ - CMake, version 3.13+ - Boost library built with GCC, version 1.81.0+ @@ -254,6 +254,7 @@ To use test datasets you will need: #### Ubuntu dependencies installation +##### GCC Run the following commands: ```sh sudo apt install gcc g++ cmake libboost-all-dev git-lfs @@ -263,6 +264,18 @@ export CXX=g++ The last 2 lines set gcc as CMake compiler in your terminal session. You can also add them to the end of `~/.profile` to set this by default in all sessions. +##### Clang +Run the following commands: +```sh +sudo apt install cmake libboost-all-dev git-lfs +bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" +export CC=clang +export CXX=clang++ +``` +Second command installs the latest version of LLVM (which includes Clang). For other installation options, see [LLVM packages page](https://apt.llvm.org/). +The last 2 lines set Clang as CMake compiler in your terminal session. +You can also add them to the end of `~/.profile` to set this by default in all sessions. + #### MacOS dependencies installation Install Xcode Command Line Tools if you don't have them. Run: From fd385d4ae295a91fbb43e0aa78db1592e58c2efc Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sat, 7 Dec 2024 11:14:49 +0300 Subject: [PATCH 09/32] Use &Insert instead of this->Insert in KDTree constructor --- src/core/util/kdtree.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/util/kdtree.h b/src/core/util/kdtree.h index 5e47d1b7de..761f3b394a 100644 --- a/src/core/util/kdtree.h +++ b/src/core/util/kdtree.h @@ -191,12 +191,12 @@ size_t KDTree::Size() const { template KDTree::KDTree(std::vector const& points) : KDTree() { - std::for_each(points.begin(), points.end(), this->Insert); + std::for_each(points.begin(), points.end(), &Insert); } template KDTree::KDTree(std::initializer_list const& points) : KDTree() { - std::for_each(points.begin(), points.end(), this->Insert); + std::for_each(points.begin(), points.end(), &Insert); } template From fafff2055e7ad509bec594ca7578466f76290b81 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sat, 30 Nov 2024 23:16:12 +0300 Subject: [PATCH 10/32] Use proxy class when jthread isn't availible Use simple RAII wrapper of std::thread instead of std::jthread when the latter isn't implemented (which is the case in Apple Clang) --- src/core/util/auto_join_thread.h | 47 ++++++++++++++++++++++++++++++ src/core/util/worker_thread_pool.h | 4 +-- 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 src/core/util/auto_join_thread.h diff --git a/src/core/util/auto_join_thread.h b/src/core/util/auto_join_thread.h new file mode 100644 index 0000000000..21216acf57 --- /dev/null +++ b/src/core/util/auto_join_thread.h @@ -0,0 +1,47 @@ +#pragma once + +#include + +namespace util::jthread { + +/// @brief Simple RAII wrapper for std::thread. Joins on destruction. +/// @remark The class is inspired by Scott Meyers' ThreadRAII (from Effective Modern C++) +class AutoJoinThread { +public: + explicit AutoJoinThread(std::thread&& t) : t(std::move(t)) {} + + AutoJoinThread(AutoJoinThread&&) = default; + AutoJoinThread& operator=(AutoJoinThread&&) = default; + // std::thread is not copyable: + AutoJoinThread(AutoJoinThread&) = delete; + AutoJoinThread& operator=(AutoJoinThread&) = delete; + + template + explicit AutoJoinThread(F&& f, Args&&... args) + : AutoJoinThread(std::thread{std::forward(f), std::forward(args)...}) {} + + ~AutoJoinThread() { + if (t.joinable()) { + t.join(); + } + } + + std::thread& get() { + return t; + } + +private: + std::thread t; +}; + +} // namespace util::jthread + +namespace util { + +#ifdef __cpp_lib_jthread +using JThread = std::jthread; +#else +using JThread = jthread::AutoJoinThread; +#endif + +} // namespace util diff --git a/src/core/util/worker_thread_pool.h b/src/core/util/worker_thread_pool.h index 9d4449245a..37e2cb9e9b 100644 --- a/src/core/util/worker_thread_pool.h +++ b/src/core/util/worker_thread_pool.h @@ -7,11 +7,11 @@ #include #include #include -#include #include #include #include "model/index.h" +#include "util/auto_join_thread.h" #include "util/barrier.h" #include "util/desbordante_assume.h" @@ -44,7 +44,7 @@ class WorkerThreadPool { }; Worker work_; - std::vector worker_threads_; + std::vector worker_threads_; std::vector> tasks_; util::Barrier barrier_; std::condition_variable working_var_; From 8c2d63c0963aad397206125950e0a1c8c5f06c37 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sun, 1 Dec 2024 15:14:32 +0300 Subject: [PATCH 11/32] Use custom implementations of some std::bitset functions Use custom implementations of std::bitset::_Find_dirst and _Find_next when gcc intrinsics aren't availible Use some tricks with boost::dynamic_bitset in fd_tree_bitset to avoid GCC intrinsics _Find_first and _Find_next --- .../algorithms/fd/fdep/fd_tree_element.cpp | 21 +- src/core/algorithms/fd/fdep/fdep.cpp | 6 +- .../od/fastod/model/attribute_set.h | 5 +- src/core/util/bitset_extensions.cpp | 54 +++++ src/core/util/bitset_extensions.h | 191 ++++++++++++++++++ 5 files changed, 262 insertions(+), 15 deletions(-) create mode 100644 src/core/util/bitset_extensions.cpp create mode 100644 src/core/util/bitset_extensions.h diff --git a/src/core/algorithms/fd/fdep/fd_tree_element.cpp b/src/core/algorithms/fd/fdep/fd_tree_element.cpp index a1cd92678e..30b0c2787e 100644 --- a/src/core/algorithms/fd/fdep/fd_tree_element.cpp +++ b/src/core/algorithms/fd/fdep/fd_tree_element.cpp @@ -1,6 +1,7 @@ #include "fd_tree_element.h" #include "boost/dynamic_bitset.hpp" +#include "util/bitset_extensions.h" FDTreeElement::FDTreeElement(size_t max_attribute_number) : max_attribute_number_(max_attribute_number) { @@ -45,7 +46,7 @@ bool FDTreeElement::ContainsGeneralization(std::bitset const& lhs, return true; } - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { return false; } @@ -71,7 +72,7 @@ bool FDTreeElement::GetGeneralizationAndDelete(std::bitset const& l return true; } - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { return false; } @@ -104,7 +105,7 @@ bool FDTreeElement::GetSpecialization(std::bitset const& lhs, size_ bool found = false; size_t attr = (current_attr > 1 ? current_attr : 1); - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { while (!found && attr <= this->max_attribute_number_) { @@ -153,7 +154,8 @@ void FDTreeElement::AddFunctionalDependency(std::bitset const& lhs, FDTreeElement* current_node = this; this->AddRhsAttribute(attr_num); - for (size_t i = lhs._Find_first(); i != kMaxAttrNum; i = lhs._Find_next(i)) { + auto iter = util::MakeBitsetIterator(lhs); + for (size_t i = iter->Pos(); i != kMaxAttrNum; iter->Next(), i = iter->Pos()) { if (current_node->children_[i - 1] == nullptr) { current_node->children_[i - 1] = std::make_unique(this->max_attribute_number_); @@ -215,8 +217,8 @@ void FDTreeElement::PrintDependencies(std::bitset& active_path, std if (this->is_fd_[attr - 1]) { out = "{"; - for (size_t i = active_path._Find_first(); i != kMaxAttrNum; - i = active_path._Find_next(i)) { + auto iter = util::MakeBitsetIterator(active_path); + for (size_t i = iter->Pos(); i != kMaxAttrNum; iter->Next(), i = iter->Pos()) { if (!column_id.empty()) out += column_id + std::to_string(std::stoi(column_names[i - 1]) + 1) + ","; else @@ -257,11 +259,8 @@ void FDTreeElement::TransformTreeFdCollection(std::bitset& active_p for (size_t attr = 1; attr <= this->max_attribute_number_; ++attr) { if (this->is_fd_[attr - 1]) { - boost::dynamic_bitset<> lhs_bitset(this->max_attribute_number_); - for (size_t i = active_path._Find_first(); i != kMaxAttrNum; - i = active_path._Find_next(i)) { - lhs_bitset.set(i - 1); - } + auto lhs_bitset = util::CreateDynamicBitset(active_path, + this->max_attribute_number_); Vertical lhs(scheme.get(), lhs_bitset); Column rhs(scheme.get(), scheme->GetColumn(attr - 1)->GetName(), attr - 1); fd_collection.emplace_back(FD{lhs, rhs, scheme}); diff --git a/src/core/algorithms/fd/fdep/fdep.cpp b/src/core/algorithms/fd/fdep/fdep.cpp index 293902a270..af0507f0e1 100644 --- a/src/core/algorithms/fd/fdep/fdep.cpp +++ b/src/core/algorithms/fd/fdep/fdep.cpp @@ -5,6 +5,7 @@ #include "config/equal_nulls/option.h" #include "config/tabular_data/input_table/option.h" #include "model/table/column_layout_relation_data.h" +#include "util/bitset_extensions.h" // #ifndef PRINT_FDS // #define PRINT_FDS @@ -96,8 +97,9 @@ void FDep::AddViolatedFDs(std::vector const& t1, std::vector con } equal_attr &= (~diff_attr); - for (size_t attr = diff_attr._Find_first(); attr != FDTreeElement::kMaxAttrNum; - attr = diff_attr._Find_next(attr)) { + auto iter = util::MakeBitsetIterator(diff_attr); + for (size_t attr = iter->Pos(); attr != FDTreeElement::kMaxAttrNum; + iter->Next(), attr = iter->Pos()) { this->neg_cover_tree_->AddFunctionalDependency(equal_attr, attr); } } diff --git a/src/core/algorithms/od/fastod/model/attribute_set.h b/src/core/algorithms/od/fastod/model/attribute_set.h index 7ac9cec2a6..c0744f5239 100644 --- a/src/core/algorithms/od/fastod/model/attribute_set.h +++ b/src/core/algorithms/od/fastod/model/attribute_set.h @@ -8,6 +8,7 @@ #include #include "model/table/column_index.h" +#include "util/bitset_extensions.h" namespace algos::fastod { @@ -93,11 +94,11 @@ class AttributeSet { } model::ColumnIndex FindFirst() const noexcept { - return bitset_._Find_first(); + return util::FindFirst(bitset_); } model::ColumnIndex FindNext(model::ColumnIndex pos) const noexcept { - return bitset_._Find_next(pos); + return util::FindNext(bitset_, pos); } std::string ToString() const; diff --git a/src/core/util/bitset_extensions.cpp b/src/core/util/bitset_extensions.cpp new file mode 100644 index 0000000000..164d122d3e --- /dev/null +++ b/src/core/util/bitset_extensions.cpp @@ -0,0 +1,54 @@ +#include "bitset_extensions.h" + +#include +#include + +namespace util::bitset_extensions { + +constexpr unsigned char GetByte(unsigned long long val, size_t byte_num) { + return (val & bytes[byte_num]) >> (byte_num * 8); +} + +size_t FindFirstFixedWidth(std::bitset const& bs) { + if (bs.none()) { + return kWidth; + } + unsigned long long val = bs.to_ullong(); + for (size_t byte_idx{0}; byte_idx < kNumBytes; ++byte_idx) { + auto byte = GetByte(val, byte_idx); + if (byte > 0) { + return byte_idx * 8 + std::countr_zero(byte); + } + } + __builtin_unreachable(); +} + +size_t FindNextFixedWidth(std::bitset const& bs, size_t pos) { + if (bs.none()) { + return kWidth; + } + unsigned long long val = bs.to_ullong(); + size_t start_byte = pos / 8; + size_t bit_pos = pos % 8; + for (size_t byte_idx{start_byte}; byte_idx < kNumBytes; ++byte_idx) { + auto byte = GetByte(val, byte_idx); + if (byte > 0) { + if (byte_idx > start_byte) { + return byte_idx * 8 + std::countr_zero(byte); + } else { + size_t leading_zeros = std::countl_zero(byte); + if (leading_zeros < 7 - bit_pos) { + std::bitset<8> bs{byte}; + for (size_t i{bit_pos + 1}; i < 8; ++i) { + if (bs[i]) { + return start_byte * 8 + i; + } + } + } + } + } + } + return kWidth; +} + +} // namespace util::bitset_extensions diff --git a/src/core/util/bitset_extensions.h b/src/core/util/bitset_extensions.h new file mode 100644 index 0000000000..3f5d9168e7 --- /dev/null +++ b/src/core/util/bitset_extensions.h @@ -0,0 +1,191 @@ +/* This file contains custom implementation of _Find_first and _Find_next gcc-specific methods +(which come from SGI extensions) of std::bitset for 64-bit bitsets. +These implementations are close to what is in SGI (and are competitive in terms of efficiency). +It shouldn't be so hard to adapt them for bitsets of any width -- see, for example, +https://cocode.se/c++/unsigned_split.html. +If you need _Find_first or _Find_next methods, consider using FindFirst and FindNext from this file. +FindFirst and FindNext are wrappers that use custom implementations if (and only if) gcc intrinsiscs +aren't availible. */ + +#pragma once + +#include +#include +#include + +#include + +namespace util { + +/// @brief Wrapper for std::bitset to iterate through set bits. +class IBitsetIterator { +public: + virtual ~IBitsetIterator() {} + + virtual size_t Pos() const noexcept = 0; + virtual void Next() noexcept = 0; +}; + +namespace bitset_extensions { + +static std::vector const bytes{0xff, + 0xff'00, + 0xff'00'00, + 0xff'00'00'00, + 0xff'00'00'00'00, + 0xff'00'00'00'00'00, + 0xff'00'00'00'00'00'00, + 0xff'00'00'00'00'00'00'00}; +constexpr static size_t kNumBytes = 8; +constexpr static size_t kWidth = 64; + +constexpr unsigned char GetByte(unsigned long long val, size_t byte_num); + +size_t FindFirstFixedWidth(std::bitset const&); + +size_t FindNextFixedWidth(std::bitset const&, size_t pos); + +/// @brief Test if T has method _Find_next using SFINAE +template +struct TestBitset { +private: + typedef char yes[1]; + typedef char no[2]; + + template + static yes& test(decltype(&Tp::_Find_next)); + + template + static no& test(...); + +public: + static bool const value = sizeof(test(nullptr)) == sizeof(yes); +}; + +/// @brief Wrapper for std::bitset to iterate through set bits using temporary +/// boost::dynamic_bitset. +template +class DynamicBitsetIterator : public IBitsetIterator { +private: + boost::dynamic_bitset<> bs_; + size_t pos_; + +public: + DynamicBitsetIterator(std::bitset const& bs) : bs_(bs.to_string()), pos_(bs_.find_first()) { + if (pos_ > bs_.size()) { + pos_ = bs_.size(); + } + } + + ~DynamicBitsetIterator() override = default; + + size_t Pos() const noexcept override { + return pos_; + } + + void Next() noexcept override { + pos_ = bs_.find_next(pos_); + if (pos_ > bs_.size()) { + pos_ = bs_.size(); + } + } +}; + +/// @brief Wrapper for std::bitset to iterate through set bits using GCC intrinsics. +/// If reference to bitset is invalidated, behaviour is undefined! +template +class BitsetIterator : public IBitsetIterator { +private: + std::bitset const& bs_; + size_t pos_; + +public: + BitsetIterator(std::bitset const& bs) : bs_(bs), pos_(bs_._Find_first()) {} + + ~BitsetIterator() override = default; + + size_t Pos() const noexcept override { + return pos_; + } + + void Next() noexcept override { + pos_ = bs_._Find_next(pos_); + } +}; + +} // namespace bitset_extensions + +/// @brief Call bs._Find_first if it's availible, use custom implementation otherwise +template >::value, + bool>::type = true> +inline size_t FindFirst(std::bitset const& bs) noexcept { + return bs._Find_first(); +} + +/// @brief Call bs._Find_first if it's availible, use custom implementation otherwise +template >::value>> +inline size_t FindFirst(std::bitset const& bs) noexcept { + return bitset_extensions::FindFirstFixedWidth(bs); +} + +/// @brief Call bs._Find_next if it's availible, use custom implementation otherwise +template >::value, + bool>::type = true> +inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { + return bs._Find_next(pos); +} + +/// @brief Call bs._Find_next if it's availible, use custom implementation otherwise +template >::value>> +inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { + if constexpr (S == 64) { + return bitset_extensions::FindNextFixedWidth(bs, pos); + } else { + // FIXME(senichenkov): implement custom FindNext for 256-bit (or custom width) bitsets + boost::dynamic_bitset<> dbs(bs.to_string()); + auto result = dbs.find_next(pos); + return result <= S ? result : S; + } +} + +/// @brief If _Find_next is availible, copy every set bit, else copy biset to dynamic_bitset +/// through string representation +template >::value, + bool>::type = true> +inline boost::dynamic_bitset<> CreateDynamicBitset(std::bitset const& bs, + std::size_t size = S) noexcept { + boost::dynamic_bitset<> dyn_bitset(size); + for (size_t i = bs._Find_first(); i != S; i = bs._Find_next(i)) { + dyn_bitset.set(i - 1); + } + return dyn_bitset; +} + +/// @brief If _Find_next is availible, copy every set bit, else copy biset to dynamic_bitset +/// through string representation +template >::value>> +inline boost::dynamic_bitset<> CreateDynamicBitset(std::bitset const& bs, + [[maybe_unused]] std::size_t size = S) noexcept { + return boost::dynamic_bitset(bs.to_string()); +} + +/// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else +/// boost::dynamic_bitset set-bits-iterator +template >::value, + bool>::type = true> +inline std::unique_ptr MakeBitsetIterator(std::bitset const& bs) { + return std::make_unique>(bs); +} + +/// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else +/// boost::dynamic_bitset set-bits-iterator +template >::value>> +inline std::unique_ptr MakeBitsetIterator(std::bitset const& bs) { + return std::make_unique>(bs); +} + +} // namespace util From e33a6d1ea0094f8cb500c5062a4b93a0b6a0e12a Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sun, 1 Dec 2024 16:01:09 +0300 Subject: [PATCH 12/32] Use std::chrono::high_resolution_clock in Fastod's timer Use std::chrono::high_resolution_clock instead of std::chrono::_V2::high_resolution_clock in fastod/util/timer.h, because symbol versioning is gcc-specific --- src/core/algorithms/od/fastod/util/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/algorithms/od/fastod/util/timer.h b/src/core/algorithms/od/fastod/util/timer.h index 4601f980bc..23ada501b1 100644 --- a/src/core/algorithms/od/fastod/util/timer.h +++ b/src/core/algorithms/od/fastod/util/timer.h @@ -4,7 +4,7 @@ namespace algos::fastod { -using TimePoint = std::chrono::_V2::high_resolution_clock::time_point; +using TimePoint = std::chrono::high_resolution_clock::time_point; class Timer { private: From 34080fdcdae2ae7f95cd20ab45b1539d7c6ab812 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Wed, 4 Dec 2024 20:09:06 +0300 Subject: [PATCH 13/32] Fix formatting --- src/core/util/auto_join_thread.h | 12 ++++----- src/core/util/bitset_extensions.h | 41 ++++++++++++++++--------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/core/util/auto_join_thread.h b/src/core/util/auto_join_thread.h index 21216acf57..ade34ee8d1 100644 --- a/src/core/util/auto_join_thread.h +++ b/src/core/util/auto_join_thread.h @@ -8,7 +8,7 @@ namespace util::jthread { /// @remark The class is inspired by Scott Meyers' ThreadRAII (from Effective Modern C++) class AutoJoinThread { public: - explicit AutoJoinThread(std::thread&& t) : t(std::move(t)) {} + explicit AutoJoinThread(std::thread&& t) : t_(std::move(t)) {} AutoJoinThread(AutoJoinThread&&) = default; AutoJoinThread& operator=(AutoJoinThread&&) = default; @@ -21,17 +21,17 @@ class AutoJoinThread { : AutoJoinThread(std::thread{std::forward(f), std::forward(args)...}) {} ~AutoJoinThread() { - if (t.joinable()) { - t.join(); + if (t_.joinable()) { + t_.join(); } } - std::thread& get() { - return t; + std::thread& Get() { + return t_; } private: - std::thread t; + std::thread t_; }; } // namespace util::jthread diff --git a/src/core/util/bitset_extensions.h b/src/core/util/bitset_extensions.h index 3f5d9168e7..94eab8e299 100644 --- a/src/core/util/bitset_extensions.h +++ b/src/core/util/bitset_extensions.h @@ -28,14 +28,14 @@ class IBitsetIterator { namespace bitset_extensions { -static std::vector const bytes{0xff, - 0xff'00, - 0xff'00'00, - 0xff'00'00'00, - 0xff'00'00'00'00, - 0xff'00'00'00'00'00, - 0xff'00'00'00'00'00'00, - 0xff'00'00'00'00'00'00'00}; +static std::vector const kBytes{0xff, + 0xff'00, + 0xff'00'00, + 0xff'00'00'00, + 0xff'00'00'00'00, + 0xff'00'00'00'00'00, + 0xff'00'00'00'00'00'00, + 0xff'00'00'00'00'00'00'00}; constexpr static size_t kNumBytes = 8; constexpr static size_t kWidth = 64; @@ -53,15 +53,19 @@ struct TestBitset { typedef char no[2]; template - static yes& test(decltype(&Tp::_Find_next)); + static yes& Test(decltype(&Tp::_Find_next)); template - static no& test(...); + static no& Test(...); public: - static bool const value = sizeof(test(nullptr)) == sizeof(yes); + static bool const kValue = sizeof(Test(nullptr)) == sizeof(yes); }; +/// @brief Test if T has method _Find_next +template +constexpr bool TestBitsetV = TestBitset::kValue; + /// @brief Wrapper for std::bitset to iterate through set bits using temporary /// boost::dynamic_bitset. template @@ -116,29 +120,27 @@ class BitsetIterator : public IBitsetIterator { } // namespace bitset_extensions /// @brief Call bs._Find_first if it's availible, use custom implementation otherwise -template >::value, +template >, bool>::type = true> inline size_t FindFirst(std::bitset const& bs) noexcept { return bs._Find_first(); } /// @brief Call bs._Find_first if it's availible, use custom implementation otherwise -template >::value>> +template >>> inline size_t FindFirst(std::bitset const& bs) noexcept { return bitset_extensions::FindFirstFixedWidth(bs); } /// @brief Call bs._Find_next if it's availible, use custom implementation otherwise -template >::value, +template >, bool>::type = true> inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { return bs._Find_next(pos); } /// @brief Call bs._Find_next if it's availible, use custom implementation otherwise -template >::value>> +template >>> inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { if constexpr (S == 64) { return bitset_extensions::FindNextFixedWidth(bs, pos); @@ -174,7 +176,7 @@ inline boost::dynamic_bitset<> CreateDynamicBitset(std::bitset const& bs, /// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else /// boost::dynamic_bitset set-bits-iterator -template >::value, +template >, bool>::type = true> inline std::unique_ptr MakeBitsetIterator(std::bitset const& bs) { return std::make_unique>(bs); @@ -182,8 +184,7 @@ inline std::unique_ptr MakeBitsetIterator(std::bitset const& /// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else /// boost::dynamic_bitset set-bits-iterator -template >::value>> +template >>> inline std::unique_ptr MakeBitsetIterator(std::bitset const& bs) { return std::make_unique>(bs); } From 9f5e27771cfb009ea0be9123e2a16b2fb929ee4a Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Wed, 4 Dec 2024 20:38:30 +0300 Subject: [PATCH 14/32] Make GetByte in bitset_extensions conditionally constexpr Make GetByte function contexpr only if std::vector::operator[] is contexpr (which isn't on old versions of GCC) --- src/core/util/bitset_extensions.cpp | 4 ++-- src/core/util/bitset_extensions.h | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/core/util/bitset_extensions.cpp b/src/core/util/bitset_extensions.cpp index 164d122d3e..30bbfd2398 100644 --- a/src/core/util/bitset_extensions.cpp +++ b/src/core/util/bitset_extensions.cpp @@ -5,8 +5,8 @@ namespace util::bitset_extensions { -constexpr unsigned char GetByte(unsigned long long val, size_t byte_num) { - return (val & bytes[byte_num]) >> (byte_num * 8); +CONSTEXPR_IF_VECTOR_IS_CONSTEXPR unsigned char GetByte(unsigned long long val, size_t byte_num) { + return (val & kBytes[byte_num]) >> (byte_num * 8); } size_t FindFirstFixedWidth(std::bitset const& bs) { diff --git a/src/core/util/bitset_extensions.h b/src/core/util/bitset_extensions.h index 94eab8e299..0e9fbcf34f 100644 --- a/src/core/util/bitset_extensions.h +++ b/src/core/util/bitset_extensions.h @@ -39,7 +39,16 @@ static std::vector const kBytes{0xff, constexpr static size_t kNumBytes = 8; constexpr static size_t kWidth = 64; -constexpr unsigned char GetByte(unsigned long long val, size_t byte_num); +// std::vector::operator[] is constexpr only since GCC 12 +// (see https://en.cppreference.com/w/cpp/compiler_support) +// FIXME(senichenkov): check old versions of other compilers +#if defined(__GNUC__) && (__GNUC__ < 12) +#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR /* Ignore */ +#else +#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR conxtexpr +#endif + +CONSTEXPR_IF_VECTOR_IS_CONSTEXPR unsigned char GetByte(unsigned long long val, size_t byte_num); size_t FindFirstFixedWidth(std::bitset const&); From 35e0df3f8e70655f6cfafb936a9e8cbfdb93d398 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Wed, 4 Dec 2024 20:58:58 +0300 Subject: [PATCH 15/32] Fix formatting --- src/core/util/bitset_extensions.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/util/bitset_extensions.h b/src/core/util/bitset_extensions.h index 0e9fbcf34f..123fba2f2d 100644 --- a/src/core/util/bitset_extensions.h +++ b/src/core/util/bitset_extensions.h @@ -73,7 +73,7 @@ struct TestBitset { /// @brief Test if T has method _Find_next template -constexpr bool TestBitsetV = TestBitset::kValue; +constexpr bool kTestBitsetV = TestBitset::kValue; /// @brief Wrapper for std::bitset to iterate through set bits using temporary /// boost::dynamic_bitset. @@ -129,27 +129,27 @@ class BitsetIterator : public IBitsetIterator { } // namespace bitset_extensions /// @brief Call bs._Find_first if it's availible, use custom implementation otherwise -template >, +template >, bool>::type = true> inline size_t FindFirst(std::bitset const& bs) noexcept { return bs._Find_first(); } /// @brief Call bs._Find_first if it's availible, use custom implementation otherwise -template >>> +template >>> inline size_t FindFirst(std::bitset const& bs) noexcept { return bitset_extensions::FindFirstFixedWidth(bs); } /// @brief Call bs._Find_next if it's availible, use custom implementation otherwise -template >, +template >, bool>::type = true> inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { return bs._Find_next(pos); } /// @brief Call bs._Find_next if it's availible, use custom implementation otherwise -template >>> +template >>> inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { if constexpr (S == 64) { return bitset_extensions::FindNextFixedWidth(bs, pos); @@ -185,7 +185,7 @@ inline boost::dynamic_bitset<> CreateDynamicBitset(std::bitset const& bs, /// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else /// boost::dynamic_bitset set-bits-iterator -template >, +template >, bool>::type = true> inline std::unique_ptr MakeBitsetIterator(std::bitset const& bs) { return std::make_unique>(bs); @@ -193,7 +193,7 @@ inline std::unique_ptr MakeBitsetIterator(std::bitset const& /// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else /// boost::dynamic_bitset set-bits-iterator -template >>> +template >>> inline std::unique_ptr MakeBitsetIterator(std::bitset const& bs) { return std::make_unique>(bs); } From fdc6aad14a2bb29c65880ccaeaa9f46217bd3367 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sun, 8 Dec 2024 13:23:43 +0300 Subject: [PATCH 16/32] Fix: shift bitset in CreateDynamicBtiset Shift bitset 1 bit left in CreateDynamicBitset (fallback variant), as it done in main (gcc intrinsic) variant. Also, rename this function for clarity. --- .../algorithms/fd/fdep/fd_tree_element.cpp | 4 ++-- src/core/util/bitset_extensions.h | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/core/algorithms/fd/fdep/fd_tree_element.cpp b/src/core/algorithms/fd/fdep/fd_tree_element.cpp index 30b0c2787e..7afb76f689 100644 --- a/src/core/algorithms/fd/fdep/fd_tree_element.cpp +++ b/src/core/algorithms/fd/fdep/fd_tree_element.cpp @@ -259,8 +259,8 @@ void FDTreeElement::TransformTreeFdCollection(std::bitset& active_p for (size_t attr = 1; attr <= this->max_attribute_number_; ++attr) { if (this->is_fd_[attr - 1]) { - auto lhs_bitset = util::CreateDynamicBitset(active_path, - this->max_attribute_number_); + auto lhs_bitset = + util::CreateShiftedDynamicBitset(active_path, this->max_attribute_number_); Vertical lhs(scheme.get(), lhs_bitset); Column rhs(scheme.get(), scheme->GetColumn(attr - 1)->GetName(), attr - 1); fd_collection.emplace_back(FD{lhs, rhs, scheme}); diff --git a/src/core/util/bitset_extensions.h b/src/core/util/bitset_extensions.h index 123fba2f2d..2d1bf171af 100644 --- a/src/core/util/bitset_extensions.h +++ b/src/core/util/bitset_extensions.h @@ -162,11 +162,11 @@ inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { } /// @brief If _Find_next is availible, copy every set bit, else copy biset to dynamic_bitset -/// through string representation -template >::value, +/// through string representation. Bitset is shifted 1 bit left. +template >, bool>::type = true> -inline boost::dynamic_bitset<> CreateDynamicBitset(std::bitset const& bs, - std::size_t size = S) noexcept { +inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset const& bs, + std::size_t size = S) noexcept { boost::dynamic_bitset<> dyn_bitset(size); for (size_t i = bs._Find_first(); i != S; i = bs._Find_next(i)) { dyn_bitset.set(i - 1); @@ -175,12 +175,12 @@ inline boost::dynamic_bitset<> CreateDynamicBitset(std::bitset const& bs, } /// @brief If _Find_next is availible, copy every set bit, else copy biset to dynamic_bitset -/// through string representation -template >::value>> -inline boost::dynamic_bitset<> CreateDynamicBitset(std::bitset const& bs, - [[maybe_unused]] std::size_t size = S) noexcept { - return boost::dynamic_bitset(bs.to_string()); +/// through string representation. Bitset is shifted 1 bit left. +template >>> +inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset const& bs, + std::size_t size = S) noexcept { + size_t start = S - size - 1; + return boost::dynamic_bitset(bs.to_string(), start, size); } /// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else From 752abba16ff153367e97056b23adeb7d29be634f Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Mon, 2 Dec 2024 10:00:33 +0300 Subject: [PATCH 17/32] Use system_clock in fd/sfd/cords Use system_clock everywhere instead of mix of system_clock and high_resolution_clock in Cords::ExecuteInternal --- src/core/algorithms/fd/sfd/cords.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/algorithms/fd/sfd/cords.cpp b/src/core/algorithms/fd/sfd/cords.cpp index af719871a7..5e5ec75c4f 100644 --- a/src/core/algorithms/fd/sfd/cords.cpp +++ b/src/core/algorithms/fd/sfd/cords.cpp @@ -163,7 +163,7 @@ unsigned long long Cords::ExecuteInternal() { Init(column_count, data); - auto start_time = std::chrono::high_resolution_clock::now(); + auto start_time = std::chrono::system_clock::now(); SetProgress(kTotalProgressPercent); ToNextProgressPhase(); From 9b5a39ab1ccd6b1609cec8aaa68b301496be5958 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Mon, 16 Dec 2024 18:35:33 +0300 Subject: [PATCH 18/32] Update README Add instructions how to build boost with clang --- README.md | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f6f75346d8..8bcbe1a0a2 100644 --- a/README.md +++ b/README.md @@ -247,7 +247,7 @@ Prior to cloning the repository and attempting to build the project, ensure that - GNU GCC, version 10+ or Clang, version 16+ - CMake, version 3.13+ -- Boost library built with GCC, version 1.81.0+ +- Boost library built with compiler you're going to use (GCC or Clang), version 1.81.0+ To use test datasets you will need: - Git Large File Storage, version 3.0.2+ @@ -265,15 +265,42 @@ The last 2 lines set gcc as CMake compiler in your terminal session. You can also add them to the end of `~/.profile` to set this by default in all sessions. ##### Clang +Firstly, you'll need to build Boost with Clang, as packaged versions, distributed by package managers, are built with GCC and have different ABI. +Instructions below are given for Boost-1.81.0, but you can use any version, that is greater than 1.81.0. +For further details on Boost installation, please consult [Boost documentation](https://www.boost.org/doc/libs/1_87_0/more/getting_started/unix-variants.html). +1) It's recommended to install Boost under `/usr/local`. You can use any other location, but you'll need to adapt instructions for it. +```sh +cd /usr/lib +``` +2) Download an official Boost distribuition from [SourceForge](https://sourceforge.net/projects/boost/files/boost/1.81.0/) +3) Unpack downloaded archive: +```sh +tar --bzip2 -xf boost_1_81_0.tar.bz2 +``` +4) Compile Boost: +```sh +./bootstrap --with-toolset=clang +./b2 clean +./b2 toolset=clang cxxflags="-stdlib=libc++" linkflags="-stdlib=libc++" +``` +5) Install Boost: +```sh +sudo ./b2 install +``` + Run the following commands: ```sh -sudo apt install cmake libboost-all-dev git-lfs +sudo apt install cmake git-lfs bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" export CC=clang export CXX=clang++ +export CXXFLAGS="-stdlib=libc++" +# libc++ is fully compatible with GCC's ABI, so you can omit the next line if you want to use libstdc++ ABI: +export LDFLAGS="-lc++abi" # Use [libc++abi](https://libcxxabi.llvm.org/index.html). +export BOOST_ROOT="/usr/local/" ``` Second command installs the latest version of LLVM (which includes Clang). For other installation options, see [LLVM packages page](https://apt.llvm.org/). -The last 2 lines set Clang as CMake compiler in your terminal session. +The last 5 lines set Clang as CMake compiler in your terminal session and set directory where Boost libraries are located. You can also add them to the end of `~/.profile` to set this by default in all sessions. #### MacOS dependencies installation From f19759b8b89223673126455e408db7a68b51af97 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Mon, 16 Dec 2024 18:36:11 +0300 Subject: [PATCH 19/32] Use libc++ and build boost with clang in CI Link clang build with libc++ instead of libstc++ in CI Also build boost with clang in CI --- .../download-libraries/action.yml | 22 +++++++++++++++---- .github/workflows/core-tests.yml | 4 ++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/composite-actions/download-libraries/action.yml b/.github/composite-actions/download-libraries/action.yml index c4bc62bd17..6cc7430ce2 100644 --- a/.github/composite-actions/download-libraries/action.yml +++ b/.github/composite-actions/download-libraries/action.yml @@ -11,11 +11,16 @@ inputs: description: 'Download googletest' default: true - install-boost: + install-boost-gcc: type: boolean - description: 'Install boost' + description: 'Install boost built with GCC' default: true + install-boost-clang: + type: boolean + description: 'Install boost built with clang' + default: false + install-clang: type: boolean description: 'Install clang' @@ -89,13 +94,22 @@ runs: directory: boost download-command: wget -O boost_1_81_0.tar.gz https://sourceforge.net/projects/boost/files/boost/1.81.0/boost_1_81_0.tar.gz/download && tar xzvf boost_1_81_0.tar.gz && mv boost_1_81_0 boost - - name: Install Boost + - name: Install Boost built with GCC run: | cd lib/boost ./bootstrap.sh --prefix=/usr sudo ./b2 install --prefix=/usr shell: bash - if: inputs.install-boost != 'false' + if: inputs.install-boost-gcc != 'false' + - name: Install Boost build with clang + run: | + cd lib/boost + ./bootstrap.sh --prefix=/usr --with-toolset=clang + ./b2 clean + ./b2 toolset=clang cxxflags="-stdlib=libc++" linkflags="-stdlib=libc++" + sudo ./b2 install --prefix=/usr + shell: bash + if: inputs.install-boost-clang != 'false' - name: Download frozen uses: ./.github/composite-actions/download-library with: diff --git a/.github/workflows/core-tests.yml b/.github/workflows/core-tests.yml index e4ec897e22..2feaca0ebe 100644 --- a/.github/workflows/core-tests.yml +++ b/.github/workflows/core-tests.yml @@ -70,12 +70,16 @@ jobs: uses: ./.github/composite-actions/download-libraries with: install-clang: true + install-boost-gcc: false + install-boost-clang: true - name: Download datasets uses: ./.github/composite-actions/download-datasets - name: Build run: | export CC=clang-17 export CXX=clang++-17 + export CXXFLAGS="-stdlib=libc++" + export LDFLAGS="-lc++abi" if [[ "${{matrix.cfg.BUILD_TYPE}}" == "Debug" ]]; then ./build.sh --debug --sanitizer=${{ matrix.cfg.SANITIZER }} else From bdb1d4e59b8895d3ce8d65092c7562f8674763e8 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Mon, 16 Dec 2024 19:21:17 +0300 Subject: [PATCH 20/32] Catch exceptions in AutoJoinThread's destructor Don't violate implicit noexcept(true) in AutoJoinThread's destructor --- src/core/util/auto_join_thread.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/core/util/auto_join_thread.h b/src/core/util/auto_join_thread.h index ade34ee8d1..c217e564a0 100644 --- a/src/core/util/auto_join_thread.h +++ b/src/core/util/auto_join_thread.h @@ -1,7 +1,10 @@ #pragma once +#include #include +#include + namespace util::jthread { /// @brief Simple RAII wrapper for std::thread. Joins on destruction. @@ -20,10 +23,13 @@ class AutoJoinThread { explicit AutoJoinThread(F&& f, Args&&... args) : AutoJoinThread(std::thread{std::forward(f), std::forward(args)...}) {} - ~AutoJoinThread() { + ~AutoJoinThread() try { if (t_.joinable()) { t_.join(); } + } catch (std::system_error const& e) { + LOG(ERROR) << e.what(); + return; // Don't pass exception on } std::thread& Get() { From 4c9522285c6a5b58009119d1cb90543ee86dbdc1 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Tue, 26 Nov 2024 21:01:43 +0300 Subject: [PATCH 21/32] Update googletest version from 1.13.0 to 1.14.0 Version 1.13.0 of googletest produces "narrowing conversion" warning due to a bug (issue #4206, https://github.com/google/googletest/issues/4206) when building with Clang. This bug was fixed in 1.14.0 (commit 3044657, https://github.com/google/googletest/commit/3044657e7afa759ce875a8161cd4bff0fd2e22bc) --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index c031e1ac3a..15273fa434 100755 --- a/build.sh +++ b/build.sh @@ -88,7 +88,7 @@ if [[ $NO_TESTS == true ]]; then PREFIX="$PREFIX -D COMPILE_TESTS=OFF" else if [[ ! -d "googletest" ]] ; then - git clone https://github.com/google/googletest/ --branch v1.13.0 --depth 1 + git clone https://github.com/google/googletest/ --branch v1.14.0 --depth 1 fi fi From dd75964855add1d9205fe9915b19eb1138caa586 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Mon, 16 Dec 2024 19:42:09 +0300 Subject: [PATCH 22/32] Install all LLVM packages in CI Install all LLVM packages in install-clang action in CI, not only LLVM and Clang --- .github/composite-actions/download-libraries/action.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/composite-actions/download-libraries/action.yml b/.github/composite-actions/download-libraries/action.yml index 6cc7430ce2..0022cbb21b 100644 --- a/.github/composite-actions/download-libraries/action.yml +++ b/.github/composite-actions/download-libraries/action.yml @@ -39,12 +39,13 @@ runs: - name: Install clang # llvm.sh installs all needed libraries, no need in build-essential + # "all" option is needed to install libc++ and libc++abi run: | sudo apt-get update -y sudo apt-get install cmake make -y wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh - sudo ./llvm.sh 17 + sudo ./llvm.sh 17 all shell: bash if: inputs.install-clang != 'false' @@ -101,7 +102,7 @@ runs: sudo ./b2 install --prefix=/usr shell: bash if: inputs.install-boost-gcc != 'false' - - name: Install Boost build with clang + - name: Install Boost built with clang run: | cd lib/boost ./bootstrap.sh --prefix=/usr --with-toolset=clang From c51921b8966847cdaf18243154ad64cb23e067d2 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Wed, 18 Dec 2024 13:07:40 +0300 Subject: [PATCH 23/32] Use concepts instead of SFINAE in bitset_extensions Use concepts to test GCC intrinsics precence, instead of SFINAE --- src/core/util/bitset_extensions.h | 55 ++++++++++--------------------- 1 file changed, 17 insertions(+), 38 deletions(-) diff --git a/src/core/util/bitset_extensions.h b/src/core/util/bitset_extensions.h index 2d1bf171af..78a30ce4f5 100644 --- a/src/core/util/bitset_extensions.h +++ b/src/core/util/bitset_extensions.h @@ -39,13 +39,10 @@ static std::vector const kBytes{0xff, constexpr static size_t kNumBytes = 8; constexpr static size_t kWidth = 64; -// std::vector::operator[] is constexpr only since GCC 12 -// (see https://en.cppreference.com/w/cpp/compiler_support) -// FIXME(senichenkov): check old versions of other compilers -#if defined(__GNUC__) && (__GNUC__ < 12) -#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR /* Ignore */ +#if (__cpp_lib_constexpr_vector == 201907L) +#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR constexpr #else -#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR conxtexpr +#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR /* Ignore */ #endif CONSTEXPR_IF_VECTOR_IS_CONSTEXPR unsigned char GetByte(unsigned long long val, size_t byte_num); @@ -54,26 +51,8 @@ size_t FindFirstFixedWidth(std::bitset const&); size_t FindNextFixedWidth(std::bitset const&, size_t pos); -/// @brief Test if T has method _Find_next using SFINAE -template -struct TestBitset { -private: - typedef char yes[1]; - typedef char no[2]; - - template - static yes& Test(decltype(&Tp::_Find_next)); - - template - static no& Test(...); - -public: - static bool const kValue = sizeof(Test(nullptr)) == sizeof(yes); -}; - -/// @brief Test if T has method _Find_next -template -constexpr bool kTestBitsetV = TestBitset::kValue; +template +concept HasFindFirst = requires(Bitset bs) { bs._Find_first(); }; /// @brief Wrapper for std::bitset to iterate through set bits using temporary /// boost::dynamic_bitset. @@ -129,27 +108,27 @@ class BitsetIterator : public IBitsetIterator { } // namespace bitset_extensions /// @brief Call bs._Find_first if it's availible, use custom implementation otherwise -template >, - bool>::type = true> +template + requires bitset_extensions::HasFindFirst> inline size_t FindFirst(std::bitset const& bs) noexcept { return bs._Find_first(); } /// @brief Call bs._Find_first if it's availible, use custom implementation otherwise -template >>> +template inline size_t FindFirst(std::bitset const& bs) noexcept { return bitset_extensions::FindFirstFixedWidth(bs); } /// @brief Call bs._Find_next if it's availible, use custom implementation otherwise -template >, - bool>::type = true> +template + requires bitset_extensions::HasFindFirst> inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { return bs._Find_next(pos); } /// @brief Call bs._Find_next if it's availible, use custom implementation otherwise -template >>> +template inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { if constexpr (S == 64) { return bitset_extensions::FindNextFixedWidth(bs, pos); @@ -163,8 +142,8 @@ inline size_t FindNext(std::bitset const& bs, size_t pos) noexcept { /// @brief If _Find_next is availible, copy every set bit, else copy biset to dynamic_bitset /// through string representation. Bitset is shifted 1 bit left. -template >, - bool>::type = true> +template + requires bitset_extensions::HasFindFirst> inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset const& bs, std::size_t size = S) noexcept { boost::dynamic_bitset<> dyn_bitset(size); @@ -176,7 +155,7 @@ inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset const& /// @brief If _Find_next is availible, copy every set bit, else copy biset to dynamic_bitset /// through string representation. Bitset is shifted 1 bit left. -template >>> +template inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset const& bs, std::size_t size = S) noexcept { size_t start = S - size - 1; @@ -185,15 +164,15 @@ inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset const& /// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else /// boost::dynamic_bitset set-bits-iterator -template >, - bool>::type = true> +template + requires bitset_extensions::HasFindFirst> inline std::unique_ptr MakeBitsetIterator(std::bitset const& bs) { return std::make_unique>(bs); } /// @brief If _Find_next is availible, create std::bitset set-bits-iterator, else /// boost::dynamic_bitset set-bits-iterator -template >>> +template inline std::unique_ptr MakeBitsetIterator(std::bitset const& bs) { return std::make_unique>(bs); } From 90efdf0b4f063ffc533e5075c625395c1b8ea51f Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Mon, 9 Dec 2024 23:19:30 +0300 Subject: [PATCH 24/32] Use stable_sort in PDFTane::CalculatePFDError Use stable_sort instead of sort to avoid relying on unspecified behaviour (wheter sort preserves order of equal elements) --- src/core/algorithms/fd/tane/pfdtane.cpp | 10 ++++++---- src/tests/test_pfdtane.cpp | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/core/algorithms/fd/tane/pfdtane.cpp b/src/core/algorithms/fd/tane/pfdtane.cpp index 68c7ccc106..2dfd0a013b 100644 --- a/src/core/algorithms/fd/tane/pfdtane.cpp +++ b/src/core/algorithms/fd/tane/pfdtane.cpp @@ -1,5 +1,7 @@ #include "pfdtane.h" +#include + #include "config/error/option.h" #include "config/error_measure/option.h" #include "enums.h" @@ -48,10 +50,10 @@ config::ErrorType PFDTane::CalculatePFDError(model::PositionListIndex const* x_p std::deque xa_index = xa_pli->GetIndex(); std::shared_ptr probing_table_ptr = x_pli->CalculateAndGetProbingTable(); auto const& probing_table = *probing_table_ptr; - std::sort(xa_index.begin(), xa_index.end(), - [&probing_table](Cluster const& a, Cluster const& b) { - return probing_table[a.front()] < probing_table[b.front()]; - }); + std::stable_sort(xa_index.begin(), xa_index.end(), + [&probing_table](Cluster const& a, Cluster const& b) { + return probing_table[a.front()] < probing_table[b.front()]; + }); double sum = 0.0; std::size_t cluster_rows_count = 0; std::deque const& x_index = x_pli->GetIndex(); diff --git a/src/tests/test_pfdtane.cpp b/src/tests/test_pfdtane.cpp index 1721ec7cf7..a93c085566 100644 --- a/src/tests/test_pfdtane.cpp +++ b/src/tests/test_pfdtane.cpp @@ -65,9 +65,9 @@ INSTANTIATE_TEST_SUITE_P( PFDTaneTestMiningSuite, TestPFDTaneMining, ::testing::Values( PFDTaneMiningParams(44381, 0.3, +algos::PfdErrorMeasure::per_value, kTestFD), - PFDTaneMiningParams(39491, 0.1, +algos::PfdErrorMeasure::per_value, kIris), + PFDTaneMiningParams(19266, 0.1, +algos::PfdErrorMeasure::per_value, kIris), PFDTaneMiningParams(10695, 0.01, +algos::PfdErrorMeasure::per_value, kIris), - PFDTaneMiningParams(7893, 0.1, +algos::PfdErrorMeasure::per_value, kNeighbors10k), + PFDTaneMiningParams(44088, 0.1, +algos::PfdErrorMeasure::per_value, kNeighbors10k), PFDTaneMiningParams(41837, 0.01, +algos::PfdErrorMeasure::per_value, kNeighbors10k) )); From 7adcb42edbdd80af9392b24a58fb5d32b9206e06 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sun, 8 Dec 2024 23:00:40 +0300 Subject: [PATCH 25/32] Sort pairs by value if frequencies are equal in Cords Sort pairs by value if frequencies are equal in Cords' FrequencyHandler::InitFrequencyHandler --- .../algorithms/fd/sfd/frequency_handler.cpp | 5 +- src/tests/test_sfd.cpp | 62 +++++++++---------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/core/algorithms/fd/sfd/frequency_handler.cpp b/src/core/algorithms/fd/sfd/frequency_handler.cpp index 0d2d700600..3157d14462 100644 --- a/src/core/algorithms/fd/sfd/frequency_handler.cpp +++ b/src/core/algorithms/fd/sfd/frequency_handler.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -30,7 +31,9 @@ void FrequencyHandler::InitFrequencyHandler(std::vector auto cmp = [](std::pair const &left, std::pair const &right) { - return left.second > right.second; + // Compare frequencies. + // If frequencies are equal, compare values lexicographically. + return std::tie(left.second, left.first) > std::tie(right.second, right.first); }; std::sort(values_ordered_by_frequencies.begin(), values_ordered_by_frequencies.end(), cmp); diff --git a/src/tests/test_sfd.cpp b/src/tests/test_sfd.cpp index f754af2966..0392b6bda8 100644 --- a/src/tests/test_sfd.cpp +++ b/src/tests/test_sfd.cpp @@ -48,41 +48,41 @@ namespace tests { TEST(TestCordsUtils, FrequenciesOfIris) { std::vector>> expected = { - {{"7.400000", 34}, {"7.600000", 33}, {"4.300000", 32}, {"7.100000", 31}, - {"6.100000", 11}, {"5.400000", 8}, {"6.600000", 25}, {"6.400000", 7}, - {"5.800000", 5}, {"4.800000", 14}, {"5.200000", 18}, {"4.900000", 9}, - {"5.500000", 6}, {"5.700000", 4}, {"4.600000", 16}, {"5.100000", 2}, - {"6.000000", 10}, {"5.600000", 12}, {"4.500000", 27}, {"6.700000", 3}, - {"6.300000", 1}, {"6.500000", 13}, {"6.200000", 15}, {"7.300000", 30}, - {"7.900000", 29}, {"6.900000", 17}, {"6.800000", 20}, {"7.000000", 28}, - {"5.900000", 21}, {"4.700000", 24}, {"5.000000", 0}, {"4.400000", 22}, - {"7.700000", 19}, {"7.200000", 23}, {"5.300000", 26}}, - - {{"4.200000", 21}, {"4.400000", 19}, {"4.000000", 18}, {"2.400000", 16}, - {"3.700000", 15}, {"4.100000", 20}, {"3.600000", 14}, {"2.800000", 1}, + {{"4.300000", 34}, {"4.500000", 33}, {"5.300000", 32}, {"7.100000", 30}, + {"5.400000", 11}, {"6.600000", 24}, {"6.100000", 8}, {"7.600000", 27}, + {"5.800000", 6}, {"4.800000", 14}, {"5.200000", 18}, {"6.400000", 5}, + {"4.900000", 12}, {"5.500000", 7}, {"5.700000", 4}, {"4.600000", 19}, + {"5.100000", 2}, {"6.000000", 9}, {"5.600000", 10}, {"6.700000", 3}, + {"6.300000", 1}, {"6.500000", 13}, {"6.200000", 17}, {"7.700000", 15}, + {"7.200000", 20}, {"7.300000", 29}, {"6.900000", 16}, {"7.900000", 26}, + {"6.800000", 21}, {"7.000000", 31}, {"5.900000", 22}, {"4.700000", 25}, + {"5.000000", 0}, {"4.400000", 23}, {"7.400000", 28}}, + + {{"4.000000", 21}, {"4.200000", 19}, {"4.400000", 18}, {"2.400000", 15}, + {"4.100000", 20}, {"3.600000", 14}, {"3.700000", 13}, {"2.800000", 1}, {"3.800000", 8}, {"3.200000", 2}, {"3.900000", 17}, {"3.100000", 4}, {"3.300000", 10}, {"2.900000", 5}, {"2.000000", 22}, {"2.500000", 7}, - {"3.400000", 3}, {"3.500000", 9}, {"2.300000", 12}, {"2.200000", 13}, + {"3.400000", 3}, {"3.500000", 9}, {"2.300000", 12}, {"2.200000", 16}, {"3.000000", 0}, {"2.700000", 6}, {"2.600000", 11}}, - {{"6.900000", 42}, {"6.300000", 41}, {"6.600000", 40}, {"1.000000", 39}, - {"1.100000", 38}, {"3.000000", 37}, {"3.600000", 34}, {"5.400000", 32}, - {"6.700000", 31}, {"5.300000", 30}, {"5.900000", 29}, {"4.900000", 7}, - {"5.800000", 15}, {"3.700000", 36}, {"5.200000", 27}, {"4.800000", 10}, - {"1.900000", 28}, {"4.500000", 3}, {"1.300000", 4}, {"5.100000", 2}, - {"6.000000", 23}, {"1.600000", 5}, {"4.400000", 12}, {"5.000000", 11}, - {"6.400000", 35}, {"3.500000", 24}, {"1.500000", 0}, {"4.200000", 13}, - {"6.100000", 17}, {"3.800000", 33}, {"1.400000", 1}, {"5.700000", 16}, - {"4.600000", 19}, {"5.500000", 18}, {"1.200000", 22}, {"4.100000", 20}, - {"4.000000", 8}, {"5.600000", 6}, {"3.900000", 21}, {"4.700000", 9}, - {"1.700000", 14}, {"3.300000", 25}, {"4.300000", 26}}, - - {{"0.600000", 20}, {"1.700000", 19}, {"1.100000", 18}, {"1.900000", 13}, - {"0.200000", 0}, {"2.400000", 16}, {"1.300000", 1}, {"2.100000", 10}, - {"1.800000", 2}, {"2.200000", 15}, {"0.400000", 6}, {"1.500000", 3}, - {"0.100000", 9}, {"1.400000", 4}, {"2.300000", 5}, {"0.300000", 7}, - {"0.500000", 21}, {"2.500000", 17}, {"1.600000", 14}, {"2.000000", 11}, - {"1.200000", 12}, {"1.000000", 8}}, + {{"1.000000", 42}, {"1.100000", 41}, {"3.000000", 40}, {"3.600000", 39}, + {"6.300000", 36}, {"6.600000", 34}, {"6.900000", 33}, {"5.900000", 24}, + {"4.900000", 7}, {"4.500000", 3}, {"1.300000", 5}, {"1.900000", 31}, + {"4.800000", 11}, {"5.800000", 16}, {"3.700000", 38}, {"5.200000", 27}, + {"5.400000", 25}, {"5.100000", 2}, {"6.000000", 23}, {"1.600000", 4}, + {"4.400000", 12}, {"5.000000", 10}, {"6.700000", 22}, {"6.400000", 35}, + {"3.500000", 29}, {"1.500000", 0}, {"4.200000", 13}, {"6.100000", 15}, + {"3.300000", 30}, {"4.700000", 8}, {"1.700000", 14}, {"3.800000", 37}, + {"1.400000", 1}, {"5.700000", 17}, {"4.600000", 19}, {"5.500000", 18}, + {"1.200000", 32}, {"4.100000", 20}, {"4.000000", 9}, {"5.600000", 6}, + {"3.900000", 21}, {"5.300000", 26}, {"4.300000", 28}}, + + {{"0.600000", 20}, {"1.700000", 19}, {"1.100000", 18}, {"1.200000", 13}, + {"0.200000", 0}, {"2.400000", 16}, {"1.300000", 1}, {"2.100000", 9}, + {"1.800000", 2}, {"2.200000", 17}, {"0.400000", 7}, {"1.500000", 3}, + {"0.100000", 11}, {"2.300000", 4}, {"1.400000", 5}, {"1.000000", 6}, + {"0.300000", 8}, {"0.500000", 21}, {"2.500000", 15}, {"1.600000", 14}, + {"2.000000", 10}, {"1.900000", 12}}, {{"Iris-setosa", 2}, {"Iris-versicolor", 1}, {"Iris-virginica", 0}}}; From 7940b8aefbf98194af3b43ac9e8029d9da0ec8a3 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Thu, 12 Dec 2024 22:44:53 +0300 Subject: [PATCH 26/32] Use revese iterators in egfd_validation.cpp: ReverseConstruction Use reverse iterators to traverse set in reverse order, instead of messing with normal iterators --- src/core/algorithms/gfd/egfd_validation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/algorithms/gfd/egfd_validation.cpp b/src/core/algorithms/gfd/egfd_validation.cpp index 21bf4be6a5..307b2fff42 100644 --- a/src/core/algorithms/gfd/egfd_validation.cpp +++ b/src/core/algorithms/gfd/egfd_validation.cpp @@ -342,7 +342,7 @@ void ReverseConstruction(std::set const& lev, graph_t const& graph, gr std::map>& candidates, std::map& cnts, std::map>& unvisited_neighbours) { - for (std::set::iterator j = --lev.end(); j != std::next(lev.begin(), -1); --j) { + for (auto j = lev.rbegin(); j != lev.rend(); ++j) { vertex_t u = *j; int cnt = 0; if (unvisited_neighbours.find(u) != unvisited_neighbours.end()) { From 8d17f04a43d2dc6229cb5d93cb2dd7e836beaee8 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Thu, 19 Dec 2024 19:53:14 +0300 Subject: [PATCH 27/32] Add explicit 'install-gcc' flag in download-libraries --- .github/composite-actions/download-libraries/action.yml | 9 +++++++-- .github/workflows/core-tests.yml | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/composite-actions/download-libraries/action.yml b/.github/composite-actions/download-libraries/action.yml index 0022cbb21b..dd32e86f76 100644 --- a/.github/composite-actions/download-libraries/action.yml +++ b/.github/composite-actions/download-libraries/action.yml @@ -21,9 +21,14 @@ inputs: description: 'Install boost built with clang' default: false + install-gcc: + type: boolean + description: 'Install GCC toolset (compiler and build tools)' + default: true + install-clang: type: boolean - description: 'Install clang' + description: 'Install clang toolset (compiler and build tools)' default: false runs: @@ -35,7 +40,7 @@ runs: sudo apt-get update -y sudo apt-get install gcc-10 g++-10 cmake build-essential -y shell: bash - if: inputs.install-clang != 'true' + if: inputs.install-gcc != 'false' - name: Install clang # llvm.sh installs all needed libraries, no need in build-essential diff --git a/.github/workflows/core-tests.yml b/.github/workflows/core-tests.yml index 2feaca0ebe..e5718a1c47 100644 --- a/.github/workflows/core-tests.yml +++ b/.github/workflows/core-tests.yml @@ -69,6 +69,7 @@ jobs: - name: Download libraries uses: ./.github/composite-actions/download-libraries with: + install-gcc: false install-clang: true install-boost-gcc: false install-boost-clang: true From eb39b0cc5804b5b3db4e232e33f38e922b693e13 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sat, 21 Dec 2024 13:59:41 +0300 Subject: [PATCH 28/32] Disable false positive ASAN check Disable alloc-dealloc-mismatch check as it's false positive on Ubuntu (see comment in CMakeLists.txt) --- CMakeLists.txt | 16 ++++++++++++++++ address_sanitizer_ignore_list.txt | 4 ++++ 2 files changed, 20 insertions(+) create mode 100644 address_sanitizer_ignore_list.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ed773246d..b6049fc63f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,22 @@ else() if (ASAN) # Set DEBUG build options specific for build with ASAN set(ASAN_OPTS "-fsanitize=address") + + find_program(APT_FOUND apt-get) + if (APT_FOUND AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # alloc-dealloc-mismatch generates false positives on boost exceptions + # This applies only to Ubuntu package: + # https://github.com/llvm/llvm-project/issues/59432?ysclid=m4y0iqca2c577414782 + # Disable this check on files listed in address_sanitizer_ignore_list.txt if compiler + # is Clang and apt-get is installed on system: + # FIXME(senichenkov): apt-get is not an ideal check -- maybe it wouldn't be so hard to + # ask apt-get if repository is ubuntu-...? + message("Running on Ubuntu") + message(WARNING "ASAN is broken in Ubuntu package, therefore alloc-dealloc-mismatch") + message(WARNING "check will be supressed. Consider using another distro for full ASAN coverage.") + string(JOIN ";" ASAN_OPTS "-fsanitize-ignorelist=${CMAKE_SOURCE_DIR}/address_sanitizer_ignore_list.txt") + endif() + string(JOIN ";" DEBUG_BUILD_OPTS "${DEBUG_BUILD_OPTS}" "-O1" "-Wno-error" # Use of -Werror is discouraged with sanitizers diff --git a/address_sanitizer_ignore_list.txt b/address_sanitizer_ignore_list.txt new file mode 100644 index 0000000000..e14a6175f9 --- /dev/null +++ b/address_sanitizer_ignore_list.txt @@ -0,0 +1,4 @@ +# Disable alloc_dealloc_mismatch ASAN check +[alloc_dealloc_mismatch] +# in file: +src:typed_column_data.h From f8571d4058f8c4c7978bf46f6511bf665dcb718a Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sat, 21 Dec 2024 17:21:36 +0300 Subject: [PATCH 29/32] Fix: don't build boost in wheel.yml --- .github/workflows/wheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 451a6b4293..712d1206c0 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -75,7 +75,7 @@ jobs: with: download-pybind: true download-googletest: false - install-boost: false + install-boost-gcc: false - name: Build wheels uses: pypa/cibuildwheel@v2.16.2 From 53c8e42a8e6f3d327bc3a893e9a62ec99874109f Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sat, 21 Dec 2024 19:07:35 +0300 Subject: [PATCH 30/32] Fix: build boost in /usr/local instead of /usr/lib --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8bcbe1a0a2..d08e006365 100644 --- a/README.md +++ b/README.md @@ -270,7 +270,7 @@ Instructions below are given for Boost-1.81.0, but you can use any version, that For further details on Boost installation, please consult [Boost documentation](https://www.boost.org/doc/libs/1_87_0/more/getting_started/unix-variants.html). 1) It's recommended to install Boost under `/usr/local`. You can use any other location, but you'll need to adapt instructions for it. ```sh -cd /usr/lib +cd /usr/local ``` 2) Download an official Boost distribuition from [SourceForge](https://sourceforge.net/projects/boost/files/boost/1.81.0/) 3) Unpack downloaded archive: From 06220e491fa521798bb8cae18932b56752acc351 Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Sat, 21 Dec 2024 19:58:41 +0300 Subject: [PATCH 31/32] Update README Remove comment about libc++abi in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d08e006365..ba4633a2a0 100644 --- a/README.md +++ b/README.md @@ -296,7 +296,7 @@ export CC=clang export CXX=clang++ export CXXFLAGS="-stdlib=libc++" # libc++ is fully compatible with GCC's ABI, so you can omit the next line if you want to use libstdc++ ABI: -export LDFLAGS="-lc++abi" # Use [libc++abi](https://libcxxabi.llvm.org/index.html). +export LDFLAGS="-lc++abi" export BOOST_ROOT="/usr/local/" ``` Second command installs the latest version of LLVM (which includes Clang). For other installation options, see [LLVM packages page](https://apt.llvm.org/). From a97055e8da5f99fd3a5a91596283e4c570aa9a3f Mon Sep 17 00:00:00 2001 From: Senichenkov Date: Fri, 3 Jan 2025 14:20:47 +0300 Subject: [PATCH 32/32] Fix some uncertainties in README - Now link to boost documentation in README points on 1.81.0 instead of 1.87.0 - Fix some errors in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ba4633a2a0..fe07566b6c 100644 --- a/README.md +++ b/README.md @@ -266,9 +266,9 @@ You can also add them to the end of `~/.profile` to set this by default in all s ##### Clang Firstly, you'll need to build Boost with Clang, as packaged versions, distributed by package managers, are built with GCC and have different ABI. -Instructions below are given for Boost-1.81.0, but you can use any version, that is greater than 1.81.0. -For further details on Boost installation, please consult [Boost documentation](https://www.boost.org/doc/libs/1_87_0/more/getting_started/unix-variants.html). -1) It's recommended to install Boost under `/usr/local`. You can use any other location, but you'll need to adapt instructions for it. +Instructions below are given for Boost-1.81.0. If you want to use another version, you'll need to change `tar ...` command on step 3. +For further details on Boost installation, please consult [Boost documentation](https://www.boost.org/doc/libs/1_81_0/more/getting_started/unix-variants.html). +1) It's recommended to install Boost into `/usr/local`. You can use any other location, but you'll need to adapt instructions for it. ```sh cd /usr/local ```