From 89e1dd0793c64614d2a1ae3c0d33096461767842 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 19 Sep 2024 10:07:28 -0400 Subject: [PATCH 1/7] Update packages --- conan/conanfile.py | 2 +- ports/decimal/portfile.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conan/conanfile.py b/conan/conanfile.py index 7a0774c4b..b2ac89198 100644 --- a/conan/conanfile.py +++ b/conan/conanfile.py @@ -19,7 +19,7 @@ class CharconvConan(ConanFile): name = "boost_decimal" - version = "1.1.0" + version = "2.3.0" description = "Boost provides free peer-reviewed portable C++ source libraries" url = "https://github.com/cppalliance/decimal" homepage = "https://github.com/cppalliance/decimal" diff --git a/ports/decimal/portfile.cmake b/ports/decimal/portfile.cmake index 881e719c4..6f0ae9e03 100644 --- a/ports/decimal/portfile.cmake +++ b/ports/decimal/portfile.cmake @@ -7,8 +7,8 @@ vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO cppalliance/decimal - REF v1.1.0 - SHA512 9ee10a32958a58e96ec4d1fc5f39e3c86352d36365547716e7903340435878b86b56f74e8abcaedfc227154c409d8ccfdb0563a2d98b605aa1799c071ba15cca + REF v2.3.0 + SHA512 a5dac21e3f786e028b5e526c615c1ff7f671f2759a6968edaf97de90f5c5ede59e956dca730d14edabce4d86b58d5dacca2dab0706cb9e17474871f6433171eb HEAD_REF master ) From c995e2323054af12251776ff9db1ebba58ffe44a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 19 Sep 2024 13:07:54 -0400 Subject: [PATCH 2/7] Remove u128 x64 intrinsic optimizations --- include/boost/decimal/detail/emulated128.hpp | 90 +++++--------------- 1 file changed, 22 insertions(+), 68 deletions(-) diff --git a/include/boost/decimal/detail/emulated128.hpp b/include/boost/decimal/detail/emulated128.hpp index e99e25ec8..574ebe585 100644 --- a/include/boost/decimal/detail/emulated128.hpp +++ b/include/boost/decimal/detail/emulated128.hpp @@ -757,31 +757,15 @@ constexpr auto uint128::operator+=(std::uint64_t n) noexcept -> uint128& constexpr auto operator+(uint128 lhs, uint128 rhs) 
noexcept -> uint128 { - #if (defined(BOOST_DECIMAL_HAS_X64_INTRINSICS) || defined(BOOST_DECIMAL_HAS_MSVC_64BIT_INTRINSICS)) && !defined(BOOST_DECIMAL_NO_CONSTEVAL_DETECTION) - if (!BOOST_DECIMAL_IS_CONSTANT_EVALUATED(lhs.low)) - { - // Branchless version can be executed on x64 machines when available - unsigned long long low {}; - unsigned long long high {}; - - const auto carry {BOOST_DECIMAL_ADD_CARRY(0, lhs.low, rhs.low, &low)}; - BOOST_DECIMAL_ADD_CARRY(carry, lhs.high, rhs.high, &high); + uint128 temp {lhs.high + rhs.high, lhs.low + rhs.low}; - return uint128{high, low}; - } - else - #endif + // Need to carry a bit into rhs + if (temp.low < lhs.low) { - uint128 temp {lhs.high + rhs.high, lhs.low + rhs.low}; - - // Need to carry a bit into rhs - if (temp.low < lhs.low) - { - ++temp.high; - } - - return temp; + ++temp.high; } + + return temp; } constexpr auto uint128::operator+=(uint128 v) noexcept -> uint128& @@ -807,31 +791,15 @@ constexpr auto uint128::operator++(int) noexcept -> uint128 constexpr auto operator-(uint128 lhs, uint128 rhs) noexcept -> uint128 { - #if (defined(BOOST_DECIMAL_HAS_X64_INTRINSICS) || defined(BOOST_DECIMAL_HAS_MSVC_64BIT_INTRINSICS)) && !defined(BOOST_DECIMAL_NO_CONSTEVAL_DETECTION) - if (!BOOST_DECIMAL_IS_CONSTANT_EVALUATED(lhs.low)) - { - // Branchless version can be executed on x64 machines when available - unsigned long long low {}; - unsigned long long high {}; - - const auto carry {_subborrow_u64(0, lhs.low, rhs.low, &low)}; - _subborrow_u64(carry, lhs.high, rhs.high, &high); + uint128 temp {lhs.high - rhs.high, lhs.low - rhs.low}; - return uint128{high, low}; - } - else - #endif + // Check for carry + if (lhs.low < rhs.low) { - uint128 temp {lhs.high - rhs.high, lhs.low - rhs.low}; - - // Check for carry - if (lhs.low < rhs.low) - { - --temp.high; - } - - return temp; + --temp.high; } + + return temp; } constexpr auto uint128::operator-=(uint128 v) noexcept -> uint128& @@ -1328,35 +1296,21 @@ constexpr auto 
int128::operator>(int rhs) const noexcept -> bool constexpr auto operator+(const int128& lhs, const int128& rhs) noexcept -> int128 { - #if (defined(BOOST_DECIMAL_HAS_X64_INTRINSICS) || defined(BOOST_DECIMAL_HAS_MSVC_64BIT_INTRINSICS)) && !defined(BOOST_DECIMAL_NO_CONSTEVAL_DETECTION) - if (!BOOST_DECIMAL_IS_CONSTANT_EVALUATED(lhs.low)) - { - unsigned long long low {}; - unsigned long long high {}; - - const auto carry {BOOST_DECIMAL_ADD_CARRY(0, lhs.low, rhs.low, &low)}; - BOOST_DECIMAL_ADD_CARRY(carry, static_cast(lhs.high), static_cast(rhs.high), &high); - - return {static_cast(high), low}; - } - #endif - { - #ifdef BOOST_DECIMAL_HAS_INT128 + #ifdef BOOST_DECIMAL_HAS_INT128 - const auto lhs_full {(static_cast<__uint128_t>(lhs.high) << 64) | lhs.low}; - const auto rhs_full {(static_cast<__uint128_t>(rhs.high) << 64) | rhs.low}; - const auto result {lhs_full + rhs_full}; + const auto lhs_full {(static_cast<__uint128_t>(lhs.high) << 64) | lhs.low}; + const auto rhs_full {(static_cast<__uint128_t>(rhs.high) << 64) | rhs.low}; + const auto result {lhs_full + rhs_full}; - return {static_cast(result >> 64), static_cast(result)}; + return {static_cast(result >> 64), static_cast(result)}; - #else + #else - const auto new_low {lhs.low + rhs.low}; - const auto new_high {lhs.high + rhs.high + static_cast(new_low < lhs.low)}; - return int128{new_high, new_low}; + const auto new_low {lhs.low + rhs.low}; + const auto new_high {lhs.high + rhs.high + static_cast(new_low < lhs.low)}; + return int128{new_high, new_low}; - #endif - } + #endif } constexpr auto operator-(const int128& lhs, const int128& rhs) noexcept -> int128 From 81c80d50659ce13c917d3737a0f5fe71cf4a8b7c Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 19 Sep 2024 14:51:23 -0400 Subject: [PATCH 3/7] Add x64 performance to the docs --- doc/decimal/benchmarks.adoc | 161 ++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/doc/decimal/benchmarks.adoc b/doc/decimal/benchmarks.adoc 
index 9a700ddae..b4f7e0db3 100644 --- a/doc/decimal/benchmarks.adoc +++ b/doc/decimal/benchmarks.adoc @@ -25,6 +25,38 @@ An example on Linux with b2: `../../../b2 cxxstd=20 toolset=gcc-13 define=BOOST_ The benchmark for comparisons generates a random vector containing 2,000,000 elements and does operations `>`, `>=`, `<`, `\<=`, `==`, and `!=` between `vec[i] and vec[i + 1]`. This is repeated 5 times to generate stable results. +=== x64 Linux Results + +Run using an Intel i9-11900k chipset running RHEL 9.4 and GCC 11.4.1-3 + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 35,581 +| 0.604 +| `double` +| 58,848 +| 1.000 +| `decimal32` +| 2,410,084 +| 40.954 +| `decimal64` +| 4,233,175 +| 71.934 +| `decimal128` +| 6,337,447 +| 107.692 +| `decimal32_fast` +| 628,241 +| 10.676 +| `decimal64_fast` +| 724,474 +| 12.311 +| `decimal128_fast` +| 517,930 +| 8.801 +|=== + === M1 macOS Results Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and homebrew Clang 18.1.4 @@ -62,6 +94,130 @@ Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and home The benchmark for these operations generates a random vector containing 2,000,000 elements and does operations `+`, `-`, `*`, `/` between `vec[i] and vec[i + 1]`. This is repeated 5 times to generate stable results. 
+=== x64 Linux Results + +Run using an Intel i9-11900k chipset running RHEL 9.4 and GCC 11.4.1-3 + +==== Addition + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 54,566 +| 1.077 +| `double` +| 50,640 +| 1.000 +| `decimal32` +| 3,639,957 +| 71.879 +| `decimal64` +| 4,172,318 +| 82.392 +| `decimal128` +| 10,936,595 +| 215.968 +| `decimal32_fast` +| 1,148,249 +| 22.675 +| `decimal64_fast` +| 1,149,203 +| 22.694 +| `decimal128_fast` +| 7,424,598 +| 146.615 +|=== + +==== Subtraction + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 48,654 +| 0.912 +| `double` +| 53,348 +| 1.000 +| `decimal32` +| 2,850,709 +| 53.436 +| `decimal64` +| 3,493,936 +| 65.493 +| `decimal128` +| 10,492,728 +| 196.685 +| `decimal32_fast` +| 1,012,199 +| 18.974 +| `decimal64_fast` +| 1,055,476 +| 19.785 +| `decimal128_fast` +| 2,114,185 +| 39.630 +|=== + +==== Multiplication + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 53,405 +| 1.101 +| `double` +| 48,497 +| 1.000 +| `decimal32` +| 2,708,779 +| 55.855 +| `decimal64` +| 2,761,465 +| 56.941 +| `decimal128` +| 8,509,678 +| 175.468 +| `decimal32_fast` +| 451,679 +| 9.313 +| `decimal64_fast` +| 777,927 +| 16.041 +| `decimal128_fast` +| 13,970,509 +| 288.070 +|=== + +==== Division + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 58,955 +| 0.755 +| `double` +| 78,046 +| 1.000 +| `decimal32` +| 2,907,134 +| 37.249 +| `decimal64` +| 3,464,841 +| 44.394 +| `decimal128` +| 18,202,742 +| 233.231 +| `decimal32_fast` +| 1,092,346 +| 13.996 +| `decimal64_fast` +| 1,207,648 +| 15.474 +| `decimal128_fast` +| 1,208,184 +| 15.480 +|=== + === M1 macOS Results Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and homebrew Clang 18.1.4 @@ -186,6 +342,9 @@ Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and home | 77.956 |=== +//// +These are not available for the built-ins so not deleting but also not incorporating + == Selected Special Functions The 
benchmark for these operations generates a random vector containing 2,000,000 elements and does operations `+`, `-`, `*`, `/` between `vec[i] and vec[i + 1]`. @@ -377,3 +536,5 @@ Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and home | 3,108,380 | 9.724 |=== + +//// From bf087a2bf0f8df14f27f1326211916398b250377 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 19 Sep 2024 15:20:34 -0400 Subject: [PATCH 4/7] Update number of runs for C benchmarks --- test/benchmark_libdfp.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/test/benchmark_libdfp.c b/test/benchmark_libdfp.c index 16f2cb654..be0a0f24f 100644 --- a/test/benchmark_libdfp.c +++ b/test/benchmark_libdfp.c @@ -8,7 +8,7 @@ #include #include -#define K 2000000 +#define K 20000000 #define N 5 double float_rand(double min, double max) @@ -136,21 +136,21 @@ __attribute__ ((__noinline__)) void test_comparisons_128(_Decimal128* data, cons typedef _Decimal32 (*operation_32)(_Decimal32, _Decimal32); -_Decimal32 add_32(_Decimal32 a, _Decimal32 b) +__attribute__ ((__noinline__)) _Decimal32 add_32(_Decimal32 a, _Decimal32 b) { return a + b; } -_Decimal32 sub_32(_Decimal32 a, _Decimal32 b) +__attribute__ ((__noinline__)) _Decimal32 sub_32(_Decimal32 a, _Decimal32 b) { return a - b; } -_Decimal32 mul_32(_Decimal32 a, _Decimal32 b) +__attribute__ ((__noinline__)) _Decimal32 mul_32(_Decimal32 a, _Decimal32 b) { return a * b; } -_Decimal32 div_32(_Decimal32 a, _Decimal32 b) +__attribute__ ((__noinline__)) _Decimal32 div_32(_Decimal32 a, _Decimal32 b) { return a / b; } @@ -181,21 +181,22 @@ __attribute__ ((__noinline__)) void test_two_element_operation_32(_Decimal32* da typedef _Decimal64 (*operation_64)(_Decimal64, _Decimal64); -_Decimal64 add_64(_Decimal64 a, _Decimal64 b) +__attribute__ ((__noinline__)) _Decimal64 add_64(_Decimal64 a, _Decimal64 b) { return a + b; } -_Decimal64 sub_64(_Decimal64 a, _Decimal64 b) + +__attribute__ ((__noinline__)) 
_Decimal64 sub_64(_Decimal64 a, _Decimal64 b) { return a - b; } -_Decimal64 mul_64(_Decimal64 a, _Decimal64 b) +__attribute__ ((__noinline__)) _Decimal64 mul_64(_Decimal64 a, _Decimal64 b) { return a * b; } -_Decimal64 div_64(_Decimal64 a, _Decimal64 b) +__attribute__ ((__noinline__)) _Decimal64 div_64(_Decimal64 a, _Decimal64 b) { return a / b; } @@ -226,21 +227,22 @@ __attribute__ ((__noinline__)) void test_two_element_operation_64(_Decimal64* da typedef _Decimal128 (*operation_128)(_Decimal128, _Decimal128); -_Decimal128 add_128(_Decimal128 a, _Decimal128 b) +__attribute__ ((__noinline__)) _Decimal128 add_128(_Decimal128 a, _Decimal128 b) { return a + b; } -_Decimal128 sub_128(_Decimal128 a, _Decimal128 b) + +__attribute__ ((__noinline__)) _Decimal128 sub_128(_Decimal128 a, _Decimal128 b) { return a - b; } -_Decimal128 mul_128(_Decimal128 a, _Decimal128 b) +__attribute__ ((__noinline__)) _Decimal128 mul_128(_Decimal128 a, _Decimal128 b) { return a * b; } -_Decimal128 div_128(_Decimal128 a, _Decimal128 b) +__attribute__ ((__noinline__)) _Decimal128 div_128(_Decimal128 a, _Decimal128 b) { return a / b; } From 693907fe349c5584ffc3e27bae449eebc6a08a06 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 19 Sep 2024 15:21:41 -0400 Subject: [PATCH 5/7] Add libdfp benchmarks to table --- doc/decimal/benchmarks.adoc | 45 +++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/doc/decimal/benchmarks.adoc b/doc/decimal/benchmarks.adoc index b4f7e0db3..b90946181 100644 --- a/doc/decimal/benchmarks.adoc +++ b/doc/decimal/benchmarks.adoc @@ -55,6 +55,15 @@ Run using an Intel i9-11900k chipset running RHEL 9.4 and GCC 11.4.1-3 | `decimal128_fast` | 517,930 | 8.801 +| GCC `_Decimal32` +| 893,375 +| 15.181 +| GCC `_Decimal64` +| 496,127 +| 8.431 +| GCC `_Decimal128` +| 1,143,636 +| 19.434 |=== === M1 macOS Results @@ -126,6 +135,15 @@ Run using an Intel i9-11900k chipset running RHEL 9.4 and GCC 11.4.1-3 | `decimal128_fast` | 7,424,598 | 146.615 +| 
GCC `_Decimal32` +| 2,997,658 +| 59.195 +| GCC `_Decimal64` +| 2,129,898 +| 42.060 +| GCC `_Decimal128` +| 3,056,979 +| 60.367 |=== ==== Subtraction @@ -156,6 +174,15 @@ Run using an Intel i9-11900k chipset running RHEL 9.4 and GCC 11.4.1-3 | `decimal128_fast` | 2,114,185 | 39.630 +| GCC `_Decimal32` +| 2,006,964 +| 37.620 +| GCC `_Decimal64` +| 1,324,796 +| 24.833 +| GCC `_Decimal128` +| 2,783,553 +| 52.177 |=== ==== Multiplication @@ -186,6 +213,15 @@ Run using an Intel i9-11900k chipset running RHEL 9.4 and GCC 11.4.1-3 | `decimal128_fast` | 13,970,509 | 288.070 +| GCC `_Decimal32` +| 2,507,998 +| 51.714 +| GCC `_Decimal64` +| 2,414,864 +| 49.794 +| GCC `_Decimal128` +| 6,248,956 +| 128.852 |=== ==== Division @@ -216,6 +252,15 @@ Run using an Intel i9-11900k chipset running RHEL 9.4 and GCC 11.4.1-3 | `decimal128_fast` | 1,208,184 | 15.480 +| GCC `_Decimal32` +| 5,002,197 +| 64.093 +| GCC `_Decimal64` +| 2,961,731 +| 37.949 +| GCC `_Decimal128` +| 10,095,995 +| 129.360 |=== === M1 macOS Results From 17be9c21f39cbb1e23b800aa4f5f0c9bc4129be3 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 20 Sep 2024 09:59:50 -0400 Subject: [PATCH 6/7] Fix ordering --- test/benchmarks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/benchmarks.cpp b/test/benchmarks.cpp index aa58485c0..4b47be4c9 100644 --- a/test/benchmarks.cpp +++ b/test/benchmarks.cpp @@ -378,8 +378,8 @@ int main() std::cout << "\n===== Subtraction =====\n"; - test_two_element_operation(double_vector, std::minus<>(), "Subtraction", "double"); test_two_element_operation(float_vector, std::minus<>(), "Subtraction", "float"); + test_two_element_operation(double_vector, std::minus<>(), "Subtraction", "double"); test_two_element_operation(dec32_vector, std::minus<>(), "Subtraction", "decimal32"); test_two_element_operation(dec64_vector, std::minus<>(), "Subtraction", "decimal64"); test_two_element_operation(dec128_vector, std::minus<>(), "Subtraction", "decimal128"); From 
0478b0cbb8500b7726dfc04efb1d1a90a49e59a7 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 20 Sep 2024 10:21:01 -0400 Subject: [PATCH 7/7] Update apple benchmarks --- doc/decimal/benchmarks.adoc | 156 ++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/doc/decimal/benchmarks.adoc b/doc/decimal/benchmarks.adoc index b90946181..b2ff69d33 100644 --- a/doc/decimal/benchmarks.adoc +++ b/doc/decimal/benchmarks.adoc @@ -22,7 +22,7 @@ An example on Linux with b2: `../../../b2 cxxstd=20 toolset=gcc-13 define=BOOST_ == Comparisons -The benchmark for comparisons generates a random vector containing 2,000,000 elements and does operations `>`, `>=`, `<`, `\<=`, `==`, and `!=` between `vec[i] and vec[i + 1]`. +The benchmark for comparisons generates a random vector containing 20,000,000 elements and does operations `>`, `>=`, `<`, `\<=`, `==`, and `!=` between `vec[i] and vec[i + 1]`. This is repeated 5 times to generate stable results. === x64 Linux Results @@ -68,39 +68,39 @@ Run using an Intel i9-11900k chipset running RHEL 9.4 and GCC 11.4.1-3 === M1 macOS Results -Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and homebrew Clang 18.1.4 +Run using a Macbook pro with M1 pro chipset running macOS Sequoia 15.0 and homebrew Clang 18.1.8 |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 8587 -| 1.376 +| 146,976 +| 2.319 | `double` -| 6240 +| 63,382 | 1.000 | `decimal32` -| 275,597 -| 44.166 +| 1,797,597 +| 28.361 | `decimal64` -| 296,929 -| 47.587 +| 2,799,376 +| 44.167 | `decimal128` -| 821,847 -| 131.706 +| 6,478,939 +| 102.220 | `decimal32_fast` -| 99,664 -| 15.972 +| 1,070,232 +| 16.885 | `decimal64_fast` -| 102,132 -| 16.367 +| 1,111,273 +| 17.533 | `decimal128_fast` -| 146,302 -| 23.446 +| 1,118,976 +| 17.654 |=== == Basic Operations -The benchmark for these operations generates a random vector containing 2,000,000 elements and does operations `+`, `-`, `*`, `/` between `vec[i] and vec[i 
+ 1]`. +The benchmark for these operations generates a random vector containing 20,000,000 elements and does operations `+`, `-`, `*`, `/` between `vec[i] and vec[i + 1]`. This is repeated 5 times to generate stable results. === x64 Linux Results @@ -272,29 +272,29 @@ Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and home |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 2705 -| 0.859 +| 16,685 +| 0.955 | `double` -| 3148 +| 17,476 | 1.000 | `decimal32` -| 351,505 -| 111.660 +| 2,528,095 +| 144.661 | `decimal64` -| 359,425 -| 114.176 +| 2,713,507 +| 155.270 | `decimal128` -| 1,446,674 -| 459.553 +| 11,969,714 +| 684.923 | `decimal32_fast` -| 146,873 -| 46.656 +| 1,423,277 +| 81.442 | `decimal64_fast` -| 139,294 -| 44.248 +| 1,280,409 +| 73.267 | `decimal128_fast` -| 707,308 -| 224.685 +| 6,047,499 +| 346.046 |=== ==== Subtraction @@ -302,29 +302,29 @@ Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and home |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 3339 -| 2.014 +| 16,302 +| 0.957 | `double` -| 1658 +| 17,033 | 1.000 | `decimal32` -| 267,646 -| 161.427 +| 2,010,525 +| 118.037 | `decimal64` -| 303,589 -| 183.106 +| 2,237,729 +| 131.376 | `decimal128` -| 954,211 -| 575.519 +| 6,907,396 +| 405.530 | `decimal32_fast` -| 147,112 -| 88.729 +| 1,378,448 +| 80.928 | `decimal64_fast` -| 145,606 -| 87.820 +| 1,276,731 +| 74.956 | `decimal128_fast` -| 394,538 -| 2387.960 +| 2,970,586 +| 174.401 |=== ==== Multiplication @@ -332,29 +332,29 @@ Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and home |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 1646 -| 0.957 +| 16,499 +| 0.926 | `double` -| 1720 +| 17,821 | 1.000 | `decimal32` -| 313,219 -| 182.104 +| 1,951,504 +| 109.506 | `decimal64` -| 583,818 -| 339.429 +| 2,480,528 +| 139.191 | `decimal128` -| 1,881,936 -| 1094.149 +| 14,360,630 +| 805.826 | `decimal32_fast` -| 86,093 -| 50.054 +| 630,355 +| 35.371 | 
`decimal64_fast` -| 333,582 -| 193.943 +| 987,703 +| 55.424 | `decimal128_fast` -| 1,269,429 -| 738.040 +| 12,573,178 +| 705.526 |=== ==== Division @@ -362,29 +362,29 @@ Run using a Macbook pro with M1 pro chipset running macOS Sonoma 14.4.1 and home |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 2120 -| 0.547 +| 20,267 +| 0.841 | `double` -| 3874 +| 24,111 | 1.000 | `decimal32` -| 307,337 -| 79.333 +| 1,757,506 +| 72.892 | `decimal64` -| 447,910 -| 115.620 +| 3,496,913 +| 145.033 | `decimal128` -| 2,544,798 -| 656.892 +| 20,017,989 +| 830.243 | `decimal32_fast` -| 105,796 -| 27.309 +| 846,727 +| 35.118 | `decimal64_fast` -| 291,671 -| 75.289 +| 2,484,985 +| 103.064 | `decimal128_fast` -| 302,003 -| 77.956 +| 2,490,175 +| 103.280 |=== ////