From 031efef6d4eb6695b270d243a764d1b14f4e6fe3 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Mon, 9 Dec 2024 10:18:46 +0100 Subject: [PATCH] Optimize tuning compile times (#3074) --- cub/benchmarks/bench/partition/flagged.cu | 8 +++++++- cub/benchmarks/bench/partition/if.cu | 8 +++++++- cub/benchmarks/bench/scan/exclusive/base.cuh | 6 +++++- cub/benchmarks/bench/select/flagged.cu | 9 ++++++++- cub/benchmarks/bench/select/if.cu | 9 ++++++++- cub/benchmarks/bench/select/unique.cu | 9 ++++++++- cub/benchmarks/bench/transform/heavy.cu | 14 +++++++++++--- 7 files changed, 54 insertions(+), 9 deletions(-) diff --git a/cub/benchmarks/bench/partition/flagged.cu b/cub/benchmarks/bench/partition/flagged.cu index ab2fd83dca7..fcd81e660f6 100644 --- a/cub/benchmarks/bench/partition/flagged.cu +++ b/cub/benchmarks/bench/partition/flagged.cu @@ -182,7 +182,13 @@ void flagged(nvbench::state& state, nvbench::type_list; +using ::cuda::std::false_type; +using ::cuda::std::true_type; +#ifdef TUNE_DistinctPartitions +using distinct_partitions = nvbench::type_list; // expands to "false_type" or "true_type" +#else // !defined(TUNE_DistinctPartitions) +using distinct_partitions = nvbench::type_list; +#endif // TUNE_DistinctPartitions NVBENCH_BENCH_TYPES(flagged, NVBENCH_TYPE_AXES(fundamental_types, offset_types, distinct_partitions)) .set_name("base") diff --git a/cub/benchmarks/bench/partition/if.cu b/cub/benchmarks/bench/partition/if.cu index 5fc4f82f6d9..d456e65fc1c 100644 --- a/cub/benchmarks/bench/partition/if.cu +++ b/cub/benchmarks/bench/partition/if.cu @@ -208,7 +208,13 @@ void partition(nvbench::state& state, nvbench::type_list; +using ::cuda::std::false_type; +using ::cuda::std::true_type; +#ifdef TUNE_DistinctPartitions +using distinct_partitions = nvbench::type_list; // expands to "false_type" or "true_type" +#else // !defined(TUNE_DistinctPartitions) +using distinct_partitions = nvbench::type_list; +#endif // TUNE_DistinctPartitions NVBENCH_BENCH_TYPES(partition, NVBENCH_TYPE_AXES(fundamental_types, offset_types, distinct_partitions)) .set_name("base") diff --git a/cub/benchmarks/bench/scan/exclusive/base.cuh b/cub/benchmarks/bench/scan/exclusive/base.cuh index e3cd7a7be8e..b5c793ca612 100644 --- a/cub/benchmarks/bench/scan/exclusive/base.cuh +++ b/cub/benchmarks/bench/scan/exclusive/base.cuh @@ -131,7 +131,11 @@ static void basic(nvbench::state& state, nvbench::type_list) }); } -using some_offset_types = nvbench::type_list; +#ifdef TUNE_OffsetT +using some_offset_types = nvbench::type_list; +#else +using some_offset_types = nvbench::type_list; +#endif NVBENCH_BENCH_TYPES(basic, NVBENCH_TYPE_AXES(all_types, some_offset_types)) .set_name("base") diff --git a/cub/benchmarks/bench/select/flagged.cu b/cub/benchmarks/bench/select/flagged.cu index 51e3f04d028..3a180a65adc 100644 --- a/cub/benchmarks/bench/select/flagged.cu +++ b/cub/benchmarks/bench/select/flagged.cu @@ -163,7 +163,14 @@ void select(nvbench::state& state, nvbench::type_list) }); } -using may_alias = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>; +using ::cuda::std::false_type; +using ::cuda::std::true_type; +#ifdef TUNE_MayAlias +using may_alias = nvbench::type_list; // expands to "false_type" or "true_type" +#else // !defined(TUNE_MayAlias) +using may_alias = nvbench::type_list; +#endif // TUNE_MayAlias + // The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes // using a 32-bit offset type, so we only need to test one (to save time for tuning and the benchmark CI). using select_offset_types = nvbench::type_list; diff --git a/cub/benchmarks/bench/select/if.cu b/cub/benchmarks/bench/select/if.cu index 00b2763fe49..03d6d57a1ad 100644 --- a/cub/benchmarks/bench/select/if.cu +++ b/cub/benchmarks/bench/select/if.cu @@ -189,7 +189,14 @@ void select(nvbench::state& state, nvbench::type_list) }); } -using may_alias = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>; +using ::cuda::std::false_type; +using ::cuda::std::true_type; +#ifdef TUNE_MayAlias +using may_alias = nvbench::type_list; // expands to "false_type" or "true_type" +#else // !defined(TUNE_MayAlias) +using may_alias = nvbench::type_list; +#endif // TUNE_MayAlias + // The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes // using a 32-bit offset type, so we only need to test one (to save time for tuning and the benchmark CI). using select_offset_types = nvbench::type_list; diff --git a/cub/benchmarks/bench/select/unique.cu b/cub/benchmarks/bench/select/unique.cu index 276db2782ae..110fe4d8fd2 100644 --- a/cub/benchmarks/bench/select/unique.cu +++ b/cub/benchmarks/bench/select/unique.cu @@ -141,7 +141,14 @@ static void unique(nvbench::state& state, nvbench::type_list; +using ::cuda::std::false_type; +using ::cuda::std::true_type; +#ifdef TUNE_MayAlias +using may_alias = nvbench::type_list; // expands to "false_type" or "true_type" +#else // !defined(TUNE_MayAlias) +using may_alias = nvbench::type_list; +#endif // TUNE_MayAlias + // The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes // using a 32-bit offset type, so we only need to test one (to save time for tuning and the benchmark CI). using select_offset_types = nvbench::type_list; diff --git a/cub/benchmarks/bench/transform/heavy.cu b/cub/benchmarks/bench/transform/heavy.cu index be17a04fd8c..00729c03ae1 100644 --- a/cub/benchmarks/bench/transform/heavy.cu +++ b/cub/benchmarks/bench/transform/heavy.cu @@ -53,10 +53,18 @@ static void heavy(nvbench::state& state, nvbench::type_list) bench_transform(state, ::cuda::std::tuple{in.begin()}, out.begin(), n, heavy_functor{}); } -template -using ic = ::cuda::std::integral_constant; +using ::cuda::std::integral_constant; +#ifdef TUNE_Heaviness +using heaviness = nvbench::type_list; // expands to "integral_constant" +#else +using heaviness = + nvbench::type_list, + integral_constant, + integral_constant, + integral_constant>; +#endif -NVBENCH_BENCH_TYPES(heavy, NVBENCH_TYPE_AXES(nvbench::type_list, ic<64>, ic<128>, ic<256>>)) +NVBENCH_BENCH_TYPES(heavy, NVBENCH_TYPE_AXES(heaviness)) .set_name("heavy") .set_type_axes_names({"Heaviness{ct}"}) .add_int64_power_of_two_axis("Elements{io}", nvbench::range(16, 28, 4));