Skip to content

Commit

Permalink
Optimize tuning compile times (#3074)
Browse files: browse the repository at this point in the history
  • Loading branch information
bernhardmgruber authored Dec 9, 2024
1 parent d6c4d8d commit 031efef
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 9 deletions.
8 changes: 7 additions & 1 deletion cub/benchmarks/bench/partition/flagged.cu
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,13 @@ void flagged(nvbench::state& state, nvbench::type_list<T, OffsetT, UseDistinctPa
});
}

using distinct_partitions = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>;
// Type axis for the `flagged` benchmark's UseDistinctPartitions parameter.
// The boolean tag types are imported so they can be spelled unqualified, including by the
// TUNE_DistinctPartitions macro, which expands to a bare "false_type" or "true_type".
// NOTE(review): the macro is presumably supplied by the tuning build to reduce compile time — confirm.
using ::cuda::std::false_type;
using ::cuda::std::true_type;
#ifdef TUNE_DistinctPartitions
// With the macro defined, the axis holds only the single value the macro names.
using distinct_partitions = nvbench::type_list<TUNE_DistinctPartitions>; // expands to "false_type" or "true_type"
#else // !defined(TUNE_DistinctPartitions)
// Without the macro, the axis covers both false_type and true_type.
using distinct_partitions = nvbench::type_list<false_type, true_type>;
#endif // TUNE_DistinctPartitions

NVBENCH_BENCH_TYPES(flagged, NVBENCH_TYPE_AXES(fundamental_types, offset_types, distinct_partitions))
.set_name("base")
Expand Down
8 changes: 7 additions & 1 deletion cub/benchmarks/bench/partition/if.cu
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,13 @@ void partition(nvbench::state& state, nvbench::type_list<T, OffsetT, UseDistinct
});
}

using distinct_partitions = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>;
// Type axis for the `partition` benchmark's UseDistinctPartitions parameter.
// The boolean tag types are imported so they can be spelled unqualified, including by the
// TUNE_DistinctPartitions macro, which expands to a bare "false_type" or "true_type".
// NOTE(review): the macro is presumably supplied by the tuning build to reduce compile time — confirm.
using ::cuda::std::false_type;
using ::cuda::std::true_type;
#ifdef TUNE_DistinctPartitions
// With the macro defined, the axis holds only the single value the macro names.
using distinct_partitions = nvbench::type_list<TUNE_DistinctPartitions>; // expands to "false_type" or "true_type"
#else // !defined(TUNE_DistinctPartitions)
// Without the macro, the axis covers both false_type and true_type.
using distinct_partitions = nvbench::type_list<false_type, true_type>;
#endif // TUNE_DistinctPartitions

NVBENCH_BENCH_TYPES(partition, NVBENCH_TYPE_AXES(fundamental_types, offset_types, distinct_partitions))
.set_name("base")
Expand Down
6 changes: 5 additions & 1 deletion cub/benchmarks/bench/scan/exclusive/base.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,11 @@ static void basic(nvbench::state& state, nvbench::type_list<T, OffsetT>)
});
}

using some_offset_types = nvbench::type_list<nvbench::uint32_t, nvbench::uint64_t>;
// Offset-type axis for the `basic` benchmark. When TUNE_OffsetT is defined, the axis holds only what that
// macro expands to; otherwise both uint32_t and uint64_t are used.
// NOTE(review): uint32_t / uint64_t are spelled unqualified here; presumably they resolve through includes
// outside this view — confirm.
#ifdef TUNE_OffsetT
using some_offset_types = nvbench::type_list<TUNE_OffsetT>;
#else // !defined(TUNE_OffsetT)
using some_offset_types = nvbench::type_list<uint32_t, uint64_t>;
#endif // TUNE_OffsetT

NVBENCH_BENCH_TYPES(basic, NVBENCH_TYPE_AXES(all_types, some_offset_types))
.set_name("base")
Expand Down
9 changes: 8 additions & 1 deletion cub/benchmarks/bench/select/flagged.cu
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,14 @@ void select(nvbench::state& state, nvbench::type_list<T, OffsetT, MayAlias>)
});
}

using may_alias = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>;
// Type list of MayAlias values. The boolean tag types are imported so they can be spelled unqualified,
// including by the TUNE_MayAlias macro, which expands to a bare "false_type" or "true_type".
// NOTE(review): the macro is presumably supplied by the tuning build to reduce compile time — confirm.
using ::cuda::std::false_type;
using ::cuda::std::true_type;
#ifdef TUNE_MayAlias
// With the macro defined, the list holds only the single value the macro names.
using may_alias = nvbench::type_list<TUNE_MayAlias>; // expands to "false_type" or "true_type"
#else // !defined(TUNE_MayAlias)
// Without the macro, the list covers both false_type and true_type.
using may_alias = nvbench::type_list<false_type, true_type>;
#endif // TUNE_MayAlias

// The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes
// using a 32-bit offset type, so we only need to benchmark a single offset type (to save time for tuning and the
// benchmark CI).
using select_offset_types = nvbench::type_list<int64_t>;
Expand Down
9 changes: 8 additions & 1 deletion cub/benchmarks/bench/select/if.cu
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,14 @@ void select(nvbench::state& state, nvbench::type_list<T, OffsetT, MayAlias>)
});
}

using may_alias = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>;
// Type list of MayAlias values. The boolean tag types are imported so they can be spelled unqualified,
// including by the TUNE_MayAlias macro, which expands to a bare "false_type" or "true_type".
// NOTE(review): the macro is presumably supplied by the tuning build to reduce compile time — confirm.
using ::cuda::std::false_type;
using ::cuda::std::true_type;
#ifdef TUNE_MayAlias
// With the macro defined, the list holds only the single value the macro names.
using may_alias = nvbench::type_list<TUNE_MayAlias>; // expands to "false_type" or "true_type"
#else // !defined(TUNE_MayAlias)
// Without the macro, the list covers both false_type and true_type.
using may_alias = nvbench::type_list<false_type, true_type>;
#endif // TUNE_MayAlias

// The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes
// using a 32-bit offset type, so we only need to benchmark a single offset type (to save time for tuning and the
// benchmark CI).
using select_offset_types = nvbench::type_list<int64_t>;
Expand Down
9 changes: 8 additions & 1 deletion cub/benchmarks/bench/select/unique.cu
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,14 @@ static void unique(nvbench::state& state, nvbench::type_list<T, OffsetT, MayAlia
});
}

using may_alias = nvbench::type_list<::cuda::std::false_type, ::cuda::std::true_type>;
// Type list of MayAlias values. The boolean tag types are imported so they can be spelled unqualified,
// including by the TUNE_MayAlias macro, which expands to a bare "false_type" or "true_type".
// NOTE(review): the macro is presumably supplied by the tuning build to reduce compile time — confirm.
using ::cuda::std::false_type;
using ::cuda::std::true_type;
#ifdef TUNE_MayAlias
// With the macro defined, the list holds only the single value the macro names.
using may_alias = nvbench::type_list<TUNE_MayAlias>; // expands to "false_type" or "true_type"
#else // !defined(TUNE_MayAlias)
// Without the macro, the list covers both false_type and true_type.
using may_alias = nvbench::type_list<false_type, true_type>;
#endif // TUNE_MayAlias

// The implementation of DeviceSelect for 64-bit offset types uses a streaming approach, where it runs multiple passes
// using a 32-bit offset type, so we only need to benchmark a single offset type (to save time for tuning and the
// benchmark CI).
using select_offset_types = nvbench::type_list<int64_t>;
Expand Down
14 changes: 11 additions & 3 deletions cub/benchmarks/bench/transform/heavy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,18 @@ static void heavy(nvbench::state& state, nvbench::type_list<Heaviness>)
bench_transform(state, ::cuda::std::tuple{in.begin()}, out.begin(), n, heavy_functor<Heaviness::value>{});
}

template <int I>
using ic = ::cuda::std::integral_constant<int, I>;
// Compile-time "heaviness" values used as the type axis of the `heavy` benchmark.
// integral_constant is imported so that TUNE_Heaviness, which expands to "integral_constant<int, ...>",
// can name it unqualified.
using ::cuda::std::integral_constant;
#ifdef TUNE_Heaviness
// With the macro defined, the axis holds only what the macro expands to.
using heaviness = nvbench::type_list<TUNE_Heaviness>; // expands to "integral_constant<int, ...>"
#else // !defined(TUNE_Heaviness)
// Without the macro, the axis covers the four values 32, 64, 128 and 256.
using heaviness =
nvbench::type_list<integral_constant<int, 32>,
integral_constant<int, 64>,
integral_constant<int, 128>,
integral_constant<int, 256>>;
#endif // TUNE_Heaviness

NVBENCH_BENCH_TYPES(heavy, NVBENCH_TYPE_AXES(nvbench::type_list<ic<32>, ic<64>, ic<128>, ic<256>>))
// Registers the `heavy` benchmark under the name "heavy" with a single type axis (`heaviness`, labelled
// "Heaviness{ct}") and an int64 power-of-two axis "Elements{io}".
// NOTE(review): nvbench::range(16, 28, 4) presumably yields exponents 16, 20, 24, 28 — confirm against NVBench.
NVBENCH_BENCH_TYPES(heavy, NVBENCH_TYPE_AXES(heaviness))
.set_name("heavy")
.set_type_axes_names({"Heaviness{ct}"})
.add_int64_power_of_two_axis("Elements{io}", nvbench::range(16, 28, 4));

0 comments on commit 031efef

Please sign in to comment.