From 525518e079bde5ea7cd59874bbe4826bd4151af2 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Fri, 1 Mar 2024 13:57:12 -0800 Subject: [PATCH] update function signature for compute_offset_aligned_edge_chunks --- cpp/include/cugraph/utilities/misc_utils.cuh | 55 ++++++++++--------- cpp/src/link_prediction/similarity_impl.cuh | 22 ++++---- ...m_reduce_dst_key_aggregated_outgoing_e.cuh | 14 +++-- .../renumber_sampled_edgelist_impl.cuh | 14 ++--- .../sampling_post_processing_impl.cuh | 23 +++----- cpp/src/structure/detail/structure_utils.cuh | 16 ++---- 6 files changed, 66 insertions(+), 78 deletions(-) diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 948a9476bc1..4aa788727d1 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -38,49 +38,50 @@ namespace cugraph { namespace detail { template -std::tuple, std::vector> compute_offset_aligned_edge_chunks( +std::tuple, std::vector> compute_offset_aligned_element_chunks( raft::handle_t const& handle, - offset_t const* offsets, - data_t num_offsets, + raft::device_span offsets, offset_t num_elements, - size_t approx_edge_chunk_size) + data_t approx_element_chunk_size) { auto search_offset_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{1}), cuda::proclaim_return_type( - [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; })); - auto num_chunks = (num_elements + approx_edge_chunk_size - 1) / approx_edge_chunk_size; + [approx_element_chunk_size] __device__(auto i) { return i * approx_element_chunk_size; })); + auto num_chunks = (num_elements + approx_element_chunk_size - 1) / approx_element_chunk_size; if (num_chunks > 1) { - rmm::device_uvector d_vertex_offsets(num_chunks - 1, handle.get_stream()); + rmm::device_uvector d_chunk_offsets(num_chunks - 1, handle.get_stream()); thrust::lower_bound(handle.get_thrust_policy(), - offsets, - offsets + num_offsets + 1, + offsets.begin(), + offsets.end(), search_offset_first, - search_offset_first + d_vertex_offsets.size(), - d_vertex_offsets.begin()); - rmm::device_uvector d_edge_offsets(d_vertex_offsets.size(), handle.get_stream()); + search_offset_first + d_chunk_offsets.size(), + d_chunk_offsets.begin()); + rmm::device_uvector d_element_offsets(d_chunk_offsets.size(), handle.get_stream()); thrust::gather(handle.get_thrust_policy(), - d_vertex_offsets.begin(), - d_vertex_offsets.end(), - offsets, - d_edge_offsets.begin()); - std::vector h_edge_offsets(num_chunks + 1, offset_t{0}); - h_edge_offsets.back() = num_elements; - raft::update_host( - h_edge_offsets.data() + 1, d_edge_offsets.data(), d_edge_offsets.size(), handle.get_stream()); - std::vector h_vertex_offsets(num_chunks + 1, data_t{0}); - h_vertex_offsets.back() = num_offsets; - raft::update_host(h_vertex_offsets.data() + 1, - d_vertex_offsets.data(), - d_vertex_offsets.size(), + d_chunk_offsets.begin(), + d_chunk_offsets.end(), + offsets.begin(), + d_element_offsets.begin()); + std::vector h_element_offsets(num_chunks + 1, offset_t{0}); + h_element_offsets.back() = num_elements; + raft::update_host(h_element_offsets.data() + 1, + d_element_offsets.data(), + d_element_offsets.size(), + handle.get_stream()); + std::vector h_chunk_offsets(num_chunks + 1, data_t{0}); + h_chunk_offsets.back() = offsets.size() - 1; + raft::update_host(h_chunk_offsets.data() + 1, + d_chunk_offsets.data(), + d_chunk_offsets.size(), handle.get_stream()); handle.sync_stream(); - return std::make_tuple(h_vertex_offsets, h_edge_offsets); + return std::make_tuple(h_chunk_offsets, h_element_offsets); } else { - return std::make_tuple(std::vector{{0, num_offsets}}, + return std::make_tuple(std::vector{{0, offsets.size() - 1}}, std::vector{{0, num_elements}}); } } diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 6a822b18d18..c13259f0da7 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -276,7 +276,8 @@ all_pairs_similarity(raft::handle_t const& handle, two_hop_degrees.begin()); if (vertices) { - rmm::device_uvector gathered_degrees(tmp_vertices.size() + 1, handle.get_stream()); + rmm::device_uvector gathered_two_hop_degrees(tmp_vertices.size() + 1, + handle.get_stream()); thrust::gather( handle.get_thrust_policy(), @@ -287,9 +288,9 @@ all_pairs_similarity(raft::handle_t const& handle, tmp_vertices.end(), cugraph::detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), two_hop_degrees.begin(), - gathered_degrees.begin()); + gathered_two_hop_degrees.begin()); - two_hop_degrees = std::move(gathered_degrees); + two_hop_degrees = std::move(gathered_two_hop_degrees); } thrust::sort_by_key(handle.get_thrust_policy(), @@ -303,6 +304,8 @@ all_pairs_similarity(raft::handle_t const& handle, two_hop_degrees.end(), two_hop_degrees.begin()); + auto two_hop_degree_offsets = std::move(two_hop_degrees); + rmm::device_uvector top_v1(0, handle.get_stream()); rmm::device_uvector top_v2(0, handle.get_stream()); rmm::device_uvector top_score(0, handle.get_stream()); @@ -316,16 +319,15 @@ all_pairs_similarity(raft::handle_t const& handle, std::vector batch_offsets; raft::update_host(&sum_two_hop_degrees, - two_hop_degrees.data() + two_hop_degrees.size() - 1, + two_hop_degree_offsets.data() + two_hop_degree_offsets.size() - 1, 1, handle.get_stream()); - std::tie(batch_offsets, std::ignore) = - compute_offset_aligned_edge_chunks(handle, - two_hop_degrees.data(), - two_hop_degrees.size() - 1, - sum_two_hop_degrees, - MAX_PAIRS_PER_BATCH); + std::tie(batch_offsets, std::ignore) = compute_offset_aligned_element_chunks( + handle, + raft::device_span{two_hop_degree_offsets.data(), two_hop_degree_offsets.size()}, + sum_two_hop_degrees, + MAX_PAIRS_PER_BATCH); for (size_t batch_number = 0; batch_number < (batch_offsets.size() - 1); ++batch_number) { if (batch_offsets[batch_number + 1] > batch_offsets[batch_number]) { diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 8e627392555..945b7e270e9 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -355,13 +355,15 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - edge_partition.offsets(), - edge_partition.dcs_nzd_vertices() - ? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] + - *(edge_partition.dcs_nzd_vertex_count()) - : edge_partition.major_range_size(), + raft::device_span{ + edge_partition.offsets(), + static_cast( + edge_partition.dcs_nzd_vertices() + ? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] + + *(edge_partition.dcs_nzd_vertex_count()) + : edge_partition.major_range_size())}, edge_partition.number_of_edges(), approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; diff --git a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh index 0f128eb8410..f5bc3ef6d2e 100644 --- a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh +++ b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh @@ -107,12 +107,8 @@ compute_min_hop_for_unique_label_vertex_pairs( rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - auto [h_label_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - (*label_offsets).data(), - num_labels, - vertices.size(), - approx_edges_to_sort_per_iteration); + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, *label_offsets, vertices.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; for (size_t i = 0; i < num_chunks; ++i) { @@ -599,10 +595,10 @@ renumber_sampled_edgelist( static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - (*renumber_map_label_offsets).data(), - static_cast((*renumber_map_label_offsets).size() - 1), + raft::device_span{(*renumber_map_label_offsets).data(), + (*renumber_map_label_offsets).size()}, renumber_map.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index f506e4bd04c..1545a00d7fd 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -286,12 +286,8 @@ compute_min_hop_for_unique_label_vertex_pairs( rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - auto [h_label_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - (*label_offsets).data(), - num_labels, - vertices.size(), - approx_edges_to_sort_per_iteration); + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, *label_offsets, vertices.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; for (size_t i = 0; i < num_chunks; ++i) { @@ -741,10 +737,10 @@ renumber_sampled_edgelist( static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - (*renumber_map_label_offsets).data(), - static_cast((*renumber_map_label_offsets).size() - 1), + raft::device_span{(*renumber_map_label_offsets).data(), + (*renumber_map_label_offsets).size() - 1}, renumber_map.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; @@ -910,11 +906,10 @@ sort_sampled_edge_tuples( (1 << 20) /* tuning parameter */; // for sorts in chunks std::tie(h_label_offsets, h_edge_offsets) = - detail::compute_offset_aligned_edge_chunks(handle, - std::get<0>(*edgelist_label_offsets).data(), - std::get<1>(*edgelist_label_offsets), - edgelist_majors.size(), - approx_edges_to_sort_per_iteration); + detail::compute_offset_aligned_element_chunks(handle, + std::get<0>(*edgelist_label_offsets), + edgelist_majors.size(), + approx_edges_to_sort_per_iteration); } else { h_label_offsets = {0, 1}; h_edge_offsets = {0, edgelist_majors.size()}; diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh index a96467ce06b..1ef975c1dec 100644 --- a/cpp/src/structure/detail/structure_utils.cuh +++ b/cpp/src/structure/detail/structure_utils.cuh @@ -316,12 +316,8 @@ void sort_adjacency_list(raft::handle_t const& handle, // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - offsets.data(), - static_cast(offsets.size() - 1), - num_edges, - approx_edges_to_sort_per_iteration); + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, offsets, num_edges, approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; // 3. Segmented sort each vertex's neighbors @@ -451,12 +447,8 @@ void sort_adjacency_list(raft::handle_t const& handle, // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - offsets.data(), - static_cast(offsets.size() - 1), - num_edges, - approx_edges_to_sort_per_iteration); + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, offsets, num_edges, approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; // 3. Segmented sort each vertex's neighbors