Skip to content

Commit

Permalink
update function signature for compute_offset_aligned_edge_chunks
Browse files Browse the repository at this point in the history
  • Loading branch information
ChuckHastings committed Mar 1, 2024
1 parent 9aa7b1a commit 525518e
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 78 deletions.
55 changes: 28 additions & 27 deletions cpp/include/cugraph/utilities/misc_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,49 +38,50 @@ namespace cugraph {
namespace detail {

template <typename data_t, typename offset_t>
std::tuple<std::vector<data_t>, std::vector<offset_t>> compute_offset_aligned_edge_chunks(
std::tuple<std::vector<data_t>, std::vector<offset_t>> compute_offset_aligned_element_chunks(
raft::handle_t const& handle,
offset_t const* offsets,
data_t num_offsets,
raft::device_span<offset_t const> offsets,
offset_t num_elements,
size_t approx_edge_chunk_size)
data_t approx_element_chunk_size)
{
auto search_offset_first = thrust::make_transform_iterator(
thrust::make_counting_iterator(size_t{1}),
cuda::proclaim_return_type<size_t>(
[approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }));
auto num_chunks = (num_elements + approx_edge_chunk_size - 1) / approx_edge_chunk_size;
[approx_element_chunk_size] __device__(auto i) { return i * approx_element_chunk_size; }));
auto num_chunks = (num_elements + approx_element_chunk_size - 1) / approx_element_chunk_size;

if (num_chunks > 1) {
rmm::device_uvector<data_t> d_vertex_offsets(num_chunks - 1, handle.get_stream());
rmm::device_uvector<data_t> d_chunk_offsets(num_chunks - 1, handle.get_stream());
thrust::lower_bound(handle.get_thrust_policy(),
offsets,
offsets + num_offsets + 1,
offsets.begin(),
offsets.end(),
search_offset_first,
search_offset_first + d_vertex_offsets.size(),
d_vertex_offsets.begin());
rmm::device_uvector<offset_t> d_edge_offsets(d_vertex_offsets.size(), handle.get_stream());
search_offset_first + d_chunk_offsets.size(),
d_chunk_offsets.begin());
rmm::device_uvector<offset_t> d_element_offsets(d_chunk_offsets.size(), handle.get_stream());
thrust::gather(handle.get_thrust_policy(),
d_vertex_offsets.begin(),
d_vertex_offsets.end(),
offsets,
d_edge_offsets.begin());
std::vector<offset_t> h_edge_offsets(num_chunks + 1, offset_t{0});
h_edge_offsets.back() = num_elements;
raft::update_host(
h_edge_offsets.data() + 1, d_edge_offsets.data(), d_edge_offsets.size(), handle.get_stream());
std::vector<data_t> h_vertex_offsets(num_chunks + 1, data_t{0});
h_vertex_offsets.back() = num_offsets;
raft::update_host(h_vertex_offsets.data() + 1,
d_vertex_offsets.data(),
d_vertex_offsets.size(),
d_chunk_offsets.begin(),
d_chunk_offsets.end(),
offsets.begin(),
d_element_offsets.begin());
std::vector<offset_t> h_element_offsets(num_chunks + 1, offset_t{0});
h_element_offsets.back() = num_elements;
raft::update_host(h_element_offsets.data() + 1,
d_element_offsets.data(),
d_element_offsets.size(),
handle.get_stream());
std::vector<data_t> h_chunk_offsets(num_chunks + 1, data_t{0});
h_chunk_offsets.back() = offsets.size() - 1;
raft::update_host(h_chunk_offsets.data() + 1,
d_chunk_offsets.data(),
d_chunk_offsets.size(),
handle.get_stream());

handle.sync_stream();

return std::make_tuple(h_vertex_offsets, h_edge_offsets);
return std::make_tuple(h_chunk_offsets, h_element_offsets);
} else {
return std::make_tuple(std::vector<data_t>{{0, num_offsets}},
return std::make_tuple(std::vector<data_t>{{0, offsets.size() - 1}},
std::vector<offset_t>{{0, num_elements}});
}
}
Expand Down
22 changes: 12 additions & 10 deletions cpp/src/link_prediction/similarity_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,8 @@ all_pairs_similarity(raft::handle_t const& handle,
two_hop_degrees.begin());

if (vertices) {
rmm::device_uvector<size_t> gathered_degrees(tmp_vertices.size() + 1, handle.get_stream());
rmm::device_uvector<size_t> gathered_two_hop_degrees(tmp_vertices.size() + 1,
handle.get_stream());

thrust::gather(
handle.get_thrust_policy(),
Expand All @@ -287,9 +288,9 @@ all_pairs_similarity(raft::handle_t const& handle,
tmp_vertices.end(),
cugraph::detail::shift_left_t<vertex_t>{graph_view.local_vertex_partition_range_first()}),
two_hop_degrees.begin(),
gathered_degrees.begin());
gathered_two_hop_degrees.begin());

two_hop_degrees = std::move(gathered_degrees);
two_hop_degrees = std::move(gathered_two_hop_degrees);
}

thrust::sort_by_key(handle.get_thrust_policy(),
Expand All @@ -303,6 +304,8 @@ all_pairs_similarity(raft::handle_t const& handle,
two_hop_degrees.end(),
two_hop_degrees.begin());

auto two_hop_degree_offsets = std::move(two_hop_degrees);

rmm::device_uvector<vertex_t> top_v1(0, handle.get_stream());
rmm::device_uvector<vertex_t> top_v2(0, handle.get_stream());
rmm::device_uvector<weight_t> top_score(0, handle.get_stream());
Expand All @@ -316,16 +319,15 @@ all_pairs_similarity(raft::handle_t const& handle,
std::vector<size_t> batch_offsets;

raft::update_host(&sum_two_hop_degrees,
two_hop_degrees.data() + two_hop_degrees.size() - 1,
two_hop_degree_offsets.data() + two_hop_degree_offsets.size() - 1,
1,
handle.get_stream());

std::tie(batch_offsets, std::ignore) =
compute_offset_aligned_edge_chunks(handle,
two_hop_degrees.data(),
two_hop_degrees.size() - 1,
sum_two_hop_degrees,
MAX_PAIRS_PER_BATCH);
std::tie(batch_offsets, std::ignore) = compute_offset_aligned_element_chunks(
handle,
raft::device_span<size_t const>{two_hop_degree_offsets.data(), two_hop_degree_offsets.size()},
sum_two_hop_degrees,
MAX_PAIRS_PER_BATCH);

for (size_t batch_number = 0; batch_number < (batch_offsets.size() - 1); ++batch_number) {
if (batch_offsets[batch_number + 1] > batch_offsets[batch_number]) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -355,13 +355,15 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
// to limit memory footprint ((1 << 20) is a tuning parameter)
auto approx_edges_to_sort_per_iteration =
static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * (1 << 20);
auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks(
auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks(
handle,
edge_partition.offsets(),
edge_partition.dcs_nzd_vertices()
? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] +
*(edge_partition.dcs_nzd_vertex_count())
: edge_partition.major_range_size(),
raft::device_span<edge_t const>{
edge_partition.offsets(),
static_cast<size_t>(
edge_partition.dcs_nzd_vertices()
? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] +
*(edge_partition.dcs_nzd_vertex_count())
: edge_partition.major_range_size())},
edge_partition.number_of_edges(),
approx_edges_to_sort_per_iteration);
auto num_chunks = h_vertex_offsets.size() - 1;
Expand Down
14 changes: 5 additions & 9 deletions cpp/src/sampling/renumber_sampled_edgelist_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,8 @@ compute_min_hop_for_unique_label_vertex_pairs(

rmm::device_uvector<std::byte> d_tmp_storage(0, handle.get_stream());

auto [h_label_offsets, h_edge_offsets] =
detail::compute_offset_aligned_edge_chunks(handle,
(*label_offsets).data(),
num_labels,
vertices.size(),
approx_edges_to_sort_per_iteration);
auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks(
handle, *label_offsets, vertices.size(), approx_edges_to_sort_per_iteration);
auto num_chunks = h_label_offsets.size() - 1;

for (size_t i = 0; i < num_chunks; ++i) {
Expand Down Expand Up @@ -599,10 +595,10 @@ renumber_sampled_edgelist(
static_cast<size_t>(handle.get_device_properties().multiProcessorCount) *
(1 << 20) /* tuning parameter */; // for segmented sort

auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks(
auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks(
handle,
(*renumber_map_label_offsets).data(),
static_cast<size_t>((*renumber_map_label_offsets).size() - 1),
raft::device_span<size_t const>{(*renumber_map_label_offsets).data(),
(*renumber_map_label_offsets).size()},
renumber_map.size(),
approx_edges_to_sort_per_iteration);
auto num_chunks = h_label_offsets.size() - 1;
Expand Down
23 changes: 9 additions & 14 deletions cpp/src/sampling/sampling_post_processing_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,8 @@ compute_min_hop_for_unique_label_vertex_pairs(

rmm::device_uvector<std::byte> d_tmp_storage(0, handle.get_stream());

auto [h_label_offsets, h_edge_offsets] =
detail::compute_offset_aligned_edge_chunks(handle,
(*label_offsets).data(),
num_labels,
vertices.size(),
approx_edges_to_sort_per_iteration);
auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks(
handle, *label_offsets, vertices.size(), approx_edges_to_sort_per_iteration);
auto num_chunks = h_label_offsets.size() - 1;

for (size_t i = 0; i < num_chunks; ++i) {
Expand Down Expand Up @@ -741,10 +737,10 @@ renumber_sampled_edgelist(
static_cast<size_t>(handle.get_device_properties().multiProcessorCount) *
(1 << 20) /* tuning parameter */; // for segmented sort

auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks(
auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks(
handle,
(*renumber_map_label_offsets).data(),
static_cast<size_t>((*renumber_map_label_offsets).size() - 1),
raft::device_span<size_t const>{(*renumber_map_label_offsets).data(),
(*renumber_map_label_offsets).size() - 1},
renumber_map.size(),
approx_edges_to_sort_per_iteration);
auto num_chunks = h_label_offsets.size() - 1;
Expand Down Expand Up @@ -910,11 +906,10 @@ sort_sampled_edge_tuples(
(1 << 20) /* tuning parameter */; // for sorts in chunks

std::tie(h_label_offsets, h_edge_offsets) =
detail::compute_offset_aligned_edge_chunks(handle,
std::get<0>(*edgelist_label_offsets).data(),
std::get<1>(*edgelist_label_offsets),
edgelist_majors.size(),
approx_edges_to_sort_per_iteration);
detail::compute_offset_aligned_element_chunks(handle,
std::get<0>(*edgelist_label_offsets),
edgelist_majors.size(),
approx_edges_to_sort_per_iteration);
} else {
h_label_offsets = {0, 1};
h_edge_offsets = {0, edgelist_majors.size()};
Expand Down
16 changes: 4 additions & 12 deletions cpp/src/structure/detail/structure_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,8 @@ void sort_adjacency_list(raft::handle_t const& handle,
// to limit memory footprint ((1 << 20) is a tuning parameter)
auto approx_edges_to_sort_per_iteration =
static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * (1 << 20);
auto [h_vertex_offsets, h_edge_offsets] =
detail::compute_offset_aligned_edge_chunks(handle,
offsets.data(),
static_cast<vertex_t>(offsets.size() - 1),
num_edges,
approx_edges_to_sort_per_iteration);
auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks(
handle, offsets, num_edges, approx_edges_to_sort_per_iteration);
auto num_chunks = h_vertex_offsets.size() - 1;

// 3. Segmented sort each vertex's neighbors
Expand Down Expand Up @@ -451,12 +447,8 @@ void sort_adjacency_list(raft::handle_t const& handle,
// to limit memory footprint ((1 << 20) is a tuning parameter)
auto approx_edges_to_sort_per_iteration =
static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * (1 << 20);
auto [h_vertex_offsets, h_edge_offsets] =
detail::compute_offset_aligned_edge_chunks(handle,
offsets.data(),
static_cast<vertex_t>(offsets.size() - 1),
num_edges,
approx_edges_to_sort_per_iteration);
auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks(
handle, offsets, num_edges, approx_edges_to_sort_per_iteration);
auto num_chunks = h_vertex_offsets.size() - 1;

// 3. Segmented sort each vertex's neighbors
Expand Down

0 comments on commit 525518e

Please sign in to comment.