Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Draft] Use resources for allocation #1519

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/RAJA/pattern/params/reducer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ namespace detail
#if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE)
// Device related attributes.
value_type * devicetarget = nullptr;
RAJA::detail::SoAPtr<value_type, device_mem_pool_t> device_mem;
RAJA::detail::SoAPtr<value_type> device_mem;
unsigned int * device_count = nullptr;
#endif

Expand Down
17 changes: 12 additions & 5 deletions include/RAJA/policy/cuda/MemUtils_CUDA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ namespace detail
struct cudaInfo {
cuda_dim_t gridDim{0, 0, 0};
cuda_dim_t blockDim{0, 0, 0};
::RAJA::resources::Cuda res{::RAJA::resources::Cuda::CudaFromStream(0,0)};
::RAJA::resources::Cuda cuda_res{::RAJA::resources::Cuda::CudaFromStream(0,0)};
::RAJA::resources::Resource res{::RAJA::resources::Cuda::CudaFromStream(0,0)};
bool setup_reducers = false;
#if defined(RAJA_ENABLE_OPENMP)
cudaInfo* thread_states = nullptr;
Expand Down Expand Up @@ -251,20 +252,26 @@ cuda_dim_t currentBlockDim() { return detail::tl_status.blockDim; }

//! get the CUDA-typed resource for the current launch
RAJA_INLINE
::RAJA::resources::Cuda currentResource() { return detail::tl_status.res; }
::RAJA::resources::Cuda& currentCudaResource() { return detail::tl_status.cuda_res; }

//! get resource for current launch
RAJA_INLINE
::RAJA::resources::Resource& currentResource() { return detail::tl_status.res; }

//! create copy of loop_body that is setup for device execution
template <typename LOOP_BODY>
template <typename Res, typename LOOP_BODY>
RAJA_INLINE typename std::remove_reference<LOOP_BODY>::type make_launch_body(
cuda_dim_t gridDim,
cuda_dim_t blockDim,
size_t RAJA_UNUSED_ARG(dynamic_smem),
::RAJA::resources::Cuda res,
Res res,
LOOP_BODY&& loop_body)
{
detail::SetterResetter<bool> setup_reducers_srer(
detail::tl_status.setup_reducers, true);
detail::SetterResetter<::RAJA::resources::Cuda> res_srer(
detail::SetterResetter<::RAJA::resources::Cuda> cuda_res_srer(
detail::tl_status.cuda_res, res);
detail::SetterResetter<::RAJA::resources::Resource> res_srer(
detail::tl_status.res, res);

detail::tl_status.gridDim = gridDim;
Expand Down
30 changes: 18 additions & 12 deletions include/RAJA/policy/cuda/forall.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,14 @@ __launch_bounds__(BlockSize, 1) __global__
////////////////////////////////////////////////////////////////////////
//

template <typename Iterable, typename LoopBody, size_t BlockSize, size_t BlocksPerSM, bool Async, typename ForallParam>
template <typename Res, typename Iterable, typename LoopBody, size_t BlockSize, size_t BlocksPerSM, bool Async, typename ForallParam>
RAJA_INLINE
concepts::enable_if_t<
resources::EventProxy<resources::Cuda>,
resources::EventProxy<Res>,
std::is_base_of<resources::Cuda, Res>,
RAJA::expt::type_traits::is_ForallParamPack<ForallParam>,
RAJA::expt::type_traits::is_ForallParamPack_empty<ForallParam>>
forall_impl(resources::Cuda cuda_res,
forall_impl(Res cuda_res,
cuda_exec_explicit<BlockSize, BlocksPerSM, Async>,
Iterable&& iter,
LoopBody&& loop_body,
Expand Down Expand Up @@ -250,16 +251,17 @@ forall_impl(resources::Cuda cuda_res,
RAJA_FT_END;
}

return resources::EventProxy<resources::Cuda>(cuda_res);
return resources::EventProxy<Res>(cuda_res);
}

template <typename Iterable, typename LoopBody, size_t BlockSize, size_t BlocksPerSM, bool Async, typename ForallParam>
template <typename Res, typename Iterable, typename LoopBody, size_t BlockSize, size_t BlocksPerSM, bool Async, typename ForallParam>
RAJA_INLINE
concepts::enable_if_t<
resources::EventProxy<resources::Cuda>,
resources::EventProxy<Res>,
std::is_base_of<resources::Cuda, Res>,
RAJA::expt::type_traits::is_ForallParamPack<ForallParam>,
concepts::negate< RAJA::expt::type_traits::is_ForallParamPack_empty<ForallParam>> >
forall_impl(resources::Cuda cuda_res,
forall_impl(Res cuda_res,
cuda_exec_explicit<BlockSize, BlocksPerSM, Async>,
Iterable&& iter,
LoopBody&& loop_body,
Expand Down Expand Up @@ -325,7 +327,7 @@ forall_impl(resources::Cuda cuda_res,
RAJA_FT_END;
}

return resources::EventProxy<resources::Cuda>(cuda_res);
return resources::EventProxy<Res>(cuda_res);
}


Expand All @@ -348,13 +350,17 @@ forall_impl(resources::Cuda cuda_res,
*
******************************************************************************
*/
template <typename LoopBody,
template <typename Res,
typename LoopBody,
size_t BlockSize,
size_t BlocksPerSM,
bool Async,
typename... SegmentTypes>
RAJA_INLINE resources::EventProxy<resources::Cuda>
forall_impl(resources::Cuda r,
RAJA_INLINE
concepts::enable_if_t<
resources::EventProxy<Res>,
std::is_base_of<resources::Cuda, Res> >
forall_impl(Res r,
ExecPolicy<seq_segit, cuda_exec_explicit<BlockSize, BlocksPerSM, Async>>,
const TypedIndexSet<SegmentTypes...>& iset,
LoopBody&& loop_body)
Expand All @@ -369,7 +375,7 @@ forall_impl(resources::Cuda r,
} // iterate over segments of index set

if (!Async) RAJA::cuda::synchronize(r);
return resources::EventProxy<resources::Cuda>(r);
return resources::EventProxy<Res>(r);
}

} // namespace cuda
Expand Down
4 changes: 2 additions & 2 deletions include/RAJA/policy/cuda/params/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ namespace detail {
// Init
template<typename EXEC_POL, typename OP, typename T>
camp::concepts::enable_if< type_traits::is_cuda_policy<EXEC_POL> >
init(Reducer<OP, T>& red, const RAJA::cuda::detail::cudaInfo & cs)
init(Reducer<OP, T>& red, RAJA::cuda::detail::cudaInfo& cs)
{
cudaMalloc( (void**)(&(red.devicetarget)), sizeof(T));
red.device_mem.allocate(cs.gridDim.x * cs.gridDim.y * cs.gridDim.z);
red.device_mem.allocate(cs.gridDim.x * cs.gridDim.y * cs.gridDim.z, cs.res);
red.device_count = RAJA::cuda::device_zeroed_mempool_type::getInstance().template malloc<unsigned int>(1);
}

Expand Down
Loading