Skip to content

Commit

Permalink
simplify launch testing and add waits
Browse files (browse the repository at this point in the history)
  • Loading branch information
MrBurmark committed Dec 10, 2024
1 parent d252a4d commit fb1acb8
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 46 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,6 +51,10 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M)
&working_array,
&check_array,
&test_array);

std::iota(test_array, test_array + data_len, 0);
working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * data_len);

//6 threads total
constexpr int threads_x = 2;
constexpr int threads_y = 3;
Expand All @@ -62,8 +66,6 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M)

if ( RAJA::stripIndexType(N) > 0 ) {

std::iota(test_array, test_array + RAJA::stripIndexType(N), 0);

constexpr int DIM = 6;
using layout_t = RAJA::Layout<DIM, INDEX_TYPE,DIM-1>;
RAJA::View<INDEX_TYPE, layout_t> Aview(working_array, N6, N5, N4, N3, N2, N1);
Expand All @@ -83,7 +85,7 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M)
auto idx = tx + N1 * (ty + N2 * (tz + N3 * (bx + N4 * (by + N5 * bz))));


Aview(bz, by, bx, tz, ty, tx) = static_cast<INDEX_TYPE>(idx);
Aview(bz, by, bx, tz, ty, tx) += static_cast<INDEX_TYPE>(idx);
});
});
});
Expand All @@ -94,10 +96,6 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M)
});
} else { // zero-length segment

memset(static_cast<void*>(test_array), 0, sizeof(INDEX_TYPE) * data_len);

working_res.memcpy(working_array, test_array, sizeof(INDEX_TYPE) * data_len);

RAJA::launch<LAUNCH_POLICY>
(RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), RAJA::Threads(blocks_x, blocks_y ,blocks_z)),
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
Expand All @@ -123,6 +121,7 @@ void LaunchNestedDirectTestImpl(INDEX_TYPE M)
}

working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * data_len);
working_res.wait();

if (RAJA::stripIndexType(N) > 0) {

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -56,6 +56,9 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M)
&check_array,
&test_array);

std::iota(test_array, test_array + data_len, 0);
working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * data_len);

//6 threads total
constexpr int threads_x = 1;
constexpr int threads_y = 2;
Expand All @@ -67,8 +70,6 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M)

if ( RAJA::stripIndexType(N) > 0 ) {

std::iota(test_array, test_array + RAJA::stripIndexType(N), 0);

constexpr int DIM = 6;
using layout_t = RAJA::Layout<DIM, INDEX_TYPE,DIM-1>;
RAJA::View<INDEX_TYPE, layout_t> Aview(working_array, N6, N5, N4, N3, N2, N1);
Expand All @@ -88,7 +89,7 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M)
auto idx = tx + N1 * (ty + N2 * (tz + N3 * (bx + N4 * (by + N5 * bz))));


Aview(bz, by, bx, tz, ty, tx) = static_cast<INDEX_TYPE>(idx);
Aview(bz, by, bx, tz, ty, tx) += static_cast<INDEX_TYPE>(idx);

});
});
Expand All @@ -100,10 +101,6 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M)
});
} else { // zero-length segment

memset(static_cast<void*>(test_array), 0, sizeof(INDEX_TYPE) * data_len);

working_res.memcpy(working_array, test_array, sizeof(INDEX_TYPE) * data_len);

RAJA::launch<LAUNCH_POLICY>
(RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), RAJA::Threads(blocks_x, blocks_y ,blocks_z)),
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
Expand All @@ -129,6 +126,7 @@ void LaunchNestedLoopTestImpl(INDEX_TYPE M)
}

working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * data_len);
working_res.wait();

if (RAJA::stripIndexType(N) > 0) {

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -55,7 +55,9 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M)
&test_array);

std::iota(test_array, test_array + data_len, 0);
working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * data_len);
if ( data_len > 0 ) {
working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * data_len);
}

constexpr int DIM = 3;
using layout_t = RAJA::Layout<DIM, INDEX_TYPE,DIM-1>;
Expand Down Expand Up @@ -86,10 +88,8 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M)
});
});

if ( RAJA::stripIndexType(N) > 0 ) {

if ( data_len > 0 ) {
working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * data_len);

}
working_res.wait();

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,6 +48,12 @@ void LaunchNestedUncheckedTestImpl(INDEX_TYPE M)
&working_array,
&check_array,
&test_array);

std::iota(test_array, test_array + data_len, 0);
if ( data_len > 0 ) {
working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * data_len);
}

//6 threads total
const int threads_x = 2*M;
const int threads_y = 3*M;
Expand All @@ -57,8 +63,6 @@ void LaunchNestedUncheckedTestImpl(INDEX_TYPE M)
const int blocks_y = 5*M;
const int blocks_z = 6*M;

std::iota(test_array, test_array + RAJA::stripIndexType(N), 0);

const int DIM = 6;
using layout_t = RAJA::Layout<DIM, INDEX_TYPE,DIM-1>;
RAJA::View<INDEX_TYPE, layout_t> Aview(working_array, N6, N5, N4, N3, N2, N1);
Expand All @@ -78,7 +82,7 @@ void LaunchNestedUncheckedTestImpl(INDEX_TYPE M)
auto idx = tx + N1 * (ty + N2 * (tz + N3 * (bx + N4 * (by + N5 * bz))));


Aview(bz, by, bx, tz, ty, tx) = static_cast<INDEX_TYPE>(idx);
Aview(bz, by, bx, tz, ty, tx) += static_cast<INDEX_TYPE>(idx);
});
});
});
Expand All @@ -88,11 +92,10 @@ void LaunchNestedUncheckedTestImpl(INDEX_TYPE M)
});
});

if ( RAJA::stripIndexType(N) > 0 ) {

if ( data_len > 0 ) {
working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * data_len);

}
working_res.wait();

for (INDEX_TYPE i = INDEX_TYPE(0); i < N; i++) {
ASSERT_EQ(test_array[RAJA::stripIndexType(i)], check_array[RAJA::stripIndexType(i)]);
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -52,10 +52,12 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)
&check_iloop_array,
&test_iloop_array);

if ( RAJA::stripIndexType(N) > 0 ) {
std::iota(test_ttile_array, test_ttile_array + data_len, 0);
std::iota(test_iloop_array, test_iloop_array + data_len, 0);
working_res.memset(working_ttile_array, 0, sizeof(INDEX_TYPE) * data_len);
working_res.memset(working_iloop_array, 0, sizeof(INDEX_TYPE) * data_len);

std::iota(test_ttile_array, test_ttile_array + RAJA::stripIndexType(N), 0);
std::iota(test_iloop_array, test_iloop_array + RAJA::stripIndexType(N), 0);
if ( RAJA::stripIndexType(N) > 0 ) {

RAJA::launch<LAUNCH_POLICY>(
RAJA::LaunchParams(RAJA::Teams(blocks_x), RAJA::Threads(threads_x)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
Expand All @@ -65,8 +67,8 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)
RAJA::loop_icount<THREAD_X_POLICY>(
ctx, x_tile, [&](INDEX_TYPE tx, INDEX_TYPE ix) {

working_ttile_array[tx] = bx;
working_iloop_array[tx] = ix;
working_ttile_array[tx] += bx;
working_iloop_array[tx] += ix;

}
);
Expand All @@ -77,10 +79,6 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)

} else { // zero-length segment

memset(static_cast<void*>(test_ttile_array), 0, sizeof(INDEX_TYPE) * data_len);

working_res.memcpy(working_ttile_array, test_ttile_array, sizeof(INDEX_TYPE) * data_len);

RAJA::launch<LAUNCH_POLICY>(
RAJA::LaunchParams(RAJA::Teams(blocks_x), RAJA::Threads(blocks_x)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {

Expand All @@ -102,6 +100,7 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)

working_res.memcpy(check_ttile_array, working_ttile_array, sizeof(INDEX_TYPE) * data_len);
working_res.memcpy(check_iloop_array, working_iloop_array, sizeof(INDEX_TYPE) * data_len);
working_res.wait();

if (RAJA::stripIndexType(N) > 0) {

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -55,10 +55,12 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M)
&check_iloop_array,
&test_iloop_array);

if ( RAJA::stripIndexType(N) > 0 ) {
std::iota(test_ttile_array, test_ttile_array + data_len, 0);
std::iota(test_iloop_array, test_iloop_array + data_len, 0);
working_res.memset(working_ttile_array, 0, sizeof(INDEX_TYPE) * data_len);
working_res.memset(working_iloop_array, 0, sizeof(INDEX_TYPE) * data_len);

std::iota(test_ttile_array, test_ttile_array + RAJA::stripIndexType(N), 0);
std::iota(test_iloop_array, test_iloop_array + RAJA::stripIndexType(N), 0);
if ( RAJA::stripIndexType(N) > 0 ) {

RAJA::launch<LAUNCH_POLICY>(
RAJA::LaunchParams(RAJA::Teams(blocks_x), RAJA::Threads(threads_x)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
Expand All @@ -79,10 +81,6 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M)
);
} else { // zero-length segment

memset(static_cast<void*>(test_ttile_array), 0, sizeof(INDEX_TYPE) * data_len);

working_res.memcpy(working_ttile_array, test_ttile_array, sizeof(INDEX_TYPE) * data_len);

RAJA::launch<LAUNCH_POLICY>(
RAJA::LaunchParams(RAJA::Teams(blocks_x), RAJA::Threads(blocks_x)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {

Expand All @@ -105,6 +103,7 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M)

working_res.memcpy(check_ttile_array, working_ttile_array, sizeof(INDEX_TYPE) * data_len);
working_res.memcpy(check_iloop_array, working_iloop_array, sizeof(INDEX_TYPE) * data_len);
working_res.wait();

if (RAJA::stripIndexType(N) > 0) {

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,9 +45,12 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M)
&check_iloop_array,
&test_iloop_array);


std::iota(test_ttile_array, test_ttile_array + RAJA::stripIndexType(N), 0);
std::iota(test_iloop_array, test_iloop_array + RAJA::stripIndexType(N), 0);
if ( data_len > 0 ) {
std::iota(test_ttile_array, test_ttile_array + data_len, 0);
std::iota(test_iloop_array, test_iloop_array + data_len, 0);
working_res.memset(working_ttile_array, 0, sizeof(INDEX_TYPE) * data_len);
working_res.memset(working_iloop_array, 0, sizeof(INDEX_TYPE) * data_len);
}

RAJA::launch<LAUNCH_POLICY>(
RAJA::LaunchParams(RAJA::Teams(blocks_x), RAJA::Threads(threads_x)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
Expand All @@ -67,12 +70,11 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M)
}
);

if ( RAJA::stripIndexType(N) > 0 ) {

if ( data_len > 0 ) {
working_res.memcpy(check_ttile_array, working_ttile_array, sizeof(INDEX_TYPE) * data_len);
working_res.memcpy(check_iloop_array, working_iloop_array, sizeof(INDEX_TYPE) * data_len);

}
working_res.wait();

INDEX_TYPE idx = 0;
for (INDEX_TYPE bx = INDEX_TYPE(0); bx < blocks_x; ++bx) {
Expand Down

0 comments on commit fb1acb8

Please sign in to comment.