Skip to content

Commit

Permalink
TL/UCP: reduce_kn avg pre op fix (#449) (#454)
Browse files Browse the repository at this point in the history
* TL/UCP: reduce_kn avg pre op fix

* TL/UCP: code review fixes

Co-authored-by: valentin petrov <[email protected]>

Co-authored-by: valentin petrov <[email protected]>
  • Loading branch information
shimmybalsam and valentin petrov authored Apr 4, 2022
1 parent 20670f8 commit e38647f
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 15 deletions.
18 changes: 13 additions & 5 deletions src/components/tl/ucp/reduce/reduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,34 @@ ucc_status_t ucc_tl_ucp_reduce_init(ucc_tl_ucp_task_t *task)
ucc_rank_t vrank = (myrank - root + team_size) % team_size;
ucc_status_t status = UCC_OK;
ucc_memory_type_t mtype;
size_t data_size;
ucc_datatype_t dt;
size_t count, data_size;
int isleaf;
int self_avg;

if (root == myrank) {
data_size = args->dst.info.count * ucc_dt_size(args->dst.info.datatype);
count = args->dst.info.count;
dt = args->dst.info.datatype;
mtype = args->dst.info.mem_type;
} else {
data_size = args->src.info.count * ucc_dt_size(args->src.info.datatype);
count = args->src.info.count;
dt = args->src.info.datatype;
mtype = args->src.info.mem_type;
}
data_size = count * ucc_dt_size(dt);
task->super.post = ucc_tl_ucp_reduce_knomial_start;
task->super.progress = ucc_tl_ucp_reduce_knomial_progress;
task->super.finalize = ucc_tl_ucp_reduce_knomial_finalize;
task->reduce_kn.radix =
ucc_min(UCC_TL_UCP_TEAM_LIB(team)->cfg.reduce_kn_radix, team_size);
CALC_KN_TREE_DIST(team_size, task->reduce_kn.radix,
task->reduce_kn.max_dist);
isleaf = (vrank % task->reduce_kn.radix != 0 || vrank == team_size - 1);
isleaf = (vrank % task->reduce_kn.radix != 0 || vrank == team_size - 1);
self_avg = (vrank % task->reduce_kn.radix == 0 && args->op == UCC_OP_AVG &&
UCC_TL_UCP_TEAM_LIB(team)->cfg.reduce_avg_pre_op);
task->reduce_kn.scratch_mc_header = NULL;

if (!isleaf) {
if (!isleaf || self_avg) {
/* scratch of size radix to fit up to radix - 1 received vectors
from its children at each step,
and an additional 1 for the previous step's reduce multi result */
Expand All @@ -49,5 +56,6 @@ ucc_status_t ucc_tl_ucp_reduce_init(ucc_tl_ucp_task_t *task)
task->reduce_kn.scratch =
task->reduce_kn.scratch_mc_header->addr;
}

return status;
}
58 changes: 48 additions & 10 deletions src/components/tl/ucp/reduce/reduce_knomial.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,15 +124,37 @@ void ucc_tl_ucp_reduce_knomial_progress(ucc_coll_task_t *coll_task)

ucc_status_t ucc_tl_ucp_reduce_knomial_start(ucc_coll_task_t *coll_task)
{
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t);
ucc_coll_args_t *args = &TASK_ARGS(task);
ucc_tl_ucp_team_t *team = TASK_TEAM(task);
uint32_t radix = task->reduce_kn.radix;
ucc_rank_t root = (ucc_rank_t)args->root;
ucc_rank_t rank = UCC_TL_TEAM_RANK(team);
ucc_rank_t size = UCC_TL_TEAM_SIZE(team);
ucc_rank_t vrank = (rank - root + size) % size;
int isleaf = ((vrank % radix != 0) || (vrank == size - 1));
ucc_tl_ucp_task_t *task =
ucc_derived_of(coll_task, ucc_tl_ucp_task_t);
ucc_coll_args_t *args = &TASK_ARGS(task);
ucc_tl_ucp_team_t *team = TASK_TEAM(task);
uint32_t radix = task->reduce_kn.radix;
ucc_rank_t root = (ucc_rank_t)args->root;
ucc_rank_t rank = UCC_TL_TEAM_RANK(team);
ucc_rank_t size = UCC_TL_TEAM_SIZE(team);
ucc_rank_t vrank = (rank - root + size) % size;
int isleaf =
(vrank % radix != 0 || vrank == size - 1);
int avg_pre_op =
UCC_TL_UCP_TEAM_LIB(team)->cfg.reduce_avg_pre_op;
int self_avg = (args->op == UCC_OP_AVG &&
avg_pre_op && vrank % radix == 0);
size_t data_size, count;
ucc_memory_type_t mtype;
ucc_datatype_t dt;
ucc_status_t status;

if (root == rank) {
count = args->dst.info.count;
dt = args->dst.info.datatype;
mtype = args->dst.info.mem_type;
} else {
count = args->src.info.count;
dt = args->src.info.datatype;
mtype = args->src.info.mem_type;
}
data_size = count * ucc_dt_size(dt);


UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_reduce_kn_start", 0);
ucc_tl_ucp_task_reset(task, UCC_INPROGRESS);
Expand All @@ -141,10 +163,26 @@ ucc_status_t ucc_tl_ucp_reduce_knomial_start(ucc_coll_task_t *coll_task)
args->src.info.buffer = args->dst.info.buffer;
}

if (isleaf) {
if (isleaf && !self_avg) {
task->reduce_kn.scratch = args->src.info.buffer;
}

if (isleaf && self_avg) {
/* In case of avg_pre_op, a single leaf process that does not take part
in the first-iteration reduction must divide its own data by team_size */
status = ucc_dt_reduce_multi_alpha(args->src.info.buffer,
args->src.info.buffer, task->reduce_kn.scratch, 1, count,
data_size, dt, UCC_OP_PROD,
(double)1 / (double)(UCC_TL_TEAM_SIZE(TASK_TEAM(task)) * 2),
mtype, args);
if (ucc_unlikely(UCC_OK != status)) {
tl_error(UCC_TASK_LIB(task),
"failed to perform dt reduction");
task->super.super.status = status;
return status;
}
}

task->reduce_kn.dist = 1;
task->reduce_kn.phase = UCC_REDUCE_KN_PHASE_INIT;

Expand Down

0 comments on commit e38647f

Please sign in to comment.