-
Notifications
You must be signed in to change notification settings - Fork 103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DRAFT : Register memory #1010
base: master
Are you sure you want to change the base?
DRAFT : Register memory #1010
Changes from 9 commits
c6d49ef
43faffd
0441b48
a22f47c
44d21ea
e14d2b1
8ef02c3
3417737
6589983
4cbdc7e
00a9097
3ca742c
01879f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
#include "coll_patterns/sra_knomial.h" | ||
#include "utils/ucc_math.h" | ||
#include "utils/ucc_coll_utils.h" | ||
#include <stdio.h> | ||
|
||
#define SAVE_STATE(_phase) \ | ||
do { \ | ||
|
@@ -50,6 +51,9 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
args->root : 0; | ||
ucc_rank_t rank = VRANK(task->subset.myrank, broot, size); | ||
size_t local = GET_LOCAL_COUNT(args, size, rank); | ||
ucp_mem_h *mh_list = task->mh_list; | ||
int max_count = task->count_mh; | ||
int count_mh = 0; | ||
void *sbuf; | ||
ptrdiff_t peer_seg_offset, local_seg_offset; | ||
ucc_rank_t peer, peer_dist; | ||
|
@@ -59,38 +63,51 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
size_t extra_count; | ||
|
||
EXEC_TASK_TEST(UCC_KN_PHASE_INIT, "failed during ee task test", | ||
task->allgather_kn.etask); | ||
task->allgather_kn.etask = NULL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a BUG - you can't remove it. its needed |
||
task->allgather_kn.etask_linked_list_head->etask); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove |
||
// task->allgather_kn.etask_linked_list_head = NULL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove |
||
UCC_KN_GOTO_PHASE(task->allgather_kn.phase); | ||
if (KN_NODE_EXTRA == node_type) { | ||
peer = ucc_knomial_pattern_get_proxy(p, rank); | ||
if (p->type != KN_PATTERN_ALLGATHERX) { | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb(task->allgather_kn.sbuf, | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb_with_mem(task->allgather_kn.sbuf, | ||
local * dt_size, mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer,broot,size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(rbuf, data_size, mem_type, | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb_with_mem(rbuf, data_size, mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer,broot,size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
if ((p->type != KN_PATTERN_ALLGATHERX) && (node_type == KN_NODE_PROXY)) { | ||
peer = ucc_knomial_pattern_get_extra(p, rank); | ||
extra_count = GET_LOCAL_COUNT(args, size, peer); | ||
peer = ucc_ep_map_eval(task->subset.map, peer); | ||
UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(PTR_OFFSET(task->allgather_kn.sbuf, | ||
UCPCHECK_GOTO(ucc_tl_ucp_recv_nb_with_mem(PTR_OFFSET(task->allgather_kn.sbuf, | ||
local * dt_size), extra_count * dt_size, | ||
mem_type, peer, team, task), | ||
mem_type, peer, team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
|
||
UCC_KN_PHASE_EXTRA: | ||
if ((KN_NODE_EXTRA == node_type) || (KN_NODE_PROXY == node_type)) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test_with_etasks(task)) { | ||
SAVE_STATE(UCC_KN_PHASE_EXTRA); | ||
return; | ||
} | ||
|
@@ -114,12 +131,16 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
continue; | ||
} | ||
} | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb(sbuf, local_seg_count * dt_size, | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb_with_mem(sbuf, local_seg_count * dt_size, | ||
mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer, broot, size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is a function ucc_assert for that look what its doing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why u chose to out that check here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should I just raise an error maybe ? How ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if I use ucc_assert, I can't add my print right ? |
||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not good here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you mean even with ucc_assert ? |
||
} | ||
} | ||
|
||
for (loop_step = 1; loop_step < radix; loop_step++) { | ||
|
@@ -137,15 +158,19 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
} | ||
} | ||
UCPCHECK_GOTO( | ||
ucc_tl_ucp_recv_nb(PTR_OFFSET(rbuf, peer_seg_offset * dt_size), | ||
ucc_tl_ucp_recv_nb_with_mem(PTR_OFFSET(rbuf, peer_seg_offset * dt_size), | ||
peer_seg_count * dt_size, mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer, broot, size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
UCC_KN_PHASE_LOOP: | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test_recv(task)) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test_recv_with_etasks(task)) { | ||
SAVE_STATE(UCC_KN_PHASE_LOOP); | ||
return; | ||
} | ||
|
@@ -154,15 +179,19 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
|
||
if (KN_NODE_PROXY == node_type) { | ||
peer = ucc_knomial_pattern_get_extra(p, rank); | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb(args->dst.info.buffer, data_size, | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb_with_mem(args->dst.info.buffer, data_size, | ||
mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer, broot, size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
UCC_KN_PHASE_PROXY: | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test_with_etasks(task)) { | ||
SAVE_STATE(UCC_KN_PHASE_PROXY); | ||
return; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add in out |
||
|
@@ -193,7 +222,6 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) | |
|
||
UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_kn_start", 0); | ||
ucc_tl_ucp_task_reset(task, UCC_INPROGRESS); | ||
task->allgather_kn.etask = NULL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BUG - why removed? |
||
task->allgather_kn.phase = UCC_KN_PHASE_INIT; | ||
if (ct == UCC_COLL_TYPE_ALLGATHER) { | ||
ucc_kn_ag_pattern_init(size, rank, radix, args->dst.info.count, | ||
|
@@ -212,7 +240,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) | |
eargs.copy.len = args->src.info.count * | ||
ucc_dt_size(args->src.info.datatype); | ||
status = ucc_ee_executor_task_post(exec, &eargs, | ||
&task->allgather_kn.etask); | ||
&task->allgather_kn.etask_linked_list_head->etask); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not good, same BUG as above |
||
if (ucc_unlikely(status != UCC_OK)) { | ||
task->super.status = status; | ||
return status; | ||
|
@@ -234,6 +262,183 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) | |
return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); | ||
} | ||
|
||
void register_memory(ucc_coll_task_t *coll_task){ | ||
|
||
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, | ||
ucc_tl_ucp_task_t); | ||
ucc_coll_args_t *args = &TASK_ARGS(task); | ||
ucc_tl_ucp_team_t *team = TASK_TEAM(task); | ||
ucc_kn_radix_t radix = task->allgather_kn.p.radix; | ||
uint8_t node_type = task->allgather_kn.p.node_type; | ||
ucc_knomial_pattern_t *p = &task->allgather_kn.p; | ||
void *rbuf = args->dst.info.buffer; | ||
ucc_memory_type_t mem_type = args->dst.info.mem_type; | ||
size_t count = args->dst.info.count; | ||
size_t dt_size = ucc_dt_size(args->dst.info.datatype); | ||
size_t data_size = count * dt_size; | ||
ucc_rank_t size = task->subset.map.ep_num; | ||
ucc_rank_t broot = args->coll_type == UCC_COLL_TYPE_BCAST ? | ||
args->root : 0; | ||
ucc_rank_t rank = VRANK(task->subset.myrank, broot, size); | ||
size_t local = GET_LOCAL_COUNT(args, size, rank); | ||
void *sbuf; | ||
ptrdiff_t peer_seg_offset, local_seg_offset; | ||
ucc_rank_t peer, peer_dist; | ||
ucc_kn_radix_t loop_step; | ||
size_t peer_seg_count, local_seg_count; | ||
ucc_status_t status; | ||
size_t extra_count; | ||
|
||
ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(team); | ||
ucp_mem_map_params_t mmap_params; | ||
ucp_mem_h mh; | ||
int size_of_list = 1; | ||
int count_mh = 0; | ||
ucp_mem_h *mh_list = (ucp_mem_h *)malloc(size_of_list * sizeof(ucp_mem_h)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. u never called free There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where should this be ? In the end of the register function ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in finalize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also where ? |
||
|
||
mmap_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | | ||
UCP_MEM_MAP_PARAM_FIELD_LENGTH | | ||
UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE; | ||
mmap_params.memory_type = ucc_memtype_to_ucs[mem_type]; | ||
|
||
if (KN_NODE_EXTRA == node_type) { | ||
if (p->type != KN_PATTERN_ALLGATHERX) { | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need for all of the line spaces |
||
mmap_params.address = task->allgather_kn.sbuf; | ||
mmap_params.length = local * dt_size; | ||
status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh); | ||
if (UCC_OK != status) { | ||
task->super.status = status; | ||
return; | ||
} | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what happended to putting it in function please? or macro? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes I tried but had bug, I'll show you |
||
} | ||
|
||
mmap_params.address = rbuf; | ||
mmap_params.length = data_size; | ||
status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh); | ||
if (UCC_OK != status) { | ||
task->super.status = status; | ||
return; | ||
} | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
if ((p->type != KN_PATTERN_ALLGATHERX) && (node_type == KN_NODE_PROXY)) { | ||
peer = ucc_knomial_pattern_get_extra(p, rank); | ||
extra_count = GET_LOCAL_COUNT(args, size, peer); | ||
peer = ucc_ep_map_eval(task->subset.map, peer); | ||
mmap_params.address = PTR_OFFSET(task->allgather_kn.sbuf, | ||
local * dt_size); | ||
mmap_params.length = extra_count * dt_size; | ||
status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh); | ||
if (UCC_OK != status) { | ||
task->super.status = status; | ||
return; | ||
} | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
if ((KN_NODE_EXTRA == node_type) || (KN_NODE_PROXY == node_type)) { | ||
if (KN_NODE_EXTRA == node_type) { | ||
goto out; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there shouldn't be any use of OUT in register_memory, its needed only in progress function, not here which is blocking There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but before there was this : UCC_KN_PHASE_EXTRA:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here : e14d2b1#r1727153646 |
||
} | ||
} | ||
while (!ucc_knomial_pattern_loop_done(p)) { | ||
ucc_kn_ag_pattern_peer_seg(rank, p, &local_seg_count, | ||
&local_seg_offset); | ||
sbuf = PTR_OFFSET(rbuf, local_seg_offset * dt_size); | ||
|
||
for (loop_step = radix - 1; loop_step > 0; loop_step--) { | ||
peer = ucc_knomial_pattern_get_loop_peer(p, rank, loop_step); | ||
if (peer == UCC_KN_PEER_NULL) | ||
continue; | ||
if (coll_task->bargs.args.coll_type == UCC_COLL_TYPE_BCAST) { | ||
peer_dist = ucc_knomial_calc_recv_dist(size - p->n_extra, | ||
ucc_knomial_pattern_loop_rank(p, peer), p->radix, 0); | ||
if (peer_dist < task->allgather_kn.recv_dist) { | ||
continue; | ||
} | ||
} | ||
mmap_params.address = sbuf; | ||
mmap_params.length = local_seg_count * dt_size; | ||
status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh); | ||
if (UCC_OK != status) { | ||
task->super.status = status; | ||
return; | ||
} | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
|
||
for (loop_step = 1; loop_step < radix; loop_step++) { | ||
peer = ucc_knomial_pattern_get_loop_peer(p, rank, loop_step); | ||
if (peer == UCC_KN_PEER_NULL) | ||
continue; | ||
ucc_kn_ag_pattern_peer_seg(peer, p, &peer_seg_count, | ||
&peer_seg_offset); | ||
|
||
if (coll_task->bargs.args.coll_type == UCC_COLL_TYPE_BCAST) { | ||
peer_dist = ucc_knomial_calc_recv_dist(size - p->n_extra, | ||
ucc_knomial_pattern_loop_rank(p, peer), p->radix, 0); | ||
if (peer_dist > task->allgather_kn.recv_dist) { | ||
continue; | ||
} | ||
} | ||
mmap_params.address = PTR_OFFSET(rbuf, peer_seg_offset * dt_size); | ||
mmap_params.length = peer_seg_count * dt_size; | ||
status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh); | ||
if (UCC_OK != status) { | ||
task->super.status = status; | ||
return; | ||
} | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
ucc_kn_ag_pattern_next_iter(p); | ||
} | ||
|
||
if (KN_NODE_PROXY == node_type) { | ||
mmap_params.address = args->dst.info.buffer; | ||
mmap_params.length = data_size; | ||
status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh); | ||
if (UCC_OK != status) { | ||
task->super.status = status; | ||
return; | ||
} | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
|
||
out: | ||
ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this needed here? |
||
task->super.status = UCC_OK; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this needed here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess no |
||
task->mh_list = mh_list; | ||
task->count_mh = count_mh-1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. because I'm always increamenting count_mh after using it so the last one is by 1 to high |
||
UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_kn_done", 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this needed here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what this is doing, do you know ? |
||
|
||
} | ||
|
||
ucc_status_t ucc_tl_ucp_allgather_knomial_init_r( | ||
ucc_base_coll_args_t *coll_args, ucc_base_team_t *team, | ||
ucc_coll_task_t **task_h, ucc_kn_radix_t radix) | ||
|
@@ -242,13 +447,21 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_init_r( | |
ucc_tl_ucp_task_t *task; | ||
ucc_sbgp_t *sbgp; | ||
|
||
printf("USING NEW KNOMIAL"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add TODO remove later |
||
|
||
task = ucc_tl_ucp_init_task(coll_args, team); | ||
ucc_mpool_init(&task->allgather_kn.etask_node_mpool, 0, sizeof(node_ucc_ee_executor_task_t), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. u didn't check for status There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what should I do if status<0 ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. u never called mpool finalize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didnt find anything called mpool finalize, what are you talking about ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ucc_mpool_cleanup in knomial finalize or similar There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see anything close to finalize inside allgather_knomial.c, is it elsewhere ? |
||
0, UCC_CACHE_LINE_SIZE, 16, UINT_MAX, NULL, | ||
tl_team->super.super.context->ucc_context->thread_mode, "etasks_linked_list_nodes"); | ||
|
||
if (tl_team->cfg.use_reordering && | ||
coll_args->args.coll_type == UCC_COLL_TYPE_ALLREDUCE) { | ||
sbgp = ucc_topo_get_sbgp(tl_team->topo, UCC_SBGP_FULL_HOST_ORDERED); | ||
task->subset.myrank = sbgp->group_rank; | ||
task->subset.map = sbgp->map; | ||
} | ||
register_memory(&task->super); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. u didn't check for status |
||
task->allgather_kn.etask_linked_list_head = NULL; | ||
task->allgather_kn.p.radix = radix; | ||
task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR; | ||
task->super.post = ucc_tl_ucp_allgather_knomial_start; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BUG - this is a mistake, because u want to keep it when u go in and out from progress
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
right but so how do I initialize it ?