-
Notifications
You must be signed in to change notification settings - Fork 103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DRAFT : Register memory #1010
base: master
Are you sure you want to change the base?
DRAFT : Register memory #1010
Changes from 8 commits
c6d49ef
43faffd
0441b48
a22f47c
44d21ea
e14d2b1
8ef02c3
3417737
6589983
4cbdc7e
00a9097
3ca742c
01879f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
#include "coll_patterns/sra_knomial.h" | ||
#include "utils/ucc_math.h" | ||
#include "utils/ucc_coll_utils.h" | ||
#include <stdio.h> | ||
|
||
#define SAVE_STATE(_phase) \ | ||
do { \ | ||
|
@@ -50,6 +51,9 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
args->root : 0; | ||
ucc_rank_t rank = VRANK(task->subset.myrank, broot, size); | ||
size_t local = GET_LOCAL_COUNT(args, size, rank); | ||
ucp_mem_h *mh_list = task->super.mh_list; | ||
int max_count = task->super.count_mh; | ||
int count_mh = 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BUG - this is a mistake, because u want to keep it when u go in and out from progress There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right but so how do I initialize it ? |
||
void *sbuf; | ||
ptrdiff_t peer_seg_offset, local_seg_offset; | ||
ucc_rank_t peer, peer_dist; | ||
|
@@ -59,38 +63,52 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
size_t extra_count; | ||
|
||
EXEC_TASK_TEST(UCC_KN_PHASE_INIT, "failed during ee task test", | ||
task->allgather_kn.etask); | ||
task->allgather_kn.etask = NULL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a BUG - you can't remove it. its needed |
||
task->allgather_kn.etask_linked_list_head->val); | ||
// EXEC_TASK_TEST_2("failed to copy data to user buffer", | ||
// task->allgather_kn.etask); | ||
task->allgather_kn.etask_linked_list_head->val = NULL; | ||
UCC_KN_GOTO_PHASE(task->allgather_kn.phase); | ||
if (KN_NODE_EXTRA == node_type) { | ||
peer = ucc_knomial_pattern_get_proxy(p, rank); | ||
if (p->type != KN_PATTERN_ALLGATHERX) { | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb(task->allgather_kn.sbuf, | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb_with_mem(task->allgather_kn.sbuf, | ||
local * dt_size, mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer,broot,size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(rbuf, data_size, mem_type, | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb_with_mem(rbuf, data_size, mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer,broot,size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
if ((p->type != KN_PATTERN_ALLGATHERX) && (node_type == KN_NODE_PROXY)) { | ||
peer = ucc_knomial_pattern_get_extra(p, rank); | ||
extra_count = GET_LOCAL_COUNT(args, size, peer); | ||
peer = ucc_ep_map_eval(task->subset.map, peer); | ||
UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(PTR_OFFSET(task->allgather_kn.sbuf, | ||
UCPCHECK_GOTO(ucc_tl_ucp_recv_nb_with_mem(PTR_OFFSET(task->allgather_kn.sbuf, | ||
local * dt_size), extra_count * dt_size, | ||
mem_type, peer, team, task), | ||
mem_type, peer, team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
|
||
UCC_KN_PHASE_EXTRA: | ||
if ((KN_NODE_EXTRA == node_type) || (KN_NODE_PROXY == node_type)) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test_with_etasks(task)) { | ||
SAVE_STATE(UCC_KN_PHASE_EXTRA); | ||
return; | ||
} | ||
|
@@ -114,12 +132,16 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
continue; | ||
} | ||
} | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb(sbuf, local_seg_count * dt_size, | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb_with_mem(sbuf, local_seg_count * dt_size, | ||
mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer, broot, size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is a function ucc_assert for that look what its doing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why u chose to out that check here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should I just raise an error maybe ? How ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if I use ucc_assert, I can't add my print right ? |
||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not good here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you mean even with ucc_assert ? |
||
} | ||
} | ||
|
||
for (loop_step = 1; loop_step < radix; loop_step++) { | ||
|
@@ -137,15 +159,19 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
} | ||
} | ||
UCPCHECK_GOTO( | ||
ucc_tl_ucp_recv_nb(PTR_OFFSET(rbuf, peer_seg_offset * dt_size), | ||
ucc_tl_ucp_recv_nb_with_mem(PTR_OFFSET(rbuf, peer_seg_offset * dt_size), | ||
peer_seg_count * dt_size, mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer, broot, size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
UCC_KN_PHASE_LOOP: | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test_recv(task)) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test_recv_with_etasks(task)) { | ||
SAVE_STATE(UCC_KN_PHASE_LOOP); | ||
return; | ||
} | ||
|
@@ -154,15 +180,19 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
|
||
if (KN_NODE_PROXY == node_type) { | ||
peer = ucc_knomial_pattern_get_extra(p, rank); | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb(args->dst.info.buffer, data_size, | ||
UCPCHECK_GOTO(ucc_tl_ucp_send_nb_with_mem(args->dst.info.buffer, data_size, | ||
mem_type, | ||
ucc_ep_map_eval(task->subset.map, | ||
INV_VRANK(peer, broot, size)), | ||
team, task), | ||
team, task, mh_list[count_mh++]), | ||
task, out); | ||
if (count_mh >= max_count){ | ||
printf("count_mh is bigger than it should (%d >= %d).", count_mh, max_count); | ||
goto out; | ||
} | ||
} | ||
UCC_KN_PHASE_PROXY: | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test_with_etasks(task)) { | ||
SAVE_STATE(UCC_KN_PHASE_PROXY); | ||
return; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add in out |
||
|
@@ -193,7 +223,8 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) | |
|
||
UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_kn_start", 0); | ||
ucc_tl_ucp_task_reset(task, UCC_INPROGRESS); | ||
task->allgather_kn.etask = NULL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BUG - why removed? |
||
task->allgather_kn.etask_linked_list_head = (node_ucc_ee_executor_task_t *)malloc(sizeof(node_ucc_ee_executor_task_t)); | ||
task->allgather_kn.etask_linked_list_head->val = NULL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. task->allgather_kn.etask_linked_list_head = NULL |
||
task->allgather_kn.phase = UCC_KN_PHASE_INIT; | ||
if (ct == UCC_COLL_TYPE_ALLGATHER) { | ||
ucc_kn_ag_pattern_init(size, rank, radix, args->dst.info.count, | ||
|
@@ -212,7 +243,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) | |
eargs.copy.len = args->src.info.count * | ||
ucc_dt_size(args->src.info.datatype); | ||
status = ucc_ee_executor_task_post(exec, &eargs, | ||
&task->allgather_kn.etask); | ||
&task->allgather_kn.etask_linked_list_head->val); | ||
if (ucc_unlikely(status != UCC_OK)) { | ||
task->super.status = status; | ||
return status; | ||
|
@@ -234,6 +265,164 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) | |
return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); | ||
} | ||
|
||
void register_memory(ucc_coll_task_t *coll_task){ | ||
|
||
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, | ||
ucc_tl_ucp_task_t); | ||
ucc_coll_args_t *args = &TASK_ARGS(task); | ||
ucc_tl_ucp_team_t *team = TASK_TEAM(task); | ||
ucc_kn_radix_t radix = task->allgather_kn.p.radix; | ||
uint8_t node_type = task->allgather_kn.p.node_type; | ||
ucc_knomial_pattern_t *p = &task->allgather_kn.p; | ||
void *rbuf = args->dst.info.buffer; | ||
ucc_memory_type_t mem_type = args->dst.info.mem_type; | ||
size_t count = args->dst.info.count; | ||
size_t dt_size = ucc_dt_size(args->dst.info.datatype); | ||
size_t data_size = count * dt_size; | ||
ucc_rank_t size = task->subset.map.ep_num; | ||
ucc_rank_t broot = args->coll_type == UCC_COLL_TYPE_BCAST ? | ||
args->root : 0; | ||
ucc_rank_t rank = VRANK(task->subset.myrank, broot, size); | ||
size_t local = GET_LOCAL_COUNT(args, size, rank); | ||
void *sbuf; | ||
ptrdiff_t peer_seg_offset, local_seg_offset; | ||
ucc_rank_t peer, peer_dist; | ||
ucc_kn_radix_t loop_step; | ||
size_t peer_seg_count, local_seg_count; | ||
// ucc_status_t status; | ||
size_t extra_count; | ||
|
||
ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(team); | ||
ucp_mem_map_params_t mmap_params; | ||
ucp_mem_h mh; | ||
int size_of_list = 6; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe start with 1; |
||
int count_mh = 0; | ||
ucp_mem_h *mh_list = (ucp_mem_h *)malloc(size_of_list * sizeof(ucp_mem_h)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. u never called free There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where should this be ? In the end of the register function ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in finalize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also where ? |
||
|
||
mmap_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | | ||
UCP_MEM_MAP_PARAM_FIELD_LENGTH | | ||
UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE; | ||
mmap_params.memory_type = ucc_memtype_to_ucs[mem_type]; | ||
|
||
// EXEC_TASK_TEST(UCC_KN_PHASE_INIT, "failed during ee task test", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could be removed |
||
// task->allgather_kn.etask_linked_list_head->val); | ||
// task->allgather_kn.etask_linked_list_head->val = NULL; | ||
// UCC_KN_GOTO_PHASE(task->allgather_kn.phase); | ||
if (KN_NODE_EXTRA == node_type) { | ||
if (p->type != KN_PATTERN_ALLGATHERX) { | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need for all of the line spaces |
||
mmap_params.address = task->allgather_kn.sbuf; | ||
mmap_params.length = local * dt_size; | ||
UCPCHECK_GOTO(ucs_status_to_ucc_status(ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh)), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dst is not important? ask Sergey There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dest isnt important |
||
task, out); | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
|
||
mmap_params.address = rbuf; | ||
mmap_params.length = data_size; | ||
UCPCHECK_GOTO(ucs_status_to_ucc_status(ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh)), | ||
task, out); | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make sure all cases are perfectly identical logic (double list size etc), try to create a macro if possible There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what isnt identical, I'm not sure to understand |
||
} | ||
if ((p->type != KN_PATTERN_ALLGATHERX) && (node_type == KN_NODE_PROXY)) { | ||
peer = ucc_knomial_pattern_get_extra(p, rank); | ||
extra_count = GET_LOCAL_COUNT(args, size, peer); | ||
peer = ucc_ep_map_eval(task->subset.map, peer); | ||
mmap_params.address = PTR_OFFSET(task->allgather_kn.sbuf, | ||
local * dt_size); | ||
mmap_params.length = extra_count * dt_size; | ||
UCPCHECK_GOTO(ucs_status_to_ucc_status(ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh)), | ||
task, out); | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
while (!ucc_knomial_pattern_loop_done(p)) { | ||
ucc_kn_ag_pattern_peer_seg(rank, p, &local_seg_count, | ||
&local_seg_offset); | ||
sbuf = PTR_OFFSET(rbuf, local_seg_offset * dt_size); | ||
|
||
for (loop_step = radix - 1; loop_step > 0; loop_step--) { | ||
peer = ucc_knomial_pattern_get_loop_peer(p, rank, loop_step); | ||
if (peer == UCC_KN_PEER_NULL) | ||
continue; | ||
if (coll_task->bargs.args.coll_type == UCC_COLL_TYPE_BCAST) { | ||
peer_dist = ucc_knomial_calc_recv_dist(size - p->n_extra, | ||
ucc_knomial_pattern_loop_rank(p, peer), p->radix, 0); | ||
if (peer_dist < task->allgather_kn.recv_dist) { | ||
continue; | ||
} | ||
} | ||
mmap_params.address = sbuf; | ||
mmap_params.length = local_seg_count * dt_size; | ||
UCPCHECK_GOTO(ucs_status_to_ucc_status(ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh)), | ||
task, out); | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
|
||
for (loop_step = 1; loop_step < radix; loop_step++) { | ||
peer = ucc_knomial_pattern_get_loop_peer(p, rank, loop_step); | ||
if (peer == UCC_KN_PEER_NULL) | ||
continue; | ||
ucc_kn_ag_pattern_peer_seg(peer, p, &peer_seg_count, | ||
&peer_seg_offset); | ||
|
||
if (coll_task->bargs.args.coll_type == UCC_COLL_TYPE_BCAST) { | ||
peer_dist = ucc_knomial_calc_recv_dist(size - p->n_extra, | ||
ucc_knomial_pattern_loop_rank(p, peer), p->radix, 0); | ||
if (peer_dist > task->allgather_kn.recv_dist) { | ||
continue; | ||
} | ||
} | ||
mmap_params.address = PTR_OFFSET(rbuf, peer_seg_offset * dt_size); | ||
mmap_params.length = peer_seg_count * dt_size; | ||
UCPCHECK_GOTO(ucs_status_to_ucc_status(ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh)), | ||
task, out); | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
ucc_kn_ag_pattern_next_iter(p); | ||
} | ||
|
||
if (KN_NODE_PROXY == node_type) { | ||
mmap_params.address = args->dst.info.buffer; | ||
mmap_params.length = data_size; | ||
UCPCHECK_GOTO(ucs_status_to_ucc_status(ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh)), | ||
task, out); | ||
if (count_mh == size_of_list){ | ||
size_of_list *= 2; | ||
mh_list = (ucp_mem_h *)realloc(mh_list, size_of_list * sizeof(ucp_mem_h)); | ||
} | ||
mh_list[count_mh++] = mh; | ||
} | ||
|
||
out: | ||
ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this needed here? |
||
task->super.status = UCC_OK; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this needed here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess no |
||
coll_task->mh_list = mh_list; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this will be changed to task->mh_list |
||
coll_task->count_mh = count_mh-1; | ||
UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_kn_done", 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this needed here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what this is doing, do you know ? |
||
|
||
} | ||
|
||
ucc_status_t ucc_tl_ucp_allgather_knomial_init_r( | ||
ucc_base_coll_args_t *coll_args, ucc_base_team_t *team, | ||
ucc_coll_task_t **task_h, ucc_kn_radix_t radix) | ||
|
@@ -242,7 +431,13 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_init_r( | |
ucc_tl_ucp_task_t *task; | ||
ucc_sbgp_t *sbgp; | ||
|
||
printf("USING NEW KNOMIAL"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add TODO remove later |
||
|
||
task = ucc_tl_ucp_init_task(coll_args, team); | ||
ucc_mpool_init(&task->allgather_kn.etask_node_mpool, 0, sizeof(node_ucc_ee_executor_task_t), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. u didn't check for status There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what should I do if status<0 ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. u never called mpool finalize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didnt find anything called mpool finalize, what are you talking about ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ucc_mpool_cleanup in knomial finalize or similar There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see anything close to finalize inside allgather_knomial.c, is it elsewhere ? |
||
0, UCC_CACHE_LINE_SIZE, 16, UINT_MAX, NULL, | ||
tl_team->super.super.context->ucc_context->thread_mode, "etasks_linked_list_nodes"); | ||
|
||
if (tl_team->cfg.use_reordering && | ||
coll_args->args.coll_type == UCC_COLL_TYPE_ALLREDUCE) { | ||
sbgp = ucc_topo_get_sbgp(tl_team->topo, UCC_SBGP_FULL_HOST_ORDERED); | ||
|
@@ -251,6 +446,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_init_r( | |
} | ||
task->allgather_kn.p.radix = radix; | ||
task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR; | ||
register_memory(&task->super); | ||
task->super.post = ucc_tl_ucp_allgather_knomial_start; | ||
task->super.progress = ucc_tl_ucp_allgather_knomial_progress; | ||
*task_h = &task->super; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change val to etask