17#include "kmp_wait_release.h"
18#include "kmp_taskdeps.h"
21#include "ompt-specific.h"
25static void __kmp_enable_tasking(kmp_task_team_t *task_team,
26 kmp_info_t *this_thr);
27static void __kmp_alloc_task_deque(kmp_info_t *thread,
28 kmp_thread_data_t *thread_data);
29static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
30 kmp_task_team_t *task_team);
31static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack in order,
// from bottom to top (debug builds only).
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up ts_top if we need to pop from previous block; note the
    // parentheses: '==' binds tighter than '&'
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bottom half of popping the entry
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
// __kmp_init_task_stack: initialize the task stack the first time after a
// thread_data structure is created; the stack is expected to be popped back
// to the empty state on completion, so this is not done again.
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack onwards
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread, stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // reset the stack to empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
// __kmp_push_task_stack: push a tied task onto the task stack, growing the
// stack by another block if necessary.
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    // Don't push anything on stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));
  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping after storing the entry
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next != NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
// __kmp_pop_task_stack: pop a tied task from the task stack; don't return it,
// just check that it matches the ending task passed in.
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // if we built the stack correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif /* BUILD_TIED_TASK_STACK */
// __kmp_task_is_allowed: check whether the candidate task may be scheduled or
// deferred under the current Task Scheduling Constraints, and acquire any
// mutexinoutset locks it needs.
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Only a descendant of all deferred tied tasks can be scheduled; checking
    // the last one is enough, as it in turn descends from all the others.
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previous locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
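// Note: the tied-task branch above enforces the OpenMP Task Scheduling
// Constraint -- a new tied task may only be deferred if it descends from the
// thread's last deferred tied task (which in turn descends from all earlier
// ones).  The mutexinoutset bookkeeping negates dn.mtx_num_locks once every
// lock is held; for illustration, a task with 3 mutexinoutset dependences
// goes from 3 to -3 here and back to 3 when the locks are released.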
// __kmp_realloc_task_deque: double the size of a thread's task deque and copy
// the existing entries; must be called with the deque lock held.
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
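// For illustration: the deque is a power-of-two circular buffer, so doubling
// it compacts the live entries to the front.  With size = 4, head = tail = 3
// and 4 queued tasks, the copy loop above visits slots 3, 0, 1, 2 and stores
// them at new_deque[0..3]; head becomes 0, tail becomes the old size, and the
// new second half is free.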
static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
  kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
  kmp_thread_data_t *thread_data = &l->td;
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  thread_data->td.td_deque_last_stolen = -1;
  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
                "for thread_data %p\n",
                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
  return l;
}
// Find (or create) the deque of priority tasks with the given priority. The
// list of priority deques is kept sorted from high to low priority and is
// shared by all threads in the team.
static kmp_thread_data_t *
__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
  kmp_thread_data_t *thread_data;
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (lst->priority == pri) {
    // Found queue of tasks with given priority.
    thread_data = &lst->td;
  } else if (lst->priority < pri) {
    // All current priority queues contain tasks with lower priority.
    // Allocate new one for given priority tasks.
    kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
    thread_data = &list->td;
    list->priority = pri;
    list->next = lst;
    task_team->tt.tt_task_pri_list = list;
  } else { // task priority is less than in list
    // Search list for queue with given priority.
    kmp_task_pri_t *next_queue = lst->next;
    while (next_queue && next_queue->priority > pri) {
      lst = next_queue;
      next_queue = lst->next;
    }
    // lst->priority > pri && (next_queue == NULL || pri >= next_queue->priority)
    if (next_queue == NULL) {
      // No queue with given priority; allocate a new one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      lst->next = list;
    } else if (next_queue->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &next_queue->td;
    } else { // lst->priority > pri > next_queue->priority
      // Insert a newly allocated queue between the existing ones.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = next_queue;
      lst->next = list;
    }
  }
  return thread_data;
}
//  __kmp_push_priority_task: add a task to the team's priority task deque
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
                                          kmp_taskdata_t *taskdata,
                                          kmp_task_team_t *task_team,
                                          kmp_int32 pri) {
  kmp_thread_data_t *thread_data = NULL;
  KA_TRACE(20,
           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
            gtid, taskdata, pri));

  // Find task queue specific to priority value
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (UNLIKELY(lst == NULL)) {
    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    if (task_team->tt.tt_task_pri_list == NULL) {
      // List of queues is still empty, allocate one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      task_team->tt.tt_task_pri_list = list;
    } else {
      // Another thread initialized the list; find the fitting queue.
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
    }
    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  } else {
    if (lst->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &lst->td;
    } else {
      __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
      __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    }
  }
  KMP_DEBUG_ASSERT(thread_data);

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      // expand deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
  // Push taskdata.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task_team->tt.tt_num_task_pri++; // atomic not needed; done under the lock
  return TASK_SUCCESSFULLY_PUSHED;
}
// __kmp_push_task: schedule a task for execution by adding it to the
// encountering thread's deque.
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // If we encounter a hidden helper task, and the current thread is not a
  // hidden helper thread, hand the task to a hidden helper thread, starting
  // from the shadow one.
  if (UNLIKELY(taskdata->td_flags.hidden_helper &&
               !KMP_HIDDEN_HELPER_THREAD(gtid))) {
    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
    // Signal the hidden helper threads.
    __kmp_hidden_helper_worker_thread_signal();
    return TASK_SUCCESSFULLY_PUSHED;
  }

  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
      __kmp_max_task_priority > 0) {
    int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
    return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
  }

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only the owner can allocate.
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the task push operation
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Recheck: a proxy task can arrive from a thread outside of OpenMP
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room, since no thread but the calling one can add tasks
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
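// Note: __kmp_push_task and __kmp_push_priority_task publish a task with the
// same protocol -- store it at td_deque_tail, advance the tail with the
// TASK_DEQUE_MASK wrap, then bump td_deque_ntasks -- all under td_deque_lock,
// so a thief popping from the head never observes a half-published entry.
// When the deque is full, task throttling (if enabled and the scheduling
// constraints permit immediate execution) returns TASK_NOT_PUSHED so the
// producer runs the task inline instead of growing the deque.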
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // The current task of the thread becomes the parent of the newly created
  // implicit tasks of the new team.
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
654static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
655 kmp_taskdata_t *current_task) {
656 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
657 kmp_info_t *thread = __kmp_threads[gtid];
660 (
"__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
661 gtid, taskdata, current_task));
663 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
668 current_task->td_flags.executing = 0;
671#ifdef BUILD_TIED_TASK_STACK
672 if (taskdata->td_flags.tiedness == TASK_TIED) {
673 __kmp_push_task_stack(gtid, thread, taskdata);
678 thread->th.th_current_task = taskdata;
680 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
681 taskdata->td_flags.tiedness == TASK_UNTIED);
682 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
683 taskdata->td_flags.tiedness == TASK_UNTIED);
684 taskdata->td_flags.started = 1;
685 taskdata->td_flags.executing = 1;
686 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
687 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
694 KA_TRACE(10, (
"__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
705static inline void __ompt_task_init(kmp_taskdata_t *task,
int tid) {
707 task->ompt_task_info.task_data.value = 0;
708 task->ompt_task_info.frame.exit_frame = ompt_data_none;
709 task->ompt_task_info.frame.enter_frame = ompt_data_none;
710 task->ompt_task_info.frame.exit_frame_flags =
711 ompt_frame_runtime | ompt_frame_framepointer;
712 task->ompt_task_info.frame.enter_frame_flags =
713 ompt_frame_runtime | ompt_frame_framepointer;
714 task->ompt_task_info.dispatch_chunk.start = 0;
715 task->ompt_task_info.dispatch_chunk.iterations = 0;
720static inline void __ompt_task_start(kmp_task_t *task,
721 kmp_taskdata_t *current_task,
723 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
724 ompt_task_status_t status = ompt_task_switch;
725 if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
726 status = ompt_task_yield;
727 __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
730 if (ompt_enabled.ompt_callback_task_schedule) {
731 ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
732 &(current_task->ompt_task_info.task_data), status,
733 &(taskdata->ompt_task_info.task_data));
735 taskdata->ompt_task_info.scheduling_parent = current_task;
740static inline void __ompt_task_finish(kmp_task_t *task,
741 kmp_taskdata_t *resumed_task,
742 ompt_task_status_t status) {
743 if (ompt_enabled.ompt_callback_task_schedule) {
744 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
745 if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
746 taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
747 status = ompt_task_cancel;
751 ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
752 &(taskdata->ompt_task_info.task_data), status,
753 (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
759static void __kmpc_omp_task_begin_if0_template(
ident_t *loc_ref, kmp_int32 gtid,
762 void *return_address) {
763 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
764 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
766 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
768 gtid, loc_ref, taskdata, current_task));
770 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
773 kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
774 KMP_DEBUG_USE_VAR(counter);
775 KA_TRACE(20, (
"__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
776 "incremented for task %p\n",
777 gtid, counter, taskdata));
780 taskdata->td_flags.task_serial =
782 __kmp_task_start(gtid, task, current_task);
786 if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
787 current_task->ompt_task_info.frame.enter_frame.ptr =
788 taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
789 current_task->ompt_task_info.frame.enter_frame_flags =
790 taskdata->ompt_task_info.frame.exit_frame_flags =
791 ompt_frame_application | ompt_frame_framepointer;
793 if (ompt_enabled.ompt_callback_task_create) {
794 ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
795 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
796 &(parent_info->task_data), &(parent_info->frame),
797 &(taskdata->ompt_task_info.task_data),
798 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
801 __ompt_task_start(task, current_task, gtid);
805 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
811static void __kmpc_omp_task_begin_if0_ompt(
ident_t *loc_ref, kmp_int32 gtid,
814 void *return_address) {
815 __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
826void __kmpc_omp_task_begin_if0(
ident_t *loc_ref, kmp_int32 gtid,
829 if (UNLIKELY(ompt_enabled.enabled)) {
830 OMPT_STORE_RETURN_ADDRESS(gtid);
831 __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
832 OMPT_GET_FRAME_ADDRESS(1),
833 OMPT_LOAD_RETURN_ADDRESS(gtid));
837 __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
843void __kmpc_omp_task_begin(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
844 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
848 (
"__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
849 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
851 __kmp_task_start(gtid, task, current_task);
853 KA_TRACE(10, (
"__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
854 loc_ref, KMP_TASK_TO_TASKDATA(task)));
// __kmp_free_task: free the current task space and the space for shareds
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
  kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
  // Clear data to not be re-used later by mistake.
  task->data1.destructors = NULL;
  task->data2.priority = 0;

  taskdata->td_flags.freed = 1;
  // deallocate the taskdata and the shared-variable block for this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif
  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
898static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
899 kmp_taskdata_t *taskdata,
900 kmp_info_t *thread) {
903 kmp_int32 team_serial =
904 (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
905 !taskdata->td_flags.proxy;
906 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
908 kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
909 KMP_DEBUG_ASSERT(children >= 0);
912 while (children == 0) {
913 kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
915 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
916 "and freeing itself\n",
920 __kmp_free_task(gtid, taskdata, thread);
922 taskdata = parent_taskdata;
928 if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
929 if (taskdata->td_dephash) {
930 int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
931 kmp_tasking_flags_t flags_old = taskdata->td_flags;
932 if (children == 0 && flags_old.complete == 1) {
933 kmp_tasking_flags_t flags_new = flags_old;
934 flags_new.complete = 0;
935 if (KMP_COMPARE_AND_STORE_ACQ32(
936 RCAST(kmp_int32 *, &taskdata->td_flags),
937 *RCAST(kmp_int32 *, &flags_old),
938 *RCAST(kmp_int32 *, &flags_new))) {
939 KA_TRACE(100, (
"__kmp_free_task_and_ancestors: T#%d cleans "
940 "dephash of implicit task %p\n",
943 __kmp_dephash_free_entries(thread, taskdata->td_dephash);
950 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
951 KMP_DEBUG_ASSERT(children >= 0);
955 20, (
"__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
956 "not freeing it yet\n",
957 gtid, taskdata, children));
static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
  kmp_tasking_flags_t flags = taskdata->td_flags;
  bool ret = !(flags.team_serial || flags.tasking_ser);
  ret = ret || flags.proxy == TASK_PROXY ||
        flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
  ret = ret ||
        KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
  return ret;
}
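// Note: child-task counting is skipped only for the fully serialized case.
// It stays on when the team runs in parallel with tasking enabled, when the
// task is a proxy, detachable, or hidden-helper task, or when the parent
// already has incomplete children -- otherwise a deferred (e.g. detached)
// child could be freed while a sibling still depends on it.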
988static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
989 kmp_taskdata_t *resumed_task) {
990 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
991 kmp_info_t *thread = __kmp_threads[gtid];
992 kmp_task_team_t *task_team =
993 thread->th.th_task_team;
995 kmp_int32 children = 0;
997 KA_TRACE(10, (
"__kmp_task_finish(enter): T#%d finishing task %p and resuming "
999 gtid, taskdata, resumed_task));
1001 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
1004#ifdef BUILD_TIED_TASK_STACK
1005 if (taskdata->td_flags.tiedness == TASK_TIED) {
1006 __kmp_pop_task_stack(gtid, thread, taskdata);
1010 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
1013 kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
1016 (
"__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
1017 gtid, counter, taskdata));
1021 if (resumed_task == NULL) {
1022 KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
1023 resumed_task = taskdata->td_parent;
1026 thread->th.th_current_task = resumed_task;
1027 resumed_task->td_flags.executing = 1;
1028 KA_TRACE(10, (
"__kmp_task_finish(exit): T#%d partially done task %p, "
1029 "resuming task %p\n",
1030 gtid, taskdata, resumed_task));
1038 (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
1039 taskdata->td_flags.task_serial);
1040 if (taskdata->td_flags.task_serial) {
1041 if (resumed_task == NULL) {
1042 resumed_task = taskdata->td_parent;
1046 KMP_DEBUG_ASSERT(resumed_task !=
1056 if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
1057 kmp_routine_entry_t destr_thunk = task->data1.destructors;
1058 KMP_ASSERT(destr_thunk);
1059 destr_thunk(gtid, task);
1062 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
1063 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
1064 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
1066 bool detach =
false;
1067 if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
1068 if (taskdata->td_allow_completion_event.type ==
1069 KMP_EVENT_ALLOW_COMPLETION) {
1071 __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
1072 if (taskdata->td_allow_completion_event.type ==
1073 KMP_EVENT_ALLOW_COMPLETION) {
1075 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
1076 taskdata->td_flags.executing = 0;
1083 __ompt_task_finish(task, resumed_task, ompt_task_detach);
1089 taskdata->td_flags.proxy = TASK_PROXY;
1092 __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
1097 taskdata->td_flags.complete = 1;
1102 __ompt_task_finish(task, resumed_task, ompt_task_complete);
1106 if (__kmp_track_children_task(taskdata)) {
1107 __kmp_release_deps(gtid, taskdata);
1112 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
1113 KMP_DEBUG_ASSERT(children >= 0);
1114 if (taskdata->td_taskgroup)
1115 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
1116 }
else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
1117 task_team->tt.tt_hidden_helper_task_encountered)) {
1120 __kmp_release_deps(gtid, taskdata);
1126 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
1127 taskdata->td_flags.executing = 0;
1131 20, (
"__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
1132 gtid, taskdata, children));
1138 thread->th.th_current_task = resumed_task;
1140 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
1144 resumed_task->td_flags.executing = 1;
1147 10, (
"__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
1148 gtid, taskdata, resumed_task));
1154static void __kmpc_omp_task_complete_if0_template(
ident_t *loc_ref,
1157 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
1158 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
1159 KMP_DEBUG_ASSERT(gtid >= 0);
1161 __kmp_task_finish<ompt>(gtid, task, NULL);
1163 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
1164 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
1168 ompt_frame_t *ompt_frame;
1169 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1170 ompt_frame->enter_frame = ompt_data_none;
1171 ompt_frame->enter_frame_flags =
1172 ompt_frame_runtime | ompt_frame_framepointer;
1181void __kmpc_omp_task_complete_if0_ompt(
ident_t *loc_ref, kmp_int32 gtid,
1183 __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
1192void __kmpc_omp_task_complete_if0(
ident_t *loc_ref, kmp_int32 gtid,
1195 if (UNLIKELY(ompt_enabled.enabled)) {
1196 __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
1200 __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
1206void __kmpc_omp_task_complete(
ident_t *loc_ref, kmp_int32 gtid,
1208 KA_TRACE(10, (
"__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
1209 loc_ref, KMP_TASK_TO_TASKDATA(task)));
1211 __kmp_task_finish<false>(gtid, task,
1214 KA_TRACE(10, (
"__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
1215 loc_ref, KMP_TASK_TO_TASKDATA(task)));
1231void __kmp_init_implicit_task(
ident_t *loc_ref, kmp_info_t *this_thr,
1232 kmp_team_t *team,
int tid,
int set_curr_task) {
1233 kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
1237 (
"__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
1238 tid, team, task, set_curr_task ?
"TRUE" :
"FALSE"));
1240 task->td_task_id = KMP_GEN_TASK_ID();
1241 task->td_team = team;
1244 task->td_ident = loc_ref;
1245 task->td_taskwait_ident = NULL;
1246 task->td_taskwait_counter = 0;
1247 task->td_taskwait_thread = 0;
1249 task->td_flags.tiedness = TASK_TIED;
1250 task->td_flags.tasktype = TASK_IMPLICIT;
1251 task->td_flags.proxy = TASK_FULL;
1254 task->td_flags.task_serial = 1;
1255 task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1256 task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1258 task->td_flags.started = 1;
1259 task->td_flags.executing = 1;
1260 task->td_flags.complete = 0;
1261 task->td_flags.freed = 0;
1263 task->td_depnode = NULL;
1264 task->td_last_tied = task;
1265 task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1267 if (set_curr_task) {
1268 KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
1270 KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
1271 task->td_taskgroup = NULL;
1272 task->td_dephash = NULL;
1273 __kmp_push_current_task_to_thread(this_thr, team, tid);
1275 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
1276 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
1280 if (UNLIKELY(ompt_enabled.enabled))
1281 __ompt_task_init(task, tid);
1284 KF_TRACE(10, (
"__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
1293void __kmp_finish_implicit_task(kmp_info_t *thread) {
1294 kmp_taskdata_t *task = thread->th.th_current_task;
1295 if (task->td_dephash) {
1297 task->td_flags.complete = 1;
1298 children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
1299 kmp_tasking_flags_t flags_old = task->td_flags;
1300 if (children == 0 && flags_old.complete == 1) {
1301 kmp_tasking_flags_t flags_new = flags_old;
1302 flags_new.complete = 0;
1303 if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
1304 *RCAST(kmp_int32 *, &flags_old),
1305 *RCAST(kmp_int32 *, &flags_new))) {
1306 KA_TRACE(100, (
"__kmp_finish_implicit_task: T#%d cleans "
1307 "dephash of implicit task %p\n",
1308 thread->th.th_info.ds.ds_gtid, task));
1309 __kmp_dephash_free_entries(thread, task->td_dephash);
1319void __kmp_free_implicit_task(kmp_info_t *thread) {
1320 kmp_taskdata_t *task = thread->th.th_current_task;
1321 if (task && task->td_dephash) {
1322 __kmp_dephash_free(thread, task->td_dephash);
1323 task->td_dephash = NULL;
// Round up a size to a multiple of the power-of-two value "val".
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up only if there is no overflow.
    }
  }
  return size;
}
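// For illustration: with val == sizeof(void *) == 8, a size of 53 has low
// bits set (53 & 7 == 5), is masked down to 48 and bumped to 56; a size
// already on an 8-byte boundary, say 64, is returned unchanged.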
1351kmp_task_t *__kmp_task_alloc(
ident_t *loc_ref, kmp_int32 gtid,
1352 kmp_tasking_flags_t *flags,
1353 size_t sizeof_kmp_task_t,
size_t sizeof_shareds,
1354 kmp_routine_entry_t task_entry) {
1356 kmp_taskdata_t *taskdata;
1357 kmp_info_t *thread = __kmp_threads[gtid];
1358 kmp_team_t *team = thread->th.th_team;
1359 kmp_taskdata_t *parent_task = thread->th.th_current_task;
1360 size_t shareds_offset;
1362 if (UNLIKELY(!TCR_4(__kmp_init_middle)))
1363 __kmp_middle_initialize();
1365 if (flags->hidden_helper) {
1366 if (__kmp_enable_hidden_helper) {
1367 if (!TCR_4(__kmp_init_hidden_helper))
1368 __kmp_hidden_helper_initialize();
1371 flags->hidden_helper = FALSE;
1375 KA_TRACE(10, (
"__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
1376 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1377 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
1378 sizeof_shareds, task_entry));
1380 KMP_DEBUG_ASSERT(parent_task);
1381 if (parent_task->td_flags.final) {
1382 if (flags->merged_if0) {
1387 if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
1391 KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
1397 if (UNLIKELY(flags->proxy == TASK_PROXY ||
1398 flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
1399 if (flags->proxy == TASK_PROXY) {
1400 flags->tiedness = TASK_UNTIED;
1401 flags->merged_if0 = 1;
1405 if ((thread->th.th_task_team) == NULL) {
1408 KMP_DEBUG_ASSERT(team->t.t_serialized);
1410 (
"T#%d creating task team in __kmp_task_alloc for proxy task\n",
1413 __kmp_task_team_setup(thread, team, 1);
1414 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
1416 kmp_task_team_t *task_team = thread->th.th_task_team;
1419 if (!KMP_TASKING_ENABLED(task_team)) {
1422 (
"T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
1423 __kmp_enable_tasking(task_team, thread);
1424 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1425 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
1427 if (thread_data->td.td_deque == NULL) {
1428 __kmp_alloc_task_deque(thread, thread_data);
1432 if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
1433 task_team->tt.tt_found_proxy_tasks == FALSE)
1434 TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
1435 if (flags->hidden_helper &&
1436 task_team->tt.tt_hidden_helper_task_encountered == FALSE)
1437 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
1442 shareds_offset =
sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
1443 shareds_offset = __kmp_round_up_to_val(shareds_offset,
sizeof(
void *));
1446 KA_TRACE(30, (
"__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
1448 KA_TRACE(30, (
"__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
1453 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
1456 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
1460 task = KMP_TASKDATA_TO_TASK(taskdata);
  // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid a second allocation by placing shareds right after taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
1479 task->routine = task_entry;
1482 taskdata->td_task_id = KMP_GEN_TASK_ID();
1483 taskdata->td_team = thread->th.th_team;
1484 taskdata->td_alloc_thread = thread;
1485 taskdata->td_parent = parent_task;
1486 taskdata->td_level = parent_task->td_level + 1;
1487 KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
1488 taskdata->td_ident = loc_ref;
1489 taskdata->td_taskwait_ident = NULL;
1490 taskdata->td_taskwait_counter = 0;
1491 taskdata->td_taskwait_thread = 0;
1492 KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
1494 if (flags->proxy == TASK_FULL)
1495 copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1497 taskdata->td_flags = *flags;
1498 taskdata->td_task_team = thread->th.th_task_team;
1499 taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1500 taskdata->td_flags.tasktype = TASK_EXPLICIT;
1503 if (flags->hidden_helper) {
1504 kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
1505 taskdata->td_team = shadow_thread->th.th_team;
1506 taskdata->td_task_team = shadow_thread->th.th_task_team;
1510 taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1513 taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1519 taskdata->td_flags.task_serial =
1520 (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1521 taskdata->td_flags.tasking_ser || flags->merged_if0);
1523 taskdata->td_flags.started = 0;
1524 taskdata->td_flags.executing = 0;
1525 taskdata->td_flags.complete = 0;
1526 taskdata->td_flags.freed = 0;
1528 KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
1530 KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
1531 taskdata->td_taskgroup =
1532 parent_task->td_taskgroup;
1533 taskdata->td_dephash = NULL;
1534 taskdata->td_depnode = NULL;
1535 if (flags->tiedness == TASK_UNTIED)
1536 taskdata->td_last_tied = NULL;
1538 taskdata->td_last_tied = taskdata;
1539 taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1541 if (UNLIKELY(ompt_enabled.enabled))
1542 __ompt_task_init(taskdata, gtid);
1546 if (__kmp_track_children_task(taskdata)) {
1547 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
1548 if (parent_task->td_taskgroup)
1549 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
1552 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
1553 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
1555 if (flags->hidden_helper) {
1556 taskdata->td_flags.task_serial = FALSE;
1558 KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
1562 KA_TRACE(20, (
"__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1563 gtid, taskdata, taskdata->td_parent));
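// Note: a single allocation backs each explicit task -- the kmp_taskdata_t
// header, the compiler-visible kmp_task_t, and the shareds block.
// shareds_offset is sizeof(kmp_taskdata_t) + sizeof_kmp_task_t rounded up to
// pointer alignment, so task->shareds points just past the task structure in
// the same buffer and is released together with it.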
1568kmp_task_t *__kmpc_omp_task_alloc(
ident_t *loc_ref, kmp_int32 gtid,
1569 kmp_int32 flags,
size_t sizeof_kmp_task_t,
1570 size_t sizeof_shareds,
1571 kmp_routine_entry_t task_entry) {
1573 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1574 __kmp_assert_valid_gtid(gtid);
1575 input_flags->native = FALSE;
1577 KA_TRACE(10, (
"__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
1578 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1579 gtid, loc_ref, input_flags->tiedness ?
"tied " :
"untied",
1580 input_flags->proxy ?
"proxy" :
"",
1581 input_flags->detachable ?
"detachable" :
"", sizeof_kmp_task_t,
1582 sizeof_shareds, task_entry));
1584 retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1585 sizeof_shareds, task_entry);
1587 KA_TRACE(20, (
"__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
1592kmp_task_t *__kmpc_omp_target_task_alloc(
ident_t *loc_ref, kmp_int32 gtid,
1594 size_t sizeof_kmp_task_t,
1595 size_t sizeof_shareds,
1596 kmp_routine_entry_t task_entry,
1597 kmp_int64 device_id) {
1598 auto &input_flags =
reinterpret_cast<kmp_tasking_flags_t &
>(flags);
1600 input_flags.tiedness = TASK_UNTIED;
1602 if (__kmp_enable_hidden_helper)
1603 input_flags.hidden_helper = TRUE;
1605 return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
1606 sizeof_shareds, task_entry);
1624 kmp_task_t *new_task, kmp_int32 naffins,
1625 kmp_task_affinity_info_t *affin_list) {
1634static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
1635 kmp_taskdata_t *current_task) {
1636 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
1640 30, (
"__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1641 gtid, taskdata, current_task));
1642 KMP_DEBUG_ASSERT(task);
1643 if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
1644 taskdata->td_flags.complete == 1)) {
1649 (
"__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1652 __kmp_bottom_half_finish_proxy(gtid, task);
1654 KA_TRACE(30, (
"__kmp_invoke_task(exit): T#%d completed bottom finish for "
1655 "proxy task %p, resuming task %p\n",
1656 gtid, taskdata, current_task));
1664 ompt_thread_info_t oldInfo;
1665 if (UNLIKELY(ompt_enabled.enabled)) {
1667 thread = __kmp_threads[gtid];
1668 oldInfo = thread->th.ompt_thread_info;
1669 thread->th.ompt_thread_info.wait_id = 0;
1670 thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
1671 ? ompt_state_work_serial
1672 : ompt_state_work_parallel;
1673 taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1678 if (taskdata->td_flags.hidden_helper) {
1680 KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
1681 KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
1685 if (taskdata->td_flags.proxy != TASK_PROXY) {
1686 __kmp_task_start(gtid, task, current_task);
1692 if (UNLIKELY(__kmp_omp_cancellation)) {
1693 thread = __kmp_threads[gtid];
1694 kmp_team_t *this_team = thread->th.th_team;
1695 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1696 if ((taskgroup && taskgroup->cancel_request) ||
1697 (this_team->t.t_cancel_request == cancel_parallel)) {
1698#if OMPT_SUPPORT && OMPT_OPTIONAL
1699 ompt_data_t *task_data;
1700 if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
1701 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
1702 ompt_callbacks.ompt_callback(ompt_callback_cancel)(
1704 ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
1705 : ompt_cancel_parallel) |
1706 ompt_cancel_discarded_task,
1719 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
1720 taskdata->td_last_tied = current_task->td_last_tied;
1721 KMP_DEBUG_ASSERT(taskdata->td_last_tied);
1723#if KMP_STATS_ENABLED
1725 switch (KMP_GET_THREAD_STATE()) {
1726 case FORK_JOIN_BARRIER:
1727 KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
1730 KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
1733 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
1736 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
1739 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
1742 KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
1749 if (UNLIKELY(ompt_enabled.enabled))
1750 __ompt_task_start(task, current_task, gtid);
1752#if OMPT_SUPPORT && OMPT_OPTIONAL
1753 if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
1754 taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
1755 ompt_data_t instance = ompt_data_none;
1756 instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
1757 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1758 ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
1759 &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
1760 ompt_dispatch_taskloop_chunk, instance);
1761 taskdata->ompt_task_info.dispatch_chunk = {0, 0};
1766 if (ompd_state & OMPD_ENABLE_BP)
1767 ompd_bp_task_begin();
1770#if USE_ITT_BUILD && USE_ITT_NOTIFY
1771 kmp_uint64 cur_time;
1772 kmp_int32 kmp_itt_count_task =
1773 __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
1774 current_task->td_flags.tasktype == TASK_IMPLICIT;
1775 if (kmp_itt_count_task) {
1776 thread = __kmp_threads[gtid];
1778 if (thread->th.th_bar_arrive_time)
1779 cur_time = __itt_get_timestamp();
1781 kmp_itt_count_task = 0;
1783 KMP_FSYNC_ACQUIRED(taskdata);
1786 if (task->routine != NULL) {
#ifdef KMP_GOMP_COMPAT
    if (taskdata->td_flags.native) {
      ((void (*)(void *))(*(task->routine)))(task->shareds);
    } else
#endif /* KMP_GOMP_COMPAT */
    {
      (*(task->routine))(gtid, task);
    }
  }
1796 KMP_POP_PARTITIONED_TIMER();
1798#if USE_ITT_BUILD && USE_ITT_NOTIFY
1799 if (kmp_itt_count_task) {
1801 thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1803 KMP_FSYNC_CANCEL(taskdata);
1804 KMP_FSYNC_RELEASING(taskdata->td_parent);
1809 if (ompd_state & OMPD_ENABLE_BP)
1814 if (taskdata->td_flags.proxy != TASK_PROXY) {
1816 if (UNLIKELY(ompt_enabled.enabled)) {
1817 thread->th.ompt_thread_info = oldInfo;
1818 if (taskdata->td_flags.tiedness == TASK_TIED) {
1819 taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1821 __kmp_task_finish<true>(gtid, task, current_task);
1824 __kmp_task_finish<false>(gtid, task, current_task);
1829 (
"__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1830 gtid, taskdata, current_task));
1844kmp_int32 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid,
1845 kmp_task_t *new_task) {
1846 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1848 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1849 loc_ref, new_taskdata));
1852 kmp_taskdata_t *parent;
1853 if (UNLIKELY(ompt_enabled.enabled)) {
1854 parent = new_taskdata->td_parent;
1855 if (ompt_enabled.ompt_callback_task_create) {
1856 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1857 &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
1858 &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
1859 OMPT_GET_RETURN_ADDRESS(0));
1867 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1869 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1870 new_taskdata->td_flags.task_serial = 1;
1871 __kmp_invoke_task(gtid, new_task, current_task);
1876 (
"__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1877 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
1878 gtid, loc_ref, new_taskdata));
1881 if (UNLIKELY(ompt_enabled.enabled)) {
1882 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
1885 return TASK_CURRENT_NOT_QUEUED;
1899kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
1900 bool serialize_immediate) {
1901 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1905 if (new_taskdata->td_flags.proxy == TASK_PROXY ||
1906 __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1908 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1909 if (serialize_immediate)
1910 new_taskdata->td_flags.task_serial = 1;
1911 __kmp_invoke_task(gtid, new_task, current_task);
1912 }
else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
1913 __kmp_wpolicy_passive) {
1914 kmp_info_t *this_thr = __kmp_threads[gtid];
1915 kmp_team_t *team = this_thr->th.th_team;
1916 kmp_int32 nthreads = this_thr->th.th_team_nproc;
1917 for (
int i = 0; i < nthreads; ++i) {
1918 kmp_info_t *thread = team->t.t_threads[i];
1919 if (thread == this_thr)
1921 if (thread->th.th_sleep_loc != NULL) {
1922 __kmp_null_resume_wrapper(thread);
1927 return TASK_CURRENT_NOT_QUEUED;
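// Note: when the push fails or the task is a proxy task, serialize_immediate
// makes the encountering thread mark the task task_serial and run it inline
// via __kmp_invoke_task (the undeferred-task path).  The wake-up loop in the
// else branch only runs under the passive wait policy, resuming sleeping
// teammates so one of them can steal the freshly queued task.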
1942kmp_int32 __kmpc_omp_task(
ident_t *loc_ref, kmp_int32 gtid,
1943 kmp_task_t *new_task) {
1945 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1947#if KMP_DEBUG || OMPT_SUPPORT
1948 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1950 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1952 __kmp_assert_valid_gtid(gtid);
1955 kmp_taskdata_t *parent = NULL;
1956 if (UNLIKELY(ompt_enabled.enabled)) {
1957 if (!new_taskdata->td_flags.started) {
1958 OMPT_STORE_RETURN_ADDRESS(gtid);
1959 parent = new_taskdata->td_parent;
1960 if (!parent->ompt_task_info.frame.enter_frame.ptr) {
1961 parent->ompt_task_info.frame.enter_frame.ptr =
1962 OMPT_GET_FRAME_ADDRESS(0);
1964 if (ompt_enabled.ompt_callback_task_create) {
1965 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1966 &(parent->ompt_task_info.task_data),
1967 &(parent->ompt_task_info.frame),
1968 &(new_taskdata->ompt_task_info.task_data),
1969 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1970 OMPT_LOAD_RETURN_ADDRESS(gtid));
1975 __ompt_task_finish(new_task,
1976 new_taskdata->ompt_task_info.scheduling_parent,
1978 new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1983 res = __kmp_omp_task(gtid, new_task,
true);
1985 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning "
1986 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1987 gtid, loc_ref, new_taskdata));
1989 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
1990 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
2009kmp_int32 __kmp_omp_taskloop_task(
ident_t *loc_ref, kmp_int32 gtid,
2010 kmp_task_t *new_task,
void *codeptr_ra) {
2012 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
2014#if KMP_DEBUG || OMPT_SUPPORT
2015 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
2017 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
2021 kmp_taskdata_t *parent = NULL;
2022 if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
2023 parent = new_taskdata->td_parent;
2024 if (!parent->ompt_task_info.frame.enter_frame.ptr)
2025 parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2026 if (ompt_enabled.ompt_callback_task_create) {
2027 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
2028 &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
2029 &(new_taskdata->ompt_task_info.task_data),
2030 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
2036 res = __kmp_omp_task(gtid, new_task,
true);
2038 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning "
2039 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
2040 gtid, loc_ref, new_taskdata));
2042 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
2043 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
2050static kmp_int32 __kmpc_omp_taskwait_template(
ident_t *loc_ref, kmp_int32 gtid,
2051 void *frame_address,
2052 void *return_address) {
2053 kmp_taskdata_t *taskdata =
nullptr;
2055 int thread_finished = FALSE;
2056 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
2058 KA_TRACE(10, (
"__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
2059 KMP_DEBUG_ASSERT(gtid >= 0);
2061 if (__kmp_tasking_mode != tskm_immediate_exec) {
2062 thread = __kmp_threads[gtid];
2063 taskdata = thread->th.th_current_task;
2065#if OMPT_SUPPORT && OMPT_OPTIONAL
2066 ompt_data_t *my_task_data;
2067 ompt_data_t *my_parallel_data;
2070 my_task_data = &(taskdata->ompt_task_info.task_data);
2071 my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
2073 taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
2075 if (ompt_enabled.ompt_callback_sync_region) {
2076 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2077 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
2078 my_task_data, return_address);
2081 if (ompt_enabled.ompt_callback_sync_region_wait) {
2082 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2083 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
2084 my_task_data, return_address);
2094 taskdata->td_taskwait_counter += 1;
2095 taskdata->td_taskwait_ident = loc_ref;
2096 taskdata->td_taskwait_thread = gtid + 1;
2099 void *itt_sync_obj = NULL;
2101 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2106 !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
2108 must_wait = must_wait || (thread->th.th_task_team != NULL &&
2109 thread->th.th_task_team->tt.tt_found_proxy_tasks);
2113 (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
2114 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);
2117 kmp_flag_32<false, false> flag(
2118 RCAST(std::atomic<kmp_uint32> *,
2119 &(taskdata->td_incomplete_child_tasks)),
2121 while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
2122 flag.execute_tasks(thread, gtid, FALSE,
2123 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2124 __kmp_task_stealing_constraint);
2128 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2129 KMP_FSYNC_ACQUIRED(taskdata);
2134 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2136#if OMPT_SUPPORT && OMPT_OPTIONAL
2138 if (ompt_enabled.ompt_callback_sync_region_wait) {
2139 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2140 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
2141 my_task_data, return_address);
2143 if (ompt_enabled.ompt_callback_sync_region) {
2144 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2145 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
2146 my_task_data, return_address);
2148 taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
2154 KA_TRACE(10, (
"__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
2155 "returning TASK_CURRENT_NOT_QUEUED\n",
2158 return TASK_CURRENT_NOT_QUEUED;
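// Note: the taskwait above does not block; flag.execute_tasks() keeps the
// waiting thread stealing and executing queued tasks until
// td_incomplete_child_tasks reaches zero.  The td_taskwait_counter,
// td_taskwait_ident and td_taskwait_thread fields mainly bracket the wait for
// debugger support; the thread id is stored as gtid + 1 and negated on exit
// so that zero can mean "not waiting".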
2161#if OMPT_SUPPORT && OMPT_OPTIONAL
2163static kmp_int32 __kmpc_omp_taskwait_ompt(
ident_t *loc_ref, kmp_int32 gtid,
2164 void *frame_address,
2165 void *return_address) {
2166 return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
2173kmp_int32 __kmpc_omp_taskwait(
ident_t *loc_ref, kmp_int32 gtid) {
2174#if OMPT_SUPPORT && OMPT_OPTIONAL
2175 if (UNLIKELY(ompt_enabled.enabled)) {
2176 OMPT_STORE_RETURN_ADDRESS(gtid);
2177 return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
2178 OMPT_LOAD_RETURN_ADDRESS(gtid));
2181 return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
2185kmp_int32 __kmpc_omp_taskyield(
ident_t *loc_ref, kmp_int32 gtid,
int end_part) {
2186 kmp_taskdata_t *taskdata = NULL;
2188 int thread_finished = FALSE;
2191 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
2193 KA_TRACE(10, (
"__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
2194 gtid, loc_ref, end_part));
2195 __kmp_assert_valid_gtid(gtid);
2197 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
2198 thread = __kmp_threads[gtid];
2199 taskdata = thread->th.th_current_task;
2206 taskdata->td_taskwait_counter += 1;
2207 taskdata->td_taskwait_ident = loc_ref;
2208 taskdata->td_taskwait_thread = gtid + 1;
2211 void *itt_sync_obj = NULL;
2213 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2216 if (!taskdata->td_flags.team_serial) {
2217 kmp_task_team_t *task_team = thread->th.th_task_team;
2218 if (task_team != NULL) {
2219 if (KMP_TASKING_ENABLED(task_team)) {
2221 if (UNLIKELY(ompt_enabled.enabled))
2222 thread->th.ompt_thread_info.ompt_task_yielded = 1;
2224 __kmp_execute_tasks_32(
2225 thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
2226 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2227 __kmp_task_stealing_constraint);
2229 if (UNLIKELY(ompt_enabled.enabled))
2230 thread->th.ompt_thread_info.ompt_task_yielded = 0;
2236 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2241 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2244 KA_TRACE(10, (
"__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
2245 "returning TASK_CURRENT_NOT_QUEUED\n",
2248 return TASK_CURRENT_NOT_QUEUED;
2269 unsigned reserved31 : 31;
2349template <
typename T>
2350void *__kmp_task_reduction_init(
int gtid,
int num, T *data) {
2351 __kmp_assert_valid_gtid(gtid);
2352 kmp_info_t *thread = __kmp_threads[gtid];
2353 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2354 kmp_uint32 nth = thread->th.th_team_nproc;
2358 KMP_ASSERT(tg != NULL);
2359 KMP_ASSERT(data != NULL);
2360 KMP_ASSERT(num > 0);
2362 KA_TRACE(10, (
"__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
2366 KA_TRACE(10, (
"__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
2370 for (
int i = 0; i < num; ++i) {
2371 size_t size = data[i].reduce_size - 1;
2373 size += CACHE_LINE - size % CACHE_LINE;
2374 KMP_ASSERT(data[i].reduce_comb != NULL);
2377 arr[i].
flags = data[i].flags;
2381 __kmp_assign_orig<T>(arr[i], data[i]);
2382 if (!arr[i].flags.lazy_priv) {
2385 arr[i].
reduce_pend = (
char *)(arr[i].reduce_priv) + nth * size;
2386 if (arr[i].reduce_init != NULL) {
2388 for (
size_t j = 0; j < nth; ++j) {
2389 __kmp_call_init<T>(arr[i], j * size);
2396 arr[i].
reduce_priv = __kmp_allocate(nth *
sizeof(
void *));
2399 tg->reduce_data = (
void *)arr;
2400 tg->reduce_num_data = num;
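// Note: for non-lazy reduction items the per-thread private copies live in a
// single cache-line-padded array of nth * size bytes (reduce_priv ..
// reduce_pend), so a thread's copy is located by plain arithmetic,
// reduce_priv + tid * size; items flagged lazy_priv instead keep an array of
// nth pointers that are allocated on first use when a thread asks for its
// copy.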
2439template <
typename T>
2440void __kmp_task_reduction_init_copy(kmp_info_t *thr,
int num, T *data,
2441 kmp_taskgroup_t *tg,
void *reduce_data) {
2443 KA_TRACE(20, (
"__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
2445 thr, tg, reduce_data));
2450 for (
int i = 0; i < num; ++i) {
2453 tg->reduce_data = (
void *)arr;
2454 tg->reduce_num_data = num;
2467 __kmp_assert_valid_gtid(gtid);
2468 kmp_info_t *thread = __kmp_threads[gtid];
2469 kmp_int32 nth = thread->th.th_team_nproc;
2473 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
2475 tg = thread->th.th_current_task->td_taskgroup;
2476 KMP_ASSERT(tg != NULL);
2478 kmp_int32 num = tg->reduce_num_data;
2479 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
2481 KMP_ASSERT(data != NULL);
2482 while (tg != NULL) {
2483 for (
int i = 0; i < num; ++i) {
2484 if (!arr[i].flags.lazy_priv) {
2485 if (data == arr[i].reduce_shar ||
2486 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
2487 return (
char *)(arr[i].
reduce_priv) + tid * arr[i].reduce_size;
2490 void **p_priv = (
void **)(arr[i].reduce_priv);
2491 if (data == arr[i].reduce_shar)
2494 for (
int j = 0; j < nth; ++j)
2495 if (data == p_priv[j])
2499 if (p_priv[tid] == NULL) {
2501 p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
2502 if (arr[i].reduce_init != NULL) {
2503 if (arr[i].reduce_orig != NULL) {
2505 p_priv[tid], arr[i].reduce_orig);
2507 ((void (*)(
void *))arr[i].
reduce_init)(p_priv[tid]);
2516 num = tg->reduce_num_data;
2518 KMP_ASSERT2(0,
"Unknown task reduction item");
2524static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
2525 kmp_int32 nth = th->th.th_team_nproc;
2526 KMP_DEBUG_ASSERT(nth > 1);
2528 kmp_int32 num = tg->reduce_num_data;
2529 for (
int i = 0; i < num; ++i) {
2531 void (*f_fini)(
void *) = (
void (*)(
void *))(arr[i].
reduce_fini);
2532 void (*f_comb)(
void *,
void *) =
2534 if (!arr[i].flags.lazy_priv) {
2537 for (
int j = 0; j < nth; ++j) {
2538 void *priv_data = (
char *)pr_data + j * size;
2539 f_comb(sh_data, priv_data);
2544 void **pr_data = (
void **)(arr[i].reduce_priv);
2545 for (
int j = 0; j < nth; ++j) {
2546 if (pr_data[j] != NULL) {
2547 f_comb(sh_data, pr_data[j]);
2550 __kmp_free(pr_data[j]);
2554 __kmp_free(arr[i].reduce_priv);
2556 __kmp_thread_free(th, arr);
2557 tg->reduce_data = NULL;
2558 tg->reduce_num_data = 0;
static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
  __kmp_thread_free(th, tg->reduce_data);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
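// __kmp_task_reduction_modifier_init: start the implicit taskgroup used by a
// task reduction modifier on parallel or worksharing constructs. The first
// thread to arrive creates the reduction data; the others copy it (see
// __kmp_task_reduction_init_copy above).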
2570template <
typename T>
2571void *__kmp_task_reduction_modifier_init(
ident_t *loc,
int gtid,
int is_ws,
2573 __kmp_assert_valid_gtid(gtid);
2574 kmp_info_t *thr = __kmp_threads[gtid];
2575 kmp_int32 nth = thr->th.th_team_nproc;
2576 __kmpc_taskgroup(loc, gtid);
2579 (
"__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
2580 gtid, thr->th.th_current_task->td_taskgroup));
2581 return (
void *)thr->th.th_current_task->td_taskgroup;
2583 kmp_team_t *team = thr->th.th_team;
2585 kmp_taskgroup_t *tg;
2586 reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
2587 if (reduce_data == NULL &&
2588 __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
2591 KMP_DEBUG_ASSERT(reduce_data == NULL);
2593 tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
2597 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
2598 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
2599 KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
2602 (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
2606 KMP_DEBUG_ASSERT(reduce_data > (
void *)1);
2607 tg = thr->th.th_current_task->td_taskgroup;
2608 __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
2630 int num,
void *data) {
2631 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2651 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2664 __kmpc_end_taskgroup(loc, gtid);
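// __kmpc_taskgroup: begin a new taskgroup on the encountering thread.
// Lowering sketch (an assumption about typical compiler behavior):
//   #pragma omp taskgroup
//   { ... }
// becomes
//   __kmpc_taskgroup(&loc, gtid); /* body */ __kmpc_end_taskgroup(&loc, gtid);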
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
  KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
  KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
  tg_new->parent = taskdata->td_taskgroup;
  tg_new->reduce_data = NULL;
  tg_new->reduce_num_data = 0;
  tg_new->gomp_data = NULL;
  taskdata->td_taskgroup = tg_new;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
2702void __kmpc_end_taskgroup(
ident_t *loc,
int gtid) {
2703 __kmp_assert_valid_gtid(gtid);
2704 kmp_info_t *thread = __kmp_threads[gtid];
2705 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2706 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
2707 int thread_finished = FALSE;
2709#if OMPT_SUPPORT && OMPT_OPTIONAL
2711 ompt_data_t my_task_data;
2712 ompt_data_t my_parallel_data;
2713 void *codeptr =
nullptr;
2714 if (UNLIKELY(ompt_enabled.enabled)) {
2715 team = thread->th.th_team;
2716 my_task_data = taskdata->ompt_task_info.task_data;
2718 my_parallel_data = team->t.ompt_team_info.parallel_data;
2719 codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2721 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2725 KA_TRACE(10, (
"__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
2726 KMP_DEBUG_ASSERT(taskgroup != NULL);
2727 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
2729 if (__kmp_tasking_mode != tskm_immediate_exec) {
2731 taskdata->td_taskwait_counter += 1;
2732 taskdata->td_taskwait_ident = loc;
2733 taskdata->td_taskwait_thread = gtid + 1;
2737 void *itt_sync_obj = NULL;
2739 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2743#if OMPT_SUPPORT && OMPT_OPTIONAL
2744 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2745 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2746 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2747 &(my_task_data), codeptr);
2751 if (!taskdata->td_flags.team_serial ||
2752 (thread->th.th_task_team != NULL &&
2753 (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
2754 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
2755 kmp_flag_32<false, false> flag(
2756 RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
2757 while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
2758 flag.execute_tasks(thread, gtid, FALSE,
2759 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2760 __kmp_task_stealing_constraint);
2763 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2765#if OMPT_SUPPORT && OMPT_OPTIONAL
2766 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2767 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2768 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2769 &(my_task_data), codeptr);
2774 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2775 KMP_FSYNC_ACQUIRED(taskdata);
2778 KMP_DEBUG_ASSERT(taskgroup->count == 0);
2780 if (taskgroup->reduce_data != NULL &&
2781 !taskgroup->gomp_data) {
2784 kmp_team_t *t = thread->th.th_team;
2788 if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
2791 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
2792 if (cnt == thread->th.th_team_nproc - 1) {
2795 __kmp_task_reduction_fini(thread, taskgroup);
2798 __kmp_thread_free(thread, reduce_data);
2799 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
2800 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
2804 __kmp_task_reduction_clean(thread, taskgroup);
2806 }
else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
2810 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
2811 if (cnt == thread->th.th_team_nproc - 1) {
2813 __kmp_task_reduction_fini(thread, taskgroup);
2816 __kmp_thread_free(thread, reduce_data);
2817 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
2818 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
2822 __kmp_task_reduction_clean(thread, taskgroup);
2826 __kmp_task_reduction_fini(thread, taskgroup);
2830 taskdata->td_taskgroup = taskgroup->parent;
2831 __kmp_thread_free(thread, taskgroup);
2833 KA_TRACE(10, (
"__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
2836#if OMPT_SUPPORT && OMPT_OPTIONAL
2837 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
2838 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2839 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2840 &(my_task_data), codeptr);
2845static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
2846 kmp_task_team_t *task_team,
2847 kmp_int32 is_constrained) {
2848 kmp_task_t *task = NULL;
2849 kmp_taskdata_t *taskdata;
2850 kmp_taskdata_t *current;
2851 kmp_thread_data_t *thread_data;
2852 int ntasks = task_team->tt.tt_num_task_pri;
2855 20, (
"__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));
2860 if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
2863 }
while (ntasks > 0);
2865 KA_TRACE(20, (
"__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
2871 kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
2873 KMP_ASSERT(list != NULL);
2874 thread_data = &list->td;
2875 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2876 deque_ntasks = thread_data->td.td_deque_ntasks;
2877 if (deque_ntasks == 0) {
2878 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2879 KA_TRACE(20, (
"__kmp_get_priority_task: T#%d No tasks to get from %p\n",
2880 __kmp_get_gtid(), thread_data));
2883 }
while (deque_ntasks == 0);
2884 KMP_DEBUG_ASSERT(deque_ntasks);
2885 int target = thread_data->td.td_deque_head;
2886 current = __kmp_threads[gtid]->th.th_current_task;
2887 taskdata = thread_data->td.td_deque[target];
2888 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2890 thread_data->td.td_deque_head =
2891 (target + 1) & TASK_DEQUE_MASK(thread_data->td);
2893 if (!task_team->tt.tt_untied_task_encountered) {
2895 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2896 KA_TRACE(20, (
"__kmp_get_priority_task(exit #3): T#%d could not get task "
2897 "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
2898 gtid, thread_data, task_team, deque_ntasks, target,
2899 thread_data->td.td_deque_tail));
2900 task_team->tt.tt_num_task_pri++;
2906 for (i = 1; i < deque_ntasks; ++i) {
2907 target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
2908 taskdata = thread_data->td.td_deque[target];
2909 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2915 if (taskdata == NULL) {
2917 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2919 10, (
"__kmp_get_priority_task(exit #4): T#%d could not get task from "
2920 "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
2921 gtid, thread_data, task_team, deque_ntasks,
2922 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2923 task_team->tt.tt_num_task_pri++;
2927 for (i = i + 1; i < deque_ntasks; ++i) {
2929 target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
2930 thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
2934 thread_data->td.td_deque_tail ==
2935 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
2936 thread_data->td.td_deque_tail = target;
2938 thread_data->td.td_deque_ntasks = deque_ntasks - 1;
2939 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2940 task = KMP_TASKDATA_TO_TASK(taskdata);
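// __kmp_remove_my_task: remove a task from the tail of this thread's own
// deque (LIFO end), returning NULL if the deque is empty or the tail task is
// not allowed by the task scheduling constraint.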
2945static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
2946 kmp_task_team_t *task_team,
2947 kmp_int32 is_constrained) {
2949 kmp_taskdata_t *taskdata;
2950 kmp_thread_data_t *thread_data;
2953 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2954 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
2957 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
2959 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
2960 gtid, thread_data->td.td_deque_ntasks,
2961 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2963 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2965 (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
2966 "ntasks=%d head=%u tail=%u\n",
2967 gtid, thread_data->td.td_deque_ntasks,
2968 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2972 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2974 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2975 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2977 (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
2978 "ntasks=%d head=%u tail=%u\n",
2979 gtid, thread_data->td.td_deque_ntasks,
2980 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2984 tail = (thread_data->td.td_deque_tail - 1) &
2985 TASK_DEQUE_MASK(thread_data->td);
2986 taskdata = thread_data->td.td_deque[tail];
2988 if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
2989 thread->th.th_current_task)) {
2991 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2993 (
"__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
2994 "ntasks=%d head=%u tail=%u\n",
2995 gtid, thread_data->td.td_deque_ntasks,
2996 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3000 thread_data->td.td_deque_tail = tail;
3001 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
3003 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3005 KA_TRACE(10, (
"__kmp_remove_my_task(exit #4): T#%d task %p removed: "
3006 "ntasks=%d head=%u tail=%u\n",
3007 gtid, taskdata, thread_data->td.td_deque_ntasks,
3008 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3010 task = KMP_TASKDATA_TO_TASK(taskdata);
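// __kmp_steal_task: steal a task from the head of the victim thread's deque
// (FIFO end). If the victim has encountered untied tasks, later deque entries
// may be scanned for one that satisfies the scheduling constraint.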
3017static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
3018 kmp_task_team_t *task_team,
3019 std::atomic<kmp_int32> *unfinished_threads,
3020 int *thread_finished,
3021 kmp_int32 is_constrained) {
3023 kmp_taskdata_t *taskdata;
3024 kmp_taskdata_t *current;
3025 kmp_thread_data_t *victim_td, *threads_data;
3027 kmp_int32 victim_tid;
3029 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3031 threads_data = task_team->tt.tt_threads_data;
3032 KMP_DEBUG_ASSERT(threads_data != NULL);
3034 victim_tid = victim_thr->th.th_info.ds.ds_tid;
3035 victim_td = &threads_data[victim_tid];
3037 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: "
3038 "task_team=%p ntasks=%d head=%u tail=%u\n",
3039 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
3040 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
3041 victim_td->td.td_deque_tail));
3043 if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
3044 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
3045 "task_team=%p ntasks=%d head=%u tail=%u\n",
3046 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
3047 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
3048 victim_td->td.td_deque_tail));
3052 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
3054 int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
3057 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3058 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
3059 "task_team=%p ntasks=%d head=%u tail=%u\n",
3060 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
3061 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3065 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
3066 current = __kmp_threads[gtid]->th.th_current_task;
3067 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
3068 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3070 victim_td->td.td_deque_head =
3071 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
3073 if (!task_team->tt.tt_untied_task_encountered) {
3075 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3076 KA_TRACE(10, (
"__kmp_steal_task(exit #3): T#%d could not steal from "
3077 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
3078 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
3079 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3084 target = victim_td->td.td_deque_head;
3086 for (i = 1; i < ntasks; ++i) {
3087 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
3088 taskdata = victim_td->td.td_deque[target];
3089 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3095 if (taskdata == NULL) {
3097 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3098 KA_TRACE(10, (
"__kmp_steal_task(exit #4): T#%d could not steal from "
3099 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
3100 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
3101 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3105 for (i = i + 1; i < ntasks; ++i) {
3107 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
3108 victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
3112 victim_td->td.td_deque_tail ==
3113 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
3114 victim_td->td.td_deque_tail = target;
3116 if (*thread_finished) {
3123 KMP_ATOMIC_INC(unfinished_threads);
3126 (
"__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
3127 gtid, count + 1, task_team));
3128 *thread_finished = FALSE;
3130 TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
3132 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3136 (
"__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
3137 "task_team=%p ntasks=%d head=%u tail=%u\n",
3138 gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
3139 ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3141 task = KMP_TASKDATA_TO_TASK(taskdata);
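// __kmp_execute_tasks_template: main task-scheduling loop used by barriers,
// taskwait and taskyield. The thread first drains priority tasks and its own
// deque, then steals from other threads, re-checking the caller's flag
// between tasks so it can leave as soon as the spin condition is satisfied.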
3155static inline int __kmp_execute_tasks_template(
3156 kmp_info_t *thread, kmp_int32 gtid, C *flag,
int final_spin,
3157 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3158 kmp_int32 is_constrained) {
3159 kmp_task_team_t *task_team = thread->th.th_task_team;
3160 kmp_thread_data_t *threads_data;
3162 kmp_info_t *other_thread;
3163 kmp_taskdata_t *current_task = thread->th.th_current_task;
3164 std::atomic<kmp_int32> *unfinished_threads;
3165 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
3166 tid = thread->th.th_info.ds.ds_tid;
3168 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3169 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
3171 if (task_team == NULL || current_task == NULL)
3174 KA_TRACE(15, (
"__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
3175 "*thread_finished=%d\n",
3176 gtid, final_spin, *thread_finished));
3178 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
3179 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3181 KMP_DEBUG_ASSERT(threads_data != NULL);
3183 nthreads = task_team->tt.tt_nproc;
3184 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
3185 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||
3186 task_team->tt.tt_hidden_helper_task_encountered);
3187 KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
3193 if (task_team->tt.tt_num_task_pri) {
3194 task = __kmp_get_priority_task(gtid, task_team, is_constrained);
3196 if (task == NULL && use_own_tasks) {
3197 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
3199 if ((task == NULL) && (nthreads > 1)) {
3203 if (victim_tid == -2) {
3204 victim_tid = threads_data[tid].td.td_deque_last_stolen;
3207 other_thread = threads_data[victim_tid].td.td_thr;
3209 if (victim_tid != -1) {
3211 }
else if (!new_victim) {
3217 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
3218 if (victim_tid >= tid) {
3222 other_thread = threads_data[victim_tid].td.td_thr;
3232 if ((__kmp_tasking_mode == tskm_task_teams) &&
3233 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
3234 (TCR_PTR(CCAST(
void *, other_thread->th.th_sleep_loc)) !=
3237 __kmp_null_resume_wrapper(other_thread);
3250 task = __kmp_steal_task(other_thread, gtid, task_team,
3251 unfinished_threads, thread_finished,
3255 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
3256 threads_data[tid].td.td_deque_last_stolen = victim_tid;
3263 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
3272#if USE_ITT_BUILD && USE_ITT_NOTIFY
3273 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
3274 if (itt_sync_obj == NULL) {
3276 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
3278 __kmp_itt_task_starting(itt_sync_obj);
3281 __kmp_invoke_task(gtid, task, current_task);
3283 if (itt_sync_obj != NULL)
3284 __kmp_itt_task_finished(itt_sync_obj);
3291 if (flag == NULL || (!final_spin && flag->done_check())) {
3294 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3298 if (thread->th.th_task_team == NULL) {
3301 KMP_YIELD(__kmp_library == library_throughput);
3304 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
3305 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned "
3306 "other tasks, restart\n",
3317 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks) == 0) {
3321 if (!*thread_finished) {
3323 kmp_int32 count = -1 +
3325 KMP_ATOMIC_DEC(unfinished_threads);
3326 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d dec "
3327 "unfinished_threads to %d task_team=%p\n",
3328 gtid, count, task_team));
3329 *thread_finished = TRUE;
3337 if (flag != NULL && flag->done_check()) {
3340 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3348 if (thread->th.th_task_team == NULL) {
3350 (
"__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
3359 if (flag == NULL || (!final_spin && flag->done_check())) {
3361 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3368 if (nthreads == 1 &&
3369 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks))
3373 (
"__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
3379template <
bool C,
bool S>
3380int __kmp_execute_tasks_32(
3381 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag,
int final_spin,
3382 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3383 kmp_int32 is_constrained) {
3384 return __kmp_execute_tasks_template(
3385 thread, gtid, flag, final_spin,
3386 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3389template <
bool C,
bool S>
3390int __kmp_execute_tasks_64(
3391 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag,
int final_spin,
3392 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3393 kmp_int32 is_constrained) {
3394 return __kmp_execute_tasks_template(
3395 thread, gtid, flag, final_spin,
3396 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3399template <
bool C,
bool S>
3400int __kmp_atomic_execute_tasks_64(
3401 kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
3402 int final_spin,
int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3403 kmp_int32 is_constrained) {
3404 return __kmp_execute_tasks_template(
3405 thread, gtid, flag, final_spin,
3406 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3409int __kmp_execute_tasks_oncore(
3410 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag,
int final_spin,
3411 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3412 kmp_int32 is_constrained) {
3413 return __kmp_execute_tasks_template(
3414 thread, gtid, flag, final_spin,
3415 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3419__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
3420 kmp_flag_32<false, false> *,
int,
3421 int *USE_ITT_BUILD_ARG(
void *), kmp_int32);
3423template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
3424 kmp_flag_64<false, true> *,
3426 int *USE_ITT_BUILD_ARG(
void *),
3429template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
3430 kmp_flag_64<true, false> *,
3432 int *USE_ITT_BUILD_ARG(
void *),
3435template int __kmp_atomic_execute_tasks_64<false, true>(
3436 kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *,
int,
3437 int *USE_ITT_BUILD_ARG(
void *), kmp_int32);
3439template int __kmp_atomic_execute_tasks_64<true, false>(
3440 kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *,
int,
3441 int *USE_ITT_BUILD_ARG(
void *), kmp_int32);
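// __kmp_enable_tasking: called when a thread first pushes a task while
// tasking is still dormant. It (re)allocates the task team's per-thread data
// array and, when a finite blocktime is in effect, wakes sleeping team
// members so they can start executing and stealing tasks.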
3446static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3447 kmp_info_t *this_thr) {
3448 kmp_thread_data_t *threads_data;
3449 int nthreads, i, is_init_thread;
3451 KA_TRACE(10, (
"__kmp_enable_tasking(enter): T#%d\n",
3452 __kmp_gtid_from_thread(this_thr)));
3454 KMP_DEBUG_ASSERT(task_team != NULL);
3455 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3457 nthreads = task_team->tt.tt_nproc;
3458 KMP_DEBUG_ASSERT(nthreads > 0);
3459 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3462 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
3464 if (!is_init_thread) {
3468 (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
3469 __kmp_gtid_from_thread(this_thr)));
3472 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3473 KMP_DEBUG_ASSERT(threads_data != NULL);
3475 if (__kmp_tasking_mode == tskm_task_teams &&
3476 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
3480 for (i = 0; i < nthreads; i++) {
3482 kmp_info_t *thread = threads_data[i].td.td_thr;
3484 if (i == this_thr->th.th_info.ds.ds_tid) {
3493 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3495 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
3496 __kmp_gtid_from_thread(this_thr),
3497 __kmp_gtid_from_thread(thread)));
3498 __kmp_null_resume_wrapper(thread);
3500 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
3501 __kmp_gtid_from_thread(this_thr),
3502 __kmp_gtid_from_thread(thread)));
3507 KA_TRACE(10, (
"__kmp_enable_tasking(exit): T#%d\n",
3508 __kmp_gtid_from_thread(this_thr)));
// Global list of unused (free) task teams.
static kmp_task_team_t *__kmp_free_task_teams = NULL;
// Lock protecting the task team free list.
kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);

  // Initialize last stolen task field to "none".
  thread_data->td.td_deque_last_stolen = -1;

  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  // Allocate space for the task deque and record its initial size.
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
// __kmp_free_task_deque: deallocates a thread's task deque. Happens at
// library shutdown, so other thread data fields need not be reset.
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  if (thread_data->td.td_deque != NULL) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    TCW_4(thread_data->td.td_deque_ntasks, 0);
    __kmp_free(thread_data->td.td_deque);
    thread_data->td.td_deque = NULL;
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  }

#ifdef BUILD_TIED_TASK_STACK
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}
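// __kmp_realloc_task_threads_data: grow (or create) the task team's
// per-thread data array to tt_nproc entries under tt_threads_lock; returns
// TRUE only for the thread that actually performed the allocation.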
3603static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
3604 kmp_task_team_t *task_team) {
3605 kmp_thread_data_t **threads_data_p;
3606 kmp_int32 nthreads, maxthreads;
3607 int is_init_thread = FALSE;
3609 if (TCR_4(task_team->tt.tt_found_tasks)) {
3614 threads_data_p = &task_team->tt.tt_threads_data;
3615 nthreads = task_team->tt.tt_nproc;
3616 maxthreads = task_team->tt.tt_max_threads;
3621 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3623 if (!TCR_4(task_team->tt.tt_found_tasks)) {
3625 kmp_team_t *team = thread->th.th_team;
3628 is_init_thread = TRUE;
3629 if (maxthreads < nthreads) {
3631 if (*threads_data_p != NULL) {
3632 kmp_thread_data_t *old_data = *threads_data_p;
3633 kmp_thread_data_t *new_data = NULL;
3637 (
"__kmp_realloc_task_threads_data: T#%d reallocating "
3638 "threads data for task_team %p, new_size = %d, old_size = %d\n",
3639 __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
3644 new_data = (kmp_thread_data_t *)__kmp_allocate(
3645 nthreads *
sizeof(kmp_thread_data_t));
3647 KMP_MEMCPY_S((
void *)new_data, nthreads *
sizeof(kmp_thread_data_t),
3648 (
void *)old_data, maxthreads *
sizeof(kmp_thread_data_t));
3650#ifdef BUILD_TIED_TASK_STACK
3652 for (i = maxthreads; i < nthreads; i++) {
3653 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3654 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3658 (*threads_data_p) = new_data;
3659 __kmp_free(old_data);
3661 KE_TRACE(10, (
"__kmp_realloc_task_threads_data: T#%d allocating "
3662 "threads data for task_team %p, size = %d\n",
3663 __kmp_gtid_from_thread(thread), task_team, nthreads));
3667 *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
3668 nthreads *
sizeof(kmp_thread_data_t));
3669#ifdef BUILD_TIED_TASK_STACK
3671 for (i = 0; i < nthreads; i++) {
3672 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3673 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3677 task_team->tt.tt_max_threads = nthreads;
3680 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
3684 for (i = 0; i < nthreads; i++) {
3685 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3686 thread_data->td.td_thr = team->t.t_threads[i];
3688 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3692 thread_data->td.td_deque_last_stolen = -1;
3697 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3700 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3701 return is_init_thread;
// __kmp_free_task_threads_data: deallocates the per-thread deques and the
// threads_data array of a task team.
static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
  if (task_team->tt.tt_threads_data != NULL) {
    int i;
    for (i = 0; i < task_team->tt.tt_max_threads; i++) {
      __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
    }
    __kmp_free(task_team->tt.tt_threads_data);
    task_team->tt.tt_threads_data = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
// __kmp_free_task_pri_list: deallocates the list of priority-task deques.
static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  if (task_team->tt.tt_task_pri_list != NULL) {
    kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
    while (list != NULL) {
      kmp_task_pri_t *next = list->next;
      __kmp_free_task_deque(&list->td);
      __kmp_free(list);
      list = next;
    }
    task_team->tt.tt_task_pri_list = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
}
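// __kmp_allocate_task_team: take a task team from the global free list if one
// is available, otherwise allocate a new one, then (re)initialize its flags
// and counters for the given team size.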
3742static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3744 kmp_task_team_t *task_team = NULL;
3747 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d entering; team = %p\n",
3748 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3750 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3752 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3753 if (__kmp_free_task_teams != NULL) {
3754 task_team = __kmp_free_task_teams;
3755 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3756 task_team->tt.tt_next = NULL;
3758 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3761 if (task_team == NULL) {
3762 KE_TRACE(10, (
"__kmp_allocate_task_team: T#%d allocating "
3763 "task team for team %p\n",
3764 __kmp_gtid_from_thread(thread), team));
3767 task_team = (kmp_task_team_t *)__kmp_allocate(
sizeof(kmp_task_team_t));
3768 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
3769 __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3770#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
3773 __itt_suppress_mark_range(
3774 __itt_suppress_range, __itt_suppress_threading_errors,
3775 &task_team->tt.tt_found_tasks,
sizeof(task_team->tt.tt_found_tasks));
3776 __itt_suppress_mark_range(__itt_suppress_range,
3777 __itt_suppress_threading_errors,
3778 CCAST(kmp_uint32 *, &task_team->tt.tt_active),
3779 sizeof(task_team->tt.tt_active));
3787 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3788 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3789 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3790 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3792 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
3793 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3794 TCW_4(task_team->tt.tt_active, TRUE);
3796 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p "
3797 "unfinished_threads init'd to %d\n",
3798 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
3799 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
// __kmp_free_task_team: returns the task team associated with a specific
// thread to the global task team free list.
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));

  // Put task team back on free list.
  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);

  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
  task_team->tt.tt_next = __kmp_free_task_teams;
  TCW_PTR(__kmp_free_task_teams, task_team);

  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
// __kmp_reap_task_teams: frees all task teams on the free list. Should only
// be done at library shutdown; it cannot touch per-thread structures or
// gtids, which are already gone by this point.
void __kmp_reap_task_teams(void) {
  kmp_task_team_t *task_team;

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Free all task_teams on the free list.
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    while ((task_team = __kmp_free_task_teams) != NULL) {
      __kmp_free_task_teams = task_team->tt.tt_next;
      task_team->tt.tt_next = NULL;

      // Free threads_data and the priority-task list if necessary.
      if (task_team->tt.tt_threads_data != NULL) {
        __kmp_free_task_threads_data(task_team);
      }
      if (task_team->tt.tt_task_pri_list != NULL) {
        __kmp_free_task_pri_list(task_team);
      }
      __kmp_free(task_team);
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
}
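// __kmp_wait_to_unref_task_teams: spin until every thread in the thread pool
// has dropped its reference to a task team (th_task_team == NULL), waking
// sleeping pool threads when a finite blocktime is in effect.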
3851void __kmp_wait_to_unref_task_teams(
void) {
3857 KMP_INIT_YIELD(spins);
3858 KMP_INIT_BACKOFF(time);
3866 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
3867 thread = thread->th.th_next_pool) {
3871 if (TCR_PTR(thread->th.th_task_team) == NULL) {
3872 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
3873 __kmp_gtid_from_thread(thread)));
3878 if (!__kmp_is_thread_alive(thread, &exit_val)) {
3879 thread->th.th_task_team = NULL;
3886 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: Waiting for T#%d to "
3887 "unreference task_team\n",
3888 __kmp_gtid_from_thread(thread)));
3890 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
3893 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3897 (
"__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
3898 __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
3899 __kmp_null_resume_wrapper(thread);
3908 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
3914void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
int always) {
3915 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3921 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
3922 (always || team->t.t_nproc > 1)) {
3923 team->t.t_task_team[this_thr->th.th_task_state] =
3924 __kmp_allocate_task_team(this_thr, team);
3925 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d created new task_team %p"
3926 " for team %d at parity=%d\n",
3927 __kmp_gtid_from_thread(this_thr),
3928 team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
3929 this_thr->th.th_task_state));
3939 if (team->t.t_nproc > 1) {
3940 int other_team = 1 - this_thr->th.th_task_state;
3941 KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
3942 if (team->t.t_task_team[other_team] == NULL) {
3943 team->t.t_task_team[other_team] =
3944 __kmp_allocate_task_team(this_thr, team);
3945 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d created second new "
3946 "task_team %p for team %d at parity=%d\n",
3947 __kmp_gtid_from_thread(this_thr),
3948 team->t.t_task_team[other_team], team->t.t_id, other_team));
3951 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
3952 if (!task_team->tt.tt_active ||
3953 team->t.t_nproc != task_team->tt.tt_nproc) {
3954 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
3955 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3956 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3957 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3958 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
3960 TCW_4(task_team->tt.tt_active, TRUE);
3964 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d reset next task_team "
3965 "%p for team %d at parity=%d\n",
3966 __kmp_gtid_from_thread(this_thr),
3967 team->t.t_task_team[other_team], team->t.t_id, other_team));
3975 if (this_thr == __kmp_hidden_helper_main_thread) {
3976 for (
int i = 0; i < 2; ++i) {
3977 kmp_task_team_t *task_team = team->t.t_task_team[i];
3978 if (KMP_TASKING_ENABLED(task_team)) {
3981 __kmp_enable_tasking(task_team, this_thr);
3982 for (
int j = 0; j < task_team->tt.tt_nproc; ++j) {
3983 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
3984 if (thread_data->td.td_deque == NULL) {
3985 __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // Toggle the th_task_state field, to switch which task_team this thread
  // refers to.
  this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);

  // It is now safe to propagate the task team pointer from the team struct to
  // the current thread.
  TCW_PTR(this_thr->th.th_task_team,
          team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            team->t.t_id, this_thr->th.th_task_state));
}
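// __kmp_task_team_wait: executed by the primary thread at a barrier. It waits
// for tt_unfinished_threads to reach zero, then deactivates the task team so
// it can be recycled for the next parallel region.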
4020void __kmp_task_team_wait(
4021 kmp_info_t *this_thr,
4022 kmp_team_t *team USE_ITT_BUILD_ARG(
void *itt_sync_obj),
int wait) {
4023 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
4025 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4026 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
4028 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
4030 KA_TRACE(20, (
"__kmp_task_team_wait: Primary T#%d waiting for all tasks "
4031 "(for unfinished_threads to reach 0) on task_team = %p\n",
4032 __kmp_gtid_from_thread(this_thr), task_team));
4036 kmp_flag_32<false, false> flag(
4037 RCAST(std::atomic<kmp_uint32> *,
4038 &task_team->tt.tt_unfinished_threads),
4040 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
4046 (
"__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
4047 "setting active to false, setting local and team's pointer to NULL\n",
4048 __kmp_gtid_from_thread(this_thr), task_team));
4049 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
4050 task_team->tt.tt_found_proxy_tasks == TRUE ||
4051 task_team->tt.tt_hidden_helper_task_encountered == TRUE);
4052 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
4053 TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
4054 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
4055 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
4058 TCW_PTR(this_thr->th.th_task_team, NULL);
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  std::atomic<kmp_uint32> *spin = RCAST(
      std::atomic<kmp_uint32> *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  kmp_flag_32<false, false> spin_flag(spin, 0U);
  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE);
  }
  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
}
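// __kmp_give_task: try to enqueue a (typically proxy) task into the deque of
// the thread identified by tid. Returns false if the target has no deque or
// its deque is full for the current pass; callers retry with a larger pass
// value so the deque is eventually resized.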
4102static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
4104 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4105 kmp_task_team_t *task_team = taskdata->td_task_team;
4107 KA_TRACE(20, (
"__kmp_give_task: trying to give task %p to thread %d.\n",
4111 KMP_DEBUG_ASSERT(task_team != NULL);
4113 bool result =
false;
4114 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
4116 if (thread_data->td.td_deque == NULL) {
4120 (
"__kmp_give_task: thread %d has no queue while giving task %p.\n",
4125 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4126 TASK_DEQUE_SIZE(thread_data->td)) {
4129 (
"__kmp_give_task: queue is full while giving task %p to thread %d.\n",
4134 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
4137 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4138 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4139 TASK_DEQUE_SIZE(thread_data->td)) {
4141 __kmp_realloc_task_deque(thread, thread_data);
4146 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4148 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4149 TASK_DEQUE_SIZE(thread_data->td)) {
4150 KA_TRACE(30, (
"__kmp_give_task: queue is full while giving task %p to "
4156 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
4157 goto release_and_exit;
4159 __kmp_realloc_task_deque(thread, thread_data);
4165 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
4167 thread_data->td.td_deque_tail =
4168 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
4169 TCW_4(thread_data->td.td_deque_ntasks,
4170 TCR_4(thread_data->td.td_deque_ntasks) + 1);
4173 KA_TRACE(30, (
"__kmp_give_task: successfully gave task %p to thread %d.\n",
4177 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
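// Proxy-task completion is split into a "top half", which may run on a thread
// outside the team (it marks the task complete and decrements the taskgroup
// count), and a "bottom half", which must run on a team thread (it releases
// dependences and frees the task). PROXY_TASK_FLAG, OR'ed into
// td_incomplete_child_tasks, keeps the bottom half from freeing the task
// before the second part of the top half has finished.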
#define PROXY_TASK_FLAG 0x40000000
// First part of the proxy-task "top half": may run on any thread.
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  taskdata->td_flags.complete = 1; // mark the task as completed

  if (taskdata->td_taskgroup)
    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);

  // Create an imaginary child for this task so the bottom half cannot release
  // the task before the second top half has run.
  KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
}

// Second part of the "top half": must run after the bottom half was queued.
static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  kmp_int32 children = 0;

  // Predecrement simulated by "- 1" calculation.
  children =
      -1 + KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
  KMP_DEBUG_ASSERT(children >= 0);

  // Remove the imaginary child.
  KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
}
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  kmp_info_t *thread = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
                   1); // top half must have run before bottom half

  // Wait until the top half has finished; spinning here should be brief.
  while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
          PROXY_TASK_FLAG) > 0)
    ;

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}

// __kmpc_proxy_task_completed: run the completion of a proxy task from a
// thread that is part of the team; executes both top halves and the bottom
// half directly.
void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
       gtid, taskdata));
  __kmp_assert_valid_gtid(gtid);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);
  __kmp_second_top_half_finish_proxy(taskdata);
  __kmp_bottom_half_finish_proxy(gtid, ptask);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
       gtid, taskdata));
}
void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  // Enqueue the task so its bottom half runs on a thread of its team.
  kmp_team_t *team = taskdata->td_team;
  kmp_int32 nthreads = team->t.t_nproc;
  kmp_info_t *thread;

  kmp_int32 start_k = start % nthreads;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // Linearly probe the team's threads for one that will accept the task.
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;
    // After a full pass over all threads, allow the target deques to grow.
    if (k == start_k)
      pass = pass << 1;
  } while (!__kmp_give_task(thread, k, ptask, pass));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
    // Wake at least one thread so the given task gets executed.
    for (int i = 0; i < nthreads; ++i) {
      thread = team->t.t_threads[i];
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
        break;
      }
    }
  }
}
4319 KMP_DEBUG_ASSERT(ptask != NULL);
4320 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4324 (
"__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
4327 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4329 __kmp_first_top_half_finish_proxy(taskdata);
4331 __kmpc_give_task(ptask);
4333 __kmp_second_top_half_finish_proxy(taskdata);
4337 (
"__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
                                                kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
    td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
    td->td_allow_completion_event.ed.task = task;
    __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
  }
  return &td->td_allow_completion_event;
}
4352void __kmp_fulfill_event(kmp_event_t *event) {
4353 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
4354 kmp_task_t *ptask = event->ed.task;
4355 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4356 bool detached =
false;
4357 int gtid = __kmp_get_gtid();
4362 __kmp_acquire_tas_lock(&event->lock, gtid);
4363 if (taskdata->td_flags.proxy == TASK_PROXY) {
4369 if (UNLIKELY(ompt_enabled.enabled))
4370 __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
4373 event->type = KMP_EVENT_UNINITIALIZED;
4374 __kmp_release_tas_lock(&event->lock, gtid);
4380 if (UNLIKELY(ompt_enabled.enabled))
4381 __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
4385 kmp_team_t *team = taskdata->td_team;
4386 kmp_info_t *thread = __kmp_get_thread();
4387 if (thread->th.th_team == team) {
4405kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
4407 kmp_taskdata_t *taskdata;
4408 kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
4409 kmp_taskdata_t *parent_task = taskdata_src->td_parent;
4410 size_t shareds_offset;
4413 KA_TRACE(10, (
"__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
4415 KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
4417 KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
4418 task_size = taskdata_src->td_size_alloc;
4421 KA_TRACE(30, (
"__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
4424 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
4426 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
4428 KMP_MEMCPY(taskdata, taskdata_src, task_size);
4430 task = KMP_TASKDATA_TO_TASK(taskdata);
4433 taskdata->td_task_id = KMP_GEN_TASK_ID();
4434 if (task->shareds != NULL) {
4435 shareds_offset = (
char *)task_src->shareds - (
char *)taskdata_src;
4436 task->shareds = &((
char *)taskdata)[shareds_offset];
4437 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (
sizeof(
void *) - 1)) ==
4440 taskdata->td_alloc_thread = thread;
4441 taskdata->td_parent = parent_task;
4443 taskdata->td_taskgroup = parent_task->td_taskgroup;
4446 if (taskdata->td_flags.tiedness == TASK_TIED)
4447 taskdata->td_last_tied = taskdata;
4451 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4452 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
4453 if (parent_task->td_taskgroup)
4454 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
4457 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
4458 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
4462 (
"__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
4463 thread, taskdata, taskdata->td_parent));
4465 if (UNLIKELY(ompt_enabled.enabled))
4466 __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);

KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
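// kmp_taskloop_bounds_t: small helper that reads and writes a taskloop's
// lower/upper bounds stored inside the task, handling the GOMP-compatibility
// layout where the bounds live in shareds and may be 4 or 8 bytes wide.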
4482class kmp_taskloop_bounds_t {
4484 const kmp_taskdata_t *taskdata;
4485 size_t lower_offset;
4486 size_t upper_offset;
4489 kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
4490 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
4491 lower_offset((char *)lb - (char *)task),
4492 upper_offset((char *)ub - (char *)task) {
4493 KMP_DEBUG_ASSERT((
char *)lb > (
char *)_task);
4494 KMP_DEBUG_ASSERT((
char *)ub > (
char *)_task);
4496 kmp_taskloop_bounds_t(kmp_task_t *_task,
const kmp_taskloop_bounds_t &bounds)
4497 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
4498 lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
4499 size_t get_lower_offset()
const {
return lower_offset; }
4500 size_t get_upper_offset()
const {
return upper_offset; }
4501 kmp_uint64 get_lb()
const {
4503#if defined(KMP_GOMP_COMPAT)
4505 if (!taskdata->td_flags.native) {
4506 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4509 if (taskdata->td_size_loop_bounds == 4) {
4510 kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4511 retval = (kmp_int64)*lb;
4513 kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4514 retval = (kmp_int64)*lb;
4519 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4523 kmp_uint64 get_ub()
const {
4525#if defined(KMP_GOMP_COMPAT)
4527 if (!taskdata->td_flags.native) {
4528 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4531 if (taskdata->td_size_loop_bounds == 4) {
4532 kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4533 retval = (kmp_int64)*ub;
4535 kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4536 retval = (kmp_int64)*ub;
4540 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4544 void set_lb(kmp_uint64 lb) {
4545#if defined(KMP_GOMP_COMPAT)
4547 if (!taskdata->td_flags.native) {
4548 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4551 if (taskdata->td_size_loop_bounds == 4) {
4552 kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4553 *lower = (kmp_uint32)lb;
4555 kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4556 *lower = (kmp_uint64)lb;
4560 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4563 void set_ub(kmp_uint64 ub) {
4564#if defined(KMP_GOMP_COMPAT)
4566 if (!taskdata->td_flags.native) {
4567 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4570 if (taskdata->td_size_loop_bounds == 4) {
4571 kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4572 *upper = (kmp_uint32)ub;
4574 kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
4575 *upper = (kmp_uint64)ub;
4579 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4600void __kmp_taskloop_linear(
ident_t *loc,
int gtid, kmp_task_t *task,
4601 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4602 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4603 kmp_uint64 grainsize, kmp_uint64 extras,
4604 kmp_int64 last_chunk, kmp_uint64 tc,
4610 KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
4611 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4613 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4614 kmp_uint64 lower = task_bounds.get_lb();
4615 kmp_uint64 upper = task_bounds.get_ub();
4617 kmp_info_t *thread = __kmp_threads[gtid];
4618 kmp_taskdata_t *current_task = thread->th.th_current_task;
4619 kmp_task_t *next_task;
4620 kmp_int32 lastpriv = 0;
4622 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4623 (last_chunk < 0 ? last_chunk : extras));
4624 KMP_DEBUG_ASSERT(num_tasks > extras);
4625 KMP_DEBUG_ASSERT(num_tasks > 0);
4626 KA_TRACE(20, (
"__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
4627 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
4628 gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
4629 ub_glob, st, task_dup));
4632 for (i = 0; i < num_tasks; ++i) {
4633 kmp_uint64 chunk_minus_1;
4635 chunk_minus_1 = grainsize - 1;
4637 chunk_minus_1 = grainsize;
4640 upper = lower + st * chunk_minus_1;
4644 if (i == num_tasks - 1) {
4647 KMP_DEBUG_ASSERT(upper == *ub);
4648 if (upper == ub_glob)
4650 }
else if (st > 0) {
4651 KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
4652 if ((kmp_uint64)st > ub_glob - upper)
4655 KMP_DEBUG_ASSERT(upper + st < *ub);
4656 if (upper - ub_glob < (kmp_uint64)(-st))
4660 next_task = __kmp_task_dup_alloc(thread, task);
4661 kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
4662 kmp_taskloop_bounds_t next_task_bounds =
4663 kmp_taskloop_bounds_t(next_task, task_bounds);
4666 next_task_bounds.set_lb(lower);
4667 if (next_taskdata->td_flags.native) {
4668 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4670 next_task_bounds.set_ub(upper);
4672 if (ptask_dup != NULL)
4674 ptask_dup(next_task, task, lastpriv);
4676 (
"__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
4677 "upper %lld stride %lld, (offsets %p %p)\n",
4678 gtid, i, next_task, lower, upper, st,
4679 next_task_bounds.get_lower_offset(),
4680 next_task_bounds.get_upper_offset()));
4682 __kmp_omp_taskloop_task(NULL, gtid, next_task,
4685 if (ompt_enabled.ompt_callback_dispatch) {
4686 OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
4691 __kmp_omp_task(gtid, next_task,
true);
4696 __kmp_task_start(gtid, task, current_task);
4698 __kmp_task_finish<false>(gtid, task, current_task);
4703typedef struct __taskloop_params {
4710 kmp_uint64 num_tasks;
4711 kmp_uint64 grainsize;
4713 kmp_int64 last_chunk;
4715 kmp_uint64 num_t_min;
4719} __taskloop_params_t;
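// Below: a forward declaration of __kmp_taskloop_recur, then
// __kmp_taskloop_task, the task entry routine used by the recursive taskloop
// splitter. It unpacks a __taskloop_params_t and keeps subdividing its half of
// the iteration range until the chunk count drops to num_t_min, at which point
// it falls back to the linear scheduler.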
4721void __kmp_taskloop_recur(
ident_t *,
int, kmp_task_t *, kmp_uint64 *,
4722 kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
4723 kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
4731int __kmp_taskloop_task(
int gtid,
void *ptask) {
4732 __taskloop_params_t *p =
4733 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
4734 kmp_task_t *task = p->task;
4735 kmp_uint64 *lb = p->lb;
4736 kmp_uint64 *ub = p->ub;
4737 void *task_dup = p->task_dup;
4739 kmp_int64 st = p->st;
4740 kmp_uint64 ub_glob = p->ub_glob;
4741 kmp_uint64 num_tasks = p->num_tasks;
4742 kmp_uint64 grainsize = p->grainsize;
4743 kmp_uint64 extras = p->extras;
4744 kmp_int64 last_chunk = p->last_chunk;
4745 kmp_uint64 tc = p->tc;
4746 kmp_uint64 num_t_min = p->num_t_min;
4748 void *codeptr_ra = p->codeptr_ra;
4751 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4752 KMP_DEBUG_ASSERT(task != NULL);
4754 (
"__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
4755 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4756 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4759 KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
4760 if (num_tasks > num_t_min)
4761 __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4762 grainsize, extras, last_chunk, tc, num_t_min,
4768 __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4769 grainsize, extras, last_chunk, tc,
4775 KA_TRACE(40, (
"__kmp_taskloop_task(exit): T#%d\n", gtid));
// Schedule part of the taskloop as a task, execute the rest of the taskloop;
// keep splitting recursively until num_tasks drops to num_t_min, then go
// linear.
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_int64 last_chunk, kmp_uint64 tc,
                          kmp_uint64 num_t_min, void *codeptr_ra,
                          void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  KA_TRACE(20,
           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
            st, task_dup));
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 lower = *lb;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_t *next_task;
  size_t lower_offset =
      (char *)lb - (char *)task; // remember offset of lb in the task structure
  size_t upper_offset =
      (char *)ub - (char *)task; // remember offset of ub in the task structure

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // split the window of tasks into two halves
  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
  kmp_uint64 gr_size0 = grainsize;
  kmp_uint64 n_tsk0 = num_tasks >> 1; // number of tasks in the 1st half
  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // number of tasks in the 2nd half
  if (last_chunk < 0) { // strict grainsize: shortened chunk stays in 2nd half
    ext0 = ext1 = 0;
    last_chunk1 = last_chunk;
    tc0 = grainsize * n_tsk0;
    tc1 = tc - tc0;
  } else if (n_tsk0 <= extras) { // all extra iterations fit in the 1st half
    gr_size0++; // integrate extras into grainsize of the 1st half
    ext0 = 0;
    ext1 = extras - n_tsk0; // remaining extras
    tc0 = gr_size0 * n_tsk0;
    tc1 = tc - tc0;
  } else { // n_tsk0 > extras: extra iterations stay in the 1st half
    ext0 = extras;
    ext1 = 0;
    tc1 = grainsize * n_tsk1;
    tc0 = tc - tc1;
  }
  ub0 = lower + st * (tc0 - 1); // upper bound of the 1st half
  lb1 = ub0 + st; // lower bound of the 2nd half

  // create pattern task for the 2nd half of the loop
  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the pattern task
  // adjust lower bound (upper bound is not changed) for the 2nd half
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL) // construct firstprivates etc.
    ptask_dup(next_task, task, 0);
  *ub = ub0; // adjust upper bound for the 1st half

  // create auxiliary task for the 2nd half of the loop;
  // make sure the new task has the same parent as the pattern task
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  thread->th.th_current_task = taskdata->td_parent;
  kmp_task_t *new_task =
      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  // restore current task
  thread->th.th_current_task = current_task;
  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->st = st;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->extras = ext1;
  p->last_chunk = last_chunk1;
  p->tc = tc1;
  p->num_t_min = num_t_min;
  p->codeptr_ra = codeptr_ra;
#if OMPT_SUPPORT
  // schedule the new task with the correct return address for OMPT events
  __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
#else
  __kmp_omp_task(gtid, new_task, true); // schedule new task
#endif

  // execute the 1st half of the current subrange
  if (n_tsk0 > num_t_min)
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
                         ext0, last_chunk0, tc0, num_t_min, codeptr_ra,
                         task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, last_chunk0, tc0, codeptr_ra,
                          task_dup);

  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
}
// Schedule the taskloop: compute the trip count and the number of tasks /
// grainsize from the schedule clause, then go linear or recursive.
static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           int nogroup, int sched, kmp_uint64 grainsize,
                           int modifier, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }

  // calculate loop parameters
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // global upper used to calculate lastprivate flag
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_int64 last_chunk =
      0; // reduce grainsize of the last task by last_chunk in strict mode
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d, %d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
                task_dup));

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for zero-trip loop
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (num_tasks_min == 0)
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, choose the default:
    // try to schedule (team_size * 10) tasks
    grainsize = thread->th.th_team_nproc * 10;
    KMP_FALLTHROUGH();
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1;
      grainsize = tc; // too big grainsize requested, adjust values
      extras = 0;
    } else if (modifier) { // strict modifier: keep the requested grainsize
      num_tasks = (tc + grainsize - 1) / grainsize;
      last_chunk = tc - (num_tasks * grainsize);
      extras = 0;
    } else {
      num_tasks = tc / grainsize;
      // adjust grainsize for balanced distribution of iterations
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
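  // Worked example (assumed values, not from the original source): for a trip
  // count tc = 1000 and a grainsize clause of 64 (sched == 1):
  //  - without the strict modifier: num_tasks = 1000 / 64 = 15, grainsize is
  //    rebalanced to 1000 / 15 = 66 and extras = 1000 % 15 = 10, i.e. ten
  //    tasks get 67 iterations and five get 66 (15 * 66 + 10 == 1000);
  //  - with the strict modifier: num_tasks = (1000 + 63) / 64 = 16, every task
  //    gets exactly grainsize = 64 iterations except the last one, which is
  //    shortened by last_chunk = 1000 - 16 * 64 = -24 to 40 iterations.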
  // check the if clause first: if(0) forces serial, linear execution
  if (if_val == 0) {
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // a serial task cannot be untied
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
                          OMPT_GET_RETURN_ADDRESS(0), task_dup);
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_tasks_min,
                         OMPT_GET_RETURN_ADDRESS(0), task_dup);
  } else {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
                          OMPT_GET_RETURN_ADDRESS(0), task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
}
// Taskloop construct entry point (no strict modifier parameter).
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 0, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}
// Taskloop construct entry point taking the 'strict' modifier for the
// grainsize/num_tasks clause.
void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                       int nogroup, int sched, kmp_uint64 grainsize,
                       int modifier, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 modifier, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
}