14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
21#include "kmp_settings.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
29#include "kmp_dispatch_hier.h"
33#include "ompt-specific.h"
36#include "ompd-specific.h"
39#if OMP_PROFILING_SUPPORT
40#include "llvm/Support/TimeProfiler.h"
41static char *ProfileTraceFile =
nullptr;
45#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
#if ENABLE_LIBOMPTARGET
static void __kmp_target_init(void);
#endif
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
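
// Build a per-thread override of the nested nthreads array: entries up to and
// including 'level' are zeroed (already-consumed nesting levels), and the
// thread's th_set_nested_nth values are appended for the deeper levels.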
static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
                                                        int level) {
  kmp_nested_nthreads_t *new_nested_nth =
      (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
          sizeof(kmp_nested_nthreads_t));
  int new_size = level + thr->th.th_set_nested_nth_sz;
  new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
  for (int i = 0; i < level + 1; ++i)
    new_nested_nth->nth[i] = 0;
  for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
    new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
  new_nested_nth->size = new_nested_nth->used = new_size;
  return new_nested_nth;
}
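
/* Map the current thread to a global thread id (gtid) without requiring prior
   registration. Depending on __kmp_gtid_mode this reads a thread-local
   variable (TDATA, mode >= 3), keyed TLS (mode >= 2), or falls back to the
   internal algorithm: locate the address of a local variable inside one of
   the registered threads' [stackbase - stacksize, stackbase] ranges. */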
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than another thread's stack base is
           to be in the same stack as that thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  /* The stack must have grown past the recorded bounds; refine them. */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
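
/* Sanity check that this thread's stack does not overlap another registered
   thread's stack; aborts with a fatal StackOverlap message if it does. Uber
   (root) threads are skipped since their bounds are refined lazily. */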
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
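
/* Print an "OMP storage map:" line for a block of memory under the stdio
   bootstrap lock. With KMP_PRINT_DATA_PLACEMENT, verbose mode additionally
   reports the NUMA host node of each page in [p1, p2]. */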
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = (char *)p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p2 > p1)
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
  va_end(ap);
}
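
/* Emit an "OMP warning:" message to kmp_err unless warnings are disabled.
   Illustrative use (the format string and trailing arguments follow printf
   rules and are forwarded through __kmp_vprintf):
     __kmp_warn("Ignoring invalid value: %s", value); */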
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill
  // them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

#if KMP_OS_WINDOWS
  // Let other threads know of abnormal termination and prevent deadlock
  // if abort happened during library initialization or shutdown.
  __kmp_global.g.g_abort = SIGABRT;
#else
  __kmp_unregister_library();
  abort();
#endif

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
}

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
}
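
/* The two helpers below dump the storage map for a thread's kmp_info_t and a
   team's kmp_team_t substructures (barrier, dispatch, and threads arrays),
   using __kmp_print_storage_map_gtid for each contiguous block. */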
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() {
  __kmp_fini_target_mem();
  __kmp_fini_memkind();
}

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif
#if KMP_OS_WINDOWS
/* Windows-only DLL entry point: tears down the runtime on process and thread
   detach. */
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {
  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    // If lpReserved is non-NULL the process is terminating and Windows has
    // already killed the other threads; only do a full shutdown on a clean
    // FreeLibrary (lpReserved == NULL).
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
#endif /* KMP_OS_WINDOWS */
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* The BARRIER for a SINGLE process section is always explicit. */
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status = 0;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means this thread got
       the single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
  if (status)
    __kmp_itt_single_start(gtid);
  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
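
/* Decide how many threads the forking thread may actually reserve for a new
   team: apply the dynamic adjustment mode (load balance, thread limit, or
   random), then clamp to KMP_DEVICE_THREAD_LIMIT and OMP_THREAD_LIMIT, and
   finally to whatever the threads array can be expanded to hold. Returns 1
   when the region must serialize. */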
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention-group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If not initializing the hidden helper team, take the hidden helper
  // threads out of the capacity, since they are counted in
  // __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG

  if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);
  }
  return new_nthreads;
}
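
/* Allocate worker threads from the pool and install them in the new team;
   the caller has already reserved the slots under the forkjoin lock. Also
   wires up the primary thread, aligns barrier arrived-states, partitions
   affinity places, and manages the primary thread's task state. */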
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {
    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually been forked to do real work yet. This partitioning
    // will take place in the parallel region nested within the teams.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  // Take care of primary thread's task state
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
    KA_TRACE(20, ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p "
                  "/ team %p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, team->t.t_parent,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (use_hot_team) {
      // Store primary thread's current task state on new team
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);
      // Restore primary thread's task state to hot team's state
      // by using thread 1's task state
      if (team->t.t_nproc > 1) {
        KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
                         team->t.t_threads[1]->th.th_task_state == 1);
        KMP_CHECK_UPDATE(master_th->th.th_task_state,
                         team->t.t_threads[1]->th.th_task_state);
      } else {
        master_th->th.th_task_state = 0;
      }
    } else {
      // Store primary thread's current task state on new team
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);
      // Not using hot team, so set task state to 0.
      master_th->th.th_task_state = 0;
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
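
/* On x86/x86_64, a parallel region inherits the primary thread's x87 and
   MXCSR floating-point control settings when __kmp_inherit_fp_control is set:
   the fork side captures them into the team (propagateFPControl) and workers
   restore them if they differ (updateHWFPControl). On other architectures
   both are no-ops. */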
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // Checking whether the value needs updating avoids unnecessary writes
    // that would put the cache line into a written state, causing all threads
    // in the team to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime uses this flag to decide whether to restore
    // the registers, so it must be kept correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly, don't write to this cache line unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
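
/* Execute a parallel region serialized, i.e. in a team of one thread. First
   entry switches the thread onto its (possibly freshly allocated) serial
   team; re-entry just bumps t_serialized and pushes a new dispatch buffer, so
   the nesting can be unwound in __kmpc_end_serialized_parallel. */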
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  if (this_thr->th.th_nt_strict && this_thr->th.th_set_nproc > 1)
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);
  this_thr->th.th_set_nproc = 0;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used, so allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team = __kmp_allocate_team(
          this_thr->th.th_root, 1, 1, ompt_parallel_data, proc_bind,
          &this_thr->th.th_current_task->td_icvs, 0, NULL);
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    if (this_thr->th.th_team->t.t_nested_nth)
      serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
    else
      serial_team->t.t_nested_nth = &__kmp_nested_nth;
    // Save previous team's task state on serial team structure
    serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (this_thr->th.th_team->t.t_nested_nth)
      nested_nth = this_thr->th.th_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;
    this_thr->th.th_task_team = NULL;
    this_thr->th.th_task_state = 0;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* this serialized team is already being used; that's fine, just add
       another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (serial_team->t.t_nested_nth)
      nested_nth = serial_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    /* allocate/push task team stack */
    __kmp_push_task_team_node(this_thr, serial_team);

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was swapped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
// Test if this fork is for a team closely nested in a teams construct
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

// Test if this fork is for the teams construct, i.e. to form the outer league
// of teams
static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
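
/* Fork a parallel region nested inside a teams construct. The enclosing hot
   team is reused: its workers are already waiting at the fork barrier, so the
   team is reinitialized (size, proc-bind, levels) without taking the forkjoin
   lock, then released via __kmp_internal_fork. Returns TRUE unless the GNU
   entry point will invoke the microtask itself. */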
static int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
                    ompt_data_t ompt_parallel_data, void *return_address,
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // We are in serialized parallel
    __kmpc_serialized_parallel(loc, gtid);
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // Need to decrement t_serialized for enquiry functions to work
      // correctly; will restore at join time.
      parent_team->t.t_serialized--;
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;

    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // don't use lw_taskteam after linking; content was swapped

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }

    // Need to decrement t_serialized for enquiry functions to work
    // correctly; will restore at join time.
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                             exit_frame_p);
    }

    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  // Save number of threads in the team before the parallel region
  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }

  /* Change number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // Can only reduce number of threads dynamically, can't increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using distributed barrier, we need to run this code block
      // even when the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

#if USE_DEBUGGER
  if (__kmp_debugging) { // Let debugger override number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means debugger doesn't want to change num threads
      master_set_numthreads = nth;
    }
  }
#endif

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to correct value for each implicit task
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create new stack stitching id before entering fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
static int
__kmp_serial_fork_call(ident_t *loc, int gtid,
                       enum fork_context_e call_context, kmp_int32 argc,
                       microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data, kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  SimpleVLA<void *> args(argc);
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // revert change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;

      // Get args from parent team for teams construct
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                               exit_frame_p);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      // team->t.t_pkfn = microtask;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      // revert change made in __kmpc_serialized_parallel()
      // because initial code in teams should have level=0
      team->t.t_level--;
      // call special invoker for outer "parallel" of teams construct
      invoker(gtid);
      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      KMP_MB();

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args, exit_frame_p);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    }
  } else if (call_context == fork_context_gnu) {
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
    // don't use lw_taskteam after linking; content was swapped

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB();
  return FALSE;
}
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int task_thread_limit = 0;
  int level;
  int active_level;
  int teams_level;
  kmp_hot_team_ptr_t **p_hot_teams;
  { // KMP_TIME_BLOCK
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

    KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
    if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
      /* Some systems prefer the stack for the root thread(s) to start with
         some gap from the parent stack to prevent false sharing. */
      void *dummy = KMP_ALLOCA(__kmp_stkpadding);
      /* These 2 lines below are so this does not get optimized out */
      if (__kmp_stkpadding > KMP_MAX_STKPADDING)
        __kmp_stkpadding += (short)((kmp_int64)dummy);
    }

    /* initialize if needed */
    if (!TCR_4(__kmp_init_parallel))
      __kmp_parallel_initialize();
    __kmp_resume_if_soft_paused();

    /* setup current data */
    master_th = __kmp_threads[gtid];

    parent_team = master_th->th.th_team;
    master_tid = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root = master_th->th.th_root;
    master_active = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;
    task_thread_limit =
        master_th->th.th_current_task->td_icvs.task_thread_limit;

    ompt_data_t ompt_parallel_data = ompt_data_none;
    ompt_data_t *parent_task_data = NULL;
    ompt_frame_t *ompt_frame = NULL;
    void *return_address = NULL;

    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                    NULL, NULL);
      return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
    }

    // Assign affinity to root thread if it hasn't happened yet
    __kmp_assign_root_init_mask();

    // Nested level will be an index in the nested nthreads array
    level = parent_team->t.t_level;
    // used to launch non-serial teams even if nested is not allowed
    active_level = parent_team->t.t_active_level;
    // needed to check nesting inside the teams
    teams_level = master_th->th.th_teams_level;
    p_hot_teams = &master_th->th.th_hot_teams;
    if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
      *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
          sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
      (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
      // it is either actual or not needed (when active_level > 0)
      (*p_hot_teams)[0].hot_team_nth = 1;
    }

    if (ompt_enabled.enabled) {
      if (ompt_enabled.ompt_callback_parallel_begin) {
        int team_size = master_set_numthreads
                            ? master_set_numthreads
                            : get__nproc_2(parent_team, master_tid);
        int flags = OMPT_INVOKER(call_context) |
                    ((microtask == (microtask_t)__kmp_teams_master)
                         ? ompt_parallel_league
                         : ompt_parallel_team);
        ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
            parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
            return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }

    master_th->th.th_ident = loc;

    // Parallel closely nested in teams construct:
    if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
      return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                                 call_context, microtask, invoker,
                                 master_set_numthreads, level,
                                 ompt_parallel_data, return_address, ap);
    } // End parallel closely nested in teams construct

    KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);

    // Determine the number of threads
    int enter_teams =
        __kmp_is_entering_teams(active_level, level, teams_level, ap);
    if ((!enter_teams &&
         (parent_team->t.t_active_level >=
          master_th->th.th_current_task->td_icvs.max_active_levels)) ||
        (__kmp_library == library_serial)) {
      KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
      nthreads = 1;
    } else {
      nthreads = master_set_numthreads
                     ? master_set_numthreads
                     : get__nproc_2(parent_team, master_tid);
      // Use the thread_limit set for the current target task if it exists,
      // else go with the deduced nthreads
      nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
                     ? task_thread_limit
                     : nthreads;
      // Check if we need to take the forkjoin lock (no need for serialized
      // parallel out of teams construct).
      if (nthreads > 1) {
        /* determine how many new threads we can use */
        __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
        /* If we execute teams from parallel region (on host), then teams
           should be created but each can only have 1 thread if nesting is
           disabled. If teams called from serial region, then teams and their
           threads should be created regardless of the nesting setting. */
        nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                         nthreads, enter_teams);
        if (nthreads == 1) {
          // Free lock for single thread execution here; for multi-thread
          // execution it will be freed later after team of threads created
          // and initialized.
          __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
        }
      }
    }
    KMP_DEBUG_ASSERT(nthreads > 0);

    // If we temporarily changed the set number of threads then restore it now
    master_th->th.th_set_nproc = 0;

    if (nthreads == 1) {
      return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                    invoker, master_th, parent_team,
                                    &ompt_parallel_data, &return_address,
                                    &parent_task_data, ap);
    } // if (nthreads == 1)

    // GEH: only modify the executing flag in the case when not serialized;
    //      the serialized case is handled in kmpc_serialized_parallel
    KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                  "curtask=%p, curtask_max_aclevel=%d\n",
                  parent_team->t.t_active_level, master_th,
                  master_th->th.th_current_task,
                  master_th->th.th_current_task->td_icvs.max_active_levels));
    master_th->th.th_current_task->td_flags.executing = 0;

    if (!master_th->th.th_teams_microtask || level > teams_level) {
      /* Increment our nested depth level */
      KMP_ATOMIC_INC(&root->r.r_in_parallel);
    }

    // See if we need to make a copy of the ICVs.
    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    kmp_nested_nthreads_t *nested_nth = NULL;
    if (!master_th->th.th_set_nested_nth &&
        (level + 1 < parent_team->t.t_nested_nth->used) &&
        (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
      nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
    } else if (master_th->th.th_set_nested_nth) {
      nested_nth = __kmp_override_nested_nth(master_th, level);
      if ((level + 1 < nested_nth->used) &&
          (nested_nth->nth[level + 1] != nthreads_icv))
        nthreads_icv = nested_nth->nth[level + 1];
      else
        nthreads_icv = 0; // don't update
    } else {
      nthreads_icv = 0; // don't update
    }

    // Figure out the proc_bind policy for the new team.
    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    // proc_bind_default means don't update
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
      proc_bind = proc_bind_false;
    } else {
      // No proc_bind clause specified; use current proc-bind-var for this
      // parallel region
      if (proc_bind == proc_bind_default) {
        proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
      }
      // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
      if (master_th->th.th_teams_microtask &&
          microtask == (microtask_t)__kmp_teams_master) {
        proc_bind = __kmp_teams_proc_bind;
      }
      // Figure the value of proc-bind-var for the child threads.
      if ((level + 1 < __kmp_nested_proc_bind.used) &&
          (__kmp_nested_proc_bind.bind_types[level + 1] !=
           master_th->th.th_current_task->td_icvs.proc_bind)) {
        // Do not modify the proc-bind ICV for the two teams construct forks;
        // they just let the proc-bind ICV pass through.
        if (!master_th->th.th_teams_microtask ||
            !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
          proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
      }
    }

    // Reset for next parallel region
    master_th->th.th_set_proc_bind = proc_bind_default;

    if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
      kmp_internal_control_t new_icvs;
      copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
      new_icvs.next = NULL;
      if (nthreads_icv > 0) {
        new_icvs.nproc = nthreads_icv;
      }
      if (proc_bind_icv != proc_bind_default) {
        new_icvs.proc_bind = proc_bind_icv;
      }

      /* allocate a new parallel team */
      KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
      team = __kmp_allocate_team(root, nthreads, nthreads, ompt_parallel_data,
                                 proc_bind, &new_icvs, argc, master_th);
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
        copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
    } else {
      /* allocate a new parallel team */
      KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
      team = __kmp_allocate_team(
          root, nthreads, nthreads, ompt_parallel_data,
          proc_bind, &master_th->th.th_current_task->td_icvs, argc, master_th);
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
        copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                  &master_th->th.th_current_task->td_icvs);
    }
    KF_TRACE(
        10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

    /* setup the new team */
    KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
    KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
    KMP_CHECK_UPDATE(team->t.t_ident, loc);
    KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
    KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                          return_address);
    KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
    if (!master_th->th.th_teams_microtask || level > teams_level) {
      int new_level = parent_team->t.t_level + 1;
      KMP_CHECK_UPDATE(team->t.t_level, new_level);
      new_level = parent_team->t.t_active_level + 1;
      KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
    } else {
      // Do not increase parallel level at start of the teams construct
      int new_level = parent_team->t.t_level;
      KMP_CHECK_UPDATE(team->t.t_level, new_level);
      new_level = parent_team->t.t_active_level;
      KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
    }
    kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
    // set primary thread's schedule as new run-time schedule
    KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

    KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
    KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

    // Check if hot team has potentially outdated list, and if so, free it
    if (team->t.t_nested_nth &&
        team->t.t_nested_nth != parent_team->t.t_nested_nth) {
      KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
      KMP_INTERNAL_FREE(team->t.t_nested_nth);
      team->t.t_nested_nth = NULL;
    }
    team->t.t_nested_nth = parent_team->t.t_nested_nth;
    if (master_th->th.th_set_nested_nth) {
      if (!nested_nth)
        nested_nth = __kmp_override_nested_nth(master_th, level);
      team->t.t_nested_nth = nested_nth;
      KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
      master_th->th.th_set_nested_nth = NULL;
      master_th->th.th_set_nested_nth_sz = 0;
      master_th->th.th_nt_strict = false;
    }

    // Update the floating point rounding in the team if required.
    propagateFPControl(team);

    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_begin();

    KA_TRACE(
        20,
        ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
         gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
         team->t.t_nproc));
    KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                     (team->t.t_master_tid == 0 &&
                      (team->t.t_parent == root->r.r_root_team ||
                       team->t.t_parent->t.t_serialized)));
    KMP_MB();

    /* now, setup the arguments */
    argv = (void **)team->t.t_argv;
    if (ap) {
      for (i = argc - 1; i >= 0; --i) {
        void *new_argv = va_arg(kmp_va_deref(ap), void *);
        KMP_CHECK_UPDATE(*argv, new_argv);
        argv++;
      }
    } else {
      for (i = 0; i < argc; ++i) {
        // Get args from parent team for teams construct
        KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
      }
    }

    /* now actually fork the threads */
    KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
    if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
      root->r.r_active = TRUE;

    __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
    __kmp_setup_icv_copy(team, nthreads,
                         &master_th->th.th_current_task->td_icvs, loc);

    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    if (team->t.t_active_level == 1 // only report frames at level 1
        && !master_th->th.th_teams_microtask) { // not in teams construct
      if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
          (__kmp_forkjoin_frames_mode == 3 ||
           __kmp_forkjoin_frames_mode == 1)) {
        kmp_uint64 tmp_time = 0;
        if (__itt_get_timestamp_ptr)
          tmp_time = __itt_get_timestamp();
        // Internal fork - report frame begin
        master_th->th.th_frame_time = tmp_time;
        if (__kmp_forkjoin_frames_mode == 3)
          team->t.t_region_time = tmp_time;
      } else {
        // only one notification scheme (either "submit" or "forking/joined",
        // not both)
        if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
            __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
          // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
          __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
        }
      }
    }

    /* now go on and do the work */
    KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
    KMP_MB();
    KF_TRACE(10,
             ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
              root, team, master_th, gtid));

    if (__itt_stack_caller_create_ptr) {
      // create new stack stitching id before entering fork barrier
      if (!enter_teams) {
        KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
        team->t.t_stack_id = __kmp_itt_stack_caller_create();
      } else if (parent_team->t.t_serialized) {
        // keep stack stitching id in the serialized parent_team;
        // current team will be used for parallel inside the teams;
        // the parent_team is blocked in fork in the teams construct.
        KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
        parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
      }
    }

    // Skip __kmp_internal_fork at teams construct; let only primary threads
    // execute.
    if (ap) {
      __kmp_internal_fork(loc, gtid, team);
      KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                    "master_th=%p, gtid=%d\n",
                    root, team, master_th, gtid));
    }

    if (call_context == fork_context_gnu) {
      KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
      return TRUE;
    }

    /* Invoke microtask for PRIMARY thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  team->t.t_id, team->t.t_pkfn));
  } // END of timer KMP_fork_call block

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If was beginning of a teams construct, then reset thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  return TRUE;
}
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // Restore the state outside the region.
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
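// The two helpers above back the OMPT join path: __kmp_join_ompt() fires the
// parallel-end callback first, then clears the implicit task's enter_frame and
// restores the thread state via __kmp_join_restore_state(). Keeping that
// order means a tool inspecting frames from inside the callback still sees
// them intact.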
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* set up current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

  void *team_microtask = (void *)team->t.t_pkfn;
  // For the GNU interface with a serialized parallel, the end-implicit-task
  // and end-parallel events are reported from __kmpc_end_serialized_parallel.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
  }

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in a teams construct.
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // The level was not incremented at the start of the teams construct,
        // so do it here - at its end.
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // Exiting a parallel inside teams: increment serialization so it is
        // restored by the next __kmpc_end_serialized_parallel.
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

    if (ompt_enabled.enabled) {
      if (fork_context == fork_context_gnu) {
        __ompt_lw_taskteam_unlink(master_th);
      }
      __kmp_join_restore_state(master_th, parent_team);
    }
    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // No barrier for internal teams at exit from the teams construct, but
    // there is a barrier for the external team (league).
    __kmp_internal_join(loc, gtid, team);
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // Destroy the stack stitching id after the join barrier.
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
  } else {
    master_th->th.th_task_state = 0; // no tasking in teams outside any parallel
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // Destroy the stack stitching id on exit from the teams construct; if
      // parent_team is active, the id is destroyed later by the primary
      // thread of the league.
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
  }

  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;

  // Mark the end of the "parallel" region for the ITT analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // Only one notification scheme: either "submit" or "forking/joined".
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  }

#if KMP_AFFINITY_SUPPORTED
  if (!exit_teams) {
    // Restore the primary thread's partition.
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;
  }
#endif // KMP_AFFINITY_SUPPORTED

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // Leave the team structure intact at the end of a parallel inside the
    // teams construct, so the same (hot) team works at the next parallel;
    // only adjust the nesting levels.
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }

    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore the number of threads in the team if needed.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust the states of the previously unused threads of the team.
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize the thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize the thread's task state.
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* The following lock has REL and ACQ semantics, separating the parallel
     user code of this region from the serial user code that runs after this
     function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

  master_th->th.th_def_allocator = team->t.t_def_allocator;

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team, master_th); // this will free the current team

  /* Keep the following inside the critical region: the old team may be
     reallocated while the primary thread's cached links are updated, and the
     hierarchy would transiently look inconsistent to debug assertions. */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore the serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root, master_th->th.th_serial_team, NULL);
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Restore the primary thread's task state from the team structure.
    KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
                     team->t.t_primary_task_state == 1);
    master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;

    // Copy the task team from the parent team to the primary thread.
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if KMP_AFFINITY_SUPPORTED
  if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
/* Decide whether to push an internal control record onto the serial team
   stack, and if so, do it. */
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record onto the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
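// The push above happens at most once per serialization level: a record is
// pushed only when the stack top was saved for a different (shallower) nested
// serialized level than the current one, so popping on exit restores exactly
// the ICVs of the enclosing level.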
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  /* clamp the requested value into the valid range */
  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth) && __kmp_hot_teams_max_level &&
      !__kmp_hot_teams_mode) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing the team size, threads no longer in the team should
        // unref their task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() was called.
    hot_team->t.t_size_changed = -1;
  }
}
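// Shrinking the hot team eagerly here, rather than at the next fork, keeps
// idle workers from spinning at the fork barrier after omp_set_num_threads()
// lowered nproc; the released threads are returned to the pool through
// __kmp_free_thread() above.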
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10,
           ("__kmp_set_max_active_levels: new max_active_levels for thread "
            "%d = (%d)\n",
            gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // A negative value is ignored: the current setting is kept and a warning
    // is issued (if warnings are allowed, as controlled by KMP_WARNINGS).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK: max_active_levels is in the valid range
    // [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]. Zero is allowed
    // (implementation-defined behavior).
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // If the input exceeds the upper limit, clamp it to the limit
    // (implementation-defined behavior).
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
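// Both build asserts guard the casts in the schedule get/set routines below:
// kmp_sched_t (the stable public numbering) and enum sched_type (the internal
// numbering) are stored into int-sized ICV fields and traced as ints, so each
// must stay exactly int-sized.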
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check that the kind parameter is valid; correct it if needed. Valid kinds
  // fall in one of two intervals - standard or extended.
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: the hint needs attention if the default schedule changes.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore the chunk value in case of a bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // Differ static chunked vs. unchunked: the chunk should be invalid to
      // indicate an unchunked schedule (which is the default).
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore the chunk parameter for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set; show that via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
int __kmp_get_ancestor_thread_num(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // We are in a teams region where multiple nested teams share a level.
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // Artificially increase ii to step over the teams league.
      if (ii == tlevel) {
        ii += 2; // three teams have the same level
      } else {
        ii++; // two teams have the same level
      }
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
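// The while loop above walks outward from the innermost team: each serialized
// nesting level inside a team consumes one level (the inner for loop), and
// only when the serialized levels are exhausted does the walk move to
// t_parent. The final return then distinguishes a serialized ancestor (whose
// thread number is always 0) from a real parent team (t_master_tid).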
int __kmp_get_team_size(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // We are in a teams region where multiple nested teams share a level.
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // Artificially increase ii to step over the teams league.
      if (ii == tlevel) {
        ii += 2; // three teams have the same level
      } else {
        ii++; // two teams have the same level
      }
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  // This routine exists because the pairs (__kmp_sched, __kmp_chunk) and
  // (__kmp_static, __kmp_guided) may be changed independently by
  // kmp_set_defaults(), so the up-to-date schedule is assembled here.

  kmp_r_sched_t r_sched;

  // __kmp_sched keeps its original value, so the user can set KMP_SCHEDULE
  // multiple times and get different run-time schedules in different roots.
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with the more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with the more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was never set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
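// In effect, r_sched is assembled from four globals: __kmp_sched supplies the
// base kind and its modifiers, __kmp_static / __kmp_guided refine the generic
// static and guided kinds, and __kmp_chunk supplies the chunk size, falling
// back to KMP_DEFAULT_CHUNK when it was never set.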
/* Allocate a new argument dataspace for the team, if needed */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if heap space was previously allocated for the args, free it */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for the arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for the arguments on the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : (2 * argc + 1);
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* set up the dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
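// A single-thread team only ever needs two shared dispatch buffers (current
// and next), while larger teams get the full __kmp_dispatch_num_buffers ring
// so that consecutive worksharing constructs can be in flight without waiting
// for the previous buffer to drain.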
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* free the per-thread dispatch buffers first */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  // Get the current state of the scheduling globals.
  kmp_r_sched_t r_sched = __kmp_get_schedule_global();

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level (corresponds to th_team_serialized)
      (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
      (kmp_int8)__kmp_env_blocktime, // bt_set: was blocktime explicitly set?
      __kmp_dflt_blocktime, // int blocktime
      __kmp_dflt_team_nth, // int nproc: threads for the next parallel region
      __kmp_cg_max_nth, // int thread_limit
      __kmp_task_max_nth, // int task_thread_limit
      __kmp_dflt_max_active_levels, // int max_active_levels
      r_sched, // kmp_r_sched_t sched: {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0], // kmp_proc_bind_t proc_bind
      __kmp_default_device, // int default_device
      NULL // struct kmp_internal_control *next
  };

  return g_icvs;
}
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  // Get the current state of the scheduling globals.
  kmp_r_sched_t r_sched = __kmp_get_schedule_global();
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* set up the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* allocate the root team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team = __kmp_allocate_team(root, 1, 1,
                                  __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                                  0, NULL);
  // A non-NULL value makes the debugger display the root team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize the root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  root_team->t.t_nested_nth = &__kmp_nested_nth;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* allocate the hot team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team = __kmp_allocate_team(root, 1,
                                 __kmp_dflt_team_nth_ub * 2,
                                 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                                 0, NULL);
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize the hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
  hot_team->t.t_nested_nth = &__kmp_nested_nth;
}
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
static void __kmp_print_structure_team_accum( // Add a team to a list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
) {

  // The list must terminate with an item where both entry and next are NULL.
  // A team is added to the list only once. The list is kept sorted in
  // ascending order by team id, but the team id is *not* a unique key.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search the list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // the team has been added before; exit
  }

  // The team is not found. Search again for the insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert the team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {

  kmp_team_list_t list;

  // Initialize the list of teams.
  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  for (int gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
    __kmp_printf("%2d", gtid);
    if (__kmp_threads != NULL) {
      __kmp_printf(" %p", __kmp_threads[gtid]);
    }
    if (__kmp_root != NULL) {
      __kmp_printf(" %p", __kmp_root[gtid]);
    }
    __kmp_printf("\n");
  }

  // Print out the __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n--------------------"
               "----------\n");
  if (__kmp_threads != NULL) {
    for (int gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf("    Our Root:       %p\n", thread->th.th_root);
        __kmp_print_structure_team("    Our Team:     ", thread->th.th_team);
        __kmp_print_structure_team("    Serial Team:  ",
                                   thread->th.th_serial_team);
        __kmp_printf("    Threads:      %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread("    Primary:      ",
                                     thread->th.th_team_master);
        __kmp_printf("    Serialized?:  %2d\n", thread->th.th_team_serialized);
        __kmp_printf("    Set NProc:    %2d\n", thread->th.th_set_nproc);
        __kmp_printf("    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
        __kmp_print_structure_thread("    Next in pool: ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out the __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n----------------------"
               "--------\n");
  if (__kmp_root != NULL) {
    for (int gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team("    Root Team:    ", root->r.r_root_team);
        __kmp_print_structure_team("    Hot Team:     ", root->r.r_hot_team);
        __kmp_print_structure_thread("    Uber Thread:  ",
                                     root->r.r_uber_thread);
        __kmp_printf("    Active?:      %2d\n", root->r.r_active);
        __kmp_printf("    In Parallel:  %2d\n",
                     KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  __kmp_printf("\n------------------------------\nTeams\n----------------------"
               "--------\n");
  while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team("    Parent Team:      ", team->t.t_parent);
    __kmp_printf("    Primary TID:      %2d\n", team->t.t_master_tid);
    __kmp_printf("    Max threads:      %2d\n", team->t.t_max_nproc);
    __kmp_printf("    Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf("    Number threads:   %2d\n", team->t.t_nproc);
    for (int i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf("    Thread %2d:      ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team("    Next in pool:     ", team->t.t_next_pool);
    __kmp_printf("\n");
    list = list->next;
  }

  // Print out the pools.
  __kmp_printf("\n------------------------------\nPools\n----------------------"
               "--------\n");
  __kmp_print_structure_thread("Thread pool:          ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool:            ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free the team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Check that the registered uber threads are still alive, and reclaim the
   array entries of those that are not. Returns the number of entries freed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // reclaim only roots that died in a
      // non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be made under __kmp_forkjoin_lock,
  // so resizing __kmp_threads does not need additional protection even if
  // foreign threads are present.

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* Windows static library only: reclaim the array entries of root threads
     that are already dead. */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute the expansion headroom to check whether we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put the old __kmp_threads array onto a list: any ongoing references to the
  // old array stay valid. The list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to match the new __kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
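// Capacity grows geometrically (doubling, capped at __kmp_sys_max_nth), and
// the superseded __kmp_threads array is retired onto __kmp_old_threads_list
// instead of being freed: readers that raced with the expansion can keep
// dereferencing the old array safely until library shutdown.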
/* Register the current thread as a root thread and obtain our gtid. We must
   have thread-private data and adjust_num_threads set up. Returns the gtid of
   the registered root thread. */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* The zero slot is reserved for the initial thread. */
  capacity = __kmp_threads_capacity;
  if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If this is not for initializing the hidden helper team, take
  // __kmp_hidden_helper_threads_num out of the capacity, because those slots
  // are counted in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }

  /* see if there are too many threads */
  if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
    if (__kmp_tp_cached) {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread),
                  KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                  KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
    } else {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
                  __kmp_msg_null);
    }
  }

  // When hidden helper tasking is enabled, __kmp_threads is organized as:
  // 0: the initial thread; [1, __kmp_hidden_helper_threads_num]: slots for
  // hidden helper threads; the rest: slots for regular OpenMP threads.
  if (TCR_4(__kmp_init_hidden_helper_threads)) {
    // Find an available slot for a hidden helper thread (slots 1 through
    // __kmp_hidden_helper_threads_num).
    for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
                   gtid <= __kmp_hidden_helper_threads_num;
         gtid++)
      ;
    KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
    KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
                 "hidden helper thread: T#%d\n",
                 gtid));
  } else {
    /* find an available thread slot, skipping slot 0 and the hidden helper
       slots */
    if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
      gtid = 0;
    } else {
      for (gtid = __kmp_hidden_helper_threads_num + 1;
           TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
        ;
    }
    KA_TRACE(
        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
    KMP_ASSERT(gtid < __kmp_threads_capacity);
  }

  /* update the global accounting */
  __kmp_all_nth++;
  TCW_4(__kmp_nth, __kmp_nth + 1);

  // If __kmp_adjust_gtid_mode is set, use method #1 (sp search) for low
  // thread counts and method #2 (keyed TLS) for higher ones.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary (middle initialization might not
     have occurred yet). */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* set up this new hierarchy */
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
    KMP_DEBUG_ASSERT(!root->r.r_root_team);
  }

#if KMP_STATS_ENABLED
  // Initialize stats as soon as possible (right after gtid assignment).
  __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
  __kmp_stats_thread_ptr->startLife();
  KMP_SET_THREAD_STATE(SERIAL_REGION);
#endif
  __kmp_initialize_root(root);

  /* set up the new root thread structure */
  if (root->r.r_uber_thread) {
    root_thread = root->r.r_uber_thread;
  } else {
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
    if (__kmp_storage_map) {
      __kmp_print_thread_storage_map(root_thread, gtid);
    }
    root_thread->th.th_info.ds.ds_gtid = gtid;
    root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
    root_thread->th.th_root = root;
    if (__kmp_env_consistency_check) {
      root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
    }
    __kmp_initialize_fast_memory(root_thread);

    KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
    __kmp_initialize_bget(root_thread);

    __kmp_init_random(root_thread); // Initialize the random number generator
  }

  /* set up the serial team held in reserve by the root thread */
  if (!root_thread->th.th_serial_team) {
    kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
    root_thread->th.th_serial_team =
        __kmp_allocate_team(root, 1, 1,
                            proc_bind_default, &r_icvs, 0, NULL);
  }
  KMP_ASSERT(root_thread->th.th_serial_team);
  KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                root_thread->th.th_serial_team));

  /* drop root_thread into place */
  TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);

  root->r.r_root_team->t.t_threads[0] = root_thread;
  root->r.r_hot_team->t.t_threads[0] = root_thread;
  root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
  // The serial team is created in reserve, not for execution (unused for now).
  root_thread->th.th_serial_team->t.t_serialized = 0;
  root->r.r_uber_thread = root_thread;

  /* initialize the thread; get it ready to go */
  __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
  TCW_4(__kmp_init_gtid, TRUE);

  /* prepare the primary thread for get_gtid() */
  __kmp_gtid_set_specific(gtid);

  __kmp_itt_thread_name(gtid);

#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_create_worker(gtid, root_thread, __kmp_stksize);
  KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);

  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
  { // Initialize the barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
      root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
    }
  }
  KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
                   KMP_INIT_BARRIER_STATE);

#if KMP_AFFINITY_SUPPORTED
  root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif /* KMP_AFFINITY_SUPPORTED */
  root_thread->th.th_def_allocator = __kmp_def_allocator;
  root_thread->th.th_prev_level = 0;
  root_thread->th.th_prev_num_threads = 1;

  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = root_thread;
  tmp->cg_thread_limit = __kmp_cg_max_nth;
  tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
                 " cg_nthreads init to 1\n",
                 root_thread, tmp));
  tmp->up = NULL;
  root_thread->th.th_cg_roots = tmp;

  __kmp_root_counter++;

  if (ompt_enabled.enabled) {

    kmp_info_t *root_thread = ompt_get_thread();

    ompt_set_thread_state(root_thread, ompt_state_overhead);

    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_initial, __ompt_get_thread_data_internal());
    }
    ompt_data_t *task_data;
    ompt_data_t *parallel_data;
    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                  NULL);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
    }

    ompt_set_thread_state(root_thread, ompt_state_work_serial);
  }
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();

  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  return gtid;
}
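// With hidden helper threads enabled, the gtid space handed out above is
// partitioned as: slot 0 for the initial thread, slots
// [1, __kmp_hidden_helper_threads_num] reserved for hidden helpers, and
// everything past that for regular roots and workers.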
// Free up hot teams that are no longer in use.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // the primary thread is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  hot_teams[level].hot_team = NULL;
  __kmp_free_team(root, team, NULL);
  return n;
}
// Reset a root thread and clear its root and hot teams. Returns the number of
// __kmp_threads entries directly and indirectly freed.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so r_hot_team must be cleared
  // before calling __kmp_free_team().
  __kmp_free_team(root, root_team, NULL);
  if (__kmp_hot_teams_max_level > 0) { // free nested hot teams and their
    // threads, if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, hot_team, NULL);

  // Before the root thread can be reaped, all other threads in teams that had
  // this root as an ancestor must have stopped trying to steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close the handle of the root duplicated in __kmp_create_worker. */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();

  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }

  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free the contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // The root thread cannot be placed in the thread pool, so it must be reaped.
  root->r.r_uber_thread = NULL;
  /* mark the root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
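// Ordering matters here: r_root_team/r_hot_team are cleared before the teams
// are freed (since __kmp_free_team() will not free a team still registered as
// hot), task teams are drained before the uber thread is reaped, and the
// contention-group node is released only when this root held its last
// reference.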
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* This lock should be OK, since unregister_root_current_thread is never
     called during an abort, only during a normal close. Furthermore, if you
     hold the forkjoin lock, you should never try to get the initz lock. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered)) {
    // the runtime is shutting down, so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must already be held. Unregisters a root thread that is
   not the current thread. Returns the number of __kmp_threads entries freed. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
  /* th_info.ds.ds_gtid is set up in __kmp_allocate_thread/create_worker;
     th_serial_team is set up in __kmp_allocate_thread */
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  kmp_info_t *master = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master);
  KMP_DEBUG_ASSERT(master->th.th_root);

  TCW_SYNC_PTR(this_thr->th.th_team, team);

  this_thr->th.th_info.ds.ds_tid = tid;
  this_thr->th.th_set_nproc = 0;
  if (__kmp_tasking_mode != tskm_immediate_exec)
    // When tasking is possible, threads are not safe to reap until they are
    // done tasking; this is set when tasking code is exited in wait.
    this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  else // no tasking --> always safe to reap
    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
  this_thr->th.th_set_proc_bind = proc_bind_default;

#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
  this_thr->th.th_root = master->th.th_root;

  /* set up the thread's cache of the team structure */
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  this_thr->th.th_dispatch = &team->t.t_dispatch[tid];

  this_thr->th.th_local.this_construct = 0;

  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }

  if (this_thr != master && // the primary's CG root is initialized elsewhere
      this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
    // Make the new thread's CG root the same as the primary thread's.
    KMP_DEBUG_ASSERT(master->th.th_cg_roots);
    kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
    if (tmp) {
      // The worker changes CG; check whether the old CG should be freed.
      int i = tmp->cg_nthreads--;
      KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                     " on node %p of thread %p to %d\n",
                     this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
      if (i == 1) {
        __kmp_free(tmp); // last thread left the CG --> free it
      }
    }
    this_thr->th.th_cg_roots = master->th.th_cg_roots;
    // Increment the new CG root's counter to add the new thread.
    this_thr->th.th_cg_roots->cg_nthreads++;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
                   " node %p of thread %p to %d\n",
                   this_thr, this_thr->th.th_cg_roots,
                   this_thr->th.th_cg_roots->cg_root,
                   this_thr->th.th_cg_roots->cg_nthreads));
    this_thr->th.th_current_task->td_icvs.thread_limit =
        this_thr->th.th_cg_roots->cg_thread_limit;
  }

  /* Initialize dynamic dispatch */
  {
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    // Use team max_nproc, since it never changes for the team.
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
    KMP_ASSERT(dispatch);
    KMP_DEBUG_ASSERT(team->t.t_dispatch);
    KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);

    dispatch->th_disp_index = 0;
    dispatch->th_doacross_buf_idx = 0;
    if (!dispatch->th_disp_buffer) {
      dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(disp_size);

      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            gtid, &dispatch->th_disp_buffer[0],
            &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                          ? 1
                                          : __kmp_dispatch_num_buffers],
            disp_size,
            "th_%d.th_dispatch.th_disp_buffer "
            "(team_%d.t_dispatch[%d].th_disp_buffer)",
            gtid, team->t.t_id, gtid);
      }
    } else {
      memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
    }

    dispatch->th_dispatch_pr_current = 0;
    dispatch->th_dispatch_sh_current = 0;

    dispatch->th_deo_fcn = 0; /* ORDERED     */
    dispatch->th_dxo_fcn = 0; /* END ORDERED */
  }

  this_thr->th.th_next_pool = NULL;

  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
}
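// The dispatch buffers above are sized from t_max_nproc rather than t_nproc
// on purpose: max_nproc never changes for the life of the team, so resizing
// the hot team up and down does not force a reallocation of every thread's
// private dispatch ring.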
/* Allocate a new thread for the requesting team. This is only called from
   within a fork/join critical section. First try to get an available thread
   from the thread pool; if none is available, fork a new one. The caller
   should have ensured this is possible. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
  KMP_MB();

  /* first, try to get one from the thread pool, unless the allocating thread
     is the hidden helper main thread */
  if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    __kmp_suspend_initialize_thread(new_thr);
    __kmp_lock_suspend_mx(new_thr);
    if (new_thr->th.th_active_in_pool == TRUE) {
      KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      new_thr->th.th_active_in_pool = FALSE;
    }
    __kmp_unlock_suspend_mx(new_thr);

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);

    /* set up the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    new_thr->th.th_task_state = 0;

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Make sure the pool thread has transitioned to waiting on its own
      // thread struct; it is activated in __kmp_allocate_team when the team
      // size grows.
      KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
    }

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary (middle initialization might
       never have occurred). */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_DEBUG
    // If the thread entered the pool via __kmp_free_thread, wait_flag should
    // != KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#endif

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no pool thread available: fork a new one */
  KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

  // If this is the first worker thread the RTL is creating, also launch the
  // monitor thread, as early as possible.
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
      // Wait until the monitor has actually started.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }

  /* find a new gtid: hidden helper threads get slots starting at 1, regular
     workers skip past the hidden helper slots */
  {
    int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                             ? 1
                             : __kmp_hidden_helper_threads_num + 1;

    for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
         ++new_gtid) {
      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
    }

    if (TCR_4(__kmp_init_hidden_helper_threads)) {
      KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
    }
  }

  /* allocate space for it */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  new_thr->th.th_nt_strict = false;
  new_thr->th.th_nt_loc = NULL;
  new_thr->th.th_nt_sev = severity_fatal;
  new_thr->th.th_nt_msg = NULL;

  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  // Suppress race-condition detection on the synchronization flags in debug
  // mode; this helps to analyze library internals while eliminating false
  // positives.
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
#if KMP_OS_WINDOWS
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
#else
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            &new_thr->th.th_suspend_init_count,
                            sizeof(new_thr->th.th_suspend_init_count));
#endif
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
                            sizeof(new_thr->th.th_bar[0].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
                            sizeof(new_thr->th.th_bar[1].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
                            sizeof(new_thr->th.th_bar[2].bb.b_go));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  /* add the reserve serialized team, initialized from the team's primary
     thread */
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
                                          proc_bind_default, &r_icvs, 0, NULL);
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // created in reserve, not for execution
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* set up the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

  __kmp_initialize_fast_memory(new_thr);

  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);

  __kmp_init_random(new_thr); // Initialize the random number generator

  /* Initialize these only once when the thread is grabbed for a team
     allocation */
  KA_TRACE(20,
           ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
            __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  TCW_PTR(new_thr->th.th_sleep_loc, NULL);
  new_thr->th.th_sleep_loc_type = flag_unset;

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
  new_thr->th.th_blocking = false;

#if KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
  new_thr->th.th_def_allocator = __kmp_def_allocator;
  new_thr->th.th_prev_level = 0;
  new_thr->th.th_prev_num_threads = 1;

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  new_thr->th.th_set_nested_nth = NULL;
  new_thr->th.th_set_nested_nth_sz = 0;

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // If __kmp_adjust_gtid_mode is set, use method #1 (sp search) for low
  // thread counts and method #2 (keyed TLS) for higher ones.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary (middle initialization might
     never have occurred). */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_AFFINITY_SUPPORTED
  // Set the affinity of the new thread.
  __kmp_affinity_set_init_mask(new_gtid, FALSE);
#endif

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(
      10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}
/* Reinitialize a team for reuse. The hot team code calls this at every fork
   barrier, so EPCC barrier tests are extremely sensitive to changes here,
   especially writes to the team struct, which cause a cache invalidation in
   all threads. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy the ICVs to the primary thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
/* Initialize the team data structure. This assumes t_threads and t_max_nproc
   are already set. The arguments are not touched. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent is left alone: clearing it would break the hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
  th->th.th_first_place = first;
  th->th.th_last_place = last;
  th->th.th_new_place = newp;
  if (newp != th->th.th_current_place) {
    if (__kmp_display_affinity && team->t.t_display_affinity != 1)
      team->t.t_display_affinity = 1;
  }
  // Copy topology information associated with the new place
  th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
  th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
}
// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
// It calculates the worker + primary thread's partition based upon the parent
// thread's partition, and binds each worker to a thread in their partition.
// The primary thread's partition should already include its current binding.
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Do not partition places for the hidden helper team
  if (KMP_HIDDEN_HELPER_TEAM(team))
    return;
  // Copy the primary thread's place partition to the team struct
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  int num_masks = __kmp_affinity.num_masks;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));

  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have the proc_bind policy set to proc_bind_default.
    // Not an issue -- we don't rebind the primary thread for any policy.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;

  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        __kmp_set_thread_place(team, th, first_place, last_place, place);

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, first_place, last_place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place full; don't add extra
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;

  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != num_masks) {
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th != NULL);

          int fplace = place, nplace = place;
          s_count = 1;
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          __kmp_set_thread_place(team, th, fplace, place, nplace);
          gap_ct++;

          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, num_masks));
        }
      } else {
        /* The partition covers all places uniformly, so create n_th
           partitions of roughly n_places/n_th size and put each thread into
           the first place of its partition. */
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
          if (first >= n_places) {
            if (masters_place) {
              first -= n_places;
              last -= n_places;
              if (first == (masters_place + 1)) {
                KMP_DEBUG_ASSERT(f == n_th);
                first--;
              }
              if (last == masters_place) {
                KMP_DEBUG_ASSERT(f == (n_th - 1));
                last--;
              }
            } else {
              KMP_DEBUG_ASSERT(f == n_th);
              first = 0;
              last = 0;
            }
          }
          if (last >= n_places) {
            last = (n_places - 1);
          }
          place = first;
          current += spacing;
          if (f < n_th) {
            KMP_DEBUG_ASSERT(0 <= first);
            KMP_DEBUG_ASSERT(n_places > first);
            KMP_DEBUG_ASSERT(0 <= last);
            KMP_DEBUG_ASSERT(n_places > last);
            KMP_DEBUG_ASSERT(last_place >= first_place);
            th = team->t.t_threads[f];
            KMP_DEBUG_ASSERT(th);
            __kmp_set_thread_place(team, th, first, last, place);
            KA_TRACE(100,
                     ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                      "partition = [%d,%d], spacing = %.4f\n",
                      __kmp_gtid_from_thread(team->t.t_threads[f]),
                      team->t.t_id, f, th->th.th_new_place,
                      th->th.th_first_place, th->th.th_last_place, spacing));
          }
        }
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, place, place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }

  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}

#endif // KMP_AFFINITY_SUPPORTED
/* Allocate a new team data structure to use; take one off of the free pool if
   available */
kmp_team_t *__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                                ompt_data_t ompt_parallel_data,
#endif
                                kmp_proc_bind_t new_proc_bind,
                                kmp_internal_control_t *new_icvs, int argc,
                                kmp_info_t *master) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
  KMP_MB();

  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    team = master->th.th_team;
    level = team->t.t_active_level;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1 &&
          ( // #teams > 1
              team->t.t_pkfn ==
                  (microtask_t)__kmp_teams_master || // inner fork of the teams
              master->th.th_teams_level <
                  team->t.t_level)) { // or nested parallel inside the teams
        ++level; // not increment if #teams == 1, or for outer fork of the
        // teams; increment otherwise
      }
      // Do not perform the place partition if inner fork of the teams
      // Wait until nested parallel region encountered inside teams construct
      if ((master->th.th_teams_size.nteams == 1 &&
           master->th.th_teams_level >= team->t.t_level) ||
          (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
        do_place_partition = 0;
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level && hot_teams &&
        hot_teams[level].hot_team) {
      // hot team has already been allocated for given level
      use_hot_team = 1;
    } else {
      use_hot_team = 0;
    }
  } else {
    // check we won't access uninitialized hot_teams, just in case
    KMP_DEBUG_ASSERT(new_nproc == 1);
  }

  // Optimization to use a "hot" team
  if (use_hot_team && new_nproc > 1) {
    KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
    team = hot_teams[level].hot_team;
#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif

    if (team->t.t_nproc != new_nproc &&
        __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier may need a resize
      int old_nthr = team->t.t_nproc;
      __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
    }

    // If not doing the place partition, then reset the team's proc bind
    // to indicate that partitioning of all threads still needs to happen
    if (do_place_partition == 0)
      team->t.t_proc_bind = proc_bind_default;
    // Has the number of threads changed?
    /* Let's assume the most common case is that the number of threads is
       unchanged, and put that case first. */
    if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
      // This case can mean that omp_set_num_threads() was called and the hot
      // team size was already reduced, so we check the special flag
      if (team->t.t_size_changed == -1) {
        team->t.t_size_changed = 1;
      } else {
        KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
      }

      // TODO???: team->t.t_max_active_levels = new_max_active_levels;
      kmp_r_sched_t new_sched = new_icvs->sched;
      // set primary thread's schedule as new run-time schedule
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#if KMP_AFFINITY_SUPPORTED
      if ((team->t.t_size_changed == 0) &&
          (team->t.t_proc_bind == new_proc_bind)) {
        if (new_proc_bind == proc_bind_spread) {
          if (do_place_partition) {
            // add flag to update only master for spread
            __kmp_partition_places(team, 1);
          }
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        if (do_place_partition) {
          KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
          __kmp_partition_places(team);
        }
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already reduced earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
      // When decreasing team size, threads no longer in the team should
      // unref task team.
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th);
          th->th.th_task_team = NULL;
        }
      }
      if (__kmp_hot_teams_mode == 0) {
        // AC: saved number of threads should correspond to team's value in
        // this mode; can be bigger in mode 1, when hot team has some threads
        // in reserve.
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
        hot_teams[level].hot_team_nth = new_nproc;
        /* release the extra threads we don't need any more */
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          __kmp_free_thread(team->t.t_threads[f]);
          team->t.t_threads[f] = NULL;
        }
      } else {
        // When keeping extra threads in team (mode 1), switch them to wait on
        // their own b_go flag.
        for (f = new_nproc; f < team->t.t_nproc; ++f) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
            if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
              balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
            }
            KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
          }
        }
      }

      team->t.t_nproc = new_nproc;
      // TODO???: team->t.t_max_active_levels = new_max_active_levels;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      // Update remaining threads
      for (f = 0; f < new_nproc; ++f) {
        team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      }

      // restore the current task state of the primary thread: should be the
      // implicit task
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));

      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; f++) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } else { // team->t.t_nproc < new_nproc
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value and use to update only
      team->t.t_size_changed = 1;

      int avail_threads = hot_teams[level].hot_team_nth;
      if (new_nproc < avail_threads)
        avail_threads = new_nproc;
      kmp_info_t **other_threads = team->t.t_threads;
      for (f = team->t.t_nproc; f < avail_threads; ++f) {
        // Adjust barrier data of reserved threads (if any) of the team
        // Other data will be set in __kmp_initialize_info() below.
        int b;
        kmp_balign_t *balign = other_threads[f]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
      if (hot_teams[level].hot_team_nth >= new_nproc) {
        // we have all needed threads in reserve, no need to allocate any;
        // only possible in mode 1, cannot have reserved threads in mode 0
        KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
        team->t.t_nproc = new_nproc; // just get reserved threads involved
      } else {
        // We may have some threads in reserve, but not enough; get reserved
        // threads involved if any.
        team->t.t_nproc = hot_teams[level].hot_team_nth;
        hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
        if (team->t.t_max_nproc < new_nproc) {
          /* reallocate larger arrays */
          __kmp_reallocate_team_arrays(team, new_nproc);
          __kmp_reinitialize_team(team, new_icvs, NULL);
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) &&  \
    KMP_AFFINITY_SUPPORTED
        /* Temporarily set the full mask for the primary thread before creating
           workers. Workers inherit the affinity from the primary thread, so if
           many workers are created on a single core in quick succession, they
           would not get a chance to set their own affinity for a long time. */
        kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
#endif

        /* allocate new threads for the hot team */
        for (f = team->t.t_nproc; f < new_nproc; f++) {
          kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
          KMP_DEBUG_ASSERT(new_worker);
          team->t.t_threads[f] = new_worker;

          KA_TRACE(20,
                   ("__kmp_allocate_team: team %d init T#%d arrived: "
                    "join=%llu, plain=%llu\n",
                    team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));

          { // Initialize barrier data for new threads.
            int b;
            kmp_balign_t *balign = new_worker->th.th_bar;
            for (b = 0; b < bs_last_barrier; ++b) {
              balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
              KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
                               KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
              balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
            }
          }
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) &&  \
    KMP_AFFINITY_SUPPORTED
        /* Restore the initial primary thread's affinity mask */
        new_temp_affinity.restore();
#endif
      }
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already increased earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
      /* Make the newly allocated threads part of the team; init ICVs, etc. */
      __kmp_initialize_team(team, new_nproc, new_icvs,
                            root->r.r_uber_thread->th.th_ident);

      /* reinitialize the threads */
      KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
      for (f = 0; f < team->t.t_nproc; ++f)
        __kmp_initialize_info(team->t.t_threads[f], team, f,
                              __kmp_gtid_from_tid(f, team));

      // set th_task_state for new threads in hot team with older thread's state
      kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
      for (f = old_nproc; f < team->t.t_nproc; ++f)
        team->t.t_threads[f]->th.th_task_state = old_state;

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } // Check changes in number of threads

    if (master->th.th_teams_microtask) {
      for (f = 1; f < new_nproc; ++f) {
        // propagate teams construct specific info to workers
        kmp_info_t *thr = team->t.t_threads[f];
        thr->th.th_teams_microtask = master->th.th_teams_microtask;
        thr->th.th_teams_level = master->th.th_teams_level;
        thr->th.th_teams_size = master->th.th_teams_size;
      }
    }
    if (level) {
      // Sync barrier state for nested hot teams, not needed for the outermost
      // hot team.
      for (f = 1; f < new_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        int b;
        kmp_balign_t *balign = thr->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

    /* reallocate space for arguments if necessary */
    __kmp_alloc_argv_entries(argc, team, TRUE);
    KMP_CHECK_UPDATE(team->t.t_argc, argc);
    // The hot team re-uses the previous task team, if untouched during the
    // previous release->gather phase.

    KF_TRACE(10, (" hot_team = %p\n", team));

#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif

#if OMPT_SUPPORT
    __ompt_team_assign_id(team, ompt_parallel_data);
#endif

    KMP_MB();
    return team;
  }

  /* next, let's try to take one from the team pool */
  KMP_MB();
  for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
    /* TODO: consider resizing undersized teams instead of reaping them, now
       that we have a resizing mechanism */
    if (team->t.t_max_nproc >= max_nproc) {
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      if (max_nproc > 1 &&
          __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        if (!team->t.b) { // Allocate barrier structure
          team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
        }
      }

      /* setup the team for fresh use */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      /* reallocate space for arguments if necessary */
      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
      { // Initialize barrier data.
        int b;
        for (b = 0; b < bs_last_barrier; ++b) {
          team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
          team->t.t_bar[b].b_master_arrived = 0;
          team->t.t_bar[b].b_team_arrived = 0;
#endif
        }
      }

      team->t.t_proc_bind = new_proc_bind;

      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

#if OMPT_SUPPORT
      __ompt_team_assign_id(team, ompt_parallel_data);
#endif
      team->t.t_nested_nth = NULL;

      KMP_MB();
      return team;
    }

    /* reap team if it is too small, then loop back and check the next one */
    /* TODO: Use technique to find the right size hot-team, don't reap them */
    team = __kmp_reap_team(team);
    __kmp_team_pool = team;
  }

  /* nothing available in the pool, no matter, make a new team! */
  KMP_MB();
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  if (max_nproc > 1 &&
      __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
    // Allocate barrier structure
    team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
  }

  /* NOTE well, for some reason allocating one big buffer and dividing it up
     seems to really hurt performance a lot on the P4, so, let's not use this */
  __kmp_allocate_team_arrays(team, max_nproc);

  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes
  // memory, no need to duplicate
  team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes
  // memory, no need to duplicate

  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }

  /* allocate space for arguments */
  __kmp_alloc_argv_entries(argc, team, FALSE);
  team->t.t_argc = argc;

  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
      team->t.t_bar[b].b_master_arrived = 0;
      team->t.t_bar[b].b_team_arrived = 0;
#endif
    }
  }

  team->t.t_proc_bind = new_proc_bind;

#if OMPT_SUPPORT
  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;
#endif

  KMP_MB();

  team->t.t_nested_nth = NULL;

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
  return team;
}
/* free the team; return it to the team pool; release all the threads
   associated with it */
void __kmp_free_team(kmp_root_t *root, kmp_team_t *team, kmp_info_t *master) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
  int level;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        // level was not increased in teams construct for team_of_masters
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        // level was not increased in teams construct for team_of_workers
        // before the parallel
        ++level;
      }
    }
    kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn,
               NULL); // Important for Debugging Support Library.
  team->t.t_copyin_counter = 0; // init counter for possible reuse
  // Do not reset the pointer to parent team to NULL for hot teams.

  /* if we are not a hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach a reapable state
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time, check this
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
            break;
          }
#endif
          // first check if thread is sleeping
          if (th->th.th_sleep_loc)
            __kmp_null_resume_wrapper(th);
          KMP_CPU_PAUSE();
        }
      }

      // Delete task teams
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
          __kmp_free_task_team(master, task_team);
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Before clearing the parent pointer, check if nested_nth list should be
    // freed
    if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
        team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
      KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
      KMP_INTERNAL_FREE(team->t.t_nested_nth);
    }
    team->t.t_nested_nth = NULL;

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        (void)KMP_COMPARE_AND_STORE_ACQ32(
            &(team->t.t_threads[f]->th.th_used_in_team), 1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Wake up sleeping threads waiting on the distributed barrier
      team->t.b->go_release();
      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        for (f = 1; f < team->t.t_nproc; ++f) {
          if (team->t.b->sleep[f].sleep) {
            __kmp_atomic_resume_64(
                team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                (kmp_atomic_flag_64<> *)NULL);
          }
        }
      }
      // Wait for threads to be removed from the team
      for (int f = 1; f < team->t.t_nproc; ++f) {
        while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
          KMP_CPU_PAUSE();
      }
    }

    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    /* TODO limit size of team pool, call reap_team if pool too large */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  } else { // hot team: see if first worker is a CG root
    KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
                     team->t.t_threads[1]->th.th_cg_roots);
    if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
      // Clean up the CG root nodes on workers so that this team can be re-used
      for (f = 1; f < team->t.t_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
                         thr->th.th_cg_roots->cg_root == thr);
        // Pop current CG root off list
        kmp_cg_root_t *tmp = thr->th.th_cg_roots;
        thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore the current task's thread_limit from the CG root
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
      }
    }
  }

  KMP_MB();
}
/* reap the team: destroy it, reclaim all its resources, and free its memory */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* TODO clean the threads that are a part of this? */

  // Free team arrays
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
// Free the thread. Don't reap it, just place it on the pool of available
// threads.
//
// The affinity mechanism needs a predictable OMP tid <-> gtid binding, so the
// free list (__kmp_thread_pool) is always kept sorted by gtid. To avoid
// scanning the whole list on every insertion, __kmp_thread_pool_insert_pt
// caches the last insertion point; it is reset whenever it would point past
// the new insert position.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving a thread to the pool, switch the thread to wait on its own
  // b_go flag, and uninitialized (NULL) team.
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // Thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // Worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves CG --> free it
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the __kmp_thread_pool_insert_pt is already past the new insert
  // point, then we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread.
  // scan is the address of a link in the list, possibly the address of
  // __kmp_thread_pool itself. In the absence of nested parallelism, the for
  // loop will have 0 iterations.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  } else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred                */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  KMP_MB();
}
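// Example of the ordering maintained above: with pool {T2, T5, T9} and
// __kmp_thread_pool_insert_pt caching T5, freeing T7 scans from T5 and yields
// {T2, T5, T7, T9}; freeing T3 instead resets the insert point (3 < 5) and
// rescans from the head. Keeping the pool sorted by gtid makes gtid reuse
// deterministic, which is what gives the affinity code its predictable
// OMP tid <-> gtid binding.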
/* ------------------------------------------------------------------------ */

void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  // TODO: add a configuration option for time granularity
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KMP_MB();
  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

#if OMPT_SUPPORT
  ompt_data_t *thread_data = nullptr;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KMP_MB();

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    pteam = &this_thr->th.th_team;

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }
#endif

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
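// Worker lifecycle, as implemented above: a worker parks in
// __kmp_fork_barrier() until the primary thread hands it a team and a
// microtask, runs (*pteam)->t.t_invoke(gtid), meets the team again in
// __kmp_join_barrier(), and loops back to wait for the next parallel region.
// The loop exits only when __kmp_global.g.g_done is set at shutdown.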
/* ------------------------------------------------------------------------ */

void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage,
     because 0 is reserved for the nothing-stored case */

  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] When the atexit handler is called, there may still be more than
   one thread alive. */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        /* Need release fence here to prevent seg faults for tree forkjoin
           barrier (GEH) */
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }

    // Terminate OS thread.
    __kmp_reap_worker(thread);

    // The thread was killed asynchronously. If it was actively spinning in
    // the thread pool, decrement the global count. There is a small timing
    // hole here: if the worker thread was just waking up after sleeping in
    // the pool, had reset its th_active_in_pool flag but not yet decremented
    // __kmp_thread_pool_active_nth, the global counter might not get updated.
    // Currently this can only happen as the library is unloaded, so there are
    // no harmful side effects.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }
  }

  __kmp_free_implicit_task(thread);

// Free the fast memory for tasking
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

  --__kmp_all_nth;
  // __kmp_nth was decremented when thread was added to the pool.

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred                */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();
}
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

#if KMP_OS_WINDOWS
  /* In Win static library, we can't tell when a root actually dies, so we
     reclaim the data structures for any root threads that have died but not
     unregistered themselves, in order to shut down cleanly.
     In Win dynamic library we also can't tell when a thread dies. */
  __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of
// dead roots
#endif

  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates.  */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // Other alive roots were found; only reap the monitor.
    KMP_MB(); /* Flush all pending memory write invalidates.  */

    // Need to check that the monitor was initialized before reaping it. If we
    // are called from __kmp_atfork_child (which sets __kmp_init_monitor=0),
    // then __kmp_monitor will appear to contain valid data, but it is only
    // valid in the parent process, not the child.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
#ifdef KMP_DEBUG
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
      }
    }
#endif

    KMP_MB();

    // Reap the worker threads. This is valid for now, but be careful if
    // threads are reaped sooner.
    while (__kmp_thread_pool != NULL) { // Loop thru free pool.
      // Get the next thread from the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      // Reap it.
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap teams.
    while (__kmp_team_pool != NULL) { // Loop thru free pool.
      // Get the next team from the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      // Reap it.
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

#if KMP_OS_UNIX
    // Threads that are not reaped should not access any resources since they
    // are going to be deallocated soon, so the shutdown sequence should wait
    // until all threads either exit the final spin-waiting loop or begin
    // sleeping after the given blocktime.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }
#endif

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: Add some checking...
      // Something like KMP_DEBUG_ASSERT(__kmp_thread[i] == NULL);
    }

    /* Make sure all threadprivate destructors get run by joining with all
       worker threads before resetting this flag */
    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
    KMP_MB();

#if KMP_USE_MONITOR
    // See note above about checking monitor initialization.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  }

  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  __kmp_cleanup();
#if OMPT_SUPPORT
  ompt_fini();
#endif
}
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again; it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  // Note: we do not set g_abort without setting g_done, so this check looks
  // redundant, because the next check will work in any case.
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  // If the hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d  (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread; gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        __kmp_itthash_clean(__kmp_threads[gtid]);
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
/* worker threads may call this function through the atexit handler, if they
 * call exit() */
/* For now, skip the usual subsequent processing and just dump the debug buffer.
   TODO: do a thorough shutdown instead. */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      // The unregister-library call was added here when we switched to shm on
      // Linux; without it, lots of files would be left in /dev/shm. Clean up
      // the shared memory file before exiting.
      __kmp_unregister_library();
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root.
     Alternatively, we can use a counter of roots that is atomically updated by
     __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and
     __kmp_internal_end_*. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();
}
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again; it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  /* we'll check this later too, after we get the lock */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  // If the hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d  (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
      /* we don't know who we are */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread; gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }
#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused)
  // AC: let's not shut down the dynamic library at the exit of an uber
  // thread; we will do a better shutdown later in the library destructor.
  {
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* should we finish the run-time? are all siblings done? */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
/* On RHEL 3u5 if linked statically, getpid() returns different values in
   each thread. If registration and unregistration go in different threads
   (omp_misc_other_root_exit.cpp test case), the registered_lib_env env var
   can not be found, because the name will contain a different pid. */
// macOS* complains about the name being too long with the additional getuid()
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
} // __kmp_reg_status_name
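// Shape of the registration record, for reference (values are illustrative
// only): the variable name is e.g. "__KMP_REGISTERED_LIB_12345" (or
// "__KMP_REGISTERED_LIB_12345_1000" with the uid suffix), and its value is
// "<flag address>-<flag value>-<library file>", e.g.
// "0x7f0a2c001234-cafe5678-libomp.so", produced by the "%p-%lx-%s" format in
// __kmp_register_library_startup() below.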
#if defined(KMP_USE_SHM)
bool __kmp_shm_available = false;
bool __kmp_tmp_available = false;
// If /dev/shm is not accessible, we will create a temporary file under /tmp.
char *temp_reg_status_file_name = nullptr;
#endif

void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  union { // For pretty printing of CPU time.
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));

  while (!done) {

    char *value = NULL; // Actual value of the environment variable.

#if defined(KMP_USE_SHM)
    char *shm_name = nullptr;
    char *data1 = nullptr;
    __kmp_shm_available = __kmp_detect_shm();
    if (__kmp_shm_available) {
      int fd1 = -1;
      shm_name = __kmp_str_format("/%s", name);
      int shm_preexist = 0;
      fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
      if ((fd1 == -1) && (errno == EEXIST)) {
        // file didn't open because it already exists.
        // try opening existing file
        fd1 = shm_open(shm_name, O_RDWR, 0600);
        if (fd1 == -1) { // file didn't open
          KMP_WARNING(FunctionError, "Can't open SHM");
          __kmp_shm_available = false;
        } else { // able to open existing file
          shm_preexist = 1;
        }
      }
      if (__kmp_shm_available && shm_preexist == 0) { // SHM created, set size
        if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size
          KMP_WARNING(FunctionError, "Can't set size of SHM");
          __kmp_shm_available = false;
        }
      }
      if (__kmp_shm_available) { // SHM exists, now map it
        data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                             fd1, 0);
        if (data1 == MAP_FAILED) { // failed to map shared memory
          KMP_WARNING(FunctionError, "Can't map SHM");
          __kmp_shm_available = false;
        }
      }
      if (__kmp_shm_available) { // SHM mapped
        if (shm_preexist == 0) { // set data to SHM, set value
          KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
        }
        // Read value from either what we just wrote or the existing file.
        value = __kmp_str_format("%s", data1); // read value from SHM
        munmap(data1, SHM_SIZE);
      }
      if (fd1 != -1)
        close(fd1); // close file descriptor
    }
    if (!__kmp_shm_available)
      __kmp_tmp_available = __kmp_detect_tmp();
    if (!__kmp_shm_available && __kmp_tmp_available) {
      // SHM failed to work due to an error other than that the file already
      // exists. Try to create a temp file under /tmp instead. If /tmp isn't
      // accessible, fall back to using the environment variable.
      int fd1 = -1;
      temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);
      int tmp_preexist = 0;
      fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
      if ((fd1 == -1) && (errno == EEXIST)) {
        // file didn't open because it already exists.
        // try opening existing file
        fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
        if (fd1 == -1) { // file didn't open
          KMP_WARNING(FunctionError, "Can't open TEMP");
          __kmp_tmp_available = false;
        } else {
          tmp_preexist = 1;
        }
      }
      if (__kmp_tmp_available && tmp_preexist == 0) {
        // we created the /tmp file, now set its size
        if (ftruncate(fd1, SHM_SIZE) == -1) { // error occurred setting size
          KMP_WARNING(FunctionError, "Can't set size of /tmp file");
          __kmp_tmp_available = false;
        }
      }
      if (__kmp_tmp_available) {
        data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                             fd1, 0);
        if (data1 == MAP_FAILED) { // failed to map /tmp
          KMP_WARNING(FunctionError, "Can't map /tmp");
          __kmp_tmp_available = false;
        }
      }
      if (__kmp_tmp_available) {
        if (tmp_preexist == 0) { // set data to TMP, set value
          KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
        }
        // Read value from either what we just wrote or the existing file.
        value = __kmp_str_format("%s", data1); // read value from /tmp
        munmap(data1, SHM_SIZE);
      }
      if (fd1 != -1)
        close(fd1);
    }
    if (!__kmp_shm_available && !__kmp_tmp_available) {
      // no /dev/shm and no /tmp -- fall back to environment variable.
      // Set environment variable, but do not overwrite if it exists.
      __kmp_env_set(name, __kmp_registration_str, 0);
      // read value to see if it got set
      value = __kmp_env_get(name);
    }
#else // Windows and WASI do not use the shared memory implementation
    __kmp_env_set(name, __kmp_registration_str, 0);
    // read value to see if it got set
    value = __kmp_env_get(name);
#endif

    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
      done = 1; // Ok, environment variable set successfully, exit the loop.
    } else {
      // Oops. Write failed. Another copy of OpenMP RTL is in memory.
      // Check whether it is alive or dead.
      int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
      char *tail = value;
      char *flag_addr_str = NULL;
      char *flag_val_str = NULL;
      char const *file_name = NULL;
      __kmp_str_split(tail, '-', &flag_addr_str, &tail);
      __kmp_str_split(tail, '-', &flag_val_str, &tail);
      file_name = tail;
      if (tail != NULL) {
        unsigned long *flag_addr = 0;
        unsigned long flag_val = 0;
        KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
        KMP_SSCANF(flag_val_str, "%lx", &flag_val);
        if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
          // First, check whether the environment-encoded address is mapped
          // into this address space. If so, dereference it to see if it still
          // has the right value.
          if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
            neighbor = 1;
          } else {
            // If not, then we know the other copy of the library is no longer
            // running.
            neighbor = 2;
          }
        }
      }
      switch (neighbor) {
      case 0: // Cannot parse environment variable -- neighbor status unknown.
        // Assume it is the incompatible format of a future version of the
        // library, and that the other library is alive.
        file_name = "unknown library";
        KMP_FALLTHROUGH();
      // Attention! Falling to the next case. That's intentional.
      case 1: { // Neighbor is alive.
        // Check whether duplicates are allowed.
        char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
        if (!__kmp_str_match_true(duplicate_ok)) {
          // That's not allowed. Issue fatal error.
          __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
                      KMP_HNT(DuplicateLibrary), __kmp_msg_null);
        }
        KMP_INTERNAL_FREE(duplicate_ok);
        __kmp_duplicate_library_ok = 1;
        done = 1; // Exit the loop.
      } break;
      case 2: { // Neighbor is dead.

#if defined(KMP_USE_SHM)
        if (__kmp_shm_available) { // close shared memory.
          shm_unlink(shm_name); // this removes the file in /dev/shm
        } else if (__kmp_tmp_available) {
          unlink(temp_reg_status_file_name); // this removes the temp file
        } else {
          // Clear the variable and try to register the library again.
          __kmp_env_unset(name);
        }
#else
        // Clear the variable and try to register the library again.
        __kmp_env_unset(name);
#endif
      } break;
      default: {
        KMP_DEBUG_ASSERT(0);
      } break;
      }
    }
    KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
    if (shm_name)
      KMP_INTERNAL_FREE((void *)shm_name);
#endif
  } // while
  KMP_INTERNAL_FREE((void *)name);

} // func __kmp_register_library_startup
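// Duplicate-detection protocol used above: a second copy of the runtime that
// finds an existing record parses it back into (flag_addr, flag_val,
// file_name). If flag_addr is still mapped in this address space and
// *flag_addr == flag_val, a live copy of the library registered it
// (neighbor == 1) and, unless KMP_DUPLICATE_LIB_OK is set, startup aborts
// with a fatal error; otherwise the record is stale (neighbor == 2), so it is
// removed and registration is retried on the next loop iteration.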
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = NULL;

#if defined(KMP_USE_SHM)
  char *shm_name = nullptr;
  int fd1;
  if (__kmp_shm_available) {
    shm_name = __kmp_str_format("/%s", name);
    fd1 = shm_open(shm_name, O_RDONLY, 0600);
    if (fd1 != -1) { // File opened successfully
      char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
      if (data1 != MAP_FAILED) {
        value = __kmp_str_format("%s", data1); // read value from SHM
        munmap(data1, SHM_SIZE);
      }
      close(fd1);
    }
  } else if (__kmp_tmp_available) { // try /tmp
    fd1 = open(temp_reg_status_file_name, O_RDONLY);
    if (fd1 != -1) { // File opened successfully
      char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
      if (data1 != MAP_FAILED) {
        value = __kmp_str_format("%s", data1); // read value from /tmp
        munmap(data1, SHM_SIZE);
      }
      close(fd1);
    }
  } else { // fall back to the environment variable
    value = __kmp_env_get(name);
  }
#else
  value = __kmp_env_get(name);
#endif

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
//  Ok, this is our variable. Delete it.
#if defined(KMP_USE_SHM)
    if (__kmp_shm_available) {
      shm_unlink(shm_name); // this removes the file in /dev/shm
    } else if (__kmp_tmp_available) {
      unlink(temp_reg_status_file_name); // this removes the temp file
    } else {
      __kmp_env_unset(name);
    }
#else
    __kmp_env_unset(name);
#endif
  }

#if defined(KMP_USE_SHM)
  if (shm_name)
    KMP_INTERNAL_FREE(shm_name);
  if (temp_reg_status_file_name)
    KMP_INTERNAL_FREE(temp_reg_status_file_name);
#endif

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;

} // __kmp_unregister_library
// End of Library registration stuff.
// -----------------------------------------------------------------------------

#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */
#if KMP_HAVE_UMWAIT
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() function is available in RHEL7 and SLES12. If a system with an
// earlier OS is used to build the RTL, we'll use the following internal
// function when the entry is not found.
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

static void __kmp_user_level_mwait_init() {
  // When getauxval() and the correct value of AT_INTELPHIUSERMWAIT are
  // available, use them to find out whether user-level mwait is enabled.
  // Otherwise, forcibly set __kmp_mwait_enabled = TRUE on Intel MIC if the
  // environment variable KMP_USER_LEVEL_MWAIT was set to TRUE.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */
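// Note on the CPUID probe above: leaf 7 (sub-leaf 0) reports WAITPKG support
// in bit 5 of ECX, which is what gates umwait/tpause use here. User-level
// mwait on KNL-class MIC hardware is instead detected through the
// AT_INTELPHIUSERMWAIT auxiliary-vector entry, with the weak getauxval()
// stub above returning 0 on systems that lack the entry.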
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

#if OMPT_SUPPORT
  ompt_pre_init();
#endif
#if OMPD_SUPPORT
  ompd_init();
#endif

  __kmp_validate_locks();

#if ENABLE_LIBOMPTARGET
  /* Initialize functions from libomptarget */
  __kmp_init_omptarget();
#endif

  /* Initialize internal memory allocator */
  __kmp_init_allocator();

  /* Register the library startup via an environment variable or via a mapped
     shared memory file and check to see whether another copy of the library
     is already registered. Since a forked child process is often terminated,
     we postpone the registration till middle initialization in the child. */
  if (__kmp_need_register_serial)
    __kmp_register_library_startup();

  /* TODO reinitialization of library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

/* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
#if KMP_USE_MONITOR
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
#endif
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);

  /* conduct initialization and initial setup of configuration */
  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif
#if ENABLE_LIBOMPTARGET
  __kmp_target_init();
#endif

  // Some global variable initialization moved here from kmp_env_initialize()
  __kmp_abort_delay = 0;

  // From __kmp_init_dflt_team_nth()
  /* assume the entire machine will be used */
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
  // part
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
#if KMP_USE_MONITOR
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
#endif
  // From "KMP_LIBRARY" part of __kmp_env_initialize()
  __kmp_library = library_throughput;
  // From KMP_SCHEDULE initialization
  __kmp_static = kmp_sch_static_balanced;
// Barrier initialization. Moved here from __kmp_env_initialize() barrier
// branch bit control and barrier method control parts.
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#endif // KMP_FAST_REDUCTION_BARRIER
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
    __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
    __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
    __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER
    if (i == bs_reduction_barrier) {
      __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
      __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
      __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
      __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
    }
#endif // KMP_FAST_REDUCTION_BARRIER
  }
#if KMP_FAST_REDUCTION_BARRIER
#undef kmp_reduction_barrier_release_pat
#undef kmp_reduction_barrier_gather_pat
#undef kmp_reduction_barrier_release_bb
#undef kmp_reduction_barrier_gather_bb
#endif // KMP_FAST_REDUCTION_BARRIER
#if KMP_MIC_SUPPORTED
  if (__kmp_mic_type == mic2) { // KNC
    // AC: plain=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
    __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
    __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
        1; // forkjoin release
    __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
  }
#if KMP_FAST_REDUCTION_BARRIER
  if (__kmp_mic_type == mic2) { // KNC
    __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  }
#endif // KMP_FAST_REDUCTION_BARRIER
#endif // KMP_MIC_SUPPORTED

// From KMP_CHECKS initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_init_nesting_mode();

  __kmp_env_initialize(NULL);

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  __kmp_user_level_mwait_init();
#endif
// Print all messages in the message catalog for testing purposes
#ifdef KMP_DEBUG
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
#endif

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL. Just in
  // case, set them to NULL -- some memory may leak, but subsequent code will
  // work even if pools are not freed.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable sized records */
  /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
     expandable. Since allocation is cache-aligned, just add extra padding at
     the end. */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);

  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth ==
                   0); // Asserts fail if the library is reinitializing and
  KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
  __kmp_all_nth = 0;
  __kmp_nth = 0;

  /* setup the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  __kmp_common_initialize();

#if KMP_OS_UNIX
  /* invoke the child fork handler */
  __kmp_register_atfork();
#endif

#if !KMP_DYNAMIC_LIB ||                                                        \
    ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
  {
    /* Invoke the exit handler when the program finishes, only for static
       library and macOS* dynamic. For other dynamic libraries, we already
       have _fini and DllMain. */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
#if KMP_OS_UNIX
  /* NOTE: make sure that this is called before the user installs their own
     signal handlers so that the user handlers are called first. This way they
     can return false, not call our handler, avoid terminating the library,
     and continue execution where they left off. */
  __kmp_install_signals(FALSE);
#endif /* KMP_OS_UNIX */
#if KMP_OS_WINDOWS
  __kmp_install_signals(TRUE);
#endif /* KMP_OS_WINDOWS */
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;

  __kmp_init_serial = TRUE;

  if (__kmp_version) {
    __kmp_print_version_1();
  }

  if (__kmp_settings) {
    __kmp_env_print();
  }

  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }

#if OMPT_SUPPORT
  ompt_post_init();
#endif

  KMP_MB();

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
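// All the *_initialize() entry points in this file follow the same
// double-checked pattern seen above: an unsynchronized fast-path test of the
// init flag, then the same test repeated under __kmp_initz_lock before doing
// the real work, so concurrent first calls initialize exactly once without
// serializing the common already-initialized case.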
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  if (UNLIKELY(!__kmp_need_register_serial)) {
    // We are in a forked child process. The registration was skipped during
    // serial initialization; do it here.
    __kmp_register_library_startup();
  }

  // Save the previous value for the __kmp_dflt_team_nth so that
  // we can avoid some reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the
  // number of cores on the machine.
  __kmp_affinity_initialize(__kmp_affinity);
#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now.
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  if (__kmp_nesting_mode > 0)
    __kmp_set_nesting_mode_threads();

  /* There's no harm in continuing if the following check fails,
     but it indicates an error in the previous logic. */
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Run through the __kmp_threads array and set the num-threads ICV for
    // each root thread that is currently registered with the RTL (and which
    // has not already explicitly set its nthreads-var with a call to
    // omp_set_num_threads()).
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary, now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* guard against reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* __kmp_initz_lock is already held, so calling __kmp_serial_initialize
     would cause a deadlock; call the do_* routines directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_HANDLE_SIGNALS
  /* must be after serial initialization */
  __kmp_install_signals(TRUE);
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before initializing hidden helper
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check. Note that this double check should not be placed before
  // __kmp_parallel_initialize as it would cause a deadlock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings, if not already done.
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Indicate that we're initializing the hidden helper team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform-independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait for the finish of hidden helper team initialization
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* none of the threads have encountered any constructs yet */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0;
  dispatch->th_doacross_buf_idx = 0;
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates.  */
}
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in a teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when teams masters were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch the league of teams now, but do not let workers execute (they hang
// on the fork barrier until the next parallel region)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last argument ("1") eliminates the join barrier, which would not
  // work because the workers are in a fork barrier waiting for more
  // parallel regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* This sets the requested number of threads for the next parallel region
   encountered by this team. Since this should be enclosed in the forkjoin
   critical section, it should avoid race conditions with asymmetrical
   nested parallelism. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
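
/* Illustrative sketch (an assumption about the lowering, not the literal
   codegen): the compiler-facing entry point __kmpc_push_num_threads (in
   kmp_csupport.cpp) forwards to __kmp_push_num_threads, so a region such as

       #pragma omp parallel num_threads(4)

   is lowered roughly as

       __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
       __kmpc_fork_call(&loc, 0, outlined_fn);

   The request is one-shot: the subsequent fork consumes th_set_nproc, so it
   only affects the next parallel region encountered by this thread. */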
void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
                                 int *num_threads_list) {
  kmp_info_t *thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(list_length > 1);

  if (num_threads_list[0] > 0)
    thr->th.th_set_nproc = num_threads_list[0];
  thr->th.th_set_nested_nth =
      (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
  for (kmp_uint32 i = 0; i < list_length; ++i)
    thr->th.th_set_nested_nth[i] = num_threads_list[i];
  thr->th.th_set_nested_nth_sz = list_length;
}
void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
                                  const char *msg) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_nt_strict = true;
  thr->th.th_nt_loc = loc;
  // if sev is unset, make it fatal
  if (sev == severity_warning)
    thr->th.th_nt_sev = sev;
  else
    thr->th.th_nt_sev = severity_fatal;
  // if msg is unset, use an appropriate message
  if (msg)
    thr->th.th_nt_msg = msg;
  else
    thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
                        "strict num_threads clause.";
}
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads without warning since it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var);
    // no thread_limit clause specified - do not change thread-limit-var ICV
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size from exceeding thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary of the league primaries. Store the new
    // thread limit; the old limit is saved in the th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1;
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
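
/* Worked example of the clamping above (numbers are hypothetical): with no
   thread_limit clause (num_threads == 0), __kmp_teams_thread_limit unset,
   __kmp_avail_proc = 16 and num_teams = 4, the initial guess is
   16 / 4 = 4 threads per team. That value is then clamped, in turn, by the
   nthreads-var ICV (__kmp_dflt_team_nth), by thread-limit-var, and by the
   aggregate cap num_teams * num_threads <= __kmp_teams_max_nth, and is
   finally rounded up to at least 1. */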
/* This sets the requested number of teams for the teams region and/or the
   number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // The OpenMP specification requires requested values to be positive,
    // but callers may pass anything, so check
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set the number of teams (= number of threads in the outer "parallel"
  // of the teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
/* OpenMP 5.1 variant: the num_teams clause may carry a lower and an upper
   bound for the number of teams. */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // defaults to 1 team

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // exact number of teams required
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set the number of teams (= number of threads in the outer "parallel"
  // of the teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
// Set proc_bind for the next parallel region encountered by this thread
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc ==
                         team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled &&
      (ompt_state == ompt_state_wait_barrier_teams ||
       ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
      sync_kind = ompt_sync_region_barrier_teams;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  KMP_ASSERT(this_thr->th.th_team == team);
}
/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we are at
// the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
                root, set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this root (if we are at the outer parallel level), and the currently
  // executing thread (which becomes the primary) are available to add to the
  // new team, but are currently contributing to the system load and must be
  // accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once g_dynamic_mode is set to
    // dynamic_thread_limit, we shouldn't wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting newly
  // running procs: the real system load at this instant should be at least
  // as large as the #active omp threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block,
  // so there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }
  __kmp_old_threads_list = NULL;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;

  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  __kmp_dflt_team_nth = 0;
  __kmp_dflt_team_nth_ub = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

  if (__kmp_nesting_nth_level)
    KMP_INTERNAL_FREE(__kmp_nesting_nth_level);

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  __kmpc_destroy_allocator(KMP_GTID_SHUTDOWN, __kmp_def_allocator);
  __kmp_def_allocator = omp_default_mem_alloc;

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* must be called in serial section of
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
/* Getting team information common for all team API */
// Returns NULL if not in a teams construct
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
/* Functions for OpenMP 5.0 affinity format API */

typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g., 'L' -> nesting_level
  const char *long_name; // from spec, e.g., "nesting_level"
  char field_format; // how to format the field in printf style
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
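
/* For reference, a format string such as

       "OMP: pid %P tid %i thread %n of %N on host %H"

   is expanded field by field using the table above; the short form ("%H")
   and the brace-delimited long form ("%{host}") select the same entry.
   Fields the implementation has no information for print "undefined", per
   the OpenMP 5.0 affinity-format rules (see
   __kmp_aux_capture_affinity_field below). */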
// Return (possibly only partially) filled-in buffer
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit number widths; this also prevents overflowing the
    // format variable
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    if (th->th.th_affin_mask) {
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
      rc = __kmp_str_buf_print(field_buffer, format, buf.str);
      __kmp_str_buf_free(&buf);
    } else {
      rc = __kmp_str_buf_print(field_buffer, "%s", "disabled");
    }
  } break;
#endif
  default:
    // If the implementation has no info for a field type, "undefined" is
    // printed, per the spec
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Return the number of characters needed to hold the affinity string (not
// including the null byte); the result is printed to buffer, which the
// caller can then handle
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put a literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
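
/* Usage sketch from user code (standard OpenMP 5.0 API; the user-facing
   entry points live elsewhere in the runtime and end up in the two
   routines above):

       #include <omp.h>
       // prints this thread's affinity using affinity-format-var
       omp_display_affinity(NULL);

       char buf[256];
       // captures instead of printing; returns the required length
       size_t n = omp_capture_affinity(buf, sizeof(buf), "tid %n of %N");

   Setting OMP_DISPLAY_AFFINITY=TRUE makes the runtime display the affinity
   string for every thread on entry to a parallel region. */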
/* ------------------------------------------------------------------------ */

void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in microseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
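
/* Usage sketch: this routine backs the documented kmp_set_blocktime() API
   and the KMP_BLOCKTIME environment variable. For example,

       kmp_set_blocktime(0);   // idle workers sleep immediately

   makes idle workers yield to the OS right away instead of spin-waiting for
   the default interval (KMP_DEFAULT_BLOCKTIME) before going to sleep. */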
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: the critical-construct method (lck != NULL). If
  // (reduce_data != NULL && reduce_func != NULL) the tree-reduction method
  // can be selected, and if loc->flags contains KMP_IDENT_ATOMIC_REDUCE the
  // atomic-reduce method can be selected; the RTL decides among the methods
  // the compiler generated code for.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // Another choice of getting a team size (with 1 dynamic deref) is slower.
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
    KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HAIKU || \
    KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
    KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HAIKU || KMP_OS_HURD || \
    KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use the critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use the critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION: if the team is serialized (team_size == 1), ignore
  // the forced reduction method and stay with the unsynchronized method
  // (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
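
/* Note on the encoding (see the PACKED_REDUCTION_METHOD_T machinery in
   kmp.h): the reduction method is stored in the upper bits of the packed
   value and the barrier variant used by tree reductions in the low byte,
   which is why the getter above shifts right by 8 to recover the method
   ordinal. A hypothetical worked example: if the packed value were
   (3 << 8) | bs_reduction_barrier, the getter would return 3. */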
// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely. Resume happens naturally
// when OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // wake it up if it is sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// This function is called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      return 1; // runtime is not paused, so cannot be resumed
    }
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused || level == kmp_stop_tool_paused) {
    // requesting hard pause or stop_tool pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
// The team size is changing, so the distributed barrier must be modified.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust
  // the size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If a thread is still transitioning into the team, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in the team now; move it to the unused state
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Workers should see transition status 2 and move to 0; but they may need
  // to be woken up first
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake the worker
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
// Add the threads back to the team
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. We're going to set th_used_in_team to 3 to indicate
  // to the threads that they should transition themselves back into the
  // team. Then, if blocktime isn't infinite, the thread could be sleeping,
  // so we send a resume to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    (void)KMP_COMPARE_AND_STORE_ACQ32(
        &(team->t.t_threads[f]->th.th_used_in_team), 0, 3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }

  // The threads should be transitioning to the team; when they are done,
  // they should have set th_used_in_team to 1. This loop forces the primary
  // thread to wait until all of them have moved into the team.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
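
/* Reading of the th_used_in_team protocol used by the two routines above
   (inferred from the transitions in this file): 0 = not part of the team,
   1 = in the team, 2 = told to leave (the worker moves 2 -> 0 itself), and
   3 = told to join (the worker moves 3 -> 1 itself). The primary thread
   only ever requests a transition (stores 2, or CASes 0 -> 3) and then
   spins until every worker has acknowledged by reaching the stable state. */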
/* Globals and functions for hidden helper tasks */
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads, in
  // case a regular thread pushes a hidden helper task to a helper thread
  // that has not been initialized yet; wait until all of them have been.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If main thread, then wait for the signal
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
} // namespace

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag back to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
// Nesting mode is used to simplify/automate the use of nested parallelism
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// Set the number of threads for the top levels of the nesting-mode hierarchy
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--; // skip levels that contribute no parallelism
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) {
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; make a reasonable guess
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max active levels was set, set nesting mode levels to the same
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
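
/* Worked example (hypothetical machine): on a topology of 2 sockets x 8
   cores x 2 hw-threads, get_ratio() yields 2, 8, and 2 for the successive
   levels, so the loop above fills __kmp_nesting_nth_level = {2, 8, 2} and
   three nesting levels are used: an outermost parallel region gets 2
   threads (one per socket), the next level 8, the innermost 2. Levels with
   ratio 1 are skipped via the loc-- adjustment. */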
#if ENABLE_LIBOMPTARGET
void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, void *current_task,
                           void *event) = NULL;
void __kmp_target_init() {
  // Look for hooks in the libomptarget library
  *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync");
}
#endif /* ENABLE_LIBOMPTARGET */
// Empty symbols to export when a feature is disabled
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif