LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "kmp_utils.h"
22#include "ompt-specific.h"
23
24#define MAX_MESSAGE 512
25
26// flags will be used in future, e.g. to implement openmp_strict library
27// restrictions
28
37void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
38 // By default __kmpc_begin() is no-op.
39 char *env;
40 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
41 __kmp_str_match_true(env)) {
42 __kmp_middle_initialize();
43 __kmp_assign_root_init_mask();
44 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
45 } else if (__kmp_ignore_mppbeg() == FALSE) {
46 // By default __kmp_ignore_mppbeg() returns TRUE.
47 __kmp_internal_begin();
48 KC_TRACE(10, ("__kmpc_begin: called\n"));
49 }
50}
51
60void __kmpc_end(ident_t *loc) {
61 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
62 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
63 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
64 // returns FALSE and __kmpc_end() will unregister this root (it can cause
65 // library shut down).
66 if (__kmp_ignore_mppend() == FALSE) {
67 KC_TRACE(10, ("__kmpc_end: called\n"));
68 KA_TRACE(30, ("__kmpc_end\n"));
69
70 __kmp_internal_end_thread(-1);
71 }
72#if KMP_OS_WINDOWS && OMPT_SUPPORT
73 // Normal exit process on Windows does not allow worker threads of the final
74 // parallel region to finish reporting their events, so shutting down the
75 // library here fixes the issue at least for the cases where __kmpc_end() is
76 // placed properly.
77 if (ompt_enabled.enabled)
78 __kmp_internal_end_library(__kmp_gtid_get_specific());
79#endif
80}
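// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// Shows how a compiler (historically, the Intel compilers) brackets an OpenMP
// program with the two entry points above; both calls are effectively no-ops
// unless KMP_IGNORE_MPPBEG / KMP_IGNORE_MPPEND or KMP_INITIAL_THREAD_BIND
// request otherwise. The names and the source-location string are hypothetical.
static ident_t example_prog_loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;main;1;1;;"};

static void example_program_bracketing(void) {
  __kmpc_begin(&example_prog_loc, /*flags=*/0); // program prologue
  // ... user code, possibly containing parallel regions ...
  __kmpc_end(&example_prog_loc); // program epilogue; may shut the library down
}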
81
100kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
101 kmp_int32 gtid = __kmp_entry_gtid();
102
103 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
104
105 return gtid;
106}
107
122kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
123 KC_TRACE(10,
124 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
125
126 return TCR_4(__kmp_all_nth);
127}
128
135kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
136 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
137 return __kmp_tid_from_gtid(__kmp_entry_gtid());
138}
139
145kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
146 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
147
148 return __kmp_entry_thread()->th.th_team->t.t_nproc;
149}
150
157kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
158#ifndef KMP_DEBUG
159
160 return TRUE;
161
162#else
163
164 const char *semi2;
165 const char *semi3;
166 int line_no;
167
168 if (__kmp_par_range == 0) {
169 return TRUE;
170 }
171 semi2 = loc->psource;
172 if (semi2 == NULL) {
173 return TRUE;
174 }
175 semi2 = strchr(semi2, ';');
176 if (semi2 == NULL) {
177 return TRUE;
178 }
179 semi2 = strchr(semi2 + 1, ';');
180 if (semi2 == NULL) {
181 return TRUE;
182 }
183 if (__kmp_par_range_filename[0]) {
184 const char *name = semi2 - 1;
185 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186 name--;
187 }
188 if ((*name == '/') || (*name == ';')) {
189 name++;
190 }
191 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 return __kmp_par_range < 0;
193 }
194 }
195 semi3 = strchr(semi2 + 1, ';');
196 if (__kmp_par_range_routine[0]) {
197 if ((semi3 != NULL) && (semi3 > semi2) &&
198 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 return __kmp_par_range < 0;
200 }
201 }
202 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204 return __kmp_par_range > 0;
205 }
206 return __kmp_par_range < 0;
207 }
208 return TRUE;
209
210#endif /* KMP_DEBUG */
211}
212
219kmp_int32 __kmpc_in_parallel(ident_t *loc) {
220 return __kmp_entry_thread()->th.th_root->r.r_active;
221}
222
232void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
233 kmp_int32 num_threads) {
234 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
235 global_tid, num_threads));
236 __kmp_assert_valid_gtid(global_tid);
237 __kmp_push_num_threads(loc, global_tid, num_threads);
238}
239
240void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
241 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
242 /* the num_threads are automatically popped */
243}
244
245void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
246 kmp_int32 proc_bind) {
247 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
248 proc_bind));
249 __kmp_assert_valid_gtid(global_tid);
250 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
251}
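// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// For a directive such as
//     #pragma omp parallel num_threads(4) proc_bind(close)
// the compiler pushes both requests immediately before the fork; they apply
// only to the next parallel region started by this thread. The names below
// (example_push_loc, example_team_task, ...) are hypothetical.
static ident_t example_push_loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;g;3;1;;"};

static void example_team_task(kmp_int32 *gtid, kmp_int32 *btid) {
  (void)gtid;
  (void)btid; // body of the parallel region would go here
}

static void example_push_then_fork(void) {
  kmp_int32 gtid = __kmpc_global_thread_num(&example_push_loc);
  __kmpc_push_num_threads(&example_push_loc, gtid, /*num_threads=*/4);
  __kmpc_push_proc_bind(&example_push_loc, gtid, proc_bind_close);
  __kmpc_fork_call(&example_push_loc, /*argc=*/0,
                   (kmpc_micro)example_team_task);
}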
252
263void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
264 int gtid = __kmp_entry_gtid();
265
266#if (KMP_STATS_ENABLED)
267 // If we were in a serial region, then stop the serial timer, record
268 // the event, and start parallel region timer
269 stats_state_e previous_state = KMP_GET_THREAD_STATE();
270 if (previous_state == stats_state_e::SERIAL_REGION) {
271 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
272 } else {
273 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
274 }
275 int inParallel = __kmpc_in_parallel(loc);
276 if (inParallel) {
277 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
278 } else {
279 KMP_COUNT_BLOCK(OMP_PARALLEL);
280 }
281#endif
282
283 // maybe saving thr_state is enough here
284 {
285 va_list ap;
286 va_start(ap, microtask);
287
288#if OMPT_SUPPORT
289 ompt_frame_t *ompt_frame;
290 if (ompt_enabled.enabled) {
291 kmp_info_t *master_th = __kmp_threads[gtid];
292 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
293 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
294 }
295 OMPT_STORE_RETURN_ADDRESS(gtid);
296#endif
297
298#if INCLUDE_SSC_MARKS
299 SSC_MARK_FORKING();
300#endif
301 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
302 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
303 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
304 kmp_va_addr_of(ap));
305#if INCLUDE_SSC_MARKS
306 SSC_MARK_JOINING();
307#endif
308 __kmp_join_call(loc, gtid
309#if OMPT_SUPPORT
310 ,
311 fork_context_intel
312#endif
313 );
314
315 va_end(ap);
316
317#if OMPT_SUPPORT
318 if (ompt_enabled.enabled) {
319 ompt_frame->enter_frame = ompt_data_none;
320 }
321#endif
322 }
323
324#if KMP_STATS_ENABLED
325 if (previous_state == stats_state_e::SERIAL_REGION) {
326 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
327 KMP_SET_THREAD_STATE(previous_state);
328 } else {
329 KMP_POP_PARTITIONED_TIMER();
330 }
331#endif // KMP_STATS_ENABLED
332}
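// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// For
//     int counter = 0;
//     #pragma omp parallel shared(counter)
//     { ... }
// a compiler emits an outlined "microtask" plus a __kmpc_fork_call whose
// trailing arguments are the addresses of the argc shared variables; the
// runtime forwards them to the microtask after the two thread-id parameters.
// All names here are hypothetical stand-ins for compiler-generated symbols.
static ident_t example_fork_loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;f;5;1;;"};

static void example_outlined(kmp_int32 *gtid, kmp_int32 *btid, int *counter) {
  (void)btid;
  (void)counter; // per-thread body; *gtid is the global thread number
  KC_TRACE(10, ("example_outlined: running on T#%d\n", *gtid));
}

static void example_parallel_region(void) {
  int counter = 0;
  // argc == 1: one shared variable, passed by address after the microtask.
  __kmpc_fork_call(&example_fork_loc, /*argc=*/1,
                   (kmpc_micro)example_outlined, &counter);
}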
333
344void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
345 kmp_int32 cond, void *args) {
346 int gtid = __kmp_entry_gtid();
347 if (cond) {
348 if (args)
349 __kmpc_fork_call(loc, argc, microtask, args);
350 else
351 __kmpc_fork_call(loc, argc, microtask);
352 } else {
353 __kmpc_serialized_parallel(loc, gtid);
354
355#if OMPT_SUPPORT
356 void *exit_frame_ptr;
357#endif
358
359 if (args)
360 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
361 /*npr=*/0,
362 /*argc=*/1, &args
363#if OMPT_SUPPORT
364 ,
365 &exit_frame_ptr
366#endif
367 );
368 else
369 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
370 /*npr=*/0,
371 /*argc=*/0,
372 /*args=*/nullptr
373#if OMPT_SUPPORT
374 ,
375 &exit_frame_ptr
376#endif
377 );
378
379 __kmpc_end_serialized_parallel(loc, gtid);
380 }
381}
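// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// A front end may lower
//     #pragma omp parallel if(cond)
// through __kmpc_fork_call_if: when cond is zero the outlined body runs
// serialized on the calling thread, otherwise the call degenerates to a normal
// __kmpc_fork_call. The capture struct and all names are hypothetical.
struct example_captures {
  int n;
  double *data;
};

static void example_if_outlined(kmp_int32 *gtid, kmp_int32 *btid, void *args) {
  (void)gtid;
  (void)btid;
  example_captures *c = (example_captures *)args;
  (void)c; // ... work on c->data[0 .. c->n-1] ...
}

static void example_parallel_if(ident_t *loc, kmp_int32 cond,
                                example_captures *c) {
  __kmpc_fork_call_if(loc, /*argc=*/1, (kmpc_micro)example_if_outlined, cond,
                      /*args=*/c);
}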
382
394void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
395 kmp_int32 num_teams, kmp_int32 num_threads) {
396 KA_TRACE(20,
397 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
398 global_tid, num_teams, num_threads));
399 __kmp_assert_valid_gtid(global_tid);
400 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
401}
402
413void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
414 kmp_int32 thread_limit) {
415 __kmp_assert_valid_gtid(global_tid);
416 kmp_info_t *thread = __kmp_threads[global_tid];
417 if (thread_limit > 0)
418 thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
419}
420
437void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
438 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
439 kmp_int32 num_threads) {
440 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
441 " num_teams_ub=%d num_threads=%d\n",
442 global_tid, num_teams_lb, num_teams_ub, num_threads));
443 __kmp_assert_valid_gtid(global_tid);
444 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
445 num_threads);
446}
447
458void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
459 ...) {
460 int gtid = __kmp_entry_gtid();
461 kmp_info_t *this_thr = __kmp_threads[gtid];
462 va_list ap;
463 va_start(ap, microtask);
464
465#if KMP_STATS_ENABLED
466 KMP_COUNT_BLOCK(OMP_TEAMS);
467 stats_state_e previous_state = KMP_GET_THREAD_STATE();
468 if (previous_state == stats_state_e::SERIAL_REGION) {
469 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
470 } else {
471 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
472 }
473#endif
474
475 // remember teams entry point and nesting level
476 this_thr->th.th_teams_microtask = microtask;
477 this_thr->th.th_teams_level =
478 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
479
480#if OMPT_SUPPORT
481 kmp_team_t *parent_team = this_thr->th.th_team;
482 int tid = __kmp_tid_from_gtid(gtid);
483 if (ompt_enabled.enabled) {
484 parent_team->t.t_implicit_task_taskdata[tid]
485 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
486 }
487 OMPT_STORE_RETURN_ADDRESS(gtid);
488#endif
489
490 // check if __kmpc_push_num_teams called, set default number of teams
491 // otherwise
492 if (this_thr->th.th_teams_size.nteams == 0) {
493 __kmp_push_num_teams(loc, gtid, 0, 0);
494 }
495 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
496 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
497 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
498
499 __kmp_fork_call(
500 loc, gtid, fork_context_intel, argc,
501 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
502 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
503 __kmp_join_call(loc, gtid
504#if OMPT_SUPPORT
505 ,
506 fork_context_intel
507#endif
508 );
509
510 // Pop current CG root off list
511 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
512 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
513 this_thr->th.th_cg_roots = tmp->up;
514 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
515 " to node %p. cg_nthreads was %d\n",
516 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
517 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
518 int i = tmp->cg_nthreads--;
519 if (i == 1) { // check if we are the last thread in CG (not always the case)
520 __kmp_free(tmp);
521 }
522 // Restore current task's thread_limit from CG root
523 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
524 this_thr->th.th_current_task->td_icvs.thread_limit =
525 this_thr->th.th_cg_roots->cg_thread_limit;
526
527 this_thr->th.th_teams_microtask = NULL;
528 this_thr->th.th_teams_level = 0;
529 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
530 va_end(ap);
531#if KMP_STATS_ENABLED
532 if (previous_state == stats_state_e::SERIAL_REGION) {
533 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
534 KMP_SET_THREAD_STATE(previous_state);
535 } else {
536 KMP_POP_PARTITIONED_TIMER();
537 }
538#endif // KMP_STATS_ENABLED
539}
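// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// For
//     #pragma omp teams num_teams(2) thread_limit(8)
// the compiler pushes the requested teams geometry (the thread_limit request
// travels in the num_threads argument of __kmpc_push_num_teams) and then forks
// through __kmpc_fork_teams; the outlined routine becomes the task executed by
// each team's primary thread. Names are hypothetical.
static ident_t example_teams_loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;t;7;1;;"};

static void example_teams_outlined(kmp_int32 *gtid, kmp_int32 *btid) {
  (void)btid;
  KC_TRACE(10, ("example_teams_outlined: team task on T#%d\n", *gtid));
}

static void example_teams_region(void) {
  kmp_int32 gtid = __kmpc_global_thread_num(&example_teams_loc);
  __kmpc_push_num_teams(&example_teams_loc, gtid, /*num_teams=*/2,
                        /*num_threads=*/8);
  __kmpc_fork_teams(&example_teams_loc, /*argc=*/0,
                    (kmpc_micro)example_teams_outlined);
}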
540
541// I don't think this function should ever have been exported.
542// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
543// openmp code ever called it, but it's been exported from the RTL for so
544// long that I'm afraid to remove the definition.
545int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
546
559void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
560 // The implementation is now in kmp_runtime.cpp so that it can share static
561 // functions with kmp_fork_call since the tasks to be done are similar in
562 // each case.
563 __kmp_assert_valid_gtid(global_tid);
564#if OMPT_SUPPORT
565 OMPT_STORE_RETURN_ADDRESS(global_tid);
566#endif
567 __kmp_serialized_parallel(loc, global_tid);
568}
569
577void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
578 kmp_internal_control_t *top;
579 kmp_info_t *this_thr;
580 kmp_team_t *serial_team;
581
582 KC_TRACE(10,
583 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
584
585 /* skip all this code for autopar serialized loops since it results in
586 unacceptable overhead */
587 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
588 return;
589
590 // Not autopar code
591 __kmp_assert_valid_gtid(global_tid);
592 if (!TCR_4(__kmp_init_parallel))
593 __kmp_parallel_initialize();
594
595 __kmp_resume_if_soft_paused();
596
597 this_thr = __kmp_threads[global_tid];
598 serial_team = this_thr->th.th_serial_team;
599
600 kmp_task_team_t *task_team = this_thr->th.th_task_team;
601 // we need to wait for the proxy tasks before finishing the thread
602 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
603 task_team->tt.tt_hidden_helper_task_encountered))
604 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
605
606 KMP_MB();
607 KMP_DEBUG_ASSERT(serial_team);
608 KMP_ASSERT(serial_team->t.t_serialized);
609 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
610 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
611 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
612 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
613
614#if OMPT_SUPPORT
615 if (ompt_enabled.enabled &&
616 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
617 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
618 if (ompt_enabled.ompt_callback_implicit_task) {
619 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
620 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
621 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
622 }
623
624 // Reset/clear the task id only after unlinking the task
625 ompt_data_t *parent_task_data;
626 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
627
628 if (ompt_enabled.ompt_callback_parallel_end) {
629 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
630 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
631 ompt_parallel_invoker_program | ompt_parallel_team,
632 OMPT_LOAD_RETURN_ADDRESS(global_tid));
633 }
634 __ompt_lw_taskteam_unlink(this_thr);
635 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
636 }
637#endif
638
639 /* If necessary, pop the internal control stack values and replace the team
640 * values */
641 top = serial_team->t.t_control_stack_top;
642 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
643 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
644 serial_team->t.t_control_stack_top = top->next;
645 __kmp_free(top);
646 }
647
648 /* pop dispatch buffers stack */
649 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
650 {
651 dispatch_private_info_t *disp_buffer =
652 serial_team->t.t_dispatch->th_disp_buffer;
653 serial_team->t.t_dispatch->th_disp_buffer =
654 serial_team->t.t_dispatch->th_disp_buffer->next;
655 __kmp_free(disp_buffer);
656 }
657
658 /* pop the task team stack */
659 if (serial_team->t.t_serialized > 1) {
660 __kmp_pop_task_team_node(this_thr, serial_team);
661 }
662
663 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
664
665 --serial_team->t.t_serialized;
666 if (serial_team->t.t_serialized == 0) {
667
668 /* return to the parallel section */
669
670#if KMP_ARCH_X86 || KMP_ARCH_X86_64
671 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
672 __kmp_clear_x87_fpu_status_word();
673 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
674 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
675 }
676#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
677
678 __kmp_pop_current_task_from_thread(this_thr);
679#if OMPD_SUPPORT
680 if (ompd_state & OMPD_ENABLE_BP)
681 ompd_bp_parallel_end();
682#endif
683
684 this_thr->th.th_team = serial_team->t.t_parent;
685 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
686
687 /* restore values cached in the thread */
688 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
689 this_thr->th.th_team_master =
690 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
691 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
692
693 /* TODO the below shouldn't need to be adjusted for serialized teams */
694 this_thr->th.th_dispatch =
695 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
696
697 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
698 this_thr->th.th_current_task->td_flags.executing = 1;
699
700 if (__kmp_tasking_mode != tskm_immediate_exec) {
701 // Restore task state from serial team structure
702 KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
703 serial_team->t.t_primary_task_state == 1);
704 this_thr->th.th_task_state =
705 (kmp_uint8)serial_team->t.t_primary_task_state;
706 // Copy the task team from the new child / old parent team to the thread.
707 this_thr->th.th_task_team =
708 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
709 KA_TRACE(20,
710 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
711 "team %p\n",
712 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
713 }
714#if KMP_AFFINITY_SUPPORTED
715 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
716 __kmp_reset_root_init_mask(global_tid);
717 }
718#endif
719 } else {
720 if (__kmp_tasking_mode != tskm_immediate_exec) {
721 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
722 "depth of serial team %p to %d\n",
723 global_tid, serial_team, serial_team->t.t_serialized));
724 }
725 }
726
727 serial_team->t.t_level--;
728 if (__kmp_env_consistency_check)
729 __kmp_pop_parallel(global_tid, NULL);
730#if OMPT_SUPPORT
731 if (ompt_enabled.enabled)
732 this_thr->th.ompt_thread_info.state =
733 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
734 : ompt_state_work_parallel);
735#endif
736}
737
746void __kmpc_flush(ident_t *loc) {
747 KC_TRACE(10, ("__kmpc_flush: called\n"));
748
749 /* need an explicit __mf() here since we use volatile instead in the library */
750 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
751
752#if OMPT_SUPPORT && OMPT_OPTIONAL
753 if (ompt_enabled.ompt_callback_flush) {
754 ompt_callbacks.ompt_callback(ompt_callback_flush)(
755 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
756 }
757#endif
758}
759
760/* -------------------------------------------------------------------------- */
768void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
769 KMP_COUNT_BLOCK(OMP_BARRIER);
770 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
771 __kmp_assert_valid_gtid(global_tid);
772
773 if (!TCR_4(__kmp_init_parallel))
774 __kmp_parallel_initialize();
775
776 __kmp_resume_if_soft_paused();
777
778 if (__kmp_env_consistency_check) {
779 if (loc == 0) {
780 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
781 }
782 __kmp_check_barrier(global_tid, ct_barrier, loc);
783 }
784
785#if OMPT_SUPPORT
786 ompt_frame_t *ompt_frame;
787 if (ompt_enabled.enabled) {
788 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
789 if (ompt_frame->enter_frame.ptr == NULL)
790 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
791 }
792 OMPT_STORE_RETURN_ADDRESS(global_tid);
793#endif
794 __kmp_threads[global_tid]->th.th_ident = loc;
795 // TODO: explicit barrier_wait_id:
796 // this function is called when 'barrier' directive is present or
797 // implicit barrier at the end of a worksharing construct.
798 // 1) better to add a per-thread barrier counter to a thread data structure
799 // 2) set to 0 when a new team is created
800 // 4) no sync is required
801
802 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
803#if OMPT_SUPPORT && OMPT_OPTIONAL
804 if (ompt_enabled.enabled) {
805 ompt_frame->enter_frame = ompt_data_none;
806 }
807#endif
808}
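// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// Inside an outlined parallel body, a stand-alone "#pragma omp barrier" lowers
// to a single __kmpc_barrier call (and "#pragma omp flush" to a single
// __kmpc_flush call). The microtask and the source-location string below are
// hypothetical.
static ident_t example_barrier_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                      ";prog.c;body;8;1;;"};

static void example_barrier_body(kmp_int32 *gtid, kmp_int32 *btid) {
  (void)btid;
  // ... phase 1: every thread of the team produces its share of the data ...
  __kmpc_flush(&example_barrier_loc); // #pragma omp flush
  __kmpc_barrier(&example_barrier_loc, *gtid); // #pragma omp barrier
  // ... phase 2: threads may now read what phase 1 wrote ...
}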
809
810/* The BARRIER for a MASTER section is always explicit */
817kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
818 int status = 0;
819
820 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
821 __kmp_assert_valid_gtid(global_tid);
822
823 if (!TCR_4(__kmp_init_parallel))
824 __kmp_parallel_initialize();
825
826 __kmp_resume_if_soft_paused();
827
828 if (KMP_MASTER_GTID(global_tid)) {
829 KMP_COUNT_BLOCK(OMP_MASTER);
830 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
831 status = 1;
832 }
833
834#if OMPT_SUPPORT && OMPT_OPTIONAL
835 if (status) {
836 if (ompt_enabled.ompt_callback_masked) {
837 kmp_info_t *this_thr = __kmp_threads[global_tid];
838 kmp_team_t *team = this_thr->th.th_team;
839
840 int tid = __kmp_tid_from_gtid(global_tid);
841 ompt_callbacks.ompt_callback(ompt_callback_masked)(
842 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
843 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
844 OMPT_GET_RETURN_ADDRESS(0));
845 }
846 }
847#endif
848
849 if (__kmp_env_consistency_check) {
850#if KMP_USE_DYNAMIC_LOCK
851 if (status)
852 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
853 else
854 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
855#else
856 if (status)
857 __kmp_push_sync(global_tid, ct_master, loc, NULL);
858 else
859 __kmp_check_sync(global_tid, ct_master, loc, NULL);
860#endif
861 }
862
863 return status;
864}
865
874void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
875 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
876 __kmp_assert_valid_gtid(global_tid);
877 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
878 KMP_POP_PARTITIONED_TIMER();
879
880#if OMPT_SUPPORT && OMPT_OPTIONAL
881 kmp_info_t *this_thr = __kmp_threads[global_tid];
882 kmp_team_t *team = this_thr->th.th_team;
883 if (ompt_enabled.ompt_callback_masked) {
884 int tid = __kmp_tid_from_gtid(global_tid);
885 ompt_callbacks.ompt_callback(ompt_callback_masked)(
886 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
887 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
888 OMPT_GET_RETURN_ADDRESS(0));
889 }
890#endif
891
892 if (__kmp_env_consistency_check) {
893 if (KMP_MASTER_GTID(global_tid))
894 __kmp_pop_sync(global_tid, ct_master, loc);
895 }
896}
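// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// A "#pragma omp master" block lowers to a guarded region: only the thread for
// which __kmpc_master returns 1 executes the body and must then call
// __kmpc_end_master. There is no implied barrier. Names are hypothetical.
static ident_t example_master_loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;m;9;1;;"};

static void example_master_body(kmp_int32 *gtid, kmp_int32 *btid) {
  (void)btid;
  if (__kmpc_master(&example_master_loc, *gtid)) {
    // ... body executed by the team's primary thread only ...
    __kmpc_end_master(&example_master_loc, *gtid);
  }
}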
897
906kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
907 int status = 0;
908 int tid;
909 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
910 __kmp_assert_valid_gtid(global_tid);
911
912 if (!TCR_4(__kmp_init_parallel))
913 __kmp_parallel_initialize();
914
915 __kmp_resume_if_soft_paused();
916
917 tid = __kmp_tid_from_gtid(global_tid);
918 if (tid == filter) {
919 KMP_COUNT_BLOCK(OMP_MASKED);
920 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
921 status = 1;
922 }
923
924#if OMPT_SUPPORT && OMPT_OPTIONAL
925 if (status) {
926 if (ompt_enabled.ompt_callback_masked) {
927 kmp_info_t *this_thr = __kmp_threads[global_tid];
928 kmp_team_t *team = this_thr->th.th_team;
929 ompt_callbacks.ompt_callback(ompt_callback_masked)(
930 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
931 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
932 OMPT_GET_RETURN_ADDRESS(0));
933 }
934 }
935#endif
936
937 if (__kmp_env_consistency_check) {
938#if KMP_USE_DYNAMIC_LOCK
939 if (status)
940 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
941 else
942 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
943#else
944 if (status)
945 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
946 else
947 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
948#endif
949 }
950
951 return status;
952}
953
962void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
963 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
964 __kmp_assert_valid_gtid(global_tid);
965 KMP_POP_PARTITIONED_TIMER();
966
967#if OMPT_SUPPORT && OMPT_OPTIONAL
968 kmp_info_t *this_thr = __kmp_threads[global_tid];
969 kmp_team_t *team = this_thr->th.th_team;
970 if (ompt_enabled.ompt_callback_masked) {
971 int tid = __kmp_tid_from_gtid(global_tid);
972 ompt_callbacks.ompt_callback(ompt_callback_masked)(
973 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
974 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
975 OMPT_GET_RETURN_ADDRESS(0));
976 }
977#endif
978
979 if (__kmp_env_consistency_check) {
980 __kmp_pop_sync(global_tid, ct_masked, loc);
981 }
982}
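// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// OpenMP 5.1's
//     #pragma omp masked filter(2)
// follows the same begin/end protocol as master, with the filter thread id
// passed explicitly. Names are hypothetical.
static ident_t example_masked_loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;k;10;1;;"};

static void example_masked_body(kmp_int32 *gtid, kmp_int32 *btid) {
  (void)btid;
  if (__kmpc_masked(&example_masked_loc, *gtid, /*filter=*/2)) {
    // ... body executed only by the thread whose team-local id is 2 ...
    __kmpc_end_masked(&example_masked_loc, *gtid);
  }
}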
983
991void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
992 int cid = 0;
993 kmp_info_t *th;
994 KMP_DEBUG_ASSERT(__kmp_init_serial);
995
996 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
997 __kmp_assert_valid_gtid(gtid);
998
999 if (!TCR_4(__kmp_init_parallel))
1000 __kmp_parallel_initialize();
1001
1002 __kmp_resume_if_soft_paused();
1003
1004#if USE_ITT_BUILD
1005 __kmp_itt_ordered_prep(gtid);
1006// TODO: ordered_wait_id
1007#endif /* USE_ITT_BUILD */
1008
1009 th = __kmp_threads[gtid];
1010
1011#if OMPT_SUPPORT && OMPT_OPTIONAL
1012 kmp_team_t *team;
1013 ompt_wait_id_t lck;
1014 void *codeptr_ra;
1015 OMPT_STORE_RETURN_ADDRESS(gtid);
1016 if (ompt_enabled.enabled) {
1017 team = __kmp_team_from_gtid(gtid);
1018 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1019 /* OMPT state update */
1020 th->th.ompt_thread_info.wait_id = lck;
1021 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1022
1023 /* OMPT event callback */
1024 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1025 if (ompt_enabled.ompt_callback_mutex_acquire) {
1026 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1027 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1028 codeptr_ra);
1029 }
1030 }
1031#endif
1032
1033 if (th->th.th_dispatch->th_deo_fcn != 0)
1034 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1035 else
1036 __kmp_parallel_deo(&gtid, &cid, loc);
1037
1038#if OMPT_SUPPORT && OMPT_OPTIONAL
1039 if (ompt_enabled.enabled) {
1040 /* OMPT state update */
1041 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1042 th->th.ompt_thread_info.wait_id = 0;
1043
1044 /* OMPT event callback */
1045 if (ompt_enabled.ompt_callback_mutex_acquired) {
1046 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1047 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1048 }
1049 }
1050#endif
1051
1052#if USE_ITT_BUILD
1053 __kmp_itt_ordered_start(gtid);
1054#endif /* USE_ITT_BUILD */
1055}
1056
1064void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1065 int cid = 0;
1066 kmp_info_t *th;
1067
1068 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1069 __kmp_assert_valid_gtid(gtid);
1070
1071#if USE_ITT_BUILD
1072 __kmp_itt_ordered_end(gtid);
1073// TODO: ordered_wait_id
1074#endif /* USE_ITT_BUILD */
1075
1076 th = __kmp_threads[gtid];
1077
1078 if (th->th.th_dispatch->th_dxo_fcn != 0)
1079 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1080 else
1081 __kmp_parallel_dxo(&gtid, &cid, loc);
1082
1083#if OMPT_SUPPORT && OMPT_OPTIONAL
1084 OMPT_STORE_RETURN_ADDRESS(gtid);
1085 if (ompt_enabled.ompt_callback_mutex_released) {
1086 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1087 ompt_mutex_ordered,
1088 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1089 ->t.t_ordered.dt.t_value,
1090 OMPT_LOAD_RETURN_ADDRESS(gtid));
1091 }
1092#endif
1093}
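// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// Inside a loop scheduled with an ordered clause, the body of
//     #pragma omp ordered
// is bracketed by the two calls above so iterations retire in sequential
// order. The dispatch calls that would normally surround the loop are elided
// here; names are hypothetical.
static ident_t example_ordered_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                      ";prog.c;o;11;1;;"};

static void example_ordered_iteration(kmp_int32 gtid, int i) {
  __kmpc_ordered(&example_ordered_loc, gtid);
  // ... strictly ordered work for iteration i (e.g. appending to a file) ...
  (void)i;
  __kmpc_end_ordered(&example_ordered_loc, gtid);
}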
1094
1095#if KMP_USE_DYNAMIC_LOCK
1096
1097static __forceinline void
1098__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1099 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1100 // Pointer to the allocated indirect lock is written to crit, while indexing
1101 // is ignored.
1102 void *idx;
1103 kmp_indirect_lock_t **lck;
1104 lck = (kmp_indirect_lock_t **)crit;
1105 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1106 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1107 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1108 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1109 KA_TRACE(20,
1110 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1111#if USE_ITT_BUILD
1112 __kmp_itt_critical_creating(ilk->lock, loc);
1113#endif
1114 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1115 if (status == 0) {
1116#if USE_ITT_BUILD
1117 __kmp_itt_critical_destroyed(ilk->lock);
1118#endif
1119 // We don't really need to destroy the unclaimed lock here since it will be
1120 // cleaned up at program exit.
1121 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1122 }
1123 KMP_DEBUG_ASSERT(*lck != NULL);
1124}
1125
1126// Fast-path acquire tas lock
1127#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1128 { \
1129 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1130 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1131 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1132 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1133 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1134 kmp_uint32 spins; \
1135 KMP_FSYNC_PREPARE(l); \
1136 KMP_INIT_YIELD(spins); \
1137 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1138 do { \
1139 if (TCR_4(__kmp_nth) > \
1140 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1141 KMP_YIELD(TRUE); \
1142 } else { \
1143 KMP_YIELD_SPIN(spins); \
1144 } \
1145 __kmp_spin_backoff(&backoff); \
1146 } while ( \
1147 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1148 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1149 } \
1150 KMP_FSYNC_ACQUIRED(l); \
1151 }
1152
1153// Fast-path test tas lock
1154#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1155 { \
1156 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1157 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1158 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1159 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1160 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1161 }
1162
1163// Fast-path release tas lock
1164#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1165 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1166
1167#if KMP_USE_FUTEX
1168
1169#include <sys/syscall.h>
1170#include <unistd.h>
1171#ifndef FUTEX_WAIT
1172#define FUTEX_WAIT 0
1173#endif
1174#ifndef FUTEX_WAKE
1175#define FUTEX_WAKE 1
1176#endif
1177
1178// Fast-path acquire futex lock
1179#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1180 { \
1181 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1182 kmp_int32 gtid_code = (gtid + 1) << 1; \
1183 KMP_MB(); \
1184 KMP_FSYNC_PREPARE(ftx); \
1185 kmp_int32 poll_val; \
1186 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1187 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1188 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1189 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1190 if (!cond) { \
1191 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1192 poll_val | \
1193 KMP_LOCK_BUSY(1, futex))) { \
1194 continue; \
1195 } \
1196 poll_val |= KMP_LOCK_BUSY(1, futex); \
1197 } \
1198 kmp_int32 rc; \
1199 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1200 NULL, NULL, 0)) != 0) { \
1201 continue; \
1202 } \
1203 gtid_code |= 1; \
1204 } \
1205 KMP_FSYNC_ACQUIRED(ftx); \
1206 }
1207
1208// Fast-path test futex lock
1209#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1210 { \
1211 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1212 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1213 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1214 KMP_FSYNC_ACQUIRED(ftx); \
1215 rc = TRUE; \
1216 } else { \
1217 rc = FALSE; \
1218 } \
1219 }
1220
1221// Fast-path release futex lock
1222#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1223 { \
1224 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1225 KMP_MB(); \
1226 KMP_FSYNC_RELEASING(ftx); \
1227 kmp_int32 poll_val = \
1228 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1229 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1230 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1231 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1232 } \
1233 KMP_MB(); \
1234 KMP_YIELD_OVERSUB(); \
1235 }
1236
1237#endif // KMP_USE_FUTEX
1238
1239#else // KMP_USE_DYNAMIC_LOCK
1240
1241static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1242 ident_t const *loc,
1243 kmp_int32 gtid) {
1244 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1245
1246 // Because of the double-check, the following load doesn't need to be volatile
1247 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1248
1249 if (lck == NULL) {
1250 void *idx;
1251
1252 // Allocate & initialize the lock.
1253 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1254 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1255 __kmp_init_user_lock_with_checks(lck);
1256 __kmp_set_user_lock_location(lck, loc);
1257#if USE_ITT_BUILD
1258 __kmp_itt_critical_creating(lck);
1259// __kmp_itt_critical_creating() should be called *before* the first usage
1260// of underlying lock. It is the only place where we can guarantee it. There
1261// are chances the lock will be destroyed with no usage, but it is not a
1262// problem, because this is not a real event seen by the user but rather
1263// setting a name for an object (lock). See more details in kmp_itt.h.
1264#endif /* USE_ITT_BUILD */
1265
1266 // Use a cmpxchg instruction to slam the start of the critical section with
1267 // the lock pointer. If another thread beat us to it, deallocate the lock,
1268 // and use the lock that the other thread allocated.
1269 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1270
1271 if (status == 0) {
1272// Deallocate the lock and reload the value.
1273#if USE_ITT_BUILD
1274 __kmp_itt_critical_destroyed(lck);
1275// Let ITT know the lock is destroyed and the same memory location may be reused
1276// for another purpose.
1277#endif /* USE_ITT_BUILD */
1278 __kmp_destroy_user_lock_with_checks(lck);
1279 __kmp_user_lock_free(&idx, gtid, lck);
1280 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1281 KMP_DEBUG_ASSERT(lck != NULL);
1282 }
1283 }
1284 return lck;
1285}
1286
1287#endif // KMP_USE_DYNAMIC_LOCK
1288
1299void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1300 kmp_critical_name *crit) {
1301#if KMP_USE_DYNAMIC_LOCK
1302#if OMPT_SUPPORT && OMPT_OPTIONAL
1303 OMPT_STORE_RETURN_ADDRESS(global_tid);
1304#endif // OMPT_SUPPORT
1305 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1306#else
1307 KMP_COUNT_BLOCK(OMP_CRITICAL);
1308#if OMPT_SUPPORT && OMPT_OPTIONAL
1309 ompt_state_t prev_state = ompt_state_undefined;
1310 ompt_thread_info_t ti;
1311#endif
1312 kmp_user_lock_p lck;
1313
1314 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1315 __kmp_assert_valid_gtid(global_tid);
1316
1317 // TODO: add THR_OVHD_STATE
1318
1319 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1320 KMP_CHECK_USER_LOCK_INIT();
1321
1322 if ((__kmp_user_lock_kind == lk_tas) &&
1323 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1324 lck = (kmp_user_lock_p)crit;
1325 }
1326#if KMP_USE_FUTEX
1327 else if ((__kmp_user_lock_kind == lk_futex) &&
1328 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1329 lck = (kmp_user_lock_p)crit;
1330 }
1331#endif
1332 else { // ticket, queuing or drdpa
1333 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1334 }
1335
1336 if (__kmp_env_consistency_check)
1337 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1338
1339 // Since the critical directive binds to all threads, not just the current
1340 // team, we have to check this even if we are in a serialized team.
1341 // Also, even if we are the uber thread, we still have to acquire the lock,
1342 // as we have to contend with sibling threads.
1343
1344#if USE_ITT_BUILD
1345 __kmp_itt_critical_acquiring(lck);
1346#endif /* USE_ITT_BUILD */
1347#if OMPT_SUPPORT && OMPT_OPTIONAL
1348 OMPT_STORE_RETURN_ADDRESS(global_tid);
1349 void *codeptr_ra = NULL;
1350 if (ompt_enabled.enabled) {
1351 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1352 /* OMPT state update */
1353 prev_state = ti.state;
1354 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1355 ti.state = ompt_state_wait_critical;
1356
1357 /* OMPT event callback */
1358 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1359 if (ompt_enabled.ompt_callback_mutex_acquire) {
1360 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1361 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1362 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1363 }
1364 }
1365#endif
1366 // Value of 'crit' should be good for using as a critical_id of the critical
1367 // section directive.
1368 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1369
1370#if USE_ITT_BUILD
1371 __kmp_itt_critical_acquired(lck);
1372#endif /* USE_ITT_BUILD */
1373#if OMPT_SUPPORT && OMPT_OPTIONAL
1374 if (ompt_enabled.enabled) {
1375 /* OMPT state update */
1376 ti.state = prev_state;
1377 ti.wait_id = 0;
1378
1379 /* OMPT event callback */
1380 if (ompt_enabled.ompt_callback_mutex_acquired) {
1381 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1382 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1383 }
1384 }
1385#endif
1386 KMP_POP_PARTITIONED_TIMER();
1387
1388 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1389 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1390#endif // KMP_USE_DYNAMIC_LOCK
1391}
1392
1393#if KMP_USE_DYNAMIC_LOCK
1394
1395// Converts the given hint to an internal lock implementation
1396static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1397#if KMP_USE_TSX
1398#define KMP_TSX_LOCK(seq) lockseq_##seq
1399#else
1400#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1401#endif
1402
1403#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1404#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1405#else
1406#define KMP_CPUINFO_RTM 0
1407#endif
1408
1409 // Hints that do not require further logic
1410 if (hint & kmp_lock_hint_hle)
1411 return KMP_TSX_LOCK(hle);
1412 if (hint & kmp_lock_hint_rtm)
1413 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1414 if (hint & kmp_lock_hint_adaptive)
1415 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1416
1417 // Rule out conflicting hints first by returning the default lock
1418 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1419 return __kmp_user_lock_seq;
1420 if ((hint & omp_lock_hint_speculative) &&
1421 (hint & omp_lock_hint_nonspeculative))
1422 return __kmp_user_lock_seq;
1423
1424 // Do not even consider speculation when it appears to be contended
1425 if (hint & omp_lock_hint_contended)
1426 return lockseq_queuing;
1427
1428 // Uncontended lock without speculation
1429 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1430 return lockseq_tas;
1431
1432 // Use RTM lock for speculation
1433 if (hint & omp_lock_hint_speculative)
1434 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1435
1436 return __kmp_user_lock_seq;
1437}
1438
1439#if OMPT_SUPPORT && OMPT_OPTIONAL
1440#if KMP_USE_DYNAMIC_LOCK
1441static kmp_mutex_impl_t
1442__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1443 if (user_lock) {
1444 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1445 case 0:
1446 break;
1447#if KMP_USE_FUTEX
1448 case locktag_futex:
1449 return kmp_mutex_impl_queuing;
1450#endif
1451 case locktag_tas:
1452 return kmp_mutex_impl_spin;
1453#if KMP_USE_TSX
1454 case locktag_hle:
1455 case locktag_rtm_spin:
1456 return kmp_mutex_impl_speculative;
1457#endif
1458 default:
1459 return kmp_mutex_impl_none;
1460 }
1461 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1462 }
1463 KMP_ASSERT(ilock);
1464 switch (ilock->type) {
1465#if KMP_USE_TSX
1466 case locktag_adaptive:
1467 case locktag_rtm_queuing:
1468 return kmp_mutex_impl_speculative;
1469#endif
1470 case locktag_nested_tas:
1471 return kmp_mutex_impl_spin;
1472#if KMP_USE_FUTEX
1473 case locktag_nested_futex:
1474#endif
1475 case locktag_ticket:
1476 case locktag_queuing:
1477 case locktag_drdpa:
1478 case locktag_nested_ticket:
1479 case locktag_nested_queuing:
1480 case locktag_nested_drdpa:
1481 return kmp_mutex_impl_queuing;
1482 default:
1483 return kmp_mutex_impl_none;
1484 }
1485}
1486#else
1487// For locks without dynamic binding
1488static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1489 switch (__kmp_user_lock_kind) {
1490 case lk_tas:
1491 return kmp_mutex_impl_spin;
1492#if KMP_USE_FUTEX
1493 case lk_futex:
1494#endif
1495 case lk_ticket:
1496 case lk_queuing:
1497 case lk_drdpa:
1498 return kmp_mutex_impl_queuing;
1499#if KMP_USE_TSX
1500 case lk_hle:
1501 case lk_rtm_queuing:
1502 case lk_rtm_spin:
1503 case lk_adaptive:
1504 return kmp_mutex_impl_speculative;
1505#endif
1506 default:
1507 return kmp_mutex_impl_none;
1508 }
1509}
1510#endif // KMP_USE_DYNAMIC_LOCK
1511#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1512
1526void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1527 kmp_critical_name *crit, uint32_t hint) {
1528 KMP_COUNT_BLOCK(OMP_CRITICAL);
1529 kmp_user_lock_p lck;
1530#if OMPT_SUPPORT && OMPT_OPTIONAL
1531 ompt_state_t prev_state = ompt_state_undefined;
1532 ompt_thread_info_t ti;
1533 // This is the case if called from __kmpc_critical:
1534 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1535 if (!codeptr)
1536 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1537#endif
1538
1539 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1540 __kmp_assert_valid_gtid(global_tid);
1541
1542 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1543 // Check if it is initialized.
1544 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1545 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1546 if (*lk == 0) {
1547 if (KMP_IS_D_LOCK(lockseq)) {
1548 KMP_COMPARE_AND_STORE_ACQ32(
1549 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1550 KMP_GET_D_TAG(lockseq));
1551 } else {
1552 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1553 }
1554 }
1555 // Branch for accessing the actual lock object and set operation. This
1556 // branching is inevitable since this lock initialization does not follow the
1557 // normal dispatch path (lock table is not used).
1558 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1559 lck = (kmp_user_lock_p)lk;
1560 if (__kmp_env_consistency_check) {
1561 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1562 __kmp_map_hint_to_lock(hint));
1563 }
1564#if USE_ITT_BUILD
1565 __kmp_itt_critical_acquiring(lck);
1566#endif
1567#if OMPT_SUPPORT && OMPT_OPTIONAL
1568 if (ompt_enabled.enabled) {
1569 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1570 /* OMPT state update */
1571 prev_state = ti.state;
1572 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1573 ti.state = ompt_state_wait_critical;
1574
1575 /* OMPT event callback */
1576 if (ompt_enabled.ompt_callback_mutex_acquire) {
1577 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1578 ompt_mutex_critical, (unsigned int)hint,
1579 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1580 codeptr);
1581 }
1582 }
1583#endif
1584#if KMP_USE_INLINED_TAS
1585 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1586 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1587 } else
1588#elif KMP_USE_INLINED_FUTEX
1589 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1590 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1591 } else
1592#endif
1593 {
1594 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1595 }
1596 } else {
1597 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1598 lck = ilk->lock;
1599 if (__kmp_env_consistency_check) {
1600 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1601 __kmp_map_hint_to_lock(hint));
1602 }
1603#if USE_ITT_BUILD
1604 __kmp_itt_critical_acquiring(lck);
1605#endif
1606#if OMPT_SUPPORT && OMPT_OPTIONAL
1607 if (ompt_enabled.enabled) {
1608 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1609 /* OMPT state update */
1610 prev_state = ti.state;
1611 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1612 ti.state = ompt_state_wait_critical;
1613
1614 /* OMPT event callback */
1615 if (ompt_enabled.ompt_callback_mutex_acquire) {
1616 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1617 ompt_mutex_critical, (unsigned int)hint,
1618 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1619 codeptr);
1620 }
1621 }
1622#endif
1623 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1624 }
1625 KMP_POP_PARTITIONED_TIMER();
1626
1627#if USE_ITT_BUILD
1628 __kmp_itt_critical_acquired(lck);
1629#endif /* USE_ITT_BUILD */
1630#if OMPT_SUPPORT && OMPT_OPTIONAL
1631 if (ompt_enabled.enabled) {
1632 /* OMPT state update */
1633 ti.state = prev_state;
1634 ti.wait_id = 0;
1635
1636 /* OMPT event callback */
1637 if (ompt_enabled.ompt_callback_mutex_acquired) {
1638 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1639 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1640 }
1641 }
1642#endif
1643
1644 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1645 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1646} // __kmpc_critical_with_hint
1647
1648#endif // KMP_USE_DYNAMIC_LOCK
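// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// Assuming the default dynamic-lock build (KMP_USE_DYNAMIC_LOCK != 0), for
//     #pragma omp critical (name) hint(omp_sync_hint_contended)
// the compiler emits a zero-initialized per-name lock word and routes the hint
// through __kmpc_critical_with_hint; __kmp_map_hint_to_lock above then selects
// the backing lock kind (a queuing lock here, since "contended" is requested).
// Names are hypothetical.
#if KMP_USE_DYNAMIC_LOCK
static ident_t example_hint_loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;h;12;1;;"};
static kmp_critical_name example_hint_lock; // zero-initialized lock word

static void example_hinted_critical(kmp_int32 gtid, int *sum, int v) {
  __kmpc_critical_with_hint(&example_hint_loc, gtid, &example_hint_lock,
                            omp_lock_hint_contended);
  *sum += v; // protected update
  __kmpc_end_critical(&example_hint_loc, gtid, &example_hint_lock);
}
#endif // KMP_USE_DYNAMIC_LOCK (editor's example)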
1649
1659void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1660 kmp_critical_name *crit) {
1661 kmp_user_lock_p lck;
1662
1663 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1664
1665#if KMP_USE_DYNAMIC_LOCK
1666 int locktag = KMP_EXTRACT_D_TAG(crit);
1667 if (locktag) {
1668 lck = (kmp_user_lock_p)crit;
1669 KMP_ASSERT(lck != NULL);
1670 if (__kmp_env_consistency_check) {
1671 __kmp_pop_sync(global_tid, ct_critical, loc);
1672 }
1673#if USE_ITT_BUILD
1674 __kmp_itt_critical_releasing(lck);
1675#endif
1676#if KMP_USE_INLINED_TAS
1677 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1678 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1679 } else
1680#elif KMP_USE_INLINED_FUTEX
1681 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1682 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1683 } else
1684#endif
1685 {
1686 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1687 }
1688 } else {
1689 kmp_indirect_lock_t *ilk =
1690 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1691 KMP_ASSERT(ilk != NULL);
1692 lck = ilk->lock;
1693 if (__kmp_env_consistency_check) {
1694 __kmp_pop_sync(global_tid, ct_critical, loc);
1695 }
1696#if USE_ITT_BUILD
1697 __kmp_itt_critical_releasing(lck);
1698#endif
1699 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1700 }
1701
1702#else // KMP_USE_DYNAMIC_LOCK
1703
1704 if ((__kmp_user_lock_kind == lk_tas) &&
1705 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1706 lck = (kmp_user_lock_p)crit;
1707 }
1708#if KMP_USE_FUTEX
1709 else if ((__kmp_user_lock_kind == lk_futex) &&
1710 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1711 lck = (kmp_user_lock_p)crit;
1712 }
1713#endif
1714 else { // ticket, queuing or drdpa
1715 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1716 }
1717
1718 KMP_ASSERT(lck != NULL);
1719
1720 if (__kmp_env_consistency_check)
1721 __kmp_pop_sync(global_tid, ct_critical, loc);
1722
1723#if USE_ITT_BUILD
1724 __kmp_itt_critical_releasing(lck);
1725#endif /* USE_ITT_BUILD */
1726 // Value of 'crit' should be good for using as a critical_id of the critical
1727 // section directive.
1728 __kmp_release_user_lock_with_checks(lck, global_tid);
1729
1730#endif // KMP_USE_DYNAMIC_LOCK
1731
1732#if OMPT_SUPPORT && OMPT_OPTIONAL
1733 /* OMPT release event triggers after lock is released; place here to trigger
1734 * for all #if branches */
1735 OMPT_STORE_RETURN_ADDRESS(global_tid);
1736 if (ompt_enabled.ompt_callback_mutex_released) {
1737 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1738 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1739 OMPT_LOAD_RETURN_ADDRESS(0));
1740 }
1741#endif
1742
1743 KMP_POP_PARTITIONED_TIMER();
1744 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1745}
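// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// For
//     #pragma omp critical (update)
// the compiler emits one zero-initialized kmp_critical_name per critical name
// and brackets the protected region with the acquire/release pair defined
// above. The exclusion is global: it spans all teams, not just the current
// one. Names are hypothetical.
static ident_t example_crit_loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;c;13;1;;"};
static kmp_critical_name example_crit_lock;

static void example_critical_update(kmp_int32 gtid, double *acc, double v) {
  __kmpc_critical(&example_crit_loc, gtid, &example_crit_lock);
  *acc += v; // mutually exclusive update
  __kmpc_end_critical(&example_crit_loc, gtid, &example_crit_lock);
}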
1746
1756kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1757 int status;
1758 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1759 __kmp_assert_valid_gtid(global_tid);
1760
1761 if (!TCR_4(__kmp_init_parallel))
1762 __kmp_parallel_initialize();
1763
1764 __kmp_resume_if_soft_paused();
1765
1766 if (__kmp_env_consistency_check)
1767 __kmp_check_barrier(global_tid, ct_barrier, loc);
1768
1769#if OMPT_SUPPORT
1770 ompt_frame_t *ompt_frame;
1771 if (ompt_enabled.enabled) {
1772 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1773 if (ompt_frame->enter_frame.ptr == NULL)
1774 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1775 }
1776 OMPT_STORE_RETURN_ADDRESS(global_tid);
1777#endif
1778#if USE_ITT_NOTIFY
1779 __kmp_threads[global_tid]->th.th_ident = loc;
1780#endif
1781 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1782#if OMPT_SUPPORT && OMPT_OPTIONAL
1783 if (ompt_enabled.enabled) {
1784 ompt_frame->enter_frame = ompt_data_none;
1785 }
1786#endif
1787
1788 return (status != 0) ? 0 : 1;
1789}
1790
1800void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1801 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1802 __kmp_assert_valid_gtid(global_tid);
1803 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1804}
1805
1816kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1817 kmp_int32 ret;
1818 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1819 __kmp_assert_valid_gtid(global_tid);
1820
1821 if (!TCR_4(__kmp_init_parallel))
1822 __kmp_parallel_initialize();
1823
1824 __kmp_resume_if_soft_paused();
1825
1826 if (__kmp_env_consistency_check) {
1827 if (loc == 0) {
1828 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1829 }
1830 __kmp_check_barrier(global_tid, ct_barrier, loc);
1831 }
1832
1833#if OMPT_SUPPORT
1834 ompt_frame_t *ompt_frame;
1835 if (ompt_enabled.enabled) {
1836 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1837 if (ompt_frame->enter_frame.ptr == NULL)
1838 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1839 }
1840 OMPT_STORE_RETURN_ADDRESS(global_tid);
1841#endif
1842#if USE_ITT_NOTIFY
1843 __kmp_threads[global_tid]->th.th_ident = loc;
1844#endif
1845 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1846#if OMPT_SUPPORT && OMPT_OPTIONAL
1847 if (ompt_enabled.enabled) {
1848 ompt_frame->enter_frame = ompt_data_none;
1849 }
1850#endif
1851
1852 ret = __kmpc_master(loc, global_tid);
1853
1854 if (__kmp_env_consistency_check) {
1855 /* there's no __kmpc_end_master called; so the (stats) */
1856 /* actions of __kmpc_end_master are done here */
1857 if (ret) {
1858 /* only one thread should do the pop since only */
1859 /* one did the push (see __kmpc_master()) */
1860 __kmp_pop_sync(global_tid, ct_master, loc);
1861 }
1862 }
1863
1864 return (ret);
1865}
1866
1867/* The BARRIER for a SINGLE process section is always explicit */
1879kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1880 __kmp_assert_valid_gtid(global_tid);
1881 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1882
1883 if (rc) {
1884 // We are going to execute the single statement, so we should count it.
1885 KMP_COUNT_BLOCK(OMP_SINGLE);
1886 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1887 }
1888
1889#if OMPT_SUPPORT && OMPT_OPTIONAL
1890 kmp_info_t *this_thr = __kmp_threads[global_tid];
1891 kmp_team_t *team = this_thr->th.th_team;
1892 int tid = __kmp_tid_from_gtid(global_tid);
1893
1894 if (ompt_enabled.enabled) {
1895 if (rc) {
1896 if (ompt_enabled.ompt_callback_work) {
1897 ompt_callbacks.ompt_callback(ompt_callback_work)(
1898 ompt_work_single_executor, ompt_scope_begin,
1899 &(team->t.ompt_team_info.parallel_data),
1900 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1901 1, OMPT_GET_RETURN_ADDRESS(0));
1902 }
1903 } else {
1904 if (ompt_enabled.ompt_callback_work) {
1905 ompt_callbacks.ompt_callback(ompt_callback_work)(
1906 ompt_work_single_other, ompt_scope_begin,
1907 &(team->t.ompt_team_info.parallel_data),
1908 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1909 1, OMPT_GET_RETURN_ADDRESS(0));
1910 ompt_callbacks.ompt_callback(ompt_callback_work)(
1911 ompt_work_single_other, ompt_scope_end,
1912 &(team->t.ompt_team_info.parallel_data),
1913 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1914 1, OMPT_GET_RETURN_ADDRESS(0));
1915 }
1916 }
1917 }
1918#endif
1919
1920 return rc;
1921}
1922
1932void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1933 __kmp_assert_valid_gtid(global_tid);
1934 __kmp_exit_single(global_tid);
1935 KMP_POP_PARTITIONED_TIMER();
1936
1937#if OMPT_SUPPORT && OMPT_OPTIONAL
1938 kmp_info_t *this_thr = __kmp_threads[global_tid];
1939 kmp_team_t *team = this_thr->th.th_team;
1940 int tid = __kmp_tid_from_gtid(global_tid);
1941
1942 if (ompt_enabled.ompt_callback_work) {
1943 ompt_callbacks.ompt_callback(ompt_callback_work)(
1944 ompt_work_single_executor, ompt_scope_end,
1945 &(team->t.ompt_team_info.parallel_data),
1946 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1947 OMPT_GET_RETURN_ADDRESS(0));
1948 }
1949#endif
1950}
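// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// For
//     #pragma omp single
// exactly one thread of the team gets rc == 1 and runs the body; the implicit
// barrier that closes the construct (absent a nowait clause) is a separate
// __kmpc_barrier call. Names are hypothetical.
static ident_t example_single_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                     ";prog.c;s;14;1;;"};

static void example_single_body(kmp_int32 *gtid, kmp_int32 *btid) {
  (void)btid;
  if (__kmpc_single(&example_single_loc, *gtid)) {
    // ... body executed by exactly one thread of the team ...
    __kmpc_end_single(&example_single_loc, *gtid);
  }
  __kmpc_barrier(&example_single_loc, *gtid); // implicit barrier (no nowait)
}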
1951
1959void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1960 KMP_POP_PARTITIONED_TIMER();
1961 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1962
1963#if OMPT_SUPPORT && OMPT_OPTIONAL
1964 if (ompt_enabled.ompt_callback_work) {
1965 ompt_work_t ompt_work_type = ompt_work_loop;
1966 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1967 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1968 // Determine workshare type
1969 if (loc != NULL) {
1970 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1971 ompt_work_type = ompt_work_loop;
1972 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1973 ompt_work_type = ompt_work_sections;
1974 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1975 ompt_work_type = ompt_work_distribute;
1976 } else {
1977 // use default set above.
1978 // a warning about this case is provided in __kmpc_for_static_init
1979 }
1980 KMP_DEBUG_ASSERT(ompt_work_type);
1981 }
1982 ompt_callbacks.ompt_callback(ompt_callback_work)(
1983 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1984 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1985 }
1986#endif
1987 if (__kmp_env_consistency_check)
1988 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1989}
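// --- Editor's example (illustrative sketch; not part of kmp_csupport.cpp) ---
// A statically scheduled worksharing loop such as
//     #pragma omp for schedule(static)
//     for (int i = 0; i < n; ++i) a[i] = 0.0;
// is lowered to a chunk query via __kmpc_for_static_init_4 (implemented in
// kmp_sched.cpp, declared in kmp.h), the thread-local loop, then
// __kmpc_for_static_fini above and the implicit barrier. Names are
// hypothetical.
static ident_t example_for_loc = {0, KMP_IDENT_KMPC | KMP_IDENT_WORK_LOOP, 0, 0,
                                  ";prog.c;loop;15;1;;"};

static void example_static_for(kmp_int32 gtid, kmp_int32 n, double *a) {
  kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
  __kmpc_for_static_init_4(&example_for_loc, gtid, kmp_sch_static, &last,
                           &lower, &upper, &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) // bounds adjusted per thread
    a[i] = 0.0;
  __kmpc_for_static_fini(&example_for_loc, gtid);
  __kmpc_barrier(&example_for_loc, gtid); // implicit barrier (no nowait)
}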
1990
1991// User routines which take C-style arguments (call by value)
1992// different from the Fortran equivalent routines
1993
1994void ompc_set_num_threads(int arg) {
1995 // !!!!! TODO: check the per-task binding
1996 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1997}
1998
1999void ompc_set_dynamic(int flag) {
2000 kmp_info_t *thread;
2001
2002 /* For the thread-private implementation of the internal controls */
2003 thread = __kmp_entry_thread();
2004
2005 __kmp_save_internal_controls(thread);
2006
2007 set__dynamic(thread, flag ? true : false);
2008}
2009
2010void ompc_set_nested(int flag) {
2011 kmp_info_t *thread;
2012
2013 /* For the thread-private internal controls implementation */
2014 thread = __kmp_entry_thread();
2015
2016 __kmp_save_internal_controls(thread);
2017
2018 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2019}
2020
2021void ompc_set_max_active_levels(int max_active_levels) {
2022 /* TO DO */
2023 /* we want per-task implementation of this internal control */
2024
2025 /* For the per-thread internal controls implementation */
2026 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2027}
2028
2029void ompc_set_schedule(omp_sched_t kind, int modifier) {
2030 // !!!!! TODO: check the per-task binding
2031 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2032}
2033
2034int ompc_get_ancestor_thread_num(int level) {
2035 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2036}
2037
2038int ompc_get_team_size(int level) {
2039 return __kmp_get_team_size(__kmp_entry_gtid(), level);
2040}
2041
2042/* OpenMP 5.0 Affinity Format API */
2043void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2044 if (!__kmp_init_serial) {
2045 __kmp_serial_initialize();
2046 }
2047 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2048 format, KMP_STRLEN(format) + 1);
2049}
2050
2051size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2052 size_t format_size;
2053 if (!__kmp_init_serial) {
2054 __kmp_serial_initialize();
2055 }
2056 format_size = KMP_STRLEN(__kmp_affinity_format);
2057 if (buffer && size) {
2058 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2059 format_size + 1);
2060 }
2061 return format_size;
2062}
2063
2064void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2065 int gtid;
2066 if (!TCR_4(__kmp_init_middle)) {
2067 __kmp_middle_initialize();
2068 }
2069 __kmp_assign_root_init_mask();
2070 gtid = __kmp_get_gtid();
2071#if KMP_AFFINITY_SUPPORTED
2072 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2073 __kmp_affinity.flags.reset) {
2074 __kmp_reset_root_init_mask(gtid);
2075 }
2076#endif
2077 __kmp_aux_display_affinity(gtid, format);
2078}
2079
2080size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2081 char const *format) {
2082 int gtid;
2083 size_t num_required;
2084 kmp_str_buf_t capture_buf;
2085 if (!TCR_4(__kmp_init_middle)) {
2086 __kmp_middle_initialize();
2087 }
2088 __kmp_assign_root_init_mask();
2089 gtid = __kmp_get_gtid();
2090#if KMP_AFFINITY_SUPPORTED
2091 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2092 __kmp_affinity.flags.reset) {
2093 __kmp_reset_root_init_mask(gtid);
2094 }
2095#endif
2096 __kmp_str_buf_init(&capture_buf);
2097 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2098 if (buffer && buf_size) {
2099 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2100 capture_buf.used + 1);
2101 }
2102 __kmp_str_buf_free(&capture_buf);
2103 return num_required;
2104}
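// A minimal usage sketch of the OpenMP 5.0 affinity-format API backed by the
// routines above. It relies on the behavior shown here that the capture call
// returns the required size even when no buffer is supplied; the format
// string is an assumed-valid example of the affinity-format field syntax.
#if 0
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
int main(void) {
  omp_set_affinity_format("thread %n bound to {%A}");
#pragma omp parallel
  {
    size_t need = omp_capture_affinity(NULL, 0, NULL); // query the size only
    char *buf = (char *)malloc(need + 1);
    omp_capture_affinity(buf, need + 1, NULL); // NULL format => current ICV
    printf("%s\n", buf);
    free(buf);
  }
  return 0;
}
#endif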
2105
2106void kmpc_set_stacksize(int arg) {
2107 // __kmp_aux_set_stacksize initializes the library if needed
2108 __kmp_aux_set_stacksize(arg);
2109}
2110
2111void kmpc_set_stacksize_s(size_t arg) {
2112 // __kmp_aux_set_stacksize initializes the library if needed
2113 __kmp_aux_set_stacksize(arg);
2114}
2115
2116void kmpc_set_blocktime(int arg) {
2117 int gtid, tid, bt = arg;
2118 kmp_info_t *thread;
2119
2120 gtid = __kmp_entry_gtid();
2121 tid = __kmp_tid_from_gtid(gtid);
2122 thread = __kmp_thread_from_gtid(gtid);
2123
2124 __kmp_aux_convert_blocktime(&bt);
2125 __kmp_aux_set_blocktime(bt, thread, tid);
2126}
2127
2128void kmpc_set_library(int arg) {
2129 // __kmp_user_set_library initializes the library if needed
2130 __kmp_user_set_library((enum library_type)arg);
2131}
2132
2133void kmpc_set_defaults(char const *str) {
2134 // __kmp_aux_set_defaults initializes the library if needed
2135 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2136}
2137
2138void kmpc_set_disp_num_buffers(int arg) {
2139 // ignore after initialization because some teams have already
2140 // allocated dispatch buffers
2141 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2142 arg <= KMP_MAX_DISP_NUM_BUFF) {
2143 __kmp_dispatch_num_buffers = arg;
2144 }
2145}
2146
2147int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2148#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2149 return -1;
2150#else
2151 if (!TCR_4(__kmp_init_middle)) {
2152 __kmp_middle_initialize();
2153 }
2154 __kmp_assign_root_init_mask();
2155 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2156#endif
2157}
2158
2159int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2160#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2161 return -1;
2162#else
2163 if (!TCR_4(__kmp_init_middle)) {
2164 __kmp_middle_initialize();
2165 }
2166 __kmp_assign_root_init_mask();
2167 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2168#endif
2169}
2170
2171int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2172#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2173 return -1;
2174#else
2175 if (!TCR_4(__kmp_init_middle)) {
2176 __kmp_middle_initialize();
2177 }
2178 __kmp_assign_root_init_mask();
2179 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2180#endif
2181}
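// A sketch of the kmp_* affinity-mask extension these entry points service,
// assuming the declarations libomp's omp.h provides for it
// (kmp_affinity_mask_t, kmp_create_affinity_mask, kmp_set_affinity, ...);
// the "0 on success" return convention is assumed as well.
#if 0
#include <omp.h>
static int pin_current_thread_to_proc0(void) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(0, &mask) != 0) // -> kmpc_set_affinity_mask_proc
    return -1;
  int rc = kmp_set_affinity(&mask); // bind the calling thread to the mask
  kmp_destroy_affinity_mask(&mask);
  return rc;
}
#endif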
2182
2183/* -------------------------------------------------------------------------- */
2228void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2229 void *cpy_data, void (*cpy_func)(void *, void *),
2230 kmp_int32 didit) {
2231 void **data_ptr;
2232 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2233 __kmp_assert_valid_gtid(gtid);
2234
2235 KMP_MB();
2236
2237 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2238
2239 if (__kmp_env_consistency_check) {
2240 if (loc == 0) {
2241 KMP_WARNING(ConstructIdentInvalid);
2242 }
2243 }
2244
2245 // ToDo: Optimize the following two barriers into some kind of split barrier
2246
2247 if (didit)
2248 *data_ptr = cpy_data;
2249
2250#if OMPT_SUPPORT
2251 ompt_frame_t *ompt_frame;
2252 if (ompt_enabled.enabled) {
2253 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2254 if (ompt_frame->enter_frame.ptr == NULL)
2255 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2256 }
2257 OMPT_STORE_RETURN_ADDRESS(gtid);
2258#endif
2259/* This barrier is not a barrier region boundary */
2260#if USE_ITT_NOTIFY
2261 __kmp_threads[gtid]->th.th_ident = loc;
2262#endif
2263 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2264
2265 if (!didit)
2266 (*cpy_func)(cpy_data, *data_ptr);
2267
2268 // Consider next barrier a user-visible barrier for barrier region boundaries
2269 // Nesting checks are already handled by the single construct checks
2270 {
2271#if OMPT_SUPPORT
2272 OMPT_STORE_RETURN_ADDRESS(gtid);
2273#endif
2274#if USE_ITT_NOTIFY
2275 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2276// tasks can overwrite the location)
2277#endif
2278 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2279#if OMPT_SUPPORT && OMPT_OPTIONAL
2280 if (ompt_enabled.enabled) {
2281 ompt_frame->enter_frame = ompt_data_none;
2282 }
2283#endif
2284 }
2285}
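// A hedged sketch of the lowering that reaches __kmpc_copyprivate: for
// "#pragma omp single copyprivate(x)" the executing thread publishes a record
// describing its data, and every other thread copies from it through the
// compiler-generated cpy_func. __kmpc_single/__kmpc_end_single are the usual
// single-construct entry points; compute() is a hypothetical helper.
#if 0
typedef struct { int *x_ptr; } cpy_rec_t; // hypothetical copyprivate record
static void cpy_fn(void *dst_v, void *src_v) {
  ((cpy_rec_t *)dst_v)->x_ptr[0] = ((cpy_rec_t *)src_v)->x_ptr[0];
}
extern int compute(void); // hypothetical
static void single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  cpy_rec_t rec = {x};
  kmp_int32 didit = __kmpc_single(loc, gtid);
  if (didit) {
    *x = compute(); // executed by the chosen thread only
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_copyprivate(loc, gtid, sizeof(rec), &rec, cpy_fn, didit);
}
#endif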
2286
2287/* --------------------------------------------------------------------------*/
2304void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2305 void **data_ptr;
2306
2307 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2308
2309 KMP_MB();
2310
2311 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2312
2313 if (__kmp_env_consistency_check) {
2314 if (loc == 0) {
2315 KMP_WARNING(ConstructIdentInvalid);
2316 }
2317 }
2318
2319 // ToDo: Optimize the following barrier
2320
2321 if (cpy_data)
2322 *data_ptr = cpy_data;
2323
2324#if OMPT_SUPPORT
2325 ompt_frame_t *ompt_frame;
2326 if (ompt_enabled.enabled) {
2327 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2328 if (ompt_frame->enter_frame.ptr == NULL)
2329 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2330 OMPT_STORE_RETURN_ADDRESS(gtid);
2331 }
2332#endif
2333/* This barrier is not a barrier region boundary */
2334#if USE_ITT_NOTIFY
2335 __kmp_threads[gtid]->th.th_ident = loc;
2336#endif
2337 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2338
2339 return *data_ptr;
2340}
2341
2342/* -------------------------------------------------------------------------- */
2343
2344#define INIT_LOCK __kmp_init_user_lock_with_checks
2345#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2346#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2347#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2348#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2349#define ACQUIRE_NESTED_LOCK_TIMED \
2350 __kmp_acquire_nested_user_lock_with_checks_timed
2351#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2352#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2353#define TEST_LOCK __kmp_test_user_lock_with_checks
2354#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2355#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2356#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2357
2358// TODO: Make check abort messages use location info & pass it into
2359// with_checks routines
2360
2361#if KMP_USE_DYNAMIC_LOCK
2362
2363// internal lock initializer
2364static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2365 kmp_dyna_lockseq_t seq) {
2366 if (KMP_IS_D_LOCK(seq)) {
2367 KMP_INIT_D_LOCK(lock, seq);
2368#if USE_ITT_BUILD
2369 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2370#endif
2371 } else {
2372 KMP_INIT_I_LOCK(lock, seq);
2373#if USE_ITT_BUILD
2374 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2375 __kmp_itt_lock_creating(ilk->lock, loc);
2376#endif
2377 }
2378}
2379
2380// internal nest lock initializer
2381static __forceinline void
2382__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2383 kmp_dyna_lockseq_t seq) {
2384#if KMP_USE_TSX
2385 // Don't have nested lock implementation for speculative locks
2386 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2387 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2388 seq = __kmp_user_lock_seq;
2389#endif
2390 switch (seq) {
2391 case lockseq_tas:
2392 seq = lockseq_nested_tas;
2393 break;
2394#if KMP_USE_FUTEX
2395 case lockseq_futex:
2396 seq = lockseq_nested_futex;
2397 break;
2398#endif
2399 case lockseq_ticket:
2400 seq = lockseq_nested_ticket;
2401 break;
2402 case lockseq_queuing:
2403 seq = lockseq_nested_queuing;
2404 break;
2405 case lockseq_drdpa:
2406 seq = lockseq_nested_drdpa;
2407 break;
2408 default:
2409 seq = lockseq_nested_queuing;
2410 }
2411 KMP_INIT_I_LOCK(lock, seq);
2412#if USE_ITT_BUILD
2413 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2414 __kmp_itt_lock_creating(ilk->lock, loc);
2415#endif
2416}
2417
2418/* initialize the lock with a hint */
2419void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2420 uintptr_t hint) {
2421 KMP_DEBUG_ASSERT(__kmp_init_serial);
2422 if (__kmp_env_consistency_check && user_lock == NULL) {
2423 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2424 }
2425
2426 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2427
2428#if OMPT_SUPPORT && OMPT_OPTIONAL
2429 // This is the case, if called from omp_init_lock_with_hint:
2430 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2431 if (!codeptr)
2432 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2433 if (ompt_enabled.ompt_callback_lock_init) {
2434 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2435 ompt_mutex_lock, (omp_lock_hint_t)hint,
2436 __ompt_get_mutex_impl_type(user_lock),
2437 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2438 }
2439#endif
2440}
2441
2442/* initialize the lock with a hint */
2443void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2444 void **user_lock, uintptr_t hint) {
2445 KMP_DEBUG_ASSERT(__kmp_init_serial);
2446 if (__kmp_env_consistency_check && user_lock == NULL) {
2447 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2448 }
2449
2450 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2451
2452#if OMPT_SUPPORT && OMPT_OPTIONAL
2453 // This is the case, if called from omp_init_lock_with_hint:
2454 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2455 if (!codeptr)
2456 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2457 if (ompt_enabled.ompt_callback_lock_init) {
2458 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2459 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2460 __ompt_get_mutex_impl_type(user_lock),
2461 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2462 }
2463#endif
2464}
2465
2466#endif // KMP_USE_DYNAMIC_LOCK
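// User-level view of the hinted initializers above, assuming the OpenMP 4.5+
// omp_*_lock_with_hint API and hint constants from omp.h; the runtime maps
// the hint to a lock sequence via __kmp_map_hint_to_lock as shown.
#if 0
#include <omp.h>
static omp_lock_t spec_lock;
static omp_nest_lock_t contended_nlock;
static void init_hinted_locks(void) {
  omp_init_lock_with_hint(&spec_lock, omp_lock_hint_speculative);
  omp_init_nest_lock_with_hint(&contended_nlock, omp_lock_hint_contended);
}
#endif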
2467
2468/* initialize the lock */
2469void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2470#if KMP_USE_DYNAMIC_LOCK
2471
2472 KMP_DEBUG_ASSERT(__kmp_init_serial);
2473 if (__kmp_env_consistency_check && user_lock == NULL) {
2474 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2475 }
2476 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2477
2478#if OMPT_SUPPORT && OMPT_OPTIONAL
2479 // This is the case, if called from omp_init_lock_with_hint:
2480 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2481 if (!codeptr)
2482 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2483 if (ompt_enabled.ompt_callback_lock_init) {
2484 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2485 ompt_mutex_lock, omp_lock_hint_none,
2486 __ompt_get_mutex_impl_type(user_lock),
2487 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2488 }
2489#endif
2490
2491#else // KMP_USE_DYNAMIC_LOCK
2492
2493 static char const *const func = "omp_init_lock";
2494 kmp_user_lock_p lck;
2495 KMP_DEBUG_ASSERT(__kmp_init_serial);
2496
2497 if (__kmp_env_consistency_check) {
2498 if (user_lock == NULL) {
2499 KMP_FATAL(LockIsUninitialized, func);
2500 }
2501 }
2502
2503 KMP_CHECK_USER_LOCK_INIT();
2504
2505 if ((__kmp_user_lock_kind == lk_tas) &&
2506 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2507 lck = (kmp_user_lock_p)user_lock;
2508 }
2509#if KMP_USE_FUTEX
2510 else if ((__kmp_user_lock_kind == lk_futex) &&
2511 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2512 lck = (kmp_user_lock_p)user_lock;
2513 }
2514#endif
2515 else {
2516 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2517 }
2518 INIT_LOCK(lck);
2519 __kmp_set_user_lock_location(lck, loc);
2520
2521#if OMPT_SUPPORT && OMPT_OPTIONAL
2522 // This is the case, if called from omp_init_lock_with_hint:
2523 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2524 if (!codeptr)
2525 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2526 if (ompt_enabled.ompt_callback_lock_init) {
2527 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2528 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2529 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2530 }
2531#endif
2532
2533#if USE_ITT_BUILD
2534 __kmp_itt_lock_creating(lck);
2535#endif /* USE_ITT_BUILD */
2536
2537#endif // KMP_USE_DYNAMIC_LOCK
2538} // __kmpc_init_lock
2539
2540/* initialize the lock */
2541void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2542#if KMP_USE_DYNAMIC_LOCK
2543
2544 KMP_DEBUG_ASSERT(__kmp_init_serial);
2545 if (__kmp_env_consistency_check && user_lock == NULL) {
2546 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2547 }
2548 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2549
2550#if OMPT_SUPPORT && OMPT_OPTIONAL
2551 // This is the case, if called from omp_init_lock_with_hint:
2552 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2553 if (!codeptr)
2554 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2555 if (ompt_enabled.ompt_callback_lock_init) {
2556 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2557 ompt_mutex_nest_lock, omp_lock_hint_none,
2558 __ompt_get_mutex_impl_type(user_lock),
2559 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2560 }
2561#endif
2562
2563#else // KMP_USE_DYNAMIC_LOCK
2564
2565 static char const *const func = "omp_init_nest_lock";
2566 kmp_user_lock_p lck;
2567 KMP_DEBUG_ASSERT(__kmp_init_serial);
2568
2569 if (__kmp_env_consistency_check) {
2570 if (user_lock == NULL) {
2571 KMP_FATAL(LockIsUninitialized, func);
2572 }
2573 }
2574
2575 KMP_CHECK_USER_LOCK_INIT();
2576
2577 if ((__kmp_user_lock_kind == lk_tas) &&
2578 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2579 OMP_NEST_LOCK_T_SIZE)) {
2580 lck = (kmp_user_lock_p)user_lock;
2581 }
2582#if KMP_USE_FUTEX
2583 else if ((__kmp_user_lock_kind == lk_futex) &&
2584 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2585 OMP_NEST_LOCK_T_SIZE)) {
2586 lck = (kmp_user_lock_p)user_lock;
2587 }
2588#endif
2589 else {
2590 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2591 }
2592
2593 INIT_NESTED_LOCK(lck);
2594 __kmp_set_user_lock_location(lck, loc);
2595
2596#if OMPT_SUPPORT && OMPT_OPTIONAL
2597 // This is the case, if called from omp_init_lock_with_hint:
2598 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2599 if (!codeptr)
2600 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2601 if (ompt_enabled.ompt_callback_lock_init) {
2602 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2603 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2604 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2605 }
2606#endif
2607
2608#if USE_ITT_BUILD
2609 __kmp_itt_lock_creating(lck);
2610#endif /* USE_ITT_BUILD */
2611
2612#endif // KMP_USE_DYNAMIC_LOCK
2613} // __kmpc_init_nest_lock
2614
2615void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2616#if KMP_USE_DYNAMIC_LOCK
2617
2618#if USE_ITT_BUILD
2619 kmp_user_lock_p lck;
2620 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2621 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2622 } else {
2623 lck = (kmp_user_lock_p)user_lock;
2624 }
2625 __kmp_itt_lock_destroyed(lck);
2626#endif
2627#if OMPT_SUPPORT && OMPT_OPTIONAL
2628 // This is the case, if called from omp_init_lock_with_hint:
2629 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2630 if (!codeptr)
2631 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2632 if (ompt_enabled.ompt_callback_lock_destroy) {
2633 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2634 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2635 }
2636#endif
2637 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2638#else
2639 kmp_user_lock_p lck;
2640
2641 if ((__kmp_user_lock_kind == lk_tas) &&
2642 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2643 lck = (kmp_user_lock_p)user_lock;
2644 }
2645#if KMP_USE_FUTEX
2646 else if ((__kmp_user_lock_kind == lk_futex) &&
2647 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2648 lck = (kmp_user_lock_p)user_lock;
2649 }
2650#endif
2651 else {
2652 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2653 }
2654
2655#if OMPT_SUPPORT && OMPT_OPTIONAL
2656 // This is the case, if called from omp_init_lock_with_hint:
2657 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2658 if (!codeptr)
2659 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2660 if (ompt_enabled.ompt_callback_lock_destroy) {
2661 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2662 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2663 }
2664#endif
2665
2666#if USE_ITT_BUILD
2667 __kmp_itt_lock_destroyed(lck);
2668#endif /* USE_ITT_BUILD */
2669 DESTROY_LOCK(lck);
2670
2671 if ((__kmp_user_lock_kind == lk_tas) &&
2672 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2673 ;
2674 }
2675#if KMP_USE_FUTEX
2676 else if ((__kmp_user_lock_kind == lk_futex) &&
2677 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2678 ;
2679 }
2680#endif
2681 else {
2682 __kmp_user_lock_free(user_lock, gtid, lck);
2683 }
2684#endif // KMP_USE_DYNAMIC_LOCK
2685} // __kmpc_destroy_lock
2686
2687/* destroy the lock */
2688void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2689#if KMP_USE_DYNAMIC_LOCK
2690
2691#if USE_ITT_BUILD
2692 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2693 __kmp_itt_lock_destroyed(ilk->lock);
2694#endif
2695#if OMPT_SUPPORT && OMPT_OPTIONAL
2696 // This is the case, if called from omp_init_lock_with_hint:
2697 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2698 if (!codeptr)
2699 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2700 if (ompt_enabled.ompt_callback_lock_destroy) {
2701 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2702 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2703 }
2704#endif
2705 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2706
2707#else // KMP_USE_DYNAMIC_LOCK
2708
2709 kmp_user_lock_p lck;
2710
2711 if ((__kmp_user_lock_kind == lk_tas) &&
2712 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2713 OMP_NEST_LOCK_T_SIZE)) {
2714 lck = (kmp_user_lock_p)user_lock;
2715 }
2716#if KMP_USE_FUTEX
2717 else if ((__kmp_user_lock_kind == lk_futex) &&
2718 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2719 OMP_NEST_LOCK_T_SIZE)) {
2720 lck = (kmp_user_lock_p)user_lock;
2721 }
2722#endif
2723 else {
2724 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2725 }
2726
2727#if OMPT_SUPPORT && OMPT_OPTIONAL
2728 // This is the case, if called from omp_init_lock_with_hint:
2729 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2730 if (!codeptr)
2731 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2732 if (ompt_enabled.ompt_callback_lock_destroy) {
2733 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2734 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2735 }
2736#endif
2737
2738#if USE_ITT_BUILD
2739 __kmp_itt_lock_destroyed(lck);
2740#endif /* USE_ITT_BUILD */
2741
2742 DESTROY_NESTED_LOCK(lck);
2743
2744 if ((__kmp_user_lock_kind == lk_tas) &&
2745 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2746 OMP_NEST_LOCK_T_SIZE)) {
2747 ;
2748 }
2749#if KMP_USE_FUTEX
2750 else if ((__kmp_user_lock_kind == lk_futex) &&
2751 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2752 OMP_NEST_LOCK_T_SIZE)) {
2753 ;
2754 }
2755#endif
2756 else {
2757 __kmp_user_lock_free(user_lock, gtid, lck);
2758 }
2759#endif // KMP_USE_DYNAMIC_LOCK
2760} // __kmpc_destroy_nest_lock
2761
2762void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2763 KMP_COUNT_BLOCK(OMP_set_lock);
2764#if KMP_USE_DYNAMIC_LOCK
2765 int tag = KMP_EXTRACT_D_TAG(user_lock);
2766#if USE_ITT_BUILD
2767 __kmp_itt_lock_acquiring(
2768 (kmp_user_lock_p)
2769 user_lock); // itt function will get to the right lock object.
2770#endif
2771#if OMPT_SUPPORT && OMPT_OPTIONAL
2772 // This is the case, if called from omp_init_lock_with_hint:
2773 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2774 if (!codeptr)
2775 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2776 if (ompt_enabled.ompt_callback_mutex_acquire) {
2777 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2778 ompt_mutex_lock, omp_lock_hint_none,
2779 __ompt_get_mutex_impl_type(user_lock),
2780 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2781 }
2782#endif
2783#if KMP_USE_INLINED_TAS
2784 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2785 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2786 } else
2787#elif KMP_USE_INLINED_FUTEX
2788 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2789 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2790 } else
2791#endif
2792 {
2793 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2794 }
2795#if USE_ITT_BUILD
2796 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2797#endif
2798#if OMPT_SUPPORT && OMPT_OPTIONAL
2799 if (ompt_enabled.ompt_callback_mutex_acquired) {
2800 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2801 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2802 }
2803#endif
2804
2805#else // KMP_USE_DYNAMIC_LOCK
2806
2807 kmp_user_lock_p lck;
2808
2809 if ((__kmp_user_lock_kind == lk_tas) &&
2810 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2811 lck = (kmp_user_lock_p)user_lock;
2812 }
2813#if KMP_USE_FUTEX
2814 else if ((__kmp_user_lock_kind == lk_futex) &&
2815 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2816 lck = (kmp_user_lock_p)user_lock;
2817 }
2818#endif
2819 else {
2820 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2821 }
2822
2823#if USE_ITT_BUILD
2824 __kmp_itt_lock_acquiring(lck);
2825#endif /* USE_ITT_BUILD */
2826#if OMPT_SUPPORT && OMPT_OPTIONAL
2827 // This is the case, if called from omp_init_lock_with_hint:
2828 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2829 if (!codeptr)
2830 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2831 if (ompt_enabled.ompt_callback_mutex_acquire) {
2832 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2833 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2834 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2835 }
2836#endif
2837
2838 ACQUIRE_LOCK(lck, gtid);
2839
2840#if USE_ITT_BUILD
2841 __kmp_itt_lock_acquired(lck);
2842#endif /* USE_ITT_BUILD */
2843
2844#if OMPT_SUPPORT && OMPT_OPTIONAL
2845 if (ompt_enabled.ompt_callback_mutex_acquired) {
2846 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2847 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2848 }
2849#endif
2850
2851#endif // KMP_USE_DYNAMIC_LOCK
2852}
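// Typical blocking use of the plain lock entry point above; omp_set_lock /
// omp_unset_lock calls from user code are ultimately serviced by
// __kmpc_set_lock / __kmpc_unset_lock.
#if 0
#include <omp.h>
static omp_lock_t counter_lock; // omp_init_lock(&counter_lock) elsewhere
static int shared_counter;
static void bump(void) {
  omp_set_lock(&counter_lock);   // blocks until acquired
  ++shared_counter;
  omp_unset_lock(&counter_lock);
}
#endif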
2853
2854void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2855#if KMP_USE_DYNAMIC_LOCK
2856
2857#if USE_ITT_BUILD
2858 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2859#endif
2860#if OMPT_SUPPORT && OMPT_OPTIONAL
2861 // This is the case, if called from omp_init_lock_with_hint:
2862 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2863 if (!codeptr)
2864 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2865 if (ompt_enabled.enabled) {
2866 if (ompt_enabled.ompt_callback_mutex_acquire) {
2867 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2868 ompt_mutex_nest_lock, omp_lock_hint_none,
2869 __ompt_get_mutex_impl_type(user_lock),
2870 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2871 }
2872 }
2873#endif
2874 int acquire_status =
2875 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2876 (void)acquire_status;
2877#if USE_ITT_BUILD
2878 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2879#endif
2880
2881#if OMPT_SUPPORT && OMPT_OPTIONAL
2882 if (ompt_enabled.enabled) {
2883 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2884 if (ompt_enabled.ompt_callback_mutex_acquired) {
2885 // lock_first
2886 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2887 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2888 codeptr);
2889 }
2890 } else {
2891 if (ompt_enabled.ompt_callback_nest_lock) {
2892 // lock_next
2893 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2894 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2895 }
2896 }
2897 }
2898#endif
2899
2900#else // KMP_USE_DYNAMIC_LOCK
2901 int acquire_status;
2902 kmp_user_lock_p lck;
2903
2904 if ((__kmp_user_lock_kind == lk_tas) &&
2905 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2906 OMP_NEST_LOCK_T_SIZE)) {
2907 lck = (kmp_user_lock_p)user_lock;
2908 }
2909#if KMP_USE_FUTEX
2910 else if ((__kmp_user_lock_kind == lk_futex) &&
2911 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2912 OMP_NEST_LOCK_T_SIZE)) {
2913 lck = (kmp_user_lock_p)user_lock;
2914 }
2915#endif
2916 else {
2917 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2918 }
2919
2920#if USE_ITT_BUILD
2921 __kmp_itt_lock_acquiring(lck);
2922#endif /* USE_ITT_BUILD */
2923#if OMPT_SUPPORT && OMPT_OPTIONAL
2924 // This is the case, if called from omp_init_lock_with_hint:
2925 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2926 if (!codeptr)
2927 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2928 if (ompt_enabled.enabled) {
2929 if (ompt_enabled.ompt_callback_mutex_acquire) {
2930 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2931 ompt_mutex_nest_lock, omp_lock_hint_none,
2932 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2933 codeptr);
2934 }
2935 }
2936#endif
2937
2938 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2939
2940#if USE_ITT_BUILD
2941 __kmp_itt_lock_acquired(lck);
2942#endif /* USE_ITT_BUILD */
2943
2944#if OMPT_SUPPORT && OMPT_OPTIONAL
2945 if (ompt_enabled.enabled) {
2946 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2947 if (ompt_enabled.ompt_callback_mutex_acquired) {
2948 // lock_first
2949 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2950 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2951 }
2952 } else {
2953 if (ompt_enabled.ompt_callback_nest_lock) {
2954 // lock_next
2955 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2956 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2957 }
2958 }
2959 }
2960#endif
2961
2962#endif // KMP_USE_DYNAMIC_LOCK
2963}
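// Nestable-lock behavior that the OMPT branches above distinguish: the first
// acquisition reports mutex_acquired ("lock_first"), while re-acquisition by
// the owning thread reports nest_lock scope_begin ("lock_next"). A minimal
// sketch:
#if 0
#include <omp.h>
static omp_nest_lock_t nlock; // omp_init_nest_lock(&nlock) elsewhere
static void reenter(void) {
  omp_set_nest_lock(&nlock);   // depth 0 -> 1 (lock_first)
  omp_set_nest_lock(&nlock);   // depth 1 -> 2 (lock_next, same owner)
  omp_unset_nest_lock(&nlock); // depth 2 -> 1 (release_lock_prev)
  omp_unset_nest_lock(&nlock); // depth 1 -> 0 (release_lock_last)
}
#endif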
2964
2965void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2966#if KMP_USE_DYNAMIC_LOCK
2967
2968 int tag = KMP_EXTRACT_D_TAG(user_lock);
2969#if USE_ITT_BUILD
2970 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2971#endif
2972#if KMP_USE_INLINED_TAS
2973 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2974 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2975 } else
2976#elif KMP_USE_INLINED_FUTEX
2977 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2978 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2979 } else
2980#endif
2981 {
2982 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2983 }
2984
2985#if OMPT_SUPPORT && OMPT_OPTIONAL
2986 // This is the case, if called from omp_init_lock_with_hint:
2987 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2988 if (!codeptr)
2989 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2990 if (ompt_enabled.ompt_callback_mutex_released) {
2991 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2992 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2993 }
2994#endif
2995
2996#else // KMP_USE_DYNAMIC_LOCK
2997
2998 kmp_user_lock_p lck;
2999
3000 /* Can't use serial interval since not block structured */
3001 /* release the lock */
3002
3003 if ((__kmp_user_lock_kind == lk_tas) &&
3004 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3005#if KMP_OS_LINUX && \
3006 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3007// "fast" path implemented to fix customer performance issue
3008#if USE_ITT_BUILD
3009 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3010#endif /* USE_ITT_BUILD */
3011 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3012 KMP_MB();
3013
3014#if OMPT_SUPPORT && OMPT_OPTIONAL
3015 // This is the case, if called from omp_init_lock_with_hint:
3016 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3017 if (!codeptr)
3018 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3019 if (ompt_enabled.ompt_callback_mutex_released) {
3020 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3021 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3022 }
3023#endif
3024
3025 return;
3026#else
3027 lck = (kmp_user_lock_p)user_lock;
3028#endif
3029 }
3030#if KMP_USE_FUTEX
3031 else if ((__kmp_user_lock_kind == lk_futex) &&
3032 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3033 lck = (kmp_user_lock_p)user_lock;
3034 }
3035#endif
3036 else {
3037 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3038 }
3039
3040#if USE_ITT_BUILD
3041 __kmp_itt_lock_releasing(lck);
3042#endif /* USE_ITT_BUILD */
3043
3044 RELEASE_LOCK(lck, gtid);
3045
3046#if OMPT_SUPPORT && OMPT_OPTIONAL
3047 // This is the case, if called from omp_init_lock_with_hint:
3048 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3049 if (!codeptr)
3050 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3051 if (ompt_enabled.ompt_callback_mutex_released) {
3052 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3053 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3054 }
3055#endif
3056
3057#endif // KMP_USE_DYNAMIC_LOCK
3058}
3059
3060/* release the lock */
3061void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3062#if KMP_USE_DYNAMIC_LOCK
3063
3064#if USE_ITT_BUILD
3065 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3066#endif
3067 int release_status =
3068 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3069 (void)release_status;
3070
3071#if OMPT_SUPPORT && OMPT_OPTIONAL
3072 // This is the case, if called from omp_init_lock_with_hint:
3073 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3074 if (!codeptr)
3075 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3076 if (ompt_enabled.enabled) {
3077 if (release_status == KMP_LOCK_RELEASED) {
3078 if (ompt_enabled.ompt_callback_mutex_released) {
3079 // release_lock_last
3080 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3081 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3082 codeptr);
3083 }
3084 } else if (ompt_enabled.ompt_callback_nest_lock) {
3085 // release_lock_prev
3086 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3087 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3088 }
3089 }
3090#endif
3091
3092#else // KMP_USE_DYNAMIC_LOCK
3093
3094 kmp_user_lock_p lck;
3095
3096 /* Can't use serial interval since not block structured */
3097
3098 if ((__kmp_user_lock_kind == lk_tas) &&
3099 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3100 OMP_NEST_LOCK_T_SIZE)) {
3101#if KMP_OS_LINUX && \
3102 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3103 // "fast" path implemented to fix customer performance issue
3104 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3105#if USE_ITT_BUILD
3106 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3107#endif /* USE_ITT_BUILD */
3108
3109#if OMPT_SUPPORT && OMPT_OPTIONAL
3110 int release_status = KMP_LOCK_STILL_HELD;
3111#endif
3112
3113 if (--(tl->lk.depth_locked) == 0) {
3114 TCW_4(tl->lk.poll, 0);
3115#if OMPT_SUPPORT && OMPT_OPTIONAL
3116 release_status = KMP_LOCK_RELEASED;
3117#endif
3118 }
3119 KMP_MB();
3120
3121#if OMPT_SUPPORT && OMPT_OPTIONAL
3122 // This is the case, if called from omp_init_lock_with_hint:
3123 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3124 if (!codeptr)
3125 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3126 if (ompt_enabled.enabled) {
3127 if (release_status == KMP_LOCK_RELEASED) {
3128 if (ompt_enabled.ompt_callback_mutex_released) {
3129 // release_lock_last
3130 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3131 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3132 }
3133 } else if (ompt_enabled.ompt_callback_nest_lock) {
3134 // release_lock_previous
3135 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3136 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3137 }
3138 }
3139#endif
3140
3141 return;
3142#else
3143 lck = (kmp_user_lock_p)user_lock;
3144#endif
3145 }
3146#if KMP_USE_FUTEX
3147 else if ((__kmp_user_lock_kind == lk_futex) &&
3148 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3149 OMP_NEST_LOCK_T_SIZE)) {
3150 lck = (kmp_user_lock_p)user_lock;
3151 }
3152#endif
3153 else {
3154 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3155 }
3156
3157#if USE_ITT_BUILD
3158 __kmp_itt_lock_releasing(lck);
3159#endif /* USE_ITT_BUILD */
3160
3161 int release_status;
3162 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3163#if OMPT_SUPPORT && OMPT_OPTIONAL
3164 // This is the case, if called from omp_init_lock_with_hint:
3165 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3166 if (!codeptr)
3167 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3168 if (ompt_enabled.enabled) {
3169 if (release_status == KMP_LOCK_RELEASED) {
3170 if (ompt_enabled.ompt_callback_mutex_released) {
3171 // release_lock_last
3172 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3173 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3174 }
3175 } else if (ompt_enabled.ompt_callback_nest_lock) {
3176 // release_lock_previous
3177 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3178 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3179 }
3180 }
3181#endif
3182
3183#endif // KMP_USE_DYNAMIC_LOCK
3184}
3185
3186/* try to acquire the lock */
3187int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3188 KMP_COUNT_BLOCK(OMP_test_lock);
3189
3190#if KMP_USE_DYNAMIC_LOCK
3191 int rc;
3192 int tag = KMP_EXTRACT_D_TAG(user_lock);
3193#if USE_ITT_BUILD
3194 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3195#endif
3196#if OMPT_SUPPORT && OMPT_OPTIONAL
3197 // This is the case, if called from omp_init_lock_with_hint:
3198 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3199 if (!codeptr)
3200 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3201 if (ompt_enabled.ompt_callback_mutex_acquire) {
3202 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3203 ompt_mutex_test_lock, omp_lock_hint_none,
3204 __ompt_get_mutex_impl_type(user_lock),
3205 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3206 }
3207#endif
3208#if KMP_USE_INLINED_TAS
3209 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3210 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3211 } else
3212#elif KMP_USE_INLINED_FUTEX
3213 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3214 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3215 } else
3216#endif
3217 {
3218 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3219 }
3220 if (rc) {
3221#if USE_ITT_BUILD
3222 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3223#endif
3224#if OMPT_SUPPORT && OMPT_OPTIONAL
3225 if (ompt_enabled.ompt_callback_mutex_acquired) {
3226 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3227 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3228 }
3229#endif
3230 return FTN_TRUE;
3231 } else {
3232#if USE_ITT_BUILD
3233 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3234#endif
3235 return FTN_FALSE;
3236 }
3237
3238#else // KMP_USE_DYNAMIC_LOCK
3239
3240 kmp_user_lock_p lck;
3241 int rc;
3242
3243 if ((__kmp_user_lock_kind == lk_tas) &&
3244 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3245 lck = (kmp_user_lock_p)user_lock;
3246 }
3247#if KMP_USE_FUTEX
3248 else if ((__kmp_user_lock_kind == lk_futex) &&
3249 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3250 lck = (kmp_user_lock_p)user_lock;
3251 }
3252#endif
3253 else {
3254 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3255 }
3256
3257#if USE_ITT_BUILD
3258 __kmp_itt_lock_acquiring(lck);
3259#endif /* USE_ITT_BUILD */
3260#if OMPT_SUPPORT && OMPT_OPTIONAL
3261 // This is the case, if called from omp_init_lock_with_hint:
3262 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3263 if (!codeptr)
3264 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3265 if (ompt_enabled.ompt_callback_mutex_acquire) {
3266 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3267 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3268 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3269 }
3270#endif
3271
3272 rc = TEST_LOCK(lck, gtid);
3273#if USE_ITT_BUILD
3274 if (rc) {
3275 __kmp_itt_lock_acquired(lck);
3276 } else {
3277 __kmp_itt_lock_cancelled(lck);
3278 }
3279#endif /* USE_ITT_BUILD */
3280#if OMPT_SUPPORT && OMPT_OPTIONAL
3281 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3282 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3283 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3284 }
3285#endif
3286
3287 return (rc ? FTN_TRUE : FTN_FALSE);
3288
3289 /* Can't use serial interval since not block structured */
3290
3291#endif // KMP_USE_DYNAMIC_LOCK
3292}
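// Non-blocking acquisition pattern served by __kmpc_test_lock: poll the lock
// and do other work while it is contended. do_other_work() and
// do_guarded_work() are hypothetical helpers.
#if 0
#include <omp.h>
extern void do_other_work(void);   // hypothetical
extern void do_guarded_work(void); // hypothetical
static omp_lock_t work_lock; // omp_init_lock(&work_lock) elsewhere
static void try_then_work(void) {
  while (!omp_test_lock(&work_lock)) // returns nonzero iff acquired
    do_other_work();
  do_guarded_work();
  omp_unset_lock(&work_lock);
}
#endif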
3293
3294/* try to acquire the lock */
3295int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3296#if KMP_USE_DYNAMIC_LOCK
3297 int rc;
3298#if USE_ITT_BUILD
3299 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3300#endif
3301#if OMPT_SUPPORT && OMPT_OPTIONAL
3302 // This is the case, if called from omp_init_lock_with_hint:
3303 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3304 if (!codeptr)
3305 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3306 if (ompt_enabled.ompt_callback_mutex_acquire) {
3307 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3308 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3309 __ompt_get_mutex_impl_type(user_lock),
3310 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3311 }
3312#endif
3313 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3314#if USE_ITT_BUILD
3315 if (rc) {
3316 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3317 } else {
3318 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3319 }
3320#endif
3321#if OMPT_SUPPORT && OMPT_OPTIONAL
3322 if (ompt_enabled.enabled && rc) {
3323 if (rc == 1) {
3324 if (ompt_enabled.ompt_callback_mutex_acquired) {
3325 // lock_first
3326 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3327 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3328 codeptr);
3329 }
3330 } else {
3331 if (ompt_enabled.ompt_callback_nest_lock) {
3332 // lock_next
3333 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3334 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3335 }
3336 }
3337 }
3338#endif
3339 return rc;
3340
3341#else // KMP_USE_DYNAMIC_LOCK
3342
3343 kmp_user_lock_p lck;
3344 int rc;
3345
3346 if ((__kmp_user_lock_kind == lk_tas) &&
3347 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3348 OMP_NEST_LOCK_T_SIZE)) {
3349 lck = (kmp_user_lock_p)user_lock;
3350 }
3351#if KMP_USE_FUTEX
3352 else if ((__kmp_user_lock_kind == lk_futex) &&
3353 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3354 OMP_NEST_LOCK_T_SIZE)) {
3355 lck = (kmp_user_lock_p)user_lock;
3356 }
3357#endif
3358 else {
3359 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3360 }
3361
3362#if USE_ITT_BUILD
3363 __kmp_itt_lock_acquiring(lck);
3364#endif /* USE_ITT_BUILD */
3365
3366#if OMPT_SUPPORT && OMPT_OPTIONAL
3367 // This is the case, if called from omp_init_lock_with_hint:
3368 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3369 if (!codeptr)
3370 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3371 if (ompt_enabled.enabled &&
3372     ompt_enabled.ompt_callback_mutex_acquire) {
3373 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3374 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3375 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3376 codeptr);
3377 }
3378#endif
3379
3380 rc = TEST_NESTED_LOCK(lck, gtid);
3381#if USE_ITT_BUILD
3382 if (rc) {
3383 __kmp_itt_lock_acquired(lck);
3384 } else {
3385 __kmp_itt_lock_cancelled(lck);
3386 }
3387#endif /* USE_ITT_BUILD */
3388#if OMPT_SUPPORT && OMPT_OPTIONAL
3389 if (ompt_enabled.enabled && rc) {
3390 if (rc == 1) {
3391 if (ompt_enabled.ompt_callback_mutex_acquired) {
3392 // lock_first
3393 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3394 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3395 }
3396 } else {
3397 if (ompt_enabled.ompt_callback_nest_lock) {
3398 // lock_next
3399 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3400 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3401 }
3402 }
3403 }
3404#endif
3405 return rc;
3406
3407 /* Can't use serial interval since not block structured */
3408
3409#endif // KMP_USE_DYNAMIC_LOCK
3410}
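// omp_test_nest_lock() returns the new nesting depth on success and 0 on
// failure, which is why the rc == 1 check above singles out the first
// acquisition for the OMPT mutex_acquired callback. A small sketch:
#if 0
#include <omp.h>
static omp_nest_lock_t nlock; // omp_init_nest_lock(&nlock) elsewhere
static int try_twice(void) {
  int d1 = omp_test_nest_lock(&nlock);          // 1 if acquired here first
  int d2 = d1 ? omp_test_nest_lock(&nlock) : 0; // 2 on the re-acquire
  if (d2) omp_unset_nest_lock(&nlock);
  if (d1) omp_unset_nest_lock(&nlock);
  return d1;
}
#endif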
3411
3412// Interface to fast scalable reduce methods routines
3413
3414// keep the selected method in a thread local structure for cross-function
3415// usage: will be used in __kmpc_end_reduce* functions;
3416// another solution: to re-determine the method one more time in
3417// __kmpc_end_reduce* functions (new prototype required then)
3418// AT: which solution is better?
3419#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3420 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3421
3422#define __KMP_GET_REDUCTION_METHOD(gtid) \
3423 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3424
3425// description of the packed_reduction_method variable: look at the macros in
3426// kmp.h
3427
3428// used in a critical section reduce block
3429static __forceinline void
3430__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3431 kmp_critical_name *crit) {
3432
3433 // this lock was visible to a customer and to the threading profile tool as a
3434 // serial overhead span (although it's used for an internal purpose only)
3435 // why was it visible in previous implementation?
3436 // should we keep it visible in new reduce block?
3437 kmp_user_lock_p lck;
3438
3439#if KMP_USE_DYNAMIC_LOCK
3440
3441 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3442 // Check if it is initialized.
3443 if (*lk == 0) {
3444 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3445 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3446 KMP_GET_D_TAG(__kmp_user_lock_seq));
3447 } else {
3448 __kmp_init_indirect_csptr(crit, loc, global_tid,
3449 KMP_GET_I_TAG(__kmp_user_lock_seq));
3450 }
3451 }
3452 // Branch for accessing the actual lock object and set operation. This
3453 // branching is inevitable since this lock initialization does not follow the
3454 // normal dispatch path (lock table is not used).
3455 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3456 lck = (kmp_user_lock_p)lk;
3457 KMP_DEBUG_ASSERT(lck != NULL);
3458 if (__kmp_env_consistency_check) {
3459 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3460 }
3461 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3462 } else {
3463 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3464 lck = ilk->lock;
3465 KMP_DEBUG_ASSERT(lck != NULL);
3466 if (__kmp_env_consistency_check) {
3467 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3468 }
3469 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3470 }
3471
3472#else // KMP_USE_DYNAMIC_LOCK
3473
3474 // We know that the fast reduction code is only emitted by Intel compilers
3475 // with 32 byte critical sections. If there isn't enough space, then we
3476 // have to use a pointer.
3477 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3478 lck = (kmp_user_lock_p)crit;
3479 } else {
3480 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3481 }
3482 KMP_DEBUG_ASSERT(lck != NULL);
3483
3484 if (__kmp_env_consistency_check)
3485 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3486
3487 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3488
3489#endif // KMP_USE_DYNAMIC_LOCK
3490}
3491
3492// used in a critical section reduce block
3493static __forceinline void
3494__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3495 kmp_critical_name *crit) {
3496
3497 kmp_user_lock_p lck;
3498
3499#if KMP_USE_DYNAMIC_LOCK
3500
3501 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3502 lck = (kmp_user_lock_p)crit;
3503 if (__kmp_env_consistency_check)
3504 __kmp_pop_sync(global_tid, ct_critical, loc);
3505 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3506 } else {
3507 kmp_indirect_lock_t *ilk =
3508 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3509 if (__kmp_env_consistency_check)
3510 __kmp_pop_sync(global_tid, ct_critical, loc);
3511 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3512 }
3513
3514#else // KMP_USE_DYNAMIC_LOCK
3515
3516 // We know that the fast reduction code is only emitted by Intel compilers
3517 // with 32 byte critical sections. If there isn't enough space, then we have
3518 // to use a pointer.
3519 if (__kmp_base_user_lock_size > 32) {
3520 lck = *((kmp_user_lock_p *)crit);
3521 KMP_ASSERT(lck != NULL);
3522 } else {
3523 lck = (kmp_user_lock_p)crit;
3524 }
3525
3526 if (__kmp_env_consistency_check)
3527 __kmp_pop_sync(global_tid, ct_critical, loc);
3528
3529 __kmp_release_user_lock_with_checks(lck, global_tid);
3530
3531#endif // KMP_USE_DYNAMIC_LOCK
3532} // __kmp_end_critical_section_reduce_block
3533
3534static __forceinline int
3535__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3536 int *task_state) {
3537 kmp_team_t *team;
3538
3539 // Check if we are inside a teams construct.
3540 if (th->th.th_teams_microtask) {
3541 *team_p = team = th->th.th_team;
3542 if (team->t.t_level == th->th.th_teams_level) {
3543 // This is reduction at teams construct.
3544 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3545 // Let's swap teams temporarily for the reduction.
3546 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3547 th->th.th_team = team->t.t_parent;
3548 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3549 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3550 *task_state = th->th.th_task_state;
3551 th->th.th_task_state = 0;
3552
3553 return 1;
3554 }
3555 }
3556 return 0;
3557}
3558
3559static __forceinline void
3560__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3561 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3562 th->th.th_info.ds.ds_tid = 0;
3563 th->th.th_team = team;
3564 th->th.th_team_nproc = team->t.t_nproc;
3565 th->th.th_task_team = team->t.t_task_team[task_state];
3566 __kmp_type_convert(task_state, &(th->th.th_task_state));
3567}
3568
3569/* 2.a.i. Reduce Block without a terminating barrier */
3585kmp_int32
3586__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3587 size_t reduce_size, void *reduce_data,
3588 void (*reduce_func)(void *lhs_data, void *rhs_data),
3589 kmp_critical_name *lck) {
3590
3591 KMP_COUNT_BLOCK(REDUCE_nowait);
3592 int retval = 0;
3593 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3594 kmp_info_t *th;
3595 kmp_team_t *team;
3596 int teams_swapped = 0, task_state;
3597 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3598 __kmp_assert_valid_gtid(global_tid);
3599
3600 // why do we need this initialization here at all?
3601 // Reduction clause can not be used as a stand-alone directive.
3602
3603 // do not call __kmp_serial_initialize(), it will be called by
3604 // __kmp_parallel_initialize() if needed
3605 // possible detection of false-positive race by the threadchecker ???
3606 if (!TCR_4(__kmp_init_parallel))
3607 __kmp_parallel_initialize();
3608
3609 __kmp_resume_if_soft_paused();
3610
3611// check correctness of reduce block nesting
3612#if KMP_USE_DYNAMIC_LOCK
3613 if (__kmp_env_consistency_check)
3614 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3615#else
3616 if (__kmp_env_consistency_check)
3617 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3618#endif
3619
3620 th = __kmp_thread_from_gtid(global_tid);
3621 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3622
3623 // packed_reduction_method value will be reused by the __kmpc_end_reduce* functions,
3624 // the value should be kept in a variable
3625 // the variable should be either a construct-specific or thread-specific
3626 // property, not a team specific property
3627 // (a thread can reach the next reduce block on the next construct, reduce
3628 // method may differ on the next construct)
3629 // an ident_t "loc" parameter could be used as a construct-specific property
3630 // (what if loc == 0?)
3631 // (if both construct-specific and team-specific variables were shared,
3632 // then unnecessary extra syncs would be needed)
3633 // a thread-specific variable is better regarding two issues above (next
3634 // construct and extra syncs)
3635 // a thread-specific "th_local.reduction_method" variable is used currently
3636 // each thread executes 'determine' and 'set' lines (no need to execute by one
3637 // thread, to avoid unnecessary extra syncs)
3638
3639 packed_reduction_method = __kmp_determine_reduction_method(
3640 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3641 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3642
3643 OMPT_REDUCTION_DECL(th, global_tid);
3644 if (packed_reduction_method == critical_reduce_block) {
3645
3646 OMPT_REDUCTION_BEGIN;
3647
3648 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3649 retval = 1;
3650
3651 } else if (packed_reduction_method == empty_reduce_block) {
3652
3653 OMPT_REDUCTION_BEGIN;
3654
3655 // usage: if team size == 1, no synchronization is required ( Intel
3656 // platforms only )
3657 retval = 1;
3658
3659 } else if (packed_reduction_method == atomic_reduce_block) {
3660
3661 retval = 2;
3662
3663 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3664 // won't be called by the code gen)
3665 // (it's not ideal, because the checking block has been closed by
3666 // this 'pop',
3667 // but the atomic operation has not been executed yet; it will be executed
3668 // slightly later, literally on the next instruction)
3669 if (__kmp_env_consistency_check)
3670 __kmp_pop_sync(global_tid, ct_reduce, loc);
3671
3672 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3673 tree_reduce_block)) {
3674
3675// AT: performance issue: a real barrier here
3676// AT: (if primary thread is slow, other threads are blocked here waiting for
3677// the primary thread to come and release them)
3678// AT: (it's not what a customer might expect specifying NOWAIT clause)
3679 // AT: (specifying NOWAIT won't result in a performance improvement; it'll
3680 // be confusing to a customer)
3681 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3682 // might go faster and be more in line with the sense of NOWAIT
3683// AT: TO DO: do epcc test and compare times
3684
3685// this barrier should be invisible to a customer and to the threading profile
3686// tool (it's neither a terminating barrier nor customer's code, it's
3687// used for an internal purpose)
3688#if OMPT_SUPPORT
3689 // JP: can this barrier potentially lead to task scheduling?
3690 // JP: as long as there is a barrier in the implementation, OMPT should and
3691 // will provide the barrier events
3692 // so we set up the necessary frame/return addresses.
3693 ompt_frame_t *ompt_frame;
3694 if (ompt_enabled.enabled) {
3695 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3696 if (ompt_frame->enter_frame.ptr == NULL)
3697 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3698 }
3699 OMPT_STORE_RETURN_ADDRESS(global_tid);
3700#endif
3701#if USE_ITT_NOTIFY
3702 __kmp_threads[global_tid]->th.th_ident = loc;
3703#endif
3704 retval =
3705 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3706 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3707 retval = (retval != 0) ? (0) : (1);
3708#if OMPT_SUPPORT && OMPT_OPTIONAL
3709 if (ompt_enabled.enabled) {
3710 ompt_frame->enter_frame = ompt_data_none;
3711 }
3712#endif
3713
3714 // all other workers except primary thread should do this pop here
3715 // (none of the other workers will get to __kmpc_end_reduce_nowait())
3716 if (__kmp_env_consistency_check) {
3717 if (retval == 0) {
3718 __kmp_pop_sync(global_tid, ct_reduce, loc);
3719 }
3720 }
3721
3722 } else {
3723
3724 // should never reach this block
3725 KMP_ASSERT(0); // "unexpected method"
3726 }
3727 if (teams_swapped) {
3728 __kmp_restore_swapped_teams(th, team, task_state);
3729 }
3730 KA_TRACE(
3731 10,
3732 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3733 global_tid, packed_reduction_method, retval));
3734
3735 return retval;
3736}
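// Roughly how a compiler consumes the return value of __kmpc_reduce_nowait
// for a "reduction(+:sum)" clause; the exact generated shape varies by
// compiler, so this is only a sketch. reduce_fn combines right-hand data into
// left-hand data, and crit is the kmp_critical_name cache passed as 'lck'.
#if 0
static kmp_critical_name crit;
static void reduce_fn(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; }
static void reduce_epilogue(ident_t *loc, kmp_int32 gtid, int *sum,
                            int sum_priv) {
  switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &sum_priv, reduce_fn,
                               &crit)) {
  case 1: // critical/empty/tree winner: combine, then close the region
    *sum += sum_priv;
    __kmpc_end_reduce_nowait(loc, gtid, &crit);
    break;
  case 2: // atomic_reduce_block: combine atomically, no end call is emitted
    __atomic_fetch_add(sum, sum_priv, __ATOMIC_SEQ_CST); // stand-in for the
    break;                                               // generated atomic op
  default: // 0: nothing left to do on this thread
    break;
  }
}
#endif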
3737
3746void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3747 kmp_critical_name *lck) {
3748
3749 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3750
3751 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3752 __kmp_assert_valid_gtid(global_tid);
3753
3754 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3755
3756 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3757
3758 if (packed_reduction_method == critical_reduce_block) {
3759
3760 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3761 OMPT_REDUCTION_END;
3762
3763 } else if (packed_reduction_method == empty_reduce_block) {
3764
3765 // usage: if team size == 1, no synchronization is required ( on Intel
3766 // platforms only )
3767
3768 OMPT_REDUCTION_END;
3769
3770 } else if (packed_reduction_method == atomic_reduce_block) {
3771
3772 // neither primary thread nor other workers should get here
3773 // (code gen does not generate this call in case 2: atomic reduce block)
3774 // actually it's better to remove this else-if entirely;
3775 // after removal this value will be checked by the 'else' and will assert
3776
3777 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3778 tree_reduce_block)) {
3779
3780 // only primary thread gets here
3781 // OMPT: tree reduction is annotated in the barrier code
3782
3783 } else {
3784
3785 // should never reach this block
3786 KMP_ASSERT(0); // "unexpected method"
3787 }
3788
3789 if (__kmp_env_consistency_check)
3790 __kmp_pop_sync(global_tid, ct_reduce, loc);
3791
3792 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3793 global_tid, packed_reduction_method));
3794
3795 return;
3796}
3797
3798/* 2.a.ii. Reduce Block with a terminating barrier */
3799
3815kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3816 size_t reduce_size, void *reduce_data,
3817 void (*reduce_func)(void *lhs_data, void *rhs_data),
3818 kmp_critical_name *lck) {
3819 KMP_COUNT_BLOCK(REDUCE_wait);
3820 int retval = 0;
3821 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3822 kmp_info_t *th;
3823 kmp_team_t *team;
3824 int teams_swapped = 0, task_state;
3825
3826 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3827 __kmp_assert_valid_gtid(global_tid);
3828
3829 // why do we need this initialization here at all?
3830 // Reduction clause can not be a stand-alone directive.
3831
3832 // do not call __kmp_serial_initialize(), it will be called by
3833 // __kmp_parallel_initialize() if needed
3834 // possible detection of false-positive race by the threadchecker ???
3835 if (!TCR_4(__kmp_init_parallel))
3836 __kmp_parallel_initialize();
3837
3838 __kmp_resume_if_soft_paused();
3839
3840// check correctness of reduce block nesting
3841#if KMP_USE_DYNAMIC_LOCK
3842 if (__kmp_env_consistency_check)
3843 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3844#else
3845 if (__kmp_env_consistency_check)
3846 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3847#endif
3848
3849 th = __kmp_thread_from_gtid(global_tid);
3850 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3851
3852 packed_reduction_method = __kmp_determine_reduction_method(
3853 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3854 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3855
3856 OMPT_REDUCTION_DECL(th, global_tid);
3857
3858 if (packed_reduction_method == critical_reduce_block) {
3859
3860 OMPT_REDUCTION_BEGIN;
3861 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3862 retval = 1;
3863
3864 } else if (packed_reduction_method == empty_reduce_block) {
3865
3866 OMPT_REDUCTION_BEGIN;
3867 // usage: if team size == 1, no synchronization is required (Intel
3868 // platforms only)
3869 retval = 1;
3870
3871 } else if (packed_reduction_method == atomic_reduce_block) {
3872
3873 retval = 2;
3874
3875 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3876 tree_reduce_block)) {
3877
3878// case tree_reduce_block:
3879// this barrier should be visible to a customer and to the threading profile
3880// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3881#if OMPT_SUPPORT
3882 ompt_frame_t *ompt_frame;
3883 if (ompt_enabled.enabled) {
3884 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3885 if (ompt_frame->enter_frame.ptr == NULL)
3886 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3887 }
3888 OMPT_STORE_RETURN_ADDRESS(global_tid);
3889#endif
3890#if USE_ITT_NOTIFY
3891 __kmp_threads[global_tid]->th.th_ident =
3892 loc; // needed for correct notification of frames
3893#endif
3894 retval =
3895 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3896 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3897 retval = (retval != 0) ? (0) : (1);
3898#if OMPT_SUPPORT && OMPT_OPTIONAL
3899 if (ompt_enabled.enabled) {
3900 ompt_frame->enter_frame = ompt_data_none;
3901 }
3902#endif
3903
3904 // all other workers except the primary thread should do this pop here
3905 // (none of the other workers will enter __kmpc_end_reduce())
3906 if (__kmp_env_consistency_check) {
3907 if (retval == 0) { // 0: all other workers; 1: primary thread
3908 __kmp_pop_sync(global_tid, ct_reduce, loc);
3909 }
3910 }
3911
3912 } else {
3913
3914 // should never reach this block
3915 KMP_ASSERT(0); // "unexpected method"
3916 }
3917 if (teams_swapped) {
3918 __kmp_restore_swapped_teams(th, team, task_state);
3919 }
3920
3921 KA_TRACE(10,
3922 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3923 global_tid, packed_reduction_method, retval));
3924 return retval;
3925}
3926
3937void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3938 kmp_critical_name *lck) {
3939
3940 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3941 kmp_info_t *th;
3942 kmp_team_t *team;
3943 int teams_swapped = 0, task_state;
3944
3945 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3946 __kmp_assert_valid_gtid(global_tid);
3947
3948 th = __kmp_thread_from_gtid(global_tid);
3949 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3950
3951 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3952
3953 // this barrier should be visible to a customer and to the threading profile
3954 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3955 OMPT_REDUCTION_DECL(th, global_tid);
3956
3957 if (packed_reduction_method == critical_reduce_block) {
3958 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3959
3960 OMPT_REDUCTION_END;
3961
3962// TODO: implicit barrier: should be exposed
3963#if OMPT_SUPPORT
3964 ompt_frame_t *ompt_frame;
3965 if (ompt_enabled.enabled) {
3966 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3967 if (ompt_frame->enter_frame.ptr == NULL)
3968 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3969 }
3970 OMPT_STORE_RETURN_ADDRESS(global_tid);
3971#endif
3972#if USE_ITT_NOTIFY
3973 __kmp_threads[global_tid]->th.th_ident = loc;
3974#endif
3975 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3976#if OMPT_SUPPORT && OMPT_OPTIONAL
3977 if (ompt_enabled.enabled) {
3978 ompt_frame->enter_frame = ompt_data_none;
3979 }
3980#endif
3981
3982 } else if (packed_reduction_method == empty_reduce_block) {
3983
3984 OMPT_REDUCTION_END;
3985
3986// usage: if team size==1, no synchronization is required (Intel platforms only)
3987
3988// TODO: implicit barrier: should be exposed
3989#if OMPT_SUPPORT
3990 ompt_frame_t *ompt_frame;
3991 if (ompt_enabled.enabled) {
3992 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3993 if (ompt_frame->enter_frame.ptr == NULL)
3994 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3995 }
3996 OMPT_STORE_RETURN_ADDRESS(global_tid);
3997#endif
3998#if USE_ITT_NOTIFY
3999 __kmp_threads[global_tid]->th.th_ident = loc;
4000#endif
4001 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4002#if OMPT_SUPPORT && OMPT_OPTIONAL
4003 if (ompt_enabled.enabled) {
4004 ompt_frame->enter_frame = ompt_data_none;
4005 }
4006#endif
4007
4008 } else if (packed_reduction_method == atomic_reduce_block) {
4009
4010#if OMPT_SUPPORT
4011 ompt_frame_t *ompt_frame;
4012 if (ompt_enabled.enabled) {
4013 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4014 if (ompt_frame->enter_frame.ptr == NULL)
4015 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4016 }
4017 OMPT_STORE_RETURN_ADDRESS(global_tid);
4018#endif
4019// TODO: implicit barrier: should be exposed
4020#if USE_ITT_NOTIFY
4021 __kmp_threads[global_tid]->th.th_ident = loc;
4022#endif
4023 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4024#if OMPT_SUPPORT && OMPT_OPTIONAL
4025 if (ompt_enabled.enabled) {
4026 ompt_frame->enter_frame = ompt_data_none;
4027 }
4028#endif
4029
4030 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4031 tree_reduce_block)) {
4032
4033 // only primary thread executes here (primary releases all other workers)
4034 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4035 global_tid);
4036
4037 } else {
4038
4039 // should never reach this block
4040 KMP_ASSERT(0); // "unexpected method"
4041 }
4042 if (teams_swapped) {
4043 __kmp_restore_swapped_teams(th, team, task_state);
4044 }
4045
4046 if (__kmp_env_consistency_check)
4047 __kmp_pop_sync(global_tid, ct_reduce, loc);
4048
4049 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4050 global_tid, packed_reduction_method));
4051
4052 return;
4053}
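/*
  Usage sketch (illustrative user code, not taken from this file): the blocking
  variant above differs from the nowait pair in that __kmpc_end_reduce() also
  provides the terminating barrier of the construct. A worksharing reduction
  without a nowait clause, such as the loop below, is the kind of construct
  that may be lowered to __kmpc_reduce()/__kmpc_end_reduce(); again, the exact
  lowering is compiler-specific.

    #include <omp.h>
    #include <stdio.h>

    #define N 1000

    int main(void) {
      static double a[N], b[N];
      double dot = 0.0;
      for (int i = 0; i < N; ++i) {
        a[i] = 1.0;
        b[i] = 2.0;
      }
    #pragma omp parallel
      {
    #pragma omp for reduction(+ : dot)
        for (int i = 0; i < N; ++i)
          dot += a[i] * b[i];
        // the implicit barrier of the 'for' (no nowait clause) is folded into
        // the blocking reduce variant
      }
      printf("dot = %f\n", dot); // expected 2000.000000
      return 0;
    }
*/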
4054
4055#undef __KMP_GET_REDUCTION_METHOD
4056#undef __KMP_SET_REDUCTION_METHOD
4057
4058/* end of interface to fast scalable reduce routines */
4059
4060kmp_uint64 __kmpc_get_taskid() {
4061
4062 kmp_int32 gtid;
4063 kmp_info_t *thread;
4064
4065 gtid = __kmp_get_gtid();
4066 if (gtid < 0) {
4067 return 0;
4068 }
4069 thread = __kmp_thread_from_gtid(gtid);
4070 return thread->th.th_current_task->td_task_id;
4071
4072} // __kmpc_get_taskid
4073
4074kmp_uint64 __kmpc_get_parent_taskid() {
4075
4076 kmp_int32 gtid;
4077 kmp_info_t *thread;
4078 kmp_taskdata_t *parent_task;
4079
4080 gtid = __kmp_get_gtid();
4081 if (gtid < 0) {
4082 return 0;
4083 }
4084 thread = __kmp_thread_from_gtid(gtid);
4085 parent_task = thread->th.th_current_task->td_parent;
4086 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4087
4088} // __kmpc_get_parent_taskid
4089
4101void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4102 const struct kmp_dim *dims) {
4103 __kmp_assert_valid_gtid(gtid);
4104 int j, idx;
4105 kmp_int64 last, trace_count;
4106 kmp_info_t *th = __kmp_threads[gtid];
4107 kmp_team_t *team = th->th.th_team;
4108 kmp_uint32 *flags;
4109 kmp_disp_t *pr_buf = th->th.th_dispatch;
4110 dispatch_shared_info_t *sh_buf;
4111
4112 KA_TRACE(
4113 20,
4114 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4115 gtid, num_dims, !team->t.t_serialized));
4116 KMP_DEBUG_ASSERT(dims != NULL);
4117 KMP_DEBUG_ASSERT(num_dims > 0);
4118
4119 if (team->t.t_serialized) {
4120 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4121 return; // no dependencies if team is serialized
4122 }
4123 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4124 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4125 // the next loop
4126 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4127
4128 // Save bounds info into allocated private buffer
4129 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4130 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4131 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4132 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4133 pr_buf->th_doacross_info[0] =
4134 (kmp_int64)num_dims; // first element is number of dimensions
4135 // Also save the address of num_done so it can be accessed later without
4136 // knowing the buffer index
4137 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4138 pr_buf->th_doacross_info[2] = dims[0].lo;
4139 pr_buf->th_doacross_info[3] = dims[0].up;
4140 pr_buf->th_doacross_info[4] = dims[0].st;
4141 last = 5;
4142 for (j = 1; j < num_dims; ++j) {
4143 kmp_int64
4144 range_length; // holds the range of each dimension except the first (dims[0])
4145 if (dims[j].st == 1) { // most common case
4146 // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4147 range_length = dims[j].up - dims[j].lo + 1;
4148 } else {
4149 if (dims[j].st > 0) {
4150 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4151 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4152 } else { // negative increment
4153 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4154 range_length =
4155 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4156 }
4157 }
4158 pr_buf->th_doacross_info[last++] = range_length;
4159 pr_buf->th_doacross_info[last++] = dims[j].lo;
4160 pr_buf->th_doacross_info[last++] = dims[j].up;
4161 pr_buf->th_doacross_info[last++] = dims[j].st;
4162 }
4163
4164 // Compute total trip count.
4165 // Start with range of dims[0] which we don't need to keep in the buffer.
4166 if (dims[0].st == 1) { // most common case
4167 trace_count = dims[0].up - dims[0].lo + 1;
4168 } else if (dims[0].st > 0) {
4169 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4170 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4171 } else { // negative increment
4172 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4173 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4174 }
4175 for (j = 1; j < num_dims; ++j) {
4176 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4177 }
4178 KMP_DEBUG_ASSERT(trace_count > 0);
4179
4180 // Check whether the shared buffer is still occupied by a previous loop
4181 // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
4182 if (idx != sh_buf->doacross_buf_idx) {
4183 // Shared buffer is occupied, wait for it to be free
4184 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4185 __kmp_eq_4, NULL);
4186 }
4187#if KMP_32_BIT_ARCH
4188 // Check if we are the first thread. After the CAS the first thread gets 0,
4189 // others get 1 if initialization is in progress, allocated pointer otherwise.
4190 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4191 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4192 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4193#else
4194 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4195 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4196#endif
4197 if (flags == NULL) {
4198 // we are the first thread, allocate the array of flags
4199 size_t size =
4200 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4201 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4202 KMP_MB();
4203 sh_buf->doacross_flags = flags;
4204 } else if (flags == (kmp_uint32 *)1) {
4205#if KMP_32_BIT_ARCH
4206 // initialization is still in progress, need to wait
4207 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4208#else
4209 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4210#endif
4211 KMP_YIELD(TRUE);
4212 KMP_MB();
4213 } else {
4214 KMP_MB();
4215 }
4216 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4217 pr_buf->th_doacross_flags =
4218 sh_buf->doacross_flags; // save private copy in order to not
4219 // touch shared buffer on each iteration
4220 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4221}
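/*
  Layout sketch of the private th_doacross_info buffer built above, for a
  hypothetical 2-dimensional doacross nest with dims[0] = {lo=0, up=99, st=1}
  and dims[1] = {lo=1, up=10, st=1}:

    [0] = 2                        number of dimensions
    [1] = &sh_buf->doacross_num_done
    [2] = 0, [3] = 99, [4] = 1     lo/up/st of the first dimension
    [5] = 10                       range length of the second dimension
    [6] = 1, [7] = 10, [8] = 1     lo/up/st of the second dimension

  The trip count used to size the iteration-flag array is 100 * 10 = 1000
  iterations, so the first thread allocates 1000 / 8 + 8 = 133 bytes, one bit
  per iteration plus slack.
*/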
4222
4223void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4224 __kmp_assert_valid_gtid(gtid);
4225 kmp_int64 shft;
4226 size_t num_dims, i;
4227 kmp_uint32 flag;
4228 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4229 kmp_info_t *th = __kmp_threads[gtid];
4230 kmp_team_t *team = th->th.th_team;
4231 kmp_disp_t *pr_buf;
4232 kmp_int64 lo, up, st;
4233
4234 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4235 if (team->t.t_serialized) {
4236 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4237 return; // no dependencies if team is serialized
4238 }
4239
4240 // calculate sequential iteration number and check out-of-bounds condition
4241 pr_buf = th->th.th_dispatch;
4242 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4243 num_dims = (size_t)pr_buf->th_doacross_info[0];
4244 lo = pr_buf->th_doacross_info[2];
4245 up = pr_buf->th_doacross_info[3];
4246 st = pr_buf->th_doacross_info[4];
4247#if OMPT_SUPPORT && OMPT_OPTIONAL
4248 SimpleVLA<ompt_dependence_t> deps(num_dims);
4249#endif
4250 if (st == 1) { // most common case
4251 if (vec[0] < lo || vec[0] > up) {
4252 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4253 "bounds [%lld,%lld]\n",
4254 gtid, vec[0], lo, up));
4255 return;
4256 }
4257 iter_number = vec[0] - lo;
4258 } else if (st > 0) {
4259 if (vec[0] < lo || vec[0] > up) {
4260 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4261 "bounds [%lld,%lld]\n",
4262 gtid, vec[0], lo, up));
4263 return;
4264 }
4265 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4266 } else { // negative increment
4267 if (vec[0] > lo || vec[0] < up) {
4268 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4269 "bounds [%lld,%lld]\n",
4270 gtid, vec[0], lo, up));
4271 return;
4272 }
4273 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4274 }
4275#if OMPT_SUPPORT && OMPT_OPTIONAL
4276 deps[0].variable.value = iter_number;
4277 deps[0].dependence_type = ompt_dependence_type_sink;
4278#endif
4279 for (i = 1; i < num_dims; ++i) {
4280 kmp_int64 iter, ln;
4281 size_t j = i * 4;
4282 ln = pr_buf->th_doacross_info[j + 1];
4283 lo = pr_buf->th_doacross_info[j + 2];
4284 up = pr_buf->th_doacross_info[j + 3];
4285 st = pr_buf->th_doacross_info[j + 4];
4286 if (st == 1) {
4287 if (vec[i] < lo || vec[i] > up) {
4288 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4289 "bounds [%lld,%lld]\n",
4290 gtid, vec[i], lo, up));
4291 return;
4292 }
4293 iter = vec[i] - lo;
4294 } else if (st > 0) {
4295 if (vec[i] < lo || vec[i] > up) {
4296 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4297 "bounds [%lld,%lld]\n",
4298 gtid, vec[i], lo, up));
4299 return;
4300 }
4301 iter = (kmp_uint64)(vec[i] - lo) / st;
4302 } else { // st < 0
4303 if (vec[i] > lo || vec[i] < up) {
4304 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4305 "bounds [%lld,%lld]\n",
4306 gtid, vec[i], lo, up));
4307 return;
4308 }
4309 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4310 }
4311 iter_number = iter + ln * iter_number;
4312#if OMPT_SUPPORT && OMPT_OPTIONAL
4313 deps[i].variable.value = iter;
4314 deps[i].dependence_type = ompt_dependence_type_sink;
4315#endif
4316 }
4317 shft = iter_number % 32; // use 32-bit granularity
4318 iter_number >>= 5; // divided by 32
4319 flag = 1 << shft;
4320 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4321 KMP_YIELD(TRUE);
4322 }
4323 KMP_MB();
4324#if OMPT_SUPPORT && OMPT_OPTIONAL
4325 if (ompt_enabled.ompt_callback_dependences) {
4326 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4327 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4328 }
4329#endif
4330 KA_TRACE(20,
4331 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4332 gtid, (iter_number << 5) + shft));
4333}
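/*
  Worked example of the index arithmetic above, using the hypothetical
  2-dimensional nest sketched after __kmpc_doacross_init() (dims[0] lo=0 st=1,
  dims[1] lo=1 st=1, range length ln=10) and a sink vector vec = {7, 3}:

    iter_number = 7 - 0 = 7            first dimension
    iter        = 3 - 1 = 2            second dimension
    iter_number = 2 + 10 * 7 = 72      collapsed iteration number

    shft = 72 % 32 = 8, word = 72 >> 5 = 2, flag = 1 << 8

  so the waiter spins until bit 8 of th_doacross_flags[2] is set, which is the
  same bit __kmpc_doacross_post() sets for iteration {7, 3}.
*/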
4334
4335void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4336 __kmp_assert_valid_gtid(gtid);
4337 kmp_int64 shft;
4338 size_t num_dims, i;
4339 kmp_uint32 flag;
4340 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4341 kmp_info_t *th = __kmp_threads[gtid];
4342 kmp_team_t *team = th->th.th_team;
4343 kmp_disp_t *pr_buf;
4344 kmp_int64 lo, st;
4345
4346 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4347 if (team->t.t_serialized) {
4348 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4349 return; // no dependencies if team is serialized
4350 }
4351
4352 // calculate sequential iteration number (same as in "wait" but no
4353 // out-of-bounds checks)
4354 pr_buf = th->th.th_dispatch;
4355 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4356 num_dims = (size_t)pr_buf->th_doacross_info[0];
4357 lo = pr_buf->th_doacross_info[2];
4358 st = pr_buf->th_doacross_info[4];
4359#if OMPT_SUPPORT && OMPT_OPTIONAL
4360 SimpleVLA<ompt_dependence_t> deps(num_dims);
4361#endif
4362 if (st == 1) { // most common case
4363 iter_number = vec[0] - lo;
4364 } else if (st > 0) {
4365 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4366 } else { // negative increment
4367 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4368 }
4369#if OMPT_SUPPORT && OMPT_OPTIONAL
4370 deps[0].variable.value = iter_number;
4371 deps[0].dependence_type = ompt_dependence_type_source;
4372#endif
4373 for (i = 1; i < num_dims; ++i) {
4374 kmp_int64 iter, ln;
4375 size_t j = i * 4;
4376 ln = pr_buf->th_doacross_info[j + 1];
4377 lo = pr_buf->th_doacross_info[j + 2];
4378 st = pr_buf->th_doacross_info[j + 4];
4379 if (st == 1) {
4380 iter = vec[i] - lo;
4381 } else if (st > 0) {
4382 iter = (kmp_uint64)(vec[i] - lo) / st;
4383 } else { // st < 0
4384 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4385 }
4386 iter_number = iter + ln * iter_number;
4387#if OMPT_SUPPORT && OMPT_OPTIONAL
4388 deps[i].variable.value = iter;
4389 deps[i].dependence_type = ompt_dependence_type_source;
4390#endif
4391 }
4392#if OMPT_SUPPORT && OMPT_OPTIONAL
4393 if (ompt_enabled.ompt_callback_dependences) {
4394 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4395 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4396 }
4397#endif
4398 shft = iter_number % 32; // use 32-bit granularity
4399 iter_number >>= 5; // divided by 32
4400 flag = 1 << shft;
4401 KMP_MB();
4402 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4403 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4404 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4405 (iter_number << 5) + shft));
4406}
4407
4408void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4409 __kmp_assert_valid_gtid(gtid);
4410 kmp_int32 num_done;
4411 kmp_info_t *th = __kmp_threads[gtid];
4412 kmp_team_t *team = th->th.th_team;
4413 kmp_disp_t *pr_buf = th->th.th_dispatch;
4414
4415 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4416 if (team->t.t_serialized) {
4417 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4418 return; // nothing to do
4419 }
4420 num_done =
4421 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4422 if (num_done == th->th.th_team_nproc) {
4423 // we are the last thread, need to free shared resources
4424 int idx = pr_buf->th_doacross_buf_idx - 1;
4425 dispatch_shared_info_t *sh_buf =
4426 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4427 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4428 (kmp_int64)&sh_buf->doacross_num_done);
4429 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4430 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4431 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4432 sh_buf->doacross_flags = NULL;
4433 sh_buf->doacross_num_done = 0;
4434 sh_buf->doacross_buf_idx +=
4435 __kmp_dispatch_num_buffers; // free buffer for future re-use
4436 }
4437 // free private resources (need to keep buffer index forever)
4438 pr_buf->th_doacross_flags = NULL;
4439 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4440 pr_buf->th_doacross_info = NULL;
4441 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4442}
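/*
  Usage sketch (illustrative user code, not taken from this file): the
  __kmpc_doacross_init/wait/post/fini entry points above implement
  cross-iteration (doacross) dependences. For a loop such as the one below the
  compiler typically emits init once per thread, a wait before each dependent
  use, a post after each source point, and fini at the end of the loop.

    #include <stdio.h>

    #define N 256

    int main(void) {
      static int a[N];
      a[0] = 1;
    #pragma omp parallel for ordered(1)
      for (int i = 1; i < N; ++i) {
    #pragma omp ordered depend(sink : i - 1) // becomes a doacross wait
        a[i] = a[i - 1] + 1;
    #pragma omp ordered depend(source)       // becomes a doacross post
      }
      printf("a[%d] = %d\n", N - 1, a[N - 1]); // expected 256
      return 0;
    }
*/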
4443
4444/* OpenMP 5.1 Memory Management routines */
4445void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4446 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4447}
4448
4449void *omp_aligned_alloc(size_t align, size_t size,
4450 omp_allocator_handle_t allocator) {
4451 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4452}
4453
4454void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4455 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4456}
4457
4458void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4459 omp_allocator_handle_t allocator) {
4460 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4461}
4462
4463void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4464 omp_allocator_handle_t free_allocator) {
4465 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4466 free_allocator);
4467}
4468
4469void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4470 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4471}
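/*
  Usage sketch (illustrative user code, not taken from this file): the thin
  wrappers above are the user-facing OpenMP memory-management API, so code such
  as the following ends up in __kmp_alloc/__kmp_calloc/__kmp_realloc and
  ___kmpc_free:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      // 64-byte aligned buffer from the default memory space
      double *v = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
                                              omp_default_mem_alloc);
      if (v == NULL)
        return 1;
      v[0] = 42.0;
      printf("v[0] = %f\n", v[0]);
      omp_free(v, omp_default_mem_alloc);
      return 0;
    }
*/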
4472/* end of OpenMP 5.1 Memory Management routines */
4473
4474int __kmpc_get_target_offload(void) {
4475 if (!__kmp_init_serial) {
4476 __kmp_serial_initialize();
4477 }
4478 return __kmp_target_offload;
4479}
4480
4481int __kmpc_pause_resource(kmp_pause_status_t level) {
4482 if (!__kmp_init_serial) {
4483 return 1; // Can't pause if runtime is not initialized
4484 }
4485 return __kmp_pause_resource(level);
4486}
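/*
  Usage sketch (illustrative user code, not taken from this file): pausing is
  normally requested through the standard omp_pause_resource()/
  omp_pause_resource_all() API, which relies on the same underlying
  __kmp_pause_resource() machinery used above. A soft pause lets the runtime
  release threads and other resources between parallel phases:

    #include <omp.h>

    int main(void) {
    #pragma omp parallel
      {
        // first phase of work
      }

      // hint that the host runtime may release its resources for now
      (void)omp_pause_resource_all(omp_pause_soft);

    #pragma omp parallel
      {
        // the runtime re-initializes lazily for the second phase
      }
      return 0;
    }
*/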
4487
4488void __kmpc_error(ident_t *loc, int severity, const char *message) {
4489 if (!__kmp_init_serial)
4490 __kmp_serial_initialize();
4491
4492 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4493
4494#if OMPT_SUPPORT
4495 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4496 ompt_callbacks.ompt_callback(ompt_callback_error)(
4497 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4498 OMPT_GET_RETURN_ADDRESS(0));
4499 }
4500#endif // OMPT_SUPPORT
4501
4502 char *src_loc;
4503 if (loc && loc->psource) {
4504 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4505 src_loc =
4506 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4507 __kmp_str_loc_free(&str_loc);
4508 } else {
4509 src_loc = __kmp_str_format("unknown");
4510 }
4511
4512 if (severity == severity_warning)
4513 KMP_WARNING(UserDirectedWarning, src_loc, message);
4514 else
4515 KMP_FATAL(UserDirectedError, src_loc, message);
4516
4517 __kmp_str_free(&src_loc);
4518}
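/*
  Usage sketch (illustrative user code, not taken from this file):
  __kmpc_error() above backs the OpenMP 'error' directive when it is evaluated
  at execution time; a directive such as the one below may be lowered into this
  call, which then reports the formatted source location together with the user
  message:

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      if (omp_get_max_threads() < 2) {
    #pragma omp error at(execution) severity(warning) message("running with fewer than 2 threads")
      }
      printf("continuing after the warning\n");
      return 0;
    }
*/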
4519
4520// Mark begin of scope directive.
4521void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4522// reserved is for extension of scope directive and not used.
4523#if OMPT_SUPPORT && OMPT_OPTIONAL
4524 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4525 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4526 int tid = __kmp_tid_from_gtid(gtid);
4527 ompt_callbacks.ompt_callback(ompt_callback_work)(
4528 ompt_work_scope, ompt_scope_begin,
4529 &(team->t.ompt_team_info.parallel_data),
4530 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4531 OMPT_GET_RETURN_ADDRESS(0));
4532 }
4533#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4534}
4535
4536// Mark end of scope directive
4537void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4538// reserved is for extension of scope directive and not used.
4539#if OMPT_SUPPORT && OMPT_OPTIONAL
4540 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4541 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4542 int tid = __kmp_tid_from_gtid(gtid);
4543 ompt_callbacks.ompt_callback(ompt_callback_work)(
4544 ompt_work_scope, ompt_scope_end,
4545 &(team->t.ompt_team_info.parallel_data),
4546 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4547 OMPT_GET_RETURN_ADDRESS(0));
4548 }
4549#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4550}
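/*
  Usage sketch (illustrative user code, not taken from this file): the two
  markers above let tools observe the OpenMP 5.1 'scope' construct; a compiler
  may bracket a structured block such as the one below with
  __kmpc_scope()/__kmpc_end_scope():

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      int hits = 0;
    #pragma omp parallel
      {
    #pragma omp scope reduction(+ : hits)
        {
          hits += 1; // structured block of the scope construct
        }
      }
      printf("hits = %d\n", hits); // one increment per thread in the team
      return 0;
    }
*/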
4551
4552#ifdef KMP_USE_VERSION_SYMBOLS
4553// For GOMP compatibility there are two versions of each omp_* API.
4554// One is the plain C symbol and one is the Fortran symbol with an appended
4555// underscore. When we implement a specific ompc_* version of an omp_*
4556// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4557// instead of the Fortran versions in kmp_ftn_entry.h
4558extern "C" {
4559// Have to undef these from omp.h so they aren't translated into
4560// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4561#ifdef omp_set_affinity_format
4562#undef omp_set_affinity_format
4563#endif
4564#ifdef omp_get_affinity_format
4565#undef omp_get_affinity_format
4566#endif
4567#ifdef omp_display_affinity
4568#undef omp_display_affinity
4569#endif
4570#ifdef omp_capture_affinity
4571#undef omp_capture_affinity
4572#endif
4573KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4574 "OMP_5.0");
4575KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4576 "OMP_5.0");
4577KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4578 "OMP_5.0");
4579KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4580 "OMP_5.0");
4581} // extern "C"
4582#endif