LLVM OpenMP* Runtime Library
kmp_atomic.cpp
1/*
2 * kmp_atomic.cpp -- ATOMIC implementation routines
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp_atomic.h"
14#include "kmp.h" // TRUE, asm routines prototypes
15
16typedef unsigned char uchar;
17typedef unsigned short ushort;
18
561/*
562 * Global vars
563 */
564
565#ifndef KMP_GOMP_COMPAT
566int __kmp_atomic_mode = 1; // Intel perf
567#else
568int __kmp_atomic_mode = 2; // GOMP compatibility
569#endif /* KMP_GOMP_COMPAT */
570
571KMP_ALIGN(128)
572
573// Control access to all user coded atomics in Gnu compat mode
574kmp_atomic_lock_t __kmp_atomic_lock;
575// Control access to all user coded atomics for 1-byte fixed data types
576kmp_atomic_lock_t __kmp_atomic_lock_1i;
577// Control access to all user coded atomics for 2-byte fixed data types
578kmp_atomic_lock_t __kmp_atomic_lock_2i;
579// Control access to all user coded atomics for 4-byte fixed data types
580kmp_atomic_lock_t __kmp_atomic_lock_4i;
581// Control access to all user coded atomics for kmp_real32 data type
582kmp_atomic_lock_t __kmp_atomic_lock_4r;
583// Control access to all user coded atomics for 8-byte fixed data types
584kmp_atomic_lock_t __kmp_atomic_lock_8i;
585// Control access to all user coded atomics for kmp_real64 data type
586kmp_atomic_lock_t __kmp_atomic_lock_8r;
587// Control access to all user coded atomics for float complex data type
588kmp_atomic_lock_t __kmp_atomic_lock_8c;
589// Control access to all user coded atomics for long double data type
590kmp_atomic_lock_t __kmp_atomic_lock_10r;
591// Control access to all user coded atomics for _Quad data type
592kmp_atomic_lock_t __kmp_atomic_lock_16r;
593// Control access to all user coded atomics for double complex data type
594kmp_atomic_lock_t __kmp_atomic_lock_16c;
595// Control access to all user coded atomics for long double complex type
596kmp_atomic_lock_t __kmp_atomic_lock_20c;
597// Control access to all user coded atomics for _Quad complex data type
598kmp_atomic_lock_t __kmp_atomic_lock_32c;
599
600/* 2007-03-02:
601 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602 on *_32 and *_32e. This is just a temporary workaround for the problem. It
603 seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604 in assembler language. */
605#define KMP_ATOMIC_VOLATILE volatile
606
607#if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608
609static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610 return lhs.q + rhs.q;
611}
612static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613 return lhs.q - rhs.q;
614}
615static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616 return lhs.q * rhs.q;
617}
618static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619 return lhs.q / rhs.q;
620}
621static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622 return lhs.q < rhs.q;
623}
624static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625 return lhs.q > rhs.q;
626}
627
628static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629 return lhs.q + rhs.q;
630}
631static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632 return lhs.q - rhs.q;
633}
634static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635 return lhs.q * rhs.q;
636}
637static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638 return lhs.q / rhs.q;
639}
640static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641 return lhs.q < rhs.q;
642}
643static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644 return lhs.q > rhs.q;
645}
646
647static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648 kmp_cmplx128_a4_t &rhs) {
649 return lhs.q + rhs.q;
650}
651static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652 kmp_cmplx128_a4_t &rhs) {
653 return lhs.q - rhs.q;
654}
655static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656 kmp_cmplx128_a4_t &rhs) {
657 return lhs.q * rhs.q;
658}
659static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660 kmp_cmplx128_a4_t &rhs) {
661 return lhs.q / rhs.q;
662}
663
664static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665 kmp_cmplx128_a16_t &rhs) {
666 return lhs.q + rhs.q;
667}
668static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669 kmp_cmplx128_a16_t &rhs) {
670 return lhs.q - rhs.q;
671}
672static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673 kmp_cmplx128_a16_t &rhs) {
674 return lhs.q * rhs.q;
675}
676static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677 kmp_cmplx128_a16_t &rhs) {
678 return lhs.q / rhs.q;
679}
680
681#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
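// Editorial note (sketch, not part of the upstream file): these overloads let
// the aligned wrapper types (Quad_a4_t, Quad_a16_t, kmp_cmplx128_a4_t,
// kmp_cmplx128_a16_t) be used directly as TYPE in the OP_* macros below, which
// apply OP to whole wrapper values rather than to the underlying .q members:
//
//   Quad_a16_t a, b;      // a.q and b.q are _Quad
//   Quad_a16_t c = a + b; // resolves to operator+(Quad_a16_t &, Quad_a16_t &)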
682
683// ATOMIC implementation routines -----------------------------------------
684// One routine for each operation and operand type.
685// All routine declarations look like
686// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687
688#define KMP_CHECK_GTID \
689 if (gtid == KMP_GTID_UNKNOWN) { \
690 gtid = __kmp_entry_gtid(); \
691 } // check and get gtid when needed
692
693// Beginning of a definition (provides name, parameters, debug trace)
694// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
695// unsigned fixed types)
696// OP_ID - operation identifier (add, sub, mul, ...)
697// TYPE - operands' type
698#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700 TYPE *lhs, TYPE rhs) { \
701 KMP_DEBUG_ASSERT(__kmp_init_serial); \
702 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
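// Illustrative expansion (editorial sketch, derived from the macro above):
// ATOMIC_BEGIN(fixed4, add, kmp_int32, void) emits the routine prologue
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//
// which the macros below complete with an update body and a closing brace.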
703
704// ------------------------------------------------------------------------
705// Lock variables used for critical sections for various size operands
706#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719
720// ------------------------------------------------------------------------
721// Operation on *lhs, rhs bound by critical section
722// OP - operator (it's supposed to contain an assignment)
723// LCK_ID - lock identifier
724// Note: don't check gtid as it should always be valid
725// 1- and 2-byte routines expect a valid gtid; others check it before this macro
726#define OP_CRITICAL(OP, LCK_ID) \
727 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728 \
729 (*lhs) OP(rhs); \
730 \
731 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732
733#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735 (*lhs) = (TYPE)((*lhs)OP rhs); \
736 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
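// Illustrative expansion (editorial sketch): OP_UPDATE_CRITICAL(kmp_cmplx64,
// +, 16c) serializes the update under the type-specific lock:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//   (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);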
737
738// ------------------------------------------------------------------------
739// For GNU compatibility, we may need to use a critical section,
740// even though it is not required by the ISA.
741//
742// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744// critical section. On Intel(R) 64, all atomic operations are done with fetch
745// and add or compare and exchange. Therefore, the FLAG parameter to this
746// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
747// require a critical section, where we predict that they will be implemented
748// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749//
750// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751// the FLAG parameter should always be 1. If we know that we will be using
752// a critical section, then we want to make certain that we use the generic
753// lock __kmp_atomic_lock to protect the atomic update, and not one of the
754// locks that are specialized based upon the size or type of the data.
755//
756// If FLAG is 0, then we are relying on dead code elimination by the build
757// compiler to get rid of the useless block of code, and save a needless
758// branch at runtime.
759
760#ifdef KMP_GOMP_COMPAT
761#define OP_GOMP_CRITICAL(OP, FLAG) \
762 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763 KMP_CHECK_GTID; \
764 OP_CRITICAL(OP, 0); \
765 return; \
766 }
767
768#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770 KMP_CHECK_GTID; \
771 OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772 return; \
773 }
774#else
775#define OP_GOMP_CRITICAL(OP, FLAG)
776#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777#endif /* KMP_GOMP_COMPAT */
778
779#if KMP_MIC
780#define KMP_DO_PAUSE _mm_delay_32(1)
781#else
782#define KMP_DO_PAUSE
783#endif /* KMP_MIC */
784
785// ------------------------------------------------------------------------
786// Operation on *lhs, rhs using "compare_and_store" routine
787// TYPE - operands' type
788// BITS - size in bits, used to distinguish low level calls
789// OP - operator
790#define OP_CMPXCHG(TYPE, BITS, OP) \
791 { \
792 TYPE old_value, new_value; \
793 old_value = *(TYPE volatile *)lhs; \
794 new_value = (TYPE)(old_value OP rhs); \
795 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798 KMP_DO_PAUSE; \
799 \
800 old_value = *(TYPE volatile *)lhs; \
801 new_value = (TYPE)(old_value OP rhs); \
802 } \
803 }
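// Illustrative expansion (editorial sketch): OP_CMPXCHG(kmp_real32, 32, +), as
// used by __kmpc_atomic_float4_add below, re-reads *lhs and recomputes the new
// value until the compare-and-store succeeds:
//
//   kmp_real32 old_value, new_value;
//   old_value = *(kmp_real32 volatile *)lhs;
//   new_value = (kmp_real32)(old_value + rhs);
//   while (!KMP_COMPARE_AND_STORE_ACQ32(
//       (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//       *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//     KMP_DO_PAUSE;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = (kmp_real32)(old_value + rhs);
//   }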
804
805#if USE_CMPXCHG_FIX
806// 2007-06-25:
807// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808// and win_32e are affected (I verified the asm). Compiler ignores the volatile
809// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811// the workaround.
812#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813 { \
814 struct _sss { \
815 TYPE cmp; \
816 kmp_int##BITS *vvv; \
817 }; \
818 struct _sss old_value, new_value; \
819 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826 KMP_DO_PAUSE; \
827 \
828 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830 } \
831 }
832// end of the first part of the workaround for C78287
833#endif // USE_CMPXCHG_FIX
834
835#if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
836// Undo explicit type casts to get MSVC ARM64 to build. Uses
837// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
838#undef OP_CMPXCHG
839#define OP_CMPXCHG(TYPE, BITS, OP) \
840 { \
841 struct _sss { \
842 TYPE cmp; \
843 kmp_int##BITS *vvv; \
844 }; \
845 struct _sss old_value, new_value; \
846 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
847 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
848 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
849 new_value.cmp = old_value.cmp OP rhs; \
850 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
851 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
852 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
853 KMP_DO_PAUSE; \
854 \
855 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
856 new_value.cmp = old_value.cmp OP rhs; \
857 } \
858 }
859
860#undef OP_UPDATE_CRITICAL
861#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
862 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
863 (*lhs) = (*lhs)OP rhs; \
864 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865
866#endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64
867
868#if KMP_ARCH_X86 || KMP_ARCH_X86_64
869
870// ------------------------------------------------------------------------
871// X86 or X86_64: no alignment problems ====================================
872#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
873 GOMP_FLAG) \
874 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
875 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
876 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
877 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
878 }
879// -------------------------------------------------------------------------
880#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
881 GOMP_FLAG) \
882 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
883 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
884 OP_CMPXCHG(TYPE, BITS, OP) \
885 }
886#if USE_CMPXCHG_FIX
887// -------------------------------------------------------------------------
888// workaround for C78287 (complex(kind=4) data type)
889#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
890 MASK, GOMP_FLAG) \
891 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
892 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
893 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
894 }
895// end of the second part of the workaround for C78287
896#endif // USE_CMPXCHG_FIX
897
898#else
899// -------------------------------------------------------------------------
900// Code for other architectures that don't handle unaligned accesses.
901#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
902 GOMP_FLAG) \
903 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
904 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
905 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
906 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
907 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
908 } else { \
909 KMP_CHECK_GTID; \
910 OP_UPDATE_CRITICAL(TYPE, OP, \
911 LCK_ID) /* unaligned address - use critical */ \
912 } \
913 }
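// Editorial note: MASK is a hex low-bit mask encoding the natural alignment of
// the operand, so the check above selects the lock-free or the locked path:
//
//   ((kmp_uintptr_t)lhs & 0x3) == 0   // 4-byte aligned   -> cmpxchg path
//   ((kmp_uintptr_t)lhs & 0x7) != 0   // 8-byte unaligned -> critical section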
914// -------------------------------------------------------------------------
915#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
916 GOMP_FLAG) \
917 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
918 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
919 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
920 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
921 } else { \
922 KMP_CHECK_GTID; \
923 OP_UPDATE_CRITICAL(TYPE, OP, \
924 LCK_ID) /* unaligned address - use critical */ \
925 } \
926 }
927#if USE_CMPXCHG_FIX
928// -------------------------------------------------------------------------
929// workaround for C78287 (complex(kind=4) data type)
930#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
931 MASK, GOMP_FLAG) \
932 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
933 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
934 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
935 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
936 } else { \
937 KMP_CHECK_GTID; \
938 OP_UPDATE_CRITICAL(TYPE, OP, \
939 LCK_ID) /* unaligned address - use critical */ \
940 } \
941 }
942// end of the second part of the workaround for C78287
943#endif // USE_CMPXCHG_FIX
944#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945
946// Routines for ATOMIC 4-byte operands addition and subtraction
947ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948 0) // __kmpc_atomic_fixed4_add
949ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950 0) // __kmpc_atomic_fixed4_sub
951
952ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953 KMP_ARCH_X86) // __kmpc_atomic_float4_add
954ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955 KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956
957// Routines for ATOMIC 8-byte operands addition and subtraction
958ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962
963ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964 KMP_ARCH_X86) // __kmpc_atomic_float8_add
965ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966 KMP_ARCH_X86) // __kmpc_atomic_float8_sub
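// Usage sketch (editorial, not from this file): for a source fragment such as
//
//   float x, y;
//   #pragma omp atomic
//   x += y;
//
// a compiler targeting these entry points would emit a call along the lines of
//
//   __kmpc_atomic_float4_add(&loc, __kmpc_global_thread_num(&loc), &x, y);
//
// where loc is an ident_t describing the source location.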
967
968// ------------------------------------------------------------------------
969// Entries definition for integer operands
970// TYPE_ID - operands type and size (fixed4, float4)
971// OP_ID - operation identifier (add, sub, mul, ...)
972// TYPE - operand type
973// BITS - size in bits, used to distinguish low level calls
974// OP - operator (used in critical section)
975// LCK_ID - lock identifier, used to possibly distinguish lock variable
976// MASK - used for alignment check
977
978// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
979// ------------------------------------------------------------------------
980// Routines for ATOMIC integer operands, other operators
981// ------------------------------------------------------------------------
982// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
983ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986 0) // __kmpc_atomic_fixed1_andb
987ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994 0) // __kmpc_atomic_fixed1_orb
995ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004 0) // __kmpc_atomic_fixed1_xor
1005ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008 0) // __kmpc_atomic_fixed2_andb
1009ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016 0) // __kmpc_atomic_fixed2_orb
1017ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026 0) // __kmpc_atomic_fixed2_xor
1027ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028 0) // __kmpc_atomic_fixed4_andb
1029ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036 0) // __kmpc_atomic_fixed4_orb
1037ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044 0) // __kmpc_atomic_fixed4_xor
1045ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064 KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066 KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068 KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070 KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1072
1073/* ------------------------------------------------------------------------ */
1074/* Routines for C/C++ Reduction operators && and || */
1075
1076// ------------------------------------------------------------------------
1077// Need separate macros for &&, || because there is no combined assignment
1078// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1080 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1081 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1082 OP_CRITICAL(= *lhs OP, LCK_ID) \
1083 }
1084
1085#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086
1087// ------------------------------------------------------------------------
1088// X86 or X86_64: no alignment problems ===================================
1089#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1091 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1092 OP_CMPXCHG(TYPE, BITS, OP) \
1093 }
1094
1095#else
1096// ------------------------------------------------------------------------
1097// Code for other architectures that don't handle unaligned accesses.
1098#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1100 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1101 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1102 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1103 } else { \
1104 KMP_CHECK_GTID; \
1105 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1106 } \
1107 }
1108#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109
1110ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119 0) // __kmpc_atomic_fixed4_andl
1120ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121 0) // __kmpc_atomic_fixed4_orl
1122ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126
1127/* ------------------------------------------------------------------------- */
1128/* Routines for Fortran operators that have no C counterparts: */
1129/* MAX, MIN, .EQV., .NEQV. */
1130/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1131/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1132
1133// -------------------------------------------------------------------------
1134// MIN and MAX need separate macros
1135// OP - comparison operator used to check whether any update is still needed
1136#define MIN_MAX_CRITSECT(OP, LCK_ID) \
1137 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1138 \
1139 if (*lhs OP rhs) { /* still need actions? */ \
1140 *lhs = rhs; \
1141 } \
1142 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143
1144// -------------------------------------------------------------------------
1145#ifdef KMP_GOMP_COMPAT
1146#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1147 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1148 KMP_CHECK_GTID; \
1149 MIN_MAX_CRITSECT(OP, 0); \
1150 return; \
1151 }
1152#else
1153#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154#endif /* KMP_GOMP_COMPAT */
1155
1156// -------------------------------------------------------------------------
1157#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1158 { \
1159 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1160 TYPE old_value; \
1161 temp_val = *lhs; \
1162 old_value = temp_val; \
1163 while (old_value OP rhs && /* still need actions? */ \
1164 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1165 (kmp_int##BITS *)lhs, \
1166 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1167 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1168 temp_val = *lhs; \
1169 old_value = temp_val; \
1170 } \
1171 }
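// Editorial note: max is instantiated with OP '<' and min with OP '>', so
// "old_value OP rhs" reads "the stored value still loses to rhs". E.g. for
// __kmpc_atomic_fixed4_max the loop above becomes (sketch):
//
//   while (old_value < rhs && /* rhs is still larger */
//          !KMP_COMPARE_AND_STORE_ACQ32(
//              (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//              *VOLATILE_CAST(kmp_int32 *) & rhs)) {
//     temp_val = *lhs;
//     old_value = temp_val;
//   }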
1172
1173// -------------------------------------------------------------------------
1174// 1-byte, 2-byte operands - use critical section
1175#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1176 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1177 if (*lhs OP rhs) { /* need actions? */ \
1178 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1179 MIN_MAX_CRITSECT(OP, LCK_ID) \
1180 } \
1181 }
1182
1183#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184
1185// -------------------------------------------------------------------------
1186// X86 or X86_64: no alignment problems ====================================
1187#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1188 GOMP_FLAG) \
1189 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1190 if (*lhs OP rhs) { \
1191 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1192 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1193 } \
1194 }
1195
1196#else
1197// -------------------------------------------------------------------------
1198// Code for other architectures that don't handle unaligned accesses.
1199#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1200 GOMP_FLAG) \
1201 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1202 if (*lhs OP rhs) { \
1203 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1204 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1205 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1206 } else { \
1207 KMP_CHECK_GTID; \
1208 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1209 } \
1210 } \
1211 }
1212#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213
1214MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223 0) // __kmpc_atomic_fixed4_max
1224MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225 0) // __kmpc_atomic_fixed4_min
1226MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231 KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233 KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235 KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237 KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1239MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1240 1) // __kmpc_atomic_float10_max
1241MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1242 1) // __kmpc_atomic_float10_min
1243#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1244#if KMP_HAVE_QUAD
1245MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1246 1) // __kmpc_atomic_float16_max
1247MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1248 1) // __kmpc_atomic_float16_min
1249#if (KMP_ARCH_X86)
1250MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1251 1) // __kmpc_atomic_float16_max_a16
1252MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1253 1) // __kmpc_atomic_float16_min_a16
1254#endif // (KMP_ARCH_X86)
1255#endif // KMP_HAVE_QUAD
1256// ------------------------------------------------------------------------
1257// Need separate macros for .EQV. because a complement (~) is required
1258// OP is ignored for critical sections; ^= ~ is used instead
1259#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1260 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1261 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1262 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1263 }
1264
1265// ------------------------------------------------------------------------
1266#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1267// ------------------------------------------------------------------------
1268// X86 or X86_64: no alignment problems ===================================
1269#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1270 GOMP_FLAG) \
1271 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1272 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1273 OP_CMPXCHG(TYPE, BITS, OP) \
1274 }
1275// ------------------------------------------------------------------------
1276#else
1277// ------------------------------------------------------------------------
1278// Code for other architectures that don't handle unaligned accesses.
1279#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1280 GOMP_FLAG) \
1281 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1282 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1283 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1284 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1285 } else { \
1286 KMP_CHECK_GTID; \
1287 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1288 } \
1289 }
1290#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1291
1292ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1293 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1294ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1295 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1296ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1297 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1298ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1299 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1300ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1301 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1302ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1303 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1304ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1305 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1306ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1307 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
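// Editorial note: the OP passed for eqv is the token sequence ^~, so inside
// OP_CMPXCHG the update "old_value ^~ rhs" parses as old_value ^ (~rhs), i.e.
// ~(old_value ^ rhs) -- the bitwise equivalence required for Fortran .EQV.;
// neqv needs only plain ^ and therefore reuses ATOMIC_CMPXCHG.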
1308
1309// ------------------------------------------------------------------------
1310// Routines for Extended types: long double, _Quad, complex flavours (use
1311// critical section)
1312// TYPE_ID, OP_ID, TYPE - detailed above
1313// OP - operator
1314// LCK_ID - lock identifier, used to possibly distinguish lock variable
1315#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1316 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1317 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1318 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1319 }
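// Illustrative expansion (editorial sketch): ATOMIC_CRITICAL(cmplx8, add,
// kmp_cmplx64, +, 16c, 1) below generates, after the GOMP-compat check,
//
//   void __kmpc_atomic_cmplx8_add(ident_t *id_ref, int gtid,
//                                 kmp_cmplx64 *lhs, kmp_cmplx64 rhs) {
//     ...
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//     (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//   }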
1320
1321/* ------------------------------------------------------------------------- */
1322#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1323// routines for long double type
1324ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1325 1) // __kmpc_atomic_float10_add
1326ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1327 1) // __kmpc_atomic_float10_sub
1328ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1329 1) // __kmpc_atomic_float10_mul
1330ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1331 1) // __kmpc_atomic_float10_div
1332#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1333#if KMP_HAVE_QUAD
1334// routines for _Quad type
1335ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1336 1) // __kmpc_atomic_float16_add
1337ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1338 1) // __kmpc_atomic_float16_sub
1339ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1340 1) // __kmpc_atomic_float16_mul
1341ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1342 1) // __kmpc_atomic_float16_div
1343#if (KMP_ARCH_X86)
1344ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1345 1) // __kmpc_atomic_float16_add_a16
1346ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1347 1) // __kmpc_atomic_float16_sub_a16
1348ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1349 1) // __kmpc_atomic_float16_mul_a16
1350ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1351 1) // __kmpc_atomic_float16_div_a16
1352#endif // (KMP_ARCH_X86)
1353#endif // KMP_HAVE_QUAD
1354// routines for complex types
1355
1356#if USE_CMPXCHG_FIX
1357// workaround for C78287 (complex(kind=4) data type)
1358ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1359 1) // __kmpc_atomic_cmplx4_add
1360ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1361 1) // __kmpc_atomic_cmplx4_sub
1362ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1363 1) // __kmpc_atomic_cmplx4_mul
1364ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1365 1) // __kmpc_atomic_cmplx4_div
1366// end of the workaround for C78287
1367#else
1368ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1369ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1370ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1371ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1372#endif // USE_CMPXCHG_FIX
1373
1374ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1375ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1376ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1377ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1378#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1379ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1380 1) // __kmpc_atomic_cmplx10_add
1381ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1382 1) // __kmpc_atomic_cmplx10_sub
1383ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1384 1) // __kmpc_atomic_cmplx10_mul
1385ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1386 1) // __kmpc_atomic_cmplx10_div
1387#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1388#if KMP_HAVE_QUAD
1389ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1390 1) // __kmpc_atomic_cmplx16_add
1391ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1392 1) // __kmpc_atomic_cmplx16_sub
1393ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1394 1) // __kmpc_atomic_cmplx16_mul
1395ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1396 1) // __kmpc_atomic_cmplx16_div
1397#if (KMP_ARCH_X86)
1398ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1399 1) // __kmpc_atomic_cmplx16_add_a16
1400ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1401 1) // __kmpc_atomic_cmplx16_sub_a16
1402ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1403 1) // __kmpc_atomic_cmplx16_mul_a16
1404ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1405 1) // __kmpc_atomic_cmplx16_div_a16
1406#endif // (KMP_ARCH_X86)
1407#endif // KMP_HAVE_QUAD
1408
1409// OpenMP 4.0: x = expr binop x for non-commutative operations.
1410// Supported only on IA-32 architecture and Intel(R) 64
1411#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412
1413// ------------------------------------------------------------------------
1414// Operation on *lhs, rhs bound by critical section
1415// OP - operator (it's supposed to contain an assignment)
1416// LCK_ID - lock identifier
1417// Note: don't check gtid as it should always be valid
1418// 1- and 2-byte routines expect a valid gtid; others check it before this macro
1419#define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1420 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1421 \
1422 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1423 \
1424 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1425
1426#ifdef KMP_GOMP_COMPAT
1427#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1428 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1429 KMP_CHECK_GTID; \
1430 OP_CRITICAL_REV(TYPE, OP, 0); \
1431 return; \
1432 }
1433
1434#else
1435#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1436#endif /* KMP_GOMP_COMPAT */
1437
1438// Beginning of a definition (provides name, parameters, debug trace)
1439// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
1440// unsigned fixed types)
1441// OP_ID - operation identifier (add, sub, mul, ...)
1442// TYPE - operands' type
1443#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1444 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1445 TYPE *lhs, TYPE rhs) { \
1446 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1447 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1448
1449// ------------------------------------------------------------------------
1450// Operation on *lhs, rhs using "compare_and_store" routine
1451// TYPE - operands' type
1452// BITS - size in bits, used to distinguish low level calls
1453// OP - operator
1454// Note: temp_val introduced in order to force the compiler to read
1455// *lhs only once (w/o it the compiler reads *lhs twice)
1456#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1457 { \
1458 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1459 TYPE old_value, new_value; \
1460 temp_val = *lhs; \
1461 old_value = temp_val; \
1462 new_value = (TYPE)(rhs OP old_value); \
1463 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1464 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1465 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1466 KMP_DO_PAUSE; \
1467 \
1468 temp_val = *lhs; \
1469 old_value = temp_val; \
1470 new_value = (TYPE)(rhs OP old_value); \
1471 } \
1472 }
1473
1474// -------------------------------------------------------------------------
1475#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1476 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1477 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1478 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1479 }
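// Editorial note: in the _rev flavour the operand order is swapped, so e.g.
// __kmpc_atomic_float8_sub_rev (generated below) retries
//
//   new_value = (kmp_real64)(rhs - old_value);
//
// until the compare-and-store succeeds, implementing x = expr - x rather than
// x = x - expr.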
1480
1481// ------------------------------------------------------------------------
1482// Entries definition for integer operands
1483// TYPE_ID - operands type and size (fixed4, float4)
1484// OP_ID - operation identifier (add, sub, mul, ...)
1485// TYPE - operand type
1486// BITS - size in bits, used to distinguish low level calls
1487// OP - operator (used in critical section)
1488// LCK_ID - lock identifier, used to possibly distinguish lock variable
1489
1490// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1491// ------------------------------------------------------------------------
1492// Routines for ATOMIC integer operands, other operators
1493// ------------------------------------------------------------------------
1494// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1495ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1496 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1497ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1498 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1499ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1500 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1501ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1502 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1503ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1504 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1505ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1506 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1507
1508ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1509 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1510ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1511 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1512ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1513 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1514ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1515 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1516ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1517 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1518ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1519 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1520
1521ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1522 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1523ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1524 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1525ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1526 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1527ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1528 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1529ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1530 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1531ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1532 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1533
1534ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1535 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1536ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1537 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1538ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1539 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1540ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1541 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1542ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1543 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1544ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1545 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1546
1547ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1548 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1549ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1550 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1551
1552ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1553 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1554ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1555 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1556// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1557
1558// ------------------------------------------------------------------------
1559// Routines for Extended types: long double, _Quad, complex flavours (use
1560// critical section)
1561// TYPE_ID, OP_ID, TYPE - detailed above
1562// OP - operator
1563// LCK_ID - lock identifier, used to possibly distinguish lock variable
1564#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1565 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1566 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1567 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1568 }
1569
1570/* ------------------------------------------------------------------------- */
1571// routines for long double type
1572ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1573 1) // __kmpc_atomic_float10_sub_rev
1574ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1575 1) // __kmpc_atomic_float10_div_rev
1576#if KMP_HAVE_QUAD
1577// routines for _Quad type
1578ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1579 1) // __kmpc_atomic_float16_sub_rev
1580ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1581 1) // __kmpc_atomic_float16_div_rev
1582#if (KMP_ARCH_X86)
1583ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1584 1) // __kmpc_atomic_float16_sub_a16_rev
1585ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1586 1) // __kmpc_atomic_float16_div_a16_rev
1587#endif // KMP_ARCH_X86
1588#endif // KMP_HAVE_QUAD
1589
1590// routines for complex types
1591ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1592 1) // __kmpc_atomic_cmplx4_sub_rev
1593ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1594 1) // __kmpc_atomic_cmplx4_div_rev
1595ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1596 1) // __kmpc_atomic_cmplx8_sub_rev
1597ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1598 1) // __kmpc_atomic_cmplx8_div_rev
1599ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1600 1) // __kmpc_atomic_cmplx10_sub_rev
1601ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1602 1) // __kmpc_atomic_cmplx10_div_rev
1603#if KMP_HAVE_QUAD
1604ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1605 1) // __kmpc_atomic_cmplx16_sub_rev
1606ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1607 1) // __kmpc_atomic_cmplx16_div_rev
1608#if (KMP_ARCH_X86)
1609ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1610 1) // __kmpc_atomic_cmplx16_sub_a16_rev
1611ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1612 1) // __kmpc_atomic_cmplx16_div_a16_rev
1613#endif // KMP_ARCH_X86
1614#endif // KMP_HAVE_QUAD
1615
1616#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1617// End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1618
1619/* ------------------------------------------------------------------------ */
1620/* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1621/* Note: to reduce the total number of type combinations, it is assumed */
1622/* that the compiler converts RHS to the longest floating type, i.e. */
1623/* _Quad, before calling any of these routines. */
1624/* Conversion to _Quad is done by the compiler during the calculation, */
1625/* and conversion back to TYPE happens before the assignment, like: */
1626/* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1627/* A performance penalty is expected because of software emulation. */
1628/* ------------------------------------------------------------------------ */
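// Illustrative expansion (editorial sketch): for __kmpc_atomic_fixed4_add_fp
// below, TYPE is kmp_int32 and RTYPE is _Quad, so the cmpxchg body computes
//
//   new_value = (kmp_int32)(old_value + rhs); // old_value promoted to _Quad
//
// i.e. *lhs = (kmp_int32)((_Quad)(*lhs) + rhs), exactly as described above.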
1629
1630#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1631 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1632 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1633 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1634 KA_TRACE(100, \
1635 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1636 gtid));
1637
1638// -------------------------------------------------------------------------
1639#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1640 GOMP_FLAG) \
1641 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1642 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1643 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1644 }
1645
1646// -------------------------------------------------------------------------
1647#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1648// -------------------------------------------------------------------------
1649// X86 or X86_64: no alignment problems ====================================
1650#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1651 LCK_ID, MASK, GOMP_FLAG) \
1652 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1653 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1654 OP_CMPXCHG(TYPE, BITS, OP) \
1655 }
1656// -------------------------------------------------------------------------
1657#else
1658// ------------------------------------------------------------------------
1659// Code for other architectures that don't handle unaligned accesses.
1660#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1661 LCK_ID, MASK, GOMP_FLAG) \
1662 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1663 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1664 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1665 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1666 } else { \
1667 KMP_CHECK_GTID; \
1668 OP_UPDATE_CRITICAL(TYPE, OP, \
1669 LCK_ID) /* unaligned address - use critical */ \
1670 } \
1671 }
1672#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1673
1674// -------------------------------------------------------------------------
1675#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1676// -------------------------------------------------------------------------
1677#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1678 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1679 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1680 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1681 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1682 }
1683#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1684 LCK_ID, GOMP_FLAG) \
1685 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1686 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1687 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1688 }
1689#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1690
1691// RHS=float8
1692ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1693 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1694ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1695 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1696ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1697 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1698ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1699 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1700ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1701 0) // __kmpc_atomic_fixed4_mul_float8
1702ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1703 0) // __kmpc_atomic_fixed4_div_float8
1704ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1705 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1706ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1707 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1708ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1709 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1710ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1711 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1712ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1713 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1714ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1715 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
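// Usage sketch (editorial): the mixed entry points take the RHS already in the
// wider type, e.g. (loc and gtid assumed to be provided by the caller):
//
//   float x; double y;
//   __kmpc_atomic_float4_add_float8(&loc, gtid, &x, y); // x += y in double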
1716
1717// RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1718// use them)
1719#if KMP_HAVE_QUAD
1720ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1721 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1722ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1723 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1724ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1725 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1726ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1727 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1728ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1729 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1730ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1731 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1732ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1733 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1734ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1735 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1736
1737ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1738 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1739ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1740 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1741ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1742 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1743ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1744 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1745ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1746 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1747ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1748 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1749ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1750 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1751ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1752 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1753
1754ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1755 0) // __kmpc_atomic_fixed4_add_fp
1756ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1757 0) // __kmpc_atomic_fixed4u_add_fp
1758ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1759 0) // __kmpc_atomic_fixed4_sub_fp
1760ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1761 0) // __kmpc_atomic_fixed4u_sub_fp
1762ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1763 0) // __kmpc_atomic_fixed4_mul_fp
1764ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1765 0) // __kmpc_atomic_fixed4u_mul_fp
1766ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1767 0) // __kmpc_atomic_fixed4_div_fp
1768ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1769 0) // __kmpc_atomic_fixed4u_div_fp
1770
1771ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1772 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1773ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1774 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1775ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1776 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1777ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1778 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1779ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1780 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1781ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1782 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1783ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1784 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1785ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1786 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1787
1788ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1789 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1790ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1791 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1792ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1793 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1794ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1795 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1796
1797ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1798 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1799ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1800 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1801ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1802 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1803ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1804 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1805
1806#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1807ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1808 1) // __kmpc_atomic_float10_add_fp
1809ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1810 1) // __kmpc_atomic_float10_sub_fp
1811ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1812 1) // __kmpc_atomic_float10_mul_fp
1813ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1814 1) // __kmpc_atomic_float10_div_fp
1815
1816// Reverse operations
1817ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1818 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1819ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1820 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1821ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1822 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1823ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1824 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1825
1826ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1827 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1828ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1829 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1830ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1831 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1832ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1833 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1834
1835ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1836 0) // __kmpc_atomic_fixed4_sub_rev_fp
1837ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1838 0) // __kmpc_atomic_fixed4u_sub_rev_fp
1839ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1840 0) // __kmpc_atomic_fixed4_div_rev_fp
1841ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1842 0) // __kmpc_atomic_fixed4u_div_rev_fp
1843
1844ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1845 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1846ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1847 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1848ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1849 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1850ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1851 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1852
1853ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1854 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1855ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1856 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1857
1858ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1859 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1860ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1861 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1862
1863ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1864 1) // __kmpc_atomic_float10_sub_rev_fp
1865ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1866 1) // __kmpc_atomic_float10_div_rev_fp
1867#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1868
1869#endif // KMP_HAVE_QUAD
1870
1871#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1872// ------------------------------------------------------------------------
1873// X86 or X86_64: no alignment problems ====================================
1874#if USE_CMPXCHG_FIX
1875// workaround for C78287 (complex(kind=4) data type)
1876#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1877 LCK_ID, MASK, GOMP_FLAG) \
1878 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1879 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1880 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1881 }
1882// end of the second part of the workaround for C78287
1883#else
1884#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1885 LCK_ID, MASK, GOMP_FLAG) \
1886 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1887 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1888 OP_CMPXCHG(TYPE, BITS, OP) \
1889 }
1890#endif // USE_CMPXCHG_FIX
1891#else
1892// ------------------------------------------------------------------------
1893// Code for other architectures that don't handle unaligned accesses.
1894#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1895 LCK_ID, MASK, GOMP_FLAG) \
1896 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1897 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1898 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1899 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1900 } else { \
1901 KMP_CHECK_GTID; \
1902 OP_UPDATE_CRITICAL(TYPE, OP, \
1903 LCK_ID) /* unaligned address - use critical */ \
1904 } \
1905 }
1906#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1907
1908ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1909 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1910ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1911 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1912ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1913 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1914ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1915 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
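// A minimal sketch of how the MASK argument is used: these cmplx4 entries pass
// MASK = 7, so on targets without unaligned-access support (the non-x86 branch
// of ATOMIC_CMPXCHG_CMPLX above) the generated body roughly becomes:
//
//   if (!((kmp_uintptr_t)lhs & 0x7)) {
//     /* 8-byte aligned address: lock-free OP_CMPXCHG(kmp_cmplx32, 64, OP) */
//   } else {
//     KMP_CHECK_GTID;
//     /* unaligned address: critical section guarded by the 8c lock */
//   }
//
// On IA-32 / Intel(R) 64 the cmpxchg path is taken unconditionally.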
1916
1917// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1918#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1919
1920// ------------------------------------------------------------------------
1921// Atomic READ routines
1922
1923// ------------------------------------------------------------------------
1924// Beginning of a definition (provides name, parameters, debug trace)
1925// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
1926// fixed)
1927// OP_ID - operation identifier (add, sub, mul, ...)
1928// TYPE - operands' type
1929#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1930 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1931 TYPE *loc) { \
1932 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1933 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1934
1935// ------------------------------------------------------------------------
1936// Operation on *lhs, rhs using "compare_and_store_ret" routine
1937// TYPE - operands' type
1938// BITS - size in bits, used to distinguish low level calls
1939// OP - operator
1940// Note: temp_val introduced in order to force the compiler to read
1941// *lhs only once (w/o it the compiler reads *lhs twice)
1942// TODO: check if it is still necessary
1943// Return the old value regardless of the result of the "compare & swap" operation
1944#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1945 { \
1946 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1947 union f_i_union { \
1948 TYPE f_val; \
1949 kmp_int##BITS i_val; \
1950 }; \
1951 union f_i_union old_value; \
1952 temp_val = *loc; \
1953 old_value.f_val = temp_val; \
1954 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1955 (kmp_int##BITS *)loc, \
1956 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1957 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1958 new_value = old_value.f_val; \
1959 return new_value; \
1960 }
1961
1962// -------------------------------------------------------------------------
1963// Operation on *lhs, rhs bound by critical section
1964// OP - operator (it's supposed to contain an assignment)
1965// LCK_ID - lock identifier
1966// Note: don't check gtid as it should always be valid
1967// 1-, 2-byte operands - gtid is expected to be valid; others - check it before this macro
1968#define OP_CRITICAL_READ(OP, LCK_ID) \
1969 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1970 \
1971 new_value = (*loc); \
1972 \
1973 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1974
1975// -------------------------------------------------------------------------
1976#ifdef KMP_GOMP_COMPAT
1977#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1978 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1979 KMP_CHECK_GTID; \
1980 OP_CRITICAL_READ(OP, 0); \
1981 return new_value; \
1982 }
1983#else
1984#define OP_GOMP_CRITICAL_READ(OP, FLAG)
1985#endif /* KMP_GOMP_COMPAT */
1986
1987// -------------------------------------------------------------------------
1988#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1989 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1990 TYPE new_value; \
1991 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1992 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1993 return new_value; \
1994 }
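// Rough expansion sketch (debug-assert/trace prologue and the dead GOMP branch
// omitted) for ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0): the fixed
// reads are implemented as an atomic fetch-and-add of zero.
//
//   kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid,
//                                     kmp_int32 *loc) {
//     kmp_int32 new_value;
//     new_value = KMP_TEST_THEN_ADD32(loc, + 0); // add 0, return previous value
//     return new_value;
//   }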
1995// -------------------------------------------------------------------------
1996#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1997 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1998 TYPE new_value; \
1999 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
2000 OP_CMPXCHG_READ(TYPE, BITS, OP) \
2001 }
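// Rough expansion sketch (prologue and GOMP branch omitted) for
// ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, KMP_ARCH_X86): the value
// is loaded through a compare-and-store of itself, which returns the old bits
// atomically whether or not the "store" actually happens.
//
//   kmp_real32 __kmpc_atomic_float4_rd(ident_t *id_ref, int gtid,
//                                      kmp_real32 *loc) {
//     kmp_real32 new_value;
//     kmp_real32 KMP_ATOMIC_VOLATILE temp_val;
//     union f_i_union {
//       kmp_real32 f_val;
//       kmp_int32 i_val;
//     } old_value;
//     temp_val = *loc;
//     old_value.f_val = temp_val;
//     old_value.i_val = KMP_COMPARE_AND_STORE_RET32(
//         (kmp_int32 *)loc, *VOLATILE_CAST(kmp_int32 *) & old_value.i_val,
//         *VOLATILE_CAST(kmp_int32 *) & old_value.i_val);
//     new_value = old_value.f_val;
//     return new_value;
//   }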
2002// ------------------------------------------------------------------------
2003// Routines for Extended types: long double, _Quad, complex flavours (use
2004// critical section)
2005// TYPE_ID, OP_ID, TYPE - detailed above
2006// OP - operator
2007// LCK_ID - lock identifier, used to possibly distinguish lock variable
2008#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2009 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2010 TYPE new_value; \
2011 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2012 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2013 return new_value; \
2014 }
2015
2016// ------------------------------------------------------------------------
2017// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
2018// value doesn't work.
2019// Let's return the read value through the additional parameter.
2020#if (KMP_OS_WINDOWS)
2021
2022#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2023 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2024 \
2025 (*out) = (*loc); \
2026 \
2027 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2028// ------------------------------------------------------------------------
2029#ifdef KMP_GOMP_COMPAT
2030#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2031 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2032 KMP_CHECK_GTID; \
2033 OP_CRITICAL_READ_WRK(OP, 0); \
2034 }
2035#else
2036#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2037#endif /* KMP_GOMP_COMPAT */
2038// ------------------------------------------------------------------------
2039#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2040 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2041 TYPE *loc) { \
2042 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2043 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2044
2045// ------------------------------------------------------------------------
2046#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2047 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2048 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2049 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2050 }
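// Rough expansion sketch for the Windows* OS case above,
// ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 1): the read value
// comes back through the extra "out" parameter instead of the return value.
// ATOMIC_LOCK8c is the token-pasted lock name; it is assumed to resolve to the
// per-type lock __kmp_atomic_lock_8c declared near the top of this file.
//
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc) {
//     // GOMP-compat branch comes first when __kmp_atomic_mode == 2
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK8c, gtid);
//     (*out) = (*loc);
//     __kmp_release_atomic_lock(&ATOMIC_LOCK8c, gtid);
//   }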
2051
2052#endif // KMP_OS_WINDOWS
2053
2054// ------------------------------------------------------------------------
2055// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2056ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2057ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2058 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2059ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2060 KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2061ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2062 KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2063
2064// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2065ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2066 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2067ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2068 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2069
2070ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2071 1) // __kmpc_atomic_float10_rd
2072#if KMP_HAVE_QUAD
2073ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2074 1) // __kmpc_atomic_float16_rd
2075#endif // KMP_HAVE_QUAD
2076
2077// Fix for CQ220361 on Windows* OS
2078#if (KMP_OS_WINDOWS)
2079ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2080 1) // __kmpc_atomic_cmplx4_rd
2081#else
2082ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2083 1) // __kmpc_atomic_cmplx4_rd
2084#endif // (KMP_OS_WINDOWS)
2085ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2086 1) // __kmpc_atomic_cmplx8_rd
2087ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2088 1) // __kmpc_atomic_cmplx10_rd
2089#if KMP_HAVE_QUAD
2090ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2091 1) // __kmpc_atomic_cmplx16_rd
2092#if (KMP_ARCH_X86)
2093ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2094 1) // __kmpc_atomic_float16_a16_rd
2095ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2096 1) // __kmpc_atomic_cmplx16_a16_rd
2097#endif // (KMP_ARCH_X86)
2098#endif // KMP_HAVE_QUAD
2099
2100// ------------------------------------------------------------------------
2101// Atomic WRITE routines
2102
2103#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2104 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2105 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2106 KMP_XCHG_FIXED##BITS(lhs, rhs); \
2107 }
2108// ------------------------------------------------------------------------
2109#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2110 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2111 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2112 KMP_XCHG_REAL##BITS(lhs, rhs); \
2113 }
2114
2115// ------------------------------------------------------------------------
2116// Operation on *lhs, rhs using "compare_and_store" routine
2117// TYPE - operands' type
2118// BITS - size in bits, used to distinguish low level calls
2119// OP - operator
2120// Note: temp_val introduced in order to force the compiler to read
2121// *lhs only once (w/o it the compiler reads *lhs twice)
2122#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2123 { \
2124 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2125 TYPE old_value, new_value; \
2126 temp_val = *lhs; \
2127 old_value = temp_val; \
2128 new_value = rhs; \
2129 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2130 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2131 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2132 temp_val = *lhs; \
2133 old_value = temp_val; \
2134 new_value = rhs; \
2135 } \
2136 }
2137
2138// -------------------------------------------------------------------------
2139#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2140 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2141 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2142 OP_CMPXCHG_WR(TYPE, BITS, OP) \
2143 }
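// Rough expansion sketch for ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
// KMP_ARCH_X86), used below for 8-byte writes on 32-bit x86. ATOMIC_BEGIN is
// defined earlier in this file and is assumed to provide the usual
// (ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs) prologue.
//
//   void __kmpc_atomic_fixed8_wr(ident_t *id_ref, int gtid, kmp_int64 *lhs,
//                                kmp_int64 rhs) {
//     kmp_int64 KMP_ATOMIC_VOLATILE temp_val;
//     kmp_int64 old_value, new_value;
//     temp_val = *lhs;
//     old_value = temp_val;
//     new_value = rhs;
//     // retry the 64-bit compare-and-store until the whole value lands at once
//     while (!KMP_COMPARE_AND_STORE_ACQ64(
//         (kmp_int64 *)lhs, *VOLATILE_CAST(kmp_int64 *) & old_value,
//         *VOLATILE_CAST(kmp_int64 *) & new_value)) {
//       temp_val = *lhs;
//       old_value = temp_val;
//       new_value = rhs;
//     }
//   }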
2144
2145// ------------------------------------------------------------------------
2146// Routines for Extended types: long double, _Quad, complex flavours (use
2147// critical section)
2148// TYPE_ID, OP_ID, TYPE - detailed above
2149// OP - operator
2150// LCK_ID - lock identifier, used to possibly distinguish lock variable
2151#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2152 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2153 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2154 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2155 }
2156// -------------------------------------------------------------------------
2157
2158ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2159 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2160ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2161 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2162ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2163 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2164#if (KMP_ARCH_X86)
2165ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2166 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2167#else
2168ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2169 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2170#endif // (KMP_ARCH_X86)
2171
2172ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2173 KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2174#if (KMP_ARCH_X86)
2175ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2176 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2177#else
2178ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2179 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2180#endif // (KMP_ARCH_X86)
2181
2182ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2183 1) // __kmpc_atomic_float10_wr
2184#if KMP_HAVE_QUAD
2185ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2186 1) // __kmpc_atomic_float16_wr
2187#endif // KMP_HAVE_QUAD
2188ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2189ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2190 1) // __kmpc_atomic_cmplx8_wr
2191ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2192 1) // __kmpc_atomic_cmplx10_wr
2193#if KMP_HAVE_QUAD
2194ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2195 1) // __kmpc_atomic_cmplx16_wr
2196#if (KMP_ARCH_X86)
2197ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2198 1) // __kmpc_atomic_float16_a16_wr
2199ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2200 1) // __kmpc_atomic_cmplx16_a16_wr
2201#endif // (KMP_ARCH_X86)
2202#endif // KMP_HAVE_QUAD
2203
2204// ------------------------------------------------------------------------
2205// Atomic CAPTURE routines
2206
2207// Beginning of a definition (provides name, parameters, debug trace)
2208// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
2209// fixed)
2210// OP_ID - operation identifier (add, sub, mul, ...)
2211// TYPE - operands' type
2212#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2213 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2214 TYPE *lhs, TYPE rhs, int flag) { \
2215 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2216 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2217
2218// -------------------------------------------------------------------------
2219// Operation on *lhs, rhs bound by critical section
2220// OP - operator (it's supposed to contain an assignment)
2221// LCK_ID - lock identifier
2222// Note: don't check gtid as it should always be valid
2223// 1-, 2-byte operands - gtid is expected to be valid; others - check it before this macro
2224#define OP_CRITICAL_CPT(OP, LCK_ID) \
2225 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2226 \
2227 if (flag) { \
2228 (*lhs) OP rhs; \
2229 new_value = (*lhs); \
2230 } else { \
2231 new_value = (*lhs); \
2232 (*lhs) OP rhs; \
2233 } \
2234 \
2235 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2236 return new_value;
2237
2238#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2239 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2240 \
2241 if (flag) { \
2242 (*lhs) = (TYPE)((*lhs)OP rhs); \
2243 new_value = (*lhs); \
2244 } else { \
2245 new_value = (*lhs); \
2246 (*lhs) = (TYPE)((*lhs)OP rhs); \
2247 } \
2248 \
2249 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2250 return new_value;
2251
2252// ------------------------------------------------------------------------
2253#ifdef KMP_GOMP_COMPAT
2254#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2255 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2256 KMP_CHECK_GTID; \
2257 OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2258 }
2259#else
2260#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2261#endif /* KMP_GOMP_COMPAT */
2262
2263// ------------------------------------------------------------------------
2264// Operation on *lhs, rhs using "compare_and_store" routine
2265// TYPE - operands' type
2266// BITS - size in bits, used to distinguish low level calls
2267// OP - operator
2268// Note: temp_val introduced in order to force the compiler to read
2269// *lhs only once (w/o it the compiler reads *lhs twice)
2270#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2271 { \
2272 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2273 TYPE old_value, new_value; \
2274 temp_val = *lhs; \
2275 old_value = temp_val; \
2276 new_value = (TYPE)(old_value OP rhs); \
2277 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2278 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2279 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2280 temp_val = *lhs; \
2281 old_value = temp_val; \
2282 new_value = (TYPE)(old_value OP rhs); \
2283 } \
2284 if (flag) { \
2285 return new_value; \
2286 } else \
2287 return old_value; \
2288 }
2289
2290// -------------------------------------------------------------------------
2291#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2292 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2293 TYPE new_value; \
2294 (void)new_value; \
2295 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2296 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2297 }
2298
2299// -------------------------------------------------------------------------
2300#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2301 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2302 TYPE old_value, new_value; \
2303 (void)new_value; \
2304 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2305 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2306 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2307 if (flag) { \
2308 return old_value OP rhs; \
2309 } else \
2310 return old_value; \
2311 }
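// Rough expansion sketch (prologue and dead GOMP branch omitted) for
// ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 0). The "flag"
// parameter selects which value is captured: flag != 0 gives the post-update
// value ({ x = x + expr; v = x; }), flag == 0 the pre-update value
// ({ v = x; x = x + expr; }).
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     kmp_int32 old_value, new_value;
//     old_value = KMP_TEST_THEN_ADD32(lhs, + rhs); // returns the previous *lhs
//     if (flag) {
//       return old_value + rhs;
//     } else
//       return old_value;
//   }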
2312// -------------------------------------------------------------------------
2313
2314ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2315 0) // __kmpc_atomic_fixed4_add_cpt
2316ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2317 0) // __kmpc_atomic_fixed4_sub_cpt
2318ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2319 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2320ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2321 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2322
2323ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2324 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2325ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2326 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2327ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2328 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2329ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2330 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2331
2332// ------------------------------------------------------------------------
2333// Entry definitions for integer operands
2334// TYPE_ID - operands' type and size (fixed4, float4)
2335// OP_ID - operation identifier (add, sub, mul, ...)
2336// TYPE - operand type
2337// BITS - size in bits, used to distinguish low level calls
2338// OP - operator (used in critical section)
2339// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2340// ------------------------------------------------------------------------
2341// Routines for ATOMIC integer operands, other operators
2342// ------------------------------------------------------------------------
2343// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2344ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2345 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2346ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2347 0) // __kmpc_atomic_fixed1_andb_cpt
2348ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2349 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2350ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2351 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2352ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2353 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2354ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2355 0) // __kmpc_atomic_fixed1_orb_cpt
2356ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2357 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2358ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2359 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2360ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2361 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2362ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2363 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2364ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2365 0) // __kmpc_atomic_fixed1_xor_cpt
2366ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2367 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2368ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2369 0) // __kmpc_atomic_fixed2_andb_cpt
2370ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2371 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2372ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2373 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2374ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2375 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2376ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2377 0) // __kmpc_atomic_fixed2_orb_cpt
2378ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2379 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2380ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2381 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2382ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2383 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2384ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2385 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2386ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2387 0) // __kmpc_atomic_fixed2_xor_cpt
2388ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2389 0) // __kmpc_atomic_fixed4_andb_cpt
2390ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2391 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2392ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2393 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2394ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2395 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2396ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2397 0) // __kmpc_atomic_fixed4_orb_cpt
2398ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2399 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2400ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2401 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2402ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2403 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2404ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2405 0) // __kmpc_atomic_fixed4_xor_cpt
2406ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2407 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2408ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2409 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2410ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2411 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2412ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2413 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2414ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2415 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2416ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2417 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2418ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2419 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2420ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2421 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2422ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2423 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2424ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2425 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2426ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2427 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2428ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2429 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2430ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2431 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2432// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2433
2434// CAPTURE routines for mixed types RHS=float16
2435#if KMP_HAVE_QUAD
2436
2437// Beginning of a definition (provides name, parameters, debug trace)
2438// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
2439// fixed)
2440// OP_ID - operation identifier (add, sub, mul, ...)
2441// TYPE - operands' type
2442#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2443 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2444 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2445 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2446 KA_TRACE(100, \
2447 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2448 gtid));
2449
2450// -------------------------------------------------------------------------
2451#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2452 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2453 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2454 TYPE new_value; \
2455 (void)new_value; \
2456 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2457 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2458 }
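// For these mixed-type capture entries the right-hand side is a _Quad while
// the location keeps its own type; e.g. ATOMIC_CMPXCHG_CPT_MIX(float8,
// kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86) generates
// (signature per ATOMIC_BEGIN_CPT_MIX above):
//
//   kmp_real64 __kmpc_atomic_float8_add_cpt_fp(ident_t *id_ref, int gtid,
//                                              kmp_real64 *lhs, _Quad rhs,
//                                              int flag);
//
// Inside OP_CMPXCHG_CPT the update is new_value = (kmp_real64)(old_value + rhs),
// so the arithmetic is presumably performed in _Quad precision (by the usual
// arithmetic conversions) and then narrowed back to kmp_real64 for the store.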
2459
2460// -------------------------------------------------------------------------
2461#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2462 LCK_ID, GOMP_FLAG) \
2463 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2464 TYPE new_value; \
2465 (void)new_value; \
2466 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2467 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2468 }
2469
2470ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2471 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2472ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2473 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2474ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2475 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2476ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2477 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2478ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2479 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2480ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2481 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2482ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2483 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2484ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2485 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2486
2487ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2488 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2489ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2490 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2491ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2492 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2493ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2494 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2495ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2496 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2497ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2498 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2499ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2500 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2501ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2502 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2503
2504ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2505 0) // __kmpc_atomic_fixed4_add_cpt_fp
2506ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2507 0) // __kmpc_atomic_fixed4u_add_cpt_fp
2508ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2509 0) // __kmpc_atomic_fixed4_sub_cpt_fp
2510ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2511 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2512ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2513 0) // __kmpc_atomic_fixed4_mul_cpt_fp
2514ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2515 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2516ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2517 0) // __kmpc_atomic_fixed4_div_cpt_fp
2518ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2519 0) // __kmpc_atomic_fixed4u_div_cpt_fp
2520
2521ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2522 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2523ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2524 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2525ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2526 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2527ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2528 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2529ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2530 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2531ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2532 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2533ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2534 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2535ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2536 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2537
2538ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2539 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2540ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2541 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2542ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2543 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2544ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2545 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2546
2547ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2548 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2549ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2550 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2551ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2552 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2553ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2554 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2555
2556ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2557 1) // __kmpc_atomic_float10_add_cpt_fp
2558ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2559 1) // __kmpc_atomic_float10_sub_cpt_fp
2560ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2561 1) // __kmpc_atomic_float10_mul_cpt_fp
2562ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2563 1) // __kmpc_atomic_float10_div_cpt_fp
2564
2565#endif // KMP_HAVE_QUAD
2566
2567// ------------------------------------------------------------------------
2568// Routines for C/C++ Reduction operators && and ||
2569
2570// -------------------------------------------------------------------------
2571// Operation on *lhs, rhs bound by critical section
2572// OP - operator (it's supposed to contain an assignment)
2573// LCK_ID - lock identifier
2574// Note: don't check gtid as it should always be valid
2575// 1-, 2-byte operands - gtid is expected to be valid; others - check it before this macro
2576#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2577 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2578 \
2579 if (flag) { \
2580 new_value OP rhs; \
2581 (*lhs) = new_value; \
2582 } else { \
2583 new_value = (*lhs); \
2584 (*lhs) OP rhs; \
2585 } \
2586 \
2587 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2588
2589// ------------------------------------------------------------------------
2590#ifdef KMP_GOMP_COMPAT
2591#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2592 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2593 KMP_CHECK_GTID; \
2594 OP_CRITICAL_L_CPT(OP, 0); \
2595 return new_value; \
2596 }
2597#else
2598#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2599#endif /* KMP_GOMP_COMPAT */
2600
2601// ------------------------------------------------------------------------
2602// Need separate macros for &&, || because there is no combined assignment
2603#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2604 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2605 TYPE new_value; \
2606 (void)new_value; \
2607 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2608 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2609 }
2610
2611ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2612 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2613ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2614 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2615ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2616 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2617ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2618 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2619ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2620 0) // __kmpc_atomic_fixed4_andl_cpt
2621ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2622 0) // __kmpc_atomic_fixed4_orl_cpt
2623ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2624 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2625ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2626 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2627
2628// -------------------------------------------------------------------------
2629// Routines for Fortran operators that have no direct C counterpart:
2630// MAX, MIN, .EQV., .NEQV.
2631// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2632// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2633
2634// -------------------------------------------------------------------------
2635// MIN and MAX need separate macros
2636// OP - operator used to check whether any action is needed
2637#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2638 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2639 \
2640 if (*lhs OP rhs) { /* still need actions? */ \
2641 old_value = *lhs; \
2642 *lhs = rhs; \
2643 if (flag) \
2644 new_value = rhs; \
2645 else \
2646 new_value = old_value; \
2647 } else { \
2648 new_value = *lhs; \
2649 } \
2650 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2651 return new_value;
2652
2653// -------------------------------------------------------------------------
2654#ifdef KMP_GOMP_COMPAT
2655#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2656 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2657 KMP_CHECK_GTID; \
2658 MIN_MAX_CRITSECT_CPT(OP, 0); \
2659 }
2660#else
2661#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2662#endif /* KMP_GOMP_COMPAT */
2663
2664// -------------------------------------------------------------------------
2665#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2666 { \
2667 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2668 /*TYPE old_value; */ \
2669 temp_val = *lhs; \
2670 old_value = temp_val; \
2671 while (old_value OP rhs && /* still need actions? */ \
2672 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2673 (kmp_int##BITS *)lhs, \
2674 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2675 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2676 temp_val = *lhs; \
2677 old_value = temp_val; \
2678 } \
2679 if (flag) \
2680 return rhs; \
2681 else \
2682 return old_value; \
2683 }
2684
2685// -------------------------------------------------------------------------
2686// 1-byte, 2-byte operands - use critical section
2687#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2688 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2689 TYPE new_value, old_value; \
2690 if (*lhs OP rhs) { /* need actions? */ \
2691 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2692 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2693 } \
2694 return *lhs; \
2695 }
2696
2697#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2698 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2699 TYPE new_value, old_value; \
2700 (void)new_value; \
2701 if (*lhs OP rhs) { \
2702 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2703 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2704 } \
2705 return *lhs; \
2706 }
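// Rough expansion sketch (prologue omitted) for
// MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 0). For max the
// operator passed is "<", so the compare-and-store is attempted only while the
// current value is still smaller than rhs; otherwise the current value is
// returned unchanged.
//
//   kmp_int32 __kmpc_atomic_fixed4_max_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     kmp_int32 new_value, old_value;
//     if (*lhs < rhs) { // any action needed at all?
//       kmp_int32 KMP_ATOMIC_VOLATILE temp_val;
//       temp_val = *lhs;
//       old_value = temp_val;
//       while (old_value < rhs && // re-check: another thread may have won
//              !KMP_COMPARE_AND_STORE_ACQ32(
//                  (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//                  *VOLATILE_CAST(kmp_int32 *) & rhs)) {
//         temp_val = *lhs;
//         old_value = temp_val;
//       }
//       if (flag)
//         return rhs;
//       else
//         return old_value;
//     }
//     return *lhs;
//   }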
2707
2708MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2709 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2710MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2711 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2712MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2713 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2714MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2715 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2716MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2717 0) // __kmpc_atomic_fixed4_max_cpt
2718MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2719 0) // __kmpc_atomic_fixed4_min_cpt
2720MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2721 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2722MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2723 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2724MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2725 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2726MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2727 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2728MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2729 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2730MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2731 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2732MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
2733 1) // __kmpc_atomic_float10_max_cpt
2734MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
2735 1) // __kmpc_atomic_float10_min_cpt
2736#if KMP_HAVE_QUAD
2737MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2738 1) // __kmpc_atomic_float16_max_cpt
2739MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2740 1) // __kmpc_atomic_float16_min_cpt
2741#if (KMP_ARCH_X86)
2742MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2743 1) // __kmpc_atomic_float16_max_a16_cpt
2744MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2745 1) // __kmpc_atomic_float16_min_a16_cpt
2746#endif // (KMP_ARCH_X86)
2747#endif // KMP_HAVE_QUAD
2748
2749// ------------------------------------------------------------------------
2750#ifdef KMP_GOMP_COMPAT
2751#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2752 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2753 KMP_CHECK_GTID; \
2754 OP_CRITICAL_CPT(OP, 0); \
2755 }
2756#else
2757#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2758#endif /* KMP_GOMP_COMPAT */
2759// ------------------------------------------------------------------------
2760#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2761 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2762 TYPE new_value; \
2763 (void)new_value; \
2764 OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2765 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2766 }
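// Worked example for the .EQV. entries below: the operator passed to
// OP_CMPXCHG_CPT is "^~", so the update computed is old_value ^ ~rhs, which is
// bit-for-bit identical to ~(old_value ^ rhs). With 8-bit operands
// old_value = 0b00001100 and rhs = 0b00001010:
//
//   old_value ^ ~rhs   = 0b00001100 ^ 0b11110101 = 0b11111001
//   ~(old_value ^ rhs) = ~0b00000110             = 0b11111001
//
// The GOMP-compat path spells the same update as the compound form
// "*lhs ^= (TYPE) ~rhs".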
2767
2768// ------------------------------------------------------------------------
2769
2770ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2771 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2772ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2773 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2774ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2775 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2776ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2777 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2778ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2779 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2780ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2781 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2782ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2783 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2784ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2785 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2786
2787// ------------------------------------------------------------------------
2788// Routines for Extended types: long double, _Quad, complex flavours (use
2789// critical section)
2790// TYPE_ID, OP_ID, TYPE - detailed above
2791// OP - operator
2792// LCK_ID - lock identifier, used to possibly distinguish lock variable
2793#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2794 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2795 TYPE new_value; \
2796 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2797 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2798 }
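// Rough expansion sketch (prologue omitted) for
// ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 1), used below for
// long double: the whole capture runs under the per-type lock. ATOMIC_LOCK10r
// is the token-pasted lock name and is assumed to resolve to
// __kmp_atomic_lock_10r declared near the top of this file.
//
//   long double __kmpc_atomic_float10_add_cpt(ident_t *id_ref, int gtid,
//                                             long double *lhs,
//                                             long double rhs, int flag) {
//     long double new_value;
//     // GOMP-compat critical path comes first when __kmp_atomic_mode == 2
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK10r, gtid);
//     if (flag) {
//       (*lhs) = (long double)((*lhs) + rhs);
//       new_value = (*lhs);
//     } else {
//       new_value = (*lhs);
//       (*lhs) = (long double)((*lhs) + rhs);
//     }
//     __kmp_release_atomic_lock(&ATOMIC_LOCK10r, gtid);
//     return new_value;
//   }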
2799
2800// ------------------------------------------------------------------------
2801// Workaround for cmplx4. Regular routines with return value don't work
2802// on Win_32e. Let's return captured values through the additional parameter.
2803#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2804 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2805 \
2806 if (flag) { \
2807 (*lhs) OP rhs; \
2808 (*out) = (*lhs); \
2809 } else { \
2810 (*out) = (*lhs); \
2811 (*lhs) OP rhs; \
2812 } \
2813 \
2814 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2815 return;
2816// ------------------------------------------------------------------------
2817
2818#ifdef KMP_GOMP_COMPAT
2819#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2820 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2821 KMP_CHECK_GTID; \
2822 OP_CRITICAL_CPT_WRK(OP## =, 0); \
2823 }
2824#else
2825#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2826#endif /* KMP_GOMP_COMPAT */
2827// ------------------------------------------------------------------------
2828
2829#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2830 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2831 TYPE rhs, TYPE *out, int flag) { \
2832 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2833 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2834// ------------------------------------------------------------------------
2835
2836#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2837 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2838 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2839 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2840 }
2841// The end of workaround for cmplx4
2842
2843/* ------------------------------------------------------------------------- */
2844// routines for long double type
2845ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2846 1) // __kmpc_atomic_float10_add_cpt
2847ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2848 1) // __kmpc_atomic_float10_sub_cpt
2849ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2850 1) // __kmpc_atomic_float10_mul_cpt
2851ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2852 1) // __kmpc_atomic_float10_div_cpt
2853#if KMP_HAVE_QUAD
2854// routines for _Quad type
2855ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2856 1) // __kmpc_atomic_float16_add_cpt
2857ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2858 1) // __kmpc_atomic_float16_sub_cpt
2859ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2860 1) // __kmpc_atomic_float16_mul_cpt
2861ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2862 1) // __kmpc_atomic_float16_div_cpt
2863#if (KMP_ARCH_X86)
2864ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2865 1) // __kmpc_atomic_float16_add_a16_cpt
2866ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2867 1) // __kmpc_atomic_float16_sub_a16_cpt
2868ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2869 1) // __kmpc_atomic_float16_mul_a16_cpt
2870ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2871 1) // __kmpc_atomic_float16_div_a16_cpt
2872#endif // (KMP_ARCH_X86)
2873#endif // KMP_HAVE_QUAD
2874
2875// routines for complex types
2876
2877// cmplx4 routines to return void
2878ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2879 1) // __kmpc_atomic_cmplx4_add_cpt
2880ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2881 1) // __kmpc_atomic_cmplx4_sub_cpt
2882ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2883 1) // __kmpc_atomic_cmplx4_mul_cpt
2884ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2885 1) // __kmpc_atomic_cmplx4_div_cpt
2886
2887ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2888 1) // __kmpc_atomic_cmplx8_add_cpt
2889ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2890 1) // __kmpc_atomic_cmplx8_sub_cpt
2891ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2892 1) // __kmpc_atomic_cmplx8_mul_cpt
2893ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2894 1) // __kmpc_atomic_cmplx8_div_cpt
2895ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2896 1) // __kmpc_atomic_cmplx10_add_cpt
2897ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2898 1) // __kmpc_atomic_cmplx10_sub_cpt
2899ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2900 1) // __kmpc_atomic_cmplx10_mul_cpt
2901ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2902 1) // __kmpc_atomic_cmplx10_div_cpt
2903#if KMP_HAVE_QUAD
2904ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2905 1) // __kmpc_atomic_cmplx16_add_cpt
2906ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2907 1) // __kmpc_atomic_cmplx16_sub_cpt
2908ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2909 1) // __kmpc_atomic_cmplx16_mul_cpt
2910ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2911 1) // __kmpc_atomic_cmplx16_div_cpt
2912#if (KMP_ARCH_X86)
2913ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2914 1) // __kmpc_atomic_cmplx16_add_a16_cpt
2915ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2916 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2917ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2918 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2919ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2920 1) // __kmpc_atomic_cmplx16_div_a16_cpt
2921#endif // (KMP_ARCH_X86)
2922#endif // KMP_HAVE_QUAD
2923
2924// OpenMP 4.0 capture forms: v = x = expr binop x; { v = x; x = expr binop x; }
2925// { x = expr binop x; v = x; } -- for non-commutative operations.
2926// Supported only on IA-32 architecture and Intel(R) 64
2927
2928// -------------------------------------------------------------------------
2929// Operation on *lhs, rhs bound by critical section
2930// OP - operator (it's supposed to contain an assignment)
2931// LCK_ID - lock identifier
2932// Note: don't check gtid as it should always be valid
2933// 1-, 2-byte operands - gtid is expected to be valid; others - check it before this macro
2934#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2935 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2936 \
2937 if (flag) { \
2938 /*temp_val = (*lhs);*/ \
2939 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2940 new_value = (*lhs); \
2941 } else { \
2942 new_value = (*lhs); \
2943 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2944 } \
2945 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2946 return new_value;
2947
2948// ------------------------------------------------------------------------
2949#ifdef KMP_GOMP_COMPAT
2950#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2951 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2952 KMP_CHECK_GTID; \
2953 OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2954 }
2955#else
2956#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2957#endif /* KMP_GOMP_COMPAT */
2958
2959// ------------------------------------------------------------------------
2960// Operation on *lhs, rhs using "compare_and_store" routine
2961// TYPE - operands' type
2962// BITS - size in bits, used to distinguish low level calls
2963// OP - operator
2964// Note: temp_val introduced in order to force the compiler to read
2965// *lhs only once (w/o it the compiler reads *lhs twice)
2966#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2967 { \
2968 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2969 TYPE old_value, new_value; \
2970 temp_val = *lhs; \
2971 old_value = temp_val; \
2972 new_value = (TYPE)(rhs OP old_value); \
2973 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2974 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2975 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2976 temp_val = *lhs; \
2977 old_value = temp_val; \
2978 new_value = (TYPE)(rhs OP old_value); \
2979 } \
2980 if (flag) { \
2981 return new_value; \
2982 } else \
2983 return old_value; \
2984 }
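// The only difference from OP_CMPXCHG_CPT above is the operand order:
//   forward (sub_cpt):     new_value = (TYPE)(old_value - rhs);  // x = x - expr
//   reverse (sub_cpt_rev): new_value = (TYPE)(rhs - old_value);  // x = expr - x
// So, for example, __kmpc_atomic_fixed4_sub_cpt_rev(id_ref, gtid, &x, e, flag)
// atomically performs x = e - x and returns the new x when flag != 0, or the
// old x when flag == 0.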
2985
2986// -------------------------------------------------------------------------
2987#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2988 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2989 TYPE new_value; \
2990 (void)new_value; \
2991 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2992 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2993 }
2994
2995ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2996 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2997ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2998 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2999ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
3000 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
3001ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
3002 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
3003ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
3004 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
3005ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
3006 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
3007ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
3008 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
3009ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
3010 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
3011ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
3012 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
3013ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
3014 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
3015ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3016 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3017ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3018 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3019ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3020 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3021ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3022 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3023ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3024 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3025ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3026 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3027ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3028 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3029ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3030 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3031ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3032 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3033ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3034 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3035ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3036 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3037ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3038 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3039ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3040 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3041ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3042 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3043ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3044 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3045ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3046 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3047ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3048 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3049ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3050 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3051// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
3052
3053// ------------------------------------------------------------------------
3054// Routines for Extended types: long double, _Quad, complex flavours (use
3055// critical section)
3056// TYPE_ID, OP_ID, TYPE - detailed above
3057// OP - operator
3058// LCK_ID - lock identifier, used to possibly distinguish lock variable
3059#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3060 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3061 TYPE new_value; \
3062 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3063 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3064 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3065 }
3066
3067/* ------------------------------------------------------------------------- */
3068// routines for long double type
3069ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3070 1) // __kmpc_atomic_float10_sub_cpt_rev
3071ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3072 1) // __kmpc_atomic_float10_div_cpt_rev
3073#if KMP_HAVE_QUAD
3074// routines for _Quad type
3075ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3076 1) // __kmpc_atomic_float16_sub_cpt_rev
3077ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3078 1) // __kmpc_atomic_float16_div_cpt_rev
3079#if (KMP_ARCH_X86)
3080ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3081 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3082ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3083 1) // __kmpc_atomic_float16_div_a16_cpt_rev
3084#endif // (KMP_ARCH_X86)
3085#endif // KMP_HAVE_QUAD
3086
3087// routines for complex types
3088
3089// ------------------------------------------------------------------------
3090// Workaround for cmplx4. Regular routines with return value don't work
3091// on Win_32e. Let's return captured values through the additional parameter.
3092#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3093 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3094 \
3095 if (flag) { \
3096 (*lhs) = (rhs)OP(*lhs); \
3097 (*out) = (*lhs); \
3098 } else { \
3099 (*out) = (*lhs); \
3100 (*lhs) = (rhs)OP(*lhs); \
3101 } \
3102 \
3103 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3104 return;
3105// ------------------------------------------------------------------------
3106
3107#ifdef KMP_GOMP_COMPAT
3108#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3109 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3110 KMP_CHECK_GTID; \
3111 OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3112 }
3113#else
3114#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3115#endif /* KMP_GOMP_COMPAT */
3116// ------------------------------------------------------------------------
3117
3118#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3119 GOMP_FLAG) \
3120 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3121 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3122 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3123 }
3124// The end of workaround for cmplx4
3125
3126// !!! TODO: check if we need to return void for cmplx4 routines
3127// cmplx4 routines to return void
3128ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3129 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3130ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3131 1) // __kmpc_atomic_cmplx4_div_cpt_rev
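/*
 * Usage sketch (the caller and parameter order are assumed, following
 * ATOMIC_BEGIN_WRK defined earlier in this file): the cmplx4 capture-reverse
 * routines return void and deliver the captured value through the extra
 * 'out' pointer instead of the return value:
 *
 *   kmp_cmplx32 x, v, rhs;
 *   // x becomes rhs - x; *out (&v) receives the new value when flag is
 *   // nonzero, the old value otherwise.
 *   __kmpc_atomic_cmplx4_sub_cpt_rev(id_ref, gtid, &x, rhs, &v, 1);
 */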
3132
3133ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3134 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3135ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3136 1) // __kmpc_atomic_cmplx8_div_cpt_rev
3137ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3138 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3139ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3140 1) // __kmpc_atomic_cmplx10_div_cpt_rev
3141#if KMP_HAVE_QUAD
3142ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3143 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3144ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3145 1) // __kmpc_atomic_cmplx16_div_cpt_rev
3146#if (KMP_ARCH_X86)
3147ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3148 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3149ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3150 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3151#endif // (KMP_ARCH_X86)
3152#endif // KMP_HAVE_QUAD
3153
3154// Capture reverse for mixed type: RHS=float16
3155#if KMP_HAVE_QUAD
3156
3157// Beginning of a definition (provides name, parameters, debug trace)
3158// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
3159// unsigned fixed)
3160// OP_ID - operation identifier (add, sub, mul, ...)
3161// TYPE - operands' type
3162// -------------------------------------------------------------------------
3163#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3164 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3165 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3166 TYPE new_value; \
3167 (void)new_value; \
3168 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3169 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3170 }
3171
3172// -------------------------------------------------------------------------
3173#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3174 LCK_ID, GOMP_FLAG) \
3175 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3176 TYPE new_value; \
3177 (void)new_value; \
3178 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3179 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3180 }
3181
3182ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3183 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3184ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3185 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3186ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3187 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3188ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3189 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3190
3191ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3192 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3193ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3194 1,
3195 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3196ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3197 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3198ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3199 1,
3200 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3201
3202ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3203 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3204ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3205 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3206ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3207 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3208ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3209 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3210
3211ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3212 7,
3213 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3214ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3215 8i, 7,
3216 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3217ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3218 7,
3219 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3220ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3221 8i, 7,
3222 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3223
3224ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3225 4r, 3,
3226 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3227ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3228 4r, 3,
3229 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3230
3231ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3232 8r, 7,
3233 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3234ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3235 8r, 7,
3236 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3237
3238ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3239 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3240ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3241 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3242
3243#endif // KMP_HAVE_QUAD
3244
3245// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
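/*
 * Illustrative lowering (a sketch of how a compiler might use these entry
 * points; the OpenMP source form is standard, the particular call below is
 * assumed):
 *
 *   int x, v;
 *   #pragma omp atomic capture
 *   { v = x; x = 42; }
 *
 * may become
 *
 *   v = __kmpc_atomic_fixed4_swp(id_ref, gtid, &x, 42);
 *
 * where the routine installs rhs into *lhs and returns the old value.
 */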
3246
3247#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3248 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3249 TYPE rhs) { \
3250 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3251 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3252
3253#define CRITICAL_SWP(LCK_ID) \
3254 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3255 \
3256 old_value = (*lhs); \
3257 (*lhs) = rhs; \
3258 \
3259 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3260 return old_value;
3261
3262// ------------------------------------------------------------------------
3263#ifdef KMP_GOMP_COMPAT
3264#define GOMP_CRITICAL_SWP(FLAG) \
3265 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3266 KMP_CHECK_GTID; \
3267 CRITICAL_SWP(0); \
3268 }
3269#else
3270#define GOMP_CRITICAL_SWP(FLAG)
3271#endif /* KMP_GOMP_COMPAT */
3272
3273#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3274 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3275 TYPE old_value; \
3276 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3277 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3278 return old_value; \
3279 }
3280// ------------------------------------------------------------------------
3281#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3282 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3283 TYPE old_value; \
3284 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3285 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3286 return old_value; \
3287 }
3288
3289// ------------------------------------------------------------------------
3290#define CMPXCHG_SWP(TYPE, BITS) \
3291 { \
3292 TYPE KMP_ATOMIC_VOLATILE temp_val; \
3293 TYPE old_value, new_value; \
3294 temp_val = *lhs; \
3295 old_value = temp_val; \
3296 new_value = rhs; \
3297 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3298 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3299 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3300 temp_val = *lhs; \
3301 old_value = temp_val; \
3302 new_value = rhs; \
3303 } \
3304 return old_value; \
3305 }
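// CMPXCHG_SWP retries until the compare-and-store succeeds: old_value is
// re-read from *lhs after every failed attempt, so rhs is eventually
// installed over whatever value is current and the last observed value is
// returned as the captured result.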
3306
3307// -------------------------------------------------------------------------
3308#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3309 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3310 TYPE old_value; \
3311 (void)old_value; \
3312 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3313 CMPXCHG_SWP(TYPE, BITS) \
3314 }
3315
3316ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3317ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3318ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3319
3320ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3321 KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3322
3323#if (KMP_ARCH_X86)
3324ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3325 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3326ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3327 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3328#else
3329ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3330ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3331 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3332#endif // (KMP_ARCH_X86)
3333
3334// ------------------------------------------------------------------------
3335// Routines for Extended types: long double, _Quad, complex flavours (use
3336// critical section)
3337#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3338 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3339 TYPE old_value; \
3340 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3341 CRITICAL_SWP(LCK_ID) \
3342 }
3343
3344// ------------------------------------------------------------------------
3345// !!! TODO: check if we need to return void for cmplx4 routines
3346// Workaround for cmplx4. Regular routines with return value don't work
3347// on Win_32e. Let's return captured values through the additional parameter.
3348
3349#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3350 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3351 TYPE rhs, TYPE *out) { \
3352 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3353 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3354
3355#define CRITICAL_SWP_WRK(LCK_ID) \
3356 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3357 \
3358 tmp = (*lhs); \
3359 (*lhs) = (rhs); \
3360 (*out) = tmp; \
3361 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3362 return;
3363// ------------------------------------------------------------------------
3364
3365#ifdef KMP_GOMP_COMPAT
3366#define GOMP_CRITICAL_SWP_WRK(FLAG) \
3367 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3368 KMP_CHECK_GTID; \
3369 CRITICAL_SWP_WRK(0); \
3370 }
3371#else
3372#define GOMP_CRITICAL_SWP_WRK(FLAG)
3373#endif /* KMP_GOMP_COMPAT */
3374// ------------------------------------------------------------------------
3375
3376#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3377 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3378 TYPE tmp; \
3379 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3380 CRITICAL_SWP_WRK(LCK_ID) \
3381 }
3382// The end of workaround for cmplx4
3383
3384ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3385#if KMP_HAVE_QUAD
3386ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3387#endif // KMP_HAVE_QUAD
3388// cmplx4 routine to return void
3389ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3390
3391// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3392// __kmpc_atomic_cmplx4_swp
3393
3394ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3395ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3396#if KMP_HAVE_QUAD
3397ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3398#if (KMP_ARCH_X86)
3399ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3400 1) // __kmpc_atomic_float16_a16_swp
3401ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3402 1) // __kmpc_atomic_cmplx16_a16_swp
3403#endif // (KMP_ARCH_X86)
3404#endif // KMP_HAVE_QUAD
3405
3406// End of OpenMP 4.0 Capture
3407
3408#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3409
3410#undef OP_CRITICAL
3411
3412/* ------------------------------------------------------------------------ */
3413/* Generic atomic routines */
3414
3415void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3416 void (*f)(void *, void *, void *)) {
3417 KMP_DEBUG_ASSERT(__kmp_init_serial);
3418
3419 if (
3420#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3421 FALSE /* must use lock */
3422#else
3423 TRUE
3424#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3425 ) {
3426 kmp_int8 old_value, new_value;
3427
3428 old_value = *(kmp_int8 *)lhs;
3429 (*f)(&new_value, &old_value, rhs);
3430
3431 /* TODO: Should this be acquire or release? */
3432 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3433 *(kmp_int8 *)&new_value)) {
3434 KMP_CPU_PAUSE();
3435
3436 old_value = *(kmp_int8 *)lhs;
3437 (*f)(&new_value, &old_value, rhs);
3438 }
3439
3440 return;
3441 } else {
3442 // All 1-byte data is of integer data type.
3443
3444#ifdef KMP_GOMP_COMPAT
3445 if (__kmp_atomic_mode == 2) {
3446 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3447 } else
3448#endif /* KMP_GOMP_COMPAT */
3449 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3450
3451 (*f)(lhs, lhs, rhs);
3452
3453#ifdef KMP_GOMP_COMPAT
3454 if (__kmp_atomic_mode == 2) {
3455 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3456 } else
3457#endif /* KMP_GOMP_COMPAT */
3458 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3459 }
3460}
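/*
 * Callback contract (illustrative; add_char below is a hypothetical
 * compiler-generated helper, not part of this file): f(result, a, b) must
 * store the combination of *a and *b to *result, with all three pointers
 * referring to the 1-byte operand type.
 *
 *   static void add_char(void *result, void *a, void *b) {
 *     *(char *)result = *(char *)a + *(char *)b;
 *   }
 *   ...
 *   char x, y;
 *   __kmpc_atomic_1(id_ref, gtid, &x, &y, add_char); // atomic x += y
 */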
3461
3462void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3463 void (*f)(void *, void *, void *)) {
3464 if (
3465#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3466 FALSE /* must use lock */
3467#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3468 TRUE /* no alignment problems */
3469#else
3470 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3471#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3472 ) {
3473 kmp_int16 old_value, new_value;
3474
3475 old_value = *(kmp_int16 *)lhs;
3476 (*f)(&new_value, &old_value, rhs);
3477
3478 /* TODO: Should this be acquire or release? */
3479 while (!KMP_COMPARE_AND_STORE_ACQ16(
3480 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3481 KMP_CPU_PAUSE();
3482
3483 old_value = *(kmp_int16 *)lhs;
3484 (*f)(&new_value, &old_value, rhs);
3485 }
3486
3487 return;
3488 } else {
3489 // All 2-byte data is of integer data type.
3490
3491#ifdef KMP_GOMP_COMPAT
3492 if (__kmp_atomic_mode == 2) {
3493 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3494 } else
3495#endif /* KMP_GOMP_COMPAT */
3496 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3497
3498 (*f)(lhs, lhs, rhs);
3499
3500#ifdef KMP_GOMP_COMPAT
3501 if (__kmp_atomic_mode == 2) {
3502 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3503 } else
3504#endif /* KMP_GOMP_COMPAT */
3505 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3506 }
3507}
3508
3509void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3510 void (*f)(void *, void *, void *)) {
3511 KMP_DEBUG_ASSERT(__kmp_init_serial);
3512
3513 if (
3514// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3515// Gomp compatibility is broken if this routine is called for floats.
3516#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3517 TRUE /* no alignment problems */
3518#else
3519 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3520#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3521 ) {
3522 kmp_int32 old_value, new_value;
3523
3524 old_value = *(kmp_int32 *)lhs;
3525 (*f)(&new_value, &old_value, rhs);
3526
3527 /* TODO: Should this be acquire or release? */
3528 while (!KMP_COMPARE_AND_STORE_ACQ32(
3529 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3530 KMP_CPU_PAUSE();
3531
3532 old_value = *(kmp_int32 *)lhs;
3533 (*f)(&new_value, &old_value, rhs);
3534 }
3535
3536 return;
3537 } else {
3538 // Use __kmp_atomic_lock_4i for all 4-byte data,
3539 // even if it isn't of integer data type.
3540
3541#ifdef KMP_GOMP_COMPAT
3542 if (__kmp_atomic_mode == 2) {
3543 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3544 } else
3545#endif /* KMP_GOMP_COMPAT */
3546 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3547
3548 (*f)(lhs, lhs, rhs);
3549
3550#ifdef KMP_GOMP_COMPAT
3551 if (__kmp_atomic_mode == 2) {
3552 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3553 } else
3554#endif /* KMP_GOMP_COMPAT */
3555 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3556 }
3557}
3558
3559void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3560 void (*f)(void *, void *, void *)) {
3561 KMP_DEBUG_ASSERT(__kmp_init_serial);
3562 if (
3563
3564#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3565 FALSE /* must use lock */
3566#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3567 TRUE /* no alignment problems */
3568#else
3569 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3570#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3571 ) {
3572 kmp_int64 old_value, new_value;
3573
3574 old_value = *(kmp_int64 *)lhs;
3575 (*f)(&new_value, &old_value, rhs);
3576 /* TODO: Should this be acquire or release? */
3577 while (!KMP_COMPARE_AND_STORE_ACQ64(
3578 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3579 KMP_CPU_PAUSE();
3580
3581 old_value = *(kmp_int64 *)lhs;
3582 (*f)(&new_value, &old_value, rhs);
3583 }
3584
3585 return;
3586 } else {
3587 // Use __kmp_atomic_lock_8i for all 8-byte data,
3588 // even if it isn't of integer data type.
3589
3590#ifdef KMP_GOMP_COMPAT
3591 if (__kmp_atomic_mode == 2) {
3592 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3593 } else
3594#endif /* KMP_GOMP_COMPAT */
3595 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3596
3597 (*f)(lhs, lhs, rhs);
3598
3599#ifdef KMP_GOMP_COMPAT
3600 if (__kmp_atomic_mode == 2) {
3601 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3602 } else
3603#endif /* KMP_GOMP_COMPAT */
3604 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3605 }
3606}
3607#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3608void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3609 void (*f)(void *, void *, void *)) {
3610 KMP_DEBUG_ASSERT(__kmp_init_serial);
3611
3612#ifdef KMP_GOMP_COMPAT
3613 if (__kmp_atomic_mode == 2) {
3614 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3615 } else
3616#endif /* KMP_GOMP_COMPAT */
3617 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3618
3619 (*f)(lhs, lhs, rhs);
3620
3621#ifdef KMP_GOMP_COMPAT
3622 if (__kmp_atomic_mode == 2) {
3623 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3624 } else
3625#endif /* KMP_GOMP_COMPAT */
3626 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3627}
3628#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3629
3630void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3631 void (*f)(void *, void *, void *)) {
3632 KMP_DEBUG_ASSERT(__kmp_init_serial);
3633
3634#ifdef KMP_GOMP_COMPAT
3635 if (__kmp_atomic_mode == 2) {
3636 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3637 } else
3638#endif /* KMP_GOMP_COMPAT */
3639 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3640
3641 (*f)(lhs, lhs, rhs);
3642
3643#ifdef KMP_GOMP_COMPAT
3644 if (__kmp_atomic_mode == 2) {
3645 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3646 } else
3647#endif /* KMP_GOMP_COMPAT */
3648 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3649}
3650#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3651void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3652 void (*f)(void *, void *, void *)) {
3653 KMP_DEBUG_ASSERT(__kmp_init_serial);
3654
3655#ifdef KMP_GOMP_COMPAT
3656 if (__kmp_atomic_mode == 2) {
3657 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3658 } else
3659#endif /* KMP_GOMP_COMPAT */
3660 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3661
3662 (*f)(lhs, lhs, rhs);
3663
3664#ifdef KMP_GOMP_COMPAT
3665 if (__kmp_atomic_mode == 2) {
3666 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3667 } else
3668#endif /* KMP_GOMP_COMPAT */
3669 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3670}
3671#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3672void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3673 void (*f)(void *, void *, void *)) {
3674 KMP_DEBUG_ASSERT(__kmp_init_serial);
3675
3676#ifdef KMP_GOMP_COMPAT
3677 if (__kmp_atomic_mode == 2) {
3678 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3679 } else
3680#endif /* KMP_GOMP_COMPAT */
3681 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3682
3683 (*f)(lhs, lhs, rhs);
3684
3685#ifdef KMP_GOMP_COMPAT
3686 if (__kmp_atomic_mode == 2) {
3687 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3688 } else
3689#endif /* KMP_GOMP_COMPAT */
3690 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3691}
3692
3693// AC: same two routines as GOMP_atomic_start/end, but will be called by our
3694// compiler; duplicated so as not to use third-party names in pure Intel code
3695// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3696void __kmpc_atomic_start(void) {
3697 int gtid = __kmp_entry_gtid();
3698 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3699 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3700}
3701
3702void __kmpc_atomic_end(void) {
3703 int gtid = __kmp_get_gtid();
3704 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3705 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3706}
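/*
 * Usage sketch (assumed caller): for an atomic update with no specialized
 * entry point, a compiler may bracket the plain update with these routines so
 * that all such regions serialize on __kmp_atomic_lock:
 *
 *   __kmpc_atomic_start();
 *   x = x OP expr;   // arbitrary user-coded atomic update
 *   __kmpc_atomic_end();
 */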
3707
3708#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3709
3710// OpenMP 5.1 compare and swap
3711
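/*
 * Illustrative source form (standard OpenMP 5.1 'atomic compare'; the exact
 * lowering chosen by a compiler is assumed):
 *
 *   #pragma omp atomic compare
 *   if (x == e) { x = d; }
 *
 * The bool_*_cas entry points report whether the store happened; the
 * val_*_cas entry points return the value of x observed by the
 * compare-and-swap.
 */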
3726bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3727 return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
3728}
3729bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
3730 short d) {
3731 return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
3732}
3733bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
3734 kmp_int32 d) {
3735 return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
3736}
3737bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
3738 kmp_int64 d) {
3739 return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
3740}
3741
3756char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3757 return KMP_COMPARE_AND_STORE_RET8(x, e, d);
3758}
3759short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
3760 short d) {
3761 return KMP_COMPARE_AND_STORE_RET16(x, e, d);
3762}
3763kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
3764 kmp_int32 e, kmp_int32 d) {
3765 return KMP_COMPARE_AND_STORE_RET32(x, e, d);
3766}
3767kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
3768 kmp_int64 e, kmp_int64 d) {
3769 return KMP_COMPARE_AND_STORE_RET64(x, e, d);
3770}
3771
3788bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3789 char d, char *pv) {
3790 char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3791 if (old == e)
3792 return true;
3793 KMP_ASSERT(pv != NULL);
3794 *pv = old;
3795 return false;
3796}
3797bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3798 short d, short *pv) {
3799 short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3800 if (old == e)
3801 return true;
3802 KMP_ASSERT(pv != NULL);
3803 *pv = old;
3804 return false;
3805}
3806bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3807 kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3808 kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3809 if (old == e)
3810 return true;
3811 KMP_ASSERT(pv != NULL);
3812 *pv = old;
3813 return false;
3814}
3815bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3816 kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3817 kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3818 if (old == e)
3819 return true;
3820 KMP_ASSERT(pv != NULL);
3821 *pv = old;
3822 return false;
3823}
3824
3841char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3842 char d, char *pv) {
3843 char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3844 KMP_ASSERT(pv != NULL);
3845 *pv = old == e ? d : old;
3846 return old;
3847}
3848short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3849 short d, short *pv) {
3850 short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3851 KMP_ASSERT(pv != NULL);
3852 *pv = old == e ? d : old;
3853 return old;
3854}
3855kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3856 kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3857 kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3858 KMP_ASSERT(pv != NULL);
3859 *pv = old == e ? d : old;
3860 return old;
3861}
3862kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3863 kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3864 kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3865 KMP_ASSERT(pv != NULL);
3866 *pv = old == e ? d : old;
3867 return old;
3868}
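// Capture semantics of the *_cas_cpt routines above: the bool forms return
// whether the swap succeeded and, on failure, store the observed value of x
// to *pv; the val forms return the observed value and store the final value
// of x (d on success, otherwise the observed value) to *pv.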
3869
3870// End OpenMP 5.1 compare + capture
3871#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3872
3877// end of file