/*
 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2008-2021 Ivan Maidanski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef AO_ATOMIC_OPS_H
#define AO_ATOMIC_OPS_H

#include "atomic_ops/ao_version.h"
                /* Define version numbers here to allow */
                /* test on build machines for cross-builds. */

#include <assert.h>
#include <stddef.h>

/* We define various atomic operations on memory in a */
/* machine-specific way. Unfortunately, this is complicated */
/* by the fact that these may or may not be combined with */
/* various memory barriers. Thus the actual operations we */
/* define have the form AO_<atomic-op>_<barrier>, for all */
/* plausible combinations of <atomic-op> and <barrier>. */
/* This of course results in a mild combinatorial explosion. */
/* To deal with it, we try to generate derived */
/* definitions for as many of the combinations as we can, as */
/* automatically as possible. */
/* */
/* Our assumption throughout is that the programmer will */
/* specify the least demanding operation and memory barrier */
/* that will guarantee correctness for the implementation. */
/* Our job is to find the least expensive way to implement it */
/* on the applicable hardware. In many cases that will */
/* involve, for example, a stronger memory barrier, or a */
/* combination of hardware primitives. */
/* */
/* Conventions: */
/* "plain" atomic operations are not guaranteed to include */
/* a barrier. The suffix in the name specifies the barrier */
/* type. Suffixes are: */
/* _release: Earlier operations may not be delayed past it. */
/* _acquire: Later operations may not move ahead of it. */
/* _read: Subsequent reads must follow this operation and */
/*        preceding reads. */
/* _write: Earlier writes precede both this operation and */
/*         later writes. */
/* _full: Ordered with respect to both earlier and later memory */
/*        operations. */
/* _release_write: Ordered with respect to earlier writes. */
/* _acquire_read: Ordered with respect to later reads. */
/* */
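/* For instance (an illustrative sketch, not itself part of this */
/* header, with "flag" a hypothetical shared AO_t): */
/* AO_store_release(&flag, 1) guarantees that writes issued before */
/* it become visible no later than the flag update, while */
/* AO_load_acquire(&flag) keeps subsequent reads from being */
/* reordered ahead of the load; the unsuffixed AO_store/AO_load */
/* promise atomicity only. */
/* */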
/* Currently we try to define the following atomic memory */
/* operations, in combination with the above barriers: */
/* AO_nop */
/* AO_load */
/* AO_store */
/* AO_test_and_set (binary) */
/* AO_fetch_and_add */
/* AO_fetch_and_add1 */
/* AO_fetch_and_sub1 */
/* AO_and */
/* AO_or */
/* AO_xor */
/* AO_compare_and_swap */
/* AO_fetch_compare_and_swap */
/* */
/* Note that atomicity guarantees are valid only if both */
/* readers and writers use AO_ operations to access the */
/* shared value, while ordering constraints are intended to */
/* apply to all memory operations. If a location can potentially */
/* be accessed simultaneously from multiple threads, and one of */
/* those accesses may be a write access, then all such */
/* accesses to that location should be through AO_ primitives. */
/* However, if AO_ operations enforce sufficient ordering to */
/* ensure that a location x cannot be accessed concurrently, */
/* or can only be read concurrently, then x can be accessed */
/* via ordinary references and assignments. */
/* */
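/* A minimal sketch of this rule (illustrative only; "counter" is */
/* a hypothetical shared variable): every thread that touches it */
/* goes through AO_ primitives, e.g. */
/*   volatile AO_t counter = 0; */
/*   AO_fetch_and_add_full(&counter, 1);    (writers) */
/*   AO_t snapshot = AO_load(&counter);     (readers) */
/* */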
/* AO_compare_and_swap takes an address and an expected old */
/* value and a new value, and returns an int. Non-zero result */
/* indicates that it succeeded. */
/* AO_fetch_compare_and_swap takes an address and an expected */
/* old value and a new value, and returns the real old value. */
/* The operation succeeded if and only if the expected old */
/* value matches the old value returned. */
/* */
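/* As an illustrative sketch (assuming the _full variant is */
/* available on the target, and "val" is a hypothetical shared */
/* AO_t), an atomic update that is not provided directly can be */
/* built as a classic CAS loop: */
/*   AO_t old; */
/*   do { */
/*     old = AO_load(&val); */
/*   } while (AO_fetch_compare_and_swap_full(&val, old, */
/*                                           3 * old + 1) != old); */
/* */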
/* Test_and_set takes an address, atomically replaces it by */
/* AO_TS_SET, and returns the prior value. */
/* An AO_TS_t location can be reset with the */
/* AO_CLEAR macro, which normally uses AO_store_release. */
/* AO_fetch_and_add takes an address and an AO_t increment */
/* value. The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */
/* are provided, since they allow faster implementations on */
/* some hardware. AO_and, AO_or, AO_xor do atomically and, or, */
/* xor (respectively) an AO_t value into a memory location, */
/* but do not provide access to the original. */
/* */
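/* A familiar illustrative use of this pair (a sketch, with "lock" */
/* as a hypothetical variable) is a simple spin lock: */
/*   static AO_TS_t lock = AO_TS_INITIALIZER; */
/*   while (AO_test_and_set_acquire(&lock) == AO_TS_SET) */
/*     ;                          (spin until the lock is free) */
/*   ... critical section ... */
/*   AO_CLEAR(&lock);             (release, via AO_store_release) */
/* */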
/* We expect this list to grow slowly over time. */
/* */
/* Note that AO_nop_full is a full memory barrier. */
/* */
/* Note that if some data is initialized with */
/*   data.x = ...; data.y = ...; ... */
/*   AO_store_release_write(&data_is_initialized, 1) */
/* then data is guaranteed to be initialized after the test */
/*   if (AO_load_acquire_read(&data_is_initialized)) ... */
/* succeeds. Furthermore, this should generate near-optimal */
/* code on all common platforms. */
/* */
/* All operations operate on unsigned AO_t, which */
/* is the natural word size, and usually unsigned long. */
/* It is possible to check whether a particular operation op */
/* is available on a particular platform by checking whether */
/* AO_HAVE_op is defined. We make heavy use of these macros */
/* internally. */
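/* For instance (an illustrative sketch; "x" is a hypothetical */
/* shared AO_t and AO_HAVE_fetch_compare_and_swap_full is assumed */
/* to be present), client code can probe for a primitive and fall */
/* back to a CAS loop otherwise: */
/*   #if defined(AO_HAVE_fetch_and_add1_full) */
/*     (void)AO_fetch_and_add1_full(&x); */
/*   #else */
/*     AO_t old; */
/*     do { old = AO_load(&x); */
/*     } while (AO_fetch_compare_and_swap_full(&x, old, */
/*                                             old + 1) != old); */
/*   #endif */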

/* The rest of this file basically has three sections: */
/* */
/* Some utility and default definitions. */
/* */
/* The architecture dependent section: */
/* This defines atomic operations that have direct hardware */
/* support on a particular platform, mostly by including the */
/* appropriate compiler- and hardware-dependent file. */
/* */
/* The synthesis section: */
/* This tries to define other atomic operations in terms of */
/* those that are explicitly available on the platform. */
/* This section is hardware independent. */
/* We make no attempt to synthesize operations in ways that */
/* effectively introduce locks, except for the debugging/demo */
/* pthread-based implementation at the beginning. A more */
/* realistic implementation that falls back to locks could be */
/* added as a higher layer. But that would sacrifice */
/* usability from signal handlers. */
/* The synthesis section is implemented almost entirely in */
/* atomic_ops/generalize.h. */

/* Some common defaults. Overridden for some architectures. */
#define AO_t size_t

/* The test_and_set primitive returns an AO_TS_VAL_t value. */
/* AO_TS_t is the type of an in-memory test-and-set location. */

#define AO_TS_INITIALIZER ((AO_TS_t)AO_TS_CLEAR)

/* Convenient internal macro to test version of GCC. */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define AO_GNUC_PREREQ(major, minor) \
            ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((major) << 16) + (minor))
#else
# define AO_GNUC_PREREQ(major, minor) 0 /* false */
#endif

/* Convenient internal macro to test version of Clang. */
#if defined(__clang__) && defined(__clang_major__)
# define AO_CLANG_PREREQ(major, minor) \
            ((__clang_major__ << 16) + __clang_minor__ >= ((major) << 16) + (minor))
#else
# define AO_CLANG_PREREQ(major, minor) 0 /* false */
#endif
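/* For example, AO_GNUC_PREREQ(4, 2), as used below, evaluates to */
/* a non-zero value only when compiling with GCC 4.2 or newer; on */
/* non-GCC compilers it is always 0. AO_CLANG_PREREQ behaves */
/* analogously for Clang. */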

/* Platform-dependent stuff: */
#if (defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \
     || defined(__DMC__) || defined(__WATCOMC__)) && !defined(AO_NO_INLINE)
# define AO_INLINE static __inline
#elif defined(__sun) && !defined(AO_NO_INLINE)
# define AO_INLINE static inline
#else
# define AO_INLINE static
#endif

#if AO_GNUC_PREREQ(3, 0) && !defined(LINT2)
# define AO_EXPECT_FALSE(expr) __builtin_expect(expr, 0)
        /* Equivalent to (expr) but predict that usually (expr) == 0. */
#else
# define AO_EXPECT_FALSE(expr) (expr)
#endif /* !__GNUC__ */
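/* Illustrative use (a sketch; "error_flag" and slow_path() are */
/* hypothetical): */
/*   if (AO_EXPECT_FALSE(AO_load(&error_flag) != 0)) */
/*     slow_path(); */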

#if defined(__has_feature)
  /* __has_feature() is supported. */
# if __has_feature(address_sanitizer)
#   define AO_ADDRESS_SANITIZER
# endif
# if __has_feature(memory_sanitizer)
#   define AO_MEMORY_SANITIZER
# endif
# if __has_feature(thread_sanitizer)
#   define AO_THREAD_SANITIZER
# endif
#else
# ifdef __SANITIZE_ADDRESS__
    /* GCC v4.8+ */
#   define AO_ADDRESS_SANITIZER
# endif
#endif /* !__has_feature */

#ifndef AO_ATTR_NO_SANITIZE_MEMORY
# ifndef AO_MEMORY_SANITIZER
#   define AO_ATTR_NO_SANITIZE_MEMORY /* empty */
# elif AO_CLANG_PREREQ(3, 8)
#   define AO_ATTR_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
# else
#   define AO_ATTR_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
# endif
#endif /* !AO_ATTR_NO_SANITIZE_MEMORY */

#ifndef AO_ATTR_NO_SANITIZE_THREAD
# ifndef AO_THREAD_SANITIZER
#   define AO_ATTR_NO_SANITIZE_THREAD /* empty */
# elif AO_CLANG_PREREQ(3, 8)
#   define AO_ATTR_NO_SANITIZE_THREAD __attribute__((no_sanitize("thread")))
# else
#   define AO_ATTR_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
# endif
#endif /* !AO_ATTR_NO_SANITIZE_THREAD */

#if (AO_GNUC_PREREQ(7, 5) || __STDC_VERSION__ >= 201112L) && !defined(LINT2)
# define AO_ALIGNOF_SUPPORTED 1
#endif

#ifdef AO_ALIGNOF_SUPPORTED
# define AO_ASSERT_ADDR_ALIGNED(addr) \
    assert(((size_t)(addr) & (__alignof__(*(addr)) - 1)) == 0)
#else
# define AO_ASSERT_ADDR_ALIGNED(addr) \
    assert(((size_t)(addr) & (sizeof(*(addr)) - 1)) == 0)
#endif /* !AO_ALIGNOF_SUPPORTED */
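/* Illustrative use (a sketch; "p" is a hypothetical pointer that */
/* is about to be passed to an AO_ primitive): */
/*   AO_ASSERT_ADDR_ALIGNED((volatile AO_t *)p); */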

#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
# define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory")
#elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
      || defined(__WATCOMC__)
# if defined(_AMD64_) || defined(_M_X64) || _MSC_VER >= 1400
#   if defined(_WIN32_WCE)
/* #     include <cmnintrin.h> */
#   elif defined(_MSC_VER)
#     include <intrin.h>
#   endif
#   pragma intrinsic(_ReadWriteBarrier)
#   define AO_compiler_barrier() _ReadWriteBarrier()
        /* We assume this does not generate a fence instruction. */
        /* The documentation is a bit unclear. */
# else
#   define AO_compiler_barrier() __asm { }
        /* The preceding implementation may be preferable here too. */
        /* But the documentation warns about VC++ 2003 and earlier. */
# endif
#elif defined(__INTEL_COMPILER)
# define AO_compiler_barrier() __memory_barrier()
        /* FIXME: Too strong? IA64-only? */
#elif defined(_HPUX_SOURCE)
# if defined(__ia64)
#   include <machine/sys/inline.h>
#   define AO_compiler_barrier() _Asm_sched_fence()
# else
    /* FIXME - We do not know how to do this. This is a guess. */
    /* And probably a bad one. */
    static volatile int AO_barrier_dummy;
#   define AO_compiler_barrier() (void)(AO_barrier_dummy = AO_barrier_dummy)
# endif
#else
  /* We conjecture that the following usually gives us the right */
  /* semantics or an error. */
# define AO_compiler_barrier() asm("")
#endif

#if defined(AO_USE_PTHREAD_DEFS)
# include "atomic_ops/sysdeps/generic_pthread.h"
#endif /* AO_USE_PTHREAD_DEFS */

#if (defined(__CC_ARM) || defined(__ARMCC__)) && !defined(__GNUC__) \
    && !defined(AO_USE_PTHREAD_DEFS)
# include "atomic_ops/sysdeps/armcc/arm_v6.h"
# define AO_GENERALIZE_TWICE
#endif

#if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \
    && !defined(__INTEL_COMPILER)
# if defined(__i386__)
    /* We don't define AO_USE_SYNC_CAS_BUILTIN for x86 here because */
    /* it might require specifying additional options (like -march) */
    /* or additional link libraries (if -march is not specified). */
#   include "atomic_ops/sysdeps/gcc/x86.h"
# elif defined(__x86_64__)
#   if AO_GNUC_PREREQ(4, 2) && !defined(AO_USE_SYNC_CAS_BUILTIN)
      /* It is safe to use __sync CAS built-in on this architecture. */
#     define AO_USE_SYNC_CAS_BUILTIN
#   endif
#   include "atomic_ops/sysdeps/gcc/x86.h"
# elif defined(__ia64__)
#   include "atomic_ops/sysdeps/gcc/ia64.h"
#   define AO_GENERALIZE_TWICE
# elif defined(__hppa__)
#   include "atomic_ops/sysdeps/gcc/hppa.h"
#   define AO_CAN_EMUL_CAS
# elif defined(__alpha__)
#   include "atomic_ops/sysdeps/gcc/alpha.h"
#   define AO_GENERALIZE_TWICE
# elif defined(__s390__)
#   include "atomic_ops/sysdeps/gcc/s390.h"
# elif defined(__sparc__)
#   include "atomic_ops/sysdeps/gcc/sparc.h"
#   define AO_CAN_EMUL_CAS
# elif defined(__m68k__)
#   include "atomic_ops/sysdeps/gcc/m68k.h"
# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
       || defined(__powerpc64__) || defined(__ppc64__) || defined(_ARCH_PPC)
#   include "atomic_ops/sysdeps/gcc/powerpc.h"
# elif defined(__aarch64__)
#   include "atomic_ops/sysdeps/gcc/aarch64.h"
#   define AO_CAN_EMUL_CAS
# elif defined(__arm__)
#   include "atomic_ops/sysdeps/gcc/arm.h"
#   define AO_CAN_EMUL_CAS
# elif defined(__cris__) || defined(CRIS)
#   include "atomic_ops/sysdeps/gcc/cris.h"
#   define AO_CAN_EMUL_CAS
#   define AO_GENERALIZE_TWICE
# elif defined(__mips__)
#   include "atomic_ops/sysdeps/gcc/mips.h"
# elif defined(__sh__) || defined(SH4)
#   include "atomic_ops/sysdeps/gcc/sh.h"
#   define AO_CAN_EMUL_CAS
# elif defined(__avr32__)
#   include "atomic_ops/sysdeps/gcc/avr32.h"
# elif defined(__hexagon__)
#   include "atomic_ops/sysdeps/gcc/hexagon.h"
# elif defined(__nios2__)
#   include "atomic_ops/sysdeps/gcc/generic.h"
#   define AO_CAN_EMUL_CAS
# elif defined(__riscv)
#   include "atomic_ops/sysdeps/gcc/riscv.h"
# elif defined(__tile__)
#   include "atomic_ops/sysdeps/gcc/tile.h"
# else /* etc. */
#   include "atomic_ops/sysdeps/gcc/generic.h"
# endif
#endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS */

#if (defined(__IBMC__) || defined(__IBMCPP__)) && !defined(__GNUC__) \
    && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__powerpc__) || defined(__powerpc) || defined(__ppc__) \
     || defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) \
     || defined(_ARCH_PWR)
#   include "atomic_ops/sysdeps/ibmc/powerpc.h"
#   define AO_GENERALIZE_TWICE
# endif
#endif

#if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__ia64__)
#   include "atomic_ops/sysdeps/icc/ia64.h"
#   define AO_GENERALIZE_TWICE
# endif
# if defined(__GNUC__)
    /* Intel Compiler in GCC compatible mode */
#   if defined(__i386__)
#     include "atomic_ops/sysdeps/gcc/x86.h"
#   endif /* __i386__ */
#   if defined(__x86_64__)
#     if (__INTEL_COMPILER > 1110) && !defined(AO_USE_SYNC_CAS_BUILTIN)
#       define AO_USE_SYNC_CAS_BUILTIN
#     endif
#     include "atomic_ops/sysdeps/gcc/x86.h"
#   endif /* __x86_64__ */
# endif
#endif

#if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__ia64)
#   include "atomic_ops/sysdeps/hpc/ia64.h"
#   define AO_GENERALIZE_TWICE
# else
#   include "atomic_ops/sysdeps/hpc/hppa.h"
#   define AO_CAN_EMUL_CAS
# endif
#endif

#if defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
    || (defined(__WATCOMC__) && defined(__NT__))
# if defined(_AMD64_) || defined(_M_X64) || defined(_M_ARM64)
#   include "atomic_ops/sysdeps/msftc/x86_64.h"
# elif defined(_M_IX86) || defined(x86)
#   include "atomic_ops/sysdeps/msftc/x86.h"
# elif defined(_M_ARM) || defined(ARM) || defined(_ARM_)
#   include "atomic_ops/sysdeps/msftc/arm.h"
#   define AO_GENERALIZE_TWICE
# endif
#endif

#if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
  /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */
# if defined(__i386) || defined(__x86_64) || defined(__amd64)
#   include "atomic_ops/sysdeps/sunc/x86.h"
# endif
#endif

#if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \
    && !defined(AO_USE_PTHREAD_DEFS)
# include "atomic_ops/sysdeps/sunc/sparc.h"
# define AO_CAN_EMUL_CAS
#endif

#if (defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \
     && !defined(AO_HAVE_fetch_compare_and_swap) \
     && !defined(AO_HAVE_compare_and_swap_full) \
     && !defined(AO_HAVE_fetch_compare_and_swap_full) \
     && !defined(AO_HAVE_compare_and_swap_acquire) \
     && !defined(AO_HAVE_fetch_compare_and_swap_acquire)) || defined(CPPCHECK)
# if defined(AO_CAN_EMUL_CAS)
#   include "atomic_ops/sysdeps/emul_cas.h"
# elif !defined(CPPCHECK)
#   error Cannot implement AO_compare_and_swap_full on this architecture.
# endif
#endif /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */

/* The most common way to clear a test-and-set location */
/* at the end of a critical section. */
#if defined(AO_AO_TS_T) && !defined(AO_HAVE_CLEAR)
# define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
# define AO_HAVE_CLEAR
#endif
#if defined(AO_CHAR_TS_T) && !defined(AO_HAVE_CLEAR)
# define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
# define AO_HAVE_CLEAR
#endif

/* The generalization section. */
#if !defined(AO_GENERALIZE_TWICE) && defined(AO_CAN_EMUL_CAS) \
    && !defined(AO_HAVE_compare_and_swap_full) \
    && !defined(AO_HAVE_fetch_compare_and_swap_full)
# define AO_GENERALIZE_TWICE
#endif

/* Theoretically we should repeatedly include atomic_ops/generalize.h. */
/* In fact, we observe that this converges after a small fixed number */
/* of iterations, usually one. */
#include "atomic_ops/generalize.h"

#if !defined(AO_GENERALIZE_TWICE) \
    && defined(AO_HAVE_compare_double_and_swap_double) \
    && (!defined(AO_HAVE_double_load) || !defined(AO_HAVE_double_store))
# define AO_GENERALIZE_TWICE
#endif

#ifdef AO_T_IS_INT
  /* Included after the first generalization pass. */
# include "atomic_ops/sysdeps/ao_t_is_int.h"
# ifndef AO_GENERALIZE_TWICE
    /* Always generalize again. */
#   define AO_GENERALIZE_TWICE
# endif
#endif /* AO_T_IS_INT */

#ifdef AO_GENERALIZE_TWICE
# include "atomic_ops/generalize.h"
#endif

/* For compatibility with version 0.4 and earlier */
#define AO_TS_T AO_TS_t
#define AO_T AO_t
#define AO_TS_VAL AO_TS_VAL_t

#endif /* !AO_ATOMIC_OPS_H */