1 | /* |
2 | * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | * of this software and associated documentation files (the "Software"), to deal |
6 | * in the Software without restriction, including without limitation the rights |
7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | * copies of the Software, and to permit persons to whom the Software is |
9 | * furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
20 | * SOFTWARE. |
21 | */ |
22 | |
23 | /* |
24 | * Initialized data and out-of-line functions to support atomic_ops.h |
25 | * go here. Currently this is needed only for pthread-based atomics |
26 | * emulation, or for compare-and-swap emulation. |
27 | * Pthreads emulation isn't useful on a native Windows platform, and |
28 | * cas emulation is not needed. Thus we skip this on Windows. |
29 | */ |
30 | |
31 | #if defined(HAVE_CONFIG_H) |
32 | # include "config.h" |
33 | #endif |
34 | |
/* Hexagon QuRT does not have sigprocmask (but Hexagon does not need    */
/* emulation, so it is OK not to bother about signals blocking).        */
/* Since NaCl is not recognized by configure yet, we do it here.        */
/* The defaults are applied only if the client has chosen neither of    */
/* the two macros itself.                                               */
#if !defined(AO_USE_NO_SIGNALS) && !defined(AO_USE_NANOSLEEP)
# if defined(__hexagon__) || defined(__native_client__)
#   define AO_USE_NO_SIGNALS
#   define AO_USE_NANOSLEEP
# endif
#endif

/* Selecting the Win32 pthreads variant implies that no signal          */
/* blocking is performed.                                               */
#ifdef AO_USE_WIN32_PTHREADS
# ifndef AO_USE_NO_SIGNALS
#   define AO_USE_NO_SIGNALS
# endif
#endif

/* On Linux/glibc/Hurd, request the GNU feature set when signals are    */
/* in use, unless the client has already defined _GNU_SOURCE.           */
#if !defined(AO_USE_NO_SIGNALS) && !defined(_GNU_SOURCE)
# if defined(__linux__) || defined(__GLIBC__) || defined(__GNU__)
#   define _GNU_SOURCE 1
# endif
#endif
52 | |
53 | #undef AO_REQUIRE_CAS |
54 | #include "atomic_ops.h" /* Without cas emulation! */ |
55 | |
56 | #if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__BORLANDC__) \ |
57 | || defined(AO_USE_NO_SIGNALS) |
58 | |
59 | #ifndef AO_NO_PTHREADS |
60 | # include <pthread.h> |
61 | #endif |
62 | |
63 | #ifndef AO_USE_NO_SIGNALS |
64 | # include <signal.h> |
65 | #endif |
66 | |
67 | #ifdef AO_USE_NANOSLEEP |
68 | /* This requires _POSIX_TIMERS feature. */ |
69 | # include <sys/time.h> |
70 | # include <time.h> |
71 | #elif defined(AO_USE_WIN32_PTHREADS) |
72 | # include <windows.h> /* for Sleep() */ |
73 | #elif defined(_HPUX_SOURCE) |
74 | # include <sys/time.h> |
75 | #else |
76 | # include <sys/select.h> |
77 | #endif |
78 | |
79 | #ifndef AO_HAVE_double_t |
80 | # include "atomic_ops/sysdeps/standard_ao_double_t.h" |
81 | #endif |
82 | |
/* Global mutex backing the pthreads-based implementation.  It is      */
/* statically initialized, and is omitted entirely when the build       */
/* defines AO_NO_PTHREADS.                                              */
#if !defined(AO_NO_PTHREADS)
  pthread_mutex_t AO_pt_lock = PTHREAD_MUTEX_INITIALIZER;
#endif /* !AO_NO_PTHREADS */
87 | |
88 | /* |
89 | * Out of line compare-and-swap emulation based on test and set. |
90 | * |
91 | * We use a small table of locks for different compare_and_swap locations. |
92 | * Before we perform a compare-and-swap, we grab the corresponding |
93 | * lock. Different locations may hash to the same lock, but since we |
94 | * never acquire more than one lock at a time, this can't deadlock. |
95 | * We explicitly disable signals while we perform this operation. |
96 | * |
97 | * TODO: Probably also support emulation based on Lamport |
98 | * locks, since we may not have test_and_set either. |
99 | */ |
/* Number of entries in the lock table.  AO_HASH() reduces its result  */
/* with a bit mask of AO_HASH_SIZE-1, so this value is expected to be   */
/* a power of two.                                                      */
#define AO_HASH_SIZE 16

/* Map an address (or integer) x to a lock table index in the range    */
/* 0 .. AO_HASH_SIZE-1.  The low 12 bits are discarded first, so all    */
/* locations inside one 4096-byte-aligned region share the same index.  */
#define AO_HASH(x) \
        ((((unsigned long)(x)) >> 12) & (unsigned long)(AO_HASH_SIZE - 1))
103 | |
104 | static AO_TS_t AO_locks[AO_HASH_SIZE] = { |
105 | AO_TS_INITIALIZER, AO_TS_INITIALIZER, AO_TS_INITIALIZER, AO_TS_INITIALIZER, |
106 | AO_TS_INITIALIZER, AO_TS_INITIALIZER, AO_TS_INITIALIZER, AO_TS_INITIALIZER, |
107 | AO_TS_INITIALIZER, AO_TS_INITIALIZER, AO_TS_INITIALIZER, AO_TS_INITIALIZER, |
108 | AO_TS_INITIALIZER, AO_TS_INITIALIZER, AO_TS_INITIALIZER, AO_TS_INITIALIZER, |
109 | }; |
110 | |
111 | void AO_pause(int); /* defined below */ |
112 | |
113 | static void lock_ool(volatile AO_TS_t *l) |
114 | { |
115 | int i = 0; |
116 | |
117 | while (AO_test_and_set_acquire(l) == AO_TS_SET) |
118 | AO_pause(++i); |
119 | } |
120 | |
121 | AO_INLINE void lock(volatile AO_TS_t *l) |
122 | { |
123 | if (AO_EXPECT_FALSE(AO_test_and_set_acquire(l) == AO_TS_SET)) |
124 | lock_ool(l); |
125 | } |
126 | |
127 | AO_INLINE void unlock(volatile AO_TS_t *l) |
128 | { |
129 | AO_CLEAR(l); |
130 | } |
131 | |
132 | #ifndef AO_USE_NO_SIGNALS |
133 | static sigset_t all_sigs; |
134 | static volatile AO_t initialized = 0; |
135 | static volatile AO_TS_t init_lock = AO_TS_INITIALIZER; |
136 | |
137 | AO_INLINE void block_all_signals(sigset_t *old_sigs_ptr) |
138 | { |
139 | if (AO_EXPECT_FALSE(!AO_load_acquire(&initialized))) |
140 | { |
141 | lock(&init_lock); |
142 | if (!initialized) |
143 | sigfillset(&all_sigs); |
144 | unlock(&init_lock); |
145 | AO_store_release(&initialized, 1); |
146 | } |
147 | sigprocmask(SIG_BLOCK, &all_sigs, old_sigs_ptr); |
148 | /* Neither sigprocmask nor pthread_sigmask is 100% */ |
149 | /* guaranteed to work here. Sigprocmask is not */ |
150 | /* guaranteed be thread safe, and pthread_sigmask */ |
151 | /* is not async-signal-safe. Under linuxthreads, */ |
152 | /* sigprocmask may block some pthreads-internal */ |
153 | /* signals. So long as we do that for short periods, */ |
154 | /* we should be OK. */ |
155 | } |
156 | #endif /* !AO_USE_NO_SIGNALS */ |
157 | |
158 | AO_t AO_fetch_compare_and_swap_emulation(volatile AO_t *addr, AO_t old_val, |
159 | AO_t new_val) |
160 | { |
161 | AO_TS_t *my_lock = AO_locks + AO_HASH(addr); |
162 | AO_t fetched_val; |
163 | |
164 | # ifndef AO_USE_NO_SIGNALS |
165 | sigset_t old_sigs; |
166 | block_all_signals(&old_sigs); |
167 | # endif |
168 | lock(my_lock); |
169 | fetched_val = *addr; |
170 | if (fetched_val == old_val) |
171 | *addr = new_val; |
172 | unlock(my_lock); |
173 | # ifndef AO_USE_NO_SIGNALS |
174 | sigprocmask(SIG_SETMASK, &old_sigs, NULL); |
175 | # endif |
176 | return fetched_val; |
177 | } |
178 | |
179 | int AO_compare_double_and_swap_double_emulation(volatile AO_double_t *addr, |
180 | AO_t old_val1, AO_t old_val2, |
181 | AO_t new_val1, AO_t new_val2) |
182 | { |
183 | AO_TS_t *my_lock = AO_locks + AO_HASH(addr); |
184 | int result; |
185 | |
186 | # ifndef AO_USE_NO_SIGNALS |
187 | sigset_t old_sigs; |
188 | block_all_signals(&old_sigs); |
189 | # endif |
190 | lock(my_lock); |
191 | if (addr -> AO_val1 == old_val1 && addr -> AO_val2 == old_val2) |
192 | { |
193 | addr -> AO_val1 = new_val1; |
194 | addr -> AO_val2 = new_val2; |
195 | result = 1; |
196 | } |
197 | else |
198 | result = 0; |
199 | unlock(my_lock); |
200 | # ifndef AO_USE_NO_SIGNALS |
201 | sigprocmask(SIG_SETMASK, &old_sigs, NULL); |
202 | # endif |
203 | return result; |
204 | } |
205 | |
206 | void AO_store_full_emulation(volatile AO_t *addr, AO_t val) |
207 | { |
208 | AO_TS_t *my_lock = AO_locks + AO_HASH(addr); |
209 | lock(my_lock); |
210 | *addr = val; |
211 | unlock(my_lock); |
212 | } |
213 | |
214 | #else /* Non-posix platform */ |
215 | |
216 | # include <windows.h> |
217 | |
218 | # define AO_USE_WIN32_PTHREADS |
219 | /* define to use Sleep() */ |
220 | |
221 | extern int AO_non_posix_implementation_is_entirely_in_headers; |
222 | |
223 | #endif |
224 | |
225 | static AO_t spin_dummy = 1; |
226 | |
227 | /* Spin for 2**n units. */ |
228 | static void AO_spin(int n) |
229 | { |
230 | AO_t j = AO_load(&spin_dummy); |
231 | int i = 2 << n; |
232 | |
233 | while (i-- > 0) |
234 | j += (j - 1) << 2; |
235 | /* Given 'spin_dummy' is initialized to 1, j is 1 after the loop. */ |
236 | AO_store(&spin_dummy, j); |
237 | } |
238 | |
/* Back off for an amount of time that grows with n.                   */
/*   n < 12 : busy-wait via AO_spin(n).                                 */
/*   n >= 12: sleep using the primitive selected at compile time:       */
/*     - nanosleep: 1 << (n-2) nanoseconds, or 100 ms if n > 28;        */
/*     - Sleep:     1 ms if n < 22, 1 << (n-22) ms up to n == 28,       */
/*                  or 100 ms if n > 28;                                */
/*     - select:    1 << (n-12) microseconds, or 100 ms if n > 28.      */
void AO_pause(int n)
{
  if (n < 12)
    {
      AO_spin(n);
      return;
    }

# ifdef AO_USE_NANOSLEEP
    {
      struct timespec delay;

      delay.tv_sec = 0;
      if (n > 28)
        delay.tv_nsec = 100000L * 1000;
      else
        delay.tv_nsec = 1L << (n - 2);
      nanosleep(&delay, 0);
    }
# elif defined(AO_USE_WIN32_PTHREADS)
    {
      DWORD millis;

      if (n > 28)
        millis = 100;
      else if (n < 22)
        millis = 1;
      else
        millis = (DWORD)1 << (n - 22);
      Sleep(millis);
    }
# else
    {
      struct timeval tv;
      int usec;
                /* The shift is computed in an int first to avoid a    */
                /* "shift followed by widening conversion" warning.    */

      if (n > 28)
        usec = 100000;
      else
        usec = 1 << (n - 12);

      /* Short async-signal-safe sleep. */
      tv.tv_sec = 0;
      tv.tv_usec = usec;
      (void)select(0, 0, 0, 0, &tv);
    }
# endif
}