atomicops.h
// ©2013-2016 Cameron Desrochers.
// Distributed under the simplified BSD license (see the license file that
// should have come with this header).
// Uses Jeff Preshing's semaphore implementation (under the terms of its
// separate zlib license, embedded below).

#pragma once

// Provides portable (VC++2010+, Intel ICC 13, GCC 4.7+, and anything C++11
// compliant) implementation of low-level memory barriers, plus a few
// semi-portable utility macros (for inlining and alignment). Also has a basic
// atomic type (limited to hardware-supported atomics with no memory ordering
// guarantees). Uses the AE_* prefix for macros (historical reasons), and the
// "moodycamel" namespace for symbols.

#include <cassert>
#include <cerrno>
#include <cstdint>
#include <ctime>
#include <type_traits>

// Platform detection
#if defined(__INTEL_COMPILER)
#define AE_ICC
#elif defined(_MSC_VER)
#define AE_VCPP
#elif defined(__GNUC__)
#define AE_GCC
#endif

#if defined(_M_IA64) || defined(__ia64__)
#define AE_ARCH_IA64
#elif defined(_WIN64) || defined(__amd64__) || defined(_M_X64) || \
    defined(__x86_64__)
#define AE_ARCH_X64
#elif defined(_M_IX86) || defined(__i386__)
#define AE_ARCH_X86
#elif defined(_M_PPC) || defined(__powerpc__)
#define AE_ARCH_PPC
#else
#define AE_ARCH_UNKNOWN
#endif

// AE_UNUSED
#define AE_UNUSED(x) ((void)x)

// AE_FORCEINLINE
#if defined(AE_VCPP) || defined(AE_ICC)
#define AE_FORCEINLINE __forceinline
#elif defined(AE_GCC)
//#define AE_FORCEINLINE __attribute__((always_inline))
#define AE_FORCEINLINE inline
#else
#define AE_FORCEINLINE inline
#endif

// AE_ALIGN
#if defined(AE_VCPP) || defined(AE_ICC)
#define AE_ALIGN(x) __declspec(align(x))
#elif defined(AE_GCC)
#define AE_ALIGN(x) __attribute__((aligned(x)))
#else
// Assume GCC compliant syntax...
#define AE_ALIGN(x) __attribute__((aligned(x)))
#endif
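
// A minimal sketch of the utility macros above; the type and function names
// here are illustrative only, not part of this header:
struct AE_ALIGN(64) ExampleCacheAligned {
  int value;  // aligned to a typical 64-byte cache line to avoid false sharing
};
AE_FORCEINLINE int example_read(ExampleCacheAligned const& x) {
  return x.value;  // AE_FORCEINLINE expands to __forceinline or plain inline
}
inline void example_ignore(int unused_arg) {
  AE_UNUSED(unused_arg);  // portably silences 'unused parameter' warnings
}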

// Portable atomic fences implemented below:

namespace moodycamel {

enum memory_order {
  memory_order_relaxed,
  memory_order_acquire,
  memory_order_release,
  memory_order_acq_rel,
  memory_order_seq_cst,

  // memory_order_sync: Forces a full sync:
  // #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad
  memory_order_sync = memory_order_seq_cst
};

}  // end namespace moodycamel

#if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || \
    defined(AE_ICC)
// VS2010 and ICC13 don't support std::atomic_*_fence, so we implement our own
// fences

#include <intrin.h>

#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
#define AeFullSync _mm_mfence
#define AeLiteSync _mm_mfence
#elif defined(AE_ARCH_IA64)
#define AeFullSync __mf
#define AeLiteSync __mf
#elif defined(AE_ARCH_PPC)
#include <ppcintrinsics.h>
#define AeFullSync __sync
#define AeLiteSync __lwsync
#endif

#ifdef AE_VCPP
#pragma warning(push)
#pragma warning( \
    disable : 4365)  // Disable erroneous 'conversion from long to unsigned
                     // int, signed/unsigned mismatch' error when using `assert`
#ifdef __cplusplus_cli
#pragma managed(push, off)
#endif
#endif

namespace moodycamel {

AE_FORCEINLINE void compiler_fence(memory_order order) {
  switch (order) {
    case memory_order_relaxed:
      break;
    case memory_order_acquire:
      _ReadBarrier();
      break;
    case memory_order_release:
      _WriteBarrier();
      break;
    case memory_order_acq_rel:
      _ReadWriteBarrier();
      break;
    case memory_order_seq_cst:
      _ReadWriteBarrier();
      break;
    default:
      assert(false);
  }
}

// x86/x64 have a strong memory model -- all loads and stores have
// acquire and release semantics automatically (so only need compiler
// barriers for those).
#if defined(AE_ARCH_X86) || defined(AE_ARCH_X64)
AE_FORCEINLINE void fence(memory_order order) {
  switch (order) {
    case memory_order_relaxed:
      break;
    case memory_order_acquire:
      _ReadBarrier();
      break;
    case memory_order_release:
      _WriteBarrier();
      break;
    case memory_order_acq_rel:
      _ReadWriteBarrier();
      break;
    case memory_order_seq_cst:
      _ReadWriteBarrier();
      AeFullSync();
      _ReadWriteBarrier();
      break;
    default:
      assert(false);
  }
}
#else
AE_FORCEINLINE void fence(memory_order order) {
  // Non-specialized arch, use heavier memory barriers everywhere just in case
  // :-(
  switch (order) {
    case memory_order_relaxed:
      break;
    case memory_order_acquire:
      _ReadBarrier();
      AeLiteSync();
      _ReadBarrier();
      break;
    case memory_order_release:
      _WriteBarrier();
      AeLiteSync();
      _WriteBarrier();
      break;
    case memory_order_acq_rel:
      _ReadWriteBarrier();
      AeLiteSync();
      _ReadWriteBarrier();
      break;
    case memory_order_seq_cst:
      _ReadWriteBarrier();
      AeFullSync();
      _ReadWriteBarrier();
      break;
    default:
      assert(false);
  }
}
#endif
}  // end namespace moodycamel
#else
// Use the standard library's atomic fences
#include <atomic>

namespace moodycamel {

AE_FORCEINLINE void compiler_fence(memory_order order) {
  switch (order) {
    case memory_order_relaxed:
      break;
    case memory_order_acquire:
      std::atomic_signal_fence(std::memory_order_acquire);
      break;
    case memory_order_release:
      std::atomic_signal_fence(std::memory_order_release);
      break;
    case memory_order_acq_rel:
      std::atomic_signal_fence(std::memory_order_acq_rel);
      break;
    case memory_order_seq_cst:
      std::atomic_signal_fence(std::memory_order_seq_cst);
      break;
    default:
      assert(false);
  }
}

AE_FORCEINLINE void fence(memory_order order) {
  switch (order) {
    case memory_order_relaxed:
      break;
    case memory_order_acquire:
      std::atomic_thread_fence(std::memory_order_acquire);
      break;
    case memory_order_release:
      std::atomic_thread_fence(std::memory_order_release);
      break;
    case memory_order_acq_rel:
      std::atomic_thread_fence(std::memory_order_acq_rel);
      break;
    case memory_order_seq_cst:
      std::atomic_thread_fence(std::memory_order_seq_cst);
      break;
    default:
      assert(false);
  }
}

}  // end namespace moodycamel
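
// A minimal publication sketch under this std::atomic-backed path (the names
// `g_data` and `g_ready` below are illustrative, not part of this header): a
// release fence before a relaxed store pairs with an acquire fence after a
// relaxed load, ordering the plain accesses around them.
inline void example_publish(int& g_data, std::atomic<bool>& g_ready) {
  g_data = 42;                                          // plain write
  moodycamel::fence(moodycamel::memory_order_release);  // order prior writes
  g_ready.store(true, std::memory_order_relaxed);       // publish the flag
}
inline bool example_consume(int& g_data, std::atomic<bool>& g_ready) {
  if (!g_ready.load(std::memory_order_relaxed)) return false;  // not ready yet
  moodycamel::fence(moodycamel::memory_order_acquire);  // order later reads
  return g_data == 42;  // visible thanks to the fence pairing
}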

#endif

#if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli))
#define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
#endif

#ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
#include <atomic>
#endif
#include <utility>

// WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
// Provides basic support for atomic variables -- no memory ordering guarantees
// are provided. The guarantee of atomicity is only made for types that already
// have atomic load and store guarantees at the hardware level -- on most
// platforms this generally means aligned pointers and integers (only).
namespace moodycamel {
template <typename T>
class weak_atomic {
 public:
  weak_atomic() {}
#ifdef AE_VCPP
#pragma warning(push)
#pragma warning(disable : 4100)  // Get rid of (erroneous) 'unreferenced formal
                                 // parameter' warning
#endif
  template <typename U>
  weak_atomic(U&& x) : value(std::forward<U>(x)) {}
#ifdef __cplusplus_cli
  // Work around bug with universal reference/nullptr combination that only
  // appears when /clr is on
  weak_atomic(nullptr_t) : value(nullptr) {}
#endif
  weak_atomic(weak_atomic const& other) : value(other.value) {}
  weak_atomic(weak_atomic&& other) : value(std::move(other.value)) {}
#ifdef AE_VCPP
#pragma warning(pop)
#endif

  AE_FORCEINLINE operator T() const { return load(); }

#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
  template <typename U>
  AE_FORCEINLINE weak_atomic const& operator=(U&& x) {
    value = std::forward<U>(x);
    return *this;
  }
  AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) {
    value = other.value;
    return *this;
  }

  AE_FORCEINLINE T load() const { return value; }

  AE_FORCEINLINE T fetch_add_acquire(T increment) {
#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
    if (sizeof(T) == 4)
      return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
#if defined(_M_AMD64)
    else if (sizeof(T) == 8)
      return _InterlockedExchangeAdd64((long long volatile*)&value,
                                       (long long)increment);
#endif
#else
#error Unsupported platform
#endif
    assert(false && "T must be either a 32 or 64 bit type");
    return value;
  }

  AE_FORCEINLINE T fetch_add_release(T increment) {
#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
    if (sizeof(T) == 4)
      return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
#if defined(_M_AMD64)
    else if (sizeof(T) == 8)
      return _InterlockedExchangeAdd64((long long volatile*)&value,
                                       (long long)increment);
#endif
#else
#error Unsupported platform
#endif
    assert(false && "T must be either a 32 or 64 bit type");
    return value;
  }
#else
  template <typename U>
  AE_FORCEINLINE weak_atomic const& operator=(U&& x) {
    value.store(std::forward<U>(x), std::memory_order_relaxed);
    return *this;
  }

  AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) {
    value.store(other.value.load(std::memory_order_relaxed),
                std::memory_order_relaxed);
    return *this;
  }

  AE_FORCEINLINE T load() const {
    return value.load(std::memory_order_relaxed);
  }

  AE_FORCEINLINE T fetch_add_acquire(T increment) {
    return value.fetch_add(increment, std::memory_order_acquire);
  }

  AE_FORCEINLINE T fetch_add_release(T increment) {
    return value.fetch_add(increment, std::memory_order_release);
  }
#endif

 private:
#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
  // No std::atomic support, but still need to circumvent compiler
  // optimizations. `volatile` will make memory access slow, but is guaranteed
  // to be reliable.
  volatile T value;
#else
  std::atomic<T> value;
#endif
};

}  // end namespace moodycamel
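
// A minimal sketch of weak_atomic in use (illustrative names; pair it with
// the fences above when publication order matters, since weak_atomic itself
// guarantees atomicity only, never ordering):
inline int example_weak_atomic() {
  moodycamel::weak_atomic<int> counter(0);
  counter = 5;                                          // relaxed store
  counter.fetch_add_release(1);                         // atomic RMW, release
  moodycamel::fence(moodycamel::memory_order_acquire);  // explicit ordering
  return counter.load();                                // relaxed load -> 6
}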

// Portable single-producer, single-consumer semaphore below:

#if defined(_WIN32)
// Avoid including windows.h in a header; we only need a handful of
// items, so we'll redeclare them here (this is relatively safe since
// the API generally has to remain stable between Windows versions).
// I know this is an ugly hack but it still beats polluting the global
// namespace with thousands of generic names or adding a .cpp for nothing.
extern "C" {
struct _SECURITY_ATTRIBUTES;
__declspec(dllimport) void* __stdcall CreateSemaphoreW(
    _SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount,
    long lMaximumCount, const wchar_t* lpName);
__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(
    void* hHandle, unsigned long dwMilliseconds);
__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore,
                                                     long lReleaseCount,
                                                     long* lpPreviousCount);
}
#elif defined(__MACH__)
#include <mach/mach.h>
#elif defined(__unix__)
#include <semaphore.h>
#endif

namespace moodycamel {
// Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
// portable + lightweight semaphore implementations, originally from
// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
// LICENSE:
// Copyright (c) 2015 Jeff Preshing
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
//    claim that you wrote the original software. If you use this software
//    in a product, an acknowledgement in the product documentation would be
//    appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
//    misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
namespace spsc_sema {
#if defined(_WIN32)
class Semaphore {
 private:
  void* m_hSema;

  Semaphore(const Semaphore& other);
  Semaphore& operator=(const Semaphore& other);

 public:
  Semaphore(int initialCount = 0) {
    assert(initialCount >= 0);
    const long maxLong = 0x7fffffff;
    m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
  }

  ~Semaphore() { CloseHandle(m_hSema); }

  void wait() {
    const unsigned long infinite = 0xffffffff;
    WaitForSingleObject(m_hSema, infinite);
  }

  bool try_wait() {
    const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
    return WaitForSingleObject(m_hSema, 0) != RC_WAIT_TIMEOUT;
  }

  bool timed_wait(std::uint64_t usecs) {
    const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
    return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) !=
           RC_WAIT_TIMEOUT;
  }

  void signal(int count = 1) { ReleaseSemaphore(m_hSema, count, nullptr); }
};
#elif defined(__MACH__)
//---------------------------------------------------------
// Semaphore (Apple iOS and OSX)
// Can't use POSIX semaphores due to
// http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
//---------------------------------------------------------
class Semaphore {
 private:
  semaphore_t m_sema;

  Semaphore(const Semaphore& other);
  Semaphore& operator=(const Semaphore& other);

 public:
  Semaphore(int initialCount = 0) {
    assert(initialCount >= 0);
    semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
  }

  ~Semaphore() { semaphore_destroy(mach_task_self(), m_sema); }

  void wait() { semaphore_wait(m_sema); }

  bool try_wait() { return timed_wait(0); }

  bool timed_wait(std::int64_t timeout_usecs) {
    mach_timespec_t ts;
    ts.tv_sec = timeout_usecs / 1000000;
    ts.tv_nsec = (timeout_usecs % 1000000) * 1000;

    // added in OSX 10.10:
    // https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
    kern_return_t rc = semaphore_timedwait(m_sema, ts);

    return rc != KERN_OPERATION_TIMED_OUT;
  }

  void signal() { semaphore_signal(m_sema); }

  void signal(int count) {
    while (count-- > 0) {
      semaphore_signal(m_sema);
    }
  }
};
#elif defined(__unix__)
//---------------------------------------------------------
// Semaphore (POSIX, Linux)
//---------------------------------------------------------
class Semaphore {
 private:
  sem_t m_sema;

  Semaphore(const Semaphore& other);
  Semaphore& operator=(const Semaphore& other);

 public:
  Semaphore(int initialCount = 0) {
    assert(initialCount >= 0);
    sem_init(&m_sema, 0, initialCount);
  }

  ~Semaphore() { sem_destroy(&m_sema); }

  void wait() {
    // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
    int rc;
    do {
      rc = sem_wait(&m_sema);
    } while (rc == -1 && errno == EINTR);
  }

  bool try_wait() {
    int rc;
    do {
      rc = sem_trywait(&m_sema);
    } while (rc == -1 && errno == EINTR);
    return !(rc == -1 && errno == EAGAIN);
  }

  bool timed_wait(std::uint64_t usecs) {
    struct timespec ts;
    const int usecs_in_1_sec = 1000000;
    const int nsecs_in_1_sec = 1000000000;
    clock_gettime(CLOCK_REALTIME, &ts);
    ts.tv_sec += usecs / usecs_in_1_sec;
    ts.tv_nsec += (usecs % usecs_in_1_sec) * 1000;
    // sem_timedwait bombs if tv_nsec reaches 1e9 or more,
    // so we have to clean things up before passing it in
    if (ts.tv_nsec >= nsecs_in_1_sec) {
      ts.tv_nsec -= nsecs_in_1_sec;
      ++ts.tv_sec;
    }

    int rc;
    do {
      rc = sem_timedwait(&m_sema, &ts);
    } while (rc == -1 && errno == EINTR);
    return !(rc == -1 && errno == ETIMEDOUT);
  }

  void signal() { sem_post(&m_sema); }

  void signal(int count) {
    while (count-- > 0) {
      sem_post(&m_sema);
    }
  }
};
#else
#error Unsupported platform! (No semaphore wrapper available)
#endif
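
// A minimal sketch of the raw Semaphore wrapper above (illustrative name;
// prefer LightweightSemaphore below, which spins before hitting the kernel):
inline bool example_raw_semaphore() {
  Semaphore sema(1);           // one permit available initially
  bool got = sema.try_wait();  // consume the permit without blocking
  sema.signal();               // give it back
  return got;
}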

//---------------------------------------------------------
// LightweightSemaphore
//---------------------------------------------------------
class LightweightSemaphore {
 public:
  typedef std::make_signed<std::size_t>::type ssize_t;

 private:
  weak_atomic<ssize_t> m_count;
  Semaphore m_sema;

  bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) {
    ssize_t oldCount;
    // Is there a better way to set the initial spin count?
    // If we lower it to 1000, testBenaphore becomes 15x slower on my Core
    // i7-5930K Windows PC, as threads start hitting the kernel semaphore.
    int spin = 10000;
    while (--spin >= 0) {
      if (m_count.load() > 0) {
        m_count.fetch_add_acquire(-1);
        return true;
      }
      compiler_fence(memory_order_acquire);  // Prevent the compiler from
                                             // collapsing the loop.
    }
    oldCount = m_count.fetch_add_acquire(-1);
    if (oldCount > 0) return true;
    if (timeout_usecs < 0) {
      m_sema.wait();
      return true;
    }
    if (m_sema.timed_wait(timeout_usecs)) return true;
    // At this point, we've timed out waiting for the semaphore, but the
    // count is still decremented, indicating we may still be waiting on
    // it. So we have to re-adjust the count, but only if the semaphore
    // wasn't signaled enough times for us in the meantime. If it was, we
    // need to release the semaphore too.
    while (true) {
      oldCount = m_count.fetch_add_release(1);
      if (oldCount < 0)
        return false;  // successfully restored things to the way they were
      // Oh, the producer thread just signaled the semaphore after all. Try
      // again:
      oldCount = m_count.fetch_add_acquire(-1);
      if (oldCount > 0 && m_sema.try_wait()) return true;
    }
  }

 public:
  LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount) {
    assert(initialCount >= 0);
  }

  bool tryWait() {
    if (m_count.load() > 0) {
      m_count.fetch_add_acquire(-1);
      return true;
    }
    return false;
  }

  void wait() {
    if (!tryWait()) waitWithPartialSpinning();
  }

  bool wait(std::int64_t timeout_usecs) {
    return tryWait() || waitWithPartialSpinning(timeout_usecs);
  }

  void signal(ssize_t count = 1) {
    assert(count >= 0);
    ssize_t oldCount = m_count.fetch_add_release(count);
    assert(oldCount >= -1);
    if (oldCount < 0) {
      m_sema.signal(1);
    }
  }

  ssize_t availableApprox() const {
    ssize_t count = m_count.load();
    return count > 0 ? count : 0;
  }
};
}  // end namespace spsc_sema
}  // end namespace moodycamel
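
// A minimal SPSC handoff sketch using LightweightSemaphore (the function and
// variable names below are illustrative, not part of this header):
#include <thread>
inline bool example_spsc_handoff() {
  moodycamel::spsc_sema::LightweightSemaphore items;  // count starts at 0
  int slot = 0;
  std::thread producer([&] {
    slot = 42;       // produce one item
    items.signal();  // publish it; wakes the consumer if it blocked
  });
  items.wait();  // spins briefly, then falls back to the kernel semaphore
  bool ok = (slot == 42);
  producer.join();
  return ok;
}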

#if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
#pragma warning(pop)
#ifdef __cplusplus_cli
#pragma managed(pop)
#endif
#endif