concurrentqueue.h
1 // Provides a C++11 implementation of a multi-producer, multi-consumer lock-free
2 // queue. An overview, including benchmark results, is provided here:
3 // http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
4 // The full design is also described in excruciating detail at:
5 // http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
6 
7 // Simplified BSD license:
8 // Copyright (c) 2013-2016, Cameron Desrochers.
9 // All rights reserved.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are met:
13 //
14 // - Redistributions of source code must retain the above copyright notice, this
15 // list of conditions and the following disclaimer.
16 // - Redistributions in binary form must reproduce the above copyright notice,
17 // this list of conditions and the following disclaimer in the documentation
18 // and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 // POSSIBILITY OF SUCH DAMAGE.
31 
32 #pragma once
33 
34 #if defined(__GNUC__)
35 // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
36 // Traits::index_t are set to < 32 bits, causing integer promotion, causing
37 // warnings upon assigning any computed values)
38 #pragma GCC diagnostic push
39 #pragma GCC diagnostic ignored "-Wconversion"
40 
41 #ifdef MCDBGQ_USE_RELACY
42 #pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
43 #endif
44 #endif
45 
46 #if defined(__APPLE__)
47 #include "TargetConditionals.h"
48 #endif
49 
50 #ifdef MCDBGQ_USE_RELACY
51 #include "relacy/relacy_std.hpp"
52 #include "relacy_shims.h"
53 // We only use malloc/free anyway, and the delete macro messes up `= delete`
54 // method declarations. We'll override the default trait malloc ourselves
55 // without a macro.
56 #undef new
57 #undef delete
58 #undef malloc
59 #undef free
60 #else
61 #include <atomic> // Requires C++11. Sorry VS2010.
62 #include <cassert>
63 #endif
64 #include <algorithm>
65 #include <array>
66 #include <climits> // for CHAR_BIT
67 #include <cstddef> // for max_align_t
68 #include <cstdint>
69 #include <cstdlib>
70 #include <limits>
71 #include <thread> // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
72 #include <type_traits>
73 #include <utility>
74 
75 // Platform-specific definitions of a numeric thread ID type and an invalid
76 // value
77 namespace moodycamel {
78 namespace details {
79 template <typename thread_id_t>
80 struct thread_id_converter {
81  typedef thread_id_t thread_id_numeric_size_t;
82  typedef thread_id_t thread_id_hash_t;
83 
84  static thread_id_hash_t prehash(thread_id_t const &x) { return x; }
85 };
86 } // namespace details
87 } // namespace moodycamel
88 #if defined(MCDBGQ_USE_RELACY)
89 namespace moodycamel {
90 namespace details {
91 typedef std::uint32_t thread_id_t;
92 static const thread_id_t invalid_thread_id = 0xFFFFFFFFU;
93 static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;
94 static inline thread_id_t thread_id() { return rl::thread_index(); }
95 } // namespace details
96 } // namespace moodycamel
97 #elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
98 // No sense pulling in windows.h in a header, we'll manually declare the
99 // function we use and rely on backwards-compatibility for this not to break
100 extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(
101  void);
102 namespace moodycamel {
103 namespace details {
104 static_assert(sizeof(unsigned long) == sizeof(std::uint32_t),
105  "Expected size of unsigned long to be 32 bits on Windows");
106 typedef std::uint32_t thread_id_t;
107 static const thread_id_t invalid_thread_id =
108  0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx
109 static const thread_id_t invalid_thread_id2 =
110  0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used
111  // in practice. Note that all Win32 thread IDs are presently
112  // multiples of 4.
113 static inline thread_id_t thread_id() {
114  return static_cast<thread_id_t>(::GetCurrentThreadId());
115 }
116 } // namespace details
117 } // namespace moodycamel
118 #elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
119  (defined(__APPLE__) && TARGET_OS_IPHONE)
120 namespace moodycamel {
121 namespace details {
122 static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8,
123  "std::thread::id is expected to be either 4 or 8 bytes");
124 
125 typedef std::thread::id thread_id_t;
126 static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID
127 
128 // Note we don't define an invalid_thread_id2 since std::thread::id doesn't have
129 // one; it's only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined
130 // anyway, which it won't be.
131 static inline thread_id_t thread_id() { return std::this_thread::get_id(); }
132 
133 template <std::size_t>
134 struct thread_id_size {};
135 template <>
136 struct thread_id_size<4> {
137  typedef std::uint32_t numeric_t;
138 };
139 template <>
140 struct thread_id_size<8> {
141  typedef std::uint64_t numeric_t;
142 };
143 
144 template <>
145 struct thread_id_converter<thread_id_t> {
146  typedef thread_id_size<sizeof(thread_id_t)>::numeric_t
147  thread_id_numeric_size_t;
148 #ifndef __APPLE__
149  typedef std::size_t thread_id_hash_t;
150 #else
151  typedef thread_id_numeric_size_t thread_id_hash_t;
152 #endif
153 
154  static thread_id_hash_t prehash(thread_id_t const &x) {
155 #ifndef __APPLE__
156  return std::hash<std::thread::id>()(x);
157 #else
158  return *reinterpret_cast<thread_id_hash_t const *>(&x);
159 #endif
160  }
161 };
162 } // namespace details
163 } // namespace moodycamel
164 #else
165 // Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
166 // In order to get a numeric thread ID in a platform-independent way, we use a
167 // thread-local static variable's address as a thread identifier :-)
168 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
169 #define MOODYCAMEL_THREADLOCAL __thread
170 #elif defined(_MSC_VER)
171 #define MOODYCAMEL_THREADLOCAL __declspec(thread)
172 #else
173 // Assume C++11 compliant compiler
174 #define MOODYCAMEL_THREADLOCAL thread_local
175 #endif
176 namespace moodycamel {
177 namespace details {
178 typedef std::uintptr_t thread_id_t;
179 static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr
180 static const thread_id_t invalid_thread_id2 =
181  1; // Member accesses off a null pointer are also generally invalid. Plus
182  // it's not aligned.
183 static inline thread_id_t thread_id() {
184  static MOODYCAMEL_THREADLOCAL int x;
185  return reinterpret_cast<thread_id_t>(&x);
186 }
187 } // namespace details
188 } // namespace moodycamel
189 #endif
190 
191 // Exceptions
192 #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
193 #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || \
194  (defined(__GNUC__) && defined(__EXCEPTIONS)) || \
195  (!defined(_MSC_VER) && !defined(__GNUC__))
196 #define MOODYCAMEL_EXCEPTIONS_ENABLED
197 #endif
198 #endif
199 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
200 #define MOODYCAMEL_TRY try
201 #define MOODYCAMEL_CATCH(...) catch (__VA_ARGS__)
202 #define MOODYCAMEL_RETHROW throw
203 #define MOODYCAMEL_THROW(expr) throw(expr)
204 #else
205 #define MOODYCAMEL_TRY if (true)
206 #define MOODYCAMEL_CATCH(...) else if (false)
207 #define MOODYCAMEL_RETHROW
208 #define MOODYCAMEL_THROW(expr)
209 #endif
210 
211 #ifndef MOODYCAMEL_NOEXCEPT
212 #if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
213 #define MOODYCAMEL_NOEXCEPT
214 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
215 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
216 #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
217 // VS2012's std::is_nothrow_[move_]constructible is broken and returns true when
218 // it shouldn't :-( We have to assume *all* non-trivial constructors may throw
219 // on VS2012!
220 #define MOODYCAMEL_NOEXCEPT _NOEXCEPT
221 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) \
222  (std::is_rvalue_reference<valueType>::value && \
223  std::is_move_constructible<type>::value \
224  ? std::is_trivially_move_constructible<type>::value \
225  : std::is_trivially_copy_constructible<type>::value)
226 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) \
227  ((std::is_rvalue_reference<valueType>::value && \
228  std::is_move_assignable<type>::value \
229  ? std::is_trivially_move_assignable<type>::value || \
230  std::is_nothrow_move_assignable<type>::value \
231  : std::is_trivially_copy_assignable<type>::value || \
232  std::is_nothrow_copy_assignable<type>::value) && \
233  MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
234 #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
235 #define MOODYCAMEL_NOEXCEPT _NOEXCEPT
236 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) \
237  (std::is_rvalue_reference<valueType>::value && \
238  std::is_move_constructible<type>::value \
239  ? std::is_trivially_move_constructible<type>::value || \
240  std::is_nothrow_move_constructible<type>::value \
241  : std::is_trivially_copy_constructible<type>::value || \
242  std::is_nothrow_copy_constructible<type>::value)
243 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) \
244  ((std::is_rvalue_reference<valueType>::value && \
245  std::is_move_assignable<type>::value \
246  ? std::is_trivially_move_assignable<type>::value || \
247  std::is_nothrow_move_assignable<type>::value \
248  : std::is_trivially_copy_assignable<type>::value || \
249  std::is_nothrow_copy_assignable<type>::value) && \
250  MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
251 #else
252 #define MOODYCAMEL_NOEXCEPT noexcept
253 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
254 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
255 #endif
256 #endif
257 
258 #ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
259 #ifdef MCDBGQ_USE_RELACY
260 #define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
261 #else
262 // VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a
263 // crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445; g++ <=4.7 doesn't
264 // support thread_local either. Finally, iOS/ARM doesn't have support for it
265 // either, and g++/ARM allows it to compile but it's unconfirmed to actually
266 // work
267 #if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \
268  (!defined(__MINGW32__) && !defined(__MINGW64__) || \
269  !defined(__WINPTHREADS_VERSION)) && \
270  (!defined(__GNUC__) || __GNUC__ > 4 || \
271  (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && \
272  (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && \
273  !defined(_M_ARM) && !defined(__aarch64__)
274 // Assume `thread_local` is fully supported in all other C++11
275 // compilers/platforms
276 //#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now
277 // since several users report having problems with it on
278 #endif
279 #endif
280 #endif
281 
282 // VS2012 doesn't support deleted functions.
283 // In this case, we declare the function normally but don't define it. A link
284 // error will be generated if the function is called.
285 #ifndef MOODYCAMEL_DELETE_FUNCTION
286 #if defined(_MSC_VER) && _MSC_VER < 1800
287 #define MOODYCAMEL_DELETE_FUNCTION
288 #else
289 #define MOODYCAMEL_DELETE_FUNCTION = delete
290 #endif
291 #endif
292 
293 // Compiler-specific likely/unlikely hints
294 namespace moodycamel {
295 namespace details {
296 #if defined(__GNUC__)
297 
298 static inline bool(likely)(bool x) { return __builtin_expect((x), true); }
299 
300 static inline bool(unlikely)(bool x) { return __builtin_expect((x), false); }
301 #else
302 static inline bool(likely)(bool x) { return x; }
303 static inline bool(unlikely)(bool x) { return x; }
304 #endif
305 } // namespace details
306 } // namespace moodycamel
307 
308 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
309 #include "internal/concurrentqueue_internal_debug.h"
310 #endif
311 
312 namespace moodycamel {
313 namespace details {
314 template <typename T>
315 struct const_numeric_max {
316  static_assert(std::is_integral<T>::value,
317  "const_numeric_max can only be used with integers");
318  static const T value =
319  std::numeric_limits<T>::is_signed
320  ? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) -
321  static_cast<T>(1)
322  : static_cast<T>(-1);
323 };
324 
325 #if defined(__GLIBCXX__)
326 typedef ::max_align_t
327  std_max_align_t; // libstdc++ forgot to add it to std:: for a while
328 #else
329 typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can
330  // *only* be accessed via std::
331 #endif
332 
333 // Some platforms have incorrectly set max_align_t to a type with <8 bytes
334 // alignment even while supporting 8-byte aligned scalar values (*cough* 32-bit
335 // iOS). Work around this with our own union. See issue #64.
336 typedef union {
337  std_max_align_t x;
338  long long y;
339  void *z;
340 } max_align_t;
341 } // namespace details
342 
343 // Default traits for the ConcurrentQueue. To change some of the
344 // traits without re-implementing all of them, inherit from this
345 // struct and shadow the declarations you wish to be different;
346 // since the traits are used as a template type parameter, the
347 // shadowed declarations will be used where defined, and the defaults
348 // otherwise.
349 struct ConcurrentQueueDefaultTraits {
350  // General-purpose size type. std::size_t is strongly recommended.
351  typedef std::size_t size_t;
352 
353  // The type used for the enqueue and dequeue indices. Must be at least as
354  // large as size_t. Should be significantly larger than the number of elements
355  // you expect to hold at once, especially if you have a high turnover rate;
356  // for example, on 32-bit x86, if you expect to have over a hundred million
357  // elements or pump several million elements through your queue in a very
358  // short space of time, using a 32-bit type *may* trigger a race condition.
359  // A 64-bit int type is recommended in that case, and in practice will
360  // prevent a race condition no matter the usage of the queue. Note that
361  // whether the queue is lock-free with a 64-bit integer type depends on whether
362  // std::atomic<std::uint64_t> is lock-free, which is platform-specific.
363  typedef std::size_t index_t;
364 
365  // Internally, all elements are enqueued and dequeued from multi-element
366  // blocks; this is the smallest controllable unit. If you expect few elements
367  // but many producers, a smaller block size should be favoured. For few
368  // producers and/or many elements, a larger block size is preferred. A sane
369  // default is provided. Must be a power of 2.
370  static const size_t BLOCK_SIZE = 32;
371 
372  // For explicit producers (i.e. when using a producer token), the block is
373  // checked for being empty by iterating through a list of flags, one per
374  // element. For large block sizes, this is too inefficient, and switching to
375  // an atomic counter-based approach is faster. The switch is made for block
376  // sizes strictly larger than this threshold.
377  static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
378 
379  // How many full blocks can be expected for a single explicit producer? This
380  // should reflect that number's maximum for optimal performance. Must be a
381  // power of 2.
382  static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
383 
384  // How many full blocks can be expected for a single implicit producer? This
385  // should reflect that number's maximum for optimal performance. Must be a
386  // power of 2.
387  static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
388 
389  // The initial size of the hash table mapping thread IDs to implicit
390  // producers. Note that the hash is resized every time it becomes half full.
391  // Must be a power of two, and either 0 or at least 1. If 0, implicit
392  // production (using the enqueue methods without an explicit producer token)
393  // is disabled.
394  static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
395 
396  // Controls the number of items that an explicit consumer (i.e. one with a
397  // token) must consume before it causes all consumers to rotate and move on to
398  // the next internal queue.
399  static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE =
400  256;
401 
402  // The maximum number of elements (inclusive) that can be enqueued to a
403  // sub-queue. Enqueue operations that would cause this limit to be surpassed
404  // will fail. Note that this limit is enforced at the block level (for
405  // performance reasons), i.e. it's rounded up to the nearest block size.
406  static const size_t MAX_SUBQUEUE_SIZE =
407  details::const_numeric_max<size_t>::value;
408 
409 #ifndef MCDBGQ_USE_RELACY
410  // Memory allocation can be customized if needed.
411  // malloc should return nullptr on failure, and handle alignment like
412  // std::malloc.
413 #if defined(malloc) || defined(free)
414  // Gah, this is 2015, stop defining macros that break standard code already!
415  // Work around malloc/free being special macros:
416  static inline void *WORKAROUND_malloc(size_t size) { return malloc(size); }
417  static inline void WORKAROUND_free(void *ptr) { return free(ptr); }
418  static inline void *(malloc)(size_t size) { return WORKAROUND_malloc(size); }
419  static inline void(free)(void *ptr) { return WORKAROUND_free(ptr); }
420 #else
421 
422  static inline void *malloc(size_t size) { return std::malloc(size); }
423 
424  static inline void free(void *ptr) { return std::free(ptr); }
425 #endif
426 #else
427  // Debug versions when running under the Relacy race detector (ignore
428  // these in user code)
429  static inline void *malloc(size_t size) { return rl::rl_malloc(size, $); }
430  static inline void free(void *ptr) { return rl::rl_free(ptr, $); }
431 #endif
432 };
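// A minimal usage sketch (illustrative only, not part of the original header):
// traits are customized by inheriting from ConcurrentQueueDefaultTraits and
// shadowing only the members that should differ, e.g. a larger block size:
//
//   struct MyTraits : public moodycamel::ConcurrentQueueDefaultTraits {
//     static const size_t BLOCK_SIZE = 256;  // must be a power of 2
//   };
//
//   moodycamel::ConcurrentQueue<int, MyTraits> q;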
433 
434 // When producing or consuming many elements, the most efficient way is to:
435 // 1) Use one of the bulk-operation methods of the queue with a token
436 // 2) Failing that, use the bulk-operation methods without a token
437 // 3) Failing that, create a token and use that with the single-item methods
438 // 4) Failing that, use the single-parameter methods of the queue
439 // Having said that, don't create tokens willy-nilly -- ideally there should be
440 // a maximum of one token per thread (of each kind).
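// A minimal usage sketch (illustrative only, not part of the original header):
// one producer token and one consumer token per thread, combined with the bulk
// methods, follows the preference order described above. Sizes here are
// arbitrary example values.
//
//   moodycamel::ConcurrentQueue<int> q;
//   moodycamel::ProducerToken ptok(q);
//   int in[64] = {};
//   q.enqueue_bulk(ptok, in, 64);
//
//   moodycamel::ConsumerToken ctok(q);
//   int out[64];
//   size_t n = q.try_dequeue_bulk(ctok, out, 64);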
441 struct ProducerToken;
442 struct ConsumerToken;
443 
444 template <typename T, typename Traits>
445 class ConcurrentQueue;
446 template <typename T, typename Traits>
447 class BlockingConcurrentQueue;
448 class ConcurrentQueueTests;
449 
450 namespace details {
451 struct ConcurrentQueueProducerTypelessBase {
452  ConcurrentQueueProducerTypelessBase *next;
453  std::atomic<bool> inactive;
454  ProducerToken *token;
455 
456  ConcurrentQueueProducerTypelessBase()
457  : next(nullptr), inactive(false), token(nullptr) {}
458 };
459 
460 template <bool use32>
461 struct _hash_32_or_64 {
462  static inline std::uint32_t hash(std::uint32_t h) {
463  // MurmurHash3 finalizer -- see
464  // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
465  // Since the thread ID is already unique, all we really want to do is
466  // propagate that uniqueness evenly across all the bits, so that we can use
467  // a subset of the bits while reducing collisions significantly
468  h ^= h >> 16;
469  h *= 0x85ebca6b;
470  h ^= h >> 13;
471  h *= 0xc2b2ae35;
472  return h ^ (h >> 16);
473  }
474 };
475 
476 template <>
477 struct _hash_32_or_64<1> {
478  static inline std::uint64_t hash(std::uint64_t h) {
479  h ^= h >> 33;
480  h *= 0xff51afd7ed558ccd;
481  h ^= h >> 33;
482  h *= 0xc4ceb9fe1a85ec53;
483  return h ^ (h >> 33);
484  }
485 };
486 
487 template <std::size_t size>
488 struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {};
489 
490 static inline size_t hash_thread_id(thread_id_t id) {
491  static_assert(
492  sizeof(thread_id_t) <= 8,
493  "Expected a platform where thread IDs are at most 64-bit values");
494  return static_cast<size_t>(
495  hash_32_or_64<sizeof(
496  thread_id_converter<thread_id_t>::thread_id_hash_t)>::
497  hash(thread_id_converter<thread_id_t>::prehash(id)));
498 }
499 
500 template <typename T>
501 static inline bool circular_less_than(T a, T b) {
502 #ifdef _MSC_VER
503 #pragma warning(push)
504 #pragma warning(disable : 4554)
505 #endif
506  static_assert(
507  std::is_integral<T>::value && !std::numeric_limits<T>::is_signed,
508  "circular_less_than is intended to be used only with unsigned integer "
509  "types");
510  return static_cast<T>(a - b) >
511  static_cast<T>(static_cast<T>(1)
512  << static_cast<T>(sizeof(T) * CHAR_BIT - 1));
513 #ifdef _MSC_VER
514 #pragma warning(pop)
515 #endif
516 }
517 
518 template <typename U>
519 static inline char *align_for(char *ptr) {
520  const std::size_t alignment = std::alignment_of<U>::value;
521  return ptr +
522  (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) %
523  alignment;
524 }
525 
526 template <typename T>
527 static inline T ceil_to_pow_2(T x) {
528  static_assert(
529  std::is_integral<T>::value && !std::numeric_limits<T>::is_signed,
530  "ceil_to_pow_2 is intended to be used only with unsigned integer types");
531 
532  // Adapted from
533  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
534  --x;
535  x |= x >> 1;
536  x |= x >> 2;
537  x |= x >> 4;
538  for (std::size_t i = 1; i < sizeof(T); i <<= 1) {
539  x |= x >> (i << 3);
540  }
541  ++x;
542  return x;
543 }
544 
545 template <typename T>
546 static inline void swap_relaxed(std::atomic<T> &left, std::atomic<T> &right) {
547  T temp = std::move(left.load(std::memory_order_relaxed));
548  left.store(std::move(right.load(std::memory_order_relaxed)),
549  std::memory_order_relaxed);
550  right.store(std::move(temp), std::memory_order_relaxed);
551 }
552 
553 template <typename T>
554 static inline T const &nomove(T const &x) {
555  return x;
556 }
557 
558 template <bool Enable>
559 struct nomove_if {
560  template <typename T>
561  static inline T const &eval(T const &x) {
562  return x;
563  }
564 };
565 
566 template <>
567 struct nomove_if<false> {
568  template <typename U>
569  static inline auto eval(U &&x) -> decltype(std::forward<U>(x)) {
570  return std::forward<U>(x);
571  }
572 };
573 
574 template <typename It>
575 static inline auto deref_noexcept(It &it) MOODYCAMEL_NOEXCEPT -> decltype(*it) {
576  return *it;
577 }
578 
579 #if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || \
580  (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
581 template <typename T>
582 struct is_trivially_destructible : std::is_trivially_destructible<T> {};
583 #else
584 template <typename T>
585 struct is_trivially_destructible : std::has_trivial_destructor<T> {};
586 #endif
587 
588 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
589 #ifdef MCDBGQ_USE_RELACY
590 typedef RelacyThreadExitListener ThreadExitListener;
591 typedef RelacyThreadExitNotifier ThreadExitNotifier;
592 #else
593 struct ThreadExitListener {
594  typedef void (*callback_t)(void *);
595  callback_t callback;
596  void *userData;
597 
598  ThreadExitListener *next; // reserved for use by the ThreadExitNotifier
599 };
600 
601 class ThreadExitNotifier {
602  public:
603  static void subscribe(ThreadExitListener *listener) {
604  auto &tlsInst = instance();
605  listener->next = tlsInst.tail;
606  tlsInst.tail = listener;
607  }
608 
609  static void unsubscribe(ThreadExitListener *listener) {
610  auto &tlsInst = instance();
611  ThreadExitListener **prev = &tlsInst.tail;
612  for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {
613  if (ptr == listener) {
614  *prev = ptr->next;
615  break;
616  }
617  prev = &ptr->next;
618  }
619  }
620 
621  private:
622  ThreadExitNotifier() : tail(nullptr) {}
623  ThreadExitNotifier(ThreadExitNotifier const &) MOODYCAMEL_DELETE_FUNCTION;
624  ThreadExitNotifier &operator=(ThreadExitNotifier const &)
625  MOODYCAMEL_DELETE_FUNCTION;
626 
627  ~ThreadExitNotifier() {
628  // This thread is about to exit, let everyone know!
629  assert(this == &instance() &&
630  "If this assert fails, you likely have a buggy compiler! Change the "
631  "preprocessor conditions such that "
632  "MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");
633  for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {
634  ptr->callback(ptr->userData);
635  }
636  }
637 
638  // Thread-local
639  static inline ThreadExitNotifier &instance() {
640  static thread_local ThreadExitNotifier notifier;
641  return notifier;
642  }
643 
644  private:
645  ThreadExitListener *tail;
646 };
647 #endif
648 #endif
649 
650 template <typename T>
651 struct static_is_lock_free_num {
652  enum { value = 0 };
653 };
654 template <>
655 struct static_is_lock_free_num<signed char> {
656  enum { value = ATOMIC_CHAR_LOCK_FREE };
657 };
658 template <>
659 struct static_is_lock_free_num<short> {
660  enum { value = ATOMIC_SHORT_LOCK_FREE };
661 };
662 template <>
663 struct static_is_lock_free_num<int> {
664  enum { value = ATOMIC_INT_LOCK_FREE };
665 };
666 template <>
667 struct static_is_lock_free_num<long> {
668  enum { value = ATOMIC_LONG_LOCK_FREE };
669 };
670 template <>
671 struct static_is_lock_free_num<long long> {
672  enum { value = ATOMIC_LLONG_LOCK_FREE };
673 };
674 template <typename T>
675 struct static_is_lock_free
676  : static_is_lock_free_num<typename std::make_signed<T>::type> {};
677 template <>
678 struct static_is_lock_free<bool> {
679  enum { value = ATOMIC_BOOL_LOCK_FREE };
680 };
681 template <typename U>
682 struct static_is_lock_free<U *> {
683  enum { value = ATOMIC_POINTER_LOCK_FREE };
684 };
685 } // namespace details
686 
687 struct ProducerToken {
688  template <typename T, typename Traits>
689  explicit ProducerToken(ConcurrentQueue<T, Traits> &queue);
690 
691  template <typename T, typename Traits>
692  explicit ProducerToken(BlockingConcurrentQueue<T, Traits> &queue);
693 
694  ProducerToken(ProducerToken &&other) MOODYCAMEL_NOEXCEPT
695  : producer(other.producer) {
696  other.producer = nullptr;
697  if (producer != nullptr) {
698  producer->token = this;
699  }
700  }
701 
702  inline ProducerToken &operator=(ProducerToken &&other) MOODYCAMEL_NOEXCEPT {
703  swap(other);
704  return *this;
705  }
706 
707  void swap(ProducerToken &other) MOODYCAMEL_NOEXCEPT {
708  std::swap(producer, other.producer);
709  if (producer != nullptr) {
710  producer->token = this;
711  }
712  if (other.producer != nullptr) {
713  other.producer->token = &other;
714  }
715  }
716 
717  // A token is always valid unless:
718  // 1) Memory allocation failed during construction
719  // 2) It was moved via the move constructor
720  // (Note: assignment does a swap, leaving both potentially valid)
721  // 3) The associated queue was destroyed
722  // Note that if valid() returns true, that only indicates
723  // that the token is valid for use with a specific queue,
724  // but not which one; that's up to the user to track.
725  inline bool valid() const { return producer != nullptr; }
726 
727  ~ProducerToken() {
728  if (producer != nullptr) {
729  producer->token = nullptr;
730  producer->inactive.store(true, std::memory_order_release);
731  }
732  }
733 
734  // Disable copying and assignment
735  ProducerToken(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION;
736  ProducerToken &operator=(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION;
737 
738  private:
739  template <typename T, typename Traits>
740  friend class ConcurrentQueue;
741 
742  friend class ConcurrentQueueTests;
743 
744  protected:
745  details::ConcurrentQueueProducerTypelessBase *producer;
746 };
747 
748 struct ConsumerToken {
749  template <typename T, typename Traits>
750  explicit ConsumerToken(ConcurrentQueue<T, Traits> &q);
751 
752  template <typename T, typename Traits>
753  explicit ConsumerToken(BlockingConcurrentQueue<T, Traits> &q);
754 
755  ConsumerToken(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT
756  : initialOffset(other.initialOffset),
757  lastKnownGlobalOffset(other.lastKnownGlobalOffset),
758  itemsConsumedFromCurrent(other.itemsConsumedFromCurrent),
759  currentProducer(other.currentProducer),
760  desiredProducer(other.desiredProducer) {}
761 
762  inline ConsumerToken &operator=(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT {
763  swap(other);
764  return *this;
765  }
766 
767  void swap(ConsumerToken &other) MOODYCAMEL_NOEXCEPT {
768  std::swap(initialOffset, other.initialOffset);
769  std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);
770  std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);
771  std::swap(currentProducer, other.currentProducer);
772  std::swap(desiredProducer, other.desiredProducer);
773  }
774 
775  // Disable copying and assignment
776  ConsumerToken(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION;
777  ConsumerToken &operator=(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION;
778 
779  private:
780  template <typename T, typename Traits>
781  friend class ConcurrentQueue;
782 
783  friend class ConcurrentQueueTests;
784 
785  private: // but shared with ConcurrentQueue
786  std::uint32_t initialOffset;
787  std::uint32_t lastKnownGlobalOffset;
788  std::uint32_t itemsConsumedFromCurrent;
789  details::ConcurrentQueueProducerTypelessBase *currentProducer;
790  details::ConcurrentQueueProducerTypelessBase *desiredProducer;
791 };
792 
793 // Need to forward-declare this swap because it's in a namespace.
794 // See
795 // http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
796 template <typename T, typename Traits>
797 inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &a,
798  typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &b)
799  MOODYCAMEL_NOEXCEPT;
800 
801 template <typename T, typename Traits = ConcurrentQueueDefaultTraits>
802 class ConcurrentQueue {
803  public:
804  typedef ::moodycamel::ProducerToken producer_token_t;
805  typedef ::moodycamel::ConsumerToken consumer_token_t;
806 
807  typedef typename Traits::index_t index_t;
808  typedef typename Traits::size_t size_t;
809 
810  static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
811  static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD =
812  static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
813  static const size_t EXPLICIT_INITIAL_INDEX_SIZE =
814  static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
815  static const size_t IMPLICIT_INITIAL_INDEX_SIZE =
816  static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
817  static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE =
818  static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);
819  static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE =
820  static_cast<std::uint32_t>(
821  Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);
822 #ifdef _MSC_VER
823 #pragma warning(push)
824 #pragma warning(disable : 4307) // + integral constant overflow (that's what
825  // the ternary expression is for!)
826 #pragma warning(disable : 4309) // static_cast: Truncation of constant value
827 #endif
828  static const size_t MAX_SUBQUEUE_SIZE =
829  (details::const_numeric_max<size_t>::value -
830  static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) <
831  BLOCK_SIZE)
832  ? details::const_numeric_max<size_t>::value
833  : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) +
834  (BLOCK_SIZE - 1)) /
835  BLOCK_SIZE * BLOCK_SIZE);
836 #ifdef _MSC_VER
837 #pragma warning(pop)
838 #endif
839 
840  static_assert(!std::numeric_limits<size_t>::is_signed &&
841  std::is_integral<size_t>::value,
842  "Traits::size_t must be an unsigned integral type");
843  static_assert(!std::numeric_limits<index_t>::is_signed &&
844  std::is_integral<index_t>::value,
845  "Traits::index_t must be an unsigned integral type");
846  static_assert(sizeof(index_t) >= sizeof(size_t),
847  "Traits::index_t must be at least as wide as Traits::size_t");
848  static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)),
849  "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
850  static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) &&
851  !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD &
852  (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)),
853  "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a "
854  "power of 2 (and greater than 1)");
855  static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) &&
856  !(EXPLICIT_INITIAL_INDEX_SIZE &
857  (EXPLICIT_INITIAL_INDEX_SIZE - 1)),
858  "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and "
859  "greater than 1)");
860  static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) &&
861  !(IMPLICIT_INITIAL_INDEX_SIZE &
862  (IMPLICIT_INITIAL_INDEX_SIZE - 1)),
863  "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and "
864  "greater than 1)");
865  static_assert(
866  (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) ||
867  !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE &
868  (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)),
869  "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");
870  static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 ||
871  INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1,
872  "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least "
873  "1 (or 0 to disable implicit enqueueing)");
874 
875  public:
876  // Creates a queue with at least `capacity` element slots; note that the
877  // actual number of elements that can be inserted without additional memory
878  // allocation depends on the number of producers and the block size (e.g. if
879  // the block size is equal to `capacity`, only a single block will be
880  // allocated up-front, which means only a single producer will be able to
881  // enqueue elements without an extra allocation -- blocks aren't shared
882  // between producers). This method is not thread safe -- it is up to the user
883  // to ensure that the queue is fully constructed before it starts being used
884  // by other threads (this includes making the memory effects of construction
885  // visible, possibly with a memory barrier).
886  explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE)
887  : producerListTail(nullptr),
888  producerCount(0),
889  initialBlockPoolIndex(0),
890  nextExplicitConsumerId(0),
891  globalExplicitConsumerOffset(0) {
892  implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
893  populate_initial_implicit_producer_hash();
894  populate_initial_block_list(capacity / BLOCK_SIZE +
895  ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
896 
897 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
898  // Track all the producers using a fully-resolved typed list for
899  // each kind; this makes it possible to debug them starting from
900  // the root queue object (otherwise wacky casts are needed that
901  // don't compile in the debugger's expression evaluator).
902  explicitProducers.store(nullptr, std::memory_order_relaxed);
903  implicitProducers.store(nullptr, std::memory_order_relaxed);
904 #endif
905  }
906 
907  // Computes the correct amount of pre-allocated blocks for you based
908  // on the minimum number of elements you want available at any given
909  // time, and the maximum concurrent number of each type of producer.
910  ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers,
911  size_t maxImplicitProducers)
912  : producerListTail(nullptr),
913  producerCount(0),
914  initialBlockPoolIndex(0),
915  nextExplicitConsumerId(0),
916  globalExplicitConsumerOffset(0) {
917  implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
918  populate_initial_implicit_producer_hash();
919  size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) *
920  (maxExplicitProducers + 1) +
921  2 * (maxExplicitProducers + maxImplicitProducers);
922  populate_initial_block_list(blocks);
923 
924 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
925  explicitProducers.store(nullptr, std::memory_order_relaxed);
926  implicitProducers.store(nullptr, std::memory_order_relaxed);
927 #endif
928  }
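// A minimal usage sketch (illustrative only, not part of the original header):
// the two constructors above size the initial block pool differently. The
// numbers below are arbitrary example parameters.
//
//   // At least 1024 element slots, subject to the block-sharing caveat above:
//   moodycamel::ConcurrentQueue<int> q1(1024);
//
//   // At least 1024 slots available at any time, with up to 4 explicit and
//   // 8 implicit producers enqueueing concurrently:
//   moodycamel::ConcurrentQueue<int> q2(1024, 4, 8);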
929 
930  // Note: The queue should not be accessed concurrently while it's
931  // being deleted. It's up to the user to synchronize this.
932  // This method is not thread safe.
933  ~ConcurrentQueue() {
934  // Destroy producers
935  auto ptr = producerListTail.load(std::memory_order_relaxed);
936  while (ptr != nullptr) {
937  auto next = ptr->next_prod();
938  if (ptr->token != nullptr) {
939  ptr->token->producer = nullptr;
940  }
941  destroy(ptr);
942  ptr = next;
943  }
944 
945  // Destroy implicit producer hash tables
946  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
947  auto hash = implicitProducerHash.load(std::memory_order_relaxed);
948  while (hash != nullptr) {
949  auto prev = hash->prev;
950  if (prev != nullptr) { // The last hash is part of this object and was
951  // not allocated dynamically
952  for (size_t i = 0; i != hash->capacity; ++i) {
953  hash->entries[i].~ImplicitProducerKVP();
954  }
955  hash->~ImplicitProducerHash();
956  (Traits::free)(hash);
957  }
958  hash = prev;
959  }
960  }
961 
962  // Destroy global free list
963  auto block = freeList.head_unsafe();
964  while (block != nullptr) {
965  auto next = block->freeListNext.load(std::memory_order_relaxed);
966  if (block->dynamicallyAllocated) {
967  destroy(block);
968  }
969  block = next;
970  }
971 
972  // Destroy initial free list
973  destroy_array(initialBlockPool, initialBlockPoolSize);
974  }
975 
976  // Disable copying and copy assignment
977  ConcurrentQueue(ConcurrentQueue const &) MOODYCAMEL_DELETE_FUNCTION;
978  ConcurrentQueue &operator=(ConcurrentQueue const &)
979  MOODYCAMEL_DELETE_FUNCTION;
980 
981  // Moving is supported, but note that it is *not* a thread-safe operation.
982  // Nobody can use the queue while it's being moved, and the memory effects
983  // of that move must be propagated to other threads before they can use it.
984  // Note: When a queue is moved, its tokens are still valid but can only be
985  // used with the destination queue (i.e. semantically they are moved along
986  // with the queue itself).
987  ConcurrentQueue(ConcurrentQueue &&other) MOODYCAMEL_NOEXCEPT
988  : producerListTail(
989  other.producerListTail.load(std::memory_order_relaxed)),
990  producerCount(other.producerCount.load(std::memory_order_relaxed)),
991  initialBlockPoolIndex(
992  other.initialBlockPoolIndex.load(std::memory_order_relaxed)),
993  initialBlockPool(other.initialBlockPool),
994  initialBlockPoolSize(other.initialBlockPoolSize),
995  freeList(std::move(other.freeList)),
996  nextExplicitConsumerId(
997  other.nextExplicitConsumerId.load(std::memory_order_relaxed)),
998  globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(
999  std::memory_order_relaxed)) {
1000  // Move the other one into this, and leave the other one as an empty queue
1001  implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
1002  populate_initial_implicit_producer_hash();
1003  swap_implicit_producer_hashes(other);
1004 
1005  other.producerListTail.store(nullptr, std::memory_order_relaxed);
1006  other.producerCount.store(0, std::memory_order_relaxed);
1007  other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
1008  other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
1009 
1010 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
1011  explicitProducers.store(
1012  other.explicitProducers.load(std::memory_order_relaxed),
1013  std::memory_order_relaxed);
1014  other.explicitProducers.store(nullptr, std::memory_order_relaxed);
1015  implicitProducers.store(
1016  other.implicitProducers.load(std::memory_order_relaxed),
1017  std::memory_order_relaxed);
1018  other.implicitProducers.store(nullptr, std::memory_order_relaxed);
1019 #endif
1020 
1021  other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
1022  other.initialBlockPoolSize = 0;
1023  other.initialBlockPool = nullptr;
1024 
1025  reown_producers();
1026  }
1027 
1028  inline ConcurrentQueue &operator=(ConcurrentQueue &&other)
1029  MOODYCAMEL_NOEXCEPT {
1030  return swap_internal(other);
1031  }
1032 
1033  // Swaps this queue's state with the other's. Not thread-safe.
1034  // Swapping two queues does not invalidate their tokens, however
1035  // the tokens that were created for one queue must be used with
1036  // only the swapped queue (i.e. the tokens are tied to the
1037  // queue's movable state, not the object itself).
1038  inline void swap(ConcurrentQueue &other) MOODYCAMEL_NOEXCEPT {
1039  swap_internal(other);
1040  }
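// A minimal usage sketch (illustrative only, not part of the original header):
// moving or swapping queues is single-threaded, and existing tokens follow the
// queue state they were created against, as described above.
//
//   moodycamel::ConcurrentQueue<int> a, b;
//   moodycamel::ProducerToken tok(a);
//   a.enqueue(tok, 1);
//
//   moodycamel::ConcurrentQueue<int> c(std::move(a));  // no other thread may touch a or c here
//   c.enqueue(tok, 2);   // tok is now tied to c
//   b.swap(c);           // after the swap, tok must be used with b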
1041 
1042  private:
1043  ConcurrentQueue &swap_internal(ConcurrentQueue &other) {
1044  if (this == &other) {
1045  return *this;
1046  }
1047 
1048  details::swap_relaxed(producerListTail, other.producerListTail);
1049  details::swap_relaxed(producerCount, other.producerCount);
1050  details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
1051  std::swap(initialBlockPool, other.initialBlockPool);
1052  std::swap(initialBlockPoolSize, other.initialBlockPoolSize);
1053  freeList.swap(other.freeList);
1054  details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
1055  details::swap_relaxed(globalExplicitConsumerOffset,
1056  other.globalExplicitConsumerOffset);
1057 
1058  swap_implicit_producer_hashes(other);
1059 
1060  reown_producers();
1061  other.reown_producers();
1062 
1063 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
1064  details::swap_relaxed(explicitProducers, other.explicitProducers);
1065  details::swap_relaxed(implicitProducers, other.implicitProducers);
1066 #endif
1067 
1068  return *this;
1069  }
1070 
1071  public:
1072  // Enqueues a single item (by copying it).
1073  // Allocates memory if required. Only fails if memory allocation fails (or
1074  // implicit production is disabled because
1075  // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or
1076  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1077  // Thread-safe.
1078  inline bool enqueue(T const &item) {
1079  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1080  return inner_enqueue<CanAlloc>(item);
1081  }
1082 
1083  // Enqueues a single item (by moving it, if possible).
1084  // Allocates memory if required. Only fails if memory allocation fails (or
1085  // implicit production is disabled because
1086  // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or
1087  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1088  // Thread-safe.
1089  inline bool enqueue(T &&item) {
1090  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1091  return inner_enqueue<CanAlloc>(std::move(item));
1092  }
1093 
1094  // Enqueues a single item (by copying it) using an explicit producer token.
1095  // Allocates memory if required. Only fails if memory allocation fails (or
1096  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1097  // Thread-safe.
1098  inline bool enqueue(producer_token_t const &token, T const &item) {
1099  return inner_enqueue<CanAlloc>(token, item);
1100  }
1101 
1102  // Enqueues a single item (by moving it, if possible) using an explicit
1103  // producer token. Allocates memory if required. Only fails if memory
1104  // allocation fails (or Traits::MAX_SUBQUEUE_SIZE has been defined and would
1105  // be surpassed). Thread-safe.
1106  inline bool enqueue(producer_token_t const &token, T &&item) {
1107  return inner_enqueue<CanAlloc>(token, std::move(item));
1108  }
1109 
1110  // Enqueues several items.
1111  // Allocates memory if required. Only fails if memory allocation fails (or
1112  // implicit production is disabled because
1113  // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or
1114  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). Note:
1115  // Use std::make_move_iterator if the elements should be moved instead of
1116  // copied. Thread-safe.
1117  template <typename It>
1118  bool enqueue_bulk(It itemFirst, size_t count) {
1119  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1120  return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
1121  }
1122 
1123  // Enqueues several items using an explicit producer token.
1124  // Allocates memory if required. Only fails if memory allocation fails
1125  // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
1126  // Note: Use std::make_move_iterator if the elements should be moved
1127  // instead of copied.
1128  // Thread-safe.
1129  template <typename It>
1130  bool enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count) {
1131  return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
1132  }
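// A minimal usage sketch (illustrative only, not part of the original header):
// the allocating enqueue family, with and without a producer token.
//
//   moodycamel::ConcurrentQueue<std::string> q;
//   q.enqueue("hello");                       // implicit producer, may allocate
//
//   moodycamel::ProducerToken ptok(q);
//   q.enqueue(ptok, std::string("world"));    // explicit producer
//
//   std::vector<std::string> batch(10, "x");
//   q.enqueue_bulk(ptok, std::make_move_iterator(batch.begin()), batch.size());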
1133 
1134  // Enqueues a single item (by copying it).
1135  // Does not allocate memory. Fails if not enough room to enqueue (or implicit
1136  // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
1137  // is 0).
1138  // Thread-safe.
1139  inline bool try_enqueue(T const &item) {
1140  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1141  return inner_enqueue<CannotAlloc>(item);
1142  }
1143 
1144  // Enqueues a single item (by moving it, if possible).
1145  // Does not allocate memory (except for one-time implicit producer).
1146  // Fails if not enough room to enqueue (or implicit production is
1147  // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
1148  // Thread-safe.
1149  inline bool try_enqueue(T &&item) {
1150  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1151  return inner_enqueue<CannotAlloc>(std::move(item));
1152  }
1153 
1154  // Enqueues a single item (by copying it) using an explicit producer token.
1155  // Does not allocate memory. Fails if not enough room to enqueue.
1156  // Thread-safe.
1157  inline bool try_enqueue(producer_token_t const &token, T const &item) {
1158  return inner_enqueue<CannotAlloc>(token, item);
1159  }
1160 
1161  // Enqueues a single item (by moving it, if possible) using an explicit
1162  // producer token. Does not allocate memory. Fails if not enough room to
1163  // enqueue. Thread-safe.
1164  inline bool try_enqueue(producer_token_t const &token, T &&item) {
1165  return inner_enqueue<CannotAlloc>(token, std::move(item));
1166  }
1167 
1168  // Enqueues several items.
1169  // Does not allocate memory (except for one-time implicit producer).
1170  // Fails if not enough room to enqueue (or implicit production is
1171  // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
1172  // Note: Use std::make_move_iterator if the elements should be moved
1173  // instead of copied.
1174  // Thread-safe.
1175  template <typename It>
1176  bool try_enqueue_bulk(It itemFirst, size_t count) {
1177  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
1178  return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
1179  }
1180 
1181  // Enqueues several items using an explicit producer token.
1182  // Does not allocate memory. Fails if not enough room to enqueue.
1183  // Note: Use std::make_move_iterator if the elements should be moved
1184  // instead of copied.
1185  // Thread-safe.
1186  template <typename It>
1187  bool try_enqueue_bulk(producer_token_t const &token, It itemFirst,
1188  size_t count) {
1189  return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
1190  }
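// A minimal usage sketch (illustrative only, not part of the original header):
// the try_enqueue family never allocates new blocks, so it can fail once the
// pre-allocated capacity is exhausted. Sizes are arbitrary example values.
//
//   moodycamel::ConcurrentQueue<int> q(64);   // pre-allocate roughly 64 slots
//   bool ok = q.try_enqueue(42);              // false if no room is left
//   int items[8] = {0, 1, 2, 3, 4, 5, 6, 7};
//   bool okBulk = q.try_enqueue_bulk(items, 8);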
1191 
1192  // Attempts to dequeue from the queue.
1193  // Returns false if all producer streams appeared empty at the time they
1194  // were checked (so, the queue is likely but not guaranteed to be empty).
1195  // Never allocates. Thread-safe.
1196  template <typename U>
1197  bool try_dequeue(U &item) {
1198  // Instead of simply trying each producer in turn (which could cause
1199  // needless contention on the first producer), we score them heuristically.
1200  size_t nonEmptyCount = 0;
1201  ProducerBase *best = nullptr;
1202  size_t bestSize = 0;
1203  for (auto ptr = producerListTail.load(std::memory_order_acquire);
1204  nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {
1205  auto size = ptr->size_approx();
1206  if (size > 0) {
1207  if (size > bestSize) {
1208  bestSize = size;
1209  best = ptr;
1210  }
1211  ++nonEmptyCount;
1212  }
1213  }
1214 
1215  // If there was at least one non-empty queue but it appears empty at the
1216  // time we try to dequeue from it, we need to make sure every queue's been
1217  // tried
1218  if (nonEmptyCount > 0) {
1219  if ((details::likely)(best->dequeue(item))) {
1220  return true;
1221  }
1222  for (auto ptr = producerListTail.load(std::memory_order_acquire);
1223  ptr != nullptr; ptr = ptr->next_prod()) {
1224  if (ptr != best && ptr->dequeue(item)) {
1225  return true;
1226  }
1227  }
1228  }
1229  return false;
1230  }
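// A minimal usage sketch (illustrative only, not part of the original header):
// a typical tokenless consumer loop. A false return only means the queue
// *appeared* empty at the time of the check.
//
//   moodycamel::ConcurrentQueue<int> q;
//   int item;
//   while (q.try_dequeue(item)) {
//     process(item);   // process() is a placeholder for user code
//   }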
1231 
1232  // Attempts to dequeue from the queue.
1233  // Returns false if all producer streams appeared empty at the time they
1234  // were checked (so, the queue is likely but not guaranteed to be empty).
1235  // This differs from the try_dequeue(item) method in that this one does
1236  // not attempt to reduce contention by interleaving the order that producer
1237  // streams are dequeued from. So, using this method can reduce overall
1238  // throughput under contention, but will give more predictable results in
1239  // single-threaded consumer scenarios. This is mostly only useful for internal
1240  // unit tests. Never allocates. Thread-safe.
1241  template <typename U>
1242  bool try_dequeue_non_interleaved(U &item) {
1243  for (auto ptr = producerListTail.load(std::memory_order_acquire);
1244  ptr != nullptr; ptr = ptr->next_prod()) {
1245  if (ptr->dequeue(item)) {
1246  return true;
1247  }
1248  }
1249  return false;
1250  }
1251 
1252  // Attempts to dequeue from the queue using an explicit consumer token.
1253  // Returns false if all producer streams appeared empty at the time they
1254  // were checked (so, the queue is likely but not guaranteed to be empty).
1255  // Never allocates. Thread-safe.
1256  template <typename U>
1257  bool try_dequeue(consumer_token_t &token, U &item) {
1258  // The idea is roughly as follows:
1259  // Every 256 items from one producer, make everyone rotate (increase the
1260  // global offset) -> this means the highest-efficiency consumer dictates the
1261  // rotation speed of everyone else, more or less. If you see that the global
1262  // offset has changed, you must reset your consumption counter and move to
1263  // your designated place. If there are no items where you're supposed to be,
1264  // keep moving until you find a producer with some items. If the global
1265  // offset has not changed but you've run out of items to consume, move over
1266  // from your current position until you find a producer with something in
1267  // it.
1268 
1269  if (token.desiredProducer == nullptr ||
1270  token.lastKnownGlobalOffset !=
1271  globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
1272  if (!update_current_producer_after_rotation(token)) {
1273  return false;
1274  }
1275  }
1276 
1277  // If there was at least one non-empty queue but it appears empty at the
1278  // time we try to dequeue from it, we need to make sure every queue's been
1279  // tried
1280  if (static_cast<ProducerBase *>(token.currentProducer)->dequeue(item)) {
1281  if (++token.itemsConsumedFromCurrent ==
1282  EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
1283  globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
1284  }
1285  return true;
1286  }
1287 
1288  auto tail = producerListTail.load(std::memory_order_acquire);
1289  auto ptr = static_cast<ProducerBase *>(token.currentProducer)->next_prod();
1290  if (ptr == nullptr) {
1291  ptr = tail;
1292  }
1293  while (ptr != static_cast<ProducerBase *>(token.currentProducer)) {
1294  if (ptr->dequeue(item)) {
1295  token.currentProducer = ptr;
1296  token.itemsConsumedFromCurrent = 1;
1297  return true;
1298  }
1299  ptr = ptr->next_prod();
1300  if (ptr == nullptr) {
1301  ptr = tail;
1302  }
1303  }
1304  return false;
1305  }
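// A minimal usage sketch (illustrative only, not part of the original header):
// dequeueing with a consumer token lets the rotation scheme described above
// spread consumers across the producers' sub-queues.
//
//   moodycamel::ConcurrentQueue<int> q;
//   moodycamel::ConsumerToken ctok(q);
//   int item;
//   while (q.try_dequeue(ctok, item)) {
//     process(item);   // process() is a placeholder for user code
//   }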
1306 
1307  // Attempts to dequeue several elements from the queue.
1308  // Returns the number of items actually dequeued.
1309  // Returns 0 if all producer streams appeared empty at the time they
1310  // were checked (so, the queue is likely but not guaranteed to be empty).
1311  // Never allocates. Thread-safe.
1312  template <typename It>
1313  size_t try_dequeue_bulk(It itemFirst, size_t max) {
1314  size_t count = 0;
1315  for (auto ptr = producerListTail.load(std::memory_order_acquire);
1316  ptr != nullptr; ptr = ptr->next_prod()) {
1317  count += ptr->dequeue_bulk(itemFirst, max - count);
1318  if (count == max) {
1319  break;
1320  }
1321  }
1322  return count;
1323  }
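// A minimal usage sketch (illustrative only, not part of the original header):
// bulk dequeue returns however many items (up to max) could be grabbed across
// the producers' sub-queues.
//
//   moodycamel::ConcurrentQueue<int> q;
//   int buffer[32];
//   size_t got = q.try_dequeue_bulk(buffer, 32);
//   for (size_t i = 0; i != got; ++i) {
//     process(buffer[i]);   // process() is a placeholder for user code
//   }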
1324 
1325  // Attempts to dequeue several elements from the queue using an explicit
1326  // consumer token. Returns the number of items actually dequeued. Returns 0 if
1327  // all producer streams appeared empty at the time they were checked (so, the
1328  // queue is likely but not guaranteed to be empty). Never allocates.
1329  // Thread-safe.
1330  template <typename It>
1331  size_t try_dequeue_bulk(consumer_token_t &token, It itemFirst, size_t max) {
1332  if (token.desiredProducer == nullptr ||
1333  token.lastKnownGlobalOffset !=
1334  globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
1335  if (!update_current_producer_after_rotation(token)) {
1336  return 0;
1337  }
1338  }
1339 
1340  size_t count = static_cast<ProducerBase *>(token.currentProducer)
1341  ->dequeue_bulk(itemFirst, max);
1342  if (count == max) {
1343  if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >=
1344  EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
1345  globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
1346  }
1347  return max;
1348  }
1349  token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
1350  max -= count;
1351 
1352  auto tail = producerListTail.load(std::memory_order_acquire);
1353  auto ptr = static_cast<ProducerBase *>(token.currentProducer)->next_prod();
1354  if (ptr == nullptr) {
1355  ptr = tail;
1356  }
1357  while (ptr != static_cast<ProducerBase *>(token.currentProducer)) {
1358  auto dequeued = ptr->dequeue_bulk(itemFirst, max);
1359  count += dequeued;
1360  if (dequeued != 0) {
1361  token.currentProducer = ptr;
1362  token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
1363  }
1364  if (dequeued == max) {
1365  break;
1366  }
1367  max -= dequeued;
1368  ptr = ptr->next_prod();
1369  if (ptr == nullptr) {
1370  ptr = tail;
1371  }
1372  }
1373  return count;
1374  }
1375 
1376  // Attempts to dequeue from a specific producer's inner queue.
1377  // If you happen to know which producer you want to dequeue from, this
1378  // is significantly faster than using the general-case try_dequeue methods.
1379  // Returns false if the producer's queue appeared empty at the time it
1380  // was checked (so, the queue is likely but not guaranteed to be empty).
1381  // Never allocates. Thread-safe.
1382  template <typename U>
1383  inline bool try_dequeue_from_producer(producer_token_t const &producer,
1384  U &item) {
1385  return static_cast<ExplicitProducer *>(producer.producer)->dequeue(item);
1386  }
1387 
1388  // Attempts to dequeue several elements from a specific producer's inner
1389  // queue. Returns the number of items actually dequeued. If you happen to know
1390  // which producer you want to dequeue from, this is significantly faster than
1391  // using the general-case try_dequeue methods. Returns 0 if the producer's
1392  // queue appeared empty at the time it was checked (so, the queue is likely
1393  // but not guaranteed to be empty). Never allocates. Thread-safe.
1394  template <typename It>
1395  inline size_t try_dequeue_bulk_from_producer(producer_token_t const &producer,
1396  It itemFirst, size_t max) {
1397  return static_cast<ExplicitProducer *>(producer.producer)
1398  ->dequeue_bulk(itemFirst, max);
1399  }
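// A minimal usage sketch (illustrative only, not part of the original header):
// when the producer is known, its token can be used to dequeue directly from
// that producer's sub-queue, skipping the general search.
//
//   moodycamel::ConcurrentQueue<int> q;
//   moodycamel::ProducerToken ptok(q);
//   q.enqueue(ptok, 7);
//
//   int item;
//   bool ok = q.try_dequeue_from_producer(ptok, item);   // item == 7 on success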
1400 
1401  // Returns an estimate of the total number of elements currently in the queue.
1402  // This estimate is only accurate if the queue has completely stabilized
1403  // before it is called (i.e. all enqueue and dequeue operations have completed
1404  // and their memory effects are visible on the calling thread, and no further
1405  // operations start while this method is being called). Thread-safe.
1406  size_t size_approx() const {
1407  size_t size = 0;
1408  for (auto ptr = producerListTail.load(std::memory_order_acquire);
1409  ptr != nullptr; ptr = ptr->next_prod()) {
1410  size += ptr->size_approx();
1411  }
1412  return size;
1413  }
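// A minimal usage sketch (illustrative only, not part of the original header):
// size_approx() is only exact once the queue has quiesced, e.g. after all
// producer and consumer threads have been joined. q is assumed to be a
// previously constructed queue.
//
//   // ... all producer/consumer threads joined ...
//   size_t remaining = q.size_approx();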
1414 
1415  // Returns true if the underlying atomic variables used by
1416  // the queue are lock-free (they should be on most platforms).
1417  // Thread-safe.
1418  static bool is_lock_free() {
1419  return details::static_is_lock_free<bool>::value == 2 &&
1420  details::static_is_lock_free<size_t>::value == 2 &&
1421  details::static_is_lock_free<std::uint32_t>::value == 2 &&
1422  details::static_is_lock_free<index_t>::value == 2 &&
1423  details::static_is_lock_free<void *>::value == 2 &&
1424  details::static_is_lock_free<typename details::thread_id_converter<
1425  details::thread_id_t>::thread_id_numeric_size_t>::value == 2;
1426  }
1427 
1428  private:
1429  friend struct ProducerToken;
1430  friend struct ConsumerToken;
1431  struct ExplicitProducer;
1432  friend struct ExplicitProducer;
1433  struct ImplicitProducer;
1434  friend struct ImplicitProducer;
1435 
1436  friend class ConcurrentQueueTests;
1437 
1438  enum AllocationMode { CanAlloc, CannotAlloc };
1439 
1440  ///////////////////////////////
1441  // Queue methods
1442  ///////////////////////////////
1443 
1444  template <AllocationMode canAlloc, typename U>
1445  inline bool inner_enqueue(producer_token_t const &token, U &&element) {
1446  return static_cast<ExplicitProducer *>(token.producer)
1447  ->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(
1448  std::forward<U>(element));
1449  }
1450 
1451  template <AllocationMode canAlloc, typename U>
1452  inline bool inner_enqueue(U &&element) {
1453  auto producer = get_or_add_implicit_producer();
1454  return producer == nullptr
1455  ? false
1456  : producer->ConcurrentQueue::ImplicitProducer::template enqueue<
1457  canAlloc>(std::forward<U>(element));
1458  }
1459 
1460  template <AllocationMode canAlloc, typename It>
1461  inline bool inner_enqueue_bulk(producer_token_t const &token, It itemFirst,
1462  size_t count) {
1463  return static_cast<ExplicitProducer *>(token.producer)
1464  ->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(
1465  itemFirst, count);
1466  }
1467 
1468  template <AllocationMode canAlloc, typename It>
1469  inline bool inner_enqueue_bulk(It itemFirst, size_t count) {
1470  auto producer = get_or_add_implicit_producer();
1471  return producer == nullptr
1472  ? false
1473  : producer->ConcurrentQueue::ImplicitProducer:: template enqueue_bulk<canAlloc>(itemFirst, count);
1474  }
1475 
1476  inline bool update_current_producer_after_rotation(consumer_token_t &token) {
1477  // Ah, there's been a rotation, figure out where we should be!
1478  auto tail = producerListTail.load(std::memory_order_acquire);
1479  if (token.desiredProducer == nullptr && tail == nullptr) {
1480  return false;
1481  }
1482  auto prodCount = producerCount.load(std::memory_order_relaxed);
1483  auto globalOffset =
1484  globalExplicitConsumerOffset.load(std::memory_order_relaxed);
1485  if ((details::unlikely)(token.desiredProducer == nullptr)) {
1486  // Aha, first time we're dequeueing anything.
1487  // Figure out our local position
1488  // Note: offset is from start, not end, but we're traversing from end --
1489  // subtract from count first
1490  std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);
1491  token.desiredProducer = tail;
1492  for (std::uint32_t i = 0; i != offset; ++i) {
1493  token.desiredProducer =
1494  static_cast<ProducerBase *>(token.desiredProducer)->next_prod();
1495  if (token.desiredProducer == nullptr) {
1496  token.desiredProducer = tail;
1497  }
1498  }
1499  }
1500 
1501  std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
1502  if (delta >= prodCount) {
1503  delta = delta % prodCount;
1504  }
1505  for (std::uint32_t i = 0; i != delta; ++i) {
1506  token.desiredProducer =
1507  static_cast<ProducerBase *>(token.desiredProducer)->next_prod();
1508  if (token.desiredProducer == nullptr) {
1509  token.desiredProducer = tail;
1510  }
1511  }
1512 
1513  token.lastKnownGlobalOffset = globalOffset;
1514  token.currentProducer = token.desiredProducer;
1515  token.itemsConsumedFromCurrent = 0;
1516  return true;
1517  }
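  // Worked example of the rotation logic above (illustrative numbers): with
  // prodCount == 3, lastKnownGlobalOffset == 1 and globalOffset == 4, delta is
  // 3, which reduces to 0 modulo prodCount, so the token stays on its current
  // producer; a delta of 1 would advance desiredProducer by one link, wrapping
  // back to the list tail when next_prod() returns nullptr.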
1518 
1520  // Free list
1522 
1523  template <typename N>
1524  struct FreeListNode {
1525  FreeListNode() : freeListRefs(0), freeListNext(nullptr) {}
1526 
1527  std::atomic<std::uint32_t> freeListRefs;
1528  std::atomic<N *> freeListNext;
1529  };
1530 
1531  // A simple CAS-based lock-free free list. Not the fastest thing in the world
1532  // under heavy contention, but simple and correct (assuming nodes are never
1533  // freed until after the free list is destroyed), and fairly speedy under low
1534  // contention.
1535  template <typename N> // N must inherit FreeListNode or have the same fields
1536  // (and initialization of them)
1537  struct FreeList {
1538  FreeList() : freeListHead(nullptr) {}
1539 
1540  FreeList(FreeList &&other)
1541  : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) {
1542  other.freeListHead.store(nullptr, std::memory_order_relaxed);
1543  }
1544 
1545  void swap(FreeList &other) {
1546  details::swap_relaxed(freeListHead, other.freeListHead);
1547  }
1548 
1549  FreeList(FreeList const &) MOODYCAMEL_DELETE_FUNCTION;
1550  FreeList &operator=(FreeList const &) MOODYCAMEL_DELETE_FUNCTION;
1551 
1552  inline void add(N *node) {
1553 #if MCDBGQ_NOLOCKFREE_FREELIST
1554  debug::DebugLock lock(mutex);
1555 #endif
1556  // We know that the should-be-on-freelist bit is 0 at this point, so it's
1557  // safe to set it using a fetch_add
1558  if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST,
1559  std::memory_order_acq_rel) == 0) {
1560  // Oh look! We were the last ones referencing this node, and we know
1561  // we want to add it to the free list, so let's do it!
1562  add_knowing_refcount_is_zero(node);
1563  }
1564  }
1565 
1566  inline N *try_get() {
1567 #if MCDBGQ_NOLOCKFREE_FREELIST
1568  debug::DebugLock lock(mutex);
1569 #endif
1570  auto head = freeListHead.load(std::memory_order_acquire);
1571  while (head != nullptr) {
1572  auto prevHead = head;
1573  auto refs = head->freeListRefs.load(std::memory_order_relaxed);
1574  if ((refs & REFS_MASK) == 0 ||
1575  !head->freeListRefs.compare_exchange_strong(
1576  refs, refs + 1, std::memory_order_acquire,
1577  std::memory_order_relaxed)) {
1578  head = freeListHead.load(std::memory_order_acquire);
1579  continue;
1580  }
1581 
1582  // Good, reference count has been incremented (it wasn't at zero), which
1583  // means we can read the next and not worry about it changing between
1584  // now and the time we do the CAS
1585  auto next = head->freeListNext.load(std::memory_order_relaxed);
1586  if (freeListHead.compare_exchange_strong(head, next,
1587  std::memory_order_acquire,
1588  std::memory_order_relaxed)) {
1589  // Yay, got the node. This means it was on the list, which means
1590  // shouldBeOnFreeList must be false no matter the refcount (because
1591  // nobody else knows it's been taken off yet, it can't have been put
1592  // back on).
1593  assert((head->freeListRefs.load(std::memory_order_relaxed) &
1594  SHOULD_BE_ON_FREELIST) == 0);
1595 
1596  // Decrease refcount twice, once for our ref, and once for the list's
1597  // ref
1598  head->freeListRefs.fetch_sub(2, std::memory_order_release);
1599  return head;
1600  }
1601 
1602  // OK, the head must have changed on us, but we still need to decrease
1603  // the refcount we increased. Note that we don't need to release any
1604  // memory effects, but we do need to ensure that the reference count
1605  // decrement happens-after the CAS on the head.
1606  refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel);
1607  if (refs == SHOULD_BE_ON_FREELIST + 1) {
1608  add_knowing_refcount_is_zero(prevHead);
1609  }
1610  }
1611 
1612  return nullptr;
1613  }
1614 
1615  // Useful for traversing the list when there's no contention (e.g. to
1616  // destroy remaining nodes)
1617  N *head_unsafe() const {
1618  return freeListHead.load(std::memory_order_relaxed);
1619  }
1620 
1621  private:
1622  inline void add_knowing_refcount_is_zero(N *node) {
1623  // Since the refcount is zero, and nobody can increase it once it's zero
1624  // (except us, and we run only one copy of this method per node at a time,
1625  // i.e. the single thread case), then we know we can safely change the
1626  // next pointer of the node; however, once the refcount is back above
1627  // zero, then other threads could increase it (happens under heavy
1628  // contention, when the refcount goes to zero in between a load and a
1629  // refcount increment of a node in try_get, then back up to something
1630  // non-zero, then the refcount increment is done by the other thread) --
1631  // so, if the CAS to add the node to the actual list fails, decrease the
1632  // refcount and leave the add operation to the next thread who puts the
1633  // refcount back at zero (which could be us, hence the loop).
1634  auto head = freeListHead.load(std::memory_order_relaxed);
1635  while (true) {
1636  node->freeListNext.store(head, std::memory_order_relaxed);
1637  node->freeListRefs.store(1, std::memory_order_release);
1638  if (!freeListHead.compare_exchange_strong(head, node,
1639  std::memory_order_release,
1640  std::memory_order_relaxed)) {
1641  // Hmm, the add failed, but we can only try again when the refcount
1642  // goes back to zero
1643  if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1,
1644  std::memory_order_release) == 1) {
1645  continue;
1646  }
1647  }
1648  return;
1649  }
1650  }
1651 
1652  private:
1653  // Implemented like a stack, but where node order doesn't matter (nodes are
1654  // inserted out of order under contention)
1655  std::atomic<N *> freeListHead;
1656 
1657  static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
1658  static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
1659 
1660 #if MCDBGQ_NOLOCKFREE_FREELIST
1661  debug::DebugMutex mutex;
1662 #endif
1663  };
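  // How the freeListRefs word above is used (a sketch, matching the constants
  // REFS_MASK and SHOULD_BE_ON_FREELIST): the low 31 bits count in-flight
  // references taken by try_get(), and the top bit records that the node is
  // meant to be on the list. For example, a value of (SHOULD_BE_ON_FREELIST | 2)
  // means two threads still hold transient references; the last one to drop its
  // reference performs the deferred add_knowing_refcount_is_zero().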
1664 
1666  // Block
1668 
1669  enum InnerQueueContext { implicit_context = 0, explicit_context = 1 };
1670 
1671  struct Block {
1672  Block()
1673  : next(nullptr),
1674  elementsCompletelyDequeued(0),
1675  freeListRefs(0),
1676  freeListNext(nullptr),
1677  shouldBeOnFreeList(false),
1678  dynamicallyAllocated(true) {
1679 #if MCDBGQ_TRACKMEM
1680  owner = nullptr;
1681 #endif
1682  }
1683 
1684  template <InnerQueueContext context>
1685  inline bool is_empty() const {
1686  if (context == explicit_context &&
1687  BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
1688  // Check flags
1689  for (size_t i = 0; i < BLOCK_SIZE; ++i) {
1690  if (!emptyFlags[i].load(std::memory_order_relaxed)) {
1691  return false;
1692  }
1693  }
1694 
1695  // Aha, empty; make sure we have all other memory effects that happened
1696  // before the empty flags were set
1697  std::atomic_thread_fence(std::memory_order_acquire);
1698  return true;
1699  } else {
1700  // Check counter
1701  if (elementsCompletelyDequeued.load(std::memory_order_relaxed) ==
1702  BLOCK_SIZE) {
1703  std::atomic_thread_fence(std::memory_order_acquire);
1704  return true;
1705  }
1706  assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <=
1707  BLOCK_SIZE);
1708  return false;
1709  }
1710  }
1711 
1712  // Returns true if the block is now empty (does not apply in explicit
1713  // context)
1714  template <InnerQueueContext context>
1715  inline bool set_empty(index_t i) {
1716  if (context == explicit_context &&
1717  BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
1718  // Set flag
1719  assert(!emptyFlags[BLOCK_SIZE - 1 -
1720  static_cast<size_t>(
1721  i & static_cast<index_t>(BLOCK_SIZE - 1))]
1722  .load(std::memory_order_relaxed));
1723  emptyFlags[BLOCK_SIZE - 1 -
1724  static_cast<size_t>(i &
1725  static_cast<index_t>(BLOCK_SIZE - 1))]
1726  .store(true, std::memory_order_release);
1727  return false;
1728  } else {
1729  // Increment counter
1730  auto prevVal =
1731  elementsCompletelyDequeued.fetch_add(1, std::memory_order_release);
1732  assert(prevVal < BLOCK_SIZE);
1733  return prevVal == BLOCK_SIZE - 1;
1734  }
1735  }
1736 
1737  // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping
1738  // and count > 0). Returns true if the block is now empty (does not apply in
1739  // explicit context).
1740  template <InnerQueueContext context>
1741  inline bool set_many_empty(index_t i, size_t count) {
1742  if (context == explicit_context &&
1743  BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
1744  // Set flags
1745  std::atomic_thread_fence(std::memory_order_release);
1746  i = BLOCK_SIZE - 1 -
1747  static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) -
1748  count + 1;
1749  for (size_t j = 0; j != count; ++j) {
1750  assert(!emptyFlags[i + j].load(std::memory_order_relaxed));
1751  emptyFlags[i + j].store(true, std::memory_order_relaxed);
1752  }
1753  return false;
1754  } else {
1755  // Increment counter
1756  auto prevVal = elementsCompletelyDequeued.fetch_add(
1757  count, std::memory_order_release);
1758  assert(prevVal + count <= BLOCK_SIZE);
1759  return prevVal + count == BLOCK_SIZE;
1760  }
1761  }
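  // Note on the flag indexing used above (explicit context only): an item at
  // queue index i maps to emptyFlags[BLOCK_SIZE - 1 - (i % BLOCK_SIZE)], i.e.
  // the slots run in reverse. For example, with a BLOCK_SIZE of 32 (the usual
  // traits default; not shown in this excerpt), index 0 uses slot 31 and a run
  // of 4 items starting at index 8 occupies slots 20..23.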
1762 
1763  template <InnerQueueContext context>
1764  inline void set_all_empty() {
1765  if (context == explicit_context &&
1767  // Set all flags
1768  for (size_t i = 0; i != BLOCK_SIZE; ++i) {
1769  emptyFlags[i].store(true, std::memory_order_relaxed);
1770  }
1771  } else {
1772  // Reset counter
1773  elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
1774  }
1775  }
1776 
1777  template <InnerQueueContext context>
1778  inline void reset_empty() {
1779  if (context == explicit_context &&
1781  // Reset flags
1782  for (size_t i = 0; i != BLOCK_SIZE; ++i) {
1783  emptyFlags[i].store(false, std::memory_order_relaxed);
1784  }
1785  } else {
1786  // Reset counter
1787  elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
1788  }
1789  }
1790 
1791  inline T *operator[](index_t idx) MOODYCAMEL_NOEXCEPT {
1792  return static_cast<T *>(static_cast<void *>(elements)) +
1793  static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1));
1794  }
1795 
1796  inline T const *operator[](index_t idx) const MOODYCAMEL_NOEXCEPT {
1797  return static_cast<T const *>(static_cast<void const *>(elements)) +
1798  static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1));
1799  }
1800 
1801  private:
1802  // IMPORTANT: This must be the first member in Block, so that if T depends
1803  // on the alignment of addresses returned by malloc, that alignment will be
1804  // preserved. Apparently clang actually generates code that uses this
1805  // assumption for AVX instructions in some cases. Ideally, we should also
1806  // align Block to the alignment of T in case it's higher than malloc's
1807  // 16-byte alignment, but this is hard to do in a cross-platform way. Assert
1808  // for this case:
1809  static_assert(
1810  std::alignment_of<T>::value <=
1811  std::alignment_of<details::max_align_t>::value,
1812  "The queue does not support super-aligned types at this time");
1813  // Additionally, we need the alignment of Block itself to be a multiple of
1814  // max_align_t since otherwise the appropriate padding will not be added at
1815  // the end of Block in order to make arrays of Blocks all be properly
1816  // aligned (not just the first one). We use a union to force this.
1817  union {
1818  char elements[sizeof(T) * BLOCK_SIZE];
1819  details::max_align_t dummy;
1820  };
1821 
1822  public:
1823  Block *next;
1824  std::atomic<size_t> elementsCompletelyDequeued;
1825  std::atomic<bool> emptyFlags
1826  [BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1];
1827 
1828  public:
1829  std::atomic<std::uint32_t> freeListRefs;
1830  std::atomic<Block *> freeListNext;
1831  std::atomic<bool> shouldBeOnFreeList;
1832  bool dynamicallyAllocated; // Perhaps a better name for this would be
1833  // 'isNotPartOfInitialBlockPool'
1834 
1835 #if MCDBGQ_TRACKMEM
1836  void *owner;
1837 #endif
1838  };
1839 
1840  static_assert(std::alignment_of<Block>::value >=
1841  std::alignment_of<details::max_align_t>::value,
1842  "Internal error: Blocks must be at least as aligned as the "
1843  "type they are wrapping");
1844 
1845 #if MCDBGQ_TRACKMEM
1846  public:
1847  struct MemStats;
1848 
1849  private:
1850 #endif
1851 
1853  // Producer base
1855 
1856  struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase {
1857  ProducerBase(ConcurrentQueue *parent_, bool isExplicit_)
1858  : tailIndex(0),
1859  headIndex(0),
1860  dequeueOptimisticCount(0),
1861  dequeueOvercommit(0),
1862  tailBlock(nullptr),
1863  isExplicit(isExplicit_),
1864  parent(parent_) {}
1865 
1866  virtual ~ProducerBase() {}
1867 
1868  template <typename U>
1869  inline bool dequeue(U &element) {
1870  if (isExplicit) {
1871  return static_cast<ExplicitProducer *>(this)->dequeue(element);
1872  } else {
1873  return static_cast<ImplicitProducer *>(this)->dequeue(element);
1874  }
1875  }
1876 
1877  template <typename It>
1878  inline size_t dequeue_bulk(It &itemFirst, size_t max) {
1879  if (isExplicit) {
1880  return static_cast<ExplicitProducer *>(this)->dequeue_bulk(itemFirst,
1881  max);
1882  } else {
1883  return static_cast<ImplicitProducer *>(this)->dequeue_bulk(itemFirst,
1884  max);
1885  }
1886  }
1887 
1888  inline ProducerBase *next_prod() const {
1889  return static_cast<ProducerBase *>(next);
1890  }
1891 
1892  inline size_t size_approx() const {
1893  auto tail = tailIndex.load(std::memory_order_relaxed);
1894  auto head = headIndex.load(std::memory_order_relaxed);
1895  return details::circular_less_than(head, tail)
1896  ? static_cast<size_t>(tail - head)
1897  : 0;
1898  }
1899 
1900  inline index_t getTail() const {
1901  return tailIndex.load(std::memory_order_relaxed);
1902  }
1903 
1904  protected:
1905  std::atomic<index_t> tailIndex; // Where to enqueue to next
1906  std::atomic<index_t> headIndex; // Where to dequeue from next
1907 
1908  std::atomic<index_t> dequeueOptimisticCount;
1909  std::atomic<index_t> dequeueOvercommit;
1910 
1911  Block *tailBlock;
1912 
1913  public:
1914  bool isExplicit;
1915  ConcurrentQueue *parent;
1916 
1917  protected:
1918 #if MCDBGQ_TRACKMEM
1919  friend struct MemStats;
1920 #endif
1921  };
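  // size_approx() above relies on details::circular_less_than, which compares
  // indices modulo the width of index_t. A small illustration (assuming an
  // 8-bit index type purely for readability): with head == 250 and tail == 4
  // after wrap-around, circular_less_than(head, tail) is true and
  // tail - head == 10, so roughly ten items are reported.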
1922 
1924  // Explicit queue
1926 
1927  struct ExplicitProducer : public ProducerBase {
1928  explicit ExplicitProducer(ConcurrentQueue *parent)
1929  : ProducerBase(parent, true),
1930  blockIndex(nullptr),
1931  pr_blockIndexSlotsUsed(0),
1932  pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),
1933  pr_blockIndexFront(0),
1934  pr_blockIndexEntries(nullptr),
1935  pr_blockIndexRaw(nullptr) {
1936  size_t poolBasedIndexSize =
1937  details::ceil_to_pow_2(parent->initialBlockPoolSize) >> 1;
1938  if (poolBasedIndexSize > pr_blockIndexSize) {
1939  pr_blockIndexSize = poolBasedIndexSize;
1940  }
1941 
1942  new_block_index(0); // This creates an index with double the number of
1943  // current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
1944  }
1945 
1946  ~ExplicitProducer() {
1947  // Destruct any elements not yet dequeued.
1948  // Since we're in the destructor, we can assume all elements
1949  // are either completely dequeued or completely not (no halfways).
1950  if (this->tailBlock !=
1951  nullptr) { // Note this means there must be a block index too
1952  // First find the block that's partially dequeued, if any
1953  Block *halfDequeuedBlock = nullptr;
1954  if ((this->headIndex.load(std::memory_order_relaxed) &
1955  static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
1956  // The head's not on a block boundary, meaning a block somewhere is
1957  // partially dequeued (or the head block is the tail block and was
1958  // fully dequeued, but the head/tail are still not on a boundary)
1959  size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) &
1960  (pr_blockIndexSize - 1);
1961  while (details::circular_less_than<index_t>(
1962  pr_blockIndexEntries[i].base + BLOCK_SIZE,
1963  this->headIndex.load(std::memory_order_relaxed))) {
1964  i = (i + 1) & (pr_blockIndexSize - 1);
1965  }
1966  assert(details::circular_less_than<index_t>(
1967  pr_blockIndexEntries[i].base,
1968  this->headIndex.load(std::memory_order_relaxed)));
1969  halfDequeuedBlock = pr_blockIndexEntries[i].block;
1970  }
1971 
1972  // Start at the head block (note the first line in the loop gives us the
1973  // head from the tail on the first iteration)
1974  auto block = this->tailBlock;
1975  do {
1976  block = block->next;
1977  if (block->ConcurrentQueue::Block::template is_empty<
1978  explicit_context>()) {
1979  continue;
1980  }
1981 
1982  size_t i = 0; // Offset into block
1983  if (block == halfDequeuedBlock) {
1984  i = static_cast<size_t>(
1985  this->headIndex.load(std::memory_order_relaxed) &
1986  static_cast<index_t>(BLOCK_SIZE - 1));
1987  }
1988 
1989  // Walk through all the items in the block; if this is the tail block,
1990  // we need to stop when we reach the tail index
1991  auto lastValidIndex =
1992  (this->tailIndex.load(std::memory_order_relaxed) &
1993  static_cast<index_t>(BLOCK_SIZE - 1)) == 0
1994  ? BLOCK_SIZE
1995  : static_cast<size_t>(
1996  this->tailIndex.load(std::memory_order_relaxed) &
1997  static_cast<index_t>(BLOCK_SIZE - 1));
1998  while (i != BLOCK_SIZE &&
1999  (block != this->tailBlock || i != lastValidIndex)) {
2000  (*block)[i++]->~T();
2001  }
2002  } while (block != this->tailBlock);
2003  }
2004 
2005  // Destroy all blocks that we own
2006  if (this->tailBlock != nullptr) {
2007  auto block = this->tailBlock;
2008  do {
2009  auto nextBlock = block->next;
2010  if (block->dynamicallyAllocated) {
2011  destroy(block);
2012  } else {
2013  this->parent->add_block_to_free_list(block);
2014  }
2015  block = nextBlock;
2016  } while (block != this->tailBlock);
2017  }
2018 
2019  // Destroy the block indices
2020  auto header = static_cast<BlockIndexHeader *>(pr_blockIndexRaw);
2021  while (header != nullptr) {
2022  auto prev = static_cast<BlockIndexHeader *>(header->prev);
2023  header->~BlockIndexHeader();
2024  (Traits::free)(header);
2025  header = prev;
2026  }
2027  }
2028 
2029  template <AllocationMode allocMode, typename U>
2030  inline bool enqueue(U &&element) {
2031  index_t currentTailIndex =
2032  this->tailIndex.load(std::memory_order_relaxed);
2033  index_t newTailIndex = 1 + currentTailIndex;
2034  if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
2035  // We reached the end of a block, start a new one
2036  auto startBlock = this->tailBlock;
2037  auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
2038  if (this->tailBlock != nullptr &&
2039  this->tailBlock->next->ConcurrentQueue::Block::template is_empty<
2040  explicit_context>()) {
2041  // We can re-use the block ahead of us, it's empty!
2042  this->tailBlock = this->tailBlock->next;
2043  this->tailBlock->ConcurrentQueue::Block::template reset_empty<
2044  explicit_context>();
2045 
2046  // We'll put the block on the block index (guaranteed to be room since
2047  // we're conceptually removing the last block from it first -- except
2048  // instead of removing then adding, we can just overwrite). Note that
2049  // there must be a valid block index here, since even if allocation
2050  // failed in the ctor, it would have been re-attempted when adding the
2051  // first block to the queue; since there is such a block, a block
2052  // index must have been successfully allocated.
2053  } else {
2054  // Whatever head value we see here is >= the last value we saw here
2055  // (relatively), and <= its current value. Since we have the most
2056  // recent tail, the head must be
2057  // <= to it.
2058  auto head = this->headIndex.load(std::memory_order_relaxed);
2059  assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2060  if (!details::circular_less_than<index_t>(
2061  head, currentTailIndex + BLOCK_SIZE) ||
2063  (MAX_SUBQUEUE_SIZE == 0 ||
2064  MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
2065  // We can't enqueue in another block because there's not enough
2066  // leeway -- the tail could surpass the head by the time the block
2067  // fills up! (Or we'll exceed the size limit, if the second part of
2068  // the condition was true.)
2069  return false;
2070  }
2071  // We're going to need a new block; check that the block index has
2072  // room
2073  if (pr_blockIndexRaw == nullptr ||
2074  pr_blockIndexSlotsUsed == pr_blockIndexSize) {
2075  // Hmm, the circular block index is already full -- we'll need
2076  // to allocate a new index. Note pr_blockIndexRaw can only be
2077  // nullptr if the initial allocation failed in the constructor.
2078 
2079  if (allocMode == CannotAlloc ||
2080  !new_block_index(pr_blockIndexSlotsUsed)) {
2081  return false;
2082  }
2083  }
2084 
2085  // Insert a new block in the circular linked list
2086  auto newBlock =
2087  this->parent
2088  ->ConcurrentQueue::template requisition_block<allocMode>();
2089  if (newBlock == nullptr) {
2090  return false;
2091  }
2092 #if MCDBGQ_TRACKMEM
2093  newBlock->owner = this;
2094 #endif
2095  newBlock->ConcurrentQueue::Block::template reset_empty<
2096  explicit_context>();
2097  if (this->tailBlock == nullptr) {
2098  newBlock->next = newBlock;
2099  } else {
2100  newBlock->next = this->tailBlock->next;
2101  this->tailBlock->next = newBlock;
2102  }
2103  this->tailBlock = newBlock;
2104  ++pr_blockIndexSlotsUsed;
2105  }
2106 
2107  if (!MOODYCAMEL_NOEXCEPT_CTOR(
2108  T, U, new (nullptr) T(std::forward<U>(element)))) {
2109  // The constructor may throw. We want the element not to appear in the
2110  // queue in that case (without corrupting the queue):
2111  MOODYCAMEL_TRY {
2112  new ((*this->tailBlock)[currentTailIndex])
2113  T(std::forward<U>(element));
2114  }
2115  MOODYCAMEL_CATCH(...) {
2116  // Revert change to the current block, but leave the new block
2117  // available for next time
2118  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2119  this->tailBlock =
2120  startBlock == nullptr ? this->tailBlock : startBlock;
2121  MOODYCAMEL_RETHROW;
2122  }
2123  } else {
2124  (void)startBlock;
2125  (void)originalBlockIndexSlotsUsed;
2126  }
2127 
2128  // Add block to block index
2129  auto &entry = blockIndex.load(std::memory_order_relaxed)
2130  ->entries[pr_blockIndexFront];
2131  entry.base = currentTailIndex;
2132  entry.block = this->tailBlock;
2133  blockIndex.load(std::memory_order_relaxed)
2134  ->front.store(pr_blockIndexFront, std::memory_order_release);
2135  pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
2136 
2137  if (!MOODYCAMEL_NOEXCEPT_CTOR(
2138  T, U, new (nullptr) T(std::forward<U>(element)))) {
2139  this->tailIndex.store(newTailIndex, std::memory_order_release);
2140  return true;
2141  }
2142  }
2143 
2144  // Enqueue
2145  new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
2146 
2147  this->tailIndex.store(newTailIndex, std::memory_order_release);
2148  return true;
2149  }
2150 
2151  template <typename U>
2152  bool dequeue(U &element) {
2153  auto tail = this->tailIndex.load(std::memory_order_relaxed);
2154  auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2155  if (details::circular_less_than<index_t>(
2156  this->dequeueOptimisticCount.load(std::memory_order_relaxed) -
2157  overcommit,
2158  tail)) {
2159  // Might be something to dequeue, let's give it a try
2160 
2161  // Note that this if is purely for performance purposes in the common
2162  // case when the queue is empty and the values are eventually consistent
2163  // -- we may enter here spuriously.
2164 
2165  // Note that whatever the values of overcommit and tail are, they are
2166  // not going to change (unless we change them) and must be the same
2167  // value at this point (inside the if) as when the if condition was
2168  // evaluated.
2169 
2170  // We insert an acquire fence here to synchronize-with the release upon
2171  // incrementing dequeueOvercommit below. This ensures that whatever the
2172  // value we loaded into overcommit, the load of dequeueOptimisticCount
2173  // in the fetch_add below will result in a value at least as recent as
2174  // that (and therefore at least as large). Note that I believe a
2175  // compiler (signal) fence here would be sufficient due to the nature of
2176  // fetch_add (all read-modify-write operations are guaranteed to work on
2177  // the latest value in the modification order), but unfortunately that
2178  // can't be shown to be correct using only the C++11 standard. See
2179  // http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
2180  std::atomic_thread_fence(std::memory_order_acquire);
2181 
2182  // Increment optimistic counter, then check if it went over the boundary
2183  auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(
2184  1, std::memory_order_relaxed);
2185 
2186  // Note that since dequeueOvercommit must be <= dequeueOptimisticCount
2187  // (because dequeueOvercommit is only ever incremented after
2188  // dequeueOptimisticCount -- this is enforced in the `else` block
2189  // below), and since we now have a version of dequeueOptimisticCount
2190  // that is at least as recent as overcommit (due to the release upon
2191  // incrementing dequeueOvercommit and the acquire above that
2192  // synchronizes with it), overcommit <= myDequeueCount. However, we
2193  // can't assert this since both dequeueOptimisticCount and
2194  // dequeueOvercommit may (independently) overflow; in such a case,
2195  // though, the logic still holds since the difference between the two is
2196  // maintained.
2197 
2198  // Note that we reload tail here in case it changed; it will be the same
2199  // value as before or greater, since this load is sequenced after
2200  // (happens after) the earlier load above. This is supported by
2201  // read-read coherency (as defined in the standard), explained here:
2202  // http://en.cppreference.com/w/cpp/atomic/memory_order
2203  tail = this->tailIndex.load(std::memory_order_acquire);
2204  if ((details::likely)(details::circular_less_than<index_t>(
2205  myDequeueCount - overcommit, tail))) {
2206  // Guaranteed to be at least one element to dequeue!
2207 
2208  // Get the index. Note that since there's guaranteed to be at least
2209  // one element, this will never exceed tail. We need to do an
2210  // acquire-release fence here since it's possible that whatever
2211  // condition got us to this point was for an earlier enqueued element
2212  // (that we already see the memory effects for), but that by the time
2213  // we increment somebody else has incremented it, and we need to see
2214  // the memory effects for *that* element, which in such a case is
2215  // necessarily visible on the thread that incremented it in the first
2216  // place with the more current condition (they must have acquired a
2217  // tail that is at least as recent).
2218  auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
2219 
2220  // Determine which block the element is in
2221 
2222  auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
2223  auto localBlockIndexHead =
2224  localBlockIndex->front.load(std::memory_order_acquire);
2225 
2226  // We need to be careful here about subtracting and dividing because
2227  // of index wrap-around. When an index wraps, we need to preserve the
2228  // sign of the offset when dividing it by the block size (in order to
2229  // get a correct signed block count offset in all cases):
2230  auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
2231  auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
2232  auto offset = static_cast<size_t>(
2233  static_cast<typename std::make_signed<index_t>::type>(
2234  blockBaseIndex - headBase) /
2235  BLOCK_SIZE);
2236  auto block = localBlockIndex
2237  ->entries[(localBlockIndexHead + offset) &
2238  (localBlockIndex->size - 1)]
2239  .block;
2240 
2241  // Dequeue
2242  auto &el = *((*block)[index]);
2243  if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) {
2244  // Make sure the element is still fully dequeued and destroyed even
2245  // if the assignment throws
2246  struct Guard {
2247  Block *block;
2248  index_t index;
2249 
2250  ~Guard() {
2251  (*block)[index]->~T();
2252  block->ConcurrentQueue::Block::template set_empty<
2253  explicit_context>(index);
2254  }
2255  } guard = {block, index};
2256 
2257  element = std::move(el);
2258  } else {
2259  element = std::move(el);
2260  el.~T();
2261  block->ConcurrentQueue::Block::template set_empty<explicit_context>(
2262  index);
2263  }
2264 
2265  return true;
2266  } else {
2267  // Wasn't anything to dequeue after all; make the effective dequeue
2268  // count eventually consistent
2269  this->dequeueOvercommit.fetch_add(
2270  1,
2271  std::memory_order_release); // Release so that the fetch_add on
2272  // dequeueOptimisticCount is
2273  // guaranteed to happen before this
2274  // write
2275  }
2276  }
2277 
2278  return false;
2279  }
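  // A worked example of the optimistic-count scheme used above (illustrative
  // values): suppose tail == 5 and dequeueOptimisticCount - dequeueOvercommit
  // == 4, and two consumers race for the single remaining element. Both pass
  // the outer check and both bump dequeueOptimisticCount; the winner sees
  // myDequeueCount - overcommit < tail and claims a slot via headIndex, while
  // the loser fails the inner check and bumps dequeueOvercommit by one,
  // restoring the invariant that (optimistic count - overcommit) tracks the
  // number of real dequeues.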
2280 
2281  template <AllocationMode allocMode, typename It>
2282  bool enqueue_bulk(It itemFirst, size_t count) {
2283  // First, we need to make sure we have enough room to enqueue all of the
2284  // elements; this means pre-allocating blocks and putting them in the
2285  // block index (but only if all the allocations succeeded).
2286  index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
2287  auto startBlock = this->tailBlock;
2288  auto originalBlockIndexFront = pr_blockIndexFront;
2289  auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
2290 
2291  Block *firstAllocatedBlock = nullptr;
2292 
2293  // Figure out how many blocks we'll need to allocate, and do so
2294  size_t blockBaseDiff =
2295  ((startTailIndex + count - 1) &
2296  ~static_cast<index_t>(BLOCK_SIZE - 1)) -
2297  ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
2298  index_t currentTailIndex =
2299  (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2300  if (blockBaseDiff > 0) {
2301  // Allocate as many blocks as possible from ahead
2302  while (blockBaseDiff > 0 && this->tailBlock != nullptr &&
2303  this->tailBlock->next != firstAllocatedBlock &&
2304  this->tailBlock->next->ConcurrentQueue::Block::template is_empty<
2305  explicit_context>()) {
2306  blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2307  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2308 
2309  this->tailBlock = this->tailBlock->next;
2310  firstAllocatedBlock = firstAllocatedBlock == nullptr
2311  ? this->tailBlock
2312  : firstAllocatedBlock;
2313 
2314  auto &entry = blockIndex.load(std::memory_order_relaxed)
2315  ->entries[pr_blockIndexFront];
2316  entry.base = currentTailIndex;
2317  entry.block = this->tailBlock;
2318  pr_blockIndexFront =
2319  (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
2320  }
2321 
2322  // Now allocate as many blocks as necessary from the block pool
2323  while (blockBaseDiff > 0) {
2324  blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2325  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2326 
2327  auto head = this->headIndex.load(std::memory_order_relaxed);
2328  assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2329  bool full =
2330  !details::circular_less_than<index_t>(
2331  head, currentTailIndex + BLOCK_SIZE) ||
2332  (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
2333  (MAX_SUBQUEUE_SIZE == 0 ||
2334  MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
2335  if (pr_blockIndexRaw == nullptr ||
2336  pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
2337  if (allocMode == CannotAlloc || full ||
2338  !new_block_index(originalBlockIndexSlotsUsed)) {
2339  // Failed to allocate, undo changes (but keep injected blocks)
2340  pr_blockIndexFront = originalBlockIndexFront;
2341  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2342  this->tailBlock =
2343  startBlock == nullptr ? firstAllocatedBlock : startBlock;
2344  return false;
2345  }
2346 
2347  // pr_blockIndexFront is updated inside new_block_index, so we need
2348  // to update our fallback value too (since we keep the new index
2349  // even if we later fail)
2350  originalBlockIndexFront = originalBlockIndexSlotsUsed;
2351  }
2352 
2353  // Insert a new block in the circular linked list
2354  auto newBlock =
2355  this->parent
2356  ->ConcurrentQueue::template requisition_block<allocMode>();
2357  if (newBlock == nullptr) {
2358  pr_blockIndexFront = originalBlockIndexFront;
2359  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2360  this->tailBlock =
2361  startBlock == nullptr ? firstAllocatedBlock : startBlock;
2362  return false;
2363  }
2364 
2365 #if MCDBGQ_TRACKMEM
2366  newBlock->owner = this;
2367 #endif
2368  newBlock->ConcurrentQueue::Block::template set_all_empty<
2369  explicit_context>();
2370  if (this->tailBlock == nullptr) {
2371  newBlock->next = newBlock;
2372  } else {
2373  newBlock->next = this->tailBlock->next;
2374  this->tailBlock->next = newBlock;
2375  }
2376  this->tailBlock = newBlock;
2377  firstAllocatedBlock = firstAllocatedBlock == nullptr
2378  ? this->tailBlock
2379  : firstAllocatedBlock;
2380 
2381  ++pr_blockIndexSlotsUsed;
2382 
2383  auto &entry = blockIndex.load(std::memory_order_relaxed)
2384  ->entries[pr_blockIndexFront];
2385  entry.base = currentTailIndex;
2386  entry.block = this->tailBlock;
2387  pr_blockIndexFront =
2388  (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
2389  }
2390 
2391  // Excellent, all allocations succeeded. Reset each block's emptiness
2392  // before we fill them up, and publish the new block index front
2393  auto block = firstAllocatedBlock;
2394  while (true) {
2395  block->ConcurrentQueue::Block::template reset_empty<
2396  explicit_context>();
2397  if (block == this->tailBlock) {
2398  break;
2399  }
2400  block = block->next;
2401  }
2402 
2403  if (MOODYCAMEL_NOEXCEPT_CTOR(
2404  T, decltype(*itemFirst),
2405  new (nullptr) T(details::deref_noexcept(itemFirst)))) {
2406  blockIndex.load(std::memory_order_relaxed)
2407  ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1),
2408  std::memory_order_release);
2409  }
2410  }
2411 
2412  // Enqueue, one block at a time
2413  index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
2414  currentTailIndex = startTailIndex;
2415  auto endBlock = this->tailBlock;
2416  this->tailBlock = startBlock;
2417  assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||
2418  firstAllocatedBlock != nullptr || count == 0);
2419  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 &&
2420  firstAllocatedBlock != nullptr) {
2421  this->tailBlock = firstAllocatedBlock;
2422  }
2423  while (true) {
2424  auto stopIndex =
2425  (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
2426  static_cast<index_t>(BLOCK_SIZE);
2427  if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
2428  stopIndex = newTailIndex;
2429  }
2430  if (MOODYCAMEL_NOEXCEPT_CTOR(
2431  T, decltype(*itemFirst),
2432  new (nullptr) T(details::deref_noexcept(itemFirst)))) {
2433  while (currentTailIndex != stopIndex) {
2434  new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
2435  }
2436  } else {
2437  MOODYCAMEL_TRY {
2438  while (currentTailIndex != stopIndex) {
2439  // Must use copy constructor even if move constructor is available
2440  // because we may have to revert if there's an exception.
2441  // Sorry about the horrible templated next line, but it was the
2442  // only way to disable moving *at compile time*, which is
2443  // important because a type may only define a (noexcept) move
2444  // constructor, and so calls to the cctor will not compile, even
2445  // if they are in an if branch that will never be executed
2446  new ((*this->tailBlock)[currentTailIndex])
2447  T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(
2448  T, decltype(*itemFirst),
2449  new (nullptr) T(details::deref_noexcept(
2450  itemFirst)))>::eval(*itemFirst));
2451  ++currentTailIndex;
2452  ++itemFirst;
2453  }
2454  }
2455  MOODYCAMEL_CATCH(...) {
2456  // Oh dear, an exception's been thrown -- destroy the elements that
2457  // were enqueued so far and revert the entire bulk operation (we'll
2458  // keep any allocated blocks in our linked list for later, though).
2459  auto constructedStopIndex = currentTailIndex;
2460  auto lastBlockEnqueued = this->tailBlock;
2461 
2462  pr_blockIndexFront = originalBlockIndexFront;
2463  pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
2464  this->tailBlock =
2465  startBlock == nullptr ? firstAllocatedBlock : startBlock;
2466 
2467  if (!details::is_trivially_destructible<T>::value) {
2468  auto block = startBlock;
2469  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) ==
2470  0) {
2471  block = firstAllocatedBlock;
2472  }
2473  currentTailIndex = startTailIndex;
2474  while (true) {
2475  stopIndex =
2476  (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
2477  static_cast<index_t>(BLOCK_SIZE);
2478  if (details::circular_less_than<index_t>(constructedStopIndex,
2479  stopIndex)) {
2480  stopIndex = constructedStopIndex;
2481  }
2482  while (currentTailIndex != stopIndex) {
2483  (*block)[currentTailIndex++]->~T();
2484  }
2485  if (block == lastBlockEnqueued) {
2486  break;
2487  }
2488  block = block->next;
2489  }
2490  }
2491  MOODYCAMEL_RETHROW;
2492  }
2493  }
2494 
2495  if (this->tailBlock == endBlock) {
2496  assert(currentTailIndex == newTailIndex);
2497  break;
2498  }
2499  this->tailBlock = this->tailBlock->next;
2500  }
2501 
2502  if (!MOODYCAMEL_NOEXCEPT_CTOR(
2503  T, decltype(*itemFirst),
2504  new (nullptr) T(details::deref_noexcept(itemFirst))) &&
2505  firstAllocatedBlock != nullptr) {
2506  blockIndex.load(std::memory_order_relaxed)
2507  ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1),
2508  std::memory_order_release);
2509  }
2510 
2511  this->tailIndex.store(newTailIndex, std::memory_order_release);
2512  return true;
2513  }
2514 
2515  template <typename It>
2516  size_t dequeue_bulk(It &itemFirst, size_t max) {
2517  auto tail = this->tailIndex.load(std::memory_order_relaxed);
2518  auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
2519  auto desiredCount = static_cast<size_t>(
2520  tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) -
2521  overcommit));
2522  if (details::circular_less_than<size_t>(0, desiredCount)) {
2523  desiredCount = desiredCount < max ? desiredCount : max;
2524  std::atomic_thread_fence(std::memory_order_acquire);
2525 
2526  auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(
2527  desiredCount, std::memory_order_relaxed);
2529 
2530  tail = this->tailIndex.load(std::memory_order_acquire);
2531  auto actualCount =
2532  static_cast<size_t>(tail - (myDequeueCount - overcommit));
2533  if (details::circular_less_than<size_t>(0, actualCount)) {
2534  actualCount = desiredCount < actualCount ? desiredCount : actualCount;
2535  if (actualCount < desiredCount) {
2536  this->dequeueOvercommit.fetch_add(desiredCount - actualCount,
2537  std::memory_order_release);
2538  }
2539 
2540  // Get the first index. Note that since there's guaranteed to be at
2541  // least actualCount elements, this will never exceed tail.
2542  auto firstIndex =
2543  this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
2544 
2545  // Determine which block the first element is in
2546  auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
2547  auto localBlockIndexHead =
2548  localBlockIndex->front.load(std::memory_order_acquire);
2549 
2550  auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
2551  auto firstBlockBaseIndex =
2552  firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
2553  auto offset = static_cast<size_t>(
2554  static_cast<typename std::make_signed<index_t>::type>(
2555  firstBlockBaseIndex - headBase) /
2556  BLOCK_SIZE);
2557  auto indexIndex =
2558  (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
2559 
2560  // Iterate the blocks and dequeue
2561  auto index = firstIndex;
2562  do {
2563  auto firstIndexInBlock = index;
2564  auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
2565  static_cast<index_t>(BLOCK_SIZE);
2566  endIndex =
2567  details::circular_less_than<index_t>(
2568  firstIndex + static_cast<index_t>(actualCount), endIndex)
2569  ? firstIndex + static_cast<index_t>(actualCount)
2570  : endIndex;
2571  auto block = localBlockIndex->entries[indexIndex].block;
2572  if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&,
2573  details::deref_noexcept(itemFirst) =
2574  std::move((*(*block)[index])))) {
2575  while (index != endIndex) {
2576  auto &el = *((*block)[index]);
2577  *itemFirst++ = std::move(el);
2578  el.~T();
2579  ++index;
2580  }
2581  } else {
2582  MOODYCAMEL_TRY {
2583  while (index != endIndex) {
2584  auto &el = *((*block)[index]);
2585  *itemFirst = std::move(el);
2586  ++itemFirst;
2587  el.~T();
2588  ++index;
2589  }
2590  }
2591  MOODYCAMEL_CATCH(...) {
2592  // It's too late to revert the dequeue, but we can make sure
2593  // that all the dequeued objects are properly destroyed and the
2594  // block index (and empty count) are properly updated before we
2595  // propagate the exception
2596  do {
2597  block = localBlockIndex->entries[indexIndex].block;
2598  while (index != endIndex) {
2599  (*block)[index++]->~T();
2600  }
2601  block->ConcurrentQueue::Block::template set_many_empty<
2602  explicit_context>(
2603  firstIndexInBlock,
2604  static_cast<size_t>(endIndex - firstIndexInBlock));
2605  indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
2606 
2607  firstIndexInBlock = index;
2608  endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
2609  static_cast<index_t>(BLOCK_SIZE);
2610  endIndex =
2611  details::circular_less_than<index_t>(
2612  firstIndex + static_cast<index_t>(actualCount),
2613  endIndex)
2614  ? firstIndex + static_cast<index_t>(actualCount)
2615  : endIndex;
2616  } while (index != firstIndex + actualCount);
2617 
2618  MOODYCAMEL_RETHROW;
2619  }
2620  }
2621  block->ConcurrentQueue::Block::template set_many_empty<
2622  explicit_context>(
2623  firstIndexInBlock,
2624  static_cast<size_t>(endIndex - firstIndexInBlock));
2625  indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
2626  } while (index != firstIndex + actualCount);
2627 
2628  return actualCount;
2629  } else {
2630  // Wasn't anything to dequeue after all; make the effective dequeue
2631  // count eventually consistent
2632  this->dequeueOvercommit.fetch_add(desiredCount,
2633  std::memory_order_release);
2634  }
2635  }
2636 
2637  return 0;
2638  }
2639 
2640  private:
2641  struct BlockIndexEntry {
2642  index_t base;
2643  Block *block;
2644  };
2645 
2646  struct BlockIndexHeader {
2647  size_t size;
2648  std::atomic<size_t>
2649  front; // Current slot (not next, like pr_blockIndexFront)
2650  BlockIndexEntry *entries;
2651  void *prev;
2652  };
2653 
2654  bool new_block_index(size_t numberOfFilledSlotsToExpose) {
2655  auto prevBlockSizeMask = pr_blockIndexSize - 1;
2656 
2657  // Create the new block
2658  pr_blockIndexSize <<= 1;
2659  auto newRawPtr = static_cast<char *>((Traits::malloc)(
2660  sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value -
2661  1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));
2662  if (newRawPtr == nullptr) {
2663  pr_blockIndexSize >>= 1; // Reset to allow graceful retry
2664  return false;
2665  }
2666 
2667  auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry *>(
2668  details::align_for<BlockIndexEntry>(newRawPtr +
2669  sizeof(BlockIndexHeader)));
2670 
2671  // Copy in all the old indices, if any
2672  size_t j = 0;
2673  if (pr_blockIndexSlotsUsed != 0) {
2674  auto i =
2675  (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask;
2676  do {
2677  newBlockIndexEntries[j++] = pr_blockIndexEntries[i];
2678  i = (i + 1) & prevBlockSizeMask;
2679  } while (i != pr_blockIndexFront);
2680  }
2681 
2682  // Update everything
2683  auto header = new (newRawPtr) BlockIndexHeader;
2684  header->size = pr_blockIndexSize;
2685  header->front.store(numberOfFilledSlotsToExpose - 1,
2686  std::memory_order_relaxed);
2687  header->entries = newBlockIndexEntries;
2688  header->prev = pr_blockIndexRaw; // we link the new block to the old one
2689  // so we can free it later
2690 
2691  pr_blockIndexFront = j;
2692  pr_blockIndexEntries = newBlockIndexEntries;
2693  pr_blockIndexRaw = newRawPtr;
2694  blockIndex.store(header, std::memory_order_release);
2695 
2696  return true;
2697  }
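  // Growth pattern of the index above, as a sketch: pr_blockIndexSize doubles
  // on every call, so starting from the constructor's initial value the entry
  // capacity grows 32, 64, 128, ... (with the usual traits default of
  // EXPLICIT_INITIAL_INDEX_SIZE == 32; other traits change the sequence).
  // Each superseded header stays reachable through its `prev` pointer and is
  // released in ~ExplicitProducer().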
2698 
2699  private:
2700  std::atomic<BlockIndexHeader *> blockIndex;
2701 
2702  // To be used by producer only -- consumer must use the ones referenced
2703  // by blockIndex
2704  size_t pr_blockIndexSlotsUsed;
2705  size_t pr_blockIndexSize;
2706  size_t pr_blockIndexFront; // Next slot (not current)
2707  BlockIndexEntry *pr_blockIndexEntries;
2708  void *pr_blockIndexRaw;
2709 
2710 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
2711  public:
2712  ExplicitProducer *nextExplicitProducer;
2713 
2714  private:
2715 #endif
2716 
2717 #if MCDBGQ_TRACKMEM
2718  friend struct MemStats;
2719 #endif
2720  };
2721 
2723  // Implicit queue
2725 
2726  struct ImplicitProducer : public ProducerBase {
2727  ImplicitProducer(ConcurrentQueue *parent)
2728  : ProducerBase(parent, false),
2729  nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
2730  blockIndex(nullptr) {
2731  new_block_index();
2732  }
2733 
2734  ~ImplicitProducer() {
2735  // Note that since we're in the destructor we can assume that all
2736  // enqueue/dequeue operations completed already; this means that all
2737  // undequeued elements are placed contiguously across contiguous blocks,
2738  // and that only the first and last remaining blocks can be partially
2739  // empty (all other remaining blocks must be completely full).
2740 
2741 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
2742  // Unregister ourselves for thread termination notification
2743  if (!this->inactive.load(std::memory_order_relaxed)) {
2744  details::ThreadExitNotifier::unsubscribe(&threadExitListener);
2745  }
2746 #endif
2747 
2748  // Destroy all remaining elements!
2749  auto tail = this->tailIndex.load(std::memory_order_relaxed);
2750  auto index = this->headIndex.load(std::memory_order_relaxed);
2751  Block *block = nullptr;
2752  assert(index == tail || details::circular_less_than(index, tail));
2753  bool forceFreeLastBlock =
2754  index != tail; // If we enter the loop, then the last (tail) block
2755  // will not be freed
2756  while (index != tail) {
2757  if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ||
2758  block == nullptr) {
2759  if (block != nullptr) {
2760  // Free the old block
2761  this->parent->add_block_to_free_list(block);
2762  }
2763 
2764  block = get_block_index_entry_for_index(index)->value.load(
2765  std::memory_order_relaxed);
2766  }
2767 
2768  ((*block)[index])->~T();
2769  ++index;
2770  }
2771  // Even if the queue is empty, there's still one block that's not on the
2772  // free list (unless the head index reached the end of it, in which case
2773  // the tail will be poised to create a new block).
2774  if (this->tailBlock != nullptr &&
2775  (forceFreeLastBlock ||
2776  (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
2777  this->parent->add_block_to_free_list(this->tailBlock);
2778  }
2779 
2780  // Destroy block index
2781  auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
2782  if (localBlockIndex != nullptr) {
2783  for (size_t i = 0; i != localBlockIndex->capacity; ++i) {
2784  localBlockIndex->index[i]->~BlockIndexEntry();
2785  }
2786  do {
2787  auto prev = localBlockIndex->prev;
2788  localBlockIndex->~BlockIndexHeader();
2789  (Traits::free)(localBlockIndex);
2790  localBlockIndex = prev;
2791  } while (localBlockIndex != nullptr);
2792  }
2793  }
2794 
2795  template <AllocationMode allocMode, typename U>
2796  inline bool enqueue(U &&element) {
2797  index_t currentTailIndex =
2798  this->tailIndex.load(std::memory_order_relaxed);
2799  index_t newTailIndex = 1 + currentTailIndex;
2800  if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
2801  // We reached the end of a block, start a new one
2802  auto head = this->headIndex.load(std::memory_order_relaxed);
2803  assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2804  if (!details::circular_less_than<index_t>(
2805  head, currentTailIndex + BLOCK_SIZE) ||
2806  (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
2807  (MAX_SUBQUEUE_SIZE == 0 ||
2808  MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
2809  return false;
2810  }
2811 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2812  debug::DebugLock lock(mutex);
2813 #endif
2814  // Find out where we'll be inserting this block in the block index
2815  BlockIndexEntry *idxEntry;
2816  if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
2817  return false;
2818  }
2819 
2820  // Get ahold of a new block
2821  auto newBlock =
2822  this->parent
2823  ->ConcurrentQueue::template requisition_block<allocMode>();
2824  if (newBlock == nullptr) {
2825  rewind_block_index_tail();
2826  idxEntry->value.store(nullptr, std::memory_order_relaxed);
2827  return false;
2828  }
2829 #if MCDBGQ_TRACKMEM
2830  newBlock->owner = this;
2831 #endif
2832  newBlock
2833  ->ConcurrentQueue::Block::template reset_empty<implicit_context>();
2834 
2835  if (!MOODYCAMEL_NOEXCEPT_CTOR(
2836  T, U, new (nullptr) T(std::forward<U>(element)))) {
2837  // May throw, try to insert now before we publish the fact that we
2838  // have this new block
2839  MOODYCAMEL_TRY {
2840  new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
2841  }
2842  MOODYCAMEL_CATCH(...) {
2843  rewind_block_index_tail();
2844  idxEntry->value.store(nullptr, std::memory_order_relaxed);
2845  this->parent->add_block_to_free_list(newBlock);
2846  MOODYCAMEL_RETHROW;
2847  }
2848  }
2849 
2850  // Insert the new block into the index
2851  idxEntry->value.store(newBlock, std::memory_order_relaxed);
2852 
2853  this->tailBlock = newBlock;
2854 
2855  if (!MOODYCAMEL_NOEXCEPT_CTOR(
2856  T, U, new (nullptr) T(std::forward<U>(element)))) {
2857  this->tailIndex.store(newTailIndex, std::memory_order_release);
2858  return true;
2859  }
2860  }
2861 
2862  // Enqueue
2863  new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
2864 
2865  this->tailIndex.store(newTailIndex, std::memory_order_release);
2866  return true;
2867  }
2868 
2869  template <typename U>
2870  bool dequeue(U &element) {
2871  // See ExplicitProducer::dequeue for rationale and explanation
2872  index_t tail = this->tailIndex.load(std::memory_order_relaxed);
2873  index_t overcommit =
2874  this->dequeueOvercommit.load(std::memory_order_relaxed);
2875  if (details::circular_less_than<index_t>(
2876  this->dequeueOptimisticCount.load(std::memory_order_relaxed) -
2877  overcommit,
2878  tail)) {
2879  std::atomic_thread_fence(std::memory_order_acquire);
2880 
2881  index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(
2882  1, std::memory_order_relaxed);
2883  tail = this->tailIndex.load(std::memory_order_acquire);
2884  if ((details::likely)(details::circular_less_than<index_t>(
2885  myDequeueCount - overcommit, tail))) {
2886  index_t index =
2887  this->headIndex.fetch_add(1, std::memory_order_acq_rel);
2888 
2889  // Determine which block the element is in
2890  auto entry = get_block_index_entry_for_index(index);
2891 
2892  // Dequeue
2893  auto block = entry->value.load(std::memory_order_relaxed);
2894  auto &el = *((*block)[index]);
2895 
2896  if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) {
2897 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2898  // Note: Acquiring the mutex with every dequeue instead of only when
2899  // a block is released is very sub-optimal, but it is, after all,
2900  // purely debug code.
2901  debug::DebugLock lock(producer->mutex);
2902 #endif
2903  struct Guard {
2904  Block *block;
2905  index_t index;
2906  BlockIndexEntry *entry;
2907  ConcurrentQueue *parent;
2908 
2909  ~Guard() {
2910  (*block)[index]->~T();
2911  if (block->ConcurrentQueue::Block::template set_empty<
2912  implicit_context>(index)) {
2913  entry->value.store(nullptr, std::memory_order_relaxed);
2914  parent->add_block_to_free_list(block);
2915  }
2916  }
2917  } guard = {block, index, entry, this->parent};
2918 
2919  element = std::move(el);
2920  } else {
2921  element = std::move(el);
2922  el.~T();
2923 
2924  if (block->ConcurrentQueue::Block::template set_empty<
2925  implicit_context>(index)) {
2926  {
2927 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2928  debug::DebugLock lock(mutex);
2929 #endif
2930  // Add the block back into the global free pool (and remove from
2931  // block index)
2932  entry->value.store(nullptr, std::memory_order_relaxed);
2933  }
2934  this->parent->add_block_to_free_list(
2935  block); // releases the above store
2936  }
2937  }
2938 
2939  return true;
2940  } else {
2941  this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
2942  }
2943  }
2944 
2945  return false;
2946  }
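  // The Guard struct above is a small RAII helper: if the move-assignment into
  // `element` can throw, its destructor still runs the element's destructor,
  // marks the slot empty, and (for the implicit case) hands a fully-emptied
  // block back to the free list, so an exception cannot leak a constructed T
  // or strand a block outside the index.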
2947 
2948  template <AllocationMode allocMode, typename It>
2949  bool enqueue_bulk(It itemFirst, size_t count) {
2950  // First, we need to make sure we have enough room to enqueue all of the
2951  // elements; this means pre-allocating blocks and putting them in the
2952  // block index (but only if all the allocations succeeded).
2953 
2954  // Note that the tailBlock we start off with may not be owned by us any
2955  // more; this happens if it was filled up exactly to the top (setting
2956  // tailIndex to the first index of the next block which is not yet
2957  // allocated), then dequeued completely (putting it on the free list)
2958  // before we enqueue again.
2959 
2960  index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
2961  auto startBlock = this->tailBlock;
2962  Block *firstAllocatedBlock = nullptr;
2963  auto endBlock = this->tailBlock;
2964 
2965  // Figure out how many blocks we'll need to allocate, and do so
2966  size_t blockBaseDiff =
2967  ((startTailIndex + count - 1) &
2968  ~static_cast<index_t>(BLOCK_SIZE - 1)) -
2969  ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
2970  index_t currentTailIndex =
2971  (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
2972  if (blockBaseDiff > 0) {
2973 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
2974  debug::DebugLock lock(mutex);
2975 #endif
2976  do {
2977  blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
2978  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
2979 
2980  // Find out where we'll be inserting this block in the block index
2981  BlockIndexEntry *idxEntry =
2982  nullptr; // initialization here unnecessary but compiler can't
2983  // always tell
2984  Block *newBlock;
2985  bool indexInserted = false;
2986  auto head = this->headIndex.load(std::memory_order_relaxed);
2987  assert(!details::circular_less_than<index_t>(currentTailIndex, head));
2988  bool full =
2989  !details::circular_less_than<index_t>(
2990  head, currentTailIndex + BLOCK_SIZE) ||
2991  (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
2992  (MAX_SUBQUEUE_SIZE == 0 ||
2993  MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
2994  if (full ||
2995  !(indexInserted = insert_block_index_entry<allocMode>(
2996  idxEntry, currentTailIndex)) ||
2997  (newBlock =
2998  this->parent->ConcurrentQueue::template requisition_block<
2999  allocMode>()) == nullptr) {
3000  // Index allocation or block allocation failed; revert any other
3001  // allocations and index insertions done so far for this operation
3002  if (indexInserted) {
3003  rewind_block_index_tail();
3004  idxEntry->value.store(nullptr, std::memory_order_relaxed);
3005  }
3006  currentTailIndex =
3007  (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
3008  for (auto block = firstAllocatedBlock; block != nullptr;
3009  block = block->next) {
3010  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
3011  idxEntry = get_block_index_entry_for_index(currentTailIndex);
3012  idxEntry->value.store(nullptr, std::memory_order_relaxed);
3013  rewind_block_index_tail();
3014  }
3015  this->parent->add_blocks_to_free_list(firstAllocatedBlock);
3016  this->tailBlock = startBlock;
3017 
3018  return false;
3019  }
3020 
3021 #if MCDBGQ_TRACKMEM
3022  newBlock->owner = this;
3023 #endif
3024  newBlock->ConcurrentQueue::Block::template reset_empty<
3025  implicit_context>();
3026  newBlock->next = nullptr;
3027 
3028  // Insert the new block into the index
3029  idxEntry->value.store(newBlock, std::memory_order_relaxed);
3030 
3031  // Store the chain of blocks so that we can undo if later allocations
3032  // fail, and so that we can find the blocks when we do the actual
3033  // enqueueing
3034  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||
3035  firstAllocatedBlock != nullptr) {
3036  assert(this->tailBlock != nullptr);
3037  this->tailBlock->next = newBlock;
3038  }
3039  this->tailBlock = newBlock;
3040  endBlock = newBlock;
3041  firstAllocatedBlock =
3042  firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
3043  } while (blockBaseDiff > 0);
3044  }
3045 
3046  // Enqueue, one block at a time
3047  index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
3048  currentTailIndex = startTailIndex;
3049  this->tailBlock = startBlock;
3050  assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||
3051  firstAllocatedBlock != nullptr || count == 0);
3052  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 &&
3053  firstAllocatedBlock != nullptr) {
3054  this->tailBlock = firstAllocatedBlock;
3055  }
3056  while (true) {
3057  auto stopIndex =
3058  (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
3059  static_cast<index_t>(BLOCK_SIZE);
3060  if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
3061  stopIndex = newTailIndex;
3062  }
3063  if (MOODYCAMEL_NOEXCEPT_CTOR(
3064  T, decltype(*itemFirst),
3065  new (nullptr) T(details::deref_noexcept(itemFirst)))) {
3066  while (currentTailIndex != stopIndex) {
3067  new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
3068  }
3069  } else {
3070  MOODYCAMEL_TRY {
3071  while (currentTailIndex != stopIndex) {
3072  new ((*this->tailBlock)[currentTailIndex])
3073  T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(
3074  T, decltype(*itemFirst),
3075  new (nullptr) T(details::deref_noexcept(
3076  itemFirst)))>::eval(*itemFirst));
3077  ++currentTailIndex;
3078  ++itemFirst;
3079  }
3080  }
3081  MOODYCAMEL_CATCH(...) {
3082  auto constructedStopIndex = currentTailIndex;
3083  auto lastBlockEnqueued = this->tailBlock;
3084 
3085  if (!details::is_trivially_destructible<T>::value) {
3086  auto block = startBlock;
3087  if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) ==
3088  0) {
3089  block = firstAllocatedBlock;
3090  }
3091  currentTailIndex = startTailIndex;
3092  while (true) {
3093  stopIndex =
3094  (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
3095  static_cast<index_t>(BLOCK_SIZE);
3096  if (details::circular_less_than<index_t>(constructedStopIndex,
3097  stopIndex)) {
3098  stopIndex = constructedStopIndex;
3099  }
3100  while (currentTailIndex != stopIndex) {
3101  (*block)[currentTailIndex++]->~T();
3102  }
3103  if (block == lastBlockEnqueued) {
3104  break;
3105  }
3106  block = block->next;
3107  }
3108  }
3109 
3110  currentTailIndex =
3111  (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
3112  for (auto block = firstAllocatedBlock; block != nullptr;
3113  block = block->next) {
3114  currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
3115  auto idxEntry = get_block_index_entry_for_index(currentTailIndex);
3116  idxEntry->value.store(nullptr, std::memory_order_relaxed);
3117  rewind_block_index_tail();
3118  }
3119  this->parent->add_blocks_to_free_list(firstAllocatedBlock);
3120  this->tailBlock = startBlock;
3121  MOODYCAMEL_RETHROW;
3122  }
3123  }
3124 
3125  if (this->tailBlock == endBlock) {
3126  assert(currentTailIndex == newTailIndex);
3127  break;
3128  }
3129  this->tailBlock = this->tailBlock->next;
3130  }
3131  this->tailIndex.store(newTailIndex, std::memory_order_release);
3132  return true;
3133  }
3134 
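  // A minimal usage sketch of the tokenless bulk API that drives the enqueue
  // path above (assuming concurrentqueue.h and <vector> are included; the
  // names q, items and out are illustrative only, not part of this file):
  //
  //   moodycamel::ConcurrentQueue<int> q;
  //   std::vector<int> items{1, 2, 3, 4};
  //   q.enqueue_bulk(items.begin(), items.size());  // single bulk insert
  //   int out[4];
  //   std::size_t n = q.try_dequeue_bulk(out, 4);   // n <= 4 items removed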
3135  template <typename It>
3136  size_t dequeue_bulk(It &itemFirst, size_t max) {
3137  auto tail = this->tailIndex.load(std::memory_order_relaxed);
3138  auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
3139  auto desiredCount = static_cast<size_t>(
3140  tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) -
3141  overcommit));
3142  if (details::circular_less_than<size_t>(0, desiredCount)) {
3143  desiredCount = desiredCount < max ? desiredCount : max;
3144  std::atomic_thread_fence(std::memory_order_acquire);
3145 
3146  auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(
3147  desiredCount, std::memory_order_relaxed);
3148 
3149  tail = this->tailIndex.load(std::memory_order_acquire);
3150  auto actualCount =
3151  static_cast<size_t>(tail - (myDequeueCount - overcommit));
3152  if (details::circular_less_than<size_t>(0, actualCount)) {
3153  actualCount = desiredCount < actualCount ? desiredCount : actualCount;
3154  if (actualCount < desiredCount) {
3155  this->dequeueOvercommit.fetch_add(desiredCount - actualCount,
3156  std::memory_order_release);
3157  }
3158 
3159  // Get the first index. Note that since there's guaranteed to be at
3160  // least actualCount elements, this will never exceed tail.
3161  auto firstIndex =
3162  this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
3163 
3164  // Iterate the blocks and dequeue
3165  auto index = firstIndex;
3166  BlockIndexHeader *localBlockIndex;
3167  auto indexIndex =
3168  get_block_index_index_for_index(index, localBlockIndex);
3169  do {
3170  auto blockStartIndex = index;
3171  auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
3172  static_cast<index_t>(BLOCK_SIZE);
3173  endIndex =
3174  details::circular_less_than<index_t>(
3175  firstIndex + static_cast<index_t>(actualCount), endIndex)
3176  ? firstIndex + static_cast<index_t>(actualCount)
3177  : endIndex;
3178 
3179  auto entry = localBlockIndex->index[indexIndex];
3180  auto block = entry->value.load(std::memory_order_relaxed);
3181  if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&,
3182  details::deref_noexcept(itemFirst) =
3183  std::move((*(*block)[index])))) {
3184  while (index != endIndex) {
3185  auto &el = *((*block)[index]);
3186  *itemFirst++ = std::move(el);
3187  el.~T();
3188  ++index;
3189  }
3190  } else {
3191  MOODYCAMEL_TRY {
3192  while (index != endIndex) {
3193  auto &el = *((*block)[index]);
3194  *itemFirst = std::move(el);
3195  ++itemFirst;
3196  el.~T();
3197  ++index;
3198  }
3199  }
3200  MOODYCAMEL_CATCH(...) {
3201  do {
3202  entry = localBlockIndex->index[indexIndex];
3203  block = entry->value.load(std::memory_order_relaxed);
3204  while (index != endIndex) {
3205  (*block)[index++]->~T();
3206  }
3207 
3208  if (block->ConcurrentQueue::Block::template set_many_empty<
3209  implicit_context>(
3210  blockStartIndex,
3211  static_cast<size_t>(endIndex - blockStartIndex))) {
3212 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3213  debug::DebugLock lock(mutex);
3214 #endif
3215  entry->value.store(nullptr, std::memory_order_relaxed);
3216  this->parent->add_block_to_free_list(block);
3217  }
3218  indexIndex =
3219  (indexIndex + 1) & (localBlockIndex->capacity - 1);
3220 
3221  blockStartIndex = index;
3222  endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
3223  static_cast<index_t>(BLOCK_SIZE);
3224  endIndex =
3225  details::circular_less_than<index_t>(
3226  firstIndex + static_cast<index_t>(actualCount),
3227  endIndex)
3228  ? firstIndex + static_cast<index_t>(actualCount)
3229  : endIndex;
3230  } while (index != firstIndex + actualCount);
3231 
3233  }
3234  }
3235  if (block->ConcurrentQueue::Block::template set_many_empty<
3236  implicit_context>(
3237  blockStartIndex,
3238  static_cast<size_t>(endIndex - blockStartIndex))) {
3239  {
3240 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3241  debug::DebugLock lock(mutex);
3242 #endif
3243  // Note that the set_many_empty above did a release, meaning
3244  // that anybody who acquires the block we're about to free can
3245  // use it safely since our writes (and reads!) will have
3246  // happened-before then.
3247  entry->value.store(nullptr, std::memory_order_relaxed);
3248  }
3249  this->parent->add_block_to_free_list(
3250  block); // releases the above store
3251  }
3252  indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
3253  } while (index != firstIndex + actualCount);
3254 
3255  return actualCount;
3256  } else {
3257  this->dequeueOvercommit.fetch_add(desiredCount,
3258  std::memory_order_release);
3259  }
3260  }
3261 
3262  return 0;
3263  }
3264 
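  // In dequeue_bulk above, consumers first optimistically reserve elements by
  // bumping dequeueOptimisticCount; if tailIndex turns out to cover fewer
  // elements than were reserved, the shortfall is credited back through
  // dequeueOvercommit so later dequeues compute an accurate element count.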
3265  private:
3266  // The block size must be > 1, so any number with the low bit set is an
3267  // invalid block base index
3268  static const index_t INVALID_BLOCK_BASE = 1;
3269 
3270  struct BlockIndexEntry {
3271  std::atomic<index_t> key;
3272  std::atomic<Block *> value;
3273  };
3274 
3275  struct BlockIndexHeader {
3276  size_t capacity;
3277  std::atomic<size_t> tail;
3278  BlockIndexEntry *entries;
3279  BlockIndexEntry **index;
3280  BlockIndexHeader *prev;
3281  };
3282 
3283  template <AllocationMode allocMode>
3284  inline bool insert_block_index_entry(BlockIndexEntry *&idxEntry,
3285  index_t blockStartIndex) {
3286  auto localBlockIndex =
3287  blockIndex.load(std::memory_order_relaxed); // We're the only writer
3288  // thread, relaxed is OK
3289  if (localBlockIndex == nullptr) {
3290  return false; // this can happen if new_block_index failed in the
3291  // constructor
3292  }
3293  auto newTail =
3294  (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) &
3295  (localBlockIndex->capacity - 1);
3296  idxEntry = localBlockIndex->index[newTail];
3297  if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
3298  idxEntry->value.load(std::memory_order_relaxed) == nullptr) {
3299  idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
3300  localBlockIndex->tail.store(newTail, std::memory_order_release);
3301  return true;
3302  }
3303 
3304  // No room in the old block index, try to allocate another one!
3305  if (allocMode == CannotAlloc || !new_block_index()) {
3306  return false;
3307  }
3308  localBlockIndex = blockIndex.load(std::memory_order_relaxed);
3309  newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) &
3310  (localBlockIndex->capacity - 1);
3311  idxEntry = localBlockIndex->index[newTail];
3312  assert(idxEntry->key.load(std::memory_order_relaxed) ==
3313  INVALID_BLOCK_BASE);
3314  idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
3315  localBlockIndex->tail.store(newTail, std::memory_order_release);
3316  return true;
3317  }
3318 
3319  inline void rewind_block_index_tail() {
3320  auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
3321  localBlockIndex->tail.store(
3322  (localBlockIndex->tail.load(std::memory_order_relaxed) - 1) &
3323  (localBlockIndex->capacity - 1),
3324  std::memory_order_relaxed);
3325  }
3326 
3327  inline BlockIndexEntry *get_block_index_entry_for_index(
3328  index_t index) const {
3329  BlockIndexHeader *localBlockIndex;
3330  auto idx = get_block_index_index_for_index(index, localBlockIndex);
3331  return localBlockIndex->index[idx];
3332  }
3333 
3334  inline size_t get_block_index_index_for_index(
3335  index_t index, BlockIndexHeader *&localBlockIndex) const {
3336 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3337  debug::DebugLock lock(mutex);
3338 #endif
3339  index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
3340  localBlockIndex = blockIndex.load(std::memory_order_acquire);
3341  auto tail = localBlockIndex->tail.load(std::memory_order_acquire);
3342  auto tailBase =
3343  localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);
3344  assert(tailBase != INVALID_BLOCK_BASE);
3345  // Note: Must use division instead of shift because the index may wrap
3346  // around, causing a negative offset, whose negativity we want to preserve
3347  auto offset = static_cast<size_t>(
3348  static_cast<typename std::make_signed<index_t>::type>(index -
3349  tailBase) /
3350  BLOCK_SIZE);
3351  size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);
3352  assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) ==
3353  index &&
3354  localBlockIndex->index[idx]->value.load(
3355  std::memory_order_relaxed) != nullptr);
3356  return idx;
3357  }
3358 
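  // Note on the division above: index and tailBase are both block-aligned, so
  // their difference is a multiple of BLOCK_SIZE; casting it to a signed type
  // keeps a base that sits behind the tail negative, and signed division
  // preserves that sign so the masked addition to `tail` walks backwards
  // through the ring.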
3359  bool new_block_index() {
3360  auto prev = blockIndex.load(std::memory_order_relaxed);
3361  size_t prevCapacity = prev == nullptr ? 0 : prev->capacity;
3362  auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity;
3363  auto raw = static_cast<char *>((Traits::malloc)(
3364  sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value -
3365  1 + sizeof(BlockIndexEntry) * entryCount +
3366  std::alignment_of<BlockIndexEntry *>::value - 1 +
3367  sizeof(BlockIndexEntry *) * nextBlockIndexCapacity));
3368  if (raw == nullptr) {
3369  return false;
3370  }
3371 
3372  auto header = new (raw) BlockIndexHeader;
3373  auto entries = reinterpret_cast<BlockIndexEntry *>(
3374  details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));
3375  auto index = reinterpret_cast<BlockIndexEntry **>(
3376  details::align_for<BlockIndexEntry *>(
3377  reinterpret_cast<char *>(entries) +
3378  sizeof(BlockIndexEntry) * entryCount));
3379  if (prev != nullptr) {
3380  auto prevTail = prev->tail.load(std::memory_order_relaxed);
3381  auto prevPos = prevTail;
3382  size_t i = 0;
3383  do {
3384  prevPos = (prevPos + 1) & (prev->capacity - 1);
3385  index[i++] = prev->index[prevPos];
3386  } while (prevPos != prevTail);
3387  assert(i == prevCapacity);
3388  }
3389  for (size_t i = 0; i != entryCount; ++i) {
3390  new (entries + i) BlockIndexEntry;
3391  entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);
3392  index[prevCapacity + i] = entries + i;
3393  }
3394  header->prev = prev;
3395  header->entries = entries;
3396  header->index = index;
3397  header->capacity = nextBlockIndexCapacity;
3398  header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1),
3399  std::memory_order_relaxed);
3400 
3401  blockIndex.store(header, std::memory_order_release);
3402 
3403  nextBlockIndexCapacity <<= 1;
3404 
3405  return true;
3406  }
3407 
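  // new_block_index above grows the block index geometrically: the header, the
  // new entries and the pointer ring all live in a single allocation, the
  // previous ring's entry pointers are copied over so existing blocks stay
  // reachable, and the old header remains linked via header->prev until the
  // producer is destroyed.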
3408  private:
3409  size_t nextBlockIndexCapacity;
3410  std::atomic<BlockIndexHeader *> blockIndex;
3411 
3412 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3413  public:
3414  details::ThreadExitListener threadExitListener;
3415 
3416  private:
3417 #endif
3418 
3419 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
3420  public:
3421  ImplicitProducer *nextImplicitProducer;
3422 
3423  private:
3424 #endif
3425 
3426 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
3427  mutable debug::DebugMutex mutex;
3428 #endif
3429 #if MCDBGQ_TRACKMEM
3430  friend struct MemStats;
3431 #endif
3432  };
3433 
3435  // Block pool manipulation
3437 
3438  void populate_initial_block_list(size_t blockCount) {
3439  initialBlockPoolSize = blockCount;
3440  if (initialBlockPoolSize == 0) {
3441  initialBlockPool = nullptr;
3442  return;
3443  }
3444 
3445  initialBlockPool = create_array<Block>(blockCount);
3446  if (initialBlockPool == nullptr) {
3447  initialBlockPoolSize = 0;
3448  }
3449  for (size_t i = 0; i < initialBlockPoolSize; ++i) {
3450  initialBlockPool[i].dynamicallyAllocated = false;
3451  }
3452  }
3453 
3454  inline Block *try_get_block_from_initial_pool() {
3455  if (initialBlockPoolIndex.load(std::memory_order_relaxed) >=
3456  initialBlockPoolSize) {
3457  return nullptr;
3458  }
3459 
3460  auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
3461 
3462  return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr;
3463  }
3464 
3465  inline void add_block_to_free_list(Block *block) {
3466 #if MCDBGQ_TRACKMEM
3467  block->owner = nullptr;
3468 #endif
3469  freeList.add(block);
3470  }
3471 
3472  inline void add_blocks_to_free_list(Block *block) {
3473  while (block != nullptr) {
3474  auto next = block->next;
3475  add_block_to_free_list(block);
3476  block = next;
3477  }
3478  }
3479 
3480  inline Block *try_get_block_from_free_list() { return freeList.try_get(); }
3481 
3482  // Gets a free block from one of the memory pools, or allocates a new one (if
3483  // applicable)
3484  template <AllocationMode canAlloc>
3485  Block *requisition_block() {
3486  auto block = try_get_block_from_initial_pool();
3487  if (block != nullptr) {
3488  return block;
3489  }
3490 
3491  block = try_get_block_from_free_list();
3492  if (block != nullptr) {
3493  return block;
3494  }
3495 
3496  if (canAlloc == CanAlloc) {
3497  return create<Block>();
3498  }
3499 
3500  return nullptr;
3501  }
3502 
3503 #if MCDBGQ_TRACKMEM
3504  public:
3505  struct MemStats {
3506  size_t allocatedBlocks;
3507  size_t usedBlocks;
3508  size_t freeBlocks;
3509  size_t ownedBlocksExplicit;
3510  size_t ownedBlocksImplicit;
3511  size_t implicitProducers;
3512  size_t explicitProducers;
3513  size_t elementsEnqueued;
3514  size_t blockClassBytes;
3515  size_t queueClassBytes;
3516  size_t implicitBlockIndexBytes;
3517  size_t explicitBlockIndexBytes;
3518 
3519  friend class ConcurrentQueue;
3520 
3521  private:
3522  static MemStats getFor(ConcurrentQueue *q) {
3523  MemStats stats = {0};
3524 
3525  stats.elementsEnqueued = q->size_approx();
3526 
3527  auto block = q->freeList.head_unsafe();
3528  while (block != nullptr) {
3529  ++stats.allocatedBlocks;
3530  ++stats.freeBlocks;
3531  block = block->freeListNext.load(std::memory_order_relaxed);
3532  }
3533 
3534  for (auto ptr = q->producerListTail.load(std::memory_order_acquire);
3535  ptr != nullptr; ptr = ptr->next_prod()) {
3536  bool implicit = dynamic_cast<ImplicitProducer *>(ptr) != nullptr;
3537  stats.implicitProducers += implicit ? 1 : 0;
3538  stats.explicitProducers += implicit ? 0 : 1;
3539 
3540  if (implicit) {
3541  auto prod = static_cast<ImplicitProducer *>(ptr);
3542  stats.queueClassBytes += sizeof(ImplicitProducer);
3543  auto head = prod->headIndex.load(std::memory_order_relaxed);
3544  auto tail = prod->tailIndex.load(std::memory_order_relaxed);
3545  auto hash = prod->blockIndex.load(std::memory_order_relaxed);
3546  if (hash != nullptr) {
3547  for (size_t i = 0; i != hash->capacity; ++i) {
3548  if (hash->index[i]->key.load(std::memory_order_relaxed) !=
3549  ImplicitProducer::INVALID_BLOCK_BASE &&
3550  hash->index[i]->value.load(std::memory_order_relaxed) !=
3551  nullptr) {
3552  ++stats.allocatedBlocks;
3553  ++stats.ownedBlocksImplicit;
3554  }
3555  }
3556  stats.implicitBlockIndexBytes +=
3557  hash->capacity *
3558  sizeof(typename ImplicitProducer::BlockIndexEntry);
3559  for (; hash != nullptr; hash = hash->prev) {
3560  stats.implicitBlockIndexBytes +=
3561  sizeof(typename ImplicitProducer::BlockIndexHeader) +
3562  hash->capacity *
3563  sizeof(typename ImplicitProducer::BlockIndexEntry *);
3564  }
3565  }
3566  for (; details::circular_less_than<index_t>(head, tail);
3567  head += BLOCK_SIZE) {
3568  // auto block = prod->get_block_index_entry_for_index(head);
3569  ++stats.usedBlocks;
3570  }
3571  } else {
3572  auto prod = static_cast<ExplicitProducer *>(ptr);
3573  stats.queueClassBytes += sizeof(ExplicitProducer);
3574  auto tailBlock = prod->tailBlock;
3575  bool wasNonEmpty = false;
3576  if (tailBlock != nullptr) {
3577  auto block = tailBlock;
3578  do {
3579  ++stats.allocatedBlocks;
3580  if (!block->ConcurrentQueue::Block::template is_empty<
3581  explicit_context>() ||
3582  wasNonEmpty) {
3583  ++stats.usedBlocks;
3584  wasNonEmpty = wasNonEmpty || block != tailBlock;
3585  }
3586  ++stats.ownedBlocksExplicit;
3587  block = block->next;
3588  } while (block != tailBlock);
3589  }
3590  auto index = prod->blockIndex.load(std::memory_order_relaxed);
3591  while (index != nullptr) {
3592  stats.explicitBlockIndexBytes +=
3593  sizeof(typename ExplicitProducer::BlockIndexHeader) +
3594  index->size *
3595  sizeof(typename ExplicitProducer::BlockIndexEntry);
3596  index = static_cast<typename ExplicitProducer::BlockIndexHeader *>(
3597  index->prev);
3598  }
3599  }
3600  }
3601 
3602  auto freeOnInitialPool =
3603  q->initialBlockPoolIndex.load(std::memory_order_relaxed) >=
3604  q->initialBlockPoolSize
3605  ? 0
3606  : q->initialBlockPoolSize -
3607  q->initialBlockPoolIndex.load(std::memory_order_relaxed);
3608  stats.allocatedBlocks += freeOnInitialPool;
3609  stats.freeBlocks += freeOnInitialPool;
3610 
3611  stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
3612  stats.queueClassBytes += sizeof(ConcurrentQueue);
3613 
3614  return stats;
3615  }
3616  };
3617 
3618  // For debugging only. Not thread-safe.
3619  MemStats getMemStats() { return MemStats::getFor(this); }
3620 
3621  private:
3622  friend struct MemStats;
3623 #endif
3624 
3626  // Producer list manipulation
3628 
3629  ProducerBase *recycle_or_create_producer(bool isExplicit) {
3630  bool recycled;
3631  return recycle_or_create_producer(isExplicit, recycled);
3632  }
3633 
3634  ProducerBase *recycle_or_create_producer(bool isExplicit, bool &recycled) {
3635 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3636  debug::DebugLock lock(implicitProdMutex);
3637 #endif
3638  // Try to re-use one first
3639  for (auto ptr = producerListTail.load(std::memory_order_acquire);
3640  ptr != nullptr; ptr = ptr->next_prod()) {
3641  if (ptr->inactive.load(std::memory_order_relaxed) &&
3642  ptr->isExplicit == isExplicit) {
3643  bool expected = true;
3644  if (ptr->inactive.compare_exchange_strong(
3645  expected, /* desired */
3646  false, std::memory_order_acquire, std::memory_order_relaxed)) {
3647  // We caught one! It's been marked as activated, the caller can have
3648  // it
3649  recycled = true;
3650  return ptr;
3651  }
3652  }
3653  }
3654 
3655  recycled = false;
3656  return add_producer(
3657  isExplicit ? static_cast<ProducerBase *>(create<ExplicitProducer>(this))
3658  : create<ImplicitProducer>(this));
3659  }
3660 
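  // Recycling above hinges on the producer's `inactive` flag: a producer that
  // has been released (explicit token destroyed, or owning thread exited for
  // implicit producers) is reclaimed with a single compare_exchange_strong
  // instead of allocating a fresh producer for every short-lived user.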
3661  ProducerBase *add_producer(ProducerBase *producer) {
3662  // Handle failed memory allocation
3663  if (producer == nullptr) {
3664  return nullptr;
3665  }
3666 
3667  producerCount.fetch_add(1, std::memory_order_relaxed);
3668 
3669  // Add it to the lock-free list
3670  auto prevTail = producerListTail.load(std::memory_order_relaxed);
3671  do {
3672  producer->next = prevTail;
3673  } while (!producerListTail.compare_exchange_weak(
3674  prevTail, producer, std::memory_order_release,
3675  std::memory_order_relaxed));
3676 
3677 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
3678  if (producer->isExplicit) {
3679  auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);
3680  do {
3681  static_cast<ExplicitProducer *>(producer)->nextExplicitProducer =
3682  prevTailExplicit;
3683  } while (!explicitProducers.compare_exchange_weak(
3684  prevTailExplicit, static_cast<ExplicitProducer *>(producer),
3685  std::memory_order_release, std::memory_order_relaxed));
3686  } else {
3687  auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);
3688  do {
3689  static_cast<ImplicitProducer *>(producer)->nextImplicitProducer =
3690  prevTailImplicit;
3691  } while (!implicitProducers.compare_exchange_weak(
3692  prevTailImplicit, static_cast<ImplicitProducer *>(producer),
3693  std::memory_order_release, std::memory_order_relaxed));
3694  }
3695 #endif
3696 
3697  return producer;
3698  }
3699 
3700  void reown_producers() {
3701  // After another instance is moved-into/swapped-with this one, all the
3702  // producers we stole still think their parents are the other queue.
3703  // So fix them up!
3704  for (auto ptr = producerListTail.load(std::memory_order_relaxed);
3705  ptr != nullptr; ptr = ptr->next_prod()) {
3706  ptr->parent = this;
3707  }
3708  }
3709 
3711  // Implicit producer hash
3713 
3714  struct ImplicitProducerKVP {
3715  std::atomic<details::thread_id_t> key;
3716  ImplicitProducer *value; // No need for atomicity since it's only read by
3717  // the thread that sets it in the first place
3718 
3719  ImplicitProducerKVP() : value(nullptr) {}
3720 
3721  ImplicitProducerKVP(ImplicitProducerKVP &&other) MOODYCAMEL_NOEXCEPT {
3722  key.store(other.key.load(std::memory_order_relaxed),
3723  std::memory_order_relaxed);
3724  value = other.value;
3725  }
3726 
3727  inline ImplicitProducerKVP &operator=(ImplicitProducerKVP &&other)
3728  MOODYCAMEL_NOEXCEPT {
3729  swap(other);
3730  return *this;
3731  }
3732 
3733  inline void swap(ImplicitProducerKVP &other) MOODYCAMEL_NOEXCEPT {
3734  if (this != &other) {
3735  details::swap_relaxed(key, other.key);
3736  std::swap(value, other.value);
3737  }
3738  }
3739  };
3740 
3741  template <typename XT, typename XTraits>
3742  friend void moodycamel::swap(
3743  typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP &,
3744  typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP &)
3745  MOODYCAMEL_NOEXCEPT;
3746 
3747  struct ImplicitProducerHash {
3748  size_t capacity;
3749  ImplicitProducerKVP *entries;
3750  ImplicitProducerHash *prev;
3751  };
3752 
3753  inline void populate_initial_implicit_producer_hash() {
3754  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
3755 
3756  implicitProducerHashCount.store(0, std::memory_order_relaxed);
3757  auto hash = &initialImplicitProducerHash;
3758  hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
3759  hash->entries = &initialImplicitProducerHashEntries[0];
3760  for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
3761  initialImplicitProducerHashEntries[i].key.store(
3762  details::invalid_thread_id, std::memory_order_relaxed);
3763  }
3764  hash->prev = nullptr;
3765  implicitProducerHash.store(hash, std::memory_order_relaxed);
3766  }
3767 
3768  void swap_implicit_producer_hashes(ConcurrentQueue &other) {
3769  if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
3770 
3771  // Swap (assumes our implicit producer hash is initialized)
3772  initialImplicitProducerHashEntries.swap(
3773  other.initialImplicitProducerHashEntries);
3774  initialImplicitProducerHash.entries =
3775  &initialImplicitProducerHashEntries[0];
3776  other.initialImplicitProducerHash.entries =
3777  &other.initialImplicitProducerHashEntries[0];
3778 
3779  details::swap_relaxed(implicitProducerHashCount,
3780  other.implicitProducerHashCount);
3781 
3782  details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
3783  if (implicitProducerHash.load(std::memory_order_relaxed) ==
3784  &other.initialImplicitProducerHash) {
3785  implicitProducerHash.store(&initialImplicitProducerHash,
3786  std::memory_order_relaxed);
3787  } else {
3788  ImplicitProducerHash *hash;
3789  for (hash = implicitProducerHash.load(std::memory_order_relaxed);
3790  hash->prev != &other.initialImplicitProducerHash;
3791  hash = hash->prev) {
3792  continue;
3793  }
3794  hash->prev = &initialImplicitProducerHash;
3795  }
3796  if (other.implicitProducerHash.load(std::memory_order_relaxed) ==
3797  &initialImplicitProducerHash) {
3798  other.implicitProducerHash.store(&other.initialImplicitProducerHash,
3799  std::memory_order_relaxed);
3800  } else {
3801  ImplicitProducerHash *hash;
3802  for (hash = other.implicitProducerHash.load(std::memory_order_relaxed);
3803  hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
3804  continue;
3805  }
3806  hash->prev = &other.initialImplicitProducerHash;
3807  }
3808  }
3809 
3810  // Only fails (returns nullptr) if memory allocation fails
3811  ImplicitProducer *get_or_add_implicit_producer() {
3812  // Note that since the data is essentially thread-local (key is thread ID),
3813  // there's a reduced need for fences (memory ordering is already consistent
3814  // for any individual thread), except for the current table itself.
3815 
3816  // Start by looking for the thread ID in the current and all previous hash
3817  // tables. If it's not found, it must not be in there yet, since this same
3818  // thread would have added it previously to one of the tables that we
3819  // traversed.
3820 
3821  // Code and algorithm adapted from
3822  // http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
3823 
3824 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
3825  debug::DebugLock lock(implicitProdMutex);
3826 #endif
3827 
3828  auto id = details::thread_id();
3829  auto hashedId = details::hash_thread_id(id);
3830 
3831  auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
3832  for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
3833  // Look for the id in this hash
3834  auto index = hashedId;
3835  while (true) { // Not an infinite loop because at least one slot is free
3836  // in the hash table
3837  index &= hash->capacity - 1;
3838 
3839  auto probedKey =
3840  hash->entries[index].key.load(std::memory_order_relaxed);
3841  if (probedKey == id) {
3842  // Found it! If we had to search several hashes deep, though, we
3843  // should lazily add it to the current main hash table to avoid the
3844  // extended search next time. Note there's guaranteed to be room in
3845  // the current hash table since every subsequent table implicitly
3846  // reserves space for all previous tables (there's only one
3847  // implicitProducerHashCount).
3848  auto value = hash->entries[index].value;
3849  if (hash != mainHash) {
3850  index = hashedId;
3851  while (true) {
3852  index &= mainHash->capacity - 1;
3853  probedKey =
3854  mainHash->entries[index].key.load(std::memory_order_relaxed);
3855  auto empty = details::invalid_thread_id;
3856 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3857  auto reusable = details::invalid_thread_id2;
3858  if ((probedKey == empty &&
3859  mainHash->entries[index].key.compare_exchange_strong(
3860  empty, id, std::memory_order_relaxed,
3861  std::memory_order_relaxed)) ||
3862  (probedKey == reusable &&
3863  mainHash->entries[index].key.compare_exchange_strong(
3864  reusable, id, std::memory_order_acquire,
3865  std::memory_order_acquire))) {
3866 #else
3867  if ((probedKey == empty &&
3868  mainHash->entries[index].key.compare_exchange_strong(
3869  empty, id, std::memory_order_relaxed,
3870  std::memory_order_relaxed))) {
3871 #endif
3872  mainHash->entries[index].value = value;
3873  break;
3874  }
3875  ++index;
3876  }
3877  }
3878 
3879  return value;
3880  }
3881  if (probedKey == details::invalid_thread_id) {
3882  break; // Not in this hash table
3883  }
3884  ++index;
3885  }
3886  }
3887 
3888  // Insert!
3889  auto newCount =
3890  1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
3891  while (true) {
3892  if (newCount >= (mainHash->capacity >> 1) &&
3893  !implicitProducerHashResizeInProgress.test_and_set(
3894  std::memory_order_acquire)) {
3895  // We've acquired the resize lock, try to allocate a bigger hash table.
3896  // Note the acquire fence synchronizes with the release fence at the end
3897  // of this block, and hence when we reload implicitProducerHash it must
3898  // be the most recent version (it only gets changed within this locked
3899  // block).
3900  mainHash = implicitProducerHash.load(std::memory_order_acquire);
3901  if (newCount >= (mainHash->capacity >> 1)) {
3902  auto newCapacity = mainHash->capacity << 1;
3903  while (newCount >= (newCapacity >> 1)) {
3904  newCapacity <<= 1;
3905  }
3906  auto raw = static_cast<char *>(
3907  (Traits::malloc)(sizeof(ImplicitProducerHash) +
3908  std::alignment_of<ImplicitProducerKVP>::value -
3909  1 + sizeof(ImplicitProducerKVP) * newCapacity));
3910  if (raw == nullptr) {
3911  // Allocation failed
3912  implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
3913  implicitProducerHashResizeInProgress.clear(
3914  std::memory_order_relaxed);
3915  return nullptr;
3916  }
3917 
3918  auto newHash = new (raw) ImplicitProducerHash;
3919  newHash->capacity = newCapacity;
3920  newHash->entries = reinterpret_cast<ImplicitProducerKVP *>(
3921  details::align_for<ImplicitProducerKVP>(
3922  raw + sizeof(ImplicitProducerHash)));
3923  for (size_t i = 0; i != newCapacity; ++i) {
3924  new (newHash->entries + i) ImplicitProducerKVP;
3925  newHash->entries[i].key.store(details::invalid_thread_id,
3926  std::memory_order_relaxed);
3927  }
3928  newHash->prev = mainHash;
3929  implicitProducerHash.store(newHash, std::memory_order_release);
3930  implicitProducerHashResizeInProgress.clear(std::memory_order_release);
3931  mainHash = newHash;
3932  } else {
3933  implicitProducerHashResizeInProgress.clear(std::memory_order_release);
3934  }
3935  }
3936 
3937  // If it's < three-quarters full, add to the old one anyway so that we
3938  // don't have to wait for the next table to finish being allocated by
3939  // another thread (and if we just finished allocating above, the condition
3940  // will always be true)
3941  if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
3942  bool recycled;
3943  auto producer = static_cast<ImplicitProducer *>(
3944  recycle_or_create_producer(false, recycled));
3945  if (producer == nullptr) {
3946  implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
3947  return nullptr;
3948  }
3949  if (recycled) {
3950  implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
3951  }
3952 
3953 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3954  producer->threadExitListener.callback =
3955  &ConcurrentQueue::implicit_producer_thread_exited_callback;
3956  producer->threadExitListener.userData = producer;
3957  details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
3958 #endif
3959 
3960  auto index = hashedId;
3961  while (true) {
3962  index &= mainHash->capacity - 1;
3963  auto probedKey =
3964  mainHash->entries[index].key.load(std::memory_order_relaxed);
3965 
3966  auto empty = details::invalid_thread_id;
3967 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3968  auto reusable = details::invalid_thread_id2;
3969  if ((probedKey == empty &&
3970  mainHash->entries[index].key.compare_exchange_strong(
3971  empty, id, std::memory_order_relaxed,
3972  std::memory_order_relaxed)) ||
3973  (probedKey == reusable &&
3974  mainHash->entries[index].key.compare_exchange_strong(
3975  reusable, id, std::memory_order_acquire,
3976  std::memory_order_acquire))) {
3977 #else
3978  if ((probedKey == empty &&
3979  mainHash->entries[index].key.compare_exchange_strong(
3980  empty, id, std::memory_order_relaxed,
3981  std::memory_order_relaxed))) {
3982 #endif
3983  mainHash->entries[index].value = producer;
3984  break;
3985  }
3986  ++index;
3987  }
3988  return producer;
3989  }
3990 
3991  // Hmm, the old hash is quite full and somebody else is busy allocating a
3992  // new one. We need to wait for the allocating thread to finish (if it
3993  // succeeds, we add, if not, we try to allocate ourselves).
3994  mainHash = implicitProducerHash.load(std::memory_order_acquire);
3995  }
3996  }
3997 
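  // The probe loops above follow the usual open-addressing claim idiom:
  // starting at hash_thread_id(id), linearly probe and take ownership of a
  // slot by compare-exchanging its key from invalid_thread_id (never used) or
  // invalid_thread_id2 (vacated by an exited thread) to our own thread id;
  // only the CAS winner publishes the value pointer.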
3998 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
3999  void implicit_producer_thread_exited(ImplicitProducer *producer) {
4000  // Remove from thread exit listeners
4001  details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener);
4002 
4003  // Remove from hash
4004 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
4005  debug::DebugLock lock(implicitProdMutex);
4006 #endif
4007  auto hash = implicitProducerHash.load(std::memory_order_acquire);
4008  assert(hash != nullptr); // The thread exit listener is only registered if
4009  // we were added to a hash in the first place
4010  auto id = details::thread_id();
4011  auto hashedId = details::hash_thread_id(id);
4012  details::thread_id_t probedKey;
4013 
4014  // We need to traverse all the hashes just in case other threads aren't on
4015  // the current one yet and are trying to add an entry thinking there's a
4016  // free slot (because they reused a producer)
4017  for (; hash != nullptr; hash = hash->prev) {
4018  auto index = hashedId;
4019  do {
4020  index &= hash->capacity - 1;
4021  probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
4022  if (probedKey == id) {
4023  hash->entries[index].key.store(details::invalid_thread_id2,
4024  std::memory_order_release);
4025  break;
4026  }
4027  ++index;
4028  } while (
4029  probedKey !=
4030  details::invalid_thread_id); // Can happen if the hash has changed
4031  // but we weren't put back in it yet, or
4032  // if we weren't added to this hash in
4033  // the first place
4034  }
4035 
4036  // Mark the queue as being recyclable
4037  producer->inactive.store(true, std::memory_order_release);
4038  }
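  // Storing `inactive = true` with release ordering is what makes this
  // producer visible to recycle_or_create_producer above; its hash slots have
  // already been tombstoned with invalid_thread_id2, so a new thread can claim
  // both the slot and the producer.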
4039 
4040  static void implicit_producer_thread_exited_callback(void *userData) {
4041  auto producer = static_cast<ImplicitProducer *>(userData);
4042  auto queue = producer->parent;
4043  queue->implicit_producer_thread_exited(producer);
4044  }
4045 #endif
4046 
4048  // Utility functions
4050 
4051  template <typename U>
4052  static inline U *create_array(size_t count) {
4053  assert(count > 0);
4054  auto p = static_cast<U *>((Traits::malloc)(sizeof(U) * count));
4055  if (p == nullptr) {
4056  return nullptr;
4057  }
4058 
4059  for (size_t i = 0; i != count; ++i) {
4060  new (p + i) U();
4061  }
4062  return p;
4063  }
4064 
4065  template <typename U>
4066  static inline void destroy_array(U *p, size_t count) {
4067  if (p != nullptr) {
4068  assert(count > 0);
4069  for (size_t i = count; i != 0;) {
4070  (p + --i)->~U();
4071  }
4072  (Traits::free)(p);
4073  }
4074  }
4075 
4076  template <typename U>
4077  static inline U *create() {
4078  auto p = (Traits::malloc)(sizeof(U));
4079  return p != nullptr ? new (p) U : nullptr;
4080  }
4081 
4082  template <typename U, typename A1>
4083  static inline U *create(A1 &&a1) {
4084  auto p = (Traits::malloc)(sizeof(U));
4085  return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
4086  }
4087 
4088  template <typename U>
4089  static inline void destroy(U *p) {
4090  if (p != nullptr) {
4091  p->~U();
4092  }
4093  (Traits::free)(p);
4094  }
4095 
4096  private:
4097  std::atomic<ProducerBase *> producerListTail;
4098  std::atomic<std::uint32_t> producerCount;
4099 
4100  std::atomic<size_t> initialBlockPoolIndex;
4101  Block *initialBlockPool;
4102  size_t initialBlockPoolSize;
4103 
4104 #if !MCDBGQ_USEDEBUGFREELIST
4105  FreeList<Block> freeList;
4106 #else
4107  debug::DebugFreeList<Block> freeList;
4108 #endif
4109 
4110  std::atomic<ImplicitProducerHash *> implicitProducerHash;
4111  std::atomic<size_t>
4112  implicitProducerHashCount; // Number of slots logically used
4113  ImplicitProducerHash initialImplicitProducerHash;
4114  std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE>
4115  initialImplicitProducerHashEntries;
4116  std::atomic_flag implicitProducerHashResizeInProgress;
4117 
4118  std::atomic<std::uint32_t> nextExplicitConsumerId;
4119  std::atomic<std::uint32_t> globalExplicitConsumerOffset;
4120 
4121 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
4122  debug::DebugMutex implicitProdMutex;
4123 #endif
4124 
4125 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
4126  std::atomic<ExplicitProducer *> explicitProducers;
4127  std::atomic<ImplicitProducer *> implicitProducers;
4128 #endif
4129 };
4130 
4131 template <typename T, typename Traits>
4132 ProducerToken::ProducerToken(ConcurrentQueue<T, Traits> &queue)
4133  : producer(queue.recycle_or_create_producer(true)) {
4134  if (producer != nullptr) {
4135  producer->token = this;
4136  }
4137 }
4138 
4139 template <typename T, typename Traits>
4140 ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits> &queue)
4141  : producer(reinterpret_cast<ConcurrentQueue<T, Traits> *>(&queue)
4142  ->recycle_or_create_producer(true)) {
4143  if (producer != nullptr) {
4144  producer->token = this;
4145  }
4146 }
4147 
4148 template <typename T, typename Traits>
4149 ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits> &queue)
4150  : itemsConsumedFromCurrent(0),
4151  currentProducer(nullptr),
4152  desiredProducer(nullptr) {
4153  initialOffset =
4154  queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
4155  lastKnownGlobalOffset = -1;
4156 }
4157 
4158 template <typename T, typename Traits>
4159 ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits> &queue)
4160  : itemsConsumedFromCurrent(0),
4161  currentProducer(nullptr),
4162  desiredProducer(nullptr) {
4163  initialOffset =
4164  reinterpret_cast<ConcurrentQueue<T, Traits> *>(&queue)
4165  ->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
4166  lastKnownGlobalOffset = -1;
4167 }
4168 
4169 template <typename T, typename Traits>
4170 inline void swap(ConcurrentQueue<T, Traits> &a,
4171  ConcurrentQueue<T, Traits> &b) MOODYCAMEL_NOEXCEPT {
4172  a.swap(b);
4173 }
4174 
4175 inline void swap(ProducerToken &a, ProducerToken &b) MOODYCAMEL_NOEXCEPT {
4176  a.swap(b);
4177 }
4178 
4179 inline void swap(ConsumerToken &a, ConsumerToken &b) MOODYCAMEL_NOEXCEPT {
4180  a.swap(b);
4181 }
4182 
4183 template <typename T, typename Traits>
4184 inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &a,
4185  typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &b)
4186  MOODYCAMEL_NOEXCEPT {
4187  a.swap(b);
4188 }
4189 
4190 } // namespace moodycamel
4191 
4192 #if defined(__GNUC__)
4193 #pragma GCC diagnostic pop
4194 #endif