diff options
Diffstat (limited to 'lib/atomic.h')
-rw-r--r-- | lib/atomic.h | 2716 |
1 files changed, 2716 insertions, 0 deletions
diff --git a/lib/atomic.h b/lib/atomic.h new file mode 100644 index 000000000..afab4843f --- /dev/null +++ b/lib/atomic.h @@ -0,0 +1,2716 @@ +/* Atomic operations for Nimrod */ + +#if defined(_MSCVER) +__declspec(naked) int __fastcall Xadd (volatile int* pNum, int val) +{ + __asm + { + lock xadd dword ptr [ECX], EDX + mov EAX, EDX + ret + } +} + + + +#endif + + +#define ATOMIC_ASM(type,op) \ + __asm __volatile ("lock; " op : "=m" (*(type *)p) : "ir" (v), "0" (*(type *)p)) + +#define ATOMIC_ASM_NOLOCK(type,op) \ + __asm __volatile (op : "=m" (*(type *)p) : "ir" (v), "0" (*(type *)p)) + +static __inline void +atomic_add_int(void *p, u_int v) +{ + ATOMIC_ASM(int, "addl %1,%0"); +} + +static __inline void +atomic_add_int_nolock(void *p, u_int v) +{ + ATOMIC_ASM_NOLOCK(int, "addl %1,%0"); +} + + + +/* +Atomic.h + +Joshua Scholar +May 26, 2003 + +This header contains: + +a multiprocessor nonblocking FIFO, + +a multiprocessor nonblocking LIFO + +multiprocessor nonblocking reference counting (including volatile pointers +that can be safely shared between processors) + +nonblocking memory allocation routines + +template types that encapsulate variables meant to be shared between +processors - all changes to these variables are atomic and globally visible + +All kinds of atomic operations that are useful in a multiprocessor context. + +The philosophy behind this code is that I created templates that encapsulate +atomic access so that while the templates themselves may not be the easiest +code to read, code that uses these templates can be simple, abstract and +reliable. + +I also created regular C style functions, overloaded by type for some of +the more basic operations. If you have regular variables or memory +locations that you want to use in a globally visible way you have two +choices. + +If the operation you want is one of the basic building blocks you can +call one of the overloaded functions like InterlockedSetIfEqual(). + +Otherwise it's perfectly safe to cast a pointer to your data to be a pointer +to one of the atomic types so that you can use their methods. For instance: +if (((AtomicInt *)foo)->CompareSomeBitsAndExchangeSomeOtherBits(exchange, + bitsToExchange, + comperand, + bitsToCompare)) + ... +or even + +//atomically allocate n bytes out of the pool + data = ((*(AtomicPtr<char> *)curPool)+= n) - n; + + + +State of code: + +Unlike other libraries of similar routines that I wrote in the past, this +has not been thoroughly tested. In fact I don't remember how much of it has +been tested at this point. + +It would take an 8 way machine for me to really pound on the routines. + + + +Overview + +Some basic types are: +typedef Atomic<int> AtomicInt; +typedef Atomic<unsigned int> AtomicUInt; +typedef Atomic<__int64> AtomicInt64; +typedef Atomic<unsigned __int64> AtomicUInt64; + +Fancier types include +template <typename T> struct AtomicPtr; +This is a pointer that has the same semantics as the above integer types + +template <typename T>struct AtomicPtrWithCount; + +AtomicPtrWithCount<T> is a very important type. It has 32 bits of pointer +and 32 bits of counter. There are a number of algorithms that are possible +when a pointer and a counter can be changed atomically together including a +lock free pushdown stack and reference counted garbage collection where +multiple processors can share pointers as well as sharing the data that's +pointed at. + +template <typename T> struct AtomicPtrWithMark; + +This is similar to AtomicPtrWithCount<T> but the extra 32 bits are accessed +as individual bits instead of being accessed as a counter. It's not as +important. I was playing with algorithms before I realized that the +important ones I was afraid of using had been published independently of my +former employer. + + + +The atomic number types act as integer variables, but all changes to these +variable happen through interlocked atomic instructions. +All changes are therefor "globally visible" in Intel's parlance. + +Note that incrementing (or decrementing or adding to) one of these uses +InterlockedExchangeAdd, which for 64 bit numbers ends up relying on "lock +CMPXCHG8B" + +There's an Exchange method. + +There are also special methods that use compare exchange in some forms that +I've found useful: + +T CompareExchange(T exchange, T comperand) +bool SetIfEqual(T exchange, T comperand) + +and fancier ones I found some uses for + + inline bool SetIfSomeBitsAreEqual(T exchange, T comperand, T bitsToCompare) + inline bool SetSomeBitsIfThoseBitsAreEqual(T exchange, + T comperand, + T bitsToCompare) + inline bool SetSomeBitsIfSomeOtherBitsAreEqual(T exchange, + T bitsToExchange, + T comperand, + T bitsToCompare + ) + + inline T CompareSomeBitsAndExchange(T exchange, + T comperand, + T bitsToCompare) + inline T CompareSomeBitsAndExchangeThoseBits(T exchange, + T comperand, + T bitsToCompare) + inline T CompareSomeBitsAndExchangeSomeOtherBits(T exchange, + T bitsToExchange, + T comperand, + T bitsToCompare + ) + +There are also atomic bit test, bit test and set etc. methods: + + inline bool BTS(int bit) + inline bool BTC(int bit) + inline bool BTR(int bit) + inline bool BT(int bit) + +ALGORITHMS and their classes: + +The important ones are: + +struct Counted +Use this as the base type for any object you want to be reference counted + +template <typename T> class CountedPtr; +Safely acts as pointer to a reference counted type. This pointer can not be +shared between threads safely. + +template <typename T> class AtomicCountedPtr; +Like CountedPtr<T> but this pointer CAN be shared between threads/processors +safely. + +template <typename T> class MPQueue; +Multiprocessor queue. This is the nonblocking shared FIFO. + +Note, for the sake of convenience there is a Fifo that has the same +semantics but can only be used single threaded: +template <typename T> class Queue ; + +class MPCountStack; +This is the multiprocessor nonblocking LIFO stack. Note that what gets +pushed on the stack are pointers to MPStackElement. Your data must be +objects derived from MPStackElements. Note that it is not legal to push a +NULL onto the stack - NULL is used to signal an empty stack. + +Note that for the sake of convienience there is, once again, a single +threaded version of the stack: +class SimpleStack. + +There are also classes for allocators that use the MPCountStack as a +freelist. +template <typename T, typename BLOCK_ALLOCATOR> +struct MPCountFreeListAllocator; + +This template is recursive in the sense that each allocator gets new blocks +from a shared allocatorn passed in the constructor (of type +BLOCK_ALLOCATOR). You can build a tree of allocators this way. The root of +the tree should be of type SimpleAllocator<T> which just calls new and +delete. + +Once again, for the sake of simplicity, there is a single threaded version +of the block allocator called +template <typename T, typename BLOCK_ALLOCATOR> struct SimpleBlockAllocator + +*/ +#ifndef ATOMIC_H +#define ATOMIC_H +#include <windows.h> +#include <assert.h> +#include <new> +using namespace std; +/* + windows defines the following interlocked routines + we need to define the equivalent for volatiles (which + they should have been in the first place, and the following + types voltatile long, volatile int, volatile unsigned long, + volatile unsigned int, volatile T*, volatile __int64, + volatile unsigned __int64. + + Note: I use the platform SDK which has different header files + for interlocked instructions than the Windows includes in Visual + C + + If you have the platform SDK and the code doesn't compile then + you need to make sure that + "C:\Program Files\Microsoft Platform SDK\include" + is the first directory listed under menus "Tools" -> menu item + "Options" -> tab "Directories" (of course if you installed the + platform SDK in a different directory than + "C:\Program Files\Microsoft Platform SDK" then you should use + YOUR path. + + If you don't have the plaform SDK then InterlockedCompareExchange + is defined for void * instead of being defined for longs... and + there is no InterlockedCompareExchangePointer + + The whole point of Microsoft having different headers was an update + to support 64 bit platforms which doesn't matter here at all (some + of the code here relies on CMPXCHG8B swaping out both a pointer AND + a counter - a trick that won't work on the current 64 bit platforms). + + In any case, if you don't have the platform SDK then just + appropriate casts to make the code compile. Keep in mind that + casting from 64 bit types to 32 bit types is wrong - where there's + a 64 bit type I meant it to call one of my assembly language + routines that uses CMPXCHG8B. + + LONG + InterlockedIncrement( + LPLONG lpAddend + ); + + LONG + InterlockedDecrement( + LPLONG lpAddend + ); + + LONG + InterlockedExchange( + LPLONG Target, + LONG Value + ); + + LONG + InterlockedExchangeAdd( + LPLONG Addend, + LONG Value + ); + + LONG + InterlockedCompareExchange ( + PLONG Destination, + LONG ExChange, + LONG Comperand + ); + + PVOID + InterlockedExchangePointer ( + PVOID *Target, + PVOID Value + ); + + PVOID + InterlockedCompareExchangePointer ( + PVOID *Destination, + PVOID ExChange, + PVOID Comperand + ); +*/ + +//we'll need a special cases for volatile __int64 and +//volatile unsigned __int64 + +template <typename T> +inline T InterlockedIncrement(volatile T * ptr) +{ + return (T)InterlockedIncrement((LPLONG)ptr); +} + +template <typename T> +inline T InterlockedDecrement(volatile T * ptr) +{ + return (T)InterlockedDecrement((LPLONG)ptr); +} + +template <typename T> +inline T InterlockedExchange(volatile T * target,T value) +{ + return (T)InterlockedExchange((LPLONG)target,(LONG)value); +} + +template <typename T> +inline T InterlockedExchangeAdd(volatile T *addend,T value) +{ + return (T)InterlockedExchangeAdd((LPLONG)addend,(LONG)value); +} + +template <typename T> +T InterlockedCompareExchange (volatile T * dest,T exchange,T comperand) +{ + return (T)InterlockedCompareExchange ((LPLONG)dest, + (LONG)exchange, + (LONG)comperand); +} +//most common use of InterlockedCompareExchange +template <typename T> +bool InterlockedSetIfEqual (volatile T * dest,T exchange,T comperand) +{ + return comperand==InterlockedCompareExchange(dest,exchange,comperand); +} + +//disable the no return value warning, because the assembly language +//routines load the appropriate registers directly +#pragma warning(disable:4035) + +inline unsigned __int64 +InterlockedCompareExchange(volatile unsigned __int64 *dest + ,unsigned __int64 exchange + ,unsigned __int64 comperand) +{ + //value returned in eax::edx + __asm { + lea esi,comperand; + lea edi,exchange; + + mov eax,[esi]; + mov edx,4[esi]; + mov ebx,[edi]; + mov ecx,4[edi]; + mov esi,dest; + //lock CMPXCHG8B [esi] is equivalent to the following except + //that it's atomic: + //ZeroFlag = (edx:eax == *esi); + //if (ZeroFlag) *esi = ecx:ebx; + //else edx:eax = *esi; + lock CMPXCHG8B [esi]; + } +} + +//most common use of InterlockedCompareExchange +//It's more efficient to use the z flag than to do another compare +inline bool +InterlockedSetIfEqual(volatile unsigned __int64 *dest + ,unsigned __int64 exchange + ,unsigned __int64 comperand) +{ + //value returned in eax + __asm { + lea esi,comperand; + lea edi,exchange; + + mov eax,[esi]; + mov edx,4[esi]; + mov ebx,[edi]; + mov ecx,4[edi]; + mov esi,dest; + //lock CMPXCHG8B [esi] is equivalent to the following except + //that it's atomic: + //ZeroFlag = (edx:eax == *esi); + //if (ZeroFlag) *esi = ecx:ebx; + //else edx:eax = *esi; + lock CMPXCHG8B [esi]; + mov eax,0; + setz al; + } +} +#pragma warning(default:4035) + +inline unsigned __int64 InterlockedIncrement(volatile unsigned __int64 * ptr) +{ + unsigned __int64 comperand; + unsigned __int64 exchange; + do { + comperand = *ptr; + exchange = comperand+1; + }while(!InterlockedSetIfEqual(ptr,exchange,comperand)); + return exchange; +} + +inline unsigned __int64 InterlockedDecrement(volatile unsigned __int64 * ptr) +{ + unsigned __int64 comperand; + unsigned __int64 exchange; + do { + comperand = *ptr; + exchange = comperand-1; + }while(!InterlockedSetIfEqual(ptr,exchange,comperand)); + return exchange; +} + +inline unsigned __int64 InterlockedExchange(volatile unsigned __int64 * target, + unsigned __int64 value) +{ + unsigned __int64 comperand; + do { + comperand = *target; + }while(!InterlockedSetIfEqual(target,value,comperand)); + return comperand; +} + +inline unsigned __int64 InterlockedExchangeAdd(volatile unsigned __int64 *addend, + unsigned __int64 value) +{ + unsigned __int64 comperand; + do { + comperand = *addend; + }while(!InterlockedSetIfEqual(addend,comperand+value,comperand)); + return comperand; +} + +#pragma warning(disable:4035) +inline __int64 +InterlockedCompareExchange(volatile __int64 *dest + ,__int64 exchange + ,__int64 comperand) +{ + //value returned in eax::edx + __asm { + lea esi,comperand; + lea edi,exchange; + + mov eax,[esi]; + mov edx,4[esi]; + mov ebx,[edi]; + mov ecx,4[edi]; + mov esi,dest; + //lock CMPXCHG8B [esi] is equivalent to the following except + //that it's atomic: + //ZeroFlag = (edx:eax == *esi); + //if (ZeroFlag) *esi = ecx:ebx; + //else edx:eax = *esi; + lock CMPXCHG8B [esi]; + } +} + +//most common use of InterlockedCompareExchange +//It's more efficient to use the z flag than to do another compare +inline bool +InterlockedSetIfEqual(volatile __int64 *dest + ,__int64 exchange + ,__int64 comperand) +{ + //value returned in eax + __asm { + lea esi,comperand; + lea edi,exchange; + + mov eax,[esi]; + mov edx,4[esi]; + mov ebx,[edi]; + mov ecx,4[edi]; + mov esi,dest; + //lock CMPXCHG8B [esi] is equivalent to the following except + //that it's atomic: + //ZeroFlag = (edx:eax == *esi); + //if (ZeroFlag) *esi = ecx:ebx; + //else edx:eax = *esi; + lock CMPXCHG8B [esi]; + mov eax,0; + setz al; + } +} +#pragma warning(default:4035) + +inline __int64 InterlockedIncrement(volatile __int64 * dest) +{ + __int64 comperand; + __int64 exchange; + do { + comperand = *dest; + exchange = comperand+1; + }while(!InterlockedSetIfEqual(dest,exchange,comperand)); + return exchange; +} + +inline __int64 InterlockedDecrement(volatile __int64 * dest) +{ + __int64 comperand; + __int64 exchange; + do { + comperand = *dest; + exchange = comperand-1; + }while(!InterlockedSetIfEqual(dest,exchange,comperand)); + return exchange; +} + +inline __int64 InterlockedExchange(volatile __int64 * target,__int64 value) +{ + __int64 comperand; + do { + comperand = *target; + }while(!InterlockedSetIfEqual(target,value,comperand)); + return comperand; +} + +inline __int64 InterlockedExchangeAdd(volatile __int64 *addend, + __int64 value) +{ + __int64 comperand; + do { + comperand = *addend; + }while(!InterlockedSetIfEqual(addend,comperand+value,comperand)); + return comperand; +} + +#pragma warning(disable:4035) +//I've just thought of some algorithms that use BTS and all so I'm including them +inline bool InterlockedBTS(volatile int *dest, int bit) +{ + //value returned in eax + __asm{ + mov eax,bit; + mov ebx,dest; + lock bts [ebx],eax; + mov eax,0; + setc al; + } +} +inline bool InterlockedBTC(volatile int *dest, int bit) +{ + //value returned in eax + __asm{ + mov eax,bit; + mov ebx,dest; + lock btc [ebx],eax; + mov eax,0; + setc al; + } +} +inline bool InterlockedBTR(volatile int *dest, int bit) +{ + //value returned in eax + __asm{ + mov eax,bit; + mov ebx,dest; + lock btr [ebx],eax; + mov eax,0; + setc al; + } +} +//you can lock BT but since it doesn't change memory there isn't really any point +inline bool BT(volatile int *dest, int bit) +{ + //value returned in eax + __asm{ + mov eax,bit; + mov ebx,dest; + bt [ebx],eax; + mov eax,0; + setc al; + } +} +#pragma warning(default:4035) + +inline bool InterlockedBTS(volatile unsigned *dest, int bit) +{ + return InterlockedBTS((volatile int *)dest,bit); +} +inline bool InterlockedBTC(volatile unsigned *dest, int bit) +{ + return InterlockedBTC((volatile int *)dest,bit); +} +inline bool InterlockedBTR(volatile unsigned *dest, int bit) +{ + return InterlockedBTR((volatile int *)dest,bit); +} +inline bool BT(volatile unsigned *dest, int bit) +{ + return BT((volatile int *)dest,bit); +} + +inline bool InterlockedBTS(volatile unsigned long *dest, int bit) +{ + return InterlockedBTS((volatile int *)dest,bit); +} +inline bool InterlockedBTC(volatile unsigned long *dest, int bit) +{ + return InterlockedBTC((volatile int *)dest,bit); +} +inline bool InterlockedBTR(volatile unsigned long *dest, int bit) +{ + return InterlockedBTR((volatile int *)dest,bit); +} +inline bool BT(volatile unsigned long *dest, int bit) +{ + return BT((volatile int *)dest,bit); +} + +inline bool InterlockedBTS(volatile long *dest, int bit) +{ + return InterlockedBTS((volatile int *)dest,bit); +} +inline bool InterlockedBTC(volatile long *dest, int bit) +{ + return InterlockedBTC((volatile int *)dest,bit); +} +inline bool InterlockedBTR(volatile long *dest, int bit) +{ + return InterlockedBTR((volatile int *)dest,bit); +} +inline bool BT(volatile long *dest, int bit) +{ + return BT((volatile int *)dest,bit); +} + +inline bool InterlockedBTS(volatile __int64 *dest, int bit) +{ + if (bit<32) return InterlockedBTS((volatile int *)&dest,bit); + return InterlockedBTS(1+(volatile int *)&dest,bit-32); +} +inline bool InterlockedBTC(volatile __int64 *dest, int bit) +{ + if (bit<32) return InterlockedBTC((volatile int *)&dest,bit); + return InterlockedBTC(1+(volatile int *)&dest,bit-32); +} +inline bool InterlockedBTR(volatile __int64 *dest, int bit) +{ + if (bit<32) return InterlockedBTR((volatile int *)&dest,bit); + return InterlockedBTR(1+(volatile int *)&dest,bit-32); +} +inline bool BT(volatile __int64 *dest, int bit) +{ + if (bit<32) return BT((volatile int *)&dest,bit); + return BT(1+(volatile int *)&dest,bit-32); +} +inline bool InterlockedBTS(volatile unsigned __int64 *dest, int bit) +{ + if (bit<32) return InterlockedBTS((volatile int *)&dest,bit); + return InterlockedBTS(1+(volatile int *)&dest,bit-32); +} +inline bool InterlockedBTC(volatile unsigned __int64 *dest, int bit) +{ + if (bit<32) return InterlockedBTC((volatile int *)&dest,bit); + return InterlockedBTC(1+(volatile int *)&dest,bit-32); +} +inline bool InterlockedBTR(volatile unsigned __int64 *dest, int bit) +{ + if (bit<32) return InterlockedBTR((volatile int *)&dest,bit); + return InterlockedBTR(1+(volatile int *)&dest,bit-32); +} +inline bool BT(volatile unsigned __int64 *dest, int bit) +{ + if (bit<32) return BT((volatile int *)&dest,bit); + return BT(1+(volatile int *)&dest,bit-32); +} + +//T can be int, unsigned int, long, unsigned long +//__int64 or unsigned __int64 +template <typename T> +struct Atomic +{ + volatile T value; + + inline Atomic(){} + + explicit inline Atomic(T n) + { //so that it's globally visible (to use intel's terminology) + InterlockedExchange(&value,n); + } + + //if you need an atomic load then use (*this += 0) + //but I haven't found any algorithms that + //require an atomic load + inline operator T() const + { + return value; + } + + inline T CompareExchange(T exchange, T comperand) + { + return InterlockedCompareExchange(&value,exchange,comperand); + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare + inline T CompareSomeBitsAndExchange(T exchange, T comperand, T bitsToCompare) + { + T returned; + T bitsToIgnore = ~bitsToCompare; + for (;;){ + returned = InterlockedCompareExchange(&value,exchange,comperand); + if (returned == comperand) return returned; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (comparand&bitsToCompare) | (bitsToIgnore&returned); + } + return returned; + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare and in the set + inline T CompareSomeBitsAndExchangeThoseBits(T exchange, T comperand, T bitsToCompare) + { + T returned = comperand; + T bitsToIgnore = ~bitsToCompare; + for (;;){ + exchange = (exchange&bitsToCompare) | (bitsToIgnore&returned); + returned = InterlockedCompareExchange(&value,exchange,comperand); + if (returned == comperand) return returned; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (comparand&bitsToCompare) | (bitsToIgnore&returned); + } + return returned; + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare and others in the set + inline T CompareSomeBitsAndExchangeSomeOtherBits(T exchange, + T bitsToExchange, + T comperand, + T bitsToCompare + ) + { + T returned = value; + T bitsToIgnore = ~bitsToCompare; + T bitsToLeave = ~bitsToExchange; + for (;;){ + exchange = (exchange&bitsToExchange) | (bitsToLeave&returned); + returned = InterlockedCompareExchange(&value,exchange,comperand); + if (returned == comperand) return returned; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (comparand&bitsToCompare) | (bitsToIgnore&returned); + } + return returned; + } + + inline T Exchange(T exchange) + { + return InterlockedExchange(&value,exchange); + } + + inline bool SetIfEqual(T exchange, T comperand) + { + return InterlockedSetIfEqual(&value,exchange,comperand); + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare + inline bool SetIfSomeBitsAreEqual(T exchange, T comperand, T bitsToCompare) + { + T returned; + T bitsToIgnore = ~bitsToCompare; + for (;;){ + returned = InterlockedCompareExchange(&value,exchange,comperand); + if (returned == comperand) return true; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (comparand&bitsToCompare) | (bitsToIgnore&returned); + } + return false; + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare and in the set + inline bool SetSomeBitsIfThoseBitsAreEqual(T exchange, T comperand, T bitsToCompare) + { + T returned = comperand; + T bitsToIgnore = ~bitsToCompare; + for (;;){ + exchange = (exchange&bitsToCompare) | (bitsToIgnore&returned); + returned = InterlockedCompareExchange(&value,exchange,comperand); + if (returned == comperand) return true; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comperand = (comperand&bitsToCompare) | (bitsToIgnore&returned); + } + return false; + } + //useful - simulates having a compareexchange that ignores some bits in + //the compare and others in the set + inline bool SetSomeBitsIfSomeOtherBitsAreEqual(T exchange, + T bitsToExchange, + T comperand, + T bitsToCompare + ) + { + T returned = value; + T bitsToIgnore = ~bitsToCompare; + T bitsToLeave = ~bitsToExchange; + for (;;){ + exchange = (exchange&bitsToExchange) | (bitsToLeave&returned); + returned = InterlockedCompareExchange(&value,exchange,comperand); + if (returned == comperand) return true; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comperand = (comperand&bitsToCompare) | (bitsToIgnore&returned); + } + return false; + } + + inline T operator =(T exchange) + { + Exchange(exchange); + return exchange; + } + + inline T operator +=(T n) + { + return n + InterlockedExchangeAdd(&value, n); + } + + inline T operator -=(T n) + { + return InterlockedExchangeAdd(&value, -n) - n; + } + inline T operator ++() + { + return (*this += 1); + } + + inline T operator --() + { + return (*this -= 1); + } + + inline T operator ++(int) + { + return InterlockedExchangeAdd(&value, (T)1); + } + + inline T operator --(int) + { + return InterlockedExchangeAdd(&value, (T)-1); + } + + inline T operator &=(T n) + { + T comperand; + T exchange; + do { + comperand = value; + exchange = comperand & n; + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + inline T operator |= (T n) + { + T comperand; + T exchange; + do { + comperand = value; + exchange = comperand | n; + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + //yes this isn't standard, but it's useful + inline T Mask(T bitsToKeep, T bitsToSet) + { + T comperand; + T exchange; + do { + comperand = value ; + exchange = ((comperand & bitsToKeep) | bitsToSet); + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + inline T operator ^= (T n) + { + T comperand; + T exchange; + do { + comperand = value; + exchange = comperand ^ n; + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + inline T operator *= (T n) + { + T comperand; + T exchange; + do { + comperand = value; + exchange = comperand * n; + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + inline T operator /= (T n) + { + T comperand; + T exchange; + do { + comperand = value; + exchange = comperand / n; + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + inline T operator >>= (unsigned n) + { + T comperand; + T exchange; + do { + comperand = value; + exchange = comperand >> n; + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + inline T operator <<= (unsigned n) + { + T comperand; + T exchange; + do { + comperand = value; + exchange = comperand << n; + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + inline bool BTS(int bit) + { + return InterlockedBTS(&value,bit); + } + inline bool BTC(int bit) + { + return InterlockedBTC(&value,bit); + } + inline bool BTR(int bit) + { + return InterlockedBTR(&value,bit); + } + inline bool BT(int bit) + { + return BT(&value,bit); + } +}; + +template <typename T> +T * InterlockedCompareExchangePointer ( + T *volatile *dest, + T * exchange, + T * comperand + ) +{ + return (T*)InterlockedCompareExchangePointer((PVOID *)dest,(PVOID)exchange,(PVOID)comperand); +} + +template <typename T> +struct AtomicPtr +{ + T * volatile value; + + inline AtomicPtr(){} + + explicit inline AtomicPtr(T *n) + { //so that it's globally visible (to use intel's terminology) + InterlockedExchange(&value,n); + } + explicit inline AtomicPtr(const T *n) + { //so that it's globally visible (to use intel's terminology) + InterlockedExchange(&value,(T *)n); + } + + inline operator T *() const + { + return value; + } + + inline operator const T *() const + { + return value; + } + + inline T *CompareExchange(T *exchange, T *comperand) + { + return InterlockedCompareExchangePointer(&value,exchange,comperand); + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare + inline T * CompareSomeBitsAndExchange(T * exchange, T * comperand, int bitsToCompare) + { + T *returned; + int bitsToIgnore = ~bitsToCompare; + for (;;){ + returned = InterlockedCompareExchangePointer(&value,exchange,comperand); + if (returned == comperand) return returned; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (comparand&bitsToCompare) | (bitsToIgnore&returned); + } + return returned; + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare and in the set + inline T * CompareSomeBitsAndExchangeThoseBits(T * exchange, T * comperand, int bitsToCompare) + { + T *returned = comperand; + int bitsToIgnore = ~bitsToCompare; + for (;;){ + exchange = (T*)((int)exchange&bitsToCompare) | (bitsToIgnore&(int)returned); + returned = InterlockedCompareExchangePointer(&value,exchange,comperand); + if (returned == comperand) return returned; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (T*)((int)comparand&bitsToCompare) | (bitsToIgnore&(int)returned); + } + return returned; + } + //useful - simulates having a compareexchange that ignores some bits in + //the compare and others in the set + inline T* CompareSomeBitsAndExchangeSomeOtherBits(T *exchange, + int bitsToExchange, + T * comperand, + int bitsToCompare + ) + { + T * returned = value; + int bitsToIgnore = ~bitsToCompare; + int bitsToLeave = ~bitsToExchange; + for (;;){ + exchange = (T*)((int)exchange&bitsToExchange) | (bitsToLeave&(int)returned); + returned = InterlockedCompareExchange(&value,exchange,comperand); + if (returned == comperand) return returned; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (T*)((int)comparand&bitsToCompare) | (bitsToIgnore&(int)returned); + } + return returned; + } + + inline T * operator ->() const + { + assert (value != NULL); + return value; + } + + inline T *Exchange(T *exchange) + { + return InterlockedExchange(&value,exchange); + } + + inline bool SetIfEqual(T *exchange, T *comperand) + { + return comperand==InterlockedCompareExchangePointer(&value,exchange,comperand); + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare + inline bool SetIfSomeBitsAreEqual(T * exchange, T * comperand, int bitsToCompare) + { + T *returned; + int bitsToIgnore = ~bitsToCompare; + for (;;){ + returned = InterlockedCompareExchangePointer(&value,exchange,comperand); + if (returned == comperand) return true; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (comparand&bitsToCompare) | (bitsToIgnore&returned); + } + return false; + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare and in the set + inline bool SetSomeBitsIfThoseBitsAreEqual(T * exchange, T * comperand, int bitsToCompare) + { + T *returned = comperand; + int bitsToIgnore = ~bitsToCompare; + for (;;){ + exchange = (T*)((int)exchange&bitsToCompare) | (bitsToIgnore&(int)returned); + returned = InterlockedCompareExchangePointer(&value,exchange,comperand); + if (returned == comperand) return true; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (T*)((int)comparand&bitsToCompare) | (bitsToIgnore&(int)returned); + } + return false; + } + + //useful - simulates having a compareexchange that ignores some bits in + //the compare and others in the set + inline bool SetSomeBitsIfSomeOtherBitsAreEqual(T *exchange, + int bitsToExchange, + T * comperand, + int bitsToCompare + ) + { + T * returned = value; + int bitsToIgnore = ~bitsToCompare; + int bitsToLeave = ~bitsToExchange; + for (;;){ + exchange = (T*)((int)exchange&bitsToExchange) | (bitsToLeave&(int)returned); + returned = InterlockedCompareExchange(&value,exchange,comperand); + if (returned == comperand) return true; + if (0 != ((returned ^ comperand) & bitsToCompare)) break; + comparand = (T*)((int)comparand&bitsToCompare) | (bitsToIgnore&(int)returned); + } + return false; + } + + inline T *operator =(T *exchange) + { + Exchange(exchange); + return exchange; + } + + template <typename INT_TYPE> + inline T *operator +=(INT_TYPE n) + { + return n + (T *)InterlockedExchangeAdd((LPLONG)&value, (LONG)(n * (LONG)sizeof(T))); + } + + template <typename INT_TYPE> + inline T *operator -=(INT_TYPE n) + { + return (T *)InterlockedExchangeAdd((LPLONG)&value, (LONG)(-n * (LONG)sizeof(T))) - n; + } + inline T *operator ++() + { + return (T *)InterlockedExchangeAdd((LPLONG)&value, (LONG)(sizeof(T))) + 1; + } + + inline T *operator --() + { + return (T *)InterlockedExchangeAdd((LPLONG)&value, -(LONG)(sizeof(T))) - 1; + } + + inline T *operator ++(int) + { + return (T*)InterlockedExchangeAdd((LPLONG)&value, (LONG)sizeof(T)); + } + + inline T *operator --(int) + { + return (T*)InterlockedExchangeAdd((LPLONG)&value, -(LONG)sizeof(T)); + } + + //yes this isn't standard, but it's useful + template <typename INT_TYPE> + inline T *operator &=(INT_TYPE n) + { + T * comperand; + T * exchange; + do { + comperand = value; + exchange = (T *)((LONG)comperand & n); + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + //yes this isn't standard, but it's useful + template <typename INT_TYPE> + inline T *operator |= (INT_TYPE n) + { + T * comperand; + T * exchange; + do { + comperand = value; + exchange = (T *)((LONG)comperand | n); + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + //yes this isn't standard, but it's useful + template <typename INT_TYPE> + inline T operator ^= (INT_TYPE n) + { + T * comperand; + T * exchange; + do { + comperand = value; + exchange = (T *)((LONG)comperand ^ n); + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + //yes this isn't standard, but it's useful + template <typename INT_TYPE_A, typename INT_TYPE_B> + inline T * Mask(INT_TYPE_A bitsToKeep, INT_TYPE_B bitsToSet) + { + T * comperand; + T * exchange; + do { + comperand = value ; + exchange = (T *)(((LONG)comperand & bitsToKeep) | bitsToSet); + }while(!SetIfEqual(exchange, comperand)); + return exchange; + } + + + inline bool BTS(int bit) + { + return InterlockedBTS((volatile int *)&value,bit); + } + inline bool BTC(int bit) + { + return InterlockedBTC((volatile int *)&value,bit); + } + inline bool BTR(int bit) + { + return InterlockedBTR((volatile int *)&value,bit); + } + inline bool BT(int bit) + { + return BT((volatile int *)&value,bit); + } +}; + +typedef Atomic<int> AtomicInt; +typedef Atomic<unsigned int> AtomicUInt; +typedef Atomic<__int64> AtomicInt64; +typedef Atomic<unsigned __int64> AtomicUInt64; + +template +<typename T, typename A = AtomicUInt64> +struct AtomicUnion +{ + A whole; + + AtomicUnion() + { + assert(sizeof(T)<=sizeof(A)); + new((void *)&whole) T();//in place new + + } + + ~AtomicUnion() + { + ((T *)&whole)->~T(); + } + + T &Value() + { + return *(T *)&whole; + } + const T &Value() const + { + return *(const T *)&whole; + } +}; + + +template <typename T> +struct AtomicPtrWithCountStruct +{ + AtomicPtr<T> ptr; + AtomicInt count; +}; + + +template <typename T> +struct PtrWithCountStruct +{ + T* ptr; + int count; +}; + +template <typename T, int MARKBITS=3> +struct BitMarkedAtomicPtr : public AtomicPtr<T> +{ + const int MarkMask() const + { + return (1<<MARKBITS)-1; + } + + const int DataMask() const + { + return ~ThreadBitMask(); + } + + static T * MaskPtr(T *data) + { + return (T*)((int)data & DataMask()); + } + + static int MaskMark(T * data) + { + return ((int)data & MarkMask()); + } + + static T* Mark(T * data, int bit) + { + return (T*)((int)data | 1<<bit); + } + + static T* Unmark(T * data, int bit) + { + return (T*)((int)data & ~(1<<bit)); + } + + T * MaskPtr() + { + return MaskPtr(value); + } + + int MaskMark() + { + return MaskMark(value); + } + + //note new value has the marks in exchange not the original marks + inline bool SetIfMarked(T *exchange, int bit) + { + assert(bit<MARKBITS); + return SetIfSomeBitsAreEqual(exchange,DataMask(),(T*)(1<<bit),1<<bit); + } + + //note new value has the marks in exchange not the original marks + T * ExchangeIfMarked(T *exchange, int bit) + { + assert(bit<MARKBITS); + return CompareSomeBitsAndExchange(exchange,DataMask(),(T*)(1<<bit),1<<bit); + } + + inline bool SetAndClearMarksIfMarked(T *exchange, int bit) + { + return SetIfMarked(MaskPtr(exchange),bit); + } + + inline T * ExchangeAndClearMarksIfMarked(T *exchange, int bit) + { + return ExchangeIfMarked(MaskPtr(exchange),bit); + } + + inline bool SetAndClearOtherMarksIfMarked(T *exchange, int bit) + { + return SetIfMarked(Mark(MaskPtr(exchange),bit), + bit); + } + + //note new value has the marks in exchange not the original marks + T * ExchangeAndClearOtherMarksIfMarked(T *exchange, int bit) + { + return ExchangeIfMarked(Mark(MaskPtr(exchange),bit), + bit); + } + + inline bool SetAndMarkIfMarked(T *exchange, int bit) + { + return SetSomeBitsIfSomeOtherBitsAreEqual( + Mark(MaskPtr(exchange),bit), + DataMask() | 1<<bit, + (T*)(1<<bit), + 1<<bit); + } + + inline T * ExchangeAndMarkIfMarked(T *exchange, int bit) + { + return CompareSomeBitsAndExchangeSomeOtherBits( + Mark(MaskPtr(exchange),bit), + DataMask() | 1<<bit, + (T*)(1<<bit), + 1<<bit); + } + + bool Mark(int bit) + { + assert(bit<MARKBITS); + return BTS(bit); + } + + bool Unmark(int bit) + { + assert(bit<MARKBITS); + return BTC(bit); + } + + bool InvertMark(int bit) + { + assert(bit<MARKBITS); + return BTR(bit); + } + + bool IsMarked(int bit) + { + assert(bit<MARKBITS); + return BT(bit); + } +}; + +template <typename T> +struct AtomicPtrWithCount : public AtomicUnion< AtomicPtrWithCountStruct<T> > +{ + typedef AtomicUnion< PtrWithCountStruct<T>, unsigned __int64 > SimpleUnionType; + AtomicPtrWithCount() + { + whole = 0; + } + AtomicPtrWithCount(unsigned __int64 o) + { + whole = o.whole; + } + AtomicPtrWithCount(T *ptr) + { + SimpleUnionType o; + o.Value().ptr = ptr; + o.Value().count = 0; + whole = o.whole; + } + + operator unsigned __int64() const + { + return whole; + } + T *Ptr() const + { + return Value().ptr; + } + int Count() const + { + return Value().count; + } + unsigned __int64 Whole() const + { + return whole; + } + + AtomicPtr<T> & Ptr() + { + return Value().ptr; + } + AtomicInt & Count() + { + return Value().count; + } + AtomicUInt64 & Whole() + { + return whole; + } + unsigned __int64 SetPtrAndIncCount(T *ptr) + { + SimpleUnionType was; + SimpleUnionType to; + to.Value().ptr = ptr; + do { + was.whole = whole; + to.Value().count = was.Value().count+1; + }while(!whole.SetIfEqual(to.whole,was.whole)); + return to.whole; + } + bool SetIfPtrEqualAndIncCount(T *exchange, T *comperand) + { + SimpleUnionType was; + SimpleUnionType to; + to.ptr = exchange; + do { + was.whole = whole; + if (was.Value().ptr != comperand) return false; + to.Value().count = was.Value().count+1; + }while(!whole.SetIfEqual(to.whole,was.whole)); + return true; + } + unsigned __int64 ExchangeIfPtrEqualAndIncCount(T *exchange, T *comperand) + { + SimpleUnionType was; + SimpleUnionType to; + to.ptr = exchange; + do { + was.whole = whole; + if (was.Value().ptr != comperand) return was.whole; + to.Value().count = was.Value().count+1; + }while(!whole.SetIfEqual(to.whole,was.whole)); + return was.whole; + } + inline T *operator =(T *exchange) + { + SetPtrAndIncCount(exchange); + return exchange; + } + + template <typename INT_TYPE> + inline T *operator +=(INT_TYPE n) + { + T *was; + T *to; + do { + was = Ptr(); + to = was + n; + }while (!SetIfPtrEqualAndIncCount(to,was)); + return to; + } + + template <typename INT_TYPE> + inline T *operator -=(INT_TYPE n) + { + T *was; + T *to; + do { + was = Ptr(); + to = was - n; + }while (!SetIfPtrEqualAndIncCount(to,was)); + return to; + } + inline T *operator ++() + { + return (*this += 1); + } + + inline T *operator --() + { + return (*this -= 1); + } + + inline T *operator ++(int) + { + return (++ *this) - 1; + } + + inline T *operator --(int) + { + return (-- *this) + 1; + } +}; + +template <typename T> +struct AtomicPtrWithMarkStruct +{ + AtomicPtr<T> ptr; + AtomicUInt mark; +}; + + +template <typename T> +struct PtrWithMarkStruct +{ + T* ptr; + unsigned int mark; +}; + +template <typename T> +struct AtomicPtrWithMark : public AtomicUnion< AtomicPtrWithMarkStruct<T> > +{ + typedef AtomicUnion< PtrWithMarkStruct<T>, unsigned __int64 > SimpleUnionType; + AtomicPtrWithMark() + { + whole = 0; + } + AtomicPtrWithMark(unsigned __int64 o) + { + whole = o.whole; + } + AtomicPtrWithMark(T *ptr) + { + SimpleUnionType o; + o.Value().ptr = ptr; + o.Value().mark = 0; + whole = o.whole; + } + + operator unsigned __int64() const + { + return whole; + } + T *Ptr() const + { + return Value().ptr; + } + int Mark() const + { + return Value().mark; + } + unsigned __int64 Whole() const + { + return whole; + } + + AtomicPtr<T> & Ptr() + { + return Value().ptr; + } + AtomicInt & Mark() + { + return Value().mark; + } + AtomicUInt64 & Whole() + { + return whole; + } + + inline bool SetAndClearOtherMarksIfMarked(T *exchange, int bit) + { + assert(bit<32); + SimpleUnionType compareMask; + compareMask.Value().ptr = NULL; + compareMask.Value().mark = 1u<<bit; + + SimpleUnionType exchangeValue; + exchangeValue.Value().ptr = exchange; + exchangeValue.Value().mark = compareMask.Value().mark; + + return whole.SetSomeBitsIfSomeOtherBitsAreEqual(exchangeValue.whole, + (unsigned __int64)-1i64, + compareMask.whole, + compareMask.whole); + } + inline bool SetAndClearMarksIfPtrEqual(T *exchange, T*comparend) + { + SimpleUnionType compareMask; + compareMask.Value().ptr = (T*)-1; + compareMask.Value().mark = 0; + + SimpleUnionType compareValue; + compareValue.Value().ptr = comparend; + compareValue.Value().mark = 0; + + SimpleUnionType exchangeValue; + exchangeValue.Value().ptr = exchange; + exchangeValue.Value().mark = 0; + + return whole.SetSomeBitsIfThoseBitsAreEqual(exchangeValue.whole, + compareValue.whole, + compareMask.whole); + } + + inline bool SetAndClearMarksIfMarked(T *exchange, int bit) + { + assert(bit<32); + SimpleUnionType compareMask; + compareMask.Value().ptr = NULL; + compareMask.Value().mark = 1u<<bit; + + SimpleUnionType exchangeValue; + exchangeValue.Value().ptr = exchange; + exchangeValue.Value().mark = 0; + + return whole.SetSomeBitsIfSomeOtherBitsAreEqual(exchangeValue.whole, + (unsigned __int64)-1i64, + compareMask.whole, + compareMask.whole); + } + //note new value has the marks in exchange not the original marks + unsigned __int64 ExchangeAndClearOtherMarksIfMarked(T *exchange, int bit) + { + assert(bit<32); + SimpleUnionType compareMask; + compareMask.Value().ptr = NULL; + compareMask.Value().mark = 1u<<bit; + + SimpleUnionType exchangeValue; + exchangeValue.Value().ptr = exchange; + exchangeValue.Value().mark = compareMask.Value().mark; + + return whole.CompareSomeBitsAndExchangeSomeOtherBits(exchangeValue.whole, + exchangeMask.whole, + compareMask.whole, + compareMask.whole); + } + + unsigned __int64 ExchangeAndClearMarksIfMarked(T *exchange, int bit) + { + assert(bit<32); + SimpleUnionType compareMask; + compareMask.Value().ptr = NULL; + compareMask.Value().mark = 1u<<bit; + + SimpleUnionType exchangeValue; + exchangeValue.Value().ptr = exchange; + exchangeValue.Value().mark = 0; + + return whole.CompareSomeBitsAndExchangeSomeOtherBits(exchangeValue.whole, + exchangeMask.whole, + compareMask.whole, + compareMask.whole); + } + + + bool Mark(int bit) + { + assert(bit<32); + return whole.BTS(bit); + } + + bool Unmark(int bit) + { + assert(bit<32); + return whole.BTC(bit); + } + + bool InvertMark(int bit) + { + assert(bit<32); + return whole.BTR(bit); + } + + bool IsMarked(int bit) + { + assert(bit<32); + return whole.BT(bit); + } +}; + +struct MPStackElement +{ + MPStackElement * next; +}; + +class MPMarkStack +{ +protected: + AtomicPtrWithMark<MPStackElement> tos; + AtomicUInt availableThreadBits; + AtomicInt lastReserved; + +public: + + MPStackElement* ExchangeStack(MPStackElement *newStack = NULL) + { + AtomicPtrWithMark<MPStackElement>::SimpleUnionType newWhole, ret; + newWhole.Value().mark = 0; + newWhole.Value().ptr = newStack; + ret.whole = tos.whole.Exchange(newWhole.whole); + return ret.Value().ptr; + } + + MPMarkStack() + { + lastReserved = 0; + tos.whole = 0; + availableThreadBits = (unsigned)-1; + } + + //expensive - call once per thread to reserve a bit + int ReserveThreadBit() + { + int offset = lastReserved; + //since locked operations are slow we only do + //bit tests on bits that look acceptable because + //of a nonlocked read. + unsigned readOnce = availableThreadBits; + for (int i=0; i< 32;++i){ + const int bit = (31&(i+offset)); + const unsigned mask = 1u<<bit; + if ((readOnce & mask)!=0){ + if (availableThreadBits.BTR(bit)) { + lastReserved = ((bit+1)&31);//try the next one + return bit+1; + } + readOnce = availableThreadBits; + } + } + return 0; + } + int BlockingReserveThreadBit() + { + int i; + while (0==(i=ReserveThreadBit())); + return i; + } + int ReturnThreadBit(int i) + { + availableThreadBits.BTS(i-1); + } + + void PushElement(MPStackElement *element) + { + MPStackElement * next; + do { + next = tos.Value().ptr; + element->next = tos.Value().ptr; + }while(!tos.SetAndClearMarksIfPtrEqual(element,next)); + } + + void PushList(MPStackElement *top, MPStackElement *bottom) + { + MPStackElement * next; + do { + next = tos.Value().ptr; + bottom->next = tos.Value().ptr; + }while(!tos.SetAndClearMarksIfPtrEqual(top,next)); + } + + MPStackElement* PopElement(int i) + { + for(;;) { + tos.Value().mark.BTS(i-1); + MPStackElement * was = tos.Value().ptr; + if (was == NULL) return NULL; + if (tos.SetAndClearMarksIfMarked(was->next,i-1)) return was; + } + } + MPStackElement* PopElement() + { + int id = BlockingReserveThreadBit(); + MPStackElement * ret = PopElement(id); + ReturnThreadBit(id); + return ret; + } +}; +class MPCountStack +{ +protected: + AtomicPtrWithCount<MPStackElement> tos; + +public: + + MPStackElement* ExchangeStack(MPStackElement *newStack = NULL) + { + return tos.Value().ptr.Exchange(newStack); + } + + MPCountStack() + { + tos = 0; + } + + + void PushElement(MPStackElement *element) + { + MPStackElement * next; + do { + next = tos.Value().ptr; + element->next = tos.Value().ptr; + }while(!tos.Value().ptr.SetIfEqual(element,next)); + } + + void PushList(MPStackElement *top, MPStackElement *bottom) + { + MPStackElement * next; + do { + next = tos.Value().ptr; + bottom->next = tos.Value().ptr; + }while(!tos.Value().ptr.SetIfEqual(top,next)); + } + + MPStackElement* PopElement() + { + AtomicPtrWithCount<MPStackElement>::SimpleUnionType was; + AtomicPtrWithCount<MPStackElement>::SimpleUnionType to; + do { + was.whole = tos.whole; + if (was.Value().ptr == NULL) return NULL; + to.Value().count = was.Value().count + 1; + to.Value().ptr = was.Value().ptr->next; + }while(!tos.whole.SetIfEqual(was.whole,to.whole)); + return was.Value().ptr; + } +}; + +class SimpleStack +{ +protected: + MPStackElement * tos; + +public: + + MPStackElement* ExchangeStack(MPStackElement *newStack = NULL) + { + MPStackElement* was = tos; + tos = newStack; + return was; + } + + SimpleStack() + { + tos = 0; + } + + + void PushElement(MPStackElement *element) + { + element->next = tos; + tos = element; + } + + void PushList(MPStackElement *top, MPStackElement *bottom) + { + bottom->next = tos; + tos = top; + } + + MPStackElement* PopElement() + { + MPStackElement* was = tos; + if (was) tos = was->next; + return was; + } +}; + +template <typename T> +struct MPMemBlock : public MPStackElement +{ + T data; + char bottomOfBlock; +}; + +#define OFFSET_OF_MEMBER(type,member) ((int)&(((type *)0)->member)) + +//put one of these in each thread object +//and you have thread local allocation +template <typename T, typename BLOCK_ALLOCATOR> +struct SimpleBlockAllocator +{ + SimpleStack data; + + typedef SimpleBlockAllocator<T,BLOCK_ALLOCATOR> AllocatorType; + + BLOCK_ALLOCATOR & MyAllocator; + + SimpleBlockAllocator(BLOCK_ALLOCATOR &source) + :MyAllocator(source) + {} + + int ReserveThreadBit(){return 1;} + int BlockingReserveThreadBit(){return 1;} + void ReturnThreadBit(int){ } + + int Size() const { return sizeof(T); } + void AddBlock() + { + int blockSize = MyAllocator.Size(); + int elementSize = sizeof(MPMemBlock<T>); + assert( MyAllocator.Size() >= elementSize); + MPMemBlock<T> *bottom = (MPMemBlock<T> *)MyAllocator.Allocate(); + void *prev = 0; + MPMemBlock<T> *top = bottom; + do { + top->bottomOfBlock = 0; + top->next = prev; + prev = (void *)top; + ++top; + }while( (blockSize-=elementSize) >= elementSize); + bottom->bottomOfBlock = 1; + data.PushList(top-1,bottom); + } + void *Allocate(int) + { + return Allocate(); + } + void *Allocate() + { + MPMemBlock<T> *ret; + for(;;) { + ret = (MPMemBlock<T> *)data.PopElement(); + if (ret!=NULL) return &ret->data; + AddBlock(); + } + } + void Deallocate(void *ob) + { + if (!ob) return; + MPMemBlock<T> *ret = (MPMemBlock<T> *) + ((char *)data - OFFSET_OF_MEMBER(MPMemBlock<T>,ob)); + data.PushElement(ret); + } + //all memory allocated must be Deallocated before Clear() or the destructor is called + void Clear() + { + MPMemBlock<T> *stackWas = (MPMemBlock<T> *)data.ExchangeStack(NULL); + MPMemBlock<T> *blocks = NULL; + while(stackWas){ + if (stackWas->bottomOfBlock){ + if (blocks) { + blocks->next = stackWas; + } + blocks = stackWas; + } + stackWas = (MPMemBlock<T> *)(stackWas->next); + } + while(blocks) + { + stackWas = (MPMemBlock<T> *)blocks->next; + MyAllocator.Deallocate(blocks); + blocks = stackWas; + } + } + //all memory allocated must be Deallocated before Clear() or the destructor is called + ~SimpleBlockAllocator() + { + Clear(); + } +}; + +template<class T> +struct SimpleAllocator +{ + void *Allocate() { return (void *)new char[sizeof(T)]; } + void *Allocate(int) { return Allocate(); } + + int ReserveThreadBit(){return 1;} + int BlockingReserveThreadBit(){return 1;} + void ReturnThreadBit(int){} + + void Deallocate(void *data) { delete[] (char[])data; } + int Size() const { return sizeof(T); } + typedef SimpleAllocator AllocatorType; +}; + +enum DontReserve{ DONT_RESERVE }; + +template<class T> +struct ThreadMarkedAllocator +{ + T& allocator; + int bit; + + ThreadMarkedAllocator(T &sourceAllocator) + :allocator(sourceAllocator) + ,bit(sourceAllocator.BlockingReserveThreadBit()) + {} + + ThreadMarkedAllocator(T &sourceAllocator,DontReserve) + :allocator(sourceAllocator) + ,bit(0) + {} + + ~ThreadMarkedAllocator() + { + allocator.ReturnThreadBit(bit); + } + + bool ReserveThreadBit() + { + if (bit==0) bit=sourceAllocator.ReserveThreadBit(); + return bit!=0; + } + void BlockingReserveThreadBit() + { + if (bit==0) bit=sourceAllocator.BlockingReserveThreadBit(); + } + void ReturnThreadBit() + { + int was = bit; + bit = 0; + sourceAllocator.ReturnThreadBit(was); + } + + void *Allocate() + { + assert(bit!=0); + return allocator.Allocate(bit); + } + + void Deallocate(void *data) { allocator.Deallocate(data); } + int Size() const { return Allocator.Size(); } + typedef ThreadMarkedAllocator<T> AllocatorType; +}; + +template <typename T, typename BLOCK_ALLOCATOR> +struct MPMarkFreeListAllocator +{ + MPMarkStack data; + + typedef MPMarkFreeListAllocator<T,BLOCK_ALLOCATOR> AllocatorType; + + BLOCK_ALLOCATOR &MyAllocator; + + MPMarkFreeListAllocator(BLOCK_ALLOCATOR &source) + :MyAllocator(source) + {} + + int ReserveThreadBit(){return data.ReserveThreadBit();} + int BlockingReserveThreadBit(){return data.BlockingReserveThreadBit();} + void ReturnThreadBit(int bit){ data.ReturnThreadBit(bit); } + + int Size() const { return sizeof(T); } + void AddBlock() + { + int blockSize = MyAllocator.Size(); + int elementSize = sizeof(MPMemBlock<T>); + assert( MyAllocator.Size() >= elementSize); + MPMemBlock<T> *bottom = (MPMemBlock<T> *)MyAllocator.Allocate(); + void *prev = 0; + MPMemBlock<T> *top = bottom; + do { + top->bottomOfBlock = 0; + top->next = prev; + prev = (void *)top; + ++top; + }while( (blockSize-=elementSize) >= elementSize); + bottom->bottomOfBlock = 1; + data.PushList(top-1,bottom); + } + void *Allocate(int mark) + { + MPMemBlock<T> *ret; + for(;;) { + ret = (MPMemBlock<T> *)data.PopElement(mark); + if (ret!=NULL) return ret; + AddBlock(); + } + } + void *Allocate() + { + MPMemBlock<T> *ret; + for(;;) { + ret = (MPMemBlock<T> *)data.PopElement(); + if (ret!=NULL) return &ret->data; + AddBlock(); + } + } + void Deallocate(void *ob) + { + if (!ob) return; + MPMemBlock<T> *ret = (MPMemBlock<T> *) + ((char *)data - OFFSET_OF_MEMBER(MPMemBlock<T>,ob)); + data.PushElement(ret); + } + //all memory allocated must be Deallocated before Clear() or the destructor is called + void Clear() + { + MPMemBlock<T> *stackWas = (MPMemBlock<T> *)data.ExchangeStack(NULL); + MPMemBlock<T> *blocks = NULL; + while(stackWas){ + if (stackWas->bottomOfBlock){ + if (blocks) { + blocks->next = stackWas; + } + blocks = stackWas; + } + stackWas = (MPMemBlock<T> *)(stackWas->next); + } + while(blocks) + { + stackWas = (MPMemBlock<T> *)blocks->next; + MyAllocator.Deallocate(blocks); + blocks = stackWas; + } + } + //all memory allocated must be Deallocated before Clear() or the destructor is called + ~MPMarkFreeListAllocator() + { + Clear(); + } +}; +template <typename T, typename BLOCK_ALLOCATOR> +struct MPCountFreeListAllocator +{ + MPCountStack data; + + typedef MPCountFreeListAllocator<T,BLOCK_ALLOCATOR> AllocatorType; + + BLOCK_ALLOCATOR & MyAllocator; + + MPCountFreeListAllocator(BLOCK_ALLOCATOR &source) + :MyAllocator(source) + {} + + int ReserveThreadBit(){return 1;} + int BlockingReserveThreadBit(){return 1;} + void ReturnThreadBit(int){ } + + int Size() const { return sizeof(T); } + void AddBlock() + { + int blockSize = MyAllocator.Size(); + int elementSize = sizeof(MPMemBlock<T>); + assert( MyAllocator.Size() >= elementSize); + MPMemBlock<T> *bottom = (MPMemBlock<T> *)MyAllocator.Allocate(); + void *prev = 0; + MPMemBlock<T> *top = bottom; + do { + top->bottomOfBlock = 0; + top->next = prev; + prev = (void *)top; + ++top; + }while( (blockSize-=elementSize) >= elementSize); + bottom->bottomOfBlock = 1; + data.PushList(top-1,bottom); + } + void *Allocate(int) + { + return Allocate(); + } + void *Allocate() + { + MPMemBlock<T> *ret; + for(;;) { + ret = (MPMemBlock<T> *)data.PopElement(); + if (ret!=NULL) return &ret->data; + AddBlock(); + } + } + void Deallocate(void *ob) + { + if (!ob) return; + MPMemBlock<T> *ret = (MPMemBlock<T> *) + ((char *)data - OFFSET_OF_MEMBER(MPMemBlock<T>,ob)); + data.PushElement(ret); + } + //all memory allocated must be Deallocated before Clear() or the destructor is called + void Clear() + { + MPMemBlock<T> *stackWas = (MPMemBlock<T> *)data.ExchangeStack(NULL); + MPMemBlock<T> *blocks = NULL; + while(stackWas){ + if (stackWas->bottomOfBlock){ + if (blocks) { + blocks->next = stackWas; + } + blocks = stackWas; + } + stackWas = (MPMemBlock<T> *)(stackWas->next); + } + while(blocks) + { + stackWas = (MPMemBlock<T> *)blocks->next; + MyAllocator.Deallocate(blocks); + blocks = stackWas; + } + } + //all memory allocated must be Deallocated before Clear() or the destructor is called + ~MPCountFreeListAllocator() + { + Clear(); + } +}; + +struct Counted +{ + mutable AtomicInt refCount; + Counted():refCount(0){} +}; + +//it is NOT safe to read or write from more than one thread at a time +template <typename T> +class CountedPtr +{ + +protected: + T *value; + void decRefCount() + { + decRefCount(value); + } +public: + static void decRefCount(T *ptr) + { + if (ptr && 0 == --ptr->refCount){ + delete ptr; + } + } + void SetValueIgnoringRefCounts(T *ptr) + { + value = ptr; + } + CountedPtr():letter(NULL){} + + CountedPtr(T *ptr) + { + value = ptr; + if (value) ++value->refCount; + } + + operator const T *() const { return value; } + operator T *() { return value; } + + CountedPtr<T> & operator=(const T *ptr) + { + if (ptr) ++ptr->refCount; + decRefCount(); + value = ptr; + return *this; + } + + operator bool() const + { return value!=NULL; } + + bool operator !() const + { return value==NULL; } + + + const T & operator*() const + { return *value; } + + T & operator*() + { return *value; } + + T * operator->() + { assert(value); return value; } + const T * operator->()const + { assert(value); return value; } + + ~CountedPtr() + { + decRefCount(); + } +}; + +template <typename T> +class AtomicCountedPtr +{ +protected: + mutable AtomicPtrWithCount<T> value; +public: + + + CountedPtr<T> & LoadValue(CountedPtr<T> &dest) const + { +retry: + AtomicPtrWithCount<T>::SimpleUnionType inc; + inc.Value().ptr = 0; + inc.Value().count = 1; + AtomicPtrWithCount<T>::SimpleUnionType ret; + ret.whole = (value.whole+=inc.whole); //increment in ptr + dest = ret.Value().ptr ; //increment in object + AtomicPtrWithCount<T>::SimpleUnionType update; + update.Value().ptr = ret.Value().ptr; + for(;;){ + update.Value().count = ret.Value().count-1; + AtomicPtrWithCount<T>::SimpleUnionType current; + current.whole = value.whole.SetIfEqual(update.whole,ret.whole); + if (current.whole == ret.whole) break; //successfully decremented in ptr + if (current.Value().ptr != ret.Value().ptr) || current.Value().count < 1){ + //because of the line "dest = ret.Value().ptr" above + //the old value will decrement in the prev object as the next object is loaded + goto retry; + } + ret.Value().count = current.Value().count; + } + return dest; + } + + CountedPtr<T> & LoadValue(CountedPtr<T> &dest, int &count) const + { +retry: + AtomicPtrWithCount<T>::SimpleUnionType inc; + inc.Value().ptr = 0; + inc.Value().count = 1; + AtomicPtrWithCount<T>::SimpleUnionType ret; + ret.whole = (value.whole+=inc.whole); //increment in ptr + dest = ret.Value().ptr ; //increment in object + AtomicPtrWithCount<T>::SimpleUnionType update; + update.Value().ptr = ret.Value().ptr; + for(;;){ + update.Value().count = ret.Value().count-1; + AtomicPtrWithCount<T>::SimpleUnionType current; + current.whole = value.whole.SetIfEqual(update.whole,ret.whole); + if (current.whole == ret.whole) break; //successfully decremented in ptr + if (current.Value().ptr != ret.Value().ptr) || current.Value().count < 1){ + //because of the line "dest = ret.Value().ptr" above + //the old value will decrement in the prev object as the next object is loaded + goto retry; + } + ret.Value().count = current.Value().count; + } + count = ret.Value().count; + return dest; + } + + CountedPtr<T> & ExchangeInPlace(CountedPtr<T> &dest) + { + AtomicPtrWithCount<T>::SimpleUnionType destWhole; + destWhole.Value().ptr = dest; + destWhole.Value().count = 0; + destWhole.whole = value.whole.Exchange(destWhole.whole); + if (destWhole.Value().ptr != NULL && destWhole.Value().count != 0) + destWhole.Value().ptr->refCount += destWhole.Value().count; + dest.SetValueIgnoringRefCounts(destWhole.Value().ptr); + return dest; + } + + CountedPtr<T> & Exchange(CountedPtr<T> &dest, T *source) + { + dest = source; + return ExchangeInPlace(dest); + } + + bool CompareExchange(CountedPtr<T> &dest, T *exchange, T * comparend) + { + CountedPtr<T> holdExchange(exchange); +retry: + AtomicPtrWithCount<T>::SimpleUnionType comp; + if (LoadValue(dest,comp.Value().count) != comparend) return false; + comp.Value().ptr = dest; + AtomicPtrWithCount<T>::SimpleUnionType exch; + exch.Value().ptr=exchange; + exch.Value().count = 0; + AtomicPtrWithCount<T>::SimpleUnionType ret; + for (;;){ + ret.whole = value.whole.CompareExchange(exch.whole,comp.whole); + if (ret.whole == comp.whole) { + //transfer increment from holdExchange to this + holdExchange.SetValueIgnoringRefCounts(NULL); + //had inc from Load and the one from this + if (dest) { + const int dec = 1-comp.Value().count; + if (dec!=0) dest->refCount -= dec; + } + return true; + } + if (ret.Value().ptr != comp.Value().ptr) { + //cant return the new pointer because there's no + //reason to think it hasn't been collected already + goto retry; + } + comp.Value().count = ret.Value().count; + } + } + + bool SetIfEqual(T *exchange, T * comparend) + { + CountedPtr<T> holdExchange(exchange); +retry: + AtomicPtrWithCount<T>::SimpleUnionType comp; + comp.whole = value.whole; + if (comp.Value().ptr != comparend) return false; + AtomicPtrWithCount<T>::SimpleUnionType exch; + exch.Value().ptr=exchange; + exch.Value().count = 0; + AtomicPtrWithCount<T>::SimpleUnionType ret; + for (;;){ + ret.whole = value.whole.CompareExchange(exch.whole,comp.whole); + if (ret.whole == comp.whole) { + if (comp.Value().ptr) { + const int dec = 1-comp.Value().count; + if (dec!=0 && 0==(comp.Value().ptr->refCount -= dec)){ + delete comp.Value().ptr; + }; + } + //transfer increment from holdExchange to this + holdExchange.SetValueIgnoringRefCounts(NULL); + return true; + } + if (ret.Value().ptr != comp.Value().ptr) { + return false; + } + comp.Value().count = ret.Value().count; + } + } + + void operator = (const T *ptr) + { + CountedPtr<T> dest(ptr); + ExchangeInPlace(dest); + } + + //not recommended because value won't be consistant throughout + //a given expression + operator bool() const + { return value.Value().ptr!=NULL; } + + //not recommended because value won't be consistant throughout + //a given expression + bool operator !() const + { return value.Value().ptr==NULL; } + + + AtomicCountedPtr() + {} + + ~AtomicCountedPtr() + { + *this = (T*)NULL; + } +}; + +class MPQueueBase +{ + AtomicUInt * data; + int len; + + struct AtomicHeadAndTail{ + AtomicInt head; + AtomicInt tail; + }; + + AtomicUnion<AtomicHeadAndTail> headAndTail; + + struct HeadAndTailStruct{ + int head; + int tail; + }; + union HeadAndTailUnion + { + HeadAndTailStruct value; + unsigned __int64 whole; + }; + + AtomicUInt & Index(int i) + { + return data[i & (len-1)]; + } + bool IsNil(int i) + { + return (i & 1) != 0; + } + + public: + + int Used() const + { + HeadAndTailUnion ht; + ht.whole = headAndTail.whole; + return ht.value.head-ht.value.tail; + + } + bool Empty() const + { + HeadAndTailUnion ht; + ht.whole = headAndTail.whole; + return ht.value.head==ht.value.tail; + } + int Available() const + { + return MaxLen()-Used(); + + } + int MaxLen() const + { + return len; + } + + MPQueueBase(int lenLn2) + :len(1<<lenLn2) + { + headAndTail.whole = 0; + data = new AtomicUInt[len]; + for (int i=0;i<len;++i) data[i] = 1; + } + + ~MPQueueBase() + { + delete [] data; + } + + bool Put(int v) + { + assert((v&1)==0); + for(;;){ + HeadAndTailUnion ht; + ht.whole = headAndTail.whole; + if (ht.value.head-ht.value.tail >= len) return false; + + int wasThere = Index(ht.value.head); + + if (IsNil(wasThere)){ + if (Index(ht.value.head).SetIfEqual(v,wasThere)){ + headAndTail.Value().head.SetIfEqual(1+ht.value.head,ht.value.head); + return true; + } + } + headAndTail.Value().head.SetIfEqual(1+ht.value.head,ht.value.head); + } + } + int Get() + { + for(;;){ + HeadAndTailUnion ht; + ht.whole = headAndTail.whole; + if (ht.value.head-ht.value.tail < 1) return 0; + + int wasThere = Index(ht.value.tail); + int newNil = (ht.value.tail|1); + + if (!IsNil(wasThere)){ + if (Index(ht.value.tail).SetIfEqual(newNil,wasThere)){ + headAndTail.Value().tail.SetIfEqual(1+ht.value.tail,ht.value.tail); + return wasThere; + } + } + headAndTail.Value().tail.SetIfEqual(1+ht.value.tail,ht.value.tail); + } + } +}; + +template <typename T> +class MPQueue +{ + MPQueueBase queue; +public: + MPQueue(int lenLn2) + :queue(lenLn2) + {} + T *Get() + { + return (T*)queue.Get(); + } + bool Put(T * v) + { + return queue.Put((int)v); + } + bool Empty() const + { + return queue.Empty(); + } + int Used() const + { + return queue.Used(); + + } + int Available() const + { + return queue.Available(); + + } + int MaxLen() const + { + return queue.MaxLen(); + } +}; + +class QueueBase +{ + unsigned * data; + int len; + + int head; + int tail; + + unsigned & Index(int i) + { + return data[i & (len-1)]; + } + +public: + + int Used() const + { + return head-tail; + + } + bool Empty() const + { + return head==tail; + + } + + int Available() const + { + return MaxLen()-Used(); + + } + int MaxLen() const + { + return len; + } + + QueueBase(int lenLn2) + :len(1<<lenLn2) + ,head(0) + ,tail(0) + { + data = new unsigned[len]; + for (int i=0;i<len;++i) data[i] = 1; + } + + ~QueueBase() + { + delete [] data; + } + + bool Put(int v) + { + if (Available() < 1) return false; + Index(head++) = v; + return true; + } + + int Get() + { + if (Used() < 1) return 0; + return Index(tail++); + } +}; +template <typename T> +class Queue +{ + QueueBase queue; +public: + Queue(int lenLn2) + :queue(lenLn2) + {} + T *Get() + { + return (T*)queue.Get(); + } + bool Put(T * v) + { + return queue.Put((int)v); + } + bool Empty() const + { + return queue.Empty(); + } + int Used() const + { + return queue.Used(); + + } + int Available() const + { + return queue.Available(); + + } + int MaxLen() const + { + return queue.MaxLen(); + } +}; + + +#endif |