doxygen/html/gm_append_buffer_8h_source.html

 /************************************************************************
 **
 ** Copyright (C) 2014 by Carlos Augusto Teixera Mendes
 ** All rights reserved.
 **
 ** This file is part of the "GeMA" software. It's use should respect
 ** the terms in the license agreement that can be found together
 ** with this source code.
 ** It is provided AS IS, with NO WARRANTY OF ANY KIND,
 ** INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR
 ** A PARTICULAR PURPOSE.
 **
 ************************************************************************/

 #ifndef _GEMA_APPEND_BUFFER_H_
 #define _GEMA_APPEND_BUFFER_H_

 #include "gmTrace.h"
 #include <assert.h>

 #include <QAtomicInteger>
 #include <QVarLengthArray>

 #include "gmSpinLock.h"
 #include "gmThreadManager.h"
 #include "gmThreadLocalStorage.h"

 // Sanity checks for the current implementation.
 // GmSingleAppendBuffer keeps its next free index in a QAtomicInteger<size_t>,
 // so 64 bit atomic integers must be available on the target platform.
 #ifndef Q_ATOMIC_INT64_IS_SUPPORTED
 #error No 64 bit support for atomic operations
 #endif

 // The check below is kept for reference only. It was disabled due to QTBUG-82864.
 //#ifndef Q_ATOMIC_INT64_FETCH_AND_ADD_IS_ALWAYS_NATIVE
 //#error No 64 bit support for Fetch and Add operations
 //#endif

 // The code in this file assumes a 64 bit size_t (buffer positions are stored as size_t
 // and GmSingleAppendBuffer::append() computes their differences as a qint64).
 static_assert(sizeof(size_t) == 8, "Unexpected size_t size");


 /**
  * A virtual class representing a buffer of T objects that can be appended in a
  * thread-safe way. Concrete implementations in this file are GmPerThreadAppendBuffer
  * and GmSingleAppendBuffer.
  */
 template <class T> class GmAppendBuffer
 {
 public:
   /** Virtual destructor. */
   virtual ~GmAppendBuffer() {}

   /** Releases the memory used by the buffer, returning it to a recently constructed state. */
   virtual void clear() = 0;

   /**
    * Informs the buffer of the expected number of entries that will be filled by
    * (concurrent) calls to append().
    *
    * @param size Expected number of entries.
    */
   virtual void reserve(size_t size) = 0;

   /**
    * Appends val to the buffer in a thread safe way. Can grow the buffer if needed.
    *
    * @param val The value to be appended.
    */
   virtual void append(const T& val) = 0;

   /**
    * Appends val to the buffer in a thread safe way, using the given tid to access
    * TLS storage.
    *
    * @param tid The id of the calling thread.
    * @param val The value to be appended.
    */
   virtual void appendFromThread(int tid, const T& val) = 0;

   /** Returns the number of entries in the buffer. Must be called from the main thread only. */
   virtual size_t size() const = 0;

   /**
    * Returns a vector filled with the buffer data. After this call, no further calls to
    * append() can be made (the implementations in this file assert this; presumably the
    * restriction is lifted by clear() -- confirm against the full documentation).
    * The implementations in this file return NULL if the vector can not be allocated.
    */
   virtual T* data() = 0;

   /** Returns an estimate of the total memory used by the buffer, in bytes. */
   virtual size_t usedMemory() const = 0;
 };


 template <class T, class Base = GmAppendBuffer<T> > class GmPerThreadAppendBuffer : public Base
 {
 public:

   GmPerThreadAppendBuffer(size_t initSize, double resizeFactor = 2.0, int numThreads = -1)
   {
     S_TRACE();
     Q_UNUSED(resizeFactor);

     assert(GmThreadManager::inMainThread());
     assert(initSize >= 0);
     assert(numThreads >= -1 && numThreads <= GmThreadManager::maxWorkerThreads());

     // numThreads set to 0 is used to inform us that only the global thread will be used
     _globalOnly = (numThreads == 0);
     _nt = GmOmpAdjustNumThreads(numThreads);

     _dataBuffer = NULL;
     _dataSize   = 0;

     if(initSize)
       reserve(initSize);
   }

   ~GmPerThreadAppendBuffer()
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());

     if(_dataBuffer != _data.localData(0).data())  // Can't delete _dataBuffer if it is pointing to a _data internal buffer.
       delete[] _dataBuffer;                       // It will be deleted by _data. This only happens if no threads where used.
   }

   // See comments on the base class
   virtual void clear()
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());

     if(_dataBuffer != _data.localData(0).data())  // Can't delete _dataBuffer if it is pointing to a _data internal buffer
       delete[] _dataBuffer;                       // It will be cleared by _data. This only happens if no threads where used.
     _dataBuffer = NULL;
     _dataSize   = 0;

     for(int i = 0, nb = _data.size(); i < nb; i++)
     {
       _data.localData(i).clear();
       _data.localData(i).squeeze();
     }
   }

   virtual void reserve(size_t bsize)
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());
     assert(!_dataBuffer);
     assert(size() == 0);

     if(_globalOnly)
       _data.localData(0).reserve((int)bsize);  // TODO : Use a buffer with a size_t maximum capacity
     else
     {
       // Although it is possible that both the main thread buffer and the
       // worker thread buffers are used, we are going to reserve space for the
       // workers only.  The mixed option should happend only when a physics
       // supports threading and another doesn't.
       assert(_nt > 0 && _nt <= GmThreadManager::maxWorkerThreads());

       size_t s = (bsize +_nt-1)/_nt; // bsize / _nt rounded up

       for(int i = 1; i < _nt; i++)
         _data.localData(i).reserve((int)s);
     }
   }

   // See comments on the base class
   virtual void append(const T& val)
   {
     S_TRACE();
     assert(!_dataBuffer);

     _data.localData().append(val);
   }

   // See comments on the base class
   virtual void appendFromThread(int tid, const T& val)
   {
     S_TRACE();
     assert(!_dataBuffer);
     assert(!_globalOnly || (_globalOnly && tid == 0));

     _data.localData(tid).append(val);
   }

   // See comments on the base class
   virtual size_t size() const
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());

     // If data() was called, returns the saved size from the previous call
     if(_dataBuffer)
       return _dataSize;

     size_t s = 0;
     for(int i = 0, nb = _data.size(); i < nb; i++)
       s += _data.localData(i).size();
     return s;
   }

   // See comments on the base class
   virtual T* data()
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());

     // If the user has already called data(), returns the same buffer as the previous call
     if(_dataBuffer)
       return _dataBuffer;

     size_t s = size();

     if(s == _data.localData(0).size())  // Only the local buffer was filled
     {
       _dataBuffer = _data.localData(0).data();
       _dataSize   = s;
     }
     else
     {
       _dataBuffer = new(std::nothrow) T[s];
       if(_dataBuffer)
       {
         size_t o = 0;
         for(int i = 0, nb = _data.size(); i < nb; i++) // Traverses the main thread and the worker thread buffers
         {
           // Copy buffer data
           QVarLengthArray<T>& buffer = _data.localData(i);

           size_t n = buffer.size();
           GmPmemcpy(_dataBuffer + o, buffer.data(), n * sizeof(T), _nt);
           o += n;

           // Since we have allocated a new full buffer, lets release the per thread buffers
           buffer.clear();
           buffer.squeeze();
         }
         assert(o == s);
         _dataSize   = s;
       }
     }

     return _dataBuffer;
   }

   // See comments on the base class
   virtual size_t usedMemory() const
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());

     if(_dataBuffer) // After calling data()
     {
       if(_dataBuffer == _data.localData(0).data())
         return _data.localData(0).capacity() * sizeof(T);
       else
         return _dataSize * sizeof(T);
     }

     size_t s = 0;
     for(int i = 0, nb = _data.size(); i < nb; i++)
       s += _data.localData(i).capacity();  // capacity is the true used memory, and not size...
     return s * sizeof(T);
   }

 protected:
   GmTLS< QVarLengthArray<T>, false > _data;
   bool   _globalOnly;
   int    _nt;
   T*     _dataBuffer;
   size_t _dataSize;
 };


 /**
  * An implementation of the GmAppendBuffer interface based on synchronized access to a
  * shared buffer.
  *
  * Each append() takes its global position from an atomic counter and writes the value to
  * the buffer owning that position. Buffers are kept in a doubly linked list of ControllData
  * blocks; when the current buffer is full, a new one is created under a spin lock and
  * published through an atomic pointer. The first call to data() gathers every block into
  * a single contiguous vector. Since blocks are gathered with GmPmemcpy(), T is expected
  * to be safely copyable with memcpy.
  *
  * With the exception of append() and appendFromThread(), every method must be called from
  * the main thread.
  */
 template <class T, class Base = GmAppendBuffer<T>> class GmSingleAppendBuffer : public Base
 {
 public:
   /**
    * Buffer constructor. Must be called from the main thread.
    *
    * @param initSize     Number of entries to pre allocate through reserve(). If zero, nothing
    *                     is allocated and reserve() must be called before the first append().
    * @param resizeFactor Growth factor applied to the total size when a new buffer is needed.
    *                     Must be greater than 1.0.
    * @param numThreads   Value in the range [-1, GmThreadManager::maxWorkerThreads()], adjusted
    *                     by GmOmpAdjustNumThreads(). The result is the number of threads
    *                     considered for the parallel memory copy done by data().
    */
   GmSingleAppendBuffer(size_t initSize, double resizeFactor = 2.0, int numThreads = -1)
     : _controll(NULL)
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());
     assert(resizeFactor > 1.0);
     assert(initSize >= 0);
     assert(numThreads >= -1 && numThreads <= GmThreadManager::maxWorkerThreads());

     _nt = GmOmpAdjustNumThreads(numThreads);

     _resizeFactor = resizeFactor;
     _nextIndex    = 0;
     _head         = NULL;
     _dataBuffer   = NULL;

     if(initSize)
       reserve(initSize);
   }

   /** Destructor. Releases the allocated memory. */
   ~GmSingleAppendBuffer()
   {
     S_TRACE();
     clear();
   }

   /**
    * Releases the buffer list and the vector returned by data() (if any), resetting the
    * entry counter. reserve() must be called again before new calls to append().
    */
   virtual void clear()
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());

     clearList();
     delete[] _dataBuffer;
     _dataBuffer = NULL;

     _nextIndex = 0;
   }

   /**
    * Allocates the first buffer with room for bsize entries. Should not be called if a
    * non zero size was given in the constructor (there can be no previous buffer).
    *
    * @param bsize Number of entries in the first buffer.
    */
   virtual void reserve(size_t bsize)
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());
     assert(!_head);
     assert(!_dataBuffer);

     _head = new ControllData(bsize, NULL);
     _controll.store(_head);
   }

   /**
    * Appends val to the buffer, creating a new buffer block if needed. Requires a previous
    * call to reserve() and can not be called after data().
    *
    * If allocating a new block throws, the lock is released and the exception is rethrown.
    */
   virtual void append(const T& val)
   {
     S_TRACE();
     assert(_head);
     assert(!_dataBuffer);

     // Get our index in the buffer.  It might be in an unallocated position!
     // In that case, no matter who allocates the next buffer, our global
     // position is fixed in pos.
     size_t pos = _nextIndex.fetchAndAddAcquire(1);

     while(1)
     {
       // Get the pointer for the current control structure
       ControllData* p = _controll.loadAcquire();
       assert(p);

       // Does 'pos' belong to the buffer B pointed by p?
       // There are 3 possible options:
       //  a) pos belongs to B: we can just write it
       //  b) pos references a position that is further away than the last position
       //     in B.  We must allocate a new buffer or retry with a buffer allocated
       //     by another thread.
       //  c) pos belongs to a buffer that came before B.  This can only happen
       //     in a very unlikely case where the current buffer fills but before
       //     the current thread allocates a new buffer, another thread does the
       //     allocation, the new buffer is also filled and yet another buffer is
       //     allocated.  In this very unlikely case, we just traverse the control
       //     list until we find the correct buffer.  Notice that this is safe
       //     since the previous pointer is written when the control data is
       //     created and never again changed.
       qint64 index = pos - p->_offset;  // MUST be SIGNED!

       if(index < (qint64)p->_size)  // Cases a) or c)
       {
         // If we are in case c), lets traverse the control blocks to find the correct buffer
         while(index < 0) { assert(p->_prev);  p = p->_prev; index += p->_size; }

         p->_data[index] = val;
         return;
       }

       // No luck, we are in case b). We need to allocate a new buffer, if it was
       // not already allocated by another thread.  To know if we are the one to
       // create the new buffer, we will try to acquire a spin lock and recheck if p is
       // still the current buffer.  If it isn't (or we couldn't get the lock), we will
       // restart our append, keeping the same target position, but with a new p.
       // If it is, we will create a new control block with a new buffer and publish
       // it before releasing the lock.
       if(!_controllLock.tryLock())
         continue;

       if(_controll.load() != p)
       {
         _controllLock.unlock();
         continue;
       }

       size_t cursize = p->_offset + p->_size;
       size_t newsize = qMax((size_t)(cursize * _resizeFactor), pos + 1); // qMax makes sure that after growing we can at least fit our position

       try
       {
         ControllData* newc = new ControllData(newsize - cursize, p);
         p->_next = newc;
         assert(pos >= newc->_offset);
         newc->_data[pos - newc->_offset] = val;

         _controll.storeRelease(newc);
         _controllLock.unlock();
       }
       catch(...)  // If allocating the new ControllData raises an error (out of memory), we MUST release the lock!
       {
         // printf("Erro alocando buffer: %zu %zu %zu %zu %f %zu\n", p->_offset, p->_size, cursize, pos, _resizeFactor, newsize);
         _controllLock.unlock();
         throw;
       }
       return;
     }
   }

   /** Same as append(). The thread id is not needed by this implementation. */
   virtual void appendFromThread(int tid, const T& val)
   {
     S_TRACE();
     Q_UNUSED(tid);
     append(val);
   }

   /** Returns the number of entries in the buffer. Must be called from the main thread only. */
   virtual size_t size() const { assert(GmThreadManager::inMainThread()); return _nextIndex; }

   /**
    * Returns a single vector with every appended entry, stored by append position.
    * Further calls return the same vector.
    *
    * If a single buffer block exists, its storage is taken and returned without copying.
    * Otherwise a new vector is allocated and filled with the contents of every block.
    * On success, the block list is released.
    *
    * @return The vector, owned by this object, or NULL if it could not be allocated or if
    *         no buffer was ever reserved.
    */
   virtual T* data()
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());

     // If the user has already called data(), returns the same buffer as the previous call
     if(_dataBuffer)
     {
       assert(!_head);
       return _dataBuffer;
     }

     // First call to data
     assert(_head);
     if(!_head)      // Nothing was reserved (or clear() was called): there is no data to
       return NULL;  // return. Avoids a NULL pointer access on builds without asserts.

     // If the buffer stores a single vector, we can simply take and return that vector
     if(!_head->_next)
     {
       assert(_nextIndex <= _head->_size && _head->_offset == 0 && !_head->_prev);

       _dataBuffer  = _head->_data;
       _head->_data = NULL;  // Prevent _data from being deleted by the ControllData destructor
     }
     else
     {
       // No such luck. We need to allocate a full vector and fill its contents copying
       // data from each buffer vector
       _dataBuffer = new(std::nothrow) T[_nextIndex];
       if(_dataBuffer)
       {
 #ifndef NDEBUG
         size_t o = 0;
 #endif
         ControllData* p = _head;
         while(p)
         {
           // Every block but the last one is full. The last one is filled up to _nextIndex
           size_t n = p->_next ? p->_size : _nextIndex - p->_offset;

 #ifndef NDEBUG
           assert(!p->_prev || p->_prev->_next == p);
           assert(!p->_next || p->_next->_prev == p);
           assert(p->_offset == o);
           o += n;
 #endif
           GmPmemcpy(_dataBuffer + p->_offset, p->_data, n * sizeof(T), _nt);
           p = p->_next;
         }
 #ifndef NDEBUG
         assert(o == _nextIndex);
 #endif
       }
     }

     // Release memory stored in the buffers
     if(_dataBuffer)
       clearList();

     return _dataBuffer;
   }

   /**
    * Returns an estimate of the memory used by the buffer, in bytes. After data() it is
    * the number of entries times the entry size. Before that, it is the total capacity of
    * the allocated blocks, or zero if no block is allocated.
    */
   virtual size_t usedMemory() const
   {
     S_TRACE();
     assert(GmThreadManager::inMainThread());

     if(_dataBuffer) // After call to data()
       return _nextIndex * sizeof(T);

     ControllData* p = _controll.load();  // In the main thread, all tasks have finished so _controll MUST point to the last buffer
     if(!p)       // No block allocated: the buffer was built with a zero initial size and
       return 0;  // reserve() was not called yet, or clear() has released everything
     return (p->_offset + p->_size) * sizeof(T);
   }

 protected:
   /** Clears the buffer list, releasing buffer memory. */
   void clearList()
   {
     ControllData* p;
     while((p = _head) != NULL)
     {
       _head = _head->_next;
       delete p;
     }
     _controll.store(NULL);
   }

   /** Auxiliary control structure storing a buffer. */
   struct ControllData
   {
     /**
      * Constructor: Initializes the buffer with the given size, placing it right after
      * prev (if not NULL) in the global position space. The caller is responsible for
      * linking prev->_next. Allocation errors should be caught by the caller.
      */
     ControllData(size_t size, ControllData* prev)
     {
       S_TRACE();
       _data = new T[size];
       _size = size;
       _offset = prev ? (prev->_offset + prev->_size) : 0;
       _prev = prev;
       _next = NULL;
     }

     /** Destructor. */
     ~ControllData() { S_TRACE(); delete[] _data; }

     T*     _data;    ///< This data buffer
     size_t _size;    ///< The size of this data buffer
     size_t _offset;  ///< The offset of the first entry in data in the global buffer reference

     ControllData* _prev;  ///< The previous buffer
     ControllData* _next;  ///< The next buffer
   };

   ControllData*                _head;          ///< Pointer to the first allocated buffer
   double                       _resizeFactor;  ///< The resize factor
   QAtomicInteger<size_t>       _nextIndex;     ///< The next free index in the global vector
   QAtomicPointer<ControllData> _controll;      ///< The control block pointing to the current buffer
   GmSpinLock                   _controllLock;  ///< The lock controlling changes to _controll
   T*                           _dataBuffer;    ///< The single buffer after a call to data()
   int                          _nt;            ///< Number of threads considered for parallel memcopy
 };

 #endif
GmSingleAppendBuffer::ControllData::_data
T * _data
This data buffer.
Definition: gmAppendBuffer.h:666

GmPerThreadAppendBuffer::_dataBuffer
T * _dataBuffer
The single buffer after a call to data()
Definition: gmAppendBuffer.h:359

QAtomicPointer

QVarLengthArray::squeeze
void squeeze()

GmSpinLock
A simple spin lock implementation based on a loop using test and set over an atomic int to change its...
Definition: gmSpinLock.h:36

QAtomicInteger::fetchAndAddAcquire
T fetchAndAddAcquire(T valueToAdd)

GmPmemcpy
void * GmPmemcpy(void *dst, const void *src, size_t n, int nt=0, size_t min=10 *1024 *1024)
Parallel (thread enabled) version of memcpy using OpenMP.
Definition: gmMemory.h:167

GmPerThreadAppendBuffer::_globalOnly
bool _globalOnly
Flag set to true if the user passed zero as the number of threads for the constructor.
Definition: gmAppendBuffer.h:357

GmPerThreadAppendBuffer
An implementation of the GmAppendBuffer interface based on a "per thread" growing buffer.
Definition: gmAppendBuffer.h:152

GmAppendBuffer::append
virtual void append(const T &val)=0
Appends val to the buffer in a thread safe way. Can grow the buffer if needed.

GmSingleAppendBuffer::ControllData
Auxiliary control structure storing a buffer.
Definition: gmAppendBuffer.h:648

gmThreadLocalStorage.h
Declaration of the GmTLS class.

GmAppendBuffer::appendFromThread
virtual void appendFromThread(int tid, const T &val)=0
Appends val to the buffer in a thread safe way, using the given tid to access TLS storage....

GmSingleAppendBuffer::ControllData::_prev
ControllData * _prev
The previous buffer.
Definition: gmAppendBuffer.h:670

GmPerThreadAppendBuffer::GmPerThreadAppendBuffer
GmPerThreadAppendBuffer(size_t initSize, double resizeFactor=2.0, int numThreads=-1)
Buffer constructor. Can optionally pre allocate the buffer with initSize entries. If initSize is zero...
Definition: gmAppendBuffer.h:178

S_TRACE
#define S_TRACE()
Macro for run time stack tracking at release build.
Definition: gmTrace.h:44

GmSingleAppendBuffer
An implementation of the GmAppendBuffer interface based on synchronized access to a shared buffer.
Definition: gmAppendBuffer.h:402

QVarLengthArray

GmAppendBuffer::data
virtual T * data()=0
Returns a vector filled with the buffer data. After this call, NO calls to append() can be made witho...

GmTLS
A class that works together with GmThreadManager to provide thread local storage.
Definition: gmThreadLocalStorage.h:131

gmSpinLock.h
Declaration of the GmSpinLock class.

GmSingleAppendBuffer::_resizeFactor
double _resizeFactor
The resize factor.
Definition: gmAppendBuffer.h:675

GmAppendBuffer
A virtual class representing a buffer of T objects that can be appended in a thread-safe way,...
Definition: gmArmadilloSolverMatrix.h:36

GmSingleAppendBuffer::reserve
virtual void reserve(size_t bsize)
See comments on the base class. Should not be called if the size was given in the constructor.
Definition: gmAppendBuffer.h:456

GmSpinLock::tryLock
bool tryLock()
Tries to lock. Returns true if the lock was acquired, false otherwise.
Definition: gmSpinLock.h:46

gmThreadManager.h
Declaration of the GmThreadManager class.

GmPerThreadAppendBuffer::reserve
virtual void reserve(size_t bsize)
Pre allocates buffer sizes. See basic description on the base class Should not be called if the size ...
Definition: gmAppendBuffer.h:232

GmSingleAppendBuffer::_nextIndex
QAtomicInteger< size_t > _nextIndex
The next free index in the global vector.
Definition: gmAppendBuffer.h:676

GmSingleAppendBuffer::ControllData::~ControllData
~ControllData()
Destructor.
Definition: gmAppendBuffer.h:664

QVarLengthArray::clear
void clear()

GmSingleAppendBuffer::ControllData::_size
size_t _size
The size of this data buffer.
Definition: gmAppendBuffer.h:667

GmTLS::localData
T & localData(int tid)
Returns the given thread local data as a modifiable reference.
Definition: gmThreadLocalStorage.h:163

GmSingleAppendBuffer::_dataBuffer
T * _dataBuffer
The single buffer after a call to data()
Definition: gmAppendBuffer.h:679

GmOmpAdjustNumThreads
int GmOmpAdjustNumThreads(int nt)
Adjusts the given number of threads. If nt <= 0 or if nt > maximum number of omp threads,...
Definition: gmOmp.h:51

GmPerThreadAppendBuffer::_nt
int _nt
Number of threads considered for buffer pre-allocation & parallel memcopy.
Definition: gmAppendBuffer.h:358

GmSingleAppendBuffer::GmSingleAppendBuffer
GmSingleAppendBuffer(size_t initSize, double resizeFactor=2.0, int numThreads=-1)
Buffer constructor. Can optionally pre allocate the buffer with initSize entries. If initSize is zero...
Definition: gmAppendBuffer.h:415

GmAppendBuffer::reserve
virtual void reserve(size_t size)=0
Informs the buffer of the expected number of entries that will be filled by (concurrent) calls to app...

GmAppendBuffer::clear
virtual void clear()=0
Releases the memory used by the buffer, returning it to a recently constructed state....

GmAppendBuffer::usedMemory
virtual size_t usedMemory() const =0
Returns an estimate of the total memory used by the buffer, in bytes.

GmAppendBuffer::size
virtual size_t size() const =0
Returns the number of entries in the buffer. Must be called from the main thread only.

GmPerThreadAppendBuffer::_dataSize
size_t _dataSize
The size in _dataBuffer when _dataBuffer is not NULL.
Definition: gmAppendBuffer.h:360

GmSingleAppendBuffer::ControllData::_next
ControllData * _next
The next buffer.
Definition: gmAppendBuffer.h:671

GmPerThreadAppendBuffer::_data
GmTLS< QVarLengthArray< T >, false > _data
Per thread buffers.
Definition: gmAppendBuffer.h:356

GmThreadManager::inMainThread
static bool inMainThread()
Is the current thread the main thread? Equivalent to comparing the currentId() with 0.
Definition: gmThreadManager.h:169

QVarLengthArray::data
T * data()

GmTLS::size
int size() const
Returns the number of values stored in the TLS object. Equal to the max number of threads + 1 (for th...
Definition: gmThreadLocalStorage.h:160

GmSingleAppendBuffer::_nt
int _nt
Number of threads considered for parallel memcopy.
Definition: gmAppendBuffer.h:680

GmSpinLock::unlock
void unlock()
Releases the lock.
Definition: gmSpinLock.h:49

GmPerThreadAppendBuffer::~GmPerThreadAppendBuffer
~GmPerThreadAppendBuffer()
Destructor.
Definition: gmAppendBuffer.h:199

GmSingleAppendBuffer::~GmSingleAppendBuffer
~GmSingleAppendBuffer()
Destructor. Releases the allocated memory.
Definition: gmAppendBuffer.h:436

GmSingleAppendBuffer::_controllLock
GmSpinLock _controllLock
The lock controlling changes to _controll.
Definition: gmAppendBuffer.h:678

QVarLengthArray::size
int size() const

GmAppendBuffer::~GmAppendBuffer
virtual ~GmAppendBuffer()
Virtual destructor.
Definition: gmAppendBuffer.h:69

GmSingleAppendBuffer::ControllData::_offset
size_t _offset
The offset of the first entry in data in the global buffer reference.
Definition: gmAppendBuffer.h:668

GmSingleAppendBuffer::_controll
QAtomicPointer< ControllData > _controll
The control block pointing to the current buffer.
Definition: gmAppendBuffer.h:677

gmTrace.h
Auxiliary configuration file used to enable or disable compiling the GeMA tools with support for usin...

GmThreadManager::maxWorkerThreads
static int maxWorkerThreads()
Returns the maximum number of allowed working threads.
Definition: gmThreadManager.h:153

GmSingleAppendBuffer::_head
ControllData * _head
Pointer to the first allocated buffer.
Definition: gmAppendBuffer.h:674

GmSingleAppendBuffer::ControllData::ControllData
ControllData(size_t size, ControllData *prev)
Constructor: Initializes the buffer with the given size. Allocation errors should be caught by the ca...
Definition: gmAppendBuffer.h:653

QAtomicInteger< size_t >