GemaCoreLib
The GeMA Core library
Typedefs | Functions
gmIntegerSort.h File Reference

Definition of a set of functions for integer vector sorting of triplet data. More...

#include "gmCoreConfig.h"
#include "gmSparseMatrixTripletData.h"
#include <QtAlgorithms>
Include dependency graph for gmIntegerSort.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Typedefs

typedef unsigned GmRadixBucketType
 The type used for count buckets in radix sort. Change it to size_t if it is possible to have more than 4G entries in a data vector!
 

Functions

template<class DataT , class KeyT , class BucketT >
GMC_API_EXPORT DataT * GmLsdRadixSort (DataT *src, size_t n, DataT *aux=NULL, bool auxCopy=true, unsigned digitBits=8, unsigned keyBits=sizeof(KeyT) *8, unsigned keyOffset=0)
 A serial, stable, Radix sort implementation based on the algorithm in "Introduction to Algorithms", Cormen et all. More...
 
template<class DataT , class KeyT , class BucketT >
GMC_API_EXPORT DataT * GmPLsdRadixSort (DataT *src, size_t n, DataT *aux=NULL, bool auxCopy=true, int nt=0, unsigned digitBits=8, unsigned keyBits=sizeof(KeyT) *8, unsigned keyOffset=0)
 Simmilar to GmLsdRadixSort() but executing with nt threads. More...
 
template<class DataT >
GMC_API_EXPORT void GmSkaRadixSort (DataT *src, size_t n)
 Simple stub for calling ska_sort(). Used to include the external lib inside GemaCoreLib so that clients don't need to include its path.
 
template<class DataT , class ExtractKey >
GMC_API_EXPORT void GmSkaRadixSort (DataT *src, size_t n, ExtractKey &&key)
 Simple stub for calling ska_sort(). Used to include the external lib inside GemaCoreLib so that clients don't need to include its path.
 
unsigned GmRadixSortAutoNumDigitBits (unsigned keyBits, unsigned max)
 A function to find the number of bits in a digit based on the number of key bits.
 
unsigned * GmUnsignedCopyRadixSort (unsigned *src, size_t n, unsigned *aux, bool auxCopy, unsigned max=UINT_MAX, unsigned digitBits=0)
 A serial, stable, Radix sort implementation based on the algorithm in "Introduction to Algorithms", Cormen et all for sorting vectors of unsigned integers using a number of digit bits defined by the user. More...
 
unsigned * GmUnsignedCopyRadixSort (unsigned *src, size_t n, unsigned max=UINT_MAX, unsigned digitBits=0)
 Overload of GmUnsignedCopyRadixSort() passing a NULL aux vector.
 
unsigned * GmUnsignedPCopyRadixSort (unsigned *src, size_t n, unsigned *aux, bool auxCopy, int nt=0, unsigned max=UINT_MAX, unsigned digitBits=0)
 Similar to GmUnsignedCopyRadixSort() but running with nt multiple threads. More...
 
unsigned * GmUnsignedPCopyRadixSort (unsigned *src, size_t n, int nt=0, unsigned max=UINT_MAX, unsigned digitBits=0)
 Overload of GmUnsignedPCopyRadixSort() passing a NULL aux vector.
 
void GmUnsignedInplaceRadixSort (unsigned *src, size_t n)
 An inplace sort algorithm based on the SKA library for usigned data.
 
template<GmSparseMatrixLayoutTypes T>
GmSparseMatrixTripletData< T > * GmTripletCopyRadixSort (GmSparseMatrixTripletData< T > *src, size_t n, int nlin, GmSparseMatrixTripletData< T > *aux, bool auxCopy, unsigned digitBits=0)
 Similar to GmUnsignedCopyRadixSort() for Triplet data where nlin is the number of matrix lines (and columns).
 
template<GmSparseMatrixLayoutTypes T>
GmSparseMatrixTripletData< T > * GmTripletCopyRadixSort (GmSparseMatrixTripletData< T > *src, size_t n, int nlin, unsigned digitBits=0)
 Overload of GmTripletCopyRadixSort() passing a NULL aux vector.
 
template<GmSparseMatrixLayoutTypes T>
GmSparseMatrixTripletData< T > * GmTripletPCopyRadixSort (GmSparseMatrixTripletData< T > *src, size_t n, int nlin, GmSparseMatrixTripletData< T > *aux, bool auxCopy, int nt=0, unsigned digitBits=0)
 Similar to GmUnsignedPCopyRadixSort() for Triplet data where nlin is the number of matrix lines (and columns).
 
template<GmSparseMatrixLayoutTypes T>
GmSparseMatrixTripletData< T > * GmTripletPCopyRadixSort (GmSparseMatrixTripletData< T > *src, size_t n, int nlin, int nt=0, unsigned digitBits=0)
 Overload of GmTripletPCopyRadixSort() passing a NULL aux vector.
 
template<GmSparseMatrixLayoutTypes T>
void GmTripletInplaceRadixSort (GmSparseMatrixTripletData< T > *src, size_t n)
 An inplace sort algorithm based on the SKA library for triplet data.
 
template GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > * GmTripletCopyRadixSort< GM_CSR_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, size_t, int, GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, bool, unsigned)
 
template GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > * GmTripletCopyRadixSort< GM_CSC_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, size_t, int, GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, bool, unsigned)
 
template GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > * GmTripletPCopyRadixSort< GM_CSR_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, size_t, int, GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, bool, int, unsigned)
 
template GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > * GmTripletPCopyRadixSort< GM_CSC_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, size_t, int, GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, bool, int, unsigned)
 
template void GmTripletInplaceRadixSort< GM_CSR_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, size_t)
 
template void GmTripletInplaceRadixSort< GM_CSC_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, size_t)
 

Detailed Description

Definition of a set of functions for integer vector sorting of triplet data.

Author
Carlos Augusto Teixeira Mendes
Date
april, 2020

Function Documentation

◆ GmLsdRadixSort()

template<class DataT , class KeyT , class BucketT >
GMC_API_EXPORT DataT* GmLsdRadixSort ( DataT *  src,
size_t  n,
DataT *  aux,
bool  auxCopy,
unsigned  digitBits,
unsigned  keyBits,
unsigned  keyOffset 
)

A serial, stable, Radix sort implementation based on the algorithm in "Introduction to Algorithms", Cormen et all.

This is a generic implementation that can be used for any vector data type where the sorting key is an unsigned number and is present as the FIRST field in the data type structure.

Sorting is done by running counting sort repeatedly for each key digit, starting from the least significant digit (LSD). Digit size is a function parameter. There is support for when the digit size does not evenly divides the key type size (ex: 32 bits key with 12 bits digit)

Another parameter specifies how many bits of the key must be sorted. This allows, for example, when sorting a vector with 32-bit keys and 8 bit digits, where the maximum key value is 16.000.000 (24 significant digits) to issue only 3 sorting passes instead of the usual 4.

This implementation is NOT in-place and needs a destination vector with the same size as the input one. It also uses stack space equal to 2^digitBits * sizeof(BucketT) for the counting buckets, making a digitBits of 16 bits its "practical" limit (64K * sizeof(BucketT))

This implementation also does NOT handle signed numbers. See comments in the countSort() function.

Template Parameters
DataTThe type of the vector data
KeyTThe type of the key used to sort the vector. Must be unsigned.
BucketTThe unsigned type used for the count array. Must be big enough to fit the value of n.
Parameters
srcThe vector to be sorted
nThe vector size
auxAuxiliar vector with size n used by the sort routine. Can be NULL. In that case the vector will be allocated and freed internally.
auxCopyFlag used only if aux is different from NULL. If set to true, the sorted result will always end-up in src. If the number of sort passes is odd, a memcpy oeration will ensure that the result ends up in src.

If set to false, no extra copy will be done and the result is either in src or in aux, as reported by the function return.

Parameters
digitBitsThe number of bits in the sorting digits.
keyBitsThe number of bits in the key that should be considered for sorting.
keyOffsetThe offset in bits for the start of the key. Usually 0.
Returns
Returns the vector that stores the sorted result. Normally will be equal to src, but if aux != NULL, auxCopy == false and the number of sorting passes is odd, will be equal to aux. Will return NULL if the function could not allocate auxiliary memory.

◆ GmPLsdRadixSort()

template<class DataT , class KeyT , class BucketT >
GMC_API_EXPORT DataT* GmPLsdRadixSort ( DataT *  src,
size_t  n,
DataT *  aux,
bool  auxCopy,
int  nt,
unsigned  digitBits,
unsigned  keyBits,
unsigned  keyOffset 
)

Simmilar to GmLsdRadixSort() but executing with nt threads.

The extra parameter nt controls the number of requested threads. A value of 0 (or -1 for compatibility with GmThreadManager) means that omp_get_max_threads() will be used.

◆ GmUnsignedCopyRadixSort()

unsigned* GmUnsignedCopyRadixSort ( unsigned *  src,
size_t  n,
unsigned *  aux,
bool  auxCopy,
unsigned  max = UINT_MAX,
unsigned  digitBits = 0 
)
inline

A serial, stable, Radix sort implementation based on the algorithm in "Introduction to Algorithms", Cormen et all for sorting vectors of unsigned integers using a number of digit bits defined by the user.

This implementation is NOT in-place and needs a destination vector with the same size as the input one.

Parameters
srcThe vector to be sorted
nThe vector size
auxAuxiliar vector with size n used by the sort routine. Can be NULL. In that case the vector will be allocated and freed internally.
auxCopyFlag used only if aux is different from NULL. If set to true, the sorted result will always end-up in src. If the number of sort passes is odd, a memcpy oeration will ensure that the result ends up in src. If set to false, no extra copy will be done and the result is either in src or in aux, as reported by the function return.
maxThe maximum value in src. If different from UINT_MAX, is used to limit the number of bits that need to be considered during the sort, possibly decreasing the number of sorting passes.
digitBitsThe number of bits in a digit. Controls the number of passes and the number of buckets used in the sorting. When set to zero, the number of digits is defined automatically by the function based on the maximum value.
Returns
Returns the vector that stores the sorted result. Normally will be equal to src, but if aux != NULL, auxCopy == false and the number of sorting passes is odd, will be equal to aux. Will return NULL if the function could not allocate auxiliary memory.

◆ GmUnsignedPCopyRadixSort()

unsigned* GmUnsignedPCopyRadixSort ( unsigned *  src,
size_t  n,
unsigned *  aux,
bool  auxCopy,
int  nt = 0,
unsigned  max = UINT_MAX,
unsigned  digitBits = 0 
)
inline

Similar to GmUnsignedCopyRadixSort() but running with nt multiple threads.

The extra parameter nt controls the number of requested threads. A value of 0 (or -1 for compatibility with GmThreadManager) means that omp_get_max_threads() will be used.