![]() |
GemaCoreLib
The GeMA Core library
|
Definition of a set of functions for integer vector sorting of triplet data. More...
Go to the source code of this file.
Typedefs | |
typedef unsigned | GmRadixBucketType |
The type used for count buckets in radix sort. Change it to size_t if it is possible to have more than 4G entries in a data vector! | |
Functions | |
template<class DataT , class KeyT , class BucketT > | |
GMC_API_EXPORT DataT * | GmLsdRadixSort (DataT *src, size_t n, DataT *aux=NULL, bool auxCopy=true, unsigned digitBits=8, unsigned keyBits=sizeof(KeyT) *8, unsigned keyOffset=0) |
A serial, stable, Radix sort implementation based on the algorithm in "Introduction to Algorithms", Cormen et all. More... | |
template<class DataT , class KeyT , class BucketT > | |
GMC_API_EXPORT DataT * | GmPLsdRadixSort (DataT *src, size_t n, DataT *aux=NULL, bool auxCopy=true, int nt=0, unsigned digitBits=8, unsigned keyBits=sizeof(KeyT) *8, unsigned keyOffset=0) |
Simmilar to GmLsdRadixSort() but executing with nt threads. More... | |
template<class DataT > | |
GMC_API_EXPORT void | GmSkaRadixSort (DataT *src, size_t n) |
Simple stub for calling ska_sort(). Used to include the external lib inside GemaCoreLib so that clients don't need to include its path. | |
template<class DataT , class ExtractKey > | |
GMC_API_EXPORT void | GmSkaRadixSort (DataT *src, size_t n, ExtractKey &&key) |
Simple stub for calling ska_sort(). Used to include the external lib inside GemaCoreLib so that clients don't need to include its path. | |
unsigned | GmRadixSortAutoNumDigitBits (unsigned keyBits, unsigned max) |
A function to find the number of bits in a digit based on the number of key bits. | |
unsigned * | GmUnsignedCopyRadixSort (unsigned *src, size_t n, unsigned *aux, bool auxCopy, unsigned max=UINT_MAX, unsigned digitBits=0) |
A serial, stable, Radix sort implementation based on the algorithm in "Introduction to Algorithms", Cormen et all for sorting vectors of unsigned integers using a number of digit bits defined by the user. More... | |
unsigned * | GmUnsignedCopyRadixSort (unsigned *src, size_t n, unsigned max=UINT_MAX, unsigned digitBits=0) |
Overload of GmUnsignedCopyRadixSort() passing a NULL aux vector. | |
unsigned * | GmUnsignedPCopyRadixSort (unsigned *src, size_t n, unsigned *aux, bool auxCopy, int nt=0, unsigned max=UINT_MAX, unsigned digitBits=0) |
Similar to GmUnsignedCopyRadixSort() but running with nt multiple threads. More... | |
unsigned * | GmUnsignedPCopyRadixSort (unsigned *src, size_t n, int nt=0, unsigned max=UINT_MAX, unsigned digitBits=0) |
Overload of GmUnsignedPCopyRadixSort() passing a NULL aux vector. | |
void | GmUnsignedInplaceRadixSort (unsigned *src, size_t n) |
An inplace sort algorithm based on the SKA library for usigned data. | |
template<GmSparseMatrixLayoutTypes T> | |
GmSparseMatrixTripletData< T > * | GmTripletCopyRadixSort (GmSparseMatrixTripletData< T > *src, size_t n, int nlin, GmSparseMatrixTripletData< T > *aux, bool auxCopy, unsigned digitBits=0) |
Similar to GmUnsignedCopyRadixSort() for Triplet data where nlin is the number of matrix lines (and columns). | |
template<GmSparseMatrixLayoutTypes T> | |
GmSparseMatrixTripletData< T > * | GmTripletCopyRadixSort (GmSparseMatrixTripletData< T > *src, size_t n, int nlin, unsigned digitBits=0) |
Overload of GmTripletCopyRadixSort() passing a NULL aux vector. | |
template<GmSparseMatrixLayoutTypes T> | |
GmSparseMatrixTripletData< T > * | GmTripletPCopyRadixSort (GmSparseMatrixTripletData< T > *src, size_t n, int nlin, GmSparseMatrixTripletData< T > *aux, bool auxCopy, int nt=0, unsigned digitBits=0) |
Similar to GmUnsignedPCopyRadixSort() for Triplet data where nlin is the number of matrix lines (and columns). | |
template<GmSparseMatrixLayoutTypes T> | |
GmSparseMatrixTripletData< T > * | GmTripletPCopyRadixSort (GmSparseMatrixTripletData< T > *src, size_t n, int nlin, int nt=0, unsigned digitBits=0) |
Overload of GmTripletPCopyRadixSort() passing a NULL aux vector. | |
template<GmSparseMatrixLayoutTypes T> | |
void | GmTripletInplaceRadixSort (GmSparseMatrixTripletData< T > *src, size_t n) |
An inplace sort algorithm based on the SKA library for triplet data. | |
template GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > * | GmTripletCopyRadixSort< GM_CSR_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, size_t, int, GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, bool, unsigned) |
template GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > * | GmTripletCopyRadixSort< GM_CSC_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, size_t, int, GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, bool, unsigned) |
template GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > * | GmTripletPCopyRadixSort< GM_CSR_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, size_t, int, GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, bool, int, unsigned) |
template GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > * | GmTripletPCopyRadixSort< GM_CSC_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, size_t, int, GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, bool, int, unsigned) |
template void | GmTripletInplaceRadixSort< GM_CSR_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSR_SPARSE_FORMAT > *, size_t) |
template void | GmTripletInplaceRadixSort< GM_CSC_SPARSE_FORMAT > (GmSparseMatrixTripletData< GM_CSC_SPARSE_FORMAT > *, size_t) |
Definition of a set of functions for integer vector sorting of triplet data.
GMC_API_EXPORT DataT* GmLsdRadixSort | ( | DataT * | src, |
size_t | n, | ||
DataT * | aux, | ||
bool | auxCopy, | ||
unsigned | digitBits, | ||
unsigned | keyBits, | ||
unsigned | keyOffset | ||
) |
A serial, stable, Radix sort implementation based on the algorithm in "Introduction to Algorithms", Cormen et all.
This is a generic implementation that can be used for any vector data type where the sorting key is an unsigned number and is present as the FIRST field in the data type structure.
Sorting is done by running counting sort repeatedly for each key digit, starting from the least significant digit (LSD). Digit size is a function parameter. There is support for when the digit size does not evenly divides the key type size (ex: 32 bits key with 12 bits digit)
Another parameter specifies how many bits of the key must be sorted. This allows, for example, when sorting a vector with 32-bit keys and 8 bit digits, where the maximum key value is 16.000.000 (24 significant digits) to issue only 3 sorting passes instead of the usual 4.
This implementation is NOT in-place and needs a destination vector with the same size as the input one. It also uses stack space equal to 2^digitBits * sizeof(BucketT) for the counting buckets, making a digitBits of 16 bits its "practical" limit (64K * sizeof(BucketT))
This implementation also does NOT handle signed numbers. See comments in the countSort() function.
DataT | The type of the vector data |
KeyT | The type of the key used to sort the vector. Must be unsigned. |
BucketT | The unsigned type used for the count array. Must be big enough to fit the value of n. |
src | The vector to be sorted |
n | The vector size |
aux | Auxiliar vector with size n used by the sort routine. Can be NULL. In that case the vector will be allocated and freed internally. |
auxCopy | Flag used only if aux is different from NULL. If set to true, the sorted result will always end-up in src. If the number of sort passes is odd, a memcpy oeration will ensure that the result ends up in src. |
If set to false, no extra copy will be done and the result is either in src or in aux, as reported by the function return.
digitBits | The number of bits in the sorting digits. |
keyBits | The number of bits in the key that should be considered for sorting. |
keyOffset | The offset in bits for the start of the key. Usually 0. |
GMC_API_EXPORT DataT* GmPLsdRadixSort | ( | DataT * | src, |
size_t | n, | ||
DataT * | aux, | ||
bool | auxCopy, | ||
int | nt, | ||
unsigned | digitBits, | ||
unsigned | keyBits, | ||
unsigned | keyOffset | ||
) |
Simmilar to GmLsdRadixSort() but executing with nt threads.
The extra parameter nt controls the number of requested threads. A value of 0 (or -1 for compatibility with GmThreadManager) means that omp_get_max_threads() will be used.
|
inline |
A serial, stable, Radix sort implementation based on the algorithm in "Introduction to Algorithms", Cormen et all for sorting vectors of unsigned integers using a number of digit bits defined by the user.
This implementation is NOT in-place and needs a destination vector with the same size as the input one.
src | The vector to be sorted |
n | The vector size |
aux | Auxiliar vector with size n used by the sort routine. Can be NULL. In that case the vector will be allocated and freed internally. |
auxCopy | Flag used only if aux is different from NULL. If set to true, the sorted result will always end-up in src. If the number of sort passes is odd, a memcpy oeration will ensure that the result ends up in src. If set to false, no extra copy will be done and the result is either in src or in aux, as reported by the function return. |
max | The maximum value in src. If different from UINT_MAX, is used to limit the number of bits that need to be considered during the sort, possibly decreasing the number of sorting passes. |
digitBits | The number of bits in a digit. Controls the number of passes and the number of buckets used in the sorting. When set to zero, the number of digits is defined automatically by the function based on the maximum value. |
|
inline |
Similar to GmUnsignedCopyRadixSort() but running with nt multiple threads.
The extra parameter nt controls the number of requested threads. A value of 0 (or -1 for compatibility with GmThreadManager) means that omp_get_max_threads() will be used.