Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
08d0052
changes to enable better allocator support. this has been tested with…
kstppd Sep 6, 2024
22133a4
Merge branch 'master' of github.com:kstppd/hashinator into better_all…
kstppd Sep 6, 2024
6a5d4bc
use std allocator to demonstrate last commit's functionallity
kstppd Sep 6, 2024
ad05504
remove split host allocator since there is no need for it anymore
kstppd Sep 6, 2024
f2be8b3
fix hashinator ctpr
kstppd Sep 7, 2024
1ababbc
Add some more ctors for splitvectors and a host unit test with umpire
kstppd Sep 7, 2024
754aae0
more umpire tests
kstppd Sep 16, 2024
a53b4e9
update allocators in splittools
kstppd Sep 16, 2024
5b1e7e6
unit tests update
kstppd Sep 16, 2024
c2a0e96
Meson update to only build and test Umpire unit tests if Umpire is in…
kstppd Sep 28, 2024
3e09873
Implement some first changes at 38000ft
kstppd Oct 6, 2024
9cb9486
And update README
kstppd Oct 6, 2024
ff4d157
do not use offset of in non POD stuff
kstppd Dec 7, 2024
27789a6
Use optional to fix compiler warning us for no return in function
kstppd Dec 8, 2024
714d57d
Update README.md
kstppd Dec 8, 2024
12c58fa
Add memtest unit
kstppd Feb 9, 2025
8170005
Change splitvector's swap so that is it does not check for swapping w…
kstppd Feb 10, 2025
06e7013
finally an update
kstppd Apr 4, 2025
12f1a67
comment added
kstppd Apr 6, 2025
85a61a7
revert back to previous behavior for device buckets
kstppd Apr 6, 2025
3130b6b
some fixes and less restrictiv split vector contructors
kstppd Apr 8, 2025
60d65cc
some fixes and less restrictiv split vector contructors
kstppd Apr 8, 2025
3e69d91
fix
kstppd Apr 8, 2025
a2409ed
Revert "fix"
kstppd Apr 8, 2025
a108a65
tRevert "some fixes and less restrictiv split vector contructors"
kstppd Apr 8, 2025
aa9922b
Revert "some fixes and less restrictiv split vector contructors"
kstppd Apr 8, 2025
2361d04
fix
kstppd Apr 8, 2025
93653e9
relax is_trivial
kstppd Apr 8, 2025
2cdd964
Revert "Use optional to fix compiler warning us for no return in func…
kstppd Apr 12, 2025
b13a279
remove duplicate include
kstppd Apr 12, 2025
65d3dfa
comment update
kstppd Apr 12, 2025
1448978
Merge remote-tracking branch 'fmihpc/master' into better_allocator_su…
kstppd Apr 12, 2025
1a24b37
avoid a small git wreck
kstppd Apr 12, 2025
4290365
memcpy fix
kstppd Apr 12, 2025
fd4c004
use library in meson for umpire detection
kstppd Jun 6, 2025
c6d374c
update tests for Umpire compatibillity
kstppd Jun 10, 2025
6d63fcf
off with device buckets and the offset of thingy
kstppd Jun 11, 2025
60b87c6
test coarse mem
kstppd Jun 13, 2025
218dcc7
update bench script
kstppd Jun 13, 2025
b25c150
update bench script and allow coarse grain memory when running on AMD hw
kstppd Jun 13, 2025
a3d3d7f
Oops missing main type
kstppd Aug 14, 2025
dfd59c5
Oops missing main type
kstppd Aug 14, 2025
fe7b238
Merge branch 'master' into better_allocator_support
kstppd Jun 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 103 additions & 39 deletions include/hashinator/hashinator.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,30 @@ using DefaultMetaAllocator = split::split_host_allocator<T>;
#endif

using MapInfo = Hashinator::Info;
template <typename KEY_TYPE, typename VAL_TYPE, KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),
KEY_TYPE TOMBSTONE = EMPTYBUCKET - 1, class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>,
class DeviceHasher = DefaultHasher, class Meta_Allocator = DefaultMetaAllocator<MapInfo>>
template <typename KEY_TYPE, typename VAL_TYPE, class Allocator = DefaultMetaAllocator<hash_pair<KEY_TYPE,VAL_TYPE>>,
Comment thread
kstppd marked this conversation as resolved.
Outdated
KEY_TYPE EMPTYBUCKET = std::numeric_limits<KEY_TYPE>::max(),KEY_TYPE TOMBSTONE = EMPTYBUCKET - 1,
class HashFunction = HashFunctions::Fibonacci<KEY_TYPE>,class DeviceHasher = DefaultHasher>
class Hashmap {

private:

/**
 * Number of bucket elements (hash_pair<KEY_TYPE,VAL_TYPE>) that have to be
 * requested from the allocator so that the returned storage can hold one
 * MapInfo object.
 *
 * The allocator is typed on hash_pair, so the MapInfo block is sized in units
 * of hash_pair. sizeof(MapInfo) is therefore rounded UP to a whole number of
 * elements: a truncating division would under-allocate whenever
 * sizeof(MapInfo) is not an exact multiple of the element size
 * (e.g. a 40-byte MapInfo with 16-byte elements needs 3 elements, not 2).
 *
 * @return element count, always >= 1.
 */
constexpr size_t get_number_of_Ts_for_Map_Info()const noexcept{
  constexpr size_t size_of_T=sizeof(hash_pair<KEY_TYPE,VAL_TYPE>);
  constexpr size_t size_of_info=sizeof(MapInfo);
  // Ceiling division; evaluates to 1 when one element is already large enough.
  return (size_of_info + size_of_T - 1) / size_of_T;
}

// CUDA device handle
Hashmap* device_map;
split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>* device_buckets;
split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>* device_buckets;
//~CUDA device handle

// Host members
split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>> buckets;
Meta_Allocator _metaAllocator; // Allocator used to allocate and deallocate memory for metadata
split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator> buckets;
Allocator _allocator; // Allocator used to allocate and deallocate memory for metadata
MapInfo* _mapInfo;
//~Host members

Expand All @@ -85,7 +95,7 @@ class Hashmap {
void preallocate_device_handles() {
#ifndef HASHINATOR_CPU_ONLY_MODE
SPLIT_CHECK_ERR(split_gpuMalloc((void**)&device_map, sizeof(Hashmap)));
device_buckets = reinterpret_cast<split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>*>(
device_buckets = reinterpret_cast<split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>*>(
reinterpret_cast<char*>(device_map) + offsetof(Hashmap, buckets));
#endif
}
Expand All @@ -106,39 +116,61 @@ class Hashmap {
inline void set_status(status code) noexcept { _mapInfo->err = code; }

public:
Hashmap() {
Hashmap():_allocator(Allocator{}) {
preallocate_device_handles();
_mapInfo = _metaAllocator.allocate(1);
_mapInfo = reinterpret_cast<MapInfo*>(_allocator.allocate(get_number_of_Ts_for_Map_Info()));
*_mapInfo = MapInfo(5);
buckets = split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>(
1 << _mapInfo->sizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()),_allocator);
#ifndef HASHINATOR_CPU_ONLY_MODE
SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
#endif
};

Hashmap(const Allocator& allocator):buckets(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>(
1 << 5, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()),_allocator)),_allocator(allocator){
Comment thread
markusbattarbee marked this conversation as resolved.
Outdated
preallocate_device_handles();
_mapInfo = reinterpret_cast<MapInfo*>(_allocator.allocate(get_number_of_Ts_for_Map_Info()));
*_mapInfo = MapInfo(5);
buckets = split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(
1 << _mapInfo->sizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()));
#ifndef HASHINATOR_CPU_ONLY_MODE
SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
#endif
};

Hashmap(int sizepower) {
Hashmap(int sizepower):_allocator(Allocator{}) {
preallocate_device_handles();
_mapInfo = _metaAllocator.allocate(1);
_mapInfo = reinterpret_cast<MapInfo*>(_allocator.allocate(get_number_of_Ts_for_Map_Info()));
*_mapInfo = MapInfo(sizepower);
buckets = split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(
Comment thread
markusbattarbee marked this conversation as resolved.
buckets = split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>(
1 << _mapInfo->sizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()));
Comment thread
markusbattarbee marked this conversation as resolved.
Outdated
#ifndef HASHINATOR_CPU_ONLY_MODE
SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
#endif
};

/**
 * Construct an empty map with 2^sizepower buckets using a caller-supplied
 * allocator.
 *
 * @param sizepower log2 of the initial bucket count.
 * @param allocator Allocator copied into `_allocator`; the same object is
 *                  handed to the bucket vector.
 *
 * Every bucket starts as (EMPTYBUCKET, VAL_TYPE()). The bucket vector is built
 * from the `allocator` parameter rather than the `_allocator` member because
 * `buckets` precedes `_allocator` in the member-initialiser list.
 */
Hashmap(int sizepower,const Allocator& allocator):buckets(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>(
1 << sizepower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()),allocator)),_allocator(allocator){
// Sets up device_map / device_buckets (no-op in CPU-only builds).
preallocate_device_handles();
// MapInfo storage is requested from the bucket allocator, sized in bucket elements.
_mapInfo = reinterpret_cast<MapInfo*>(_allocator.allocate(get_number_of_Ts_for_Map_Info()));
*_mapInfo = MapInfo(sizepower);
#ifndef HASHINATOR_CPU_ONLY_MODE
// Copy the bytes of the initialised host object into the device-side handle.
SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
#endif
};

Hashmap(const Hashmap<KEY_TYPE, VAL_TYPE>& other) {
Hashmap(const Hashmap<KEY_TYPE, VAL_TYPE,Allocator>& other) {
_allocator=other._allocator;
preallocate_device_handles();
_mapInfo = _metaAllocator.allocate(1);
_mapInfo = reinterpret_cast<MapInfo*>(_allocator.allocate(get_number_of_Ts_for_Map_Info()));
*_mapInfo = *(other._mapInfo);
buckets = other.buckets;
#ifndef HASHINATOR_CPU_ONLY_MODE
SPLIT_CHECK_ERR(split_gpuMemcpy(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice));
#endif
};

Hashmap(Hashmap<KEY_TYPE, VAL_TYPE>&& other) {

Hashmap(Hashmap<KEY_TYPE, VAL_TYPE,Allocator>&& other) {
_allocator=other._allocator;
preallocate_device_handles();
_mapInfo = other._mapInfo;
other._mapInfo = nullptr;
Expand All @@ -148,7 +180,7 @@ class Hashmap {
#endif
};

Hashmap& operator=(const Hashmap<KEY_TYPE, VAL_TYPE>& other) {
Hashmap& operator=(const Hashmap<KEY_TYPE, VAL_TYPE,Allocator>& other) {
Comment thread
kstppd marked this conversation as resolved.
if (this == &other) {
return *this;
}
Expand All @@ -162,7 +194,7 @@ class Hashmap {

#ifndef HASHINATOR_CPU_ONLY_MODE
/** Copy assign but using a provided stream */
void overwrite(const Hashmap<KEY_TYPE, VAL_TYPE>& other, split_gpuStream_t stream = 0) {
void overwrite(const Hashmap<KEY_TYPE, VAL_TYPE,Allocator>& other, split_gpuStream_t stream = 0) {
if (this == &other) {
return;
}
Expand All @@ -174,11 +206,11 @@ class Hashmap {
}
#endif

Hashmap& operator=(Hashmap<KEY_TYPE, VAL_TYPE>&& other) {
Hashmap& operator=(Hashmap<KEY_TYPE, VAL_TYPE,Allocator>&& other) {
if (this == &other) {
return *this;
}
_metaAllocator.deallocate(_mapInfo, 1);
_allocator.deallocate(reinterpret_cast<hash_pair<KEY_TYPE,VAL_TYPE>*>(_mapInfo), get_number_of_Ts_for_Map_Info());
_mapInfo = other._mapInfo;
other._mapInfo = nullptr;
buckets = std::move(other.buckets);
Expand All @@ -190,7 +222,7 @@ class Hashmap {

~Hashmap() {
deallocate_device_handles();
_metaAllocator.deallocate(_mapInfo, 1);
_allocator.deallocate(reinterpret_cast<hash_pair<KEY_TYPE,VAL_TYPE>*>(_mapInfo), get_number_of_Ts_for_Map_Info());
};

#ifdef HASHINATOR_CPU_ONLY_MODE
Expand Down Expand Up @@ -237,8 +269,8 @@ class Hashmap {
if (newSizePower > 32) {
throw std::out_of_range("Hashmap ran into rehashing catastrophe and exceeded 32bit buckets.");
}
split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>> newBuckets(
1 << newSizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()));
split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator> newBuckets(
1 << newSizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()),_allocator);
_mapInfo->sizePower = newSizePower;
int bitMask = (1 << _mapInfo->sizePower) - 1; // For efficient modulo of the array size

Expand Down Expand Up @@ -322,8 +354,8 @@ class Hashmap {
// DeviceHasher::reset_all(buckets.data(),_mapInfo, buckets.size(), s);
Comment thread
kstppd marked this conversation as resolved.
} else {
// Need new buckets
buckets = std::move(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(
1 << newSizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE())));
buckets = std::move(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>(
1 << newSizePower, hash_pair<KEY_TYPE, VAL_TYPE>(EMPTYBUCKET, VAL_TYPE()),_allocator));
SPLIT_CHECK_ERR(split_gpuMemcpyAsync(device_map, this, sizeof(Hashmap), split_gpuMemcpyHostToDevice, s));
optimizeGPU(s);
}
Expand Down Expand Up @@ -483,7 +515,7 @@ class Hashmap {

#ifdef HASHINATOR_CPU_ONLY_MODE
void clear() {
buckets = split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(1 << _mapInfo->sizePower, {EMPTYBUCKET, VAL_TYPE()});
buckets = split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>(1 << _mapInfo->sizePower, {EMPTYBUCKET, VAL_TYPE()});
Comment thread
markusbattarbee marked this conversation as resolved.
Outdated
*_mapInfo = MapInfo(_mapInfo->sizePower);
return;
}
Expand All @@ -493,7 +525,7 @@ class Hashmap {
switch (t) {
case targets::host:
buckets =
split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>(1 << _mapInfo->sizePower, {EMPTYBUCKET, VAL_TYPE()});
split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>(1 << _mapInfo->sizePower, {EMPTYBUCKET, VAL_TYPE()},_allocator);
*_mapInfo = MapInfo(_mapInfo->sizePower);
break;

Expand Down Expand Up @@ -671,11 +703,11 @@ class Hashmap {

// Iterator type. Iterates through all non-empty buckets.
class iterator {
Hashmap<KEY_TYPE, VAL_TYPE>* hashtable;
Hashmap<KEY_TYPE, VAL_TYPE,Allocator>* hashtable;
size_t index;

public:
iterator(Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index) : hashtable(&hashtable), index(index) {}
iterator(Hashmap<KEY_TYPE, VAL_TYPE,Allocator>& hashtable, size_t index) : hashtable(&hashtable), index(index) {}

iterator& operator++() {
index++;
Expand Down Expand Up @@ -706,11 +738,11 @@ class Hashmap {

// Const iterator.
class const_iterator {
const Hashmap<KEY_TYPE, VAL_TYPE>* hashtable;
const Hashmap<KEY_TYPE, VAL_TYPE,Allocator>* hashtable;
size_t index;

public:
explicit const_iterator(const Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index)
explicit const_iterator(const Hashmap<KEY_TYPE, VAL_TYPE,Allocator>& hashtable, size_t index)
: hashtable(&hashtable), index(index) {}
const_iterator& operator++() {
index++;
Expand Down Expand Up @@ -1152,7 +1184,7 @@ class Hashmap {
* hmap.extractPattern(elements,Rule<uint32_t,uint32_t>());
* */
template <bool prefetches = true, typename Rule>
size_t extractPattern(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>& elements, Rule rule,
size_t extractPattern(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>& elements, Rule rule,
split_gpuStream_t s = 0) {
elements.resize(_mapInfo->fill + 1, true);
if constexpr (prefetches) {
Expand All @@ -1179,18 +1211,18 @@ class Hashmap {
const size_t memory_for_pool = 8 * nBlocks * sizeof(uint32_t);
split::tools::splitStackArena mPool(memory_for_pool, s);
size_t retval =
split::tools::copy_if_raw<hash_pair<KEY_TYPE, VAL_TYPE>, Rule, defaults::MAX_BLOCKSIZE, defaults::WARPSIZE>(
split::tools::copy_if_raw<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator, Rule, defaults::MAX_BLOCKSIZE, defaults::WARPSIZE>(
buckets, elements, rule, nBlocks, mPool, s);
return retval;
}
template <typename Rule>
void extractPatternLoop(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>& elements, Rule rule,
void extractPatternLoop(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>& elements, Rule rule,
split_gpuStream_t s = 0) {
// Extract elements matching the Pattern Rule(element)==true;
split::tools::copy_if_loop<hash_pair<KEY_TYPE, VAL_TYPE>, Rule, defaults::MAX_BLOCKSIZE, defaults::WARPSIZE>(
*device_buckets, elements, rule, s);
}
void extractLoop(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>& elements, split_gpuStream_t s = 0) {
void extractLoop(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>,Allocator>& elements, split_gpuStream_t s = 0) {
// Extract all valid elements
auto rule = [] __host__ __device__(const hash_pair<KEY_TYPE, VAL_TYPE>& kval) -> bool {
return kval.first != EMPTYBUCKET && kval.first != TOMBSTONE;
Expand All @@ -1214,6 +1246,24 @@ class Hashmap {
}
return elements.size();
}

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the old versions of extractKeysByPattern on lines 1233 and 1267 still needed now that the KeyAlloc versions exist?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

up - what I mean is that with Allocator support, will the old versions (which do not template allocators) ever be called anymore? Or could they be safely deleted?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope those methods should stay in my opinion. They are currently used in the unit tests. Now the new methods will only be called if the allocator is provided in the template argument list

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a lot of code duplication :( Couldn't you put the default allocator as the default argument to this templated function? (same thing as in split_tools.h line 104)

/**
 * Copy the keys of all buckets for which rule(bucket) is true into `elements`,
 * where `elements` may use its own allocator type (KeyAlloc).
 *
 * @tparam KeyAlloc   allocator type of the output vector.
 * @tparam prefetches when true, optimizeGPU(s) is called on the output before
 *                    the extraction and on this map afterwards.
 * @param elements output vector; resized to fill + 1 before extraction.
 * @param rule     predicate applied to each bucket.
 * @param s        GPU stream the work is issued on.
 * @return elements.size() after the extraction.
 */
template <typename KeyAlloc,bool prefetches = true, typename Rule>
size_t extractKeysByPattern(split::SplitVector<KEY_TYPE,KeyAlloc>& elements, Rule rule, split_gpuStream_t s = 0) {
  using bucket_type = hash_pair<KEY_TYPE, VAL_TYPE>;

  const auto slots_needed = _mapInfo->fill + 1;
  elements.resize(slots_needed, true);
  if constexpr (prefetches) {
    elements.optimizeGPU(s);
  }

  // Extract element **keys** matching the Pattern Rule(element)==true;
  split::tools::copy_keys_if<bucket_type, KEY_TYPE, Allocator, KeyAlloc, Rule, defaults::MAX_BLOCKSIZE,
                             defaults::WARPSIZE>(buckets, elements, rule, s);

  // FIXME: there is an issue where paging to host occurs and following calls to hashmap operations take a hit.
  // temp fix: call optimizeGPU() here
  if constexpr (prefetches) {
    optimizeGPU(s);
  }

  return elements.size();
}

template <bool prefetches = true, typename Rule>
size_t extractKeysByPattern(split::SplitVector<KEY_TYPE>& elements, Rule rule, void* stack, size_t max_size,
split_gpuStream_t s = 0) {
Expand All @@ -1226,6 +1276,20 @@ class Hashmap {
defaults::WARPSIZE>(buckets, elements, rule, stack, max_size, s);
return elements.size();
}

/**
 * Variant of extractKeysByPattern that forwards a caller-provided scratch
 * buffer to the extraction routine; the output vector may use its own
 * allocator type (KeyAlloc).
 *
 * @tparam KeyAlloc   allocator type of the output vector.
 * @tparam prefetches when true, optimizeGPU(s) is called on the output before
 *                    the extraction.
 * @param elements output vector; resized to fill + 1 before extraction.
 * @param rule     predicate applied to each bucket.
 * @param stack    scratch buffer passed through to copy_keys_if.
 * @param max_size size passed alongside `stack` (presumably its capacity in
 *                 bytes — confirm against split::tools::copy_keys_if).
 * @param s        GPU stream the work is issued on.
 * @return elements.size() after the extraction.
 */
template <typename KeyAlloc, bool prefetches = true, typename Rule>
size_t extractKeysByPattern(split::SplitVector<KEY_TYPE,KeyAlloc>& elements, Rule rule, void* stack, size_t max_size,
                            split_gpuStream_t s = 0) {
  using bucket_type = hash_pair<KEY_TYPE, VAL_TYPE>;

  const auto slots_needed = _mapInfo->fill + 1;
  elements.resize(slots_needed, true);
  if constexpr (prefetches) {
    elements.optimizeGPU(s);
  }

  // Extract element **keys** matching the Pattern Rule(element)==true;
  split::tools::copy_keys_if<bucket_type, KEY_TYPE, Allocator, KeyAlloc, Rule, defaults::MAX_BLOCKSIZE,
                             defaults::WARPSIZE>(buckets, elements, rule, stack, max_size, s);

  return elements.size();
}

Comment thread
markusbattarbee marked this conversation as resolved.
template <typename Rule>
void extractKeysByPatternLoop(split::SplitVector<KEY_TYPE>& elements, Rule rule, split_gpuStream_t s = 0) {
// Extract element **keys** matching the Pattern Rule(element)==true;
Expand Down Expand Up @@ -1470,11 +1534,11 @@ class Hashmap {
class device_iterator {
private:
size_t index;
Hashmap<KEY_TYPE, VAL_TYPE>* hashtable;
Hashmap<KEY_TYPE, VAL_TYPE,Allocator>* hashtable;

public:
HASHINATOR_DEVICEONLY
device_iterator(Hashmap<KEY_TYPE, VAL_TYPE>& hashtable, size_t index) : index(index), hashtable(&hashtable) {}
device_iterator(Hashmap<KEY_TYPE, VAL_TYPE,Allocator>& hashtable, size_t index) : index(index), hashtable(&hashtable) {}

HASHINATOR_DEVICEONLY
size_t getIndex() { return index; }
Expand Down
Loading