JaffarPlus
High-performance best-first search optimizer for tool-assisted speedruns
Loading...
Searching...
No Matches
hashDb.hpp
Go to the documentation of this file.
1#pragma once
2
9#include "numa.hpp"
10#include <atomic>
11#include <jaffarCommon/concurrent.hpp>
12#include <jaffarCommon/hash.hpp>
13#include <jaffarCommon/json.hpp>
14#include <jaffarCommon/logger.hpp>
15#include <jaffarCommon/parallel.hpp>
16
17namespace jaffarPlus
18{
19
32class HashDb final
33{
34public:
41 HashDb(const nlohmann::json& config)
42 {
43 _maxStoreCount = jaffarCommon::json::getNumber<size_t>(config, "Max Store Count");
44 _maxStoreSizeMb = jaffarCommon::json::getNumber<double>(config, "Max Store Size (Mb)");
45
46 // For testing purposes, the maximum size can be overriden by environment variables
47 if (auto* value = std::getenv("JAFFAR_ENGINE_OVERRIDE_MAX_HASHDB_SIZE_MB")) _maxStoreSizeMb = std::stoul(value);
48 }
49
52 __INLINE__ size_t getMaxBudgetBytes() const { return (size_t)(_maxStoreSizeMb * 1024.0 * 1024.0) * _maxStoreCount; }
53
56 {
57 for (auto* s : _l1) delete s;
58 if (_l2 != nullptr) delete _l2;
59 }
60
69 __INLINE__ void initialize()
70 {
71 // The configured "Max Store Size" is the ABSOLUTE per-generation budget for the whole hash DB:
72 // the global L2 plus all per-domain L1 caches combined never exceed it (so the total footprint is
73 // Max Store Size x Max Store Count, independent of NUMA layout -- the tiering does not inflate it).
74 // Rolling is driven by each store's actual measured memory (see getStoreSizeBytes).
75 const size_t totalBudgetBytes = (size_t)(_maxStoreSizeMb * 1024.0 * 1024.0);
76
77 if (_numaCount > 1)
78 {
79 // Multi-domain: split the budget so L2 + all L1 == the configured budget. The global L2 gets 3/4
80 // and the per-domain L1 caches share the remaining 1/4. L2 holds the complete dedup set, which
81 // benefits directly from more capacity (fewer rolled-out / re-explored states). The L1s only
82 // absorb within-domain repeats, whose working set is strongly biased toward spatiotemporally
83 // close states -- so a smaller cache still captures most of the local hit rate. Hence the 3:1
84 // weighting toward L2.
85 _maxStoreSizeBytes = (totalBudgetBytes * 3) / 4;
86 _l1MaxStoreSizeBytes = (totalBudgetBytes / 4) / (size_t)_numaCount;
87 _l1.resize(_numaCount);
88 for (int i = 0; i < _numaCount; i++) _l1[i] = makeStore();
89 }
90 else
91 {
92 // Single domain: no L1 tier, so the one global store gets the whole budget.
93 _maxStoreSizeBytes = totalBudgetBytes;
94 }
95
96 // The single global authoritative store (holds the complete dedup set)
97 _l2 = makeStore();
98 }
99
109 void printInfo() const
110 {
111 constexpr double toMb = 1.0 / (1024.0 * 1024.0);
112
113 // Sum a store's measured resident memory across its rolling generations
114 auto storeSizeMb = [this](const NUMAStore_t* s)
115 {
116 size_t bytes = 0;
117 for (const auto& h : s->_hashStores) bytes += getStoreSizeBytes(h);
118 return (double)bytes * toMb;
119 };
120
121 // Print the per-generation breakdown of a rolling store (newest first). Exposes each store's Id
122 // and Age, so window rolling is observable (an Id > 0 means a new store was created), plus its
123 // entries, size, and check/collision counts (current store from the live per-thread counters,
124 // archived stores from their snapshot taken at roll time).
125 auto printStores = [this, toMb, &storeSizeMb](const NUMAStore_t* s, const char* label)
126 {
127 size_t idx = 0;
128 for (auto it = s->_hashStores.rbegin(); it != s->_hashStores.rend(); ++it, ++idx)
129 {
130 const bool cur = (it == s->_hashStores.rbegin());
131 const size_t q = cur ? s->getQueryCount() : it->queryCount;
132 const size_t c = cur ? s->getCollisionCount() : it->collisionCount;
133 jaffarCommon::logger::log("[J+] + %sStore %lu/%lu - Id: %lu - Age: %lu, Entries: %.3f M, Size: %.1f Mb, Check Count: %lu, Collision Count: %lu (Rate %.3f%%)\n", label,
134 idx + 1, _maxStoreCount, it->id, it->age, (double)it->hashSet.size() * toMb, (double)getStoreSizeBytes(*it) * toMb, q, c,
135 q == 0 ? 0.0 : 100.0 * (double)c / (double)q);
136 }
137 };
138
139 // Single-domain (no L1 tier): report L2 as a single store, with its rolling-window breakdown
140 if (_l1.empty())
141 {
142 jaffarCommon::logger::log("[J+] Single-Store Dedup (single NUMA domain), budget %.1f Mb:\n", (double)(_maxStoreSizeBytes * _maxStoreCount) * toMb);
143 printStores(_l2, "");
144 return;
145 }
146
147 // Two-tier: aggregate L1 across domains + the global L2
148 size_t l1q = 0, l1c = 0, l1entries = 0;
149 double l1SizeMb = 0.0;
150 // Use LIFETIME (since-start) totals for the aggregate ratios so L1 and L2 are compared over the
151 // same whole-run window. The per-store getQueryCount()/getCollisionCount() are reset on every roll
152 // and the tiers roll at very different rates (L1 budget is a small fraction of L2's, so L1 rolls
153 // far more often) -- comparing those current-window counts made l2q/l1q and the dup rate exceed
154 // 100%. The lifetime totals make every ratio below a true fraction bounded by 100%.
155 for (auto* const g : _l1)
156 {
157 l1q += g->getTotalQueryCount();
158 l1c += g->getTotalCollisionCount();
159 for (const auto& s : g->_hashStores) l1entries += s.hashSet.size();
160 l1SizeMb += storeSizeMb(g);
161 }
162 const size_t l2q = _l2->getTotalQueryCount();
163 const size_t l2c = _l2->getTotalCollisionCount();
164 size_t l2entries = 0;
165 for (const auto& s : _l2->_hashStores) l2entries += s.hashSet.size();
166
167 // Budgets: each L1 rolls at _l1MaxStoreSizeBytes and keeps up to _maxStoreCount generations
168 // (times numaCount domains); L2 rolls at _maxStoreSizeBytes. The two budgets sum to the
169 // configured Max Store Size x Max Store Count (3/4 to L2, 1/4 shared across the L1 caches).
170 const double l1BudgetMb = (double)(_l1MaxStoreSizeBytes * _maxStoreCount * (size_t)_numaCount) * toMb;
171 const double l2BudgetMb = (double)(_maxStoreSizeBytes * _maxStoreCount) * toMb;
172
173 jaffarCommon::logger::log("[J+] Two-Tier Dedup (%d NUMA-local L1 + global L2):\n", _numaCount);
174 jaffarCommon::logger::log("[J+] + L1 (local): Checks: %lu, Local Hits: %lu (%.3f%%), Entries: %.3f M, Size: %.1f / %.1f Mb\n", l1q, l1c,
175 l1q == 0 ? 0.0 : 100.0 * (double)l1c / (double)l1q, (double)l1entries * toMb, l1SizeMb, l1BudgetMb);
176 jaffarCommon::logger::log(
177 "[J+] + L2 (global): Checks (L1 misses): %lu (%.3f%% of all), Cross-Domain Hits: %lu, Entries: %.3f M, Size: %.1f / %.1f Mb\n", l2q,
178 l1q == 0 ? 0.0 : 100.0 * (double)l2q / (double)l1q, l2c, (double)l2entries * toMb, storeSizeMb(_l2), l2BudgetMb);
179 jaffarCommon::logger::log("[J+] + Served locally (no L2 access): %.3f%% | Total dup rate: %.3f%%\n", l1q == 0 ? 0.0 : 100.0 * (double)l1c / (double)l1q,
180 l1q == 0 ? 0.0 : 100.0 * (double)(l1c + l2c) / (double)l1q);
181 // L2 rolling-window breakdown (Id/Age make window rolling observable)
182 printStores(_l2, "L2 ");
183 }
184
195 __INLINE__ bool checkHashExists(const jaffarCommon::hash::hash_t hash)
196 {
197 const auto threadId = jaffarCommon::parallel::getThreadId();
198
199 // Single-domain: just the one local store
200 if (_l1.empty())
201 {
202 _l2->_threadQueryCount[threadId].value++;
203 const bool collision = probeStore(_l2, hash);
204 if (collision) _l2->_threadCollisionCount[threadId].value++;
205 return collision;
206 }
207
208 // L1: this domain's local store. By construction every hash in any L1 was also inserted into L2,
209 // so an L1 hit is a genuine duplicate -- answered locally, with no L2 (possibly remote) access.
210 // probeStore() also inserts the hash into L1's current store (caching it locally).
211 auto* const l1 = _l1[_preferredNumaDomain];
212 l1->_threadQueryCount[threadId].value++;
213 if (probeStore(l1, hash))
214 {
215 l1->_threadCollisionCount[threadId].value++;
216 return true;
217 }
218
219 // L1 miss -> consult the authoritative global L2. probeStore() inserts the hash into L2's current
220 // store, making it globally visible (so other domains detect it as a duplicate). This is what
221 // preserves complete, global dedup: every globally-new hash is registered in L2 exactly once, by
222 // whichever domain first encounters it.
223 _l2->_threadQueryCount[threadId].value++;
224 const bool collision = probeStore(_l2, hash);
225 if (collision) _l2->_threadCollisionCount[threadId].value++;
226 return collision;
227 }
228
236 __INLINE__ void insertHash(const jaffarCommon::hash::hash_t hash)
237 {
238 if (_l1.empty() == false) _l1[_preferredNumaDomain]->_hashStores.rbegin()->hashSet.insert(hash);
239 _l2->_hashStores.rbegin()->hashSet.insert(hash);
240 }
241
249 __INLINE__ void advanceStep()
250 {
251 for (auto* const g : _l1) rollStore(g, _l1MaxStoreSizeBytes);
253 }
254
255private:
264 {
265 const size_t id;
266
267 const size_t age;
268
269 size_t queryCount;
270
272
273 jaffarCommon::concurrent::HashSet_t<jaffarCommon::hash::hash_t> hashSet = {};
274 };
275
284 struct alignas(64) paddedCounter_t
285 {
286 size_t value = 0;
287 };
288
294 {
296
301 size_t _currentAge = 0;
302
309 std::deque<hashStore_t> _hashStores;
310
311 std::vector<paddedCounter_t> _threadQueryCount;
312
313 std::vector<paddedCounter_t> _threadCollisionCount;
314
320
321 // Aggregating helpers (called only at reporting / store-rollover time, never in the hot path)
322
327 __INLINE__ size_t getQueryCount() const
328 {
329 size_t total = 0;
330 for (const auto& c : _threadQueryCount) total += c.value;
331 return total;
332 }
337 __INLINE__ size_t getCollisionCount() const
338 {
339 size_t total = 0;
340 for (const auto& c : _threadCollisionCount) total += c.value;
341 return total;
342 }
344 __INLINE__ void resetCounts()
345 {
346 for (auto& c : _threadQueryCount) c.value = 0;
347 for (auto& c : _threadCollisionCount) c.value = 0;
348 }
351 __INLINE__ size_t getTotalQueryCount() const { return _lifetimeQueryCount + getQueryCount(); }
354 __INLINE__ size_t getTotalCollisionCount() const { return _lifetimeCollisionCount + getCollisionCount(); }
355 };
356
361 __INLINE__ NUMAStore_t* makeStore()
362 {
363 auto* s = new NUMAStore_t;
364 s->_hashStores.push_back(hashStore_t({.id = s->_currentHashStoreId++, .age = s->_currentAge, .queryCount = 0, .collisionCount = 0}));
365 s->_threadQueryCount.resize(_threadCount);
366 s->_threadCollisionCount.resize(_threadCount);
367 return s;
368 }
369
380 __INLINE__ bool probeStore(NUMAStore_t* const store, const jaffarCommon::hash::hash_t hash)
381 {
382 auto itr = store->_hashStores.rbegin();
383 size_t curHashStoreIdx = 0;
384 while (itr != store->_hashStores.rend())
385 {
386 bool collisionFound = false;
387 if (curHashStoreIdx == 0)
388 collisionFound = itr->hashSet.insert(hash).second == false;
389 else
390 collisionFound = itr->hashSet.contains(hash);
391 if (collisionFound == true) return true;
392 itr++;
393 curHashStoreIdx++;
394 }
395 return false;
396 }
397
408 __INLINE__ void rollStore(NUMAStore_t* const g, const size_t maxBytes)
409 {
410 auto& currentHashStore = *g->_hashStores.rbegin();
411
412 if (getStoreSizeBytes(currentHashStore) >= maxBytes)
413 {
414 // Snapshot the (summed per-thread) counters into the current store before any structural
415 // change, then reset. This must precede pop_front(): when maxStoreCount == 1 the front store
416 // IS the current (back) store, so popping it first would leave currentHashStore dangling.
417 currentHashStore.queryCount = g->getQueryCount();
418 currentHashStore.collisionCount = g->getCollisionCount();
419 // Fold this window's counts into the lifetime accumulators BEFORE the reset, so the since-start
420 // totals survive both the reset and the eventual discard (pop_front) of this generation.
421 g->_lifetimeQueryCount += currentHashStore.queryCount;
422 g->_lifetimeCollisionCount += currentHashStore.collisionCount;
423 g->resetCounts();
424
425 // If we already reached the maximum hash stores, discard the oldest one
426 if (g->_hashStores.size() == _maxStoreCount) g->_hashStores.pop_front();
427
428 // Now create the new one, by pushing it from the back
429 g->_hashStores.push_back(hashStore_t({.id = g->_currentHashStoreId++, .age = g->_currentAge, .queryCount = 0, .collisionCount = 0}));
430 }
431
432 // Increasing age
433 g->_currentAge++;
434 }
435
446 __INLINE__ size_t getStoreSizeBytes(const hashStore_t& store) const { return store.hashSet.capacity() * _bytesPerSlot + _hashStoreFixedOverhead; }
447
453 static constexpr size_t _bytesPerSlot = sizeof(jaffarCommon::hash::hash_t) + sizeof(uint8_t);
454
461 static constexpr size_t _hashStoreFixedOverhead = sizeof(jaffarCommon::concurrent::HashSet_t<jaffarCommon::hash::hash_t>);
462
464
466
468
470
471 std::vector<NUMAStore_t*> _l1;
472
473 NUMAStore_t* _l2 = nullptr;
474};
475
476} // namespace jaffarPlus
Two-tier hash database that deduplicates visited states across the whole search.
Definition hashDb.hpp:33
void printInfo() const
Logs the database's current state, including per-generation breakdowns of the rolling stores and aggr...
Definition hashDb.hpp:109
void advanceStep()
Signals that a new search step has started, rolling and ageing every store.
Definition hashDb.hpp:249
~HashDb()
Destroys the database, deleting all per-domain L1 stores and the global L2 store.
Definition hashDb.hpp:55
size_t getStoreSizeBytes(const hashStore_t &store) const
Computes the actual resident memory of a hash store generation from the underlying set's measured all...
Definition hashDb.hpp:446
std::vector< NUMAStore_t * > _l1
Per-domain NUMA-local L1 caches. Empty on a single-domain run.
Definition hashDb.hpp:471
bool probeStore(NUMAStore_t *const store, const jaffarCommon::hash::hash_t hash)
Reverse-scans a store's hash sets (newest first), inserting into the current set and checking-only th...
Definition hashDb.hpp:380
NUMAStore_t * makeStore()
Creates a fresh store with a single (empty) hash set generation and per-thread counters.
Definition hashDb.hpp:361
size_t _maxStoreSizeBytes
Maximum global L2 store size, in bytes (derived from _maxStoreSizeMb).
Definition hashDb.hpp:467
bool checkHashExists(const jaffarCommon::hash::hash_t hash)
Checks whether the given hash is already present in the database, inserting it if new.
Definition hashDb.hpp:195
size_t _l1MaxStoreSizeBytes
Per-domain L1 store budget, in bytes (derived: global budget / number of NUMA domains).
Definition hashDb.hpp:469
void initialize()
Allocates the hash stores and splits the configured budget across the L1 and L2 tiers.
Definition hashDb.hpp:69
size_t getMaxBudgetBytes() const
Configured PEAK hash-DB footprint in bytes = Max Store Size x Max Store Count (the rolling generation...
Definition hashDb.hpp:52
size_t _maxStoreCount
Maximum number of store generations to keep at any time.
Definition hashDb.hpp:463
static constexpr size_t _bytesPerSlot
Per-slot memory cost of the underlying phmap flat hash set.
Definition hashDb.hpp:453
void rollStore(NUMAStore_t *const g, const size_t maxBytes)
Rolls a single store and advances its age.
Definition hashDb.hpp:408
static constexpr size_t _hashStoreFixedOverhead
Fixed overhead of a hash store, independent of the number of entries.
Definition hashDb.hpp:461
HashDb(const nlohmann::json &config)
Constructs the hash database from its configuration block.
Definition hashDb.hpp:41
double _maxStoreSizeMb
Maximum (global L2) store size, in megabytes.
Definition hashDb.hpp:465
NUMAStore_t * _l2
The single global authoritative store (holds the complete dedup set).
Definition hashDb.hpp:473
void insertHash(const jaffarCommon::hash::hash_t hash)
Inserts a hash into the current (newest) store(s) without checking for collisions.
Definition hashDb.hpp:236
NUMA topology detection: distance/preference matrices and per-domain delegate-thread selection,...
A single (L1 or L2) hash store: a rolling window of hashStore_t generations together with its rolling...
Definition hashDb.hpp:294
std::vector< paddedCounter_t > _threadQueryCount
Per-thread query counts for the current store, indexed by thread id.
Definition hashDb.hpp:311
size_t getTotalQueryCount() const
Lifetime (since-start) totals = already-rolled generations + the current window.
Definition hashDb.hpp:351
size_t _lifetimeCollisionCount
Lifetime collision total; companion to _lifetimeQueryCount (same since-start window).
Definition hashDb.hpp:319
void resetCounts()
Resets all per-thread query and collision counters to zero.
Definition hashDb.hpp:344
size_t getTotalCollisionCount() const
Lifetime (since-start) collision total = already-rolled generations + the current window (the collisi...
Definition hashDb.hpp:354
size_t _currentAge
Number of steps elapsed since this store began, assigned as the age of newly created generations.
Definition hashDb.hpp:301
std::deque< hashStore_t > _hashStores
The rolling window of hash store generations (oldest at front, current/newest at back).
Definition hashDb.hpp:309
size_t _currentHashStoreId
Next id to assign to a newly created hash store generation.
Definition hashDb.hpp:295
size_t _lifetimeQueryCount
Lifetime query/collision totals from all ALREADY-ROLLED generations (the per-thread counters above ar...
Definition hashDb.hpp:318
std::vector< paddedCounter_t > _threadCollisionCount
Per-thread collision counts for the current store, indexed by thread id.
Definition hashDb.hpp:313
size_t getCollisionCount() const
Sums the per-thread collision counts.
Definition hashDb.hpp:337
size_t getQueryCount() const
Sums the per-thread query counts.
Definition hashDb.hpp:327
One generation of a rolling store: a hash set of previously found states plus its identity,...
Definition hashDb.hpp:264
size_t queryCount
How many queries were made (snapshot of summed per-thread counts at roll time).
Definition hashDb.hpp:269
const size_t id
The store id.
Definition hashDb.hpp:265
size_t collisionCount
How many collisions were detected (snapshot of summed per-thread counts at roll time).
Definition hashDb.hpp:271
jaffarCommon::concurrent::HashSet_t< jaffarCommon::hash::hash_t > hashSet
The internal set of hashes for this store generation.
Definition hashDb.hpp:273
const size_t age
The store age (number of steps elapsed since it was created).
Definition hashDb.hpp:267
A query/collision counter padded to a full cache line to avoid false sharing.
Definition hashDb.hpp:285
size_t value
The counter's accumulated value.
Definition hashDb.hpp:286