JaffarPlus
High-performance best-first search optimizer for tool-assisted speedruns
Loading...
Searching...
No Matches
engine.hpp
Go to the documentation of this file.
1#pragma once
2
10#include "game.hpp"
11#include "hashDb.hpp"
12#include "numa.hpp"
13#include "runner.hpp"
14#include "stateDb.hpp"
15#include <algorithm>
16#include <emulatorList.hpp>
17#include <gameList.hpp>
18#include <jaffarCommon/deserializers/base.hpp>
19#include <jaffarCommon/hash.hpp>
20#include <jaffarCommon/json.hpp>
21#include <jaffarCommon/logger.hpp>
22#include <jaffarCommon/parallel.hpp>
23#include <jaffarCommon/serializers/base.hpp>
24#include <jaffarCommon/timing.hpp>
25
26// Fine-grained per-operation timing for the engine hot loop. Each timed region costs two
27// clock_gettime calls; multiplied across millions of states processed per step this is a real
28// overhead for a light emulator (~25% with QuickerSDLPoP), while being negligible under a heavy one
29// (QuickerNES is ~92% emulation). It is therefore compiled out unless built with
30// -DdetailedProfiling=true. When disabled, JAFFAR_PROF_DECL declares no variable and JAFFAR_PROF_ACC
31// is a no-op, so there is neither timing overhead nor an unused-variable warning. The coarse
32// per-step timers (step wall time, throughput, serial DB-advance stages) are always kept.
33#ifdef JAFFARPLUS_DETAILED_PROFILING
35#define JAFFAR_PROF_DECL(var) const auto var = jaffarCommon::timing::now()
37#define JAFFAR_PROF_ACC(field, var) field += jaffarCommon::timing::timeDeltaMicroseconds(jaffarCommon::timing::now(), var)
38#else
39#define JAFFAR_PROF_DECL(var) ((void)0)
40#define JAFFAR_PROF_ACC(field, var) ((void)0)
41#endif
42
43namespace jaffarPlus
44{
45
56class Engine final
57{
58public:
// Builds the engine from its four configuration objects.
//
// emulatorConfig, gameConfig and runnerConfig are passed unchanged to Runner::getRunner() to create
// one runner per worker thread. engineConfig is consumed here: the "State Database" and
// "Hash Database" objects are popped from a mutable copy, "Base State Batch Size" is read when
// present (0 otherwise, meaning auto-tune), and whatever is left is handed to checkEmpty().
//
// Raises through JAFFAR_THROW_LOGIC when the worker thread count is zero.
67 Engine(const nlohmann::json& emulatorConfig, const nlohmann::json& gameConfig, const nlohmann::json& runnerConfig, const nlohmann::json& engineConfig)
68 {
69 // Initializing NUMA and threading subsystems
71
72 // Sanity check
73 if (_threadCount == 0) JAFFAR_THROW_LOGIC("The number of worker threads must be at least one. Provided: %lu\n", _threadCount);
74
75 // Printing initial information
76 jaffarCommon::logger::log("[J+] Using %lu worker threads.\n", _threadCount);
77
78 // Creating storage for the runners (one per thread)
79 _runners.resize(_threadCount);
80
81 // Creating runners, one per thread
82 JAFFAR_PARALLEL
83 {
84 // Creating runner from the configuration
85 auto r = jaffarPlus::Runner::getRunner(emulatorConfig, gameConfig, runnerConfig);
86
87 // Storing runner. Index by the (dense) OpenMP thread id, NOT sched_getcpu(): the latter is only
88 // equal to the thread id when threads are pinned 1:1 (OMP_PROC_BIND), and otherwise leaves
89 // _runners[0] null -> the *_runners[0] use below (and workerFunction, which already uses the
90 // thread id) would dereference a null runner. This bites CI runners that don't pin threads.
91 _runners[jaffarCommon::parallel::getThreadId()] = std::move(r);
92 }
93
94 // Grabbing a runner to continue building the state databases
95 auto& r = *_runners[0];
96
97 // Mutable copy so unrecognized Engine keys can be flagged after the known ones are consumed
98 auto engineConfigRemaining = engineConfig;
99
100 // Creating State database
101 auto stateDatabaseJs = jaffarCommon::json::popObject(engineConfigRemaining, "State Database");
102 _stateDb = std::make_unique<jaffarPlus::StateDb>(r, stateDatabaseJs);
103
104 // Creating hash database (only instantiated when its "Enabled" flag is true)
105 auto hashDbConfig = jaffarCommon::json::popObject(engineConfigRemaining, "Hash Database");
106 _hashDbEnabled = jaffarCommon::json::getBoolean(hashDbConfig, "Enabled");
107 if (_hashDbEnabled == true) _hashDb = std::make_unique<jaffarPlus::HashDb>(hashDbConfig);
108
109 // Base-state pull batch size (per-worker queue-pull granularity). Optional: an explicit value
110 // always wins; otherwise 0 here means "auto-tune", resolved in initialize() from the measured
111 // per-state cost (light cores -> larger batch to amortize the queue lock; heavy/variable cores
112 // -> small batch for load balance).
113 _baseStateBatch = engineConfigRemaining.contains("Base State Batch Size") ? jaffarCommon::json::popNumber<size_t>(engineConfigRemaining, "Base State Batch Size") : 0;
115
116 // Any remaining Engine key is unrecognized
117 jaffarCommon::json::checkEmpty(engineConfigRemaining, "Engine Configuration");
118
119 // Reserving storage for timing information
120 _threadStepTime.resize(_threadCount);
121
122 // Reserving per-thread accumulators (cache-line aligned to avoid false sharing).
123 // These collect all hot-loop timing/counter increments without atomics, and are
124 // reduced into the shared totals once per step (see runStep).
125 _threadAccumulators.resize(_threadCount);
126 };
127
137 {
138 // Initializing state counters
141
142 // Initializing cumulative timing
156
157 // Resetting total running time
159
160 // Resetting state counts
164 _repeatedStates = 0;
165 _failedStates = 0;
166 _winStates = 0;
167 _normalStates = 0;
168
169 // Resetting checkpoint counters
173
174 // Resetting counter for the current step
175 _currentStep = 0;
176
177 // Resetting the last active rule id used for manual solution saving
179
180 // Create the one shared input-history backing (e.g. the trie, for the "Trie" strategy; null for
181 // raw/none) and inject it into every worker runner BEFORE they initialize, so all workers share it.
182 // It is sized with one free-list shard per worker thread plus one for the driver's intermediate-result
183 // thread (contention-free alloc/free). The StateDb reaches the strategy via the reference runner.
184 _inputHistoryBacking = inputHistory::createSharedBacking(_runners[0]->getInputHistoryConfig(), _threadCount + 1);
185
186 // Initializing runners, one per thread
187 JAFFAR_PARALLEL
188 {
189 // Creating thread's own runner (index by OpenMP thread id, consistent with construction/workerFunction)
190 auto& r = _runners[jaffarCommon::parallel::getThreadId()];
191
192 // Share the one backing (prefix sharing) and give each worker its own free-list shard (its thread id).
193 r->setInputHistoryBacking(_inputHistoryBacking, (uint32_t)jaffarCommon::parallel::getThreadId(), _threadCount + 1);
194 r->initialize();
195 }
196
197 // Combined RAM guard: the per-NUMA check inside StateDb::initialize() validates ONLY the state
198 // DB, and the hash DB's peak footprint is (Max Store Size x Max Store Count) -- NOT just Max Store
199 // Size. Summing both against total free RAM here catches a silent overcommit that the OS would
200 // otherwise resolve by OOM-killing the process mid-run (hours in). Done before any DB allocates.
201 {
202 const size_t stateBudget = _stateDb->getMaxBudgetBytes();
203 const size_t hashBudget = (_hashDbEnabled == true) ? _hashDb->getMaxBudgetBytes() : 0;
204 // Shared input-history trie: an uncounted structure that grows ~ live-states x depth up to its hard
205 // node cap (~384 GiB for the Trie strategy on this build). Reserve its ceiling. (0 for None/Raw.)
206 const size_t historyBound = inputHistory::getSharedBackingMaxMemoryBytes(_inputHistoryBacking);
207 // The state DB is a fixed pool (1x; full slots are scavenged from the current step, never doubled).
208 // The hash DB phmap grows by doubling, so during a rehash the old table and the new 2x table briefly
209 // coexist -- reserve ~2x the hash budget for that transient. (Empirically a 270 GB state+hash config
210 // peaked at ~520 GB RSS = state + ~2*hash + trie, then OOM-killed mid-run.)
211 const size_t hashPeak = hashBudget * 2;
212 const size_t totalBudget = stateBudget + hashPeak + historyBound;
213 size_t freeRam = 0;
214 for (int i = 0; i < _numaCount; i++)
215 {
216 long long nodeFree = 0;
217 numa_node_size64(i, &nodeFree);
218 freeRam += (size_t)nodeFree;
219 }
220 const size_t usable = (size_t)((double)freeRam * 0.90); // 10% headroom: emulators, OS, fragmentation
221 if (totalBudget > usable)
222 {
223 const double GB = 1024.0 * 1024.0 * 1024.0;
224 JAFFAR_THROW_RUNTIME("Configured database budget exceeds available memory:\n"
225 " State DB ('Max Size (Mb)', fixed pool) = %.1f GB\n"
226 " Hash DB peak (2 x 'Max Store Size' x 'Max Store Count') = %.1f GB\n"
227 " Input-history trie (hard node-storage ceiling) = %.1f GB\n"
228 " TOTAL = %.1f GB\n"
229 " Usable (90%% of %.1f GB free RAM) = %.1f GB\n"
230 "Reduce 'State Database/Max Size (Mb)' and/or 'Hash Database/Max Store Size (Mb)'.\n"
231 "NOTE: the hash DB phmap doubles on growth, so a rehash briefly holds the old + new (2x) table; the\n"
232 "Trie input-history is a separate structure that grows up to ~384 GiB. Both are otherwise uncounted.\n",
233 (double)stateBudget / GB, (double)hashPeak / GB, (double)historyBound / GB, (double)totalBudget / GB, (double)freeRam / GB, (double)usable / GB);
234 }
235 }
236
237 // Initializing State Db
238 _stateDb->initialize();
239
240 // Initializing hash database
241 if (_hashDbEnabled == true) _hashDb->initialize();
242
243 // Grabbing a runner to continue initialization
244 auto& r = *_runners[0];
245
246 // Auto-tune the base-state pull batch (when "Base State Batch Size" was not set in the config).
247 // We time a burst of state advances on the (just-loaded) initial state -- which is representative
248 // of the core's per-state cost -- then choose a batch so each batch is ~TARGET_BATCH_NS of work:
249 // enough to amortize the per-NUMA queue lock on cheap cores (large batch) while keeping a small
250 // batch on heavy/variable cores so end-of-step load imbalance stays low. The measurement state is
251 // saved and restored so the search still starts from the exact initial state. Batch size never
252 // affects which states are explored, only the work distribution.
253 if (_baseStateBatch == 0)
254 {
255 const size_t stateSize = r.getStateSize();
256 std::vector<char> scratch(stateSize);
257 {
258 jaffarCommon::serializer::Contiguous s(scratch.data(), stateSize);
259 r.serializeState(s);
260 }
261
262 const auto allowedInputs = r.getAllowedInputs();
263 const InputSet::inputIndex_t calInput = allowedInputs.empty() ? (InputSet::inputIndex_t)0 : allowedInputs[0];
264
265 const size_t CAL_FRAMES = 200;
266 const auto tCal0 = jaffarCommon::timing::now();
267 for (size_t i = 0; i < CAL_FRAMES; i++) r.advanceState(calInput);
268 const size_t perStateNs = jaffarCommon::timing::timeDeltaNanoseconds(jaffarCommon::timing::now(), tCal0) / CAL_FRAMES;
269
270 // Restore the exact initial state
271 {
272 jaffarCommon::deserializer::Contiguous d(scratch.data(), stateSize);
273 r.deserializeState(d);
274 }
275
276 // ~200us of work per batch: SDLPoP (~10us/state) -> ~16, Genesis (~600us/state) -> 1.
277 constexpr size_t TARGET_BATCH_NS = 200000;
278 size_t b = (perStateNs > 0) ? ((TARGET_BATCH_NS + perStateNs / 2) / perStateNs) : BASE_STATE_BATCH_MAX;
279 if (b < 1) b = 1;
281 _baseStateBatch = b;
282 jaffarCommon::logger::log("[J+] Auto-tuned base-state batch size: %lu (measured %.1f us/state)\n", _baseStateBatch, (double)perStateNs / 1000.0);
283 }
284 if (_baseStateBatch < 1) _baseStateBatch = 1;
285
286 // Evaluate game rules on the initial state
287 r.getGame()->evaluateRules();
288
289 // Determining new game state type
290 r.getGame()->updateGameStateType();
291
292 // Running game-specific rule actions
293 r.getGame()->runGameSpecificRuleActions();
294
295 // Updating game reward
296 r.getGame()->updateReward();
297
298 // Getting reward for the initial state
299 const auto reward = r.getGame()->getReward();
300
301 // Getting a free state data pointer to store the state into (serial init path -> thread 0)
302 auto stateData = _stateDb->getFreeState(0);
303
304 // Pushing initial state to the next state database
305 _stateDb->pushState(reward, r, stateData);
306
307 // Advancing the step in the state database
308 _stateDb->advanceStep();
309
310 // Getting memory for the reference state
311 _stateSizeInDatabase = _stateDb->getStateSizeInDatabase();
312
313 // Standalone snapshots hold the FULL self-contained state ([hot][history]); the DB slot is hot-only.
314 _fullStateSize = _stateDb->getFullStateSize();
315
316 // Allocating memory for the best win state
318
319 // Allocating memory for manual state saving
321
322 // Getting hash from first state
323 const auto hash = r.computeHash();
324
325 // Adding it to the hash DB
326 if (_hashDbEnabled == true) _hashDb->insertHash(hash);
327 }
328
// Runs one search step: resets the per-step state, executes the parallel region, folds the
// per-thread accumulators into the shared totals, advances the hash and state databases (each
// timed individually), records the step's wall time and finally increments _currentStep.
337 void runStep()
338 {
339 // Starting the wall-clock measurement for this step
340 const auto tStep = jaffarCommon::timing::now();
341
342 // Clearing step timing for the serially-measured stages (the rest are reduced
343 // from the per-thread accumulators after the parallel region)
346
347 // Resetting per-thread accumulators for this step
348 for (ssize_t i = 0; i < _threadCount; i++) _threadAccumulators[i].reset();
349
350 // Clearing win state reward
351 _stepBestWinState.reward = -std::numeric_limits<float>::infinity();
352
353 // Clearing manually saved state
354 _manualSaveSolution.reward = -std::numeric_limits<float>::infinity();
357
358 // (Manual solution storing) Resetting last active rule id flag
360
361 // Performing one computation step in parallel
362 JAFFAR_PARALLEL
364
365 // Reducing per-thread accumulators into the shared totals (serial, ~threadCount adds).
366 // The per-step raw timers and step counters are zeroed here, then summed; the
367 // run-long state-type counters keep accumulating across steps.
382 for (ssize_t i = 0; i < _threadCount; i++)
383 {
384 const auto& a = _threadAccumulators[i];
385 _runnerStateAdvanceThreadRawTime += a.runnerStateAdvance;
386 _runnerStateLoadThreadRawTime += a.runnerStateLoad;
387 _runnerStateSaveThreadRawTime += a.runnerStateSave;
388 _calculateHashThreadRawTime += a.calculateHash;
389 _checkHashThreadRawTime += a.checkHash;
390 _ruleCheckingThreadRawTime += a.ruleChecking;
391 _getFreeStateThreadRawTime += a.getFreeState;
392 _returnFreeStateThreadRawTime += a.returnFreeState;
393 _calculateRewardThreadRawTime += a.calculateReward;
394 _getAllowedInputsThreadRawTime += a.getAllowedInputs;
395 _getCandidateInputsThreadRawTime += a.getCandidateInputs;
396 _popBaseStateDbThreadRawTime += a.popBaseStateDb;
397 _stepBaseStatesProcessed += a.baseStatesProcessed;
398 _stepNewStatesProcessed += a.newStatesProcessed;
399 _normalStates += a.normalStates;
400 _repeatedStates += a.repeatedStates;
401 _failedStates += a.failedStates;
402 _winStates += a.winStates;
403 _droppedStatesNoStorage += a.droppedStatesNoStorage;
404 _droppedStatesFailedSerialization += a.droppedStatesFailedSerialization;
405 _droppedStatesCheckpoint += a.droppedStatesCheckpoint;
406 }
407
408 // Advancing hash database state (the timer runs even when the hash DB is disabled)
409 const auto t0 = jaffarCommon::timing::now();
410 if (_hashDbEnabled == true) _hashDb->advanceStep();
411 _advanceHashDbThreadRawTime += jaffarCommon::timing::timeDeltaMicroseconds(jaffarCommon::timing::now(), t0);
412
413 // Swapping next and current state databases
414 const auto t1 = jaffarCommon::timing::now();
415 _stateDb->advanceStep();
416 _advanceStateDbThreadRawTime += jaffarCommon::timing::timeDeltaMicroseconds(jaffarCommon::timing::now(), t1);
417
418 // Updating last active rule id
420 {
423 }
424
425 // Computing step time (microseconds elapsed since tStep)
426 _currentStepTime = jaffarCommon::timing::timeDeltaMicroseconds(jaffarCommon::timing::now(), tStep);
427
428 // Computing total running time
430
431 // Getting maximum thread step time
434 for (ssize_t i = 0; i < _threadCount; i++)
436 {
439 }
440
441 // Processing thread-average step timing
456
457 // Sub-total thread-average step timing
473
474 // Processing cumulative timing
489
490 // Sub-total cumulative time calculation
506
507 // Processing state counters
510
511 // Advancing step
512 _currentStep++;
513 }
514
519 {
520 // Free the state buffers allocated in initialize() (raw malloc; see _stateSizeInDatabase use above)
523 }
524
525 // Relevant data for the driver
526
// Accessor for the state database member (returned by reference, not copied).
528 auto& getStateDb() const { return _stateDb; }
// Returns a copy of the best win state tracked for the step.
530 auto getStepBestWinState() const { return _stepBestWinState; }
// Returns the current value of the win-state counter.
534 auto getWinStatesFound() const { return _winStates.load(); }
// Forwards to the state database's own state count.
536 auto getStateCount() const { return _stateDb->getStateCount(); }
537
// The three queries below pass the shared input-history backing to the matching inputHistory helper.
539 size_t getInputHistoryMaxMemoryBytes() const { return inputHistory::getSharedBackingMaxMemoryBytes(_inputHistoryBacking); }
541 size_t getInputHistoryApproxMemoryBytes() const { return inputHistory::getSharedBackingApproxMemoryBytes(_inputHistoryBacking); }
543 bool isInputHistoryExhausted() const { return inputHistory::isSharedBackingExhausted(_inputHistoryBacking); }
544
549 {
550 // Printing information
551 jaffarCommon::logger::log("[J+] Thread Count / NUMA Domains: %3d / %d\n", _threadCount, _numaCount);
552#ifdef JAFFARPLUS_DETAILED_PROFILING
553 jaffarCommon::logger::log("[J+] Elapsed Time (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_currentStepTime),
554 100.0 * ((double)(_subTotalAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_totalRunningTime),
555 100.0 * ((double)_subTotalAverageCumulativeTime) / (double)(_totalRunningTime));
556
557 jaffarCommon::logger::log("[J+] + Runner State Avance (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_runnerStateAdvanceAverageTime),
558 100.0 * ((double)(_runnerStateAdvanceAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_runnerStateAdvanceAverageCumulativeTime),
559 100.0 * ((double)_runnerStateAdvanceAverageCumulativeTime) / (double)(_totalRunningTime));
560
561 jaffarCommon::logger::log("[J+] + Runner State Load (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_runnerStateLoadAverageTime),
562 100.0 * ((double)(_runnerStateLoadAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_runnerStateLoadAverageCumulativeTime),
563 100.0 * ((double)_runnerStateLoadAverageCumulativeTime) / (double)(_totalRunningTime));
564
565 jaffarCommon::logger::log("[J+] + Runner State Save (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_runnerStateSaveAverageTime),
566 100.0 * ((double)(_runnerStateSaveAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_runnerStateSaveAverageCumulativeTime),
567 100.0 * ((double)_runnerStateSaveAverageCumulativeTime) / (double)(_totalRunningTime));
568
569 jaffarCommon::logger::log("[J+] + Hash Calculation (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_calculateHashAverageTime),
570 100.0 * ((double)(_calculateHashAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_calculateHashAverageCumulativeTime),
571 100.0 * ((double)_calculateHashAverageCumulativeTime) / (double)(_totalRunningTime));
572
573 jaffarCommon::logger::log("[J+] + Hash Checking (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_checkHashAverageTime),
574 100.0 * ((double)(_checkHashAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_checkHashAverageCumulativeTime),
575 100.0 * ((double)_checkHashAverageCumulativeTime) / (double)(_totalRunningTime));
576
577 jaffarCommon::logger::log("[J+] + Rule Checking (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_ruleCheckingAverageTime),
578 100.0 * ((double)(_ruleCheckingAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_ruleCheckingAverageCumulativeTime),
579 100.0 * ((double)_ruleCheckingAverageCumulativeTime) / (double)(_totalRunningTime));
580
581 jaffarCommon::logger::log("[J+] + Get Free State (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_getFreeStateAverageTime),
582 100.0 * ((double)(_getFreeStateAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_getFreeStateAverageCumulativeTime),
583 100.0 * ((double)_getFreeStateAverageCumulativeTime) / (double)(_totalRunningTime));
584
585 jaffarCommon::logger::log("[J+] + Return Free State (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_returnFreeStateAverageTime),
586 100.0 * ((double)(_returnFreeStateAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_returnFreeStateAverageCumulativeTime),
587 100.0 * ((double)_returnFreeStateAverageCumulativeTime) / (double)(_totalRunningTime));
588
589 jaffarCommon::logger::log("[J+] + Calculate Reward (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_calculateRewardAverageTime),
590 100.0 * ((double)(_calculateRewardAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_calculateRewardAverageCumulativeTime),
591 100.0 * ((double)_calculateRewardAverageCumulativeTime) / (double)(_totalRunningTime));
592
593 jaffarCommon::logger::log("[J+] + Popping Base State (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_popBaseStateDbAverageTime),
594 100.0 * ((double)(_popBaseStateDbAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_popBaseStateDbAverageCumulativeTime),
595 100.0 * ((double)_popBaseStateDbAverageCumulativeTime) / (double)(_totalRunningTime));
596
597 jaffarCommon::logger::log("[J+] + Get Allowed Inputs (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_getAllowedInputsAverageTime),
598 100.0 * ((double)(_getAllowedInputsAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_getAllowedInputsAverageCumulativeTime),
599 100.0 * ((double)_getAllowedInputsAverageCumulativeTime) / (double)(_totalRunningTime));
600
601 jaffarCommon::logger::log("[J+] + Get Candidate Inputs (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_getCandidateInputsAverageTime),
602 100.0 * ((double)(_getCandidateInputsAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_getCandidateInputsAverageCumulativeTime),
603 100.0 * ((double)_getCandidateInputsAverageCumulativeTime) / (double)(_totalRunningTime));
604#else
605 // Detailed per-operation profiling is compiled out (default). Only the coarse step wall time and
606 // the serially-measured DB-advance stages below are available; build -DdetailedProfiling=true for
607 // the full per-operation breakdown.
608 jaffarCommon::logger::log("[J+] Elapsed Time (Step/Total): %9.3fs / %9.3fs (per-operation breakdown disabled; build -DdetailedProfiling=true)\n",
609 1.0e-6 * (double)(_currentStepTime), 1.0e-6 * (double)(_totalRunningTime));
610#endif
611
612 jaffarCommon::logger::log("[J+] + Advance Hash Db (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_advanceHashDbAverageTime),
613 100.0 * ((double)(_advanceHashDbAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_advanceHashDbAverageCumulativeTime),
614 100.0 * ((double)_advanceHashDbAverageCumulativeTime) / (double)(_totalRunningTime));
615
616 jaffarCommon::logger::log("[J+] + Advance State Db (Step/Total): %9.3fs (%7.3f%%) / %9.3fs (%3.3f%%)\n", 1.0e-6 * (double)(_advanceStateDbAverageTime),
617 100.0 * ((double)(_advanceStateDbAverageTime) / (double)(_currentStepTime)), 1.0e-6 * (double)(_advanceStateDbAverageCumulativeTime),
618 100.0 * ((double)_advanceStateDbAverageCumulativeTime) / (double)(_totalRunningTime));
619
620 jaffarCommon::logger::log("[J+] Checkpoint (Level/Tolerance/Cutoff): %lu / %lu / %lu\n", _checkpointLevel, _checkpointTolerance, _checkpointCutoff);
621 jaffarCommon::logger::log("[J+] Base States Processed: %.3f Mstates (Total: %.3f Mstates)\n", 1.0e-6 * (double)_stepBaseStatesProcessed,
622 1.0e-6 * (double)_totalBaseStatesProcessed);
623 jaffarCommon::logger::log("[J+] New States Processed: %.3f Mstates (Total: %.3f Mstates)\n", 1.0e-6 * (double)_stepNewStatesProcessed,
624 1.0e-6 * (double)_totalNewStatesProcessed);
625
626 jaffarCommon::logger::log("[J+] Base States Performance: %.3f Mstates/s (Average: %.3f Mstates/s)\n",
627 1.0e-6 * (double)_stepBaseStatesProcessed / (1.0e-6 * (double)_currentStepTime),
628 1.0e-6 * (double)_totalBaseStatesProcessed / (1.0e-6 * (double)_totalRunningTime));
629 jaffarCommon::logger::log("[J+] New States Performance: %.3f Mstates/s (Average: %.3f Mstates/s)\n",
630 1.0e-6 * (double)_stepNewStatesProcessed / (1.0e-6 * (double)_currentStepTime),
631 1.0e-6 * (double)_totalNewStatesProcessed / (1.0e-6 * (double)_totalRunningTime));
632
633 jaffarCommon::logger::log("[J+] Dropped States (No Storage Available): %lu (%5.3f%% of New States Processed) \n", _droppedStatesNoStorage.load(),
634 100.0 * (double)_droppedStatesNoStorage.load() / (double)_totalNewStatesProcessed);
635 jaffarCommon::logger::log("[J+] Dropped States (Failed Serialization): %lu (%5.3f%% of New States Processed) \n", _droppedStatesFailedSerialization.load(),
636 100.0 * (double)_droppedStatesFailedSerialization.load() / (double)_totalNewStatesProcessed);
637 jaffarCommon::logger::log("[J+] Dropped States (Checkpoint): %lu (%5.3f%% of New States Processed) \n", _droppedStatesCheckpoint.load(),
638 100.0 * (double)_droppedStatesCheckpoint.load() / (double)_totalNewStatesProcessed);
639 jaffarCommon::logger::log("[J+] Failed States: %lu (%5.3f%% of New States Processed) \n", _failedStates.load(),
640 100.0 * (double)_failedStates.load() / (double)_totalNewStatesProcessed);
641 jaffarCommon::logger::log("[J+] Repeated States: %lu (%5.3f%% of New States Processed) \n", _repeatedStates.load(),
642 100.0 * (double)_repeatedStates.load() / (double)_totalNewStatesProcessed);
643 jaffarCommon::logger::log("[J+] Normal States: %lu (%5.3f%% of New States Processed) \n", _normalStates.load(),
644 100.0 * (double)_normalStates.load() / (double)_totalNewStatesProcessed);
645 jaffarCommon::logger::log("[J+] Win States: %lu (%5.3f%% of New States Processed) \n", _winStates.load(),
646 100.0 * (double)_winStates.load() / (double)_totalNewStatesProcessed);
647
648 // Print state database information
649 jaffarCommon::logger::log("[J+] State Database Information:\n");
650 _stateDb->printInfo();
651
652 if (_hashDbEnabled == true)
653 {
654 jaffarCommon::logger::log("[J+] Hash Database Information:\n");
655 _hashDb->printInfo();
656 }
657
658 jaffarCommon::logger::log("[J+] Manually Saved Solution:\n");
659 jaffarCommon::logger::log("[J+] + Path: '%s'\n", _manualSaveSolution.path.c_str());
660 jaffarCommon::logger::log("[J+] + Reward: %f\n", _manualSaveSolution.reward);
661 jaffarCommon::logger::log("[J+] + Last Rule Idx: %ld (Active: %ld, Path: '%s')\n", _manualSaveSolution.lastRuleIdx,
663
664 // Printing candidate inputs
665 jaffarCommon::logger::log("[J+] Candidate Inputs:\n");
666 for (const auto& entry : _candidateInputsDetected)
667 {
668 jaffarCommon::logger::log("[J+] + Hash: %s\n", jaffarCommon::hash::hashToString(entry.first).c_str());
669 for (const auto input : entry.second) jaffarCommon::logger::log("[J+] + %3lu %s\n", input, _runners[0]->getInputStringFromIndex(input).c_str());
670 }
671 }
672
674 __INLINE__ size_t getStateSizeInDatabase() const { return _stateSizeInDatabase; }
675
677 __INLINE__ size_t getFullStateSize() const { return _fullStateSize; }
678
679private:
681 // Number of base states a worker pulls from the shared per-NUMA state-DB queue per lock
682 // acquisition (into a thread-local buffer), instead of locking once per state. On a light
683 // emulator with dozens of worker threads per NUMA domain, that single-state lock/unlock was the
684 // dominant cost ("Popping Base State" ~37% of wall time); batching collapses it to <0.5%.
685 //
686 // The value is empirically tuned (EPYC 9755, 256 threads, SDLPoP lvl01): throughput is flat-
687 // optimal across 4-16 and falls off above it (32 ~ -4%, 64 ~ -10%, 512 ~ -9%). The reason is that
688 // once the lock contention is gone, the dominant effect is intra-step load balancing across 256
689 // threads -- a larger batch lets one straggler hold many states while others see the queue drain
690 // and go idle. 16 sits at the top of the flat region while keeping enough amortization headroom to
691 // stay robust for workloads with cheaper per-state work or higher thread counts. (A page-sized
692 // batch of 512 pointers was tried, on the theory the buffer would be cache/TLB friendly, but the
693 // buffer is tiny and L1-resident at any of these sizes, so the load-imbalance cost dominated.)
694 // The batch size is configurable ("Base State Batch Size"); when unset it is auto-tuned in
695 // initialize() from a timed burst of state advances (heavy cores -> 1 for load balance, light
696 // cores -> ~16 to amortize the queue lock). BASE_STATE_BATCH_MAX bounds the thread-local pull
697 // buffer; the active count is _baseStateBatch (clamped to [1, MAX]).
698 static constexpr size_t BASE_STATE_BATCH_MAX = 16;
699
711
714 {
715 float reward;
716 void* stateData = nullptr;
717 size_t stepCount = 0;
718 };
719
722 {
723 std::string path;
724 float reward;
725 void* stateData = nullptr;
726 ssize_t lastRuleIdx;
727 size_t stepCount = 0;
728 };
729
770
780 {
// Per-thread body of one search step: pulls base states from the state database in batches,
// expands each one with its allowed inputs and its not-yet-covered candidate inputs through
// runNewInput(), hands the base state back via returnFreeState(), and finally stores this
// thread's elapsed time in _threadStepTime.
781 // Getting my thread id
782 const auto threadId = jaffarCommon::parallel::getThreadId();
783
784 // Getting my thread-local accumulator (no atomics in the hot path)
785 auto& acc = _threadAccumulators[threadId];
786
787 // Starting to measure thread-specific step time
788 const auto threadTime0 = jaffarCommon::timing::now();
789
790 // Getting my runner
791 auto& r = _runners[threadId];
792
793 // Base states are pulled from the database in batches into this thread-local buffer, so the
794 // shared per-NUMA queue lock is acquired once per batch of _baseStateBatch states instead of once per
795 // state. With many worker threads per NUMA domain and cheap per-state work, the single-state
796 // lock/unlock dominates wall time ("Popping Base State"); batching amortizes it away.
// NOTE(review): popStates() receives _baseStateBatch as its count (presumably the maximum number
// of entries it may write) while the buffer holds BASE_STATE_BATCH_MAX entries; this relies on
// _baseStateBatch <= BASE_STATE_BATCH_MAX -- confirm that an explicitly configured
// "Base State Batch Size" is clamped to that bound.
797 void* baseStateBatch[BASE_STATE_BATCH_MAX];
799 size_t batchCount = _stateDb->popStates(baseStateBatch, _baseStateBatch, threadId);
800 JAFFAR_PROF_ACC(acc.popBaseStateDb, t);
801 size_t batchIdx = 0;
802
803 // While there are still states in the database, keep on grabbing them
804 while (batchIdx < batchCount)
805 {
806 // Taking the next base state from the local batch
807 void* baseStateData = baseStateBatch[batchIdx++];
808
809 // Increasing base state counter
810 acc.baseStatesProcessed++;
811
812 // Load state into runner via the state database (base states are slab slots: hot slot + cold path)
814 _stateDb->loadStateFromSlot(*r, baseStateData);
815 r->setSearchStep(_currentStep); // base state's depth = current search step (count is not stored per-state)
816 JAFFAR_PROF_ACC(acc.runnerStateLoad, t0);
817
818 // Getting allowed inputs
820 const auto allowedInputs = r->getAllowedInputs();
821 JAFFAR_PROF_ACC(acc.getAllowedInputs, t1);
822
823 // Getting candidate inputs (those not already covered by the allowed set). Computed here,
824 // before the allowed inputs are tried, while the runner still holds the unperturbed base state.
826 auto candidateInputs = r->getCandidateInputs();
827 JAFFAR_PROF_ACC(acc.getCandidateInputs, t2);
828
829 // Finding unique candidate inputs (candidates that do not also appear in allowedInputs)
830 std::vector<InputSet::inputIndex_t> uniqueCandidateInputs;
831 for (const auto& input : candidateInputs)
832 if (std::find(allowedInputs.begin(), allowedInputs.end(), input) == allowedInputs.end()) uniqueCandidateInputs.push_back(input);
833
834 // Discriminating hash of the *base* state (the situation being expanded), used to dedup
835 // candidate-input probing across like states. It must be taken from the base state, so it is
836 // captured now -- before the allowed/candidate inputs below advance the runner away from it.
837 jaffarCommon::hash::hash_t baseStateInputHash{};
838 if (uniqueCandidateInputs.empty() == false) baseStateInputHash = r->getGame()->getStateInputHash();
839
840 // Trying out each possible input in the set
841 for (auto inputItr = allowedInputs.begin(); inputItr != allowedInputs.end(); inputItr++) runNewInput(*r, baseStateData, *inputItr, acc, threadId);
842
843 // Run each candidate input, keyed by the base state's discriminating hash
// NOTE(review): _candidateInputsDetected is looked up and inserted into below with no lock
// visible in this function, while every other piece of state here is indexed by thread id --
// confirm the container is safe for concurrent access from all worker threads.
844 for (const auto input : uniqueCandidateInputs)
845 {
846 // Making sure we don't try the input if it was already detected for this type of state
847 if (_candidateInputsDetected.contains(baseStateInputHash))
848 if (_candidateInputsDetected[baseStateInputHash].contains(input)) continue;
849
850 // Running input
851 const auto result = runNewInput(*r, baseStateData, input, acc, threadId);
852
853 // If this is not a repeated state, store it as new candidate input
854 if (result != inputResult_t::repeated) _candidateInputsDetected[baseStateInputHash].insert(input);
855 }
856
857 // Return base state to the free state queue
859 _stateDb->returnFreeState(baseStateData, threadId);
860 JAFFAR_PROF_ACC(acc.returnFreeState, t8);
861
862 // When the local batch is exhausted, pull the next batch from the database
863 if (batchIdx >= batchCount)
864 {
866 batchCount = _stateDb->popStates(baseStateBatch, _baseStateBatch, threadId);
867 JAFFAR_PROF_ACC(acc.popBaseStateDb, t9);
868 batchIdx = 0;
869 }
870 }
871
872 // Taking final thread-specific time measurement
873 _threadStepTime[threadId] = jaffarCommon::timing::timeDeltaMicroseconds(jaffarCommon::timing::now(), threadTime0);
874 }
875
/**
 * Re-loads the base state into the runner, runs a single input on it, and updates the
 * per-thread outcome counters and the engine's checkpoint tracking.
 *
 * NOTE(review): this listing skips some original line numbers inside this function
 * (891, 894, 904-906). Presumably those are the profiling macros and the counters for the
 * dropped-* outcomes -- confirm against the real file before editing.
 *
 * @param r Runner that is reloaded with the base state and then advanced by runInput.
 * @param baseStateData State-database slot holding the base state to expand (not modified here).
 * @param input Index of the input to apply to the base state.
 * @param acc Per-thread accumulator that receives the outcome counters.
 * @param threadId Id of the calling worker thread; forwarded to runInput.
 * @return The outcome reported by runInput, unchanged.
 */
885 __INLINE__ inputResult_t runNewInput(Runner& r, const void* baseStateData, const InputSet::inputIndex_t input, threadAccumulator_t& acc, const size_t threadId)
886 {
887 // Counting every attempted input, whatever its outcome turns out to be
888 acc.newStatesProcessed++;
889
890 // Re-loading base state (slab slot: hot slot + cold path)
// The runner is reloaded on every call, so each input starts from the same base state
892 _stateDb->loadStateFromSlot(r, baseStateData);
893 r.setSearchStep(_currentStep); // base state's depth = current search step (count is not stored per-state)
895
896 // Running input
897 const auto result = runInput(r, input, acc, threadId);
898
899 // Update counters depending on the outcomes
// Only these four outcomes are counted in the lines visible here
900 if (result == inputResult_t::normal) acc.normalStates++;
901 if (result == inputResult_t::repeated) acc.repeatedStates++;
902 if (result == inputResult_t::failed) acc.failedStates++;
903 if (result == inputResult_t::win) acc.winStates++;
907
908 // Checking whether this state's checkpoint is new
// NOTE(review): this read happens for every outcome. When runInput returns `repeated` it has
// returned before evaluating the rules, so the level read here is whatever the game object
// holds at that point -- confirm this is intended.
909 const auto stateCheckpointLevel = r.getGame()->getCheckpointLevel();
910 const auto stateCheckpointTolerance = r.getGame()->getCheckpointTolerance();
// NOTE(review): the three members below are engine-wide and are written here with no lock
// visible in this function, while the workers presumably call this concurrently -- confirm
// whether synchronization is needed.
911 if (stateCheckpointLevel > _checkpointLevel)
912 {
913 _checkpointLevel = stateCheckpointLevel;
914 _checkpointTolerance = stateCheckpointTolerance;
// The cutoff is the current step plus the tolerance of the newly reached checkpoint
915 _checkpointCutoff = _currentStep + stateCheckpointTolerance;
916 }
917
918 // Returning result
919 return result;
920 }
921
/**
 * Advances the runner by one input and classifies/stores the resulting state.
 *
 * Order of checks, as visible in this listing:
 *   1. advance the runner and compute its hash;
 *   2. if hashing is enabled and the hash database already has the hash -> `repeated`;
 *   3. evaluate the game rules; a checkpoint level below _checkpointLevel -> `droppedCheckpoint`;
 *   4. a fail state type -> `failed`;
 *   5. acquire a free state slot; none available -> `droppedNoStorage`;
 *   6. update and read the reward;
 *   7. win state -> possibly record it as the step's best win, give the slot back, `win`;
 *   8. normal state -> push it into the state database;
 *   9. optionally record a manual save solution, then -> `normal`.
 *
 * NOTE(review): this listing skips several original line numbers inside this function
 * (e.g. 939, 941, 961, 970, 1036, 1045, 1052, 1056). Presumably those are profiling macros, the
 * condition guarding the checkpoint block, the return after a failed push, and the lock
 * acquisition -- confirm against the real file before editing.
 *
 * @param r Runner holding the base state on entry; on return it holds the advanced state.
 * @param input Index of the input to apply.
 * @param acc Per-thread accumulator used by the profiling macros.
 * @param threadId Id of the calling worker thread; used to get/return free state slots.
 * @return The outcome classification for the resulting state.
 */
936 __INLINE__ inputResult_t runInput(Runner& r, const InputSet::inputIndex_t input, threadAccumulator_t& acc, const size_t threadId)
937 {
938 // Now advancing state with the provided input
940 r.advanceState(input);
942
943 // Computing runner hash
945 const auto hash = r.computeHash();
947
948 // Checking if hash is repeated (i.e., has been seen before)
// With hashing disabled no state is ever treated as repeated
950 bool hashExists = _hashDbEnabled ? _hashDb->checkHashExists(hash) : false;
951 JAFFAR_PROF_ACC(acc.checkHash, t3);
952
953 // If state is repeated then we are not interested in it, continue
// This return happens before the rules are evaluated for the new state
954 if (hashExists == true) return inputResult_t::repeated;
955
956 // Evaluating game rules based on the new state
958 r.getGame()->evaluateRules();
959
960 // Checking whether this state meets checkpoint
// NOTE(review): line 961 is not shown in this listing; it presumably gates this block on the
// checkpoint cutoff step -- confirm.
962 {
963 const auto stateCheckpointLevel = r.getGame()->getCheckpointLevel();
964
965 // If state does not meet checkpoint, then do not process it further
966 if (stateCheckpointLevel < _checkpointLevel) return inputResult_t::droppedCheckpoint;
967 }
968
969 // Determining state type
// NOTE(review): line 970 is not shown in this listing; presumably it recomputes the state
// type from the evaluated rules -- confirm.
971
972 // Getting state type
973 const auto stateType = r.getGame()->getStateType();
975
976 // Now we have determined the state is not repeated, check if it's not a failed state
977 if (stateType == Game::stateType_t::fail) return inputResult_t::failed;
978
979 // Now that the state is not failed nor repeated, this is effectively a new state to add
// NOTE(review): the slot is acquired before the win check below, so a win state reached while
// no slot is free is reported as droppedNoStorage, even though the win branch only hands the
// slot straight back -- confirm this is intended.
981 void* newStateData = _stateDb->getFreeState(threadId);
983
984 // If couldn't get any memory, simply drop the state
985 if (newStateData == nullptr) return inputResult_t::droppedNoStorage;
986
987 // Updating state reward
989 r.getGame()->updateReward();
990
991 // Getting state reward
992 const auto reward = r.getGame()->getReward();
994
995 // If this is a win state, register it and return
996 if (stateType == Game::stateType_t::win)
997 {
999
1000 // Check if the new win state is the best and store it in that case
// The lock covers both the reward comparison and the update of the best-win record
1001 _stepBestWinStateLock.lock();
1002 if (reward > _stepBestWinState.reward)
1003 {
1004 _stateDb->saveStateFromRunner(r, _stepBestWinState.stateData);
1005 _stepBestWinState.reward = reward;
1006 _stepBestWinState.stepCount = r.getStepCount(); // record depth: the count is not serialized per-state
1007 }
1008 _stepBestWinStateLock.unlock();
1009
1010 // Freeing up the state data
// The win state is kept in _stepBestWinState.stateData, not in the slot, so the slot goes back
1011 JAFFAR_PROF_DECL(t7);
1012 _stateDb->returnFreeState(newStateData, threadId);
1014
1015 // Returning a win result
1016 return inputResult_t::win;
1017 }
1018
1019 // If this is a normal state and has possible inputs store it in the next state database
1020 if (stateType == Game::stateType_t::normal)
1021 {
1022 // If this is a normal state, push into the state database
1023 JAFFAR_PROF_DECL(t8);
1024 auto success = _stateDb->pushState(reward, r, newStateData);
1026
1027 // If pushing the state failed (e.g. serialization error), drop it and continue, keeping a counter
1028 if (success == false)
1029 {
1030 // Freeing up state memory
1031 JAFFAR_PROF_DECL(t9);
1032 _stateDb->returnFreeState(newStateData, threadId);
1034
1035 // Returning dropped result by failed serialization
// NOTE(review): line 1036 is not shown in this listing; presumably it is the return of the
// dropped-by-failed-serialization result -- confirm.
1037 }
1038 }
1039
1040 // Checking for manual saved solution is required
// Considered only when the triggering rule index is beyond the currently active one
1041 const auto currentLastRuleIdx = r.getGame()->getSaveSolutionCurrentLastRuleIdx();
1042 if (r.getGame()->isSaveSolution() && currentLastRuleIdx > _manualSaveSolutionActiveLastRuleId)
1043 {
1044 // Grab lock
// NOTE(review): line 1045 is not shown in this listing; presumably it acquires
// _manualSaveSolutionLock, which is released below -- confirm.
1046
1047 // Do this only if reward is better
1048 if (reward > _manualSaveSolution.reward)
1049 {
1050 // Store path, data, and reward
1051 _stateDb->saveStateFromRunner(r, _manualSaveSolution.stateData);
1053 _manualSaveSolution.reward = reward;
1054 _manualSaveSolution.lastRuleIdx = currentLastRuleIdx;
1055 _manualSaveSolution.stepCount = r.getStepCount(); // record depth (the count is not serialized per-state)
1057 }
1058
1059 // Release lock
1060 _manualSaveSolutionLock.unlock();
1061 }
1062
1063 // If store succeeded, return a normal execution
1064 return inputResult_t::normal;
1065 }
1066
// NOTE(review): this listing shows only part of the member declarations; some original line
// numbers are skipped, so members used by the methods may be declared on lines not visible here.

/// Per-base-state-input-hash set of candidate inputs already detected; lets the worker skip
/// probing a candidate input that was already detected for the same kind of base state
1068 jaffarCommon::concurrent::HashMap_t<jaffarCommon::hash::hash_t, jaffarCommon::concurrent::HashSet_t<InputSet::inputIndex_t>> _candidateInputsDetected;
1069
/// Counter for the current step
1071 size_t _currentStep = 0;
1072
/// Collection of runners for the workers to use (one per thread)
1074 std::vector<std::unique_ptr<Runner>> _runners;
1075
/// State database holding the current and next step's states
1077 std::unique_ptr<jaffarPlus::StateDb> _stateDb;
1078
/// The one shared input-history backing, held through an untyped shared pointer
1081 std::shared_ptr<void> _inputHistoryBacking;
1082
1086
/// Hash database used to detect repeated states
1088 std::unique_ptr<jaffarPlus::HashDb> _hashDb;
1089
1092
1095
1098
1099 // Storage for manually triggered save solution
1100
1106
1107 // Checkpoint information
1111
1113
/// Counter for states dropped due to lack of free states
1115 std::atomic<size_t> _droppedStatesNoStorage;
1116
1119
/// Counter for states dropped due to not meeting the checkpoint
1121 std::atomic<size_t> _droppedStatesCheckpoint;
1122
/// Counter for repeated states (hash already present in the hash database)
1124 std::atomic<size_t> _repeatedStates;
1125
/// Counter for failed states (reached a point in the game considered a loss)
1127 std::atomic<size_t> _failedStates;
1128
/// Counter for win states
1130 std::atomic<size_t> _winStates;
1131
/// Counter for normal states
1133 std::atomic<size_t> _normalStates;
1134
/// Base states processed during the current step
1135 std::atomic<size_t> _stepBaseStatesProcessed;
/// Base states processed across all steps so far
1136 std::atomic<size_t> _totalBaseStatesProcessed;
1137
/// New states processed during the current step
1138 std::atomic<size_t> _stepNewStatesProcessed;
/// New states processed across all steps so far
1139 std::atomic<size_t> _totalNewStatesProcessed;
1140
1142
1145
/// Per-thread running time of the current step, in microseconds
1146 std::vector<size_t> _threadStepTime;
1149
/// Per-thread accumulators for hot-loop timing/counters, reduced once per step
1151 std::vector<threadAccumulator_t> _threadAccumulators;
1152
1155
1156 // Time spent advancing runner state per step
1160
1161 // Time spent loading states into the runner
/// Summed per-thread state-load time for the step
1162 std::atomic<size_t> _runnerStateLoadThreadRawTime;
/// Per-thread-average state-load time for the step
1163 std::atomic<size_t> _runnerStateLoadAverageTime;
1165
1166 // Time spent saving runner states into the state db
/// Summed per-thread state-save time for the step
1167 std::atomic<size_t> _runnerStateSaveThreadRawTime;
/// Per-thread-average state-save time for the step
1168 std::atomic<size_t> _runnerStateSaveAverageTime;
1170
1171 // Time spent calculating hash
/// Summed per-thread hash-calculation time for the step
1172 std::atomic<size_t> _calculateHashThreadRawTime;
/// Per-thread-average hash-calculation time for the step
1173 std::atomic<size_t> _calculateHashAverageTime;
1175
1176 // Time spent checking hash
/// Summed per-thread hash-checking time for the step
1177 std::atomic<size_t> _checkHashThreadRawTime;
/// Per-thread-average hash-checking time for the step
1178 std::atomic<size_t> _checkHashAverageTime;
1180
1181 // Rule checking time
/// Summed per-thread rule-checking time for the step
1182 std::atomic<size_t> _ruleCheckingThreadRawTime;
/// Per-thread-average rule-checking time for the step
1183 std::atomic<size_t> _ruleCheckingAverageTime;
1185
1186 // Get free state time
/// Summed per-thread get-free-state time for the step
1187 std::atomic<size_t> _getFreeStateThreadRawTime;
/// Per-thread-average get-free-state time for the step
1188 std::atomic<size_t> _getFreeStateAverageTime;
1190
1191 // Return free state time
/// Summed per-thread return-free-state time for the step
1192 std::atomic<size_t> _returnFreeStateThreadRawTime;
/// Per-thread-average return-free-state time for the step
1193 std::atomic<size_t> _returnFreeStateAverageTime;
1195
1196 // Reward calculation time
/// Summed per-thread reward-calculation time for the step
1197 std::atomic<size_t> _calculateRewardThreadRawTime;
/// Per-thread-average reward-calculation time for the step
1198 std::atomic<size_t> _calculateRewardAverageTime;
1200
1201 // Get allowed inputs time
/// Per-thread-average get-allowed-inputs time for the step
1203 std::atomic<size_t> _getAllowedInputsAverageTime;
1205
1206 // Get candidate inputs time
1210
1211 // Advance Hash DB time
/// Serially-measured hash-DB advance time for the step
1212 std::atomic<size_t> _advanceHashDbThreadRawTime;
/// Hash-DB advance time reported for the step
1213 std::atomic<size_t> _advanceHashDbAverageTime;
1215
1216 // Advance State DB time
/// Serially-measured state-DB advance time for the step
1217 std::atomic<size_t> _advanceStateDbThreadRawTime;
/// State-DB advance time reported for the step
1218 std::atomic<size_t> _advanceStateDbAverageTime;
1220
1221 // Popping states from the State DB time
/// Summed per-thread base-state pop time for the step
1222 std::atomic<size_t> _popBaseStateDbThreadRawTime;
/// Per-thread-average base-state pop time for the step
1223 std::atomic<size_t> _popBaseStateDbAverageTime;
1225
1228};
1229
1230} // namespace jaffarPlus
Parallel state-space search engine.
Definition engine.hpp:57
std::atomic< size_t > _getFreeStateAverageTime
Per-thread-average get-free-state time for the step.
Definition engine.hpp:1188
std::atomic< size_t > _advanceHashDbAverageTime
Hash-DB advance time reported for the step.
Definition engine.hpp:1213
std::atomic< size_t > _getCandidateInputsAverageTime
Per-thread-average get-candidate-inputs time for the step.
Definition engine.hpp:1208
std::atomic< size_t > _checkHashThreadRawTime
Summed per-thread hash-checking time for the step.
Definition engine.hpp:1177
std::atomic< size_t > _advanceStateDbThreadRawTime
Serially-measured state-DB advance time for the step.
Definition engine.hpp:1217
size_t getStateSizeInDatabase() const
Returns the size, in bytes, of a single state as stored in the database.
Definition engine.hpp:674
std::atomic< size_t > _droppedStatesFailedSerialization
Counter for states dropped due to failed serialization.
Definition engine.hpp:1118
std::atomic< size_t > _popBaseStateDbThreadRawTime
Summed per-thread base-state pop time for the step.
Definition engine.hpp:1222
size_t _checkpointTolerance
Tolerance (in steps) associated with the current checkpoint level.
Definition engine.hpp:1109
size_t getInputHistoryMaxMemoryBytes() const
Hard memory ceiling (bytes) of the shared input-history backing; 0 for None/Raw (no ceiling).
Definition engine.hpp:539
std::atomic< size_t > _advanceHashDbThreadRawTime
Serially-measured hash-DB advance time for the step.
Definition engine.hpp:1212
std::atomic< size_t > _getCandidateInputsAverageCumulativeTime
Cumulative per-thread-average get-candidate-inputs time.
Definition engine.hpp:1209
std::atomic< size_t > _checkHashAverageCumulativeTime
Cumulative per-thread-average hash-checking time.
Definition engine.hpp:1179
std::atomic< size_t > _totalBaseStatesProcessed
Base states processed across all steps so far.
Definition engine.hpp:1136
std::atomic< size_t > _returnFreeStateAverageTime
Per-thread-average return-free-state time for the step.
Definition engine.hpp:1193
static constexpr size_t BASE_STATE_BATCH_MAX
Number of base states a worker pulls from the state-DB queue per lock acquisition (batch size).
Definition engine.hpp:698
std::atomic< size_t > _ruleCheckingThreadRawTime
Summed per-thread rule-checking time for the step.
Definition engine.hpp:1182
std::unique_ptr< jaffarPlus::HashDb > _hashDb
Thread-safe hash database used to detect repeated states.
Definition engine.hpp:1088
std::atomic< size_t > _getFreeStateThreadRawTime
Summed per-thread get-free-state time for the step.
Definition engine.hpp:1187
void initialize()
Resets execution back to step zero and clears all databases and counters.
Definition engine.hpp:136
size_t _fullStateSize
Full self-contained serialized state size ([hot]+[history]) for standalone snapshot buffers.
Definition engine.hpp:1094
std::atomic< size_t > _advanceStateDbAverageCumulativeTime
Cumulative state-DB advance time.
Definition engine.hpp:1219
std::string _manualSaveSolutionLastPath
Path of the most recently activated manual-save solution.
Definition engine.hpp:1105
std::atomic< size_t > _runnerStateLoadAverageTime
Per-thread-average state-load time for the step.
Definition engine.hpp:1163
std::atomic< size_t > _stepBaseStatesProcessed
Base states processed during the current step.
Definition engine.hpp:1135
std::atomic< size_t > _calculateRewardAverageTime
Per-thread-average reward-calculation time for the step.
Definition engine.hpp:1198
std::atomic< size_t > _runnerStateSaveAverageTime
Per-thread-average state-save time for the step.
Definition engine.hpp:1168
size_t _maxThreadStepTime
Maximum per-thread step time for the current step.
Definition engine.hpp:1147
std::vector< std::unique_ptr< Runner > > _runners
Collection of runners for the workers to use (one per thread).
Definition engine.hpp:1074
manualSaveSolution_t _manualSaveSolution
Best manually saved solution for the current step.
Definition engine.hpp:1102
size_t _totalRunningTime
Total running time so far, in microseconds.
Definition engine.hpp:1154
std::atomic< size_t > _popBaseStateDbAverageTime
Per-thread-average base-state pop time for the step.
Definition engine.hpp:1223
std::atomic< size_t > _calculateHashAverageCumulativeTime
Cumulative per-thread-average hash-calculation time.
Definition engine.hpp:1174
std::atomic< size_t > _checkHashAverageTime
Per-thread-average hash-checking time for the step.
Definition engine.hpp:1178
size_t _subTotalAverageTime
Sum of all per-operation average times for the current step.
Definition engine.hpp:1226
std::atomic< size_t > _winStates
Counter for win states.
Definition engine.hpp:1130
stateInfo_t _stepBestWinState
Best win state (by reward) found during the current step.
Definition engine.hpp:1097
std::atomic< size_t > _runnerStateSaveThreadRawTime
Summed per-thread state-save time for the step.
Definition engine.hpp:1167
std::atomic< size_t > _calculateRewardAverageCumulativeTime
Cumulative per-thread-average reward-calculation time.
Definition engine.hpp:1199
size_t _currentStepTime
Overall running time of the current step, in microseconds.
Definition engine.hpp:1144
std::atomic< size_t > _runnerStateAdvanceAverageCumulativeTime
Cumulative per-thread-average runner-advance time.
Definition engine.hpp:1159
bool _hashDbEnabled
Whether hashing is enabled. Games that cannot loop skip the hash DB to save memory and computation.
Definition engine.hpp:1084
std::atomic< size_t > _droppedStatesCheckpoint
Counter for states dropped due to not meeting the checkpoint.
Definition engine.hpp:1121
auto getStepBestWinState() const
Returns a copy of the best win state recorded in the current step.
Definition engine.hpp:530
std::atomic< size_t > _popBaseStateDbAverageCumulativeTime
Cumulative per-thread-average base-state pop time.
Definition engine.hpp:1224
std::atomic< size_t > _getAllowedInputsAverageCumulativeTime
Cumulative per-thread-average get-allowed-inputs time.
Definition engine.hpp:1204
jaffarCommon::concurrent::HashMap_t< jaffarCommon::hash::hash_t, jaffarCommon::concurrent::HashSet_t< InputSet::inputIndex_t > > _candidateInputsDetected
Per-base-state-input-hash set of candidate inputs already detected, used to dedup candidate-input probing across like states.
Definition engine.hpp:1068
std::atomic< size_t > _getAllowedInputsThreadRawTime
Summed per-thread get-allowed-inputs time for the step.
Definition engine.hpp:1202
void workerFunction()
Worker body executed in parallel by every thread during a step.
Definition engine.hpp:779
std::vector< threadAccumulator_t > _threadAccumulators
Per-thread accumulators for hot-loop timing/counters, reduced once per step.
Definition engine.hpp:1151
size_t getFullStateSize() const
Full self-contained state size ([hot]+[history]); for standalone snapshots outside the slabs.
Definition engine.hpp:677
std::unique_ptr< jaffarPlus::StateDb > _stateDb
Thread-safe state database holding the current and next step's states.
Definition engine.hpp:1077
std::atomic< size_t > _calculateHashAverageTime
Per-thread-average hash-calculation time for the step.
Definition engine.hpp:1173
std::atomic< size_t > _runnerStateSaveAverageCumulativeTime
Cumulative per-thread-average state-save time.
Definition engine.hpp:1169
bool _manualSaveSolutionUpdatedLastRuleId
Whether the manual-save last-rule id changed this step.
Definition engine.hpp:1103
std::atomic< size_t > _runnerStateLoadThreadRawTime
Summed per-thread state-load time for the step.
Definition engine.hpp:1162
inputResult_t runInput(Runner &r, const InputSet::inputIndex_t input, threadAccumulator_t &acc, const size_t threadId)
Advances the runner by one input and classifies/stores the resulting state.
Definition engine.hpp:936
std::mutex _stepBestWinStateLock
Guards updates to _stepBestWinState.
Definition engine.hpp:1096
std::atomic< size_t > _advanceStateDbAverageTime
State-DB advance time reported for the step.
Definition engine.hpp:1218
std::atomic< size_t > _getAllowedInputsAverageTime
Per-thread-average get-allowed-inputs time for the step.
Definition engine.hpp:1203
auto getManualSaveSolution() const
Returns a copy of the most recent manually saved solution.
Definition engine.hpp:532
inputResult_t runNewInput(Runner &r, const void *baseStateData, const InputSet::inputIndex_t input, threadAccumulator_t &acc, const size_t threadId)
Re-loads the base state, runs a single input, updates per-outcome counters and checkpoint tracking.
Definition engine.hpp:885
size_t _maxThreadStepTimeThreadId
Id of the thread with the maximum step time.
Definition engine.hpp:1148
auto getStateCount() const
Returns the number of states currently held in the state database.
Definition engine.hpp:536
size_t _stateSizeInDatabase
Size of a single state as stored in the database, in bytes.
Definition engine.hpp:1091
ssize_t _manualSaveSolutionActiveLastRuleId
Currently active manual-save last-rule id across steps.
Definition engine.hpp:1104
std::shared_ptr< void > _inputHistoryBacking
The one shared input-history backing (e.g. the Trie).
Definition engine.hpp:1081
std::atomic< size_t > _repeatedStates
Counter for repeated states (detected when the state's hash is already present in the hash database).
Definition engine.hpp:1124
std::atomic< size_t > _runnerStateAdvanceAverageTime
Per-thread-average runner-advance time for the step.
Definition engine.hpp:1158
std::atomic< size_t > _calculateRewardThreadRawTime
Summed per-thread reward-calculation time for the step.
Definition engine.hpp:1197
std::atomic< size_t > _normalStates
Counter for normal states.
Definition engine.hpp:1133
std::atomic< size_t > _returnFreeStateAverageCumulativeTime
Cumulative per-thread-average return-free-state time.
Definition engine.hpp:1194
inputResult_t
Outcome of running a single input on a base state.
Definition engine.hpp:702
@ normal
Resulting state was a normal state and was stored.
Definition engine.hpp:708
@ repeated
Resulting state's hash was already seen.
Definition engine.hpp:703
@ win
Resulting state was a win state.
Definition engine.hpp:709
@ droppedNoStorage
No free state slot was available to store the new state.
Definition engine.hpp:704
@ droppedFailedSerialization
Pushing the state into the database failed (e.g. serialization error).
Definition engine.hpp:705
@ failed
Resulting state was classified as a loss.
Definition engine.hpp:707
@ droppedCheckpoint
State did not meet the current checkpoint level past the cutoff step.
Definition engine.hpp:706
void runStep()
Runs a single search step: expands all current base states in parallel and advances the databases.
Definition engine.hpp:337
std::atomic< size_t > _failedStates
Counter for failed states (reached a point in the game considered a loss).
Definition engine.hpp:1127
size_t _currentStep
Counter for the current step.
Definition engine.hpp:1071
std::atomic< size_t > _returnFreeStateThreadRawTime
Summed per-thread return-free-state time for the step.
Definition engine.hpp:1192
std::atomic< size_t > _stepNewStatesProcessed
New states processed during the current step.
Definition engine.hpp:1138
std::atomic< size_t > _getFreeStateAverageCumulativeTime
Cumulative per-thread-average get-free-state time.
Definition engine.hpp:1189
size_t _subTotalAverageCumulativeTime
Sum of all per-operation cumulative average times.
Definition engine.hpp:1227
std::atomic< size_t > _advanceHashDbAverageCumulativeTime
Cumulative hash-DB advance time.
Definition engine.hpp:1214
std::vector< size_t > _threadStepTime
Per-thread running time of the current step, in microseconds.
Definition engine.hpp:1146
std::atomic< size_t > _runnerStateLoadAverageCumulativeTime
Cumulative per-thread-average state-load time.
Definition engine.hpp:1164
bool isInputHistoryExhausted() const
True if the shared input-history backing (the Trie) has hit its hard node-storage ceiling.
Definition engine.hpp:543
void printInfo()
Logs engine status: timing breakdown, throughput, state counts, checkpoints, databases,...
Definition engine.hpp:548
auto & getStateDb() const
Returns a reference to the owned state database.
Definition engine.hpp:528
size_t _checkpointCutoff
Step index after which states below _checkpointLevel are dropped.
Definition engine.hpp:1110
std::atomic< size_t > _ruleCheckingAverageCumulativeTime
Cumulative per-thread-average rule-checking time.
Definition engine.hpp:1184
std::atomic< size_t > _runnerStateAdvanceThreadRawTime
Summed per-thread runner-advance time for the step.
Definition engine.hpp:1157
size_t _baseStateBatch
Active base-state pull batch size ("Base State Batch Size").
Definition engine.hpp:1085
auto getWinStatesFound() const
Returns the cumulative number of win states found so far.
Definition engine.hpp:534
std::atomic< size_t > _calculateHashThreadRawTime
Summed per-thread hash-calculation time for the step.
Definition engine.hpp:1172
size_t getInputHistoryApproxMemoryBytes() const
Current (approximate) live memory (bytes) of the shared input-history backing; 0 for None/Raw.
Definition engine.hpp:541
std::atomic< size_t > _ruleCheckingAverageTime
Per-thread-average rule-checking time for the step.
Definition engine.hpp:1183
std::atomic< size_t > _droppedStatesNoStorage
Counter for states dropped due to lack of free states.
Definition engine.hpp:1115
Engine(const nlohmann::json &emulatorConfig, const nlohmann::json &gameConfig, const nlohmann::json &runnerConfig, const nlohmann::json &engineConfig)
Constructs the engine, building one runner per worker thread and the state/hash databases.
Definition engine.hpp:67
std::atomic< size_t > _totalNewStatesProcessed
New states processed across all steps so far.
Definition engine.hpp:1139
size_t _checkpointLevel
Highest checkpoint level reached so far.
Definition engine.hpp:1108
std::mutex _manualSaveSolutionLock
Guards updates to _manualSaveSolution.
Definition engine.hpp:1101
~Engine()
Frees the best-win and manual-save state buffers allocated in initialize.
Definition engine.hpp:518
std::atomic< size_t > _getCandidateInputsThreadRawTime
Summed per-thread get-candidate-inputs time for the step.
Definition engine.hpp:1207
size_t getCheckpointTolerance() const
Returns the current state's checkpoint tolerance.
Definition game.hpp:607
bool isSaveSolution() const
Indicates whether the current state should trigger a save solution.
Definition game.hpp:613
ssize_t getSaveSolutionCurrentLastRuleIdx() const
Returns the current last rule index that set a save solution.
Definition game.hpp:619
void updateReward()
Recomputes the current state's reward from the satisfied rules.
Definition game.hpp:447
size_t getCheckpointLevel() const
Returns the current state's checkpoint level.
Definition game.hpp:604
void evaluateRules()
Evaluates the rule set against the current state.
Definition game.hpp:351
float getReward() const
Returns the current state's reward.
Definition game.hpp:584
void updateGameStateType()
Recomputes the state type and checkpoint level from the satisfied rules.
Definition game.hpp:405
stateType_t getStateType() const
Returns the current state type (normal, win or fail).
Definition game.hpp:601
@ normal
No win or fail rule is currently satisfied.
Definition game.hpp:44
@ fail
A fail rule is currently satisfied.
Definition game.hpp:46
@ win
A win rule is currently satisfied.
Definition game.hpp:45
const std::string getSaveSolutionPath() const
Returns the save path of the rule that activated the current save solution.
Definition game.hpp:625
size_t inputIndex_t
Type used to index an input.
Definition inputSet.hpp:29
Owns a Game instance and advances it according to configured inputs.
Definition runner.hpp:38
static std::unique_ptr< Runner > getRunner(const nlohmann::json &emulatorConfig, const nlohmann::json &gameConfig, const nlohmann::json &runnerConfig)
Creates a runner from the emulator, game and runner configurations.
Definition runner.hpp:527
void advanceState(const InputSet::inputIndex_t inputIdx)
Advances the game by one input, then by the configured number of frameskip frames.
Definition runner.hpp:309
void setSearchStep(const size_t searchStep)
Sets the step counter from a search step.
Definition runner.hpp:351
jaffarCommon::hash::hash_t computeHash() const
Computes a hash of the current runner state.
Definition runner.hpp:426
Game * getGame() const
Returns a pointer to the owned game instance.
Definition runner.hpp:516
size_t getStepCount() const
Returns the current step counter (number of inputs applied / the state's depth).
Definition runner.hpp:354
#define JAFFAR_PROF_ACC(field, var)
Accumulates the microseconds elapsed since timestamp var into field.
Definition engine.hpp:37
#define JAFFAR_PROF_DECL(var)
Declares a timestamp variable var holding the current time (detailed-profiling build).
Definition engine.hpp:35
Abstract base for a JaffarPlus game: wraps an emulator, registers game properties,...
Two-tier (per-domain L1 + shared global L2) hash database used to deduplicate visited search states,...
NUMA topology detection: distance/preference matrices and per-domain delegate-thread selection,...
void initializeNUMA()
Initializes NUMA / core-affinity state.
Definition numa.hpp:44
Drives a Game forward one input at a time, managing the allowed/candidate input sets,...
Per-NUMA-domain database of serialized game states, with reward-ordered queues that feed the search o...
A manually saved solution: its input path, reward, serialized state, and triggering rule index.
Definition engine.hpp:722
ssize_t lastRuleIdx
Index of the last rule active when the state was saved.
Definition engine.hpp:726
float reward
Reward of the saved state.
Definition engine.hpp:724
std::string path
Input sequence (solution path) that reached the saved state.
Definition engine.hpp:723
size_t stepCount
Depth (input count) of the saved state, recorded at capture.
Definition engine.hpp:727
void * stateData
Raw buffer holding the serialized saved state, or nullptr if unset.
Definition engine.hpp:725
A reward value paired with a serialized state buffer.
Definition engine.hpp:714
size_t stepCount
Depth (input count) of the saved state, recorded at capture (the count is not serialized per-state).
Definition engine.hpp:717
void * stateData
Raw buffer holding the serialized state, or nullptr if unset.
Definition engine.hpp:716
float reward
Reward associated with the stored state.
Definition engine.hpp:715
Per-thread accumulator for timing and counters.
Definition engine.hpp:741
size_t normalStates
Number of normal states produced.
Definition engine.hpp:759
size_t runnerStateAdvance
Time spent advancing the runner state with an input.
Definition engine.hpp:743
size_t baseStatesProcessed
Number of base states this thread expanded.
Definition engine.hpp:757
size_t calculateHash
Time spent computing state hashes.
Definition engine.hpp:746
size_t getAllowedInputs
Time spent querying the runner's allowed inputs.
Definition engine.hpp:752
size_t droppedStatesCheckpoint
Number of states dropped for not meeting the checkpoint.
Definition engine.hpp:765
void reset()
Resets all timers and counters to zero.
Definition engine.hpp:768
size_t getCandidateInputs
Time spent querying the runner's candidate inputs.
Definition engine.hpp:753
size_t droppedStatesFailedSerialization
Number of states dropped due to failed serialization.
Definition engine.hpp:764
size_t runnerStateLoad
Time spent loading states into the runner.
Definition engine.hpp:744
size_t repeatedStates
Number of states dropped as repeated.
Definition engine.hpp:760
size_t runnerStateSave
Time spent saving runner states into the state database.
Definition engine.hpp:745
size_t newStatesProcessed
Number of new states this thread produced via inputs.
Definition engine.hpp:758
size_t popBaseStateDb
Time spent popping base-state batches from the state database.
Definition engine.hpp:754
size_t failedStates
Number of states classified as failures.
Definition engine.hpp:761
size_t calculateReward
Time spent computing state rewards.
Definition engine.hpp:751
size_t checkHash
Time spent checking hashes against the hash database.
Definition engine.hpp:747
size_t returnFreeState
Time spent returning state slots to the free queue.
Definition engine.hpp:750
size_t droppedStatesNoStorage
Number of states dropped for lack of free storage.
Definition engine.hpp:763
size_t ruleChecking
Time spent evaluating rules and determining state type.
Definition engine.hpp:748
size_t winStates
Number of win states produced.
Definition engine.hpp:762
size_t getFreeState
Time spent acquiring free state slots.
Definition engine.hpp:749