JaffarPlus
High-performance best-first search optimizer for tool-assisted speedruns
Loading...
Searching...
No Matches
numa.hpp
Go to the documentation of this file.
1#pragma once
2
10#include <cstdlib>
11#include <jaffarCommon/exceptions.hpp>
12#include <jaffarCommon/parallel.hpp>
13#include <map>
14#include <mutex>
15#include <numa.h>
16#include <vector>
17
18namespace jaffarPlus
19{
20
21static int _numaCount;
22
23static int _threadCount;
24
25static std::vector<std::vector<size_t>> _numaDistanceMatrix;
26
27static std::vector<std::vector<size_t>>
29
30static std::vector<ssize_t> _numaDelegateThreadId;
31
32static thread_local int _preferredNumaDomain;
33
34static thread_local int _myThreadId;
35
44__INLINE__ void initializeNUMA()
45{
46 // Getting number of threads
47 _threadCount = jaffarCommon::parallel::getMaxThreadCount();
48
49 // Getting number of numa domains (serial execution collapses to a single domain)
50 if (_threadCount == 1)
51 _numaCount = 1;
52 else
53 _numaCount = numa_max_node() + 1;
54
55 // Checking whether the numa library calls are available
56 const auto numaAvailable = numa_available();
57 if (numaAvailable != 0) JAFFAR_THROW_RUNTIME("The system does not provide NUMA detection support.");
58
59 // Overriding thread count, if requested
60 if (auto* value = std::getenv("JAFFAR_ENGINE_OVERRIDE_MAX_THREAD_COUNT")) jaffarCommon::parallel::setThreadCount(std::stoul(value));
61
62 // Check NUMA distances
64 for (ssize_t i = 0; i < _numaCount; i++) _numaDistanceMatrix[i].resize(_numaCount);
65 for (ssize_t i = 0; i < _numaCount; i++)
66 for (ssize_t j = 0; j < _numaCount; j++) _numaDistanceMatrix[i][j] = (size_t)numa_distance(i, j);
67
68 // printf("NUMA Distances:\n");
69 // for(ssize_t i = 0; i < _numaCount; i++)
70 // {
71 // for(ssize_t j = 0; j < _numaCount; j++)
72 // {
73 // printf("%3lu ", _numaDistanceMatrix[i][j]);
74 // }
75 // printf("\n");
76 // }
77
78 // Establishing NUMA preference matrix
80 for (ssize_t i = 0; i < _numaCount; i++)
81 {
82 // Putting NUMA domains in the same group based on distance
83 std::map<size_t, std::vector<int>> _localPreferenceMap;
84 for (ssize_t j = 0; j < _numaCount; j++) _localPreferenceMap[_numaDistanceMatrix[i][j]].push_back(j);
85
86 // Assigning NUMA domains based on local preference
87 for (const auto& entry : _localPreferenceMap)
88 {
89 const auto& localPreferenceGroup = entry.second;
90 for (ssize_t j = 0; j < (ssize_t)localPreferenceGroup.size(); j++)
91 {
92 const size_t offset = (j + i) % localPreferenceGroup.size();
93 _numaPreferenceMatrix[i].push_back(localPreferenceGroup[offset]);
94 }
95 }
96 }
97
98 // printf("NUMA Locality Preferences:\n");
99 // for(ssize_t i = 0; i < _numaCount; i++)
100 // {
101 // for(ssize_t j = 0; j < _numaCount; j++)
102 // {
103 // printf("%3lu ", _numaPreferenceMatrix[i][j]);
104 // }
105 // printf("\n");
106 // }
107
109 // Initializing numa delegate thread storage
111 for (int i = 0; i < _numaCount; i++) _numaDelegateThreadId[i] = -1;
112
113 // Detecting thread-specific NUMA information
114 std::mutex workMutex;
115 JAFFAR_PARALLEL
116 {
117 // Identify this worker by its dense OpenMP thread id (unique 0..N-1), NOT sched_getcpu(): the CPU
118 // id collides whenever threads share a core -- under oversubscription, or simply unpinned
119 // scheduling on CI runners that don't set OMP_PROC_BIND. A colliding id corrupts the per-NUMA
120 // delegate selection below (and StateDb's delegate-gated queue allocation), leaving a domain with
121 // no queues so the search immediately "runs out of states". The preferred NUMA domain still comes
122 // from the actual CPU the thread is currently on.
123 _myThreadId = jaffarCommon::parallel::getThreadId();
124 _preferredNumaDomain = numa_node_of_cpu(sched_getcpu());
125
126 // Setting myself as numa delegate, if I'm the first thread seen in this numa domain
127 workMutex.lock();
129 workMutex.unlock();
130 }
131}
132
133} // namespace jaffarPlus
static thread_local int _preferredNumaDomain
Thread-local preferred NUMA domain (the NUMA node of the CPU the thread currently runs on).
Definition numa.hpp:32
static int _numaCount
Number of NUMA domains detected.
Definition numa.hpp:21
static std::vector< std::vector< size_t > > _numaPreferenceMatrix
NUMA preference matrix; row i lists domains ordered by ascending distance from domain i (ties rotated by the row index i within each equal-distance group).
Definition numa.hpp:28
void initializeNUMA()
Initializes NUMA / core-affinity state.
Definition numa.hpp:44
static int _threadCount
Number of threads to use.
Definition numa.hpp:23
static std::vector< std::vector< size_t > > _numaDistanceMatrix
NUMA distance matrix; entry [i][j] is the reported distance from domain i to domain j.
Definition numa.hpp:25
static std::vector< ssize_t > _numaDelegateThreadId
Delegate thread (OpenMP thread id) per NUMA domain, or -1 if none assigned.
Definition numa.hpp:30
static thread_local int _myThreadId
Thread-local dense OpenMP thread id of the current thread.
Definition numa.hpp:34