misc: Remove WinProcGroup
This commit is contained in:
parent
bd835e3800
commit
3ca016e952
113
src/misc.cpp
113
src/misc.cpp
|
@ -134,119 +134,6 @@ void prefetch_range(void *addr, size_t len)
|
|||
#endif
|
||||
|
||||
|
||||
namespace WinProcGroup
|
||||
{
|
||||
|
||||
#ifndef _WIN32
|
||||
|
||||
void bindThisThread(size_t)
|
||||
{
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/// best_group() retrieves logical processor information using Windows specific
|
||||
/// API and returns the best group id for the thread with index idx. Original
|
||||
/// code from Texel by Peter Österlund.
|
||||
|
||||
int best_group(size_t idx)
|
||||
{
|
||||
int threads = 0;
|
||||
int nodes = 0;
|
||||
int cores = 0;
|
||||
DWORD returnLength = 0;
|
||||
DWORD byteOffset = 0;
|
||||
|
||||
// Early exit if the needed API is not available at runtime
|
||||
HMODULE k32 = GetModuleHandle(L"Kernel32.dll");
|
||||
if (k32 == nullptr)
|
||||
return -1;
|
||||
auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx");
|
||||
if (!fun1)
|
||||
return -1;
|
||||
|
||||
// First call to get returnLength. We expect it to fail due to null buffer
|
||||
if (fun1(RelationAll, nullptr, &returnLength))
|
||||
return -1;
|
||||
|
||||
// Once we know returnLength, allocate the buffer
|
||||
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
|
||||
ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)malloc(returnLength);
|
||||
|
||||
if (ptr == nullptr)
|
||||
return -1;
|
||||
|
||||
// Second call, now we expect to succeed
|
||||
if (!fun1(RelationAll, buffer, &returnLength)) {
|
||||
free(buffer);
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (byteOffset < returnLength) {
|
||||
if (ptr->Relationship == RelationNumaNode)
|
||||
nodes++;
|
||||
|
||||
else if (ptr->Relationship == RelationProcessorCore) {
|
||||
cores++;
|
||||
threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
|
||||
}
|
||||
|
||||
assert(ptr->Size);
|
||||
byteOffset += ptr->Size;
|
||||
ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)(((char *)ptr) + ptr->Size);
|
||||
}
|
||||
|
||||
free(buffer);
|
||||
|
||||
std::vector<int> groups;
|
||||
|
||||
// Run as many threads as possible on the same node until core limit is
|
||||
// reached, then move on filling the next node.
|
||||
for (int n = 0; n < nodes; n++)
|
||||
for (int i = 0; i < cores / nodes; i++)
|
||||
groups.push_back(n);
|
||||
|
||||
// In case a core has more than one logical processor (we assume 2) and we
|
||||
// have still threads to allocate, then spread them evenly across available
|
||||
// nodes.
|
||||
for (int t = 0; t < threads - cores; t++)
|
||||
groups.push_back(t % nodes);
|
||||
|
||||
// If we still have more threads than the total number of logical processors
|
||||
// then return -1 and let the OS to decide what to do.
|
||||
return idx < groups.size() ? groups[idx] : -1;
|
||||
}
|
||||
|
||||
|
||||
/// bindThisThread() set the group affinity of the current thread
|
||||
|
||||
void bindThisThread(size_t idx)
|
||||
{
|
||||
// Use only local variables to be thread-safe
|
||||
const int group = best_group(idx);
|
||||
|
||||
if (group == -1)
|
||||
return;
|
||||
|
||||
// Early exit if the needed API are not available at runtime
|
||||
HMODULE k32 = GetModuleHandle(L"Kernel32.dll");
|
||||
if (k32 == nullptr)
|
||||
return;
|
||||
auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
|
||||
auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
|
||||
|
||||
if (!fun2 || !fun3)
|
||||
return;
|
||||
|
||||
GROUP_AFFINITY affinity;
|
||||
if (fun2((USHORT)group, &affinity))
|
||||
fun3(GetCurrentThread(), &affinity, nullptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace WinProcGroup
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#define GETCWD _getcwd
|
||||
|
|
10
src/misc.h
10
src/misc.h
|
@ -134,16 +134,6 @@ constexpr uint64_t mul_hi64(uint64_t a, uint64_t b) {
|
|||
#endif
|
||||
}
|
||||
|
||||
/// Under Windows it is not possible for a process to run on more than one
|
||||
/// logical processor group. This usually means to be limited to use max 64
|
||||
/// cores. To overcome this, some special platform specific API should be
|
||||
/// called to set group affinity for each thread. Original code from Texel by
|
||||
/// Peter Österlund.
|
||||
|
||||
namespace WinProcGroup {
|
||||
void bindThisThread(size_t idx);
|
||||
}
|
||||
|
||||
namespace CommandLine {
|
||||
void init(int argc, char* argv[]);
|
||||
|
||||
|
|
Loading…
Reference in New Issue