diff --git a/src/misc.cpp b/src/misc.cpp index d9cbf513..5ac93e33 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -134,119 +134,6 @@ void prefetch_range(void *addr, size_t len) #endif -namespace WinProcGroup -{ - -#ifndef _WIN32 - -void bindThisThread(size_t) -{ -} - -#else - -/// best_group() retrieves logical processor information using Windows specific -/// API and returns the best group id for the thread with index idx. Original -/// code from Texel by Peter Österlund. - -int best_group(size_t idx) -{ - int threads = 0; - int nodes = 0; - int cores = 0; - DWORD returnLength = 0; - DWORD byteOffset = 0; - - // Early exit if the needed API is not available at runtime - HMODULE k32 = GetModuleHandle(L"Kernel32.dll"); - if (k32 == nullptr) - return -1; - auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx"); - if (!fun1) - return -1; - - // First call to get returnLength. We expect it to fail due to null buffer - if (fun1(RelationAll, nullptr, &returnLength)) - return -1; - - // Once we know returnLength, allocate the buffer - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; - ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)malloc(returnLength); - - if (ptr == nullptr) - return -1; - - // Second call, now we expect to succeed - if (!fun1(RelationAll, buffer, &returnLength)) { - free(buffer); - return -1; - } - - while (byteOffset < returnLength) { - if (ptr->Relationship == RelationNumaNode) - nodes++; - - else if (ptr->Relationship == RelationProcessorCore) { - cores++; - threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1; - } - - assert(ptr->Size); - byteOffset += ptr->Size; - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)(((char *)ptr) + ptr->Size); - } - - free(buffer); - - std::vector groups; - - // Run as many threads as possible on the same node until core limit is - // reached, then move on filling the next node. - for (int n = 0; n < nodes; n++) - for (int i = 0; i < cores / nodes; i++) - groups.push_back(n); - - // In case a core has more than one logical processor (we assume 2) and we - // have still threads to allocate, then spread them evenly across available - // nodes. - for (int t = 0; t < threads - cores; t++) - groups.push_back(t % nodes); - - // If we still have more threads than the total number of logical processors - // then return -1 and let the OS to decide what to do. - return idx < groups.size() ? groups[idx] : -1; -} - - -/// bindThisThread() set the group affinity of the current thread - -void bindThisThread(size_t idx) -{ - // Use only local variables to be thread-safe - const int group = best_group(idx); - - if (group == -1) - return; - - // Early exit if the needed API are not available at runtime - HMODULE k32 = GetModuleHandle(L"Kernel32.dll"); - if (k32 == nullptr) - return; - auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); - auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); - - if (!fun2 || !fun3) - return; - - GROUP_AFFINITY affinity; - if (fun2((USHORT)group, &affinity)) - fun3(GetCurrentThread(), &affinity, nullptr); -} - -#endif - -} // namespace WinProcGroup - #ifdef _WIN32 #include #define GETCWD _getcwd diff --git a/src/misc.h b/src/misc.h index cc38a429..04357714 100644 --- a/src/misc.h +++ b/src/misc.h @@ -134,16 +134,6 @@ constexpr uint64_t mul_hi64(uint64_t a, uint64_t b) { #endif } -/// Under Windows it is not possible for a process to run on more than one -/// logical processor group. This usually means to be limited to use max 64 -/// cores. To overcome this, some special platform specific API should be -/// called to set group affinity for each thread. Original code from Texel by -/// Peter Österlund. - -namespace WinProcGroup { - void bindThisThread(size_t idx); -} - namespace CommandLine { void init(int argc, char* argv[]);