288 lines
9.7 KiB
C++
288 lines
9.7 KiB
C++
/*
|
|
This file is part of Sanmill.
|
|
Copyright (C) 2019-2021 The Sanmill developers (see AUTHORS file)
|
|
|
|
Sanmill is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Sanmill is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#ifndef HASH_MAP_H_
|
|
#define HASH_MAP_H_
|
|
|
|
#include <cstdint>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <mutex>
#include <shared_mutex>
#include <string>

#include "hashnode.h"
#include "misc.h"
#include "types.h"
#include "config.h"
|
|
|
|
// When defined, HashMap is declared without a hash-functor template parameter
// and `hashFn` becomes a macro for the `Key` type, so `hashFn(key)` is only a
// conversion of the key to `Key` (the key acts as its own hash).
#define HASH_KEY_DISABLE
|
|
|
|
// Number of slots HashMap allocates when its constructor is given no size.
// The value was chosen as a prime number to spread values across buckets.
// NOTE(review): HashMap computes slot indices with `& (hashSize - 1)`, and that
// mask only reaches every slot when the size is a power of two -- confirm
// whether a prime default is still intended.
constexpr size_t HASH_SIZE_DEFAULT = 1031;
|
|
|
|
namespace CTSL // Concurrent Thread Safe Library
|
|
{
|
|
// The class representing the key map.
|
|
// It is expected for user defined types, the key function will be provided.
|
|
// By default, the std::key function will be used
|
|
// If the key size is not provided, then a default size of 1031 will be used
|
|
// The key table itself consists of an array of key buckets.
|
|
// Each key bucket is implemented as singly linked list with the head as a dummy node created
|
|
// during the creation of the bucket. All the key buckets are created during the construction of the map.
|
|
// Locks are taken per bucket, hence multiple threads can write simultaneously in different buckets in the key map
|
|
#ifdef HASH_KEY_DISABLE
|
|
#define hashFn Key
|
|
template <typename K, typename V>
|
|
#else
|
|
template <typename K, typename V, typename F = std::key<K> >
|
|
#endif
|
|
class HashMap
|
|
{
|
|
public:
|
|
HashMap(hashFn hashSize_ = HASH_SIZE_DEFAULT) : hashSize(hashSize_)
|
|
{
|
|
#ifdef DISABLE_HASHBUCKET
|
|
#ifdef ALIGNED_LARGE_PAGES
|
|
hashTable = (HashNode<K, V>*)aligned_large_pages_alloc(sizeof(HashNode<K, V>) * hashSize);
|
|
#else
|
|
hashTable = new HashNode<K, V>[hashSize]; // Create the key table as an array of key nodes
|
|
#endif // ALIGNED_LARGE_PAGES
|
|
|
|
memset(hashTable, 0, sizeof(HashNode<K, V>) * hashSize);
|
|
#else
|
|
hashTable = new HashBucket<K, V>[hashSize]; //create the key table as an array of key buckets
|
|
#endif
|
|
}
|
|
|
|
~HashMap()
|
|
{
|
|
#ifdef ALIGNED_LARGE_PAGES
|
|
aligned_large_pages_free(hashTable);
|
|
#else
|
|
delete [] hashTable;
|
|
#endif
|
|
}
|
|
// Copy and Move of the HashMap are not supported at this moment
|
|
HashMap(const HashMap&) = delete;
|
|
HashMap(HashMap&&) = delete;
|
|
HashMap& operator=(const HashMap&) = delete;
|
|
HashMap& operator=(HashMap&&) = delete;
|
|
|
|
// Function to find an entry in the key map matching the key.
|
|
// If key is found, the corresponding value is copied into the parameter "value" and function returns true.
|
|
// If key is not found, function returns false.
|
|
bool find(const K &key, V &value) const
|
|
{
|
|
K hashValue = hashFn(key) & (hashSize - 1) ;
|
|
#ifdef DISABLE_HASHBUCKET
|
|
// A shared mutex is used to enable multiple concurrent reads
|
|
#ifndef HASHMAP_NOLOCK
|
|
std::shared_lock<std::shared_timed_mutex> lock(mutex_);
|
|
#endif /* HASHMAP_NOLOCK */
|
|
|
|
if (hashTable[hashValue].getKey() == key) {
|
|
value = hashTable[hashValue].getValue();
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
#else
|
|
return hashTable[hashValue].find(key, value);
|
|
#endif
|
|
}
|
|
|
|
void prefetchValue(const K &key)
|
|
{
|
|
K hashValue = hashFn(key) & (hashSize - 1);
|
|
V *addr = &(hashTable[hashValue].getValue());
|
|
|
|
prefetch((void *)addr);
|
|
}
|
|
|
|
// Function to insert into the key map.
|
|
// If key already exists, update the value, else insert a new node in the bucket with the <key, value> pair.
|
|
K insert(const K &key, const V &value)
|
|
{
|
|
K hashValue = hashFn(key) & (hashSize - 1);
|
|
#ifdef DISABLE_HASHBUCKET
|
|
#ifndef HASHMAP_NOLOCK
|
|
std::unique_lock<std::shared_timed_mutex> lock(mutex_);
|
|
#endif /* HASHMAP_NOLOCK */
|
|
hashTable[hashValue].setKey(key);
|
|
hashTable[hashValue].setValue(value);
|
|
#else
|
|
hashTable[hashValue].insert(key, value);
|
|
#endif
|
|
return hashValue;
|
|
}
|
|
|
|
// Function to remove an entry from the bucket, if found
|
|
void erase(
|
|
#ifndef DISABLE_HASHBUCKET
|
|
const K &key
|
|
#endif
|
|
)
|
|
{
|
|
#ifdef DISABLE_HASHBUCKET
|
|
// std::unique_lock<std::shared_timed_mutex> lock(mutex_);
|
|
#else
|
|
size_t hashValue = hashFn(key) & (hashSize - 1);
|
|
hashTable[hashValue].erase(key);
|
|
#endif
|
|
}
|
|
|
|
|
|
// Function to clean up the hasp map, i.e., remove all entries from it
|
|
void clear()
|
|
{
|
|
#ifdef DISABLE_HASHBUCKET
|
|
memset(hashTable, 0, sizeof(HashNode<K, V>) * hashSize);
|
|
#else
|
|
for(size_t i = 0; i < hashSize; i++)
|
|
{
|
|
(hashTable[i]).clear();
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void resize(size_t size)
|
|
{
|
|
// TODO: Resize
|
|
if (size < 0x1000000) {
|
|
// New size is too small, do not resize
|
|
return;
|
|
}
|
|
|
|
#ifdef TRANSPOSITION_TABLE_64BIT_KEY
|
|
hashSize = size;
|
|
#else
|
|
hashSize = (uint32_t)size;
|
|
#endif // TRANSPOSITION_TABLE_64BIT_KEY
|
|
return;
|
|
}
|
|
|
|
// Function to dump the key map to file
|
|
void dump(const std::string &filename)
|
|
{
|
|
#ifdef DISABLE_HASHBUCKET
|
|
std::ofstream file;
|
|
file.open(filename, std::ios::out);
|
|
file.write((char *)(hashTable), sizeof(HashNode<K, V>) * hashSize);
|
|
file.close();
|
|
#endif
|
|
}
|
|
|
|
//Function to load the key map from file
|
|
void load(const std::string &filename)
|
|
{
|
|
#ifdef DISABLE_HASHBUCKET
|
|
std::ifstream file;
|
|
file.open(filename, std::ios::in);
|
|
file.read((char *)(hashTable), sizeof(HashNode<K, V>) * hashSize);
|
|
file.close();
|
|
|
|
stat();
|
|
#endif
|
|
}
|
|
|
|
void merge(const HashMap &other)
|
|
{
|
|
size_t ksize = sizeof(K);
|
|
size_t nsize = sizeof(HashNode<K, V>);
|
|
|
|
size_t nProcessed = 0;
|
|
size_t nMerged = 0;
|
|
size_t nSkip = 0;
|
|
size_t nAllSame = 0;
|
|
size_t nOnlyKeySame = 0;
|
|
size_t nDiff = 0;
|
|
|
|
char empty[sizeof(HashNode<K, V>)];
|
|
memset(empty, 0, nsize);
|
|
|
|
size_t nBefore = stat();
|
|
|
|
for (size_t i = 0; i < hashSize; i++) {
|
|
size_t offset = i * nsize;
|
|
if (memcmp((char *)other.hashTable + offset, empty, ksize)) {
|
|
nProcessed++;
|
|
if (!memcmp((char *)hashTable + offset, empty, ksize)) {
|
|
memcpy((char *)hashTable + offset, (char *)other.hashTable + offset, nsize);
|
|
nMerged++;
|
|
} else {
|
|
nSkip++;
|
|
if (!memcmp((char *)other.hashTable + offset, (char *)hashTable + offset, nsize)) {
|
|
nAllSame++;
|
|
} else if (!memcmp((char *)other.hashTable + offset, (char *)hashTable + offset, ksize)) {
|
|
nOnlyKeySame++;
|
|
} else {
|
|
nDiff++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
size_t nAfter = stat();
|
|
|
|
loggerDebug("[key merge]\nnProcessed = %lld, nMerged = %lld,\n"
|
|
"nSkip = %lld (nAllSame = %lld, nOnlyKeySame = %lld, nDiff = %lld)\n"
|
|
"hashSize = %d, nBefore = %lld (%f%%), nAfter = %lld (%f%%)\n",
|
|
nProcessed, nMerged, nSkip, nAllSame, nOnlyKeySame, nDiff,
|
|
hashSize, nBefore, (double)nBefore * 100 / hashSize, nAfter, (double)nAfter * 100 / hashSize);
|
|
}
|
|
|
|
size_t stat()
|
|
{
|
|
size_t nEntries = 0;
|
|
|
|
size_t size = sizeof(HashNode<K, V>);
|
|
char empty[sizeof(HashNode<K, V>)];
|
|
memset(empty, 0, size);
|
|
|
|
for (size_t i = 0; i < hashSize; i++) {
|
|
if (memcmp((char *)hashTable + i * size, empty, size)) {
|
|
nEntries++;
|
|
}
|
|
}
|
|
|
|
loggerDebug("Hash map loaded from file (%lld/%d entries)\n", nEntries, hashSize);
|
|
|
|
return nEntries;
|
|
}
|
|
|
|
private:
|
|
#ifdef DISABLE_HASHBUCKET
|
|
HashNode<K, V> *hashTable;
|
|
#else
|
|
HashBucket<K, V> * hashTable;
|
|
#endif
|
|
#ifdef HASH_KEY_DISABLE
|
|
#else
|
|
F hashFn;
|
|
#endif
|
|
hashFn hashSize;
|
|
#ifdef DISABLE_HASHBUCKET
|
|
#ifndef HASHMAP_NOLOCK
|
|
mutable std::shared_timed_mutex mutex_;
|
|
#endif /* HASHMAP_NOLOCK */
|
|
#endif
|
|
};
|
|
}
|
|
#endif // HASH_MAP_H_
|
|
|