chore(denseset): Simplify DenseSet, remove empty links. (#339)

This commit is contained in:
Roman Gershman 2022-10-01 06:58:37 +03:00 committed by GitHub
parent 7715ee06c6
commit 44f98de128
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 287 additions and 235 deletions

View File

@ -25,6 +25,35 @@ constexpr size_t kMinSizeShift = 2;
constexpr size_t kMinSize = 1 << kMinSizeShift;
constexpr bool kAllowDisplacements = true;
DenseSet::IteratorBase::IteratorBase(const DenseSet* owner, ChainVectorIterator begin_list)
: owner_(*owner), curr_list_(begin_list) {
if (begin_list != owner->entries_.end()) {
curr_entry_ = *begin_list;
// find the first non null entry
if (curr_entry_.IsEmpty()) {
Advance();
}
}
}
void DenseSet::IteratorBase::Advance() {
if (curr_entry_.IsLink()) {
curr_entry_ = curr_entry_.AsLink()->next;
DCHECK(!curr_entry_.IsEmpty());
} else {
DCHECK(curr_list_ != owner_.entries_.end());
do {
++curr_list_;
if (curr_list_ == owner_.entries_.end()) {
curr_entry_.Reset();
return;
}
} while (curr_list_->IsEmpty());
DCHECK(curr_list_ != owner_.entries_.end());
curr_entry_ = *curr_list_;
}
}
DenseSet::DenseSet(pmr::memory_resource* mr) : entries_(mr) {
}
@ -36,27 +65,34 @@ size_t DenseSet::PushFront(DenseSet::ChainVectorIterator it, void* data) {
// if this is an empty list assign the value to the empty placeholder pointer
if (it->IsEmpty()) {
it->SetObject(data);
return ObjectAllocSize(data);
} else {
// otherwise make a new link and connect it to the front of the list
it->SetLink(NewLink(data, *it));
}
// otherwise make a new link and connect it to the front of the list
it->SetLink(NewLink(data, *it));
return ObjectAllocSize(data);
}
void DenseSet::PushFront(DenseSet::ChainVectorIterator it, DenseSet::DensePtr ptr) {
DVLOG(2) << "PushFront to " << distance(entries_.begin(), it) << ", "
<< ObjectAllocSize(ptr.GetObject());
if (it->IsEmpty()) {
it->SetObject(ptr.GetObject());
if (ptr.IsLink()) {
FreeLink(ptr);
FreeLink(ptr.AsLink());
}
} else if (ptr.IsLink()) {
// if the pointer is already a link then no allocation needed
// if the pointer is already a link then no allocation needed.
*ptr.Next() = *it;
*it = ptr;
DCHECK(!it->AsLink()->next.IsEmpty());
} else {
DCHECK(ptr.IsObject());
// allocate a new link if needed and copy the pointer to the new link
it->SetLink(NewLink(ptr.GetObject(), *it));
it->SetLink(NewLink(ptr.Raw(), *it));
DCHECK(!it->AsLink()->next.IsEmpty());
}
}
@ -73,6 +109,7 @@ auto DenseSet::PopPtrFront(DenseSet::ChainVectorIterator it) -> DensePtr {
it->Reset();
} else {
DCHECK(it->IsLink());
// since a DenseLinkKey could be at the end of a chain and have a nullptr for next
// avoid dereferencing a nullptr and just reset the pointer to this DenseLinkKey
if (it->Next() == nullptr) {
@ -90,19 +127,7 @@ void* DenseSet::PopDataFront(DenseSet::ChainVectorIterator it) {
void* ret = front.GetObject();
if (front.IsLink()) {
FreeLink(front);
}
return ret;
}
// updates *node with the next item
auto DenseSet::Unlink(DenseSet::DensePtr* node) -> DensePtr {
DensePtr ret = *node;
if (node->IsObject()) {
node->Reset();
} else {
*node = *node->Next();
FreeLink(front.AsLink());
}
return ret;
@ -111,19 +136,20 @@ auto DenseSet::Unlink(DenseSet::DensePtr* node) -> DensePtr {
void DenseSet::ClearInternal() {
for (auto it = entries_.begin(); it != entries_.end(); ++it) {
while (!it->IsEmpty()) {
PopDataFront(it);
void* obj = PopDataFront(it);
ObjDelete(obj);
}
}
entries_.clear();
}
bool DenseSet::Equal(DensePtr dptr, const void* ptr) const {
bool DenseSet::Equal(DensePtr dptr, const void* ptr, uint32_t cookie) const {
if (dptr.IsEmpty()) {
return false;
}
return ObjEqual(dptr.GetObject(), ptr);
return ObjEqual(dptr.GetObject(), ptr, cookie);
}
auto DenseSet::FindEmptyAround(uint32_t bid) -> ChainVectorIterator {
@ -135,12 +161,16 @@ auto DenseSet::FindEmptyAround(uint32_t bid) -> ChainVectorIterator {
return entries_.end();
}
if (bid + 1 < entries_.size() && entries_[bid + 1].IsEmpty()) {
return entries_.begin() + bid + 1;
if (bid + 1 < entries_.size()) {
auto it = next(entries_.begin(), bid + 1);
if (it->IsEmpty())
return it;
}
if (bid && entries_[bid - 1].IsEmpty()) {
return entries_.begin() + bid - 1;
if (bid) {
auto it = next(entries_.begin(), bid - 1);
if (it->IsEmpty())
return it;
}
return entries_.end();
@ -162,32 +192,70 @@ void DenseSet::Grow() {
// perform rehashing of items in the set
for (long i = prev_size - 1; i >= 0; --i) {
DensePtr* curr = &entries_[i];
DensePtr* prev = nullptr;
// curr != nullptr checks if we have reached the end of a chain
// while !curr->IsEmpty() checks that the current chain is not empty
while (curr != nullptr && !curr->IsEmpty()) {
while (true) {
if (curr->IsEmpty())
break;
void* ptr = curr->GetObject();
uint32_t bid = BucketId(ptr);
DCHECK(ptr != nullptr && ObjectAllocSize(ptr));
uint32_t bid = BucketId(ptr, 0);
// if the item does not move from the current chain, ensure
// it is not marked as displaced and move to the next item in the chain
if (bid == i) {
curr->ClearDisplaced();
prev = curr;
curr = curr->Next();
if (curr == nullptr)
break;
} else {
// if the entry is in the wrong chain remove it and
// add it to the correct chain. This will also correct
// displaced entries
DensePtr node = Unlink(curr);
PushFront(entries_.begin() + bid, node);
entries_[bid].ClearDisplaced();
auto dest = entries_.begin() + bid;
DensePtr dptr = *curr;
if (curr->IsObject()) {
curr->Reset(); // reset the original placeholder (.next or root)
if (prev) {
DCHECK(prev->IsLink());
DenseLinkKey* plink = prev->AsLink();
DCHECK(&plink->next == curr);
// we want to make *prev a DensePtr instead of DenseLink and we
// want to deallocate the link.
DensePtr tmp = DensePtr::From(plink);
DCHECK(ObjectAllocSize(tmp.GetObject()));
FreeLink(plink);
*prev = tmp;
}
DVLOG(2) << " Pushing to " << bid << " " << dptr.GetObject();
PushFront(dest, dptr);
dest->ClearDisplaced();
break;
} // if IsObject
*curr = *dptr.Next();
DCHECK(!curr->IsEmpty());
PushFront(dest, dptr);
dest->ClearDisplaced();
}
}
}
}
bool DenseSet::AddInternal(void* ptr) {
uint64_t hc = Hash(ptr);
uint64_t hc = Hash(ptr, 0);
if (entries_.empty()) {
capacity_log_ = kMinSizeShift;
@ -203,7 +271,7 @@ bool DenseSet::AddInternal(void* ptr) {
// if the value is already in the set exit early
uint32_t bucket_id = BucketId(hc);
if (Find(ptr, bucket_id) != nullptr) {
if (Find(ptr, bucket_id, 0).second != nullptr) {
return false;
}
@ -265,42 +333,57 @@ bool DenseSet::AddInternal(void* ptr) {
return true;
}
auto DenseSet::Find(const void* ptr, uint32_t bid) const -> const DensePtr* {
const DensePtr* curr = &entries_[bid];
if (Equal(*curr, ptr)) {
return curr;
}
auto DenseSet::Find(const void* ptr, uint32_t bid, uint32_t cookie) -> pair<DensePtr*, DensePtr*> {
// could do it with zigzag decoding but this is clearer.
int offset[] = {0, -1, 1};
// first look for displaced nodes since this is quicker than iterating a potential long chain
if (bid && Equal(entries_[bid - 1], ptr)) {
return &entries_[bid - 1];
}
for (int j = 0; j < 3; ++j) {
if ((bid == 0 && j == 1) || (bid + 1 == entries_.size() && j == 2))
continue;
if (bid + 1 < entries_.size() && Equal(entries_[bid + 1], ptr)) {
return &entries_[bid + 1];
DensePtr* curr = &entries_[bid + offset[j]];
if (Equal(*curr, ptr, cookie)) {
return make_pair(nullptr, curr);
}
}
// if the node is not displaced, search the correct chain
curr = curr->Next();
DensePtr* prev = &entries_[bid];
DensePtr* curr = prev->Next();
while (curr != nullptr) {
if (Equal(*curr, ptr)) {
return curr;
if (Equal(*curr, ptr, cookie)) {
return make_pair(prev, curr);
}
prev = curr;
curr = curr->Next();
}
// not in the Set
return nullptr;
return make_pair(nullptr, nullptr);
}
void* DenseSet::Delete(DensePtr* ptr) {
void* DenseSet::Delete(DensePtr* prev, DensePtr* ptr) {
void* ret = nullptr;
if (ptr->IsObject()) {
ret = ptr->Raw();
ptr->Reset();
--num_used_buckets_;
if (prev == nullptr) {
--num_used_buckets_;
} else {
DCHECK(prev->IsLink());
--num_chain_entries_;
DenseLinkKey* plink = prev->AsLink();
DensePtr tmp = DensePtr::From(plink);
DCHECK(ObjectAllocSize(tmp.GetObject()));
FreeLink(plink);
*prev = tmp;
DCHECK(!prev->IsLink());
}
} else {
DCHECK(ptr->IsLink());
@ -308,7 +391,7 @@ void* DenseSet::Delete(DensePtr* ptr) {
ret = link->Raw();
*ptr = link->next;
--num_chain_entries_;
mr()->deallocate(link, sizeof(DenseLinkKey), alignof(DenseLinkKey));
FreeLink(link);
}
obj_malloc_used_ -= ObjectAllocSize(ret);
@ -373,7 +456,7 @@ uint32_t DenseSet::Scan(uint32_t cursor, const ItemCb& cb) const {
++entries_idx;
}
if (entries_idx >= entries_.size()) {
if (entries_idx == entries_.size()) {
return 0;
}

View File

@ -47,10 +47,18 @@ class DenseSet {
static constexpr size_t kDisplaceDirectionBit = 1ULL << 54;
static constexpr size_t kTagMask = 4095ULL << 51; // we reserve 12 high bits.
struct DensePtr {
class DensePtr {
public:
explicit DensePtr(void* p = nullptr) : ptr_(p) {
}
// Imports the object with its metadata except the link bit that is reset.
static DensePtr From(DenseLinkKey* o) {
DensePtr res;
res.ptr_ = (void*)(o->uptr() & (~kLinkBit));
return res;
}
uint64_t uptr() const {
return uint64_t(ptr_);
}
@ -137,7 +145,7 @@ class DenseSet {
}
private:
void* ptr_ = nullptr; //
void* ptr_ = nullptr;
};
struct DenseLinkKey : public DensePtr {
@ -151,6 +159,17 @@ class DenseSet {
using ChainVectorIterator = std::pmr::vector<DensePtr>::iterator;
using ChainVectorConstIterator = std::pmr::vector<DensePtr>::const_iterator;
class IteratorBase {
protected:
IteratorBase(const DenseSet* owner, ChainVectorIterator begin_list);
void Advance();
const DenseSet& owner_;
ChainVectorIterator curr_list_;
DensePtr curr_entry_;
};
public:
explicit DenseSet(std::pmr::memory_resource* mr = std::pmr::get_default_resource());
virtual ~DenseSet();
@ -184,7 +203,7 @@ class DenseSet {
return (num_chain_entries_ + entries_.capacity()) * sizeof(DensePtr);
}
template <typename T> class iterator {
template <typename T> class iterator : private IteratorBase {
static_assert(std::is_pointer_v<T>, "Iterators can only return pointers");
public:
@ -193,26 +212,11 @@ class DenseSet {
using pointer = value_type*;
using reference = value_type&;
iterator(DenseSet* set, ChainVectorIterator begin_list) : set_(set), curr_list_(begin_list) {
if (begin_list == set->entries_.end()) {
curr_entry_ = nullptr;
} else {
curr_entry_ = &*begin_list;
// find the first non null entry
if (curr_entry_ == nullptr || curr_entry_->IsEmpty()) {
++(*this);
}
}
iterator(DenseSet* set, ChainVectorIterator begin_list) : IteratorBase(set, begin_list) {
}
iterator& operator++() {
curr_entry_ = curr_entry_->Next();
while (curr_list_ != set_->entries_.end() &&
(curr_entry_ == nullptr || curr_entry_->IsEmpty())) {
++curr_list_;
curr_entry_ = &*curr_list_;
}
Advance();
return *this;
}
@ -225,20 +229,15 @@ class DenseSet {
}
value_type operator*() {
return (value_type)curr_entry_->GetObject();
return (value_type)curr_entry_.GetObject();
}
value_type operator->() {
return (value_type)curr_entry_->GetObject();
return (value_type)curr_entry_.GetObject();
}
private:
DenseSet* set_;
ChainVectorIterator curr_list_;
DensePtr* curr_entry_;
};
template <typename T> class const_iterator {
template <typename T> class const_iterator : private IteratorBase {
static_assert(std::is_pointer_v<T>, "Iterators can only return pointer types");
public:
@ -248,26 +247,11 @@ class DenseSet {
using reference = value_type&;
const_iterator(const DenseSet* set, ChainVectorConstIterator begin_list)
: set_(set), curr_list_(begin_list) {
if (begin_list == set->entries_.end()) {
curr_entry_ = nullptr;
} else {
curr_entry_ = &*begin_list;
// find the first non null entry
if (curr_entry_ == nullptr || curr_entry_->IsEmpty()) {
++(*this);
}
}
: IteratorBase(set, curr_list_) {
}
const_iterator& operator++() {
curr_entry_ = curr_entry_->Next();
curr_entry_ = curr_entry_->Next();
while (curr_list_ != set_->entries_.end() &&
(curr_entry_ == nullptr || curr_entry_->IsEmpty())) {
++curr_list_;
curr_entry_ = &*curr_list_;
}
Advance();
return *this;
}
@ -280,17 +264,12 @@ class DenseSet {
}
value_type operator*() const {
return (value_type)curr_entry_->GetObject();
return (value_type)curr_entry_.GetObject();
}
value_type operator->() const {
return (value_type)curr_entry_->GetObject();
return (value_type)curr_entry_.GetObject();
}
private:
const DenseSet* set_;
ChainVectorConstIterator curr_list_;
const DensePtr* curr_entry_;
};
template <typename T> iterator<T> begin() {
@ -316,19 +295,20 @@ class DenseSet {
protected:
// Virtual functions to be implemented for generic data
virtual uint64_t Hash(const void* obj) const = 0;
virtual bool ObjEqual(const void* obj1, const void* obj2) const = 0;
virtual uint64_t Hash(const void* obj, uint32_t cookie) const = 0;
virtual bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const = 0;
virtual size_t ObjectAllocSize(const void* obj) const = 0;
virtual void ObjDelete(void* obj) const = 0;
void* EraseInternal(void* obj) {
DensePtr* found = Find(obj);
return found ? Delete(found) : nullptr;
void* EraseInternal(void* obj, uint32_t cookie) {
auto [prev, found] = Find(obj, BucketId(obj, cookie), cookie);
return found ? Delete(prev, found) : nullptr;
}
bool AddInternal(void* obj);
bool ContainsInternal(const void* obj) const {
return Find(obj, BucketId(obj)) != nullptr;
bool ContainsInternal(const void* obj, uint32_t cookie) const {
return const_cast<DenseSet*>(this)->Find(obj, BucketId(obj, cookie), cookie).second != nullptr;
}
void* PopInternal();
@ -342,7 +322,7 @@ class DenseSet {
DenseSet(const DenseSet&) = delete;
DenseSet& operator=(DenseSet&) = delete;
bool Equal(DensePtr dptr, const void* ptr) const;
bool Equal(DensePtr dptr, const void* ptr, uint32_t cookie) const;
std::pmr::memory_resource* mr() {
return entries_.get_allocator().resource();
@ -352,8 +332,8 @@ class DenseSet {
return hash >> (64 - capacity_log_);
}
uint32_t BucketId(const void* ptr) const {
return BucketId(Hash(ptr));
uint32_t BucketId(const void* ptr, uint32_t cookie) const {
return BucketId(Hash(ptr, cookie));
}
// return a ChainVectorIterator (a.k.a iterator) or end if there is an empty chain found
@ -367,38 +347,21 @@ class DenseSet {
void* PopDataFront(ChainVectorIterator);
DensePtr PopPtrFront(ChainVectorIterator);
// Note this function will modify the iterator passed to it
// to point to the next node in the chain
DensePtr Unlink(DensePtr* node);
// ============ Pseudo Linked List in DenseSet end ==================
const DensePtr* Find(const void* ptr, uint32_t bid) const;
const DensePtr* Find(const void* ptr) const {
return Find(ptr, BucketId(ptr));
}
DensePtr* Find(const void* ptr, uint32_t bid) {
const DensePtr* ret = const_cast<const DenseSet*>(this)->Find(ptr, bid);
return const_cast<DensePtr*>(ret);
}
DensePtr* Find(const void* ptr) {
const DensePtr* ret = const_cast<const DenseSet*>(this)->Find(ptr);
return const_cast<DensePtr*>(ret);
}
// returns (prev, item) pair. If item is root, then prev is null.
std::pair<DensePtr*, DensePtr*> Find(const void* ptr, uint32_t bid, uint32_t cookie);
DenseLinkKey* NewLink(void* data, DensePtr next);
inline void FreeLink(DensePtr link) {
inline void FreeLink(DenseLinkKey* plink) {
// deallocate the link if it is no longer a link as it is now in an empty list
mr()->deallocate(link.AsLink(), sizeof(DenseLinkKey), alignof(DenseLinkKey));
mr()->deallocate(plink, sizeof(DenseLinkKey), alignof(DenseLinkKey));
}
void* Delete(DensePtr* ptr);
// Returns the object that was removed from ptr. If ptr is a link then deletes it internally.
void* Delete(DensePtr* prev, DensePtr* ptr);
// We may update it during const operations due to expiry interactions.
std::pmr::vector<DensePtr> entries_;
size_t obj_malloc_used_ = 0;

View File

@ -7,14 +7,19 @@ extern "C" {
#include "redis/zmalloc.h"
}
#include "base/logging.h"
using namespace std;
namespace dfly {
bool StringSet::AddSds(sds s1) {
return AddInternal(s1);
}
bool StringSet::Add(std::string_view s1) {
sds newsds = sdsnewlen(s1.data(), s1.size());
bool StringSet::Add(string_view src) {
sds newsds = sdsnewlen(src.data(), src.size());
if (!AddInternal(newsds)) {
sdsfree(newsds);
return false;
@ -23,39 +28,22 @@ bool StringSet::Add(std::string_view s1) {
return true;
}
bool StringSet::EraseSds(sds str) {
void* ret = EraseInternal(str);
if (ret == nullptr) {
return false;
}
bool StringSet::Erase(string_view str) {
void* ret = EraseInternal(&str, 1);
if (ret) {
sdsfree((sds)ret);
return true;
}
bool StringSet::Erase(std::string_view s1) {
sds to_erase = sdsnewlen(s1.data(), s1.size());
bool ret = EraseSds(to_erase);
sdsfree(to_erase);
return ret;
return false;
}
bool StringSet::ContainsSds(sds s1) const {
return ContainsInternal(s1);
}
bool StringSet::Contains(std::string_view s1) const {
sds to_search = sdsnewlen(s1.data(), s1.size());
bool ret = ContainsInternal(to_search);
sdsfree(to_search);
bool StringSet::Contains(string_view s1) const {
bool ret = ContainsInternal(&s1, 1);
return ret;
}
void StringSet::Clear() {
for (auto it = begin(); it != end(); ++it) {
sdsfree((sds)*it);
}
ClearInternal();
}
@ -80,23 +68,44 @@ uint32_t StringSet::Scan(uint32_t cursor, const std::function<void(const sds)>&
return DenseSet::Scan(cursor, [func](const void* ptr) { func((sds)ptr); });
}
uint64_t StringSet::Hash(const void* ptr) const {
uint64_t StringSet::Hash(const void* ptr, uint32_t cookie) const {
DCHECK_LT(cookie, 2u);
if (cookie == 0) {
sds s = (sds)ptr;
return CompactObj::HashCode(std::string_view{s, sdslen(s)});
return CompactObj::HashCode(string_view{s, sdslen(s)});
}
bool StringSet::ObjEqual(const void* ptr1, const void* ptr2) const {
sds s1 = (sds)ptr1;
sds s2 = (sds)ptr2;
const string_view* sv = (const string_view*)ptr;
return CompactObj::HashCode(*sv);
}
bool StringSet::ObjEqual(const void* left, const void* right, uint32_t right_cookie) const {
DCHECK_LT(right_cookie, 2u);
sds s1 = (sds)left;
if (right_cookie == 0) {
sds s2 = (sds)right;
if (sdslen(s1) != sdslen(s2)) {
return false;
}
return sdslen(s1) == 0 || memcmp(s1, s2, sdslen(s1)) == 0;
}
const string_view* right_sv = (const string_view*)right;
string_view left_sv{s1, sdslen(s1)};
return left_sv == (*right_sv);
}
size_t StringSet::ObjectAllocSize(const void* s1) const {
return zmalloc_usable_size(sdsAllocPtr((sds)s1));
}
}; // namespace dfly
void StringSet::ObjDelete(void* obj) const {
sdsfree((sds)obj);
}
}; // namespace dfly

View File

@ -16,16 +16,13 @@ class StringSet : public DenseSet {
public:
bool Add(std::string_view s1);
// Used currently by rdb_load.
bool AddSds(sds s1);
bool Erase(std::string_view s1);
bool EraseSds(sds s1);
bool Contains(std::string_view s1) const;
bool ContainsSds(sds s1) const;
void Clear();
std::optional<std::string> Pop();
@ -57,11 +54,12 @@ class StringSet : public DenseSet {
uint32_t Scan(uint32_t, const std::function<void(sds)>&) const;
protected:
uint64_t Hash(const void* ptr) const override;
uint64_t Hash(const void* ptr, uint32_t cookie) const override;
bool ObjEqual(const void* ptr1, const void* ptr2) const override;
bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const override;
size_t ObjectAllocSize(const void* s1) const override;
void ObjDelete(void* obj) const override;
};
} // end namespace dfly
} // end namespace dfly

View File

@ -29,25 +29,25 @@ namespace dfly {
using namespace std;
class DenseSetAllocator : public std::pmr::memory_resource {
class DenseSetAllocator : public pmr::memory_resource {
public:
bool all_freed() const {
return alloced_ == 0;
}
void* do_allocate(std::size_t bytes, std::size_t alignment) override {
void* do_allocate(size_t bytes, size_t alignment) override {
alloced_ += bytes;
void* p = std::pmr::new_delete_resource()->allocate(bytes, alignment);
void* p = pmr::new_delete_resource()->allocate(bytes, alignment);
return p;
}
void do_deallocate(void* p, std::size_t bytes, std::size_t alignment) override {
void do_deallocate(void* p, size_t bytes, size_t alignment) override {
alloced_ -= bytes;
return std::pmr::new_delete_resource()->deallocate(p, bytes, alignment);
return pmr::new_delete_resource()->deallocate(p, bytes, alignment);
}
bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
return std::pmr::new_delete_resource()->is_equal(other);
bool do_is_equal(const pmr::memory_resource& other) const noexcept override {
return pmr::new_delete_resource()->is_equal(other);
}
private:
@ -65,9 +65,7 @@ class StringSetTest : public ::testing::Test {
}
void SetUp() override {
orig_resource_ = std::pmr::get_default_resource();
std::pmr::set_default_resource(&alloc_);
ss_ = new StringSet();
ss_ = new StringSet(&alloc_);
}
void TearDown() override {
@ -76,21 +74,19 @@ class StringSetTest : public ::testing::Test {
// ensure there are no memory leaks after every test
EXPECT_TRUE(alloc_.all_freed());
EXPECT_EQ(zmalloc_used_memory_tl, 0);
std::pmr::set_default_resource(orig_resource_);
}
StringSet* ss_;
std::pmr::memory_resource* orig_resource_;
DenseSetAllocator alloc_;
};
TEST_F(StringSetTest, Basic) {
EXPECT_TRUE(ss_->Add(std::string_view{"foo"}));
EXPECT_TRUE(ss_->Add(std::string_view{"bar"}));
EXPECT_FALSE(ss_->Add(std::string_view{"foo"}));
EXPECT_FALSE(ss_->Add(std::string_view{"bar"}));
EXPECT_TRUE(ss_->Contains(std::string_view{"foo"}));
EXPECT_TRUE(ss_->Contains(std::string_view{"bar"}));
EXPECT_TRUE(ss_->Add("foo"sv));
EXPECT_TRUE(ss_->Add("bar"sv));
EXPECT_FALSE(ss_->Add("foo"sv));
EXPECT_FALSE(ss_->Add("bar"sv));
EXPECT_TRUE(ss_->Contains("foo"sv));
EXPECT_TRUE(ss_->Contains("bar"sv));
EXPECT_EQ(2, ss_->Size());
}
@ -107,6 +103,7 @@ TEST_F(StringSetTest, StandardAddErase) {
EXPECT_TRUE(ss_->Add("BBBBBAAAAAAAAAAA"));
EXPECT_TRUE(ss_->Add("BBBBBBBBAAAAAAAA"));
EXPECT_TRUE(ss_->Add("CCCCCBBBBBBBBBBB"));
// Remove link in the middle of chain
EXPECT_TRUE(ss_->Erase("BBBBBBBBAAAAAAAA"));
// Remove start of a chain
@ -120,9 +117,9 @@ TEST_F(StringSetTest, StandardAddErase) {
EXPECT_TRUE(ss_->Erase("AAAAAAAAAAAAAAA@"));
}
static std::string random_string(std::mt19937& rand, unsigned len) {
const std::string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz";
std::string ret;
static string random_string(mt19937& rand, unsigned len) {
const string_view alpanum = "1234567890abcdefghijklmnopqrstuvwxyz";
string ret;
ret.reserve(len);
for (size_t i = 0; i < len; ++i) {
@ -136,12 +133,12 @@ TEST_F(StringSetTest, Resizing) {
constexpr size_t num_strs = 4096;
// pseudo random deterministic sequence with known seed should produce
// the same sequence on all systems
std::mt19937 rand(0);
mt19937 rand(0);
std::vector<std::string> strs;
vector<string> strs;
while (strs.size() != num_strs) {
auto str = random_string(rand, 10);
if (std::find(strs.begin(), strs.end(), str) != strs.end()) {
if (find(strs.begin(), strs.end(), str) != strs.end()) {
continue;
}
@ -163,8 +160,8 @@ TEST_F(StringSetTest, Resizing) {
}
TEST_F(StringSetTest, SimpleScan) {
std::unordered_set<std::string_view> info = {"foo", "bar"};
std::unordered_set<std::string_view> seen;
unordered_set<string_view> info = {"foo", "bar"};
unordered_set<string_view> seen;
for (auto str : info) {
EXPECT_TRUE(ss_->Add(str));
@ -174,26 +171,26 @@ TEST_F(StringSetTest, SimpleScan) {
do {
cursor = ss_->Scan(cursor, [&](const sds ptr) {
sds s = (sds)ptr;
std::string_view str{s, sdslen(s)};
string_view str{s, sdslen(s)};
EXPECT_TRUE(info.count(str));
seen.insert(str);
});
} while (cursor != 0);
EXPECT_TRUE(seen.size() == info.size() && std::equal(seen.begin(), seen.end(), info.begin()));
EXPECT_TRUE(seen.size() == info.size() && equal(seen.begin(), seen.end(), info.begin()));
}
// Ensure REDIS scan guarantees are met
TEST_F(StringSetTest, ScanGuarantees) {
std::unordered_set<std::string_view> to_be_seen = {"foo", "bar"};
std::unordered_set<std::string_view> not_be_seen = {"AAA", "BBB"};
std::unordered_set<std::string_view> maybe_seen = {"AA@@@@@@@@@@@@@@", "AAA@@@@@@@@@@@@@",
"AAAAAAAAA@@@@@@@", "AAAAAAAAAA@@@@@@"};
std::unordered_set<std::string_view> seen;
unordered_set<string_view> to_be_seen = {"foo", "bar"};
unordered_set<string_view> not_be_seen = {"AAA", "BBB"};
unordered_set<string_view> maybe_seen = {"AA@@@@@@@@@@@@@@", "AAA@@@@@@@@@@@@@",
"AAAAAAAAA@@@@@@@", "AAAAAAAAAA@@@@@@"};
unordered_set<string_view> seen;
auto scan_callback = [&](const sds ptr) {
sds s = (sds)ptr;
std::string_view str{s, sdslen(s)};
string_view str{s, sdslen(s)};
EXPECT_TRUE(to_be_seen.count(str) || maybe_seen.count(str));
EXPECT_FALSE(not_be_seen.count(str));
if (to_be_seen.count(str)) {
@ -231,25 +228,25 @@ TEST_F(StringSetTest, ScanGuarantees) {
TEST_F(StringSetTest, IntOnly) {
constexpr size_t num_ints = 8192;
std::unordered_set<unsigned int> numbers;
unordered_set<unsigned int> numbers;
for (size_t i = 0; i < num_ints; ++i) {
numbers.insert(i);
EXPECT_TRUE(ss_->Add(std::to_string(i)));
EXPECT_TRUE(ss_->Add(to_string(i)));
}
for (size_t i = 0; i < num_ints; ++i) {
EXPECT_FALSE(ss_->Add(std::to_string(i)));
EXPECT_FALSE(ss_->Add(to_string(i)));
}
std::mt19937 generator(0);
mt19937 generator(0);
size_t num_remove = generator() % 4096;
std::unordered_set<std::string> removed;
unordered_set<string> removed;
for (size_t i = 0; i < num_remove; ++i) {
auto remove_int = generator() % num_ints;
auto remove = std::to_string(remove_int);
auto remove = to_string(remove_int);
if (numbers.count(remove_int)) {
EXPECT_TRUE(ss_->Contains(remove));
ASSERT_TRUE(ss_->Contains(remove)) << remove_int;
EXPECT_TRUE(ss_->Erase(remove));
numbers.erase(remove_int);
} else {
@ -263,9 +260,10 @@ TEST_F(StringSetTest, IntOnly) {
size_t expected_seen = 0;
auto scan_callback = [&](const sds ptr) {
sds s = (sds)ptr;
std::string_view str{s, sdslen(s)};
EXPECT_FALSE(removed.count(std::string(str)));
if (numbers.count(std::atoi(str.data()))) {
string_view str{s, sdslen(s)};
EXPECT_FALSE(removed.count(string(str)));
if (numbers.count(atoi(str.data()))) {
++expected_seen;
}
};
@ -274,22 +272,24 @@ TEST_F(StringSetTest, IntOnly) {
do {
cursor = ss_->Scan(cursor, scan_callback);
// randomly throw in some new numbers
ss_->Add(std::to_string(generator()));
uint32_t val = generator();
VLOG(1) << "Val " << val;
ss_->Add(to_string(val));
} while (cursor != 0);
EXPECT_TRUE(expected_seen + removed.size() == num_ints);
}
TEST_F(StringSetTest, XtremeScanGrow) {
std::unordered_set<std::string> to_see, force_grow, seen;
unordered_set<string> to_see, force_grow, seen;
std::mt19937 generator(0);
mt19937 generator(0);
while (to_see.size() != 8) {
to_see.insert(random_string(generator, 10));
}
while (force_grow.size() != 8192) {
std::string str = random_string(generator, 10);
string str = random_string(generator, 10);
if (to_see.count(str)) {
continue;
@ -304,9 +304,9 @@ TEST_F(StringSetTest, XtremeScanGrow) {
auto scan_callback = [&](const sds ptr) {
sds s = (sds)ptr;
std::string_view str{s, sdslen(s)};
if (to_see.count(std::string(str))) {
seen.insert(std::string(str));
string_view str{s, sdslen(s)};
if (to_see.count(string(str))) {
seen.insert(string(str));
}
};
@ -326,9 +326,9 @@ TEST_F(StringSetTest, XtremeScanGrow) {
TEST_F(StringSetTest, Pop) {
constexpr size_t num_items = 8;
std::unordered_set<std::string> to_insert;
unordered_set<string> to_insert;
std::mt19937 generator(0);
mt19937 generator(0);
while (to_insert.size() != num_items) {
auto str = random_string(generator, 10);
@ -356,9 +356,9 @@ TEST_F(StringSetTest, Pop) {
TEST_F(StringSetTest, Iteration) {
constexpr size_t num_items = 8192;
std::unordered_set<std::string> to_insert;
unordered_set<string> to_insert;
std::mt19937 generator(0);
mt19937 generator(0);
while (to_insert.size() != num_items) {
auto str = random_string(generator, 10);
@ -371,7 +371,7 @@ TEST_F(StringSetTest, Iteration) {
}
for (const sds ptr : *ss_) {
std::string str{ptr, sdslen(ptr)};
string str{ptr, sdslen(ptr)};
EXPECT_TRUE(to_insert.count(str));
to_insert.erase(str);
}

View File

@ -89,8 +89,7 @@ pair<unsigned, bool> RemoveStrSet(ArgSlice vals, CompactObj* set) {
DCHECK_EQ(set->Encoding(), kEncodingStrMap2);
StringSet* ss = ((StringSet*)set->RObjPtr());
for (auto member : vals) {
shard->tmp_str1 = sdscpylen(shard->tmp_str1, member.data(), member.size());
removed += ss->EraseSds(shard->tmp_str1);
removed += ss->Erase(member);
}
isempty = ss->Empty();