2022-01-13 16:49:25 +08:00
|
|
|
// Copyright 2022, Roman Gershman. All rights reserved.
|
|
|
|
// See LICENSE for licensing terms.
|
|
|
|
//
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <absl/base/internal/endian.h>
|
|
|
|
|
|
|
|
#include <memory_resource>
|
|
|
|
#include <optional>
|
|
|
|
|
2022-02-24 21:22:59 +08:00
|
|
|
#include "core/small_string.h"
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
// Forward declaration of the C struct, so `robj*` can be named in this header without its definition.
typedef struct redisObject robj;
|
|
|
|
// Forward declaration of the C struct, so `quicklist*` can be named in this header without its definition.
typedef struct quicklist quicklist;
|
|
|
|
|
|
|
|
namespace dfly {
|
|
|
|
|
|
|
|
namespace detail {
|
|
|
|
|
|
|
|
// A (pointer, 32-bit size) pair describing a blob of bytes.
// The class is packed so that it occupies 12 bytes (see the static_assert below).
// It declares no destructor: memory is only handled through the member functions that
// receive a std::pmr::memory_resource explicitly.
class CompactBlob {
  void* ptr_;
  uint32_t sz;

 public:
  // Creates an empty blob: null pointer, zero size.
  CompactBlob() : ptr_(nullptr), sz(0) {
  }

  // Defined out of line.
  // NOTE(review): presumably allocates from `mr` and copies `s` - confirm in the .cc file.
  explicit CompactBlob(std::string_view s, std::pmr::memory_resource* mr);

  // Defined out of line.
  // NOTE(review): presumably replaces the current content with a copy of `s` - confirm.
  void Assign(std::string_view s, std::pmr::memory_resource* mr);

  // Overwrites the stored pointer and size. Nothing is freed or copied here.
  void Set(void* p, uint32_t s) {
    ptr_ = p;
    sz = s;
  }

  // Defined out of line.
  void Free(std::pmr::memory_resource* mr);

  // Returns the stored size (widened from 32 bits).
  size_t size() const {
    return sz;
  }

  // Defined out of line.
  size_t capacity() const;

  // Returns the stored pointer as is.
  void* ptr() const {
    return ptr_;
  }

  // Returns a non-owning view over the `size()` bytes starting at `ptr()`.
  std::string_view AsView() const {
    // void* -> char* is a static_cast conversion; reinterpret_cast is not needed.
    return std::string_view{static_cast<const char*>(ptr_), sz};
  }

  // Defined out of line.
  void MakeRoom(size_t current_cap, size_t desired, std::pmr::memory_resource* mr);
} __attribute__((packed));

static_assert(sizeof(CompactBlob) == 12, "");
|
|
|
|
|
2022-01-13 21:47:04 +08:00
|
|
|
// redis objects or blobs of up to 4GB size.
|
2022-01-13 16:49:25 +08:00
|
|
|
struct RobjWrapper {
|
|
|
|
size_t MallocUsed() const;
|
|
|
|
|
2022-01-20 11:16:22 +08:00
|
|
|
uint64_t HashCode() const;
|
2022-01-13 16:49:25 +08:00
|
|
|
bool Equal(const RobjWrapper& ow) const;
|
|
|
|
bool Equal(std::string_view sv) const;
|
|
|
|
size_t Size() const;
|
|
|
|
void Free(std::pmr::memory_resource* mr);
|
|
|
|
|
|
|
|
CompactBlob blob;
|
|
|
|
static_assert(sizeof(blob) == 12);
|
|
|
|
|
|
|
|
uint32_t type : 4;
|
|
|
|
uint32_t encoding : 4;
|
2022-02-21 04:07:33 +08:00
|
|
|
uint32_t unneeded : 24;
|
2022-01-13 16:49:25 +08:00
|
|
|
RobjWrapper() {
|
|
|
|
}
|
|
|
|
} __attribute__((packed));
|
|
|
|
|
2022-02-25 05:19:28 +08:00
|
|
|
// unpacks 8->7 encoded blob back to ascii.
|
|
|
|
// generally, we can not unpack inplace because ascii (dest) buffer is 8/7 bigger than
|
|
|
|
// the source buffer.
|
|
|
|
// however, if binary data is positioned on the right of the ascii buffer with empty space on the
|
|
|
|
// left, then we can unpack in place.
|
|
|
|
// NOTE(review): `ascii_len` is presumably the length of the unpacked ascii output (not of
// `bin`), and `ascii` must have room for it - confirm against the definition.
void ascii_unpack(const uint8_t* bin, size_t ascii_len, char* ascii);
|
|
|
|
|
|
|
|
// packs ascii string (does not verify) into binary form saving 1 bit per byte on average (12.5%).
|
|
|
|
// NOTE(review): `bin` presumably must be large enough for the packed output - confirm the
// required capacity against the definition.
void ascii_pack(const char* ascii, size_t len, uint8_t* bin);
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
} // namespace detail
|
|
|
|
|
|
|
|
class CompactObj {
|
|
|
|
static constexpr unsigned kInlineLen = 16;
|
|
|
|
|
|
|
|
void operator=(const CompactObj&) = delete;
|
|
|
|
CompactObj(const CompactObj&) = delete;
|
|
|
|
|
|
|
|
// 0-16 is reserved for inline lengths of string type.
|
|
|
|
enum TagEnum {
|
|
|
|
INT_TAG = 17,
|
2022-02-24 21:22:59 +08:00
|
|
|
SMALL_TAG = 18,
|
2022-01-13 16:49:25 +08:00
|
|
|
ROBJ_TAG = 19,
|
|
|
|
};
|
|
|
|
|
|
|
|
enum MaskBit {
|
|
|
|
REF_BIT = 1,
|
|
|
|
EXPIRE_BIT = 2,
|
2022-02-21 04:07:33 +08:00
|
|
|
FLAG_BIT = 4,
|
2022-02-25 05:19:28 +08:00
|
|
|
|
|
|
|
// ascii encoding is not an injective function. it compresses 8 bytes to 7 but also 7 to 7.
|
|
|
|
// therefore, in order to know the original length we introduce 2 flags that
|
|
|
|
// correct the length upon decoding. ASCII1_ENC_BIT rounds down the decoded length,
|
|
|
|
// while ASCII2_ENC_BIT rounds it up. See DecodedLen implementation for more info.
|
|
|
|
ASCII1_ENC_BIT = 8,
|
|
|
|
ASCII2_ENC_BIT = 0x10,
|
2022-01-13 16:49:25 +08:00
|
|
|
};
|
|
|
|
|
2022-02-25 05:19:28 +08:00
|
|
|
static constexpr uint8_t kEncMask = ASCII1_ENC_BIT | ASCII2_ENC_BIT;
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
public:
|
|
|
|
using PrefixArray = std::vector<std::string_view>;
|
|
|
|
|
|
|
|
CompactObj() { // By default - empty string.
|
|
|
|
}
|
|
|
|
|
|
|
|
explicit CompactObj(robj* o) {
|
|
|
|
ImportRObj(o);
|
|
|
|
}
|
|
|
|
|
|
|
|
explicit CompactObj(std::string_view str) {
|
|
|
|
SetString(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
CompactObj(CompactObj&& cs) noexcept {
|
|
|
|
operator=(std::move(cs));
|
|
|
|
};
|
|
|
|
|
|
|
|
~CompactObj();
|
|
|
|
|
|
|
|
CompactObj& operator=(CompactObj&& o) noexcept;
|
|
|
|
|
|
|
|
size_t StrSize() const;
|
|
|
|
|
|
|
|
// TODO: We don't use c++ constructs (ctor, dtor, =) in objects of U,
|
|
|
|
// because we use memcpy here.
|
|
|
|
CompactObj AsRef() const {
|
|
|
|
CompactObj res;
|
|
|
|
memcpy(&res.u_, &u_, sizeof(u_));
|
|
|
|
res.taglen_ = taglen_;
|
|
|
|
res.mask_ = mask_ | REF_BIT;
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2022-01-13 21:47:04 +08:00
|
|
|
bool IsRef() const {
|
|
|
|
return mask_ & REF_BIT;
|
|
|
|
}
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
std::string_view GetSlice(std::string* scratch) const;
|
|
|
|
|
|
|
|
std::string ToString() const {
|
|
|
|
std::string res;
|
|
|
|
GetString(&res);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2022-01-20 11:16:22 +08:00
|
|
|
uint64_t HashCode() const;
|
|
|
|
static uint64_t HashCode(std::string_view str);
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
bool operator==(const CompactObj& o) const;
|
|
|
|
|
|
|
|
bool operator==(std::string_view sl) const;
|
|
|
|
|
|
|
|
friend bool operator!=(const CompactObj& lhs, const CompactObj& rhs) {
|
|
|
|
return !(lhs == rhs);
|
|
|
|
}
|
|
|
|
|
|
|
|
friend bool operator==(std::string_view sl, const CompactObj& o) {
|
|
|
|
return o.operator==(sl);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool HasExpire() const {
|
|
|
|
return mask_ & EXPIRE_BIT;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SetExpire(bool e) {
|
|
|
|
if (e) {
|
|
|
|
mask_ |= EXPIRE_BIT;
|
|
|
|
} else {
|
|
|
|
mask_ &= ~EXPIRE_BIT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-21 04:07:33 +08:00
|
|
|
bool HasFlag() const {
|
|
|
|
return mask_ & FLAG_BIT;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SetFlag(bool e) {
|
|
|
|
if (e) {
|
|
|
|
mask_ |= FLAG_BIT;
|
|
|
|
} else {
|
|
|
|
mask_ &= ~FLAG_BIT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
unsigned Encoding() const;
|
|
|
|
unsigned ObjType() const;
|
|
|
|
quicklist* GetQL() const;
|
|
|
|
|
|
|
|
// Takes ownership over o.
|
|
|
|
void ImportRObj(robj* o);
|
|
|
|
|
|
|
|
robj* AsRObj() const;
|
|
|
|
|
|
|
|
// Syncs 'this' instance with the object that was previously returned by AsRObj().
|
|
|
|
// Requires: AsRObj() has been called before in the same thread in fiber-atomic section.
|
|
|
|
void SyncRObj();
|
|
|
|
|
|
|
|
void SetInt(int64_t val);
|
|
|
|
std::optional<int64_t> TryGetInt() const;
|
|
|
|
|
|
|
|
void SetString(std::string_view str);
|
|
|
|
|
|
|
|
void GetString(std::string* res) const;
|
|
|
|
|
|
|
|
size_t MallocUsed() const;
|
|
|
|
|
|
|
|
// Resets the object to empty state.
|
|
|
|
void Reset();
|
|
|
|
|
|
|
|
bool IsInline() const {
|
|
|
|
return taglen_ <= kInlineLen;
|
|
|
|
}
|
|
|
|
|
|
|
|
static constexpr unsigned InlineLen() {
|
|
|
|
return kInlineLen;
|
|
|
|
}
|
|
|
|
|
2022-02-24 21:22:59 +08:00
|
|
|
struct Stats {
|
|
|
|
size_t small_string_bytes = 0;
|
|
|
|
};
|
|
|
|
|
|
|
|
static Stats GetStats();
|
|
|
|
|
2022-02-22 23:50:09 +08:00
|
|
|
static void InitThreadLocal(std::pmr::memory_resource* mr);
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
private:
|
2022-02-25 05:19:28 +08:00
|
|
|
size_t DecodedLen(size_t sz) const;
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
bool EqualNonInline(std::string_view sv) const;
|
|
|
|
|
|
|
|
// Requires: HasAllocated() - true.
|
|
|
|
void Free();
|
|
|
|
|
|
|
|
bool HasAllocated() const;
|
|
|
|
|
2022-02-25 05:19:28 +08:00
|
|
|
bool CmpEncoded(std::string_view sv) const;
|
|
|
|
|
2022-01-13 16:49:25 +08:00
|
|
|
void SetMeta(uint8_t taglen, uint8_t mask = 0) {
|
|
|
|
if (HasAllocated()) {
|
|
|
|
Free();
|
|
|
|
} else {
|
|
|
|
memset(u_.inline_str, 0, kInlineLen);
|
|
|
|
}
|
|
|
|
taglen_ = taglen;
|
|
|
|
mask_ = mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
// My main data structure. Union of representations.
|
2022-01-20 11:16:22 +08:00
|
|
|
// RobjWrapper is kInlineLen=16 bytes, so we employ SSO of that size via inline_str.
|
|
|
|
// In case of int values, we waste 8 bytes. I am assuming it's ok and it's not the data type
|
2022-01-13 21:47:04 +08:00
|
|
|
// with biggest memory usage.
|
2022-01-13 16:49:25 +08:00
|
|
|
union U {
|
|
|
|
char inline_str[kInlineLen];
|
|
|
|
|
2022-02-24 21:22:59 +08:00
|
|
|
SmallString small_str;
|
2022-01-13 16:49:25 +08:00
|
|
|
detail::RobjWrapper r_obj;
|
|
|
|
int64_t ival __attribute__((packed));
|
|
|
|
|
|
|
|
U() : r_obj() {
|
|
|
|
}
|
|
|
|
} u_;
|
|
|
|
|
2022-01-13 21:47:04 +08:00
|
|
|
//
|
2022-01-13 16:49:25 +08:00
|
|
|
static_assert(sizeof(u_) == 16, "");
|
|
|
|
|
2022-02-21 04:07:33 +08:00
|
|
|
// Maybe it's possible to merge those 2 together and gain another byte
|
|
|
|
// but lets postpone it to 2023.
|
2022-01-13 16:49:25 +08:00
|
|
|
mutable uint8_t mask_ = 0;
|
|
|
|
uint8_t taglen_ = 0;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Compares the stored string value with `sv`.
// Dispatch order: objects with an ascii-encoding bit set go through CmpEncoded();
// non-inline objects go through EqualNonInline(); inline strings are compared directly
// against the first taglen_ bytes of the inline buffer.
inline bool CompactObj::operator==(std::string_view sv) const {
  const bool has_enc_bit = (mask_ & kEncMask) != 0;
  if (has_enc_bit) {
    return CmpEncoded(sv);
  }

  if (!IsInline()) {
    return EqualNonInline(sv);
  }

  const std::string_view inline_view{u_.inline_str, taglen_};
  return inline_view == sv;
}
|
|
|
|
|
|
|
|
} // namespace dfly
|