// Copyright 2022, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//

#include "server/snapshot.h"

extern "C" {
#include "redis/object.h"
}

#include <absl/strings/str_cat.h>

#include "base/logging.h"
#include "server/rdb_save.h"
#include "util/fiber_sched_algo.h"
#include "util/proactor_base.h"

namespace dfly {

using namespace std;
using namespace util;
using namespace chrono_literals;
namespace this_fiber = ::boost::this_fiber;
using boost::fibers::fiber;
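
// Streams the contents of this slice, serialized into RDB format, into dest_.
// Note that the expire-table argument `et` is not stored yet, and expire
// timestamps are not serialized (see the TODO in SerializeCb).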
SliceSnapshot::SliceSnapshot(PrimeTable* prime, ExpireTable* et, StringChannel* dest)
    : prime_table_(prime), dest_(dest) {
}

SliceSnapshot::~SliceSnapshot() {
}

void SliceSnapshot::Start(uint64_t version) {
  DCHECK(!fb_.joinable());

  VLOG(1) << "DbSaver::Start - saving entries with version less than " << version;
  sfile_.reset(new io::StringFile);

  rdb_serializer_.reset(new RdbSerializer(sfile_.get()));
  snapshot_version_ = version;
  fb_ = fiber([this] { FiberFunc(); });
}

void SliceSnapshot::Join() {
  fb_.join();
}
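
// A minimal usage sketch (hypothetical caller, names assumed):
//
//   SliceSnapshot snapshot{&prime_table, &expire_table, &channel};
//   snapshot.Start(next_version);  // serialize entries with version < next_version
//   ...                            // meanwhile, drain `channel` in another fiber
//   snapshot.Join();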

static_assert(sizeof(PrimeTable::const_iterator) == 16);
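
// Serializes a single entry into the RDB stream. Runs under the atomic bucket
// traversal in SaveCb, so it writes only into the in-memory string file and
// performs no I/O.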
void SliceSnapshot::SerializeCb(MainIterator it) {
  error_code ec;

  string tmp;

  auto key = it->first.GetSlice(&tmp);

  // TODO: fetch expire.
  if (it->second.ObjType() == OBJ_STRING) {
    ec = rdb_serializer_->SaveKeyVal(key, it->second.ToString(), 0);
  } else {
    robj* obj = it->second.AsRObj();
    ec = rdb_serializer_->SaveKeyVal(key, obj, 0);
  }
  CHECK(!ec);  // we write to StringFile.
  ++serialized_;
}

// Serializes all the entries with version less than snapshot_version_.
void SliceSnapshot::FiberFunc() {
  this_fiber::properties<FiberProps>().set_name(
      absl::StrCat("SliceSnapshot", ProactorBase::GetIndex()));
  uint64_t cursor = 0;
  static_assert(PHYSICAL_LEN > PrimeTable::kPhysicalBucketNum);

  uint64_t last_yield = 0;
  do {
    // Traverse a single logical bucket but do not update its versions.
    // We can not update a version because entries in the same bucket share part of the version.
    // Therefore we save first, and then update the version in one atomic swipe.
    uint64_t next = prime_table_->Traverse(cursor, [this](auto it) { this->SaveCb(move(it)); });

    cursor = next;
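    // physical_mask_ is only meaningful within a single logical-bucket
    // traversal, so it must be cleared before the next Traverse call.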
    physical_mask_.reset();

    // Flush if needed.
    FlushSfile(false);
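    // Yield roughly every 100 serialized entries so the snapshot fiber does not
    // starve other fibers running on this thread.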
    if (serialized_ >= last_yield + 100) {
      DVLOG(2) << "Before sleep " << this_fiber::properties<FiberProps>().name();
      this_fiber::yield();
      last_yield = serialized_;
      DVLOG(2) << "After sleep";
      // Flush in case other fibers (write commands that pushed previous values) filled the file.
      FlushSfile(false);
    }
  } while (cursor > 0);

  DVLOG(1) << "after loop " << this_fiber::properties<FiberProps>().name();
  FlushSfile(true);
  dest_->StartClosing();

  VLOG(1) << "Exit RdbProducer fiber with " << serialized_ << " serialized";
}
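
// Pushes the serialized data accumulated in sfile_ into the destination channel.
// With force == false the data is flushed only once at least 4KB has
// accumulated; with force == true whatever remains is pushed as well.
// Returns true if something was pushed.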
bool SliceSnapshot::FlushSfile(bool force) {
  if (force) {
    auto ec = rdb_serializer_->FlushMem();
    CHECK(!ec);
    if (sfile_->val.empty())
      return false;
  } else {
    if (sfile_->val.size() < 4096) {
      return false;
    }

    // Make sure we flush everything from the memory buffer in order to preserve the atomicity of
    // key-value serializations.
    auto ec = rdb_serializer_->FlushMem();
    CHECK(!ec);  // stringfile always succeeds.
  }
  VLOG(2) << "FlushSfile " << sfile_->val.size() << " bytes";

  string tmp = std::move(sfile_->val);  // important to move before pushing!
  dest_->Push(std::move(tmp));
  return true;
}

// The algorithm is to go over all the buckets and serialize entries that
// have version < snapshot_version_. In order to serialize each entry exactly once we update its
// version to snapshot_version_ once it has been serialized.
// Due to how bucket versions work we can not update individual entries - they may affect their
// neighbours in the bucket. Instead we handle serialization at physical bucket granularity.
// To further complicate things, Table::Traverse covers a logical bucket that may comprise
// several physical buckets. The reason for this complication is that we need to guarantee
// a stable traversal during prime table mutations. PrimeTable::Traverse guarantees an atomic
// traversal of a single logical bucket; it also guarantees 100% coverage of all items
// that existed when the traversal started and survived until it finished.
//
// It's important that the callback runs atomically, so we avoid any I/O work inside it.
// Instead, we flush our string file in the traverse loop inside FiberFunc.
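//
// A concrete illustration (hypothetical numbers): with snapshot_version_ == 7,
// a physical bucket whose entries sit at version 5 is serialized in full and
// every entry in it is bumped to version 7; any later visit observes
// version >= snapshot_version_ and skips the bucket.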
bool SliceSnapshot::SaveCb(MainIterator it) {
  // If we have already touched this physical bucket - skip it.
  // We must make sure we call TraverseBucket exactly once for each physical bucket.
  // This test comes first because it's likely to be the fastest one:
  // physical_mask_ is likely to be loaded in L1 and bucket_id() does not require accessing the
  // prime_table.
  if (physical_mask_.test(it.bucket_id())) {
    return false;
  }

  uint64_t v = it.GetVersion();
  if (v >= snapshot_version_) {
    // The entry has either been serialized already, or was added after snapshotting started.
    DVLOG(2) << "Skipped " << it.segment_id() << ":" << it.bucket_id() << ":" << it.slot_id()
             << " at " << v;
    ++skipped_;
    return false;
  }

  physical_mask_.set(it.bucket_id());

  // Both traversals below execute atomically.
  // Traverse the physical bucket and write its entries into the string file.
  prime_table_->TraverseBucket(it, [this](auto entry_it) { this->SerializeCb(move(entry_it)); });

  // Theoretically we could merge version_cb into the traversal above, but then we would need
  // to give up on the DCHECK.
  auto version_cb = [this](MainIterator entry_it) {
    DCHECK_LE(entry_it.GetVersion(), snapshot_version_);
    DVLOG(3) << "Bumping up version " << entry_it.bucket_id() << ":" << entry_it.slot_id();

    entry_it.SetVersion(snapshot_version_);
  };

  prime_table_->TraverseBucket(it, version_cb);

  return false;
}

}  // namespace dfly