Implement snapshot consistency under the write load
This commit is contained in:
parent
a8a05949b0
commit
7ee6bd8471
37
core/dash.h
37
core/dash.h
|
@ -298,6 +298,17 @@ class DashTable : public detail::DashTableBase {
|
||||||
return const_bucket_iterator{it.owner_, it.seg_id_, it.bucket_id_, 0};
|
return const_bucket_iterator{it.owner_, it.seg_id_, it.bucket_id_, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Capture Version Change. Runs cb(it) on every bucket! (not entry) in the table whose version
|
||||||
|
// would potentially change upon insertion of 'k'.
|
||||||
|
// In practice traversal is limited to a single segment. The operation is read-only and
|
||||||
|
// simulates insertion process. 'cb' must accept const_iterator. Note: the interface a bit hacky
|
||||||
|
// since the iterator may point to non-existing slot. In practice it only can be sent to
|
||||||
|
// TraverseBucket function.
|
||||||
|
// The function returns any bucket that is touched by the insertion flow. In practice, we
|
||||||
|
// could tighten estimates by taking the new version into account.
|
||||||
|
// Not sure how important this is though.
|
||||||
|
template <typename U, typename Cb> void CVCUponInsert(const U& key, Cb&& cb) const;
|
||||||
|
|
||||||
void Clear();
|
void Clear();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -349,6 +360,32 @@ DashTable<_Key, _Value, Policy>::~DashTable() {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename _Key, typename _Value, typename Policy>
|
||||||
|
template <typename U, typename Cb>
|
||||||
|
void DashTable<_Key, _Value, Policy>::CVCUponInsert(const U& key, Cb&& cb) const {
|
||||||
|
uint64_t key_hash = DoHash(key);
|
||||||
|
uint32_t seg_id = SegmentId(key_hash);
|
||||||
|
assert(seg_id < segment_.size());
|
||||||
|
const SegmentType* target = segment_[seg_id];
|
||||||
|
|
||||||
|
uint8_t bids[2];
|
||||||
|
unsigned num_touched = target->CVCOnInsert(key_hash, bids);
|
||||||
|
if (num_touched > 0) {
|
||||||
|
for (unsigned i = 0; i < num_touched; ++i) {
|
||||||
|
cb(const_iterator{this, seg_id, bids[i], 0});
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
static_assert(kPhysicalBucketNum < 0xFF, "");
|
||||||
|
|
||||||
|
// Segment is full, we need to return the whole segment, because it can be split
|
||||||
|
// and its entries can be reshuffled into different buckets.
|
||||||
|
for (uint8_t i = 0; i < kPhysicalBucketNum; ++i) {
|
||||||
|
cb(const_iterator{this, seg_id, i, 0});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename _Key, typename _Value, typename Policy>
|
template <typename _Key, typename _Value, typename Policy>
|
||||||
void DashTable<_Key, _Value, Policy>::Clear() {
|
void DashTable<_Key, _Value, Policy>::Clear() {
|
||||||
auto cb = [this](SegmentType* seg) {
|
auto cb = [this](SegmentType* seg) {
|
||||||
|
|
|
@ -33,9 +33,26 @@ SliceSnapshot::~SliceSnapshot() {
|
||||||
|
|
||||||
void SliceSnapshot::Start(DbSlice* slice) {
|
void SliceSnapshot::Start(DbSlice* slice) {
|
||||||
DCHECK(!fb_.joinable());
|
DCHECK(!fb_.joinable());
|
||||||
|
db_slice_ = slice;
|
||||||
|
|
||||||
snapshot_version_ =
|
auto on_change = [this, slice](DbIndex db_index, const DbSlice::ChangeReq& req) {
|
||||||
slice->RegisterOnChange([this](DbIndex index, const DbSlice::ChangeReq& req) {});
|
PrimeTable* table = slice->GetTables(db_index).first;
|
||||||
|
|
||||||
|
if (const MainIterator* it = get_if<MainIterator>(&req)) {
|
||||||
|
if (it->GetVersion() < snapshot_version_) {
|
||||||
|
side_saved_ += SerializePhysicalBucket(table, *it);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
string_view key = get<string_view>(req);
|
||||||
|
table->CVCUponInsert(key, [this, table](PrimeTable::const_iterator it) {
|
||||||
|
if (it.MinVersion() < snapshot_version_) {
|
||||||
|
side_saved_ += SerializePhysicalBucket(table, it);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
snapshot_version_ = slice->RegisterOnChange(move(on_change));
|
||||||
VLOG(1) << "DbSaver::Start - saving entries with version less than " << snapshot_version_;
|
VLOG(1) << "DbSaver::Start - saving entries with version less than " << snapshot_version_;
|
||||||
sfile_.reset(new io::StringFile);
|
sfile_.reset(new io::StringFile);
|
||||||
|
|
||||||
|
@ -53,7 +70,7 @@ void SliceSnapshot::Join() {
|
||||||
|
|
||||||
static_assert(sizeof(PrimeTable::const_iterator) == 16);
|
static_assert(sizeof(PrimeTable::const_iterator) == 16);
|
||||||
|
|
||||||
void SliceSnapshot::SerializeCb(MainIterator it) {
|
void SliceSnapshot::SerializeSingleEntry(MainIterator it) {
|
||||||
error_code ec;
|
error_code ec;
|
||||||
|
|
||||||
string tmp;
|
string tmp;
|
||||||
|
@ -163,23 +180,28 @@ bool SliceSnapshot::SaveCb(MainIterator it) {
|
||||||
}
|
}
|
||||||
|
|
||||||
physical_mask_.set(it.bucket_id());
|
physical_mask_.set(it.bucket_id());
|
||||||
|
SerializePhysicalBucket(prime_table_, it);
|
||||||
// Both traversals below execute atomically.
|
|
||||||
// traverse physical bucket and write into string file.
|
|
||||||
prime_table_->TraverseBucket(it, [this](auto entry_it) { this->SerializeCb(move(entry_it)); });
|
|
||||||
|
|
||||||
// Theoretically we could merge version_cb into the traversal above but then would would need
|
|
||||||
// to give up on DCHECK.
|
|
||||||
auto version_cb = [this](MainIterator entry_it) {
|
|
||||||
DCHECK_LE(entry_it.GetVersion(), snapshot_version_);
|
|
||||||
DVLOG(3) << "Bumping up version " << entry_it.bucket_id() << ":" << entry_it.slot_id();
|
|
||||||
|
|
||||||
entry_it.SetVersion(snapshot_version_);
|
|
||||||
};
|
|
||||||
|
|
||||||
prime_table_->TraverseBucket(it, version_cb);
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned SliceSnapshot::SerializePhysicalBucket(PrimeTable* table, PrimeTable::const_iterator it) {
|
||||||
|
// Both traversals below execute atomically.
|
||||||
|
// traverse physical bucket and write into string file.
|
||||||
|
unsigned result = 0;
|
||||||
|
table->TraverseBucket(it, [&](auto entry_it) {
|
||||||
|
++result;
|
||||||
|
SerializeSingleEntry(move(entry_it));
|
||||||
|
});
|
||||||
|
|
||||||
|
table->TraverseBucket(it, [this](MainIterator entry_it) {
|
||||||
|
DCHECK_LE(entry_it.GetVersion(), snapshot_version_);
|
||||||
|
DVLOG(3) << "Bumping up version " << entry_it.bucket_id() << ":" << entry_it.slot_id();
|
||||||
|
|
||||||
|
entry_it.SetVersion(snapshot_version_);
|
||||||
|
});
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace dfly
|
} // namespace dfly
|
||||||
|
|
|
@ -33,9 +33,12 @@ class SliceSnapshot {
|
||||||
private:
|
private:
|
||||||
void FiberFunc();
|
void FiberFunc();
|
||||||
bool FlushSfile(bool force);
|
bool FlushSfile(bool force);
|
||||||
void SerializeCb(MainIterator it);
|
void SerializeSingleEntry(MainIterator it);
|
||||||
bool SaveCb(MainIterator it);
|
bool SaveCb(MainIterator it);
|
||||||
|
|
||||||
|
// Returns number of entries serialized.
|
||||||
|
unsigned SerializePhysicalBucket(PrimeTable* table, PrimeTable::const_iterator it);
|
||||||
|
|
||||||
::boost::fibers::fiber fb_;
|
::boost::fibers::fiber fb_;
|
||||||
|
|
||||||
enum {PHYSICAL_LEN = 128};
|
enum {PHYSICAL_LEN = 128};
|
||||||
|
@ -47,9 +50,10 @@ class SliceSnapshot {
|
||||||
// version upper bound for entries that should be saved (not included).
|
// version upper bound for entries that should be saved (not included).
|
||||||
uint64_t snapshot_version_ = 0;
|
uint64_t snapshot_version_ = 0;
|
||||||
PrimeTable* prime_table_;
|
PrimeTable* prime_table_;
|
||||||
|
DbSlice* db_slice_ = nullptr;
|
||||||
StringChannel* dest_;
|
StringChannel* dest_;
|
||||||
|
|
||||||
size_t serialized_ = 0, skipped_ = 0;
|
size_t serialized_ = 0, skipped_ = 0, side_saved_ = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace dfly
|
} // namespace dfly
|
||||||
|
|
Loading…
Reference in New Issue