dragonfly/server/generic_family.cc

567 lines
16 KiB
C++

// Copyright 2021, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#include "server/generic_family.h"
extern "C" {
#include "redis/object.h"
}
#include "base/logging.h"
#include "server/command_registry.h"
#include "server/conn_context.h"
#include "server/engine_shard_set.h"
#include "server/error.h"
#include "server/transaction.h"
#include "util/varz.h"
DEFINE_uint32(dbnum, 16, "Number of databases");
namespace dfly {
using namespace std;
namespace {
DEFINE_VARZ(VarzQps, ping_qps);
// Helper that carries the state of a RENAME spanning two shards.
//
// Intended flow, as used by GenericFamily::RenameGeneric:
//   1. Find() runs on each participating shard and records what that shard holds for its key.
//   2. Finalize() inspects both lookups, sets status() and returns the callback for the
//      second hop (either a no-op or the actual move).
class Renamer {
 public:
  // dind      - database index in which both keys are looked up.
  // source_id - id of the shard that owns the source key; any other shard id passed to
  //             Find() is treated as the destination shard.
  Renamer(DbIndex dind, ShardId source_id) : db_indx_(dind), src_sid_(source_id) {
  }

  // Looks up the single key in `args` on the calling thread's shard and stores the outcome
  // in the source or destination slot depending on `shard_id`.
  OpResult<void> Find(ShardId shard_id, const ArgSlice& args);

  // Outcome decided by Finalize(); only meaningful after Finalize() has been called.
  OpResult<void> status() const {
    return status_;
  }

  // Decides whether the rename may proceed and returns the per-shard callback to execute.
  Transaction::RunnableType Finalize(bool skip_exist_dest);

 private:
  // Per-shard body of the move: deletes the key on the source shard, installs it on the
  // destination shard.
  void MoveValues(EngineShard* shard, const ArgSlice& args);

  DbIndex db_indx_;
  ShardId src_sid_;

  // Snapshot of one key lookup made by Find().
  struct FindResult {
    string_view key;
    PrimeValue val;
    // Expiry timestamp copied from the expire table, or 0 when the key has none.
    // Initialized so that a slot whose key was not found never holds an indeterminate value.
    uint64_t expire_ts = 0;
    bool found = false;
  };

  FindResult src_res_, dest_res_;  // lookup results for the source and destination keys
  OpResult<void> status_;
};
// First-hop callback body: looks up this shard's key and remembers what was found.
//
// `shard_id` selects which slot is filled (source when it equals the source shard id,
// destination otherwise). `args` must contain exactly one key. Always returns OK; a missing
// key is reported through the slot's `found` flag, not through the return value.
OpResult<void> Renamer::Find(ShardId shard_id, const ArgSlice& args) {
  CHECK_EQ(1u, args.size());

  FindResult& slot = (shard_id == src_sid_) ? src_res_ : dest_res_;
  slot.key = args.front();

  auto [main_it, expire_it] = EngineShard::tlocal()->db_slice().FindExt(db_indx_, slot.key);
  slot.found = IsValid(main_it);
  if (!slot.found)
    return OpStatus::OK;

  // Keep a reference-style handle to the stored value plus its expiry (0 == no expiry).
  slot.val = main_it->second.AsRef();
  if (IsValid(expire_it)) {
    slot.expire_ts = expire_it->second;
  } else {
    slot.expire_ts = 0;
  }
  return OpStatus::OK;
}
// Second-hop body executed on every participating shard.
//
// On the source shard the source key is removed. On the destination shard the value captured
// in src_res_ is installed under the destination key together with the source expiry.
// `args` is not used.
// TODO: when we want to maintain heap per shard model this code will require additional work.
void Renamer::MoveValues(EngineShard* shard, const ArgSlice& args) {
  const bool on_source_shard = (shard->shard_id() == src_sid_);
  if (on_source_shard) {
    // Source side: just drop the old entry.
    auto it = shard->db_slice().FindExt(db_indx_, src_res_.key).first;
    CHECK(shard->db_slice().Del(db_indx_, it));
    return;
  }

  // Destination side.
  auto& dest_slice = shard->db_slice();
  string_view dest_key = dest_res_.key;
  MainIterator dest_it = dest_slice.FindExt(db_indx_, dest_key).first;

  if (!IsValid(dest_it)) {
    // No destination entry yet: create it from the source object.
    dest_slice.AddNew(db_indx_, dest_key, std::move(src_res_.val), src_res_.expire_ts);
    return;
  }

  // Destination exists: overwrite its value in place.
  // This plain move will not be possible with a heap-per-shard model.
  dest_it->second = std::move(src_res_.val);
  dest_slice.Expire(db_indx_, dest_it, src_res_.expire_ts);
}
// Evaluates the results gathered by Find() and returns the callback for the final hop.
//
// Sets status_ to KEY_NOTFOUND when the source key is missing, or to KEY_EXISTS when the
// destination exists and `skip_exist_dest` is set; in both cases the returned callback does
// nothing. Otherwise status_ is left as is and the returned callback runs MoveValues() on
// each shard.
Transaction::RunnableType Renamer::Finalize(bool skip_exist_dest) {
  auto noop = [](Transaction*, EngineShard*) { return OpStatus::OK; };

  const bool src_missing = !src_res_.found;
  const bool dest_blocks = dest_res_.found && skip_exist_dest;
  if (src_missing || dest_blocks) {
    status_ = src_missing ? OpStatus::KEY_NOTFOUND : OpStatus::KEY_EXISTS;
    return noop;
  }

  DCHECK(src_res_.val.IsRef());

  // The source entry is deleted in the same hop in which its value is installed at the
  // destination, so the value cannot stay a reference to it. Materialize a private copy.
  // TODO: if we want to allocate in shard, we must implement CompactObject::Clone.
  // For now this is a hack that works for strings only.
  string owned_copy;
  src_res_.val.GetString(&owned_copy);
  src_res_.val.SetString(owned_copy);

  // Source exists and the destination may be created or overwritten.
  return [this](Transaction* t, EngineShard* shard) {
    MoveValues(shard, t->ShardArgsInShard(shard->shard_id()));
    return OpStatus::OK;
  };
}
// Maps an OBJ_* type code to the name reported by the TYPE command.
// Unknown codes are logged as an error and reported as "invalid".
const char* ObjTypeName(int type) {
  if (type == OBJ_STRING)
    return "string";
  if (type == OBJ_LIST)
    return "list";
  if (type == OBJ_SET)
    return "set";
  if (type == OBJ_ZSET)
    return "zset";
  if (type == OBJ_HASH)
    return "hash";
  if (type == OBJ_STREAM)
    return "stream";

  LOG(ERROR) << "Unsupported type " << type;
  return "invalid";
}
} // namespace
// Initializes the file-local `ping_qps` varz counter with the given proactor pool.
void GenericFamily::Init(util::ProactorPool* pp) {
ping_qps.Init(pp);
}
// Shuts down the file-local `ping_qps` varz counter; counterpart of Init().
void GenericFamily::Shutdown() {
ping_qps.Shutdown();
}
// DEL key [key ...]
// Deletes the given keys on every shard that owns some of them and replies with the total
// number of keys that were removed.
void GenericFamily::Del(CmdArgList args, ConnectionContext* cntx) {
  Transaction* transaction = cntx->transaction;
  VLOG(1) << "Del " << ArgS(args, 1);

  // Accumulates per-shard delete counts; each shard callback adds its own share.
  atomic_uint32_t result{0};
  auto cb = [&result](const Transaction* t, EngineShard* shard) {
    ArgSlice args = t->ShardArgsInShard(shard->shard_id());
    auto res = OpDel(OpArgs{shard, t->db_index()}, args);
    result.fetch_add(res.value_or(0), memory_order_relaxed);
    return OpStatus::OK;
  };

  OpStatus status = transaction->ScheduleSingleHop(std::move(cb));
  CHECK_EQ(OpStatus::OK, status);
  DVLOG(2) << "Del ts " << transaction->txid();

  // memory_order_release is not a permitted order for atomic::load (undefined behaviour).
  // `result` is a stack variable captured by reference, so this code presumably already
  // relies on ScheduleSingleHop() returning only after all callbacks ran; a relaxed load
  // matching the relaxed fetch_add above is therefore enough.
  cntx->SendLong(result.load(memory_order_relaxed));
}
// PING [message]
// Without an argument replies with the simple string "PONG"; with one argument echoes it
// back as a bulk string. More than one argument is an error. Every accepted call bumps the
// ping_qps counter.
void GenericFamily::Ping(CmdArgList args, ConnectionContext* cntx) {
  const auto argc = args.size();  // includes the command name itself
  if (argc > 2)
    return cntx->SendError("wrong number of arguments for 'ping' command");

  ping_qps.Inc();

  if (argc != 1) {
    string_view payload = ArgS(args, 1);
    DVLOG(2) << "Ping " << payload;
    return cntx->SendBulkString(payload);
  }
  return cntx->SendSimpleRespString("PONG");
}
// EXISTS key [key ...]
// Replies with the number of given keys that exist, summed over all shards involved.
void GenericFamily::Exists(CmdArgList args, ConnectionContext* cntx) {
  Transaction* transaction = cntx->transaction;
  VLOG(1) << "Exists " << ArgS(args, 1);

  // Accumulates per-shard hit counts; each shard callback adds its own share.
  atomic_uint32_t result{0};
  auto cb = [&result](Transaction* t, EngineShard* shard) {
    ArgSlice args = t->ShardArgsInShard(shard->shard_id());
    auto res = OpExists(OpArgs{shard, t->db_index()}, args);
    result.fetch_add(res.value_or(0), memory_order_relaxed);
    return OpStatus::OK;
  };

  OpStatus status = transaction->ScheduleSingleHop(std::move(cb));
  CHECK_EQ(OpStatus::OK, status);

  // memory_order_release is not a permitted order for atomic::load (undefined behaviour).
  // `result` is a stack variable captured by reference, so this code presumably already
  // relies on ScheduleSingleHop() returning only after all callbacks ran; a relaxed load
  // matching the relaxed fetch_add above is therefore enough.
  return cntx->SendLong(result.load(memory_order_relaxed));
}
// EXPIRE key seconds
// Sets a relative expiry on `key`. Replies 1 when OpExpire reports OK and 0 otherwise;
// replies with kInvalidIntErr when `seconds` is not an integer.
void GenericFamily::Expire(CmdArgList args, ConnectionContext* cntx) {
  string_view key = ArgS(args, 1);
  string_view sec = ArgS(args, 2);

  int64_t int_arg;
  if (!absl::SimpleAtoi(sec, &int_arg)) {
    return cntx->SendError(kInvalidIntErr);
  }

  // Collapse all negative values to -1. The explicit template argument keeps this
  // compiling on platforms where int64_t is `long long` rather than `long`
  // (std::max(int_arg, -1L) fails template deduction there).
  int_arg = std::max<int64_t>(int_arg, -1);
  ExpireParams params{.ts = int_arg};

  auto cb = [&](Transaction* t, EngineShard* shard) {
    return OpExpire(OpArgs{shard, t->db_index()}, key, params);
  };
  OpStatus status = cntx->transaction->ScheduleSingleHop(std::move(cb));
  cntx->SendLong(status == OpStatus::OK);
}
// EXPIREAT key timestamp
// Sets an absolute expiry on `key`. Replies 1 when OpExpire reports OK and 0 otherwise;
// replies with kInvalidIntErr when `timestamp` is not an integer.
void GenericFamily::ExpireAt(CmdArgList args, ConnectionContext* cntx) {
  string_view key = ArgS(args, 1);
  string_view sec = ArgS(args, 2);

  int64_t int_arg;
  if (!absl::SimpleAtoi(sec, &int_arg)) {
    return cntx->SendError(kInvalidIntErr);
  }

  // Negative timestamps are clamped to 0. The explicit template argument keeps this
  // compiling on platforms where int64_t is `long long` rather than `long`
  // (std::max(int_arg, 0L) fails template deduction there).
  int_arg = std::max<int64_t>(int_arg, 0);
  ExpireParams params{.ts = int_arg, .absolute = true};

  auto cb = [&](Transaction* t, EngineShard* shard) {
    return OpExpire(OpArgs{shard, t->db_index()}, key, params);
  };
  OpStatus status = cntx->transaction->ScheduleSingleHop(std::move(cb));
  cntx->SendLong(status == OpStatus::OK);
}
// RENAME key newkey
// Runs the generic rename without the "skip if destination exists" restriction and sends
// the resulting status through SendError().
void GenericFamily::Rename(CmdArgList args, ConnectionContext* cntx) {
  constexpr bool kSkipExistingDest = false;
  cntx->SendError(RenameGeneric(args, kSkipExistingDest, cntx).status());
}
// TTL key: delegates to TtlGeneric with second resolution.
void GenericFamily::Ttl(CmdArgList args, ConnectionContext* cntx) {
TtlGeneric(args, cntx, TimeUnit::SEC);
}
// PTTL key: delegates to TtlGeneric with millisecond resolution.
void GenericFamily::Pttl(CmdArgList args, ConnectionContext* cntx) {
TtlGeneric(args, cntx, TimeUnit::MSEC);
}
// Shared implementation of TTL and PTTL.
//
// Replies with the remaining time to live of `key`: in milliseconds for TimeUnit::MSEC, or
// rounded to the nearest second for TimeUnit::SEC. Following the Redis protocol, replies
//   -2 when the key does not exist,
//   -1 when the key exists but has no expiry.
void GenericFamily::TtlGeneric(CmdArgList args, ConnectionContext* cntx, TimeUnit unit) {
  string_view key = ArgS(args, 1);
  auto cb = [&](Transaction* t, EngineShard* shard) { return OpTtl(t, shard, key); };
  OpResult<uint64_t> result = cntx->transaction->ScheduleSingleHopT(std::move(cb));

  if (result) {
    // OpTtl reports milliseconds; +500 rounds to the nearest second.
    long ttl = (unit == TimeUnit::SEC) ? (result.value() + 500) / 1000 : result.value();
    cntx->SendLong(ttl);
    return;
  }

  switch (result.status()) {
    case OpStatus::KEY_NOTFOUND:
      cntx->SendLong(-2);  // no such key
      break;
    default:
      // OpTtl returns SKIPPED when the key exists without an expiry.
      cntx->SendLong(-1);
      break;
  }
}
// SELECT index
// Switches the connection to database `index`. The index must parse as an integer
// (otherwise kInvalidDbIndErr) and lie in [0, FLAGS_dbnum) (otherwise kDbIndOutOfRangeErr).
// On success the database is activated on every shard and OK is sent.
void GenericFamily::Select(CmdArgList args, ConnectionContext* cntx) {
  string_view index_arg = ArgS(args, 1);
  int64_t index;
  if (!absl::SimpleAtoi(index_arg, &index))
    return cntx->SendError(kInvalidDbIndErr);

  const bool in_range = index >= 0 && index < FLAGS_dbnum;
  if (!in_range)
    return cntx->SendError(kDbIndOutOfRangeErr);

  cntx->conn_state.db_index = index;

  // Make sure the selected database is activated on each shard.
  cntx->shard_set->RunBriefInParallel([index](EngineShard* shard) {
    shard->db_slice().ActivateDb(index);
    return OpStatus::OK;
  });

  return cntx->SendOk();
}
// TYPE key
// Replies with the type name of the value stored at `key` (see ObjTypeName), or "none"
// when the lookup does not yield a value.
void GenericFamily::Type(CmdArgList args, ConnectionContext* cntx) {
  std::string_view key = ArgS(args, 1);

  auto fetch_type = [&](Transaction* t, EngineShard* shard) -> OpResult<int> {
    auto it = shard->db_slice().FindExt(t->db_index(), key).first;
    if (it.is_done())
      return OpStatus::KEY_NOTFOUND;
    return it->second.ObjType();
  };

  OpResult<int> result = cntx->transaction->ScheduleSingleHopT(std::move(fetch_type));
  if (result) {
    cntx->SendSimpleRespString(ObjTypeName(result.value()));
    return;
  }
  cntx->SendSimpleRespString("none");
}
// Common implementation behind the rename commands.
//
// args[1] is the source key, args[2] the destination key. When `skip_exist_dest` is true an
// existing destination is left untouched and reported via the status. If both keys map to
// one shard the rename is done in a single hop by OpRen; otherwise a two-hop transaction
// driven by Renamer is used.
OpResult<void> GenericFamily::RenameGeneric(CmdArgList args, bool skip_exist_dest,
                                            ConnectionContext* cntx) {
  const string_view src_key = ArgS(args, 1);
  const string_view dest_key = ArgS(args, 2);
  Transaction* tx = cntx->transaction;

  // Fast path: everything lives on one shard.
  if (tx->unique_shard_cnt() == 1) {
    auto single_hop = [&](Transaction* t, EngineShard* shard) {
      return OpRen(OpArgs{shard, t->db_index()}, src_key, dest_key, skip_exist_dest);
    };
    OpResult<void> single_res = tx->ScheduleSingleHopT(std::move(single_hop));
    return single_res;
  }

  tx->Schedule();
  const unsigned shard_count = tx->shard_set()->size();
  Renamer renamer{tx->db_index(), Shard(src_key, shard_count)};

  // Phase 1 -> Fetch keys from both shards.
  auto find_phase = [&renamer](Transaction* t, EngineShard* shard) {
    const auto sid = shard->shard_id();
    return renamer.Find(sid, t->ShardArgsInShard(sid)).status();
  };
  tx->Execute(std::move(find_phase), false);

  // Phase 2 -> If everything is ok, clone the source object, delete the destination object,
  // and set its ptr to the cloned one. The expiration data of the source key is copied too.
  tx->Execute(renamer.Finalize(skip_exist_dest), true);

  return renamer.status();
}
// ECHO message
// Sends the first argument back to the client as a bulk string.
void GenericFamily::Echo(CmdArgList args, ConnectionContext* cntx) {
  cntx->SendBulkString(ArgS(args, 1));
}
// SCAN cursor
// Iterates over the keys of the connection's current database, shard by shard.
//
// The client-visible cursor packs two things: its low 10 bits hold the index of the shard
// to continue with, the remaining bits hold that shard's own table cursor. That is why the
// number of shards must be below 1024. A reply cursor of 0 means the iteration is complete.
// Scanning goes on until at least 10 keys were gathered or the last shard is exhausted.
// Only the cursor argument is read; any further arguments are ignored here.
void GenericFamily::Scan(CmdArgList args, ConnectionContext* cntx) {
  std::string_view token = ArgS(args, 1);
  EngineShardSet* ess = cntx->shard_set;
  unsigned shard_count = ess->size();

  // The shard index has to fit into the 10 low bits of the cursor.
  CHECK_LT(shard_count, 1024u);

  uint64_t cursor = 0;
  if (!absl::SimpleAtoi(token, &cursor)) {
    return cntx->SendError("invalid cursor");
  }

  // Split the incoming cursor into (shard index, per-shard cursor).
  ShardId sid = cursor % 1024;
  if (sid >= shard_count) {
    return cntx->SendError("invalid cursor");
  }
  cursor >>= 10;

  vector<string> keys;
  while (true) {
    ess->Await(sid, [&] {
      OpScan(OpArgs{EngineShard::tlocal(), cntx->conn_state.db_index}, &cursor, &keys);
    });

    // A zero cursor means this shard is done; advance to the next one, if any.
    if (cursor == 0) {
      ++sid;
      if (unsigned(sid) == shard_count)
        break;
    }
    if (keys.size() >= 10)
      break;
  }

  // Re-pack the continuation point, unless every shard has been fully traversed.
  if (sid < shard_count) {
    cursor = (cursor << 10) | sid;
  } else {
    DCHECK_EQ(0u, cursor);
  }

  // Reply: a two-element array of [cursor as bulk string, array of keys].
  const string cursor_text = absl::StrCat(cursor);
  string reply = absl::StrCat("*2\r\n$", cursor_text.size(), "\r\n", cursor_text, "\r\n*",
                              keys.size(), "\r\n");
  for (const string& name : keys) {
    absl::StrAppend(&reply, "$", name.size(), "\r\n", name, "\r\n");
  }
  return cntx->SendRespBlob(reply);
}
// Shard-local implementation of EXPIRE / EXPIREAT.
//
// Converts `params` into an absolute deadline in milliseconds (seconds are scaled by 1000,
// relative values are offset by db_slice.Now()). A deadline that is not in the future deletes
// the key; otherwise the expiry is updated in place or created.
// Returns KEY_NOTFOUND when `key` does not exist, OK otherwise.
//
// All arithmetic saturates at the int64_t limits: previously a very large TTL overflowed
// (undefined behaviour, in practice wrapping to a negative deadline) and the key was deleted
// instead of being kept.
OpStatus GenericFamily::OpExpire(const OpArgs& op_args, string_view key,
                                 const ExpireParams& params) {
  auto& db_slice = op_args.shard->db_slice();
  auto [it, expire_it] = db_slice.FindExt(op_args.db_ind, key);
  if (!IsValid(it))
    return OpStatus::KEY_NOTFOUND;

  const int64_t now_ms = int64_t(db_slice.Now());

  int64_t abs_msec = params.ts;
  if (params.unit == TimeUnit::SEC) {
    // Seconds -> milliseconds without overflowing.
    if (abs_msec > INT64_MAX / 1000) {
      abs_msec = INT64_MAX;
    } else if (abs_msec < INT64_MIN / 1000) {
      abs_msec = INT64_MIN;
    } else {
      abs_msec *= 1000;
    }
  }

  if (!params.absolute) {
    // Relative -> absolute without overflowing.
    if (now_ms > 0 && abs_msec > INT64_MAX - now_ms) {
      abs_msec = INT64_MAX;
    } else if (now_ms < 0 && abs_msec < INT64_MIN - now_ms) {
      abs_msec = INT64_MIN;
    } else {
      abs_msec += now_ms;
    }
  }

  if (abs_msec <= now_ms) {
    // Deadline already passed: the key is removed right away.
    CHECK(db_slice.Del(op_args.db_ind, it));
  } else if (IsValid(expire_it)) {
    expire_it->second = abs_msec;
  } else {
    db_slice.Expire(op_args.db_ind, it, abs_msec);
  }
  return OpStatus::OK;
}
// Shard-local implementation of TTL / PTTL.
//
// Returns the remaining lifetime of `key` in the units of db_slice.Now() (callers treat it
// as milliseconds). Returns KEY_NOTFOUND when the key is absent and SKIPPED when it exists
// but has no expiry.
OpResult<uint64_t> GenericFamily::OpTtl(Transaction* t, EngineShard* shard, string_view key) {
  auto& db_slice = shard->db_slice();
  auto [main_it, expire_it] = db_slice.FindExt(t->db_index(), key);

  const bool key_exists = IsValid(main_it);
  if (!key_exists)
    return OpStatus::KEY_NOTFOUND;

  const bool has_expiry = IsValid(expire_it);
  if (!has_expiry)
    return OpStatus::SKIPPED;

  int64_t ttl_ms = expire_it->second - db_slice.Now();
  DCHECK_GT(ttl_ms, 0);  // Otherwise FindExt would return null.
  return ttl_ms;
}
// Shard-local implementation of DEL.
// Deletes every key in `keys` that exists in the shard and returns how many were removed.
// `keys` is expected to be non-empty (the first key is logged).
OpResult<uint32_t> GenericFamily::OpDel(const OpArgs& op_args, ArgSlice keys) {
  DVLOG(1) << "Del: " << keys[0];
  auto& db_slice = op_args.shard->db_slice();

  uint32_t deleted = 0;
  for (const auto& key : keys) {
    auto lookup = db_slice.FindExt(op_args.db_ind, key);
    if (IsValid(lookup.first)) {
      deleted += int(db_slice.Del(op_args.db_ind, lookup.first));
    }
  }
  return deleted;
}
// Shard-local implementation of EXISTS.
// Returns how many entries of `keys` are present in the shard; a key listed twice is
// counted twice. `keys` is expected to be non-empty (the first key is logged).
OpResult<uint32_t> GenericFamily::OpExists(const OpArgs& op_args, ArgSlice keys) {
  DVLOG(1) << "Exists: " << keys[0];
  auto& db_slice = op_args.shard->db_slice();

  uint32_t present = 0;
  for (const auto& key : keys) {
    auto lookup = db_slice.FindExt(op_args.db_ind, key);
    present += IsValid(lookup.first);
  }
  return present;
}
// Shard-local rename of `from` to `to` when both keys live on the same shard.
//
// Returns KEY_NOTFOUND when `from` does not exist and KEY_EXISTS when `to` exists and
// `skip_exists` is set. Otherwise the value and expiry of `from` are moved to `to`
// (overwriting it if present), `from` is deleted, and OK is returned.
// Renaming a key onto itself leaves it untouched.
OpResult<void> GenericFamily::OpRen(const OpArgs& op_args, string_view from,
                                    string_view to, bool skip_exists) {
  auto& db_slice = op_args.shard->db_slice();
  auto [from_it, expire_it] = db_slice.FindExt(op_args.db_ind, from);
  if (!IsValid(from_it))
    return OpStatus::KEY_NOTFOUND;

  // Same source and destination: nothing to move. Without this guard the value would be
  // move-assigned onto itself and the key deleted by the final Del() below.
  // With skip_exists the destination trivially exists, which matches the result of the
  // general path.
  if (from == to)
    return skip_exists ? OpStatus::KEY_EXISTS : OpStatus::OK;

  auto [to_it, to_expire] = db_slice.FindExt(op_args.db_ind, to);
  if (IsValid(to_it)) {
    if (skip_exists)
      return OpStatus::KEY_EXISTS;
  }

  // 0 stands for "source has no expiry".
  uint64_t exp_ts = IsValid(expire_it) ? expire_it->second : 0;
  if (IsValid(to_it)) {
    to_it->second = std::move(from_it->second);
    // Re-establish the expire flag on the moved-from source value.
    from_it->second.SetExpire(IsValid(expire_it));
    if (IsValid(to_expire)) {
      // NOTE(review): when the source has no expiry (exp_ts == 0) this stores 0 in the
      // destination's existing expire entry and keeps its expire flag set - confirm that
      // this does not make the renamed key expire immediately.
      to_it->second.SetExpire(true);
      to_expire->second = exp_ts;
    } else {
      to_it->second.SetExpire(false);
      db_slice.Expire(op_args.db_ind, to_it, exp_ts);
    }
  } else {
    db_slice.AddNew(op_args.db_ind, to, std::move(from_it->second), exp_ts);
    // Need search again since the container might invalidate the iterators.
    from_it = db_slice.FindExt(op_args.db_ind, from).first;
  }
  CHECK(db_slice.Del(op_args.db_ind, from_it));
  return OpStatus::OK;
}
void GenericFamily::OpScan(const OpArgs& op_args, uint64_t* cursor, vector<string>* vec) {
auto& db_slice = op_args.shard->db_slice();
DCHECK(db_slice.IsDbValid(op_args.db_ind));
unsigned cnt = 0;
auto scan_cb = [&](MainIterator it) {
if (it->second.HasExpire()) {
it = db_slice.ExpireIfNeeded(op_args.db_ind, it).first;
}
vec->push_back(it->first.ToString());
++cnt;
};
VLOG(1) << "PrimeTable " << db_slice.shard_id() << "/" << op_args.db_ind
<< " has " << db_slice.DbSize(op_args.db_ind);
uint64_t cur = *cursor;
auto [prime_table, expire_table] = db_slice.GetTables(op_args.db_ind);
do {
cur = prime_table->Traverse(cur, scan_cb);
} while (cur && cnt < 10);
VLOG(1) << "OpScan " << db_slice.shard_id() << " cursor: " << cur;
*cursor = cur;
}
// Short alias used to keep the registration table below compact.
using CI = CommandId;
// HFUNC(x) expands to SetHandler(&GenericFamily::x), binding a command to its handler.
#define HFUNC(x) SetHandler(&GenericFamily::x)
// Registers every command implemented by GenericFamily with `registry`.
// Each entry is a CommandId built from the command name, its CO:: option flags and four
// integers, followed by the handler binding.
// NOTE(review): the four integers presumably follow the Redis command-table layout
// (arity, first key, last key, key step) - confirm against the CommandId constructor.
void GenericFamily::Register(CommandRegistry* registry) {
// Option flags for SELECT.
constexpr auto kSelectOpts = CO::LOADING | CO::FAST | CO::STALE;
*registry << CI{"DEL", CO::WRITE, -2, 1, -1, 1}.HFUNC(Del)
<< CI{"PING", CO::STALE | CO::FAST, -1, 0, 0, 0}.HFUNC(Ping)
<< CI{"ECHO", CO::READONLY | CO::FAST, 2, 0, 0, 0}.HFUNC(Echo)
<< CI{"EXISTS", CO::READONLY | CO::FAST, -2, 1, -1, 1}.HFUNC(Exists)
<< CI{"EXPIRE", CO::WRITE | CO::FAST, 3, 1, 1, 1}.HFUNC(Expire)
<< CI{"EXPIREAT", CO::WRITE | CO::FAST, 3, 1, 1, 1}.HFUNC(ExpireAt)
<< CI{"RENAME", CO::WRITE, 3, 1, 2, 1}.HFUNC(Rename)
<< CI{"SELECT", kSelectOpts, 2, 0, 0, 0}.HFUNC(Select)
<< CI{"SCAN", CO::READONLY | CO::FAST, -2, 0, 0, 0}.HFUNC(Scan)
<< CI{"TTL", CO::READONLY | CO::FAST | CO::RANDOM, 2, 1, 1, 1}.HFUNC(Ttl)
<< CI{"PTTL", CO::READONLY | CO::FAST | CO::RANDOM, 2, 1, 1, 1}.HFUNC(Pttl)
<< CI{"TYPE", CO::READONLY | CO::FAST, 2, 1, 1, 1}.HFUNC(Type);
}
} // namespace dfly