// Copyright 2021, Roman Gershman. All rights reserved. // See LICENSE for licensing terms. // #include "server/generic_family.h" extern "C" { #include "redis/object.h" } #include "base/logging.h" #include "server/command_registry.h" #include "server/conn_context.h" #include "server/engine_shard_set.h" #include "server/error.h" #include "server/transaction.h" #include "util/varz.h" DEFINE_uint32(dbnum, 16, "Number of databases"); namespace dfly { using namespace std; namespace { DEFINE_VARZ(VarzQps, ping_qps); class Renamer { public: Renamer(DbIndex dind, ShardId source_id) : db_indx_(dind), src_sid_(source_id) { } OpResult Find(ShardId shard_id, const ArgSlice& args); OpResult status() const { return status_; }; Transaction::RunnableType Finalize(bool skip_exist_dest); private: void MoveValues(EngineShard* shard, const ArgSlice& args); DbIndex db_indx_; ShardId src_sid_; struct FindResult { string_view key; PrimeValue val; uint64_t expire_ts; bool found = false; }; FindResult src_res_, dest_res_; // index 0 for source, 1 for destination OpResult status_; }; OpResult Renamer::Find(ShardId shard_id, const ArgSlice& args) { CHECK_EQ(1u, args.size()); FindResult* res = (shard_id == src_sid_) ? &src_res_ : &dest_res_; res->key = args.front(); auto [it, exp_it] = EngineShard::tlocal()->db_slice().FindExt(db_indx_, res->key); res->found = IsValid(it); if (IsValid(it)) { res->val = it->second.AsRef(); res->expire_ts = IsValid(exp_it) ? exp_it->second : 0; } return OpStatus::OK; }; void Renamer::MoveValues(EngineShard* shard, const ArgSlice& args) { auto shard_id = shard->shard_id(); // TODO: when we want to maintain heap per shard model this code will require additional // work if (shard_id == src_sid_) { // Handle source key. // delete the source entry. auto it = shard->db_slice().FindExt(db_indx_, src_res_.key).first; CHECK(shard->db_slice().Del(db_indx_, it)); return; } // Handle destination string_view dest_key = dest_res_.key; MainIterator dest_it = shard->db_slice().FindExt(db_indx_, dest_key).first; if (IsValid(dest_it)) { // we just move the source. We won't be able to do it with heap per shard model. dest_it->second = std::move(src_res_.val); shard->db_slice().Expire(db_indx_, dest_it, src_res_.expire_ts); } else { // we just add the key to destination with the source object. shard->db_slice().AddNew(db_indx_, dest_key, std::move(src_res_.val), src_res_.expire_ts); } } Transaction::RunnableType Renamer::Finalize(bool skip_exist_dest) { auto cleanup = [](Transaction* t, EngineShard* shard) { return OpStatus::OK; }; if (!src_res_.found) { status_ = OpStatus::KEY_NOTFOUND; return cleanup; } if (dest_res_.found && skip_exist_dest) { status_ = OpStatus::KEY_EXISTS; return cleanup; } DCHECK(src_res_.val.IsRef()); // We can not copy from the existing value and delete it at the same time. // TODO: if we want to allocate in shard, we must implement CompactObject::Clone. // For now we hack it for strings only. string val; src_res_.val.GetString(&val); src_res_.val.SetString(val); // Src key exist and we need to override the destination. return [this](Transaction* t, EngineShard* shard) { this->MoveValues(shard, t->ShardArgsInShard(shard->shard_id())); return OpStatus::OK; }; } const char* ObjTypeName(int type) { switch (type) { case OBJ_STRING: return "string"; case OBJ_LIST: return "list"; case OBJ_SET: return "set"; case OBJ_ZSET: return "zset"; case OBJ_HASH: return "hash"; case OBJ_STREAM: return "stream"; default: LOG(ERROR) << "Unsupported type " << type; } return "invalid"; }; } // namespace void GenericFamily::Init(util::ProactorPool* pp) { ping_qps.Init(pp); } void GenericFamily::Shutdown() { ping_qps.Shutdown(); } void GenericFamily::Del(CmdArgList args, ConnectionContext* cntx) { Transaction* transaction = cntx->transaction; VLOG(1) << "Del " << ArgS(args, 1); atomic_uint32_t result{0}; auto cb = [&result](const Transaction* t, EngineShard* shard) { ArgSlice args = t->ShardArgsInShard(shard->shard_id()); auto res = OpDel(OpArgs{shard, t->db_index()}, args); result.fetch_add(res.value_or(0), memory_order_relaxed); return OpStatus::OK; }; OpStatus status = transaction->ScheduleSingleHop(std::move(cb)); CHECK_EQ(OpStatus::OK, status); DVLOG(2) << "Del ts " << transaction->txid(); cntx->SendLong(result.load(memory_order_release)); } void GenericFamily::Ping(CmdArgList args, ConnectionContext* cntx) { if (args.size() > 2) { return cntx->SendError("wrong number of arguments for 'ping' command"); } ping_qps.Inc(); // We synchronously block here until the engine sends us the payload and notifies that // the I/O operation has been processed. if (args.size() == 1) { return cntx->SendSimpleRespString("PONG"); } else { string_view arg = ArgS(args, 1); DVLOG(2) << "Ping " << arg; return cntx->SendBulkString(arg); } } void GenericFamily::Exists(CmdArgList args, ConnectionContext* cntx) { Transaction* transaction = cntx->transaction; VLOG(1) << "Exists " << ArgS(args, 1); atomic_uint32_t result{0}; auto cb = [&result](Transaction* t, EngineShard* shard) { ArgSlice args = t->ShardArgsInShard(shard->shard_id()); auto res = OpExists(OpArgs{shard, t->db_index()}, args); result.fetch_add(res.value_or(0), memory_order_relaxed); return OpStatus::OK; }; OpStatus status = transaction->ScheduleSingleHop(std::move(cb)); CHECK_EQ(OpStatus::OK, status); return cntx->SendLong(result.load(memory_order_release)); } void GenericFamily::Expire(CmdArgList args, ConnectionContext* cntx) { string_view key = ArgS(args, 1); string_view sec = ArgS(args, 2); int64_t int_arg; if (!absl::SimpleAtoi(sec, &int_arg)) { return cntx->SendError(kInvalidIntErr); } int_arg = std::max(int_arg, -1L); ExpireParams params{.ts = int_arg}; auto cb = [&](Transaction* t, EngineShard* shard) { return OpExpire(OpArgs{shard, t->db_index()}, key, params); }; OpStatus status = cntx->transaction->ScheduleSingleHop(move(cb)); cntx->SendLong(status == OpStatus::OK); } void GenericFamily::ExpireAt(CmdArgList args, ConnectionContext* cntx) { string_view key = ArgS(args, 1); string_view sec = ArgS(args, 2); int64_t int_arg; if (!absl::SimpleAtoi(sec, &int_arg)) { return cntx->SendError(kInvalidIntErr); } int_arg = std::max(int_arg, 0L); ExpireParams params{.ts = int_arg, .absolute = true}; auto cb = [&](Transaction* t, EngineShard* shard) { return OpExpire(OpArgs{shard, t->db_index()}, key, params); }; OpStatus status = cntx->transaction->ScheduleSingleHop(std::move(cb)); cntx->SendLong(status == OpStatus::OK); } void GenericFamily::Rename(CmdArgList args, ConnectionContext* cntx) { OpResult st = RenameGeneric(args, false, cntx); cntx->SendError(st.status()); } void GenericFamily::Ttl(CmdArgList args, ConnectionContext* cntx) { TtlGeneric(args, cntx, TimeUnit::SEC); } void GenericFamily::Pttl(CmdArgList args, ConnectionContext* cntx) { TtlGeneric(args, cntx, TimeUnit::MSEC); } void GenericFamily::TtlGeneric(CmdArgList args, ConnectionContext* cntx, TimeUnit unit) { string_view key = ArgS(args, 1); auto cb = [&](Transaction* t, EngineShard* shard) { return OpTtl(t, shard, key); }; OpResult result = cntx->transaction->ScheduleSingleHopT(std::move(cb)); if (result) { long ttl = (unit == TimeUnit::SEC) ? (result.value() + 500) / 1000 : result.value(); cntx->SendLong(ttl); } else { switch (result.status()) { case OpStatus::KEY_NOTFOUND: cntx->SendLong(-1); break; default: cntx->SendLong(-2); } } } void GenericFamily::Select(CmdArgList args, ConnectionContext* cntx) { string_view key = ArgS(args, 1); int64_t index; if (!absl::SimpleAtoi(key, &index)) { return cntx->SendError(kInvalidDbIndErr); } if (index < 0 || index >= FLAGS_dbnum) { return cntx->SendError(kDbIndOutOfRangeErr); } cntx->conn_state.db_index = index; auto cb = [index](EngineShard* shard) { shard->db_slice().ActivateDb(index); return OpStatus::OK; }; cntx->shard_set->RunBriefInParallel(std::move(cb)); return cntx->SendOk(); } void GenericFamily::Type(CmdArgList args, ConnectionContext* cntx) { std::string_view key = ArgS(args, 1); auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult { auto it = shard->db_slice().FindExt(t->db_index(), key).first; if (!it.is_done()) { return it->second.ObjType(); } else { return OpStatus::KEY_NOTFOUND; } }; OpResult result = cntx->transaction->ScheduleSingleHopT(std::move(cb)); if (!result) { cntx->SendSimpleRespString("none"); } else { cntx->SendSimpleRespString(ObjTypeName(result.value())); } } OpResult GenericFamily::RenameGeneric(CmdArgList args, bool skip_exist_dest, ConnectionContext* cntx) { string_view key[2] = {ArgS(args, 1), ArgS(args, 2)}; Transaction* transaction = cntx->transaction; if (transaction->unique_shard_cnt() == 1) { auto cb = [&](Transaction* t, EngineShard* shard) { return OpRen(OpArgs{shard, t->db_index()}, key[0], key[1], skip_exist_dest); }; OpResult result = transaction->ScheduleSingleHopT(std::move(cb)); return result; } transaction->Schedule(); unsigned shard_count = transaction->shard_set()->size(); Renamer renamer{transaction->db_index(), Shard(key[0], shard_count)}; // Phase 1 -> Fetch keys from both shards. // Phase 2 -> If everything is ok, clone the source object, delete the destination object, and // set its ptr to cloned one. we also copy the expiration data of the source key. transaction->Execute( [&renamer](Transaction* t, EngineShard* shard) { auto args = t->ShardArgsInShard(shard->shard_id()); return renamer.Find(shard->shard_id(), args).status(); }, false); transaction->Execute(renamer.Finalize(skip_exist_dest), true); return renamer.status(); } void GenericFamily::Echo(CmdArgList args, ConnectionContext* cntx) { string_view key = ArgS(args, 1); return cntx->SendBulkString(key); } void GenericFamily::Scan(CmdArgList args, ConnectionContext* cntx) { std::string_view token = ArgS(args, 1); uint64_t cursor = 0; EngineShardSet* ess = cntx->shard_set; unsigned shard_count = ess->size(); // Dash table returns a cursor with its right byte empty. We will use it // for encoding shard index. For now scan has a limitation of 255 shards. CHECK_LT(shard_count, 1024u); if (!absl::SimpleAtoi(token, &cursor)) { return cntx->SendError("invalid cursor"); } ShardId sid = cursor % 1024; if (sid >= shard_count) { return cntx->SendError("invalid cursor"); } cursor >>= 10; vector keys; do { ess->Await(sid, [&] { OpArgs op_args{EngineShard::tlocal(), cntx->conn_state.db_index}; OpScan(op_args, &cursor, &keys); }); if (cursor == 0) { ++sid; if (unsigned(sid) == shard_count) break; } } while (keys.size() < 10); if (sid < shard_count) { cursor = (cursor << 10) | sid; } else { DCHECK_EQ(0u, cursor); } string res("*2\r\n$"); string curs_str = absl::StrCat(cursor); absl::StrAppend(&res, curs_str.size(), "\r\n", curs_str, "\r\n*", keys.size(), "\r\n"); for (const auto& k : keys) { absl::StrAppend(&res, "$", k.size(), "\r\n", k, "\r\n"); } return cntx->SendRespBlob(res); } OpStatus GenericFamily::OpExpire(const OpArgs& op_args, string_view key, const ExpireParams& params) { auto& db_slice = op_args.shard->db_slice(); auto [it, expire_it] = db_slice.FindExt(op_args.db_ind, key); if (!IsValid(it)) return OpStatus::KEY_NOTFOUND; int64_t abs_msec = (params.unit == TimeUnit::SEC) ? params.ts * 1000 : params.ts; if (!params.absolute) { abs_msec += db_slice.Now(); } if (abs_msec <= int64_t(db_slice.Now())) { CHECK(db_slice.Del(op_args.db_ind, it)); } else if (IsValid(expire_it)) { expire_it->second = abs_msec; } else { db_slice.Expire(op_args.db_ind, it, abs_msec); } return OpStatus::OK; } OpResult GenericFamily::OpTtl(Transaction* t, EngineShard* shard, string_view key) { auto& db_slice = shard->db_slice(); auto [it, expire] = db_slice.FindExt(t->db_index(), key); if (!IsValid(it)) return OpStatus::KEY_NOTFOUND; if (!IsValid(expire)) return OpStatus::SKIPPED; int64_t ttl_ms = expire->second - db_slice.Now(); DCHECK_GT(ttl_ms, 0); // Otherwise FindExt would return null. return ttl_ms; } OpResult GenericFamily::OpDel(const OpArgs& op_args, ArgSlice keys) { DVLOG(1) << "Del: " << keys[0]; auto& db_slice = op_args.shard->db_slice(); uint32_t res = 0; for (uint32_t i = 0; i < keys.size(); ++i) { auto fres = db_slice.FindExt(op_args.db_ind, keys[i]); if (!IsValid(fres.first)) continue; res += int(db_slice.Del(op_args.db_ind, fres.first)); } return res; } OpResult GenericFamily::OpExists(const OpArgs& op_args, ArgSlice keys) { DVLOG(1) << "Exists: " << keys[0]; auto& db_slice = op_args.shard->db_slice(); uint32_t res = 0; for (uint32_t i = 0; i < keys.size(); ++i) { auto find_res = db_slice.FindExt(op_args.db_ind, keys[i]); res += IsValid(find_res.first); } return res; } OpResult GenericFamily::OpRen(const OpArgs& op_args, string_view from, string_view to, bool skip_exists) { auto& db_slice = op_args.shard->db_slice(); auto [from_it, expire_it] = db_slice.FindExt(op_args.db_ind, from); if (!IsValid(from_it)) return OpStatus::KEY_NOTFOUND; auto [to_it, to_expire] = db_slice.FindExt(op_args.db_ind, to); if (IsValid(to_it)) { if (skip_exists) return OpStatus::KEY_EXISTS; } uint64_t exp_ts = IsValid(expire_it) ? expire_it->second : 0; if (IsValid(to_it)) { to_it->second = std::move(from_it->second); from_it->second.SetExpire(IsValid(expire_it)); if (IsValid(to_expire)) { to_it->second.SetExpire(true); to_expire->second = exp_ts; } else { to_it->second.SetExpire(false); db_slice.Expire(op_args.db_ind, to_it, exp_ts); } } else { db_slice.AddNew(op_args.db_ind, to, std::move(from_it->second), exp_ts); // Need search again since the container might invalidate the iterators. from_it = db_slice.FindExt(op_args.db_ind, from).first; } CHECK(db_slice.Del(op_args.db_ind, from_it)); return OpStatus::OK; } void GenericFamily::OpScan(const OpArgs& op_args, uint64_t* cursor, vector* vec) { auto& db_slice = op_args.shard->db_slice(); DCHECK(db_slice.IsDbValid(op_args.db_ind)); unsigned cnt = 0; auto scan_cb = [&](MainIterator it) { if (it->second.HasExpire()) { it = db_slice.ExpireIfNeeded(op_args.db_ind, it).first; } vec->push_back(it->first.ToString()); ++cnt; }; VLOG(1) << "PrimeTable " << db_slice.shard_id() << "/" << op_args.db_ind << " has " << db_slice.DbSize(op_args.db_ind); uint64_t cur = *cursor; auto [prime_table, expire_table] = db_slice.GetTables(op_args.db_ind); do { cur = prime_table->Traverse(cur, scan_cb); } while (cur && cnt < 10); VLOG(1) << "OpScan " << db_slice.shard_id() << " cursor: " << cur; *cursor = cur; } using CI = CommandId; #define HFUNC(x) SetHandler(&GenericFamily::x) void GenericFamily::Register(CommandRegistry* registry) { constexpr auto kSelectOpts = CO::LOADING | CO::FAST | CO::STALE; *registry << CI{"DEL", CO::WRITE, -2, 1, -1, 1}.HFUNC(Del) << CI{"PING", CO::STALE | CO::FAST, -1, 0, 0, 0}.HFUNC(Ping) << CI{"ECHO", CO::READONLY | CO::FAST, 2, 0, 0, 0}.HFUNC(Echo) << CI{"EXISTS", CO::READONLY | CO::FAST, -2, 1, -1, 1}.HFUNC(Exists) << CI{"EXPIRE", CO::WRITE | CO::FAST, 3, 1, 1, 1}.HFUNC(Expire) << CI{"EXPIREAT", CO::WRITE | CO::FAST, 3, 1, 1, 1}.HFUNC(ExpireAt) << CI{"RENAME", CO::WRITE, 3, 1, 2, 1}.HFUNC(Rename) << CI{"SELECT", kSelectOpts, 2, 0, 0, 0}.HFUNC(Select) << CI{"SCAN", CO::READONLY | CO::FAST, -2, 0, 0, 0}.HFUNC(Scan) << CI{"TTL", CO::READONLY | CO::FAST | CO::RANDOM, 2, 1, 1, 1}.HFUNC(Ttl) << CI{"PTTL", CO::READONLY | CO::FAST | CO::RANDOM, 2, 1, 1, 1}.HFUNC(Pttl) << CI{"TYPE", CO::READONLY | CO::FAST, 2, 1, 1, 1}.HFUNC(Type); } } // namespace dfly