Memory accounting
Bring back application level used-memory tracking. Use internal mimalloc api for extracting comitted memory stats. This is much better performance-wise because calling mi_heap_visit_blocks becomes very slow for larger database sizes.
This commit is contained in:
parent
92475dd47a
commit
770fe0fe47
2
helio
2
helio
|
@ -1 +1 @@
|
|||
Subproject commit 6a755e1babf7383f90948c1d630728585ea6f10e
|
||||
Subproject commit 2a6629d7e47da545bede22507d04bd2de409b1cb
|
|
@ -7,7 +7,7 @@
|
|||
#include "core/dash.h"
|
||||
|
||||
#include <absl/container/flat_hash_map.h>
|
||||
#include <malloc.h>
|
||||
#include <mimalloc.h>
|
||||
|
||||
#include <functional>
|
||||
#include <set>
|
||||
|
@ -82,7 +82,7 @@ constexpr size_t foo = Segment::kBucketSz;
|
|||
class DashTest : public testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase() {
|
||||
init_zmalloc_threadlocal();
|
||||
init_zmalloc_threadlocal(mi_heap_get_backing());
|
||||
}
|
||||
|
||||
DashTest() : segment_(1) {
|
||||
|
|
|
@ -17,13 +17,19 @@ class MiMemoryResource final : public std::pmr::memory_resource {
|
|||
|
||||
mi_heap_t* heap() { return heap_;}
|
||||
|
||||
size_t used() const { return used_;}
|
||||
|
||||
private:
|
||||
void* do_allocate(std::size_t size, std::size_t align) {
|
||||
return mi_heap_malloc_aligned(heap_, size, align);
|
||||
void* res = mi_heap_malloc_aligned(heap_, size, align);
|
||||
if (res)
|
||||
used_ += mi_good_size(size);
|
||||
return res;
|
||||
}
|
||||
|
||||
void do_deallocate(void* ptr, std::size_t size, std::size_t align) {
|
||||
mi_free_size_aligned(ptr, size, align);
|
||||
used_ -= mi_good_size(size);
|
||||
}
|
||||
|
||||
bool do_is_equal(const std::pmr::memory_resource& o) const noexcept {
|
||||
|
@ -31,6 +37,7 @@ class MiMemoryResource final : public std::pmr::memory_resource {
|
|||
}
|
||||
|
||||
mi_heap_t* heap_;
|
||||
size_t used_ = 0;
|
||||
};
|
||||
|
||||
} // namespace dfly
|
|
@ -38,13 +38,17 @@ class SegmentAllocator {
|
|||
std::pair<Ptr, uint8_t*> Allocate(uint32_t size);
|
||||
|
||||
void Free(Ptr ptr) {
|
||||
mi_free(Translate(ptr));
|
||||
void* p = Translate(ptr);
|
||||
used_ -= mi_usable_size(p);
|
||||
mi_free(p);
|
||||
}
|
||||
|
||||
mi_heap_t* heap() {
|
||||
return heap_;
|
||||
}
|
||||
|
||||
size_t used() const { return used_; }
|
||||
|
||||
private:
|
||||
static uint32_t Offset(Ptr p) {
|
||||
return (p >> kSegmentIdBits) * 8;
|
||||
|
@ -55,6 +59,7 @@ class SegmentAllocator {
|
|||
std::vector<uint8_t*> address_table_;
|
||||
absl::flat_hash_map<uint64_t, uint16_t> rev_indx_;
|
||||
mi_heap_t* heap_;
|
||||
size_t used_ = 0;
|
||||
};
|
||||
|
||||
inline auto SegmentAllocator::Allocate(uint32_t size) -> std::pair<Ptr, uint8_t*> {
|
||||
|
@ -69,6 +74,7 @@ inline auto SegmentAllocator::Allocate(uint32_t size) -> std::pair<Ptr, uint8_t*
|
|||
}
|
||||
|
||||
Ptr res = (((ptr - seg_ptr) / 8) << kSegmentIdBits) | it->second;
|
||||
used_ += mi_good_size(size);
|
||||
|
||||
return std::make_pair(res, (uint8_t*)ptr);
|
||||
}
|
||||
|
|
|
@ -42,6 +42,10 @@ void SmallString::InitThreadLocal(void * heap) {
|
|||
XXH3_64bits_reset_withSeed(tl.xxh_state.get(), kHashSeed);
|
||||
}
|
||||
|
||||
size_t SmallString::UsedThreadLocal() {
|
||||
return tl.seg_alloc ? tl.seg_alloc->used() : 0;
|
||||
}
|
||||
|
||||
static_assert(sizeof(SmallString) == 16);
|
||||
|
||||
// we should use only for sizes greater than kPrefLen
|
||||
|
|
|
@ -19,6 +19,7 @@ class SmallString {
|
|||
public:
|
||||
|
||||
static void InitThreadLocal(void * heap);
|
||||
static size_t UsedThreadLocal();
|
||||
|
||||
void Reset() {
|
||||
size_ = 0;
|
||||
|
|
|
@ -114,6 +114,7 @@ size_t zmalloc_usable_size(const void* p);
|
|||
// roman: void zlibc_free(void *ptr);
|
||||
|
||||
void init_zmalloc_threadlocal(void* heap);
|
||||
extern __thread ssize_t zmalloc_used_memory_tl;
|
||||
|
||||
#undef __zm_str
|
||||
#undef __xstr
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include "atomicvar.h"
|
||||
#include "zmalloc.h"
|
||||
|
||||
// __thread ssize_t used_memory_tl = 0;
|
||||
__thread ssize_t zmalloc_used_memory_tl = 0;
|
||||
__thread mi_heap_t* zmalloc_heap = NULL;
|
||||
|
||||
/* Allocate memory or panic */
|
||||
|
@ -27,10 +27,9 @@ size_t zmalloc_usable_size(const void* p) {
|
|||
}
|
||||
|
||||
void zfree(void* ptr) {
|
||||
// size_t usable = mi_usable_size(ptr);
|
||||
// used_memory_tl -= usable;
|
||||
mi_free(ptr);
|
||||
// return mi_free_size(ptr, usable);
|
||||
size_t usable = mi_usable_size(ptr);
|
||||
zmalloc_used_memory_tl -= usable;
|
||||
mi_free_size(ptr, usable);
|
||||
}
|
||||
|
||||
void* zrealloc(void* ptr, size_t size) {
|
||||
|
@ -39,9 +38,9 @@ void* zrealloc(void* ptr, size_t size) {
|
|||
}
|
||||
|
||||
void* zcalloc(size_t size) {
|
||||
// size_t usable = mi_good_size(size);
|
||||
size_t usable = mi_good_size(size);
|
||||
|
||||
// used_memory_tl += usable;
|
||||
zmalloc_used_memory_tl += usable;
|
||||
|
||||
return mi_heap_calloc(zmalloc_heap, 1, size);
|
||||
}
|
||||
|
@ -50,17 +49,17 @@ void* zmalloc_usable(size_t size, size_t* usable) {
|
|||
size_t g = mi_good_size(size);
|
||||
*usable = g;
|
||||
|
||||
// used_memory_tl += g;
|
||||
zmalloc_used_memory_tl += g;
|
||||
assert(zmalloc_heap);
|
||||
return mi_heap_malloc(zmalloc_heap, g);
|
||||
}
|
||||
|
||||
void* zrealloc_usable(void* ptr, size_t size, size_t* usable) {
|
||||
size_t g = mi_good_size(size);
|
||||
// size_t prev = mi_usable_size(ptr);
|
||||
size_t prev = mi_usable_size(ptr);
|
||||
*usable = g;
|
||||
|
||||
// used_memory_tl += (g - prev);
|
||||
zmalloc_used_memory_tl += (g - prev);
|
||||
return mi_heap_realloc(zmalloc_heap, ptr, g);
|
||||
}
|
||||
|
||||
|
@ -69,6 +68,7 @@ size_t znallocx(size_t size) {
|
|||
}
|
||||
|
||||
void zfree_size(void* ptr, size_t size) {
|
||||
zmalloc_used_memory_tl -= size;
|
||||
mi_free_size(ptr, size);
|
||||
}
|
||||
|
||||
|
@ -78,8 +78,8 @@ void* ztrymalloc(size_t size) {
|
|||
}
|
||||
|
||||
void* ztrycalloc(size_t size) {
|
||||
// size_t g = mi_good_size(size);
|
||||
// used_memory_tl += g;
|
||||
size_t g = mi_good_size(size);
|
||||
zmalloc_used_memory_tl += g;
|
||||
return mi_heap_calloc(zmalloc_heap, 1, size);
|
||||
}
|
||||
|
||||
|
|
|
@ -51,7 +51,6 @@ int main(int argc, char* argv[]) {
|
|||
mi_option_enable(mi_option_large_os_pages);
|
||||
mi_option_enable(mi_option_show_errors);
|
||||
mi_option_set(mi_option_max_warnings, 0);
|
||||
|
||||
_mi_options_init();
|
||||
|
||||
uring::UringPool pp{1024};
|
||||
|
|
|
@ -20,6 +20,12 @@ using namespace util;
|
|||
namespace this_fiber = ::boost::this_fiber;
|
||||
namespace fibers = ::boost::fibers;
|
||||
|
||||
namespace {
|
||||
|
||||
vector<EngineShardSet::CachedStats> cached_stats; // initialized in EngineShardSet::Init
|
||||
|
||||
} // namespace
|
||||
|
||||
thread_local EngineShard* EngineShard::shard_ = nullptr;
|
||||
constexpr size_t kQueueLen = 64;
|
||||
|
||||
|
@ -107,10 +113,12 @@ void EngineShard::DestroyThreadLocal() {
|
|||
return;
|
||||
|
||||
uint32_t index = shard_->db_slice_.shard_id();
|
||||
mi_heap_t* tlh = shard_->mi_resource_.heap();
|
||||
shard_->~EngineShard();
|
||||
mi_free(shard_);
|
||||
shard_ = nullptr;
|
||||
CompactObj::InitThreadLocal(nullptr);
|
||||
mi_heap_delete(tlh);
|
||||
VLOG(1) << "Shard reset " << index;
|
||||
}
|
||||
|
||||
|
@ -476,6 +484,7 @@ bool EngineShard::HasResultConverged(TxId notifyid) const {
|
|||
}
|
||||
|
||||
void EngineShard::CacheStats() {
|
||||
#if 0
|
||||
mi_heap_t* tlh = mi_resource_.heap();
|
||||
struct Sum {
|
||||
size_t used = 0;
|
||||
|
@ -493,19 +502,22 @@ void EngineShard::CacheStats() {
|
|||
|
||||
DVLOG(1) << "block_size " << block_size << "/" << area->block_size << ", reserved "
|
||||
<< area->reserved << " comitted " << area->committed << " used: " << area->used;
|
||||
|
||||
return true; // continue iteration
|
||||
};
|
||||
|
||||
mi_heap_visit_blocks(tlh, false /* visit all blocks*/, visit_cb, &sum);
|
||||
|
||||
stats_.heap_used_bytes = sum.used;
|
||||
stats_.heap_comitted_bytes = sum.comitted;
|
||||
#endif
|
||||
// mi_heap_visit_blocks(tlh, false /* visit all blocks*/, visit_cb, &sum);
|
||||
mi_stats_merge();
|
||||
// stats_.heap_used_bytes = sum.used;
|
||||
stats_.heap_used_bytes =
|
||||
mi_resource_.used() + zmalloc_used_memory_tl + SmallString::UsedThreadLocal();
|
||||
cached_stats[db_slice_.shard_id()].used_memory.store(stats_.heap_used_bytes,
|
||||
memory_order_relaxed);
|
||||
// stats_.heap_comitted_bytes = sum.comitted;
|
||||
}
|
||||
|
||||
void EngineShardSet::Init(uint32_t sz) {
|
||||
CHECK_EQ(0u, size());
|
||||
|
||||
cached_stats.resize(sz);
|
||||
shard_queue_.resize(sz);
|
||||
}
|
||||
|
||||
|
@ -515,4 +527,8 @@ void EngineShardSet::InitThreadLocal(ProactorBase* pb, bool update_db_time) {
|
|||
shard_queue_[es->shard_id()] = es->GetFiberQueue();
|
||||
}
|
||||
|
||||
const vector<EngineShardSet::CachedStats>& EngineShardSet::GetCachedStats() {
|
||||
return cached_stats;
|
||||
}
|
||||
|
||||
} // namespace dfly
|
||||
|
|
|
@ -13,8 +13,8 @@ extern "C" {
|
|||
#include <xxhash.h>
|
||||
|
||||
#include "base/string_view_sso.h"
|
||||
#include "core/tx_queue.h"
|
||||
#include "core/mi_memory_resource.h"
|
||||
#include "core/tx_queue.h"
|
||||
#include "server/db_slice.h"
|
||||
#include "util/fibers/fiberqueue_threadpool.h"
|
||||
#include "util/fibers/fibers_ext.h"
|
||||
|
@ -35,7 +35,7 @@ class EngineShard {
|
|||
|
||||
// number of bytes comitted by the allocator library (i.e. mmapped into physical memory).
|
||||
//
|
||||
size_t heap_comitted_bytes = 0;
|
||||
// size_t heap_comitted_bytes = 0;
|
||||
};
|
||||
|
||||
// EngineShard() is private down below.
|
||||
|
@ -184,6 +184,16 @@ class EngineShard {
|
|||
|
||||
class EngineShardSet {
|
||||
public:
|
||||
struct CachedStats {
|
||||
std::atomic_uint64_t used_memory;
|
||||
|
||||
CachedStats() : used_memory(0) {
|
||||
}
|
||||
|
||||
CachedStats(const CachedStats& o) : used_memory(o.used_memory.load()) {
|
||||
}
|
||||
};
|
||||
|
||||
explicit EngineShardSet(util::ProactorPool* pp) : pp_(pp) {
|
||||
}
|
||||
|
||||
|
@ -198,6 +208,8 @@ class EngineShardSet {
|
|||
void Init(uint32_t size);
|
||||
void InitThreadLocal(util::ProactorBase* pb, bool update_db_time);
|
||||
|
||||
static const std::vector<CachedStats>& GetCachedStats();
|
||||
|
||||
// Uses a shard queue to dispatch. Callback runs in a dedicated fiber.
|
||||
template <typename F> auto Await(ShardId sid, F&& f) {
|
||||
return shard_queue_[sid]->Await(std::forward<F>(f));
|
||||
|
|
|
@ -308,6 +308,7 @@ void Service::Init(util::AcceptServer* acceptor, const InitOpts& opts) {
|
|||
request_latency_usec.Init(&pp_);
|
||||
StringFamily::Init(&pp_);
|
||||
GenericFamily::Init(&pp_);
|
||||
server_family_.Init(acceptor);
|
||||
cmd_req.Init(&pp_, {"type"});
|
||||
}
|
||||
|
||||
|
@ -322,6 +323,7 @@ void Service::Shutdown() {
|
|||
engine_varz.reset();
|
||||
request_latency_usec.Shutdown();
|
||||
|
||||
// We mark that we are shuttind down.
|
||||
pp_.AwaitFiberOnAll([](ProactorBase* pb) { ServerState::tlocal()->Shutdown(); });
|
||||
|
||||
// to shutdown all the runtime components that depend on EngineShard.
|
||||
|
@ -331,6 +333,9 @@ void Service::Shutdown() {
|
|||
|
||||
cmd_req.Shutdown();
|
||||
shard_set_.RunBlockingInParallel([&](EngineShard*) { EngineShard::DestroyThreadLocal(); });
|
||||
|
||||
// wait for all the pending callbacks to stop.
|
||||
boost::this_fiber::sleep_for(10ms);
|
||||
}
|
||||
|
||||
void Service::DispatchCommand(CmdArgList args, facade::ConnectionContext* cntx) {
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include <absl/cleanup/cleanup.h>
|
||||
#include <absl/random/random.h> // for master_id_ generation.
|
||||
#include <absl/strings/match.h>
|
||||
#include <malloc.h>
|
||||
#include <mimalloc-types.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
@ -39,6 +39,8 @@ DEFINE_string(requirepass, "", "password for AUTH authentication");
|
|||
|
||||
DECLARE_uint32(port);
|
||||
|
||||
extern "C" mi_stats_t _mi_stats_main;
|
||||
|
||||
namespace dfly {
|
||||
|
||||
using namespace std;
|
||||
|
@ -70,6 +72,8 @@ error_code CreateDirs(fs::path dir_path) {
|
|||
return ec;
|
||||
}
|
||||
|
||||
atomic_uint64_t used_mem_peak(0);
|
||||
|
||||
} // namespace
|
||||
|
||||
ServerFamily::ServerFamily(Service* service) : service_(*service), ess_(service->shard_set()) {
|
||||
|
@ -84,12 +88,29 @@ ServerFamily::~ServerFamily() {
|
|||
void ServerFamily::Init(util::AcceptServer* acceptor) {
|
||||
CHECK(acceptor_ == nullptr);
|
||||
acceptor_ = acceptor;
|
||||
|
||||
pb_task_ = ess_.pool()->GetNextProactor();
|
||||
auto cache_cb = [] {
|
||||
uint64_t sum = 0;
|
||||
const auto& stats = EngineShardSet::GetCachedStats();
|
||||
for (const auto& s : stats)
|
||||
sum += s.used_memory.load(memory_order_relaxed);
|
||||
|
||||
// Single writer, so no races.
|
||||
if (sum > used_mem_peak.load(memory_order_relaxed))
|
||||
used_mem_peak.store(sum, memory_order_relaxed);
|
||||
};
|
||||
|
||||
task_10ms_ = pb_task_->AwaitBrief([&] { return pb_task_->AddPeriodic(10, cache_cb); });
|
||||
}
|
||||
|
||||
void ServerFamily::Shutdown() {
|
||||
VLOG(1) << "ServerFamily::Shutdown";
|
||||
|
||||
service_.proactor_pool().GetNextProactor()->Await([this] {
|
||||
pb_task_->Await([this] {
|
||||
pb_task_->CancelPeriodic(task_10ms_);
|
||||
task_10ms_ = 0;
|
||||
|
||||
unique_lock lk(replica_of_mu_);
|
||||
if (replica_) {
|
||||
replica_->Stop();
|
||||
|
@ -321,7 +342,7 @@ Metrics ServerFamily::GetMetrics() const {
|
|||
result.events += db_stats.events;
|
||||
|
||||
EngineShard::Stats shard_stats = shard->stats();
|
||||
result.heap_comitted_bytes += shard_stats.heap_comitted_bytes;
|
||||
// result.heap_comitted_bytes += shard_stats.heap_comitted_bytes;
|
||||
result.heap_used_bytes += shard_stats.heap_used_bytes;
|
||||
}
|
||||
};
|
||||
|
@ -385,7 +406,7 @@ tcp_port:)";
|
|||
|
||||
absl::StrAppend(&info, "used_memory:", m.heap_used_bytes, "\n");
|
||||
absl::StrAppend(&info, "used_memory_human:", HumanReadableNumBytes(m.heap_used_bytes), "\n");
|
||||
absl::StrAppend(&info, "comitted_memory:", m.heap_comitted_bytes, "\n");
|
||||
absl::StrAppend(&info, "comitted_memory:", _mi_stats_main.committed.current, "\n");
|
||||
|
||||
if (sdata_res.has_value()) {
|
||||
absl::StrAppend(&info, "used_memory_rss:", sdata_res->vm_rss, "\n");
|
||||
|
@ -395,13 +416,12 @@ tcp_port:)";
|
|||
LOG(ERROR) << "Error fetching /proc/self/status stats";
|
||||
}
|
||||
|
||||
// TBD: should be the max of all seen used_memory values.
|
||||
absl::StrAppend(&info, "used_memory_peak:", -1, "\n");
|
||||
absl::StrAppend(&info, "used_memory_peak:", used_mem_peak.load(memory_order_relaxed), "\n");
|
||||
|
||||
// Blob - all these cases where the key/objects are represented by a single blob allocated on
|
||||
// heap. For example, strings or intsets. members of lists, sets, zsets etc
|
||||
// are not accounted for to avoid complex computations. In some cases, when number of members is
|
||||
// known we approximate their allocations by taking 16 bytes per member.
|
||||
// are not accounted for to avoid complex computations. In some cases, when number of members
|
||||
// is known we approximate their allocations by taking 16 bytes per member.
|
||||
absl::StrAppend(&info, "blob_used_memory:", m.db.obj_memory_usage, "\n");
|
||||
absl::StrAppend(&info, "table_used_memory:", m.db.table_mem_usage, "\n");
|
||||
absl::StrAppend(&info, "num_buckets:", m.db.bucket_count, "\n");
|
||||
|
@ -440,9 +460,9 @@ tcp_port:)";
|
|||
} else {
|
||||
absl::StrAppend(&info, "role:slave\n");
|
||||
|
||||
// it's safe to access replica_ because replica_ is created before etl.is_master set to false
|
||||
// and cleared after etl.is_master is set to true. And since the code here that checks for
|
||||
// is_master and copies shared_ptr is atomic, it1 should be correct.
|
||||
// it's safe to access replica_ because replica_ is created before etl.is_master set to
|
||||
// false and cleared after etl.is_master is set to true. And since the code here that checks
|
||||
// for is_master and copies shared_ptr is atomic, it1 should be correct.
|
||||
auto replica_ptr = replica_;
|
||||
Replica::Info rinfo = replica_ptr->GetInfo();
|
||||
absl::StrAppend(&info, "master_host:", rinfo.host, "\n");
|
||||
|
|
|
@ -77,10 +77,12 @@ class ServerFamily {
|
|||
|
||||
void SyncGeneric(std::string_view repl_master_id, uint64_t offs, ConnectionContext* cntx);
|
||||
|
||||
uint32_t task_10ms_ = 0;
|
||||
Service& service_;
|
||||
EngineShardSet& ess_;
|
||||
|
||||
util::AcceptServer* acceptor_ = nullptr;
|
||||
util::ProactorBase* pb_task_ = nullptr;
|
||||
::boost::fibers::mutex replica_of_mu_;
|
||||
std::shared_ptr<Replica> replica_; // protected by replica_of_mu_
|
||||
|
||||
|
@ -89,6 +91,7 @@ class ServerFamily {
|
|||
std::atomic<int64_t> last_save_; // in seconds.
|
||||
GlobalState global_state_;
|
||||
time_t start_time_ = 0; // in seconds, epoch time.
|
||||
|
||||
};
|
||||
|
||||
} // namespace dfly
|
||||
|
|
Loading…
Reference in New Issue