Allow external allocations for sizes greater than 1MB

This commit is contained in:
Roman Gershman 2022-04-29 09:38:01 +03:00
parent 0daa221ff5
commit 2966d04743
5 changed files with 137 additions and 146 deletions

View File

@ -66,46 +66,55 @@ optional<pair<size_t, size_t>> ExtentTree::GetRange(size_t len, size_t align) {
auto it = len_extents_.lower_bound(pair{len, 0}); auto it = len_extents_.lower_bound(pair{len, 0});
if (it == len_extents_.end()) if (it == len_extents_.end())
return nullopt; return nullopt;
size_t amask = align - 1; size_t amask = align - 1;
size_t aligned_start = 0; size_t aligned_start = it->second;
size_t extent_end = it->first + it->second;
while (true) { while (true) {
aligned_start = it->second; if ((aligned_start & amask) == 0) // aligned
if ((aligned_start & amask) == 0)
break; break;
size_t end = it->first + aligned_start;
// round up to the next aligned address // round up to the next aligned address
aligned_start = align + (aligned_start & (~amask)); aligned_start = (aligned_start + amask) & (~amask);
if (aligned_start + len <= end)
if (aligned_start + len <= extent_end) // check if we are still inside the extent
break; break;
++it; ++it;
if (it == len_extents_.end()) if (it == len_extents_.end())
return nullopt; return nullopt;
aligned_start = it->second;
extent_end = it->first + it->second;
} }
DCHECK_GE(aligned_start, it->second); DCHECK_GE(aligned_start, it->second);
// if we are here - we found the range starting at aligned_start. // if we are here - we found the range starting at aligned_start.
// now we need to possibly break the existing extent into several parts or completely
// delete it.
auto eit = extents_.find(it->second); auto eit = extents_.find(it->second);
DCHECK(eit != extents_.end()); DCHECK(eit != extents_.end());
size_t end = eit->second;
size_t range_end = aligned_start + len; size_t range_end = aligned_start + len;
len_extents_.erase(it); len_extents_.erase(it);
// we break the interval [eit->first, eit->second] into either 0, 1 or 2 intervals. // we break the extent [eit->first, eit->second] into either 0, 1 or 2 intervals.
if (aligned_start > eit->first) { if (aligned_start > eit->first) { // do we have prefix?
eit->second = aligned_start; eit->second = aligned_start;
len_extents_.emplace(eit->second - eit->first, eit->first); len_extents_.emplace(eit->second - eit->first, eit->first);
} else { } else {
extents_.erase(eit); extents_.erase(eit);
} }
if (aligned_start + len < end) { if (range_end < extent_end) { // do we have suffix?
extents_.emplace(range_end, end); extents_.emplace(range_end, extent_end);
len_extents_.emplace(end - range_end, range_end); len_extents_.emplace(extent_end - range_end, range_end);
} }
DCHECK_EQ(range_end - aligned_start, len);
return pair{aligned_start, range_end}; return pair{aligned_start, range_end};
} }

View File

@ -18,7 +18,7 @@ class ExtentTree {
public: public:
void Add(size_t start, size_t len); void Add(size_t start, size_t len);
// in case of success, returns (start, end) pair where end-start >= len and // in case of success, returns (start, end) pair, where (end-start) >= len and
// start is aligned by align. // start is aligned by align.
std::optional<std::pair<size_t, size_t>> GetRange(size_t len, size_t align); std::optional<std::pair<size_t, size_t>> GetRange(size_t len, size_t align);

View File

@ -23,6 +23,11 @@ constexpr inline size_t divup(size_t num, size_t div) {
return (num + div - 1) / div; return (num + div - 1) / div;
} }
constexpr inline size_t alignup(size_t num, size_t align) {
size_t amask = align - 1;
return (num + amask) & (~amask);
}
constexpr inline size_t wsize_from_size(size_t size) { constexpr inline size_t wsize_from_size(size_t size) {
return divup(size, sizeof(uintptr_t)); return divup(size, sizeof(uintptr_t));
} }
@ -33,20 +38,23 @@ constexpr size_t kSmallPageShift = 21;
constexpr size_t kMediumPageShift = 24; constexpr size_t kMediumPageShift = 24;
constexpr size_t kSmallPageSize = 1UL << kSmallPageShift; // 2MB constexpr size_t kSmallPageSize = 1UL << kSmallPageShift; // 2MB
constexpr size_t kMediumPageSize = 1UL << kMediumPageShift; // 16MB constexpr size_t kMediumPageSize = 1UL << kMediumPageShift; // 16MB
constexpr size_t kMediumObjMaxSize = kMediumPageSize / 8;
// we preserve 16:1 ratio, i.e. each page can host at least 16 blocks within its class.
constexpr size_t kSmallObjMaxSize = kSmallPageSize / 16;
constexpr size_t kMediumObjMaxSize = kMediumPageSize / 16;
constexpr size_t kSegmentAlignment = 256_MB; constexpr size_t kSegmentAlignment = 256_MB;
constexpr size_t kSegmentDefaultSize = 256_MB; constexpr size_t kSegmentSize = 256_MB;
constexpr unsigned kNumBins = detail::kNumFreePages; constexpr unsigned kNumBins = detail::kNumFreePages;
constexpr unsigned kLargeSizeBin = kNumBins - 1; constexpr unsigned kLargeSizeBin = kNumBins - 1;
constexpr unsigned kMaxPagesInSegment = kSegmentDefaultSize / kSmallPageSize; constexpr unsigned kMaxPagesInSegment = kSegmentSize / kSmallPageSize;
constexpr unsigned kSegDescrAlignment = 8_KB; constexpr unsigned kSegDescrAlignment = 8_KB;
constexpr size_t kBinWordLens[kNumBins] = { constexpr size_t kBinWordLens[kNumBins] = {
1024, 1024 * 2, 1024 * 3, 4096, 5120, 6144, 7168, 8192, 10240, 12288, 1024, 1024 * 2, 1024 * 3, 4096, 5120, 6144, 7168, 8192, 10240,
14336, 16384, 20480, 24576, 28672, 32768, 40960, 49152, 57344, 65536, 12288, 14336, 16384, 20480, 24576, 28672, 32768, 40960, 49152,
81920, 98304, 114688, 131072, 163840, 196608, 229376, 262144, UINT64_MAX}; 57344, 65536, 81920, 98304, 114688, 131072, UINT64_MAX};
static_assert(kBinWordLens[kLargeSizeBin - 1] * 8 == kMediumObjMaxSize); static_assert(kBinWordLens[kLargeSizeBin - 1] * 8 == kMediumObjMaxSize);
static_assert(kBinWordLens[kLargeSizeBin] == UINT64_MAX); static_assert(kBinWordLens[kLargeSizeBin] == UINT64_MAX);
@ -81,16 +89,6 @@ static_assert(ToBinIdx(kMinBlockSize * 6) == 5);
static_assert(ToBinIdx(kMinBlockSize * 6 + 1) == 6); static_assert(ToBinIdx(kMinBlockSize * 6 + 1) == 6);
static_assert(ToBinIdx(kMinBlockSize * 7) == 6); static_assert(ToBinIdx(kMinBlockSize * 7) == 6);
// we preserve 8:1 ratio, i.e. each page can host at least 8 blocks within its class.
PageClass ClassFromSize(size_t size) {
if (size <= kSmallPageSize / 8)
return PageClass::SMALL_P;
if (size <= kMediumPageSize / 8)
return PageClass::MEDIUM_P;
return PageClass::LARGE_P;
}
size_t ToBlockSize(BinIdx idx) { size_t ToBlockSize(BinIdx idx) {
return kBinWordLens[idx] * 8; return kBinWordLens[idx] * 8;
} }
@ -99,9 +97,9 @@ size_t ToBlockSize(BinIdx idx) {
unsigned NumPagesInSegment(PageClass pc) { unsigned NumPagesInSegment(PageClass pc) {
switch (pc) { switch (pc) {
case PageClass::SMALL_P: case PageClass::SMALL_P:
return kSegmentDefaultSize >> kSmallPageShift; return kSegmentSize >> kSmallPageShift;
case PageClass::MEDIUM_P: case PageClass::MEDIUM_P:
return kSegmentDefaultSize >> kMediumPageShift; return kSegmentSize >> kMediumPageShift;
break; break;
case PageClass::LARGE_P: case PageClass::LARGE_P:
return 1; return 1;
@ -166,6 +164,15 @@ void Page::Init(PageClass pc, BinIdx bin_id) {
} }
} }
PageClass ClassFromSize(size_t size) {
if (size <= kSmallObjMaxSize)
return PageClass::SMALL_P;
if (size <= kMediumObjMaxSize)
return PageClass::MEDIUM_P;
return PageClass::LARGE_P;
}
} // namespace detail } // namespace detail
// //
@ -193,32 +200,32 @@ class ExternalAllocator::SegmentDescr {
explicit SegmentDescr(PageClass pc, size_t offs, uint16_t capacity); explicit SegmentDescr(PageClass pc, size_t offs, uint16_t capacity);
Page* FindPageSegment() { Page* FindPageSegment() {
return pi_.FindPageSegment(); return page_info_.FindPageSegment();
} }
Page* GetPage(unsigned i) { Page* GetPage(unsigned i) {
return pi_.pages + i; return page_info_.pages + i;
} }
size_t BlockOffset(const Page* page, unsigned blockpos) { size_t BlockOffset(const Page* page, unsigned blockpos) {
return offset_ + page->id * (1 << pi_.page_shift) + return offset_ + page->id * (1 << page_info_.page_shift) +
ToBlockSize(page->block_size_bin) * blockpos; ToBlockSize(page->block_size_bin) * blockpos;
} }
bool HasFreePages() const { bool HasFreePages() const {
return pi_.capacity > pi_.used; return page_info_.capacity > page_info_.used;
} }
unsigned capacity() const { unsigned capacity() const {
return pi_.capacity; return page_info_.capacity;
} }
unsigned used() const { unsigned used() const {
return pi_.used; return page_info_.used;
} }
unsigned page_shift() const { unsigned page_shift() const {
return pi_.page_shift; return page_info_.page_shift;
} }
PageClass page_class() const { PageClass page_class() const {
@ -277,18 +284,11 @@ class ExternalAllocator::SegmentDescr {
} }
}; };
struct LargeInfo { PageInfo page_info_;
size_t seg_size;
};
union {
PageInfo pi_;
LargeInfo li_;
};
}; };
ExternalAllocator::SegmentDescr::SegmentDescr(PageClass pc, size_t offs, uint16_t capacity) ExternalAllocator::SegmentDescr::SegmentDescr(PageClass pc, size_t offs, uint16_t page_capacity)
: offset_(offs), page_class_(pc), pi_(capacity) { : offset_(offs), page_class_(pc), page_info_(page_capacity) {
constexpr size_t kDescrSize = sizeof(SegmentDescr); constexpr size_t kDescrSize = sizeof(SegmentDescr);
(void)kDescrSize; (void)kDescrSize;
@ -296,12 +296,12 @@ ExternalAllocator::SegmentDescr::SegmentDescr(PageClass pc, size_t offs, uint16_
DCHECK(pc != PageClass::LARGE_P); DCHECK(pc != PageClass::LARGE_P);
if (pc == PageClass::MEDIUM_P) if (pc == PageClass::MEDIUM_P)
pi_.page_shift = kMediumPageShift; page_info_.page_shift = kMediumPageShift;
else else
pi_.page_shift = kSmallPageShift; page_info_.page_shift = kSmallPageShift;
for (unsigned i = 0; i < capacity; ++i) { for (unsigned i = 0; i < page_capacity; ++i) {
pi_.pages[i].Reset(i); page_info_.pages[i].Reset(i);
} }
} }
@ -323,20 +323,15 @@ int64_t ExternalAllocator::Malloc(size_t sz) {
Page* page = free_pages_[bin_idx]; Page* page = free_pages_[bin_idx];
if (page->available == 0) { // empty page. if (page->available == 0) { // empty page.
PageClass pc = ClassFromSize(sz); PageClass pc = detail::ClassFromSize(sz);
if (pc == PageClass::LARGE_P) { if (pc == PageClass::LARGE_P) {
size_t req_seg_size = 0; return LargeMalloc(sz);
page = FindLargePage(sz, &req_seg_size);
if (!page)
return -int64_t(req_seg_size);
} else {
page = FindPage(pc);
if (!page)
return -int64_t(kSegmentDefaultSize);
free_pages_[bin_idx] = page;
} }
page = FindPage(pc);
if (!page)
return -int64_t(kSegmentSize);
free_pages_[bin_idx] = page;
page->Init(pc, bin_idx); page->Init(pc, bin_idx);
} }
@ -381,26 +376,8 @@ void ExternalAllocator::Free(size_t offset, size_t sz) {
allocated_bytes_ -= block_size; allocated_bytes_ -= block_size;
} }
void ExternalAllocator::AddStorage(size_t offset, size_t size) { void ExternalAllocator::AddStorage(size_t start, size_t size) {
CHECK_EQ(256_MB, size); extent_tree_.Add(start, size);
CHECK_EQ(0u, offset % 256_MB);
size_t idx = offset / 256_MB;
CHECK_LE(segments_.size(), idx);
auto [it, added] = segm_intervals_.emplace(offset, size);
CHECK(added);
if (it != segm_intervals_.begin()) {
auto prev = it;
--prev;
CHECK_LE(prev->first + prev->second, offset);
}
auto next = it;
++next;
if (next != segm_intervals_.end()) {
CHECK_LE(offset + size, next->first);
}
capacity_ += size; capacity_ += size;
} }
@ -409,17 +386,9 @@ size_t ExternalAllocator::GoodSize(size_t sz) {
if (bin_idx < kLargeSizeBin) if (bin_idx < kLargeSizeBin)
return ToBlockSize(bin_idx); return ToBlockSize(bin_idx);
return divup(sz, 4_KB) * 4_KB; return alignup(sz, 4_KB);
} }
detail::PageClass ExternalAllocator::PageClassFromOffset(size_t offset) const {
size_t idx = offset / 256_MB;
CHECK_LT(idx, segments_.size());
CHECK(segments_[idx]);
SegmentDescr* seg = segments_[idx];
return seg->page_class();
}
/** /**
* *
@ -438,35 +407,35 @@ auto ExternalAllocator::FindPage(PageClass pc) -> Page* {
DCHECK_NE(pc, PageClass::LARGE_P); DCHECK_NE(pc, PageClass::LARGE_P);
SegmentDescr* seg = sq_[pc]; SegmentDescr* seg = sq_[pc];
if (seg) { while (seg) {
while (true) { if (seg->HasFreePages()) {
if (seg->HasFreePages()) { return seg->FindPageSegment();
return seg->FindPageSegment();
}
// remove head.
SegmentDescr* next = seg->Detach();
sq_[pc] = next;
if (next == nullptr) {
break;
}
seg = next;
} }
// remove head.
SegmentDescr* next = seg->Detach();
sq_[pc] = next;
seg = next;
} }
if (!segm_intervals_.empty()) { // no pages in the existing segments. Lets search in the extent tree.
auto op_range = extent_tree_.GetRange(kSegmentSize, kSegmentAlignment);
if (op_range) {
DCHECK_EQ(0u, op_range->first % kSegmentAlignment);
unsigned num_pages = NumPagesInSegment(pc); unsigned num_pages = NumPagesInSegment(pc);
size_t seg_idx = op_range->first / kSegmentAlignment;
auto it = segm_intervals_.begin(); if (segments_.size() > seg_idx) {
size_t seg_idx = it->first / kSegmentAlignment; DCHECK(segments_[seg_idx] == nullptr);
CHECK_LE(segments_.size(), seg_idx); } else {
segments_.resize(seg_idx + 1);
}
segments_.resize(seg_idx + 1);
void* ptr = void* ptr =
mi_malloc_aligned(sizeof(SegmentDescr) + num_pages * sizeof(Page), kSegDescrAlignment); mi_malloc_aligned(sizeof(SegmentDescr) + num_pages * sizeof(Page), kSegDescrAlignment);
SegmentDescr* seg = new (ptr) SegmentDescr(pc, it->first, num_pages); SegmentDescr* seg = new (ptr) SegmentDescr(pc, op_range->first, num_pages);
segments_[seg_idx] = seg; segments_[seg_idx] = seg;
segm_intervals_.erase(it);
DCHECK(sq_[pc] == NULL); DCHECK(sq_[pc] == NULL);
DCHECK(seg->next == seg->prev && seg == seg->next); DCHECK(seg->next == seg->prev && seg == seg->next);
@ -478,12 +447,15 @@ auto ExternalAllocator::FindPage(PageClass pc) -> Page* {
return nullptr; return nullptr;
} }
auto ExternalAllocator::FindLargePage(size_t size, size_t* segment_size) -> Page* { int64_t ExternalAllocator::LargeMalloc(size_t size) {
LOG(FATAL) << "TBD"; size_t align_sz = alignup(size, 4_KB);
// size_t aligned_blocks = divup(size, 4_KB); auto op_range = extent_tree_.GetRange(align_sz, 4_KB);
// size_t offset = GetLargeInterval(aligned_blocks); if (!op_range) {
// align_sz = max(align_sz, kSegmentSize);
return nullptr; return -int64_t(align_sz);
}
return op_range->first;
} }
void ExternalAllocator::FreePage(Page* page, SegmentDescr* owner, size_t block_size) { void ExternalAllocator::FreePage(Page* page, SegmentDescr* owner, size_t block_size) {
@ -512,7 +484,7 @@ void ExternalAllocator::FreePage(Page* page, SegmentDescr* owner, size_t block_s
sq->LinkBefore(owner); sq->LinkBefore(owner);
} }
} }
--owner->pi_.used; --owner->page_info_.used;
} }
inline auto ExternalAllocator::ToSegDescr(Page* page) -> SegmentDescr* { inline auto ExternalAllocator::ToSegDescr(Page* page) -> SegmentDescr* {

View File

@ -3,12 +3,13 @@
// //
#pragma once #pragma once
#include <absl/container/btree_map.h>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <vector> #include <vector>
#include "core/extent_tree.h"
namespace dfly { namespace dfly {
constexpr inline unsigned long long operator""_MB(unsigned long long x) { constexpr inline unsigned long long operator""_MB(unsigned long long x) {
@ -34,19 +35,21 @@ constexpr inline unsigned long long operator""_KB(unsigned long long x) {
namespace detail { namespace detail {
class Page; class Page;
constexpr unsigned kNumFreePages = 29; constexpr unsigned kNumFreePages = 25;
/** /**
* page classes can be SMALL, MEDIUM or LARGE. SMALL (2MB) for block sizes up to 256KB. * page classes can be SMALL, MEDIUM or LARGE. SMALL (2MB) for block sizes up to 128KB.
* MEDIUM (16MB) for block sizes 256KB-2MB. Anything else is LARGE. * MEDIUM (16MB) for block sizes 128KB-1MB. Anything else is LARGE.
* *
*/ */
enum PageClass : uint8_t { enum PageClass : uint16_t {
SMALL_P = 0, SMALL_P = 0,
MEDIUM_P = 1, MEDIUM_P = 1,
LARGE_P = 2, LARGE_P = 2,
}; };
PageClass ClassFromSize(size_t size);
} // namespace detail } // namespace detail
class ExternalAllocator { class ExternalAllocator {
@ -65,13 +68,12 @@ class ExternalAllocator {
// For results >= 0 Returns offset to the backing storage where we may write the data of // For results >= 0 Returns offset to the backing storage where we may write the data of
// size sz. // size sz.
int64_t Malloc(size_t sz); int64_t Malloc(size_t sz);
void Free(size_t offset, size_t sz); void Free(size_t offset, size_t sz);
/// Adds backing storage to the allocator. /// Adds backing storage to the allocator. The range should not overlap with already
/// offset must be aligned to kExtAlignment boundaries. /// added storage ranges.
/// It is expected that storage is added in a linear fashion, without skipping ranges. void AddStorage(size_t start, size_t size);
/// So if [0, 256MB) is added, then next time [256MB, 512MB) is added etc.
void AddStorage(size_t offset, size_t size);
// Similar to mi_good_size, returns the size of the underlying block as if it // Similar to mi_good_size, returns the size of the underlying block as if it
// were returned by Malloc. It is guaranteed that the result is not less than sz. // were returned by Malloc. It is guaranteed that the result is not less than sz.
@ -86,9 +88,6 @@ class ExternalAllocator {
return allocated_bytes_; return allocated_bytes_;
} }
// accessors useful for tests.
detail::PageClass PageClassFromOffset(size_t offset) const;
private: private:
class SegmentDescr; class SegmentDescr;
using Page = detail::Page; using Page = detail::Page;
@ -97,7 +96,7 @@ class ExternalAllocator {
// Returns NULL if no page is found. // Returns NULL if no page is found.
Page* FindPage(detail::PageClass sc); Page* FindPage(detail::PageClass sc);
Page* FindLargePage(size_t size, size_t* segment_size); int64_t LargeMalloc(size_t size);
SegmentDescr* GetNewSegment(detail::PageClass sc); SegmentDescr* GetNewSegment(detail::PageClass sc);
void FreePage(Page* page, SegmentDescr* owner, size_t block_size); void FreePage(Page* page, SegmentDescr* owner, size_t block_size);
@ -109,10 +108,7 @@ class ExternalAllocator {
// A segment for each 256MB range. To get a segment id from the offset, shift right by 28. // A segment for each 256MB range. To get a segment id from the offset, shift right by 28.
std::vector<SegmentDescr*> segments_; std::vector<SegmentDescr*> segments_;
// weird queue to support AddStorage interface. We can not instantiate segment ExtentTree extent_tree_;
// until we know its class and that we know only when a page is demanded.
// sorted map of offset -> size.
absl::btree_map<size_t, size_t> segm_intervals_;
size_t capacity_ = 0; // in bytes. size_t capacity_ = 0; // in bytes.
size_t allocated_bytes_ = 0; size_t allocated_bytes_ = 0;

View File

@ -87,18 +87,32 @@ TEST_F(ExternalAllocatorTest, Invariants) {
} }
TEST_F(ExternalAllocatorTest, Classes) { TEST_F(ExternalAllocatorTest, Classes) {
using detail::ClassFromSize;
ext_alloc_.AddStorage(0, kSegSize); ext_alloc_.AddStorage(0, kSegSize);
off_t offs1 = ext_alloc_.Malloc(256_KB); ASSERT_EQ(detail::SMALL_P, ClassFromSize(128_KB));
EXPECT_EQ(detail::SMALL_P, ext_alloc_.PageClassFromOffset(offs1)); ASSERT_EQ(detail::MEDIUM_P, ClassFromSize(128_KB + 1));
off_t offs2 = ext_alloc_.Malloc(256_KB + 1); ASSERT_EQ(detail::LARGE_P, ClassFromSize(1_MB + 1));
off_t offs1 = ext_alloc_.Malloc(128_KB);
EXPECT_EQ(offs1, 0);
off_t offs2 = ext_alloc_.Malloc(128_KB + 1);
EXPECT_EQ(offs2, -kSegSize); EXPECT_EQ(offs2, -kSegSize);
ext_alloc_.AddStorage(kSegSize, kSegSize); ext_alloc_.AddStorage(kSegSize, kSegSize);
offs2 = ext_alloc_.Malloc(256_KB + 1); offs2 = ext_alloc_.Malloc(128_KB + 1);
EXPECT_EQ(detail::MEDIUM_P, ext_alloc_.PageClassFromOffset(offs2)); ASSERT_GT(offs2, 0);
off_t offs3 = ext_alloc_.Malloc(2_MB); offs2 = ext_alloc_.Malloc(1_MB);
EXPECT_EQ(detail::MEDIUM_P, ext_alloc_.PageClassFromOffset(offs3)); ASSERT_GT(offs2, 0);
EXPECT_EQ(2_MB, ExternalAllocator::GoodSize(2_MB));
off_t offs3 = ext_alloc_.Malloc(1_MB + 1);
ASSERT_LT(offs3, 0);
ext_alloc_.AddStorage(kSegSize * 2, kSegSize);
offs3 = ext_alloc_.Malloc(1_MB + 1);
ASSERT_GT(offs3, 0);
EXPECT_EQ(1_MB + 4_KB, ExternalAllocator::GoodSize(1_MB + 1));
} }
} // namespace dfly } // namespace dfly