From 2966d04743408c56506267ccac6825de8acc580a Mon Sep 17 00:00:00 2001
From: Roman Gershman
Date: Fri, 29 Apr 2022 09:38:01 +0300
Subject: [PATCH] Allow external allocations for sizes greater than 1MB

---
 src/core/extent_tree.cc         |  33 +++---
 src/core/extent_tree.h          |   2 +-
 src/core/external_alloc.cc      | 184 ++++++++++++++------------------
 src/core/external_alloc.h       |  34 +++---
 src/core/external_alloc_test.cc |  30 ++++--
 5 files changed, 137 insertions(+), 146 deletions(-)

diff --git a/src/core/extent_tree.cc b/src/core/extent_tree.cc
index 85c2f0e..80438e8 100644
--- a/src/core/extent_tree.cc
+++ b/src/core/extent_tree.cc
@@ -66,46 +66,55 @@ optional<pair<size_t, size_t>> ExtentTree::GetRange(size_t len, size_t align) {
   auto it = len_extents_.lower_bound(pair{len, 0});
   if (it == len_extents_.end())
     return nullopt;
+
   size_t amask = align - 1;
-  size_t aligned_start = 0;
+  size_t aligned_start = it->second;
+  size_t extent_end = it->first + it->second;
 
   while (true) {
-    aligned_start = it->second;
-    if ((aligned_start & amask) == 0)
+    if ((aligned_start & amask) == 0)  // aligned
       break;
-    size_t end = it->first + aligned_start;
 
-    aligned_start = align + (aligned_start & (~amask));
-    if (aligned_start + len <= end)
+    // round up to the next aligned address
+    aligned_start = (aligned_start + amask) & (~amask);
+
+    if (aligned_start + len <= extent_end)  // check if we are still inside the extent
      break;
 
     ++it;
     if (it == len_extents_.end())
       return nullopt;
+
+    aligned_start = it->second;
+    extent_end = it->first + it->second;
   }
 
   DCHECK_GE(aligned_start, it->second);
 
+  // if we are here - we found the range starting at aligned_start.
+  // now we need to possibly break the existing extent into several parts or completely
+  // delete it.
   auto eit = extents_.find(it->second);
   DCHECK(eit != extents_.end());
 
-  size_t end = eit->second;
   size_t range_end = aligned_start + len;
   len_extents_.erase(it);
 
-  // we break the interval [eit->first, eit->second] to either 0, 1 or 2 intervals.
-  if (aligned_start > eit->first) {
+  // we break the extent [eit->first, eit->second] into either 0, 1 or 2 intervals.
+  if (aligned_start > eit->first) {  // do we have a prefix?
     eit->second = aligned_start;
     len_extents_.emplace(eit->second - eit->first, eit->first);
   } else {
     extents_.erase(eit);
   }
 
-  if (aligned_start + len < end) {
-    extents_.emplace(range_end, end);
-    len_extents_.emplace(end - range_end, range_end);
+  if (range_end < extent_end) {  // do we have a suffix?
+    extents_.emplace(range_end, extent_end);
+    len_extents_.emplace(extent_end - range_end, range_end);
   }
 
+  DCHECK_EQ(range_end - aligned_start, len);
+
   return pair{aligned_start, range_end};
 }
diff --git a/src/core/extent_tree.h b/src/core/extent_tree.h
index de2d34f..3152e52 100644
--- a/src/core/extent_tree.h
+++ b/src/core/extent_tree.h
@@ -18,7 +18,7 @@ class ExtentTree {
  public:
   void Add(size_t start, size_t len);
 
-  // in case of success, returns (start, end) pair where end-start >= len and
+  // in case of success, returns (start, end) pair, where (end-start) >= len and
   // start is aligned by align.
   std::optional<std::pair<size_t, size_t>> GetRange(size_t len, size_t align);
diff --git a/src/core/external_alloc.cc b/src/core/external_alloc.cc
index 0304dfc..8204455 100644
--- a/src/core/external_alloc.cc
+++ b/src/core/external_alloc.cc
@@ -23,6 +23,11 @@ constexpr inline size_t divup(size_t num, size_t div) {
   return (num + div - 1) / div;
 }
 
+constexpr inline size_t alignup(size_t num, size_t align) {
+  size_t amask = align - 1;
+  return (num + amask) & (~amask);
+}
+
 constexpr inline size_t wsize_from_size(size_t size) {
   return divup(size, sizeof(uintptr_t));
 }
 
@@ -33,20 +38,23 @@
 constexpr size_t kSmallPageShift = 21;
 constexpr size_t kMediumPageShift = 24;
 constexpr size_t kSmallPageSize = 1UL << kSmallPageShift;    // 2MB
 constexpr size_t kMediumPageSize = 1UL << kMediumPageShift;  // 16MB
-constexpr size_t kMediumObjMaxSize = kMediumPageSize / 8;
+
+// we preserve 16:1 ratio, i.e. each page can host at least 16 blocks within its class.
+constexpr size_t kSmallObjMaxSize = kSmallPageSize / 16;
+constexpr size_t kMediumObjMaxSize = kMediumPageSize / 16;
 
 constexpr size_t kSegmentAlignment = 256_MB;
-constexpr size_t kSegmentDefaultSize = 256_MB;
+constexpr size_t kSegmentSize = 256_MB;
 
 constexpr unsigned kNumBins = detail::kNumFreePages;
 constexpr unsigned kLargeSizeBin = kNumBins - 1;
-constexpr unsigned kMaxPagesInSegment = kSegmentDefaultSize / kSmallPageSize;
+constexpr unsigned kMaxPagesInSegment = kSegmentSize / kSmallPageSize;
 constexpr unsigned kSegDescrAlignment = 8_KB;
 
 constexpr size_t kBinWordLens[kNumBins] = {
-    1024,  1024 * 2, 1024 * 3, 4096,   5120,   6144,   7168,   8192,   10240, 12288,
-    14336, 16384,    20480,    24576,  28672,  32768,  40960,  49152,  57344, 65536,
-    81920, 98304,    114688,   131072, 163840, 196608, 229376, 262144, UINT64_MAX};
+    1024,  1024 * 2, 1024 * 3, 4096,  5120,   6144,   7168,  8192,  10240,
+    12288, 14336,    16384,    20480, 24576,  28672,  32768, 40960, 49152,
+    57344, 65536,    81920,    98304, 114688, 131072, UINT64_MAX};
 
 static_assert(kBinWordLens[kLargeSizeBin - 1] * 8 == kMediumObjMaxSize);
 static_assert(kBinWordLens[kLargeSizeBin] == UINT64_MAX);
 
@@ -81,16 +89,6 @@
 static_assert(ToBinIdx(kMinBlockSize * 6) == 5);
 static_assert(ToBinIdx(kMinBlockSize * 6 + 1) == 6);
 static_assert(ToBinIdx(kMinBlockSize * 7) == 6);
 
-// we preserve 8:1 ratio, i.e. each page can host at least 8 blocks within its class.
-PageClass ClassFromSize(size_t size) {
-  if (size <= kSmallPageSize / 8)
-    return PageClass::SMALL_P;
-  if (size <= kMediumPageSize / 8)
-    return PageClass::MEDIUM_P;
-
-  return PageClass::LARGE_P;
-}
-
 size_t ToBlockSize(BinIdx idx) {
   return kBinWordLens[idx] * 8;
 }
 
@@ -99,9 +97,9 @@
 unsigned NumPagesInSegment(PageClass pc) {
   switch (pc) {
     case PageClass::SMALL_P:
-      return kSegmentDefaultSize >> kSmallPageShift;
+      return kSegmentSize >> kSmallPageShift;
     case PageClass::MEDIUM_P:
-      return kSegmentDefaultSize >> kMediumPageShift;
+      return kSegmentSize >> kMediumPageShift;
       break;
     case PageClass::LARGE_P:
       return 1;
@@ -166,6 +164,15 @@ void Page::Init(PageClass pc, BinIdx bin_id) {
   }
 }
 
+PageClass ClassFromSize(size_t size) {
+  if (size <= kSmallObjMaxSize)
+    return PageClass::SMALL_P;
+  if (size <= kMediumObjMaxSize)
+    return PageClass::MEDIUM_P;
+
+  return PageClass::LARGE_P;
+}
+
 }  // namespace detail
 
 //
@@ -193,32 +200,32 @@ class ExternalAllocator::SegmentDescr {
   explicit SegmentDescr(PageClass pc, size_t offs, uint16_t capacity);
 
   Page* FindPageSegment() {
-    return pi_.FindPageSegment();
+    return page_info_.FindPageSegment();
   }
 
   Page* GetPage(unsigned i) {
-    return pi_.pages + i;
+    return page_info_.pages + i;
   }
 
   size_t BlockOffset(const Page* page, unsigned blockpos) {
-    return offset_ + page->id * (1 << pi_.page_shift) +
+    return offset_ + page->id * (1 << page_info_.page_shift) +
            ToBlockSize(page->block_size_bin) * blockpos;
   }
 
   bool HasFreePages() const {
-    return pi_.capacity > pi_.used;
+    return page_info_.capacity > page_info_.used;
   }
 
   unsigned capacity() const {
-    return pi_.capacity;
+    return page_info_.capacity;
   }
 
   unsigned used() const {
-    return pi_.used;
+    return page_info_.used;
   }
 
   unsigned page_shift() const {
-    return pi_.page_shift;
+    return page_info_.page_shift;
   }
 
   PageClass page_class() const {
@@ -277,18 +284,11 @@ class ExternalAllocator::SegmentDescr {
     }
   };
 
-  struct LargeInfo {
-    size_t seg_size;
-  };
-
-  union {
-    PageInfo pi_;
-    LargeInfo li_;
-  };
+  PageInfo page_info_;
 };
 
-ExternalAllocator::SegmentDescr::SegmentDescr(PageClass pc, size_t offs, uint16_t capacity)
-    : offset_(offs), page_class_(pc), pi_(capacity) {
+ExternalAllocator::SegmentDescr::SegmentDescr(PageClass pc, size_t offs, uint16_t page_capacity)
+    : offset_(offs), page_class_(pc), page_info_(page_capacity) {
   constexpr size_t kDescrSize = sizeof(SegmentDescr);
   (void)kDescrSize;
 
@@ -296,12 +296,12 @@
   DCHECK(pc != PageClass::LARGE_P);
 
   if (pc == PageClass::MEDIUM_P)
-    pi_.page_shift = kMediumPageShift;
+    page_info_.page_shift = kMediumPageShift;
   else
-    pi_.page_shift = kSmallPageShift;
+    page_info_.page_shift = kSmallPageShift;
 
-  for (unsigned i = 0; i < capacity; ++i) {
-    pi_.pages[i].Reset(i);
+  for (unsigned i = 0; i < page_capacity; ++i) {
+    page_info_.pages[i].Reset(i);
   }
 }
 
@@ -323,20 +323,15 @@ int64_t ExternalAllocator::Malloc(size_t sz) {
   Page* page = free_pages_[bin_idx];
   if (page->available == 0) {  // empty page.
-    PageClass pc = ClassFromSize(sz);
+    PageClass pc = detail::ClassFromSize(sz);
 
     if (pc == PageClass::LARGE_P) {
-      size_t req_seg_size = 0;
-      page = FindLargePage(sz, &req_seg_size);
-      if (!page)
-        return -int64_t(req_seg_size);
-    } else {
-      page = FindPage(pc);
-      if (!page)
-        return -int64_t(kSegmentDefaultSize);
-      free_pages_[bin_idx] = page;
+      return LargeMalloc(sz);
     }
-
+    page = FindPage(pc);
+    if (!page)
+      return -int64_t(kSegmentSize);
+    free_pages_[bin_idx] = page;
     page->Init(pc, bin_idx);
   }
@@ -381,26 +376,8 @@ void ExternalAllocator::Free(size_t offset, size_t sz) {
   allocated_bytes_ -= block_size;
 }
 
-void ExternalAllocator::AddStorage(size_t offset, size_t size) {
-  CHECK_EQ(256_MB, size);
-  CHECK_EQ(0u, offset % 256_MB);
-
-  size_t idx = offset / 256_MB;
-
-  CHECK_LE(segments_.size(), idx);
-  auto [it, added] = segm_intervals_.emplace(offset, size);
-  CHECK(added);
-  if (it != segm_intervals_.begin()) {
-    auto prev = it;
-    --prev;
-    CHECK_LE(prev->first + prev->second, offset);
-  }
-  auto next = it;
-  ++next;
-  if (next != segm_intervals_.end()) {
-    CHECK_LE(offset + size, next->first);
-  }
-
+void ExternalAllocator::AddStorage(size_t start, size_t size) {
+  extent_tree_.Add(start, size);
   capacity_ += size;
 }
 
@@ -409,17 +386,9 @@ size_t ExternalAllocator::GoodSize(size_t sz) {
   if (bin_idx < kLargeSizeBin)
     return ToBlockSize(bin_idx);
 
-  return divup(sz, 4_KB) * 4_KB;
+  return alignup(sz, 4_KB);
 }
 
-detail::PageClass ExternalAllocator::PageClassFromOffset(size_t offset) const {
-  size_t idx = offset / 256_MB;
-  CHECK_LT(idx, segments_.size());
-  CHECK(segments_[idx]);
-
-  SegmentDescr* seg = segments_[idx];
-  return seg->page_class();
-}
 
 /**
  *
 *
@@ -438,35 +407,35 @@ auto ExternalAllocator::FindPage(PageClass pc) -> Page* {
   DCHECK_NE(pc, PageClass::LARGE_P);
 
   SegmentDescr* seg = sq_[pc];
-  if (seg) {
-    while (true) {
-      if (seg->HasFreePages()) {
-        return seg->FindPageSegment();
-      }
-
-      // remove head.
-      SegmentDescr* next = seg->Detach();
-      sq_[pc] = next;
-      if (next == nullptr) {
-        break;
-      }
-      seg = next;
+  while (seg) {
+    if (seg->HasFreePages()) {
+      return seg->FindPageSegment();
     }
+
+    // remove head.
+    SegmentDescr* next = seg->Detach();
+    sq_[pc] = next;
+    seg = next;
   }
 
-  if (!segm_intervals_.empty()) {
+  // no pages in the existing segments. Let's search in the extent tree.
+  auto op_range = extent_tree_.GetRange(kSegmentSize, kSegmentAlignment);
+  if (op_range) {
+    DCHECK_EQ(0u, op_range->first % kSegmentAlignment);
     unsigned num_pages = NumPagesInSegment(pc);
+    size_t seg_idx = op_range->first / kSegmentAlignment;
 
-    auto it = segm_intervals_.begin();
-    size_t seg_idx = it->first / kSegmentAlignment;
-    CHECK_LE(segments_.size(), seg_idx);
+    if (segments_.size() > seg_idx) {
+      DCHECK(segments_[seg_idx] == nullptr);
+    } else {
+      segments_.resize(seg_idx + 1);
+    }
 
-    segments_.resize(seg_idx + 1);
     void* ptr =
         mi_malloc_aligned(sizeof(SegmentDescr) + num_pages * sizeof(Page), kSegDescrAlignment);
-    SegmentDescr* seg = new (ptr) SegmentDescr(pc, it->first, num_pages);
+    SegmentDescr* seg = new (ptr) SegmentDescr(pc, op_range->first, num_pages);
     segments_[seg_idx] = seg;
-    segm_intervals_.erase(it);
 
     DCHECK(sq_[pc] == NULL);
     DCHECK(seg->next == seg->prev && seg == seg->next);
@@ -478,12 +447,15 @@
   return nullptr;
 }
 
-auto ExternalAllocator::FindLargePage(size_t size, size_t* segment_size) -> Page* {
-  LOG(FATAL) << "TBD";
-  // size_t aligned_blocks = divup(size, 4_KB);
-  // size_t offset = GetLargeInterval(aligned_blocks);
-  //
-  return nullptr;
+int64_t ExternalAllocator::LargeMalloc(size_t size) {
+  size_t align_sz = alignup(size, 4_KB);
+  auto op_range = extent_tree_.GetRange(align_sz, 4_KB);
+  if (!op_range) {
+    align_sz = max(align_sz, kSegmentSize);
+    return -int64_t(align_sz);
+  }
+
+  return op_range->first;
 }
 
 void ExternalAllocator::FreePage(Page* page, SegmentDescr* owner, size_t block_size) {
@@ -512,7 +484,7 @@ void ExternalAllocator::FreePage(Page* page, SegmentDescr* owner, size_t block_s
       sq->LinkBefore(owner);
     }
   }
-  --owner->pi_.used;
+  --owner->page_info_.used;
 }
 
 inline auto ExternalAllocator::ToSegDescr(Page* page) -> SegmentDescr* {
diff --git a/src/core/external_alloc.h b/src/core/external_alloc.h
index ce53bea..e4a381c 100644
--- a/src/core/external_alloc.h
+++ b/src/core/external_alloc.h
@@ -3,12 +3,13 @@
 //
 #pragma once
 
-#include <absl/container/btree_map.h>
-
 #include <cstddef>
 #include <cstdint>
 #include <vector>
 
+#include "core/extent_tree.h"
+
 namespace dfly {
 
 constexpr inline unsigned long long operator""_MB(unsigned long long x) {
@@ -34,19 +35,21 @@ constexpr inline unsigned long long operator""_KB(unsigned long long x) {
 namespace detail {
 class Page;
 
-constexpr unsigned kNumFreePages = 29;
+constexpr unsigned kNumFreePages = 25;
 
 /**
- * pages classes can be SMALL, MEDIUM or LARGE. SMALL (2MB) for block sizes upto 256KB.
- * MEDIUM (16MB) for block sizes 256KB-2MB. Anything else is LARGE.
+ * page classes can be SMALL, MEDIUM or LARGE. SMALL (2MB) for block sizes up to 128KB.
+ * MEDIUM (16MB) for block sizes 128KB-1MB. Anything else is LARGE.
  *
  */
-enum PageClass : uint8_t {
+enum PageClass : uint16_t {
   SMALL_P = 0,
   MEDIUM_P = 1,
   LARGE_P = 2,
 };
 
+PageClass ClassFromSize(size_t size);
+
 }  // namespace detail
 
 class ExternalAllocator {
@@ -65,13 +68,12 @@ class ExternalAllocator {
   // For results >= 0 Returns offset to the backing storage where we may write the data of
   // size sz.
   int64_t Malloc(size_t sz);
+
   void Free(size_t offset, size_t sz);
 
-  /// Adds backing storage to the allocator.
-  /// offset must be aligned to kExtAlignment boundaries.
-  /// It is expected that storage is added in a linear fashion, without skipping ranges.
-  /// So if [0, 256MB) is added, then next time [256MB, 512MB) is added etc.
-  void AddStorage(size_t offset, size_t size);
+  /// Adds backing storage to the allocator. The range should not overlap with already
+  /// added storage ranges.
+  void AddStorage(size_t start, size_t size);
 
   // Similar to mi_good_size, returns the size of the underlying block as if it
   // were returned by Malloc. It is guaranteed that the result is not less than sz.
@@ -86,9 +88,6 @@ class ExternalAllocator {
     return allocated_bytes_;
   }
 
-  // accessors useful for tests.
-  detail::PageClass PageClassFromOffset(size_t offset) const;
-
 private:
   class SegmentDescr;
   using Page = detail::Page;
@@ -97,7 +96,7 @@ class ExternalAllocator {
   // Returns NULL if no page is found.
   Page* FindPage(detail::PageClass sc);
-  Page* FindLargePage(size_t size, size_t* segment_size);
+  int64_t LargeMalloc(size_t size);
   SegmentDescr* GetNewSegment(detail::PageClass sc);
   void FreePage(Page* page, SegmentDescr* owner, size_t block_size);
 
@@ -109,10 +108,7 @@ class ExternalAllocator {
   // A segment for each 256MB range. To get a segment id from the offset, shift right by 28.
   std::vector<SegmentDescr*> segments_;
 
-  // weird queue to support AddStorage interface. We can not instantiate segment
-  // until we know its class and that we know only when a page is demanded.
-  // sorted map of offset -> size.
-  absl::btree_map<size_t, size_t> segm_intervals_;
+  ExtentTree extent_tree_;
 
   size_t capacity_ = 0;  // in bytes.
   size_t allocated_bytes_ = 0;
diff --git a/src/core/external_alloc_test.cc b/src/core/external_alloc_test.cc
index ab00d74..583e776 100644
--- a/src/core/external_alloc_test.cc
+++ b/src/core/external_alloc_test.cc
@@ -87,18 +87,32 @@ TEST_F(ExternalAllocatorTest, Invariants) {
 }
 
 TEST_F(ExternalAllocatorTest, Classes) {
+  using detail::ClassFromSize;
+
   ext_alloc_.AddStorage(0, kSegSize);
-  off_t offs1 = ext_alloc_.Malloc(256_KB);
-  EXPECT_EQ(detail::SMALL_P, ext_alloc_.PageClassFromOffset(offs1));
-  off_t offs2 = ext_alloc_.Malloc(256_KB + 1);
+
+  ASSERT_EQ(detail::SMALL_P, ClassFromSize(128_KB));
+  ASSERT_EQ(detail::MEDIUM_P, ClassFromSize(128_KB + 1));
+  ASSERT_EQ(detail::LARGE_P, ClassFromSize(1_MB + 1));
+
+  off_t offs1 = ext_alloc_.Malloc(128_KB);
+  EXPECT_EQ(offs1, 0);
+
+  off_t offs2 = ext_alloc_.Malloc(128_KB + 1);
   EXPECT_EQ(offs2, -kSegSize);
   ext_alloc_.AddStorage(kSegSize, kSegSize);
-  offs2 = ext_alloc_.Malloc(256_KB + 1);
-  EXPECT_EQ(detail::MEDIUM_P, ext_alloc_.PageClassFromOffset(offs2));
-  off_t offs3 = ext_alloc_.Malloc(2_MB);
-  EXPECT_EQ(detail::MEDIUM_P, ext_alloc_.PageClassFromOffset(offs3));
-  EXPECT_EQ(2_MB, ExternalAllocator::GoodSize(2_MB));
+  offs2 = ext_alloc_.Malloc(128_KB + 1);
+  ASSERT_GT(offs2, 0);
+  offs2 = ext_alloc_.Malloc(1_MB);
+  ASSERT_GT(offs2, 0);
+
+  off_t offs3 = ext_alloc_.Malloc(1_MB + 1);
+  ASSERT_LT(offs3, 0);
+  ext_alloc_.AddStorage(kSegSize * 2, kSegSize);
+  offs3 = ext_alloc_.Malloc(1_MB + 1);
+  ASSERT_GT(offs3, 0);
+
+  EXPECT_EQ(1_MB + 4_KB, ExternalAllocator::GoodSize(1_MB + 1));
 }
 
 }  // namespace dfly
\ No newline at end of file
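
Usage sketch (illustrative, not part of the patch): the reworked ExtentTree::GetRange both aligns the returned start and splits the donor extent, keeping any unused prefix and suffix as smaller free extents. A minimal example against the dfly::ExtentTree API declared above; the concrete offsets are invented for illustration:

#include <cassert>

#include "core/extent_tree.h"

int main() {
  dfly::ExtentTree tree;

  // Publish a free extent [1000, 11000) - 10000 bytes at an unaligned start.
  tree.Add(/*start=*/1000, /*len=*/10000);

  // Request 4096 bytes aligned to 4096: GetRange rounds 1000 up to 4096 and
  // carves out [4096, 8192), leaving the prefix [1000, 4096) and the suffix
  // [8192, 11000) behind as free extents.
  auto range = tree.GetRange(/*len=*/4096, /*align=*/4096);
  assert(range && range->first == 4096 && range->second == 8192);

  // The suffix is still allocatable: the best-fit lookup (smallest extent
  // that fits) returns it for an alignment-free request (align = 1).
  auto rest = tree.GetRange(/*len=*/2048, /*align=*/1);
  assert(rest && rest->first == 8192);
  return 0;
}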
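Grow-and-retry sketch (illustrative, not part of the patch): with this change Malloc reports storage pressure by returning a negative value whose magnitude is the amount of backing storage to register via AddStorage before retrying: kSegmentSize (256MB) for the small/medium classes, and the 4KB-aligned request size (at least kSegmentSize) for LARGE requests, as in LargeMalloc above. A hypothetical caller-side helper; AllocOrGrow and the next_offset cursor are invented for the example:

#include <cstddef>
#include <cstdint>

#include "core/external_alloc.h"

// Allocates sz bytes, growing the backing storage on demand. next_offset is
// a caller-maintained cursor that hands out non-overlapping ranges.
int64_t AllocOrGrow(dfly::ExternalAllocator* alloc, size_t sz, size_t* next_offset) {
  int64_t res = alloc->Malloc(sz);
  if (res < 0) {
    size_t grow = size_t(-res);  // storage size the allocator asked for.
    alloc->AddStorage(*next_offset, grow);

    // Keep the cursor 256MB-aligned: FindPage carves segments with
    // GetRange(kSegmentSize, kSegmentAlignment), so the small/medium classes
    // need 256MB-aligned free ranges.
    constexpr size_t kAlign = 256ULL << 20;  // kSegmentAlignment
    *next_offset += (grow + kAlign - 1) / kAlign * kAlign;

    res = alloc->Malloc(sz);  // retry; the added range satisfies the request.
  }
  return res;
}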