Commit 0d333c8a authored by Ryan M. Lefever, committed by TensorFlower Gardener

Change 2/6 for making MSA repacking slice-aware.

Fix a bug in which we over-allocate space for slices when they are colocated with larger buffers.

The interaction causing this behavior is as follows:
A) GlobalDecreasingSizeBestFitHeap::FindChunkCandidates() adds additional space to the last chunk in a sliced allocation to account for max_colocation_size.
B) When AlternateMemoryBestFitHeap::CheckPrefetchFit() computes slices_for_pending_chunks, it recomputes the size of the sliced allocation as the sum of the sizes of the chunks returned from A. Note that we do not recompute the allocation size in the non-sliced case.
C) Before committing a chunk, GlobalDecreasingSizeBestFitHeap::CommitChunk() changes the chunk's size to match the size from B. Thus, in the sliced case we keep the extra max_colocation_size space, because the allocation size was recalculated with it. In the non-sliced case, the chunk size is adjusted back to what the request actually needs.

Thus, this change is a no-op for non-sliced allocations.
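To make the interaction concrete, here is a minimal, self-contained C++ sketch of the over-allocation. This is illustrative only, not XLA code; the Chunk struct, sizes, and offsets are hypothetical.

// Illustrative sketch only; simplified structs and assumed sizes, not the XLA API.
#include <cstdint>
#include <iostream>
#include <vector>

struct Chunk {
  int64_t offset;
  int64_t size;
};

int main() {
  // A 10-byte buffer sliced as {5, 5}, colocated with a 15-byte buffer.
  const int64_t buffer_size = 10;
  const int64_t max_colocation_size = 15;
  std::vector<Chunk> chunks = {{/*offset=*/10, /*size=*/5},
                               {/*offset=*/15, /*size=*/5}};

  // (A) The extra colocation space is folded into the last slice chunk.
  chunks.back().size += max_colocation_size - buffer_size;  // last chunk: 10 bytes

  // (B) The sliced-allocation size is recomputed as the sum of the chunk
  // sizes, so the colocation padding is counted as part of the buffer.
  int64_t recomputed_size = 0;
  for (const Chunk& c : chunks) recomputed_size += c.size;  // 15 bytes

  // (C) The commit step reserves recomputed_size for the buffer, so the
  // sliced path keeps the extra 5 bytes; the non-sliced path would reset
  // the size back to buffer_size.
  std::cout << "reserved " << recomputed_size << " bytes for a " << buffer_size
            << "-byte sliced buffer\n";
  return 0;
}

The fix below drops the extra chunk in FindChunkCandidates() instead of merging it into the last slice, and CommitChunk() now CHECKs that the chunk size already matches the buffer interval's size.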

PiperOrigin-RevId: 565217603
Parent 4404175d
@@ -1729,11 +1729,10 @@ GlobalDecreasingSizeBestFitHeap<BufferType>::FindChunkCandidates(
return {};
}
CHECK_EQ(chunks.size(), sliced_buffer_interval.num_slices() + 1);
// The extra chunk is for colocations, so merge the last two chunks.
Chunk last = chunks.back();
// The extra chunk is to ensure that colocations of larger sizes can fit.
// However, we don't need that extra space for the buffer for which we found
// chunks.
chunks.pop_back();
chunks.back() = Chunk::FromOffsetSize(chunks.back().offset,
chunks.back().size + last.size);
return chunks;
}
@@ -1743,13 +1742,8 @@ void GlobalDecreasingSizeBestFitHeap<BufferType>::CommitChunk(
const GlobalDecreasingSizeBestFitHeap<BufferType>::BufferInterval&
buffer_interval,
GlobalDecreasingSizeBestFitHeap<BufferType>::Chunk chunk) {
// Update the maximum heap size according to the one determined by the chunk
// candidate. In case of colocations of different sizes, the chunk size
// returned is the maximum of all colocations, so use this value to update the
// heap size.
CHECK_EQ(chunk.size, buffer_interval.size);
result_.heap_size = result_.UpdatedHeapSize(chunk);
// Now, update the chunk size to the actual size of the buffer interval.
chunk.size = buffer_interval.size;
interval_tree_.Add(buffer_interval.start, buffer_interval.end, chunk);
for (auto colocation : GetTransitiveColocations(buffer_interval)) {
auto colocation_interval = buffer_intervals_[colocation];
@@ -1757,9 +1751,10 @@ void GlobalDecreasingSizeBestFitHeap<BufferType>::CommitChunk(
// of the colocated interval in case the colocations are of different sizes.
Chunk colocation_chunk =
Chunk::FromOffsetSize(chunk.offset, colocation_interval.size);
AddToChunkMap(colocation, colocation_chunk);
result_.heap_size = result_.UpdatedHeapSize(colocation_chunk);
interval_tree_.Add(colocation_interval.start, colocation_interval.end,
colocation_chunk);
AddToChunkMap(colocation, colocation_chunk);
}
AddToChunkMap(buffer_interval.buffer, chunk);
......
@@ -1067,42 +1067,7 @@ TEST_F(NoFragmentationStatsHeapTest, Mixed) {
EXPECT_EQ(40, heap.Finish().heap_size);
}
class GlobalDecreasingSizeBestFitHeapTest : public HeapAlgorithmTestBase {
protected:
class InheritedGlobalDecreasingSizeBestFitHeap
: public GlobalDecreasingSizeBestFitHeap<HloValue> {
public:
InheritedGlobalDecreasingSizeBestFitHeap()
: GlobalDecreasingSizeBestFitHeap(/*alignment=*/1) {}
// Finds a chunk candidate and returns the offset and the new heap size.
std::pair<int64_t, int64_t> FindChunkCandidate(
const HloValue* buffer, int64_t size, int64_t start, int64_t end,
int64_t preferred_offset = -1) {
buffer_interval_.buffer = buffer;
buffer_interval_.size = size;
buffer_interval_.start = start;
buffer_interval_.end = end;
chunk_candidate_ = GlobalDecreasingSizeBestFitHeap::FindChunkCandidate(
buffer_interval_, preferred_offset);
EXPECT_EQ(chunk_candidate_.size, size);
return {chunk_candidate_.offset,
result_.UpdatedHeapSize(chunk_candidate_)};
}
// Commits the previously found chunk candidate.
void CommitChunk() {
GlobalDecreasingSizeBestFitHeap::CommitChunk(buffer_interval_,
chunk_candidate_);
}
private:
BufferInterval buffer_interval_;
Chunk chunk_candidate_;
};
InheritedGlobalDecreasingSizeBestFitHeap heap_;
};
class GlobalDecreasingSizeBestFitHeapTest : public HeapAlgorithmTestBase {};
TEST_F(GlobalDecreasingSizeBestFitHeapTest, Empty) {
GlobalDecreasingSizeBestFitHeap<HloValue> heap(/*alignment=*/1);
@@ -1403,7 +1368,107 @@ TEST_F(GlobalDecreasingSizeBestFitHeapTest, ColocatedDifferentSize2) {
EXPECT_EQ(0, result.chunk_map.at(buffer_c_).offset);
}
TEST_F(GlobalDecreasingSizeBestFitHeapTest, ChunkCandidate) {
class FindGlobalDecreasingSizeBestFitTest : public HeapAlgorithmTestBase {
protected:
class InheritedGlobalDecreasingSizeBestFitHeap
: public GlobalDecreasingSizeBestFitHeap<HloValue> {
public:
InheritedGlobalDecreasingSizeBestFitHeap()
: GlobalDecreasingSizeBestFitHeap(/*alignment=*/1) {}
// Makes a BufferInterval from the input specifications, finds a chunk
// candidate for it (using preferred_offset if > -1), and commits that
// chunk. Returns the offset and the new heap size.
std::pair<int64_t, int64_t> MakeFindAndCommit(
const HloValue* buffer, int64_t size, int64_t start, int64_t end,
int64_t preferred_offset = -1) {
// Make the BufferInterval.
MakeBufferInterval(buffer, size, start, end);
BufferInterval* buffer_interval = &GetBufferInterval(buffer);
// Find a chunk candidate.
Chunk chunk_candidate =
FindChunkCandidate(*buffer_interval, preferred_offset);
EXPECT_EQ(chunk_candidate.size, size);
std::pair<int64_t, int64_t> result = std::make_pair(
chunk_candidate.offset, result_.UpdatedHeapSize(chunk_candidate));
// Commit the chunk.
CommitChunk(*buffer_interval, chunk_candidate);
return result;
}
// Creates a BufferInterval from the inputs and adds it to
// buffer_intervals_.
void MakeBufferInterval(const HloValue* buffer, int64_t size, int64_t start,
int64_t end) {
BufferInterval* buffer_interval = &buffer_intervals_[buffer];
buffer_interval->buffer = buffer;
buffer_interval->size = size;
buffer_interval->start = start;
buffer_interval->end = end;
}
// Adds a colocation to buffer_intervals_[buffer] for colocation.
void AddColocationToBuffer(const HloValue* buffer,
const HloValue* colocation) {
CHECK(buffer_intervals_.contains(buffer));
buffer_intervals_[buffer].colocations.push_back(colocation);
}
// Returns buffer_intervals_[buffer]. The returned reference is invalidated
// if any elements are added to or removed from buffer_intervals_, e.g., if
// MakeBufferInterval() is called.
BufferInterval& GetBufferInterval(const HloValue* buffer) {
CHECK(buffer_intervals_.contains(buffer));
return buffer_intervals_[buffer];
}
// Expose protected function.
std::vector<Chunk> FindChunkCandidates(
const SlicedBufferInterval& sliced_buffer_interval,
int64_t preferred_offset = -1) const {
return GlobalDecreasingSizeBestFitHeap<HloValue>::FindChunkCandidates(
sliced_buffer_interval, preferred_offset);
}
// Expose protected function.
void CommitChunk(const BufferInterval& buffer_interval, Chunk chunk) {
GlobalDecreasingSizeBestFitHeap<HloValue>::CommitChunk(buffer_interval,
chunk);
}
// Typically, HeapSimulator allows only one chunk to be assigned to each
// buffer. That limitation does not work for slices, which require multiple
// chunks per buffer. However, MSA is the only code that generates slices,
// and it works around the limitation by making this method a no-op. For
// testing, we allow multiple chunks to be assigned to a buffer (as MSA would).
void AddToChunkMap(const HloValue* buffer, Chunk chunk) override {
committed_[buffer].push_back(chunk);
}
const absl::flat_hash_map<const HloValue*, std::vector<Chunk>>& committed()
const {
return committed_;
}
int64_t heap_size() const { return result_.heap_size; }
private:
absl::flat_hash_map<const HloValue*, std::vector<Chunk>> committed_;
};
using BufferInterval =
InheritedGlobalDecreasingSizeBestFitHeap::BufferInterval;
using SlicedBufferInterval =
InheritedGlobalDecreasingSizeBestFitHeap::SlicedBufferInterval;
using Chunk = InheritedGlobalDecreasingSizeBestFitHeap::Chunk;
InheritedGlobalDecreasingSizeBestFitHeap heap_;
};
TEST_F(FindGlobalDecreasingSizeBestFitTest, ChunkCandidate) {
// space
// ^
// 35|
@@ -1431,25 +1496,203 @@ TEST_F(GlobalDecreasingSizeBestFitHeapTest, ChunkCandidate) {
// -----------------------------------------> time
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13
using pair = std::pair<int64_t, int64_t>;
EXPECT_EQ(pair(5, 10), heap_.FindChunkCandidate(buffer_a_, 5, 6, 10, 5));
heap_.CommitChunk(); // offset: 5, size: 5, start: 6, end: 10
// offset: 5, size: 5, start: 6, end: 10
// Preferred offset 5 is returned.
EXPECT_EQ(pair(0, 10), heap_.FindChunkCandidate(buffer_b_, 10, 3, 5));
heap_.CommitChunk(); // offset: 0, size: 10, start: 3, end: 5
EXPECT_EQ(pair(10, 15), heap_.FindChunkCandidate(buffer_c_, 5, 2, 8));
heap_.CommitChunk(); // offset: 10, size: 5, start: 2, end: 8
EXPECT_EQ(pair(0, 15), heap_.FindChunkCandidate(buffer_d_, 5, 0, 2, 10));
heap_.CommitChunk(); // offset: 0, size: 5, start: 0, end: 2
EXPECT_EQ(pair(5, 10), heap_.MakeFindAndCommit(buffer_a_, 5, 6, 10, 5));
// offset: 0, size: 10, start: 3, end: 5
EXPECT_EQ(pair(0, 10), heap_.MakeFindAndCommit(buffer_b_, 10, 3, 5));
// offset: 10, size: 5, start: 2, end: 8
EXPECT_EQ(pair(10, 15), heap_.MakeFindAndCommit(buffer_c_, 5, 2, 8));
// offset: 0, size: 5, start: 0, end: 2
// Preferred offset 10 could not be given because it is occupied.
EXPECT_EQ(pair(10, 20), heap_.FindChunkCandidate(buffer_e_, 10, 11, 13, 10));
heap_.CommitChunk(); // offset: 10, size: 10, start: 11, end: 13
EXPECT_EQ(pair(0, 15), heap_.MakeFindAndCommit(buffer_d_, 5, 0, 2, 10));
// offset: 10, size: 10, start: 11, end: 13
// Preferred offset 10 is returned.
EXPECT_EQ(pair(20, 25), heap_.FindChunkCandidate(buffer_f_, 5, 3, 5, 20));
heap_.CommitChunk(); // offset: 20, size: 5, start: 3, end: 5
EXPECT_EQ(pair(10, 20), heap_.MakeFindAndCommit(buffer_e_, 10, 11, 13, 10));
// offset: 20, size: 5, start: 3, end: 5
// Preferred offset 20 is returned.
EXPECT_EQ(pair(25, 35), heap_.FindChunkCandidate(buffer_g_, 10, 4, 8, 15));
heap_.CommitChunk(); // offset: 25, size: 10, start: 4, end: 8
EXPECT_EQ(pair(20, 25), heap_.MakeFindAndCommit(buffer_f_, 5, 3, 5, 20));
// offset: 25, size: 10, start: 4, end: 8
// Preferred offset 15 could not be given because it is occupied.
EXPECT_EQ(pair(25, 35), heap_.MakeFindAndCommit(buffer_g_, 10, 4, 8, 15));
}
TEST_F(FindGlobalDecreasingSizeBestFitTest, FindChunkCandidates) {
// space
// ^
// |
// 30 - +----+
// | | |
// - +---------+ +---+| E |
// | | | | || |
// 20 - | | | F |+----+
// | | C | | || |
// - | | +---++ |
// | | | | B |
// 10 - +----+----+----+ +---------+
// | | |
// - | A | +---------+
// | | | | D |
// +----|----|----|----|----|----|----|----> time
// 10 20 30
// Place and commit A.
{ // Force sliced buffers to go out of scope before they are invalidated by
// calls to MakeBufferInterval.
heap_.MakeBufferInterval(buffer_a_, 10, 5, 15);
auto sliced_buffer_a = SlicedBufferInterval::CreateMutableInterval(
heap_.GetBufferInterval(buffer_a_));
auto chunks = heap_.FindChunkCandidates(sliced_buffer_a);
EXPECT_THAT(chunks, ::testing::ElementsAre(Chunk::FromOffsetSize(0, 10)));
heap_.CommitChunk(sliced_buffer_a.full_buffer_interval(),
Chunk::FromOffsetSize(0, 10));
EXPECT_THAT(
heap_.committed(),
::testing::UnorderedElementsAre(::testing::Pair(
buffer_a_, ::testing::ElementsAre(Chunk::FromOffsetSize(0, 10)))));
EXPECT_EQ(heap_.heap_size(), 10);
}
// Colocate B and C.
{ // Force sliced buffers to go out of scope before they are invalidated by
// calls to MakeBufferInterval.
heap_.MakeBufferInterval(buffer_b_, 10, 25, 35);
heap_.MakeBufferInterval(buffer_c_, 15, 10, 20);
// Note that HeapSimulator uses GetTransitiveColocations(), so we can
// colocate b with c without doing the reverse.
heap_.AddColocationToBuffer(buffer_b_, buffer_c_);
auto sliced_buffer_b = SlicedBufferInterval::CreateMutableInterval(
heap_.GetBufferInterval(buffer_b_));
auto sliced_buffer_c = SlicedBufferInterval::CreateMutableInterval(
heap_.GetBufferInterval(buffer_c_));
// Slice B.
sliced_buffer_b.Slice({5, 5});
sliced_buffer_b.UpdateSliceStartTimes({25, 30});
// Place and commit B (and C transitively via colocation). B should be
// placed at an offset that accommodates C; however, it should not have the
// size of C.
auto chunks = heap_.FindChunkCandidates(sliced_buffer_b);
EXPECT_THAT(chunks, ::testing::ElementsAre(Chunk::FromOffsetSize(10, 5),
Chunk::FromOffsetSize(15, 5)));
// In today's code, MSA would massage the SlicedBufferInterval and returned
// chunks before calling CommitChunks. We hard-code simulations of those
// changes here.
//
// We turn:
// +----+ +----+
// | | | |
// +----+----+ => +----+ |
// | | | | |
// +---------+ +----+----+
heap_.CommitChunk(BufferInterval{buffer_b_, 5, 25, 30, /*colocations=*/{},
/*need_allocation=*/true},
Chunk::FromOffsetSize(10, 5));
heap_.CommitChunk(
BufferInterval{buffer_b_, 10, 30, 35, /*colocations=*/{buffer_c_},
/*need_allocation=*/true},
Chunk::FromOffsetSize(10, 10));
EXPECT_THAT(
heap_.committed(),
::testing::UnorderedElementsAre(
::testing::Pair(buffer_a_, ::testing::ElementsAre(
Chunk::FromOffsetSize(0, 10))),
::testing::Pair(buffer_b_, ::testing::ElementsAre(
Chunk::FromOffsetSize(10, 5),
Chunk::FromOffsetSize(10, 10))),
::testing::Pair(buffer_c_, ::testing::ElementsAre(
Chunk::FromOffsetSize(10, 15)))));
EXPECT_EQ(heap_.heap_size(), 25);
}
// Place and commit D.
{ // Force sliced buffers to go out of scope before they are invalidated by
// calls to MakeBufferInterval.
heap_.MakeBufferInterval(buffer_d_, 5, 25, 35);
auto sliced_buffer_d = SlicedBufferInterval::CreateMutableInterval(
heap_.GetBufferInterval(buffer_d_));
auto chunks = heap_.FindChunkCandidates(sliced_buffer_d);
EXPECT_THAT(chunks, ::testing::ElementsAre(Chunk::FromOffsetSize(0, 5)));
heap_.CommitChunk(sliced_buffer_d.full_buffer_interval(),
Chunk::FromOffsetSize(0, 5));
EXPECT_THAT(
heap_.committed(),
::testing::UnorderedElementsAre(
::testing::Pair(buffer_a_, ::testing::ElementsAre(
Chunk::FromOffsetSize(0, 10))),
::testing::Pair(buffer_b_, ::testing::ElementsAre(
Chunk::FromOffsetSize(10, 5),
Chunk::FromOffsetSize(10, 10))),
::testing::Pair(buffer_c_, ::testing::ElementsAre(
Chunk::FromOffsetSize(10, 15))),
::testing::Pair(buffer_d_, ::testing::ElementsAre(
Chunk::FromOffsetSize(0, 5)))));
EXPECT_EQ(heap_.heap_size(), 25);
}
// Place and commit E. It should fit just on top of B.
{ // Force sliced buffers to go out of scope before they are invalidated by
// calls to MakeBufferInterval.
heap_.MakeBufferInterval(buffer_e_, 10, 30, 35);
auto sliced_buffer_e = SlicedBufferInterval::CreateMutableInterval(
heap_.GetBufferInterval(buffer_e_));
auto chunks = heap_.FindChunkCandidates(sliced_buffer_e);
EXPECT_THAT(chunks, ::testing::ElementsAre(Chunk::FromOffsetSize(20, 10)));
heap_.CommitChunk(sliced_buffer_e.full_buffer_interval(),
Chunk::FromOffsetSize(20, 10));
EXPECT_THAT(
heap_.committed(),
::testing::UnorderedElementsAre(
::testing::Pair(buffer_a_, ::testing::ElementsAre(
Chunk::FromOffsetSize(0, 10))),
::testing::Pair(buffer_b_, ::testing::ElementsAre(
Chunk::FromOffsetSize(10, 5),
Chunk::FromOffsetSize(10, 10))),
::testing::Pair(buffer_c_, ::testing::ElementsAre(
Chunk::FromOffsetSize(10, 15))),
::testing::Pair(
buffer_d_, ::testing::ElementsAre(Chunk::FromOffsetSize(0, 5))),
::testing::Pair(buffer_e_, ::testing::ElementsAre(
Chunk::FromOffsetSize(20, 10)))));
EXPECT_EQ(heap_.heap_size(), 30);
}
// Place and commit F. It should fit on top of B's first slice.
{ // Force sliced buffers to go out of scope before they are invalidated by
// calls to MakeBufferInterval.
heap_.MakeBufferInterval(buffer_f_, 10, 25, 29);
auto sliced_buffer_f = SlicedBufferInterval::CreateMutableInterval(
heap_.GetBufferInterval(buffer_f_));
auto chunks = heap_.FindChunkCandidates(sliced_buffer_f);
EXPECT_THAT(chunks, ::testing::ElementsAre(Chunk::FromOffsetSize(15, 10)));
heap_.CommitChunk(sliced_buffer_f.full_buffer_interval(),
Chunk::FromOffsetSize(15, 10));
EXPECT_THAT(
heap_.committed(),
::testing::UnorderedElementsAre(
::testing::Pair(buffer_a_, ::testing::ElementsAre(
Chunk::FromOffsetSize(0, 10))),
::testing::Pair(buffer_b_, ::testing::ElementsAre(
Chunk::FromOffsetSize(10, 5),
Chunk::FromOffsetSize(10, 10))),
::testing::Pair(buffer_c_, ::testing::ElementsAre(
Chunk::FromOffsetSize(10, 15))),
::testing::Pair(
buffer_d_, ::testing::ElementsAre(Chunk::FromOffsetSize(0, 5))),
::testing::Pair(buffer_e_, ::testing::ElementsAre(
Chunk::FromOffsetSize(20, 10))),
::testing::Pair(buffer_f_, ::testing::ElementsAre(
Chunk::FromOffsetSize(15, 10)))));
EXPECT_EQ(heap_.heap_size(), 30);
}
}
class ConstrainedGlobalDecreasingSizeBestFitHeapTest
......