From 4aa931dd858264f748a84a1440cc4fc486eeabe6 Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Mon, 27 May 2019 10:31:42 +0800 Subject: [PATCH] Code clean of Allocator (#17602) * Revert "Revert "Fix allocator bug"" This reverts commit 174d0d0b90a610807d6f82927aad4def227ee643. * Revert "fix travis ci" This reverts commit 5656fa9f7ca278aff7319485c0d289a4ffc2f9d0. test=develop * add inlined_vector.h, test=develop * add inlined_vector_test,test=develop * clean code of allocator,test=develop * delete zero_size_allocator.h,test=develop * fix failed unittest,test=develop --- paddle/fluid/framework/lod_tensor_test.cc | 2 + .../api/analysis_predictor_tester.cc | 2 +- paddle/fluid/memory/allocation/CMakeLists.txt | 3 +- .../allocation/allocation_with_underlying.h | 33 ---------- paddle/fluid/memory/allocation/allocator.h | 13 +++- .../memory/allocation/allocator_facade.cc | 61 ++++++++++++++----- .../memory/allocation/buffered_allocator.cc | 1 - .../memory/allocation/locked_allocator.cc | 1 - .../memory/allocation/retry_allocator.cc | 1 - .../memory/allocation/zero_size_allocator.cc | 43 ------------- .../memory/allocation/zero_size_allocator.h | 46 -------------- paddle/fluid/memory/memcpy.cc | 13 ++++ paddle/fluid/operators/batch_norm_op.cc | 1 + paddle/fluid/operators/fake_quantize_op.cu | 4 +- 14 files changed, 75 insertions(+), 149 deletions(-) delete mode 100644 paddle/fluid/memory/allocation/allocation_with_underlying.h delete mode 100644 paddle/fluid/memory/allocation/zero_size_allocator.cc delete mode 100644 paddle/fluid/memory/allocation/zero_size_allocator.h diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc index 15928c18d38..d1554113bc3 100644 --- a/paddle/fluid/framework/lod_tensor_test.cc +++ b/paddle/fluid/framework/lod_tensor_test.cc @@ -28,12 +28,14 @@ namespace framework { TEST(LoD, PrintLoDTensor) { LoDTensor tensor1; + tensor1.Resize({2}); tensor1.mutable_data(platform::CPUPlace()); tensor1.data()[0] = 0.2; tensor1.data()[1] = 0.5; LOG(INFO) << tensor1; LoDTensor tensor2; + tensor2.Resize({2}); tensor2.mutable_data(platform::CPUPlace()); tensor2.data()[0] = 1; tensor2.data()[1] = 2; diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index 6bc892638c2..6f57f8b2028 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -384,7 +384,7 @@ TEST_F(MkldnnQuantizerTest, histogram_empty) { // zero tensor framework::LoDTensor var_tensor; var_tensor.Resize({0}); - ASSERT_TRUE(var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(platform::CPUPlace()); ASSERT_THROW(Histogram(var_tensor, -1, 1, 1), platform::EnforceNotMet); } diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 4c4ae72effa..c309febd499 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -4,7 +4,6 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler) -cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator) cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS 
best_fit_allocator locked_allocator buffered_allocator cpu_allocator) if (WITH_GPU) @@ -38,7 +37,7 @@ else () set(AllocatorFacadeDeps) endif() -list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator) +list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator) cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator) cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator) diff --git a/paddle/fluid/memory/allocation/allocation_with_underlying.h b/paddle/fluid/memory/allocation/allocation_with_underlying.h deleted file mode 100644 index 69f78667d7d..00000000000 --- a/paddle/fluid/memory/allocation/allocation_with_underlying.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/fluid/memory/allocation/allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -class AllocationWithUnderlying : public Allocation { - public: - explicit AllocationWithUnderlying(AllocationPtr allocation) - : Allocation(allocation->ptr(), allocation->size(), allocation->place()), - allocation_(std::move(allocation)) {} - AllocationPtr allocation_; -}; - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 5acdd9d0fcb..1fcd9361805 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -52,7 +52,7 @@ class Allocator; * decorate a RetryAllocator to any allocator to perform allocation retry when * first allocation request fails. * - * Explanations of Allocator design is as follows: + * Explanations of Allocator design are as follows: * * Suppose we have an allocator which is decorated by several allocators: * @@ -127,8 +127,15 @@ class Allocation { size_t size_; platform::Place place_; - // NOTE(zjl): Since decorated_allocators_ is usually a small vector - // We reserve a small buffer to it to prevent frequent heap allocation + /** + * NOTE(zjl): Since decorated_allocators_ is usually a small vector. + * We reserve a small buffer to it to prevent frequent heap allocation + * + * Instead, we can use a std::vector here, and reserve + * kReserveAllocatorNum in constructor of Allocation. + * But using std::vector would make ocr recognition model + * fail in CE. The train duration is 8% slower than KPI. 
+ */ static constexpr size_t kReserveAllocatorNum = 8; using DecoratedAllocatorStack = framework::InlinedVector; diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 09328aded58..1ff719c9e72 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -29,7 +29,6 @@ #include "paddle/fluid/memory/allocation/legacy_allocator.h" #include "paddle/fluid/memory/allocation/locked_allocator.h" #include "paddle/fluid/memory/allocation/retry_allocator.h" -#include "paddle/fluid/memory/allocation/zero_size_allocator.h" #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" @@ -192,10 +191,6 @@ class CUDAPinnedChunkedAllocator : public ChunkedAllocator { class AllocatorFacadePrivate { public: - std::map> allocators_; - - ~AllocatorFacadePrivate() = default; - AllocatorFacadePrivate() { auto strategy = GetAllocatorStrategy(); switch (strategy) { @@ -207,7 +202,6 @@ class AllocatorFacadePrivate { InitCPUAllocator(); InitCUDAAllocator(); InitCUDAPinnedAllocator(); - WrapZeroSizeAllocator(); break; } default: { @@ -215,6 +209,18 @@ class AllocatorFacadePrivate { static_cast(strategy)); } } + InitZeroSizeAllocators(); + } + + inline const std::shared_ptr& GetAllocator( + const platform::Place& place, size_t size) { + const auto& allocators = (size > 0 ? allocators_ : zero_size_allocators_); + auto iter = allocators.find(place); + if (iter == allocators.end()) { + throw BadAlloc( + string::Sprintf("No such allocator for the place, %s", place)); + } + return iter->second; } private: @@ -252,12 +258,40 @@ class AllocatorFacadePrivate { #endif } - void WrapZeroSizeAllocator() { - for (auto& pair : allocators_) { - pair.second = - std::make_shared(pair.second, pair.first); + class ZeroSizeAllocator : public Allocator { + public: + explicit ZeroSizeAllocator(platform::Place place) : place_(place) {} + + protected: + Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { + return new Allocation(nullptr, 0, place_); + } + + void FreeImpl(Allocation* allocation) override { delete allocation; } + + private: + platform::Place place_; + }; + + void InitZeroSizeAllocators() { + std::vector places; + places.emplace_back(platform::CPUPlace()); +#ifdef PADDLE_WITH_CUDA + int device_count = platform::GetCUDADeviceCount(); + for (int dev_id = 0; dev_id < device_count; ++dev_id) { + places.emplace_back(platform::CUDAPlace(dev_id)); + } + places.emplace_back(platform::CUDAPinnedPlace()); +#endif + + for (auto& p : places) { + zero_size_allocators_[p] = std::make_shared(p); } } + + private: + std::map> allocators_; + std::map> zero_size_allocators_; }; // Pimpl. Make interface clean. 
@@ -276,12 +310,7 @@ std::shared_ptr AllocatorFacade::AllocShared( AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size, Allocator::Attr attr) { - auto it = m_->allocators_.find(place); - if (it == m_->allocators_.end()) { - throw BadAlloc( - string::Sprintf("No such allocator for the place, %s", place)); - } - return m_->allocators_.at(place)->Allocate(size, attr); + return m_->GetAllocator(place, size)->Allocate(size, attr); } } // namespace allocation diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc index e04c0aa34b1..2f3e6205c3c 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator.cc @@ -16,7 +16,6 @@ #include #include #include -#include "paddle/fluid/memory/allocation/allocation_with_underlying.h" namespace paddle { namespace memory { diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc index c43099cc88f..e9ec39c8932 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.cc +++ b/paddle/fluid/memory/allocation/locked_allocator.cc @@ -15,7 +15,6 @@ #include "paddle/fluid/memory/allocation/locked_allocator.h" #include // NOLINT #include -#include "paddle/fluid/memory/allocation/allocation_with_underlying.h" #include "paddle/fluid/platform/lock_guard_ptr.h" namespace paddle { diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc index 7e888988f96..167dd923dbb 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.cc +++ b/paddle/fluid/memory/allocation/retry_allocator.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/retry_allocator.h" -#include "paddle/fluid/memory/allocation/allocation_with_underlying.h" namespace paddle { namespace memory { namespace allocation { diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.cc b/paddle/fluid/memory/allocation/zero_size_allocator.cc deleted file mode 100644 index 39743bcb10c..00000000000 --- a/paddle/fluid/memory/allocation/zero_size_allocator.cc +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/memory/allocation/zero_size_allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -bool ZeroSizeAllocator::IsAllocThreadSafe() const { - return underlying_allocator_->IsAllocThreadSafe(); -} - -Allocation *ZeroSizeAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { - if (size == 0) { - return new Allocation(nullptr, 0, place_); - } else { - return underlying_allocator_->Allocate(size, attr).release(); - } -} - -void ZeroSizeAllocator::FreeImpl(Allocation *allocation) { - if (allocation->size() == 0) { - delete allocation; - } else { - underlying_allocator_->Free(allocation); - } -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/zero_size_allocator.h b/paddle/fluid/memory/allocation/zero_size_allocator.h deleted file mode 100644 index 08a7a06dbf2..00000000000 --- a/paddle/fluid/memory/allocation/zero_size_allocator.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include "paddle/fluid/memory/allocation/allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -// The allocator handles the request's size is zero. Allocator will always -// return an allocation even the request size is zero. However, the -// allocation.ptr() is nullptr -class ZeroSizeAllocator : public Allocator { - public: - ZeroSizeAllocator(std::shared_ptr underlying_allocator, - const platform::Place& p) - : underlying_allocator_(std::move(underlying_allocator)), place_(p) {} - - bool IsAllocThreadSafe() const override; - - protected: - Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; - void FreeImpl(Allocation* allocation) override; - - private: - std::shared_ptr underlying_allocator_; - const platform::Place& place_; -}; - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index 1408163e4b5..c08d86eb213 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/memory/memcpy.h" #include // for memcpy +#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { @@ -24,6 +25,7 @@ template <> void Copy(platform::CPUPlace, void* dst, platform::CPUPlace, const void* src, size_t num) { + if (UNLIKELY(num == 0)) return; std::memcpy(dst, src, num); } @@ -40,6 +42,7 @@ template <> void Copy( platform::CPUPlace dst_place, void* dst, platform::CUDAPlace src_place, const void* src, size_t num, cudaStream_t stream) { + if (UNLIKELY(num == 0)) return; platform::SetDeviceId(src_place.device); if (stream) { @@ -59,6 +62,8 @@ template <> void Copy( platform::CUDAPlace dst_place, void* dst, platform::CPUPlace src_place, const void* src, size_t num, cudaStream_t stream) { + if (UNLIKELY(num == 0)) return; + platform::SetDeviceId(dst_place.device); if (stream) { platform::RecordEvent record_event("GpuMemcpyAsync:CPU->GPU"); @@ -77,6 +82,8 @@ template <> void Copy( platform::CUDAPlace dst_place, void* dst, platform::CUDAPlace src_place, const void* src, size_t num, cudaStream_t stream) { + if (UNLIKELY(num == 0)) return; + if (dst_place == src_place) { platform::SetDeviceId(src_place.device); if (stream) { @@ -103,6 +110,7 @@ template <> void Copy( platform::CPUPlace dst_place, void* dst, platform::CUDAPinnedPlace src_place, const void* src, size_t num) { + if (UNLIKELY(num == 0)) return; std::memcpy(dst, src, num); } @@ -110,6 +118,7 @@ template <> void Copy( platform::CUDAPinnedPlace dst_place, void* dst, platform::CPUPlace src_place, const void* src, size_t num) { + if (UNLIKELY(num == 0)) return; std::memcpy(dst, src, num); } @@ -117,6 +126,7 @@ template <> void Copy( platform::CUDAPinnedPlace dst_place, void* dst, platform::CUDAPinnedPlace src_place, const void* src, size_t num) { + if (UNLIKELY(num == 0)) return; std::memcpy(dst, src, num); } @@ -125,6 +135,7 @@ void Copy( platform::CUDAPinnedPlace dst_place, void* dst, platform::CUDAPlace src_place, const void* src, size_t num, cudaStream_t stream) { + if (UNLIKELY(num == 0)) return; platform::SetDeviceId(src_place.device); if (stream) { platform::RecordEvent record_event("GpuMemcpyAsync:GPU->CUDAPinned"); @@ -140,6 +151,8 @@ void Copy( platform::CUDAPlace dst_place, void* dst, platform::CUDAPinnedPlace src_place, const void* src, size_t num, cudaStream_t stream) { + if (UNLIKELY(num == 0)) return; + platform::SetDeviceId(dst_place.device); if (stream) { platform::RecordEvent record_event("GpuMemcpyAsync:CUDAPinned->GPU"); diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index d583909a666..f6295337d1f 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -454,6 +454,7 @@ class BatchNormGradKernel const auto *running_mean = ctx.Input("Mean"); const auto *running_variance = ctx.Input("Variance"); mean_data = running_mean->data(); + inv_var_tensor.Resize({C}); T *running_inv_var_data = inv_var_tensor.mutable_data(ctx.GetPlace()); EigenVectorArrayMap inv_var_tmp(running_inv_var_data, C); ConstEigenVectorArrayMap var_arr(running_variance->data(), C); diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu index 3d24e8986df..e9a7201bc08 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -264,8 +264,8 @@ struct FindRangeAbsMaxFunctor { T* out_scale_data = out_scale->mutable_data(gpu_place); framework::Tensor need_find_max, out_size; - int* find_max = 
need_find_max.mutable_data<int>(gpu_place);
-    int* out_size_data = out_size.mutable_data<int>(gpu_place);
+    int* find_max = need_find_max.mutable_data<int>({1}, gpu_place);
+    int* out_size_data = out_size.mutable_data<int>({1}, gpu_place);
 
     FindRangeAbsMaxAndFillArray<T><<<1, 1, 0, ctx.stream()>>>(
         cur_scale.data<T>(), last_scale.data<T>(), iter.data<int64_t>(),
-- 
GitLab
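
Reviewer note (not part of the patch): a minimal sketch of how the zero-size path behaves after this change. It assumes the public wrappers paddle::memory::Alloc (paddle/fluid/memory/malloc.h) and paddle::memory::Copy (paddle/fluid/memory/memcpy.h) still forward to AllocatorFacade and to the Copy specializations touched above; the function name ZeroSizeSketch is made up for illustration.

#include "paddle/fluid/memory/malloc.h"   // paddle::memory::Alloc (assumed wrapper)
#include "paddle/fluid/memory/memcpy.h"   // paddle::memory::Copy
#include "paddle/fluid/platform/place.h"

void ZeroSizeSketch() {
  paddle::platform::CPUPlace cpu;

  // A zero-byte request is now served by the nested ZeroSizeAllocator that
  // AllocatorFacadePrivate::InitZeroSizeAllocators() registers per place,
  // so no underlying allocator is touched and no BadAlloc is thrown.
  auto holder = paddle::memory::Alloc(cpu, /*size=*/0);
  // Expected: holder->ptr() == nullptr, holder->size() == 0,
  //           holder->place() == cpu.

  // The CPU->CPU Copy specialization returns early when num == 0,
  // so the null pointer above is never dereferenced.
  paddle::memory::Copy(cpu, holder->ptr(), cpu, holder->ptr(), /*num=*/0);
}

The same num == 0 early-return guard is added to every CUDA/CUDAPinned Copy specialization in memcpy.cc, which is what lets zero-size allocations with a null data pointer pass safely through the existing copy paths.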