diff --git a/cmake/generic.cmake b/cmake/generic.cmake index e8bc285bdc95e213b9da2ee388078349a46d2798..c4c9f77df8d57fe162616d2250bd4dfe5b7754e7 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -244,11 +244,11 @@ function(cc_test TARGET_NAME) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) # Support linking flags: --whole-archive (Linux) / -force_load (MacOS) - target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog) + target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) if("${cc_test_DEPS}" MATCHES "ARCHIVE_START") list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END) endif() - add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog) + add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} ${cc_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) @@ -311,8 +311,8 @@ function(nv_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog) - add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog) + target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) + add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) add_test(${TARGET_NAME} ${TARGET_NAME}) endif() endfunction(nv_test) @@ -387,8 +387,8 @@ function(hip_test TARGET_NAME) endif() add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources}) set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP) - target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) - add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) + target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags) + add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags) add_test(${TARGET_NAME} ${TARGET_NAME}) endif() endfunction(hip_test) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index a473ed7400012b7d0cbc5ab9bed263b3cca8c6ec..3840bbe83b68dc2a49aa73feb57a80e9992cad5f 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -7,9 +7,9 @@ cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) if(WITH_GPU) - nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS ddim place paddle_memory device_context framework_proto) + nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS ddim place memory device_context framework_proto) else() - cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS ddim place paddle_memory device_context framework_proto) + cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS ddim place memory device_context framework_proto) endif() cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) @@ -21,9 +21,9 @@ endif() cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) -nv_test(mixed_vector_test SRCS mixed_vector_test.cu DEPS place paddle_memory device_context init) +nv_test(mixed_vector_test SRCS mixed_vector_test.cu DEPS place memory device_context init) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio) -cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor paddle_memory) +cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory) nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor init) cc_library(reader SRCS reader.cc DEPS lod_tensor ddim) diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 17e38b1cf042657834b4d0d1c12cbbb92f19fa45..194df3e4a8b50700e2be01ce5ebca83b92501fb8 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -15,7 +15,6 @@ limitations under the License. */ #include "paddle/fluid/framework/scope.h" #include // for unique_ptr -#include // for call_once #include #include "glog/logging.h" #include "paddle/fluid/framework/threadpool.h" @@ -39,6 +38,7 @@ Scope::~Scope() { } Scope& Scope::NewScope() const { + std::unique_lock lock(mutex_); kids_.push_back(new Scope(this)); return *kids_.back(); } @@ -92,6 +92,7 @@ std::vector Scope::LocalVarNames() const { } void Scope::DeleteScope(Scope* scope) { + std::unique_lock lock(mutex_); auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); @@ -103,7 +104,7 @@ void Scope::DeleteScope(Scope* scope) { } } -void Scope::EraseVars(std::vector& var_names) { +void Scope::EraseVars(const std::vector& var_names) { std::set var_set(var_names.begin(), var_names.end()); for (auto it = vars_.begin(); it != vars_.end();) { if (var_set.find(it->first) != var_set.end()) { diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index c1e1f49caaa5a60df0e97289aada465b45213971..97a15c71773051dfc01c98f11cf9cb76adbcec7f 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include // NOLINT #include #include #include @@ -51,7 +52,7 @@ class Scope { /// Create a variable with a scope-unique name. Variable* Var(std::string* name = nullptr); - void EraseVars(std::vector& var_names); + void EraseVars(const std::vector& var_names); /// Find a variable in the scope or any of its ancestors. Returns /// nullptr if cannot find. @@ -88,6 +89,9 @@ class Scope { Scope const* parent_{nullptr}; DISABLE_COPY_AND_ASSIGN(Scope); + + private: + mutable std::mutex mutex_; }; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index aff427310f15be72f5c8d0fa1537ffa6bbe2881d..f417f62f3f75360f4ae1b7795608ae95200cfeb8 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -1,4 +1,4 @@ -set(FLUID_CORE_MODULES proto_desc paddle_memory lod_tensor executor prune init) +set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor prune init) cc_library(paddle_fluid_api SRCS io.cc diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt index 8b3043af7a18787a08583d47b76da679ccb63740..709fc7e12e1db537ceece30c405c0e8a2582e8ca 100644 --- a/paddle/fluid/memory/CMakeLists.txt +++ b/paddle/fluid/memory/CMakeLists.txt @@ -1,20 +1,15 @@ add_subdirectory(detail) -cc_library(memory SRCS memory.cc DEPS place enforce) +cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce) cc_library(memcpy SRCS memcpy.cc DEPS place) -cc_library(paddle_memory +cc_library(memory DEPS - memory - memcpy - meta_data - meta_cache - memory_block - buddy_allocator - system_allocator) + malloc + memcpy) -cc_test(memory_test SRCS memory_test.cc DEPS place paddle_memory) +cc_test(malloc_test SRCS malloc_test.cc DEPS malloc) #if (WITH_GPU) -# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place paddle_memory) +# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory) #endif() diff --git a/paddle/fluid/memory/detail/CMakeLists.txt b/paddle/fluid/memory/detail/CMakeLists.txt index b9c3fc31c1523abf3acbd116745bbf1596454aac..c725dba5e98c200c2542d97cb8f53a938f6b614a 100644 --- a/paddle/fluid/memory/detail/CMakeLists.txt +++ b/paddle/fluid/memory/detail/CMakeLists.txt @@ -1,3 +1,5 @@ +cc_library(memory_block SRCS memory_block.cc memory_block_desc.cc meta_cache.cc) + if(${WITH_GPU}) nv_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info gpu_info) else(${WITH_GPU}) @@ -6,10 +8,4 @@ endif(${WITH_GPU}) cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator) -cc_library(meta_data SRCS meta_data.cc) - -cc_library(meta_cache SRCS meta_cache.cc) - -cc_library(memory_block SRCS memory_block.cc) - -cc_library(buddy_allocator SRCS buddy_allocator.cc DEPS glog) +cc_library(buddy_allocator SRCS buddy_allocator.cc DEPS memory_block system_allocator glog) diff --git a/paddle/fluid/memory/detail/buddy_allocator.cc b/paddle/fluid/memory/detail/buddy_allocator.cc index 876837838648d6733b104a5496454f5dc58bbb71..4194ba197948b47003863196efdac1c08a7ae4f6 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.cc +++ b/paddle/fluid/memory/detail/buddy_allocator.cc @@ -46,7 +46,8 @@ inline size_t align(size_t size, size_t alignment) { void* BuddyAllocator::Alloc(size_t unaligned_size) { // adjust allocation alignment - size_t size = align(unaligned_size + sizeof(Metadata), min_chunk_size_); + size_t size = + align(unaligned_size + sizeof(MemoryBlock::Desc), min_chunk_size_); // acquire the allocator lock std::lock_guard lock(mutex_); @@ -103,7 +104,7 @@ void BuddyAllocator::Free(void* p) { return; } - block->mark_as_free(cache_); + block->mark_as_free(&cache_); total_used_ -= block->total_size(cache_); total_free_ += block->total_size(cache_); @@ -122,7 +123,7 @@ void BuddyAllocator::Free(void* p) { right_buddy)); // merge its right buddy to the block - block->merge(cache_, right_buddy); + block->merge(&cache_, right_buddy); } } @@ -139,7 +140,7 @@ void BuddyAllocator::Free(void* p) { left_buddy->total_size(cache_), left_buddy)); // merge the block to its left buddy - left_buddy->merge(cache_, block); + left_buddy->merge(&cache_, block); block = left_buddy; } } @@ -163,13 +164,13 @@ size_t BuddyAllocator::Used() { return total_used_; } void* BuddyAllocator::SystemAlloc(size_t size) { size_t index = 0; - void* p = system_allocator_->Alloc(index, size); + void* p = system_allocator_->Alloc(&index, size); VLOG(10) << "Allocated " << p << " from system allocator."; if (p == nullptr) return nullptr; - static_cast(p)->init(cache_, MemoryBlock::HUGE_CHUNK, index, + static_cast(p)->init(&cache_, MemoryBlock::HUGE_CHUNK, index, size, nullptr, nullptr); return static_cast(p)->data(); @@ -187,14 +188,14 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { // Allocate a new maximum sized block size_t index = 0; - void* p = system_allocator_->Alloc(index, max_chunk_size_); + void* p = system_allocator_->Alloc(&index, max_chunk_size_); if (p == nullptr) return pool_.end(); VLOG(10) << "Creating and inserting new block " << p << " from system allocator"; - static_cast(p)->init(cache_, MemoryBlock::FREE_CHUNK, index, + static_cast(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index, max_chunk_size_, nullptr, nullptr); // gpu fallback allocation @@ -238,11 +239,11 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, VLOG(10) << "Split block (" << block << ", " << block->total_size(cache_) << ") into"; - block->split(cache_, size); + block->split(&cache_, size); VLOG(10) << "Left block (" << block << ", " << block->total_size(cache_) << ")"; - block->set_type(cache_, MemoryBlock::ARENA_CHUNK); + block->set_type(&cache_, MemoryBlock::ARENA_CHUNK); // the rest of memory if exist if (block->has_right_buddy(cache_)) { diff --git a/paddle/fluid/memory/detail/buddy_allocator.h b/paddle/fluid/memory/detail/buddy_allocator.h index a4ee70c2586f37e3b2328dedfe28135e14d8b18d..2f39d774d6fb6a2bc37877eb2f8b90bebd3cda28 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.h +++ b/paddle/fluid/memory/detail/buddy_allocator.h @@ -14,18 +14,18 @@ limitations under the License. */ #pragma once -#include "paddle/fluid/memory/detail/meta_cache.h" -#include "paddle/fluid/memory/detail/meta_data.h" +#include // NOLINT +#include +#include +#include +#include + +#include "paddle/fluid/memory/detail/memory_block.h" #include "paddle/fluid/memory/detail/system_allocator.h" #include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/gpu_info.h" -#include -#include -#include -#include - namespace paddle { namespace memory { namespace detail { diff --git a/paddle/fluid/memory/detail/memory_block.cc b/paddle/fluid/memory/detail/memory_block.cc index 07123f2669c3a829ff28e9fab5a404047c5a09c7..f34b922b25a0110690671d487f190e1b977a67bb 100644 --- a/paddle/fluid/memory/detail/memory_block.cc +++ b/paddle/fluid/memory/detail/memory_block.cc @@ -13,143 +13,142 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/memory/detail/memory_block.h" -#include "paddle/fluid/memory/detail/meta_cache.h" -#include "paddle/fluid/memory/detail/meta_data.h" #include "paddle/fluid/platform/assert.h" namespace paddle { namespace memory { namespace detail { -void MemoryBlock::init(MetadataCache& cache, Type t, size_t index, size_t size, +void MemoryBlock::init(MetadataCache* cache, Type t, size_t index, size_t size, void* left_buddy, void* right_buddy) { - cache.store(this, Metadata(t, index, size - sizeof(Metadata), size, - static_cast(left_buddy), - static_cast(right_buddy))); + cache->save( + this, MemoryBlock::Desc(t, index, size - sizeof(MemoryBlock::Desc), size, + static_cast(left_buddy), + static_cast(right_buddy))); } -MemoryBlock::Type MemoryBlock::type(MetadataCache& cache) const { +MemoryBlock::Type MemoryBlock::type(const MetadataCache& cache) const { return cache.load(this).type; } -size_t MemoryBlock::size(MetadataCache& cache) const { +size_t MemoryBlock::size(const MetadataCache& cache) const { return cache.load(this).size; } -size_t MemoryBlock::total_size(MetadataCache& cache) const { +size_t MemoryBlock::index(const MetadataCache& cache) const { + return cache.load(this).index; +} + +size_t MemoryBlock::total_size(const MetadataCache& cache) const { return cache.load(this).total_size; } -MemoryBlock* MemoryBlock::left_buddy(MetadataCache& cache) const { +bool MemoryBlock::has_left_buddy(const MetadataCache& cache) const { + return left_buddy(cache) != nullptr; +} + +bool MemoryBlock::has_right_buddy(const MetadataCache& cache) const { + return right_buddy(cache) != nullptr; +} + +MemoryBlock* MemoryBlock::left_buddy(const MetadataCache& cache) const { return cache.load(this).left_buddy; } -MemoryBlock* MemoryBlock::right_buddy(MetadataCache& cache) const { +MemoryBlock* MemoryBlock::right_buddy(const MetadataCache& cache) const { return cache.load(this).right_buddy; } -void MemoryBlock::split(MetadataCache& cache, size_t size) { +void MemoryBlock::split(MetadataCache* cache, size_t size) { // make sure the split fits - PADDLE_ASSERT(total_size(cache) >= size); + PADDLE_ASSERT(total_size(*cache) >= size); // bail out if there is no room for another partition - if (total_size(cache) - size <= sizeof(Metadata)) { + if (total_size(*cache) - size <= sizeof(MemoryBlock::Desc)) { return; } // find the position of the split void* right_partition = reinterpret_cast(this) + size; - size_t remaining_size = total_size(cache) - size; + size_t remaining_size = total_size(*cache) - size; // Add the new block as a buddy - auto metadata = cache.load(this); + auto metadata = cache->load(this); // Write the metadata for the new block auto new_block_right_buddy = metadata.right_buddy; - cache.store( - static_cast(right_partition), - Metadata(FREE_CHUNK, index(cache), remaining_size - sizeof(Metadata), - remaining_size, this, new_block_right_buddy)); + cache->save(static_cast(right_partition), + MemoryBlock::Desc(FREE_CHUNK, index(*cache), + remaining_size - sizeof(MemoryBlock::Desc), + remaining_size, this, new_block_right_buddy)); metadata.right_buddy = static_cast(right_partition); - metadata.size = size - sizeof(Metadata); + metadata.size = size - sizeof(MemoryBlock::Desc); metadata.total_size = size; - cache.store(this, metadata); + cache->save(this, metadata); // Write metadata for the new block's right buddy if (new_block_right_buddy != nullptr) { - auto buddy_metadata = cache.load(new_block_right_buddy); + auto buddy_metadata = cache->load(new_block_right_buddy); buddy_metadata.left_buddy = static_cast(right_partition); - cache.store(new_block_right_buddy, buddy_metadata); + cache->save(new_block_right_buddy, buddy_metadata); } } -void MemoryBlock::merge(MetadataCache& cache, MemoryBlock* right_buddy) { +void MemoryBlock::merge(MetadataCache* cache, MemoryBlock* right_buddy) { // only free blocks can be merged - PADDLE_ASSERT(type(cache) == FREE_CHUNK); - PADDLE_ASSERT(right_buddy->type(cache) == FREE_CHUNK); + PADDLE_ASSERT(type(*cache) == FREE_CHUNK); + PADDLE_ASSERT(right_buddy->type(*cache) == FREE_CHUNK); - auto metadata = cache.load(this); + auto metadata = cache->load(this); // link this->buddy's buddy - metadata.right_buddy = right_buddy->right_buddy(cache); + metadata.right_buddy = right_buddy->right_buddy(*cache); // link buddy's buddy -> this if (metadata.right_buddy != nullptr) { - auto buddy_metadata = cache.load(metadata.right_buddy); + auto buddy_metadata = cache->load(metadata.right_buddy); buddy_metadata.left_buddy = this; - cache.store(metadata.right_buddy, buddy_metadata); + cache->save(metadata.right_buddy, buddy_metadata); } - metadata.size += right_buddy->total_size(cache); - metadata.total_size += right_buddy->total_size(cache); + metadata.size += right_buddy->total_size(*cache); + metadata.total_size += right_buddy->total_size(*cache); - cache.store(this, metadata); - cache.store(right_buddy, Metadata(INVALID_CHUNK, 0, 0, 0, nullptr, nullptr)); + cache->save(this, metadata); + cache->save(right_buddy, + MemoryBlock::Desc(INVALID_CHUNK, 0, 0, 0, nullptr, nullptr)); } -void MemoryBlock::mark_as_free(MetadataCache& cache) { +void MemoryBlock::mark_as_free(MetadataCache* cache) { // check for double free or corruption - PADDLE_ASSERT(type(cache) != FREE_CHUNK); - PADDLE_ASSERT(type(cache) != INVALID_CHUNK); - + PADDLE_ASSERT(type(*cache) != FREE_CHUNK); + PADDLE_ASSERT(type(*cache) != INVALID_CHUNK); set_type(cache, FREE_CHUNK); } -void MemoryBlock::set_type(MetadataCache& cache, Type t) { - auto metadata = cache.load(this); - +void MemoryBlock::set_type(MetadataCache* cache, Type t) { + auto metadata = cache->load(this); metadata.type = t; - - cache.store(this, metadata); -} - -bool MemoryBlock::has_left_buddy(MetadataCache& cache) const { - return left_buddy(cache) != nullptr; -} - -bool MemoryBlock::has_right_buddy(MetadataCache& cache) const { - return right_buddy(cache) != nullptr; -} - -size_t MemoryBlock::index(MetadataCache& cache) const { - return cache.load(this).index; + cache->save(this, metadata); } void* MemoryBlock::data() const { - return const_cast(reinterpret_cast(this)) + 1; + return const_cast( + reinterpret_cast(this)) + + 1; } MemoryBlock* MemoryBlock::metadata() const { return const_cast(reinterpret_cast( - reinterpret_cast(this) - 1)); + reinterpret_cast(this) - 1)); } } // namespace detail diff --git a/paddle/fluid/memory/detail/memory_block.h b/paddle/fluid/memory/detail/memory_block.h index 72b40b73177d086aa912416e7f9cb3cd4ad5b45e..5cceba659beeec1b3c986dc43229f6725e3e11de 100644 --- a/paddle/fluid/memory/detail/memory_block.h +++ b/paddle/fluid/memory/detail/memory_block.h @@ -11,21 +11,21 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once -#include +#include +#include namespace paddle { namespace memory { namespace detail { -// Forward Declarations +// Forward declaration. class MetadataCache; -/*! \brief A class used to interpret the contents of a memory block */ -class MemoryBlock { - public: +// MemoryBlock represents Each allocated memory block, which contains +// MemoryBlock::Desc and the payload. +struct MemoryBlock { enum Type { FREE_CHUNK, // memory is free and idle ARENA_CHUNK, // memory is being occupied @@ -33,57 +33,96 @@ class MemoryBlock { INVALID_CHUNK // memory is invalid }; - public: - void init(MetadataCache& cache, Type t, size_t index, size_t size, + // init saves the MemoryBlock::Desc of the memory block in a MetadataCache. + // If it is a CPU memory block, the MetadataCache writes the + // MemoryBlock::Desc to the beginning of the block; or, if it is a GPU memory + // block, the MetadataCache writes the Meatadata to a std::map in + // the CPU. + void init(MetadataCache* cache, Type t, size_t index, size_t size, void* left_buddy, void* right_buddy); - public: - /*! \brief The type of the allocation */ - Type type(MetadataCache& cache) const; - - /*! \brief The size of the data region */ - size_t size(MetadataCache& cache) const; + // All these accessors returns fields in the MemoryBlock::Desc of the memory + // block. They all need a MetadataCache instance as their first + // parameter because they read the MemoryBlock::Desc from the cache. + Type type(const MetadataCache& cache) const; + size_t size(const MetadataCache& cache) const; + size_t index(const MetadataCache& cache) const; + size_t total_size(const MetadataCache& cache) const; + bool has_left_buddy(const MetadataCache& cache) const; + bool has_right_buddy(const MetadataCache& cache) const; + MemoryBlock* left_buddy(const MetadataCache& cache) const; + MemoryBlock* right_buddy(const MetadataCache& cache) const; - /*! \brief An index to track the allocator */ - size_t index(MetadataCache& cache) const; + // Split the allocation into left/right blocks. + void split(MetadataCache* cache, size_t size); - /*! \brief The total size of the block */ - size_t total_size(MetadataCache& cache) const; + // Merge left and right blocks together. + void merge(MetadataCache* cache, MemoryBlock* right_buddy); - /*! \brief Check the left buddy of the block */ - bool has_left_buddy(MetadataCache& cache) const; + // Mark the allocation as free. + void mark_as_free(MetadataCache* cache); - /*! \brief Check the right buddy of the block */ - bool has_right_buddy(MetadataCache& cache) const; - - /*! \brief Get the left buddy */ - MemoryBlock* left_buddy(MetadataCache& cache) const; - - /*! \brief Get the right buddy */ - MemoryBlock* right_buddy(MetadataCache& cache) const; - - public: - /*! \brief Split the allocation into left/right blocks */ - void split(MetadataCache& cache, size_t size); + // Change the type of the allocation. + void set_type(MetadataCache* cache, Type t); - /*! \brief Merge left and right blocks together */ - void merge(MetadataCache& cache, MemoryBlock* right_buddy); - - /*! \brief Mark the allocation as free */ - void mark_as_free(MetadataCache& cache); - - /*! \brief Change the type of the allocation */ - void set_type(MetadataCache& cache, Type t); - - public: - /*! \brief Get a pointer to the memory block's data */ void* data() const; - - /*! \brief Get a pointer to the memory block's metadata */ MemoryBlock* metadata() const; + // MemoryBlock::Desc describes a MemoryBlock. + struct Desc { + Desc(MemoryBlock::Type t, size_t i, size_t s, size_t ts, MemoryBlock* l, + MemoryBlock* r); + Desc(); + + // Updates guard_begin and guard_end by hashes of the Metadata object. + void update_guards(); + + // Checks that guard_begin and guard_end are hashes of the Metadata object. + bool check_guards() const; + + // TODO(gangliao): compress this + size_t guard_begin = 0; + MemoryBlock::Type type = MemoryBlock::INVALID_CHUNK; + size_t index = 0; + size_t size = 0; + size_t total_size = 0; + MemoryBlock* left_buddy = nullptr; + MemoryBlock* right_buddy = nullptr; + size_t guard_end = 0; + }; +}; + +// A cache for accessing memory block meta-data that may be expensive +// to access directly. This class exists to unify the +// MemoryBlock::Desc format between GPU and CPU allocations. It should +// be removed when the CPU can access all GPU allocations directly via +// UVM. +class MetadataCache { public: - static size_t overhead(); + explicit MetadataCache(bool uses_gpu); + + // Disable copying and assignment. + MetadataCache(const MetadataCache&) = delete; + MetadataCache& operator=(const MetadataCache&) = delete; + + // Returns the MemoryBlock::Desc for a memory block. When MetadataCache is + // used to manage CPU memory, the MemoryBlock::Desc resides at the beginning + // of the memory block; when used to manage GPU memory, the + // Meatadata resides in CPU memory indexed by cache_. + MemoryBlock::Desc load(const MemoryBlock* memory_block) const; + + // Saves the MemoryBlock::Desc of a memory block into the cache. For CPU + // memory block, writes the MemoryBlock::Desc to the beginning of the memory + // block; whereas for GPU memory, writes it to cache_. + void save(MemoryBlock* memory_block, const MemoryBlock::Desc& meta_data); + + // For GPU memory block, erases its MemoryBlock::Desc from cache_. + void invalidate(MemoryBlock* memory_block); + + private: + typedef std::unordered_map MetadataMap; + MetadataMap cache_; + bool uses_gpu_; }; } // namespace detail diff --git a/paddle/fluid/memory/detail/meta_data.cc b/paddle/fluid/memory/detail/memory_block_desc.cc similarity index 54% rename from paddle/fluid/memory/detail/meta_data.cc rename to paddle/fluid/memory/detail/memory_block_desc.cc index ad862af1705835c495a30232aa2bba2d2a56ad89..393dd9209c0aa443cd17c29b2f9de6eafb48bac9 100644 --- a/paddle/fluid/memory/detail/meta_data.cc +++ b/paddle/fluid/memory/detail/memory_block_desc.cc @@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/memory/detail/meta_data.h" - #include +#include "paddle/fluid/memory/detail/memory_block.h" + namespace paddle { namespace memory { namespace detail { -Metadata::Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, - MemoryBlock* l, MemoryBlock* r) +MemoryBlock::Desc::Desc(MemoryBlock::Type t, size_t i, size_t s, size_t ts, + MemoryBlock* l, MemoryBlock* r) : type(t), index(i), size(s), @@ -29,7 +29,7 @@ Metadata::Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, left_buddy(l), right_buddy(r) {} -Metadata::Metadata() +MemoryBlock::Desc::Desc() : type(MemoryBlock::INVALID_CHUNK), index(0), size(0), @@ -37,32 +37,36 @@ Metadata::Metadata() left_buddy(nullptr), right_buddy(nullptr) {} +namespace { + template -inline void hash_combine(std::size_t& seed, const T& v) { +inline void hash_combine(std::size_t* seed, const T& v) { std::hash hasher; - seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + (*seed) ^= hasher(v) + 0x9e3779b9 + ((*seed) << 6) + ((*seed) >> 2); } -inline size_t hash(const Metadata* metadata, size_t initial_seed) { +inline size_t hash(const MemoryBlock::Desc& metadata, size_t initial_seed) { size_t seed = initial_seed; - hash_combine(seed, (size_t)metadata->type); - hash_combine(seed, metadata->index); - hash_combine(seed, metadata->size); - hash_combine(seed, metadata->total_size); - hash_combine(seed, metadata->left_buddy); - hash_combine(seed, metadata->right_buddy); + hash_combine(&seed, static_cast(metadata.type)); + hash_combine(&seed, metadata.index); + hash_combine(&seed, metadata.size); + hash_combine(&seed, metadata.total_size); + hash_combine(&seed, metadata.left_buddy); + hash_combine(&seed, metadata.right_buddy); return seed; } -void Metadata::update_guards() { - guard_begin = hash(this, 1); - guard_end = hash(this, 2); +} // namespace + +void MemoryBlock::Desc::update_guards() { + guard_begin = hash(*this, 1); + guard_end = hash(*this, 2); } -bool Metadata::check_guards() const { - return guard_begin == hash(this, 1) && guard_end == hash(this, 2); +bool MemoryBlock::Desc::check_guards() const { + return guard_begin == hash(*this, 1) && guard_end == hash(*this, 2); } } // namespace detail diff --git a/paddle/fluid/memory/detail/meta_cache.cc b/paddle/fluid/memory/detail/meta_cache.cc index 43249e842ad4d2419fed041e6c9056021e9663cd..b86e4f38c42a26e155f276f9b73cbed1d0d83f7d 100644 --- a/paddle/fluid/memory/detail/meta_cache.cc +++ b/paddle/fluid/memory/detail/meta_cache.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/memory/detail/meta_cache.h" #include "glog/logging.h" #include "paddle/fluid/memory/detail/memory_block.h" #include "paddle/fluid/platform/assert.h" @@ -23,29 +22,28 @@ namespace detail { MetadataCache::MetadataCache(bool uses_gpu) : uses_gpu_(uses_gpu) {} -Metadata MetadataCache::load(const MemoryBlock* block) { +MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const { if (uses_gpu_) { - auto existing_metadata = cache_.find(block); - PADDLE_ASSERT(existing_metadata->second.check_guards()); - return existing_metadata->second; + auto existing_desc = cache_.find(block); + PADDLE_ASSERT(existing_desc->second.check_guards()); + return existing_desc->second; } else { - auto* meta = reinterpret_cast(block); - VLOG(10) << "Load MetaData type=" << meta->type; - PADDLE_ASSERT(meta->check_guards()); - return *reinterpret_cast(block); + auto* desc = reinterpret_cast(block); + VLOG(10) << "Load MemoryBlock::Desc type=" << desc->type; + PADDLE_ASSERT(desc->check_guards()); + return *reinterpret_cast(block); } } -void MetadataCache::store(MemoryBlock* block, - const Metadata& original_metadata) { - auto metadata = original_metadata; - - metadata.update_guards(); +void MetadataCache::save(MemoryBlock* block, + const MemoryBlock::Desc& original_desc) { + auto desc = original_desc; + desc.update_guards(); if (uses_gpu_) { - cache_[block] = metadata; + cache_[block] = desc; } else { - *reinterpret_cast(block) = metadata; + *reinterpret_cast(block) = desc; } } diff --git a/paddle/fluid/memory/detail/meta_cache.h b/paddle/fluid/memory/detail/meta_cache.h deleted file mode 100644 index 3283d756a6e7f7f1750442039797846bdad51125..0000000000000000000000000000000000000000 --- a/paddle/fluid/memory/detail/meta_cache.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/fluid/memory/detail/memory_block.h" -#include "paddle/fluid/memory/detail/meta_data.h" - -#include - -namespace paddle { -namespace memory { -namespace detail { - -/** - * \brief A cache for accessing memory block meta-data that may be expensive - * to access directly. - * - * \note This class exists to unify the metadata format between GPU and CPU - * allocations. It should be removed when the CPU can access all GPU - * allocations directly via UVM. - */ -class MetadataCache { - public: - explicit MetadataCache(bool uses_gpu); - - public: - /*! \brief Load the associated metadata for the specified memory block. */ - Metadata load(const MemoryBlock* memory_block); - - /*! \brief Store the associated metadata for the specified memory block. */ - void store(MemoryBlock* memory_block, const Metadata& meta_data); - - /*! \brief Indicate that the specified metadata will no longer be used. */ - void invalidate(MemoryBlock* memory_block); - - public: - MetadataCache(const MetadataCache&) = delete; - MetadataCache& operator=(const MetadataCache&) = delete; - - private: - bool uses_gpu_; - - private: - typedef std::unordered_map MetadataMap; - - private: - MetadataMap cache_; -}; - -} // namespace detail -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/detail/meta_data.h b/paddle/fluid/memory/detail/meta_data.h deleted file mode 100644 index 14895ee8727e98186b1f1295321951e12753fef6..0000000000000000000000000000000000000000 --- a/paddle/fluid/memory/detail/meta_data.h +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/fluid/memory/detail/memory_block.h" - -#include - -namespace paddle { -namespace memory { -namespace detail { - -class Metadata { - public: - Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, MemoryBlock* l, - MemoryBlock* r); - Metadata(); - - public: - /*! \brief Update the guards when metadata is changed */ - void update_guards(); - - /*! \brief Check consistency to previous modification */ - bool check_guards() const; - - public: - // TODO(gangliao): compress this - // clang-format off - size_t guard_begin = 0; - MemoryBlock::Type type = MemoryBlock::INVALID_CHUNK; - size_t index = 0; - size_t size = 0; - size_t total_size = 0; - MemoryBlock* left_buddy = nullptr; - MemoryBlock* right_buddy = nullptr; - size_t guard_end = 0; - // clang-format on -}; - -} // namespace detail -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc index a45f8c33ee5956f3409ee1b7c43628aa0acafb98..d5390529163491c2711e50ffad236534e88b73ee 100644 --- a/paddle/fluid/memory/detail/system_allocator.cc +++ b/paddle/fluid/memory/detail/system_allocator.cc @@ -13,16 +13,16 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/memory/detail/system_allocator.h" -#include "paddle/fluid/platform/assert.h" -#include "paddle/fluid/platform/cpu_info.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/gpu_info.h" #include // for malloc and free #include // for mlock and munlock #include // for std::max #include "gflags/gflags.h" +#include "paddle/fluid/platform/assert.h" +#include "paddle/fluid/platform/cpu_info.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/gpu_info.h" // If use_pinned_memory is true, CPUAllocator calls mlock, which // returns pinned and locked memory as staging areas for data exchange @@ -35,13 +35,13 @@ namespace paddle { namespace memory { namespace detail { -void* CPUAllocator::Alloc(size_t& index, size_t size) { +void* CPUAllocator::Alloc(size_t* index, size_t size) { // According to http://www.cplusplus.com/reference/cstdlib/malloc/, // malloc might not return nullptr if size is zero, but the returned // pointer shall not be dereferenced -- so we make it nullptr. if (size <= 0) return nullptr; - index = 0; // unlock memory + *index = 0; // unlock memory void* p; @@ -56,7 +56,7 @@ void* CPUAllocator::Alloc(size_t& index, size_t size) { if (p != nullptr) { if (FLAGS_use_pinned_memory) { - index = 1; + *index = 1; mlock(p, size); // lock memory } } @@ -75,7 +75,7 @@ bool CPUAllocator::UseGpu() const { return false; } #ifdef PADDLE_WITH_CUDA -void* GPUAllocator::Alloc(size_t& index, size_t size) { +void* GPUAllocator::Alloc(size_t* index, size_t size) { // CUDA documentation doesn't explain if cudaMalloc returns nullptr // if size is 0. We just make sure it does. if (size <= 0) return nullptr; @@ -93,7 +93,7 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) { } if (result == cudaSuccess) { - index = 0; + *index = 0; gpu_alloc_size_ += size; return p; } else { @@ -133,7 +133,7 @@ bool GPUAllocator::UseGpu() const { return true; } // PINNED memory allows direct DMA transfers by the GPU to and from system // memory. It’s locked to a physical address. -void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) { +void* CUDAPinnedAllocator::Alloc(size_t* index, size_t size) { if (size <= 0) return nullptr; // NOTE: here, we use CUDAPinnedMaxAllocSize as the maximum memory size @@ -154,7 +154,7 @@ void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) { cudaError_t result = cudaMallocHost(&p, size); if (result == cudaSuccess) { - index = 1; // PINNED memory + *index = 1; // PINNED memory cuda_pinnd_alloc_size_ += size; return p; } else { diff --git a/paddle/fluid/memory/detail/system_allocator.h b/paddle/fluid/memory/detail/system_allocator.h index e3c50ef6483c61e2016bbd967a4100057c87dca3..a0386a2dad1bb7faf54197a47ca7a5b6d9488817 100644 --- a/paddle/fluid/memory/detail/system_allocator.h +++ b/paddle/fluid/memory/detail/system_allocator.h @@ -29,14 +29,14 @@ namespace detail { class SystemAllocator { public: virtual ~SystemAllocator() {} - virtual void* Alloc(size_t& index, size_t size) = 0; + virtual void* Alloc(size_t* index, size_t size) = 0; virtual void Free(void* p, size_t size, size_t index) = 0; virtual bool UseGpu() const = 0; }; class CPUAllocator : public SystemAllocator { public: - virtual void* Alloc(size_t& index, size_t size); + virtual void* Alloc(size_t* index, size_t size); virtual void Free(void* p, size_t size, size_t index); virtual bool UseGpu() const; }; @@ -46,7 +46,7 @@ class GPUAllocator : public SystemAllocator { public: explicit GPUAllocator(int gpu_id) : gpu_id_(gpu_id) {} - virtual void* Alloc(size_t& index, size_t size); + virtual void* Alloc(size_t* index, size_t size); virtual void Free(void* p, size_t size, size_t index); virtual bool UseGpu() const; @@ -58,7 +58,7 @@ class GPUAllocator : public SystemAllocator { class CUDAPinnedAllocator : public SystemAllocator { public: - virtual void* Alloc(size_t& index, size_t size); + virtual void* Alloc(size_t* index, size_t size); virtual void Free(void* p, size_t size, size_t index); virtual bool UseGpu() const; diff --git a/paddle/fluid/memory/detail/system_allocator_test.cc b/paddle/fluid/memory/detail/system_allocator_test.cc index 3e1926f632c57b7906e4a76f43ff7a753d71d97f..268260142c579ea9301d89fcec1613ce5b0e15a5 100644 --- a/paddle/fluid/memory/detail/system_allocator_test.cc +++ b/paddle/fluid/memory/detail/system_allocator_test.cc @@ -22,11 +22,11 @@ limitations under the License. */ DECLARE_bool(use_pinned_memory); -void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { +void TestAllocator(paddle::memory::detail::SystemAllocator* a, size_t size) { bool freed = false; { size_t index; - void* p = a.Alloc(index, size); + void* p = a->Alloc(&index, size); if (size > 0) { EXPECT_NE(p, nullptr); } else { @@ -36,7 +36,7 @@ void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { int* i = static_cast(p); std::shared_ptr ptr(i, [&](void* p) { freed = true; - a.Free(p, size, index); + a->Free(p, size, index); }); } EXPECT_TRUE(freed); @@ -45,21 +45,21 @@ void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { TEST(CPUAllocator, NoLockMem) { FLAGS_use_pinned_memory = false; paddle::memory::detail::CPUAllocator a; - TestAllocator(a, 2048); - TestAllocator(a, 0); + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } TEST(CPUAllocator, LockMem) { FLAGS_use_pinned_memory = true; paddle::memory::detail::CPUAllocator a; - TestAllocator(a, 2048); - TestAllocator(a, 0); + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } #ifdef PADDLE_WITH_CUDA TEST(GPUAllocator, Alloc) { paddle::memory::detail::GPUAllocator a(0); - TestAllocator(a, 2048); - TestAllocator(a, 0); + TestAllocator(&a, 2048); + TestAllocator(&a, 0); } #endif diff --git a/paddle/fluid/memory/memory.cc b/paddle/fluid/memory/malloc.cc similarity index 99% rename from paddle/fluid/memory/memory.cc rename to paddle/fluid/memory/malloc.cc index 2c13dbc6d51bfa3853cec5270e8115c899f522ea..0c74f62de5c6f5d432ee928945db6dcf385ca209 100644 --- a/paddle/fluid/memory/memory.cc +++ b/paddle/fluid/memory/malloc.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/memory/memory.h" +#include "paddle/fluid/memory/malloc.h" #include "glog/logging.h" diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h new file mode 100644 index 0000000000000000000000000000000000000000..3e6bfddd69cb16edf323d040ea5369cd551f299e --- /dev/null +++ b/paddle/fluid/memory/malloc.h @@ -0,0 +1,104 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace memory { + +/** + * \brief Allocate memory block in one place. + * + * \param[in] place Allocation place (CPU or GPU). + * \param[in] size Allocation size. + * + * \return Allocated memory block address. + * + * \note If return nullptr, it indicates memory allocation failed + * because insufficient memory in current system. When Alloc + * function is invoked, you must check the returned memory + * address is valid or not. + */ +template +void* Alloc(Place place, size_t size); + +/** + * \brief Free memory block in one place. + * + * \param[in] place Allocation place (CPU or GPU). + * \param[in] ptr Memory block address to free. + * + */ +template +void Free(Place place, void* ptr); + +/** + * \brief Total size of used memory in one place. + * + * \param[in] place Allocation place (CPU or GPU). + * + */ +template +size_t Used(Place place); + +struct Usage : public boost::static_visitor { + size_t operator()(const platform::CPUPlace& cpu) const; + size_t operator()(const platform::CUDAPlace& gpu) const; + size_t operator()(const platform::CUDAPinnedPlace& cuda_pinned) const; +}; + +size_t memory_usage(const platform::Place& p); + +/** + * \brief Free memory block in one place. + * + * \note In some cases, custom deleter is used to + * deallocate the memory automatically for + * std::unique_ptr in tensor.h. + * + */ +template +class PODDeleter { + static_assert(std::is_pod::value, "T must be POD"); + + public: + explicit PODDeleter(Place place) : place_(place) {} + void operator()(T* ptr) { Free(place_, static_cast(ptr)); } + + private: + Place place_; +}; + +/** + * \brief Free memory block in one place does not meet POD + * + * \note In some cases, custom deleter is used to + * deallocate the memory automatically for + * std::unique_ptr in tensor.h. + * + */ +template +class PlainDeleter { + public: + explicit PlainDeleter(Place place) : place_(place) {} + void operator()(T* ptr) { Free(place_, reinterpret_cast(ptr)); } + + private: + Place place_; +}; + +} // namespace memory +} // namespace paddle diff --git a/paddle/fluid/memory/memory_test.cc b/paddle/fluid/memory/malloc_test.cc similarity index 95% rename from paddle/fluid/memory/memory_test.cc rename to paddle/fluid/memory/malloc_test.cc index 9fbbe62559b1e29d6942a1ada62558b20830489b..d39466ef60c3750600dea726a6570397423d42f6 100644 --- a/paddle/fluid/memory/memory_test.cc +++ b/paddle/fluid/memory/malloc_test.cc @@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/memory/memory.h" +#include "paddle/fluid/memory/malloc.h" #include #include "gtest/gtest.h" #include "paddle/fluid/memory/detail/memory_block.h" -#include "paddle/fluid/memory/detail/meta_data.h" #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/place.h" @@ -28,7 +27,7 @@ inline bool is_aligned(void const *p) { } size_t align(size_t size, paddle::platform::CPUPlace place) { - size += sizeof(paddle::memory::detail::Metadata); + size += sizeof(paddle::memory::detail::MemoryBlock::Desc); size_t alignment = paddle::platform::CpuMinChunkSize(); size_t remaining = size % alignment; return remaining == 0 ? size : size + (alignment - remaining); @@ -86,7 +85,7 @@ TEST(BuddyAllocator, CPUMultAlloc) { #ifdef PADDLE_WITH_CUDA size_t align(size_t size, paddle::platform::CUDAPlace place) { - size += sizeof(paddle::memory::detail::Metadata); + size += sizeof(paddle::memory::detail::MemoryBlock::Desc); size_t alignment = paddle::platform::GpuMinChunkSize(); size_t remaining = size % alignment; return remaining == 0 ? size : size + (alignment - remaining); @@ -142,7 +141,7 @@ TEST(BuddyAllocator, GPUMultAlloc) { } size_t align(size_t size, paddle::platform::CUDAPinnedPlace place) { - size += sizeof(paddle::memory::detail::Metadata); + size += sizeof(paddle::memory::detail::MemoryBlock::Desc); size_t alignment = paddle::platform::CUDAPinnedMinChunkSize(); size_t remaining = size % alignment; return remaining == 0 ? size : size + (alignment - remaining); diff --git a/paddle/fluid/memory/memory.h b/paddle/fluid/memory/memory.h index 3e6bfddd69cb16edf323d040ea5369cd551f299e..8d904e3be56abf0974ba7379f7ca1b676fcb0409 100644 --- a/paddle/fluid/memory/memory.h +++ b/paddle/fluid/memory/memory.h @@ -14,91 +14,5 @@ limitations under the License. */ #pragma once -#include "paddle/fluid/platform/place.h" - -namespace paddle { -namespace memory { - -/** - * \brief Allocate memory block in one place. - * - * \param[in] place Allocation place (CPU or GPU). - * \param[in] size Allocation size. - * - * \return Allocated memory block address. - * - * \note If return nullptr, it indicates memory allocation failed - * because insufficient memory in current system. When Alloc - * function is invoked, you must check the returned memory - * address is valid or not. - */ -template -void* Alloc(Place place, size_t size); - -/** - * \brief Free memory block in one place. - * - * \param[in] place Allocation place (CPU or GPU). - * \param[in] ptr Memory block address to free. - * - */ -template -void Free(Place place, void* ptr); - -/** - * \brief Total size of used memory in one place. - * - * \param[in] place Allocation place (CPU or GPU). - * - */ -template -size_t Used(Place place); - -struct Usage : public boost::static_visitor { - size_t operator()(const platform::CPUPlace& cpu) const; - size_t operator()(const platform::CUDAPlace& gpu) const; - size_t operator()(const platform::CUDAPinnedPlace& cuda_pinned) const; -}; - -size_t memory_usage(const platform::Place& p); - -/** - * \brief Free memory block in one place. - * - * \note In some cases, custom deleter is used to - * deallocate the memory automatically for - * std::unique_ptr in tensor.h. - * - */ -template -class PODDeleter { - static_assert(std::is_pod::value, "T must be POD"); - - public: - explicit PODDeleter(Place place) : place_(place) {} - void operator()(T* ptr) { Free(place_, static_cast(ptr)); } - - private: - Place place_; -}; - -/** - * \brief Free memory block in one place does not meet POD - * - * \note In some cases, custom deleter is used to - * deallocate the memory automatically for - * std::unique_ptr in tensor.h. - * - */ -template -class PlainDeleter { - public: - explicit PlainDeleter(Place place) : place_(place) {} - void operator()(T* ptr) { Free(place_, reinterpret_cast(ptr)); } - - private: - Place place_; -}; - -} // namespace memory -} // namespace paddle +#include "paddle/fluid/memory/malloc.h" +#include "paddle/fluid/memory/memcpy.h" diff --git a/paddle/fluid/memory/pinned_memory_test.cu b/paddle/fluid/memory/pinned_memory_test.cu index a000001f41788fb16ac075426f06357cbe42d642..0d898f59ee1b8c783c5357aa7e27581a993a6d30 100644 --- a/paddle/fluid/memory/pinned_memory_test.cu +++ b/paddle/fluid/memory/pinned_memory_test.cu @@ -15,7 +15,6 @@ limitations under the License. */ #include #include "paddle/fluid/memory/detail/memory_block.h" -#include "paddle/fluid/memory/detail/meta_data.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memory.h" diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 84eabab563e3404ad2a28bf76116c592db04742e..5ff987ad8b3ba3c9195e87e6c11e70ac98fa0a11 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -263,7 +263,7 @@ cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op) -cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory) +cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory) cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op) nv_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) diff --git a/paddle/fluid/operators/activation_mkldnn_op.cc b/paddle/fluid/operators/activation_mkldnn_op.cc index 6ff363d766db7dd97e1bc193ef7b4a095a7b7c24..ab7c61227114fe7a0ce2ff2515dd560706058b64 100644 --- a/paddle/fluid/operators/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/activation_mkldnn_op.cc @@ -13,8 +13,8 @@ limitations under the License. */ #include "mkldnn.hpp" -#include "mkldnn_activation_op.h" #include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/operators/mkldnn_activation_op.h" namespace paddle { namespace operators { @@ -40,18 +40,24 @@ void eltwise_forward(const ExecContext &ctx, mkldnn::algorithm algorithm, const T *dst_data = dst->template mutable_data(ctx.GetPlace()); // get memory dim - PADDLE_ENFORCE(src->dims().size() == 4, - "Input dim must be with 4, i.e. NCHW"); + PADDLE_ENFORCE(src->dims().size() == 2 || src->dims().size() == 4, + "Input dim must be with 2 or 4"); std::vector src_tz = framework::vectorize2int(src->dims()); // create memory description - // TODO(kbinias-intel): support more formats - auto data_md = platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); + auto data_md = src_tz.size() == 2 + ? platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, + mkldnn::memory::format::nc) + : platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, + mkldnn::memory::format::nchw); // create memory primitives - auto src_memory = mkldnn::memory({data_md, mkldnn_engine}, (void *)src_data); - auto dst_memory = mkldnn::memory({data_md, mkldnn_engine}, (void *)dst_data); + auto src_memory = + mkldnn::memory({data_md, mkldnn_engine}, + static_cast(const_cast(src_data))); + auto dst_memory = + mkldnn::memory({data_md, mkldnn_engine}, + static_cast(const_cast(dst_data))); auto forward_desc = mkldnn::eltwise_forward::desc( mkldnn::prop_kind::forward_training, algorithm, data_md, alpha, beta); @@ -91,15 +97,21 @@ void eltwise_grad(const ExecContext &ctx, mkldnn::algorithm algorithm, std::vector src_tz = framework::vectorize2int(x->dims()); // create memory description - auto data_md = platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); + auto data_md = src_tz.size() == 2 + ? platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, + mkldnn::memory::format::nc) + : platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, + mkldnn::memory::format::nchw); // create memory primitives - auto src_memory = mkldnn::memory({data_md, mkldnn_engine}, (void *)src); + auto src_memory = mkldnn::memory( + {data_md, mkldnn_engine}, static_cast(const_cast(src))); auto diff_src_memory = - mkldnn::memory({data_md, mkldnn_engine}, (void *)diff_src); + mkldnn::memory({data_md, mkldnn_engine}, + static_cast(const_cast(diff_src))); auto diff_dst_memory = - mkldnn::memory({data_md, mkldnn_engine}, (void *)diff_dst); + mkldnn::memory({data_md, mkldnn_engine}, + static_cast(const_cast(diff_dst))); auto backward_desc = mkldnn::eltwise_backward::desc(algorithm, data_md, data_md, alpha, beta); diff --git a/paddle/fluid/operators/detail/CMakeLists.txt b/paddle/fluid/operators/detail/CMakeLists.txt index 3adeeda90645ca983d9d9229b4cc1c4c90302206..719a7465b8d58ef8588ff1e83c2b971eb6fbb00f 100644 --- a/paddle/fluid/operators/detail/CMakeLists.txt +++ b/paddle/fluid/operators/detail/CMakeLists.txt @@ -5,5 +5,5 @@ if(WITH_DISTRIBUTE) set_source_files_properties(serde_test.cc grpc_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_test(serde_test SRCS serde_test.cc variable_response.cc DEPS grpc++_unsecure grpc_unsecure gpr cares zlib protobuf sendrecvop_grpc) - cc_test(grpc_server_test SRCS grpc_server_test.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf) + cc_test(grpc_server_test SRCS grpc_server_test.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf executor proto_desc lookup_table_op) endif() diff --git a/paddle/fluid/operators/detail/grpc_client.cc b/paddle/fluid/operators/detail/grpc_client.cc index ef987d07f08525bff5267cdc2076ae767417e4f1..8bbfd1f15925992efdeaaffbbe7b350ffbcee889 100644 --- a/paddle/fluid/operators/detail/grpc_client.cc +++ b/paddle/fluid/operators/detail/grpc_client.cc @@ -138,7 +138,7 @@ bool RPCClient::AsyncPrefetchVariable(const std::string& ep, auto* var = p_scope->FindVar(in_var_name_val); ::grpc::ByteBuffer req; - SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req); + SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val); // var handle VarHandle var_h; diff --git a/paddle/fluid/operators/detail/grpc_server.cc b/paddle/fluid/operators/detail/grpc_server.cc index 2e7bf1921a26fc88d854e4db2c501548695a136a..d5fc163bc25409e0607b149b61c6266b38119d9d 100644 --- a/paddle/fluid/operators/detail/grpc_server.cc +++ b/paddle/fluid/operators/detail/grpc_server.cc @@ -138,39 +138,48 @@ class RequestPrefetch final : public RequestBase { framework::Scope* scope, const platform::DeviceContext* dev_ctx, framework::Executor* executor, - framework::ProgramDesc* program, int blkid) + framework::ProgramDesc* program, + framework::ExecutorPrepareContext* prefetch_ctx) : RequestBase(service, cq, dev_ctx), responder_(&ctx_), scope_(scope), executor_(executor), program_(program), - blkid_(blkid) { + prefetch_ctx_(prefetch_ctx) { + request_.reset(new VariableResponse(scope, dev_ctx_)); int method_id = static_cast(detail::GrpcMethod::kPrefetchVariable); - service_->RequestAsyncUnary(method_id, &ctx_, &request_, &responder_, cq_, - cq_, this); + service_->RequestAsyncUnary(method_id, &ctx_, request_.get(), &responder_, + cq_, cq_, this); } virtual ~RequestPrefetch() {} - virtual std::string GetReqName() { return request_.varname(); } + virtual std::string GetReqName() { return request_->Varname(); } virtual void Process() { // prefetch process... ::grpc::ByteBuffer reply; - // TODO(Yancey1989): execute the Block which containers prefetch ops - VLOG(3) << "RequestPrefetch Process in"; + std::string var_name = request_->OutVarname(); + auto var_desc = program_->Block(0).FindVar(var_name); + framework::Scope* local_scope = &scope_->NewScope(); + auto* var = local_scope->FindVar(var_name); + InitializeVariable(var, var_desc->GetType()); + executor_->RunPreparedContext(prefetch_ctx_, scope_, false, false); + + SerializeToByteBuffer(var_name, var, *dev_ctx_, &reply); responder_.Finish(reply, ::grpc::Status::OK, this); status_ = FINISH; } protected: - sendrecv::VariableMessage request_; + std::shared_ptr request_; ServerAsyncResponseWriter<::grpc::ByteBuffer> responder_; framework::Scope* scope_; framework::Executor* executor_; framework::ProgramDesc* program_; + framework::ExecutorPrepareContext* prefetch_ctx_; int blkid_; }; @@ -268,7 +277,7 @@ void AsyncGRPCServer::TryToRegisterNewPrefetchOne() { } RequestPrefetch* prefetch = new RequestPrefetch(&service_, cq_prefetch_.get(), scope_, dev_ctx_, - executor_, program_, prefetch_blk_id_); + executor_, program_, prefetch_ctx_); VLOG(4) << "Create RequestPrefetch status:" << prefetch->Status(); } diff --git a/paddle/fluid/operators/detail/grpc_server.h b/paddle/fluid/operators/detail/grpc_server.h index 380447f47c142bdc16e60f78c4b2d94235ec5060..b6110f92ed4f38a156e0c99ecfb399f3f47a169e 100644 --- a/paddle/fluid/operators/detail/grpc_server.h +++ b/paddle/fluid/operators/detail/grpc_server.h @@ -63,6 +63,10 @@ class AsyncGRPCServer final { void SetExecutor(framework::Executor *executor) { executor_ = executor; } + void SetPrefetchPreparedCtx(framework::ExecutorPrepareContext *prepared) { + prefetch_ctx_ = prepared; + } + int GetSelectedPort() { return selected_port_; } const ReceivedMessage Get() { return this->var_recv_queue_.Pop(); } @@ -111,6 +115,7 @@ class AsyncGRPCServer final { std::unique_ptr t_prefetch_; int prefetch_blk_id_; + framework::ExecutorPrepareContext *prefetch_ctx_; framework::ProgramDesc *program_; framework::Executor *executor_; int selected_port_; diff --git a/paddle/fluid/operators/detail/grpc_server_test.cc b/paddle/fluid/operators/detail/grpc_server_test.cc index b89aed0157de8e95564015b3e7f42316a39537f5..c51933718f4ca78e87c77e007c485642000d247d 100644 --- a/paddle/fluid/operators/detail/grpc_server_test.cc +++ b/paddle/fluid/operators/detail/grpc_server_test.cc @@ -20,43 +20,121 @@ limitations under the License. */ #include "paddle/fluid/operators/detail/grpc_client.h" #include "paddle/fluid/operators/detail/grpc_server.h" +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" + namespace framework = paddle::framework; namespace platform = paddle::platform; namespace detail = paddle::operators::detail; +USE_OP(lookup_table); + std::unique_ptr rpc_service_; +framework::BlockDesc* AppendPrefetchBlcok(framework::ProgramDesc* program) { + auto root_block = program->MutableBlock(0); + auto* block = program->AppendBlock(*root_block); + + framework::VariableNameMap input({{"W", {"w"}}, {"Ids", {"ids"}}}); + framework::VariableNameMap output({{"Output", {"out"}}}); + auto op = block->AppendOp(); + op->SetType("lookup_table"); + op->SetInput("W", {"w"}); + op->SetInput("Ids", {"ids"}); + op->SetOutput("Out", {"out"}); + + auto& out = *root_block->Var("out"); + out.SetType(framework::proto::VarType::SELECTED_ROWS); + out.SetShape({10, 10}); + + return block; +} + +void CreateVarsOnScope(framework::Scope* scope, platform::CPUPlace* place) { + auto w_var = scope->Var("w"); + w_var->GetMutable(); + + auto out_var = scope->Var("out"); + out_var->GetMutable(); + + auto ids_var = scope->Var("ids"); + ids_var->GetMutable(); +} + +void InitTensorsOnClient(framework::Scope* scope, platform::CPUPlace* place, + int64_t rows_numel) { + CreateVarsOnScope(scope, place); + auto ids_var = scope->Var("ids")->GetMutable(); + auto rows = ids_var->mutable_rows(); + for (int64_t i = 0; i < rows_numel; ++i) rows->push_back(i * 2); + ids_var->mutable_value()->Resize({rows_numel, 1}); + ids_var->mutable_value()->mutable_data(*place); +} + +void InitTensorsOnServer(framework::Scope* scope, platform::CPUPlace* place, + int64_t rows_numel) { + CreateVarsOnScope(scope, place); + auto w = scope->Var("w")->GetMutable(); + auto rows = w->mutable_rows(); + for (int64_t i = 0; i < rows_numel; ++i) rows->push_back(i); + auto w_value = w->mutable_value(); + w_value->Resize({rows_numel, 10}); + + auto ptr = w_value->mutable_data(*place); + + for (int64_t i = 0; i < w_value->numel(); ++i) { + ptr[i] = static_cast(i / 10); + } +} + void StartServer(const std::string& endpoint) { rpc_service_.reset(new detail::AsyncGRPCServer(endpoint)); + framework::ProgramDesc program; + framework::Scope scope; + platform::CPUPlace place; + framework::Executor exe(place); + platform::CPUDeviceContext ctx(place); + auto* block = AppendPrefetchBlcok(&program); + auto prepared = exe.Prepare(program, block->ID()); + InitTensorsOnServer(&scope, &place, 10); + + rpc_service_->SetProgram(&program); + rpc_service_->SetPrefetchPreparedCtx(prepared.get()); + rpc_service_->SetDevCtx(&ctx); + rpc_service_->SetScope(&scope); + rpc_service_->SetExecutor(&exe); + rpc_service_->RunSyncUpdate(); } TEST(PREFETCH, CPU) { // start up a server instance backend - // TODO(Yancey1989): Need to start a server with optimize blocks and - // prefetch blocks. std::thread server_thread(StartServer, "127.0.0.1:8889"); + sleep(2); framework::Scope scope; platform::CPUPlace place; platform::CPUDeviceContext ctx(place); // create var on local scope - std::string in_var_name("in"); + int64_t rows_numel = 5; + InitTensorsOnClient(&scope, &place, rows_numel); + std::string in_var_name("ids"); std::string out_var_name("out"); - auto* in_var = scope.Var(in_var_name); - auto* in_tensor = in_var->GetMutable(); - in_tensor->Resize({10, 10}); - VLOG(3) << "before mutable_data"; - in_tensor->mutable_data(place); - scope.Var(out_var_name); - - VLOG(3) << "before fetch"; detail::RPCClient client; client.AsyncPrefetchVariable("127.0.0.1:8889", ctx, scope, in_var_name, out_var_name); client.Wait(); + auto var = scope.Var(out_var_name); + auto value = var->GetMutable()->value(); + auto ptr = value.mutable_data(place); + rpc_service_->ShutDown(); server_thread.join(); rpc_service_.reset(nullptr); + + for (int64_t i = 0; i < rows_numel; ++i) { + EXPECT_EQ(ptr[0 + i * value.dims()[1]], static_cast(i * 2)); + } } diff --git a/paddle/fluid/operators/detail/send_recv.proto b/paddle/fluid/operators/detail/send_recv.proto index fc12e82a7e6bd10262092d1ca367980df64e91c2..02bb2b9cebb87b83aa1cbef0c644f969b4d17284 100644 --- a/paddle/fluid/operators/detail/send_recv.proto +++ b/paddle/fluid/operators/detail/send_recv.proto @@ -21,7 +21,7 @@ service SendRecvService { rpc SendVariable(VariableMessage) returns (VoidMessage) {} // Argument VariableMessage for GetVariable should only contain varname. rpc GetVariable(VariableMessage) returns (VariableMessage) {} - // Prefetch variable by Ids + // pre-fetch variable by given variable name and Ids rpc PrefetchVariable(VariableMessage) returns (VariableMessage) {} } @@ -67,6 +67,8 @@ message VariableMessage { bytes serialized = 8; // selected_rows data bytes rows = 9; + // Look up table block execution output variable name. + string out_varname = 10; } message VoidMessage {} diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc index f8576d01b10f4c0fda4d12d371b2966739acfc21..16c612c45a37dd2ffd17f8d5f5946df30e9b3fe6 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc @@ -30,11 +30,9 @@ namespace detail { void SerializeToByteBuffer(const std::string& name, framework::Variable* var, const platform::DeviceContext& ctx, - ::grpc::ByteBuffer* msg) { + ::grpc::ByteBuffer* msg, + const std::string& out_name) { using VarMsg = sendrecv::VariableMessage; - sendrecv::VariableMessage request; - std::string header; - request.AppendToString(&header); // When using GPU, need to free the copied CPU buffer // when the ByteBuffer destroies // TODO(typhoonzero): add unref here, if we have dependent @@ -52,6 +50,9 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, e.WriteUint64(VarMsg::kTypeFieldNumber, 1); } + if (!out_name.empty()) { + e.WriteString(VarMsg::kOutVarnameFieldNumber, out_name); + } switch (framework::ToVarType(var->Type())) { case framework::proto::VarType_Type_LOD_TENSOR: { auto tensor = var->Get(); diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.h b/paddle/fluid/operators/detail/sendrecvop_utils.h index d7954440846b8db9a9add0110fb9a546a762774d..c72e1bd076f670458f3915072154847db6205092 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.h +++ b/paddle/fluid/operators/detail/sendrecvop_utils.h @@ -46,7 +46,8 @@ typedef void (*DestroyCallback)(void*); void SerializeToByteBuffer(const std::string& name, framework::Variable* var, const platform::DeviceContext& ctx, - ::grpc::ByteBuffer* msg); + ::grpc::ByteBuffer* msg, + const std::string& out_varname = std::string()); void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg, const platform::DeviceContext& ctx, diff --git a/paddle/fluid/operators/detail/variable_response.cc b/paddle/fluid/operators/detail/variable_response.cc index 78e1d274a92241b5f2093beb63acdc8c497dfb83..c9d7fd6d1581f6f4182e9e3e0d633c13a3c336a5 100644 --- a/paddle/fluid/operators/detail/variable_response.cc +++ b/paddle/fluid/operators/detail/variable_response.cc @@ -416,6 +416,20 @@ int VariableResponse::Parse(Source* source) { } break; } + case sendrecv::VariableMessage::kOutVarnameFieldNumber: { + uint32_t length; + if ((wt != WIRETYPE_LENGTH_DELIMITED) || !input.ReadVarint32(&length)) { + return tag; + } + + std::string temp; + if (!input.ReadString(&temp, length)) { + return tag; + } + + meta_.set_out_varname(temp); + break; + } default: { // Unknown tag, return unknown error. diff --git a/paddle/fluid/operators/detail/variable_response.h b/paddle/fluid/operators/detail/variable_response.h index 050b6b84010b4f3e95bc88e5bb738ff18b7fe423..93b0d3cfb4f7d7f336414361773f872d7b259482 100644 --- a/paddle/fluid/operators/detail/variable_response.h +++ b/paddle/fluid/operators/detail/variable_response.h @@ -55,6 +55,7 @@ class VariableResponse { int Parse(const ::grpc::ByteBuffer& byte_buffer); inline std::string Varname() { return meta_.varname(); } + inline std::string OutVarname() { return meta_.out_varname(); } // should call parse first. framework::Variable* GetVar() { return scope_->FindVar(meta_.varname()); } diff --git a/paddle/fluid/operators/lod_reset_op.h b/paddle/fluid/operators/lod_reset_op.h index 99f01c2a255ade81421c2bba95ff3d38ced6f87c..bd19d8908e35e51872d324ea5aa6bb02110d5a92 100644 --- a/paddle/fluid/operators/lod_reset_op.h +++ b/paddle/fluid/operators/lod_reset_op.h @@ -14,6 +14,8 @@ limitations under the License. */ #pragma once +#include +#include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" @@ -35,7 +37,7 @@ class LoDResetKernel : public framework::OpKernel { if (lod_t->lod().size() > 0) { auto y_lod = lod_t->lod(); auto last_level = y_lod[y_lod.size() - 1]; - PADDLE_ENFORCE_EQ(last_level.back(), in->dims()[0], + PADDLE_ENFORCE_EQ((int64_t)(last_level.back()), in->dims()[0], "Last value of `Y`'s last level LoD should be equal " "to the first dimension of `X`"); out->set_lod(y_lod); diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 6780b8cc6deca64e9eaefa0b40d309449e730c8c..917bdc64abf608b8ade70c47f76a8adffb32046a 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -42,12 +42,12 @@ ENDIF() # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies -cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator - system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) +cc_library(device_context SRCS device_context.cc DEPS malloc + place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) -nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context) +nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto ${GPU_CTX_DEPS}) cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 787925d9f8800b49de5b8b642304605ef4087d1e..884289a7fda65f9713392ec459219b4c89271e73 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -2,13 +2,13 @@ if(WITH_PYTHON) if(WITH_AMD_GPU) hip_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method + DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB}) else() cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method + DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB}) if(NOT APPLE AND NOT ANDROID) diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index 77f84cd43bdf35ae6f54b0db2b5f720d24872878..a1f446817e0cbc1b4391398a82b0846d01bbec2c 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -6,6 +6,6 @@ if(WITH_TESTING) add_library(paddle_test_util STATIC TestUtil.cpp) add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies}) if(NOT MOBILE_INFERENCE) - cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init paddle_memory gtest gflags) + cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init memory gtest gflags) endif() endif() diff --git a/python/paddle/fluid/debuger.py b/python/paddle/fluid/debuger.py index 7b4afa9bf65e1369329cd4648c1f5c4bd8fa8357..1c56064a1e8bdc5d975837cb5a75a40d557765ad 100644 --- a/python/paddle/fluid/debuger.py +++ b/python/paddle/fluid/debuger.py @@ -16,6 +16,7 @@ import sys import re from graphviz import GraphPreviewGenerator import proto.framework_pb2 as framework_pb2 +from google.protobuf import text_format _vartype2str_ = [ "UNK", @@ -100,7 +101,7 @@ def repr_var(vardesc): def pprint_program_codes(program_desc): reprs = [] - for block_idx in range(program_desc.num_blocks()): + for block_idx in range(program_desc.desc.num_blocks()): block_desc = program_desc.block(block_idx) block_repr = pprint_block_codes(block_desc) reprs.append(block_repr) @@ -127,7 +128,7 @@ def pprint_block_codes(block_desc, show_backward=False): if type(block_desc) is not framework_pb2.BlockDesc: block_desc = framework_pb2.BlockDesc.FromString( - block_desc.serialize_to_string()) + block_desc.desc.serialize_to_string()) var_reprs = [] op_reprs = [] for var in block_desc.vars: @@ -237,13 +238,13 @@ def draw_block_graphviz(block, highlights=None, path="./temp.dot"): # draw parameters and args vars = {} for var in desc.vars: - shape = [str(i) for i in var.lod_tensor.tensor.dims] - if not shape: - shape = ['null'] + # TODO(gongwb): format the var.type # create var if var.persistable: varn = graph.add_param( - var.name, var.type, shape, highlight=need_highlight(var.name)) + var.name, + str(var.type).replace("\n", "
", 1), + highlight=need_highlight(var.name)) else: varn = graph.add_arg(var.name, highlight=need_highlight(var.name)) vars[var.name] = varn @@ -268,4 +269,4 @@ def draw_block_graphviz(block, highlights=None, path="./temp.dot"): for var in op.outputs: add_op_link_var(opn, var, True) - graph(path, show=True) + graph(path, show=False) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 185f4f6651f816e5588ebed31123ae5f84f9ca3b..fbe4531f056a583d2b4d855d32fe8e04da94fa3c 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -965,6 +965,13 @@ class Block(object): if var.type == core.VarDesc.VarType.STEP_SCOPES: ret_var = self.create_var( name=var.name, persistable=var.persistable, type=var.type) + elif var.type == core.VarDesc.VarType.SELECTED_ROWS: + ret_var = self.create_var( + name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + persistable=True) else: ret_var = self.create_var( name=var.name, diff --git a/python/paddle/fluid/graphviz.py b/python/paddle/fluid/graphviz.py index b8d21344fc8f65f4025f28a195dab2d371b30292..125b4efa9d476e561bd78d0365cd92bbf7e66605 100644 --- a/python/paddle/fluid/graphviz.py +++ b/python/paddle/fluid/graphviz.py @@ -83,7 +83,7 @@ class Graph(object): file = open(dot_path, 'w') file.write(self.__str__()) image_path = os.path.join( - os.path.dirname(__file__), dot_path[:-3] + "pdf") + os.path.dirname(dot_path), dot_path[:-3] + "pdf") cmd = ["dot", "-Tpdf", dot_path, "-o", image_path] subprocess.Popen( cmd, @@ -199,7 +199,7 @@ class GraphPreviewGenerator(object): else: self.graph.show(path) - def add_param(self, name, data_type, shape, highlight=False): + def add_param(self, name, data_type, highlight=False): label = '\n'.join([ '<', ' ', @@ -214,11 +214,6 @@ class GraphPreviewGenerator(object): str(data_type), ' ' ' ', - ' ', - ' ' - ' ', '
', - '[%s]' % 'x'.join(shape), - '
>', ]) return self.graph.node( diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index fb162f8b7315936824ad40aca0c99e4dd09f9734..c5b53902bca90ae2260a7cda43e6866f897233b3 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -535,9 +535,37 @@ class TestSwish(OpTest): #--------------------test MKLDNN-------------------- -class TestMKLDNNRelu(TestRelu): +class TestMKLDNNReluDim2(TestRelu): def setUp(self): - super(TestMKLDNNRelu, self).setUp() + super(TestMKLDNNReluDim2, self).setUp() + + self.attrs = {"use_mkldnn": True} + + +class TestMKLDNNTanhDim2(TestTanh): + def setUp(self): + super(TestMKLDNNTanhDim2, self).setUp() + + self.attrs = {"use_mkldnn": True} + + +class TestMKLDNNSqrtDim2(TestSqrt): + def setUp(self): + super(TestMKLDNNSqrtDim2, self).setUp() + + self.attrs = {"use_mkldnn": True} + + +class TestMKLDNNAbsDim2(TestAbs): + def setUp(self): + super(TestMKLDNNAbsDim2, self).setUp() + + self.attrs = {"use_mkldnn": True} + + +class TestMKLDNNReluDim4(TestRelu): + def setUp(self): + super(TestMKLDNNReluDim4, self).setUp() x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype("float32") # The same reason with TestAbs @@ -549,9 +577,9 @@ class TestMKLDNNRelu(TestRelu): self.attrs = {"use_mkldnn": True} -class TestMKLDNNTanh(TestTanh): +class TestMKLDNNTanhDim4(TestTanh): def setUp(self): - super(TestMKLDNNTanh, self).setUp() + super(TestMKLDNNTanhDim4, self).setUp() self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 4, 3, 5]).astype("float32") @@ -560,9 +588,9 @@ class TestMKLDNNTanh(TestTanh): self.attrs = {"use_mkldnn": True} -class TestMKLDNNSqrt(TestSqrt): +class TestMKLDNNSqrtDim4(TestSqrt): def setUp(self): - super(TestMKLDNNSqrt, self).setUp() + super(TestMKLDNNSqrtDim4, self).setUp() self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 4, 3, 5]).astype("float32") @@ -571,9 +599,9 @@ class TestMKLDNNSqrt(TestSqrt): self.attrs = {"use_mkldnn": True} -class TestMKLDNNAbs(TestAbs): +class TestMKLDNNAbsDim4(TestAbs): def setUp(self): - super(TestMKLDNNAbs, self).setUp() + super(TestMKLDNNAbsDim4, self).setUp() x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype("float32") # The same reason with TestAbs diff --git a/python/paddle/fluid/tests/unittests/test_debugger.py b/python/paddle/fluid/tests/unittests/test_debugger.py index 2b7bbf9218f9b8fd8f5b29ac3cbc2f9680f471eb..67b03f635b6f8a3003efabe5425325080d47f61c 100644 --- a/python/paddle/fluid/tests/unittests/test_debugger.py +++ b/python/paddle/fluid/tests/unittests/test_debugger.py @@ -51,7 +51,9 @@ class TestDebugger(unittest.TestCase): outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) - print(debuger.pprint_program_codes(p.desc)) + print(debuger.pprint_program_codes(p)) + + debuger.draw_block_graphviz(p.block(0), path="./test.dot") if __name__ == '__main__':