未验证 提交 dc5a0420 编写于 作者: L Leo Chen 提交者: GitHub

refine allocation cmake (#44241)

* build into one static library

* move memory/detail to memory/allocation

* fix bug

* fix profiler

* fix framework_proto

* fix deps

* fix inference compilation

* fix rocm compile

* follow comments

* fix buddy_allocator_test
上级 246ac976
......@@ -427,10 +427,8 @@ copy(
set(module "memory")
copy(
fluid_lib_dist
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h
${src_dir}/${module}/allocation/*.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail
${dst_dir}/${module}/allocation)
SRCS ${src_dir}/${module}/allocation/*.h
DSTS ${dst_dir}/${module}/allocation)
set(module "platform")
set(platform_lib_deps profiler_proto errors)
......
add_subdirectory(detail)
add_subdirectory(allocation)
if(WITH_MKLDNN)
......@@ -10,7 +9,7 @@ endif()
cc_library(
malloc
SRCS malloc.cc
DEPS place enforce allocator_facade profiler ${MKLDNN_CTX_DEPS})
DEPS place enforce allocator profiler ${MKLDNN_CTX_DEPS})
cc_library(
memcpy
SRCS memcpy.cc
......
cc_library(
allocator
SRCS allocator.cc
DEPS place stats profiler)
cc_library(
cpu_allocator
SRCS cpu_allocator.cc
DEPS allocator)
cc_library(
locked_allocator
SRCS locked_allocator.cc
DEPS allocator)
cc_library(
buffered_allocator
SRCS buffered_allocator.cc
DEPS allocator)
cc_library(
best_fit_allocator
SRCS best_fit_allocator.cc
DEPS allocator)
cc_library(
naive_best_fit_allocator
SRCS naive_best_fit_allocator.cc
DEPS allocator buddy_allocator)
cc_test(
naive_best_fit_allocator_test
SRCS naive_best_fit_allocator_test.cc
DEPS naive_best_fit_allocator)
cc_test(
buffered_allocator_test
SRCS buffered_allocator_test.cc
DEPS locked_allocator buffered_allocator cpu_allocator best_fit_allocator)
include(ExternalProject)
set(ALLOCATOR_DEPS place stats profiler)
set(ALLOCATOR_SRCS
allocator.cc
cpu_allocator.cc
locked_allocator.cc
aligned_allocator.cc
buffered_allocator.cc
best_fit_allocator.cc
naive_best_fit_allocator.cc
allocator_strategy.cc
allocator_facade.cc
auto_growth_best_fit_allocator.cc
virtual_memory_auto_growth_best_fit_allocator.cc
retry_allocator.cc
memory_block.cc
memory_block_desc.cc
meta_cache.cc
buddy_allocator.cc
system_allocator.cc)
if(WITH_MKLDNN)
set(MKLDNN_CTX_DEPS mkldnn)
else()
set(MKLDNN_CTX_DEPS)
if(WITH_GPU OR WITH_ROCM)
list(
APPEND
ALLOCATOR_SRCS
cuda_allocator.cc
cuda_managed_allocator.cc
pinned_allocator.cc
stream_safe_cuda_allocator.cc
thread_local_allocator.cc)
list(APPEND ALLOCATOR_DEPS cuda_device_guard gpu_info dynload_cuda)
endif()
if(WITH_GPU)
nv_library(
cuda_allocator
SRCS cuda_allocator.cc
DEPS allocator cuda_device_guard stats)
nv_library(
cuda_managed_allocator
SRCS cuda_managed_allocator.cc
DEPS allocator cuda_device_guard gpu_info)
nv_library(
pinned_allocator
SRCS pinned_allocator.cc
DEPS allocator)
nv_library(
stream_safe_cuda_allocator
SRCS stream_safe_cuda_allocator.cc
DEPS allocator cuda_graph)
nv_library(
thread_local_allocator
SRCS thread_local_allocator.cc
DEPS allocator)
list(APPEND ALLOCATOR_DEPS cuda_graph)
endif()
cc_test(
thread_local_allocator_test
SRCS thread_local_allocator_test.cc
DEPS thread_local_allocator)
if(CUDA_VERSION GREATER_EQUAL 10.2)
nv_library(
cuda_virtual_mem_allocator
SRCS cuda_virtual_mem_allocator.cc
DEPS dynload_cuda)
if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2)
list(APPEND ALLOCATOR_SRCS cuda_virtual_mem_allocator.cc)
endif()
if(NOT WIN32)
list(APPEND ALLOCATOR_SRCS mmap_allocator.cc)
if(WITH_GPU)
list(APPEND ALLOCATOR_SRCS cuda_ipc_allocator.cc)
endif()
endif()
if(WITH_ROCM)
hip_library(
cuda_allocator
SRCS cuda_allocator.cc
DEPS allocator cuda_device_guard stats)
hip_library(
cuda_managed_allocator
SRCS cuda_managed_allocator.cc
DEPS allocator cuda_device_guard gpu_info)
hip_library(
pinned_allocator
SRCS pinned_allocator.cc
DEPS allocator)
hip_library(
stream_safe_cuda_allocator
SRCS stream_safe_cuda_allocator.cc
DEPS allocator)
hip_library(
thread_local_allocator
SRCS thread_local_allocator.cc
DEPS allocator)
if(WITH_ASCEND_CL)
list(APPEND ALLOCATOR_SRCS npu_allocator.cc npu_pinned_allocator.cc)
list(APPEND ALLOCATOR_DEPS npu_info)
endif()
cc_test(
thread_local_allocator_test
SRCS thread_local_allocator_test.cc
DEPS thread_local_allocator)
if(WITH_CUSTOM_DEVICE)
list(APPEND ALLOCATOR_SRCS custom_allocator.cc)
list(APPEND ALLOCATOR_DEPS device_manager)
endif()
if(WITH_ASCEND_CL)
cc_library(
npu_allocator
SRCS npu_allocator.cc
DEPS allocator npu_info)
cc_library(
npu_pinned_allocator
SRCS npu_pinned_allocator.cc
DEPS allocator npu_info)
if(WITH_XPU)
list(APPEND ALLOCATOR_DEPS xpu_info)
endif()
cc_library(
retry_allocator
SRCS retry_allocator.cc
if(WITH_IPU)
list(APPEND ALLOCATOR_DEPS ipu_info)
endif()
add_library(allocator "${ALLOCATOR_SRCS}")
target_link_libraries(allocator ${ALLOCATOR_DEPS})
# note: why only add dependency for framework_proto.
# Because it is needed to generate framework.pb.h used in some header files.
add_dependencies(allocator framework_proto)
set_property(GLOBAL PROPERTY FLUID_MODULES allocator)
cc_test(
naive_best_fit_allocator_test
SRCS naive_best_fit_allocator_test.cc
DEPS allocator)
cc_test(
buffered_allocator_test
SRCS buffered_allocator_test.cc
DEPS allocator)
if(WITH_GPU OR WITH_ROCM)
set(AllocatorFacadeDeps
gpu_info
cuda_allocator
cuda_managed_allocator
pinned_allocator
cuda_device_guard
thread_local_allocator
stream_safe_cuda_allocator
device_context)
if(CUDA_VERSION GREATER_EQUAL 10.2)
list(APPEND AllocatorFacadeDeps cuda_virtual_mem_allocator)
endif()
elseif(WITH_XPU)
set(AllocatorFacadeDeps xpu_info)
elseif(WITH_IPU)
set(AllocatorFacadeDeps ipu_info)
elseif(WITH_ASCEND)
set(AllocatorFacadeDeps ascend_npu_info)
else()
set(AllocatorFacadeDeps)
if(WITH_GPU)
nv_test(
thread_local_allocator_test
SRCS thread_local_allocator_test.cc
DEPS allocator)
endif()
if(WITH_CUSTOM_DEVICE)
cc_library(
custom_allocator
SRCS custom_allocator.cc
DEPS allocator device_manager)
set(AllocatorFacadeDeps ${AllocatorFacadeDeps} custom_allocator)
if(WITH_ROCM)
hip_test(
thread_local_allocator_test
SRCS thread_local_allocator_test.cc
DEPS allocator)
endif()
if(WITH_GPU)
nv_test(
best_fit_allocator_test
SRCS best_fit_allocator_test.cc best_fit_allocator_test.cu
DEPS best_fit_allocator locked_allocator cpu_allocator cuda_allocator
device_context memcpy)
DEPS allocator memcpy)
elseif(WITH_ROCM)
hip_test(
best_fit_allocator_test
SRCS best_fit_allocator_test.cc best_fit_allocator_test.cu
DEPS best_fit_allocator locked_allocator cpu_allocator cuda_allocator
device_context memcpy)
DEPS allocator memcpy)
else()
cc_test(
best_fit_allocator_test
SRCS best_fit_allocator_test.cc
DEPS best_fit_allocator locked_allocator cpu_allocator)
endif()
list(
APPEND
AllocatorFacadeDeps
cpu_allocator
locked_allocator
aligned_allocator
retry_allocator
buffered_allocator
naive_best_fit_allocator
auto_growth_best_fit_allocator
virtual_memory_auto_growth_best_fit_allocator
best_fit_allocator)
if(WITH_ASCEND_CL)
list(APPEND AllocatorFacadeDeps npu_pinned_allocator)
DEPS allocator)
endif()
cc_library(
aligned_allocator
SRCS aligned_allocator.cc
DEPS allocator)
cc_test(
test_aligned_allocator
SRCS test_aligned_allocator.cc
DEPS aligned_allocator)
cc_library(
allocator_strategy
SRCS allocator_strategy.cc
DEPS gflags ${AllocatorFacadeDeps})
cc_library(
allocator_facade
SRCS allocator_facade.cc
DEPS allocator_strategy stats)
if(WITH_GPU)
target_link_libraries(allocator_facade cuda_graph)
endif()
DEPS allocator)
cc_test(
retry_allocator_test
SRCS retry_allocator_test.cc
DEPS retry_allocator locked_allocator cpu_allocator)
if(WITH_TESTING)
if((WITH_GPU OR WITH_ROCM) AND TARGET retry_allocator_test)
target_link_libraries(retry_allocator_test cuda_allocator)
endif()
if(TEST retry_allocator_test)
set_tests_properties(retry_allocator_test PROPERTIES LABELS
"RUN_TYPE=EXCLUSIVE")
endif()
DEPS allocator)
if(TEST retry_allocator_test)
set_tests_properties(retry_allocator_test PROPERTIES LABELS
"RUN_TYPE=EXCLUSIVE")
endif()
cc_test(
allocator_facade_abs_flags_test
SRCS allocator_facade_abs_flags_test.cc
DEPS allocator_facade)
DEPS allocator)
cc_test(
allocator_facade_frac_flags_test
SRCS allocator_facade_frac_flags_test.cc
DEPS allocator_facade)
DEPS allocator)
cc_library(
auto_growth_best_fit_allocator
SRCS auto_growth_best_fit_allocator.cc
DEPS allocator aligned_allocator flags)
cc_test(
auto_growth_best_fit_allocator_facade_test
SRCS auto_growth_best_fit_allocator_facade_test.cc
DEPS cpu_allocator auto_growth_best_fit_allocator)
DEPS allocator)
cc_test(
auto_growth_best_fit_allocator_test
SRCS auto_growth_best_fit_allocator_test.cc
DEPS auto_growth_best_fit_allocator)
cc_library(
virtual_memory_auto_growth_best_fit_allocator
SRCS virtual_memory_auto_growth_best_fit_allocator.cc
DEPS allocator aligned_allocator)
DEPS allocator)
if(NOT WIN32)
cc_library(
mmap_allocator
SRCS mmap_allocator.cc
DEPS allocator)
cc_test(
mmap_allocator_test
SRCS mmap_allocator_test.cc
DEPS mmap_allocator allocator)
if(WITH_GPU)
cc_library(
cuda_ipc_allocator
SRCS cuda_ipc_allocator.cc
DEPS allocator)
DEPS allocator)
endif()
# Unit test built from system_allocator_test.cc; it depends on the
# `allocator` target.
cc_test(
system_allocator_test
SRCS system_allocator_test.cc
DEPS allocator)
# Unit test built from buddy_allocator_test.cc; it also depends on the
# `allocator` target.
cc_test(
buddy_allocator_test
SRCS buddy_allocator_test.cc
DEPS allocator)
# Extra configuration for buddy_allocator_test; only applied when tests are
# enabled.
if(WITH_TESTING)
  # Tag the test with the RUN_TYPE=EXCLUSIVE label, but only if the test was
  # actually registered.
  # NOTE(review): the label is presumably consumed by the CI test runner --
  # confirm.
  if(TEST buddy_allocator_test)
    set_tests_properties(buddy_allocator_test PROPERTIES LABELS
                                                         "RUN_TYPE=EXCLUSIVE")
  endif()
  # TODO(zhiqiu): the test data is not downloaded on Windows because wget is
  # not found there.
  # The TARGET check keeps add_dependencies() from failing the configure step
  # when buddy_allocator_test was not created as a build target.
  if(NOT WIN32 AND TARGET buddy_allocator_test)
    # Fetch the test data archive (wget -nc skips the download when the file
    # already exists) and unpack it. VERBATIM makes argument escaping
    # platform-independent.
    add_custom_target(
      download_data
      COMMAND wget -nc
              https://paddle-ci.cdn.bcebos.com/buddy_allocator_test_data.tar
      COMMAND tar -xf buddy_allocator_test_data.tar
      VERBATIM)
    # Make sure the data is fetched before the test target is built.
    add_dependencies(buddy_allocator_test download_data)
  endif()
endif()
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/allocation/buddy_allocator.h"
#include <algorithm>
......
......@@ -25,8 +25,8 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/memory/allocation/memory_block.h"
#include "paddle/fluid/memory/allocation/system_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/allocation/buddy_allocator.h"
#include <memory>
......@@ -330,7 +330,7 @@ TEST(BuddyAllocator, SpeedAna) {
std::vector<bool> vec_free_flag;
std::string line;
int size, id;
int size = 0, id = 0;
while (in_file >> size >> id) {
vec_size.push_back(size);
vec_pos.push_back(id);
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/allocation/memory_block.h"
#include "paddle/fluid/platform/enforce.h"
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include <cstddef>
#include <functional>
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/allocation/memory_block.h"
namespace paddle {
namespace memory {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "glog/logging.h"
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/allocation/memory_block.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
......
......@@ -18,8 +18,8 @@
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/memory/allocation/buddy_allocator.h"
#include "paddle/fluid/memory/allocation/system_allocator.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/enforce.h"
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/memory/allocation/system_allocator.h"
#include "paddle/fluid/memory/stats.h"
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/memory/allocation/system_allocator.h"
#include <memory>
......
......@@ -18,8 +18,8 @@
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/memory/allocation/buddy_allocator.h"
#include "paddle/fluid/memory/allocation/system_allocator.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
namespace paddle {
......
include(ExternalProject)
# Library target `memory_block`, built from memory_block.cc,
# memory_block_desc.cc and meta_cache.cc; it depends on `place`.
cc_library(
memory_block
SRCS memory_block.cc memory_block_desc.cc meta_cache.cc
DEPS place)
# Define the `system_allocator` library. Every branch compiles the same
# source file (system_allocator.cc); what differs per backend is the helper
# used to declare the library and the device-info dependency added to DEPS.
if(WITH_GPU)
  nv_library(
    system_allocator
    SRCS system_allocator.cc
    DEPS gflags cpu_info gpu_info place)
elseif(WITH_ROCM)
  hip_library(
    system_allocator
    SRCS system_allocator.cc
    DEPS gflags cpu_info gpu_info place)
# Test the variable by name, like the sibling branches: `${WITH_ASCEND_CL}`
# would be expanded before if() evaluates it, leaving an empty condition when
# the option is unset.
elseif(WITH_ASCEND_CL)
  cc_library(
    system_allocator
    SRCS system_allocator.cc
    DEPS gflags cpu_info npu_info place)
elseif(WITH_MLU)
  cc_library(
    system_allocator
    SRCS system_allocator.cc
    DEPS gflags cpu_info mlu_info place)
else()
  # No device backend selected: no device-info dependency is added.
  cc_library(
    system_allocator
    SRCS system_allocator.cc
    DEPS gflags cpu_info place)
endif()
# Unit test built from system_allocator_test.cc; depends on system_allocator.
cc_test(
system_allocator_test
SRCS system_allocator_test.cc
DEPS system_allocator)
# Library target `buddy_allocator`, built from buddy_allocator.cc; depends on
# memory_block, system_allocator and glog.
cc_library(
buddy_allocator
SRCS buddy_allocator.cc
DEPS memory_block system_allocator glog)
# Unit test built from buddy_allocator_test.cc; depends on buddy_allocator.
cc_test(
buddy_allocator_test
SRCS buddy_allocator_test.cc
DEPS buddy_allocator)
# file_download_and_uncompress(<URL> <NAME>)
#
# Registers an ExternalProject named extern_download_<NAME> whose source is
# <URL>, using ${THIRD_PARTY_PATH}/<NAME> as prefix, download directory and
# source directory. The configure, build, update and install steps are all
# set to empty commands.
#
# Arguments:
#   URL  - location handed to ExternalProject_Add as its URL.
#   NAME - short name used for the target and the directory under
#          ${THIRD_PARTY_PATH}.
#
# Variables set in the caller's scope:
#   <NAME>_INCLUDE_DIR - ${THIRD_PARTY_PATH}/<NAME>
#   third_party_deps   - its previous value with extern_download_<NAME>
#                        appended.
function(file_download_and_uncompress URL NAME)
  set(download_root ${THIRD_PARTY_PATH}/${NAME})
  set(download_target extern_download_${NAME})

  message(STATUS "Download dependence[${NAME}] from ${URL}")

  # Publish the directory to the caller.
  set(${NAME}_INCLUDE_DIR
      ${download_root}
      PARENT_SCOPE)

  ExternalProject_Add(
    ${download_target}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    PREFIX ${download_root}
    URL ${URL}
    DOWNLOAD_DIR ${download_root}
    SOURCE_DIR ${download_root}
    DOWNLOAD_NO_PROGRESS 1
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    UPDATE_COMMAND ""
    INSTALL_COMMAND "")

  # Append the new target to the caller's third_party_deps list.
  set(third_party_deps
      ${third_party_deps} ${download_target}
      PARENT_SCOPE)
endfunction()
# Extra configuration for buddy_allocator_test; only applied when tests are
# enabled.
if(WITH_TESTING)
  # Tag the test with the RUN_TYPE=EXCLUSIVE label, but only if the test was
  # actually registered.
  if(TEST buddy_allocator_test)
    set_tests_properties(buddy_allocator_test PROPERTIES LABELS
                                                         "RUN_TYPE=EXCLUSIVE")
  endif()
  set(URL "https://paddle-ci.cdn.bcebos.com/buddy_allocator_test_data.tar")
  # Pass the value of URL. A bare `URL` argument would hand the literal word
  # "URL" to the function instead of the address set above.
  file_download_and_uncompress(${URL} "buddy_allocator")
endif()
......@@ -15,7 +15,7 @@ limitations under the License. */
#include <unordered_map>
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/allocation/memory_block.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/platform/cpu_info.h"
......
......@@ -88,12 +88,12 @@ if(WITH_GPU)
nv_library(
cuda_graph_with_memory_pool
SRCS cuda_graph_with_memory_pool.cc
DEPS device_context allocator_facade cuda_graph)
DEPS device_context allocator cuda_graph)
else()
cc_library(
cuda_graph_with_memory_pool
SRCS cuda_graph_with_memory_pool.cc
DEPS device_context allocator_facade)
DEPS device_context allocator)
endif()
cc_library(
......
nv_library(
cuda_graph
SRCS cuda_graph.cc
DEPS enforce allocator_facade)
DEPS enforce)
nv_library(
cuda_profiler
SRCS cuda_profiler.cc
......
......@@ -310,8 +310,10 @@ RecordOpInfoSupplement::RecordOpInfoSupplement(
std::map<const char *, std::map<uint64_t, std::vector<uint64_t>>>
RecordMemEvent::size_cache;
std::map<const char *, std::map<uint64_t, bool>>
RecordMemEvent::has_initialized;
RecordMemEvent::RecordMemEvent(const void *ptr,
const phi::Place &place,
size_t size,
......
......@@ -28,7 +28,6 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.pb.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/fluid/platform/profiler/mem_tracing.h"
#include "paddle/fluid/platform/profiler/supplement_tracing.h"
......@@ -39,6 +38,10 @@ limitations under the License. */
namespace paddle {
namespace platform {
namespace proto {
class Profile;
}
const int kEnableProfiler = 1;
const int kDisableProfiler = 2;
......
......@@ -84,10 +84,6 @@ endif()
if(NOT WIN32)
set(PYBIND_DEPS ${PYBIND_DEPS} data_loader)
set(PYBIND_DEPS ${PYBIND_DEPS} mmap_allocator)
if(WITH_GPU)
set(PYBIND_DEPS ${PYBIND_DEPS} cuda_ipc_allocator)
endif()
if(WITH_NCCL OR WITH_RCCL)
set(PYBIND_DEPS ${PYBIND_DEPS} nccl_context)
set(PYBIND_DEPS ${PYBIND_DEPS} heter_ccl_context)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册