CMakeLists.txt 8.4 KB
Newer Older
P
peizhilin 已提交
1
# Protobuf library built from profiler.proto.
proto_library(
  profiler_proto
  SRCS profiler.proto
  DEPS framework_proto simple_threadpool)

# external_error.proto is only turned into a library for GPU builds.
if(WITH_GPU)
  proto_library(
    external_error_proto
    SRCS external_error.proto)
endif()
5 6 7 8 9 10 11
# Python bindings for profiler.proto. After profiler_py_proto is built, the
# *.py files found in the current binary directory are copied into
# python/paddle/fluid/proto/profiler under PADDLE_BINARY_DIR.
if(WITH_PYTHON)
  py_proto_compile(profiler_py_proto SRCS profiler.proto)

  # Touches __init__.py; no WORKING_DIRECTORY is given, so the target's
  # default working directory is used.
  add_custom_target(
    profiler_py_proto_init ALL
    COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
  add_dependencies(profiler_py_proto profiler_py_proto_init)

  if(WIN32)
    # The `copy` command is handed a destination with backslash separators.
    string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/")
    add_custom_command(
      TARGET profiler_py_proto
      POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
      COMMAND copy /Y *.py ${proto_dstpath}
      COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
  else()
    add_custom_command(
      TARGET profiler_py_proto
      POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
      COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
      COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
  endif()
endif()
25

Z
Zeng Jinle 已提交
26
# Runtime flag definitions (built on gflags).
cc_library(
  flags
  SRCS flags.cc
  DEPS gflags boost)

# denormal is declared with an empty DEPS list.
cc_library(
  denormal
  SRCS denormal.cc
  DEPS)
28

29 30
# Unit test for the errors target; links against errors and enforce.
cc_test(
  errors_test
  SRCS errors_test.cc
  DEPS errors enforce)

31
# Dependencies of the enforce library. Each dependency is listed exactly once
# (the previous list named `flags` twice).
set(enforce_deps flags errors boost pten_enforce)
if(WITH_GPU)
  # GPU builds additionally need the external error proto library.
  list(APPEND enforce_deps external_error_proto)
endif()

# enforce is declared with the INTERFACE option of the project's cc_library
# helper, using the dependency list assembled above.
cc_library(
  enforce INTERFACE
  SRCS enforce.cc
  DEPS ${enforce_deps})
H
hutuxian 已提交
37
# monitor has no declared dependencies.
cc_library(
  monitor
  SRCS monitor.cc)

# Unit test for enforce.
cc_test(
  enforce_test
  SRCS enforce_test.cc
  DEPS stringpiece enforce)

T
tensor-tang 已提交
40 41 42 43 44
# cpu_info always links gflags, glog and enforce; xbyak is appended only when
# WITH_XBYAK is enabled.
set(CPU_INFO_DEPS gflags glog enforce)
if(WITH_XBYAK)
  list(APPEND CPU_INFO_DEPS xbyak)
endif()

cc_library(
  cpu_info
  SRCS cpu_info.cc
  DEPS ${CPU_INFO_DEPS})
cc_test(
  cpu_info_test
  SRCS cpu_info_test.cc
  DEPS cpu_info)

cc_library(
  os_info
  SRCS os_info.cc
  DEPS enforce)
cc_test(
  os_info_test
  SRCS os_info_test.cc
  DEPS os_info)
L
liaogang 已提交
48

49
# cuda_graph_with_memory_pool is built from the same source in every
# configuration; GPU builds use nv_library and also depend on cuda_graph.
if(WITH_GPU)
  nv_library(
    cuda_graph_with_memory_pool
    SRCS cuda_graph_with_memory_pool.cc
    DEPS device_context allocator_facade cuda_graph)
else()
  cc_library(
    cuda_graph_with_memory_pool
    SRCS cuda_graph_with_memory_pool.cc
    DEPS device_context allocator_facade)
endif()
54

55
# place library and its unit test.
cc_library(
  place
  SRCS place.cc
  DEPS enforce boost pten_place)
cc_test(
  place_test
  SRCS place_test.cc
  DEPS place glog gflags)
L
liaogang 已提交
57

58 59 60 61 62 63
# MKLDNN_CTX_DEPS is unset by default and becomes `mkldnn` only when
# WITH_MKLDNN is enabled.
set(MKLDNN_CTX_DEPS)
if(WITH_MKLDNN)
  set(MKLDNN_CTX_DEPS mkldnn)
endif()

64
# Descend into the nested build directories, in this order.
foreach(platform_subdir device dynload stream)
  add_subdirectory(${platform_subdir})
endforeach()
L
liaogang 已提交
67

T
tensor-tang 已提交
68 69 70
# cpu_helper library and its unit test.
cc_library(
  cpu_helper
  SRCS cpu_helper.cc
  DEPS cblas enforce)
cc_test(
  cpu_helper_test
  SRCS cpu_helper_test.cc
  DEPS cpu_helper)

71
# dgc_deps is `dgc` when WITH_DGC is enabled and an empty string otherwise.
if(WITH_DGC)
  set(dgc_deps dgc)
else()
  set(dgc_deps "")
endif()

76
# Per-backend dependency lists. Only IPU_CTX_DEPS is explicitly cleared when
# its backend is disabled; the other lists are simply not assigned here.

# CUDA / ROCm
if(WITH_GPU OR WITH_ROCM)
  set(GPU_CTX_DEPS dynload_cuda dynamic_loader cuda_stream)
endif()

# IPU
set(IPU_CTX_DEPS)
if(WITH_IPU)
  set(IPU_CTX_DEPS ipu_info)
endif()

# Ascend NPU
if(WITH_ASCEND_CL)
  set(NPU_CTX_DEPS npu_stream npu_info)
endif()

# MLU
if(WITH_MLU)
  set(MLU_CTX_DEPS mlu_device_context)
endif()

# stream_callback_manager is built from the same source with the library
# helper that matches the enabled backend.
if(WITH_ASCEND_CL OR WITH_MLU)
  cc_library(
    stream_callback_manager
    SRCS stream_callback_manager.cc
    DEPS simple_threadpool enforce)
endif()

if(WITH_GPU)
  nv_library(
    stream_callback_manager
    SRCS stream_callback_manager.cc
    DEPS simple_threadpool enforce)
endif()

if(WITH_ROCM)
  hip_library(
    stream_callback_manager
    SRCS stream_callback_manager.cc
    DEPS simple_threadpool enforce)
endif()

# STREAM_CALLBACK_DEPS names stream_callback_manager for GPU, ROCm and Ascend
# builds and is unset otherwise.
# NOTE(review): a WITH_MLU-only build creates the library above but leaves
# this list unset - confirm that is intended.
if(WITH_GPU OR WITH_ROCM OR WITH_ASCEND_CL)
  set(STREAM_CALLBACK_DEPS stream_callback_manager)
else()
  set(STREAM_CALLBACK_DEPS)
endif()

113 114 115 116
# gloo_context is only built when WITH_GLOO is enabled.
if(WITH_GLOO)
  cc_library(
    gloo_context
    SRCS gloo_context.cc
    DEPS framework_proto gloo_wrapper enforce)
endif()

117 118
# cudnn_workspace_helper is built in every configuration (it is not guarded
# by a WITH_* option here).
cc_library(
  cudnn_workspace_helper
  SRCS cudnn_workspace_helper.cc
  DEPS boost)

119 120 121
# init is kept separate from device_context to avoid cyclic dependencies.
cc_library(
  init
  SRCS init.cc
  DEPS device_context custom_kernel)

122
# memcpy depends on device_context, so the dependencies are listed here
# individually to avoid a dependency cycle.
#
# Each dependency appears exactly once (the previous list named `eigen3`
# twice). The ${..._CTX_DEPS} / ${dgc_deps} / ${STREAM_CALLBACK_DEPS}
# variables expand to nothing when the corresponding backend is disabled.
# NOTE(review): XPU_CTX_DEPS is not assigned in the visible part of this file;
# presumably it comes from an enclosing scope - verify.
cc_library(
  device_context
  SRCS device_context.cc
  DEPS simple_threadpool
       malloc
       xxhash
       ${STREAM_CALLBACK_DEPS}
       place
       pten_place
       eigen3
       stringpiece
       cpu_helper
       cpu_info
       framework_proto
       ${IPU_CTX_DEPS}
       ${GPU_CTX_DEPS}
       ${NPU_CTX_DEPS}
       ${MKLDNN_CTX_DEPS}
       ${dgc_deps}
       dlpack
       cudnn_workspace_helper
       ${XPU_CTX_DEPS}
       ${MLU_CTX_DEPS}
       cpu_context
       generator)

# Keyword-less signature kept on purpose: CMake forbids mixing the plain and
# the PUBLIC/PRIVATE/INTERFACE signatures of target_link_libraries on one
# target, and the other calls on device_context in this file are plain.
if(WITH_XPU)
  target_link_libraries(device_context xpu_context)
endif()
130

131 132 133 134
# collective_helper, plus the backend-specific helper libraries it links when
# the matching option is enabled.
cc_library(
  collective_helper
  SRCS collective_helper.cc gen_comm_id_helper.cc
  DEPS framework_proto device_context enforce)

if(WITH_ASCEND_CL)
  target_link_libraries(collective_helper npu_collective_helper)
endif()

if(WITH_CNCL)
  target_link_libraries(collective_helper mlu_collective_helper)
endif()

140
# Extra libraries linked into device_context for specific backends.

# CUDA / ROCm
if(WITH_GPU OR WITH_ROCM)
  target_link_libraries(
    device_context
    gpu_info
    gpu_context
    pten_gpu_info
    gpu_resource_pool)
endif()

# Ascend NPU
if(WITH_ASCEND_CL)
  target_link_libraries(device_context npu_resource_pool)
endif()

# Custom devices
if(WITH_CUSTOM_DEVICE)
  target_link_libraries(device_context custom_context)
endif()

153 154
# Unit test built from init_test.cc; it lists only device_context as a
# dependency.
cc_test(
  init_test
  SRCS init_test.cc
  DEPS device_context)

155 156 157 158
# Manage all device event libraries.
# DEVICE_EVENT_LIBS is published as an INTERNAL cache entry. It starts out as
# device_event_base and is replaced (not appended to) by device_event_gpu in
# GPU and ROCm builds.
set(DEVICE_EVENT_LIBS)
cc_library(
  device_event_base
  SRCS device_event_base.cc
  DEPS place enforce device_context op_registry)
set(DEVICE_EVENT_LIBS device_event_base CACHE INTERNAL "device event libs")

# CUDA: GPU event library plus the device tests.
if(WITH_GPU)
  nv_library(
    device_event_gpu
    SRCS device_event_gpu.cc
    DEPS device_event_base)
  set(DEVICE_EVENT_LIBS device_event_gpu CACHE INTERNAL "device event libs")
  nv_test(
    device_event_test
    SRCS device_event_test.cc
    DEPS device_event_gpu)

  nv_test(
    device_context_test
    SRCS device_context_test.cu
    DEPS device_context gpu_info)
  nv_test(
    transform_test
    SRCS transform_test.cu
    DEPS memory place device_context)
endif()

# ROCm: the same targets, declared through the hip_* helpers.
if(WITH_ROCM)
  hip_library(
    device_event_gpu
    SRCS device_event_gpu.cc
    DEPS device_event_base)
  set(DEVICE_EVENT_LIBS device_event_gpu CACHE INTERNAL "device event libs")
  hip_test(
    device_event_test
    SRCS device_event_test.cc
    DEPS device_event_gpu)

  hip_test(
    device_context_test
    SRCS device_context_test.cu
    DEPS device_context gpu_info)
  hip_test(
    transform_test
    SRCS transform_test.cu
    DEPS memory place device_context)
endif()
D
dangqingqing 已提交
178

179
# timer library (no declared dependencies) and its unit test.
cc_library(
  timer
  SRCS timer.cc)
cc_test(
  timer_test
  SRCS timer_test.cc
  DEPS timer)
181

D
dongdaxiang 已提交
182
# lodtensor_printer library and its unit test.
cc_library(
  lodtensor_printer
  SRCS lodtensor_printer.cc
  DEPS ddim
       place
       tensor
       scope
       lod_tensor
       variable_helper
       framework_proto)
cc_test(
  lodtensor_printer_test
  SRCS lodtensor_printer_test.cc
  DEPS lodtensor_printer)
D
dongdaxiang 已提交
184

L
liutiexing 已提交
185 186
# Nested profiler directory.
add_subdirectory(profiler)

# device_tracer picks up the GPU context dependencies when they are set.
cc_library(
  device_tracer
  SRCS device_tracer.cc
  DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})

# profiler and device_memory_aligment are declared once per configuration:
# CUDA, ROCm, or neither. The two device variants also compile profiler.cu
# and depend on gpu_info.
if(WITH_GPU)
  nv_library(
    profiler
    SRCS profiler.cc profiler.cu
    DEPS os_info device_tracer gpu_info enforce dynload_cuda)
  nv_library(
    device_memory_aligment
    SRCS device_memory_aligment.cc
    DEPS cpu_info gpu_info place)
elseif(WITH_ROCM)
  hip_library(
    profiler
    SRCS profiler.cc profiler.cu
    DEPS os_info device_tracer gpu_info enforce)
  hip_library(
    device_memory_aligment
    SRCS device_memory_aligment.cc
    DEPS cpu_info gpu_info place)
else()
  cc_library(
    profiler
    SRCS profiler.cc
    DEPS os_info device_tracer enforce)
  cc_library(
    device_memory_aligment
    SRCS device_memory_aligment.cc
    DEPS cpu_info place)
endif()
198

T
Tao Luo 已提交
199
# Unit test for the profiler library.
cc_test(
  profiler_test
  SRCS profiler_test.cc
  DEPS profiler)

# float16_test, bfloat16_test and complex_test share one shape:
# <dtype>_test built from <dtype>_test.cc against lod_tensor.
foreach(dtype float16 bfloat16 complex)
  cc_test(
    ${dtype}_test
    SRCS ${dtype}_test.cc
    DEPS lod_tensor)
endforeach()
203

204 205
# CUDA builds: device-side dtype tests, the GPU memory limit test and the
# cuda_device_guard library.
if(WITH_GPU)
  nv_test(
    float16_gpu_test
    SRCS float16_test.cu
    DEPS lod_tensor)
  nv_test(
    bfloat16_gpu_test
    SRCS bfloat16_test.cu
    DEPS lod_tensor)
  nv_test(
    complex_gpu_test
    SRCS complex_test.cu
    DEPS lod_tensor)
  nv_test(
    test_limit_gpu_memory
    SRCS test_limit_gpu_memory.cu
    DEPS gpu_info flags)
  nv_library(
    cuda_device_guard
    SRCS cuda_device_guard.cc
    DEPS gpu_info)
endif()

# ROCm builds: only the float16 test, the memory limit test and
# cuda_device_guard are declared here.
if(WITH_ROCM)
  hip_test(
    float16_gpu_test
    SRCS float16_test.cu
    DEPS lod_tensor)
  hip_test(
    test_limit_gpu_memory
    SRCS test_limit_gpu_memory.cu
    DEPS gpu_info flags)
  hip_library(
    cuda_device_guard
    SRCS cuda_device_guard.cc
    DEPS gpu_info)
endif()
217

218 219
# device_code is skipped on Apple and Windows hosts; its test is only
# registered for GPU or ROCm builds.
if(NOT APPLE AND NOT WIN32)
  cc_library(
    device_code
    SRCS device_code.cc
    DEPS device_context)

  if(WITH_GPU OR WITH_ROCM)
    cc_test(
      device_code_test
      SRCS device_code_test.cc
      DEPS device_code lod_tensor)
  endif()
endif()