CMakeLists.txt 10.2 KB
Newer Older
1 2 3 4 5 6 7 8
# var_handle is built from var_handle.cc.
cc_library(
  var_handle
  SRCS var_handle.cc
  DEPS place
       framework_proto
       node)

# op_handle_base is built from op_handle_base.cc and depends on var_handle.
cc_library(
  op_handle_base
  SRCS op_handle_base.cc
  DEPS var_handle
       device_context
       lod_tensor)
9

10 11 12
# Three op handles that share one dependency set: each is built from its own
# <name>.cc and depends on op_handle_base, scope, lod_tensor, phi and memory.
cc_library(
  scale_loss_grad_op_handle
  SRCS scale_loss_grad_op_handle.cc
  DEPS op_handle_base
       scope
       lod_tensor
       phi
       memory)
cc_library(
  fetch_op_handle
  SRCS fetch_op_handle.cc
  DEPS op_handle_base
       scope
       lod_tensor
       phi
       memory)
cc_library(
  fetch_async_op_handle
  SRCS fetch_async_op_handle.cc
  DEPS op_handle_base
       scope
       lod_tensor
       phi
       memory)
22

23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
# share_tensor_buffer_functor, computation_op_handle, rpc_op_handle and
# fetch_barrier_op_handle all use the same five dependencies.
cc_library(
  share_tensor_buffer_functor
  SRCS share_tensor_buffer_functor.cc
  DEPS framework_proto
       scope
       place
       operator
       op_registry)
cc_library(
  computation_op_handle
  SRCS computation_op_handle.cc
  DEPS framework_proto
       scope
       place
       operator
       op_registry)

# Depends on the two libraries declared just above.
cc_library(
  share_tensor_buffer_op_handle
  SRCS share_tensor_buffer_op_handle.cc
  DEPS op_handle_base
       scope
       computation_op_handle
       share_tensor_buffer_functor)
cc_library(
  rpc_op_handle
  SRCS rpc_op_handle.cc
  DEPS framework_proto
       scope
       place
       operator
       op_registry)
cc_library(
  fetch_barrier_op_handle
  SRCS fetch_barrier_op_handle.cc
  DEPS framework_proto
       scope
       place
       operator
       op_registry)

# multi_devices_helper only needs the graph libraries.
cc_library(
  multi_devices_helper
  SRCS multi_devices_helper.cc
  DEPS graph
       graph_helper)
Z
Zeng Jinle 已提交
47

48 49 50 51
# variable_visitor is built from variable_visitor.cc.
cc_library(
  variable_visitor
  SRCS variable_visitor.cc
  DEPS lod_tensor
       selected_rows_utils)
C
chengduoZH 已提交
52

T
tangwei12 已提交
53
# When WITH_PSCORE is enabled, three sources are compiled with extra flags that
# turn off the non-virtual-dtor warning and keep the non-virtual-dtor /
# delete-non-virtual-dtor diagnostics from being treated as errors.
if(WITH_PSCORE)
  set(DISTRIBUTE_COMPILE_FLAGS
      "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
  )
  # -faligned-new is only added for compiler versions greater than 7.0.
  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
    string(APPEND DISTRIBUTE_COMPILE_FLAGS " -faligned-new")
  endif()
  # The expansion is quoted so the whole flag string is always passed as a
  # single COMPILE_FLAGS value. All three files get the same property, so one
  # call covers them.
  set_source_files_properties(
    reduce_op_handle.cc
    threaded_ssa_graph_executor.cc
    async_ssa_graph_executor.cc
    PROPERTIES COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS}")
endif()

Y
Yu Yang 已提交
70
if(WITH_GPU)
71 72 73 74 75 76 77 78 79 80
  # WITH_GPU branch: libraries are declared with nv_library, and nan_inf_utils
  # also compiles the .cu source.
  nv_library(
    nan_inf_utils
    SRCS nan_inf_utils_detail.cc nan_inf_utils_detail.cu
    DEPS framework_proto
         scope
         place)
  nv_library(
    all_reduce_op_handle
    SRCS all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor)
  nv_library(
    fused_all_reduce_op_handle
    SRCS fused_all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor
         place)
  # grad_merge_all_reduce_op_handle depends on both all-reduce handles above.
  nv_library(
    grad_merge_all_reduce_op_handle
    SRCS grad_merge_all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor
         place
         all_reduce_op_handle
         fused_all_reduce_op_handle)

  # sparse_all_reduce_op_handle is only declared when WITH_DGC is enabled; it
  # adds the dgc dependency and depends on all_reduce_op_handle.
  if(WITH_DGC)
    nv_library(
      sparse_all_reduce_op_handle
      SRCS sparse_all_reduce_op_handle.cc
      DEPS op_handle_base
           scope
           lod_tensor
           phi
           memory
           dynload_cuda
           variable_visitor
           dgc
           all_reduce_op_handle)
  endif()

  # reduce_op_handle used to be declared in both arms of an
  # if(WITH_DISTRIBUTE)/else() with identical sources and dependencies, so the
  # conditional had no effect. It is declared once, unconditionally.
  nv_library(
    reduce_op_handle
    SRCS reduce_op_handle.cc
    DEPS op_handle_base
         variable_visitor
         scope
         phi
         dynload_cuda)
  # broadcast_op_handle, and the fused variant that depends on it.
  nv_library(
    broadcast_op_handle
    SRCS broadcast_op_handle.cc
    DEPS op_handle_base
         scope
         phi
         memory
         variable_visitor
         dynload_cuda)
  nv_library(
    fused_broadcast_op_handle
    SRCS fused_broadcast_op_handle.cc
    DEPS broadcast_op_handle)
144
elseif(WITH_ROCM)
145 146 147 148 149 150 151 152 153 154
  # WITH_ROCM branch: same libraries as the GPU branch, declared with
  # hip_library. The dependency lists still name dynload_cuda.
  hip_library(
    nan_inf_utils
    SRCS nan_inf_utils_detail.cc nan_inf_utils_detail.cu
    DEPS framework_proto
         scope
         place)
  hip_library(
    all_reduce_op_handle
    SRCS all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor)
  hip_library(
    fused_all_reduce_op_handle
    SRCS fused_all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor
         place)
  # grad_merge_all_reduce_op_handle depends on both all-reduce handles above.
  hip_library(
    grad_merge_all_reduce_op_handle
    SRCS grad_merge_all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor
         place
         all_reduce_op_handle
         fused_all_reduce_op_handle)

  # reduce_op_handle used to be declared in both arms of an
  # if(WITH_DISTRIBUTE)/else() with identical sources and dependencies, so the
  # conditional had no effect. It is declared once, unconditionally.
  hip_library(
    reduce_op_handle
    SRCS reduce_op_handle.cc
    DEPS op_handle_base
         variable_visitor
         scope
         phi
         dynload_cuda)
  # broadcast_op_handle, and the fused variant that depends on it.
  hip_library(
    broadcast_op_handle
    SRCS broadcast_op_handle.cc
    DEPS op_handle_base
         scope
         phi
         memory
         variable_visitor
         dynload_cuda)
  hip_library(
    fused_broadcast_op_handle
    SRCS fused_broadcast_op_handle.cc
    DEPS broadcast_op_handle)
Y
Yu Yang 已提交
203
else()
204 205 206 207
  # Fallback branch (neither WITH_GPU nor WITH_ROCM): plain cc_library targets,
  # no .cu source and no dynload_cuda dependency.
  cc_library(
    nan_inf_utils
    SRCS nan_inf_utils_detail.cc
    DEPS framework_proto
         scope
         place)
  cc_library(
    all_reduce_op_handle
    SRCS all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         variable_visitor)
  cc_library(
    fused_all_reduce_op_handle
    SRCS fused_all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         variable_visitor
         place)
  # grad_merge_all_reduce_op_handle depends on both all-reduce handles above.
  cc_library(
    grad_merge_all_reduce_op_handle
    SRCS grad_merge_all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         variable_visitor
         place
         all_reduce_op_handle
         fused_all_reduce_op_handle)
  # reduce_op_handle used to be declared in both arms of an
  # if(WITH_DISTRIBUTE)/else() with identical sources and dependencies, so the
  # conditional had no effect. It is declared once, unconditionally.
  cc_library(
    reduce_op_handle
    SRCS reduce_op_handle.cc
    DEPS op_handle_base
         variable_visitor
         scope
         phi)
  # broadcast_op_handle, and the fused variant that depends on it.
  cc_library(
    broadcast_op_handle
    SRCS broadcast_op_handle.cc
    DEPS op_handle_base
         scope
         phi
         memory
         variable_visitor)
  cc_library(
    fused_broadcast_op_handle
    SRCS fused_broadcast_op_handle.cc
    DEPS broadcast_op_handle)
Y
Yu Yang 已提交
253
endif()
C
chengduoZH 已提交
254

255 256 257
# gather_op_handle is declared with cc_library regardless of the GPU/ROCm
# switches.
cc_library(
  gather_op_handle
  SRCS gather_op_handle.cc
  DEPS op_handle_base
       scope
       phi
       memory
       variable_visitor)
C
chengduoZH 已提交
259

260 261 262 263
# eager_deletion_op_handle is built from eager_deletion_op_handle.cc.
cc_library(
  eager_deletion_op_handle
  SRCS eager_deletion_op_handle.cc
  DEPS lod_tensor
       selected_rows_utils
       reference_count_pass_helper)
Y
yuyang18 已提交
264

265 266 267
# Dependency list for ssa_graph_executor, kept in a variable and expanded in
# the cc_library call below.
set(SSA_GRAPH_EXECUTOR_DEPS
    graph
    framework_proto
    multi_devices_helper
    reference_count_pass
    eager_deletion_pass
    buffer_shared_inplace_op_pass
    buffer_shared_cross_op_memory_reuse_pass
    inplace_addto_op_pass
    set_reader_device_info_utils)
cc_library(
  ssa_graph_executor
  SRCS ssa_graph_executor.cc
  DEPS ${SSA_GRAPH_EXECUTOR_DEPS})
S
sneaxiy 已提交
279

280 281 282 283 284
# threaded_ssa_graph_executor depends on ssa_graph_executor and fetch_op_handle.
cc_library(
  threaded_ssa_graph_executor
  SRCS threaded_ssa_graph_executor.cc
  DEPS fetch_op_handle
       ssa_graph_executor
       scope
       simple_threadpool
       device_context)
285

286 287 288 289
# parallel_ssa_graph_executor has a single dependency: the threaded executor.
cc_library(
  parallel_ssa_graph_executor
  SRCS parallel_ssa_graph_executor.cc
  DEPS threaded_ssa_graph_executor)
Y
Yancey1989 已提交
290

291
# Dependency list for async_ssa_graph_executor; currently just the threaded
# executor. It is kept in a variable and expanded in the cc_library call below.
set(ASYNC_SSA_GRAPH_EXECUTOR_DEPS threaded_ssa_graph_executor)

cc_library(
  async_ssa_graph_executor
  SRCS async_ssa_graph_executor.cc
  DEPS ${ASYNC_SSA_GRAPH_EXECUTOR_DEPS})
Q
can run  
Qiao Longfei 已提交
297

298 299 300 301 302 303
# Tests for the broadcast and gather op handles. Both use the same
# dependencies apart from the handle under test.
cc_test(
  broadcast_op_test
  SRCS broadcast_op_handle_test.cc
  DEPS var_handle
       op_handle_base
       scope
       phi
       memory
       device_context
       broadcast_op_handle)
# gather_op_test is registered through cc_test_old rather than cc_test.
cc_test_old(
  gather_op_test
  SRCS gather_op_handle_test.cc
  DEPS var_handle
       op_handle_base
       scope
       phi
       memory
       device_context
       gather_op_handle)
320

321 322 323 324 325 326 327 328
# scope_buffered_monitor, and the executor that depends on it.
cc_library(
  scope_buffered_monitor
  SRCS scope_buffered_monitor.cc
  DEPS scope
       profiler
       selected_rows_utils)
cc_library(
  scope_buffered_ssa_graph_executor
  SRCS scope_buffered_ssa_graph_executor.cc
  DEPS ssa_graph_executor
       scope_buffered_monitor)
329
# NOTE: reduce_op_handle_test is disabled (kept commented out):
# cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope phi memory
#         device_context reduce_op_handle )
331 332 333
# Two more executors layered on ssa_graph_executor. The bind_threaded variant
# uses fetch_op_handle (plus phi); the fast_threaded variant uses
# fetch_async_op_handle.
cc_library(
  bind_threaded_ssa_graph_executor
  SRCS bind_threaded_ssa_graph_executor.cc
  DEPS fetch_op_handle
       phi
       ssa_graph_executor
       scope
       simple_threadpool
       device_context)
cc_library(
  fast_threaded_ssa_graph_executor
  SRCS fast_threaded_ssa_graph_executor.cc
  DEPS fetch_async_op_handle
       ssa_graph_executor
       scope
       simple_threadpool
       device_context)
cc_test(
  fused_broadcast_op_test
  SRCS fused_broadcast_op_handle_test.cc
  DEPS fused_broadcast_op_handle)

# exception_holder_test declares no DEPS.
cc_test(exception_holder_test SRCS exception_holder_test.cc)

# Passes that build_strategy depends on. Later conditionals in this file may
# append to this list before it is expanded in the build_strategy target.
set(IR_PASS_DEPS
    graph_viz_pass
    multi_devices_graph_pass
    multi_devices_graph_print_pass
    multi_devices_graph_check_pass
    fuse_elewise_add_act_pass
    fuse_bn_act_pass
    fuse_bn_add_act_pass
    multi_batch_merge_pass
    fuse_relu_depthwise_conv_pass
    lock_free_optimize_pass
    sequential_execution_pass
    all_reduce_deps_pass
    add_reader_dependency_pass
    modify_op_lock_and_record_event_pass
    coalesce_grad_tensor_pass
    fuse_all_reduce_op_pass
    backward_optimizer_op_deps_pass
    fuse_adam_op_pass
    fuse_sgd_op_pass
    fuse_momentum_op_pass
    sync_batch_norm_pass
    runtime_context_cache_pass
    graph_to_program_pass
    fix_op_run_order_pass
    fuse_gemm_epilogue_pass
    fused_attention_pass
    fuse_adamw_op_pass
    fused_feedforward_pass
    delete_dropout_op_pass)
378

379
# With CINN enabled, two extra passes are appended to IR_PASS_DEPS, in the
# same order as before.
if(WITH_CINN)
  list(APPEND IR_PASS_DEPS build_cinn_pass cinn_zero_tensor_trick_pass)
endif()

384 385 386
# fusion_group_pass is appended only for GPU or ROCm builds on platforms that
# are neither Apple nor Windows.
if(NOT APPLE
   AND NOT WIN32
   AND (WITH_GPU OR WITH_ROCM))
  list(APPEND IR_PASS_DEPS fusion_group_pass)
endif()
389 390 391 392 393 394 395 396
# build_strategy depends on pass_builder plus every pass collected in
# IR_PASS_DEPS.
cc_library(
  build_strategy
  SRCS build_strategy.cc
  DEPS pass_builder
       ${IR_PASS_DEPS})
cc_test(
  build_strategy_test
  SRCS build_strategy_test.cc
  DEPS build_strategy
       op_registry
       op_proto_maker
       graph
       string_helper)
Z
Zeng Jinle 已提交
397

398
# With MKLDNN enabled, build_strategy additionally links mkldnn_placement_pass.
if(WITH_MKLDNN)
  # NOTE(review): the keyword-less signature is kept deliberately. cc_library
  # presumably links build_strategy with the plain signature too, and CMake
  # rejects mixing plain and PRIVATE/PUBLIC signatures on one target. Confirm
  # before adding a visibility keyword here.
  target_link_libraries(build_strategy mkldnn_placement_pass)
endif()