CMakeLists.txt 10.3 KB
Newer Older
1 2 3 4 5 6 7 8
# var_handle: compiled from var_handle.cc.
cc_library(
  var_handle
  SRCS var_handle.cc
  DEPS place
       framework_proto
       node)

# op_handle_base: compiled from op_handle_base.cc; links var_handle above.
cc_library(
  op_handle_base
  SRCS op_handle_base.cc
  DEPS var_handle
       device_context
       lod_tensor)
9

10 11 12
# Three op-handle libraries that all link the same dependency set
# (op_handle_base scope lod_tensor phi memory).
cc_library(
  scale_loss_grad_op_handle
  SRCS scale_loss_grad_op_handle.cc
  DEPS op_handle_base scope lod_tensor phi memory)
cc_library(
  fetch_op_handle
  SRCS fetch_op_handle.cc
  DEPS op_handle_base scope lod_tensor phi memory)
cc_library(
  fetch_async_op_handle
  SRCS fetch_async_op_handle.cc
  DEPS op_handle_base scope lod_tensor phi memory)
22

23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
# share_tensor_buffer_functor, computation_op_handle, rpc_op_handle and
# fetch_barrier_op_handle all link the same five dependencies.
cc_library(
  share_tensor_buffer_functor
  SRCS share_tensor_buffer_functor.cc
  DEPS framework_proto
       scope
       place
       operator
       op_registry)

cc_library(
  computation_op_handle
  SRCS computation_op_handle.cc
  DEPS framework_proto
       scope
       place
       operator
       op_registry)

# Links the two libraries declared immediately above.
cc_library(
  share_tensor_buffer_op_handle
  SRCS share_tensor_buffer_op_handle.cc
  DEPS op_handle_base
       scope
       computation_op_handle
       share_tensor_buffer_functor)

cc_library(
  rpc_op_handle
  SRCS rpc_op_handle.cc
  DEPS framework_proto
       scope
       place
       operator
       op_registry)

cc_library(
  fetch_barrier_op_handle
  SRCS fetch_barrier_op_handle.cc
  DEPS framework_proto
       scope
       place
       operator
       op_registry)

cc_library(
  multi_devices_helper
  SRCS multi_devices_helper.cc
  DEPS graph
       graph_helper)
Z
Zeng Jinle 已提交
47

48 49 50 51
# variable_visitor: compiled from variable_visitor.cc.
cc_library(
  variable_visitor
  SRCS variable_visitor.cc
  DEPS lod_tensor
       selected_rows_utils)
C
chengduoZH 已提交
52

T
tangwei12 已提交
53
# When WITH_PSCORE is enabled, three sources in this directory are compiled
# with extra flags that relax the non-virtual-destructor diagnostics.
if(WITH_PSCORE)
  set(DISTRIBUTE_COMPILE_FLAGS
      "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
  )
  # -faligned-new is only added for compiler versions newer than 7.0.
  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
    string(APPEND DISTRIBUTE_COMPILE_FLAGS " -faligned-new")
  endif()
  # COMPILE_FLAGS takes a single string; quote the expansion so it always
  # reaches the command as exactly one argument.
  set_source_files_properties(
    reduce_op_handle.cc threaded_ssa_graph_executor.cc
    async_ssa_graph_executor.cc
    PROPERTIES COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS}")
endif()

Y
Yu Yang 已提交
70
if(WITH_GPU)
71 72
  # WITH_GPU branch: the op-handle libraries are declared through nv_library.
  nv_library(
    nan_inf_utils
    SRCS nan_inf_utils_detail.cc
    DEPS framework_proto scope place phi)
  nv_library(
    all_reduce_op_handle
    SRCS all_reduce_op_handle.cc
    DEPS variable_visitor
         op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda)
  nv_library(
    fused_all_reduce_op_handle
    SRCS fused_all_reduce_op_handle.cc
    DEPS all_reduce_op_handle
         variable_visitor
         op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         place)
  nv_library(
    grad_merge_all_reduce_op_handle
    SRCS grad_merge_all_reduce_op_handle.cc
    DEPS fused_all_reduce_op_handle
         op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor
         place
         all_reduce_op_handle)

  # sparse_all_reduce_op_handle links dgc, so it is only declared with
  # WITH_DGC.
  if(WITH_DGC)
    nv_library(
      sparse_all_reduce_op_handle
      SRCS sparse_all_reduce_op_handle.cc
      DEPS op_handle_base
           scope
           lod_tensor
           phi
           memory
           dynload_cuda
           variable_visitor
           dgc
           all_reduce_op_handle)
  endif()

  # reduce_op_handle has the same sources and dependencies whether or not
  # WITH_DISTRIBUTE is set, so it is declared unconditionally.
  nv_library(
    reduce_op_handle
    SRCS reduce_op_handle.cc
    DEPS op_handle_base variable_visitor scope phi dynload_cuda)
  nv_library(
    broadcast_op_handle
    SRCS broadcast_op_handle.cc
    DEPS op_handle_base scope phi memory variable_visitor dynload_cuda)
  nv_library(
    fused_broadcast_op_handle
    SRCS fused_broadcast_op_handle.cc
    DEPS broadcast_op_handle)
145
elseif(WITH_ROCM)
146 147
  # WITH_ROCM branch: the same op-handle libraries, declared through
  # hip_library.
  hip_library(
    nan_inf_utils
    SRCS nan_inf_utils_detail.cc
    DEPS framework_proto scope place phi)
  hip_library(
    all_reduce_op_handle
    SRCS all_reduce_op_handle.cc
    DEPS op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor)
  hip_library(
    fused_all_reduce_op_handle
    SRCS fused_all_reduce_op_handle.cc
    DEPS all_reduce_op_handle
         op_handle_base
         variable_visitor
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         place)
  hip_library(
    grad_merge_all_reduce_op_handle
    SRCS grad_merge_all_reduce_op_handle.cc
    DEPS fused_all_reduce_op_handle
         op_handle_base
         scope
         lod_tensor
         phi
         memory
         dynload_cuda
         variable_visitor
         place
         all_reduce_op_handle)

  # reduce_op_handle has the same sources and dependencies whether or not
  # WITH_DISTRIBUTE is set, so it is declared unconditionally.
  hip_library(
    reduce_op_handle
    SRCS reduce_op_handle.cc
    DEPS op_handle_base variable_visitor scope phi dynload_cuda)
  hip_library(
    broadcast_op_handle
    SRCS broadcast_op_handle.cc
    DEPS op_handle_base scope phi memory variable_visitor dynload_cuda)
  hip_library(
    fused_broadcast_op_handle
    SRCS fused_broadcast_op_handle.cc
    DEPS broadcast_op_handle)
Y
Yu Yang 已提交
205
else()
206 207 208
  # Fallback branch (neither WITH_GPU nor WITH_ROCM): the same op-handle
  # libraries, declared through cc_library and without dynload_cuda.
  cc_library(
    nan_inf_utils
    SRCS nan_inf_utils_detail.cc
    DEPS framework_proto scope place phi)
  cc_library(
    all_reduce_op_handle
    SRCS all_reduce_op_handle.cc
    DEPS op_handle_base scope lod_tensor phi memory variable_visitor)
  cc_library(
    fused_all_reduce_op_handle
    SRCS fused_all_reduce_op_handle.cc
    DEPS all_reduce_op_handle
         op_handle_base
         scope
         lod_tensor
         phi
         memory
         variable_visitor
         place)
  cc_library(
    grad_merge_all_reduce_op_handle
    SRCS grad_merge_all_reduce_op_handle.cc
    DEPS fused_all_reduce_op_handle
         op_handle_base
         scope
         lod_tensor
         phi
         memory
         variable_visitor
         place
         all_reduce_op_handle)

  # reduce_op_handle has the same sources and dependencies whether or not
  # WITH_DISTRIBUTE is set, so it is declared unconditionally.
  cc_library(
    reduce_op_handle
    SRCS reduce_op_handle.cc
    DEPS op_handle_base variable_visitor scope phi)
  cc_library(
    broadcast_op_handle
    SRCS broadcast_op_handle.cc
    DEPS op_handle_base scope phi memory variable_visitor)
  cc_library(
    fused_broadcast_op_handle
    SRCS fused_broadcast_op_handle.cc
    DEPS broadcast_op_handle)
Y
Yu Yang 已提交
256
endif()
C
chengduoZH 已提交
257

258 259 260
# gather_op_handle: compiled from gather_op_handle.cc.
cc_library(
  gather_op_handle
  SRCS gather_op_handle.cc
  DEPS op_handle_base scope phi memory variable_visitor)
C
chengduoZH 已提交
262

263 264 265 266
# eager_deletion_op_handle: compiled from eager_deletion_op_handle.cc.
cc_library(
  eager_deletion_op_handle
  SRCS eager_deletion_op_handle.cc
  DEPS lod_tensor
       selected_rows_utils
       reference_count_pass_helper)
Y
yuyang18 已提交
267

268 269 270
# Dependency list for ssa_graph_executor, kept in a variable and expanded in
# the cc_library call below.
set(SSA_GRAPH_EXECUTOR_DEPS
    graph
    framework_proto
    multi_devices_helper
    reference_count_pass
    eager_deletion_pass
    buffer_shared_inplace_op_pass
    buffer_shared_cross_op_memory_reuse_pass
    inplace_addto_op_pass
    set_reader_device_info_utils)
cc_library(
  ssa_graph_executor
  SRCS ssa_graph_executor.cc
  DEPS ${SSA_GRAPH_EXECUTOR_DEPS})
S
sneaxiy 已提交
282

283 284 285 286 287
# threaded_ssa_graph_executor is declared first; the parallel and async
# executors below both link it.
cc_library(
  threaded_ssa_graph_executor
  SRCS threaded_ssa_graph_executor.cc
  DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool
       device_context)

cc_library(
  parallel_ssa_graph_executor
  SRCS parallel_ssa_graph_executor.cc
  DEPS threaded_ssa_graph_executor)

# Dependency list for async_ssa_graph_executor, expanded in the call below.
set(ASYNC_SSA_GRAPH_EXECUTOR_DEPS threaded_ssa_graph_executor)

cc_library(
  async_ssa_graph_executor
  SRCS async_ssa_graph_executor.cc
  DEPS ${ASYNC_SSA_GRAPH_EXECUTOR_DEPS})
Q
can run  
Qiao Longfei 已提交
300

301 302 303 304 305 306
# Tests for broadcast_op_handle and gather_op_handle. gather_op_test is
# registered through cc_test_old rather than cc_test.
cc_test(
  broadcast_op_test
  SRCS broadcast_op_handle_test.cc
  DEPS var_handle
       op_handle_base
       scope
       phi
       memory
       device_context
       broadcast_op_handle)
cc_test_old(
  gather_op_test
  SRCS
  gather_op_handle_test.cc
  DEPS
  var_handle
  op_handle_base
  scope
  phi
  memory
  device_context
  gather_op_handle)
323

324 325 326 327 328 329 330 331
# scope_buffered_monitor: compiled from scope_buffered_monitor.cc.
cc_library(
  scope_buffered_monitor
  SRCS scope_buffered_monitor.cc
  DEPS scope
       profiler
       selected_rows_utils)

# scope_buffered_ssa_graph_executor links the monitor declared above.
cc_library(
  scope_buffered_ssa_graph_executor
  SRCS scope_buffered_ssa_graph_executor.cc
  DEPS ssa_graph_executor
       scope_buffered_monitor)
332
# Disabled test registration, kept for reference:
#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope phi memory
#        device_context reduce_op_handle )
334 335 336
# Two further SSA graph executor libraries, each built on ssa_graph_executor,
# followed by two test registrations.
cc_library(
  bind_threaded_ssa_graph_executor
  SRCS bind_threaded_ssa_graph_executor.cc
  DEPS fetch_op_handle phi ssa_graph_executor scope simple_threadpool
       device_context)
cc_library(
  fast_threaded_ssa_graph_executor
  SRCS fast_threaded_ssa_graph_executor.cc
  DEPS fetch_async_op_handle ssa_graph_executor scope simple_threadpool
       device_context)
cc_test(
  fused_broadcast_op_test
  SRCS fused_broadcast_op_handle_test.cc
  DEPS fused_broadcast_op_handle)

# exception_holder_test is registered with no DEPS.
cc_test(exception_holder_test SRCS exception_holder_test.cc)

# Base list of IR pass targets; it is extended conditionally further down and
# passed as DEPS to build_strategy.
set(IR_PASS_DEPS
    graph_viz_pass
    multi_devices_graph_pass
    multi_devices_graph_print_pass
    multi_devices_graph_check_pass
    fuse_elewise_add_act_pass
    fuse_bn_act_pass
    fuse_bn_add_act_pass
    multi_batch_merge_pass
    fuse_relu_depthwise_conv_pass
    lock_free_optimize_pass
    sequential_execution_pass
    all_reduce_deps_pass
    add_reader_dependency_pass
    modify_op_lock_and_record_event_pass
    coalesce_grad_tensor_pass
    fuse_all_reduce_op_pass
    backward_optimizer_op_deps_pass
    fuse_adam_op_pass
    fuse_sgd_op_pass
    fuse_momentum_op_pass
    sync_batch_norm_pass
    runtime_context_cache_pass
    graph_to_program_pass
    fix_op_run_order_pass
    fuse_gemm_epilogue_pass
    fused_attention_pass
    fuse_adamw_op_pass
    fused_feedforward_pass
    delete_dropout_op_pass)
381

382
# With WITH_CINN enabled, two CINN passes are added to IR_PASS_DEPS.
if(WITH_CINN)
  list(APPEND IR_PASS_DEPS build_cinn_pass cinn_zero_tensor_trick_pass)
endif()

387 388 389
# fusion_group_pass is only added for GPU or ROCm builds on platforms other
# than Apple and Windows.
if(NOT APPLE
   AND NOT WIN32
   AND (WITH_GPU OR WITH_ROCM))
  list(APPEND IR_PASS_DEPS fusion_group_pass)
endif()
392 393 394 395 396 397 398 399
# build_strategy links pass_builder plus every pass collected in IR_PASS_DEPS.
cc_library(
  build_strategy
  SRCS build_strategy.cc
  DEPS pass_builder
       ${IR_PASS_DEPS})

cc_test(
  build_strategy_test
  SRCS build_strategy_test.cc
  DEPS build_strategy
       op_registry
       op_proto_maker
       graph
       string_helper)
Z
Zeng Jinle 已提交
400

401
# With WITH_MKLDNN enabled, build_strategy additionally links
# mkldnn_placement_pass.
# NOTE(review): the keyword-less signature is kept deliberately. CMake rejects
# mixing keyword and plain target_link_libraries signatures on one target, and
# how cc_library links build_strategy is not visible here - confirm before
# adding PRIVATE/PUBLIC.
if(WITH_MKLDNN)
  target_link_libraries(build_strategy mkldnn_placement_pass)
endif()