load_ops.h 9.2 KB
Newer Older
H
hjchen2 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#if defined(PADDLE_MOBILE_CPU)
// LOAD_CPU_OP(op_type): declares the external function
// TouchOpRegistrar_<op_type>_cpu() and defines a file-local int named
// use_op_itself_<op_type>_cpu whose initializer calls that function. The
// variable carries __attribute__((unused)) because nothing ever reads it.
// The expansion does not end in ';'.
// NOTE(review): presumably the call exists so the CPU kernel's registration
// code is kept by the linker -- confirm against the registrar macros.
#define LOAD_CPU_OP(op_type)                     \
  extern int TouchOpRegistrar_##op_type##_cpu(); \
  static int use_op_itself_##op_type##_cpu       \
      __attribute__((unused)) = TouchOpRegistrar_##op_type##_cpu()
#else
// PADDLE_MOBILE_CPU is not defined: a CPU load request expands to nothing.
#define LOAD_CPU_OP(op_type)
#endif

26 27 28 29 30 31 32 33 34
#if defined(PADDLE_MOBILE_CL)
// LOAD_GPU_CL_OP(op_type): declares the external function
// TouchOpRegistrar_<op_type>_cl() and defines a file-local int named
// use_op_itself_<op_type>_cl whose initializer calls that function. The
// variable carries __attribute__((unused)) because nothing ever reads it.
// The expansion does not end in ';'.
// NOTE(review): presumably the call exists so the OpenCL kernel's
// registration code is kept by the linker -- confirm against the registrar
// macros.
#define LOAD_GPU_CL_OP(op_type)                 \
  extern int TouchOpRegistrar_##op_type##_cl(); \
  static int use_op_itself_##op_type##_cl       \
      __attribute__((unused)) = TouchOpRegistrar_##op_type##_cl()
#else
// PADDLE_MOBILE_CL is not defined: a GPU_CL load request expands to nothing.
#define LOAD_GPU_CL_OP(op_type)
#endif

H
hjchen2 已提交
35 36 37 38
#ifdef PADDLE_MOBILE_FPGA
// LOAD_FPGA_OP(op_type): declares the external function
// TouchOpRegistrar_<op_type>_fpga() and defines a file-local int named
// use_op_itself_<op_type>_fpga whose initializer calls that function. The
// variable carries __attribute__((unused)) because nothing ever reads it.
// The expansion does not end in ';'.
// NOTE(review): presumably the call exists so the FPGA kernel's registration
// code is kept by the linker -- confirm against the registrar macros.
#define LOAD_FPGA_OP(op_type)                                           \
  extern int TouchOpRegistrar_##op_type##_##fpga();                     \
  static int use_op_itself_##op_type##_##fpga __attribute__((unused)) = \
      TouchOpRegistrar_##op_type##_##fpga()
#else
// PADDLE_MOBILE_FPGA is not defined: an FPGA load request expands to nothing.
#define LOAD_FPGA_OP(op_type)
#endif

// LOAD_FUSION_MATCHER(op_type): declares the external function
// TouchFusionMatcherRegistrar_<op_type>() and defines a file-local int named
// use_fusion_matcher_itself_<op_type> whose initializer calls that function.
// The variable carries __attribute__((unused)) because nothing ever reads it.
// This macro is not guarded by any backend flag. Its expansion already ends
// with ';', so writing `LOAD_FUSION_MATCHER(x);` leaves one extra empty
// declaration behind.
#define LOAD_FUSION_MATCHER(op_type)                  \
  extern int TouchFusionMatcherRegistrar_##op_type(); \
  static int use_fusion_matcher_itself_##op_type      \
      __attribute__((unused)) = TouchFusionMatcherRegistrar_##op_type();

49 50 51
// LOAD_OP(op_type): issues a load request for op_type to each backend loader
// in turn -- LOAD_CPU_OP, LOAD_GPU_CL_OP, then LOAD_FPGA_OP. Every request is
// followed by ';' (the last one included), so the macro can be invoked
// without a trailing semicolon. A loader whose backend flag is not defined
// expands to nothing and leaves only its ';'.
#define LOAD_OP(op_type)   \
  LOAD_CPU_OP(op_type);    \
  LOAD_GPU_CL_OP(op_type); \
  LOAD_FPGA_OP(op_type);

// LOAD_OP1(op_type, device_type): pastes device_type between `LOAD_` and
// `_OP` to pick a backend loader (CPU -> LOAD_CPU_OP, GPU_CL -> LOAD_GPU_CL_OP,
// FPGA -> LOAD_FPGA_OP), invokes it with op_type, and appends ';'.
// device_type must spell a name for which LOAD_<device_type>_OP is defined.
#define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type);

// LOAD_OP2(op_type, device_type1, device_type2): loads op_type for two
// backends by expanding LOAD_OP1 once per device type, in argument order.
// Each LOAD_OP1 expansion already ends with ';'.
#define LOAD_OP2(op_type, device_type1, device_type2) \
  LOAD_OP1(op_type, device_type1)                     \
  LOAD_OP1(op_type, device_type2)

// LOAD_OP3(op_type, device_type1, device_type2, device_type3): loads op_type
// for three backends. The first two go through LOAD_OP2 and the third through
// LOAD_OP1, so the requests are issued in argument order.
#define LOAD_OP3(op_type, device_type1, device_type2, device_type3) \
  LOAD_OP2(op_type, device_type1, device_type2)                     \
  LOAD_OP1(op_type, device_type3)

// load required ops
// feed and fetch are loaded unconditionally, for every backend.
LOAD_OP(feed)
LOAD_OP(fetch)
// Every remaining operator is loaded only when its *_OP build flag is
// defined, and only for the backends named in the invocation.
#ifdef FILL_CONSTANT_OP
LOAD_OP2(fill_constant, CPU, FPGA)
#endif
#ifdef BATCHNORM_OP
LOAD_OP2(batch_norm, CPU, GPU_CL);
#endif
#ifdef INSTANCENORM_OP
LOAD_OP1(instance_norm, GPU_CL);
#endif
#ifdef BILINEAR_INTERP_OP
LOAD_OP1(bilinear_interp, CPU);
#endif
#ifdef NEAREST_INTERP_OP
LOAD_OP1(nearest_interp, CPU);
#endif
#ifdef LEAKY_RELU_OP
LOAD_OP1(leaky_relu, CPU);
#endif
#ifdef BOXCODER_OP
LOAD_OP2(box_coder, CPU, GPU_CL);
#endif
#ifdef CONCAT_OP
LOAD_OP3(concat, CPU, GPU_CL, FPGA);
#endif
#ifdef CONV_OP
LOAD_OP3(conv2d, CPU, GPU_CL, FPGA);
#endif
#ifdef LRN_OP
LOAD_OP2(lrn, CPU, GPU_CL);
#endif
#ifdef SIGMOID_OP
LOAD_OP1(sigmoid, CPU);
#endif
#ifdef FUSION_FC_RELU_OP
LOAD_OP2(fusion_fc_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_fc_relu);
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP
LOAD_OP2(fusion_elementwise_add_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_elementwise_add_relu);
#endif
#ifdef SPLIT_OP
LOAD_OP2(split, CPU, GPU_CL);
#endif
#ifdef RESIZE_OP
LOAD_OP1(resize, CPU);
#endif
#ifdef FUSION_CONVADDBNRELU_OP
LOAD_OP3(fusion_conv_add_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu);
#endif
#ifdef RESHAPE_OP
LOAD_OP2(reshape, CPU, GPU_CL);
#endif
#ifdef RESHAPE2_OP
LOAD_OP2(reshape2, CPU, GPU_CL);
#endif
#ifdef TRANSPOSE_OP
LOAD_OP2(transpose, CPU, GPU_CL);
#endif
#ifdef TRANSPOSE2_OP
LOAD_OP2(transpose2, CPU, GPU_CL);
#endif
#ifdef PRIORBOX_OP
LOAD_OP2(prior_box, CPU, GPU_CL);
#endif
#ifdef DENSITY_PRIORBOX_OP
LOAD_OP2(density_prior_box, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVADDRELU_OP
LOAD_OP3(fusion_conv_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_relu);
#endif
#ifdef FUSION_CONVADD_OP
LOAD_OP2(fusion_conv_add, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_conv_add);
#endif
#ifdef SOFTMAX_OP
LOAD_OP2(softmax, CPU, GPU_CL);
#endif
#ifdef SHAPE_OP
LOAD_OP1(shape, CPU);
#endif
#ifdef DEPTHWISECONV_OP
LOAD_OP2(depthwise_conv2d, CPU, GPU_CL);
#endif
#ifdef CONV_TRANSPOSE_OP
LOAD_OP2(conv2d_transpose, CPU, GPU_CL);
#endif
#ifdef SCALE_OP
LOAD_OP2(scale, CPU, GPU_CL);
#endif
#ifdef ELEMENTWISEADD_OP
LOAD_OP2(elementwise_add, CPU, GPU_CL);
#endif
#ifdef PRELU_OP
LOAD_OP1(prelu, CPU);
#endif
#ifdef TANH_OP
LOAD_OP2(tanh, CPU, GPU_CL);
#endif
#ifdef FLATTEN_OP
LOAD_OP1(flatten, CPU);
#endif
#ifdef FLATTEN2_OP
LOAD_OP2(flatten2, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVBNADDRELU_OP
LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_add_relu);
#endif
#ifdef FUSION_CONVBNRELU_OP
LOAD_OP3(fusion_conv_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_relu);
#endif
#ifdef FUSION_CONVRELU_OP
LOAD_OP2(fusion_conv_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_conv_relu);
#endif
#ifdef GRU_OP
LOAD_OP1(gru, CPU);
#endif
#ifdef GRU_UNIT_OP
LOAD_OP1(gru_unit, CPU);
#endif
#ifdef FUSION_CONVADDBN_OP
LOAD_OP2(fusion_conv_add_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn);
#endif
#ifdef DROPOUT_OP
LOAD_OP3(dropout, CPU, GPU_CL, FPGA);
#endif
#ifdef FUSION_DWCONVBNRELU_OP
LOAD_OP2(fusion_dwconv_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
#endif
#ifdef CRF_OP
LOAD_OP1(crf_decoding, CPU);
#endif
#ifdef MUL_OP
LOAD_OP2(mul, CPU, GPU_CL);
#endif
#ifdef NORM_OP
LOAD_OP1(norm, CPU);
#endif
#ifdef RELU_OP
LOAD_OP2(relu, CPU, GPU_CL);
LOAD_OP2(relu6, CPU, GPU_CL);
#endif
#ifdef IM2SEQUENCE_OP
LOAD_OP1(im2sequence, CPU);
#endif
#ifdef LOOKUP_OP
LOAD_OP1(lookup_table, CPU);
#endif
#ifdef FUSION_FC_OP
LOAD_OP3(fusion_fc, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_fc);
#endif
#ifdef POOL_OP
LOAD_OP3(pool2d, CPU, GPU_CL, FPGA);
#endif
#ifdef MULTICLASSNMS_OP
LOAD_OP2(multiclass_nms, CPU, GPU_CL);
#endif
#ifdef POLYGONBOXTRANSFORM_OP
LOAD_OP1(polygon_box_transform, CPU);
#endif
#ifdef SUM_OP
LOAD_OP1(sum, CPU);
#endif
#ifdef ELEMENTWISEMUL_OP
LOAD_OP1(elementwise_mul, CPU);
#endif
#ifdef SLICE_OP
LOAD_OP1(slice, CPU);
#endif
#ifdef FUSION_CONVBN_OP
LOAD_OP2(fusion_conv_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn);
#endif
#ifdef ELEMENTWISESUB_OP
LOAD_OP2(elementwise_sub, CPU, GPU_CL)
#endif
#ifdef TOP_K_OP
LOAD_OP1(top_k, CPU)
#endif
#ifdef CAST_OP
LOAD_OP1(cast, CPU)
#endif
#ifdef QUANT_OP
LOAD_OP1(quantize, CPU);
#endif
#ifdef DEQUANT_OP
LOAD_OP1(dequantize, CPU);
#endif
#ifdef FUSION_DEQUANT_BN_OP
LOAD_OP1(fusion_dequant_bn, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_bn);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_OP
LOAD_OP1(fusion_dequant_add_bn, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn);
#endif
#ifdef FUSION_DEQUANT_BN_RELU_OP
LOAD_OP1(fusion_dequant_bn_relu, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_bn_relu);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP
LOAD_OP1(fusion_dequant_add_bn_relu, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_QUANT_OP
LOAD_OP1(fusion_dequant_add_bn_quant, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_quant);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_RELU_QUANT_OP
LOAD_OP1(fusion_dequant_add_bn_relu_quant, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu_quant);
#endif
#ifdef SEQUENCE_EXPAND_OP
LOAD_OP1(sequence_expand, CPU);
#endif
#ifdef SEQUENCE_POOL_OP
LOAD_OP1(sequence_pool, CPU);
#endif
#ifdef SEQUENCE_SOFTMAX_OP
LOAD_OP1(sequence_softmax, CPU);
#endif
#ifdef LOG_OP
LOAD_OP1(log, CPU);
#endif
#ifdef LOD_RESET_OP
LOAD_OP1(lod_reset, CPU);
#endif
#ifdef LESS_THAN_OP
LOAD_OP1(less_than, CPU);
#endif
#ifdef LOGICAL_AND_OP
LOAD_OP1(logical_and, CPU);
#endif
#ifdef LOGICAL_OR_OP
LOAD_OP1(logical_or, CPU);
#endif
#ifdef LOGICAL_NOT_OP
LOAD_OP1(logical_not, CPU);
#endif
#ifdef LOGICAL_XOR_OP
LOAD_OP1(logical_xor, CPU);
#endif
#ifdef WHILE_OP
LOAD_OP1(while, CPU);
#endif
#ifdef WRITE_TO_ARRAY_OP
LOAD_OP1(write_to_array, CPU);
#endif
#ifdef READ_FROM_ARRAY_OP
LOAD_OP1(read_from_array, CPU);
#endif
#ifdef IS_EMPTY_OP
LOAD_OP1(is_empty, CPU);
#endif
#ifdef INCREMENT_OP
LOAD_OP1(increment, CPU);
#endif
#ifdef ANCHOR_GENERATOR_OP
LOAD_OP1(anchor_generator, CPU);
#endif
#ifdef PROPOSAL_OP
LOAD_OP1(generate_proposals, CPU);
#endif
#ifdef PSROI_POOL_OP
LOAD_OP1(psroi_pool, CPU);
#endif
#ifdef ROI_PERSPECTIVE_OP
LOAD_OP1(roi_perspective_transform, CPU);
#endif
#ifdef BEAM_SEARCH_OP
LOAD_OP1(beam_search, CPU);
#endif
#ifdef BEAM_SEARCH_DECODE_OP
LOAD_OP1(beam_search_decode, CPU);
#endif
#ifdef PAD2D_OP
LOAD_OP1(pad2d, CPU);
#endif
#ifdef ONE_HOT_OP
LOAD_OP1(one_hot, CPU);
#endif
#ifdef ASSIGN_VALUE_OP
LOAD_OP2(assign_value, CPU, GPU_CL);
#endif
#ifdef EXP_OP
LOAD_OP1(exp, CPU);
#endif
#ifdef ASSIGN_OP
LOAD_OP1(assign, CPU);
#endif
#ifdef CONDITIONAL_BLOCK_OP
LOAD_OP1(conditional_block, CPU);
#endif
#ifdef EQUAL_OP
LOAD_OP1(equal, CPU);
#endif
#ifdef FILL_CONSTANT_BATCH_SIZE_LIKE_OP
LOAD_OP1(fill_constant_batch_size_like, CPU);
#endif
#ifdef RANGE_OP
LOAD_OP1(range, CPU);
#endif
#ifdef REDUCE_PROD_OP
LOAD_OP1(reduce_prod, CPU);
#endif
#ifdef PIXEL_SHUFFLE_OP
LOAD_OP1(pixel_shuffle, GPU_CL);
#endif
#ifdef EXPAND_OP
LOAD_OP1(expand, GPU_CL);
#endif
#ifdef GRID_SAMPLER_OP
LOAD_OP1(grid_sampler, GPU_CL);
#endif