// load_ops.h
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

// LOAD_CPU_OP(op_type)
// When PADDLE_MOBILE_CPU is defined, declares the external function
// TouchOpRegistrar_<op_type>_cpu() and calls it from the initializer of a
// file-local static int, so the invoking translation unit references that
// symbol. The variable is marked unused because only the call matters.
// NOTE(review): presumably this keeps the CPU operator's registration object
// from being discarded by the linker -- confirm against the registrar macro.
// When PADDLE_MOBILE_CPU is not defined the macro expands to nothing.
// The expansion carries no trailing semicolon; the caller supplies it.
#ifdef PADDLE_MOBILE_CPU
#define LOAD_CPU_OP(op_type)                                           \
  extern int TouchOpRegistrar_##op_type##_##cpu();                     \
  static int use_op_itself_##op_type##_##cpu __attribute__((unused)) = \
      TouchOpRegistrar_##op_type##_##cpu()
#else
#define LOAD_CPU_OP(op_type)
#endif

// LOAD_GPU_CL_OP(op_type)
// When PADDLE_MOBILE_CL is defined, declares the external function
// TouchOpRegistrar_<op_type>_cl() and calls it from the initializer of a
// file-local static int (marked unused), so the invoking translation unit
// references that symbol. Expands to nothing when PADDLE_MOBILE_CL is not
// defined. The expansion carries no trailing semicolon; the caller supplies it.
#ifdef PADDLE_MOBILE_CL
#define LOAD_GPU_CL_OP(op_type)                                       \
  extern int TouchOpRegistrar_##op_type##_##cl();                     \
  static int use_op_itself_##op_type##_##cl __attribute__((unused)) = \
      TouchOpRegistrar_##op_type##_##cl()
#else
#define LOAD_GPU_CL_OP(op_type)
#endif

// LOAD_FPGA_OP(op_type)
// When PADDLE_MOBILE_FPGA is defined, declares the external function
// TouchOpRegistrar_<op_type>_fpga() and calls it from the initializer of a
// file-local static int (marked unused), so the invoking translation unit
// references that symbol. Expands to nothing when PADDLE_MOBILE_FPGA is not
// defined. The expansion carries no trailing semicolon; the caller supplies it.
#ifdef PADDLE_MOBILE_FPGA
#define LOAD_FPGA_OP(op_type)                                           \
  extern int TouchOpRegistrar_##op_type##_##fpga();                     \
  static int use_op_itself_##op_type##_##fpga __attribute__((unused)) = \
      TouchOpRegistrar_##op_type##_##fpga()
#else
#define LOAD_FPGA_OP(op_type)
#endif

// LOAD_FUSION_MATCHER(op_type)
// Declares the external function TouchFusionMatcherRegistrar_<op_type>() and
// calls it from the initializer of a file-local static int (marked unused),
// so the invoking translation unit references that symbol. Unlike the
// per-device loaders this macro is not guarded by a device macro, and its
// expansion already ends with ';' -- a ';' written after the invocation
// becomes an empty declaration.
#define LOAD_FUSION_MATCHER(op_type)                                       \
  extern int TouchFusionMatcherRegistrar_##op_type();                      \
  static int use_fusion_matcher_itself_##op_type __attribute__((unused)) = \
      TouchFusionMatcherRegistrar_##op_type();

// LOAD_OP(op_type)
// Invokes the CPU, GPU_CL and FPGA loaders for op_type. Each loader expands
// to nothing unless its device macro is defined, so only enabled devices
// contribute a declaration. The expansion already ends with ';'.
#define LOAD_OP(op_type)   \
  LOAD_CPU_OP(op_type);    \
  LOAD_GPU_CL_OP(op_type); \
  LOAD_FPGA_OP(op_type);

// LOAD_OP1(op_type, device_type): forwards to LOAD_<device_type>_OP(op_type).
// device_type is pasted into the macro name, so it must spell one of the
// per-device loaders defined in this header: CPU, GPU_CL or FPGA.
// The expansion already ends with ';'.
#define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type);

// LOAD_OP2(op_type, d1, d2): applies LOAD_OP1 to op_type for two device types.
#define LOAD_OP2(op_type, device_type1, device_type2) \
  LOAD_OP1(op_type, device_type1)                     \
  LOAD_OP1(op_type, device_type2)

// LOAD_OP3(op_type, d1, d2, d3): applies LOAD_OP1 to op_type for three device
// types (LOAD_OP2 for the first two, LOAD_OP1 for the third).
#define LOAD_OP3(op_type, device_type1, device_type2, device_type3) \
  LOAD_OP2(op_type, device_type1, device_type2)                     \
  LOAD_OP1(op_type, device_type3)

// Load required ops: feed and fetch are loaded unconditionally, through every
// per-device loader (each of which is a no-op unless its device is enabled).
LOAD_OP(feed)
LOAD_OP(fetch)
// Optional ops: each entry below is compiled in only when its feature macro
// is defined; the trailing arguments name the device loaders to invoke.
#ifdef FILL_CONSTANT_OP
LOAD_OP2(fill_constant, CPU, FPGA)
#endif
#ifdef BATCHNORM_OP
LOAD_OP2(batch_norm, CPU, GPU_CL);
#endif
#ifdef BILINEAR_INTERP_OP
LOAD_OP1(bilinear_interp, CPU);
#endif
#ifdef NEAREST_INTERP_OP
LOAD_OP1(nearest_interp, CPU);
#endif
#ifdef LEAKY_RELU_OP
LOAD_OP1(leaky_relu, CPU);
#endif
// Optional ops (box_coder .. fusion_conv_add_bn_relu): each entry is compiled
// in only when its feature macro is defined. Fused ops also load their
// fusion matcher.
#ifdef BOXCODER_OP
LOAD_OP2(box_coder, CPU, GPU_CL);
#endif
#ifdef CONCAT_OP
LOAD_OP3(concat, CPU, GPU_CL, FPGA);
#endif
#ifdef CONV_OP
LOAD_OP3(conv2d, CPU, GPU_CL, FPGA);
#endif
#ifdef LRN_OP
LOAD_OP2(lrn, CPU, GPU_CL);
#endif
#ifdef SIGMOID_OP
LOAD_OP1(sigmoid, CPU);
#endif
#ifdef FUSION_FC_RELU_OP
LOAD_OP2(fusion_fc_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_fc_relu);
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP
LOAD_OP2(fusion_elementwise_add_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_elementwise_add_relu);
#endif
#ifdef SPLIT_OP
LOAD_OP2(split, CPU, GPU_CL);
#endif
#ifdef RESIZE_OP
LOAD_OP1(resize, CPU);
#endif
#ifdef FUSION_CONVADDBNRELU_OP
LOAD_OP3(fusion_conv_add_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu);
#endif
// Optional ops (reshape .. density_prior_box): each entry is compiled in only
// when its feature macro is defined.
#ifdef RESHAPE_OP
LOAD_OP2(reshape, CPU, GPU_CL);
#endif
#ifdef RESHAPE2_OP
LOAD_OP2(reshape2, CPU, GPU_CL);
#endif
#ifdef TRANSPOSE_OP
LOAD_OP2(transpose, CPU, GPU_CL);
#endif
#ifdef TRANSPOSE2_OP
LOAD_OP2(transpose2, CPU, GPU_CL);
#endif
#ifdef PRIORBOX_OP
LOAD_OP2(prior_box, CPU, GPU_CL);
#endif
#ifdef DENSITY_PRIORBOX_OP
LOAD_OP2(density_prior_box, CPU, GPU_CL);
#endif
// Optional ops (fusion_conv_add_relu .. fusion_conv_relu): each entry is
// compiled in only when its feature macro is defined. Fused ops also load
// their fusion matcher.
#ifdef FUSION_CONVADDRELU_OP
LOAD_OP3(fusion_conv_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_relu);
#endif
#ifdef FUSION_CONVADD_OP
LOAD_OP2(fusion_conv_add, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_conv_add);
#endif
#ifdef SOFTMAX_OP
LOAD_OP2(softmax, CPU, GPU_CL);
#endif
#ifdef SHAPE_OP
LOAD_OP1(shape, CPU);
#endif
#ifdef DEPTHWISECONV_OP
LOAD_OP2(depthwise_conv2d, CPU, GPU_CL);
#endif
#ifdef CONV_TRANSPOSE_OP
LOAD_OP1(conv2d_transpose, CPU);
#endif
#ifdef SCALE_OP
LOAD_OP2(scale, CPU, GPU_CL);
#endif
#ifdef ELEMENTWISEADD_OP
LOAD_OP2(elementwise_add, CPU, GPU_CL);
#endif
#ifdef PRELU_OP
LOAD_OP1(prelu, CPU);
#endif
#ifdef FLATTEN_OP
LOAD_OP1(flatten, CPU);
#endif
#ifdef FLATTEN2_OP
LOAD_OP2(flatten2, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVBNADDRELU_OP
LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_add_relu);
#endif
#ifdef FUSION_CONVBNRELU_OP
LOAD_OP3(fusion_conv_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_relu);
#endif
#ifdef FUSION_CONVRELU_OP
LOAD_OP2(fusion_conv_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_conv_relu);
#endif
// Optional ops (gru .. norm): each entry is compiled in only when its feature
// macro is defined. Fused ops also load their fusion matcher.
#ifdef GRU_OP
LOAD_OP1(gru, CPU);
#endif
#ifdef GRU_UNIT_OP
LOAD_OP1(gru_unit, CPU);
#endif
#ifdef FUSION_CONVADDBN_OP
LOAD_OP2(fusion_conv_add_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn);
#endif
#ifdef DROPOUT_OP
LOAD_OP3(dropout, CPU, GPU_CL, FPGA);
#endif
#ifdef FUSION_DWCONVBNRELU_OP
LOAD_OP2(fusion_dwconv_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
#endif
#ifdef CRF_OP
LOAD_OP1(crf_decoding, CPU);
#endif
#ifdef MUL_OP
LOAD_OP2(mul, CPU, GPU_CL);
#endif
#ifdef NORM_OP
LOAD_OP1(norm, CPU);
#endif
// Optional ops (relu .. elementwise_mul): each entry is compiled in only when
// its feature macro is defined. RELU_OP enables both relu and relu6.
#ifdef RELU_OP
LOAD_OP2(relu, CPU, GPU_CL);
LOAD_OP2(relu6, CPU, GPU_CL);
#endif
#ifdef IM2SEQUENCE_OP
LOAD_OP1(im2sequence, CPU);
#endif
#ifdef LOOKUP_OP
LOAD_OP1(lookup_table, CPU);
#endif
#ifdef FUSION_FC_OP
LOAD_OP3(fusion_fc, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_fc);
#endif
#ifdef POOL_OP
LOAD_OP3(pool2d, CPU, GPU_CL, FPGA);
#endif
#ifdef MULTICLASSNMS_OP
LOAD_OP2(multiclass_nms, CPU, GPU_CL);
#endif
#ifdef POLYGONBOXTRANSFORM_OP
LOAD_OP1(polygon_box_transform, CPU);
#endif
#ifdef SUM_OP
LOAD_OP1(sum, CPU);
#endif
#ifdef ELEMENTWISEMUL_OP
LOAD_OP1(elementwise_mul, CPU);
#endif
// Optional ops (slice .. dequantize): each entry is compiled in only when its
// feature macro is defined. Invocations written without a trailing ';' are
// still complete, because the LOAD_OP1 expansion already ends with ';'.
#ifdef SLICE_OP
LOAD_OP1(slice, CPU);
#endif
#ifdef FUSION_CONVBN_OP
LOAD_OP2(fusion_conv_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn);
#endif
#ifdef ELEMENTWISESUB_OP
LOAD_OP1(elementwise_sub, CPU)
#endif
#ifdef TOP_K_OP
LOAD_OP1(top_k, CPU)
#endif
#ifdef CAST_OP
LOAD_OP1(cast, CPU)
#endif
#ifdef QUANT_OP
LOAD_OP1(quantize, CPU);
#endif
#ifdef DEQUANT_OP
LOAD_OP1(dequantize, CPU);
#endif
// Optional fused dequantize ops: each entry is compiled in only when its
// feature macro is defined, and loads both the CPU op and its fusion matcher.
#ifdef FUSION_DEQUANT_BN_OP
LOAD_OP1(fusion_dequant_bn, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_bn);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_OP
LOAD_OP1(fusion_dequant_add_bn, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn);
#endif
#ifdef FUSION_DEQUANT_BN_RELU_OP
LOAD_OP1(fusion_dequant_bn_relu, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_bn_relu);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP
LOAD_OP1(fusion_dequant_add_bn_relu, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_QUANT_OP
LOAD_OP1(fusion_dequant_add_bn_quant, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_quant);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_RELU_QUANT_OP
LOAD_OP1(fusion_dequant_add_bn_relu_quant, CPU);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu_quant);
#endif
// Optional CPU-only ops (sequence_expand .. logical_xor): each entry is
// compiled in only when its feature macro is defined.
#ifdef SEQUENCE_EXPAND_OP
LOAD_OP1(sequence_expand, CPU);
#endif
#ifdef SEQUENCE_POOL_OP
LOAD_OP1(sequence_pool, CPU);
#endif
#ifdef SEQUENCE_SOFTMAX_OP
LOAD_OP1(sequence_softmax, CPU);
#endif
#ifdef LOG_OP
LOAD_OP1(log, CPU);
#endif
#ifdef LOD_RESET_OP
LOAD_OP1(lod_reset, CPU);
#endif
#ifdef LESS_THAN_OP
LOAD_OP1(less_than, CPU);
#endif
#ifdef LOGICAL_AND_OP
LOAD_OP1(logical_and, CPU);
#endif
#ifdef LOGICAL_OR_OP
LOAD_OP1(logical_or, CPU);
#endif
#ifdef LOGICAL_NOT_OP
LOAD_OP1(logical_not, CPU);
#endif
#ifdef LOGICAL_XOR_OP
LOAD_OP1(logical_xor, CPU);
#endif
// Optional CPU-only ops (while .. beam_search): each entry is compiled in
// only when its feature macro is defined.
#ifdef WHILE_OP
LOAD_OP1(while, CPU);
#endif
#ifdef WRITE_TO_ARRAY_OP
LOAD_OP1(write_to_array, CPU);
#endif
#ifdef READ_FROM_ARRAY_OP
LOAD_OP1(read_from_array, CPU);
#endif
#ifdef IS_EMPTY_OP
LOAD_OP1(is_empty, CPU);
#endif
#ifdef INCREMENT_OP
LOAD_OP1(increment, CPU);
#endif
#ifdef ANCHOR_GENERATOR_OP
LOAD_OP1(anchor_generator, CPU);
#endif
#ifdef PROPOSAL_OP
LOAD_OP1(generate_proposals, CPU);
#endif
#ifdef PSROI_POOL_OP
LOAD_OP1(psroi_pool, CPU);
#endif
#ifdef ROI_PERSPECTIVE_OP
LOAD_OP1(roi_perspective_transform, CPU);
#endif
#ifdef BEAM_SEARCH_OP
LOAD_OP1(beam_search, CPU);
#endif
// Optional CPU-only ops (beam_search_decode .. reduce_prod): each entry is
// compiled in only when its feature macro is defined.
#ifdef BEAM_SEARCH_DECODE_OP
LOAD_OP1(beam_search_decode, CPU);
#endif
#ifdef PAD2D_OP
LOAD_OP1(pad2d, CPU);
#endif
#ifdef ONE_HOT_OP
LOAD_OP1(one_hot, CPU);
#endif
#ifdef ASSIGN_VALUE_OP
LOAD_OP1(assign_value, CPU);
#endif
#ifdef EXP_OP
LOAD_OP1(exp, CPU);
#endif
#ifdef ASSIGN_OP
LOAD_OP1(assign, CPU);
#endif
#ifdef CONDITIONAL_BLOCK_OP
LOAD_OP1(conditional_block, CPU);
#endif
#ifdef EQUAL_OP
LOAD_OP1(equal, CPU);
#endif
#ifdef FILL_CONSTANT_BATCH_SIZE_LIKE_OP
LOAD_OP1(fill_constant_batch_size_like, CPU);
#endif
#ifdef RANGE_OP
LOAD_OP1(range, CPU);
#endif
#ifdef REDUCE_PROD_OP
LOAD_OP1(reduce_prod, CPU);
#endif