/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

// LOAD_CPU_OP(op): when PADDLE_MOBILE_CPU is defined, declares the external
// function TouchOpRegistrar_<op>_cpu() and initializes a file-local static
// int named use_op_itself_<op>_cpu from its return value. Without
// PADDLE_MOBILE_CPU the macro expands to nothing. The expansion carries no
// trailing semicolon.
// NOTE(review): presumably the call exists to keep the op's registrar object
// from being dropped at link time - confirm against the registrar definition.
#ifndef PADDLE_MOBILE_CPU
#define LOAD_CPU_OP(op)
#else
#define LOAD_CPU_OP(op)                                         \
  extern int TouchOpRegistrar_##op##_cpu();                     \
  static int use_op_itself_##op##_cpu __attribute__((unused)) = \
      TouchOpRegistrar_##op##_cpu()
#endif

// LOAD_GPU_CL_OP(op): when PADDLE_MOBILE_CL is defined, declares the external
// function TouchOpRegistrar_<op>_cl() and initializes a file-local static int
// named use_op_itself_<op>_cl from its return value. Without PADDLE_MOBILE_CL
// the macro expands to nothing. The expansion carries no trailing semicolon.
#ifndef PADDLE_MOBILE_CL
#define LOAD_GPU_CL_OP(op)
#else
#define LOAD_GPU_CL_OP(op)                                     \
  extern int TouchOpRegistrar_##op##_cl();                     \
  static int use_op_itself_##op##_cl __attribute__((unused)) = \
      TouchOpRegistrar_##op##_cl()
#endif

// LOAD_FPGA_OP(op_type): when PADDLE_MOBILE_FPGA is defined, declares the
// external function TouchOpRegistrar_<op_type>_fpga() and initializes a
// file-local static int named use_op_itself_<op_type>_fpga from its return
// value. Without PADDLE_MOBILE_FPGA the macro expands to nothing. The
// expansion carries no trailing semicolon.
#ifdef PADDLE_MOBILE_FPGA
#define LOAD_FPGA_OP(op_type)                                           \
  extern int TouchOpRegistrar_##op_type##_##fpga();                     \
  static int use_op_itself_##op_type##_##fpga __attribute__((unused)) = \
      TouchOpRegistrar_##op_type##_##fpga()
#else
#define LOAD_FPGA_OP(op_type)
#endif

// LOAD_FUSION_MATCHER(op): declares the external function
// TouchFusionMatcherRegistrar_<op>() and initializes a file-local static int
// named use_fusion_matcher_itself_<op> from its return value. It is not
// guarded by any backend macro, and its expansion already ends with a
// semicolon.
#define LOAD_FUSION_MATCHER(op)                                       \
  extern int TouchFusionMatcherRegistrar_##op();                      \
  static int use_fusion_matcher_itself_##op __attribute__((unused)) = \
      TouchFusionMatcherRegistrar_##op();

// LOAD_OP(op_type): invokes the CPU, OpenCL GPU and FPGA loader macros for
// op_type, each followed by a semicolon. Backends whose PADDLE_MOBILE_* macro
// is not defined contribute only an empty statement. Because the expansion
// ends with a semicolon, call sites may omit their own.
#define LOAD_OP(op_type)   \
  LOAD_CPU_OP(op_type);    \
  LOAD_GPU_CL_OP(op_type); \
  LOAD_FPGA_OP(op_type);

// Per-device loaders. LOAD_OP1 pastes the device name into the matching
// LOAD_<device>_OP macro name, invokes it for op, and appends a semicolon.
// LOAD_OP2 and LOAD_OP3 chain LOAD_OP1 for two and three devices, in the
// order the devices are listed.
#define LOAD_OP1(op, device) LOAD_##device##_OP(op);

#define LOAD_OP2(op, device_a, device_b) \
  LOAD_OP1(op, device_a)                 \
  LOAD_OP1(op, device_b)

#define LOAD_OP3(op, device_a, device_b, device_c) \
  LOAD_OP2(op, device_a, device_b)                 \
  LOAD_OP1(op, device_c)

// load required ops
// feed and fetch are loaded unconditionally for every backend.
LOAD_OP(feed)
LOAD_OP(fetch)

// Every op below is loaded only when its *_OP feature macro is defined.
#ifdef FILL_CONSTANT_OP
LOAD_OP(fill_constant)
#endif
#ifdef BATCHNORM_OP
LOAD_OP2(batch_norm, CPU, GPU_CL);
#endif
#ifdef BILINEAR_INTERP_OP
LOAD_OP2(bilinear_interp, CPU, GPU_CL);
#endif
#ifdef BOXCODER_OP
LOAD_OP2(box_coder, CPU, GPU_CL);
#endif
#ifdef CONCAT_OP
LOAD_OP3(concat, CPU, GPU_CL, FPGA);
#endif
#ifdef CONV_OP
LOAD_OP3(conv2d, CPU, GPU_CL, FPGA);
#endif
#ifdef LRN_OP
LOAD_OP2(lrn, CPU, GPU_CL);
#endif
#ifdef SIGMOID_OP
LOAD_OP2(sigmoid, CPU, GPU_CL);
#endif
#ifdef FUSION_FC_RELU_OP
LOAD_OP3(fusion_fc_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_fc_relu);
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP
LOAD_OP3(fusion_elementwise_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_elementwise_add_relu);
#endif
#ifdef SPLIT_OP
LOAD_OP2(split, CPU, GPU_CL);
#endif
#ifdef RESIZE_OP
LOAD_OP2(resize, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVADDBNRELU_OP
LOAD_OP3(fusion_conv_add_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu);
#endif
#ifdef RESHAPE_OP
LOAD_OP2(reshape, CPU, GPU_CL);
#endif
#ifdef RESHAPE2_OP
LOAD_OP2(reshape2, CPU, GPU_CL);
#endif
#ifdef TRANSPOSE_OP
LOAD_OP2(transpose, CPU, GPU_CL);
#endif
#ifdef TRANSPOSE2_OP
LOAD_OP2(transpose2, CPU, GPU_CL);
#endif
#ifdef PRIORBOX_OP
LOAD_OP2(prior_box, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVADDRELU_OP
LOAD_OP3(fusion_conv_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_relu);
#endif
#ifdef FUSION_CONVADD_OP
LOAD_OP2(fusion_conv_add, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_conv_add);
#endif
#ifdef SOFTMAX_OP
LOAD_OP2(softmax, CPU, GPU_CL);
#endif
#ifdef SHAPE_OP
LOAD_OP2(shape, CPU, GPU_CL);
#endif
#ifdef DEPTHWISECONV_OP
LOAD_OP2(depthwise_conv2d, CPU, GPU_CL);
#endif
#ifdef CONV_TRANSPOSE_OP
LOAD_OP2(conv2d_transpose, CPU, GPU_CL);
#endif
#ifdef SCALE_OP
LOAD_OP2(scale, CPU, GPU_CL);
#endif
#ifdef ELEMENTWISEADD_OP
LOAD_OP2(elementwise_add, CPU, GPU_CL);
#endif
#ifdef PRELU_OP
LOAD_OP2(prelu, CPU, GPU_CL);
#endif
#ifdef FLATTEN_OP
LOAD_OP2(flatten, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVBNADDRELU_OP
LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_add_relu);
#endif
#ifdef FUSION_CONVBNRELU_OP
LOAD_OP3(fusion_conv_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_relu);
#endif
#ifdef GRU_OP
LOAD_OP2(gru, CPU, GPU_CL);
#endif
#ifdef GRU_UNIT_OP
LOAD_OP2(gru_unit, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVADDBN_OP
LOAD_OP3(fusion_conv_add_bn, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn);
#endif
#ifdef DROPOUT_OP
LOAD_OP3(dropout, CPU, GPU_CL, FPGA);
#endif
#ifdef FUSION_DWCONVBNRELU_OP
LOAD_OP2(fusion_dwconv_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
#endif
#ifdef CRF_OP
LOAD_OP2(crf_decoding, CPU, GPU_CL);
#endif
#ifdef MUL_OP
LOAD_OP2(mul, CPU, GPU_CL);
#endif
#ifdef NORM_OP
LOAD_OP2(norm, CPU, GPU_CL);
#endif
#ifdef RELU_OP
LOAD_OP2(relu, CPU, GPU_CL);
LOAD_OP2(relu6, CPU, GPU_CL);
#endif
#ifdef IM2SEQUENCE_OP
LOAD_OP2(im2sequence, CPU, GPU_CL);
#endif
#ifdef LOOKUP_OP
LOAD_OP2(lookup_table, CPU, GPU_CL);
#endif
#ifdef FUSION_FC_OP
LOAD_OP3(fusion_fc, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_fc);
#endif
#ifdef POOL_OP
LOAD_OP3(pool2d, CPU, GPU_CL, FPGA);
#endif
#ifdef MULTICLASSNMS_OP
LOAD_OP2(multiclass_nms, CPU, GPU_CL);
#endif
#ifdef POLYGONBOXTRANSFORM_OP
LOAD_OP2(polygon_box_transform, CPU, GPU_CL);
#endif
#ifdef SUM_OP
LOAD_OP2(sum, CPU, GPU_CL);
#endif
#ifdef ELEMENTWISEMUL_OP
LOAD_OP2(elementwise_mul, CPU, GPU_CL);
#endif
#ifdef SLICE_OP
LOAD_OP2(slice, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVBN_OP
LOAD_OP3(fusion_conv_bn, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn);
#endif
#ifdef ELEMENTWISESUB_OP
LOAD_OP2(elementwise_sub, CPU, GPU_CL);
#endif
#ifdef TOP_K_OP
LOAD_OP2(top_k, CPU, GPU_CL);
#endif
#ifdef CAST_OP
LOAD_OP2(cast, CPU, GPU_CL);
#endif
#ifdef QUANT_OP
LOAD_OP2(quantize, CPU, GPU_CL);
#endif
#ifdef DEQUANT_OP
LOAD_OP2(dequantize, CPU, GPU_CL);
#endif
#ifdef FUSION_DEQUANT_BN_OP
LOAD_OP2(fusion_dequant_bn, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_bn);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_OP
LOAD_OP2(fusion_dequant_add_bn, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn);
#endif
#ifdef FUSION_DEQUANT_BN_RELU_OP
LOAD_OP2(fusion_dequant_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_bn_relu);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP
LOAD_OP2(fusion_dequant_add_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_QUANT_OP
LOAD_OP2(fusion_dequant_add_bn_quant, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_quant);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_RELU_QUANT_OP
LOAD_OP2(fusion_dequant_add_bn_relu_quant, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu_quant);
#endif
#ifdef SEQUENCE_EXPAND_OP
LOAD_OP2(sequence_expand, CPU, GPU_CL);
#endif
#ifdef SEQUENCE_POOL_OP
LOAD_OP2(sequence_pool, CPU, GPU_CL);
#endif
#ifdef LOG_OP
LOAD_OP2(log, CPU, GPU_CL);
#endif
#ifdef LOD_RESET_OP
LOAD_OP2(lod_reset, CPU, GPU_CL);
#endif
#ifdef LESS_THAN_OP
LOAD_OP2(less_than, CPU, GPU_CL);
#endif
#ifdef LOGICAL_AND_OP
LOAD_OP2(logical_and, CPU, GPU_CL);
#endif
#ifdef LOGICAL_OR_OP
LOAD_OP2(logical_or, CPU, GPU_CL);
#endif
#ifdef LOGICAL_NOT_OP
LOAD_OP2(logical_not, CPU, GPU_CL);
#endif
#ifdef LOGICAL_XOR_OP
LOAD_OP2(logical_xor, CPU, GPU_CL);
#endif
#ifdef WHILE_OP
LOAD_OP2(while, CPU, GPU_CL);
#endif
#ifdef WRITE_TO_ARRAY_OP
LOAD_OP2(write_to_array, CPU, GPU_CL);
#endif
#ifdef READ_FROM_ARRAY_OP
LOAD_OP2(read_from_array, CPU, GPU_CL);
#endif
#ifdef IS_EMPTY_OP
LOAD_OP2(is_empty, CPU, GPU_CL);
#endif
#ifdef INCREMENT_OP
LOAD_OP2(increment, CPU, GPU_CL);
#endif
#ifdef ANCHOR_GENERATOR_OP
LOAD_OP2(anchor_generator, CPU, GPU_CL);
#endif
#ifdef PROPOSAL_OP
LOAD_OP2(generate_proposals, CPU, GPU_CL);
#endif
#ifdef PSROI_POOL_OP
LOAD_OP2(psroi_pool, CPU, GPU_CL);
#endif
#ifdef ROI_PERSPECTIVE_OP
LOAD_OP2(roi_perspective_transform, CPU, GPU_CL);
#endif
#ifdef BEAM_SEARCH_OP
LOAD_OP2(beam_search, CPU, GPU_CL);
#endif
#ifdef BEAM_SEARCH_DECODE_OP
LOAD_OP2(beam_search_decode, CPU, GPU_CL);
#endif
#ifdef PAD2D_OP
LOAD_OP2(pad2d, CPU, GPU_CL);
#endif
#ifdef ONE_HOT_OP
LOAD_OP2(one_hot, CPU, GPU_CL);
#endif