提交 692f0af0 编写于 作者: xiebaiyuan's avatar xiebaiyuan

Add GPU CL op loading and make each LOAD_OP<N> call use the N that matches its number of device types

上级 693fa8c9
......@@ -23,6 +23,15 @@ limitations under the License. */
#define LOAD_CPU_OP(op_type)
#endif
#ifdef PADDLE_MOBILE_CL
// LOAD_GPU_CL_OP(op_type)
//   Declares the external function TouchOpRegistrar_<op_type>_cl() and
//   defines a file-local int, use_op_itself_<op_type>_cl, whose initializer
//   calls that function. The variable is tagged `unused` so that never
//   reading it does not produce a warning.
//   The expansion carries no trailing semicolon; the user of the macro
//   supplies it.
//   NOTE(review): presumably the call exists to pull the GPU_CL registrar of
//   the operator into the final binary -- confirm against the definition of
//   TouchOpRegistrar_*.
#define LOAD_GPU_CL_OP(op_type)                                      \
  extern int TouchOpRegistrar_##op_type##_cl();                      \
  static int use_op_itself_##op_type##_cl __attribute__((unused)) = \
      TouchOpRegistrar_##op_type##_cl()
#else
// Without PADDLE_MOBILE_CL the macro expands to nothing.
#define LOAD_GPU_CL_OP(op_type)
#endif
#ifdef PADDLE_MOBILE_FPGA
#define LOAD_FPGA_OP(op_type) \
extern int TouchOpRegistrar_##op_type##_##fpga(); \
......@@ -37,8 +46,9 @@ limitations under the License. */
static int use_fusion_matcher_itself_##op_type __attribute__((unused)) = \
TouchFusionMatcherRegistrar_##op_type();
#define LOAD_OP(op_type) \
LOAD_CPU_OP(op_type); \
#define LOAD_OP(op_type) \
LOAD_CPU_OP(op_type); \
LOAD_GPU_CL_OP(op_type); \
LOAD_FPGA_OP(op_type);
// LOAD_OP1(op_type, device_type)
//   Forwards to the single-device loader macro LOAD_<device_type>_OP for
//   op_type. The expansion already ends with a semicolon, so a call site
//   may be written with or without one of its own.
#define LOAD_OP1(op_type, device_type) \
  LOAD_##device_type##_OP(op_type);
......@@ -58,263 +68,263 @@ LOAD_OP(fetch)
LOAD_OP(fill_constant)
#endif
#ifdef BATCHNORM_OP
LOAD_OP1(batch_norm, CPU);
LOAD_OP2(batch_norm, CPU, GPU_CL);
#endif
#ifdef BILINEAR_INTERP_OP
LOAD_OP1(bilinear_interp, CPU);
LOAD_OP2(bilinear_interp, CPU, GPU_CL);
#endif
#ifdef BOXCODER_OP
LOAD_OP1(box_coder, CPU);
LOAD_OP2(box_coder, CPU, GPU_CL);
#endif
#ifdef CONCAT_OP
LOAD_OP2(concat, CPU, FPGA);
LOAD_OP3(concat, CPU, GPU_CL, FPGA);
#endif
#ifdef CONV_OP
LOAD_OP2(conv2d, CPU, FPGA);
LOAD_OP3(conv2d, CPU, GPU_CL, FPGA);
#endif
#ifdef LRN_OP
LOAD_OP1(lrn, CPU);
LOAD_OP2(lrn, CPU, GPU_CL);
#endif
#ifdef SIGMOID_OP
LOAD_OP1(sigmoid, CPU);
LOAD_OP2(sigmoid, CPU, GPU_CL);
#endif
#ifdef FUSION_FC_RELU_OP
LOAD_OP3(fusion_fc_relu, CPU, FPGA);
LOAD_OP3(fusion_fc_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_fc_relu);
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP
LOAD_OP3(fusion_elementwise_add_relu, CPU, FPGA);
LOAD_OP3(fusion_elementwise_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_elementwise_add_relu);
#endif
#ifdef SPLIT_OP
LOAD_OP1(split, CPU);
LOAD_OP2(split, CPU, GPU_CL);
#endif
#ifdef RESIZE_OP
LOAD_OP1(resize, CPU);
LOAD_OP2(resize, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVADDBNRELU_OP
LOAD_OP2(fusion_conv_add_bn_relu, CPU, FPGA);
LOAD_OP3(fusion_conv_add_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu);
#endif
#ifdef RESHAPE_OP
LOAD_OP1(reshape, CPU);
LOAD_OP2(reshape, CPU, GPU_CL);
#endif
#ifdef RESHAPE2_OP
LOAD_OP1(reshape2, CPU);
LOAD_OP2(reshape2, CPU, GPU_CL);
#endif
#ifdef TRANSPOSE_OP
LOAD_OP1(transpose, CPU);
LOAD_OP2(transpose, CPU, GPU_CL);
#endif
#ifdef TRANSPOSE2_OP
LOAD_OP1(transpose2, CPU);
LOAD_OP2(transpose2, CPU, GPU_CL);
#endif
#ifdef PRIORBOX_OP
LOAD_OP1(prior_box, CPU);
LOAD_OP2(prior_box, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVADDRELU_OP
LOAD_OP2(fusion_conv_add_relu, CPU, FPGA);
LOAD_OP3(fusion_conv_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_relu);
#endif
#ifdef FUSION_CONVADD_OP
LOAD_OP1(fusion_conv_add, CPU);
LOAD_OP2(fusion_conv_add, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_conv_add);
#endif
#ifdef SOFTMAX_OP
LOAD_OP1(softmax, CPU);
LOAD_OP2(softmax, CPU, GPU_CL);
#endif
#ifdef SHAPE_OP
LOAD_OP1(shape, CPU);
LOAD_OP2(shape, CPU, GPU_CL);
#endif
#ifdef DEPTHWISECONV_OP
LOAD_OP1(depthwise_conv2d, CPU);
LOAD_OP2(depthwise_conv2d, CPU, GPU_CL);
#endif
#ifdef CONV_TRANSPOSE_OP
LOAD_OP1(conv2d_transpose, CPU);
LOAD_OP2(conv2d_transpose, CPU, GPU_CL);
#endif
#ifdef SCALE_OP
LOAD_OP1(scale, CPU);
LOAD_OP2(scale, CPU, GPU_CL);
#endif
#ifdef ELEMENTWISEADD_OP
LOAD_OP1(elementwise_add, CPU);
LOAD_OP2(elementwise_add, CPU, GPU_CL);
#endif
#ifdef PRELU_OP
LOAD_OP1(prelu, CPU);
LOAD_OP2(prelu, CPU, GPU_CL);
#endif
#ifdef FLATTEN_OP
LOAD_OP1(flatten, CPU);
LOAD_OP2(flatten, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVBNADDRELU_OP
LOAD_OP2(fusion_conv_bn_add_relu, CPU, FPGA);
LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_add_relu);
#endif
#ifdef FUSION_CONVBNRELU_OP
LOAD_OP2(fusion_conv_bn_relu, CPU, FPGA);
LOAD_OP3(fusion_conv_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_relu);
#endif
#ifdef GRU_OP
LOAD_OP1(gru, CPU);
LOAD_OP2(gru, CPU, GPU_CL);
#endif
#ifdef GRU_UNIT_OP
LOAD_OP1(gru_unit, CPU);
LOAD_OP2(gru_unit, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVADDBN_OP
LOAD_OP2(fusion_conv_add_bn, CPU, FPGA);
LOAD_OP3(fusion_conv_add_bn, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn);
#endif
#ifdef DROPOUT_OP
LOAD_OP2(dropout, CPU, FPGA);
LOAD_OP3(dropout, CPU, GPU_CL, FPGA);
#endif
#ifdef FUSION_DWCONVBNRELU_OP
LOAD_OP1(fusion_dwconv_bn_relu, CPU);
LOAD_OP2(fusion_dwconv_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
#endif
#ifdef CRF_OP
LOAD_OP1(crf_decoding, CPU);
LOAD_OP2(crf_decoding, CPU, GPU_CL);
#endif
#ifdef MUL_OP
LOAD_OP1(mul, CPU);
LOAD_OP2(mul, CPU, GPU_CL);
#endif
#ifdef NORM_OP
LOAD_OP1(norm, CPU);
LOAD_OP2(norm, CPU, GPU_CL);
#endif
#ifdef RELU_OP
LOAD_OP1(relu, CPU);
LOAD_OP1(relu6, CPU);
LOAD_OP2(relu, CPU, GPU_CL);
LOAD_OP2(relu6, CPU, GPU_CL);
#endif
#ifdef IM2SEQUENCE_OP
LOAD_OP1(im2sequence, CPU);
LOAD_OP2(im2sequence, CPU, GPU_CL);
#endif
#ifdef LOOKUP_OP
LOAD_OP1(lookup_table, CPU);
LOAD_OP2(lookup_table, CPU, GPU_CL);
#endif
#ifdef FUSION_FC_OP
LOAD_OP2(fusion_fc, CPU, FPGA);
LOAD_OP3(fusion_fc, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_fc);
#endif
#ifdef POOL_OP
LOAD_OP2(pool2d, CPU, FPGA);
LOAD_OP3(pool2d, CPU, GPU_CL, FPGA);
#endif
#ifdef MULTICLASSNMS_OP
LOAD_OP1(multiclass_nms, CPU);
LOAD_OP2(multiclass_nms, CPU, GPU_CL);
#endif
#ifdef POLYGONBOXTRANSFORM_OP
LOAD_OP1(polygon_box_transform, CPU);
LOAD_OP2(polygon_box_transform, CPU, GPU_CL);
#endif
#ifdef SUM_OP
LOAD_OP1(sum, CPU);
LOAD_OP2(sum, CPU, GPU_CL);
#endif
#ifdef ELEMENTWISEMUL_OP
LOAD_OP1(elementwise_mul, CPU);
LOAD_OP2(elementwise_mul, CPU, GPU_CL);
#endif
#ifdef SLICE_OP
LOAD_OP1(slice, CPU);
LOAD_OP2(slice, CPU, GPU_CL);
#endif
#ifdef FUSION_CONVBN_OP
LOAD_OP2(fusion_conv_bn, CPU, FPGA);
LOAD_OP3(fusion_conv_bn, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn);
#endif
#ifdef ELEMENTWISESUB_OP
LOAD_OP1(elementwise_sub, CPU)
LOAD_OP2(elementwise_sub, CPU, GPU_CL)
#endif
#ifdef TOP_K_OP
LOAD_OP1(top_k, CPU)
LOAD_OP2(top_k, CPU, GPU_CL)
#endif
#ifdef CAST_OP
LOAD_OP1(cast, CPU)
LOAD_OP2(cast, CPU, GPU_CL)
#endif
#ifdef QUANT_OP
LOAD_OP1(quantize, CPU);
LOAD_OP2(quantize, CPU, GPU_CL);
#endif
#ifdef DEQUANT_OP
LOAD_OP1(dequantize, CPU);
LOAD_OP2(dequantize, CPU, GPU_CL);
#endif
#ifdef FUSION_DEQUANT_BN_OP
LOAD_OP1(fusion_dequant_bn, CPU);
LOAD_OP2(fusion_dequant_bn, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_bn);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_OP
LOAD_OP1(fusion_dequant_add_bn, CPU);
LOAD_OP2(fusion_dequant_add_bn, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn);
#endif
#ifdef FUSION_DEQUANT_BN_RELU_OP
LOAD_OP1(fusion_dequant_bn_relu, CPU);
LOAD_OP2(fusion_dequant_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_bn_relu);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP
LOAD_OP1(fusion_dequant_add_bn_relu, CPU);
LOAD_OP2(fusion_dequant_add_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_QUANT_OP
LOAD_OP1(fusion_dequant_add_bn_quant, CPU);
LOAD_OP2(fusion_dequant_add_bn_quant, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_quant);
#endif
#ifdef FUSION_DEQUANT_ADD_BN_RELU_QUANT_OP
LOAD_OP1(fusion_dequant_add_bn_relu_quant, CPU);
LOAD_OP2(fusion_dequant_add_bn_relu_quant, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu_quant);
#endif
#ifdef SEQUENCE_EXPAND_OP
LOAD_OP1(sequence_expand, CPU);
LOAD_OP2(sequence_expand, CPU, GPU_CL);
#endif
#ifdef SEQUENCE_POOL_OP
LOAD_OP1(sequence_pool, CPU);
LOAD_OP2(sequence_pool, CPU, GPU_CL);
#endif
#ifdef LOG_OP
LOAD_OP1(log, CPU);
LOAD_OP2(log, CPU, GPU_CL);
#endif
#ifdef LOD_RESET_OP
LOAD_OP1(lod_reset, CPU);
LOAD_OP2(lod_reset, CPU, GPU_CL);
#endif
#ifdef LESS_THAN_OP
LOAD_OP1(less_than, CPU);
LOAD_OP2(less_than, CPU, GPU_CL);
#endif
#ifdef LOGICAL_AND_OP
LOAD_OP1(logical_and, CPU);
LOAD_OP2(logical_and, CPU, GPU_CL);
#endif
#ifdef LOGICAL_OR_OP
LOAD_OP1(logical_or, CPU);
LOAD_OP2(logical_or, CPU, GPU_CL);
#endif
#ifdef LOGICAL_NOT_OP
LOAD_OP1(logical_not, CPU);
LOAD_OP2(logical_not, CPU, GPU_CL);
#endif
#ifdef LOGICAL_XOR_OP
LOAD_OP1(logical_xor, CPU);
LOAD_OP2(logical_xor, CPU, GPU_CL);
#endif
#ifdef WHILE_OP
LOAD_OP1(while, CPU);
LOAD_OP2(while, CPU, GPU_CL);
#endif
#ifdef WRITE_TO_ARRAY_OP
LOAD_OP1(write_to_array, CPU);
LOAD_OP2(write_to_array, CPU, GPU_CL);
#endif
#ifdef READ_FROM_ARRAY_OP
LOAD_OP1(read_from_array, CPU);
LOAD_OP2(read_from_array, CPU, GPU_CL);
#endif
#ifdef IS_EMPTY_OP
LOAD_OP1(is_empty, CPU);
LOAD_OP2(is_empty, CPU, GPU_CL);
#endif
#ifdef INCREMENT_OP
LOAD_OP1(increment, CPU);
LOAD_OP2(increment, CPU, GPU_CL);
#endif
#ifdef ANCHOR_GENERATOR_OP
LOAD_OP1(anchor_generator, CPU);
LOAD_OP2(anchor_generator, CPU, GPU_CL);
#endif
#ifdef PROPOSAL_OP
LOAD_OP1(generate_proposals, CPU);
LOAD_OP2(generate_proposals, CPU, GPU_CL);
#endif
#ifdef PSROI_POOL_OP
LOAD_OP1(psroi_pool, CPU);
LOAD_OP2(psroi_pool, CPU, GPU_CL);
#endif
#ifdef ROI_PERSPECTIVE_OP
LOAD_OP1(roi_perspective_transform, CPU);
LOAD_OP2(roi_perspective_transform, CPU, GPU_CL);
#endif
#ifdef BEAM_SEARCH_OP
LOAD_OP1(beam_search, CPU);
LOAD_OP2(beam_search, CPU, GPU_CL);
#endif
#ifdef BEAM_SEARCH_DECODE_OP
LOAD_OP1(beam_search_decode, CPU);
LOAD_OP2(beam_search_decode, CPU, GPU_CL);
#endif
#ifdef PAD2D_OP
LOAD_OP1(pad2d, CPU);
LOAD_OP2(pad2d, CPU, GPU_CL);
#endif
#ifdef ONE_HOT_OP
LOAD_OP1(one_hot, CPU);
LOAD_OP2(one_hot, CPU, GPU_CL);
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册