diff --git a/paddle/fluid/inference/anakin/convert/activation.cc b/paddle/fluid/inference/anakin/convert/activation.cc index 6e52357483d754b1ba1ba52de994353859e11a49..523571f1aa8b5a9d17f97a1fd765fe9f1ac95b22 100644 --- a/paddle/fluid/inference/anakin/convert/activation.cc +++ b/paddle/fluid/inference/anakin/convert/activation.cc @@ -43,47 +43,22 @@ void ActivationOpConverter::operator()( auto output_name = op_desc.Output("Out").front(); this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name}); this->engine_->AddOpAttr(op_name, "type", anakin_op_type_); + + if (op_type_ == "swish") { + float beta = boost::get(op_desc.GetAttr("beta")); + this->engine_->AddOpAttr(op_name, "clip_relu_num", beta); + } + if (op_type_ == "relu6") { + float threshold = boost::get(op_desc.GetAttr("threshold")); + this->engine_->AddOpAttr(op_name, "clip_relu_num", threshold); + } } } // namespace anakin } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using sigmoid_nv_fp32 = - ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using sigmoid_nv_int8 = - ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -using tanh_nv_fp32 = - ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using tanh_nv_int8 = - ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; - -REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_int8); -REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, tanh_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_nv_int8); -#endif - -using sigmoid_cpu_fp32 = - ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using sigmoid_cpu_int8 = - ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -using tanh_cpu_fp32 = - ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using tanh_cpu_int8 = - ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; - -REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_int8); - -REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(swish, SwishOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(relu6, Relu6OpConverter); diff --git a/paddle/fluid/inference/anakin/convert/activation.h b/paddle/fluid/inference/anakin/convert/activation.h index 021ec4c7fdf3e9115b67e76345427b11f30a7abd..a2475e492c408008fd7b22815a03aedfd3d04650 100644 --- a/paddle/fluid/inference/anakin/convert/activation.h +++ b/paddle/fluid/inference/anakin/convert/activation.h @@ -37,7 +37,9 @@ class ActivationOpConverter : public AnakinOpConverter { std::string op_type_; std::string anakin_op_type_; std::map anakin_op_types_{{"tanh", "TanH"}, - {"sigmoid", "Sigmoid"}}; + {"sigmoid", "Sigmoid"}, + {"relu6", "ClippedRelu"}, + {"swish", "Swish"}}; }; template @@ -52,6 +54,19 @@ class SigmoidOpConverter : public ActivationOpConverter { SigmoidOpConverter() : ActivationOpConverter("sigmoid") {} }; + +template +class Relu6OpConverter : public ActivationOpConverter { + public: + Relu6OpConverter() : ActivationOpConverter("relu6") {} +}; + +template +class SwishOpConverter : public ActivationOpConverter { + public: + SwishOpConverter() : ActivationOpConverter("swish") {} +}; + } // namespace anakin } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/affine_channel.cc b/paddle/fluid/inference/anakin/convert/affine_channel.cc index a3abca0a84f66ffade758008ed7add300226de75..534e7dca81db959115283d65018ec33cc7a0924c 100644 --- a/paddle/fluid/inference/anakin/convert/affine_channel.cc +++ b/paddle/fluid/inference/anakin/convert/affine_channel.cc @@ -52,22 +52,4 @@ void AffineChannelOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using affine_channel_nv_fp32 = - ::paddle::inference::anakin::AffineChannelOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using affine_channel_nv_int8 = - ::paddle::inference::anakin::AffineChannelOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_int8); -#endif - -using affine_channel_cpu_fp32 = - ::paddle::inference::anakin::AffineChannelOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using affine_channel_cpu_int8 = - ::paddle::inference::anakin::AffineChannelOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.cc b/paddle/fluid/inference/anakin/convert/batch_norm.cc index fa7f3bd79f2807f31cd1a4f0e39770e0adc2bdf0..b41f5dc925208d38ae233f0b3d1ca425537b9b47 100644 --- a/paddle/fluid/inference/anakin/convert/batch_norm.cc +++ b/paddle/fluid/inference/anakin/convert/batch_norm.cc @@ -82,18 +82,4 @@ void BatchNormOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using bn_nv_fp32 = ::paddle::inference::anakin::BatchNormOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using bn_nv_int8 = ::paddle::inference::anakin::BatchNormOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_nv_int8); -#endif - -using bn_cpu_fp32 = ::paddle::inference::anakin::BatchNormOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using bn_cpu_int8 = ::paddle::inference::anakin::BatchNormOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(batch_norm, bn_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/concat.cc b/paddle/fluid/inference/anakin/convert/concat.cc index 6655c2f047a0da0f34588b66a46d1c04217c181d..584a82ead43fa75f0117cf524151bbd75cf54ba6 100644 --- a/paddle/fluid/inference/anakin/convert/concat.cc +++ b/paddle/fluid/inference/anakin/convert/concat.cc @@ -38,22 +38,4 @@ void ConcatOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using concat_nv_fp32 = - ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using concat_nv_int8 = - ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat, concat_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(concat, concat_nv_int8); - -#endif -using concat_cpu_fp32 = - ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using concat_cpu_int8 = - ::paddle::inference::anakin::ConcatOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(concat, concat_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(concat, concat_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/conv2d.cc b/paddle/fluid/inference/anakin/convert/conv2d.cc index e2ea6290fab1c84978a1805c042d4aaac4278d85..70e0adf5ead45dc93c31f5d8aecffd7213b35954 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d.cc +++ b/paddle/fluid/inference/anakin/convert/conv2d.cc @@ -105,22 +105,4 @@ void Conv2dOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using conv2d_nv_fp32 = - ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using conv2d_nv_int8 = - ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_nv_int8); -#endif - -using conv2d_cpu_fp32 = - ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using conv2d_cpu_int8 = - ::paddle::inference::anakin::Conv2dOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d, conv2d_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc index a557c35475d374cc4c4adf795b7764bf0e8bd399..a1568b8bdeeb93790ecc5f37844e7bf4b8892993 100644 --- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc @@ -111,22 +111,4 @@ void Conv2dFusionOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using conv2d_fusion_nv_fp32 = - ::paddle::inference::anakin::Conv2dFusionOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using conv2d_fusion_nv_int8 = - ::paddle::inference::anakin::Conv2dFusionOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_nv_int8); -#endif -using conv2d_fusion_cpu_fp32 = - ::paddle::inference::anakin::Conv2dFusionOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using conv2d_fusion_cpu_int8 = - ::paddle::inference::anakin::Conv2dFusionOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; - -REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(conv2d_fusion, conv2d_fusion_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.cc b/paddle/fluid/inference/anakin/convert/density_prior_box.cc index 92d147708bf6475127aa7a31910fa97134d6076b..5bbaeb57a7da46adfaa47fb696b4b73c8e33c7f0 100644 --- a/paddle/fluid/inference/anakin/convert/density_prior_box.cc +++ b/paddle/fluid/inference/anakin/convert/density_prior_box.cc @@ -108,25 +108,5 @@ void DensityPriorBoxOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using ds_pr_nv_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using ds_pr_nv_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; - -REGISTER_CUDA_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_fp32); -REGISTER_CUDA_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_nv_int8); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_nv_int8); -#endif - -using ds_pr_cpu_fp32 = ::paddle::inference::anakin::DensityPriorBoxOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using ds_pr_cpu_int8 = ::paddle::inference::anakin::DensityPriorBoxOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; - -REGISTER_CPU_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_fp32); -REGISTER_CPU_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_fp32); - -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(density_prior_box, ds_pr_cpu_int8); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(prior_box, ds_pr_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/detection_out.cc b/paddle/fluid/inference/anakin/convert/detection_out.cc index c06a8860e167f694b720dc038aefe8b11566df3b..73dd6f2832541ecda119702f1779363e2950e413 100644 --- a/paddle/fluid/inference/anakin/convert/detection_out.cc +++ b/paddle/fluid/inference/anakin/convert/detection_out.cc @@ -66,22 +66,4 @@ void DetectionOutOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using detection_out_nv_fp32 = - ::paddle::inference::anakin::DetectionOutOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using detection_out_nv_int8 = - ::paddle::inference::anakin::DetectionOutOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_nv_int8); -#endif - -using detection_out_cpu_fp32 = - ::paddle::inference::anakin::DetectionOutOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using detection_out_cpu_int8 = - ::paddle::inference::anakin::DetectionOutOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(detection_out, detection_out_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/dropout.cc b/paddle/fluid/inference/anakin/convert/dropout.cc index 872ebaba3c0bcbcd467452d0da344da7dc3f6dde..6c5f80b5f8e07fd501348632ff3b4cda58de248c 100644 --- a/paddle/fluid/inference/anakin/convert/dropout.cc +++ b/paddle/fluid/inference/anakin/convert/dropout.cc @@ -52,22 +52,4 @@ void DropoutOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using dropout_nv_fp32 = - ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using dropout_nv_int8 = - ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout, dropout_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_nv_int8); -#endif - -using dropout_cpu_fp32 = - ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using dropout_cpu_int8 = - ::paddle::inference::anakin::DropoutOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(dropout, dropout_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/elementwise.cc b/paddle/fluid/inference/anakin/convert/elementwise.cc index e3ea6b2a97dd6a7f556239ed122c31d44a222155..dd32baa0b90018c8e0175fa9cae85a9fbeccedf0 100644 --- a/paddle/fluid/inference/anakin/convert/elementwise.cc +++ b/paddle/fluid/inference/anakin/convert/elementwise.cc @@ -71,32 +71,5 @@ void ElementwiseMulOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using elet_nv_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using elet_nv_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; -using eletmul_nv_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using eletmul_nv_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; - -REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_nv_int8); -REGISTER_CUDA_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_nv_int8); - -#endif -using elet_cpu_fp32 = ::paddle::inference::anakin::ElementwiseAddOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using elet_cpu_int8 = ::paddle::inference::anakin::ElementwiseAddOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; -using eletmul_cpu_fp32 = ::paddle::inference::anakin::ElementwiseMulOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using eletmul_cpu_int8 = ::paddle::inference::anakin::ElementwiseMulOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; - -REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_add, elet_cpu_int8); -REGISTER_CPU_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(elementwise_mul, eletmul_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc index 04af311992210ed0977bb63f3a08c27a36cc6ca6..0621e3377b34660e3c2f1d1b83847bd46b5bd26d 100644 --- a/paddle/fluid/inference/anakin/convert/fc.cc +++ b/paddle/fluid/inference/anakin/convert/fc.cc @@ -117,40 +117,5 @@ void FcBaseOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using mul_nv_fp32 = - ::paddle::inference::anakin::MulOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using fc_nv_fp32 = - ::paddle::inference::anakin::FcOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using mul_nv_int8 = - ::paddle::inference::anakin::MulOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -using fc_nv_int8 = - ::paddle::inference::anakin::FcOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; - -REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, mul_nv_fp32); -REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, fc_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(mul, mul_nv_int8); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(fc, fc_nv_int8); -#endif - -using mul_cpu_fp32 = - ::paddle::inference::anakin::MulOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using fc_cpu_fp32 = - ::paddle::inference::anakin::FcOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using mul_cpu_int8 = - ::paddle::inference::anakin::MulOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -using fc_cpu_int8 = - ::paddle::inference::anakin::FcOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; - -REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, mul_cpu_fp32); -REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, fc_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(mul, mul_cpu_int8); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(fc, fc_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/flatten.cc b/paddle/fluid/inference/anakin/convert/flatten.cc index 7ef9e11b091ffd645a7e989949df9582b25d8bce..7ce519a4de36c950bef1b4e856452828398aa57e 100644 --- a/paddle/fluid/inference/anakin/convert/flatten.cc +++ b/paddle/fluid/inference/anakin/convert/flatten.cc @@ -45,22 +45,4 @@ void FlattenOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using flatten_nv_fp32 = - ::paddle::inference::anakin::FlattenOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using flatten_nv_int8 = - ::paddle::inference::anakin::FlattenOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; - -REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten, flatten_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_nv_int8); -#endif -using flatten_cpu_fp32 = - ::paddle::inference::anakin::FlattenOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using flatten_cpu_int8 = - ::paddle::inference::anakin::FlattenOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(flatten, flatten_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.cc b/paddle/fluid/inference/anakin/convert/im2sequence.cc index 37f3f425a4fedd14693082e80e42a03ec693f509..5a4e3e61c5e4e40d8fe98fba8e098d89d916dde1 100644 --- a/paddle/fluid/inference/anakin/convert/im2sequence.cc +++ b/paddle/fluid/inference/anakin/convert/im2sequence.cc @@ -55,18 +55,4 @@ void Im2SequenceConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using im2sequence_nv_fp32 = ::paddle::inference::anakin::Im2SequenceConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using im2sequence_nv_int8 = ::paddle::inference::anakin::Im2SequenceConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_nv_int8); -#endif - -using im2sequence_cpu_fp32 = ::paddle::inference::anakin::Im2SequenceConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using im2sequence_cpu_int8 = ::paddle::inference::anakin::Im2SequenceConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(im2sequence, im2sequence_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter); diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h index 6ff49c4a820ccb85658b1e480bb70223aa326d9a..a6ae51bd4b1c67104c732e12a66f74d7e4580bb5 100644 --- a/paddle/fluid/inference/anakin/convert/op_converter.h +++ b/paddle/fluid/inference/anakin/convert/op_converter.h @@ -183,25 +183,37 @@ template class AnakinOpConverter<::anakin::saber::X86, return 0; \ } -#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ - REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \ - ::anakin::saber::NV, FP32, \ - ::anakin::Precision::FP32) - -#define REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ - REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \ - ::anakin::saber::NV, INT8, \ - ::anakin::Precision::INT8) - -#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ - REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU, \ - ::anakin::saber::X86, FP32, \ - ::anakin::Precision::FP32) - -#define REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ - REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU, \ - ::anakin::saber::X86, INT8, \ - ::anakin::Precision::INT8) +#define WRAP(...) __VA_ARGS__ + +#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, \ + precision_type__) \ + REGISTER_ANAKIN_OP_CONVERTER_BASE( \ + op_type__, \ + ::paddle::inference::anakin::Converter__, \ + CUDA, ::anakin::saber::NV, precision_type__, \ + ::anakin::Precision::precision_type__) + +#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, \ + precision_type__) \ + REGISTER_ANAKIN_OP_CONVERTER_BASE( \ + op_type__, \ + ::paddle::inference::anakin::Converter__, \ + CPU, ::anakin::saber::X86, precision_type__, \ + ::anakin::Precision::precision_type__) + +#ifdef PADDLE_WITH_CUDA +#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ + REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \ + REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8); \ + REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \ + REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8) +#else +#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ + REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \ + REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8) +#endif #define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \ extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); \ diff --git a/paddle/fluid/inference/anakin/convert/pool2d.cc b/paddle/fluid/inference/anakin/convert/pool2d.cc index 436741b43b7058901ce4041a554976f072b519e1..11e7c717fd689b27a619a33bcac4037b30f97af8 100644 --- a/paddle/fluid/inference/anakin/convert/pool2d.cc +++ b/paddle/fluid/inference/anakin/convert/pool2d.cc @@ -71,22 +71,4 @@ void Pool2dOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using pool2d_nv_float32 = - ::paddle::inference::anakin::Pool2dOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using pool2d_nv_int8 = - ::paddle::inference::anakin::Pool2dOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_float32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_nv_int8); -#endif - -using pool2d_cpu_float32 = - ::paddle::inference::anakin::Pool2dOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using pool2d_cpu_int8 = - ::paddle::inference::anakin::Pool2dOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_float32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(pool2d, pool2d_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/relu.cc b/paddle/fluid/inference/anakin/convert/relu.cc index 6d456ccfdcd1a1edc4411b8dc702fb9deb3bb246..00853406634bdf5c488d21aca8289826f3a93a16 100644 --- a/paddle/fluid/inference/anakin/convert/relu.cc +++ b/paddle/fluid/inference/anakin/convert/relu.cc @@ -57,36 +57,5 @@ void LeakyReluOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using relu_nv_fp32 = - ::paddle::inference::anakin::ReluOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using leaky_nv_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using relu_nv_int8 = - ::paddle::inference::anakin::ReluOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -using leaky_nv_int8 = ::paddle::inference::anakin::LeakyReluOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; - -REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, relu_nv_fp32); -REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(relu, relu_nv_int8); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_nv_int8); - -#endif - -using relu_cpu_fp32 = - ::paddle::inference::anakin::ReluOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using leaky_cpu_fp32 = ::paddle::inference::anakin::LeakyReluOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using relu_cpu_int8 = - ::paddle::inference::anakin::ReluOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -using leaky_cpu_int8 = ::paddle::inference::anakin::LeakyReluOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, relu_cpu_fp32); -REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(relu, relu_cpu_int8); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(leaky_relu, leaky_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/reshape.cc b/paddle/fluid/inference/anakin/convert/reshape.cc index b7b47e30b1c814a4004c9aa0a173dc16a18e8c54..d73736b7fecc758a2965f2d2afff9a808d6e2adc 100644 --- a/paddle/fluid/inference/anakin/convert/reshape.cc +++ b/paddle/fluid/inference/anakin/convert/reshape.cc @@ -46,22 +46,4 @@ void ReshapeOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using reshape_nv_fp32 = - ::paddle::inference::anakin::ReshapeOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using reshape_nv_int8 = - ::paddle::inference::anakin::ReshapeOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(reshape, reshape_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(reshape, reshape_nv_int8); -#endif - -using reshape_cpu_fp32 = - ::paddle::inference::anakin::ReshapeOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using reshape_cpu_int8 = - ::paddle::inference::anakin::ReshapeOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(reshape, reshape_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(reshape, reshape_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/roi_align.cc b/paddle/fluid/inference/anakin/convert/roi_align.cc index 68d3bffd89d4336d4f5f0393c65fa77206317284..8702f638e10bbf72fa43d45e0042c16ffae447f1 100644 --- a/paddle/fluid/inference/anakin/convert/roi_align.cc +++ b/paddle/fluid/inference/anakin/convert/roi_align.cc @@ -51,22 +51,4 @@ void RoiAlignOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using roi_align_nv_fp32 = - ::paddle::inference::anakin::RoiAlignOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using roi_align_nv_int8 = - ::paddle::inference::anakin::RoiAlignOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(roi_align, roi_align_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(roi_align, roi_align_nv_int8); -#endif - -using roi_align_cpu_fp32 = - ::paddle::inference::anakin::RoiAlignOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using roi_align_cpu_int8 = - ::paddle::inference::anakin::RoiAlignOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(roi_align, roi_align_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(roi_align, roi_align_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(roi_align, RoiAlignOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/scale.cc b/paddle/fluid/inference/anakin/convert/scale.cc index cdfdf86a9747e17231d07b24870399b7e67a9899..2559ec498c8ba423bf894b1ec67e24bd2567ff2b 100644 --- a/paddle/fluid/inference/anakin/convert/scale.cc +++ b/paddle/fluid/inference/anakin/convert/scale.cc @@ -49,22 +49,4 @@ void ScaleOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using scale_nv_fp32 = - ::paddle::inference::anakin::ScaleOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using scale_nv_int8 = - ::paddle::inference::anakin::ScaleOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(scale, scale_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(scale, scale_nv_int8); -#endif - -using scale_cpu_fp32 = - ::paddle::inference::anakin::ScaleOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using scale_cpu_int8 = - ::paddle::inference::anakin::ScaleOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(scale, scale_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(scale, scale_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/softmax.cc b/paddle/fluid/inference/anakin/convert/softmax.cc index eb50e17e55f1176a37d2dd555a7d7399190abc44..a4dc5a9156b8f54cf8915e2a8829ada22d442ace 100644 --- a/paddle/fluid/inference/anakin/convert/softmax.cc +++ b/paddle/fluid/inference/anakin/convert/softmax.cc @@ -44,23 +44,4 @@ void SoftMaxOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using sm_nv_fp32 = - ::paddle::inference::anakin::SoftMaxOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using sm_nv_int8 = - ::paddle::inference::anakin::SoftMaxOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; - -REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax, sm_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(softmax, sm_nv_int8); -#endif - -using sm_cpu_fp32 = - ::paddle::inference::anakin::SoftMaxOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using sm_cpu_int8 = - ::paddle::inference::anakin::SoftMaxOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax, sm_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(softmax, sm_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/split.cc b/paddle/fluid/inference/anakin/convert/split.cc index b84860220fbe038171970d4f8c97ebc3df74af9c..e63edea94ae010f3bd2240fd21147642f647581e 100644 --- a/paddle/fluid/inference/anakin/convert/split.cc +++ b/paddle/fluid/inference/anakin/convert/split.cc @@ -55,23 +55,5 @@ void SplitOpConverter::operator()( } // namespace anakin } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using split_nv_fp32 = - ::paddle::inference::anakin::SplitOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using split_nv_int8 = - ::paddle::inference::anakin::SplitOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, split_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(split, split_nv_int8); -#endif -using split_cpu_fp32 = - ::paddle::inference::anakin::SplitOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using split_cpu_int8 = - ::paddle::inference::anakin::SplitOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; - -REGISTER_CPU_ANAKIN_OP_CONVERTER(split, split_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(split, split_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(split, SplitOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/sum.cc b/paddle/fluid/inference/anakin/convert/sum.cc index 2bc4d124c905e570957a275ce3e1ac5cfdd6530a..870c07934090370a05ad5e8a2e68af8f314e25ae 100644 --- a/paddle/fluid/inference/anakin/convert/sum.cc +++ b/paddle/fluid/inference/anakin/convert/sum.cc @@ -47,22 +47,4 @@ void SumOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using sum_nv_fp32 = - ::paddle::inference::anakin::SumOpConverter<::anakin::saber::NV, - ::anakin::Precision::FP32>; -using sum_nv_int8 = - ::paddle::inference::anakin::SumOpConverter<::anakin::saber::NV, - ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, sum_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sum, sum_nv_int8); -#endif - -using sum_cpu_fp32 = - ::paddle::inference::anakin::SumOpConverter<::anakin::saber::X86, - ::anakin::Precision::FP32>; -using sum_cpu_int8 = - ::paddle::inference::anakin::SumOpConverter<::anakin::saber::X86, - ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, sum_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sum, sum_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(sum, SumOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/test_activation_op.cc b/paddle/fluid/inference/anakin/convert/test_activation_op.cc index 67d3222d985b5df2559d43dcad79339df287bd49..4f898252d2798022d09f65e03b3cde35fcb6730c 100644 --- a/paddle/fluid/inference/anakin/convert/test_activation_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_activation_op.cc @@ -36,6 +36,14 @@ static void test_activation_op(const std::string& op_type, desc.SetInput("X", {"act-X"}); desc.SetOutput("Out", {"act-Out"}); + if (op_type == "swish") { + desc.SetAttr("beta", 1.0f); + } + + if (op_type == "relu6") { + desc.SetAttr("threshold", 6.0f); + } + LOG(INFO) << "set OP"; validator.SetOp(*desc.Proto()); LOG(INFO) << "execute"; @@ -55,6 +63,18 @@ TEST(tanh_op, gpu) { platform::CUDADeviceContext ctx(gpu_place); test_activation_op<::anakin::saber::NV>("tanh", ctx, true); } + +TEST(relu6_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_activation_op<::anakin::saber::NV>("relu6", ctx, true); +} + +TEST(swish_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_activation_op<::anakin::saber::NV>("swish", ctx, true); +} #endif /* @@ -69,6 +89,18 @@ TEST(tanh_op, cpu) { platform::CPUDeviceContext ctx(cpu_place); test_activation_op<::anakin::saber::X86>("tanh", ctx, false); } + +TEST(relu6_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_activation_op<::anakin::saber::X86>("relu6", ctx, false); +} + +TEST(swish_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_activation_op<::anakin::saber::X86>("swish", ctx, false); +} */ } // namespace anakin @@ -77,10 +109,16 @@ TEST(tanh_op, cpu) { USE_OP(sigmoid); USE_OP(tanh); +USE_OP(relu6); +USE_OP(swish); USE_CPU_ANAKIN_CONVERTER(sigmoid); USE_CPU_ANAKIN_CONVERTER(tanh); +USE_CPU_ANAKIN_CONVERTER(relu6); +USE_CPU_ANAKIN_CONVERTER(swish); #ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(sigmoid); USE_ANAKIN_CONVERTER(tanh); +USE_ANAKIN_CONVERTER(relu6); +USE_ANAKIN_CONVERTER(swish); #endif diff --git a/paddle/fluid/inference/anakin/convert/transpose.cc b/paddle/fluid/inference/anakin/convert/transpose.cc index 849bfc9ea3e490dafd1500f1912d22149b4d7baf..28071ca8449cdd61799011308a992eacd51dfd38 100644 --- a/paddle/fluid/inference/anakin/convert/transpose.cc +++ b/paddle/fluid/inference/anakin/convert/transpose.cc @@ -49,18 +49,4 @@ void TransposeOpConverter::operator()( } // namespace inference } // namespace paddle -#ifdef PADDLE_WITH_CUDA -using transpose_nv_fp32 = ::paddle::inference::anakin::TransposeOpConverter< - ::anakin::saber::NV, ::anakin::Precision::FP32>; -using transpose_nv_int8 = ::paddle::inference::anakin::TransposeOpConverter< - ::anakin::saber::NV, ::anakin::Precision::INT8>; -REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, transpose_nv_fp32); -REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_nv_int8); -#endif - -using transpose_cpu_fp32 = ::paddle::inference::anakin::TransposeOpConverter< - ::anakin::saber::X86, ::anakin::Precision::FP32>; -using transpose_cpu_int8 = ::paddle::inference::anakin::TransposeOpConverter< - ::anakin::saber::X86, ::anakin::Precision::INT8>; -REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_fp32); -REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(transpose, transpose_cpu_int8); +REGISTER_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter); diff --git a/paddle/fluid/inference/anakin/op_teller.cc b/paddle/fluid/inference/anakin/op_teller.cc index 72064c1790da13543b9ea364cd0cdda050fcf36c..6cad00f8ecfe872924ed3804847cb22b8932b91d 100644 --- a/paddle/fluid/inference/anakin/op_teller.cc +++ b/paddle/fluid/inference/anakin/op_teller.cc @@ -46,6 +46,8 @@ struct SimpleOpTypeSetTeller : public Teller { teller_set.insert("prior_box"); teller_set.insert("leaky_relu"); teller_set.insert("affine_channel"); + teller_set.insert("relu6"); + teller_set.insert("swish"); } bool operator()(const std::string& op_type, diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index e1709fe2e674910f0f64cb3ead5bf90007508120..7552a576a655b9befca1fee221193bdf512f4cbc 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -893,4 +893,6 @@ USE_ANAKIN_CONVERTER(sum); USE_ANAKIN_CONVERTER(prior_box); USE_ANAKIN_CONVERTER(leaky_relu); USE_ANAKIN_CONVERTER(affine_channel); +USE_ANAKIN_CONVERTER(relu6); +USE_ANAKIN_CONVERTER(swish); #endif