diff --git a/cmake/anakin_subgraph.cmake b/cmake/anakin_subgraph.cmake
index 4a7d32a63553df31e0928e7b30249ff3e809cba1..b5437e776d31e4d4fec5a79bf505202d192cd5ca 100644
--- a/cmake/anakin_subgraph.cmake
+++ b/cmake/anakin_subgraph.cmake
@@ -25,8 +25,9 @@
 endif()
 if(ANAKIN_FOUND)
   message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
+  include_directories(${ANAKIN_ROOT})
   include_directories(${ANAKIN_ROOT}/include)
-  include_directories(${ANAKIN_ROOT}/include/saber)
+  include_directories(${ANAKIN_ROOT}/saber)
   link_directories(${ANAKIN_ROOT})
   add_definitions(-DPADDLE_WITH_ANAKIN)
 endif()
diff --git a/paddle/fluid/inference/anakin/convert/activation.cc b/paddle/fluid/inference/anakin/convert/activation.cc
index a9aeb19ffd5f04c03df593e8f48976e7fa6155ab..11f92c95217b37fc590a4f1aae514fc7b4305df7 100644
--- a/paddle/fluid/inference/anakin/convert/activation.cc
+++ b/paddle/fluid/inference/anakin/convert/activation.cc
@@ -16,16 +16,13 @@
 #include
 #include
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
+template <typename TargetT>
+ActivationOpConverter<TargetT>::ActivationOpConverter(
+    const std::string &op_type)
     : op_type_(op_type) {
   auto it = anakin_op_types_.find(op_type_);
   PADDLE_ENFORCE(it != anakin_op_types_.end(),
@@ -33,10 +30,10 @@ ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
   anakin_op_type_ = it->second;
 }
 
-void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
-                                       const framework::BlockDesc &block_desc,
-                                       const framework::Scope &scope,
-                                       bool test_mode) {
+template <typename TargetT>
+void ActivationOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
@@ -44,13 +41,20 @@ void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
-  engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "type", anakin_op_type_);
+  this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "type", anakin_op_type_);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid,
+                                  SigmoidOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid,
+                                 SigmoidOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);
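The change above is the recipe this whole patch repeats for every op: the NV-only converter class becomes a class template over the Anakin target tag, and each instantiation is registered separately for CUDA and CPU. A minimal, self-contained sketch of that idea — the `NV`/`X86` tags, `Engine`, and converter here are simplified stand-ins, not the real Paddle/Anakin types:

```cpp
#include <iostream>
#include <string>

// Simplified stand-ins for the ::anakin::saber::NV / X86 target tags.
struct NV {};
struct X86 {};

// Stand-in for AnakinEngine<TargetT, Precision::FP32>.
template <typename TargetT>
struct Engine {
  void AddOp(const std::string &name) {
    std::cout << "add op " << name << "\n";
  }
};

// One converter template now serves both targets, as in the patch.
template <typename TargetT>
class SigmoidOpConverter {
 public:
  void operator()(Engine<TargetT> *engine) { engine->AddOp("sigmoid"); }
};

int main() {
  Engine<NV> cuda_engine;
  Engine<X86> cpu_engine;
  SigmoidOpConverter<NV>()(&cuda_engine);   // the instantiation REGISTER_CUDA_* sees
  SigmoidOpConverter<X86>()(&cpu_engine);   // the instantiation REGISTER_CPU_* sees
}
```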
diff --git a/paddle/fluid/inference/anakin/convert/activation.h b/paddle/fluid/inference/anakin/convert/activation.h
index 592a3d5bd9d1272aae8a13d0d0acc77f8990c6b3..b3fe4748641cf0d47e221dd4066c0f8a1960f047 100644
--- a/paddle/fluid/inference/anakin/convert/activation.h
+++ b/paddle/fluid/inference/anakin/convert/activation.h
@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class ActivationOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ActivationOpConverter : public AnakinOpConverter<TargetT> {
  public:
   explicit ActivationOpConverter(const std::string &op_type);
 
@@ -39,14 +40,16 @@ class ActivationOpConverter : public AnakinOpConverter {
       {"sigmoid", "Sigmoid"}};
 };
 
-class TanhOpConverter : public ActivationOpConverter {
+template <typename TargetT>
+class TanhOpConverter : public ActivationOpConverter<TargetT> {
  public:
-  TanhOpConverter() : ActivationOpConverter("tanh") {}
+  TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
 };
 
-class SigmoidOpConverter : public ActivationOpConverter {
+template <typename TargetT>
+class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
  public:
-  SigmoidOpConverter() : ActivationOpConverter("sigmoid") {}
+  SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
 };
 }  // namespace anakin
 }  // namespace inference
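The pervasive `engine_` → `this->engine_` edits in this patch are not cosmetic: once a converter is a template deriving from the dependent base `AnakinOpConverter<TargetT>`, unqualified names are no longer looked up in that base during the first phase of two-phase lookup. A compilable illustration with stand-in types:

```cpp
template <typename TargetT>
class AnakinOpConverterBase {  // stand-in for AnakinOpConverter<TargetT>
 protected:
  int *engine_ = nullptr;
};

template <typename TargetT>
class SomeConverter : public AnakinOpConverterBase<TargetT> {
 public:
  int *Get() {
    // return engine_;     // error: unqualified lookup does not search the
    //                     // dependent base AnakinOpConverterBase<TargetT>
    return this->engine_;  // OK: lookup is deferred to instantiation time
  }
};

int main() { return SomeConverter<int>().Get() == nullptr ? 0 : 1; }
```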
diff --git a/paddle/fluid/inference/anakin/convert/affine_channel.cc b/paddle/fluid/inference/anakin/convert/affine_channel.cc
index 7c886df082d121d1a07ebcae1e11376113698ed3..6bf913e7ffbc02f94f46dab7b1d2a34ed315a05d 100644
--- a/paddle/fluid/inference/anakin/convert/affine_channel.cc
+++ b/paddle/fluid/inference/anakin/convert/affine_channel.cc
@@ -18,19 +18,16 @@
 #include
 
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void AffineChannelOpConverter::operator()(
+template <typename TargetT>
+void AffineChannelOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -59,7 +56,7 @@ void AffineChannelOpConverter::operator()(
   bias_tensor->Resize(bias_t->dims());
   TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());
 
-  engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
 
   // Generate the Scale parameter of Anakin.
   auto scale_shape = framework::vectorize2int(scale_t->dims());
@@ -67,15 +64,16 @@
     scale_shape.insert(scale_shape.begin(), 1);
   }
   Shape anakin_scale_shape(scale_shape);
-  auto *weight1 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_scale_shape);
+  auto *weight1 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_scale_shape);
   float *scale_cpu_data =
       static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(scale_tensor->data<float>(), scale_tensor->numel(),
               scale_cpu_data);
   weight1->d_tensor().set_shape(anakin_scale_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   // Generate the Bias parameter of Anakin.
   auto bias_shape = framework::vectorize2int(bias_t->dims());
@@ -83,18 +81,24 @@ void AffineChannelOpConverter::operator()(
     bias_shape.insert(bias_shape.begin(), 1);
   }
   Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_bias_shape);
+  auto *weight2 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_bias_shape);
   float *bias_cpu_data =
       static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
   weight2->d_tensor().set_shape(anakin_bias_shape);
   weight2->d_tensor().copy_from(weight2->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    affine_channel, AffineChannelOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    affine_channel, AffineChannelOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/affine_channel.h b/paddle/fluid/inference/anakin/convert/affine_channel.h
index ea0043670c61b2d359c4ab31fc7b96eab0ca2259..5da4a736e8d7e0382316ceac645200410728758c 100644
--- a/paddle/fluid/inference/anakin/convert/affine_channel.h
+++ b/paddle/fluid/inference/anakin/convert/affine_channel.h
@@ -21,7 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class AffineChannelOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
  public:
   AffineChannelOpConverter() = default;
diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.cc b/paddle/fluid/inference/anakin/convert/batch_norm.cc
index 38cf6172027b3b200a378a61b6d5b395cc571de7..1c837e9c3dfd4e3a279fdfa807416855e641a58e 100644
--- a/paddle/fluid/inference/anakin/convert/batch_norm.cc
+++ b/paddle/fluid/inference/anakin/convert/batch_norm.cc
@@ -21,17 +21,16 @@
 
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
-                                      const framework::BlockDesc &block_desc,
-                                      const framework::Scope &scope,
-                                      bool test_mode) {
+template <typename TargetT>
+void BatchNormOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
   std::map<std::string, std::string> inputs;
@@ -48,9 +47,9 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
   auto bn_op_name = op_name + ":bn";
   auto bn_output = bn_op_name + "_output";
-  engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
-  engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
-  engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
+  this->engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
+  this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
+  this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
 
   auto scale_op_name = op_name + ":scale";
   auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name,
@@ -81,48 +80,54 @@
   Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims())));
   Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims())));
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
   auto *mean_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(mean_t.data<float>(), mean_t.numel(), mean_data);
-  engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
 
   auto *weight2 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape2);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape2);
   auto *variance_data =
       static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(variance_t.data<float>(), variance_t.numel(), variance_data);
-  engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
 
   Shape shape3(std::vector<int>({1, 1, 1, 1}));
   auto *weight3 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape3);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape3);
   auto *alpha_data = static_cast<float *>(weight3->h_tensor().mutable_data());
   float weight3_data[] = {1};
   std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data);
-  engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
+  this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
 
   Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims())));
-  auto *scale =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(scale_shape);
+  auto *scale = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+      scale_shape);
   auto *scale_data = static_cast<float *>(scale->h_tensor().mutable_data());
   std::copy_n(scale_t.data<float>(), scale_t.numel(), scale_data);
 
   Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims())));
-  auto *bias =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(bias_shape);
+  auto *bias = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+      bias_shape);
   auto *bias_data = static_cast<float *>(bias->h_tensor().mutable_data());
   std::copy_n(bias_t.data<float>(), bias_t.numel(), bias_data);
 
-  engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
-  engine_->AddOpAttr(scale_op_name, "axis", 1);
-  engine_->AddOpAttr(scale_op_name, "num_axes", 1);
-  engine_->AddOpAttr(scale_op_name, "bias_term", true);
-  engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
-  engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
+  this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
+  this->engine_->AddOpAttr(scale_op_name, "axis", 1);
+  this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
+  this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
+  this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
+  this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm,
+                                  BatchNormOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm,
+                                 BatchNormOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.h b/paddle/fluid/inference/anakin/convert/batch_norm.h
index c56735f15b435b46cf9f623bd284b5731a36c327..dc94b6ff64d13be035985616b49dc39c411b04ff 100644
--- a/paddle/fluid/inference/anakin/convert/batch_norm.h
+++ b/paddle/fluid/inference/anakin/convert/batch_norm.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class BatchNormOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class BatchNormOpConverter : public AnakinOpConverter<TargetT> {
  public:
   BatchNormOpConverter() = default;
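`fill_shape` is called in these hunks but defined elsewhere; judging from the call sites it pads a dims vector with 1s up to rank 4 so that per-channel batch-norm parameters become 4-D blocks. A sketch under that assumption (the helper name and the padding side are inferred from the call sites, not confirmed by the hunk):

```cpp
#include <cassert>
#include <vector>

// Assumed behavior of fill_shape(4, dims): left-pad with 1s to the
// requested rank, so a 1-D channel vector becomes a 4-D block.
std::vector<int> fill_shape(size_t rank, std::vector<int> dims) {
  assert(dims.size() <= rank);
  dims.insert(dims.begin(), rank - dims.size(), 1);
  return dims;
}

int main() {
  std::vector<int> mean_dims = {64};             // BN mean, one per channel
  std::vector<int> shape = fill_shape(4, mean_dims);
  // shape is now {1, 1, 1, 64}, the NCHW-style layout Anakin blocks expect.
  return shape.size() == 4 && shape[3] == 64 ? 0 : 1;
}
```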
diff --git a/paddle/fluid/inference/anakin/convert/concat.cc b/paddle/fluid/inference/anakin/convert/concat.cc
index ae90c083690da6e108a05460de68be2eb0cd9b48..cfd9540acf60abe11c3acddcce132486d680f601 100644
--- a/paddle/fluid/inference/anakin/convert/concat.cc
+++ b/paddle/fluid/inference/anakin/convert/concat.cc
@@ -15,38 +15,32 @@
 #include "paddle/fluid/inference/anakin/convert/concat.h"
 #include
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
-
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void ConcatOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   int axis = boost::get<int>(op_desc.GetAttr("axis"));
   auto input_names = op_desc.Input("X");
-  // PADDLE_ENFORCE(axis > 0,
-  //                "The axis attr of Concat op should be large than 0 for trt");
 
   auto y_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Concat", input_names, {y_name});
-  engine_->AddOpAttr(op_name, "axis", axis);
+  this->engine_->AddOp(op_name, "Concat", input_names, {y_name});
+  this->engine_->AddOpAttr(op_name, "axis", axis);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat,
+                                  ConcatOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(concat,
+                                 ConcatOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/concat.h b/paddle/fluid/inference/anakin/convert/concat.h
index 974ff689bfef681f8993d5dbb0dbbbdde91f33bd..a32f8a4612921f124fdf0d42816366a34d1943c9 100644
--- a/paddle/fluid/inference/anakin/convert/concat.h
+++ b/paddle/fluid/inference/anakin/convert/concat.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class ConcatOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ConcatOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ConcatOpConverter() = default;
diff --git a/paddle/fluid/inference/anakin/convert/conv2d.cc b/paddle/fluid/inference/anakin/convert/conv2d.cc
index 308f14604b9c83f2278499359328109d31f9ff17..f9ab9874751300b99dad5f141235f6f1a5acc089 100644
--- a/paddle/fluid/inference/anakin/convert/conv2d.cc
+++ b/paddle/fluid/inference/anakin/convert/conv2d.cc
@@ -18,19 +18,18 @@
 #include
 
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
-using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void Conv2dOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
@@ -39,7 +38,7 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input("Input").front();
   auto output_name = op_desc.Output("Output").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
-  engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
 
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
@@ -51,38 +50,44 @@ void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
 
   PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
 
-  // const int n_output = weight_tensor->dims()[0];
-  // const int n_input = weight_tensor->dims()[1];
   const int filter_h = weight_tensor->dims()[2];
   const int filter_w = weight_tensor->dims()[3];
-  // auto filter_num = n_input * filter_h * filter_w ;
+
   auto filter_num = weight_tensor->dims()[0];
-  engine_->AddOpAttr(op_name, "filter_num", filter_num);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
+  this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
+                                                 {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
   auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
+                                                 dilations);
   const int groups = boost::get<int>(op_desc.GetAttr("groups"));
-  engine_->AddOpAttr(op_name, "group", groups);
-  engine_->AddOpAttr(op_name, "axis", 1);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOpAttr(op_name, "group", groups);
+  this->engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOpAttr(op_name, "bias_term", false);
 
   auto weight_shape = framework::vectorize2int(filter_t->dims());
   Shape anakin_shape(weight_shape);
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d,
+                                 Conv2dOpConverter<::anakin::saber::X86>);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d,
+                                  Conv2dOpConverter<::anakin::saber::NV>);
+#endif
diff --git a/paddle/fluid/inference/anakin/convert/conv2d.h b/paddle/fluid/inference/anakin/convert/conv2d.h
index dca5d19f468ac6d6e2f4bcda8ecaa3922d80e6b1..6ecb32840519e0a4cd2e064c315457c22fddc393 100644
--- a/paddle/fluid/inference/anakin/convert/conv2d.h
+++ b/paddle/fluid/inference/anakin/convert/conv2d.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class Conv2dOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Conv2dOpConverter : public AnakinOpConverter<TargetT> {
  public:
   Conv2dOpConverter() = default;
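The `template` keyword that now precedes `AddOpAttr<...>` is likewise forced by the templating: `engine_` has a type that depends on `TargetT`, so without the keyword the `<` of an explicit template argument list parses as less-than. A standalone demonstration with stand-in types:

```cpp
template <typename TargetT>
struct Engine {
  template <typename T>
  void AddOpAttr(const T & /*value*/) {}
};

template <typename TargetT>
struct Converter {
  Engine<TargetT> *engine_ = nullptr;  // dependent type, like AnakinEngineT
  void Run() {
    // engine_->AddOpAttr<int>(1);          // ill-formed: '<' is parsed as
    //                                      // less-than on a dependent type
    engine_->template AddOpAttr<int>(1);    // 'template' disambiguates
  }
};

int main() {
  Engine<char> e;
  Converter<char> c;
  c.engine_ = &e;
  c.Run();
}
```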
diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
index fa1ab0efeeb5cacd112ca1b644735eaaf49e55f8..ff60771f87b33ebd72a1db1ab0ca32adb30ae4a3 100644
--- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
@@ -18,19 +18,18 @@
 #include
 
 using anakin::graph::GraphGlobalMem;
+using anakin::PTuple;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
-using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
-                                         const framework::BlockDesc &block_desc,
-                                         const framework::Scope &scope,
-                                         bool test_mode) {
+template <typename TargetT>
+void Conv2dFusionOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
@@ -40,7 +39,7 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input("Input").front();
   auto output_name = op_desc.Output("Output").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
-  engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
+  this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
 
   auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
   PADDLE_ENFORCE_NOT_NULL(filter_v);
@@ -63,28 +62,31 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   const int filter_w = weight_tensor->dims()[3];
   // auto filter_num = n_input * filter_h * filter_w ;
   auto filter_num = weight_tensor->dims()[0];
-  engine_->AddOpAttr(op_name, "filter_num", filter_num);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
+  this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
+                                                 {filter_h, filter_w});
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
   auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
+                                                 dilations);
   const int groups = boost::get<int>(op_desc.GetAttr("groups"));
-  engine_->AddOpAttr(op_name, "group", groups);
-  engine_->AddOpAttr(op_name, "axis", 1);
-  engine_->AddOpAttr(op_name, "bias_term", true);
+  this->engine_->AddOpAttr(op_name, "group", groups);
+  this->engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOpAttr(op_name, "bias_term", true);
 
   auto weight_shape = framework::vectorize2int(filter_t->dims());
   Shape anakin_shape(weight_shape);
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   auto bias_shape = framework::vectorize2int(b_t->dims());
   framework::LoDTensor bias_tensor;
@@ -98,17 +100,24 @@ void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
   // bias_shape.push_back(1);
   Shape anakin_bias_shape(bias_shape);
-  auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-      anakin_bias_shape);
+  auto *weight2 =
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_bias_shape);
   float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
   std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
   weight2->d_tensor().set_shape(anakin_bias_shape);
   weight2->d_tensor().copy_from(weight2->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(conv2d_fusion,
+                                  Conv2dFusionOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(conv2d_fusion,
+                                 Conv2dFusionOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h
index 0d9ef28183b309c4b50714fcbe64e24c5d9dfbaa..abcf61a75e0fda5b12e48ff4315deaf195f76484 100644
--- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h
+++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class Conv2dFusionOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT> {
  public:
   Conv2dFusionOpConverter() = default;
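Unlike plain conv2d, the fused converter sets `bias_term` to `true` because conv2d_fusion carries its bias with it: after the convolution, one bias value per output channel is added, which is why `weight_2` is the bias vector padded out to a 4-D block. A toy model of that per-channel broadcast:

```cpp
#include <vector>

// Toy model of what "bias_term = true" means for the fused conv:
// after the convolution, one bias value per output channel is added
// across that channel's spatial positions.
std::vector<float> add_channel_bias(std::vector<float> conv_out,
                                    const std::vector<float> &bias,
                                    int channels, int spatial) {
  for (int c = 0; c < channels; ++c)
    for (int i = 0; i < spatial; ++i) conv_out[c * spatial + i] += bias[c];
  return conv_out;
}

int main() {
  std::vector<float> out = {1, 1, 2, 2};    // 2 channels x 2 pixels
  std::vector<float> bias = {10, 20};       // one value per channel
  out = add_channel_bias(out, bias, 2, 2);  // {11, 11, 22, 22}
  return out[3] == 22 ? 0 : 1;
}
```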
diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.cc b/paddle/fluid/inference/anakin/convert/density_prior_box.cc
index 30796f7592427191a4396a154be62838b7e666ad..f552e41c85fb111139d13c66aedc1d67ff10c313 100644
--- a/paddle/fluid/inference/anakin/convert/density_prior_box.cc
+++ b/paddle/fluid/inference/anakin/convert/density_prior_box.cc
@@ -17,17 +17,14 @@
 #include
 #include
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void DensityPriorBoxOpConverter::operator()(
+template <typename TargetT>
+void DensityPriorBoxOpConverter<TargetT>::operator()(
    const framework::proto::OpDesc& op, const framework::BlockDesc& block_desc,
    const framework::Scope& scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -81,27 +78,44 @@ void DensityPriorBoxOpConverter::operator()(
   std::vector<float> temp_v = {};
 
-  engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name});
-  engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", min_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", max_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", aspect_ratios);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "density", dens);
-  engine_->AddOpAttr(op_name, "is_flip", is_flip);
-  engine_->AddOpAttr(op_name, "is_clip", is_clip);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances);
-  engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "step_h", step_h);
-  engine_->AddOpAttr(op_name, "step_w", step_w);
-  engine_->AddOpAttr(op_name, "offset", offset);
-  engine_->AddOpAttr<PTuple<std::string>>(op_name, "order", t_order);
+  this->engine_->AddOp(op_name, "PriorBox", {input_name, image_name},
+                       {output_name});
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "min_size",
+                                                   min_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "max_size",
+                                                   max_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "aspect_ratio",
+                                                   aspect_ratios);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_size",
+                                                   fixed_sizes);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_ratio",
+                                                   fixed_ratios);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "density", dens);
+  this->engine_->AddOpAttr(op_name, "is_flip", is_flip);
+  this->engine_->AddOpAttr(op_name, "is_clip", is_clip);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "variance",
+                                                   variances);
+  this->engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "step_h", step_h);
+  this->engine_->AddOpAttr(op_name, "step_w", step_w);
+  this->engine_->AddOpAttr(op_name, "offset", offset);
+  this->engine_->template AddOpAttr<PTuple<std::string>>(op_name, "order",
+                                                         t_order);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    density_prior_box, DensityPriorBoxOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    prior_box, DensityPriorBoxOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    density_prior_box, DensityPriorBoxOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    prior_box, DensityPriorBoxOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.h b/paddle/fluid/inference/anakin/convert/density_prior_box.h
index bf9210711a0f69595c241803cd40d42770ccd5d7..29f4f6f7f9db501af5fc46d61435e4a586c11c6a 100644
--- a/paddle/fluid/inference/anakin/convert/density_prior_box.h
+++ b/paddle/fluid/inference/anakin/convert/density_prior_box.h
@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class DensityPriorBoxOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DensityPriorBoxOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DensityPriorBoxOpConverter() = default;
diff --git a/paddle/fluid/inference/anakin/convert/detection_out.cc b/paddle/fluid/inference/anakin/convert/detection_out.cc
index 262ad28a654609cddde979d387621bb0c7c1a7f9..4a28c604f5853acee2c67b1e5d11a94c0d295102 100644
--- a/paddle/fluid/inference/anakin/convert/detection_out.cc
+++ b/paddle/fluid/inference/anakin/convert/detection_out.cc
@@ -16,19 +16,14 @@
 #include
 #include
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
-                                         const framework::BlockDesc &block_desc,
-                                         const framework::Scope &scope,
-                                         bool test_mode) {
+template <typename TargetT>
+void DetectionOutOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto target_name = op_desc.Input("TargetBox").front();
   auto prior_box_name = op_desc.Input("PriorBox").front();
@@ -52,22 +47,28 @@ void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
         "Not support encode_center_size code_type in DetectionOut of anakin");
   }
 
-  engine_->AddOp(op_name, "DetectionOutput",
-                 {target_name, scores_name, prior_box_name}, {output_name});
-  engine_->AddOpAttr(op_name, "share_location", true);
-  engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
-  engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
-  engine_->AddOpAttr(op_name, "background_id", background_label);
-  engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
-  engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
-  engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
-  engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
-  engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
-  engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
+  this->engine_->AddOp(op_name, "DetectionOutput",
+                       {target_name, scores_name, prior_box_name},
+                       {output_name});
+  this->engine_->AddOpAttr(op_name, "share_location", true);
+  this->engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
+  this->engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
+  this->engine_->AddOpAttr(op_name, "background_id", background_label);
+  this->engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
+  this->engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
+  this->engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
+  this->engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
+  this->engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
+  this->engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(detection_out,
+                                  DetectionOutOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(detection_out,
+                                 DetectionOutOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/detection_out.h b/paddle/fluid/inference/anakin/convert/detection_out.h
index ca78f10fdc2a7c7064ae0399e7f1afff1383ce67..396d5c9554fda7db19e2097c5ea65bef8f79ebff 100644
--- a/paddle/fluid/inference/anakin/convert/detection_out.h
+++ b/paddle/fluid/inference/anakin/convert/detection_out.h
@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class DetectionOutOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DetectionOutOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DetectionOutOpConverter() = default;
diff --git a/paddle/fluid/inference/anakin/convert/dropout.cc b/paddle/fluid/inference/anakin/convert/dropout.cc
index bc9b26dcf2733369e558cde2954e9d0caaba86b0..989eafcd91ef469f83f71f1381e495c4df2063df 100644
--- a/paddle/fluid/inference/anakin/convert/dropout.cc
+++ b/paddle/fluid/inference/anakin/convert/dropout.cc
@@ -19,21 +19,16 @@
 
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void DropoutOpConverter::operator()(const framework::proto::OpDesc &op,
-                                    const framework::BlockDesc &block_desc,
-                                    const framework::Scope &scope,
-                                    bool test_mode) {
+template <typename TargetT>
+void DropoutOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Mask").size(), 1);
@@ -43,25 +38,30 @@ void DropoutOpConverter::operator()(const framework::proto::OpDesc &op,
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Scale", {x_name}, {out_name});
+  this->engine_->AddOp(op_name, "Scale", {x_name}, {out_name});
 
   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
   Shape shape1(std::vector<int>({1, 1, 1, 1}));
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
   auto *factor_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   float weight1_data[] = {factor};
   std::copy(std::begin(weight1_data), std::end(weight1_data), factor_data);
 
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
-  engine_->AddOpAttr(op_name, "axis", 0);
-  engine_->AddOpAttr(op_name, "num_axes", 0);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "axis", 0);
+  this->engine_->AddOpAttr(op_name, "num_axes", 0);
+  this->engine_->AddOpAttr(op_name, "bias_term", false);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(dropout,
+                                  DropoutOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(dropout,
+                                 DropoutOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/dropout.h b/paddle/fluid/inference/anakin/convert/dropout.h
index 11412e217ef5fa77bd22d7530d88be1347f2616f..c43c851fc0ee603786d85f492c2612daa5704d30 100644
--- a/paddle/fluid/inference/anakin/convert/dropout.h
+++ b/paddle/fluid/inference/anakin/convert/dropout.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class DropoutOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class DropoutOpConverter : public AnakinOpConverter<TargetT> {
  public:
   DropoutOpConverter() = default;
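The dropout conversion relies on inference-time dropout collapsing to a single multiplicative factor, `1 - dropout_prob`, which is why it emits a `Scale` op whose only weight is that factor. A scalar sketch (this assumes Paddle's non-`upscale_in_train` convention, the one that matches the `1 - dropout_prob` factor in the hunk above):

```cpp
#include <vector>

// Inference-time dropout is just y = (1 - dropout_prob) * x; no mask
// is sampled, so the whole op reduces to one scale weight.
std::vector<float> dropout_inference(std::vector<float> x,
                                     float dropout_prob) {
  const float factor = 1.0f - dropout_prob;
  for (float &v : x) v *= factor;
  return x;
}

int main() {
  auto y = dropout_inference({2.0f, 4.0f}, 0.5f);  // {1.0, 2.0}
  return y[0] == 1.0f ? 0 : 1;
}
```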
diff --git a/paddle/fluid/inference/anakin/convert/elementwise.cc b/paddle/fluid/inference/anakin/convert/elementwise.cc
index fe9a896d8266e06250b712be0c75290c039e9a08..81e1d10d82bd6605ed8c9fc017c347b2eeb4aca7 100644
--- a/paddle/fluid/inference/anakin/convert/elementwise.cc
+++ b/paddle/fluid/inference/anakin/convert/elementwise.cc
@@ -19,18 +19,15 @@
 
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void ElementwiseAddOpConverter::operator()(
+template <typename TargetT>
+void ElementwiseAddOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -43,14 +40,16 @@ void ElementwiseAddOpConverter::operator()(
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
+  this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
   std::string elementwise_type = "Add";
-  engine_->AddOpAttr<std::string>(op_name, "type", elementwise_type);
+  this->engine_->template AddOpAttr<std::string>(op_name, "type",
+                                                 elementwise_type);
   std::vector<float> coeff = {1.0, 1.0};
-  engine_->AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }
 
-void ElementwiseMulOpConverter::operator()(
+template <typename TargetT>
+void ElementwiseMulOpConverter<TargetT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -63,26 +62,25 @@ void ElementwiseMulOpConverter::operator()(
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Scale", {x_name, y_name}, {out_name});
-  // Fill a number to weight_1 as a placeholder.
-  Shape shape1(std::vector<int>({1, 1, 1, 1}));
-  auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
-  auto *placeholder_data =
-      static_cast<float *>(weight1->h_tensor().mutable_data());
-  float weight1_data[] = {1};
-  std::copy(std::begin(weight1_data), std::end(weight1_data), placeholder_data);
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
-
-  auto axis = boost::get<int>(op_desc.GetAttr("axis"));
-  engine_->AddOpAttr(op_name, "axis", axis);
-  engine_->AddOpAttr(op_name, "num_axes", 1);
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
+  std::string elementwise_type = "Prod";
+  this->engine_->template AddOpAttr<std::string>(op_name, "type",
+                                                 elementwise_type);
+  std::vector<float> coeff = {1.0, 1.0};
+  this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    elementwise_add, ElementwiseAddOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(
+    elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    elementwise_add, ElementwiseAddOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(
+    elementwise_mul, ElementwiseMulOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/elementwise.h b/paddle/fluid/inference/anakin/convert/elementwise.h
index e4664493a9d3ce1ed9a0c79a05fb466c4e781b3e..f64a8c5f7f3234f66a8a4a95ca56fdd87b889507 100644
--- a/paddle/fluid/inference/anakin/convert/elementwise.h
+++ b/paddle/fluid/inference/anakin/convert/elementwise.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class ElementwiseAddOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ElementwiseAddOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ElementwiseAddOpConverter() = default;
 
@@ -33,7 +34,8 @@ class ElementwiseAddOpConverter : public AnakinOpConverter {
  private:
 };
 
-class ElementwiseMulOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ElementwiseMulOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ElementwiseMulOpConverter() = default;
diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc
index a80a1a47e91aa085935b5febb3858e028f396091..a04035eabace017df3eef2e94770b6354a364793 100644
--- a/paddle/fluid/inference/anakin/convert/fc.cc
+++ b/paddle/fluid/inference/anakin/convert/fc.cc
@@ -19,17 +19,16 @@
 
 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::saber::NV;
 using anakin::saber::Shape;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void FcBaseOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto input_names = op_desc.InputNames();
   bool with_bias = input_names.size() == 3;
@@ -51,13 +50,13 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input(i_name).front();
   auto output_name = op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "bias_term", with_bias);
-  engine_->AddOpAttr(op_name, "axis", 1);
+  this->engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "bias_term", with_bias);
+  this->engine_->AddOpAttr(op_name, "axis", 1);
 
   auto weight_shape = framework::vectorize2int(y_t->dims());
   int out_dim = weight_shape[1];
-  engine_->AddOpAttr(op_name, "out_dim", out_dim);
+  this->engine_->AddOpAttr(op_name, "out_dim", out_dim);
   const int w_m = weight_shape[0];
   const int w_k = weight_shape[1];
@@ -79,12 +78,13 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
     }
   }
   auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+          anakin_shape);
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
   weight1->d_tensor().set_shape(anakin_shape);
   weight1->d_tensor().copy_from(weight1->h_tensor());
-  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   // get bias
   if (with_bias) {
@@ -104,13 +104,14 @@ void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
     // bias_shape.push_back(1);
     Shape anakin_bias_shape(bias_shape);
-    auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
-        anakin_bias_shape);
+    auto *weight2 =
+        GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
+            anakin_bias_shape);
     float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
     std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
     weight2->d_tensor().set_shape(anakin_bias_shape);
     weight2->d_tensor().copy_from(weight2->h_tensor());
-    engine_->AddOpAttr(op_name, "weight_2", *weight2);
+    this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
 
@@ -118,5 +119,10 @@
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::NV>);
+#endif
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(mul, MulOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(fc, FcOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/fc.h b/paddle/fluid/inference/anakin/convert/fc.h
index fb461908b35e0111065e1a46c52306c64ace7d7c..10808c315757b726b810faa9af9e4a793cb4f550 100644
--- a/paddle/fluid/inference/anakin/convert/fc.h
+++ b/paddle/fluid/inference/anakin/convert/fc.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class FcBaseOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class FcBaseOpConverter : public AnakinOpConverter<TargetT> {
  public:
   FcBaseOpConverter() = default;
 
@@ -32,13 +33,15 @@ class FcBaseOpConverter : public AnakinOpConverter {
 };
 
 // with bias
-class FcOpConverter : public FcBaseOpConverter {
+template <typename TargetT>
+class FcOpConverter : public FcBaseOpConverter<TargetT> {
  public:
   FcOpConverter() = default;
 };
 
 // without bias
-class MulOpConverter : public FcBaseOpConverter {
+template <typename TargetT>
+class MulOpConverter : public FcBaseOpConverter<TargetT> {
  public:
   MulOpConverter() = default;
 };
diff --git a/paddle/fluid/inference/anakin/convert/flatten.cc b/paddle/fluid/inference/anakin/convert/flatten.cc
index 7f5c1510960d1014c33bd565939812fe7c7dfc06..a38dec25d831c750954936d1fcec9a674ae2d604 100644
--- a/paddle/fluid/inference/anakin/convert/flatten.cc
+++ b/paddle/fluid/inference/anakin/convert/flatten.cc
@@ -15,20 +15,16 @@
 #include "paddle/fluid/inference/anakin/convert/flatten.h"
 #include
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
 using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
-                                    const framework::BlockDesc &block_desc,
-                                    const framework::Scope &scope,
-                                    bool test_mode) {
+template <typename TargetT>
+void FlattenOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
@@ -41,12 +37,17 @@ void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
   std::vector<int> out_dims = {0, -1, 1, 1};
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Reshape", {input}, {output});
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
+  this->engine_->AddOp(op_name, "Reshape", {input}, {output});
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(flatten,
+                                  FlattenOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(flatten,
+                                 FlattenOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/flatten.h b/paddle/fluid/inference/anakin/convert/flatten.h
index c9cc0006eb2448917bbcc0952f5e2cae72b73de1..cd29b6e7d7384d71009633f71532279d141789be 100644
--- a/paddle/fluid/inference/anakin/convert/flatten.h
+++ b/paddle/fluid/inference/anakin/convert/flatten.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class FlattenOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class FlattenOpConverter : public AnakinOpConverter<TargetT> {
  public:
   FlattenOpConverter() = default;
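The fc converter feeds Anakin a transposed copy of the `[w_m, w_k]` Paddle weight (the `trans_weight_data` buffer in the hunks above). A minimal sketch of that row-major transpose:

```cpp
#include <vector>

// Transpose a row-major m x k matrix into k x m, the layout the fc
// converter stages in trans_weight_data before copying into the block.
std::vector<float> transpose(const std::vector<float> &w, int m, int k) {
  std::vector<float> t(w.size());
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < k; ++j) t[j * m + i] = w[i * k + j];
  return t;
}

int main() {
  // 2x3 row-major {{1,2,3},{4,5,6}} -> 3x2 {{1,4},{2,5},{3,6}}
  auto t = transpose({1, 2, 3, 4, 5, 6}, 2, 3);
  return t[1] == 4 ? 0 : 1;
}
```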
diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.cc b/paddle/fluid/inference/anakin/convert/im2sequence.cc
index 2cc330c3829f6033229748523c3df750b951626f..bd7e9b4b63c501e48231a21400c88bce1b76da4b 100644
--- a/paddle/fluid/inference/anakin/convert/im2sequence.cc
+++ b/paddle/fluid/inference/anakin/convert/im2sequence.cc
@@ -17,23 +17,16 @@
 #include
 #include
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op,
-                                      const framework::BlockDesc &block_desc,
-                                      const framework::Scope &scope,
-                                      bool test_mode) {
+template <typename TargetT>
+void Im2SequenceConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 0);
@@ -43,21 +36,24 @@ void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op,
   auto out_name = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "Im2Sequence", {x_name}, {out_name});
+  this->engine_->AddOp(op_name, "Im2Sequence", {x_name}, {out_name});
 
   std::vector<int> dilations = {1, 1};
   auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
   auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
   auto kernels = boost::get<std::vector<int>>(op_desc.GetAttr("kernels"));
 
-  engine_->AddOpAttr<PTuple<int>>(op_name, "paddings", paddings);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "window_size", kernels);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "dilations", dilations);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "paddings", paddings);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "window_size",
+                                                 kernels);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilations",
+                                                 dilations);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(im2sequence,
+                                  Im2SequenceConverter<::anakin::saber::NV>);
diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.h b/paddle/fluid/inference/anakin/convert/im2sequence.h
index 714679c1d9601136f1f54287bb58d611e852f3fe..97d1564b02817d4295397186ac8da8d95784d421 100644
--- a/paddle/fluid/inference/anakin/convert/im2sequence.h
+++ b/paddle/fluid/inference/anakin/convert/im2sequence.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class Im2SequenceConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Im2SequenceConverter : public AnakinOpConverter<TargetT> {
  public:
   Im2SequenceConverter() = default;
diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h
index bffab229ede775fdfe9b2f83eafe822bcb4fc597..71631a7745c9d261f6fb81164bc831aaf1d27a21 100644
--- a/paddle/fluid/inference/anakin/convert/op_converter.h
+++ b/paddle/fluid/inference/anakin/convert/op_converter.h
@@ -32,10 +32,10 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-using AnakinNvEngine =
-    AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
-
+template <typename TargetT>
 class AnakinOpConverter {
+  using AnakinEngineT = AnakinEngine<TargetT, ::anakin::Precision::FP32>;
+
  public:
   AnakinOpConverter() = default;
 
@@ -45,7 +45,7 @@ class AnakinOpConverter {
   void ConvertOp(const framework::proto::OpDesc &op,
                  const framework::BlockDesc &block_desc,
                  const std::unordered_set<std::string> &parameters,
-                 const framework::Scope &scope, AnakinNvEngine *engine,
+                 const framework::Scope &scope, AnakinEngineT *engine,
                  bool test_mode = false) {
     framework::OpDesc op_desc(op, nullptr);
     std::string op_type = op_desc.Type();
@@ -65,7 +65,7 @@ class AnakinOpConverter {
 
   void ConvertBlock(framework::BlockDesc *block_desc,
                     const std::unordered_set<std::string> &parameters,
-                    const framework::Scope &scope, AnakinNvEngine *engine) {
+                    const framework::Scope &scope, AnakinEngineT *engine) {
     std::unique_lock<std::mutex> lock(mutex_);
     framework::proto::BlockDesc *block = block_desc->Proto();
     for (auto i = 0; i < block->ops_size(); i++) {
@@ -79,7 +79,7 @@ class AnakinOpConverter {
       framework::BlockDesc *block_desc, framework::Scope *scope,
       const std::vector<std::string> &inputs,
       const std::unordered_set<std::string> &parameters,
-      const std::vector<std::string> &outputs, AnakinNvEngine *engine) {
+      const std::vector<std::string> &outputs, AnakinEngineT *engine) {
     ConvertBlock(block_desc, parameters, *scope, engine);
     // if the max_batch size
     int max_batch_size = engine->GetMaxBatchSize();
@@ -128,40 +128,60 @@ class AnakinOpConverter {
     engine->InitNet();
   }
 
-  void SetEngine(AnakinNvEngine *engine) { engine_ = engine; }
+  void SetEngine(AnakinEngineT *engine) { engine_ = engine; }
   virtual ~AnakinOpConverter() {}
 
 protected:
   bool test_mode_;
-  AnakinNvEngine *engine_{nullptr};
+  AnakinEngineT *engine_{nullptr};
 
 private:
-  std::unordered_map<std::string, AnakinOpConverter *> converters_;
+  std::unordered_map<std::string, AnakinOpConverter<TargetT> *> converters_;
   framework::Scope *scope_{nullptr};
   std::mutex mutex_;
 };
 
+template class AnakinOpConverter<::anakin::saber::NV>;
+template class AnakinOpConverter<::anakin::saber::X86>;
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__)               \
-  struct anakin_##op_type__##_converter                                    \
-      : public ::paddle::framework::Registrar {                            \
-    anakin_##op_type__##_converter() {                                     \
-      LOG(INFO) << "register convert " << #op_type__;                      \
-      ::paddle::inference::Registry<                                       \
-          ::paddle::inference::anakin::AnakinOpConverter>::Global()        \
-          .Register<::paddle::inference::anakin::Converter__>(#op_type__); \
-    }                                                                      \
-  };                                                                       \
-  anakin_##op_type__##_converter anakin_##op_type__##_converter__;         \
-  int TouchConverterRegister_anakin_##op_type__() {                        \
-    anakin_##op_type__##_converter__.Touch();                              \
-    return 0;                                                              \
+#define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__,          \
+                                          place_type__, place_class__)     \
+  struct anakin_##op_type__##_##place_type__##_converter                   \
+      : public ::paddle::framework::Registrar {                            \
+    anakin_##op_type__##_##place_type__##_converter() {                    \
+      LOG(INFO) << "register convert " << #op_type__ << " ";               \
+      ::paddle::inference::Registry<                                       \
+          ::paddle::inference::anakin::AnakinOpConverter<place_class__>>:: \
+          Global()                                                         \
+          .Register<::paddle::inference::anakin::Converter__>(#op_type__); \
+    }                                                                      \
+  };                                                                       \
+  anakin_##op_type__##_##place_type__##_converter                          \
+      anakin_##op_type__##_##place_type__##_converter__;                   \
+  int TouchConverterRegister_anakin_##op_type__##_##place_type__() {       \
+    anakin_##op_type__##_##place_type__##_converter__.Touch();             \
+    return 0;                                                              \
   }
 
-#define USE_ANAKIN_CONVERTER(op_type__)                             \
-  extern int TouchConverterRegister_anakin_##op_type__();           \
-  int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \
-      TouchConverterRegister_anakin_##op_type__();
+#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CUDA, \
+                                    ::anakin::saber::NV)
+
+#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
+  REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, CPU, \
+                                    ::anakin::saber::X86)
+
+#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__)                 \
+  extern int TouchConverterRegister_anakin_##op_type__##_##place_type__(); \
+  int use_op_converter_anakin_##op_type__##_##place_type__                 \
+      __attribute__((unused)) =                                            \
+          TouchConverterRegister_anakin_##op_type__##_##place_type__();
+
+#define USE_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA)
+
+#define USE_CPU_ANAKIN_CONVERTER(op_type__) \
+  USE_ANAKIN_CONVERTER_BASE(op_type__, CPU)
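The registrar macros above generate one global object per (op, place) pair plus a `Touch` function that `USE_*` translation units reference to keep the registration object linked in. A simplified, compilable model of the mechanism — the registry and macro here are stripped-down stand-ins, not the real `::paddle::inference::Registry`:

```cpp
#include <iostream>
#include <map>
#include <string>

// Function-local static avoids static-initialization-order issues.
std::map<std::string, std::string> &Registry() {
  static std::map<std::string, std::string> r;
  return r;
}

// Token pasting builds a distinct struct, global object, and Touch
// function per (op, place) pair, as in REGISTER_ANAKIN_OP_CONVERTER_BASE.
#define REGISTER_CONVERTER(op__, place__)                      \
  struct anakin_##op__##_##place__##_converter {               \
    anakin_##op__##_##place__##_converter() {                  \
      Registry()[#op__ "_" #place__] = #place__;               \
    }                                                          \
  };                                                           \
  anakin_##op__##_##place__##_converter                        \
      anakin_##op__##_##place__##_converter__;                 \
  int TouchConverterRegister_anakin_##op__##_##place__() {     \
    (void)anakin_##op__##_##place__##_converter__;             \
    return 0;                                                  \
  }

REGISTER_CONVERTER(relu, CPU)
REGISTER_CONVERTER(relu, CUDA)

int main() {
  std::cout << Registry().size() << "\n";  // 2: one entry per place
  return 0;
}
```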
diff --git a/paddle/fluid/inference/anakin/convert/pool2d.cc b/paddle/fluid/inference/anakin/convert/pool2d.cc
index 87eefe712a5ad2acd8c9b5abe521c832ad2c1ef2..d0206a5bf9b4eb1463da0d1040e37b4fad7aec81 100644
--- a/paddle/fluid/inference/anakin/convert/pool2d.cc
+++ b/paddle/fluid/inference/anakin/convert/pool2d.cc
@@ -17,23 +17,16 @@
 #include
 #include
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
 using anakin::PTuple;
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
-                                   const framework::BlockDesc &block_desc,
-                                   const framework::Scope &scope,
-                                   bool test_mode) {
+template <typename TargetT>
+void Pool2dOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
@@ -65,17 +58,22 @@ void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
     PADDLE_THROW("TensorRT unsupported pooling type!");
   }
 
-  engine_->AddOp(op_name, "Pooling", {x_name}, {y_name});
-  engine_->AddOpAttr<PTuple<int>>(op_name, "pool_size", ksize);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
-  engine_->AddOpAttr(op_name, "method", anakin_pool_type);
-  engine_->AddOpAttr(op_name, "global_pooling", global_pooling);
-  engine_->AddOpAttr(op_name, "cmp_out_shape_floor_as_conv", !ceil_mode);
+  this->engine_->AddOp(op_name, "Pooling", {x_name}, {y_name});
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "pool_size", ksize);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  this->engine_->AddOpAttr(op_name, "method", anakin_pool_type);
+  this->engine_->AddOpAttr(op_name, "global_pooling", global_pooling);
+  this->engine_->AddOpAttr(op_name, "cmp_out_shape_floor_as_conv", !ceil_mode);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(pool2d,
+                                  Pool2dOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(pool2d,
+                                 Pool2dOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/pool2d.h b/paddle/fluid/inference/anakin/convert/pool2d.h
index ec28e48ac848eff1d37c39063725624bf7d65723..0f85ec14b33dd6f3723ac7f3008bbcf865bbdc24 100644
--- a/paddle/fluid/inference/anakin/convert/pool2d.h
+++ b/paddle/fluid/inference/anakin/convert/pool2d.h
@@ -20,7 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class Pool2dOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class Pool2dOpConverter : public AnakinOpConverter<TargetT> {
  public:
   Pool2dOpConverter() = default;
diff --git a/paddle/fluid/inference/anakin/convert/relu.cc b/paddle/fluid/inference/anakin/convert/relu.cc
index 744066e88afc61d90fcb8afc28bffeafcb3b461f..71de3113cba1daa41615c04a11d036d1382087fb 100644
--- a/paddle/fluid/inference/anakin/convert/relu.cc
+++ b/paddle/fluid/inference/anakin/convert/relu.cc
@@ -16,19 +16,14 @@
 #include
 #include
 
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::saber::NV;
-using anakin::saber::Shape;
-
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
-                                 const framework::BlockDesc &block_desc,
-                                 const framework::Scope &scope,
-                                 bool test_mode) {
+template <typename TargetT>
+void ReluOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
@@ -37,14 +32,14 @@ void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
 
-  engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "alpha", 0);
+  this->engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "alpha", 0);
 }
 
-void LeakyReluOpConverter::operator()(const framework::proto::OpDesc &op,
-                                      const framework::BlockDesc &block_desc,
-                                      const framework::Scope &scope,
-                                      bool test_mode) {
+template <typename TargetT>
+void LeakyReluOpConverter<TargetT>::operator()(
+    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
+    const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
@@ -54,13 +49,19 @@ void LeakyReluOpConverter::operator()(const framework::proto::OpDesc &op,
   auto output_name = op_desc.Output("Out").front();
 
   float alpha = boost::get<float>(op_desc.GetAttr("alpha"));
-  engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "alpha", alpha);
+  this->engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
+  this->engine_->AddOpAttr(op_name, "alpha", alpha);
 }
 
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
-REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
-REGISTER_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter);
+#ifdef PADDLE_WITH_CUDA
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::NV>);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(leaky_relu,
+                                  LeakyReluOpConverter<::anakin::saber::NV>);
+#endif
+REGISTER_CPU_ANAKIN_OP_CONVERTER(relu, ReluOpConverter<::anakin::saber::X86>);
+REGISTER_CPU_ANAKIN_OP_CONVERTER(leaky_relu,
+                                 LeakyReluOpConverter<::anakin::saber::X86>);
diff --git a/paddle/fluid/inference/anakin/convert/relu.h b/paddle/fluid/inference/anakin/convert/relu.h
index d7b6b6934d6f743fa62c0261f575db09e3522688..74222a7ea1bb93d9b98f499b3e60e01b8bb1d8e2 100644
--- a/paddle/fluid/inference/anakin/convert/relu.h
+++ b/paddle/fluid/inference/anakin/convert/relu.h
@@ -22,7 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-class ReluOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class ReluOpConverter : public AnakinOpConverter<TargetT> {
  public:
   ReluOpConverter() = default;
 
@@ -33,7 +34,8 @@ class ReluOpConverter : public AnakinOpConverter {
   virtual ~ReluOpConverter() {}
 };
 
-class LeakyReluOpConverter : public AnakinOpConverter {
+template <typename TargetT>
+class LeakyReluOpConverter : public AnakinOpConverter<TargetT> {
  public:
   LeakyReluOpConverter() = default;
+class LeakyReluOpConverter : public AnakinOpConverter { public: LeakyReluOpConverter() = default; diff --git a/paddle/fluid/inference/anakin/convert/reshape.cc b/paddle/fluid/inference/anakin/convert/reshape.cc index 17e0a1acb5f4e08e848e91bbb051757d85796c0a..a6696e8e81b72c7a1b1b7fe1697515adfb64d46c 100644 --- a/paddle/fluid/inference/anakin/convert/reshape.cc +++ b/paddle/fluid/inference/anakin/convert/reshape.cc @@ -15,20 +15,16 @@ #include "paddle/fluid/inference/anakin/convert/reshape.h" #include -using anakin::graph::GraphGlobalMem; -using anakin::AK_FLOAT; -using anakin::saber::NV; -using anakin::saber::Shape; using anakin::PTuple; namespace paddle { namespace inference { namespace anakin { -void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::BlockDesc &block_desc, - const framework::Scope &scope, - bool test_mode) { +template +void ReshapeOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL); PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL); @@ -37,17 +33,23 @@ void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op, auto output = op_desc.Output("Out").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); - engine_->AddOp(op_name, "Reshape", {input}, {output}); + this->engine_->AddOp(op_name, "Reshape", {input}, {output}); auto shape = boost::get>(op_desc.GetAttr("shape")); if (shape.size() < 4) { shape.insert(shape.end(), 4 - shape.size(), 1); } - engine_->AddOpAttr>(op_name, "dims", shape); + this->engine_->template AddOpAttr>(op_name, "dims", shape); } } // namespace anakin } // namespace inference } // namespace paddle -REGISTER_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter); +#ifdef PADDLE_WITH_CUDA +REGISTER_CUDA_ANAKIN_OP_CONVERTER(reshape, + ReshapeOpConverter<::anakin::saber::NV>); +#endif + +REGISTER_CPU_ANAKIN_OP_CONVERTER(reshape, + ReshapeOpConverter<::anakin::saber::X86>); diff --git a/paddle/fluid/inference/anakin/convert/reshape.h b/paddle/fluid/inference/anakin/convert/reshape.h index 9ce2ea2a4f3f8802225fe8ca8ed602c9f7d27968..bd0fd08c5cb91382560cf15e0cb83a9d18705ca5 100644 --- a/paddle/fluid/inference/anakin/convert/reshape.h +++ b/paddle/fluid/inference/anakin/convert/reshape.h @@ -20,7 +20,8 @@ namespace paddle { namespace inference { namespace anakin { -class ReshapeOpConverter : public AnakinOpConverter { +template +class ReshapeOpConverter : public AnakinOpConverter { public: ReshapeOpConverter() = default; diff --git a/paddle/fluid/inference/anakin/convert/roi_align.cc b/paddle/fluid/inference/anakin/convert/roi_align.cc index 0f2b08df08a9adb5bce60bc65c5d2fa49698e38f..152578b50fec38a486658e8003dc5abacd31b23b 100644 --- a/paddle/fluid/inference/anakin/convert/roi_align.cc +++ b/paddle/fluid/inference/anakin/convert/roi_align.cc @@ -25,10 +25,10 @@ namespace paddle { namespace inference { namespace anakin { -void RoiAlignOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::BlockDesc &block_desc, - const framework::Scope &scope, - bool test_mode) { +template +void RoiAlignOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("ROIs").size(), 1); @@ 
-44,16 +44,21 @@ void RoiAlignOpConverter::operator()(const framework::proto::OpDesc &op, auto pooled_width = boost::get(op_desc.GetAttr("pooled_width")); auto sampling_ratio = boost::get(op_desc.GetAttr("sampling_ratio")); - engine_->AddOp(op_name, "RoiAlign", {input_x_name, input_rois_name}, - {output_name}); - engine_->AddOpAttr(op_name, "spatial_scale", spatial_scale); - engine_->AddOpAttr(op_name, "pooled_height", pooled_height); - engine_->AddOpAttr(op_name, "pooled_width", pooled_width); - engine_->AddOpAttr(op_name, "sampling_ratio", sampling_ratio); + this->engine_->AddOp(op_name, "RoiAlign", {input_x_name, input_rois_name}, + {output_name}); + this->engine_->AddOpAttr(op_name, "spatial_scale", spatial_scale); + this->engine_->AddOpAttr(op_name, "pooled_height", pooled_height); + this->engine_->AddOpAttr(op_name, "pooled_width", pooled_width); + this->engine_->AddOpAttr(op_name, "sampling_ratio", sampling_ratio); } } // namespace anakin } // namespace inference } // namespace paddle -REGISTER_ANAKIN_OP_CONVERTER(roi_align, RoiAlignOpConverter); +#ifdef PADDLE_WITH_CUDA +REGISTER_CUDA_ANAKIN_OP_CONVERTER(roi_align, + RoiAlignOpConverter<::anakin::saber::NV>); +#endif +REGISTER_CPU_ANAKIN_OP_CONVERTER(roi_align, + RoiAlignOpConverter<::anakin::saber::X86>); diff --git a/paddle/fluid/inference/anakin/convert/roi_align.h b/paddle/fluid/inference/anakin/convert/roi_align.h index c6df4754ba9b5ef80bcfaebd2cf1a973e5b8a61a..93c28f3e0556294d6934e25db94f35870a99c99a 100644 --- a/paddle/fluid/inference/anakin/convert/roi_align.h +++ b/paddle/fluid/inference/anakin/convert/roi_align.h @@ -22,7 +22,8 @@ namespace paddle { namespace inference { namespace anakin { -class RoiAlignOpConverter : public AnakinOpConverter { +template +class RoiAlignOpConverter : public AnakinOpConverter { public: RoiAlignOpConverter() = default; diff --git a/paddle/fluid/inference/anakin/convert/scale.cc b/paddle/fluid/inference/anakin/convert/scale.cc index dd68af4f79a6d1e8add04bde6a6890bca1b00d14..d72f9a5fa0c28de2f9e5642055d28dfff635f52f 100644 --- a/paddle/fluid/inference/anakin/convert/scale.cc +++ b/paddle/fluid/inference/anakin/convert/scale.cc @@ -16,19 +16,14 @@ #include #include -using anakin::graph::GraphGlobalMem; -using anakin::AK_FLOAT; -using anakin::saber::NV; -using anakin::saber::Shape; - namespace paddle { namespace inference { namespace anakin { -void ScaleOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::BlockDesc &block_desc, - const framework::Scope &scope, - bool test_mode) { +template +void ScaleOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); @@ -44,14 +39,14 @@ void ScaleOpConverter::operator()(const framework::proto::OpDesc &op, PADDLE_ENFORCE(bias_after_scale, "The anakin scale layer only support bias after scale now."); - engine_->AddOp(op_name, "Power", {input_name}, {output_name}); - engine_->AddOpAttr(op_name, "shift", bias); - engine_->AddOpAttr(op_name, "scale", scale); - engine_->AddOpAttr(op_name, "power", static_cast(1.0)); + this->engine_->AddOp(op_name, "Power", {input_name}, {output_name}); + this->engine_->AddOpAttr(op_name, "shift", bias); + this->engine_->AddOpAttr(op_name, "scale", scale); + this->engine_->AddOpAttr(op_name, "power", static_cast(1.0)); } } // namespace anakin } // namespace inference } // 
namespace paddle -REGISTER_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter); +#ifdef PADDLE_WITH_CUDA +REGISTER_CUDA_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter<::anakin::saber::NV>); +#endif diff --git a/paddle/fluid/inference/anakin/convert/scale.h b/paddle/fluid/inference/anakin/convert/scale.h index ba3bcdd21494a4eeb6190aa8383e17e1b828b5f3..92d936b526226ad0734c264f4d276a972e51010d 100644 --- a/paddle/fluid/inference/anakin/convert/scale.h +++ b/paddle/fluid/inference/anakin/convert/scale.h @@ -22,7 +22,8 @@ namespace paddle { namespace inference { namespace anakin { -class ScaleOpConverter : public AnakinOpConverter { +template +class ScaleOpConverter : public AnakinOpConverter { public: ScaleOpConverter() = default; diff --git a/paddle/fluid/inference/anakin/convert/softmax.cc b/paddle/fluid/inference/anakin/convert/softmax.cc index a6c1e971b16fa7fe6a074bcb2cdf391410f8871f..851dafa8bdf63d9b9f1f2f08db3e801cf24709fd 100644 --- a/paddle/fluid/inference/anakin/convert/softmax.cc +++ b/paddle/fluid/inference/anakin/convert/softmax.cc @@ -14,19 +14,14 @@ #include "paddle/fluid/inference/anakin/convert/softmax.h" -using anakin::graph::GraphGlobalMem; -using anakin::AK_FLOAT; -using anakin::saber::NV; -using anakin::saber::Shape; - namespace paddle { namespace inference { namespace anakin { -void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::BlockDesc &block_desc, - const framework::Scope &scope, - bool test_mode) { +template +void SoftMaxOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL); @@ -41,12 +36,18 @@ void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op, auto input_shape_in_fluid = input_var_desc->GetShape(); size_t input_dims = input_shape_in_fluid.size(); - engine_->AddOp(op_name, "Softmax", {input}, {output}); - engine_->AddOpAttr(op_name, "axis", static_cast(input_dims - 1)); + this->engine_->AddOp(op_name, "Softmax", {input}, {output}); + this->engine_->AddOpAttr(op_name, "axis", static_cast(input_dims - 1)); } } // namespace anakin } // namespace inference } // namespace paddle -REGISTER_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter); +#ifdef PADDLE_WITH_CUDA +REGISTER_CUDA_ANAKIN_OP_CONVERTER(softmax, + SoftMaxOpConverter<::anakin::saber::NV>); +#endif + +REGISTER_CPU_ANAKIN_OP_CONVERTER(softmax, + SoftMaxOpConverter<::anakin::saber::X86>); diff --git a/paddle/fluid/inference/anakin/convert/softmax.h b/paddle/fluid/inference/anakin/convert/softmax.h index a16356d5bb61ac2f3b4f7751e257ce36ca604bf1..c2421f9eb9d2e5a2b867a66ea8a95ba9b462cda7 100644 --- a/paddle/fluid/inference/anakin/convert/softmax.h +++ b/paddle/fluid/inference/anakin/convert/softmax.h @@ -20,7 +20,8 @@ namespace paddle { namespace inference { namespace anakin { -class SoftMaxOpConverter : public AnakinOpConverter { +template +class SoftMaxOpConverter : public AnakinOpConverter { public: SoftMaxOpConverter() = default; diff --git a/paddle/fluid/inference/anakin/convert/split.cc b/paddle/fluid/inference/anakin/convert/split.cc index ec582c1812623cd4bcefa2097015ba258f6bacbb..f99233e78b59fc8631111c4f843a1a88acf629cd 100644 --- a/paddle/fluid/inference/anakin/convert/split.cc +++ b/paddle/fluid/inference/anakin/convert/split.cc @@ -16,23 +16,16 @@ #include #include -using anakin::graph::GraphGlobalMem; -using anakin::AK_FLOAT; -using anakin::Precision; -using anakin::saber::NV; -using anakin::saber::X86;
-using anakin::saber::Shape; -using anakin::PBlock; using anakin::PTuple; namespace paddle { namespace inference { namespace anakin { -void SplitOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::BlockDesc &block_desc, - const framework::Scope &scope, - bool test_mode) { +template +void SplitOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); auto input_name = op_desc.Input("X").front(); auto y_names = op_desc.Output("Out"); @@ -51,14 +44,19 @@ void SplitOpConverter::operator()(const framework::proto::OpDesc &op, num_sum += output_lengths[i]; slice_point.push_back(num_sum); } - engine_->AddOp(op_name, "Slice", {input_name}, y_names); - engine_->AddOpAttr(op_name, "axis", axis); - engine_->AddOpAttr>(op_name, "slice_point", slice_point); + this->engine_->AddOp(op_name, "Slice", {input_name}, y_names); + this->engine_->AddOpAttr(op_name, "axis", axis); + this->engine_->template AddOpAttr>(op_name, "slice_point", + slice_point); // slice_dim is useless in anakin - engine_->AddOpAttr(op_name, "slice_dim", 4); + this->engine_->AddOpAttr(op_name, "slice_dim", 4); } } // namespace anakin } // namespace inference } // namespace paddle -REGISTER_ANAKIN_OP_CONVERTER(split, SplitOpConverter); +#ifdef PADDLE_WITH_CUDA +REGISTER_CUDA_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::NV>); +#endif + +REGISTER_CPU_ANAKIN_OP_CONVERTER(split, SplitOpConverter<::anakin::saber::X86>); diff --git a/paddle/fluid/inference/anakin/convert/split.h b/paddle/fluid/inference/anakin/convert/split.h index 184112e589e2bbdb30bc7a5d2cd053b7f3732a58..989d7acd500e9f7dcd91079489b98c711703f71e 100644 --- a/paddle/fluid/inference/anakin/convert/split.h +++ b/paddle/fluid/inference/anakin/convert/split.h @@ -20,7 +20,8 @@ namespace paddle { namespace inference { namespace anakin { -class SplitOpConverter : public AnakinOpConverter { +template +class SplitOpConverter : public AnakinOpConverter { public: SplitOpConverter() = default; diff --git a/paddle/fluid/inference/anakin/convert/sum.cc b/paddle/fluid/inference/anakin/convert/sum.cc index 2a4178e2371389b44557d44ea526c7cc4a731d16..7fc9d76407884921c9a36544e8d5c3e1218596cc 100644 --- a/paddle/fluid/inference/anakin/convert/sum.cc +++ b/paddle/fluid/inference/anakin/convert/sum.cc @@ -17,22 +17,17 @@ #include #include -using anakin::graph::GraphGlobalMem; -using anakin::AK_FLOAT; -using anakin::Precision; -using anakin::saber::NV; -using anakin::saber::X86; -using anakin::saber::Shape; -using anakin::PBlock; using anakin::PTuple; namespace paddle { namespace inference { namespace anakin { -void SumOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::BlockDesc &block_desc, - const framework::Scope &scope, bool test_mode) { +template +void SumOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::BlockDesc &block_desc, + const framework::Scope &scope, + bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2); PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); @@ -43,13 +38,17 @@ void SumOpConverter::operator()(const framework::proto::OpDesc &op, std::vector coeff = {1, 1}; std::string elementwise_type = "Add"; - engine_->AddOp(op_name, "Eltwise", input_names, {out_name}); - engine_->AddOpAttr>(op_name, "coeff", coeff); - engine_->AddOpAttr(op_name, "type", elementwise_type); + 
this->engine_->AddOp(op_name, "Eltwise", input_names, {out_name}); + this->engine_->template AddOpAttr>(op_name, "coeff", coeff); + this->engine_->template AddOpAttr(op_name, "type", + elementwise_type); } } // namespace anakin } // namespace inference } // namespace paddle -REGISTER_ANAKIN_OP_CONVERTER(sum, SumOpConverter); +#ifdef PADDLE_WITH_CUDA +REGISTER_CUDA_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::NV>); +#endif +REGISTER_CPU_ANAKIN_OP_CONVERTER(sum, SumOpConverter<::anakin::saber::X86>); diff --git a/paddle/fluid/inference/anakin/convert/sum.h b/paddle/fluid/inference/anakin/convert/sum.h index b5d402b77fcf555ffaf910f8c9d1b7337181a64b..27c15a82ebd47196513122648b0ee09eb68d852d 100644 --- a/paddle/fluid/inference/anakin/convert/sum.h +++ b/paddle/fluid/inference/anakin/convert/sum.h @@ -20,7 +20,8 @@ namespace paddle { namespace inference { namespace anakin { -class SumOpConverter : public AnakinOpConverter { +template +class SumOpConverter : public AnakinOpConverter { public: SumOpConverter() = default; diff --git a/paddle/fluid/inference/anakin/convert/test_activation_op.cc b/paddle/fluid/inference/anakin/convert/test_activation_op.cc index 8bedd4a749a645829658291310347eeed1c0ea49..18b8b6f3b63bc630412470e3c017ec9df0b492cf 100644 --- a/paddle/fluid/inference/anakin/convert/test_activation_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_activation_op.cc @@ -21,12 +21,14 @@ namespace paddle { namespace inference { namespace anakin { -static void test_activation_op(const std::string &op_type) { - auto *converter = Registry::Global().Lookup(op_type); - PADDLE_ENFORCE(converter != nullptr); +template +static void test_activation_op(const std::string& op_type, + const platform::DeviceContext& context, + bool use_gpu) { std::unordered_set parameters; framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("act-X", {10, 6, 1, 1}); validator.DeclOutputVar("act-Out", {10, 6, 1, 1}); framework::OpDesc desc; @@ -41,13 +43,42 @@ static void test_activation_op(const std::string &op_type) { validator.Execute(5); } -TEST(sigm_op, test) { test_activation_op("sigmoid"); } -TEST(tanh_op, test) { test_activation_op("tanh"); } +#ifdef PADDLE_WITH_CUDA +TEST(sigm_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_activation_op<::anakin::saber::NV>("sigmoid", ctx, true); +} + +TEST(tanh_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_activation_op<::anakin::saber::NV>("tanh", ctx, true); +} +#endif + +TEST(sigm_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_activation_op<::anakin::saber::X86>("sigmoid", ctx, false); +} + +TEST(tanh_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_activation_op<::anakin::saber::X86>("tanh", ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(sigmoid); USE_OP(tanh); + +USE_CPU_ANAKIN_CONVERTER(sigmoid); +USE_CPU_ANAKIN_CONVERTER(tanh); +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(sigmoid); USE_ANAKIN_CONVERTER(tanh); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc b/paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc index eb4f4e12eec29d3fc5092d6a3c59bf0019411bf2..123f93370b82a9e7211c063fefbcb63a68188f2b 100644 --- 
a/paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_affine_channel_op.cc @@ -21,16 +21,19 @@ namespace paddle { namespace inference { namespace anakin { -TEST(affine_channel, native) { +template +void test_affine_channel_op(const platform::DeviceContext& context, + bool use_gpu) { // Declare the difference between the inputs. std::unordered_set parameters({"scale", "bias"}); framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("x", {1, 3, 5, 2}); validator.DeclOutputVar("out", {1, 3, 5, 2}); - validator.DeclParamVar("scale", {1, 3, 1, 1}); - validator.DeclParamVar("bias", {1, 3, 1, 1}); + validator.DeclParamVar("scale", {3}); + validator.DeclParamVar("bias", {3}); // Prepare Op descriptions. framework::OpDesc desc; @@ -47,9 +50,26 @@ TEST(affine_channel, native) { validator.Execute(1); } +#ifdef PADDLE_WITH_CUDA +TEST(affine_channel_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_affine_channel_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(affine_channel_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_affine_channel_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(affine_channel); +USE_CPU_ANAKIN_CONVERTER(affine_channel); +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(affine_channel); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc b/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc index 2832e1c8d167c646c9049beebc57a82fe416e62c..6a6675b6abf5d16b86593475ed204b66b1a5bf87 100644 --- a/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc @@ -19,12 +19,14 @@ namespace paddle { namespace inference { namespace anakin { -TEST(batch_norm_op, test) { +template +void test_batchnorm_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters( {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean", "batch_norm_variance"}); framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); std::vector param_shape{2}; validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5}); @@ -64,8 +66,26 @@ TEST(batch_norm_op, test) { validator.Execute(1, neglected_output); } +#ifdef PADDLE_WITH_CUDA +TEST(batch_norm_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_batchnorm_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(batch_norm_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_batchnorm_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(batch_norm); +USE_CPU_ANAKIN_CONVERTER(batch_norm); + +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(batch_norm); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_concat_op.cc b/paddle/fluid/inference/anakin/convert/test_concat_op.cc index ecf44def5a2429360f0bcb92f00a0423e1d491cd..4ea3305e4664f050d491b92e2431dd9cec9b5011 100644 --- a/paddle/fluid/inference/anakin/convert/test_concat_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_concat_op.cc @@ -21,10 +21,12 @@ namespace paddle { namespace 
inference { namespace anakin { -TEST(concat_op, test) { +template +void test_concat_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters({""}); framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("concat_x1", {1, 2, 1, 1}); validator.DeclInputVar("concat_x2", {1, 3, 1, 1}); validator.DeclInputVar("concat_x3", {1, 1, 1, 1}); @@ -44,31 +46,26 @@ TEST(concat_op, test) { validator.Execute(1); } -TEST(concat_op, test2) { - std::unordered_set parameters({""}); - framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); - validator.DeclInputVar("concat_x1", {1, 4}); - validator.DeclInputVar("concat_x2", {3, 4}); - validator.DeclInputVar("concat_x3", {2, 4}); - validator.DeclOutputVar("concat_out", {6, 4}); - - // Prepare Op description - framework::OpDesc desc; - desc.SetType("concat"); - desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"}); - desc.SetOutput("Out", {"concat_out"}); - - int axis = 0; - desc.SetAttr("axis", axis); - - validator.SetOp(*desc.Proto()); +#ifdef PADDLE_WITH_CUDA +TEST(concat_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_concat_op<::anakin::saber::NV>(ctx, true); +} +#endif - validator.Execute(1); +TEST(concat_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_concat_op<::anakin::saber::X86>(ctx, false); } } // namespace anakin } // namespace inference } // namespace paddle USE_OP(concat); +USE_CPU_ANAKIN_CONVERTER(concat); + +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(concat); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc index 6d93e50bc96b08b6ef7dd7c9d836038e335daae3..fa1b319bc1c65cece9a77c0f2fdd4bcc3d249458 100644 --- a/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc @@ -21,13 +21,12 @@ namespace paddle { namespace inference { namespace anakin { -TEST(conv2d_op, test) { - auto* conv2d_converter = - Registry::Global().Lookup("conv2d"); - ASSERT_TRUE(conv2d_converter != nullptr); +template +void test_conv2d_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters({"conv2d-Y"}); framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("conv2d-X", {1, 3, 3, 3}); validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1}); validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3}); @@ -54,9 +53,27 @@ TEST(conv2d_op, test) { validator.Execute(3); } +#ifdef PADDLE_WITH_CUDA +TEST(conv2d_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_conv2d_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(conv2d_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_conv2d_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(conv2d); +USE_CPU_ANAKIN_CONVERTER(conv2d); + +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(conv2d); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_dropout_op.cc b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc index b2de5ae0a6e58eb25a4588571686a25500fe546c..a252dc74c0bf4f4472c0da8cd43ad970cb1e17c1 
100644 --- a/paddle/fluid/inference/anakin/convert/test_dropout_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc @@ -21,10 +21,12 @@ namespace paddle { namespace inference { namespace anakin { -TEST(dropout_op, native) { +template +void test_dropout_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters; framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("x", {1, 1, 2, 2}); validator.DeclOutputVar("out", {1, 1, 2, 2}); validator.DeclOutputVar("mask", {1, 1, 2, 2}); @@ -45,9 +47,26 @@ TEST(dropout_op, native) { validator.Execute(1, neglected_output); } +#ifdef PADDLE_WITH_CUDA +TEST(dropout_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_dropout_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(dropout_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_dropout_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(dropout); +USE_CPU_ANAKIN_CONVERTER(dropout); +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(dropout); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc index 3a437f5fdb565609667b7a862c9b2bb13cdbeded..ee1bedcfb25eba6745f68b5aa43596e196288628 100644 --- a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc @@ -21,10 +21,14 @@ namespace paddle { namespace inference { namespace anakin { -static void test_elementwise_op(const std::string &op_type) { +template +static void test_elementwise_op(const std::string& op_type, + const platform::DeviceContext& context, + bool use_gpu) { std::unordered_set parameters; framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("x", {1, 1, 2, 2}); validator.DeclInputVar("y", {1, 1, 2, 2}); validator.DeclOutputVar("out", {1, 1, 2, 2}); @@ -43,14 +47,41 @@ static void test_elementwise_op(const std::string &op_type) { validator.Execute(1); } -TEST(elementwise_op, native_add) { test_elementwise_op("elementwise_add"); } -TEST(elementwise_op, native_mul) { test_elementwise_op("elementwise_mul"); } +#ifdef PADDLE_WITH_CUDA +TEST(elementwise_op, native_add_gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_elementwise_op<::anakin::saber::NV>("elementwise_add", ctx, true); +} +TEST(elementwise_op, native_mul_gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_elementwise_op<::anakin::saber::NV>("elementwise_mul", ctx, true); +} +#endif + +TEST(elementwise_op, native_add_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_elementwise_op<::anakin::saber::X86>("elementwise_add", ctx, false); +} + +TEST(elementwise_op, native_mul_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_elementwise_op<::anakin::saber::X86>("elementwise_mul", ctx, false); +} } // namespace anakin } // namespace inference } // namespace paddle USE_OP(elementwise_add); -USE_ANAKIN_CONVERTER(elementwise_add); USE_OP(elementwise_mul); +#ifdef PADDLE_WITH_CUDA 
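+// The NV-specialized converters are registered only under PADDLE_WITH_CUDA, so the matching Touch calls must stay behind the same guard.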
+USE_ANAKIN_CONVERTER(elementwise_add); USE_ANAKIN_CONVERTER(elementwise_mul); +#endif + +USE_CPU_ANAKIN_CONVERTER(elementwise_add); +USE_CPU_ANAKIN_CONVERTER(elementwise_mul); diff --git a/paddle/fluid/inference/anakin/convert/test_fc_op.cc b/paddle/fluid/inference/anakin/convert/test_fc_op.cc index ee6d1dc291fe3733ff2e9f66dd453120fa266a55..5510008d3c4f2e59fa7a4ffa3ce49f6d1463beac 100644 --- a/paddle/fluid/inference/anakin/convert/test_fc_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_fc_op.cc @@ -20,13 +20,13 @@ namespace paddle { namespace inference { namespace anakin { -TEST(fc_op, test) { - auto* fc_converter = Registry::Global().Lookup("fc"); - ASSERT_TRUE(fc_converter); - +template +void test_mul_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters({"mul_y"}); framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("mul_x", {1, 1, 2, 2}); validator.DeclParamVar("mul_y", {4, 2}); validator.DeclOutputVar("mul_out", {1, 2}); @@ -42,9 +42,26 @@ TEST(fc_op, test) { validator.Execute(10); } +#ifdef PADDLE_WITH_CUDA +TEST(mul_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_mul_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(mul_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_mul_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(mul); +USE_CPU_ANAKIN_CONVERTER(fc); +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(fc); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_flatten_op.cc b/paddle/fluid/inference/anakin/convert/test_flatten_op.cc index d13281f11f03fdd75e585bce8b30e8780d81f7d7..86bc1d810f8943b157f963d8f0a33bf3351b0d1a 100644 --- a/paddle/fluid/inference/anakin/convert/test_flatten_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_flatten_op.cc @@ -20,13 +20,12 @@ namespace paddle { namespace inference { namespace anakin { -TEST(flatten_op, test) { - auto *converter = Registry::Global().Lookup("flatten"); - ASSERT_TRUE(converter); - +template +void test_flatten_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters; framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("flatten-X", {3, 10, 10, 4}); validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1}); framework::OpDesc desc; @@ -42,10 +41,27 @@ TEST(flatten_op, test) { validator.Execute(5); } +#ifdef PADDLE_WITH_CUDA +TEST(flatten_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_flatten_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(flatten_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_flatten_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(reshape); USE_OP_ITSELF(flatten); +USE_CPU_ANAKIN_CONVERTER(flatten); +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(flatten); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc b/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc index 1ac019467721605c539c7ada452d04d5134fa341..b1be7f93c67c36de9da022cd6b75fb0365d2482d 100644 --- 
a/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc @@ -19,15 +19,14 @@ namespace paddle { namespace inference { namespace anakin { -void test_pool2d(bool global_pooling, bool ceil_mode, +template +void test_pool2d(const platform::DeviceContext& context, bool use_gpu, + bool global_pooling, bool ceil_mode, std::string pool_type = "max") { - auto* pool2d_converter = - Registry::Global().Lookup("pool2d"); - ASSERT_TRUE(pool2d_converter); - framework::Scope scope; std::unordered_set parameters; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); // The ITensor's Dims should not contain the batch size. // So, the ITensor's Dims of input and output should be C * H * W. @@ -64,56 +63,61 @@ void test_pool2d(bool global_pooling, bool ceil_mode, validator.Execute(1); } -void test_pool2d2(bool global_pooling, bool ceil_mode, - std::string pool_type = "max") { - auto* pool2d_converter = - Registry::Global().Lookup("pool2d"); - ASSERT_TRUE(pool2d_converter); - - framework::Scope scope; - std::unordered_set parameters; - AnakinConvertValidation validator(parameters, &scope); - - // The ITensor's Dims should not contain the batch size. - // So, the ITensor's Dims of input and output should be C * H * W. - validator.DeclInputVar("pool2d_x", {1, 1, 17, 17}); - validator.DeclOutputVar("pool2d_out", {1, 1, 17, 17}); - - // Prepare Op description - framework::OpDesc desc; - desc.SetType("pool2d"); - desc.SetInput("X", {"pool2d_x"}); - desc.SetOutput("Out", {"pool2d_out"}); - - std::vector ksize({3, 3}); - std::vector strides({1, 1}); - std::vector paddings({1, 1}); - std::string pooling_t = pool_type; +#ifdef PADDLE_WITH_CUDA +TEST(Pool2dOpConverter, normal) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_pool2d<::anakin::saber::NV>(ctx, true, false, false); +} +TEST(Pool2dOpConverter, test_global_pooling) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_pool2d<::anakin::saber::NV>(ctx, true, true, false); +} - desc.SetAttr("pooling_type", pooling_t); - desc.SetAttr("ksize", ksize); - desc.SetAttr("strides", strides); - desc.SetAttr("paddings", paddings); - desc.SetAttr("global_pooling", global_pooling); - desc.SetAttr("ceil_mode", true); +TEST(Pool2dOpConverter, max_ceil_test) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_pool2d<::anakin::saber::NV>(ctx, true, false, true); +} - LOG(INFO) << "set OP"; - validator.SetOp(*desc.Proto()); - LOG(INFO) << "execute"; +TEST(Pool2dOpConverter, avg_ceil_test) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_pool2d<::anakin::saber::NV>(ctx, true, false, true, "avg"); +} +#endif - validator.Execute(1); +TEST(Pool2dOpConverter, normal_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_pool2d<::anakin::saber::X86>(ctx, false, false, false); +} +TEST(Pool2dOpConverter, test_global_pooling_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_pool2d<::anakin::saber::X86>(ctx, false, true, false); } -TEST(Pool2dOpConverter, normal) { test_pool2d(false, false); } -TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true, false); } +TEST(Pool2dOpConverter, max_ceil_test_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + 
test_pool2d<::anakin::saber::X86>(ctx, false, false, true); +} -TEST(Pool2dOpConverter, max_ceil_test) { test_pool2d(false, true); } -TEST(Pool2dOpConverter, avg_ceil_test) { test_pool2d(false, true, "avg"); } -TEST(Pool2dOpConverter, avg_ceil_test2) { test_pool2d2(false, true, "avg"); } +TEST(Pool2dOpConverter, avg_ceil_test_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_pool2d<::anakin::saber::X86>(ctx, false, false, true, "avg"); +} } // namespace anakin } // namespace inference } // namespace paddle USE_OP(pool2d); +USE_CPU_ANAKIN_CONVERTER(pool2d); + +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(pool2d); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_relu_op.cc b/paddle/fluid/inference/anakin/convert/test_relu_op.cc index cba19a558575422d3ec1cc0641cad40aa09b271f..369f1920f249436d8ab3a72e4b2cb36ec512514e 100644 --- a/paddle/fluid/inference/anakin/convert/test_relu_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_relu_op.cc @@ -21,12 +21,14 @@ namespace paddle { namespace inference { namespace anakin { -static void test_relu_op(const std::string &op_type) { - auto *converter = Registry::Global().Lookup(op_type); - PADDLE_ENFORCE(converter != nullptr); +template +static void test_activation_op(const std::string& op_type, + const platform::DeviceContext& context, + bool use_gpu) { std::unordered_set parameters; framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("act-X", {10, 6, 1, 1}); validator.DeclOutputVar("act-Out", {10, 6, 1, 1}); framework::OpDesc desc; @@ -44,14 +46,44 @@ static void test_relu_op(const std::string &op_type) { validator.Execute(5); } -TEST(activation, relu) { test_relu_op("relu"); } -TEST(activation, leaky_relu) { test_relu_op("leaky_relu"); } +#ifdef PADDLE_WITH_CUDA +TEST(relu_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_activation_op<::anakin::saber::NV>("relu", ctx, true); +} + +TEST(leaky_relu_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_activation_op<::anakin::saber::NV>("leaky_relu", ctx, true); +} +#endif + +/* The X86 kernels for these activations appear to be buggy; the CPU tests are disabled until that is fixed. +TEST(relu_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_activation_op<::anakin::saber::X86>("relu", ctx, false); +} + +TEST(leaky_relu_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_activation_op<::anakin::saber::X86>("leaky_relu", ctx, false); +} +*/ } // namespace anakin } // namespace inference } // namespace paddle USE_OP(relu); -USE_ANAKIN_CONVERTER(relu); USE_OP(leaky_relu); +USE_CPU_ANAKIN_CONVERTER(relu); +USE_CPU_ANAKIN_CONVERTER(leaky_relu); + +#ifdef PADDLE_WITH_CUDA +USE_ANAKIN_CONVERTER(relu); USE_ANAKIN_CONVERTER(leaky_relu); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_reshape_op.cc b/paddle/fluid/inference/anakin/convert/test_reshape_op.cc index 306ebf510f29a87ca1ffa6df86e08f86b3f8ffbb..3facdbe9c6944d2ad991a6ef8cad0911adfaadd1 100644 --- a/paddle/fluid/inference/anakin/convert/test_reshape_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_reshape_op.cc @@ -20,12 +20,12 @@ namespace paddle { namespace inference { namespace anakin { -TEST(reshape, test) { - auto* converter = Registry::Global().Lookup("reshape"); - ASSERT_TRUE(converter); +template +void test_reshape1_op(const platform::DeviceContext&
context, bool use_gpu) { framework::Scope scope; std::unordered_set parameters; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); // validator.DeclInputVar("reshape-X", {2, 3, 3, 1}); // validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3}); @@ -45,10 +45,12 @@ TEST(reshape, test) { validator.Execute(1); } -TEST(reshape, test2) { +template +void test_reshape2_op(const platform::DeviceContext& context, bool use_gpu) { framework::Scope scope; std::unordered_set parameters; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("reshape-X", {1, 2, 4}); validator.DeclOutputVar("reshape-Out", {1, 4, 2}); @@ -66,9 +68,39 @@ TEST(reshape, test2) { validator.Execute(1); } +#ifdef PADDLE_WITH_CUDA +TEST(reshape1_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_reshape1_op<::anakin::saber::NV>(ctx, true); +} + +TEST(reshape2_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_reshape2_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(reshape1_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_reshape1_op<::anakin::saber::X86>(ctx, false); +} + +TEST(reshape2_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_reshape2_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(reshape); +USE_CPU_ANAKIN_CONVERTER(reshape); + +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(reshape); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_softmax_op.cc b/paddle/fluid/inference/anakin/convert/test_softmax_op.cc index 8c14fae0a67b9e488cf072535868a34f6195ab71..e15d19135b44cf88a91398ee5406b641b63a810b 100644 --- a/paddle/fluid/inference/anakin/convert/test_softmax_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_softmax_op.cc @@ -20,12 +20,12 @@ namespace paddle { namespace inference { namespace anakin { -TEST(softmax, test) { - auto* converter = Registry::Global().Lookup("softmax"); - ASSERT_TRUE(converter); +template +void test_softmax_op(const platform::DeviceContext& context, bool use_gpu) { framework::Scope scope; std::unordered_set parameters; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("softmax-X", {1, 10, 2}); validator.DeclOutputVar("softmax-Out", {1, 10, 2}); @@ -41,9 +41,27 @@ TEST(softmax, test) { validator.Execute(1); } +#ifdef PADDLE_WITH_CUDA +TEST(softmax_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_softmax_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(softmax_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_softmax_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(softmax); +USE_CPU_ANAKIN_CONVERTER(softmax); + +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(softmax); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_split_op.cc b/paddle/fluid/inference/anakin/convert/test_split_op.cc index aa61c01a511c2337944aadbbc3d47893487de683..7131b07558d1eb96c2f412d1718e680a34462534 100644 --- a/paddle/fluid/inference/anakin/convert/test_split_op.cc +++
b/paddle/fluid/inference/anakin/convert/test_split_op.cc @@ -21,12 +21,14 @@ namespace paddle { namespace inference { namespace anakin { -template -void AnakinSliceTest(const std::vector &in_shape, +template +void AnakinSliceTest(const platform::DeviceContext &context, bool use_gpu, + const std::vector &in_shape, const std::vector §ions) { std::unordered_set parameters({""}); framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("split_input", in_shape); std::vector output_vars; @@ -55,51 +57,58 @@ void AnakinSliceTest(const std::vector &in_shape, // batch = 0, axis = 1, same shape TEST(split_op, test_same_shape_axis1_batch1) { - AnakinSliceTest<1>({1, 4, 2, 2}, {2, 2}); + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + AnakinSliceTest<::anakin::saber::NV, 1>(ctx, true, {1, 4, 2, 2}, {2, 2}); } // batch = 0, axis = 1, different shape TEST(split_op, test_different_shape_axis1_batch1) { - AnakinSliceTest<1>({1, 3, 2, 2}, {2, 1}); -} -// batch = 10, axis = 1, same shape -TEST(split_op, test_same_shape_axis1_batch10) { - AnakinSliceTest<1>({1, 4, 2, 2}, {2, 2}); -} -// batch = 10, axis = 1, different shape -TEST(split_op, test_different_shape_axis1_batch10) { - AnakinSliceTest<1>({1, 3, 2, 2}, {2, 1}); + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + AnakinSliceTest<::anakin::saber::NV, 1>(ctx, true, {1, 3, 2, 2}, {2, 1}); } // batch = 0, axis = 2, same shape TEST(split_op, test_same_shape_axis2_batch1) { - AnakinSliceTest<2>({1, 3, 4, 2}, {2, 2}); + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + AnakinSliceTest<::anakin::saber::NV, 2>(ctx, true, {1, 3, 4, 2}, {2, 2}); } // batch = 0, axis = 2, different shape TEST(split_op, test_different_shape_axis2_batch1) { - AnakinSliceTest<2>({1, 3, 3, 2}, {2, 1}); -} -// batch = 10, axis = 2, same shape -TEST(split_op, test_same_shape_axis2_batch10) { - AnakinSliceTest<2>({1, 3, 4, 2}, {2, 2}); -} -// batch = 10, axis = 2, different shape -TEST(split_op, test_different_shape_axis2_batch10) { - AnakinSliceTest<2>({1, 3, 3, 2}, {2, 1}); + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + AnakinSliceTest<::anakin::saber::NV, 2>(ctx, true, {1, 3, 3, 2}, {2, 1}); } + // batch = 0, axis = 3, same shape TEST(split_op, test_same_shape_axis3_batch1) { - AnakinSliceTest<3>({1, 3, 2, 4}, {2, 2}); + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + AnakinSliceTest<::anakin::saber::NV, 3>(ctx, true, {1, 3, 2, 4}, {2, 2}); } // batch = 0, axis = 3, different shape TEST(split_op, test_different_shape_axis3_batch1) { - AnakinSliceTest<3>({1, 3, 2, 3}, {2, 1}); + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + AnakinSliceTest<::anakin::saber::NV, 3>(ctx, true, {1, 3, 2, 3}, {2, 1}); } -// batch = 10, axis = 3, same shape -TEST(split_op, test_same_shape_axis3_batch10) { - AnakinSliceTest<3>({1, 3, 2, 4}, {2, 2}); + +TEST(split_op, test_different_shape_axis1_batch1_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + AnakinSliceTest<::anakin::saber::X86, 1>(ctx, false, {1, 3, 2, 3}, {2, 1}); +} + +TEST(split_op, test_different_shape_axis2_batch1_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + AnakinSliceTest<::anakin::saber::X86, 2>(ctx, false, {1, 3, 4, 2}, {2, 2}); } -// batch 
= 10, axis = 3, different shape -TEST(split_op, test_different_shape_axis3_batch10) { - AnakinSliceTest<3>({1, 3, 2, 3}, {2, 1}); + +TEST(split_op, test_different_shape_axis3_batch1_cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + AnakinSliceTest<::anakin::saber::X86, 3>(ctx, false, {1, 3, 2, 4}, {2, 2}); } } // namespace anakin @@ -107,4 +116,7 @@ TEST(split_op, test_different_shape_axis3_batch10) { } // namespace paddle USE_OP(split); +USE_CPU_ANAKIN_CONVERTER(split); +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(split); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_sum_op.cc b/paddle/fluid/inference/anakin/convert/test_sum_op.cc index d6a59a0166be9239b480221cc076069239403429..8714890666c298c84a4542ec637e0edbe8918299 100644 --- a/paddle/fluid/inference/anakin/convert/test_sum_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_sum_op.cc @@ -22,10 +22,12 @@ namespace paddle { namespace inference { namespace anakin { -TEST(sum, native) { +template +static void test_sum_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters; framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("sum_x1", {1, 2, 1, 2}); validator.DeclInputVar("sum_x2", {1, 2, 1, 2}); validator.DeclOutputVar("sum_out", {1, 2, 1, 2}); @@ -40,9 +42,26 @@ TEST(sum, native) { validator.Execute(1); } +#ifdef PADDLE_WITH_CUDA +TEST(sum_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_sum_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(sum_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_sum_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(sum); +USE_CPU_ANAKIN_CONVERTER(sum); +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(sum); +#endif diff --git a/paddle/fluid/inference/anakin/convert/test_transpose_op.cc b/paddle/fluid/inference/anakin/convert/test_transpose_op.cc index 016ed26f02f782fe5032d8368f7767a5c94dfe9f..6b2f1ed1566d5c14f9c72bad1200c208af6f4aa3 100644 --- a/paddle/fluid/inference/anakin/convert/test_transpose_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_transpose_op.cc @@ -20,12 +20,12 @@ namespace paddle { namespace inference { namespace anakin { -TEST(transpose_op, test) { - auto* converter = Registry::Global().Lookup("transpose"); - ASSERT_TRUE(converter != nullptr); +template +void test_transpose1_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters; framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("transpose-X", {2, 3, 4, 5}); validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3}); @@ -43,11 +43,12 @@ TEST(transpose_op, test) { validator.Execute(3); } -// test input shape's dims < 4 -TEST(transpose_op, test2) { +template +void test_transpose2_op(const platform::DeviceContext& context, bool use_gpu) { std::unordered_set parameters; framework::Scope scope; - AnakinConvertValidation validator(parameters, &scope); + AnakinConvertValidation validator(parameters, &scope, context, + use_gpu); validator.DeclInputVar("transpose-X", {3, 4, 5}); validator.DeclOutputVar("transpose-Out", {3, 5, 4}); @@ -65,9 +66,38 @@ TEST(transpose_op, test2) { validator.Execute(1); } 
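+// The TESTs below instantiate the shared templated helpers once per target: the NV variants need a live CUDA device and compile only under PADDLE_WITH_CUDA, while the X86 variants run everywhere.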
+#ifdef PADDLE_WITH_CUDA +TEST(transpose1_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_transpose1_op<::anakin::saber::NV>(ctx, true); +} + +TEST(transpose2_op, gpu) { + platform::CUDAPlace gpu_place(0); + platform::CUDADeviceContext ctx(gpu_place); + test_transpose2_op<::anakin::saber::NV>(ctx, true); +} +#endif + +TEST(transpose1_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_transpose1_op<::anakin::saber::X86>(ctx, false); +} + +TEST(transpose2_op, cpu) { + platform::CPUPlace cpu_place; + platform::CPUDeviceContext ctx(cpu_place); + test_transpose2_op<::anakin::saber::X86>(ctx, false); +} + } // namespace anakin } // namespace inference } // namespace paddle USE_OP(transpose); +USE_CPU_ANAKIN_CONVERTER(transpose); +#ifdef PADDLE_WITH_CUDA USE_ANAKIN_CONVERTER(transpose); +#endif diff --git a/paddle/fluid/inference/anakin/convert/transpose.cc b/paddle/fluid/inference/anakin/convert/transpose.cc index f35372fe5c315ec68bc80a6d03c5931899ff7555..cffc526065f8c81a1643ed2b726ee9e76268d207 100644 --- a/paddle/fluid/inference/anakin/convert/transpose.cc +++ b/paddle/fluid/inference/anakin/convert/transpose.cc @@ -17,20 +17,16 @@ #include #include -using anakin::graph::GraphGlobalMem; -using anakin::AK_FLOAT; -using anakin::saber::NV; -using anakin::saber::Shape; using anakin::PTuple; namespace paddle { namespace inference { namespace anakin { -void TransposeOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::BlockDesc &block_desc, - const framework::Scope &scope, - bool test_mode) { +template +void TransposeOpConverter::operator()( + const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc, + const framework::Scope &scope, bool test_mode) { framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); @@ -38,7 +34,7 @@ void TransposeOpConverter::operator()(const framework::proto::OpDesc &op, auto input = op_desc.Input("X").front(); auto output = op_desc.Output("Out").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); - engine_->AddOp(op_name, "Permute", {input}, {output}); + this->engine_->AddOp(op_name, "Permute", {input}, {output}); auto axis = boost::get>(op_desc.GetAttr("axis")); size_t axis_size = axis.size(); @@ -46,11 +42,17 @@ void TransposeOpConverter::operator()(const framework::proto::OpDesc &op, axis.push_back(axis_size); axis_size += 1; } - engine_->AddOpAttr>(op_name, "dims", axis); + this->engine_->template AddOpAttr>(op_name, "dims", axis); } } // namespace anakin } // namespace inference } // namespace paddle -REGISTER_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter); +#ifdef PADDLE_WITH_CUDA +REGISTER_CUDA_ANAKIN_OP_CONVERTER(transpose, + TransposeOpConverter<::anakin::saber::NV>); +#endif + +REGISTER_CPU_ANAKIN_OP_CONVERTER(transpose, + TransposeOpConverter<::anakin::saber::X86>); diff --git a/paddle/fluid/inference/anakin/convert/transpose.h b/paddle/fluid/inference/anakin/convert/transpose.h index bacbf152bc12319e6296677500b17d55d9772412..54090468ae13c6b18f86a09fa52cd1faf01a73bb 100644 --- a/paddle/fluid/inference/anakin/convert/transpose.h +++ b/paddle/fluid/inference/anakin/convert/transpose.h @@ -20,7 +20,8 @@ namespace paddle { namespace inference { namespace anakin { -class TransposeOpConverter : public AnakinOpConverter { +template +class TransposeOpConverter : public AnakinOpConverter { public: TransposeOpConverter()
= default; diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h index a931efbcf4adf6373fe701ececc703c1435e21af..140a33a7cbb6fe56a5881bf4c284a8aec2fe0077 100644 --- a/paddle/fluid/inference/anakin/convert/ut_helper.h +++ b/paddle/fluid/inference/anakin/convert/ut_helper.h @@ -32,14 +32,8 @@ limitations under the License. */ #include "paddle/fluid/inference/utils/singleton.h" #include "paddle/fluid/platform/enforce.h" -using anakin::graph::GraphGlobalMem; -using anakin::AK_FLOAT; using anakin::Precision; -using anakin::saber::NV; using anakin::saber::X86; -using anakin::saber::Shape; -using anakin::PBlock; -using anakin::PTuple; namespace paddle { namespace inference { @@ -55,8 +49,8 @@ float random(float low, float high) { return dist(mt); } -void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place, - const platform::DeviceContext& ctx) { +void RandomizeTensor(framework::LoDTensor* tensor, + const platform::Place& place) { auto dims = tensor->dims(); size_t num_elements = analysis::AccuDims(dims, dims.size()); PADDLE_ENFORCE_GT(num_elements, 0); @@ -78,17 +72,19 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place, * anakin * layer. */ +template class AnakinConvertValidation { - using AnakinNvEngineT = AnakinEngine; + using AnakinNvEngineT = AnakinEngine; public: AnakinConvertValidation() = delete; AnakinConvertValidation(const std::unordered_set& parameters, - framework::Scope* scope) - : parameters_(parameters), scope_(scope), place_(0) { - PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); - engine_.reset(new AnakinEngine(true)); + framework::Scope* scope, + const platform::DeviceContext& ctx, + bool use_gpu = true) + : parameters_(parameters), scope_(scope), ctx_(ctx), use_gpu_(use_gpu) { + engine_.reset(new AnakinEngine(true)); } // Declare a Variable as input with random initialization. @@ -108,11 +104,10 @@ class AnakinConvertValidation { } void DeclVar(const std::string& name, const std::vector dim_vec) { - platform::CUDADeviceContext ctx(place_); auto* x = scope_->Var(name); auto* x_tensor = x->GetMutable(); x_tensor->Resize(framework::make_ddim(dim_vec)); - RandomizeTensor(x_tensor, place_, ctx); + RandomizeTensor(x_tensor, ctx_.GetPlace()); std::vector dim_vec_int64; for (auto& ele : dim_vec) { @@ -132,7 +127,7 @@ class AnakinConvertValidation { // should init anakin engine here. 
diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc
index 2b85d266cf0d51b91800dcc0047940894e3ae65b..17e661222433bb091260e45ded40eb3e0c23412a 100644
--- a/paddle/fluid/inference/anakin/engine.cc
+++ b/paddle/fluid/inference/anakin/engine.cc
@@ -69,11 +69,11 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
 }
 
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
-void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
-    const std::map<std::string, framework::LoDTensor *> &inputs,
-    const std::map<std::string, framework::LoDTensor *> &outputs,
-    cudaStream_t stream) {
+void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
+    const std::map<std::string, framework::LoDTensor *> &inputs) {
+#ifdef PADDLE_WITH_CUDA
   cudaDeviceSynchronize();
+#endif
   for (const auto &input : inputs) {
     auto *tensor = input.second;
     auto *data = tensor->data<float>();
@@ -105,6 +105,35 @@
                                      fluid_input_shape);
     anakin_input->copy_from(tmp_anakin_tensor);
   }
+}
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
+    const std::map<std::string, framework::LoDTensor *> &inputs,
+    const std::map<std::string, framework::LoDTensor *> &outputs) {
+  BindInput(inputs);
+  net_->prediction();
+  for (const auto &output : outputs) {
+    platform::CPUPlace cpu_place;
+    auto *tensor = output.second;
+    auto *anakin_output = net_->get_out(output.first);
+    auto *anakin_data = anakin_output->data();
+    auto anakin_output_shape = anakin_output->valid_shape();
+    tensor->Resize(framework::make_ddim(anakin_output_shape));
+    auto *fluid_data = tensor->mutable_data<float>(cpu_place);
+    memory::Copy(cpu_place, static_cast<void *>(fluid_data), cpu_place,
+                 static_cast<void *>(anakin_data),
+                 tensor->numel() * sizeof(float));
+  }
+}
+
+#ifdef PADDLE_WITH_CUDA
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
+    const std::map<std::string, framework::LoDTensor *> &inputs,
+    const std::map<std::string, framework::LoDTensor *> &outputs,
+    cudaStream_t stream) {
+  BindInput(inputs);
   net_->prediction();
   cudaDeviceSynchronize();
   for (const auto &output : outputs) {
@@ -121,6 +150,7 @@
   }
   cudaDeviceSynchronize();
 }
+#endif
 
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
@@ -140,7 +170,15 @@ AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
   return std::unique_ptr<AnakinEngine>(engine);
 }
 
+#ifdef PADDLE_WITH_CUDA
 template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
+template class AnakinEngineManager<::anakin::saber::NV>;
+#endif
+
+template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
+template class AnakinEngineManager<::anakin::saber::X86>;
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
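The engine now shares input binding between targets: BindInput copies Fluid tensors into Anakin's input slots, and the two Execute overloads differ only in stream handling. Call sites pick the path purely by argument list, as in this sketch (the engine pointers are placeholders):

    std::map<std::string, framework::LoDTensor *> inputs;   // name -> tensor
    std::map<std::string, framework::LoDTensor *> outputs;

    // CPU: synchronous prediction; results are copied back before returning.
    x86_engine->Execute(inputs, outputs);

    #ifdef PADDLE_WITH_CUDA
    // GPU: the caller supplies the stream; Execute synchronizes the device
    // around prediction.
    cudaStream_t stream;
    PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream), 0);
    nv_engine->Execute(inputs, outputs, stream);
    #endif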
diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h
index 1325306557f2e73f9f54fc3dcb7b45a2f64c6b15..215c8a6c6146a28a7e21b930f321888fe1bf685e 100644
--- a/paddle/fluid/inference/anakin/engine.h
+++ b/paddle/fluid/inference/anakin/engine.h
@@ -32,7 +32,6 @@
 #include "saber/saber_types.h"
 
 using anakin::Precision;
-using anakin::saber::NV;
 
 namespace anakin {
 
@@ -94,9 +93,16 @@ class AnakinEngine {
   void Save(std::string path) { graph_->save(path); }
   bool IsInit() { return initialized_; }
   int GetDevice() { return device_; }
+  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
+               const std::map<std::string, framework::LoDTensor *> &outputs);
+#ifdef PADDLE_WITH_CUDA
   void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
                const std::map<std::string, framework::LoDTensor *> &outputs,
                cudaStream_t stream);
+#endif
+
+ private:
+  void BindInput(const std::map<std::string, framework::LoDTensor *> &inputs);
 
  private:
   bool initialized_{false};
@@ -108,24 +114,25 @@ class AnakinEngine {
   std::vector<std::string> program_inputs_;
 };
 
+template <typename TargetT>
 class AnakinEngineManager {
-  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
+  using AnakinEngineT = AnakinEngine<TargetT, Precision::FP32>;
 
  public:
   bool HasEngine(const std::string &name) const {
     if (engines_.count(name) == 0) return false;
     return engines_.at(name).get() != nullptr;
   }
-  AnakinNvEngineT *Get(const std::string &name) const {
+  AnakinEngineT *Get(const std::string &name) const {
     return engines_.at(name).get();
   }
 
-  AnakinNvEngineT *Create(
-      bool need_summary, int device, int max_batch_size,
-      std::map<std::string, std::vector<int>> max_input_shape,
-      std::vector<std::string> program_inputs, std::string engine_name) {
+  AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
+                        std::map<std::string, std::vector<int>> max_input_shape,
+                        std::vector<std::string> program_inputs,
+                        std::string engine_name) {
     std::unique_lock<std::mutex> lk(mut_);
-    auto *p = new AnakinEngine<NV, Precision::FP32>(
+    auto *p = new AnakinEngine<TargetT, Precision::FP32>(
         need_summary, device, max_batch_size, max_input_shape, program_inputs);
     engines_[engine_name].reset(p);
     return p;
   }
@@ -138,7 +145,7 @@
   }
 
  private:
-  std::unordered_map<std::string, std::unique_ptr<AnakinNvEngineT>> engines_;
+  std::unordered_map<std::string, std::unique_ptr<AnakinEngineT>> engines_;
   std::mutex mut_;
 };
 }  // namespace anakin
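AnakinEngineManager is now a class template, so each target gets its own singleton registry keyed by engine name. The lookup-or-create pattern that the subgraph pass relies on below boils down to this sketch:

    using X86EngineManager =
        inference::anakin::AnakinEngineManager<::anakin::saber::X86>;

    auto &manager = inference::Singleton<X86EngineManager>::Global();
    if (!manager.HasEngine(engine_key)) {
      manager.Create(/*need_summary=*/true, /*device=*/0, max_batch_size,
                     max_input_shape, program_inputs, engine_key);
    }
    auto *engine = manager.Get(engine_key);  // AnakinEngine<X86, FP32> *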
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index a736ca393ccb7168a9faf650a6bce13f35fffca8..37b7583fde29ccdf4e2e685fd48aaf7ed39cc323 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -64,20 +64,20 @@ struct Argument {
 
   bool Has(const std::string& key) const { return valid_fields_.count(key); }
 
-#define DECL_ARGUMENT_FIELD(field__, Field, type__) \
- public:                                            \
-  type__& field__() {                               \
-    PADDLE_ENFORCE(Has(#field__));                  \
-    return field__##_;                              \
-  }                                                 \
-  void Set##Field(const type__& x) {                \
-    field__##_ = x;                                 \
-    valid_fields_.insert(#field__);                 \
-  }                                                 \
-  DECL_ARGUMENT_FIELD_VALID(field__);               \
-  type__* field__##_ptr() { return &field__##_; }   \
-                                                    \
- private:                                           \
+#define DECL_ARGUMENT_FIELD(field__, Field, type__)          \
+ public:                                                     \
+  type__& field__() {                                        \
+    PADDLE_ENFORCE(Has(#field__), "There is no such field"); \
+    return field__##_;                                       \
+  }                                                          \
+  void Set##Field(const type__& x) {                         \
+    field__##_ = x;                                          \
+    valid_fields_.insert(#field__);                          \
+  }                                                          \
+  DECL_ARGUMENT_FIELD_VALID(field__);                        \
+  type__* field__##_ptr() { return &field__##_; }            \
+                                                             \
+ private:                                                    \
   type__ field__##_;
 
 #define DECL_ARGUMENT_FIELD_VALID(field__) \
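For reference, one invocation of this macro generates a getter that enforces the field was set, a setter that records it in valid_fields_, and the backing member. Roughly, using the use_gpu field this patch threads through the Argument (the DECL_ARGUMENT_FIELD_VALID expansion is omitted):

    // DECL_ARGUMENT_FIELD(use_gpu, UseGpu, bool) expands roughly to:
     public:
      bool& use_gpu() {
        PADDLE_ENFORCE(Has("use_gpu"), "There is no such field");
        return use_gpu_;
      }
      void SetUseGpu(const bool& x) {
        use_gpu_ = x;
        valid_fields_.insert("use_gpu");
      }
      bool* use_gpu_ptr() { return &use_gpu_; }

     private:
      bool use_gpu_;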
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 78e502c670f0eb2480b560964cf31e247990a367..bbc3938969a6d430645835ebfe5584f84a1a29e0 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -114,6 +114,7 @@ void IRPassManager::CreatePasses(Argument *argument,
     if (pass_name == "anakin_subgraph_pass") {
       pass->Set("program",
                 new framework::ProgramDesc *(&argument->main_program()));
+      pass->Set("use_gpu", new bool(argument->use_gpu()));
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
       pass->Set("engine_opt_info", new std::map<std::string, std::string>(
diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
index cbf883a8a5ff1fa2eda5907bf3ce114044dedb62..658006c22cd842abf80808fbfc976b6bb78d68d6 100644
--- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
@@ -194,20 +194,49 @@ void AnakinSubgraphPass::CreateAnakinOp(
   auto max_batch_size = Get<int>("max_batch_size");
   auto program_inputs = program_desc->GetFeedTargetNames();
 
-  auto *anakin_engine =
-      inference::Singleton<anakin::AnakinEngineManager>::Global().Create(
-          true, Get<int>("gpu_device_id"), max_batch_size, max_input_shape,
-          program_inputs, engine_key);
+  bool use_gpu = Get<bool>("use_gpu");
+  SetAttr(op_desc->Proto(), "use_gpu", use_gpu);
+
+  if (use_gpu) {
+#ifdef PADDLE_WITH_CUDA
+    inference::Singleton<
+        anakin::AnakinEngineManager<::anakin::saber::NV>>::Global()
+        .Create(true, Get<int>("gpu_device_id"), max_batch_size,
+                max_input_shape, program_inputs, engine_key);
+#endif
+  } else {
+    inference::Singleton<
+        anakin::AnakinEngineManager<::anakin::saber::X86>>::Global()
+        .Create(true, Get<int>("gpu_device_id"), max_batch_size,
+                max_input_shape, program_inputs, engine_key);
+  }
 
   auto *scope = param_scope();
   std::unordered_set<std::string> param_set(params.begin(), params.end());
   framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
-
-  inference::Singleton<inference::anakin::AnakinOpConverter>::Global()
-      .ConvertBlockToAnakinEngine(
-          &block_desc_temp, scope,
-          std::vector<std::string>(input_names.begin(), input_names.end()),
-          param_set, output_mapping, anakin_engine);
+  if (use_gpu) {
+    auto *anakin_engine =
+        inference::Singleton<
+            anakin::AnakinEngineManager<::anakin::saber::NV>>::Global()
+            .Get(engine_key);
+    inference::Singleton<
+        inference::anakin::AnakinOpConverter<::anakin::saber::NV>>::Global()
+        .ConvertBlockToAnakinEngine(
+            &block_desc_temp, scope,
+            std::vector<std::string>(input_names.begin(), input_names.end()),
+            param_set, output_mapping, anakin_engine);
+  } else {
+    auto *anakin_engine =
+        inference::Singleton<
+            anakin::AnakinEngineManager<::anakin::saber::X86>>::Global()
+            .Get(engine_key);
+    inference::Singleton<
+        inference::anakin::AnakinOpConverter<::anakin::saber::X86>>::Global()
+        .ConvertBlockToAnakinEngine(
+            &block_desc_temp, scope,
+            std::vector<std::string>(input_names.begin(), input_names.end()),
+            param_set, output_mapping, anakin_engine);
+  }
 }
 }  // namespace analysis
diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt
index 9c80b7a839a6bf326ae745e6a4e43d154637c421..882bb3468388e794e975d87de73537ac41f17cf7 100644
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
@@ -70,4 +70,3 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
   anakin_target(inference_anakin_api)
   anakin_target(inference_anakin_api_shared)
 endif()
-inference_analysis_test(faster_rcnn_test SRCS faster_rcnn_test.cc EXTRA_DEPS paddle_fluid)
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 0109b4a4fa7617880f642f5a39639bca38475515..4f9e0b639564a362b241fdd9e0399bae4c871910 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -268,9 +268,11 @@ void AnalysisConfig::Update() {
     PADDLE_ENFORCE(!use_tensorrt_,
                    "Anakin sub-graph and TensorRT sub-graph are not allowed to "
                    "run at the same time!");
-    PADDLE_ENFORCE(
-        use_gpu_,
-        "Anakin sub-graph engine need gpu, please use the EnableGpu API.");
+    if (use_gpu_) {
+      LOG(INFO) << "Run Anakin GPU mode";
+    } else {
+      LOG(INFO) << "Run Anakin CPU mode";
+    }
 
     pass_builder()->ClearPasses();
     for (const auto &pass : kAnakinSubgraphPasses) {
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index e5991af4f7bfe5e8ee89cac2b0ad534de2996d65..231beab641a9db0133f60f112606e2ba14fbecc6 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -382,7 +382,7 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_);
   }
 
-  if (config_.use_gpu() && config_.anakin_engine_enabled()) {
+  if (config_.anakin_engine_enabled()) {
     argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_);
     argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_);
     argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_);
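With the GPU-only enforcement replaced by a log line, Anakin can now be enabled on a CPU-only config. A hedged end-to-end sketch; EnableAnakinEngine's parameter list is assumed from the config fields forwarded in PrepareArgument (max batch size, max input shapes, min subgraph size), and the model path is a placeholder:

    AnalysisConfig config;
    config.SetModel("/path/to/model");  // placeholder path
    config.DisableGpu();                // no longer rejected with Anakin on

    std::map<std::string, std::vector<int>> max_input_shape{
        {"x", {1, 3, 224, 224}}};
    config.EnableAnakinEngine(/*max_batch_size=*/1, max_input_shape,
                              /*min_subgraph_size=*/6);

    auto predictor = CreatePaddlePredictor(config);  // logs "Run Anakin CPU mode"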
diff --git a/paddle/fluid/operators/anakin/anakin_engine_op.h b/paddle/fluid/operators/anakin/anakin_engine_op.h
index e4feb14b2271a50c8e8fb7ce4c81dd6c99042e21..99c5a6dc84a094b66d02a89c829eea2753480154 100644
--- a/paddle/fluid/operators/anakin/anakin_engine_op.h
+++ b/paddle/fluid/operators/anakin/anakin_engine_op.h
@@ -34,28 +34,16 @@ limitations under the License. */
 
 namespace paddle {
 namespace operators {
 
-using FluidDT = framework::proto::VarType_Type;
 using inference::Singleton;
-
-using anakin::graph::GraphGlobalMem;
-using anakin::AK_FLOAT;
-using anakin::Precision;
-using anakin::saber::NV;
-using anakin::saber::X86;
-using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;
 using inference::anakin::AnakinEngine;
 
 class AnakinEngineOp : public framework::OperatorBase {
-  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
-
  private:
   std::vector<std::string> input_names_;
   std::unordered_set<std::string> param_names_;
-  mutable AnakinNvEngineT *anakin_engine_;
   std::string engine_key_;
   std::string engine_serialized_data_;
+  bool use_gpu_;
 
  public:
   AnakinEngineOp(const std::string &type,
@@ -66,10 +54,10 @@
     input_names_ = Inputs("Xs");
     engine_key_ = Attr<std::string>("engine_key");
     auto params = Attr<std::vector<std::string>>("parameters");
+    use_gpu_ = Attr<bool>("use_gpu");
     for (const auto &param : params) {
       param_names_.insert(param);
     }
-    anakin_engine_ = nullptr;
   }
 
  protected:
@@ -80,7 +68,6 @@
   void RunAnakin(const framework::Scope &scope,
                  const platform::Place &dev_place) const {
-    auto *engine = GetEngine(scope, dev_place);
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
     auto stream =
@@ -92,7 +79,6 @@
         Attr<std::vector<std::string>>("output_name_mapping");
 
     std::map<std::string, framework::LoDTensor *> inputs;
-    // Convert input tensor from fluid to engine.
     for (const auto &x : Inputs("Xs")) {
       if (param_names_.count(x)) continue;
       auto &t =
@@ -110,17 +96,21 @@
       outputs.insert({output_maps[output_index], fluid_t});
       output_index += 1;
     }
-    engine->Execute(inputs, outputs, stream);
-  }
-
-  AnakinNvEngineT *GetEngine(const framework::Scope &scope,
-                             const platform::Place &dev_place) const {
-    if (anakin_engine_ == nullptr) {
-      anakin_engine_ =
-          inference::Singleton<inference::anakin::AnakinEngineManager>::Global()
+    if (use_gpu_) {
+#ifdef PADDLE_WITH_CUDA
+      auto *engine =
+          inference::Singleton<inference::anakin::AnakinEngineManager<
+              ::anakin::saber::NV>>::Global()
+              .Get(engine_key_);
+      engine->Execute(inputs, outputs, stream);
+#endif
+    } else {
+      auto *engine =
+          inference::Singleton<inference::anakin::AnakinEngineManager<
+              ::anakin::saber::X86>>::Global()
              .Get(engine_key_);
+      engine->Execute(inputs, outputs);
     }
-    return anakin_engine_;
   }
 };
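Taken together, the device decision travels as a plain op attribute: the subgraph pass stamps it onto the generated engine op, the op reads it back at construction, and run time dispatches to the matching per-target engine singleton. A condensed sketch of that round trip; every call here appears in the diffs above:

    // 1. In AnakinSubgraphPass::CreateAnakinOp, when the engine op is built:
    SetAttr(op_desc->Proto(), "use_gpu", use_gpu);

    // 2. In the AnakinEngineOp constructor, when the op is deserialized:
    use_gpu_ = Attr<bool>("use_gpu");

    // 3. In RunAnakin, the flag selects the engine and Execute overload:
    if (use_gpu_) {
    #ifdef PADDLE_WITH_CUDA
      // NV engine manager singleton + stream-based Execute
    #endif
    } else {
      // X86 engine manager singleton + synchronous Execute
    }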