diff --git a/lite/kernels/xpu/bridges/CMakeLists.txt b/lite/kernels/xpu/bridges/CMakeLists.txt index 8cff79f23fd97d5dd5590da9ea576f3d2b293925..ebddd36451f6d78ce52115c6a6ef9e47e044fd00 100644 --- a/lite/kernels/xpu/bridges/CMakeLists.txt +++ b/lite/kernels/xpu/bridges/CMakeLists.txt @@ -4,14 +4,23 @@ set(xpu_bridge_deps xpu_bridge_registry xpu_builder op) lite_cc_library(xpu_bridge_act_op SRCS act_op.cc DEPS ${xpu_bridge_deps}) lite_cc_library(xpu_bridge_conv_op SRCS conv_op.cc DEPS ${xpu_bridge_deps}) +lite_cc_library(xpu_bridge_elementwise_ops SRCS elementwise_ops.cc DEPS ${xpu_bridge_deps}) +lite_cc_library(xpu_bridge_pool_op SRCS pool_op.cc DEPS ${xpu_bridge_deps}) +lite_cc_library(xpu_bridge_softmax_op SRCS softmax_op.cc DEPS ${xpu_bridge_deps}) set(xpu_bridges xpu_bridge_registry xpu_bridge_act_op xpu_bridge_conv_op + xpu_bridge_elementwise_ops + xpu_bridge_pool_op + xpu_bridge_softmax_op CACHE INTERNAL "xpu_bridges") set(xpu_bridge_test_deps ${xpu_bridges} ${xpu_kernels} ${ops}) lite_cc_test(test_xpu_bridge_act_op SRCS act_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps}) lite_cc_test(test_xpu_bridge_conv_op SRCS conv_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps}) +lite_cc_test(test_xpu_bridge_elementwise_ops SRCS elementwise_ops_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps}) +lite_cc_test(test_xpu_bridge_pool_op SRCS pool_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps}) +lite_cc_test(test_xpu_bridge_softmax_op SRCS softmax_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps}) diff --git a/lite/kernels/xpu/bridges/elementwise_ops.cc b/lite/kernels/xpu/bridges/elementwise_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..07371bee2c6710ff2522d8965279dbd0d3c93430 --- /dev/null +++ b/lite/kernels/xpu/bridges/elementwise_ops.cc @@ -0,0 +1,77 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/backends/xpu/builder.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+// Maps elementwise_add to an XPU bias-add (1-D Y) or binary "add" node.
+node_map_type ElementwiseConverter(const std::shared_ptr op,
+                                   graph_ctx_type* graph_ctx,
+                                   const node_map_type& input_nodes) {
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_type = lite::xpu::UniqueName(op_type);
+  LOG(INFO) << "[XPU] Converting " + op_type + "...";
+
+  // the graph context, its builder and its params must all be set
+  CHECK(graph_ctx != nullptr);
+  CHECK(graph_ctx->builder != nullptr);
+  CHECK(graph_ctx->params != nullptr);
+
+  // fetch the X/Y variable names, the axis attribute and both shapes
+  auto x_var_name = op_info->Input("X").front();
+  auto y_var_name = op_info->Input("Y").front();
+  CHECK(input_nodes.count(x_var_name));
+  CHECK(input_nodes.count(y_var_name));
+  auto axis = op_info->GetAttr("axis");
+  auto x_dims = scope->FindTensor(x_var_name)->dims();
+  auto y_dims = scope->FindTensor(y_var_name)->dims();
+
+  // 1-D Y -> CreateBiasAdd on axis; X/Y of equal rank -> CreateBinaryOp "add"
+  std::shared_ptr elementwise_node = nullptr;
+  if (y_dims.size() == 1) {
+    elementwise_node =
+        std::make_shared(graph_ctx->builder->CreateBiasAdd(
+            *input_nodes.at(x_var_name), *input_nodes.at(y_var_name), axis));
+  } else if (x_dims.size() == y_dims.size()) {
+    elementwise_node =
+        std::make_shared(graph_ctx->builder->CreateBinaryOp(
+            "add", *input_nodes.at(x_var_name), *input_nodes.at(y_var_name)));
+  } else {  // FATAL, not ERROR: going on would publish a null node as "Out"
+    LOG(FATAL) << "XPU elementwise_add only support y of one dimension, or x "
+                  "and y of the same dimension. But received x's dimension: "
+               << x_dims << ", y's dimension: " << y_dims << ", axis: " << axis;
+  }
+  graph_ctx->builder->SetLayer(unique_op_type);
+
+  // publish the new node under the op's "Out" variable name
+  node_map_type output_nodes;
+  output_nodes[op_info->Output("Out").front()] = elementwise_node;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_XPU_BRIDGE(elementwise_add,
+                    paddle::lite::kernels::xpu::bridges::ElementwiseConverter);
diff --git a/lite/kernels/xpu/bridges/elementwise_ops_test.cc b/lite/kernels/xpu/bridges/elementwise_ops_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2abda822e3ae380ad376e92db99b5ad204a2a2a4
--- /dev/null
+++ b/lite/kernels/xpu/bridges/elementwise_ops_test.cc
@@ -0,0 +1,188 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/elementwise_ops.h"
+#include
+#include
+#include "lite/core/op_registry.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+#include "lite/kernels/xpu/bridges/test_helper.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+// CPU reference: out = x + y, with y broadcast over x starting at `axis`.
+template
+void elementwise_add_ref(const std::shared_ptr op) {
+  Scope* scope = op->scope();
+  const OpInfo* op_info = op->op_info();
+  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable();
+  auto y = scope->FindVar(op_info->Input("Y").front())->GetMutable();
+  auto out =
+      scope->FindVar(op_info->Output("Out").front())->GetMutable();
+
+  auto x_data = x->data();
+  auto y_data = y->data();
+  dtype* out_data = out->mutable_data();
+
+  auto x_dims = x->dims();
+  auto y_dims = y->dims();
+  int axis = op_info->GetAttr("axis");
+
+  if (axis < 0) {
+    axis = x_dims.size() - y_dims.size();
+  }
+  int batch = 1;
+  int channels = 1;
+  int num = 1;
+  for (int i = 0; i < axis; ++i) {
+    batch *= x_dims[i];
+  }
+  for (int i = 0; i < y_dims.size(); ++i) {
+    channels *= y_dims[i];
+  }
+  for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) {
+    num *= x_dims[i];
+  }
+  // x is walked as [batch, channels, num]; y supplies one value per channel.
+  std::string elt_type = "add";
+  if (elt_type == "add") {
+    for (int i = 0; i < batch; ++i) {
+      for (int j = 0; j < channels; ++j) {
+        int offset = (i * channels + j) * num;
+        const dtype* din_ptr = x_data + offset;
+        const dtype diny_data = y_data[j];
+        dtype* dout_ptr = out_data + offset;
+        for (int k = 0; k < num; ++k) {
+          *dout_ptr = *din_ptr + diny_data;
+          dout_ptr++;
+          din_ptr++;
+        }
+      }
+    }
+  } else if (elt_type == "sub") {
+    for (int i = 0; i < batch; ++i) {
+      for (int j = 0; j < channels; ++j) {
+        int offset = (i * channels + j) * num;
+        const dtype* din_ptr = x_data + offset;
+        const dtype diny_data = y_data[j];
+        dtype* dout_ptr = out_data + offset;
+        for (int k = 0; k < num; ++k) {
+          *dout_ptr = *din_ptr - diny_data;
+          dout_ptr++;
+          din_ptr++;
+        }
+      }
+    }
+  } else if (elt_type == "mul") {
+    for (int i = 0; i < batch; ++i) {
+      for (int j = 0; j < channels; ++j) {
+        int offset = (i * channels + j) * num;
+        const dtype* din_ptr = x_data + offset;
+        const dtype diny_data = y_data[j];
+        dtype* dout_ptr = out_data + offset;
+        for (int k = 0; k < num; ++k) {
+          *dout_ptr = *din_ptr * diny_data;
+          dout_ptr++;
+          din_ptr++;
+        }
+      }
+    }
+  } else if (elt_type == "max") {
+    for (int i = 0; i < batch; ++i) {
+      for (int j = 0; j < channels; ++j) {
+        int offset = (i * channels + j) * num;
+        const dtype* din_ptr = x_data + offset;
+        const dtype diny_data = y_data[j];
+        dtype* dout_ptr = out_data + offset;
+        for (int k = 0; k < num; ++k) {
+          *dout_ptr = std::max(*din_ptr, diny_data);
+          dout_ptr++;
+          din_ptr++;
+        }
+      }
+    }
+  } else {
+    LOG(FATAL) << "unsupported Elementwise type: " << elt_type;
+  }
+}
+// Runs elementwise_add through LauchOp and checks it against the reference.
+void test_elementwise_add(std::vector x_dims,
+                          std::vector y_dims,
+                          int axis) {
+  // create the x, y, out and out_ref tensors in a fresh scope
+  Scope scope;
+  std::string x_var_name = "x";
+  std::string y_var_name = "y";
+  std::string out_var_name = "out";
+  std::string out_ref_var_name = "out_ref";
+  auto* x = scope.Var(x_var_name)->GetMutable();
+  auto* y = scope.Var(y_var_name)->GetMutable();
+  auto* out = scope.Var(out_var_name)->GetMutable();
+  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable();
+  x->Resize(x_dims);
+  if (y_dims.size() == 0) {
+    y->Resize(x_dims);
+  } else {
+    y->Resize(y_dims);
+  }
+
+  // fill both inputs via the FillTensor test helper
+  FillTensor(x);
+  FillTensor(y);
+
+  // describe an elementwise_add op over x and y with the given axis
+  cpp::OpDesc opdesc;
+  opdesc.SetType("elementwise_add");
+  opdesc.SetInput("X", {x_var_name});
+  opdesc.SetInput("Y", {y_var_name});
+  opdesc.SetOutput("Out", {out_var_name});
+  opdesc.SetAttr("axis", axis);
+
+  // run via LauchOp, then stash `out` (presumably the XPU result) in out_ref
+  auto op = CreateOp(opdesc, &scope);
+  LauchOp(op, {x_var_name, y_var_name}, {out_var_name});
+  out_ref->CopyDataFrom(*out);
+
+  // the reference writes into `out`, so out_ref now holds the earlier result
+  elementwise_add_ref(op);
+
+  // element-wise comparison with an absolute tolerance of 1e-5
+  auto* out_data = out->mutable_data();
+  auto* out_ref_data = out_ref->mutable_data();
+  for (int i = 0; i < out->dims().production(); i++) {
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
+  }
+}
+
+// 1-D y on each axis of a 4-D x, plus an empty y shape (y takes x's shape)
+TEST(XPUBridges, elementwise_add) {
+  test_elementwise_add({1, 2, 3, 4}, {1}, 0);
+  test_elementwise_add({1, 2, 3, 4}, {2}, 1);
+  test_elementwise_add({2, 2, 3, 4}, {3}, 2);
+  test_elementwise_add({2, 2, 3, 4}, {4}, 3);
+  test_elementwise_add({2, 2, 3, 4}, {4}, -1);
+  test_elementwise_add({2, 2, 3, 4}, {}, -1);
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+USE_LITE_OP(elementwise_add);
+USE_XPU_BRIDGE(elementwise_add);
diff --git a/lite/kernels/xpu/bridges/pool_op.cc b/lite/kernels/xpu/bridges/pool_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fbc6a9919c446508afa5a3b8a1c35352f9b8ecfa
--- /dev/null
+++ b/lite/kernels/xpu/bridges/pool_op.cc
@@ -0,0 +1,97 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/backends/xpu/builder.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+// Maps pool2d to an XPU max/avg 2-D pooling node (global or windowed).
+node_map_type PoolConverter(const std::shared_ptr op,
+                            graph_ctx_type* graph_ctx,
+                            const node_map_type& input_nodes) {
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_type = lite::xpu::UniqueName(op_type);
+  LOG(INFO) << "[XPU] Converting " + op_type + "...";
+
+  // the graph context, its builder and its params must all be set
+  CHECK(graph_ctx != nullptr);
+  CHECK(graph_ctx->builder != nullptr);
+  CHECK(graph_ctx->params != nullptr);
+
+  // fetch the X variable name and every pooling attribute of the op
+  auto x_var_name = op_info->Input("X").front();
+  auto pooling_type = op_info->GetAttr("pooling_type");
+  auto ceil_mode = op_info->GetAttr("ceil_mode");
+  auto paddings = op_info->GetAttr>("paddings");
+  auto global_pooling = op_info->GetAttr("global_pooling");
+  auto ksize = op_info->GetAttr>("ksize");
+  auto strides = op_info->GetAttr>("strides");
+  auto exclusive = op_info->GetAttr("exclusive");
+
+  // pick the builder call from pooling_type ("max"/"avg") and global_pooling
+  CHECK(input_nodes.count(x_var_name));
+  std::shared_ptr pool_node = nullptr;
+  if (pooling_type == "max") {
+    if (global_pooling) {
+      pool_node = std::make_shared(
+          graph_ctx->builder->CreateGlobalMaxPool2D(
+              *input_nodes.at(x_var_name)));
+    } else {
+      pool_node = std::make_shared(
+          graph_ctx->builder->CreateMaxPool2D(*input_nodes.at(x_var_name),
+                                              lite::xpu::CvtShape(ksize),
+                                              lite::xpu::CvtShape(strides),
+                                              lite::xpu::CvtShape(paddings),
+                                              "NCHW",
+                                              ceil_mode));
+    }
+  } else if (pooling_type == "avg") {
+    if (global_pooling) {
+      pool_node = std::make_shared(
+          graph_ctx->builder->CreateGlobalAvgPool2D(
+              *input_nodes.at(x_var_name)));
+    } else {
+      pool_node = std::make_shared(
+          // !exclusive ---> count_include_pad
+          graph_ctx->builder->CreateAvgPool2D(*input_nodes.at(x_var_name),
+                                              lite::xpu::CvtShape(ksize),
+                                              lite::xpu::CvtShape(strides),
+                                              lite::xpu::CvtShape(paddings),
+                                              "NCHW",
+                                              ceil_mode,
+                                              !exclusive));
+    }
+  } else {
+    LOG(FATAL) << "Unsupported pooling type: " << pooling_type;
+  }
+  graph_ctx->builder->SetLayer(unique_op_type);
+
+  // publish the new node under the op's "Out" variable name
+  node_map_type output_nodes;
+  output_nodes[op_info->Output("Out").front()] = pool_node;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_XPU_BRIDGE(pool2d, paddle::lite::kernels::xpu::bridges::PoolConverter);
diff --git a/lite/kernels/xpu/bridges/pool_op_test.cc b/lite/kernels/xpu/bridges/pool_op_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..512d59feb1340bcaa485d9290886cf5d58a878cf
--- /dev/null
+++ b/lite/kernels/xpu/bridges/pool_op_test.cc
@@ -0,0 +1,267 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/pool_op.h"
+#include
+#include "lite/core/op_registry.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+#include "lite/kernels/xpu/bridges/test_helper.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+// CPU reference for pool2d (max/avg, global or windowed); writes into "Out".
+void pool_ref(const std::shared_ptr op) {
+  Scope* scope = op->scope();
+  const OpInfo* op_info = op->op_info();
+  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable();
+  auto out =
+      scope->FindVar(op_info->Output("Out").front())->GetMutable();
+  auto& in_dims = x->dims();
+  auto& out_dims = out->dims();
+
+  const float* src_ptr = x->data();
+  float* dst_ptr = out->mutable_data();
+
+  std::vector ksize = op_info->GetAttr>("ksize");
+  std::vector strides = op_info->GetAttr>("strides");
+  std::vector paddings = op_info->GetAttr>("paddings");
+  bool exclusive = op_info->GetAttr("exclusive");
+  std::string pooling_type = op_info->GetAttr("pooling_type");
+  bool global_pooling = op_info->GetAttr("global_pooling");
+
+  int in_n = in_dims[0];
+  int in_c = in_dims[1];
+  int in_h = in_dims[2];
+  int in_w = in_dims[3];
+  int size_in_n = in_c * in_h * in_w;
+  int size_in_c = in_h * in_w;
+
+  int out_h = out_dims[2];
+  int out_w = out_dims[3];
+  int size_out_n = in_c * out_h * out_w;
+  int size_out_c = out_h * out_w;
+
+  int window_h = ksize[0];
+  int window_w = ksize[1];
+  int stride_h = strides[0];
+  int stride_w = strides[1];
+  int pad_h = paddings[0];
+  int pad_w = paddings[1];
+
+  if (global_pooling == true) {
+    for (int n = 0; n < in_n; ++n) {
+      for (int c = 0; c < in_c; ++c) {
+        const float* src = src_ptr + n * size_in_n + c * size_in_c;
+        float res = src[0];
+        if (pooling_type == "max") {
+          for (int i = 1; i < size_in_c; ++i) {
+            float cur_val = src[i];
+            res = cur_val > res ? cur_val : res;
+          }
+        } else if (pooling_type == "avg") {
+          for (int i = 1; i < size_in_c; ++i) {
+            float cur_val = src[i];
+            res += cur_val;
+          }
+          res /= size_in_c;
+        }
+        dst_ptr[n * size_out_n + c] = res;
+      }
+    }
+  } else {
+    for (int n = 0; n < in_n; ++n) {
+      for (int c = 0; c < in_c; ++c) {
+        for (int h = 0; h < out_h; ++h) {
+          int sh = h * stride_h;
+          int eh = sh + window_h;
+          sh = (sh - pad_h) < 0 ? 0 : sh - pad_h;
+          eh = (eh - pad_h) > in_h ? in_h : eh - pad_h;
+          for (int w = 0; w < out_w; ++w) {
+            int sw = w * stride_w;
+            int ew = sw + window_w;
+            sw = (sw - pad_w) < 0 ? 0 : sw - pad_w;
+            ew = (ew - pad_w) > in_w ? in_w : ew - pad_w;
+            int pooling_size = (ew - sw) * (eh - sh);
+            if (pooling_size == 0) continue;
+            float res = 0.f;
+            for (int kh = sh; kh < eh; ++kh) {
+              for (int kw = sw; kw < ew; ++kw) {
+                int src_idx = n * size_in_n + c * size_in_c + kh * in_w + kw;
+                if (kh == sh && kw == sw) {
+                  res = src_ptr[src_idx];
+                } else {
+                  if (pooling_type == "max") {
+                    res = res >= src_ptr[src_idx] ? res : src_ptr[src_idx];
+                  }
+                  if (pooling_type == "avg") {
+                    res += src_ptr[src_idx];
+                  }
+                }
+              }
+            }
+            if (pooling_type == "avg") {
+              if (exclusive) {
+                res /= pooling_size;
+              } else {
+                res /= window_h * window_w;
+              }
+            }
+            dst_ptr[n * size_out_n + c * size_out_c + h * out_w + w] = res;
+          }
+        }
+      }
+    }
+  }
+}
+// Runs one pool2d configuration via LauchOp and compares with pool_ref.
+void test_pool(int bs,
+               int ic,
+               int ih,
+               int iw,
+               std::string pooling_type,
+               bool ceil_mode,
+               bool global_pooling,
+               bool exclusive,
+               int ksize,
+               int stride,
+               int padding) {
+  // create the x, out and out_ref tensors in a fresh scope
+  Scope scope;
+  std::string x_var_name = "x";
+  std::string out_var_name = "out";
+  std::string out_ref_var_name = "out_ref";
+  auto* x = scope.Var(x_var_name)->GetMutable();
+  auto* out = scope.Var(out_var_name)->GetMutable();
+  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable();
+  x->Resize({bs, ic, ih, iw});
+
+  // fill the input via the FillTensor test helper
+  FillTensor(x);
+
+  // describe a pool2d op; ksize/stride/padding are applied to both H and W
+  cpp::OpDesc opdesc;
+  opdesc.SetType("pool2d");
+  opdesc.SetInput("X", {x_var_name});
+  opdesc.SetOutput("Out", {out_var_name});
+  opdesc.SetAttr("pooling_type", pooling_type);
+  opdesc.SetAttr("ksize", std::vector({ksize, ksize}));
+  opdesc.SetAttr("global_pooling", global_pooling);
+  opdesc.SetAttr("exclusive", exclusive);
+  opdesc.SetAttr("strides", std::vector({stride, stride}));
+  opdesc.SetAttr("paddings", std::vector({padding, padding}));
+  opdesc.SetAttr("ceil_mode", ceil_mode);
+
+  // run via LauchOp, then stash `out` (presumably the XPU result) in out_ref
+  auto op = CreateOp(opdesc, &scope);
+  LauchOp(op, {x_var_name}, {out_var_name});
+  out_ref->CopyDataFrom(*out);
+
+  // the reference writes into `out`, so out_ref now holds the earlier result
+  pool_ref(op);
+
+  // element-wise comparison with an absolute tolerance of 1e-5
+  auto* out_data = out->mutable_data();
+  auto* out_ref_data = out_ref->mutable_data();
+  for (int i = 0; i < out->dims().production(); i++) {
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
+  }
+}
+
+TEST(XPUBridges, pool) {
+  for (auto pooling_type : {"max", "avg"}) {
+    for (auto bs : {1, 3}) {
+      for (auto ic : {2}) {
+        for (auto ih : {3}) {
+          for (auto iw : {4}) {
+            test_pool(bs, ic, ih, iw, pooling_type, true, true, true, 0, 1, 0);
+          }
+        }
+      }
+    }
+  }
+
+  for (auto pooling_type : {"max"}) {
+    for (auto ceil_mode : {true, false}) {
+      for (auto ksize : {2, 3}) {
+        for (auto stride : {1, 2}) {
+          for (auto padding : {0, 1}) {
+            for (auto bs : {1, 3}) {
+              for (auto ic : {2}) {
+                for (auto ih : {3}) {
+                  for (auto iw : {4}) {
+                    test_pool(bs,
+                              ic,
+                              ih,
+                              iw,
+                              pooling_type,
+                              ceil_mode,
+                              false,
+                              true,
+                              ksize,
+                              stride,
+                              padding);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  for (auto pooling_type : {"avg"}) {
+    for (auto ceil_mode : {true, false}) {
+      for (auto exclusive : {true, false}) {
+        for (auto ksize : {2, 3}) {
+          for (auto stride : {1, 2}) {
+            for (auto padding : {0, 1}) {
+              for (auto bs : {1, 3}) {
+                for (auto ic : {2}) {
+                  for (auto ih : {3}) {
+                    for (auto iw : {4}) {
+                      test_pool(bs,
+                                ic,
+                                ih,
+                                iw,
+                                pooling_type,
+                                ceil_mode,
+                                false,
+                                exclusive,
+                                ksize,
+                                stride,
+                                padding);
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+USE_LITE_OP(pool2d);
+USE_XPU_BRIDGE(pool2d);
diff --git a/lite/kernels/xpu/bridges/softmax_op.cc b/lite/kernels/xpu/bridges/softmax_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3972496762a1d399ab59e7a69b0e9e18a9c28300
--- /dev/null
+++ b/lite/kernels/xpu/bridges/softmax_op.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/backends/xpu/builder.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+// Maps a softmax op to an XPU softmax node built along the op's axis.
+node_map_type SoftmaxConverter(const std::shared_ptr op,
+                               graph_ctx_type* graph_ctx,
+                               const node_map_type& input_nodes) {
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_type = lite::xpu::UniqueName(op_type);
+  LOG(INFO) << "[XPU] Converting " + op_type + "...";
+
+  // the graph context, its builder and its params must all be set
+  CHECK(graph_ctx != nullptr);
+  CHECK(graph_ctx->builder != nullptr);
+  CHECK(graph_ctx->params != nullptr);
+
+  // fetch the X variable name and the axis attribute
+  auto x_var_name = op_info->Input("X").front();
+  auto axis = op_info->GetAttr("axis");
+
+  // X must already be converted; wrap the builder's softmax over it
+  CHECK(input_nodes.count(x_var_name));
+  std::shared_ptr softmax_node = nullptr;
+  softmax_node = std::make_shared(
+      graph_ctx->builder->CreateSoftmax(*input_nodes.at(x_var_name), axis));
+  graph_ctx->builder->SetLayer(unique_op_type);
+
+  // publish the new node under the op's "Out" variable name
+  node_map_type output_nodes;
+  output_nodes[op_info->Output("Out").front()] = softmax_node;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_XPU_BRIDGE(softmax,
+                    paddle::lite::kernels::xpu::bridges::SoftmaxConverter);
diff --git a/lite/kernels/xpu/bridges/softmax_op_test.cc b/lite/kernels/xpu/bridges/softmax_op_test.cc
new file mode 100644
index
0000000000000000000000000000000000000000..ee9a44acd5b8fec2e3df4d7bc4034808fc2b0b45
--- /dev/null
+++ b/lite/kernels/xpu/bridges/softmax_op_test.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/softmax_op.h"
+#include
+#include "lite/core/op_registry.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+#include "lite/kernels/xpu/bridges/test_helper.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+// CPU reference softmax along `axis` (negative axis counts from the end).
+template
+void softmax_ref(const std::shared_ptr op) {
+  Scope* scope = op->scope();
+  const OpInfo* op_info = op->op_info();
+  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable();
+  auto out =
+      scope->FindVar(op_info->Output("Out").front())->GetMutable();
+  auto x_data = x->data();
+  auto out_data = out->mutable_data();
+  DDim x_dims = x->dims();
+
+  auto x_rank = x_dims.size();
+  int axis = op_info->GetAttr("axis");
+  if (axis < 0) {
+    axis += x_rank;
+  }
+  int axis_size = x_dims[axis];
+  int outer_num = x_dims.Slice(0, axis).production();
+  int inner_num = x_dims.Slice(axis + 1, x_rank).production();
+  int compute_size = outer_num * inner_num;
+  for (int i = 0; i < compute_size; i++) {
+    int idx_inner = i % inner_num;
+    int idx_outer = (i / inner_num) * axis_size;
+    int start = idx_outer * inner_num + idx_inner;
+    int offset;
+
+    offset = start;
+    dtype max_data = std::numeric_limits::lowest();
+    for (int j = 0; j < axis_size; j++) {
+      max_data = x_data[offset] > max_data ? x_data[offset] : max_data;
+      offset += inner_num;
+    }
+
+    offset = start;
+    dtype sum_data = (dtype)0;
+    for (int j = 0; j < axis_size; j++) {
+      out_data[offset] = exp(x_data[offset] - max_data);
+      sum_data += out_data[offset];
+      offset += inner_num;
+    }
+
+    offset = start;
+    for (int j = 0; j < axis_size; j++) {
+      out_data[offset] /= sum_data;
+      offset += inner_num;
+    }
+  }
+}
+// Runs softmax on a 4-D input via LauchOp and compares with softmax_ref.
+void test_softmax(int bs, int ic, int ih, int iw, int axis) {
+  // create the x, out and out_ref tensors in a fresh scope
+  Scope scope;
+  std::string x_var_name = "x";
+  std::string out_var_name = "out";
+  std::string out_ref_var_name = "out_ref";
+  auto* x = scope.Var(x_var_name)->GetMutable();
+  auto* out = scope.Var(out_var_name)->GetMutable();
+  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable();
+  x->Resize({bs, ic, ih, iw});
+
+  // fill the input via the FillTensor test helper
+  FillTensor(x);
+
+  // describe a softmax op over x with the given axis
+  cpp::OpDesc opdesc;
+  opdesc.SetType("softmax");
+  opdesc.SetInput("X", {x_var_name});
+  opdesc.SetOutput("Out", {out_var_name});
+  opdesc.SetAttr("axis", axis);
+
+  // run via LauchOp, then stash `out` (presumably the XPU result) in out_ref
+  auto op = CreateOp(opdesc, &scope);
+  LauchOp(op, {x_var_name}, {out_var_name});
+  out_ref->CopyDataFrom(*out);
+
+  // the reference writes into `out`, so out_ref now holds the earlier result
+  softmax_ref(op);
+
+  // element-wise comparison with an absolute tolerance of 1e-5
+  auto* out_data = out->mutable_data();
+  auto* out_ref_data = out_ref->mutable_data();
+  for (int i = 0; i < out->dims().production(); i++) {
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
+  }
+}
+
+TEST(XPUBridges, softmax) {
+  for (auto bs : {2, 3}) {
+    for (auto ic : {4}) {
+      for (auto ih : {5}) {
+        for (auto iw : {6}) {
+          for (auto axis : {-3, -1, 0, 1, 2, 3}) {
+            test_softmax(bs, ic, ih, iw, axis);
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+USE_LITE_OP(softmax);
+USE_XPU_BRIDGE(softmax);