diff --git a/lite/kernels/xpu/bridges/CMakeLists.txt b/lite/kernels/xpu/bridges/CMakeLists.txt
index ebddd36451f6d78ce52115c6a6ef9e47e044fd00..a1f7b67be3b0b1798ea50daa6638873500786912 100644
--- a/lite/kernels/xpu/bridges/CMakeLists.txt
+++ b/lite/kernels/xpu/bridges/CMakeLists.txt
@@ -7,6 +7,7 @@ lite_cc_library(xpu_bridge_conv_op SRCS conv_op.cc DEPS ${xpu_bridge_deps})
 lite_cc_library(xpu_bridge_elementwise_ops SRCS elementwise_ops.cc DEPS ${xpu_bridge_deps})
 lite_cc_library(xpu_bridge_pool_op SRCS pool_op.cc DEPS ${xpu_bridge_deps})
 lite_cc_library(xpu_bridge_softmax_op SRCS softmax_op.cc DEPS ${xpu_bridge_deps})
+lite_cc_library(xpu_bridge_mul_op SRCS mul_op.cc DEPS ${xpu_bridge_deps})
 
 set(xpu_bridges
     xpu_bridge_registry
@@ -15,6 +16,7 @@ set(xpu_bridges
     xpu_bridge_elementwise_ops
     xpu_bridge_pool_op
     xpu_bridge_softmax_op
+    xpu_bridge_mul_op
     CACHE INTERNAL "xpu_bridges")
 
 set(xpu_bridge_test_deps ${xpu_bridges} ${xpu_kernels} ${ops})
@@ -24,3 +26,4 @@ lite_cc_test(test_xpu_bridge_conv_op SRCS conv_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
 lite_cc_test(test_xpu_bridge_elementwise_ops SRCS elementwise_ops_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
 lite_cc_test(test_xpu_bridge_pool_op SRCS pool_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
 lite_cc_test(test_xpu_bridge_softmax_op SRCS softmax_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
+lite_cc_test(test_xpu_bridge_mul_op SRCS mul_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
diff --git a/lite/kernels/xpu/bridges/mul_op.cc b/lite/kernels/xpu/bridges/mul_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..edf44f78bbfb54cf4316d3b9d7d9be2a121669d7
--- /dev/null
+++ b/lite/kernels/xpu/bridges/mul_op.cc
@@ -0,0 +1,97 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/backends/xpu/builder.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+
+node_map_type MulConverter(const std::shared_ptr<lite::OpLite> op,
+                           graph_ctx_type* graph_ctx,
+                           const node_map_type& input_nodes) {
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto unique_op_type = lite::xpu::UniqueName(op_type);
+  LOG(INFO) << "[XPU] Converting " + op_type + "...";
+
+  // check context
+  CHECK(graph_ctx != nullptr);
+  CHECK(graph_ctx->builder != nullptr);
+  CHECK(graph_ctx->params != nullptr);
+
+  // get input, and attributes
+  auto x_var_name = op_info->Input("X").front();
+  auto y_var_name = op_info->Input("Y").front();
+  auto y_tensor = scope->FindMutableTensor(y_var_name);
+  auto y_dims = y_tensor->dims();
+  CHECK_EQ(y_dims.size(), 2) << "xpu now only support y_dims.size() == 2";
+
+  auto x_num_col_dims = op_info->GetAttr<int>("x_num_col_dims");
+  CHECK_EQ(x_num_col_dims, 1) << "xpu now only support x_num_col_dims == 1";
+  auto y_num_col_dims = op_info->GetAttr<int>("y_num_col_dims");
+  CHECK_EQ(y_num_col_dims, 1) << "xpu now only support y_num_col_dims == 1";
+
+  // create x node
+  std::shared_ptr<xtcl::xExpr> x_node = nullptr;
+  x_node = std::make_shared<xtcl::xExpr>(
+      graph_ctx->builder->CreateBatchFlatten(*input_nodes.at(x_var_name)));
+  graph_ctx->builder->SetLayer(unique_op_type + "/X");
+
+  // transpose y, since CreateDense expects the weight in (n, k) layout
+  DDimLite y_dims_t(std::vector<int64_t>{1, 1});
+  y_dims_t[0] = y_dims[1];
+  y_dims_t[1] = y_dims[0];
+  auto y_var_name_t = unique_op_type + "/Y";
+  Tensor* y_tensor_t = new Tensor();
+  y_tensor_t->Resize(y_dims_t);
+  auto y_data_t = y_tensor_t->mutable_data<float>();
+  auto y_data = y_tensor->mutable_data<float>();
+  for (int i = 0; i < y_dims_t[0]; i++) {
+    for (int j = 0; j < y_dims_t[1]; j++) {
+      y_data_t[i * y_dims_t[1] + j] = y_data[j * y_dims_t[0] + i];
+    }
+  }
+
+  // create y node
+  std::shared_ptr<xtcl::xExpr> y_const_node = nullptr;
+  y_const_node = std::make_shared<xtcl::xExpr>(graph_ctx->builder->CreateTensor(
+      y_var_name_t, lite::xpu::CvtShape(y_dims_t), ::xtcl::Float(32)));
+  auto y_const_tensor = lite::xpu::CvtTensor(y_tensor_t);
+  graph_ctx->params->emplace(std::make_pair(y_var_name_t, *y_const_tensor));
+  delete y_tensor_t;
+
+  // create mul node and set params from op
+  std::shared_ptr<xtcl::xExpr> mul_node = nullptr;
+  mul_node = std::make_shared<xtcl::xExpr>(graph_ctx->builder->CreateDense(
+      *x_node, *y_const_node, static_cast<int>(y_dims[1])));
+  graph_ctx->builder->SetLayer(unique_op_type);
+
+  // output converted nodes
+  node_map_type output_nodes;
+  output_nodes[op_info->Output("Out").front()] = mul_node;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_XPU_BRIDGE(mul, paddle::lite::kernels::xpu::bridges::MulConverter);
diff --git a/lite/kernels/xpu/bridges/mul_op_test.cc b/lite/kernels/xpu/bridges/mul_op_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cd439b68cb7286a919a8fce97371443f53ed40db
--- /dev/null
+++ b/lite/kernels/xpu/bridges/mul_op_test.cc
@@ -0,0 +1,113 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/mul_op.h"
+#include <gtest/gtest.h>
+#include "lite/core/op_registry.h"
+#include "lite/kernels/xpu/bridges/registry.h"
+#include "lite/kernels/xpu/bridges/test_helper.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+namespace bridges {
+
+// naive CPU matmul used as the reference result for the XPU bridge
+void mul_ref(const std::shared_ptr<operators::MulOpLite> op) {
+  Scope* scope = op->scope();
+  const OpInfo* op_info = op->op_info();
+  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
+  auto y = scope->FindVar(op_info->Input("Y").front())->GetMutable<Tensor>();
+  auto out =
+      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
+  int32_t x_num_col_dims = op_info->GetAttr<int32_t>("x_num_col_dims");
+  int32_t y_num_col_dims = op_info->GetAttr<int32_t>("y_num_col_dims");
+  auto x_data = x->mutable_data<float>();
+  auto y_data = y->mutable_data<float>();
+  auto out_data = out->mutable_data<float>();
+  auto x_mat_dims = x->dims().Flatten2D(x_num_col_dims);
+  auto y_mat_dims = y->dims().Flatten2D(y_num_col_dims);
+  CHECK_EQ(x_mat_dims[1], y_mat_dims[0]);
+  const int M = x_mat_dims[0];
+  const int K = x_mat_dims[1];
+  const int N = y_mat_dims[1];
+  for (int m = 0; m < M; ++m) {
+    for (int n = 0; n < N; ++n) {
+      out_data[m * N + n] = 0;
+      for (int k = 0; k < K; ++k) {
+        out_data[m * N + n] += x_data[m * K + k] * y_data[k * N + n];
+      }
+    }
+  }
+}
+
+void test_mul(const std::vector<int64_t>& x_shape,
+              const std::vector<int64_t>& y_shape,
+              int x_num_col_dims,
+              int y_num_col_dims) {
+  Scope scope;
+  std::string x_var_name("X");
+  std::string y_var_name("Y");
+  std::string out_var_name("Out");
+  std::string out_ref_var_name("out_ref");
+  auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
+  auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
+  auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
+  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
+  x->Resize(x_shape);
+  y->Resize(y_shape);
+
+  FillTensor<float>(x);
+  FillTensor<float>(y);
+
+  // create mul op
+  cpp::OpDesc mul_op_desc;
+  mul_op_desc.SetType("mul");
+  mul_op_desc.SetInput("X", {x_var_name});
+  mul_op_desc.SetInput("Y", {y_var_name});
+  mul_op_desc.SetOutput("Out", {out_var_name});
+  mul_op_desc.SetAttr("x_num_col_dims", static_cast<int>(x_num_col_dims));
+  mul_op_desc.SetAttr("y_num_col_dims", static_cast<int>(y_num_col_dims));
+
+  auto mul_op = CreateOp<operators::MulOpLite>(mul_op_desc, &scope);
+  LauchOp(mul_op, {x_var_name}, {out_var_name});
+  out_ref->CopyDataFrom(*out);
+
+  mul_ref(mul_op);
+
+  // compare results
+  auto* out_data = out->mutable_data<float>();
+  auto* out_ref_data = out_ref->mutable_data<float>();
+  for (int i = 0; i < out->dims().production(); i++) {
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
+  }
+}
+
+TEST(XPUBridges, mul) {
+  test_mul({1, 2, 3, 4}, {24, 2}, 1, 1);
+  test_mul({2, 2, 3, 4}, {24, 2}, 1, 1);
+  test_mul({2, 7}, {7, 3}, 1, 1);
+  // test_mul({1, 8, 8, 1}, {1, 8, 2, 2}, 2, 2);
+  // test_mul({1, 5, 5, 1}, {1, 5, 7, 7}, 2, 2);
+  // test_mul({1, 4, 1, 1}, {4, 8}, 1, 1);
+}
+
+}  // namespace bridges
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+USE_LITE_OP(mul);
+USE_XPU_BRIDGE(mul);
diff --git a/lite/kernels/xpu/bridges/paddle_use_xpu_bridges.h b/lite/kernels/xpu/bridges/paddle_use_xpu_bridges.h
index ee48fee626b9459bb24780e9241dab3071307774..27e936eaaa125f26b0bdab43f5c38d60769cfd88 100644
--- a/lite/kernels/xpu/bridges/paddle_use_xpu_bridges.h
+++ b/lite/kernels/xpu/bridges/paddle_use_xpu_bridges.h
@@ -19,3 +19,7 @@
 USE_XPU_BRIDGE(relu);
 USE_XPU_BRIDGE(conv2d);
 USE_XPU_BRIDGE(depthwise_conv2d);
+USE_XPU_BRIDGE(elementwise_add);
+USE_XPU_BRIDGE(pool2d);
+USE_XPU_BRIDGE(softmax);
+USE_XPU_BRIDGE(mul);
diff --git a/lite/kernels/xpu/bridges/pool_op_test.cc b/lite/kernels/xpu/bridges/pool_op_test.cc
index 512d59feb1340bcaa485d9290886cf5d58a878cf..ed5f922d59b5ca5e387076c9a533c4b4c251cc87 100644
--- a/lite/kernels/xpu/bridges/pool_op_test.cc
+++ b/lite/kernels/xpu/bridges/pool_op_test.cc
@@ -181,7 +181,7 @@ void test_pool(int bs,
   }
 }
 
-TEST(NPUBridges, pool) {
+TEST(XPUBridges, pool) {
   for (auto pooling_type : {"max", "avg"}) {
     for (auto bs : {1, 3}) {
       for (auto ic : {2}) {
diff --git a/lite/kernels/xpu/bridges/softmax_op_test.cc b/lite/kernels/xpu/bridges/softmax_op_test.cc
index ee9a44acd5b8fec2e3df4d7bc4034808fc2b0b45..2cd12cbf4e8dc108ac43fec55a568ecec72a51ab 100644
--- a/lite/kernels/xpu/bridges/softmax_op_test.cc
+++ b/lite/kernels/xpu/bridges/softmax_op_test.cc
@@ -110,7 +110,7 @@ void test_softmax(int bs, int ic, int ih, int iw, int axis) {
   }
 }
 
-TEST(NPUBridges, softmax) {
+TEST(XPUBridges, softmax) {
   for (auto bs : {2, 3}) {
     for (auto ic : {4}) {
       for (auto ih : {5}) {