未验证 提交 c8ce8ae8 编写于 作者: Z zhupengyang 提交者: GitHub

[XPU] add mul op bridge (#2267)

* [XPU] add mul op bridge and unit test

test=develop

* use tmp tensor for transposed y

test=develop
上级 eed7a506
......@@ -7,6 +7,7 @@ lite_cc_library(xpu_bridge_conv_op SRCS conv_op.cc DEPS ${xpu_bridge_deps})
lite_cc_library(xpu_bridge_elementwise_ops SRCS elementwise_ops.cc DEPS ${xpu_bridge_deps})
lite_cc_library(xpu_bridge_pool_op SRCS pool_op.cc DEPS ${xpu_bridge_deps})
lite_cc_library(xpu_bridge_softmax_op SRCS softmax_op.cc DEPS ${xpu_bridge_deps})
lite_cc_library(xpu_bridge_mul_op SRCS mul_op.cc DEPS ${xpu_bridge_deps})
set(xpu_bridges
xpu_bridge_registry
......@@ -15,6 +16,7 @@ set(xpu_bridges
xpu_bridge_elementwise_ops
xpu_bridge_pool_op
xpu_bridge_softmax_op
xpu_bridge_mul_op
CACHE INTERNAL "xpu_bridges")
set(xpu_bridge_test_deps ${xpu_bridges} ${xpu_kernels} ${ops})
......@@ -24,3 +26,4 @@ lite_cc_test(test_xpu_bridge_conv_op SRCS conv_op_test.cc test_helper.cc DEPS ${
lite_cc_test(test_xpu_bridge_elementwise_ops SRCS elementwise_ops_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
lite_cc_test(test_xpu_bridge_pool_op SRCS pool_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
lite_cc_test(test_xpu_bridge_softmax_op SRCS softmax_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
lite_cc_test(test_xpu_bridge_mul_op SRCS mul_op_test.cc test_helper.cc DEPS ${xpu_bridge_test_deps})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/xpu/builder.h"
#include "lite/kernels/xpu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
namespace bridges {
// Converts a paddle "mul" op into an XPU (xtcl) dense node.
//
// The bridge flattens X to 2-D, transposes Y into [N, K] layout (CreateDense
// expects the weight that way — assumption based on the transpose below; the
// xtcl API itself is not visible here), and emits a CreateDense node.
//
// Restrictions enforced below: y must be 2-D and both x_num_col_dims and
// y_num_col_dims must be 1.
//
// @param op          the lite mul op being converted (scope + op_info).
// @param graph_ctx   target graph context (builder + params map); must be
//                    fully initialized.
// @param input_nodes already-converted upstream nodes, keyed by var name.
// @return            map from the op's output var name to the new node.
node_map_type MulConverter(const std::shared_ptr<lite::OpLite> op,
                           graph_ctx_type* graph_ctx,
                           const node_map_type& input_nodes) {
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_type = lite::xpu::UniqueName(op_type);
  LOG(INFO) << "[XPU] Converting " + op_type + "...";

  // Check context.
  CHECK(graph_ctx != nullptr);
  CHECK(graph_ctx->builder != nullptr);
  CHECK(graph_ctx->params != nullptr);

  // Get inputs and attributes.
  auto x_var_name = op_info->Input("X").front();
  auto y_var_name = op_info->Input("Y").front();
  auto y_tensor = scope->FindMutableTensor(y_var_name);
  auto y_dims = y_tensor->dims();
  CHECK_EQ(y_dims.size(), 2) << "xpu now only support y_dims.size() == 2";
  auto x_num_col_dims = op_info->GetAttr<int>("x_num_col_dims");
  CHECK_EQ(x_num_col_dims, 1) << "xpu now only support x_num_col_dims == 1";
  // BUGFIX: previously read attribute "x_num_col_dims" here, so a mul op
  // with y_num_col_dims != 1 would silently pass this CHECK and produce
  // wrong results.
  auto y_num_col_dims = op_info->GetAttr<int>("y_num_col_dims");
  CHECK_EQ(y_num_col_dims, 1) << "xpu now only support y_num_col_dims == 1";

  // Create x node: flatten X to [batch, K].
  std::shared_ptr<xtcl::xExpr> x_node = nullptr;
  x_node = std::make_shared<xtcl::xExpr>(
      graph_ctx->builder->CreateBatchFlatten(*input_nodes.at(x_var_name)));
  graph_ctx->builder->SetLayer(unique_op_type + "/X");

  // Transpose y from [K, N] to [N, K] into a temporary tensor.
  // RAII stack tensor instead of raw new/delete (the old code leaked
  // y_tensor_t if anything between new and delete threw).
  DDimLite y_dims_t(std::vector<int64_t>{y_dims[1], y_dims[0]});
  auto y_var_name_t = unique_op_type + "/Y";
  Tensor y_tensor_t;
  y_tensor_t.Resize(y_dims_t);
  auto y_data_t = y_tensor_t.mutable_data<float>();
  auto y_data = y_tensor->mutable_data<float>();
  for (int i = 0; i < y_dims_t[0]; i++) {
    for (int j = 0; j < y_dims_t[1]; j++) {
      y_data_t[i * y_dims_t[1] + j] = y_data[j * y_dims_t[0] + i];
    }
  }

  // Create y node as a constant tensor holding the transposed weights.
  std::shared_ptr<xtcl::xExpr> y_const_node = nullptr;
  y_const_node = std::make_shared<xtcl::xExpr>(graph_ctx->builder->CreateTensor(
      y_var_name_t, lite::xpu::CvtShape(y_dims_t), ::xtcl::Float(32)));
  auto y_const_tensor = lite::xpu::CvtTensor(&y_tensor_t);
  graph_ctx->params->emplace(std::make_pair(y_var_name_t, *y_const_tensor));

  // Create the dense (mul) node; output width is N == y_dims[1].
  std::shared_ptr<xtcl::xExpr> mul_node = nullptr;
  mul_node = std::make_shared<xtcl::xExpr>(graph_ctx->builder->CreateDense(
      *x_node, *y_const_node, static_cast<int>(y_dims[1])));
  graph_ctx->builder->SetLayer(unique_op_type);

  // Output converted nodes.
  node_map_type output_nodes;
  output_nodes[op_info->Output("Out").front()] = mul_node;
  return output_nodes;
}
} // namespace bridges
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_XPU_BRIDGE(mul, paddle::lite::kernels::xpu::bridges::MulConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/mul_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/xpu/bridges/registry.h"
#include "lite/kernels/xpu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
namespace bridges {
void mul_ref(const std::shared_ptr<operators::MulOpLite> op) {
Scope* scope = op->scope();
const OpInfo* op_info = op->op_info();
auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
auto y = scope->FindVar(op_info->Input("Y").front())->GetMutable<Tensor>();
auto out =
scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
int32_t x_num_col_dims = op_info->GetAttr<int32_t>("x_num_col_dims");
int32_t y_num_col_dims = op_info->GetAttr<int32_t>("y_num_col_dims");
auto x_data = x->mutable_data<float>();
auto y_data = y->mutable_data<float>();
auto out_data = out->mutable_data<float>();
auto x_mat_dims = x->dims().Flatten2D(x_num_col_dims);
auto y_mat_dims = y->dims().Flatten2D(y_num_col_dims);
CHECK_EQ(x_mat_dims[1], y_mat_dims[0]);
const int M = x_mat_dims[0];
const int K = x_mat_dims[1];
const int N = y_mat_dims[1];
for (int m = 0; m < M; ++m) {
for (int n = 0; n < N; ++n) {
out_data[m * N + n] = 0;
for (int k = 0; k < K; ++k) {
out_data[m * N + n] += x_data[m * K + k] * y_data[k * N + n];
}
}
}
}
void test_mul(const std::vector<int64_t>& x_shape,
const std::vector<int64_t>& y_shape,
int x_num_col_dims,
int y_num_col_dims) {
Scope scope;
std::string x_var_name("X");
std::string y_var_name("Y");
std::string out_var_name("Out");
std::string out_ref_var_name("out_ref");
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
x->Resize(x_shape);
y->Resize(y_shape);
FillTensor<float>(x);
FillTensor<float>(y);
// create mul op
cpp::OpDesc mul_op_desc;
mul_op_desc.SetType("mul");
mul_op_desc.SetInput("X", {x_var_name});
mul_op_desc.SetInput("Y", {y_var_name});
mul_op_desc.SetOutput("Out", {out_var_name});
mul_op_desc.SetAttr("x_num_col_dims", static_cast<int>(x_num_col_dims));
mul_op_desc.SetAttr("y_num_col_dims", static_cast<int>(y_num_col_dims));
auto mul_op = CreateOp<operators::MulOpLite>(mul_op_desc, &scope);
LauchOp(mul_op, {x_var_name}, {out_var_name});
out_ref->CopyDataFrom(*out);
mul_ref(mul_op);
// compare results
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
}
}
// End-to-end coverage of the XPU mul bridge. The bridge only supports
// x_num_col_dims == y_num_col_dims == 1 with a 2-D Y, so only such shapes
// are enabled; the commented cases are left as TODOs for when the bridge
// is generalized (assumption based on the CHECKs in MulConverter — confirm
// before enabling).
TEST(XPUBridges, mul) {
test_mul({1, 2, 3, 4}, {24, 2}, 1, 1);
test_mul({2, 2, 3, 4}, {24, 2}, 1, 1);
test_mul({2, 7}, {7, 3}, 1, 1);
// test_mul({1, 8, 8, 1}, {1, 8, 2, 2}, 2, 2);
// test_mul({1, 5, 5, 1}, {1, 5, 7, 7}, 2, 2);
// test_mul({1, 4, 1, 1}, {4, 8}, 1, 1);
}
} // namespace bridges
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(mul);
USE_XPU_BRIDGE(mul);
......@@ -19,3 +19,7 @@
USE_XPU_BRIDGE(relu);
USE_XPU_BRIDGE(conv2d);
USE_XPU_BRIDGE(depthwise_conv2d);
USE_XPU_BRIDGE(elementwise_add);
USE_XPU_BRIDGE(pool2d);
USE_XPU_BRIDGE(softmax);
USE_XPU_BRIDGE(mul);
......@@ -181,7 +181,7 @@ void test_pool(int bs,
}
}
TEST(NPUBridges, pool) {
TEST(XPUBridges, pool) {
for (auto pooling_type : {"max", "avg"}) {
for (auto bs : {1, 3}) {
for (auto ic : {2}) {
......
......@@ -110,7 +110,7 @@ void test_softmax(int bs, int ic, int ih, int iw, int axis) {
}
}
TEST(NPUBridges, softmax) {
TEST(XPUBridges, softmax) {
for (auto bs : {2, 3}) {
for (auto ic : {4}) {
for (auto ih : {5}) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册