提交 c0d3605f 编写于 作者: J jiweibo

test=develop add matmul_op

上级 ca334444
......@@ -31,6 +31,7 @@ USE_LITE_KERNEL(fetch, kFPGA, kFP16, kNHWC, def);
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(matmul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(lrn, kARM, kFloat, kNCHW, def);
......
......@@ -19,6 +19,7 @@
#include "paddle_lite_factory_helper.h" // NOLINT
USE_LITE_OP(mul);
USE_LITE_OP(matmul);
USE_LITE_OP(fc);
USE_LITE_OP(relu);
USE_LITE_OP(scale);
......
......@@ -7,6 +7,7 @@ message(STATUS "compile with lite ARM kernels")
lite_cc_library(fc_compute_arm SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(activation_compute_arm SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(matmul_compute_arm SRCS matmul_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(softmax_compute_arm SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(conv_compute_arm SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm)
......@@ -84,6 +85,7 @@ set(arm_kernels
fc_compute_arm
activation_compute_arm
mul_compute_arm
matmul_compute_arm
scale_compute_arm
softmax_compute_arm
conv_compute_arm
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/matmul_compute.h"
#include <vector>
#include "lite/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// Writes the transpose of the row-major [m, n] matrix `src` into `dst`, which
// is filled as a row-major [n, m] matrix. `src` and `dst` must each hold at
// least m * n floats; the loops do nothing when m or n is not positive.
static void NaiveTranspose(int m, int n, const float* src, float* dst) {
  for (int row = 0; row < m; ++row) {
    // Start of the current source row.
    const float* in_row = src + row * n;
    for (int col = 0; col < n; ++col) {
      dst[col * m + row] = in_row[col];
    }
  }
}
// One-time setup hook of the kernel. It only touches the ARM context; no
// state is cached here.
void MatMulCompute::PrepareForRun() {
  // The returned reference is deliberately discarded.
  // NOTE(review): As<ARMContext>() may initialise the context as a side
  // effect - confirm before dropping this call.
  static_cast<void>(this->ctx_->template As<ARMContext>());
}
void MatMulCompute::Run() {
auto& param = Param<param_t>();
const auto* x_data = param.X->data<float>();
const auto* y_data = param.Y->data<float>();
auto* o_data = param.Out->mutable_data<float>();
auto x_dims = param.X->dims();
auto y_dims = param.Y->dims();
auto o_dims = param.Out->dims();
bool x_transpose = param.transpose_X;
bool y_transpose = param.transpose_Y;
float alpha = param.alpha;
auto& ctx = this->ctx_->template As<ARMContext>();
if (x_dims.size() > 2 && y_dims.size() >= 2) {
// x: [B, ..., M, K], y: [B, ..., K, N], out: [B, ..., M, N]
// x: [B, M, K], y: [K, N], out: [B, M, N]
if (!x_transpose && !y_transpose) {
CHECK_EQ(x_dims[x_dims.size() - 1], y_dims[y_dims.size() - 2])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< ") x_transpose is " << x_transpose << "y_transpose is "
<< y_transpose;
} else if (!x_transpose && y_transpose) {
CHECK_EQ(x_dims[x_dims.size() - 1], y_dims[y_dims.size() - 1])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< ") x_transpose is " << x_transpose << "y_transpose is "
<< y_transpose;
} else if (x_transpose && !y_transpose) {
CHECK_EQ(x_dims[x_dims.size() - 2], y_dims[y_dims.size() - 2])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< ") x_transpose is " << x_transpose << "y_transpose is "
<< y_transpose;
} else {
CHECK_EQ(x_dims[x_dims.size() - 2], y_dims[y_dims.size() - 1])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< ") x_transpose is " << x_transpose << "y_transpose is "
<< y_transpose;
}
if (!x_transpose) {
m_ = x_dims[x_dims.size() - 2];
k_ = x_dims[x_dims.size() - 1];
} else {
m_ = x_dims[x_dims.size() - 1];
k_ = x_dims[x_dims.size() - 2];
}
if (!y_transpose) {
n_ = y_dims[y_dims.size() - 1];
} else {
n_ = y_dims[y_dims.size() - 2];
}
int hblock = lite::arm::math::get_hblock(ctx.arch());
int m_round = 0;
m_round = hblock * ((m_ + hblock - 1) / hblock);
ctx.ExtendWorkspace(m_round * k_ * sizeof(float));
int x_inner = x_dims[x_dims.size() - 2] * x_dims[x_dims.size() - 1];
int y_inner = y_dims[y_dims.size() - 2] * y_dims[y_dims.size() - 1];
int out_inner = o_dims[o_dims.size() - 2] * o_dims[o_dims.size() - 1];
float* x_data_trans = nullptr;
if (x_transpose) {
x_data_trans = static_cast<float*>(malloc(sizeof(float) * x_inner));
}
if (y_dims.size() > 2) {
if (n_ == 1) {
for (size_t i = 0; i < x_dims.count(0, x_dims.size() - 2); ++i) {
if (x_transpose) {
NaiveTranspose(x_dims[x_dims.size() - 2],
x_dims[x_dims.size() - 1],
x_data + i * x_inner,
x_data_trans);
lite::arm::math::sgemv(x_data_trans,
y_data + i * y_inner,
o_data + i * out_inner,
false,
m_,
k_,
false,
nullptr,
false);
} else {
lite::arm::math::sgemv(x_data + i * x_inner,
y_data + i * y_inner,
o_data + i * out_inner,
false,
m_,
k_,
false,
nullptr,
false);
}
}
if (fabsf(param.alpha - 1.f) > 1e-8f) {
for (size_t i = 0; i < param.Out->dims().production(); ++i) {
o_data[i] *= param.alpha;
}
}
} else {
float* packed_x = static_cast<float*>(ctx.workspace_data<float>()) +
ctx.llc_size() / sizeof(float);
for (size_t i = 0; i < x_dims.count(0, x_dims.size() - 2); ++i) {
if (x_transpose) {
NaiveTranspose(x_dims[x_dims.size() - 2],
x_dims[x_dims.size() - 1],
x_data + i * x_inner,
x_data_trans);
lite::arm::math::prepackA(packed_x,
x_data_trans,
alpha,
k_,
0,
m_,
0,
k_,
false,
&ctx);
} else {
lite::arm::math::prepackA(packed_x,
x_data + i * x_inner,
alpha,
k_,
0,
m_,
0,
k_,
false,
&ctx);
}
int ldb = n_;
if (y_transpose) {
ldb = k_;
}
lite::arm::math::sgemm_prepack(y_transpose,
m_,
n_,
k_,
packed_x,
y_data + i * y_inner,
ldb,
0.f,
o_data + i * out_inner,
n_,
nullptr,
false,
false,
&ctx);
}
}
} else {
if (n_ == 1) {
for (size_t i = 0; i < x_dims.count(0, x_dims.size() - 2); ++i) {
if (x_transpose) {
NaiveTranspose(x_dims[x_dims.size() - 2],
x_dims[x_dims.size() - 1],
x_data + i * x_inner,
x_data_trans);
lite::arm::math::sgemv(x_data_trans,
y_data,
o_data + i * out_inner,
false,
m_,
k_,
false,
nullptr,
false);
} else {
lite::arm::math::sgemv(x_data + i * x_inner,
y_data,
o_data + i * out_inner,
false,
m_,
k_,
false,
nullptr,
false);
}
}
if (fabsf(param.alpha - 1.f) > 1e-8f) {
for (size_t i = 0; i < param.Out->dims().production(); ++i) {
o_data[i] *= param.alpha;
}
}
} else {
float* packed_x = static_cast<float*>(ctx.workspace_data<float>()) +
ctx.llc_size() / sizeof(float);
for (size_t i = 0; i < x_dims.count(0, x_dims.size() - 2); ++i) {
if (x_transpose) {
NaiveTranspose(x_dims[x_dims.size() - 2],
x_dims[x_dims.size() - 1],
x_data + i * x_inner,
x_data_trans);
lite::arm::math::prepackA(
packed_x, x_data_trans, alpha, k_, 0, m_, 0, k_, false, &ctx);
} else {
lite::arm::math::prepackA(packed_x,
x_data + i * x_inner,
alpha,
k_,
0,
m_,
0,
k_,
false,
&ctx);
}
int ldb = n_;
if (y_transpose) {
ldb = k_;
}
lite::arm::math::sgemm_prepack(y_transpose,
m_,
n_,
k_,
packed_x,
y_data,
ldb,
0.f,
o_data + i * out_inner,
n_,
nullptr,
false,
false,
&ctx);
}
}
}
if (x_data_trans) {
free(x_data_trans);
}
} else if (x_dims.size() == 2 && y_dims.size() == 2) {
// x: [M, K], y: [K, N], out: [M, N]
if (!x_transpose && !y_transpose) {
CHECK_EQ(x_dims[1], y_dims[0])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< "), x_transpose is " << x_transpose << ", y_transpose is "
<< y_transpose;
} else if (!x_transpose && y_transpose) {
CHECK_EQ(x_dims[1], y_dims[1])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< "), x_transpose is " << x_transpose << ", y_transpose is "
<< y_transpose;
} else if (x_transpose && !y_transpose) {
CHECK_EQ(x_dims[0], y_dims[0])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< "), x_transpose is " << x_transpose << ", y_transpose is "
<< y_transpose;
} else {
CHECK_EQ(x_dims[0], y_dims[1])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< "), x_transpose is " << x_transpose << ", y_transpose is "
<< y_transpose;
}
if (!x_transpose) {
m_ = x_dims[0];
k_ = x_dims[1];
} else {
m_ = x_dims[1];
k_ = x_dims[0];
}
if (!y_transpose) {
n_ = y_dims[1];
} else {
n_ = y_dims[0];
}
int hblock = lite::arm::math::get_hblock(ctx.arch());
int m_round = 0;
m_round = hblock * ((m_ + hblock - 1) / hblock);
ctx.ExtendWorkspace(m_round * k_ * sizeof(float));
if (n_ == 1) {
// lite::arm::math::sgemv doesn't support transpose.
if (x_transpose) {
float* x_data_trans =
static_cast<float*>(malloc(sizeof(float) * x_dims[0] * x_dims[1]));
NaiveTranspose(x_dims[0], x_dims[1], x_data, x_data_trans);
lite::arm::math::sgemv(
x_data_trans, y_data, o_data, false, m_, k_, false, nullptr, false);
} else {
lite::arm::math::sgemv(
x_data, y_data, o_data, false, m_, k_, false, nullptr, false);
}
if (fabsf(param.alpha - 1.f) > 1e-8f) {
for (size_t i = 0; i < param.Out->dims().production(); ++i) {
o_data[i] *= param.alpha;
}
}
} else {
float* packed_x = static_cast<float*>(ctx.workspace_data<float>()) +
ctx.llc_size() / sizeof(float);
// prepackA seems that doesn't support transpose.
if (x_transpose) {
float* x_data_trans =
static_cast<float*>(malloc(sizeof(float) * x_dims[0] * x_dims[1]));
NaiveTranspose(x_dims[0], x_dims[1], x_data, x_data_trans);
lite::arm::math::prepackA(
packed_x, x_data_trans, alpha, k_, 0, m_, 0, k_, false, &ctx);
} else {
lite::arm::math::prepackA(
packed_x, x_data, alpha, k_, 0, m_, 0, k_, false, &ctx);
}
int ldb = n_;
if (y_transpose) {
ldb = k_;
}
lite::arm::math::sgemm_prepack(y_transpose,
m_,
n_,
k_,
packed_x,
y_data,
ldb,
0.f,
o_data,
n_,
nullptr,
false,
false,
&ctx);
}
} else if (x_dims.size() > 2 && y_dims.size() == 1) {
// x: [B, M, K], y: [K], out: [B, M]
CHECK_EQ(x_dims[x_dims.size() - 1], y_dims[0])
<< "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< ")";
for (size_t i = 0; i < x_dims.count(0, x_dims.size() - 1); ++i) {
o_data[i] = 0;
for (size_t j = 0; j < y_dims[0]; ++j) {
o_data[i] += x_data[i * y_dims[0] + j] * y_data[j] * alpha;
}
}
} else if (x_dims.size() == 1 && y_dims.size() == 1) {
// x: [K], y: [K], out: [1]
if (x_dims[0] == y_dims[0] && x_transpose == false &&
y_transpose == false) {
o_data[0] = 0.;
for (size_t i = 0; i < x_dims[0]; ++i) {
o_data[0] += x_data[i] * y_data[i] * alpha;
}
}
// x: [M], y: [N], x_transpose: true, y_transpose: true, out: [M, N]
if (x_transpose == true && y_transpose == true) {
m_ = x_dims[0];
k_ = 1;
n_ = y_dims[0];
if (n_ == 1) {
lite::arm::math::sgemv(
x_data, y_data, o_data, false, m_, k_, false, nullptr, false);
if (fabsf(alpha - 1.f) > 1e-8f) {
for (size_t i = 0; i < param.Out->dims().production(); ++i) {
o_data[i] *= alpha;
}
}
} else {
float* packed_x = static_cast<float*>(ctx.workspace_data<float>()) +
ctx.llc_size() / sizeof(float);
lite::arm::math::prepackA(
packed_x, x_data, alpha, k_, 0, m_, 0, k_, false, &ctx);
int ldb = n_;
lite::arm::math::sgemm_prepack(false,
m_,
n_,
k_,
packed_x,
y_data,
ldb,
0.f,
o_data,
n_,
nullptr,
false,
false,
&ctx);
}
}
} else {
LOG(FATAL) << "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
<< ")";
}
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers MatMulCompute for op "matmul" with target kARM, precision kFloat,
// layout kNCHW and alias "def". Inputs X and Y and output Out are all bound
// to the kARM tensor type.
REGISTER_LITE_KERNEL(
matmul, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::MatMulCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/types.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// Float matmul kernel for the ARM target. Reads its inputs, transpose flags
// and alpha from operators::MatMulParam.
class MatMulCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  using param_t = operators::MatMulParam;

  // Setup hook invoked before the kernel runs.
  void PrepareForRun() override;

  // Executes the multiplication described by the attached parameters.
  void Run() override;

  virtual ~MatMulCompute() = default;

 private:
  // Sizes of the current product, out[m, n] = x[m, k] * y[k, n]. Run() assigns
  // them before use; they start at zero so they never hold indeterminate
  // values.
  int m_{0};
  int n_{0};
  int k_{0};
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -5,6 +5,7 @@ lite_cc_library(pool_op SRCS pool_op.cc DEPS ${op_DEPS})
lite_cc_library(fc_op SRCS fc_op.cc DEPS ${op_DEPS})
lite_cc_library(relu_op SRCS relu_op.cc DEPS ${op_DEPS})
lite_cc_library(mul_op SRCS mul_op.cc DEPS ${op_DEPS})
lite_cc_library(matmul_op SRCS matmul_op.cc DEPS ${op_DEPS})
lite_cc_library(scale_op SRCS scale_op.cc DEPS ${op_DEPS})
lite_cc_library(softmax_op SRCS softmax_op.cc DEPS ${op_DEPS})
lite_cc_library(reshape_op SRCS reshape_op.cc DEPS ${op_DEPS} )
......@@ -89,6 +90,7 @@ set(ops
fc_op
relu_op
mul_op
matmul_op
scale_op
softmax_op
reshape_op
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/matmul_op.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
// Verifies that the X, Y and Out tensor pointers of the op have been attached.
// Returns true when all three checks pass; each CHECK_OR_FALSE is expected to
// make the function return false otherwise (TODO confirm against the macro
// definition). No dimension checks happen here; those live in InferShape().
bool MatMulOpLite::CheckShape() const {
CHECK_OR_FALSE(param_.X);
CHECK_OR_FALSE(param_.Y);
CHECK_OR_FALSE(param_.Out);
return true;
}
// Derives the shape of Out from the shapes of X and Y and the two transpose
// flags, then resizes Out accordingly. Always returns true; unsupported
// inputs abort through CHECK_EQ / LOG(FATAL).
//
// Supported rank combinations:
//   * rank(X) > 2, rank(Y) >= 2 : out keeps the leading dims of X, followed
//                                 by [M, N].
//   * rank(X) == 2, rank(Y) == 2: out is [M, N].
//   * rank(X) > 2, rank(Y) == 1 : out is X without its last dim.
//   * rank(X) == 1, rank(Y) == 1: [1] for a dot product (no transposes, equal
//                                 lengths) or [M, N] for an outer product
//                                 (both transposes set).
bool MatMulOpLite::InferShape() const {
  const auto x_dims = param_.X->dims();
  const auto y_dims = param_.Y->dims();
  const bool x_transpose = param_.transpose_X;
  const bool y_transpose = param_.transpose_Y;
  const size_t x_rank = x_dims.size();
  const size_t y_rank = y_dims.size();

  std::vector<int64_t> dim_out_vec;

  const bool batched = x_rank > 2 && y_rank >= 2;
  const bool plain_2d = x_rank == 2 && y_rank == 2;

  if (batched || plain_2d) {
    // x: [B, ..., M, K], y: [B, ..., K, N], out: [B, ..., M, N]
    // x: [B, M, K], y: [K, N], out: [B, M, N]
    // x: [M, K], y: [K, N], out: [M, N]
    // The contracted extent is the last dim of op(X) and the second-to-last
    // dim of op(Y); the two must match.
    const auto x_contract =
        x_transpose ? x_dims[x_rank - 2] : x_dims[x_rank - 1];
    const auto y_contract =
        y_transpose ? y_dims[y_rank - 1] : y_dims[y_rank - 2];
    CHECK_EQ(x_contract, y_contract)
        << "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
        << "), x_transpose is " << x_transpose << ", y_transpose is "
        << y_transpose;

    dim_out_vec.resize(x_rank);
    // Leading dims are taken from X unchanged.
    for (size_t i = 0; i + 2 < x_rank; ++i) {
      dim_out_vec[i] = x_dims[i];
    }
    dim_out_vec[x_rank - 2] =
        x_transpose ? x_dims[x_rank - 1] : x_dims[x_rank - 2];
    dim_out_vec[x_rank - 1] =
        y_transpose ? y_dims[y_rank - 2] : y_dims[y_rank - 1];
  } else if (x_rank > 2 && y_rank == 1) {
    // x: [B, M, K], y: [K], out: [B, M]
    CHECK_EQ(x_dims[x_rank - 1], y_dims[0])
        << "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
        << ")";
    dim_out_vec.resize(x_rank - 1);
    for (size_t i = 0; i < dim_out_vec.size(); ++i) {
      dim_out_vec[i] = x_dims[i];
    }
  } else if (x_rank == 1 && y_rank == 1) {
    if (x_dims[0] == y_dims[0] && !x_transpose && !y_transpose) {
      // x: [K], y: [K], out: [1]
      dim_out_vec.resize(1);
      dim_out_vec[0] = 1;
    } else if (x_transpose && y_transpose) {
      // x: [M], y: [N], x_transpose: true, y_transpose: true, out: [M, N]
      dim_out_vec.resize(2);
      dim_out_vec[0] = x_dims[0];
      dim_out_vec[1] = y_dims[0];
    } else {
      // Previously this fell through with an empty shape and Out was silently
      // resized to it; fail like every other unsupported input instead.
      LOG(FATAL) << "not supported x_dims(" << x_dims << ") and y_dims("
                 << y_dims << "), x_transpose is " << x_transpose
                 << ", y_transpose is " << y_transpose;
    }
  } else {
    LOG(FATAL) << "not supported x_dims(" << x_dims << ") and y_dims(" << y_dims
               << ")";
  }

  DDim dim_out(dim_out_vec);
  param_.Out->Resize(dim_out);
  return true;
}
// Fills param_ from the op description: looks up the X / Y / Out variables in
// `scope` by the first name listed for each slot and copies the transpose_X,
// transpose_Y and alpha attributes. Returns true once everything is attached.
bool MatMulOpLite::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
  // Every slot must name at least one variable.
  CHECK(!op_desc.Input("X").empty());
  CHECK(!op_desc.Input("Y").empty());
  CHECK(!op_desc.Output("Out").empty());

  auto x_name = op_desc.Input("X").front();
  auto y_name = op_desc.Input("Y").front();
  auto out_name = op_desc.Output("Out").front();

  // Resolve the tensors behind those names.
  param_.X = GetVar<lite::Tensor>(scope, x_name);
  param_.Y = GetVar<lite::Tensor>(scope, y_name);
  param_.Out = GetMutableVar<lite::Tensor>(scope, out_name);

  // Scalar attributes.
  param_.transpose_X = op_desc.GetAttr<bool>("transpose_X");
  param_.transpose_Y = op_desc.GetAttr<bool>("transpose_Y");
  param_.alpha = op_desc.GetAttr<float>("alpha");

  return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
// Registers MatMulOpLite as the implementation of the op named "matmul".
REGISTER_LITE_OP(matmul, paddle::lite::operators::MatMulOpLite);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_lite.h"
#include "lite/core/scope.h"
#include "lite/operators/op_params.h"
#include "lite/utils/all.h"
namespace paddle {
namespace lite {
namespace operators {
// Operator-side description of `matmul`: owns the MatMulParam, validates and
// infers shapes, and hands the parameters to the kernel that gets attached.
class MatMulOpLite : public OpLite {
 public:
  MatMulOpLite() {}
  explicit MatMulOpLite(const std::string &type) : OpLite(type) {}

  // Populates param_ from the op description and the variables in `scope`.
  bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override;

  // Copies the current parameters into `kernel`.
  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }

  // Shape validation and output-shape inference.
  bool CheckShape() const override;
  bool InferShape() const override;

  std::string DebugString() const override { return "matmul"; }

 private:
  // mutable because the const InferShape() resizes the output through it.
  mutable MatMulParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -694,6 +694,16 @@ struct SliceParam {
std::vector<int> ends{};
std::vector<int> decrease_axis{};
};
/// ----------------------- matmul operators ----------------------
// Parameters of the matmul operator: the two inputs, the output, the
// per-input transpose flags and the scalar multiplier.
struct MatMulParam {
  // Left and right inputs; not owned.
  const lite::Tensor* X{nullptr};
  const lite::Tensor* Y{nullptr};
  // Result tensor; not owned.
  lite::Tensor* Out{nullptr};
  // Whether X / Y are to be used transposed.
  bool transpose_X{false};
  bool transpose_Y{false};
  // Scalar multiplier; defaults to 1.
  float alpha{1.f};
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -34,4 +34,5 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册