未验证 提交 459848c4 编写于 作者: L liu zhengxi 提交者: GitHub

enable conv2d op and its unit tests, test=develop (#2200)

enable conv2d op and its unit tests on x86 device
上级 b963383a
...@@ -10,6 +10,9 @@ add_kernel(slice_compute_x86 X86 basic SRCS slice_compute.cc DEPS ${lite_kernel_ ...@@ -10,6 +10,9 @@ add_kernel(slice_compute_x86 X86 basic SRCS slice_compute.cc DEPS ${lite_kernel_
add_kernel(squeeze_compute_x86 X86 basic SRCS squeeze_compute.cc DEPS ${lite_kernel_deps}) add_kernel(squeeze_compute_x86 X86 basic SRCS squeeze_compute.cc DEPS ${lite_kernel_deps})
add_kernel(fill_constant_batch_size_like_compute_x86 X86 basic SRCS fill_constant_batch_size_like_compute.cc DEPS ${lite_kernel_deps} math_function) add_kernel(fill_constant_batch_size_like_compute_x86 X86 basic SRCS fill_constant_batch_size_like_compute.cc DEPS ${lite_kernel_deps} math_function)
add_kernel(reshape_compute_x86 X86 basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op) add_kernel(reshape_compute_x86 X86 basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op)
add_kernel(conv_compute_x86 X86 basic SRCS conv_compute.cc DEPS ${lite_kernel_deps} blas im2col vol2col)
# lite_cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op elementwise_add_op)
# lite_cc_library(softmax_compute_x86 SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax)
# lite_cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps} ) # lite_cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps} )
# lite_cc_library(conv_compute_x86 SRCS conv_compute.cc DEPS ${lite_kernel_deps} blas im2col vol2col) # lite_cc_library(conv_compute_x86 SRCS conv_compute.cc DEPS ${lite_kernel_deps} blas im2col vol2col)
# lite_cc_library(pool_compute_x86 SRCS pool_compute.cc DEPS ${lite_kernel_deps} pooling) # lite_cc_library(pool_compute_x86 SRCS pool_compute.cc DEPS ${lite_kernel_deps} pooling)
...@@ -37,6 +40,7 @@ if(NOT LITE_WITH_X86) ...@@ -37,6 +40,7 @@ if(NOT LITE_WITH_X86)
endif() endif()
add_kernel(matmul_compute_x86 X86 basic SRCS matmul_compute.cc DEPS ${lite_kernel_deps} blas) add_kernel(matmul_compute_x86 X86 basic SRCS matmul_compute.cc DEPS ${lite_kernel_deps} blas)
lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86)
lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86) lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86)
lite_cc_test(test_slice_compute_x86 SRCS slice_compute_test.cc DEPS slice_compute_x86) lite_cc_test(test_slice_compute_x86 SRCS slice_compute_test.cc DEPS slice_compute_x86)
lite_cc_test(test_squeeze_compute_x86 SRCS squeeze_compute_test.cc DEPS squeeze_compute_x86) lite_cc_test(test_squeeze_compute_x86 SRCS squeeze_compute_test.cc DEPS squeeze_compute_x86)
......
...@@ -16,15 +16,14 @@ ...@@ -16,15 +16,14 @@
#include <Eigen/Core> #include <Eigen/Core>
#include <string> #include <string>
#include <vector> #include <vector>
#include "lite/backends/x86/math/blas.h"
#include "lite/backends/x86/math/im2col.h"
#include "lite/backends/x86/math/vol2col.h"
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
#include "lite/core/types.h" #include "lite/core/types.h"
#include "lite/fluid/eigen.h"
#include "lite/operators/conv_op.h" #include "lite/operators/conv_op.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/depthwise_conv.h"
#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/vol2col.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -50,15 +49,14 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -50,15 +49,14 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public: public:
using param_t = operators::ConvParam; using param_t = operators::ConvParam;
void Run() override { void Run() override {
auto& context = ctx_->As<X86Context>();
auto& param = *param_.get_mutable<operators::ConvParam>(); auto& param = *param_.get_mutable<operators::ConvParam>();
lite::Tensor filter = *param.filter; lite::Tensor filter = *param.filter;
param.output->template mutable_data<T>(); param.output->mutable_data<T>();
const int batch_size = static_cast<int>(param.x->dims()[0]); const int batch_size = static_cast<int>(param.x->dims()[0]);
std::vector<int64_t> filter_shape_vec(filter.dims().Vectorize()); std::vector<int64_t> filter_shape_vec(filter.dims().Vectorize());
std::vector<int64_t> output_shape_vec(param.output->dims().Vectorize()); std::vector<int64_t> output_shape_vec(param.output->dims().Vectorize());
size_t data_dim = filter_shape_vec.size() - 2; size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim); std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = param.x->dims()[1] / param.groups; col_shape_vec[0] = param.x->dims()[1] / param.groups;
...@@ -70,7 +68,6 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -70,7 +68,6 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
lite::DDim col_matrix_shape = col_shape.Flatten2D(data_dim + 1); lite::DDim col_matrix_shape = col_shape.Flatten2D(data_dim + 1);
bool is_expand = IsExpand( bool is_expand = IsExpand(
filter_shape_vec, param.strides, param.paddings, param.dilations); filter_shape_vec, param.strides, param.paddings, param.dilations);
lite::Tensor col; lite::Tensor col;
lite::Tensor col_matrix; lite::Tensor col_matrix;
if (is_expand) { if (is_expand) {
...@@ -80,40 +77,37 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -80,40 +77,37 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
col_matrix.Resize(col_matrix_shape); col_matrix.Resize(col_matrix_shape);
} }
lite::DDim input_shape = param.x->dims().Slice(1, param.x->dims().size()); lite::DDim input_shape = param.x->dims().Slice(1, param.x->dims().size());
lite::DDim filter_matrix_shape(std::vector<int64_t>{ lite::DDim filter_matrix_shape(std::vector<int64_t>{
filter.dims()[0], filter.dims().production() / filter.dims()[0]}); filter.dims()[0], filter.dims().production() / filter.dims()[0]});
filter.Resize(filter_matrix_shape); filter.Resize(filter_matrix_shape);
lite::DDim output_matrix_shape(std::vector<int64_t>{ lite::DDim output_matrix_shape(std::vector<int64_t>{
param.output->dims()[1], param.output->dims()[1],
param.output->dims().production() / param.output->dims().production() /
(param.output->dims()[0] * param.output->dims()[1])}); (param.output->dims()[0] * param.output->dims()[1])});
int in_step = static_cast<int>(param.x->dims()[1]) / param.groups; int in_step = static_cast<int>(param.x->dims()[1]) / param.groups;
int out_step = static_cast<int>(param.output->dims()[1]) / param.groups; int out_step = static_cast<int>(param.output->dims()[1]) / param.groups;
paddle::lite::x86::math::Vol2ColFunctor<lite::TargetType::kX86, T> vol2col;
paddle::operators::math::Vol2ColFunctor<platform::CPUDeviceContext, T> paddle::lite::x86::math::Im2ColFunctor<
vol2col; paddle::lite::x86::math::ColFormat::kCFO,
paddle::operators::math::Im2ColFunctor< lite::TargetType::kX86,
paddle::operators::math::ColFormat::kCFO,
platform::CPUDeviceContext,
T> T>
im2col; im2col;
auto blas = paddle::operators::math::GetBlas<platform::CPUDeviceContext, T>( auto blas =
platform::CPUDeviceContext()); paddle::lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
lite::Tensor in_batch; lite::Tensor in_batch;
in_batch.ShareDataWith( lite::Tensor tmp_in_batch = param.x->Slice<T>(i, i + 1);
param.x->raw_tensor().Slice(i, i + 1).Resize(input_shape.data())); tmp_in_batch.Resize(input_shape);
in_batch.ShareDataWith(tmp_in_batch);
lite::Tensor out_batch; lite::Tensor out_batch;
out_batch.ShareDataWith(param.output->raw_tensor().Slice(i, i + 1).Resize( lite::Tensor tmp_out_batch = param.output->Slice<T>(i, i + 1);
output_matrix_shape.data())); tmp_out_batch.Resize(output_matrix_shape);
out_batch.ShareDataWith(tmp_out_batch);
for (int g = 0; g < param.groups; g++) { for (int g = 0; g < param.groups; g++) {
lite::Tensor in_slice; lite::Tensor in_slice;
in_slice.ShareDataWith( in_slice.ShareDataWith(
in_batch.raw_tensor().Slice(g * in_step, (g + 1) * in_step)); in_batch.Slice<T>(static_cast<int64_t>(g * in_step),
static_cast<int64_t>((g + 1) * in_step)));
if (!is_expand) { if (!is_expand) {
col.ShareDataWith(in_slice); col.ShareDataWith(in_slice);
...@@ -121,38 +115,40 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -121,38 +115,40 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
col_matrix.Resize(col_matrix_shape); col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) { } else if (data_dim == 2U) {
// im2col // im2col
im2col(platform::CPUDeviceContext(), im2col(context,
in_slice.raw_tensor(), in_slice,
param.dilations, param.dilations,
param.strides, param.strides,
std::vector<int>{param.paddings[0], std::vector<int>{param.paddings[0],
param.paddings[1], param.paddings[1],
param.paddings[0], param.paddings[0],
param.paddings[1]}, param.paddings[1]},
&(col.raw_tensor())); &(col));
} else if (data_dim == 3U) { } else if (data_dim == 3U) {
// vol2col // vol2col
vol2col(platform::CPUDeviceContext(), vol2col(context,
in_slice.raw_tensor(), in_slice,
param.dilations, param.dilations,
param.strides, param.strides,
param.paddings, param.paddings,
&(col.raw_tensor())); &(col));
} }
// gemm // gemm
lite::Tensor out_slice; lite::Tensor out_slice;
out_slice.ShareDataWith( out_slice.ShareDataWith(
out_batch.raw_tensor().Slice(g * out_step, (g + 1) * out_step)); out_batch.Slice<T>(static_cast<int64_t>(g * out_step),
static_cast<int64_t>((g + 1) * out_step)));
lite::Tensor filter_slice; lite::Tensor filter_slice;
filter_slice.ShareDataWith( filter_slice.ShareDataWith(
filter.raw_tensor().Slice(g * out_step, (g + 1) * out_step)); filter.Slice<T>(static_cast<int64_t>(g * out_step),
blas.MatMul(filter_slice.raw_tensor(), static_cast<int64_t>((g + 1) * out_step)));
blas.MatMul(filter_slice,
false, false,
col_matrix.raw_tensor(), col_matrix,
false, false,
T(1.0), T(1.0),
&(out_slice.raw_tensor()), &(out_slice),
T(0.0)); T(0.0));
} }
} }
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "lite/kernels/x86/conv_compute.h" #include "lite/kernels/x86/conv_compute.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <memory>
#include <utility>
#include <vector> #include <vector>
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
...@@ -38,7 +40,7 @@ TEST(conv2d_x86, init) { ...@@ -38,7 +40,7 @@ TEST(conv2d_x86, init) {
TEST(conv2d_x86, run_test) { TEST(conv2d_x86, run_test) {
lite::Tensor x, filter, b, out; lite::Tensor x, filter, b, out;
constexpr int batch_size = 1; const int batch_size = 1;
std::vector<int64_t> x_shape{batch_size, 3, 3, 3}; std::vector<int64_t> x_shape{batch_size, 3, 3, 3};
x.Resize(lite::DDim(x_shape)); x.Resize(lite::DDim(x_shape));
std::vector<int64_t> filter_shape{1, 3, 3, 3}; std::vector<int64_t> filter_shape{1, 3, 3, 3};
...@@ -74,7 +76,10 @@ TEST(conv2d_x86, run_test) { ...@@ -74,7 +76,10 @@ TEST(conv2d_x86, run_test) {
param.paddings = {0, 0}; param.paddings = {0, 0};
param.groups = 1; param.groups = 1;
param.dilations = {1, 1}; param.dilations = {1, 1};
LOG(INFO) << 123;
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<X86Context>();
conv2d.SetContext(std::move(ctx));
conv2d.SetParam(param); conv2d.SetParam(param);
conv2d.Run(); conv2d.Run();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册