Unverified commit 7014a76b, authored by lijianshe02 and committed by GitHub

add lite x86 ops for ASR test=develop (#1981)

* add lite x86 ops for ASR test=develop

* add lite x86 ops for ASR test=develop

* fix x86 ci run test problems test=develop

* fix mkl path for CI test=develop
Parent 04f4775b
......@@ -6,7 +6,7 @@ configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
configure_file(warpctc_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/warpctc_lib_path.h)
lite_cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
#lite_cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml)
lite_cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml)
lite_cc_library(target_wrapper_x86 SRCS target_wrapper.cc)
lite_cc_library(x86_cpu_info SRCS cpu_info.cc DEPS xbyak)
......
......@@ -54,8 +54,8 @@ DEFINE_string(
DEFINE_string(mklml_dir, "", "Specify path for loading libmklml_intel.so.");
namespace paddle {
namespace platform {
namespace dynload {
namespace lite {
namespace x86 {
static constexpr char cupti_lib_path[] = CUPTI_LIB_PATH;
static constexpr char warpctc_lib_path[] = WARPCTC_LIB_PATH;
......@@ -258,6 +258,6 @@ void* GetMKLMLDsoHandle() {
#endif
}
} // namespace dynload
} // namespace platform
} // namespace x86
} // namespace lite
} // namespace paddle
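
The loader above moves from paddle::platform::dynload into paddle::lite::x86, and the CMake change near the top builds dynload_mklml instead of leaving it commented out. For orientation, here is a minimal POSIX sketch of the dlopen/dlsym pattern that GetMKLMLDsoHandle relies on; it is illustrative only, and the real loader adds configurable search paths (for example the mklml_dir flag above) and richer error handling.

// Minimal sketch of runtime loading of libmklml_intel.so; build with -ldl.
// cblas_sgemm is used here only as a symbol MKLML is known to export.
#include <dlfcn.h>
#include <cstdio>

int main() {
  void* handle = dlopen("libmklml_intel.so", RTLD_LAZY | RTLD_LOCAL);
  if (!handle) {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  void* gemm = dlsym(handle, "cblas_sgemm");
  std::printf("cblas_sgemm %s\n", gemm ? "resolved" : "missing");
  dlclose(handle);
  return 0;
}
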
......@@ -16,7 +16,7 @@ function(math_library TARGET)
endif()
list(LENGTH cc_srcs cc_srcs_len)
lite_cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps} eigen3)
lite_cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps} eigen3 dynload_mklml)
endfunction()
# please add new math_library in alphabetical order
......
......@@ -483,7 +483,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
mat_a.data<T>(),
mat_b.data<T>(),
beta,
mat_out->data<T>());
mat_out->mutable_data<T>());
}
template <>
......@@ -759,7 +759,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
mat_a.data<T>(),
mat_b.data<T>(),
beta,
mat_out->data<T>());
mat_out->mutable_data<T>());
} else {
PADDLE_ENFORCE(dim_a.batch_size_ == dim_b.batch_size_ ||
dim_a.batch_size_ == 0 || dim_b.batch_size_ == 0);
......@@ -773,7 +773,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
mat_a.data<T>(),
mat_b.data<T>(),
beta,
mat_out->data<T>(),
mat_out->mutable_data<T>(),
dim_a.batch_size_ == 0 ? dim_b.batch_size_ : dim_a.batch_size_,
dim_a.stride_,
dim_b.stride_);
......
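
The three Blas changes above switch the GEMM output from data<T>() to mutable_data<T>() while the inputs keep data<T>(). A minimal sketch of that accessor split, using a hypothetical MiniTensor rather than the real lite::Tensor, assuming data() is the read-only view and mutable_data() the writable one:

// Hypothetical MiniTensor; not the actual lite::Tensor implementation.
#include <cstddef>
#include <vector>

template <typename T>
class MiniTensor {
 public:
  explicit MiniTensor(size_t n) : buf_(n) {}
  // Read-only view, fine for the GEMM inputs mat_a and mat_b.
  const T* data() const { return buf_.data(); }
  // Writable view, required for the GEMM output mat_out.
  T* mutable_data() { return buf_.data(); }

 private:
  std::vector<T> buf_;
};
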
......@@ -218,8 +218,13 @@ R *TensorLite::mutable_data(TargetType target) {
template <typename T>
TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
CHECK_GE(begin, 0);
CHECK_LE(end, dims_[0]);
CHECK_LT(begin, end);
if (dims_[0] == 1) {
return *this;
} else {
int64_t base = numel() / dims_[0];
TensorLite dst;
dst.buffer_ = buffer_;
dst.target_ = target_;
......@@ -228,6 +233,7 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
dst.Resize(dst_dims);
dst.offset_ = offset_ + static_cast<size_t>(begin * base) * sizeof(T);
return dst;
}
}
template <typename TensorT>
......
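
TensorLite::Slice above gains bounds checks plus a fast path that returns the tensor itself when dims_[0] == 1; the general path still builds a shallow view that shares buffer_ and shifts offset_ by begin * base * sizeof(T). A standalone sketch of that zero-copy row-slice arithmetic, with hypothetical names outside the Lite codebase:

// Zero-copy slice [begin, end) over axis 0: share storage, adjust dims/offset.
#include <cassert>
#include <cstdint>
#include <vector>

struct RowSlice {
  const float* data;          // shared, un-owned storage
  std::vector<int64_t> dims;  // dims with axis 0 shrunk to end - begin
};

RowSlice SliceRows(const float* buf, std::vector<int64_t> dims,
                   int64_t begin, int64_t end) {
  assert(begin >= 0 && end <= dims[0] && begin < end);
  int64_t base = 1;  // elements per row, i.e. numel() / dims[0]
  for (size_t i = 1; i < dims.size(); ++i) base *= dims[i];
  RowSlice view{buf + begin * base, std::move(dims)};
  view.dims[0] = end - begin;
  return view;
}
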
......@@ -23,11 +23,18 @@ add_kernel(scale_compute_x86 X86 basic SRCS scale_compute.cc DEPS ${lite_kernel_
# lite_cc_test(test_fc_compute_x86 SRCS fc_compute_test.cc DEPS fc_compute_x86)
# lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86)
# lite_cc_test(test_pool2d_compute_x86 SRCS pool_compute_test.cc DEPS pool_compute_x86)
# lite_cc_test(test_concat_compute_x86 SRCS concat_compute_test.cc DEPS concat_compute_x86)
# lite_cc_test(test_softmax_compute_x86 SRCS softmax_compute_test.cc DEPS softmax_compute_x86)
# lite_cc_test(test_elementwise_compute_x86 SRCS elementwise_compute_test.cc DEPS elementwise_compute_x86)
# lite_cc_test(test_relu_compute_x86 SRCS relu_compute_test.cc DEPS relu_compute_x86)
# lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86 operator)
# lite_cc_test(test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86)
# lite_cc_test(test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86)
# lite_cc_test(test_batch_norm_compute_x86 SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_x86)
add_kernel(mul_compute_x86 X86 basic SRCS mul_compute.cc DEPS ${lite_kernel_deps} blas)
add_kernel(concat_compute_x86 X86 basic SRCS concat_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shape_compute_x86 X86 basic SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(sequence_pool_compute_x86 X86 basic SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} sequence_pooling)
lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86)
lite_cc_test(test_concat_compute_x86 SRCS concat_compute_test.cc DEPS concat_compute_x86)
lite_cc_test(test_sequence_pool_compute_x86 SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_x86)
lite_cc_test(test_shape_compute_x86 SRCS shape_compute_test.cc DEPS shape_compute_x86)
......@@ -18,13 +18,20 @@
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/types.h"
#include "paddle/fluid/operators/strided_memcpy.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
inline int count(int start_axis, int end_axis, const lite::DDim& dim) {
int count = 1;
for (int i = start_axis; i < end_axis; ++i) {
count *= dim[i];
}
return count;
}
template <typename T>
class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
......@@ -33,67 +40,28 @@ class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void Run() override {
auto& param = *param_.get_mutable<param_t>();
int64_t axis = static_cast<int64_t>(param.axis);
auto x_dims = param.x[0]->dims();
auto out = param.output;
if (param.x.size() == 1) return;
if (axis == 0 && param.x.size() < 10) {
size_t output_offset = 0;
for (auto* in : param.x) {
if (!in || in->dims().production() == 0UL) {
continue;
}
auto in_stride = framework::stride_numel(in->dims().data());
auto out_stride = framework::stride_numel(out->dims().data());
paddle::operators::StridedNumelCopyWithAxis<T>(
platform::CPUDeviceContext(),
axis,
out->mutable_data<T>() + output_offset,
out_stride,
in->data<T>(),
in_stride,
in_stride[axis]);
output_offset += in_stride[axis];
}
} else {
std::vector<lite::Tensor> inputs;
for (size_t j = 0; j < param.x.size(); ++j) {
if (param.x[j] && param.x[j]->dims().production() > 0) {
inputs.push_back(*param.x[j]);
} else {
continue;
}
}
int num = inputs.size();
int rows = 1;
auto dim_0 = inputs[0].dims();
for (int i = 0; i < axis; ++i) {
rows *= dim_0[i];
}
int out_rows = rows, out_cols = 0;
std::vector<int64_t> input_cols(inputs.size());
for (int i = 0; i < num; ++i) {
int t_cols = inputs[i].dims().production() / rows;
out_cols += t_cols;
input_cols[i] = t_cols;
}
// computation
auto output_data = param.output->template mutable_data<T>();
int col_idx = 0;
for (int j = 0; j < num; ++j) {
int col_len = input_cols[j];
auto input_data = inputs[j].data<float>();
for (int k = 0; k < out_rows; ++k) {
std::memcpy(output_data + k * out_cols + col_idx,
input_data + k * col_len,
sizeof(T) * col_len);
}
col_idx += col_len;
int offset_concat_axis = 0;
int num_concat = count(0, axis, x_dims);
int concat_input_size = count(axis + 1, x_dims.size(), x_dims);
const int top_concat_axis = out->dims()[axis];
for (size_t i = 0; i < param.x.size(); ++i) {
auto bottom_data = param.x[i]->data<T>();
const int64_t bottom_concat_axis = param.x[i]->dims()[axis];
for (int n = 0; n < num_concat; ++n) {
std::memcpy(
output_data +
(n * top_concat_axis + offset_concat_axis) * concat_input_size,
bottom_data + n * bottom_concat_axis * concat_input_size,
(bottom_concat_axis * concat_input_size) * sizeof(T));
}
offset_concat_axis += bottom_concat_axis;
}
}
virtual ~ConcatCompute() = default;
};
......
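
The rewritten ConcatCompute above drops the paddle/fluid strided_memcpy dependency and concatenates with plain std::memcpy: num_concat is the product of the dims before the axis, concat_input_size the product after it, and each input contributes bottom_concat_axis * concat_input_size contiguous elements per outer row. A compact sketch of that copy pattern as a hypothetical free function:

// Concatenate along an axis: inner = product of dims after the axis,
// num_concat = product of dims before it, axis_sizes[i] = input i's dim[axis].
#include <cstdint>
#include <cstring>
#include <vector>

void ConcatAxis(const std::vector<const float*>& ins,
                const std::vector<int64_t>& axis_sizes,
                int64_t num_concat, int64_t inner, float* out) {
  int64_t top_axis = 0;  // output size along the concat axis
  for (int64_t s : axis_sizes) top_axis += s;
  int64_t offset = 0;  // running position along the concat axis
  for (size_t i = 0; i < ins.size(); ++i) {
    const int64_t bottom_axis = axis_sizes[i];
    for (int64_t n = 0; n < num_concat; ++n) {
      std::memcpy(out + (n * top_axis + offset) * inner,
                  ins[i] + n * bottom_axis * inner,
                  bottom_axis * inner * sizeof(float));
    }
    offset += bottom_axis;
  }
}

For example, concatenating inputs of shape {2, 3, 2} and {2, 2, 2} on axis 1 gives num_concat = 2, inner = 2, top_axis = 5, and an output of shape {2, 5, 2}.
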
......@@ -14,7 +14,6 @@
#include "lite/kernels/x86/concat_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "lite/core/op_registry.h"
......@@ -68,11 +67,11 @@ TEST(concat_x86, run_test) {
concat.SetParam(param);
concat.Run();
std::cout << "output: ";
std::vector<float> ref_results = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2};
for (int i = 0; i < out.dims().production(); i++) {
std::cout << out_data[i] << " ";
EXPECT_NEAR(out_data[i], ref_results[i], 1e-3);
}
std::cout << std::endl;
}
} // namespace x86
......
......@@ -25,6 +25,7 @@ REGISTER_LITE_KERNEL(mul,
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
#ifdef LITE_WITH_TRAIN
REGISTER_LITE_KERNEL(mul_grad,
kX86,
kFloat,
......@@ -40,3 +41,4 @@ REGISTER_LITE_KERNEL(mul_grad,
.BindOutput(paddle::framework::GradVarName("Y"),
{LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
#endif
......@@ -13,17 +13,26 @@
// limitations under the License.
#pragma once
#include "lite/backends/x86/math/blas.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/types.h"
#include "paddle/fluid/operators/math/blas.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
using Tensor = framework::Tensor;
// using Tensor = framework::Tensor;
inline lite::Tensor ReshapeToMatrix(const lite::Tensor& src, int num_col_dims) {
int rank = src.dims().size();
if (rank == 2) {
return src;
}
lite::Tensor res;
res.ShareDataWith(src);
res.Resize(src.dims().Flatten2D(num_col_dims));
return res;
}
template <typename T>
class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
......@@ -33,36 +42,35 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void Run() override {
auto& context = ctx_->As<X86Context>();
auto& param = *param_.get_mutable<operators::MulParam>();
CHECK(context.x86_device_context());
// CHECK(context.x86_device_context());
param.output->template mutable_data<T>();
auto* z = param.output;
auto* x = &param.x->raw_tensor();
auto* y = &param.y->raw_tensor();
auto* x = param.x;
auto* y = param.y;
Tensor x_matrix, y_matrix;
if (x->dims().size() > 2) {
x_matrix = framework::ReshapeToMatrix(*x, param.x_num_col_dims);
x_matrix = ReshapeToMatrix(*x, param.x_num_col_dims);
} else {
x_matrix = *x;
}
if (y->dims().size() > 2) {
y_matrix = framework::ReshapeToMatrix(*y, param.y_num_col_dims);
y_matrix = ReshapeToMatrix(*y, param.y_num_col_dims);
} else {
y_matrix = *y;
}
auto* z = &param.output->raw_tensor();
z->mutable_data<T>();
auto z_dim = z->dims();
if (z_dim.size() != 2) {
z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
}
auto blas = paddle::operators::math::GetBlas<platform::CPUDeviceContext, T>(
*context.x86_device_context());
auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
blas.MatMul(x_matrix, y_matrix, z);
if (z_dim.size() != 2) {
......@@ -73,6 +81,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
virtual ~MulCompute() = default;
};
#ifdef LITE_WITH_TRAIN
template <typename T>
class MulGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
......@@ -142,6 +151,7 @@ class MulGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
virtual ~MulGradCompute() = default;
};
#endif
} // namespace x86
} // namespace kernels
......
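
mul_compute.h now includes only lite/backends/x86/math/blas.h: the fluid GetBlas call and framework::ReshapeToMatrix are replaced by lite::x86::math::GetBlas and a local ReshapeToMatrix built on DDim::Flatten2D, which folds the leading num_col_dims dims into rows and the remaining dims into columns. A sketch of that flattening rule, with hypothetical names:

// Flatten a shape to 2-D: dims [0, num_col_dims) become rows, the rest columns.
#include <cstdint>
#include <utility>
#include <vector>

std::pair<int64_t, int64_t> Flatten2D(const std::vector<int64_t>& dims,
                                      int num_col_dims) {
  int64_t rows = 1, cols = 1;
  for (int i = 0; i < num_col_dims; ++i) rows *= dims[i];
  for (size_t i = num_col_dims; i < dims.size(); ++i) cols *= dims[i];
  return {rows, cols};  // e.g. {2, 3, 4} with num_col_dims = 1 -> {2, 12}
}
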
......@@ -19,7 +19,6 @@
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
......@@ -33,7 +32,7 @@ TEST(mul_x86, retrieve_op) {
}
TEST(mul_x86, init) {
MulCompute<float> mul;
lite::kernels::x86::MulCompute<float> mul;
ASSERT_EQ(mul.precision(), PRECISION(kFloat));
ASSERT_EQ(mul.target(), TARGET(kX86));
}
......@@ -72,9 +71,10 @@ TEST(mul_x86, run_test) {
mul.SetParam(param);
mul.Run();
LOG(INFO) << "output: ";
std::vector<float> ref_result = {20, 23, 26, 29};
for (int i = 0; i < out.dims().production(); i++) {
LOG(INFO) << out_data[i];
EXPECT_NEAR(out_data[i], ref_result[i], 1e-3);
}
}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/sequence_pool_compute.h"
REGISTER_LITE_KERNEL(sequence_pool,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::SequencePoolCompute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "lite/backends/x86/math/math_function.h"
#include "lite/backends/x86/math/sequence_pooling.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename T>
class SequencePoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::SequencePoolParam;
void Run() override {
auto& param = *param_.get_mutable<operators::SequencePoolParam>();
auto& context = ctx_->As<X86Context>();
auto* out = param.Out;
auto dims = param.X->dims();
auto lod = param.X->lod();
CHECK_EQ(lod.size(), 1UL);
CHECK_GE(dims[0], static_cast<int64_t>(lod[0].size() - 1));
dims[0] = lod[0].size() - 1;
out->Resize({dims});
out->mutable_data<T>();
lite::Tensor* index = nullptr;
const bool is_test = true;
float pad_value = 0.0;
lite::x86::math::SequencePoolFunctor<lite::TargetType::kX86, T> pool;
pool(context, param.pool_type, pad_value, *param.X, out, is_test, index);
}
virtual ~SequencePoolCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
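
SequencePoolCompute above resizes Out to one row per LoD sequence and delegates to lite::x86::math::SequencePoolFunctor; since SequencePoolParam (see the operators diff further down) now defaults pool_type to "AVERAGE", the test below, which never sets pool_type, checks per-column means. A self-contained sketch of average pooling over LoD segments, with hypothetical names:

// Average-pool rows of x within each LoD segment; lod = {0, 10} marks one
// sequence of 10 rows, and out gets one row of `width` column means per segment.
#include <cstdint>
#include <vector>

void SeqAvgPool(const float* x, const std::vector<uint64_t>& lod,
                int64_t width, float* out) {
  for (size_t s = 0; s + 1 < lod.size(); ++s) {
    const uint64_t begin = lod[s], end = lod[s + 1];
    for (int64_t c = 0; c < width; ++c) {
      float sum = 0.f;
      for (uint64_t r = begin; r < end; ++r) sum += x[r * width + c];
      out[s * width + c] = sum / static_cast<float>(end - begin);
    }
  }
}

With the test's data (x[i] = 1.1f * i, lod {0, 10}, width 8), output column c is 1.1 * (36 + c), i.e. 39.6, 40.7, ..., 47.3, matching ref_results.
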
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/sequence_pool_compute.h"
#include <gtest/gtest.h>
#include <memory>
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
TEST(sequence_pool_x86, retrieve_op) {
auto sequence_pool =
KernelRegistry::Global().Create<TARGET(kX86), PRECISION(kFloat)>(
"sequence_pool");
ASSERT_FALSE(sequence_pool.empty());
ASSERT_TRUE(sequence_pool.front());
}
TEST(sequence_pool_x86, init) {
SequencePoolCompute<float> sequence_pool;
ASSERT_EQ(sequence_pool.precision(), PRECISION(kFloat));
ASSERT_EQ(sequence_pool.target(), TARGET(kX86));
}
TEST(sequence_pool_x86, run_test) {
lite::Tensor x, out;
lite::LoD lod;
lod.push_back(std::vector<uint64_t>{0, 10});
x.set_lod(lod);
const size_t second_dim = 8u;
std::vector<int64_t> input_shape{static_cast<int64_t>(lod[0].back()),
static_cast<int64_t>(second_dim)};
lite::DDim in_dims(input_shape);
x.Resize(in_dims);
const size_t out_first_dim = lod[0].size() - 1;
std::vector<int64_t> output_shape{static_cast<int64_t>(out_first_dim),
static_cast<int64_t>(second_dim)};
lite::DDim out_dims(output_shape);
out.Resize(out_dims);
auto x_data = x.mutable_data<float>();
auto out_data = out.mutable_data<float>();
for (int64_t i = 0; i < x.dims().production(); i++) {
x_data[i] = 1.1f * i;
}
SequencePoolCompute<float> sequence_pool;
operators::SequencePoolParam param;
param.X = &x;
param.Out = &out;
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<X86Context>();
sequence_pool.SetContext(std::move(ctx));
sequence_pool.SetParam(param);
sequence_pool.Run();
std::vector<float> ref_results = {
39.6, 40.7, 41.8, 42.9, 44, 45.1, 46.2, 47.3};
for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_results[i], 1e-3);
}
}
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(sequence_pool, kX86, kFloat, kNCHW, def);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/shape_compute.h"
REGISTER_LITE_KERNEL(shape,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::ShapeCompute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename T>
class ShapeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ShapeParam;
void Run() override {
auto& param = *param_.get_mutable<operators::ShapeParam>();
// auto& context = context_->As<X86Context>();
auto out_data = param.Out->mutable_data<int32_t>();
auto in_dims = param.X->dims();
for (int i = 0; i < in_dims.size(); ++i) {
out_data[i] = in_dims[i];
}
}
virtual ~ShapeCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/shape_compute.h"
#include <gtest/gtest.h>
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
TEST(shape_x86, retrieve_op) {
auto shape =
KernelRegistry::Global().Create<TARGET(kX86), PRECISION(kFloat)>("shape");
ASSERT_FALSE(shape.empty());
ASSERT_TRUE(shape.front());
}
TEST(shape_x86, init) {
ShapeCompute<float> shape;
ASSERT_EQ(shape.precision(), PRECISION(kFloat));
ASSERT_EQ(shape.target(), TARGET(kX86));
}
TEST(shape_x86, run_test) {
lite::Tensor x, out;
constexpr int batch_size = 1;
std::vector<int64_t> x_shape{batch_size, 1, 3, 3};
x.Resize(lite::DDim(x_shape));
std::vector<int64_t> out_shape{4};
out.Resize(lite::DDim(out_shape));
auto x_data = x.mutable_data<float>();
auto out_data = out.mutable_data<int32_t>();
for (int64_t i = 0; i < x.dims().production(); i++) {
x_data[i] = 1;
}
ShapeCompute<float> shape;
operators::ShapeParam param;
param.X = &x;
param.Out = &out;
shape.SetParam(param);
shape.Run();
std::vector<float> ref_results = {1, 1, 3, 3};
for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_results[i], 1e-3);
}
}
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(shape, kX86, kFloat, kNCHW, def);
......@@ -88,7 +88,7 @@ add_operator(greater_than extra SRCS compare_op.cc DEPS ${op_DEPS})
add_operator(greater_equal extra SRCS compare_op.cc DEPS ${op_DEPS})
add_operator(read_from_array_op extra SRCS read_from_array_op.cc DEPS ${op_DEPS})
add_operator(beam_search_op extra SRCS beam_search_op.cc DEPS ${op_DEPS})
add_operator(sequence_pool_op_lite extra SRCS sequence_pool_op.cc DEPS ${op_DEPS})
add_operator(sequence_pool extra SRCS sequence_pool_op.cc DEPS ${op_DEPS})
add_operator(lod_reset_op extra SRCS lod_reset_op.cc DEPS ${op_DEPS})
add_operator(is_empty extra SRCS is_empty_op.cc DEPS ${op_DEPS})
add_operator(slice_op_lite extra SRCS slice_op.cc DEPS ${op_DEPS})
......
......@@ -666,7 +666,10 @@ struct BeamSearchParam {
struct SequencePoolParam {
const lite::Tensor* X{};
lite::Tensor* Out{};
std::string pool_type;
std::string pool_type{"AVERAGE"};
#ifdef LITE_WITH_X86
float pad_value{0.0};
#endif
};
struct SequenceExpandParam {
......
......@@ -151,7 +151,7 @@ function build_opencl {
# This method is only called in CI.
function cmake_x86_for_CI {
prepare_workspace # fake an empty __generated_code__.cc to pass cmake.
cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags} -DLITE_WITH_PROFILE=ON -DWITH_MKL=OFF \
cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags} -DLITE_WITH_PROFILE=ON -DWITH_MKL=ON \
-DLITE_BUILD_EXTRA=ON \
# Compile and execute the gen_code related test, so it will generate some code, and make the compilation reasonable.
......@@ -219,7 +219,7 @@ function test_server {
function build_test_server {
mkdir -p ./build
cd ./build
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/third_party/install/mklml/lib"
cmake_x86_for_CI
build
......
......@@ -71,6 +71,7 @@ for line in lines:
alias = fields[-1]
key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % (
op, target, precision, layout, alias)
if "_grad" in key: continue
out_lines.append(key)
......