Commit 629fab49 authored by: J jiweibo

test=develop

Parent a2e0ab60
......@@ -34,6 +34,7 @@ void* TargetWrapper<TARGET(kHost)>::Malloc(size_t size) {
return r;
}
void TargetWrapper<TARGET(kHost)>::Free(void* ptr) {
ptr = Malloc(1);
if (ptr) {
free(static_cast<void**>(ptr)[-1]);
}
......
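The `Free` path above recovers the raw allocation from the slot just before the user pointer, which implies `Malloc` over-allocates and stashes the original pointer at `r[-1]`. (The `ptr = Malloc(1);` line added by this commit clobbers the incoming pointer and looks like a temporary debugging aid.) A minimal sketch of that stash-and-recover layout, assuming 64-byte alignment; `AlignedMalloc`/`AlignedFree` are illustrative names, not the library's API:

```cpp
#include <cstdint>
#include <cstdlib>

// Over-allocate, align the user pointer, and stash the raw pointer one
// slot before it -- the same layout Free's static_cast<void**>(ptr)[-1]
// dereference expects.
void* AlignedMalloc(size_t size, size_t alignment = 64) {
  void* raw = std::malloc(size + sizeof(void*) + alignment);
  if (raw == nullptr) return nullptr;
  uintptr_t start = reinterpret_cast<uintptr_t>(raw) + sizeof(void*);
  uintptr_t aligned = (start + alignment - 1) & ~(alignment - 1);
  void* user = reinterpret_cast<void*>(aligned);
  static_cast<void**>(user)[-1] = raw;  // stash for the matching free
  return user;
}

void AlignedFree(void* ptr) {
  if (ptr) {
    std::free(static_cast<void**>(ptr)[-1]);  // recover the raw pointer
  }
}
```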
......@@ -123,14 +123,13 @@ struct RowwiseAdd<lite::TargetType::kX86, T> {
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector.numel(), size);
PADDLE_ENFORCE_EQ(output->dims(), in_dims);
const T* input_data = input.data<T>();
const T* vector_data = vector.data<T>();
T* output_data = output->mutable_data<T>();
for (int64_t i = 0; i < in_dims[0]; ++i) {
for (int64_t j = 0; j < size; ++j) {
output_data[i * in_dims[0] + j] =
input_data[i * in_dims[0] + j] + vector_data[j];
output_data[i * size + j] =
input_data[i * size + j] + vector_data[j];
}
}
}
......
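The fix in this hunk replaces the flat-index stride `in_dims[0]` (the number of rows) with `size` (the row width, `input.numel() / in_dims[0]`); the two are only equivalent for square inputs. A standalone sketch of the corrected indexing, with hypothetical names:

```cpp
#include <cstdint>
#include <vector>

// The flat offset of element (i, j) in a row-major [rows x width] matrix
// is i * width + j; the old code used i * rows + j, which scrambles the
// output whenever rows != width.
void RowwiseAddRef(const std::vector<float>& input,
                   const std::vector<float>& bias,
                   std::vector<float>* output,
                   int64_t rows, int64_t width) {
  for (int64_t i = 0; i < rows; ++i) {
    for (int64_t j = 0; j < width; ++j) {
      (*output)[i * width + j] = input[i * width + j] + bias[j];
    }
  }
}
```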
......@@ -23,6 +23,8 @@ void* TargetMalloc(TargetType target, size_t size) {
case TargetType::kHost:
case TargetType::kX86:
case TargetType::kARM:
data = TargetWrapper<TARGET(kHost)>::Malloc(size);
TargetWrapper<TARGET(kHost)>::Free(data);
data = TargetWrapper<TARGET(kHost)>::Malloc(size);
break;
#ifdef LITE_WITH_CUDA
......
......@@ -21,6 +21,7 @@
#include <string>
#include <vector>
#include "lite/core/program.h"
#include "lite/fluid/float16.h"
#ifdef LITE_WITH_OPENCL
#include "lite/backends/opencl/cl_image_converter.h"
......
......@@ -82,8 +82,7 @@ class FCFunctor {
memcpy(X1_data + i * KK, X + i * K, K * sizeof(T));
}
};
lite::x86::RunParallelFor(0, M, parallel_memcpy_x);
parallel_memcpy_x(0, M);
blas.GEMM(false,
false,
M,
......@@ -104,18 +103,17 @@ class FCFunctor {
memcpy(Y + i * N, Y1_data + i * NN, N * sizeof(T));
}
};
lite::x86::RunParallelFor(0, M, parallel_memcpy_y);
parallel_memcpy_y(0, M);
return;
}
lite::x86::RunParallelFor(0, M, parallel_compute);
parallel_compute(0, M);
} else {
blas.MatMul(M, N, K, X, W, Y);
if (!B) {
if (!B) {
return;
}
lite::x86::RunParallelFor(0, M, parallel_compute);
parallel_compute(0, M);
}
}
};
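Every change in this hunk swaps `lite::x86::RunParallelFor(0, M, fn)` for a direct `fn(0, M)` call. Because each lambda takes a `[begin, end)` row range, invoking it once with the full range does the same work on a single thread, making it a convenient serial drop-in while debugging. A sketch under the assumption that `RunParallelFor` chunks the range across threads (the helper below is hypothetical, not the library's implementation):

```cpp
#include <algorithm>
#include <cstddef>
#include <functional>
#include <thread>
#include <vector>

using RangeFn = std::function<void(size_t, size_t)>;

// Hypothetical stand-in for lite::x86::RunParallelFor: split [begin, end)
// into chunks and hand each chunk to its own thread.
void RunParallelFor(size_t begin, size_t end, const RangeFn& fn) {
  const size_t kThreads = 4;
  const size_t chunk = (end - begin + kThreads - 1) / kThreads;
  std::vector<std::thread> workers;
  for (size_t b = begin; b < end; b += chunk) {
    workers.emplace_back(fn, b, std::min(b + chunk, end));
  }
  for (auto& w : workers) w.join();
}

int main() {
  std::vector<int> data(100, 1);
  auto body = [&](size_t b, size_t e) {
    for (size_t i = b; i < e; ++i) data[i] *= 2;
  };
  RunParallelFor(0, data.size(), body);  // parallel execution
  body(0, data.size());                  // serial drop-in, as in the diff
}
```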
......@@ -139,7 +137,6 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto w_dims1 = padding_weights ? w_dims[1] - 4 : w_dims[1];
int M = output->dims().production() / w_dims1;
const T* input_data = input->data<T>();
const T* w_data = w->data<T>();
T* output_data = output->mutable_data<T>();
......
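In the FcCompute hunk above, `M` is derived by dividing the output element count by the effective weight width; with `padding_weights` the stored width carries 4 extra columns that must be subtracted first. A worked sketch of that arithmetic with made-up sizes (all values hypothetical):

```cpp
#include <cstdint>
#include <iostream>

int main() {
  int64_t stored_width = 132;  // padded weight width w_dims[1]
  bool padding_weights = true;
  // Effective width: drop the 4 padding columns when weights are padded.
  int64_t w_dims1 = padding_weights ? stored_width - 4 : stored_width;  // 128
  int64_t output_production = 128 * 16;  // hypothetical output->dims().production()
  int64_t M = output_production / w_dims1;
  std::cout << M << "\n";  // 16 rows fed to the GEMM
}
```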
......@@ -17,11 +17,18 @@
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/types.h"
#include <chrono>
#include "lite/fluid/eigen.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename T,
int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = lite::fluid::EigenMatrix<T, MajorType, IndexType>;
// using Tensor = framework::Tensor;
inline lite::Tensor ReshapeToMatrix(const lite::Tensor& src, int num_col_dims) {
int rank = src.dims().size();
......@@ -40,9 +47,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
using param_t = operators::MulParam;
void Run() override {
auto& context = ctx_->As<X86Context>();
auto& param = *param_.get_mutable<operators::MulParam>();
// CHECK(context.x86_device_context());
auto* z = param.output;
......@@ -50,7 +55,6 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto* y = param.y;
Tensor x_matrix, y_matrix;
if (x->dims().size() > 2) {
x_matrix = ReshapeToMatrix(*x, param.x_num_col_dims);
} else {
......@@ -64,18 +68,10 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
y_matrix = *y;
}
z->mutable_data<T>();
auto z_dim = z->dims();
if (z_dim.size() != 2) {
z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
}
auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
blas.MatMul(x_matrix, y_matrix, z);
if (z_dim.size() != 2) {
z->Resize(z_dim);
}
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> mat_test(
    x_matrix.mutable_data<T>(), x_matrix.dims()[0], x_matrix.dims()[1]);
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> mat1_test(
    y_matrix.mutable_data<T>(), y_matrix.dims()[0], y_matrix.dims()[1]);
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> out_test(
    z->mutable_data<T>(), z->dims()[0], z->dims()[1]);
out_test = mat_test * mat1_test;
}
virtual ~MulCompute() = default;
......
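The added `Eigen::Map` block recomputes the product directly and overwrites `z`, bypassing the removed `blas.MatMul` path. Note that `Eigen::Matrix` defaults to column-major storage, so mapping row-major tensor buffers without the `Eigen::RowMajor` flag effectively transposes the operands. A self-contained sketch of such a cross-check with explicit row-major maps (the shapes below are made up):

```cpp
#include <Eigen/Dense>
#include <iostream>
#include <vector>

using RowMajorMat =
    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

int main() {
  std::vector<float> x = {1, 2, 3, 4, 5, 6};  // 2 x 3, row-major
  std::vector<float> y = {1, 0, 0, 1, 1, 1};  // 3 x 2, row-major
  std::vector<float> z(4, 0.f);               // 2 x 2 result

  Eigen::Map<RowMajorMat> mat_x(x.data(), 2, 3);
  Eigen::Map<RowMajorMat> mat_y(y.data(), 3, 2);
  Eigen::Map<RowMajorMat> mat_z(z.data(), 2, 2);
  mat_z = mat_x * mat_y;  // reference product to compare with the BLAS path

  std::cout << mat_z << "\n";  // expect [4 5; 10 11]
}
```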
......@@ -24,3 +24,14 @@ REGISTER_LITE_KERNEL(
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
.Finalize();
REGISTER_LITE_KERNEL(
sequence_reshape,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::SequenceReshapeFloatCompute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
.Finalize();
......@@ -31,21 +31,16 @@ class SequenceReshapeCompute
void Run() override {
auto& param = *param_.get_mutable<operators::SequenceReshapeParam>();
// auto& context = context_->As<X86Context>();
auto* in = param.x;
auto* out = param.output;
int out_width = param.new_dim;
const auto& in_dims = in->dims();
int64_t in_width = in_dims[1];
auto& in_lod = in->lod();
CHECK_EQ(in_lod.size(), 1UL);
CHECK_EQ((uint64_t)in_dims[0], in_lod[0].back());
auto in_lod_l0 = in_lod[0];
int seq_num = in_lod_l0.size() - 1;
if (in_width == out_width) {
out->set_lod(in->lod());
} else {
......@@ -61,8 +56,7 @@ class SequenceReshapeCompute
out_lod[0][i + 1] = out_lod[0][i] + offset;
}
}
out->Resize(std::vector<int64_t>{static_cast<int64_t>(out->lod()[0].back()),
out->Resize(std::vector<int64_t>{in->numel() / out_width,
out_width});
auto* dst_ptr = out->mutable_data<T>();
auto size = in->numel() * sizeof(T);
......@@ -72,6 +66,53 @@ class SequenceReshapeCompute
virtual ~SequenceReshapeCompute() = default;
};
template <typename T>
class SequenceReshapeFloatCompute
: public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::SequenceReshapeParam;
void Run() override {
auto& param = *param_.get_mutable<operators::SequenceReshapeParam>();
auto* in = param.x;
auto* out = param.output;
auto out_data = out->mutable_data<T>();
for (int i = 0; i < out->dims().production(); i++) {
out_data[i] = 0;
}
int out_width = param.new_dim;
const auto& in_dims = in->dims();
int64_t in_width = in_dims[1];
auto& in_lod = in->lod();
CHECK_EQ(in_lod.size(), 1UL);
CHECK_EQ((uint64_t)in_dims[0], in_lod[0].back());
auto in_lod_l0 = in_lod[0];
int seq_num = in_lod_l0.size() - 1;
if (in_width == out_width) {
out->set_lod(in->lod());
} else {
auto& out_lod = *out->mutable_lod();
out_lod.resize(1);
out_lod[0].resize(seq_num + 1);
out_lod[0][0] = 0;
for (int i = 0; i < seq_num; ++i) {
size_t seq_len = in_lod_l0[i + 1] - in_lod_l0[i];
size_t offset = 0;
offset = (seq_len * in_width) / out_width;
CHECK_EQ(offset * out_width, seq_len * in_width);
out_lod[0][i + 1] = out_lod[0][i] + offset;
}
}
out->Resize(std::vector<int64_t>{in->numel() / out_width,
out_width});
auto* dst_ptr = out->mutable_data<T>();
auto size = in->numel() * sizeof(T);
std::memcpy(dst_ptr, in->data<T>(), size);
}
virtual ~SequenceReshapeFloatCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
......
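Both sequence_reshape kernels recompute the output LoD when the width changes: each sequence's new row count is `seq_len * in_width / out_width`, with a `CHECK_EQ` guarding that the division is exact so no elements are dropped. A worked sketch of that arithmetic (the LoD and widths below are made up):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<size_t> in_lod = {0, 4, 10};  // two sequences: 4 and 6 rows
  size_t in_width = 2, out_width = 4;       // reshape rows from width 2 to 4

  std::vector<size_t> out_lod = {0};
  for (size_t i = 0; i + 1 < in_lod.size(); ++i) {
    size_t seq_len = in_lod[i + 1] - in_lod[i];
    // Rows after reshape; seq_len * in_width must divide evenly by out_width
    // (the kernel enforces this with CHECK_EQ).
    size_t offset = seq_len * in_width / out_width;
    out_lod.push_back(out_lod.back() + offset);
  }
  for (size_t v : out_lod) std::cout << v << " ";  // prints: 0 2 5
}
```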