Unverified commit 468c1e43, authored by huzhiqiang, committed by GitHub

[x86] [BUG FIX] Fix x86 kernels and fix the issue x86 compiling fails when `with_profiler=ON` #3478
Parent 499b389e
...
@@ -129,8 +129,7 @@ struct RowwiseAdd<lite::TargetType::kX86, T> {
   T* output_data = output->template mutable_data<T>();
   for (int64_t i = 0; i < in_dims[0]; ++i) {
     for (int64_t j = 0; j < size; ++j) {
-      output_data[i * in_dims[0] + j] =
-          input_data[i * in_dims[0] + j] + vector_data[j];
+      output_data[i * size + j] = input_data[i * size + j] + vector_data[j];
     }
   }
 }
...
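
The indexing fix above matters because `in_dims[0]` is the number of rows while `size` is the number of elements per row, so the old stride `i * in_dims[0]` only lands on the right element when the matrix happens to be square. A minimal standalone sketch of the corrected row-major arithmetic (illustrative only, not part of the patch):

#include <cstdint>
#include <vector>

int main() {
  const int64_t rows = 2, size = 3;  // deliberately non-square 2x3 matrix
  std::vector<float> input = {1, 2, 3, 4, 5, 6};
  std::vector<float> vector_data = {10, 20, 30};  // one addend per column
  std::vector<float> output(rows * size);
  for (int64_t i = 0; i < rows; ++i) {
    for (int64_t j = 0; j < size; ++j) {
      // Row-major offset of element (i, j) is i * size + j; a stride of
      // i * rows + j would cross row boundaries whenever rows != size.
      output[i * size + j] = input[i * size + j] + vector_data[j];
    }
  }
  return 0;
}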
...
@@ -279,7 +279,7 @@ struct MergeAdd<lite::TargetType::kX86, T> {
     }
   }
   if (has_value_input == nullptr) {
-    VLOG(3) << "no input has value! just return" << std::endl;
+    VLOG(3) << "no input has value! just return";
     return;
   }
   auto input_width = has_value_input->value().dims()[1];
...
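
The `std::endl` is dropped because glog-style log streams, whose semantics Lite's VLOG mimics, already terminate the line when the message object is flushed, so the manipulator is redundant. A minimal sketch against stock glog, assuming glog semantics:

#include <glog/logging.h>

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;
  FLAGS_v = 3;  // enable VLOG(3) and below
  VLOG(3) << "no input has value! just return";  // newline added by glog
  return 0;
}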
...
@@ -22,6 +22,7 @@
 #include <string>
 #include <vector>
 #include "lite/core/program.h"
+#include "lite/fluid/float16.h"
 #ifdef LITE_WITH_OPENCL
 #include "lite/backends/opencl/cl_image_converter.h"
...
...
@@ -24,3 +24,14 @@ REGISTER_LITE_KERNEL(
     .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
     .Finalize();
+REGISTER_LITE_KERNEL(
+    sequence_reshape,
+    kX86,
+    kFloat,
+    kNCHW,
+    paddle::lite::kernels::x86::SequenceReshapeFloatCompute<float>,
+    def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
+    .Finalize();
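
This registration exposes a float-precision `sequence_reshape` kernel alongside the existing int64 one. For readers unfamiliar with the macro, a hedged annotation of the positional arguments as they appear in the diff (comments are editorial, not part of the patch):

REGISTER_LITE_KERNEL(
    sequence_reshape,  // op type this kernel serves
    kX86,              // target backend
    kFloat,            // kernel precision (the existing variant is kInt64)
    kNCHW,             // data layout
    paddle::lite::kernels::x86::SequenceReshapeFloatCompute<float>,  // impl
    def)               // alias used to pick among kernels for the same op
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
    .Finalize();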
...
@@ -62,8 +62,7 @@ class SequenceReshapeCompute
       }
     }
-    out->Resize(std::vector<int64_t>{static_cast<int64_t>(out->lod()[0].back()),
-                                     out_width});
+    out->Resize(std::vector<int64_t>{in->numel() / out_width, out_width});
     auto* dst_ptr = out->template mutable_data<T>();
     auto size = in->numel() * sizeof(T);
     std::memcpy(dst_ptr, in->template data<T>(), size);
@@ -72,6 +71,52 @@ class SequenceReshapeCompute
   virtual ~SequenceReshapeCompute() = default;
 };
+
+template <typename T>
+class SequenceReshapeFloatCompute
+    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::SequenceReshapeParam;
+
+  void Run() override {
+    auto& param = *param_.get_mutable<operators::SequenceReshapeParam>();
+    auto* in = param.x;
+    auto* out = param.output;
+    auto out_data = out->mutable_data<T>();
+    for (int i = 0; i < out->dims().production(); i++) {
+      out_data[i] = 0;
+    }
+    int out_width = param.new_dim;
+    const auto& in_dims = in->dims();
+    int64_t in_width = in_dims[1];
+    auto& in_lod = in->lod();
+
+    CHECK_EQ(in_lod.size(), 1UL);
+    CHECK_EQ((uint64_t)in_dims[0], in_lod[0].back());
+
+    auto in_lod_l0 = in_lod[0];
+    int seq_num = in_lod_l0.size() - 1;
+
+    if (in_width == out_width) {
+      out->set_lod(in->lod());
+    } else {
+      auto& out_lod = *out->mutable_lod();
+      out_lod.resize(1);
+      out_lod[0].resize(seq_num + 1);
+      out_lod[0][0] = 0;
+      for (int i = 0; i < seq_num; ++i) {
+        size_t seq_len = in_lod_l0[i + 1] - in_lod_l0[i];
+        size_t offset = 0;
+        offset = (seq_len * in_width) / out_width;
+        CHECK_EQ(offset * out_width, seq_len * in_width);
+        out_lod[0][i + 1] = out_lod[0][i] + offset;
+      }
+    }
+
+    out->Resize(std::vector<int64_t>{in->numel() / out_width, out_width});
+    auto* dst_ptr = out->mutable_data<T>();
+    auto size = in->numel() * sizeof(T);
+    std::memcpy(dst_ptr, in->data<T>(), size);
+  }
+
+  virtual ~SequenceReshapeFloatCompute() = default;
+};
+
 }  // namespace x86
 }  // namespace kernels
 }  // namespace lite
...
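
The LoD arithmetic in `SequenceReshapeFloatCompute::Run` reinterprets each sequence of `seq_len` rows of `in_width` elements as rows of `out_width` elements, so every sequence boundary scales by the factor `in_width / out_width` and the division must be exact. A standalone sketch of that recomputation, mirroring the loop in the patch (illustrative only):

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const size_t in_width = 2, out_width = 4;
  std::vector<size_t> in_lod = {0, 6, 10};  // two sequences: 6 and 4 rows
  std::vector<size_t> out_lod = {0};
  for (size_t i = 0; i + 1 < in_lod.size(); ++i) {
    size_t seq_len = in_lod[i + 1] - in_lod[i];
    size_t offset = (seq_len * in_width) / out_width;
    assert(offset * out_width == seq_len * in_width);  // must divide evenly
    out_lod.push_back(out_lod.back() + offset);
  }
  // Prints "0 3 5": the 6-row and 4-row sequences become 3 and 2 rows.
  for (size_t v : out_lod) std::printf("%zu ", v);
  std::printf("\n");
  return 0;
}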