diff --git a/lite/backends/x86/math/math_function.cc b/lite/backends/x86/math/math_function.cc index 05a10b5a19fbc8e80ee6dd07e67154d9cf6d1b22..cb1781db2199c1b7a12aaec80b1904f65b23b534 100644 --- a/lite/backends/x86/math/math_function.cc +++ b/lite/backends/x86/math/math_function.cc @@ -129,8 +129,7 @@ struct RowwiseAdd { T* output_data = output->template mutable_data(); for (int64_t i = 0; i < in_dims[0]; ++i) { for (int64_t j = 0; j < size; ++j) { - output_data[i * in_dims[0] + j] = - input_data[i * in_dims[0] + j] + vector_data[j]; + output_data[i * size + j] = input_data[i * size + j] + vector_data[j]; } } } diff --git a/lite/backends/x86/math/selected_rows_functor.cc b/lite/backends/x86/math/selected_rows_functor.cc index acb377e31ccac96547fc4f0644332cfad36d66bc..fe7a46f9f04d49ea7b505b8e2ece6b4bdd0ec826 100644 --- a/lite/backends/x86/math/selected_rows_functor.cc +++ b/lite/backends/x86/math/selected_rows_functor.cc @@ -279,7 +279,7 @@ struct MergeAdd { } } if (has_value_input == nullptr) { - VLOG(3) << "no input has value! just return" << std::endl; + VLOG(3) << "no input has value! 
just return"; return; } auto input_width = has_value_input->value().dims()[1]; diff --git a/lite/core/profile/precision_profiler.h b/lite/core/profile/precision_profiler.h index f29ca5f8409779a2eb9b6b98b91c2c1e3af3c06d..0eebf6a61016a3b399b7a7d4de26a4303f741440 100644 --- a/lite/core/profile/precision_profiler.h +++ b/lite/core/profile/precision_profiler.h @@ -22,6 +22,7 @@ #include #include #include "lite/core/program.h" +#include "lite/fluid/float16.h" #ifdef LITE_WITH_OPENCL #include "lite/backends/opencl/cl_image_converter.h" diff --git a/lite/kernels/x86/sequence_reshape_compute.cc b/lite/kernels/x86/sequence_reshape_compute.cc index ccaeef27d7439b739b298f3b0756e2a2eddef2c1..22e10e94082ca3aef35d0e493e9854709986bcdc 100644 --- a/lite/kernels/x86/sequence_reshape_compute.cc +++ b/lite/kernels/x86/sequence_reshape_compute.cc @@ -24,3 +24,14 @@ REGISTER_LITE_KERNEL( .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))}) .Finalize(); + +REGISTER_LITE_KERNEL( + sequence_reshape, + kX86, + kFloat, + kNCHW, + paddle::lite::kernels::x86::SequenceReshapeFloatCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))}) + .Finalize(); diff --git a/lite/kernels/x86/sequence_reshape_compute.h b/lite/kernels/x86/sequence_reshape_compute.h index d166f8bc3d80d9f87efb0315462daee3296f393f..bc5a1b0a533ac2a13ce316a991a0a4b19ce0c4ef 100644 --- a/lite/kernels/x86/sequence_reshape_compute.h +++ b/lite/kernels/x86/sequence_reshape_compute.h @@ -62,8 +62,7 @@ class SequenceReshapeCompute } } - out->Resize(std::vector{static_cast(out->lod()[0].back()), - out_width}); + out->Resize(std::vector{in->numel() / out_width, out_width}); auto* dst_ptr = out->template mutable_data(); auto size = in->numel() * sizeof(T); std::memcpy(dst_ptr, in->template data(), size); @@ -72,6 +71,52 @@ class 
SequenceReshapeCompute
   virtual ~SequenceReshapeCompute() = default;
 };
 
+template  // NOTE(review): the template parameter list is not visible in this view; sizeof(T) below suggests one type parameter T - confirm.
+class SequenceReshapeFloatCompute  // Kernel whose Run() rebuilds the level-0 LoD for rows of width new_dim, resizes the output and copies the input buffer into it.
+    : public KernelLite {  // NOTE(review): the KernelLite template arguments are not visible in this view.
+ public:
+  using param_t = operators::SequenceReshapeParam;
+
+  void Run() override {  // Reads param.x and param.new_dim; writes the LoD, shape and data of param.output.
+    auto& param = *param_.get_mutable();
+    auto* in = param.x;
+    auto* out = param.output;
+    auto out_data = out->mutable_data();
+    for (int i = 0; i < out->dims().production(); i++) {  // Zero-fills out using the dims it has before the Resize further down.
+      out_data[i] = 0;  // NOTE(review): the memcpy at the end rewrites the output after Resize, so this fill looks redundant - confirm.
+    }
+    int out_width = param.new_dim;  // Target row width. NOTE(review): used as a divisor below; no non-zero check is visible in this block.
+    const auto& in_dims = in->dims();
+    int64_t in_width = in_dims[1];  // Current row width: the second input dimension.
+    auto& in_lod = in->lod();
+    CHECK_EQ(in_lod.size(), 1UL);  // Checks that the input carries exactly one LoD level.
+    CHECK_EQ((uint64_t)in_dims[0], in_lod[0].back());  // Checks that the last level-0 offset equals the first input dimension.
+    auto in_lod_l0 = in_lod[0];  // Plain auto: this is a copy of the level-0 offsets, not a reference.
+    int seq_num = in_lod_l0.size() - 1;  // Number of sequences: one fewer than the number of offsets.
+    if (in_width == out_width) {
+      out->set_lod(in->lod());  // Same width: the output takes the input LoD unchanged.
+    } else {
+      auto& out_lod = *out->mutable_lod();
+      out_lod.resize(1);
+      out_lod[0].resize(seq_num + 1);
+      out_lod[0][0] = 0;
+      for (int i = 0; i < seq_num; ++i) {  // Converts each sequence length from in_width-wide rows to out_width-wide rows.
+        size_t seq_len = in_lod_l0[i + 1] - in_lod_l0[i];
+        size_t offset = 0;
+        offset = (seq_len * in_width) / out_width;
+        CHECK_EQ(offset * out_width, seq_len * in_width);  // Checks that seq_len * in_width is an exact multiple of out_width.
+        out_lod[0][i + 1] = out_lod[0][i] + offset;  // Running sum, so out_lod[0] holds cumulative offsets.
+      }
+    }
+    out->Resize(std::vector{in->numel() / out_width, out_width});  // New shape: {numel / out_width, out_width}.
+    auto* dst_ptr = out->mutable_data();
+    auto size = in->numel() * sizeof(T);  // Byte count of the whole input buffer.
+    std::memcpy(dst_ptr, in->data(), size);  // Copies the input buffer as-is; no element is reordered.
+  }
+
+  virtual ~SequenceReshapeFloatCompute() = default;
+};
+
 } // namespace x86 } // namespace kernels } // namespace lite