diff --git a/lite/backends/x86/math/sequence2batch.cc b/lite/backends/x86/math/sequence2batch.cc
index ff215781f1efeb20a0e126a6e39a8f3508131abd..c12c05414d717dce706590a491ccae2384f3bfe5 100644
--- a/lite/backends/x86/math/sequence2batch.cc
+++ b/lite/backends/x86/math/sequence2batch.cc
@@ -24,12 +24,12 @@ class CopyMatrixRowsFunctor<TARGET(kX86), T> {
  public:
   void operator()(const lite::Context<TARGET(kX86)>& context,
                   const lite::Tensor& src,
-                  std::vector<size_t> index_lod,
+                  const std::vector<size_t>& index_lod,
                   lite::Tensor* dst,
                   bool is_src_index) {
-    size_t* index = index_lod.data();
-    auto src_dims = src.dims();
-    auto dst_dims = dst->dims();
+    const size_t* index = index_lod.data();
+    const auto& src_dims = src.dims();
+    const auto& dst_dims = dst->dims();
     PADDLE_ENFORCE_EQ(
         src_dims.size(), 2UL, "The src must be matrix with rank 2.");
     PADDLE_ENFORCE_EQ(
diff --git a/lite/backends/x86/math/sequence2batch.h b/lite/backends/x86/math/sequence2batch.h
index a97bfaf66607e5ea2efbd6f26f311fb4cd9dab67..a70cc5bf73522f97ab312fc48553b5316dbf8376 100644
--- a/lite/backends/x86/math/sequence2batch.h
+++ b/lite/backends/x86/math/sequence2batch.h
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "lite/core/context.h"
 #include "lite/core/tensor.h"
 #include "lite/fluid/eigen.h"
-// #include "lite/fluid/lod.h"
 #include "lite/utils/paddle_enforce.h"
 
 namespace paddle {
@@ -27,11 +26,6 @@ namespace lite {
 namespace x86 {
 namespace math {
 
-template <typename T,
-          int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenMatrix = lite::fluid::EigenMatrix<T, MajorType, IndexType>;
-
 template <lite::TargetType Target, typename T>
 class CopyMatrixRowsFunctor {
  public:
@@ -42,7 +36,7 @@ class CopyMatrixRowsFunctor {
   // The indexed rows are based on the input index.
   void operator()(const lite::Context<Target>& context,
                   const lite::Tensor& src,
-                  std::vector<size_t> index_lod,
+                  const std::vector<size_t>& index_lod,
                   lite::Tensor* dst,
                   bool is_src_index);
 };
@@ -56,6 +50,7 @@ class LoDTensor2BatchFunctor {
   //    seq_info[3] = {(4, 5, 1), (0, 4, 0), (9, 3, 2)}
   //
   struct SeqInfo {
+    SeqInfo() = default;
     SeqInfo(int start, int length, int seq_idx)
         : start(start), length(length), seq_idx(seq_idx) {}
     int start;
@@ -89,10 +84,12 @@ class LoDTensor2BatchFunctor {
 
     const auto& lod = lods[0];
 
-    std::vector<SeqInfo> seq_info;
+    std::vector<SeqInfo> seq_info(lod.size() - 1);
     for (size_t seq_id = 0; seq_id < lod.size() - 1; ++seq_id) {
       int length = lod[seq_id + 1] - lod[seq_id];
-      seq_info.emplace_back(lod[seq_id], length, seq_id);
+      seq_info[seq_id].start = lod[seq_id];
+      seq_info[seq_id].length = length;
+      seq_info[seq_id].seq_idx = seq_id;
     }
 
     std::sort(seq_info.begin(), seq_info.end(), [](SeqInfo a, SeqInfo b) {
@@ -122,21 +119,19 @@
     // The max_seqlen represents batch size after rearranging the
     // input LodTensor. It is also the maximum length of input sequence.
 
-    lite::LoD batch_lods;
-    batch_lods.emplace_back(std::vector<size_t>{0});
-    batch_lods.emplace_back(std::vector<size_t>{0});
-    batch_lods.emplace_back(std::vector<size_t>{0});
+    LoD* batch_lods = batch->mutable_lod();
+    batch_lods->resize(3);
 
     // batch_lods[0] is the start positions for batch LoDTensor
     int max_seqlen = seq_info[0].length;
-    batch_lods[0].resize(static_cast<size_t>(max_seqlen + 1));
+    batch_lods->at(0).resize(static_cast<size_t>(max_seqlen + 1));
     // batch_lods[1] is the raw index in the input LoDTensor
-    batch_lods[1].resize(static_cast<size_t>(lod_tensor.dims()[0]));
+    batch_lods->at(1).resize(static_cast<size_t>(lod_tensor.dims()[0]));
     // batch_lods[2] is the sort order for the input LoDTensor.
-    batch_lods[2].resize(seq_info.size());
+    batch_lods->at(2).resize(seq_info.size());
 
-    size_t* batch_starts = batch_lods[0].data();
-    size_t* seq2batch_idx = batch_lods[1].data();
+    size_t* batch_starts = batch_lods->at(0).data();
+    size_t* seq2batch_idx = batch_lods->at(1).data();
     batch_starts[0] = 0;
     for (int n = 0; n < max_seqlen; n++) {
       auto batch_id = static_cast<int>(batch_starts[n]);
@@ -153,14 +148,13 @@
       }
       batch_starts[n + 1] = static_cast<size_t>(batch_id);
     }
-    size_t* seq_order = batch_lods[2].data();
+    size_t* seq_order = batch_lods->at(2).data();
     for (size_t i = 0; i < seq_info.size(); ++i) {
       seq_order[i] = seq_info[i].seq_idx;
     }
-    batch->set_lod(batch_lods);
 
     CopyMatrixRowsFunctor<Target, T> to_batch;
-    to_batch(context, lod_tensor, batch_lods[1], batch, true);
+    to_batch(context, lod_tensor, batch_lods->at(1), batch, true);
   }
 };
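Note: the reordering this functor performs is easier to see in isolation. Below is a minimal standalone sketch (not the library code) of the index bookkeeping, using the example from the header comment, lod = {0, 4, 9, 12}, so seq_info sorts to {(4, 5, 1), (0, 4, 0), (9, 3, 2)}. Plain std::vector stands in for lite::Tensor and LoD, and the row copies done by CopyMatrixRowsFunctor are omitted.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct SeqInfo {
  int start, length, seq_idx;
};

int main() {
  // Three sequences of lengths 4, 5, 3 packed back to back.
  std::vector<size_t> lod = {0, 4, 9, 12};
  std::vector<SeqInfo> seq_info(lod.size() - 1);
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    seq_info[i].start = static_cast<int>(lod[i]);
    seq_info[i].length = static_cast<int>(lod[i + 1] - lod[i]);
    seq_info[i].seq_idx = static_cast<int>(i);
  }
  // Longest sequence first, exactly like the functor's std::sort.
  std::sort(seq_info.begin(), seq_info.end(),
            [](SeqInfo a, SeqInfo b) { return a.length > b.length; });

  int max_seqlen = seq_info[0].length;
  std::vector<size_t> batch_starts(max_seqlen + 1, 0);  // batch_lods[0]
  std::vector<size_t> seq2batch_idx(lod.back());        // batch_lods[1]
  for (int n = 0; n < max_seqlen; ++n) {
    size_t batch_id = batch_starts[n];
    for (const auto& info : seq_info) {
      if (n >= info.length) break;  // shorter sequences have dropped out
      seq2batch_idx[batch_id++] = info.start + n;  // row n of this sequence
    }
    batch_starts[n + 1] = batch_id;
  }
  for (size_t idx : seq2batch_idx) std::cout << idx << ' ';
  std::cout << '\n';
}

It prints "4 0 9 5 1 10 6 2 11 7 3 8": time step 0 of every sequence first (rows 4, 0, 9), then time step 1, and so on, which is the step-major layout recurrent kernels consume.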
diff --git a/lite/backends/x86/math/softmax_impl.h b/lite/backends/x86/math/softmax_impl.h
index ae997a8680b9012435d80b4aa9f592a775e62e85..ec45377bc55154a4a36ebc5c3684ab7efeeef88e 100644
--- a/lite/backends/x86/math/softmax_impl.h
+++ b/lite/backends/x86/math/softmax_impl.h
@@ -99,7 +99,7 @@ class SoftmaxFunctor<Target, float, is_test, enable_if_CPU<Target>> {
                   const int axis_dim,
                   const lite::Tensor* X,
                   lite::Tensor* Y) {
-    auto in_dims = X->dims();
+    const auto& in_dims = X->dims();
 
     constexpr int kBatchDim = 0;
     constexpr int kClassDim = 1;
@@ -140,7 +140,7 @@ class SoftmaxFunctor<Target, float, is_test, enable_if_CPU<Target>> {
                   const int axis_dim,
                   const lite::Tensor* X,
                   lite::Tensor* Y) {
-    auto in_dims = X->dims();
+    const auto& in_dims = X->dims();
     const float* in_data = X->data<float>();
     float* out_data = Y->mutable_data<float>();
     const int kBatchDim = 0;
diff --git a/lite/fluid/eigen.h b/lite/fluid/eigen.h
index eac5332b53c857b05aacbfa95ee2e4b9fcd98a93..c3af7e9f6c3588f404c614430bf01f7ab5e099e5 100644
--- a/lite/fluid/eigen.h
+++ b/lite/fluid/eigen.h
@@ -30,13 +30,20 @@ struct EigenDim {
   using Type = Eigen::DSizes<Eigen::DenseIndex, D>;
 
   static Type From(const lite::DDim& dims) {
-    PADDLE_ENFORCE(dims.size() == D, "D must match DDim::size");
+    PADDLE_ENFORCE_EQ(dims.size(), D, "D must match DDim::size");
     Type ret;
     for (size_t d = 0; d < dims.size(); d++) {
       ret[d] = dims[d];
     }
     return ret;
   }
+
+  static Type From(const DDim::value_type length) {
+    PADDLE_ENFORCE_EQ(D, 1, "D must be 1.");
+    Type ret;
+    ret[0] = length;
+    return ret;
+  }
 };
 
 // Interpret paddle::platform::Tensor as EigenTensor and EigenConstTensor.
@@ -52,7 +59,7 @@ struct EigenTensor {
   using ConstType =
       Eigen::TensorMap<Eigen::Tensor<const T, D, MajorType, IndexType>>;
 
-  static Type From(Tensor& tensor, lite::DDim dims) {  // NOLINT
+  static Type From(Tensor& tensor, const lite::DDim& dims) {  // NOLINT
     return Type(const_cast<T*>(tensor.data<T>()),
                 EigenDim<D>::From(dims));  // NOLINT
   }
@@ -61,7 +68,7 @@ struct EigenTensor {
     return From(tensor, tensor.dims());
   }  // NOLINT
 
-  static ConstType From(const Tensor& tensor, lite::DDim dims) {
+  static ConstType From(const Tensor& tensor, const lite::DDim& dims) {
     return ConstType(tensor.data<T>(), EigenDim<D>::From(dims));
   }
 
@@ -97,14 +104,15 @@ struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
   // Flatten reshapes a Tensor into an EigenVector.
   static typename EigenVector::Type Flatten(Tensor& tensor) {  // NOLINT
-    return EigenVector::From(
-        tensor, lite::DDim(std::vector<int64_t>({tensor.dims().production()})));
+    return typename EigenVector::Type(
+        const_cast<T*>(tensor.data<T>()),
+        EigenDim<1>::From(tensor.dims().production()));
   }
 
   static typename EigenVector::ConstType Flatten(
       const Tensor& tensor) {  // NOLINT
-    return EigenVector::From(
-        tensor, lite::DDim(std::vector<int64_t>({tensor.dims().production()})));
+    return typename EigenVector::ConstType(
+        tensor.data<T>(), EigenDim<1>::From(tensor.dims().production()));
   }
 };
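Note: the point of the new scalar EigenDim<1>::From(length) overload is that Flatten previously built a temporary lite::DDim (a heap-backed shape vector) on every call just to carry a single length. A minimal stand-in with raw Eigen types (no lite::Tensor) shows the zero-copy mapping the rewritten Flatten now performs:

#include <unsupported/Eigen/CXX11/Tensor>

#include <iostream>

int main() {
  float data[6] = {0, 1, 2, 3, 4, 5};  // pretend this is a 2x3 tensor buffer
  // Equivalent of EigenDim<1>::From(6): a rank-1 size built from a scalar.
  Eigen::DSizes<Eigen::DenseIndex, 1> dim;
  dim[0] = 6;
  // Equivalent of the new Flatten: map the existing buffer, no allocation.
  Eigen::TensorMap<Eigen::Tensor<float, 1>> flat(data, dim);
  Eigen::Tensor<float, 0> sum = flat.sum();
  std::cout << sum() << "\n";  // 15
}

The TensorMap only wraps the existing buffer, which is the point of bypassing the DDim round-trip.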
diff --git a/lite/kernels/x86/concat_compute.h b/lite/kernels/x86/concat_compute.h
index 2c6419a3c3186bcd9b6985ac1ba0659ff72fcf6e..935f0811d4e7a7cbe2ce5fafa61b6d16a25d4a81 100644
--- a/lite/kernels/x86/concat_compute.h
+++ b/lite/kernels/x86/concat_compute.h
@@ -39,26 +39,28 @@ class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
   void Run() override {
     auto& param = *param_.get_mutable<param_t>();
 
+    if (param.x.size() == 1) {
+      param.output->ShareDataWith(*param.x[0]);
+      return;
+    }
+
     int64_t axis = static_cast<int64_t>(param.axis);
     auto* axis_tensor = param.axis_tensor;
     if (axis_tensor != nullptr) {
       auto* axis_tensor_data = axis_tensor->data<int>();
       axis = static_cast<int64_t>(axis_tensor_data[0]);
     }
 
-    auto x_dims = param.x[0]->dims();
-    auto out = param.output;
-    if (param.x.size() == 1) {
-      param.output->ShareDataWith(*param.x[0]);
-      return;
-    }
-    auto output_data = param.output->template mutable_data<T>();
+    const auto& x_dims = param.x[0]->dims();
+    auto* out = param.output;
+    T* output_data = param.output->template mutable_data<T>();
+
     int offset_concat_axis = 0;
     int num_concat = count(0, axis, x_dims);
     int concat_input_size = count(axis + 1, x_dims.size(), x_dims);
     const int top_concat_axis = out->dims()[axis];
     for (size_t i = 0; i < param.x.size(); ++i) {
-      auto bottom_data = param.x[i]->data<T>();
+      const T* bottom_data = param.x[i]->data<T>();
       const int64_t bottom_concat_axis = param.x[i]->dims()[axis];
       for (int n = 0; n < num_concat; ++n) {
         std::memcpy(
diff --git a/lite/kernels/x86/lookup_table_compute.h b/lite/kernels/x86/lookup_table_compute.h
index eeafa2e33e518f68042dc932531696b7af4e2f53..1801144f6eeb25a40fa052440b63913bc41a65a3 100644
--- a/lite/kernels/x86/lookup_table_compute.h
+++ b/lite/kernels/x86/lookup_table_compute.h
@@ -33,15 +33,15 @@ class LookupTableCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto *ids_t = param.Ids;
     auto *output_t = param.Out;
     int64_t padding_idx = param.padding_idx;
-    auto *ids = ids_t->data<int64_t>();
+    const int64_t *ids = ids_t->data<int64_t>();
     int64_t ids_numel = ids_t->dims().production();
 
     auto *table_t = param.W;
    int64_t row_number = table_t->dims()[0];
     int64_t row_width = table_t->dims()[1];
 
-    auto *table = table_t->data<T>();
-    auto *output = output_t->mutable_data<T>();
+    const T *table = table_t->data<T>();
+    T *output = output_t->mutable_data<T>();
     memset(output, 0, output_t->dims().production() * sizeof(T));
     for (int64_t i = 0; i < ids_numel; ++i) {
       if (padding_idx != -1 && ids[i] == padding_idx) {
diff --git a/lite/kernels/x86/reduce_compute.h b/lite/kernels/x86/reduce_compute.h
index 655f104ce65906f1904a7cf02d703069b0a7a2bf..f93157c837995792772c86d969312bfa28341ce4 100644
--- a/lite/kernels/x86/reduce_compute.h
+++ b/lite/kernels/x86/reduce_compute.h
@@ -51,7 +51,7 @@ class ReduceSumCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     auto* output = param.output;
     param.output->mutable_data<T>();
 
-    auto dims = param.dim;
+    const auto& dims = param.dim;
     bool keep_dim = param.keep_dim;
     if (reduce_all) {
       // Flatten and reduce 1-D tensor
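Note: the offsets fed to std::memcpy in the concat loop follow from viewing each input as num_concat blocks of bottom_concat_axis * concat_input_size contiguous elements. A self-contained sketch, assuming the same count() helper (product of the dims in [begin, end)) the kernel uses; two 2x2 row-major matrices concatenated along axis 1:

#include <cstring>
#include <iostream>
#include <vector>

static int count(int begin, int end, const std::vector<int>& dims) {
  int n = 1;
  for (int i = begin; i < end; ++i) n *= dims[i];
  return n;
}

int main() {
  std::vector<int> x_dims = {2, 2};
  const int axis = 1;
  std::vector<std::vector<float>> inputs = {{1, 2, 3, 4}, {5, 6, 7, 8}};
  const int num_concat = count(0, axis, x_dims);  // 2 outer rows
  const int concat_input_size =
      count(axis + 1, static_cast<int>(x_dims.size()), x_dims);  // 1
  const int top_concat_axis = 4;  // output extent along the concat axis
  std::vector<float> out(2 * 4);
  int offset_concat_axis = 0;
  for (const auto& in : inputs) {
    const int bottom_concat_axis = 2;  // this input's extent along the axis
    for (int n = 0; n < num_concat; ++n) {
      std::memcpy(&out[(n * top_concat_axis + offset_concat_axis) *
                       concat_input_size],
                  &in[n * bottom_concat_axis * concat_input_size],
                  sizeof(float) * bottom_concat_axis * concat_input_size);
    }
    offset_concat_axis += bottom_concat_axis;
  }
  for (float v : out) std::cout << v << ' ';  // 1 2 5 6 3 4 7 8
  std::cout << '\n';
}

The output is [[1, 2, 5, 6], [3, 4, 7, 8]]: each input contributes one block per outer row, placed at its running offset along the concat axis.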
diff --git a/lite/kernels/x86/reduce_op_function.h b/lite/kernels/x86/reduce_op_function.h
index b3ddab64e4bf8dc72cec3b86398f42269c5a947c..46e1248e070350ca82c73b639f8a924958460901 100644
--- a/lite/kernels/x86/reduce_op_function.h
+++ b/lite/kernels/x86/reduce_op_function.h
@@ -47,33 +47,23 @@ void ReduceFunctor(const lite::Tensor& input,
                    const std::vector<int>& dims,
                    bool keep_dim) {
   auto x = EigenTensor<T, D>::From(input);
-  auto x_rank = static_cast<int>(x.dimensions().size());
+
   auto reduce_dim = Eigen::array<int, R_D>();
-  std::vector<int> dims_ref = dims;
-  for (size_t i = 0; i < dims_ref.size(); ++i) {
-    if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i];
-    reduce_dim[i] = dims_ref[i];
-  }
-  // construct the squeezed output tensor
-  lite::DDim out_dims = output->dims();
-  if (keep_dim && x_rank > 1) {
-    const int kDelFlag = -2;
-    auto dims_vector = out_dims.Vectorize();
-    for (size_t i = 0; i < dims_ref.size(); ++i) {
-      dims_vector[dims_ref[i]] = kDelFlag;
+  auto x_rank = static_cast<int>(x.dimensions().size());
+  for (size_t i = 0; i < dims.size(); ++i) {
+    if (dims[i] < 0) {
+      reduce_dim[i] = x_rank + dims[i];
+    } else {
+      reduce_dim[i] = dims[i];
     }
-    dims_vector.erase(remove(dims_vector.begin(), dims_vector.end(), kDelFlag),
-                      dims_vector.end());
-    out_dims = lite::DDim(dims_vector);
   }
-  // auto& place = *context.eigen_device();
-  Functor functor;
+  Functor functor;
   if (D == 1) {
     auto out = EigenScalar<T>::From(output);
     functor(&x, &out, reduce_dim);
   } else {
-    auto out = EigenTensor<T, (D - R_D)>::From(*output, out_dims);
+    auto out = EigenTensor<T, (D - R_D)>::From(*output, output->dims());
     functor(&x, &out, reduce_dim);
   }
 }
diff --git a/lite/kernels/x86/sequence_reshape_compute.h b/lite/kernels/x86/sequence_reshape_compute.h
index 68a573c2f674edcf0a09cccec730a8d7dbcea844..99f84ebd06e1f5742bbaee9f98ec17aee44bd871 100644
--- a/lite/kernels/x86/sequence_reshape_compute.h
+++ b/lite/kernels/x86/sequence_reshape_compute.h
@@ -36,11 +36,10 @@ class SequenceReshapeCompute
     auto* out = param.output;
     int out_width = param.new_dim;
 
-    auto in_dims = in->dims();
+    const auto& in_dims = in->dims();
     int64_t in_width = in_dims[1];
-    // LOG(INFO)<<"sequence_reshape in tensor:"<<*in;
-    auto& in_lod = in->lod();
+    auto& in_lod = in->lod();
 
     CHECK_EQ(in_lod.size(), 1UL);
     CHECK_EQ((uint64_t)in_dims[0], in_lod[0].back());
@@ -63,13 +62,11 @@ class SequenceReshapeCompute
       }
     }
 
-    out->Resize(in_dims);
+    out->Resize(std::vector<int64_t>{static_cast<int64_t>(out->lod()[0].back()),
+                                     out_width});
     auto* dst_ptr = out->mutable_data<T>();
     auto size = in->numel() * sizeof(T);
     std::memcpy(dst_ptr, in->data<T>(), size);
-    std::vector<int64_t> out_shape{static_cast<int64_t>(out->lod()[0].back()),
-                                   out_width};
-    out->Resize(lite::DDim(out_shape));
   }
 
   virtual ~SequenceReshapeCompute() = default;
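Note: the rewritten loop in ReduceFunctor folds the negative-axis normalization into the construction of reduce_dim, since Eigen reductions only accept non-negative dimension indices. A standalone equivalent with a plain Eigen::Tensor in place of the EigenTensor wrapper (dims = {-1} on a rank-2 tensor reduces axis 1):

#include <unsupported/Eigen/CXX11/Tensor>

#include <iostream>
#include <vector>

int main() {
  Eigen::Tensor<float, 2> x(2, 3);
  x.setValues({{1, 2, 3}, {4, 5, 6}});
  std::vector<int> dims = {-1};  // reduce the last axis
  const int x_rank = 2;
  Eigen::array<int, 1> reduce_dim;
  for (size_t i = 0; i < dims.size(); ++i) {
    reduce_dim[i] = dims[i] < 0 ? x_rank + dims[i] : dims[i];
  }
  Eigen::Tensor<float, 1> out = x.sum(reduce_dim);
  std::cout << out << "\n";  // the row sums, 6 and 15
}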
diff --git a/lite/kernels/x86/softmax_compute.h b/lite/kernels/x86/softmax_compute.h
index 169644db05e2fc9b83b11e068e03d6a44d5d06b7..8063cf6566157bb5bd63449ce1655dd024983d1a 100644
--- a/lite/kernels/x86/softmax_compute.h
+++ b/lite/kernels/x86/softmax_compute.h
@@ -29,7 +29,7 @@ static inline int CanonicalAxis(const int axis, const int rank) {
   return axis;
 }
 
-static inline int SizeToAxis(const int axis, lite::DDim dims) {
+static inline int SizeToAxis(const int axis, const DDim& dims) {
   int size = 1;
   for (int i = 0; i < axis; i++) {
     size *= dims[i];
@@ -37,7 +37,7 @@ static inline int SizeToAxis(const int axis, lite::DDim dims) {
   return size;
 }
 
-static inline int SizeFromAxis(const int axis, lite::DDim dims) {
+static inline int SizeFromAxis(const int axis, const DDim& dims) {
   int size = 1;
   for (size_t i = axis; i < dims.size(); i++) {
     size *= dims[i];
@@ -61,13 +61,15 @@ class SoftmaxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     int axis_dim = param.x->dims()[axis];
     const int n = SizeToAxis(axis, param.x->dims());
     const int d = SizeFromAxis(axis, param.x->dims());
-    std::vector<int64_t> shape{n, d};
-    lite::Tensor input_2d, out_2d;
+    DDim shape(std::vector<int64_t>{n, d});
+
+    Tensor input_2d;
+    Tensor out_2d;
     input_2d.ShareDataWith(*param.x);
-    input_2d.Resize(lite::DDim(shape));
+    input_2d.Resize(shape);
     out_2d.ShareDataWith(*param.output);
-    out_2d.Resize(lite::DDim(shape));
+    out_2d.Resize(shape);
 
     lite::x86::math::SoftmaxFunctor<TARGET(kX86), T, true>()(
         context, axis_dim, &input_2d, &out_2d);
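Note: SizeToAxis/SizeFromAxis are what let a single 2-D softmax implementation serve tensors of any rank: the shape is collapsed to [n, d] around the softmax axis, and ShareDataWith reuses the buffer instead of copying it. A standalone sketch of the arithmetic, with std::vector standing in for DDim, for a {2, 3, 4} tensor and axis = 1:

#include <iostream>
#include <vector>

static int SizeToAxis(const int axis, const std::vector<int>& dims) {
  int size = 1;
  for (int i = 0; i < axis; i++) size *= dims[i];
  return size;
}

static int SizeFromAxis(const int axis, const std::vector<int>& dims) {
  int size = 1;
  for (size_t i = axis; i < dims.size(); i++) size *= dims[i];
  return size;
}

int main() {
  std::vector<int> dims = {2, 3, 4};
  const int axis = 1;
  const int n = SizeToAxis(axis, dims);    // 2: product of dims before axis
  const int d = SizeFromAxis(axis, dims);  // 12: product of dims from axis on
  std::cout << "view as " << n << " x " << d << "\n";  // view as 2 x 12
}

Here axis_dim = dims[axis] = 3, so each of the n rows carries d / axis_dim = 4 interleaved softmax groups of length axis_dim, which is why the functor needs axis_dim in addition to the 2-D shape.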