diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 643f875491724bf443bd7727391734377ee6180c..fc54ed697f685f048ee542aa2bebe91e03c6f76c 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -78,6 +78,9 @@ class Tensor { /*! Return the dimensions of the memory block. */ inline const DDim& dims() const; + /*! Return the numel of the memory block. */ + inline int64_t numel() const; + /*! Resize the dimensions of the memory block. */ inline Tensor& Resize(const DDim& dims); @@ -159,6 +162,9 @@ class Tensor { /*! points to dimensions of memory block. */ DDim dims_; + /*! the element count of tensor. */ + int64_t numel_; + /** * @brief A PlaceHolder may be shared by more than one tensor. * diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 94f436294f350e2a39785a09959efb3b17bd00a5..03678784b460d44a592a2dbfdff5f058d0d42bf8 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -24,7 +24,7 @@ inline void Tensor::check_memory_size() const { PADDLE_ENFORCE_NOT_NULL( holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); PADDLE_ENFORCE_GE( - holder_->size(), product(dims_) * sizeof(T) + offset_, + holder_->size(), numel_ * sizeof(T) + offset_, "Tensor's dims_ is out of bound. Call Tensor::mutable_data " "first to re-allocate memory.\n" "or maybe the required data-type mismatches the data already stored."); @@ -54,11 +54,11 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) { template inline T* Tensor::mutable_data(platform::Place place) { static_assert(std::is_pod::value, "T must be POD"); - PADDLE_ENFORCE_GT(product(dims_), 0, + PADDLE_ENFORCE_GT(numel_, 0, "Tensor's numel must be larger than zero to call " "Tensor::mutable_data. Call Tensor::set_dim first."); /* some versions of boost::variant don't have operator!= */ - int64_t size = product(dims_) * sizeof(T); + int64_t size = numel_ * sizeof(T); if (holder_ == nullptr || !(holder_->place() == place) || holder_->size() < size + offset_) { if (platform::is_cpu_place(place)) { @@ -97,7 +97,7 @@ inline void Tensor::CopyFrom(const Tensor& src, auto dst_ptr = static_cast(mutable_data(dst_place)); - auto size = product(src.dims_) * sizeof(T); + auto size = src.numel() * sizeof(T); if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { memory::Copy(boost::get(dst_place), dst_ptr, @@ -131,7 +131,7 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { PADDLE_ENFORCE_LT(begin_idx, end_idx, "Begin index must be less than end index."); PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1."); - size_t base = product(dims_) / dims_[0]; + size_t base = numel_ / dims_[0]; Tensor dst; dst.holder_ = holder_; DDim dst_dims = dims_; @@ -143,10 +143,13 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { inline Tensor& Tensor::Resize(const DDim& dims) { dims_ = dims; + numel_ = product(dims_); return *this; } inline const DDim& Tensor::dims() const { return dims_; } +inline int64_t Tensor::numel() const { return numel_; } + } // namespace framework } // namespace paddle diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 9e2bcebe3b5432c157fac895a9bbab5164193dbb..0dc509952578497671a128374f77ce616a520909 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -42,7 +42,7 @@ class CosSimKernel : public framework::OpKernel { output_y_norm->mutable_data(context.GetPlace()); auto dims = input_x->dims(); - int size = static_cast(framework::product(dims)); + int64_t size = input_x->numel(); auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); auto x = EigenMatrix::From(*input_x, new_dims); auto y = EigenMatrix::From(*input_y, new_dims); @@ -72,7 +72,7 @@ class CosSimGradKernel : public framework::OpKernel { auto* input_grad_z = context.Input(framework::GradVarName("Out")); auto dims = input_x->dims(); - int size = static_cast(framework::product(dims)); + int64_t size = input_x->numel(); auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); auto x = EigenMatrix::From(*input_x, new_dims); auto y = EigenMatrix::From(*input_y, new_dims); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 6574880c0eb6324b2dd175e39a364d2ef46e735e..3d76516405960c502a46997108049b2db5cab6bf 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -31,7 +31,7 @@ class CPUGaussianRandomKernel : public framework::OpKernel { } engine.seed(seed); std::normal_distribution dist(mean, std); - int64_t size = framework::product(tensor->dims()); + int64_t size = tensor->numel(); for (int64_t i = 0; i < size; ++i) { data[i] = dist(engine); } diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index d9dbc1dcfe6a6676938d64be93c879ea69148018..2d63b3049988cfc3135a87a57dad56b970df3eab 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -50,8 +50,8 @@ class GPUGaussianRandomKernel : public framework::OpKernel { T mean = static_cast(context.Attr("mean")); T std = static_cast(context.Attr("std")); thrust::counting_iterator index_sequence_begin(0); - ssize_t N = framework::product(tensor->dims()); - thrust::transform(index_sequence_begin, index_sequence_begin + N, + int64_t size = tensor->numel(); + thrust::transform(index_sequence_begin, index_sequence_begin + size, thrust::device_ptr(data), GaussianGenerator(mean, std, seed)); } diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 27eee3436af8107cef2aa3577ea238be49edf1af..708344046760691aa2da562eb1ee3d8b130c5f18 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -70,7 +70,7 @@ class LookupTableCUDAKernel : public framework::OpKernel { size_t N = table_t->dims()[0]; size_t D = table_t->dims()[1]; - size_t K = product(ids_t->dims()); + size_t K = ids_t->numel(); auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); @@ -91,7 +91,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { int N = d_table_t->dims()[0]; int D = d_table_t->dims()[1]; - int K = product(ids_t->dims()); + int K = ids_t->numel(); const int32_t* ids = ids_t->data(); const T* d_output = d_output_t->data(); T* d_table = d_table_t->mutable_data(context.GetPlace()); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index 877b36cef4ea9cdaaaf37c97d5e5bfce55b91436..a1298906dd4b4209644fe06584f70169519de01c 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -35,7 +35,7 @@ class LookupTableKernel : public framework::OpKernel { auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); - for (ssize_t i = 0; i < product(ids_t->dims()); ++i) { + for (int64_t i = 0; i < ids_t->numel(); ++i) { PADDLE_ENFORCE_LT(ids[i], N); PADDLE_ENFORCE_GE(ids[i], 0); memcpy(output + i * D, table + ids[i] * D, D * sizeof(T)); @@ -61,7 +61,7 @@ class LookupTableGradKernel : public framework::OpKernel { t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); - for (ssize_t i = 0; i < product(ids_t->dims()); ++i) { + for (int64_t i = 0; i < ids_t->numel(); ++i) { PADDLE_ENFORCE_LT(ids[i], N); PADDLE_ENFORCE_GE(ids[i], 0); for (int j = 0; j < D; ++j) { diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index 9848af280b62729bef9243052ceae0b7d8f4c6f5..ce31e178d8e375dc59be80a6c05133201308da70 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -49,12 +49,11 @@ class MeanGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto OG = context.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE(framework::product(OG->dims()) == 1, - "Mean Gradient should be scalar"); + PADDLE_ENFORCE(OG->numel() == 1, "Mean Gradient should be scalar"); auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); - T ig_size = (T)framework::product(IG->dims()); + T ig_size = static_cast(IG->numel()); Eigen::DSizes bcast(ig_size); EigenVector::Flatten(*IG).device(context.GetEigenDevice()) = diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc index 069fb5e1abc657aa02a50fde352ce88d078c36e1..a4876feb2edf77bd422fa2a7687b0fa7d55dae47 100644 --- a/paddle/operators/minus_op.cc +++ b/paddle/operators/minus_op.cc @@ -31,8 +31,7 @@ class MinusOp : public framework::OperatorWithKernel { auto *right_tensor = ctx.Input("Y"); PADDLE_ENFORCE_EQ( - framework::product(left_tensor->dims()), - framework::product(right_tensor->dims()), + left_tensor->numel(), right_tensor->numel(), "Minus operator must take two tensor with same num of elements"); ctx.Output("Out")->Resize(left_tensor->dims()); } diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index dc30644a5e7e33d4289e48cac093aa5fde7e75e7..9f51d3efa8ecba894a1023b9de2df451ca85916c 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -41,8 +41,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { int rank = framework::arity(x_dims); PADDLE_ENFORCE_GE(rank, 2, "Tensor rank should be at least equal to 2."); - PADDLE_ENFORCE_EQ(framework::product(x_dims) / x_dims[0], - framework::product(y_dims) / y_dims[0], + PADDLE_ENFORCE_EQ(x->numel() / x_dims[0], y->numel() / y_dims[0], "Product of dimensions expcet the first dimension of " "input and target must be equal."); PADDLE_ENFORCE(y_dims[0] == 1 || y_dims[0] == x_dims[0], @@ -50,8 +49,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { "or to 1."); ctx.Output("sub_result") - ->Resize({static_cast(x_dims[0]), - static_cast(framework::product(x_dims) / x_dims[0])}); + ->Resize({x_dims[0], x->numel() / x_dims[0]}); ctx.Output("Out")->Resize({x_dims[0], 1}); } }; diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h index ad3347a0b35f3385c5adbcd7ceaa94fe134105e3..097ac04fc09a10b3b624f491a847e281e41a802c 100644 --- a/paddle/operators/squared_l2_distance_op.h +++ b/paddle/operators/squared_l2_distance_op.h @@ -39,7 +39,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel { auto in0_dims = in0->dims(); auto in1_dims = in1->dims(); - int cols = framework::product(in0_dims) / in0_dims[0]; + int cols = in0->numel() / in0_dims[0]; // reduce dimensions except the first auto x = EigenMatrix::From(*in0, framework::make_ddim({in0_dims[0], cols})); @@ -82,7 +82,7 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { auto x_dims = x_g->dims(); auto y_dims = y_g->dims(); - int cols = framework::product(x_dims) / x_dims[0]; + int cols = x_g->numel() / x_dims[0]; // calculate gradient auto grad_mat = 2 * (out_grad.broadcast(Eigen::array({{1, cols}}))) * diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index f2aeef6c310df8535e67fa3906301a87f8ec4694..b8fbc9b52aecdb5c8d985b5de9bcd7cb85835b60 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -35,7 +35,7 @@ class CPUUniformRandomKernel : public framework::OpKernel { std::uniform_real_distribution dist( static_cast(context.Attr("min")), static_cast(context.Attr("max"))); - int64_t size = framework::product(tensor->dims()); + int64_t size = tensor->numel(); for (int64_t i = 0; i < size; ++i) { data[i] = dist(engine); } diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index c2c041b144b6ca1f019f972e1301b756ec1c9301..6614b53b3f990d10c82633f3c1f079acea0cd827 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -53,8 +53,8 @@ class GPUUniformRandomKernel : public framework::OpKernel { T min = static_cast(context.Attr("min")); T max = static_cast(context.Attr("max")); thrust::counting_iterator index_sequence_begin(0); - ssize_t N = framework::product(tensor->dims()); - thrust::transform(index_sequence_begin, index_sequence_begin + N, + int64_t size = tensor->numel(); + thrust::transform(index_sequence_begin, index_sequence_begin + size, thrust::device_ptr(data), UniformGenerator(min, max, seed)); }