Commit 104ed75f, authored by qingqing01, committed by GitHub

Merge pull request #3958 from qingqing01/tensor_numel

Add function to get element count from tensor.
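The commit replaces ad-hoc `framework::product(tensor->dims())` calls with a cached `Tensor::numel()` accessor. A minimal sketch of the intended call-site pattern, assuming a tensor that has already been `Resize`d; the helper `FillWithOnes` and its use of `CPUPlace` are illustrative, not part of the commit:

```cpp
#include "paddle/framework/tensor.h"  // header path as of this commit

// Hypothetical helper: visit every element of an already-resized tensor.
void FillWithOnes(paddle::framework::Tensor* tensor) {
  // Before this change: int64_t size = framework::product(tensor->dims());
  // After: numel() returns the count cached by Tensor::Resize() in O(1).
  float* data = tensor->mutable_data<float>(paddle::platform::CPUPlace());
  for (int64_t i = 0; i < tensor->numel(); ++i) {
    data[i] = 1.0f;
  }
}
```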
@@ -81,6 +81,9 @@ class Tensor {
   /*! Return the dimensions of the memory block. */
   inline const DDim& dims() const;
 
+  /*! Return the numel of the memory block. */
+  inline int64_t numel() const;
+
   /*! Resize the dimensions of the memory block. */
   inline Tensor& Resize(const DDim& dims);
@@ -162,6 +165,12 @@ class Tensor {
   /*! points to dimensions of memory block. */
   DDim dims_;
 
+  /**
+   * A cache of the number of elements in a tensor.
+   * Would be 0 for an uninitialized tensor.
+   */
+  int64_t numel_;
+
   /**
    * @brief A PlaceHolder may be shared by more than one tensor.
    *
......
@@ -24,7 +24,7 @@ inline void Tensor::check_memory_size() const {
   PADDLE_ENFORCE_NOT_NULL(
       holder_, "Tenosr holds no memory. Call Tensor::mutable_data first.");
   PADDLE_ENFORCE_GE(
-      holder_->size(), product(dims_) * sizeof(T) + offset_,
+      holder_->size(), numel() * sizeof(T) + offset_,
       "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
       "first to re-allocate memory.\n"
       "or maybe the required data-type mismatches the data already stored.");
@@ -54,11 +54,11 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
 template <typename T>
 inline T* Tensor::mutable_data(platform::Place place) {
   static_assert(std::is_pod<T>::value, "T must be POD");
-  PADDLE_ENFORCE_GT(product(dims_), 0,
+  PADDLE_ENFORCE_GT(numel(), 0,
                     "Tensor's numel must be larger than zero to call "
                     "Tensor::mutable_data. Call Tensor::set_dim first.");
   /* some versions of boost::variant don't have operator!= */
-  int64_t size = product(dims_) * sizeof(T);
+  int64_t size = numel() * sizeof(T);
   if (holder_ == nullptr || !(holder_->place() == place) ||
       holder_->size() < size + offset_) {
     if (platform::is_cpu_place(place)) {
@@ -97,7 +97,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
   auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
-  auto size = product(src.dims_) * sizeof(T);
+  auto size = src.numel() * sizeof(T);
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
@@ -131,7 +131,7 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
   PADDLE_ENFORCE_LT(begin_idx, end_idx,
                     "Begin index must be less than end index.");
   PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1.");
-  size_t base = product(dims_) / dims_[0];
+  size_t base = numel() / dims_[0];
   Tensor dst;
   dst.holder_ = holder_;
   DDim dst_dims = dims_;
@@ -143,11 +143,14 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
 inline Tensor& Tensor::Resize(const DDim& dims) {
   dims_ = dims;
+  numel_ = product(dims_);
   return *this;
 }
 
 inline const DDim& Tensor::dims() const { return dims_; }
 
+inline int64_t Tensor::numel() const { return numel_; }
+
 template <typename T>
 inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
   Tensor res;
......
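The `Resize` change above is the whole caching scheme: `Resize` is the only writer of `numel_`, so the cache cannot drift from `dims_`, and `numel()` becomes a constant-time read. A simplified, self-contained sketch of the same pattern, using a stand-in class rather than the actual Paddle `Tensor`:

```cpp
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Stand-in for Tensor: caches the element count alongside the shape.
class ShapedBuffer {
 public:
  // The single writer of numel_; keeps the cache coherent with dims_.
  ShapedBuffer& Resize(std::vector<int64_t> dims) {
    dims_ = std::move(dims);
    numel_ = std::accumulate(dims_.begin(), dims_.end(), int64_t{1},
                             std::multiplies<int64_t>());
    return *this;
  }

  // O(1): reads the cache instead of re-multiplying the dimensions.
  int64_t numel() const { return numel_; }

 private:
  std::vector<int64_t> dims_;
  int64_t numel_ = 0;  // 0 for an uninitialized buffer, as in the comment above
};
```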
@@ -42,7 +42,7 @@ class CosSimKernel : public framework::OpKernel {
     output_y_norm->mutable_data<T>(context.GetPlace());
 
     auto dims = input_x->dims();
-    int size = static_cast<int>(framework::product(dims));
+    int64_t size = input_x->numel();
     auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
     auto x = EigenMatrix<T>::From(*input_x, new_dims);
     auto y = EigenMatrix<T>::From(*input_y, new_dims);
@@ -72,7 +72,7 @@ class CosSimGradKernel : public framework::OpKernel {
     auto* input_grad_z = context.Input<Tensor>(framework::GradVarName("Out"));
     auto dims = input_x->dims();
-    int size = static_cast<int>(framework::product(dims));
+    int64_t size = input_x->numel();
     auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
     auto x = EigenMatrix<T>::From(*input_x, new_dims);
     auto y = EigenMatrix<T>::From(*input_y, new_dims);
......
@@ -31,7 +31,7 @@ class CPUGaussianRandomKernel : public framework::OpKernel {
     }
     engine.seed(seed);
     std::normal_distribution<T> dist(mean, std);
-    int64_t size = framework::product(tensor->dims());
+    int64_t size = tensor->numel();
     for (int64_t i = 0; i < size; ++i) {
       data[i] = dist(engine);
     }
......
@@ -50,8 +50,8 @@ class GPUGaussianRandomKernel : public framework::OpKernel {
     T mean = static_cast<T>(context.Attr<float>("mean"));
     T std = static_cast<T>(context.Attr<float>("std"));
     thrust::counting_iterator<unsigned int> index_sequence_begin(0);
-    ssize_t N = framework::product(tensor->dims());
-    thrust::transform(index_sequence_begin, index_sequence_begin + N,
+    int64_t size = tensor->numel();
+    thrust::transform(index_sequence_begin, index_sequence_begin + size,
                       thrust::device_ptr<T>(data),
                       GaussianGenerator<T>(mean, std, seed));
   }
......
@@ -70,7 +70,7 @@ class LookupTableCUDAKernel : public framework::OpKernel {
     size_t N = table_t->dims()[0];
     size_t D = table_t->dims()[1];
-    size_t K = product(ids_t->dims());
+    size_t K = ids_t->numel();
     auto ids = ids_t->data<int32_t>();
     auto table = table_t->data<T>();
     auto output = output_t->mutable_data<T>(context.GetPlace());
@@ -91,7 +91,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel {
     int N = d_table_t->dims()[0];
     int D = d_table_t->dims()[1];
-    int K = product(ids_t->dims());
+    int K = ids_t->numel();
     const int32_t* ids = ids_t->data<int32_t>();
     const T* d_output = d_output_t->data<T>();
     T* d_table = d_table_t->mutable_data<T>(context.GetPlace());
......
@@ -35,7 +35,7 @@ class LookupTableKernel : public framework::OpKernel {
     auto ids = ids_t->data<int32_t>();
     auto table = table_t->data<T>();
     auto output = output_t->mutable_data<T>(context.GetPlace());
-    for (ssize_t i = 0; i < product(ids_t->dims()); ++i) {
+    for (int64_t i = 0; i < ids_t->numel(); ++i) {
       PADDLE_ENFORCE_LT(ids[i], N);
       PADDLE_ENFORCE_GE(ids[i], 0);
       memcpy(output + i * D, table + ids[i] * D, D * sizeof(T));
@@ -61,7 +61,7 @@ class LookupTableGradKernel : public framework::OpKernel {
     t.device(context.GetEigenDevice<platform::CPUPlace>()) =
         t.constant(static_cast<T>(0));
-    for (ssize_t i = 0; i < product(ids_t->dims()); ++i) {
+    for (int64_t i = 0; i < ids_t->numel(); ++i) {
       PADDLE_ENFORCE_LT(ids[i], N);
       PADDLE_ENFORCE_GE(ids[i], 0);
       for (int j = 0; j < D; ++j) {
......
@@ -49,12 +49,11 @@ class MeanGradKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto OG = context.Input<Tensor>(framework::GradVarName("Out"));
-    PADDLE_ENFORCE(framework::product(OG->dims()) == 1,
-                   "Mean Gradient should be scalar");
+    PADDLE_ENFORCE(OG->numel() == 1, "Mean Gradient should be scalar");
     auto IG = context.Output<Tensor>(framework::GradVarName("X"));
     IG->mutable_data<T>(context.GetPlace());
 
-    T ig_size = (T)framework::product(IG->dims());
+    T ig_size = static_cast<T>(IG->numel());
     Eigen::DSizes<int, 1> bcast(ig_size);
 
     EigenVector<T>::Flatten(*IG).device(context.GetEigenDevice<Place>()) =
......
@@ -31,8 +31,7 @@ class MinusOp : public framework::OperatorWithKernel {
     auto *right_tensor = ctx.Input<framework::Tensor>("Y");
 
     PADDLE_ENFORCE_EQ(
-        framework::product(left_tensor->dims()),
-        framework::product(right_tensor->dims()),
+        left_tensor->numel(), right_tensor->numel(),
         "Minus operator must take two tensor with same num of elements");
     ctx.Output<framework::Tensor>("Out")->Resize(left_tensor->dims());
   }
......
@@ -41,8 +41,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
     int rank = framework::arity(x_dims);
     PADDLE_ENFORCE_GE(rank, 2, "Tensor rank should be at least equal to 2.");
-    PADDLE_ENFORCE_EQ(framework::product(x_dims) / x_dims[0],
-                      framework::product(y_dims) / y_dims[0],
+    PADDLE_ENFORCE_EQ(x->numel() / x_dims[0], y->numel() / y_dims[0],
                       "Product of dimensions expcet the first dimension of "
                       "input and target must be equal.");
     PADDLE_ENFORCE(y_dims[0] == 1 || y_dims[0] == x_dims[0],
@@ -50,8 +49,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
                    "or to 1.");
 
     ctx.Output<Tensor>("sub_result")
-        ->Resize({static_cast<int>(x_dims[0]),
-                  static_cast<int>(framework::product(x_dims) / x_dims[0])});
+        ->Resize({x_dims[0], x->numel() / x_dims[0]});
     ctx.Output<Tensor>("Out")->Resize({x_dims[0], 1});
   }
 };
......
@@ -39,7 +39,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel {
     auto in0_dims = in0->dims();
     auto in1_dims = in1->dims();
 
-    int cols = framework::product(in0_dims) / in0_dims[0];
+    int cols = in0->numel() / in0_dims[0];
     // reduce dimensions except the first
     auto x =
         EigenMatrix<T>::From(*in0, framework::make_ddim({in0_dims[0], cols}));
@@ -82,7 +82,7 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel {
     auto x_dims = x_g->dims();
     auto y_dims = y_g->dims();
 
-    int cols = framework::product(x_dims) / x_dims[0];
+    int cols = x_g->numel() / x_dims[0];
     // calculate gradient
     auto grad_mat = 2 *
                     (out_grad.broadcast(Eigen::array<int, 2>({{1, cols}}))) *
......
@@ -35,7 +35,7 @@ class CPUUniformRandomKernel : public framework::OpKernel {
     std::uniform_real_distribution<T> dist(
         static_cast<T>(context.Attr<float>("min")),
         static_cast<T>(context.Attr<float>("max")));
-    int64_t size = framework::product(tensor->dims());
+    int64_t size = tensor->numel();
     for (int64_t i = 0; i < size; ++i) {
       data[i] = dist(engine);
     }
......
@@ -53,8 +53,8 @@ class GPUUniformRandomKernel : public framework::OpKernel {
     T min = static_cast<T>(context.Attr<float>("min"));
     T max = static_cast<T>(context.Attr<float>("max"));
     thrust::counting_iterator<unsigned int> index_sequence_begin(0);
-    ssize_t N = framework::product(tensor->dims());
-    thrust::transform(index_sequence_begin, index_sequence_begin + N,
+    int64_t size = tensor->numel();
+    thrust::transform(index_sequence_begin, index_sequence_begin + size,
                       thrust::device_ptr<T>(data),
                       UniformGenerator<T>(min, max, seed));
   }
......