Commit 104ed75f authored by qingqing01, committed by GitHub

Merge pull request #3958 from qingqing01/tensor_numel

Add function to get element count from tensor.
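A minimal usage sketch of the change (hypothetical snippet, not part of this diff; it assumes a framework::Tensor t that has already been resized): the element count that callers previously recomputed with framework::product(t.dims()) can now be read from the new accessor.

  // Before: recompute the element count from the dims on every call.
  int64_t count_before = framework::product(t.dims());
  // After: Tensor::numel() returns the count cached by Tensor::Resize().
  int64_t count_after = t.numel();  // equal to count_before for an initialized tensor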
......@@ -81,6 +81,9 @@ class Tensor {
/*! Return the dimensions of the memory block. */
inline const DDim& dims() const;
+ /*! Return the numel of the memory block. */
+ inline int64_t numel() const;
/*! Resize the dimensions of the memory block. */
inline Tensor& Resize(const DDim& dims);
......@@ -162,6 +165,12 @@ class Tensor {
/*! points to dimensions of memory block. */
DDim dims_;
+ /**
+  * A cache of the number of elements in a tensor.
+  * Would be 0 for an uninitialized tensor.
+  */
+ int64_t numel_;
/**
* @brief A PlaceHolder may be shared by more than one tensor.
*
......
......@@ -24,7 +24,7 @@ inline void Tensor::check_memory_size() const {
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE_GE(
- holder_->size(), product(dims_) * sizeof(T) + offset_,
+ holder_->size(), numel() * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.\n"
"or maybe the required data-type mismatches the data already stored.");
......@@ -54,11 +54,11 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
template <typename T>
inline T* Tensor::mutable_data(platform::Place place) {
static_assert(std::is_pod<T>::value, "T must be POD");
- PADDLE_ENFORCE_GT(product(dims_), 0,
+ PADDLE_ENFORCE_GT(numel(), 0,
"Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
/* some versions of boost::variant don't have operator!= */
- int64_t size = product(dims_) * sizeof(T);
+ int64_t size = numel() * sizeof(T);
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + offset_) {
if (platform::is_cpu_place(place)) {
......@@ -97,7 +97,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
- auto size = product(src.dims_) * sizeof(T);
+ auto size = src.numel() * sizeof(T);
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
......@@ -131,7 +131,7 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
PADDLE_ENFORCE_LT(begin_idx, end_idx,
"Begin index must be less than end index.");
PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1.");
- size_t base = product(dims_) / dims_[0];
+ size_t base = numel() / dims_[0];
Tensor dst;
dst.holder_ = holder_;
DDim dst_dims = dims_;
......@@ -143,11 +143,14 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
inline Tensor& Tensor::Resize(const DDim& dims) {
dims_ = dims;
+ numel_ = product(dims_);
return *this;
}
inline const DDim& Tensor::dims() const { return dims_; }
+ inline int64_t Tensor::numel() const { return numel_; }
template <typename T>
inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
Tensor res;
......
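As the tensor_impl.h hunk above shows, Resize() refreshes the cached count, so numel() stays consistent with dims(). A small hedged illustration (hypothetical shapes, not taken from this diff; the header path is assumed from this repository's layout):

  #include "paddle/framework/tensor.h"  // assumed header location

  paddle::framework::Tensor t;
  t.Resize(paddle::framework::make_ddim({2, 3}));  // Resize recomputes numel_ = product(dims_)
  // t.numel() == 6, matching framework::product(t.dims())
  t.Resize(paddle::framework::make_ddim({4, 5}));
  // t.numel() == 20 after the second Resize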
......@@ -42,7 +42,7 @@ class CosSimKernel : public framework::OpKernel {
output_y_norm->mutable_data<T>(context.GetPlace());
auto dims = input_x->dims();
- int size = static_cast<int>(framework::product(dims));
+ int64_t size = input_x->numel();
auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
auto x = EigenMatrix<T>::From(*input_x, new_dims);
auto y = EigenMatrix<T>::From(*input_y, new_dims);
......@@ -72,7 +72,7 @@ class CosSimGradKernel : public framework::OpKernel {
auto* input_grad_z = context.Input<Tensor>(framework::GradVarName("Out"));
auto dims = input_x->dims();
- int size = static_cast<int>(framework::product(dims));
+ int64_t size = input_x->numel();
auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
auto x = EigenMatrix<T>::From(*input_x, new_dims);
auto y = EigenMatrix<T>::From(*input_y, new_dims);
......
......@@ -31,7 +31,7 @@ class CPUGaussianRandomKernel : public framework::OpKernel {
}
engine.seed(seed);
std::normal_distribution<T> dist(mean, std);
- int64_t size = framework::product(tensor->dims());
+ int64_t size = tensor->numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
......
......@@ -50,8 +50,8 @@ class GPUGaussianRandomKernel : public framework::OpKernel {
T mean = static_cast<T>(context.Attr<float>("mean"));
T std = static_cast<T>(context.Attr<float>("std"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
- ssize_t N = framework::product(tensor->dims());
- thrust::transform(index_sequence_begin, index_sequence_begin + N,
+ int64_t size = tensor->numel();
+ thrust::transform(index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data),
GaussianGenerator<T>(mean, std, seed));
}
......
......@@ -70,7 +70,7 @@ class LookupTableCUDAKernel : public framework::OpKernel {
size_t N = table_t->dims()[0];
size_t D = table_t->dims()[1];
- size_t K = product(ids_t->dims());
+ size_t K = ids_t->numel();
auto ids = ids_t->data<int32_t>();
auto table = table_t->data<T>();
auto output = output_t->mutable_data<T>(context.GetPlace());
......@@ -91,7 +91,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel {
int N = d_table_t->dims()[0];
int D = d_table_t->dims()[1];
- int K = product(ids_t->dims());
+ int K = ids_t->numel();
const int32_t* ids = ids_t->data<int32_t>();
const T* d_output = d_output_t->data<T>();
T* d_table = d_table_t->mutable_data<T>(context.GetPlace());
......
......@@ -35,7 +35,7 @@ class LookupTableKernel : public framework::OpKernel {
auto ids = ids_t->data<int32_t>();
auto table = table_t->data<T>();
auto output = output_t->mutable_data<T>(context.GetPlace());
- for (ssize_t i = 0; i < product(ids_t->dims()); ++i) {
+ for (int64_t i = 0; i < ids_t->numel(); ++i) {
PADDLE_ENFORCE_LT(ids[i], N);
PADDLE_ENFORCE_GE(ids[i], 0);
memcpy(output + i * D, table + ids[i] * D, D * sizeof(T));
......@@ -61,7 +61,7 @@ class LookupTableGradKernel : public framework::OpKernel {
t.device(context.GetEigenDevice<platform::CPUPlace>()) =
t.constant(static_cast<T>(0));
- for (ssize_t i = 0; i < product(ids_t->dims()); ++i) {
+ for (int64_t i = 0; i < ids_t->numel(); ++i) {
PADDLE_ENFORCE_LT(ids[i], N);
PADDLE_ENFORCE_GE(ids[i], 0);
for (int j = 0; j < D; ++j) {
......
......@@ -49,12 +49,11 @@ class MeanGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto OG = context.Input<Tensor>(framework::GradVarName("Out"));
- PADDLE_ENFORCE(framework::product(OG->dims()) == 1,
-                "Mean Gradient should be scalar");
+ PADDLE_ENFORCE(OG->numel() == 1, "Mean Gradient should be scalar");
auto IG = context.Output<Tensor>(framework::GradVarName("X"));
IG->mutable_data<T>(context.GetPlace());
- T ig_size = (T)framework::product(IG->dims());
+ T ig_size = static_cast<T>(IG->numel());
Eigen::DSizes<int, 1> bcast(ig_size);
EigenVector<T>::Flatten(*IG).device(context.GetEigenDevice<Place>()) =
......
......@@ -31,8 +31,7 @@ class MinusOp : public framework::OperatorWithKernel {
auto *right_tensor = ctx.Input<framework::Tensor>("Y");
PADDLE_ENFORCE_EQ(
- framework::product(left_tensor->dims()),
- framework::product(right_tensor->dims()),
+ left_tensor->numel(), right_tensor->numel(),
"Minus operator must take two tensor with same num of elements");
ctx.Output<framework::Tensor>("Out")->Resize(left_tensor->dims());
}
......
......@@ -41,8 +41,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
int rank = framework::arity(x_dims);
PADDLE_ENFORCE_GE(rank, 2, "Tensor rank should be at least equal to 2.");
- PADDLE_ENFORCE_EQ(framework::product(x_dims) / x_dims[0],
-                   framework::product(y_dims) / y_dims[0],
+ PADDLE_ENFORCE_EQ(x->numel() / x_dims[0], y->numel() / y_dims[0],
"Product of dimensions except the first dimension of "
"input and target must be equal.");
PADDLE_ENFORCE(y_dims[0] == 1 || y_dims[0] == x_dims[0],
......@@ -50,8 +49,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
"or to 1.");
ctx.Output<Tensor>("sub_result")
- ->Resize({static_cast<int>(x_dims[0]),
-           static_cast<int>(framework::product(x_dims) / x_dims[0])});
+ ->Resize({x_dims[0], x->numel() / x_dims[0]});
ctx.Output<Tensor>("Out")->Resize({x_dims[0], 1});
}
};
......
......@@ -39,7 +39,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel {
auto in0_dims = in0->dims();
auto in1_dims = in1->dims();
- int cols = framework::product(in0_dims) / in0_dims[0];
+ int cols = in0->numel() / in0_dims[0];
// reduce dimensions except the first
auto x =
EigenMatrix<T>::From(*in0, framework::make_ddim({in0_dims[0], cols}));
......@@ -82,7 +82,7 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel {
auto x_dims = x_g->dims();
auto y_dims = y_g->dims();
- int cols = framework::product(x_dims) / x_dims[0];
+ int cols = x_g->numel() / x_dims[0];
// calculate gradient
auto grad_mat = 2 *
(out_grad.broadcast(Eigen::array<int, 2>({{1, cols}}))) *
......
......@@ -35,7 +35,7 @@ class CPUUniformRandomKernel : public framework::OpKernel {
std::uniform_real_distribution<T> dist(
static_cast<T>(context.Attr<float>("min")),
static_cast<T>(context.Attr<float>("max")));
- int64_t size = framework::product(tensor->dims());
+ int64_t size = tensor->numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
......
......@@ -53,8 +53,8 @@ class GPUUniformRandomKernel : public framework::OpKernel {
T min = static_cast<T>(context.Attr<float>("min"));
T max = static_cast<T>(context.Attr<float>("max"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
- ssize_t N = framework::product(tensor->dims());
- thrust::transform(index_sequence_begin, index_sequence_begin + N,
+ int64_t size = tensor->numel();
+ thrust::transform(index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr<T>(data),
UniformGenerator<T>(min, max, seed));
}
......