Unverified commit d6c85c96, authored by T Tao Luo, committed by GitHub

paddle::framework::vectorize() templatization (#19627)

test=develop
Parent 3aaea4c5
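The commit replaces the fixed-return-type helper `framework::vectorize2int()` with a templated `framework::vectorize<T>()`, so each call site chooses the element type of the returned vector (every hunk below requests `int`). The following is only a minimal sketch of what such a helper could look like, not the actual Paddle implementation; the `DDim` struct here is a hypothetical stand-in for Paddle's dimension type.

```cpp
// Hedged sketch: an assumed, simplified stand-in for framework::vectorize<T>().
#include <cstdint>
#include <iostream>
#include <vector>

namespace framework {

// Hypothetical stand-in for paddle::framework::DDim (a dimension holder).
struct DDim {
  std::vector<int64_t> d;
  int size() const { return static_cast<int>(d.size()); }
  int64_t operator[](int i) const { return d[i]; }
};

// Templated replacement for the old vectorize2int(): the caller picks the
// element type, e.g. vectorize<int>(dims) or vectorize<int64_t>(dims).
template <typename T>
std::vector<T> vectorize(const DDim& dims) {
  std::vector<T> result(dims.size());
  for (int i = 0; i < dims.size(); ++i) {
    result[i] = static_cast<T>(dims[i]);
  }
  return result;
}

}  // namespace framework

int main() {
  framework::DDim dims{{8, 3, 224, 224}};
  // Matches the call sites in this diff: request a std::vector<int>.
  std::vector<int> shape = framework::vectorize<int>(dims);
  for (int v : shape) std::cout << v << " ";  // prints: 8 3 224 224
  std::cout << "\n";
  return 0;
}
```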
......
@@ -281,7 +281,7 @@ class WhileGradOp : public framework::OperatorBase {
 auto &inside_tensor = var->Get<framework::LoDTensor>();
 framework::AttributeMap attrs;
 attrs["dtype"] = inside_tensor.type();
-attrs["shape"] = framework::vectorize2int(inside_tensor.dims());
+attrs["shape"] = framework::vectorize<int>(inside_tensor.dims());
 attrs["value"] = 0.0f;
 auto var_name = pg_ig_names[param_id];
......
......
@@ -79,11 +79,11 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
 cudnn_conv_desc, groups));
 cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-layout, framework::vectorize2int(input->dims()));
+layout, framework::vectorize<int>(input->dims()));
 cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-layout, framework::vectorize2int(output->dims()));
+layout, framework::vectorize<int>(output->dims()));
 cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
-layout, framework::vectorize2int(filter->dims()));
+layout, framework::vectorize<int>(filter->dims()));
 // Now only support NCHW
 std::vector<int> bias_dim = {1, static_cast<int>(output->dims()[1]), 1, 1};
 cudnnTensorDescriptor_t cudnn_bias_desc =
......
......
@@ -64,13 +64,13 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
 // (N, M, H, W) or (N, M, D, H, W)
 cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-layout, framework::vectorize2int(input->dims()), groups);
+layout, framework::vectorize<int>(input->dims()), groups);
 // (N, C, O_h, O_w) or (N, C, O_d, O_h, O_w)
 cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-layout, framework::vectorize2int(output->dims()), groups);
+layout, framework::vectorize<int>(output->dims()), groups);
 // (M, C, K_h, K_w) or (M, C, K_d, K_h, K_w)
 cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
-layout, framework::vectorize2int(filter->dims()), groups);
+layout, framework::vectorize<int>(filter->dims()), groups);
 cudnnConvolutionDescriptor_t cudnn_conv_desc =
 conv_desc.descriptor<T>(paddings, strides, dilations);
......
@@ -148,13 +148,13 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
 // Input: (N, M, H, W) or (N, M, D, H, W)
 cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-layout, framework::vectorize2int(input->dims()), groups);
+layout, framework::vectorize<int>(input->dims()), groups);
 // Output: (N, C, O_h, O_w) or (N, C, O_d, O_h, O_w)
 cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-layout, framework::vectorize2int(output_grad->dims()), groups);
+layout, framework::vectorize<int>(output_grad->dims()), groups);
 // Filter (M, C, K_h, K_w) or (M, C, K_d K_h, K_w)
 cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
-layout, framework::vectorize2int(filter->dims()), groups);
+layout, framework::vectorize<int>(filter->dims()), groups);
 cudnnConvolutionDescriptor_t cudnn_conv_desc =
 conv_desc.descriptor<T>(paddings, strides, dilations);
......
......
@@ -61,7 +61,7 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel<T> {
 T* temp_data = temp_outs[0]->mutable_data<T>(input->dims(), ctx.GetPlace());
 DataLayout layout = DataLayout::kNCHW;
-std::vector<int> in_dim = framework::vectorize2int(input->dims());
+std::vector<int> in_dim = framework::vectorize<int>(input->dims());
 // ------------------- cudnn descriptors ---------------------
 PoolingMode pooling_mode;
......
@@ -83,9 +83,9 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel<T> {
 pool_desc.descriptor(pooling_mode, k3x3, k1x1, k1x1);
 cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-layout, framework::vectorize2int(input->dims()));
+layout, framework::vectorize<int>(input->dims()));
 cudnnTensorDescriptor_t pool_out_desc = out_pool_desc.descriptor<T>(
-layout, framework::vectorize2int(input->dims()));
+layout, framework::vectorize<int>(input->dims()));
 cudnnDataType_t cudnn_dtype = CudnnDataType<T>::type;
 cudnnTensorDescriptor_t* out_desc = new cudnnTensorDescriptor_t[4];
......
@@ -126,7 +126,7 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel<T> {
 : CUDNN_DATA_FLOAT;
 for (int i = 0; i < 4; ++i) {
-filter_dims.push_back(framework::vectorize2int(filters[i]->dims()));
+filter_dims.push_back(framework::vectorize<int>(filters[i]->dims()));
 CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
 filter_desc[i], cudnn_dtype, format, 4, filter_dims[i].data()));
 bias_dims.push_back({1, filter_dims[i][0], 1, 1});
......
......
@@ -55,9 +55,9 @@ class CUDNNGridSampleOpKernel : public framework::OpKernel<T> {
 ScopedTensorDescriptor input_desc;
 ScopedTensorDescriptor output_desc;
 cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-DataLayout::kNCHW, framework::vectorize2int(input->dims()));
+DataLayout::kNCHW, framework::vectorize<int>(input->dims()));
 cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-DataLayout::kNCHW, framework::vectorize2int(output->dims()));
+DataLayout::kNCHW, framework::vectorize<int>(output->dims()));
 CUDNN_ENFORCE(platform::dynload::cudnnSpatialTfSamplerForward(
 handle, cudnn_st_desc, CudnnDataType<T>::kOne(), cudnn_input_desc,
......
@@ -103,13 +103,13 @@ class CUDNNGridSampleGradOpKernel : public framework::OpKernel<T> {
 ScopedTensorDescriptor input_grad_desc;
 ScopedTensorDescriptor output_grad_desc;
 cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-DataLayout::kNCHW, framework::vectorize2int(input->dims()));
+DataLayout::kNCHW, framework::vectorize<int>(input->dims()));
 cudnnTensorDescriptor_t cudnn_input_grad_desc =
 input_grad_desc.descriptor<T>(
-DataLayout::kNCHW, framework::vectorize2int(input_grad->dims()));
+DataLayout::kNCHW, framework::vectorize<int>(input_grad->dims()));
 cudnnTensorDescriptor_t cudnn_output_grad_desc =
 output_grad_desc.descriptor<T>(
-DataLayout::kNCHW, framework::vectorize2int(output_grad->dims()));
+DataLayout::kNCHW, framework::vectorize<int>(output_grad->dims()));
 CUDNN_ENFORCE(platform::dynload::cudnnSpatialTfSamplerBackward(
 handle, cudnn_st_dest, CudnnDataType<T>::kOne(), cudnn_input_desc,
......
......
@@ -35,7 +35,7 @@ void SoftmaxCUDNNFunctor<T>::operator()(
 // ------------------- cudnn descriptors ---------------------
 ScopedTensorDescriptor xDesc;
 ScopedTensorDescriptor yDesc;
-std::vector<int> cudnn_tensor_dims = framework::vectorize2int(X->dims());
+std::vector<int> cudnn_tensor_dims = framework::vectorize<int>(X->dims());
 DataLayout layout = DataLayout::kNCHW;
 if (cudnn_tensor_dims.size() == 5) {
 layout = DataLayout::kNCDHW;
......
@@ -64,7 +64,7 @@ void SoftmaxGradCUDNNFunctor<T>::operator()(
 ScopedTensorDescriptor yDesc;
 ScopedTensorDescriptor dyDesc;
 ScopedTensorDescriptor dxDesc;
-std::vector<int> cudnn_tensor_dims = framework::vectorize2int(Y->dims());
+std::vector<int> cudnn_tensor_dims = framework::vectorize<int>(Y->dims());
 DataLayout layout = DataLayout::kNCHW;
 if (cudnn_tensor_dims.size() == 5) {
 layout = DataLayout::kNCDHW;
......
......
@@ -186,7 +186,7 @@ class NCEKernel : public framework::OpKernel<T> {
 std::memcpy(x_tensor->data<int64_t>(), labels.data(),
 labels.size() * sizeof(int64_t));
-std::vector<int> w_dims = paddle::framework::vectorize2int(
+std::vector<int> w_dims = paddle::framework::vectorize<int>(
 context.Input<Tensor>("Weight")->dims());
 w_dims[0] = static_cast<int>(labels.size());
......
......
@@ -65,9 +65,9 @@ class PoolCUDNNOpKernel : public framework::OpKernel<T> {
 }
 cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-layout, framework::vectorize2int(input->dims()));
+layout, framework::vectorize<int>(input->dims()));
 cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-layout, framework::vectorize2int(output->dims()));
+layout, framework::vectorize<int>(output->dims()));
 PoolingMode pooling_mode;
 if (pooling_type == "max") {
......
@@ -132,9 +132,9 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
 }
 cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-layout, framework::vectorize2int(input->dims()));
+layout, framework::vectorize<int>(input->dims()));
 cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-layout, framework::vectorize2int(output->dims()));
+layout, framework::vectorize<int>(output->dims()));
 PoolingMode pooling_mode;
 if (pooling_type == "max") {
......
......
@@ -41,7 +41,7 @@ class CUDAPReluKernel : public framework::OpKernel<T> {
 int numel = x->numel();
 auto dim = x->dims();
-std::vector<int> input_shape = framework::vectorize2int(dim);
+std::vector<int> input_shape = framework::vectorize<int>(dim);
 if (mode == "channel") {
 math::PreluChannelWiseDirectCUDAFunctor<T> prelu_channel_wise;
......
@@ -157,7 +157,7 @@ class CUDAPReluGradKernel : public framework::OpKernel<T> {
 int numel = x->numel();
 auto dim = x->dims();
-std::vector<int> input_shape = framework::vectorize2int(dim);
+std::vector<int> input_shape = framework::vectorize<int>(dim);
 auto stream = context.cuda_device_context().stream();
 T* dalpha_tmp_ptr;
......
......
@@ -56,7 +56,7 @@ class RandomCropOpInferShape : public framework::InferShapeBase {
 auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
 auto x_dim = ctx->GetInputDim("X");
 PADDLE_ENFORCE_GT(x_dim.size(), static_cast<int64_t>(shape.size()));
-auto out_dim = framework::vectorize2int(x_dim);
+auto out_dim = framework::vectorize<int>(x_dim);
 for (size_t i = 1; i <= shape.size(); ++i) {
 size_t x_i = x_dim.size() - i;
 size_t shape_i = shape.size() - i;
......
......
@@ -395,7 +395,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope,
 cur_scope.FindVar(inside_grad_name)->Get<framework::LoDTensor>();
 framework::AttributeMap attrs;
 attrs["dtype"] = inside_tensor.type();
-attrs["shape"] = framework::vectorize2int(inside_tensor.dims());
+attrs["shape"] = framework::vectorize<int>(inside_tensor.dims());
 attrs["value"] = 0.0f;
 auto zero_op = framework::OpRegistry::CreateOp(
......
......
@@ -251,7 +251,7 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y,
 std::vector<int> origin_reduce_dims, const Ty& init,
 const ReduceOp& reducer, const TransformOp& transformer,
 cudaStream_t stream) {
-auto x_dim = framework::vectorize2int(x.dims());
+auto x_dim = framework::vectorize<int>(x.dims());
 std::vector<int> new_x_dim, new_reduce_dims;
 int is_reduced = 0;
 for (auto e : origin_reduce_dims) {
......
......
@@ -107,7 +107,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase {
 framework::AttributeMap attrs;
 attrs["dtype"] = in_var_tensor.type();
-attrs["shape"] = framework::vectorize2int(in_var_tensor.dims());
+attrs["shape"] = framework::vectorize<int>(in_var_tensor.dims());
 attrs["value"] = 0.0f;
 auto zero_op = framework::OpRegistry::CreateOp(
......
......
@@ -27,7 +27,7 @@ class SequenceMaskOp : public framework::OperatorWithKernel {
 PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) must exist");
 int maxlen = ctx->Attrs().Get<int>("maxlen");
-auto dim = framework::vectorize2int(ctx->GetInputDim("X"));
+auto dim = framework::vectorize<int>(ctx->GetInputDim("X"));
 if (ctx->HasInputs("MaxLenTensor")) {
 dim.push_back(-1);
......
......
@@ -89,7 +89,7 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> {
 maxlen = *max_len_tensor->data<int32_t>();
 }
-auto y_dim = framework::vectorize2int(x->dims());
+auto y_dim = framework::vectorize<int>(x->dims());
 y_dim.push_back(maxlen);
 y->Resize(framework::make_ddim(y_dim));
......
@@ -110,7 +110,7 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> {
 #else
 maxlen = static_cast<int>(*std::max_element(x_data, x_data + x_numel));
 #endif
-auto y_dim = framework::vectorize2int(x->dims());
+auto y_dim = framework::vectorize<int>(x->dims());
 y_dim.push_back(maxlen);
 y->Resize(framework::make_ddim(y_dim));
 }
......
......
@@ -81,7 +81,7 @@ class SequencePadOp : public framework::OperatorWithKernel {
 std::vector<int> out_dims_vec{out_dim_0, padded_length};
 std::vector<int> len_dims_vec{out_dim_0, 1};
-auto time_step_dims_vec = framework::vectorize2int(time_step_dims);
+auto time_step_dims_vec = framework::vectorize<int>(time_step_dims);
 out_dims_vec.insert(out_dims_vec.end(), time_step_dims_vec.begin(),
 time_step_dims_vec.end());
 ctx->SetOutputDim("Out", framework::make_ddim(out_dims_vec));
......
......
@@ -84,9 +84,9 @@ class SliceGradKernel<paddle::platform::CUDADeviceContext,
 dim3 threads(PADDLE_CUDA_NUM_THREADS);
 auto stream = ctx.cuda_device_context().stream();
-auto out_shape = framework::vectorize2int(out_dims);
+auto out_shape = framework::vectorize<int>(out_dims);
 thrust::device_vector<int> out_dims_vec(out_shape.begin(), out_shape.end());
-auto in_shape = framework::vectorize2int(in_dims);
+auto in_shape = framework::vectorize<int>(in_dims);
 thrust::device_vector<int> in_dims_vec(in_shape.begin(), in_shape.end());
 thrust::device_vector<int> offsets_vec(offsets.begin(), offsets.end());
 const int* out_dims_ptr = thrust::raw_pointer_cast(out_dims_vec.data());
......
......
@@ -111,7 +111,7 @@ class SqueezeOp : public framework::OperatorBase {
 auto out_dims = SqueezeOpInferShape::GetOutputShape(axes, x_dims, true);
 framework::AttributeMap attrs;
-attrs["shape"] = framework::vectorize2int(out_dims);
+attrs["shape"] = framework::vectorize<int>(out_dims);
 // Invoke Reshape Op
 auto reshape_op = framework::OpRegistry::CreateOp(
 "reshape", {{"X", {Input("X")}}, {"Shape", {}}},
......
@@ -177,7 +177,7 @@ class SqueezeGradOp : public framework::OperatorBase {
 auto dout_name = Input(framework::GradVarName("Out"));
 auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
 framework::AttributeMap attrs;
-attrs["shape"] = framework::vectorize2int(x_dims);
+attrs["shape"] = framework::vectorize<int>(x_dims);
 auto reshape_op = framework::OpRegistry::CreateOp(
 "reshape", {{"X", {dout_name}}, {"Shape", {}}}, {{"Out", {dx_name}}},
......
@@ -231,7 +231,7 @@ class Squeeze2Op : public framework::OperatorBase {
 auto out_dims = Squeeze2OpInferShape::GetOutputShape(axes, x_dims, true);
 framework::AttributeMap attrs;
-attrs["shape"] = framework::vectorize2int(out_dims);
+attrs["shape"] = framework::vectorize<int>(out_dims);
 // Invoke Reshape Op
 auto reshape_op = framework::OpRegistry::CreateOp(
 "reshape2", {{"X", {Input("X")}}, {"Shape", {}}},
......
@@ -284,7 +284,7 @@ class Squeeze2GradOp : public framework::OperatorBase {
 auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
 framework::AttributeMap attrs;
-attrs["shape"] = framework::vectorize2int(x_dims);
+attrs["shape"] = framework::vectorize<int>(x_dims);
 auto reshape_op = framework::OpRegistry::CreateOp(
 "reshape2", {{"X", {dout_name}}, {"Shape", {}}},
......