Unverified commit dead24dd, authored by zyfncg and committed by GitHub

[Phi] Support setting size of vector<Tensor> for out in yaml (#41576)

* support setting vector out size in yaml

* support setting size of vector<tensor> for out in yaml
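In short, a Tensor[] output in the API YAML can now carry a size expression in curly braces, which tells the code generator how many output tensors to allocate; APIs such as concat_grad, meshgrid, stack_grad, multi_dot_grad, multiplex_grad and unbind can then be generated from infer_meta/kernel entries instead of hand-written *_impl functions. As a minimal sketch, taken from the concat_grad entry changed in the diff below, the YAML declaration moves from

    output : Tensor[](x_grad)
    invoke : concat_grad_impl(x, out_grad, axis)

to

    output : Tensor[](x_grad){x.size()}
    infer_meta :
      func : UnchangedMultiInferMeta
      param : [x]
    kernel :
      func : concat_grad

The expression inside the braces may reference the API arguments (for example, unbind derives the number of outputs from input.dims()).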
Parent commit: c3e1d257
@@ -226,7 +226,7 @@ def ParseYamlReturns(string):
     returns = [x.strip() for x in string.strip().split(",")]
     for i in range(len(returns)):
-        ret = returns[i]
+        ret = returns[i].split("{")[0].strip()
         ret_name = ""
         if "(" in ret and ")" in ret:
......
@@ -297,10 +297,10 @@ std::vector<Tensor> split_impl(const Tensor& x,
   // Calculate the number of out tensors
   size_t out_number;
-  if (num_or_sections.GetData().size() == 1) {
+  if (num_or_sections.size() == 1) {
     out_number = num_or_sections.GetData()[0];
   } else {
-    out_number = num_or_sections.GetData().size();
+    out_number = num_or_sections.size();
   }
   std::vector<Tensor> out;
@@ -475,54 +475,6 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
   return api_output;
 }
std::vector<Tensor> unbind_impl(const Tensor& input, int axis) {
auto kernel_key_set = ParseKernelKeyByInputArgs(input);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
Backend kernel_backend = kernel_key.backend();
DataLayout kernel_layout = kernel_key.layout();
DataType kernel_data_type = kernel_key.dtype();
auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"unbind", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "unbind API kernel key: [" << kernel_backend << ", "
<< kernel_layout << ", " << kernel_data_type << "]";
VLOG(6) << "unbind API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
auto dense_input = PrepareData(input, kernel.InputAt(0), {});
// Calculate the number of out tensors
auto input_shape = input.dims();
if (axis < 0) {
axis = input_shape.size() + axis;
}
auto out_num = input_shape[axis];
std::vector<Tensor> out;
auto dense_outs = SetKernelOutput(out_num, kernel_backend, &out);
std::vector<phi::MetaTensor> meta_outs;
meta_outs.reserve(out_num);
std::vector<phi::MetaTensor*> meta_out_ptrs;
meta_out_ptrs.reserve(out_num);
for (int64_t i = 0; i < out_num; ++i) {
meta_outs.push_back(dense_outs[i]);
meta_out_ptrs.push_back(&meta_outs.back());
}
phi::UnbindInferMeta(MakeMetaTensor(*dense_input), axis, meta_out_ptrs);
using kernel_signature = void (*)(const phi::DeviceContext&,
const phi::DenseTensor&,
int,
std::vector<phi::DenseTensor*>&);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx, *dense_input, axis, dense_outs);
return out;
}
////////////////// Backward(grad) api impls //////////////////////

// TODO(chenweihang): the original sum grad op can support higher-level

@@ -700,71 +652,6 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
   return api_output;
 }
std::vector<Tensor> concat_grad_impl(const std::vector<Tensor>& x,
const Tensor& out_grad,
const Scalar& axis) {
auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
Backend kernel_backend = kernel_key.backend();
DataLayout kernel_layout = kernel_key.layout();
DataType kernel_data_type = kernel_key.dtype();
auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"concat_grad", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "concat_grad API kernel key: [" << kernel_backend << ", "
<< kernel_layout << ", " << kernel_data_type << "]";
VLOG(6) << "concat_grad API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
// std::unique_ptr<std::vector<phi::DenseTensor>>
auto dense_x = PrepareData(x, kernel.InputAt(0), {});
auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});
// Calculate the number of out tensors
size_t out_number = x.size();
std::vector<Tensor> x_grad;
auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad);
std::vector<phi::MetaTensor> meta_x;
meta_x.reserve(x.size());
std::vector<phi::MetaTensor*> meta_x_ptrs;
meta_x_ptrs.reserve(x.size());
for (const auto& t : *dense_x) {
meta_x.push_back(t);
meta_x_ptrs.push_back(&meta_x.back());
}
std::vector<phi::MetaTensor> meta_x_grad;
meta_x_grad.reserve(x.size());
std::vector<phi::MetaTensor*> meta_x_grad_ptrs;
meta_x_grad_ptrs.reserve(x.size());
for (size_t i = 0; i < out_number; ++i) {
meta_x_grad.push_back(*dense_x_grad[i]);
meta_x_grad_ptrs.push_back(&meta_x_grad.back());
}
phi::UnchangedMultiInferMeta(meta_x_ptrs, meta_x_grad_ptrs);
std::vector<const phi::DenseTensor*> dense_x_ptr;
dense_x_ptr.reserve(x.size());
for (const auto& t : *dense_x) {
dense_x_ptr.push_back(&t);
}
using kernel_signature = void (*)(const platform::DeviceContext&,
const std::vector<const phi::DenseTensor*>&,
const phi::DenseTensor&,
const phi::Scalar&,
std::vector<phi::DenseTensor*>);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(
*dev_ctx, dense_x_ptr, *dense_out_grad, phi::Scalar(axis), dense_x_grad);
return x_grad;
}
Tensor imag_grad_impl(const Tensor& out_grad) {
  phi::KernelKey kernel_key{ParseBackend(out_grad),
                            out_grad.layout(),

@@ -821,328 +708,5 @@ Tensor real_grad_impl(const Tensor& out_grad) {
  return out;
}
std::vector<Tensor> stack_grad_impl(const std::vector<Tensor>& x,
const Tensor& out_grad,
int axis) {
auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
Backend kernel_backend = kernel_key.backend();
DataLayout kernel_layout = kernel_key.layout();
DataType kernel_data_type = kernel_key.dtype();
auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"stack_grad", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "stack_grad API kernel key: [" << kernel_backend << ", "
<< kernel_layout << ", " << kernel_data_type << "]";
VLOG(6) << "stack_grad API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {});
size_t out_number = x.size();
std::vector<Tensor> x_grad;
auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad);
std::vector<phi::MetaTensor> meta_x_grad;
meta_x_grad.reserve(out_number);
std::vector<phi::MetaTensor*> meta_x_grad_ptrs;
meta_x_grad_ptrs.reserve(out_number);
for (size_t i = 0; i < out_number; ++i) {
meta_x_grad.push_back(dense_x_grad[i]);
meta_x_grad_ptrs.push_back(&meta_x_grad.back());
}
phi::StackGradInferMeta(
MakeMetaTensor(*dense_out_grad), axis, meta_x_grad_ptrs);
using kernel_signature = void (*)(const platform::DeviceContext&,
const phi::DenseTensor&,
int axis,
std::vector<phi::DenseTensor*>);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx, *dense_out_grad, axis, dense_x_grad);
return x_grad;
}
std::vector<Tensor> meshgrid_impl(const std::vector<Tensor>& inputs) {
Backend kernel_backend = Backend::UNDEFINED;
DataLayout kernel_layout = DataLayout::UNDEFINED;
DataType kernel_data_type = DataType::UNDEFINED;
if (kernel_backend == Backend::UNDEFINED ||
kernel_layout == DataLayout::UNDEFINED ||
kernel_data_type == DataType::UNDEFINED) {
auto kernel_key_set = ParseKernelKeyByInputArgs(inputs);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
if (kernel_backend == Backend::UNDEFINED) {
kernel_backend = kernel_key.backend();
}
if (kernel_layout == DataLayout::UNDEFINED) {
kernel_layout = kernel_key.layout();
}
if (kernel_data_type == DataType::UNDEFINED) {
kernel_data_type = kernel_key.dtype();
}
}
const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"meshgrid", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "meshgrid API kernel key: [" << kernel_backend << ", "
<< kernel_layout << ", " << kernel_data_type << "]";
VLOG(6) << "meshgrid API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
auto input_inputs_vec = PrepareData(inputs, kernel.InputAt(0), {});
std::vector<const phi::DenseTensor*> input_inputs(input_inputs_vec->size());
for (size_t i = 0; i < input_inputs.size(); ++i) {
input_inputs[i] = &input_inputs_vec->at(i);
}
auto x_meta_vec = MakeMetaTensor(input_inputs);
std::vector<phi::MetaTensor*> inputs_metas(x_meta_vec.size());
for (size_t i = 0; i < x_meta_vec.size(); ++i) {
inputs_metas[i] = &x_meta_vec[i];
}
// Calculate the number of out tensors
size_t out_number = inputs.size();
std::vector<Tensor> out;
auto dense_outs = SetKernelOutput(out_number, kernel_backend, &out);
std::vector<phi::MetaTensor> meta_outs;
meta_outs.reserve(out_number);
std::vector<phi::MetaTensor*> meta_out_ptrs;
meta_out_ptrs.reserve(out_number);
for (size_t i = 0; i < out_number; ++i) {
meta_outs.push_back(dense_outs[i]);
meta_out_ptrs.push_back(&meta_outs.back());
}
phi::MeshgridInferMeta(inputs_metas, meta_out_ptrs);
using kernel_signature = void (*)(const platform::DeviceContext&,
const std::vector<const phi::DenseTensor*>&,
std::vector<phi::DenseTensor*>&);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx, input_inputs, dense_outs);
return out;
}
std::vector<Tensor> meshgrid_grad_impl(
const std::vector<Tensor>& inputs,
const std::vector<Tensor>& outputs_grad) {
Backend kernel_backend = Backend::UNDEFINED;
DataLayout kernel_layout = DataLayout::UNDEFINED;
DataType kernel_data_type = DataType::UNDEFINED;
if (kernel_backend == Backend::UNDEFINED ||
kernel_layout == DataLayout::UNDEFINED ||
kernel_data_type == DataType::UNDEFINED) {
auto kernel_key_set = ParseKernelKeyByInputArgs(inputs, outputs_grad);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
if (kernel_backend == Backend::UNDEFINED) {
kernel_backend = kernel_key.backend();
}
if (kernel_layout == DataLayout::UNDEFINED) {
kernel_layout = kernel_key.layout();
}
if (kernel_data_type == DataType::UNDEFINED) {
kernel_data_type = kernel_key.dtype();
}
}
const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"meshgrid_grad", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "meshgrid_grad API kernel key: [" << kernel_backend << ", "
<< kernel_layout << ", " << kernel_data_type << "]";
VLOG(6) << "meshgrid_grad API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
auto input_inputs_vec = PrepareData(inputs, kernel.InputAt(0), {});
std::vector<const phi::DenseTensor*> input_inputs(input_inputs_vec->size());
for (size_t i = 0; i < input_inputs.size(); ++i) {
input_inputs[i] = &input_inputs_vec->at(i);
}
auto input_outputs_grad_vec =
PrepareData(outputs_grad, kernel.InputAt(1), {});
std::vector<const phi::DenseTensor*> input_outputs_grad(
input_outputs_grad_vec->size());
for (size_t i = 0; i < input_outputs_grad.size(); ++i) {
input_outputs_grad[i] = &input_outputs_grad_vec->at(i);
}
size_t out_number = inputs.size();
std::vector<Tensor> api_output;
auto kernel_out = SetKernelOutput(out_number, kernel_backend, &api_output);
auto inputs_meta_vec = MakeMetaTensor(input_inputs);
std::vector<phi::MetaTensor*> inputs_metas(inputs_meta_vec.size());
for (size_t i = 0; i < inputs_meta_vec.size(); ++i) {
inputs_metas[i] = &inputs_meta_vec[i];
}
auto outputs_grad_meta_vec = MakeMetaTensor(input_outputs_grad);
std::vector<phi::MetaTensor*> outputs_grad_metas(
outputs_grad_meta_vec.size());
for (size_t i = 0; i < outputs_grad_meta_vec.size(); ++i) {
outputs_grad_metas[i] = &outputs_grad_meta_vec[i];
}
std::vector<phi::MetaTensor> meta_outs;
meta_outs.reserve(out_number);
std::vector<phi::MetaTensor*> meta_out_ptrs;
meta_out_ptrs.reserve(out_number);
for (size_t i = 0; i < out_number; ++i) {
meta_outs.push_back(kernel_out[i]);
meta_out_ptrs.push_back(&meta_outs.back());
}
phi::MeshgridGradInferMeta(inputs_metas, outputs_grad_metas, meta_out_ptrs);
using kernel_signature = void (*)(const platform::DeviceContext&,
const std::vector<const phi::DenseTensor*>&,
const std::vector<const phi::DenseTensor*>&,
std::vector<phi::DenseTensor*>&);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx, input_inputs, input_outputs_grad, kernel_out);
return api_output;
}
std::vector<Tensor> multi_dot_grad_impl(const std::vector<Tensor>& x,
const Tensor& out_grad) {
Backend kernel_backend = Backend::UNDEFINED;
DataLayout kernel_layout = DataLayout::UNDEFINED;
DataType kernel_data_type = DataType::UNDEFINED;
if (kernel_backend == Backend::UNDEFINED ||
kernel_layout == DataLayout::UNDEFINED ||
kernel_data_type == DataType::UNDEFINED) {
auto kernel_key_set = ParseKernelKeyByInputArgs(x, out_grad);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
if (kernel_backend == Backend::UNDEFINED) {
kernel_backend = kernel_key.backend();
}
if (kernel_layout == DataLayout::UNDEFINED) {
kernel_layout = kernel_key.layout();
}
if (kernel_data_type == DataType::UNDEFINED) {
kernel_data_type = kernel_key.dtype();
}
}
VLOG(6) << "multi_dot_grad API kernel key: [" << kernel_backend << ", "
<< kernel_layout << ", " << kernel_data_type << "]";
const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"multi_dot_grad", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "multi_dot_grad API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
auto input_x_vec = PrepareData(x, kernel.InputAt(0), {});
std::vector<const phi::DenseTensor*> input_x(input_x_vec->size());
for (size_t i = 0; i < input_x.size(); ++i) {
input_x[i] = &input_x_vec->at(i);
}
auto input_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});
size_t out_number = input_x.size();
std::vector<Tensor> api_output;
auto kernel_out = SetKernelOutput(out_number, kernel_backend, &api_output);
auto x_meta_vec = MakeMetaTensor(input_x);
std::vector<phi::MetaTensor*> x_metas(x_meta_vec.size());
for (size_t i = 0; i < x_meta_vec.size(); ++i) {
x_metas[i] = &x_meta_vec[i];
}
std::vector<phi::MetaTensor> meta_outs;
meta_outs.reserve(out_number);
std::vector<phi::MetaTensor*> meta_out_ptrs;
meta_out_ptrs.reserve(out_number);
for (size_t i = 0; i < out_number; ++i) {
meta_outs.push_back(kernel_out[i]);
meta_out_ptrs.push_back(&meta_outs.back());
}
phi::MultiDotGradInferMeta(
x_metas, MakeMetaTensor(*input_out_grad), meta_out_ptrs);
using kernel_signature = void (*)(const platform::DeviceContext&,
const std::vector<const phi::DenseTensor*>&,
const phi::DenseTensor&,
std::vector<phi::DenseTensor*>&);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx, input_x, *input_out_grad, kernel_out);
return api_output;
}
std::vector<Tensor> multiplex_grad_impl(const std::vector<Tensor>& inputs,
const Tensor& ids,
const Tensor& out_grad) {
Backend kernel_backend = Backend::UNDEFINED;
DataLayout kernel_layout = DataLayout::UNDEFINED;
DataType kernel_data_type = DataType::UNDEFINED;
if (kernel_backend == Backend::UNDEFINED ||
kernel_layout == DataLayout::UNDEFINED ||
kernel_data_type == DataType::UNDEFINED) {
auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
if (kernel_backend == Backend::UNDEFINED) {
kernel_backend = kernel_key.backend();
}
if (kernel_layout == DataLayout::UNDEFINED) {
kernel_layout = kernel_key.layout();
}
if (kernel_data_type == DataType::UNDEFINED) {
kernel_data_type = kernel_key.dtype();
}
}
VLOG(6) << "multiplex_grad API kernel key: [" << kernel_backend << ", "
<< kernel_layout << ", " << kernel_data_type << "]";
const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"multiplex_grad", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "multiplex_grad API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
auto input_ids = PrepareData(ids, kernel.InputAt(0), {});
auto input_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});
auto out_number = inputs.size();
std::vector<Tensor> api_output;
auto kernel_out = SetKernelOutput(out_number, kernel_backend, &api_output);
std::vector<phi::MetaTensor> meta_outs;
meta_outs.reserve(out_number);
std::vector<phi::MetaTensor*> meta_out_ptrs;
meta_out_ptrs.reserve(out_number);
for (size_t i = 0; i < out_number; ++i) {
meta_outs.push_back(kernel_out[i]);
meta_out_ptrs.push_back(&meta_outs.back());
}
phi::MultiplexGradInferMeta(MakeMetaTensor(*input_ids),
MakeMetaTensor(*input_out_grad),
meta_out_ptrs);
using kernel_signature = void (*)(const platform::DeviceContext&,
const phi::DenseTensor&,
const phi::DenseTensor&,
std::vector<phi::DenseTensor*>&);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx, *input_ids, *input_out_grad, kernel_out);
return api_output;
}
}  // namespace experimental
}  // namespace paddle

@@ -30,6 +30,20 @@ namespace experimental {

////////////////// Forward api impls //////////////////////
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
const Tensor& x,
const Tensor& scale,
const Tensor& bias,
const Tensor& mean,
const Tensor& variance,
float momentum,
float epsilon,
const std::string& data_layout,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
bool fuse_with_relu);
Tensor conv2d_impl(const Tensor& input,
                   const Tensor& filter,
                   const std::vector<int>& strides,

@@ -62,8 +76,6 @@ std::vector<Tensor> split_impl(const Tensor& x,
                               const IntArray& num_or_sections,
                               const Scalar& axis);
std::vector<Tensor> meshgrid_impl(const std::vector<Tensor>& inputs);
std::tuple<Tensor, Tensor, Tensor> momentum_impl(
    const Tensor& param,
    const Tensor& grad,

@@ -77,49 +89,14 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
    bool multi_precision,
    float rescale_grad);
std::vector<Tensor> unbind_impl(const Tensor& input, int axis);
////////////////// Backward(grad) api impls //////////////////////

std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x,
                                    const Tensor& out_grad);
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
const Tensor& x,
const Tensor& scale,
const Tensor& bias,
const Tensor& mean,
const Tensor& variance,
float momentum,
float epsilon,
const std::string& data_layout,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
bool fuse_with_relu);
/************************ backward api impl ***************************/
std::vector<Tensor> concat_grad_impl(const std::vector<Tensor>& x,
const Tensor& out_grad,
const Scalar& axis);
Tensor imag_grad_impl(const Tensor& x);

Tensor real_grad_impl(const Tensor& x);
std::vector<Tensor> stack_grad_impl(const std::vector<Tensor>& x,
const Tensor& out_grad,
int axis);
std::vector<Tensor> meshgrid_grad_impl(const std::vector<Tensor>& inputs,
const std::vector<Tensor>& outputs_grad);
std::vector<Tensor> multi_dot_grad_impl(const std::vector<Tensor>& x,
const Tensor& out_grad);
std::vector<Tensor> multiplex_grad_impl(const std::vector<Tensor>& inputs,
const Tensor& ids,
const Tensor& out_grad);
}  // namespace experimental
}  // namespace paddle

@@ -76,6 +76,16 @@ std::vector<phi::MetaTensor> MakeMetaTensor(
  return meta_tensors;
}
std::vector<phi::MetaTensor> MakeMetaTensor(
const std::vector<phi::DenseTensor*>& tensors) {
std::vector<phi::MetaTensor> meta_tensors;
meta_tensors.reserve(tensors.size());
for (auto* t : tensors) {
meta_tensors.emplace_back(*t);
}
return meta_tensors;
}
phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor) {
  return phi::MetaTensor(tensor);
}
......
@@ -53,6 +53,9 @@ phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor);
std::vector<phi::MetaTensor> MakeMetaTensor(
    const std::vector<const phi::DenseTensor*>& tensors);
std::vector<phi::MetaTensor> MakeMetaTensor(
const std::vector<phi::DenseTensor*>& tensors);
phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor);

phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor);
......
@@ -96,6 +96,8 @@ class IntArrayBase {
  template <typename OtherT>
  IntArrayBase(const IntArrayBase<OtherT>& other) : array_(other.GetData()) {}
size_t size() const { return array_.size(); }
  const std::vector<int64_t>& GetData() const { return array_; }

 private:
......
@@ -1290,8 +1290,11 @@
 - api : meshgrid
   args : (Tensor[] inputs)
-  output : Tensor[]
-  invoke : meshgrid_impl(inputs)
+  output : Tensor[]{inputs.size()}
+  infer_meta :
+    func : MeshgridInferMeta
+  kernel :
+    func : meshgrid
   backward : meshgrid_grad

 - api : min
@@ -2059,8 +2062,11 @@
 - api : unbind
   args : (Tensor input, int axis)
-  output : Tensor[]
-  invoke : unbind_impl(input, axis)
+  output : Tensor[] {axis<0 ? input.dims()[input.dims().size()+axis]:input.dims()[axis]}
+  infer_meta :
+    func : UnbindInferMeta
+  kernel :
+    func : unbind
   backward : unbind_grad

 # unfold
......
@@ -31,6 +31,7 @@ class BaseAPI(object):
 #     outputs:
 #         names : [], list of output names
 #         types : [], list of output types
+#         out_size_expr : [], expression for getting size of vector<Tensor>
 #         return_type : Tensor, vector<Tensor>, ..., the return type of api
 #     args_str:
 #         args_declare : "str" // str of function params with default value. Example: (..., bool flag=false)

@@ -67,11 +68,12 @@ class BaseAPI(object):
         ]
         inputs, attrs, args_str = self.parse_input_and_attr(
             api_name, api_item_yaml['args'], optional_vars)
-        output_type_list, output_names, return_type = self.parse_output(
+        output_type_list, output_names, out_size_expr, return_type = self.parse_output(
             api_name, api_item_yaml['output'])
         return inputs, attrs, {
             'names': output_names,
             'types': output_type_list,
+            'out_size_expr': out_size_expr,
             'return_type': return_type
         }, args_str, optional_vars
@@ -184,39 +186,36 @@ class BaseAPI(object):
                 'Tensor': 'Tensor',
                 'Tensor[]': 'std::vector<Tensor>'
             }
-            if re.search(r'\([a-zA-Z0-9_@]*\)', output_item):
-                result = re.search(
-                    r"(?P<out_type>[a-zA-Z0-9_[\]]+)\s*\((?P<name>[a-zA-Z0-9_@]+)\)",
-                    output_item)
-                out_type = result.group('out_type')
-                assert out_type in output_type_map, \
-                    f"{api_name} : Output type error: the output type only support Tensor and Tensor[], \
-                      but now is {out_type}."
-
-                return output_type_map[out_type], result.group('name')
-            else:
-                if output_item.strip() in output_type_map:
-                    return output_type_map[output_item.strip()], 'out'
-                else:
-                    raise ValueError(
-                        "{} : Output type error: the output type only support Tensor and Tensor[], \
-                      but now is {}.".format(api_name, output_item.strip()))
+            result = re.search(
+                r"(?P<out_type>[a-zA-Z0-9_[\]]+)\s*(?P<name>\([a-zA-Z0-9_@]+\))?\s*(?P<expr>\{[^\}]+\})?",
+                output_item)
+            assert result is not None, f"{api_name} : the output config parse error."
+            out_type = result.group('out_type')
+            assert out_type in output_type_map, \
+                f"{api_name} : Output type error: the output type only support Tensor and Tensor[], \
+                  but now is {out_type}."
+            out_name = 'out' if result.group('name') is None else result.group(
+                'name')[1:-1]
+            out_size_expr = None if result.group(
+                'expr') is None else result.group('expr')[1:-1]
+            return output_type_map[out_type], out_name, out_size_expr

         temp_list = output_config.split(',')
         if len(temp_list) == 1:
-            out_type, out_name = parse_output_item(temp_list[0])
-            return [out_type], [out_name], self.get_return_type([out_type])
+            out_type, out_name, size_expr = parse_output_item(temp_list[0])
+            return [out_type], [out_name], size_expr, self.get_return_type(
+                [out_type])
         else:
             out_type_list = []
             out_name_list = []
             for output_item in temp_list:
-                out_type, out_name = parse_output_item(output_item)
+                out_type, out_name, size_expr = parse_output_item(output_item)
                 out_type_list.append(out_type)
                 out_name_list.append(out_name)
-            return out_type_list, out_name_list, self.get_return_type(
+            return out_type_list, out_name_list, size_expr, self.get_return_type(
                 out_type_list)

     def parse_infer_meta(self, infer_meta_config):
@@ -462,9 +461,8 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
         attr_names = self.attrs['names']
         infer_meta = self.infer_meta

-        infer_meta_params = infer_meta[
-            'param'] + kernel_output_names if infer_meta[
-                'param'] is not None else input_names + attr_names + kernel_output_names
+        infer_meta_params = infer_meta['param'] if infer_meta[
+            'param'] is not None else input_names + attr_names
         # generate meta tensors
         meta_tensor_code = ""
         param_code = ""
@@ -500,11 +498,6 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
                     raise ValueError(
                         f"{self.api} : Param of infer_meta error : {self.inputs['input_info'][param]} type is not supported."
                     )
-            elif param in kernel_output_names:
-                meta_tensor_code = meta_tensor_code + code_indent + " phi::MetaTensor " + param.replace(
-                    'kernel_', PREFIX_META_TENSOR_NAME) + "(" + param + ");\n"
-                param_code = param_code + "&" + param.replace(
-                    'kernel_', PREFIX_META_TENSOR_NAME) + ", "
             elif param in attr_names:
                 param_code = param_code + param + ", "
             elif isinstance(param, str):
@@ -514,6 +507,23 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
             else:
                 param_code = param_code + str(param) + ", "

+        for i, out_name in enumerate(kernel_output_names):
+            if self.outputs['types'][i] == 'std::vector<Tensor>':
+                meta_tensor_code = meta_tensor_code + f"""
+{code_indent} auto {out_name}_{PREFIX_META_TENSOR_NAME}vec = MakeMetaTensor({out_name});
+{code_indent} std::vector<phi::MetaTensor*> {out_name}_metas({out_name}_{PREFIX_META_TENSOR_NAME}vec.size());
+{code_indent} for (size_t i = 0; i < {out_name}_{PREFIX_META_TENSOR_NAME}vec.size(); ++i) {{
+{code_indent}   {out_name}_metas[i] = &{out_name}_{PREFIX_META_TENSOR_NAME}vec[i];
+{code_indent} }}"""
+                param_code = param_code + out_name + '_metas, '
+            else:
+                meta_tensor_code = meta_tensor_code + code_indent + " phi::MetaTensor " + out_name.replace(
+                    'kernel_',
+                    PREFIX_META_TENSOR_NAME) + "(" + out_name + ");\n"
+                param_code = param_code + "&" + out_name.replace(
+                    'kernel_', PREFIX_META_TENSOR_NAME) + ", "
+
         param_code = param_code[:-2]
         return f"""{meta_tensor_code}
{code_indent} phi::{infer_meta['func']}({param_code});
......
@@ -91,7 +91,16 @@ class ForwardAPI(BaseAPI):
                 0]] if inplace_flag and self.inplace_map is not None and self.outputs[
                     'names'][0] in self.inplace_map else ""
             output_create = f"""
-{code_indent} {self.outputs['return_type']} api_output{inplace_assign};
+{code_indent} {self.outputs['return_type']} api_output{inplace_assign};"""
+
+            if self.outputs['return_type'] == 'std::vector<Tensor>':
+                assert self.outputs['out_size_expr'] is not None, \
+                    f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
+                output_create = output_create + f"""
+{code_indent} auto kernel_out = {set_out_func}({self.outputs['out_size_expr']}, kernel_backend, &api_output);"""
+            else:
+                output_create = output_create + f"""
 {code_indent} auto kernel_out = {set_out_func}(kernel_backend, &api_output);"""

             if not inplace_flag and self.view_map is not None and self.outputs[

@@ -113,7 +122,14 @@ class ForwardAPI(BaseAPI):
                     output_create = output_create + f"""
{code_indent} std::get<{i}>(api_output) = {self.inplace_map[self.outputs['names'][i]]};"""

-                output_create = output_create + f"""
+                if output_type_list[i] == 'std::vector<Tensor>':
+                    assert self.outputs['out_size_expr'][i] is not None, \
+                        f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
+                    output_create = output_create + f"""
+{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, &std::get<{i}>(api_output));"""
+                else:
+                    output_create = output_create + f"""
 {code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, &std::get<{i}>(api_output));"""

                 if not inplace_flag and self.view_map is not None and self.outputs[
......
@@ -44,7 +44,7 @@
 - backward_api : add_n_grad
   forward : add_n (Tensor[] x) -> Tensor(out)
   args : (Tensor[] x, Tensor out_grad)
-  output : Tensor[](x_grad)
+  output : Tensor[](x_grad){x.size()}
   invoke : add_n_grad_impl(x, out_grad)
   no_need_buffer : x
@@ -215,8 +215,12 @@
 - backward_api : concat_grad
   forward : concat (Tensor[] x, Scalar axis) -> Tensor(out)
   args : (Tensor[] x, Tensor out_grad, Scalar axis = 0)
-  output : Tensor[](x_grad)
-  invoke : concat_grad_impl(x, out_grad, axis)
+  output : Tensor[](x_grad){x.size()}
+  infer_meta :
+    func : UnchangedMultiInferMeta
+    param : [x]
+  kernel :
+    func : concat_grad
   no_need_buffer : x

 - backward_api : conj_grad
@@ -944,8 +948,11 @@
 - backward_api : meshgrid_grad
   forward : meshgrid (Tensor[] inputs) -> Tensor[](outputs)
   args : (Tensor[] inputs, Tensor[] outputs_grad)
-  output : Tensor[](inputs_grad)
-  invoke : meshgrid_grad_impl(inputs, outputs_grad)
+  output : Tensor[](inputs_grad){inputs.size()}
+  infer_meta :
+    func : MeshgridGradInferMeta
+  kernel :
+    func : meshgrid_grad

 - backward_api : min_grad
   forward: min (Tensor x, int64_t[] dims={}, bool keep_dim=false) -> Tensor(out)
@@ -1001,14 +1008,22 @@
 - backward_api : multi_dot_grad
   forward : multi_dot (Tensor[] x) -> Tensor(out)
   args : (Tensor[] x, Tensor out_grad)
-  output : Tensor[](x_grad)
-  invoke : multi_dot_grad_impl(x, out_grad)
+  output : Tensor[](x_grad) {x.size()}
+  infer_meta :
+    func : MultiDotGradInferMeta
+  kernel :
+    func : multi_dot_grad

 - backward_api : multiplex_grad
   forward : multiplex (Tensor[] ins, Tensor ids) -> Tensor(out)
   args : (Tensor[] ins, Tensor ids, Tensor out_grad)
-  output : Tensor[](ins_grad)
-  invoke : multiplex_grad_impl(ins, ids, out_grad)
+  output : Tensor[](ins_grad){ins.size()}
+  infer_meta :
+    func : MultiplexGradInferMeta
+    param : [ids, out_grad]
+  kernel :
+    func : multiplex_grad
+    param : [ids, out_grad]

 - backward_api : multiply_grad
   forward : multiply (Tensor x, Tensor y) -> Tensor(out)
@@ -1448,8 +1463,13 @@
 - backward_api : stack_grad
   forward : stack (Tensor[] x, int axis) -> Tensor(out)
   args : (Tensor[] x, Tensor out_grad, int axis)
-  output : Tensor[](x_grad)
-  invoke : stack_grad_impl(x, out_grad, axis)
+  output : Tensor[](x_grad){x.size()}
+  infer_meta :
+    func : StackGradInferMeta
+    param: [out_grad, axis]
+  kernel :
+    func : stack_grad
+    param : [out_grad, axis]
   no_need_buffer : x

 - backward_api : strided_slice_grad
......
@@ -35,7 +35,7 @@ class BackwardAPI(BaseAPI):
             r"(?P<api>[a-z][a-z0-9_]+)\s*(?P<args>\([^\)]+\))\s*->\s*(?P<outputs>.+)",
             forward_config)
         api = result.group('api')
-        _, outputs, _ = self.parse_output(self.api, result.group('outputs'))
+        _, outputs, _, _ = self.parse_output(self.api, result.group('outputs'))
         outputs = [item.split('@')[0] for item in outputs]
         fw_inputs, fw_attrs, _, = self.parse_input_and_attr(
             api, result.group('args'))
@@ -110,7 +110,16 @@ class BackwardAPI(BaseAPI):
                 0]] if inplace_flag and self.inplace_map is not None and self.outputs[
                     'names'][0] in self.inplace_map else ""
             output_create = f"""
-{code_indent} {self.outputs['return_type']} api_output{inplace_assign};
+{code_indent} {self.outputs['return_type']} api_output{inplace_assign};"""
+
+            if output_type_list[0] == 'std::vector<Tensor>':
+                assert self.outputs['out_size_expr'] is not None, \
+                    f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
+                output_create = output_create + f"""
+{code_indent} auto kernel_out = {set_out_func}({self.outputs['out_size_expr']}, kernel_backend, &api_output);"""
+            else:
+                output_create = output_create + f"""
 {code_indent} auto kernel_out = {set_out_func}(kernel_backend, &api_output);"""

         elif len(output_type_list) > 1:
@@ -121,7 +130,6 @@ class BackwardAPI(BaseAPI):
                 kernel_output = kernel_output + f'kernel_out_{i}, '
                 output_names.append(f'kernel_out_{i}')
                 if out_type_item == 'Tensor':
-                    get_out_code = f'&api_output[{i}][0]'
                     if inplace_flag and self.inplace_map is not None and self.outputs[
                             'names'][i] in self.inplace_map:
                         output_create = output_create + f"""
@@ -131,6 +139,9 @@ class BackwardAPI(BaseAPI):
                         output_create = output_create + f"""
{code_indent} api_output[{i}].emplace_back();"""
+                    output_create = output_create + f"""
+{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, &api_output[{i}][0]);"""
                 else:
                     get_out_code = f'&api_output[{i}]'
                     if inplace_flag and self.inplace_map is not None and self.outputs[
@@ -138,8 +149,10 @@ class BackwardAPI(BaseAPI):
                         output_create = output_create + f"""
{code_indent} api_output[{i}] = {self.inplace_map[self.outputs['names'][i]]};"""

-                    output_create = output_create + f"""
-{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, {get_out_code});"""
+                    assert self.outputs['out_size_expr'][i] is not None, \
+                        f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
+                    output_create = output_create + f"""
+{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, &api_output[{i}]);"""

             kernel_output = kernel_output[:-2]
         else:
......