Unverified commit aa45f931, authored by: zyfncg, committed by: GitHub

Support code auto-gene for optimizer api in yaml (#43915)

* support complexd selected_rows kernel in yaml

* support configuring optimizer api in yaml

* fix data transform bug
Parent commit: 78023658
......@@ -224,7 +224,8 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp}
${api_source_file}
COMMENT "copy_if_different ${api_header_file} ${api_source_file}"
DEPENDS ${api_yaml_file} ${api_gen_file} ${api_gen_base}
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${api_gen_file}
${api_gen_base}
VERBATIM)
# generate backward api
......@@ -240,7 +241,8 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp}
${bw_api_source_file}
COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}"
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
DEPENDS ${bw_api_yaml_file} ${legacy_bw_api_yaml_file} ${bw_api_gen_file}
${api_gen_base}
VERBATIM)
# generate sparse api
......
......@@ -32,237 +32,6 @@ limitations under the License. */
namespace paddle {
namespace experimental {
// Hand-written dispatch for the fused Adam optimizer update.
// Picks between the dense-gradient kernel ("adam") and the
// dense-param / sparse-grad kernel ("adam_dense_param_sparse_grad")
// based on the runtime type of `grad`, then invokes the selected PHI
// kernel through a variadic function pointer.
//
// NOTE(review): the returned tuple `api_output` is never populated;
// the kernel writes through pointers that alias the prepared input
// tensors (param, moment1, moment2, beta1_pow, beta2_pow and,
// optionally, master_param), i.e. the update is effectively in-place.
// Confirm that callers rely on those in-place semantics.
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
const Tensor& param,
const Tensor& grad,
const Tensor& learning_rate,
const Tensor& moment1,
const Tensor& moment2,
const Tensor& beta1_pow,
const Tensor& beta2_pow,
const paddle::optional<Tensor>& master_param,
const paddle::optional<Tensor>& skip_update,
const Scalar& beta1,
const Scalar& beta2,
const Scalar& epsilon,
bool lazy_mode,
int64_t min_row_size_to_use_multithread,
bool multi_precision,
bool use_global_beta_pow) {
// Resolve the kernel key (backend / layout / dtype); only `param` is
// consulted when deriving the key.
Backend kernel_backend = Backend::UNDEFINED;
DataLayout kernel_layout = DataLayout::UNDEFINED;
DataType kernel_data_type = DataType::UNDEFINED;
if (kernel_backend == Backend::UNDEFINED ||
kernel_layout == DataLayout::UNDEFINED ||
kernel_data_type == DataType::UNDEFINED) {
auto kernel_key_set = ParseKernelKeyByInputArgs(param);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
if (kernel_backend == Backend::UNDEFINED) {
kernel_backend = kernel_key.backend();
}
if (kernel_layout == DataLayout::UNDEFINED) {
kernel_layout = kernel_key.layout();
}
if (kernel_data_type == DataType::UNDEFINED) {
kernel_data_type = kernel_key.dtype();
}
}
// A gradient that is not a DenseTensor selects the sparse-grad variant.
std::string kernel_name = "adam";
if (!phi::DenseTensor::classof(grad.impl().get())) {
kernel_name = "adam_dense_param_sparse_grad";
}
const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
kernel_name, {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << kernel_name << " API kernel key: [" << kernel_backend << ", "
<< kernel_layout << ", " << kernel_data_type << "]";
VLOG(6) << kernel_name << " API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
// Prepare (and possibly transform) every dense input. `grad` (kernel
// input index 1) is prepared inside each branch below because its
// tensor type differs per branch.
auto input_param = PrepareData(param, kernel.InputAt(0), {});
auto input_lr = PrepareData(learning_rate, kernel.InputAt(2), {});
auto input_moment1 = PrepareData(moment1, kernel.InputAt(3), {});
auto input_moment2 = PrepareData(moment2, kernel.InputAt(4), {});
auto input_beta1_pow = PrepareData(beta1_pow, kernel.InputAt(5), {});
auto input_beta2_pow = PrepareData(beta2_pow, kernel.InputAt(6), {});
auto input_master_param = PrepareData(master_param, kernel.InputAt(7), {});
auto input_skip_update = PrepareData(skip_update, kernel.InputAt(8), {});
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> api_output;
// Output pointers alias the prepared inputs: Adam updates its state
// in place. kernel_out_5 (master_param_out) stays null when no
// master_param was supplied.
auto kernel_out_0 = input_param.get();
auto kernel_out_1 = input_moment1.get();
auto kernel_out_2 = input_moment2.get();
auto kernel_out_3 = input_beta1_pow.get();
auto kernel_out_4 = input_beta2_pow.get();
phi::DenseTensor* kernel_out_5 = nullptr;
if (input_master_param) {
kernel_out_5 = input_master_param.get_ptr();
}
auto input_meta_ref_master_param = MakeMetaTensor(input_master_param);
auto input_meta_ref_skip_update = MakeMetaTensor(input_skip_update);
phi::MetaTensor meta_out_0(kernel_out_0);
phi::MetaTensor meta_out_1(kernel_out_1);
phi::MetaTensor meta_out_2(kernel_out_2);
phi::MetaTensor meta_out_3(kernel_out_3);
phi::MetaTensor meta_out_4(kernel_out_4);
phi::MetaTensor meta_out_5(kernel_out_5);
// Dense-gradient path: grad is a phi::DenseTensor.
if (phi::DenseTensor::classof(grad.impl().get())) {
auto input_grad = PrepareData(grad, kernel.InputAt(1), {});
phi::AdamInferMeta(MakeMetaTensor(*input_param),
MakeMetaTensor(*input_grad),
MakeMetaTensor(*input_lr),
MakeMetaTensor(*input_moment1),
MakeMetaTensor(*input_moment2),
MakeMetaTensor(*input_beta1_pow),
MakeMetaTensor(*input_beta2_pow),
input_meta_ref_master_param,
input_meta_ref_skip_update,
beta1,
beta2,
epsilon,
lazy_mode,
min_row_size_to_use_multithread,
multi_precision,
use_global_beta_pow,
&meta_out_0,
&meta_out_1,
&meta_out_2,
&meta_out_3,
&meta_out_4,
&meta_out_5);
// Signature of the dense-grad kernel; must match the registered
// kernel exactly or the variadic call is undefined.
using kernel_signature = void (*)(const platform::DeviceContext&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const paddle::optional<phi::DenseTensor>&,
const paddle::optional<phi::DenseTensor>&,
const Scalar&,
const Scalar&,
const Scalar&,
bool,
int64_t,
bool,
bool,
phi::DenseTensor*,
phi::DenseTensor*,
phi::DenseTensor*,
phi::DenseTensor*,
phi::DenseTensor*,
phi::DenseTensor*);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
*input_param,
*input_grad,
*input_lr,
*input_moment1,
*input_moment2,
*input_beta1_pow,
*input_beta2_pow,
input_master_param,
input_skip_update,
beta1,
beta2,
epsilon,
lazy_mode,
min_row_size_to_use_multithread,
multi_precision,
use_global_beta_pow,
kernel_out_0,
kernel_out_1,
kernel_out_2,
kernel_out_3,
kernel_out_4,
kernel_out_5);
} else {
// Sparse-gradient path: grad is converted to phi::SelectedRows.
auto input_grad = TensorToSelectedRows(grad);
phi::AdamInferMeta(MakeMetaTensor(*input_param),
MakeMetaTensor(*input_grad),
MakeMetaTensor(*input_lr),
MakeMetaTensor(*input_moment1),
MakeMetaTensor(*input_moment2),
MakeMetaTensor(*input_beta1_pow),
MakeMetaTensor(*input_beta2_pow),
input_meta_ref_master_param,
input_meta_ref_skip_update,
beta1,
beta2,
epsilon,
lazy_mode,
min_row_size_to_use_multithread,
multi_precision,
use_global_beta_pow,
&meta_out_0,
&meta_out_1,
&meta_out_2,
&meta_out_3,
&meta_out_4,
&meta_out_5);
// Same as above except the gradient argument is SelectedRows.
using kernel_signature = void (*)(const platform::DeviceContext&,
const phi::DenseTensor&,
const phi::SelectedRows&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const paddle::optional<phi::DenseTensor>&,
const paddle::optional<phi::DenseTensor>&,
const Scalar&,
const Scalar&,
const Scalar&,
bool,
int64_t,
bool,
bool,
phi::DenseTensor*,
phi::DenseTensor*,
phi::DenseTensor*,
phi::DenseTensor*,
phi::DenseTensor*,
phi::DenseTensor*);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
*input_param,
*input_grad,
*input_lr,
*input_moment1,
*input_moment2,
*input_beta1_pow,
*input_beta2_pow,
input_master_param,
input_skip_update,
beta1,
beta2,
epsilon,
lazy_mode,
min_row_size_to_use_multithread,
multi_precision,
use_global_beta_pow,
kernel_out_0,
kernel_out_1,
kernel_out_2,
kernel_out_3,
kernel_out_4,
kernel_out_5);
}
return api_output;
}
////////////////// Forward api impls //////////////////////
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adamw_impl(
......@@ -1100,159 +869,6 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
return api_output;
}
// Hand-written dispatch for the SGD optimizer update.
// Selects one of three kernels based on the runtime tensor types:
//   "sgd"                         - dense param, dense grad
//   "sgd_dense_param_sparse_grad" - dense param, SelectedRows grad
//   "sgd_sparse_param_sparse_grad"- SelectedRows param and grad
// The returned tuple aliases `param` (and `master_param`, when
// provided), so the update is applied in place.
std::tuple<Tensor, Tensor> sgd_impl(
const Tensor& param,
const Tensor& learning_rate,
const Tensor& grad,
const paddle::optional<Tensor>& master_param,
bool multi_precision) {
// dtype comes from `param`; backend/layout from all three inputs.
DataType kernel_data_type = ParseDataType(param);
auto kernel_key_set = ParseKernelKeyByInputArgs(param, learning_rate, grad);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
VLOG(6) << "sgd API kernel key: [" << kernel_key.backend() << ", "
<< kernel_key.layout() << ", " << kernel_data_type << "]";
const auto& param_tensor = param.impl();
std::string kernel_name = "sgd";
if (phi::DenseTensor::classof(param_tensor.get())) {
if (!phi::DenseTensor::classof(grad.impl().get())) {
kernel_name = "sgd_dense_param_sparse_grad";
}
} else {
kernel_name = "sgd_sparse_param_sparse_grad";
}
const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
kernel_name,
{kernel_key.backend(), kernel_key.layout(), kernel_data_type});
VLOG(6) << kernel_name << " API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
// NOTE(review): the {false, true, true, true} transform flags for the
// learning rate differ from the default-constructed flags used for the
// other inputs -- presumably to allow its dtype/place to differ from
// the kernel key; confirm against PrepareData's flag semantics.
auto in_learning_rate =
PrepareData(learning_rate, kernel.InputAt(1), {false, true, true, true});
// Outputs alias the inputs: slot 0 is `param`, slot 1 is
// `master_param` when it was supplied (otherwise left empty).
std::tuple<Tensor, Tensor> out;
std::get<0>(out) = param;
if (master_param) {
std::get<1>(out) = *master_param;
}
phi::MetaTensor meta_out_0(std::get<0>(out).impl().get());
phi::MetaTensor meta_out_1(master_param ? std::get<1>(out).impl().get()
: nullptr);
// Dense-parameter paths.
if (phi::DenseTensor::classof(param_tensor.get())) {
auto in_param = PrepareData(param, kernel.InputAt(0), {});
auto in_master_param_opt = PrepareData(master_param, kernel.InputAt(3), {});
auto master_param_meta_opt = MakeMetaTensor(in_master_param_opt);
phi::DenseTensor* kernel_out_0 =
SetKernelOutput(kernel_key.backend(), &std::get<0>(out));
phi::DenseTensor* kernel_out_1 =
master_param
? static_cast<phi::DenseTensor*>(std::get<1>(out).impl().get())
: nullptr;
// Dense param + dense grad.
if (phi::DenseTensor::classof(grad.impl().get())) {
auto in_grad = PrepareData(grad, kernel.InputAt(2), {});
SgdInferMeta(MakeMetaTensor(*in_param),
MakeMetaTensor(*in_learning_rate),
MakeMetaTensor(*in_grad),
master_param_meta_opt,
multi_precision,
&meta_out_0,
&meta_out_1);
// Must match the registered kernel's signature exactly.
using kernel_signature =
void (*)(const platform::DeviceContext&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const paddle::optional<phi::DenseTensor>&,
bool,
phi::DenseTensor*,
phi::DenseTensor*);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
*in_param,
*in_learning_rate,
*in_grad,
in_master_param_opt,
multi_precision,
kernel_out_0,
kernel_out_1);
} else {
// Dense param + SelectedRows grad.
auto in_grad = TensorToSelectedRows(grad);
SgdInferMeta(MakeMetaTensor(*in_param),
MakeMetaTensor(*in_learning_rate),
MakeMetaTensor(*in_grad),
master_param_meta_opt,
multi_precision,
&meta_out_0,
&meta_out_1);
using kernel_signature =
void (*)(const platform::DeviceContext&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::SelectedRows&,
const paddle::optional<phi::DenseTensor>&,
bool,
phi::DenseTensor*,
phi::DenseTensor*);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
*in_param,
*in_learning_rate,
*in_grad,
in_master_param_opt,
multi_precision,
kernel_out_0,
kernel_out_1);
}
} else {
// SelectedRows param + SelectedRows grad.
auto in_param = TensorToSelectedRows(param);
auto in_grad = TensorToSelectedRows(grad);
auto in_master_param_opt = TensorToSelectedRows(master_param);
auto master_param_meta = MakeMetaTensor(in_master_param_opt);
phi::SelectedRows* kernel_out_0 =
SetSelectedRowsKernelOutput(kernel_key.backend(), &std::get<0>(out));
phi::SelectedRows* kernel_out_1 =
master_param
? static_cast<phi::SelectedRows*>(std::get<1>(out).impl().get())
: nullptr;
SgdInferMeta(MakeMetaTensor(*in_param),
MakeMetaTensor(*in_learning_rate),
MakeMetaTensor(*in_grad),
master_param_meta,
multi_precision,
&meta_out_0,
&meta_out_1);
using kernel_signature =
void (*)(const platform::DeviceContext&,
const phi::SelectedRows&,
const phi::DenseTensor&,
const phi::SelectedRows&,
const paddle::optional<phi::SelectedRows>&,
bool,
phi::SelectedRows*,
phi::SelectedRows*);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
*in_param,
*in_learning_rate,
*in_grad,
in_master_param_opt,
multi_precision,
kernel_out_0,
kernel_out_1);
}
return out;
}
////////////////// Backward(grad) api impls //////////////////////
// TODO(chenweihang): the original sum grad op can support higher-level
......
......@@ -31,24 +31,6 @@ namespace experimental {
////////////////// Forward api impls //////////////////////
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
const Tensor& param,
const Tensor& grad,
const Tensor& learning_rate,
const Tensor& moment1,
const Tensor& moment2,
const Tensor& beta1_pow,
const Tensor& beta2_pow,
const paddle::optional<Tensor>& master_param,
const paddle::optional<Tensor>& skip_update,
const Scalar& beta1,
const Scalar& beta2,
const Scalar& epsilon,
bool lazy_mode,
int64_t min_row_size_to_use_multithread,
bool multi_precision,
bool use_global_beta_pow);
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adamw_impl(
const Tensor& param,
const Tensor& grad,
......@@ -132,13 +114,6 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
bool multi_precision,
float rescale_grad);
std::tuple<Tensor, Tensor> sgd_impl(
const Tensor& param,
const Tensor& learning_rate,
const Tensor& grad,
const paddle::optional<Tensor>& master_param,
bool multi_precision);
////////////////// Backward(grad) api impls //////////////////////
void add_n_grad_impl(const std::vector<Tensor>& x,
......
......@@ -62,7 +62,7 @@ std::shared_ptr<phi::StringTensor> TensorToStringTensor(const Tensor& tensor) {
/* ----------------- for infer_meta --------------------- */
phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor) {
phi::MetaTensor MakeMetaTensor(const phi::TensorBase& tensor) {
return phi::MetaTensor(tensor);
}
......@@ -94,10 +94,6 @@ std::vector<phi::MetaTensor> MakeMetaTensor(
return meta_tensors;
}
phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor) {
return phi::MetaTensor(tensor);
}
phi::MetaTensor MakeMetaTensor(
const paddle::optional<phi::SelectedRows>& tensor) {
if (tensor) {
......@@ -106,10 +102,6 @@ phi::MetaTensor MakeMetaTensor(
return phi::MetaTensor();
}
phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor) {
return phi::MetaTensor(tensor);
}
/* ------------------ for output ----------------------- */
phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) {
......
......@@ -47,7 +47,7 @@ std::shared_ptr<phi::StringTensor> TensorToStringTensor(const Tensor& tensor);
/* ----------------- for infer_meta --------------------- */
phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor);
phi::MetaTensor MakeMetaTensor(const phi::TensorBase& tensor);
phi::MetaTensor MakeMetaTensor(
const paddle::optional<phi::DenseTensor>& tensor);
......@@ -58,13 +58,9 @@ std::vector<phi::MetaTensor> MakeMetaTensor(
std::vector<phi::MetaTensor> MakeMetaTensor(
const std::vector<phi::DenseTensor*>& tensors);
phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor);
phi::MetaTensor MakeMetaTensor(
const paddle::optional<phi::SelectedRows>& tensor);
phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor);
/* ------------------ for output ----------------------- */
phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out);
......
......@@ -1370,8 +1370,8 @@ class SGDOptimizer(Optimizer):
lr = self._create_param_lr(param_and_grad)
if in_dygraph_mode():
_C_ops.final_state_sgd(param_and_grad[0], lr, param_and_grad[1],
master_weight, find_master)
_C_ops.final_state_sgd_(param_and_grad[0], lr, param_and_grad[1],
master_weight, find_master)
return None
if _in_legacy_dygraph():
_C_ops.sgd(param_and_grad[0], lr, param_and_grad[1], master_weight,
......
......@@ -342,7 +342,7 @@ class Adam(Optimizer):
_beta2 = self._beta2 if not isinstance(
self._beta2, Variable) else self._beta2.numpy().item(0)
_, _, _, _, _, _ = _C_ops.final_state_adam(
_, _, _, _, _, _ = _C_ops.final_state_adam_(
param_and_grad[0], param_and_grad[1], lr, moment1, moment2,
beta1_pow_acc, beta2_pow_acc, master_weight, found_inf, _beta1,
_beta2, self._epsilon, self._lazy_mode, 1000, find_master,
......
......@@ -143,8 +143,8 @@ class SGD(Optimizer):
lr = self._create_param_lr(param_and_grad)
if in_dygraph_mode():
_C_ops.final_state_sgd(param_and_grad[0], lr, param_and_grad[1],
master_weight, find_master)
_C_ops.final_state_sgd_(param_and_grad[0], lr, param_and_grad[1],
master_weight, find_master)
return None
if _in_legacy_dygraph():
_C_ops.sgd(param_and_grad[0], lr, param_and_grad[1], master_weight,
......
......@@ -45,9 +45,6 @@ class BaseAPI(object):
self.infer_meta = self.parse_infer_meta(
api_item_yaml['infer_meta'])
self.kernel = self.parse_kernel(api_item_yaml['kernel'])
self.support_selected_rows_kernel = False if len(
self.kernel['func']
) == 1 or not self.kernel['func'][1].endswith('_sr') else True
self.data_transform = self.parse_data_transform(api_item_yaml)
self.inplace_map, self.view_map = {}, {}
......@@ -61,6 +58,7 @@ class BaseAPI(object):
input_args = []
inplace_type_map = {
"const Tensor&": "Tensor&",
"const paddle::optional<Tensor>&": "paddle::optional<Tensor>&",
"const std::vector<Tensor>&": "std::vector<Tensor>&"
}
for name in self.inputs['names']:
......@@ -285,6 +283,17 @@ class BaseAPI(object):
tmp_in_out_list = in_out_str[1:-1].split('->')
inputs = [item.strip() for item in tmp_in_out_list[0].split(',')]
outputs = [item.strip() for item in tmp_in_out_list[1].split(',')]
# check the tensor type
for item in inputs:
assert item in [
'dense', 'selected_rows', 'sparse_coo', 'sparse_csr'
], f"{self.api} : Invalid input tensor type ('{item}'), here we only support 'dense', 'selected_rows', 'sparse_coo' and 'sparse_csr'."
for item in outputs:
assert item in [
'dense', 'selected_rows', 'sparse_coo', 'sparse_csr'
], f"{self.api} : Invalid output tensor type ('{item}'), here we only support 'dense', 'selected_rows', 'sparse_coo' and 'sparse_csr'."
return (inputs, outputs)
for func_item in kernel_funcs:
......@@ -440,11 +449,6 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
kernel_select_code = kernel_key_item_init + kernel_select_code
if len(input_names) > 0:
if self.support_selected_rows_kernel:
kernel_select_code = kernel_select_code + f"""
KernelType kernel_type = ParseKernelTypeByInputArgs({", ".join(input_names)});
"""
kernel_select_code = kernel_select_code + f"""
if (kernel_backend == Backend::UNDEFINED
|| kernel_layout == DataLayout::UNDEFINED
......@@ -528,8 +532,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
{code_indent} phi::{infer_meta['func']}({param_code});
"""
def get_kernel_args(self, code_indent):
input_trans_map = {
def get_kernel_args(self, kernel_tensor_type=None, code_indent=''):
dense_input_trans_map = {
'const Tensor&':
'const phi::DenseTensor&',
'const std::vector<Tensor>&':
......@@ -541,10 +545,17 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
'const paddle::optional<std::vector<Tensor>>&':
'paddle::optional<const std::vector<phi::DenseTensor>&>'
}
out_trans_map = {
dense_out_trans_map = {
'Tensor': 'phi::DenseTensor*',
'std::vector<Tensor>': 'std::vector<phi::DenseTensor*>&'
}
sr_input_trans_map = {
'const Tensor&':
'const phi::SelectedRows&',
'const paddle::optional<Tensor>&':
'const paddle::optional<phi::SelectedRows>&'
}
sr_out_trans_map = {'Tensor': 'phi::SelectedRows*'}
input_names = self.inputs['names']
input_infos = self.inputs['input_info']
kernel_args_type_list = ['const platform::DeviceContext&']
......@@ -558,127 +569,72 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
for i, input_name in enumerate(input_names):
# set input code
if input_name in kernel_param:
trans_flag = "{}"
if input_name in self.data_transform['skip_transform']:
trans_flag = "{true}"
elif input_name in self.data_transform['support_trans_dtype']:
trans_flag = "{false, true}"
if input_name in self.optional_vars:
input_tensor_code = input_tensor_code + f"""
# input is dense tensor
if kernel_tensor_type is None or kernel_tensor_type[0][
kernel_param.index(input_name)] == 'dense':
trans_flag = "{}"
if input_name in self.data_transform['skip_transform']:
trans_flag = "{true}"
elif input_name in self.data_transform[
'support_trans_dtype']:
trans_flag = "{false, true}"
if input_name in self.optional_vars:
input_tensor_code = input_tensor_code + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, kernel.InputAt({i}), {trans_flag});"""
else:
if self.inputs['input_info'][input_name] == "const Tensor&":
input_tensor_code = input_tensor_code + f"""
else:
if self.inputs['input_info'][
input_name] == "const Tensor&":
input_tensor_code = input_tensor_code + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, kernel.InputAt({i}), {trans_flag});"""
elif self.inputs['input_info'][
input_name] == "const std::vector<Tensor>&":
input_tensor_code = input_tensor_code + f"""
elif self.inputs['input_info'][
input_name] == "const std::vector<Tensor>&":
input_tensor_code = input_tensor_code + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({i}), {trans_flag});
{code_indent} std::vector<const phi::DenseTensor*> {PREFIX_TENSOR_NAME}{input_name}({PREFIX_TENSOR_NAME}{input_name}_vec->size());
{code_indent} for (size_t i = 0; i < {PREFIX_TENSOR_NAME}{input_name}.size(); ++i) {{
{code_indent} {PREFIX_TENSOR_NAME}{input_name}[i] = &{PREFIX_TENSOR_NAME}{input_name}_vec->at(i);
{code_indent} }}"""
else:
# do nothing
pass
else:
if input_name in self.optional_vars:
else:
# do nothing
pass
else: # input is selected_rows
input_tensor_code = input_tensor_code + f"""
{code_indent} {input_trans_map[input_infos[input_name]]} {PREFIX_TENSOR_NAME}{input_name}(paddle::none);
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_ptr = TensorToDenseTensor({input_name});
{code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_ptr) {{
{code_indent} {PREFIX_TENSOR_NAME}{input_name} = paddle::make_optional<const phi::DenseTensor&>(*{PREFIX_TENSOR_NAME}{input_name}_ptr);
{code_indent} }}"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = TensorToSelectedRows({input_name});"""
else:
if input_name in self.infer_meta['param']:
if input_name in self.optional_vars:
input_tensor_code = input_tensor_code + f"""
{code_indent} paddle::optional<phi::TensorBase> {PREFIX_TENSOR_NAME}{input_name} = {input_name} ? paddle::optional<phi::TensorBase>(*{input_name}->impl()) : paddle::none;"""
else:
input_tensor_code = input_tensor_code + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = TensorToDenseTensor({input_name});"""
else:
input_tensor_code = input_tensor_code + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = {input_name}.impl();"""
kernel_args = "*dev_ctx, "
kernel_args = ["*dev_ctx"]
for param in kernel_param:
if param in input_names:
if param in self.optional_vars:
kernel_args = kernel_args + PREFIX_TENSOR_NAME + param + ", "
kernel_args.append(PREFIX_TENSOR_NAME + param)
else:
if self.inputs['input_info'][param] == "const Tensor&":
kernel_args = kernel_args + "*" + PREFIX_TENSOR_NAME + param + ", "
kernel_args.append("*" + PREFIX_TENSOR_NAME + param)
elif self.inputs['input_info'][
param] == "const std::vector<Tensor>&":
kernel_args = kernel_args + PREFIX_TENSOR_NAME + param + ", "
kernel_args.append(PREFIX_TENSOR_NAME + param)
else:
# do nothing
pass
kernel_in_type = input_trans_map[input_infos[param]]
kernel_args_type_list.append(kernel_in_type)
elif param in attr_names:
# set attr for kernel_context
if 'IntArray' in self.attrs['attr_info'][param][0]:
kernel_args_type_list.append('const phi::IntArray&')
param = 'phi::IntArray(' + param + ')'
elif 'Scalar' in self.attrs['attr_info'][param][0]:
kernel_args_type_list.append('const phi::Scalar&')
param = 'phi::Scalar(' + param + ')'
else:
# input is dense tensor
if kernel_tensor_type is None or kernel_tensor_type[0][
kernel_param.index(param)] == 'dense':
kernel_args_type_list.append(
self.attrs['attr_info'][param][0])
kernel_args = kernel_args + param + ", "
elif isinstance(param, bool):
kernel_args = kernel_args + str(param).lower() + ", "
else:
kernel_args = kernel_args + str(param) + ", "
for out_type in self.outputs['types']:
kernel_args_type_list.append(out_trans_map[out_type])
kernel_signature = "void(*)(" + ", ".join(kernel_args_type_list) + ")"
return input_tensor_code, kernel_args[:-2], kernel_signature
def get_selected_rows_kernel_args(self, code_indent):
input_trans_map = {
'const Tensor&':
'const phi::SelectedRows&',
'const paddle::optional<Tensor>&':
'const paddle::optional<phi::SelectedRows>&'
}
out_trans_map = {'Tensor': 'phi::SelectedRows*'}
input_names = self.inputs['names']
input_infos = self.inputs['input_info']
kernel_args_type_list = ['const platform::DeviceContext&']
attr_names = self.attrs['names']
kernel_param = self.kernel['param']
if kernel_param is None:
kernel_param = input_names + attr_names
input_tensor_code = ""
for i, input_name in enumerate(input_names):
# set input code
if input_name in self.optional_vars:
input_tensor_code = input_tensor_code + f"""
{code_indent} {input_trans_map[input_infos[input_name]]} {PREFIX_TENSOR_NAME}{input_name}(paddle::none);
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_ptr = TensorToSelectedRows({input_name});
{code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_ptr) {{
{code_indent} {PREFIX_TENSOR_NAME}{input_name} = paddle::make_optional<const phi::SelectedRows&>(*{PREFIX_TENSOR_NAME}{input_name}_ptr);
{code_indent} }}"""
else:
input_tensor_code = input_tensor_code + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = TensorToSelectedRows({input_name});"""
kernel_args = "*dev_ctx, "
for param in kernel_param:
if param in input_names:
if param in self.optional_vars:
kernel_args = kernel_args + PREFIX_TENSOR_NAME + param + ", "
else:
kernel_args = kernel_args + "*" + PREFIX_TENSOR_NAME + param + ", "
kernel_in_type = input_trans_map[input_infos[param]]
kernel_args_type_list.append(kernel_in_type)
dense_input_trans_map[input_infos[param]])
else: # input is selected_rows
kernel_args_type_list.append(
sr_input_trans_map[input_infos[param]])
elif param in attr_names:
# set attr for kernel_context
if 'IntArray' in self.attrs['attr_info'][param][0]:
......@@ -690,18 +646,22 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
else:
kernel_args_type_list.append(
self.attrs['attr_info'][param][0])
kernel_args = kernel_args + param + ", "
kernel_args.append(param)
elif isinstance(param, bool):
kernel_args = kernel_args + str(param).lower() + ", "
kernel_args.append(str(param).lower())
else:
kernel_args = kernel_args + str(param) + ", "
kernel_args.append(str(param))
for out_type in self.outputs['types']:
kernel_args_type_list.append(out_trans_map[out_type])
for i, out_type in enumerate(self.outputs['types']):
# output is dense tensor
if kernel_tensor_type is None or kernel_tensor_type[1][i] == 'dense':
kernel_args_type_list.append(dense_out_trans_map[out_type])
else: # output is selected_rows
kernel_args_type_list.append(sr_out_trans_map[out_type])
kernel_signature = "void(*)(" + ", ".join(kernel_args_type_list) + ")"
return input_tensor_code, kernel_args[:-2], kernel_signature
return input_tensor_code, ", ".join(kernel_args), kernel_signature
# Override by child class
def gene_return_code(self):
......@@ -709,25 +669,27 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
# Override by child class
def gene_output(self,
output_type_list,
set_out_func,
code_indent,
out_dtype_list,
out_tensor_type_list=None,
code_indent='',
inplace_flag=False):
return None, None, None
def gen_dense_tensor_kernel_code(self, code_indent, inplace_flag=False):
def gen_kernel_code(self, kernel_name, code_indent, inplace_flag=False):
kernel_dispatch = self.kernel['dispatch'][kernel_name]
input_tensors, kernel_args, kernel_signature = self.get_kernel_args(
code_indent)
kernel_dispatch, code_indent)
out_tensor_type_list = kernel_dispatch[1] if kernel_dispatch else None
outputs_args, kernel_output_names, output_create = self.gene_output(
self.outputs['types'], 'SetKernelOutput', code_indent, inplace_flag)
api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '')
self.outputs['types'], out_tensor_type_list, code_indent,
inplace_flag)
cudnn_args = '' if self.kernel[
'use_gpudnn'] == 'false' else ', ' + self.kernel['use_gpudnn']
return f"""
{code_indent} VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
{code_indent} const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
{code_indent} "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}}{cudnn_args});
{code_indent} VLOG(6) << "{self.api} API kernel: " << kernel;
{code_indent} "{kernel_name}", {{kernel_backend, kernel_layout, kernel_data_type}}{cudnn_args});
{code_indent} VLOG(6) << "{kernel_name} kernel: " << kernel;
{code_indent} auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
{input_tensors}
......@@ -737,38 +699,42 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
{code_indent} using kernel_signature = {kernel_signature};
{code_indent} auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
{code_indent} {{
{code_indent} paddle::platform::RecordEvent kernel_record_event(\"{api_func_name} compute\", paddle::platform::TracerEventType::OperatorInner, 1);
{code_indent} paddle::platform::RecordEvent kernel_record_event(\"{kernel_name} compute\", paddle::platform::TracerEventType::OperatorInner, 1);
{code_indent} (*kernel_fn)({kernel_args}, {outputs_args});
{code_indent} }}
{code_indent} {self.gene_return_code()}"""
def gen_selected_rows_kernel_code(self, code_indent, inplace_flag=False):
input_tensors, kernel_args, kernel_signature = self.get_selected_rows_kernel_args(
code_indent)
outputs_args, kernel_output_names, output_create = self.gene_output(
self.outputs['types'], 'SetSelectedRowsKernelOutput', code_indent,
inplace_flag)
api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '')
return f"""
{code_indent} auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
{code_indent} "{self.kernel['func'][1]}", {{kernel_backend, kernel_layout, kernel_data_type}});
{code_indent} VLOG(6) << "{self.api} API SelectedRows kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
{code_indent} VLOG(6) << "{self.api} API SelectedRows kernel: " << kernel;
{code_indent} auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
{input_tensors}
{output_create}
{self.gene_infer_meta(kernel_output_names, code_indent)}
{code_indent} using kernel_signature = {kernel_signature};
{code_indent} auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
{code_indent} {{
{code_indent} paddle::platform::RecordEvent kernel_record_event(\"{api_func_name} compute\", paddle::platform::TracerEventType::OperatorInner, 1);
{code_indent} (*kernel_fn)({kernel_args}, {outputs_args});
{code_indent} }}
def get_condition_code(self, kernel_name):
assert self.kernel['dispatch'][kernel_name], \
f"{self.api} api: the tensor type of inputs and outputs for kernel isn't set, see also 'kernel:func' of 'scale' in api.yaml."
input_types = self.kernel['dispatch'][kernel_name][0]
condition_list = []
for i, in_type in enumerate(input_types):
if in_type == "dense":
if self.inputs['names'][i] in self.optional_vars:
condition_list.append(
f"(!{self.inputs['names'][i]} || {self.inputs['names'][i]}->is_dense_tensor())"
)
else:
condition_list.append(
f"{self.inputs['names'][i]}.is_dense_tensor()")
else:
if self.inputs['names'][i] in self.optional_vars:
condition_list.append(
f"(!{self.inputs['names'][i]} || {self.inputs['names'][i]}->is_selected_rows())"
)
else:
condition_list.append(
f"{self.inputs['names'][i]}.is_selected_rows()")
return " && ".join(condition_list)
{code_indent} {self.gene_return_code()}"""
def gene_dispatch_code(self, kernel_name, inplace_flag=False):
return f"""
if ({self.get_condition_code(kernel_name)}) {{
{self.gen_kernel_code(kernel_name, ' ', inplace_flag)}
}}
"""
def gene_base_api_code(self, inplace_flag=False):
api_func_name = self.get_api_func_name()
......@@ -779,21 +745,20 @@ PADDLE_API {self.get_return_type(inplace_flag)} {api_func_name}({self.get_define
{self.gene_kernel_select()}
"""
if self.support_selected_rows_kernel:
code_indent = ' '
if len(self.kernel['func']) > 1:
kernel_dispatch_code = ''
for kernel_name in self.kernel['func']:
kernel_dispatch_code += self.gene_dispatch_code(
kernel_name, inplace_flag)
return api_code + f"""
if(kernel_type == KernelType::DENSE_TENSOR_KENREL){{
{self.gen_dense_tensor_kernel_code(code_indent, inplace_flag)}
}} else {{
{self.gen_selected_rows_kernel_code(code_indent, inplace_flag)}
}}
{kernel_dispatch_code}
PADDLE_THROW(phi::errors::Unimplemented(
"The kernel of ({self.api}) for input tensors is unimplemented, please check the type of input tensors."));
}}
"""
else:
code_indent = ''
return api_code + self.gen_dense_tensor_kernel_code(
code_indent, inplace_flag) + """
return api_code + self.gen_kernel_code(self.kernel['func'][0], '',
inplace_flag) + """
}
"""
......
......@@ -24,6 +24,11 @@ inplace_out_type_map = {
"std::vector<Tensor>": "std::vector<Tensor>&"
}
inplace_optional_out_type_map = {
"Tensor": "paddle::optional<Tensor>&",
"std::vector<Tensor>": "paddle::optional<std::vector<Tensor>>&"
}
class ForwardAPI(BaseAPI):
......@@ -80,7 +85,11 @@ class ForwardAPI(BaseAPI):
for i, out_type in enumerate(self.outputs['types']):
out_name = self.outputs['names'][i].split('@')[0]
if inplace_flag and out_name in self.inplace_map:
out_type_list.append(inplace_out_type_map[out_type])
if self.inplace_map[out_name] in self.optional_vars:
out_type_list.append(
inplace_optional_out_type_map[out_type])
else:
out_type_list.append(inplace_out_type_map[out_type])
else:
out_type_list.append(out_type)
......@@ -94,7 +103,11 @@ class ForwardAPI(BaseAPI):
for i, out_type in enumerate(self.outputs['types']):
out_name = self.outputs['names'][i].split('@')[0]
if inplace_flag and out_name in self.inplace_map:
out_type_list.append(inplace_out_type_map[out_type])
if self.inplace_map[out_name] in self.optional_vars:
out_type_list.append(
inplace_optional_out_type_map[out_type])
else:
out_type_list.append(inplace_out_type_map[out_type])
elif self.is_dygraph_api or out_name not in self.intermediate_outs:
out_type_list.append(out_type)
......@@ -120,16 +133,16 @@ class ForwardAPI(BaseAPI):
return 'return {' + ", ".join(selected_code) + '};'
def gene_output(self,
output_type_list,
set_out_func,
code_indent,
out_dtype_list,
out_tensor_type_list=None,
code_indent='',
inplace_flag=False):
kernel_output = ""
output_names = []
output_create = ""
return_type = self.get_return_type_with_intermediate(inplace_flag)
if len(output_type_list) == 1:
if len(out_dtype_list) == 1:
kernel_output = 'kernel_out'
output_names.append('kernel_out')
inplace_assign = " = " + self.inplace_map[
......@@ -137,7 +150,8 @@ class ForwardAPI(BaseAPI):
'names'][0] in self.inplace_map else ""
output_create = f"""
{code_indent} {return_type} api_output{inplace_assign};"""
set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[
0] == 'dense' else 'SetSelectedRowsKernelOutput'
if return_type == 'std::vector<Tensor>':
assert self.outputs['out_size_expr'][0] is not None, \
f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
......@@ -155,7 +169,7 @@ class ForwardAPI(BaseAPI):
{code_indent} kernel_out->ShareInplaceVersionCounterWith(*{PREFIX_TENSOR_NAME}{self.view_map[self.outputs['names'][0]]});
{code_indent} VLOG(3) << "Perform View between Output and Input Tensor, share allocation and inplace version.";"""
elif len(output_type_list) > 1:
elif len(out_dtype_list) > 1:
output_create = f"""
{code_indent} {return_type} api_output;"""
......@@ -171,19 +185,27 @@ class ForwardAPI(BaseAPI):
output_create += 'Tensor(), '
output_create = output_create[:-2] + '};'
for i in range(len(output_type_list)):
for i in range(len(out_dtype_list)):
kernel_output = kernel_output + f'kernel_out_{i}, '
output_names.append(f'kernel_out_{i}')
set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[
i] == 'dense' else 'SetSelectedRowsKernelOutput'
get_out_code = f"&std::get<{i}>(api_output)"
if self.outputs['names'][
i] in self.inplace_map and self.inplace_map[
self.outputs['names'][i]] in self.optional_vars:
get_out_code = f"std::get<{i}>(api_output).get_ptr()"
if output_type_list[i] == 'std::vector<Tensor>':
if out_dtype_list[i] == 'std::vector<Tensor>':
assert self.outputs['out_size_expr'][i] is not None, \
f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
output_create = output_create + f"""
{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, &std::get<{i}>(api_output));"""
{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, {get_out_code});"""
else:
output_create = output_create + f"""
{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, &std::get<{i}>(api_output));"""
{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, {get_out_code});"""
if not inplace_flag and self.view_map is not None and self.outputs[
'names'][i] in self.view_map:
......
......@@ -114,22 +114,24 @@ class BackwardAPI(BaseAPI):
return 'void'
def gene_output(self,
output_type_list,
set_out_func,
code_indent,
out_dtype_list,
out_tensor_type_list=None,
code_indent='',
inplace_flag=False):
kernel_output = ""
output_names = []
output_create = ""
if len(output_type_list) == 1:
if len(out_dtype_list) == 1:
kernel_output = 'kernel_out'
output_names.append('kernel_out')
inplace_assign = " = " + self.inplace_map[self.outputs['names'][
0]] if inplace_flag and self.inplace_map is not None and self.outputs[
'names'][0] in self.inplace_map else ""
output_create = ""
if output_type_list[0] == 'std::vector<Tensor>':
set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[
0] == 'dense' else 'SetSelectedRowsKernelOutput'
if out_dtype_list[0] == 'std::vector<Tensor>':
assert self.outputs['out_size_expr'] is not None, \
f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
output_create = output_create + f"""
......@@ -139,11 +141,13 @@ class BackwardAPI(BaseAPI):
output_create = output_create + f"""
{code_indent} auto kernel_out = {set_out_func}(kernel_backend, {self.outputs['names'][0]});"""
elif len(output_type_list) > 1:
elif len(out_dtype_list) > 1:
output_create = ""
for i, out_type_item in enumerate(output_type_list):
for i, out_type_item in enumerate(out_dtype_list):
kernel_output = kernel_output + f'kernel_out_{i}, '
output_names.append(f'kernel_out_{i}')
set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[
i] == 'dense' else 'SetSelectedRowsKernelOutput'
if out_type_item == 'Tensor':
if inplace_flag and self.inplace_map is not None and self.outputs[
'names'][i] in self.inplace_map:
......
......@@ -48,11 +48,17 @@
kernel :
func : adadelta
- api : adam
- api : adam_
args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, Scalar beta1, Scalar beta2, Scalar epsilon, bool lazy_mode, int64_t min_row_size_to_use_multithread, bool multi_precision, bool use_global_beta_pow)
output : Tensor(param_out), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1_pow_out), Tensor(beta2_pow_out), Tensor(master_param_outs)
infer_meta :
func : AdamInferMeta
kernel :
func : adam {dense, dense, dense, dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense, dense, dense},
adam_dense_param_sparse_grad {dense, selected_rows, dense, dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense, dense, dense}
data_type : param
optional : master_param, skip_update
invoke : adam_impl(param, grad, learning_rate, moment1, moment2, beta1_pow, beta2_pow, master_param, skip_update, beta1, beta2, epsilon, lazy_mode, min_row_size_to_use_multithread, multi_precision, use_global_beta_pow)
inplace : (param -> param_out), (moment1 -> moment1_out), (moment2 -> moment2_out), (beta1_pow -> beta1_pow_out), (beta2_pow -> beta2_pow_out), (master_param -> master_param_outs)
- api : adamax
args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment, Tensor inf_norm, Tensor beta1_pow, float beta1, float beta2, float epsilon)
......@@ -1015,7 +1021,8 @@
infer_meta :
func : IsfiniteInferMeta
kernel :
func : isfinite, infinite_sr
func : isfinite {dense -> dense},
infinite_sr {selected_rows -> selected_rows}
# isinf
- api : isinf
......@@ -1024,7 +1031,8 @@
infer_meta :
func : IsfiniteInferMeta
kernel :
func : isinf, isinf_sr
func : isinf {dense -> dense},
isinf_sr {selected_rows -> selected_rows}
# isnan
- api : isnan
......@@ -1033,7 +1041,8 @@
infer_meta :
func : IsfiniteInferMeta
kernel :
func : isnan, isnan_sr
func : isnan {dense -> dense},
isnan_sr {selected_rows -> selected_rows}
- api : kldiv_loss
args : (Tensor x, Tensor label, str reduction)
......@@ -1774,7 +1783,8 @@
func : UnchangedInferMeta
param : [x]
kernel :
func : scale, scale_sr
func : scale {dense -> dense},
scale_sr {selected_rows -> selected_rows}
inplace : (x -> out)
backward : scale_grad
......@@ -1829,11 +1839,20 @@
func : selu
backward : selu_grad
- api : sgd
- api : sgd_
args : (Tensor param, Tensor learning_rate, Tensor grad, Tensor master_param, bool multi_precision)
output : Tensor(param_out), Tensor(master_param_out)
invoke : sgd_impl(param, learning_rate, grad, master_param, multi_precision)
infer_meta :
func : SgdInferMeta
kernel :
func : sgd {dense, dense, dense, dense -> dense, dense},
sgd_dense_param_sparse_grad {dense, dense, selected_rows, dense -> dense, dense},
sgd_sparse_param_sparse_grad {selected_rows, dense, selected_rows, selected_rows -> selected_rows, selected_rows}
data_type : param
data_transform :
support_trans_dtype : learning_rate
optional : master_param
inplace : (param -> param_out), (master_param -> master_param_out)
- api : shape
args : (Tensor input)
......@@ -1841,7 +1860,8 @@
infer_meta :
func : ShapeInferMeta
kernel :
func : shape, shape_sr
func : shape {dense -> dense},
shape_sr {selected_rows -> selected_rows}
data_transform:
skip_transform : input
......
......@@ -31,18 +31,10 @@ class SparseAPI(ForwardAPI):
{super(SparseAPI, self).gene_api_declaration()}
"""
def get_kernel_tensor_out_type(self, output_name):
    """Map a sparse-API output name to its C++ `TensorType` enum literal.

    The tensor kind is encoded as a suffix on the output name
    (`@SparseCooTensor` / `@SparseCsrTensor`); names without a sparse
    suffix fall back to the dense tensor type.
    """
    suffix_to_type = (
        ('@SparseCooTensor', 'TensorType::SPARSE_COO'),
        ('@SparseCsrTensor', 'TensorType::SPARSE_CSR'),
    )
    for suffix, tensor_type in suffix_to_type:
        if output_name.endswith(suffix):
            return tensor_type
    return 'TensorType::DENSE_TENSOR'
def gene_output(self,
output_type_list,
set_out_func,
code_indent,
out_dtype_list,
out_tensor_type_list=None,
code_indent='',
inplace_flag=False):
kernel_output = ""
output_names = []
......@@ -54,7 +46,7 @@ class SparseAPI(ForwardAPI):
'sparse_csr': 'TensorType::SPARSE_CSR'
}
if len(output_type_list) == 1:
if len(out_dtype_list) == 1:
kernel_output = 'kernel_out'
output_names.append('kernel_out')
inplace_assign = " = " + self.inplace_map[self.outputs['names'][
......@@ -62,9 +54,9 @@ class SparseAPI(ForwardAPI):
'names'][0] in self.inplace_map else ""
output_create = f"""
{return_type} api_output{inplace_assign};
auto* kernel_out = {set_out_func}(&api_output, {output_type_map[output_type_list[0]]});"""
auto* kernel_out = SetSparseKernelOutput(&api_output, {output_type_map[out_dtype_list[0]]});"""
elif len(output_type_list) > 1:
elif len(out_dtype_list) > 1:
output_create = f"""
{return_type} api_output;"""
......@@ -80,11 +72,11 @@ class SparseAPI(ForwardAPI):
output_create += 'Tensor(), '
output_create = output_create[:-2] + '};'
for i in range(len(output_type_list)):
for i in range(len(out_dtype_list)):
kernel_output = kernel_output + f'kernel_out_{i}, '
output_names.append(f'kernel_out_{i}')
output_create = output_create + f"""
auto* kernel_out_{i} = {set_out_func}(&std::get<{i}>(api_output), {output_type_map[output_type_list[i]]});"""
auto* kernel_out_{i} = SetSparseKernelOutput(&std::get<{i}>(api_output), {output_type_map[out_dtype_list[i]]});"""
kernel_output = kernel_output[:-2]
else:
......@@ -148,8 +140,7 @@ class SparseAPI(ForwardAPI):
def gen_sparse_kernel_code(self, kernel_name, inplace_flag=False):
_, kernel_output_names, output_create = self.gene_output(
self.kernel['dispatch'][kernel_name][1], 'SetSparseKernelOutput',
'', inplace_flag)
self.kernel['dispatch'][kernel_name][1], None, '', inplace_flag)
kernel_context_code = self.gen_sparse_kernel_context(
kernel_output_names)
......@@ -189,7 +180,6 @@ class SparseAPI(ForwardAPI):
return " && ".join(condition_list)
def gene_dispatch_code(self, kernel_name, inplace_flag=False):
dispatch_code = ""
return f"""
if ({self.get_condition_code(kernel_name)}) {{
{self.gen_sparse_kernel_code(kernel_name, inplace_flag)}
......
......@@ -48,9 +48,9 @@ class SparseBackwardAPI(SparseAPI, BackwardAPI):
return BackwardAPI.get_define_args(self)
def gene_output(self,
output_type_list,
set_out_func,
code_indent,
out_dtype_list,
out_tensor_type_list=None,
code_indent='',
inplace_flag=False):
kernel_output = ""
output_names = []
......@@ -61,19 +61,19 @@ class SparseBackwardAPI(SparseAPI, BackwardAPI):
'sparse_csr': 'TensorType::SPARSE_CSR'
}
if len(output_type_list) == 1:
if len(out_dtype_list) == 1:
kernel_output = 'kernel_out'
output_names.append('kernel_out')
inplace_assign = " = " + self.inplace_map[self.outputs['names'][
0]] if inplace_flag and self.inplace_map is not None and self.outputs[
'names'][0] in self.inplace_map else ""
output_create = f"""
auto kernel_out = {set_out_func}({self.outputs['names'][0]}, {output_type_map[output_type_list[0]]});"""
auto kernel_out = SetSparseKernelOutput({self.outputs['names'][0]}, {output_type_map[out_dtype_list[0]]});"""
elif len(output_type_list) > 1:
elif len(out_dtype_list) > 1:
output_create = ""
for i, out_type_item in enumerate(output_type_list):
for i, out_type_item in enumerate(out_dtype_list):
kernel_output = kernel_output + f'kernel_out_{i}, '
output_names.append(f'kernel_out_{i}')
if inplace_flag and self.inplace_map is not None and self.outputs[
......@@ -82,7 +82,7 @@ class SparseBackwardAPI(SparseAPI, BackwardAPI):
*{self.outputs['names'][i]} = {self.inplace_map[self.outputs['names'][i]]};"""
output_create = output_create + f"""
auto kernel_out_{i} = {set_out_func}({self.outputs['names'][i]}, {output_type_map[output_type_list[i]]});"""
auto kernel_out_{i} = SetSparseKernelOutput({self.outputs['names'][i]}, {output_type_map[out_dtype_list[i]]});"""
kernel_output = kernel_output[:-2]
else:
......
......@@ -51,16 +51,16 @@ class StringsAPI(ForwardAPI):
return tensor_type_dict[kernel_tensor_out_type]
def gene_output(self,
output_type_list,
set_out_func,
code_indent,
out_dtype_list,
out_tensor_type_list=None,
code_indent='',
inplace_flag=False):
kernel_output = ""
output_names = []
output_create = ""
return_type = self.get_return_type(inplace_flag)
if len(output_type_list) == 1:
if len(out_dtype_list) == 1:
kernel_output = 'kernel_out'
output_names.append('kernel_out')
kernel_tensor_out_type = self.get_kernel_tensor_out_type(
......@@ -71,13 +71,13 @@ class StringsAPI(ForwardAPI):
'names'][0] in self.inplace_map else ""
output_create = f"""
{return_type} api_output{inplace_assign};
{tensor_type}* kernel_out = dynamic_cast<{tensor_type}*>({set_out_func}(kernel_backend, &api_output, {kernel_tensor_out_type}));"""
{tensor_type}* kernel_out = dynamic_cast<{tensor_type}*>(SetStringsKernelOutput(kernel_backend, &api_output, {kernel_tensor_out_type}));"""
elif len(output_type_list) > 1:
elif len(out_dtype_list) > 1:
output_create = f"""
{return_type} api_output;"""
for i in range(len(output_type_list)):
for i in range(len(out_dtype_list)):
kernel_output = kernel_output + f'kernel_out_{i}, '
output_names.append(f'kernel_out_{i}')
kernel_tensor_out_type = self.get_kernel_tensor_out_type(
......@@ -89,7 +89,7 @@ class StringsAPI(ForwardAPI):
std::get<{i}>(api_output) = {self.inplace_map[self.outputs['names'][i]]};"""
output_create = output_create + f"""
{tensor_type}* kernel_out_{i} = dynamic_cast<{tensor_type}*>({set_out_func}(&std::get<{i}>(api_output), {kernel_tensor_out_type}));"""
{tensor_type}* kernel_out_{i} = dynamic_cast<{tensor_type}*>(SetStringsKernelOutput(&std::get<{i}>(api_output), {kernel_tensor_out_type}));"""
kernel_output = kernel_output[:-2]
else:
......@@ -174,7 +174,7 @@ class StringsAPI(ForwardAPI):
input_tensors, kernel_args, kernel_signature = self.get_kernel_args(
code_indent)
outputs_args, kernel_output_names, output_create = self.gene_output(
self.outputs['types'], 'SetStringsKernelOutput', '', inplace_flag)
self.outputs['types'], None, '', inplace_flag)
return f"""
// 1. Get kernel signature and kernel
......@@ -252,11 +252,6 @@ class StringsAPI(ForwardAPI):
kernel_select_code = kernel_key_item_init + kernel_select_code
if len(input_names) > 0:
if self.support_selected_rows_kernel:
kernel_select_code = kernel_select_code + f"""
KernelType kernel_type = ParseKernelTypeByInputArgs({", ".join(input_names)});
"""
kernel_select_code = kernel_select_code + f"""
auto kernel_key_set = ParseKernelKeyByInputArgs({kernel_select_args});
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册