Unverified commit 412573f0, authored by WangZhen, committed by GitHub

Fix translated layer fine-tune error (#49870)

* Fix translated layer fine-tune
Parent commit: 56cacae9
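
For context, "fine-tuning a translated layer" means loading a model that was saved through dynamic-to-static translation and continuing to train it in dygraph mode. Below is a minimal sketch of that workflow, written against the public paddle.jit.save / paddle.jit.load APIs; the layer, path, and hyperparameters are placeholders and are not taken from this change:

    import paddle
    from paddle.static import InputSpec

    # Save a small layer through dynamic-to-static translation.
    layer = paddle.nn.Linear(10, 3)
    paddle.jit.save(
        layer, './example_model', input_spec=[InputSpec([None, 10], 'float32')]
    )

    # Load it back as a translated layer and fine-tune it; this is the path
    # that needs the parameter/output gradient names resolved correctly.
    loaded = paddle.jit.load('./example_model')
    loaded.train()
    opt = paddle.optimizer.Adam(learning_rate=1e-3, parameters=loaded.parameters())

    for _ in range(3):
        x = paddle.randn([4, 10])
        loss = paddle.mean(loaded(x))
        loss.backward()
        opt.step()
        opt.clear_grad()
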
@@ -32,6 +32,7 @@ from paddle.fluid.layers.utils import _hash_with_id, flatten, pack_sequence_as
 from . import logging_utils
 from .return_transformer import RETURN_NO_VALUE_MAGIC_NUM
+from .utils import _out_grad_names, _param_grad_names
 
 __all__ = []

@@ -375,46 +376,15 @@ class PartialProgramLayer:
     @LazyInitialized
     def _param_grad_names(self):
-        names = []
-        # NOTE: `names` and `self._params` must be in the same order so that
-        # the param grad name can be set correctly in the run_program.
-        for param in self._params:
-            candidate = [
-                var_name
-                for var_name in self._train_program.block(0).vars.keys()
-                if var_name.endswith(param.name + '@GRAD')
-            ]
-            if candidate:
-                names.append(
-                    max(candidate, key=lambda name: name.count('grad/'))
-                )
-            else:
-                names.append(param.name + '@GRAD')
-        return names
+        return _param_grad_names(self._train_program.desc, self._params)
 
     @LazyInitialized
     def _out_grad_names(self):
-        """
-        Parse Out@GRAD names from the original train and infer programs.
-        """
-        names = []
-        origin_infer_program = self._create_program(is_infer_mode=True)
-        origin_train_program = self._train_program
-        fwd_end_op_index = len(origin_infer_program.block(0).ops)
-        for i in range(
-            fwd_end_op_index + 1,
-            min(
-                fwd_end_op_index + 2 * len(self._outputs.var_ids),
-                len(origin_train_program.block(0).ops),
-            ),
-            2,
-        ):
-            op = origin_train_program.block(0).ops[i]
-            if op.type == 'fill_constant':
-                var_name = op.output('Out')[0]
-                names.append(var_name)
-        return names
+        return _out_grad_names(
+            self._train_program.desc,
+            self._create_program(is_infer_mode=True).desc.block(0).op_size(),
+            len(self._outputs.var_ids),
+        )
 
     @property
     def program(self):

@@ -1483,3 +1483,41 @@ def create_name_str(name_ids):
     names_str = ["'%s'" % (name.replace("'", "\\'")) for name in name_ids]
     return "(%s, )" % ','.join(names_str)
+
+
+def _param_grad_names(program_desc, params):
+    """
+    Parse PARAM@GRAD names from the original train and infer programs.
+    """
+    names = []
+    # NOTE: `names` and `params` must be in the same order so that
+    # the param grad name can be set correctly in the run_program.
+    for param in params:
+        candidate = [
+            var.name()
+            for var in program_desc.block(0).all_vars()
+            if var.name().endswith(param.name + '@GRAD')
+        ]
+        if candidate:
+            names.append(max(candidate, key=lambda name: name.count('grad/')))
+        else:
+            names.append(param.name + '@GRAD')
+    return names
+
+
+def _out_grad_names(program_desc, fwd_end_op_index, out_size):
+    """
+    Parse Out@GRAD names from the original train and infer programs.
+    """
+    names = []
+    for i in range(
+        fwd_end_op_index + 1,
+        min(fwd_end_op_index + 2 * out_size, program_desc.block(0).op_size()),
+        2,
+    ):
+        op = program_desc.block(0).op(i)
+        if op.type() == 'fill_constant':
+            var_name = op.output('Out')[0]
+            names.append(var_name)
+    return names
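
The selection rule in _param_grad_names above is easiest to see with made-up names: among the block variables whose names end in <param>@GRAD, the helper keeps the candidate with the most 'grad/' prefixes (such prefixed names can appear when backward is built on top of an existing gradient, e.g. for higher-order gradients). A standalone illustration, using mocked names only and no Paddle calls:

    # Mocked variable names; this does not touch any Paddle API.
    param_name = 'linear_0.w_0'
    block_var_names = [
        'linear_0.w_0',
        'linear_0.b_0@GRAD',
        'linear_0.w_0@GRAD',
        'grad/grad/linear_0.w_0@GRAD',
    ]

    candidate = [n for n in block_var_names if n.endswith(param_name + '@GRAD')]
    # Same tie-breaking rule as the helper: prefer the most 'grad/'-nested name.
    picked = max(candidate, key=lambda name: name.count('grad/'))
    print(candidate)  # ['linear_0.w_0@GRAD', 'grad/grad/linear_0.w_0@GRAD']
    print(picked)     # grad/grad/linear_0.w_0@GRAD
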
@@ -33,6 +33,8 @@ from paddle.jit.dy2static.partial_program import (
     add_build_strategy_for,
 )
+from .dy2static.utils import _out_grad_names, _param_grad_names
+
 __all__ = []
 
 INFER_MODEL_SUFFIX = ".pdmodel"

@@ -887,28 +889,7 @@ def _construct_params_and_buffers(
 def _valid_vars(vars):
-    if vars:
-        return vars
-    if framework._in_eager_without_dygraph_check():
-        return [
-            core.eager.Tensor(
-                core.VarDesc.VarType.FP32,
-                [],
-                "Fake_var",
-                core.VarDesc.VarType.RAW,
-                False,
-            )
-        ]
-    else:
-        return [
-            core.VarBase(
-                core.VarDesc.VarType.FP32,
-                [],
-                "Fake_var",
-                core.VarDesc.VarType.RAW,
-                False,
-            )
-        ]
+    return vars if vars else None
 
 
 def _run_dygraph(instance, input, program_holder):

@@ -1041,6 +1022,15 @@ def _run_dygraph(instance, input, program_holder):
         'program_id',
         _hash_with_id(trace_program, instance),
     ]
+    if not instance._is_test:
+        attrs.extend(
+            (
+                'param_grad_names',
+                _param_grad_names(trace_program, persistable_vars),
+                'out_grad_names',
+                _out_grad_names(trace_program, end_op_index, len(output_vars)),
+            )
+        )
 
     use_interpretorcore = (
         _is_enable_standalone_executor()
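
One detail worth noting in the last hunk: attrs is a flat, alternating name/value list, which is why the two new attributes are appended as a single flat tuple rather than as nested (name, value) pairs. A toy illustration of that convention; every value below is made up, and only the attribute keys come from this diff:

    # Toy model of the flat name/value attrs list built in _run_dygraph above.
    is_test = False
    attrs = ['program_id', 42]  # made-up id

    if not is_test:
        attrs.extend(
            (
                'param_grad_names', ['linear_0.w_0@GRAD'],  # made-up names
                'out_grad_names', ['translated_layer/scale_0.tmp_0@GRAD'],
            )
        )

    # Pairing every other element shows the mapping the run_program call receives.
    print(dict(zip(attrs[::2], attrs[1::2])))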