Unverified commit 88f849b3, authored by Bai Yifan, committed by GitHub

Format KD english API (#102)

Parent 4670a793
@@ -20,21 +20,31 @@ def merge(teacher_program,
           student_program,
           data_name_map,
           place,
-          scope=fluid.global_scope(),
+          scope=None,
           name_prefix='teacher_'):
-    """
-    Merge teacher program into student program and add a uniform prefix to the
+    """Merge teacher program into student program and add a uniform prefix to the
     names of all vars in teacher program.
     Args:
         teacher_program(Program): The input teacher model paddle program
         student_program(Program): The input student model paddle program
-        data_name_map(dict): Describe the mapping between the teacher var name
-                             and the student var name
+        data_name_map(dict): Mapping of the teacher input interface name to the
+                             student input interface name, where the key of the
+                             dict is the input name of teacher_program and the
+                             value is the input name of student_program.
         place(fluid.CPUPlace()|fluid.CUDAPlace(N)): This parameter represents
                                                     which device paddle runs on.
-        scope(Scope): The input scope
+        scope(Scope): This parameter indicates the variable scope used by
+                      the program. If not specified, the default global scope
+                      will be used. Default: None
         name_prefix(str): Name prefix added for all vars of the teacher program.
+                          Default: 'teacher_'
+    Returns:
+        None
     """
+    if scope is None:
+        scope = fluid.global_scope()
     teacher_program = teacher_program.clone(for_test=True)
     for teacher_var in teacher_program.list_vars():
         skip_rename = False
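For reference, a minimal usage sketch of the merged API follows. The network definitions, variable names, and the paddleslim.dist import path are illustrative assumptions, not part of this diff.

import paddle.fluid as fluid
from paddleslim.dist import merge  # assumed import path for this module

# Build toy student and teacher programs; shapes and names are placeholders.
student_program = fluid.Program()
student_startup = fluid.Program()
with fluid.program_guard(student_program, student_startup):
    image = fluid.data(name='image', shape=[None, 3, 32, 32], dtype='float32')
    s_feat = fluid.layers.conv2d(image, 8, 3, name='s_conv')

teacher_program = fluid.Program()
teacher_startup = fluid.Program()
with fluid.program_guard(teacher_program, teacher_startup):
    data = fluid.data(name='data', shape=[None, 3, 32, 32], dtype='float32')
    t_feat = fluid.layers.conv2d(data, 8, 3, name='t_conv')

place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Initialize parameters in the default global scope before merging.
exe.run(student_startup)
exe.run(teacher_startup)

# Key: teacher input name, value: student input name.
data_name_map = {'data': 'image'}
merge(teacher_program, student_program, data_name_map, place)
# Teacher vars are now visible in student_program under the 'teacher_' prefix.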
@@ -89,9 +99,9 @@ def fsp_loss(teacher_var1_name,
              teacher_var2_name,
              student_var1_name,
              student_var2_name,
-             program=fluid.default_main_program()):
+             program=None):
-    """
-    Combine variables from student model and teacher model by fsp-loss.
+    """Combine variables from student model and teacher model by fsp-loss.
     Args:
         teacher_var1_name(str): The name of teacher_var1.
         teacher_var2_name(str): The name of teacher_var2. Except for the
@@ -101,10 +111,14 @@ def fsp_loss(teacher_var1_name,
         student_var2_name(str): The name of student_var2. Except for the
                                 second dimension, all other dimensions should
                                 be consistent with student_var1.
-        program(Program): The input distiller program.
-                          default: fluid.default_main_program()
-    Return(Variable): fsp distiller loss.
+        program(Program): The input distiller program. If not specified,
+                          the default program will be used. Default: None
+    Returns:
+        Variable: fsp distiller loss.
     """
+    if program is None:
+        program = fluid.default_main_program()
     teacher_var1 = program.global_block().var(teacher_var1_name)
     teacher_var2 = program.global_block().var(teacher_var2_name)
     student_var1 = program.global_block().var(student_var1_name)
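Continuing the sketch above, an fsp-loss can be built on the merged student program. The four variable names are placeholders; in practice pick real intermediate activations from the merged program, respecting the shape constraints in the docstring.

from paddleslim.dist import fsp_loss  # assumed import path

with fluid.program_guard(student_program, student_startup):
    # Names below are illustrative stand-ins for two feature maps per model.
    distill_loss = fsp_loss('teacher_t_conv1.tmp_0', 'teacher_t_conv2.tmp_0',
                            's_conv1.tmp_0', 's_conv2.tmp_0',
                            program=student_program)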
@@ -118,16 +132,20 @@ def fsp_loss(teacher_var1_name,
 def l2_loss(teacher_var_name,
             student_var_name,
-            program=fluid.default_main_program()):
+            program=None):
-    """
-    Combine variables from student model and teacher model by l2-loss.
+    """Combine variables from student model and teacher model by l2-loss.
     Args:
         teacher_var_name(str): The name of teacher_var.
         student_var_name(str): The name of student_var.
-        program(Program): The input distiller program.
-                          default: fluid.default_main_program()
-    Return(Variable): l2 distiller loss.
+        program(Program): The input distiller program. If not specified,
+                          the default program will be used. Default: None
+    Returns:
+        Variable: l2 distiller loss.
     """
+    if program is None:
+        program = fluid.default_main_program()
     student_var = program.global_block().var(student_var_name)
     teacher_var = program.global_block().var(teacher_var_name)
     l2_loss = fluid.layers.reduce_mean(
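An analogous sketch for l2-loss, again with placeholder variable names; the two vars must have matching shapes.

from paddleslim.dist import l2_loss  # assumed import path

with fluid.program_guard(student_program, student_startup):
    distill_loss = l2_loss('teacher_t_conv.tmp_0', 's_conv.tmp_0',
                           program=student_program)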
@@ -137,22 +155,26 @@ def l2_loss(teacher_var_name,
 def soft_label_loss(teacher_var_name,
                     student_var_name,
-                    program=fluid.default_main_program(),
+                    program=None,
                     teacher_temperature=1.,
                     student_temperature=1.):
-    """
-    Combine variables from student model and teacher model by soft-label-loss.
+    """Combine variables from student model and teacher model by soft-label-loss.
     Args:
         teacher_var_name(str): The name of teacher_var.
         student_var_name(str): The name of student_var.
-        program(Program): The input distiller program.
-                          default: fluid.default_main_program()
+        program(Program): The input distiller program. If not specified,
+                          the default program will be used. Default: None
         teacher_temperature(float): Temperature used to divide
-                                    teacher_feature_map before softmax. default: 1.0
+                                    teacher_feature_map before softmax. Default: 1.0
         student_temperature(float): Temperature used to divide
-                                    student_feature_map before softmax. default: 1.0
-    Return(Variable): l2 distiller loss.
+                                    student_feature_map before softmax. Default: 1.0
+    Returns:
+        Variable: soft label distiller loss.
     """
+    if program is None:
+        program = fluid.default_main_program()
     student_var = program.global_block().var(student_var_name)
     teacher_var = program.global_block().var(teacher_var_name)
     student_var = fluid.layers.softmax(student_var / student_temperature)
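A soft-label sketch on the merged program. The logits names are placeholders, and a shared temperature above 1 (here 4.0) is a common choice to soften both distributions.

from paddleslim.dist import soft_label_loss  # assumed import path

with fluid.program_guard(student_program, student_startup):
    distill_loss = soft_label_loss('teacher_t_logits', 's_logits',
                                   program=student_program,
                                   teacher_temperature=4.,
                                   student_temperature=4.)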
@@ -164,15 +186,19 @@ def soft_label_loss(teacher_var_name,
     return soft_label_loss
 
-def loss(loss_func, program=fluid.default_main_program(), **kwargs):
+def loss(loss_func, program=None, **kwargs):
-    """
-    Combine variables from student model and teacher model by a self-defined loss.
+    """Combine variables from student model and teacher model by a self-defined loss.
     Args:
-        program(Program): The input distiller program.
-                          default: fluid.default_main_program()
+        program(Program): The input distiller program. If not specified,
+                          the default program will be used. Default: None
         loss_func(function): The user-defined loss function.
-    Return(Variable): self-defined distiller loss.
+    Returns:
+        Variable: self-defined distiller loss.
     """
+    if program is None:
+        program = fluid.default_main_program()
     func_parameters = {}
     for item in kwargs.items():
         if isinstance(item[1], str):
...
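Per the isinstance(item[1], str) check above, string-valued kwargs are resolved to variables of `program` before being passed through to loss_func, while other kwargs are forwarded unchanged. A sketch with a hypothetical user-defined loss, reusing the merged program and placeholder names from the earlier sketches:

from paddleslim.dist import loss  # assumed import path

def my_distill(t_var, s_var, weight=1.0):
    # Any differentiable combination of the two variables works here.
    return weight * fluid.layers.reduce_mean(
        fluid.layers.square(s_var - t_var))

with fluid.program_guard(student_program, student_startup):
    # 't_var' and 's_var' strings are looked up as var names in
    # student_program; 'weight' is passed to my_distill as-is.
    distill_loss = loss(my_distill, program=student_program,
                        t_var='teacher_t_conv.tmp_0', s_var='s_conv.tmp_0',
                        weight=0.5)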