未验证 提交 1f2827d1 编写于 作者: C ceci3 提交者: GitHub

fix bug (#631)

上级 4923deac
...@@ -27,7 +27,6 @@ from paddle.metric import Accuracy ...@@ -27,7 +27,6 @@ from paddle.metric import Accuracy
from paddlenlp.data import Stack, Tuple, Pad from paddlenlp.data import Stack, Tuple, Pad
from paddlenlp.transformers import BertModel, BertForSequenceClassification, BertTokenizer from paddlenlp.transformers import BertModel, BertForSequenceClassification, BertTokenizer
from paddlenlp.utils.log import logger
from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman
import paddlenlp.datasets as datasets import paddlenlp.datasets as datasets
from paddleslim.nas.ofa import OFA, DistillConfig, utils from paddleslim.nas.ofa import OFA, DistillConfig, utils
...@@ -166,7 +165,8 @@ def set_seed(args): ...@@ -166,7 +165,8 @@ def set_seed(args):
paddle.seed(args.seed + paddle.distributed.get_rank()) paddle.seed(args.seed + paddle.distributed.get_rank())
def evaluate(model, criterion, metric, data_loader, width_mult=1.0): def evaluate(model, criterion, metric, data_loader, epoch, step,
width_mult=1.0):
with paddle.no_grad(): with paddle.no_grad():
model.eval() model.eval()
metric.reset() metric.reset()
...@@ -180,8 +180,9 @@ def evaluate(model, criterion, metric, data_loader, width_mult=1.0): ...@@ -180,8 +180,9 @@ def evaluate(model, criterion, metric, data_loader, width_mult=1.0):
metric.update(correct) metric.update(correct)
results = metric.accumulate() results = metric.accumulate()
print( print(
"width_mult: %f, eval loss: %f, %s: %s\n" % "epoch: %d, batch: %d, width_mult: %s, eval loss: %f, %s: %s\n" %
(width_mult, loss.numpy(), metric.name(), results), (epoch, step, 'teacher' if width_mult == 100 else str(width_mult),
loss.numpy(), metric.name(), results),
end='') end='')
model.train() model.train()
...@@ -485,7 +486,7 @@ def do_train(args): ...@@ -485,7 +486,7 @@ def do_train(args):
if global_step % args.logging_steps == 0: if global_step % args.logging_steps == 0:
if (not args.n_gpu > 1) or paddle.distributed.get_rank() == 0: if (not args.n_gpu > 1) or paddle.distributed.get_rank() == 0:
logger.info( print(
"global step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s" "global step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s"
% (global_step, epoch, step, loss, % (global_step, epoch, step, loss,
args.logging_steps / (time.time() - tic_train))) args.logging_steps / (time.time() - tic_train)))
...@@ -498,12 +499,16 @@ def do_train(args): ...@@ -498,12 +499,16 @@ def do_train(args):
criterion, criterion,
metric, metric,
dev_data_loader_matched, dev_data_loader_matched,
epoch,
step,
width_mult=100) width_mult=100)
evaluate( evaluate(
teacher_model, teacher_model,
criterion, criterion,
metric, metric,
dev_data_loader_mismatched, dev_data_loader_mismatched,
epoch,
step,
width_mult=100) width_mult=100)
else: else:
evaluate( evaluate(
...@@ -511,6 +516,8 @@ def do_train(args): ...@@ -511,6 +516,8 @@ def do_train(args):
criterion, criterion,
metric, metric,
dev_data_loader, dev_data_loader,
epoch,
step,
width_mult=100) width_mult=100)
for idx, width_mult in enumerate(args.width_mult_list): for idx, width_mult in enumerate(args.width_mult_list):
net_config = utils.dynabert_config(ofa_model, width_mult) net_config = utils.dynabert_config(ofa_model, width_mult)
...@@ -518,14 +525,16 @@ def do_train(args): ...@@ -518,14 +525,16 @@ def do_train(args):
tic_eval = time.time() tic_eval = time.time()
if args.task_name == "mnli": if args.task_name == "mnli":
acc = evaluate(ofa_model, criterion, metric, acc = evaluate(ofa_model, criterion, metric,
dev_data_loader_matched, width_mult) dev_data_loader_matched, epoch, step,
width_mult)
evaluate(ofa_model, criterion, metric, evaluate(ofa_model, criterion, metric,
dev_data_loader_mismatched, width_mult) dev_data_loader_mismatched, epoch, step,
width_mult)
print("eval done total : %s s" % print("eval done total : %s s" %
(time.time() - tic_eval)) (time.time() - tic_eval))
else: else:
acc = evaluate(ofa_model, criterion, metric, acc = evaluate(ofa_model, criterion, metric,
dev_data_loader, width_mult) dev_data_loader, epoch, step, width_mult)
print("eval done total : %s s" % print("eval done total : %s s" %
(time.time() - tic_eval)) (time.time() - tic_eval))
......
...@@ -64,13 +64,15 @@ class Convert: ...@@ -64,13 +64,15 @@ class Convert:
w_attr = layer._param_attr if pd_ver == 185 else layer._weight_attr w_attr = layer._param_attr if pd_ver == 185 else layer._weight_attr
if isinstance(w_attr, ParamAttr): if isinstance(w_attr, ParamAttr):
if w_attr != None and not isinstance(w_attr, bool): if w_attr != None and not isinstance(w_attr,
bool) and w_attr.name != None:
w_attr.name = 'super_' + w_attr.name w_attr.name = 'super_' + w_attr.name
if has_bias: if has_bias:
if isinstance(layer._bias_attr, ParamAttr): if isinstance(layer._bias_attr, ParamAttr):
if layer._bias_attr != None and not isinstance(layer._bias_attr, if layer._bias_attr != None and not isinstance(
bool): layer._bias_attr,
bool) and layer._bias_attr.name != None:
layer._bias_attr.name = 'super_' + layer._bias_attr.name layer._bias_attr.name = 'super_' + layer._bias_attr.name
def convert(self, network): def convert(self, network):
...@@ -429,6 +431,7 @@ class Convert: ...@@ -429,6 +431,7 @@ class Convert:
new_attr_name = ['act', 'dtype'] new_attr_name = ['act', 'dtype']
else: else:
new_attr_name = ['weight_attr', 'bias_attr'] new_attr_name = ['weight_attr', 'bias_attr']
self._change_name(layer, pd_ver)
in_nc, out_nc = layer._parameters['weight'].shape in_nc, out_nc = layer._parameters['weight'].shape
new_attr_dict = dict.fromkeys(new_attr_name, None) new_attr_dict = dict.fromkeys(new_attr_name, None)
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
import sys import sys
sys.path.append("../") sys.path.append("../")
import unittest import unittest
import paddle
import paddle.nn as nn
from paddle.vision.models import mobilenet_v1 from paddle.vision.models import mobilenet_v1
from paddleslim.nas.ofa.convert_super import Convert, supernet from paddleslim.nas.ofa.convert_super import Convert, supernet
...@@ -29,5 +31,28 @@ class TestConvertSuper(unittest.TestCase): ...@@ -29,5 +31,28 @@ class TestConvertSuper(unittest.TestCase):
assert len(sp_model.sublayers()) == 151 assert len(sp_model.sublayers()) == 151
class TestConvertSuper(unittest.TestCase):
    """Smoke test: convert a network whose ParamAttr objects get no explicit name.

    The model's ``weight_attr`` and ``bias_attr`` are ``paddle.ParamAttr``
    instances built with only an ``initializer`` argument, and the network is
    then passed through ``Convert(...).convert``.
    """

    # NOTE(review): this class appears to reuse the name of the test class
    # defined just above it in the same file. If so, this definition rebinds
    # the module-level name and unittest discovery will no longer see the
    # earlier class -- consider giving the two classes distinct names.

    def setUp(self):
        """Build a one-layer network with initializer-only parameter attrs."""

        class Model(nn.Layer):
            """A single ``nn.Linear(5, 10)`` wrapped in a layer."""

            def __init__(self):
                super(Model, self).__init__()
                self.fc = nn.Linear(
                    5,
                    10,
                    weight_attr=paddle.ParamAttr(
                        initializer=nn.initializer.XavierNormal()),
                    bias_attr=paddle.ParamAttr(
                        initializer=nn.initializer.Constant(value=0.0)))

            def forward(self, inputs):
                return self.fc(inputs)

        self.model = Model()

    def test_convert(self):
        """Conversion must finish without raising and hand back a model."""
        sp_net_config = supernet(expand_ratio=[1, 2, 4])
        sp_model = Convert(sp_net_config).convert(self.model)
        # The original test asserted nothing; at minimum make sure the
        # conversion actually returned something.
        self.assertIsNotNone(sp_model)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册