Unverified · Commit 8bf2df5b authored by zhouweiwei2014, committed by GitHub

change loss.numpy()[0] to float(loss) to adapt to 0-D tensors (#1640)

Parent 92874cc0
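Context for the change: with Paddle's 0-D tensor rollout (PaddlePaddle 2.5 and later), reductions such as paddle.sum and paddle.mean return a true scalar tensor of shape [] instead of a shape-[1] tensor, so loss.numpy() yields a 0-D ndarray that can no longer be indexed with [0], while float(loss) extracts the scalar in both cases. A minimal sketch of the difference, assuming PaddlePaddle >= 2.5 (illustrative only, not part of the diff):

```python
import paddle

# Reductions now produce a 0-D tensor (shape []), not shape [1].
loss = paddle.mean(paddle.to_tensor([1.0, 2.0, 3.0]))

# Old pattern: loss.numpy() is a 0-D ndarray, so [0] indexing fails.
# loss.numpy()[0]  # IndexError: too many indices for array

# New pattern: float() works for 0-D and single-element tensors alike.
print(float(loss))  # 2.0
```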
@@ -34,7 +34,9 @@ class Model(nn.Layer):
         models += [ReLU()]
         models += [nn.Pool2D(2, 'max', 2)]
         models += [
-            nn.Linear(784, 120), nn.Linear(120, 84), nn.Linear(84, 10)
+            nn.Linear(784, 120),
+            nn.Linear(120, 84),
+            nn.Linear(84, 10)
         ]
         models = ofa_super.convert(models)
         self.models = paddle.nn.Sequential(*models)
@@ -104,8 +106,9 @@ def test_ofa():
             dy_x_data = np.array(
                 [x[0].reshape(1, 28, 28)
                  for x in data]).astype('float32')
-            y_data = np.array(
-                [x[1] for x in data]).astype('int64').reshape(-1, 1)
+            y_data = np.array([x[1]
+                               for x in data]).astype('int64').reshape(
+                                   -1, 1)

             img = paddle.to_tensor(dy_x_data)
             label = paddle.to_tensor(y_data)
@@ -122,7 +125,7 @@ def test_ofa():
                 print(
                     'epoch: {}, batch: {}, loss: {}, distill loss: {}'.
                     format(epoch_id, batch_id,
-                           loss.numpy()[0], dis_loss.numpy()[0]))
+                           float(loss), float(dis_loss)))
             ### accumurate dynamic_batch_size network of gradients for same batch of data
             ### NOTE: need to fix gradients accumulate in PaddlePaddle
             adam.minimize(loss)
...
@@ -67,10 +67,11 @@ def eval_function(exe, program, feed_names, fetch_list):
     eval_losses = []
     total_score = 0
     for eval_step, (data, labels, loss_mask, info) in enumerate(eval_loader()):
-        preds = exe.run(program=program,
-                        feed=data,
-                        fetch_list=fetch_list,
-                        return_numpy=False)
+        preds = exe.run(
+            program=program,
+            feed=data,
+            fetch_list=fetch_list,
+            return_numpy=False)
         paddle.disable_static()
@@ -88,7 +89,7 @@ def eval_function(exe, program, feed_names, fetch_list):
             masked_lm_loss = paddle.nn.functional.cross_entropy(
                 preds, labels, reduction="none")
             loss = paddle.sum(masked_lm_loss * loss_mask)
-            eval_losses.append(loss.numpy()[0])
+            eval_losses.append(float(loss))
             total_score += loss.numpy() / (num_tokenized_tokens - 1)
         else:
@@ -100,8 +101,8 @@ def eval_function(exe, program, feed_names, fetch_list):
             acc = paddle.where(
                 paddle.cast(loss_mask, 'bool'), acc, paddle.ones_like(acc))
             acc = paddle.sum(paddle.prod(acc, -1))
-            eval_losses.append(acc.numpy()[0])
-            total_score += acc.numpy()[0]
+            eval_losses.append(float(acc))
+            total_score += float(acc)

         if eval_step != 0 and (eval_step % 10 == 0):
             print("[eval] step: %d, %s: %.9f, speed: %.2f step/s" %
@@ -116,8 +117,8 @@ def eval_function(exe, program, feed_names, fetch_list):
         ppl = math.exp(min(20, total_loss))
         token_ratio = (num_tokenized_tokens - 1) / (num_original_tokens - 1)
         adjusted_ppl = math.exp(min(20, total_loss * token_ratio))
-        string = ' validation results on {} | '.format(gpt_config['Data'][
-            'Eval']['dataset']['name'])
+        string = ' validation results on {} | '.format(
+            gpt_config['Data']['Eval']['dataset']['name'])
         string += 'avg loss: {:.4E} | '.format(total_loss)
         string += 'ppl: {:.4E} | '.format(ppl)
         string += 'adjusted ppl: {:.4E} | '.format(adjusted_ppl)
@@ -126,8 +127,8 @@ def eval_function(exe, program, feed_names, fetch_list):
     else:
         num_correct = float(total_score)
         acc = float(num_correct / num_examples)
-        string = ' validation results on {} | '.format(gpt_config['Data'][
-            'Eval']['dataset']['name'])
+        string = ' validation results on {} | '.format(
+            gpt_config['Data']['Eval']['dataset']['name'])
         string += 'number correct: {:.4E} | '.format(num_correct)
         string += 'total examples: {:.4E} | '.format(num_examples)
         string += 'avg accuracy: {:.4E}'.format(acc)
...
@@ -299,10 +299,11 @@ class TestOFA(unittest.TestCase):
         self.elastic_order = ['kernel_size', 'width', 'depth']

     def test_ofa(self):
-        ofa_model = OFA(self.model,
-                        self.run_config,
-                        distill_config=self.distill_config,
-                        elastic_order=self.elastic_order)
+        ofa_model = OFA(
+            self.model,
+            self.run_config,
+            distill_config=self.distill_config,
+            elastic_order=self.elastic_order)

         start_epoch = 0
         for idx in range(len(self.run_config.n_epochs)):
@@ -316,8 +317,8 @@ class TestOFA(unittest.TestCase):
                     self.run_config.n_epochs[idx][ph_idx]):
                 if epoch_id == 0:
                     ofa_model.set_epoch(epoch_id)
-                for model_no in range(self.run_config.dynamic_batch_size[
-                        idx]):
+                for model_no in range(
+                        self.run_config.dynamic_batch_size[idx]):
                     output = ofa_model(self.data)
                     if (isinstance(output, tuple)):
                         output = output[0]
@@ -325,11 +326,11 @@ class TestOFA(unittest.TestCase):
                     if self.distill_config.mapping_layers != None:
                         dis_loss = ofa_model.calc_distill_loss()
                         loss += dis_loss
-                        dis_loss = dis_loss.numpy()[0]
+                        dis_loss = float(dis_loss)
                     else:
                         dis_loss = 0
                     print('epoch: {}, loss: {}, distill loss: {}'.format(
-                        epoch_id, loss.numpy()[0], dis_loss))
+                        epoch_id, float(loss), dis_loss))
                     loss.backward()
                     adam.minimize(loss)
                     adam.clear_gradients()
@@ -536,8 +537,9 @@ class TestManualSetting(unittest.TestCase):
         self.ofa_model2 = OFA(self.model, run_config=run_config)
         self.ofa_model2._clear_search_space(self.data)
         #print(self.ofa_model2._ofa_layers)
-        assert self.ofa_model2._ofa_layers['models.1'][
-            'expand_ratio'] == [0.25, 1.0]
+        assert self.ofa_model2._ofa_layers['models.1']['expand_ratio'] == [
+            0.25, 1.0
+        ]
         assert len(self.ofa_model2._ofa_layers) == 2
         #print(self.ofa_model_1._ofa_layers)
...