未验证 提交 8bf2df5b 编写于 作者: zhouweiwei2014's avatar zhouweiwei2014 提交者: GitHub

Change loss.numpy()[0] to float(loss) to adapt to 0-D tensors (#1640)

上级 92874cc0
......@@ -34,7 +34,9 @@ class Model(nn.Layer):
models += [ReLU()]
models += [nn.Pool2D(2, 'max', 2)]
models += [
nn.Linear(784, 120), nn.Linear(120, 84), nn.Linear(84, 10)
nn.Linear(784, 120),
nn.Linear(120, 84),
nn.Linear(84, 10)
]
models = ofa_super.convert(models)
self.models = paddle.nn.Sequential(*models)
......@@ -104,8 +106,9 @@ def test_ofa():
dy_x_data = np.array(
[x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
y_data = np.array([x[1]
for x in data]).astype('int64').reshape(
-1, 1)
img = paddle.to_tensor(dy_x_data)
label = paddle.to_tensor(y_data)
......@@ -122,7 +125,7 @@ def test_ofa():
print(
'epoch: {}, batch: {}, loss: {}, distill loss: {}'.
format(epoch_id, batch_id,
loss.numpy()[0], dis_loss.numpy()[0]))
float(loss), float(dis_loss)))
### accumulate the gradients of the dynamic_batch_size networks for the same batch of data
### NOTE: gradient accumulation needs to be fixed in PaddlePaddle
adam.minimize(loss)
......
......@@ -67,10 +67,11 @@ def eval_function(exe, program, feed_names, fetch_list):
eval_losses = []
total_score = 0
for eval_step, (data, labels, loss_mask, info) in enumerate(eval_loader()):
preds = exe.run(program=program,
feed=data,
fetch_list=fetch_list,
return_numpy=False)
preds = exe.run(
program=program,
feed=data,
fetch_list=fetch_list,
return_numpy=False)
paddle.disable_static()
......@@ -88,7 +89,7 @@ def eval_function(exe, program, feed_names, fetch_list):
masked_lm_loss = paddle.nn.functional.cross_entropy(
preds, labels, reduction="none")
loss = paddle.sum(masked_lm_loss * loss_mask)
eval_losses.append(loss.numpy()[0])
eval_losses.append(float(loss))
total_score += loss.numpy() / (num_tokenized_tokens - 1)
else:
......@@ -100,8 +101,8 @@ def eval_function(exe, program, feed_names, fetch_list):
acc = paddle.where(
paddle.cast(loss_mask, 'bool'), acc, paddle.ones_like(acc))
acc = paddle.sum(paddle.prod(acc, -1))
eval_losses.append(acc.numpy()[0])
total_score += acc.numpy()[0]
eval_losses.append(float(acc))
total_score += float(acc)
if eval_step != 0 and (eval_step % 10 == 0):
print("[eval] step: %d, %s: %.9f, speed: %.2f step/s" %
......@@ -116,8 +117,8 @@ def eval_function(exe, program, feed_names, fetch_list):
ppl = math.exp(min(20, total_loss))
token_ratio = (num_tokenized_tokens - 1) / (num_original_tokens - 1)
adjusted_ppl = math.exp(min(20, total_loss * token_ratio))
string = ' validation results on {} | '.format(gpt_config['Data'][
'Eval']['dataset']['name'])
string = ' validation results on {} | '.format(
gpt_config['Data']['Eval']['dataset']['name'])
string += 'avg loss: {:.4E} | '.format(total_loss)
string += 'ppl: {:.4E} | '.format(ppl)
string += 'adjusted ppl: {:.4E} | '.format(adjusted_ppl)
......@@ -126,8 +127,8 @@ def eval_function(exe, program, feed_names, fetch_list):
else:
num_correct = float(total_score)
acc = float(num_correct / num_examples)
string = ' validation results on {} | '.format(gpt_config['Data'][
'Eval']['dataset']['name'])
string = ' validation results on {} | '.format(
gpt_config['Data']['Eval']['dataset']['name'])
string += 'number correct: {:.4E} | '.format(num_correct)
string += 'total examples: {:.4E} | '.format(num_examples)
string += 'avg accuracy: {:.4E}'.format(acc)
......
......@@ -299,10 +299,11 @@ class TestOFA(unittest.TestCase):
self.elastic_order = ['kernel_size', 'width', 'depth']
def test_ofa(self):
ofa_model = OFA(self.model,
self.run_config,
distill_config=self.distill_config,
elastic_order=self.elastic_order)
ofa_model = OFA(
self.model,
self.run_config,
distill_config=self.distill_config,
elastic_order=self.elastic_order)
start_epoch = 0
for idx in range(len(self.run_config.n_epochs)):
......@@ -316,8 +317,8 @@ class TestOFA(unittest.TestCase):
self.run_config.n_epochs[idx][ph_idx]):
if epoch_id == 0:
ofa_model.set_epoch(epoch_id)
for model_no in range(self.run_config.dynamic_batch_size[
idx]):
for model_no in range(
self.run_config.dynamic_batch_size[idx]):
output = ofa_model(self.data)
if (isinstance(output, tuple)):
output = output[0]
......@@ -325,11 +326,11 @@ class TestOFA(unittest.TestCase):
if self.distill_config.mapping_layers != None:
dis_loss = ofa_model.calc_distill_loss()
loss += dis_loss
dis_loss = dis_loss.numpy()[0]
dis_loss = float(dis_loss)
else:
dis_loss = 0
print('epoch: {}, loss: {}, distill loss: {}'.format(
epoch_id, loss.numpy()[0], dis_loss))
epoch_id, float(loss), dis_loss))
loss.backward()
adam.minimize(loss)
adam.clear_gradients()
......@@ -536,8 +537,9 @@ class TestManualSetting(unittest.TestCase):
self.ofa_model2 = OFA(self.model, run_config=run_config)
self.ofa_model2._clear_search_space(self.data)
#print(self.ofa_model2._ofa_layers)
assert self.ofa_model2._ofa_layers['models.1'][
'expand_ratio'] == [0.25, 1.0]
assert self.ofa_model2._ofa_layers['models.1']['expand_ratio'] == [
0.25, 1.0
]
assert len(self.ofa_model2._ofa_layers) == 2
#print(self.ofa_model_1._ofa_layers)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册