From 8bf2df5b9e3ea9690764e0ae6f57b2772dce2246 Mon Sep 17 00:00:00 2001
From: Zhou Wei <1183042833@qq.com>
Date: Wed, 1 Feb 2023 11:34:59 +0800
Subject: [PATCH] change loss.numpy()[0] to float(loss) to adapt 0D (#1640)

---
 demo/one_shot/ofa_train.py                    | 11 +++++----
 example/quantization_analysis/GPT/analysis.py | 23 ++++++++++---------
 tests/test_ofa.py                             | 22 ++++++++++--------
 3 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/demo/one_shot/ofa_train.py b/demo/one_shot/ofa_train.py
index ee3d3b55..97c70d93 100644
--- a/demo/one_shot/ofa_train.py
+++ b/demo/one_shot/ofa_train.py
@@ -34,7 +34,9 @@ class Model(nn.Layer):
             models += [ReLU()]
             models += [nn.Pool2D(2, 'max', 2)]
             models += [
-                nn.Linear(784, 120), nn.Linear(120, 84), nn.Linear(84, 10)
+                nn.Linear(784, 120),
+                nn.Linear(120, 84),
+                nn.Linear(84, 10)
             ]
             models = ofa_super.convert(models)
         self.models = paddle.nn.Sequential(*models)
@@ -104,8 +106,9 @@ def test_ofa():
 
                     dy_x_data = np.array(
                         [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-                    y_data = np.array(
-                        [x[1] for x in data]).astype('int64').reshape(-1, 1)
+                    y_data = np.array([x[1]
+                                       for x in data]).astype('int64').reshape(
+                                           -1, 1)
 
                     img = paddle.to_tensor(dy_x_data)
                     label = paddle.to_tensor(y_data)
@@ -122,7 +125,7 @@ def test_ofa():
                             print(
                                 'epoch: {}, batch: {}, loss: {}, distill loss: {}'.
                                 format(epoch_id, batch_id,
-                                       loss.numpy()[0], dis_loss.numpy()[0]))
+                                       float(loss), float(dis_loss)))
                         ### accumurate dynamic_batch_size network of gradients for same batch of data
                         ### NOTE: need to fix gradients accumulate in PaddlePaddle
                         adam.minimize(loss)
diff --git a/example/quantization_analysis/GPT/analysis.py b/example/quantization_analysis/GPT/analysis.py
index bddf60e1..4f4fa390 100644
--- a/example/quantization_analysis/GPT/analysis.py
+++ b/example/quantization_analysis/GPT/analysis.py
@@ -67,10 +67,11 @@ def eval_function(exe, program, feed_names, fetch_list):
     eval_losses = []
     total_score = 0
     for eval_step, (data, labels, loss_mask, info) in enumerate(eval_loader()):
-        preds = exe.run(program=program,
-                        feed=data,
-                        fetch_list=fetch_list,
-                        return_numpy=False)
+        preds = exe.run(
+            program=program,
+            feed=data,
+            fetch_list=fetch_list,
+            return_numpy=False)
 
         paddle.disable_static()
 
@@ -88,7 +89,7 @@ def eval_function(exe, program, feed_names, fetch_list):
             masked_lm_loss = paddle.nn.functional.cross_entropy(
                 preds, labels, reduction="none")
             loss = paddle.sum(masked_lm_loss * loss_mask)
-            eval_losses.append(loss.numpy()[0])
+            eval_losses.append(float(loss))
             total_score += loss.numpy() / (num_tokenized_tokens - 1)
 
         else:
@@ -100,8 +101,8 @@ def eval_function(exe, program, feed_names, fetch_list):
             acc = paddle.where(
                 paddle.cast(loss_mask, 'bool'), acc, paddle.ones_like(acc))
             acc = paddle.sum(paddle.prod(acc, -1))
-            eval_losses.append(acc.numpy()[0])
-            total_score += acc.numpy()[0]
+            eval_losses.append(float(acc))
+            total_score += float(acc)
 
         if eval_step != 0 and (eval_step % 10 == 0):
             print("[eval] step: %d, %s: %.9f, speed: %.2f step/s" %
@@ -116,8 +117,8 @@ def eval_function(exe, program, feed_names, fetch_list):
         ppl = math.exp(min(20, total_loss))
         token_ratio = (num_tokenized_tokens - 1) / (num_original_tokens - 1)
         adjusted_ppl = math.exp(min(20, total_loss * token_ratio))
-        string = ' validation results on {} | '.format(gpt_config['Data'][
-            'Eval']['dataset']['name'])
+        string = ' validation results on {} | '.format(
+            gpt_config['Data']['Eval']['dataset']['name'])
         string += 'avg loss: {:.4E} | '.format(total_loss)
         string += 'ppl: {:.4E} | '.format(ppl)
         string += 'adjusted ppl: {:.4E} | '.format(adjusted_ppl)
@@ -126,8 +127,8 @@ def eval_function(exe, program, feed_names, fetch_list):
     else:
         num_correct = float(total_score)
         acc = float(num_correct / num_examples)
-        string = ' validation results on {} | '.format(gpt_config['Data'][
-            'Eval']['dataset']['name'])
+        string = ' validation results on {} | '.format(
+            gpt_config['Data']['Eval']['dataset']['name'])
         string += 'number correct: {:.4E} | '.format(num_correct)
         string += 'total examples: {:.4E} | '.format(num_examples)
         string += 'avg accuracy: {:.4E}'.format(acc)
diff --git a/tests/test_ofa.py b/tests/test_ofa.py
index 395e665b..f5bbcdc3 100644
--- a/tests/test_ofa.py
+++ b/tests/test_ofa.py
@@ -299,10 +299,11 @@ class TestOFA(unittest.TestCase):
         self.elastic_order = ['kernel_size', 'width', 'depth']
 
     def test_ofa(self):
-        ofa_model = OFA(self.model,
-                        self.run_config,
-                        distill_config=self.distill_config,
-                        elastic_order=self.elastic_order)
+        ofa_model = OFA(
+            self.model,
+            self.run_config,
+            distill_config=self.distill_config,
+            elastic_order=self.elastic_order)
 
         start_epoch = 0
         for idx in range(len(self.run_config.n_epochs)):
@@ -316,8 +317,8 @@ class TestOFA(unittest.TestCase):
                                       self.run_config.n_epochs[idx][ph_idx]):
                     if epoch_id == 0:
                         ofa_model.set_epoch(epoch_id)
-                    for model_no in range(self.run_config.dynamic_batch_size[
-                            idx]):
+                    for model_no in range(
+                            self.run_config.dynamic_batch_size[idx]):
                         output = ofa_model(self.data)
                         if (isinstance(output, tuple)):
                             output = output[0]
@@ -325,11 +326,11 @@ class TestOFA(unittest.TestCase):
                         if self.distill_config.mapping_layers != None:
                             dis_loss = ofa_model.calc_distill_loss()
                             loss += dis_loss
-                            dis_loss = dis_loss.numpy()[0]
+                            dis_loss = float(dis_loss)
                         else:
                             dis_loss = 0
                         print('epoch: {}, loss: {}, distill loss: {}'.format(
-                            epoch_id, loss.numpy()[0], dis_loss))
+                            epoch_id, float(loss), dis_loss))
                         loss.backward()
                         adam.minimize(loss)
                         adam.clear_gradients()
@@ -536,8 +537,9 @@ class TestManualSetting(unittest.TestCase):
         self.ofa_model2 = OFA(self.model, run_config=run_config)
         self.ofa_model2._clear_search_space(self.data)
         #print(self.ofa_model2._ofa_layers)
-        assert self.ofa_model2._ofa_layers['models.1'][
-            'expand_ratio'] == [0.25, 1.0]
+        assert self.ofa_model2._ofa_layers['models.1']['expand_ratio'] == [
+            0.25, 1.0
+        ]
         assert len(self.ofa_model2._ofa_layers) == 2
         #print(self.ofa_model_1._ofa_layers)
 
--
GitLab
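
Below is a minimal sketch of the behaviour this patch adapts to, assuming a
recent PaddlePaddle release in which reductions such as paddle.sum, paddle.mean
and cross_entropy return 0-D tensors (shape []) instead of shape [1]. The
tensors and sizes used here are illustrative only, not taken from the patched
files.

    import paddle

    # With 0-D support, a reduced loss has shape [] rather than [1].
    logits = paddle.rand([4, 10])
    labels = paddle.randint(0, 10, [4])            # int64 class ids
    loss = paddle.nn.functional.cross_entropy(logits, labels)  # scalar loss

    # loss.numpy()[0] assumes a length-1 array; on a 0-D NumPy array that
    # indexing raises "IndexError: too many indices for array".
    value = float(loss)                  # works for both 0-D and shape-[1]
    print(value, loss.numpy().item())    # .item() is an equivalent alternative

float(loss) (or Tensor.item()) extracts the Python scalar regardless of whether
the tensor is 0-D or has shape [1], which is why the patch replaces every
loss.numpy()[0] with float(loss).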