Change loss.numpy()[0] to float(loss) to adapt to 0-D tensors (#1640)

8bf2df5b · zhouweiwei2014 · GitHub · 92874cc0 · 8bf2df5b · 8bf2df5b
Showing 3 changed files with 31 additions and 25 deletions

demo/one_shot/ofa_train.py demo/one_shot/ofa_train.py +7 -4

example/quantization_analysis/GPT/analysis.py example/quantization_analysis/GPT/analysis.py +12 -11

tests/test_ofa.py tests/test_ofa.py +12 -10

未找到文件。
--- a/demo/one_shot/ofa_train.py
+++ b/demo/one_shot/ofa_train.py
@@ -34,7 +34,9 @@ class Model(nn.Layer):
            models += [ReLU()]
            models += [nn.Pool2D(2, 'max', 2)]
            models += [
-                nn.Linear(784, 120), nn.Linear(120, 84), nn.Linear(84, 10)
+                nn.Linear(784, 120),
+                nn.Linear(120, 84),
+                nn.Linear(84, 10)
            ]
            models = ofa_super.convert(models)
        self.models = paddle.nn.Sequential(*models)
@@ -104,8 +106,9 @@ def test_ofa():
                    dy_x_data = np.array(
                        [x[0].reshape(1, 28, 28)
                         for x in data]).astype('float32')
-                    y_data = np.array(
+                    y_data = np.array([x[1]
-                        [x[1] for x in data]).astype('int64').reshape(-1, 1)
+                                       for x in data]).astype('int64').reshape(
+                                           -1, 1)
                    img = paddle.to_tensor(dy_x_data)
                    label = paddle.to_tensor(y_data)
@@ -122,7 +125,7 @@ def test_ofa():
                            print(
                                'epoch: {}, batch: {}, loss: {}, distill loss: {}'.
                                format(epoch_id, batch_id,
-                                       loss.numpy()[0], dis_loss.numpy()[0]))
+                                       float(loss), float(dis_loss)))
                    ### accumurate dynamic_batch_size network of gradients for same batch of data
                    ### NOTE: need to fix gradients accumulate in PaddlePaddle
                    adam.minimize(loss)

--- a/example/quantization_analysis/GPT/analysis.py
+++ b/example/quantization_analysis/GPT/analysis.py
@@ -67,7 +67,8 @@ def eval_function(exe, program, feed_names, fetch_list):
    eval_losses = []
    total_score = 0
    for eval_step, (data, labels, loss_mask, info) in enumerate(eval_loader()):
-        preds = exe.run(program=program,
+        preds = exe.run(
+            program=program,
            feed=data,
            fetch_list=fetch_list,
            return_numpy=False)
@@ -88,7 +89,7 @@ def eval_function(exe, program, feed_names, fetch_list):
            masked_lm_loss = paddle.nn.functional.cross_entropy(
                preds, labels, reduction="none")
            loss = paddle.sum(masked_lm_loss * loss_mask)
-            eval_losses.append(loss.numpy()[0])
+            eval_losses.append(float(loss))
            total_score += loss.numpy() / (num_tokenized_tokens - 1)
        else:
@@ -100,8 +101,8 @@ def eval_function(exe, program, feed_names, fetch_list):
            acc = paddle.where(
                paddle.cast(loss_mask, 'bool'), acc, paddle.ones_like(acc))
            acc = paddle.sum(paddle.prod(acc, -1))
-            eval_losses.append(acc.numpy()[0])
+            eval_losses.append(float(acc))
-            total_score += acc.numpy()[0]
+            total_score += float(acc)
        if eval_step != 0 and (eval_step % 10 == 0):
            print("[eval] step: %d, %s: %.9f, speed: %.2f step/s" %
@@ -116,8 +117,8 @@ def eval_function(exe, program, feed_names, fetch_list):
        ppl = math.exp(min(20, total_loss))
        token_ratio = (num_tokenized_tokens - 1) / (num_original_tokens - 1)
        adjusted_ppl = math.exp(min(20, total_loss * token_ratio))
-        string = ' validation results on {} | '.format(gpt_config['Data'][
+        string = ' validation results on {} | '.format(
-            'Eval']['dataset']['name'])
+            gpt_config['Data']['Eval']['dataset']['name'])
        string += 'avg loss: {:.4E} | '.format(total_loss)
        string += 'ppl: {:.4E} | '.format(ppl)
        string += 'adjusted ppl: {:.4E} | '.format(adjusted_ppl)
@@ -126,8 +127,8 @@ def eval_function(exe, program, feed_names, fetch_list):
    else:
        num_correct = float(total_score)
        acc = float(num_correct / num_examples)
-        string = ' validation results on {} | '.format(gpt_config['Data'][
+        string = ' validation results on {} | '.format(
-            'Eval']['dataset']['name'])
+            gpt_config['Data']['Eval']['dataset']['name'])
        string += 'number correct: {:.4E} | '.format(num_correct)
        string += 'total examples: {:.4E} | '.format(num_examples)
        string += 'avg accuracy: {:.4E}'.format(acc)

--- a/tests/test_ofa.py
+++ b/tests/test_ofa.py
@@ -299,7 +299,8 @@ class TestOFA(unittest.TestCase):
        self.elastic_order = ['kernel_size', 'width', 'depth']
    def test_ofa(self):
-        ofa_model = OFA(self.model,
+        ofa_model = OFA(
+            self.model,
            self.run_config,
            distill_config=self.distill_config,
            elastic_order=self.elastic_order)
@@ -316,8 +317,8 @@ class TestOFA(unittest.TestCase):
                                      self.run_config.n_epochs[idx][ph_idx]):
                    if epoch_id == 0:
                        ofa_model.set_epoch(epoch_id)
-                    for model_no in range(self.run_config.dynamic_batch_size[
+                    for model_no in range(
-                            idx]):
+                            self.run_config.dynamic_batch_size[idx]):
                        output = ofa_model(self.data)
                        if (isinstance(output, tuple)):
                            output = output[0]
@@ -325,11 +326,11 @@ class TestOFA(unittest.TestCase):
                        if self.distill_config.mapping_layers != None:
                            dis_loss = ofa_model.calc_distill_loss()
                            loss += dis_loss
-                            dis_loss = dis_loss.numpy()[0]
+                            dis_loss = float(dis_loss)
                        else:
                            dis_loss = 0
                        print('epoch: {}, loss: {}, distill loss: {}'.format(
-                            epoch_id, loss.numpy()[0], dis_loss))
+                            epoch_id, float(loss), dis_loss))
                        loss.backward()
                        adam.minimize(loss)
                        adam.clear_gradients()
@@ -536,8 +537,9 @@ class TestManualSetting(unittest.TestCase):
        self.ofa_model2 = OFA(self.model, run_config=run_config)
        self.ofa_model2._clear_search_space(self.data)
        #print(self.ofa_model2._ofa_layers)
-        assert self.ofa_model2._ofa_layers['models.1'][
+        assert self.ofa_model2._ofa_layers['models.1']['expand_ratio'] == [
-            'expand_ratio'] == [0.25, 1.0]
+            0.25, 1.0
+        ]
        assert len(self.ofa_model2._ofa_layers) == 2
        #print(self.ofa_model_1._ofa_layers)