From 3c46b9ee5381149d30ac80604f326a6eed5ee389 Mon Sep 17 00:00:00 2001 From: lvmingfu <630944715@qq.com> Date: Fri, 31 Jul 2020 17:27:53 +0800 Subject: [PATCH] Unify code formats in notebook for r0.6 --- .../computer_vision_application.ipynb | 20 +- tutorials/notebook/nlp_application.ipynb | 7810 ++++++++--------- 2 files changed, 3910 insertions(+), 3920 deletions(-) diff --git a/tutorials/notebook/computer_vision_application.ipynb b/tutorials/notebook/computer_vision_application.ipynb index 1ea4af28..0716952d 100644 --- a/tutorials/notebook/computer_vision_application.ipynb +++ b/tutorials/notebook/computer_vision_application.ipynb @@ -432,7 +432,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 1/ 10], step: [ 1875/ 1875], loss: [1.2926], avg los: [1.2926], time: [105040.2632]\n", + "epoch: 1 step: 1875, loss is 1.2926\n", "Epoch time: 105040.679, per step time: 56.022, avg loss: 1.293\n", "************************************************************\n" ] @@ -452,7 +452,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 2/ 10], step: [ 1875/ 1875], loss: [0.8226], avg los: [0.8226], time: [58600.0702]\n", + "epoch: 2 step: 1875, loss is 0.8226\n", "Epoch time: 58600.447, per step time: 31.254, avg loss: 0.823\n", "************************************************************\n" ] @@ -472,7 +472,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 3/ 10], step: [ 1875/ 1875], loss: [0.6473], avg los: [0.6473], time: [58604.6124]\n", + "epoch: 3 step: 1875, loss is 0.6473\n", "Epoch time: 58604.997, per step time: 31.256, avg loss: 0.647\n", "************************************************************\n" ] @@ -492,7 +492,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 4/ 10], step: [ 1875/ 1875], loss: [0.3235], avg los: [0.3235], time: [58602.4463]\n", + "epoch: 4 step: 1875, loss is 0.3235\n", "Epoch time: 58602.803, per step time: 31.255, avg loss: 0.324\n", "************************************************************\n" ] @@ -512,7 +512,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 5/ 10], step: [ 1875/ 1875], loss: [0.4524], avg los: [0.4524], time: [58594.3551]\n", + "epoch: 5 step: 1875, loss is 0.4524\n", "Epoch time: 58594.759, per step time: 31.251, avg loss: 0.452\n", "************************************************************\n" ] @@ -532,7 +532,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 6/ 10], step: [ 1875/ 1875], loss: [0.5436], avg los: [0.5436], time: [58600.3022]\n", + "epoch: 6 step: 1875, loss is 0.5436\n", "Epoch time: 58600.685, per step time: 31.254, avg loss: 0.544\n", "************************************************************\n" ] @@ -552,7 +552,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 7/ 10], step: [ 1875/ 1875], loss: [0.3080], avg los: [0.3080], time: [58599.0782]\n", + "epoch: 7 step: 1875, loss is 0.3080\n", "Epoch time: 58599.470, per step time: 31.253, avg loss: 0.308\n", "************************************************************\n" ] @@ -572,7 +572,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 8/ 10], step: [ 1875/ 1875], loss: [0.4420], avg los: [0.4420], time: [58600.0016]\n", + "epoch: 8 step: 1875, loss is 0.4420\n", "Epoch time: 58600.389, per step time: 31.254, avg loss: 0.442\n", "************************************************************\n" ] @@ -592,7 +592,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 9/ 10], step: [ 1875/ 1875], loss: [0.2113], avg los: [0.2113], time: [58598.8488]\n", + "epoch: 9 step: 1875, loss is 0.2113\n", "Epoch time: 58599.249, per step time: 31.253, avg loss: 0.211\n", "************************************************************\n" ] @@ -612,7 +612,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 10/ 10], step: [ 1875/ 1875], loss: [0.3720], avg los: [0.3720], time: [58583.0929]\n", + "epoch: 10 step: 1875, loss is 0.3720\n", "Epoch time: 58583.483, per step time: 31.245, avg loss: 0.372\n", "************************************************************\n" ] diff --git a/tutorials/notebook/nlp_application.ipynb b/tutorials/notebook/nlp_application.ipynb index aed020a7..7d5926b9 100644 --- a/tutorials/notebook/nlp_application.ipynb +++ b/tutorials/notebook/nlp_application.ipynb @@ -844,4198 +844,4188 @@ "output_type": "stream", "text": [ "============== Starting Training ==============\n", - "Epoch: [ 1/ 10], step: [ 1/ 390], loss: [0.6938], avg loss: [0.6938], time: [445.6811ms]\n", - "Epoch: [ 1/ 10], step: [ 2/ 390], loss: [0.6922], avg loss: [0.6930], time: [106.1635ms]\n", - "Epoch: [ 1/ 10], step: [ 3/ 390], loss: [0.6917], avg loss: [0.6926], time: [103.0388ms]\n", - "Epoch: [ 1/ 10], step: [ 4/ 390], loss: [0.6952], avg loss: [0.6932], time: [102.2997ms]\n", - "Epoch: [ 1/ 10], step: [ 5/ 390], loss: [0.6868], avg loss: [0.6920], time: [102.2105ms]\n", - "Epoch: [ 1/ 10], step: [ 6/ 390], loss: [0.6982], avg loss: [0.6930], time: [67.6618ms]\n", - "Epoch: [ 1/ 10], step: [ 7/ 390], loss: [0.6856], avg loss: [0.6919], time: [99.7233ms]\n", - "Epoch: [ 1/ 10], step: [ 8/ 390], loss: [0.6819], avg loss: [0.6907], time: [102.4535ms]\n", - "Epoch: [ 1/ 10], step: [ 9/ 390], loss: [0.7372], avg loss: [0.6959], time: [99.7229ms]\n", - "Epoch: [ 1/ 10], step: [ 10/ 390], loss: [0.6948], avg loss: [0.6957], time: [101.9838ms]\n", - "Epoch: [ 1/ 10], step: [ 11/ 390], loss: [0.6961], avg loss: [0.6958], time: [98.3083ms]\n", - "Epoch: [ 1/ 10], step: [ 12/ 390], loss: [0.6975], avg loss: [0.6959], time: [102.0112ms]\n", - "Epoch: [ 1/ 10], step: [ 13/ 390], loss: [0.6931], avg loss: [0.6957], time: [99.0953ms]\n", - "Epoch: [ 1/ 10], step: [ 14/ 390], loss: [0.6903], avg loss: [0.6953], time: [103.9104ms]\n", - "Epoch: [ 1/ 10], step: [ 15/ 390], loss: [0.6720], avg loss: [0.6938], time: [98.5680ms]\n", - "Epoch: [ 1/ 10], step: [ 16/ 390], loss: [0.7079], avg loss: [0.6946], time: [104.0378ms]\n", - "Epoch: [ 1/ 10], step: [ 17/ 390], loss: [0.7125], avg loss: [0.6957], time: [102.7691ms]\n", - "Epoch: [ 1/ 10], step: [ 18/ 390], loss: [0.7477], avg loss: [0.6986], time: [101.6769ms]\n", - "Epoch: [ 1/ 10], step: [ 19/ 390], loss: [0.6924], avg loss: [0.6983], time: [102.2615ms]\n", - "Epoch: [ 1/ 10], step: [ 20/ 390], loss: [0.7085], avg loss: [0.6988], time: [103.7412ms]\n", - "Epoch: [ 1/ 10], step: [ 21/ 390], loss: [0.6958], avg loss: [0.6986], time: [99.9415ms]\n", - "Epoch: [ 1/ 10], step: [ 22/ 390], loss: [0.6918], avg loss: [0.6983], time: [104.0506ms]\n", - "Epoch: [ 1/ 10], step: [ 23/ 390], loss: [0.6985], avg loss: [0.6983], time: [99.7727ms]\n", - "Epoch: [ 1/ 10], step: [ 24/ 390], loss: [0.6919], avg loss: [0.6981], time: [103.3425ms]\n", - "Epoch: [ 1/ 10], step: [ 25/ 390], loss: [0.6858], avg loss: [0.6976], time: [100.5120ms]\n", - "Epoch: [ 1/ 10], step: [ 26/ 390], loss: [0.6796], avg loss: [0.6969], time: [103.8098ms]\n", - "Epoch: [ 1/ 10], step: [ 27/ 390], loss: [0.7113], avg loss: [0.6974], time: [100.4076ms]\n", - "Epoch: [ 1/ 10], step: [ 28/ 390], loss: [0.7065], avg loss: [0.6977], time: [105.4525ms]\n", - "Epoch: [ 1/ 10], step: [ 29/ 390], loss: [0.6910], avg loss: [0.6975], time: [101.6884ms]\n", - "Epoch: [ 1/ 10], step: [ 30/ 390], loss: [0.6896], avg loss: [0.6972], time: [104.4266ms]\n", - "Epoch: [ 1/ 10], step: [ 31/ 390], loss: [0.6968], avg loss: [0.6972], time: [98.8655ms]\n", - "Epoch: [ 1/ 10], step: [ 32/ 390], loss: [0.6906], avg loss: [0.6970], time: [104.9941ms]\n", - "Epoch: [ 1/ 10], step: [ 33/ 390], loss: [0.6932], avg loss: [0.6969], time: [99.1578ms]\n", - "Epoch: [ 1/ 10], step: [ 34/ 390], loss: [0.6872], avg loss: [0.6966], time: [101.4924ms]\n", - "Epoch: [ 1/ 10], step: [ 35/ 390], loss: [0.6887], avg loss: [0.6964], time: [100.5478ms]\n", - "Epoch: [ 1/ 10], step: [ 36/ 390], loss: [0.6789], avg loss: [0.6959], time: [102.1488ms]\n", - "Epoch: [ 1/ 10], step: [ 37/ 390], loss: [0.6729], avg loss: [0.6953], time: [99.9565ms]\n", - "Epoch: [ 1/ 10], step: [ 38/ 390], loss: [0.7344], avg loss: [0.6963], time: [102.1416ms]\n", - "Epoch: [ 1/ 10], step: [ 39/ 390], loss: [0.6946], avg loss: [0.6963], time: [101.9769ms]\n", - "Epoch: [ 1/ 10], step: [ 40/ 390], loss: [0.6977], avg loss: [0.6963], time: [103.1592ms]\n", - "Epoch: [ 1/ 10], step: [ 41/ 390], loss: [0.7134], avg loss: [0.6967], time: [97.4550ms]\n", - "Epoch: [ 1/ 10], step: [ 42/ 390], loss: [0.6807], avg loss: [0.6963], time: [101.9244ms]\n", - "Epoch: [ 1/ 10], step: [ 43/ 390], loss: [0.6798], avg loss: [0.6960], time: [98.6509ms]\n", - "Epoch: [ 1/ 10], step: [ 44/ 390], loss: [0.7065], avg loss: [0.6962], time: [100.4102ms]\n", - "Epoch: [ 1/ 10], step: [ 45/ 390], loss: [0.6930], avg loss: [0.6961], time: [99.2818ms]\n", - "Epoch: [ 1/ 10], step: [ 46/ 390], loss: [0.6925], avg loss: [0.6960], time: [97.1210ms]\n", - "Epoch: [ 1/ 10], step: [ 47/ 390], loss: [0.6824], avg loss: [0.6958], time: [98.3243ms]\n", - "Epoch: [ 1/ 10], step: [ 48/ 390], loss: [0.7224], avg loss: [0.6963], time: [99.4642ms]\n", - "Epoch: [ 1/ 10], step: [ 49/ 390], loss: [0.7051], avg loss: [0.6965], time: [95.3386ms]\n", - "Epoch: [ 1/ 10], step: [ 50/ 390], loss: [0.7195], avg loss: [0.6970], time: [101.0215ms]\n", - "Epoch: [ 1/ 10], step: [ 51/ 390], loss: [0.6927], avg loss: [0.6969], time: [96.9672ms]\n", - "Epoch: [ 1/ 10], step: [ 52/ 390], loss: [0.7097], avg loss: [0.6971], time: [97.8920ms]\n", - "Epoch: [ 1/ 10], step: [ 53/ 390], loss: [0.6849], avg loss: [0.6969], time: [96.2329ms]\n", - "Epoch: [ 1/ 10], step: [ 54/ 390], loss: [0.6892], avg loss: [0.6967], time: [103.4982ms]\n", - "Epoch: [ 1/ 10], step: [ 55/ 390], loss: [0.6926], avg loss: [0.6967], time: [95.6774ms]\n", - "Epoch: [ 1/ 10], step: [ 56/ 390], loss: [0.6934], avg loss: [0.6966], time: [100.6739ms]\n", - "Epoch: [ 1/ 10], step: [ 57/ 390], loss: [0.6891], avg loss: [0.6965], time: [97.0731ms]\n", - "Epoch: [ 1/ 10], step: [ 58/ 390], loss: [0.7068], avg loss: [0.6967], time: [99.1342ms]\n", - "Epoch: [ 1/ 10], step: [ 59/ 390], loss: [0.6920], avg loss: [0.6966], time: [96.6048ms]\n", - "Epoch: [ 1/ 10], step: [ 60/ 390], loss: [0.7120], avg loss: [0.6968], time: [106.0467ms]\n", - "Epoch: [ 1/ 10], step: [ 61/ 390], loss: [0.6930], avg loss: [0.6968], time: [98.2921ms]\n", - "Epoch: [ 1/ 10], step: [ 62/ 390], loss: [0.7112], avg loss: [0.6970], time: [99.8714ms]\n", - "Epoch: [ 1/ 10], step: [ 63/ 390], loss: [0.6845], avg loss: [0.6968], time: [99.9265ms]\n", - "Epoch: [ 1/ 10], step: [ 64/ 390], loss: [0.6958], avg loss: [0.6968], time: [101.6951ms]\n", - "Epoch: [ 1/ 10], step: [ 65/ 390], loss: [0.6909], avg loss: [0.6967], time: [95.9563ms]\n", - "Epoch: [ 1/ 10], step: [ 66/ 390], loss: [0.6876], avg loss: [0.6966], time: [102.0942ms]\n", - "Epoch: [ 1/ 10], step: [ 67/ 390], loss: [0.6800], avg loss: [0.6963], time: [97.1215ms]\n", - "Epoch: [ 1/ 10], step: [ 68/ 390], loss: [0.7101], avg loss: [0.6965], time: [102.3653ms]\n", - "Epoch: [ 1/ 10], step: [ 69/ 390], loss: [0.7078], avg loss: [0.6967], time: [97.5039ms]\n", - "Epoch: [ 1/ 10], step: [ 70/ 390], loss: [0.6890], avg loss: [0.6966], time: [103.4834ms]\n", - "Epoch: [ 1/ 10], step: [ 71/ 390], loss: [0.6859], avg loss: [0.6964], time: [98.1841ms]\n", - "Epoch: [ 1/ 10], step: [ 72/ 390], loss: [0.6913], avg loss: [0.6963], time: [98.9609ms]\n", - "Epoch: [ 1/ 10], step: [ 73/ 390], loss: [0.6935], avg loss: [0.6963], time: [98.4514ms]\n", - "Epoch: [ 1/ 10], step: [ 74/ 390], loss: [0.6905], avg loss: [0.6962], time: [100.3788ms]\n", - "Epoch: [ 1/ 10], step: [ 75/ 390], loss: [0.6936], avg loss: [0.6962], time: [99.1523ms]\n", - "Epoch: [ 1/ 10], step: [ 76/ 390], loss: [0.6901], avg loss: [0.6961], time: [98.4559ms]\n", - "Epoch: [ 1/ 10], step: [ 77/ 390], loss: [0.6826], avg loss: [0.6959], time: [96.8366ms]\n", - "Epoch: [ 1/ 10], step: [ 78/ 390], loss: [0.6930], avg loss: [0.6959], time: [101.1457ms]\n", - "Epoch: [ 1/ 10], step: [ 79/ 390], loss: [0.6936], avg loss: [0.6959], time: [98.6462ms]\n", - "Epoch: [ 1/ 10], step: [ 80/ 390], loss: [0.6921], avg loss: [0.6958], time: [104.7125ms]\n", - "Epoch: [ 1/ 10], step: [ 81/ 390], loss: [0.6839], avg loss: [0.6957], time: [95.8931ms]\n", - "Epoch: [ 1/ 10], step: [ 82/ 390], loss: [0.6910], avg loss: [0.6956], time: [102.4179ms]\n", - "Epoch: [ 1/ 10], step: [ 83/ 390], loss: [0.6954], avg loss: [0.6956], time: [96.1897ms]\n", - "Epoch: [ 1/ 10], step: [ 84/ 390], loss: [0.6838], avg loss: [0.6955], time: [101.9053ms]\n", - "Epoch: [ 1/ 10], step: [ 85/ 390], loss: [0.6928], avg loss: [0.6954], time: [96.2470ms]\n", - "Epoch: [ 1/ 10], step: [ 86/ 390], loss: [0.6931], avg loss: [0.6954], time: [100.2293ms]\n", - "Epoch: [ 1/ 10], step: [ 87/ 390], loss: [0.6784], avg loss: [0.6952], time: [99.4971ms]\n", - "Epoch: [ 1/ 10], step: [ 88/ 390], loss: [0.6821], avg loss: [0.6951], time: [101.0315ms]\n", - "Epoch: [ 1/ 10], step: [ 89/ 390], loss: [0.6899], avg loss: [0.6950], time: [96.1020ms]\n" + "epoch: 1 step: 1, loss is 0.6938\n", + "epoch: 1 step: 2, loss is 0.6922\n", + "epoch: 1 step: 3, loss is 0.6917\n", + "epoch: 1 step: 4, loss is 0.6952\n", + "epoch: 1 step: 5, loss is 0.6868\n", + "epoch: 1 step: 6, loss is 0.6982\n", + "epoch: 1 step: 7, loss is 0.6856\n", + "epoch: 1 step: 8, loss is 0.6819\n", + "epoch: 1 step: 9, loss is 0.7372\n", + "epoch: 1 step: 10, loss is 0.6948\n", + "epoch: 1 step: 11, loss is 0.6961\n", + "epoch: 1 step: 12, loss is 0.6975\n", + "epoch: 1 step: 13, loss is 0.6931\n", + "epoch: 1 step: 14, loss is 0.6903\n", + "epoch: 1 step: 15, loss is 0.6720\n", + "epoch: 1 step: 16, loss is 0.7079\n", + "epoch: 1 step: 17, loss is 0.7125\n", + "epoch: 1 step: 18, loss is 0.7477\n", + "epoch: 1 step: 19, loss is 0.6924\n", + "epoch: 1 step: 20, loss is 0.7085\n", + "epoch: 1 step: 21, loss is 0.6958\n", + "epoch: 1 step: 22, loss is 0.6918\n", + "epoch: 1 step: 23, loss is 0.6985\n", + "epoch: 1 step: 24, loss is 0.6919\n", + "epoch: 1 step: 25, loss is 0.6858\n", + "epoch: 1 step: 26, loss is 0.6796\n", + "epoch: 1 step: 27, loss is 0.7113\n", + "epoch: 1 step: 28, loss is 0.7065\n", + "epoch: 1 step: 29, loss is 0.6910\n", + "epoch: 1 step: 30, loss is 0.6896\n", + "epoch: 1 step: 31, loss is 0.6968\n", + "epoch: 1 step: 32, loss is 0.6906\n", + "epoch: 1 step: 33, loss is 0.6932\n", + "epoch: 1 step: 34, loss is 0.6872\n", + "epoch: 1 step: 35, loss is 0.6887\n", + "epoch: 1 step: 36, loss is 0.6789\n", + "epoch: 1 step: 37, loss is 0.6729\n", + "epoch: 1 step: 38, loss is 0.7344\n", + "epoch: 1 step: 39, loss is 0.6946\n", + "epoch: 1 step: 40, loss is 0.6977\n", + "epoch: 1 step: 41, loss is 0.7134\n", + "epoch: 1 step: 42, loss is 0.6807\n", + "epoch: 1 step: 43, loss is 0.6798\n", + "epoch: 1 step: 44, loss is 0.7065\n", + "epoch: 1 step: 45, loss is 0.6930\n", + "epoch: 1 step: 46, loss is 0.6925\n", + "epoch: 1 step: 47, loss is 0.6824\n", + "epoch: 1 step: 48, loss is 0.7224\n", + "epoch: 1 step: 49, loss is 0.7051\n", + "epoch: 1 step: 50, loss is 0.7195\n", + "epoch: 1 step: 51, loss is 0.6927\n", + "epoch: 1 step: 52, loss is 0.7097\n", + "epoch: 1 step: 53, loss is 0.6849\n", + "epoch: 1 step: 54, loss is 0.6892\n", + "epoch: 1 step: 55, loss is 0.6926\n", + "epoch: 1 step: 56, loss is 0.6934\n", + "epoch: 1 step: 57, loss is 0.6891\n", + "epoch: 1 step: 58, loss is 0.7068\n", + "epoch: 1 step: 59, loss is 0.6920\n", + "epoch: 1 step: 60, loss is 0.7120\n", + "epoch: 1 step: 61, loss is 0.6930\n", + "epoch: 1 step: 62, loss is 0.7112\n", + "epoch: 1 step: 63, loss is 0.6845\n", + "epoch: 1 step: 64, loss is 0.6958\n", + "epoch: 1 step: 65, loss is 0.6909\n", + "epoch: 1 step: 66, loss is 0.6876\n", + "epoch: 1 step: 67, loss is 0.6800\n", + "epoch: 1 step: 68, loss is 0.7101\n", + "epoch: 1 step: 69, loss is 0.7078\n", + "epoch: 1 step: 70, loss is 0.6890\n", + "epoch: 1 step: 71, loss is 0.6859\n", + "epoch: 1 step: 72, loss is 0.6913\n", + "epoch: 1 step: 73, loss is 0.6935\n", + "epoch: 1 step: 74, loss is 0.6905\n", + "epoch: 1 step: 75, loss is 0.6936\n", + "epoch: 1 step: 76, loss is 0.6901\n", + "epoch: 1 step: 77, loss is 0.6826\n", + "epoch: 1 step: 78, loss is 0.6930\n", + "epoch: 1 step: 79, loss is 0.6936\n", + "epoch: 1 step: 80, loss is 0.6921\n", + "epoch: 1 step: 81, loss is 0.6839\n", + "epoch: 1 step: 82, loss is 0.6910\n", + "epoch: 1 step: 83, loss is 0.6954\n", + "epoch: 1 step: 84, loss is 0.6838\n", + "epoch: 1 step: 85, loss is 0.6928\n", + "epoch: 1 step: 86, loss is 0.6931\n", + "epoch: 1 step: 87, loss is 0.6784\n", + "epoch: 1 step: 88, loss is 0.6821\n", + "epoch: 1 step: 89, loss is 0.6899\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 1/ 10], step: [ 90/ 390], loss: [0.6860], avg loss: [0.6949], time: [104.0313ms]\n", - "Epoch: [ 1/ 10], step: [ 91/ 390], loss: [0.6900], avg loss: [0.6949], time: [98.9680ms]\n", - "Epoch: [ 1/ 10], step: [ 92/ 390], loss: [0.6846], avg loss: [0.6947], time: [100.7631ms]\n", - "Epoch: [ 1/ 10], step: [ 93/ 390], loss: [0.6833], avg loss: [0.6946], time: [99.0198ms]\n", - "Epoch: [ 1/ 10], step: [ 94/ 390], loss: [0.6901], avg loss: [0.6946], time: [99.3226ms]\n", - "Epoch: [ 1/ 10], step: [ 95/ 390], loss: [0.6831], avg loss: [0.6945], time: [97.3852ms]\n", - "Epoch: [ 1/ 10], step: [ 96/ 390], loss: [0.7010], avg loss: [0.6945], time: [102.8271ms]\n", - "Epoch: [ 1/ 10], step: [ 97/ 390], loss: [0.6925], avg loss: [0.6945], time: [96.1418ms]\n", - "Epoch: [ 1/ 10], step: [ 98/ 390], loss: [0.6768], avg loss: [0.6943], time: [98.8572ms]\n", - "Epoch: [ 1/ 10], step: [ 99/ 390], loss: [0.6848], avg loss: [0.6942], time: [96.3254ms]\n", - "Epoch: [ 1/ 10], step: [ 100/ 390], loss: [0.6925], avg loss: [0.6942], time: [105.0456ms]\n", - "Epoch: [ 1/ 10], step: [ 101/ 390], loss: [0.7067], avg loss: [0.6943], time: [96.3614ms]\n", - "Epoch: [ 1/ 10], step: [ 102/ 390], loss: [0.7053], avg loss: [0.6944], time: [100.3454ms]\n", - "Epoch: [ 1/ 10], step: [ 103/ 390], loss: [0.6841], avg loss: [0.6943], time: [97.2383ms]\n", - "Epoch: [ 1/ 10], step: [ 104/ 390], loss: [0.6882], avg loss: [0.6943], time: [99.4568ms]\n", - "Epoch: [ 1/ 10], step: [ 105/ 390], loss: [0.6794], avg loss: [0.6941], time: [97.8613ms]\n", - "Epoch: [ 1/ 10], step: [ 106/ 390], loss: [0.6754], avg loss: [0.6940], time: [103.1914ms]\n", - "Epoch: [ 1/ 10], step: [ 107/ 390], loss: [0.6788], avg loss: [0.6938], time: [100.8022ms]\n", - "Epoch: [ 1/ 10], step: [ 108/ 390], loss: [0.6930], avg loss: [0.6938], time: [101.5067ms]\n", - "Epoch: [ 1/ 10], step: [ 109/ 390], loss: [0.6792], avg loss: [0.6937], time: [98.3841ms]\n", - "Epoch: [ 1/ 10], step: [ 110/ 390], loss: [0.6889], avg loss: [0.6936], time: [100.2100ms]\n", - "Epoch: [ 1/ 10], step: [ 111/ 390], loss: [0.6800], avg loss: [0.6935], time: [99.3590ms]\n", - "Epoch: [ 1/ 10], step: [ 112/ 390], loss: [0.6881], avg loss: [0.6935], time: [104.4703ms]\n", - "Epoch: [ 1/ 10], step: [ 113/ 390], loss: [0.6866], avg loss: [0.6934], time: [99.1809ms]\n", - "Epoch: [ 1/ 10], step: [ 114/ 390], loss: [0.6963], avg loss: [0.6934], time: [101.8333ms]\n", - "Epoch: [ 1/ 10], step: [ 115/ 390], loss: [0.6698], avg loss: [0.6932], time: [102.0269ms]\n", - "Epoch: [ 1/ 10], step: [ 116/ 390], loss: [0.6795], avg loss: [0.6931], time: [104.2218ms]\n", - "Epoch: [ 1/ 10], step: [ 117/ 390], loss: [0.7177], avg loss: [0.6933], time: [98.0408ms]\n", - "Epoch: [ 1/ 10], step: [ 118/ 390], loss: [0.6559], avg loss: [0.6930], time: [104.4662ms]\n", - "Epoch: [ 1/ 10], step: [ 119/ 390], loss: [0.6949], avg loss: [0.6930], time: [97.7294ms]\n", - "Epoch: [ 1/ 10], step: [ 120/ 390], loss: [0.6934], avg loss: [0.6930], time: [100.3757ms]\n", - "Epoch: [ 1/ 10], step: [ 121/ 390], loss: [0.6854], avg loss: [0.6930], time: [95.2394ms]\n", - "Epoch: [ 1/ 10], step: [ 122/ 390], loss: [0.6730], avg loss: [0.6928], time: [105.2535ms]\n", - "Epoch: [ 1/ 10], step: [ 123/ 390], loss: [0.6616], avg loss: [0.6925], time: [96.2501ms]\n", - "Epoch: [ 1/ 10], step: [ 124/ 390], loss: [0.6572], avg loss: [0.6923], time: [97.7559ms]\n", - "Epoch: [ 1/ 10], step: [ 125/ 390], loss: [0.6612], avg loss: [0.6920], time: [96.9963ms]\n", - "Epoch: [ 1/ 10], step: [ 126/ 390], loss: [0.6623], avg loss: [0.6918], time: [98.4409ms]\n", - "Epoch: [ 1/ 10], step: [ 127/ 390], loss: [0.6790], avg loss: [0.6917], time: [96.9732ms]\n", - "Epoch: [ 1/ 10], step: [ 128/ 390], loss: [0.6518], avg loss: [0.6914], time: [98.8829ms]\n", - "Epoch: [ 1/ 10], step: [ 129/ 390], loss: [0.6196], avg loss: [0.6908], time: [97.8017ms]\n", - "Epoch: [ 1/ 10], step: [ 130/ 390], loss: [0.6518], avg loss: [0.6905], time: [98.0737ms]\n", - "Epoch: [ 1/ 10], step: [ 131/ 390], loss: [0.7111], avg loss: [0.6907], time: [101.8670ms]\n", - "Epoch: [ 1/ 10], step: [ 132/ 390], loss: [0.6345], avg loss: [0.6902], time: [100.6875ms]\n", - "Epoch: [ 1/ 10], step: [ 133/ 390], loss: [0.6846], avg loss: [0.6902], time: [100.5409ms]\n", - "Epoch: [ 1/ 10], step: [ 134/ 390], loss: [0.6700], avg loss: [0.6900], time: [99.1569ms]\n", - "Epoch: [ 1/ 10], step: [ 135/ 390], loss: [0.6939], avg loss: [0.6901], time: [98.1600ms]\n", - "Epoch: [ 1/ 10], step: [ 136/ 390], loss: [0.6846], avg loss: [0.6900], time: [100.2150ms]\n", - "Epoch: [ 1/ 10], step: [ 137/ 390], loss: [0.6408], avg loss: [0.6897], time: [99.3683ms]\n", - "Epoch: [ 1/ 10], step: [ 138/ 390], loss: [0.6886], avg loss: [0.6897], time: [100.5194ms]\n", - "Epoch: [ 1/ 10], step: [ 139/ 390], loss: [0.7377], avg loss: [0.6900], time: [97.7733ms]\n", - "Epoch: [ 1/ 10], step: [ 140/ 390], loss: [0.7049], avg loss: [0.6901], time: [99.1342ms]\n", - "Epoch: [ 1/ 10], step: [ 141/ 390], loss: [0.6946], avg loss: [0.6901], time: [101.1744ms]\n", - "Epoch: [ 1/ 10], step: [ 142/ 390], loss: [0.7178], avg loss: [0.6903], time: [103.1477ms]\n", - "Epoch: [ 1/ 10], step: [ 143/ 390], loss: [0.6664], avg loss: [0.6902], time: [96.6640ms]\n", - "Epoch: [ 1/ 10], step: [ 144/ 390], loss: [0.6791], avg loss: [0.6901], time: [101.9955ms]\n", - "Epoch: [ 1/ 10], step: [ 145/ 390], loss: [0.6599], avg loss: [0.6899], time: [96.8127ms]\n", - "Epoch: [ 1/ 10], step: [ 146/ 390], loss: [0.6665], avg loss: [0.6897], time: [102.8697ms]\n", - "Epoch: [ 1/ 10], step: [ 147/ 390], loss: [0.6800], avg loss: [0.6897], time: [97.5199ms]\n", - "Epoch: [ 1/ 10], step: [ 148/ 390], loss: [0.6777], avg loss: [0.6896], time: [100.7779ms]\n", - "Epoch: [ 1/ 10], step: [ 149/ 390], loss: [0.6690], avg loss: [0.6894], time: [96.0045ms]\n", - "Epoch: [ 1/ 10], step: [ 150/ 390], loss: [0.6887], avg loss: [0.6894], time: [105.4056ms]\n", - "Epoch: [ 1/ 10], step: [ 151/ 390], loss: [0.6878], avg loss: [0.6894], time: [102.3405ms]\n", - "Epoch: [ 1/ 10], step: [ 152/ 390], loss: [0.7036], avg loss: [0.6895], time: [100.7700ms]\n", - "Epoch: [ 1/ 10], step: [ 153/ 390], loss: [0.6570], avg loss: [0.6893], time: [98.1102ms]\n", - "Epoch: [ 1/ 10], step: [ 154/ 390], loss: [0.6865], avg loss: [0.6893], time: [99.4499ms]\n", - "Epoch: [ 1/ 10], step: [ 155/ 390], loss: [0.6811], avg loss: [0.6892], time: [102.2248ms]\n", - "Epoch: [ 1/ 10], step: [ 156/ 390], loss: [0.6733], avg loss: [0.6891], time: [102.7901ms]\n", - "Epoch: [ 1/ 10], step: [ 157/ 390], loss: [0.6737], avg loss: [0.6890], time: [98.1770ms]\n", - "Epoch: [ 1/ 10], step: [ 158/ 390], loss: [0.6779], avg loss: [0.6890], time: [102.8569ms]\n", - "Epoch: [ 1/ 10], step: [ 159/ 390], loss: [0.6573], avg loss: [0.6888], time: [98.7372ms]\n", - "Epoch: [ 1/ 10], step: [ 160/ 390], loss: [0.6782], avg loss: [0.6887], time: [104.6941ms]\n", - "Epoch: [ 1/ 10], step: [ 161/ 390], loss: [0.6704], avg loss: [0.6886], time: [98.0818ms]\n", - "Epoch: [ 1/ 10], step: [ 162/ 390], loss: [0.6862], avg loss: [0.6886], time: [102.0994ms]\n", - "Epoch: [ 1/ 10], step: [ 163/ 390], loss: [0.6740], avg loss: [0.6885], time: [95.8638ms]\n", - "Epoch: [ 1/ 10], step: [ 164/ 390], loss: [0.6466], avg loss: [0.6882], time: [99.0460ms]\n", - "Epoch: [ 1/ 10], step: [ 165/ 390], loss: [0.6506], avg loss: [0.6880], time: [98.9368ms]\n", - "Epoch: [ 1/ 10], step: [ 166/ 390], loss: [0.6750], avg loss: [0.6879], time: [101.0432ms]\n", - "Epoch: [ 1/ 10], step: [ 167/ 390], loss: [0.6466], avg loss: [0.6877], time: [101.4707ms]\n", - "Epoch: [ 1/ 10], step: [ 168/ 390], loss: [0.6610], avg loss: [0.6875], time: [106.3678ms]\n", - "Epoch: [ 1/ 10], step: [ 169/ 390], loss: [0.6550], avg loss: [0.6873], time: [98.6440ms]\n", - "Epoch: [ 1/ 10], step: [ 170/ 390], loss: [0.6806], avg loss: [0.6873], time: [100.2948ms]\n", - "Epoch: [ 1/ 10], step: [ 171/ 390], loss: [0.6723], avg loss: [0.6872], time: [98.7585ms]\n", - "Epoch: [ 1/ 10], step: [ 172/ 390], loss: [0.6515], avg loss: [0.6870], time: [99.8044ms]\n", - "Epoch: [ 1/ 10], step: [ 173/ 390], loss: [0.6704], avg loss: [0.6869], time: [101.5334ms]\n", - "Epoch: [ 1/ 10], step: [ 174/ 390], loss: [0.6675], avg loss: [0.6868], time: [99.2222ms]\n", - "Epoch: [ 1/ 10], step: [ 175/ 390], loss: [0.6535], avg loss: [0.6866], time: [99.7167ms]\n", - "Epoch: [ 1/ 10], step: [ 176/ 390], loss: [0.6660], avg loss: [0.6865], time: [103.0588ms]\n", - "Epoch: [ 1/ 10], step: [ 177/ 390], loss: [0.6390], avg loss: [0.6862], time: [99.4101ms]\n", - "Epoch: [ 1/ 10], step: [ 178/ 390], loss: [0.6589], avg loss: [0.6861], time: [98.7556ms]\n" + "epoch: 1 step: 90, loss is 0.6860\n", + "epoch: 1 step: 91, loss is 0.6900\n", + "epoch: 1 step: 92, loss is 0.6846\n", + "epoch: 1 step: 93, loss is 0.6833\n", + "epoch: 1 step: 94, loss is 0.6901\n", + "epoch: 1 step: 95, loss is 0.6831\n", + "epoch: 1 step: 96, loss is 0.7010\n", + "epoch: 1 step: 97, loss is 0.6925\n", + "epoch: 1 step: 98, loss is 0.6768\n", + "epoch: 1 step: 99, loss is 0.6848\n", + "epoch: 1 step: 100, loss is 0.6925\n", + "epoch: 1 step: 101, loss is 0.7067\n", + "epoch: 1 step: 102, loss is 0.7053\n", + "epoch: 1 step: 103, loss is 0.6841\n", + "epoch: 1 step: 104, loss is 0.6882\n", + "epoch: 1 step: 105, loss is 0.6794\n", + "epoch: 1 step: 106, loss is 0.6754\n", + "epoch: 1 step: 107, loss is 0.6788\n", + "epoch: 1 step: 108, loss is 0.6930\n", + "epoch: 1 step: 109, loss is 0.6792\n", + "epoch: 1 step: 110, loss is 0.6889\n", + "epoch: 1 step: 111, loss is 0.6800\n", + "epoch: 1 step: 112, loss is 0.6881\n", + "epoch: 1 step: 113, loss is 0.6866\n", + "epoch: 1 step: 114, loss is 0.6963\n", + "epoch: 1 step: 115, loss is 0.6698\n", + "epoch: 1 step: 116, loss is 0.6795\n", + "epoch: 1 step: 117, loss is 0.7177\n", + "epoch: 1 step: 118, loss is 0.6559\n", + "epoch: 1 step: 119, loss is 0.6949\n", + "epoch: 1 step: 120, loss is 0.6934\n", + "epoch: 1 step: 121, loss is 0.6854\n", + "epoch: 1 step: 122, loss is 0.6730\n", + "epoch: 1 step: 123, loss is 0.6616\n", + "epoch: 1 step: 124, loss is 0.6572\n", + "epoch: 1 step: 125, loss is 0.6612\n", + "epoch: 1 step: 126, loss is 0.6623\n", + "epoch: 1 step: 127, loss is 0.6790\n", + "epoch: 1 step: 128, loss is 0.6518\n", + "epoch: 1 step: 129, loss is 0.6196\n", + "epoch: 1 step: 130, loss is 0.6518\n", + "epoch: 1 step: 131, loss is 0.7111\n", + "epoch: 1 step: 132, loss is 0.6345\n", + "epoch: 1 step: 133, loss is 0.6846\n", + "epoch: 1 step: 134, loss is 0.6700\n", + "epoch: 1 step: 135, loss is 0.6939\n", + "epoch: 1 step: 136, loss is 0.6846\n", + "epoch: 1 step: 137, loss is 0.6408\n", + "epoch: 1 step: 138, loss is 0.6886\n", + "epoch: 1 step: 139, loss is 0.7377\n", + "epoch: 1 step: 140, loss is 0.7049\n", + "epoch: 1 step: 141, loss is 0.6946\n", + "epoch: 1 step: 142, loss is 0.7178\n", + "epoch: 1 step: 143, loss is 0.6664\n", + "epoch: 1 step: 144, loss is 0.6791\n", + "epoch: 1 step: 145, loss is 0.6599\n", + "epoch: 1 step: 146, loss is 0.6665\n", + "epoch: 1 step: 147, loss is 0.6800\n", + "epoch: 1 step: 148, loss is 0.6777\n", + "epoch: 1 step: 149, loss is 0.6690\n", + "epoch: 1 step: 150, loss is 0.6887\n", + "epoch: 1 step: 151, loss is 0.6878\n", + "epoch: 1 step: 152, loss is 0.7036\n", + "epoch: 1 step: 153, loss is 0.6570\n", + "epoch: 1 step: 154, loss is 0.6865\n", + "epoch: 1 step: 155, loss is 0.6811\n", + "epoch: 1 step: 156, loss is 0.6733\n", + "epoch: 1 step: 157, loss is 0.6737\n", + "epoch: 1 step: 158, loss is 0.6779\n", + "epoch: 1 step: 159, loss is 0.6573\n", + "epoch: 1 step: 160, loss is 0.6782\n", + "epoch: 1 step: 161, loss is 0.6704\n", + "epoch: 1 step: 162, loss is 0.6862\n", + "epoch: 1 step: 163, loss is 0.6740\n", + "epoch: 1 step: 164, loss is 0.6466\n", + "epoch: 1 step: 165, loss is 0.6506\n", + "epoch: 1 step: 166, loss is 0.6750\n", + "epoch: 1 step: 167, loss is 0.6466\n", + "epoch: 1 step: 168, loss is 0.6610\n", + "epoch: 1 step: 169, loss is 0.6550\n", + "epoch: 1 step: 170, loss is 0.6806\n", + "epoch: 1 step: 171, loss is 0.6723\n", + "epoch: 1 step: 172, loss is 0.6515\n", + "epoch: 1 step: 173, loss is 0.6704\n", + "epoch: 1 step: 174, loss is 0.6675\n", + "epoch: 1 step: 175, loss is 0.6535\n", + "epoch: 1 step: 176, loss is 0.6660\n", + "epoch: 1 step: 177, loss is 0.6390\n", + "epoch: 1 step: 178, loss is 0.6589\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 1/ 10], step: [ 179/ 390], loss: [0.6838], avg loss: [0.6860], time: [97.2741ms]\n", - "Epoch: [ 1/ 10], step: [ 180/ 390], loss: [0.7194], avg loss: [0.6862], time: [104.8265ms]\n", - "Epoch: [ 1/ 10], step: [ 181/ 390], loss: [0.5811], avg loss: [0.6856], time: [96.4580ms]\n", - "Epoch: [ 1/ 10], step: [ 182/ 390], loss: [0.7140], avg loss: [0.6858], time: [99.6931ms]\n", - "Epoch: [ 1/ 10], step: [ 183/ 390], loss: [0.7558], avg loss: [0.6862], time: [100.7893ms]\n", - "Epoch: [ 1/ 10], step: [ 184/ 390], loss: [0.6419], avg loss: [0.6859], time: [99.4534ms]\n", - "Epoch: [ 1/ 10], step: [ 185/ 390], loss: [0.5970], avg loss: [0.6855], time: [98.1152ms]\n", - "Epoch: [ 1/ 10], step: [ 186/ 390], loss: [0.7137], avg loss: [0.6856], time: [99.8573ms]\n", - "Epoch: [ 1/ 10], step: [ 187/ 390], loss: [0.6258], avg loss: [0.6853], time: [99.4055ms]\n", - "Epoch: [ 1/ 10], step: [ 188/ 390], loss: [0.6423], avg loss: [0.6851], time: [100.4550ms]\n", - "Epoch: [ 1/ 10], step: [ 189/ 390], loss: [0.6785], avg loss: [0.6850], time: [95.7451ms]\n", - "Epoch: [ 1/ 10], step: [ 190/ 390], loss: [0.6613], avg loss: [0.6849], time: [99.9112ms]\n", - "Epoch: [ 1/ 10], step: [ 191/ 390], loss: [0.6538], avg loss: [0.6847], time: [97.4913ms]\n", - "Epoch: [ 1/ 10], step: [ 192/ 390], loss: [0.6377], avg loss: [0.6845], time: [105.1736ms]\n", - "Epoch: [ 1/ 10], step: [ 193/ 390], loss: [0.7727], avg loss: [0.6850], time: [98.9897ms]\n", - "Epoch: [ 1/ 10], step: [ 194/ 390], loss: [0.6539], avg loss: [0.6848], time: [104.8808ms]\n", - "Epoch: [ 1/ 10], step: [ 195/ 390], loss: [0.6855], avg loss: [0.6848], time: [101.0678ms]\n", - "Epoch: [ 1/ 10], step: [ 196/ 390], loss: [0.6523], avg loss: [0.6846], time: [98.1519ms]\n", - "Epoch: [ 1/ 10], step: [ 197/ 390], loss: [0.6892], avg loss: [0.6847], time: [97.2669ms]\n", - "Epoch: [ 1/ 10], step: [ 198/ 390], loss: [0.6495], avg loss: [0.6845], time: [104.5499ms]\n", - "Epoch: [ 1/ 10], step: [ 199/ 390], loss: [0.6546], avg loss: [0.6843], time: [96.3891ms]\n", - "Epoch: [ 1/ 10], step: [ 200/ 390], loss: [0.6856], avg loss: [0.6843], time: [99.0260ms]\n", - "Epoch: [ 1/ 10], step: [ 201/ 390], loss: [0.6739], avg loss: [0.6843], time: [98.8247ms]\n", - "Epoch: [ 1/ 10], step: [ 202/ 390], loss: [0.6894], avg loss: [0.6843], time: [99.9596ms]\n", - "Epoch: [ 1/ 10], step: [ 203/ 390], loss: [0.6625], avg loss: [0.6842], time: [98.8562ms]\n", - "Epoch: [ 1/ 10], step: [ 204/ 390], loss: [0.6656], avg loss: [0.6841], time: [100.0981ms]\n", - "Epoch: [ 1/ 10], step: [ 205/ 390], loss: [0.6302], avg loss: [0.6838], time: [96.4847ms]\n", - "Epoch: [ 1/ 10], step: [ 206/ 390], loss: [0.6459], avg loss: [0.6837], time: [104.7690ms]\n", - "Epoch: [ 1/ 10], step: [ 207/ 390], loss: [0.6626], avg loss: [0.6836], time: [98.5188ms]\n", - "Epoch: [ 1/ 10], step: [ 208/ 390], loss: [0.6679], avg loss: [0.6835], time: [103.6048ms]\n", - "Epoch: [ 1/ 10], step: [ 209/ 390], loss: [0.6209], avg loss: [0.6832], time: [97.0821ms]\n", - "Epoch: [ 1/ 10], step: [ 210/ 390], loss: [0.6665], avg loss: [0.6831], time: [100.1060ms]\n", - "Epoch: [ 1/ 10], step: [ 211/ 390], loss: [0.6486], avg loss: [0.6829], time: [96.8394ms]\n", - "Epoch: [ 1/ 10], step: [ 212/ 390], loss: [0.6675], avg loss: [0.6829], time: [100.1658ms]\n", - "Epoch: [ 1/ 10], step: [ 213/ 390], loss: [0.6709], avg loss: [0.6828], time: [97.8286ms]\n", - "Epoch: [ 1/ 10], step: [ 214/ 390], loss: [0.6539], avg loss: [0.6827], time: [97.7886ms]\n", - "Epoch: [ 1/ 10], step: [ 215/ 390], loss: [0.6299], avg loss: [0.6824], time: [97.8947ms]\n", - "Epoch: [ 1/ 10], step: [ 216/ 390], loss: [0.6258], avg loss: [0.6822], time: [103.6613ms]\n", - "Epoch: [ 1/ 10], step: [ 217/ 390], loss: [0.6113], avg loss: [0.6818], time: [96.6077ms]\n", - "Epoch: [ 1/ 10], step: [ 218/ 390], loss: [0.6566], avg loss: [0.6817], time: [103.4837ms]\n", - "Epoch: [ 1/ 10], step: [ 219/ 390], loss: [0.6309], avg loss: [0.6815], time: [96.1127ms]\n", - "Epoch: [ 1/ 10], step: [ 220/ 390], loss: [0.7080], avg loss: [0.6816], time: [100.4570ms]\n", - "Epoch: [ 1/ 10], step: [ 221/ 390], loss: [0.6745], avg loss: [0.6816], time: [95.4196ms]\n", - "Epoch: [ 1/ 10], step: [ 222/ 390], loss: [0.7327], avg loss: [0.6818], time: [99.2377ms]\n", - "Epoch: [ 1/ 10], step: [ 223/ 390], loss: [0.6556], avg loss: [0.6817], time: [101.9399ms]\n", - "Epoch: [ 1/ 10], step: [ 224/ 390], loss: [0.5917], avg loss: [0.6813], time: [100.4891ms]\n", - "Epoch: [ 1/ 10], step: [ 225/ 390], loss: [0.6625], avg loss: [0.6812], time: [96.1914ms]\n", - "Epoch: [ 1/ 10], step: [ 226/ 390], loss: [0.5993], avg loss: [0.6808], time: [104.5396ms]\n", - "Epoch: [ 1/ 10], step: [ 227/ 390], loss: [0.6162], avg loss: [0.6806], time: [99.0164ms]\n", - "Epoch: [ 1/ 10], step: [ 228/ 390], loss: [0.5698], avg loss: [0.6801], time: [101.6934ms]\n", - "Epoch: [ 1/ 10], step: [ 229/ 390], loss: [0.6088], avg loss: [0.6798], time: [98.6443ms]\n", - "Epoch: [ 1/ 10], step: [ 230/ 390], loss: [0.6212], avg loss: [0.6795], time: [102.3512ms]\n", - "Epoch: [ 1/ 10], step: [ 231/ 390], loss: [0.5745], avg loss: [0.6791], time: [98.6683ms]\n", - "Epoch: [ 1/ 10], step: [ 232/ 390], loss: [0.6947], avg loss: [0.6791], time: [100.1174ms]\n", - "Epoch: [ 1/ 10], step: [ 233/ 390], loss: [0.6499], avg loss: [0.6790], time: [97.1973ms]\n", - "Epoch: [ 1/ 10], step: [ 234/ 390], loss: [0.6867], avg loss: [0.6790], time: [100.7817ms]\n", - "Epoch: [ 1/ 10], step: [ 235/ 390], loss: [0.6241], avg loss: [0.6788], time: [96.5447ms]\n", - "Epoch: [ 1/ 10], step: [ 236/ 390], loss: [0.8216], avg loss: [0.6794], time: [103.3659ms]\n", - "Epoch: [ 1/ 10], step: [ 237/ 390], loss: [0.6029], avg loss: [0.6791], time: [98.2652ms]\n", - "Epoch: [ 1/ 10], step: [ 238/ 390], loss: [0.7373], avg loss: [0.6793], time: [99.4091ms]\n", - "Epoch: [ 1/ 10], step: [ 239/ 390], loss: [0.7275], avg loss: [0.6795], time: [95.6967ms]\n", - "Epoch: [ 1/ 10], step: [ 240/ 390], loss: [0.6317], avg loss: [0.6793], time: [101.9759ms]\n", - "Epoch: [ 1/ 10], step: [ 241/ 390], loss: [0.6836], avg loss: [0.6793], time: [95.0050ms]\n", - "Epoch: [ 1/ 10], step: [ 242/ 390], loss: [0.7143], avg loss: [0.6795], time: [99.9835ms]\n", - "Epoch: [ 1/ 10], step: [ 243/ 390], loss: [0.6408], avg loss: [0.6793], time: [100.2116ms]\n", - "Epoch: [ 1/ 10], step: [ 244/ 390], loss: [0.6520], avg loss: [0.6792], time: [106.4565ms]\n", - "Epoch: [ 1/ 10], step: [ 245/ 390], loss: [0.6602], avg loss: [0.6791], time: [97.6610ms]\n", - "Epoch: [ 1/ 10], step: [ 246/ 390], loss: [0.6279], avg loss: [0.6789], time: [99.3872ms]\n", - "Epoch: [ 1/ 10], step: [ 247/ 390], loss: [0.6336], avg loss: [0.6787], time: [100.8461ms]\n", - "Epoch: [ 1/ 10], step: [ 248/ 390], loss: [0.6832], avg loss: [0.6788], time: [102.9501ms]\n", - "Epoch: [ 1/ 10], step: [ 249/ 390], loss: [0.6762], avg loss: [0.6788], time: [97.1913ms]\n", - "Epoch: [ 1/ 10], step: [ 250/ 390], loss: [0.7123], avg loss: [0.6789], time: [104.1181ms]\n", - "Epoch: [ 1/ 10], step: [ 251/ 390], loss: [0.7057], avg loss: [0.6790], time: [98.4380ms]\n", - "Epoch: [ 1/ 10], step: [ 252/ 390], loss: [0.6579], avg loss: [0.6789], time: [100.9729ms]\n", - "Epoch: [ 1/ 10], step: [ 253/ 390], loss: [0.6746], avg loss: [0.6789], time: [99.4725ms]\n", - "Epoch: [ 1/ 10], step: [ 254/ 390], loss: [0.6690], avg loss: [0.6789], time: [101.0892ms]\n", - "Epoch: [ 1/ 10], step: [ 255/ 390], loss: [0.6963], avg loss: [0.6789], time: [100.0776ms]\n", - "Epoch: [ 1/ 10], step: [ 256/ 390], loss: [0.6519], avg loss: [0.6788], time: [101.8677ms]\n", - "Epoch: [ 1/ 10], step: [ 257/ 390], loss: [0.6771], avg loss: [0.6788], time: [95.8588ms]\n", - "Epoch: [ 1/ 10], step: [ 258/ 390], loss: [0.6355], avg loss: [0.6786], time: [100.1673ms]\n", - "Epoch: [ 1/ 10], step: [ 259/ 390], loss: [0.6587], avg loss: [0.6786], time: [96.8349ms]\n", - "Epoch: [ 1/ 10], step: [ 260/ 390], loss: [0.6374], avg loss: [0.6784], time: [100.6339ms]\n", - "Epoch: [ 1/ 10], step: [ 261/ 390], loss: [0.6249], avg loss: [0.6782], time: [101.6564ms]\n", - "Epoch: [ 1/ 10], step: [ 262/ 390], loss: [0.6486], avg loss: [0.6781], time: [99.8287ms]\n", - "Epoch: [ 1/ 10], step: [ 263/ 390], loss: [0.6340], avg loss: [0.6779], time: [101.6524ms]\n", - "Epoch: [ 1/ 10], step: [ 264/ 390], loss: [0.6180], avg loss: [0.6777], time: [103.4558ms]\n", - "Epoch: [ 1/ 10], step: [ 265/ 390], loss: [0.6825], avg loss: [0.6777], time: [98.4805ms]\n", - "Epoch: [ 1/ 10], step: [ 266/ 390], loss: [0.6412], avg loss: [0.6776], time: [103.6050ms]\n", - "Epoch: [ 1/ 10], step: [ 267/ 390], loss: [0.6883], avg loss: [0.6776], time: [97.1916ms]\n" + "epoch: 1 step: 179, loss is 0.6838\n", + "epoch: 1 step: 180, loss is 0.7194\n", + "epoch: 1 step: 181, loss is 0.5811\n", + "epoch: 1 step: 182, loss is 0.7140\n", + "epoch: 1 step: 183, loss is 0.7558\n", + "epoch: 1 step: 184, loss is 0.6419\n", + "epoch: 1 step: 185, loss is 0.5970\n", + "epoch: 1 step: 186, loss is 0.7137\n", + "epoch: 1 step: 187, loss is 0.6258\n", + "epoch: 1 step: 188, loss is 0.6423\n", + "epoch: 1 step: 189, loss is 0.6785\n", + "epoch: 1 step: 190, loss is 0.6613\n", + "epoch: 1 step: 191, loss is 0.6538\n", + "epoch: 1 step: 192, loss is 0.6377\n", + "epoch: 1 step: 193, loss is 0.7727\n", + "epoch: 1 step: 194, loss is 0.6539\n", + "epoch: 1 step: 195, loss is 0.6855\n", + "epoch: 1 step: 196, loss is 0.6523\n", + "epoch: 1 step: 197, loss is 0.6892\n", + "epoch: 1 step: 198, loss is 0.6495\n", + "epoch: 1 step: 199, loss is 0.6546\n", + "epoch: 1 step: 200, loss is 0.6856\n", + "epoch: 1 step: 201, loss is 0.6739\n", + "epoch: 1 step: 202, loss is 0.6894\n", + "epoch: 1 step: 203, loss is 0.6625\n", + "epoch: 1 step: 204, loss is 0.6656\n", + "epoch: 1 step: 205, loss is 0.6302\n", + "epoch: 1 step: 206, loss is 0.6459\n", + "epoch: 1 step: 207, loss is 0.6626\n", + "epoch: 1 step: 208, loss is 0.6679\n", + "epoch: 1 step: 209, loss is 0.6209\n", + "epoch: 1 step: 210, loss is 0.6665\n", + "epoch: 1 step: 211, loss is 0.6486\n", + "epoch: 1 step: 212, loss is 0.6675\n", + "epoch: 1 step: 213, loss is 0.6709\n", + "epoch: 1 step: 214, loss is 0.6539\n", + "epoch: 1 step: 215, loss is 0.6299\n", + "epoch: 1 step: 216, loss is 0.6258\n", + "epoch: 1 step: 217, loss is 0.6113\n", + "epoch: 1 step: 218, loss is 0.6566\n", + "epoch: 1 step: 219, loss is 0.6309\n", + "epoch: 1 step: 220, loss is 0.7080\n", + "epoch: 1 step: 221, loss is 0.6745\n", + "epoch: 1 step: 222, loss is 0.7327\n", + "epoch: 1 step: 223, loss is 0.6556\n", + "epoch: 1 step: 224, loss is 0.5917\n", + "epoch: 1 step: 225, loss is 0.6625\n", + "epoch: 1 step: 226, loss is 0.5993\n", + "epoch: 1 step: 227, loss is 0.6162\n", + "epoch: 1 step: 228, loss is 0.5698\n", + "epoch: 1 step: 229, loss is 0.6088\n", + "epoch: 1 step: 230, loss is 0.6212\n", + "epoch: 1 step: 231, loss is 0.5745\n", + "epoch: 1 step: 232, loss is 0.6947\n", + "epoch: 1 step: 233, loss is 0.6499\n", + "epoch: 1 step: 234, loss is 0.6867\n", + "epoch: 1 step: 235, loss is 0.6241\n", + "epoch: 1 step: 236, loss is 0.8216\n", + "epoch: 1 step: 237, loss is 0.6029\n", + "epoch: 1 step: 238, loss is 0.7373\n", + "epoch: 1 step: 239, loss is 0.7275\n", + "epoch: 1 step: 240, loss is 0.6317\n", + "epoch: 1 step: 241, loss is 0.6836\n", + "epoch: 1 step: 242, loss is 0.7143\n", + "epoch: 1 step: 243, loss is 0.6408\n", + "epoch: 1 step: 244, loss is 0.6520\n", + "epoch: 1 step: 245, loss is 0.6602\n", + "epoch: 1 step: 246, loss is 0.6279\n", + "epoch: 1 step: 247, loss is 0.6336\n", + "epoch: 1 step: 248, loss is 0.6832\n", + "epoch: 1 step: 249, loss is 0.6762\n", + "epoch: 1 step: 250, loss is 0.7123\n", + "epoch: 1 step: 251, loss is 0.7057\n", + "epoch: 1 step: 252, loss is 0.6579\n", + "epoch: 1 step: 253, loss is 0.6746\n", + "epoch: 1 step: 254, loss is 0.6690\n", + "epoch: 1 step: 255, loss is 0.6963\n", + "epoch: 1 step: 256, loss is 0.6519\n", + "epoch: 1 step: 257, loss is 0.6771\n", + "epoch: 1 step: 258, loss is 0.6355\n", + "epoch: 1 step: 259, loss is 0.6587\n", + "epoch: 1 step: 260, loss is 0.6374\n", + "epoch: 1 step: 261, loss is 0.6249\n", + "epoch: 1 step: 262, loss is 0.6486\n", + "epoch: 1 step: 263, loss is 0.6340\n", + "epoch: 1 step: 264, loss is 0.6180\n", + "epoch: 1 step: 265, loss is 0.6825\n", + "epoch: 1 step: 266, loss is 0.6412\n", + "epoch: 1 step: 267, loss is 0.6883\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 1/ 10], step: [ 268/ 390], loss: [0.6293], avg loss: [0.6774], time: [99.0884ms]\n", - "Epoch: [ 1/ 10], step: [ 269/ 390], loss: [0.6679], avg loss: [0.6774], time: [98.5043ms]\n", - "Epoch: [ 1/ 10], step: [ 270/ 390], loss: [0.6610], avg loss: [0.6773], time: [104.2540ms]\n", - "Epoch: [ 1/ 10], step: [ 271/ 390], loss: [0.6144], avg loss: [0.6771], time: [98.5131ms]\n", - "Epoch: [ 1/ 10], step: [ 272/ 390], loss: [0.6461], avg loss: [0.6770], time: [98.3980ms]\n", - "Epoch: [ 1/ 10], step: [ 273/ 390], loss: [0.6446], avg loss: [0.6769], time: [97.9280ms]\n", - "Epoch: [ 1/ 10], step: [ 274/ 390], loss: [0.7186], avg loss: [0.6770], time: [100.8170ms]\n", - "Epoch: [ 1/ 10], step: [ 275/ 390], loss: [0.7003], avg loss: [0.6771], time: [100.1592ms]\n", - "Epoch: [ 1/ 10], step: [ 276/ 390], loss: [0.6935], avg loss: [0.6772], time: [101.7003ms]\n", - "Epoch: [ 1/ 10], step: [ 277/ 390], loss: [0.7605], avg loss: [0.6775], time: [102.7503ms]\n", - "Epoch: [ 1/ 10], step: [ 278/ 390], loss: [0.6664], avg loss: [0.6774], time: [102.6635ms]\n", - "Epoch: [ 1/ 10], step: [ 279/ 390], loss: [0.5582], avg loss: [0.6770], time: [97.8162ms]\n", - "Epoch: [ 1/ 10], step: [ 280/ 390], loss: [0.6123], avg loss: [0.6768], time: [100.5421ms]\n", - "Epoch: [ 1/ 10], step: [ 281/ 390], loss: [0.6410], avg loss: [0.6766], time: [96.9605ms]\n", - "Epoch: [ 1/ 10], step: [ 282/ 390], loss: [0.6696], avg loss: [0.6766], time: [102.0803ms]\n", - "Epoch: [ 1/ 10], step: [ 283/ 390], loss: [0.6637], avg loss: [0.6766], time: [97.7211ms]\n", - "Epoch: [ 1/ 10], step: [ 284/ 390], loss: [0.6558], avg loss: [0.6765], time: [101.4807ms]\n", - "Epoch: [ 1/ 10], step: [ 285/ 390], loss: [0.6364], avg loss: [0.6764], time: [97.6901ms]\n", - "Epoch: [ 1/ 10], step: [ 286/ 390], loss: [0.6613], avg loss: [0.6763], time: [104.0516ms]\n", - "Epoch: [ 1/ 10], step: [ 287/ 390], loss: [0.6815], avg loss: [0.6763], time: [97.9962ms]\n", - "Epoch: [ 1/ 10], step: [ 288/ 390], loss: [0.6551], avg loss: [0.6763], time: [99.8640ms]\n", - "Epoch: [ 1/ 10], step: [ 289/ 390], loss: [0.6071], avg loss: [0.6760], time: [97.0018ms]\n", - "Epoch: [ 1/ 10], step: [ 290/ 390], loss: [0.6287], avg loss: [0.6759], time: [98.8023ms]\n", - "Epoch: [ 1/ 10], step: [ 291/ 390], loss: [0.6090], avg loss: [0.6756], time: [97.9438ms]\n", - "Epoch: [ 1/ 10], step: [ 292/ 390], loss: [0.6697], avg loss: [0.6756], time: [102.2677ms]\n", - "Epoch: [ 1/ 10], step: [ 293/ 390], loss: [0.6100], avg loss: [0.6754], time: [100.4937ms]\n", - "Epoch: [ 1/ 10], step: [ 294/ 390], loss: [0.6452], avg loss: [0.6753], time: [99.9360ms]\n", - "Epoch: [ 1/ 10], step: [ 295/ 390], loss: [0.5721], avg loss: [0.6749], time: [98.2065ms]\n", - "Epoch: [ 1/ 10], step: [ 296/ 390], loss: [0.6412], avg loss: [0.6748], time: [102.8442ms]\n", - "Epoch: [ 1/ 10], step: [ 297/ 390], loss: [0.6133], avg loss: [0.6746], time: [97.3332ms]\n", - "Epoch: [ 1/ 10], step: [ 298/ 390], loss: [0.7127], avg loss: [0.6747], time: [100.2576ms]\n", - "Epoch: [ 1/ 10], step: [ 299/ 390], loss: [0.6043], avg loss: [0.6745], time: [99.0622ms]\n", - "Epoch: [ 1/ 10], step: [ 300/ 390], loss: [0.6349], avg loss: [0.6744], time: [101.5828ms]\n", - "Epoch: [ 1/ 10], step: [ 301/ 390], loss: [0.6233], avg loss: [0.6742], time: [101.6316ms]\n", - "Epoch: [ 1/ 10], step: [ 302/ 390], loss: [0.6955], avg loss: [0.6743], time: [100.1947ms]\n", - "Epoch: [ 1/ 10], step: [ 303/ 390], loss: [0.5825], avg loss: [0.6740], time: [98.4261ms]\n", - "Epoch: [ 1/ 10], step: [ 304/ 390], loss: [0.6163], avg loss: [0.6738], time: [101.7635ms]\n", - "Epoch: [ 1/ 10], step: [ 305/ 390], loss: [0.6739], avg loss: [0.6738], time: [99.4513ms]\n", - "Epoch: [ 1/ 10], step: [ 306/ 390], loss: [0.6409], avg loss: [0.6737], time: [102.5743ms]\n", - "Epoch: [ 1/ 10], step: [ 307/ 390], loss: [0.6608], avg loss: [0.6736], time: [97.9807ms]\n", - "Epoch: [ 1/ 10], step: [ 308/ 390], loss: [0.6505], avg loss: [0.6736], time: [104.2030ms]\n", - "Epoch: [ 1/ 10], step: [ 309/ 390], loss: [0.6090], avg loss: [0.6733], time: [95.8602ms]\n", - "Epoch: [ 1/ 10], step: [ 310/ 390], loss: [0.6088], avg loss: [0.6731], time: [101.6569ms]\n", - "Epoch: [ 1/ 10], step: [ 311/ 390], loss: [0.6254], avg loss: [0.6730], time: [98.6173ms]\n", - "Epoch: [ 1/ 10], step: [ 312/ 390], loss: [0.6485], avg loss: [0.6729], time: [99.7775ms]\n", - "Epoch: [ 1/ 10], step: [ 313/ 390], loss: [0.7142], avg loss: [0.6730], time: [95.9973ms]\n", - "Epoch: [ 1/ 10], step: [ 314/ 390], loss: [0.5787], avg loss: [0.6727], time: [103.0574ms]\n", - "Epoch: [ 1/ 10], step: [ 315/ 390], loss: [0.6295], avg loss: [0.6726], time: [99.2737ms]\n", - "Epoch: [ 1/ 10], step: [ 316/ 390], loss: [0.6210], avg loss: [0.6724], time: [101.5892ms]\n", - "Epoch: [ 1/ 10], step: [ 317/ 390], loss: [0.7650], avg loss: [0.6727], time: [96.6787ms]\n", - "Epoch: [ 1/ 10], step: [ 318/ 390], loss: [0.6355], avg loss: [0.6726], time: [104.0988ms]\n", - "Epoch: [ 1/ 10], step: [ 319/ 390], loss: [0.6717], avg loss: [0.6726], time: [101.0303ms]\n", - "Epoch: [ 1/ 10], step: [ 320/ 390], loss: [0.7392], avg loss: [0.6728], time: [100.6036ms]\n", - "Epoch: [ 1/ 10], step: [ 321/ 390], loss: [0.6969], avg loss: [0.6729], time: [97.0156ms]\n", - "Epoch: [ 1/ 10], step: [ 322/ 390], loss: [0.6394], avg loss: [0.6728], time: [104.1842ms]\n", - "Epoch: [ 1/ 10], step: [ 323/ 390], loss: [0.6603], avg loss: [0.6727], time: [101.1569ms]\n", - "Epoch: [ 1/ 10], step: [ 324/ 390], loss: [0.6058], avg loss: [0.6725], time: [99.5979ms]\n", - "Epoch: [ 1/ 10], step: [ 325/ 390], loss: [0.6332], avg loss: [0.6724], time: [97.6386ms]\n", - "Epoch: [ 1/ 10], step: [ 326/ 390], loss: [0.6236], avg loss: [0.6723], time: [100.7688ms]\n", - "Epoch: [ 1/ 10], step: [ 327/ 390], loss: [0.6483], avg loss: [0.6722], time: [99.6730ms]\n", - "Epoch: [ 1/ 10], step: [ 328/ 390], loss: [0.6229], avg loss: [0.6720], time: [98.9954ms]\n", - "Epoch: [ 1/ 10], step: [ 329/ 390], loss: [0.6022], avg loss: [0.6718], time: [99.1223ms]\n", - "Epoch: [ 1/ 10], step: [ 330/ 390], loss: [0.6393], avg loss: [0.6717], time: [99.3533ms]\n", - "Epoch: [ 1/ 10], step: [ 331/ 390], loss: [0.5813], avg loss: [0.6715], time: [99.1678ms]\n", - "Epoch: [ 1/ 10], step: [ 332/ 390], loss: [0.6013], avg loss: [0.6712], time: [101.7318ms]\n", - "Epoch: [ 1/ 10], step: [ 333/ 390], loss: [0.6026], avg loss: [0.6710], time: [97.6651ms]\n", - "Epoch: [ 1/ 10], step: [ 334/ 390], loss: [0.5768], avg loss: [0.6708], time: [99.6068ms]\n", - "Epoch: [ 1/ 10], step: [ 335/ 390], loss: [0.6915], avg loss: [0.6708], time: [99.7696ms]\n", - "Epoch: [ 1/ 10], step: [ 336/ 390], loss: [0.6256], avg loss: [0.6707], time: [104.4483ms]\n", - "Epoch: [ 1/ 10], step: [ 337/ 390], loss: [0.7781], avg loss: [0.6710], time: [101.9986ms]\n", - "Epoch: [ 1/ 10], step: [ 338/ 390], loss: [0.7050], avg loss: [0.6711], time: [107.2345ms]\n", - "Epoch: [ 1/ 10], step: [ 339/ 390], loss: [0.7328], avg loss: [0.6713], time: [102.3960ms]\n", - "Epoch: [ 1/ 10], step: [ 340/ 390], loss: [0.7076], avg loss: [0.6714], time: [101.7249ms]\n", - "Epoch: [ 1/ 10], step: [ 341/ 390], loss: [0.7222], avg loss: [0.6715], time: [98.3591ms]\n", - "Epoch: [ 1/ 10], step: [ 342/ 390], loss: [0.6022], avg loss: [0.6713], time: [103.7915ms]\n", - "Epoch: [ 1/ 10], step: [ 343/ 390], loss: [0.6293], avg loss: [0.6712], time: [103.1730ms]\n", - "Epoch: [ 1/ 10], step: [ 344/ 390], loss: [0.6443], avg loss: [0.6711], time: [98.4271ms]\n", - "Epoch: [ 1/ 10], step: [ 345/ 390], loss: [0.6849], avg loss: [0.6712], time: [101.1391ms]\n", - "Epoch: [ 1/ 10], step: [ 346/ 390], loss: [0.6910], avg loss: [0.6712], time: [102.3850ms]\n", - "Epoch: [ 1/ 10], step: [ 347/ 390], loss: [0.7112], avg loss: [0.6714], time: [100.4372ms]\n", - "Epoch: [ 1/ 10], step: [ 348/ 390], loss: [0.7019], avg loss: [0.6714], time: [100.4596ms]\n", - "Epoch: [ 1/ 10], step: [ 349/ 390], loss: [0.6608], avg loss: [0.6714], time: [98.1493ms]\n", - "Epoch: [ 1/ 10], step: [ 350/ 390], loss: [0.6993], avg loss: [0.6715], time: [98.9270ms]\n", - "Epoch: [ 1/ 10], step: [ 351/ 390], loss: [0.6632], avg loss: [0.6715], time: [95.9554ms]\n", - "Epoch: [ 1/ 10], step: [ 352/ 390], loss: [0.6706], avg loss: [0.6715], time: [100.6606ms]\n", - "Epoch: [ 1/ 10], step: [ 353/ 390], loss: [0.6401], avg loss: [0.6714], time: [96.2012ms]\n", - "Epoch: [ 1/ 10], step: [ 354/ 390], loss: [0.6503], avg loss: [0.6713], time: [98.9563ms]\n", - "Epoch: [ 1/ 10], step: [ 355/ 390], loss: [0.6477], avg loss: [0.6712], time: [96.5178ms]\n", - "Epoch: [ 1/ 10], step: [ 356/ 390], loss: [0.6509], avg loss: [0.6712], time: [103.2085ms]\n" + "epoch: 1 step: 268, loss is 0.6293\n", + "epoch: 1 step: 269, loss is 0.6679\n", + "epoch: 1 step: 270, loss is 0.6610\n", + "epoch: 1 step: 271, loss is 0.6144\n", + "epoch: 1 step: 272, loss is 0.6461\n", + "epoch: 1 step: 273, loss is 0.6446\n", + "epoch: 1 step: 274, loss is 0.7186\n", + "epoch: 1 step: 275, loss is 0.7003\n", + "epoch: 1 step: 276, loss is 0.6935\n", + "epoch: 1 step: 277, loss is 0.7605\n", + "epoch: 1 step: 278, loss is 0.6664\n", + "epoch: 1 step: 279, loss is 0.5582\n", + "epoch: 1 step: 280, loss is 0.6123\n", + "epoch: 1 step: 281, loss is 0.6410\n", + "epoch: 1 step: 282, loss is 0.6696\n", + "epoch: 1 step: 283, loss is 0.6637\n", + "epoch: 1 step: 284, loss is 0.6558\n", + "epoch: 1 step: 285, loss is 0.6364\n", + "epoch: 1 step: 286, loss is 0.6613\n", + "epoch: 1 step: 287, loss is 0.6815\n", + "epoch: 1 step: 288, loss is 0.6551\n", + "epoch: 1 step: 289, loss is 0.6071\n", + "epoch: 1 step: 290, loss is 0.6287\n", + "epoch: 1 step: 291, loss is 0.6090\n", + "epoch: 1 step: 292, loss is 0.6697\n", + "epoch: 1 step: 293, loss is 0.6100\n", + "epoch: 1 step: 294, loss is 0.6452\n", + "epoch: 1 step: 295, loss is 0.5721\n", + "epoch: 1 step: 296, loss is 0.6412\n", + "epoch: 1 step: 297, loss is 0.6133\n", + "epoch: 1 step: 298, loss is 0.7127\n", + "epoch: 1 step: 299, loss is 0.6043\n", + "epoch: 1 step: 300, loss is 0.6349\n", + "epoch: 1 step: 301, loss is 0.6233\n", + "epoch: 1 step: 302, loss is 0.6955\n", + "epoch: 1 step: 303, loss is 0.5825\n", + "epoch: 1 step: 304, loss is 0.6163\n", + "epoch: 1 step: 305, loss is 0.6739\n", + "epoch: 1 step: 306, loss is 0.6409\n", + "epoch: 1 step: 307, loss is 0.6608\n", + "epoch: 1 step: 308, loss is 0.6505\n", + "epoch: 1 step: 309, loss is 0.6090\n", + "epoch: 1 step: 310, loss is 0.6088\n", + "epoch: 1 step: 311, loss is 0.6254\n", + "epoch: 1 step: 312, loss is 0.6485\n", + "epoch: 1 step: 313, loss is 0.7142\n", + "epoch: 1 step: 314, loss is 0.5787\n", + "epoch: 1 step: 315, loss is 0.6295\n", + "epoch: 1 step: 316, loss is 0.6210\n", + "epoch: 1 step: 317, loss is 0.7650\n", + "epoch: 1 step: 318, loss is 0.6355\n", + "epoch: 1 step: 319, loss is 0.6717\n", + "epoch: 1 step: 320, loss is 0.7392\n", + "epoch: 1 step: 321, loss is 0.6969\n", + "epoch: 1 step: 322, loss is 0.6394\n", + "epoch: 1 step: 323, loss is 0.6603\n", + "epoch: 1 step: 324, loss is 0.6058\n", + "epoch: 1 step: 325, loss is 0.6332\n", + "epoch: 1 step: 326, loss is 0.6236\n", + "epoch: 1 step: 327, loss is 0.6483\n", + "epoch: 1 step: 328, loss is 0.6229\n", + "epoch: 1 step: 329, loss is 0.6022\n", + "epoch: 1 step: 330, loss is 0.6393\n", + "epoch: 1 step: 331, loss is 0.5813\n", + "epoch: 1 step: 332, loss is 0.6013\n", + "epoch: 1 step: 333, loss is 0.6026\n", + "epoch: 1 step: 334, loss is 0.5768\n", + "epoch: 1 step: 335, loss is 0.6915\n", + "epoch: 1 step: 336, loss is 0.6256\n", + "epoch: 1 step: 337, loss is 0.7781\n", + "epoch: 1 step: 338, loss is 0.7050\n", + "epoch: 1 step: 339, loss is 0.7328\n", + "epoch: 1 step: 340, loss is 0.7076\n", + "epoch: 1 step: 341, loss is 0.7222\n", + "epoch: 1 step: 342, loss is 0.6022\n", + "epoch: 1 step: 343, loss is 0.6293\n", + "epoch: 1 step: 344, loss is 0.6443\n", + "epoch: 1 step: 345, loss is 0.6849\n", + "epoch: 1 step: 346, loss is 0.6910\n", + "epoch: 1 step: 347, loss is 0.7112\n", + "epoch: 1 step: 348, loss is 0.7019\n", + "epoch: 1 step: 349, loss is 0.6608\n", + "epoch: 1 step: 350, loss is 0.6993\n", + "epoch: 1 step: 351, loss is 0.6632\n", + "epoch: 1 step: 352, loss is 0.6706\n", + "epoch: 1 step: 353, loss is 0.6401\n", + "epoch: 1 step: 354, loss is 0.6503\n", + "epoch: 1 step: 355, loss is 0.6477\n", + "epoch: 1 step: 356, loss is 0.6509\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 1/ 10], step: [ 357/ 390], loss: [0.6403], avg loss: [0.6711], time: [98.1786ms]\n", - "Epoch: [ 1/ 10], step: [ 358/ 390], loss: [0.6679], avg loss: [0.6711], time: [103.6110ms]\n", - "Epoch: [ 1/ 10], step: [ 359/ 390], loss: [0.6559], avg loss: [0.6711], time: [97.9443ms]\n", - "Epoch: [ 1/ 10], step: [ 360/ 390], loss: [0.6298], avg loss: [0.6709], time: [103.2884ms]\n", - "Epoch: [ 1/ 10], step: [ 361/ 390], loss: [0.6193], avg loss: [0.6708], time: [98.4299ms]\n", - "Epoch: [ 1/ 10], step: [ 362/ 390], loss: [0.6649], avg loss: [0.6708], time: [102.1912ms]\n", - "Epoch: [ 1/ 10], step: [ 363/ 390], loss: [0.6179], avg loss: [0.6706], time: [102.8066ms]\n", - "Epoch: [ 1/ 10], step: [ 364/ 390], loss: [0.6771], avg loss: [0.6707], time: [102.7441ms]\n", - "Epoch: [ 1/ 10], step: [ 365/ 390], loss: [0.6193], avg loss: [0.6705], time: [97.2888ms]\n", - "Epoch: [ 1/ 10], step: [ 366/ 390], loss: [0.5615], avg loss: [0.6702], time: [100.9829ms]\n", - "Epoch: [ 1/ 10], step: [ 367/ 390], loss: [0.6999], avg loss: [0.6703], time: [101.9204ms]\n", - "Epoch: [ 1/ 10], step: [ 368/ 390], loss: [0.6330], avg loss: [0.6702], time: [103.1630ms]\n", - "Epoch: [ 1/ 10], step: [ 369/ 390], loss: [0.6941], avg loss: [0.6703], time: [100.9564ms]\n", - "Epoch: [ 1/ 10], step: [ 370/ 390], loss: [0.7298], avg loss: [0.6704], time: [104.1873ms]\n", - "Epoch: [ 1/ 10], step: [ 371/ 390], loss: [0.7247], avg loss: [0.6706], time: [96.9987ms]\n", - "Epoch: [ 1/ 10], step: [ 372/ 390], loss: [0.5866], avg loss: [0.6703], time: [100.1570ms]\n", - "Epoch: [ 1/ 10], step: [ 373/ 390], loss: [0.6025], avg loss: [0.6702], time: [93.6594ms]\n", - "Epoch: [ 1/ 10], step: [ 374/ 390], loss: [0.6047], avg loss: [0.6700], time: [103.0066ms]\n", - "Epoch: [ 1/ 10], step: [ 375/ 390], loss: [0.5705], avg loss: [0.6697], time: [97.8637ms]\n", - "Epoch: [ 1/ 10], step: [ 376/ 390], loss: [0.7009], avg loss: [0.6698], time: [106.2992ms]\n", - "Epoch: [ 1/ 10], step: [ 377/ 390], loss: [0.6272], avg loss: [0.6697], time: [102.3777ms]\n", - "Epoch: [ 1/ 10], step: [ 378/ 390], loss: [0.6697], avg loss: [0.6697], time: [105.3488ms]\n", - "Epoch: [ 1/ 10], step: [ 379/ 390], loss: [0.6578], avg loss: [0.6697], time: [98.5255ms]\n", - "Epoch: [ 1/ 10], step: [ 380/ 390], loss: [0.5431], avg loss: [0.6693], time: [101.3367ms]\n", - "Epoch: [ 1/ 10], step: [ 381/ 390], loss: [0.7024], avg loss: [0.6694], time: [99.8781ms]\n", - "Epoch: [ 1/ 10], step: [ 382/ 390], loss: [0.5866], avg loss: [0.6692], time: [102.2394ms]\n", - "Epoch: [ 1/ 10], step: [ 383/ 390], loss: [0.6498], avg loss: [0.6691], time: [97.1584ms]\n", - "Epoch: [ 1/ 10], step: [ 384/ 390], loss: [0.5926], avg loss: [0.6689], time: [101.0160ms]\n", - "Epoch: [ 1/ 10], step: [ 385/ 390], loss: [0.6094], avg loss: [0.6688], time: [94.8484ms]\n", - "Epoch: [ 1/ 10], step: [ 386/ 390], loss: [0.5663], avg loss: [0.6685], time: [104.9602ms]\n", - "Epoch: [ 1/ 10], step: [ 387/ 390], loss: [0.6087], avg loss: [0.6684], time: [99.5123ms]\n", - "Epoch: [ 1/ 10], step: [ 388/ 390], loss: [0.5394], avg loss: [0.6680], time: [102.6874ms]\n", - "Epoch: [ 1/ 10], step: [ 389/ 390], loss: [0.7825], avg loss: [0.6683], time: [97.5268ms]\n", - "Epoch: [ 1/ 10], step: [ 390/ 390], loss: [0.6069], avg loss: [0.6682], time: [956.3572ms]\n", - "Epoch time: 40590.405, per step time: 104.078\n", + "epoch: 1 step: 357, loss is 0.6403\n", + "epoch: 1 step: 358, loss is 0.6679\n", + "epoch: 1 step: 359, loss is 0.6559\n", + "epoch: 1 step: 360, loss is 0.6298\n", + "epoch: 1 step: 361, loss is 0.6193\n", + "epoch: 1 step: 362, loss is 0.6649\n", + "epoch: 1 step: 363, loss is 0.6179\n", + "epoch: 1 step: 364, loss is 0.6771\n", + "epoch: 1 step: 365, loss is 0.6193\n", + "epoch: 1 step: 366, loss is 0.5615\n", + "epoch: 1 step: 367, loss is 0.6999\n", + "epoch: 1 step: 368, loss is 0.6330\n", + "epoch: 1 step: 369, loss is 0.6941\n", + "epoch: 1 step: 370, loss is 0.7298\n", + "epoch: 1 step: 371, loss is 0.7247\n", + "epoch: 1 step: 372, loss is 0.5866\n", + "epoch: 1 step: 373, loss is 0.6025\n", + "epoch: 1 step: 374, loss is 0.6047\n", + "epoch: 1 step: 375, loss is 0.5705\n", + "epoch: 1 step: 376, loss is 0.7009\n", + "epoch: 1 step: 377, loss is 0.6272\n", + "epoch: 1 step: 378, loss is 0.6697\n", + "epoch: 1 step: 379, loss is 0.6578\n", + "epoch: 1 step: 380, loss is 0.5431\n", + "epoch: 1 step: 381, loss is 0.7024\n", + "epoch: 1 step: 382, loss is 0.5866\n", + "epoch: 1 step: 383, loss is 0.6498\n", + "epoch: 1 step: 384, loss is 0.5926\n", + "epoch: 1 step: 385, loss is 0.6094\n", + "epoch: 1 step: 386, loss is 0.5663\n", + "epoch: 1 step: 387, loss is 0.6087\n", + "epoch: 1 step: 388, loss is 0.5394\n", + "epoch: 1 step: 389, loss is 0.7825\n", + "epoch: 1 step: 390, loss is 0.6069\n", "Epoch time: 40590.832, per step time: 104.079, avg loss: 0.668\n", "************************************************************\n", - "Epoch: [ 2/ 10], step: [ 1/ 390], loss: [0.7305], avg loss: [0.7305], time: [100.4219ms]\n", - "Epoch: [ 2/ 10], step: [ 2/ 390], loss: [0.7044], avg loss: [0.7175], time: [103.8322ms]\n", - "Epoch: [ 2/ 10], step: [ 3/ 390], loss: [0.5188], avg loss: [0.6512], time: [103.1067ms]\n", - "Epoch: [ 2/ 10], step: [ 4/ 390], loss: [0.5801], avg loss: [0.6334], time: [100.9061ms]\n", - "Epoch: [ 2/ 10], step: [ 5/ 390], loss: [0.6629], avg loss: [0.6393], time: [105.7405ms]\n", - "Epoch: [ 2/ 10], step: [ 6/ 390], loss: [0.6763], avg loss: [0.6455], time: [101.5539ms]\n", - "Epoch: [ 2/ 10], step: [ 7/ 390], loss: [0.6314], avg loss: [0.6435], time: [103.1678ms]\n", - "Epoch: [ 2/ 10], step: [ 8/ 390], loss: [0.6936], avg loss: [0.6497], time: [102.1249ms]\n", - "Epoch: [ 2/ 10], step: [ 9/ 390], loss: [0.5945], avg loss: [0.6436], time: [102.1895ms]\n", - "Epoch: [ 2/ 10], step: [ 10/ 390], loss: [0.7017], avg loss: [0.6494], time: [101.5260ms]\n", - "Epoch: [ 2/ 10], step: [ 11/ 390], loss: [0.6935], avg loss: [0.6534], time: [105.4053ms]\n", - "Epoch: [ 2/ 10], step: [ 12/ 390], loss: [0.6426], avg loss: [0.6525], time: [101.2661ms]\n", - "Epoch: [ 2/ 10], step: [ 13/ 390], loss: [0.6689], avg loss: [0.6538], time: [105.3002ms]\n", - "Epoch: [ 2/ 10], step: [ 14/ 390], loss: [0.6623], avg loss: [0.6544], time: [102.6111ms]\n", - "Epoch: [ 2/ 10], step: [ 15/ 390], loss: [0.6948], avg loss: [0.6571], time: [101.8302ms]\n", - "Epoch: [ 2/ 10], step: [ 16/ 390], loss: [0.6518], avg loss: [0.6568], time: [100.3373ms]\n", - "Epoch: [ 2/ 10], step: [ 17/ 390], loss: [0.6611], avg loss: [0.6570], time: [102.5856ms]\n", - "Epoch: [ 2/ 10], step: [ 18/ 390], loss: [0.6519], avg loss: [0.6567], time: [101.8844ms]\n", - "Epoch: [ 2/ 10], step: [ 19/ 390], loss: [0.6549], avg loss: [0.6566], time: [103.6978ms]\n", - "Epoch: [ 2/ 10], step: [ 20/ 390], loss: [0.6685], avg loss: [0.6572], time: [100.9917ms]\n", - "Epoch: [ 2/ 10], step: [ 21/ 390], loss: [0.6782], avg loss: [0.6582], time: [101.8064ms]\n", - "Epoch: [ 2/ 10], step: [ 22/ 390], loss: [0.6741], avg loss: [0.6589], time: [105.1960ms]\n", - "Epoch: [ 2/ 10], step: [ 23/ 390], loss: [0.6394], avg loss: [0.6581], time: [102.1986ms]\n", - "Epoch: [ 2/ 10], step: [ 24/ 390], loss: [0.6587], avg loss: [0.6581], time: [100.5142ms]\n", - "Epoch: [ 2/ 10], step: [ 25/ 390], loss: [0.6442], avg loss: [0.6576], time: [105.2723ms]\n", - "Epoch: [ 2/ 10], step: [ 26/ 390], loss: [0.6268], avg loss: [0.6564], time: [101.6653ms]\n", - "Epoch: [ 2/ 10], step: [ 27/ 390], loss: [0.6517], avg loss: [0.6562], time: [101.8386ms]\n", - "Epoch: [ 2/ 10], step: [ 28/ 390], loss: [0.6195], avg loss: [0.6549], time: [100.9421ms]\n", - "Epoch: [ 2/ 10], step: [ 29/ 390], loss: [0.6192], avg loss: [0.6537], time: [101.0842ms]\n", - "Epoch: [ 2/ 10], step: [ 30/ 390], loss: [0.6432], avg loss: [0.6533], time: [99.7970ms]\n", - "Epoch: [ 2/ 10], step: [ 31/ 390], loss: [0.6170], avg loss: [0.6521], time: [102.1228ms]\n", - "Epoch: [ 2/ 10], step: [ 32/ 390], loss: [0.6446], avg loss: [0.6519], time: [98.0394ms]\n", - "Epoch: [ 2/ 10], step: [ 33/ 390], loss: [0.6830], avg loss: [0.6528], time: [103.9677ms]\n", - "Epoch: [ 2/ 10], step: [ 34/ 390], loss: [0.6451], avg loss: [0.6526], time: [102.4153ms]\n", - "Epoch: [ 2/ 10], step: [ 35/ 390], loss: [0.6049], avg loss: [0.6513], time: [102.6518ms]\n", - "Epoch: [ 2/ 10], step: [ 36/ 390], loss: [0.6155], avg loss: [0.6503], time: [104.4526ms]\n", - "Epoch: [ 2/ 10], step: [ 37/ 390], loss: [0.6176], avg loss: [0.6494], time: [102.4578ms]\n", - "Epoch: [ 2/ 10], step: [ 38/ 390], loss: [0.7299], avg loss: [0.6515], time: [100.3780ms]\n", - "Epoch: [ 2/ 10], step: [ 39/ 390], loss: [0.6515], avg loss: [0.6515], time: [105.2492ms]\n", - "Epoch: [ 2/ 10], step: [ 40/ 390], loss: [0.5711], avg loss: [0.6495], time: [100.1759ms]\n", - "Epoch: [ 2/ 10], step: [ 41/ 390], loss: [0.6730], avg loss: [0.6501], time: [106.7350ms]\n", - "Epoch: [ 2/ 10], step: [ 42/ 390], loss: [0.6650], avg loss: [0.6504], time: [99.0667ms]\n", - "Epoch: [ 2/ 10], step: [ 43/ 390], loss: [0.6340], avg loss: [0.6500], time: [105.6554ms]\n", - "Epoch: [ 2/ 10], step: [ 44/ 390], loss: [0.5755], avg loss: [0.6483], time: [102.4828ms]\n", - "Epoch: [ 2/ 10], step: [ 45/ 390], loss: [0.6111], avg loss: [0.6475], time: [105.1023ms]\n", - "Epoch: [ 2/ 10], step: [ 46/ 390], loss: [0.5814], avg loss: [0.6461], time: [100.3659ms]\n", - "Epoch: [ 2/ 10], step: [ 47/ 390], loss: [0.6620], avg loss: [0.6464], time: [103.0633ms]\n", - "Epoch: [ 2/ 10], step: [ 48/ 390], loss: [0.5942], avg loss: [0.6453], time: [102.9394ms]\n", - "Epoch: [ 2/ 10], step: [ 49/ 390], loss: [0.7082], avg loss: [0.6466], time: [101.4242ms]\n", - "Epoch: [ 2/ 10], step: [ 50/ 390], loss: [0.5765], avg loss: [0.6452], time: [101.2902ms]\n", - "Epoch: [ 2/ 10], step: [ 51/ 390], loss: [0.5995], avg loss: [0.6443], time: [104.4326ms]\n", - "Epoch: [ 2/ 10], step: [ 52/ 390], loss: [0.6466], avg loss: [0.6444], time: [101.6693ms]\n", - "Epoch: [ 2/ 10], step: [ 53/ 390], loss: [0.5725], avg loss: [0.6430], time: [106.1947ms]\n" + "epoch: 2 step: 1, loss is 0.7305\n", + "epoch: 2 step: 2, loss is 0.7044\n", + "epoch: 2 step: 3, loss is 0.5188\n", + "epoch: 2 step: 4, loss is 0.5801\n", + "epoch: 2 step: 5, loss is 0.6629\n", + "epoch: 2 step: 6, loss is 0.6763\n", + "epoch: 2 step: 7, loss is 0.6314\n", + "epoch: 2 step: 8, loss is 0.6936\n", + "epoch: 2 step: 9, loss is 0.5945\n", + "epoch: 2 step: 10, loss is 0.7017\n", + "epoch: 2 step: 11, loss is 0.6935\n", + "epoch: 2 step: 12, loss is 0.6426\n", + "epoch: 2 step: 13, loss is 0.6689\n", + "epoch: 2 step: 14, loss is 0.6623\n", + "epoch: 2 step: 15, loss is 0.6948\n", + "epoch: 2 step: 16, loss is 0.6518\n", + "epoch: 2 step: 17, loss is 0.6611\n", + "epoch: 2 step: 18, loss is 0.6519\n", + "epoch: 2 step: 19, loss is 0.6549\n", + "epoch: 2 step: 20, loss is 0.6685\n", + "epoch: 2 step: 21, loss is 0.6782\n", + "epoch: 2 step: 22, loss is 0.6741\n", + "epoch: 2 step: 23, loss is 0.6394\n", + "epoch: 2 step: 24, loss is 0.6587\n", + "epoch: 2 step: 25, loss is 0.6442\n", + "epoch: 2 step: 26, loss is 0.6268\n", + "epoch: 2 step: 27, loss is 0.6517\n", + "epoch: 2 step: 28, loss is 0.6195\n", + "epoch: 2 step: 29, loss is 0.6192\n", + "epoch: 2 step: 30, loss is 0.6432\n", + "epoch: 2 step: 31, loss is 0.6170\n", + "epoch: 2 step: 32, loss is 0.6446\n", + "epoch: 2 step: 33, loss is 0.6830\n", + "epoch: 2 step: 34, loss is 0.6451\n", + "epoch: 2 step: 35, loss is 0.6049\n", + "epoch: 2 step: 36, loss is 0.6155\n", + "epoch: 2 step: 37, loss is 0.6176\n", + "epoch: 2 step: 38, loss is 0.7299\n", + "epoch: 2 step: 39, loss is 0.6515\n", + "epoch: 2 step: 40, loss is 0.5711\n", + "epoch: 2 step: 41, loss is 0.6730\n", + "epoch: 2 step: 42, loss is 0.6650\n", + "epoch: 2 step: 43, loss is 0.6340\n", + "epoch: 2 step: 44, loss is 0.5755\n", + "epoch: 2 step: 45, loss is 0.6111\n", + "epoch: 2 step: 46, loss is 0.5814\n", + "epoch: 2 step: 47, loss is 0.6620\n", + "epoch: 2 step: 48, loss is 0.5942\n", + "epoch: 2 step: 49, loss is 0.7082\n", + "epoch: 2 step: 50, loss is 0.5765\n", + "epoch: 2 step: 51, loss is 0.5995\n", + "epoch: 2 step: 52, loss is 0.6466\n", + "epoch: 2 step: 53, loss is 0.5725\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 2/ 10], step: [ 54/ 390], loss: [0.5748], avg loss: [0.6417], time: [99.1259ms]\n", - "Epoch: [ 2/ 10], step: [ 55/ 390], loss: [0.5293], avg loss: [0.6397], time: [104.4934ms]\n", - "Epoch: [ 2/ 10], step: [ 56/ 390], loss: [0.5660], avg loss: [0.6384], time: [100.8847ms]\n", - "Epoch: [ 2/ 10], step: [ 57/ 390], loss: [0.5283], avg loss: [0.6364], time: [108.6614ms]\n", - "Epoch: [ 2/ 10], step: [ 58/ 390], loss: [0.5347], avg loss: [0.6347], time: [101.3596ms]\n", - "Epoch: [ 2/ 10], step: [ 59/ 390], loss: [0.5154], avg loss: [0.6327], time: [100.2188ms]\n", - "Epoch: [ 2/ 10], step: [ 60/ 390], loss: [0.6732], avg loss: [0.6333], time: [99.3276ms]\n", - "Epoch: [ 2/ 10], step: [ 61/ 390], loss: [0.5197], avg loss: [0.6315], time: [104.8396ms]\n", - "Epoch: [ 2/ 10], step: [ 62/ 390], loss: [0.7254], avg loss: [0.6330], time: [100.4605ms]\n", - "Epoch: [ 2/ 10], step: [ 63/ 390], loss: [0.9070], avg loss: [0.6373], time: [108.4003ms]\n", - "Epoch: [ 2/ 10], step: [ 64/ 390], loss: [0.5558], avg loss: [0.6361], time: [100.4941ms]\n", - "Epoch: [ 2/ 10], step: [ 65/ 390], loss: [1.0045], avg loss: [0.6417], time: [103.8544ms]\n", - "Epoch: [ 2/ 10], step: [ 66/ 390], loss: [0.8933], avg loss: [0.6456], time: [100.5349ms]\n", - "Epoch: [ 2/ 10], step: [ 67/ 390], loss: [1.0105], avg loss: [0.6510], time: [103.8167ms]\n", - "Epoch: [ 2/ 10], step: [ 68/ 390], loss: [0.6910], avg loss: [0.6516], time: [102.1645ms]\n", - "Epoch: [ 2/ 10], step: [ 69/ 390], loss: [0.6372], avg loss: [0.6514], time: [103.2877ms]\n", - "Epoch: [ 2/ 10], step: [ 70/ 390], loss: [0.6704], avg loss: [0.6517], time: [100.8592ms]\n", - "Epoch: [ 2/ 10], step: [ 71/ 390], loss: [0.7066], avg loss: [0.6524], time: [103.1342ms]\n", - "Epoch: [ 2/ 10], step: [ 72/ 390], loss: [0.7282], avg loss: [0.6535], time: [100.5018ms]\n", - "Epoch: [ 2/ 10], step: [ 73/ 390], loss: [0.7256], avg loss: [0.6545], time: [107.3256ms]\n", - "Epoch: [ 2/ 10], step: [ 74/ 390], loss: [0.7049], avg loss: [0.6551], time: [102.4539ms]\n", - "Epoch: [ 2/ 10], step: [ 75/ 390], loss: [0.7688], avg loss: [0.6567], time: [103.9751ms]\n", - "Epoch: [ 2/ 10], step: [ 76/ 390], loss: [0.6864], avg loss: [0.6571], time: [98.9461ms]\n", - "Epoch: [ 2/ 10], step: [ 77/ 390], loss: [0.6767], avg loss: [0.6573], time: [107.2121ms]\n", - "Epoch: [ 2/ 10], step: [ 78/ 390], loss: [0.6959], avg loss: [0.6578], time: [100.4789ms]\n", - "Epoch: [ 2/ 10], step: [ 79/ 390], loss: [0.6960], avg loss: [0.6583], time: [107.1048ms]\n", - "Epoch: [ 2/ 10], step: [ 80/ 390], loss: [0.6875], avg loss: [0.6587], time: [100.2309ms]\n", - "Epoch: [ 2/ 10], step: [ 81/ 390], loss: [0.6882], avg loss: [0.6590], time: [102.3347ms]\n", - "Epoch: [ 2/ 10], step: [ 82/ 390], loss: [0.6958], avg loss: [0.6595], time: [103.2414ms]\n", - "Epoch: [ 2/ 10], step: [ 83/ 390], loss: [0.6996], avg loss: [0.6599], time: [106.8683ms]\n", - "Epoch: [ 2/ 10], step: [ 84/ 390], loss: [0.6975], avg loss: [0.6604], time: [101.3956ms]\n", - "Epoch: [ 2/ 10], step: [ 85/ 390], loss: [0.6863], avg loss: [0.6607], time: [102.7884ms]\n", - "Epoch: [ 2/ 10], step: [ 86/ 390], loss: [0.6881], avg loss: [0.6610], time: [101.3696ms]\n", - "Epoch: [ 2/ 10], step: [ 87/ 390], loss: [0.6797], avg loss: [0.6612], time: [102.0527ms]\n", - "Epoch: [ 2/ 10], step: [ 88/ 390], loss: [0.6784], avg loss: [0.6614], time: [102.5679ms]\n", - "Epoch: [ 2/ 10], step: [ 89/ 390], loss: [0.6775], avg loss: [0.6616], time: [103.6713ms]\n", - "Epoch: [ 2/ 10], step: [ 90/ 390], loss: [0.6681], avg loss: [0.6617], time: [101.7852ms]\n", - "Epoch: [ 2/ 10], step: [ 91/ 390], loss: [0.6906], avg loss: [0.6620], time: [105.4647ms]\n", - "Epoch: [ 2/ 10], step: [ 92/ 390], loss: [0.6787], avg loss: [0.6622], time: [99.4866ms]\n", - "Epoch: [ 2/ 10], step: [ 93/ 390], loss: [0.6724], avg loss: [0.6623], time: [101.1353ms]\n", - "Epoch: [ 2/ 10], step: [ 94/ 390], loss: [0.6556], avg loss: [0.6622], time: [100.1878ms]\n", - "Epoch: [ 2/ 10], step: [ 95/ 390], loss: [0.6690], avg loss: [0.6623], time: [104.3959ms]\n", - "Epoch: [ 2/ 10], step: [ 96/ 390], loss: [0.6389], avg loss: [0.6620], time: [102.6981ms]\n", - "Epoch: [ 2/ 10], step: [ 97/ 390], loss: [0.6665], avg loss: [0.6621], time: [102.3762ms]\n", - "Epoch: [ 2/ 10], step: [ 98/ 390], loss: [0.6657], avg loss: [0.6621], time: [98.5200ms]\n", - "Epoch: [ 2/ 10], step: [ 99/ 390], loss: [0.6476], avg loss: [0.6620], time: [104.3057ms]\n", - "Epoch: [ 2/ 10], step: [ 100/ 390], loss: [0.6320], avg loss: [0.6617], time: [100.6835ms]\n", - "Epoch: [ 2/ 10], step: [ 101/ 390], loss: [0.6269], avg loss: [0.6613], time: [102.8285ms]\n", - "Epoch: [ 2/ 10], step: [ 102/ 390], loss: [0.6891], avg loss: [0.6616], time: [100.7109ms]\n", - "Epoch: [ 2/ 10], step: [ 103/ 390], loss: [0.6737], avg loss: [0.6617], time: [107.4386ms]\n", - "Epoch: [ 2/ 10], step: [ 104/ 390], loss: [0.6194], avg loss: [0.6613], time: [99.6273ms]\n", - "Epoch: [ 2/ 10], step: [ 105/ 390], loss: [0.6310], avg loss: [0.6610], time: [104.0485ms]\n", - "Epoch: [ 2/ 10], step: [ 106/ 390], loss: [0.6765], avg loss: [0.6612], time: [99.9987ms]\n", - "Epoch: [ 2/ 10], step: [ 107/ 390], loss: [0.5332], avg loss: [0.6600], time: [100.6081ms]\n", - "Epoch: [ 2/ 10], step: [ 108/ 390], loss: [0.6403], avg loss: [0.6598], time: [97.5361ms]\n", - "Epoch: [ 2/ 10], step: [ 109/ 390], loss: [0.6084], avg loss: [0.6593], time: [102.1194ms]\n", - "Epoch: [ 2/ 10], step: [ 110/ 390], loss: [0.6587], avg loss: [0.6593], time: [100.1413ms]\n", - "Epoch: [ 2/ 10], step: [ 111/ 390], loss: [0.5721], avg loss: [0.6585], time: [100.9169ms]\n", - "Epoch: [ 2/ 10], step: [ 112/ 390], loss: [0.6253], avg loss: [0.6582], time: [101.2611ms]\n", - "Epoch: [ 2/ 10], step: [ 113/ 390], loss: [0.5386], avg loss: [0.6572], time: [105.2873ms]\n", - "Epoch: [ 2/ 10], step: [ 114/ 390], loss: [0.6135], avg loss: [0.6568], time: [103.6065ms]\n", - "Epoch: [ 2/ 10], step: [ 115/ 390], loss: [0.4770], avg loss: [0.6552], time: [104.1057ms]\n", - "Epoch: [ 2/ 10], step: [ 116/ 390], loss: [0.5140], avg loss: [0.6540], time: [100.6660ms]\n", - "Epoch: [ 2/ 10], step: [ 117/ 390], loss: [0.7868], avg loss: [0.6552], time: [102.5906ms]\n", - "Epoch: [ 2/ 10], step: [ 118/ 390], loss: [0.6497], avg loss: [0.6551], time: [99.2391ms]\n", - "Epoch: [ 2/ 10], step: [ 119/ 390], loss: [0.6640], avg loss: [0.6552], time: [104.5156ms]\n", - "Epoch: [ 2/ 10], step: [ 120/ 390], loss: [0.7578], avg loss: [0.6560], time: [99.5181ms]\n", - "Epoch: [ 2/ 10], step: [ 121/ 390], loss: [0.6687], avg loss: [0.6561], time: [101.7456ms]\n", - "Epoch: [ 2/ 10], step: [ 122/ 390], loss: [0.5661], avg loss: [0.6554], time: [99.5414ms]\n", - "Epoch: [ 2/ 10], step: [ 123/ 390], loss: [0.5133], avg loss: [0.6542], time: [105.3555ms]\n", - "Epoch: [ 2/ 10], step: [ 124/ 390], loss: [0.6696], avg loss: [0.6544], time: [100.5726ms]\n", - "Epoch: [ 2/ 10], step: [ 125/ 390], loss: [0.5755], avg loss: [0.6537], time: [104.1057ms]\n", - "Epoch: [ 2/ 10], step: [ 126/ 390], loss: [0.6681], avg loss: [0.6539], time: [105.4931ms]\n", - "Epoch: [ 2/ 10], step: [ 127/ 390], loss: [0.6086], avg loss: [0.6535], time: [104.8403ms]\n", - "Epoch: [ 2/ 10], step: [ 128/ 390], loss: [0.6800], avg loss: [0.6537], time: [99.4642ms]\n", - "Epoch: [ 2/ 10], step: [ 129/ 390], loss: [0.6341], avg loss: [0.6536], time: [102.6721ms]\n", - "Epoch: [ 2/ 10], step: [ 130/ 390], loss: [0.5987], avg loss: [0.6531], time: [100.3625ms]\n", - "Epoch: [ 2/ 10], step: [ 131/ 390], loss: [0.7033], avg loss: [0.6535], time: [103.8911ms]\n", - "Epoch: [ 2/ 10], step: [ 132/ 390], loss: [0.6140], avg loss: [0.6532], time: [103.9319ms]\n", - "Epoch: [ 2/ 10], step: [ 133/ 390], loss: [0.6079], avg loss: [0.6529], time: [106.1463ms]\n", - "Epoch: [ 2/ 10], step: [ 134/ 390], loss: [0.7079], avg loss: [0.6533], time: [99.8135ms]\n", - "Epoch: [ 2/ 10], step: [ 135/ 390], loss: [0.5892], avg loss: [0.6528], time: [106.6921ms]\n", - "Epoch: [ 2/ 10], step: [ 136/ 390], loss: [0.6120], avg loss: [0.6525], time: [104.1679ms]\n", - "Epoch: [ 2/ 10], step: [ 137/ 390], loss: [0.5910], avg loss: [0.6521], time: [103.2820ms]\n", - "Epoch: [ 2/ 10], step: [ 138/ 390], loss: [0.6155], avg loss: [0.6518], time: [99.8282ms]\n", - "Epoch: [ 2/ 10], step: [ 139/ 390], loss: [0.5877], avg loss: [0.6513], time: [101.1064ms]\n", - "Epoch: [ 2/ 10], step: [ 140/ 390], loss: [0.6593], avg loss: [0.6514], time: [99.6432ms]\n", - "Epoch: [ 2/ 10], step: [ 141/ 390], loss: [0.6068], avg loss: [0.6511], time: [103.2786ms]\n", - "Epoch: [ 2/ 10], step: [ 142/ 390], loss: [0.5731], avg loss: [0.6505], time: [101.6634ms]\n" + "epoch: 2 step: 54, loss is 0.5748\n", + "epoch: 2 step: 55, loss is 0.5293\n", + "epoch: 2 step: 56, loss is 0.5660\n", + "epoch: 2 step: 57, loss is 0.5283\n", + "epoch: 2 step: 58, loss is 0.5347\n", + "epoch: 2 step: 59, loss is 0.5154\n", + "epoch: 2 step: 60, loss is 0.6732\n", + "epoch: 2 step: 61, loss is 0.5197\n", + "epoch: 2 step: 62, loss is 0.7254\n", + "epoch: 2 step: 63, loss is 0.9070\n", + "epoch: 2 step: 64, loss is 0.5558\n", + "epoch: 2 step: 65, loss is 1.0045\n", + "epoch: 2 step: 66, loss is 0.8933\n", + "epoch: 2 step: 67, loss is 1.0105\n", + "epoch: 2 step: 68, loss is 0.6910\n", + "epoch: 2 step: 69, loss is 0.6372\n", + "epoch: 2 step: 70, loss is 0.6704\n", + "epoch: 2 step: 71, loss is 0.7066\n", + "epoch: 2 step: 72, loss is 0.7282\n", + "epoch: 2 step: 73, loss is 0.7256\n", + "epoch: 2 step: 74, loss is 0.7049\n", + "epoch: 2 step: 75, loss is 0.7688\n", + "epoch: 2 step: 76, loss is 0.6864\n", + "epoch: 2 step: 77, loss is 0.6767\n", + "epoch: 2 step: 78, loss is 0.6959\n", + "epoch: 2 step: 79, loss is 0.6960\n", + "epoch: 2 step: 80, loss is 0.6875\n", + "epoch: 2 step: 81, loss is 0.6882\n", + "epoch: 2 step: 82, loss is 0.6958\n", + "epoch: 2 step: 83, loss is 0.6996\n", + "epoch: 2 step: 84, loss is 0.6975\n", + "epoch: 2 step: 85, loss is 0.6863\n", + "epoch: 2 step: 86, loss is 0.6881\n", + "epoch: 2 step: 87, loss is 0.6797\n", + "epoch: 2 step: 88, loss is 0.6784\n", + "epoch: 2 step: 89, loss is 0.6775\n", + "epoch: 2 step: 90, loss is 0.6681\n", + "epoch: 2 step: 91, loss is 0.6906\n", + "epoch: 2 step: 92, loss is 0.6787\n", + "epoch: 2 step: 93, loss is 0.6724\n", + "epoch: 2 step: 94, loss is 0.6556\n", + "epoch: 2 step: 95, loss is 0.6690\n", + "epoch: 2 step: 96, loss is 0.6389\n", + "epoch: 2 step: 97, loss is 0.6665\n", + "epoch: 2 step: 98, loss is 0.6657\n", + "epoch: 2 step: 99, loss is 0.6476\n", + "epoch: 2 step: 100, loss is 0.6320\n", + "epoch: 2 step: 101, loss is 0.6269\n", + "epoch: 2 step: 102, loss is 0.6891\n", + "epoch: 2 step: 103, loss is 0.6737\n", + "epoch: 2 step: 104, loss is 0.6194\n", + "epoch: 2 step: 105, loss is 0.6310\n", + "epoch: 2 step: 106, loss is 0.6765\n", + "epoch: 2 step: 107, loss is 0.5332\n", + "epoch: 2 step: 108, loss is 0.6403\n", + "epoch: 2 step: 109, loss is 0.6084\n", + "epoch: 2 step: 110, loss is 0.6587\n", + "epoch: 2 step: 111, loss is 0.5721\n", + "epoch: 2 step: 112, loss is 0.6253\n", + "epoch: 2 step: 113, loss is 0.5386\n", + "epoch: 2 step: 114, loss is 0.6135\n", + "epoch: 2 step: 115, loss is 0.4770\n", + "epoch: 2 step: 116, loss is 0.5140\n", + "epoch: 2 step: 117, loss is 0.7868\n", + "epoch: 2 step: 118, loss is 0.6497\n", + "epoch: 2 step: 119, loss is 0.6640\n", + "epoch: 2 step: 120, loss is 0.7578\n", + "epoch: 2 step: 121, loss is 0.6687\n", + "epoch: 2 step: 122, loss is 0.5661\n", + "epoch: 2 step: 123, loss is 0.5133\n", + "epoch: 2 step: 124, loss is 0.6696\n", + "epoch: 2 step: 125, loss is 0.5755\n", + "epoch: 2 step: 126, loss is 0.6681\n", + "epoch: 2 step: 127, loss is 0.6086\n", + "epoch: 2 step: 128, loss is 0.6800\n", + "epoch: 2 step: 129, loss is 0.6341\n", + "epoch: 2 step: 130, loss is 0.5987\n", + "epoch: 2 step: 131, loss is 0.7033\n", + "epoch: 2 step: 132, loss is 0.6140\n", + "epoch: 2 step: 133, loss is 0.6079\n", + "epoch: 2 step: 134, loss is 0.7079\n", + "epoch: 2 step: 135, loss is 0.5892\n", + "epoch: 2 step: 136, loss is 0.6120\n", + "epoch: 2 step: 137, loss is 0.5910\n", + "epoch: 2 step: 138, loss is 0.6155\n", + "epoch: 2 step: 139, loss is 0.5877\n", + "epoch: 2 step: 140, loss is 0.6593\n", + "epoch: 2 step: 141, loss is 0.6068\n", + "epoch: 2 step: 142, loss is 0.5731\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 2/ 10], step: [ 143/ 390], loss: [0.5716], avg loss: [0.6500], time: [102.3705ms]\n", - "Epoch: [ 2/ 10], step: [ 144/ 390], loss: [0.6271], avg loss: [0.6498], time: [98.9797ms]\n", - "Epoch: [ 2/ 10], step: [ 145/ 390], loss: [0.5050], avg loss: [0.6488], time: [101.5060ms]\n", - "Epoch: [ 2/ 10], step: [ 146/ 390], loss: [0.5590], avg loss: [0.6482], time: [98.9730ms]\n", - "Epoch: [ 2/ 10], step: [ 147/ 390], loss: [0.6321], avg loss: [0.6481], time: [102.5190ms]\n", - "Epoch: [ 2/ 10], step: [ 148/ 390], loss: [0.6130], avg loss: [0.6479], time: [100.9171ms]\n", - "Epoch: [ 2/ 10], step: [ 149/ 390], loss: [0.5702], avg loss: [0.6473], time: [105.2413ms]\n", - "Epoch: [ 2/ 10], step: [ 150/ 390], loss: [0.5732], avg loss: [0.6468], time: [99.3552ms]\n", - "Epoch: [ 2/ 10], step: [ 151/ 390], loss: [0.5903], avg loss: [0.6465], time: [100.8067ms]\n", - "Epoch: [ 2/ 10], step: [ 152/ 390], loss: [0.5511], avg loss: [0.6458], time: [100.9417ms]\n", - "Epoch: [ 2/ 10], step: [ 153/ 390], loss: [0.6821], avg loss: [0.6461], time: [102.9837ms]\n", - "Epoch: [ 2/ 10], step: [ 154/ 390], loss: [0.4778], avg loss: [0.6450], time: [101.2075ms]\n", - "Epoch: [ 2/ 10], step: [ 155/ 390], loss: [0.6927], avg loss: [0.6453], time: [102.1085ms]\n", - "Epoch: [ 2/ 10], step: [ 156/ 390], loss: [0.5322], avg loss: [0.6446], time: [101.6080ms]\n", - "Epoch: [ 2/ 10], step: [ 157/ 390], loss: [0.4992], avg loss: [0.6436], time: [104.5997ms]\n", - "Epoch: [ 2/ 10], step: [ 158/ 390], loss: [0.5179], avg loss: [0.6428], time: [102.2282ms]\n", - "Epoch: [ 2/ 10], step: [ 159/ 390], loss: [0.7331], avg loss: [0.6434], time: [106.7560ms]\n", - "Epoch: [ 2/ 10], step: [ 160/ 390], loss: [0.6702], avg loss: [0.6436], time: [102.6788ms]\n", - "Epoch: [ 2/ 10], step: [ 161/ 390], loss: [0.5674], avg loss: [0.6431], time: [104.6824ms]\n", - "Epoch: [ 2/ 10], step: [ 162/ 390], loss: [0.6555], avg loss: [0.6432], time: [102.5145ms]\n", - "Epoch: [ 2/ 10], step: [ 163/ 390], loss: [0.6740], avg loss: [0.6434], time: [104.1384ms]\n", - "Epoch: [ 2/ 10], step: [ 164/ 390], loss: [0.6001], avg loss: [0.6431], time: [101.2747ms]\n", - "Epoch: [ 2/ 10], step: [ 165/ 390], loss: [0.6950], avg loss: [0.6434], time: [101.4607ms]\n", - "Epoch: [ 2/ 10], step: [ 166/ 390], loss: [0.6409], avg loss: [0.6434], time: [98.8727ms]\n", - "Epoch: [ 2/ 10], step: [ 167/ 390], loss: [0.5637], avg loss: [0.6429], time: [101.1057ms]\n", - "Epoch: [ 2/ 10], step: [ 168/ 390], loss: [0.5931], avg loss: [0.6426], time: [100.9135ms]\n", - "Epoch: [ 2/ 10], step: [ 169/ 390], loss: [0.5834], avg loss: [0.6423], time: [101.4235ms]\n", - "Epoch: [ 2/ 10], step: [ 170/ 390], loss: [0.6347], avg loss: [0.6422], time: [100.3559ms]\n", - "Epoch: [ 2/ 10], step: [ 171/ 390], loss: [0.5378], avg loss: [0.6416], time: [101.4481ms]\n", - "Epoch: [ 2/ 10], step: [ 172/ 390], loss: [0.5672], avg loss: [0.6412], time: [102.9112ms]\n", - "Epoch: [ 2/ 10], step: [ 173/ 390], loss: [0.5801], avg loss: [0.6408], time: [105.4759ms]\n", - "Epoch: [ 2/ 10], step: [ 174/ 390], loss: [0.4901], avg loss: [0.6400], time: [100.9290ms]\n", - "Epoch: [ 2/ 10], step: [ 175/ 390], loss: [0.6125], avg loss: [0.6398], time: [101.1415ms]\n", - "Epoch: [ 2/ 10], step: [ 176/ 390], loss: [0.5406], avg loss: [0.6393], time: [99.3505ms]\n", - "Epoch: [ 2/ 10], step: [ 177/ 390], loss: [0.5562], avg loss: [0.6388], time: [104.1486ms]\n", - "Epoch: [ 2/ 10], step: [ 178/ 390], loss: [0.5569], avg loss: [0.6383], time: [97.9052ms]\n", - "Epoch: [ 2/ 10], step: [ 179/ 390], loss: [0.3951], avg loss: [0.6370], time: [104.2259ms]\n", - "Epoch: [ 2/ 10], step: [ 180/ 390], loss: [0.5006], avg loss: [0.6362], time: [103.7629ms]\n", - "Epoch: [ 2/ 10], step: [ 181/ 390], loss: [0.5864], avg loss: [0.6359], time: [105.7148ms]\n", - "Epoch: [ 2/ 10], step: [ 182/ 390], loss: [0.4957], avg loss: [0.6352], time: [101.3072ms]\n", - "Epoch: [ 2/ 10], step: [ 183/ 390], loss: [0.6649], avg loss: [0.6353], time: [104.5079ms]\n", - "Epoch: [ 2/ 10], step: [ 184/ 390], loss: [0.6399], avg loss: [0.6354], time: [99.3087ms]\n", - "Epoch: [ 2/ 10], step: [ 185/ 390], loss: [0.5149], avg loss: [0.6347], time: [103.5523ms]\n", - "Epoch: [ 2/ 10], step: [ 186/ 390], loss: [0.4174], avg loss: [0.6335], time: [99.2980ms]\n", - "Epoch: [ 2/ 10], step: [ 187/ 390], loss: [0.7648], avg loss: [0.6342], time: [102.2778ms]\n", - "Epoch: [ 2/ 10], step: [ 188/ 390], loss: [0.5523], avg loss: [0.6338], time: [97.9698ms]\n", - "Epoch: [ 2/ 10], step: [ 189/ 390], loss: [0.5934], avg loss: [0.6336], time: [105.1064ms]\n", - "Epoch: [ 2/ 10], step: [ 190/ 390], loss: [0.6805], avg loss: [0.6338], time: [99.9675ms]\n", - "Epoch: [ 2/ 10], step: [ 191/ 390], loss: [0.6683], avg loss: [0.6340], time: [103.8826ms]\n", - "Epoch: [ 2/ 10], step: [ 192/ 390], loss: [0.6629], avg loss: [0.6342], time: [100.7442ms]\n", - "Epoch: [ 2/ 10], step: [ 193/ 390], loss: [0.6230], avg loss: [0.6341], time: [105.6695ms]\n", - "Epoch: [ 2/ 10], step: [ 194/ 390], loss: [0.6168], avg loss: [0.6340], time: [99.7014ms]\n", - "Epoch: [ 2/ 10], step: [ 195/ 390], loss: [0.6821], avg loss: [0.6343], time: [102.6065ms]\n", - "Epoch: [ 2/ 10], step: [ 196/ 390], loss: [0.7211], avg loss: [0.6347], time: [104.3158ms]\n", - "Epoch: [ 2/ 10], step: [ 197/ 390], loss: [0.6533], avg loss: [0.6348], time: [105.0341ms]\n", - "Epoch: [ 2/ 10], step: [ 198/ 390], loss: [0.6404], avg loss: [0.6348], time: [99.8521ms]\n", - "Epoch: [ 2/ 10], step: [ 199/ 390], loss: [0.6608], avg loss: [0.6350], time: [104.3358ms]\n", - "Epoch: [ 2/ 10], step: [ 200/ 390], loss: [0.6375], avg loss: [0.6350], time: [99.2339ms]\n", - "Epoch: [ 2/ 10], step: [ 201/ 390], loss: [0.6338], avg loss: [0.6350], time: [104.8193ms]\n", - "Epoch: [ 2/ 10], step: [ 202/ 390], loss: [0.6354], avg loss: [0.6350], time: [105.6454ms]\n", - "Epoch: [ 2/ 10], step: [ 203/ 390], loss: [0.6465], avg loss: [0.6350], time: [101.4972ms]\n", - "Epoch: [ 2/ 10], step: [ 204/ 390], loss: [0.6536], avg loss: [0.6351], time: [102.9661ms]\n", - "Epoch: [ 2/ 10], step: [ 205/ 390], loss: [0.5844], avg loss: [0.6349], time: [106.3495ms]\n", - "Epoch: [ 2/ 10], step: [ 206/ 390], loss: [0.6177], avg loss: [0.6348], time: [101.8763ms]\n", - "Epoch: [ 2/ 10], step: [ 207/ 390], loss: [0.5648], avg loss: [0.6344], time: [101.6600ms]\n", - "Epoch: [ 2/ 10], step: [ 208/ 390], loss: [0.6025], avg loss: [0.6343], time: [101.8512ms]\n", - "Epoch: [ 2/ 10], step: [ 209/ 390], loss: [0.6338], avg loss: [0.6343], time: [102.0007ms]\n", - "Epoch: [ 2/ 10], step: [ 210/ 390], loss: [0.6129], avg loss: [0.6342], time: [100.6916ms]\n", - "Epoch: [ 2/ 10], step: [ 211/ 390], loss: [0.5973], avg loss: [0.6340], time: [103.9431ms]\n", - "Epoch: [ 2/ 10], step: [ 212/ 390], loss: [0.5701], avg loss: [0.6337], time: [101.2611ms]\n", - "Epoch: [ 2/ 10], step: [ 213/ 390], loss: [0.6290], avg loss: [0.6337], time: [105.3917ms]\n", - "Epoch: [ 2/ 10], step: [ 214/ 390], loss: [0.6365], avg loss: [0.6337], time: [99.4079ms]\n", - "Epoch: [ 2/ 10], step: [ 215/ 390], loss: [0.5804], avg loss: [0.6335], time: [106.0677ms]\n", - "Epoch: [ 2/ 10], step: [ 216/ 390], loss: [0.5661], avg loss: [0.6331], time: [100.2049ms]\n", - "Epoch: [ 2/ 10], step: [ 217/ 390], loss: [0.5607], avg loss: [0.6328], time: [102.9911ms]\n", - "Epoch: [ 2/ 10], step: [ 218/ 390], loss: [0.5945], avg loss: [0.6326], time: [102.4287ms]\n", - "Epoch: [ 2/ 10], step: [ 219/ 390], loss: [0.5714], avg loss: [0.6324], time: [100.8162ms]\n", - "Epoch: [ 2/ 10], step: [ 220/ 390], loss: [0.5354], avg loss: [0.6319], time: [101.4338ms]\n", - "Epoch: [ 2/ 10], step: [ 221/ 390], loss: [0.5116], avg loss: [0.6314], time: [107.0015ms]\n", - "Epoch: [ 2/ 10], step: [ 222/ 390], loss: [0.6198], avg loss: [0.6313], time: [99.7632ms]\n", - "Epoch: [ 2/ 10], step: [ 223/ 390], loss: [0.6505], avg loss: [0.6314], time: [104.2926ms]\n", - "Epoch: [ 2/ 10], step: [ 224/ 390], loss: [0.5248], avg loss: [0.6309], time: [103.3173ms]\n", - "Epoch: [ 2/ 10], step: [ 225/ 390], loss: [0.6669], avg loss: [0.6311], time: [104.2821ms]\n", - "Epoch: [ 2/ 10], step: [ 226/ 390], loss: [0.5932], avg loss: [0.6309], time: [102.8402ms]\n", - "Epoch: [ 2/ 10], step: [ 227/ 390], loss: [0.5155], avg loss: [0.6304], time: [103.1370ms]\n", - "Epoch: [ 2/ 10], step: [ 228/ 390], loss: [0.7595], avg loss: [0.6310], time: [101.3536ms]\n", - "Epoch: [ 2/ 10], step: [ 229/ 390], loss: [0.5325], avg loss: [0.6305], time: [107.0580ms]\n", - "Epoch: [ 2/ 10], step: [ 230/ 390], loss: [0.4261], avg loss: [0.6297], time: [100.3525ms]\n", - "Epoch: [ 2/ 10], step: [ 231/ 390], loss: [0.7548], avg loss: [0.6302], time: [101.5737ms]\n" + "epoch: 2 step: 143, loss is 0.5716\n", + "epoch: 2 step: 144, loss is 0.6271\n", + "epoch: 2 step: 145, loss is 0.5050\n", + "epoch: 2 step: 146, loss is 0.5590\n", + "epoch: 2 step: 147, loss is 0.6321\n", + "epoch: 2 step: 148, loss is 0.6130\n", + "epoch: 2 step: 149, loss is 0.5702\n", + "epoch: 2 step: 150, loss is 0.5732\n", + "epoch: 2 step: 151, loss is 0.5903\n", + "epoch: 2 step: 152, loss is 0.5511\n", + "epoch: 2 step: 153, loss is 0.6821\n", + "epoch: 2 step: 154, loss is 0.4778\n", + "epoch: 2 step: 155, loss is 0.6927\n", + "epoch: 2 step: 156, loss is 0.5322\n", + "epoch: 2 step: 157, loss is 0.4992\n", + "epoch: 2 step: 158, loss is 0.5179\n", + "epoch: 2 step: 159, loss is 0.7331\n", + "epoch: 2 step: 160, loss is 0.6702\n", + "epoch: 2 step: 161, loss is 0.5674\n", + "epoch: 2 step: 162, loss is 0.6555\n", + "epoch: 2 step: 163, loss is 0.6740\n", + "epoch: 2 step: 164, loss is 0.6001\n", + "epoch: 2 step: 165, loss is 0.6950\n", + "epoch: 2 step: 166, loss is 0.6409\n", + "epoch: 2 step: 167, loss is 0.5637\n", + "epoch: 2 step: 168, loss is 0.5931\n", + "epoch: 2 step: 169, loss is 0.5834\n", + "epoch: 2 step: 170, loss is 0.6347\n", + "epoch: 2 step: 171, loss is 0.5378\n", + "epoch: 2 step: 172, loss is 0.5672\n", + "epoch: 2 step: 173, loss is 0.5801\n", + "epoch: 2 step: 174, loss is 0.4901\n", + "epoch: 2 step: 175, loss is 0.6125\n", + "epoch: 2 step: 176, loss is 0.5406\n", + "epoch: 2 step: 177, loss is 0.5562\n", + "epoch: 2 step: 178, loss is 0.5569\n", + "epoch: 2 step: 179, loss is 0.3951\n", + "epoch: 2 step: 180, loss is 0.5006\n", + "epoch: 2 step: 181, loss is 0.5864\n", + "epoch: 2 step: 182, loss is 0.4957\n", + "epoch: 2 step: 183, loss is 0.6649\n", + "epoch: 2 step: 184, loss is 0.6399\n", + "epoch: 2 step: 185, loss is 0.5149\n", + "epoch: 2 step: 186, loss is 0.4174\n", + "epoch: 2 step: 187, loss is 0.7648\n", + "epoch: 2 step: 188, loss is 0.5523\n", + "epoch: 2 step: 189, loss is 0.5934\n", + "epoch: 2 step: 190, loss is 0.6805\n", + "epoch: 2 step: 191, loss is 0.6683\n", + "epoch: 2 step: 192, loss is 0.6629\n", + "epoch: 2 step: 193, loss is 0.6230\n", + "epoch: 2 step: 194, loss is 0.6168\n", + "epoch: 2 step: 195, loss is 0.6821\n", + "epoch: 2 step: 196, loss is 0.7211\n", + "epoch: 2 step: 197, loss is 0.6533\n", + "epoch: 2 step: 198, loss is 0.6404\n", + "epoch: 2 step: 199, loss is 0.6608\n", + "epoch: 2 step: 200, loss is 0.6375\n", + "epoch: 2 step: 201, loss is 0.6338\n", + "epoch: 2 step: 202, loss is 0.6354\n", + "epoch: 2 step: 203, loss is 0.6465\n", + "epoch: 2 step: 204, loss is 0.6536\n", + "epoch: 2 step: 205, loss is 0.5844\n", + "epoch: 2 step: 206, loss is 0.6177\n", + "epoch: 2 step: 207, loss is 0.5648\n", + "epoch: 2 step: 208, loss is 0.6025\n", + "epoch: 2 step: 209, loss is 0.6338\n", + "epoch: 2 step: 210, loss is 0.6129\n", + "epoch: 2 step: 211, loss is 0.5973\n", + "epoch: 2 step: 212, loss is 0.5701\n", + "epoch: 2 step: 213, loss is 0.6290\n", + "epoch: 2 step: 214, loss is 0.6365\n", + "epoch: 2 step: 215, loss is 0.5804\n", + "epoch: 2 step: 216, loss is 0.5661\n", + "epoch: 2 step: 217, loss is 0.5607\n", + "epoch: 2 step: 218, loss is 0.5945\n", + "epoch: 2 step: 219, loss is 0.5714\n", + "epoch: 2 step: 220, loss is 0.5354\n", + "epoch: 2 step: 221, loss is 0.5116\n", + "epoch: 2 step: 222, loss is 0.6198\n", + "epoch: 2 step: 223, loss is 0.6505\n", + "epoch: 2 step: 224, loss is 0.5248\n", + "epoch: 2 step: 225, loss is 0.6669\n", + "epoch: 2 step: 226, loss is 0.5932\n", + "epoch: 2 step: 227, loss is 0.5155\n", + "epoch: 2 step: 228, loss is 0.7595\n", + "epoch: 2 step: 229, loss is 0.5325\n", + "epoch: 2 step: 230, loss is 0.4261\n", + "epoch: 2 step: 231, loss is 0.7548\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 2/ 10], step: [ 232/ 390], loss: [0.5745], avg loss: [0.6300], time: [99.3531ms]\n", - "Epoch: [ 2/ 10], step: [ 233/ 390], loss: [0.5614], avg loss: [0.6297], time: [107.9669ms]\n", - "Epoch: [ 2/ 10], step: [ 234/ 390], loss: [0.5357], avg loss: [0.6293], time: [101.3925ms]\n", - "Epoch: [ 2/ 10], step: [ 235/ 390], loss: [0.5186], avg loss: [0.6288], time: [102.6874ms]\n", - "Epoch: [ 2/ 10], step: [ 236/ 390], loss: [0.6700], avg loss: [0.6290], time: [103.9753ms]\n", - "Epoch: [ 2/ 10], step: [ 237/ 390], loss: [0.5584], avg loss: [0.6287], time: [101.3067ms]\n", - "Epoch: [ 2/ 10], step: [ 238/ 390], loss: [0.5589], avg loss: [0.6284], time: [103.8983ms]\n", - "Epoch: [ 2/ 10], step: [ 239/ 390], loss: [0.5363], avg loss: [0.6280], time: [105.3576ms]\n", - "Epoch: [ 2/ 10], step: [ 240/ 390], loss: [0.5776], avg loss: [0.6278], time: [101.3453ms]\n", - "Epoch: [ 2/ 10], step: [ 241/ 390], loss: [0.7283], avg loss: [0.6282], time: [104.6221ms]\n", - "Epoch: [ 2/ 10], step: [ 242/ 390], loss: [0.5002], avg loss: [0.6277], time: [98.7802ms]\n", - "Epoch: [ 2/ 10], step: [ 243/ 390], loss: [0.5267], avg loss: [0.6273], time: [107.5363ms]\n", - "Epoch: [ 2/ 10], step: [ 244/ 390], loss: [0.7191], avg loss: [0.6276], time: [103.0767ms]\n", - "Epoch: [ 2/ 10], step: [ 245/ 390], loss: [0.5527], avg loss: [0.6273], time: [106.7441ms]\n", - "Epoch: [ 2/ 10], step: [ 246/ 390], loss: [0.6456], avg loss: [0.6274], time: [102.1268ms]\n", - "Epoch: [ 2/ 10], step: [ 247/ 390], loss: [0.4888], avg loss: [0.6268], time: [105.3157ms]\n", - "Epoch: [ 2/ 10], step: [ 248/ 390], loss: [0.5648], avg loss: [0.6266], time: [97.7314ms]\n", - "Epoch: [ 2/ 10], step: [ 249/ 390], loss: [0.5652], avg loss: [0.6263], time: [103.8227ms]\n", - "Epoch: [ 2/ 10], step: [ 250/ 390], loss: [0.5415], avg loss: [0.6260], time: [101.5322ms]\n", - "Epoch: [ 2/ 10], step: [ 251/ 390], loss: [0.5158], avg loss: [0.6256], time: [104.6233ms]\n", - "Epoch: [ 2/ 10], step: [ 252/ 390], loss: [0.6121], avg loss: [0.6255], time: [100.1446ms]\n", - "Epoch: [ 2/ 10], step: [ 253/ 390], loss: [0.4672], avg loss: [0.6249], time: [102.5453ms]\n", - "Epoch: [ 2/ 10], step: [ 254/ 390], loss: [0.5177], avg loss: [0.6245], time: [100.5530ms]\n", - "Epoch: [ 2/ 10], step: [ 255/ 390], loss: [0.5891], avg loss: [0.6243], time: [103.0157ms]\n", - "Epoch: [ 2/ 10], step: [ 256/ 390], loss: [0.5838], avg loss: [0.6242], time: [101.9323ms]\n", - "Epoch: [ 2/ 10], step: [ 257/ 390], loss: [0.5129], avg loss: [0.6237], time: [103.1694ms]\n", - "Epoch: [ 2/ 10], step: [ 258/ 390], loss: [0.4615], avg loss: [0.6231], time: [100.1391ms]\n", - "Epoch: [ 2/ 10], step: [ 259/ 390], loss: [0.4765], avg loss: [0.6225], time: [104.8985ms]\n", - "Epoch: [ 2/ 10], step: [ 260/ 390], loss: [0.5161], avg loss: [0.6221], time: [99.1123ms]\n", - "Epoch: [ 2/ 10], step: [ 261/ 390], loss: [0.5247], avg loss: [0.6218], time: [104.0046ms]\n", - "Epoch: [ 2/ 10], step: [ 262/ 390], loss: [0.4824], avg loss: [0.6212], time: [103.3409ms]\n", - "Epoch: [ 2/ 10], step: [ 263/ 390], loss: [0.4950], avg loss: [0.6207], time: [102.7055ms]\n", - "Epoch: [ 2/ 10], step: [ 264/ 390], loss: [0.4001], avg loss: [0.6199], time: [105.7043ms]\n", - "Epoch: [ 2/ 10], step: [ 265/ 390], loss: [0.3896], avg loss: [0.6190], time: [102.8132ms]\n", - "Epoch: [ 2/ 10], step: [ 266/ 390], loss: [0.5145], avg loss: [0.6186], time: [100.1244ms]\n", - "Epoch: [ 2/ 10], step: [ 267/ 390], loss: [0.4265], avg loss: [0.6179], time: [105.5131ms]\n", - "Epoch: [ 2/ 10], step: [ 268/ 390], loss: [0.3818], avg loss: [0.6170], time: [96.6427ms]\n", - "Epoch: [ 2/ 10], step: [ 269/ 390], loss: [0.2814], avg loss: [0.6158], time: [106.8134ms]\n", - "Epoch: [ 2/ 10], step: [ 270/ 390], loss: [0.5369], avg loss: [0.6155], time: [101.3644ms]\n", - "Epoch: [ 2/ 10], step: [ 271/ 390], loss: [0.3595], avg loss: [0.6146], time: [103.8542ms]\n", - "Epoch: [ 2/ 10], step: [ 272/ 390], loss: [0.4517], avg loss: [0.6140], time: [105.8245ms]\n", - "Epoch: [ 2/ 10], step: [ 273/ 390], loss: [0.7099], avg loss: [0.6143], time: [104.4259ms]\n", - "Epoch: [ 2/ 10], step: [ 274/ 390], loss: [0.4052], avg loss: [0.6135], time: [99.7348ms]\n", - "Epoch: [ 2/ 10], step: [ 275/ 390], loss: [0.4128], avg loss: [0.6128], time: [105.0575ms]\n", - "Epoch: [ 2/ 10], step: [ 276/ 390], loss: [0.7017], avg loss: [0.6131], time: [101.4626ms]\n", - "Epoch: [ 2/ 10], step: [ 277/ 390], loss: [0.4718], avg loss: [0.6126], time: [104.1567ms]\n", - "Epoch: [ 2/ 10], step: [ 278/ 390], loss: [0.4687], avg loss: [0.6121], time: [101.0737ms]\n", - "Epoch: [ 2/ 10], step: [ 279/ 390], loss: [0.4270], avg loss: [0.6114], time: [102.0095ms]\n", - "Epoch: [ 2/ 10], step: [ 280/ 390], loss: [0.4992], avg loss: [0.6110], time: [102.6990ms]\n", - "Epoch: [ 2/ 10], step: [ 281/ 390], loss: [0.4861], avg loss: [0.6106], time: [105.7308ms]\n", - "Epoch: [ 2/ 10], step: [ 282/ 390], loss: [0.5556], avg loss: [0.6104], time: [100.7905ms]\n", - "Epoch: [ 2/ 10], step: [ 283/ 390], loss: [0.5015], avg loss: [0.6100], time: [103.3075ms]\n", - "Epoch: [ 2/ 10], step: [ 284/ 390], loss: [0.5049], avg loss: [0.6097], time: [101.5048ms]\n", - "Epoch: [ 2/ 10], step: [ 285/ 390], loss: [0.5007], avg loss: [0.6093], time: [99.9162ms]\n", - "Epoch: [ 2/ 10], step: [ 286/ 390], loss: [0.5154], avg loss: [0.6089], time: [103.2648ms]\n", - "Epoch: [ 2/ 10], step: [ 287/ 390], loss: [0.5927], avg loss: [0.6089], time: [107.6899ms]\n", - "Epoch: [ 2/ 10], step: [ 288/ 390], loss: [0.5553], avg loss: [0.6087], time: [98.1123ms]\n", - "Epoch: [ 2/ 10], step: [ 289/ 390], loss: [0.5091], avg loss: [0.6084], time: [106.3700ms]\n", - "Epoch: [ 2/ 10], step: [ 290/ 390], loss: [0.4555], avg loss: [0.6078], time: [101.5131ms]\n", - "Epoch: [ 2/ 10], step: [ 291/ 390], loss: [0.4482], avg loss: [0.6073], time: [103.6847ms]\n", - "Epoch: [ 2/ 10], step: [ 292/ 390], loss: [0.4880], avg loss: [0.6069], time: [97.4264ms]\n", - "Epoch: [ 2/ 10], step: [ 293/ 390], loss: [0.4739], avg loss: [0.6064], time: [102.1864ms]\n", - "Epoch: [ 2/ 10], step: [ 294/ 390], loss: [0.4351], avg loss: [0.6058], time: [100.0397ms]\n", - "Epoch: [ 2/ 10], step: [ 295/ 390], loss: [0.5434], avg loss: [0.6056], time: [107.4185ms]\n", - "Epoch: [ 2/ 10], step: [ 296/ 390], loss: [0.4808], avg loss: [0.6052], time: [100.1728ms]\n", - "Epoch: [ 2/ 10], step: [ 297/ 390], loss: [0.5042], avg loss: [0.6049], time: [104.5783ms]\n", - "Epoch: [ 2/ 10], step: [ 298/ 390], loss: [0.4165], avg loss: [0.6042], time: [102.1380ms]\n", - "Epoch: [ 2/ 10], step: [ 299/ 390], loss: [0.3246], avg loss: [0.6033], time: [106.3824ms]\n", - "Epoch: [ 2/ 10], step: [ 300/ 390], loss: [0.4363], avg loss: [0.6027], time: [99.7972ms]\n", - "Epoch: [ 2/ 10], step: [ 301/ 390], loss: [0.4205], avg loss: [0.6021], time: [101.7594ms]\n", - "Epoch: [ 2/ 10], step: [ 302/ 390], loss: [0.4846], avg loss: [0.6017], time: [98.7082ms]\n", - "Epoch: [ 2/ 10], step: [ 303/ 390], loss: [0.3752], avg loss: [0.6010], time: [102.6835ms]\n", - "Epoch: [ 2/ 10], step: [ 304/ 390], loss: [0.5174], avg loss: [0.6007], time: [99.9498ms]\n", - "Epoch: [ 2/ 10], step: [ 305/ 390], loss: [0.4815], avg loss: [0.6003], time: [105.4149ms]\n", - "Epoch: [ 2/ 10], step: [ 306/ 390], loss: [0.5788], avg loss: [0.6003], time: [100.3840ms]\n", - "Epoch: [ 2/ 10], step: [ 307/ 390], loss: [0.3501], avg loss: [0.5994], time: [102.5753ms]\n", - "Epoch: [ 2/ 10], step: [ 308/ 390], loss: [0.5348], avg loss: [0.5992], time: [100.6083ms]\n", - "Epoch: [ 2/ 10], step: [ 309/ 390], loss: [0.4691], avg loss: [0.5988], time: [107.1885ms]\n", - "Epoch: [ 2/ 10], step: [ 310/ 390], loss: [0.5035], avg loss: [0.5985], time: [103.5054ms]\n", - "Epoch: [ 2/ 10], step: [ 311/ 390], loss: [0.5681], avg loss: [0.5984], time: [107.4185ms]\n", - "Epoch: [ 2/ 10], step: [ 312/ 390], loss: [0.5657], avg loss: [0.5983], time: [101.8474ms]\n", - "Epoch: [ 2/ 10], step: [ 313/ 390], loss: [0.4784], avg loss: [0.5979], time: [104.7087ms]\n", - "Epoch: [ 2/ 10], step: [ 314/ 390], loss: [0.5547], avg loss: [0.5978], time: [103.3826ms]\n", - "Epoch: [ 2/ 10], step: [ 315/ 390], loss: [0.5812], avg loss: [0.5977], time: [102.4528ms]\n", - "Epoch: [ 2/ 10], step: [ 316/ 390], loss: [0.4795], avg loss: [0.5974], time: [100.2314ms]\n", - "Epoch: [ 2/ 10], step: [ 317/ 390], loss: [0.5181], avg loss: [0.5971], time: [105.5236ms]\n", - "Epoch: [ 2/ 10], step: [ 318/ 390], loss: [0.4481], avg loss: [0.5966], time: [99.0460ms]\n", - "Epoch: [ 2/ 10], step: [ 319/ 390], loss: [0.3989], avg loss: [0.5960], time: [101.9380ms]\n", - "Epoch: [ 2/ 10], step: [ 320/ 390], loss: [0.4208], avg loss: [0.5955], time: [99.8416ms]\n" + "epoch: 2 step: 232, loss is 0.5745\n", + "epoch: 2 step: 233, loss is 0.5614\n", + "epoch: 2 step: 234, loss is 0.5357\n", + "epoch: 2 step: 235, loss is 0.5186\n", + "epoch: 2 step: 236, loss is 0.6700\n", + "epoch: 2 step: 237, loss is 0.5584\n", + "epoch: 2 step: 238, loss is 0.5589\n", + "epoch: 2 step: 239, loss is 0.5363\n", + "epoch: 2 step: 240, loss is 0.5776\n", + "epoch: 2 step: 241, loss is 0.7283\n", + "epoch: 2 step: 242, loss is 0.5002\n", + "epoch: 2 step: 243, loss is 0.5267\n", + "epoch: 2 step: 244, loss is 0.7191\n", + "epoch: 2 step: 245, loss is 0.5527\n", + "epoch: 2 step: 246, loss is 0.6456\n", + "epoch: 2 step: 247, loss is 0.4888\n", + "epoch: 2 step: 248, loss is 0.5648\n", + "epoch: 2 step: 249, loss is 0.5652\n", + "epoch: 2 step: 250, loss is 0.5415\n", + "epoch: 2 step: 251, loss is 0.5158\n", + "epoch: 2 step: 252, loss is 0.6121\n", + "epoch: 2 step: 253, loss is 0.4672\n", + "epoch: 2 step: 254, loss is 0.5177\n", + "epoch: 2 step: 255, loss is 0.5891\n", + "epoch: 2 step: 256, loss is 0.5838\n", + "epoch: 2 step: 257, loss is 0.5129\n", + "epoch: 2 step: 258, loss is 0.4615\n", + "epoch: 2 step: 259, loss is 0.4765\n", + "epoch: 2 step: 260, loss is 0.5161\n", + "epoch: 2 step: 261, loss is 0.5247\n", + "epoch: 2 step: 262, loss is 0.4824\n", + "epoch: 2 step: 263, loss is 0.4950\n", + "epoch: 2 step: 264, loss is 0.4001\n", + "epoch: 2 step: 265, loss is 0.3896\n", + "epoch: 2 step: 266, loss is 0.5145\n", + "epoch: 2 step: 267, loss is 0.4265\n", + "epoch: 2 step: 268, loss is 0.3818\n", + "epoch: 2 step: 269, loss is 0.2814\n", + "epoch: 2 step: 270, loss is 0.5369\n", + "epoch: 2 step: 271, loss is 0.3595\n", + "epoch: 2 step: 272, loss is 0.4517\n", + "epoch: 2 step: 273, loss is 0.7099\n", + "epoch: 2 step: 274, loss is 0.4052\n", + "epoch: 2 step: 275, loss is 0.4128\n", + "epoch: 2 step: 276, loss is 0.7017\n", + "epoch: 2 step: 277, loss is 0.4718\n", + "epoch: 2 step: 278, loss is 0.4687\n", + "epoch: 2 step: 279, loss is 0.4270\n", + "epoch: 2 step: 280, loss is 0.4992\n", + "epoch: 2 step: 281, loss is 0.4861\n", + "epoch: 2 step: 282, loss is 0.5556\n", + "epoch: 2 step: 283, loss is 0.5015\n", + "epoch: 2 step: 284, loss is 0.5049\n", + "epoch: 2 step: 285, loss is 0.5007\n", + "epoch: 2 step: 286, loss is 0.5154\n", + "epoch: 2 step: 287, loss is 0.5927\n", + "epoch: 2 step: 288, loss is 0.5553\n", + "epoch: 2 step: 289, loss is 0.5091\n", + "epoch: 2 step: 290, loss is 0.4555\n", + "epoch: 2 step: 291, loss is 0.4482\n", + "epoch: 2 step: 292, loss is 0.4880\n", + "epoch: 2 step: 293, loss is 0.4739\n", + "epoch: 2 step: 294, loss is 0.4351\n", + "epoch: 2 step: 295, loss is 0.5434\n", + "epoch: 2 step: 296, loss is 0.4808\n", + "epoch: 2 step: 297, loss is 0.5042\n", + "epoch: 2 step: 298, loss is 0.4165\n", + "epoch: 2 step: 299, loss is 0.3246\n", + "epoch: 2 step: 300, loss is 0.4363\n", + "epoch: 2 step: 301, loss is 0.4205\n", + "epoch: 2 step: 302, loss is 0.4846\n", + "epoch: 2 step: 303, loss is 0.3752\n", + "epoch: 2 step: 304, loss is 0.5174\n", + "epoch: 2 step: 305, loss is 0.4815\n", + "epoch: 2 step: 306, loss is 0.5788\n", + "epoch: 2 step: 307, loss is 0.3501\n", + "epoch: 2 step: 308, loss is 0.5348\n", + "epoch: 2 step: 309, loss is 0.4691\n", + "epoch: 2 step: 310, loss is 0.5035\n", + "epoch: 2 step: 311, loss is 0.5681\n", + "epoch: 2 step: 312, loss is 0.5657\n", + "epoch: 2 step: 313, loss is 0.4784\n", + "epoch: 2 step: 314, loss is 0.5547\n", + "epoch: 2 step: 315, loss is 0.5812\n", + "epoch: 2 step: 316, loss is 0.4795\n", + "epoch: 2 step: 317, loss is 0.5181\n", + "epoch: 2 step: 318, loss is 0.4481\n", + "epoch: 2 step: 319, loss is 0.3989\n", + "epoch: 2 step: 320, loss is 0.4208\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 2/ 10], step: [ 321/ 390], loss: [0.3705], avg loss: [0.5948], time: [105.0887ms]\n", - "Epoch: [ 2/ 10], step: [ 322/ 390], loss: [0.4149], avg loss: [0.5942], time: [101.4295ms]\n", - "Epoch: [ 2/ 10], step: [ 323/ 390], loss: [0.4527], avg loss: [0.5938], time: [102.3750ms]\n", - "Epoch: [ 2/ 10], step: [ 324/ 390], loss: [0.3693], avg loss: [0.5931], time: [100.5075ms]\n", - "Epoch: [ 2/ 10], step: [ 325/ 390], loss: [0.4761], avg loss: [0.5927], time: [100.6401ms]\n", - "Epoch: [ 2/ 10], step: [ 326/ 390], loss: [0.3317], avg loss: [0.5919], time: [102.6881ms]\n", - "Epoch: [ 2/ 10], step: [ 327/ 390], loss: [0.5316], avg loss: [0.5917], time: [103.8661ms]\n", - "Epoch: [ 2/ 10], step: [ 328/ 390], loss: [0.4163], avg loss: [0.5912], time: [99.2351ms]\n", - "Epoch: [ 2/ 10], step: [ 329/ 390], loss: [0.3904], avg loss: [0.5906], time: [100.8196ms]\n", - "Epoch: [ 2/ 10], step: [ 330/ 390], loss: [0.6191], avg loss: [0.5907], time: [99.3040ms]\n", - "Epoch: [ 2/ 10], step: [ 331/ 390], loss: [0.3622], avg loss: [0.5900], time: [107.1546ms]\n", - "Epoch: [ 2/ 10], step: [ 332/ 390], loss: [0.4183], avg loss: [0.5895], time: [99.9868ms]\n", - "Epoch: [ 2/ 10], step: [ 333/ 390], loss: [0.5975], avg loss: [0.5895], time: [100.5914ms]\n", - "Epoch: [ 2/ 10], step: [ 334/ 390], loss: [0.3783], avg loss: [0.5889], time: [102.3099ms]\n", - "Epoch: [ 2/ 10], step: [ 335/ 390], loss: [0.4401], avg loss: [0.5884], time: [104.2039ms]\n", - "Epoch: [ 2/ 10], step: [ 336/ 390], loss: [0.3810], avg loss: [0.5878], time: [99.0837ms]\n", - "Epoch: [ 2/ 10], step: [ 337/ 390], loss: [0.3814], avg loss: [0.5872], time: [108.1743ms]\n", - "Epoch: [ 2/ 10], step: [ 338/ 390], loss: [0.4297], avg loss: [0.5867], time: [102.0694ms]\n", - "Epoch: [ 2/ 10], step: [ 339/ 390], loss: [0.2906], avg loss: [0.5858], time: [102.8275ms]\n", - "Epoch: [ 2/ 10], step: [ 340/ 390], loss: [0.3323], avg loss: [0.5851], time: [99.8542ms]\n", - "Epoch: [ 2/ 10], step: [ 341/ 390], loss: [0.4465], avg loss: [0.5847], time: [106.7357ms]\n", - "Epoch: [ 2/ 10], step: [ 342/ 390], loss: [0.4510], avg loss: [0.5843], time: [101.4016ms]\n", - "Epoch: [ 2/ 10], step: [ 343/ 390], loss: [0.4552], avg loss: [0.5839], time: [103.0991ms]\n", - "Epoch: [ 2/ 10], step: [ 344/ 390], loss: [0.3955], avg loss: [0.5834], time: [97.3434ms]\n", - "Epoch: [ 2/ 10], step: [ 345/ 390], loss: [0.3395], avg loss: [0.5827], time: [105.7241ms]\n", - "Epoch: [ 2/ 10], step: [ 346/ 390], loss: [0.5065], avg loss: [0.5825], time: [69.6723ms]\n", - "Epoch: [ 2/ 10], step: [ 347/ 390], loss: [0.4705], avg loss: [0.5821], time: [106.5094ms]\n", - "Epoch: [ 2/ 10], step: [ 348/ 390], loss: [0.4732], avg loss: [0.5818], time: [101.4545ms]\n", - "Epoch: [ 2/ 10], step: [ 349/ 390], loss: [0.3764], avg loss: [0.5812], time: [105.5155ms]\n", - "Epoch: [ 2/ 10], step: [ 350/ 390], loss: [0.3716], avg loss: [0.5806], time: [105.5231ms]\n", - "Epoch: [ 2/ 10], step: [ 351/ 390], loss: [0.4724], avg loss: [0.5803], time: [104.2888ms]\n", - "Epoch: [ 2/ 10], step: [ 352/ 390], loss: [0.3549], avg loss: [0.5797], time: [102.2933ms]\n", - "Epoch: [ 2/ 10], step: [ 353/ 390], loss: [0.4010], avg loss: [0.5792], time: [103.0416ms]\n", - "Epoch: [ 2/ 10], step: [ 354/ 390], loss: [0.4539], avg loss: [0.5788], time: [98.9168ms]\n", - "Epoch: [ 2/ 10], step: [ 355/ 390], loss: [0.5552], avg loss: [0.5788], time: [103.7295ms]\n", - "Epoch: [ 2/ 10], step: [ 356/ 390], loss: [0.3861], avg loss: [0.5782], time: [101.1715ms]\n", - "Epoch: [ 2/ 10], step: [ 357/ 390], loss: [0.4465], avg loss: [0.5778], time: [104.1949ms]\n", - "Epoch: [ 2/ 10], step: [ 358/ 390], loss: [0.3775], avg loss: [0.5773], time: [97.9333ms]\n", - "Epoch: [ 2/ 10], step: [ 359/ 390], loss: [0.5041], avg loss: [0.5771], time: [107.3239ms]\n", - "Epoch: [ 2/ 10], step: [ 360/ 390], loss: [0.4034], avg loss: [0.5766], time: [101.4485ms]\n", - "Epoch: [ 2/ 10], step: [ 361/ 390], loss: [0.3989], avg loss: [0.5761], time: [102.8931ms]\n", - "Epoch: [ 2/ 10], step: [ 362/ 390], loss: [0.4578], avg loss: [0.5758], time: [101.5456ms]\n", - "Epoch: [ 2/ 10], step: [ 363/ 390], loss: [0.4256], avg loss: [0.5754], time: [101.1083ms]\n", - "Epoch: [ 2/ 10], step: [ 364/ 390], loss: [0.4483], avg loss: [0.5750], time: [101.5882ms]\n", - "Epoch: [ 2/ 10], step: [ 365/ 390], loss: [0.5041], avg loss: [0.5748], time: [106.0119ms]\n", - "Epoch: [ 2/ 10], step: [ 366/ 390], loss: [0.4134], avg loss: [0.5744], time: [103.4594ms]\n", - "Epoch: [ 2/ 10], step: [ 367/ 390], loss: [0.5226], avg loss: [0.5742], time: [104.2194ms]\n", - "Epoch: [ 2/ 10], step: [ 368/ 390], loss: [0.3384], avg loss: [0.5736], time: [99.5936ms]\n", - "Epoch: [ 2/ 10], step: [ 369/ 390], loss: [0.4365], avg loss: [0.5732], time: [105.2415ms]\n", - "Epoch: [ 2/ 10], step: [ 370/ 390], loss: [0.3390], avg loss: [0.5726], time: [98.5243ms]\n", - "Epoch: [ 2/ 10], step: [ 371/ 390], loss: [0.3794], avg loss: [0.5721], time: [103.2960ms]\n", - "Epoch: [ 2/ 10], step: [ 372/ 390], loss: [0.4667], avg loss: [0.5718], time: [100.5466ms]\n", - "Epoch: [ 2/ 10], step: [ 373/ 390], loss: [0.2798], avg loss: [0.5710], time: [102.4997ms]\n", - "Epoch: [ 2/ 10], step: [ 374/ 390], loss: [0.4289], avg loss: [0.5706], time: [98.2299ms]\n", - "Epoch: [ 2/ 10], step: [ 375/ 390], loss: [0.4372], avg loss: [0.5703], time: [104.4865ms]\n", - "Epoch: [ 2/ 10], step: [ 376/ 390], loss: [0.3608], avg loss: [0.5697], time: [102.8581ms]\n", - "Epoch: [ 2/ 10], step: [ 377/ 390], loss: [0.3193], avg loss: [0.5691], time: [105.5598ms]\n", - "Epoch: [ 2/ 10], step: [ 378/ 390], loss: [0.3597], avg loss: [0.5685], time: [100.1291ms]\n", - "Epoch: [ 2/ 10], step: [ 379/ 390], loss: [0.4859], avg loss: [0.5683], time: [101.2230ms]\n", - "Epoch: [ 2/ 10], step: [ 380/ 390], loss: [0.3780], avg loss: [0.5678], time: [99.0679ms]\n", - "Epoch: [ 2/ 10], step: [ 381/ 390], loss: [0.3072], avg loss: [0.5671], time: [103.2448ms]\n", - "Epoch: [ 2/ 10], step: [ 382/ 390], loss: [0.4727], avg loss: [0.5668], time: [99.4911ms]\n", - "Epoch: [ 2/ 10], step: [ 383/ 390], loss: [0.4112], avg loss: [0.5664], time: [104.1186ms]\n", - "Epoch: [ 2/ 10], step: [ 384/ 390], loss: [0.4523], avg loss: [0.5661], time: [103.1640ms]\n", - "Epoch: [ 2/ 10], step: [ 385/ 390], loss: [0.3574], avg loss: [0.5656], time: [100.5161ms]\n", - "Epoch: [ 2/ 10], step: [ 386/ 390], loss: [0.3551], avg loss: [0.5651], time: [102.0844ms]\n", - "Epoch: [ 2/ 10], step: [ 387/ 390], loss: [0.5766], avg loss: [0.5651], time: [105.4604ms]\n", - "Epoch: [ 2/ 10], step: [ 388/ 390], loss: [0.5247], avg loss: [0.5650], time: [98.8855ms]\n", - "Epoch: [ 2/ 10], step: [ 389/ 390], loss: [0.4281], avg loss: [0.5646], time: [99.1461ms]\n", - "Epoch: [ 2/ 10], step: [ 390/ 390], loss: [0.4206], avg loss: [0.5643], time: [917.7203ms]\n", - "Epoch time: 41080.828, per step time: 105.335\n", + "epoch: 2 step: 321, loss is 0.3705\n", + "epoch: 2 step: 322, loss is 0.4149\n", + "epoch: 2 step: 323, loss is 0.4527\n", + "epoch: 2 step: 324, loss is 0.3693\n", + "epoch: 2 step: 325, loss is 0.4761\n", + "epoch: 2 step: 326, loss is 0.3317\n", + "epoch: 2 step: 327, loss is 0.5316\n", + "epoch: 2 step: 328, loss is 0.4163\n", + "epoch: 2 step: 329, loss is 0.3904\n", + "epoch: 2 step: 330, loss is 0.6191\n", + "epoch: 2 step: 331, loss is 0.3622\n", + "epoch: 2 step: 332, loss is 0.4183\n", + "epoch: 2 step: 333, loss is 0.5975\n", + "epoch: 2 step: 334, loss is 0.3783\n", + "epoch: 2 step: 335, loss is 0.4401\n", + "epoch: 2 step: 336, loss is 0.3810\n", + "epoch: 2 step: 337, loss is 0.3814\n", + "epoch: 2 step: 338, loss is 0.4297\n", + "epoch: 2 step: 339, loss is 0.2906\n", + "epoch: 2 step: 340, loss is 0.3323\n", + "epoch: 2 step: 341, loss is 0.4465\n", + "epoch: 2 step: 342, loss is 0.4510\n", + "epoch: 2 step: 343, loss is 0.4552\n", + "epoch: 2 step: 344, loss is 0.3955\n", + "epoch: 2 step: 345, loss is 0.3395\n", + "epoch: 2 step: 346, loss is 0.5065\n", + "epoch: 2 step: 347, loss is 0.4705\n", + "epoch: 2 step: 348, loss is 0.4732\n", + "epoch: 2 step: 349, loss is 0.3764\n", + "epoch: 2 step: 350, loss is 0.3716\n", + "epoch: 2 step: 351, loss is 0.4724\n", + "epoch: 2 step: 352, loss is 0.3549\n", + "epoch: 2 step: 353, loss is 0.4010\n", + "epoch: 2 step: 354, loss is 0.4539\n", + "epoch: 2 step: 355, loss is 0.5552\n", + "epoch: 2 step: 356, loss is 0.3861\n", + "epoch: 2 step: 357, loss is 0.4465\n", + "epoch: 2 step: 358, loss is 0.3775\n", + "epoch: 2 step: 359, loss is 0.5041\n", + "epoch: 2 step: 360, loss is 0.4034\n", + "epoch: 2 step: 361, loss is 0.3989\n", + "epoch: 2 step: 362, loss is 0.4578\n", + "epoch: 2 step: 363, loss is 0.4256\n", + "epoch: 2 step: 364, loss is 0.4483\n", + "epoch: 2 step: 365, loss is 0.5041\n", + "epoch: 2 step: 366, loss is 0.4134\n", + "epoch: 2 step: 367, loss is 0.5226\n", + "epoch: 2 step: 368, loss is 0.3384\n", + "epoch: 2 step: 369, loss is 0.4365\n", + "epoch: 2 step: 370, loss is 0.3390\n", + "epoch: 2 step: 371, loss is 0.3794\n", + "epoch: 2 step: 372, loss is 0.4667\n", + "epoch: 2 step: 373, loss is 0.2798\n", + "epoch: 2 step: 374, loss is 0.4289\n", + "epoch: 2 step: 375, loss is 0.4372\n", + "epoch: 2 step: 376, loss is 0.3608\n", + "epoch: 2 step: 377, loss is 0.3193\n", + "epoch: 2 step: 378, loss is 0.3597\n", + "epoch: 2 step: 379, loss is 0.4859\n", + "epoch: 2 step: 380, loss is 0.3780\n", + "epoch: 2 step: 381, loss is 0.3072\n", + "epoch: 2 step: 382, loss is 0.4727\n", + "epoch: 2 step: 383, loss is 0.4112\n", + "epoch: 2 step: 384, loss is 0.4523\n", + "epoch: 2 step: 385, loss is 0.3574\n", + "epoch: 2 step: 386, loss is 0.3551\n", + "epoch: 2 step: 387, loss is 0.5766\n", + "epoch: 2 step: 388, loss is 0.5247\n", + "epoch: 2 step: 389, loss is 0.4281\n", + "epoch: 2 step: 390, loss is 0.4206\n", "Epoch time: 41081.172, per step time: 105.336, avg loss: 0.564\n", "************************************************************\n", - "Epoch: [ 3/ 10], step: [ 1/ 390], loss: [0.3717], avg loss: [0.3717], time: [101.0344ms]\n", - "Epoch: [ 3/ 10], step: [ 2/ 390], loss: [0.4016], avg loss: [0.3867], time: [104.5682ms]\n", - "Epoch: [ 3/ 10], step: [ 3/ 390], loss: [0.4964], avg loss: [0.4233], time: [104.6176ms]\n", - "Epoch: [ 3/ 10], step: [ 4/ 390], loss: [0.4364], avg loss: [0.4265], time: [103.5764ms]\n", - "Epoch: [ 3/ 10], step: [ 5/ 390], loss: [0.4573], avg loss: [0.4327], time: [108.1645ms]\n", - "Epoch: [ 3/ 10], step: [ 6/ 390], loss: [0.4915], avg loss: [0.4425], time: [106.4374ms]\n", - "Epoch: [ 3/ 10], step: [ 7/ 390], loss: [0.3635], avg loss: [0.4312], time: [107.9173ms]\n", - "Epoch: [ 3/ 10], step: [ 8/ 390], loss: [0.4102], avg loss: [0.4286], time: [102.2503ms]\n", - "Epoch: [ 3/ 10], step: [ 9/ 390], loss: [0.4057], avg loss: [0.4260], time: [107.1496ms]\n", - "Epoch: [ 3/ 10], step: [ 10/ 390], loss: [0.4424], avg loss: [0.4277], time: [102.6711ms]\n", - "Epoch: [ 3/ 10], step: [ 11/ 390], loss: [0.4570], avg loss: [0.4303], time: [107.5852ms]\n", - "Epoch: [ 3/ 10], step: [ 12/ 390], loss: [0.4399], avg loss: [0.4311], time: [105.2186ms]\n", - "Epoch: [ 3/ 10], step: [ 13/ 390], loss: [0.3412], avg loss: [0.4242], time: [104.2027ms]\n", - "Epoch: [ 3/ 10], step: [ 14/ 390], loss: [0.4659], avg loss: [0.4272], time: [106.1070ms]\n", - "Epoch: [ 3/ 10], step: [ 15/ 390], loss: [0.5166], avg loss: [0.4332], time: [106.4236ms]\n", - "Epoch: [ 3/ 10], step: [ 16/ 390], loss: [0.3432], avg loss: [0.4275], time: [101.7649ms]\n", - "Epoch: [ 3/ 10], step: [ 17/ 390], loss: [0.2530], avg loss: [0.4173], time: [104.6968ms]\n" + "epoch: 3 step: 1, loss is 0.3717\n", + "epoch: 3 step: 2, loss is 0.4016\n", + "epoch: 3 step: 3, loss is 0.4964\n", + "epoch: 3 step: 4, loss is 0.4364\n", + "epoch: 3 step: 5, loss is 0.4573\n", + "epoch: 3 step: 6, loss is 0.4915\n", + "epoch: 3 step: 7, loss is 0.3635\n", + "epoch: 3 step: 8, loss is 0.4102\n", + "epoch: 3 step: 9, loss is 0.4057\n", + "epoch: 3 step: 10, loss is 0.4424\n", + "epoch: 3 step: 11, loss is 0.4570\n", + "epoch: 3 step: 12, loss is 0.4399\n", + "epoch: 3 step: 13, loss is 0.3412\n", + "epoch: 3 step: 14, loss is 0.4659\n", + "epoch: 3 step: 15, loss is 0.5166\n", + "epoch: 3 step: 16, loss is 0.3432\n", + "epoch: 3 step: 17, loss is 0.2530\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 3/ 10], step: [ 18/ 390], loss: [0.3993], avg loss: [0.4163], time: [104.6028ms]\n", - "Epoch: [ 3/ 10], step: [ 19/ 390], loss: [0.4321], avg loss: [0.4171], time: [106.9975ms]\n", - "Epoch: [ 3/ 10], step: [ 20/ 390], loss: [0.3459], avg loss: [0.4135], time: [101.6135ms]\n", - "Epoch: [ 3/ 10], step: [ 21/ 390], loss: [0.3473], avg loss: [0.4104], time: [106.1561ms]\n", - "Epoch: [ 3/ 10], step: [ 22/ 390], loss: [0.4423], avg loss: [0.4118], time: [102.2394ms]\n", - "Epoch: [ 3/ 10], step: [ 23/ 390], loss: [0.5265], avg loss: [0.4168], time: [106.6220ms]\n", - "Epoch: [ 3/ 10], step: [ 24/ 390], loss: [0.4170], avg loss: [0.4168], time: [105.6414ms]\n", - "Epoch: [ 3/ 10], step: [ 25/ 390], loss: [0.4483], avg loss: [0.4181], time: [108.6771ms]\n", - "Epoch: [ 3/ 10], step: [ 26/ 390], loss: [0.5304], avg loss: [0.4224], time: [107.1980ms]\n", - "Epoch: [ 3/ 10], step: [ 27/ 390], loss: [0.4433], avg loss: [0.4232], time: [105.8927ms]\n", - "Epoch: [ 3/ 10], step: [ 28/ 390], loss: [0.4486], avg loss: [0.4241], time: [100.7690ms]\n", - "Epoch: [ 3/ 10], step: [ 29/ 390], loss: [0.3785], avg loss: [0.4225], time: [104.8286ms]\n", - "Epoch: [ 3/ 10], step: [ 30/ 390], loss: [0.4524], avg loss: [0.4235], time: [100.2972ms]\n", - "Epoch: [ 3/ 10], step: [ 31/ 390], loss: [0.4300], avg loss: [0.4237], time: [104.8031ms]\n", - "Epoch: [ 3/ 10], step: [ 32/ 390], loss: [0.3490], avg loss: [0.4214], time: [102.2110ms]\n", - "Epoch: [ 3/ 10], step: [ 33/ 390], loss: [0.4418], avg loss: [0.4220], time: [104.5287ms]\n", - "Epoch: [ 3/ 10], step: [ 34/ 390], loss: [0.4400], avg loss: [0.4225], time: [105.4187ms]\n", - "Epoch: [ 3/ 10], step: [ 35/ 390], loss: [0.4215], avg loss: [0.4225], time: [106.2911ms]\n", - "Epoch: [ 3/ 10], step: [ 36/ 390], loss: [0.4959], avg loss: [0.4245], time: [103.3683ms]\n", - "Epoch: [ 3/ 10], step: [ 37/ 390], loss: [0.4083], avg loss: [0.4241], time: [105.6094ms]\n", - "Epoch: [ 3/ 10], step: [ 38/ 390], loss: [0.3641], avg loss: [0.4225], time: [103.6904ms]\n", - "Epoch: [ 3/ 10], step: [ 39/ 390], loss: [0.4726], avg loss: [0.4238], time: [104.6524ms]\n", - "Epoch: [ 3/ 10], step: [ 40/ 390], loss: [0.3642], avg loss: [0.4223], time: [102.7660ms]\n", - "Epoch: [ 3/ 10], step: [ 41/ 390], loss: [0.4058], avg loss: [0.4219], time: [105.2840ms]\n", - "Epoch: [ 3/ 10], step: [ 42/ 390], loss: [0.4929], avg loss: [0.4236], time: [107.5082ms]\n", - "Epoch: [ 3/ 10], step: [ 43/ 390], loss: [0.3960], avg loss: [0.4230], time: [104.9497ms]\n", - "Epoch: [ 3/ 10], step: [ 44/ 390], loss: [0.5293], avg loss: [0.4254], time: [101.9835ms]\n", - "Epoch: [ 3/ 10], step: [ 45/ 390], loss: [0.4512], avg loss: [0.4260], time: [104.2707ms]\n", - "Epoch: [ 3/ 10], step: [ 46/ 390], loss: [0.4348], avg loss: [0.4261], time: [104.0783ms]\n", - "Epoch: [ 3/ 10], step: [ 47/ 390], loss: [0.3913], avg loss: [0.4254], time: [110.6560ms]\n", - "Epoch: [ 3/ 10], step: [ 48/ 390], loss: [0.5439], avg loss: [0.4279], time: [106.7963ms]\n", - "Epoch: [ 3/ 10], step: [ 49/ 390], loss: [0.3946], avg loss: [0.4272], time: [104.5280ms]\n", - "Epoch: [ 3/ 10], step: [ 50/ 390], loss: [0.3742], avg loss: [0.4261], time: [101.4905ms]\n", - "Epoch: [ 3/ 10], step: [ 51/ 390], loss: [0.3904], avg loss: [0.4254], time: [108.8462ms]\n", - "Epoch: [ 3/ 10], step: [ 52/ 390], loss: [0.3143], avg loss: [0.4233], time: [105.3758ms]\n", - "Epoch: [ 3/ 10], step: [ 53/ 390], loss: [0.3225], avg loss: [0.4214], time: [104.1429ms]\n", - "Epoch: [ 3/ 10], step: [ 54/ 390], loss: [0.5099], avg loss: [0.4230], time: [105.7603ms]\n", - "Epoch: [ 3/ 10], step: [ 55/ 390], loss: [0.3449], avg loss: [0.4216], time: [104.8827ms]\n", - "Epoch: [ 3/ 10], step: [ 56/ 390], loss: [0.3859], avg loss: [0.4210], time: [103.6806ms]\n", - "Epoch: [ 3/ 10], step: [ 57/ 390], loss: [0.3710], avg loss: [0.4201], time: [106.0722ms]\n", - "Epoch: [ 3/ 10], step: [ 58/ 390], loss: [0.3936], avg loss: [0.4196], time: [100.5962ms]\n", - "Epoch: [ 3/ 10], step: [ 59/ 390], loss: [0.2827], avg loss: [0.4173], time: [106.4043ms]\n", - "Epoch: [ 3/ 10], step: [ 60/ 390], loss: [0.2523], avg loss: [0.4146], time: [104.5611ms]\n", - "Epoch: [ 3/ 10], step: [ 61/ 390], loss: [0.2955], avg loss: [0.4126], time: [104.2995ms]\n", - "Epoch: [ 3/ 10], step: [ 62/ 390], loss: [0.3792], avg loss: [0.4121], time: [105.1340ms]\n", - "Epoch: [ 3/ 10], step: [ 63/ 390], loss: [0.3951], avg loss: [0.4118], time: [104.1362ms]\n", - "Epoch: [ 3/ 10], step: [ 64/ 390], loss: [0.3538], avg loss: [0.4109], time: [102.9949ms]\n", - "Epoch: [ 3/ 10], step: [ 65/ 390], loss: [0.2615], avg loss: [0.4086], time: [103.0068ms]\n", - "Epoch: [ 3/ 10], step: [ 66/ 390], loss: [0.2563], avg loss: [0.4063], time: [102.3242ms]\n", - "Epoch: [ 3/ 10], step: [ 67/ 390], loss: [0.3461], avg loss: [0.4054], time: [109.4780ms]\n", - "Epoch: [ 3/ 10], step: [ 68/ 390], loss: [0.4189], avg loss: [0.4056], time: [101.6436ms]\n", - "Epoch: [ 3/ 10], step: [ 69/ 390], loss: [0.1861], avg loss: [0.4024], time: [108.0732ms]\n", - "Epoch: [ 3/ 10], step: [ 70/ 390], loss: [0.5654], avg loss: [0.4047], time: [100.9367ms]\n", - "Epoch: [ 3/ 10], step: [ 71/ 390], loss: [0.3408], avg loss: [0.4038], time: [109.4954ms]\n", - "Epoch: [ 3/ 10], step: [ 72/ 390], loss: [0.4145], avg loss: [0.4040], time: [101.1722ms]\n", - "Epoch: [ 3/ 10], step: [ 73/ 390], loss: [0.3291], avg loss: [0.4030], time: [104.6176ms]\n", - "Epoch: [ 3/ 10], step: [ 74/ 390], loss: [0.3935], avg loss: [0.4028], time: [103.2479ms]\n", - "Epoch: [ 3/ 10], step: [ 75/ 390], loss: [0.4106], avg loss: [0.4029], time: [103.3683ms]\n", - "Epoch: [ 3/ 10], step: [ 76/ 390], loss: [0.4341], avg loss: [0.4033], time: [107.6040ms]\n", - "Epoch: [ 3/ 10], step: [ 77/ 390], loss: [0.3573], avg loss: [0.4028], time: [105.2480ms]\n", - "Epoch: [ 3/ 10], step: [ 78/ 390], loss: [0.2479], avg loss: [0.4008], time: [101.9013ms]\n", - "Epoch: [ 3/ 10], step: [ 79/ 390], loss: [0.3640], avg loss: [0.4003], time: [106.1730ms]\n", - "Epoch: [ 3/ 10], step: [ 80/ 390], loss: [0.2931], avg loss: [0.3990], time: [105.1567ms]\n", - "Epoch: [ 3/ 10], step: [ 81/ 390], loss: [0.4537], avg loss: [0.3996], time: [106.9849ms]\n", - "Epoch: [ 3/ 10], step: [ 82/ 390], loss: [0.3663], avg loss: [0.3992], time: [102.8011ms]\n", - "Epoch: [ 3/ 10], step: [ 83/ 390], loss: [0.4545], avg loss: [0.3999], time: [107.5125ms]\n", - "Epoch: [ 3/ 10], step: [ 84/ 390], loss: [0.3072], avg loss: [0.3988], time: [105.4533ms]\n", - "Epoch: [ 3/ 10], step: [ 85/ 390], loss: [0.3475], avg loss: [0.3982], time: [103.9536ms]\n", - "Epoch: [ 3/ 10], step: [ 86/ 390], loss: [0.3380], avg loss: [0.3975], time: [103.4360ms]\n", - "Epoch: [ 3/ 10], step: [ 87/ 390], loss: [0.3027], avg loss: [0.3964], time: [104.1820ms]\n", - "Epoch: [ 3/ 10], step: [ 88/ 390], loss: [0.3898], avg loss: [0.3963], time: [102.8357ms]\n", - "Epoch: [ 3/ 10], step: [ 89/ 390], loss: [0.3724], avg loss: [0.3961], time: [109.4525ms]\n", - "Epoch: [ 3/ 10], step: [ 90/ 390], loss: [0.3696], avg loss: [0.3958], time: [103.3630ms]\n", - "Epoch: [ 3/ 10], step: [ 91/ 390], loss: [0.5897], avg loss: [0.3979], time: [103.6150ms]\n", - "Epoch: [ 3/ 10], step: [ 92/ 390], loss: [0.3328], avg loss: [0.3972], time: [105.8285ms]\n", - "Epoch: [ 3/ 10], step: [ 93/ 390], loss: [0.4406], avg loss: [0.3977], time: [104.1517ms]\n", - "Epoch: [ 3/ 10], step: [ 94/ 390], loss: [0.3753], avg loss: [0.3974], time: [106.1327ms]\n", - "Epoch: [ 3/ 10], step: [ 95/ 390], loss: [0.4312], avg loss: [0.3978], time: [102.4258ms]\n", - "Epoch: [ 3/ 10], step: [ 96/ 390], loss: [0.2916], avg loss: [0.3967], time: [105.0375ms]\n", - "Epoch: [ 3/ 10], step: [ 97/ 390], loss: [0.4791], avg loss: [0.3975], time: [104.1269ms]\n", - "Epoch: [ 3/ 10], step: [ 98/ 390], loss: [0.4071], avg loss: [0.3976], time: [102.0269ms]\n", - "Epoch: [ 3/ 10], step: [ 99/ 390], loss: [0.3603], avg loss: [0.3972], time: [102.6518ms]\n", - "Epoch: [ 3/ 10], step: [ 100/ 390], loss: [0.2947], avg loss: [0.3962], time: [102.4597ms]\n", - "Epoch: [ 3/ 10], step: [ 101/ 390], loss: [0.3169], avg loss: [0.3954], time: [108.8419ms]\n", - "Epoch: [ 3/ 10], step: [ 102/ 390], loss: [0.3696], avg loss: [0.3952], time: [104.0246ms]\n", - "Epoch: [ 3/ 10], step: [ 103/ 390], loss: [0.3359], avg loss: [0.3946], time: [108.8769ms]\n", - "Epoch: [ 3/ 10], step: [ 104/ 390], loss: [0.3557], avg loss: [0.3942], time: [102.0548ms]\n", - "Epoch: [ 3/ 10], step: [ 105/ 390], loss: [0.4236], avg loss: [0.3945], time: [103.1075ms]\n", - "Epoch: [ 3/ 10], step: [ 106/ 390], loss: [0.3706], avg loss: [0.3943], time: [103.7037ms]\n" + "epoch: 3 step: 18, loss is 0.3993\n", + "epoch: 3 step: 19, loss is 0.4321\n", + "epoch: 3 step: 20, loss is 0.3459\n", + "epoch: 3 step: 21, loss is 0.3473\n", + "epoch: 3 step: 22, loss is 0.4423\n", + "epoch: 3 step: 23, loss is 0.5265\n", + "epoch: 3 step: 24, loss is 0.4170\n", + "epoch: 3 step: 25, loss is 0.4483\n", + "epoch: 3 step: 26, loss is 0.5304\n", + "epoch: 3 step: 27, loss is 0.4433\n", + "epoch: 3 step: 28, loss is 0.4486\n", + "epoch: 3 step: 29, loss is 0.3785\n", + "epoch: 3 step: 30, loss is 0.4524\n", + "epoch: 3 step: 31, loss is 0.4300\n", + "epoch: 3 step: 32, loss is 0.3490\n", + "epoch: 3 step: 33, loss is 0.4418\n", + "epoch: 3 step: 34, loss is 0.4400\n", + "epoch: 3 step: 35, loss is 0.4215\n", + "epoch: 3 step: 36, loss is 0.4959\n", + "epoch: 3 step: 37, loss is 0.4083\n", + "epoch: 3 step: 38, loss is 0.3641\n", + "epoch: 3 step: 39, loss is 0.4726\n", + "epoch: 3 step: 40, loss is 0.3642\n", + "epoch: 3 step: 41, loss is 0.4058\n", + "epoch: 3 step: 42, loss is 0.4929\n", + "epoch: 3 step: 43, loss is 0.3960\n", + "epoch: 3 step: 44, loss is 0.5293\n", + "epoch: 3 step: 45, loss is 0.4512\n", + "epoch: 3 step: 46, loss is 0.4348\n", + "epoch: 3 step: 47, loss is 0.3913\n", + "epoch: 3 step: 48, loss is 0.5439\n", + "epoch: 3 step: 49, loss is 0.3946\n", + "epoch: 3 step: 50, loss is 0.3742\n", + "epoch: 3 step: 51, loss is 0.3904\n", + "epoch: 3 step: 52, loss is 0.3143\n", + "epoch: 3 step: 53, loss is 0.3225\n", + "epoch: 3 step: 54, loss is 0.5099\n", + "epoch: 3 step: 55, loss is 0.3449\n", + "epoch: 3 step: 56, loss is 0.3859\n", + "epoch: 3 step: 57, loss is 0.3710\n", + "epoch: 3 step: 58, loss is 0.3936\n", + "epoch: 3 step: 59, loss is 0.2827\n", + "epoch: 3 step: 60, loss is 0.2523\n", + "epoch: 3 step: 61, loss is 0.2955\n", + "epoch: 3 step: 62, loss is 0.3792\n", + "epoch: 3 step: 63, loss is 0.3951\n", + "epoch: 3 step: 64, loss is 0.3538\n", + "epoch: 3 step: 65, loss is 0.2615\n", + "epoch: 3 step: 66, loss is 0.2563\n", + "epoch: 3 step: 67, loss is 0.3461\n", + "epoch: 3 step: 68, loss is 0.4189\n", + "epoch: 3 step: 69, loss is 0.1861\n", + "epoch: 3 step: 70, loss is 0.5654\n", + "epoch: 3 step: 71, loss is 0.3408\n", + "epoch: 3 step: 72, loss is 0.4145\n", + "epoch: 3 step: 73, loss is 0.3291\n", + "epoch: 3 step: 74, loss is 0.3935\n", + "epoch: 3 step: 75, loss is 0.4106\n", + "epoch: 3 step: 76, loss is 0.4341\n", + "epoch: 3 step: 77, loss is 0.3573\n", + "epoch: 3 step: 78, loss is 0.2479\n", + "epoch: 3 step: 79, loss is 0.3640\n", + "epoch: 3 step: 80, loss is 0.2931\n", + "epoch: 3 step: 81, loss is 0.4537\n", + "epoch: 3 step: 82, loss is 0.3663\n", + "epoch: 3 step: 83, loss is 0.4545\n", + "epoch: 3 step: 84, loss is 0.3072\n", + "epoch: 3 step: 85, loss is 0.3475\n", + "epoch: 3 step: 86, loss is 0.3380\n", + "epoch: 3 step: 87, loss is 0.3027\n", + "epoch: 3 step: 88, loss is 0.3898\n", + "epoch: 3 step: 89, loss is 0.3724\n", + "epoch: 3 step: 90, loss is 0.3696\n", + "epoch: 3 step: 91, loss is 0.5897\n", + "epoch: 3 step: 92, loss is 0.3328\n", + "epoch: 3 step: 93, loss is 0.4406\n", + "epoch: 3 step: 94, loss is 0.3753\n", + "epoch: 3 step: 95, loss is 0.4312\n", + "epoch: 3 step: 96, loss is 0.2916\n", + "epoch: 3 step: 97, loss is 0.4791\n", + "epoch: 3 step: 98, loss is 0.4071\n", + "epoch: 3 step: 99, loss is 0.3603\n", + "epoch: 3 step: 100, loss is 0.2947\n", + "epoch: 3 step: 101, loss is 0.3169\n", + "epoch: 3 step: 102, loss is 0.3696\n", + "epoch: 3 step: 103, loss is 0.3359\n", + "epoch: 3 step: 104, loss is 0.3557\n", + "epoch: 3 step: 105, loss is 0.4236\n", + "epoch: 3 step: 106, loss is 0.3706\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 3/ 10], step: [ 107/ 390], loss: [0.4050], avg loss: [0.3944], time: [105.6411ms]\n", - "Epoch: [ 3/ 10], step: [ 108/ 390], loss: [0.4224], avg loss: [0.3946], time: [105.2477ms]\n", - "Epoch: [ 3/ 10], step: [ 109/ 390], loss: [0.3945], avg loss: [0.3946], time: [104.9845ms]\n", - "Epoch: [ 3/ 10], step: [ 110/ 390], loss: [0.3166], avg loss: [0.3939], time: [102.7188ms]\n", - "Epoch: [ 3/ 10], step: [ 111/ 390], loss: [0.4504], avg loss: [0.3944], time: [106.3836ms]\n", - "Epoch: [ 3/ 10], step: [ 112/ 390], loss: [0.4167], avg loss: [0.3946], time: [105.3114ms]\n", - "Epoch: [ 3/ 10], step: [ 113/ 390], loss: [0.4151], avg loss: [0.3948], time: [104.2454ms]\n", - "Epoch: [ 3/ 10], step: [ 114/ 390], loss: [0.4592], avg loss: [0.3954], time: [101.6955ms]\n", - "Epoch: [ 3/ 10], step: [ 115/ 390], loss: [0.4591], avg loss: [0.3959], time: [108.1009ms]\n", - "Epoch: [ 3/ 10], step: [ 116/ 390], loss: [0.4377], avg loss: [0.3963], time: [102.1514ms]\n", - "Epoch: [ 3/ 10], step: [ 117/ 390], loss: [0.3935], avg loss: [0.3963], time: [105.4153ms]\n", - "Epoch: [ 3/ 10], step: [ 118/ 390], loss: [0.4603], avg loss: [0.3968], time: [106.0941ms]\n", - "Epoch: [ 3/ 10], step: [ 119/ 390], loss: [0.4321], avg loss: [0.3971], time: [105.8371ms]\n", - "Epoch: [ 3/ 10], step: [ 120/ 390], loss: [0.3649], avg loss: [0.3968], time: [102.9572ms]\n", - "Epoch: [ 3/ 10], step: [ 121/ 390], loss: [0.2203], avg loss: [0.3954], time: [109.3593ms]\n", - "Epoch: [ 3/ 10], step: [ 122/ 390], loss: [0.4187], avg loss: [0.3956], time: [103.2066ms]\n", - "Epoch: [ 3/ 10], step: [ 123/ 390], loss: [0.4314], avg loss: [0.3959], time: [108.8202ms]\n", - "Epoch: [ 3/ 10], step: [ 124/ 390], loss: [0.4402], avg loss: [0.3962], time: [100.9719ms]\n", - "Epoch: [ 3/ 10], step: [ 125/ 390], loss: [0.4183], avg loss: [0.3964], time: [104.0988ms]\n", - "Epoch: [ 3/ 10], step: [ 126/ 390], loss: [0.2995], avg loss: [0.3956], time: [105.5896ms]\n", - "Epoch: [ 3/ 10], step: [ 127/ 390], loss: [0.5258], avg loss: [0.3966], time: [104.3465ms]\n", - "Epoch: [ 3/ 10], step: [ 128/ 390], loss: [0.3425], avg loss: [0.3962], time: [103.1077ms]\n", - "Epoch: [ 3/ 10], step: [ 129/ 390], loss: [0.4904], avg loss: [0.3970], time: [105.9737ms]\n", - "Epoch: [ 3/ 10], step: [ 130/ 390], loss: [0.3656], avg loss: [0.3967], time: [102.1028ms]\n", - "Epoch: [ 3/ 10], step: [ 131/ 390], loss: [0.2937], avg loss: [0.3959], time: [102.6547ms]\n", - "Epoch: [ 3/ 10], step: [ 132/ 390], loss: [0.3514], avg loss: [0.3956], time: [105.2051ms]\n", - "Epoch: [ 3/ 10], step: [ 133/ 390], loss: [0.4062], avg loss: [0.3957], time: [104.4257ms]\n", - "Epoch: [ 3/ 10], step: [ 134/ 390], loss: [0.4585], avg loss: [0.3961], time: [105.3288ms]\n", - "Epoch: [ 3/ 10], step: [ 135/ 390], loss: [0.4663], avg loss: [0.3967], time: [104.2590ms]\n", - "Epoch: [ 3/ 10], step: [ 136/ 390], loss: [0.4121], avg loss: [0.3968], time: [101.5935ms]\n", - "Epoch: [ 3/ 10], step: [ 137/ 390], loss: [0.5713], avg loss: [0.3980], time: [105.9940ms]\n", - "Epoch: [ 3/ 10], step: [ 138/ 390], loss: [0.5436], avg loss: [0.3991], time: [104.3868ms]\n", - "Epoch: [ 3/ 10], step: [ 139/ 390], loss: [0.3907], avg loss: [0.3990], time: [107.1291ms]\n", - "Epoch: [ 3/ 10], step: [ 140/ 390], loss: [0.3895], avg loss: [0.3990], time: [103.6448ms]\n", - "Epoch: [ 3/ 10], step: [ 141/ 390], loss: [0.2858], avg loss: [0.3982], time: [107.0457ms]\n", - "Epoch: [ 3/ 10], step: [ 142/ 390], loss: [0.3387], avg loss: [0.3978], time: [104.5744ms]\n", - "Epoch: [ 3/ 10], step: [ 143/ 390], loss: [0.2160], avg loss: [0.3965], time: [107.1973ms]\n", - "Epoch: [ 3/ 10], step: [ 144/ 390], loss: [0.3003], avg loss: [0.3958], time: [103.3521ms]\n", - "Epoch: [ 3/ 10], step: [ 145/ 390], loss: [0.4193], avg loss: [0.3960], time: [108.0887ms]\n", - "Epoch: [ 3/ 10], step: [ 146/ 390], loss: [0.2822], avg loss: [0.3952], time: [101.7621ms]\n", - "Epoch: [ 3/ 10], step: [ 147/ 390], loss: [0.4882], avg loss: [0.3958], time: [103.4474ms]\n", - "Epoch: [ 3/ 10], step: [ 148/ 390], loss: [0.3009], avg loss: [0.3952], time: [107.7070ms]\n", - "Epoch: [ 3/ 10], step: [ 149/ 390], loss: [0.4665], avg loss: [0.3957], time: [106.5691ms]\n", - "Epoch: [ 3/ 10], step: [ 150/ 390], loss: [0.1979], avg loss: [0.3943], time: [102.4261ms]\n", - "Epoch: [ 3/ 10], step: [ 151/ 390], loss: [0.5718], avg loss: [0.3955], time: [105.1044ms]\n", - "Epoch: [ 3/ 10], step: [ 152/ 390], loss: [0.4232], avg loss: [0.3957], time: [105.0439ms]\n", - "Epoch: [ 3/ 10], step: [ 153/ 390], loss: [0.3551], avg loss: [0.3954], time: [108.5839ms]\n", - "Epoch: [ 3/ 10], step: [ 154/ 390], loss: [0.4726], avg loss: [0.3959], time: [107.5211ms]\n", - "Epoch: [ 3/ 10], step: [ 155/ 390], loss: [0.4916], avg loss: [0.3966], time: [107.9369ms]\n", - "Epoch: [ 3/ 10], step: [ 156/ 390], loss: [0.2972], avg loss: [0.3959], time: [103.8926ms]\n", - "Epoch: [ 3/ 10], step: [ 157/ 390], loss: [0.5057], avg loss: [0.3966], time: [105.1493ms]\n", - "Epoch: [ 3/ 10], step: [ 158/ 390], loss: [0.3771], avg loss: [0.3965], time: [102.5431ms]\n", - "Epoch: [ 3/ 10], step: [ 159/ 390], loss: [0.4795], avg loss: [0.3970], time: [103.7226ms]\n", - "Epoch: [ 3/ 10], step: [ 160/ 390], loss: [0.3869], avg loss: [0.3970], time: [102.3602ms]\n", - "Epoch: [ 3/ 10], step: [ 161/ 390], loss: [0.4202], avg loss: [0.3971], time: [103.5981ms]\n", - "Epoch: [ 3/ 10], step: [ 162/ 390], loss: [0.4563], avg loss: [0.3975], time: [100.3494ms]\n", - "Epoch: [ 3/ 10], step: [ 163/ 390], loss: [0.4568], avg loss: [0.3978], time: [104.7633ms]\n", - "Epoch: [ 3/ 10], step: [ 164/ 390], loss: [0.4694], avg loss: [0.3983], time: [101.3589ms]\n", - "Epoch: [ 3/ 10], step: [ 165/ 390], loss: [0.4631], avg loss: [0.3987], time: [105.4330ms]\n", - "Epoch: [ 3/ 10], step: [ 166/ 390], loss: [0.4519], avg loss: [0.3990], time: [99.0994ms]\n", - "Epoch: [ 3/ 10], step: [ 167/ 390], loss: [0.3601], avg loss: [0.3987], time: [104.2192ms]\n", - "Epoch: [ 3/ 10], step: [ 168/ 390], loss: [0.4120], avg loss: [0.3988], time: [105.3808ms]\n", - "Epoch: [ 3/ 10], step: [ 169/ 390], loss: [0.4180], avg loss: [0.3989], time: [104.8212ms]\n", - "Epoch: [ 3/ 10], step: [ 170/ 390], loss: [0.4114], avg loss: [0.3990], time: [104.3897ms]\n", - "Epoch: [ 3/ 10], step: [ 171/ 390], loss: [0.4114], avg loss: [0.3991], time: [103.5337ms]\n", - "Epoch: [ 3/ 10], step: [ 172/ 390], loss: [0.4159], avg loss: [0.3992], time: [102.5863ms]\n", - "Epoch: [ 3/ 10], step: [ 173/ 390], loss: [0.4097], avg loss: [0.3992], time: [108.9649ms]\n", - "Epoch: [ 3/ 10], step: [ 174/ 390], loss: [0.4147], avg loss: [0.3993], time: [101.7361ms]\n", - "Epoch: [ 3/ 10], step: [ 175/ 390], loss: [0.4558], avg loss: [0.3997], time: [102.9305ms]\n", - "Epoch: [ 3/ 10], step: [ 176/ 390], loss: [0.4649], avg loss: [0.4000], time: [105.9370ms]\n", - "Epoch: [ 3/ 10], step: [ 177/ 390], loss: [0.3569], avg loss: [0.3998], time: [106.8923ms]\n", - "Epoch: [ 3/ 10], step: [ 178/ 390], loss: [0.3931], avg loss: [0.3997], time: [106.4448ms]\n", - "Epoch: [ 3/ 10], step: [ 179/ 390], loss: [0.4755], avg loss: [0.4002], time: [108.0317ms]\n", - "Epoch: [ 3/ 10], step: [ 180/ 390], loss: [0.3079], avg loss: [0.3997], time: [107.6589ms]\n", - "Epoch: [ 3/ 10], step: [ 181/ 390], loss: [0.2524], avg loss: [0.3988], time: [105.7515ms]\n", - "Epoch: [ 3/ 10], step: [ 182/ 390], loss: [0.4180], avg loss: [0.3989], time: [104.0635ms]\n", - "Epoch: [ 3/ 10], step: [ 183/ 390], loss: [0.3591], avg loss: [0.3987], time: [107.6572ms]\n", - "Epoch: [ 3/ 10], step: [ 184/ 390], loss: [0.4032], avg loss: [0.3988], time: [104.4724ms]\n", - "Epoch: [ 3/ 10], step: [ 185/ 390], loss: [0.4342], avg loss: [0.3989], time: [103.9944ms]\n", - "Epoch: [ 3/ 10], step: [ 186/ 390], loss: [0.4754], avg loss: [0.3994], time: [107.6875ms]\n", - "Epoch: [ 3/ 10], step: [ 187/ 390], loss: [0.4542], avg loss: [0.3996], time: [103.8816ms]\n", - "Epoch: [ 3/ 10], step: [ 188/ 390], loss: [0.4420], avg loss: [0.3999], time: [103.5764ms]\n", - "Epoch: [ 3/ 10], step: [ 189/ 390], loss: [0.4167], avg loss: [0.4000], time: [102.6843ms]\n", - "Epoch: [ 3/ 10], step: [ 190/ 390], loss: [0.3310], avg loss: [0.3996], time: [106.3318ms]\n", - "Epoch: [ 3/ 10], step: [ 191/ 390], loss: [0.3687], avg loss: [0.3994], time: [104.1405ms]\n", - "Epoch: [ 3/ 10], step: [ 192/ 390], loss: [0.5318], avg loss: [0.4001], time: [106.7257ms]\n", - "Epoch: [ 3/ 10], step: [ 193/ 390], loss: [0.4974], avg loss: [0.4006], time: [104.1036ms]\n", - "Epoch: [ 3/ 10], step: [ 194/ 390], loss: [0.3833], avg loss: [0.4005], time: [100.6978ms]\n", - "Epoch: [ 3/ 10], step: [ 195/ 390], loss: [0.3165], avg loss: [0.4001], time: [104.9170ms]\n" + "epoch: 3 step: 107, loss is 0.4050\n", + "epoch: 3 step: 108, loss is 0.4224\n", + "epoch: 3 step: 109, loss is 0.3945\n", + "epoch: 3 step: 110, loss is 0.3166\n", + "epoch: 3 step: 111, loss is 0.4504\n", + "epoch: 3 step: 112, loss is 0.4167\n", + "epoch: 3 step: 113, loss is 0.4151\n", + "epoch: 3 step: 114, loss is 0.4592\n", + "epoch: 3 step: 115, loss is 0.4591\n", + "epoch: 3 step: 116, loss is 0.4377\n", + "epoch: 3 step: 117, loss is 0.3935\n", + "epoch: 3 step: 118, loss is 0.4603\n", + "epoch: 3 step: 119, loss is 0.4321\n", + "epoch: 3 step: 120, loss is 0.3649\n", + "epoch: 3 step: 121, loss is 0.2203\n", + "epoch: 3 step: 122, loss is 0.4187\n", + "epoch: 3 step: 123, loss is 0.4314\n", + "epoch: 3 step: 124, loss is 0.4402\n", + "epoch: 3 step: 125, loss is 0.4183\n", + "epoch: 3 step: 126, loss is 0.2995\n", + "epoch: 3 step: 127, loss is 0.5258\n", + "epoch: 3 step: 128, loss is 0.3425\n", + "epoch: 3 step: 129, loss is 0.4904\n", + "epoch: 3 step: 130, loss is 0.3656\n", + "epoch: 3 step: 131, loss is 0.2937\n", + "epoch: 3 step: 132, loss is 0.3514\n", + "epoch: 3 step: 133, loss is 0.4062\n", + "epoch: 3 step: 134, loss is 0.4585\n", + "epoch: 3 step: 135, loss is 0.4663\n", + "epoch: 3 step: 136, loss is 0.4121\n", + "epoch: 3 step: 137, loss is 0.5713\n", + "epoch: 3 step: 138, loss is 0.5436\n", + "epoch: 3 step: 139, loss is 0.3907\n", + "epoch: 3 step: 140, loss is 0.3895\n", + "epoch: 3 step: 141, loss is 0.2858\n", + "epoch: 3 step: 142, loss is 0.3387\n", + "epoch: 3 step: 143, loss is 0.2160\n", + "epoch: 3 step: 144, loss is 0.3003\n", + "epoch: 3 step: 145, loss is 0.4193\n", + "epoch: 3 step: 146, loss is 0.2822\n", + "epoch: 3 step: 147, loss is 0.4882\n", + "epoch: 3 step: 148, loss is 0.3009\n", + "epoch: 3 step: 149, loss is 0.4665\n", + "epoch: 3 step: 150, loss is 0.1979\n", + "epoch: 3 step: 151, loss is 0.5718\n", + "epoch: 3 step: 152, loss is 0.4232\n", + "epoch: 3 step: 153, loss is 0.3551\n", + "epoch: 3 step: 154, loss is 0.4726\n", + "epoch: 3 step: 155, loss is 0.4916\n", + "epoch: 3 step: 156, loss is 0.2972\n", + "epoch: 3 step: 157, loss is 0.5057\n", + "epoch: 3 step: 158, loss is 0.3771\n", + "epoch: 3 step: 159, loss is 0.4795\n", + "epoch: 3 step: 160, loss is 0.3869\n", + "epoch: 3 step: 161, loss is 0.4202\n", + "epoch: 3 step: 162, loss is 0.4563\n", + "epoch: 3 step: 163, loss is 0.4568\n", + "epoch: 3 step: 164, loss is 0.4694\n", + "epoch: 3 step: 165, loss is 0.4631\n", + "epoch: 3 step: 166, loss is 0.4519\n", + "epoch: 3 step: 167, loss is 0.3601\n", + "epoch: 3 step: 168, loss is 0.4120\n", + "epoch: 3 step: 169, loss is 0.4180\n", + "epoch: 3 step: 170, loss is 0.4114\n", + "epoch: 3 step: 171, loss is 0.4114\n", + "epoch: 3 step: 172, loss is 0.4159\n", + "epoch: 3 step: 173, loss is 0.4097\n", + "epoch: 3 step: 174, loss is 0.4147\n", + "epoch: 3 step: 175, loss is 0.4558\n", + "epoch: 3 step: 176, loss is 0.4649\n", + "epoch: 3 step: 177, loss is 0.3569\n", + "epoch: 3 step: 178, loss is 0.3931\n", + "epoch: 3 step: 179, loss is 0.4755\n", + "epoch: 3 step: 180, loss is 0.3079\n", + "epoch: 3 step: 181, loss is 0.2524\n", + "epoch: 3 step: 182, loss is 0.4180\n", + "epoch: 3 step: 183, loss is 0.3591\n", + "epoch: 3 step: 184, loss is 0.4032\n", + "epoch: 3 step: 185, loss is 0.4342\n", + "epoch: 3 step: 186, loss is 0.4754\n", + "epoch: 3 step: 187, loss is 0.4542\n", + "epoch: 3 step: 188, loss is 0.4420\n", + "epoch: 3 step: 189, loss is 0.4167\n", + "epoch: 3 step: 190, loss is 0.3310\n", + "epoch: 3 step: 191, loss is 0.3687\n", + "epoch: 3 step: 192, loss is 0.5318\n", + "epoch: 3 step: 193, loss is 0.4974\n", + "epoch: 3 step: 194, loss is 0.3833\n", + "epoch: 3 step: 195, loss is 0.3165\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 3/ 10], step: [ 196/ 390], loss: [0.3696], avg loss: [0.4000], time: [104.5880ms]\n", - "Epoch: [ 3/ 10], step: [ 197/ 390], loss: [0.3521], avg loss: [0.3997], time: [102.7956ms]\n", - "Epoch: [ 3/ 10], step: [ 198/ 390], loss: [0.3601], avg loss: [0.3995], time: [104.1501ms]\n", - "Epoch: [ 3/ 10], step: [ 199/ 390], loss: [0.4757], avg loss: [0.3999], time: [102.9751ms]\n", - "Epoch: [ 3/ 10], step: [ 200/ 390], loss: [0.4163], avg loss: [0.4000], time: [103.6022ms]\n", - "Epoch: [ 3/ 10], step: [ 201/ 390], loss: [0.3398], avg loss: [0.3997], time: [104.8150ms]\n", - "Epoch: [ 3/ 10], step: [ 202/ 390], loss: [0.4203], avg loss: [0.3998], time: [103.7111ms]\n", - "Epoch: [ 3/ 10], step: [ 203/ 390], loss: [0.3198], avg loss: [0.3994], time: [102.7951ms]\n", - "Epoch: [ 3/ 10], step: [ 204/ 390], loss: [0.3190], avg loss: [0.3990], time: [103.6525ms]\n", - "Epoch: [ 3/ 10], step: [ 205/ 390], loss: [0.3116], avg loss: [0.3986], time: [103.7445ms]\n", - "Epoch: [ 3/ 10], step: [ 206/ 390], loss: [0.3934], avg loss: [0.3985], time: [103.9851ms]\n", - "Epoch: [ 3/ 10], step: [ 207/ 390], loss: [0.4535], avg loss: [0.3988], time: [107.6472ms]\n", - "Epoch: [ 3/ 10], step: [ 208/ 390], loss: [0.4659], avg loss: [0.3991], time: [104.4419ms]\n", - "Epoch: [ 3/ 10], step: [ 209/ 390], loss: [0.3414], avg loss: [0.3989], time: [104.8763ms]\n", - "Epoch: [ 3/ 10], step: [ 210/ 390], loss: [0.4802], avg loss: [0.3992], time: [105.4237ms]\n", - "Epoch: [ 3/ 10], step: [ 211/ 390], loss: [0.5756], avg loss: [0.4001], time: [104.9984ms]\n", - "Epoch: [ 3/ 10], step: [ 212/ 390], loss: [0.3171], avg loss: [0.3997], time: [104.9805ms]\n", - "Epoch: [ 3/ 10], step: [ 213/ 390], loss: [0.4107], avg loss: [0.3997], time: [106.6091ms]\n", - "Epoch: [ 3/ 10], step: [ 214/ 390], loss: [0.3674], avg loss: [0.3996], time: [104.7139ms]\n", - "Epoch: [ 3/ 10], step: [ 215/ 390], loss: [0.4184], avg loss: [0.3997], time: [103.4434ms]\n", - "Epoch: [ 3/ 10], step: [ 216/ 390], loss: [0.3420], avg loss: [0.3994], time: [104.7640ms]\n", - "Epoch: [ 3/ 10], step: [ 217/ 390], loss: [0.6002], avg loss: [0.4003], time: [106.9977ms]\n", - "Epoch: [ 3/ 10], step: [ 218/ 390], loss: [0.2872], avg loss: [0.3998], time: [105.8819ms]\n", - "Epoch: [ 3/ 10], step: [ 219/ 390], loss: [0.3229], avg loss: [0.3995], time: [105.8280ms]\n", - "Epoch: [ 3/ 10], step: [ 220/ 390], loss: [0.4415], avg loss: [0.3997], time: [102.8762ms]\n", - "Epoch: [ 3/ 10], step: [ 221/ 390], loss: [0.3746], avg loss: [0.3995], time: [106.3321ms]\n", - "Epoch: [ 3/ 10], step: [ 222/ 390], loss: [0.2635], avg loss: [0.3989], time: [103.1306ms]\n", - "Epoch: [ 3/ 10], step: [ 223/ 390], loss: [0.3991], avg loss: [0.3989], time: [105.2806ms]\n", - "Epoch: [ 3/ 10], step: [ 224/ 390], loss: [0.3567], avg loss: [0.3987], time: [102.5407ms]\n", - "Epoch: [ 3/ 10], step: [ 225/ 390], loss: [0.3465], avg loss: [0.3985], time: [105.3410ms]\n", - "Epoch: [ 3/ 10], step: [ 226/ 390], loss: [0.3587], avg loss: [0.3983], time: [105.5880ms]\n", - "Epoch: [ 3/ 10], step: [ 227/ 390], loss: [0.5150], avg loss: [0.3988], time: [104.2035ms]\n", - "Epoch: [ 3/ 10], step: [ 228/ 390], loss: [0.4710], avg loss: [0.3992], time: [104.1911ms]\n", - "Epoch: [ 3/ 10], step: [ 229/ 390], loss: [0.2521], avg loss: [0.3985], time: [109.4351ms]\n", - "Epoch: [ 3/ 10], step: [ 230/ 390], loss: [0.4252], avg loss: [0.3986], time: [101.8806ms]\n", - "Epoch: [ 3/ 10], step: [ 231/ 390], loss: [0.3643], avg loss: [0.3985], time: [104.1234ms]\n", - "Epoch: [ 3/ 10], step: [ 232/ 390], loss: [0.4818], avg loss: [0.3988], time: [100.6560ms]\n", - "Epoch: [ 3/ 10], step: [ 233/ 390], loss: [0.4397], avg loss: [0.3990], time: [109.3936ms]\n", - "Epoch: [ 3/ 10], step: [ 234/ 390], loss: [0.3876], avg loss: [0.3990], time: [101.9568ms]\n", - "Epoch: [ 3/ 10], step: [ 235/ 390], loss: [0.3596], avg loss: [0.3988], time: [105.1791ms]\n", - "Epoch: [ 3/ 10], step: [ 236/ 390], loss: [0.3529], avg loss: [0.3986], time: [101.6073ms]\n", - "Epoch: [ 3/ 10], step: [ 237/ 390], loss: [0.3215], avg loss: [0.3983], time: [108.1150ms]\n", - "Epoch: [ 3/ 10], step: [ 238/ 390], loss: [0.4018], avg loss: [0.3983], time: [107.6634ms]\n", - "Epoch: [ 3/ 10], step: [ 239/ 390], loss: [0.4951], avg loss: [0.3987], time: [105.2194ms]\n", - "Epoch: [ 3/ 10], step: [ 240/ 390], loss: [0.5848], avg loss: [0.3995], time: [102.4096ms]\n", - "Epoch: [ 3/ 10], step: [ 241/ 390], loss: [0.2801], avg loss: [0.3990], time: [105.2697ms]\n", - "Epoch: [ 3/ 10], step: [ 242/ 390], loss: [0.3817], avg loss: [0.3989], time: [101.7780ms]\n", - "Epoch: [ 3/ 10], step: [ 243/ 390], loss: [0.3129], avg loss: [0.3986], time: [108.2616ms]\n", - "Epoch: [ 3/ 10], step: [ 244/ 390], loss: [0.3563], avg loss: [0.3984], time: [102.7660ms]\n", - "Epoch: [ 3/ 10], step: [ 245/ 390], loss: [0.4328], avg loss: [0.3985], time: [102.7133ms]\n", - "Epoch: [ 3/ 10], step: [ 246/ 390], loss: [0.2599], avg loss: [0.3980], time: [104.3119ms]\n", - "Epoch: [ 3/ 10], step: [ 247/ 390], loss: [0.3628], avg loss: [0.3978], time: [104.1727ms]\n", - "Epoch: [ 3/ 10], step: [ 248/ 390], loss: [0.3745], avg loss: [0.3977], time: [105.3689ms]\n", - "Epoch: [ 3/ 10], step: [ 249/ 390], loss: [0.5442], avg loss: [0.3983], time: [103.2224ms]\n", - "Epoch: [ 3/ 10], step: [ 250/ 390], loss: [0.2922], avg loss: [0.3979], time: [102.6263ms]\n", - "Epoch: [ 3/ 10], step: [ 251/ 390], loss: [0.5088], avg loss: [0.3983], time: [105.4394ms]\n", - "Epoch: [ 3/ 10], step: [ 252/ 390], loss: [0.4104], avg loss: [0.3984], time: [102.2727ms]\n", - "Epoch: [ 3/ 10], step: [ 253/ 390], loss: [0.3428], avg loss: [0.3982], time: [105.7491ms]\n", - "Epoch: [ 3/ 10], step: [ 254/ 390], loss: [0.2948], avg loss: [0.3978], time: [101.4915ms]\n", - "Epoch: [ 3/ 10], step: [ 255/ 390], loss: [0.2938], avg loss: [0.3973], time: [108.3610ms]\n", - "Epoch: [ 3/ 10], step: [ 256/ 390], loss: [0.3375], avg loss: [0.3971], time: [101.7067ms]\n", - "Epoch: [ 3/ 10], step: [ 257/ 390], loss: [0.4268], avg loss: [0.3972], time: [105.7875ms]\n", - "Epoch: [ 3/ 10], step: [ 258/ 390], loss: [0.4184], avg loss: [0.3973], time: [106.6694ms]\n", - "Epoch: [ 3/ 10], step: [ 259/ 390], loss: [0.4208], avg loss: [0.3974], time: [103.3854ms]\n", - "Epoch: [ 3/ 10], step: [ 260/ 390], loss: [0.4031], avg loss: [0.3974], time: [101.1827ms]\n", - "Epoch: [ 3/ 10], step: [ 261/ 390], loss: [0.4611], avg loss: [0.3977], time: [108.3522ms]\n", - "Epoch: [ 3/ 10], step: [ 262/ 390], loss: [0.4319], avg loss: [0.3978], time: [107.0139ms]\n", - "Epoch: [ 3/ 10], step: [ 263/ 390], loss: [0.3944], avg loss: [0.3978], time: [104.6364ms]\n", - "Epoch: [ 3/ 10], step: [ 264/ 390], loss: [0.3305], avg loss: [0.3975], time: [101.7764ms]\n", - "Epoch: [ 3/ 10], step: [ 265/ 390], loss: [0.3527], avg loss: [0.3974], time: [108.5494ms]\n", - "Epoch: [ 3/ 10], step: [ 266/ 390], loss: [0.4057], avg loss: [0.3974], time: [104.1663ms]\n", - "Epoch: [ 3/ 10], step: [ 267/ 390], loss: [0.4273], avg loss: [0.3975], time: [103.1766ms]\n", - "Epoch: [ 3/ 10], step: [ 268/ 390], loss: [0.3185], avg loss: [0.3972], time: [103.4164ms]\n", - "Epoch: [ 3/ 10], step: [ 269/ 390], loss: [0.3514], avg loss: [0.3970], time: [106.0576ms]\n", - "Epoch: [ 3/ 10], step: [ 270/ 390], loss: [0.3194], avg loss: [0.3967], time: [102.9601ms]\n", - "Epoch: [ 3/ 10], step: [ 271/ 390], loss: [0.3234], avg loss: [0.3965], time: [107.7216ms]\n", - "Epoch: [ 3/ 10], step: [ 272/ 390], loss: [0.4830], avg loss: [0.3968], time: [101.5913ms]\n", - "Epoch: [ 3/ 10], step: [ 273/ 390], loss: [0.4117], avg loss: [0.3969], time: [104.7854ms]\n", - "Epoch: [ 3/ 10], step: [ 274/ 390], loss: [0.4786], avg loss: [0.3971], time: [104.3963ms]\n", - "Epoch: [ 3/ 10], step: [ 275/ 390], loss: [0.4281], avg loss: [0.3973], time: [109.1735ms]\n", - "Epoch: [ 3/ 10], step: [ 276/ 390], loss: [0.3829], avg loss: [0.3972], time: [104.0406ms]\n", - "Epoch: [ 3/ 10], step: [ 277/ 390], loss: [0.5034], avg loss: [0.3976], time: [106.9276ms]\n", - "Epoch: [ 3/ 10], step: [ 278/ 390], loss: [0.5044], avg loss: [0.3980], time: [101.9976ms]\n", - "Epoch: [ 3/ 10], step: [ 279/ 390], loss: [0.4408], avg loss: [0.3981], time: [107.4753ms]\n", - "Epoch: [ 3/ 10], step: [ 280/ 390], loss: [0.3188], avg loss: [0.3978], time: [101.8198ms]\n", - "Epoch: [ 3/ 10], step: [ 281/ 390], loss: [0.3911], avg loss: [0.3978], time: [109.5376ms]\n", - "Epoch: [ 3/ 10], step: [ 282/ 390], loss: [0.3954], avg loss: [0.3978], time: [103.4002ms]\n", - "Epoch: [ 3/ 10], step: [ 283/ 390], loss: [0.4993], avg loss: [0.3982], time: [104.7726ms]\n", - "Epoch: [ 3/ 10], step: [ 284/ 390], loss: [0.3837], avg loss: [0.3981], time: [102.5836ms]\n" + "epoch: 3 step: 196, loss is 0.3696\n", + "epoch: 3 step: 197, loss is 0.3521\n", + "epoch: 3 step: 198, loss is 0.3601\n", + "epoch: 3 step: 199, loss is 0.4757\n", + "epoch: 3 step: 200, loss is 0.4163\n", + "epoch: 3 step: 201, loss is 0.3398\n", + "epoch: 3 step: 202, loss is 0.4203\n", + "epoch: 3 step: 203, loss is 0.3198\n", + "epoch: 3 step: 204, loss is 0.3190\n", + "epoch: 3 step: 205, loss is 0.3116\n", + "epoch: 3 step: 206, loss is 0.3934\n", + "epoch: 3 step: 207, loss is 0.4535\n", + "epoch: 3 step: 208, loss is 0.4659\n", + "epoch: 3 step: 209, loss is 0.3414\n", + "epoch: 3 step: 210, loss is 0.4802\n", + "epoch: 3 step: 211, loss is 0.5756\n", + "epoch: 3 step: 212, loss is 0.3171\n", + "epoch: 3 step: 213, loss is 0.4107\n", + "epoch: 3 step: 214, loss is 0.3674\n", + "epoch: 3 step: 215, loss is 0.4184\n", + "epoch: 3 step: 216, loss is 0.3420\n", + "epoch: 3 step: 217, loss is 0.6002\n", + "epoch: 3 step: 218, loss is 0.2872\n", + "epoch: 3 step: 219, loss is 0.3229\n", + "epoch: 3 step: 220, loss is 0.4415\n", + "epoch: 3 step: 221, loss is 0.3746\n", + "epoch: 3 step: 222, loss is 0.2635\n", + "epoch: 3 step: 223, loss is 0.3991\n", + "epoch: 3 step: 224, loss is 0.3567\n", + "epoch: 3 step: 225, loss is 0.3465\n", + "epoch: 3 step: 226, loss is 0.3587\n", + "epoch: 3 step: 227, loss is 0.5150\n", + "epoch: 3 step: 228, loss is 0.4710\n", + "epoch: 3 step: 229, loss is 0.2521\n", + "epoch: 3 step: 230, loss is 0.4252\n", + "epoch: 3 step: 231, loss is 0.3643\n", + "epoch: 3 step: 232, loss is 0.4818\n", + "epoch: 3 step: 233, loss is 0.4397\n", + "epoch: 3 step: 234, loss is 0.3876\n", + "epoch: 3 step: 235, loss is 0.3596\n", + "epoch: 3 step: 236, loss is 0.3529\n", + "epoch: 3 step: 237, loss is 0.3215\n", + "epoch: 3 step: 238, loss is 0.4018\n", + "epoch: 3 step: 239, loss is 0.4951\n", + "epoch: 3 step: 240, loss is 0.5848\n", + "epoch: 3 step: 241, loss is 0.2801\n", + "epoch: 3 step: 242, loss is 0.3817\n", + "epoch: 3 step: 243, loss is 0.3129\n", + "epoch: 3 step: 244, loss is 0.3563\n", + "epoch: 3 step: 245, loss is 0.4328\n", + "epoch: 3 step: 246, loss is 0.2599\n", + "epoch: 3 step: 247, loss is 0.3628\n", + "epoch: 3 step: 248, loss is 0.3745\n", + "epoch: 3 step: 249, loss is 0.5442\n", + "epoch: 3 step: 250, loss is 0.2922\n", + "epoch: 3 step: 251, loss is 0.5088\n", + "epoch: 3 step: 252, loss is 0.4104\n", + "epoch: 3 step: 253, loss is 0.3428\n", + "epoch: 3 step: 254, loss is 0.2948\n", + "epoch: 3 step: 255, loss is 0.2938\n", + "epoch: 3 step: 256, loss is 0.3375\n", + "epoch: 3 step: 257, loss is 0.4268\n", + "epoch: 3 step: 258, loss is 0.4184\n", + "epoch: 3 step: 259, loss is 0.4208\n", + "epoch: 3 step: 260, loss is 0.4031\n", + "epoch: 3 step: 261, loss is 0.4611\n", + "epoch: 3 step: 262, loss is 0.4319\n", + "epoch: 3 step: 263, loss is 0.3944\n", + "epoch: 3 step: 264, loss is 0.3305\n", + "epoch: 3 step: 265, loss is 0.3527\n", + "epoch: 3 step: 266, loss is 0.4057\n", + "epoch: 3 step: 267, loss is 0.4273\n", + "epoch: 3 step: 268, loss is 0.3185\n", + "epoch: 3 step: 269, loss is 0.3514\n", + "epoch: 3 step: 270, loss is 0.3194\n", + "epoch: 3 step: 271, loss is 0.3234\n", + "epoch: 3 step: 272, loss is 0.4830\n", + "epoch: 3 step: 273, loss is 0.4117\n", + "epoch: 3 step: 274, loss is 0.4786\n", + "epoch: 3 step: 275, loss is 0.4281\n", + "epoch: 3 step: 276, loss is 0.3829\n", + "epoch: 3 step: 277, loss is 0.5034\n", + "epoch: 3 step: 278, loss is 0.5044\n", + "epoch: 3 step: 279, loss is 0.4408\n", + "epoch: 3 step: 280, loss is 0.3188\n", + "epoch: 3 step: 281, loss is 0.3911\n", + "epoch: 3 step: 282, loss is 0.3954\n", + "epoch: 3 step: 283, loss is 0.4993\n", + "epoch: 3 step: 284, loss is 0.3837\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 3/ 10], step: [ 285/ 390], loss: [0.4163], avg loss: [0.3982], time: [105.0472ms]\n", - "Epoch: [ 3/ 10], step: [ 286/ 390], loss: [0.4400], avg loss: [0.3983], time: [101.8260ms]\n", - "Epoch: [ 3/ 10], step: [ 287/ 390], loss: [0.5866], avg loss: [0.3990], time: [107.6553ms]\n", - "Epoch: [ 3/ 10], step: [ 288/ 390], loss: [0.5641], avg loss: [0.3996], time: [104.2180ms]\n", - "Epoch: [ 3/ 10], step: [ 289/ 390], loss: [0.4612], avg loss: [0.3998], time: [105.5670ms]\n", - "Epoch: [ 3/ 10], step: [ 290/ 390], loss: [0.2980], avg loss: [0.3994], time: [101.8584ms]\n", - "Epoch: [ 3/ 10], step: [ 291/ 390], loss: [0.4731], avg loss: [0.3997], time: [107.9223ms]\n", - "Epoch: [ 3/ 10], step: [ 292/ 390], loss: [0.3319], avg loss: [0.3994], time: [102.9286ms]\n", - "Epoch: [ 3/ 10], step: [ 293/ 390], loss: [0.2109], avg loss: [0.3988], time: [102.7219ms]\n", - "Epoch: [ 3/ 10], step: [ 294/ 390], loss: [0.3556], avg loss: [0.3987], time: [106.2779ms]\n", - "Epoch: [ 3/ 10], step: [ 295/ 390], loss: [0.5077], avg loss: [0.3990], time: [108.3124ms]\n", - "Epoch: [ 3/ 10], step: [ 296/ 390], loss: [0.3730], avg loss: [0.3989], time: [105.7329ms]\n", - "Epoch: [ 3/ 10], step: [ 297/ 390], loss: [0.3788], avg loss: [0.3989], time: [103.3728ms]\n", - "Epoch: [ 3/ 10], step: [ 298/ 390], loss: [0.4189], avg loss: [0.3989], time: [102.2112ms]\n", - "Epoch: [ 3/ 10], step: [ 299/ 390], loss: [0.4771], avg loss: [0.3992], time: [107.6407ms]\n", - "Epoch: [ 3/ 10], step: [ 300/ 390], loss: [0.4764], avg loss: [0.3995], time: [106.6926ms]\n", - "Epoch: [ 3/ 10], step: [ 301/ 390], loss: [0.2127], avg loss: [0.3988], time: [109.6792ms]\n", - "Epoch: [ 3/ 10], step: [ 302/ 390], loss: [0.3632], avg loss: [0.3987], time: [106.6904ms]\n", - "Epoch: [ 3/ 10], step: [ 303/ 390], loss: [0.4322], avg loss: [0.3988], time: [107.4171ms]\n", - "Epoch: [ 3/ 10], step: [ 304/ 390], loss: [0.2149], avg loss: [0.3982], time: [102.3970ms]\n", - "Epoch: [ 3/ 10], step: [ 305/ 390], loss: [0.3922], avg loss: [0.3982], time: [103.8301ms]\n", - "Epoch: [ 3/ 10], step: [ 306/ 390], loss: [0.3648], avg loss: [0.3981], time: [104.2843ms]\n", - "Epoch: [ 3/ 10], step: [ 307/ 390], loss: [0.4253], avg loss: [0.3982], time: [108.5942ms]\n", - "Epoch: [ 3/ 10], step: [ 308/ 390], loss: [0.2997], avg loss: [0.3979], time: [108.4452ms]\n", - "Epoch: [ 3/ 10], step: [ 309/ 390], loss: [0.4857], avg loss: [0.3981], time: [106.3452ms]\n", - "Epoch: [ 3/ 10], step: [ 310/ 390], loss: [0.2400], avg loss: [0.3976], time: [104.8474ms]\n", - "Epoch: [ 3/ 10], step: [ 311/ 390], loss: [0.3372], avg loss: [0.3974], time: [106.9918ms]\n", - "Epoch: [ 3/ 10], step: [ 312/ 390], loss: [0.3999], avg loss: [0.3974], time: [107.0898ms]\n", - "Epoch: [ 3/ 10], step: [ 313/ 390], loss: [0.3966], avg loss: [0.3974], time: [104.3730ms]\n", - "Epoch: [ 3/ 10], step: [ 314/ 390], loss: [0.3356], avg loss: [0.3972], time: [106.7452ms]\n", - "Epoch: [ 3/ 10], step: [ 315/ 390], loss: [0.4338], avg loss: [0.3974], time: [104.3062ms]\n", - "Epoch: [ 3/ 10], step: [ 316/ 390], loss: [0.4492], avg loss: [0.3975], time: [106.8077ms]\n", - "Epoch: [ 3/ 10], step: [ 317/ 390], loss: [0.4842], avg loss: [0.3978], time: [105.7394ms]\n", - "Epoch: [ 3/ 10], step: [ 318/ 390], loss: [0.4107], avg loss: [0.3978], time: [103.0300ms]\n", - "Epoch: [ 3/ 10], step: [ 319/ 390], loss: [0.4075], avg loss: [0.3979], time: [109.4158ms]\n", - "Epoch: [ 3/ 10], step: [ 320/ 390], loss: [0.2865], avg loss: [0.3975], time: [107.0471ms]\n", - "Epoch: [ 3/ 10], step: [ 321/ 390], loss: [0.4206], avg loss: [0.3976], time: [107.0685ms]\n", - "Epoch: [ 3/ 10], step: [ 322/ 390], loss: [0.3023], avg loss: [0.3973], time: [104.3916ms]\n", - "Epoch: [ 3/ 10], step: [ 323/ 390], loss: [0.5861], avg loss: [0.3979], time: [104.4352ms]\n", - "Epoch: [ 3/ 10], step: [ 324/ 390], loss: [0.3894], avg loss: [0.3979], time: [103.2722ms]\n", - "Epoch: [ 3/ 10], step: [ 325/ 390], loss: [0.4065], avg loss: [0.3979], time: [105.9616ms]\n", - "Epoch: [ 3/ 10], step: [ 326/ 390], loss: [0.4846], avg loss: [0.3982], time: [106.9846ms]\n", - "Epoch: [ 3/ 10], step: [ 327/ 390], loss: [0.3179], avg loss: [0.3979], time: [104.4197ms]\n", - "Epoch: [ 3/ 10], step: [ 328/ 390], loss: [0.4151], avg loss: [0.3980], time: [102.4847ms]\n", - "Epoch: [ 3/ 10], step: [ 329/ 390], loss: [0.4456], avg loss: [0.3981], time: [106.0786ms]\n", - "Epoch: [ 3/ 10], step: [ 330/ 390], loss: [0.5323], avg loss: [0.3985], time: [104.0373ms]\n", - "Epoch: [ 3/ 10], step: [ 331/ 390], loss: [0.4364], avg loss: [0.3986], time: [107.2137ms]\n", - "Epoch: [ 3/ 10], step: [ 332/ 390], loss: [0.3513], avg loss: [0.3985], time: [104.4378ms]\n", - "Epoch: [ 3/ 10], step: [ 333/ 390], loss: [0.3349], avg loss: [0.3983], time: [104.3508ms]\n", - "Epoch: [ 3/ 10], step: [ 334/ 390], loss: [0.4467], avg loss: [0.3984], time: [106.8215ms]\n", - "Epoch: [ 3/ 10], step: [ 335/ 390], loss: [0.3192], avg loss: [0.3982], time: [107.9407ms]\n", - "Epoch: [ 3/ 10], step: [ 336/ 390], loss: [0.3861], avg loss: [0.3982], time: [106.5843ms]\n", - "Epoch: [ 3/ 10], step: [ 337/ 390], loss: [0.4852], avg loss: [0.3984], time: [105.2516ms]\n", - "Epoch: [ 3/ 10], step: [ 338/ 390], loss: [0.5865], avg loss: [0.3990], time: [106.8594ms]\n", - "Epoch: [ 3/ 10], step: [ 339/ 390], loss: [0.4505], avg loss: [0.3991], time: [103.7941ms]\n", - "Epoch: [ 3/ 10], step: [ 340/ 390], loss: [0.3992], avg loss: [0.3991], time: [106.0836ms]\n", - "Epoch: [ 3/ 10], step: [ 341/ 390], loss: [0.4544], avg loss: [0.3993], time: [102.8435ms]\n", - "Epoch: [ 3/ 10], step: [ 342/ 390], loss: [0.6408], avg loss: [0.4000], time: [101.7888ms]\n", - "Epoch: [ 3/ 10], step: [ 343/ 390], loss: [0.4806], avg loss: [0.4002], time: [105.1824ms]\n", - "Epoch: [ 3/ 10], step: [ 344/ 390], loss: [0.4758], avg loss: [0.4005], time: [102.1531ms]\n", - "Epoch: [ 3/ 10], step: [ 345/ 390], loss: [0.3838], avg loss: [0.4004], time: [108.0832ms]\n", - "Epoch: [ 3/ 10], step: [ 346/ 390], loss: [0.4273], avg loss: [0.4005], time: [107.1842ms]\n", - "Epoch: [ 3/ 10], step: [ 347/ 390], loss: [0.3675], avg loss: [0.4004], time: [107.6250ms]\n", - "Epoch: [ 3/ 10], step: [ 348/ 390], loss: [0.4613], avg loss: [0.4006], time: [102.1802ms]\n", - "Epoch: [ 3/ 10], step: [ 349/ 390], loss: [0.5186], avg loss: [0.4009], time: [105.7131ms]\n", - "Epoch: [ 3/ 10], step: [ 350/ 390], loss: [0.4531], avg loss: [0.4011], time: [106.6077ms]\n", - "Epoch: [ 3/ 10], step: [ 351/ 390], loss: [0.3558], avg loss: [0.4009], time: [104.0854ms]\n", - "Epoch: [ 3/ 10], step: [ 352/ 390], loss: [0.3800], avg loss: [0.4009], time: [103.4341ms]\n", - "Epoch: [ 3/ 10], step: [ 353/ 390], loss: [0.4185], avg loss: [0.4009], time: [106.5884ms]\n", - "Epoch: [ 3/ 10], step: [ 354/ 390], loss: [0.3551], avg loss: [0.4008], time: [100.7278ms]\n", - "Epoch: [ 3/ 10], step: [ 355/ 390], loss: [0.3627], avg loss: [0.4007], time: [104.9283ms]\n", - "Epoch: [ 3/ 10], step: [ 356/ 390], loss: [0.3571], avg loss: [0.4006], time: [101.3572ms]\n", - "Epoch: [ 3/ 10], step: [ 357/ 390], loss: [0.5939], avg loss: [0.4011], time: [104.5790ms]\n", - "Epoch: [ 3/ 10], step: [ 358/ 390], loss: [0.5010], avg loss: [0.4014], time: [103.0738ms]\n", - "Epoch: [ 3/ 10], step: [ 359/ 390], loss: [0.3568], avg loss: [0.4013], time: [108.9163ms]\n", - "Epoch: [ 3/ 10], step: [ 360/ 390], loss: [0.3379], avg loss: [0.4011], time: [102.8593ms]\n", - "Epoch: [ 3/ 10], step: [ 361/ 390], loss: [0.3807], avg loss: [0.4010], time: [108.5019ms]\n", - "Epoch: [ 3/ 10], step: [ 362/ 390], loss: [0.5156], avg loss: [0.4013], time: [103.0893ms]\n", - "Epoch: [ 3/ 10], step: [ 363/ 390], loss: [0.4275], avg loss: [0.4014], time: [104.1322ms]\n", - "Epoch: [ 3/ 10], step: [ 364/ 390], loss: [0.4519], avg loss: [0.4015], time: [105.7711ms]\n", - "Epoch: [ 3/ 10], step: [ 365/ 390], loss: [0.4699], avg loss: [0.4017], time: [105.3543ms]\n", - "Epoch: [ 3/ 10], step: [ 366/ 390], loss: [0.3991], avg loss: [0.4017], time: [103.7087ms]\n", - "Epoch: [ 3/ 10], step: [ 367/ 390], loss: [0.5582], avg loss: [0.4022], time: [104.6586ms]\n", - "Epoch: [ 3/ 10], step: [ 368/ 390], loss: [0.3483], avg loss: [0.4020], time: [102.0648ms]\n", - "Epoch: [ 3/ 10], step: [ 369/ 390], loss: [0.5089], avg loss: [0.4023], time: [102.4358ms]\n", - "Epoch: [ 3/ 10], step: [ 370/ 390], loss: [0.4907], avg loss: [0.4025], time: [103.3907ms]\n", - "Epoch: [ 3/ 10], step: [ 371/ 390], loss: [0.3668], avg loss: [0.4024], time: [105.0577ms]\n", - "Epoch: [ 3/ 10], step: [ 372/ 390], loss: [0.4605], avg loss: [0.4026], time: [102.5267ms]\n", - "Epoch: [ 3/ 10], step: [ 373/ 390], loss: [0.4048], avg loss: [0.4026], time: [110.1303ms]\n" + "epoch: 3 step: 285, loss is 0.4163\n", + "epoch: 3 step: 286, loss is 0.4400\n", + "epoch: 3 step: 287, loss is 0.5866\n", + "epoch: 3 step: 288, loss is 0.5641\n", + "epoch: 3 step: 289, loss is 0.4612\n", + "epoch: 3 step: 290, loss is 0.2980\n", + "epoch: 3 step: 291, loss is 0.4731\n", + "epoch: 3 step: 292, loss is 0.3319\n", + "epoch: 3 step: 293, loss is 0.2109\n", + "epoch: 3 step: 294, loss is 0.3556\n", + "epoch: 3 step: 295, loss is 0.5077\n", + "epoch: 3 step: 296, loss is 0.3730\n", + "epoch: 3 step: 297, loss is 0.3788\n", + "epoch: 3 step: 298, loss is 0.4189\n", + "epoch: 3 step: 299, loss is 0.4771\n", + "epoch: 3 step: 300, loss is 0.4764\n", + "epoch: 3 step: 301, loss is 0.2127\n", + "epoch: 3 step: 302, loss is 0.3632\n", + "epoch: 3 step: 303, loss is 0.4322\n", + "epoch: 3 step: 304, loss is 0.2149\n", + "epoch: 3 step: 305, loss is 0.3922\n", + "epoch: 3 step: 306, loss is 0.3648\n", + "epoch: 3 step: 307, loss is 0.4253\n", + "epoch: 3 step: 308, loss is 0.2997\n", + "epoch: 3 step: 309, loss is 0.4857\n", + "epoch: 3 step: 310, loss is 0.2400\n", + "epoch: 3 step: 311, loss is 0.3372\n", + "epoch: 3 step: 312, loss is 0.3999\n", + "epoch: 3 step: 313, loss is 0.3966\n", + "epoch: 3 step: 314, loss is 0.3356\n", + "epoch: 3 step: 315, loss is 0.4338\n", + "epoch: 3 step: 316, loss is 0.4492\n", + "epoch: 3 step: 317, loss is 0.4842\n", + "epoch: 3 step: 318, loss is 0.4107\n", + "epoch: 3 step: 319, loss is 0.4075\n", + "epoch: 3 step: 320, loss is 0.2865\n", + "epoch: 3 step: 321, loss is 0.4206\n", + "epoch: 3 step: 322, loss is 0.3023\n", + "epoch: 3 step: 323, loss is 0.5861\n", + "epoch: 3 step: 324, loss is 0.3894\n", + "epoch: 3 step: 325, loss is 0.4065\n", + "epoch: 3 step: 326, loss is 0.4846\n", + "epoch: 3 step: 327, loss is 0.3179\n", + "epoch: 3 step: 328, loss is 0.4151\n", + "epoch: 3 step: 329, loss is 0.4456\n", + "epoch: 3 step: 330, loss is 0.5323\n", + "epoch: 3 step: 331, loss is 0.4364\n", + "epoch: 3 step: 332, loss is 0.3513\n", + "epoch: 3 step: 333, loss is 0.3349\n", + "epoch: 3 step: 334, loss is 0.4467\n", + "epoch: 3 step: 335, loss is 0.3192\n", + "epoch: 3 step: 336, loss is 0.3861\n", + "epoch: 3 step: 337, loss is 0.4852\n", + "epoch: 3 step: 338, loss is 0.5865\n", + "epoch: 3 step: 339, loss is 0.4505\n", + "epoch: 3 step: 340, loss is 0.3992\n", + "epoch: 3 step: 341, loss is 0.4544\n", + "epoch: 3 step: 342, loss is 0.6408\n", + "epoch: 3 step: 343, loss is 0.4806\n", + "epoch: 3 step: 344, loss is 0.4758\n", + "epoch: 3 step: 345, loss is 0.3838\n", + "epoch: 3 step: 346, loss is 0.4273\n", + "epoch: 3 step: 347, loss is 0.3675\n", + "epoch: 3 step: 348, loss is 0.4613\n", + "epoch: 3 step: 349, loss is 0.5186\n", + "epoch: 3 step: 350, loss is 0.4531\n", + "epoch: 3 step: 351, loss is 0.3558\n", + "epoch: 3 step: 352, loss is 0.3800\n", + "epoch: 3 step: 353, loss is 0.4185\n", + "epoch: 3 step: 354, loss is 0.3551\n", + "epoch: 3 step: 355, loss is 0.3627\n", + "epoch: 3 step: 356, loss is 0.3571\n", + "epoch: 3 step: 357, loss is 0.5939\n", + "epoch: 3 step: 358, loss is 0.5010\n", + "epoch: 3 step: 359, loss is 0.3568\n", + "epoch: 3 step: 360, loss is 0.3379\n", + "epoch: 3 step: 361, loss is 0.3807\n", + "epoch: 3 step: 362, loss is 0.5156\n", + "epoch: 3 step: 363, loss is 0.4275\n", + "epoch: 3 step: 364, loss is 0.4519\n", + "epoch: 3 step: 365, loss is 0.4699\n", + "epoch: 3 step: 366, loss is 0.3991\n", + "epoch: 3 step: 367, loss is 0.5582\n", + "epoch: 3 step: 368, loss is 0.3483\n", + "epoch: 3 step: 369, loss is 0.5089\n", + "epoch: 3 step: 370, loss is 0.4907\n", + "epoch: 3 step: 371, loss is 0.3668\n", + "epoch: 3 step: 372, loss is 0.4605\n", + "epoch: 3 step: 373, loss is 0.4048\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 3/ 10], step: [ 374/ 390], loss: [0.3921], avg loss: [0.4026], time: [107.5480ms]\n", - "Epoch: [ 3/ 10], step: [ 375/ 390], loss: [0.4149], avg loss: [0.4026], time: [106.8847ms]\n", - "Epoch: [ 3/ 10], step: [ 376/ 390], loss: [0.4907], avg loss: [0.4028], time: [104.5911ms]\n", - "Epoch: [ 3/ 10], step: [ 377/ 390], loss: [0.3688], avg loss: [0.4027], time: [109.8232ms]\n", - "Epoch: [ 3/ 10], step: [ 378/ 390], loss: [0.3472], avg loss: [0.4026], time: [101.5713ms]\n", - "Epoch: [ 3/ 10], step: [ 379/ 390], loss: [0.4601], avg loss: [0.4028], time: [109.2470ms]\n", - "Epoch: [ 3/ 10], step: [ 380/ 390], loss: [0.3989], avg loss: [0.4027], time: [105.3076ms]\n", - "Epoch: [ 3/ 10], step: [ 381/ 390], loss: [0.4383], avg loss: [0.4028], time: [104.4583ms]\n", - "Epoch: [ 3/ 10], step: [ 382/ 390], loss: [0.4026], avg loss: [0.4028], time: [105.3464ms]\n", - "Epoch: [ 3/ 10], step: [ 383/ 390], loss: [0.4012], avg loss: [0.4028], time: [101.9688ms]\n", - "Epoch: [ 3/ 10], step: [ 384/ 390], loss: [0.3780], avg loss: [0.4028], time: [103.9000ms]\n", - "Epoch: [ 3/ 10], step: [ 385/ 390], loss: [0.4996], avg loss: [0.4030], time: [109.7128ms]\n", - "Epoch: [ 3/ 10], step: [ 386/ 390], loss: [0.4128], avg loss: [0.4030], time: [101.9919ms]\n", - "Epoch: [ 3/ 10], step: [ 387/ 390], loss: [0.4403], avg loss: [0.4031], time: [105.8900ms]\n", - "Epoch: [ 3/ 10], step: [ 388/ 390], loss: [0.3133], avg loss: [0.4029], time: [100.8637ms]\n", - "Epoch: [ 3/ 10], step: [ 389/ 390], loss: [0.3768], avg loss: [0.4028], time: [101.2883ms]\n", - "Epoch: [ 3/ 10], step: [ 390/ 390], loss: [0.3408], avg loss: [0.4027], time: [828.4981ms]\n", - "Epoch time: 41946.674, per step time: 107.556\n", + "epoch: 3 step: 374, loss is 0.3921\n", + "epoch: 3 step: 375, loss is 0.4149\n", + "epoch: 3 step: 376, loss is 0.4907\n", + "epoch: 3 step: 377, loss is 0.3688\n", + "epoch: 3 step: 378, loss is 0.3472\n", + "epoch: 3 step: 379, loss is 0.4601\n", + "epoch: 3 step: 380, loss is 0.3989\n", + "epoch: 3 step: 381, loss is 0.4383\n", + "epoch: 3 step: 382, loss is 0.4026\n", + "epoch: 3 step: 383, loss is 0.4012\n", + "epoch: 3 step: 384, loss is 0.3780\n", + "epoch: 3 step: 385, loss is 0.4996\n", + "epoch: 3 step: 386, loss is 0.4128\n", + "epoch: 3 step: 387, loss is 0.4403\n", + "epoch: 3 step: 388, loss is 0.3133\n", + "epoch: 3 step: 389, loss is 0.3768\n", + "epoch: 3 step: 390, loss is 0.3408\n", "Epoch time: 41946.990, per step time: 107.556, avg loss: 0.403\n", "************************************************************\n", - "Epoch: [ 4/ 10], step: [ 1/ 390], loss: [0.4017], avg loss: [0.4017], time: [73.9217ms]\n", - "Epoch: [ 4/ 10], step: [ 2/ 390], loss: [0.4795], avg loss: [0.4406], time: [103.4141ms]\n", - "Epoch: [ 4/ 10], step: [ 3/ 390], loss: [0.2870], avg loss: [0.3894], time: [107.5652ms]\n", - "Epoch: [ 4/ 10], step: [ 4/ 390], loss: [0.4298], avg loss: [0.3995], time: [107.0776ms]\n", - "Epoch: [ 4/ 10], step: [ 5/ 390], loss: [0.3789], avg loss: [0.3954], time: [107.3534ms]\n", - "Epoch: [ 4/ 10], step: [ 6/ 390], loss: [0.3850], avg loss: [0.3936], time: [104.1341ms]\n", - "Epoch: [ 4/ 10], step: [ 7/ 390], loss: [0.5787], avg loss: [0.4201], time: [102.0460ms]\n", - "Epoch: [ 4/ 10], step: [ 8/ 390], loss: [0.4739], avg loss: [0.4268], time: [104.0175ms]\n", - "Epoch: [ 4/ 10], step: [ 9/ 390], loss: [0.3946], avg loss: [0.4232], time: [105.8216ms]\n", - "Epoch: [ 4/ 10], step: [ 10/ 390], loss: [0.4048], avg loss: [0.4214], time: [103.3876ms]\n", - "Epoch: [ 4/ 10], step: [ 11/ 390], loss: [0.2484], avg loss: [0.4057], time: [102.9871ms]\n", - "Epoch: [ 4/ 10], step: [ 12/ 390], loss: [0.2323], avg loss: [0.3912], time: [100.4424ms]\n", - "Epoch: [ 4/ 10], step: [ 13/ 390], loss: [0.4067], avg loss: [0.3924], time: [101.6319ms]\n", - "Epoch: [ 4/ 10], step: [ 14/ 390], loss: [0.3270], avg loss: [0.3877], time: [102.6273ms]\n", - "Epoch: [ 4/ 10], step: [ 15/ 390], loss: [0.4092], avg loss: [0.3892], time: [102.1852ms]\n", - "Epoch: [ 4/ 10], step: [ 16/ 390], loss: [0.3262], avg loss: [0.3852], time: [106.1547ms]\n", - "Epoch: [ 4/ 10], step: [ 17/ 390], loss: [0.3273], avg loss: [0.3818], time: [104.9578ms]\n", - "Epoch: [ 4/ 10], step: [ 18/ 390], loss: [0.3551], avg loss: [0.3803], time: [104.7392ms]\n", - "Epoch: [ 4/ 10], step: [ 19/ 390], loss: [0.2978], avg loss: [0.3760], time: [103.5285ms]\n", - "Epoch: [ 4/ 10], step: [ 20/ 390], loss: [0.3568], avg loss: [0.3750], time: [104.4667ms]\n", - "Epoch: [ 4/ 10], step: [ 21/ 390], loss: [0.3576], avg loss: [0.3742], time: [106.6606ms]\n", - "Epoch: [ 4/ 10], step: [ 22/ 390], loss: [0.4565], avg loss: [0.3779], time: [108.4642ms]\n", - "Epoch: [ 4/ 10], step: [ 23/ 390], loss: [0.3130], avg loss: [0.3751], time: [106.9276ms]\n", - "Epoch: [ 4/ 10], step: [ 24/ 390], loss: [0.3228], avg loss: [0.3729], time: [101.8312ms]\n", - "Epoch: [ 4/ 10], step: [ 25/ 390], loss: [0.4285], avg loss: [0.3752], time: [106.0030ms]\n", - "Epoch: [ 4/ 10], step: [ 26/ 390], loss: [0.4040], avg loss: [0.3763], time: [105.2804ms]\n", - "Epoch: [ 4/ 10], step: [ 27/ 390], loss: [0.2316], avg loss: [0.3709], time: [107.4708ms]\n", - "Epoch: [ 4/ 10], step: [ 28/ 390], loss: [0.2661], avg loss: [0.3672], time: [103.3499ms]\n", - "Epoch: [ 4/ 10], step: [ 29/ 390], loss: [0.3404], avg loss: [0.3662], time: [102.5293ms]\n", - "Epoch: [ 4/ 10], step: [ 30/ 390], loss: [0.4828], avg loss: [0.3701], time: [105.6926ms]\n", - "Epoch: [ 4/ 10], step: [ 31/ 390], loss: [0.3574], avg loss: [0.3697], time: [106.9551ms]\n", - "Epoch: [ 4/ 10], step: [ 32/ 390], loss: [0.5177], avg loss: [0.3743], time: [107.4872ms]\n", - "Epoch: [ 4/ 10], step: [ 33/ 390], loss: [0.4476], avg loss: [0.3766], time: [105.5844ms]\n", - "Epoch: [ 4/ 10], step: [ 34/ 390], loss: [0.4039], avg loss: [0.3774], time: [102.4349ms]\n", - "Epoch: [ 4/ 10], step: [ 35/ 390], loss: [0.4306], avg loss: [0.3789], time: [102.9930ms]\n", - "Epoch: [ 4/ 10], step: [ 36/ 390], loss: [0.3846], avg loss: [0.3790], time: [104.8679ms]\n", - "Epoch: [ 4/ 10], step: [ 37/ 390], loss: [0.3046], avg loss: [0.3770], time: [103.7214ms]\n", - "Epoch: [ 4/ 10], step: [ 38/ 390], loss: [0.3345], avg loss: [0.3759], time: [104.6548ms]\n", - "Epoch: [ 4/ 10], step: [ 39/ 390], loss: [0.4613], avg loss: [0.3781], time: [102.4151ms]\n", - "Epoch: [ 4/ 10], step: [ 40/ 390], loss: [0.4372], avg loss: [0.3796], time: [104.0227ms]\n", - "Epoch: [ 4/ 10], step: [ 41/ 390], loss: [0.3131], avg loss: [0.3780], time: [104.7094ms]\n", - "Epoch: [ 4/ 10], step: [ 42/ 390], loss: [0.3185], avg loss: [0.3765], time: [103.5342ms]\n", - "Epoch: [ 4/ 10], step: [ 43/ 390], loss: [0.4237], avg loss: [0.3776], time: [106.8850ms]\n", - "Epoch: [ 4/ 10], step: [ 44/ 390], loss: [0.3446], avg loss: [0.3769], time: [104.2166ms]\n", - "Epoch: [ 4/ 10], step: [ 45/ 390], loss: [0.3386], avg loss: [0.3760], time: [102.8478ms]\n", - "Epoch: [ 4/ 10], step: [ 46/ 390], loss: [0.2380], avg loss: [0.3730], time: [106.4036ms]\n", - "Epoch: [ 4/ 10], step: [ 47/ 390], loss: [0.2631], avg loss: [0.3707], time: [100.6525ms]\n", - "Epoch: [ 4/ 10], step: [ 48/ 390], loss: [0.3154], avg loss: [0.3695], time: [103.7781ms]\n", - "Epoch: [ 4/ 10], step: [ 49/ 390], loss: [0.3512], avg loss: [0.3692], time: [104.8393ms]\n", - "Epoch: [ 4/ 10], step: [ 50/ 390], loss: [0.3820], avg loss: [0.3694], time: [105.8657ms]\n", - "Epoch: [ 4/ 10], step: [ 51/ 390], loss: [0.4683], avg loss: [0.3714], time: [103.6565ms]\n", - "Epoch: [ 4/ 10], step: [ 52/ 390], loss: [0.3854], avg loss: [0.3716], time: [104.9857ms]\n", - "Epoch: [ 4/ 10], step: [ 53/ 390], loss: [0.4999], avg loss: [0.3741], time: [101.6917ms]\n", - "Epoch: [ 4/ 10], step: [ 54/ 390], loss: [0.5073], avg loss: [0.3765], time: [105.3748ms]\n", - "Epoch: [ 4/ 10], step: [ 55/ 390], loss: [0.4146], avg loss: [0.3772], time: [105.8059ms]\n", - "Epoch: [ 4/ 10], step: [ 56/ 390], loss: [0.4214], avg loss: [0.3780], time: [103.3521ms]\n", - "Epoch: [ 4/ 10], step: [ 57/ 390], loss: [0.3034], avg loss: [0.3767], time: [106.8072ms]\n", - "Epoch: [ 4/ 10], step: [ 58/ 390], loss: [0.3051], avg loss: [0.3755], time: [103.3835ms]\n", - "Epoch: [ 4/ 10], step: [ 59/ 390], loss: [0.3742], avg loss: [0.3754], time: [105.8295ms]\n", - "Epoch: [ 4/ 10], step: [ 60/ 390], loss: [0.4394], avg loss: [0.3765], time: [105.0005ms]\n", - "Epoch: [ 4/ 10], step: [ 61/ 390], loss: [0.2594], avg loss: [0.3746], time: [106.1208ms]\n", - "Epoch: [ 4/ 10], step: [ 62/ 390], loss: [0.4522], avg loss: [0.3758], time: [106.8385ms]\n", - "Epoch: [ 4/ 10], step: [ 63/ 390], loss: [0.4361], avg loss: [0.3768], time: [102.1011ms]\n", - "Epoch: [ 4/ 10], step: [ 64/ 390], loss: [0.3397], avg loss: [0.3762], time: [104.5163ms]\n", - "Epoch: [ 4/ 10], step: [ 65/ 390], loss: [0.2726], avg loss: [0.3746], time: [105.8998ms]\n", - "Epoch: [ 4/ 10], step: [ 66/ 390], loss: [0.3973], avg loss: [0.3750], time: [103.3664ms]\n", - "Epoch: [ 4/ 10], step: [ 67/ 390], loss: [0.3567], avg loss: [0.3747], time: [105.4764ms]\n", - "Epoch: [ 4/ 10], step: [ 68/ 390], loss: [0.3505], avg loss: [0.3743], time: [102.8385ms]\n", - "Epoch: [ 4/ 10], step: [ 69/ 390], loss: [0.3896], avg loss: [0.3746], time: [103.4474ms]\n", - "Epoch: [ 4/ 10], step: [ 70/ 390], loss: [0.3462], avg loss: [0.3741], time: [105.1519ms]\n" + "epoch: 4 step: 1, loss is 0.4017\n", + "epoch: 4 step: 2, loss is 0.4795\n", + "epoch: 4 step: 3, loss is 0.2870\n", + "epoch: 4 step: 4, loss is 0.4298\n", + "epoch: 4 step: 5, loss is 0.3789\n", + "epoch: 4 step: 6, loss is 0.3850\n", + "epoch: 4 step: 7, loss is 0.5787\n", + "epoch: 4 step: 8, loss is 0.4739\n", + "epoch: 4 step: 9, loss is 0.3946\n", + "epoch: 4 step: 10, loss is 0.4048\n", + "epoch: 4 step: 11, loss is 0.2484\n", + "epoch: 4 step: 12, loss is 0.2323\n", + "epoch: 4 step: 13, loss is 0.4067\n", + "epoch: 4 step: 14, loss is 0.3270\n", + "epoch: 4 step: 15, loss is 0.4092\n", + "epoch: 4 step: 16, loss is 0.3262\n", + "epoch: 4 step: 17, loss is 0.3273\n", + "epoch: 4 step: 18, loss is 0.3551\n", + "epoch: 4 step: 19, loss is 0.2978\n", + "epoch: 4 step: 20, loss is 0.3568\n", + "epoch: 4 step: 21, loss is 0.3576\n", + "epoch: 4 step: 22, loss is 0.4565\n", + "epoch: 4 step: 23, loss is 0.3130\n", + "epoch: 4 step: 24, loss is 0.3228\n", + "epoch: 4 step: 25, loss is 0.4285\n", + "epoch: 4 step: 26, loss is 0.4040\n", + "epoch: 4 step: 27, loss is 0.2316\n", + "epoch: 4 step: 28, loss is 0.2661\n", + "epoch: 4 step: 29, loss is 0.3404\n", + "epoch: 4 step: 30, loss is 0.4828\n", + "epoch: 4 step: 31, loss is 0.3574\n", + "epoch: 4 step: 32, loss is 0.5177\n", + "epoch: 4 step: 33, loss is 0.4476\n", + "epoch: 4 step: 34, loss is 0.4039\n", + "epoch: 4 step: 35, loss is 0.4306\n", + "epoch: 4 step: 36, loss is 0.3846\n", + "epoch: 4 step: 37, loss is 0.3046\n", + "epoch: 4 step: 38, loss is 0.3345\n", + "epoch: 4 step: 39, loss is 0.4613\n", + "epoch: 4 step: 40, loss is 0.4372\n", + "epoch: 4 step: 41, loss is 0.3131\n", + "epoch: 4 step: 42, loss is 0.3185\n", + "epoch: 4 step: 43, loss is 0.4237\n", + "epoch: 4 step: 44, loss is 0.3446\n", + "epoch: 4 step: 45, loss is 0.3386\n", + "epoch: 4 step: 46, loss is 0.2380\n", + "epoch: 4 step: 47, loss is 0.2631\n", + "epoch: 4 step: 48, loss is 0.3154\n", + "epoch: 4 step: 49, loss is 0.3512\n", + "epoch: 4 step: 50, loss is 0.3820\n", + "epoch: 4 step: 51, loss is 0.4683\n", + "epoch: 4 step: 52, loss is 0.3854\n", + "epoch: 4 step: 53, loss is 0.4999\n", + "epoch: 4 step: 54, loss is 0.5073\n", + "epoch: 4 step: 55, loss is 0.4146\n", + "epoch: 4 step: 56, loss is 0.4214\n", + "epoch: 4 step: 57, loss is 0.3034\n", + "epoch: 4 step: 58, loss is 0.3051\n", + "epoch: 4 step: 59, loss is 0.3742\n", + "epoch: 4 step: 60, loss is 0.4394\n", + "epoch: 4 step: 61, loss is 0.2594\n", + "epoch: 4 step: 62, loss is 0.4522\n", + "epoch: 4 step: 63, loss is 0.4361\n", + "epoch: 4 step: 64, loss is 0.3397\n", + "epoch: 4 step: 65, loss is 0.2726\n", + "epoch: 4 step: 66, loss is 0.3973\n", + "epoch: 4 step: 67, loss is 0.3567\n", + "epoch: 4 step: 68, loss is 0.3505\n", + "epoch: 4 step: 69, loss is 0.3896\n", + "epoch: 4 step: 70, loss is 0.3462\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 4/ 10], step: [ 71/ 390], loss: [0.3085], avg loss: [0.3732], time: [103.1454ms]\n", - "Epoch: [ 4/ 10], step: [ 72/ 390], loss: [0.2767], avg loss: [0.3719], time: [102.9990ms]\n", - "Epoch: [ 4/ 10], step: [ 73/ 390], loss: [0.3353], avg loss: [0.3714], time: [107.5771ms]\n", - "Epoch: [ 4/ 10], step: [ 74/ 390], loss: [0.4800], avg loss: [0.3729], time: [104.0356ms]\n", - "Epoch: [ 4/ 10], step: [ 75/ 390], loss: [0.2814], avg loss: [0.3716], time: [104.0728ms]\n", - "Epoch: [ 4/ 10], step: [ 76/ 390], loss: [0.4233], avg loss: [0.3723], time: [104.9471ms]\n", - "Epoch: [ 4/ 10], step: [ 77/ 390], loss: [0.2641], avg loss: [0.3709], time: [103.7886ms]\n", - "Epoch: [ 4/ 10], step: [ 78/ 390], loss: [0.3865], avg loss: [0.3711], time: [107.5280ms]\n", - "Epoch: [ 4/ 10], step: [ 79/ 390], loss: [0.2459], avg loss: [0.3695], time: [106.9174ms]\n", - "Epoch: [ 4/ 10], step: [ 80/ 390], loss: [0.4205], avg loss: [0.3702], time: [104.5945ms]\n", - "Epoch: [ 4/ 10], step: [ 81/ 390], loss: [0.4781], avg loss: [0.3715], time: [102.2487ms]\n", - "Epoch: [ 4/ 10], step: [ 82/ 390], loss: [0.5155], avg loss: [0.3732], time: [103.3859ms]\n", - "Epoch: [ 4/ 10], step: [ 83/ 390], loss: [0.3062], avg loss: [0.3724], time: [107.1780ms]\n", - "Epoch: [ 4/ 10], step: [ 84/ 390], loss: [0.4246], avg loss: [0.3731], time: [105.0212ms]\n", - "Epoch: [ 4/ 10], step: [ 85/ 390], loss: [0.4452], avg loss: [0.3739], time: [104.6870ms]\n", - "Epoch: [ 4/ 10], step: [ 86/ 390], loss: [0.4439], avg loss: [0.3747], time: [102.5512ms]\n", - "Epoch: [ 4/ 10], step: [ 87/ 390], loss: [0.3794], avg loss: [0.3748], time: [106.2095ms]\n", - "Epoch: [ 4/ 10], step: [ 88/ 390], loss: [0.4272], avg loss: [0.3754], time: [104.0113ms]\n", - "Epoch: [ 4/ 10], step: [ 89/ 390], loss: [0.3608], avg loss: [0.3752], time: [105.8640ms]\n", - "Epoch: [ 4/ 10], step: [ 90/ 390], loss: [0.3053], avg loss: [0.3744], time: [106.1811ms]\n", - "Epoch: [ 4/ 10], step: [ 91/ 390], loss: [0.3505], avg loss: [0.3742], time: [106.4234ms]\n", - "Epoch: [ 4/ 10], step: [ 92/ 390], loss: [0.2630], avg loss: [0.3730], time: [101.3699ms]\n", - "Epoch: [ 4/ 10], step: [ 93/ 390], loss: [0.4086], avg loss: [0.3733], time: [102.1168ms]\n", - "Epoch: [ 4/ 10], step: [ 94/ 390], loss: [0.3074], avg loss: [0.3726], time: [106.9517ms]\n", - "Epoch: [ 4/ 10], step: [ 95/ 390], loss: [0.2860], avg loss: [0.3717], time: [107.4505ms]\n", - "Epoch: [ 4/ 10], step: [ 96/ 390], loss: [0.3472], avg loss: [0.3715], time: [105.2582ms]\n", - "Epoch: [ 4/ 10], step: [ 97/ 390], loss: [0.4399], avg loss: [0.3722], time: [104.7673ms]\n", - "Epoch: [ 4/ 10], step: [ 98/ 390], loss: [0.2984], avg loss: [0.3714], time: [102.5717ms]\n", - "Epoch: [ 4/ 10], step: [ 99/ 390], loss: [0.5062], avg loss: [0.3728], time: [106.9460ms]\n", - "Epoch: [ 4/ 10], step: [ 100/ 390], loss: [0.5517], avg loss: [0.3746], time: [105.6576ms]\n", - "Epoch: [ 4/ 10], step: [ 101/ 390], loss: [0.5153], avg loss: [0.3760], time: [105.8364ms]\n", - "Epoch: [ 4/ 10], step: [ 102/ 390], loss: [0.4030], avg loss: [0.3762], time: [105.5653ms]\n", - "Epoch: [ 4/ 10], step: [ 103/ 390], loss: [0.3423], avg loss: [0.3759], time: [105.3305ms]\n", - "Epoch: [ 4/ 10], step: [ 104/ 390], loss: [0.5257], avg loss: [0.3773], time: [102.0906ms]\n", - "Epoch: [ 4/ 10], step: [ 105/ 390], loss: [0.3724], avg loss: [0.3773], time: [104.0003ms]\n", - "Epoch: [ 4/ 10], step: [ 106/ 390], loss: [0.3023], avg loss: [0.3766], time: [106.1201ms]\n", - "Epoch: [ 4/ 10], step: [ 107/ 390], loss: [0.3482], avg loss: [0.3763], time: [107.1177ms]\n", - "Epoch: [ 4/ 10], step: [ 108/ 390], loss: [0.3615], avg loss: [0.3762], time: [106.6062ms]\n", - "Epoch: [ 4/ 10], step: [ 109/ 390], loss: [0.4316], avg loss: [0.3767], time: [103.5779ms]\n", - "Epoch: [ 4/ 10], step: [ 110/ 390], loss: [0.3250], avg loss: [0.3762], time: [104.2085ms]\n", - "Epoch: [ 4/ 10], step: [ 111/ 390], loss: [0.4009], avg loss: [0.3764], time: [107.6093ms]\n", - "Epoch: [ 4/ 10], step: [ 112/ 390], loss: [0.3942], avg loss: [0.3766], time: [106.7224ms]\n", - "Epoch: [ 4/ 10], step: [ 113/ 390], loss: [0.2140], avg loss: [0.3752], time: [104.5911ms]\n", - "Epoch: [ 4/ 10], step: [ 114/ 390], loss: [0.4001], avg loss: [0.3754], time: [103.5399ms]\n", - "Epoch: [ 4/ 10], step: [ 115/ 390], loss: [0.4625], avg loss: [0.3761], time: [106.3213ms]\n", - "Epoch: [ 4/ 10], step: [ 116/ 390], loss: [0.3707], avg loss: [0.3761], time: [108.2938ms]\n", - "Epoch: [ 4/ 10], step: [ 117/ 390], loss: [0.5109], avg loss: [0.3772], time: [108.2978ms]\n", - "Epoch: [ 4/ 10], step: [ 118/ 390], loss: [0.3670], avg loss: [0.3772], time: [107.1320ms]\n", - "Epoch: [ 4/ 10], step: [ 119/ 390], loss: [0.3501], avg loss: [0.3769], time: [106.8544ms]\n", - "Epoch: [ 4/ 10], step: [ 120/ 390], loss: [0.3834], avg loss: [0.3770], time: [106.4882ms]\n", - "Epoch: [ 4/ 10], step: [ 121/ 390], loss: [0.3532], avg loss: [0.3768], time: [107.4162ms]\n", - "Epoch: [ 4/ 10], step: [ 122/ 390], loss: [0.3031], avg loss: [0.3762], time: [103.9529ms]\n", - "Epoch: [ 4/ 10], step: [ 123/ 390], loss: [0.3020], avg loss: [0.3756], time: [105.1888ms]\n", - "Epoch: [ 4/ 10], step: [ 124/ 390], loss: [0.2292], avg loss: [0.3744], time: [106.4696ms]\n", - "Epoch: [ 4/ 10], step: [ 125/ 390], loss: [0.4072], avg loss: [0.3747], time: [108.0911ms]\n", - "Epoch: [ 4/ 10], step: [ 126/ 390], loss: [0.3180], avg loss: [0.3742], time: [108.1126ms]\n", - "Epoch: [ 4/ 10], step: [ 127/ 390], loss: [0.3820], avg loss: [0.3743], time: [102.9165ms]\n", - "Epoch: [ 4/ 10], step: [ 128/ 390], loss: [0.4190], avg loss: [0.3746], time: [106.5381ms]\n", - "Epoch: [ 4/ 10], step: [ 129/ 390], loss: [0.2390], avg loss: [0.3736], time: [112.4887ms]\n", - "Epoch: [ 4/ 10], step: [ 130/ 390], loss: [0.3056], avg loss: [0.3731], time: [106.4742ms]\n", - "Epoch: [ 4/ 10], step: [ 131/ 390], loss: [0.3209], avg loss: [0.3727], time: [107.4369ms]\n", - "Epoch: [ 4/ 10], step: [ 132/ 390], loss: [0.3113], avg loss: [0.3722], time: [105.5963ms]\n", - "Epoch: [ 4/ 10], step: [ 133/ 390], loss: [0.2161], avg loss: [0.3710], time: [105.7923ms]\n", - "Epoch: [ 4/ 10], step: [ 134/ 390], loss: [0.3602], avg loss: [0.3709], time: [107.3329ms]\n", - "Epoch: [ 4/ 10], step: [ 135/ 390], loss: [0.3843], avg loss: [0.3710], time: [107.0983ms]\n", - "Epoch: [ 4/ 10], step: [ 136/ 390], loss: [0.4002], avg loss: [0.3712], time: [105.2377ms]\n", - "Epoch: [ 4/ 10], step: [ 137/ 390], loss: [0.3382], avg loss: [0.3710], time: [106.5493ms]\n", - "Epoch: [ 4/ 10], step: [ 138/ 390], loss: [0.4547], avg loss: [0.3716], time: [104.9621ms]\n", - "Epoch: [ 4/ 10], step: [ 139/ 390], loss: [0.4897], avg loss: [0.3725], time: [104.1026ms]\n", - "Epoch: [ 4/ 10], step: [ 140/ 390], loss: [0.2613], avg loss: [0.3717], time: [108.1474ms]\n", - "Epoch: [ 4/ 10], step: [ 141/ 390], loss: [0.3163], avg loss: [0.3713], time: [109.3452ms]\n", - "Epoch: [ 4/ 10], step: [ 142/ 390], loss: [0.3970], avg loss: [0.3715], time: [106.0774ms]\n", - "Epoch: [ 4/ 10], step: [ 143/ 390], loss: [0.4706], avg loss: [0.3722], time: [108.7382ms]\n", - "Epoch: [ 4/ 10], step: [ 144/ 390], loss: [0.2520], avg loss: [0.3713], time: [107.2977ms]\n", - "Epoch: [ 4/ 10], step: [ 145/ 390], loss: [0.2754], avg loss: [0.3707], time: [105.1550ms]\n", - "Epoch: [ 4/ 10], step: [ 146/ 390], loss: [0.3478], avg loss: [0.3705], time: [103.5702ms]\n", - "Epoch: [ 4/ 10], step: [ 147/ 390], loss: [0.3348], avg loss: [0.3703], time: [107.3720ms]\n", - "Epoch: [ 4/ 10], step: [ 148/ 390], loss: [0.4345], avg loss: [0.3707], time: [108.3019ms]\n", - "Epoch: [ 4/ 10], step: [ 149/ 390], loss: [0.2415], avg loss: [0.3698], time: [105.2392ms]\n", - "Epoch: [ 4/ 10], step: [ 150/ 390], loss: [0.4655], avg loss: [0.3705], time: [106.0359ms]\n", - "Epoch: [ 4/ 10], step: [ 151/ 390], loss: [0.3261], avg loss: [0.3702], time: [105.4292ms]\n", - "Epoch: [ 4/ 10], step: [ 152/ 390], loss: [0.5246], avg loss: [0.3712], time: [105.9971ms]\n", - "Epoch: [ 4/ 10], step: [ 153/ 390], loss: [0.4512], avg loss: [0.3717], time: [107.9559ms]\n", - "Epoch: [ 4/ 10], step: [ 154/ 390], loss: [0.2818], avg loss: [0.3711], time: [105.0174ms]\n", - "Epoch: [ 4/ 10], step: [ 155/ 390], loss: [0.4020], avg loss: [0.3713], time: [107.2338ms]\n", - "Epoch: [ 4/ 10], step: [ 156/ 390], loss: [0.3509], avg loss: [0.3712], time: [106.5938ms]\n", - "Epoch: [ 4/ 10], step: [ 157/ 390], loss: [0.5440], avg loss: [0.3723], time: [111.7208ms]\n", - "Epoch: [ 4/ 10], step: [ 158/ 390], loss: [0.3820], avg loss: [0.3724], time: [107.2078ms]\n", - "Epoch: [ 4/ 10], step: [ 159/ 390], loss: [0.3345], avg loss: [0.3721], time: [107.3508ms]\n" + "epoch: 4 step: 71, loss is 0.3085\n", + "epoch: 4 step: 72, loss is 0.2767\n", + "epoch: 4 step: 73, loss is 0.3353\n", + "epoch: 4 step: 74, loss is 0.4800\n", + "epoch: 4 step: 75, loss is 0.2814\n", + "epoch: 4 step: 76, loss is 0.4233\n", + "epoch: 4 step: 77, loss is 0.2641\n", + "epoch: 4 step: 78, loss is 0.3865\n", + "epoch: 4 step: 79, loss is 0.2459\n", + "epoch: 4 step: 80, loss is 0.4205\n", + "epoch: 4 step: 81, loss is 0.4781\n", + "epoch: 4 step: 82, loss is 0.5155\n", + "epoch: 4 step: 83, loss is 0.3062\n", + "epoch: 4 step: 84, loss is 0.4246\n", + "epoch: 4 step: 85, loss is 0.4452\n", + "epoch: 4 step: 86, loss is 0.4439\n", + "epoch: 4 step: 87, loss is 0.3794\n", + "epoch: 4 step: 88, loss is 0.4272\n", + "epoch: 4 step: 89, loss is 0.3608\n", + "epoch: 4 step: 90, loss is 0.3053\n", + "epoch: 4 step: 91, loss is 0.3505\n", + "epoch: 4 step: 92, loss is 0.2630\n", + "epoch: 4 step: 93, loss is 0.4086\n", + "epoch: 4 step: 94, loss is 0.3074\n", + "epoch: 4 step: 95, loss is 0.2860\n", + "epoch: 4 step: 96, loss is 0.3472\n", + "epoch: 4 step: 97, loss is 0.4399\n", + "epoch: 4 step: 98, loss is 0.2984\n", + "epoch: 4 step: 99, loss is 0.5062\n", + "epoch: 4 step: 100, loss is 0.5517\n", + "epoch: 4 step: 101, loss is 0.5153\n", + "epoch: 4 step: 102, loss is 0.4030\n", + "epoch: 4 step: 103, loss is 0.3423\n", + "epoch: 4 step: 104, loss is 0.5257\n", + "epoch: 4 step: 105, loss is 0.3724\n", + "epoch: 4 step: 106, loss is 0.3023\n", + "epoch: 4 step: 107, loss is 0.3482\n", + "epoch: 4 step: 108, loss is 0.3615\n", + "epoch: 4 step: 109, loss is 0.4316\n", + "epoch: 4 step: 110, loss is 0.3250\n", + "epoch: 4 step: 111, loss is 0.4009\n", + "epoch: 4 step: 112, loss is 0.3942\n", + "epoch: 4 step: 113, loss is 0.2140\n", + "epoch: 4 step: 114, loss is 0.4001\n", + "epoch: 4 step: 115, loss is 0.4625\n", + "epoch: 4 step: 116, loss is 0.3707\n", + "epoch: 4 step: 117, loss is 0.5109\n", + "epoch: 4 step: 118, loss is 0.3670\n", + "epoch: 4 step: 119, loss is 0.3501\n", + "epoch: 4 step: 120, loss is 0.3834\n", + "epoch: 4 step: 121, loss is 0.3532\n", + "epoch: 4 step: 122, loss is 0.3031\n", + "epoch: 4 step: 123, loss is 0.3020\n", + "epoch: 4 step: 124, loss is 0.2292\n", + "epoch: 4 step: 125, loss is 0.4072\n", + "epoch: 4 step: 126, loss is 0.3180\n", + "epoch: 4 step: 127, loss is 0.3820\n", + "epoch: 4 step: 128, loss is 0.4190\n", + "epoch: 4 step: 129, loss is 0.2390\n", + "epoch: 4 step: 130, loss is 0.3056\n", + "epoch: 4 step: 131, loss is 0.3209\n", + "epoch: 4 step: 132, loss is 0.3113\n", + "epoch: 4 step: 133, loss is 0.2161\n", + "epoch: 4 step: 134, loss is 0.3602\n", + "epoch: 4 step: 135, loss is 0.3843\n", + "epoch: 4 step: 136, loss is 0.4002\n", + "epoch: 4 step: 137, loss is 0.3382\n", + "epoch: 4 step: 138, loss is 0.4547\n", + "epoch: 4 step: 139, loss is 0.4897\n", + "epoch: 4 step: 140, loss is 0.2613\n", + "epoch: 4 step: 141, loss is 0.3163\n", + "epoch: 4 step: 142, loss is 0.3970\n", + "epoch: 4 step: 143, loss is 0.4706\n", + "epoch: 4 step: 144, loss is 0.2520\n", + "epoch: 4 step: 145, loss is 0.2754\n", + "epoch: 4 step: 146, loss is 0.3478\n", + "epoch: 4 step: 147, loss is 0.3348\n", + "epoch: 4 step: 148, loss is 0.4345\n", + "epoch: 4 step: 149, loss is 0.2415\n", + "epoch: 4 step: 150, loss is 0.4655\n", + "epoch: 4 step: 151, loss is 0.3261\n", + "epoch: 4 step: 152, loss is 0.5246\n", + "epoch: 4 step: 153, loss is 0.4512\n", + "epoch: 4 step: 154, loss is 0.2818\n", + "epoch: 4 step: 155, loss is 0.4020\n", + "epoch: 4 step: 156, loss is 0.3509\n", + "epoch: 4 step: 157, loss is 0.5440\n", + "epoch: 4 step: 158, loss is 0.3820\n", + "epoch: 4 step: 159, loss is 0.3345\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 4/ 10], step: [ 160/ 390], loss: [0.4387], avg loss: [0.3725], time: [105.3257ms]\n", - "Epoch: [ 4/ 10], step: [ 161/ 390], loss: [0.3441], avg loss: [0.3724], time: [105.7281ms]\n", - "Epoch: [ 4/ 10], step: [ 162/ 390], loss: [0.3684], avg loss: [0.3723], time: [105.7646ms]\n", - "Epoch: [ 4/ 10], step: [ 163/ 390], loss: [0.3465], avg loss: [0.3722], time: [106.7050ms]\n", - "Epoch: [ 4/ 10], step: [ 164/ 390], loss: [0.5299], avg loss: [0.3731], time: [105.3362ms]\n", - "Epoch: [ 4/ 10], step: [ 165/ 390], loss: [0.5045], avg loss: [0.3739], time: [106.9767ms]\n", - "Epoch: [ 4/ 10], step: [ 166/ 390], loss: [0.3958], avg loss: [0.3741], time: [106.9121ms]\n", - "Epoch: [ 4/ 10], step: [ 167/ 390], loss: [0.3517], avg loss: [0.3739], time: [107.1458ms]\n", - "Epoch: [ 4/ 10], step: [ 168/ 390], loss: [0.4668], avg loss: [0.3745], time: [107.9512ms]\n", - "Epoch: [ 4/ 10], step: [ 169/ 390], loss: [0.2722], avg loss: [0.3739], time: [102.9236ms]\n", - "Epoch: [ 4/ 10], step: [ 170/ 390], loss: [0.4252], avg loss: [0.3742], time: [104.2573ms]\n", - "Epoch: [ 4/ 10], step: [ 171/ 390], loss: [0.4219], avg loss: [0.3745], time: [109.7882ms]\n", - "Epoch: [ 4/ 10], step: [ 172/ 390], loss: [0.4034], avg loss: [0.3746], time: [107.9049ms]\n", - "Epoch: [ 4/ 10], step: [ 173/ 390], loss: [0.4636], avg loss: [0.3751], time: [105.2358ms]\n", - "Epoch: [ 4/ 10], step: [ 174/ 390], loss: [0.3881], avg loss: [0.3752], time: [107.3976ms]\n", - "Epoch: [ 4/ 10], step: [ 175/ 390], loss: [0.3162], avg loss: [0.3749], time: [103.8535ms]\n", - "Epoch: [ 4/ 10], step: [ 176/ 390], loss: [0.3936], avg loss: [0.3750], time: [107.2187ms]\n", - "Epoch: [ 4/ 10], step: [ 177/ 390], loss: [0.3591], avg loss: [0.3749], time: [106.3905ms]\n", - "Epoch: [ 4/ 10], step: [ 178/ 390], loss: [0.3104], avg loss: [0.3745], time: [106.2949ms]\n", - "Epoch: [ 4/ 10], step: [ 179/ 390], loss: [0.2385], avg loss: [0.3738], time: [105.2456ms]\n", - "Epoch: [ 4/ 10], step: [ 180/ 390], loss: [0.2899], avg loss: [0.3733], time: [105.3367ms]\n", - "Epoch: [ 4/ 10], step: [ 181/ 390], loss: [0.3091], avg loss: [0.3729], time: [106.3597ms]\n", - "Epoch: [ 4/ 10], step: [ 182/ 390], loss: [0.4573], avg loss: [0.3734], time: [105.8962ms]\n", - "Epoch: [ 4/ 10], step: [ 183/ 390], loss: [0.4415], avg loss: [0.3738], time: [105.4540ms]\n", - "Epoch: [ 4/ 10], step: [ 184/ 390], loss: [0.2995], avg loss: [0.3734], time: [103.1325ms]\n", - "Epoch: [ 4/ 10], step: [ 185/ 390], loss: [0.2719], avg loss: [0.3728], time: [107.6870ms]\n", - "Epoch: [ 4/ 10], step: [ 186/ 390], loss: [0.3571], avg loss: [0.3727], time: [106.4153ms]\n", - "Epoch: [ 4/ 10], step: [ 187/ 390], loss: [0.3442], avg loss: [0.3726], time: [108.4838ms]\n", - "Epoch: [ 4/ 10], step: [ 188/ 390], loss: [0.3863], avg loss: [0.3727], time: [105.6416ms]\n", - "Epoch: [ 4/ 10], step: [ 189/ 390], loss: [0.3299], avg loss: [0.3724], time: [107.1110ms]\n", - "Epoch: [ 4/ 10], step: [ 190/ 390], loss: [0.2998], avg loss: [0.3721], time: [104.6271ms]\n", - "Epoch: [ 4/ 10], step: [ 191/ 390], loss: [0.3399], avg loss: [0.3719], time: [109.9551ms]\n", - "Epoch: [ 4/ 10], step: [ 192/ 390], loss: [0.2481], avg loss: [0.3712], time: [106.9565ms]\n", - "Epoch: [ 4/ 10], step: [ 193/ 390], loss: [0.3842], avg loss: [0.3713], time: [105.9043ms]\n", - "Epoch: [ 4/ 10], step: [ 194/ 390], loss: [0.3805], avg loss: [0.3714], time: [107.5363ms]\n", - "Epoch: [ 4/ 10], step: [ 195/ 390], loss: [0.4114], avg loss: [0.3716], time: [106.0455ms]\n", - "Epoch: [ 4/ 10], step: [ 196/ 390], loss: [0.2850], avg loss: [0.3711], time: [106.7135ms]\n", - "Epoch: [ 4/ 10], step: [ 197/ 390], loss: [0.2693], avg loss: [0.3706], time: [107.2354ms]\n", - "Epoch: [ 4/ 10], step: [ 198/ 390], loss: [0.2606], avg loss: [0.3701], time: [104.0814ms]\n", - "Epoch: [ 4/ 10], step: [ 199/ 390], loss: [0.3752], avg loss: [0.3701], time: [105.1664ms]\n", - "Epoch: [ 4/ 10], step: [ 200/ 390], loss: [0.4419], avg loss: [0.3704], time: [107.0666ms]\n", - "Epoch: [ 4/ 10], step: [ 201/ 390], loss: [0.3777], avg loss: [0.3705], time: [106.7691ms]\n", - "Epoch: [ 4/ 10], step: [ 202/ 390], loss: [0.4244], avg loss: [0.3707], time: [105.9108ms]\n", - "Epoch: [ 4/ 10], step: [ 203/ 390], loss: [0.3185], avg loss: [0.3705], time: [108.5169ms]\n", - "Epoch: [ 4/ 10], step: [ 204/ 390], loss: [0.3078], avg loss: [0.3702], time: [104.2497ms]\n", - "Epoch: [ 4/ 10], step: [ 205/ 390], loss: [0.3949], avg loss: [0.3703], time: [104.3434ms]\n", - "Epoch: [ 4/ 10], step: [ 206/ 390], loss: [0.3288], avg loss: [0.3701], time: [104.9526ms]\n", - "Epoch: [ 4/ 10], step: [ 207/ 390], loss: [0.4153], avg loss: [0.3703], time: [104.3918ms]\n", - "Epoch: [ 4/ 10], step: [ 208/ 390], loss: [0.2307], avg loss: [0.3696], time: [106.4024ms]\n", - "Epoch: [ 4/ 10], step: [ 209/ 390], loss: [0.3982], avg loss: [0.3698], time: [105.8433ms]\n", - "Epoch: [ 4/ 10], step: [ 210/ 390], loss: [0.3027], avg loss: [0.3695], time: [106.5593ms]\n", - "Epoch: [ 4/ 10], step: [ 211/ 390], loss: [0.3901], avg loss: [0.3696], time: [104.8863ms]\n", - "Epoch: [ 4/ 10], step: [ 212/ 390], loss: [0.4023], avg loss: [0.3697], time: [106.0429ms]\n", - "Epoch: [ 4/ 10], step: [ 213/ 390], loss: [0.2610], avg loss: [0.3692], time: [105.1226ms]\n", - "Epoch: [ 4/ 10], step: [ 214/ 390], loss: [0.3141], avg loss: [0.3689], time: [106.7166ms]\n", - "Epoch: [ 4/ 10], step: [ 215/ 390], loss: [0.2775], avg loss: [0.3685], time: [107.2128ms]\n", - "Epoch: [ 4/ 10], step: [ 216/ 390], loss: [0.4507], avg loss: [0.3689], time: [107.1441ms]\n", - "Epoch: [ 4/ 10], step: [ 217/ 390], loss: [0.3489], avg loss: [0.3688], time: [109.1814ms]\n", - "Epoch: [ 4/ 10], step: [ 218/ 390], loss: [0.4935], avg loss: [0.3694], time: [105.3557ms]\n", - "Epoch: [ 4/ 10], step: [ 219/ 390], loss: [0.3538], avg loss: [0.3693], time: [108.0081ms]\n", - "Epoch: [ 4/ 10], step: [ 220/ 390], loss: [0.3235], avg loss: [0.3691], time: [106.7159ms]\n", - "Epoch: [ 4/ 10], step: [ 221/ 390], loss: [0.2939], avg loss: [0.3688], time: [105.4449ms]\n", - "Epoch: [ 4/ 10], step: [ 222/ 390], loss: [0.3348], avg loss: [0.3686], time: [105.8254ms]\n", - "Epoch: [ 4/ 10], step: [ 223/ 390], loss: [0.3916], avg loss: [0.3687], time: [106.7023ms]\n", - "Epoch: [ 4/ 10], step: [ 224/ 390], loss: [0.4481], avg loss: [0.3691], time: [104.8224ms]\n", - "Epoch: [ 4/ 10], step: [ 225/ 390], loss: [0.2748], avg loss: [0.3686], time: [104.7447ms]\n", - "Epoch: [ 4/ 10], step: [ 226/ 390], loss: [0.3481], avg loss: [0.3686], time: [105.0262ms]\n", - "Epoch: [ 4/ 10], step: [ 227/ 390], loss: [0.4186], avg loss: [0.3688], time: [109.0863ms]\n", - "Epoch: [ 4/ 10], step: [ 228/ 390], loss: [0.4347], avg loss: [0.3691], time: [108.5703ms]\n", - "Epoch: [ 4/ 10], step: [ 229/ 390], loss: [0.3251], avg loss: [0.3689], time: [104.6019ms]\n", - "Epoch: [ 4/ 10], step: [ 230/ 390], loss: [0.3473], avg loss: [0.3688], time: [106.7584ms]\n", - "Epoch: [ 4/ 10], step: [ 231/ 390], loss: [0.3915], avg loss: [0.3689], time: [106.0672ms]\n", - "Epoch: [ 4/ 10], step: [ 232/ 390], loss: [0.2889], avg loss: [0.3685], time: [106.2105ms]\n", - "Epoch: [ 4/ 10], step: [ 233/ 390], loss: [0.2659], avg loss: [0.3681], time: [105.5975ms]\n", - "Epoch: [ 4/ 10], step: [ 234/ 390], loss: [0.3052], avg loss: [0.3678], time: [108.1386ms]\n", - "Epoch: [ 4/ 10], step: [ 235/ 390], loss: [0.4258], avg loss: [0.3681], time: [103.0502ms]\n", - "Epoch: [ 4/ 10], step: [ 236/ 390], loss: [0.3783], avg loss: [0.3681], time: [108.2599ms]\n", - "Epoch: [ 4/ 10], step: [ 237/ 390], loss: [0.4851], avg loss: [0.3686], time: [107.0790ms]\n", - "Epoch: [ 4/ 10], step: [ 238/ 390], loss: [0.3114], avg loss: [0.3684], time: [106.1127ms]\n", - "Epoch: [ 4/ 10], step: [ 239/ 390], loss: [0.2487], avg loss: [0.3679], time: [107.1804ms]\n", - "Epoch: [ 4/ 10], step: [ 240/ 390], loss: [0.4030], avg loss: [0.3680], time: [106.5235ms]\n", - "Epoch: [ 4/ 10], step: [ 241/ 390], loss: [0.4842], avg loss: [0.3685], time: [105.6583ms]\n", - "Epoch: [ 4/ 10], step: [ 242/ 390], loss: [0.4098], avg loss: [0.3687], time: [106.6108ms]\n", - "Epoch: [ 4/ 10], step: [ 243/ 390], loss: [0.2414], avg loss: [0.3681], time: [108.7084ms]\n", - "Epoch: [ 4/ 10], step: [ 244/ 390], loss: [0.5210], avg loss: [0.3688], time: [106.1161ms]\n", - "Epoch: [ 4/ 10], step: [ 245/ 390], loss: [0.3267], avg loss: [0.3686], time: [108.6352ms]\n", - "Epoch: [ 4/ 10], step: [ 246/ 390], loss: [0.4094], avg loss: [0.3688], time: [107.4882ms]\n", - "Epoch: [ 4/ 10], step: [ 247/ 390], loss: [0.3241], avg loss: [0.3686], time: [107.6438ms]\n", - "Epoch: [ 4/ 10], step: [ 248/ 390], loss: [0.4039], avg loss: [0.3687], time: [109.0815ms]\n" + "epoch: 4 step: 160, loss is 0.4387\n", + "epoch: 4 step: 161, loss is 0.3441\n", + "epoch: 4 step: 162, loss is 0.3684\n", + "epoch: 4 step: 163, loss is 0.3465\n", + "epoch: 4 step: 164, loss is 0.5299\n", + "epoch: 4 step: 165, loss is 0.5045\n", + "epoch: 4 step: 166, loss is 0.3958\n", + "epoch: 4 step: 167, loss is 0.3517\n", + "epoch: 4 step: 168, loss is 0.4668\n", + "epoch: 4 step: 169, loss is 0.2722\n", + "epoch: 4 step: 170, loss is 0.4252\n", + "epoch: 4 step: 171, loss is 0.4219\n", + "epoch: 4 step: 172, loss is 0.4034\n", + "epoch: 4 step: 173, loss is 0.4636\n", + "epoch: 4 step: 174, loss is 0.3881\n", + "epoch: 4 step: 175, loss is 0.3162\n", + "epoch: 4 step: 176, loss is 0.3936\n", + "epoch: 4 step: 177, loss is 0.3591\n", + "epoch: 4 step: 178, loss is 0.3104\n", + "epoch: 4 step: 179, loss is 0.2385\n", + "epoch: 4 step: 180, loss is 0.2899\n", + "epoch: 4 step: 181, loss is 0.3091\n", + "epoch: 4 step: 182, loss is 0.4573\n", + "epoch: 4 step: 183, loss is 0.4415\n", + "epoch: 4 step: 184, loss is 0.2995\n", + "epoch: 4 step: 185, loss is 0.2719\n", + "epoch: 4 step: 186, loss is 0.3571\n", + "epoch: 4 step: 187, loss is 0.3442\n", + "epoch: 4 step: 188, loss is 0.3863\n", + "epoch: 4 step: 189, loss is 0.3299\n", + "epoch: 4 step: 190, loss is 0.2998\n", + "epoch: 4 step: 191, loss is 0.3399\n", + "epoch: 4 step: 192, loss is 0.2481\n", + "epoch: 4 step: 193, loss is 0.3842\n", + "epoch: 4 step: 194, loss is 0.3805\n", + "epoch: 4 step: 195, loss is 0.4114\n", + "epoch: 4 step: 196, loss is 0.2850\n", + "epoch: 4 step: 197, loss is 0.2693\n", + "epoch: 4 step: 198, loss is 0.2606\n", + "epoch: 4 step: 199, loss is 0.3752\n", + "epoch: 4 step: 200, loss is 0.4419\n", + "epoch: 4 step: 201, loss is 0.3777\n", + "epoch: 4 step: 202, loss is 0.4244\n", + "epoch: 4 step: 203, loss is 0.3185\n", + "epoch: 4 step: 204, loss is 0.3078\n", + "epoch: 4 step: 205, loss is 0.3949\n", + "epoch: 4 step: 206, loss is 0.3288\n", + "epoch: 4 step: 207, loss is 0.4153\n", + "epoch: 4 step: 208, loss is 0.2307\n", + "epoch: 4 step: 209, loss is 0.3982\n", + "epoch: 4 step: 210, loss is 0.3027\n", + "epoch: 4 step: 211, loss is 0.3901\n", + "epoch: 4 step: 212, loss is 0.4023\n", + "epoch: 4 step: 213, loss is 0.2610\n", + "epoch: 4 step: 214, loss is 0.3141\n", + "epoch: 4 step: 215, loss is 0.2775\n", + "epoch: 4 step: 216, loss is 0.4507\n", + "epoch: 4 step: 217, loss is 0.3489\n", + "epoch: 4 step: 218, loss is 0.4935\n", + "epoch: 4 step: 219, loss is 0.3538\n", + "epoch: 4 step: 220, loss is 0.3235\n", + "epoch: 4 step: 221, loss is 0.2939\n", + "epoch: 4 step: 222, loss is 0.3348\n", + "epoch: 4 step: 223, loss is 0.3916\n", + "epoch: 4 step: 224, loss is 0.4481\n", + "epoch: 4 step: 225, loss is 0.2748\n", + "epoch: 4 step: 226, loss is 0.3481\n", + "epoch: 4 step: 227, loss is 0.4186\n", + "epoch: 4 step: 228, loss is 0.4347\n", + "epoch: 4 step: 229, loss is 0.3251\n", + "epoch: 4 step: 230, loss is 0.3473\n", + "epoch: 4 step: 231, loss is 0.3915\n", + "epoch: 4 step: 232, loss is 0.2889\n", + "epoch: 4 step: 233, loss is 0.2659\n", + "epoch: 4 step: 234, loss is 0.3052\n", + "epoch: 4 step: 235, loss is 0.4258\n", + "epoch: 4 step: 236, loss is 0.3783\n", + "epoch: 4 step: 237, loss is 0.4851\n", + "epoch: 4 step: 238, loss is 0.3114\n", + "epoch: 4 step: 239, loss is 0.2487\n", + "epoch: 4 step: 240, loss is 0.4030\n", + "epoch: 4 step: 241, loss is 0.4842\n", + "epoch: 4 step: 242, loss is 0.4098\n", + "epoch: 4 step: 243, loss is 0.2414\n", + "epoch: 4 step: 244, loss is 0.5210\n", + "epoch: 4 step: 245, loss is 0.3267\n", + "epoch: 4 step: 246, loss is 0.4094\n", + "epoch: 4 step: 247, loss is 0.3241\n", + "epoch: 4 step: 248, loss is 0.4039\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 4/ 10], step: [ 249/ 390], loss: [0.2710], avg loss: [0.3683], time: [106.2400ms]\n", - "Epoch: [ 4/ 10], step: [ 250/ 390], loss: [0.3260], avg loss: [0.3682], time: [103.6398ms]\n", - "Epoch: [ 4/ 10], step: [ 251/ 390], loss: [0.3744], avg loss: [0.3682], time: [108.1443ms]\n", - "Epoch: [ 4/ 10], step: [ 252/ 390], loss: [0.2942], avg loss: [0.3679], time: [103.0304ms]\n", - "Epoch: [ 4/ 10], step: [ 253/ 390], loss: [0.4133], avg loss: [0.3681], time: [103.5023ms]\n", - "Epoch: [ 4/ 10], step: [ 254/ 390], loss: [0.2983], avg loss: [0.3678], time: [109.1344ms]\n", - "Epoch: [ 4/ 10], step: [ 255/ 390], loss: [0.4217], avg loss: [0.3680], time: [103.4021ms]\n", - "Epoch: [ 4/ 10], step: [ 256/ 390], loss: [0.3493], avg loss: [0.3679], time: [105.2632ms]\n", - "Epoch: [ 4/ 10], step: [ 257/ 390], loss: [0.2805], avg loss: [0.3676], time: [110.6668ms]\n", - "Epoch: [ 4/ 10], step: [ 258/ 390], loss: [0.3151], avg loss: [0.3674], time: [108.0148ms]\n", - "Epoch: [ 4/ 10], step: [ 259/ 390], loss: [0.3350], avg loss: [0.3673], time: [104.0602ms]\n", - "Epoch: [ 4/ 10], step: [ 260/ 390], loss: [0.5220], avg loss: [0.3679], time: [108.6056ms]\n", - "Epoch: [ 4/ 10], step: [ 261/ 390], loss: [0.2808], avg loss: [0.3675], time: [107.3999ms]\n", - "Epoch: [ 4/ 10], step: [ 262/ 390], loss: [0.2904], avg loss: [0.3672], time: [104.6116ms]\n", - "Epoch: [ 4/ 10], step: [ 263/ 390], loss: [0.4144], avg loss: [0.3674], time: [108.1769ms]\n", - "Epoch: [ 4/ 10], step: [ 264/ 390], loss: [0.3710], avg loss: [0.3674], time: [107.1186ms]\n", - "Epoch: [ 4/ 10], step: [ 265/ 390], loss: [0.2993], avg loss: [0.3672], time: [106.6625ms]\n", - "Epoch: [ 4/ 10], step: [ 266/ 390], loss: [0.3192], avg loss: [0.3670], time: [105.6526ms]\n", - "Epoch: [ 4/ 10], step: [ 267/ 390], loss: [0.2591], avg loss: [0.3666], time: [109.6139ms]\n", - "Epoch: [ 4/ 10], step: [ 268/ 390], loss: [0.4449], avg loss: [0.3669], time: [109.1521ms]\n", - "Epoch: [ 4/ 10], step: [ 269/ 390], loss: [0.3405], avg loss: [0.3668], time: [103.2643ms]\n", - "Epoch: [ 4/ 10], step: [ 270/ 390], loss: [0.3951], avg loss: [0.3669], time: [105.6435ms]\n", - "Epoch: [ 4/ 10], step: [ 271/ 390], loss: [0.3147], avg loss: [0.3667], time: [107.1754ms]\n", - "Epoch: [ 4/ 10], step: [ 272/ 390], loss: [0.3204], avg loss: [0.3665], time: [104.6278ms]\n", - "Epoch: [ 4/ 10], step: [ 273/ 390], loss: [0.5377], avg loss: [0.3671], time: [105.9399ms]\n", - "Epoch: [ 4/ 10], step: [ 274/ 390], loss: [0.3847], avg loss: [0.3672], time: [109.0810ms]\n", - "Epoch: [ 4/ 10], step: [ 275/ 390], loss: [0.4134], avg loss: [0.3674], time: [105.8214ms]\n", - "Epoch: [ 4/ 10], step: [ 276/ 390], loss: [0.3202], avg loss: [0.3672], time: [106.6880ms]\n", - "Epoch: [ 4/ 10], step: [ 277/ 390], loss: [0.3618], avg loss: [0.3672], time: [101.3110ms]\n", - "Epoch: [ 4/ 10], step: [ 278/ 390], loss: [0.4502], avg loss: [0.3675], time: [104.4438ms]\n", - "Epoch: [ 4/ 10], step: [ 279/ 390], loss: [0.3401], avg loss: [0.3674], time: [104.1548ms]\n", - "Epoch: [ 4/ 10], step: [ 280/ 390], loss: [0.4656], avg loss: [0.3677], time: [106.7429ms]\n", - "Epoch: [ 4/ 10], step: [ 281/ 390], loss: [0.4343], avg loss: [0.3680], time: [107.2030ms]\n", - "Epoch: [ 4/ 10], step: [ 282/ 390], loss: [0.3462], avg loss: [0.3679], time: [104.3711ms]\n", - "Epoch: [ 4/ 10], step: [ 283/ 390], loss: [0.3591], avg loss: [0.3679], time: [103.3731ms]\n", - "Epoch: [ 4/ 10], step: [ 284/ 390], loss: [0.2983], avg loss: [0.3676], time: [103.8666ms]\n", - "Epoch: [ 4/ 10], step: [ 285/ 390], loss: [0.4017], avg loss: [0.3677], time: [104.7235ms]\n", - "Epoch: [ 4/ 10], step: [ 286/ 390], loss: [0.2940], avg loss: [0.3675], time: [105.6240ms]\n", - "Epoch: [ 4/ 10], step: [ 287/ 390], loss: [0.4052], avg loss: [0.3676], time: [107.6298ms]\n", - "Epoch: [ 4/ 10], step: [ 288/ 390], loss: [0.2970], avg loss: [0.3674], time: [106.0195ms]\n", - "Epoch: [ 4/ 10], step: [ 289/ 390], loss: [0.4640], avg loss: [0.3677], time: [108.1021ms]\n", - "Epoch: [ 4/ 10], step: [ 290/ 390], loss: [0.2613], avg loss: [0.3673], time: [105.8490ms]\n", - "Epoch: [ 4/ 10], step: [ 291/ 390], loss: [0.2677], avg loss: [0.3670], time: [106.0126ms]\n", - "Epoch: [ 4/ 10], step: [ 292/ 390], loss: [0.3928], avg loss: [0.3671], time: [104.8071ms]\n", - "Epoch: [ 4/ 10], step: [ 293/ 390], loss: [0.3033], avg loss: [0.3669], time: [107.0890ms]\n", - "Epoch: [ 4/ 10], step: [ 294/ 390], loss: [0.3590], avg loss: [0.3668], time: [105.8483ms]\n", - "Epoch: [ 4/ 10], step: [ 295/ 390], loss: [0.6220], avg loss: [0.3677], time: [108.0527ms]\n", - "Epoch: [ 4/ 10], step: [ 296/ 390], loss: [0.4165], avg loss: [0.3679], time: [109.5693ms]\n", - "Epoch: [ 4/ 10], step: [ 297/ 390], loss: [0.3620], avg loss: [0.3679], time: [108.3882ms]\n", - "Epoch: [ 4/ 10], step: [ 298/ 390], loss: [0.3527], avg loss: [0.3678], time: [104.9139ms]\n", - "Epoch: [ 4/ 10], step: [ 299/ 390], loss: [0.3109], avg loss: [0.3676], time: [107.6729ms]\n", - "Epoch: [ 4/ 10], step: [ 300/ 390], loss: [0.4211], avg loss: [0.3678], time: [105.7062ms]\n", - "Epoch: [ 4/ 10], step: [ 301/ 390], loss: [0.3927], avg loss: [0.3679], time: [107.1277ms]\n", - "Epoch: [ 4/ 10], step: [ 302/ 390], loss: [0.3385], avg loss: [0.3678], time: [105.7725ms]\n", - "Epoch: [ 4/ 10], step: [ 303/ 390], loss: [0.3242], avg loss: [0.3676], time: [104.2752ms]\n", - "Epoch: [ 4/ 10], step: [ 304/ 390], loss: [0.3999], avg loss: [0.3677], time: [106.4508ms]\n", - "Epoch: [ 4/ 10], step: [ 305/ 390], loss: [0.2473], avg loss: [0.3673], time: [107.9853ms]\n", - "Epoch: [ 4/ 10], step: [ 306/ 390], loss: [0.4007], avg loss: [0.3675], time: [106.3206ms]\n", - "Epoch: [ 4/ 10], step: [ 307/ 390], loss: [0.3748], avg loss: [0.3675], time: [105.5491ms]\n", - "Epoch: [ 4/ 10], step: [ 308/ 390], loss: [0.3003], avg loss: [0.3673], time: [104.3818ms]\n", - "Epoch: [ 4/ 10], step: [ 309/ 390], loss: [0.4165], avg loss: [0.3674], time: [106.9283ms]\n", - "Epoch: [ 4/ 10], step: [ 310/ 390], loss: [0.2449], avg loss: [0.3670], time: [106.4966ms]\n", - "Epoch: [ 4/ 10], step: [ 311/ 390], loss: [0.3170], avg loss: [0.3669], time: [106.8151ms]\n", - "Epoch: [ 4/ 10], step: [ 312/ 390], loss: [0.3388], avg loss: [0.3668], time: [106.7402ms]\n", - "Epoch: [ 4/ 10], step: [ 313/ 390], loss: [0.5385], avg loss: [0.3673], time: [103.4353ms]\n", - "Epoch: [ 4/ 10], step: [ 314/ 390], loss: [0.3794], avg loss: [0.3674], time: [104.6982ms]\n", - "Epoch: [ 4/ 10], step: [ 315/ 390], loss: [0.2365], avg loss: [0.3669], time: [107.2829ms]\n", - "Epoch: [ 4/ 10], step: [ 316/ 390], loss: [0.4281], avg loss: [0.3671], time: [104.3134ms]\n", - "Epoch: [ 4/ 10], step: [ 317/ 390], loss: [0.3258], avg loss: [0.3670], time: [107.7969ms]\n", - "Epoch: [ 4/ 10], step: [ 318/ 390], loss: [0.4437], avg loss: [0.3672], time: [107.4538ms]\n", - "Epoch: [ 4/ 10], step: [ 319/ 390], loss: [0.3517], avg loss: [0.3672], time: [106.1678ms]\n", - "Epoch: [ 4/ 10], step: [ 320/ 390], loss: [0.3266], avg loss: [0.3671], time: [107.7993ms]\n", - "Epoch: [ 4/ 10], step: [ 321/ 390], loss: [0.3717], avg loss: [0.3671], time: [105.9566ms]\n", - "Epoch: [ 4/ 10], step: [ 322/ 390], loss: [0.4069], avg loss: [0.3672], time: [108.2475ms]\n", - "Epoch: [ 4/ 10], step: [ 323/ 390], loss: [0.3395], avg loss: [0.3671], time: [108.8541ms]\n", - "Epoch: [ 4/ 10], step: [ 324/ 390], loss: [0.4231], avg loss: [0.3673], time: [105.6015ms]\n", - "Epoch: [ 4/ 10], step: [ 325/ 390], loss: [0.4355], avg loss: [0.3675], time: [108.5124ms]\n", - "Epoch: [ 4/ 10], step: [ 326/ 390], loss: [0.2874], avg loss: [0.3673], time: [105.7117ms]\n", - "Epoch: [ 4/ 10], step: [ 327/ 390], loss: [0.3945], avg loss: [0.3673], time: [109.5996ms]\n", - "Epoch: [ 4/ 10], step: [ 328/ 390], loss: [0.3845], avg loss: [0.3674], time: [106.7383ms]\n", - "Epoch: [ 4/ 10], step: [ 329/ 390], loss: [0.4375], avg loss: [0.3676], time: [107.0645ms]\n", - "Epoch: [ 4/ 10], step: [ 330/ 390], loss: [0.3023], avg loss: [0.3674], time: [108.0580ms]\n", - "Epoch: [ 4/ 10], step: [ 331/ 390], loss: [0.4047], avg loss: [0.3675], time: [106.3502ms]\n", - "Epoch: [ 4/ 10], step: [ 332/ 390], loss: [0.3946], avg loss: [0.3676], time: [107.4476ms]\n", - "Epoch: [ 4/ 10], step: [ 333/ 390], loss: [0.3176], avg loss: [0.3675], time: [105.3164ms]\n", - "Epoch: [ 4/ 10], step: [ 334/ 390], loss: [0.4214], avg loss: [0.3676], time: [104.3229ms]\n", - "Epoch: [ 4/ 10], step: [ 335/ 390], loss: [0.4775], avg loss: [0.3679], time: [108.3291ms]\n", - "Epoch: [ 4/ 10], step: [ 336/ 390], loss: [0.3526], avg loss: [0.3679], time: [108.1722ms]\n", - "Epoch: [ 4/ 10], step: [ 337/ 390], loss: [0.4519], avg loss: [0.3681], time: [110.6203ms]\n" + "epoch: 4 step: 249, loss is 0.2710\n", + "epoch: 4 step: 250, loss is 0.3260\n", + "epoch: 4 step: 251, loss is 0.3744\n", + "epoch: 4 step: 252, loss is 0.2942\n", + "epoch: 4 step: 253, loss is 0.4133\n", + "epoch: 4 step: 254, loss is 0.2983\n", + "epoch: 4 step: 255, loss is 0.4217\n", + "epoch: 4 step: 256, loss is 0.3493\n", + "epoch: 4 step: 257, loss is 0.2805\n", + "epoch: 4 step: 258, loss is 0.3151\n", + "epoch: 4 step: 259, loss is 0.3350\n", + "epoch: 4 step: 260, loss is 0.5220\n", + "epoch: 4 step: 261, loss is 0.2808\n", + "epoch: 4 step: 262, loss is 0.2904\n", + "epoch: 4 step: 263, loss is 0.4144\n", + "epoch: 4 step: 264, loss is 0.3710\n", + "epoch: 4 step: 265, loss is 0.2993\n", + "epoch: 4 step: 266, loss is 0.3192\n", + "epoch: 4 step: 267, loss is 0.2591\n", + "epoch: 4 step: 268, loss is 0.4449\n", + "epoch: 4 step: 269, loss is 0.3405\n", + "epoch: 4 step: 270, loss is 0.3951\n", + "epoch: 4 step: 271, loss is 0.3147\n", + "epoch: 4 step: 272, loss is 0.3204\n", + "epoch: 4 step: 273, loss is 0.5377\n", + "epoch: 4 step: 274, loss is 0.3847\n", + "epoch: 4 step: 275, loss is 0.4134\n", + "epoch: 4 step: 276, loss is 0.3202\n", + "epoch: 4 step: 277, loss is 0.3618\n", + "epoch: 4 step: 278, loss is 0.4502\n", + "epoch: 4 step: 279, loss is 0.3401\n", + "epoch: 4 step: 280, loss is 0.4656\n", + "epoch: 4 step: 281, loss is 0.4343\n", + "epoch: 4 step: 282, loss is 0.3462\n", + "epoch: 4 step: 283, loss is 0.3591\n", + "epoch: 4 step: 284, loss is 0.2983\n", + "epoch: 4 step: 285, loss is 0.4017\n", + "epoch: 4 step: 286, loss is 0.2940\n", + "epoch: 4 step: 287, loss is 0.4052\n", + "epoch: 4 step: 288, loss is 0.2970\n", + "epoch: 4 step: 289, loss is 0.4640\n", + "epoch: 4 step: 290, loss is 0.2613\n", + "epoch: 4 step: 291, loss is 0.2677\n", + "epoch: 4 step: 292, loss is 0.3928\n", + "epoch: 4 step: 293, loss is 0.3033\n", + "epoch: 4 step: 294, loss is 0.3590\n", + "epoch: 4 step: 295, loss is 0.6220\n", + "epoch: 4 step: 296, loss is 0.4165\n", + "epoch: 4 step: 297, loss is 0.3620\n", + "epoch: 4 step: 298, loss is 0.3527\n", + "epoch: 4 step: 299, loss is 0.3109\n", + "epoch: 4 step: 300, loss is 0.4211\n", + "epoch: 4 step: 301, loss is 0.3927\n", + "epoch: 4 step: 302, loss is 0.3385\n", + "epoch: 4 step: 303, loss is 0.3242\n", + "epoch: 4 step: 304, loss is 0.3999\n", + "epoch: 4 step: 305, loss is 0.2473\n", + "epoch: 4 step: 306, loss is 0.4007\n", + "epoch: 4 step: 307, loss is 0.3748\n", + "epoch: 4 step: 308, loss is 0.3003\n", + "epoch: 4 step: 309, loss is 0.4165\n", + "epoch: 4 step: 310, loss is 0.2449\n", + "epoch: 4 step: 311, loss is 0.3170\n", + "epoch: 4 step: 312, loss is 0.3388\n", + "epoch: 4 step: 313, loss is 0.5385\n", + "epoch: 4 step: 314, loss is 0.3794\n", + "epoch: 4 step: 315, loss is 0.2365\n", + "epoch: 4 step: 316, loss is 0.4281\n", + "epoch: 4 step: 317, loss is 0.3258\n", + "epoch: 4 step: 318, loss is 0.4437\n", + "epoch: 4 step: 319, loss is 0.3517\n", + "epoch: 4 step: 320, loss is 0.3266\n", + "epoch: 4 step: 321, loss is 0.3717\n", + "epoch: 4 step: 322, loss is 0.4069\n", + "epoch: 4 step: 323, loss is 0.3395\n", + "epoch: 4 step: 324, loss is 0.4231\n", + "epoch: 4 step: 325, loss is 0.4355\n", + "epoch: 4 step: 326, loss is 0.2874\n", + "epoch: 4 step: 327, loss is 0.3945\n", + "epoch: 4 step: 328, loss is 0.3845\n", + "epoch: 4 step: 329, loss is 0.4375\n", + "epoch: 4 step: 330, loss is 0.3023\n", + "epoch: 4 step: 331, loss is 0.4047\n", + "epoch: 4 step: 332, loss is 0.3946\n", + "epoch: 4 step: 333, loss is 0.3176\n", + "epoch: 4 step: 334, loss is 0.4214\n", + "epoch: 4 step: 335, loss is 0.4775\n", + "epoch: 4 step: 336, loss is 0.3526\n", + "epoch: 4 step: 337, loss is 0.4519\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 4/ 10], step: [ 338/ 390], loss: [0.4460], avg loss: [0.3684], time: [105.4652ms]\n", - "Epoch: [ 4/ 10], step: [ 339/ 390], loss: [0.3561], avg loss: [0.3683], time: [105.1250ms]\n", - "Epoch: [ 4/ 10], step: [ 340/ 390], loss: [0.5193], avg loss: [0.3688], time: [104.8603ms]\n", - "Epoch: [ 4/ 10], step: [ 341/ 390], loss: [0.4446], avg loss: [0.3690], time: [105.6721ms]\n", - "Epoch: [ 4/ 10], step: [ 342/ 390], loss: [0.3434], avg loss: [0.3689], time: [105.7959ms]\n", - "Epoch: [ 4/ 10], step: [ 343/ 390], loss: [0.3595], avg loss: [0.3689], time: [105.6604ms]\n", - "Epoch: [ 4/ 10], step: [ 344/ 390], loss: [0.4241], avg loss: [0.3691], time: [107.3353ms]\n", - "Epoch: [ 4/ 10], step: [ 345/ 390], loss: [0.2956], avg loss: [0.3689], time: [110.8987ms]\n", - "Epoch: [ 4/ 10], step: [ 346/ 390], loss: [0.3377], avg loss: [0.3688], time: [107.3465ms]\n", - "Epoch: [ 4/ 10], step: [ 347/ 390], loss: [0.3574], avg loss: [0.3687], time: [109.6387ms]\n", - "Epoch: [ 4/ 10], step: [ 348/ 390], loss: [0.4708], avg loss: [0.3690], time: [106.5981ms]\n", - "Epoch: [ 4/ 10], step: [ 349/ 390], loss: [0.4382], avg loss: [0.3692], time: [108.3109ms]\n", - "Epoch: [ 4/ 10], step: [ 350/ 390], loss: [0.3674], avg loss: [0.3692], time: [106.4668ms]\n", - "Epoch: [ 4/ 10], step: [ 351/ 390], loss: [0.5617], avg loss: [0.3698], time: [107.5759ms]\n", - "Epoch: [ 4/ 10], step: [ 352/ 390], loss: [0.3479], avg loss: [0.3697], time: [104.9974ms]\n", - "Epoch: [ 4/ 10], step: [ 353/ 390], loss: [0.4457], avg loss: [0.3699], time: [104.9132ms]\n", - "Epoch: [ 4/ 10], step: [ 354/ 390], loss: [0.4470], avg loss: [0.3701], time: [104.7776ms]\n", - "Epoch: [ 4/ 10], step: [ 355/ 390], loss: [0.3042], avg loss: [0.3700], time: [105.3958ms]\n", - "Epoch: [ 4/ 10], step: [ 356/ 390], loss: [0.4274], avg loss: [0.3701], time: [103.8568ms]\n", - "Epoch: [ 4/ 10], step: [ 357/ 390], loss: [0.3954], avg loss: [0.3702], time: [107.7995ms]\n", - "Epoch: [ 4/ 10], step: [ 358/ 390], loss: [0.3816], avg loss: [0.3702], time: [106.2849ms]\n", - "Epoch: [ 4/ 10], step: [ 359/ 390], loss: [0.3290], avg loss: [0.3701], time: [109.7863ms]\n", - "Epoch: [ 4/ 10], step: [ 360/ 390], loss: [0.3382], avg loss: [0.3700], time: [105.7122ms]\n", - "Epoch: [ 4/ 10], step: [ 361/ 390], loss: [0.4071], avg loss: [0.3701], time: [106.6349ms]\n", - "Epoch: [ 4/ 10], step: [ 362/ 390], loss: [0.3767], avg loss: [0.3701], time: [104.5048ms]\n", - "Epoch: [ 4/ 10], step: [ 363/ 390], loss: [0.4927], avg loss: [0.3705], time: [106.5168ms]\n", - "Epoch: [ 4/ 10], step: [ 364/ 390], loss: [0.3349], avg loss: [0.3704], time: [105.2248ms]\n", - "Epoch: [ 4/ 10], step: [ 365/ 390], loss: [0.3436], avg loss: [0.3703], time: [106.6382ms]\n", - "Epoch: [ 4/ 10], step: [ 366/ 390], loss: [0.2961], avg loss: [0.3701], time: [104.9166ms]\n", - "Epoch: [ 4/ 10], step: [ 367/ 390], loss: [0.2820], avg loss: [0.3699], time: [106.4293ms]\n", - "Epoch: [ 4/ 10], step: [ 368/ 390], loss: [0.3242], avg loss: [0.3697], time: [104.6309ms]\n", - "Epoch: [ 4/ 10], step: [ 369/ 390], loss: [0.3750], avg loss: [0.3697], time: [108.9773ms]\n", - "Epoch: [ 4/ 10], step: [ 370/ 390], loss: [0.4032], avg loss: [0.3698], time: [106.9911ms]\n", - "Epoch: [ 4/ 10], step: [ 371/ 390], loss: [0.2909], avg loss: [0.3696], time: [109.1559ms]\n", - "Epoch: [ 4/ 10], step: [ 372/ 390], loss: [0.3955], avg loss: [0.3697], time: [108.0177ms]\n", - "Epoch: [ 4/ 10], step: [ 373/ 390], loss: [0.2918], avg loss: [0.3695], time: [110.2009ms]\n", - "Epoch: [ 4/ 10], step: [ 374/ 390], loss: [0.3997], avg loss: [0.3696], time: [106.7991ms]\n", - "Epoch: [ 4/ 10], step: [ 375/ 390], loss: [0.3154], avg loss: [0.3694], time: [108.1967ms]\n", - "Epoch: [ 4/ 10], step: [ 376/ 390], loss: [0.3779], avg loss: [0.3694], time: [107.9221ms]\n", - "Epoch: [ 4/ 10], step: [ 377/ 390], loss: [0.3876], avg loss: [0.3695], time: [105.7892ms]\n", - "Epoch: [ 4/ 10], step: [ 378/ 390], loss: [0.5116], avg loss: [0.3699], time: [103.4391ms]\n", - "Epoch: [ 4/ 10], step: [ 379/ 390], loss: [0.2980], avg loss: [0.3697], time: [106.8671ms]\n", - "Epoch: [ 4/ 10], step: [ 380/ 390], loss: [0.2813], avg loss: [0.3694], time: [105.5164ms]\n", - "Epoch: [ 4/ 10], step: [ 381/ 390], loss: [0.2438], avg loss: [0.3691], time: [108.1378ms]\n", - "Epoch: [ 4/ 10], step: [ 382/ 390], loss: [0.3873], avg loss: [0.3692], time: [106.9922ms]\n", - "Epoch: [ 4/ 10], step: [ 383/ 390], loss: [0.3675], avg loss: [0.3692], time: [108.4902ms]\n", - "Epoch: [ 4/ 10], step: [ 384/ 390], loss: [0.4243], avg loss: [0.3693], time: [104.8622ms]\n", - "Epoch: [ 4/ 10], step: [ 385/ 390], loss: [0.3276], avg loss: [0.3692], time: [107.1630ms]\n", - "Epoch: [ 4/ 10], step: [ 386/ 390], loss: [0.2505], avg loss: [0.3689], time: [105.1219ms]\n", - "Epoch: [ 4/ 10], step: [ 387/ 390], loss: [0.2351], avg loss: [0.3685], time: [108.8781ms]\n", - "Epoch: [ 4/ 10], step: [ 388/ 390], loss: [0.2487], avg loss: [0.3682], time: [105.7706ms]\n", - "Epoch: [ 4/ 10], step: [ 389/ 390], loss: [0.3252], avg loss: [0.3681], time: [103.2357ms]\n", - "Epoch: [ 4/ 10], step: [ 390/ 390], loss: [0.3969], avg loss: [0.3682], time: [920.7304ms]\n", - "Epoch time: 42451.183, per step time: 108.849\n", + "epoch: 4 step: 338, loss is 0.4460\n", + "epoch: 4 step: 339, loss is 0.3561\n", + "epoch: 4 step: 340, loss is 0.5193\n", + "epoch: 4 step: 341, loss is 0.4446\n", + "epoch: 4 step: 342, loss is 0.3434\n", + "epoch: 4 step: 343, loss is 0.3595\n", + "epoch: 4 step: 344, loss is 0.4241\n", + "epoch: 4 step: 345, loss is 0.2956\n", + "epoch: 4 step: 346, loss is 0.3377\n", + "epoch: 4 step: 347, loss is 0.3574\n", + "epoch: 4 step: 348, loss is 0.4708\n", + "epoch: 4 step: 349, loss is 0.4382\n", + "epoch: 4 step: 350, loss is 0.3674\n", + "epoch: 4 step: 351, loss is 0.5617\n", + "epoch: 4 step: 352, loss is 0.3479\n", + "epoch: 4 step: 353, loss is 0.4457\n", + "epoch: 4 step: 354, loss is 0.4470\n", + "epoch: 4 step: 355, loss is 0.3042\n", + "epoch: 4 step: 356, loss is 0.4274\n", + "epoch: 4 step: 357, loss is 0.3954\n", + "epoch: 4 step: 358, loss is 0.3816\n", + "epoch: 4 step: 359, loss is 0.3290\n", + "epoch: 4 step: 360, loss is 0.3382\n", + "epoch: 4 step: 361, loss is 0.4071\n", + "epoch: 4 step: 362, loss is 0.3767\n", + "epoch: 4 step: 363, loss is 0.4927\n", + "epoch: 4 step: 364, loss is 0.3349\n", + "epoch: 4 step: 365, loss is 0.3436\n", + "epoch: 4 step: 366, loss is 0.2961\n", + "epoch: 4 step: 367, loss is 0.2820\n", + "epoch: 4 step: 368, loss is 0.3242\n", + "epoch: 4 step: 369, loss is 0.3750\n", + "epoch: 4 step: 370, loss is 0.4032\n", + "epoch: 4 step: 371, loss is 0.2909\n", + "epoch: 4 step: 372, loss is 0.3955\n", + "epoch: 4 step: 373, loss is 0.2918\n", + "epoch: 4 step: 374, loss is 0.3997\n", + "epoch: 4 step: 375, loss is 0.3154\n", + "epoch: 4 step: 376, loss is 0.3779\n", + "epoch: 4 step: 377, loss is 0.3876\n", + "epoch: 4 step: 378, loss is 0.5116\n", + "epoch: 4 step: 379, loss is 0.2980\n", + "epoch: 4 step: 380, loss is 0.2813\n", + "epoch: 4 step: 381, loss is 0.2438\n", + "epoch: 4 step: 382, loss is 0.3873\n", + "epoch: 4 step: 383, loss is 0.3675\n", + "epoch: 4 step: 384, loss is 0.4243\n", + "epoch: 4 step: 385, loss is 0.3276\n", + "epoch: 4 step: 386, loss is 0.2505\n", + "epoch: 4 step: 387, loss is 0.2351\n", + "epoch: 4 step: 388, loss is 0.2487\n", + "epoch: 4 step: 389, loss is 0.3252\n", + "epoch: 4 step: 390, loss is 0.3969\n", "Epoch time: 42451.503, per step time: 108.850, avg loss: 0.368\n", "************************************************************\n", - "Epoch: [ 5/ 10], step: [ 1/ 390], loss: [0.2794], avg loss: [0.2794], time: [98.0172ms]\n", - "Epoch: [ 5/ 10], step: [ 2/ 390], loss: [0.2933], avg loss: [0.2863], time: [99.0131ms]\n", - "Epoch: [ 5/ 10], step: [ 3/ 390], loss: [0.3252], avg loss: [0.2993], time: [99.0434ms]\n", - "Epoch: [ 5/ 10], step: [ 4/ 390], loss: [0.4135], avg loss: [0.3279], time: [102.1309ms]\n", - "Epoch: [ 5/ 10], step: [ 5/ 390], loss: [0.3011], avg loss: [0.3225], time: [98.5022ms]\n", - "Epoch: [ 5/ 10], step: [ 6/ 390], loss: [0.2266], avg loss: [0.3065], time: [98.1512ms]\n", - "Epoch: [ 5/ 10], step: [ 7/ 390], loss: [0.3133], avg loss: [0.3075], time: [100.8203ms]\n", - "Epoch: [ 5/ 10], step: [ 8/ 390], loss: [0.3449], avg loss: [0.3122], time: [100.8372ms]\n", - "Epoch: [ 5/ 10], step: [ 9/ 390], loss: [0.3031], avg loss: [0.3112], time: [99.9768ms]\n", - "Epoch: [ 5/ 10], step: [ 10/ 390], loss: [0.3289], avg loss: [0.3129], time: [101.7046ms]\n", - "Epoch: [ 5/ 10], step: [ 11/ 390], loss: [0.3923], avg loss: [0.3201], time: [98.9933ms]\n", - "Epoch: [ 5/ 10], step: [ 12/ 390], loss: [0.3127], avg loss: [0.3195], time: [99.1669ms]\n", - "Epoch: [ 5/ 10], step: [ 13/ 390], loss: [0.3678], avg loss: [0.3232], time: [102.7796ms]\n", - "Epoch: [ 5/ 10], step: [ 14/ 390], loss: [0.3622], avg loss: [0.3260], time: [98.2735ms]\n", - "Epoch: [ 5/ 10], step: [ 15/ 390], loss: [0.2448], avg loss: [0.3206], time: [101.1198ms]\n", - "Epoch: [ 5/ 10], step: [ 16/ 390], loss: [0.2788], avg loss: [0.3180], time: [101.7115ms]\n", - "Epoch: [ 5/ 10], step: [ 17/ 390], loss: [0.3236], avg loss: [0.3183], time: [101.3196ms]\n", - "Epoch: [ 5/ 10], step: [ 18/ 390], loss: [0.4522], avg loss: [0.3258], time: [101.2344ms]\n", - "Epoch: [ 5/ 10], step: [ 19/ 390], loss: [0.2819], avg loss: [0.3234], time: [97.2536ms]\n", - "Epoch: [ 5/ 10], step: [ 20/ 390], loss: [0.2288], avg loss: [0.3187], time: [99.6263ms]\n", - "Epoch: [ 5/ 10], step: [ 21/ 390], loss: [0.2689], avg loss: [0.3163], time: [100.7798ms]\n", - "Epoch: [ 5/ 10], step: [ 22/ 390], loss: [0.4091], avg loss: [0.3206], time: [102.3359ms]\n", - "Epoch: [ 5/ 10], step: [ 23/ 390], loss: [0.2462], avg loss: [0.3173], time: [97.8270ms]\n", - "Epoch: [ 5/ 10], step: [ 24/ 390], loss: [0.3900], avg loss: [0.3203], time: [102.9096ms]\n", - "Epoch: [ 5/ 10], step: [ 25/ 390], loss: [0.3287], avg loss: [0.3207], time: [102.8814ms]\n", - "Epoch: [ 5/ 10], step: [ 26/ 390], loss: [0.3620], avg loss: [0.3223], time: [101.8305ms]\n", - "Epoch: [ 5/ 10], step: [ 27/ 390], loss: [0.3002], avg loss: [0.3215], time: [97.9817ms]\n", - "Epoch: [ 5/ 10], step: [ 28/ 390], loss: [0.2733], avg loss: [0.3197], time: [102.3424ms]\n", - "Epoch: [ 5/ 10], step: [ 29/ 390], loss: [0.3498], avg loss: [0.3208], time: [99.0362ms]\n", - "Epoch: [ 5/ 10], step: [ 30/ 390], loss: [0.3848], avg loss: [0.3229], time: [100.5993ms]\n", - "Epoch: [ 5/ 10], step: [ 31/ 390], loss: [0.3515], avg loss: [0.3238], time: [99.0884ms]\n", - "Epoch: [ 5/ 10], step: [ 32/ 390], loss: [0.3267], avg loss: [0.3239], time: [102.6182ms]\n", - "Epoch: [ 5/ 10], step: [ 33/ 390], loss: [0.2962], avg loss: [0.3231], time: [103.2467ms]\n", - "Epoch: [ 5/ 10], step: [ 34/ 390], loss: [0.3273], avg loss: [0.3232], time: [100.3430ms]\n" + "epoch: 5 step: 1, loss is 0.2794\n", + "epoch: 5 step: 2, loss is 0.2933\n", + "epoch: 5 step: 3, loss is 0.3252\n", + "epoch: 5 step: 4, loss is 0.4135\n", + "epoch: 5 step: 5, loss is 0.3011\n", + "epoch: 5 step: 6, loss is 0.2266\n", + "epoch: 5 step: 7, loss is 0.3133\n", + "epoch: 5 step: 8, loss is 0.3449\n", + "epoch: 5 step: 9, loss is 0.3031\n", + "epoch: 5 step: 10, loss is 0.3289\n", + "epoch: 5 step: 11, loss is 0.3923\n", + "epoch: 5 step: 12, loss is 0.3127\n", + "epoch: 5 step: 13, loss is 0.3678\n", + "epoch: 5 step: 14, loss is 0.3622\n", + "epoch: 5 step: 15, loss is 0.2448\n", + "epoch: 5 step: 16, loss is 0.2788\n", + "epoch: 5 step: 17, loss is 0.3236\n", + "epoch: 5 step: 18, loss is 0.4522\n", + "epoch: 5 step: 19, loss is 0.2819\n", + "epoch: 5 step: 20, loss is 0.2288\n", + "epoch: 5 step: 21, loss is 0.2689\n", + "epoch: 5 step: 22, loss is 0.4091\n", + "epoch: 5 step: 23, loss is 0.2462\n", + "epoch: 5 step: 24, loss is 0.3900\n", + "epoch: 5 step: 25, loss is 0.3287\n", + "epoch: 5 step: 26, loss is 0.3620\n", + "epoch: 5 step: 27, loss is 0.3002\n", + "epoch: 5 step: 28, loss is 0.2733\n", + "epoch: 5 step: 29, loss is 0.3498\n", + "epoch: 5 step: 30, loss is 0.3848\n", + "epoch: 5 step: 31, loss is 0.3515\n", + "epoch: 5 step: 32, loss is 0.3267\n", + "epoch: 5 step: 33, loss is 0.2962\n", + "epoch: 5 step: 34, loss is 0.3273\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 5/ 10], step: [ 35/ 390], loss: [0.3577], avg loss: [0.3242], time: [100.6477ms]\n", - "Epoch: [ 5/ 10], step: [ 36/ 390], loss: [0.4371], avg loss: [0.3273], time: [100.9886ms]\n", - "Epoch: [ 5/ 10], step: [ 37/ 390], loss: [0.4086], avg loss: [0.3295], time: [100.7073ms]\n", - "Epoch: [ 5/ 10], step: [ 38/ 390], loss: [0.1705], avg loss: [0.3253], time: [101.3937ms]\n", - "Epoch: [ 5/ 10], step: [ 39/ 390], loss: [0.3365], avg loss: [0.3256], time: [97.3103ms]\n", - "Epoch: [ 5/ 10], step: [ 40/ 390], loss: [0.3910], avg loss: [0.3273], time: [100.9321ms]\n", - "Epoch: [ 5/ 10], step: [ 41/ 390], loss: [0.3509], avg loss: [0.3278], time: [97.9929ms]\n", - "Epoch: [ 5/ 10], step: [ 42/ 390], loss: [0.4014], avg loss: [0.3296], time: [98.8083ms]\n", - "Epoch: [ 5/ 10], step: [ 43/ 390], loss: [0.2674], avg loss: [0.3281], time: [103.3001ms]\n", - "Epoch: [ 5/ 10], step: [ 44/ 390], loss: [0.3730], avg loss: [0.3292], time: [99.5758ms]\n", - "Epoch: [ 5/ 10], step: [ 45/ 390], loss: [0.2710], avg loss: [0.3279], time: [102.5946ms]\n", - "Epoch: [ 5/ 10], step: [ 46/ 390], loss: [0.2464], avg loss: [0.3261], time: [98.9909ms]\n", - "Epoch: [ 5/ 10], step: [ 47/ 390], loss: [0.3998], avg loss: [0.3277], time: [98.2902ms]\n", - "Epoch: [ 5/ 10], step: [ 48/ 390], loss: [0.2825], avg loss: [0.3267], time: [101.1927ms]\n", - "Epoch: [ 5/ 10], step: [ 49/ 390], loss: [0.2899], avg loss: [0.3260], time: [103.0283ms]\n", - "Epoch: [ 5/ 10], step: [ 50/ 390], loss: [0.2653], avg loss: [0.3248], time: [102.0355ms]\n", - "Epoch: [ 5/ 10], step: [ 51/ 390], loss: [0.3137], avg loss: [0.3245], time: [99.5808ms]\n", - "Epoch: [ 5/ 10], step: [ 52/ 390], loss: [0.2977], avg loss: [0.3240], time: [102.6967ms]\n", - "Epoch: [ 5/ 10], step: [ 53/ 390], loss: [0.1626], avg loss: [0.3210], time: [101.0261ms]\n", - "Epoch: [ 5/ 10], step: [ 54/ 390], loss: [0.3451], avg loss: [0.3214], time: [100.6527ms]\n", - "Epoch: [ 5/ 10], step: [ 55/ 390], loss: [0.4533], avg loss: [0.3238], time: [101.9166ms]\n", - "Epoch: [ 5/ 10], step: [ 56/ 390], loss: [0.3027], avg loss: [0.3234], time: [99.3133ms]\n", - "Epoch: [ 5/ 10], step: [ 57/ 390], loss: [0.3573], avg loss: [0.3240], time: [102.1547ms]\n", - "Epoch: [ 5/ 10], step: [ 58/ 390], loss: [0.2549], avg loss: [0.3229], time: [101.6333ms]\n", - "Epoch: [ 5/ 10], step: [ 59/ 390], loss: [0.3431], avg loss: [0.3232], time: [104.0702ms]\n", - "Epoch: [ 5/ 10], step: [ 60/ 390], loss: [0.3799], avg loss: [0.3241], time: [101.0134ms]\n", - "Epoch: [ 5/ 10], step: [ 61/ 390], loss: [0.2788], avg loss: [0.3234], time: [101.4233ms]\n", - "Epoch: [ 5/ 10], step: [ 62/ 390], loss: [0.2534], avg loss: [0.3223], time: [99.6974ms]\n", - "Epoch: [ 5/ 10], step: [ 63/ 390], loss: [0.4903], avg loss: [0.3249], time: [102.4032ms]\n", - "Epoch: [ 5/ 10], step: [ 64/ 390], loss: [0.3201], avg loss: [0.3249], time: [99.7505ms]\n", - "Epoch: [ 5/ 10], step: [ 65/ 390], loss: [0.3645], avg loss: [0.3255], time: [98.7120ms]\n", - "Epoch: [ 5/ 10], step: [ 66/ 390], loss: [0.2357], avg loss: [0.3241], time: [101.2895ms]\n", - "Epoch: [ 5/ 10], step: [ 67/ 390], loss: [0.3705], avg loss: [0.3248], time: [97.6963ms]\n", - "Epoch: [ 5/ 10], step: [ 68/ 390], loss: [0.1633], avg loss: [0.3224], time: [102.0422ms]\n", - "Epoch: [ 5/ 10], step: [ 69/ 390], loss: [0.2591], avg loss: [0.3215], time: [104.1415ms]\n", - "Epoch: [ 5/ 10], step: [ 70/ 390], loss: [0.3557], avg loss: [0.3220], time: [102.0610ms]\n", - "Epoch: [ 5/ 10], step: [ 71/ 390], loss: [0.2731], avg loss: [0.3213], time: [103.0972ms]\n", - "Epoch: [ 5/ 10], step: [ 72/ 390], loss: [0.4700], avg loss: [0.3234], time: [99.8461ms]\n", - "Epoch: [ 5/ 10], step: [ 73/ 390], loss: [0.3538], avg loss: [0.3238], time: [98.6857ms]\n", - "Epoch: [ 5/ 10], step: [ 74/ 390], loss: [0.2912], avg loss: [0.3233], time: [100.5046ms]\n", - "Epoch: [ 5/ 10], step: [ 75/ 390], loss: [0.3697], avg loss: [0.3240], time: [99.0720ms]\n", - "Epoch: [ 5/ 10], step: [ 76/ 390], loss: [0.4126], avg loss: [0.3251], time: [100.3754ms]\n", - "Epoch: [ 5/ 10], step: [ 77/ 390], loss: [0.4306], avg loss: [0.3265], time: [102.3202ms]\n", - "Epoch: [ 5/ 10], step: [ 78/ 390], loss: [0.3097], avg loss: [0.3263], time: [101.7010ms]\n", - "Epoch: [ 5/ 10], step: [ 79/ 390], loss: [0.2506], avg loss: [0.3253], time: [99.2160ms]\n", - "Epoch: [ 5/ 10], step: [ 80/ 390], loss: [0.3555], avg loss: [0.3257], time: [100.8434ms]\n", - "Epoch: [ 5/ 10], step: [ 81/ 390], loss: [0.4372], avg loss: [0.3271], time: [100.6939ms]\n", - "Epoch: [ 5/ 10], step: [ 82/ 390], loss: [0.3791], avg loss: [0.3277], time: [98.4166ms]\n", - "Epoch: [ 5/ 10], step: [ 83/ 390], loss: [0.3631], avg loss: [0.3281], time: [104.9848ms]\n", - "Epoch: [ 5/ 10], step: [ 84/ 390], loss: [0.2663], avg loss: [0.3274], time: [102.7405ms]\n", - "Epoch: [ 5/ 10], step: [ 85/ 390], loss: [0.4309], avg loss: [0.3286], time: [99.1342ms]\n", - "Epoch: [ 5/ 10], step: [ 86/ 390], loss: [0.3595], avg loss: [0.3290], time: [101.1102ms]\n", - "Epoch: [ 5/ 10], step: [ 87/ 390], loss: [0.3064], avg loss: [0.3287], time: [99.0789ms]\n", - "Epoch: [ 5/ 10], step: [ 88/ 390], loss: [0.3514], avg loss: [0.3290], time: [102.4284ms]\n", - "Epoch: [ 5/ 10], step: [ 89/ 390], loss: [0.3699], avg loss: [0.3294], time: [104.1958ms]\n", - "Epoch: [ 5/ 10], step: [ 90/ 390], loss: [0.4920], avg loss: [0.3313], time: [102.2644ms]\n", - "Epoch: [ 5/ 10], step: [ 91/ 390], loss: [0.2617], avg loss: [0.3305], time: [104.6135ms]\n", - "Epoch: [ 5/ 10], step: [ 92/ 390], loss: [0.3189], avg loss: [0.3304], time: [99.1240ms]\n", - "Epoch: [ 5/ 10], step: [ 93/ 390], loss: [0.2781], avg loss: [0.3298], time: [101.6312ms]\n", - "Epoch: [ 5/ 10], step: [ 94/ 390], loss: [0.2895], avg loss: [0.3294], time: [101.8772ms]\n", - "Epoch: [ 5/ 10], step: [ 95/ 390], loss: [0.2069], avg loss: [0.3281], time: [97.9164ms]\n", - "Epoch: [ 5/ 10], step: [ 96/ 390], loss: [0.4565], avg loss: [0.3294], time: [99.3772ms]\n", - "Epoch: [ 5/ 10], step: [ 97/ 390], loss: [0.2529], avg loss: [0.3286], time: [99.6089ms]\n", - "Epoch: [ 5/ 10], step: [ 98/ 390], loss: [0.2671], avg loss: [0.3280], time: [103.4021ms]\n", - "Epoch: [ 5/ 10], step: [ 99/ 390], loss: [0.2349], avg loss: [0.3271], time: [103.9906ms]\n", - "Epoch: [ 5/ 10], step: [ 100/ 390], loss: [0.5263], avg loss: [0.3291], time: [102.5908ms]\n", - "Epoch: [ 5/ 10], step: [ 101/ 390], loss: [0.4659], avg loss: [0.3304], time: [103.2996ms]\n", - "Epoch: [ 5/ 10], step: [ 102/ 390], loss: [0.2615], avg loss: [0.3297], time: [101.5222ms]\n", - "Epoch: [ 5/ 10], step: [ 103/ 390], loss: [0.4434], avg loss: [0.3308], time: [98.7415ms]\n", - "Epoch: [ 5/ 10], step: [ 104/ 390], loss: [0.3079], avg loss: [0.3306], time: [99.9544ms]\n", - "Epoch: [ 5/ 10], step: [ 105/ 390], loss: [0.4543], avg loss: [0.3318], time: [98.9020ms]\n", - "Epoch: [ 5/ 10], step: [ 106/ 390], loss: [0.4415], avg loss: [0.3328], time: [103.9250ms]\n", - "Epoch: [ 5/ 10], step: [ 107/ 390], loss: [0.2911], avg loss: [0.3324], time: [98.3868ms]\n", - "Epoch: [ 5/ 10], step: [ 108/ 390], loss: [0.2849], avg loss: [0.3320], time: [98.7260ms]\n", - "Epoch: [ 5/ 10], step: [ 109/ 390], loss: [0.2857], avg loss: [0.3316], time: [103.0591ms]\n", - "Epoch: [ 5/ 10], step: [ 110/ 390], loss: [0.4117], avg loss: [0.3323], time: [100.4229ms]\n", - "Epoch: [ 5/ 10], step: [ 111/ 390], loss: [0.3222], avg loss: [0.3322], time: [102.7026ms]\n", - "Epoch: [ 5/ 10], step: [ 112/ 390], loss: [0.3745], avg loss: [0.3326], time: [103.8859ms]\n", - "Epoch: [ 5/ 10], step: [ 113/ 390], loss: [0.3251], avg loss: [0.3325], time: [104.0967ms]\n", - "Epoch: [ 5/ 10], step: [ 114/ 390], loss: [0.3649], avg loss: [0.3328], time: [97.3947ms]\n", - "Epoch: [ 5/ 10], step: [ 115/ 390], loss: [0.4835], avg loss: [0.3341], time: [98.9718ms]\n", - "Epoch: [ 5/ 10], step: [ 116/ 390], loss: [0.3027], avg loss: [0.3338], time: [97.8193ms]\n", - "Epoch: [ 5/ 10], step: [ 117/ 390], loss: [0.2808], avg loss: [0.3334], time: [101.1531ms]\n", - "Epoch: [ 5/ 10], step: [ 118/ 390], loss: [0.4715], avg loss: [0.3346], time: [100.4035ms]\n", - "Epoch: [ 5/ 10], step: [ 119/ 390], loss: [0.2866], avg loss: [0.3342], time: [100.5137ms]\n", - "Epoch: [ 5/ 10], step: [ 120/ 390], loss: [0.2574], avg loss: [0.3335], time: [102.9720ms]\n", - "Epoch: [ 5/ 10], step: [ 121/ 390], loss: [0.4101], avg loss: [0.3342], time: [103.4522ms]\n", - "Epoch: [ 5/ 10], step: [ 122/ 390], loss: [0.4093], avg loss: [0.3348], time: [99.1914ms]\n", - "Epoch: [ 5/ 10], step: [ 123/ 390], loss: [0.3165], avg loss: [0.3346], time: [102.6032ms]\n" + "epoch: 5 step: 35, loss is 0.3577\n", + "epoch: 5 step: 36, loss is 0.4371\n", + "epoch: 5 step: 37, loss is 0.4086\n", + "epoch: 5 step: 38, loss is 0.1705\n", + "epoch: 5 step: 39, loss is 0.3365\n", + "epoch: 5 step: 40, loss is 0.3910\n", + "epoch: 5 step: 41, loss is 0.3509\n", + "epoch: 5 step: 42, loss is 0.4014\n", + "epoch: 5 step: 43, loss is 0.2674\n", + "epoch: 5 step: 44, loss is 0.3730\n", + "epoch: 5 step: 45, loss is 0.2710\n", + "epoch: 5 step: 46, loss is 0.2464\n", + "epoch: 5 step: 47, loss is 0.3998\n", + "epoch: 5 step: 48, loss is 0.2825\n", + "epoch: 5 step: 49, loss is 0.2899\n", + "epoch: 5 step: 50, loss is 0.2653\n", + "epoch: 5 step: 51, loss is 0.3137\n", + "epoch: 5 step: 52, loss is 0.2977\n", + "epoch: 5 step: 53, loss is 0.1626\n", + "epoch: 5 step: 54, loss is 0.3451\n", + "epoch: 5 step: 55, loss is 0.4533\n", + "epoch: 5 step: 56, loss is 0.3027\n", + "epoch: 5 step: 57, loss is 0.3573\n", + "epoch: 5 step: 58, loss is 0.2549\n", + "epoch: 5 step: 59, loss is 0.3431\n", + "epoch: 5 step: 60, loss is 0.3799\n", + "epoch: 5 step: 61, loss is 0.2788\n", + "epoch: 5 step: 62, loss is 0.2534\n", + "epoch: 5 step: 63, loss is 0.4903\n", + "epoch: 5 step: 64, loss is 0.3201\n", + "epoch: 5 step: 65, loss is 0.3645\n", + "epoch: 5 step: 66, loss is 0.2357\n", + "epoch: 5 step: 67, loss is 0.3705\n", + "epoch: 5 step: 68, loss is 0.1633\n", + "epoch: 5 step: 69, loss is 0.2591\n", + "epoch: 5 step: 70, loss is 0.3557\n", + "epoch: 5 step: 71, loss is 0.2731\n", + "epoch: 5 step: 72, loss is 0.4700\n", + "epoch: 5 step: 73, loss is 0.3538\n", + "epoch: 5 step: 74, loss is 0.2912\n", + "epoch: 5 step: 75, loss is 0.3697\n", + "epoch: 5 step: 76, loss is 0.4126\n", + "epoch: 5 step: 77, loss is 0.4306\n", + "epoch: 5 step: 78, loss is 0.3097\n", + "epoch: 5 step: 79, loss is 0.2506\n", + "epoch: 5 step: 80, loss is 0.3555\n", + "epoch: 5 step: 81, loss is 0.4372\n", + "epoch: 5 step: 82, loss is 0.3791\n", + "epoch: 5 step: 83, loss is 0.3631\n", + "epoch: 5 step: 84, loss is 0.2663\n", + "epoch: 5 step: 85, loss is 0.4309\n", + "epoch: 5 step: 86, loss is 0.3595\n", + "epoch: 5 step: 87, loss is 0.3064\n", + "epoch: 5 step: 88, loss is 0.3514\n", + "epoch: 5 step: 89, loss is 0.3699\n", + "epoch: 5 step: 90, loss is 0.4920\n", + "epoch: 5 step: 91, loss is 0.2617\n", + "epoch: 5 step: 92, loss is 0.3189\n", + "epoch: 5 step: 93, loss is 0.2781\n", + "epoch: 5 step: 94, loss is 0.2895\n", + "epoch: 5 step: 95, loss is 0.2069\n", + "epoch: 5 step: 96, loss is 0.4565\n", + "epoch: 5 step: 97, loss is 0.2529\n", + "epoch: 5 step: 98, loss is 0.2671\n", + "epoch: 5 step: 99, loss is 0.2349\n", + "epoch: 5 step: 100, loss is 0.5263\n", + "epoch: 5 step: 101, loss is 0.4659\n", + "epoch: 5 step: 102, loss is 0.2615\n", + "epoch: 5 step: 103, loss is 0.4434\n", + "epoch: 5 step: 104, loss is 0.3079\n", + "epoch: 5 step: 105, loss is 0.4543\n", + "epoch: 5 step: 106, loss is 0.4415\n", + "epoch: 5 step: 107, loss is 0.2911\n", + "epoch: 5 step: 108, loss is 0.2849\n", + "epoch: 5 step: 109, loss is 0.2857\n", + "epoch: 5 step: 110, loss is 0.4117\n", + "epoch: 5 step: 111, loss is 0.3222\n", + "epoch: 5 step: 112, loss is 0.3745\n", + "epoch: 5 step: 113, loss is 0.3251\n", + "epoch: 5 step: 114, loss is 0.3649\n", + "epoch: 5 step: 115, loss is 0.4835\n", + "epoch: 5 step: 116, loss is 0.3027\n", + "epoch: 5 step: 117, loss is 0.2808\n", + "epoch: 5 step: 118, loss is 0.4715\n", + "epoch: 5 step: 119, loss is 0.2866\n", + "epoch: 5 step: 120, loss is 0.2574\n", + "epoch: 5 step: 121, loss is 0.4101\n", + "epoch: 5 step: 122, loss is 0.4093\n", + "epoch: 5 step: 123, loss is 0.3165\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 5/ 10], step: [ 124/ 390], loss: [0.3165], avg loss: [0.3345], time: [98.5579ms]\n", - "Epoch: [ 5/ 10], step: [ 125/ 390], loss: [0.2910], avg loss: [0.3341], time: [104.8245ms]\n", - "Epoch: [ 5/ 10], step: [ 126/ 390], loss: [0.4151], avg loss: [0.3348], time: [100.1546ms]\n", - "Epoch: [ 5/ 10], step: [ 127/ 390], loss: [0.3650], avg loss: [0.3350], time: [98.5594ms]\n", - "Epoch: [ 5/ 10], step: [ 128/ 390], loss: [0.4466], avg loss: [0.3359], time: [98.1710ms]\n", - "Epoch: [ 5/ 10], step: [ 129/ 390], loss: [0.3491], avg loss: [0.3360], time: [102.2282ms]\n", - "Epoch: [ 5/ 10], step: [ 130/ 390], loss: [0.3943], avg loss: [0.3364], time: [102.3917ms]\n", - "Epoch: [ 5/ 10], step: [ 131/ 390], loss: [0.3831], avg loss: [0.3368], time: [102.0710ms]\n", - "Epoch: [ 5/ 10], step: [ 132/ 390], loss: [0.3353], avg loss: [0.3368], time: [99.2439ms]\n", - "Epoch: [ 5/ 10], step: [ 133/ 390], loss: [0.3608], avg loss: [0.3370], time: [99.8654ms]\n", - "Epoch: [ 5/ 10], step: [ 134/ 390], loss: [0.3089], avg loss: [0.3367], time: [102.2341ms]\n", - "Epoch: [ 5/ 10], step: [ 135/ 390], loss: [0.3661], avg loss: [0.3370], time: [101.4662ms]\n", - "Epoch: [ 5/ 10], step: [ 136/ 390], loss: [0.2462], avg loss: [0.3363], time: [100.6999ms]\n", - "Epoch: [ 5/ 10], step: [ 137/ 390], loss: [0.2555], avg loss: [0.3357], time: [100.4691ms]\n", - "Epoch: [ 5/ 10], step: [ 138/ 390], loss: [0.3958], avg loss: [0.3361], time: [103.4663ms]\n", - "Epoch: [ 5/ 10], step: [ 139/ 390], loss: [0.3909], avg loss: [0.3365], time: [103.5306ms]\n", - "Epoch: [ 5/ 10], step: [ 140/ 390], loss: [0.4445], avg loss: [0.3373], time: [99.4577ms]\n", - "Epoch: [ 5/ 10], step: [ 141/ 390], loss: [0.3978], avg loss: [0.3377], time: [102.4783ms]\n", - "Epoch: [ 5/ 10], step: [ 142/ 390], loss: [0.4142], avg loss: [0.3383], time: [103.3499ms]\n", - "Epoch: [ 5/ 10], step: [ 143/ 390], loss: [0.5226], avg loss: [0.3396], time: [101.3515ms]\n", - "Epoch: [ 5/ 10], step: [ 144/ 390], loss: [0.4125], avg loss: [0.3401], time: [100.8694ms]\n", - "Epoch: [ 5/ 10], step: [ 145/ 390], loss: [0.2795], avg loss: [0.3397], time: [98.3696ms]\n", - "Epoch: [ 5/ 10], step: [ 146/ 390], loss: [0.3510], avg loss: [0.3397], time: [101.3107ms]\n", - "Epoch: [ 5/ 10], step: [ 147/ 390], loss: [0.3275], avg loss: [0.3396], time: [100.5538ms]\n", - "Epoch: [ 5/ 10], step: [ 148/ 390], loss: [0.5054], avg loss: [0.3408], time: [100.9009ms]\n", - "Epoch: [ 5/ 10], step: [ 149/ 390], loss: [0.3694], avg loss: [0.3410], time: [99.3698ms]\n", - "Epoch: [ 5/ 10], step: [ 150/ 390], loss: [0.5045], avg loss: [0.3420], time: [103.3928ms]\n", - "Epoch: [ 5/ 10], step: [ 151/ 390], loss: [0.3543], avg loss: [0.3421], time: [99.5314ms]\n", - "Epoch: [ 5/ 10], step: [ 152/ 390], loss: [0.3545], avg loss: [0.3422], time: [101.6512ms]\n", - "Epoch: [ 5/ 10], step: [ 153/ 390], loss: [0.3695], avg loss: [0.3424], time: [103.3027ms]\n", - "Epoch: [ 5/ 10], step: [ 154/ 390], loss: [0.3324], avg loss: [0.3423], time: [99.4759ms]\n", - "Epoch: [ 5/ 10], step: [ 155/ 390], loss: [0.4030], avg loss: [0.3427], time: [102.0808ms]\n", - "Epoch: [ 5/ 10], step: [ 156/ 390], loss: [0.3399], avg loss: [0.3427], time: [101.4318ms]\n", - "Epoch: [ 5/ 10], step: [ 157/ 390], loss: [0.2697], avg loss: [0.3422], time: [98.8903ms]\n", - "Epoch: [ 5/ 10], step: [ 158/ 390], loss: [0.3390], avg loss: [0.3422], time: [100.8635ms]\n", - "Epoch: [ 5/ 10], step: [ 159/ 390], loss: [0.3495], avg loss: [0.3423], time: [98.5677ms]\n", - "Epoch: [ 5/ 10], step: [ 160/ 390], loss: [0.3949], avg loss: [0.3426], time: [102.8528ms]\n", - "Epoch: [ 5/ 10], step: [ 161/ 390], loss: [0.3042], avg loss: [0.3424], time: [103.6329ms]\n", - "Epoch: [ 5/ 10], step: [ 162/ 390], loss: [0.2852], avg loss: [0.3420], time: [98.7141ms]\n", - "Epoch: [ 5/ 10], step: [ 163/ 390], loss: [0.4251], avg loss: [0.3425], time: [101.6843ms]\n", - "Epoch: [ 5/ 10], step: [ 164/ 390], loss: [0.2808], avg loss: [0.3421], time: [99.1621ms]\n", - "Epoch: [ 5/ 10], step: [ 165/ 390], loss: [0.4844], avg loss: [0.3430], time: [101.5959ms]\n", - "Epoch: [ 5/ 10], step: [ 166/ 390], loss: [0.3811], avg loss: [0.3432], time: [100.4267ms]\n", - "Epoch: [ 5/ 10], step: [ 167/ 390], loss: [0.4935], avg loss: [0.3441], time: [104.2638ms]\n", - "Epoch: [ 5/ 10], step: [ 168/ 390], loss: [0.3312], avg loss: [0.3440], time: [99.5424ms]\n", - "Epoch: [ 5/ 10], step: [ 169/ 390], loss: [0.3287], avg loss: [0.3440], time: [100.5833ms]\n", - "Epoch: [ 5/ 10], step: [ 170/ 390], loss: [0.2893], avg loss: [0.3436], time: [99.6578ms]\n", - "Epoch: [ 5/ 10], step: [ 171/ 390], loss: [0.3934], avg loss: [0.3439], time: [100.3275ms]\n", - "Epoch: [ 5/ 10], step: [ 172/ 390], loss: [0.3728], avg loss: [0.3441], time: [101.0427ms]\n", - "Epoch: [ 5/ 10], step: [ 173/ 390], loss: [0.4014], avg loss: [0.3444], time: [100.5661ms]\n", - "Epoch: [ 5/ 10], step: [ 174/ 390], loss: [0.3923], avg loss: [0.3447], time: [100.2057ms]\n", - "Epoch: [ 5/ 10], step: [ 175/ 390], loss: [0.3733], avg loss: [0.3449], time: [103.4234ms]\n", - "Epoch: [ 5/ 10], step: [ 176/ 390], loss: [0.2801], avg loss: [0.3445], time: [101.1100ms]\n", - "Epoch: [ 5/ 10], step: [ 177/ 390], loss: [0.4638], avg loss: [0.3452], time: [99.8030ms]\n", - "Epoch: [ 5/ 10], step: [ 178/ 390], loss: [0.4426], avg loss: [0.3457], time: [102.2031ms]\n", - "Epoch: [ 5/ 10], step: [ 179/ 390], loss: [0.3452], avg loss: [0.3457], time: [101.6047ms]\n", - "Epoch: [ 5/ 10], step: [ 180/ 390], loss: [0.4646], avg loss: [0.3464], time: [100.5373ms]\n", - "Epoch: [ 5/ 10], step: [ 181/ 390], loss: [0.3066], avg loss: [0.3462], time: [103.2481ms]\n", - "Epoch: [ 5/ 10], step: [ 182/ 390], loss: [0.3812], avg loss: [0.3463], time: [101.4385ms]\n", - "Epoch: [ 5/ 10], step: [ 183/ 390], loss: [0.3036], avg loss: [0.3461], time: [101.9561ms]\n", - "Epoch: [ 5/ 10], step: [ 184/ 390], loss: [0.3178], avg loss: [0.3460], time: [102.9384ms]\n", - "Epoch: [ 5/ 10], step: [ 185/ 390], loss: [0.3505], avg loss: [0.3460], time: [101.8963ms]\n", - "Epoch: [ 5/ 10], step: [ 186/ 390], loss: [0.4441], avg loss: [0.3465], time: [102.1221ms]\n", - "Epoch: [ 5/ 10], step: [ 187/ 390], loss: [0.2443], avg loss: [0.3460], time: [98.6722ms]\n", - "Epoch: [ 5/ 10], step: [ 188/ 390], loss: [0.3056], avg loss: [0.3457], time: [103.1771ms]\n", - "Epoch: [ 5/ 10], step: [ 189/ 390], loss: [0.2921], avg loss: [0.3455], time: [99.9930ms]\n", - "Epoch: [ 5/ 10], step: [ 190/ 390], loss: [0.2108], avg loss: [0.3448], time: [101.2313ms]\n", - "Epoch: [ 5/ 10], step: [ 191/ 390], loss: [0.3682], avg loss: [0.3449], time: [99.5693ms]\n", - "Epoch: [ 5/ 10], step: [ 192/ 390], loss: [0.3154], avg loss: [0.3447], time: [100.4727ms]\n", - "Epoch: [ 5/ 10], step: [ 193/ 390], loss: [0.3327], avg loss: [0.3447], time: [99.0317ms]\n", - "Epoch: [ 5/ 10], step: [ 194/ 390], loss: [0.3686], avg loss: [0.3448], time: [98.9630ms]\n", - "Epoch: [ 5/ 10], step: [ 195/ 390], loss: [0.3824], avg loss: [0.3450], time: [98.0654ms]\n", - "Epoch: [ 5/ 10], step: [ 196/ 390], loss: [0.2827], avg loss: [0.3447], time: [98.7494ms]\n", - "Epoch: [ 5/ 10], step: [ 197/ 390], loss: [0.3519], avg loss: [0.3447], time: [101.0053ms]\n", - "Epoch: [ 5/ 10], step: [ 198/ 390], loss: [0.2818], avg loss: [0.3444], time: [101.0916ms]\n", - "Epoch: [ 5/ 10], step: [ 199/ 390], loss: [0.2671], avg loss: [0.3440], time: [101.9878ms]\n", - "Epoch: [ 5/ 10], step: [ 200/ 390], loss: [0.2776], avg loss: [0.3437], time: [101.5325ms]\n", - "Epoch: [ 5/ 10], step: [ 201/ 390], loss: [0.4823], avg loss: [0.3444], time: [105.1292ms]\n", - "Epoch: [ 5/ 10], step: [ 202/ 390], loss: [0.2648], avg loss: [0.3440], time: [99.5777ms]\n", - "Epoch: [ 5/ 10], step: [ 203/ 390], loss: [0.2620], avg loss: [0.3436], time: [102.6535ms]\n", - "Epoch: [ 5/ 10], step: [ 204/ 390], loss: [0.3181], avg loss: [0.3434], time: [100.5137ms]\n", - "Epoch: [ 5/ 10], step: [ 205/ 390], loss: [0.2479], avg loss: [0.3430], time: [101.0752ms]\n", - "Epoch: [ 5/ 10], step: [ 206/ 390], loss: [0.4319], avg loss: [0.3434], time: [99.4537ms]\n", - "Epoch: [ 5/ 10], step: [ 207/ 390], loss: [0.3991], avg loss: [0.3437], time: [102.4334ms]\n", - "Epoch: [ 5/ 10], step: [ 208/ 390], loss: [0.3004], avg loss: [0.3435], time: [102.9854ms]\n", - "Epoch: [ 5/ 10], step: [ 209/ 390], loss: [0.3004], avg loss: [0.3432], time: [100.8348ms]\n", - "Epoch: [ 5/ 10], step: [ 210/ 390], loss: [0.3069], avg loss: [0.3431], time: [101.9218ms]\n", - "Epoch: [ 5/ 10], step: [ 211/ 390], loss: [0.2957], avg loss: [0.3429], time: [98.5749ms]\n", - "Epoch: [ 5/ 10], step: [ 212/ 390], loss: [0.2999], avg loss: [0.3426], time: [104.1019ms]\n" + "epoch: 5 step: 124, loss is 0.3165\n", + "epoch: 5 step: 125, loss is 0.2910\n", + "epoch: 5 step: 126, loss is 0.4151\n", + "epoch: 5 step: 127, loss is 0.3650\n", + "epoch: 5 step: 128, loss is 0.4466\n", + "epoch: 5 step: 129, loss is 0.3491\n", + "epoch: 5 step: 130, loss is 0.3943\n", + "epoch: 5 step: 131, loss is 0.3831\n", + "epoch: 5 step: 132, loss is 0.3353\n", + "epoch: 5 step: 133, loss is 0.3608\n", + "epoch: 5 step: 134, loss is 0.3089\n", + "epoch: 5 step: 135, loss is 0.3661\n", + "epoch: 5 step: 136, loss is 0.2462\n", + "epoch: 5 step: 137, loss is 0.2555\n", + "epoch: 5 step: 138, loss is 0.3958\n", + "epoch: 5 step: 139, loss is 0.3909\n", + "epoch: 5 step: 140, loss is 0.4445\n", + "epoch: 5 step: 141, loss is 0.3978\n", + "epoch: 5 step: 142, loss is 0.4142\n", + "epoch: 5 step: 143, loss is 0.5226\n", + "epoch: 5 step: 144, loss is 0.4125\n", + "epoch: 5 step: 145, loss is 0.2795\n", + "epoch: 5 step: 146, loss is 0.3510\n", + "epoch: 5 step: 147, loss is 0.3275\n", + "epoch: 5 step: 148, loss is 0.5054\n", + "epoch: 5 step: 149, loss is 0.3694\n", + "epoch: 5 step: 150, loss is 0.5045\n", + "epoch: 5 step: 151, loss is 0.3543\n", + "epoch: 5 step: 152, loss is 0.3545\n", + "epoch: 5 step: 153, loss is 0.3695\n", + "epoch: 5 step: 154, loss is 0.3324\n", + "epoch: 5 step: 155, loss is 0.4030\n", + "epoch: 5 step: 156, loss is 0.3399\n", + "epoch: 5 step: 157, loss is 0.2697\n", + "epoch: 5 step: 158, loss is 0.3390\n", + "epoch: 5 step: 159, loss is 0.3495\n", + "epoch: 5 step: 160, loss is 0.3949\n", + "epoch: 5 step: 161, loss is 0.3042\n", + "epoch: 5 step: 162, loss is 0.2852\n", + "epoch: 5 step: 163, loss is 0.4251\n", + "epoch: 5 step: 164, loss is 0.2808\n", + "epoch: 5 step: 165, loss is 0.4844\n", + "epoch: 5 step: 166, loss is 0.3811\n", + "epoch: 5 step: 167, loss is 0.4935\n", + "epoch: 5 step: 168, loss is 0.3312\n", + "epoch: 5 step: 169, loss is 0.3287\n", + "epoch: 5 step: 170, loss is 0.2893\n", + "epoch: 5 step: 171, loss is 0.3934\n", + "epoch: 5 step: 172, loss is 0.3728\n", + "epoch: 5 step: 173, loss is 0.4014\n", + "epoch: 5 step: 174, loss is 0.3923\n", + "epoch: 5 step: 175, loss is 0.3733\n", + "epoch: 5 step: 176, loss is 0.2801\n", + "epoch: 5 step: 177, loss is 0.4638\n", + "epoch: 5 step: 178, loss is 0.4426\n", + "epoch: 5 step: 179, loss is 0.3452\n", + "epoch: 5 step: 180, loss is 0.4646\n", + "epoch: 5 step: 181, loss is 0.3066\n", + "epoch: 5 step: 182, loss is 0.3812\n", + "epoch: 5 step: 183, loss is 0.3036\n", + "epoch: 5 step: 184, loss is 0.3178\n", + "epoch: 5 step: 185, loss is 0.3505\n", + "epoch: 5 step: 186, loss is 0.4441\n", + "epoch: 5 step: 187, loss is 0.2443\n", + "epoch: 5 step: 188, loss is 0.3056\n", + "epoch: 5 step: 189, loss is 0.2921\n", + "epoch: 5 step: 190, loss is 0.2108\n", + "epoch: 5 step: 191, loss is 0.3682\n", + "epoch: 5 step: 192, loss is 0.3154\n", + "epoch: 5 step: 193, loss is 0.3327\n", + "epoch: 5 step: 194, loss is 0.3686\n", + "epoch: 5 step: 195, loss is 0.3824\n", + "epoch: 5 step: 196, loss is 0.2827\n", + "epoch: 5 step: 197, loss is 0.3519\n", + "epoch: 5 step: 198, loss is 0.2818\n", + "epoch: 5 step: 199, loss is 0.2671\n", + "epoch: 5 step: 200, loss is 0.2776\n", + "epoch: 5 step: 201, loss is 0.4823\n", + "epoch: 5 step: 202, loss is 0.2648\n", + "epoch: 5 step: 203, loss is 0.2620\n", + "epoch: 5 step: 204, loss is 0.3181\n", + "epoch: 5 step: 205, loss is 0.2479\n", + "epoch: 5 step: 206, loss is 0.4319\n", + "epoch: 5 step: 207, loss is 0.3991\n", + "epoch: 5 step: 208, loss is 0.3004\n", + "epoch: 5 step: 209, loss is 0.3004\n", + "epoch: 5 step: 210, loss is 0.3069\n", + "epoch: 5 step: 211, loss is 0.2957\n", + "epoch: 5 step: 212, loss is 0.2999\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 5/ 10], step: [ 213/ 390], loss: [0.4016], avg loss: [0.3429], time: [103.5557ms]\n", - "Epoch: [ 5/ 10], step: [ 214/ 390], loss: [0.2758], avg loss: [0.3426], time: [99.9570ms]\n", - "Epoch: [ 5/ 10], step: [ 215/ 390], loss: [0.4611], avg loss: [0.3432], time: [102.7234ms]\n", - "Epoch: [ 5/ 10], step: [ 216/ 390], loss: [0.3102], avg loss: [0.3430], time: [101.8171ms]\n", - "Epoch: [ 5/ 10], step: [ 217/ 390], loss: [0.3919], avg loss: [0.3432], time: [104.2428ms]\n", - "Epoch: [ 5/ 10], step: [ 218/ 390], loss: [0.3644], avg loss: [0.3433], time: [102.9439ms]\n", - "Epoch: [ 5/ 10], step: [ 219/ 390], loss: [0.3343], avg loss: [0.3433], time: [101.6750ms]\n", - "Epoch: [ 5/ 10], step: [ 220/ 390], loss: [0.3409], avg loss: [0.3433], time: [100.8224ms]\n", - "Epoch: [ 5/ 10], step: [ 221/ 390], loss: [0.3408], avg loss: [0.3433], time: [100.2448ms]\n", - "Epoch: [ 5/ 10], step: [ 222/ 390], loss: [0.3310], avg loss: [0.3432], time: [101.1682ms]\n", - "Epoch: [ 5/ 10], step: [ 223/ 390], loss: [0.3425], avg loss: [0.3432], time: [103.1351ms]\n", - "Epoch: [ 5/ 10], step: [ 224/ 390], loss: [0.2430], avg loss: [0.3428], time: [100.4946ms]\n", - "Epoch: [ 5/ 10], step: [ 225/ 390], loss: [0.2700], avg loss: [0.3424], time: [99.5657ms]\n", - "Epoch: [ 5/ 10], step: [ 226/ 390], loss: [0.4033], avg loss: [0.3427], time: [99.9317ms]\n", - "Epoch: [ 5/ 10], step: [ 227/ 390], loss: [0.3329], avg loss: [0.3427], time: [103.3795ms]\n", - "Epoch: [ 5/ 10], step: [ 228/ 390], loss: [0.4596], avg loss: [0.3432], time: [102.3130ms]\n", - "Epoch: [ 5/ 10], step: [ 229/ 390], loss: [0.3272], avg loss: [0.3431], time: [99.1611ms]\n", - "Epoch: [ 5/ 10], step: [ 230/ 390], loss: [0.2274], avg loss: [0.3426], time: [103.5240ms]\n", - "Epoch: [ 5/ 10], step: [ 231/ 390], loss: [0.4503], avg loss: [0.3431], time: [97.9187ms]\n", - "Epoch: [ 5/ 10], step: [ 232/ 390], loss: [0.2505], avg loss: [0.3427], time: [99.9558ms]\n", - "Epoch: [ 5/ 10], step: [ 233/ 390], loss: [0.3719], avg loss: [0.3428], time: [100.0600ms]\n", - "Epoch: [ 5/ 10], step: [ 234/ 390], loss: [0.2949], avg loss: [0.3426], time: [101.8853ms]\n", - "Epoch: [ 5/ 10], step: [ 235/ 390], loss: [0.3854], avg loss: [0.3428], time: [100.0516ms]\n", - "Epoch: [ 5/ 10], step: [ 236/ 390], loss: [0.5405], avg loss: [0.3436], time: [101.5785ms]\n", - "Epoch: [ 5/ 10], step: [ 237/ 390], loss: [0.3014], avg loss: [0.3434], time: [100.5096ms]\n", - "Epoch: [ 5/ 10], step: [ 238/ 390], loss: [0.3945], avg loss: [0.3437], time: [100.4689ms]\n", - "Epoch: [ 5/ 10], step: [ 239/ 390], loss: [0.3244], avg loss: [0.3436], time: [98.5432ms]\n", - "Epoch: [ 5/ 10], step: [ 240/ 390], loss: [0.4346], avg loss: [0.3440], time: [101.4802ms]\n", - "Epoch: [ 5/ 10], step: [ 241/ 390], loss: [0.3247], avg loss: [0.3439], time: [102.1733ms]\n", - "Epoch: [ 5/ 10], step: [ 242/ 390], loss: [0.4067], avg loss: [0.3441], time: [102.2019ms]\n", - "Epoch: [ 5/ 10], step: [ 243/ 390], loss: [0.4058], avg loss: [0.3444], time: [98.9230ms]\n", - "Epoch: [ 5/ 10], step: [ 244/ 390], loss: [0.3316], avg loss: [0.3443], time: [101.2557ms]\n", - "Epoch: [ 5/ 10], step: [ 245/ 390], loss: [0.3552], avg loss: [0.3444], time: [104.2919ms]\n", - "Epoch: [ 5/ 10], step: [ 246/ 390], loss: [0.2829], avg loss: [0.3441], time: [98.7475ms]\n", - "Epoch: [ 5/ 10], step: [ 247/ 390], loss: [0.3828], avg loss: [0.3443], time: [99.9448ms]\n", - "Epoch: [ 5/ 10], step: [ 248/ 390], loss: [0.3679], avg loss: [0.3444], time: [103.4460ms]\n", - "Epoch: [ 5/ 10], step: [ 249/ 390], loss: [0.3295], avg loss: [0.3443], time: [103.2743ms]\n", - "Epoch: [ 5/ 10], step: [ 250/ 390], loss: [0.2944], avg loss: [0.3441], time: [102.1883ms]\n", - "Epoch: [ 5/ 10], step: [ 251/ 390], loss: [0.2622], avg loss: [0.3438], time: [104.0289ms]\n", - "Epoch: [ 5/ 10], step: [ 252/ 390], loss: [0.4662], avg loss: [0.3443], time: [100.0810ms]\n", - "Epoch: [ 5/ 10], step: [ 253/ 390], loss: [0.4145], avg loss: [0.3446], time: [99.1349ms]\n", - "Epoch: [ 5/ 10], step: [ 254/ 390], loss: [0.2235], avg loss: [0.3441], time: [102.3924ms]\n", - "Epoch: [ 5/ 10], step: [ 255/ 390], loss: [0.3826], avg loss: [0.3442], time: [101.0902ms]\n", - "Epoch: [ 5/ 10], step: [ 256/ 390], loss: [0.4591], avg loss: [0.3447], time: [97.8513ms]\n", - "Epoch: [ 5/ 10], step: [ 257/ 390], loss: [0.2777], avg loss: [0.3444], time: [102.4303ms]\n", - "Epoch: [ 5/ 10], step: [ 258/ 390], loss: [0.3017], avg loss: [0.3443], time: [98.4631ms]\n", - "Epoch: [ 5/ 10], step: [ 259/ 390], loss: [0.1980], avg loss: [0.3437], time: [99.7696ms]\n", - "Epoch: [ 5/ 10], step: [ 260/ 390], loss: [0.3733], avg loss: [0.3438], time: [99.4329ms]\n", - "Epoch: [ 5/ 10], step: [ 261/ 390], loss: [0.3896], avg loss: [0.3440], time: [103.1668ms]\n", - "Epoch: [ 5/ 10], step: [ 262/ 390], loss: [0.3417], avg loss: [0.3440], time: [99.0276ms]\n", - "Epoch: [ 5/ 10], step: [ 263/ 390], loss: [0.4144], avg loss: [0.3442], time: [103.0836ms]\n", - "Epoch: [ 5/ 10], step: [ 264/ 390], loss: [0.3417], avg loss: [0.3442], time: [99.4034ms]\n", - "Epoch: [ 5/ 10], step: [ 265/ 390], loss: [0.3956], avg loss: [0.3444], time: [102.0124ms]\n", - "Epoch: [ 5/ 10], step: [ 266/ 390], loss: [0.4007], avg loss: [0.3446], time: [103.3103ms]\n", - "Epoch: [ 5/ 10], step: [ 267/ 390], loss: [0.3253], avg loss: [0.3446], time: [99.8549ms]\n", - "Epoch: [ 5/ 10], step: [ 268/ 390], loss: [0.3239], avg loss: [0.3445], time: [99.4136ms]\n", - "Epoch: [ 5/ 10], step: [ 269/ 390], loss: [0.2131], avg loss: [0.3440], time: [103.1957ms]\n", - "Epoch: [ 5/ 10], step: [ 270/ 390], loss: [0.3470], avg loss: [0.3440], time: [99.1795ms]\n", - "Epoch: [ 5/ 10], step: [ 271/ 390], loss: [0.2773], avg loss: [0.3438], time: [99.3474ms]\n", - "Epoch: [ 5/ 10], step: [ 272/ 390], loss: [0.4068], avg loss: [0.3440], time: [104.6441ms]\n", - "Epoch: [ 5/ 10], step: [ 273/ 390], loss: [0.2524], avg loss: [0.3437], time: [102.6011ms]\n", - "Epoch: [ 5/ 10], step: [ 274/ 390], loss: [0.2715], avg loss: [0.3434], time: [100.2104ms]\n", - "Epoch: [ 5/ 10], step: [ 275/ 390], loss: [0.2724], avg loss: [0.3431], time: [102.6239ms]\n", - "Epoch: [ 5/ 10], step: [ 276/ 390], loss: [0.4075], avg loss: [0.3434], time: [99.4756ms]\n", - "Epoch: [ 5/ 10], step: [ 277/ 390], loss: [0.1439], avg loss: [0.3426], time: [98.9230ms]\n", - "Epoch: [ 5/ 10], step: [ 278/ 390], loss: [0.2628], avg loss: [0.3424], time: [101.5642ms]\n", - "Epoch: [ 5/ 10], step: [ 279/ 390], loss: [0.2270], avg loss: [0.3419], time: [100.0366ms]\n", - "Epoch: [ 5/ 10], step: [ 280/ 390], loss: [0.3230], avg loss: [0.3419], time: [102.0269ms]\n", - "Epoch: [ 5/ 10], step: [ 281/ 390], loss: [0.3329], avg loss: [0.3418], time: [104.5034ms]\n", - "Epoch: [ 5/ 10], step: [ 282/ 390], loss: [0.3126], avg loss: [0.3417], time: [102.5646ms]\n", - "Epoch: [ 5/ 10], step: [ 283/ 390], loss: [0.3559], avg loss: [0.3418], time: [104.7299ms]\n", - "Epoch: [ 5/ 10], step: [ 284/ 390], loss: [0.4573], avg loss: [0.3422], time: [100.5504ms]\n", - "Epoch: [ 5/ 10], step: [ 285/ 390], loss: [0.3536], avg loss: [0.3422], time: [101.4774ms]\n", - "Epoch: [ 5/ 10], step: [ 286/ 390], loss: [0.2524], avg loss: [0.3419], time: [101.6161ms]\n", - "Epoch: [ 5/ 10], step: [ 287/ 390], loss: [0.4055], avg loss: [0.3421], time: [102.3798ms]\n", - "Epoch: [ 5/ 10], step: [ 288/ 390], loss: [0.2159], avg loss: [0.3417], time: [104.3766ms]\n", - "Epoch: [ 5/ 10], step: [ 289/ 390], loss: [0.3166], avg loss: [0.3416], time: [104.6736ms]\n", - "Epoch: [ 5/ 10], step: [ 290/ 390], loss: [0.3783], avg loss: [0.3417], time: [100.4434ms]\n", - "Epoch: [ 5/ 10], step: [ 291/ 390], loss: [0.4178], avg loss: [0.3420], time: [99.9520ms]\n", - "Epoch: [ 5/ 10], step: [ 292/ 390], loss: [0.4205], avg loss: [0.3423], time: [101.4216ms]\n", - "Epoch: [ 5/ 10], step: [ 293/ 390], loss: [0.3483], avg loss: [0.3423], time: [102.9370ms]\n", - "Epoch: [ 5/ 10], step: [ 294/ 390], loss: [0.5168], avg loss: [0.3429], time: [100.2884ms]\n", - "Epoch: [ 5/ 10], step: [ 295/ 390], loss: [0.4163], avg loss: [0.3431], time: [99.8797ms]\n", - "Epoch: [ 5/ 10], step: [ 296/ 390], loss: [0.3834], avg loss: [0.3433], time: [101.5699ms]\n", - "Epoch: [ 5/ 10], step: [ 297/ 390], loss: [0.3833], avg loss: [0.3434], time: [100.7218ms]\n", - "Epoch: [ 5/ 10], step: [ 298/ 390], loss: [0.4084], avg loss: [0.3436], time: [101.0218ms]\n", - "Epoch: [ 5/ 10], step: [ 299/ 390], loss: [0.4530], avg loss: [0.3440], time: [104.2936ms]\n", - "Epoch: [ 5/ 10], step: [ 300/ 390], loss: [0.2934], avg loss: [0.3438], time: [103.4582ms]\n", - "Epoch: [ 5/ 10], step: [ 301/ 390], loss: [0.4108], avg loss: [0.3441], time: [98.2459ms]\n" + "epoch: 5 step: 213, loss is 0.4016\n", + "epoch: 5 step: 214, loss is 0.2758\n", + "epoch: 5 step: 215, loss is 0.4611\n", + "epoch: 5 step: 216, loss is 0.3102\n", + "epoch: 5 step: 217, loss is 0.3919\n", + "epoch: 5 step: 218, loss is 0.3644\n", + "epoch: 5 step: 219, loss is 0.3343\n", + "epoch: 5 step: 220, loss is 0.3409\n", + "epoch: 5 step: 221, loss is 0.3408\n", + "epoch: 5 step: 222, loss is 0.3310\n", + "epoch: 5 step: 223, loss is 0.3425\n", + "epoch: 5 step: 224, loss is 0.2430\n", + "epoch: 5 step: 225, loss is 0.2700\n", + "epoch: 5 step: 226, loss is 0.4033\n", + "epoch: 5 step: 227, loss is 0.3329\n", + "epoch: 5 step: 228, loss is 0.4596\n", + "epoch: 5 step: 229, loss is 0.3272\n", + "epoch: 5 step: 230, loss is 0.2274\n", + "epoch: 5 step: 231, loss is 0.4503\n", + "epoch: 5 step: 232, loss is 0.2505\n", + "epoch: 5 step: 233, loss is 0.3719\n", + "epoch: 5 step: 234, loss is 0.2949\n", + "epoch: 5 step: 235, loss is 0.3854\n", + "epoch: 5 step: 236, loss is 0.5405\n", + "epoch: 5 step: 237, loss is 0.3014\n", + "epoch: 5 step: 238, loss is 0.3945\n", + "epoch: 5 step: 239, loss is 0.3244\n", + "epoch: 5 step: 240, loss is 0.4346\n", + "epoch: 5 step: 241, loss is 0.3247\n", + "epoch: 5 step: 242, loss is 0.4067\n", + "epoch: 5 step: 243, loss is 0.4058\n", + "epoch: 5 step: 244, loss is 0.3316\n", + "epoch: 5 step: 245, loss is 0.3552\n", + "epoch: 5 step: 246, loss is 0.2829\n", + "epoch: 5 step: 247, loss is 0.3828\n", + "epoch: 5 step: 248, loss is 0.3679\n", + "epoch: 5 step: 249, loss is 0.3295\n", + "epoch: 5 step: 250, loss is 0.2944\n", + "epoch: 5 step: 251, loss is 0.2622\n", + "epoch: 5 step: 252, loss is 0.4662\n", + "epoch: 5 step: 253, loss is 0.4145\n", + "epoch: 5 step: 254, loss is 0.2235\n", + "epoch: 5 step: 255, loss is 0.3826\n", + "epoch: 5 step: 256, loss is 0.4591\n", + "epoch: 5 step: 257, loss is 0.2777\n", + "epoch: 5 step: 258, loss is 0.3017\n", + "epoch: 5 step: 259, loss is 0.1980\n", + "epoch: 5 step: 260, loss is 0.3733\n", + "epoch: 5 step: 261, loss is 0.3896\n", + "epoch: 5 step: 262, loss is 0.3417\n", + "epoch: 5 step: 263, loss is 0.4144\n", + "epoch: 5 step: 264, loss is 0.3417\n", + "epoch: 5 step: 265, loss is 0.3956\n", + "epoch: 5 step: 266, loss is 0.4007\n", + "epoch: 5 step: 267, loss is 0.3253\n", + "epoch: 5 step: 268, loss is 0.3239\n", + "epoch: 5 step: 269, loss is 0.2131\n", + "epoch: 5 step: 270, loss is 0.3470\n", + "epoch: 5 step: 271, loss is 0.2773\n", + "epoch: 5 step: 272, loss is 0.4068\n", + "epoch: 5 step: 273, loss is 0.2524\n", + "epoch: 5 step: 274, loss is 0.2715\n", + "epoch: 5 step: 275, loss is 0.2724\n", + "epoch: 5 step: 276, loss is 0.4075\n", + "epoch: 5 step: 277, loss is 0.1439\n", + "epoch: 5 step: 278, loss is 0.2628\n", + "epoch: 5 step: 279, loss is 0.2270\n", + "epoch: 5 step: 280, loss is 0.3230\n", + "epoch: 5 step: 281, loss is 0.3329\n", + "epoch: 5 step: 282, loss is 0.3126\n", + "epoch: 5 step: 283, loss is 0.3559\n", + "epoch: 5 step: 284, loss is 0.4573\n", + "epoch: 5 step: 285, loss is 0.3536\n", + "epoch: 5 step: 286, loss is 0.2524\n", + "epoch: 5 step: 287, loss is 0.4055\n", + "epoch: 5 step: 288, loss is 0.2159\n", + "epoch: 5 step: 289, loss is 0.3166\n", + "epoch: 5 step: 290, loss is 0.3783\n", + "epoch: 5 step: 291, loss is 0.4178\n", + "epoch: 5 step: 292, loss is 0.4205\n", + "epoch: 5 step: 293, loss is 0.3483\n", + "epoch: 5 step: 294, loss is 0.5168\n", + "epoch: 5 step: 295, loss is 0.4163\n", + "epoch: 5 step: 296, loss is 0.3834\n", + "epoch: 5 step: 297, loss is 0.3833\n", + "epoch: 5 step: 298, loss is 0.4084\n", + "epoch: 5 step: 299, loss is 0.4530\n", + "epoch: 5 step: 300, loss is 0.2934\n", + "epoch: 5 step: 301, loss is 0.4108\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 5/ 10], step: [ 302/ 390], loss: [0.3439], avg loss: [0.3440], time: [102.3622ms]\n", - "Epoch: [ 5/ 10], step: [ 303/ 390], loss: [0.4070], avg loss: [0.3443], time: [104.6326ms]\n", - "Epoch: [ 5/ 10], step: [ 304/ 390], loss: [0.4360], avg loss: [0.3446], time: [100.9424ms]\n", - "Epoch: [ 5/ 10], step: [ 305/ 390], loss: [0.4695], avg loss: [0.3450], time: [98.6810ms]\n", - "Epoch: [ 5/ 10], step: [ 306/ 390], loss: [0.2571], avg loss: [0.3447], time: [101.9230ms]\n", - "Epoch: [ 5/ 10], step: [ 307/ 390], loss: [0.2597], avg loss: [0.3444], time: [98.5708ms]\n", - "Epoch: [ 5/ 10], step: [ 308/ 390], loss: [0.3709], avg loss: [0.3445], time: [98.8483ms]\n", - "Epoch: [ 5/ 10], step: [ 309/ 390], loss: [0.2729], avg loss: [0.3443], time: [100.6372ms]\n", - "Epoch: [ 5/ 10], step: [ 310/ 390], loss: [0.3060], avg loss: [0.3441], time: [100.9982ms]\n", - "Epoch: [ 5/ 10], step: [ 311/ 390], loss: [0.2724], avg loss: [0.3439], time: [102.2642ms]\n", - "Epoch: [ 5/ 10], step: [ 312/ 390], loss: [0.4042], avg loss: [0.3441], time: [101.1384ms]\n", - "Epoch: [ 5/ 10], step: [ 313/ 390], loss: [0.3170], avg loss: [0.3440], time: [101.8574ms]\n", - "Epoch: [ 5/ 10], step: [ 314/ 390], loss: [0.2852], avg loss: [0.3438], time: [100.0388ms]\n", - "Epoch: [ 5/ 10], step: [ 315/ 390], loss: [0.3810], avg loss: [0.3439], time: [103.9889ms]\n", - "Epoch: [ 5/ 10], step: [ 316/ 390], loss: [0.4999], avg loss: [0.3444], time: [104.8150ms]\n", - "Epoch: [ 5/ 10], step: [ 317/ 390], loss: [0.3802], avg loss: [0.3445], time: [104.3453ms]\n", - "Epoch: [ 5/ 10], step: [ 318/ 390], loss: [0.4756], avg loss: [0.3450], time: [100.0638ms]\n", - "Epoch: [ 5/ 10], step: [ 319/ 390], loss: [0.2718], avg loss: [0.3447], time: [102.3681ms]\n", - "Epoch: [ 5/ 10], step: [ 320/ 390], loss: [0.4197], avg loss: [0.3450], time: [101.9974ms]\n", - "Epoch: [ 5/ 10], step: [ 321/ 390], loss: [0.2601], avg loss: [0.3447], time: [104.8346ms]\n", - "Epoch: [ 5/ 10], step: [ 322/ 390], loss: [0.2091], avg loss: [0.3443], time: [98.9163ms]\n", - "Epoch: [ 5/ 10], step: [ 323/ 390], loss: [0.4082], avg loss: [0.3445], time: [103.3297ms]\n", - "Epoch: [ 5/ 10], step: [ 324/ 390], loss: [0.2823], avg loss: [0.3443], time: [98.2492ms]\n", - "Epoch: [ 5/ 10], step: [ 325/ 390], loss: [0.3926], avg loss: [0.3444], time: [100.7259ms]\n", - "Epoch: [ 5/ 10], step: [ 326/ 390], loss: [0.2773], avg loss: [0.3442], time: [102.9773ms]\n", - "Epoch: [ 5/ 10], step: [ 327/ 390], loss: [0.4278], avg loss: [0.3445], time: [102.3576ms]\n", - "Epoch: [ 5/ 10], step: [ 328/ 390], loss: [0.2811], avg loss: [0.3443], time: [100.8708ms]\n", - "Epoch: [ 5/ 10], step: [ 329/ 390], loss: [0.2949], avg loss: [0.3441], time: [102.0057ms]\n", - "Epoch: [ 5/ 10], step: [ 330/ 390], loss: [0.3619], avg loss: [0.3442], time: [98.7999ms]\n", - "Epoch: [ 5/ 10], step: [ 331/ 390], loss: [0.3774], avg loss: [0.3443], time: [103.1680ms]\n", - "Epoch: [ 5/ 10], step: [ 332/ 390], loss: [0.3439], avg loss: [0.3443], time: [100.1785ms]\n", - "Epoch: [ 5/ 10], step: [ 333/ 390], loss: [0.3816], avg loss: [0.3444], time: [99.3967ms]\n", - "Epoch: [ 5/ 10], step: [ 334/ 390], loss: [0.3978], avg loss: [0.3446], time: [97.2393ms]\n", - "Epoch: [ 5/ 10], step: [ 335/ 390], loss: [0.3064], avg loss: [0.3445], time: [102.1647ms]\n", - "Epoch: [ 5/ 10], step: [ 336/ 390], loss: [0.4427], avg loss: [0.3447], time: [102.5717ms]\n", - "Epoch: [ 5/ 10], step: [ 337/ 390], loss: [0.3247], avg loss: [0.3447], time: [101.8920ms]\n", - "Epoch: [ 5/ 10], step: [ 338/ 390], loss: [0.3244], avg loss: [0.3446], time: [100.7366ms]\n", - "Epoch: [ 5/ 10], step: [ 339/ 390], loss: [0.4572], avg loss: [0.3450], time: [99.7155ms]\n", - "Epoch: [ 5/ 10], step: [ 340/ 390], loss: [0.3603], avg loss: [0.3450], time: [102.0741ms]\n", - "Epoch: [ 5/ 10], step: [ 341/ 390], loss: [0.2594], avg loss: [0.3448], time: [103.5466ms]\n", - "Epoch: [ 5/ 10], step: [ 342/ 390], loss: [0.4625], avg loss: [0.3451], time: [102.2420ms]\n", - "Epoch: [ 5/ 10], step: [ 343/ 390], loss: [0.4464], avg loss: [0.3454], time: [103.6968ms]\n", - "Epoch: [ 5/ 10], step: [ 344/ 390], loss: [0.3788], avg loss: [0.3455], time: [102.4415ms]\n", - "Epoch: [ 5/ 10], step: [ 345/ 390], loss: [0.3054], avg loss: [0.3454], time: [104.5787ms]\n", - "Epoch: [ 5/ 10], step: [ 346/ 390], loss: [0.4174], avg loss: [0.3456], time: [102.5310ms]\n", - "Epoch: [ 5/ 10], step: [ 347/ 390], loss: [0.2062], avg loss: [0.3452], time: [105.1259ms]\n", - "Epoch: [ 5/ 10], step: [ 348/ 390], loss: [0.3455], avg loss: [0.3452], time: [99.9708ms]\n", - "Epoch: [ 5/ 10], step: [ 349/ 390], loss: [0.4392], avg loss: [0.3454], time: [102.7250ms]\n", - "Epoch: [ 5/ 10], step: [ 350/ 390], loss: [0.3018], avg loss: [0.3453], time: [103.0922ms]\n", - "Epoch: [ 5/ 10], step: [ 351/ 390], loss: [0.2346], avg loss: [0.3450], time: [103.0869ms]\n", - "Epoch: [ 5/ 10], step: [ 352/ 390], loss: [0.2619], avg loss: [0.3448], time: [102.4044ms]\n", - "Epoch: [ 5/ 10], step: [ 353/ 390], loss: [0.2922], avg loss: [0.3446], time: [99.6437ms]\n", - "Epoch: [ 5/ 10], step: [ 354/ 390], loss: [0.2231], avg loss: [0.3443], time: [98.7210ms]\n", - "Epoch: [ 5/ 10], step: [ 355/ 390], loss: [0.4164], avg loss: [0.3445], time: [100.8945ms]\n", - "Epoch: [ 5/ 10], step: [ 356/ 390], loss: [0.2650], avg loss: [0.3443], time: [104.3913ms]\n", - "Epoch: [ 5/ 10], step: [ 357/ 390], loss: [0.2103], avg loss: [0.3439], time: [104.9824ms]\n", - "Epoch: [ 5/ 10], step: [ 358/ 390], loss: [0.4690], avg loss: [0.3442], time: [98.4094ms]\n", - "Epoch: [ 5/ 10], step: [ 359/ 390], loss: [0.2352], avg loss: [0.3439], time: [99.5321ms]\n", - "Epoch: [ 5/ 10], step: [ 360/ 390], loss: [0.1806], avg loss: [0.3435], time: [101.3615ms]\n", - "Epoch: [ 5/ 10], step: [ 361/ 390], loss: [0.3843], avg loss: [0.3436], time: [98.7771ms]\n", - "Epoch: [ 5/ 10], step: [ 362/ 390], loss: [0.2840], avg loss: [0.3434], time: [102.2055ms]\n", - "Epoch: [ 5/ 10], step: [ 363/ 390], loss: [0.2744], avg loss: [0.3432], time: [99.4380ms]\n", - "Epoch: [ 5/ 10], step: [ 364/ 390], loss: [0.3938], avg loss: [0.3434], time: [99.0360ms]\n", - "Epoch: [ 5/ 10], step: [ 365/ 390], loss: [0.2933], avg loss: [0.3432], time: [100.8515ms]\n", - "Epoch: [ 5/ 10], step: [ 366/ 390], loss: [0.4054], avg loss: [0.3434], time: [100.8928ms]\n", - "Epoch: [ 5/ 10], step: [ 367/ 390], loss: [0.3868], avg loss: [0.3435], time: [103.4963ms]\n", - "Epoch: [ 5/ 10], step: [ 368/ 390], loss: [0.5758], avg loss: [0.3442], time: [99.6766ms]\n", - "Epoch: [ 5/ 10], step: [ 369/ 390], loss: [0.4107], avg loss: [0.3443], time: [99.5615ms]\n", - "Epoch: [ 5/ 10], step: [ 370/ 390], loss: [0.1999], avg loss: [0.3439], time: [100.8880ms]\n", - "Epoch: [ 5/ 10], step: [ 371/ 390], loss: [0.3547], avg loss: [0.3440], time: [99.0014ms]\n", - "Epoch: [ 5/ 10], step: [ 372/ 390], loss: [0.4353], avg loss: [0.3442], time: [102.4394ms]\n", - "Epoch: [ 5/ 10], step: [ 373/ 390], loss: [0.4284], avg loss: [0.3444], time: [102.0892ms]\n", - "Epoch: [ 5/ 10], step: [ 374/ 390], loss: [0.4428], avg loss: [0.3447], time: [98.7518ms]\n", - "Epoch: [ 5/ 10], step: [ 375/ 390], loss: [0.3787], avg loss: [0.3448], time: [105.1798ms]\n", - "Epoch: [ 5/ 10], step: [ 376/ 390], loss: [0.4395], avg loss: [0.3451], time: [99.2367ms]\n", - "Epoch: [ 5/ 10], step: [ 377/ 390], loss: [0.4732], avg loss: [0.3454], time: [103.2321ms]\n", - "Epoch: [ 5/ 10], step: [ 378/ 390], loss: [0.5450], avg loss: [0.3459], time: [102.8581ms]\n", - "Epoch: [ 5/ 10], step: [ 379/ 390], loss: [0.4199], avg loss: [0.3461], time: [98.7153ms]\n", - "Epoch: [ 5/ 10], step: [ 380/ 390], loss: [0.3545], avg loss: [0.3461], time: [102.5324ms]\n", - "Epoch: [ 5/ 10], step: [ 381/ 390], loss: [0.3200], avg loss: [0.3461], time: [103.4598ms]\n", - "Epoch: [ 5/ 10], step: [ 382/ 390], loss: [0.2886], avg loss: [0.3459], time: [102.0243ms]\n", - "Epoch: [ 5/ 10], step: [ 383/ 390], loss: [0.4360], avg loss: [0.3462], time: [101.8207ms]\n", - "Epoch: [ 5/ 10], step: [ 384/ 390], loss: [0.3312], avg loss: [0.3461], time: [100.0907ms]\n", - "Epoch: [ 5/ 10], step: [ 385/ 390], loss: [0.4088], avg loss: [0.3463], time: [99.1423ms]\n", - "Epoch: [ 5/ 10], step: [ 386/ 390], loss: [0.2987], avg loss: [0.3462], time: [102.1488ms]\n", - "Epoch: [ 5/ 10], step: [ 387/ 390], loss: [0.3314], avg loss: [0.3461], time: [104.4145ms]\n", - "Epoch: [ 5/ 10], step: [ 388/ 390], loss: [0.3461], avg loss: [0.3461], time: [99.1771ms]\n", - "Epoch: [ 5/ 10], step: [ 389/ 390], loss: [0.2056], avg loss: [0.3458], time: [98.6011ms]\n", - "Epoch: [ 5/ 10], step: [ 390/ 390], loss: [0.3620], avg loss: [0.3458], time: [866.3347ms]\n" + "epoch: 5 step: 302, loss is 0.3439\n", + "epoch: 5 step: 303, loss is 0.4070\n", + "epoch: 5 step: 304, loss is 0.4360\n", + "epoch: 5 step: 305, loss is 0.4695\n", + "epoch: 5 step: 306, loss is 0.2571\n", + "epoch: 5 step: 307, loss is 0.2597\n", + "epoch: 5 step: 308, loss is 0.3709\n", + "epoch: 5 step: 309, loss is 0.2729\n", + "epoch: 5 step: 310, loss is 0.3060\n", + "epoch: 5 step: 311, loss is 0.2724\n", + "epoch: 5 step: 312, loss is 0.4042\n", + "epoch: 5 step: 313, loss is 0.3170\n", + "epoch: 5 step: 314, loss is 0.2852\n", + "epoch: 5 step: 315, loss is 0.3810\n", + "epoch: 5 step: 316, loss is 0.4999\n", + "epoch: 5 step: 317, loss is 0.3802\n", + "epoch: 5 step: 318, loss is 0.4756\n", + "epoch: 5 step: 319, loss is 0.2718\n", + "epoch: 5 step: 320, loss is 0.4197\n", + "epoch: 5 step: 321, loss is 0.2601\n", + "epoch: 5 step: 322, loss is 0.2091\n", + "epoch: 5 step: 323, loss is 0.4082\n", + "epoch: 5 step: 324, loss is 0.2823\n", + "epoch: 5 step: 325, loss is 0.3926\n", + "epoch: 5 step: 326, loss is 0.2773\n", + "epoch: 5 step: 327, loss is 0.4278\n", + "epoch: 5 step: 328, loss is 0.2811\n", + "epoch: 5 step: 329, loss is 0.2949\n", + "epoch: 5 step: 330, loss is 0.3619\n", + "epoch: 5 step: 331, loss is 0.3774\n", + "epoch: 5 step: 332, loss is 0.3439\n", + "epoch: 5 step: 333, loss is 0.3816\n", + "epoch: 5 step: 334, loss is 0.3978\n", + "epoch: 5 step: 335, loss is 0.3064\n", + "epoch: 5 step: 336, loss is 0.4427\n", + "epoch: 5 step: 337, loss is 0.3247\n", + "epoch: 5 step: 338, loss is 0.3244\n", + "epoch: 5 step: 339, loss is 0.4572\n", + "epoch: 5 step: 340, loss is 0.3603\n", + "epoch: 5 step: 341, loss is 0.2594\n", + "epoch: 5 step: 342, loss is 0.4625\n", + "epoch: 5 step: 343, loss is 0.4464\n", + "epoch: 5 step: 344, loss is 0.3788\n", + "epoch: 5 step: 345, loss is 0.3054\n", + "epoch: 5 step: 346, loss is 0.4174\n", + "epoch: 5 step: 347, loss is 0.2062\n", + "epoch: 5 step: 348, loss is 0.3455\n", + "epoch: 5 step: 349, loss is 0.4392\n", + "epoch: 5 step: 350, loss is 0.3018\n", + "epoch: 5 step: 351, loss is 0.2346\n", + "epoch: 5 step: 352, loss is 0.2619\n", + "epoch: 5 step: 353, loss is 0.2922\n", + "epoch: 5 step: 354, loss is 0.2231\n", + "epoch: 5 step: 355, loss is 0.4164\n", + "epoch: 5 step: 356, loss is 0.2650\n", + "epoch: 5 step: 357, loss is 0.2103\n", + "epoch: 5 step: 358, loss is 0.4690\n", + "epoch: 5 step: 359, loss is 0.2352\n", + "epoch: 5 step: 360, loss is 0.1806\n", + "epoch: 5 step: 361, loss is 0.3843\n", + "epoch: 5 step: 362, loss is 0.2840\n", + "epoch: 5 step: 363, loss is 0.2744\n", + "epoch: 5 step: 364, loss is 0.3938\n", + "epoch: 5 step: 365, loss is 0.2933\n", + "epoch: 5 step: 366, loss is 0.4054\n", + "epoch: 5 step: 367, loss is 0.3868\n", + "epoch: 5 step: 368, loss is 0.5758\n", + "epoch: 5 step: 369, loss is 0.4107\n", + "epoch: 5 step: 370, loss is 0.1999\n", + "epoch: 5 step: 371, loss is 0.3547\n", + "epoch: 5 step: 372, loss is 0.4353\n", + "epoch: 5 step: 373, loss is 0.4284\n", + "epoch: 5 step: 374, loss is 0.4428\n", + "epoch: 5 step: 375, loss is 0.3787\n", + "epoch: 5 step: 376, loss is 0.4395\n", + "epoch: 5 step: 377, loss is 0.4732\n", + "epoch: 5 step: 378, loss is 0.5450\n", + "epoch: 5 step: 379, loss is 0.4199\n", + "epoch: 5 step: 380, loss is 0.3545\n", + "epoch: 5 step: 381, loss is 0.3200\n", + "epoch: 5 step: 382, loss is 0.2886\n", + "epoch: 5 step: 383, loss is 0.4360\n", + "epoch: 5 step: 384, loss is 0.3312\n", + "epoch: 5 step: 385, loss is 0.4088\n", + "epoch: 5 step: 386, loss is 0.2987\n", + "epoch: 5 step: 387, loss is 0.3314\n", + "epoch: 5 step: 388, loss is 0.3461\n", + "epoch: 5 step: 389, loss is 0.2056\n", + "epoch: 5 step: 390, loss is 0.3620\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch time: 40546.816, per step time: 103.966\n", "Epoch time: 40547.118, per step time: 103.967, avg loss: 0.346\n", "************************************************************\n", - "Epoch: [ 6/ 10], step: [ 1/ 390], loss: [0.3137], avg loss: [0.3137], time: [102.8788ms]\n", - "Epoch: [ 6/ 10], step: [ 2/ 390], loss: [0.3295], avg loss: [0.3216], time: [107.4462ms]\n", - "Epoch: [ 6/ 10], step: [ 3/ 390], loss: [0.4285], avg loss: [0.3572], time: [107.7762ms]\n", - "Epoch: [ 6/ 10], step: [ 4/ 390], loss: [0.2917], avg loss: [0.3409], time: [104.9762ms]\n", - "Epoch: [ 6/ 10], step: [ 5/ 390], loss: [0.3357], avg loss: [0.3398], time: [104.1481ms]\n", - "Epoch: [ 6/ 10], step: [ 6/ 390], loss: [0.3456], avg loss: [0.3408], time: [105.6588ms]\n", - "Epoch: [ 6/ 10], step: [ 7/ 390], loss: [0.4375], avg loss: [0.3546], time: [105.3269ms]\n", - "Epoch: [ 6/ 10], step: [ 8/ 390], loss: [0.3685], avg loss: [0.3563], time: [100.5785ms]\n", - "Epoch: [ 6/ 10], step: [ 9/ 390], loss: [0.2734], avg loss: [0.3471], time: [106.1952ms]\n", - "Epoch: [ 6/ 10], step: [ 10/ 390], loss: [0.2983], avg loss: [0.3422], time: [103.3828ms]\n", - "Epoch: [ 6/ 10], step: [ 11/ 390], loss: [0.3373], avg loss: [0.3418], time: [108.0239ms]\n", - "Epoch: [ 6/ 10], step: [ 12/ 390], loss: [0.3792], avg loss: [0.3449], time: [104.2621ms]\n", - "Epoch: [ 6/ 10], step: [ 13/ 390], loss: [0.2534], avg loss: [0.3379], time: [104.8844ms]\n", - "Epoch: [ 6/ 10], step: [ 14/ 390], loss: [0.2555], avg loss: [0.3320], time: [106.4305ms]\n", - "Epoch: [ 6/ 10], step: [ 15/ 390], loss: [0.2536], avg loss: [0.3268], time: [107.0900ms]\n", - "Epoch: [ 6/ 10], step: [ 16/ 390], loss: [0.2763], avg loss: [0.3236], time: [105.6392ms]\n", - "Epoch: [ 6/ 10], step: [ 17/ 390], loss: [0.3496], avg loss: [0.3251], time: [108.0658ms]\n", - "Epoch: [ 6/ 10], step: [ 18/ 390], loss: [0.2546], avg loss: [0.3212], time: [103.7407ms]\n", - "Epoch: [ 6/ 10], step: [ 19/ 390], loss: [0.4003], avg loss: [0.3254], time: [103.9438ms]\n", - "Epoch: [ 6/ 10], step: [ 20/ 390], loss: [0.4276], avg loss: [0.3305], time: [106.1435ms]\n", - "Epoch: [ 6/ 10], step: [ 21/ 390], loss: [0.3958], avg loss: [0.3336], time: [67.6832ms]\n", - "Epoch: [ 6/ 10], step: [ 22/ 390], loss: [0.2281], avg loss: [0.3288], time: [105.9005ms]\n", - "Epoch: [ 6/ 10], step: [ 23/ 390], loss: [0.3480], avg loss: [0.3296], time: [106.1544ms]\n", - "Epoch: [ 6/ 10], step: [ 24/ 390], loss: [0.3870], avg loss: [0.3320], time: [105.8366ms]\n", - "Epoch: [ 6/ 10], step: [ 25/ 390], loss: [0.2697], avg loss: [0.3295], time: [106.5907ms]\n", - "Epoch: [ 6/ 10], step: [ 26/ 390], loss: [0.2907], avg loss: [0.3280], time: [103.4799ms]\n", - "Epoch: [ 6/ 10], step: [ 27/ 390], loss: [0.3572], avg loss: [0.3291], time: [108.3992ms]\n", - "Epoch: [ 6/ 10], step: [ 28/ 390], loss: [0.3893], avg loss: [0.3313], time: [107.5842ms]\n", - "Epoch: [ 6/ 10], step: [ 29/ 390], loss: [0.2259], avg loss: [0.3276], time: [105.2296ms]\n", - "Epoch: [ 6/ 10], step: [ 30/ 390], loss: [0.3245], avg loss: [0.3275], time: [100.3568ms]\n", - "Epoch: [ 6/ 10], step: [ 31/ 390], loss: [0.3229], avg loss: [0.3274], time: [103.5793ms]\n", - "Epoch: [ 6/ 10], step: [ 32/ 390], loss: [0.4215], avg loss: [0.3303], time: [101.8555ms]\n", - "Epoch: [ 6/ 10], step: [ 33/ 390], loss: [0.3496], avg loss: [0.3309], time: [107.1312ms]\n", - "Epoch: [ 6/ 10], step: [ 34/ 390], loss: [0.2681], avg loss: [0.3291], time: [99.5495ms]\n", - "Epoch: [ 6/ 10], step: [ 35/ 390], loss: [0.2482], avg loss: [0.3268], time: [104.9037ms]\n", - "Epoch: [ 6/ 10], step: [ 36/ 390], loss: [0.2724], avg loss: [0.3252], time: [103.7097ms]\n", - "Epoch: [ 6/ 10], step: [ 37/ 390], loss: [0.2379], avg loss: [0.3229], time: [104.9495ms]\n", - "Epoch: [ 6/ 10], step: [ 38/ 390], loss: [0.3819], avg loss: [0.3244], time: [107.4395ms]\n", - "Epoch: [ 6/ 10], step: [ 39/ 390], loss: [0.3537], avg loss: [0.3252], time: [102.6006ms]\n", - "Epoch: [ 6/ 10], step: [ 40/ 390], loss: [0.4310], avg loss: [0.3278], time: [103.6968ms]\n", - "Epoch: [ 6/ 10], step: [ 41/ 390], loss: [0.2783], avg loss: [0.3266], time: [105.6654ms]\n", - "Epoch: [ 6/ 10], step: [ 42/ 390], loss: [0.2990], avg loss: [0.3260], time: [104.0280ms]\n", - "Epoch: [ 6/ 10], step: [ 43/ 390], loss: [0.2777], avg loss: [0.3248], time: [106.4887ms]\n", - "Epoch: [ 6/ 10], step: [ 44/ 390], loss: [0.3549], avg loss: [0.3255], time: [100.8370ms]\n", - "Epoch: [ 6/ 10], step: [ 45/ 390], loss: [0.3157], avg loss: [0.3253], time: [102.5386ms]\n", - "Epoch: [ 6/ 10], step: [ 46/ 390], loss: [0.3321], avg loss: [0.3255], time: [101.4981ms]\n", - "Epoch: [ 6/ 10], step: [ 47/ 390], loss: [0.3563], avg loss: [0.3261], time: [107.5685ms]\n", - "Epoch: [ 6/ 10], step: [ 48/ 390], loss: [0.4130], avg loss: [0.3279], time: [104.3987ms]\n", - "Epoch: [ 6/ 10], step: [ 49/ 390], loss: [0.3645], avg loss: [0.3287], time: [105.2272ms]\n", - "Epoch: [ 6/ 10], step: [ 50/ 390], loss: [0.2529], avg loss: [0.3272], time: [101.3525ms]\n", - "Epoch: [ 6/ 10], step: [ 51/ 390], loss: [0.2823], avg loss: [0.3263], time: [106.8044ms]\n", - "Epoch: [ 6/ 10], step: [ 52/ 390], loss: [0.3664], avg loss: [0.3270], time: [104.0533ms]\n", - "Epoch: [ 6/ 10], step: [ 53/ 390], loss: [0.2778], avg loss: [0.3261], time: [104.0053ms]\n", - "Epoch: [ 6/ 10], step: [ 54/ 390], loss: [0.2984], avg loss: [0.3256], time: [103.2319ms]\n", - "Epoch: [ 6/ 10], step: [ 55/ 390], loss: [0.2269], avg loss: [0.3238], time: [103.8632ms]\n", - "Epoch: [ 6/ 10], step: [ 56/ 390], loss: [0.4109], avg loss: [0.3254], time: [100.3292ms]\n", - "Epoch: [ 6/ 10], step: [ 57/ 390], loss: [0.4286], avg loss: [0.3272], time: [103.4818ms]\n", - "Epoch: [ 6/ 10], step: [ 58/ 390], loss: [0.2945], avg loss: [0.3266], time: [101.8953ms]\n", - "Epoch: [ 6/ 10], step: [ 59/ 390], loss: [0.4755], avg loss: [0.3291], time: [104.1601ms]\n", - "Epoch: [ 6/ 10], step: [ 60/ 390], loss: [0.4181], avg loss: [0.3306], time: [103.5502ms]\n", - "Epoch: [ 6/ 10], step: [ 61/ 390], loss: [0.4213], avg loss: [0.3321], time: [105.1967ms]\n", - "Epoch: [ 6/ 10], step: [ 62/ 390], loss: [0.1686], avg loss: [0.3295], time: [105.5331ms]\n", - "Epoch: [ 6/ 10], step: [ 63/ 390], loss: [0.2477], avg loss: [0.3282], time: [106.0550ms]\n", - "Epoch: [ 6/ 10], step: [ 64/ 390], loss: [0.2404], avg loss: [0.3268], time: [106.8165ms]\n", - "Epoch: [ 6/ 10], step: [ 65/ 390], loss: [0.3538], avg loss: [0.3272], time: [109.0724ms]\n", - "Epoch: [ 6/ 10], step: [ 66/ 390], loss: [0.2904], avg loss: [0.3267], time: [104.3298ms]\n", - "Epoch: [ 6/ 10], step: [ 67/ 390], loss: [0.4119], avg loss: [0.3279], time: [108.0468ms]\n", - "Epoch: [ 6/ 10], step: [ 68/ 390], loss: [0.3131], avg loss: [0.3277], time: [105.8502ms]\n", - "Epoch: [ 6/ 10], step: [ 69/ 390], loss: [0.4042], avg loss: [0.3288], time: [108.2604ms]\n", - "Epoch: [ 6/ 10], step: [ 70/ 390], loss: [0.4035], avg loss: [0.3299], time: [104.0134ms]\n", - "Epoch: [ 6/ 10], step: [ 71/ 390], loss: [0.3474], avg loss: [0.3301], time: [106.9710ms]\n", - "Epoch: [ 6/ 10], step: [ 72/ 390], loss: [0.4037], avg loss: [0.3312], time: [102.5674ms]\n", - "Epoch: [ 6/ 10], step: [ 73/ 390], loss: [0.2797], avg loss: [0.3305], time: [102.9291ms]\n", - "Epoch: [ 6/ 10], step: [ 74/ 390], loss: [0.3334], avg loss: [0.3305], time: [106.0703ms]\n", - "Epoch: [ 6/ 10], step: [ 75/ 390], loss: [0.2892], avg loss: [0.3299], time: [102.3366ms]\n", - "Epoch: [ 6/ 10], step: [ 76/ 390], loss: [0.4234], avg loss: [0.3312], time: [105.3853ms]\n", - "Epoch: [ 6/ 10], step: [ 77/ 390], loss: [0.2536], avg loss: [0.3302], time: [105.9346ms]\n", - "Epoch: [ 6/ 10], step: [ 78/ 390], loss: [0.3701], avg loss: [0.3307], time: [102.2868ms]\n", - "Epoch: [ 6/ 10], step: [ 79/ 390], loss: [0.4579], avg loss: [0.3323], time: [103.2307ms]\n", - "Epoch: [ 6/ 10], step: [ 80/ 390], loss: [0.3049], avg loss: [0.3319], time: [101.6500ms]\n", - "Epoch: [ 6/ 10], step: [ 81/ 390], loss: [0.3158], avg loss: [0.3317], time: [102.8347ms]\n", - "Epoch: [ 6/ 10], step: [ 82/ 390], loss: [0.4254], avg loss: [0.3329], time: [105.4041ms]\n", - "Epoch: [ 6/ 10], step: [ 83/ 390], loss: [0.2563], avg loss: [0.3320], time: [103.4667ms]\n", - "Epoch: [ 6/ 10], step: [ 84/ 390], loss: [0.3178], avg loss: [0.3318], time: [101.6469ms]\n", - "Epoch: [ 6/ 10], step: [ 85/ 390], loss: [0.3254], avg loss: [0.3317], time: [103.3061ms]\n", - "Epoch: [ 6/ 10], step: [ 86/ 390], loss: [0.2758], avg loss: [0.3311], time: [102.5198ms]\n", - "Epoch: [ 6/ 10], step: [ 87/ 390], loss: [0.4271], avg loss: [0.3322], time: [103.7211ms]\n" + "epoch: 6 step: 1, loss is 0.3137\n", + "epoch: 6 step: 2, loss is 0.3295\n", + "epoch: 6 step: 3, loss is 0.4285\n", + "epoch: 6 step: 4, loss is 0.2917\n", + "epoch: 6 step: 5, loss is 0.3357\n", + "epoch: 6 step: 6, loss is 0.3456\n", + "epoch: 6 step: 7, loss is 0.4375\n", + "epoch: 6 step: 8, loss is 0.3685\n", + "epoch: 6 step: 9, loss is 0.2734\n", + "epoch: 6 step: 10, loss is 0.2983\n", + "epoch: 6 step: 11, loss is 0.3373\n", + "epoch: 6 step: 12, loss is 0.3792\n", + "epoch: 6 step: 13, loss is 0.2534\n", + "epoch: 6 step: 14, loss is 0.2555\n", + "epoch: 6 step: 15, loss is 0.2536\n", + "epoch: 6 step: 16, loss is 0.2763\n", + "epoch: 6 step: 17, loss is 0.3496\n", + "epoch: 6 step: 18, loss is 0.2546\n", + "epoch: 6 step: 19, loss is 0.4003\n", + "epoch: 6 step: 20, loss is 0.4276\n", + "epoch: 6 step: 21, loss is 0.3958\n", + "epoch: 6 step: 22, loss is 0.2281\n", + "epoch: 6 step: 23, loss is 0.3480\n", + "epoch: 6 step: 24, loss is 0.3870\n", + "epoch: 6 step: 25, loss is 0.2697\n", + "epoch: 6 step: 26, loss is 0.2907\n", + "epoch: 6 step: 27, loss is 0.3572\n", + "epoch: 6 step: 28, loss is 0.3893\n", + "epoch: 6 step: 29, loss is 0.2259\n", + "epoch: 6 step: 30, loss is 0.3245\n", + "epoch: 6 step: 31, loss is 0.3229\n", + "epoch: 6 step: 32, loss is 0.4215\n", + "epoch: 6 step: 33, loss is 0.3496\n", + "epoch: 6 step: 34, loss is 0.2681\n", + "epoch: 6 step: 35, loss is 0.2482\n", + "epoch: 6 step: 36, loss is 0.2724\n", + "epoch: 6 step: 37, loss is 0.2379\n", + "epoch: 6 step: 38, loss is 0.3819\n", + "epoch: 6 step: 39, loss is 0.3537\n", + "epoch: 6 step: 40, loss is 0.4310\n", + "epoch: 6 step: 41, loss is 0.2783\n", + "epoch: 6 step: 42, loss is 0.2990\n", + "epoch: 6 step: 43, loss is 0.2777\n", + "epoch: 6 step: 44, loss is 0.3549\n", + "epoch: 6 step: 45, loss is 0.3157\n", + "epoch: 6 step: 46, loss is 0.3321\n", + "epoch: 6 step: 47, loss is 0.3563\n", + "epoch: 6 step: 48, loss is 0.4130\n", + "epoch: 6 step: 49, loss is 0.3645\n", + "epoch: 6 step: 50, loss is 0.2529\n", + "epoch: 6 step: 51, loss is 0.2823\n", + "epoch: 6 step: 52, loss is 0.3664\n", + "epoch: 6 step: 53, loss is 0.2778\n", + "epoch: 6 step: 54, loss is 0.2984\n", + "epoch: 6 step: 55, loss is 0.2269\n", + "epoch: 6 step: 56, loss is 0.4109\n", + "epoch: 6 step: 57, loss is 0.4286\n", + "epoch: 6 step: 58, loss is 0.2945\n", + "epoch: 6 step: 59, loss is 0.4755\n", + "epoch: 6 step: 60, loss is 0.4181\n", + "epoch: 6 step: 61, loss is 0.4213\n", + "epoch: 6 step: 62, loss is 0.1686\n", + "epoch: 6 step: 63, loss is 0.2477\n", + "epoch: 6 step: 64, loss is 0.2404\n", + "epoch: 6 step: 65, loss is 0.3538\n", + "epoch: 6 step: 66, loss is 0.2904\n", + "epoch: 6 step: 67, loss is 0.4119\n", + "epoch: 6 step: 68, loss is 0.3131\n", + "epoch: 6 step: 69, loss is 0.4042\n", + "epoch: 6 step: 70, loss is 0.4035\n", + "epoch: 6 step: 71, loss is 0.3474\n", + "epoch: 6 step: 72, loss is 0.4037\n", + "epoch: 6 step: 73, loss is 0.2797\n", + "epoch: 6 step: 74, loss is 0.3334\n", + "epoch: 6 step: 75, loss is 0.2892\n", + "epoch: 6 step: 76, loss is 0.4234\n", + "epoch: 6 step: 77, loss is 0.2536\n", + "epoch: 6 step: 78, loss is 0.3701\n", + "epoch: 6 step: 79, loss is 0.4579\n", + "epoch: 6 step: 80, loss is 0.3049\n", + "epoch: 6 step: 81, loss is 0.3158\n", + "epoch: 6 step: 82, loss is 0.4254\n", + "epoch: 6 step: 83, loss is 0.2563\n", + "epoch: 6 step: 84, loss is 0.3178\n", + "epoch: 6 step: 85, loss is 0.3254\n", + "epoch: 6 step: 86, loss is 0.2758\n", + "epoch: 6 step: 87, loss is 0.4271\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 6/ 10], step: [ 88/ 390], loss: [0.3815], avg loss: [0.3327], time: [99.7779ms]\n", - "Epoch: [ 6/ 10], step: [ 89/ 390], loss: [0.3205], avg loss: [0.3326], time: [102.2894ms]\n", - "Epoch: [ 6/ 10], step: [ 90/ 390], loss: [0.1674], avg loss: [0.3308], time: [107.3177ms]\n", - "Epoch: [ 6/ 10], step: [ 91/ 390], loss: [0.3302], avg loss: [0.3308], time: [104.4667ms]\n", - "Epoch: [ 6/ 10], step: [ 92/ 390], loss: [0.3680], avg loss: [0.3312], time: [105.5598ms]\n", - "Epoch: [ 6/ 10], step: [ 93/ 390], loss: [0.3370], avg loss: [0.3312], time: [103.6875ms]\n", - "Epoch: [ 6/ 10], step: [ 94/ 390], loss: [0.3272], avg loss: [0.3312], time: [105.0935ms]\n", - "Epoch: [ 6/ 10], step: [ 95/ 390], loss: [0.3728], avg loss: [0.3316], time: [108.2509ms]\n", - "Epoch: [ 6/ 10], step: [ 96/ 390], loss: [0.2415], avg loss: [0.3307], time: [104.2969ms]\n", - "Epoch: [ 6/ 10], step: [ 97/ 390], loss: [0.3413], avg loss: [0.3308], time: [106.3817ms]\n", - "Epoch: [ 6/ 10], step: [ 98/ 390], loss: [0.2772], avg loss: [0.3302], time: [104.6352ms]\n", - "Epoch: [ 6/ 10], step: [ 99/ 390], loss: [0.3638], avg loss: [0.3306], time: [103.6198ms]\n", - "Epoch: [ 6/ 10], step: [ 100/ 390], loss: [0.4868], avg loss: [0.3321], time: [104.7192ms]\n", - "Epoch: [ 6/ 10], step: [ 101/ 390], loss: [0.2709], avg loss: [0.3315], time: [104.6910ms]\n", - "Epoch: [ 6/ 10], step: [ 102/ 390], loss: [0.3050], avg loss: [0.3313], time: [102.3922ms]\n", - "Epoch: [ 6/ 10], step: [ 103/ 390], loss: [0.3113], avg loss: [0.3311], time: [103.4689ms]\n", - "Epoch: [ 6/ 10], step: [ 104/ 390], loss: [0.3130], avg loss: [0.3309], time: [101.4366ms]\n", - "Epoch: [ 6/ 10], step: [ 105/ 390], loss: [0.2987], avg loss: [0.3306], time: [109.1599ms]\n", - "Epoch: [ 6/ 10], step: [ 106/ 390], loss: [0.2144], avg loss: [0.3295], time: [105.4850ms]\n", - "Epoch: [ 6/ 10], step: [ 107/ 390], loss: [0.4136], avg loss: [0.3303], time: [105.8519ms]\n", - "Epoch: [ 6/ 10], step: [ 108/ 390], loss: [0.2410], avg loss: [0.3295], time: [104.0261ms]\n", - "Epoch: [ 6/ 10], step: [ 109/ 390], loss: [0.3518], avg loss: [0.3297], time: [104.0432ms]\n", - "Epoch: [ 6/ 10], step: [ 110/ 390], loss: [0.3474], avg loss: [0.3298], time: [105.0935ms]\n", - "Epoch: [ 6/ 10], step: [ 111/ 390], loss: [0.2430], avg loss: [0.3290], time: [105.2408ms]\n", - "Epoch: [ 6/ 10], step: [ 112/ 390], loss: [0.3468], avg loss: [0.3292], time: [100.6699ms]\n", - "Epoch: [ 6/ 10], step: [ 113/ 390], loss: [0.3406], avg loss: [0.3293], time: [105.6204ms]\n", - "Epoch: [ 6/ 10], step: [ 114/ 390], loss: [0.3484], avg loss: [0.3295], time: [100.8778ms]\n", - "Epoch: [ 6/ 10], step: [ 115/ 390], loss: [0.3458], avg loss: [0.3296], time: [103.7869ms]\n", - "Epoch: [ 6/ 10], step: [ 116/ 390], loss: [0.4029], avg loss: [0.3302], time: [102.7882ms]\n", - "Epoch: [ 6/ 10], step: [ 117/ 390], loss: [0.3123], avg loss: [0.3301], time: [105.2265ms]\n", - "Epoch: [ 6/ 10], step: [ 118/ 390], loss: [0.2976], avg loss: [0.3298], time: [101.3033ms]\n", - "Epoch: [ 6/ 10], step: [ 119/ 390], loss: [0.2587], avg loss: [0.3292], time: [106.1120ms]\n", - "Epoch: [ 6/ 10], step: [ 120/ 390], loss: [0.2946], avg loss: [0.3289], time: [103.9181ms]\n", - "Epoch: [ 6/ 10], step: [ 121/ 390], loss: [0.5230], avg loss: [0.3305], time: [104.0273ms]\n", - "Epoch: [ 6/ 10], step: [ 122/ 390], loss: [0.2541], avg loss: [0.3299], time: [107.4820ms]\n", - "Epoch: [ 6/ 10], step: [ 123/ 390], loss: [0.4289], avg loss: [0.3307], time: [102.1929ms]\n", - "Epoch: [ 6/ 10], step: [ 124/ 390], loss: [0.3652], avg loss: [0.3310], time: [102.3753ms]\n", - "Epoch: [ 6/ 10], step: [ 125/ 390], loss: [0.2435], avg loss: [0.3303], time: [103.3547ms]\n", - "Epoch: [ 6/ 10], step: [ 126/ 390], loss: [0.3469], avg loss: [0.3304], time: [103.7183ms]\n", - "Epoch: [ 6/ 10], step: [ 127/ 390], loss: [0.3319], avg loss: [0.3304], time: [103.5509ms]\n", - "Epoch: [ 6/ 10], step: [ 128/ 390], loss: [0.3387], avg loss: [0.3305], time: [103.8632ms]\n", - "Epoch: [ 6/ 10], step: [ 129/ 390], loss: [0.2644], avg loss: [0.3300], time: [102.5698ms]\n", - "Epoch: [ 6/ 10], step: [ 130/ 390], loss: [0.2812], avg loss: [0.3296], time: [104.6865ms]\n", - "Epoch: [ 6/ 10], step: [ 131/ 390], loss: [0.2899], avg loss: [0.3293], time: [106.7574ms]\n", - "Epoch: [ 6/ 10], step: [ 132/ 390], loss: [0.2739], avg loss: [0.3289], time: [102.3071ms]\n", - "Epoch: [ 6/ 10], step: [ 133/ 390], loss: [0.1730], avg loss: [0.3277], time: [105.8102ms]\n", - "Epoch: [ 6/ 10], step: [ 134/ 390], loss: [0.3183], avg loss: [0.3276], time: [100.9240ms]\n", - "Epoch: [ 6/ 10], step: [ 135/ 390], loss: [0.3891], avg loss: [0.3281], time: [102.3219ms]\n", - "Epoch: [ 6/ 10], step: [ 136/ 390], loss: [0.3395], avg loss: [0.3282], time: [103.2267ms]\n", - "Epoch: [ 6/ 10], step: [ 137/ 390], loss: [0.2796], avg loss: [0.3278], time: [105.4187ms]\n", - "Epoch: [ 6/ 10], step: [ 138/ 390], loss: [0.4936], avg loss: [0.3290], time: [104.6247ms]\n", - "Epoch: [ 6/ 10], step: [ 139/ 390], loss: [0.4189], avg loss: [0.3297], time: [106.3464ms]\n", - "Epoch: [ 6/ 10], step: [ 140/ 390], loss: [0.3429], avg loss: [0.3298], time: [104.7597ms]\n", - "Epoch: [ 6/ 10], step: [ 141/ 390], loss: [0.2839], avg loss: [0.3294], time: [103.1654ms]\n", - "Epoch: [ 6/ 10], step: [ 142/ 390], loss: [0.3150], avg loss: [0.3293], time: [99.8304ms]\n", - "Epoch: [ 6/ 10], step: [ 143/ 390], loss: [0.3406], avg loss: [0.3294], time: [107.5263ms]\n", - "Epoch: [ 6/ 10], step: [ 144/ 390], loss: [0.3555], avg loss: [0.3296], time: [105.5160ms]\n", - "Epoch: [ 6/ 10], step: [ 145/ 390], loss: [0.2782], avg loss: [0.3293], time: [101.9528ms]\n", - "Epoch: [ 6/ 10], step: [ 146/ 390], loss: [0.2559], avg loss: [0.3288], time: [105.1157ms]\n", - "Epoch: [ 6/ 10], step: [ 147/ 390], loss: [0.3379], avg loss: [0.3288], time: [106.3020ms]\n", - "Epoch: [ 6/ 10], step: [ 148/ 390], loss: [0.3768], avg loss: [0.3291], time: [104.6147ms]\n", - "Epoch: [ 6/ 10], step: [ 149/ 390], loss: [0.3913], avg loss: [0.3296], time: [106.1707ms]\n", - "Epoch: [ 6/ 10], step: [ 150/ 390], loss: [0.2264], avg loss: [0.3289], time: [105.3846ms]\n", - "Epoch: [ 6/ 10], step: [ 151/ 390], loss: [0.2102], avg loss: [0.3281], time: [102.5255ms]\n", - "Epoch: [ 6/ 10], step: [ 152/ 390], loss: [0.3544], avg loss: [0.3283], time: [102.5755ms]\n", - "Epoch: [ 6/ 10], step: [ 153/ 390], loss: [0.2458], avg loss: [0.3277], time: [103.0257ms]\n", - "Epoch: [ 6/ 10], step: [ 154/ 390], loss: [0.2079], avg loss: [0.3269], time: [104.9073ms]\n", - "Epoch: [ 6/ 10], step: [ 155/ 390], loss: [0.5016], avg loss: [0.3281], time: [107.5962ms]\n", - "Epoch: [ 6/ 10], step: [ 156/ 390], loss: [0.3904], avg loss: [0.3285], time: [102.3004ms]\n", - "Epoch: [ 6/ 10], step: [ 157/ 390], loss: [0.2560], avg loss: [0.3280], time: [103.6665ms]\n", - "Epoch: [ 6/ 10], step: [ 158/ 390], loss: [0.3972], avg loss: [0.3284], time: [99.9253ms]\n", - "Epoch: [ 6/ 10], step: [ 159/ 390], loss: [0.3128], avg loss: [0.3283], time: [104.0418ms]\n", - "Epoch: [ 6/ 10], step: [ 160/ 390], loss: [0.3540], avg loss: [0.3285], time: [105.4263ms]\n", - "Epoch: [ 6/ 10], step: [ 161/ 390], loss: [0.3925], avg loss: [0.3289], time: [108.7251ms]\n", - "Epoch: [ 6/ 10], step: [ 162/ 390], loss: [0.3021], avg loss: [0.3287], time: [102.9158ms]\n", - "Epoch: [ 6/ 10], step: [ 163/ 390], loss: [0.3047], avg loss: [0.3286], time: [105.1214ms]\n", - "Epoch: [ 6/ 10], step: [ 164/ 390], loss: [0.2893], avg loss: [0.3283], time: [103.3919ms]\n", - "Epoch: [ 6/ 10], step: [ 165/ 390], loss: [0.2883], avg loss: [0.3281], time: [104.9266ms]\n", - "Epoch: [ 6/ 10], step: [ 166/ 390], loss: [0.3685], avg loss: [0.3283], time: [102.9325ms]\n", - "Epoch: [ 6/ 10], step: [ 167/ 390], loss: [0.4150], avg loss: [0.3289], time: [105.2999ms]\n", - "Epoch: [ 6/ 10], step: [ 168/ 390], loss: [0.3211], avg loss: [0.3288], time: [101.4946ms]\n", - "Epoch: [ 6/ 10], step: [ 169/ 390], loss: [0.2711], avg loss: [0.3285], time: [105.3841ms]\n", - "Epoch: [ 6/ 10], step: [ 170/ 390], loss: [0.3252], avg loss: [0.3285], time: [103.5354ms]\n", - "Epoch: [ 6/ 10], step: [ 171/ 390], loss: [0.3076], avg loss: [0.3283], time: [103.5259ms]\n", - "Epoch: [ 6/ 10], step: [ 172/ 390], loss: [0.3561], avg loss: [0.3285], time: [102.9892ms]\n", - "Epoch: [ 6/ 10], step: [ 173/ 390], loss: [0.2063], avg loss: [0.3278], time: [106.4513ms]\n", - "Epoch: [ 6/ 10], step: [ 174/ 390], loss: [0.3680], avg loss: [0.3280], time: [104.9628ms]\n", - "Epoch: [ 6/ 10], step: [ 175/ 390], loss: [0.3585], avg loss: [0.3282], time: [106.5559ms]\n", - "Epoch: [ 6/ 10], step: [ 176/ 390], loss: [0.2052], avg loss: [0.3275], time: [105.7465ms]\n" + "epoch: 6 step: 88, loss is 0.3815\n", + "epoch: 6 step: 89, loss is 0.3205\n", + "epoch: 6 step: 90, loss is 0.1674\n", + "epoch: 6 step: 91, loss is 0.3302\n", + "epoch: 6 step: 92, loss is 0.3680\n", + "epoch: 6 step: 93, loss is 0.3370\n", + "epoch: 6 step: 94, loss is 0.3272\n", + "epoch: 6 step: 95, loss is 0.3728\n", + "epoch: 6 step: 96, loss is 0.2415\n", + "epoch: 6 step: 97, loss is 0.3413\n", + "epoch: 6 step: 98, loss is 0.2772\n", + "epoch: 6 step: 99, loss is 0.3638\n", + "epoch: 6 step: 100, loss is 0.4868\n", + "epoch: 6 step: 101, loss is 0.2709\n", + "epoch: 6 step: 102, loss is 0.3050\n", + "epoch: 6 step: 103, loss is 0.3113\n", + "epoch: 6 step: 104, loss is 0.3130\n", + "epoch: 6 step: 105, loss is 0.2987\n", + "epoch: 6 step: 106, loss is 0.2144\n", + "epoch: 6 step: 107, loss is 0.4136\n", + "epoch: 6 step: 108, loss is 0.2410\n", + "epoch: 6 step: 109, loss is 0.3518\n", + "epoch: 6 step: 110, loss is 0.3474\n", + "epoch: 6 step: 111, loss is 0.2430\n", + "epoch: 6 step: 112, loss is 0.3468\n", + "epoch: 6 step: 113, loss is 0.3406\n", + "epoch: 6 step: 114, loss is 0.3484\n", + "epoch: 6 step: 115, loss is 0.3458\n", + "epoch: 6 step: 116, loss is 0.4029\n", + "epoch: 6 step: 117, loss is 0.3123\n", + "epoch: 6 step: 118, loss is 0.2976\n", + "epoch: 6 step: 119, loss is 0.2587\n", + "epoch: 6 step: 120, loss is 0.2946\n", + "epoch: 6 step: 121, loss is 0.5230\n", + "epoch: 6 step: 122, loss is 0.2541\n", + "epoch: 6 step: 123, loss is 0.4289\n", + "epoch: 6 step: 124, loss is 0.3652\n", + "epoch: 6 step: 125, loss is 0.2435\n", + "epoch: 6 step: 126, loss is 0.3469\n", + "epoch: 6 step: 127, loss is 0.3319\n", + "epoch: 6 step: 128, loss is 0.3387\n", + "epoch: 6 step: 129, loss is 0.2644\n", + "epoch: 6 step: 130, loss is 0.2812\n", + "epoch: 6 step: 131, loss is 0.2899\n", + "epoch: 6 step: 132, loss is 0.2739\n", + "epoch: 6 step: 133, loss is 0.1730\n", + "epoch: 6 step: 134, loss is 0.3183\n", + "epoch: 6 step: 135, loss is 0.3891\n", + "epoch: 6 step: 136, loss is 0.3395\n", + "epoch: 6 step: 137, loss is 0.2796\n", + "epoch: 6 step: 138, loss is 0.4936\n", + "epoch: 6 step: 139, loss is 0.4189\n", + "epoch: 6 step: 140, loss is 0.3429\n", + "epoch: 6 step: 141, loss is 0.2839\n", + "epoch: 6 step: 142, loss is 0.3150\n", + "epoch: 6 step: 143, loss is 0.3406\n", + "epoch: 6 step: 144, loss is 0.3555\n", + "epoch: 6 step: 145, loss is 0.2782\n", + "epoch: 6 step: 146, loss is 0.2559\n", + "epoch: 6 step: 147, loss is 0.3379\n", + "epoch: 6 step: 148, loss is 0.3768\n", + "epoch: 6 step: 149, loss is 0.3913\n", + "epoch: 6 step: 150, loss is 0.2264\n", + "epoch: 6 step: 151, loss is 0.2102\n", + "epoch: 6 step: 152, loss is 0.3544\n", + "epoch: 6 step: 153, loss is 0.2458\n", + "epoch: 6 step: 154, loss is 0.2079\n", + "epoch: 6 step: 155, loss is 0.5016\n", + "epoch: 6 step: 156, loss is 0.3904\n", + "epoch: 6 step: 157, loss is 0.2560\n", + "epoch: 6 step: 158, loss is 0.3972\n", + "epoch: 6 step: 159, loss is 0.3128\n", + "epoch: 6 step: 160, loss is 0.3540\n", + "epoch: 6 step: 161, loss is 0.3925\n", + "epoch: 6 step: 162, loss is 0.3021\n", + "epoch: 6 step: 163, loss is 0.3047\n", + "epoch: 6 step: 164, loss is 0.2893\n", + "epoch: 6 step: 165, loss is 0.2883\n", + "epoch: 6 step: 166, loss is 0.3685\n", + "epoch: 6 step: 167, loss is 0.4150\n", + "epoch: 6 step: 168, loss is 0.3211\n", + "epoch: 6 step: 169, loss is 0.2711\n", + "epoch: 6 step: 170, loss is 0.3252\n", + "epoch: 6 step: 171, loss is 0.3076\n", + "epoch: 6 step: 172, loss is 0.3561\n", + "epoch: 6 step: 173, loss is 0.2063\n", + "epoch: 6 step: 174, loss is 0.3680\n", + "epoch: 6 step: 175, loss is 0.3585\n", + "epoch: 6 step: 176, loss is 0.2052\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 6/ 10], step: [ 177/ 390], loss: [0.3473], avg loss: [0.3276], time: [103.0400ms]\n", - "Epoch: [ 6/ 10], step: [ 178/ 390], loss: [0.4617], avg loss: [0.3284], time: [102.4530ms]\n", - "Epoch: [ 6/ 10], step: [ 179/ 390], loss: [0.2574], avg loss: [0.3280], time: [104.6448ms]\n", - "Epoch: [ 6/ 10], step: [ 180/ 390], loss: [0.2926], avg loss: [0.3278], time: [102.3531ms]\n", - "Epoch: [ 6/ 10], step: [ 181/ 390], loss: [0.2689], avg loss: [0.3274], time: [105.6643ms]\n", - "Epoch: [ 6/ 10], step: [ 182/ 390], loss: [0.2425], avg loss: [0.3270], time: [105.7646ms]\n", - "Epoch: [ 6/ 10], step: [ 183/ 390], loss: [0.4197], avg loss: [0.3275], time: [104.4226ms]\n", - "Epoch: [ 6/ 10], step: [ 184/ 390], loss: [0.3622], avg loss: [0.3277], time: [102.5190ms]\n", - "Epoch: [ 6/ 10], step: [ 185/ 390], loss: [0.3172], avg loss: [0.3276], time: [107.5490ms]\n", - "Epoch: [ 6/ 10], step: [ 186/ 390], loss: [0.2831], avg loss: [0.3274], time: [100.2440ms]\n", - "Epoch: [ 6/ 10], step: [ 187/ 390], loss: [0.4395], avg loss: [0.3280], time: [107.0464ms]\n", - "Epoch: [ 6/ 10], step: [ 188/ 390], loss: [0.3841], avg loss: [0.3283], time: [101.7933ms]\n", - "Epoch: [ 6/ 10], step: [ 189/ 390], loss: [0.4334], avg loss: [0.3288], time: [104.6174ms]\n", - "Epoch: [ 6/ 10], step: [ 190/ 390], loss: [0.5027], avg loss: [0.3297], time: [100.9848ms]\n", - "Epoch: [ 6/ 10], step: [ 191/ 390], loss: [0.5141], avg loss: [0.3307], time: [103.6432ms]\n", - "Epoch: [ 6/ 10], step: [ 192/ 390], loss: [0.3588], avg loss: [0.3309], time: [101.1293ms]\n", - "Epoch: [ 6/ 10], step: [ 193/ 390], loss: [0.3650], avg loss: [0.3310], time: [104.4297ms]\n", - "Epoch: [ 6/ 10], step: [ 194/ 390], loss: [0.3152], avg loss: [0.3310], time: [105.3679ms]\n", - "Epoch: [ 6/ 10], step: [ 195/ 390], loss: [0.3063], avg loss: [0.3308], time: [108.4087ms]\n", - "Epoch: [ 6/ 10], step: [ 196/ 390], loss: [0.3097], avg loss: [0.3307], time: [100.6358ms]\n", - "Epoch: [ 6/ 10], step: [ 197/ 390], loss: [0.3507], avg loss: [0.3308], time: [103.8554ms]\n", - "Epoch: [ 6/ 10], step: [ 198/ 390], loss: [0.2534], avg loss: [0.3304], time: [104.0516ms]\n", - "Epoch: [ 6/ 10], step: [ 199/ 390], loss: [0.4216], avg loss: [0.3309], time: [104.8689ms]\n", - "Epoch: [ 6/ 10], step: [ 200/ 390], loss: [0.4192], avg loss: [0.3313], time: [100.3902ms]\n", - "Epoch: [ 6/ 10], step: [ 201/ 390], loss: [0.3980], avg loss: [0.3317], time: [106.2922ms]\n", - "Epoch: [ 6/ 10], step: [ 202/ 390], loss: [0.3389], avg loss: [0.3317], time: [101.0244ms]\n", - "Epoch: [ 6/ 10], step: [ 203/ 390], loss: [0.3186], avg loss: [0.3316], time: [103.4589ms]\n", - "Epoch: [ 6/ 10], step: [ 204/ 390], loss: [0.5272], avg loss: [0.3326], time: [101.0959ms]\n", - "Epoch: [ 6/ 10], step: [ 205/ 390], loss: [0.4031], avg loss: [0.3329], time: [105.9949ms]\n", - "Epoch: [ 6/ 10], step: [ 206/ 390], loss: [0.3488], avg loss: [0.3330], time: [105.3770ms]\n", - "Epoch: [ 6/ 10], step: [ 207/ 390], loss: [0.3204], avg loss: [0.3330], time: [103.0207ms]\n", - "Epoch: [ 6/ 10], step: [ 208/ 390], loss: [0.3215], avg loss: [0.3329], time: [105.6094ms]\n", - "Epoch: [ 6/ 10], step: [ 209/ 390], loss: [0.3097], avg loss: [0.3328], time: [105.9272ms]\n", - "Epoch: [ 6/ 10], step: [ 210/ 390], loss: [0.2991], avg loss: [0.3326], time: [104.9280ms]\n", - "Epoch: [ 6/ 10], step: [ 211/ 390], loss: [0.2512], avg loss: [0.3322], time: [103.2462ms]\n", - "Epoch: [ 6/ 10], step: [ 212/ 390], loss: [0.2952], avg loss: [0.3321], time: [106.2520ms]\n", - "Epoch: [ 6/ 10], step: [ 213/ 390], loss: [0.3371], avg loss: [0.3321], time: [105.0916ms]\n", - "Epoch: [ 6/ 10], step: [ 214/ 390], loss: [0.3340], avg loss: [0.3321], time: [103.2071ms]\n", - "Epoch: [ 6/ 10], step: [ 215/ 390], loss: [0.2598], avg loss: [0.3318], time: [104.0084ms]\n", - "Epoch: [ 6/ 10], step: [ 216/ 390], loss: [0.3255], avg loss: [0.3317], time: [105.0892ms]\n", - "Epoch: [ 6/ 10], step: [ 217/ 390], loss: [0.3541], avg loss: [0.3318], time: [102.8223ms]\n", - "Epoch: [ 6/ 10], step: [ 218/ 390], loss: [0.3187], avg loss: [0.3318], time: [101.0220ms]\n", - "Epoch: [ 6/ 10], step: [ 219/ 390], loss: [0.2939], avg loss: [0.3316], time: [109.3974ms]\n", - "Epoch: [ 6/ 10], step: [ 220/ 390], loss: [0.2786], avg loss: [0.3314], time: [106.2407ms]\n", - "Epoch: [ 6/ 10], step: [ 221/ 390], loss: [0.2779], avg loss: [0.3311], time: [105.8226ms]\n", - "Epoch: [ 6/ 10], step: [ 222/ 390], loss: [0.4111], avg loss: [0.3315], time: [107.4440ms]\n", - "Epoch: [ 6/ 10], step: [ 223/ 390], loss: [0.3184], avg loss: [0.3314], time: [105.4258ms]\n", - "Epoch: [ 6/ 10], step: [ 224/ 390], loss: [0.1722], avg loss: [0.3307], time: [101.8672ms]\n", - "Epoch: [ 6/ 10], step: [ 225/ 390], loss: [0.2848], avg loss: [0.3305], time: [104.1944ms]\n", - "Epoch: [ 6/ 10], step: [ 226/ 390], loss: [0.3035], avg loss: [0.3304], time: [108.1917ms]\n", - "Epoch: [ 6/ 10], step: [ 227/ 390], loss: [0.4568], avg loss: [0.3309], time: [103.3666ms]\n", - "Epoch: [ 6/ 10], step: [ 228/ 390], loss: [0.2989], avg loss: [0.3308], time: [102.0334ms]\n", - "Epoch: [ 6/ 10], step: [ 229/ 390], loss: [0.2840], avg loss: [0.3306], time: [106.0257ms]\n", - "Epoch: [ 6/ 10], step: [ 230/ 390], loss: [0.3429], avg loss: [0.3307], time: [100.8019ms]\n", - "Epoch: [ 6/ 10], step: [ 231/ 390], loss: [0.3582], avg loss: [0.3308], time: [103.8568ms]\n", - "Epoch: [ 6/ 10], step: [ 232/ 390], loss: [0.2675], avg loss: [0.3305], time: [101.8233ms]\n", - "Epoch: [ 6/ 10], step: [ 233/ 390], loss: [0.2883], avg loss: [0.3303], time: [106.8397ms]\n", - "Epoch: [ 6/ 10], step: [ 234/ 390], loss: [0.3633], avg loss: [0.3305], time: [101.1724ms]\n", - "Epoch: [ 6/ 10], step: [ 235/ 390], loss: [0.3305], avg loss: [0.3305], time: [103.3738ms]\n", - "Epoch: [ 6/ 10], step: [ 236/ 390], loss: [0.2916], avg loss: [0.3303], time: [106.2672ms]\n", - "Epoch: [ 6/ 10], step: [ 237/ 390], loss: [0.3045], avg loss: [0.3302], time: [102.8006ms]\n", - "Epoch: [ 6/ 10], step: [ 238/ 390], loss: [0.2606], avg loss: [0.3299], time: [103.4052ms]\n", - "Epoch: [ 6/ 10], step: [ 239/ 390], loss: [0.2456], avg loss: [0.3295], time: [102.8235ms]\n", - "Epoch: [ 6/ 10], step: [ 240/ 390], loss: [0.2210], avg loss: [0.3291], time: [102.1998ms]\n", - "Epoch: [ 6/ 10], step: [ 241/ 390], loss: [0.3274], avg loss: [0.3291], time: [102.6089ms]\n", - "Epoch: [ 6/ 10], step: [ 242/ 390], loss: [0.4134], avg loss: [0.3294], time: [105.5417ms]\n", - "Epoch: [ 6/ 10], step: [ 243/ 390], loss: [0.4599], avg loss: [0.3300], time: [107.7466ms]\n", - "Epoch: [ 6/ 10], step: [ 244/ 390], loss: [0.5947], avg loss: [0.3311], time: [101.1190ms]\n", - "Epoch: [ 6/ 10], step: [ 245/ 390], loss: [0.2561], avg loss: [0.3307], time: [103.1210ms]\n", - "Epoch: [ 6/ 10], step: [ 246/ 390], loss: [0.2175], avg loss: [0.3303], time: [101.2173ms]\n", - "Epoch: [ 6/ 10], step: [ 247/ 390], loss: [0.3314], avg loss: [0.3303], time: [103.4813ms]\n", - "Epoch: [ 6/ 10], step: [ 248/ 390], loss: [0.2679], avg loss: [0.3300], time: [103.9636ms]\n", - "Epoch: [ 6/ 10], step: [ 249/ 390], loss: [0.3549], avg loss: [0.3301], time: [104.9063ms]\n", - "Epoch: [ 6/ 10], step: [ 250/ 390], loss: [0.2441], avg loss: [0.3298], time: [105.2394ms]\n", - "Epoch: [ 6/ 10], step: [ 251/ 390], loss: [0.2675], avg loss: [0.3295], time: [104.2738ms]\n", - "Epoch: [ 6/ 10], step: [ 252/ 390], loss: [0.3183], avg loss: [0.3295], time: [102.8545ms]\n", - "Epoch: [ 6/ 10], step: [ 253/ 390], loss: [0.3769], avg loss: [0.3297], time: [104.5601ms]\n", - "Epoch: [ 6/ 10], step: [ 254/ 390], loss: [0.2539], avg loss: [0.3294], time: [103.2653ms]\n", - "Epoch: [ 6/ 10], step: [ 255/ 390], loss: [0.4019], avg loss: [0.3297], time: [105.1457ms]\n", - "Epoch: [ 6/ 10], step: [ 256/ 390], loss: [0.3086], avg loss: [0.3296], time: [102.6106ms]\n", - "Epoch: [ 6/ 10], step: [ 257/ 390], loss: [0.4399], avg loss: [0.3300], time: [106.8661ms]\n", - "Epoch: [ 6/ 10], step: [ 258/ 390], loss: [0.2868], avg loss: [0.3299], time: [102.7610ms]\n", - "Epoch: [ 6/ 10], step: [ 259/ 390], loss: [0.3434], avg loss: [0.3299], time: [102.0977ms]\n", - "Epoch: [ 6/ 10], step: [ 260/ 390], loss: [0.2957], avg loss: [0.3298], time: [103.9495ms]\n", - "Epoch: [ 6/ 10], step: [ 261/ 390], loss: [0.2614], avg loss: [0.3295], time: [108.6328ms]\n", - "Epoch: [ 6/ 10], step: [ 262/ 390], loss: [0.2950], avg loss: [0.3294], time: [102.8056ms]\n", - "Epoch: [ 6/ 10], step: [ 263/ 390], loss: [0.2932], avg loss: [0.3292], time: [101.2115ms]\n", - "Epoch: [ 6/ 10], step: [ 264/ 390], loss: [0.3685], avg loss: [0.3294], time: [104.7268ms]\n", - "Epoch: [ 6/ 10], step: [ 265/ 390], loss: [0.2662], avg loss: [0.3292], time: [104.7187ms]\n" + "epoch: 6 step: 177, loss is 0.3473\n", + "epoch: 6 step: 178, loss is 0.4617\n", + "epoch: 6 step: 179, loss is 0.2574\n", + "epoch: 6 step: 180, loss is 0.2926\n", + "epoch: 6 step: 181, loss is 0.2689\n", + "epoch: 6 step: 182, loss is 0.2425\n", + "epoch: 6 step: 183, loss is 0.4197\n", + "epoch: 6 step: 184, loss is 0.3622\n", + "epoch: 6 step: 185, loss is 0.3172\n", + "epoch: 6 step: 186, loss is 0.2831\n", + "epoch: 6 step: 187, loss is 0.4395\n", + "epoch: 6 step: 188, loss is 0.3841\n", + "epoch: 6 step: 189, loss is 0.4334\n", + "epoch: 6 step: 190, loss is 0.5027\n", + "epoch: 6 step: 191, loss is 0.5141\n", + "epoch: 6 step: 192, loss is 0.3588\n", + "epoch: 6 step: 193, loss is 0.3650\n", + "epoch: 6 step: 194, loss is 0.3152\n", + "epoch: 6 step: 195, loss is 0.3063\n", + "epoch: 6 step: 196, loss is 0.3097\n", + "epoch: 6 step: 197, loss is 0.3507\n", + "epoch: 6 step: 198, loss is 0.2534\n", + "epoch: 6 step: 199, loss is 0.4216\n", + "epoch: 6 step: 200, loss is 0.4192\n", + "epoch: 6 step: 201, loss is 0.3980\n", + "epoch: 6 step: 202, loss is 0.3389\n", + "epoch: 6 step: 203, loss is 0.3186\n", + "epoch: 6 step: 204, loss is 0.5272\n", + "epoch: 6 step: 205, loss is 0.4031\n", + "epoch: 6 step: 206, loss is 0.3488\n", + "epoch: 6 step: 207, loss is 0.3204\n", + "epoch: 6 step: 208, loss is 0.3215\n", + "epoch: 6 step: 209, loss is 0.3097\n", + "epoch: 6 step: 210, loss is 0.2991\n", + "epoch: 6 step: 211, loss is 0.2512\n", + "epoch: 6 step: 212, loss is 0.2952\n", + "epoch: 6 step: 213, loss is 0.3371\n", + "epoch: 6 step: 214, loss is 0.3340\n", + "epoch: 6 step: 215, loss is 0.2598\n", + "epoch: 6 step: 216, loss is 0.3255\n", + "epoch: 6 step: 217, loss is 0.3541\n", + "epoch: 6 step: 218, loss is 0.3187\n", + "epoch: 6 step: 219, loss is 0.2939\n", + "epoch: 6 step: 220, loss is 0.2786\n", + "epoch: 6 step: 221, loss is 0.2779\n", + "epoch: 6 step: 222, loss is 0.4111\n", + "epoch: 6 step: 223, loss is 0.3184\n", + "epoch: 6 step: 224, loss is 0.1722\n", + "epoch: 6 step: 225, loss is 0.2848\n", + "epoch: 6 step: 226, loss is 0.3035\n", + "epoch: 6 step: 227, loss is 0.4568\n", + "epoch: 6 step: 228, loss is 0.2989\n", + "epoch: 6 step: 229, loss is 0.2840\n", + "epoch: 6 step: 230, loss is 0.3429\n", + "epoch: 6 step: 231, loss is 0.3582\n", + "epoch: 6 step: 232, loss is 0.2675\n", + "epoch: 6 step: 233, loss is 0.2883\n", + "epoch: 6 step: 234, loss is 0.3633\n", + "epoch: 6 step: 235, loss is 0.3305\n", + "epoch: 6 step: 236, loss is 0.2916\n", + "epoch: 6 step: 237, loss is 0.3045\n", + "epoch: 6 step: 238, loss is 0.2606\n", + "epoch: 6 step: 239, loss is 0.2456\n", + "epoch: 6 step: 240, loss is 0.2210\n", + "epoch: 6 step: 241, loss is 0.3274\n", + "epoch: 6 step: 242, loss is 0.4134\n", + "epoch: 6 step: 243, loss is 0.4599\n", + "epoch: 6 step: 244, loss is 0.5947\n", + "epoch: 6 step: 245, loss is 0.2561\n", + "epoch: 6 step: 246, loss is 0.2175\n", + "epoch: 6 step: 247, loss is 0.3314\n", + "epoch: 6 step: 248, loss is 0.2679\n", + "epoch: 6 step: 249, loss is 0.3549\n", + "epoch: 6 step: 250, loss is 0.2441\n", + "epoch: 6 step: 251, loss is 0.2675\n", + "epoch: 6 step: 252, loss is 0.3183\n", + "epoch: 6 step: 253, loss is 0.3769\n", + "epoch: 6 step: 254, loss is 0.2539\n", + "epoch: 6 step: 255, loss is 0.4019\n", + "epoch: 6 step: 256, loss is 0.3086\n", + "epoch: 6 step: 257, loss is 0.4399\n", + "epoch: 6 step: 258, loss is 0.2868\n", + "epoch: 6 step: 259, loss is 0.3434\n", + "epoch: 6 step: 260, loss is 0.2957\n", + "epoch: 6 step: 261, loss is 0.2614\n", + "epoch: 6 step: 262, loss is 0.2950\n", + "epoch: 6 step: 263, loss is 0.2932\n", + "epoch: 6 step: 264, loss is 0.3685\n", + "epoch: 6 step: 265, loss is 0.2662\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 6/ 10], step: [ 266/ 390], loss: [0.1851], avg loss: [0.3286], time: [106.4603ms]\n", - "Epoch: [ 6/ 10], step: [ 267/ 390], loss: [0.3902], avg loss: [0.3288], time: [105.4015ms]\n", - "Epoch: [ 6/ 10], step: [ 268/ 390], loss: [0.1962], avg loss: [0.3284], time: [102.4544ms]\n", - "Epoch: [ 6/ 10], step: [ 269/ 390], loss: [0.2614], avg loss: [0.3281], time: [105.6340ms]\n", - "Epoch: [ 6/ 10], step: [ 270/ 390], loss: [0.2919], avg loss: [0.3280], time: [103.2822ms]\n", - "Epoch: [ 6/ 10], step: [ 271/ 390], loss: [0.4295], avg loss: [0.3283], time: [104.4779ms]\n", - "Epoch: [ 6/ 10], step: [ 272/ 390], loss: [0.3681], avg loss: [0.3285], time: [107.9822ms]\n", - "Epoch: [ 6/ 10], step: [ 273/ 390], loss: [0.2417], avg loss: [0.3282], time: [106.6778ms]\n", - "Epoch: [ 6/ 10], step: [ 274/ 390], loss: [0.3749], avg loss: [0.3283], time: [107.0487ms]\n", - "Epoch: [ 6/ 10], step: [ 275/ 390], loss: [0.3401], avg loss: [0.3284], time: [103.5895ms]\n", - "Epoch: [ 6/ 10], step: [ 276/ 390], loss: [0.3363], avg loss: [0.3284], time: [103.7929ms]\n", - "Epoch: [ 6/ 10], step: [ 277/ 390], loss: [0.3809], avg loss: [0.3286], time: [106.1981ms]\n", - "Epoch: [ 6/ 10], step: [ 278/ 390], loss: [0.2851], avg loss: [0.3284], time: [104.9306ms]\n", - "Epoch: [ 6/ 10], step: [ 279/ 390], loss: [0.3831], avg loss: [0.3286], time: [102.7482ms]\n", - "Epoch: [ 6/ 10], step: [ 280/ 390], loss: [0.3269], avg loss: [0.3286], time: [107.4982ms]\n", - "Epoch: [ 6/ 10], step: [ 281/ 390], loss: [0.2682], avg loss: [0.3284], time: [107.1913ms]\n", - "Epoch: [ 6/ 10], step: [ 282/ 390], loss: [0.2464], avg loss: [0.3281], time: [100.6212ms]\n", - "Epoch: [ 6/ 10], step: [ 283/ 390], loss: [0.3946], avg loss: [0.3284], time: [105.3467ms]\n", - "Epoch: [ 6/ 10], step: [ 284/ 390], loss: [0.3671], avg loss: [0.3285], time: [105.5491ms]\n", - "Epoch: [ 6/ 10], step: [ 285/ 390], loss: [0.2973], avg loss: [0.3284], time: [107.2581ms]\n", - "Epoch: [ 6/ 10], step: [ 286/ 390], loss: [0.3856], avg loss: [0.3286], time: [105.2179ms]\n", - "Epoch: [ 6/ 10], step: [ 287/ 390], loss: [0.4005], avg loss: [0.3288], time: [106.7197ms]\n", - "Epoch: [ 6/ 10], step: [ 288/ 390], loss: [0.3100], avg loss: [0.3288], time: [103.0917ms]\n", - "Epoch: [ 6/ 10], step: [ 289/ 390], loss: [0.4213], avg loss: [0.3291], time: [102.3197ms]\n", - "Epoch: [ 6/ 10], step: [ 290/ 390], loss: [0.2163], avg loss: [0.3287], time: [102.0153ms]\n", - "Epoch: [ 6/ 10], step: [ 291/ 390], loss: [0.2245], avg loss: [0.3283], time: [103.6959ms]\n", - "Epoch: [ 6/ 10], step: [ 292/ 390], loss: [0.2426], avg loss: [0.3281], time: [104.0010ms]\n", - "Epoch: [ 6/ 10], step: [ 293/ 390], loss: [0.3086], avg loss: [0.3280], time: [104.6097ms]\n", - "Epoch: [ 6/ 10], step: [ 294/ 390], loss: [0.3300], avg loss: [0.3280], time: [106.2334ms]\n", - "Epoch: [ 6/ 10], step: [ 295/ 390], loss: [0.4324], avg loss: [0.3283], time: [106.7400ms]\n", - "Epoch: [ 6/ 10], step: [ 296/ 390], loss: [0.4079], avg loss: [0.3286], time: [103.9681ms]\n", - "Epoch: [ 6/ 10], step: [ 297/ 390], loss: [0.3564], avg loss: [0.3287], time: [104.4164ms]\n", - "Epoch: [ 6/ 10], step: [ 298/ 390], loss: [0.3987], avg loss: [0.3289], time: [104.2852ms]\n", - "Epoch: [ 6/ 10], step: [ 299/ 390], loss: [0.3378], avg loss: [0.3290], time: [106.4155ms]\n", - "Epoch: [ 6/ 10], step: [ 300/ 390], loss: [0.4463], avg loss: [0.3294], time: [106.0929ms]\n", - "Epoch: [ 6/ 10], step: [ 301/ 390], loss: [0.3557], avg loss: [0.3295], time: [105.8309ms]\n", - "Epoch: [ 6/ 10], step: [ 302/ 390], loss: [0.4535], avg loss: [0.3299], time: [104.7084ms]\n", - "Epoch: [ 6/ 10], step: [ 303/ 390], loss: [0.3136], avg loss: [0.3298], time: [102.8643ms]\n", - "Epoch: [ 6/ 10], step: [ 304/ 390], loss: [0.2858], avg loss: [0.3297], time: [102.1047ms]\n", - "Epoch: [ 6/ 10], step: [ 305/ 390], loss: [0.4527], avg loss: [0.3301], time: [107.2831ms]\n", - "Epoch: [ 6/ 10], step: [ 306/ 390], loss: [0.4973], avg loss: [0.3306], time: [104.9027ms]\n", - "Epoch: [ 6/ 10], step: [ 307/ 390], loss: [0.3944], avg loss: [0.3308], time: [103.0478ms]\n", - "Epoch: [ 6/ 10], step: [ 308/ 390], loss: [0.3267], avg loss: [0.3308], time: [101.0103ms]\n", - "Epoch: [ 6/ 10], step: [ 309/ 390], loss: [0.3917], avg loss: [0.3310], time: [107.0888ms]\n", - "Epoch: [ 6/ 10], step: [ 310/ 390], loss: [0.2803], avg loss: [0.3308], time: [105.2139ms]\n", - "Epoch: [ 6/ 10], step: [ 311/ 390], loss: [0.4024], avg loss: [0.3311], time: [105.0539ms]\n", - "Epoch: [ 6/ 10], step: [ 312/ 390], loss: [0.4093], avg loss: [0.3313], time: [101.4223ms]\n", - "Epoch: [ 6/ 10], step: [ 313/ 390], loss: [0.3855], avg loss: [0.3315], time: [107.0051ms]\n", - "Epoch: [ 6/ 10], step: [ 314/ 390], loss: [0.3074], avg loss: [0.3314], time: [101.5892ms]\n", - "Epoch: [ 6/ 10], step: [ 315/ 390], loss: [0.2501], avg loss: [0.3312], time: [104.3346ms]\n", - "Epoch: [ 6/ 10], step: [ 316/ 390], loss: [0.3559], avg loss: [0.3312], time: [105.0799ms]\n", - "Epoch: [ 6/ 10], step: [ 317/ 390], loss: [0.3158], avg loss: [0.3312], time: [105.5751ms]\n", - "Epoch: [ 6/ 10], step: [ 318/ 390], loss: [0.2860], avg loss: [0.3311], time: [101.3811ms]\n", - "Epoch: [ 6/ 10], step: [ 319/ 390], loss: [0.1979], avg loss: [0.3306], time: [107.5933ms]\n", - "Epoch: [ 6/ 10], step: [ 320/ 390], loss: [0.2508], avg loss: [0.3304], time: [104.7511ms]\n", - "Epoch: [ 6/ 10], step: [ 321/ 390], loss: [0.3351], avg loss: [0.3304], time: [103.9152ms]\n", - "Epoch: [ 6/ 10], step: [ 322/ 390], loss: [0.3078], avg loss: [0.3303], time: [101.2144ms]\n", - "Epoch: [ 6/ 10], step: [ 323/ 390], loss: [0.2160], avg loss: [0.3300], time: [108.0611ms]\n", - "Epoch: [ 6/ 10], step: [ 324/ 390], loss: [0.2717], avg loss: [0.3298], time: [100.1897ms]\n", - "Epoch: [ 6/ 10], step: [ 325/ 390], loss: [0.2465], avg loss: [0.3295], time: [106.1027ms]\n", - "Epoch: [ 6/ 10], step: [ 326/ 390], loss: [0.4169], avg loss: [0.3298], time: [106.1909ms]\n", - "Epoch: [ 6/ 10], step: [ 327/ 390], loss: [0.2714], avg loss: [0.3296], time: [108.6955ms]\n", - "Epoch: [ 6/ 10], step: [ 328/ 390], loss: [0.2966], avg loss: [0.3295], time: [101.0115ms]\n", - "Epoch: [ 6/ 10], step: [ 329/ 390], loss: [0.2984], avg loss: [0.3294], time: [102.9346ms]\n", - "Epoch: [ 6/ 10], step: [ 330/ 390], loss: [0.2708], avg loss: [0.3293], time: [105.4211ms]\n", - "Epoch: [ 6/ 10], step: [ 331/ 390], loss: [0.3978], avg loss: [0.3295], time: [106.9992ms]\n", - "Epoch: [ 6/ 10], step: [ 332/ 390], loss: [0.3094], avg loss: [0.3294], time: [104.5735ms]\n", - "Epoch: [ 6/ 10], step: [ 333/ 390], loss: [0.3462], avg loss: [0.3295], time: [105.4125ms]\n", - "Epoch: [ 6/ 10], step: [ 334/ 390], loss: [0.2669], avg loss: [0.3293], time: [101.0020ms]\n", - "Epoch: [ 6/ 10], step: [ 335/ 390], loss: [0.4101], avg loss: [0.3295], time: [103.3039ms]\n", - "Epoch: [ 6/ 10], step: [ 336/ 390], loss: [0.3374], avg loss: [0.3295], time: [107.1084ms]\n", - "Epoch: [ 6/ 10], step: [ 337/ 390], loss: [0.4897], avg loss: [0.3300], time: [102.1457ms]\n", - "Epoch: [ 6/ 10], step: [ 338/ 390], loss: [0.4213], avg loss: [0.3303], time: [102.9134ms]\n", - "Epoch: [ 6/ 10], step: [ 339/ 390], loss: [0.3470], avg loss: [0.3303], time: [105.8249ms]\n", - "Epoch: [ 6/ 10], step: [ 340/ 390], loss: [0.3184], avg loss: [0.3303], time: [101.3196ms]\n", - "Epoch: [ 6/ 10], step: [ 341/ 390], loss: [0.2712], avg loss: [0.3301], time: [102.4146ms]\n", - "Epoch: [ 6/ 10], step: [ 342/ 390], loss: [0.3386], avg loss: [0.3301], time: [103.4834ms]\n", - "Epoch: [ 6/ 10], step: [ 343/ 390], loss: [0.2672], avg loss: [0.3300], time: [107.3389ms]\n", - "Epoch: [ 6/ 10], step: [ 344/ 390], loss: [0.2524], avg loss: [0.3297], time: [106.3159ms]\n", - "Epoch: [ 6/ 10], step: [ 345/ 390], loss: [0.4011], avg loss: [0.3299], time: [105.5076ms]\n", - "Epoch: [ 6/ 10], step: [ 346/ 390], loss: [0.2394], avg loss: [0.3297], time: [106.8244ms]\n", - "Epoch: [ 6/ 10], step: [ 347/ 390], loss: [0.3335], avg loss: [0.3297], time: [106.9553ms]\n", - "Epoch: [ 6/ 10], step: [ 348/ 390], loss: [0.4013], avg loss: [0.3299], time: [101.4163ms]\n", - "Epoch: [ 6/ 10], step: [ 349/ 390], loss: [0.2915], avg loss: [0.3298], time: [108.4950ms]\n", - "Epoch: [ 6/ 10], step: [ 350/ 390], loss: [0.3499], avg loss: [0.3298], time: [101.9070ms]\n", - "Epoch: [ 6/ 10], step: [ 351/ 390], loss: [0.2878], avg loss: [0.3297], time: [103.7657ms]\n", - "Epoch: [ 6/ 10], step: [ 352/ 390], loss: [0.3596], avg loss: [0.3298], time: [99.0610ms]\n", - "Epoch: [ 6/ 10], step: [ 353/ 390], loss: [0.2053], avg loss: [0.3295], time: [105.2699ms]\n", - "Epoch: [ 6/ 10], step: [ 354/ 390], loss: [0.3241], avg loss: [0.3294], time: [101.9120ms]\n" + "epoch: 6 step: 266, loss is 0.1851\n", + "epoch: 6 step: 267, loss is 0.3902\n", + "epoch: 6 step: 268, loss is 0.1962\n", + "epoch: 6 step: 269, loss is 0.2614\n", + "epoch: 6 step: 270, loss is 0.2919\n", + "epoch: 6 step: 271, loss is 0.4295\n", + "epoch: 6 step: 272, loss is 0.3681\n", + "epoch: 6 step: 273, loss is 0.2417\n", + "epoch: 6 step: 274, loss is 0.3749\n", + "epoch: 6 step: 275, loss is 0.3401\n", + "epoch: 6 step: 276, loss is 0.3363\n", + "epoch: 6 step: 277, loss is 0.3809\n", + "epoch: 6 step: 278, loss is 0.2851\n", + "epoch: 6 step: 279, loss is 0.3831\n", + "epoch: 6 step: 280, loss is 0.3269\n", + "epoch: 6 step: 281, loss is 0.2682\n", + "epoch: 6 step: 282, loss is 0.2464\n", + "epoch: 6 step: 283, loss is 0.3946\n", + "epoch: 6 step: 284, loss is 0.3671\n", + "epoch: 6 step: 285, loss is 0.2973\n", + "epoch: 6 step: 286, loss is 0.3856\n", + "epoch: 6 step: 287, loss is 0.4005\n", + "epoch: 6 step: 288, loss is 0.3100\n", + "epoch: 6 step: 289, loss is 0.4213\n", + "epoch: 6 step: 290, loss is 0.2163\n", + "epoch: 6 step: 291, loss is 0.2245\n", + "epoch: 6 step: 292, loss is 0.2426\n", + "epoch: 6 step: 293, loss is 0.3086\n", + "epoch: 6 step: 294, loss is 0.3300\n", + "epoch: 6 step: 295, loss is 0.4324\n", + "epoch: 6 step: 296, loss is 0.4079\n", + "epoch: 6 step: 297, loss is 0.3564\n", + "epoch: 6 step: 298, loss is 0.3987\n", + "epoch: 6 step: 299, loss is 0.3378\n", + "epoch: 6 step: 300, loss is 0.4463\n", + "epoch: 6 step: 301, loss is 0.3557\n", + "epoch: 6 step: 302, loss is 0.4535\n", + "epoch: 6 step: 303, loss is 0.3136\n", + "epoch: 6 step: 304, loss is 0.2858\n", + "epoch: 6 step: 305, loss is 0.4527\n", + "epoch: 6 step: 306, loss is 0.4973\n", + "epoch: 6 step: 307, loss is 0.3944\n", + "epoch: 6 step: 308, loss is 0.3267\n", + "epoch: 6 step: 309, loss is 0.3917\n", + "epoch: 6 step: 310, loss is 0.2803\n", + "epoch: 6 step: 311, loss is 0.4024\n", + "epoch: 6 step: 312, loss is 0.4093\n", + "epoch: 6 step: 313, loss is 0.3855\n", + "epoch: 6 step: 314, loss is 0.3074\n", + "epoch: 6 step: 315, loss is 0.2501\n", + "epoch: 6 step: 316, loss is 0.3559\n", + "epoch: 6 step: 317, loss is 0.3158\n", + "epoch: 6 step: 318, loss is 0.2860\n", + "epoch: 6 step: 319, loss is 0.1979\n", + "epoch: 6 step: 320, loss is 0.2508\n", + "epoch: 6 step: 321, loss is 0.3351\n", + "epoch: 6 step: 322, loss is 0.3078\n", + "epoch: 6 step: 323, loss is 0.2160\n", + "epoch: 6 step: 324, loss is 0.2717\n", + "epoch: 6 step: 325, loss is 0.2465\n", + "epoch: 6 step: 326, loss is 0.4169\n", + "epoch: 6 step: 327, loss is 0.2714\n", + "epoch: 6 step: 328, loss is 0.2966\n", + "epoch: 6 step: 329, loss is 0.2984\n", + "epoch: 6 step: 330, loss is 0.2708\n", + "epoch: 6 step: 331, loss is 0.3978\n", + "epoch: 6 step: 332, loss is 0.3094\n", + "epoch: 6 step: 333, loss is 0.3462\n", + "epoch: 6 step: 334, loss is 0.2669\n", + "epoch: 6 step: 335, loss is 0.4101\n", + "epoch: 6 step: 336, loss is 0.3374\n", + "epoch: 6 step: 337, loss is 0.4897\n", + "epoch: 6 step: 338, loss is 0.4213\n", + "epoch: 6 step: 339, loss is 0.3470\n", + "epoch: 6 step: 340, loss is 0.3184\n", + "epoch: 6 step: 341, loss is 0.2712\n", + "epoch: 6 step: 342, loss is 0.3386\n", + "epoch: 6 step: 343, loss is 0.2672\n", + "epoch: 6 step: 344, loss is 0.2524\n", + "epoch: 6 step: 345, loss is 0.4011\n", + "epoch: 6 step: 346, loss is 0.2394\n", + "epoch: 6 step: 347, loss is 0.3335\n", + "epoch: 6 step: 348, loss is 0.4013\n", + "epoch: 6 step: 349, loss is 0.2915\n", + "epoch: 6 step: 350, loss is 0.3499\n", + "epoch: 6 step: 351, loss is 0.2878\n", + "epoch: 6 step: 352, loss is 0.3596\n", + "epoch: 6 step: 353, loss is 0.2053\n", + "epoch: 6 step: 354, loss is 0.3241\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 6/ 10], step: [ 355/ 390], loss: [0.4533], avg loss: [0.3298], time: [105.9098ms]\n", - "Epoch: [ 6/ 10], step: [ 356/ 390], loss: [0.2419], avg loss: [0.3295], time: [103.5061ms]\n", - "Epoch: [ 6/ 10], step: [ 357/ 390], loss: [0.2371], avg loss: [0.3293], time: [102.0308ms]\n", - "Epoch: [ 6/ 10], step: [ 358/ 390], loss: [0.3193], avg loss: [0.3293], time: [102.3316ms]\n", - "Epoch: [ 6/ 10], step: [ 359/ 390], loss: [0.4685], avg loss: [0.3296], time: [103.1935ms]\n", - "Epoch: [ 6/ 10], step: [ 360/ 390], loss: [0.3362], avg loss: [0.3297], time: [103.2398ms]\n", - "Epoch: [ 6/ 10], step: [ 361/ 390], loss: [0.4437], avg loss: [0.3300], time: [105.7558ms]\n", - "Epoch: [ 6/ 10], step: [ 362/ 390], loss: [0.3613], avg loss: [0.3301], time: [100.1587ms]\n", - "Epoch: [ 6/ 10], step: [ 363/ 390], loss: [0.4118], avg loss: [0.3303], time: [106.5342ms]\n", - "Epoch: [ 6/ 10], step: [ 364/ 390], loss: [0.3095], avg loss: [0.3302], time: [103.8628ms]\n", - "Epoch: [ 6/ 10], step: [ 365/ 390], loss: [0.2669], avg loss: [0.3301], time: [106.3886ms]\n", - "Epoch: [ 6/ 10], step: [ 366/ 390], loss: [0.2606], avg loss: [0.3299], time: [106.6759ms]\n", - "Epoch: [ 6/ 10], step: [ 367/ 390], loss: [0.3994], avg loss: [0.3301], time: [104.9557ms]\n", - "Epoch: [ 6/ 10], step: [ 368/ 390], loss: [0.2873], avg loss: [0.3299], time: [107.0628ms]\n", - "Epoch: [ 6/ 10], step: [ 369/ 390], loss: [0.2830], avg loss: [0.3298], time: [108.3372ms]\n", - "Epoch: [ 6/ 10], step: [ 370/ 390], loss: [0.2995], avg loss: [0.3297], time: [105.0665ms]\n", - "Epoch: [ 6/ 10], step: [ 371/ 390], loss: [0.2545], avg loss: [0.3295], time: [103.4839ms]\n", - "Epoch: [ 6/ 10], step: [ 372/ 390], loss: [0.2930], avg loss: [0.3294], time: [102.1507ms]\n", - "Epoch: [ 6/ 10], step: [ 373/ 390], loss: [0.3777], avg loss: [0.3296], time: [104.1732ms]\n", - "Epoch: [ 6/ 10], step: [ 374/ 390], loss: [0.5867], avg loss: [0.3302], time: [103.1616ms]\n", - "Epoch: [ 6/ 10], step: [ 375/ 390], loss: [0.2580], avg loss: [0.3301], time: [105.5446ms]\n", - "Epoch: [ 6/ 10], step: [ 376/ 390], loss: [0.1726], avg loss: [0.3296], time: [104.7406ms]\n", - "Epoch: [ 6/ 10], step: [ 377/ 390], loss: [0.2685], avg loss: [0.3295], time: [105.3722ms]\n", - "Epoch: [ 6/ 10], step: [ 378/ 390], loss: [0.2625], avg loss: [0.3293], time: [105.3462ms]\n", - "Epoch: [ 6/ 10], step: [ 379/ 390], loss: [0.2591], avg loss: [0.3291], time: [106.2505ms]\n", - "Epoch: [ 6/ 10], step: [ 380/ 390], loss: [0.3863], avg loss: [0.3293], time: [102.0939ms]\n", - "Epoch: [ 6/ 10], step: [ 381/ 390], loss: [0.2968], avg loss: [0.3292], time: [107.4014ms]\n", - "Epoch: [ 6/ 10], step: [ 382/ 390], loss: [0.3835], avg loss: [0.3293], time: [102.7396ms]\n", - "Epoch: [ 6/ 10], step: [ 383/ 390], loss: [0.4430], avg loss: [0.3296], time: [103.3907ms]\n", - "Epoch: [ 6/ 10], step: [ 384/ 390], loss: [0.4552], avg loss: [0.3299], time: [104.4273ms]\n", - "Epoch: [ 6/ 10], step: [ 385/ 390], loss: [0.2496], avg loss: [0.3297], time: [103.3270ms]\n", - "Epoch: [ 6/ 10], step: [ 386/ 390], loss: [0.2851], avg loss: [0.3296], time: [102.7114ms]\n", - "Epoch: [ 6/ 10], step: [ 387/ 390], loss: [0.2592], avg loss: [0.3294], time: [103.4312ms]\n", - "Epoch: [ 6/ 10], step: [ 388/ 390], loss: [0.3486], avg loss: [0.3295], time: [103.0850ms]\n", - "Epoch: [ 6/ 10], step: [ 389/ 390], loss: [0.4242], avg loss: [0.3297], time: [104.1567ms]\n", - "Epoch: [ 6/ 10], step: [ 390/ 390], loss: [0.4188], avg loss: [0.3300], time: [879.6704ms]\n", - "Epoch time: 41775.477, per step time: 107.117\n", + "epoch: 6 step: 355, loss is 0.4533\n", + "epoch: 6 step: 356, loss is 0.2419\n", + "epoch: 6 step: 357, loss is 0.2371\n", + "epoch: 6 step: 358, loss is 0.3193\n", + "epoch: 6 step: 359, loss is 0.4685\n", + "epoch: 6 step: 360, loss is 0.3362\n", + "epoch: 6 step: 361, loss is 0.4437\n", + "epoch: 6 step: 362, loss is 0.3613\n", + "epoch: 6 step: 363, loss is 0.4118\n", + "epoch: 6 step: 364, loss is 0.3095\n", + "epoch: 6 step: 365, loss is 0.2669\n", + "epoch: 6 step: 366, loss is 0.2606\n", + "epoch: 6 step: 367, loss is 0.3994\n", + "epoch: 6 step: 368, loss is 0.2873\n", + "epoch: 6 step: 369, loss is 0.2830\n", + "epoch: 6 step: 370, loss is 0.2995\n", + "epoch: 6 step: 371, loss is 0.2545\n", + "epoch: 6 step: 372, loss is 0.2930\n", + "epoch: 6 step: 373, loss is 0.3777\n", + "epoch: 6 step: 374, loss is 0.5867\n", + "epoch: 6 step: 375, loss is 0.2580\n", + "epoch: 6 step: 376, loss is 0.1726\n", + "epoch: 6 step: 377, loss is 0.2685\n", + "epoch: 6 step: 378, loss is 0.2625\n", + "epoch: 6 step: 379, loss is 0.2591\n", + "epoch: 6 step: 380, loss is 0.3863\n", + "epoch: 6 step: 381, loss is 0.2968\n", + "epoch: 6 step: 382, loss is 0.3835\n", + "epoch: 6 step: 383, loss is 0.4430\n", + "epoch: 6 step: 384, loss is 0.4552\n", + "epoch: 6 step: 385, loss is 0.2496\n", + "epoch: 6 step: 386, loss is 0.2851\n", + "epoch: 6 step: 387, loss is 0.2592\n", + "epoch: 6 step: 388, loss is 0.3486\n", + "epoch: 6 step: 389, loss is 0.4242\n", + "epoch: 6 step: 390, loss is 0.4188\n", "Epoch time: 41775.770, per step time: 107.117, avg loss: 0.330\n", "************************************************************\n", - "Epoch: [ 7/ 10], step: [ 1/ 390], loss: [0.4987], avg loss: [0.4987], time: [103.4799ms]\n", - "Epoch: [ 7/ 10], step: [ 2/ 390], loss: [0.2668], avg loss: [0.3827], time: [105.1037ms]\n", - "Epoch: [ 7/ 10], step: [ 3/ 390], loss: [0.2438], avg loss: [0.3364], time: [105.2711ms]\n", - "Epoch: [ 7/ 10], step: [ 4/ 390], loss: [0.2162], avg loss: [0.3064], time: [103.6854ms]\n", - "Epoch: [ 7/ 10], step: [ 5/ 390], loss: [0.2195], avg loss: [0.2890], time: [109.2501ms]\n", - "Epoch: [ 7/ 10], step: [ 6/ 390], loss: [0.3050], avg loss: [0.2917], time: [105.3102ms]\n", - "Epoch: [ 7/ 10], step: [ 7/ 390], loss: [0.2998], avg loss: [0.2928], time: [104.3055ms]\n", - "Epoch: [ 7/ 10], step: [ 8/ 390], loss: [0.2066], avg loss: [0.2820], time: [106.9927ms]\n", - "Epoch: [ 7/ 10], step: [ 9/ 390], loss: [0.2900], avg loss: [0.2829], time: [107.9371ms]\n", - "Epoch: [ 7/ 10], step: [ 10/ 390], loss: [0.3204], avg loss: [0.2867], time: [107.1513ms]\n", - "Epoch: [ 7/ 10], step: [ 11/ 390], loss: [0.3092], avg loss: [0.2887], time: [109.4887ms]\n", - "Epoch: [ 7/ 10], step: [ 12/ 390], loss: [0.2089], avg loss: [0.2821], time: [104.9278ms]\n", - "Epoch: [ 7/ 10], step: [ 13/ 390], loss: [0.4390], avg loss: [0.2941], time: [104.5280ms]\n", - "Epoch: [ 7/ 10], step: [ 14/ 390], loss: [0.2447], avg loss: [0.2906], time: [109.8156ms]\n", - "Epoch: [ 7/ 10], step: [ 15/ 390], loss: [0.3001], avg loss: [0.2912], time: [108.1660ms]\n", - "Epoch: [ 7/ 10], step: [ 16/ 390], loss: [0.2784], avg loss: [0.2904], time: [106.7076ms]\n", - "Epoch: [ 7/ 10], step: [ 17/ 390], loss: [0.3556], avg loss: [0.2943], time: [105.6578ms]\n", - "Epoch: [ 7/ 10], step: [ 18/ 390], loss: [0.4071], avg loss: [0.3005], time: [103.9522ms]\n", - "Epoch: [ 7/ 10], step: [ 19/ 390], loss: [0.3229], avg loss: [0.3017], time: [103.2581ms]\n", - "Epoch: [ 7/ 10], step: [ 20/ 390], loss: [0.3676], avg loss: [0.3050], time: [103.9524ms]\n", - "Epoch: [ 7/ 10], step: [ 21/ 390], loss: [0.4012], avg loss: [0.3096], time: [107.7132ms]\n", - "Epoch: [ 7/ 10], step: [ 22/ 390], loss: [0.2647], avg loss: [0.3075], time: [108.6879ms]\n", - "Epoch: [ 7/ 10], step: [ 23/ 390], loss: [0.2700], avg loss: [0.3059], time: [106.2062ms]\n", - "Epoch: [ 7/ 10], step: [ 24/ 390], loss: [0.2553], avg loss: [0.3038], time: [104.6212ms]\n", - "Epoch: [ 7/ 10], step: [ 25/ 390], loss: [0.3872], avg loss: [0.3071], time: [104.6693ms]\n", - "Epoch: [ 7/ 10], step: [ 26/ 390], loss: [0.2646], avg loss: [0.3055], time: [104.0697ms]\n", - "Epoch: [ 7/ 10], step: [ 27/ 390], loss: [0.4048], avg loss: [0.3092], time: [106.9255ms]\n", - "Epoch: [ 7/ 10], step: [ 28/ 390], loss: [0.2702], avg loss: [0.3078], time: [103.9541ms]\n", - "Epoch: [ 7/ 10], step: [ 29/ 390], loss: [0.2565], avg loss: [0.3060], time: [105.5868ms]\n", - "Epoch: [ 7/ 10], step: [ 30/ 390], loss: [0.3814], avg loss: [0.3085], time: [108.4297ms]\n", - "Epoch: [ 7/ 10], step: [ 31/ 390], loss: [0.2905], avg loss: [0.3080], time: [106.5609ms]\n", - "Epoch: [ 7/ 10], step: [ 32/ 390], loss: [0.3505], avg loss: [0.3093], time: [103.4110ms]\n", - "Epoch: [ 7/ 10], step: [ 33/ 390], loss: [0.2309], avg loss: [0.3069], time: [105.4876ms]\n", - "Epoch: [ 7/ 10], step: [ 34/ 390], loss: [0.2800], avg loss: [0.3061], time: [106.8888ms]\n", - "Epoch: [ 7/ 10], step: [ 35/ 390], loss: [0.2286], avg loss: [0.3039], time: [104.6867ms]\n", - "Epoch: [ 7/ 10], step: [ 36/ 390], loss: [0.2181], avg loss: [0.3015], time: [108.2883ms]\n", - "Epoch: [ 7/ 10], step: [ 37/ 390], loss: [0.3667], avg loss: [0.3033], time: [104.5008ms]\n", - "Epoch: [ 7/ 10], step: [ 38/ 390], loss: [0.3457], avg loss: [0.3044], time: [105.8502ms]\n", - "Epoch: [ 7/ 10], step: [ 39/ 390], loss: [0.3112], avg loss: [0.3046], time: [107.2445ms]\n", - "Epoch: [ 7/ 10], step: [ 40/ 390], loss: [0.2804], avg loss: [0.3040], time: [106.5450ms]\n", - "Epoch: [ 7/ 10], step: [ 41/ 390], loss: [0.2552], avg loss: [0.3028], time: [103.1537ms]\n", - "Epoch: [ 7/ 10], step: [ 42/ 390], loss: [0.1920], avg loss: [0.3001], time: [104.1739ms]\n", - "Epoch: [ 7/ 10], step: [ 43/ 390], loss: [0.3377], avg loss: [0.3010], time: [103.8806ms]\n", - "Epoch: [ 7/ 10], step: [ 44/ 390], loss: [0.2705], avg loss: [0.3003], time: [104.0616ms]\n", - "Epoch: [ 7/ 10], step: [ 45/ 390], loss: [0.4264], avg loss: [0.3031], time: [105.0491ms]\n", - "Epoch: [ 7/ 10], step: [ 46/ 390], loss: [0.2829], avg loss: [0.3027], time: [107.9972ms]\n", - "Epoch: [ 7/ 10], step: [ 47/ 390], loss: [0.4340], avg loss: [0.3055], time: [103.6415ms]\n", - "Epoch: [ 7/ 10], step: [ 48/ 390], loss: [0.2982], avg loss: [0.3053], time: [105.5567ms]\n", - "Epoch: [ 7/ 10], step: [ 49/ 390], loss: [0.2619], avg loss: [0.3044], time: [103.4281ms]\n", - "Epoch: [ 7/ 10], step: [ 50/ 390], loss: [0.3331], avg loss: [0.3050], time: [105.3843ms]\n", - "Epoch: [ 7/ 10], step: [ 51/ 390], loss: [0.2737], avg loss: [0.3044], time: [102.9415ms]\n" + "epoch: 7 step: 1, loss is 0.4987\n", + "epoch: 7 step: 2, loss is 0.2668\n", + "epoch: 7 step: 3, loss is 0.2438\n", + "epoch: 7 step: 4, loss is 0.2162\n", + "epoch: 7 step: 5, loss is 0.2195\n", + "epoch: 7 step: 6, loss is 0.3050\n", + "epoch: 7 step: 7, loss is 0.2998\n", + "epoch: 7 step: 8, loss is 0.2066\n", + "epoch: 7 step: 9, loss is 0.2900\n", + "epoch: 7 step: 10, loss is 0.3204\n", + "epoch: 7 step: 11, loss is 0.3092\n", + "epoch: 7 step: 12, loss is 0.2089\n", + "epoch: 7 step: 13, loss is 0.4390\n", + "epoch: 7 step: 14, loss is 0.2447\n", + "epoch: 7 step: 15, loss is 0.3001\n", + "epoch: 7 step: 16, loss is 0.2784\n", + "epoch: 7 step: 17, loss is 0.3556\n", + "epoch: 7 step: 18, loss is 0.4071\n", + "epoch: 7 step: 19, loss is 0.3229\n", + "epoch: 7 step: 20, loss is 0.3676\n", + "epoch: 7 step: 21, loss is 0.4012\n", + "epoch: 7 step: 22, loss is 0.2647\n", + "epoch: 7 step: 23, loss is 0.2700\n", + "epoch: 7 step: 24, loss is 0.2553\n", + "epoch: 7 step: 25, loss is 0.3872\n", + "epoch: 7 step: 26, loss is 0.2646\n", + "epoch: 7 step: 27, loss is 0.4048\n", + "epoch: 7 step: 28, loss is 0.2702\n", + "epoch: 7 step: 29, loss is 0.2565\n", + "epoch: 7 step: 30, loss is 0.3814\n", + "epoch: 7 step: 31, loss is 0.2905\n", + "epoch: 7 step: 32, loss is 0.3505\n", + "epoch: 7 step: 33, loss is 0.2309\n", + "epoch: 7 step: 34, loss is 0.2800\n", + "epoch: 7 step: 35, loss is 0.2286\n", + "epoch: 7 step: 36, loss is 0.2181\n", + "epoch: 7 step: 37, loss is 0.3667\n", + "epoch: 7 step: 38, loss is 0.3457\n", + "epoch: 7 step: 39, loss is 0.3112\n", + "epoch: 7 step: 40, loss is 0.2804\n", + "epoch: 7 step: 41, loss is 0.2552\n", + "epoch: 7 step: 42, loss is 0.1920\n", + "epoch: 7 step: 43, loss is 0.3377\n", + "epoch: 7 step: 44, loss is 0.2705\n", + "epoch: 7 step: 45, loss is 0.4264\n", + "epoch: 7 step: 46, loss is 0.2829\n", + "epoch: 7 step: 47, loss is 0.4340\n", + "epoch: 7 step: 48, loss is 0.2982\n", + "epoch: 7 step: 49, loss is 0.2619\n", + "epoch: 7 step: 50, loss is 0.3331\n", + "epoch: 7 step: 51, loss is 0.2737\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 7/ 10], step: [ 52/ 390], loss: [0.3062], avg loss: [0.3044], time: [106.5502ms]\n", - "Epoch: [ 7/ 10], step: [ 53/ 390], loss: [0.3455], avg loss: [0.3052], time: [105.5696ms]\n", - "Epoch: [ 7/ 10], step: [ 54/ 390], loss: [0.3581], avg loss: [0.3062], time: [104.5468ms]\n", - "Epoch: [ 7/ 10], step: [ 55/ 390], loss: [0.2514], avg loss: [0.3052], time: [104.1136ms]\n", - "Epoch: [ 7/ 10], step: [ 56/ 390], loss: [0.3478], avg loss: [0.3060], time: [109.1614ms]\n", - "Epoch: [ 7/ 10], step: [ 57/ 390], loss: [0.2962], avg loss: [0.3058], time: [108.6161ms]\n", - "Epoch: [ 7/ 10], step: [ 58/ 390], loss: [0.2631], avg loss: [0.3050], time: [104.1448ms]\n", - "Epoch: [ 7/ 10], step: [ 59/ 390], loss: [0.2864], avg loss: [0.3047], time: [105.4285ms]\n", - "Epoch: [ 7/ 10], step: [ 60/ 390], loss: [0.3093], avg loss: [0.3048], time: [104.7280ms]\n", - "Epoch: [ 7/ 10], step: [ 61/ 390], loss: [0.2864], avg loss: [0.3045], time: [103.9274ms]\n", - "Epoch: [ 7/ 10], step: [ 62/ 390], loss: [0.1889], avg loss: [0.3026], time: [105.6631ms]\n", - "Epoch: [ 7/ 10], step: [ 63/ 390], loss: [0.3674], avg loss: [0.3037], time: [108.4349ms]\n", - "Epoch: [ 7/ 10], step: [ 64/ 390], loss: [0.3365], avg loss: [0.3042], time: [103.7886ms]\n", - "Epoch: [ 7/ 10], step: [ 65/ 390], loss: [0.3307], avg loss: [0.3046], time: [105.7844ms]\n", - "Epoch: [ 7/ 10], step: [ 66/ 390], loss: [0.1550], avg loss: [0.3023], time: [104.3255ms]\n", - "Epoch: [ 7/ 10], step: [ 67/ 390], loss: [0.2388], avg loss: [0.3014], time: [105.9399ms]\n", - "Epoch: [ 7/ 10], step: [ 68/ 390], loss: [0.3041], avg loss: [0.3014], time: [106.5049ms]\n", - "Epoch: [ 7/ 10], step: [ 69/ 390], loss: [0.3472], avg loss: [0.3021], time: [105.2754ms]\n", - "Epoch: [ 7/ 10], step: [ 70/ 390], loss: [0.3063], avg loss: [0.3021], time: [107.3632ms]\n", - "Epoch: [ 7/ 10], step: [ 71/ 390], loss: [0.2721], avg loss: [0.3017], time: [103.0774ms]\n", - "Epoch: [ 7/ 10], step: [ 72/ 390], loss: [0.2984], avg loss: [0.3017], time: [108.0120ms]\n", - "Epoch: [ 7/ 10], step: [ 73/ 390], loss: [0.2822], avg loss: [0.3014], time: [107.9943ms]\n", - "Epoch: [ 7/ 10], step: [ 74/ 390], loss: [0.2518], avg loss: [0.3007], time: [103.7858ms]\n", - "Epoch: [ 7/ 10], step: [ 75/ 390], loss: [0.3445], avg loss: [0.3013], time: [105.7239ms]\n", - "Epoch: [ 7/ 10], step: [ 76/ 390], loss: [0.2901], avg loss: [0.3012], time: [107.0602ms]\n", - "Epoch: [ 7/ 10], step: [ 77/ 390], loss: [0.3076], avg loss: [0.3013], time: [104.1758ms]\n", - "Epoch: [ 7/ 10], step: [ 78/ 390], loss: [0.1980], avg loss: [0.2999], time: [105.1855ms]\n", - "Epoch: [ 7/ 10], step: [ 79/ 390], loss: [0.1895], avg loss: [0.2985], time: [103.4663ms]\n", - "Epoch: [ 7/ 10], step: [ 80/ 390], loss: [0.2033], avg loss: [0.2973], time: [106.2453ms]\n", - "Epoch: [ 7/ 10], step: [ 81/ 390], loss: [0.2264], avg loss: [0.2965], time: [103.9464ms]\n", - "Epoch: [ 7/ 10], step: [ 82/ 390], loss: [0.2937], avg loss: [0.2964], time: [104.4967ms]\n", - "Epoch: [ 7/ 10], step: [ 83/ 390], loss: [0.2607], avg loss: [0.2960], time: [104.8219ms]\n", - "Epoch: [ 7/ 10], step: [ 84/ 390], loss: [0.4120], avg loss: [0.2974], time: [106.9705ms]\n", - "Epoch: [ 7/ 10], step: [ 85/ 390], loss: [0.2139], avg loss: [0.2964], time: [109.8955ms]\n", - "Epoch: [ 7/ 10], step: [ 86/ 390], loss: [0.2820], avg loss: [0.2962], time: [104.9821ms]\n", - "Epoch: [ 7/ 10], step: [ 87/ 390], loss: [0.4323], avg loss: [0.2978], time: [105.1698ms]\n", - "Epoch: [ 7/ 10], step: [ 88/ 390], loss: [0.3326], avg loss: [0.2982], time: [105.3722ms]\n", - "Epoch: [ 7/ 10], step: [ 89/ 390], loss: [0.3487], avg loss: [0.2988], time: [104.0766ms]\n", - "Epoch: [ 7/ 10], step: [ 90/ 390], loss: [0.3475], avg loss: [0.2993], time: [102.5507ms]\n", - "Epoch: [ 7/ 10], step: [ 91/ 390], loss: [0.3121], avg loss: [0.2994], time: [105.6521ms]\n", - "Epoch: [ 7/ 10], step: [ 92/ 390], loss: [0.3437], avg loss: [0.2999], time: [103.9290ms]\n", - "Epoch: [ 7/ 10], step: [ 93/ 390], loss: [0.3428], avg loss: [0.3004], time: [109.7741ms]\n", - "Epoch: [ 7/ 10], step: [ 94/ 390], loss: [0.3187], avg loss: [0.3006], time: [107.1169ms]\n", - "Epoch: [ 7/ 10], step: [ 95/ 390], loss: [0.2734], avg loss: [0.3003], time: [103.1713ms]\n", - "Epoch: [ 7/ 10], step: [ 96/ 390], loss: [0.4287], avg loss: [0.3016], time: [107.0080ms]\n", - "Epoch: [ 7/ 10], step: [ 97/ 390], loss: [0.2319], avg loss: [0.3009], time: [103.9279ms]\n", - "Epoch: [ 7/ 10], step: [ 98/ 390], loss: [0.2512], avg loss: [0.3004], time: [107.6398ms]\n", - "Epoch: [ 7/ 10], step: [ 99/ 390], loss: [0.3681], avg loss: [0.3011], time: [105.2694ms]\n", - "Epoch: [ 7/ 10], step: [ 100/ 390], loss: [0.2600], avg loss: [0.3007], time: [102.6216ms]\n", - "Epoch: [ 7/ 10], step: [ 101/ 390], loss: [0.3838], avg loss: [0.3015], time: [105.0179ms]\n", - "Epoch: [ 7/ 10], step: [ 102/ 390], loss: [0.2613], avg loss: [0.3011], time: [105.4828ms]\n", - "Epoch: [ 7/ 10], step: [ 103/ 390], loss: [0.2161], avg loss: [0.3003], time: [105.6345ms]\n", - "Epoch: [ 7/ 10], step: [ 104/ 390], loss: [0.2999], avg loss: [0.3003], time: [107.4884ms]\n", - "Epoch: [ 7/ 10], step: [ 105/ 390], loss: [0.2319], avg loss: [0.2996], time: [104.6476ms]\n", - "Epoch: [ 7/ 10], step: [ 106/ 390], loss: [0.3333], avg loss: [0.2999], time: [107.5947ms]\n", - "Epoch: [ 7/ 10], step: [ 107/ 390], loss: [0.2740], avg loss: [0.2997], time: [105.1531ms]\n", - "Epoch: [ 7/ 10], step: [ 108/ 390], loss: [0.2087], avg loss: [0.2989], time: [102.8006ms]\n", - "Epoch: [ 7/ 10], step: [ 109/ 390], loss: [0.3952], avg loss: [0.2997], time: [104.7370ms]\n", - "Epoch: [ 7/ 10], step: [ 110/ 390], loss: [0.1982], avg loss: [0.2988], time: [108.3288ms]\n", - "Epoch: [ 7/ 10], step: [ 111/ 390], loss: [0.3236], avg loss: [0.2990], time: [104.2867ms]\n", - "Epoch: [ 7/ 10], step: [ 112/ 390], loss: [0.3696], avg loss: [0.2997], time: [107.6927ms]\n", - "Epoch: [ 7/ 10], step: [ 113/ 390], loss: [0.2700], avg loss: [0.2994], time: [104.3582ms]\n", - "Epoch: [ 7/ 10], step: [ 114/ 390], loss: [0.2315], avg loss: [0.2988], time: [103.0068ms]\n", - "Epoch: [ 7/ 10], step: [ 115/ 390], loss: [0.3591], avg loss: [0.2993], time: [103.8775ms]\n", - "Epoch: [ 7/ 10], step: [ 116/ 390], loss: [0.3878], avg loss: [0.3001], time: [103.0214ms]\n", - "Epoch: [ 7/ 10], step: [ 117/ 390], loss: [0.2875], avg loss: [0.3000], time: [106.9419ms]\n", - "Epoch: [ 7/ 10], step: [ 118/ 390], loss: [0.2651], avg loss: [0.2997], time: [107.3637ms]\n", - "Epoch: [ 7/ 10], step: [ 119/ 390], loss: [0.3032], avg loss: [0.2997], time: [105.6190ms]\n", - "Epoch: [ 7/ 10], step: [ 120/ 390], loss: [0.3698], avg loss: [0.3003], time: [104.0704ms]\n", - "Epoch: [ 7/ 10], step: [ 121/ 390], loss: [0.4825], avg loss: [0.3018], time: [105.0093ms]\n", - "Epoch: [ 7/ 10], step: [ 122/ 390], loss: [0.3069], avg loss: [0.3019], time: [105.5503ms]\n", - "Epoch: [ 7/ 10], step: [ 123/ 390], loss: [0.3896], avg loss: [0.3026], time: [105.6709ms]\n", - "Epoch: [ 7/ 10], step: [ 124/ 390], loss: [0.3294], avg loss: [0.3028], time: [106.7777ms]\n", - "Epoch: [ 7/ 10], step: [ 125/ 390], loss: [0.2650], avg loss: [0.3025], time: [101.7442ms]\n", - "Epoch: [ 7/ 10], step: [ 126/ 390], loss: [0.3385], avg loss: [0.3028], time: [104.1543ms]\n", - "Epoch: [ 7/ 10], step: [ 127/ 390], loss: [0.3434], avg loss: [0.3031], time: [104.6388ms]\n", - "Epoch: [ 7/ 10], step: [ 128/ 390], loss: [0.3783], avg loss: [0.3037], time: [106.7832ms]\n", - "Epoch: [ 7/ 10], step: [ 129/ 390], loss: [0.4386], avg loss: [0.3047], time: [107.1422ms]\n", - "Epoch: [ 7/ 10], step: [ 130/ 390], loss: [0.2633], avg loss: [0.3044], time: [105.0718ms]\n", - "Epoch: [ 7/ 10], step: [ 131/ 390], loss: [0.3878], avg loss: [0.3050], time: [105.1097ms]\n", - "Epoch: [ 7/ 10], step: [ 132/ 390], loss: [0.2874], avg loss: [0.3049], time: [107.7993ms]\n", - "Epoch: [ 7/ 10], step: [ 133/ 390], loss: [0.4297], avg loss: [0.3058], time: [103.5244ms]\n", - "Epoch: [ 7/ 10], step: [ 134/ 390], loss: [0.4489], avg loss: [0.3069], time: [105.8481ms]\n", - "Epoch: [ 7/ 10], step: [ 135/ 390], loss: [0.4091], avg loss: [0.3077], time: [109.1225ms]\n", - "Epoch: [ 7/ 10], step: [ 136/ 390], loss: [0.3105], avg loss: [0.3077], time: [102.7782ms]\n", - "Epoch: [ 7/ 10], step: [ 137/ 390], loss: [0.3260], avg loss: [0.3078], time: [104.6405ms]\n", - "Epoch: [ 7/ 10], step: [ 138/ 390], loss: [0.4096], avg loss: [0.3086], time: [103.8883ms]\n", - "Epoch: [ 7/ 10], step: [ 139/ 390], loss: [0.3988], avg loss: [0.3092], time: [105.8524ms]\n", - "Epoch: [ 7/ 10], step: [ 140/ 390], loss: [0.1529], avg loss: [0.3081], time: [106.8971ms]\n" + "epoch: 7 step: 52, loss is 0.3062\n", + "epoch: 7 step: 53, loss is 0.3455\n", + "epoch: 7 step: 54, loss is 0.3581\n", + "epoch: 7 step: 55, loss is 0.2514\n", + "epoch: 7 step: 56, loss is 0.3478\n", + "epoch: 7 step: 57, loss is 0.2962\n", + "epoch: 7 step: 58, loss is 0.2631\n", + "epoch: 7 step: 59, loss is 0.2864\n", + "epoch: 7 step: 60, loss is 0.3093\n", + "epoch: 7 step: 61, loss is 0.2864\n", + "epoch: 7 step: 62, loss is 0.1889\n", + "epoch: 7 step: 63, loss is 0.3674\n", + "epoch: 7 step: 64, loss is 0.3365\n", + "epoch: 7 step: 65, loss is 0.3307\n", + "epoch: 7 step: 66, loss is 0.1550\n", + "epoch: 7 step: 67, loss is 0.2388\n", + "epoch: 7 step: 68, loss is 0.3041\n", + "epoch: 7 step: 69, loss is 0.3472\n", + "epoch: 7 step: 70, loss is 0.3063\n", + "epoch: 7 step: 71, loss is 0.2721\n", + "epoch: 7 step: 72, loss is 0.2984\n", + "epoch: 7 step: 73, loss is 0.2822\n", + "epoch: 7 step: 74, loss is 0.2518\n", + "epoch: 7 step: 75, loss is 0.3445\n", + "epoch: 7 step: 76, loss is 0.2901\n", + "epoch: 7 step: 77, loss is 0.3076\n", + "epoch: 7 step: 78, loss is 0.1980\n", + "epoch: 7 step: 79, loss is 0.1895\n", + "epoch: 7 step: 80, loss is 0.2033\n", + "epoch: 7 step: 81, loss is 0.2264\n", + "epoch: 7 step: 82, loss is 0.2937\n", + "epoch: 7 step: 83, loss is 0.2607\n", + "epoch: 7 step: 84, loss is 0.4120\n", + "epoch: 7 step: 85, loss is 0.2139\n", + "epoch: 7 step: 86, loss is 0.2820\n", + "epoch: 7 step: 87, loss is 0.4323\n", + "epoch: 7 step: 88, loss is 0.3326\n", + "epoch: 7 step: 89, loss is 0.3487\n", + "epoch: 7 step: 90, loss is 0.3475\n", + "epoch: 7 step: 91, loss is 0.3121\n", + "epoch: 7 step: 92, loss is 0.3437\n", + "epoch: 7 step: 93, loss is 0.3428\n", + "epoch: 7 step: 94, loss is 0.3187\n", + "epoch: 7 step: 95, loss is 0.2734\n", + "epoch: 7 step: 96, loss is 0.4287\n", + "epoch: 7 step: 97, loss is 0.2319\n", + "epoch: 7 step: 98, loss is 0.2512\n", + "epoch: 7 step: 99, loss is 0.3681\n", + "epoch: 7 step: 100, loss is 0.2600\n", + "epoch: 7 step: 101, loss is 0.3838\n", + "epoch: 7 step: 102, loss is 0.2613\n", + "epoch: 7 step: 103, loss is 0.2161\n", + "epoch: 7 step: 104, loss is 0.2999\n", + "epoch: 7 step: 105, loss is 0.2319\n", + "epoch: 7 step: 106, loss is 0.3333\n", + "epoch: 7 step: 107, loss is 0.2740\n", + "epoch: 7 step: 108, loss is 0.2087\n", + "epoch: 7 step: 109, loss is 0.3952\n", + "epoch: 7 step: 110, loss is 0.1982\n", + "epoch: 7 step: 111, loss is 0.3236\n", + "epoch: 7 step: 112, loss is 0.3696\n", + "epoch: 7 step: 113, loss is 0.2700\n", + "epoch: 7 step: 114, loss is 0.2315\n", + "epoch: 7 step: 115, loss is 0.3591\n", + "epoch: 7 step: 116, loss is 0.3878\n", + "epoch: 7 step: 117, loss is 0.2875\n", + "epoch: 7 step: 118, loss is 0.2651\n", + "epoch: 7 step: 119, loss is 0.3032\n", + "epoch: 7 step: 120, loss is 0.3698\n", + "epoch: 7 step: 121, loss is 0.4825\n", + "epoch: 7 step: 122, loss is 0.3069\n", + "epoch: 7 step: 123, loss is 0.3896\n", + "epoch: 7 step: 124, loss is 0.3294\n", + "epoch: 7 step: 125, loss is 0.2650\n", + "epoch: 7 step: 126, loss is 0.3385\n", + "epoch: 7 step: 127, loss is 0.3434\n", + "epoch: 7 step: 128, loss is 0.3783\n", + "epoch: 7 step: 129, loss is 0.4386\n", + "epoch: 7 step: 130, loss is 0.2633\n", + "epoch: 7 step: 131, loss is 0.3878\n", + "epoch: 7 step: 132, loss is 0.2874\n", + "epoch: 7 step: 133, loss is 0.4297\n", + "epoch: 7 step: 134, loss is 0.4489\n", + "epoch: 7 step: 135, loss is 0.4091\n", + "epoch: 7 step: 136, loss is 0.3105\n", + "epoch: 7 step: 137, loss is 0.3260\n", + "epoch: 7 step: 138, loss is 0.4096\n", + "epoch: 7 step: 139, loss is 0.3988\n", + "epoch: 7 step: 140, loss is 0.1529\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 7/ 10], step: [ 141/ 390], loss: [0.4725], avg loss: [0.3093], time: [102.4973ms]\n", - "Epoch: [ 7/ 10], step: [ 142/ 390], loss: [0.3928], avg loss: [0.3099], time: [105.4571ms]\n", - "Epoch: [ 7/ 10], step: [ 143/ 390], loss: [0.3646], avg loss: [0.3102], time: [103.7819ms]\n", - "Epoch: [ 7/ 10], step: [ 144/ 390], loss: [0.2601], avg loss: [0.3099], time: [107.0786ms]\n", - "Epoch: [ 7/ 10], step: [ 145/ 390], loss: [0.4328], avg loss: [0.3107], time: [107.2645ms]\n", - "Epoch: [ 7/ 10], step: [ 146/ 390], loss: [0.4251], avg loss: [0.3115], time: [104.9128ms]\n", - "Epoch: [ 7/ 10], step: [ 147/ 390], loss: [0.2112], avg loss: [0.3108], time: [105.6156ms]\n", - "Epoch: [ 7/ 10], step: [ 148/ 390], loss: [0.3383], avg loss: [0.3110], time: [102.7992ms]\n", - "Epoch: [ 7/ 10], step: [ 149/ 390], loss: [0.3793], avg loss: [0.3115], time: [109.6804ms]\n", - "Epoch: [ 7/ 10], step: [ 150/ 390], loss: [0.2300], avg loss: [0.3109], time: [105.2263ms]\n", - "Epoch: [ 7/ 10], step: [ 151/ 390], loss: [0.3427], avg loss: [0.3111], time: [104.9993ms]\n", - "Epoch: [ 7/ 10], step: [ 152/ 390], loss: [0.3089], avg loss: [0.3111], time: [109.7083ms]\n", - "Epoch: [ 7/ 10], step: [ 153/ 390], loss: [0.3507], avg loss: [0.3114], time: [106.1387ms]\n", - "Epoch: [ 7/ 10], step: [ 154/ 390], loss: [0.2947], avg loss: [0.3113], time: [104.9142ms]\n", - "Epoch: [ 7/ 10], step: [ 155/ 390], loss: [0.2489], avg loss: [0.3109], time: [105.5384ms]\n", - "Epoch: [ 7/ 10], step: [ 156/ 390], loss: [0.2677], avg loss: [0.3106], time: [103.9970ms]\n", - "Epoch: [ 7/ 10], step: [ 157/ 390], loss: [0.3559], avg loss: [0.3109], time: [104.3847ms]\n", - "Epoch: [ 7/ 10], step: [ 158/ 390], loss: [0.4911], avg loss: [0.3120], time: [103.8167ms]\n", - "Epoch: [ 7/ 10], step: [ 159/ 390], loss: [0.1923], avg loss: [0.3113], time: [104.1927ms]\n", - "Epoch: [ 7/ 10], step: [ 160/ 390], loss: [0.2644], avg loss: [0.3110], time: [108.0878ms]\n", - "Epoch: [ 7/ 10], step: [ 161/ 390], loss: [0.2804], avg loss: [0.3108], time: [105.1543ms]\n", - "Epoch: [ 7/ 10], step: [ 162/ 390], loss: [0.4733], avg loss: [0.3118], time: [106.6024ms]\n", - "Epoch: [ 7/ 10], step: [ 163/ 390], loss: [0.3742], avg loss: [0.3122], time: [108.6566ms]\n", - "Epoch: [ 7/ 10], step: [ 164/ 390], loss: [0.1808], avg loss: [0.3114], time: [107.8246ms]\n", - "Epoch: [ 7/ 10], step: [ 165/ 390], loss: [0.3073], avg loss: [0.3114], time: [106.7700ms]\n", - "Epoch: [ 7/ 10], step: [ 166/ 390], loss: [0.2948], avg loss: [0.3113], time: [104.8410ms]\n", - "Epoch: [ 7/ 10], step: [ 167/ 390], loss: [0.2632], avg loss: [0.3110], time: [105.4540ms]\n", - "Epoch: [ 7/ 10], step: [ 168/ 390], loss: [0.3022], avg loss: [0.3109], time: [106.5183ms]\n", - "Epoch: [ 7/ 10], step: [ 169/ 390], loss: [0.2658], avg loss: [0.3107], time: [106.8065ms]\n", - "Epoch: [ 7/ 10], step: [ 170/ 390], loss: [0.2519], avg loss: [0.3103], time: [107.7523ms]\n", - "Epoch: [ 7/ 10], step: [ 171/ 390], loss: [0.1923], avg loss: [0.3096], time: [103.9245ms]\n", - "Epoch: [ 7/ 10], step: [ 172/ 390], loss: [0.4174], avg loss: [0.3102], time: [103.5368ms]\n", - "Epoch: [ 7/ 10], step: [ 173/ 390], loss: [0.2779], avg loss: [0.3101], time: [108.7277ms]\n", - "Epoch: [ 7/ 10], step: [ 174/ 390], loss: [0.2294], avg loss: [0.3096], time: [105.7508ms]\n", - "Epoch: [ 7/ 10], step: [ 175/ 390], loss: [0.3028], avg loss: [0.3096], time: [105.8543ms]\n", - "Epoch: [ 7/ 10], step: [ 176/ 390], loss: [0.2897], avg loss: [0.3094], time: [102.3648ms]\n", - "Epoch: [ 7/ 10], step: [ 177/ 390], loss: [0.3320], avg loss: [0.3096], time: [107.7371ms]\n", - "Epoch: [ 7/ 10], step: [ 178/ 390], loss: [0.4117], avg loss: [0.3101], time: [103.9295ms]\n", - "Epoch: [ 7/ 10], step: [ 179/ 390], loss: [0.2853], avg loss: [0.3100], time: [107.1053ms]\n", - "Epoch: [ 7/ 10], step: [ 180/ 390], loss: [0.2863], avg loss: [0.3099], time: [104.0466ms]\n", - "Epoch: [ 7/ 10], step: [ 181/ 390], loss: [0.2929], avg loss: [0.3098], time: [103.6911ms]\n", - "Epoch: [ 7/ 10], step: [ 182/ 390], loss: [0.3603], avg loss: [0.3101], time: [104.2993ms]\n", - "Epoch: [ 7/ 10], step: [ 183/ 390], loss: [0.3064], avg loss: [0.3100], time: [107.1732ms]\n", - "Epoch: [ 7/ 10], step: [ 184/ 390], loss: [0.3416], avg loss: [0.3102], time: [107.9051ms]\n", - "Epoch: [ 7/ 10], step: [ 185/ 390], loss: [0.1937], avg loss: [0.3096], time: [107.5134ms]\n", - "Epoch: [ 7/ 10], step: [ 186/ 390], loss: [0.3261], avg loss: [0.3097], time: [103.9124ms]\n", - "Epoch: [ 7/ 10], step: [ 187/ 390], loss: [0.4091], avg loss: [0.3102], time: [106.4439ms]\n", - "Epoch: [ 7/ 10], step: [ 188/ 390], loss: [0.3246], avg loss: [0.3103], time: [107.5549ms]\n", - "Epoch: [ 7/ 10], step: [ 189/ 390], loss: [0.2380], avg loss: [0.3099], time: [109.7984ms]\n", - "Epoch: [ 7/ 10], step: [ 190/ 390], loss: [0.3734], avg loss: [0.3102], time: [104.4533ms]\n", - "Epoch: [ 7/ 10], step: [ 191/ 390], loss: [0.2739], avg loss: [0.3100], time: [106.0677ms]\n", - "Epoch: [ 7/ 10], step: [ 192/ 390], loss: [0.1707], avg loss: [0.3093], time: [108.5434ms]\n", - "Epoch: [ 7/ 10], step: [ 193/ 390], loss: [0.2889], avg loss: [0.3092], time: [105.4544ms]\n", - "Epoch: [ 7/ 10], step: [ 194/ 390], loss: [0.3508], avg loss: [0.3094], time: [101.5360ms]\n", - "Epoch: [ 7/ 10], step: [ 195/ 390], loss: [0.3550], avg loss: [0.3097], time: [103.0908ms]\n", - "Epoch: [ 7/ 10], step: [ 196/ 390], loss: [0.3134], avg loss: [0.3097], time: [103.9538ms]\n", - "Epoch: [ 7/ 10], step: [ 197/ 390], loss: [0.2662], avg loss: [0.3095], time: [103.5016ms]\n", - "Epoch: [ 7/ 10], step: [ 198/ 390], loss: [0.1943], avg loss: [0.3089], time: [104.9876ms]\n", - "Epoch: [ 7/ 10], step: [ 199/ 390], loss: [0.2413], avg loss: [0.3085], time: [104.7204ms]\n", - "Epoch: [ 7/ 10], step: [ 200/ 390], loss: [0.4060], avg loss: [0.3090], time: [104.4509ms]\n", - "Epoch: [ 7/ 10], step: [ 201/ 390], loss: [0.2927], avg loss: [0.3089], time: [105.2222ms]\n", - "Epoch: [ 7/ 10], step: [ 202/ 390], loss: [0.4597], avg loss: [0.3097], time: [104.9170ms]\n", - "Epoch: [ 7/ 10], step: [ 203/ 390], loss: [0.1949], avg loss: [0.3091], time: [103.9994ms]\n", - "Epoch: [ 7/ 10], step: [ 204/ 390], loss: [0.2847], avg loss: [0.3090], time: [102.7470ms]\n", - "Epoch: [ 7/ 10], step: [ 205/ 390], loss: [0.2219], avg loss: [0.3086], time: [108.4127ms]\n", - "Epoch: [ 7/ 10], step: [ 206/ 390], loss: [0.2121], avg loss: [0.3081], time: [109.1967ms]\n", - "Epoch: [ 7/ 10], step: [ 207/ 390], loss: [0.2721], avg loss: [0.3079], time: [104.8143ms]\n", - "Epoch: [ 7/ 10], step: [ 208/ 390], loss: [0.3978], avg loss: [0.3084], time: [105.2120ms]\n", - "Epoch: [ 7/ 10], step: [ 209/ 390], loss: [0.3549], avg loss: [0.3086], time: [108.5777ms]\n", - "Epoch: [ 7/ 10], step: [ 210/ 390], loss: [0.2148], avg loss: [0.3081], time: [104.9058ms]\n", - "Epoch: [ 7/ 10], step: [ 211/ 390], loss: [0.3941], avg loss: [0.3085], time: [105.1302ms]\n", - "Epoch: [ 7/ 10], step: [ 212/ 390], loss: [0.3572], avg loss: [0.3088], time: [107.9516ms]\n", - "Epoch: [ 7/ 10], step: [ 213/ 390], loss: [0.4223], avg loss: [0.3093], time: [105.4211ms]\n", - "Epoch: [ 7/ 10], step: [ 214/ 390], loss: [0.3817], avg loss: [0.3096], time: [106.9067ms]\n", - "Epoch: [ 7/ 10], step: [ 215/ 390], loss: [0.2850], avg loss: [0.3095], time: [104.4755ms]\n", - "Epoch: [ 7/ 10], step: [ 216/ 390], loss: [0.3105], avg loss: [0.3095], time: [104.7082ms]\n", - "Epoch: [ 7/ 10], step: [ 217/ 390], loss: [0.2596], avg loss: [0.3093], time: [106.2517ms]\n", - "Epoch: [ 7/ 10], step: [ 218/ 390], loss: [0.2437], avg loss: [0.3090], time: [104.4776ms]\n", - "Epoch: [ 7/ 10], step: [ 219/ 390], loss: [0.3108], avg loss: [0.3090], time: [102.9851ms]\n", - "Epoch: [ 7/ 10], step: [ 220/ 390], loss: [0.2695], avg loss: [0.3088], time: [103.1549ms]\n", - "Epoch: [ 7/ 10], step: [ 221/ 390], loss: [0.1840], avg loss: [0.3083], time: [106.4603ms]\n", - "Epoch: [ 7/ 10], step: [ 222/ 390], loss: [0.3094], avg loss: [0.3083], time: [107.9845ms]\n", - "Epoch: [ 7/ 10], step: [ 223/ 390], loss: [0.3207], avg loss: [0.3083], time: [105.9992ms]\n", - "Epoch: [ 7/ 10], step: [ 224/ 390], loss: [0.2268], avg loss: [0.3080], time: [107.3406ms]\n", - "Epoch: [ 7/ 10], step: [ 225/ 390], loss: [0.2396], avg loss: [0.3077], time: [105.1574ms]\n", - "Epoch: [ 7/ 10], step: [ 226/ 390], loss: [0.1836], avg loss: [0.3071], time: [105.2394ms]\n", - "Epoch: [ 7/ 10], step: [ 227/ 390], loss: [0.2902], avg loss: [0.3070], time: [103.7591ms]\n", - "Epoch: [ 7/ 10], step: [ 228/ 390], loss: [0.3813], avg loss: [0.3074], time: [103.8945ms]\n", - "Epoch: [ 7/ 10], step: [ 229/ 390], loss: [0.2926], avg loss: [0.3073], time: [104.3301ms]\n" + "epoch: 7 step: 141, loss is 0.4725\n", + "epoch: 7 step: 142, loss is 0.3928\n", + "epoch: 7 step: 143, loss is 0.3646\n", + "epoch: 7 step: 144, loss is 0.2601\n", + "epoch: 7 step: 145, loss is 0.4328\n", + "epoch: 7 step: 146, loss is 0.4251\n", + "epoch: 7 step: 147, loss is 0.2112\n", + "epoch: 7 step: 148, loss is 0.3383\n", + "epoch: 7 step: 149, loss is 0.3793\n", + "epoch: 7 step: 150, loss is 0.2300\n", + "epoch: 7 step: 151, loss is 0.3427\n", + "epoch: 7 step: 152, loss is 0.3089\n", + "epoch: 7 step: 153, loss is 0.3507\n", + "epoch: 7 step: 154, loss is 0.2947\n", + "epoch: 7 step: 155, loss is 0.2489\n", + "epoch: 7 step: 156, loss is 0.2677\n", + "epoch: 7 step: 157, loss is 0.3559\n", + "epoch: 7 step: 158, loss is 0.4911\n", + "epoch: 7 step: 159, loss is 0.1923\n", + "epoch: 7 step: 160, loss is 0.2644\n", + "epoch: 7 step: 161, loss is 0.2804\n", + "epoch: 7 step: 162, loss is 0.4733\n", + "epoch: 7 step: 163, loss is 0.3742\n", + "epoch: 7 step: 164, loss is 0.1808\n", + "epoch: 7 step: 165, loss is 0.3073\n", + "epoch: 7 step: 166, loss is 0.2948\n", + "epoch: 7 step: 167, loss is 0.2632\n", + "epoch: 7 step: 168, loss is 0.3022\n", + "epoch: 7 step: 169, loss is 0.2658\n", + "epoch: 7 step: 170, loss is 0.2519\n", + "epoch: 7 step: 171, loss is 0.1923\n", + "epoch: 7 step: 172, loss is 0.4174\n", + "epoch: 7 step: 173, loss is 0.2779\n", + "epoch: 7 step: 174, loss is 0.2294\n", + "epoch: 7 step: 175, loss is 0.3028\n", + "epoch: 7 step: 176, loss is 0.2897\n", + "epoch: 7 step: 177, loss is 0.3320\n", + "epoch: 7 step: 178, loss is 0.4117\n", + "epoch: 7 step: 179, loss is 0.2853\n", + "epoch: 7 step: 180, loss is 0.2863\n", + "epoch: 7 step: 181, loss is 0.2929\n", + "epoch: 7 step: 182, loss is 0.3603\n", + "epoch: 7 step: 183, loss is 0.3064\n", + "epoch: 7 step: 184, loss is 0.3416\n", + "epoch: 7 step: 185, loss is 0.1937\n", + "epoch: 7 step: 186, loss is 0.3261\n", + "epoch: 7 step: 187, loss is 0.4091\n", + "epoch: 7 step: 188, loss is 0.3246\n", + "epoch: 7 step: 189, loss is 0.2380\n", + "epoch: 7 step: 190, loss is 0.3734\n", + "epoch: 7 step: 191, loss is 0.2739\n", + "epoch: 7 step: 192, loss is 0.1707\n", + "epoch: 7 step: 193, loss is 0.2889\n", + "epoch: 7 step: 194, loss is 0.3508\n", + "epoch: 7 step: 195, loss is 0.3550\n", + "epoch: 7 step: 196, loss is 0.3134\n", + "epoch: 7 step: 197, loss is 0.2662\n", + "epoch: 7 step: 198, loss is 0.1943\n", + "epoch: 7 step: 199, loss is 0.2413\n", + "epoch: 7 step: 200, loss is 0.4060\n", + "epoch: 7 step: 201, loss is 0.2927\n", + "epoch: 7 step: 202, loss is 0.4597\n", + "epoch: 7 step: 203, loss is 0.1949\n", + "epoch: 7 step: 204, loss is 0.2847\n", + "epoch: 7 step: 205, loss is 0.2219\n", + "epoch: 7 step: 206, loss is 0.2121\n", + "epoch: 7 step: 207, loss is 0.2721\n", + "epoch: 7 step: 208, loss is 0.3978\n", + "epoch: 7 step: 209, loss is 0.3549\n", + "epoch: 7 step: 210, loss is 0.2148\n", + "epoch: 7 step: 211, loss is 0.3941\n", + "epoch: 7 step: 212, loss is 0.3572\n", + "epoch: 7 step: 213, loss is 0.4223\n", + "epoch: 7 step: 214, loss is 0.3817\n", + "epoch: 7 step: 215, loss is 0.2850\n", + "epoch: 7 step: 216, loss is 0.3105\n", + "epoch: 7 step: 217, loss is 0.2596\n", + "epoch: 7 step: 218, loss is 0.2437\n", + "epoch: 7 step: 219, loss is 0.3108\n", + "epoch: 7 step: 220, loss is 0.2695\n", + "epoch: 7 step: 221, loss is 0.1840\n", + "epoch: 7 step: 222, loss is 0.3094\n", + "epoch: 7 step: 223, loss is 0.3207\n", + "epoch: 7 step: 224, loss is 0.2268\n", + "epoch: 7 step: 225, loss is 0.2396\n", + "epoch: 7 step: 226, loss is 0.1836\n", + "epoch: 7 step: 227, loss is 0.2902\n", + "epoch: 7 step: 228, loss is 0.3813\n", + "epoch: 7 step: 229, loss is 0.2926\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 7/ 10], step: [ 230/ 390], loss: [0.4031], avg loss: [0.3077], time: [104.8095ms]\n", - "Epoch: [ 7/ 10], step: [ 231/ 390], loss: [0.2659], avg loss: [0.3075], time: [106.7090ms]\n", - "Epoch: [ 7/ 10], step: [ 232/ 390], loss: [0.4359], avg loss: [0.3081], time: [105.9318ms]\n", - "Epoch: [ 7/ 10], step: [ 233/ 390], loss: [0.2296], avg loss: [0.3078], time: [104.9607ms]\n", - "Epoch: [ 7/ 10], step: [ 234/ 390], loss: [0.3760], avg loss: [0.3080], time: [104.5735ms]\n", - "Epoch: [ 7/ 10], step: [ 235/ 390], loss: [0.1930], avg loss: [0.3076], time: [106.2450ms]\n", - "Epoch: [ 7/ 10], step: [ 236/ 390], loss: [0.4012], avg loss: [0.3080], time: [105.0429ms]\n", - "Epoch: [ 7/ 10], step: [ 237/ 390], loss: [0.1525], avg loss: [0.3073], time: [103.3261ms]\n", - "Epoch: [ 7/ 10], step: [ 238/ 390], loss: [0.4822], avg loss: [0.3080], time: [105.6840ms]\n", - "Epoch: [ 7/ 10], step: [ 239/ 390], loss: [0.2978], avg loss: [0.3080], time: [108.5942ms]\n", - "Epoch: [ 7/ 10], step: [ 240/ 390], loss: [0.2879], avg loss: [0.3079], time: [105.2735ms]\n", - "Epoch: [ 7/ 10], step: [ 241/ 390], loss: [0.3184], avg loss: [0.3079], time: [103.0774ms]\n", - "Epoch: [ 7/ 10], step: [ 242/ 390], loss: [0.3067], avg loss: [0.3079], time: [108.2737ms]\n", - "Epoch: [ 7/ 10], step: [ 243/ 390], loss: [0.3059], avg loss: [0.3079], time: [105.4873ms]\n", - "Epoch: [ 7/ 10], step: [ 244/ 390], loss: [0.3247], avg loss: [0.3080], time: [107.8405ms]\n", - "Epoch: [ 7/ 10], step: [ 245/ 390], loss: [0.5435], avg loss: [0.3090], time: [104.5954ms]\n", - "Epoch: [ 7/ 10], step: [ 246/ 390], loss: [0.3728], avg loss: [0.3092], time: [105.0222ms]\n", - "Epoch: [ 7/ 10], step: [ 247/ 390], loss: [0.3015], avg loss: [0.3092], time: [104.3298ms]\n", - "Epoch: [ 7/ 10], step: [ 248/ 390], loss: [0.2837], avg loss: [0.3091], time: [105.3295ms]\n", - "Epoch: [ 7/ 10], step: [ 249/ 390], loss: [0.2077], avg loss: [0.3087], time: [104.7432ms]\n", - "Epoch: [ 7/ 10], step: [ 250/ 390], loss: [0.1852], avg loss: [0.3082], time: [107.6858ms]\n", - "Epoch: [ 7/ 10], step: [ 251/ 390], loss: [0.2704], avg loss: [0.3080], time: [105.6550ms]\n", - "Epoch: [ 7/ 10], step: [ 252/ 390], loss: [0.3132], avg loss: [0.3081], time: [105.1145ms]\n", - "Epoch: [ 7/ 10], step: [ 253/ 390], loss: [0.2244], avg loss: [0.3077], time: [108.8803ms]\n", - "Epoch: [ 7/ 10], step: [ 254/ 390], loss: [0.2337], avg loss: [0.3074], time: [104.8040ms]\n", - "Epoch: [ 7/ 10], step: [ 255/ 390], loss: [0.2662], avg loss: [0.3073], time: [105.6545ms]\n", - "Epoch: [ 7/ 10], step: [ 256/ 390], loss: [0.1683], avg loss: [0.3067], time: [105.4611ms]\n", - "Epoch: [ 7/ 10], step: [ 257/ 390], loss: [0.3610], avg loss: [0.3069], time: [106.9396ms]\n", - "Epoch: [ 7/ 10], step: [ 258/ 390], loss: [0.2154], avg loss: [0.3066], time: [105.5369ms]\n", - "Epoch: [ 7/ 10], step: [ 259/ 390], loss: [0.3245], avg loss: [0.3067], time: [105.3894ms]\n", - "Epoch: [ 7/ 10], step: [ 260/ 390], loss: [0.3826], avg loss: [0.3069], time: [107.1212ms]\n", - "Epoch: [ 7/ 10], step: [ 261/ 390], loss: [0.4108], avg loss: [0.3073], time: [106.4579ms]\n", - "Epoch: [ 7/ 10], step: [ 262/ 390], loss: [0.2967], avg loss: [0.3073], time: [107.2414ms]\n", - "Epoch: [ 7/ 10], step: [ 263/ 390], loss: [0.2311], avg loss: [0.3070], time: [103.9524ms]\n", - "Epoch: [ 7/ 10], step: [ 264/ 390], loss: [0.3229], avg loss: [0.3071], time: [104.0058ms]\n", - "Epoch: [ 7/ 10], step: [ 265/ 390], loss: [0.3456], avg loss: [0.3072], time: [105.0808ms]\n", - "Epoch: [ 7/ 10], step: [ 266/ 390], loss: [0.2595], avg loss: [0.3070], time: [108.4282ms]\n", - "Epoch: [ 7/ 10], step: [ 267/ 390], loss: [0.2446], avg loss: [0.3068], time: [106.0038ms]\n", - "Epoch: [ 7/ 10], step: [ 268/ 390], loss: [0.2589], avg loss: [0.3066], time: [104.6507ms]\n", - "Epoch: [ 7/ 10], step: [ 269/ 390], loss: [0.3324], avg loss: [0.3067], time: [102.9410ms]\n", - "Epoch: [ 7/ 10], step: [ 270/ 390], loss: [0.2709], avg loss: [0.3066], time: [105.9430ms]\n", - "Epoch: [ 7/ 10], step: [ 271/ 390], loss: [0.3636], avg loss: [0.3068], time: [104.7845ms]\n", - "Epoch: [ 7/ 10], step: [ 272/ 390], loss: [0.3574], avg loss: [0.3070], time: [108.6175ms]\n", - "Epoch: [ 7/ 10], step: [ 273/ 390], loss: [0.3321], avg loss: [0.3071], time: [104.9302ms]\n", - "Epoch: [ 7/ 10], step: [ 274/ 390], loss: [0.2917], avg loss: [0.3070], time: [108.9404ms]\n", - "Epoch: [ 7/ 10], step: [ 275/ 390], loss: [0.2740], avg loss: [0.3069], time: [105.9654ms]\n", - "Epoch: [ 7/ 10], step: [ 276/ 390], loss: [0.2684], avg loss: [0.3068], time: [103.2541ms]\n", - "Epoch: [ 7/ 10], step: [ 277/ 390], loss: [0.2436], avg loss: [0.3065], time: [102.8297ms]\n", - "Epoch: [ 7/ 10], step: [ 278/ 390], loss: [0.4741], avg loss: [0.3071], time: [106.4193ms]\n", - "Epoch: [ 7/ 10], step: [ 279/ 390], loss: [0.3996], avg loss: [0.3075], time: [107.2798ms]\n", - "Epoch: [ 7/ 10], step: [ 280/ 390], loss: [0.3023], avg loss: [0.3075], time: [105.6099ms]\n", - "Epoch: [ 7/ 10], step: [ 281/ 390], loss: [0.2293], avg loss: [0.3072], time: [105.9947ms]\n", - "Epoch: [ 7/ 10], step: [ 282/ 390], loss: [0.3209], avg loss: [0.3072], time: [104.4874ms]\n", - "Epoch: [ 7/ 10], step: [ 283/ 390], loss: [0.3115], avg loss: [0.3072], time: [105.0091ms]\n", - "Epoch: [ 7/ 10], step: [ 284/ 390], loss: [0.2205], avg loss: [0.3069], time: [104.9547ms]\n", - "Epoch: [ 7/ 10], step: [ 285/ 390], loss: [0.2650], avg loss: [0.3068], time: [106.4470ms]\n", - "Epoch: [ 7/ 10], step: [ 286/ 390], loss: [0.3380], avg loss: [0.3069], time: [105.9015ms]\n", - "Epoch: [ 7/ 10], step: [ 287/ 390], loss: [0.4386], avg loss: [0.3074], time: [108.6755ms]\n", - "Epoch: [ 7/ 10], step: [ 288/ 390], loss: [0.3113], avg loss: [0.3074], time: [106.8857ms]\n", - "Epoch: [ 7/ 10], step: [ 289/ 390], loss: [0.3227], avg loss: [0.3074], time: [105.7277ms]\n", - "Epoch: [ 7/ 10], step: [ 290/ 390], loss: [0.2071], avg loss: [0.3071], time: [105.9880ms]\n", - "Epoch: [ 7/ 10], step: [ 291/ 390], loss: [0.3814], avg loss: [0.3073], time: [105.9330ms]\n", - "Epoch: [ 7/ 10], step: [ 292/ 390], loss: [0.2602], avg loss: [0.3072], time: [108.2239ms]\n", - "Epoch: [ 7/ 10], step: [ 293/ 390], loss: [0.2281], avg loss: [0.3069], time: [103.5922ms]\n", - "Epoch: [ 7/ 10], step: [ 294/ 390], loss: [0.4244], avg loss: [0.3073], time: [108.8440ms]\n", - "Epoch: [ 7/ 10], step: [ 295/ 390], loss: [0.3539], avg loss: [0.3075], time: [105.6216ms]\n", - "Epoch: [ 7/ 10], step: [ 296/ 390], loss: [0.3055], avg loss: [0.3075], time: [103.5883ms]\n", - "Epoch: [ 7/ 10], step: [ 297/ 390], loss: [0.2855], avg loss: [0.3074], time: [103.2639ms]\n", - "Epoch: [ 7/ 10], step: [ 298/ 390], loss: [0.3432], avg loss: [0.3075], time: [106.2555ms]\n", - "Epoch: [ 7/ 10], step: [ 299/ 390], loss: [0.2286], avg loss: [0.3072], time: [110.4863ms]\n", - "Epoch: [ 7/ 10], step: [ 300/ 390], loss: [0.3493], avg loss: [0.3074], time: [103.4355ms]\n", - "Epoch: [ 7/ 10], step: [ 301/ 390], loss: [0.4564], avg loss: [0.3079], time: [108.3748ms]\n", - "Epoch: [ 7/ 10], step: [ 302/ 390], loss: [0.2489], avg loss: [0.3077], time: [104.0170ms]\n", - "Epoch: [ 7/ 10], step: [ 303/ 390], loss: [0.2173], avg loss: [0.3074], time: [106.5309ms]\n", - "Epoch: [ 7/ 10], step: [ 304/ 390], loss: [0.3805], avg loss: [0.3076], time: [104.9554ms]\n", - "Epoch: [ 7/ 10], step: [ 305/ 390], loss: [0.1876], avg loss: [0.3072], time: [104.3003ms]\n", - "Epoch: [ 7/ 10], step: [ 306/ 390], loss: [0.4118], avg loss: [0.3076], time: [104.6221ms]\n", - "Epoch: [ 7/ 10], step: [ 307/ 390], loss: [0.2634], avg loss: [0.3074], time: [105.2783ms]\n", - "Epoch: [ 7/ 10], step: [ 308/ 390], loss: [0.3567], avg loss: [0.3076], time: [104.6295ms]\n", - "Epoch: [ 7/ 10], step: [ 309/ 390], loss: [0.4348], avg loss: [0.3080], time: [108.4967ms]\n", - "Epoch: [ 7/ 10], step: [ 310/ 390], loss: [0.2597], avg loss: [0.3078], time: [105.5329ms]\n", - "Epoch: [ 7/ 10], step: [ 311/ 390], loss: [0.2622], avg loss: [0.3077], time: [108.0813ms]\n", - "Epoch: [ 7/ 10], step: [ 312/ 390], loss: [0.3840], avg loss: [0.3079], time: [107.1780ms]\n", - "Epoch: [ 7/ 10], step: [ 313/ 390], loss: [0.2901], avg loss: [0.3079], time: [105.6113ms]\n", - "Epoch: [ 7/ 10], step: [ 314/ 390], loss: [0.3276], avg loss: [0.3079], time: [108.1054ms]\n", - "Epoch: [ 7/ 10], step: [ 315/ 390], loss: [0.2987], avg loss: [0.3079], time: [108.2742ms]\n", - "Epoch: [ 7/ 10], step: [ 316/ 390], loss: [0.2979], avg loss: [0.3079], time: [105.0766ms]\n", - "Epoch: [ 7/ 10], step: [ 317/ 390], loss: [0.3587], avg loss: [0.3080], time: [106.2484ms]\n", - "Epoch: [ 7/ 10], step: [ 318/ 390], loss: [0.3245], avg loss: [0.3081], time: [107.5473ms]\n" + "epoch: 7 step: 230, loss is 0.4031\n", + "epoch: 7 step: 231, loss is 0.2659\n", + "epoch: 7 step: 232, loss is 0.4359\n", + "epoch: 7 step: 233, loss is 0.2296\n", + "epoch: 7 step: 234, loss is 0.3760\n", + "epoch: 7 step: 235, loss is 0.1930\n", + "epoch: 7 step: 236, loss is 0.4012\n", + "epoch: 7 step: 237, loss is 0.1525\n", + "epoch: 7 step: 238, loss is 0.4822\n", + "epoch: 7 step: 239, loss is 0.2978\n", + "epoch: 7 step: 240, loss is 0.2879\n", + "epoch: 7 step: 241, loss is 0.3184\n", + "epoch: 7 step: 242, loss is 0.3067\n", + "epoch: 7 step: 243, loss is 0.3059\n", + "epoch: 7 step: 244, loss is 0.3247\n", + "epoch: 7 step: 245, loss is 0.5435\n", + "epoch: 7 step: 246, loss is 0.3728\n", + "epoch: 7 step: 247, loss is 0.3015\n", + "epoch: 7 step: 248, loss is 0.2837\n", + "epoch: 7 step: 249, loss is 0.2077\n", + "epoch: 7 step: 250, loss is 0.1852\n", + "epoch: 7 step: 251, loss is 0.2704\n", + "epoch: 7 step: 252, loss is 0.3132\n", + "epoch: 7 step: 253, loss is 0.2244\n", + "epoch: 7 step: 254, loss is 0.2337\n", + "epoch: 7 step: 255, loss is 0.2662\n", + "epoch: 7 step: 256, loss is 0.1683\n", + "epoch: 7 step: 257, loss is 0.3610\n", + "epoch: 7 step: 258, loss is 0.2154\n", + "epoch: 7 step: 259, loss is 0.3245\n", + "epoch: 7 step: 260, loss is 0.3826\n", + "epoch: 7 step: 261, loss is 0.4108\n", + "epoch: 7 step: 262, loss is 0.2967\n", + "epoch: 7 step: 263, loss is 0.2311\n", + "epoch: 7 step: 264, loss is 0.3229\n", + "epoch: 7 step: 265, loss is 0.3456\n", + "epoch: 7 step: 266, loss is 0.2595\n", + "epoch: 7 step: 267, loss is 0.2446\n", + "epoch: 7 step: 268, loss is 0.2589\n", + "epoch: 7 step: 269, loss is 0.3324\n", + "epoch: 7 step: 270, loss is 0.2709\n", + "epoch: 7 step: 271, loss is 0.3636\n", + "epoch: 7 step: 272, loss is 0.3574\n", + "epoch: 7 step: 273, loss is 0.3321\n", + "epoch: 7 step: 274, loss is 0.2917\n", + "epoch: 7 step: 275, loss is 0.2740\n", + "epoch: 7 step: 276, loss is 0.2684\n", + "epoch: 7 step: 277, loss is 0.2436\n", + "epoch: 7 step: 278, loss is 0.4741\n", + "epoch: 7 step: 279, loss is 0.3996\n", + "epoch: 7 step: 280, loss is 0.3023\n", + "epoch: 7 step: 281, loss is 0.2293\n", + "epoch: 7 step: 282, loss is 0.3209\n", + "epoch: 7 step: 283, loss is 0.3115\n", + "epoch: 7 step: 284, loss is 0.2205\n", + "epoch: 7 step: 285, loss is 0.2650\n", + "epoch: 7 step: 286, loss is 0.3380\n", + "epoch: 7 step: 287, loss is 0.4386\n", + "epoch: 7 step: 288, loss is 0.3113\n", + "epoch: 7 step: 289, loss is 0.3227\n", + "epoch: 7 step: 290, loss is 0.2071\n", + "epoch: 7 step: 291, loss is 0.3814\n", + "epoch: 7 step: 292, loss is 0.2602\n", + "epoch: 7 step: 293, loss is 0.2281\n", + "epoch: 7 step: 294, loss is 0.4244\n", + "epoch: 7 step: 295, loss is 0.3539\n", + "epoch: 7 step: 296, loss is 0.3055\n", + "epoch: 7 step: 297, loss is 0.2855\n", + "epoch: 7 step: 298, loss is 0.3432\n", + "epoch: 7 step: 299, loss is 0.2286\n", + "epoch: 7 step: 300, loss is 0.3493\n", + "epoch: 7 step: 301, loss is 0.4564\n", + "epoch: 7 step: 302, loss is 0.2489\n", + "epoch: 7 step: 303, loss is 0.2173\n", + "epoch: 7 step: 304, loss is 0.3805\n", + "epoch: 7 step: 305, loss is 0.1876\n", + "epoch: 7 step: 306, loss is 0.4118\n", + "epoch: 7 step: 307, loss is 0.2634\n", + "epoch: 7 step: 308, loss is 0.3567\n", + "epoch: 7 step: 309, loss is 0.4348\n", + "epoch: 7 step: 310, loss is 0.2597\n", + "epoch: 7 step: 311, loss is 0.2622\n", + "epoch: 7 step: 312, loss is 0.3840\n", + "epoch: 7 step: 313, loss is 0.2901\n", + "epoch: 7 step: 314, loss is 0.3276\n", + "epoch: 7 step: 315, loss is 0.2987\n", + "epoch: 7 step: 316, loss is 0.2979\n", + "epoch: 7 step: 317, loss is 0.3587\n", + "epoch: 7 step: 318, loss is 0.3245\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 7/ 10], step: [ 319/ 390], loss: [0.2874], avg loss: [0.3080], time: [110.2710ms]\n", - "Epoch: [ 7/ 10], step: [ 320/ 390], loss: [0.2773], avg loss: [0.3079], time: [105.9952ms]\n", - "Epoch: [ 7/ 10], step: [ 321/ 390], loss: [0.3119], avg loss: [0.3079], time: [103.2131ms]\n", - "Epoch: [ 7/ 10], step: [ 322/ 390], loss: [0.5180], avg loss: [0.3086], time: [105.6886ms]\n", - "Epoch: [ 7/ 10], step: [ 323/ 390], loss: [0.2819], avg loss: [0.3085], time: [108.1693ms]\n", - "Epoch: [ 7/ 10], step: [ 324/ 390], loss: [0.2582], avg loss: [0.3084], time: [105.5784ms]\n", - "Epoch: [ 7/ 10], step: [ 325/ 390], loss: [0.3137], avg loss: [0.3084], time: [107.8506ms]\n", - "Epoch: [ 7/ 10], step: [ 326/ 390], loss: [0.3719], avg loss: [0.3086], time: [105.4270ms]\n", - "Epoch: [ 7/ 10], step: [ 327/ 390], loss: [0.2965], avg loss: [0.3085], time: [106.1039ms]\n", - "Epoch: [ 7/ 10], step: [ 328/ 390], loss: [0.2923], avg loss: [0.3085], time: [104.4450ms]\n", - "Epoch: [ 7/ 10], step: [ 329/ 390], loss: [0.2939], avg loss: [0.3084], time: [105.1989ms]\n", - "Epoch: [ 7/ 10], step: [ 330/ 390], loss: [0.2711], avg loss: [0.3083], time: [106.4591ms]\n", - "Epoch: [ 7/ 10], step: [ 331/ 390], loss: [0.2564], avg loss: [0.3082], time: [104.8388ms]\n", - "Epoch: [ 7/ 10], step: [ 332/ 390], loss: [0.2319], avg loss: [0.3079], time: [108.9432ms]\n", - "Epoch: [ 7/ 10], step: [ 333/ 390], loss: [0.2975], avg loss: [0.3079], time: [110.3246ms]\n", - "Epoch: [ 7/ 10], step: [ 334/ 390], loss: [0.6099], avg loss: [0.3088], time: [104.0637ms]\n", - "Epoch: [ 7/ 10], step: [ 335/ 390], loss: [0.3109], avg loss: [0.3088], time: [106.6015ms]\n", - "Epoch: [ 7/ 10], step: [ 336/ 390], loss: [0.1355], avg loss: [0.3083], time: [103.8537ms]\n", - "Epoch: [ 7/ 10], step: [ 337/ 390], loss: [0.4506], avg loss: [0.3087], time: [104.6624ms]\n", - "Epoch: [ 7/ 10], step: [ 338/ 390], loss: [0.4515], avg loss: [0.3091], time: [108.9067ms]\n", - "Epoch: [ 7/ 10], step: [ 339/ 390], loss: [0.3207], avg loss: [0.3092], time: [106.6246ms]\n", - "Epoch: [ 7/ 10], step: [ 340/ 390], loss: [0.3045], avg loss: [0.3092], time: [102.9556ms]\n", - "Epoch: [ 7/ 10], step: [ 341/ 390], loss: [0.2666], avg loss: [0.3090], time: [108.9008ms]\n", - "Epoch: [ 7/ 10], step: [ 342/ 390], loss: [0.4119], avg loss: [0.3093], time: [106.6031ms]\n", - "Epoch: [ 7/ 10], step: [ 343/ 390], loss: [0.2923], avg loss: [0.3093], time: [105.3276ms]\n", - "Epoch: [ 7/ 10], step: [ 344/ 390], loss: [0.3069], avg loss: [0.3093], time: [105.5856ms]\n", - "Epoch: [ 7/ 10], step: [ 345/ 390], loss: [0.2237], avg loss: [0.3090], time: [104.8748ms]\n", - "Epoch: [ 7/ 10], step: [ 346/ 390], loss: [0.2427], avg loss: [0.3088], time: [105.4814ms]\n", - "Epoch: [ 7/ 10], step: [ 347/ 390], loss: [0.2578], avg loss: [0.3087], time: [109.9048ms]\n", - "Epoch: [ 7/ 10], step: [ 348/ 390], loss: [0.3885], avg loss: [0.3089], time: [105.2608ms]\n", - "Epoch: [ 7/ 10], step: [ 349/ 390], loss: [0.2785], avg loss: [0.3088], time: [102.2446ms]\n", - "Epoch: [ 7/ 10], step: [ 350/ 390], loss: [0.3561], avg loss: [0.3090], time: [105.5462ms]\n", - "Epoch: [ 7/ 10], step: [ 351/ 390], loss: [0.4515], avg loss: [0.3094], time: [106.4477ms]\n", - "Epoch: [ 7/ 10], step: [ 352/ 390], loss: [0.2931], avg loss: [0.3093], time: [104.6855ms]\n", - "Epoch: [ 7/ 10], step: [ 353/ 390], loss: [0.3824], avg loss: [0.3095], time: [105.3936ms]\n", - "Epoch: [ 7/ 10], step: [ 354/ 390], loss: [0.1658], avg loss: [0.3091], time: [107.2855ms]\n", - "Epoch: [ 7/ 10], step: [ 355/ 390], loss: [0.4529], avg loss: [0.3095], time: [104.5136ms]\n", - "Epoch: [ 7/ 10], step: [ 356/ 390], loss: [0.3766], avg loss: [0.3097], time: [104.5198ms]\n", - "Epoch: [ 7/ 10], step: [ 357/ 390], loss: [0.2821], avg loss: [0.3097], time: [103.3289ms]\n", - "Epoch: [ 7/ 10], step: [ 358/ 390], loss: [0.2354], avg loss: [0.3094], time: [108.2060ms]\n", - "Epoch: [ 7/ 10], step: [ 359/ 390], loss: [0.3754], avg loss: [0.3096], time: [103.9968ms]\n", - "Epoch: [ 7/ 10], step: [ 360/ 390], loss: [0.3338], avg loss: [0.3097], time: [101.7914ms]\n", - "Epoch: [ 7/ 10], step: [ 361/ 390], loss: [0.3404], avg loss: [0.3098], time: [106.6372ms]\n", - "Epoch: [ 7/ 10], step: [ 362/ 390], loss: [0.5074], avg loss: [0.3103], time: [103.7226ms]\n", - "Epoch: [ 7/ 10], step: [ 363/ 390], loss: [0.3289], avg loss: [0.3104], time: [107.6303ms]\n", - "Epoch: [ 7/ 10], step: [ 364/ 390], loss: [0.2627], avg loss: [0.3102], time: [105.5136ms]\n", - "Epoch: [ 7/ 10], step: [ 365/ 390], loss: [0.3471], avg loss: [0.3103], time: [105.0911ms]\n", - "Epoch: [ 7/ 10], step: [ 366/ 390], loss: [0.3044], avg loss: [0.3103], time: [105.0951ms]\n", - "Epoch: [ 7/ 10], step: [ 367/ 390], loss: [0.4036], avg loss: [0.3106], time: [103.2827ms]\n", - "Epoch: [ 7/ 10], step: [ 368/ 390], loss: [0.3972], avg loss: [0.3108], time: [103.8237ms]\n", - "Epoch: [ 7/ 10], step: [ 369/ 390], loss: [0.3652], avg loss: [0.3110], time: [105.3357ms]\n", - "Epoch: [ 7/ 10], step: [ 370/ 390], loss: [0.3068], avg loss: [0.3110], time: [107.9171ms]\n", - "Epoch: [ 7/ 10], step: [ 371/ 390], loss: [0.2776], avg loss: [0.3109], time: [105.5713ms]\n", - "Epoch: [ 7/ 10], step: [ 372/ 390], loss: [0.3689], avg loss: [0.3110], time: [108.3863ms]\n", - "Epoch: [ 7/ 10], step: [ 373/ 390], loss: [0.3331], avg loss: [0.3111], time: [104.6934ms]\n", - "Epoch: [ 7/ 10], step: [ 374/ 390], loss: [0.3642], avg loss: [0.3112], time: [105.8490ms]\n", - "Epoch: [ 7/ 10], step: [ 375/ 390], loss: [0.4690], avg loss: [0.3116], time: [106.0085ms]\n", - "Epoch: [ 7/ 10], step: [ 376/ 390], loss: [0.3052], avg loss: [0.3116], time: [104.5957ms]\n", - "Epoch: [ 7/ 10], step: [ 377/ 390], loss: [0.2689], avg loss: [0.3115], time: [106.7445ms]\n", - "Epoch: [ 7/ 10], step: [ 378/ 390], loss: [0.5337], avg loss: [0.3121], time: [107.4522ms]\n", - "Epoch: [ 7/ 10], step: [ 379/ 390], loss: [0.2856], avg loss: [0.3120], time: [103.2515ms]\n", - "Epoch: [ 7/ 10], step: [ 380/ 390], loss: [0.2056], avg loss: [0.3118], time: [108.4552ms]\n", - "Epoch: [ 7/ 10], step: [ 381/ 390], loss: [0.3496], avg loss: [0.3119], time: [102.8993ms]\n", - "Epoch: [ 7/ 10], step: [ 382/ 390], loss: [0.3747], avg loss: [0.3120], time: [107.6214ms]\n", - "Epoch: [ 7/ 10], step: [ 383/ 390], loss: [0.2499], avg loss: [0.3119], time: [104.1596ms]\n", - "Epoch: [ 7/ 10], step: [ 384/ 390], loss: [0.3007], avg loss: [0.3118], time: [109.2494ms]\n", - "Epoch: [ 7/ 10], step: [ 385/ 390], loss: [0.2983], avg loss: [0.3118], time: [104.3456ms]\n", - "Epoch: [ 7/ 10], step: [ 386/ 390], loss: [0.3484], avg loss: [0.3119], time: [107.7156ms]\n", - "Epoch: [ 7/ 10], step: [ 387/ 390], loss: [0.3087], avg loss: [0.3119], time: [109.5369ms]\n", - "Epoch: [ 7/ 10], step: [ 388/ 390], loss: [0.3337], avg loss: [0.3119], time: [107.5892ms]\n", - "Epoch: [ 7/ 10], step: [ 389/ 390], loss: [0.2782], avg loss: [0.3118], time: [102.8912ms]\n", - "Epoch: [ 7/ 10], step: [ 390/ 390], loss: [0.2050], avg loss: [0.3116], time: [913.0900ms]\n", - "Epoch time: 42366.964, per step time: 108.633\n", + "epoch: 7 step: 319, loss is 0.2874\n", + "epoch: 7 step: 320, loss is 0.2773\n", + "epoch: 7 step: 321, loss is 0.3119\n", + "epoch: 7 step: 322, loss is 0.5180\n", + "epoch: 7 step: 323, loss is 0.2819\n", + "epoch: 7 step: 324, loss is 0.2582\n", + "epoch: 7 step: 325, loss is 0.3137\n", + "epoch: 7 step: 326, loss is 0.3719\n", + "epoch: 7 step: 327, loss is 0.2965\n", + "epoch: 7 step: 328, loss is 0.2923\n", + "epoch: 7 step: 329, loss is 0.2939\n", + "epoch: 7 step: 330, loss is 0.2711\n", + "epoch: 7 step: 331, loss is 0.2564\n", + "epoch: 7 step: 332, loss is 0.2319\n", + "epoch: 7 step: 333, loss is 0.2975\n", + "epoch: 7 step: 334, loss is 0.6099\n", + "epoch: 7 step: 335, loss is 0.3109\n", + "epoch: 7 step: 336, loss is 0.1355\n", + "epoch: 7 step: 337, loss is 0.4506\n", + "epoch: 7 step: 338, loss is 0.4515\n", + "epoch: 7 step: 339, loss is 0.3207\n", + "epoch: 7 step: 340, loss is 0.3045\n", + "epoch: 7 step: 341, loss is 0.2666\n", + "epoch: 7 step: 342, loss is 0.4119\n", + "epoch: 7 step: 343, loss is 0.2923\n", + "epoch: 7 step: 344, loss is 0.3069\n", + "epoch: 7 step: 345, loss is 0.2237\n", + "epoch: 7 step: 346, loss is 0.2427\n", + "epoch: 7 step: 347, loss is 0.2578\n", + "epoch: 7 step: 348, loss is 0.3885\n", + "epoch: 7 step: 349, loss is 0.2785\n", + "epoch: 7 step: 350, loss is 0.3561\n", + "epoch: 7 step: 351, loss is 0.4515\n", + "epoch: 7 step: 352, loss is 0.2931\n", + "epoch: 7 step: 353, loss is 0.3824\n", + "epoch: 7 step: 354, loss is 0.1658\n", + "epoch: 7 step: 355, loss is 0.4529\n", + "epoch: 7 step: 356, loss is 0.3766\n", + "epoch: 7 step: 357, loss is 0.2821\n", + "epoch: 7 step: 358, loss is 0.2354\n", + "epoch: 7 step: 359, loss is 0.3754\n", + "epoch: 7 step: 360, loss is 0.3338\n", + "epoch: 7 step: 361, loss is 0.3404\n", + "epoch: 7 step: 362, loss is 0.5074\n", + "epoch: 7 step: 363, loss is 0.3289\n", + "epoch: 7 step: 364, loss is 0.2627\n", + "epoch: 7 step: 365, loss is 0.3471\n", + "epoch: 7 step: 366, loss is 0.3044\n", + "epoch: 7 step: 367, loss is 0.4036\n", + "epoch: 7 step: 368, loss is 0.3972\n", + "epoch: 7 step: 369, loss is 0.3652\n", + "epoch: 7 step: 370, loss is 0.3068\n", + "epoch: 7 step: 371, loss is 0.2776\n", + "epoch: 7 step: 372, loss is 0.3689\n", + "epoch: 7 step: 373, loss is 0.3331\n", + "epoch: 7 step: 374, loss is 0.3642\n", + "epoch: 7 step: 375, loss is 0.4690\n", + "epoch: 7 step: 376, loss is 0.3052\n", + "epoch: 7 step: 377, loss is 0.2689\n", + "epoch: 7 step: 378, loss is 0.5337\n", + "epoch: 7 step: 379, loss is 0.2856\n", + "epoch: 7 step: 380, loss is 0.2056\n", + "epoch: 7 step: 381, loss is 0.3496\n", + "epoch: 7 step: 382, loss is 0.3747\n", + "epoch: 7 step: 383, loss is 0.2499\n", + "epoch: 7 step: 384, loss is 0.3007\n", + "epoch: 7 step: 385, loss is 0.2983\n", + "epoch: 7 step: 386, loss is 0.3484\n", + "epoch: 7 step: 387, loss is 0.3087\n", + "epoch: 7 step: 388, loss is 0.3337\n", + "epoch: 7 step: 389, loss is 0.2782\n", + "epoch: 7 step: 390, loss is 0.2050\n", "Epoch time: 42367.284, per step time: 108.634, avg loss: 0.312\n", "************************************************************\n", - "Epoch: [ 8/ 10], step: [ 1/ 390], loss: [0.2149], avg loss: [0.2149], time: [99.9627ms]\n", - "Epoch: [ 8/ 10], step: [ 2/ 390], loss: [0.1804], avg loss: [0.1977], time: [103.7598ms]\n", - "Epoch: [ 8/ 10], step: [ 3/ 390], loss: [0.3627], avg loss: [0.2527], time: [101.5825ms]\n", - "Epoch: [ 8/ 10], step: [ 4/ 390], loss: [0.3586], avg loss: [0.2792], time: [102.5608ms]\n", - "Epoch: [ 8/ 10], step: [ 5/ 390], loss: [0.2930], avg loss: [0.2819], time: [103.3924ms]\n", - "Epoch: [ 8/ 10], step: [ 6/ 390], loss: [0.2007], avg loss: [0.2684], time: [103.3638ms]\n", - "Epoch: [ 8/ 10], step: [ 7/ 390], loss: [0.2223], avg loss: [0.2618], time: [106.2517ms]\n", - "Epoch: [ 8/ 10], step: [ 8/ 390], loss: [0.2357], avg loss: [0.2585], time: [102.8740ms]\n", - "Epoch: [ 8/ 10], step: [ 9/ 390], loss: [0.3872], avg loss: [0.2728], time: [104.3794ms]\n", - "Epoch: [ 8/ 10], step: [ 10/ 390], loss: [0.1634], avg loss: [0.2619], time: [104.8412ms]\n", - "Epoch: [ 8/ 10], step: [ 11/ 390], loss: [0.2364], avg loss: [0.2596], time: [104.3904ms]\n", - "Epoch: [ 8/ 10], step: [ 12/ 390], loss: [0.4116], avg loss: [0.2722], time: [103.3373ms]\n", - "Epoch: [ 8/ 10], step: [ 13/ 390], loss: [0.2491], avg loss: [0.2705], time: [105.9339ms]\n", - "Epoch: [ 8/ 10], step: [ 14/ 390], loss: [0.3110], avg loss: [0.2734], time: [104.9607ms]\n", - "Epoch: [ 8/ 10], step: [ 15/ 390], loss: [0.2004], avg loss: [0.2685], time: [102.9387ms]\n" + "epoch: 8 step: 1, loss is 0.2149\n", + "epoch: 8 step: 2, loss is 0.1804\n", + "epoch: 8 step: 3, loss is 0.3627\n", + "epoch: 8 step: 4, loss is 0.3586\n", + "epoch: 8 step: 5, loss is 0.2930\n", + "epoch: 8 step: 6, loss is 0.2007\n", + "epoch: 8 step: 7, loss is 0.2223\n", + "epoch: 8 step: 8, loss is 0.2357\n", + "epoch: 8 step: 9, loss is 0.3872\n", + "epoch: 8 step: 10, loss is 0.1634\n", + "epoch: 8 step: 11, loss is 0.2364\n", + "epoch: 8 step: 12, loss is 0.4116\n", + "epoch: 8 step: 13, loss is 0.2491\n", + "epoch: 8 step: 14, loss is 0.3110\n", + "epoch: 8 step: 15, loss is 0.2004\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 8/ 10], step: [ 16/ 390], loss: [0.2551], avg loss: [0.2677], time: [107.0006ms]\n", - "Epoch: [ 8/ 10], step: [ 17/ 390], loss: [0.3402], avg loss: [0.2719], time: [102.4706ms]\n", - "Epoch: [ 8/ 10], step: [ 18/ 390], loss: [0.2975], avg loss: [0.2733], time: [106.4065ms]\n", - "Epoch: [ 8/ 10], step: [ 19/ 390], loss: [0.2487], avg loss: [0.2720], time: [106.8141ms]\n", - "Epoch: [ 8/ 10], step: [ 20/ 390], loss: [0.2542], avg loss: [0.2712], time: [108.3596ms]\n", - "Epoch: [ 8/ 10], step: [ 21/ 390], loss: [0.2751], avg loss: [0.2713], time: [101.2235ms]\n", - "Epoch: [ 8/ 10], step: [ 22/ 390], loss: [0.3212], avg loss: [0.2736], time: [107.4750ms]\n", - "Epoch: [ 8/ 10], step: [ 23/ 390], loss: [0.2760], avg loss: [0.2737], time: [105.3512ms]\n", - "Epoch: [ 8/ 10], step: [ 24/ 390], loss: [0.1505], avg loss: [0.2686], time: [101.8736ms]\n", - "Epoch: [ 8/ 10], step: [ 25/ 390], loss: [0.2349], avg loss: [0.2672], time: [104.0020ms]\n", - "Epoch: [ 8/ 10], step: [ 26/ 390], loss: [0.1072], avg loss: [0.2611], time: [106.9102ms]\n", - "Epoch: [ 8/ 10], step: [ 27/ 390], loss: [0.3493], avg loss: [0.2643], time: [102.9167ms]\n", - "Epoch: [ 8/ 10], step: [ 28/ 390], loss: [0.1981], avg loss: [0.2620], time: [104.2376ms]\n", - "Epoch: [ 8/ 10], step: [ 29/ 390], loss: [0.2218], avg loss: [0.2606], time: [100.7419ms]\n", - "Epoch: [ 8/ 10], step: [ 30/ 390], loss: [0.2380], avg loss: [0.2598], time: [102.7915ms]\n", - "Epoch: [ 8/ 10], step: [ 31/ 390], loss: [0.2702], avg loss: [0.2602], time: [102.2146ms]\n", - "Epoch: [ 8/ 10], step: [ 32/ 390], loss: [0.2819], avg loss: [0.2609], time: [103.3094ms]\n", - "Epoch: [ 8/ 10], step: [ 33/ 390], loss: [0.3173], avg loss: [0.2626], time: [104.7378ms]\n", - "Epoch: [ 8/ 10], step: [ 34/ 390], loss: [0.2883], avg loss: [0.2633], time: [101.5081ms]\n", - "Epoch: [ 8/ 10], step: [ 35/ 390], loss: [0.3038], avg loss: [0.2645], time: [106.4558ms]\n", - "Epoch: [ 8/ 10], step: [ 36/ 390], loss: [0.3776], avg loss: [0.2676], time: [106.2822ms]\n", - "Epoch: [ 8/ 10], step: [ 37/ 390], loss: [0.3619], avg loss: [0.2702], time: [99.5617ms]\n", - "Epoch: [ 8/ 10], step: [ 38/ 390], loss: [0.3471], avg loss: [0.2722], time: [106.0185ms]\n", - "Epoch: [ 8/ 10], step: [ 39/ 390], loss: [0.2261], avg loss: [0.2710], time: [105.8998ms]\n", - "Epoch: [ 8/ 10], step: [ 40/ 390], loss: [0.2389], avg loss: [0.2702], time: [101.6922ms]\n", - "Epoch: [ 8/ 10], step: [ 41/ 390], loss: [0.2973], avg loss: [0.2709], time: [103.1911ms]\n", - "Epoch: [ 8/ 10], step: [ 42/ 390], loss: [0.3369], avg loss: [0.2724], time: [102.7579ms]\n", - "Epoch: [ 8/ 10], step: [ 43/ 390], loss: [0.5723], avg loss: [0.2794], time: [104.4035ms]\n", - "Epoch: [ 8/ 10], step: [ 44/ 390], loss: [0.3082], avg loss: [0.2801], time: [102.6764ms]\n", - "Epoch: [ 8/ 10], step: [ 45/ 390], loss: [0.3245], avg loss: [0.2811], time: [104.8353ms]\n", - "Epoch: [ 8/ 10], step: [ 46/ 390], loss: [0.3054], avg loss: [0.2816], time: [104.9926ms]\n", - "Epoch: [ 8/ 10], step: [ 47/ 390], loss: [0.2204], avg loss: [0.2803], time: [101.2921ms]\n", - "Epoch: [ 8/ 10], step: [ 48/ 390], loss: [0.4341], avg loss: [0.2835], time: [103.3404ms]\n", - "Epoch: [ 8/ 10], step: [ 49/ 390], loss: [0.2574], avg loss: [0.2830], time: [101.7869ms]\n", - "Epoch: [ 8/ 10], step: [ 50/ 390], loss: [0.3625], avg loss: [0.2845], time: [106.5040ms]\n", - "Epoch: [ 8/ 10], step: [ 51/ 390], loss: [0.3555], avg loss: [0.2859], time: [104.4257ms]\n", - "Epoch: [ 8/ 10], step: [ 52/ 390], loss: [0.2120], avg loss: [0.2845], time: [102.0887ms]\n", - "Epoch: [ 8/ 10], step: [ 53/ 390], loss: [0.2403], avg loss: [0.2837], time: [103.4577ms]\n", - "Epoch: [ 8/ 10], step: [ 54/ 390], loss: [0.2480], avg loss: [0.2830], time: [102.0255ms]\n", - "Epoch: [ 8/ 10], step: [ 55/ 390], loss: [0.4171], avg loss: [0.2855], time: [103.0657ms]\n", - "Epoch: [ 8/ 10], step: [ 56/ 390], loss: [0.3163], avg loss: [0.2860], time: [103.3020ms]\n", - "Epoch: [ 8/ 10], step: [ 57/ 390], loss: [0.3176], avg loss: [0.2866], time: [101.9452ms]\n", - "Epoch: [ 8/ 10], step: [ 58/ 390], loss: [0.2448], avg loss: [0.2858], time: [104.5702ms]\n", - "Epoch: [ 8/ 10], step: [ 59/ 390], loss: [0.3658], avg loss: [0.2872], time: [105.9697ms]\n", - "Epoch: [ 8/ 10], step: [ 60/ 390], loss: [0.3966], avg loss: [0.2890], time: [103.7662ms]\n", - "Epoch: [ 8/ 10], step: [ 61/ 390], loss: [0.3659], avg loss: [0.2903], time: [104.8198ms]\n", - "Epoch: [ 8/ 10], step: [ 62/ 390], loss: [0.2222], avg loss: [0.2892], time: [102.3245ms]\n", - "Epoch: [ 8/ 10], step: [ 63/ 390], loss: [0.3557], avg loss: [0.2902], time: [101.8894ms]\n", - "Epoch: [ 8/ 10], step: [ 64/ 390], loss: [0.2123], avg loss: [0.2890], time: [103.6301ms]\n", - "Epoch: [ 8/ 10], step: [ 65/ 390], loss: [0.2045], avg loss: [0.2877], time: [100.6439ms]\n", - "Epoch: [ 8/ 10], step: [ 66/ 390], loss: [0.2570], avg loss: [0.2873], time: [106.7004ms]\n", - "Epoch: [ 8/ 10], step: [ 67/ 390], loss: [0.2672], avg loss: [0.2870], time: [103.2276ms]\n", - "Epoch: [ 8/ 10], step: [ 68/ 390], loss: [0.1659], avg loss: [0.2852], time: [104.2333ms]\n", - "Epoch: [ 8/ 10], step: [ 69/ 390], loss: [0.2854], avg loss: [0.2852], time: [104.9914ms]\n", - "Epoch: [ 8/ 10], step: [ 70/ 390], loss: [0.2377], avg loss: [0.2845], time: [102.8299ms]\n", - "Epoch: [ 8/ 10], step: [ 71/ 390], loss: [0.2993], avg loss: [0.2847], time: [104.7037ms]\n", - "Epoch: [ 8/ 10], step: [ 72/ 390], loss: [0.2682], avg loss: [0.2845], time: [104.3675ms]\n", - "Epoch: [ 8/ 10], step: [ 73/ 390], loss: [0.1733], avg loss: [0.2830], time: [103.1122ms]\n", - "Epoch: [ 8/ 10], step: [ 74/ 390], loss: [0.2731], avg loss: [0.2828], time: [107.4519ms]\n", - "Epoch: [ 8/ 10], step: [ 75/ 390], loss: [0.2913], avg loss: [0.2829], time: [105.0410ms]\n", - "Epoch: [ 8/ 10], step: [ 76/ 390], loss: [0.1981], avg loss: [0.2818], time: [103.5707ms]\n", - "Epoch: [ 8/ 10], step: [ 77/ 390], loss: [0.2849], avg loss: [0.2819], time: [101.8102ms]\n", - "Epoch: [ 8/ 10], step: [ 78/ 390], loss: [0.3997], avg loss: [0.2834], time: [106.3707ms]\n", - "Epoch: [ 8/ 10], step: [ 79/ 390], loss: [0.2753], avg loss: [0.2833], time: [103.1048ms]\n", - "Epoch: [ 8/ 10], step: [ 80/ 390], loss: [0.3147], avg loss: [0.2837], time: [102.6042ms]\n", - "Epoch: [ 8/ 10], step: [ 81/ 390], loss: [0.3199], avg loss: [0.2841], time: [102.7009ms]\n", - "Epoch: [ 8/ 10], step: [ 82/ 390], loss: [0.2713], avg loss: [0.2840], time: [102.1183ms]\n", - "Epoch: [ 8/ 10], step: [ 83/ 390], loss: [0.2855], avg loss: [0.2840], time: [105.1257ms]\n", - "Epoch: [ 8/ 10], step: [ 84/ 390], loss: [0.2076], avg loss: [0.2831], time: [103.4346ms]\n", - "Epoch: [ 8/ 10], step: [ 85/ 390], loss: [0.3363], avg loss: [0.2837], time: [103.2245ms]\n", - "Epoch: [ 8/ 10], step: [ 86/ 390], loss: [0.3122], avg loss: [0.2840], time: [102.2060ms]\n", - "Epoch: [ 8/ 10], step: [ 87/ 390], loss: [0.2516], avg loss: [0.2837], time: [101.2897ms]\n", - "Epoch: [ 8/ 10], step: [ 88/ 390], loss: [0.2329], avg loss: [0.2831], time: [106.2698ms]\n", - "Epoch: [ 8/ 10], step: [ 89/ 390], loss: [0.2841], avg loss: [0.2831], time: [103.2794ms]\n", - "Epoch: [ 8/ 10], step: [ 90/ 390], loss: [0.2238], avg loss: [0.2824], time: [102.0787ms]\n", - "Epoch: [ 8/ 10], step: [ 91/ 390], loss: [0.2369], avg loss: [0.2819], time: [104.2061ms]\n", - "Epoch: [ 8/ 10], step: [ 92/ 390], loss: [0.2746], avg loss: [0.2819], time: [105.7801ms]\n", - "Epoch: [ 8/ 10], step: [ 93/ 390], loss: [0.3308], avg loss: [0.2824], time: [102.5574ms]\n", - "Epoch: [ 8/ 10], step: [ 94/ 390], loss: [0.3584], avg loss: [0.2832], time: [103.8244ms]\n", - "Epoch: [ 8/ 10], step: [ 95/ 390], loss: [0.3276], avg loss: [0.2837], time: [103.2546ms]\n", - "Epoch: [ 8/ 10], step: [ 96/ 390], loss: [0.3361], avg loss: [0.2842], time: [101.2833ms]\n", - "Epoch: [ 8/ 10], step: [ 97/ 390], loss: [0.2652], avg loss: [0.2840], time: [105.8977ms]\n", - "Epoch: [ 8/ 10], step: [ 98/ 390], loss: [0.2178], avg loss: [0.2833], time: [102.9072ms]\n", - "Epoch: [ 8/ 10], step: [ 99/ 390], loss: [0.2998], avg loss: [0.2835], time: [102.4833ms]\n", - "Epoch: [ 8/ 10], step: [ 100/ 390], loss: [0.2527], avg loss: [0.2832], time: [100.8837ms]\n", - "Epoch: [ 8/ 10], step: [ 101/ 390], loss: [0.3188], avg loss: [0.2835], time: [102.9761ms]\n", - "Epoch: [ 8/ 10], step: [ 102/ 390], loss: [0.2340], avg loss: [0.2831], time: [106.6611ms]\n", - "Epoch: [ 8/ 10], step: [ 103/ 390], loss: [0.1899], avg loss: [0.2821], time: [102.8919ms]\n", - "Epoch: [ 8/ 10], step: [ 104/ 390], loss: [0.3204], avg loss: [0.2825], time: [103.0960ms]\n" + "epoch: 8 step: 16, loss is 0.2551\n", + "epoch: 8 step: 17, loss is 0.3402\n", + "epoch: 8 step: 18, loss is 0.2975\n", + "epoch: 8 step: 19, loss is 0.2487\n", + "epoch: 8 step: 20, loss is 0.2542\n", + "epoch: 8 step: 21, loss is 0.2751\n", + "epoch: 8 step: 22, loss is 0.3212\n", + "epoch: 8 step: 23, loss is 0.2760\n", + "epoch: 8 step: 24, loss is 0.1505\n", + "epoch: 8 step: 25, loss is 0.2349\n", + "epoch: 8 step: 26, loss is 0.1072\n", + "epoch: 8 step: 27, loss is 0.3493\n", + "epoch: 8 step: 28, loss is 0.1981\n", + "epoch: 8 step: 29, loss is 0.2218\n", + "epoch: 8 step: 30, loss is 0.2380\n", + "epoch: 8 step: 31, loss is 0.2702\n", + "epoch: 8 step: 32, loss is 0.2819\n", + "epoch: 8 step: 33, loss is 0.3173\n", + "epoch: 8 step: 34, loss is 0.2883\n", + "epoch: 8 step: 35, loss is 0.3038\n", + "epoch: 8 step: 36, loss is 0.3776\n", + "epoch: 8 step: 37, loss is 0.3619\n", + "epoch: 8 step: 38, loss is 0.3471\n", + "epoch: 8 step: 39, loss is 0.2261\n", + "epoch: 8 step: 40, loss is 0.2389\n", + "epoch: 8 step: 41, loss is 0.2973\n", + "epoch: 8 step: 42, loss is 0.3369\n", + "epoch: 8 step: 43, loss is 0.5723\n", + "epoch: 8 step: 44, loss is 0.3082\n", + "epoch: 8 step: 45, loss is 0.3245\n", + "epoch: 8 step: 46, loss is 0.3054\n", + "epoch: 8 step: 47, loss is 0.2204\n", + "epoch: 8 step: 48, loss is 0.4341\n", + "epoch: 8 step: 49, loss is 0.2574\n", + "epoch: 8 step: 50, loss is 0.3625\n", + "epoch: 8 step: 51, loss is 0.3555\n", + "epoch: 8 step: 52, loss is 0.2120\n", + "epoch: 8 step: 53, loss is 0.2403\n", + "epoch: 8 step: 54, loss is 0.2480\n", + "epoch: 8 step: 55, loss is 0.4171\n", + "epoch: 8 step: 56, loss is 0.3163\n", + "epoch: 8 step: 57, loss is 0.3176\n", + "epoch: 8 step: 58, loss is 0.2448\n", + "epoch: 8 step: 59, loss is 0.3658\n", + "epoch: 8 step: 60, loss is 0.3966\n", + "epoch: 8 step: 61, loss is 0.3659\n", + "epoch: 8 step: 62, loss is 0.2222\n", + "epoch: 8 step: 63, loss is 0.3557\n", + "epoch: 8 step: 64, loss is 0.2123\n", + "epoch: 8 step: 65, loss is 0.2045\n", + "epoch: 8 step: 66, loss is 0.2570\n", + "epoch: 8 step: 67, loss is 0.2672\n", + "epoch: 8 step: 68, loss is 0.1659\n", + "epoch: 8 step: 69, loss is 0.2854\n", + "epoch: 8 step: 70, loss is 0.2377\n", + "epoch: 8 step: 71, loss is 0.2993\n", + "epoch: 8 step: 72, loss is 0.2682\n", + "epoch: 8 step: 73, loss is 0.1733\n", + "epoch: 8 step: 74, loss is 0.2731\n", + "epoch: 8 step: 75, loss is 0.2913\n", + "epoch: 8 step: 76, loss is 0.1981\n", + "epoch: 8 step: 77, loss is 0.2849\n", + "epoch: 8 step: 78, loss is 0.3997\n", + "epoch: 8 step: 79, loss is 0.2753\n", + "epoch: 8 step: 80, loss is 0.3147\n", + "epoch: 8 step: 81, loss is 0.3199\n", + "epoch: 8 step: 82, loss is 0.2713\n", + "epoch: 8 step: 83, loss is 0.2855\n", + "epoch: 8 step: 84, loss is 0.2076\n", + "epoch: 8 step: 85, loss is 0.3363\n", + "epoch: 8 step: 86, loss is 0.3122\n", + "epoch: 8 step: 87, loss is 0.2516\n", + "epoch: 8 step: 88, loss is 0.2329\n", + "epoch: 8 step: 89, loss is 0.2841\n", + "epoch: 8 step: 90, loss is 0.2238\n", + "epoch: 8 step: 91, loss is 0.2369\n", + "epoch: 8 step: 92, loss is 0.2746\n", + "epoch: 8 step: 93, loss is 0.3308\n", + "epoch: 8 step: 94, loss is 0.3584\n", + "epoch: 8 step: 95, loss is 0.3276\n", + "epoch: 8 step: 96, loss is 0.3361\n", + "epoch: 8 step: 97, loss is 0.2652\n", + "epoch: 8 step: 98, loss is 0.2178\n", + "epoch: 8 step: 99, loss is 0.2998\n", + "epoch: 8 step: 100, loss is 0.2527\n", + "epoch: 8 step: 101, loss is 0.3188\n", + "epoch: 8 step: 102, loss is 0.2340\n", + "epoch: 8 step: 103, loss is 0.1899\n", + "epoch: 8 step: 104, loss is 0.3204\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 8/ 10], step: [ 105/ 390], loss: [0.3339], avg loss: [0.2830], time: [101.1257ms]\n", - "Epoch: [ 8/ 10], step: [ 106/ 390], loss: [0.3085], avg loss: [0.2832], time: [104.0373ms]\n", - "Epoch: [ 8/ 10], step: [ 107/ 390], loss: [0.3561], avg loss: [0.2839], time: [104.2287ms]\n", - "Epoch: [ 8/ 10], step: [ 108/ 390], loss: [0.3255], avg loss: [0.2843], time: [104.0325ms]\n", - "Epoch: [ 8/ 10], step: [ 109/ 390], loss: [0.3709], avg loss: [0.2851], time: [103.4937ms]\n", - "Epoch: [ 8/ 10], step: [ 110/ 390], loss: [0.2567], avg loss: [0.2848], time: [101.7263ms]\n", - "Epoch: [ 8/ 10], step: [ 111/ 390], loss: [0.2285], avg loss: [0.2843], time: [103.9937ms]\n", - "Epoch: [ 8/ 10], step: [ 112/ 390], loss: [0.1699], avg loss: [0.2833], time: [105.4158ms]\n", - "Epoch: [ 8/ 10], step: [ 113/ 390], loss: [0.2693], avg loss: [0.2832], time: [105.9487ms]\n", - "Epoch: [ 8/ 10], step: [ 114/ 390], loss: [0.4444], avg loss: [0.2846], time: [104.3928ms]\n", - "Epoch: [ 8/ 10], step: [ 115/ 390], loss: [0.2116], avg loss: [0.2840], time: [106.0302ms]\n", - "Epoch: [ 8/ 10], step: [ 116/ 390], loss: [0.3997], avg loss: [0.2850], time: [101.6543ms]\n", - "Epoch: [ 8/ 10], step: [ 117/ 390], loss: [0.2387], avg loss: [0.2846], time: [106.1738ms]\n", - "Epoch: [ 8/ 10], step: [ 118/ 390], loss: [0.2712], avg loss: [0.2845], time: [103.6386ms]\n", - "Epoch: [ 8/ 10], step: [ 119/ 390], loss: [0.2482], avg loss: [0.2842], time: [103.3423ms]\n", - "Epoch: [ 8/ 10], step: [ 120/ 390], loss: [0.2702], avg loss: [0.2840], time: [104.9221ms]\n", - "Epoch: [ 8/ 10], step: [ 121/ 390], loss: [0.4016], avg loss: [0.2850], time: [103.2031ms]\n", - "Epoch: [ 8/ 10], step: [ 122/ 390], loss: [0.3797], avg loss: [0.2858], time: [106.5831ms]\n", - "Epoch: [ 8/ 10], step: [ 123/ 390], loss: [0.1121], avg loss: [0.2844], time: [102.4332ms]\n", - "Epoch: [ 8/ 10], step: [ 124/ 390], loss: [0.2173], avg loss: [0.2838], time: [103.8256ms]\n", - "Epoch: [ 8/ 10], step: [ 125/ 390], loss: [0.2104], avg loss: [0.2832], time: [102.2441ms]\n", - "Epoch: [ 8/ 10], step: [ 126/ 390], loss: [0.2904], avg loss: [0.2833], time: [102.6425ms]\n", - "Epoch: [ 8/ 10], step: [ 127/ 390], loss: [0.2524], avg loss: [0.2831], time: [100.6939ms]\n", - "Epoch: [ 8/ 10], step: [ 128/ 390], loss: [0.2956], avg loss: [0.2832], time: [104.7201ms]\n", - "Epoch: [ 8/ 10], step: [ 129/ 390], loss: [0.3088], avg loss: [0.2834], time: [104.7549ms]\n", - "Epoch: [ 8/ 10], step: [ 130/ 390], loss: [0.2754], avg loss: [0.2833], time: [101.0370ms]\n", - "Epoch: [ 8/ 10], step: [ 131/ 390], loss: [0.2397], avg loss: [0.2830], time: [105.9654ms]\n", - "Epoch: [ 8/ 10], step: [ 132/ 390], loss: [0.3058], avg loss: [0.2831], time: [104.1484ms]\n", - "Epoch: [ 8/ 10], step: [ 133/ 390], loss: [0.1613], avg loss: [0.2822], time: [105.4857ms]\n", - "Epoch: [ 8/ 10], step: [ 134/ 390], loss: [0.2912], avg loss: [0.2823], time: [102.5901ms]\n", - "Epoch: [ 8/ 10], step: [ 135/ 390], loss: [0.2714], avg loss: [0.2822], time: [105.9830ms]\n", - "Epoch: [ 8/ 10], step: [ 136/ 390], loss: [0.2966], avg loss: [0.2823], time: [107.1360ms]\n", - "Epoch: [ 8/ 10], step: [ 137/ 390], loss: [0.4892], avg loss: [0.2838], time: [101.4531ms]\n", - "Epoch: [ 8/ 10], step: [ 138/ 390], loss: [0.4067], avg loss: [0.2847], time: [104.9294ms]\n", - "Epoch: [ 8/ 10], step: [ 139/ 390], loss: [0.3947], avg loss: [0.2855], time: [103.3685ms]\n", - "Epoch: [ 8/ 10], step: [ 140/ 390], loss: [0.2636], avg loss: [0.2853], time: [101.3122ms]\n", - "Epoch: [ 8/ 10], step: [ 141/ 390], loss: [0.2913], avg loss: [0.2854], time: [104.4834ms]\n", - "Epoch: [ 8/ 10], step: [ 142/ 390], loss: [0.3560], avg loss: [0.2859], time: [105.8872ms]\n", - "Epoch: [ 8/ 10], step: [ 143/ 390], loss: [0.1532], avg loss: [0.2850], time: [102.7212ms]\n", - "Epoch: [ 8/ 10], step: [ 144/ 390], loss: [0.1977], avg loss: [0.2844], time: [108.3438ms]\n", - "Epoch: [ 8/ 10], step: [ 145/ 390], loss: [0.2216], avg loss: [0.2839], time: [101.2526ms]\n", - "Epoch: [ 8/ 10], step: [ 146/ 390], loss: [0.3060], avg loss: [0.2841], time: [107.4364ms]\n", - "Epoch: [ 8/ 10], step: [ 147/ 390], loss: [0.2543], avg loss: [0.2839], time: [105.1431ms]\n", - "Epoch: [ 8/ 10], step: [ 148/ 390], loss: [0.2818], avg loss: [0.2839], time: [102.4642ms]\n", - "Epoch: [ 8/ 10], step: [ 149/ 390], loss: [0.3537], avg loss: [0.2843], time: [104.4877ms]\n", - "Epoch: [ 8/ 10], step: [ 150/ 390], loss: [0.2540], avg loss: [0.2841], time: [106.3020ms]\n", - "Epoch: [ 8/ 10], step: [ 151/ 390], loss: [0.2113], avg loss: [0.2836], time: [101.0654ms]\n", - "Epoch: [ 8/ 10], step: [ 152/ 390], loss: [0.3518], avg loss: [0.2841], time: [100.9429ms]\n", - "Epoch: [ 8/ 10], step: [ 153/ 390], loss: [0.2428], avg loss: [0.2838], time: [105.3364ms]\n", - "Epoch: [ 8/ 10], step: [ 154/ 390], loss: [0.2941], avg loss: [0.2839], time: [102.0465ms]\n", - "Epoch: [ 8/ 10], step: [ 155/ 390], loss: [0.3129], avg loss: [0.2841], time: [104.5511ms]\n", - "Epoch: [ 8/ 10], step: [ 156/ 390], loss: [0.3826], avg loss: [0.2847], time: [103.7295ms]\n", - "Epoch: [ 8/ 10], step: [ 157/ 390], loss: [0.2870], avg loss: [0.2847], time: [103.0314ms]\n", - "Epoch: [ 8/ 10], step: [ 158/ 390], loss: [0.3251], avg loss: [0.2850], time: [102.6587ms]\n", - "Epoch: [ 8/ 10], step: [ 159/ 390], loss: [0.4708], avg loss: [0.2861], time: [101.7122ms]\n", - "Epoch: [ 8/ 10], step: [ 160/ 390], loss: [0.3849], avg loss: [0.2868], time: [102.7229ms]\n", - "Epoch: [ 8/ 10], step: [ 161/ 390], loss: [0.3747], avg loss: [0.2873], time: [101.2404ms]\n", - "Epoch: [ 8/ 10], step: [ 162/ 390], loss: [0.2592], avg loss: [0.2871], time: [103.0819ms]\n", - "Epoch: [ 8/ 10], step: [ 163/ 390], loss: [0.3399], avg loss: [0.2875], time: [103.2174ms]\n", - "Epoch: [ 8/ 10], step: [ 164/ 390], loss: [0.3366], avg loss: [0.2878], time: [101.9282ms]\n", - "Epoch: [ 8/ 10], step: [ 165/ 390], loss: [0.2238], avg loss: [0.2874], time: [102.0501ms]\n", - "Epoch: [ 8/ 10], step: [ 166/ 390], loss: [0.2818], avg loss: [0.2873], time: [102.9756ms]\n", - "Epoch: [ 8/ 10], step: [ 167/ 390], loss: [0.3048], avg loss: [0.2874], time: [100.5049ms]\n", - "Epoch: [ 8/ 10], step: [ 168/ 390], loss: [0.2822], avg loss: [0.2874], time: [104.8827ms]\n", - "Epoch: [ 8/ 10], step: [ 169/ 390], loss: [0.2954], avg loss: [0.2875], time: [106.9095ms]\n", - "Epoch: [ 8/ 10], step: [ 170/ 390], loss: [0.2159], avg loss: [0.2870], time: [104.0988ms]\n", - "Epoch: [ 8/ 10], step: [ 171/ 390], loss: [0.2859], avg loss: [0.2870], time: [103.6556ms]\n", - "Epoch: [ 8/ 10], step: [ 172/ 390], loss: [0.3350], avg loss: [0.2873], time: [106.1785ms]\n", - "Epoch: [ 8/ 10], step: [ 173/ 390], loss: [0.2139], avg loss: [0.2869], time: [104.7947ms]\n", - "Epoch: [ 8/ 10], step: [ 174/ 390], loss: [0.3930], avg loss: [0.2875], time: [104.8133ms]\n", - "Epoch: [ 8/ 10], step: [ 175/ 390], loss: [0.2229], avg loss: [0.2871], time: [102.8888ms]\n", - "Epoch: [ 8/ 10], step: [ 176/ 390], loss: [0.3234], avg loss: [0.2873], time: [107.0349ms]\n", - "Epoch: [ 8/ 10], step: [ 177/ 390], loss: [0.2304], avg loss: [0.2870], time: [100.7838ms]\n", - "Epoch: [ 8/ 10], step: [ 178/ 390], loss: [0.3864], avg loss: [0.2876], time: [105.3085ms]\n", - "Epoch: [ 8/ 10], step: [ 179/ 390], loss: [0.3090], avg loss: [0.2877], time: [104.4328ms]\n", - "Epoch: [ 8/ 10], step: [ 180/ 390], loss: [0.2704], avg loss: [0.2876], time: [103.4174ms]\n", - "Epoch: [ 8/ 10], step: [ 181/ 390], loss: [0.3385], avg loss: [0.2879], time: [104.2037ms]\n", - "Epoch: [ 8/ 10], step: [ 182/ 390], loss: [0.2771], avg loss: [0.2878], time: [104.3608ms]\n", - "Epoch: [ 8/ 10], step: [ 183/ 390], loss: [0.3193], avg loss: [0.2880], time: [105.0370ms]\n", - "Epoch: [ 8/ 10], step: [ 184/ 390], loss: [0.1769], avg loss: [0.2874], time: [102.8898ms]\n", - "Epoch: [ 8/ 10], step: [ 185/ 390], loss: [0.2449], avg loss: [0.2872], time: [103.1065ms]\n", - "Epoch: [ 8/ 10], step: [ 186/ 390], loss: [0.2875], avg loss: [0.2872], time: [107.6005ms]\n", - "Epoch: [ 8/ 10], step: [ 187/ 390], loss: [0.2144], avg loss: [0.2868], time: [102.1121ms]\n", - "Epoch: [ 8/ 10], step: [ 188/ 390], loss: [0.4234], avg loss: [0.2875], time: [105.1552ms]\n", - "Epoch: [ 8/ 10], step: [ 189/ 390], loss: [0.2382], avg loss: [0.2872], time: [105.0382ms]\n", - "Epoch: [ 8/ 10], step: [ 190/ 390], loss: [0.2586], avg loss: [0.2871], time: [105.0529ms]\n", - "Epoch: [ 8/ 10], step: [ 191/ 390], loss: [0.3665], avg loss: [0.2875], time: [101.3770ms]\n", - "Epoch: [ 8/ 10], step: [ 192/ 390], loss: [0.2088], avg loss: [0.2871], time: [105.0966ms]\n", - "Epoch: [ 8/ 10], step: [ 193/ 390], loss: [0.3332], avg loss: [0.2873], time: [104.6309ms]\n" + "epoch: 8 step: 105, loss is 0.3339\n", + "epoch: 8 step: 106, loss is 0.3085\n", + "epoch: 8 step: 107, loss is 0.3561\n", + "epoch: 8 step: 108, loss is 0.3255\n", + "epoch: 8 step: 109, loss is 0.3709\n", + "epoch: 8 step: 110, loss is 0.2567\n", + "epoch: 8 step: 111, loss is 0.2285\n", + "epoch: 8 step: 112, loss is 0.1699\n", + "epoch: 8 step: 113, loss is 0.2693\n", + "epoch: 8 step: 114, loss is 0.4444\n", + "epoch: 8 step: 115, loss is 0.2116\n", + "epoch: 8 step: 116, loss is 0.3997\n", + "epoch: 8 step: 117, loss is 0.2387\n", + "epoch: 8 step: 118, loss is 0.2712\n", + "epoch: 8 step: 119, loss is 0.2482\n", + "epoch: 8 step: 120, loss is 0.2702\n", + "epoch: 8 step: 121, loss is 0.4016\n", + "epoch: 8 step: 122, loss is 0.3797\n", + "epoch: 8 step: 123, loss is 0.1121\n", + "epoch: 8 step: 124, loss is 0.2173\n", + "epoch: 8 step: 125, loss is 0.2104\n", + "epoch: 8 step: 126, loss is 0.2904\n", + "epoch: 8 step: 127, loss is 0.2524\n", + "epoch: 8 step: 128, loss is 0.2956\n", + "epoch: 8 step: 129, loss is 0.3088\n", + "epoch: 8 step: 130, loss is 0.2754\n", + "epoch: 8 step: 131, loss is 0.2397\n", + "epoch: 8 step: 132, loss is 0.3058\n", + "epoch: 8 step: 133, loss is 0.1613\n", + "epoch: 8 step: 134, loss is 0.2912\n", + "epoch: 8 step: 135, loss is 0.2714\n", + "epoch: 8 step: 136, loss is 0.2966\n", + "epoch: 8 step: 137, loss is 0.4892\n", + "epoch: 8 step: 138, loss is 0.4067\n", + "epoch: 8 step: 139, loss is 0.3947\n", + "epoch: 8 step: 140, loss is 0.2636\n", + "epoch: 8 step: 141, loss is 0.2913\n", + "epoch: 8 step: 142, loss is 0.3560\n", + "epoch: 8 step: 143, loss is 0.1532\n", + "epoch: 8 step: 144, loss is 0.1977\n", + "epoch: 8 step: 145, loss is 0.2216\n", + "epoch: 8 step: 146, loss is 0.3060\n", + "epoch: 8 step: 147, loss is 0.2543\n", + "epoch: 8 step: 148, loss is 0.2818\n", + "epoch: 8 step: 149, loss is 0.3537\n", + "epoch: 8 step: 150, loss is 0.2540\n", + "epoch: 8 step: 151, loss is 0.2113\n", + "epoch: 8 step: 152, loss is 0.3518\n", + "epoch: 8 step: 153, loss is 0.2428\n", + "epoch: 8 step: 154, loss is 0.2941\n", + "epoch: 8 step: 155, loss is 0.3129\n", + "epoch: 8 step: 156, loss is 0.3826\n", + "epoch: 8 step: 157, loss is 0.2870\n", + "epoch: 8 step: 158, loss is 0.3251\n", + "epoch: 8 step: 159, loss is 0.4708\n", + "epoch: 8 step: 160, loss is 0.3849\n", + "epoch: 8 step: 161, loss is 0.3747\n", + "epoch: 8 step: 162, loss is 0.2592\n", + "epoch: 8 step: 163, loss is 0.3399\n", + "epoch: 8 step: 164, loss is 0.3366\n", + "epoch: 8 step: 165, loss is 0.2238\n", + "epoch: 8 step: 166, loss is 0.2818\n", + "epoch: 8 step: 167, loss is 0.3048\n", + "epoch: 8 step: 168, loss is 0.2822\n", + "epoch: 8 step: 169, loss is 0.2954\n", + "epoch: 8 step: 170, loss is 0.2159\n", + "epoch: 8 step: 171, loss is 0.2859\n", + "epoch: 8 step: 172, loss is 0.3350\n", + "epoch: 8 step: 173, loss is 0.2139\n", + "epoch: 8 step: 174, loss is 0.3930\n", + "epoch: 8 step: 175, loss is 0.2229\n", + "epoch: 8 step: 176, loss is 0.3234\n", + "epoch: 8 step: 177, loss is 0.2304\n", + "epoch: 8 step: 178, loss is 0.3864\n", + "epoch: 8 step: 179, loss is 0.3090\n", + "epoch: 8 step: 180, loss is 0.2704\n", + "epoch: 8 step: 181, loss is 0.3385\n", + "epoch: 8 step: 182, loss is 0.2771\n", + "epoch: 8 step: 183, loss is 0.3193\n", + "epoch: 8 step: 184, loss is 0.1769\n", + "epoch: 8 step: 185, loss is 0.2449\n", + "epoch: 8 step: 186, loss is 0.2875\n", + "epoch: 8 step: 187, loss is 0.2144\n", + "epoch: 8 step: 188, loss is 0.4234\n", + "epoch: 8 step: 189, loss is 0.2382\n", + "epoch: 8 step: 190, loss is 0.2586\n", + "epoch: 8 step: 191, loss is 0.3665\n", + "epoch: 8 step: 192, loss is 0.2088\n", + "epoch: 8 step: 193, loss is 0.3332\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 8/ 10], step: [ 194/ 390], loss: [0.2501], avg loss: [0.2871], time: [105.4816ms]\n", - "Epoch: [ 8/ 10], step: [ 195/ 390], loss: [0.1891], avg loss: [0.2866], time: [102.4518ms]\n", - "Epoch: [ 8/ 10], step: [ 196/ 390], loss: [0.2274], avg loss: [0.2863], time: [101.9406ms]\n", - "Epoch: [ 8/ 10], step: [ 197/ 390], loss: [0.3215], avg loss: [0.2865], time: [100.8925ms]\n", - "Epoch: [ 8/ 10], step: [ 198/ 390], loss: [0.2382], avg loss: [0.2863], time: [106.7557ms]\n", - "Epoch: [ 8/ 10], step: [ 199/ 390], loss: [0.3136], avg loss: [0.2864], time: [105.3262ms]\n", - "Epoch: [ 8/ 10], step: [ 200/ 390], loss: [0.3687], avg loss: [0.2868], time: [102.4990ms]\n", - "Epoch: [ 8/ 10], step: [ 201/ 390], loss: [0.1899], avg loss: [0.2863], time: [101.1612ms]\n", - "Epoch: [ 8/ 10], step: [ 202/ 390], loss: [0.2513], avg loss: [0.2862], time: [101.4724ms]\n", - "Epoch: [ 8/ 10], step: [ 203/ 390], loss: [0.2842], avg loss: [0.2861], time: [102.1821ms]\n", - "Epoch: [ 8/ 10], step: [ 204/ 390], loss: [0.2917], avg loss: [0.2862], time: [106.1144ms]\n", - "Epoch: [ 8/ 10], step: [ 205/ 390], loss: [0.2588], avg loss: [0.2860], time: [106.8852ms]\n", - "Epoch: [ 8/ 10], step: [ 206/ 390], loss: [0.3324], avg loss: [0.2863], time: [106.4188ms]\n", - "Epoch: [ 8/ 10], step: [ 207/ 390], loss: [0.3042], avg loss: [0.2864], time: [102.8409ms]\n", - "Epoch: [ 8/ 10], step: [ 208/ 390], loss: [0.2606], avg loss: [0.2862], time: [103.3425ms]\n", - "Epoch: [ 8/ 10], step: [ 209/ 390], loss: [0.3536], avg loss: [0.2865], time: [105.6108ms]\n", - "Epoch: [ 8/ 10], step: [ 210/ 390], loss: [0.4595], avg loss: [0.2874], time: [102.4525ms]\n", - "Epoch: [ 8/ 10], step: [ 211/ 390], loss: [0.2538], avg loss: [0.2872], time: [100.4620ms]\n", - "Epoch: [ 8/ 10], step: [ 212/ 390], loss: [0.3812], avg loss: [0.2877], time: [102.3226ms]\n", - "Epoch: [ 8/ 10], step: [ 213/ 390], loss: [0.1679], avg loss: [0.2871], time: [101.3007ms]\n", - "Epoch: [ 8/ 10], step: [ 214/ 390], loss: [0.1868], avg loss: [0.2866], time: [102.5825ms]\n", - "Epoch: [ 8/ 10], step: [ 215/ 390], loss: [0.4198], avg loss: [0.2872], time: [101.8732ms]\n", - "Epoch: [ 8/ 10], step: [ 216/ 390], loss: [0.3415], avg loss: [0.2875], time: [104.1446ms]\n", - "Epoch: [ 8/ 10], step: [ 217/ 390], loss: [0.2309], avg loss: [0.2872], time: [104.2297ms]\n", - "Epoch: [ 8/ 10], step: [ 218/ 390], loss: [0.3316], avg loss: [0.2874], time: [103.6499ms]\n", - "Epoch: [ 8/ 10], step: [ 219/ 390], loss: [0.3680], avg loss: [0.2878], time: [103.9608ms]\n", - "Epoch: [ 8/ 10], step: [ 220/ 390], loss: [0.2453], avg loss: [0.2876], time: [103.8964ms]\n", - "Epoch: [ 8/ 10], step: [ 221/ 390], loss: [0.4186], avg loss: [0.2882], time: [103.5337ms]\n", - "Epoch: [ 8/ 10], step: [ 222/ 390], loss: [0.2608], avg loss: [0.2881], time: [105.7644ms]\n", - "Epoch: [ 8/ 10], step: [ 223/ 390], loss: [0.3379], avg loss: [0.2883], time: [101.0270ms]\n", - "Epoch: [ 8/ 10], step: [ 224/ 390], loss: [0.2239], avg loss: [0.2880], time: [104.9690ms]\n", - "Epoch: [ 8/ 10], step: [ 225/ 390], loss: [0.3269], avg loss: [0.2882], time: [103.5392ms]\n", - "Epoch: [ 8/ 10], step: [ 226/ 390], loss: [0.1936], avg loss: [0.2878], time: [103.5523ms]\n", - "Epoch: [ 8/ 10], step: [ 227/ 390], loss: [0.2899], avg loss: [0.2878], time: [102.5474ms]\n", - "Epoch: [ 8/ 10], step: [ 228/ 390], loss: [0.2795], avg loss: [0.2877], time: [101.8667ms]\n", - "Epoch: [ 8/ 10], step: [ 229/ 390], loss: [0.2784], avg loss: [0.2877], time: [101.3720ms]\n", - "Epoch: [ 8/ 10], step: [ 230/ 390], loss: [0.3530], avg loss: [0.2880], time: [107.1603ms]\n", - "Epoch: [ 8/ 10], step: [ 231/ 390], loss: [0.2883], avg loss: [0.2880], time: [105.8881ms]\n", - "Epoch: [ 8/ 10], step: [ 232/ 390], loss: [0.3957], avg loss: [0.2885], time: [102.8709ms]\n", - "Epoch: [ 8/ 10], step: [ 233/ 390], loss: [0.1569], avg loss: [0.2879], time: [104.8172ms]\n", - "Epoch: [ 8/ 10], step: [ 234/ 390], loss: [0.3854], avg loss: [0.2883], time: [105.8090ms]\n", - "Epoch: [ 8/ 10], step: [ 235/ 390], loss: [0.2987], avg loss: [0.2884], time: [105.6798ms]\n", - "Epoch: [ 8/ 10], step: [ 236/ 390], loss: [0.4343], avg loss: [0.2890], time: [107.0321ms]\n", - "Epoch: [ 8/ 10], step: [ 237/ 390], loss: [0.2411], avg loss: [0.2888], time: [106.8735ms]\n", - "Epoch: [ 8/ 10], step: [ 238/ 390], loss: [0.2459], avg loss: [0.2886], time: [101.7492ms]\n", - "Epoch: [ 8/ 10], step: [ 239/ 390], loss: [0.3338], avg loss: [0.2888], time: [102.7682ms]\n", - "Epoch: [ 8/ 10], step: [ 240/ 390], loss: [0.3082], avg loss: [0.2889], time: [105.4368ms]\n", - "Epoch: [ 8/ 10], step: [ 241/ 390], loss: [0.2265], avg loss: [0.2886], time: [99.3347ms]\n", - "Epoch: [ 8/ 10], step: [ 242/ 390], loss: [0.2507], avg loss: [0.2884], time: [104.3801ms]\n", - "Epoch: [ 8/ 10], step: [ 243/ 390], loss: [0.3032], avg loss: [0.2885], time: [102.6051ms]\n", - "Epoch: [ 8/ 10], step: [ 244/ 390], loss: [0.3334], avg loss: [0.2887], time: [102.5877ms]\n", - "Epoch: [ 8/ 10], step: [ 245/ 390], loss: [0.4204], avg loss: [0.2892], time: [102.8838ms]\n", - "Epoch: [ 8/ 10], step: [ 246/ 390], loss: [0.2962], avg loss: [0.2893], time: [108.4619ms]\n", - "Epoch: [ 8/ 10], step: [ 247/ 390], loss: [0.3268], avg loss: [0.2894], time: [104.6379ms]\n", - "Epoch: [ 8/ 10], step: [ 248/ 390], loss: [0.3063], avg loss: [0.2895], time: [106.3936ms]\n", - "Epoch: [ 8/ 10], step: [ 249/ 390], loss: [0.2344], avg loss: [0.2893], time: [103.0197ms]\n", - "Epoch: [ 8/ 10], step: [ 250/ 390], loss: [0.3675], avg loss: [0.2896], time: [103.4546ms]\n", - "Epoch: [ 8/ 10], step: [ 251/ 390], loss: [0.2744], avg loss: [0.2895], time: [103.1058ms]\n", - "Epoch: [ 8/ 10], step: [ 252/ 390], loss: [0.4469], avg loss: [0.2901], time: [106.5910ms]\n", - "Epoch: [ 8/ 10], step: [ 253/ 390], loss: [0.3931], avg loss: [0.2905], time: [105.9005ms]\n", - "Epoch: [ 8/ 10], step: [ 254/ 390], loss: [0.2097], avg loss: [0.2902], time: [106.3991ms]\n", - "Epoch: [ 8/ 10], step: [ 255/ 390], loss: [0.2915], avg loss: [0.2902], time: [101.1448ms]\n", - "Epoch: [ 8/ 10], step: [ 256/ 390], loss: [0.2605], avg loss: [0.2901], time: [103.9233ms]\n", - "Epoch: [ 8/ 10], step: [ 257/ 390], loss: [0.1835], avg loss: [0.2897], time: [104.6898ms]\n", - "Epoch: [ 8/ 10], step: [ 258/ 390], loss: [0.3082], avg loss: [0.2898], time: [107.6202ms]\n", - "Epoch: [ 8/ 10], step: [ 259/ 390], loss: [0.1538], avg loss: [0.2892], time: [102.6068ms]\n", - "Epoch: [ 8/ 10], step: [ 260/ 390], loss: [0.2970], avg loss: [0.2893], time: [108.2559ms]\n", - "Epoch: [ 8/ 10], step: [ 261/ 390], loss: [0.2292], avg loss: [0.2890], time: [102.9320ms]\n", - "Epoch: [ 8/ 10], step: [ 262/ 390], loss: [0.2763], avg loss: [0.2890], time: [105.5338ms]\n", - "Epoch: [ 8/ 10], step: [ 263/ 390], loss: [0.4960], avg loss: [0.2898], time: [100.9338ms]\n", - "Epoch: [ 8/ 10], step: [ 264/ 390], loss: [0.3799], avg loss: [0.2901], time: [105.1140ms]\n", - "Epoch: [ 8/ 10], step: [ 265/ 390], loss: [0.3887], avg loss: [0.2905], time: [104.0342ms]\n", - "Epoch: [ 8/ 10], step: [ 266/ 390], loss: [0.2376], avg loss: [0.2903], time: [107.4958ms]\n", - "Epoch: [ 8/ 10], step: [ 267/ 390], loss: [0.2944], avg loss: [0.2903], time: [101.5563ms]\n", - "Epoch: [ 8/ 10], step: [ 268/ 390], loss: [0.2557], avg loss: [0.2902], time: [101.6114ms]\n", - "Epoch: [ 8/ 10], step: [ 269/ 390], loss: [0.3924], avg loss: [0.2906], time: [106.6966ms]\n", - "Epoch: [ 8/ 10], step: [ 270/ 390], loss: [0.2742], avg loss: [0.2905], time: [102.8039ms]\n", - "Epoch: [ 8/ 10], step: [ 271/ 390], loss: [0.3677], avg loss: [0.2908], time: [104.4290ms]\n", - "Epoch: [ 8/ 10], step: [ 272/ 390], loss: [0.3184], avg loss: [0.2909], time: [104.7673ms]\n", - "Epoch: [ 8/ 10], step: [ 273/ 390], loss: [0.2249], avg loss: [0.2906], time: [103.0540ms]\n", - "Epoch: [ 8/ 10], step: [ 274/ 390], loss: [0.3460], avg loss: [0.2908], time: [104.6810ms]\n", - "Epoch: [ 8/ 10], step: [ 275/ 390], loss: [0.2943], avg loss: [0.2909], time: [101.3746ms]\n", - "Epoch: [ 8/ 10], step: [ 276/ 390], loss: [0.3249], avg loss: [0.2910], time: [102.2553ms]\n", - "Epoch: [ 8/ 10], step: [ 277/ 390], loss: [0.3228], avg loss: [0.2911], time: [104.1923ms]\n", - "Epoch: [ 8/ 10], step: [ 278/ 390], loss: [0.1978], avg loss: [0.2908], time: [102.0288ms]\n", - "Epoch: [ 8/ 10], step: [ 279/ 390], loss: [0.2511], avg loss: [0.2906], time: [100.1449ms]\n", - "Epoch: [ 8/ 10], step: [ 280/ 390], loss: [0.2804], avg loss: [0.2906], time: [105.2427ms]\n", - "Epoch: [ 8/ 10], step: [ 281/ 390], loss: [0.2771], avg loss: [0.2905], time: [105.3586ms]\n", - "Epoch: [ 8/ 10], step: [ 282/ 390], loss: [0.2485], avg loss: [0.2904], time: [101.2533ms]\n" + "epoch: 8 step: 194, loss is 0.2501\n", + "epoch: 8 step: 195, loss is 0.1891\n", + "epoch: 8 step: 196, loss is 0.2274\n", + "epoch: 8 step: 197, loss is 0.3215\n", + "epoch: 8 step: 198, loss is 0.2382\n", + "epoch: 8 step: 199, loss is 0.3136\n", + "epoch: 8 step: 200, loss is 0.3687\n", + "epoch: 8 step: 201, loss is 0.1899\n", + "epoch: 8 step: 202, loss is 0.2513\n", + "epoch: 8 step: 203, loss is 0.2842\n", + "epoch: 8 step: 204, loss is 0.2917\n", + "epoch: 8 step: 205, loss is 0.2588\n", + "epoch: 8 step: 206, loss is 0.3324\n", + "epoch: 8 step: 207, loss is 0.3042\n", + "epoch: 8 step: 208, loss is 0.2606\n", + "epoch: 8 step: 209, loss is 0.3536\n", + "epoch: 8 step: 210, loss is 0.4595\n", + "epoch: 8 step: 211, loss is 0.2538\n", + "epoch: 8 step: 212, loss is 0.3812\n", + "epoch: 8 step: 213, loss is 0.1679\n", + "epoch: 8 step: 214, loss is 0.1868\n", + "epoch: 8 step: 215, loss is 0.4198\n", + "epoch: 8 step: 216, loss is 0.3415\n", + "epoch: 8 step: 217, loss is 0.2309\n", + "epoch: 8 step: 218, loss is 0.3316\n", + "epoch: 8 step: 219, loss is 0.3680\n", + "epoch: 8 step: 220, loss is 0.2453\n", + "epoch: 8 step: 221, loss is 0.4186\n", + "epoch: 8 step: 222, loss is 0.2608\n", + "epoch: 8 step: 223, loss is 0.3379\n", + "epoch: 8 step: 224, loss is 0.2239\n", + "epoch: 8 step: 225, loss is 0.3269\n", + "epoch: 8 step: 226, loss is 0.1936\n", + "epoch: 8 step: 227, loss is 0.2899\n", + "epoch: 8 step: 228, loss is 0.2795\n", + "epoch: 8 step: 229, loss is 0.2784\n", + "epoch: 8 step: 230, loss is 0.3530\n", + "epoch: 8 step: 231, loss is 0.2883\n", + "epoch: 8 step: 232, loss is 0.3957\n", + "epoch: 8 step: 233, loss is 0.1569\n", + "epoch: 8 step: 234, loss is 0.3854\n", + "epoch: 8 step: 235, loss is 0.2987\n", + "epoch: 8 step: 236, loss is 0.4343\n", + "epoch: 8 step: 237, loss is 0.2411\n", + "epoch: 8 step: 238, loss is 0.2459\n", + "epoch: 8 step: 239, loss is 0.3338\n", + "epoch: 8 step: 240, loss is 0.3082\n", + "epoch: 8 step: 241, loss is 0.2265\n", + "epoch: 8 step: 242, loss is 0.2507\n", + "epoch: 8 step: 243, loss is 0.3032\n", + "epoch: 8 step: 244, loss is 0.3334\n", + "epoch: 8 step: 245, loss is 0.4204\n", + "epoch: 8 step: 246, loss is 0.2962\n", + "epoch: 8 step: 247, loss is 0.3268\n", + "epoch: 8 step: 248, loss is 0.3063\n", + "epoch: 8 step: 249, loss is 0.2344\n", + "epoch: 8 step: 250, loss is 0.3675\n", + "epoch: 8 step: 251, loss is 0.2744\n", + "epoch: 8 step: 252, loss is 0.4469\n", + "epoch: 8 step: 253, loss is 0.3931\n", + "epoch: 8 step: 254, loss is 0.2097\n", + "epoch: 8 step: 255, loss is 0.2915\n", + "epoch: 8 step: 256, loss is 0.2605\n", + "epoch: 8 step: 257, loss is 0.1835\n", + "epoch: 8 step: 258, loss is 0.3082\n", + "epoch: 8 step: 259, loss is 0.1538\n", + "epoch: 8 step: 260, loss is 0.2970\n", + "epoch: 8 step: 261, loss is 0.2292\n", + "epoch: 8 step: 262, loss is 0.2763\n", + "epoch: 8 step: 263, loss is 0.4960\n", + "epoch: 8 step: 264, loss is 0.3799\n", + "epoch: 8 step: 265, loss is 0.3887\n", + "epoch: 8 step: 266, loss is 0.2376\n", + "epoch: 8 step: 267, loss is 0.2944\n", + "epoch: 8 step: 268, loss is 0.2557\n", + "epoch: 8 step: 269, loss is 0.3924\n", + "epoch: 8 step: 270, loss is 0.2742\n", + "epoch: 8 step: 271, loss is 0.3677\n", + "epoch: 8 step: 272, loss is 0.3184\n", + "epoch: 8 step: 273, loss is 0.2249\n", + "epoch: 8 step: 274, loss is 0.3460\n", + "epoch: 8 step: 275, loss is 0.2943\n", + "epoch: 8 step: 276, loss is 0.3249\n", + "epoch: 8 step: 277, loss is 0.3228\n", + "epoch: 8 step: 278, loss is 0.1978\n", + "epoch: 8 step: 279, loss is 0.2511\n", + "epoch: 8 step: 280, loss is 0.2804\n", + "epoch: 8 step: 281, loss is 0.2771\n", + "epoch: 8 step: 282, loss is 0.2485\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 8/ 10], step: [ 283/ 390], loss: [0.3052], avg loss: [0.2904], time: [100.7648ms]\n", - "Epoch: [ 8/ 10], step: [ 284/ 390], loss: [0.3046], avg loss: [0.2905], time: [107.3947ms]\n", - "Epoch: [ 8/ 10], step: [ 285/ 390], loss: [0.3282], avg loss: [0.2906], time: [102.7765ms]\n", - "Epoch: [ 8/ 10], step: [ 286/ 390], loss: [0.2687], avg loss: [0.2905], time: [108.1564ms]\n", - "Epoch: [ 8/ 10], step: [ 287/ 390], loss: [0.2085], avg loss: [0.2903], time: [102.8466ms]\n", - "Epoch: [ 8/ 10], step: [ 288/ 390], loss: [0.2500], avg loss: [0.2901], time: [107.4021ms]\n", - "Epoch: [ 8/ 10], step: [ 289/ 390], loss: [0.2477], avg loss: [0.2900], time: [105.6416ms]\n", - "Epoch: [ 8/ 10], step: [ 290/ 390], loss: [0.1799], avg loss: [0.2896], time: [103.6391ms]\n", - "Epoch: [ 8/ 10], step: [ 291/ 390], loss: [0.3890], avg loss: [0.2899], time: [103.8487ms]\n", - "Epoch: [ 8/ 10], step: [ 292/ 390], loss: [0.2363], avg loss: [0.2897], time: [101.7900ms]\n", - "Epoch: [ 8/ 10], step: [ 293/ 390], loss: [0.3996], avg loss: [0.2901], time: [106.0460ms]\n", - "Epoch: [ 8/ 10], step: [ 294/ 390], loss: [0.3036], avg loss: [0.2902], time: [102.5286ms]\n", - "Epoch: [ 8/ 10], step: [ 295/ 390], loss: [0.3625], avg loss: [0.2904], time: [106.6227ms]\n", - "Epoch: [ 8/ 10], step: [ 296/ 390], loss: [0.3306], avg loss: [0.2906], time: [100.7111ms]\n", - "Epoch: [ 8/ 10], step: [ 297/ 390], loss: [0.2989], avg loss: [0.2906], time: [102.4008ms]\n", - "Epoch: [ 8/ 10], step: [ 298/ 390], loss: [0.3709], avg loss: [0.2908], time: [106.6396ms]\n", - "Epoch: [ 8/ 10], step: [ 299/ 390], loss: [0.4077], avg loss: [0.2912], time: [103.5302ms]\n", - "Epoch: [ 8/ 10], step: [ 300/ 390], loss: [0.3659], avg loss: [0.2915], time: [103.3807ms]\n", - "Epoch: [ 8/ 10], step: [ 301/ 390], loss: [0.3173], avg loss: [0.2916], time: [104.9225ms]\n", - "Epoch: [ 8/ 10], step: [ 302/ 390], loss: [0.2164], avg loss: [0.2913], time: [107.4843ms]\n", - "Epoch: [ 8/ 10], step: [ 303/ 390], loss: [0.2811], avg loss: [0.2913], time: [105.4397ms]\n", - "Epoch: [ 8/ 10], step: [ 304/ 390], loss: [0.2248], avg loss: [0.2911], time: [106.0581ms]\n", - "Epoch: [ 8/ 10], step: [ 305/ 390], loss: [0.3226], avg loss: [0.2912], time: [101.0187ms]\n", - "Epoch: [ 8/ 10], step: [ 306/ 390], loss: [0.4554], avg loss: [0.2917], time: [105.2384ms]\n", - "Epoch: [ 8/ 10], step: [ 307/ 390], loss: [0.2045], avg loss: [0.2914], time: [101.7089ms]\n", - "Epoch: [ 8/ 10], step: [ 308/ 390], loss: [0.2654], avg loss: [0.2913], time: [101.4795ms]\n", - "Epoch: [ 8/ 10], step: [ 309/ 390], loss: [0.3877], avg loss: [0.2917], time: [102.4687ms]\n", - "Epoch: [ 8/ 10], step: [ 310/ 390], loss: [0.3128], avg loss: [0.2917], time: [107.8303ms]\n", - "Epoch: [ 8/ 10], step: [ 311/ 390], loss: [0.3225], avg loss: [0.2918], time: [106.2863ms]\n", - "Epoch: [ 8/ 10], step: [ 312/ 390], loss: [0.2464], avg loss: [0.2917], time: [99.4079ms]\n", - "Epoch: [ 8/ 10], step: [ 313/ 390], loss: [0.2058], avg loss: [0.2914], time: [104.8460ms]\n", - "Epoch: [ 8/ 10], step: [ 314/ 390], loss: [0.2562], avg loss: [0.2913], time: [102.0980ms]\n", - "Epoch: [ 8/ 10], step: [ 315/ 390], loss: [0.2906], avg loss: [0.2913], time: [102.4487ms]\n", - "Epoch: [ 8/ 10], step: [ 316/ 390], loss: [0.2278], avg loss: [0.2911], time: [102.9611ms]\n", - "Epoch: [ 8/ 10], step: [ 317/ 390], loss: [0.5644], avg loss: [0.2919], time: [103.7703ms]\n", - "Epoch: [ 8/ 10], step: [ 318/ 390], loss: [0.2196], avg loss: [0.2917], time: [106.2648ms]\n", - "Epoch: [ 8/ 10], step: [ 319/ 390], loss: [0.2686], avg loss: [0.2916], time: [104.3472ms]\n", - "Epoch: [ 8/ 10], step: [ 320/ 390], loss: [0.4012], avg loss: [0.2920], time: [106.7402ms]\n", - "Epoch: [ 8/ 10], step: [ 321/ 390], loss: [0.3391], avg loss: [0.2921], time: [102.1755ms]\n", - "Epoch: [ 8/ 10], step: [ 322/ 390], loss: [0.2743], avg loss: [0.2921], time: [102.1249ms]\n", - "Epoch: [ 8/ 10], step: [ 323/ 390], loss: [0.4422], avg loss: [0.2925], time: [105.3572ms]\n", - "Epoch: [ 8/ 10], step: [ 324/ 390], loss: [0.3312], avg loss: [0.2927], time: [101.5308ms]\n", - "Epoch: [ 8/ 10], step: [ 325/ 390], loss: [0.4168], avg loss: [0.2930], time: [101.8789ms]\n", - "Epoch: [ 8/ 10], step: [ 326/ 390], loss: [0.2627], avg loss: [0.2930], time: [102.4492ms]\n", - "Epoch: [ 8/ 10], step: [ 327/ 390], loss: [0.3838], avg loss: [0.2932], time: [104.4939ms]\n", - "Epoch: [ 8/ 10], step: [ 328/ 390], loss: [0.3179], avg loss: [0.2933], time: [99.9658ms]\n", - "Epoch: [ 8/ 10], step: [ 329/ 390], loss: [0.3666], avg loss: [0.2935], time: [102.0112ms]\n", - "Epoch: [ 8/ 10], step: [ 330/ 390], loss: [0.3488], avg loss: [0.2937], time: [102.6850ms]\n", - "Epoch: [ 8/ 10], step: [ 331/ 390], loss: [0.2525], avg loss: [0.2936], time: [104.0716ms]\n", - "Epoch: [ 8/ 10], step: [ 332/ 390], loss: [0.2915], avg loss: [0.2936], time: [102.4113ms]\n", - "Epoch: [ 8/ 10], step: [ 333/ 390], loss: [0.2774], avg loss: [0.2935], time: [105.7770ms]\n", - "Epoch: [ 8/ 10], step: [ 334/ 390], loss: [0.2881], avg loss: [0.2935], time: [101.4314ms]\n", - "Epoch: [ 8/ 10], step: [ 335/ 390], loss: [0.3295], avg loss: [0.2936], time: [104.5465ms]\n", - "Epoch: [ 8/ 10], step: [ 336/ 390], loss: [0.2187], avg loss: [0.2934], time: [104.4781ms]\n", - "Epoch: [ 8/ 10], step: [ 337/ 390], loss: [0.2379], avg loss: [0.2932], time: [103.0433ms]\n", - "Epoch: [ 8/ 10], step: [ 338/ 390], loss: [0.3931], avg loss: [0.2935], time: [105.2401ms]\n", - "Epoch: [ 8/ 10], step: [ 339/ 390], loss: [0.2094], avg loss: [0.2933], time: [104.9283ms]\n", - "Epoch: [ 8/ 10], step: [ 340/ 390], loss: [0.2684], avg loss: [0.2932], time: [106.2303ms]\n", - "Epoch: [ 8/ 10], step: [ 341/ 390], loss: [0.3613], avg loss: [0.2934], time: [105.0959ms]\n", - "Epoch: [ 8/ 10], step: [ 342/ 390], loss: [0.2116], avg loss: [0.2932], time: [104.8949ms]\n", - "Epoch: [ 8/ 10], step: [ 343/ 390], loss: [0.4666], avg loss: [0.2937], time: [102.3049ms]\n", - "Epoch: [ 8/ 10], step: [ 344/ 390], loss: [0.2186], avg loss: [0.2934], time: [104.3375ms]\n", - "Epoch: [ 8/ 10], step: [ 345/ 390], loss: [0.3330], avg loss: [0.2936], time: [103.9658ms]\n", - "Epoch: [ 8/ 10], step: [ 346/ 390], loss: [0.2798], avg loss: [0.2935], time: [100.8401ms]\n", - "Epoch: [ 8/ 10], step: [ 347/ 390], loss: [0.1680], avg loss: [0.2932], time: [104.0971ms]\n", - "Epoch: [ 8/ 10], step: [ 348/ 390], loss: [0.2947], avg loss: [0.2932], time: [103.9884ms]\n", - "Epoch: [ 8/ 10], step: [ 349/ 390], loss: [0.1921], avg loss: [0.2929], time: [100.9669ms]\n", - "Epoch: [ 8/ 10], step: [ 350/ 390], loss: [0.2572], avg loss: [0.2928], time: [102.8190ms]\n", - "Epoch: [ 8/ 10], step: [ 351/ 390], loss: [0.3251], avg loss: [0.2929], time: [103.5602ms]\n", - "Epoch: [ 8/ 10], step: [ 352/ 390], loss: [0.1561], avg loss: [0.2925], time: [102.4060ms]\n", - "Epoch: [ 8/ 10], step: [ 353/ 390], loss: [0.3842], avg loss: [0.2927], time: [105.7391ms]\n", - "Epoch: [ 8/ 10], step: [ 354/ 390], loss: [0.3143], avg loss: [0.2928], time: [103.2579ms]\n", - "Epoch: [ 8/ 10], step: [ 355/ 390], loss: [0.3157], avg loss: [0.2929], time: [106.1723ms]\n", - "Epoch: [ 8/ 10], step: [ 356/ 390], loss: [0.2084], avg loss: [0.2926], time: [105.6907ms]\n", - "Epoch: [ 8/ 10], step: [ 357/ 390], loss: [0.3469], avg loss: [0.2928], time: [104.6863ms]\n", - "Epoch: [ 8/ 10], step: [ 358/ 390], loss: [0.2570], avg loss: [0.2927], time: [105.5679ms]\n", - "Epoch: [ 8/ 10], step: [ 359/ 390], loss: [0.1771], avg loss: [0.2924], time: [106.8208ms]\n", - "Epoch: [ 8/ 10], step: [ 360/ 390], loss: [0.4097], avg loss: [0.2927], time: [104.7275ms]\n", - "Epoch: [ 8/ 10], step: [ 361/ 390], loss: [0.2052], avg loss: [0.2924], time: [105.3350ms]\n", - "Epoch: [ 8/ 10], step: [ 362/ 390], loss: [0.2419], avg loss: [0.2923], time: [102.6196ms]\n", - "Epoch: [ 8/ 10], step: [ 363/ 390], loss: [0.2891], avg loss: [0.2923], time: [102.7620ms]\n", - "Epoch: [ 8/ 10], step: [ 364/ 390], loss: [0.3674], avg loss: [0.2925], time: [102.9015ms]\n", - "Epoch: [ 8/ 10], step: [ 365/ 390], loss: [0.3137], avg loss: [0.2926], time: [105.0744ms]\n", - "Epoch: [ 8/ 10], step: [ 366/ 390], loss: [0.3452], avg loss: [0.2927], time: [108.6090ms]\n", - "Epoch: [ 8/ 10], step: [ 367/ 390], loss: [0.3247], avg loss: [0.2928], time: [103.7612ms]\n", - "Epoch: [ 8/ 10], step: [ 368/ 390], loss: [0.2509], avg loss: [0.2927], time: [108.0258ms]\n", - "Epoch: [ 8/ 10], step: [ 369/ 390], loss: [0.3878], avg loss: [0.2929], time: [105.3483ms]\n", - "Epoch: [ 8/ 10], step: [ 370/ 390], loss: [0.3596], avg loss: [0.2931], time: [103.9951ms]\n", - "Epoch: [ 8/ 10], step: [ 371/ 390], loss: [0.3270], avg loss: [0.2932], time: [107.1894ms]\n" + "epoch: 8 step: 283, loss is 0.3052\n", + "epoch: 8 step: 284, loss is 0.3046\n", + "epoch: 8 step: 285, loss is 0.3282\n", + "epoch: 8 step: 286, loss is 0.2687\n", + "epoch: 8 step: 287, loss is 0.2085\n", + "epoch: 8 step: 288, loss is 0.2500\n", + "epoch: 8 step: 289, loss is 0.2477\n", + "epoch: 8 step: 290, loss is 0.1799\n", + "epoch: 8 step: 291, loss is 0.3890\n", + "epoch: 8 step: 292, loss is 0.2363\n", + "epoch: 8 step: 293, loss is 0.3996\n", + "epoch: 8 step: 294, loss is 0.3036\n", + "epoch: 8 step: 295, loss is 0.3625\n", + "epoch: 8 step: 296, loss is 0.3306\n", + "epoch: 8 step: 297, loss is 0.2989\n", + "epoch: 8 step: 298, loss is 0.3709\n", + "epoch: 8 step: 299, loss is 0.4077\n", + "epoch: 8 step: 300, loss is 0.3659\n", + "epoch: 8 step: 301, loss is 0.3173\n", + "epoch: 8 step: 302, loss is 0.2164\n", + "epoch: 8 step: 303, loss is 0.2811\n", + "epoch: 8 step: 304, loss is 0.2248\n", + "epoch: 8 step: 305, loss is 0.3226\n", + "epoch: 8 step: 306, loss is 0.4554\n", + "epoch: 8 step: 307, loss is 0.2045\n", + "epoch: 8 step: 308, loss is 0.2654\n", + "epoch: 8 step: 309, loss is 0.3877\n", + "epoch: 8 step: 310, loss is 0.3128\n", + "epoch: 8 step: 311, loss is 0.3225\n", + "epoch: 8 step: 312, loss is 0.2464\n", + "epoch: 8 step: 313, loss is 0.2058\n", + "epoch: 8 step: 314, loss is 0.2562\n", + "epoch: 8 step: 315, loss is 0.2906\n", + "epoch: 8 step: 316, loss is 0.2278\n", + "epoch: 8 step: 317, loss is 0.5644\n", + "epoch: 8 step: 318, loss is 0.2196\n", + "epoch: 8 step: 319, loss is 0.2686\n", + "epoch: 8 step: 320, loss is 0.4012\n", + "epoch: 8 step: 321, loss is 0.3391\n", + "epoch: 8 step: 322, loss is 0.2743\n", + "epoch: 8 step: 323, loss is 0.4422\n", + "epoch: 8 step: 324, loss is 0.3312\n", + "epoch: 8 step: 325, loss is 0.4168\n", + "epoch: 8 step: 326, loss is 0.2627\n", + "epoch: 8 step: 327, loss is 0.3838\n", + "epoch: 8 step: 328, loss is 0.3179\n", + "epoch: 8 step: 329, loss is 0.3666\n", + "epoch: 8 step: 330, loss is 0.3488\n", + "epoch: 8 step: 331, loss is 0.2525\n", + "epoch: 8 step: 332, loss is 0.2915\n", + "epoch: 8 step: 333, loss is 0.2774\n", + "epoch: 8 step: 334, loss is 0.2881\n", + "epoch: 8 step: 335, loss is 0.3295\n", + "epoch: 8 step: 336, loss is 0.2187\n", + "epoch: 8 step: 337, loss is 0.2379\n", + "epoch: 8 step: 338, loss is 0.3931\n", + "epoch: 8 step: 339, loss is 0.2094\n", + "epoch: 8 step: 340, loss is 0.2684\n", + "epoch: 8 step: 341, loss is 0.3613\n", + "epoch: 8 step: 342, loss is 0.2116\n", + "epoch: 8 step: 343, loss is 0.4666\n", + "epoch: 8 step: 344, loss is 0.2186\n", + "epoch: 8 step: 345, loss is 0.3330\n", + "epoch: 8 step: 346, loss is 0.2798\n", + "epoch: 8 step: 347, loss is 0.1680\n", + "epoch: 8 step: 348, loss is 0.2947\n", + "epoch: 8 step: 349, loss is 0.1921\n", + "epoch: 8 step: 350, loss is 0.2572\n", + "epoch: 8 step: 351, loss is 0.3251\n", + "epoch: 8 step: 352, loss is 0.1561\n", + "epoch: 8 step: 353, loss is 0.3842\n", + "epoch: 8 step: 354, loss is 0.3143\n", + "epoch: 8 step: 355, loss is 0.3157\n", + "epoch: 8 step: 356, loss is 0.2084\n", + "epoch: 8 step: 357, loss is 0.3469\n", + "epoch: 8 step: 358, loss is 0.2570\n", + "epoch: 8 step: 359, loss is 0.1771\n", + "epoch: 8 step: 360, loss is 0.4097\n", + "epoch: 8 step: 361, loss is 0.2052\n", + "epoch: 8 step: 362, loss is 0.2419\n", + "epoch: 8 step: 363, loss is 0.2891\n", + "epoch: 8 step: 364, loss is 0.3674\n", + "epoch: 8 step: 365, loss is 0.3137\n", + "epoch: 8 step: 366, loss is 0.3452\n", + "epoch: 8 step: 367, loss is 0.3247\n", + "epoch: 8 step: 368, loss is 0.2509\n", + "epoch: 8 step: 369, loss is 0.3878\n", + "epoch: 8 step: 370, loss is 0.3596\n", + "epoch: 8 step: 371, loss is 0.3270\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 8/ 10], step: [ 372/ 390], loss: [0.2237], avg loss: [0.2930], time: [104.1131ms]\n", - "Epoch: [ 8/ 10], step: [ 373/ 390], loss: [0.1964], avg loss: [0.2928], time: [107.8188ms]\n", - "Epoch: [ 8/ 10], step: [ 374/ 390], loss: [0.3240], avg loss: [0.2928], time: [102.8645ms]\n", - "Epoch: [ 8/ 10], step: [ 375/ 390], loss: [0.4185], avg loss: [0.2932], time: [101.9661ms]\n", - "Epoch: [ 8/ 10], step: [ 376/ 390], loss: [0.2762], avg loss: [0.2931], time: [102.5932ms]\n", - "Epoch: [ 8/ 10], step: [ 377/ 390], loss: [0.2433], avg loss: [0.2930], time: [105.0153ms]\n", - "Epoch: [ 8/ 10], step: [ 378/ 390], loss: [0.3024], avg loss: [0.2930], time: [103.4801ms]\n", - "Epoch: [ 8/ 10], step: [ 379/ 390], loss: [0.3009], avg loss: [0.2930], time: [106.7383ms]\n", - "Epoch: [ 8/ 10], step: [ 380/ 390], loss: [0.3313], avg loss: [0.2931], time: [103.3118ms]\n", - "Epoch: [ 8/ 10], step: [ 381/ 390], loss: [0.2318], avg loss: [0.2930], time: [100.6205ms]\n", - "Epoch: [ 8/ 10], step: [ 382/ 390], loss: [0.2963], avg loss: [0.2930], time: [103.0352ms]\n", - "Epoch: [ 8/ 10], step: [ 383/ 390], loss: [0.3568], avg loss: [0.2932], time: [100.2462ms]\n", - "Epoch: [ 8/ 10], step: [ 384/ 390], loss: [0.2718], avg loss: [0.2931], time: [101.7380ms]\n", - "Epoch: [ 8/ 10], step: [ 385/ 390], loss: [0.3772], avg loss: [0.2933], time: [106.9889ms]\n", - "Epoch: [ 8/ 10], step: [ 386/ 390], loss: [0.4922], avg loss: [0.2938], time: [104.3885ms]\n", - "Epoch: [ 8/ 10], step: [ 387/ 390], loss: [0.4117], avg loss: [0.2941], time: [103.9355ms]\n", - "Epoch: [ 8/ 10], step: [ 388/ 390], loss: [0.3131], avg loss: [0.2942], time: [102.3018ms]\n", - "Epoch: [ 8/ 10], step: [ 389/ 390], loss: [0.3322], avg loss: [0.2943], time: [98.4211ms]\n", - "Epoch: [ 8/ 10], step: [ 390/ 390], loss: [0.2457], avg loss: [0.2942], time: [825.6276ms]\n", - "Epoch time: 41559.210, per step time: 106.562\n", + "epoch: 8 step: 372, loss is 0.2237\n", + "epoch: 8 step: 373, loss is 0.1964\n", + "epoch: 8 step: 374, loss is 0.3240\n", + "epoch: 8 step: 375, loss is 0.4185\n", + "epoch: 8 step: 376, loss is 0.2762\n", + "epoch: 8 step: 377, loss is 0.2433\n", + "epoch: 8 step: 378, loss is 0.3024\n", + "epoch: 8 step: 379, loss is 0.3009\n", + "epoch: 8 step: 380, loss is 0.3313\n", + "epoch: 8 step: 381, loss is 0.2318\n", + "epoch: 8 step: 382, loss is 0.2963\n", + "epoch: 8 step: 383, loss is 0.3568\n", + "epoch: 8 step: 384, loss is 0.2718\n", + "epoch: 8 step: 385, loss is 0.3772\n", + "epoch: 8 step: 386, loss is 0.4922\n", + "epoch: 8 step: 387, loss is 0.4117\n", + "epoch: 8 step: 388, loss is 0.3131\n", + "epoch: 8 step: 389, loss is 0.3322\n", + "epoch: 8 step: 390, loss is 0.2457\n", "Epoch time: 41559.509, per step time: 106.563, avg loss: 0.294\n", "************************************************************\n", - "Epoch: [ 9/ 10], step: [ 1/ 390], loss: [0.2256], avg loss: [0.2256], time: [73.0402ms]\n", - "Epoch: [ 9/ 10], step: [ 2/ 390], loss: [0.3673], avg loss: [0.2965], time: [100.7907ms]\n", - "Epoch: [ 9/ 10], step: [ 3/ 390], loss: [0.3487], avg loss: [0.3139], time: [102.1595ms]\n", - "Epoch: [ 9/ 10], step: [ 4/ 390], loss: [0.2746], avg loss: [0.3041], time: [100.4121ms]\n", - "Epoch: [ 9/ 10], step: [ 5/ 390], loss: [0.2949], avg loss: [0.3022], time: [99.5049ms]\n", - "Epoch: [ 9/ 10], step: [ 6/ 390], loss: [0.2162], avg loss: [0.2879], time: [104.5511ms]\n", - "Epoch: [ 9/ 10], step: [ 7/ 390], loss: [0.2553], avg loss: [0.2832], time: [101.1810ms]\n", - "Epoch: [ 9/ 10], step: [ 8/ 390], loss: [0.2775], avg loss: [0.2825], time: [101.3844ms]\n", - "Epoch: [ 9/ 10], step: [ 9/ 390], loss: [0.2729], avg loss: [0.2815], time: [99.4809ms]\n", - "Epoch: [ 9/ 10], step: [ 10/ 390], loss: [0.3049], avg loss: [0.2838], time: [104.8808ms]\n", - "Epoch: [ 9/ 10], step: [ 11/ 390], loss: [0.2232], avg loss: [0.2783], time: [99.8337ms]\n", - "Epoch: [ 9/ 10], step: [ 12/ 390], loss: [0.4350], avg loss: [0.2913], time: [102.1557ms]\n", - "Epoch: [ 9/ 10], step: [ 13/ 390], loss: [0.2641], avg loss: [0.2893], time: [98.5370ms]\n", - "Epoch: [ 9/ 10], step: [ 14/ 390], loss: [0.2723], avg loss: [0.2880], time: [102.5147ms]\n", - "Epoch: [ 9/ 10], step: [ 15/ 390], loss: [0.3581], avg loss: [0.2927], time: [101.6605ms]\n", - "Epoch: [ 9/ 10], step: [ 16/ 390], loss: [0.3240], avg loss: [0.2947], time: [104.3739ms]\n", - "Epoch: [ 9/ 10], step: [ 17/ 390], loss: [0.2842], avg loss: [0.2941], time: [102.5045ms]\n", - "Epoch: [ 9/ 10], step: [ 18/ 390], loss: [0.2179], avg loss: [0.2898], time: [104.9385ms]\n", - "Epoch: [ 9/ 10], step: [ 19/ 390], loss: [0.2201], avg loss: [0.2862], time: [101.8097ms]\n", - "Epoch: [ 9/ 10], step: [ 20/ 390], loss: [0.2116], avg loss: [0.2824], time: [101.5544ms]\n", - "Epoch: [ 9/ 10], step: [ 21/ 390], loss: [0.2918], avg loss: [0.2829], time: [101.5136ms]\n", - "Epoch: [ 9/ 10], step: [ 22/ 390], loss: [0.3158], avg loss: [0.2844], time: [105.2084ms]\n", - "Epoch: [ 9/ 10], step: [ 23/ 390], loss: [0.2919], avg loss: [0.2847], time: [98.5651ms]\n", - "Epoch: [ 9/ 10], step: [ 24/ 390], loss: [0.3004], avg loss: [0.2853], time: [101.0883ms]\n", - "Epoch: [ 9/ 10], step: [ 25/ 390], loss: [0.1961], avg loss: [0.2818], time: [103.5347ms]\n", - "Epoch: [ 9/ 10], step: [ 26/ 390], loss: [0.1507], avg loss: [0.2767], time: [100.9510ms]\n", - "Epoch: [ 9/ 10], step: [ 27/ 390], loss: [0.2368], avg loss: [0.2753], time: [105.0570ms]\n", - "Epoch: [ 9/ 10], step: [ 28/ 390], loss: [0.2472], avg loss: [0.2743], time: [101.9182ms]\n", - "Epoch: [ 9/ 10], step: [ 29/ 390], loss: [0.3680], avg loss: [0.2775], time: [104.1172ms]\n", - "Epoch: [ 9/ 10], step: [ 30/ 390], loss: [0.2974], avg loss: [0.2782], time: [100.6999ms]\n", - "Epoch: [ 9/ 10], step: [ 31/ 390], loss: [0.4239], avg loss: [0.2829], time: [100.9908ms]\n", - "Epoch: [ 9/ 10], step: [ 32/ 390], loss: [0.2210], avg loss: [0.2809], time: [102.1395ms]\n", - "Epoch: [ 9/ 10], step: [ 33/ 390], loss: [0.2801], avg loss: [0.2809], time: [102.2515ms]\n", - "Epoch: [ 9/ 10], step: [ 34/ 390], loss: [0.3228], avg loss: [0.2821], time: [100.3528ms]\n", - "Epoch: [ 9/ 10], step: [ 35/ 390], loss: [0.2770], avg loss: [0.2820], time: [103.7469ms]\n", - "Epoch: [ 9/ 10], step: [ 36/ 390], loss: [0.2428], avg loss: [0.2809], time: [103.4606ms]\n", - "Epoch: [ 9/ 10], step: [ 37/ 390], loss: [0.3188], avg loss: [0.2819], time: [101.4192ms]\n", - "Epoch: [ 9/ 10], step: [ 38/ 390], loss: [0.3796], avg loss: [0.2845], time: [100.3752ms]\n", - "Epoch: [ 9/ 10], step: [ 39/ 390], loss: [0.3048], avg loss: [0.2850], time: [101.6319ms]\n", - "Epoch: [ 9/ 10], step: [ 40/ 390], loss: [0.3629], avg loss: [0.2870], time: [103.6632ms]\n", - "Epoch: [ 9/ 10], step: [ 41/ 390], loss: [0.2277], avg loss: [0.2855], time: [103.6563ms]\n", - "Epoch: [ 9/ 10], step: [ 42/ 390], loss: [0.3251], avg loss: [0.2865], time: [101.8090ms]\n", - "Epoch: [ 9/ 10], step: [ 43/ 390], loss: [0.2962], avg loss: [0.2867], time: [99.7391ms]\n", - "Epoch: [ 9/ 10], step: [ 44/ 390], loss: [0.3035], avg loss: [0.2871], time: [102.1402ms]\n", - "Epoch: [ 9/ 10], step: [ 45/ 390], loss: [0.2271], avg loss: [0.2857], time: [102.7102ms]\n", - "Epoch: [ 9/ 10], step: [ 46/ 390], loss: [0.3214], avg loss: [0.2865], time: [100.9073ms]\n", - "Epoch: [ 9/ 10], step: [ 47/ 390], loss: [0.3241], avg loss: [0.2873], time: [102.1171ms]\n", - "Epoch: [ 9/ 10], step: [ 48/ 390], loss: [0.2813], avg loss: [0.2872], time: [103.9498ms]\n", - "Epoch: [ 9/ 10], step: [ 49/ 390], loss: [0.2779], avg loss: [0.2870], time: [103.4803ms]\n", - "Epoch: [ 9/ 10], step: [ 50/ 390], loss: [0.3609], avg loss: [0.2885], time: [106.9639ms]\n", - "Epoch: [ 9/ 10], step: [ 51/ 390], loss: [0.2184], avg loss: [0.2871], time: [103.6048ms]\n", - "Epoch: [ 9/ 10], step: [ 52/ 390], loss: [0.2971], avg loss: [0.2873], time: [100.3568ms]\n", - "Epoch: [ 9/ 10], step: [ 53/ 390], loss: [0.2773], avg loss: [0.2871], time: [101.9478ms]\n", - "Epoch: [ 9/ 10], step: [ 54/ 390], loss: [0.2829], avg loss: [0.2870], time: [103.6716ms]\n", - "Epoch: [ 9/ 10], step: [ 55/ 390], loss: [0.2038], avg loss: [0.2855], time: [106.3128ms]\n", - "Epoch: [ 9/ 10], step: [ 56/ 390], loss: [0.1633], avg loss: [0.2833], time: [105.2680ms]\n", - "Epoch: [ 9/ 10], step: [ 57/ 390], loss: [0.3691], avg loss: [0.2848], time: [104.1119ms]\n", - "Epoch: [ 9/ 10], step: [ 58/ 390], loss: [0.2271], avg loss: [0.2838], time: [100.7919ms]\n", - "Epoch: [ 9/ 10], step: [ 59/ 390], loss: [0.2663], avg loss: [0.2835], time: [106.4491ms]\n", - "Epoch: [ 9/ 10], step: [ 60/ 390], loss: [0.4288], avg loss: [0.2860], time: [100.9979ms]\n", - "Epoch: [ 9/ 10], step: [ 61/ 390], loss: [0.2189], avg loss: [0.2849], time: [102.9048ms]\n", - "Epoch: [ 9/ 10], step: [ 62/ 390], loss: [0.4068], avg loss: [0.2868], time: [104.0356ms]\n", - "Epoch: [ 9/ 10], step: [ 63/ 390], loss: [0.2435], avg loss: [0.2861], time: [105.6540ms]\n", - "Epoch: [ 9/ 10], step: [ 64/ 390], loss: [0.3208], avg loss: [0.2867], time: [101.3505ms]\n", - "Epoch: [ 9/ 10], step: [ 65/ 390], loss: [0.1461], avg loss: [0.2845], time: [103.0321ms]\n", - "Epoch: [ 9/ 10], step: [ 66/ 390], loss: [0.2150], avg loss: [0.2835], time: [105.2101ms]\n", - "Epoch: [ 9/ 10], step: [ 67/ 390], loss: [0.3100], avg loss: [0.2839], time: [99.7190ms]\n", - "Epoch: [ 9/ 10], step: [ 68/ 390], loss: [0.2170], avg loss: [0.2829], time: [101.7153ms]\n" + "epoch: 9 step: 1, loss is 0.2256\n", + "epoch: 9 step: 2, loss is 0.3673\n", + "epoch: 9 step: 3, loss is 0.3487\n", + "epoch: 9 step: 4, loss is 0.2746\n", + "epoch: 9 step: 5, loss is 0.2949\n", + "epoch: 9 step: 6, loss is 0.2162\n", + "epoch: 9 step: 7, loss is 0.2553\n", + "epoch: 9 step: 8, loss is 0.2775\n", + "epoch: 9 step: 9, loss is 0.2729\n", + "epoch: 9 step: 10, loss is 0.3049\n", + "epoch: 9 step: 11, loss is 0.2232\n", + "epoch: 9 step: 12, loss is 0.4350\n", + "epoch: 9 step: 13, loss is 0.2641\n", + "epoch: 9 step: 14, loss is 0.2723\n", + "epoch: 9 step: 15, loss is 0.3581\n", + "epoch: 9 step: 16, loss is 0.3240\n", + "epoch: 9 step: 17, loss is 0.2842\n", + "epoch: 9 step: 18, loss is 0.2179\n", + "epoch: 9 step: 19, loss is 0.2201\n", + "epoch: 9 step: 20, loss is 0.2116\n", + "epoch: 9 step: 21, loss is 0.2918\n", + "epoch: 9 step: 22, loss is 0.3158\n", + "epoch: 9 step: 23, loss is 0.2919\n", + "epoch: 9 step: 24, loss is 0.3004\n", + "epoch: 9 step: 25, loss is 0.1961\n", + "epoch: 9 step: 26, loss is 0.1507\n", + "epoch: 9 step: 27, loss is 0.2368\n", + "epoch: 9 step: 28, loss is 0.2472\n", + "epoch: 9 step: 29, loss is 0.3680\n", + "epoch: 9 step: 30, loss is 0.2974\n", + "epoch: 9 step: 31, loss is 0.4239\n", + "epoch: 9 step: 32, loss is 0.2210\n", + "epoch: 9 step: 33, loss is 0.2801\n", + "epoch: 9 step: 34, loss is 0.3228\n", + "epoch: 9 step: 35, loss is 0.2770\n", + "epoch: 9 step: 36, loss is 0.2428\n", + "epoch: 9 step: 37, loss is 0.3188\n", + "epoch: 9 step: 38, loss is 0.3796\n", + "epoch: 9 step: 39, loss is 0.3048\n", + "epoch: 9 step: 40, loss is 0.3629\n", + "epoch: 9 step: 41, loss is 0.2277\n", + "epoch: 9 step: 42, loss is 0.3251\n", + "epoch: 9 step: 43, loss is 0.2962\n", + "epoch: 9 step: 44, loss is 0.3035\n", + "epoch: 9 step: 45, loss is 0.2271\n", + "epoch: 9 step: 46, loss is 0.3214\n", + "epoch: 9 step: 47, loss is 0.3241\n", + "epoch: 9 step: 48, loss is 0.2813\n", + "epoch: 9 step: 49, loss is 0.2779\n", + "epoch: 9 step: 50, loss is 0.3609\n", + "epoch: 9 step: 51, loss is 0.2184\n", + "epoch: 9 step: 52, loss is 0.2971\n", + "epoch: 9 step: 53, loss is 0.2773\n", + "epoch: 9 step: 54, loss is 0.2829\n", + "epoch: 9 step: 55, loss is 0.2038\n", + "epoch: 9 step: 56, loss is 0.1633\n", + "epoch: 9 step: 57, loss is 0.3691\n", + "epoch: 9 step: 58, loss is 0.2271\n", + "epoch: 9 step: 59, loss is 0.2663\n", + "epoch: 9 step: 60, loss is 0.4288\n", + "epoch: 9 step: 61, loss is 0.2189\n", + "epoch: 9 step: 62, loss is 0.4068\n", + "epoch: 9 step: 63, loss is 0.2435\n", + "epoch: 9 step: 64, loss is 0.3208\n", + "epoch: 9 step: 65, loss is 0.1461\n", + "epoch: 9 step: 66, loss is 0.2150\n", + "epoch: 9 step: 67, loss is 0.3100\n", + "epoch: 9 step: 68, loss is 0.2170\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 9/ 10], step: [ 69/ 390], loss: [0.4718], avg loss: [0.2856], time: [103.2302ms]\n", - "Epoch: [ 9/ 10], step: [ 70/ 390], loss: [0.4030], avg loss: [0.2873], time: [100.6932ms]\n", - "Epoch: [ 9/ 10], step: [ 71/ 390], loss: [0.3980], avg loss: [0.2888], time: [100.3315ms]\n", - "Epoch: [ 9/ 10], step: [ 72/ 390], loss: [0.2488], avg loss: [0.2883], time: [102.1490ms]\n", - "Epoch: [ 9/ 10], step: [ 73/ 390], loss: [0.1879], avg loss: [0.2869], time: [103.2121ms]\n", - "Epoch: [ 9/ 10], step: [ 74/ 390], loss: [0.3052], avg loss: [0.2872], time: [105.9821ms]\n", - "Epoch: [ 9/ 10], step: [ 75/ 390], loss: [0.1858], avg loss: [0.2858], time: [103.2846ms]\n", - "Epoch: [ 9/ 10], step: [ 76/ 390], loss: [0.1737], avg loss: [0.2843], time: [102.9892ms]\n", - "Epoch: [ 9/ 10], step: [ 77/ 390], loss: [0.3333], avg loss: [0.2850], time: [101.4016ms]\n", - "Epoch: [ 9/ 10], step: [ 78/ 390], loss: [0.1959], avg loss: [0.2838], time: [103.2929ms]\n", - "Epoch: [ 9/ 10], step: [ 79/ 390], loss: [0.2411], avg loss: [0.2833], time: [104.4221ms]\n", - "Epoch: [ 9/ 10], step: [ 80/ 390], loss: [0.2749], avg loss: [0.2832], time: [101.3968ms]\n", - "Epoch: [ 9/ 10], step: [ 81/ 390], loss: [0.1702], avg loss: [0.2818], time: [103.2207ms]\n", - "Epoch: [ 9/ 10], step: [ 82/ 390], loss: [0.1831], avg loss: [0.2806], time: [101.0070ms]\n", - "Epoch: [ 9/ 10], step: [ 83/ 390], loss: [0.3682], avg loss: [0.2816], time: [103.2400ms]\n", - "Epoch: [ 9/ 10], step: [ 84/ 390], loss: [0.1844], avg loss: [0.2805], time: [101.6552ms]\n", - "Epoch: [ 9/ 10], step: [ 85/ 390], loss: [0.2799], avg loss: [0.2805], time: [100.9090ms]\n", - "Epoch: [ 9/ 10], step: [ 86/ 390], loss: [0.2805], avg loss: [0.2805], time: [102.1225ms]\n", - "Epoch: [ 9/ 10], step: [ 87/ 390], loss: [0.3685], avg loss: [0.2815], time: [105.3522ms]\n", - "Epoch: [ 9/ 10], step: [ 88/ 390], loss: [0.2802], avg loss: [0.2815], time: [103.0819ms]\n", - "Epoch: [ 9/ 10], step: [ 89/ 390], loss: [0.1326], avg loss: [0.2798], time: [101.0733ms]\n", - "Epoch: [ 9/ 10], step: [ 90/ 390], loss: [0.1912], avg loss: [0.2788], time: [101.8717ms]\n", - "Epoch: [ 9/ 10], step: [ 91/ 390], loss: [0.3006], avg loss: [0.2791], time: [103.6603ms]\n", - "Epoch: [ 9/ 10], step: [ 92/ 390], loss: [0.1286], avg loss: [0.2774], time: [105.4566ms]\n", - "Epoch: [ 9/ 10], step: [ 93/ 390], loss: [0.2179], avg loss: [0.2768], time: [103.0874ms]\n", - "Epoch: [ 9/ 10], step: [ 94/ 390], loss: [0.1999], avg loss: [0.2760], time: [104.0862ms]\n", - "Epoch: [ 9/ 10], step: [ 95/ 390], loss: [0.2278], avg loss: [0.2755], time: [101.6471ms]\n", - "Epoch: [ 9/ 10], step: [ 96/ 390], loss: [0.1420], avg loss: [0.2741], time: [103.2350ms]\n", - "Epoch: [ 9/ 10], step: [ 97/ 390], loss: [0.1676], avg loss: [0.2730], time: [103.5488ms]\n", - "Epoch: [ 9/ 10], step: [ 98/ 390], loss: [0.2984], avg loss: [0.2732], time: [102.4947ms]\n", - "Epoch: [ 9/ 10], step: [ 99/ 390], loss: [0.2156], avg loss: [0.2726], time: [102.6287ms]\n", - "Epoch: [ 9/ 10], step: [ 100/ 390], loss: [0.2189], avg loss: [0.2721], time: [101.0580ms]\n", - "Epoch: [ 9/ 10], step: [ 101/ 390], loss: [0.2909], avg loss: [0.2723], time: [105.7246ms]\n", - "Epoch: [ 9/ 10], step: [ 102/ 390], loss: [0.3303], avg loss: [0.2729], time: [101.7463ms]\n", - "Epoch: [ 9/ 10], step: [ 103/ 390], loss: [0.4217], avg loss: [0.2743], time: [102.3078ms]\n", - "Epoch: [ 9/ 10], step: [ 104/ 390], loss: [0.2753], avg loss: [0.2743], time: [104.3847ms]\n", - "Epoch: [ 9/ 10], step: [ 105/ 390], loss: [0.2595], avg loss: [0.2742], time: [100.9948ms]\n", - "Epoch: [ 9/ 10], step: [ 106/ 390], loss: [0.2275], avg loss: [0.2737], time: [104.4550ms]\n", - "Epoch: [ 9/ 10], step: [ 107/ 390], loss: [0.3049], avg loss: [0.2740], time: [104.4550ms]\n", - "Epoch: [ 9/ 10], step: [ 108/ 390], loss: [0.3463], avg loss: [0.2747], time: [101.8348ms]\n", - "Epoch: [ 9/ 10], step: [ 109/ 390], loss: [0.2354], avg loss: [0.2743], time: [102.1039ms]\n", - "Epoch: [ 9/ 10], step: [ 110/ 390], loss: [0.2470], avg loss: [0.2741], time: [100.6250ms]\n", - "Epoch: [ 9/ 10], step: [ 111/ 390], loss: [0.2685], avg loss: [0.2740], time: [101.1736ms]\n", - "Epoch: [ 9/ 10], step: [ 112/ 390], loss: [0.2859], avg loss: [0.2741], time: [102.1161ms]\n", - "Epoch: [ 9/ 10], step: [ 113/ 390], loss: [0.2302], avg loss: [0.2738], time: [103.1101ms]\n", - "Epoch: [ 9/ 10], step: [ 114/ 390], loss: [0.2259], avg loss: [0.2733], time: [101.3191ms]\n", - "Epoch: [ 9/ 10], step: [ 115/ 390], loss: [0.2267], avg loss: [0.2729], time: [104.1887ms]\n", - "Epoch: [ 9/ 10], step: [ 116/ 390], loss: [0.2309], avg loss: [0.2726], time: [103.5342ms]\n", - "Epoch: [ 9/ 10], step: [ 117/ 390], loss: [0.3122], avg loss: [0.2729], time: [102.5238ms]\n", - "Epoch: [ 9/ 10], step: [ 118/ 390], loss: [0.2515], avg loss: [0.2727], time: [107.3129ms]\n", - "Epoch: [ 9/ 10], step: [ 119/ 390], loss: [0.2786], avg loss: [0.2728], time: [103.4453ms]\n", - "Epoch: [ 9/ 10], step: [ 120/ 390], loss: [0.2677], avg loss: [0.2727], time: [102.1745ms]\n", - "Epoch: [ 9/ 10], step: [ 121/ 390], loss: [0.3950], avg loss: [0.2737], time: [101.2299ms]\n", - "Epoch: [ 9/ 10], step: [ 122/ 390], loss: [0.2902], avg loss: [0.2739], time: [99.9250ms]\n", - "Epoch: [ 9/ 10], step: [ 123/ 390], loss: [0.2933], avg loss: [0.2740], time: [101.8765ms]\n", - "Epoch: [ 9/ 10], step: [ 124/ 390], loss: [0.3831], avg loss: [0.2749], time: [103.2825ms]\n", - "Epoch: [ 9/ 10], step: [ 125/ 390], loss: [0.2204], avg loss: [0.2745], time: [104.0571ms]\n", - "Epoch: [ 9/ 10], step: [ 126/ 390], loss: [0.3225], avg loss: [0.2749], time: [101.8565ms]\n", - "Epoch: [ 9/ 10], step: [ 127/ 390], loss: [0.3021], avg loss: [0.2751], time: [103.0264ms]\n", - "Epoch: [ 9/ 10], step: [ 128/ 390], loss: [0.3702], avg loss: [0.2758], time: [102.6061ms]\n", - "Epoch: [ 9/ 10], step: [ 129/ 390], loss: [0.3515], avg loss: [0.2764], time: [106.0541ms]\n", - "Epoch: [ 9/ 10], step: [ 130/ 390], loss: [0.2547], avg loss: [0.2762], time: [102.6504ms]\n", - "Epoch: [ 9/ 10], step: [ 131/ 390], loss: [0.2681], avg loss: [0.2762], time: [102.6218ms]\n", - "Epoch: [ 9/ 10], step: [ 132/ 390], loss: [0.3002], avg loss: [0.2764], time: [105.4668ms]\n", - "Epoch: [ 9/ 10], step: [ 133/ 390], loss: [0.3737], avg loss: [0.2771], time: [100.7266ms]\n", - "Epoch: [ 9/ 10], step: [ 134/ 390], loss: [0.2523], avg loss: [0.2769], time: [104.7270ms]\n", - "Epoch: [ 9/ 10], step: [ 135/ 390], loss: [0.3247], avg loss: [0.2773], time: [100.9140ms]\n", - "Epoch: [ 9/ 10], step: [ 136/ 390], loss: [0.3409], avg loss: [0.2777], time: [101.0613ms]\n", - "Epoch: [ 9/ 10], step: [ 137/ 390], loss: [0.3709], avg loss: [0.2784], time: [101.1641ms]\n", - "Epoch: [ 9/ 10], step: [ 138/ 390], loss: [0.1743], avg loss: [0.2776], time: [100.7006ms]\n", - "Epoch: [ 9/ 10], step: [ 139/ 390], loss: [0.3687], avg loss: [0.2783], time: [102.7555ms]\n", - "Epoch: [ 9/ 10], step: [ 140/ 390], loss: [0.3255], avg loss: [0.2786], time: [101.9893ms]\n", - "Epoch: [ 9/ 10], step: [ 141/ 390], loss: [0.2741], avg loss: [0.2786], time: [102.7770ms]\n", - "Epoch: [ 9/ 10], step: [ 142/ 390], loss: [0.1603], avg loss: [0.2778], time: [103.8938ms]\n", - "Epoch: [ 9/ 10], step: [ 143/ 390], loss: [0.3056], avg loss: [0.2780], time: [102.9959ms]\n", - "Epoch: [ 9/ 10], step: [ 144/ 390], loss: [0.3297], avg loss: [0.2783], time: [100.5542ms]\n", - "Epoch: [ 9/ 10], step: [ 145/ 390], loss: [0.2882], avg loss: [0.2784], time: [102.2995ms]\n", - "Epoch: [ 9/ 10], step: [ 146/ 390], loss: [0.3367], avg loss: [0.2788], time: [102.5589ms]\n", - "Epoch: [ 9/ 10], step: [ 147/ 390], loss: [0.1517], avg loss: [0.2779], time: [104.6801ms]\n", - "Epoch: [ 9/ 10], step: [ 148/ 390], loss: [0.2856], avg loss: [0.2780], time: [104.3811ms]\n", - "Epoch: [ 9/ 10], step: [ 149/ 390], loss: [0.3148], avg loss: [0.2782], time: [104.4352ms]\n", - "Epoch: [ 9/ 10], step: [ 150/ 390], loss: [0.2960], avg loss: [0.2783], time: [101.1314ms]\n", - "Epoch: [ 9/ 10], step: [ 151/ 390], loss: [0.2638], avg loss: [0.2783], time: [104.2705ms]\n", - "Epoch: [ 9/ 10], step: [ 152/ 390], loss: [0.1726], avg loss: [0.2776], time: [99.9887ms]\n", - "Epoch: [ 9/ 10], step: [ 153/ 390], loss: [0.3240], avg loss: [0.2779], time: [102.6273ms]\n", - "Epoch: [ 9/ 10], step: [ 154/ 390], loss: [0.2530], avg loss: [0.2777], time: [106.0991ms]\n", - "Epoch: [ 9/ 10], step: [ 155/ 390], loss: [0.2303], avg loss: [0.2774], time: [101.4292ms]\n", - "Epoch: [ 9/ 10], step: [ 156/ 390], loss: [0.2816], avg loss: [0.2774], time: [101.7385ms]\n", - "Epoch: [ 9/ 10], step: [ 157/ 390], loss: [0.3392], avg loss: [0.2778], time: [99.7787ms]\n" + "epoch: 9 step: 69, loss is 0.4718\n", + "epoch: 9 step: 70, loss is 0.4030\n", + "epoch: 9 step: 71, loss is 0.3980\n", + "epoch: 9 step: 72, loss is 0.2488\n", + "epoch: 9 step: 73, loss is 0.1879\n", + "epoch: 9 step: 74, loss is 0.3052\n", + "epoch: 9 step: 75, loss is 0.1858\n", + "epoch: 9 step: 76, loss is 0.1737\n", + "epoch: 9 step: 77, loss is 0.3333\n", + "epoch: 9 step: 78, loss is 0.1959\n", + "epoch: 9 step: 79, loss is 0.2411\n", + "epoch: 9 step: 80, loss is 0.2749\n", + "epoch: 9 step: 81, loss is 0.1702\n", + "epoch: 9 step: 82, loss is 0.1831\n", + "epoch: 9 step: 83, loss is 0.3682\n", + "epoch: 9 step: 84, loss is 0.1844\n", + "epoch: 9 step: 85, loss is 0.2799\n", + "epoch: 9 step: 86, loss is 0.2805\n", + "epoch: 9 step: 87, loss is 0.3685\n", + "epoch: 9 step: 88, loss is 0.2802\n", + "epoch: 9 step: 89, loss is 0.1326\n", + "epoch: 9 step: 90, loss is 0.1912\n", + "epoch: 9 step: 91, loss is 0.3006\n", + "epoch: 9 step: 92, loss is 0.1286\n", + "epoch: 9 step: 93, loss is 0.2179\n", + "epoch: 9 step: 94, loss is 0.1999\n", + "epoch: 9 step: 95, loss is 0.2278\n", + "epoch: 9 step: 96, loss is 0.1420\n", + "epoch: 9 step: 97, loss is 0.1676\n", + "epoch: 9 step: 98, loss is 0.2984\n", + "epoch: 9 step: 99, loss is 0.2156\n", + "epoch: 9 step: 100, loss is 0.2189\n", + "epoch: 9 step: 101, loss is 0.2909\n", + "epoch: 9 step: 102, loss is 0.3303\n", + "epoch: 9 step: 103, loss is 0.4217\n", + "epoch: 9 step: 104, loss is 0.2753\n", + "epoch: 9 step: 105, loss is 0.2595\n", + "epoch: 9 step: 106, loss is 0.2275\n", + "epoch: 9 step: 107, loss is 0.3049\n", + "epoch: 9 step: 108, loss is 0.3463\n", + "epoch: 9 step: 109, loss is 0.2354\n", + "epoch: 9 step: 110, loss is 0.2470\n", + "epoch: 9 step: 111, loss is 0.2685\n", + "epoch: 9 step: 112, loss is 0.2859\n", + "epoch: 9 step: 113, loss is 0.2302\n", + "epoch: 9 step: 114, loss is 0.2259\n", + "epoch: 9 step: 115, loss is 0.2267\n", + "epoch: 9 step: 116, loss is 0.2309\n", + "epoch: 9 step: 117, loss is 0.3122\n", + "epoch: 9 step: 118, loss is 0.2515\n", + "epoch: 9 step: 119, loss is 0.2786\n", + "epoch: 9 step: 120, loss is 0.2677\n", + "epoch: 9 step: 121, loss is 0.3950\n", + "epoch: 9 step: 122, loss is 0.2902\n", + "epoch: 9 step: 123, loss is 0.2933\n", + "epoch: 9 step: 124, loss is 0.3831\n", + "epoch: 9 step: 125, loss is 0.2204\n", + "epoch: 9 step: 126, loss is 0.3225\n", + "epoch: 9 step: 127, loss is 0.3021\n", + "epoch: 9 step: 128, loss is 0.3702\n", + "epoch: 9 step: 129, loss is 0.3515\n", + "epoch: 9 step: 130, loss is 0.2547\n", + "epoch: 9 step: 131, loss is 0.2681\n", + "epoch: 9 step: 132, loss is 0.3002\n", + "epoch: 9 step: 133, loss is 0.3737\n", + "epoch: 9 step: 134, loss is 0.2523\n", + "epoch: 9 step: 135, loss is 0.3247\n", + "epoch: 9 step: 136, loss is 0.3409\n", + "epoch: 9 step: 137, loss is 0.3709\n", + "epoch: 9 step: 138, loss is 0.1743\n", + "epoch: 9 step: 139, loss is 0.3687\n", + "epoch: 9 step: 140, loss is 0.3255\n", + "epoch: 9 step: 141, loss is 0.2741\n", + "epoch: 9 step: 142, loss is 0.1603\n", + "epoch: 9 step: 143, loss is 0.3056\n", + "epoch: 9 step: 144, loss is 0.3297\n", + "epoch: 9 step: 145, loss is 0.2882\n", + "epoch: 9 step: 146, loss is 0.3367\n", + "epoch: 9 step: 147, loss is 0.1517\n", + "epoch: 9 step: 148, loss is 0.2856\n", + "epoch: 9 step: 149, loss is 0.3148\n", + "epoch: 9 step: 150, loss is 0.2960\n", + "epoch: 9 step: 151, loss is 0.2638\n", + "epoch: 9 step: 152, loss is 0.1726\n", + "epoch: 9 step: 153, loss is 0.3240\n", + "epoch: 9 step: 154, loss is 0.2530\n", + "epoch: 9 step: 155, loss is 0.2303\n", + "epoch: 9 step: 156, loss is 0.2816\n", + "epoch: 9 step: 157, loss is 0.3392\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 9/ 10], step: [ 158/ 390], loss: [0.2664], avg loss: [0.2777], time: [101.3439ms]\n", - "Epoch: [ 9/ 10], step: [ 159/ 390], loss: [0.4234], avg loss: [0.2787], time: [104.7285ms]\n", - "Epoch: [ 9/ 10], step: [ 160/ 390], loss: [0.2787], avg loss: [0.2787], time: [101.1209ms]\n", - "Epoch: [ 9/ 10], step: [ 161/ 390], loss: [0.3272], avg loss: [0.2790], time: [101.0838ms]\n", - "Epoch: [ 9/ 10], step: [ 162/ 390], loss: [0.3409], avg loss: [0.2793], time: [101.6126ms]\n", - "Epoch: [ 9/ 10], step: [ 163/ 390], loss: [0.3722], avg loss: [0.2799], time: [102.9675ms]\n", - "Epoch: [ 9/ 10], step: [ 164/ 390], loss: [0.2464], avg loss: [0.2797], time: [103.0622ms]\n", - "Epoch: [ 9/ 10], step: [ 165/ 390], loss: [0.1451], avg loss: [0.2789], time: [101.7148ms]\n", - "Epoch: [ 9/ 10], step: [ 166/ 390], loss: [0.3036], avg loss: [0.2790], time: [102.2775ms]\n", - "Epoch: [ 9/ 10], step: [ 167/ 390], loss: [0.2150], avg loss: [0.2787], time: [102.2885ms]\n", - "Epoch: [ 9/ 10], step: [ 168/ 390], loss: [0.2903], avg loss: [0.2787], time: [105.4242ms]\n", - "Epoch: [ 9/ 10], step: [ 169/ 390], loss: [0.4836], avg loss: [0.2799], time: [99.7148ms]\n", - "Epoch: [ 9/ 10], step: [ 170/ 390], loss: [0.2690], avg loss: [0.2799], time: [100.1005ms]\n", - "Epoch: [ 9/ 10], step: [ 171/ 390], loss: [0.3030], avg loss: [0.2800], time: [103.6029ms]\n", - "Epoch: [ 9/ 10], step: [ 172/ 390], loss: [0.2788], avg loss: [0.2800], time: [101.7156ms]\n", - "Epoch: [ 9/ 10], step: [ 173/ 390], loss: [0.3095], avg loss: [0.2802], time: [101.5306ms]\n", - "Epoch: [ 9/ 10], step: [ 174/ 390], loss: [0.3485], avg loss: [0.2806], time: [99.8614ms]\n", - "Epoch: [ 9/ 10], step: [ 175/ 390], loss: [0.3854], avg loss: [0.2812], time: [102.8297ms]\n", - "Epoch: [ 9/ 10], step: [ 176/ 390], loss: [0.2738], avg loss: [0.2811], time: [101.7272ms]\n", - "Epoch: [ 9/ 10], step: [ 177/ 390], loss: [0.2012], avg loss: [0.2807], time: [101.0392ms]\n", - "Epoch: [ 9/ 10], step: [ 178/ 390], loss: [0.1913], avg loss: [0.2802], time: [105.7694ms]\n", - "Epoch: [ 9/ 10], step: [ 179/ 390], loss: [0.1811], avg loss: [0.2796], time: [101.5816ms]\n", - "Epoch: [ 9/ 10], step: [ 180/ 390], loss: [0.2216], avg loss: [0.2793], time: [103.5488ms]\n", - "Epoch: [ 9/ 10], step: [ 181/ 390], loss: [0.3418], avg loss: [0.2796], time: [103.5931ms]\n", - "Epoch: [ 9/ 10], step: [ 182/ 390], loss: [0.4854], avg loss: [0.2808], time: [101.1360ms]\n", - "Epoch: [ 9/ 10], step: [ 183/ 390], loss: [0.3358], avg loss: [0.2811], time: [101.8438ms]\n", - "Epoch: [ 9/ 10], step: [ 184/ 390], loss: [0.1935], avg loss: [0.2806], time: [104.8281ms]\n", - "Epoch: [ 9/ 10], step: [ 185/ 390], loss: [0.3501], avg loss: [0.2810], time: [103.6060ms]\n", - "Epoch: [ 9/ 10], step: [ 186/ 390], loss: [0.2153], avg loss: [0.2806], time: [102.9382ms]\n", - "Epoch: [ 9/ 10], step: [ 187/ 390], loss: [0.2664], avg loss: [0.2805], time: [103.3432ms]\n", - "Epoch: [ 9/ 10], step: [ 188/ 390], loss: [0.1765], avg loss: [0.2800], time: [100.9405ms]\n", - "Epoch: [ 9/ 10], step: [ 189/ 390], loss: [0.1346], avg loss: [0.2792], time: [105.4928ms]\n", - "Epoch: [ 9/ 10], step: [ 190/ 390], loss: [0.1991], avg loss: [0.2788], time: [102.2525ms]\n", - "Epoch: [ 9/ 10], step: [ 191/ 390], loss: [0.2464], avg loss: [0.2786], time: [103.5113ms]\n", - "Epoch: [ 9/ 10], step: [ 192/ 390], loss: [0.2229], avg loss: [0.2783], time: [100.6746ms]\n", - "Epoch: [ 9/ 10], step: [ 193/ 390], loss: [0.3363], avg loss: [0.2786], time: [101.0532ms]\n", - "Epoch: [ 9/ 10], step: [ 194/ 390], loss: [0.2198], avg loss: [0.2783], time: [100.8210ms]\n", - "Epoch: [ 9/ 10], step: [ 195/ 390], loss: [0.1779], avg loss: [0.2778], time: [104.1412ms]\n", - "Epoch: [ 9/ 10], step: [ 196/ 390], loss: [0.1399], avg loss: [0.2771], time: [104.0020ms]\n", - "Epoch: [ 9/ 10], step: [ 197/ 390], loss: [0.3189], avg loss: [0.2773], time: [100.5628ms]\n", - "Epoch: [ 9/ 10], step: [ 198/ 390], loss: [0.2123], avg loss: [0.2770], time: [102.3779ms]\n", - "Epoch: [ 9/ 10], step: [ 199/ 390], loss: [0.3724], avg loss: [0.2775], time: [101.0129ms]\n", - "Epoch: [ 9/ 10], step: [ 200/ 390], loss: [0.2345], avg loss: [0.2773], time: [105.2034ms]\n", - "Epoch: [ 9/ 10], step: [ 201/ 390], loss: [0.2212], avg loss: [0.2770], time: [103.3015ms]\n", - "Epoch: [ 9/ 10], step: [ 202/ 390], loss: [0.3194], avg loss: [0.2772], time: [104.3479ms]\n", - "Epoch: [ 9/ 10], step: [ 203/ 390], loss: [0.1540], avg loss: [0.2766], time: [103.8370ms]\n", - "Epoch: [ 9/ 10], step: [ 204/ 390], loss: [0.3313], avg loss: [0.2769], time: [102.0615ms]\n", - "Epoch: [ 9/ 10], step: [ 205/ 390], loss: [0.2585], avg loss: [0.2768], time: [103.6479ms]\n", - "Epoch: [ 9/ 10], step: [ 206/ 390], loss: [0.1736], avg loss: [0.2763], time: [105.9792ms]\n", - "Epoch: [ 9/ 10], step: [ 207/ 390], loss: [0.3516], avg loss: [0.2766], time: [100.9440ms]\n", - "Epoch: [ 9/ 10], step: [ 208/ 390], loss: [0.4077], avg loss: [0.2773], time: [101.6276ms]\n", - "Epoch: [ 9/ 10], step: [ 209/ 390], loss: [0.2779], avg loss: [0.2773], time: [103.9333ms]\n", - "Epoch: [ 9/ 10], step: [ 210/ 390], loss: [0.2984], avg loss: [0.2774], time: [104.6572ms]\n", - "Epoch: [ 9/ 10], step: [ 211/ 390], loss: [0.3921], avg loss: [0.2779], time: [100.5015ms]\n", - "Epoch: [ 9/ 10], step: [ 212/ 390], loss: [0.2446], avg loss: [0.2778], time: [102.1032ms]\n", - "Epoch: [ 9/ 10], step: [ 213/ 390], loss: [0.2475], avg loss: [0.2776], time: [103.1590ms]\n", - "Epoch: [ 9/ 10], step: [ 214/ 390], loss: [0.2972], avg loss: [0.2777], time: [102.2763ms]\n", - "Epoch: [ 9/ 10], step: [ 215/ 390], loss: [0.2834], avg loss: [0.2777], time: [102.6273ms]\n", - "Epoch: [ 9/ 10], step: [ 216/ 390], loss: [0.2070], avg loss: [0.2774], time: [104.4176ms]\n", - "Epoch: [ 9/ 10], step: [ 217/ 390], loss: [0.3333], avg loss: [0.2777], time: [101.0535ms]\n", - "Epoch: [ 9/ 10], step: [ 218/ 390], loss: [0.2225], avg loss: [0.2774], time: [103.3754ms]\n", - "Epoch: [ 9/ 10], step: [ 219/ 390], loss: [0.3896], avg loss: [0.2779], time: [99.9773ms]\n", - "Epoch: [ 9/ 10], step: [ 220/ 390], loss: [0.2675], avg loss: [0.2779], time: [104.7366ms]\n", - "Epoch: [ 9/ 10], step: [ 221/ 390], loss: [0.2908], avg loss: [0.2779], time: [101.2146ms]\n", - "Epoch: [ 9/ 10], step: [ 222/ 390], loss: [0.4031], avg loss: [0.2785], time: [102.5598ms]\n", - "Epoch: [ 9/ 10], step: [ 223/ 390], loss: [0.1974], avg loss: [0.2781], time: [103.5082ms]\n", - "Epoch: [ 9/ 10], step: [ 224/ 390], loss: [0.3648], avg loss: [0.2785], time: [102.3223ms]\n", - "Epoch: [ 9/ 10], step: [ 225/ 390], loss: [0.3166], avg loss: [0.2787], time: [101.7206ms]\n", - "Epoch: [ 9/ 10], step: [ 226/ 390], loss: [0.2183], avg loss: [0.2784], time: [101.8331ms]\n", - "Epoch: [ 9/ 10], step: [ 227/ 390], loss: [0.3256], avg loss: [0.2786], time: [103.4844ms]\n", - "Epoch: [ 9/ 10], step: [ 228/ 390], loss: [0.2786], avg loss: [0.2786], time: [101.1503ms]\n", - "Epoch: [ 9/ 10], step: [ 229/ 390], loss: [0.3497], avg loss: [0.2789], time: [102.6747ms]\n", - "Epoch: [ 9/ 10], step: [ 230/ 390], loss: [0.3478], avg loss: [0.2792], time: [101.1243ms]\n", - "Epoch: [ 9/ 10], step: [ 231/ 390], loss: [0.3882], avg loss: [0.2797], time: [103.8375ms]\n", - "Epoch: [ 9/ 10], step: [ 232/ 390], loss: [0.2460], avg loss: [0.2796], time: [100.4696ms]\n", - "Epoch: [ 9/ 10], step: [ 233/ 390], loss: [0.1955], avg loss: [0.2792], time: [104.6753ms]\n", - "Epoch: [ 9/ 10], step: [ 234/ 390], loss: [0.2888], avg loss: [0.2792], time: [106.8163ms]\n", - "Epoch: [ 9/ 10], step: [ 235/ 390], loss: [0.2994], avg loss: [0.2793], time: [102.1876ms]\n", - "Epoch: [ 9/ 10], step: [ 236/ 390], loss: [0.3871], avg loss: [0.2798], time: [106.7333ms]\n", - "Epoch: [ 9/ 10], step: [ 237/ 390], loss: [0.3991], avg loss: [0.2803], time: [104.6433ms]\n", - "Epoch: [ 9/ 10], step: [ 238/ 390], loss: [0.3099], avg loss: [0.2804], time: [101.4895ms]\n", - "Epoch: [ 9/ 10], step: [ 239/ 390], loss: [0.3141], avg loss: [0.2806], time: [102.6342ms]\n", - "Epoch: [ 9/ 10], step: [ 240/ 390], loss: [0.3390], avg loss: [0.2808], time: [102.2451ms]\n", - "Epoch: [ 9/ 10], step: [ 241/ 390], loss: [0.2310], avg loss: [0.2806], time: [104.7823ms]\n", - "Epoch: [ 9/ 10], step: [ 242/ 390], loss: [0.2700], avg loss: [0.2805], time: [103.6384ms]\n", - "Epoch: [ 9/ 10], step: [ 243/ 390], loss: [0.2811], avg loss: [0.2805], time: [102.1147ms]\n", - "Epoch: [ 9/ 10], step: [ 244/ 390], loss: [0.2345], avg loss: [0.2804], time: [103.6584ms]\n", - "Epoch: [ 9/ 10], step: [ 245/ 390], loss: [0.2672], avg loss: [0.2803], time: [101.4009ms]\n", - "Epoch: [ 9/ 10], step: [ 246/ 390], loss: [0.1876], avg loss: [0.2799], time: [106.1995ms]\n" + "epoch: 9 step: 158, loss is 0.2664\n", + "epoch: 9 step: 159, loss is 0.4234\n", + "epoch: 9 step: 160, loss is 0.2787\n", + "epoch: 9 step: 161, loss is 0.3272\n", + "epoch: 9 step: 162, loss is 0.3409\n", + "epoch: 9 step: 163, loss is 0.3722\n", + "epoch: 9 step: 164, loss is 0.2464\n", + "epoch: 9 step: 165, loss is 0.1451\n", + "epoch: 9 step: 166, loss is 0.3036\n", + "epoch: 9 step: 167, loss is 0.2150\n", + "epoch: 9 step: 168, loss is 0.2903\n", + "epoch: 9 step: 169, loss is 0.4836\n", + "epoch: 9 step: 170, loss is 0.2690\n", + "epoch: 9 step: 171, loss is 0.3030\n", + "epoch: 9 step: 172, loss is 0.2788\n", + "epoch: 9 step: 173, loss is 0.3095\n", + "epoch: 9 step: 174, loss is 0.3485\n", + "epoch: 9 step: 175, loss is 0.3854\n", + "epoch: 9 step: 176, loss is 0.2738\n", + "epoch: 9 step: 177, loss is 0.2012\n", + "epoch: 9 step: 178, loss is 0.1913\n", + "epoch: 9 step: 179, loss is 0.1811\n", + "epoch: 9 step: 180, loss is 0.2216\n", + "epoch: 9 step: 181, loss is 0.3418\n", + "epoch: 9 step: 182, loss is 0.4854\n", + "epoch: 9 step: 183, loss is 0.3358\n", + "epoch: 9 step: 184, loss is 0.1935\n", + "epoch: 9 step: 185, loss is 0.3501\n", + "epoch: 9 step: 186, loss is 0.2153\n", + "epoch: 9 step: 187, loss is 0.2664\n", + "epoch: 9 step: 188, loss is 0.1765\n", + "epoch: 9 step: 189, loss is 0.1346\n", + "epoch: 9 step: 190, loss is 0.1991\n", + "epoch: 9 step: 191, loss is 0.2464\n", + "epoch: 9 step: 192, loss is 0.2229\n", + "epoch: 9 step: 193, loss is 0.3363\n", + "epoch: 9 step: 194, loss is 0.2198\n", + "epoch: 9 step: 195, loss is 0.1779\n", + "epoch: 9 step: 196, loss is 0.1399\n", + "epoch: 9 step: 197, loss is 0.3189\n", + "epoch: 9 step: 198, loss is 0.2123\n", + "epoch: 9 step: 199, loss is 0.3724\n", + "epoch: 9 step: 200, loss is 0.2345\n", + "epoch: 9 step: 201, loss is 0.2212\n", + "epoch: 9 step: 202, loss is 0.3194\n", + "epoch: 9 step: 203, loss is 0.1540\n", + "epoch: 9 step: 204, loss is 0.3313\n", + "epoch: 9 step: 205, loss is 0.2585\n", + "epoch: 9 step: 206, loss is 0.1736\n", + "epoch: 9 step: 207, loss is 0.3516\n", + "epoch: 9 step: 208, loss is 0.4077\n", + "epoch: 9 step: 209, loss is 0.2779\n", + "epoch: 9 step: 210, loss is 0.2984\n", + "epoch: 9 step: 211, loss is 0.3921\n", + "epoch: 9 step: 212, loss is 0.2446\n", + "epoch: 9 step: 213, loss is 0.2475\n", + "epoch: 9 step: 214, loss is 0.2972\n", + "epoch: 9 step: 215, loss is 0.2834\n", + "epoch: 9 step: 216, loss is 0.2070\n", + "epoch: 9 step: 217, loss is 0.3333\n", + "epoch: 9 step: 218, loss is 0.2225\n", + "epoch: 9 step: 219, loss is 0.3896\n", + "epoch: 9 step: 220, loss is 0.2675\n", + "epoch: 9 step: 221, loss is 0.2908\n", + "epoch: 9 step: 222, loss is 0.4031\n", + "epoch: 9 step: 223, loss is 0.1974\n", + "epoch: 9 step: 224, loss is 0.3648\n", + "epoch: 9 step: 225, loss is 0.3166\n", + "epoch: 9 step: 226, loss is 0.2183\n", + "epoch: 9 step: 227, loss is 0.3256\n", + "epoch: 9 step: 228, loss is 0.2786\n", + "epoch: 9 step: 229, loss is 0.3497\n", + "epoch: 9 step: 230, loss is 0.3478\n", + "epoch: 9 step: 231, loss is 0.3882\n", + "epoch: 9 step: 232, loss is 0.2460\n", + "epoch: 9 step: 233, loss is 0.1955\n", + "epoch: 9 step: 234, loss is 0.2888\n", + "epoch: 9 step: 235, loss is 0.2994\n", + "epoch: 9 step: 236, loss is 0.3871\n", + "epoch: 9 step: 237, loss is 0.3991\n", + "epoch: 9 step: 238, loss is 0.3099\n", + "epoch: 9 step: 239, loss is 0.3141\n", + "epoch: 9 step: 240, loss is 0.3390\n", + "epoch: 9 step: 241, loss is 0.2310\n", + "epoch: 9 step: 242, loss is 0.2700\n", + "epoch: 9 step: 243, loss is 0.2811\n", + "epoch: 9 step: 244, loss is 0.2345\n", + "epoch: 9 step: 245, loss is 0.2672\n", + "epoch: 9 step: 246, loss is 0.1876\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 9/ 10], step: [ 247/ 390], loss: [0.3666], avg loss: [0.2803], time: [103.4901ms]\n", - "Epoch: [ 9/ 10], step: [ 248/ 390], loss: [0.2445], avg loss: [0.2801], time: [101.3665ms]\n", - "Epoch: [ 9/ 10], step: [ 249/ 390], loss: [0.2603], avg loss: [0.2801], time: [103.7555ms]\n", - "Epoch: [ 9/ 10], step: [ 250/ 390], loss: [0.2571], avg loss: [0.2800], time: [99.9429ms]\n", - "Epoch: [ 9/ 10], step: [ 251/ 390], loss: [0.4252], avg loss: [0.2805], time: [101.2111ms]\n", - "Epoch: [ 9/ 10], step: [ 252/ 390], loss: [0.3173], avg loss: [0.2807], time: [100.2634ms]\n", - "Epoch: [ 9/ 10], step: [ 253/ 390], loss: [0.2151], avg loss: [0.2804], time: [101.1360ms]\n", - "Epoch: [ 9/ 10], step: [ 254/ 390], loss: [0.3287], avg loss: [0.2806], time: [103.8651ms]\n", - "Epoch: [ 9/ 10], step: [ 255/ 390], loss: [0.2224], avg loss: [0.2804], time: [104.4121ms]\n", - "Epoch: [ 9/ 10], step: [ 256/ 390], loss: [0.2287], avg loss: [0.2802], time: [102.6955ms]\n", - "Epoch: [ 9/ 10], step: [ 257/ 390], loss: [0.2828], avg loss: [0.2802], time: [100.9130ms]\n", - "Epoch: [ 9/ 10], step: [ 258/ 390], loss: [0.4278], avg loss: [0.2808], time: [105.5205ms]\n", - "Epoch: [ 9/ 10], step: [ 259/ 390], loss: [0.2781], avg loss: [0.2808], time: [103.6808ms]\n", - "Epoch: [ 9/ 10], step: [ 260/ 390], loss: [0.2918], avg loss: [0.2808], time: [101.3958ms]\n", - "Epoch: [ 9/ 10], step: [ 261/ 390], loss: [0.2349], avg loss: [0.2806], time: [101.6803ms]\n", - "Epoch: [ 9/ 10], step: [ 262/ 390], loss: [0.3005], avg loss: [0.2807], time: [102.3669ms]\n", - "Epoch: [ 9/ 10], step: [ 263/ 390], loss: [0.2941], avg loss: [0.2808], time: [105.2403ms]\n", - "Epoch: [ 9/ 10], step: [ 264/ 390], loss: [0.2351], avg loss: [0.2806], time: [102.0873ms]\n", - "Epoch: [ 9/ 10], step: [ 265/ 390], loss: [0.3136], avg loss: [0.2807], time: [104.4846ms]\n", - "Epoch: [ 9/ 10], step: [ 266/ 390], loss: [0.3938], avg loss: [0.2811], time: [101.5229ms]\n", - "Epoch: [ 9/ 10], step: [ 267/ 390], loss: [0.1917], avg loss: [0.2808], time: [103.2660ms]\n", - "Epoch: [ 9/ 10], step: [ 268/ 390], loss: [0.2223], avg loss: [0.2806], time: [105.1934ms]\n", - "Epoch: [ 9/ 10], step: [ 269/ 390], loss: [0.1965], avg loss: [0.2803], time: [103.9193ms]\n", - "Epoch: [ 9/ 10], step: [ 270/ 390], loss: [0.2173], avg loss: [0.2800], time: [100.4248ms]\n", - "Epoch: [ 9/ 10], step: [ 271/ 390], loss: [0.3242], avg loss: [0.2802], time: [100.8677ms]\n", - "Epoch: [ 9/ 10], step: [ 272/ 390], loss: [0.2942], avg loss: [0.2802], time: [105.2458ms]\n", - "Epoch: [ 9/ 10], step: [ 273/ 390], loss: [0.3043], avg loss: [0.2803], time: [100.7495ms]\n", - "Epoch: [ 9/ 10], step: [ 274/ 390], loss: [0.5046], avg loss: [0.2812], time: [105.1064ms]\n", - "Epoch: [ 9/ 10], step: [ 275/ 390], loss: [0.2275], avg loss: [0.2810], time: [101.8381ms]\n", - "Epoch: [ 9/ 10], step: [ 276/ 390], loss: [0.2391], avg loss: [0.2808], time: [100.6587ms]\n", - "Epoch: [ 9/ 10], step: [ 277/ 390], loss: [0.2364], avg loss: [0.2806], time: [102.7703ms]\n", - "Epoch: [ 9/ 10], step: [ 278/ 390], loss: [0.2180], avg loss: [0.2804], time: [101.0392ms]\n", - "Epoch: [ 9/ 10], step: [ 279/ 390], loss: [0.2443], avg loss: [0.2803], time: [101.6967ms]\n", - "Epoch: [ 9/ 10], step: [ 280/ 390], loss: [0.3269], avg loss: [0.2805], time: [104.4416ms]\n", - "Epoch: [ 9/ 10], step: [ 281/ 390], loss: [0.2290], avg loss: [0.2803], time: [99.6125ms]\n", - "Epoch: [ 9/ 10], step: [ 282/ 390], loss: [0.2864], avg loss: [0.2803], time: [106.7188ms]\n", - "Epoch: [ 9/ 10], step: [ 283/ 390], loss: [0.5133], avg loss: [0.2811], time: [98.8364ms]\n", - "Epoch: [ 9/ 10], step: [ 284/ 390], loss: [0.3965], avg loss: [0.2815], time: [100.8110ms]\n", - "Epoch: [ 9/ 10], step: [ 285/ 390], loss: [0.2694], avg loss: [0.2815], time: [103.5564ms]\n", - "Epoch: [ 9/ 10], step: [ 286/ 390], loss: [0.2299], avg loss: [0.2813], time: [100.5192ms]\n", - "Epoch: [ 9/ 10], step: [ 287/ 390], loss: [0.2477], avg loss: [0.2812], time: [102.1681ms]\n", - "Epoch: [ 9/ 10], step: [ 288/ 390], loss: [0.2881], avg loss: [0.2812], time: [101.5325ms]\n", - "Epoch: [ 9/ 10], step: [ 289/ 390], loss: [0.2389], avg loss: [0.2811], time: [106.5199ms]\n", - "Epoch: [ 9/ 10], step: [ 290/ 390], loss: [0.1957], avg loss: [0.2808], time: [105.9854ms]\n", - "Epoch: [ 9/ 10], step: [ 291/ 390], loss: [0.4758], avg loss: [0.2814], time: [101.9738ms]\n", - "Epoch: [ 9/ 10], step: [ 292/ 390], loss: [0.2147], avg loss: [0.2812], time: [104.7535ms]\n", - "Epoch: [ 9/ 10], step: [ 293/ 390], loss: [0.1834], avg loss: [0.2809], time: [102.1748ms]\n", - "Epoch: [ 9/ 10], step: [ 294/ 390], loss: [0.3235], avg loss: [0.2810], time: [100.9235ms]\n", - "Epoch: [ 9/ 10], step: [ 295/ 390], loss: [0.2626], avg loss: [0.2810], time: [103.4160ms]\n", - "Epoch: [ 9/ 10], step: [ 296/ 390], loss: [0.2007], avg loss: [0.2807], time: [101.7847ms]\n", - "Epoch: [ 9/ 10], step: [ 297/ 390], loss: [0.3185], avg loss: [0.2808], time: [101.8078ms]\n", - "Epoch: [ 9/ 10], step: [ 298/ 390], loss: [0.2742], avg loss: [0.2808], time: [102.3898ms]\n", - "Epoch: [ 9/ 10], step: [ 299/ 390], loss: [0.3474], avg loss: [0.2810], time: [102.4704ms]\n", - "Epoch: [ 9/ 10], step: [ 300/ 390], loss: [0.4156], avg loss: [0.2815], time: [100.8892ms]\n", - "Epoch: [ 9/ 10], step: [ 301/ 390], loss: [0.3393], avg loss: [0.2817], time: [100.4710ms]\n", - "Epoch: [ 9/ 10], step: [ 302/ 390], loss: [0.2162], avg loss: [0.2814], time: [102.9241ms]\n", - "Epoch: [ 9/ 10], step: [ 303/ 390], loss: [0.3120], avg loss: [0.2815], time: [104.3801ms]\n", - "Epoch: [ 9/ 10], step: [ 304/ 390], loss: [0.3075], avg loss: [0.2816], time: [101.2642ms]\n", - "Epoch: [ 9/ 10], step: [ 305/ 390], loss: [0.2437], avg loss: [0.2815], time: [104.6188ms]\n", - "Epoch: [ 9/ 10], step: [ 306/ 390], loss: [0.1778], avg loss: [0.2812], time: [100.4915ms]\n", - "Epoch: [ 9/ 10], step: [ 307/ 390], loss: [0.3741], avg loss: [0.2815], time: [100.5793ms]\n", - "Epoch: [ 9/ 10], step: [ 308/ 390], loss: [0.2621], avg loss: [0.2814], time: [101.7408ms]\n", - "Epoch: [ 9/ 10], step: [ 309/ 390], loss: [0.2012], avg loss: [0.2811], time: [100.9459ms]\n", - "Epoch: [ 9/ 10], step: [ 310/ 390], loss: [0.2965], avg loss: [0.2812], time: [106.0131ms]\n", - "Epoch: [ 9/ 10], step: [ 311/ 390], loss: [0.2786], avg loss: [0.2812], time: [106.2589ms]\n", - "Epoch: [ 9/ 10], step: [ 312/ 390], loss: [0.3387], avg loss: [0.2814], time: [100.6551ms]\n", - "Epoch: [ 9/ 10], step: [ 313/ 390], loss: [0.1744], avg loss: [0.2810], time: [100.9841ms]\n", - "Epoch: [ 9/ 10], step: [ 314/ 390], loss: [0.1716], avg loss: [0.2807], time: [107.1465ms]\n", - "Epoch: [ 9/ 10], step: [ 315/ 390], loss: [0.2732], avg loss: [0.2807], time: [100.9767ms]\n", - "Epoch: [ 9/ 10], step: [ 316/ 390], loss: [0.2169], avg loss: [0.2805], time: [101.7272ms]\n", - "Epoch: [ 9/ 10], step: [ 317/ 390], loss: [0.2133], avg loss: [0.2802], time: [103.0226ms]\n", - "Epoch: [ 9/ 10], step: [ 318/ 390], loss: [0.2757], avg loss: [0.2802], time: [100.7805ms]\n", - "Epoch: [ 9/ 10], step: [ 319/ 390], loss: [0.2565], avg loss: [0.2802], time: [103.9732ms]\n", - "Epoch: [ 9/ 10], step: [ 320/ 390], loss: [0.3456], avg loss: [0.2804], time: [100.8816ms]\n", - "Epoch: [ 9/ 10], step: [ 321/ 390], loss: [0.1643], avg loss: [0.2800], time: [99.6258ms]\n", - "Epoch: [ 9/ 10], step: [ 322/ 390], loss: [0.2130], avg loss: [0.2798], time: [101.2025ms]\n", - "Epoch: [ 9/ 10], step: [ 323/ 390], loss: [0.2580], avg loss: [0.2797], time: [101.8205ms]\n", - "Epoch: [ 9/ 10], step: [ 324/ 390], loss: [0.4480], avg loss: [0.2802], time: [101.9170ms]\n", - "Epoch: [ 9/ 10], step: [ 325/ 390], loss: [0.1572], avg loss: [0.2799], time: [102.2940ms]\n", - "Epoch: [ 9/ 10], step: [ 326/ 390], loss: [0.2302], avg loss: [0.2797], time: [100.7555ms]\n", - "Epoch: [ 9/ 10], step: [ 327/ 390], loss: [0.3327], avg loss: [0.2799], time: [103.6375ms]\n", - "Epoch: [ 9/ 10], step: [ 328/ 390], loss: [0.2224], avg loss: [0.2797], time: [102.1852ms]\n", - "Epoch: [ 9/ 10], step: [ 329/ 390], loss: [0.1517], avg loss: [0.2793], time: [102.0155ms]\n", - "Epoch: [ 9/ 10], step: [ 330/ 390], loss: [0.3094], avg loss: [0.2794], time: [103.5008ms]\n", - "Epoch: [ 9/ 10], step: [ 331/ 390], loss: [0.3399], avg loss: [0.2796], time: [103.2772ms]\n", - "Epoch: [ 9/ 10], step: [ 332/ 390], loss: [0.3457], avg loss: [0.2798], time: [101.5022ms]\n", - "Epoch: [ 9/ 10], step: [ 333/ 390], loss: [0.4346], avg loss: [0.2802], time: [102.2038ms]\n", - "Epoch: [ 9/ 10], step: [ 334/ 390], loss: [0.3131], avg loss: [0.2803], time: [105.9587ms]\n", - "Epoch: [ 9/ 10], step: [ 335/ 390], loss: [0.2407], avg loss: [0.2802], time: [101.8615ms]\n" + "epoch: 9 step: 247, loss is 0.3666\n", + "epoch: 9 step: 248, loss is 0.2445\n", + "epoch: 9 step: 249, loss is 0.2603\n", + "epoch: 9 step: 250, loss is 0.2571\n", + "epoch: 9 step: 251, loss is 0.4252\n", + "epoch: 9 step: 252, loss is 0.3173\n", + "epoch: 9 step: 253, loss is 0.2151\n", + "epoch: 9 step: 254, loss is 0.3287\n", + "epoch: 9 step: 255, loss is 0.2224\n", + "epoch: 9 step: 256, loss is 0.2287\n", + "epoch: 9 step: 257, loss is 0.2828\n", + "epoch: 9 step: 258, loss is 0.4278\n", + "epoch: 9 step: 259, loss is 0.2781\n", + "epoch: 9 step: 260, loss is 0.2918\n", + "epoch: 9 step: 261, loss is 0.2349\n", + "epoch: 9 step: 262, loss is 0.3005\n", + "epoch: 9 step: 263, loss is 0.2941\n", + "epoch: 9 step: 264, loss is 0.2351\n", + "epoch: 9 step: 265, loss is 0.3136\n", + "epoch: 9 step: 266, loss is 0.3938\n", + "epoch: 9 step: 267, loss is 0.1917\n", + "epoch: 9 step: 268, loss is 0.2223\n", + "epoch: 9 step: 269, loss is 0.1965\n", + "epoch: 9 step: 270, loss is 0.2173\n", + "epoch: 9 step: 271, loss is 0.3242\n", + "epoch: 9 step: 272, loss is 0.2942\n", + "epoch: 9 step: 273, loss is 0.3043\n", + "epoch: 9 step: 274, loss is 0.5046\n", + "epoch: 9 step: 275, loss is 0.2275\n", + "epoch: 9 step: 276, loss is 0.2391\n", + "epoch: 9 step: 277, loss is 0.2364\n", + "epoch: 9 step: 278, loss is 0.2180\n", + "epoch: 9 step: 279, loss is 0.2443\n", + "epoch: 9 step: 280, loss is 0.3269\n", + "epoch: 9 step: 281, loss is 0.2290\n", + "epoch: 9 step: 282, loss is 0.2864\n", + "epoch: 9 step: 283, loss is 0.5133\n", + "epoch: 9 step: 284, loss is 0.3965\n", + "epoch: 9 step: 285, loss is 0.2694\n", + "epoch: 9 step: 286, loss is 0.2299\n", + "epoch: 9 step: 287, loss is 0.2477\n", + "epoch: 9 step: 288, loss is 0.2881\n", + "epoch: 9 step: 289, loss is 0.2389\n", + "epoch: 9 step: 290, loss is 0.1957\n", + "epoch: 9 step: 291, loss is 0.4758\n", + "epoch: 9 step: 292, loss is 0.2147\n", + "epoch: 9 step: 293, loss is 0.1834\n", + "epoch: 9 step: 294, loss is 0.3235\n", + "epoch: 9 step: 295, loss is 0.2626\n", + "epoch: 9 step: 296, loss is 0.2007\n", + "epoch: 9 step: 297, loss is 0.3185\n", + "epoch: 9 step: 298, loss is 0.2742\n", + "epoch: 9 step: 299, loss is 0.3474\n", + "epoch: 9 step: 300, loss is 0.4156\n", + "epoch: 9 step: 301, loss is 0.3393\n", + "epoch: 9 step: 302, loss is 0.2162\n", + "epoch: 9 step: 303, loss is 0.3120\n", + "epoch: 9 step: 304, loss is 0.3075\n", + "epoch: 9 step: 305, loss is 0.2437\n", + "epoch: 9 step: 306, loss is 0.1778\n", + "epoch: 9 step: 307, loss is 0.3741\n", + "epoch: 9 step: 308, loss is 0.2621\n", + "epoch: 9 step: 309, loss is 0.2012\n", + "epoch: 9 step: 310, loss is 0.2965\n", + "epoch: 9 step: 311, loss is 0.2786\n", + "epoch: 9 step: 312, loss is 0.3387\n", + "epoch: 9 step: 313, loss is 0.1744\n", + "epoch: 9 step: 314, loss is 0.1716\n", + "epoch: 9 step: 315, loss is 0.2732\n", + "epoch: 9 step: 316, loss is 0.2169\n", + "epoch: 9 step: 317, loss is 0.2133\n", + "epoch: 9 step: 318, loss is 0.2757\n", + "epoch: 9 step: 319, loss is 0.2565\n", + "epoch: 9 step: 320, loss is 0.3456\n", + "epoch: 9 step: 321, loss is 0.1643\n", + "epoch: 9 step: 322, loss is 0.2130\n", + "epoch: 9 step: 323, loss is 0.2580\n", + "epoch: 9 step: 324, loss is 0.4480\n", + "epoch: 9 step: 325, loss is 0.1572\n", + "epoch: 9 step: 326, loss is 0.2302\n", + "epoch: 9 step: 327, loss is 0.3327\n", + "epoch: 9 step: 328, loss is 0.2224\n", + "epoch: 9 step: 329, loss is 0.1517\n", + "epoch: 9 step: 330, loss is 0.3094\n", + "epoch: 9 step: 331, loss is 0.3399\n", + "epoch: 9 step: 332, loss is 0.3457\n", + "epoch: 9 step: 333, loss is 0.4346\n", + "epoch: 9 step: 334, loss is 0.3131\n", + "epoch: 9 step: 335, loss is 0.2407\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 9/ 10], step: [ 336/ 390], loss: [0.2749], avg loss: [0.2802], time: [101.8798ms]\n", - "Epoch: [ 9/ 10], step: [ 337/ 390], loss: [0.1938], avg loss: [0.2800], time: [100.8859ms]\n", - "Epoch: [ 9/ 10], step: [ 338/ 390], loss: [0.2136], avg loss: [0.2798], time: [101.9592ms]\n", - "Epoch: [ 9/ 10], step: [ 339/ 390], loss: [0.1703], avg loss: [0.2794], time: [98.4647ms]\n", - "Epoch: [ 9/ 10], step: [ 340/ 390], loss: [0.1344], avg loss: [0.2790], time: [100.3985ms]\n", - "Epoch: [ 9/ 10], step: [ 341/ 390], loss: [0.2446], avg loss: [0.2789], time: [100.2448ms]\n", - "Epoch: [ 9/ 10], step: [ 342/ 390], loss: [0.2180], avg loss: [0.2787], time: [103.6801ms]\n", - "Epoch: [ 9/ 10], step: [ 343/ 390], loss: [0.3273], avg loss: [0.2789], time: [101.4183ms]\n", - "Epoch: [ 9/ 10], step: [ 344/ 390], loss: [0.3550], avg loss: [0.2791], time: [106.7882ms]\n", - "Epoch: [ 9/ 10], step: [ 345/ 390], loss: [0.2465], avg loss: [0.2790], time: [104.7385ms]\n", - "Epoch: [ 9/ 10], step: [ 346/ 390], loss: [0.2084], avg loss: [0.2788], time: [101.6912ms]\n", - "Epoch: [ 9/ 10], step: [ 347/ 390], loss: [0.3962], avg loss: [0.2791], time: [104.4145ms]\n", - "Epoch: [ 9/ 10], step: [ 348/ 390], loss: [0.2505], avg loss: [0.2790], time: [102.1519ms]\n", - "Epoch: [ 9/ 10], step: [ 349/ 390], loss: [0.2329], avg loss: [0.2789], time: [103.4329ms]\n", - "Epoch: [ 9/ 10], step: [ 350/ 390], loss: [0.3404], avg loss: [0.2791], time: [102.0799ms]\n", - "Epoch: [ 9/ 10], step: [ 351/ 390], loss: [0.3228], avg loss: [0.2792], time: [105.7591ms]\n", - "Epoch: [ 9/ 10], step: [ 352/ 390], loss: [0.2663], avg loss: [0.2792], time: [100.6315ms]\n", - "Epoch: [ 9/ 10], step: [ 353/ 390], loss: [0.2314], avg loss: [0.2790], time: [103.6012ms]\n", - "Epoch: [ 9/ 10], step: [ 354/ 390], loss: [0.4019], avg loss: [0.2794], time: [101.9282ms]\n", - "Epoch: [ 9/ 10], step: [ 355/ 390], loss: [0.2190], avg loss: [0.2792], time: [101.0091ms]\n", - "Epoch: [ 9/ 10], step: [ 356/ 390], loss: [0.2142], avg loss: [0.2790], time: [103.3542ms]\n", - "Epoch: [ 9/ 10], step: [ 357/ 390], loss: [0.2802], avg loss: [0.2790], time: [102.8593ms]\n", - "Epoch: [ 9/ 10], step: [ 358/ 390], loss: [0.2102], avg loss: [0.2789], time: [105.3081ms]\n", - "Epoch: [ 9/ 10], step: [ 359/ 390], loss: [0.1795], avg loss: [0.2786], time: [102.2232ms]\n", - "Epoch: [ 9/ 10], step: [ 360/ 390], loss: [0.2005], avg loss: [0.2784], time: [106.1468ms]\n", - "Epoch: [ 9/ 10], step: [ 361/ 390], loss: [0.2372], avg loss: [0.2782], time: [101.1386ms]\n", - "Epoch: [ 9/ 10], step: [ 362/ 390], loss: [0.1931], avg loss: [0.2780], time: [101.7928ms]\n", - "Epoch: [ 9/ 10], step: [ 363/ 390], loss: [0.3196], avg loss: [0.2781], time: [104.0721ms]\n", - "Epoch: [ 9/ 10], step: [ 364/ 390], loss: [0.2563], avg loss: [0.2781], time: [101.5668ms]\n", - "Epoch: [ 9/ 10], step: [ 365/ 390], loss: [0.2488], avg loss: [0.2780], time: [102.6225ms]\n", - "Epoch: [ 9/ 10], step: [ 366/ 390], loss: [0.2499], avg loss: [0.2779], time: [102.6690ms]\n", - "Epoch: [ 9/ 10], step: [ 367/ 390], loss: [0.1904], avg loss: [0.2777], time: [105.2618ms]\n", - "Epoch: [ 9/ 10], step: [ 368/ 390], loss: [0.2042], avg loss: [0.2775], time: [102.2058ms]\n", - "Epoch: [ 9/ 10], step: [ 369/ 390], loss: [0.3357], avg loss: [0.2776], time: [100.1673ms]\n", - "Epoch: [ 9/ 10], step: [ 370/ 390], loss: [0.3050], avg loss: [0.2777], time: [103.9114ms]\n", - "Epoch: [ 9/ 10], step: [ 371/ 390], loss: [0.3618], avg loss: [0.2779], time: [99.7210ms]\n", - "Epoch: [ 9/ 10], step: [ 372/ 390], loss: [0.2830], avg loss: [0.2779], time: [101.2034ms]\n", - "Epoch: [ 9/ 10], step: [ 373/ 390], loss: [0.3102], avg loss: [0.2780], time: [101.0656ms]\n", - "Epoch: [ 9/ 10], step: [ 374/ 390], loss: [0.1494], avg loss: [0.2777], time: [101.4268ms]\n", - "Epoch: [ 9/ 10], step: [ 375/ 390], loss: [0.3108], avg loss: [0.2778], time: [105.6063ms]\n", - "Epoch: [ 9/ 10], step: [ 376/ 390], loss: [0.2621], avg loss: [0.2777], time: [105.5107ms]\n", - "Epoch: [ 9/ 10], step: [ 377/ 390], loss: [0.3015], avg loss: [0.2778], time: [103.9784ms]\n", - "Epoch: [ 9/ 10], step: [ 378/ 390], loss: [0.3440], avg loss: [0.2780], time: [103.6081ms]\n", - "Epoch: [ 9/ 10], step: [ 379/ 390], loss: [0.2310], avg loss: [0.2778], time: [101.1114ms]\n", - "Epoch: [ 9/ 10], step: [ 380/ 390], loss: [0.4890], avg loss: [0.2784], time: [103.5185ms]\n", - "Epoch: [ 9/ 10], step: [ 381/ 390], loss: [0.3627], avg loss: [0.2786], time: [102.2093ms]\n", - "Epoch: [ 9/ 10], step: [ 382/ 390], loss: [0.2582], avg loss: [0.2786], time: [101.8181ms]\n", - "Epoch: [ 9/ 10], step: [ 383/ 390], loss: [0.3308], avg loss: [0.2787], time: [102.9022ms]\n", - "Epoch: [ 9/ 10], step: [ 384/ 390], loss: [0.2705], avg loss: [0.2787], time: [101.2967ms]\n", - "Epoch: [ 9/ 10], step: [ 385/ 390], loss: [0.2209], avg loss: [0.2785], time: [103.0319ms]\n", - "Epoch: [ 9/ 10], step: [ 386/ 390], loss: [0.3860], avg loss: [0.2788], time: [101.7649ms]\n", - "Epoch: [ 9/ 10], step: [ 387/ 390], loss: [0.3459], avg loss: [0.2790], time: [104.6145ms]\n", - "Epoch: [ 9/ 10], step: [ 388/ 390], loss: [0.1994], avg loss: [0.2788], time: [101.5496ms]\n", - "Epoch: [ 9/ 10], step: [ 389/ 390], loss: [0.2605], avg loss: [0.2787], time: [101.8271ms]\n", - "Epoch: [ 9/ 10], step: [ 390/ 390], loss: [0.3933], avg loss: [0.2790], time: [828.5053ms]\n", - "Epoch time: 41044.284, per step time: 105.242\n", + "epoch: 9 step: 336, loss is 0.2749\n", + "epoch: 9 step: 337, loss is 0.1938\n", + "epoch: 9 step: 338, loss is 0.2136\n", + "epoch: 9 step: 339, loss is 0.1703\n", + "epoch: 9 step: 340, loss is 0.1344\n", + "epoch: 9 step: 341, loss is 0.2446\n", + "epoch: 9 step: 342, loss is 0.2180\n", + "epoch: 9 step: 343, loss is 0.3273\n", + "epoch: 9 step: 344, loss is 0.3550\n", + "epoch: 9 step: 345, loss is 0.2465\n", + "epoch: 9 step: 346, loss is 0.2084\n", + "epoch: 9 step: 347, loss is 0.3962\n", + "epoch: 9 step: 348, loss is 0.2505\n", + "epoch: 9 step: 349, loss is 0.2329\n", + "epoch: 9 step: 350, loss is 0.3404\n", + "epoch: 9 step: 351, loss is 0.3228\n", + "epoch: 9 step: 352, loss is 0.2663\n", + "epoch: 9 step: 353, loss is 0.2314\n", + "epoch: 9 step: 354, loss is 0.4019\n", + "epoch: 9 step: 355, loss is 0.2190\n", + "epoch: 9 step: 356, loss is 0.2142\n", + "epoch: 9 step: 357, loss is 0.2802\n", + "epoch: 9 step: 358, loss is 0.2102\n", + "epoch: 9 step: 359, loss is 0.1795\n", + "epoch: 9 step: 360, loss is 0.2005\n", + "epoch: 9 step: 361, loss is 0.2372\n", + "epoch: 9 step: 362, loss is 0.1931\n", + "epoch: 9 step: 363, loss is 0.3196\n", + "epoch: 9 step: 364, loss is 0.2563\n", + "epoch: 9 step: 365, loss is 0.2488\n", + "epoch: 9 step: 366, loss is 0.2499\n", + "epoch: 9 step: 367, loss is 0.1904\n", + "epoch: 9 step: 368, loss is 0.2042\n", + "epoch: 9 step: 369, loss is 0.3357\n", + "epoch: 9 step: 370, loss is 0.3050\n", + "epoch: 9 step: 371, loss is 0.3618\n", + "epoch: 9 step: 372, loss is 0.2830\n", + "epoch: 9 step: 373, loss is 0.3102\n", + "epoch: 9 step: 374, loss is 0.1494\n", + "epoch: 9 step: 375, loss is 0.3108\n", + "epoch: 9 step: 376, loss is 0.2621\n", + "epoch: 9 step: 377, loss is 0.3015\n", + "epoch: 9 step: 378, loss is 0.3440\n", + "epoch: 9 step: 379, loss is 0.2310\n", + "epoch: 9 step: 380, loss is 0.4890\n", + "epoch: 9 step: 381, loss is 0.3627\n", + "epoch: 9 step: 382, loss is 0.2582\n", + "epoch: 9 step: 383, loss is 0.3308\n", + "epoch: 9 step: 384, loss is 0.2705\n", + "epoch: 9 step: 385, loss is 0.2209\n", + "epoch: 9 step: 386, loss is 0.3860\n", + "epoch: 9 step: 387, loss is 0.3459\n", + "epoch: 9 step: 388, loss is 0.1994\n", + "epoch: 9 step: 389, loss is 0.2605\n", + "epoch: 9 step: 390, loss is 0.3933\n", "Epoch time: 41044.824, per step time: 105.243, avg loss: 0.279\n", "************************************************************\n", - "Epoch: [ 10/ 10], step: [ 1/ 390], loss: [0.2735], avg loss: [0.2735], time: [107.2650ms]\n", - "Epoch: [ 10/ 10], step: [ 2/ 390], loss: [0.2958], avg loss: [0.2847], time: [109.0686ms]\n", - "Epoch: [ 10/ 10], step: [ 3/ 390], loss: [0.3449], avg loss: [0.3047], time: [104.8942ms]\n", - "Epoch: [ 10/ 10], step: [ 4/ 390], loss: [0.2454], avg loss: [0.2899], time: [109.5195ms]\n", - "Epoch: [ 10/ 10], step: [ 5/ 390], loss: [0.2612], avg loss: [0.2841], time: [105.2437ms]\n", - "Epoch: [ 10/ 10], step: [ 6/ 390], loss: [0.1682], avg loss: [0.2648], time: [108.9017ms]\n", - "Epoch: [ 10/ 10], step: [ 7/ 390], loss: [0.3401], avg loss: [0.2756], time: [109.3621ms]\n", - "Epoch: [ 10/ 10], step: [ 8/ 390], loss: [0.2339], avg loss: [0.2704], time: [106.6265ms]\n", - "Epoch: [ 10/ 10], step: [ 9/ 390], loss: [0.1695], avg loss: [0.2591], time: [110.9552ms]\n", - "Epoch: [ 10/ 10], step: [ 10/ 390], loss: [0.2723], avg loss: [0.2605], time: [109.6358ms]\n", - "Epoch: [ 10/ 10], step: [ 11/ 390], loss: [0.1482], avg loss: [0.2503], time: [105.4866ms]\n", - "Epoch: [ 10/ 10], step: [ 12/ 390], loss: [0.4558], avg loss: [0.2674], time: [112.1240ms]\n", - "Epoch: [ 10/ 10], step: [ 13/ 390], loss: [0.2686], avg loss: [0.2675], time: [105.1400ms]\n", - "Epoch: [ 10/ 10], step: [ 14/ 390], loss: [0.2011], avg loss: [0.2627], time: [105.2475ms]\n", - "Epoch: [ 10/ 10], step: [ 15/ 390], loss: [0.2906], avg loss: [0.2646], time: [112.1695ms]\n", - "Epoch: [ 10/ 10], step: [ 16/ 390], loss: [0.2876], avg loss: [0.2660], time: [108.7074ms]\n", - "Epoch: [ 10/ 10], step: [ 17/ 390], loss: [0.1365], avg loss: [0.2584], time: [106.4265ms]\n", - "Epoch: [ 10/ 10], step: [ 18/ 390], loss: [0.1849], avg loss: [0.2543], time: [105.9957ms]\n", - "Epoch: [ 10/ 10], step: [ 19/ 390], loss: [0.2352], avg loss: [0.2533], time: [109.8456ms]\n", - "Epoch: [ 10/ 10], step: [ 20/ 390], loss: [0.3400], avg loss: [0.2577], time: [110.5843ms]\n", - "Epoch: [ 10/ 10], step: [ 21/ 390], loss: [0.2153], avg loss: [0.2556], time: [106.0696ms]\n", - "Epoch: [ 10/ 10], step: [ 22/ 390], loss: [0.3523], avg loss: [0.2600], time: [109.1082ms]\n", - "Epoch: [ 10/ 10], step: [ 23/ 390], loss: [0.2171], avg loss: [0.2582], time: [105.2434ms]\n", - "Epoch: [ 10/ 10], step: [ 24/ 390], loss: [0.1697], avg loss: [0.2545], time: [108.7265ms]\n", - "Epoch: [ 10/ 10], step: [ 25/ 390], loss: [0.2121], avg loss: [0.2528], time: [110.8615ms]\n", - "Epoch: [ 10/ 10], step: [ 26/ 390], loss: [0.2590], avg loss: [0.2530], time: [106.7364ms]\n", - "Epoch: [ 10/ 10], step: [ 27/ 390], loss: [0.1709], avg loss: [0.2500], time: [105.6533ms]\n", - "Epoch: [ 10/ 10], step: [ 28/ 390], loss: [0.2462], avg loss: [0.2499], time: [107.2991ms]\n", - "Epoch: [ 10/ 10], step: [ 29/ 390], loss: [0.2153], avg loss: [0.2487], time: [107.5842ms]\n", - "Epoch: [ 10/ 10], step: [ 30/ 390], loss: [0.2079], avg loss: [0.2473], time: [112.8020ms]\n", - "Epoch: [ 10/ 10], step: [ 31/ 390], loss: [0.3354], avg loss: [0.2501], time: [105.5183ms]\n", - "Epoch: [ 10/ 10], step: [ 32/ 390], loss: [0.2214], avg loss: [0.2492], time: [107.1084ms]\n" + "epoch: 10 step: 1, loss is 0.2735\n", + "epoch: 10 step: 2, loss is 0.2958\n", + "epoch: 10 step: 3, loss is 0.3449\n", + "epoch: 10 step: 4, loss is 0.2454\n", + "epoch: 10 step: 5, loss is 0.2612\n", + "epoch: 10 step: 6, loss is 0.1682\n", + "epoch: 10 step: 7, loss is 0.3401\n", + "epoch: 10 step: 8, loss is 0.2339\n", + "epoch: 10 step: 9, loss is 0.1695\n", + "epoch: 10 step: 10, loss is 0.2723\n", + "epoch: 10 step: 11, loss is 0.1482\n", + "epoch: 10 step: 12, loss is 0.4558\n", + "epoch: 10 step: 13, loss is 0.2686\n", + "epoch: 10 step: 14, loss is 0.2011\n", + "epoch: 10 step: 15, loss is 0.2906\n", + "epoch: 10 step: 16, loss is 0.2876\n", + "epoch: 10 step: 17, loss is 0.1365\n", + "epoch: 10 step: 18, loss is 0.1849\n", + "epoch: 10 step: 19, loss is 0.2352\n", + "epoch: 10 step: 20, loss is 0.3400\n", + "epoch: 10 step: 21, loss is 0.2153\n", + "epoch: 10 step: 22, loss is 0.3523\n", + "epoch: 10 step: 23, loss is 0.2171\n", + "epoch: 10 step: 24, loss is 0.1697\n", + "epoch: 10 step: 25, loss is 0.2121\n", + "epoch: 10 step: 26, loss is 0.2590\n", + "epoch: 10 step: 27, loss is 0.1709\n", + "epoch: 10 step: 28, loss is 0.2462\n", + "epoch: 10 step: 29, loss is 0.2153\n", + "epoch: 10 step: 30, loss is 0.2079\n", + "epoch: 10 step: 31, loss is 0.3354\n", + "epoch: 10 step: 32, loss is 0.2214\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 10/ 10], step: [ 33/ 390], loss: [0.2019], avg loss: [0.2478], time: [108.4888ms]\n", - "Epoch: [ 10/ 10], step: [ 34/ 390], loss: [0.2363], avg loss: [0.2475], time: [109.5145ms]\n", - "Epoch: [ 10/ 10], step: [ 35/ 390], loss: [0.1242], avg loss: [0.2440], time: [105.1886ms]\n", - "Epoch: [ 10/ 10], step: [ 36/ 390], loss: [0.1880], avg loss: [0.2424], time: [104.9492ms]\n", - "Epoch: [ 10/ 10], step: [ 37/ 390], loss: [0.2874], avg loss: [0.2436], time: [111.3589ms]\n", - "Epoch: [ 10/ 10], step: [ 38/ 390], loss: [0.1517], avg loss: [0.2412], time: [107.8691ms]\n", - "Epoch: [ 10/ 10], step: [ 39/ 390], loss: [0.2969], avg loss: [0.2426], time: [107.7905ms]\n", - "Epoch: [ 10/ 10], step: [ 40/ 390], loss: [0.2387], avg loss: [0.2425], time: [108.2902ms]\n", - "Epoch: [ 10/ 10], step: [ 41/ 390], loss: [0.1753], avg loss: [0.2409], time: [109.0782ms]\n", - "Epoch: [ 10/ 10], step: [ 42/ 390], loss: [0.1604], avg loss: [0.2390], time: [111.8894ms]\n", - "Epoch: [ 10/ 10], step: [ 43/ 390], loss: [0.2058], avg loss: [0.2382], time: [111.7148ms]\n", - "Epoch: [ 10/ 10], step: [ 44/ 390], loss: [0.1899], avg loss: [0.2371], time: [106.8997ms]\n", - "Epoch: [ 10/ 10], step: [ 45/ 390], loss: [0.1511], avg loss: [0.2352], time: [106.4148ms]\n", - "Epoch: [ 10/ 10], step: [ 46/ 390], loss: [0.2173], avg loss: [0.2348], time: [108.7077ms]\n", - "Epoch: [ 10/ 10], step: [ 47/ 390], loss: [0.1632], avg loss: [0.2333], time: [110.7609ms]\n", - "Epoch: [ 10/ 10], step: [ 48/ 390], loss: [0.3122], avg loss: [0.2349], time: [108.4449ms]\n", - "Epoch: [ 10/ 10], step: [ 49/ 390], loss: [0.3052], avg loss: [0.2364], time: [106.8697ms]\n", - "Epoch: [ 10/ 10], step: [ 50/ 390], loss: [0.3136], avg loss: [0.2379], time: [108.3434ms]\n", - "Epoch: [ 10/ 10], step: [ 51/ 390], loss: [0.3212], avg loss: [0.2395], time: [108.5846ms]\n", - "Epoch: [ 10/ 10], step: [ 52/ 390], loss: [0.3128], avg loss: [0.2409], time: [106.3209ms]\n", - "Epoch: [ 10/ 10], step: [ 53/ 390], loss: [0.2322], avg loss: [0.2408], time: [105.8729ms]\n", - "Epoch: [ 10/ 10], step: [ 54/ 390], loss: [0.1590], avg loss: [0.2393], time: [106.7362ms]\n", - "Epoch: [ 10/ 10], step: [ 55/ 390], loss: [0.2994], avg loss: [0.2404], time: [105.9508ms]\n", - "Epoch: [ 10/ 10], step: [ 56/ 390], loss: [0.1690], avg loss: [0.2391], time: [108.0444ms]\n", - "Epoch: [ 10/ 10], step: [ 57/ 390], loss: [0.2279], avg loss: [0.2389], time: [106.2686ms]\n", - "Epoch: [ 10/ 10], step: [ 58/ 390], loss: [0.2540], avg loss: [0.2392], time: [105.5427ms]\n", - "Epoch: [ 10/ 10], step: [ 59/ 390], loss: [0.3558], avg loss: [0.2411], time: [105.2840ms]\n", - "Epoch: [ 10/ 10], step: [ 60/ 390], loss: [0.2341], avg loss: [0.2410], time: [105.8826ms]\n", - "Epoch: [ 10/ 10], step: [ 61/ 390], loss: [0.2298], avg loss: [0.2408], time: [106.4563ms]\n", - "Epoch: [ 10/ 10], step: [ 62/ 390], loss: [0.3778], avg loss: [0.2430], time: [111.9242ms]\n", - "Epoch: [ 10/ 10], step: [ 63/ 390], loss: [0.3423], avg loss: [0.2446], time: [111.2790ms]\n", - "Epoch: [ 10/ 10], step: [ 64/ 390], loss: [0.3083], avg loss: [0.2456], time: [112.4961ms]\n", - "Epoch: [ 10/ 10], step: [ 65/ 390], loss: [0.2735], avg loss: [0.2460], time: [106.2584ms]\n", - "Epoch: [ 10/ 10], step: [ 66/ 390], loss: [0.2864], avg loss: [0.2466], time: [111.2642ms]\n", - "Epoch: [ 10/ 10], step: [ 67/ 390], loss: [0.1541], avg loss: [0.2453], time: [107.1799ms]\n", - "Epoch: [ 10/ 10], step: [ 68/ 390], loss: [0.2601], avg loss: [0.2455], time: [110.7268ms]\n", - "Epoch: [ 10/ 10], step: [ 69/ 390], loss: [0.1962], avg loss: [0.2448], time: [109.5340ms]\n", - "Epoch: [ 10/ 10], step: [ 70/ 390], loss: [0.3097], avg loss: [0.2457], time: [109.6215ms]\n", - "Epoch: [ 10/ 10], step: [ 71/ 390], loss: [0.3367], avg loss: [0.2470], time: [109.2317ms]\n", - "Epoch: [ 10/ 10], step: [ 72/ 390], loss: [0.1662], avg loss: [0.2459], time: [112.4499ms]\n", - "Epoch: [ 10/ 10], step: [ 73/ 390], loss: [0.2811], avg loss: [0.2463], time: [110.3280ms]\n", - "Epoch: [ 10/ 10], step: [ 74/ 390], loss: [0.2674], avg loss: [0.2466], time: [112.1042ms]\n", - "Epoch: [ 10/ 10], step: [ 75/ 390], loss: [0.1980], avg loss: [0.2460], time: [108.3951ms]\n", - "Epoch: [ 10/ 10], step: [ 76/ 390], loss: [0.3432], avg loss: [0.2473], time: [109.1630ms]\n", - "Epoch: [ 10/ 10], step: [ 77/ 390], loss: [0.2632], avg loss: [0.2475], time: [108.6371ms]\n", - "Epoch: [ 10/ 10], step: [ 78/ 390], loss: [0.3397], avg loss: [0.2486], time: [107.8193ms]\n", - "Epoch: [ 10/ 10], step: [ 79/ 390], loss: [0.2095], avg loss: [0.2482], time: [105.7773ms]\n", - "Epoch: [ 10/ 10], step: [ 80/ 390], loss: [0.2881], avg loss: [0.2487], time: [105.9463ms]\n", - "Epoch: [ 10/ 10], step: [ 81/ 390], loss: [0.2335], avg loss: [0.2485], time: [106.2307ms]\n", - "Epoch: [ 10/ 10], step: [ 82/ 390], loss: [0.2270], avg loss: [0.2482], time: [111.5823ms]\n", - "Epoch: [ 10/ 10], step: [ 83/ 390], loss: [0.2386], avg loss: [0.2481], time: [110.8563ms]\n", - "Epoch: [ 10/ 10], step: [ 84/ 390], loss: [0.3727], avg loss: [0.2496], time: [109.3268ms]\n", - "Epoch: [ 10/ 10], step: [ 85/ 390], loss: [0.2267], avg loss: [0.2493], time: [106.5936ms]\n", - "Epoch: [ 10/ 10], step: [ 86/ 390], loss: [0.3805], avg loss: [0.2508], time: [108.8834ms]\n", - "Epoch: [ 10/ 10], step: [ 87/ 390], loss: [0.2122], avg loss: [0.2504], time: [105.8927ms]\n", - "Epoch: [ 10/ 10], step: [ 88/ 390], loss: [0.2837], avg loss: [0.2508], time: [105.3069ms]\n", - "Epoch: [ 10/ 10], step: [ 89/ 390], loss: [0.2378], avg loss: [0.2506], time: [108.7477ms]\n", - "Epoch: [ 10/ 10], step: [ 90/ 390], loss: [0.2685], avg loss: [0.2508], time: [108.6853ms]\n", - "Epoch: [ 10/ 10], step: [ 91/ 390], loss: [0.2153], avg loss: [0.2504], time: [104.4049ms]\n", - "Epoch: [ 10/ 10], step: [ 92/ 390], loss: [0.1842], avg loss: [0.2497], time: [107.8780ms]\n", - "Epoch: [ 10/ 10], step: [ 93/ 390], loss: [0.2125], avg loss: [0.2493], time: [106.1418ms]\n", - "Epoch: [ 10/ 10], step: [ 94/ 390], loss: [0.2021], avg loss: [0.2488], time: [111.5768ms]\n", - "Epoch: [ 10/ 10], step: [ 95/ 390], loss: [0.3926], avg loss: [0.2503], time: [109.9434ms]\n", - "Epoch: [ 10/ 10], step: [ 96/ 390], loss: [0.1395], avg loss: [0.2492], time: [111.1112ms]\n", - "Epoch: [ 10/ 10], step: [ 97/ 390], loss: [0.1523], avg loss: [0.2482], time: [110.6732ms]\n", - "Epoch: [ 10/ 10], step: [ 98/ 390], loss: [0.1938], avg loss: [0.2476], time: [107.2299ms]\n", - "Epoch: [ 10/ 10], step: [ 99/ 390], loss: [0.2635], avg loss: [0.2478], time: [110.0309ms]\n", - "Epoch: [ 10/ 10], step: [ 100/ 390], loss: [0.2527], avg loss: [0.2478], time: [107.9366ms]\n", - "Epoch: [ 10/ 10], step: [ 101/ 390], loss: [0.2829], avg loss: [0.2482], time: [110.1241ms]\n", - "Epoch: [ 10/ 10], step: [ 102/ 390], loss: [0.2359], avg loss: [0.2480], time: [112.0892ms]\n", - "Epoch: [ 10/ 10], step: [ 103/ 390], loss: [0.0937], avg loss: [0.2465], time: [106.8828ms]\n", - "Epoch: [ 10/ 10], step: [ 104/ 390], loss: [0.1952], avg loss: [0.2461], time: [111.2108ms]\n", - "Epoch: [ 10/ 10], step: [ 105/ 390], loss: [0.2002], avg loss: [0.2456], time: [105.9570ms]\n", - "Epoch: [ 10/ 10], step: [ 106/ 390], loss: [0.3605], avg loss: [0.2467], time: [108.7041ms]\n", - "Epoch: [ 10/ 10], step: [ 107/ 390], loss: [0.3041], avg loss: [0.2472], time: [106.9710ms]\n", - "Epoch: [ 10/ 10], step: [ 108/ 390], loss: [0.2202], avg loss: [0.2470], time: [110.6596ms]\n", - "Epoch: [ 10/ 10], step: [ 109/ 390], loss: [0.2284], avg loss: [0.2468], time: [105.1683ms]\n", - "Epoch: [ 10/ 10], step: [ 110/ 390], loss: [0.2802], avg loss: [0.2471], time: [110.3268ms]\n", - "Epoch: [ 10/ 10], step: [ 111/ 390], loss: [0.2795], avg loss: [0.2474], time: [106.1668ms]\n", - "Epoch: [ 10/ 10], step: [ 112/ 390], loss: [0.2365], avg loss: [0.2473], time: [106.5586ms]\n", - "Epoch: [ 10/ 10], step: [ 113/ 390], loss: [0.3807], avg loss: [0.2485], time: [110.3783ms]\n", - "Epoch: [ 10/ 10], step: [ 114/ 390], loss: [0.2560], avg loss: [0.2486], time: [109.5302ms]\n", - "Epoch: [ 10/ 10], step: [ 115/ 390], loss: [0.2673], avg loss: [0.2487], time: [106.9703ms]\n", - "Epoch: [ 10/ 10], step: [ 116/ 390], loss: [0.3012], avg loss: [0.2492], time: [105.8586ms]\n", - "Epoch: [ 10/ 10], step: [ 117/ 390], loss: [0.2159], avg loss: [0.2489], time: [110.1904ms]\n", - "Epoch: [ 10/ 10], step: [ 118/ 390], loss: [0.1535], avg loss: [0.2481], time: [107.0156ms]\n", - "Epoch: [ 10/ 10], step: [ 119/ 390], loss: [0.2864], avg loss: [0.2484], time: [105.7677ms]\n", - "Epoch: [ 10/ 10], step: [ 120/ 390], loss: [0.2596], avg loss: [0.2485], time: [110.0290ms]\n", - "Epoch: [ 10/ 10], step: [ 121/ 390], loss: [0.3524], avg loss: [0.2494], time: [107.3010ms]\n" + "epoch: 10 step: 33, loss is 0.2019\n", + "epoch: 10 step: 34, loss is 0.2363\n", + "epoch: 10 step: 35, loss is 0.1242\n", + "epoch: 10 step: 36, loss is 0.1880\n", + "epoch: 10 step: 37, loss is 0.2874\n", + "epoch: 10 step: 38, loss is 0.1517\n", + "epoch: 10 step: 39, loss is 0.2969\n", + "epoch: 10 step: 40, loss is 0.2387\n", + "epoch: 10 step: 41, loss is 0.1753\n", + "epoch: 10 step: 42, loss is 0.1604\n", + "epoch: 10 step: 43, loss is 0.2058\n", + "epoch: 10 step: 44, loss is 0.1899\n", + "epoch: 10 step: 45, loss is 0.1511\n", + "epoch: 10 step: 46, loss is 0.2173\n", + "epoch: 10 step: 47, loss is 0.1632\n", + "epoch: 10 step: 48, loss is 0.3122\n", + "epoch: 10 step: 49, loss is 0.3052\n", + "epoch: 10 step: 50, loss is 0.3136\n", + "epoch: 10 step: 51, loss is 0.3212\n", + "epoch: 10 step: 52, loss is 0.3128\n", + "epoch: 10 step: 53, loss is 0.2322\n", + "epoch: 10 step: 54, loss is 0.1590\n", + "epoch: 10 step: 55, loss is 0.2994\n", + "epoch: 10 step: 56, loss is 0.1690\n", + "epoch: 10 step: 57, loss is 0.2279\n", + "epoch: 10 step: 58, loss is 0.2540\n", + "epoch: 10 step: 59, loss is 0.3558\n", + "epoch: 10 step: 60, loss is 0.2341\n", + "epoch: 10 step: 61, loss is 0.2298\n", + "epoch: 10 step: 62, loss is 0.3778\n", + "epoch: 10 step: 63, loss is 0.3423\n", + "epoch: 10 step: 64, loss is 0.3083\n", + "epoch: 10 step: 65, loss is 0.2735\n", + "epoch: 10 step: 66, loss is 0.2864\n", + "epoch: 10 step: 67, loss is 0.1541\n", + "epoch: 10 step: 68, loss is 0.2601\n", + "epoch: 10 step: 69, loss is 0.1962\n", + "epoch: 10 step: 70, loss is 0.3097\n", + "epoch: 10 step: 71, loss is 0.3367\n", + "epoch: 10 step: 72, loss is 0.1662\n", + "epoch: 10 step: 73, loss is 0.2811\n", + "epoch: 10 step: 74, loss is 0.2674\n", + "epoch: 10 step: 75, loss is 0.1980\n", + "epoch: 10 step: 76, loss is 0.3432\n", + "epoch: 10 step: 77, loss is 0.2632\n", + "epoch: 10 step: 78, loss is 0.3397\n", + "epoch: 10 step: 79, loss is 0.2095\n", + "epoch: 10 step: 80, loss is 0.2881\n", + "epoch: 10 step: 81, loss is 0.2335\n", + "epoch: 10 step: 82, loss is 0.2270\n", + "epoch: 10 step: 83, loss is 0.2386\n", + "epoch: 10 step: 84, loss is 0.3727\n", + "epoch: 10 step: 85, loss is 0.2267\n", + "epoch: 10 step: 86, loss is 0.3805\n", + "epoch: 10 step: 87, loss is 0.2122\n", + "epoch: 10 step: 88, loss is 0.2837\n", + "epoch: 10 step: 89, loss is 0.2378\n", + "epoch: 10 step: 90, loss is 0.2685\n", + "epoch: 10 step: 91, loss is 0.2153\n", + "epoch: 10 step: 92, loss is 0.1842\n", + "epoch: 10 step: 93, loss is 0.2125\n", + "epoch: 10 step: 94, loss is 0.2021\n", + "epoch: 10 step: 95, loss is 0.3926\n", + "epoch: 10 step: 96, loss is 0.1395\n", + "epoch: 10 step: 97, loss is 0.1523\n", + "epoch: 10 step: 98, loss is 0.1938\n", + "epoch: 10 step: 99, loss is 0.2635\n", + "epoch: 10 step: 100, loss is 0.2527\n", + "epoch: 10 step: 101, loss is 0.2829\n", + "epoch: 10 step: 102, loss is 0.2359\n", + "epoch: 10 step: 103, loss is 0.0937\n", + "epoch: 10 step: 104, loss is 0.1952\n", + "epoch: 10 step: 105, loss is 0.2002\n", + "epoch: 10 step: 106, loss is 0.3605\n", + "epoch: 10 step: 107, loss is 0.3041\n", + "epoch: 10 step: 108, loss is 0.2202\n", + "epoch: 10 step: 109, loss is 0.2284\n", + "epoch: 10 step: 110, loss is 0.2802\n", + "epoch: 10 step: 111, loss is 0.2795\n", + "epoch: 10 step: 112, loss is 0.2365\n", + "epoch: 10 step: 113, loss is 0.3807\n", + "epoch: 10 step: 114, loss is 0.2560\n", + "epoch: 10 step: 115, loss is 0.2673\n", + "epoch: 10 step: 116, loss is 0.3012\n", + "epoch: 10 step: 117, loss is 0.2159\n", + "epoch: 10 step: 118, loss is 0.1535\n", + "epoch: 10 step: 119, loss is 0.2864\n", + "epoch: 10 step: 120, loss is 0.2596\n", + "epoch: 10 step: 121, loss is 0.3524\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 10/ 10], step: [ 122/ 390], loss: [0.3619], avg loss: [0.2503], time: [106.2956ms]\n", - "Epoch: [ 10/ 10], step: [ 123/ 390], loss: [0.2152], avg loss: [0.2500], time: [106.0219ms]\n", - "Epoch: [ 10/ 10], step: [ 124/ 390], loss: [0.3646], avg loss: [0.2509], time: [107.1625ms]\n", - "Epoch: [ 10/ 10], step: [ 125/ 390], loss: [0.2300], avg loss: [0.2507], time: [107.6951ms]\n", - "Epoch: [ 10/ 10], step: [ 126/ 390], loss: [0.2405], avg loss: [0.2507], time: [112.6771ms]\n", - "Epoch: [ 10/ 10], step: [ 127/ 390], loss: [0.2607], avg loss: [0.2507], time: [109.9849ms]\n", - "Epoch: [ 10/ 10], step: [ 128/ 390], loss: [0.3845], avg loss: [0.2518], time: [107.1432ms]\n", - "Epoch: [ 10/ 10], step: [ 129/ 390], loss: [0.4600], avg loss: [0.2534], time: [110.2462ms]\n", - "Epoch: [ 10/ 10], step: [ 130/ 390], loss: [0.3505], avg loss: [0.2542], time: [110.1310ms]\n", - "Epoch: [ 10/ 10], step: [ 131/ 390], loss: [0.1911], avg loss: [0.2537], time: [108.8314ms]\n", - "Epoch: [ 10/ 10], step: [ 132/ 390], loss: [0.1612], avg loss: [0.2530], time: [109.6549ms]\n", - "Epoch: [ 10/ 10], step: [ 133/ 390], loss: [0.3517], avg loss: [0.2537], time: [107.7724ms]\n", - "Epoch: [ 10/ 10], step: [ 134/ 390], loss: [0.2793], avg loss: [0.2539], time: [105.5598ms]\n", - "Epoch: [ 10/ 10], step: [ 135/ 390], loss: [0.1697], avg loss: [0.2533], time: [110.5382ms]\n", - "Epoch: [ 10/ 10], step: [ 136/ 390], loss: [0.1566], avg loss: [0.2526], time: [109.5769ms]\n", - "Epoch: [ 10/ 10], step: [ 137/ 390], loss: [0.3282], avg loss: [0.2531], time: [106.1785ms]\n", - "Epoch: [ 10/ 10], step: [ 138/ 390], loss: [0.3097], avg loss: [0.2535], time: [109.4475ms]\n", - "Epoch: [ 10/ 10], step: [ 139/ 390], loss: [0.2631], avg loss: [0.2536], time: [104.8338ms]\n", - "Epoch: [ 10/ 10], step: [ 140/ 390], loss: [0.3907], avg loss: [0.2546], time: [108.2377ms]\n", - "Epoch: [ 10/ 10], step: [ 141/ 390], loss: [0.3358], avg loss: [0.2552], time: [109.1771ms]\n", - "Epoch: [ 10/ 10], step: [ 142/ 390], loss: [0.3061], avg loss: [0.2555], time: [112.1247ms]\n", - "Epoch: [ 10/ 10], step: [ 143/ 390], loss: [0.1727], avg loss: [0.2549], time: [107.6701ms]\n", - "Epoch: [ 10/ 10], step: [ 144/ 390], loss: [0.2522], avg loss: [0.2549], time: [110.6756ms]\n", - "Epoch: [ 10/ 10], step: [ 145/ 390], loss: [0.3008], avg loss: [0.2552], time: [107.2681ms]\n", - "Epoch: [ 10/ 10], step: [ 146/ 390], loss: [0.3309], avg loss: [0.2558], time: [108.1991ms]\n", - "Epoch: [ 10/ 10], step: [ 147/ 390], loss: [0.3308], avg loss: [0.2563], time: [106.8130ms]\n", - "Epoch: [ 10/ 10], step: [ 148/ 390], loss: [0.2165], avg loss: [0.2560], time: [109.6725ms]\n", - "Epoch: [ 10/ 10], step: [ 149/ 390], loss: [0.2901], avg loss: [0.2562], time: [109.9777ms]\n", - "Epoch: [ 10/ 10], step: [ 150/ 390], loss: [0.2647], avg loss: [0.2563], time: [105.8912ms]\n", - "Epoch: [ 10/ 10], step: [ 151/ 390], loss: [0.3280], avg loss: [0.2568], time: [106.5724ms]\n", - "Epoch: [ 10/ 10], step: [ 152/ 390], loss: [0.2017], avg loss: [0.2564], time: [105.4506ms]\n", - "Epoch: [ 10/ 10], step: [ 153/ 390], loss: [0.2675], avg loss: [0.2565], time: [106.7176ms]\n", - "Epoch: [ 10/ 10], step: [ 154/ 390], loss: [0.2361], avg loss: [0.2563], time: [107.7940ms]\n", - "Epoch: [ 10/ 10], step: [ 155/ 390], loss: [0.3119], avg loss: [0.2567], time: [110.4105ms]\n", - "Epoch: [ 10/ 10], step: [ 156/ 390], loss: [0.3522], avg loss: [0.2573], time: [107.7125ms]\n", - "Epoch: [ 10/ 10], step: [ 157/ 390], loss: [0.1649], avg loss: [0.2567], time: [107.8327ms]\n", - "Epoch: [ 10/ 10], step: [ 158/ 390], loss: [0.3038], avg loss: [0.2570], time: [111.7573ms]\n", - "Epoch: [ 10/ 10], step: [ 159/ 390], loss: [0.3336], avg loss: [0.2575], time: [108.8490ms]\n", - "Epoch: [ 10/ 10], step: [ 160/ 390], loss: [0.3087], avg loss: [0.2578], time: [109.7050ms]\n", - "Epoch: [ 10/ 10], step: [ 161/ 390], loss: [0.2617], avg loss: [0.2578], time: [106.5736ms]\n", - "Epoch: [ 10/ 10], step: [ 162/ 390], loss: [0.3109], avg loss: [0.2582], time: [109.8616ms]\n", - "Epoch: [ 10/ 10], step: [ 163/ 390], loss: [0.2865], avg loss: [0.2583], time: [109.3175ms]\n", - "Epoch: [ 10/ 10], step: [ 164/ 390], loss: [0.2566], avg loss: [0.2583], time: [110.2631ms]\n", - "Epoch: [ 10/ 10], step: [ 165/ 390], loss: [0.1423], avg loss: [0.2576], time: [107.8370ms]\n", - "Epoch: [ 10/ 10], step: [ 166/ 390], loss: [0.2079], avg loss: [0.2573], time: [110.5609ms]\n", - "Epoch: [ 10/ 10], step: [ 167/ 390], loss: [0.2017], avg loss: [0.2570], time: [107.8053ms]\n", - "Epoch: [ 10/ 10], step: [ 168/ 390], loss: [0.2564], avg loss: [0.2570], time: [104.9502ms]\n", - "Epoch: [ 10/ 10], step: [ 169/ 390], loss: [0.2955], avg loss: [0.2572], time: [105.9213ms]\n", - "Epoch: [ 10/ 10], step: [ 170/ 390], loss: [0.2940], avg loss: [0.2574], time: [106.9319ms]\n", - "Epoch: [ 10/ 10], step: [ 171/ 390], loss: [0.2015], avg loss: [0.2571], time: [111.7773ms]\n", - "Epoch: [ 10/ 10], step: [ 172/ 390], loss: [0.2100], avg loss: [0.2568], time: [108.8059ms]\n", - "Epoch: [ 10/ 10], step: [ 173/ 390], loss: [0.3030], avg loss: [0.2571], time: [107.4409ms]\n", - "Epoch: [ 10/ 10], step: [ 174/ 390], loss: [0.1818], avg loss: [0.2567], time: [107.5635ms]\n", - "Epoch: [ 10/ 10], step: [ 175/ 390], loss: [0.3993], avg loss: [0.2575], time: [111.4011ms]\n", - "Epoch: [ 10/ 10], step: [ 176/ 390], loss: [0.2567], avg loss: [0.2575], time: [105.8695ms]\n", - "Epoch: [ 10/ 10], step: [ 177/ 390], loss: [0.1747], avg loss: [0.2570], time: [111.1901ms]\n", - "Epoch: [ 10/ 10], step: [ 178/ 390], loss: [0.2136], avg loss: [0.2568], time: [106.9398ms]\n", - "Epoch: [ 10/ 10], step: [ 179/ 390], loss: [0.3743], avg loss: [0.2574], time: [105.7682ms]\n", - "Epoch: [ 10/ 10], step: [ 180/ 390], loss: [0.2902], avg loss: [0.2576], time: [111.6295ms]\n", - "Epoch: [ 10/ 10], step: [ 181/ 390], loss: [0.3440], avg loss: [0.2581], time: [105.8998ms]\n", - "Epoch: [ 10/ 10], step: [ 182/ 390], loss: [0.1998], avg loss: [0.2578], time: [109.8609ms]\n", - "Epoch: [ 10/ 10], step: [ 183/ 390], loss: [0.2522], avg loss: [0.2577], time: [105.9847ms]\n", - "Epoch: [ 10/ 10], step: [ 184/ 390], loss: [0.2341], avg loss: [0.2576], time: [106.9517ms]\n", - "Epoch: [ 10/ 10], step: [ 185/ 390], loss: [0.4920], avg loss: [0.2589], time: [109.2141ms]\n", - "Epoch: [ 10/ 10], step: [ 186/ 390], loss: [0.2786], avg loss: [0.2590], time: [108.4044ms]\n", - "Epoch: [ 10/ 10], step: [ 187/ 390], loss: [0.2460], avg loss: [0.2589], time: [107.6396ms]\n", - "Epoch: [ 10/ 10], step: [ 188/ 390], loss: [0.2580], avg loss: [0.2589], time: [107.5847ms]\n", - "Epoch: [ 10/ 10], step: [ 189/ 390], loss: [0.3025], avg loss: [0.2591], time: [103.6551ms]\n", - "Epoch: [ 10/ 10], step: [ 190/ 390], loss: [0.4090], avg loss: [0.2599], time: [111.1393ms]\n", - "Epoch: [ 10/ 10], step: [ 191/ 390], loss: [0.3260], avg loss: [0.2603], time: [107.5289ms]\n", - "Epoch: [ 10/ 10], step: [ 192/ 390], loss: [0.3987], avg loss: [0.2610], time: [108.2554ms]\n", - "Epoch: [ 10/ 10], step: [ 193/ 390], loss: [0.2152], avg loss: [0.2608], time: [108.1114ms]\n", - "Epoch: [ 10/ 10], step: [ 194/ 390], loss: [0.3381], avg loss: [0.2612], time: [108.8758ms]\n", - "Epoch: [ 10/ 10], step: [ 195/ 390], loss: [0.2607], avg loss: [0.2611], time: [108.9261ms]\n", - "Epoch: [ 10/ 10], step: [ 196/ 390], loss: [0.4350], avg loss: [0.2620], time: [111.8217ms]\n", - "Epoch: [ 10/ 10], step: [ 197/ 390], loss: [0.2947], avg loss: [0.2622], time: [104.9793ms]\n", - "Epoch: [ 10/ 10], step: [ 198/ 390], loss: [0.3505], avg loss: [0.2626], time: [105.8023ms]\n", - "Epoch: [ 10/ 10], step: [ 199/ 390], loss: [0.2185], avg loss: [0.2624], time: [107.3391ms]\n", - "Epoch: [ 10/ 10], step: [ 200/ 390], loss: [0.1370], avg loss: [0.2618], time: [110.1713ms]\n", - "Epoch: [ 10/ 10], step: [ 201/ 390], loss: [0.2361], avg loss: [0.2617], time: [110.3864ms]\n", - "Epoch: [ 10/ 10], step: [ 202/ 390], loss: [0.3012], avg loss: [0.2619], time: [106.5571ms]\n", - "Epoch: [ 10/ 10], step: [ 203/ 390], loss: [0.4109], avg loss: [0.2626], time: [106.7629ms]\n", - "Epoch: [ 10/ 10], step: [ 204/ 390], loss: [0.1969], avg loss: [0.2623], time: [106.7109ms]\n", - "Epoch: [ 10/ 10], step: [ 205/ 390], loss: [0.2397], avg loss: [0.2622], time: [109.5626ms]\n", - "Epoch: [ 10/ 10], step: [ 206/ 390], loss: [0.1650], avg loss: [0.2617], time: [106.1697ms]\n", - "Epoch: [ 10/ 10], step: [ 207/ 390], loss: [0.1021], avg loss: [0.2609], time: [109.8776ms]\n", - "Epoch: [ 10/ 10], step: [ 208/ 390], loss: [0.2504], avg loss: [0.2609], time: [107.1084ms]\n", - "Epoch: [ 10/ 10], step: [ 209/ 390], loss: [0.3086], avg loss: [0.2611], time: [108.1409ms]\n", - "Epoch: [ 10/ 10], step: [ 210/ 390], loss: [0.5832], avg loss: [0.2626], time: [110.9455ms]\n" + "epoch: 10 step: 122, loss is 0.3619\n", + "epoch: 10 step: 123, loss is 0.2152\n", + "epoch: 10 step: 124, loss is 0.3646\n", + "epoch: 10 step: 125, loss is 0.2300\n", + "epoch: 10 step: 126, loss is 0.2405\n", + "epoch: 10 step: 127, loss is 0.2607\n", + "epoch: 10 step: 128, loss is 0.3845\n", + "epoch: 10 step: 129, loss is 0.4600\n", + "epoch: 10 step: 130, loss is 0.3505\n", + "epoch: 10 step: 131, loss is 0.1911\n", + "epoch: 10 step: 132, loss is 0.1612\n", + "epoch: 10 step: 133, loss is 0.3517\n", + "epoch: 10 step: 134, loss is 0.2793\n", + "epoch: 10 step: 135, loss is 0.1697\n", + "epoch: 10 step: 136, loss is 0.1566\n", + "epoch: 10 step: 137, loss is 0.3282\n", + "epoch: 10 step: 138, loss is 0.3097\n", + "epoch: 10 step: 139, loss is 0.2631\n", + "epoch: 10 step: 140, loss is 0.3907\n", + "epoch: 10 step: 141, loss is 0.3358\n", + "epoch: 10 step: 142, loss is 0.3061\n", + "epoch: 10 step: 143, loss is 0.1727\n", + "epoch: 10 step: 144, loss is 0.2522\n", + "epoch: 10 step: 145, loss is 0.3008\n", + "epoch: 10 step: 146, loss is 0.3309\n", + "epoch: 10 step: 147, loss is 0.3308\n", + "epoch: 10 step: 148, loss is 0.2165\n", + "epoch: 10 step: 149, loss is 0.2901\n", + "epoch: 10 step: 150, loss is 0.2647\n", + "epoch: 10 step: 151, loss is 0.3280\n", + "epoch: 10 step: 152, loss is 0.2017\n", + "epoch: 10 step: 153, loss is 0.2675\n", + "epoch: 10 step: 154, loss is 0.2361\n", + "epoch: 10 step: 155, loss is 0.3119\n", + "epoch: 10 step: 156, loss is 0.3522\n", + "epoch: 10 step: 157, loss is 0.1649\n", + "epoch: 10 step: 158, loss is 0.3038\n", + "epoch: 10 step: 159, loss is 0.3336\n", + "epoch: 10 step: 160, loss is 0.3087\n", + "epoch: 10 step: 161, loss is 0.2617\n", + "epoch: 10 step: 162, loss is 0.3109\n", + "epoch: 10 step: 163, loss is 0.2865\n", + "epoch: 10 step: 164, loss is 0.2566\n", + "epoch: 10 step: 165, loss is 0.1423\n", + "epoch: 10 step: 166, loss is 0.2079\n", + "epoch: 10 step: 167, loss is 0.2017\n", + "epoch: 10 step: 168, loss is 0.2564\n", + "epoch: 10 step: 169, loss is 0.2955\n", + "epoch: 10 step: 170, loss is 0.2940\n", + "epoch: 10 step: 171, loss is 0.2015\n", + "epoch: 10 step: 172, loss is 0.2100\n", + "epoch: 10 step: 173, loss is 0.3030\n", + "epoch: 10 step: 174, loss is 0.1818\n", + "epoch: 10 step: 175, loss is 0.3993\n", + "epoch: 10 step: 176, loss is 0.2567\n", + "epoch: 10 step: 177, loss is 0.1747\n", + "epoch: 10 step: 178, loss is 0.2136\n", + "epoch: 10 step: 179, loss is 0.3743\n", + "epoch: 10 step: 180, loss is 0.2902\n", + "epoch: 10 step: 181, loss is 0.3440\n", + "epoch: 10 step: 182, loss is 0.1998\n", + "epoch: 10 step: 183, loss is 0.2522\n", + "epoch: 10 step: 184, loss is 0.2341\n", + "epoch: 10 step: 185, loss is 0.4920\n", + "epoch: 10 step: 186, loss is 0.2786\n", + "epoch: 10 step: 187, loss is 0.2460\n", + "epoch: 10 step: 188, loss is 0.2580\n", + "epoch: 10 step: 189, loss is 0.3025\n", + "epoch: 10 step: 190, loss is 0.4090\n", + "epoch: 10 step: 191, loss is 0.3260\n", + "epoch: 10 step: 192, loss is 0.3987\n", + "epoch: 10 step: 193, loss is 0.2152\n", + "epoch: 10 step: 194, loss is 0.3381\n", + "epoch: 10 step: 195, loss is 0.2607\n", + "epoch: 10 step: 196, loss is 0.4350\n", + "epoch: 10 step: 197, loss is 0.2947\n", + "epoch: 10 step: 198, loss is 0.3505\n", + "epoch: 10 step: 199, loss is 0.2185\n", + "epoch: 10 step: 200, loss is 0.1370\n", + "epoch: 10 step: 201, loss is 0.2361\n", + "epoch: 10 step: 202, loss is 0.3012\n", + "epoch: 10 step: 203, loss is 0.4109\n", + "epoch: 10 step: 204, loss is 0.1969\n", + "epoch: 10 step: 205, loss is 0.2397\n", + "epoch: 10 step: 206, loss is 0.1650\n", + "epoch: 10 step: 207, loss is 0.1021\n", + "epoch: 10 step: 208, loss is 0.2504\n", + "epoch: 10 step: 209, loss is 0.3086\n", + "epoch: 10 step: 210, loss is 0.5832\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 10/ 10], step: [ 211/ 390], loss: [0.3747], avg loss: [0.2632], time: [110.2545ms]\n", - "Epoch: [ 10/ 10], step: [ 212/ 390], loss: [0.1915], avg loss: [0.2628], time: [109.9415ms]\n", - "Epoch: [ 10/ 10], step: [ 213/ 390], loss: [0.2435], avg loss: [0.2627], time: [104.7785ms]\n", - "Epoch: [ 10/ 10], step: [ 214/ 390], loss: [0.1964], avg loss: [0.2624], time: [107.4312ms]\n", - "Epoch: [ 10/ 10], step: [ 215/ 390], loss: [0.1412], avg loss: [0.2619], time: [104.5918ms]\n", - "Epoch: [ 10/ 10], step: [ 216/ 390], loss: [0.3663], avg loss: [0.2623], time: [108.3212ms]\n", - "Epoch: [ 10/ 10], step: [ 217/ 390], loss: [0.2127], avg loss: [0.2621], time: [108.3050ms]\n", - "Epoch: [ 10/ 10], step: [ 218/ 390], loss: [0.3638], avg loss: [0.2626], time: [110.9850ms]\n", - "Epoch: [ 10/ 10], step: [ 219/ 390], loss: [0.2969], avg loss: [0.2627], time: [105.5558ms]\n", - "Epoch: [ 10/ 10], step: [ 220/ 390], loss: [0.2878], avg loss: [0.2629], time: [108.3949ms]\n", - "Epoch: [ 10/ 10], step: [ 221/ 390], loss: [0.3518], avg loss: [0.2633], time: [110.9416ms]\n", - "Epoch: [ 10/ 10], step: [ 222/ 390], loss: [0.2342], avg loss: [0.2631], time: [109.9663ms]\n", - "Epoch: [ 10/ 10], step: [ 223/ 390], loss: [0.2159], avg loss: [0.2629], time: [105.5830ms]\n", - "Epoch: [ 10/ 10], step: [ 224/ 390], loss: [0.3619], avg loss: [0.2634], time: [110.3156ms]\n", - "Epoch: [ 10/ 10], step: [ 225/ 390], loss: [0.2785], avg loss: [0.2634], time: [104.5990ms]\n", - "Epoch: [ 10/ 10], step: [ 226/ 390], loss: [0.2721], avg loss: [0.2635], time: [109.7393ms]\n", - "Epoch: [ 10/ 10], step: [ 227/ 390], loss: [0.2554], avg loss: [0.2634], time: [111.9573ms]\n", - "Epoch: [ 10/ 10], step: [ 228/ 390], loss: [0.3147], avg loss: [0.2637], time: [107.5308ms]\n", - "Epoch: [ 10/ 10], step: [ 229/ 390], loss: [0.2355], avg loss: [0.2635], time: [107.7449ms]\n", - "Epoch: [ 10/ 10], step: [ 230/ 390], loss: [0.2799], avg loss: [0.2636], time: [107.4722ms]\n", - "Epoch: [ 10/ 10], step: [ 231/ 390], loss: [0.3037], avg loss: [0.2638], time: [106.1707ms]\n", - "Epoch: [ 10/ 10], step: [ 232/ 390], loss: [0.3153], avg loss: [0.2640], time: [107.8808ms]\n", - "Epoch: [ 10/ 10], step: [ 233/ 390], loss: [0.2251], avg loss: [0.2638], time: [107.4030ms]\n", - "Epoch: [ 10/ 10], step: [ 234/ 390], loss: [0.3054], avg loss: [0.2640], time: [107.9900ms]\n", - "Epoch: [ 10/ 10], step: [ 235/ 390], loss: [0.2202], avg loss: [0.2638], time: [106.9703ms]\n", - "Epoch: [ 10/ 10], step: [ 236/ 390], loss: [0.3073], avg loss: [0.2640], time: [109.4832ms]\n", - "Epoch: [ 10/ 10], step: [ 237/ 390], loss: [0.2066], avg loss: [0.2638], time: [110.3923ms]\n", - "Epoch: [ 10/ 10], step: [ 238/ 390], loss: [0.1443], avg loss: [0.2633], time: [108.6230ms]\n", - "Epoch: [ 10/ 10], step: [ 239/ 390], loss: [0.2317], avg loss: [0.2631], time: [108.2540ms]\n", - "Epoch: [ 10/ 10], step: [ 240/ 390], loss: [0.3590], avg loss: [0.2635], time: [111.8877ms]\n", - "Epoch: [ 10/ 10], step: [ 241/ 390], loss: [0.2146], avg loss: [0.2633], time: [106.5567ms]\n", - "Epoch: [ 10/ 10], step: [ 242/ 390], loss: [0.3797], avg loss: [0.2638], time: [106.3190ms]\n", - "Epoch: [ 10/ 10], step: [ 243/ 390], loss: [0.2756], avg loss: [0.2639], time: [107.1041ms]\n", - "Epoch: [ 10/ 10], step: [ 244/ 390], loss: [0.1608], avg loss: [0.2634], time: [107.5895ms]\n", - "Epoch: [ 10/ 10], step: [ 245/ 390], loss: [0.2442], avg loss: [0.2634], time: [108.3300ms]\n", - "Epoch: [ 10/ 10], step: [ 246/ 390], loss: [0.2288], avg loss: [0.2632], time: [110.5840ms]\n", - "Epoch: [ 10/ 10], step: [ 247/ 390], loss: [0.2711], avg loss: [0.2632], time: [104.5589ms]\n", - "Epoch: [ 10/ 10], step: [ 248/ 390], loss: [0.0924], avg loss: [0.2626], time: [107.1560ms]\n", - "Epoch: [ 10/ 10], step: [ 249/ 390], loss: [0.3406], avg loss: [0.2629], time: [106.8752ms]\n", - "Epoch: [ 10/ 10], step: [ 250/ 390], loss: [0.2317], avg loss: [0.2627], time: [106.2210ms]\n", - "Epoch: [ 10/ 10], step: [ 251/ 390], loss: [0.2523], avg loss: [0.2627], time: [107.2834ms]\n", - "Epoch: [ 10/ 10], step: [ 252/ 390], loss: [0.2392], avg loss: [0.2626], time: [106.4541ms]\n", - "Epoch: [ 10/ 10], step: [ 253/ 390], loss: [0.2634], avg loss: [0.2626], time: [106.4119ms]\n", - "Epoch: [ 10/ 10], step: [ 254/ 390], loss: [0.3347], avg loss: [0.2629], time: [106.7481ms]\n", - "Epoch: [ 10/ 10], step: [ 255/ 390], loss: [0.2345], avg loss: [0.2628], time: [110.1220ms]\n", - "Epoch: [ 10/ 10], step: [ 256/ 390], loss: [0.3497], avg loss: [0.2631], time: [106.4658ms]\n", - "Epoch: [ 10/ 10], step: [ 257/ 390], loss: [0.2975], avg loss: [0.2633], time: [106.3850ms]\n", - "Epoch: [ 10/ 10], step: [ 258/ 390], loss: [0.2213], avg loss: [0.2631], time: [108.4590ms]\n", - "Epoch: [ 10/ 10], step: [ 259/ 390], loss: [0.2213], avg loss: [0.2629], time: [109.8859ms]\n", - "Epoch: [ 10/ 10], step: [ 260/ 390], loss: [0.3164], avg loss: [0.2631], time: [112.5925ms]\n", - "Epoch: [ 10/ 10], step: [ 261/ 390], loss: [0.2560], avg loss: [0.2631], time: [110.0361ms]\n", - "Epoch: [ 10/ 10], step: [ 262/ 390], loss: [0.1884], avg loss: [0.2628], time: [109.5445ms]\n", - "Epoch: [ 10/ 10], step: [ 263/ 390], loss: [0.3105], avg loss: [0.2630], time: [107.9669ms]\n", - "Epoch: [ 10/ 10], step: [ 264/ 390], loss: [0.2927], avg loss: [0.2631], time: [107.5475ms]\n", - "Epoch: [ 10/ 10], step: [ 265/ 390], loss: [0.2530], avg loss: [0.2631], time: [109.3583ms]\n", - "Epoch: [ 10/ 10], step: [ 266/ 390], loss: [0.3810], avg loss: [0.2635], time: [111.5427ms]\n", - "Epoch: [ 10/ 10], step: [ 267/ 390], loss: [0.2432], avg loss: [0.2635], time: [108.4878ms]\n", - "Epoch: [ 10/ 10], step: [ 268/ 390], loss: [0.3442], avg loss: [0.2638], time: [110.7740ms]\n", - "Epoch: [ 10/ 10], step: [ 269/ 390], loss: [0.2244], avg loss: [0.2636], time: [107.2319ms]\n", - "Epoch: [ 10/ 10], step: [ 270/ 390], loss: [0.3054], avg loss: [0.2638], time: [109.4458ms]\n", - "Epoch: [ 10/ 10], step: [ 271/ 390], loss: [0.2844], avg loss: [0.2638], time: [110.3549ms]\n", - "Epoch: [ 10/ 10], step: [ 272/ 390], loss: [0.3220], avg loss: [0.2641], time: [106.1864ms]\n", - "Epoch: [ 10/ 10], step: [ 273/ 390], loss: [0.2778], avg loss: [0.2641], time: [111.6083ms]\n", - "Epoch: [ 10/ 10], step: [ 274/ 390], loss: [0.2705], avg loss: [0.2641], time: [107.1002ms]\n", - "Epoch: [ 10/ 10], step: [ 275/ 390], loss: [0.1720], avg loss: [0.2638], time: [105.9129ms]\n", - "Epoch: [ 10/ 10], step: [ 276/ 390], loss: [0.1866], avg loss: [0.2635], time: [110.8553ms]\n", - "Epoch: [ 10/ 10], step: [ 277/ 390], loss: [0.3264], avg loss: [0.2637], time: [111.2182ms]\n", - "Epoch: [ 10/ 10], step: [ 278/ 390], loss: [0.3074], avg loss: [0.2639], time: [109.3917ms]\n", - "Epoch: [ 10/ 10], step: [ 279/ 390], loss: [0.1466], avg loss: [0.2635], time: [105.2890ms]\n", - "Epoch: [ 10/ 10], step: [ 280/ 390], loss: [0.1658], avg loss: [0.2631], time: [112.2177ms]\n", - "Epoch: [ 10/ 10], step: [ 281/ 390], loss: [0.2875], avg loss: [0.2632], time: [108.7754ms]\n", - "Epoch: [ 10/ 10], step: [ 282/ 390], loss: [0.2496], avg loss: [0.2632], time: [106.4527ms]\n", - "Epoch: [ 10/ 10], step: [ 283/ 390], loss: [0.2294], avg loss: [0.2630], time: [113.1241ms]\n", - "Epoch: [ 10/ 10], step: [ 284/ 390], loss: [0.2058], avg loss: [0.2628], time: [112.6666ms]\n", - "Epoch: [ 10/ 10], step: [ 285/ 390], loss: [0.2605], avg loss: [0.2628], time: [107.1360ms]\n", - "Epoch: [ 10/ 10], step: [ 286/ 390], loss: [0.3054], avg loss: [0.2630], time: [109.3924ms]\n", - "Epoch: [ 10/ 10], step: [ 287/ 390], loss: [0.2496], avg loss: [0.2629], time: [105.4373ms]\n", - "Epoch: [ 10/ 10], step: [ 288/ 390], loss: [0.1728], avg loss: [0.2626], time: [106.0085ms]\n", - "Epoch: [ 10/ 10], step: [ 289/ 390], loss: [0.3792], avg loss: [0.2630], time: [111.7752ms]\n", - "Epoch: [ 10/ 10], step: [ 290/ 390], loss: [0.1727], avg loss: [0.2627], time: [106.4239ms]\n", - "Epoch: [ 10/ 10], step: [ 291/ 390], loss: [0.2272], avg loss: [0.2626], time: [105.7825ms]\n", - "Epoch: [ 10/ 10], step: [ 292/ 390], loss: [0.2899], avg loss: [0.2627], time: [110.7328ms]\n", - "Epoch: [ 10/ 10], step: [ 293/ 390], loss: [0.3781], avg loss: [0.2631], time: [111.2952ms]\n", - "Epoch: [ 10/ 10], step: [ 294/ 390], loss: [0.2894], avg loss: [0.2632], time: [107.3008ms]\n", - "Epoch: [ 10/ 10], step: [ 295/ 390], loss: [0.2592], avg loss: [0.2632], time: [105.6979ms]\n", - "Epoch: [ 10/ 10], step: [ 296/ 390], loss: [0.2395], avg loss: [0.2631], time: [111.0971ms]\n", - "Epoch: [ 10/ 10], step: [ 297/ 390], loss: [0.2941], avg loss: [0.2632], time: [106.8094ms]\n", - "Epoch: [ 10/ 10], step: [ 298/ 390], loss: [0.2771], avg loss: [0.2632], time: [106.8492ms]\n", - "Epoch: [ 10/ 10], step: [ 299/ 390], loss: [0.2782], avg loss: [0.2633], time: [106.0860ms]\n" + "epoch: 10 step: 211, loss is 0.3747\n", + "epoch: 10 step: 212, loss is 0.1915\n", + "epoch: 10 step: 213, loss is 0.2435\n", + "epoch: 10 step: 214, loss is 0.1964\n", + "epoch: 10 step: 215, loss is 0.1412\n", + "epoch: 10 step: 216, loss is 0.3663\n", + "epoch: 10 step: 217, loss is 0.2127\n", + "epoch: 10 step: 218, loss is 0.3638\n", + "epoch: 10 step: 219, loss is 0.2969\n", + "epoch: 10 step: 220, loss is 0.2878\n", + "epoch: 10 step: 221, loss is 0.3518\n", + "epoch: 10 step: 222, loss is 0.2342\n", + "epoch: 10 step: 223, loss is 0.2159\n", + "epoch: 10 step: 224, loss is 0.3619\n", + "epoch: 10 step: 225, loss is 0.2785\n", + "epoch: 10 step: 226, loss is 0.2721\n", + "epoch: 10 step: 227, loss is 0.2554\n", + "epoch: 10 step: 228, loss is 0.3147\n", + "epoch: 10 step: 229, loss is 0.2355\n", + "epoch: 10 step: 230, loss is 0.2799\n", + "epoch: 10 step: 231, loss is 0.3037\n", + "epoch: 10 step: 232, loss is 0.3153\n", + "epoch: 10 step: 233, loss is 0.2251\n", + "epoch: 10 step: 234, loss is 0.3054\n", + "epoch: 10 step: 235, loss is 0.2202\n", + "epoch: 10 step: 236, loss is 0.3073\n", + "epoch: 10 step: 237, loss is 0.2066\n", + "epoch: 10 step: 238, loss is 0.1443\n", + "epoch: 10 step: 239, loss is 0.2317\n", + "epoch: 10 step: 240, loss is 0.3590\n", + "epoch: 10 step: 241, loss is 0.2146\n", + "epoch: 10 step: 242, loss is 0.3797\n", + "epoch: 10 step: 243, loss is 0.2756\n", + "epoch: 10 step: 244, loss is 0.1608\n", + "epoch: 10 step: 245, loss is 0.2442\n", + "epoch: 10 step: 246, loss is 0.2288\n", + "epoch: 10 step: 247, loss is 0.2711\n", + "epoch: 10 step: 248, loss is 0.0924\n", + "epoch: 10 step: 249, loss is 0.3406\n", + "epoch: 10 step: 250, loss is 0.2317\n", + "epoch: 10 step: 251, loss is 0.2523\n", + "epoch: 10 step: 252, loss is 0.2392\n", + "epoch: 10 step: 253, loss is 0.2634\n", + "epoch: 10 step: 254, loss is 0.3347\n", + "epoch: 10 step: 255, loss is 0.2345\n", + "epoch: 10 step: 256, loss is 0.3497\n", + "epoch: 10 step: 257, loss is 0.2975\n", + "epoch: 10 step: 258, loss is 0.2213\n", + "epoch: 10 step: 259, loss is 0.2213\n", + "epoch: 10 step: 260, loss is 0.3164\n", + "epoch: 10 step: 261, loss is 0.2560\n", + "epoch: 10 step: 262, loss is 0.1884\n", + "epoch: 10 step: 263, loss is 0.3105\n", + "epoch: 10 step: 264, loss is 0.2927\n", + "epoch: 10 step: 265, loss is 0.2530\n", + "epoch: 10 step: 266, loss is 0.3810\n", + "epoch: 10 step: 267, loss is 0.2432\n", + "epoch: 10 step: 268, loss is 0.3442\n", + "epoch: 10 step: 269, loss is 0.2244\n", + "epoch: 10 step: 270, loss is 0.3054\n", + "epoch: 10 step: 271, loss is 0.2844\n", + "epoch: 10 step: 272, loss is 0.3220\n", + "epoch: 10 step: 273, loss is 0.2778\n", + "epoch: 10 step: 274, loss is 0.2705\n", + "epoch: 10 step: 275, loss is 0.1720\n", + "epoch: 10 step: 276, loss is 0.1866\n", + "epoch: 10 step: 277, loss is 0.3264\n", + "epoch: 10 step: 278, loss is 0.3074\n", + "epoch: 10 step: 279, loss is 0.1466\n", + "epoch: 10 step: 280, loss is 0.1658\n", + "epoch: 10 step: 281, loss is 0.2875\n", + "epoch: 10 step: 282, loss is 0.2496\n", + "epoch: 10 step: 283, loss is 0.2294\n", + "epoch: 10 step: 284, loss is 0.2058\n", + "epoch: 10 step: 285, loss is 0.2605\n", + "epoch: 10 step: 286, loss is 0.3054\n", + "epoch: 10 step: 287, loss is 0.2496\n", + "epoch: 10 step: 288, loss is 0.1728\n", + "epoch: 10 step: 289, loss is 0.3792\n", + "epoch: 10 step: 290, loss is 0.1727\n", + "epoch: 10 step: 291, loss is 0.2272\n", + "epoch: 10 step: 292, loss is 0.2899\n", + "epoch: 10 step: 293, loss is 0.3781\n", + "epoch: 10 step: 294, loss is 0.2894\n", + "epoch: 10 step: 295, loss is 0.2592\n", + "epoch: 10 step: 296, loss is 0.2395\n", + "epoch: 10 step: 297, loss is 0.2941\n", + "epoch: 10 step: 298, loss is 0.2771\n", + "epoch: 10 step: 299, loss is 0.2782\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 10/ 10], step: [ 300/ 390], loss: [0.2784], avg loss: [0.2633], time: [106.0503ms]\n", - "Epoch: [ 10/ 10], step: [ 301/ 390], loss: [0.2806], avg loss: [0.2634], time: [105.9799ms]\n", - "Epoch: [ 10/ 10], step: [ 302/ 390], loss: [0.2436], avg loss: [0.2633], time: [106.9660ms]\n", - "Epoch: [ 10/ 10], step: [ 303/ 390], loss: [0.3769], avg loss: [0.2637], time: [109.2713ms]\n", - "Epoch: [ 10/ 10], step: [ 304/ 390], loss: [0.3425], avg loss: [0.2640], time: [108.8324ms]\n", - "Epoch: [ 10/ 10], step: [ 305/ 390], loss: [0.2269], avg loss: [0.2638], time: [107.3177ms]\n", - "Epoch: [ 10/ 10], step: [ 306/ 390], loss: [0.4220], avg loss: [0.2643], time: [109.8456ms]\n", - "Epoch: [ 10/ 10], step: [ 307/ 390], loss: [0.2467], avg loss: [0.2643], time: [105.4230ms]\n", - "Epoch: [ 10/ 10], step: [ 308/ 390], loss: [0.1316], avg loss: [0.2639], time: [110.8382ms]\n", - "Epoch: [ 10/ 10], step: [ 309/ 390], loss: [0.1762], avg loss: [0.2636], time: [109.5812ms]\n", - "Epoch: [ 10/ 10], step: [ 310/ 390], loss: [0.3126], avg loss: [0.2637], time: [108.0234ms]\n", - "Epoch: [ 10/ 10], step: [ 311/ 390], loss: [0.3991], avg loss: [0.2642], time: [111.2289ms]\n", - "Epoch: [ 10/ 10], step: [ 312/ 390], loss: [0.1567], avg loss: [0.2638], time: [107.8992ms]\n", - "Epoch: [ 10/ 10], step: [ 313/ 390], loss: [0.2893], avg loss: [0.2639], time: [106.9951ms]\n", - "Epoch: [ 10/ 10], step: [ 314/ 390], loss: [0.1417], avg loss: [0.2635], time: [106.4160ms]\n", - "Epoch: [ 10/ 10], step: [ 315/ 390], loss: [0.2252], avg loss: [0.2634], time: [105.4993ms]\n", - "Epoch: [ 10/ 10], step: [ 316/ 390], loss: [0.2381], avg loss: [0.2633], time: [109.7620ms]\n", - "Epoch: [ 10/ 10], step: [ 317/ 390], loss: [0.2423], avg loss: [0.2633], time: [111.3331ms]\n", - "Epoch: [ 10/ 10], step: [ 318/ 390], loss: [0.2374], avg loss: [0.2632], time: [108.0234ms]\n", - "Epoch: [ 10/ 10], step: [ 319/ 390], loss: [0.2307], avg loss: [0.2631], time: [108.1231ms]\n", - "Epoch: [ 10/ 10], step: [ 320/ 390], loss: [0.0773], avg loss: [0.2625], time: [112.3674ms]\n", - "Epoch: [ 10/ 10], step: [ 321/ 390], loss: [0.2638], avg loss: [0.2625], time: [107.9214ms]\n", - "Epoch: [ 10/ 10], step: [ 322/ 390], loss: [0.2122], avg loss: [0.2623], time: [105.8342ms]\n", - "Epoch: [ 10/ 10], step: [ 323/ 390], loss: [0.3638], avg loss: [0.2626], time: [111.7072ms]\n", - "Epoch: [ 10/ 10], step: [ 324/ 390], loss: [0.2257], avg loss: [0.2625], time: [110.1723ms]\n", - "Epoch: [ 10/ 10], step: [ 325/ 390], loss: [0.1227], avg loss: [0.2621], time: [109.4925ms]\n", - "Epoch: [ 10/ 10], step: [ 326/ 390], loss: [0.2076], avg loss: [0.2619], time: [111.5489ms]\n", - "Epoch: [ 10/ 10], step: [ 327/ 390], loss: [0.3363], avg loss: [0.2622], time: [110.1079ms]\n", - "Epoch: [ 10/ 10], step: [ 328/ 390], loss: [0.2720], avg loss: [0.2622], time: [107.8780ms]\n", - "Epoch: [ 10/ 10], step: [ 329/ 390], loss: [0.3177], avg loss: [0.2624], time: [106.6554ms]\n", - "Epoch: [ 10/ 10], step: [ 330/ 390], loss: [0.3589], avg loss: [0.2627], time: [112.8848ms]\n", - "Epoch: [ 10/ 10], step: [ 331/ 390], loss: [0.2251], avg loss: [0.2625], time: [106.4649ms]\n", - "Epoch: [ 10/ 10], step: [ 332/ 390], loss: [0.2356], avg loss: [0.2625], time: [111.0713ms]\n", - "Epoch: [ 10/ 10], step: [ 333/ 390], loss: [0.2400], avg loss: [0.2624], time: [109.3411ms]\n", - "Epoch: [ 10/ 10], step: [ 334/ 390], loss: [0.2644], avg loss: [0.2624], time: [109.3733ms]\n", - "Epoch: [ 10/ 10], step: [ 335/ 390], loss: [0.1769], avg loss: [0.2621], time: [109.7476ms]\n", - "Epoch: [ 10/ 10], step: [ 336/ 390], loss: [0.2161], avg loss: [0.2620], time: [112.4244ms]\n", - "Epoch: [ 10/ 10], step: [ 337/ 390], loss: [0.2156], avg loss: [0.2619], time: [106.9608ms]\n", - "Epoch: [ 10/ 10], step: [ 338/ 390], loss: [0.1552], avg loss: [0.2616], time: [111.5198ms]\n", - "Epoch: [ 10/ 10], step: [ 339/ 390], loss: [0.3564], avg loss: [0.2618], time: [108.8660ms]\n", - "Epoch: [ 10/ 10], step: [ 340/ 390], loss: [0.3401], avg loss: [0.2621], time: [112.4554ms]\n", - "Epoch: [ 10/ 10], step: [ 341/ 390], loss: [0.2185], avg loss: [0.2619], time: [107.0263ms]\n", - "Epoch: [ 10/ 10], step: [ 342/ 390], loss: [0.1962], avg loss: [0.2617], time: [108.1316ms]\n", - "Epoch: [ 10/ 10], step: [ 343/ 390], loss: [0.2351], avg loss: [0.2617], time: [106.5624ms]\n", - "Epoch: [ 10/ 10], step: [ 344/ 390], loss: [0.2256], avg loss: [0.2616], time: [106.1256ms]\n", - "Epoch: [ 10/ 10], step: [ 345/ 390], loss: [0.3031], avg loss: [0.2617], time: [107.1990ms]\n", - "Epoch: [ 10/ 10], step: [ 346/ 390], loss: [0.3497], avg loss: [0.2619], time: [107.4386ms]\n", - "Epoch: [ 10/ 10], step: [ 347/ 390], loss: [0.3768], avg loss: [0.2623], time: [105.7918ms]\n", - "Epoch: [ 10/ 10], step: [ 348/ 390], loss: [0.2074], avg loss: [0.2621], time: [105.9022ms]\n", - "Epoch: [ 10/ 10], step: [ 349/ 390], loss: [0.1948], avg loss: [0.2619], time: [106.8838ms]\n", - "Epoch: [ 10/ 10], step: [ 350/ 390], loss: [0.2780], avg loss: [0.2620], time: [110.0733ms]\n", - "Epoch: [ 10/ 10], step: [ 351/ 390], loss: [0.2888], avg loss: [0.2620], time: [108.5923ms]\n", - "Epoch: [ 10/ 10], step: [ 352/ 390], loss: [0.2742], avg loss: [0.2621], time: [109.2544ms]\n", - "Epoch: [ 10/ 10], step: [ 353/ 390], loss: [0.3123], avg loss: [0.2622], time: [106.8628ms]\n", - "Epoch: [ 10/ 10], step: [ 354/ 390], loss: [0.3578], avg loss: [0.2625], time: [109.7829ms]\n", - "Epoch: [ 10/ 10], step: [ 355/ 390], loss: [0.1633], avg loss: [0.2622], time: [110.7647ms]\n", - "Epoch: [ 10/ 10], step: [ 356/ 390], loss: [0.2015], avg loss: [0.2620], time: [107.0683ms]\n", - "Epoch: [ 10/ 10], step: [ 357/ 390], loss: [0.2081], avg loss: [0.2619], time: [109.8542ms]\n", - "Epoch: [ 10/ 10], step: [ 358/ 390], loss: [0.2807], avg loss: [0.2619], time: [110.7974ms]\n", - "Epoch: [ 10/ 10], step: [ 359/ 390], loss: [0.2153], avg loss: [0.2618], time: [108.9861ms]\n", - "Epoch: [ 10/ 10], step: [ 360/ 390], loss: [0.3053], avg loss: [0.2619], time: [105.6628ms]\n", - "Epoch: [ 10/ 10], step: [ 361/ 390], loss: [0.3514], avg loss: [0.2622], time: [108.7244ms]\n", - "Epoch: [ 10/ 10], step: [ 362/ 390], loss: [0.2499], avg loss: [0.2621], time: [105.8607ms]\n", - "Epoch: [ 10/ 10], step: [ 363/ 390], loss: [0.2624], avg loss: [0.2621], time: [110.3516ms]\n", - "Epoch: [ 10/ 10], step: [ 364/ 390], loss: [0.2889], avg loss: [0.2622], time: [108.8624ms]\n", - "Epoch: [ 10/ 10], step: [ 365/ 390], loss: [0.2481], avg loss: [0.2622], time: [107.4762ms]\n", - "Epoch: [ 10/ 10], step: [ 366/ 390], loss: [0.2942], avg loss: [0.2623], time: [111.0370ms]\n", - "Epoch: [ 10/ 10], step: [ 367/ 390], loss: [0.3332], avg loss: [0.2625], time: [107.2378ms]\n", - "Epoch: [ 10/ 10], step: [ 368/ 390], loss: [0.3419], avg loss: [0.2627], time: [109.6377ms]\n", - "Epoch: [ 10/ 10], step: [ 369/ 390], loss: [0.1517], avg loss: [0.2624], time: [105.7010ms]\n", - "Epoch: [ 10/ 10], step: [ 370/ 390], loss: [0.2912], avg loss: [0.2625], time: [106.8776ms]\n", - "Epoch: [ 10/ 10], step: [ 371/ 390], loss: [0.2824], avg loss: [0.2625], time: [109.8986ms]\n", - "Epoch: [ 10/ 10], step: [ 372/ 390], loss: [0.2197], avg loss: [0.2624], time: [112.1969ms]\n", - "Epoch: [ 10/ 10], step: [ 373/ 390], loss: [0.4275], avg loss: [0.2628], time: [110.7540ms]\n", - "Epoch: [ 10/ 10], step: [ 374/ 390], loss: [0.3104], avg loss: [0.2630], time: [110.7924ms]\n", - "Epoch: [ 10/ 10], step: [ 375/ 390], loss: [0.1147], avg loss: [0.2626], time: [107.7850ms]\n", - "Epoch: [ 10/ 10], step: [ 376/ 390], loss: [0.2216], avg loss: [0.2625], time: [106.1397ms]\n", - "Epoch: [ 10/ 10], step: [ 377/ 390], loss: [0.2799], avg loss: [0.2625], time: [105.4490ms]\n", - "Epoch: [ 10/ 10], step: [ 378/ 390], loss: [0.2447], avg loss: [0.2625], time: [110.6384ms]\n", - "Epoch: [ 10/ 10], step: [ 379/ 390], loss: [0.2776], avg loss: [0.2625], time: [107.8119ms]\n", - "Epoch: [ 10/ 10], step: [ 380/ 390], loss: [0.3090], avg loss: [0.2626], time: [109.9536ms]\n", - "Epoch: [ 10/ 10], step: [ 381/ 390], loss: [0.2692], avg loss: [0.2626], time: [105.9964ms]\n", - "Epoch: [ 10/ 10], step: [ 382/ 390], loss: [0.3088], avg loss: [0.2628], time: [109.5507ms]\n", - "Epoch: [ 10/ 10], step: [ 383/ 390], loss: [0.2008], avg loss: [0.2626], time: [105.6182ms]\n", - "Epoch: [ 10/ 10], step: [ 384/ 390], loss: [0.1450], avg loss: [0.2623], time: [107.9791ms]\n", - "Epoch: [ 10/ 10], step: [ 385/ 390], loss: [0.2522], avg loss: [0.2623], time: [108.9914ms]\n", - "Epoch: [ 10/ 10], step: [ 386/ 390], loss: [0.2532], avg loss: [0.2622], time: [106.7832ms]\n", - "Epoch: [ 10/ 10], step: [ 387/ 390], loss: [0.3558], avg loss: [0.2625], time: [111.0179ms]\n", - "Epoch: [ 10/ 10], step: [ 388/ 390], loss: [0.2641], avg loss: [0.2625], time: [107.2817ms]\n" + "epoch: 10 step: 300, loss is 0.2784\n", + "epoch: 10 step: 301, loss is 0.2806\n", + "epoch: 10 step: 302, loss is 0.2436\n", + "epoch: 10 step: 303, loss is 0.3769\n", + "epoch: 10 step: 304, loss is 0.3425\n", + "epoch: 10 step: 305, loss is 0.2269\n", + "epoch: 10 step: 306, loss is 0.4220\n", + "epoch: 10 step: 307, loss is 0.2467\n", + "epoch: 10 step: 308, loss is 0.1316\n", + "epoch: 10 step: 309, loss is 0.1762\n", + "epoch: 10 step: 310, loss is 0.3126\n", + "epoch: 10 step: 311, loss is 0.3991\n", + "epoch: 10 step: 312, loss is 0.1567\n", + "epoch: 10 step: 313, loss is 0.2893\n", + "epoch: 10 step: 314, loss is 0.1417\n", + "epoch: 10 step: 315, loss is 0.2252\n", + "epoch: 10 step: 316, loss is 0.2381\n", + "epoch: 10 step: 317, loss is 0.2423\n", + "epoch: 10 step: 318, loss is 0.2374\n", + "epoch: 10 step: 319, loss is 0.2307\n", + "epoch: 10 step: 320, loss is 0.0773\n", + "epoch: 10 step: 321, loss is 0.2638\n", + "epoch: 10 step: 322, loss is 0.2122\n", + "epoch: 10 step: 323, loss is 0.3638\n", + "epoch: 10 step: 324, loss is 0.2257\n", + "epoch: 10 step: 325, loss is 0.1227\n", + "epoch: 10 step: 326, loss is 0.2076\n", + "epoch: 10 step: 327, loss is 0.3363\n", + "epoch: 10 step: 328, loss is 0.2720\n", + "epoch: 10 step: 329, loss is 0.3177\n", + "epoch: 10 step: 330, loss is 0.3589\n", + "epoch: 10 step: 331, loss is 0.2251\n", + "epoch: 10 step: 332, loss is 0.2356\n", + "epoch: 10 step: 333, loss is 0.2400\n", + "epoch: 10 step: 334, loss is 0.2644\n", + "epoch: 10 step: 335, loss is 0.1769\n", + "epoch: 10 step: 336, loss is 0.2161\n", + "epoch: 10 step: 337, loss is 0.2156\n", + "epoch: 10 step: 338, loss is 0.1552\n", + "epoch: 10 step: 339, loss is 0.3564\n", + "epoch: 10 step: 340, loss is 0.3401\n", + "epoch: 10 step: 341, loss is 0.2185\n", + "epoch: 10 step: 342, loss is 0.1962\n", + "epoch: 10 step: 343, loss is 0.2351\n", + "epoch: 10 step: 344, loss is 0.2256\n", + "epoch: 10 step: 345, loss is 0.3031\n", + "epoch: 10 step: 346, loss is 0.3497\n", + "epoch: 10 step: 347, loss is 0.3768\n", + "epoch: 10 step: 348, loss is 0.2074\n", + "epoch: 10 step: 349, loss is 0.1948\n", + "epoch: 10 step: 350, loss is 0.2780\n", + "epoch: 10 step: 351, loss is 0.2888\n", + "epoch: 10 step: 352, loss is 0.2742\n", + "epoch: 10 step: 353, loss is 0.3123\n", + "epoch: 10 step: 354, loss is 0.3578\n", + "epoch: 10 step: 355, loss is 0.1633\n", + "epoch: 10 step: 356, loss is 0.2015\n", + "epoch: 10 step: 357, loss is 0.2081\n", + "epoch: 10 step: 358, loss is 0.2807\n", + "epoch: 10 step: 359, loss is 0.2153\n", + "epoch: 10 step: 360, loss is 0.3053\n", + "epoch: 10 step: 361, loss is 0.3514\n", + "epoch: 10 step: 362, loss is 0.2499\n", + "epoch: 10 step: 363, loss is 0.2624\n", + "epoch: 10 step: 364, loss is 0.2889\n", + "epoch: 10 step: 365, loss is 0.2481\n", + "epoch: 10 step: 366, loss is 0.2942\n", + "epoch: 10 step: 367, loss is 0.3332\n", + "epoch: 10 step: 368, loss is 0.3419\n", + "epoch: 10 step: 369, loss is 0.1517\n", + "epoch: 10 step: 370, loss is 0.2912\n", + "epoch: 10 step: 371, loss is 0.2824\n", + "epoch: 10 step: 372, loss is 0.2197\n", + "epoch: 10 step: 373, loss is 0.4275\n", + "epoch: 10 step: 374, loss is 0.3104\n", + "epoch: 10 step: 375, loss is 0.1147\n", + "epoch: 10 step: 376, loss is 0.2216\n", + "epoch: 10 step: 377, loss is 0.2799\n", + "epoch: 10 step: 378, loss is 0.2447\n", + "epoch: 10 step: 379, loss is 0.2776\n", + "epoch: 10 step: 380, loss is 0.3090\n", + "epoch: 10 step: 381, loss is 0.2692\n", + "epoch: 10 step: 382, loss is 0.3088\n", + "epoch: 10 step: 383, loss is 0.2008\n", + "epoch: 10 step: 384, loss is 0.1450\n", + "epoch: 10 step: 385, loss is 0.2522\n", + "epoch: 10 step: 386, loss is 0.2532\n", + "epoch: 10 step: 387, loss is 0.3558\n", + "epoch: 10 step: 388, loss is 0.2641\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch: [ 10/ 10], step: [ 389/ 390], loss: [0.2334], avg loss: [0.2624], time: [110.2269ms]\n", - "Epoch: [ 10/ 10], step: [ 390/ 390], loss: [0.1966], avg loss: [0.2622], time: [829.2229ms]\n", - "Epoch time: 43320.503, per step time: 111.078\n", + "epoch: 10 step: 389, loss is 0.2334\n", + "epoch: 10 step: 390, loss is 0.1966\n", "Epoch time: 43320.815, per step time: 111.079, avg loss: 0.262\n", "************************************************************\n", "============== Training Success ==============\n" -- GitLab