Chap 4 word2vec: polish chapter content (#541)

928d178c · daminglu · GitHub · 1b3bb17b · 928d178c · 928d178c
隐藏空白更改
内联并排

Showing 55 additions and 42 deletions

04.word2vec/README.md 04.word2vec/README.md +21 -16

04.word2vec/index.html 04.word2vec/index.html +21 -16

04.word2vec/train.py 04.word2vec/train.py +13 -10

未找到文件。
--- a/04.word2vec/README.md
+++ b/04.word2vec/README.md
@@ -329,7 +329,9 @@ def train(use_cuda, train_program, params_dirname):
            if event.step % 10 == 0:
                print "Step %d: Average Cost %f" % (event.step, avg_cost)
-            # If average cost is lower than 5.0, we consider the model good enough to stop.
+            # If average cost is lower than 5.8, we consider the model good enough to stop.
+            # Note that 5.8 is a relatively high value. To get a better model, one should
+            # aim for an avg_cost lower than 3.5, but the training will then take longer.
            if avg_cost < 5.8:
                trainer.save_params(params_dirname)
                trainer.stop()
@@ -383,16 +385,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
    # detail (lod) info of each LoDtensor should be [[1]] meaning there is only
    # one lod_level and there is only one sequence of one word on this level.
    # Note that lod info should be a list of lists.
-    lod1 = [[211]]  # 'among'
-    lod2 = [[6]]    # 'a'
-    lod3 = [[96]]   # 'group'
-    lod4 = [[4]]    # 'of'
-    base_shape = [1]
-    first_word  = fluid.create_lod_tensor(lod1, base_shape, place)
+    data1 = [[211]]  # 'among'
-    second_word = fluid.create_lod_tensor(lod2, base_shape, place)
+    data2 = [[6]]    # 'a'
-    third_word  = fluid.create_lod_tensor(lod3, base_shape, place)
+    data3 = [[96]]   # 'group'
-    fourth_word = fluid.create_lod_tensor(lod4, base_shape, place)
+    data4 = [[4]]    # 'of'
+    lod = [[1]]
+    first_word  = fluid.create_lod_tensor(data1, lod, place)
+    second_word = fluid.create_lod_tensor(data2, lod, place)
+    third_word  = fluid.create_lod_tensor(data3, lod, place)
+    fourth_word = fluid.create_lod_tensor(data4, lod, place)
    result = inferencer.infer(
        {
@@ -406,16 +409,18 @@ def infer(use_cuda, inference_program, params_dirname=None):
    print(numpy.array(result[0]))
    most_possible_word_index = numpy.argmax(result[0])
    print(most_possible_word_index)
-    print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0])
+    print([
+        key for key, value in word_dict.iteritems()
+        if value == most_possible_word_index
+    ][0])
 ```
-When we spent 30 mins in training, the output is like below, which means the next word for `among a group of` is `unknown`. After several hours training, it gives a meaningful prediction as `workers`.
+After about 3 minutes of training, the output looks like the following, which means the predicted next word for `among a group of` is `a`. If we train the model for longer, it will give a more meaningful prediction, such as `workers`.
 ```text
-[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04
+[[0.00106646 0.0007907  0.00072041 ... 0.00049024 0.00041355 0.00084464]]
-  8.9233123e-05 1.5757295e-01]]
+6
-2072
+a
-<unk>
 ```
 The main entrance of the program is fairly simple:

--- a/04.word2vec/index.html
+++ b/04.word2vec/index.html
@@ -371,7 +371,9 @@ def train(use_cuda, train_program, params_dirname):
            if event.step % 10 == 0:
                print "Step %d: Average Cost %f" % (event.step, avg_cost)
-            # If average cost is lower than 5.0, we consider the model good enough to stop.
+            # If average cost is lower than 5.8, we consider the model good enough to stop.
+            # Note that 5.8 is a relatively high value. To get a better model, one should
+            # aim for an avg_cost lower than 3.5, but the training will then take longer.
            if avg_cost < 5.8:
                trainer.save_params(params_dirname)
                trainer.stop()
@@ -425,16 +427,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
    # detail (lod) info of each LoDtensor should be [[1]] meaning there is only
    # one lod_level and there is only one sequence of one word on this level.
    # Note that lod info should be a list of lists.
-    lod1 = [[211]]  # 'among'
-    lod2 = [[6]]    # 'a'
-    lod3 = [[96]]   # 'group'
-    lod4 = [[4]]    # 'of'
-    base_shape = [1]
-    first_word  = fluid.create_lod_tensor(lod1, base_shape, place)
+    data1 = [[211]]  # 'among'
-    second_word = fluid.create_lod_tensor(lod2, base_shape, place)
+    data2 = [[6]]    # 'a'
-    third_word  = fluid.create_lod_tensor(lod3, base_shape, place)
+    data3 = [[96]]   # 'group'
-    fourth_word = fluid.create_lod_tensor(lod4, base_shape, place)
+    data4 = [[4]]    # 'of'
+    lod = [[1]]
+    first_word  = fluid.create_lod_tensor(data1, lod, place)
+    second_word = fluid.create_lod_tensor(data2, lod, place)
+    third_word  = fluid.create_lod_tensor(data3, lod, place)
+    fourth_word = fluid.create_lod_tensor(data4, lod, place)
    result = inferencer.infer(
        {
@@ -448,16 +451,18 @@ def infer(use_cuda, inference_program, params_dirname=None):
    print(numpy.array(result[0]))
    most_possible_word_index = numpy.argmax(result[0])
    print(most_possible_word_index)
-    print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0])
+    print([
+        key for key, value in word_dict.iteritems()
+        if value == most_possible_word_index
+    ][0])
 ```
-When we spent 30 mins in training, the output is like below, which means the next word for `among a group of` is `unknown`. After several hours training, it gives a meaningful prediction as `workers`.
+After about 3 minutes of training, the output looks like the following, which means the predicted next word for `among a group of` is `board`. If we train the model for longer, it will give a more meaningful prediction, such as `workers`.
 ```text
-[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04
+[[0.00144043 0.00073983 0.00042264 ... 0.00061815 0.00038701 0.00099838]]
-  8.9233123e-05 1.5757295e-01]]
+142
-2072
+board
-<unk>
 ```
 The main entrance of the program is fairly simple:

--- a/04.word2vec/train.py
+++ b/04.word2vec/train.py
@@ -107,6 +107,9 @@ def train(use_cuda, train_program, params_dirname):
            if event.step % 10 == 0:
                print "Step %d: Average Cost %f" % (event.step, avg_cost)
+            # If average cost is lower than 5.8, we consider the model good enough to stop.
+            # Note that 5.8 is a relatively high value. To get a better model, one should
+            # aim for an avg_cost lower than 3.5, but the training will then take longer.
            if avg_cost < 5.8:
                trainer.save_params(params_dirname)
                trainer.stop()
@@ -138,17 +141,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
    # detail (lod) info of each LoDtensor should be [[1]] meaning there is only
    # one lod_level and there is only one sequence of one word on this level.
    # Note that lod info should be a list of lists.
+    data1 = [[211]]  # 'among'
+    data2 = [[6]]  # 'a'
+    data3 = [[96]]  # 'group'
+    data4 = [[4]]  # 'of'
    lod = [[1]]
-    base_shape = [1]
-    # The range of random integers is [low, high]
+    first_word = fluid.create_lod_tensor(data1, lod, place)
-    first_word = fluid.create_random_int_lodtensor(
+    second_word = fluid.create_lod_tensor(data2, lod, place)
-        lod, base_shape, place, low=0, high=dict_size - 1)
+    third_word = fluid.create_lod_tensor(data3, lod, place)
-    second_word = fluid.create_random_int_lodtensor(
+    fourth_word = fluid.create_lod_tensor(data4, lod, place)
-        lod, base_shape, place, low=0, high=dict_size - 1)
-    third_word = fluid.create_random_int_lodtensor(
-        lod, base_shape, place, low=0, high=dict_size - 1)
-    fourth_word = fluid.create_random_int_lodtensor(
-        lod, base_shape, place, low=0, high=dict_size - 1)
    result = inferencer.infer(
        {