未验证 提交 928d178c 编写于 作者: D daminglu 提交者: GitHub

Chap 4 word2vec: polish chapter content (#541)

上级 1b3bb17b
...@@ -329,7 +329,9 @@ def train(use_cuda, train_program, params_dirname): ...@@ -329,7 +329,9 @@ def train(use_cuda, train_program, params_dirname):
if event.step % 10 == 0: if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost) print "Step %d: Average Cost %f" % (event.step, avg_cost)
# If average cost is lower than 5.0, we consider the model good enough to stop. # If average cost is lower than 5.8, we consider the model good enough to stop.
# Note that 5.8 is a relatively high value. To get a better model, one should
# aim for an avg_cost lower than 3.5, but the training will then take longer.
if avg_cost < 5.8: if avg_cost < 5.8:
trainer.save_params(params_dirname) trainer.save_params(params_dirname)
trainer.stop() trainer.stop()
...@@ -383,16 +385,17 @@ def infer(use_cuda, inference_program, params_dirname=None): ...@@ -383,16 +385,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only # detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level. # one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists. # Note that lod info should be a list of lists.
lod1 = [[211]] # 'among'
lod2 = [[6]] # 'a'
lod3 = [[96]] # 'group'
lod4 = [[4]] # 'of'
base_shape = [1]
first_word = fluid.create_lod_tensor(lod1, base_shape, place) data1 = [[211]] # 'among'
second_word = fluid.create_lod_tensor(lod2, base_shape, place) data2 = [[6]] # 'a'
third_word = fluid.create_lod_tensor(lod3, base_shape, place) data3 = [[96]] # 'group'
fourth_word = fluid.create_lod_tensor(lod4, base_shape, place) data4 = [[4]] # 'of'
lod = [[1]]
first_word = fluid.create_lod_tensor(data1, lod, place)
second_word = fluid.create_lod_tensor(data2, lod, place)
third_word = fluid.create_lod_tensor(data3, lod, place)
fourth_word = fluid.create_lod_tensor(data4, lod, place)
result = inferencer.infer( result = inferencer.infer(
{ {
...@@ -406,16 +409,18 @@ def infer(use_cuda, inference_program, params_dirname=None): ...@@ -406,16 +409,18 @@ def infer(use_cuda, inference_program, params_dirname=None):
print(numpy.array(result[0])) print(numpy.array(result[0]))
most_possible_word_index = numpy.argmax(result[0]) most_possible_word_index = numpy.argmax(result[0])
print(most_possible_word_index) print(most_possible_word_index)
print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0]) print([
key for key, value in word_dict.iteritems()
if value == most_possible_word_index
][0])
``` ```
After 30 minutes of training, the output looks like the following, which means the predicted next word for `among a group of` is `unknown`. After several hours of training, it gives a meaningful prediction: `workers`. After 3 minutes of training, the output looks like the following, which means the predicted next word for `among a group of` is `a`. If we train the model for a longer time, it will give a meaningful prediction: `workers`.
```text ```text
[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04 [[0.00106646 0.0007907 0.00072041 ... 0.00049024 0.00041355 0.00084464]]
8.9233123e-05 1.5757295e-01]] 6
2072 a
<unk>
``` ```
The main entrance of the program is fairly simple: The main entrance of the program is fairly simple:
......
...@@ -371,7 +371,9 @@ def train(use_cuda, train_program, params_dirname): ...@@ -371,7 +371,9 @@ def train(use_cuda, train_program, params_dirname):
if event.step % 10 == 0: if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost) print "Step %d: Average Cost %f" % (event.step, avg_cost)
# If average cost is lower than 5.0, we consider the model good enough to stop. # If average cost is lower than 5.8, we consider the model good enough to stop.
# Note that 5.8 is a relatively high value. To get a better model, one should
# aim for an avg_cost lower than 3.5, but the training will then take longer.
if avg_cost < 5.8: if avg_cost < 5.8:
trainer.save_params(params_dirname) trainer.save_params(params_dirname)
trainer.stop() trainer.stop()
...@@ -425,16 +427,17 @@ def infer(use_cuda, inference_program, params_dirname=None): ...@@ -425,16 +427,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only # detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level. # one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists. # Note that lod info should be a list of lists.
lod1 = [[211]] # 'among'
lod2 = [[6]] # 'a'
lod3 = [[96]] # 'group'
lod4 = [[4]] # 'of'
base_shape = [1]
first_word = fluid.create_lod_tensor(lod1, base_shape, place) data1 = [[211]] # 'among'
second_word = fluid.create_lod_tensor(lod2, base_shape, place) data2 = [[6]] # 'a'
third_word = fluid.create_lod_tensor(lod3, base_shape, place) data3 = [[96]] # 'group'
fourth_word = fluid.create_lod_tensor(lod4, base_shape, place) data4 = [[4]] # 'of'
lod = [[1]]
first_word = fluid.create_lod_tensor(data1, lod, place)
second_word = fluid.create_lod_tensor(data2, lod, place)
third_word = fluid.create_lod_tensor(data3, lod, place)
fourth_word = fluid.create_lod_tensor(data4, lod, place)
result = inferencer.infer( result = inferencer.infer(
{ {
...@@ -448,16 +451,18 @@ def infer(use_cuda, inference_program, params_dirname=None): ...@@ -448,16 +451,18 @@ def infer(use_cuda, inference_program, params_dirname=None):
print(numpy.array(result[0])) print(numpy.array(result[0]))
most_possible_word_index = numpy.argmax(result[0]) most_possible_word_index = numpy.argmax(result[0])
print(most_possible_word_index) print(most_possible_word_index)
print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0]) print([
key for key, value in word_dict.iteritems()
if value == most_possible_word_index
][0])
``` ```
After 30 minutes of training, the output looks like the following, which means the predicted next word for `among a group of` is `unknown`. After several hours of training, it gives a meaningful prediction: `workers`. After 3 minutes of training, the output looks like the following, which means the predicted next word for `among a group of` is `board`. If we train the model for a longer time, it will give a meaningful prediction: `workers`.
```text ```text
[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04 [[0.00144043 0.00073983 0.00042264 ... 0.00061815 0.00038701 0.00099838]]
8.9233123e-05 1.5757295e-01]] 142
2072 board
<unk>
``` ```
The main entrance of the program is fairly simple: The main entrance of the program is fairly simple:
......
...@@ -107,6 +107,9 @@ def train(use_cuda, train_program, params_dirname): ...@@ -107,6 +107,9 @@ def train(use_cuda, train_program, params_dirname):
if event.step % 10 == 0: if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost) print "Step %d: Average Cost %f" % (event.step, avg_cost)
# If average cost is lower than 5.8, we consider the model good enough to stop.
# Note that 5.8 is a relatively high value. To get a better model, one should
# aim for an avg_cost lower than 3.5, but the training will then take longer.
if avg_cost < 5.8: if avg_cost < 5.8:
trainer.save_params(params_dirname) trainer.save_params(params_dirname)
trainer.stop() trainer.stop()
...@@ -138,17 +141,17 @@ def infer(use_cuda, inference_program, params_dirname=None): ...@@ -138,17 +141,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only # detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level. # one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists. # Note that lod info should be a list of lists.
data1 = [[211]] # 'among'
data2 = [[6]] # 'a'
data3 = [[96]] # 'group'
data4 = [[4]] # 'of'
lod = [[1]] lod = [[1]]
base_shape = [1]
# The range of random integers is [low, high] first_word = fluid.create_lod_tensor(data1, lod, place)
first_word = fluid.create_random_int_lodtensor( second_word = fluid.create_lod_tensor(data2, lod, place)
lod, base_shape, place, low=0, high=dict_size - 1) third_word = fluid.create_lod_tensor(data3, lod, place)
second_word = fluid.create_random_int_lodtensor( fourth_word = fluid.create_lod_tensor(data4, lod, place)
lod, base_shape, place, low=0, high=dict_size - 1)
third_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
fourth_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
result = inferencer.infer( result = inferencer.infer(
{ {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册