未验证 提交 928d178c 编写于 作者: D daminglu 提交者: GitHub

Chap 4 word2vec: polish chapter content (#541)

上级 1b3bb17b
......@@ -329,7 +329,9 @@ def train(use_cuda, train_program, params_dirname):
if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost)
# If average cost is lower than 5.0, we consider the model good enough to stop.
# If average cost is lower than 5.8, we consider the model good enough to stop.
# Note 5.8 is a relatively high value. In order to get a better model, one should
# aim for avg_cost lower than 3.5. But the training could take longer time.
if avg_cost < 5.8:
trainer.save_params(params_dirname)
trainer.stop()
......@@ -383,16 +385,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists.
lod1 = [[211]] # 'among'
lod2 = [[6]] # 'a'
lod3 = [[96]] # 'group'
lod4 = [[4]] # 'of'
base_shape = [1]
first_word = fluid.create_lod_tensor(lod1, base_shape, place)
second_word = fluid.create_lod_tensor(lod2, base_shape, place)
third_word = fluid.create_lod_tensor(lod3, base_shape, place)
fourth_word = fluid.create_lod_tensor(lod4, base_shape, place)
data1 = [[211]] # 'among'
data2 = [[6]] # 'a'
data3 = [[96]] # 'group'
data4 = [[4]] # 'of'
lod = [[1]]
first_word = fluid.create_lod_tensor(data1, lod, place)
second_word = fluid.create_lod_tensor(data2, lod, place)
third_word = fluid.create_lod_tensor(data3, lod, place)
fourth_word = fluid.create_lod_tensor(data4, lod, place)
result = inferencer.infer(
{
......@@ -406,16 +409,18 @@ def infer(use_cuda, inference_program, params_dirname=None):
print(numpy.array(result[0]))
most_possible_word_index = numpy.argmax(result[0])
print(most_possible_word_index)
print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0])
print([
key for key, value in word_dict.iteritems()
if value == most_possible_word_index
][0])
```
When we spent 30 mins in training, the output is like below, which means the next word for `among a group of` is `unknown`. After several hours training, it gives a meaningful prediction as `workers`.
After about 3 minutes of training, the output looks like the example below, which means the predicted next word for `among a group of` is `a`. If we train the model for a longer time, it will give a meaningful prediction such as `workers`.
```text
[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04
8.9233123e-05 1.5757295e-01]]
2072
<unk>
[[0.00106646 0.0007907 0.00072041 ... 0.00049024 0.00041355 0.00084464]]
6
a
```
The main entrance of the program is fairly simple:
......
......@@ -371,7 +371,9 @@ def train(use_cuda, train_program, params_dirname):
if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost)
# If average cost is lower than 5.0, we consider the model good enough to stop.
# If average cost is lower than 5.8, we consider the model good enough to stop.
# Note 5.8 is a relatively high value. In order to get a better model, one should
# aim for avg_cost lower than 3.5. But the training could take longer time.
if avg_cost < 5.8:
trainer.save_params(params_dirname)
trainer.stop()
......@@ -425,16 +427,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists.
lod1 = [[211]] # 'among'
lod2 = [[6]] # 'a'
lod3 = [[96]] # 'group'
lod4 = [[4]] # 'of'
base_shape = [1]
first_word = fluid.create_lod_tensor(lod1, base_shape, place)
second_word = fluid.create_lod_tensor(lod2, base_shape, place)
third_word = fluid.create_lod_tensor(lod3, base_shape, place)
fourth_word = fluid.create_lod_tensor(lod4, base_shape, place)
data1 = [[211]] # 'among'
data2 = [[6]] # 'a'
data3 = [[96]] # 'group'
data4 = [[4]] # 'of'
lod = [[1]]
first_word = fluid.create_lod_tensor(data1, lod, place)
second_word = fluid.create_lod_tensor(data2, lod, place)
third_word = fluid.create_lod_tensor(data3, lod, place)
fourth_word = fluid.create_lod_tensor(data4, lod, place)
result = inferencer.infer(
{
......@@ -448,16 +451,18 @@ def infer(use_cuda, inference_program, params_dirname=None):
print(numpy.array(result[0]))
most_possible_word_index = numpy.argmax(result[0])
print(most_possible_word_index)
print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0])
print([
key for key, value in word_dict.iteritems()
if value == most_possible_word_index
][0])
```
When we spent 30 mins in training, the output is like below, which means the next word for `among a group of` is `unknown`. After several hours training, it gives a meaningful prediction as `workers`.
After about 3 minutes of training, the output looks like the example below, which means the predicted next word for `among a group of` is `board`. If we train the model for a longer time, it will give a meaningful prediction such as `workers`.
```text
[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04
8.9233123e-05 1.5757295e-01]]
2072
<unk>
[[0.00144043 0.00073983 0.00042264 ... 0.00061815 0.00038701 0.00099838]]
142
board
```
The main entrance of the program is fairly simple:
......
......@@ -107,6 +107,9 @@ def train(use_cuda, train_program, params_dirname):
if event.step % 10 == 0:
print "Step %d: Average Cost %f" % (event.step, avg_cost)
# If average cost is lower than 5.8, we consider the model good enough to stop.
# Note 5.8 is a relatively high value. In order to get a better model, one should
# aim for avg_cost lower than 3.5. But the training could take longer time.
if avg_cost < 5.8:
trainer.save_params(params_dirname)
trainer.stop()
......@@ -138,17 +141,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
# one lod_level and there is only one sequence of one word on this level.
# Note that lod info should be a list of lists.
data1 = [[211]] # 'among'
data2 = [[6]] # 'a'
data3 = [[96]] # 'group'
data4 = [[4]] # 'of'
lod = [[1]]
base_shape = [1]
# The range of random integers is [low, high]
first_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
second_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
third_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
fourth_word = fluid.create_random_int_lodtensor(
lod, base_shape, place, low=0, high=dict_size - 1)
first_word = fluid.create_lod_tensor(data1, lod, place)
second_word = fluid.create_lod_tensor(data2, lod, place)
third_word = fluid.create_lod_tensor(data3, lod, place)
fourth_word = fluid.create_lod_tensor(data4, lod, place)
result = inferencer.infer(
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册