From 0ce38b77f2312390a61c61fbd05ec4b72347fea6 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Thu, 16 Nov 2017 22:16:22 -0600 Subject: [PATCH] correct optimizer import (#5699) --- .../v2/fluid/tests/book/test_fit_a_line.py | 29 ++--- .../book/test_image_classification_train.py | 100 ++++-------------- .../tests/book/test_recognize_digits_conv.py | 29 ++--- .../tests/book/test_recognize_digits_mlp.py | 35 ++---- .../tests/book/test_recommender_system.py | 99 +++++------------ .../book/test_understand_sentiment_conv.py | 11 +- .../test_understand_sentiment_dynamic_lstm.py | 10 +- .../book/test_understand_sentiment_lstm.py | 9 +- .../v2/fluid/tests/book/test_word2vec.py | 49 +++------ 9 files changed, 101 insertions(+), 270 deletions(-) diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py index ee677a2c56..a7f3bfc0ca 100644 --- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -1,33 +1,22 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework -from paddle.v2.fluid.io import save_persistables, load_persistables +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.io import save_persistables, load_persistables +from paddle.v2.fluid.optimizer import SGDOptimizer -import numpy as np - -x = layers.data( - name='x', - shape=[13], - data_type='float32') +x = layers.data(name='x', shape=[13], data_type='float32') -y_predict = layers.fc(input=x, - size=1, - act=None) +y_predict = layers.fc(input=x, size=1, act=None) -y = layers.data( - name='y', - shape=[1], - data_type='float32') +y = layers.data(name='y', shape=[1], data_type='float32') -cost = layers.square_error_cost( - input=y_predict, - label=y) +cost = layers.square_error_cost(input=y_predict, label=y) avg_cost = layers.mean(x=cost) -sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) +sgd_optimizer = SGDOptimizer(learning_rate=0.001) opts = sgd_optimizer.minimize(avg_cost) BATCH_SIZE = 20 diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index f4be835b3a..b850612550 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -1,21 +1,16 @@ import numpy as np import paddle.v2 as paddle import paddle.v2.fluid.core as core +import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets -import paddle.v2.fluid.optimizer as optimizer from paddle.v2.fluid.executor import Executor -import paddle.v2.fluid.framework as framework from paddle.v2.fluid.initializer import XavierInitializer +from paddle.v2.fluid.optimizer import AdamOptimizer def resnet_cifar10(input, depth=32): - def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - act='relu'): + def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): tmp = layers.conv2d( input=input, filter_size=filter_size, @@ -24,9 +19,7 @@ def resnet_cifar10(input, depth=32): padding=padding, act=None, bias_attr=False) - return layers.batch_norm( - input=tmp, - act=act) + return layers.batch_norm(input=tmp, act=act) def shortcut(input, ch_in, ch_out, stride, program, init_program): if ch_in != ch_out: @@ -35,28 +28,11 @@ def resnet_cifar10(input, depth=32): else: return input - def basicblock(input, - ch_in, - ch_out, - stride): - tmp = conv_bn_layer( - input, - ch_out, - 3, - stride, - 1) - tmp = conv_bn_layer( - tmp, - ch_out, - 3, - 1, - 1, - act=None) + def basicblock(input, ch_in, ch_out, stride): + tmp = conv_bn_layer(input, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None) short = shortcut(input, ch_in, ch_out, stride) - return layers.elementwise_add( - x=tmp, - y=short, - act='relu') + return layers.elementwise_add(x=tmp, y=short, act='relu') def layer_warp(block_func, input, ch_in, ch_out, count, stride): tmp = block_func(input, ch_in, ch_out, stride) @@ -67,45 +43,17 @@ def resnet_cifar10(input, depth=32): assert (depth - 2) % 6 == 0 n = (depth - 2) / 6 conv1 = conv_bn_layer( - input=input, - ch_out=16, - filter_size=3, - stride=1, - padding=1) - res1 = layer_warp( - basicblock, - conv1, - 16, - 16, - n, - 1) - res2 = layer_warp( - basicblock, - res1, - 16, - 32, - n, - 2) - res3 = layer_warp( - basicblock, - res2, - 32, - 64, - n, - 2) + input=input, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) + res2 = layer_warp(basicblock, res1, 16, 32, n, 2) + res3 = layer_warp(basicblock, res2, 32, 64, n, 2) pool = layers.pool2d( - input=res3, - pool_size=8, - pool_type='avg', - pool_stride=1) + input=res3, pool_size=8, pool_type='avg', pool_stride=1) return pool def vgg16_bn_drop(input): - def conv_block(input, - num_filter, - groups, - dropouts): + def conv_block(input, num_filter, groups, dropouts): return nets.img_conv_group( input=input, pool_size=2, @@ -123,22 +71,14 @@ def vgg16_bn_drop(input): conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - drop = layers.dropout( - x=conv5, - dropout_prob=0.5) + drop = layers.dropout(x=conv5, dropout_prob=0.5) fc1 = layers.fc(input=drop, size=512, act=None, param_attr={"initializer": XavierInitializer()}) - reshape1 = layers.reshape( - x=fc1, - shape=list(fc1.shape + (1, 1))) - bn = layers.batch_norm( - input=reshape1, - act='relu') - drop2 = layers.dropout( - x=bn, - dropout_prob=0.5) + reshape1 = layers.reshape(x=fc1, shape=list(fc1.shape + (1, 1))) + bn = layers.batch_norm(input=reshape1, act='relu') + drop2 = layers.dropout(x=bn, dropout_prob=0.5) fc2 = layers.fc(input=drop2, size=512, act=None, @@ -165,8 +105,8 @@ cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) accuracy = layers.accuracy(input=predict, label=label) -# optimizer = optimizer.SGDOptimizer(learning_rate=0.001) -optimizer = optimizer.AdamOptimizer(learning_rate=0.001) +# optimizer = SGDOptimizer(learning_rate=0.001) +optimizer = AdamOptimizer(learning_rate=0.001) opts = optimizer.minimize(avg_cost) BATCH_SIZE = 128 diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py index f330ff5813..75fbaf83e8 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py @@ -1,22 +1,15 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.evaluator as evaluator import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.optimizer import AdamOptimizer -import numpy as np - -images = layers.data( - name='pixel', - shape=[1, 28, 28], - data_type='float32') -label = layers.data( - name='label', - shape=[1], - data_type='int64') +images = layers.data(name='pixel', shape=[1, 28, 28], data_type='float32') +label = layers.data(name='label', shape=[1], data_type='int64') conv_pool_1 = nets.simple_img_conv_pool( input=images, filter_size=5, @@ -32,17 +25,13 @@ conv_pool_2 = nets.simple_img_conv_pool( pool_stride=2, act="relu") -predict = layers.fc(input=conv_pool_2, - size=10, - act="softmax") +predict = layers.fc(input=conv_pool_2, size=10, act="softmax") cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) -optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) +optimizer = AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) opts = optimizer.minimize(avg_cost) -accuracy, acc_out = evaluator.accuracy( - input=predict, - label=label) +accuracy, acc_out = evaluator.accuracy(input=predict, label=label) BATCH_SIZE = 50 PASS_NUM = 3 diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index b0164e3e36..cf10b1942e 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -1,19 +1,15 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.regularizer import L2DecayRegularizer from paddle.v2.fluid.initializer import UniformInitializer - -import numpy as np +from paddle.v2.fluid.optimizer import MomentumOptimizer +from paddle.v2.fluid.regularizer import L2DecayRegularizer BATCH_SIZE = 128 -image = layers.data( - name='x', - shape=[784], - data_type='float32') +image = layers.data(name='x', shape=[784], data_type='float32') param_attr = { 'name': None, @@ -22,32 +18,21 @@ param_attr = { 'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE) } -hidden1 = layers.fc(input=image, - size=128, - act='relu', - param_attr=param_attr) -hidden2 = layers.fc(input=hidden1, - size=64, - act='relu', - param_attr=param_attr) +hidden1 = layers.fc(input=image, size=128, act='relu', param_attr=param_attr) +hidden2 = layers.fc(input=hidden1, size=64, act='relu', param_attr=param_attr) predict = layers.fc(input=hidden2, size=10, act='softmax', param_attr=param_attr) -label = layers.data( - name='y', - shape=[1], - data_type='int64') +label = layers.data(name='y', shape=[1], data_type='int64') cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) -accuracy = layers.accuracy( - input=predict, - label=label) +accuracy = layers.accuracy(input=predict, label=label) -optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) +optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) opts = optimizer.minimize(avg_cost) train_reader = paddle.batch( diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index eefcb55beb..55ded3aed3 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -1,12 +1,11 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import SGDOptimizer IS_SPARSE = True USE_GPU = False @@ -19,10 +18,7 @@ def get_usr_combined_features(): USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 - uid = layers.data( - name='user_id', - shape=[1], - data_type='int64') + uid = layers.data(name='user_id', shape=[1], data_type='int64') usr_emb = layers.embedding( input=uid, @@ -31,15 +27,11 @@ def get_usr_combined_features(): param_attr={'name': 'user_table'}, is_sparse=IS_SPARSE) - usr_fc = layers.fc(input=usr_emb, - size=32) + usr_fc = layers.fc(input=usr_emb, size=32) USR_GENDER_DICT_SIZE = 2 - usr_gender_id = layers.data( - name='gender_id', - shape=[1], - data_type='int64') + usr_gender_id = layers.data(name='gender_id', shape=[1], data_type='int64') usr_gender_emb = layers.embedding( input=usr_gender_id, @@ -47,14 +39,10 @@ def get_usr_combined_features(): param_attr={'name': 'gender_table'}, is_sparse=IS_SPARSE) - usr_gender_fc = layers.fc(input=usr_gender_emb, - size=16) + usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) - usr_age_id = layers.data( - name='age_id', - shape=[1], - data_type="int64") + usr_age_id = layers.data(name='age_id', shape=[1], data_type="int64") usr_age_emb = layers.embedding( input=usr_age_id, @@ -62,14 +50,10 @@ def get_usr_combined_features(): is_sparse=IS_SPARSE, param_attr={'name': 'age_table'}) - usr_age_fc = layers.fc(input=usr_age_emb, - size=16) + usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 - usr_job_id = layers.data( - name='job_id', - shape=[1], - data_type="int64") + usr_job_id = layers.data(name='job_id', shape=[1], data_type="int64") usr_job_emb = layers.embedding( input=usr_job_id, @@ -77,16 +61,12 @@ def get_usr_combined_features(): param_attr={'name': 'job_table'}, is_sparse=IS_SPARSE) - usr_job_fc = layers.fc(input=usr_job_emb, - size=16) + usr_job_fc = layers.fc(input=usr_job_emb, size=16) concat_embed = layers.concat( - input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], - axis=1) + input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1) - usr_combined_features = layers.fc(input=concat_embed, - size=200, - act="tanh") + usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return usr_combined_features @@ -95,10 +75,7 @@ def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 - mov_id = layers.data( - name='movie_id', - shape=[1], - data_type='int64') + mov_id = layers.data(name='movie_id', shape=[1], data_type='int64') mov_emb = layers.embedding( input=mov_id, @@ -107,36 +84,24 @@ def get_mov_combined_features(): param_attr={'name': 'movie_table'}, is_sparse=IS_SPARSE) - mov_fc = layers.fc(input=mov_emb, - size=32) + mov_fc = layers.fc(input=mov_emb, size=32) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - category_id = layers.data( - name='category_id', - shape=[1], - data_type='int64') + category_id = layers.data(name='category_id', shape=[1], data_type='int64') mov_categories_emb = layers.embedding( - input=category_id, - size=[CATEGORY_DICT_SIZE, 32], - is_sparse=IS_SPARSE) + input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) mov_categories_hidden = layers.sequence_pool( - input=mov_categories_emb, - pool_type="sum") + input=mov_categories_emb, pool_type="sum") MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - mov_title_id = layers.data( - name='movie_title', - shape=[1], - data_type='int64') + mov_title_id = layers.data(name='movie_title', shape=[1], data_type='int64') mov_title_emb = layers.embedding( - input=mov_title_id, - size=[MOV_TITLE_DICT_SIZE, 32], - is_sparse=IS_SPARSE) + input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) mov_title_conv = nets.sequence_conv_pool( input=mov_title_emb, @@ -146,13 +111,10 @@ def get_mov_combined_features(): pool_type="sum") concat_embed = layers.concat( - input=[mov_fc, mov_categories_hidden, mov_title_conv], - axis=1) + input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1) # FIXME(dzh) : need tanh operator - mov_combined_features = layers.fc(input=concat_embed, - size=200, - act="tanh") + mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return mov_combined_features @@ -162,18 +124,11 @@ def model(): mov_combined_features = get_mov_combined_features() # need cos sim - inference = layers.cos_sim( - X=usr_combined_features, - Y=mov_combined_features) + inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) - label = layers.data( - name='score', - shape=[1], - data_type='float32') + label = layers.data(name='score', shape=[1], data_type='float32') - square_cost = layers.square_error_cost( - input=inference, - label=label) + square_cost = layers.square_error_cost(input=inference, label=label) avg_cost = layers.mean(x=square_cost) @@ -182,7 +137,7 @@ def model(): def main(): cost = model() - sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2) + sgd_optimizer = SGDOptimizer(learning_rate=0.2) opts = sgd_optimizer.minimize(cost) if USE_GPU: diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py index 91fc79a987..e69b915a9c 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py @@ -1,12 +1,11 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): @@ -31,7 +30,7 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): act="softmax") cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) acc = layers.accuracy(input=prediction, label=label) return avg_cost, acc diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py index 8c3d448835..65d4454250 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py @@ -1,12 +1,10 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def stacked_lstm_net(input_dim, @@ -41,7 +39,7 @@ def stacked_lstm_net(input_dim, act='softmax') cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) acc = layers.accuracy(input=prediction, label=label) return avg_cost, acc diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py index a7d791c1f3..280f6e902c 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py @@ -1,11 +1,10 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): @@ -33,7 +32,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) acc = layers.accuracy(input=prediction, label=label) diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index 9dcb6f2fea..afa7b28519 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -1,11 +1,10 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import SGDOptimizer PASS_NUM = 100 EMBED_SIZE = 32 @@ -17,26 +16,11 @@ IS_SPARSE = True word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) -first_word = layers.data( - name='firstw', - shape=[1], - data_type='int64') -second_word = layers.data( - name='secondw', - shape=[1], - data_type='int64') -third_word = layers.data( - name='thirdw', - shape=[1], - data_type='int64') -forth_word = layers.data( - name='forthw', - shape=[1], - data_type='int64') -next_word = layers.data( - name='nextw', - shape=[1], - data_type='int64') +first_word = layers.data(name='firstw', shape=[1], data_type='int64') +second_word = layers.data(name='secondw', shape=[1], data_type='int64') +third_word = layers.data(name='thirdw', shape=[1], data_type='int64') +forth_word = layers.data(name='forthw', shape=[1], data_type='int64') +next_word = layers.data(name='nextw', shape=[1], data_type='int64') embed_first = layers.embedding( input=first_word, @@ -64,19 +48,12 @@ embed_forth = layers.embedding( param_attr={'name': 'shared_w'}) concat_embed = layers.concat( - input=[embed_first, embed_second, embed_third, embed_forth], - axis=1) -hidden1 = layers.fc(input=concat_embed, - size=HIDDEN_SIZE, - act='sigmoid') -predict_word = layers.fc(input=hidden1, - size=dict_size, - act='softmax') -cost = layers.cross_entropy( - input=predict_word, - label=next_word) + input=[embed_first, embed_second, embed_third, embed_forth], axis=1) +hidden1 = layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') +predict_word = layers.fc(input=hidden1, size=dict_size, act='softmax') +cost = layers.cross_entropy(input=predict_word, label=next_word) avg_cost = layers.mean(x=cost) -sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) +sgd_optimizer = SGDOptimizer(learning_rate=0.001) opts = sgd_optimizer.minimize(avg_cost) train_reader = paddle.batch( -- GitLab