From d27d28f84f4dd5e7905bf5fdf10127abbbb9d5ff Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Thu, 22 Nov 2018 18:37:08 +0800 Subject: [PATCH] code format --- fluid/PaddleRec/word2vec/network_conf.py | 64 +++++++++--------------- fluid/PaddleRec/word2vec/train.py | 21 +++++++- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/fluid/PaddleRec/word2vec/network_conf.py b/fluid/PaddleRec/word2vec/network_conf.py index f9eb4ca5..c3447dd4 100644 --- a/fluid/PaddleRec/word2vec/network_conf.py +++ b/fluid/PaddleRec/word2vec/network_conf.py @@ -54,23 +54,8 @@ def skip_gram_word2vec(dict_size, return cost - def hsigmoid_layer(input, label, non_leaf_num, max_code_length, data_list): - hs_cost = None - ptable = None - pcode = None - if max_code_length != None: - ptable = fluid.layers.data( - name='ptable', shape=[max_code_length], dtype='int64') - pcode = fluid.layers.data( - name='pcode', shape=[max_code_length], dtype='int64') - data_list.append(pcode) - data_list.append(ptable) - else: - ptable = fluid.layers.data(name='ptable', shape=[40], dtype='int64') - pcode = fluid.layers.data(name='pcode', shape=[40], dtype='int64') - data_list.append(pcode) - data_list.append(ptable) - if non_leaf_num == None: + def hsigmoid_layer(input, label, ptable, pcode, non_leaf_num): + if non_leaf_num is None: non_leaf_num = dict_size cost = fluid.layers.hsigmoid( @@ -83,46 +68,47 @@ def skip_gram_word2vec(dict_size, return cost - data_shapes = [] - data_lod_levels = [] - data_types = [] - - # input_word - data_shapes.append((-1, 1)) - data_lod_levels.append(1) - data_types.append('int64') - # predict_word - data_shapes.append((-1, 1)) - data_lod_levels.append(1) - data_types.append('int64') - datas = [] input_word = fluid.layers.data(name="input_word", shape=[1], dtype='int64') predict_word = fluid.layers.data(name='predict_word', shape=[1], dtype='int64') + datas.append(input_word) + datas.append(predict_word) + + if with_hsigmoid: + if max_code_length: + ptable = fluid.layers.data( + name='ptable', shape=[max_code_length], dtype='int64') + pcode = fluid.layers.data( + name='pcode', shape=[max_code_length], dtype='int64') + else: + ptable = fluid.layers.data(name='ptable', shape=[40], dtype='int64') + pcode = fluid.layers.data(name='pcode', shape=[40], dtype='int64') + datas.append(ptable) + datas.append(pcode) + + py_reader = fluid.layers.create_py_reader_by_data(capacity=64, + feed_list=datas, + name='py_reader', + use_double_buffer=True) - datas.append(input_word, predict_word) + words = fluid.layers.read_file(py_reader) cost = None emb = fluid.layers.embedding( - input=input_word, + input=words[0], is_sparse=is_sparse, size=[dict_size, embedding_size], param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal( scale=1 / math.sqrt(dict_size)))) if with_nce: - cost = nce_layer(emb, predict_word, embedding_size, dict_size, 5, "uniform", + cost = nce_layer(emb, words[1], embedding_size, dict_size, 5, "uniform", word_frequencys, None) if with_hsigmoid: - cost = hsigmoid_layer(emb, predict_word, dict_size, max_code_length, datas) + cost = hsigmoid_layer(emb, words[1], words[2], words[3], dict_size) avg_cost = fluid.layers.reduce_mean(cost) - py_reader = fluid.layers.create_py_reader_by_data(capacity=64, - feed_list=datas, - name='py_reader', - use_double_buffer=True) - return avg_cost, py_reader diff --git a/fluid/PaddleRec/word2vec/train.py b/fluid/PaddleRec/word2vec/train.py index 8b9417dc..1627812d 100644 --- a/fluid/PaddleRec/word2vec/train.py +++ b/fluid/PaddleRec/word2vec/train.py @@ -1,3 +1,20 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +train for word2vec +""" + from __future__ import print_function import argparse @@ -187,7 +204,7 @@ def train(): optimizer = fluid.optimizer.Adam(learning_rate=1e-3) optimizer.minimize(loss) - if os.environ["PADDLE_IS_LOCAL"] == "1": + if os.getenv("PADDLE_IS_LOCAL", "1") == "1": logger.info("run local training") main_program = fluid.default_main_program() train_loop(args, main_program, word2vec_reader, py_reader, loss, 0) @@ -235,4 +252,4 @@ def train(): if __name__ == '__main__': - train() + train() \ No newline at end of file -- GitLab