提交 d27d28f8 作者: tangwei12

code format

上级 f8cddad5
...@@ -54,23 +54,8 @@ def skip_gram_word2vec(dict_size, ...@@ -54,23 +54,8 @@ def skip_gram_word2vec(dict_size,
return cost return cost
def hsigmoid_layer(input, label, non_leaf_num, max_code_length, data_list): def hsigmoid_layer(input, label, ptable, pcode, non_leaf_num):
hs_cost = None if non_leaf_num is None:
ptable = None
pcode = None
if max_code_length != None:
ptable = fluid.layers.data(
name='ptable', shape=[max_code_length], dtype='int64')
pcode = fluid.layers.data(
name='pcode', shape=[max_code_length], dtype='int64')
data_list.append(pcode)
data_list.append(ptable)
else:
ptable = fluid.layers.data(name='ptable', shape=[40], dtype='int64')
pcode = fluid.layers.data(name='pcode', shape=[40], dtype='int64')
data_list.append(pcode)
data_list.append(ptable)
if non_leaf_num == None:
non_leaf_num = dict_size non_leaf_num = dict_size
cost = fluid.layers.hsigmoid( cost = fluid.layers.hsigmoid(
...@@ -83,46 +68,47 @@ def skip_gram_word2vec(dict_size, ...@@ -83,46 +68,47 @@ def skip_gram_word2vec(dict_size,
return cost return cost
data_shapes = []
data_lod_levels = []
data_types = []
# input_word
data_shapes.append((-1, 1))
data_lod_levels.append(1)
data_types.append('int64')
# predict_word
data_shapes.append((-1, 1))
data_lod_levels.append(1)
data_types.append('int64')
datas = [] datas = []
input_word = fluid.layers.data(name="input_word", shape=[1], dtype='int64') input_word = fluid.layers.data(name="input_word", shape=[1], dtype='int64')
predict_word = fluid.layers.data(name='predict_word', shape=[1], dtype='int64') predict_word = fluid.layers.data(name='predict_word', shape=[1], dtype='int64')
datas.append(input_word)
datas.append(predict_word)
if with_hsigmoid:
if max_code_length:
ptable = fluid.layers.data(
name='ptable', shape=[max_code_length], dtype='int64')
pcode = fluid.layers.data(
name='pcode', shape=[max_code_length], dtype='int64')
else:
ptable = fluid.layers.data(name='ptable', shape=[40], dtype='int64')
pcode = fluid.layers.data(name='pcode', shape=[40], dtype='int64')
datas.append(ptable)
datas.append(pcode)
py_reader = fluid.layers.create_py_reader_by_data(capacity=64,
feed_list=datas,
name='py_reader',
use_double_buffer=True)
datas.append(input_word, predict_word) words = fluid.layers.read_file(py_reader)
cost = None cost = None
emb = fluid.layers.embedding( emb = fluid.layers.embedding(
input=input_word, input=words[0],
is_sparse=is_sparse, is_sparse=is_sparse,
size=[dict_size, embedding_size], size=[dict_size, embedding_size],
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal( param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(dict_size)))) scale=1 / math.sqrt(dict_size))))
if with_nce: if with_nce:
cost = nce_layer(emb, predict_word, embedding_size, dict_size, 5, "uniform", cost = nce_layer(emb, words[1], embedding_size, dict_size, 5, "uniform",
word_frequencys, None) word_frequencys, None)
if with_hsigmoid: if with_hsigmoid:
cost = hsigmoid_layer(emb, predict_word, dict_size, max_code_length, datas) cost = hsigmoid_layer(emb, words[1], words[2], words[3], dict_size)
avg_cost = fluid.layers.reduce_mean(cost) avg_cost = fluid.layers.reduce_mean(cost)
py_reader = fluid.layers.create_py_reader_by_data(capacity=64,
feed_list=datas,
name='py_reader',
use_double_buffer=True)
return avg_cost, py_reader return avg_cost, py_reader
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
train for word2vec
"""
from __future__ import print_function from __future__ import print_function
import argparse import argparse
...@@ -187,7 +204,7 @@ def train(): ...@@ -187,7 +204,7 @@ def train():
optimizer = fluid.optimizer.Adam(learning_rate=1e-3) optimizer = fluid.optimizer.Adam(learning_rate=1e-3)
optimizer.minimize(loss) optimizer.minimize(loss)
if os.environ["PADDLE_IS_LOCAL"] == "1": if os.getenv("PADDLE_IS_LOCAL", "1") == "1":
logger.info("run local training") logger.info("run local training")
main_program = fluid.default_main_program() main_program = fluid.default_main_program()
train_loop(args, main_program, word2vec_reader, py_reader, loss, 0) train_loop(args, main_program, word2vec_reader, py_reader, loss, 0)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册