提交 efb55496 编写于 作者: Z Zeyu Chen

Move all test files to the tests dir

上级 62fd63c9
# coding: utf-8
import sys
# NOTE: just hack for fast test
sys.path.append("../")
sys.path.append("../paddle_hub/")
import os
import time
import unittest
......
......@@ -38,16 +38,18 @@ def mkdir(path):
class Module(object):
def __init__(self, module_url):
def __init__(self, module_url=None, module_dir=None):
if module_url == None and module_dir == None:
raise Exception("Module:module_url and module_dir are None!")
# donwload module
if module_url.startswith("http"):
if module_url is not None and module_url.startswith("http"):
# if it's remote url link, then download and uncompress it
self.module_name, self.module_dir = download_and_uncompress(
module_url)
else:
elif module_dir is not None:
# otherwise it's local path, no need to deal with it
print("Module.__init__", module_url)
self.module_dir = module_url
# use the path name as module name by default
self.module_name = module_url.split("/")[-1]
# load paddle inference model
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from module import *
class TestModule(unittest.TestCase):
    """Smoke test: download a remote word2vec module and run it end to end."""

    def test_word2vec_module_usage(self):
        # Remote archive holding the saved inference module.
        module_link = "http://paddlehub.cdn.bcebos.com/word2vec/w2v_saved_inference_module.tar.gz"
        inputs = [["it", "is", "new"], ["hello", "world"]]
        module = Module(module_link)
        # Show the intermediate tensor built from the raw token lists...
        print(module._process_input(inputs))
        # ...then the module's actual output on the same inputs.
        print(module(inputs))
if __name__ == "__main__":
unittest.main()
......@@ -13,13 +13,13 @@
# limitations under the License.
import unittest
import downloader as dl
import paddle_hub as hub
class TestDownloader(unittest.TestCase):
def test_download(self):
link = "http://paddlehub.bj.bcebos.com/word2vec/word2vec-dim16-simple-example-1.tar.gz"
module_path = dl.download_and_uncompress(link)
link = "http://paddlehub.bj.bcebos.com/word2vec/word2vec-dim16-simple-example-2.tar.gz"
module_path = hub.download_and_uncompress(link)
if __name__ == "__main__":
......
......@@ -171,7 +171,7 @@ def train(use_cuda=False):
fluid.io.save_persistables(
executor=exe, dirname=model_dir + "_save_persistables")
saved_model_dir = "./tmp/w2v_saved_inference_model"
saved_model_dir = "./tmp/word2vec_inference_model"
# save inference model including feed and fetch variable info
fluid.io.save_inference_model(
dirname=saved_model_dir,
......@@ -205,7 +205,7 @@ def test_save_module(use_cuda=False):
words, word_emb = module_fn()
exe.run(startup_program)
# load inference embedding parameters
saved_model_dir = "./tmp/w2v_saved_inference_model"
saved_model_dir = "./tmp/word2vec_inference_model"
fluid.io.load_inference_model(executor=exe, dirname=saved_model_dir)
feed_var_list = [main_program.global_block().var("words")]
......
# coding=utf-8
from __future__ import print_function
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import paddle_hub as hub
import unittest
import os
EMBED_SIZE = 64
HIDDEN_SIZE = 256
N = 5
BATCH_SIZE = 1
PASS_NUM = 100
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
# Two fixed 5-gram samples standing in for a real dataset.
_MOCK_DATA = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]


def mock_data():
    """Return an iterator over the mock samples, mimicking a paddle reader."""
    return iter(_MOCK_DATA)
batch_reader = paddle.batch(mock_data, BATCH_SIZE)
def word2vec(words, is_sparse):
    """Build the N-gram word2vec network and return its average cost.

    Args:
        words: list of at least four int64 data layers carrying the context
            word ids (the first..fourth words of each 5-gram).
        is_sparse: whether the shared embedding uses sparse gradient updates.

    Returns:
        The mean cross-entropy cost variable of the softmax word prediction.
    """
    # The four context words share one embedding table via the common
    # param_attr name 'embedding'.  Build the four lookups in a loop
    # instead of repeating the identical call four times.
    context_embeds = [
        fluid.layers.embedding(
            input=word,
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=is_sparse,
            param_attr='embedding') for word in words[:4]
    ]
    concat_emb = fluid.layers.concat(input=context_embeds, axis=1)
    hidden1 = fluid.layers.fc(input=concat_emb, size=HIDDEN_SIZE, act='sigmoid')
    predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
    # Declare the label later than predict_word so the program's feed
    # order stays [context words..., next word].
    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
    cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
    avg_cost = fluid.layers.mean(cost)
    return avg_cost
def train():
    """Train the word2vec model on the mock reader and save its embedding.

    Builds the five int64 data layers for the 5-gram inputs, minimizes the
    average cost with SGD on CPU, and finally saves only the shared
    'embedding' parameter under ``./w2v_model``.
    """
    place = fluid.CPUPlace()
    first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
    second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
    third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
    forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64')
    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
    word_list = [first_word, second_word, third_word, forth_word, next_word]
    avg_cost = word2vec(word_list, is_sparse=True)

    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=1e-3)
    sgd_optimizer.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(startup_program)  # initialization

    # The feed variables and the feeder never change between batches,
    # so build them once instead of once per mini-batch (the original
    # recreated both inside the inner loop).
    feed_var_list = [
        main_program.global_block().var(name)
        for name in ("firstw", "secondw", "thirdw", "fourthw", "nextw")
    ]
    feeder = fluid.DataFeeder(feed_list=feed_var_list, place=place)

    for epoch in range(0, PASS_NUM):
        for mini_batch in batch_reader():
            cost = exe.run(
                main_program,
                feed=feeder.feed(mini_batch),
                fetch_list=[avg_cost])
            print("Cost = %f" % cost[0])

    model_dir = "./w2v_model"
    # Persist only the shared embedding table.  Use model_dir here: the
    # original duplicated the path as a hard-coded "./w2v_model/" string,
    # which could drift out of sync with the variable above.
    var_list_to_saved = [main_program.global_block().var("embedding")]
    print("saving model to %s" % model_dir)
    fluid.io.save_vars(
        executor=exe, dirname=model_dir, vars=var_list_to_saved)


if __name__ == "__main__":
    train()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册