提交 efb55496 编写于 作者: Z Zeyu Chen

Move all test files to the tests dir

上级 62fd63c9
# coding: utf-8
import sys
# NOTE: just hack for fast test
sys.path.append("../")
sys.path.append("../paddle_hub/")
import os
import time
import unittest
......
......@@ -38,16 +38,18 @@ def mkdir(path):
class Module(object):
def __init__(self, module_url):
def __init__(self, module_url=None, module_dir=None):
if module_url == None and module_dir == None:
raise Exception("Module:module_url and module_dir are None!")
# donwload module
if module_url.startswith("http"):
if module_url is not None and module_url.startswith("http"):
# if it's remote url link, then download and uncompress it
self.module_name, self.module_dir = download_and_uncompress(
module_url)
else:
elif module_dir is not None:
# otherwise it's local path, no need to deal with it
print("Module.__init__", module_url)
self.module_dir = module_url
# use the path name as module name by default
self.module_name = module_url.split("/")[-1]
# load paddle inference model
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from module import *
class TestModule(unittest.TestCase):
    """Smoke test: download a remote word2vec module and run it end to end."""

    def test_word2vec_module_usage(self):
        # Remote archive holding the saved inference module.
        module_link = "http://paddlehub.cdn.bcebos.com/word2vec/w2v_saved_inference_module.tar.gz"
        inputs = [["it", "is", "new"], ["hello", "world"]]
        module = Module(module_link)
        # Show the intermediate tensor built from the raw token lists...
        print(module._process_input(inputs))
        # ...then the module's actual output on the same inputs.
        print(module(inputs))
if __name__ == "__main__":
unittest.main()
......@@ -13,13 +13,13 @@
# limitations under the License.
import unittest
import downloader as dl
import paddle_hub as hub
class TestDownloader(unittest.TestCase):
def test_download(self):
link = "http://paddlehub.bj.bcebos.com/word2vec/word2vec-dim16-simple-example-1.tar.gz"
module_path = dl.download_and_uncompress(link)
link = "http://paddlehub.bj.bcebos.com/word2vec/word2vec-dim16-simple-example-2.tar.gz"
module_path = hub.download_and_uncompress(link)
if __name__ == "__main__":
......
......@@ -171,7 +171,7 @@ def train(use_cuda=False):
fluid.io.save_persistables(
executor=exe, dirname=model_dir + "_save_persistables")
saved_model_dir = "./tmp/w2v_saved_inference_model"
saved_model_dir = "./tmp/word2vec_inference_model"
# save inference model including feed and fetch variable info
fluid.io.save_inference_model(
dirname=saved_model_dir,
......@@ -205,7 +205,7 @@ def test_save_module(use_cuda=False):
words, word_emb = module_fn()
exe.run(startup_program)
# load inference embedding parameters
saved_model_dir = "./tmp/w2v_saved_inference_model"
saved_model_dir = "./tmp/word2vec_inference_model"
fluid.io.load_inference_model(executor=exe, dirname=saved_model_dir)
feed_var_list = [main_program.global_block().var("words")]
......
# coding=utf-8
from __future__ import print_function
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import paddle_hub as hub
import unittest
import os
EMBED_SIZE = 64
HIDDEN_SIZE = 256
N = 5
BATCH_SIZE = 1
PASS_NUM = 100
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
# Two fixed 5-gram samples standing in for a real dataset.
_MOCK_DATA = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]


def mock_data():
    """Return an iterator over the mock samples, mimicking a paddle reader."""
    return iter(_MOCK_DATA)
batch_reader = paddle.batch(mock_data, BATCH_SIZE)
def word2vec(words, is_sparse):
    """Build the N-gram word2vec network and return its average cost.

    Args:
        words: list of at least four int64 data layers carrying the context
            word ids (the first..fourth words of each 5-gram).
        is_sparse: whether the shared embedding uses sparse gradient updates.

    Returns:
        The mean cross-entropy cost variable of the softmax word prediction.
    """
    # The four context words share one embedding table via the common
    # param_attr name 'embedding'.  Build the four lookups in a loop
    # instead of repeating the identical call four times.
    context_embeds = [
        fluid.layers.embedding(
            input=word,
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=is_sparse,
            param_attr='embedding') for word in words[:4]
    ]
    concat_emb = fluid.layers.concat(input=context_embeds, axis=1)
    hidden1 = fluid.layers.fc(input=concat_emb, size=HIDDEN_SIZE, act='sigmoid')
    predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
    # Declare the label later than predict_word so the program's feed
    # order stays [context words..., next word].
    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
    cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
    avg_cost = fluid.layers.mean(cost)
    return avg_cost
def train():
    """Train the word2vec model on the mock reader and save its embedding.

    Builds the five int64 data layers for the 5-gram inputs, minimizes the
    average cost with SGD on CPU, and finally saves only the shared
    'embedding' parameter under ``./w2v_model``.
    """
    place = fluid.CPUPlace()
    first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
    second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
    third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
    forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64')
    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
    word_list = [first_word, second_word, third_word, forth_word, next_word]
    avg_cost = word2vec(word_list, is_sparse=True)

    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=1e-3)
    sgd_optimizer.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(startup_program)  # initialization

    # The feed variables and the feeder never change between batches,
    # so build them once instead of once per mini-batch (the original
    # recreated both inside the inner loop).
    feed_var_list = [
        main_program.global_block().var(name)
        for name in ("firstw", "secondw", "thirdw", "fourthw", "nextw")
    ]
    feeder = fluid.DataFeeder(feed_list=feed_var_list, place=place)

    for epoch in range(0, PASS_NUM):
        for mini_batch in batch_reader():
            cost = exe.run(
                main_program,
                feed=feeder.feed(mini_batch),
                fetch_list=[avg_cost])
            print("Cost = %f" % cost[0])

    model_dir = "./w2v_model"
    # Persist only the shared embedding table.  Use model_dir here: the
    # original duplicated the path as a hard-coded "./w2v_model/" string,
    # which could drift out of sync with the variable above.
    var_list_to_saved = [main_program.global_block().var("embedding")]
    print("saving model to %s" % model_dir)
    fluid.io.save_vars(
        executor=exe, dirname=model_dir, vars=var_list_to_saved)


if __name__ == "__main__":
    train()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册