diff --git a/paddle_hub/module.py b/paddle_hub/module.py
index 74e80167ae982d7642c890025873d630f2f78088..ee8a420995e2ba6a82e4b5e84e71ff149499950e 100644
--- a/paddle_hub/module.py
+++ b/paddle_hub/module.py
@@ -52,7 +52,7 @@ def mkdir(path):
 
 class Module(object):
     """
-    A module represents a
+    Core object of PaddleHub
     """
 
     def __init__(self, module_url=None, module_dir=None):
@@ -85,12 +85,10 @@ class Module(object):
 
         # remove feed fetch operator and variable
         ModuleUtils.remove_feed_fetch_op(self.inference_program)
-        print("inference_program")
-        print(self.inference_program)
-        print("feed_target_names")
-        print(self.feed_target_names)
-        print("fetch_targets")
-        print(self.fetch_targets)
+        # print("inference_program")
+        # print(self.inference_program)
+        print("**feed_target_names**\n{}".format(self.feed_target_names))
+        print("**fetch_targets**\n{}".format(self.fetch_targets))
 
         self.config = ModuleConfig(self.module_dir)
         self.config.load()
@@ -105,7 +103,6 @@ class Module(object):
 
     def _process_parameter(self):
         global_block = self.inference_program.global_block()
-        filepath = os.path.join(self.module_dir, "param.pkl")
         param_path = ModuleConfig.meta_param_path(self.module_dir)
         with open(param_path, "rb") as file:
             param_arr = pickle.load(file)
@@ -123,16 +120,6 @@ class Module(object):
                 stop_gradient=var.stop_gradient,
                 is_data=var.is_data)
 
-    def _construct_feed_dict(self, inputs):
-        """ Construct feed dict according to user's inputs and module config.
-        """
-        feed_dict = {}
-        for k in inputs:
-            if k in self.feed_target_names:
-                feed_dict[k] = inputs[k]
-
-        return feed_dict
-
     def __call__(self, sign_name="default", trainable=False):
         """ Call default signature and return results
         """
@@ -153,77 +140,84 @@ class Module(object):
 
         return self.feed_target_names, self.fetch_targets, program
 
-    def get_vars(self):
-        """
-        Return variable list of the module program
-        """
-        return self.inference_program.list_vars()
-
-    def get_feed_var(self, key, signature="default"):
-        """
-        Get feed variable according to variable key and signature
-        """
-        for var in self.inference_program.list_vars():
-            if var.name == self.config.feed_var_name(key, signature):
-                return var
-
-        raise Exception("Can't find input var {}".format(key))
-
-    def get_feed_var_by_index(self, index, signature="default"):
-        feed_vars = self.get_feed_vars(signature)
-        assert index < len(
-            feed_vars), "index out of range index {}, len {}".format(
-                index, len(feed_vars))
-        return feed_vars[index]
-
-    def get_fetch_var_by_index(self, index, signature="default"):
-        fetch_vars = self.get_fetch_vars(signature)
-        assert index < len(
-            fetch_vars), "index out of range index {}, len {}".format(
-                index, len(fetch_vars))
-        return fetch_vars[index]
-
-    def get_feed_vars(self, signature="default"):
-        """
-        Get feed variable according to variable key and signature
-        """
-        feed_vars = []
-        for feed_var in self.config.feed_var_names(signature):
-            find_var = False
-            for var in self.inference_program.list_vars():
-                if var.name == feed_var.var_name:
-                    feed_vars.append(var)
-                    find_var = True
-            if not find_var:
-                raise Exception("Can't find feed var {}".format(feed_var_name))
-
-        return feed_vars
-
-    def get_fetch_vars(self, signature="default"):
-        """
-        Get feed variable according to variable key and signature
-        """
-        fetch_vars = []
-        #TODO(ZeyuChen): use brute force to find variables, simple and easy to
-        #understand
-        for fetch_var in self.config.fetch_var_names(signature):
-            find_var = False
-            for var in self.inference_program.list_vars():
-                if var.name == fetch_var.var_name:
-                    fetch_vars.append(var)
-                    find_var = True
-            if not find_var:
-                raise Exception("Can't find feed var {}".format(fetch_var_name))
-
-        return fetch_vars
-
-    def get_fetch_var(self, key, signature="default"):
-        """
-        Get fetch variable according to variable key and signature
-        """
-        for var in self.inference_program.list_vars():
-            if var.name == self.config.fetch_var_name(key, signature):
-                return var
+    # @deprecated
+    # def get_vars(self):
+    #     """
+    #     Return variable list of the module program
+    #     """
+    #     return self.inference_program.list_vars()
+
+    # @deprecated
+    # def get_feed_var(self, key, signature="default"):
+    #     """
+    #     Get feed variable according to variable key and signature
+    #     """
+    #     for var in self.inference_program.list_vars():
+    #         if var.name == self.config.feed_var_name(key, signature):
+    #             return var
+
+    #     raise Exception("Can't find input var {}".format(key))
+
+    # @deprecated
+    # def get_feed_var_by_index(self, index, signature="default"):
+    #     feed_vars = self.get_feed_vars(signature)
+    #     assert index < len(
+    #         feed_vars), "index out of range index {}, len {}".format(
+    #             index, len(feed_vars))
+    #     return feed_vars[index]
+
+    # @deprecated
+    # def get_fetch_var_by_index(self, index, signature="default"):
+    #     fetch_vars = self.get_fetch_vars(signature)
+    #     assert index < len(
+    #         fetch_vars), "index out of range index {}, len {}".format(
+    #             index, len(fetch_vars))
+    #     return fetch_vars[index]
+
+    # @deprecated
+    # def get_feed_vars(self, signature="default"):
+    #     """
+    #     Get feed variable according to variable key and signature
+    #     """
+    #     feed_vars = []
+    #     for feed_var in self.config.feed_var_names(signature):
+    #         find_var = False
+    #         for var in self.inference_program.list_vars():
+    #             if var.name == feed_var.var_name:
+    #                 feed_vars.append(var)
+    #                 find_var = True
+    #         if not find_var:
+    #             raise Exception("Can't find feed var {}".format(feed_var_name))
+
+    #     return feed_vars
+
+    # @deprecated
+    # def get_fetch_vars(self, signature="default"):
+    #     """
+    #     Get feed variable according to variable key and signature
+    #     """
+    #     fetch_vars = []
+    #     #TODO(ZeyuChen): use brute force to find variables, simple and easy to
+    #     #understand
+    #     for fetch_var in self.config.fetch_var_names(signature):
+    #         find_var = False
+    #         for var in self.inference_program.list_vars():
+    #             if var.name == fetch_var.var_name:
+    #                 fetch_vars.append(var)
+    #                 find_var = True
+    #         if not find_var:
+    #             raise Exception("Can't find feed var {}".format(fetch_var_name))
+
+    #     return fetch_vars
+
+    # @deprecated
+    # def get_fetch_var(self, key, signature="default"):
+    #     """
+    #     Get fetch variable according to variable key and signature
+    #     """
+    #     for var in self.inference_program.list_vars():
+    #         if var.name == self.config.fetch_var_name(key, signature):
+    #             return var
 
     def get_inference_program(self):
         return self.inference_program
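With the old variable getters commented out as deprecated, Module.__call__ becomes the public way to get at a module's graph: it returns the feed variables, the fetch variables, and a program that can be run directly. A minimal usage sketch (assuming `import paddle.fluid as fluid` and `import paddle_hub as hub` as in the test file below; the module path and word ids are illustrative):

    import paddle.fluid as fluid
    import paddle_hub as hub

    # Load a saved module; calling it materializes the named signature and
    # returns (feed variables, fetch variables, program).
    module = hub.Module(module_dir="./tmp/word2vec_test_module")
    feed_list, fetch_list, program = module(sign_name="default", trainable=False)

    # The returned program runs like any fluid program; feed_list/fetch_list
    # are ordinary fluid Variables belonging to it.
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=feed_list)
    result = exe.run(program,
                     feed=feeder.feed([[1, 2, 3, 4]]),  # one sample, one id per feed var
                     fetch_list=fetch_list)

This mirrors what test_load_w2v_module in the test file below does, minus the argmax over the softmax output.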
diff --git a/tests/test_export_n_load_module.py b/tests/test_export_n_load_module.py
index 671359ea1fb7c63a6263ee4b510338d92b1a3df4..4054bbe3eebe53f2e5c4e85571a2bbd99f402719 100644
--- a/tests/test_export_n_load_module.py
+++ b/tests/test_export_n_load_module.py
@@ -29,7 +29,7 @@ EMBED_SIZE = 16
 HIDDEN_SIZE = 256
 N = 5
 BATCH_SIZE = 64
-PASS_NUM = 1
+PASS_NUM = 1000
 
 word_dict = paddle.dataset.imikolov.build_dict()
 dict_size = len(word_dict)
@@ -48,27 +48,6 @@ batch_reader = paddle.batch(mock_data, BATCH_SIZE)
 batch_size = 0
 for d in batch_reader():
     batch_size += 1
-print("imikolov simple dataset batch_size =", batch_size)
-
-
-def module_fn(trainable=False):
-    # Define module function for saving module
-    # create word input
-    words = fluid.layers.data(
-        name="words", shape=[1], lod_level=1, dtype="int64")
-
-    # create embedding
-    emb_name = "w2v_emb"
-    emb_param_attr = fluid.ParamAttr(name=emb_name, trainable=trainable)
-    word_emb = fluid.layers.embedding(
-        input=words,
-        size=[dict_size, EMBED_SIZE],
-        dtype='float32',
-        is_sparse=True,
-        param_attr=emb_param_attr)
-
-    # return feeder and fetch_list
-    return words, word_emb
 
 
 def word2vec(words, is_sparse, trainable=True):
@@ -101,19 +80,31 @@ def word2vec(words, is_sparse, trainable=True):
     concat_emb = fluid.layers.concat(
         input=[embed_first, embed_second, embed_third, embed_fourth], axis=1)
     hidden1 = fluid.layers.fc(input=concat_emb, size=HIDDEN_SIZE, act='sigmoid')
-    predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
+    pred_prob = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
 
     # declare later than predict word
     next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
 
-    cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
+    cost = fluid.layers.cross_entropy(input=pred_prob, label=next_word)
     avg_cost = fluid.layers.mean(cost)
 
-    return predict_word, avg_cost
+    return pred_prob, avg_cost
+
+
+def get_dictionary(word_dict):
+    dictionary = defaultdict(int)
+    w_id = 0
+    for w in word_dict:
+        if isinstance(w, bytes):
+            w = w.decode("ascii")
+        dictionary[w] = w_id
+        w_id += 1
+
+    return dictionary
 
 
-def train(use_cuda=False):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+def test_create_w2v_module(use_gpu=False):
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
 
     first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
     second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
@@ -122,12 +113,12 @@
     next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
 
     word_list = [first_word, second_word, third_word, forth_word, next_word]
-    predict_word, avg_cost = word2vec(word_list, is_sparse=True)
+    pred_prob, avg_cost = word2vec(word_list, is_sparse=True)
 
     main_program = fluid.default_main_program()
     startup_program = fluid.default_startup_program()
 
-    sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=1e-3)
+    sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=1e-2)
     sgd_optimizer.minimize(avg_cost)
 
     exe = fluid.Executor(place)
@@ -136,8 +127,6 @@
     step = 0
     for epoch in range(0, PASS_NUM):
        for mini_batch in batch_reader():
-            # print("mini_batch", mini_batch)
-            # 定义输入变量
            feed_var_list = [
                main_program.global_block().var("firstw"),
                main_program.global_block().var("secondw"),
@@ -154,90 +143,52 @@
             if step % 100 == 0:
                 print("Epoch={} Step={} Cost={}".format(epoch, step, cost[0]))
 
-    saved_model_dir = "./tmp/word2vec_test_model"
+    saved_module_dir = "./tmp/word2vec_test_module"
     # save inference model including feed and fetch variable info
-    fluid.io.save_inference_model(
-        dirname=saved_model_dir,
-        feeded_var_names=["firstw", "secondw", "thirdw", "fourthw"],
-        target_vars=[predict_word],
-        executor=exe)
-
-    dictionary = defaultdict(int)
-    w_id = 0
-    for w in word_dict:
-        if isinstance(w, bytes):
-            w = w.decode("ascii")
-        dictionary[w] = w_id
-        w_id += 1
-
-    # save word dict to assets folder
-    config = hub.ModuleConfig(saved_model_dir)
-    config.save_dict(word_dict=dictionary)
-    config.dump()
-
-
-def test_save_module(use_cuda=False):
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-
-    exe = fluid.Executor(place)
-    main_program = fluid.Program()
-    startup_program = fluid.Program()
-
-    with fluid.program_guard(main_program, startup_program):
-        words, word_emb = module_fn()
-        exe.run(startup_program)
-        # load inference embedding parameters
-        saved_model_dir = "./tmp/word2vec_test_model"
-        fluid.io.load_inference_model(executor=exe, dirname=saved_model_dir)
-
-        # feed_var_list = [main_program.global_block().var("words")]
-        # feeder = fluid.DataFeeder(feed_list=feed_var_list, place=place)
-        # results = exe.run(
-        #     main_program,
-        #     feed=feeder.feed([[[1, 2, 3, 4, 5]]]),
-        #     fetch_list=[word_emb],
-        #     return_numpy=False)
-
-        # np_result = np.array(results[0])
-        # print(np_result)
-
-        # save module_dir
-        saved_module_dir = "./tmp/word2vec_test_module"
-        fluid.io.save_inference_model(
-            dirname=saved_module_dir,
-            feeded_var_names=["words"],
-            target_vars=[word_emb],
-            executor=exe)
-
-        dictionary = defaultdict(int)
-        w_id = 0
-        for w in word_dict:
-            if isinstance(w, bytes):
-                w = w.decode("ascii")
-            dictionary[w] = w_id
-            w_id += 1
-
-        signature = hub.create_signature(
-            "default", inputs=[words], outputs=[word_emb])
-        hub.create_module(
-            sign_arr=signature, program=main_program, path=saved_module_dir)
-
-
-def test_load_module(use_cuda=False):
+    dictionary = get_dictionary(word_dict)
+
+    module_inputs = [
+        main_program.global_block().var("firstw"),
+        main_program.global_block().var("secondw"),
+        main_program.global_block().var("thirdw"),
+        main_program.global_block().var("fourthw"),
+    ]
+    signature = hub.create_signature(
+        "default", inputs=module_inputs, outputs=[pred_prob])
+    hub.create_module(
+        sign_arr=signature,
+        program=fluid.default_main_program(),
+        module_dir=saved_module_dir,
+        word_dict=dictionary)
+
+
+def test_load_w2v_module(use_gpu=False):
     saved_module_dir = "./tmp/word2vec_test_module"
     w2v_module = hub.Module(module_dir=saved_module_dir)
+    feed_list, fetch_list, program = w2v_module(
+        sign_name="default", trainable=False)
+    with fluid.program_guard(main_program=program):
+        pred_prob = fetch_list[0]
+
+        pred_word = fluid.layers.argmax(x=pred_prob, axis=1)
+        # set place, executor, datafeeder
+        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        feeder = fluid.DataFeeder(place=place, feed_list=feed_list)
+
+        word_ids = [[1, 2, 3, 4]]
+        result = exe.run(
+            fluid.default_main_program(),
+            feed=feeder.feed(word_ids),
+            fetch_list=[pred_word],
+            return_numpy=True)
 
-    word_ids = [[1, 2, 3, 4, 5]]  # test sequence
-    word_ids_lod_tensor = w2v_module._preprocess_input(word_ids)
-    result = w2v_module({"words": word_ids_lod_tensor})
-    print(result)
+        print(result)
 
 
 if __name__ == "__main__":
-    use_cuda = False
-    print("train...")
-    train(use_cuda)
-    print("save module...")
-    test_save_module()
-    print("load module...")
-    test_load_module()
+    use_gpu = False
+    print("test create word2vec module")
+    test_create_w2v_module(use_gpu)
+    print("test load word2vec module")
+    test_load_w2v_module(use_gpu=False)
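The `trainable` flag threaded through Module.__call__ and _process_parameter suggests the intended fine-tuning flow: load the module with trainable=True so its parameters receive gradients, then build a task head on top of the fetched output. A sketch under that assumption (the label variable, loss, and optimizer below are illustrative additions, not part of this patch):

    import paddle.fluid as fluid
    import paddle_hub as hub

    module = hub.Module(module_dir="./tmp/word2vec_test_module")
    feed_list, fetch_list, program = module(sign_name="default", trainable=True)

    with fluid.program_guard(main_program=program):
        # pred_prob is the module's softmax output (fetch_targets[0]).
        pred_prob = fetch_list[0]
        # Hypothetical task head: cross-entropy against a new label input.
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
        cost = fluid.layers.cross_entropy(input=pred_prob, label=label)
        avg_cost = fluid.layers.mean(cost)
        # With trainable=True the module's own parameters are updated too.
        fluid.optimizer.SGDOptimizer(learning_rate=1e-2).minimize(avg_cost)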