提交 97a0781d 编写于 作者: Z Zeyu Chen

fix tests export and load module error

上级 715fde80
...@@ -52,7 +52,7 @@ def mkdir(path): ...@@ -52,7 +52,7 @@ def mkdir(path):
class Module(object): class Module(object):
""" """
A module represents a Core object of PaddleHub
""" """
def __init__(self, module_url=None, module_dir=None): def __init__(self, module_url=None, module_dir=None):
...@@ -85,12 +85,10 @@ class Module(object): ...@@ -85,12 +85,10 @@ class Module(object):
# remove feed fetch operator and variable # remove feed fetch operator and variable
ModuleUtils.remove_feed_fetch_op(self.inference_program) ModuleUtils.remove_feed_fetch_op(self.inference_program)
print("inference_program") # print("inference_program")
print(self.inference_program) # print(self.inference_program)
print("feed_target_names") print("**feed_target_names**\n{}".format(self.feed_target_names))
print(self.feed_target_names) print("**fetch_targets**\n{}".format(self.fetch_targets))
print("fetch_targets")
print(self.fetch_targets)
self.config = ModuleConfig(self.module_dir) self.config = ModuleConfig(self.module_dir)
self.config.load() self.config.load()
...@@ -105,7 +103,6 @@ class Module(object): ...@@ -105,7 +103,6 @@ class Module(object):
def _process_parameter(self): def _process_parameter(self):
global_block = self.inference_program.global_block() global_block = self.inference_program.global_block()
filepath = os.path.join(self.module_dir, "param.pkl")
param_path = ModuleConfig.meta_param_path(self.module_dir) param_path = ModuleConfig.meta_param_path(self.module_dir)
with open(param_path, "rb") as file: with open(param_path, "rb") as file:
param_arr = pickle.load(file) param_arr = pickle.load(file)
...@@ -123,16 +120,6 @@ class Module(object): ...@@ -123,16 +120,6 @@ class Module(object):
stop_gradient=var.stop_gradient, stop_gradient=var.stop_gradient,
is_data=var.is_data) is_data=var.is_data)
def _construct_feed_dict(self, inputs):
    """Construct the feed dict from the user's inputs.

    Only entries whose key is listed in ``self.feed_target_names`` are
    kept; every other entry of ``inputs`` is silently dropped.

    Args:
        inputs: collection that yields keys when iterated and supports
            ``inputs[key]`` lookup (e.g. a dict).

    Returns:
        dict: ``key -> inputs[key]`` for each key that is a feed target name.
    """
    return {k: inputs[k] for k in inputs if k in self.feed_target_names}
def __call__(self, sign_name="default", trainable=False): def __call__(self, sign_name="default", trainable=False):
""" Call default signature and return results """ Call default signature and return results
""" """
...@@ -153,77 +140,84 @@ class Module(object): ...@@ -153,77 +140,84 @@ class Module(object):
return self.feed_target_names, self.fetch_targets, program return self.feed_target_names, self.fetch_targets, program
def get_vars(self): # @deprecated
""" # def get_vars(self):
Return variable list of the module program # """
""" # Return variable list of the module program
return self.inference_program.list_vars() # """
# return self.inference_program.list_vars()
def get_feed_var(self, key, signature="default"):
""" # @deprecated
Get feed variable according to variable key and signature # def get_feed_var(self, key, signature="default"):
""" # """
for var in self.inference_program.list_vars(): # Get feed variable according to variable key and signature
if var.name == self.config.feed_var_name(key, signature): # """
return var # for var in self.inference_program.list_vars():
# if var.name == self.config.feed_var_name(key, signature):
raise Exception("Can't find input var {}".format(key)) # return var
def get_feed_var_by_index(self, index, signature="default"): # raise Exception("Can't find input var {}".format(key))
feed_vars = self.get_feed_vars(signature)
assert index < len( # @deprecated
feed_vars), "index out of range index {}, len {}".format( # def get_feed_var_by_index(self, index, signature="default"):
index, len(feed_vars)) # feed_vars = self.get_feed_vars(signature)
return feed_vars[index] # assert index < len(
# feed_vars), "index out of range index {}, len {}".format(
def get_fetch_var_by_index(self, index, signature="default"): # index, len(feed_vars))
fetch_vars = self.get_fetch_vars(signature) # return feed_vars[index]
assert index < len(
fetch_vars), "index out of range index {}, len {}".format( # @deprecated
index, len(fetch_vars)) # def get_fetch_var_by_index(self, index, signature="default"):
return fetch_vars[index] # fetch_vars = self.get_fetch_vars(signature)
# assert index < len(
def get_feed_vars(self, signature="default"): # fetch_vars), "index out of range index {}, len {}".format(
""" # index, len(fetch_vars))
Get feed variable according to variable key and signature # return fetch_vars[index]
"""
feed_vars = [] # @deprecated
for feed_var in self.config.feed_var_names(signature): # def get_feed_vars(self, signature="default"):
find_var = False # """
for var in self.inference_program.list_vars(): # Get feed variable according to variable key and signature
if var.name == feed_var.var_name: # """
feed_vars.append(var) # feed_vars = []
find_var = True # for feed_var in self.config.feed_var_names(signature):
if not find_var: # find_var = False
raise Exception("Can't find feed var {}".format(feed_var_name)) # for var in self.inference_program.list_vars():
# if var.name == feed_var.var_name:
return feed_vars # feed_vars.append(var)
# find_var = True
def get_fetch_vars(self, signature="default"): # if not find_var:
""" # raise Exception("Can't find feed var {}".format(feed_var_name))
Get feed variable according to variable key and signature
""" # return feed_vars
fetch_vars = []
#TODO(ZeyuChen): use brute force to find variables, simple and easy to # @deprecated
#understand # def get_fetch_vars(self, signature="default"):
for fetch_var in self.config.fetch_var_names(signature): # """
find_var = False # Get feed variable according to variable key and signature
for var in self.inference_program.list_vars(): # """
if var.name == fetch_var.var_name: # fetch_vars = []
fetch_vars.append(var) # #TODO(ZeyuChen): use brute force to find variables, simple and easy to
find_var = True # #understand
if not find_var: # for fetch_var in self.config.fetch_var_names(signature):
raise Exception("Can't find feed var {}".format(fetch_var_name)) # find_var = False
# for var in self.inference_program.list_vars():
return fetch_vars # if var.name == fetch_var.var_name:
# fetch_vars.append(var)
def get_fetch_var(self, key, signature="default"): # find_var = True
""" # if not find_var:
Get fetch variable according to variable key and signature # raise Exception("Can't find feed var {}".format(fetch_var_name))
"""
for var in self.inference_program.list_vars(): # return fetch_vars
if var.name == self.config.fetch_var_name(key, signature):
return var # @deprecated
# def get_fetch_var(self, key, signature="default"):
# """
# Get fetch variable according to variable key and signature
# """
# for var in self.inference_program.list_vars():
# if var.name == self.config.fetch_var_name(key, signature):
# return var
def get_inference_program(self): def get_inference_program(self):
return self.inference_program return self.inference_program
......
...@@ -29,7 +29,7 @@ EMBED_SIZE = 16 ...@@ -29,7 +29,7 @@ EMBED_SIZE = 16
HIDDEN_SIZE = 256 HIDDEN_SIZE = 256
N = 5 N = 5
BATCH_SIZE = 64 BATCH_SIZE = 64
PASS_NUM = 1 PASS_NUM = 1000
word_dict = paddle.dataset.imikolov.build_dict() word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict) dict_size = len(word_dict)
...@@ -48,27 +48,6 @@ batch_reader = paddle.batch(mock_data, BATCH_SIZE) ...@@ -48,27 +48,6 @@ batch_reader = paddle.batch(mock_data, BATCH_SIZE)
batch_size = 0 batch_size = 0
for d in batch_reader(): for d in batch_reader():
batch_size += 1 batch_size += 1
print("imikolov simple dataset batch_size =", batch_size)
def module_fn(trainable=False):
    """Define the word-embedding network used when saving the module.

    Args:
        trainable: forwarded to the ``fluid.ParamAttr`` of the embedding
            parameter named ``"w2v_emb"``.

    Returns:
        tuple: ``(words, word_emb)`` -- the ``"words"`` data layer and the
        output of the embedding layer applied to it.
    """
    # Input layer holding the word ids.
    words = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype="int64")

    # Embedding table of size [dict_size, EMBED_SIZE]; the parameter is
    # named explicitly so it can be referred to by name ("w2v_emb").
    word_emb = fluid.layers.embedding(
        input=words,
        size=[dict_size, EMBED_SIZE],
        dtype='float32',
        is_sparse=True,
        param_attr=fluid.ParamAttr(name="w2v_emb", trainable=trainable))

    # Feed variable first, fetch variable second.
    return words, word_emb
def word2vec(words, is_sparse, trainable=True): def word2vec(words, is_sparse, trainable=True):
...@@ -101,19 +80,31 @@ def word2vec(words, is_sparse, trainable=True): ...@@ -101,19 +80,31 @@ def word2vec(words, is_sparse, trainable=True):
concat_emb = fluid.layers.concat( concat_emb = fluid.layers.concat(
input=[embed_first, embed_second, embed_third, embed_fourth], axis=1) input=[embed_first, embed_second, embed_third, embed_fourth], axis=1)
hidden1 = fluid.layers.fc(input=concat_emb, size=HIDDEN_SIZE, act='sigmoid') hidden1 = fluid.layers.fc(input=concat_emb, size=HIDDEN_SIZE, act='sigmoid')
predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax') pred_prob = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
# declare later than predict word # declare later than predict word
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) cost = fluid.layers.cross_entropy(input=pred_prob, label=next_word)
avg_cost = fluid.layers.mean(cost) avg_cost = fluid.layers.mean(cost)
return predict_word, avg_cost return pred_prob, avg_cost
def get_dictionary(word_dict):
    """Map each word of ``word_dict`` to its position in iteration order.

    Args:
        word_dict: iterable of words. ``bytes`` entries are decoded as
            ASCII before being used as keys; other entries are used as-is.

    Returns:
        collections.defaultdict: ``word -> index`` where ``index`` is the
        zero-based position at which the word was yielded. Because the
        result is a ``defaultdict(int)``, looking up an unknown word
        returns 0 (and inserts it). If a word occurs more than once, the
        index of its last occurrence wins.

    Raises:
        UnicodeDecodeError: if a ``bytes`` word contains non-ASCII data.
    """
    dictionary = defaultdict(int)
    # NOTE(review): ids are assigned by iteration order, not taken from any
    # values word_dict may carry -- confirm this matches the ids used in
    # training when word_dict is a mapping.
    for w_id, w in enumerate(word_dict):
        if isinstance(w, bytes):
            w = w.decode("ascii")
        dictionary[w] = w_id
    return dictionary
def train(use_cuda=False): def test_create_w2v_module(use_gpu=False):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
...@@ -122,12 +113,12 @@ def train(use_cuda=False): ...@@ -122,12 +113,12 @@ def train(use_cuda=False):
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
word_list = [first_word, second_word, third_word, forth_word, next_word] word_list = [first_word, second_word, third_word, forth_word, next_word]
predict_word, avg_cost = word2vec(word_list, is_sparse=True) pred_prob, avg_cost = word2vec(word_list, is_sparse=True)
main_program = fluid.default_main_program() main_program = fluid.default_main_program()
startup_program = fluid.default_startup_program() startup_program = fluid.default_startup_program()
sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=1e-3) sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=1e-2)
sgd_optimizer.minimize(avg_cost) sgd_optimizer.minimize(avg_cost)
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -136,8 +127,6 @@ def train(use_cuda=False): ...@@ -136,8 +127,6 @@ def train(use_cuda=False):
step = 0 step = 0
for epoch in range(0, PASS_NUM): for epoch in range(0, PASS_NUM):
for mini_batch in batch_reader(): for mini_batch in batch_reader():
# print("mini_batch", mini_batch)
# 定义输入变量
feed_var_list = [ feed_var_list = [
main_program.global_block().var("firstw"), main_program.global_block().var("firstw"),
main_program.global_block().var("secondw"), main_program.global_block().var("secondw"),
...@@ -154,90 +143,52 @@ def train(use_cuda=False): ...@@ -154,90 +143,52 @@ def train(use_cuda=False):
if step % 100 == 0: if step % 100 == 0:
print("Epoch={} Step={} Cost={}".format(epoch, step, cost[0])) print("Epoch={} Step={} Cost={}".format(epoch, step, cost[0]))
saved_model_dir = "./tmp/word2vec_test_model" saved_module_dir = "./tmp/word2vec_test_module"
# save inference model including feed and fetch variable info # save inference model including feed and fetch variable info
fluid.io.save_inference_model( dictionary = get_dictionary(word_dict)
dirname=saved_model_dir,
feeded_var_names=["firstw", "secondw", "thirdw", "fourthw"], module_inputs = [
target_vars=[predict_word], main_program.global_block().var("firstw"),
executor=exe) main_program.global_block().var("secondw"),
main_program.global_block().var("thirdw"),
dictionary = defaultdict(int) main_program.global_block().var("fourthw"),
w_id = 0 ]
for w in word_dict: signature = hub.create_signature(
if isinstance(w, bytes): "default", inputs=module_inputs, outputs=[pred_prob])
w = w.decode("ascii") hub.create_module(
dictionary[w] = w_id sign_arr=signature,
w_id += 1 program=fluid.default_main_program(),
module_dir=saved_module_dir,
# save word dict to assets folder word_dict=dictionary)
config = hub.ModuleConfig(saved_model_dir)
config.save_dict(word_dict=dictionary)
config.dump() def test_load_w2v_module(use_gpu=False):
# Builds the network returned by module_fn(), runs the startup program, calls
# fluid.io.load_inference_model on "./tmp/word2vec_test_model", and then
# writes to "./tmp/word2vec_test_module" twice: once through
# fluid.io.save_inference_model and once through hub.create_module.
# NOTE(review): this listing has lost its indentation, so the extent of the
# `with fluid.program_guard(...)` body cannot be determined from here --
# confirm against the original file before relying on statement scoping.
def test_save_module(use_cuda=False):
# use_cuda selects fluid.CUDAPlace(0); otherwise the CPU place is used.
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
# Fresh main/startup programs, passed to program_guard below.
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
words, word_emb = module_fn()
exe.run(startup_program)
# load inference embedding parameters
# (the return value of load_inference_model is discarded here)
saved_model_dir = "./tmp/word2vec_test_model"
fluid.io.load_inference_model(executor=exe, dirname=saved_model_dir)
# Disabled smoke run of the embedding lookup, kept for reference:
# feed_var_list = [main_program.global_block().var("words")]
# feeder = fluid.DataFeeder(feed_list=feed_var_list, place=place)
# results = exe.run(
# main_program,
# feed=feeder.feed([[[1, 2, 3, 4, 5]]]),
# fetch_list=[word_emb],
# return_numpy=False)
# np_result = np.array(results[0])
# print(np_result)
# save module_dir
saved_module_dir = "./tmp/word2vec_test_module"
fluid.io.save_inference_model(
dirname=saved_module_dir,
feeded_var_names=["words"],
target_vars=[word_emb],
executor=exe)
# Map each word to its position in iteration order (bytes decoded as ASCII).
# NOTE(review): `dictionary` is built but never used in the rest of this
# function -- it is not passed to hub.create_module below.
dictionary = defaultdict(int)
w_id = 0
for w in word_dict:
if isinstance(w, bytes):
w = w.decode("ascii")
dictionary[w] = w_id
w_id += 1
# Register a "default" signature: input `words`, output `word_emb`.
signature = hub.create_signature(
"default", inputs=[words], outputs=[word_emb])
hub.create_module(
sign_arr=signature, program=main_program, path=saved_module_dir)
def test_load_module(use_cuda=False):
saved_module_dir = "./tmp/word2vec_test_module" saved_module_dir = "./tmp/word2vec_test_module"
w2v_module = hub.Module(module_dir=saved_module_dir) w2v_module = hub.Module(module_dir=saved_module_dir)
feed_list, fetch_list, program = w2v_module(
sign_name="default", trainable=False)
with fluid.program_guard(main_program=program):
pred_prob = fetch_list[0]
pred_word = fluid.layers.argmax(x=pred_prob, axis=1)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(place=place, feed_list=feed_list)
word_ids = [[1, 2, 3, 4]]
result = exe.run(
fluid.default_main_program(),
feed=feeder.feed(word_ids),
fetch_list=[pred_word],
return_numpy=True)
word_ids = [[1, 2, 3, 4, 5]] # test sequence print(result)
word_ids_lod_tensor = w2v_module._preprocess_input(word_ids)
result = w2v_module({"words": word_ids_lod_tensor})
print(result)
if __name__ == "__main__": if __name__ == "__main__":
use_cuda = False use_gpu = False
print("train...") print("test create word2vec module")
train(use_cuda) test_create_w2v_module(use_gpu)
print("save module...") print("test load word2vec module")
test_save_module() test_load_w2v_module(use_gpu=False)
print("load module...")
test_load_module()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册