Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
97a0781d
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 2 年 前同步成功
通知
285
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
97a0781d
编写于
1月 15, 2019
作者:
Z
Zeyu Chen
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix tests export and load module error
上级
715fde80
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
145 addition
and
200 deletion
+145
-200
paddle_hub/module.py
paddle_hub/module.py
+83
-89
tests/test_export_n_load_module.py
tests/test_export_n_load_module.py
+62
-111
未找到文件。
paddle_hub/module.py
浏览文件 @
97a0781d
...
@@ -52,7 +52,7 @@ def mkdir(path):
...
@@ -52,7 +52,7 @@ def mkdir(path):
class
Module
(
object
):
class
Module
(
object
):
"""
"""
A module represents a
Core object of PaddleHub
"""
"""
def
__init__
(
self
,
module_url
=
None
,
module_dir
=
None
):
def
__init__
(
self
,
module_url
=
None
,
module_dir
=
None
):
...
@@ -85,12 +85,10 @@ class Module(object):
...
@@ -85,12 +85,10 @@ class Module(object):
# remove feed fetch operator and variable
# remove feed fetch operator and variable
ModuleUtils
.
remove_feed_fetch_op
(
self
.
inference_program
)
ModuleUtils
.
remove_feed_fetch_op
(
self
.
inference_program
)
print
(
"inference_program"
)
# print("inference_program")
print
(
self
.
inference_program
)
# print(self.inference_program)
print
(
"feed_target_names"
)
print
(
"**feed_target_names**
\n
{}"
.
format
(
self
.
feed_target_names
))
print
(
self
.
feed_target_names
)
print
(
"**fetch_targets**
\n
{}"
.
format
(
self
.
fetch_targets
))
print
(
"fetch_targets"
)
print
(
self
.
fetch_targets
)
self
.
config
=
ModuleConfig
(
self
.
module_dir
)
self
.
config
=
ModuleConfig
(
self
.
module_dir
)
self
.
config
.
load
()
self
.
config
.
load
()
...
@@ -105,7 +103,6 @@ class Module(object):
...
@@ -105,7 +103,6 @@ class Module(object):
def
_process_parameter
(
self
):
def
_process_parameter
(
self
):
global_block
=
self
.
inference_program
.
global_block
()
global_block
=
self
.
inference_program
.
global_block
()
filepath
=
os
.
path
.
join
(
self
.
module_dir
,
"param.pkl"
)
param_path
=
ModuleConfig
.
meta_param_path
(
self
.
module_dir
)
param_path
=
ModuleConfig
.
meta_param_path
(
self
.
module_dir
)
with
open
(
param_path
,
"rb"
)
as
file
:
with
open
(
param_path
,
"rb"
)
as
file
:
param_arr
=
pickle
.
load
(
file
)
param_arr
=
pickle
.
load
(
file
)
...
@@ -123,16 +120,6 @@ class Module(object):
...
@@ -123,16 +120,6 @@ class Module(object):
stop_gradient
=
var
.
stop_gradient
,
stop_gradient
=
var
.
stop_gradient
,
is_data
=
var
.
is_data
)
is_data
=
var
.
is_data
)
def
_construct_feed_dict
(
self
,
inputs
):
""" Construct feed dict according to user's inputs and module config.
"""
feed_dict
=
{}
for
k
in
inputs
:
if
k
in
self
.
feed_target_names
:
feed_dict
[
k
]
=
inputs
[
k
]
return
feed_dict
def
__call__
(
self
,
sign_name
=
"default"
,
trainable
=
False
):
def
__call__
(
self
,
sign_name
=
"default"
,
trainable
=
False
):
""" Call default signature and return results
""" Call default signature and return results
"""
"""
...
@@ -153,77 +140,84 @@ class Module(object):
...
@@ -153,77 +140,84 @@ class Module(object):
return
self
.
feed_target_names
,
self
.
fetch_targets
,
program
return
self
.
feed_target_names
,
self
.
fetch_targets
,
program
def
get_vars
(
self
):
# @deprecated
"""
# def get_vars(self):
Return variable list of the module program
# """
"""
# Return variable list of the module program
return
self
.
inference_program
.
list_vars
()
# """
# return self.inference_program.list_vars()
def
get_feed_var
(
self
,
key
,
signature
=
"default"
):
"""
# @deprecated
Get feed variable according to variable key and signature
# def get_feed_var(self, key, signature="default"):
"""
# """
for
var
in
self
.
inference_program
.
list_vars
():
# Get feed variable according to variable key and signature
if
var
.
name
==
self
.
config
.
feed_var_name
(
key
,
signature
):
# """
return
var
# for var in self.inference_program.list_vars():
# if var.name == self.config.feed_var_name(key, signature):
raise
Exception
(
"Can't find input var {}"
.
format
(
key
))
# return var
def
get_feed_var_by_index
(
self
,
index
,
signature
=
"default"
):
# raise Exception("Can't find input var {}".format(key))
feed_vars
=
self
.
get_feed_vars
(
signature
)
assert
index
<
len
(
# @deprecated
feed_vars
),
"index out of range index {}, len {}"
.
format
(
# def get_feed_var_by_index(self, index, signature="default"):
index
,
len
(
feed_vars
))
# feed_vars = self.get_feed_vars(signature)
return
feed_vars
[
index
]
# assert index < len(
# feed_vars), "index out of range index {}, len {}".format(
def
get_fetch_var_by_index
(
self
,
index
,
signature
=
"default"
):
# index, len(feed_vars))
fetch_vars
=
self
.
get_fetch_vars
(
signature
)
# return feed_vars[index]
assert
index
<
len
(
fetch_vars
),
"index out of range index {}, len {}"
.
format
(
# @deprecated
index
,
len
(
fetch_vars
))
# def get_fetch_var_by_index(self, index, signature="default"):
return
fetch_vars
[
index
]
# fetch_vars = self.get_fetch_vars(signature)
# assert index < len(
def
get_feed_vars
(
self
,
signature
=
"default"
):
# fetch_vars), "index out of range index {}, len {}".format(
"""
# index, len(fetch_vars))
Get feed variable according to variable key and signature
# return fetch_vars[index]
"""
feed_vars
=
[]
# @deprecated
for
feed_var
in
self
.
config
.
feed_var_names
(
signature
):
# def get_feed_vars(self, signature="default"):
find_var
=
False
# """
for
var
in
self
.
inference_program
.
list_vars
():
# Get feed variable according to variable key and signature
if
var
.
name
==
feed_var
.
var_name
:
# """
feed_vars
.
append
(
var
)
# feed_vars = []
find_var
=
True
# for feed_var in self.config.feed_var_names(signature):
if
not
find_var
:
# find_var = False
raise
Exception
(
"Can't find feed var {}"
.
format
(
feed_var_name
))
# for var in self.inference_program.list_vars():
# if var.name == feed_var.var_name:
return
feed_vars
# feed_vars.append(var)
# find_var = True
def
get_fetch_vars
(
self
,
signature
=
"default"
):
# if not find_var:
"""
# raise Exception("Can't find feed var {}".format(feed_var_name))
Get feed variable according to variable key and signature
"""
# return feed_vars
fetch_vars
=
[]
#TODO(ZeyuChen): use brute force to find variables, simple and easy to
# @deprecated
#understand
# def get_fetch_vars(self, signature="default"):
for
fetch_var
in
self
.
config
.
fetch_var_names
(
signature
):
# """
find_var
=
False
# Get feed variable according to variable key and signature
for
var
in
self
.
inference_program
.
list_vars
():
# """
if
var
.
name
==
fetch_var
.
var_name
:
# fetch_vars = []
fetch_vars
.
append
(
var
)
# #TODO(ZeyuChen): use brute force to find variables, simple and easy to
find_var
=
True
# #understand
if
not
find_var
:
# for fetch_var in self.config.fetch_var_names(signature):
raise
Exception
(
"Can't find feed var {}"
.
format
(
fetch_var_name
))
# find_var = False
# for var in self.inference_program.list_vars():
return
fetch_vars
# if var.name == fetch_var.var_name:
# fetch_vars.append(var)
def
get_fetch_var
(
self
,
key
,
signature
=
"default"
):
# find_var = True
"""
# if not find_var:
Get fetch variable according to variable key and signature
# raise Exception("Can't find feed var {}".format(fetch_var_name))
"""
for
var
in
self
.
inference_program
.
list_vars
():
# return fetch_vars
if
var
.
name
==
self
.
config
.
fetch_var_name
(
key
,
signature
):
return
var
# @deprecated
# def get_fetch_var(self, key, signature="default"):
# """
# Get fetch variable according to variable key and signature
# """
# for var in self.inference_program.list_vars():
# if var.name == self.config.fetch_var_name(key, signature):
# return var
def
get_inference_program
(
self
):
def
get_inference_program
(
self
):
return
self
.
inference_program
return
self
.
inference_program
...
...
tests/test_export_n_load_module.py
浏览文件 @
97a0781d
...
@@ -29,7 +29,7 @@ EMBED_SIZE = 16
...
@@ -29,7 +29,7 @@ EMBED_SIZE = 16
HIDDEN_SIZE
=
256
HIDDEN_SIZE
=
256
N
=
5
N
=
5
BATCH_SIZE
=
64
BATCH_SIZE
=
64
PASS_NUM
=
1
PASS_NUM
=
1
000
word_dict
=
paddle
.
dataset
.
imikolov
.
build_dict
()
word_dict
=
paddle
.
dataset
.
imikolov
.
build_dict
()
dict_size
=
len
(
word_dict
)
dict_size
=
len
(
word_dict
)
...
@@ -48,27 +48,6 @@ batch_reader = paddle.batch(mock_data, BATCH_SIZE)
...
@@ -48,27 +48,6 @@ batch_reader = paddle.batch(mock_data, BATCH_SIZE)
batch_size
=
0
batch_size
=
0
for
d
in
batch_reader
():
for
d
in
batch_reader
():
batch_size
+=
1
batch_size
+=
1
print
(
"imikolov simple dataset batch_size ="
,
batch_size
)
def
module_fn
(
trainable
=
False
):
# Define module function for saving module
# create word input
words
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
lod_level
=
1
,
dtype
=
"int64"
)
# create embedding
emb_name
=
"w2v_emb"
emb_param_attr
=
fluid
.
ParamAttr
(
name
=
emb_name
,
trainable
=
trainable
)
word_emb
=
fluid
.
layers
.
embedding
(
input
=
words
,
size
=
[
dict_size
,
EMBED_SIZE
],
dtype
=
'float32'
,
is_sparse
=
True
,
param_attr
=
emb_param_attr
)
# return feeder and fetch_list
return
words
,
word_emb
def
word2vec
(
words
,
is_sparse
,
trainable
=
True
):
def
word2vec
(
words
,
is_sparse
,
trainable
=
True
):
...
@@ -101,19 +80,31 @@ def word2vec(words, is_sparse, trainable=True):
...
@@ -101,19 +80,31 @@ def word2vec(words, is_sparse, trainable=True):
concat_emb
=
fluid
.
layers
.
concat
(
concat_emb
=
fluid
.
layers
.
concat
(
input
=
[
embed_first
,
embed_second
,
embed_third
,
embed_fourth
],
axis
=
1
)
input
=
[
embed_first
,
embed_second
,
embed_third
,
embed_fourth
],
axis
=
1
)
hidden1
=
fluid
.
layers
.
fc
(
input
=
concat_emb
,
size
=
HIDDEN_SIZE
,
act
=
'sigmoid'
)
hidden1
=
fluid
.
layers
.
fc
(
input
=
concat_emb
,
size
=
HIDDEN_SIZE
,
act
=
'sigmoid'
)
pred
ict_word
=
fluid
.
layers
.
fc
(
input
=
hidden1
,
size
=
dict_size
,
act
=
'softmax'
)
pred
_prob
=
fluid
.
layers
.
fc
(
input
=
hidden1
,
size
=
dict_size
,
act
=
'softmax'
)
# declare later than predict word
# declare later than predict word
next_word
=
fluid
.
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
dtype
=
'int64'
)
next_word
=
fluid
.
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
dtype
=
'int64'
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
pred
ict_word
,
label
=
next_word
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
pred
_prob
,
label
=
next_word
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
return
predict_word
,
avg_cost
return
pred_prob
,
avg_cost
def
get_dictionary
(
word_dict
):
dictionary
=
defaultdict
(
int
)
w_id
=
0
for
w
in
word_dict
:
if
isinstance
(
w
,
bytes
):
w
=
w
.
decode
(
"ascii"
)
dictionary
[
w
]
=
w_id
w_id
+=
1
return
dictionary
def
t
rain
(
use_cuda
=
False
):
def
t
est_create_w2v_module
(
use_gpu
=
False
):
place
=
fluid
.
CUDAPlace
(
0
)
if
use_
cuda
else
fluid
.
CPUPlace
()
place
=
fluid
.
CUDAPlace
(
0
)
if
use_
gpu
else
fluid
.
CPUPlace
()
first_word
=
fluid
.
layers
.
data
(
name
=
'firstw'
,
shape
=
[
1
],
dtype
=
'int64'
)
first_word
=
fluid
.
layers
.
data
(
name
=
'firstw'
,
shape
=
[
1
],
dtype
=
'int64'
)
second_word
=
fluid
.
layers
.
data
(
name
=
'secondw'
,
shape
=
[
1
],
dtype
=
'int64'
)
second_word
=
fluid
.
layers
.
data
(
name
=
'secondw'
,
shape
=
[
1
],
dtype
=
'int64'
)
...
@@ -122,12 +113,12 @@ def train(use_cuda=False):
...
@@ -122,12 +113,12 @@ def train(use_cuda=False):
next_word
=
fluid
.
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
dtype
=
'int64'
)
next_word
=
fluid
.
layers
.
data
(
name
=
'nextw'
,
shape
=
[
1
],
dtype
=
'int64'
)
word_list
=
[
first_word
,
second_word
,
third_word
,
forth_word
,
next_word
]
word_list
=
[
first_word
,
second_word
,
third_word
,
forth_word
,
next_word
]
pred
ict_word
,
avg_cost
=
word2vec
(
word_list
,
is_sparse
=
True
)
pred
_prob
,
avg_cost
=
word2vec
(
word_list
,
is_sparse
=
True
)
main_program
=
fluid
.
default_main_program
()
main_program
=
fluid
.
default_main_program
()
startup_program
=
fluid
.
default_startup_program
()
startup_program
=
fluid
.
default_startup_program
()
sgd_optimizer
=
fluid
.
optimizer
.
SGDOptimizer
(
learning_rate
=
1e-
3
)
sgd_optimizer
=
fluid
.
optimizer
.
SGDOptimizer
(
learning_rate
=
1e-
2
)
sgd_optimizer
.
minimize
(
avg_cost
)
sgd_optimizer
.
minimize
(
avg_cost
)
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
...
@@ -136,8 +127,6 @@ def train(use_cuda=False):
...
@@ -136,8 +127,6 @@ def train(use_cuda=False):
step
=
0
step
=
0
for
epoch
in
range
(
0
,
PASS_NUM
):
for
epoch
in
range
(
0
,
PASS_NUM
):
for
mini_batch
in
batch_reader
():
for
mini_batch
in
batch_reader
():
# print("mini_batch", mini_batch)
# 定义输入变量
feed_var_list
=
[
feed_var_list
=
[
main_program
.
global_block
().
var
(
"firstw"
),
main_program
.
global_block
().
var
(
"firstw"
),
main_program
.
global_block
().
var
(
"secondw"
),
main_program
.
global_block
().
var
(
"secondw"
),
...
@@ -154,90 +143,52 @@ def train(use_cuda=False):
...
@@ -154,90 +143,52 @@ def train(use_cuda=False):
if
step
%
100
==
0
:
if
step
%
100
==
0
:
print
(
"Epoch={} Step={} Cost={}"
.
format
(
epoch
,
step
,
cost
[
0
]))
print
(
"Epoch={} Step={} Cost={}"
.
format
(
epoch
,
step
,
cost
[
0
]))
saved_mod
el_dir
=
"./tmp/word2vec_test_model
"
saved_mod
ule_dir
=
"./tmp/word2vec_test_module
"
# save inference model including feed and fetch variable info
# save inference model including feed and fetch variable info
fluid
.
io
.
save_inference_model
(
dictionary
=
get_dictionary
(
word_dict
)
dirname
=
saved_model_dir
,
feeded_var_names
=
[
"firstw"
,
"secondw"
,
"thirdw"
,
"fourthw"
],
module_inputs
=
[
target_vars
=
[
predict_word
],
main_program
.
global_block
().
var
(
"firstw"
),
executor
=
exe
)
main_program
.
global_block
().
var
(
"secondw"
),
main_program
.
global_block
().
var
(
"thirdw"
),
dictionary
=
defaultdict
(
int
)
main_program
.
global_block
().
var
(
"fourthw"
),
w_id
=
0
]
for
w
in
word_dict
:
signature
=
hub
.
create_signature
(
if
isinstance
(
w
,
bytes
):
"default"
,
inputs
=
module_inputs
,
outputs
=
[
pred_prob
])
w
=
w
.
decode
(
"ascii"
)
hub
.
create_module
(
dictionary
[
w
]
=
w_id
sign_arr
=
signature
,
w_id
+=
1
program
=
fluid
.
default_main_program
(),
module_dir
=
saved_module_dir
,
# save word dict to assets folder
word_dict
=
dictionary
)
config
=
hub
.
ModuleConfig
(
saved_model_dir
)
config
.
save_dict
(
word_dict
=
dictionary
)
config
.
dump
()
def
test_load_w2v_module
(
use_gpu
=
False
):
def
test_save_module
(
use_cuda
=
False
):
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
main_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_program
,
startup_program
):
words
,
word_emb
=
module_fn
()
exe
.
run
(
startup_program
)
# load inference embedding parameters
saved_model_dir
=
"./tmp/word2vec_test_model"
fluid
.
io
.
load_inference_model
(
executor
=
exe
,
dirname
=
saved_model_dir
)
# feed_var_list = [main_program.global_block().var("words")]
# feeder = fluid.DataFeeder(feed_list=feed_var_list, place=place)
# results = exe.run(
# main_program,
# feed=feeder.feed([[[1, 2, 3, 4, 5]]]),
# fetch_list=[word_emb],
# return_numpy=False)
# np_result = np.array(results[0])
# print(np_result)
# save module_dir
saved_module_dir
=
"./tmp/word2vec_test_module"
fluid
.
io
.
save_inference_model
(
dirname
=
saved_module_dir
,
feeded_var_names
=
[
"words"
],
target_vars
=
[
word_emb
],
executor
=
exe
)
dictionary
=
defaultdict
(
int
)
w_id
=
0
for
w
in
word_dict
:
if
isinstance
(
w
,
bytes
):
w
=
w
.
decode
(
"ascii"
)
dictionary
[
w
]
=
w_id
w_id
+=
1
signature
=
hub
.
create_signature
(
"default"
,
inputs
=
[
words
],
outputs
=
[
word_emb
])
hub
.
create_module
(
sign_arr
=
signature
,
program
=
main_program
,
path
=
saved_module_dir
)
def
test_load_module
(
use_cuda
=
False
):
saved_module_dir
=
"./tmp/word2vec_test_module"
saved_module_dir
=
"./tmp/word2vec_test_module"
w2v_module
=
hub
.
Module
(
module_dir
=
saved_module_dir
)
w2v_module
=
hub
.
Module
(
module_dir
=
saved_module_dir
)
feed_list
,
fetch_list
,
program
=
w2v_module
(
sign_name
=
"default"
,
trainable
=
False
)
with
fluid
.
program_guard
(
main_program
=
program
):
pred_prob
=
fetch_list
[
0
]
pred_word
=
fluid
.
layers
.
argmax
(
x
=
pred_prob
,
axis
=
1
)
# set place, executor, datafeeder
place
=
fluid
.
CUDAPlace
(
0
)
if
use_gpu
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
feed_list
)
word_ids
=
[[
1
,
2
,
3
,
4
]]
result
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
feeder
.
feed
(
word_ids
),
fetch_list
=
[
pred_word
],
return_numpy
=
True
)
word_ids
=
[[
1
,
2
,
3
,
4
,
5
]]
# test sequence
print
(
result
)
word_ids_lod_tensor
=
w2v_module
.
_preprocess_input
(
word_ids
)
result
=
w2v_module
({
"words"
:
word_ids_lod_tensor
})
print
(
result
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
use_cuda
=
False
use_gpu
=
False
print
(
"train..."
)
print
(
"test create word2vec module"
)
train
(
use_cuda
)
test_create_w2v_module
(
use_gpu
)
print
(
"save module..."
)
print
(
"test load word2vec module"
)
test_save_module
()
test_load_w2v_module
(
use_gpu
=
False
)
print
(
"load module..."
)
test_load_module
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录