Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
4920392b
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 1 年 前同步成功
通知
280
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4920392b
编写于
3月 31, 2019
作者:
Z
Zeyu Chen
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
migrate all tools package to common
上级
6d13d7a4
变更
21
隐藏空白更改
内联
并排
Showing
21 changed files
with
62 additions
and
40 deletions
+62
-40
demo/bert-cls/reader/batching.py
demo/bert-cls/reader/batching.py
+6
-2
demo/bert-cls/reader/cls.py
demo/bert-cls/reader/cls.py
+1
-0
demo/bert-cls/run_fintune_with_hub.sh
demo/bert-cls/run_fintune_with_hub.sh
+1
-1
paddle_hub/__init__.py
paddle_hub/__init__.py
+12
-8
paddle_hub/common/__init__.py
paddle_hub/common/__init__.py
+0
-0
paddle_hub/common/arg_helper.py
paddle_hub/common/arg_helper.py
+1
-1
paddle_hub/common/dir.py
paddle_hub/common/dir.py
+0
-0
paddle_hub/common/downloader.py
paddle_hub/common/downloader.py
+2
-2
paddle_hub/common/hub_server.py
paddle_hub/common/hub_server.py
+2
-2
paddle_hub/common/logger.py
paddle_hub/common/logger.py
+0
-0
paddle_hub/common/paddle_helper.py
paddle_hub/common/paddle_helper.py
+3
-3
paddle_hub/common/utils.py
paddle_hub/common/utils.py
+1
-1
paddle_hub/dataset/chnsenticorp.py
paddle_hub/dataset/chnsenticorp.py
+15
-1
paddle_hub/finetune/finetune.py
paddle_hub/finetune/finetune.py
+1
-1
paddle_hub/io/augmentation.py
paddle_hub/io/augmentation.py
+1
-1
paddle_hub/io/type.py
paddle_hub/io/type.py
+2
-2
paddle_hub/module/checker.py
paddle_hub/module/checker.py
+1
-1
paddle_hub/module/manager.py
paddle_hub/module/manager.py
+2
-2
paddle_hub/module/module.py
paddle_hub/module/module.py
+4
-5
paddle_hub/module/signature.py
paddle_hub/module/signature.py
+1
-1
tests/tclist_all
tests/tclist_all
+6
-6
未找到文件。
demo/bert-cls/reader/batching.py
浏览文件 @
4920392b
...
...
@@ -77,6 +77,7 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
def
prepare_batch_data
(
insts
,
total_token_num
,
voc_size
=
0
,
max_seq_len
=
128
,
pad_id
=
None
,
cls_id
=
None
,
sep_id
=
None
,
...
...
@@ -115,15 +116,17 @@ def prepare_batch_data(insts,
out
=
batch_src_ids
# Second step: padding
src_id
,
self_input_mask
=
pad_batch_data
(
out
,
pad_idx
=
pad_id
,
return_input_mask
=
True
)
out
,
pad_idx
=
pad_id
,
max_seq_len
=
max_seq_len
,
return_input_mask
=
True
)
pos_id
=
pad_batch_data
(
batch_pos_ids
,
pad_idx
=
pad_id
,
max_seq_len
=
max_seq_len
,
return_pos
=
False
,
return_input_mask
=
False
)
sent_id
=
pad_batch_data
(
batch_sent_ids
,
pad_idx
=
pad_id
,
max_seq_len
=
max_seq_len
,
return_pos
=
False
,
return_input_mask
=
False
)
...
...
@@ -139,6 +142,7 @@ def prepare_batch_data(insts,
def
pad_batch_data
(
insts
,
pad_idx
=
0
,
max_seq_len
=
128
,
return_pos
=
False
,
return_input_mask
=
False
,
return_max_len
=
False
,
...
...
@@ -149,7 +153,7 @@ def pad_batch_data(insts,
"""
return_list
=
[]
#max_len = max(len(inst) for inst in insts)
max_len
=
50
max_len
=
max_seq_len
# Any token included in dict can be used to pad, since the paddings' loss
# will be masked out by weights and make no effect on parameter gradients.
...
...
demo/bert-cls/reader/cls.py
浏览文件 @
4920392b
...
...
@@ -93,6 +93,7 @@ class DataProcessor(object):
batch_data
,
total_token_num
,
voc_size
=-
1
,
max_seq_len
=
self
.
max_seq_len
,
pad_id
=
self
.
vocab
[
"[PAD]"
],
cls_id
=
self
.
vocab
[
"[CLS]"
],
sep_id
=
self
.
vocab
[
"[SEP]"
],
...
...
demo/bert-cls/run_fintune_with_hub.sh
浏览文件 @
4920392b
...
...
@@ -15,5 +15,5 @@ python -u finetune_with_hub.py \
--checkpoint_dir
$CKPT_DIR
\
--warmup_proportion
0.0
\
--epoch
3
\
--max_seq_len
50
\
--max_seq_len
128
\
--learning_rate
5e-5
paddle_hub/__init__.py
浏览文件 @
4920392b
...
...
@@ -11,21 +11,25 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.dir
import
USER_HOME
from
.dir
import
HUB_HOME
from
.dir
import
MODULE_HOME
from
.dir
import
CACHE_HOME
from
.
import
module
from
.
import
tools
from
.
import
common
from
.
import
io
from
.common.dir
import
USER_HOME
from
.common.dir
import
HUB_HOME
from
.common.dir
import
MODULE_HOME
from
.common.dir
import
CACHE_HOME
from
.common.logger
import
logger
from
.common.paddle_helper
import
connect_program
from
.common.hub_server
import
default_hub_server
from
.module.module
import
Module
,
create_module
from
.module.base_processor
import
BaseProcessor
from
.module.signature
import
Signature
,
create_signature
from
.module.manager
import
default_module_manager
from
.tools.logger
import
logger
from
.tools.paddle_helper
import
connect_program
from
.io.type
import
DataType
from
.hub_server
import
default_hub_server
from
.finetune.network
import
append_mlp_classifier
from
.finetune.finetune
import
finetune_and_eval
from
.finetune.config
import
FinetuneConfig
...
...
paddle_hub/
tools
/__init__.py
→
paddle_hub/
common
/__init__.py
浏览文件 @
4920392b
文件已移动
paddle_hub/
tools
/arg_helper.py
→
paddle_hub/
common
/arg_helper.py
浏览文件 @
4920392b
...
...
@@ -15,7 +15,7 @@
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle_hub.
tools
.logger
import
logger
from
paddle_hub.
common
.logger
import
logger
import
six
import
distutils.util
...
...
paddle_hub/dir.py
→
paddle_hub/
common/
dir.py
浏览文件 @
4920392b
文件已移动
paddle_hub/
tools
/downloader.py
→
paddle_hub/
common
/downloader.py
浏览文件 @
4920392b
...
...
@@ -26,8 +26,8 @@ import requests
import
tempfile
import
tarfile
from
paddle_hub.
tools
import
utils
from
paddle_hub.
tools
.logger
import
logger
from
paddle_hub.
common
import
utils
from
paddle_hub.
common
.logger
import
logger
from
paddle_hub.io.reader
import
csv_reader
__all__
=
[
'Downloader'
]
...
...
paddle_hub/hub_server.py
→
paddle_hub/
common/
hub_server.py
浏览文件 @
4920392b
...
...
@@ -15,8 +15,8 @@
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle_hub.
tools
import
utils
from
paddle_hub.
tools
.downloader
import
default_downloader
from
paddle_hub.
common
import
utils
from
paddle_hub.
common
.downloader
import
default_downloader
from
paddle_hub.io.reader
import
csv_reader
import
os
import
time
...
...
paddle_hub/
tools
/logger.py
→
paddle_hub/
common
/logger.py
浏览文件 @
4920392b
文件已移动
paddle_hub/
tools
/paddle_helper.py
→
paddle_hub/
common
/paddle_helper.py
浏览文件 @
4920392b
...
...
@@ -15,9 +15,9 @@
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle_hub
.module
import
module_desc_pb2
from
paddle_hub.tools
.utils
import
from_pyobj_to_flexible_data
,
from_flexible_data_to_pyobj
from
paddle_hub.tools
.logger
import
logger
from
.
.module
import
module_desc_pb2
from
.utils
import
from_pyobj_to_flexible_data
,
from_flexible_data_to_pyobj
from
.logger
import
logger
import
paddle
import
paddle.fluid
as
fluid
import
copy
...
...
paddle_hub/
tools
/utils.py
→
paddle_hub/
common
/utils.py
浏览文件 @
4920392b
...
...
@@ -18,7 +18,7 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle_hub.module
import
module_desc_pb2
from
paddle_hub.
tools
.logger
import
logger
from
paddle_hub.
common
.logger
import
logger
import
paddle
import
paddle.fluid
as
fluid
import
os
...
...
paddle_hub/dataset/chnsenticorp.py
浏览文件 @
4920392b
...
...
@@ -22,7 +22,21 @@ from collections import namedtuple
DATA_URL
=
"https://paddlehub-dataset.bj.bcebos.com/chnsenticorp_data.tar.gz"
class
ChnSentiCorp
(
object
):
class
HubDataset
(
object
):
def
get_train_examples
(
self
):
raise
NotImplementedError
()
def
get_dev_examples
(
self
):
raise
NotImplementedError
()
def
get_test_examples
(
self
):
raise
NotImplementedError
()
def
get_val_examples
(
self
):
return
self
.
get_dev_examples
()
class
ChnSentiCorp
(
HubDataset
):
def
__init__
(
self
):
ret
,
tips
,
self
.
dataset_dir
=
default_downloader
.
download_file_and_uncompress
(
url
=
DATA_URL
,
save_path
=
DATA_HOME
,
print_progress
=
True
)
...
...
paddle_hub/finetune/finetune.py
浏览文件 @
4920392b
...
...
@@ -23,7 +23,7 @@ import paddle
import
paddle.fluid
as
fluid
from
visualdl
import
LogWriter
from
paddle_hub.
tools
.logger
import
logger
from
paddle_hub.
common
.logger
import
logger
from
paddle_hub.finetune.optimization
import
bert_finetune
from
paddle_hub.finetune.checkpoint
import
load_checkpoint
,
save_checkpoint
...
...
paddle_hub/io/augmentation.py
浏览文件 @
4920392b
...
...
@@ -16,7 +16,7 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
PIL
import
Image
,
ImageEnhance
from
paddle_hub.
tools
import
utils
from
paddle_hub.
common
import
utils
import
numpy
as
np
...
...
paddle_hub/io/type.py
浏览文件 @
4920392b
...
...
@@ -14,8 +14,8 @@
from
enum
import
Enum
from
PIL
import
Image
from
paddle_hub.
tools
.logger
import
logger
from
paddle_hub.
tools
import
utils
from
paddle_hub.
common
.logger
import
logger
from
paddle_hub.
common
import
utils
class
DataType
(
Enum
):
...
...
paddle_hub/module/checker.py
浏览文件 @
4920392b
...
...
@@ -14,7 +14,7 @@
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle_hub.
tools
.logger
import
logger
from
paddle_hub.
common
.logger
import
logger
from
paddle_hub.module
import
check_info_pb2
from
paddle_hub.version
import
hub_version
,
module_proto_version
import
os
...
...
paddle_hub/module/manager.py
浏览文件 @
4920392b
...
...
@@ -19,8 +19,8 @@ from __future__ import print_function
import
os
import
shutil
from
paddle_hub.
tools
import
utils
from
paddle_hub.
tools
.downloader
import
default_downloader
from
paddle_hub.
common
import
utils
from
paddle_hub.
common
.downloader
import
default_downloader
import
paddle_hub
as
hub
...
...
paddle_hub/module/module.py
浏览文件 @
4920392b
...
...
@@ -15,10 +15,10 @@
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle_hub.
tools
import
utils
from
paddle_hub.
tools
.logger
import
logger
from
paddle_hub.
tools
.downloader
import
default_downloader
from
paddle_hub.
tools
import
paddle_helper
from
paddle_hub.
common
import
utils
from
paddle_hub.
common
.logger
import
logger
from
paddle_hub.
common
.downloader
import
default_downloader
from
paddle_hub.
common
import
paddle_helper
from
paddle_hub.module
import
module_desc_pb2
from
paddle_hub.module
import
check_info_pb2
from
paddle_hub.module.signature
import
Signature
,
create_signature
...
...
@@ -458,7 +458,6 @@ class Module(object):
# TODO(ZeyuChen) encapsulate into a function
# update BERT/ERNIE's input tensor's sequence length to max_seq_len
if
self
.
name
.
startswith
(
"bert"
)
or
self
.
name
.
startswith
(
"ernie"
):
print
(
"module_name"
,
self
.
name
)
MAX_SEQ_LENGTH
=
512
if
max_seq_len
>
MAX_SEQ_LENGTH
or
max_seq_len
<=
0
:
raise
ValueError
(
...
...
paddle_hub/module/signature.py
浏览文件 @
4920392b
...
...
@@ -16,7 +16,7 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle.fluid.framework
import
Variable
from
paddle_hub.
tools
.utils
import
to_list
from
paddle_hub.
common
.utils
import
to_list
class
Signature
:
...
...
tests/tclist_all
浏览文件 @
4920392b
test_downloader
test_export_n_load_module
#
test_downloader
#
test_export_n_load_module
#test_module
test_train_w2v
test_pyobj_serialize
test_signature
test_param_serialize
\ No newline at end of file
#test_train_w2v
#test_pyobj_serialize
#test_signature
#test_param_serialize
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录