Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PALM
提交
f9e37ecd
P
PALM
项目概览
PaddlePaddle
/
PALM
通知
5
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
10
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PALM
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
10
Issue
10
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f9e37ecd
编写于
12月 10, 2019
作者:
X
xixiaoyao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
release 0.3-api
上级
062d27a7
变更
32
展开全部
隐藏空白更改
内联
并排
Showing
32 changed file
with
932 addition
and
6 deletion
+932
-6
backbone/README.md
backbone/README.md
+0
-0
backbone/__init__.py
backbone/__init__.py
+0
-0
backbone/bert.py
backbone/bert.py
+0
-0
backbone/ernie.py
backbone/ernie.py
+0
-0
backbone/utils/__init__.py
backbone/utils/__init__.py
+0
-0
backbone/utils/transformer.py
backbone/utils/transformer.py
+0
-0
demo/demo2/config.yaml
demo/demo2/config.yaml
+1
-1
demo/demo2/run.py
demo/demo2/run.py
+39
-3
interface.py
interface.py
+0
-0
paddlepalm/__init__.py
paddlepalm/__init__.py
+2
-0
paddlepalm/conf_controller.py
paddlepalm/conf_controller.py
+1
-1
paddlepalm/controller.py
paddlepalm/controller.py
+617
-0
paddlepalm/downloader.py
paddlepalm/downloader.py
+0
-0
paddlepalm/optimizer/.adam.py.swp
paddlepalm/optimizer/.adam.py.swp
+0
-0
paddlepalm/optimizer/adam.py
paddlepalm/optimizer/adam.py
+18
-0
paddlepalm/task_instance.py
paddlepalm/task_instance.py
+248
-1
reader/__init__.py
reader/__init__.py
+0
-0
reader/cls.py
reader/cls.py
+0
-0
reader/match.py
reader/match.py
+6
-0
reader/mlm.py
reader/mlm.py
+0
-0
reader/mrc.py
reader/mrc.py
+0
-0
reader/utils/__init__.py
reader/utils/__init__.py
+0
-0
reader/utils/batching4bert.py
reader/utils/batching4bert.py
+0
-0
reader/utils/batching4ernie.py
reader/utils/batching4ernie.py
+0
-0
reader/utils/mlm_batching.py
reader/utils/mlm_batching.py
+0
-0
reader/utils/mrqa_helper.py
reader/utils/mrqa_helper.py
+0
-0
reader/utils/reader4ernie.py
reader/utils/reader4ernie.py
+0
-0
tasktype/__init__.py
tasktype/__init__.py
+0
-0
tasktype/cls.py
tasktype/cls.py
+0
-0
tasktype/match.py
tasktype/match.py
+0
-0
tasktype/mlm.py
tasktype/mlm.py
+0
-0
tasktype/mrc.py
tasktype/mrc.py
+0
-0
未找到文件。
paddlepalm/
backbone/README.md
→
backbone/README.md
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
backbone/__init__.py
→
backbone/__init__.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
backbone/bert.py
→
backbone/bert.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
backbone/ernie.py
→
backbone/ernie.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
backbone/utils/__init__.py
→
backbone/utils/__init__.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
backbone/utils/transformer.py
→
backbone/utils/transformer.py
浏览文件 @
f9e37ecd
文件已移动
demo/demo2/config.yaml
浏览文件 @
f9e37ecd
t
ask_instance
:
"
mrqa,
mlm4mrqa,
match4mrqa"
ask_instance
:
"
mrqa,
mlm4mrqa,
match4mrqa"
target_tag
:
1, 0,
0
mix_ratio
:
1.0, 0.5,
0.5
...
...
demo/demo2/run.py
浏览文件 @
f9e37ecd
import
paddlepalm
as
palm
if
__name__
==
'__main__'
:
controller
=
palm
.
Controller
(
'config.yaml'
,
task_dir
=
'tasks'
)
match_reader
=
palm
.
reader
.
match
(
train_file
,
file_format
=
'csv'
,
tokenizer
=
'wordpiece'
,
lang
=
'en'
)
mrc_reader
=
palm
.
reader
.
mrc
(
train_file
,
phase
=
'train'
)
mlm_reader
=
palm
.
reader
.
mlm
(
train_file
,
phase
=
'train'
)
palm
.
reader
.
match
=
palm
.
tasktype
.
cls
(
num_classes
=
4
)
mrc
=
palm
.
tasktype
.
match
(
learning_strategy
=
'pairwise'
)
mlm
=
palm
.
tasktype
.
mlm
()
mlm
.
print
()
bb_flags
=
palm
.
load_json
(
'./pretrain/ernie/ernie_config.json'
)
bb
=
palm
.
backbone
.
ernie
(
bb_flags
[
'xx'
],
xxx
)
bb
.
print
()
match4mrqa
=
palm
.
Task
(
'match4mrqa'
,
match_reader
,
match_tt
)
mrc4mrqa
=
palm
.
Task
(
'match4mrqa'
,
match_reader
,
match_tt
)
# match4mrqa.reuse_with(mrc4mrqa)
controller
=
palm
.
Controller
([
mrqa
,
match4mrqa
,
mlm4mrqa
])
loss
=
controller
.
build_forward
(
bb
,
mask_task
=
[])
n_steps
=
controller
.
estimate_train_steps
(
basetask
=
mrqa
,
num_epochs
=
2
,
batch_size
=
8
,
dev_count
=
4
)
adam
=
palm
.
optimizer
.
Adam
(
loss
)
sched
=
palm
.
schedualer
.
LinearWarmup
(
learning_rate
,
max_train_steps
=
n_steps
,
warmup_steps
=
0.1
*
n_steps
)
controller
.
build_backward
(
optimizer
=
adam
,
schedualer
=
sched
,
weight_decay
=
0.001
,
use_ema
=
True
,
ema_decay
=
0.999
)
controller
.
random_init_params
()
controller
.
load_pretrain
(
'../../pretrain_model/ernie/params'
)
controller
.
train
()
controller
=
palm
.
Controller
(
config
=
'config.yaml'
,
task_dir
=
'tasks'
,
for_train
=
False
)
controller
.
pred
(
'mrqa'
,
inference_model_dir
=
'output_model/secondrun/mrqa/infer_model'
)
# controller = palm.Controller(config='config.yaml', task_dir='tasks', for_train=False)
# controller.pred('mrqa', inference_model_dir='output_model/secondrun/mrqa/infer_model')
paddlepalm/
interface.py
→
interface.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/__init__.py
浏览文件 @
f9e37ecd
import
sys
from
paddlepalm.mtl_controller
import
Controller
from
paddlepalm.task_instance
import
Task
sys
.
path
.
append
(
'paddlepalm'
)
paddlepalm/
mtl
_controller.py
→
paddlepalm/
conf
_controller.py
浏览文件 @
f9e37ecd
...
...
@@ -182,7 +182,7 @@ def _fit_attr(conf, fit_attr, strict=False):
return
conf
class
Controller
(
object
):
class
Con
fCon
troller
(
object
):
def
__init__
(
self
,
config
,
task_dir
=
'.'
,
for_train
=
True
):
"""
...
...
paddlepalm/controller.py
0 → 100755
浏览文件 @
f9e37ecd
此差异已折叠。
点击以展开。
paddlepalm/download.py
→
paddlepalm/download
er
.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/optimizer/.adam.py.swp
0 → 100644
浏览文件 @
f9e37ecd
文件已添加
paddlepalm/optimizer/adam.py
浏览文件 @
f9e37ecd
...
...
@@ -21,6 +21,24 @@ from __future__ import print_function
import
numpy
as
np
import
paddle.fluid
as
fluid
class schedualer(object):
    """Base interface for learning-rate schedules.

    Subclasses override :meth:`lr` to report a learning rate. The base
    implementation is a no-op and returns ``None``.
    """

    def __init__(self):
        pass

    def lr(self):
        """Return the learning rate; the base class provides none."""
        pass


class ConstantLearning(schedualer):
    """Schedule that always reports the learning rate it was built with.

    Declared as a class (the previous ``def`` form only created nested local
    functions and returned ``None`` when called).
    """

    def __init__(self, lr):
        """Store the fixed learning rate.

        Args:
            lr: the value later returned unchanged by :meth:`lr`.
        """
        schedualer.__init__(self)
        self._lr = lr

    def lr(self):
        """Return the learning rate given at construction time."""
        return self._lr


class LinearWarmupLearning(schedualer):
    """Placeholder for a linear-warmup schedule.

    NOTE(review): the original declaration had no body at all, which is a
    syntax error. No behaviour is defined here beyond what the base class
    offers; presumably it is meant to wrap ``linear_warmup_decay`` defined
    below in this module -- confirm before relying on it.
    """
def
linear_warmup_decay
(
learning_rate
,
warmup_steps
,
num_train_steps
):
""" Applies linear warmup of learning rate from 0 and decay to 0."""
with
fluid
.
default_main_program
().
_lr_schedule_guard
():
...
...
paddlepalm/task_instance.py
浏览文件 @
f9e37ecd
...
...
@@ -22,6 +22,253 @@ import importlib
from
paddlepalm.default_settings
import
*
class Task(object):
    """A named task made of a reader and a task layer for each running phase.

    The object keeps three groups of state:

    * per-phase readers and task layers (``_reader`` / ``_task_layer``,
      keyed by ``'train'``, ``'eval'`` and ``'pred'``);
    * training progress bookkeeping (current step/epoch, steps per epoch,
      expected number of steps, finish flag);
    * the names and variables needed to save and reload an inference model.

    Several underscore-prefixed names (``_is_target``, ``_is_first_target``,
    ``_expected_train_steps``, ``_cur_train_epoch``, ``_cur_train_step``,
    ``_steps_pur_epoch``) are properties whose setters carry extra logic.
    Each of them is backed by a differently named storage attribute so that
    the property never reads or writes itself.
    """

    def __init__(self, name, reader, taskblock, mix_ratio=1.0,
                 pred_reader=None, pred_taskblock=None,
                 infermodel_save_path=None, save_infermodel_every_n_steps=-1,
                 as_target_task=True, task_layer_reuse=None, silent=False):
        """Create a task.

        Args:
            name: task name; used in log messages and as the default reuse
                scope of the task layer.
            reader: reader stored for the ``'train'`` phase.
            taskblock: task layer stored for the ``'train'`` phase. The
                methods of this class call ``build``, ``postprocess`` and
                ``epoch_postprocess`` on it.
            mix_ratio: value exposed through the ``mix_ratio`` property.
            pred_reader: reader stored for the ``'pred'`` phase.
            pred_taskblock: task layer stored for the ``'pred'`` phase.
            infermodel_save_path: directory of the exported inference model.
                When ``None`` no path is recorded and ``save``/``_load``
                need one to be supplied or assigned first.
            save_infermodel_every_n_steps: value exposed through the
                property of the same name.
            as_target_task: initial value of ``_is_target``.
            task_layer_reuse: scope name to reuse for the task layer;
                defaults to ``name``.
            silent: when true, the informational prints of the setters are
                suppressed.
        """
        self._name = name
        self._verbose = not silent

        # There is no configuration object on this class to derive a default
        # location from, so an omitted path simply stays unset.
        self._save_infermodel_path = infermodel_save_path

        self._save_infermodel_every_n_steps = save_infermodel_every_n_steps
        # Storage is written directly (not through the property setters) so
        # that constructing a task prints nothing and needs no other state.
        self._target_flag = bool(as_target_task)
        self._first_target = False
        self._task_reuse_scope = name if task_layer_reuse is None else task_layer_reuse

        self._feeded_var_names = None
        self._target_vars = None

        # training process management
        self._mix_ratio = mix_ratio
        self._expected_steps = None
        self._expected_train_epochs = None
        self._epoch_size = None
        self._epoch_counter = 0
        self._step_counter = 0
        self._train_finish = False

        # Dataset readers for each running phase (train, eval, pred);
        # the key is the phase and the value is a Reader instance.
        self._reader = {'train': reader, 'eval': None, 'pred': pred_reader}
        self._input_layer = None
        self._inputname_to_varname = {}
        self._task_layer = {'train': taskblock, 'eval': None, 'pred': pred_taskblock}
        self._pred_input_name_list = []
        self._pred_input_varname_list = []
        self._pred_fetch_name_list = []
        self._pred_fetch_var_list = []

        self._exe = fluid.Executor(fluid.CPUPlace())

        # Maps a key of the saved ``__conf__`` file to the attribute
        # (written as ``self.<attr>``) whose value is stored under it.
        self._save_protocol = {
            'input_names': 'self._pred_input_name_list',
            'input_varnames': 'self._pred_input_varname_list',
            'fetch_list': 'self._pred_fetch_name_list'}

        self._lock = False

    @staticmethod
    def _protocol_attr(expr):
        """Turn a ``'self.<attr>'`` protocol entry into the bare ``<attr>``."""
        return expr.split('.', 1)[1]

    def _build_task_layer(self, net_inputs, phase, scope=""):
        """Build the task layer of ``phase`` and return its output variables.

        For the ``'pred'`` phase the names and variables of the returned
        mapping are also recorded as the prediction fetch lists; a missing
        or empty mapping resets both lists.
        """
        output_vars = self._task_layer[phase].build(net_inputs, scope_name=scope)
        if phase == 'pred':
            if output_vars:
                self._pred_fetch_name_list, self._pred_fetch_var_list = zip(*output_vars.items())
            else:
                # zip(*...) of an empty mapping yields nothing to unpack.
                self._pred_fetch_name_list = []
                self._pred_fetch_var_list = []
        return output_vars

    def _postprocess(self, rt_outputs, phase):
        """Delegate per-batch post-processing to the task layer of ``phase``."""
        return self._task_layer[phase].postprocess(rt_outputs)

    def _epoch_postprocess(self, epoch_inputs, phase):
        """Delegate per-epoch post-processing to the task layer of ``phase``."""
        return self._task_layer[phase].epoch_postprocess(epoch_inputs)

    def save(self, suffix=''):
        """Export the inference model and a ``__conf__`` description file.

        Args:
            suffix: text appended to the configured save directory.

        Raises:
            ValueError: if no inference-model path has been set.
        """
        if self._save_infermodel_path is None:
            raise ValueError("{}: infermodel_save_path is not set".format(self._name))
        dirpath = self._save_infermodel_path + suffix
        self._pred_input_varname_list = [str(i) for i in self._pred_input_varname_list]

        prog = fluid.default_main_program().clone()
        fluid.io.save_inference_model(
            dirpath, self._pred_input_varname_list, self._pred_fetch_var_list,
            self._exe, prog)

        # Plain attribute lookup replaces the former exec() on the protocol
        # strings; the stored values are the same.
        conf = {}
        for k, strv in self._save_protocol.items():
            conf[k] = getattr(self, self._protocol_attr(strv))
        with open(os.path.join(dirpath, '__conf__'), 'w') as writer:
            writer.write(json.dumps(conf, indent=1))
        print(self._name + ': inference model saved at ' + dirpath)

    def _load(self, infer_model_path=None):
        """Reload a saved inference model and return its program.

        Args:
            infer_model_path: directory to load from; defaults to the
                configured save path.

        Raises:
            ValueError: if neither the argument nor the configured path
                provides a directory.
        """
        if infer_model_path is None:
            infer_model_path = self._save_infermodel_path
        if infer_model_path is None:
            raise ValueError("{}: infermodel_save_path is not set".format(self._name))
        # The context manager closes the file; it was previously left open.
        with open(os.path.join(infer_model_path, '__conf__')) as conf_file:
            conf = json.load(conf_file)
        for k, v in conf.items():
            setattr(self, self._protocol_attr(self._save_protocol[k]), v)
        pred_prog, self._pred_input_varname_list, self._pred_fetch_var_list = \
            fluid.io.load_inference_model(infer_model_path, self._exe)
        print(self._name + ': inference model loaded from ' + infer_model_path)
        return pred_prog

    @property
    def name(self):
        """Task name given at construction time."""
        return self._name

    # NOTE: ``_reader`` and ``_task_layer`` are plain attributes assigned in
    # __init__. The former same-named read-only properties returned
    # themselves (endless recursion) and made the assignments in __init__
    # fail, so they are gone; attribute access is unchanged for callers.
    # The ``_Reader`` / ``_Paradigm`` accessors were dropped for the same
    # reason -- nothing in this class ever assigns them.

    @property
    def _pred_input(self):
        """Pairs of (input name, variable name) used for prediction."""
        return list(zip(self._pred_input_name_list, self._pred_input_varname_list))

    @_pred_input.setter
    def _pred_input(self, val):
        """Record prediction inputs from a ``{input name: variable}`` dict.

        Only the ``name`` attribute of each variable is kept.
        """
        assert isinstance(val, dict)
        self._pred_input_name_list = list(val.keys())
        self._pred_input_varname_list = [var.name for var in val.values()]

    @property
    def _pred_fetch_list(self):
        """Two-element list: fetch names, then fetch variables."""
        return [self._pred_fetch_name_list, self._pred_fetch_var_list]

    @property
    def _is_first_target(self):
        """Whether this task has been marked as the main task."""
        return self._first_target

    @_is_first_target.setter
    def _is_first_target(self, value):
        self._first_target = bool(value)
        if self._first_target:
            assert self._is_target, "ERROR: only target task could be set as main task."
        if self._verbose and self._first_target:
            print("{}: set as main task".format(self._name))

    @property
    def _is_target(self):
        """Target-task flag.

        Raises:
            ValueError: if the flag is ``None``.
        """
        if self._target_flag is not None:
            return self._target_flag
        else:
            raise ValueError("{}: is_target is None".format(self._name))

    @_is_target.setter
    def _is_target(self, value):
        self._target_flag = bool(value)
        if self._verbose:
            if self._target_flag:
                print('{}: set as target task.'.format(self._name))
            else:
                print('{}: set as aux task.'.format(self._name))

    @property
    def mix_ratio(self):
        """Mix ratio of the task.

        Raises:
            ValueError: if the ratio is ``None``.
        """
        if self._mix_ratio is not None:
            return self._mix_ratio
        else:
            raise ValueError("{}: mix_ratio is None".format(self._name))

    @mix_ratio.setter
    def mix_ratio(self, value):
        self._mix_ratio = float(value)
        if self._verbose:
            print('{}: mix_ratio is set to {}'.format(self._name, self._mix_ratio))

    @property
    def save_infermodel_every_n_steps(self):
        """Value given at construction time or assigned later."""
        return self._save_infermodel_every_n_steps

    @save_infermodel_every_n_steps.setter
    def save_infermodel_every_n_steps(self, val):
        self._save_infermodel_every_n_steps = val

    @property
    def expected_train_steps(self):
        """Number of training steps expected for this task."""
        return self._expected_steps

    @property
    def _expected_train_steps(self):
        return self._expected_steps

    @_expected_train_steps.setter
    def _expected_train_steps(self, value):
        # ``_steps_pur_epoch`` must be set first: the epoch count is derived
        # from it by division.
        self._expected_steps = value
        self._expected_train_epochs = value / float(self._epoch_size)

    @property
    def expected_train_epochs(self):
        """Expected steps divided by steps per epoch (set with the steps)."""
        return self._expected_train_epochs

    @property
    def cur_train_epoch(self):
        """Current training epoch counter."""
        return self._epoch_counter

    @property
    def _cur_train_epoch(self):
        return self._epoch_counter

    @_cur_train_epoch.setter
    def _cur_train_epoch(self, value):
        self._epoch_counter = value

    @property
    def cur_train_step(self):
        """Current step within the running epoch."""
        return self._step_counter

    @property
    def _cur_train_step(self):
        return self._step_counter

    @_cur_train_step.setter
    def _cur_train_step(self, value):
        self._step_counter = value
        # Passing the end of an epoch rolls over to step 1 of the next one.
        if self._step_counter > self._epoch_size:
            self._epoch_counter += 1
            self._step_counter = 1
        # A target task is finished once its global step count reaches the
        # expected number of steps.
        if self._is_target and \
                self._step_counter + self._epoch_counter * self._epoch_size >= self._expected_steps:
            self._train_finish = True

    @property
    def steps_pur_epoch(self):
        """Number of steps that make up one epoch."""
        return self._epoch_size

    @property
    def _steps_pur_epoch(self):
        return self._epoch_size

    @_steps_pur_epoch.setter
    def _steps_pur_epoch(self, value):
        self._epoch_size = value

    @property
    def train_finish(self):
        """True once the step setter has detected the end of training."""
        return self._train_finish

    def tasklayer_reuse_with(self, task):
        """Reuse the task-layer scope of another task.

        Args:
            task: the ``Task`` whose name becomes this task's reuse scope.

        Raises:
            Exception: if the task has already been locked by ``_set_lock``.
        """
        assert isinstance(task, Task)
        if self._lock:
            raise Exception('you can only set tasklayer reuses BEFORE Controller created.')
        self._task_reuse_scope = task.name

    def _set_lock(self):
        """Forbid further calls to ``tasklayer_reuse_with``."""
        self._lock = True
# @property
# def task_reuse_scope(self):
# if self._task_reuse_scope is not None:
# return self._task_reuse_scope
# else:
# raise ValueError("{}: task_reuse_scope is None".format(self._name))
# @task_reuse_scope.setter
# def task_reuse_scope(self, scope_name):
# self._task_reuse_scope = str(scope_name)
# if self._verbose:
# print('{}: task_reuse_scope is set to {}'.format(self._name, self._task_reuse_scope))
def
check_req_args
(
conf
,
name
):
assert
'reader'
in
conf
,
name
+
': reader is required to build TaskInstance.'
assert
'paradigm'
in
conf
,
name
+
': paradigm is required to build TaskInstance.'
...
...
@@ -44,7 +291,7 @@ class TaskInstance(object):
parad_name
=
config
[
'paradigm'
]
parad_mod
=
importlib
.
import_module
(
PARADIGM_DIR
+
'.'
+
parad_name
)
Paradigm
=
getattr
(
parad_mod
,
'Task
Paradigm
'
)
Paradigm
=
getattr
(
parad_mod
,
'Task
Type
'
)
self
.
_Reader
=
Reader
self
.
_Paradigm
=
Paradigm
...
...
paddlepalm/
reader/__init__.py
→
reader/__init__.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/cls.py
→
reader/cls.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/match.py
→
reader/match.py
浏览文件 @
f9e37ecd
...
...
@@ -16,6 +16,12 @@
from
paddlepalm.interface
import
reader
from
paddlepalm.reader.utils.reader4ernie
import
ClassifyReader
def match(vocab_path, max_seq_len, do_lower_case=True, phase='train', dev_count=1):
    """Build a matching-task ``Reader`` from explicit arguments.

    Args:
        vocab_path: vocabulary file handed to the reader through its config.
        max_seq_len: maximum sequence length handed to the reader.
        do_lower_case: lower-casing switch handed to the reader.
        phase: running phase forwarded to ``Reader``; defaults to
            ``'train'``, the same default ``Reader.__init__`` declares.
            (It previously had no default although it followed a defaulted
            parameter, which is a syntax error.)
        dev_count: device count forwarded to ``Reader``.

    Returns:
        The ``Reader`` instance created from these settings.
    """
    # NOTE(review): the original body was the placeholder ``{xxx}``. The
    # keys below simply mirror the parameter names -- confirm them against
    # what Reader.__init__ actually reads from its config.
    config = {
        'vocab_path': vocab_path,
        'max_seq_len': max_seq_len,
        'do_lower_case': do_lower_case,
    }
    # The config is a plain dict and is passed as-is; it is not callable.
    return Reader(config, phase=phase, dev_count=dev_count)
class
Reader
(
reader
):
def
__init__
(
self
,
config
,
phase
=
'train'
,
dev_count
=
1
,
print_prefix
=
''
):
...
...
paddlepalm/
reader/mlm.py
→
reader/mlm.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/mrc.py
→
reader/mrc.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/utils/__init__.py
→
reader/utils/__init__.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/utils/batching4bert.py
→
reader/utils/batching4bert.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/utils/batching4ernie.py
→
reader/utils/batching4ernie.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/utils/mlm_batching.py
→
reader/utils/mlm_batching.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/utils/mrqa_helper.py
→
reader/utils/mrqa_helper.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/
reader/utils/reader4ernie.py
→
reader/utils/reader4ernie.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/task_paradigm
/__init__.py
→
tasktype
/__init__.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/task_paradigm
/cls.py
→
tasktype
/cls.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/task_paradigm
/match.py
→
tasktype
/match.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/task_paradigm
/mlm.py
→
tasktype
/mlm.py
浏览文件 @
f9e37ecd
文件已移动
paddlepalm/task_paradigm
/mrc.py
→
tasktype
/mrc.py
浏览文件 @
f9e37ecd
文件已移动
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录