PaddlePaddle / PALM
Commit df98c24f, authored Jan 10, 2020 by xixiaoyao
fix pred
Parent: 8a99149a
Showing 10 changed files with 1854 additions and 20 deletions.
demo/demo3/log                              +1803  -0
demo/demo3/run.py                              +3  -2
demo/demo3/run.sh                              +1  -1
paddlepalm/.trainer.py.swp                     +0  -0
paddlepalm/backbone/ernie.py                   +2  -0
paddlepalm/distribute/__init__.py              +1  -1
paddlepalm/distribute/reader.py               +21  -9
paddlepalm/optimizer/adam.py                   +3  -0
paddlepalm/optimizer/base_optimizer.py         +2  -1
paddlepalm/trainer.py                         +18  -6
demo/demo3/log (new file, mode 100644)

This diff is collapsed (1803 added lines of run-time log output).
demo/demo3/run.py

...
@@ -6,7 +6,7 @@ if __name__ == '__main__':
     max_seqlen = 512
     batch_size = 4
-    num_epochs = 2
+    num_epochs = 20
     lr = 1e-3
     vocab_path = './pretrain/ernie/vocab.txt'
...
@@ -67,7 +67,8 @@ if __name__ == '__main__':
     cls_pred_head = palm.head.Classify(4, 1024, phase='pred')
     trainer.build_predict_head(cls_pred_head, pred_ernie)
-    trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
+    # trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
+    trainer.train(iterator_fn, print_steps=1)
     # trainer.save()
...
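For orientation, the calls touched above fit together roughly as sketched below. This is a reconstruction from the visible lines only, not code from the repository: `trainer`, `pred_ernie` (a backbone built for the prediction phase) and `iterator_fn` (the training data iterator) are created earlier in demo/demo3/run.py and are taken here as given, and the `import paddlepalm as palm` alias is assumed.

# Sketch assembled from the lines visible in this diff; not repository code.
import paddlepalm as palm  # assumed alias used by the demo

def attach_pred_head_and_train(trainer, pred_ernie, iterator_fn):
    # 4 and 1024 are the values used in the demo (presumably the number of
    # classes and the backbone hidden size).
    cls_pred_head = palm.head.Classify(4, 1024, phase='pred')
    trainer.build_predict_head(cls_pred_head, pred_ernie)
    # After this commit the demo only logs every step; periodic checkpoint and
    # prediction saving (save_steps, save_path, save_type) is commented out.
    trainer.train(iterator_fn, print_steps=1)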
demo/demo3/run.sh

-export CUDA_VISIBLE_DEVICES=3
+export CUDA_VISIBLE_DEVICES=4
 python run.py
paddlepalm/.trainer.py.swp (new file, mode 100644)

File added (no text diff shown).
paddlepalm/backbone/ernie.py

...
@@ -114,6 +114,8 @@ class ERNIE(BaseBackbone):
         input_mask = inputs['input_mask']
         task_ids = inputs['task_ids']
+
+        fluid.layers.Print(src_ids)
         # padding id in vocabulary must be set to 0
         emb_out = fluid.embedding(
             input=src_ids,
...
paddlepalm/distribute/__init__.py

...
@@ -5,5 +5,5 @@ import multiprocessing
 gpu_dev_count = int(fluid.core.get_cuda_device_count())
 cpu_dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-from reader import yield_pieces, data_feeder
+from reader import yield_pieces, data_feeder, decode_fake
paddlepalm/distribute/reader.py

...
@@ -11,8 +11,8 @@ def yield_pieces(data, distribute_strategy, batch_size):
       distribute_strategy: support s=split, c=copy, u=unstack,
     """
     assert batch_size % dev_count == 0, "batch_size need to be integer times larger than dev_count."
-    print('data in yield pieces')
-    print(len(data))
+    # print('data in yield pieces')
+    # print(len(data))
     assert type(data) == type(distribute_strategy), [type(data), type(distribute_strategy)]
     assert len(data) == len(distribute_strategy), [len(data), len(distribute_strategy)]
...
@@ -53,12 +53,11 @@ def yield_pieces(data, distribute_strategy, batch_size):
         if type(data) == dict:
             yield dict(zip(*[keys, temp]))
         else:
-            print('yielded pieces')
-            print(len(temp))
+            # print('yielded pieces')
+            # print(len(temp))
             yield temp

-def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
+def data_feeder(reader, postprocess_fn=None, prefetch_steps=2, phase='train'):
     if postprocess_fn is None:
         def postprocess_fn(batch):
             return batch
...
@@ -91,6 +90,7 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
             queue.task_done()
             if ret is not None:
                 batches, num_pad = ret
+                id = batches[0]['__task_id'][0][0] if phase == 'train' else -1
                 batch_buf = []
                 flag_buf = []
                 for idx, batch in enumerate(batches):
...
@@ -98,12 +98,24 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
                     flag = idx - len(batches) < -num_pad
                     # if num_pad > 0:
                     #     num_pad -= 1
-                    batch = postprocess_fn(batch)
+                    batch = postprocess_fn(batch, id)
                     batch_buf.append(batch)
                     flag_buf.append(flag)
-                yield batch_buf, flag_buf
+                if phase == 'train':
+                    yield batch_buf, flag_buf
+                else:
+                    yield batch_buf, flag_buf, id
             else:
                 break
     queue.join()

+def decode_fake(nums, mask, bs):
+    n_t = 0
+    for flag in mask:
+        if not flag:
+            break
+        n_t = n_t + 1
+
+    n_f = len(mask) - n_t
+    p1 = nums - (n_t - 1) * bs
+    each_f = p1 / (n_f + 1)
+    return each_f * n_f
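The new decode_fake helper is the core of this commit: when the last multi-device batch had to be padded out with fake shards, it estimates how many trailing prediction rows should be discarded. Below is a standalone copy with an invented numeric scenario; the reading of the arguments (nums = rows fetched, mask = per-shard real/padding flags from data_feeder, bs = per-device batch size) is my interpretation of the surrounding code, not something stated in the diff.

# Standalone copy of decode_fake (added above in paddlepalm/distribute/reader.py),
# plus an invented numeric scenario showing the arithmetic.

def decode_fake(nums, mask, bs):
    # n_t: leading device shards whose flag is True (real data)
    n_t = 0
    for flag in mask:
        if not flag:
            break
        n_t = n_t + 1

    n_f = len(mask) - n_t        # trailing shards that were padding
    p1 = nums - (n_t - 1) * bs   # rows left after the full real shards
    each_f = p1 / (n_f + 1)      # rows per remaining shard (last real one + fakes)
    return each_f * n_f          # estimated number of fake rows to discard


if __name__ == '__main__':
    # Hypothetical: 4 devices with per-device batch size 8, but only the first
    # two shards carried real data; the last two were padded copies.
    mask = [True, True, False, False]
    nums = 32                    # rows fetched from the run across all shards
    print(decode_fake(nums, mask, bs=8))   # -> 16.0, i.e. drop the last 16 rows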
paddlepalm/optimizer/adam.py

...
@@ -37,6 +37,8 @@ class Adam(BaseOptimizer):
        if self._lr_schedualer is not None:
            self._lr = self._lr_schedualer.build(self._lr)
+
+        fluid.layers.Print(self._lr)
        optimizer = fluid.optimizer.Adam(learning_rate=self._lr)

        if grad_clip is not None:
...
@@ -46,6 +48,7 @@ class Adam(BaseOptimizer):
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))

+        print(self._loss)
        _, param_grads = optimizer.minimize(self._loss)
        return param_grads
...
paddlepalm/optimizer/base_optimizer.py

...
@@ -8,8 +8,9 @@ class BaseOptimizer():
     def build(self, grad_clip=None):
         pass

-    def _set_prog(self, prog):
+    def _set_prog(self, prog, init_prog):
         self._prog = prog
+        self._init_prog = prog
         if self._lr_schedualer is not None:
             self._lr_schedualer._set_prog(prog)
...
paddlepalm/trainer.py

...
@@ -21,7 +21,7 @@ import time
 import numpy as np
 import paddlepalm.utils.basic_helper as helper
 from paddlepalm.utils import reader_helper, saver
-from paddlepalm.distribute import gpu_dev_count, data_feeder
+from paddlepalm.distribute import gpu_dev_count, data_feeder, decode_fake
 # from paddlepalm.default_settings import *

 DEBUG = False
...
@@ -217,12 +217,16 @@ class Trainer(object):
         with fluid.program_guard(train_prog, train_init_prog):
             loss_var = fluid.layers.reduce_sum(task_output_vars[self.name+'.loss'])
+            self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=loss_var.name)

         for _id, block in enumerate(self._train_prog.blocks):
             for var in block.vars:
                 print("[debug] : %d, %s" % (_id, var))

         return loss_var

     def build_backward(self, optimizer, weight_decay=None, use_ema=False, ema_decay=0.9999):
         # build optimizer
-        optimizer._set_prog(self._train_prog)
+        assert self._train_init_prog is not None, "train graph not foung! You should build_forward first."
+        optimizer._set_prog(self._train_prog, self._train_init_prog)
         with fluid.program_guard(self._train_prog, self._train_init_prog):
             param_grads = optimizer.build()
...
@@ -258,6 +262,13 @@ class Trainer(object):
             ema = fluid.optimizer.ExponentialMovingAverage(ema_decay)
             ema.update()

+        # for bid, block in enumerate(self._train_prog.blocks):
+        #     print('block id: '+str(bid))
+        #     for var in block.vars:
+        #         print("%d : %s" % (bid, var))
+        # print(self._train_prog)
+
     def load_data(self, input_file, file_format, batch_size, num_epochs=None, shuffle_train=True):
         # load data
         print("preparing data...", end='')
...
@@ -287,6 +298,7 @@ class Trainer(object):
     def random_init_params(self):
+        assert self._train_init_prog is not None, "train graph not foung! You should build_forward first before you random init parameters."
         self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=loss_var.name)
         on_gpu = gpu_dev_count > 0
         self._exe = helper.build_executor(on_gpu)
         print('random init params...')
...
@@ -294,7 +306,7 @@ class Trainer(object):
     def load_ckpt(self, model_path, phase='train'):
         # load pretrain model (or ckpt)
-        assert self._exe is not None, "You need to random_init_params before load pretrain models."
+        assert self._exe is not None, "You need to random_init_params before load checkpoints."
         if phase == 'train':
             assert self._train_init_prog is not None, "train graph not found! You should build_forward first before load checkpoint."
...
@@ -437,12 +449,12 @@ class Trainer(object):
     def predict_one_batch(self, batch):
         if gpu_dev_count > 1:
             feed, mask = batch
-            rt_outputs = self.exe.run(self._distribute_train_prog, feed=feed, fetch_list=self._fetch_list)
+            rt_outputs = self.exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._fetch_list)
             while mask.pop() == False:
                 rt_outputs.pop()
         else:
             feed = self._feed_batch_process_fn(batch)
-            rt_outputs = self._exe.run(self._distribute_train_prog, feed=feed, fetch_list=self._fetch_list)
+            rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._fetch_list)

         rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)}
...
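To make the multi-GPU branch of predict_one_batch concrete: data_feeder (see paddlepalm/distribute/reader.py above) yields one flag per device shard, where False marks a shard that only exists to pad the batch out to the device count, and predict_one_batch now runs the prediction program and then pops one fetched entry per trailing False flag. Below is a pure-Python sketch of that trimming loop; the outputs and flags are invented placeholders, since in the real code rt_outputs comes from exe.run(self._distribute_pred_prog, ...) and mask from data_feeder.

# Pure-Python sketch of the trimming loop in Trainer.predict_one_batch above
# ("while mask.pop() == False: rt_outputs.pop()"); data is invented.

def trim_padded_outputs(rt_outputs, mask):
    """Drop one fetched entry per trailing padded (False) shard, as in the diff."""
    rt_outputs = list(rt_outputs)
    mask = list(mask)
    while mask.pop() == False:   # comparison kept as written in the original code
        rt_outputs.pop()
    return rt_outputs


if __name__ == '__main__':
    # Four device shards were fed, but only the first three carried real data.
    mask = [True, True, True, False]
    rt_outputs = ['out_shard0', 'out_shard1', 'out_shard2', 'out_shard3_padding']
    print(trim_padded_outputs(rt_outputs, mask))
    # -> ['out_shard0', 'out_shard1', 'out_shard2']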