Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
c2ff52df
M
models
项目概览
PaddlePaddle
/
models
1 年多 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c2ff52df
编写于
8月 09, 2018
作者:
Q
Qingsheng Li
提交者:
GitHub
8月 09, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into soft-link
上级
676778bf
c79878fe
变更
32
隐藏空白更改
内联
并排
Showing
32 changed file
with
912 addition
and
62 deletion
+912
-62
fluid/image_classification/.gitignore
fluid/image_classification/.gitignore
+9
-0
fluid/image_classification/.run_ce.sh
fluid/image_classification/.run_ce.sh
+10
-0
fluid/image_classification/_ce.py
fluid/image_classification/_ce.py
+94
-0
fluid/image_classification/models/se_resnext.py
fluid/image_classification/models/se_resnext.py
+4
-1
fluid/image_classification/train.py
fluid/image_classification/train.py
+59
-2
fluid/language_model/.run_ce.sh
fluid/language_model/.run_ce.sh
+2
-2
fluid/language_model/train.py
fluid/language_model/train.py
+1
-1
fluid/neural_machine_translation/transformer/.run_ce.sh
fluid/neural_machine_translation/transformer/.run_ce.sh
+27
-0
fluid/neural_machine_translation/transformer/_ce.py
fluid/neural_machine_translation/transformer/_ce.py
+60
-0
fluid/neural_machine_translation/transformer/config.py
fluid/neural_machine_translation/transformer/config.py
+2
-0
fluid/neural_machine_translation/transformer/model.py
fluid/neural_machine_translation/transformer/model.py
+11
-3
fluid/neural_machine_translation/transformer/train.py
fluid/neural_machine_translation/transformer/train.py
+34
-7
fluid/object_detection/_ce.py
fluid/object_detection/_ce.py
+6
-6
fluid/object_detection/mobilenet_ssd.py
fluid/object_detection/mobilenet_ssd.py
+0
-1
fluid/sequence_tagging_for_ner/.run_ce.sh
fluid/sequence_tagging_for_ner/.run_ce.sh
+6
-0
fluid/sequence_tagging_for_ner/README.md
fluid/sequence_tagging_for_ner/README.md
+1
-5
fluid/sequence_tagging_for_ner/_ce.py
fluid/sequence_tagging_for_ner/_ce.py
+48
-0
fluid/sequence_tagging_for_ner/data/download.sh
fluid/sequence_tagging_for_ner/data/download.sh
+17
-0
fluid/sequence_tagging_for_ner/data/target.txt
fluid/sequence_tagging_for_ner/data/target.txt
+9
-0
fluid/sequence_tagging_for_ner/data/test
fluid/sequence_tagging_for_ner/data/test
+128
-0
fluid/sequence_tagging_for_ner/data/train
fluid/sequence_tagging_for_ner/data/train
+139
-0
fluid/sequence_tagging_for_ner/reader.py
fluid/sequence_tagging_for_ner/reader.py
+66
-0
fluid/sequence_tagging_for_ner/train.py
fluid/sequence_tagging_for_ner/train.py
+45
-20
fluid/sequence_tagging_for_ner/utils.py
fluid/sequence_tagging_for_ner/utils.py
+47
-0
fluid/text_classification/.run_ce.sh
fluid/text_classification/.run_ce.sh
+5
-0
fluid/text_classification/_ce.py
fluid/text_classification/_ce.py
+48
-0
fluid/text_classification/clouds/scdb_parallel_executor.py
fluid/text_classification/clouds/scdb_parallel_executor.py
+1
-1
fluid/text_classification/clouds/scdb_single_card.py
fluid/text_classification/clouds/scdb_single_card.py
+1
-1
fluid/text_classification/infer.py
fluid/text_classification/infer.py
+1
-1
fluid/text_classification/nets.py
fluid/text_classification/nets.py
+1
-1
fluid/text_classification/train.py
fluid/text_classification/train.py
+13
-1
fluid/text_classification/utils.py
fluid/text_classification/utils.py
+17
-9
未找到文件。
fluid/image_classification/.gitignore
0 → 100644
浏览文件 @
c2ff52df
# saved model
output/
# coco and pascalvoc data
data/ILSVRC2012/ILSVRC2012_img_val.tar
data/ILSVRC2012/ILSVRC2012_img_train.tar
data/ILSVRC2012/ImageNet_label.tgz
data/ILSVRC2012/train_list.txt
data/ILSVRC2012/val_list.txt
fluid/image_classification/.run_ce.sh
0 → 100644
浏览文件 @
c2ff52df
#!/bin/bash
# This file is only used for continuous evaluation.
cudaid
=
${
object_detection_cudaid
:
=0
}
export
CUDA_VISIBLE_DEVICES
=
$cudaid
python train.py
--batch_size
=
64
--num_passes
=
10
--total_images
=
6149
--enable_ce
=
True | python _ce.py
cudaid
=
${
object_detection_cudaid
:
=0, 1, 2, 3
}
export
CUDA_VISIBLE_DEVICES
=
$cudaid
python train.py
--batch_size
=
64
--num_passes
=
10
--total_images
=
6149
--enable_ce
=
True | python _ce.py
fluid/image_classification/_ce.py
0 → 100644
浏览文件 @
c2ff52df
####this file is only used for continuous evaluation test!
import
os
import
sys
sys
.
path
.
append
(
os
.
environ
[
'ceroot'
])
from
kpi
import
CostKpi
,
DurationKpi
,
AccKpi
#### NOTE kpi.py should shared in models in some way!!!!
train_acc_top1_kpi
=
AccKpi
(
'train_acc_top1'
,
0.05
,
0
,
desc
=
'TOP1 ACC'
)
train_acc_top5_kpi
=
AccKpi
(
'train_acc_top5'
,
0.05
,
0
,
actived
=
False
,
desc
=
'TOP5 ACC'
)
train_cost_kpi
=
CostKpi
(
'train_cost'
,
0.3
,
0
,
actived
=
True
,
desc
=
'train cost'
)
test_acc_top1_kpi
=
AccKpi
(
'test_acc_top1'
,
0.05
,
0
,
desc
=
'TOP1 ACC'
)
test_acc_top5_kpi
=
AccKpi
(
'test_acc_top5'
,
0.05
,
0
,
actived
=
False
,
desc
=
'TOP5 ACC'
)
test_cost_kpi
=
CostKpi
(
'test_cost'
,
1.0
,
0
,
actived
=
True
,
desc
=
'train cost'
)
train_speed_kpi
=
AccKpi
(
'train_speed'
,
0.05
,
0
,
actived
=
True
,
unit_repr
=
'seconds/image'
,
desc
=
'train speed in one GPU card'
)
train_acc_top1_card4_kpi
=
AccKpi
(
'train_acc_top1_card4'
,
0.05
,
0
,
desc
=
'TOP1 ACC'
)
train_acc_top5_card4_kpi
=
AccKpi
(
'train_acc_top5_card4'
,
0.05
,
0
,
actived
=
False
,
desc
=
'TOP5 ACC'
)
train_cost_card4_kpi
=
CostKpi
(
'train_cost_kpi'
,
0.3
,
0
,
actived
=
True
,
desc
=
'train cost'
)
test_acc_top1_card4_kpi
=
AccKpi
(
'test_acc_top1_card4'
,
0.05
,
0
,
desc
=
'TOP1 ACC'
)
test_acc_top5_card4_kpi
=
AccKpi
(
'test_acc_top5_card4'
,
0.05
,
0
,
actived
=
False
,
desc
=
'TOP5 ACC'
)
test_cost_card4_kpi
=
CostKpi
(
'test_cost_card4'
,
1.0
,
0
,
actived
=
True
,
desc
=
'train cost'
)
train_speed_card4_kpi
=
AccKpi
(
'train_speed_card4'
,
0.05
,
0
,
actived
=
True
,
unit_repr
=
'seconds/image'
,
desc
=
'train speed in four GPU card'
)
tracking_kpis
=
[
train_acc_top1_kpi
,
train_acc_top5_kpi
,
train_cost_kpi
,
test_acc_top1_kpi
,
test_acc_top5_kpi
,
test_cost_kpi
,
train_speed_kpi
,
train_acc_top1_card4_kpi
,
train_acc_top5_card4_kpi
,
train_cost_card4_kpi
,
test_acc_top1_card4_kpi
,
test_acc_top5_card4_kpi
,
test_cost_card4_kpi
,
train_speed_card4_kpi
]
def
parse_log
(
log
):
'''
This method should be implemented by model developers.
The suggestion:
each line in the log should be key, value, for example:
"
train_cost
\t
1.0
test_cost
\t
1.0
train_cost
\t
1.0
train_cost
\t
1.0
train_acc
\t
1.2
"
'''
for
line
in
log
.
split
(
'
\n
'
):
fs
=
line
.
strip
().
split
(
'
\t
'
)
print
(
fs
)
if
len
(
fs
)
==
3
and
fs
[
0
]
==
'kpis'
:
print
(
"-----%s"
%
fs
)
kpi_name
=
fs
[
1
]
kpi_value
=
float
(
fs
[
2
])
yield
kpi_name
,
kpi_value
def
log_to_ce
(
log
):
kpi_tracker
=
{}
for
kpi
in
tracking_kpis
:
kpi_tracker
[
kpi
.
name
]
=
kpi
for
(
kpi_name
,
kpi_value
)
in
parse_log
(
log
):
print
(
kpi_name
,
kpi_value
)
kpi_tracker
[
kpi_name
].
add_record
(
kpi_value
)
kpi_tracker
[
kpi_name
].
persist
()
if
__name__
==
'__main__'
:
log
=
sys
.
stdin
.
read
()
print
(
"*****"
)
print
log
print
(
"****"
)
log_to_ce
(
log
)
fluid/image_classification/models/se_resnext.py
浏览文件 @
c2ff52df
...
...
@@ -11,6 +11,7 @@ train_parameters = {
"input_size"
:
[
3
,
224
,
224
],
"input_mean"
:
[
0.485
,
0.456
,
0.406
],
"input_std"
:
[
0.229
,
0.224
,
0.225
],
"dropout_seed"
:
None
,
"learning_strategy"
:
{
"name"
:
"piecewise_decay"
,
"batch_size"
:
256
,
...
...
@@ -101,7 +102,9 @@ class SE_ResNeXt():
pool
=
fluid
.
layers
.
pool2d
(
input
=
conv
,
pool_size
=
7
,
pool_type
=
'avg'
,
global_pooling
=
True
)
drop
=
fluid
.
layers
.
dropout
(
x
=
pool
,
dropout_prob
=
0.5
)
# do not set seed when traning, it is only used for debug
drop
=
fluid
.
layers
.
dropout
(
x
=
pool
,
dropout_prob
=
0.5
,
seed
=
self
.
params
[
"dropout_seed"
])
stdv
=
1.0
/
math
.
sqrt
(
drop
.
shape
[
1
]
*
1.0
)
out
=
fluid
.
layers
.
fc
(
input
=
drop
,
size
=
class_dim
,
...
...
fluid/image_classification/train.py
浏览文件 @
c2ff52df
...
...
@@ -4,6 +4,7 @@ import time
import
sys
import
paddle
import
paddle.fluid
as
fluid
import
paddle.dataset.flowers
as
flowers
import
models
import
reader
import
argparse
...
...
@@ -28,6 +29,7 @@ add_arg('checkpoint', str, None, "Whether to resume chec
add_arg
(
'lr'
,
float
,
0.1
,
"set learning rate."
)
add_arg
(
'lr_strategy'
,
str
,
"piecewise_decay"
,
"Set the learning rate decay strategy."
)
add_arg
(
'model'
,
str
,
"SE_ResNeXt50_32x4d"
,
"Set the network to use."
)
add_arg
(
'enable_ce'
,
bool
,
False
,
"If set True, enable continuous evaluation job."
)
# yapf: enable
model_list
=
[
m
for
m
in
dir
(
models
)
if
"__"
not
in
m
]
...
...
@@ -100,6 +102,9 @@ def train(args):
# model definition
model
=
models
.
__dict__
[
model_name
]()
if
args
.
enable_ce
:
assert
model_name
==
"SE_ResNeXt50_32x4d"
if
model_name
is
"GoogleNet"
:
out0
,
out1
,
out2
=
model
.
net
(
input
=
image
,
class_dim
=
class_dim
)
cost0
=
fluid
.
layers
.
cross_entropy
(
input
=
out0
,
label
=
label
)
...
...
@@ -129,6 +134,8 @@ def train(args):
params
[
"num_epochs"
]
=
args
.
num_epochs
params
[
"learning_strategy"
][
"batch_size"
]
=
args
.
batch_size
params
[
"learning_strategy"
][
"name"
]
=
args
.
lr_strategy
if
args
.
enable_ce
:
params
[
"dropout_seed"
]
=
10
# initialize optimizer
optimizer
=
optimizer_setting
(
params
)
...
...
@@ -137,6 +144,9 @@ def train(args):
if
with_memory_optimization
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
if
args
.
enable_ce
:
fluid
.
default_startup_program
().
random_seed
=
1000
place
=
fluid
.
CUDAPlace
(
0
)
if
args
.
use_gpu
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
...
...
@@ -153,8 +163,20 @@ def train(args):
train_batch_size
=
args
.
batch_size
test_batch_size
=
16
train_reader
=
paddle
.
batch
(
reader
.
train
(),
batch_size
=
train_batch_size
)
test_reader
=
paddle
.
batch
(
reader
.
val
(),
batch_size
=
test_batch_size
)
if
not
args
.
enable_ce
:
train_reader
=
paddle
.
batch
(
reader
.
train
(),
batch_size
=
train_batch_size
)
test_reader
=
paddle
.
batch
(
reader
.
val
(),
batch_size
=
test_batch_size
)
else
:
# use flowers dataset for CE and set use_xmap False to avoid disorder data
# but it is time consuming. For faster speed, need another dataset.
import
random
random
.
seed
(
0
)
train_reader
=
paddle
.
batch
(
flowers
.
train
(
use_xmap
=
False
),
batch_size
=
train_batch_size
)
test_reader
=
paddle
.
batch
(
flowers
.
test
(
use_xmap
=
False
),
batch_size
=
test_batch_size
)
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
image
,
label
])
train_exe
=
fluid
.
ParallelExecutor
(
...
...
@@ -162,9 +184,12 @@ def train(args):
fetch_list
=
[
avg_cost
.
name
,
acc_top1
.
name
,
acc_top5
.
name
]
gpu
=
os
.
getenv
(
"CUDA_VISIBLE_DEVICES"
)
or
""
gpu_nums
=
len
(
gpu
.
split
(
","
))
for
pass_id
in
range
(
params
[
"num_epochs"
]):
train_info
=
[[],
[],
[]]
test_info
=
[[],
[],
[]]
train_time
=
[]
for
batch_id
,
data
in
enumerate
(
train_reader
()):
t1
=
time
.
time
()
loss
,
acc1
,
acc5
=
train_exe
.
run
(
fetch_list
,
feed
=
feeder
.
feed
(
data
))
...
...
@@ -176,6 +201,7 @@ def train(args):
train_info
[
0
].
append
(
loss
)
train_info
[
1
].
append
(
acc1
)
train_info
[
2
].
append
(
acc5
)
train_time
.
append
(
period
)
if
batch_id
%
10
==
0
:
print
(
"Pass {0}, trainbatch {1}, loss {2},
\
acc1 {3}, acc5 {4} time {5}"
...
...
@@ -187,6 +213,7 @@ def train(args):
train_loss
=
np
.
array
(
train_info
[
0
]).
mean
()
train_acc1
=
np
.
array
(
train_info
[
1
]).
mean
()
train_acc5
=
np
.
array
(
train_info
[
2
]).
mean
()
train_speed
=
np
.
array
(
train_time
).
mean
()
/
train_batch_size
cnt
=
0
for
test_batch_id
,
data
in
enumerate
(
test_reader
()):
t1
=
time
.
time
()
...
...
@@ -226,6 +253,36 @@ def train(args):
os
.
makedirs
(
model_path
)
fluid
.
io
.
save_persistables
(
exe
,
model_path
)
# This is for continuous evaluation only
if
args
.
enable_ce
and
pass_id
==
args
.
num_epochs
-
1
:
if
gpu_nums
==
1
:
# Use the last cost/acc for training
print
(
"kpis train_cost %s"
%
train_loss
)
print
(
"kpis train_acc_top1 %s"
%
train_acc1
)
print
(
"kpis train_acc_top5 %s"
%
train_acc5
)
# Use the mean cost/acc for testing
print
(
"kpis test_cost %s"
%
test_loss
)
print
(
"kpis test_acc_top1 %s"
%
test_acc1
)
print
(
"kpis test_acc_top5 %s"
%
test_acc5
)
print
(
"kpis train_speed %s"
%
train_speed
)
else
:
# Use the last cost/acc for training
print
(
"kpis train_cost_card%s %s"
%
(
gpu_nums
,
train_loss
))
print
(
"kpis train_acc_top1_card%s %s"
%
(
gpu_nums
,
train_acc1
))
print
(
"kpis train_acc_top5_card%s %s"
%
(
gpu_nums
,
train_acc5
))
# Use the mean cost/acc for testing
print
(
"kpis test_cost_card%s %s"
%
(
gpu_nums
,
test_loss
))
print
(
"kpis test_acc_top1_card%s %s"
%
(
gpu_nums
,
test_acc1
))
print
(
"kpis test_acc_top5_card%s %s"
%
(
gpu_nums
,
test_acc5
))
print
(
"kpis train_speed_card%s %s"
%
(
gpu_nums
,
train_speed
))
def
main
():
args
=
parser
.
parse_args
()
...
...
fluid/language_model/.run_ce.sh
浏览文件 @
c2ff52df
...
...
@@ -6,9 +6,9 @@ export OMP_NUM_THREADS=1
cudaid
=
${
language_model
:
=0
}
# use 0-th card as default
export
CUDA_VISIBLE_DEVICES
=
$cudaid
FLAGS_benchmark
=
true
python train.py | python _ce.py
FLAGS_benchmark
=
true
python train.py
--enable_ce
| python _ce.py
cudaid
=
${
language_model_m
:
=0,1,2,3
}
# use 0,1,2,3 card as default
export
CUDA_VISIBLE_DEVICES
=
$cudaid
FLAGS_benchmark
=
true
python train.py | python _ce.py
FLAGS_benchmark
=
true
python train.py
--enable_ce
| python _ce.py
fluid/language_model/train.py
浏览文件 @
c2ff52df
...
...
@@ -145,7 +145,7 @@ def train(train_reader,
if
pass_idx
==
pass_num
-
1
and
args
.
enable_ce
:
#Note: The following logs are special for CE monitoring.
#Other situations do not need to care about these logs.
gpu_num
=
get_cards
()
gpu_num
=
get_cards
(
args
.
enable_ce
)
if
gpu_num
==
1
:
print
(
"kpis imikolov_20_pass_duration %s"
%
(
total_time
/
epoch_idx
))
...
...
fluid/neural_machine_translation/transformer/.run_ce.sh
0 → 100644
浏览文件 @
c2ff52df
#!/bin/bash
DATA_PATH
=
$HOME
/.cache/paddle/dataset/wmt16
if
[
!
-d
$DATA_PATH
]
;
then
python
-c
'import paddle;paddle.dataset.wmt16.train(10000, 10000, "en")'
\
'().next()'
tar
-zxf
$DATA_PATH
/wmt16.tar.gz
-C
$DATA_PATH
fi
train
(){
python
-u
train.py
\
--src_vocab_fpath
$DATA_PATH
/en_10000.dict
\
--trg_vocab_fpath
$DATA_PATH
/de_10000.dict
\
--special_token
'<s>'
'<e>'
'<unk>'
\
--train_file_pattern
$DATA_PATH
/wmt16/train
\
--val_file_pattern
$DATA_PATH
/wmt16/val
\
--use_token_batch
True
\
--batch_size
2048
\
--sort_type
pool
\
--pool_size
10000
\
--enable_ce
True
\
weight_sharing False
\
pass_num 20
\
dropout_seed 10
}
train | python _ce.py
fluid/neural_machine_translation/transformer/_ce.py
0 → 100644
浏览文件 @
c2ff52df
####this file is only used for continuous evaluation test!
import
os
import
sys
sys
.
path
.
append
(
os
.
environ
[
'ceroot'
])
from
kpi
import
CostKpi
,
DurationKpi
,
AccKpi
#### NOTE kpi.py should shared in models in some way!!!!
train_cost_kpi
=
CostKpi
(
'train_cost'
,
0.02
,
0
,
actived
=
True
)
test_cost_kpi
=
CostKpi
(
'test_cost'
,
0.005
,
0
,
actived
=
True
)
train_duration_kpi
=
DurationKpi
(
'train_duration'
,
0.06
,
0
,
actived
=
True
)
tracking_kpis
=
[
train_cost_kpi
,
test_cost_kpi
,
train_duration_kpi
,
]
def
parse_log
(
log
):
'''
This method should be implemented by model developers.
The suggestion:
each line in the log should be key, value, for example:
"
train_cost
\t
1.0
test_cost
\t
1.0
train_cost
\t
1.0
train_cost
\t
1.0
train_acc
\t
1.2
"
'''
for
line
in
log
.
split
(
'
\n
'
):
fs
=
line
.
strip
().
split
(
'
\t
'
)
print
(
fs
)
if
len
(
fs
)
==
3
and
fs
[
0
]
==
'kpis'
:
print
(
"-----%s"
%
fs
)
kpi_name
=
fs
[
1
]
kpi_value
=
float
(
fs
[
2
])
yield
kpi_name
,
kpi_value
def
log_to_ce
(
log
):
kpi_tracker
=
{}
for
kpi
in
tracking_kpis
:
kpi_tracker
[
kpi
.
name
]
=
kpi
for
(
kpi_name
,
kpi_value
)
in
parse_log
(
log
):
print
(
kpi_name
,
kpi_value
)
kpi_tracker
[
kpi_name
].
add_record
(
kpi_value
)
kpi_tracker
[
kpi_name
].
persist
()
if
__name__
==
'__main__'
:
log
=
sys
.
stdin
.
read
()
print
(
"*****"
)
print
(
log
)
print
(
"****"
)
log_to_ce
(
log
)
\ No newline at end of file
fluid/neural_machine_translation/transformer/config.py
浏览文件 @
c2ff52df
...
...
@@ -81,6 +81,8 @@ class ModelHyperParams(object):
n_layer
=
6
# dropout rate used by all dropout layers.
dropout
=
0.1
# random seed used in dropout for CE.
dropout_seed
=
None
# the flag indicating whether to share embedding and softmax weights.
# vocabularies in source and target should be same for weight sharing.
weight_sharing
=
True
...
...
fluid/neural_machine_translation/transformer/model.py
浏览文件 @
c2ff52df
...
...
@@ -111,7 +111,10 @@ def multi_head_attention(queries,
x
=
weights
,
shape
=
product
.
shape
,
actual_shape
=
post_softmax_shape
)
if
dropout_rate
:
weights
=
layers
.
dropout
(
weights
,
dropout_prob
=
dropout_rate
,
is_test
=
False
)
weights
,
dropout_prob
=
dropout_rate
,
seed
=
ModelHyperParams
.
dropout_seed
,
is_test
=
False
)
out
=
layers
.
matmul
(
weights
,
v
)
return
out
...
...
@@ -171,7 +174,10 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.):
elif
cmd
==
"d"
:
# add dropout
if
dropout_rate
:
out
=
layers
.
dropout
(
out
,
dropout_prob
=
dropout_rate
,
is_test
=
False
)
out
,
dropout_prob
=
dropout_rate
,
seed
=
ModelHyperParams
.
dropout_seed
,
is_test
=
False
)
return
out
...
...
@@ -211,7 +217,9 @@ def prepare_encoder(src_word,
shape
=
[
batch_size
,
seq_len
,
src_emb_dim
],
actual_shape
=
src_data_shape
)
return
layers
.
dropout
(
enc_input
,
dropout_prob
=
dropout_rate
,
enc_input
,
dropout_prob
=
dropout_rate
,
seed
=
ModelHyperParams
.
dropout_seed
,
is_test
=
False
)
if
dropout_rate
else
enc_input
...
...
fluid/neural_machine_translation/transformer/train.py
浏览文件 @
c2ff52df
...
...
@@ -103,6 +103,12 @@ def parse_args():
help
=
"The device type."
)
parser
.
add_argument
(
'--sync'
,
type
=
ast
.
literal_eval
,
default
=
True
,
help
=
"sync mode."
)
parser
.
add_argument
(
"--enable_ce"
,
type
=
ast
.
literal_eval
,
default
=
True
,
help
=
"The flag indicating whether to run the task "
"for continuous evaluation."
)
args
=
parser
.
parse_args
()
# Append args related to dict
...
...
@@ -382,6 +388,12 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
data_input_names
,
util_input_names
,
sum_cost
,
token_num
)
# the best cross-entropy value with label smoothing
loss_normalizer
=
-
((
1.
-
TrainTaskConfig
.
label_smooth_eps
)
*
np
.
log
(
(
1.
-
TrainTaskConfig
.
label_smooth_eps
))
+
TrainTaskConfig
.
label_smooth_eps
*
np
.
log
(
TrainTaskConfig
.
label_smooth_eps
/
(
ModelHyperParams
.
trg_vocab_size
-
1
)
+
1e-20
))
init
=
False
for
pass_id
in
xrange
(
TrainTaskConfig
.
pass_num
):
pass_start_time
=
time
.
time
()
...
...
@@ -421,19 +433,27 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
)
# sum the cost from multi-devices
total_token_num
=
token_num_val
.
sum
()
total_avg_cost
=
total_sum_cost
/
total_token_num
print
(
"epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f"
%
(
pass_id
,
batch_id
,
total_sum_cost
,
total_avg_cost
,
np
.
exp
([
min
(
total_avg_cost
,
100
)])))
print
(
"epoch: %d, batch: %d, avg loss: %f, normalized loss: %f,"
" ppl: %f"
%
(
pass_id
,
batch_id
,
total_avg_cost
,
total_avg_cost
-
loss_normalizer
,
np
.
exp
([
min
(
total_avg_cost
,
100
)])))
if
batch_id
>
0
and
batch_id
%
1000
==
0
:
fluid
.
io
.
save_persistables
(
exe
,
os
.
path
.
join
(
TrainTaskConfig
.
ckpt_dir
,
"latest.checkpoint"
))
init
=
True
time_consumed
=
time
.
time
()
-
pass_start_time
# Validate and save the model for inference.
print
(
"epoch: %d, "
%
pass_id
+
(
"val avg loss: %f, val ppl: %f, "
%
test
()
if
args
.
val_file_pattern
is
not
None
else
""
)
+
"consumed %fs"
%
(
time
.
time
()
-
pass_start_time
))
if
args
.
val_file_pattern
is
not
None
:
val_avg_cost
,
val_ppl
=
test
()
print
(
"epoch: %d, val avg loss: %f, val normalized loss: %f, val ppl: %f,"
" consumed %fs"
%
(
pass_id
,
val_avg_cost
,
val_avg_cost
-
loss_normalizer
,
val_ppl
,
time_consumed
))
else
:
print
(
"epoch: %d, consumed %fs"
%
(
pass_id
,
time_consumed
))
fluid
.
io
.
save_persistables
(
exe
,
os
.
path
.
join
(
TrainTaskConfig
.
ckpt_dir
,
...
...
@@ -442,6 +462,10 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
os
.
path
.
join
(
TrainTaskConfig
.
model_dir
,
"pass_"
+
str
(
pass_id
)
+
".infer.model"
),
data_input_names
[:
-
2
]
+
util_input_names
,
[
predict
],
exe
)
if
args
.
enable_ce
:
# For CE
print
(
"kpis
\t
train_cost
\t
%f"
%
total_avg_cost
)
print
(
"kpis
\t
test_cost
\t
%f"
%
val_avg_cost
)
print
(
"kpis
\t
train_duration
\t
%f"
%
time_consumed
)
def
train
(
args
):
...
...
@@ -465,6 +489,9 @@ def train(args):
exe
=
fluid
.
Executor
(
place
)
if
args
.
enable_ce
:
fluid
.
default_startup_program
().
random_seed
=
1000
sum_cost
,
avg_cost
,
predict
,
token_num
=
transformer
(
ModelHyperParams
.
src_vocab_size
,
ModelHyperParams
.
trg_vocab_size
,
ModelHyperParams
.
max_length
+
1
,
ModelHyperParams
.
n_layer
,
...
...
fluid/object_detection/_ce.py
浏览文件 @
c2ff52df
...
...
@@ -7,12 +7,12 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!!
train_cost_kpi
=
CostKpi
(
'train_cost'
,
0.02
,
actived
=
True
)
test_acc_kpi
=
AccKpi
(
'test_acc'
,
0.01
,
actived
=
True
)
train_speed_kpi
=
AccKpi
(
'train_speed'
,
0.2
,
actived
=
True
)
train_cost_card4_kpi
=
CostKpi
(
'train_cost_card4'
,
0.02
,
actived
=
True
)
test_acc_card4_kpi
=
AccKpi
(
'test_acc_card4'
,
0.01
,
actived
=
True
)
train_speed_card4_kpi
=
AccKpi
(
'train_speed_card4'
,
0.2
,
actived
=
True
)
train_cost_kpi
=
CostKpi
(
'train_cost'
,
0.02
,
0
,
actived
=
True
)
test_acc_kpi
=
AccKpi
(
'test_acc'
,
0.01
,
0
,
actived
=
True
)
train_speed_kpi
=
AccKpi
(
'train_speed'
,
0.2
,
0
,
actived
=
True
)
train_cost_card4_kpi
=
CostKpi
(
'train_cost_card4'
,
0.02
,
0
,
actived
=
True
)
test_acc_card4_kpi
=
AccKpi
(
'test_acc_card4'
,
0.01
,
0
,
actived
=
True
)
train_speed_card4_kpi
=
AccKpi
(
'train_speed_card4'
,
0.2
,
0
,
actived
=
True
)
tracking_kpis
=
[
train_cost_kpi
,
...
...
fluid/object_detection/mobilenet_ssd.py
浏览文件 @
c2ff52df
import
paddle.v2
as
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.initializer
import
MSRA
from
paddle.fluid.param_attr
import
ParamAttr
...
...
fluid/sequence_tagging_for_ner/.run_ce.sh
0 → 100755
浏览文件 @
c2ff52df
###!/bin/bash
####This file is only used for continuous evaluation.
export
CE_MODE_X
=
1
sh data/download.sh
python train.py | python _ce.py
fluid/sequence_tagging_for_ner/README.md
浏览文件 @
c2ff52df
...
...
@@ -22,11 +22,7 @@
## 数据获取
请参考PaddlePaddle v2版本
[
命名实体识别
](
https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md
)
一节中数据获取方式,将该例中的data文件夹拷贝至本例目录下,运行其中的download.sh脚本获取训练和测试数据。
## 通用脚本获取
请将PaddlePaddle v2版本
[
命名实体识别
](
https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md
)
中提供的用于数据读取的文件
[
reader.py
](
https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/reader.py
)
以及包含字典导入等通用功能的文件
[
utils.py
](
https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/utils.py
)
复制到本目录下。本例将会使用到这两个脚本。
完整数据的获取请参考PaddlePaddle v2版本
[
命名实体识别
](
https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md
)
一节中的方式。本例的示例数据同样可以通过运行data/download.sh来获取。
## 训练
...
...
fluid/sequence_tagging_for_ner/_ce.py
0 → 100644
浏览文件 @
c2ff52df
####this file is only used for continuous evaluation test!
import
os
import
sys
sys
.
path
.
append
(
os
.
environ
[
'ceroot'
])
from
kpi
import
CostKpi
,
DurationKpi
,
AccKpi
#### NOTE kpi.py should shared in models in some way!!!!
train_acc_kpi
=
AccKpi
(
'train_precision'
,
0.005
,
actived
=
True
)
test_acc_kpi
=
CostKpi
(
'test_precision'
,
0.005
,
actived
=
True
)
train_duration_kpi
=
DurationKpi
(
'train_duration'
,
0.05
,
actived
=
True
)
tracking_kpis
=
[
train_acc_kpi
,
test_acc_kpi
,
train_duration_kpi
,
]
def
parse_log
(
log
):
for
line
in
log
.
split
(
'
\n
'
):
fs
=
line
.
strip
().
split
(
'
\t
'
)
print
(
fs
)
if
len
(
fs
)
==
3
and
fs
[
0
]
==
'kpis'
:
print
(
"-----%s"
%
fs
)
kpi_name
=
fs
[
1
]
kpi_value
=
float
(
fs
[
2
])
yield
kpi_name
,
kpi_value
def
log_to_ce
(
log
):
kpi_tracker
=
{}
for
kpi
in
tracking_kpis
:
kpi_tracker
[
kpi
.
name
]
=
kpi
for
(
kpi_name
,
kpi_value
)
in
parse_log
(
log
):
print
(
kpi_name
,
kpi_value
)
kpi_tracker
[
kpi_name
].
add_record
(
kpi_value
)
kpi_tracker
[
kpi_name
].
persist
()
if
__name__
==
'__main__'
:
log
=
sys
.
stdin
.
read
()
print
(
"*****"
)
print
(
log
)
print
(
"****"
)
log_to_ce
(
log
)
fluid/sequence_tagging_for_ner/data/download.sh
0 → 100755
浏览文件 @
c2ff52df
if
[
-f
assignment2.zip
]
;
then
echo
"data exist"
exit
0
else
wget http://cs224d.stanford.edu/assignment2/assignment2.zip
fi
if
[
$?
-eq
0
]
;
then
unzip assignment2.zip
cp
assignment2_release/data/ner/wordVectors.txt ./data
cp
assignment2_release/data/ner/vocab.txt ./data
rm
-rf
assignment2_release
else
echo
"download data error!"
>>
/dev/stderr
exit
1
fi
fluid/sequence_tagging_for_ner/data/target.txt
0 → 100644
浏览文件 @
c2ff52df
B-LOC
I-LOC
B-MISC
I-MISC
B-ORG
I-ORG
B-PER
I-PER
O
fluid/sequence_tagging_for_ner/data/test
0 → 100644
浏览文件 @
c2ff52df
CRICKET NNP I-NP O
- : O O
LEICESTERSHIRE NNP I-NP I-ORG
TAKE NNP I-NP O
OVER IN I-PP O
AT NNP I-NP O
TOP NNP I-NP O
AFTER NNP I-NP O
INNINGS NNP I-NP O
VICTORY NN I-NP O
. . O O
LONDON NNP I-NP I-LOC
1996-08-30 CD I-NP O
West NNP I-NP I-MISC
Indian NNP I-NP I-MISC
all-rounder NN I-NP O
Phil NNP I-NP I-PER
Simmons NNP I-NP I-PER
took VBD I-VP O
four CD I-NP O
for IN I-PP O
38 CD I-NP O
on IN I-PP O
Friday NNP I-NP O
as IN I-PP O
Leicestershire NNP I-NP I-ORG
beat VBD I-VP O
Somerset NNP I-NP I-ORG
by IN I-PP O
an DT I-NP O
innings NN I-NP O
and CC O O
39 CD I-NP O
runs NNS I-NP O
in IN I-PP O
two CD I-NP O
days NNS I-NP O
to TO I-VP O
take VB I-VP O
over IN I-PP O
at IN B-PP O
the DT I-NP O
head NN I-NP O
of IN I-PP O
the DT I-NP O
county NN I-NP O
championship NN I-NP O
. . O O
Their PRP$ I-NP O
stay NN I-NP O
on IN I-PP O
top NN I-NP O
, , O O
though RB I-ADVP O
, , O O
may MD I-VP O
be VB I-VP O
short-lived JJ I-ADJP O
as IN I-PP O
title NN I-NP O
rivals NNS I-NP O
Essex NNP I-NP I-ORG
, , O O
Derbyshire NNP I-NP I-ORG
and CC I-NP O
Surrey NNP I-NP I-ORG
all DT O O
closed VBD I-VP O
in RP I-PRT O
on IN I-PP O
victory NN I-NP O
while IN I-SBAR O
Kent NNP I-NP I-ORG
made VBD I-VP O
up RP I-PRT O
for IN I-PP O
lost VBN I-NP O
time NN I-NP O
in IN I-PP O
their PRP$ I-NP O
rain-affected JJ I-NP O
match NN I-NP O
against IN I-PP O
Nottinghamshire NNP I-NP I-ORG
. . O O
After IN I-PP O
bowling VBG I-NP O
Somerset NNP I-NP I-ORG
out RP I-PRT O
for IN I-PP O
83 CD I-NP O
on IN I-PP O
the DT I-NP O
opening NN I-NP O
morning NN I-NP O
at IN I-PP O
Grace NNP I-NP I-LOC
Road NNP I-NP I-LOC
, , O O
Leicestershire NNP I-NP I-ORG
extended VBD I-VP O
their PRP$ I-NP O
first JJ I-NP O
innings NN I-NP O
by IN I-PP O
94 CD I-NP O
runs VBZ I-VP O
before IN I-PP O
being VBG I-VP O
bowled VBD I-VP O
out RP I-PRT O
for IN I-PP O
296 CD I-NP O
with IN I-PP O
England NNP I-NP I-LOC
discard VBP I-VP O
Andy NNP I-NP I-PER
Caddick NNP I-NP I-PER
taking VBG I-VP O
three CD I-NP O
for IN I-PP O
83 CD I-NP O
. . O O
fluid/sequence_tagging_for_ner/data/train
0 → 100644
浏览文件 @
c2ff52df
EU NNP I-NP I-ORG
rejects VBZ I-VP O
German JJ I-NP I-MISC
call NN I-NP O
to TO I-VP O
boycott VB I-VP O
British JJ I-NP I-MISC
lamb NN I-NP O
. . O O
Peter NNP I-NP I-PER
Blackburn NNP I-NP I-PER
BRUSSELS NNP I-NP I-LOC
1996-08-22 CD I-NP O
The DT I-NP O
European NNP I-NP I-ORG
Commission NNP I-NP I-ORG
said VBD I-VP O
on IN I-PP O
Thursday NNP I-NP O
it PRP B-NP O
disagreed VBD I-VP O
with IN I-PP O
German JJ I-NP I-MISC
advice NN I-NP O
to TO I-PP O
consumers NNS I-NP O
to TO I-VP O
shun VB I-VP O
British JJ I-NP I-MISC
lamb NN I-NP O
until IN I-SBAR O
scientists NNS I-NP O
determine VBP I-VP O
whether IN I-SBAR O
mad JJ I-NP O
cow NN I-NP O
disease NN I-NP O
can MD I-VP O
be VB I-VP O
transmitted VBN I-VP O
to TO I-PP O
sheep NN I-NP O
. . O O
Germany NNP I-NP I-LOC
's POS B-NP O
representative NN I-NP O
to TO I-PP O
the DT I-NP O
European NNP I-NP I-ORG
Union NNP I-NP I-ORG
's POS B-NP O
veterinary JJ I-NP O
committee NN I-NP O
Werner NNP I-NP I-PER
Zwingmann NNP I-NP I-PER
said VBD I-VP O
on IN I-PP O
Wednesday NNP I-NP O
consumers NNS I-NP O
should MD I-VP O
buy VB I-VP O
sheepmeat NN I-NP O
from IN I-PP O
countries NNS I-NP O
other JJ I-ADJP O
than IN I-PP O
Britain NNP I-NP I-LOC
until IN I-SBAR O
the DT I-NP O
scientific JJ I-NP O
advice NN I-NP O
was VBD I-VP O
clearer JJR I-ADJP O
. . O O
" " O O
We PRP I-NP O
do VBP I-VP O
n't RB I-VP O
support VB I-VP O
any DT I-NP O
such JJ I-NP O
recommendation NN I-NP O
because IN I-SBAR O
we PRP I-NP O
do VBP I-VP O
n't RB I-VP O
see VB I-VP O
any DT I-NP O
grounds NNS I-NP O
for IN I-PP O
it PRP I-NP O
, , O O
" " O O
the DT I-NP O
Commission NNP I-NP I-ORG
's POS B-NP O
chief JJ I-NP O
spokesman NN I-NP O
Nikolaus NNP I-NP I-PER
van NNP I-NP I-PER
der FW I-NP I-PER
Pas NNP I-NP I-PER
told VBD I-VP O
a DT I-NP O
news NN I-NP O
briefing NN I-NP O
. . O O
He PRP I-NP O
said VBD I-VP O
further JJ I-NP O
scientific JJ I-NP O
study NN I-NP O
was VBD I-VP O
required VBN I-VP O
and CC O O
if IN I-SBAR O
it PRP I-NP O
was VBD I-VP O
found VBN I-VP O
that IN I-SBAR O
action NN I-NP O
was VBD I-VP O
needed VBN I-VP O
it PRP I-NP O
should MD I-VP O
be VB I-VP O
taken VBN I-VP O
by IN I-PP O
the DT I-NP O
European NNP I-NP I-ORG
Union NNP I-NP I-ORG
. . O O
fluid/sequence_tagging_for_ner/reader.py
0 → 100644
浏览文件 @
c2ff52df
"""
Conll03 dataset.
"""
from
utils
import
*
__all__
=
[
"data_reader"
]
def
canonicalize_digits
(
word
):
if
any
([
c
.
isalpha
()
for
c
in
word
]):
return
word
word
=
re
.
sub
(
"\d"
,
"DG"
,
word
)
if
word
.
startswith
(
"DG"
):
word
=
word
.
replace
(
","
,
""
)
# remove thousands separator
return
word
def
canonicalize_word
(
word
,
wordset
=
None
,
digits
=
True
):
word
=
word
.
lower
()
if
digits
:
if
(
wordset
!=
None
)
and
(
word
in
wordset
):
return
word
word
=
canonicalize_digits
(
word
)
# try to canonicalize numbers
if
(
wordset
==
None
)
or
(
word
in
wordset
):
return
word
else
:
return
"UUUNKKK"
# unknown token
def
data_reader
(
data_file
,
word_dict
,
label_dict
):
"""
The dataset can be obtained according to http://www.clips.uantwerpen.be/conll2003/ner/.
It returns a reader creator, each sample in the reader includes:
word id sequence, label id sequence and raw sentence.
:return: reader creator
:rtype: callable
"""
def
reader
():
UNK_IDX
=
word_dict
[
"UUUNKKK"
]
sentence
=
[]
labels
=
[]
with
open
(
data_file
,
"r"
)
as
f
:
for
line
in
f
:
if
len
(
line
.
strip
())
==
0
:
if
len
(
sentence
)
>
0
:
word_idx
=
[
word_dict
.
get
(
canonicalize_word
(
w
,
word_dict
),
UNK_IDX
)
for
w
in
sentence
]
mark
=
[
1
if
w
[
0
].
isupper
()
else
0
for
w
in
sentence
]
label_idx
=
[
label_dict
[
l
]
for
l
in
labels
]
yield
word_idx
,
mark
,
label_idx
sentence
=
[]
labels
=
[]
else
:
segs
=
line
.
strip
().
split
()
sentence
.
append
(
segs
[
0
])
# transform I-TYPE to BIO schema
if
segs
[
-
1
]
!=
"O"
and
(
len
(
labels
)
==
0
or
labels
[
-
1
][
1
:]
!=
segs
[
-
1
][
1
:]):
labels
.
append
(
"B"
+
segs
[
-
1
][
1
:])
else
:
labels
.
append
(
segs
[
-
1
])
return
reader
fluid/sequence_tagging_for_ner/train.py
浏览文件 @
c2ff52df
import
os
import
math
import
time
import
numpy
as
np
import
paddle
.v2
as
paddle
import
paddle
import
paddle.fluid
as
fluid
import
reader
...
...
@@ -24,12 +25,19 @@ def test(exe, chunk_evaluator, inference_program, test_data, place):
return
chunk_evaluator
.
eval
(
exe
)
def
main
(
train_data_file
,
test_data_file
,
vocab_file
,
target_file
,
emb_file
,
model_save_dir
,
num_passes
,
use_gpu
,
parallel
):
def
main
(
train_data_file
,
test_data_file
,
vocab_file
,
target_file
,
emb_file
,
model_save_dir
,
num_passes
,
use_gpu
,
parallel
,
batch_size
=
200
):
if
not
os
.
path
.
exists
(
model_save_dir
):
os
.
mkdir
(
model_save_dir
)
BATCH_SIZE
=
200
word_dict
=
load_dict
(
vocab_file
)
label_dict
=
load_dict
(
target_file
)
...
...
@@ -53,60 +61,76 @@ def main(train_data_file, test_data_file, vocab_file, target_file, emb_file,
chunk_scheme
=
"IOB"
,
num_chunk_types
=
int
(
math
.
ceil
((
label_dict_len
-
1
)
/
2.0
)))
inference_program
=
fluid
.
default_main_program
().
clone
()
inference_program
=
fluid
.
default_main_program
().
clone
(
for_test
=
True
)
with
fluid
.
program_guard
(
inference_program
):
test_target
=
chunk_evaluator
.
metrics
+
chunk_evaluator
.
states
inference_program
=
fluid
.
io
.
get_inference_program
(
test_target
)
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
if
"CE_MODE_X"
not
in
os
.
environ
:
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
reader
.
data_reader
(
train_data_file
,
word_dict
,
label_dict
),
buf_size
=
20000
),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
reader
.
data_reader
(
test_data_file
,
word_dict
,
label_dict
),
buf_size
=
20000
),
batch_size
=
batch_size
)
else
:
train_reader
=
paddle
.
batch
(
reader
.
data_reader
(
train_data_file
,
word_dict
,
label_dict
),
buf_size
=
20000
),
batch_size
=
BATCH_SIZE
)
test_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
reader
.
data_reader
(
test_data_file
,
word_dict
,
label_dict
),
buf_size
=
20000
),
batch_size
=
BATCH_SIZE
)
batch_size
=
batch_size
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_gpu
else
fluid
.
CPUPlace
()
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
word
,
mark
,
target
],
place
=
place
)
exe
=
fluid
.
Executor
(
place
)
if
"CE_MODE_X"
in
os
.
environ
:
fluid
.
default_startup_program
().
random_seed
=
110
exe
.
run
(
fluid
.
default_startup_program
())
embedding_name
=
'emb'
embedding_param
=
fluid
.
global_scope
().
find_var
(
embedding_name
).
get_tensor
()
embedding_param
.
set
(
word_vector_values
,
place
)
batch_id
=
0
for
pass_id
in
xrange
(
num_passes
):
chunk_evaluator
.
reset
(
exe
)
for
data
in
train_reader
(
):
for
batch_id
,
data
in
enumerate
(
train_reader
()
):
cost
,
batch_precision
,
batch_recall
,
batch_f1_score
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
avg_cost
]
+
chunk_evaluator
.
metrics
)
if
batch_id
%
5
==
0
:
print
(
cost
)
print
(
"Pass "
+
str
(
pass_id
)
+
", Batch "
+
str
(
batch_id
)
+
", Cost "
+
str
(
cost
[
0
])
+
", Precision "
+
str
(
batch_precision
[
0
])
+
", Recall "
+
str
(
batch_recall
[
0
])
+
", F1_score"
+
str
(
batch_f1_score
[
0
]))
batch_id
=
batch_id
+
1
pass_precision
,
pass_recall
,
pass_f1_score
=
chunk_evaluator
.
eval
(
exe
)
print
(
"[TrainSet] pass_id:"
+
str
(
pass_id
)
+
" pass_precision:"
+
str
(
pass_precision
)
+
" pass_recall:"
+
str
(
pass_recall
)
+
" pass_f1_score:"
+
str
(
pass_f1_score
))
pass_precision
,
pass_recall
,
pass_f1_score
=
test
(
test_pass_precision
,
test_pass_recall
,
test_
pass_f1_score
=
test
(
exe
,
chunk_evaluator
,
inference_program
,
test_reader
,
place
)
print
(
"[TestSet] pass_id:"
+
str
(
pass_id
)
+
" pass_precision:"
+
str
(
pass_precision
)
+
" pass_recall:"
+
str
(
pass_recall
)
+
" pass_f1_score:"
+
str
(
pass_f1_score
))
test_pass_precision
)
+
" pass_recall:"
+
str
(
test_
pass_recall
)
+
" pass_f1_score:"
+
str
(
test_
pass_f1_score
))
save_dirname
=
os
.
path
.
join
(
model_save_dir
,
"params_pass_%d"
%
pass_id
)
fluid
.
io
.
save_inference_model
(
save_dirname
,
[
'word'
,
'mark'
,
'target'
],
[
crf_decode
],
exe
)
crf_decode
,
exe
)
if
(
"CE_MODE_X"
in
os
.
environ
)
and
(
pass_id
%
50
==
0
):
if
pass_id
>
0
:
print
(
"kpis train_precision %f"
%
pass_precision
)
print
(
"kpis test_precision %f"
%
test_pass_precision
)
print
(
"kpis train_duration %f"
%
(
time
.
time
()
-
time_begin
))
time_begin
=
time
.
time
()
if
__name__
==
"__main__"
:
...
...
@@ -118,5 +142,6 @@ if __name__ == "__main__":
emb_file
=
"data/wordVectors.txt"
,
model_save_dir
=
"models"
,
num_passes
=
1000
,
batch_size
=
1
,
use_gpu
=
False
,
parallel
=
False
)
fluid/sequence_tagging_for_ner/utils.py
0 → 100644
浏览文件 @
c2ff52df
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import
logging
import
os
import
re
import
argparse
import
numpy
as
np
from
collections
import
defaultdict
logger
=
logging
.
getLogger
(
"paddle"
)
logger
.
setLevel
(
logging
.
INFO
)
def
get_embedding
(
emb_file
=
'data/wordVectors.txt'
):
"""
Get the trained word vector.
"""
return
np
.
loadtxt
(
emb_file
,
dtype
=
float
)
def
load_dict
(
dict_path
):
"""
Load the word dictionary from the given file.
Each line of the given file is a word, which can include multiple columns
seperated by tab.
This function takes the first column (columns in a line are seperated by
tab) as key and takes line number of a line as the key (index of the word
in the dictionary).
"""
return
dict
((
line
.
strip
().
split
(
"
\t
"
)[
0
],
idx
)
for
idx
,
line
in
enumerate
(
open
(
dict_path
,
"r"
).
readlines
()))
def
load_reverse_dict
(
dict_path
):
"""
Load the word dictionary from the given file.
Each line of the given file is a word, which can include multiple columns
seperated by tab.
This function takes line number of a line as the key (index of the word in
the dictionary) and the first column (columns in a line are seperated by
tab) as the value.
"""
return
dict
((
idx
,
line
.
strip
().
split
(
"
\t
"
)[
0
])
for
idx
,
line
in
enumerate
(
open
(
dict_path
,
"r"
).
readlines
()))
fluid/text_classification/.run_ce.sh
0 → 100755
浏览文件 @
c2ff52df
###!/bin/bash
####This file is only used for continuous evaluation.
export
CE_MODE_X
=
1
python train.py cnn | python _ce.py
fluid/text_classification/_ce.py
0 → 100644
浏览文件 @
c2ff52df
####this file is only used for continuous evaluation test!
import
os
import
sys
sys
.
path
.
append
(
os
.
environ
[
'ceroot'
])
from
kpi
import
CostKpi
,
DurationKpi
,
AccKpi
#### NOTE kpi.py should shared in models in some way!!!!
train_acc_kpi
=
AccKpi
(
'train_acc'
,
0.005
,
actived
=
True
)
train_cost_kpi
=
CostKpi
(
'train_cost'
,
0.005
,
actived
=
True
)
train_duration_kpi
=
DurationKpi
(
'train_duration'
,
0.05
,
actived
=
True
)
tracking_kpis
=
[
train_acc_kpi
,
train_cost_kpi
,
train_duration_kpi
,
]
def
parse_log
(
log
):
for
line
in
log
.
split
(
'
\n
'
):
fs
=
line
.
strip
().
split
(
'
\t
'
)
print
(
fs
)
if
len
(
fs
)
==
3
and
fs
[
0
]
==
'kpis'
:
print
(
"-----%s"
%
fs
)
kpi_name
=
fs
[
1
]
kpi_value
=
float
(
fs
[
2
])
yield
kpi_name
,
kpi_value
def
log_to_ce
(
log
):
kpi_tracker
=
{}
for
kpi
in
tracking_kpis
:
kpi_tracker
[
kpi
.
name
]
=
kpi
for
(
kpi_name
,
kpi_value
)
in
parse_log
(
log
):
print
(
kpi_name
,
kpi_value
)
kpi_tracker
[
kpi_name
].
add_record
(
kpi_value
)
kpi_tracker
[
kpi_name
].
persist
()
if
__name__
==
'__main__'
:
log
=
sys
.
stdin
.
read
()
print
(
"*****"
)
print
(
log
)
print
(
"****"
)
log_to_ce
(
log
)
fluid/text_classification/clouds/scdb_parallel_executor.py
浏览文件 @
c2ff52df
import
unittest
import
contextlib
import
paddle
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
import
numpy
as
np
import
sys
import
time
...
...
fluid/text_classification/clouds/scdb_single_card.py
浏览文件 @
c2ff52df
import
unittest
import
contextlib
import
paddle
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
import
numpy
as
np
import
sys
import
time
...
...
fluid/text_classification/infer.py
浏览文件 @
c2ff52df
...
...
@@ -4,8 +4,8 @@ import unittest
import
contextlib
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
import
utils
...
...
fluid/text_classification/nets.py
浏览文件 @
c2ff52df
...
...
@@ -2,8 +2,8 @@ import sys
import
time
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
def
bow_net
(
data
,
...
...
fluid/text_classification/train.py
浏览文件 @
c2ff52df
import
os
import
sys
import
time
import
unittest
import
contextlib
import
paddle
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
import
utils
from
nets
import
bow_net
...
...
@@ -53,8 +54,12 @@ def train(train_reader,
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
data
,
label
],
place
=
place
)
# For internal continuous evaluation
if
"CE_MODE_X"
in
os
.
environ
:
fluid
.
default_startup_program
().
random_seed
=
110
exe
.
run
(
fluid
.
default_startup_program
())
for
pass_id
in
xrange
(
pass_num
):
pass_start
=
time
.
time
()
data_size
,
data_count
,
total_acc
,
total_cost
=
0
,
0
,
0.0
,
0.0
for
data
in
train_reader
():
avg_cost_np
,
avg_acc_np
=
exe
.
run
(
fluid
.
default_main_program
(),
...
...
@@ -73,6 +78,13 @@ def train(train_reader,
epoch_model
=
save_dirname
+
"/"
+
"epoch"
+
str
(
pass_id
)
fluid
.
io
.
save_inference_model
(
epoch_model
,
[
"words"
,
"label"
],
acc
,
exe
)
pass_end
=
time
.
time
()
# For internal continuous evaluation
if
"CE_MODE_X"
in
os
.
environ
:
print
(
"kpis train_acc %f"
%
avg_acc
)
print
(
"kpis train_cost %f"
%
avg_cost
)
print
(
"kpis train_duration %f"
%
(
pass_end
-
pass_start
))
def
train_net
():
word_dict
,
train_reader
,
test_reader
=
utils
.
prepare_data
(
...
...
fluid/text_classification/utils.py
浏览文件 @
c2ff52df
import
os
import
sys
import
time
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
def
to_lodtensor
(
data
,
place
):
...
...
@@ -64,15 +65,22 @@ def prepare_data(data_type="imdb",
raise
RuntimeError
(
"No such dataset"
)
if
data_type
==
"imdb"
:
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
buf_size
),
batch_size
=
batch_size
)
if
"CE_MODE_X"
in
os
.
environ
:
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
test
(
word_dict
),
buf_size
=
buf_size
),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
test
(
word_dict
),
batch_size
=
batch_size
)
else
:
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
buf_size
),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
test
(
word_dict
),
buf_size
=
buf_size
),
batch_size
=
batch_size
)
else
:
raise
RuntimeError
(
"no such dataset"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录