Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
book
提交
20fbfb0c
B
book
项目概览
PaddlePaddle
/
book
通知
16
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
40
列表
看板
标记
里程碑
合并请求
37
Wiki
5
Wiki
分析
仓库
DevOps
项目成员
Pages
B
book
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
40
Issue
40
列表
看板
标记
里程碑
合并请求
37
合并请求
37
Pages
分析
分析
仓库分析
DevOps
Wiki
5
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
20fbfb0c
编写于
12月 04, 2018
作者:
L
lujun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ses-1,fix review for pr-644,test=develop
上级
3d0283e5
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
177 addition
and
170 deletion
+177
-170
01.fit_a_line/README.cn.md
01.fit_a_line/README.cn.md
+60
-56
01.fit_a_line/index.cn.html
01.fit_a_line/index.cn.html
+60
-56
01.fit_a_line/train.py
01.fit_a_line/train.py
+57
-58
未找到文件。
01.fit_a_line/README.cn.md
浏览文件 @
20fbfb0c
...
...
@@ -106,6 +106,8 @@ import numpy
import
math
import
sys
from
__future__
import
print_function
import
os
os
.
environ
[
'CPU_NUM'
]
=
'1'
```
我们通过uci_housing模块引入了数据集合
[
UCI Housing Data Set
](
https://archive.ics.uci.edu/ml/datasets/Housing
)
...
...
@@ -115,7 +117,7 @@ from __future__ import print_function
1.
数据下载的过程。下载数据保存在~/.cache/paddle/dataset/uci_housing/housing.data。
2.
[
数据预处理
](
#数据预处理
)
的过程。
接下来我们定义了用于训练的数据提供器。提供器每次读入一个大小为
`BATCH_SIZE`
的数据批次。如果用户希望加一些随机性,
她
可以同时定义一个批次大小和一个缓存大小。这样的话,每次数据提供器会从缓存中随机读取批次大小那么多的数据。
接下来我们定义了用于训练的数据提供器。提供器每次读入一个大小为
`BATCH_SIZE`
的数据批次。如果用户希望加一些随机性,
它
可以同时定义一个批次大小和一个缓存大小。这样的话,每次数据提供器会从缓存中随机读取批次大小那么多的数据。
```
python
BATCH_SIZE
=
20
...
...
@@ -124,6 +126,11 @@ train_reader = paddle.batch(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
uci_housing
.
train
(),
buf_size
=
500
),
batch_size
=
BATCH_SIZE
)
test_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
uci_housing
.
test
(),
buf_size
=
500
),
batch_size
=
BATCH_SIZE
)
```
### 配置训练程序
...
...
@@ -134,6 +141,9 @@ x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y
=
fluid
.
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
dtype
=
'float32'
)
y_predict
=
fluid
.
layers
.
fc
(
input
=
x
,
size
=
1
,
act
=
None
)
main_program
=
fluid
.
default_main_program
()
startup_program
=
fluid
.
default_startup_program
()
cost
=
fluid
.
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y
)
avg_loss
=
fluid
.
layers
.
mean
(
cost
)
```
...
...
@@ -145,6 +155,9 @@ avg_loss = fluid.layers.mean(cost)
```
python
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.001
)
sgd_optimizer
.
minimize
(
avg_loss
)
#clone a test_program
test_program
=
main_program
.
clone
(
for_test
=
True
)
```
### 定义运算场所
...
...
@@ -153,13 +166,16 @@ sgd_optimizer.minimize(avg_loss)
```
python
use_cuda
=
False
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
ParallelExecutor
(
use_cuda
,
main_program
=
main_program
)
```
除此之外,还可以通过画图,来展现
`训练进程`
:
```
python
# Plot data
from
paddle.
v2
.plot
import
Ploter
from
paddle.
utils
.plot
import
Ploter
train_title
=
"Train cost"
test_title
=
"Test cost"
...
...
@@ -171,22 +187,19 @@ plot_cost = Ploter(train_title, test_title)
训练需要有一个训练程序和一些必要参数,并构建了一个获取训练过程中测试误差的函数。
```
python
exe
=
fluid
.
Executor
(
place
)
num_epochs
=
100
# For training test cost
def
train_test
(
train_program
,
feeder
):
exe_test
=
fluid
.
Executor
(
place
)
def
train_test
(
executor
,
reader
,
feeder
,
fetch_list
):
accumulated
=
1
*
[
0
]
count
=
0
test_program
=
train_program
.
clone
(
for_test
=
True
)
for
data_test
in
test_reader
():
outs
=
exe_test
.
run
(
program
=
test_program
,
feed
=
feeder
.
feed
(
data_test
),
fetch_list
=
[
avg_loss
])
for
data_test
in
reader
():
outs
=
executor
.
run
(
feed
=
feeder
.
feed
(
data_test
),
fetch_list
=
fetch_list
)
accumulated
=
[
x_c
[
0
]
+
x_c
[
1
][
0
]
for
x_c
in
zip
(
accumulated
,
outs
)]
count
+=
1
return
[
x_d
/
count
for
x_d
in
accumulated
]
```
### 训练主循环
...
...
@@ -194,55 +207,46 @@ PaddlePaddle提供了读取数据者发生器机制来读取训练数据。读
如果训练顺利,可以把训练参数保存到
`params_dirname`
。
```
python
%
matplotlib
inline
# Specify the directory to save the parameters
params_dirname
=
"fit_a_line.inference.model"
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
x
,
y
])
naive_exe
=
fluid
.
Executor
(
place
)
naive_exe
.
run
(
startup_program
)
step
=
0
exe_test
=
fluid
.
ParallelExecutor
(
use_cuda
,
main_program
=
test_program
,
share_vars_from
=
exe
)
# main train loop.
def
train_loop
(
main_program
):
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
x
,
y
])
feeder_test
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
x
,
y
])
exe
.
run
(
fluid
.
default_startup_program
())
step
=
0
for
pass_id
in
range
(
num_epochs
):
for
pass_id
in
range
(
num_epochs
):
for
data_train
in
train_reader
():
avg_loss_value
,
=
exe
.
run
(
main_program
,
feed
=
feeder
.
feed
(
data_train
),
fetch_list
=
[
avg_loss
])
avg_loss_value
,
=
exe
.
run
(
feed
=
feeder
.
feed
(
data_train
),
fetch_list
=
[
avg_loss
.
name
])
if
step
%
10
==
0
:
# record a train cost every 10 batches
plot_cost
.
append
(
train_title
,
step
,
avg_loss_value
[
0
])
plot_cost
.
plot
()
if
step
%
100
==
0
:
# record a test cost every 100 batches
test_metics
=
train_test
(
train_program
=
main_program
,
feeder
=
feeder_test
)
test_metics
=
train_test
(
executor
=
exe_test
,
reader
=
test_reader
,
fetch_list
=
[
avg_loss
.
name
],
feeder
=
feeder
)
plot_cost
.
append
(
test_title
,
step
,
test_metics
[
0
])
plot_cost
.
plot
()
# If the accuracy is good enough, we can stop the training.
if
test_metics
[
0
]
<
10.0
:
return
break
step
+=
1
if
math
.
isnan
(
float
(
avg_loss_value
)):
if
math
.
isnan
(
float
(
avg_loss_value
[
0
]
)):
sys
.
exit
(
"got NaN loss, training failed."
)
if
params_dirname
is
not
None
:
# We can save the trained parameters for the inferences later
fluid
.
io
.
save_inference_model
(
params_dirname
,
[
'x'
],
[
y_predict
],
exe
)
```
### 开始训练
```
python
%
matplotlib
inline
# The training could take up to a few minutes.
train_loop
(
fluid
.
default_main_program
())
[
y_predict
],
naive_exe
)
```
## 预测
需要构建一个使用训练好的参数来进行预测的程序,训练好的参数位置在
`params_dirname`
。
...
...
@@ -260,7 +264,7 @@ inference_scope = fluid.core.Scope()
```
python
with
fluid
.
scope_guard
(
inference_scope
):
[
inference_program
,
feed_target_names
,
fetch_targets
]
=
fluid
.
io
.
load_inference_model
(
params_dirname
,
exe
)
fetch_targets
]
=
fluid
.
io
.
load_inference_model
(
params_dirname
,
infer_
exe
)
batch_size
=
10
infer_reader
=
paddle
.
batch
(
...
...
01.fit_a_line/index.cn.html
浏览文件 @
20fbfb0c
...
...
@@ -148,6 +148,8 @@ import numpy
import math
import sys
from __future__ import print_function
import os
os.environ['CPU_NUM'] = '1'
```
我们通过uci_housing模块引入了数据集合[UCI Housing Data Set](https://archive.ics.uci.edu/ml/datasets/Housing)
...
...
@@ -157,7 +159,7 @@ from __future__ import print_function
1. 数据下载的过程。下载数据保存在~/.cache/paddle/dataset/uci_housing/housing.data。
2. [数据预处理](#数据预处理)的过程。
接下来我们定义了用于训练的数据提供器。提供器每次读入一个大小为`BATCH_SIZE`的数据批次。如果用户希望加一些随机性,
她
可以同时定义一个批次大小和一个缓存大小。这样的话,每次数据提供器会从缓存中随机读取批次大小那么多的数据。
接下来我们定义了用于训练的数据提供器。提供器每次读入一个大小为`BATCH_SIZE`的数据批次。如果用户希望加一些随机性,
它
可以同时定义一个批次大小和一个缓存大小。这样的话,每次数据提供器会从缓存中随机读取批次大小那么多的数据。
```python
BATCH_SIZE = 20
...
...
@@ -166,6 +168,11 @@ train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=BATCH_SIZE)
```
### 配置训练程序
...
...
@@ -176,6 +183,9 @@ x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
main_program = fluid.default_main_program()
startup_program = fluid.default_startup_program()
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_loss = fluid.layers.mean(cost)
```
...
...
@@ -187,6 +197,9 @@ avg_loss = fluid.layers.mean(cost)
```python
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_loss)
#clone a test_program
test_program = main_program.clone(for_test=True)
```
### 定义运算场所
...
...
@@ -195,13 +208,16 @@ sgd_optimizer.minimize(avg_loss)
```python
use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.ParallelExecutor(use_cuda, main_program=main_program)
```
除此之外,还可以通过画图,来展现`训练进程`:
```python
# Plot data
from paddle.
v2
.plot import Ploter
from paddle.
utils
.plot import Ploter
train_title = "Train cost"
test_title = "Test cost"
...
...
@@ -213,22 +229,19 @@ plot_cost = Ploter(train_title, test_title)
训练需要有一个训练程序和一些必要参数,并构建了一个获取训练过程中测试误差的函数。
```python
exe = fluid.Executor(place)
num_epochs = 100
# For training test cost
def train_test(train_program, feeder):
exe_test = fluid.Executor(place)
def train_test(executor, reader, feeder, fetch_list):
accumulated = 1 * [0]
count = 0
test_program = train_program.clone(for_test=True)
for data_test in test_reader():
outs = exe_test.run(program=test_program,
feed=feeder.feed(data_test),
fetch_list=[avg_loss])
for data_test in reader():
outs = executor.run(feed=feeder.feed(data_test),
fetch_list=fetch_list)
accumulated = [x_c[0] + x_c[1][0] for x_c in zip(accumulated, outs)]
count += 1
return [x_d / count for x_d in accumulated]
```
### 训练主循环
...
...
@@ -236,55 +249,46 @@ PaddlePaddle提供了读取数据者发生器机制来读取训练数据。读
如果训练顺利,可以把训练参数保存到`params_dirname`。
```python
%matplotlib inline
# Specify the directory to save the parameters
params_dirname = "fit_a_line.inference.model"
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
naive_exe = fluid.Executor(place)
naive_exe.run(startup_program)
step = 0
exe_test = fluid.ParallelExecutor(use_cuda,
main_program=test_program,
share_vars_from=exe)
# main train loop.
def train_loop(main_program):
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
feeder_test = fluid.DataFeeder(place=place, feed_list=[x, y])
exe.run(fluid.default_startup_program())
step = 0
for pass_id in range(num_epochs):
for pass_id in range(num_epochs):
for data_train in train_reader():
avg_loss_value, = exe.run(main_program,
feed=feeder.feed(data_train),
fetch_list=[avg_loss])
avg_loss_value, = exe.run(feed=feeder.feed(data_train),
fetch_list=[avg_loss.name])
if step % 10 == 0: # record a train cost every 10 batches
plot_cost.append(train_title, step, avg_loss_value[0])
plot_cost.plot()
if step % 100 == 0: # record a test cost every 100 batches
test_metics = train_test(train_program=main_program,
feeder=feeder_test)
test_metics = train_test(executor=exe_test,
reader=test_reader,
fetch_list=[avg_loss.name],
feeder=feeder)
plot_cost.append(test_title, step, test_metics[0])
plot_cost.plot()
# If the accuracy is good enough, we can stop the training.
if test_metics[0]
<
10.0
:
return
break
step
+=
1
if
math.isnan
(
float
(
avg_loss_value
))
:
if
math.isnan
(
float
(
avg_loss_value
[0]
))
:
sys.exit
("
got
NaN
loss
,
training
failed.
")
if
params_dirname
is
not
None:
#
We
can
save
the
trained
parameters
for
the
inferences
later
fluid.io.save_inference_model
(
params_dirname
,
['
x
'],
[
y_predict
],
exe
)
```
###
开始训练
```
python
%
matplotlib
inline
#
The
training
could
take
up
to
a
few
minutes.
train_loop
(
fluid.default_main_program
())
[
y_predict
],
naive_exe
)
```
##
预测
需要构建一个使用训练好的参数来进行预测的程序
,
训练好的参数位置在
`
params_dirname
`。
...
...
@@ -302,7 +306,7 @@ inference_scope = fluid.core.Scope()
```
python
with
fluid.scope_guard
(
inference_scope
)
:
[
inference_program
,
feed_target_names
,
fetch_targets] =
fluid.io.load_inference_model(params_dirname,
exe
)
fetch_targets] =
fluid.io.load_inference_model(params_dirname,
infer_
exe
)
batch_size =
10
infer_reader =
paddle.batch(
...
...
01.fit_a_line/train.py
浏览文件 @
20fbfb0c
...
...
@@ -19,10 +19,22 @@ import paddle.fluid as fluid
import
numpy
import
math
import
sys
import
os
os
.
environ
[
'CPU_NUM'
]
=
'1'
def
main
():
# For training test cost
def
train_test
(
executor
,
reader
,
feeder
,
fetch_list
):
accumulated
=
1
*
[
0
]
count
=
0
for
data_test
in
reader
():
outs
=
executor
.
run
(
feed
=
feeder
.
feed
(
data_test
),
fetch_list
=
fetch_list
)
accumulated
=
[
x_c
[
0
]
+
x_c
[
1
][
0
]
for
x_c
in
zip
(
accumulated
,
outs
)]
count
+=
1
return
[
x_d
/
count
for
x_d
in
accumulated
]
def
main
():
batch_size
=
20
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
uci_housing
.
train
(),
buf_size
=
500
),
...
...
@@ -37,7 +49,7 @@ def main():
y_predict
=
fluid
.
layers
.
fc
(
input
=
x
,
size
=
1
,
act
=
None
)
main_program
=
fluid
.
default_main_program
()
star_program
=
fluid
.
default_startup_program
()
star
tup
_program
=
fluid
.
default_startup_program
()
cost
=
fluid
.
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y
)
avg_loss
=
fluid
.
layers
.
mean
(
cost
)
...
...
@@ -50,73 +62,60 @@ def main():
# can use CPU or GPU
use_cuda
=
False
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
ParallelExecutor
(
use_cuda
,
main_program
=
main_program
)
# Specify the directory to save the parameters
params_dirname
=
"fit_a_line.inference.model"
num_epochs
=
100
# For training test cost
def
train_test
(
program
,
feeder
):
exe_test
=
fluid
.
Executor
(
place
)
accumulated
=
1
*
[
0
]
count
=
0
for
data_test
in
test_reader
():
outs
=
exe_test
.
run
(
program
=
program
,
feed
=
feeder
.
feed
(
data_test
),
fetch_list
=
[
avg_loss
])
accumulated
=
[
x_c
[
0
]
+
x_c
[
1
][
0
]
for
x_c
in
zip
(
accumulated
,
outs
)]
count
+=
1
return
[
x_d
/
count
for
x_d
in
accumulated
]
num_epochs
=
200
# main train loop.
def
train_loop
():
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
x
,
y
])
feeder_test
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
x
,
y
]
)
exe
.
run
(
star
_program
)
naive_exe
=
fluid
.
Executor
(
place
)
naive_exe
.
run
(
startup
_program
)
train_title
=
"Train cost"
test_title
=
"Test cost"
train_prompt
=
"Train cost"
test_prompt
=
"Test cost"
step
=
0
exe_test
=
fluid
.
ParallelExecutor
(
use_cuda
,
main_program
=
test_program
,
share_vars_from
=
exe
)
for
pass_id
in
range
(
num_epochs
):
for
data_train
in
train_reader
():
avg_loss_value
,
=
exe
.
run
(
main_program
,
feed
=
feeder
.
feed
(
data_train
),
fetch_list
=
[
avg_loss
])
feed
=
feeder
.
feed
(
data_train
),
fetch_list
=
[
avg_loss
.
name
])
if
step
%
10
==
0
:
# record a train cost every 10 batches
print
(
"%s, Step %d, Cost %f"
%
(
train_title
,
step
,
avg_loss_value
[
0
]))
(
train_prompt
,
step
,
avg_loss_value
[
0
]))
if
step
%
100
==
0
:
# record a test cost every 100 batches
test_metics
=
train_test
(
program
=
test_program
,
feeder
=
feeder_test
)
executor
=
exe_test
,
reader
=
test_reader
,
fetch_list
=
[
avg_loss
.
name
],
feeder
=
feeder
)
print
(
"%s, Step %d, Cost %f"
%
(
test_title
,
step
,
test_metics
[
0
]))
(
test_prompt
,
step
,
test_metics
[
0
]))
# If the accuracy is good enough, we can stop the training.
if
test_metics
[
0
]
<
10.0
:
return
break
step
+=
1
if
math
.
isnan
(
float
(
avg_loss_value
)):
if
math
.
isnan
(
float
(
avg_loss_value
[
0
]
)):
sys
.
exit
(
"got NaN loss, training failed."
)
if
params_dirname
is
not
None
:
# We can save the trained parameters for the inferences later
fluid
.
io
.
save_inference_model
(
params_dirname
,
[
'x'
],
[
y_predict
],
exe
)
train_loop
()
fluid
.
io
.
save_inference_model
(
params_dirname
,
[
'x'
],
[
y_predict
],
naive_exe
)
infer_exe
=
fluid
.
Executor
(
place
)
inference_scope
=
fluid
.
core
.
Scope
()
# infer
with
fluid
.
scope_guard
(
inference_scope
):
[
inference_program
,
feed_target_names
,
fetch_targets
]
=
fluid
.
io
.
load_inference_model
(
params_dirname
,
exe
)
[
inference_program
,
feed_target_names
,
fetch_targets
]
=
fluid
.
io
.
load_inference_model
(
params_dirname
,
infer_
exe
)
batch_size
=
10
infer_reader
=
paddle
.
batch
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录