Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
b265ccab
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b265ccab
编写于
2月 28, 2017
作者:
Y
Yu Yang
提交者:
GitHub
2月 28, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1464 from reyoung/feature/clean_mnist_v2
Combine Reader/Feeder together in trainer.train
上级
ce325996
eee1320b
变更
17
隐藏空白更改
内联
并排
Showing
17 changed files
with
124 additions
and
109 deletions
+124
-109
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+4
-11
python/CMakeLists.txt
python/CMakeLists.txt
+2
-2
python/paddle/reader/tests/CMakeLists.txt
python/paddle/reader/tests/CMakeLists.txt
+0
-9
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+4
-1
python/paddle/v2/dataset/__init__.py
python/paddle/v2/dataset/__init__.py
+3
-0
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+11
-4
python/paddle/v2/reader/__init__.py
python/paddle/v2/reader/__init__.py
+0
-0
python/paddle/v2/reader/creator.py
python/paddle/v2/reader/creator.py
+0
-0
python/paddle/v2/reader/decorator.py
python/paddle/v2/reader/decorator.py
+23
-1
python/paddle/v2/reader/tests/CMakeLists.txt
python/paddle/v2/reader/tests/CMakeLists.txt
+3
-0
python/paddle/v2/reader/tests/__init__.py
python/paddle/v2/reader/tests/__init__.py
+0
-0
python/paddle/v2/reader/tests/creator_test.py
python/paddle/v2/reader/tests/creator_test.py
+6
-4
python/paddle/v2/reader/tests/decorator_test.py
python/paddle/v2/reader/tests/decorator_test.py
+14
-13
python/paddle/v2/reader/tests/run_tests.sh
python/paddle/v2/reader/tests/run_tests.sh
+35
-0
python/paddle/v2/reader/tests/test_data_creator.txt
python/paddle/v2/reader/tests/test_data_creator.txt
+0
-0
python/paddle/v2/trainer.py
python/paddle/v2/trainer.py
+16
-63
python/setup.py.in
python/setup.py.in
+3
-1
未找到文件。
demo/mnist/api_train_v2.py
浏览文件 @
b265ccab
import
paddle.v2
as
paddle
import
mnist_util
def train_reader():
    """Yield MNIST training samples one at a time.

    The samples are whatever ``mnist_util.read_from_mnist`` produces for the
    raw training file under ``./data/raw_data``.
    """
    train_file = './data/raw_data/train'
    for sample in mnist_util.read_from_mnist(train_file):
        yield sample
def
main
():
paddle
.
init
(
use_gpu
=
False
,
trainer_count
=
1
)
...
...
@@ -40,11 +31,13 @@ def main():
trainer
=
paddle
.
trainer
.
SGD
(
update_equation
=
adam_optimizer
)
trainer
.
train
(
train_data_reader
=
train_reader
,
reader
=
paddle
.
reader
.
batched
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
mnist
.
train
(),
buf_size
=
8192
),
batch_size
=
32
),
cost
=
cost
,
parameters
=
parameters
,
event_handler
=
event_handler
,
batch_size
=
32
,
# batch size should be refactored into the data reader
reader_dict
=
{
images
.
name
:
0
,
label
.
name
:
1
})
...
...
python/CMakeLists.txt
浏览文件 @
b265ccab
...
...
@@ -4,7 +4,7 @@ set(OUTPUT_DIR
file
(
GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py
)
file
(
GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py
)
file
(
GLOB UTILS_PY_FILES . ./paddle/utils/*.py
)
file
(
GLOB
V2_PY_FILES . ./paddle/v2/
*.py
)
file
(
GLOB
_RECURSE V2_PY_FILES ./paddle/v2/
*.py
)
set
(
PY_FILES paddle/__init__.py
${
TRAINER_PY_FILES
}
...
...
@@ -24,7 +24,7 @@ add_custom_target(paddle_python ALL DEPENDS
${
OUTPUT_DIR
}
/.timestamp
)
add_subdirectory
(
paddle/trainer_config_helpers/tests
)
add_subdirectory
(
paddle/reader/tests
)
add_subdirectory
(
paddle/
v2/
reader/tests
)
add_subdirectory
(
paddle/v2/tests
)
install
(
DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/dist/
...
...
python/paddle/reader/tests/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
ce325996
add_test
(
NAME reader_decorator_test
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/reader/tests/decorator_test.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
add_test
(
NAME reader_creator_test
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/reader/tests/creator_test.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
python/paddle/v2/__init__.py
浏览文件 @
b265ccab
...
...
@@ -20,13 +20,16 @@ import event
import
data_type
import
topology
import
data_feeder
from
.
import
dataset
from
.
import
reader
import
attr
import
pooling
import
py_paddle.swig_paddle
as
api
__all__
=
[
'optimizer'
,
'layer'
,
'activation'
,
'parameters'
,
'init'
,
'trainer'
,
'event'
,
'data_type'
,
'attr'
,
'pooling'
,
'data_feeder'
,
'topology'
'event'
,
'data_type'
,
'attr'
,
'pooling'
,
'data_feeder'
,
'dataset'
,
'reader'
,
'topology'
]
...
...
python/paddle/v2/dataset/__init__.py
浏览文件 @
b265ccab
import
mnist
__all__
=
[
'mnist'
]
python/paddle/v2/dataset/mnist.py
浏览文件 @
b265ccab
"""
MNIST dataset.
"""
import
numpy
import
paddle.v2.dataset.common
import
subprocess
import
numpy
import
platform
__all__
=
[
'train'
,
'test'
]
URL_PREFIX
=
'http://yann.lecun.com/exdb/mnist/'
...
...
@@ -20,12 +20,19 @@ TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'
def
reader_creator
(
image_filename
,
label_filename
,
buffer_size
):
def
reader
():
if
platform
.
system
()
==
'Darwin'
:
zcat_cmd
=
'gzcat'
elif
platform
.
system
()
==
'Linux'
:
zcat_cmd
=
'zcat'
else
:
raise
NotImplementedError
()
# According to http://stackoverflow.com/a/38061619/724872, we
# cannot use standard package gzip here.
m
=
subprocess
.
Popen
([
"zcat"
,
image_filename
],
stdout
=
subprocess
.
PIPE
)
m
=
subprocess
.
Popen
([
zcat_cmd
,
image_filename
],
stdout
=
subprocess
.
PIPE
)
m
.
stdout
.
read
(
16
)
# skip some magic bytes
l
=
subprocess
.
Popen
([
"zcat"
,
label_filename
],
stdout
=
subprocess
.
PIPE
)
l
=
subprocess
.
Popen
([
zcat_cmd
,
label_filename
],
stdout
=
subprocess
.
PIPE
)
l
.
stdout
.
read
(
8
)
# skip some magic bytes
while
True
:
...
...
python/paddle/reader/__init__.py
→
python/paddle/
v2/
reader/__init__.py
浏览文件 @
b265ccab
文件已移动
python/paddle/reader/creator.py
→
python/paddle/
v2/
reader/creator.py
浏览文件 @
b265ccab
文件已移动
python/paddle/reader/decorator.py
→
python/paddle/
v2/
reader/decorator.py
浏览文件 @
b265ccab
...
...
@@ -14,7 +14,7 @@
__all__
=
[
'map_readers'
,
'buffered'
,
'compose'
,
'chain'
,
'shuffle'
,
'ComposeNotAligned'
'ComposeNotAligned'
,
'batched'
]
from
Queue
import
Queue
...
...
@@ -191,3 +191,25 @@ def buffered(reader, size):
e
=
q
.
get
()
return
data_reader
def batched(reader, batch_size):
    """
    Create a batched reader.

    :param reader: the data reader to read from; a callable that returns an
                   iterable of instances.
    :param batch_size: number of instances per batch, must be positive.
    :return: the batched reader; calling it yields lists holding batch_size
             instances each, except that the last list may be shorter.
    :raises ValueError: if batch_size is not positive.
    """
    # A non-positive size can never satisfy the length test below, so the
    # whole data set would silently pile up into one single batch.
    if batch_size <= 0:
        raise ValueError(
            'batch_size should be a positive value, but got %r' %
            (batch_size, ))

    def batched_reader():
        batch = []
        for instance in reader():
            batch.append(instance)
            if len(batch) == batch_size:
                yield batch
                batch = []
        # Flush the trailing, partially filled batch.
        if batch:
            yield batch

    return batched_reader
python/paddle/v2/reader/tests/CMakeLists.txt
0 → 100644
浏览文件 @
b265ccab
add_test
(
NAME reader_tests
COMMAND bash
${
PROJ_ROOT
}
/python/paddle/v2/reader/tests/run_tests.sh
${
PYTHON_EXECUTABLE
}
)
python/paddle/v2/reader/tests/__init__.py
0 → 100644
浏览文件 @
b265ccab
python/paddle/reader/tests/creator_test.py
→
python/paddle/
v2/
reader/tests/creator_test.py
浏览文件 @
b265ccab
...
...
@@ -11,17 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
paddle.reader.creator
import
numpy
as
np
import
os
import
paddle.v2.reader.creator
class
TestNumpyArray
(
unittest
.
TestCase
):
def
test_numpy_array
(
self
):
l
=
[[
1
,
2
,
3
],
[
4
,
5
,
6
]]
x
=
np
.
array
(
l
,
np
.
int32
)
reader
=
paddle
.
reader
.
creator
.
np_array
(
x
)
reader
=
paddle
.
v2
.
reader
.
creator
.
np_array
(
x
)
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertItemsEqual
(
e
,
l
[
idx
])
...
...
@@ -29,7 +31,7 @@ class TestNumpyArray(unittest.TestCase):
class
TestTextFile
(
unittest
.
TestCase
):
def
test_text_file
(
self
):
path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"test_data_creator.txt"
)
reader
=
paddle
.
reader
.
creator
.
text_file
(
path
)
reader
=
paddle
.
v2
.
reader
.
creator
.
text_file
(
path
)
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertEqual
(
e
,
str
(
idx
*
2
)
+
" "
+
str
(
idx
*
2
+
1
))
...
...
python/paddle/reader/tests/decorator_test.py
→
python/paddle/
v2/
reader/tests/decorator_test.py
浏览文件 @
b265ccab
...
...
@@ -11,9 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
paddle.reader
import
time
import
unittest
import
paddle.v2.reader
def
reader_creator_10
(
dur
):
...
...
@@ -37,7 +38,7 @@ class TestMap(unittest.TestCase):
yield
"h"
yield
"i"
r
=
paddle
.
reader
.
map_readers
(
tokenize
,
read
)
r
=
paddle
.
v2
.
reader
.
map_readers
(
tokenize
,
read
)
for
i
,
e
in
enumerate
(
r
()):
self
.
assertEqual
(
e
,
i
)
...
...
@@ -45,7 +46,7 @@ class TestMap(unittest.TestCase):
class
TestBuffered
(
unittest
.
TestCase
):
def
test_read
(
self
):
for
size
in
range
(
20
):
b
=
paddle
.
reader
.
buffered
(
reader_creator_10
(
0
),
size
)
b
=
paddle
.
v2
.
reader
.
buffered
(
reader_creator_10
(
0
),
size
)
c
=
0
for
i
in
b
():
self
.
assertEqual
(
i
,
c
)
...
...
@@ -54,7 +55,7 @@ class TestBuffered(unittest.TestCase):
def
test_buffering
(
self
):
# each read has a 30ms delay.
b
=
paddle
.
reader
.
buffered
(
reader_creator_10
(
0.03
),
10
)
b
=
paddle
.
v2
.
reader
.
buffered
(
reader_creator_10
(
0.03
),
10
)
last_time
=
time
.
time
()
for
idx
,
i
in
enumerate
(
b
()):
elapsed_time
=
time
.
time
()
-
last_time
...
...
@@ -68,17 +69,17 @@ class TestBuffered(unittest.TestCase):
class
TestCompose
(
unittest
.
TestCase
):
def
test_compse
(
self
):
reader
=
paddle
.
reader
.
compose
(
reader
=
paddle
.
v2
.
reader
.
compose
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertEqual
(
e
,
(
idx
,
idx
))
def
test_compose_not_aligned
(
self
):
total
=
0
reader
=
paddle
.
reader
.
compose
(
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader
=
paddle
.
v2
.
reader
.
compose
(
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader_creator_10
(
0
))
with
self
.
assertRaises
(
paddle
.
reader
.
ComposeNotAligned
):
with
self
.
assertRaises
(
paddle
.
v2
.
reader
.
ComposeNotAligned
):
for
e
in
reader
():
total
+=
1
# expecting 10, not 20
...
...
@@ -86,8 +87,8 @@ class TestCompose(unittest.TestCase):
def
test_compose_not_aligned_no_check
(
self
):
total
=
0
reader
=
paddle
.
reader
.
compose
(
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader
=
paddle
.
v2
.
reader
.
compose
(
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader_creator_10
(
0
),
check_alignment
=
False
)
for
e
in
reader
():
...
...
@@ -98,7 +99,7 @@ class TestCompose(unittest.TestCase):
class
TestChain
(
unittest
.
TestCase
):
def
test_chain
(
self
):
c
=
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
c
=
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
idx
=
0
for
e
in
c
():
self
.
assertEqual
(
e
,
idx
%
10
)
...
...
@@ -111,7 +112,7 @@ class TestShuffle(unittest.TestCase):
case
=
[(
0
,
True
),
(
1
,
True
),
(
10
,
False
),
(
100
,
False
)]
a
=
reader_creator_10
(
0
)
for
size
,
checkEq
in
case
:
s
=
paddle
.
reader
.
shuffle
(
a
,
size
)
s
=
paddle
.
v2
.
reader
.
shuffle
(
a
,
size
)
total
=
0
for
idx
,
e
in
enumerate
(
s
()):
if
checkEq
:
...
...
python/paddle/v2/reader/tests/run_tests.sh
0 → 100755
浏览文件 @
b265ccab
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Runs the reader unit tests with the Python interpreter passed as $1.
if [ -z "$1" ]; then
  echo "usage: $0 <python-executable>" >&2
  exit 1
fi

# Resolve the directory that holds this script, so the relative paths below
# work no matter where the script is invoked from.
SCRIPTPATH=$(cd "$(dirname "$0")" > /dev/null && pwd) || exit 1

cd "$SCRIPTPATH" || exit 1

# Install the built wheel(s); the glob is left unquoted on purpose so the
# shell expands it.
"$1" -m pip install ../../../../../paddle/dist/*.whl

test_list="creator_test.py decorator_test.py"

export PYTHONPATH="$PWD/../../../../../python/"

# Stop at the first failing test file and report failure to the caller.
for fn in $test_list
do
  echo "test $fn"
  "$1" "$fn" || exit 1
done
python/paddle/reader/tests/test_data_creator.txt
→
python/paddle/
v2/
reader/tests/test_data_creator.txt
浏览文件 @
b265ccab
文件已移动
python/paddle/v2/trainer.py
浏览文件 @
b265ccab
...
...
@@ -27,19 +27,13 @@ class ITrainer(object):
The interface of Trainer. The only exposed method is `train`.
"""
def
train
(
self
,
train_data_reader
,
cost
,
parameters
,
test_data_reader
=
None
,
event_handler
=
None
):
def
train
(
self
,
reader
,
topology
,
parameters
,
event_handler
=
None
):
"""
train method.
:param
train_data_
reader:
:param
cost
:
:param reader:
:param
topology
:
:param parameters:
:param test_data_reader:
:param event_handler:
:return:
"""
...
...
@@ -61,26 +55,22 @@ class SGD(ITrainer):
self
.
__optimizer__
=
update_equation
def
train
(
self
,
train_data_
reader
,
reader
,
cost
,
parameters
,
num_passes
=
1
,
test_data_reader
=
None
,
event_handler
=
None
,
batch_size
=
32
,
reader_dict
=
None
):
"""
Training method. Will train num_passes of input data.
:param
train_data_
reader:
:param
cost: cost layers, to be optimized
.
:param reader:
:param
topology: Network Topology, use one or more Layers to represent it
.
:param parameters: The parameter pools.
:param num_passes: The total train passes.
:param test_data_reader:
:param event_handler: Event handler. A method will be invoked when event
occurred.
:type event_handler: (BaseEvent) => None
:param batch_size: Not important, will be removed after data refactor.
:return:
"""
if
event_handler
is
None
:
...
...
@@ -112,9 +102,9 @@ class SGD(ITrainer):
event_handler
(
v2_event
.
BeginPass
(
pass_id
))
pass_evaluator
.
start
()
updater
.
startPass
()
for
batch_id
,
data_batch
in
enumerate
(
__data_reader_to_batch__
(
train_data_reader
,
batch_size
,
topology
)):
for
batch_id
,
data_batch
in
enumerate
(
reader
()):
pass_type
=
updater
.
startBatch
(
len
(
data_batch
))
gm
.
forwardBackward
(
feeder
(
data_batch
),
out_args
,
pass_type
)
batch_evaluator
.
start
()
event_handler
(
v2_event
.
BeginIteration
(
...
...
@@ -144,56 +134,19 @@ class SGD(ITrainer):
gm
.
finish
()
def __data_reader_to_batch__(reader, batch_size, topology):
    """
    Temporary helper, slated for removal once the data layer is refactored.

    Takes each item produced by ``reader()``, picks its fields in the order of
    ``topology.proto().input_layer_names`` and passes the reordered rows to
    ``__generator_to_batch__`` together with ``batch_size``.
    """

    def reordered_rows():
        for row in reader():
            # The layer-name list is looked up again for every row.
            yield [row[name] for name in topology.proto().input_layer_names]

    return __generator_to_batch__(reordered_rows(), batch_size=batch_size)
def __generator_to_batch__(generator, batch_size):
    """
    This function is not important, and will be removed when data refactored.

    Groups the items of ``generator`` into lists of ``batch_size`` items; a
    final shorter list is yielded when items are left over.

    :param generator: iterable of items to group.
    :param batch_size: number of items per batch, must be positive.
    :raises ValueError: on first iteration, if batch_size is not positive.
    """
    # A non-positive size can never match the length test below, which would
    # silently collect every item into one batch.
    if batch_size <= 0:
        raise ValueError(
            'batch_size should be a positive value, but got %r' %
            (batch_size, ))
    ret_val = []
    for each_item in generator:
        ret_val.append(each_item)
        if len(ret_val) == batch_size:
            yield ret_val
            ret_val = []
    # Flush the trailing, partially filled batch.
    if ret_val:
        yield ret_val
def __check_train_args__(reader, topology, parameters, event_handler, **kwargs):
    """
    Check train function's argument types

    :param reader: zero-argument callable whose result must be an iterator.
    :param topology: must be a Topology instance.
    :param parameters: must be a v2_parameters.Parameters instance.
    :param event_handler: must be callable.
    :param kwargs: any further keyword arguments are accepted and not checked.
    :raises TypeError: if any of the checks above fails.
    """
    # Note: reader() is invoked once here purely to inspect what it returns.
    if not callable(reader) or not isinstance(reader(), collections.Iterator):
        # The message names the current parameter; it used to mention the
        # removed ``train_data_reader`` argument.
        raise TypeError('reader should be a function, '
                        'which can return a iterator')

    if not isinstance(topology, Topology):
        raise TypeError('topology should be a model config')

    if not isinstance(parameters, v2_parameters.Parameters):
        raise TypeError('parameters should be a parameter pool')

    if not callable(event_handler):
        raise TypeError('event handler should be a function')
python/setup.py.in
浏览文件 @
b265ccab
...
...
@@ -5,7 +5,9 @@ packages=['paddle',
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.utils',
'paddle.v2']
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader']
setup(name='paddle',
version='${PADDLE_VERSION}',
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录