Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
d60116db
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
d60116db
编写于
2月 28, 2017
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into srl_api_v2
上级
ba39e688
c6bfb712
变更
18
隐藏空白更改
内联
并排
Showing
18 changed files
with
124 additions
and
110 deletions
+124
-110
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+4
-11
paddle/function/CosSimOpGpu.cu
paddle/function/CosSimOpGpu.cu
+0
-1
python/CMakeLists.txt
python/CMakeLists.txt
+2
-2
python/paddle/reader/tests/CMakeLists.txt
python/paddle/reader/tests/CMakeLists.txt
+0
-9
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+4
-1
python/paddle/v2/dataset/__init__.py
python/paddle/v2/dataset/__init__.py
+3
-0
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+11
-4
python/paddle/v2/reader/__init__.py
python/paddle/v2/reader/__init__.py
+0
-0
python/paddle/v2/reader/creator.py
python/paddle/v2/reader/creator.py
+0
-0
python/paddle/v2/reader/decorator.py
python/paddle/v2/reader/decorator.py
+23
-1
python/paddle/v2/reader/tests/CMakeLists.txt
python/paddle/v2/reader/tests/CMakeLists.txt
+3
-0
python/paddle/v2/reader/tests/__init__.py
python/paddle/v2/reader/tests/__init__.py
+0
-0
python/paddle/v2/reader/tests/creator_test.py
python/paddle/v2/reader/tests/creator_test.py
+6
-4
python/paddle/v2/reader/tests/decorator_test.py
python/paddle/v2/reader/tests/decorator_test.py
+14
-13
python/paddle/v2/reader/tests/run_tests.sh
python/paddle/v2/reader/tests/run_tests.sh
+35
-0
python/paddle/v2/reader/tests/test_data_creator.txt
python/paddle/v2/reader/tests/test_data_creator.txt
+0
-0
python/paddle/v2/trainer.py
python/paddle/v2/trainer.py
+16
-63
python/setup.py.in
python/setup.py.in
+3
-1
未找到文件。
demo/mnist/api_train_v2.py
浏览文件 @
d60116db
import
paddle.v2
as
paddle
import
mnist_util
def train_reader():
    """Yield, one at a time, every item that
    ``mnist_util.read_from_mnist`` produces for the raw training file.
    """
    samples = mnist_util.read_from_mnist('./data/raw_data/train')
    for sample in samples:
        yield sample
def
main
():
paddle
.
init
(
use_gpu
=
False
,
trainer_count
=
1
)
...
...
@@ -40,11 +31,13 @@ def main():
trainer
=
paddle
.
trainer
.
SGD
(
update_equation
=
adam_optimizer
)
trainer
.
train
(
train_data_reader
=
train_reader
,
reader
=
paddle
.
reader
.
batched
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
mnist
.
train
(),
buf_size
=
8192
),
batch_size
=
32
),
cost
=
cost
,
parameters
=
parameters
,
event_handler
=
event_handler
,
batch_size
=
32
,
# batch size should be refactor in Data reader
reader_dict
=
{
images
.
name
:
0
,
label
.
name
:
1
})
...
...
paddle/function/CosSimOpGpu.cu
浏览文件 @
d60116db
...
...
@@ -92,7 +92,6 @@ void CosSimForward<DEVICE_TYPE_GPU>(GpuMatrix& out_mat,
CHECK
(
in1_mat
.
useGpu_
==
true
&&
in2_mat
.
useGpu_
==
true
)
<<
"Matrix type are not GPU"
;
size_t
num_samples
=
out_mat
.
getHeight
();
size_t
dim
=
in1_mat
.
getWidth
();
real
*
out
=
out_mat
.
getData
();
const
real
*
x
=
in1_mat
.
getData
();
...
...
python/CMakeLists.txt
浏览文件 @
d60116db
...
...
@@ -4,7 +4,7 @@ set(OUTPUT_DIR
file
(
GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py
)
file
(
GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py
)
file
(
GLOB UTILS_PY_FILES . ./paddle/utils/*.py
)
file
(
GLOB
V2_PY_FILES . ./paddle/v2/
*.py
)
file
(
GLOB
_RECURSE V2_PY_FILES ./paddle/v2/
*.py
)
set
(
PY_FILES paddle/__init__.py
${
TRAINER_PY_FILES
}
...
...
@@ -24,7 +24,7 @@ add_custom_target(paddle_python ALL DEPENDS
${
OUTPUT_DIR
}
/.timestamp
)
add_subdirectory
(
paddle/trainer_config_helpers/tests
)
add_subdirectory
(
paddle/reader/tests
)
add_subdirectory
(
paddle/
v2/
reader/tests
)
add_subdirectory
(
paddle/v2/tests
)
install
(
DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/dist/
...
...
python/paddle/reader/tests/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
ba39e688
add_test
(
NAME reader_decorator_test
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/reader/tests/decorator_test.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
add_test
(
NAME reader_creator_test
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/reader/tests/creator_test.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
python/paddle/v2/__init__.py
浏览文件 @
d60116db
...
...
@@ -20,13 +20,16 @@ import event
import
data_type
import
topology
import
data_feeder
from
.
import
dataset
from
.
import
reader
import
attr
import
pooling
import
py_paddle.swig_paddle
as
api
__all__
=
[
'optimizer'
,
'layer'
,
'activation'
,
'parameters'
,
'init'
,
'trainer'
,
'event'
,
'data_type'
,
'attr'
,
'pooling'
,
'data_feeder'
,
'topology'
'event'
,
'data_type'
,
'attr'
,
'pooling'
,
'data_feeder'
,
'dataset'
,
'reader'
,
'topology'
]
...
...
python/paddle/v2/dataset/__init__.py
浏览文件 @
d60116db
import
mnist
__all__
=
[
'mnist'
]
python/paddle/v2/dataset/mnist.py
浏览文件 @
d60116db
"""
MNIST dataset.
"""
import
numpy
import
paddle.v2.dataset.common
import
subprocess
import
numpy
import
platform
__all__
=
[
'train'
,
'test'
]
URL_PREFIX
=
'http://yann.lecun.com/exdb/mnist/'
...
...
@@ -20,12 +20,19 @@ TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'
def
reader_creator
(
image_filename
,
label_filename
,
buffer_size
):
def
reader
():
if
platform
.
system
()
==
'Darwin'
:
zcat_cmd
=
'gzcat'
elif
platform
.
system
()
==
'Linux'
:
zcat_cmd
=
'zcat'
else
:
raise
NotImplementedError
()
# According to http://stackoverflow.com/a/38061619/724872, we
# cannot use standard package gzip here.
m
=
subprocess
.
Popen
([
"zcat"
,
image_filename
],
stdout
=
subprocess
.
PIPE
)
m
=
subprocess
.
Popen
([
zcat_cmd
,
image_filename
],
stdout
=
subprocess
.
PIPE
)
m
.
stdout
.
read
(
16
)
# skip some magic bytes
l
=
subprocess
.
Popen
([
"zcat"
,
label_filename
],
stdout
=
subprocess
.
PIPE
)
l
=
subprocess
.
Popen
([
zcat_cmd
,
label_filename
],
stdout
=
subprocess
.
PIPE
)
l
.
stdout
.
read
(
8
)
# skip some magic bytes
while
True
:
...
...
python/paddle/reader/__init__.py
→
python/paddle/
v2/
reader/__init__.py
浏览文件 @
d60116db
文件已移动
python/paddle/reader/creator.py
→
python/paddle/
v2/
reader/creator.py
浏览文件 @
d60116db
文件已移动
python/paddle/reader/decorator.py
→
python/paddle/
v2/
reader/decorator.py
浏览文件 @
d60116db
...
...
@@ -14,7 +14,7 @@
__all__
=
[
'map_readers'
,
'buffered'
,
'compose'
,
'chain'
,
'shuffle'
,
'ComposeNotAligned'
'ComposeNotAligned'
,
'batched'
]
from
Queue
import
Queue
...
...
@@ -191,3 +191,25 @@ def buffered(reader, size):
e
=
q
.
get
()
return
data_reader
def batched(reader, batch_size):
    """
    Create a batched reader.

    :param reader: the data reader to read from; a callable that returns an
        iterable of instances each time it is called.
    :param batch_size: number of instances per batch; must be positive.
    :return: the batched reader. Each call yields lists of ``batch_size``
        instances, followed by one shorter list if instances are left over.
    :raises ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        # len(batch) can never equal a non-positive size, so the loop below
        # would silently emit the whole stream as one oversized batch.
        raise ValueError("batch_size should be a positive value, "
                         "but got %r" % (batch_size, ))

    def batched_reader():
        batch = []
        for instance in reader():
            batch.append(instance)
            if len(batch) == batch_size:
                yield batch
                batch = []
        if batch:
            # Flush the trailing, possibly smaller, batch.
            yield batch

    return batched_reader
python/paddle/v2/reader/tests/CMakeLists.txt
0 → 100644
浏览文件 @
d60116db
add_test
(
NAME reader_tests
COMMAND bash
${
PROJ_ROOT
}
/python/paddle/v2/reader/tests/run_tests.sh
${
PYTHON_EXECUTABLE
}
)
python/paddle/v2/reader/tests/__init__.py
0 → 100644
浏览文件 @
d60116db
python/paddle/reader/tests/creator_test.py
→
python/paddle/
v2/
reader/tests/creator_test.py
浏览文件 @
d60116db
...
...
@@ -11,17 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
paddle.reader.creator
import
numpy
as
np
import
os
import
paddle.v2.reader.creator
class
TestNumpyArray
(
unittest
.
TestCase
):
def
test_numpy_array
(
self
):
l
=
[[
1
,
2
,
3
],
[
4
,
5
,
6
]]
x
=
np
.
array
(
l
,
np
.
int32
)
reader
=
paddle
.
reader
.
creator
.
np_array
(
x
)
reader
=
paddle
.
v2
.
reader
.
creator
.
np_array
(
x
)
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertItemsEqual
(
e
,
l
[
idx
])
...
...
@@ -29,7 +31,7 @@ class TestNumpyArray(unittest.TestCase):
class
TestTextFile
(
unittest
.
TestCase
):
def
test_text_file
(
self
):
path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"test_data_creator.txt"
)
reader
=
paddle
.
reader
.
creator
.
text_file
(
path
)
reader
=
paddle
.
v2
.
reader
.
creator
.
text_file
(
path
)
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertEqual
(
e
,
str
(
idx
*
2
)
+
" "
+
str
(
idx
*
2
+
1
))
...
...
python/paddle/reader/tests/decorator_test.py
→
python/paddle/
v2/
reader/tests/decorator_test.py
浏览文件 @
d60116db
...
...
@@ -11,9 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
paddle.reader
import
time
import
unittest
import
paddle.v2.reader
def
reader_creator_10
(
dur
):
...
...
@@ -37,7 +38,7 @@ class TestMap(unittest.TestCase):
yield
"h"
yield
"i"
r
=
paddle
.
reader
.
map_readers
(
tokenize
,
read
)
r
=
paddle
.
v2
.
reader
.
map_readers
(
tokenize
,
read
)
for
i
,
e
in
enumerate
(
r
()):
self
.
assertEqual
(
e
,
i
)
...
...
@@ -45,7 +46,7 @@ class TestMap(unittest.TestCase):
class
TestBuffered
(
unittest
.
TestCase
):
def
test_read
(
self
):
for
size
in
range
(
20
):
b
=
paddle
.
reader
.
buffered
(
reader_creator_10
(
0
),
size
)
b
=
paddle
.
v2
.
reader
.
buffered
(
reader_creator_10
(
0
),
size
)
c
=
0
for
i
in
b
():
self
.
assertEqual
(
i
,
c
)
...
...
@@ -54,7 +55,7 @@ class TestBuffered(unittest.TestCase):
def
test_buffering
(
self
):
# read have 30ms delay.
b
=
paddle
.
reader
.
buffered
(
reader_creator_10
(
0.03
),
10
)
b
=
paddle
.
v2
.
reader
.
buffered
(
reader_creator_10
(
0.03
),
10
)
last_time
=
time
.
time
()
for
idx
,
i
in
enumerate
(
b
()):
elapsed_time
=
time
.
time
()
-
last_time
...
...
@@ -68,17 +69,17 @@ class TestBuffered(unittest.TestCase):
class
TestCompose
(
unittest
.
TestCase
):
def
test_compse
(
self
):
reader
=
paddle
.
reader
.
compose
(
reader
=
paddle
.
v2
.
reader
.
compose
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertEqual
(
e
,
(
idx
,
idx
))
def
test_compose_not_aligned
(
self
):
total
=
0
reader
=
paddle
.
reader
.
compose
(
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader
=
paddle
.
v2
.
reader
.
compose
(
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader_creator_10
(
0
))
with
self
.
assertRaises
(
paddle
.
reader
.
ComposeNotAligned
):
with
self
.
assertRaises
(
paddle
.
v2
.
reader
.
ComposeNotAligned
):
for
e
in
reader
():
total
+=
1
# expecting 10, not 20
...
...
@@ -86,8 +87,8 @@ class TestCompose(unittest.TestCase):
def
test_compose_not_aligned_no_check
(
self
):
total
=
0
reader
=
paddle
.
reader
.
compose
(
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader
=
paddle
.
v2
.
reader
.
compose
(
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader_creator_10
(
0
),
check_alignment
=
False
)
for
e
in
reader
():
...
...
@@ -98,7 +99,7 @@ class TestCompose(unittest.TestCase):
class
TestChain
(
unittest
.
TestCase
):
def
test_chain
(
self
):
c
=
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
c
=
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
idx
=
0
for
e
in
c
():
self
.
assertEqual
(
e
,
idx
%
10
)
...
...
@@ -111,7 +112,7 @@ class TestShuffle(unittest.TestCase):
case
=
[(
0
,
True
),
(
1
,
True
),
(
10
,
False
),
(
100
,
False
)]
a
=
reader_creator_10
(
0
)
for
size
,
checkEq
in
case
:
s
=
paddle
.
reader
.
shuffle
(
a
,
size
)
s
=
paddle
.
v2
.
reader
.
shuffle
(
a
,
size
)
total
=
0
for
idx
,
e
in
enumerate
(
s
()):
if
checkEq
:
...
...
python/paddle/v2/reader/tests/run_tests.sh
0 → 100755
浏览文件 @
d60116db
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Usage: run_tests.sh <python-executable>
#
# Installs the wheel(s) found under paddle/dist with the given interpreter,
# then runs each reader test with it, exiting with status 1 on the first
# failing test.

# Resolve the directory containing this script so the relative paths below
# work regardless of the caller's working directory. Quoting keeps paths
# with spaces intact; bail out if the directory cannot be entered.
pushd "$(dirname "$0")" > /dev/null || exit 1
SCRIPTPATH=$PWD
popd > /dev/null

cd "$SCRIPTPATH" || exit 1

# The glob is intentionally left unquoted so it expands to the built wheel(s).
"$1" -m pip install ../../../../../paddle/dist/*.whl

test_list="creator_test.py decorator_test.py"

export PYTHONPATH="$PWD/../../../../../python/"

# $test_list is intentionally unquoted: it is a space-separated list.
for fn in $test_list
do
  echo "test $fn"
  if ! "$1" "$fn"; then
    exit 1
  fi
done
python/paddle/reader/tests/test_data_creator.txt
→
python/paddle/
v2/
reader/tests/test_data_creator.txt
浏览文件 @
d60116db
文件已移动
python/paddle/v2/trainer.py
浏览文件 @
d60116db
...
...
@@ -27,19 +27,13 @@ class ITrainer(object):
The interface of Trainer. The only exposed method is `train`.
"""
def
train
(
self
,
train_data_reader
,
cost
,
parameters
,
test_data_reader
=
None
,
event_handler
=
None
):
def
train
(
self
,
reader
,
topology
,
parameters
,
event_handler
=
None
):
"""
train method.
:param
train_data_
reader:
:param
cost
:
:param reader:
:param
topology
:
:param parameters:
:param test_data_reader:
:param event_handler:
:return:
"""
...
...
@@ -61,26 +55,22 @@ class SGD(ITrainer):
self
.
__optimizer__
=
update_equation
def
train
(
self
,
train_data_
reader
,
reader
,
cost
,
parameters
,
num_passes
=
1
,
test_data_reader
=
None
,
event_handler
=
None
,
batch_size
=
32
,
reader_dict
=
None
):
"""
Training method. Will train num_passes of input data.
:param
train_data_
reader:
:param
cost: cost layers, to be optimized
.
:param reader:
:param
topology: Network Topology, use one or more Layers to represent it
.
:param parameters: The parameter pools.
:param num_passes: The total train passes.
:param test_data_reader:
:param event_handler: Event handler. A method will be invoked when event
occurred.
:type event_handler: (BaseEvent) => None
:param batch_size: Not important, will be removed after data refactor.
:return:
"""
if
event_handler
is
None
:
...
...
@@ -112,9 +102,9 @@ class SGD(ITrainer):
event_handler
(
v2_event
.
BeginPass
(
pass_id
))
pass_evaluator
.
start
()
updater
.
startPass
()
for
batch_id
,
data_batch
in
enumerate
(
__data_reader_to_batch__
(
train_data_reader
,
batch_size
,
topology
)):
for
batch_id
,
data_batch
in
enumerate
(
reader
()):
pass_type
=
updater
.
startBatch
(
len
(
data_batch
))
gm
.
forwardBackward
(
feeder
(
data_batch
),
out_args
,
pass_type
)
batch_evaluator
.
start
()
event_handler
(
v2_event
.
BeginIteration
(
...
...
@@ -144,56 +134,19 @@ class SGD(ITrainer):
gm
.
finish
()
def __data_reader_to_batch__(reader, batch_size, topology):
    """
    Reorder every item produced by ``reader()`` to follow the topology's
    input layer names, then group the reordered items into batches.

    This helper is not important and will be removed once the data
    pipeline is refactored.
    """

    def reordered_items():
        for record in reader():
            yield [
                record[layer_name]
                for layer_name in topology.proto().input_layer_names
            ]

    return __generator_to_batch__(reordered_items(), batch_size=batch_size)
def __generator_to_batch__(generator, batch_size):
    """
    Group the items of ``generator`` into lists of ``batch_size`` items,
    yielding one shorter list at the end when items are left over.

    This helper is not important and will be removed once the data
    pipeline is refactored.
    """
    pending = []
    for entry in generator:
        pending.append(entry)
        if len(pending) != batch_size:
            continue
        yield pending
        pending = []
    if pending:
        yield pending
def
__check_train_args__
(
train_data_reader
,
topology
,
parameters
,
test_data_reader
,
event_handler
,
**
kwargs
):
def
__check_train_args__
(
reader
,
topology
,
parameters
,
event_handler
,
**
kwargs
):
"""
Check train function's argument types
"""
if
not
callable
(
train_data_reader
)
or
not
isinstance
(
train_data_reader
(),
collections
.
Iterator
):
raise
ValueError
(
'train_data_reader should be a function, '
'which can return a iterator'
)
if
test_data_reader
is
not
None
:
if
not
callable
(
test_data_reader
)
or
not
isinstance
(
test_data_reader
(),
collections
.
Iterator
):
raise
ValueError
(
'test_data_reader should be a function, which can '
'return a iterator'
)
if
not
callable
(
reader
)
or
not
isinstance
(
reader
(),
collections
.
Iterator
):
raise
TypeError
(
'train_data_reader should be a function, '
'which can return a iterator'
)
if
not
isinstance
(
topology
,
Topology
):
raise
Valu
eError
(
'topology should be a model config'
)
raise
Typ
eError
(
'topology should be a model config'
)
if
not
isinstance
(
parameters
,
v2_parameters
.
Parameters
):
raise
Valu
eError
(
'parameters should be a parameter pool'
)
raise
Typ
eError
(
'parameters should be a parameter pool'
)
if
not
callable
(
event_handler
):
raise
Valu
eError
(
'event handler should be a function'
)
raise
Typ
eError
(
'event handler should be a function'
)
python/setup.py.in
浏览文件 @
d60116db
...
...
@@ -5,7 +5,9 @@ packages=['paddle',
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.utils',
'paddle.v2']
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader']
setup(name='paddle',
version='${PADDLE_VERSION}',
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录