Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
0eba01c0
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0eba01c0
编写于
2月 28, 2017
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'feature/tester' into feature/recommendation_v2_api
上级
bdee792a
cb9d156b
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
209 addition
and
147 deletion
+209
-147
.travis.yml
.travis.yml
+1
-1
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+18
-25
python/CMakeLists.txt
python/CMakeLists.txt
+2
-2
python/paddle/reader/tests/CMakeLists.txt
python/paddle/reader/tests/CMakeLists.txt
+0
-9
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+3
-1
python/paddle/v2/dataset/__init__.py
python/paddle/v2/dataset/__init__.py
+3
-0
python/paddle/v2/event.py
python/paddle/v2/event.py
+9
-1
python/paddle/v2/reader/__init__.py
python/paddle/v2/reader/__init__.py
+0
-0
python/paddle/v2/reader/creator.py
python/paddle/v2/reader/creator.py
+0
-0
python/paddle/v2/reader/decorator.py
python/paddle/v2/reader/decorator.py
+23
-1
python/paddle/v2/reader/tests/CMakeLists.txt
python/paddle/v2/reader/tests/CMakeLists.txt
+3
-0
python/paddle/v2/reader/tests/__init__.py
python/paddle/v2/reader/tests/__init__.py
+0
-0
python/paddle/v2/reader/tests/creator_test.py
python/paddle/v2/reader/tests/creator_test.py
+6
-4
python/paddle/v2/reader/tests/decorator_test.py
python/paddle/v2/reader/tests/decorator_test.py
+14
-13
python/paddle/v2/reader/tests/run_tests.sh
python/paddle/v2/reader/tests/run_tests.sh
+35
-0
python/paddle/v2/reader/tests/test_data_creator.txt
python/paddle/v2/reader/tests/test_data_creator.txt
+0
-0
python/paddle/v2/trainer.py
python/paddle/v2/trainer.py
+84
-88
python/setup.py.in
python/setup.py.in
+8
-2
未找到文件。
.travis.yml
浏览文件 @
0eba01c0
...
...
@@ -57,7 +57,7 @@ before_install:
-
if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version.
-
pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
-
pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
'scikit-learn>=0.18.0' 'scipy>=0.18.0'
script
:
-
paddle/scripts/travis/main.sh
notifications
:
...
...
demo/mnist/api_train_v2.py
浏览文件 @
0eba01c0
import
numpy
import
paddle.v2
as
paddle
import
mnist_util
def
train_reader
():
train_file
=
'./data/raw_data/train'
generator
=
mnist_util
.
read_from_mnist
(
train_file
)
for
item
in
generator
:
yield
item
def
main
():
paddle
.
init
(
use_gpu
=
False
,
trainer_count
=
1
)
...
...
@@ -30,27 +21,29 @@ def main():
adam_optimizer
=
paddle
.
optimizer
.
Adam
(
learning_rate
=
0.01
)
trainer
=
paddle
.
trainer
.
SGD
(
topology
=
cost
,
parameters
=
parameters
,
update_equation
=
adam_optimizer
)
def
event_handler
(
event
):
if
isinstance
(
event
,
paddle
.
event
.
EndIteration
):
if
event
.
batch_id
%
100
==
0
:
print
"Pass %d, Batch %d, Cost %f, %s"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
,
event
.
metrics
)
if
event
.
batch_id
%
1000
==
0
:
result
=
trainer
.
test
(
reader
=
paddle
.
reader
.
batched
(
paddle
.
dataset
.
mnist
.
test_creator
(),
batch_size
=
256
))
print
"Pass %d, Batch %d, Cost %f, %s, Testing metrics %s"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
,
event
.
metrics
,
result
.
metrics
)
else
:
pass
trainer
=
paddle
.
trainer
.
SGD
(
update_equation
=
adam_optimizer
)
trainer
.
train
(
train_data_reader
=
train_reader
,
topology
=
cost
,
parameters
=
parameters
,
event_handler
=
event_handler
,
batch_size
=
32
,
# batch size should be refactor in Data reader
data_types
=
[
# data_types will be removed, It should be in
# network topology
(
'pixel'
,
images
.
type
),
(
'label'
,
label
.
type
)],
reader_dict
=
{
'pixel'
:
0
,
'label'
:
1
}
)
trainer
.
train
(
reader
=
paddle
.
reader
.
batched
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
mnist
.
train_creator
(),
buf_size
=
8192
),
batch_size
=
32
),
event_handler
=
event_handler
)
if
__name__
==
'__main__'
:
...
...
python/CMakeLists.txt
浏览文件 @
0eba01c0
...
...
@@ -4,7 +4,7 @@ set(OUTPUT_DIR
file
(
GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py
)
file
(
GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py
)
file
(
GLOB UTILS_PY_FILES . ./paddle/utils/*.py
)
file
(
GLOB
V2_PY_FILES . ./paddle/v2/
*.py
)
file
(
GLOB
_RECURSE V2_PY_FILES ./paddle/v2/
*.py
)
set
(
PY_FILES paddle/__init__.py
${
TRAINER_PY_FILES
}
...
...
@@ -24,7 +24,7 @@ add_custom_target(paddle_python ALL DEPENDS
${
OUTPUT_DIR
}
/.timestamp
)
add_subdirectory
(
paddle/trainer_config_helpers/tests
)
add_subdirectory
(
paddle/reader/tests
)
add_subdirectory
(
paddle/
v2/
reader/tests
)
add_subdirectory
(
paddle/v2/tests
)
install
(
DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
/dist/
...
...
python/paddle/reader/tests/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
bdee792a
add_test
(
NAME reader_decorator_test
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/reader/tests/decorator_test.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
add_test
(
NAME reader_creator_test
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/reader/tests/creator_test.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
python/paddle/v2/__init__.py
浏览文件 @
0eba01c0
...
...
@@ -19,13 +19,15 @@ import trainer
import
event
import
data_type
import
data_feeder
from
.
import
dataset
from
.
import
reader
import
attr
import
pooling
import
py_paddle.swig_paddle
as
api
__all__
=
[
'optimizer'
,
'layer'
,
'activation'
,
'parameters'
,
'init'
,
'trainer'
,
'event'
,
'data_type'
,
'attr'
,
'pooling'
,
'data_feeder'
'event'
,
'data_type'
,
'attr'
,
'pooling'
,
'data_feeder'
,
'dataset'
,
'reader'
]
...
...
python/paddle/v2/dataset/__init__.py
浏览文件 @
0eba01c0
import
mnist
__all__
=
[
'mnist'
]
python/paddle/v2/event.py
浏览文件 @
0eba01c0
...
...
@@ -11,7 +11,10 @@ There are:
TODO(yuyang18): Complete it!
"""
import
py_paddle.swig_paddle
as
api
__all__
=
[
'EndIteration'
,
'BeginIteration'
,
'BeginPass'
,
'EndPass'
]
__all__
=
[
'EndIteration'
,
'BeginIteration'
,
'BeginPass'
,
'EndPass'
,
'TestResult'
]
class
WithMetric
(
object
):
...
...
@@ -30,6 +33,11 @@ class WithMetric(object):
return
retv
class
TestResult
(
WithMetric
):
def
__init__
(
self
,
evaluator
):
super
(
TestResult
,
self
).
__init__
(
evaluator
)
class
BeginPass
(
object
):
"""
Event On One Pass Training Start.
...
...
python/paddle/reader/__init__.py
→
python/paddle/
v2/
reader/__init__.py
浏览文件 @
0eba01c0
文件已移动
python/paddle/reader/creator.py
→
python/paddle/
v2/
reader/creator.py
浏览文件 @
0eba01c0
文件已移动
python/paddle/reader/decorator.py
→
python/paddle/
v2/
reader/decorator.py
浏览文件 @
0eba01c0
...
...
@@ -14,7 +14,7 @@
__all__
=
[
'map_readers'
,
'buffered'
,
'compose'
,
'chain'
,
'shuffle'
,
'ComposeNotAligned'
'ComposeNotAligned'
,
'batched'
]
from
Queue
import
Queue
...
...
@@ -191,3 +191,25 @@ def buffered(reader, size):
e
=
q
.
get
()
return
data_reader
def
batched
(
reader
,
batch_size
):
"""
Create a batched reader.
:param reader: the data reader to read from.
:param batch_size: batch_size
:return: the batched reader.
"""
def
__impl__
():
r
=
reader
()
batch
=
[]
for
instance
in
r
:
batch
.
append
(
instance
)
if
len
(
batch
)
==
batch_size
:
yield
batch
batch
=
[]
if
batch
:
yield
batch
return
__impl__
python/paddle/v2/reader/tests/CMakeLists.txt
0 → 100644
浏览文件 @
0eba01c0
add_test
(
NAME reader_tests
COMMAND bash
${
PROJ_ROOT
}
/python/paddle/v2/reader/tests/run_tests.sh
${
PYTHON_EXECUTABLE
}
)
python/paddle/v2/reader/tests/__init__.py
0 → 100644
浏览文件 @
0eba01c0
python/paddle/reader/tests/creator_test.py
→
python/paddle/
v2/
reader/tests/creator_test.py
浏览文件 @
0eba01c0
...
...
@@ -11,17 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
paddle.reader.creator
import
numpy
as
np
import
os
import
paddle.v2.reader.creator
class
TestNumpyArray
(
unittest
.
TestCase
):
def
test_numpy_array
(
self
):
l
=
[[
1
,
2
,
3
],
[
4
,
5
,
6
]]
x
=
np
.
array
(
l
,
np
.
int32
)
reader
=
paddle
.
reader
.
creator
.
np_array
(
x
)
reader
=
paddle
.
v2
.
reader
.
creator
.
np_array
(
x
)
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertItemsEqual
(
e
,
l
[
idx
])
...
...
@@ -29,7 +31,7 @@ class TestNumpyArray(unittest.TestCase):
class
TestTextFile
(
unittest
.
TestCase
):
def
test_text_file
(
self
):
path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"test_data_creator.txt"
)
reader
=
paddle
.
reader
.
creator
.
text_file
(
path
)
reader
=
paddle
.
v2
.
reader
.
creator
.
text_file
(
path
)
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertEqual
(
e
,
str
(
idx
*
2
)
+
" "
+
str
(
idx
*
2
+
1
))
...
...
python/paddle/reader/tests/decorator_test.py
→
python/paddle/
v2/
reader/tests/decorator_test.py
浏览文件 @
0eba01c0
...
...
@@ -11,9 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
paddle.reader
import
time
import
unittest
import
paddle.v2.reader
def
reader_creator_10
(
dur
):
...
...
@@ -37,7 +38,7 @@ class TestMap(unittest.TestCase):
yield
"h"
yield
"i"
r
=
paddle
.
reader
.
map_readers
(
tokenize
,
read
)
r
=
paddle
.
v2
.
reader
.
map_readers
(
tokenize
,
read
)
for
i
,
e
in
enumerate
(
r
()):
self
.
assertEqual
(
e
,
i
)
...
...
@@ -45,7 +46,7 @@ class TestMap(unittest.TestCase):
class
TestBuffered
(
unittest
.
TestCase
):
def
test_read
(
self
):
for
size
in
range
(
20
):
b
=
paddle
.
reader
.
buffered
(
reader_creator_10
(
0
),
size
)
b
=
paddle
.
v2
.
reader
.
buffered
(
reader_creator_10
(
0
),
size
)
c
=
0
for
i
in
b
():
self
.
assertEqual
(
i
,
c
)
...
...
@@ -54,7 +55,7 @@ class TestBuffered(unittest.TestCase):
def
test_buffering
(
self
):
# read have 30ms delay.
b
=
paddle
.
reader
.
buffered
(
reader_creator_10
(
0.03
),
10
)
b
=
paddle
.
v2
.
reader
.
buffered
(
reader_creator_10
(
0.03
),
10
)
last_time
=
time
.
time
()
for
idx
,
i
in
enumerate
(
b
()):
elapsed_time
=
time
.
time
()
-
last_time
...
...
@@ -68,17 +69,17 @@ class TestBuffered(unittest.TestCase):
class
TestCompose
(
unittest
.
TestCase
):
def
test_compse
(
self
):
reader
=
paddle
.
reader
.
compose
(
reader
=
paddle
.
v2
.
reader
.
compose
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
for
idx
,
e
in
enumerate
(
reader
()):
self
.
assertEqual
(
e
,
(
idx
,
idx
))
def
test_compose_not_aligned
(
self
):
total
=
0
reader
=
paddle
.
reader
.
compose
(
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader
=
paddle
.
v2
.
reader
.
compose
(
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader_creator_10
(
0
))
with
self
.
assertRaises
(
paddle
.
reader
.
ComposeNotAligned
):
with
self
.
assertRaises
(
paddle
.
v2
.
reader
.
ComposeNotAligned
):
for
e
in
reader
():
total
+=
1
# expecting 10, not 20
...
...
@@ -86,8 +87,8 @@ class TestCompose(unittest.TestCase):
def
test_compose_not_aligned_no_check
(
self
):
total
=
0
reader
=
paddle
.
reader
.
compose
(
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader
=
paddle
.
v2
.
reader
.
compose
(
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
)),
reader_creator_10
(
0
),
check_alignment
=
False
)
for
e
in
reader
():
...
...
@@ -98,7 +99,7 @@ class TestCompose(unittest.TestCase):
class
TestChain
(
unittest
.
TestCase
):
def
test_chain
(
self
):
c
=
paddle
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
c
=
paddle
.
v2
.
reader
.
chain
(
reader_creator_10
(
0
),
reader_creator_10
(
0
))
idx
=
0
for
e
in
c
():
self
.
assertEqual
(
e
,
idx
%
10
)
...
...
@@ -111,7 +112,7 @@ class TestShuffle(unittest.TestCase):
case
=
[(
0
,
True
),
(
1
,
True
),
(
10
,
False
),
(
100
,
False
)]
a
=
reader_creator_10
(
0
)
for
size
,
checkEq
in
case
:
s
=
paddle
.
reader
.
shuffle
(
a
,
size
)
s
=
paddle
.
v2
.
reader
.
shuffle
(
a
,
size
)
total
=
0
for
idx
,
e
in
enumerate
(
s
()):
if
checkEq
:
...
...
python/paddle/v2/reader/tests/run_tests.sh
0 → 100755
浏览文件 @
0eba01c0
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pushd
`
dirname
$0
`
>
/dev/null
SCRIPTPATH
=
$PWD
popd
>
/dev/null
cd
$SCRIPTPATH
$1
-m
pip
install
../../../../../paddle/dist/
*
.whl
test_list
=
"creator_test.py decorator_test.py"
export
PYTHONPATH
=
$PWD
/../../../../../python/
for
fn
in
$test_list
do
echo
"test
$fn
"
$1
$fn
if
[
$?
-ne
0
]
;
then
exit
1
fi
done
python/paddle/reader/tests/test_data_creator.txt
→
python/paddle/
v2/
reader/tests/test_data_creator.txt
浏览文件 @
0eba01c0
文件已移动
python/paddle/v2/trainer.py
浏览文件 @
0eba01c0
...
...
@@ -23,24 +23,25 @@ def default_event_handler(event):
pass
def
__bfs_travel_topology__
(
callback
,
*
topologies
):
for
each_layer
in
topologies
:
callback
(
each_layer
)
__bfs_travel_topology__
(
callback
,
*
each_layer
.
__parent_layers__
.
values
())
class
ITrainer
(
object
):
"""
The interface of Trainer. The only exposed method is `train`.
"""
def
train
(
self
,
train_data_reader
,
topology
,
parameters
,
test_data_reader
=
None
,
event_handler
=
None
):
def
train
(
self
,
reader
,
topology
,
parameters
,
event_handler
=
None
):
"""
train method.
:param
train_data_
reader:
:param reader:
:param topology:
:param parameters:
:param test_data_reader:
:param event_handler:
:return:
"""
...
...
@@ -49,83 +50,99 @@ class ITrainer(object):
class
SGD
(
ITrainer
):
def
__init__
(
self
,
update_equation
):
def
__init__
(
self
,
topology
,
parameters
,
update_equation
):
"""
Simple SGD Trainer.
:param update_equation: The optimizer object.
:type update_equation: v2_optimizer.Optimizer
"""
if
not
isinstance
(
parameters
,
v2_parameters
.
Parameters
):
raise
TypeError
(
'parameters should be parameters'
)
if
not
isinstance
(
update_equation
,
v2_optimizer
.
Optimizer
):
raise
Valu
eError
(
"update equation parameter must be "
"paddle.v2.optimizer.Optimizer"
)
raise
Typ
eError
(
"update equation parameter must be "
"paddle.v2.optimizer.Optimizer"
)
self
.
__optimizer__
=
update_equation
self
.
__topology__
=
topology
self
.
__parameters__
=
parameters
self
.
__topology_in_proto__
=
v2_layer
.
parse_network
(
topology
)
data_types
=
dict
()
def
__travel__
(
l
):
if
hasattr
(
l
,
'type'
):
data_types
[
l
.
name
]
=
l
.
type
if
not
isinstance
(
topology
,
collections
.
Sequence
):
topology
=
[
topology
]
__bfs_travel_topology__
(
__travel__
,
*
topology
)
self
.
__data_types__
=
[
(
iname
,
data_types
[
iname
])
for
iname
in
self
.
__topology_in_proto__
.
input_layer_names
]
if
not
isinstance
(
self
.
__topology_in_proto__
,
ModelConfig
):
raise
TypeError
(
'topology should be a model config'
)
gm
=
api
.
GradientMachine
.
createFromConfigProto
(
self
.
__topology_in_proto__
,
api
.
CREATE_MODE_NORMAL
,
self
.
__optimizer__
.
enable_types
())
assert
isinstance
(
gm
,
api
.
GradientMachine
)
parameters
.
append_gradient_machine
(
gm
)
self
.
__gradient_machine__
=
gm
self
.
__gradient_machine__
.
randParameters
()
def
train
(
self
,
train_data_reader
,
topology
,
parameters
,
num_passes
=
1
,
test_data_reader
=
None
,
event_handler
=
None
,
batch_size
=
32
,
data_types
=
None
,
reader_dict
=
None
):
def
train
(
self
,
reader
,
num_passes
=
1
,
event_handler
=
None
,
reader_dict
=
None
):
"""
Training method. Will train num_passes of input data.
:param
train_data_
reader:
:param reader:
:param topology: Network Topology, use one or more Layers to represent it.
:param parameters: The parameter pools.
:param num_passes: The total train passes.
:param test_data_reader:
:param event_handler: Event handler. A method will be invoked when event
occurred.
:type event_handler: (BaseEvent) => None
:param batch_size: Not important, will be removed after data refactor.
:param data_types: Not important, will be removed after data refactor.
:return:
"""
if
event_handler
is
None
:
event_handler
=
default_event_handler
topology
=
v2_layer
.
parse_network
(
topology
)
if
reader_dict
is
None
:
reader_dict
=
self
.
default_reader_dict
()
__check_train_args__
(
**
locals
())
gm
=
api
.
GradientMachine
.
createFromConfigProto
(
topology
,
api
.
CREATE_MODE_NORMAL
,
self
.
__optimizer__
.
enable_types
())
assert
isinstance
(
gm
,
api
.
GradientMachine
)
parameters
.
append_gradient_machine
(
gm
)
gm
.
randParameters
()
updater
=
self
.
__optimizer__
.
create_local_updater
()
updater
.
init
(
gm
)
updater
.
init
(
self
.
__gradient_machine__
)
gm
.
start
()
batch_evaluator
=
gm
.
makeEvaluator
()
self
.
__gradient_machine__
.
start
()
batch_evaluator
=
self
.
__gradient_machine__
.
makeEvaluator
()
assert
isinstance
(
batch_evaluator
,
api
.
Evaluator
)
pass_evaluator
=
gm
.
makeEvaluator
()
pass_evaluator
=
self
.
__gradient_machine__
.
makeEvaluator
()
assert
isinstance
(
pass_evaluator
,
api
.
Evaluator
)
out_args
=
api
.
Arguments
.
createArguments
(
0
)
feeder
=
DataFeeder
(
data_types
,
reader_dict
)
feeder
=
DataFeeder
(
self
.
__data_types__
,
reader_dict
)
for
pass_id
in
xrange
(
num_passes
):
event_handler
(
v2_event
.
BeginPass
(
pass_id
))
pass_evaluator
.
start
()
updater
.
startPass
()
for
batch_id
,
data_batch
in
enumerate
(
__data_reader_to_batch__
(
train_data_reader
,
batch_size
,
topology
)):
for
batch_id
,
data_batch
in
enumerate
(
reader
()):
pass_type
=
updater
.
startBatch
(
len
(
data_batch
))
self
.
__gradient_machine__
.
forwardBackward
(
feeder
(
data_batch
),
out_args
,
pass_type
)
batch_evaluator
.
start
()
event_handler
(
v2_event
.
BeginIteration
(
pass_id
=
pass_id
,
batch_id
=
batch_id
))
pass_type
=
updater
.
startBatch
(
len
(
data_batch
))
gm
.
forwardBackward
(
feeder
(
data_batch
),
out_args
,
pass_type
)
gm
.
eval
(
pass_evaluator
)
gm
.
eval
(
batch_evaluator
)
for
each_param
in
gm
.
getParameters
():
self
.
__gradient_machine__
.
forwardBackward
(
feeder
(
data_batch
),
out_args
,
pass_type
)
self
.
__gradient_machine__
.
eval
(
pass_evaluator
)
self
.
__gradient_machine__
.
eval
(
batch_evaluator
)
for
each_param
in
self
.
__gradient_machine__
.
getParameters
():
updater
.
update
(
each_param
)
# Get cost. We use numpy to calculate total cost for this batch.
cost_vec
=
out_args
.
getSlotValue
(
0
)
...
...
@@ -143,59 +160,38 @@ class SGD(ITrainer):
updater
.
finishPass
()
pass_evaluator
.
finish
()
event_handler
(
v2_event
.
EndPass
(
pass_id
,
evaluator
=
pass_evaluator
))
gm
.
finish
()
self
.
__gradient_machine__
.
finish
()
def
default_reader_dict
(
self
):
reader_dict
=
dict
()
for
i
,
tp
in
enumerate
(
self
.
__data_types__
):
reader_dict
[
tp
[
0
]]
=
i
return
reader_dict
def
__data_reader_to_batch__
(
reader
,
batch_size
,
topology
):
"""
This function is not important, and will be removed when data refactored.
"""
def
test
(
self
,
reader
,
reader_dict
=
None
):
if
reader_dict
is
None
:
reader_dict
=
self
.
default_reader_dict
()
def
input_reorder
(
func
):
for
item
in
func
():
retv
=
[]
for
__layer_name__
in
topology
.
input_layer_names
:
retv
.
append
(
item
[
__layer_name__
])
yield
retv
feeder
=
DataFeeder
(
self
.
__data_types__
,
reader_dict
)
evaluator
=
self
.
__gradient_machine__
.
makeEvaluator
()
out_args
=
api
.
Arguments
.
createArguments
(
0
)
evaluator
.
start
()
for
data_batch
in
reader
():
self
.
__gradient_machine__
.
forward
(
feeder
(
data_batch
),
out_args
,
api
.
PASS_TEST
)
self
.
__gradient_machine__
.
eval
(
evaluator
)
return
__generator_to_batch__
(
input_reorder
(
reader
),
batch_size
=
batch_size
)
evaluator
.
finish
()
return
v2_event
.
TestResult
(
evaluator
=
evaluator
)
def
__generator_to_batch__
(
generator
,
batch_size
):
"""
This function is not important, and will be removed when data refactored.
"""
ret_val
=
list
()
for
each_item
in
generator
:
ret_val
.
append
(
each_item
)
if
len
(
ret_val
)
==
batch_size
:
yield
ret_val
ret_val
=
list
()
if
len
(
ret_val
)
!=
0
:
yield
ret_val
def
__check_train_args__
(
train_data_reader
,
topology
,
parameters
,
test_data_reader
,
event_handler
,
**
kwargs
):
def
__check_train_args__
(
reader
,
event_handler
,
**
kwargs
):
"""
Check train function's argument types
"""
if
not
callable
(
train_data_reader
)
or
not
isinstance
(
train_data_reader
(),
collections
.
Iterator
):
raise
ValueError
(
'train_data_reader should be a function, '
'which can return a iterator'
)
if
test_data_reader
is
not
None
:
if
not
callable
(
test_data_reader
)
or
not
isinstance
(
test_data_reader
(),
collections
.
Iterator
):
raise
ValueError
(
'test_data_reader should be a function, which can '
'return a iterator'
)
if
not
isinstance
(
topology
,
ModelConfig
):
raise
ValueError
(
'topology should be a model config'
)
if
not
isinstance
(
parameters
,
v2_parameters
.
Parameters
):
raise
ValueError
(
'parameters should be a parameter pool'
)
if
not
callable
(
reader
)
or
not
isinstance
(
reader
(),
collections
.
Iterator
):
raise
TypeError
(
'train_data_reader should be a function, '
'which can return a iterator'
)
if
not
callable
(
event_handler
):
raise
Valu
eError
(
'event handler should be a function'
)
raise
Typ
eError
(
'event handler should be a function'
)
python/setup.py.in
浏览文件 @
0eba01c0
...
...
@@ -5,7 +5,9 @@ packages=['paddle',
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.utils',
'paddle.v2']
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader']
setup(name='paddle',
version='${PADDLE_VERSION}',
...
...
@@ -13,5 +15,9 @@ setup(name='paddle',
packages=packages,
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}'
}
},
install_requires = [
'scikit-learn>=0.18.0',
'scipy>=0.18.0',
]
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录