Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d5c697e6
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d5c697e6
编写于
2月 27, 2017
作者:
Y
Yu Yang
提交者:
GitHub
2月 27, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into feature/EvaluatorToEvent
上级
173a81b5
c3caa842
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
440 addition
and
23 deletion
+440
-23
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+6
-6
paddle/py_paddle/dataprovider_converter.py
paddle/py_paddle/dataprovider_converter.py
+2
-4
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+2
-1
python/paddle/v2/data_feeder.py
python/paddle/v2/data_feeder.py
+100
-0
python/paddle/v2/data_type.py
python/paddle/v2/data_type.py
+2
-2
python/paddle/v2/dataset/__init__.py
python/paddle/v2/dataset/__init__.py
+0
-0
python/paddle/v2/dataset/config.py
python/paddle/v2/dataset/config.py
+8
-0
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+39
-0
python/paddle/v2/tests/CMakeLists.txt
python/paddle/v2/tests/CMakeLists.txt
+2
-0
python/paddle/v2/tests/run_tests.sh
python/paddle/v2/tests/run_tests.sh
+36
-0
python/paddle/v2/tests/test_data_feeder.py
python/paddle/v2/tests/test_data_feeder.py
+238
-0
python/paddle/v2/trainer.py
python/paddle/v2/trainer.py
+5
-10
未找到文件。
demo/mnist/api_train_v2.py
浏览文件 @
d5c697e6
...
...
@@ -44,13 +44,13 @@ def main():
topology
=
cost
,
parameters
=
parameters
,
event_handler
=
event_handler
,
num_passes
=
100
,
batch_size
=
200
,
# batch size should be refactor in Data reader
data_types
=
{
# data_types will be removed, It should be in
batch_size
=
32
,
# batch size should be refactor in Data reader
data_types
=
[
# data_types will be removed, It should be in
# network topology
'pixel'
:
images
.
type
,
'label'
:
label
.
type
})
(
'pixel'
,
images
.
type
),
(
'label'
,
label
.
type
)],
reader_dict
=
{
'pixel'
:
0
,
'label'
:
1
}
)
if
__name__
==
'__main__'
:
...
...
paddle/py_paddle/dataprovider_converter.py
浏览文件 @
d5c697e6
...
...
@@ -23,7 +23,8 @@ __all__ = ['DataProviderConverter']
class
IScanner
(
object
):
def
__init__
(
self
,
input_type
,
pos
):
self
.
input_type
=
input_type
assert
isinstance
(
self
.
input_type
,
dp2
.
InputType
)
if
not
isinstance
(
self
.
input_type
,
dp2
.
InputType
):
raise
ValueError
(
"input type should be dataprovider2.InputType"
)
self
.
pos
=
pos
def
scan
(
self
,
dat
):
...
...
@@ -50,7 +51,6 @@ class DenseScanner(IScanner):
def
finish_scan
(
self
,
argument
):
assert
isinstance
(
argument
,
swig_paddle
.
Arguments
)
assert
isinstance
(
self
.
input_type
,
dp2
.
InputType
)
if
self
.
__mat__
.
dtype
!=
numpy
.
float32
:
self
.
__mat__
=
self
.
__mat__
.
astype
(
numpy
.
float32
)
m
=
swig_paddle
.
Matrix
.
createDenseFromNumpy
(
self
.
__mat__
,
True
,
False
)
...
...
@@ -63,7 +63,6 @@ class SparseBinaryScanner(IScanner):
self
.
__rows__
=
[
0
]
self
.
__cols__
=
[]
self
.
__height__
=
0
self
.
__nnz__
=
0
self
.
__value__
=
[]
def
scan
(
self
,
dat
):
...
...
@@ -76,7 +75,6 @@ class SparseBinaryScanner(IScanner):
def
finish_scan
(
self
,
argument
):
assert
isinstance
(
argument
,
swig_paddle
.
Arguments
)
assert
isinstance
(
self
.
input_type
,
dp2
.
InputType
)
m
=
swig_paddle
.
Matrix
.
createSparse
(
self
.
__height__
,
self
.
input_type
.
dim
,
len
(
self
.
__cols__
),
...
...
python/paddle/v2/__init__.py
浏览文件 @
d5c697e6
...
...
@@ -18,12 +18,13 @@ import parameters
import
trainer
import
event
import
data_type
import
data_feeder
import
attr
import
py_paddle.swig_paddle
as
api
__all__
=
[
'optimizer'
,
'layer'
,
'activation'
,
'parameters'
,
'init'
,
'trainer'
,
'event'
,
'data_type'
,
'attr'
'event'
,
'data_type'
,
'attr'
,
'data_feeder'
]
...
...
python/paddle/v2/data_feeder.py
0 → 100644
浏览文件 @
d5c697e6
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
py_paddle
import
swig_paddle
from
py_paddle
import
DataProviderConverter
import
data_type
__all__
=
[
'DataFeeder'
]
class DataFeeder(DataProviderConverter):
    """
    DataFeeder converts the data returned by paddle.reader into a data structure
    of Arguments which is defined in the API. The paddle.reader usually returns
    a list of mini-batch data entries. Each data entry in the list is one sample.
    Each sample is a list or a tuple with one feature or multiple features.
    DataFeeder converts this mini-batch data entries into Arguments in order
    to feed it to C++ interface.

    The example usage:

        data_types = [('image', paddle.data_type.dense_vector(784)),
                      ('label', paddle.data_type.integer_value(10))]
        reader_dict = {'image':0, 'label':1}
        feeder = DataFeeder(data_types=data_types, reader_dict=reader_dict)
        minibatch_data = [
                           ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ),  # first sample
                           ( [1.0,2.0,3.0,4.0], 5, [6,7,8] )   # second sample
                         ]
        # or minibatch_data = [
        #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
        #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
        #                     ]
        arg = feeder(minibatch_data)
    """

    def __init__(self, data_types, reader_dict):
        """
        :param data_types: A list to specify data name and type. Each item is
                           a tuple of (data_name, data_type). For example:
                           [('image', paddle.data_type.dense_vector(784)),
                            ('label', paddle.data_type.integer_value(10))]
        :type data_types: A list of tuple
        :param reader_dict: A dictionary to specify the position of each data
                            in the input data.
        :type reader_dict: dict()
        :raises ValueError: if the type of any item in data_types is not a
                            data_type.InputType.
        """
        self.input_names = []
        input_types = []
        self.reader_dict = reader_dict
        for each in data_types:
            # Validate with an explicit raise rather than ``assert`` so the
            # check still runs when Python is started with -O.
            if not isinstance(each[1], data_type.InputType):
                raise ValueError(
                    "the type of data %r should be data_type.InputType" %
                    (each[0], ))
            self.input_names.append(each[0])
            input_types.append(each[1])
        DataProviderConverter.__init__(self, input_types)

    def convert(self, dat, argument=None):
        """
        :param dat: A list of mini-batch data. Each sample is a list or tuple
                    one feature or multiple features.
                    for example:
                    [
                      ([0.2, 0.2], ), # first sample
                      ([0.8, 0.3], ), # second sample
                    ]
                    or,
                    [
                      [[0.2, 0.2], ], # first sample
                      [[0.8, 0.3], ], # second sample
                    ]

        :type dat: List
        :param argument: An Arguments object contains this mini-batch data with
                         one or multiple features. The Arguments definition is
                         in the API.
        :type argument: swig_paddle.Arguments
        :return: whatever DataProviderConverter.convert returns for the
                 reordered mini-batch.
        """

        def reorder_data(data):
            # Pick each feature out of the sample by the position recorded in
            # reader_dict, emitting features in the order of input_names.
            return [[each[self.reader_dict[name]]
                     for name in self.input_names] for each in data]

        return DataProviderConverter.convert(self, reorder_data(dat), argument)
python/paddle/v2/data_type.py
浏览文件 @
d5c697e6
...
...
@@ -14,9 +14,9 @@
from
paddle.trainer.PyDataProvider2
import
\
InputType
,
dense_vector
,
sparse_binary_vector
,
\
sparse_vector
,
integer_value
sparse_vector
,
integer_value
,
integer_value_sequence
__all__
=
[
'InputType'
,
'dense_vector'
,
'sparse_binary_vector'
,
'sparse_vector'
,
'integer_value'
'integer_value'
,
'integer_value_sequence'
]
python/paddle/v2/dataset/__init__.py
0 → 100644
浏览文件 @
d5c697e6
python/paddle/v2/dataset/config.py
0 → 100644
浏览文件 @
d5c697e6
import errno
import os

__all__ = ['DATA_HOME']

# Directory handed to the dataset modules as their download/cache location.
DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set')

# Create the directory on import.  Attempting the creation and tolerating
# EEXIST avoids the window between an ``exists`` check and ``makedirs`` in
# which another process importing this module could create it first.
try:
    os.makedirs(DATA_HOME)
except OSError as exc:
    if exc.errno != errno.EEXIST:
        raise
python/paddle/v2/dataset/mnist.py
0 → 100644
浏览文件 @
d5c697e6
import
sklearn.datasets.mldata
import
sklearn.model_selection
import
numpy
from
config
import
DATA_HOME
__all__
=
[
'train_creator'
,
'test_creator'
]
def __mnist_reader_creator__(data, target):
    """
    Build a reader over paired samples and labels.

    :param data: A sequence of per-sample pixel arrays (for example a 2-D
                 numpy array with one row per sample).
    :param target: A sequence of labels, aligned with ``data``.
    :return: A zero-argument function. Each call returns a fresh generator
             yielding ``(pixels, label)`` tuples, where ``pixels`` is the
             sample divided by 255.0 and cast to numpy.float32 and ``label``
             is converted with ``int``. Iteration stops at the shorter of
             ``data`` and ``target``.
    """

    def reader():
        # Iterate the rows directly instead of indexing with xrange, which
        # only exists on Python 2.
        for pixels, label in zip(data, target):
            yield (pixels / 255.0).astype(numpy.float32), int(label)

    return reader
# Number of samples placed in the test split.
TEST_SIZE = 10000

# Fetch the "MNIST original" data set through scikit-learn's mldata helper,
# passing DATA_HOME as its data_home.  This runs at import time.
data = sklearn.datasets.mldata.fetch_mldata(
    "MNIST original", data_home=DATA_HOME)

# Split into train/test parts; random_state is fixed at 0 for the split.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    data.data,
    data.target,
    test_size=TEST_SIZE,
    random_state=0)
def train_creator():
    """
    Create a reader over the training split.

    :return: The reader built by __mnist_reader_creator__ from X_train and
             y_train.
    """
    training_reader = __mnist_reader_creator__(X_train, y_train)
    return training_reader
def test_creator():
    """
    Create a reader over the test split.

    :return: The reader built by __mnist_reader_creator__ from X_test and
             y_test.
    """
    testing_reader = __mnist_reader_creator__(X_test, y_test)
    return testing_reader
def unittest():
    """Check that the test reader yields exactly TEST_SIZE samples."""
    reader = test_creator()
    samples = list(reader())
    assert len(samples) == TEST_SIZE


# Run the self-check when this module is executed as a script.
if __name__ == '__main__':
    unittest()
python/paddle/v2/tests/CMakeLists.txt
浏览文件 @
d5c697e6
...
...
@@ -2,3 +2,5 @@ add_test(NAME test_v2_layer
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/v2/tests/test_layer.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
add_test
(
NAME test_v2_api
COMMAND bash
${
PROJ_ROOT
}
/python/paddle/v2/tests/run_tests.sh
${
PYTHON_EXECUTABLE
}
)
python/paddle/v2/tests/run_tests.sh
0 → 100755
浏览文件 @
d5c697e6
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Usage: run_tests.sh PYTHON_EXECUTABLE
#
# Installs the wheel(s) found under paddle/dist with the given interpreter and
# then runs every test file in test_list, stopping at the first failure.

# Resolve the directory containing this script so the relative paths below
# work regardless of the caller's working directory.  All expansions are
# quoted so paths containing spaces survive.
SCRIPTPATH=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "$SCRIPTPATH" || exit 1

# The glob is intentionally left unquoted so the shell expands it.
"$1" -m pip install ../../../../paddle/dist/*.whl

test_list="test_data_feeder.py"

export PYTHONPATH="$PWD/../../../../python/"

# $test_list is intentionally unquoted: it is a whitespace-separated list.
for fn in $test_list
do
  echo "test $fn"
  if ! "$1" "$fn"; then
    exit 1
  fi
done
python/paddle/v2/tests/test_data_feeder.py
0 → 100644
浏览文件 @
d5c697e6
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
py_paddle.swig_paddle
as
api
import
numpy
as
np
from
paddle.v2
import
data_type
from
paddle.v2.data_feeder
import
DataFeeder
class DataFeederTest(unittest.TestCase):
    """
    Exercise DataFeeder with dense, sparse, integer and mixed-feature input.

    Array results are compared element-wise. Comparing ``a.all()`` with
    ``b.all()`` would only compare two booleans and pass on wrong data.
    """

    def assert_dense_close(self, actual, expected):
        """
        Assert that ``actual`` matches ``expected`` element-wise.

        ``expected`` is cast to float32 and reshaped to the shape of
        ``actual`` before comparing; a small tolerance absorbs float32
        rounding of float64 inputs.
        """
        expected = np.asarray(expected, dtype='float32')
        np.testing.assert_allclose(
            actual, expected.reshape(np.shape(actual)), rtol=1e-6, atol=1e-7)

    def dense_reader(self, size):
        """Return ``size`` random floats produced by ``np.random.random``."""
        return np.random.random(size)

    def sparse_binary_reader(self, high, size_limit, non_empty=False):
        """
        Return a list of random integers drawn with ``np.random.randint(high)``.

        The list length is drawn with ``np.random.randint(size_limit)``; when
        ``non_empty`` is true the draw is repeated until it is not zero.
        """
        num = np.random.randint(size_limit)  # num could be 0
        while non_empty and num == 0:
            num = np.random.randint(size_limit)
        return np.random.randint(high, size=num).tolist()

    def test_dense(self):
        def compare(batch):
            feeder = DataFeeder([('image', data_type.dense_vector(784))],
                                {'image': 0})
            arg = feeder(batch)
            output = arg.getSlotValue(0).copyToNumpyMat()
            # Each sample wraps its single feature in a list/tuple; the
            # helper reshapes the expected data to the output's shape.
            self.assert_dense_close(output, np.array(batch, dtype='float32'))

        # test numpy array
        batch_size = 32
        dim = 784
        data = []
        for _ in range(batch_size):
            each_sample = []
            each_sample.append(self.dense_reader(dim))
            data.append(each_sample)
        compare(data)

        # each feature is a list
        data = []
        for _ in range(batch_size):
            each_sample = []
            each_sample.append(self.dense_reader(dim).tolist())
            data.append(each_sample)
        compare(data)

        # test tuple
        data = []
        for _ in range(batch_size):
            each_sample = (self.dense_reader(dim).tolist(), )
            data.append(each_sample)
        compare(data)

    def test_sparse_binary(self):
        dim = 10000
        batch_size = 32
        data = []
        for _ in range(batch_size):
            each_sample = []
            each_sample.append(self.sparse_binary_reader(dim, 50))
            data.append(each_sample)
        feeder = DataFeeder([('input', data_type.sparse_binary_vector(dim))],
                            {'input': 0})
        arg = feeder(data)
        output = arg.getSlotValue(0)
        assert isinstance(output, api.Matrix)
        for i in range(batch_size):
            self.assertEqual(output.getSparseRowCols(i), data[i][0])

    def test_sparse(self):
        dim = 10000
        batch_size = 32
        v = []
        w = []
        data = []
        for _ in range(batch_size):
            each_sample = []
            a = self.sparse_binary_reader(dim, 40, non_empty=True)
            b = self.dense_reader(len(a)).tolist()
            v.append(a)
            w.append(np.array(b, dtype="float32"))
            # list(...) keeps this a concrete list of pairs on Python 3 too.
            each_sample.append(list(zip(a, b)))
            data.append(each_sample)

        feeder = DataFeeder([('input', data_type.sparse_vector(dim))],
                            {'input': 0})
        arg = feeder(data)
        output = arg.getSlotValue(0)
        assert isinstance(output, api.Matrix)
        for i in range(batch_size):
            self.assertEqual(output.getSparseRowCols(i), v[i])
            cols_value = output.getSparseRowColsVal(i)
            value = [val[1] for val in cols_value]
            value = np.array(value, dtype="float32")
            self.assert_dense_close(value, w[i])

    def test_integer(self):
        dim = 100
        batch_size = 32
        index = []
        for _ in range(batch_size):
            each_sample = []
            each_sample.append(np.random.randint(dim))
            index.append(each_sample)
        feeder = DataFeeder([('input', data_type.integer_value(dim))],
                            {'input': 0})
        arg = feeder(index)
        output = arg.getSlotIds(0).copyToNumpyArray()
        index = np.array(index, dtype='int')
        np.testing.assert_array_equal(output, index.flatten())

    def test_integer_sequence(self):
        dim = 10000
        batch_size = 32
        start = [0]
        data = []
        for _ in range(batch_size):
            each_sample = []
            each_sample.append(
                self.sparse_binary_reader(
                    dim, 30, non_empty=True))
            data.append(each_sample)
            start.append(len(each_sample[0]) + start[-1])
        feeder = DataFeeder(
            [('input', data_type.integer_value_sequence(dim))], {'input': 0})
        arg = feeder(data)
        output_data = arg.getSlotIds(0).copyToNumpyArray()
        output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray()

        index = []
        for dat in data:
            index.extend(x for x in dat[0])  # only one feature, so dat[0]
        index = np.array(index, dtype='int')
        start = np.array(start, dtype='int')
        np.testing.assert_array_equal(output_data, index)
        np.testing.assert_array_equal(output_start, start)

    def test_multiple_features(self):
        batch_size = 2
        data = []
        for _ in range(batch_size):
            each_sample = []
            each_sample.append(np.random.randint(10))
            each_sample.append(
                self.sparse_binary_reader(
                    20000, 40, non_empty=True))
            each_sample.append(self.dense_reader(100))
            data.append(each_sample)

        # test multiple features
        data_types = [('fea0', data_type.dense_vector(100)),
                      ('fea1', data_type.sparse_binary_vector(20000)),
                      ('fea2', data_type.integer_value(10))]
        feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
        arg = feeder(data)
        output_dense = arg.getSlotValue(0).copyToNumpyMat()
        output_sparse = arg.getSlotValue(1)
        output_index = arg.getSlotIds(2).copyToNumpyArray()
        for i in range(batch_size):
            self.assert_dense_close(output_dense[i], data[i][2])
            self.assertEqual(output_sparse.getSparseRowCols(i), data[i][1])
            self.assertEqual(output_index[i], data[i][0])

        # reader returns 3 features, but only use 2 features
        data_types = [('fea0', data_type.dense_vector(100)),
                      ('fea2', data_type.integer_value(10))]
        feeder = DataFeeder(data_types, {'fea0': 2, 'fea2': 0})
        arg = feeder(data)
        output_dense = arg.getSlotValue(0).copyToNumpyMat()
        output_index = arg.getSlotIds(1).copyToNumpyArray()
        for i in range(batch_size):
            self.assert_dense_close(output_dense[i], data[i][2])
            self.assertEqual(output_index[i], data[i][0])

        # reader returns 3 features, one is duplicate data
        data_types = [('fea0', data_type.dense_vector(100)),
                      ('fea1', data_type.sparse_binary_vector(20000)),
                      ('fea2', data_type.integer_value(10)),
                      ('fea3', data_type.dense_vector(100))]
        feeder = DataFeeder(data_types,
                            {'fea0': 2,
                             'fea1': 1,
                             'fea2': 0,
                             'fea3': 2})
        arg = feeder(data)
        fea0 = arg.getSlotValue(0).copyToNumpyMat()
        fea1 = arg.getSlotValue(1)
        fea2 = arg.getSlotIds(2).copyToNumpyArray()
        fea3 = arg.getSlotValue(3).copyToNumpyMat()
        for i in range(batch_size):
            self.assert_dense_close(fea0[i], data[i][2])
            self.assertEqual(fea1.getSparseRowCols(i), data[i][1])
            self.assertEqual(fea2[i], data[i][0])
            self.assert_dense_close(fea3[i], data[i][2])

    def test_multiple_features_tuple(self):
        batch_size = 2
        data = []
        for _ in range(batch_size):
            a = np.random.randint(10)
            b = self.sparse_binary_reader(20000, 40, non_empty=True)
            c = self.dense_reader(100)
            each_sample = (a, b, c)
            data.append(each_sample)

        # test multiple features
        data_types = [('fea0', data_type.dense_vector(100)),
                      ('fea1', data_type.sparse_binary_vector(20000)),
                      ('fea2', data_type.integer_value(10))]
        feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
        arg = feeder(data)
        out_dense = arg.getSlotValue(0).copyToNumpyMat()
        out_sparse = arg.getSlotValue(1)
        out_index = arg.getSlotIds(2).copyToNumpyArray()
        for i in range(batch_size):
            self.assert_dense_close(out_dense[i], data[i][2])
            self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1])
            self.assertEqual(out_index[i], data[i][0])
# Script entry point: initialise Paddle with the "--use_gpu=0" flag, then hand
# control to the unittest runner.
if __name__ == '__main__':
    api.initPaddle("--use_gpu=0")
    unittest.main()
python/paddle/v2/trainer.py
浏览文件 @
d5c697e6
...
...
@@ -2,7 +2,7 @@ import collections
import
py_paddle.swig_paddle
as
api
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
from
py_paddle
import
DataProviderConvert
er
from
data_feeder
import
DataFeed
er
from
.
import
event
as
v2_event
from
.
import
layer
as
v2_layer
...
...
@@ -69,7 +69,8 @@ class SGD(ITrainer):
test_data_reader
=
None
,
event_handler
=
None
,
batch_size
=
32
,
data_types
=
None
):
data_types
=
None
,
reader_dict
=
None
):
"""
Training method. Will train num_passes of input data.
...
...
@@ -107,13 +108,7 @@ class SGD(ITrainer):
assert
isinstance
(
pass_evaluator
,
api
.
Evaluator
)
out_args
=
api
.
Arguments
.
createArguments
(
0
)
data_types_lists
=
[]
for
each
in
topology
.
input_layer_names
:
if
each
not
in
data_types
:
raise
ValueError
()
data_types_lists
.
append
(
data_types
[
each
])
converter
=
DataProviderConverter
(
input_types
=
data_types_lists
)
feeder
=
DataFeeder
(
data_types
,
reader_dict
)
for
pass_id
in
xrange
(
num_passes
):
event_handler
(
v2_event
.
BeginPass
(
pass_id
))
...
...
@@ -127,7 +122,7 @@ class SGD(ITrainer):
v2_event
.
BeginIteration
(
pass_id
=
pass_id
,
batch_id
=
batch_id
))
pass_type
=
updater
.
startBatch
(
len
(
data_batch
))
gm
.
forwardBackward
(
convert
er
(
data_batch
),
out_args
,
pass_type
)
gm
.
forwardBackward
(
feed
er
(
data_batch
),
out_args
,
pass_type
)
gm
.
eval
(
pass_evaluator
)
gm
.
eval
(
batch_evaluator
)
for
each_param
in
gm
.
getParameters
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录