Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
ee6ce4db
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ee6ce4db
编写于
2月 28, 2017
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'feature/tester' into feature/recommendation_v2_api
上级
32b168c7
75466f4e
变更
18
显示空白变更内容
内联
并排
Showing
18 changed file
with
319 addition
and
131 deletion
+319
-131
.travis.yml
.travis.yml
+1
-1
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+3
-4
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+3
-1
python/paddle/v2/data_feeder.py
python/paddle/v2/data_feeder.py
+1
-1
python/paddle/v2/data_type.py
python/paddle/v2/data_type.py
+3
-3
python/paddle/v2/dataset/cifar.py
python/paddle/v2/dataset/cifar.py
+32
-53
python/paddle/v2/dataset/common.py
python/paddle/v2/dataset/common.py
+0
-1
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+17
-8
python/paddle/v2/dataset/tests/cifar_test.py
python/paddle/v2/dataset/tests/cifar_test.py
+42
-0
python/paddle/v2/dataset/tests/mnist_test.py
python/paddle/v2/dataset/tests/mnist_test.py
+13
-9
python/paddle/v2/layer.py
python/paddle/v2/layer.py
+1
-0
python/paddle/v2/parameters.py
python/paddle/v2/parameters.py
+6
-11
python/paddle/v2/reader/decorator.py
python/paddle/v2/reader/decorator.py
+2
-2
python/paddle/v2/tests/CMakeLists.txt
python/paddle/v2/tests/CMakeLists.txt
+7
-1
python/paddle/v2/tests/test_topology.py
python/paddle/v2/tests/test_topology.py
+83
-0
python/paddle/v2/topology.py
python/paddle/v2/topology.py
+96
-0
python/paddle/v2/trainer.py
python/paddle/v2/trainer.py
+8
-31
python/setup.py.in
python/setup.py.in
+1
-5
未找到文件。
.travis.yml
浏览文件 @
ee6ce4db
...
...
@@ -57,7 +57,7 @@ before_install:
-
if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version.
-
pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
'scikit-learn>=0.18.0' 'scipy>=0.18.0'
-
pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
script
:
-
paddle/scripts/travis/main.sh
notifications
:
...
...
demo/mnist/api_train_v2.py
浏览文件 @
ee6ce4db
import
numpy
import
paddle.v2
as
paddle
...
...
@@ -21,7 +20,7 @@ def main():
adam_optimizer
=
paddle
.
optimizer
.
Adam
(
learning_rate
=
0.01
)
trainer
=
paddle
.
trainer
.
SGD
(
topology
=
cost
,
trainer
=
paddle
.
trainer
.
SGD
(
cost
=
cost
,
parameters
=
parameters
,
update_equation
=
adam_optimizer
)
...
...
@@ -29,7 +28,7 @@ def main():
if
isinstance
(
event
,
paddle
.
event
.
EndIteration
):
if
event
.
batch_id
%
1000
==
0
:
result
=
trainer
.
test
(
reader
=
paddle
.
reader
.
batched
(
paddle
.
dataset
.
mnist
.
test
_creator
(),
batch_size
=
256
))
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
256
))
print
"Pass %d, Batch %d, Cost %f, %s, Testing metrics %s"
%
(
event
.
pass_id
,
event
.
batch_id
,
event
.
cost
,
event
.
metrics
,
...
...
@@ -41,7 +40,7 @@ def main():
trainer
.
train
(
reader
=
paddle
.
reader
.
batched
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
mnist
.
train
_creator
(),
buf_size
=
8192
),
paddle
.
dataset
.
mnist
.
train
(),
buf_size
=
8192
),
batch_size
=
32
),
event_handler
=
event_handler
)
...
...
python/paddle/v2/__init__.py
浏览文件 @
ee6ce4db
...
...
@@ -18,6 +18,7 @@ import parameters
import
trainer
import
event
import
data_type
import
topology
import
data_feeder
from
.
import
dataset
from
.
import
reader
...
...
@@ -27,7 +28,8 @@ import py_paddle.swig_paddle as api
__all__
=
[
'optimizer'
,
'layer'
,
'activation'
,
'parameters'
,
'init'
,
'trainer'
,
'event'
,
'data_type'
,
'attr'
,
'pooling'
,
'data_feeder'
,
'dataset'
,
'reader'
'event'
,
'data_type'
,
'attr'
,
'pooling'
,
'data_feeder'
,
'dataset'
,
'reader'
,
'topology'
]
...
...
python/paddle/v2/data_feeder.py
浏览文件 @
ee6ce4db
...
...
@@ -23,7 +23,7 @@ class DataFeeder(DataProviderConverter):
"""
DataFeeder converts the data returned by paddle.reader into a data structure
of Arguments which is defined in the API. The paddle.reader usually returns
a list of mini-batch data entries. Each data entry in the list is one sampe.
a list of mini-batch data entries. Each data entry in the list is one samp
l
e.
Each sample is a list or a tuple with one feature or multiple features.
DataFeeder converts this mini-batch data entries into Arguments in order
to feed it to C++ interface.
...
...
python/paddle/v2/data_type.py
浏览文件 @
ee6ce4db
...
...
@@ -13,10 +13,10 @@
# limitations under the License.
from
paddle.trainer.PyDataProvider2
import
\
InputType
,
dense_vector
,
sparse_binary_vector
,
\
InputType
,
DataType
,
dense_vector
,
sparse_binary_vector
,
\
sparse_vector
,
integer_value
,
integer_value_sequence
__all__
=
[
'InputType'
,
'
dense_vector'
,
'sparse_binary_vector'
,
'sparse
_vector'
,
'integer_value'
,
'integer_value_sequence'
'InputType'
,
'
DataType'
,
'dense_vector'
,
'sparse_binary
_vector'
,
'
sparse_vector'
,
'
integer_value'
,
'integer_value_sequence'
]
python/paddle/v2/dataset/cifar.py
浏览文件 @
ee6ce4db
"""
CIFAR Dataset.
URL: https://www.cs.toronto.edu/~kriz/cifar.html
the default train_creator, test_creator used for CIFAR-10 dataset.
CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
"""
import
cPickle
import
itertools
import
tarfile
import
numpy
import
paddle.v2.dataset.common
import
tarfile
from
common
import
download
__all__
=
[
'cifar_100_train_creator'
,
'cifar_100_test_creator'
,
'train_creator'
,
'test_creator'
]
__all__
=
[
'train100'
,
'test100'
,
'train10'
,
'test10'
]
CIFAR10_URL
=
'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
URL_PREFIX
=
'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL
=
URL_PREFIX
+
'cifar-10-python.tar.gz'
CIFAR10_MD5
=
'c58f30108f718f92721af3b95e74349a'
CIFAR100_URL
=
'https://www.cs.toronto.edu/~kriz/
cifar-100-python.tar.gz'
CIFAR100_URL
=
URL_PREFIX
+
'
cifar-100-python.tar.gz'
CIFAR100_MD5
=
'eb9058c3a382ffc7106e4002c42a8d85'
def
__read_batch__
(
filename
,
sub_name
):
def
reader
():
def
__read_one_batch_impl__
(
batch
):
def
reader_creator
(
filename
,
sub_name
):
def
read_batch
(
batch
):
data
=
batch
[
'data'
]
labels
=
batch
.
get
(
'labels'
,
batch
.
get
(
'fine_labels'
,
None
))
assert
labels
is
not
None
for
sample
,
label
in
itertools
.
izip
(
data
,
labels
):
yield
(
sample
/
255.0
).
astype
(
numpy
.
float32
),
int
(
label
)
def
reader
():
with
tarfile
.
open
(
filename
,
mode
=
'r'
)
as
f
:
names
=
(
each_item
.
name
for
each_item
in
f
if
sub_name
in
each_item
.
name
)
for
name
in
names
:
batch
=
cPickle
.
load
(
f
.
extractfile
(
name
))
for
item
in
__read_one_batch_impl__
(
batch
):
for
item
in
read_batch
(
batch
):
yield
item
return
reader
def
cifar_100_train_creator
():
fn
=
download
(
url
=
CIFAR100_URL
,
md5
=
CIFAR100_MD5
)
return
__read_batch__
(
fn
,
'train'
)
def
cifar_100_test_creator
():
fn
=
download
(
url
=
CIFAR100_URL
,
md5
=
CIFAR100_MD5
)
return
__read_batch__
(
fn
,
'test'
)
def
train_creator
():
"""
Default train reader creator. Use CIFAR-10 dataset.
"""
fn
=
download
(
url
=
CIFAR10_URL
,
md5
=
CIFAR10_MD5
)
return
__read_batch__
(
fn
,
'data_batch'
)
def
train100
():
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
CIFAR100_URL
,
'cifar'
,
CIFAR100_MD5
),
'train'
)
def
test_creator
():
"""
Default test reader creator. Use CIFAR-10 dataset.
"""
fn
=
download
(
url
=
CIFAR10_URL
,
md5
=
CIFAR10_MD5
)
return
__read_batch__
(
fn
,
'test_batch'
)
def
test100
():
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
CIFAR100_URL
,
'cifar'
,
CIFAR100_MD5
),
'test'
)
def
unittest
():
for
_
in
train_creator
()():
pass
for
_
in
test_creator
()():
pass
def
train10
():
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
CIFAR10_URL
,
'cifar'
,
CIFAR10_MD5
),
'data_batch'
)
if
__name__
==
'__main__'
:
unittest
()
def
test10
():
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
CIFAR10_URL
,
'cifar'
,
CIFAR10_MD5
),
'test_batch'
)
python/paddle/v2/dataset/common.py
浏览文件 @
ee6ce4db
...
...
@@ -27,7 +27,6 @@ def download(url, module_name, md5sum):
filename
=
os
.
path
.
join
(
dirname
,
url
.
split
(
'/'
)[
-
1
])
if
not
(
os
.
path
.
exists
(
filename
)
and
md5file
(
filename
)
==
md5sum
):
# If file doesn't exist or MD5 doesn't match, then download.
r
=
requests
.
get
(
url
,
stream
=
True
)
with
open
(
filename
,
'w'
)
as
f
:
shutil
.
copyfileobj
(
r
.
raw
,
f
)
...
...
python/paddle/v2/dataset/mnist.py
浏览文件 @
ee6ce4db
"""
MNIST dataset.
"""
import
paddle.v2.dataset.common
import
subprocess
import
numpy
import
platform
__all__
=
[
'train'
,
'test'
]
URL_PREFIX
=
'http://yann.lecun.com/exdb/mnist/'
TEST_IMAGE_URL
=
URL_PREFIX
+
't10k-images-idx3-ubyte.gz'
TEST_IMAGE_MD5
=
'
25e3cc63507ef6e98d5dc541e8672bb6
'
TEST_IMAGE_MD5
=
'
9fb629c4189551a2d022fa330f9573f3
'
TEST_LABEL_URL
=
URL_PREFIX
+
't10k-labels-idx1-ubyte.gz'
TEST_LABEL_MD5
=
'
4e9511fe019b2189026bd0421ba7b688
'
TEST_LABEL_MD5
=
'
ec29112dd5afa0611ce80d1b7f02629c
'
TRAIN_IMAGE_URL
=
URL_PREFIX
+
'train-images-idx3-ubyte.gz'
TRAIN_IMAGE_MD5
=
'f68b3c2dcbeaaa9fbdd348bbdeb94873'
TRAIN_LABEL_URL
=
URL_PREFIX
+
'train-labels-idx1-ubyte.gz'
...
...
@@ -18,12 +20,19 @@ TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'
def
reader_creator
(
image_filename
,
label_filename
,
buffer_size
):
def
reader
():
if
platform
.
system
()
==
'Darwin'
:
zcat_cmd
=
'gzcat'
elif
platform
.
system
()
==
'Linux'
:
zcat_cmd
=
'zcat'
else
:
raise
NotImplementedError
()
# According to http://stackoverflow.com/a/38061619/724872, we
# cannot use standard package gzip here.
m
=
subprocess
.
Popen
([
"zcat"
,
image_filename
],
stdout
=
subprocess
.
PIPE
)
m
=
subprocess
.
Popen
([
zcat_cmd
,
image_filename
],
stdout
=
subprocess
.
PIPE
)
m
.
stdout
.
read
(
16
)
# skip some magic bytes
l
=
subprocess
.
Popen
([
"zcat"
,
label_filename
],
stdout
=
subprocess
.
PIPE
)
l
=
subprocess
.
Popen
([
zcat_cmd
,
label_filename
],
stdout
=
subprocess
.
PIPE
)
l
.
stdout
.
read
(
8
)
# skip some magic bytes
while
True
:
...
...
@@ -40,12 +49,12 @@ def reader_creator(image_filename, label_filename, buffer_size):
images
=
images
/
255.0
*
2.0
-
1.0
for
i
in
xrange
(
buffer_size
):
yield
images
[
i
,
:],
labels
[
i
]
yield
images
[
i
,
:],
int
(
labels
[
i
])
m
.
terminate
()
l
.
terminate
()
return
reader
()
return
reader
def
train
():
...
...
python/paddle/v2/dataset/tests/cifar_test.py
0 → 100644
浏览文件 @
ee6ce4db
import
paddle.v2.dataset.cifar
import
unittest
class
TestCIFAR
(
unittest
.
TestCase
):
def
check_reader
(
self
,
reader
):
sum
=
0
label
=
0
for
l
in
reader
():
self
.
assertEqual
(
l
[
0
].
size
,
3072
)
if
l
[
1
]
>
label
:
label
=
l
[
1
]
sum
+=
1
return
sum
,
label
def
test_test10
(
self
):
instances
,
max_label_value
=
self
.
check_reader
(
paddle
.
v2
.
dataset
.
cifar
.
test10
())
self
.
assertEqual
(
instances
,
10000
)
self
.
assertEqual
(
max_label_value
,
9
)
def
test_train10
(
self
):
instances
,
max_label_value
=
self
.
check_reader
(
paddle
.
v2
.
dataset
.
cifar
.
train10
())
self
.
assertEqual
(
instances
,
50000
)
self
.
assertEqual
(
max_label_value
,
9
)
def
test_test100
(
self
):
instances
,
max_label_value
=
self
.
check_reader
(
paddle
.
v2
.
dataset
.
cifar
.
test100
())
self
.
assertEqual
(
instances
,
10000
)
self
.
assertEqual
(
max_label_value
,
99
)
def
test_train100
(
self
):
instances
,
max_label_value
=
self
.
check_reader
(
paddle
.
v2
.
dataset
.
cifar
.
train100
())
self
.
assertEqual
(
instances
,
50000
)
self
.
assertEqual
(
max_label_value
,
99
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/dataset/tests/mnist_test.py
浏览文件 @
ee6ce4db
...
...
@@ -5,21 +5,25 @@ import unittest
class
TestMNIST
(
unittest
.
TestCase
):
def
check_reader
(
self
,
reader
):
sum
=
0
for
l
in
reader
:
label
=
0
for
l
in
reader
():
self
.
assertEqual
(
l
[
0
].
size
,
784
)
self
.
assertEqual
(
l
[
1
].
size
,
1
)
self
.
assertLess
(
l
[
1
],
10
)
self
.
assertGreaterEqual
(
l
[
1
],
0
)
if
l
[
1
]
>
label
:
label
=
l
[
1
]
sum
+=
1
return
sum
return
sum
,
label
def
test_train
(
self
):
self
.
assertEqual
(
self
.
check_reader
(
paddle
.
v2
.
dataset
.
mnist
.
train
()),
60000
)
instances
,
max_label_value
=
self
.
check_reader
(
paddle
.
v2
.
dataset
.
mnist
.
train
())
self
.
assertEqual
(
instances
,
60000
)
self
.
assertEqual
(
max_label_value
,
9
)
def
test_test
(
self
):
self
.
assertEqual
(
self
.
check_reader
(
paddle
.
v2
.
dataset
.
mnist
.
test
()),
10000
)
instances
,
max_label_value
=
self
.
check_reader
(
paddle
.
v2
.
dataset
.
mnist
.
test
())
self
.
assertEqual
(
instances
,
10000
)
self
.
assertEqual
(
max_label_value
,
9
)
if
__name__
==
'__main__'
:
...
...
python/paddle/v2/layer.py
浏览文件 @
ee6ce4db
...
...
@@ -284,6 +284,7 @@ def mixed(size=0,
return
MixedLayerV2
(
size
,
input
,
name
,
act
,
bias_attr
,
layer_attr
)
LayerV2
=
Layer
data
=
DataLayerV2
AggregateLevel
=
conf_helps
.
layers
.
AggregateLevel
ExpandLevel
=
conf_helps
.
layers
.
ExpandLevel
...
...
python/paddle/v2/parameters.py
浏览文件 @
ee6ce4db
import
numpy
as
np
from
.
import
layer
as
v2_layer
import
py_paddle.swig_paddle
as
api
from
paddle.proto.ParameterConfig_pb2
import
ParameterConfig
from
topology
import
Topology
__all__
=
[
'Parameters'
,
'create'
]
def
create
(
*
layers
):
def
create
(
layers
):
"""
Create parameter pool by layers. In paddle, layer can be represent a
model config.
Create parameter pool by topology.
:param layers:
:return:
"""
for
layer
in
layers
:
if
not
isinstance
(
layer
,
v2_layer
.
Layer
):
raise
ValueError
(
'create must pass a topologies which type is paddle.layer.Layer'
)
model_config
=
v2_layer
.
parse_network
(
*
layers
)
topology
=
Topology
(
layers
)
pool
=
Parameters
()
for
param
in
model_config
.
parameters
:
for
param
in
topology
.
proto
()
.
parameters
:
pool
.
__append_config__
(
param
)
return
pool
...
...
python/paddle/v2/reader/decorator.py
浏览文件 @
ee6ce4db
...
...
@@ -201,7 +201,7 @@ def batched(reader, batch_size):
:return: the batched reader.
"""
def
__impl__
():
def
batched_reader
():
r
=
reader
()
batch
=
[]
for
instance
in
r
:
...
...
@@ -212,4 +212,4 @@ def batched(reader, batch_size):
if
batch
:
yield
batch
return
__impl__
return
batched_reader
python/paddle/v2/tests/CMakeLists.txt
浏览文件 @
ee6ce4db
...
...
@@ -2,5 +2,11 @@ add_test(NAME test_v2_layer
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/v2/tests/test_layer.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
add_test
(
NAME test_v2_api
COMMAND bash
${
PROJ_ROOT
}
/python/paddle/v2/tests/run_tests.sh
${
PYTHON_EXECUTABLE
}
)
add_test
(
NAME topology_test
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/v2/tests/test_topology.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
python/paddle/v2/tests/test_topology.py
0 → 100644
浏览文件 @
ee6ce4db
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
paddle.v2.layer
as
layer
import
paddle.v2.topology
as
topology
import
paddle.v2.data_type
as
data_type
import
paddle.trainer_config_helpers
as
conf_helps
class
TestTopology
(
unittest
.
TestCase
):
def
test_data_type
(
self
):
pixel
=
layer
.
data
(
name
=
'pixel'
,
type
=
data_type
.
dense_vector
(
784
))
label
=
layer
.
data
(
name
=
'label'
,
type
=
data_type
.
integer_value
(
10
))
hidden
=
layer
.
fc
(
input
=
pixel
,
size
=
100
,
act
=
conf_helps
.
SigmoidActivation
())
inference
=
layer
.
fc
(
input
=
hidden
,
size
=
10
,
act
=
conf_helps
.
SoftmaxActivation
())
cost
=
layer
.
classification_cost
(
input
=
inference
,
label
=
label
)
topo
=
topology
.
Topology
(
cost
)
data_types
=
topo
.
data_type
()
self
.
assertEqual
(
len
(
data_types
),
2
)
pixel_data_type
=
filter
(
lambda
type
:
type
[
0
]
==
"pixel"
,
data_types
)
self
.
assertEqual
(
len
(
pixel_data_type
),
1
)
pixel_data_type
=
pixel_data_type
[
0
]
self
.
assertEqual
(
pixel_data_type
[
1
].
type
,
data_type
.
DataType
.
Dense
)
self
.
assertEqual
(
pixel_data_type
[
1
].
dim
,
784
)
label_data_type
=
filter
(
lambda
type
:
type
[
0
]
==
"label"
,
data_types
)
self
.
assertEqual
(
len
(
label_data_type
),
1
)
label_data_type
=
label_data_type
[
0
]
self
.
assertEqual
(
label_data_type
[
1
].
type
,
data_type
.
DataType
.
Index
)
self
.
assertEqual
(
label_data_type
[
1
].
dim
,
10
)
def
test_get_layer
(
self
):
pixel
=
layer
.
data
(
name
=
'pixel'
,
type
=
data_type
.
dense_vector
(
784
))
label
=
layer
.
data
(
name
=
'label'
,
type
=
data_type
.
integer_value
(
10
))
hidden
=
layer
.
fc
(
input
=
pixel
,
size
=
100
,
act
=
conf_helps
.
SigmoidActivation
())
inference
=
layer
.
fc
(
input
=
hidden
,
size
=
10
,
act
=
conf_helps
.
SoftmaxActivation
())
cost
=
layer
.
classification_cost
(
input
=
inference
,
label
=
label
)
topo
=
topology
.
Topology
(
cost
)
pixel_layer
=
topo
.
get_layer
(
"pixel"
)
label_layer
=
topo
.
get_layer
(
"label"
)
self
.
assertEqual
(
pixel_layer
,
pixel
)
self
.
assertEqual
(
label_layer
,
label
)
def
test_parse
(
self
):
pixel
=
layer
.
data
(
name
=
'pixel'
,
type
=
data_type
.
dense_vector
(
784
))
label
=
layer
.
data
(
name
=
'label'
,
type
=
data_type
.
integer_value
(
10
))
hidden
=
layer
.
fc
(
input
=
pixel
,
size
=
100
,
act
=
conf_helps
.
SigmoidActivation
())
inference
=
layer
.
fc
(
input
=
hidden
,
size
=
10
,
act
=
conf_helps
.
SoftmaxActivation
())
maxid
=
layer
.
max_id
(
input
=
inference
)
cost1
=
layer
.
classification_cost
(
input
=
inference
,
label
=
label
)
cost2
=
layer
.
cross_entropy_cost
(
input
=
inference
,
label
=
label
)
topology
.
Topology
(
cost2
).
proto
()
topology
.
Topology
([
cost1
]).
proto
()
topology
.
Topology
([
cost1
,
cost2
]).
proto
()
topology
.
Topology
([
inference
,
maxid
]).
proto
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/topology.py
0 → 100644
浏览文件 @
ee6ce4db
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
collections
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
import
layer
as
v2_layer
__all__
=
[
'Topology'
]
def
__bfs_travel__
(
callback
,
*
layers
):
for
each_layer
in
layers
:
__break__
=
callback
(
each_layer
)
if
__break__
:
return
__bfs_travel__
(
callback
,
*
each_layer
.
__parent_layers__
.
values
())
class
Topology
(
object
):
"""
Topology is used to store the information about all layers
and network configs.
"""
def
__init__
(
self
,
layers
):
if
not
isinstance
(
layers
,
collections
.
Sequence
):
__check_layer_type__
(
layers
)
layers
=
[
layers
]
for
layer
in
layers
:
__check_layer_type__
(
layer
)
self
.
layers
=
layers
self
.
__model_config__
=
v2_layer
.
parse_network
(
*
layers
)
assert
isinstance
(
self
.
__model_config__
,
ModelConfig
)
def
proto
(
self
):
return
self
.
__model_config__
def
get_layer
(
self
,
name
):
"""
get v2.Layer Class instance by layer name
:param name:
:return:
"""
result_layer
=
[
None
]
def
__impl__
(
l
):
if
l
.
name
==
name
:
result_layer
[
0
]
=
l
return
True
# break
return
False
__bfs_travel__
(
__impl__
,
*
self
.
layers
)
if
result_layer
[
0
]
is
None
:
raise
ValueError
(
"No such layer %s"
%
name
)
return
result_layer
[
0
]
def
data_layers
(
self
):
"""
get all data layer
:return:
"""
data_layers
=
dict
()
def
__impl__
(
l
):
if
isinstance
(
l
,
v2_layer
.
DataLayerV2
):
data_layers
[
l
.
name
]
=
l
__bfs_travel__
(
__impl__
,
*
self
.
layers
)
return
data_layers
def
data_type
(
self
):
"""
get data_type from proto, such as:
[('image', dense_vector(768)), ('label', integer_value(10))]
"""
data_layers
=
self
.
data_layers
()
return
[(
nm
,
data_layers
[
nm
].
type
)
for
nm
in
self
.
proto
().
input_layer_names
]
def
__check_layer_type__
(
layer
):
if
not
isinstance
(
layer
,
v2_layer
.
LayerV2
):
raise
ValueError
(
'layer should have type paddle.layer.Layer'
)
python/paddle/v2/trainer.py
浏览文件 @
ee6ce4db
import
collections
import
py_paddle.swig_paddle
as
api
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
from
data_feeder
import
DataFeeder
from
data_feeder
import
DataFeeder
from
topology
import
Topology
from
.
import
event
as
v2_event
from
.
import
layer
as
v2_layer
from
.
import
optimizer
as
v2_optimizer
from
.
import
parameters
as
v2_parameters
...
...
@@ -23,13 +22,6 @@ def default_event_handler(event):
pass
def
__bfs_travel_topology__
(
callback
,
*
topologies
):
for
each_layer
in
topologies
:
callback
(
each_layer
)
__bfs_travel_topology__
(
callback
,
*
each_layer
.
__parent_layers__
.
values
())
class
ITrainer
(
object
):
"""
The interface of Trainer. The only exposed method is `train`.
...
...
@@ -50,40 +42,26 @@ class ITrainer(object):
class
SGD
(
ITrainer
):
def
__init__
(
self
,
topology
,
parameters
,
update_equation
):
def
__init__
(
self
,
cost
,
parameters
,
update_equation
):
"""
Simple SGD Trainer.
:param update_equation: The optimizer object.
:type update_equation: v2_optimizer.Optimizer
"""
if
not
isinstance
(
parameters
,
v2_parameters
.
Parameters
):
raise
TypeError
(
'parameters should be parameters'
)
if
not
isinstance
(
update_equation
,
v2_optimizer
.
Optimizer
):
raise
TypeError
(
"update equation parameter must be "
"paddle.v2.optimizer.Optimizer"
)
topology
=
Topology
(
cost
)
self
.
__optimizer__
=
update_equation
self
.
__topology__
=
topology
self
.
__parameters__
=
parameters
self
.
__topology_in_proto__
=
v2_layer
.
parse_network
(
topology
)
data_types
=
dict
()
def
__travel__
(
l
):
if
hasattr
(
l
,
'type'
):
data_types
[
l
.
name
]
=
l
.
type
if
not
isinstance
(
topology
,
collections
.
Sequence
):
topology
=
[
topology
]
__bfs_travel_topology__
(
__travel__
,
*
topology
)
self
.
__data_types__
=
[
(
iname
,
data_types
[
iname
])
for
iname
in
self
.
__topology_in_proto__
.
input_layer_names
]
if
not
isinstance
(
self
.
__topology_in_proto__
,
ModelConfig
):
raise
TypeError
(
'topology should be a model config'
)
self
.
__topology_in_proto__
=
topology
.
proto
()
self
.
__data_types__
=
topology
.
data_type
()
gm
=
api
.
GradientMachine
.
createFromConfigProto
(
self
.
__topology_in_proto__
,
api
.
CREATE_MODE_NORMAL
,
self
.
__optimizer__
.
enable_types
())
...
...
@@ -103,7 +81,6 @@ class SGD(ITrainer):
:param event_handler: Event handler. A method will be invoked when event
occurred.
:type event_handler: (BaseEvent) => None
:param data_types: Not important, will be removed after data refactor.
:return:
"""
if
event_handler
is
None
:
...
...
@@ -113,6 +90,7 @@ class SGD(ITrainer):
reader_dict
=
self
.
default_reader_dict
()
__check_train_args__
(
**
locals
())
updater
=
self
.
__optimizer__
.
create_local_updater
()
updater
.
init
(
self
.
__gradient_machine__
)
...
...
@@ -192,6 +170,5 @@ def __check_train_args__(reader, event_handler, **kwargs):
if
not
callable
(
reader
)
or
not
isinstance
(
reader
(),
collections
.
Iterator
):
raise
TypeError
(
'train_data_reader should be a function, '
'which can return a iterator'
)
if
not
callable
(
event_handler
):
raise
TypeError
(
'event handler should be a function'
)
python/setup.py.in
浏览文件 @
ee6ce4db
...
...
@@ -15,9 +15,5 @@ setup(name='paddle',
packages=packages,
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}'
},
install_requires = [
'scikit-learn>=0.18.0',
'scipy>=0.18.0',
]
}
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录