Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
ce49124d
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ce49124d
编写于
2月 13, 2017
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Draft for new API
上级
d6292cca
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
344 additions
and
2 deletions
+344
-2
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+61
-0
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+12
-2
python/paddle/v2/parameters.py
python/paddle/v2/parameters.py
+111
-0
python/paddle/v2/trainer.py
python/paddle/v2/trainer.py
+160
-0
未找到文件。
demo/mnist/api_train_v2.py
0 → 100644
浏览文件 @
ce49124d
from
paddle.trainer_config_helpers
import
*
from
paddle.trainer.PyDataProvider2
import
dense_vector
,
integer_value
import
paddle.v2
as
paddle_v2
import
numpy
import
mnist_util
def train_reader():
    """Yield MNIST training samples one by one.

    Samples are whatever ``mnist_util.read_from_mnist`` produces for the raw
    training file ``./data/raw_data/train``. ``read_from_mnist`` is not
    called until the returned generator is first advanced.
    """
    for sample in mnist_util.read_from_mnist('./data/raw_data/train'):
        yield sample
def network_config():
    """Declare the MNIST classifier topology.

    A 784-wide ``pixel`` data layer feeds two fully connected layers of size
    200, followed by a size-10 fully connected layer with softmax activation.
    The classification cost of that prediction against the ``label`` data
    layer is registered through ``outputs``.
    """
    layer = data_layer(name='pixel', size=784)
    # Two identical hidden layers stacked on top of the input layer.
    for _ in range(2):
        layer = fc_layer(input=layer, size=200)
    prediction = fc_layer(input=layer, size=10, act=SoftmaxActivation())
    label = data_layer(name='label', size=10)
    outputs(classification_cost(input=prediction, label=label))
def event_handler(event):
    """Print progress for every finished training batch.

    Events that are not ``paddle_v2.trainer.CompleteTrainOneBatch`` instances
    are ignored.

    :param event: event object handed over by the trainer.
    """
    if isinstance(event, paddle_v2.trainer.CompleteTrainOneBatch):
        # Call form with a single pre-formatted argument: it prints the same
        # text as a Python 2 print statement and as the Python 3 function.
        print("Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id,
                                              event.cost))
def main():
    """Train the MNIST demo network with the draft v2 API.

    Steps: initialise Paddle for one CPU trainer, parse the topology, create
    a parameter pool, fill every parameter with uniform random values in
    [-1, 1), then run SGD training with an Adam update equation.
    """
    paddle_v2.init(use_gpu=False, trainer_count=1)
    topology = parse_network_config(network_config)
    parameters = paddle_v2.parameters.create(topology)

    # Writing through the returned array initialises the pooled parameter.
    for name in parameters.get_names():
        values = parameters.get_parameter(name)
        values[:] = numpy.random.uniform(
            low=-1.0, high=1.0, size=values.shape)

    adam = paddle_v2.optimizer.Adam(
        learning_rate=1e-4,
        model_average=ModelAverage(average_window=0.5),
        regularization=L2Regularization(rate=0.5))
    sgd = paddle_v2.trainer.SGDTrainer(update_equation=adam)

    # data_types is slated for removal; it should become part of the
    # network topology.
    input_types = {
        'pixel': dense_vector(784),
        'label': integer_value(10),
    }
    sgd.train(
        train_data_reader=train_reader,
        topology=topology,
        parameters=parameters,
        event_handler=event_handler,
        # Batching should be refactored into the data reader.
        batch_size=32,
        data_types=input_types)
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
python/paddle/v2/__init__.py
浏览文件 @
ce49124d
...
...
@@ -11,7 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
optimizer
import
parameters
import
py_paddle.swig_paddle
as
api
import
trainer
# Names exported by this package module.
__all__ = [
    'optimizer',
    'parameters',
    'init',
    'trainer',
]
def init(**kwargs):
    """Initialise Paddle from keyword arguments.

    Every ``key=value`` keyword is rendered as the flag string
    ``--key=value`` (the value goes through ``str``), and the resulting
    strings are passed as positional arguments to ``api.initPaddle``.

    :param kwargs: flag names mapped to their values.
    """
    # The stray ``__all__ = ['optimizer']`` that appeared here was only a
    # function-local assignment with no effect, so it has been removed.
    args = ['--%s=%s' % (key, str(value)) for key, value in kwargs.items()]
    api.initPaddle(*args)
python/paddle/v2/parameters.py
0 → 100644
浏览文件 @
ce49124d
import
numpy
as
np
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
from
paddle.proto.ParameterConfig_pb2
import
ParameterConfig
# Names exported by this module.
__all__ = [
    'IParameterPool',
    'create',
    'ParameterFlag',
]
class ParameterFlag(object):
    """
    Access flags for IParameterPool.get_parameter.

    When the flag is writable, operations on the returned numpy array are
    meant to apply to the Paddle parameter as well, which is slower in GPU
    mode.
    """
    # One bit per access mode.
    READ_ONLY = 1 << 0
    WRITE_ONLY = 1 << 1
    # Both bits set.
    READ_WRITE = READ_ONLY | WRITE_ONLY
class IParameterPool(object):
    """
    Interface of a parameter pool, i.e. a dictionary of parameters.

    Users read, modify or customise parameter values through the array
    returned by `get_parameter`.

    ..  code-block:: python

        pool = paddle.parameters.create(topo1, topo2)
        embedding = pool.get_parameter("embedding")
        assert isinstance(embedding, numpy.ndarray)
        print(embedding[1:])
    """

    def get_parameter(self, name, flag=ParameterFlag.READ_WRITE):
        """
        Look up one parameter by its name.

        :param name: name of the parameter.
        :type name: basestring
        :param flag: a ParameterFlag value saying whether the returned value
                     is to be read, written, or both.
        :type flag: int
        :return: the value of the parameter.
        :rtype: np.ndarray
        :raise NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError()

    def get_names(self):
        """
        List the names of all parameters in the pool.

        :return: every parameter name.
        :rtype: list
        :raise NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError()
class NumpyParameterPool(IParameterPool):
    """
    Parameter pool that stores every parameter as a float32 numpy array.

    Configurations are registered with `append`. The array for a parameter
    is allocated on the first `get_parameter` call for its name and the same
    array object is returned on every later call.
    """

    def __init__(self):
        # name -> ParameterConfig, filled by append().
        self.__param_configs__ = dict()
        # name -> numpy array, or None until the array is first requested.
        self.__params__ = dict()

    def append(self, conf):
        """
        Register a parameter configuration under ``conf.name``.

        The array slot for that name is reset to "not allocated yet", so
        appending a name again discards any array stored for it.

        :param conf: configuration of the parameter.
        :type conf: ParameterConfig
        :raise ValueError: if conf is not a ParameterConfig or is not
                           initialized.
        """
        if not isinstance(conf, ParameterConfig):
            raise ValueError("conf must be ParameterConfig")

        if not conf.IsInitialized():
            raise ValueError("conf is not initialized")

        self.__param_configs__[conf.name] = conf
        self.__params__[conf.name] = None

    def get_config(self, name):
        """
        Return the configuration registered for a parameter.

        :param name: parameter name.
        :return: the ParameterConfig passed to `append` for that name.
        :raise ValueError: if no such parameter was appended.
        """
        if name not in self.__param_configs__:
            raise ValueError("parameter %s is not appended" % name)

        return self.__param_configs__[name]

    def get_parameter(self, name, *args, **kwargs):
        """
        Return the numpy array backing a parameter, allocating it if needed.

        Extra positional and keyword arguments (such as the access flag of
        the interface) are accepted and ignored.

        :param name: parameter name.
        :return: float32 array whose shape is taken from the ``dims`` of
                 the parameter configuration.
        :rtype: np.ndarray
        :raise ValueError: if the parameter was not appended or its
                           configuration has empty ``dims``.
        """
        if name not in self.__params__:
            raise ValueError("parameter %s is not appended" % name)

        param = self.__params__[name]
        if param is None:
            shape = self.__param_configs__[name].dims
            if len(shape) == 0:
                raise ValueError("parameter %s is no shape" % name)
            # Zero-fill rather than hand out uninitialised memory: the pool
            # content is copied as-is into the gradient machine, so garbage
            # (possibly NaN/inf) values must never be observable.
            param = np.zeros(
                shape=[int(item) for item in shape], dtype='float32')
            self.__params__[name] = param
        return param

    def get_names(self):
        """
        Return the names of all appended parameters.

        :return: a new list of parameter names.
        :rtype: list
        """
        # list(...) yields a real list on both Python 2 and Python 3.
        return list(self.__param_configs__)
def create(*topologies):
    """
    Build a parameter pool from one or more topologies.

    The configurations found in ``parameters`` of each topology are appended
    to a new NumpyParameterPool, in the order the topologies are given.

    :param topologies: model configurations to collect parameters from.
    :type topologies: ModelConfig
    :return: the pool holding the appended parameter configurations.
    :rtype: NumpyParameterPool
    :raise ValueError: if an argument is not a ModelConfig, or if appending
                       a parameter configuration is rejected by the pool.
    """
    result = NumpyParameterPool()
    for config in topologies:
        if isinstance(config, ModelConfig):
            for param_conf in config.parameters:
                result.append(param_conf)
        else:
            raise ValueError(
                'create must pass a topologies which type is ModelConfig')
    return result
python/paddle/v2/trainer.py
0 → 100644
浏览文件 @
ce49124d
import
collections
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
import
paddle.v2.parameters
import
paddle.v2.optimizer
import
py_paddle.swig_paddle
as
api
from
py_paddle
import
DataProviderConverter
# Names exported by this module.
__all__ = [
    'ITrainer',
    'SGDTrainer',
    'CompleteTrainOneBatch',
    'BaseEvent',
]
class BaseEvent(object):
    """
    Marker base class for trainer events; it defines no behaviour itself.
    """
class CompleteTrainOneBatch(BaseEvent):
    """
    Event carrying the result of one training batch.

    The three constructor arguments are stored unchanged as the attributes
    ``pass_id``, ``batch_id`` and ``cost``.
    """

    def __init__(self, pass_id, batch_id, cost):
        self.pass_id, self.batch_id, self.cost = pass_id, batch_id, cost
def default_event_handler(event):
    """
    Event handler that ignores every event.

    :param event: the event to ignore.
    :return: None
    """
    return None
class ITrainer(object):
    """
    Interface of a trainer; concrete trainers override `train`.
    """

    def train(self,
              train_data_reader,
              topology,
              parameters,
              test_data_reader=None,
              event_handler=None):
        """
        Train a topology. This base implementation is abstract.

        :param train_data_reader: source of the training data.
        :param topology: the network to train.
        :param parameters: the parameters of the network.
        :param test_data_reader: optional source of the test data.
        :param event_handler: optional callback for training events.
        :raise NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError()
class SGDTrainer(ITrainer):
    """
    Trainer that runs mini-batch training on a gradient machine created from
    the topology and lets the given optimizer's local updater update the
    parameters after every batch.
    """

    def __init__(self, update_equation):
        """
        :param update_equation: optimizer that provides the parameter types
                                to enable and the local parameter updater.
        :type update_equation: paddle.v2.optimizer.Optimizer
        :raise ValueError: if update_equation is not an Optimizer.
        """
        if not isinstance(update_equation, paddle.v2.optimizer.Optimizer):
            raise ValueError(
                'update_equation should be a paddle.v2.optimizer.Optimizer')

        self.__optimizer__ = update_equation

    def train(self,
              train_data_reader,
              topology,
              parameters,
              num_passes=1,
              test_data_reader=None,
              event_handler=None,
              batch_size=32,
              data_types=None):
        """
        Train the topology for ``num_passes`` passes over the training data.

        :param train_data_reader: function returning an iterator of samples;
                                  it is called once per pass (and once more
                                  during argument validation). Every sample
                                  is indexed by input layer name.
        :param topology: the network to train.
        :type topology: ModelConfig
        :param parameters: pool whose values are copied into the gradient
                           machine before training starts.
        :type parameters: paddle.v2.parameters.IParameterPool
        :param num_passes: number of passes over the training data.
        :param test_data_reader: validated when given, but not otherwise
                                 used by this implementation.
        :param event_handler: callable receiving a CompleteTrainOneBatch
                              after every batch; defaults to a no-op.
        :param batch_size: maximum number of samples per batch.
        :param data_types: mapping from every input layer name of the
                           topology to its input type. Required even though
                           the signature defaults it to None.
        :raise ValueError: if an argument fails validation, or data_types is
                           missing or lacks an input layer.
        """
        if event_handler is None:
            event_handler = default_event_handler

        # Explicit keywords instead of **locals(): the check cannot be
        # affected by whatever local names this method happens to define.
        __check_train_args__(
            train_data_reader=train_data_reader,
            topology=topology,
            parameters=parameters,
            test_data_reader=test_data_reader,
            event_handler=event_handler)

        # Validate data_types before any Paddle object is created, so a
        # missing mapping fails fast with a clear message instead of a
        # TypeError from a membership test against None.
        input_names = list(topology.input_layer_names)
        if data_types is None:
            raise ValueError(
                'data_types should map every input layer name to its type')
        missing = [name for name in input_names if name not in data_types]
        if missing:
            raise ValueError('data_types has no entry for input layer(s): %s'
                             % ', '.join(missing))
        input_types = [data_types[name] for name in input_names]

        gm = api.GradientMachine.createFromConfigProto(
            topology, api.CREATE_MODE_NORMAL,
            self.__optimizer__.enable_types())
        assert isinstance(gm, api.GradientMachine)
        __copy_parameter_from_pool__(gm, parameters)

        updater = self.__optimizer__.create_local_updater()
        assert isinstance(updater, api.ParameterUpdater)
        updater.init(gm)

        converter = DataProviderConverter(input_types=input_types)

        def input_reorder(func):
            # Turn each sample into a list ordered like the input layers.
            for item in func():
                yield [item[name] for name in input_names]

        gm.start()

        out_args = api.Arguments.createArguments(0)
        # range() behaves the same as xrange() here and also exists on
        # Python 3.
        for pass_id in range(num_passes):
            updater.startPass()
            batches = __generator_to_batch__(
                input_reorder(train_data_reader), batch_size=batch_size)
            for batch_id, data_batch in enumerate(batches):
                pass_type = updater.startBatch(len(data_batch))
                gm.forwardBackward(converter(data_batch), out_args, pass_type)
                for each_param in gm.getParameters():
                    updater.update(each_param)

                # Get cost. numpy sums the cost values of this batch, and
                # the sum is divided by the number of samples in the batch.
                cost_vec = out_args.getSlotValue(0).copyToNumpyMat()
                cost = cost_vec.sum() / len(data_batch)
                updater.finishBatch(cost)

                event_handler(
                    CompleteTrainOneBatch(
                        pass_id=pass_id, batch_id=batch_id, cost=cost))

            updater.finishPass()
        gm.finish()
def __generator_to_batch__(generator, batch_size):
    """
    Group the items of an iterable into lists of ``batch_size`` items.

    A list is yielded each time it reaches exactly ``batch_size`` items;
    whatever is left over when the input ends is yielded as a final, shorter
    list. Nothing is yielded for an empty input.

    :param generator: iterable of items.
    :param batch_size: number of items per full batch.
    :return: generator of lists of items.
    """
    pending = []
    for sample in generator:
        pending.append(sample)
        if len(pending) != batch_size:
            continue
        yield pending
        pending = []
    if pending:
        yield pending
def __copy_parameter_from_pool__(gm, pool):
    """
    Copy parameter values from a pool into a gradient machine.

    For every parameter of the gradient machine, the pool value with the
    same name is flattened, converted to float32 and copied into the
    parameter's value buffer.

    :param gm: gradient machine whose parameters are overwritten.
    :type gm: api.GradientMachine
    :param pool: source of the parameter values.
    :type pool: paddle.v2.parameters.IParameterPool
    :return: None
    """
    assert isinstance(pool, paddle.v2.parameters.IParameterPool)
    for gm_param in gm.getParameters():
        values = pool.get_parameter(
            gm_param.getName(),
            paddle.v2.parameters.ParameterFlag.READ_ONLY)
        value_buf = gm_param.getBuf(api.PARAMETER_VALUE)
        value_buf.copyFromNumpyArray(values.flatten().astype('float32'))
def __check_train_args__(train_data_reader, topology, parameters,
                         test_data_reader, event_handler, **kwargs):
    """
    Validate the arguments of a ``train`` call.

    Note that each reader that is checked gets called once here, because the
    check inspects the object the reader returns.

    :param train_data_reader: must be callable and return an iterator.
    :param topology: must be a ModelConfig.
    :param parameters: must be a paddle.v2.parameters.IParameterPool.
    :param test_data_reader: None, or callable returning an iterator.
    :param event_handler: must be callable.
    :param kwargs: any further arguments; accepted and ignored.
    :raise ValueError: on the first argument that fails its check.
    """

    def returns_iterator(reader):
        # A non-callable reader short-circuits before it would be called.
        return callable(reader) and isinstance(reader(),
                                               collections.Iterator)

    if not returns_iterator(train_data_reader):
        raise ValueError('train_data_reader should be a function, '
                         'which can return a iterator')

    if test_data_reader is not None and not returns_iterator(
            test_data_reader):
        raise ValueError('test_data_reader should be a function, which can '
                         'return a iterator')

    if not isinstance(topology, ModelConfig):
        raise ValueError('topology should be a model config')

    if not isinstance(parameters, paddle.v2.parameters.IParameterPool):
        raise ValueError('parameters should be a parameter pool')

    if not callable(event_handler):
        raise ValueError('event handler should be a function')
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录