Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
ce49124d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ce49124d
编写于
2月 13, 2017
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Draft for new API
上级
d6292cca
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
344 additions
and
2 deletions
+344
-2
demo/mnist/api_train_v2.py
demo/mnist/api_train_v2.py
+61
-0
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+12
-2
python/paddle/v2/parameters.py
python/paddle/v2/parameters.py
+111
-0
python/paddle/v2/trainer.py
python/paddle/v2/trainer.py
+160
-0
未找到文件。
demo/mnist/api_train_v2.py
0 → 100644
浏览文件 @
ce49124d
from
paddle.trainer_config_helpers
import
*
from
paddle.trainer.PyDataProvider2
import
dense_vector
,
integer_value
import
paddle.v2
as
paddle_v2
import
numpy
import
mnist_util
def train_reader():
    """Yield MNIST training samples one at a time.

    The samples are produced by ``mnist_util.read_from_mnist`` applied to
    the raw training file ``./data/raw_data/train``.
    """
    samples = mnist_util.read_from_mnist('./data/raw_data/train')
    for sample in samples:
        yield sample
def network_config():
    """Declare the MNIST classifier topology.

    A 784-wide ``pixel`` input feeds two 200-unit fully connected layers
    followed by a 10-unit softmax layer. The classification cost of that
    prediction against the ``label`` input is registered as the output.
    """
    pixels = data_layer(name='pixel', size=784)
    first_hidden = fc_layer(input=pixels, size=200)
    second_hidden = fc_layer(input=first_hidden, size=200)
    prediction = fc_layer(
        input=second_hidden, size=10, act=SoftmaxActivation())
    # The label layer is created after the prediction layer and before the
    # cost layer, so the order in which layers are declared is unchanged.
    labels = data_layer(name='label', size=10)
    outputs(classification_cost(input=prediction, label=labels))
def event_handler(event):
    """Print training progress after each finished batch.

    :param event: event object passed in by the trainer. Only
        ``paddle_v2.trainer.CompleteTrainOneBatch`` events are reported;
        every other event is ignored.
    """
    if isinstance(event, paddle_v2.trainer.CompleteTrainOneBatch):
        # Parenthesised single-argument form: it prints the same text with
        # the Python 2 print statement and the Python 3 print function.
        print("Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id,
                                              event.cost))
def main():
    """Train the MNIST network through the v2 API on one CPU trainer."""
    paddle_v2.init(use_gpu=False, trainer_count=1)

    model_config = parse_network_config(network_config)
    pool = paddle_v2.parameters.create(model_config)

    # Overwrite every parameter in place with uniform noise from [-1, 1).
    for name in pool.get_names():
        values = pool.get_parameter(name)
        values[:] = numpy.random.uniform(
            low=-1.0, high=1.0, size=values.shape)

    adam = paddle_v2.optimizer.Adam(
        learning_rate=1e-4,
        model_average=ModelAverage(average_window=0.5),
        regularization=L2Regularization(rate=0.5))
    sgd = paddle_v2.trainer.SGDTrainer(update_equation=adam)

    # NOTE: data_types is slated for removal; the input types should come
    # from the network topology instead.
    input_types = {
        'pixel': dense_vector(784),
        'label': integer_value(10),
    }

    # NOTE: batch_size is expected to move into the data reader later.
    sgd.train(
        train_data_reader=train_reader,
        topology=model_config,
        parameters=pool,
        event_handler=event_handler,
        batch_size=32,
        data_types=input_types)
if __name__ == "__main__":
    # Script entry point: run the demo training job.
    main()
python/paddle/v2/__init__.py
浏览文件 @
ce49124d
...
...
@@ -11,7 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
optimizer
import
parameters
import
py_paddle.swig_paddle
as
api
import
trainer
__all__
=
[
'optimizer'
,
'parameters'
,
'init'
,
'trainer'
]
def init(**kwargs):
    """Initialize Paddle from keyword arguments.

    Every ``key=value`` pair is rendered as a ``--key=value`` command-line
    style flag and the resulting flags are passed to ``api.initPaddle``.

    :param kwargs: flag names mapped to their values; each value is
        converted with ``str`` before it is formatted.
    """
    args = ['--%s=%s' % (key, str(value)) for key, value in kwargs.items()]
    api.initPaddle(*args)
python/paddle/v2/parameters.py
0 → 100644
浏览文件 @
ce49124d
import
numpy
as
np
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
from
paddle.proto.ParameterConfig_pb2
import
ParameterConfig
__all__
=
[
'IParameterPool'
,
'create'
,
'ParameterFlag'
]
class ParameterFlag(object):
    """
    Access flags accepted by IParameterPool.get_parameter.

    With a writable flag, operations on the returned numpy array also apply
    to the Paddle parameter, at the price of being slower in GPU mode.
    """
    # Single-bit flags; READ_WRITE is the union of both bits.
    READ_ONLY = 1 << 0
    WRITE_ONLY = 1 << 1
    READ_WRITE = READ_ONLY | WRITE_ONLY
class IParameterPool(object):
    """
    Interface of a parameter pool.

    A parameter pool is a dictionary of parameters. Users read, modify or
    customize a parameter value through `get_parameter`.

    ..  code-block:: python

        pool = paddle.parameters.create(topo1, topo2)
        embedding = pool.get_parameter("embedding")
        assert isinstance(embedding, numpy.ndarray)
        print embedding[1:]
    """

    def get_parameter(self, name, flag=ParameterFlag.READ_WRITE):
        """
        Look up one parameter by its name.

        :param name: name of the parameter.
        :type name: basestring
        :param flag: access flag for the returned value (readable or
                     writable), one of the ParameterFlag constants.
        :type flag: int
        :return: the value of the parameter.
        :rtype: np.ndarray
        """
        raise NotImplementedError()

    def get_names(self):
        """
        List the names of all parameters in the pool.

        :return: all parameter names.
        :rtype: list
        """
        raise NotImplementedError()
class NumpyParameterPool(IParameterPool):
    """
    Parameter pool that keeps every parameter as a numpy array.

    Parameter configs are registered with `append`. The array behind a
    parameter is allocated on the first `get_parameter` call for its name
    and the same array object is returned on every later call.
    """

    def __init__(self):
        # name -> ParameterConfig
        self.__param_configs__ = {}
        # name -> numpy array, or None while the array is not allocated yet
        self.__params__ = {}

    def append(self, conf):
        """
        Register a parameter config under ``conf.name``.

        :param conf: config of the parameter to add.
        :type conf: ParameterConfig
        :raise ValueError: if conf is not an initialized ParameterConfig.
        """
        if not isinstance(conf, ParameterConfig):
            raise ValueError("conf must be ParameterConfig")
        if not conf.IsInitialized():
            raise ValueError("conf is not initialized")

        name = conf.name
        self.__param_configs__[name] = conf
        # Registering (again) clears any array stored under this name.
        self.__params__[name] = None

    def get_config(self, name):
        """
        Return the config registered under ``name``.

        :raise ValueError: if no config with that name was appended.
        """
        try:
            return self.__param_configs__[name]
        except KeyError:
            raise ValueError("parameter %s is not appended" % name)

    def get_parameter(self, name, *args, **kwargs):
        """
        Return the numpy array of a parameter, allocating it on first use.

        Extra positional and keyword arguments (such as the access flag)
        are accepted and ignored. A freshly allocated array is float32 and
        its content is not initialized.

        :raise ValueError: if the name is unknown or its config has no dims.
        """
        if name not in self.__params__:
            raise ValueError("parameter %s is not appended" % name)

        cached = self.__params__[name]
        if cached is not None:
            return cached

        dims = self.__param_configs__[name].dims
        if len(dims) == 0:
            raise ValueError("parameter %s is no shape" % name)

        fresh = np.ndarray(shape=[int(dim) for dim in dims], dtype='float32')
        self.__params__[name] = fresh
        return fresh

    def get_names(self):
        """Return the names of all registered parameters."""
        return self.__param_configs__.keys()
def create(*topologies):
    """
    Build a parameter pool holding the parameters of all given topologies.

    :param topologies: model configs whose ``parameters`` entries are
                       appended to the pool, in the order given.
    :type topologies: ModelConfig
    :return: the populated pool.
    :rtype: NumpyParameterPool
    :raise ValueError: if any argument is not a ModelConfig.
    """
    pool = NumpyParameterPool()
    for topology in topologies:
        if isinstance(topology, ModelConfig):
            for param_conf in topology.parameters:
                pool.append(param_conf)
        else:
            raise ValueError(
                'create must pass a topologies which type is ModelConfig')
    return pool
python/paddle/v2/trainer.py
0 → 100644
浏览文件 @
ce49124d
import
collections
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
import
paddle.v2.parameters
import
paddle.v2.optimizer
import
py_paddle.swig_paddle
as
api
from
py_paddle
import
DataProviderConverter
__all__
=
[
'ITrainer'
,
'SGDTrainer'
,
'CompleteTrainOneBatch'
,
'BaseEvent'
]
class BaseEvent(object):
    """
    Marker base class for trainer events; it defines no data or behaviour.
    """
class CompleteTrainOneBatch(BaseEvent):
    """
    Event describing one finished training batch.
    """

    def __init__(self, pass_id, batch_id, cost):
        """
        :param pass_id: index of the pass the batch belongs to.
        :param batch_id: index of the batch inside that pass.
        :param cost: cost value reported for the batch.
        """
        self.cost = cost
        self.batch_id = batch_id
        self.pass_id = pass_id
def default_event_handler(event):
    """Ignore the event; used when the caller supplies no handler."""
    return None
class ITrainer(object):
    """
    Interface of a trainer; subclasses implement `train`.
    """

    def train(self,
              train_data_reader,
              topology,
              parameters,
              test_data_reader=None,
              event_handler=None):
        """
        Train a topology; must be overridden by concrete trainers.

        :param train_data_reader: source of the training data.
        :param topology: network to train.
        :param parameters: parameters of the network.
        :param test_data_reader: optional source of test data.
        :param event_handler: optional callback for training events.
        :raise NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
class SGDTrainer(ITrainer):
    """
    Trainer that runs forward/backward on mini-batches and applies the
    local parameter updater created by the given optimizer.
    """

    def __init__(self, update_equation):
        """
        :param update_equation: optimizer that provides the parameter types
                                and the local updater used by `train`.
        :type update_equation: paddle.v2.optimizer.Optimizer
        :raise ValueError: if update_equation is not an Optimizer.
        """
        if not isinstance(update_equation, paddle.v2.optimizer.Optimizer):
            raise ValueError(
                'update_equation should be a paddle.v2.optimizer.Optimizer')
        self.__optimizer__ = update_equation

    def train(self,
              train_data_reader,
              topology,
              parameters,
              num_passes=1,
              test_data_reader=None,
              event_handler=None,
              batch_size=32,
              data_types=None):
        """
        Train the topology for ``num_passes`` passes over the reader.

        :param train_data_reader: function returning an iterator of samples;
                                  it is called once per pass. Each sample is
                                  indexed by input layer name.
        :param topology: network to train.
        :type topology: ModelConfig
        :param parameters: pool whose values are copied into the gradient
                           machine before training starts.
        :type parameters: paddle.v2.parameters.IParameterPool
        :param num_passes: number of passes over the training data.
        :type num_passes: int
        :param test_data_reader: validated like train_data_reader when
                                 given, but not otherwise used here.
        :param event_handler: callable invoked with a CompleteTrainOneBatch
                              after every batch; defaults to a no-op.
        :param batch_size: number of samples per batch; the last batch of a
                           pass may be smaller.
        :type batch_size: int
        :param data_types: maps every input layer name of the topology to
                           its input type. None is treated as empty.
        :type data_types: dict
        :raise ValueError: if an argument fails validation or an input
                           layer has no entry in data_types.
        """
        if event_handler is None:
            event_handler = default_event_handler
        if data_types is None:
            # The default used to crash with a TypeError in the membership
            # test below; an empty mapping yields the ValueError instead.
            data_types = {}

        __check_train_args__(
            train_data_reader=train_data_reader,
            topology=topology,
            parameters=parameters,
            test_data_reader=test_data_reader,
            event_handler=event_handler)

        # Validate the input types before any Paddle object is created.
        input_names = list(topology.input_layer_names)
        missing = [name for name in input_names if name not in data_types]
        if missing:
            raise ValueError('data_types has no entry for input layer(s): %s'
                             % ', '.join(missing))
        data_types_lists = [data_types[name] for name in input_names]

        gm = api.GradientMachine.createFromConfigProto(
            topology, api.CREATE_MODE_NORMAL,
            self.__optimizer__.enable_types())
        assert isinstance(gm, api.GradientMachine)
        __copy_parameter_from_pool__(gm, parameters)

        updater = self.__optimizer__.create_local_updater()
        assert isinstance(updater, api.ParameterUpdater)
        updater.init(gm)

        converter = DataProviderConverter(input_types=data_types_lists)

        def input_reorder(func):
            # Turn each name-indexed sample into a list ordered like the
            # input layers of the topology.
            for item in func():
                yield [item[__layer_name__] for __layer_name__ in input_names]

        gm.start()
        out_args = api.Arguments.createArguments(0)
        for pass_id in xrange(num_passes):
            updater.startPass()
            batches = __generator_to_batch__(
                input_reorder(train_data_reader), batch_size=batch_size)
            for batch_id, data_batch in enumerate(batches):
                pass_type = updater.startBatch(len(data_batch))
                gm.forwardBackward(converter(data_batch), out_args, pass_type)
                for each_param in gm.getParameters():
                    updater.update(each_param)

                # Get cost. We use numpy to calculate total cost for this
                # batch, then divide by the number of samples in the batch.
                cost_vec = out_args.getSlotValue(0).copyToNumpyMat()
                cost = cost_vec.sum() / len(data_batch)
                updater.finishBatch(cost)

                event_handler(
                    CompleteTrainOneBatch(
                        pass_id=pass_id, batch_id=batch_id, cost=cost))

            updater.finishPass()
        gm.finish()
def __generator_to_batch__(generator, batch_size):
    """
    Group the items of an iterable into lists of ``batch_size`` items.

    The last list may be shorter when the items run out; an empty list is
    never yielded.

    :param generator: iterable providing the items.
    :param batch_size: number of items that completes a batch.
    :return: generator of lists of items.
    """
    batch = []
    for element in generator:
        batch.append(element)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch
def __copy_parameter_from_pool__(gm, pool):
    """
    Copy the pool value of every gradient machine parameter into the
    parameter's value buffer.

    :param gm: gradient machine whose parameters are overwritten.
    :type gm: api.GradientMachine
    :param pool: source of the values, looked up by parameter name.
    :type pool: paddle.v2.parameters.IParameterPool
    :return: None
    """
    pool_module = paddle.v2.parameters
    assert isinstance(pool, pool_module.IParameterPool)

    read_only = pool_module.ParameterFlag.READ_ONLY
    for target in gm.getParameters():
        source = pool.get_parameter(target.getName(), read_only)
        value_buf = target.getBuf(api.PARAMETER_VALUE)
        # The buffer is filled from a flat float32 copy of the pool array.
        value_buf.copyFromNumpyArray(source.flatten().astype('float32'))
def __check_train_args__(train_data_reader, topology, parameters,
                         test_data_reader, event_handler, **kwargs):
    """
    Validate the arguments of a trainer's ``train`` call.

    :param train_data_reader: must be a callable that returns an iterator.
    :param topology: must be a ModelConfig.
    :param parameters: must be a paddle.v2.parameters.IParameterPool.
    :param test_data_reader: None, or a callable that returns an iterator.
    :param event_handler: must be callable.
    :param kwargs: any further arguments; accepted and ignored.
    :raise ValueError: on the first argument that fails its check.
    """
    # ``collections.Iterator`` is gone in Python 3.10+; the ABC lives in
    # ``collections.abc`` on Python 3 and directly in ``collections`` on
    # Python 2, so resolve it from whichever place exists.
    try:
        from collections.abc import Iterator
    except ImportError:
        from collections import Iterator

    def _is_reader(reader):
        # A reader is a function whose call result is an iterator. The
        # reader is called once here just to inspect what it returns.
        return callable(reader) and isinstance(reader(), Iterator)

    if not _is_reader(train_data_reader):
        raise ValueError('train_data_reader should be a function, '
                         'which can return a iterator')

    if test_data_reader is not None and not _is_reader(test_data_reader):
        raise ValueError('test_data_reader should be a function, which can '
                         'return a iterator')

    if not isinstance(topology, ModelConfig):
        raise ValueError('topology should be a model config')

    if not isinstance(parameters, paddle.v2.parameters.IParameterPool):
        raise ValueError('parameters should be a parameter pool')

    if not callable(event_handler):
        raise ValueError('event handler should be a function')
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录