Commit ce49124d authored by Yu Yang

Draft for new API

Parent d6292cca
from paddle.trainer_config_helpers import *
from paddle.trainer.PyDataProvider2 import dense_vector, integer_value
import paddle.v2 as paddle_v2
import numpy
import mnist_util


def train_reader():
    train_file = './data/raw_data/train'
    generator = mnist_util.read_from_mnist(train_file)
    for item in generator:
        yield item


def network_config():
    imgs = data_layer(name='pixel', size=784)
    hidden1 = fc_layer(input=imgs, size=200)
    hidden2 = fc_layer(input=hidden1, size=200)
    inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
    cost = classification_cost(
        input=inference, label=data_layer(
            name='label', size=10))
    outputs(cost)


def event_handler(event):
    if isinstance(event, paddle_v2.trainer.CompleteTrainOneBatch):
        print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id,
                                              event.cost)
    else:
        pass


def main():
    paddle_v2.init(use_gpu=False, trainer_count=1)
    model_config = parse_network_config(network_config)
    pool = paddle_v2.parameters.create(model_config)
    for param_name in pool.get_names():
        array = pool.get_parameter(param_name)
        array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape)

    trainer = paddle_v2.trainer.SGDTrainer(
        update_equation=paddle_v2.optimizer.Adam(
            learning_rate=1e-4,
            model_average=ModelAverage(average_window=0.5),
            regularization=L2Regularization(rate=0.5)))

    trainer.train(train_data_reader=train_reader,
                  topology=model_config,
                  parameters=pool,
                  event_handler=event_handler,
                  batch_size=32,  # batch size should be refactored into the
                                  # data reader
                  data_types={  # data_types will be removed; it should live
                                # in the network topology
                      'pixel': dense_vector(784),
                      'label': integer_value(10)
                  })


if __name__ == '__main__':
    main()
@@ -11,7 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import optimizer
import parameters
import py_paddle.swig_paddle as api
import trainer

__all__ = ['optimizer', 'parameters', 'init', 'trainer']


def init(**kwargs):
    # Convert each keyword argument into a PaddlePaddle command-line flag.
    args = []
    for key in kwargs.keys():
        args.append('--%s=%s' % (key, str(kwargs[key])))
    api.initPaddle(*args)
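# Usage sketch (mirrors the mnist demo above): each keyword becomes one flag,
# so init(use_gpu=False, trainer_count=1) ends up calling
# api.initPaddle('--use_gpu=False', '--trainer_count=1').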
import numpy as np
from paddle.proto.ModelConfig_pb2 import ModelConfig
from paddle.proto.ParameterConfig_pb2 import ParameterConfig
__all__ = ['IParameterPool', 'create', 'ParameterFlag']
class ParameterFlag(object):
    """
    The flag for IParameterPool.get_parameter. If writable, operations on the
    returned numpy array are also applied to the Paddle parameter, but this
    will be slower in GPU mode.
    """
    READ_ONLY = 0x01
    WRITE_ONLY = 0x02
    READ_WRITE = READ_ONLY | WRITE_ONLY
class IParameterPool(object):
    """
    Interface of a parameter pool. The parameter pool is a dictionary of
    parameters. Users can modify a parameter or customize its value via
    `get_parameter`.

    .. code-block:: python

        pool = paddle.parameters.create(topo1, topo2)
        embedding = pool.get_parameter("embedding")
        assert isinstance(embedding, numpy.ndarray)
        print embedding[1:]
    """

    def get_parameter(self, name, flag=ParameterFlag.READ_WRITE):
        """
        Get a parameter by name.

        :param name: parameter name.
        :type name: basestring
        :param flag: the flag for the return value: readable or writable.
        :type flag: int
        :return: the parameter value.
        :rtype: np.ndarray
        """
        raise NotImplementedError()

    def get_names(self):
        """
        Get all parameter names.

        :return: all parameter names.
        :rtype: list
        """
        raise NotImplementedError()
class NumpyParameterPool(IParameterPool):
    def __init__(self):
        self.__param_configs__ = dict()
        self.__params__ = dict()

    def append(self, conf):
        if not isinstance(conf, ParameterConfig):
            raise ValueError("conf must be a ParameterConfig")
        if not conf.IsInitialized():
            raise ValueError("conf is not initialized")
        self.__param_configs__[conf.name] = conf
        self.__params__[conf.name] = None

    def get_config(self, name):
        if name not in self.__param_configs__:
            raise ValueError("parameter %s is not appended" % name)
        return self.__param_configs__[name]

    def get_parameter(self, name, *args, **kwargs):
        if name not in self.__params__:
            raise ValueError("parameter %s is not appended" % name)
        param = self.__params__[name]
        if param is None:
            # Lazily allocate the buffer from the shape in the config.
            shape = self.__param_configs__[name].dims
            if len(shape) == 0:
                raise ValueError("parameter %s has no shape" % name)
            param = np.ndarray(
                shape=[int(item) for item in shape], dtype='float32')
            self.__params__[name] = param
        return param

    def get_names(self):
        return self.__param_configs__.keys()
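# A quick sketch of the lazy allocation above: the first get_parameter() call
# builds the numpy buffer from ParameterConfig.dims; later calls return the
# same array, so in-place writes persist:
#     pool = NumpyParameterPool()
#     pool.append(conf)       # conf: some initialized ParameterConfig
#     w = pool.get_parameter(conf.name)
#     w[:] = 0.0              # the next get_parameter(conf.name) sees zeros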
def create(*topologies):
    """
    Create a parameter pool from one or more topologies.

    :param topologies: one or more model configurations.
    :type topologies: ModelConfig
    :return: a pool containing every parameter in the given topologies.
    :rtype: NumpyParameterPool
    """
    pool = NumpyParameterPool()
    for topo in topologies:
        if not isinstance(topo, ModelConfig):
            raise ValueError(
                'create must be passed topologies of type ModelConfig')
        for param in topo.parameters:
            pool.append(param)
    return pool
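# Usage sketch, following the mnist demo above (parameter names come from the
# network topology, so the loop below works for any config):
#     pool = create(parse_network_config(network_config))
#     for name in pool.get_names():
#         array = pool.get_parameter(name)
#         array[:] = np.random.uniform(low=-1.0, high=1.0, size=array.shape)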
import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig
import paddle.v2.parameters
import paddle.v2.optimizer
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
__all__ = ['ITrainer', 'SGDTrainer', 'CompleteTrainOneBatch', 'BaseEvent']
class BaseEvent(object):
    """
    Just a marker class.
    """
    pass


class CompleteTrainOneBatch(BaseEvent):
    def __init__(self, pass_id, batch_id, cost):
        self.pass_id = pass_id
        self.batch_id = batch_id
        self.cost = cost
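# Example handler (a sketch mirroring the mnist demo above):
#     def event_handler(event):
#         if isinstance(event, CompleteTrainOneBatch):
#             print "Pass %d, Batch %d, Cost %f" % (
#                 event.pass_id, event.batch_id, event.cost)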
def default_event_handler(event):
    pass
class ITrainer(object):
    def train(self,
              train_data_reader,
              topology,
              parameters,
              test_data_reader=None,
              event_handler=None):
        raise NotImplementedError()


class SGDTrainer(ITrainer):
    def __init__(self, update_equation):
        if not isinstance(update_equation, paddle.v2.optimizer.Optimizer):
            raise ValueError('update_equation must be an Optimizer')
        self.__optimizer__ = update_equation
    def train(self,
              train_data_reader,
              topology,
              parameters,
              num_passes=1,
              test_data_reader=None,
              event_handler=None,
              batch_size=32,
              data_types=None):
        if event_handler is None:
            event_handler = default_event_handler

        __check_train_args__(**locals())

        gm = api.GradientMachine.createFromConfigProto(
            topology, api.CREATE_MODE_NORMAL,
            self.__optimizer__.enable_types())
        assert isinstance(gm, api.GradientMachine)
        __copy_parameter_from_pool__(gm, parameters)

        updater = self.__optimizer__.create_local_updater()
        assert isinstance(updater, api.ParameterUpdater)
        updater.init(gm)

        data_types_lists = []
        for each in topology.input_layer_names:
            if each not in data_types:
                raise ValueError('no data type given for input layer %s' %
                                 each)
            data_types_lists.append(data_types[each])

        converter = DataProviderConverter(input_types=data_types_lists)

        def input_reorder(func):
            # Reorder each {layer_name: value} dict into a list that follows
            # the order of topology.input_layer_names.
            for item in func():
                retv = []
                for __layer_name__ in topology.input_layer_names:
                    retv.append(item[__layer_name__])
                yield retv

        gm.start()
        out_args = api.Arguments.createArguments(0)
        for pass_id in xrange(num_passes):
            updater.startPass()
            for batch_id, data_batch in enumerate(
                    __generator_to_batch__(
                        input_reorder(train_data_reader),
                        batch_size=batch_size)):
                pass_type = updater.startBatch(len(data_batch))
                gm.forwardBackward(converter(data_batch), out_args, pass_type)
                for each_param in gm.getParameters():
                    updater.update(each_param)
                # Get cost. We use numpy to calculate the total cost for this
                # batch.
                cost_vec = out_args.getSlotValue(0)
                cost_vec = cost_vec.copyToNumpyMat()
                cost = cost_vec.sum() / len(data_batch)
                updater.finishBatch(cost)

                event_handler(
                    CompleteTrainOneBatch(
                        pass_id=pass_id, batch_id=batch_id, cost=cost))
            updater.finishPass()
        gm.finish()
def __generator_to_batch__(generator, batch_size):
    ret_val = list()
    for each_item in generator:
        ret_val.append(each_item)
        if len(ret_val) == batch_size:
            yield ret_val
            ret_val = list()
    if len(ret_val) != 0:
        yield ret_val
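# For example, __generator_to_batch__(iter(range(5)), batch_size=2) yields
# [0, 1], then [2, 3], and finally the partial batch [4].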
def __copy_parameter_from_pool__(gm, pool):
    """
    Copy every parameter value in the pool into the gradient machine.

    :param gm: the gradient machine to initialize.
    :type gm: api.GradientMachine
    :param pool: the source parameter pool.
    :type pool: paddle.v2.parameters.IParameterPool
    """
    assert isinstance(pool, paddle.v2.parameters.IParameterPool)
    for each_param in gm.getParameters():
        name = each_param.getName()
        param = pool.get_parameter(name,
                                   paddle.v2.parameters.ParameterFlag.READ_ONLY)
        each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(
            param.flatten().astype('float32'))
def __check_train_args__(train_data_reader, topology, parameters,
                         test_data_reader, event_handler, **kwargs):
    if not callable(train_data_reader) or not isinstance(
            train_data_reader(), collections.Iterator):
        raise ValueError('train_data_reader should be a function '
                         'which returns an iterator')

    if test_data_reader is not None:
        if not callable(test_data_reader) or not isinstance(
                test_data_reader(), collections.Iterator):
            raise ValueError('test_data_reader should be a function '
                             'which returns an iterator')

    if not isinstance(topology, ModelConfig):
        raise ValueError('topology should be a model config')

    if not isinstance(parameters, paddle.v2.parameters.IParameterPool):
        raise ValueError('parameters should be a parameter pool')

    if not callable(event_handler):
        raise ValueError('event handler should be a function')
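# Note: any zero-argument callable returning an iterator of
# {input_layer_name: value} dicts satisfies this check; train_reader in the
# mnist demo above is one example.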