diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index ab1581325aa95dfc4378f89a31d7dcd8aa53c210..d0ccc2c5aa8cf044b0ac601f182a0212b4748e06 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -1,6 +1,6 @@
 from paddle.trainer_config_helpers import *
 from paddle.trainer.PyDataProvider2 import dense_vector, integer_value
-import paddle.v2 as paddle_v2
+import paddle.v2 as paddle
 import numpy
 import mnist_util
 
@@ -24,7 +24,7 @@ def network_config():
 
 
 def event_handler(event):
-    if isinstance(event, paddle_v2.trainer.CompleteTrainOneBatch):
+    if isinstance(event, paddle.trainer.CompleteTrainOneBatch):
         print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id,
                                               event.cost)
     else:
@@ -32,31 +32,16 @@ def event_handler(event):
 
 
 def main():
-    paddle_v2.init(use_gpu=False, trainer_count=1)
+    paddle.init(use_gpu=False, trainer_count=1)
     model_config = parse_network_config(network_config)
-    pool = paddle_v2.parameters.create(model_config)
+    pool = paddle.parameters.create(model_config)
     for param_name in pool.get_names():
         array = pool.get_parameter(param_name)
         array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape)
 
-    def nag(v, g, vel_t_1):
-        """
-        NAG Optimizer. A optimizer which Paddle CPP is not implemented.
-        https://arxiv.org/pdf/1212.0901v2.pdf eq.6 eq.7
-        :param v: parameter value
-        :param g: parameter gradient
-        :param vel_t_1: t-1 velocity
-        :return:
-        """
-        mu = 0.09
-        e = 0.00001
+    adam_optimizer = paddle.optimizer.Adam(learning_rate=1e-3)
 
-        vel_t = mu * vel_t_1 - e * g
-
-        v[:] = v + (mu**2) * vel_t - (1 + mu) * e * g
-        vel_t_1[:] = vel_t
-
-    trainer = paddle_v2.trainer.SGDTrainer(update_equation=nag)
+    trainer = paddle.trainer.SGDTrainer(update_equation=adam_optimizer)
 
     trainer.train(train_data_reader=train_reader,
                   topology=model_config,
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index 9620e7542081007732d8fe599c23483488782400..41dfb522dba6174d80e560904f9cadcc0e9cd6bf 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -2,7 +2,7 @@ import collections
 from paddle.proto.ModelConfig_pb2 import ModelConfig
 from paddle.proto.ParameterConfig_pb2 import ParameterConfig
 from . import parameters as v2_parameters
-import numpy
+from . import optimizer as v2_optimizer
 import py_paddle.swig_paddle as api
 from py_paddle import DataProviderConverter
 
@@ -93,72 +93,6 @@ class LazyParameterPool(v2_parameters.IParameterPool):
         self.arrays = dict()
 
 
-class CustomizeUpdateEquation(object):
-    def __init__(self, callback):
-        self.__callback__ = callback
-        if self.__callback__.func_code.co_argcount < 2:
-            raise ValueError(
-                "The update equation at least should contain 2 arguments, "
-                "first is value, second is gradient")
-
-        self.local_params_count = self.__callback__.func_code.co_argcount - 2
-        self.local_params = dict()
-
-    def enable_types(self):
-        return [api.PARAMETER_VALUE, api.PARAMETER_GRADIENT]
-
-    def init(self, gradient_machine):
-        assert isinstance(gradient_machine, api.GradientMachine)
-        for param in gradient_machine.getParameters():
-            conf = param.getConfig().toProto()
-            shape = map(int, conf.dims)
-            self.local_params[conf.name] = []
-            for _ in xrange(self.local_params_count):
-                self.local_params[conf.name].append(
-                    numpy.zeros(
-                        shape=shape, dtype='float32'))
-
-    def create_local_updater(self):
-        return self
-
-    def startPass(self):
-        pass
-
-    def finishPass(self):
-        pass
-
-    def startBatch(self, batch_size):
-        return api.PASS_TRAIN
-
-    def finishBatch(self, cost):
-        pass
-
-    def update(self, param):
-        conf = param.getConfig().toProto()
-        shape = map(int, conf.dims)
-        if not api.isUsingGpu():
-            v = param.getBuf(api.PARAMETER_VALUE).toNumpyArrayInplace().reshape(
-                shape)
-            g = param.getBuf(api.PARAMETER_GRADIENT).toNumpyArrayInplace(
-            ).reshape(shape)
-
-        else:
-            v = param.getBuf(api.PARAMETER_VALUE).copyToNumpyArray().reshape(
-                shape)
-            g = param.getBuf(api.PARAMETER_GRADIENT).copyToNumpyArray().reshape(
-                shape)
-
-        args = [v, g]
-        for arg in self.local_params[conf.name]:
-            args.append(arg)
-        self.__callback__(*args)
-
-        if api.isUsingGpu():
-            param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(v.flatten(
-            ).astype('float32'))
-        # discard gradient changed.
-
-
 class SGDTrainer(ITrainer):
     def __init__(self, update_equation):
         """
@@ -166,9 +100,9 @@ class SGDTrainer(ITrainer):
 
         :param update_equation: Maybe we should give a DSL for update equation?
         """
-        if callable(update_equation):
-            update_equation = CustomizeUpdateEquation(update_equation)
-
+        if not isinstance(update_equation, v2_optimizer.Optimizer):
+            raise ValueError("update equation parameter must be "
+                             "paddle.v2.optimizer.Optimizer")
        self.__optimizer__ = update_equation
 
     def train(self,
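Context note (not part of the patch): the new import `from . import optimizer as v2_optimizer` and the `paddle.optimizer.Adam(learning_rate=1e-3)` call assume a `python/paddle/v2/optimizer.py` module that this diff does not include. Below is a minimal sketch of what such a module could look like, assuming it simply wraps the existing v1 `settings()` helper from `paddle.trainer_config_helpers` to build the `OptimizationConfig` proto for the C++ updater; every name and signature here is illustrative, not taken from this patch.

# Hypothetical sketch of python/paddle/v2/optimizer.py (illustrative only).
# Assumption: the v2 Optimizer reuses the v1 settings() helper so that
# SGDTrainer can hand an OptimizationConfig to the existing C++ updater.
import py_paddle.swig_paddle as swig_api
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.trainer_config_helpers.optimizers as v1_optimizers


class Optimizer(object):
    def __init__(self, **kwargs):
        # Build the OptimizationConfig proto by running the v1 settings()
        # helper inside a parser callback, then wrap it for the swig API.
        def __impl__():
            v1_optimizers.settings(batch_size=1, **kwargs)

        self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config(
            __impl__)
        self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto(
            self.__opt_conf_proto__)


class Adam(Optimizer):
    def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
        # Forward Adam hyper-parameters to the v1 AdamOptimizer; extra
        # keyword arguments such as learning_rate pass through to settings().
        learning_method = v1_optimizers.AdamOptimizer(
            beta1=beta1, beta2=beta2, epsilon=epsilon)
        super(Adam, self).__init__(learning_method=learning_method, **kwargs)

Under this assumption, the demo's `paddle.optimizer.Adam(learning_rate=1e-3)` would forward `learning_rate` through `settings()`, and the resulting object passes the new `isinstance(update_equation, v2_optimizer.Optimizer)` check in `SGDTrainer.__init__`.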