diff --git a/demo/image_classification/api_v2_resnet.py b/demo/image_classification/api_v2_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..19d20540780becf504973a23b50445d4b65dc2ef --- /dev/null +++ b/demo/image_classification/api_v2_resnet.py @@ -0,0 +1,74 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.v2 as paddle + +__all__ = ['resnet_cifar10'] + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + active_type=paddle.activation.Relu(), + ch_in=None): + tmp = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=ch_in, + num_filters=ch_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def shortcut(ipt, n_in, n_out, stride): + if n_in != n_out: + return conv_bn_layer(ipt, n_out, 1, stride, 0, + paddle.activation.Linear()) + else: + return ipt + + +def basicblock(ipt, ch_out, stride): + ch_in = ch_out * 2 + tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) + short = shortcut(ipt, ch_in, ch_out, stride) + return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) + + +def layer_warp(block_func, ipt, features, count, stride): + tmp = block_func(ipt, features, stride) + for i in range(1, count): + tmp = block_func(tmp, features, 1) + return tmp + + +def resnet_cifar10(ipt, depth=32): + # depth should be one of 20, 32, 44, 56, 110, 1202 + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + nStages = {16, 64, 128} + conv1 = conv_bn_layer( + ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + return pool diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py new file mode 100644 index 0000000000000000000000000000000000000000..585f61c6fa4c89c8621815a168742429ac236898 --- /dev/null +++ b/demo/image_classification/api_v2_train.py @@ -0,0 +1,91 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +import sys +import paddle.v2 as paddle +from api_v2_vgg import vgg_bn_drop +from api_v2_resnet import resnet_cifar10 + + +def main(): + datadim = 3 * 32 * 32 + classdim = 10 + + # PaddlePaddle init + paddle.init(use_gpu=True, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(datadim)) + + # Add neural network config + # option 1. resnet + net = resnet_cifar10(image, depth=32) + # option 2. vgg + # net = vgg_bn_drop(image) + + out = paddle.layer.fc(input=net, + size=classdim, + act=paddle.activation.Softmax()) + + lbl = paddle.layer.data( + name="label", type=paddle.data_type.integer_value(classdim)) + cost = paddle.layer.classification_cost(input=out, label=lbl) + + # Create parameters + parameters = paddle.parameters.create(cost) + + # Create optimizer + momentum_optimizer = paddle.optimizer.Momentum( + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), + learning_rate=0.1 / 128.0, + learning_rate_decay_a=0.1, + learning_rate_decay_b=50000 * 100, + learning_rate_schedule='discexp', + batch_size=128) + + # End batch and end pass event handler + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.reader.batched( + paddle.dataset.cifar.test10(), batch_size=128), + reader_dict={'image': 0, + 'label': 1}) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + + # Create trainer + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=momentum_optimizer) + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=50000), + batch_size=128), + num_passes=5, + event_handler=event_handler, + reader_dict={'image': 0, + 'label': 1}) + + +if __name__ == '__main__': + main() diff --git a/demo/image_classification/api_v2_vgg.py b/demo/image_classification/api_v2_vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..1e0e6b93adde30425f17aa9cd07542275f4fec37 --- /dev/null +++ b/demo/image_classification/api_v2_vgg.py @@ -0,0 +1,47 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.v2 as paddle + +__all__ = ['vgg_bn_drop'] + + +def vgg_bn_drop(input): + def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): + return paddle.networks.img_conv_group( + input=ipt, + num_channels=num_channels, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act=paddle.activation.Relu(), + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type=paddle.pooling.Max()) + + conv1 = conv_block(input, 64, 2, [0.3, 0], 3) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) + fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) + bn = paddle.layer.batch_norm( + input=fc1, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) + return fc2 diff --git a/demo/introduction/api_train_v2.py b/demo/introduction/api_train_v2.py index 49496c7f085723e63903ecd1510bd8c15bdab6cb..75dd65f9fc8cd8e7fab5bf30a6337574a645e89f 100644 --- a/demo/introduction/api_train_v2.py +++ b/demo/introduction/api_train_v2.py @@ -40,8 +40,7 @@ def main(): reader_dict={'x': 0, 'y': 1}) if event.pass_id % 10 == 0: - print "Test %d, Cost %f, %s" % (event.pass_id, event.cost, - result.metrics) + print "Test %d, %s" % (event.pass_id, result.metrics) # training trainer.train( diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp index 538ca2999f8f05afc45ac2d2f526133c8024f066..dcb5fe086fdccf8ec62ee52cbaaac4b7dbbe2f9d 100644 --- a/paddle/api/GradientMachine.cpp +++ b/paddle/api/GradientMachine.cpp @@ -142,6 +142,20 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { } } +size_t GradientMachine::getNonStaticParameterSize() const { + return m->machine->getNonStaticParameters().size(); +} + +Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) { + auto params = m->machine->getNonStaticParameters(); + if (i < params.size()) { + return Parameter::createFromSharedPtr( + &m->machine->getNonStaticParameters()[i]); + } else { + throw RangeError(); + } +} + void GradientMachine::randParameters() { m->machine->randParameters(); } Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index d99e9a4ad48ea4764c7a1ea56c507d754d56853b..762f86ac79461558b6a2eb7105ffd05961f5d3e2 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -771,6 +771,9 @@ public: size_t getParameterSize() const; Parameter* getParameter(size_t i) throw(RangeError); + size_t getNonStaticParameterSize() const; + Parameter* getNonStaticParameter(size_t i) throw(RangeError); + void randParameters(); Arguments* getLayerOutput(const std::string& layerName) const diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py index a708def1d2d7f6da2998a5905f9473accc6db969..1c9455fab5f9c1179bddffb100cd53fe8adfb6b1 100644 --- a/paddle/py_paddle/util.py +++ b/paddle/py_paddle/util.py @@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__(): swig_paddle.GradientMachine.getParameters = getParameters + def getNonStaticParameters(self): + return (self.getNonStaticParameter(i) + for i in xrange(self.getNonStaticParameterSize())) + + swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters + def getLayerOutputs(self, layerNames): """ getLayerOutputs. get outputs of layers and return a numpy matrix dict. diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 0058e1df11080382e4c11fc3b07ea45042a96484..f96e1f742a8bdd90517dc5c537666e32849ac073 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -1,4 +1,8 @@ import mnist +import imikolov +import imdb +import cifar +import movielens import uci_housing -__all__ = ['mnist', 'uci_housing'] +__all__ = ['mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'uci_housing'] diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 5003f55f3e0d15149d28d1478e0487d6873d6e0a..e743a49523ff21627ea2abfb76cee8b9ffd685e2 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -120,7 +120,8 @@ class SGD(ITrainer): feeder(data_batch), out_args, pass_type) self.__gradient_machine__.eval(pass_evaluator) self.__gradient_machine__.eval(batch_evaluator) - for each_param in self.__gradient_machine__.getParameters(): + for each_param in self.__gradient_machine__.getNonStaticParameters( + ): updater.update(each_param) # Get cost. We use numpy to calculate total cost for this batch. cost_vec = out_args.getSlotValue(0)