From aec1c2f0ebe03f95873be1cadecc77fe61cf5085 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Mon, 2 Jan 2017 09:19:12 +0800
Subject: [PATCH] fix code style and copyright

---
 image_classification/README.md        | 17 +++++------------
 image_classification/classify.py      |  2 +-
 image_classification/data/cifar10.py  | 23 ++++++++++++++++++++---
 image_classification/data/get_data.sh |  3 ++-
 image_classification/train.sh         |  6 +++---
 5 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/image_classification/README.md b/image_classification/README.md
index e62dd19..3f9a7e2 100644
--- a/image_classification/README.md
+++ b/image_classification/README.md
@@ -67,7 +67,7 @@ The CNN model that Alex Krizhevsky proposed at the 2012 ILSVRC \[[9](#参考文献)\] achieved

 - Dropout \[[10](#参考文献)\]: randomly deactivates some hidden-layer units during training, which improves the network's ability to generalize and helps prevent overfitting to some extent.

-Besides the basic components above, another technique well worth mentioning is the Batch Normalization (BN) algorithm proposed in 2015 \[[14](#参考文献)\]. Its authors point out that because the parameters of every layer are updated continually during training, the input distribution of the following layer keeps shifting, so training requires carefully designed hyperparameters. BN normalizes the features of each layer over every batch, keeping each layer's distribution relatively stable; it acts as a regularizer to some degree and reduces the sensitivity to hyperparameter design. Experiments show that BN accelerates training, and it has been widely used in later, deeper models.
+A traditional CNN consists of components such as convolutional layers and fully connected layers, and uses a softmax multi-class classifier with a multi-class cross-entropy loss. Another technique well worth mentioning is the Batch Normalization (BN) algorithm proposed in 2015 \[[14](#参考文献)\]. Its authors point out that because the parameters of every layer are updated continually during training, the input distribution of the following layer keeps shifting, so training requires carefully designed hyperparameters. BN normalizes the features of each layer over every batch, keeping each layer's distribution relatively stable; it acts as a regularizer to some degree and reduces the sensitivity to hyperparameter design. Experiments show that BN accelerates training, and it has been widely used in later, deeper models.

 Next we mainly introduce the VGG, GoogLeNet, and ResNet network architectures.

@@ -425,7 +425,7 @@ Tester.cpp:115] Test samples=10000 cost=1.99246 Eval: classification_error_eval

 You can predict the category of an image as follows. Prediction runs on the GPU by default; to predict on the CPU, simply append the `-c` flag.

-```python
+```bash
 python classify.py --job=predict --model=output/pass-00299 --data=image/dog.png # -c
 ```

@@ -439,7 +439,7 @@ Label of image/dog.png is: 5

 You can extract features from an image as follows. Unlike prediction, the job type is set to extract, and the layer to extract from must be specified. By default `classify.py` extracts features from the first convolutional layer and produces a visualization, as shown in Figure 10: the first convolutional layer of the VGG model has 64 channels, and Figure 13 shows the grayscale image of each channel.

-```python
+```bash
 python classify.py --job=extract --model=output/pass-00299 --data=image/dog.png # -c
 ```

@@ -481,17 +481,10 @@

 [13] Lin, M., Chen, Q., and Yan, S. Network in network. In Proc. ICLR, 2014.

-[14] S. Ioffe and C. Szegedy. Batch normalization: Accelerating deep
-network training by reducing internal covariate shift. In ICML, 2015.
-
+[14] S. Ioffe and C. Szegedy. Batch normalization: Accelerating deep network training by reducing internal covariate shift. In ICML, 2015.
+
 [15] K. He, X. Zhang, S. Ren, J. Sun. Deep Residual Learning for Image Recognition. CVPR 2016.

 [16] Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: CVPR. (2016).

 [17] Szegedy, C., Ioffe, S., Vanhoucke, V.: Inception-v4, inception-resnet and the impact of residual connections on learning. arXiv:1602.07261 (2016).
-
-
-
-
-
-
\ No newline at end of file
diff --git a/image_classification/classify.py b/image_classification/classify.py
index acd0d3c..5a49bc2 100644
--- a/image_classification/classify.py
+++ b/image_classification/classify.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/image_classification/data/cifar10.py b/image_classification/data/cifar10.py
index 9657572..0f51fd9 100755
--- a/image_classification/data/cifar10.py
+++ b/image_classification/data/cifar10.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import numpy as np
 import cPickle
@@ -7,22 +21,24 @@ CHANNEL = 3
 HEIGHT = 32
 WIDTH = 32

+
 def create_mean(dataset):
     if not os.path.isfile("mean.meta"):
         mean = np.zeros(CHANNEL * HEIGHT * WIDTH)
         num = 0
         for f in dataset:
             batch = np.load(f)
-            mean += batch['data'].sum(0) 
+            mean += batch['data'].sum(0)
             num += len(batch['data'])
         mean /= num
         print mean.size
         data = {"mean": mean, "size": mean.size}
-        cPickle.dump(data, open("mean.meta", 'w'), protocol=cPickle.HIGHEST_PROTOCOL)
+        cPickle.dump(
+            data, open("mean.meta", 'w'), protocol=cPickle.HIGHEST_PROTOCOL)


 def create_data():
-    train_set = [DATA + "/data_batch_%d" % (i + 1) for i in xrange(0,5)]
+    train_set = [DATA + "/data_batch_%d" % (i + 1) for i in xrange(0, 5)]
     test_set = [DATA + "/test_batch"]

     # create mean values
@@ -39,5 +55,6 @@ def create_data():
     open("test.txt", "w").write("\n".join(test))
     open("test.list", "w").write("\n".join(["data/test.txt"]))

+
 if __name__ == '__main__':
     create_data()
diff --git a/image_classification/data/get_data.sh b/image_classification/data/get_data.sh
index 519521f..82b4888 100755
--- a/image_classification/data/get_data.sh
+++ b/image_classification/data/get_data.sh
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 set -e
+
 wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
 tar zxf cifar-10-python.tar.gz
 rm cifar-10-python.tar.gz
diff --git a/image_classification/train.sh b/image_classification/train.sh
index c8e82a4..48d743c 100755
--- a/image_classification/train.sh
+++ b/image_classification/train.sh
@@ -14,8 +14,8 @@
 # limitations under the License.
 set -e

-config=models/resnet.py
-#config=models/vgg.py
+#config=models/resnet.py
+config=models/vgg.py
 output=output
 log=train.log

@@ -26,4 +26,4 @@ paddle train \
 --log_period=100 \
 --num_passes=300 \
 --save_dir=$output
-#2>&1 | tee $log
+2>&1 | tee $log
-- 
GitLab
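
As context for the style fixes in `data/cifar10.py` above: `create_mean()` accumulates a per-pixel mean over the CIFAR-10 training batches and pickles it to `mean.meta` as `{"mean": ..., "size": ...}`. Below is a minimal Python 2 sketch of how a consumer could load and apply that mean; it follows the file's own `cPickle` idiom, but `load_mean`, `center_images`, and the `cifar-10-batches-py` path are illustrative assumptions, not code from this patch.

```python
import cPickle

import numpy as np

CHANNEL, HEIGHT, WIDTH = 3, 32, 32  # CIFAR-10 image shape


def load_mean(meta_path="mean.meta"):
    # mean.meta holds {"mean": <3072-dim vector>, "size": 3072},
    # as written by create_mean() in data/cifar10.py.
    meta = cPickle.load(open(meta_path))
    assert meta["size"] == CHANNEL * HEIGHT * WIDTH
    return meta["mean"]


def center_images(flat_images, mean):
    # Each CIFAR-10 batch row is a flat 3072-dim vector
    # (3 channels x 32 x 32); subtract the dataset mean element-wise.
    return flat_images.astype("float32") - mean


if __name__ == "__main__":
    # "cifar-10-batches-py" is the directory unpacked by get_data.sh;
    # the exact path here is an assumption for this example.
    batch = cPickle.load(open("cifar-10-batches-py/test_batch"))
    centered = center_images(batch["data"], load_mean())
    print centered.shape  # (10000, 3072)
```

Centering inputs on the dataset mean is the usual preprocessing these training configs assume; it keeps first-layer activations roughly zero-centered.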