From 906e2565a7ab6720e5636d3272b6887ff2245dfb Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Sat, 4 Nov 2017 05:01:48 +0800
Subject: [PATCH] Add acc test to image classification (#5336)

* add acc layer
* memory log level change from 3 to 10
* use gaussian random to init conv parameters
* use initializer
* fix import
* batch_norm use helper to create persistable var
* refine code
* train only 2 batches for test
* use g_program and g_init_program
* use XavierInitializer to init fc parameter
---
 paddle/framework/operator.h                    |  2 -
 paddle/operators/batch_norm_op.cc              |  5 +-
 python/paddle/v2/framework/layer_helper.py     |  5 +-
 python/paddle/v2/framework/layers.py           | 50 +++++++++-------
 .../tests/test_image_classification_train.py  | 57 ++++++++-----------
 .../tests/test_recognize_digits_mlp.py        |  6 +-
 6 files changed, 63 insertions(+), 62 deletions(-)

diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index b8a7040ed02..5c1989c26b6 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase {
   // indicate kernel DataType by input data. By default, all input data must
   // be the same.
   virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
-    VLOG(3) << "Default IndicateDataType " << this->Type();
     auto& scope = ctx.scope();
     int data_type = -1;
     for (auto& input : this->inputs_) {
@@ -425,7 +424,6 @@
       }
       if (t != nullptr) {
         int tmp = static_cast<int>(ToDataType(t->type()));
-        VLOG(3) << "Input " << ipt_name << " with data_type " << tmp;
         PADDLE_ENFORCE(tmp == data_type || data_type == -1,
                        "DataType of Paddle Op %s must be the same.", Type());
diff --git a/paddle/operators/batch_norm_op.cc b/paddle/operators/batch_norm_op.cc
index f2c8be4c54e..9c4bfd24c13 100644
--- a/paddle/operators/batch_norm_op.cc
+++ b/paddle/operators/batch_norm_op.cc
@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
     PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");
 
+    const float epsilon = ctx->Attrs().Get<float>("epsilon");
+    PADDLE_ENFORCE_GE(epsilon, 0.0,
+                      "epsilon should be greater than or equal to 0");
+    PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should be no larger than 0.001");
+
     // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
     PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
                       "Mean and MeanOut should share the same memory");
@@ -297,7 +301,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
 
   framework::DataType IndicateDataType(
       const framework::ExecutionContext &ctx) const override {
-    VLOG(3) << "IndicateDataType " << this->Type();
     const auto *var = ctx.InputVar(framework::GradVarName("Y"));
     if (var == nullptr) {
       PADDLE_THROW("can't find Y@GRAD");
diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/framework/layer_helper.py
index aa7dd0b50d4..9e80eaa6473 100644
--- a/python/paddle/v2/framework/layer_helper.py
+++ b/python/paddle/v2/framework/layer_helper.py
@@ -112,9 +112,12 @@ class LayerHelper(object):
             raise ValueError("Data Type mismatch")
         return dtype
 
-    def create_parameter(self, attr, shape, dtype, suffix='w'):
+    def create_parameter(self, attr, shape, dtype, suffix='w',
+                         initializer=None):
         # Deepcopy the attr so that parameters can be shared in program
         attr_copy = copy.deepcopy(attr)
+        if initializer is not None:
+            attr_copy['initializer'] = initializer
         if attr_copy['name'] is None:
             attr_copy['name'] = unique_name(".".join([self.name, suffix]))
         self.init_program.global_block().create_parameter(
diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py
index d6b5be94582..8b7d6fc32bf 100644
--- a/python/paddle/v2/framework/layers.py
+++ b/python/paddle/v2/framework/layers.py
@@ -1,8 +1,7 @@
-from paddle.v2.framework.layer_helper import LayerHelper, unique_name
 import paddle.v2.framework.core as core
-from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \
-    Operator
-from paddle.v2.framework.initializer import ConstantInitializer
+from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, Operator
+from paddle.v2.framework.initializer import ConstantInitializer, NormalInitializer
+from paddle.v2.framework.layer_helper import LayerHelper, unique_name
 import re
 
 __all__ = [
@@ -344,8 +343,13 @@ def conv2d(input,
     input_shape = input.shape
     filter_shape = [num_filters, num_filter_channels] + filter_size
+
+    std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
     filter = helper.create_parameter(
-        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
+        attr=helper.param_attr,
+        shape=filter_shape,
+        dtype=dtype,
+        initializer=NormalInitializer(0.0, std, 0))
 
     pre_bias = helper.create_tmp_variable(dtype)
 
     helper.append_op(
@@ -420,7 +424,7 @@ def batch_norm(input,
                act=None,
                is_test=False,
                momentum=0.9,
-               epsilon=1e05,
+               epsilon=1e-05,
                param_attr=None,
                bias_attr=None,
                data_layout='NCHW',
@@ -438,27 +442,29 @@ def batch_norm(input,
     else:
         raise ValueError("unsupported data layout:" + data_layout)
 
-    def create_persistable_var(dtype, shape, initializer=None):
-        name = unique_name(".".join([helper.name, "xxxx"]))
-        var = init_program.global_block().create_var(
-            dtype=dtype, shape=shape, name=name, persistable=True)
-        if initializer is not None:
-            initializer(var, var.block)
-        return program.global_block().create_var(
-            name=name, dtype=dtype, shape=shape, persistable=True)
-
     param_shape = [channel_num]
 
     # create parameter
     scale = helper.create_parameter(
-        attr=helper.param_attr, shape=param_shape, dtype=dtype)
+        attr=helper.param_attr,
+        shape=param_shape,
+        dtype=dtype,
+        initializer=ConstantInitializer(1.0))
     bias = helper.create_parameter(
-        attr=helper.param_attr, shape=param_shape, dtype=dtype)
-
-    # create input
-    mean = create_persistable_var(dtype, param_shape, ConstantInitializer(0.0))
-    variance = create_persistable_var(dtype, param_shape,
-                                      ConstantInitializer(1.0))
+        attr=helper.param_attr,
+        shape=param_shape,
+        dtype=dtype,
+        initializer=ConstantInitializer(0.0))
+
+    mean = helper.create_global_variable(
+        dtype=input.data_type, shape=param_shape, persistable=True)
+    helper.set_variable_initializer(
+        var=mean, initializer=ConstantInitializer(0.0))
+
+    variance = helper.create_global_variable(
+        dtype=input.data_type, shape=param_shape, persistable=True)
+    helper.set_variable_initializer(
+        var=variance, initializer=ConstantInitializer(1.0))
 
     # create output
     # mean and mean_out share the same memory
diff --git a/python/paddle/v2/framework/tests/test_image_classification_train.py b/python/paddle/v2/framework/tests/test_image_classification_train.py
index 21adc7f38f8..7189adbf8fc 100644
--- a/python/paddle/v2/framework/tests/test_image_classification_train.py
+++ b/python/paddle/v2/framework/tests/test_image_classification_train.py
@@ -1,13 +1,12 @@
+import numpy as np
 import paddle.v2 as paddle
+import paddle.v2.framework.core as core
 import paddle.v2.framework.layers as layers
 import paddle.v2.framework.nets as nets
-import paddle.v2.framework.core as core
 import paddle.v2.framework.optimizer as optimizer
-
-from paddle.v2.framework.framework import Program, g_program
 from paddle.v2.framework.executor import Executor
-
-import numpy as np
+from paddle.v2.framework.framework import g_init_program, g_program
+from paddle.v2.framework.initializer import XavierInitializer
 
 
 def resnet_cifar10(input, depth=32, program=None, init_program=None):
@@ -124,7 +123,7 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
     return pool
 
 
-def vgg16_bn_drop(input, program, init_program):
+def vgg16_bn_drop(input, program=None, init_program=None):
     def conv_block(input,
                    num_filter,
                    groups,
@@ -155,6 +154,7 @@ def vgg16_bn_drop(input, program, init_program):
     fc1 = layers.fc(input=drop,
                     size=512,
                     act=None,
+                    param_attr={"initializer": XavierInitializer()},
                     program=program,
                     init_program=init_program)
     reshape1 = layers.reshape(
@@ -169,46 +169,34 @@ def vgg16_bn_drop(input, program, init_program):
     fc2 = layers.fc(input=drop2,
                     size=512,
                     act=None,
+                    param_attr={"initializer": XavierInitializer()},
                     program=program,
                     init_program=init_program)
     return fc2
 
 
-init_program = Program()
-program = Program()
-
 classdim = 10
 data_shape = [3, 32, 32]
 
-images = layers.data(
-    name='pixel', shape=data_shape, data_type='float32', program=program)
-
-label = layers.data(
-    name='label',
-    shape=[1],
-    data_type='int64',
-    program=program,
-    init_program=init_program)
+images = layers.data(name='pixel', shape=data_shape, data_type='float32')
+label = layers.data(name='label', shape=[1], data_type='int64')
 
 # Add neural network config
 # option 1. resnet
-net = resnet_cifar10(images, 32, program, init_program)
+# net = resnet_cifar10(images, 32)
 # option 2. vgg
-# net = vgg16_bn_drop(images, program, init_program)
+net = vgg16_bn_drop(images)
 # print(program)
 
-predict = layers.fc(input=net,
-                    size=classdim,
-                    act='softmax',
-                    program=program,
-                    init_program=init_program)
-cost = layers.cross_entropy(
-    input=predict, label=label, program=program, init_program=init_program)
-avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
+predict = layers.fc(input=net, size=classdim, act='softmax')
+cost = layers.cross_entropy(input=predict, label=label)
+avg_cost = layers.mean(x=cost)
+accuracy = layers.accuracy(input=predict, label=label)
 
-sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
-opts = sgd_optimizer.minimize(avg_cost, init_program)
+# optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
+optimizer = optimizer.AdamOptimizer(learning_rate=0.001)
+opts = optimizer.minimize(avg_cost)
 
 BATCH_SIZE = 128
 PASS_NUM = 1
@@ -221,7 +209,7 @@ train_reader = paddle.batch(
 
 place = core.CPUPlace()
 exe = Executor(place)
-exe.run(init_program, feed={}, fetch_list=[])
+exe.run(g_init_program, feed={}, fetch_list=[])
 
 for pass_id in range(PASS_NUM):
     batch_id = 0
@@ -239,14 +227,15 @@ for pass_id in range(PASS_NUM):
         tensor_img.set(img_data, place)
         tensor_y.set(y_data, place)
 
-        outs = exe.run(program,
+        outs = exe.run(g_program,
                        feed={"pixel": tensor_img,
                              "label": tensor_y},
-                       fetch_list=[avg_cost])
+                       fetch_list=[avg_cost, accuracy])
 
         loss = np.array(outs[0])
+        acc = np.array(outs[1])
         print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
-              " loss:" + str(loss))
+              " loss:" + str(loss) + " acc:" + str(acc))
         batch_id = batch_id + 1
 
         if batch_id > 1:
diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
index c116d1a6d35..e848db17016 100644
--- a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
@@ -57,6 +57,8 @@ label = layers.data(
 cost = layers.cross_entropy(
     input=predict, label=label, program=program, init_program=init_program)
 avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
+accuracy = layers.accuracy(
+    input=predict, label=label, program=program, init_program=init_program)
 
 optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
 opts = optimizer.minimize(avg_cost, init_program)
@@ -87,9 +89,9 @@ for pass_id in range(PASS_NUM):
         outs = exe.run(program,
                        feed={'x': tensor_x,
                              'y': tensor_y},
-                       fetch_list=[avg_cost])
+                       fetch_list=[avg_cost, accuracy])
         out = np.array(outs[0])
-
+        acc = np.array(outs[1])
         if out[0] < 5.0:
             exit(0)  # if avg cost is less than 5.0, we consider the code good
 exit(1)
-- 
GitLab
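
For readers skimming the diff, the pattern this patch establishes can be condensed as below: parameters take an initializer through param_attr (routed by the new LayerHelper.create_parameter argument), and the new accuracy layer is fetched alongside the cost in the same exe.run call. This is a minimal sketch using only names that appear in the diff above (g_program, g_init_program, layers.accuracy, XavierInitializer); it targets the paddle.v2 framework API as of this commit and is not valid for later Paddle releases.

    import numpy as np
    import paddle.v2.framework.core as core
    import paddle.v2.framework.layers as layers
    import paddle.v2.framework.optimizer as optimizer
    from paddle.v2.framework.executor import Executor
    from paddle.v2.framework.framework import g_init_program, g_program
    from paddle.v2.framework.initializer import XavierInitializer

    # fc weights initialized via the new `initializer` plumbing in
    # LayerHelper.create_parameter, passed through param_attr
    x = layers.data(name='x', shape=[784], data_type='float32')
    y = layers.data(name='y', shape=[1], data_type='int64')
    hidden = layers.fc(input=x, size=512,
                       param_attr={"initializer": XavierInitializer()})
    predict = layers.fc(input=hidden, size=10, act='softmax')

    cost = layers.cross_entropy(input=predict, label=y)
    avg_cost = layers.mean(x=cost)
    accuracy = layers.accuracy(input=predict, label=y)  # the new acc layer
    opts = optimizer.AdamOptimizer(learning_rate=0.001).minimize(avg_cost)

    exe = Executor(core.CPUPlace())
    exe.run(g_init_program, feed={}, fetch_list=[])  # run initializers once

    # feed LoDTensors as in the tests above, then fetch loss and accuracy
    # from one run: accuracy is just another fetch target, so the test can
    # assert on it without a separate evaluation pass, e.g.
    # outs = exe.run(g_program, feed={...}, fetch_list=[avg_cost, accuracy])
    # loss, acc = np.array(outs[0]), np.array(outs[1])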