Unverified commit 906e2565, authored by Qiao Longfei, committed by GitHub

Add acc test to image classification (#5336)

* add acc layer
* memory log level change from 3 to 10
* use gaussian random to init conv parameters
* use initializer
* fix import
* batch_norm use helper to create persistable var
* refine code
* train only 2 batches for test
* use g_program and g_init_program
* use XavierInitializer to init fc parameter
Parent 74849158
@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase {
   // indicate kernel DataType by input data. Defaultly all input data must be
   // same.
   virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
-    VLOG(3) << "Default IndicateDataType " << this->Type();
     auto& scope = ctx.scope();
     int data_type = -1;
     for (auto& input : this->inputs_) {
@@ -425,7 +424,6 @@ class OperatorWithKernel : public OperatorBase {
         }
         if (t != nullptr) {
           int tmp = static_cast<int>(ToDataType(t->type()));
-          VLOG(3) << "Input " << ipt_name << " with data_type " << tmp;
           PADDLE_ENFORCE(tmp == data_type || data_type == -1,
                          "DataType of Paddle Op %s must be the same.",
                          Type());
......
@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
     PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");

+    const float epsilon = ctx->Attrs().Get<float>("epsilon");
+    PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be larger than 0");
+    PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be too large");
+
     // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
     PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
                       "Mean and MeanOut should share the same memory");
@@ -297,7 +301,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
   framework::DataType IndicateDataType(
       const framework::ExecutionContext &ctx) const override {
-    VLOG(3) << "IndicateDataType " << this->Type();
     const auto *var = ctx.InputVar(framework::GradVarName("Y"));
     if (var == nullptr) {
       PADDLE_THROW("can't find Y@GRAD");
......
@@ -112,9 +112,12 @@ class LayerHelper(object):
             raise ValueError("Data Type mismatch")
         return dtype

-    def create_parameter(self, attr, shape, dtype, suffix='w'):
+    def create_parameter(self, attr, shape, dtype, suffix='w',
+                         initializer=None):
        # Deepcopy the attr so that parameters can be shared in program
        attr_copy = copy.deepcopy(attr)
+        if initializer is not None:
+            attr_copy['initializer'] = initializer
        if attr_copy['name'] is None:
            attr_copy['name'] = unique_name(".".join([self.name, suffix]))
        self.init_program.global_block().create_parameter(
......
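Note on the create_parameter change above: the attr dict is deep-copied before the optional initializer is attached, so an attr dict shared across several layers is never mutated by one layer's initializer. A minimal, self-contained sketch of that pattern (the names here are illustrative, not part of the diff):

    import copy

    def with_initializer(attr, initializer=None):
        # Mirror of create_parameter: copy first, then attach the initializer.
        attr_copy = copy.deepcopy(attr)
        if initializer is not None:
            attr_copy['initializer'] = initializer
        return attr_copy

    shared_attr = {'name': None}
    # 'gaussian' is a stand-in for an initializer object such as NormalInitializer.
    print(with_initializer(shared_attr, initializer='gaussian'))
    print(shared_attr)  # still {'name': None}; the shared dict is untouched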
-from paddle.v2.framework.layer_helper import LayerHelper, unique_name
 import paddle.v2.framework.core as core
-from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \
-    Operator
-from paddle.v2.framework.initializer import ConstantInitializer
+from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, Operator
+from paddle.v2.framework.initializer import ConstantInitializer, NormalInitializer
+from paddle.v2.framework.layer_helper import LayerHelper, unique_name
 import re

 __all__ = [
@@ -344,8 +343,13 @@ def conv2d(input,
     input_shape = input.shape
     filter_shape = [num_filters, num_filter_channels] + filter_size

+    std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
     filter = helper.create_parameter(
-        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
+        attr=helper.param_attr,
+        shape=filter_shape,
+        dtype=dtype,
+        initializer=NormalInitializer(0.0, std, 0))
+
     pre_bias = helper.create_tmp_variable(dtype)

     helper.append_op(
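The std computed above is the He/Kaiming-style standard deviation for a conv filter, sqrt(2 / fan_in) with fan_in = filter_size^2 * num_channels, which is what the commit note "use gaussian random to init conv parameters" refers to. A quick check of the value, assuming a 3x3 filter over 64 input channels (example numbers, not from the diff):

    filter_size, num_channels = 3, 64              # assumed example values
    std = (2.0 / (filter_size ** 2 * num_channels)) ** 0.5
    print(round(std, 4))                           # 0.0589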
@@ -420,7 +424,7 @@ def batch_norm(input,
                act=None,
                is_test=False,
                momentum=0.9,
-               epsilon=1e05,
+               epsilon=1e-05,
                param_attr=None,
                bias_attr=None,
                data_layout='NCHW',
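The previous default, 1e05, was a typo; combined with the range check added to batch_norm_op.cc above (0.0 <= epsilon <= 0.001), the old value would now be rejected while the corrected 1e-05 passes. A small sketch of the same check in plain Python:

    for epsilon in (1e05, 1e-05):              # old (typo) vs. corrected default
        ok = 0.0 <= epsilon <= 0.001           # mirrors PADDLE_ENFORCE_GE / _LE
        print(epsilon, 'accepted' if ok else 'rejected')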
@@ -438,27 +442,29 @@ def batch_norm(input,
     else:
         raise ValueError("unsupported data layout:" + data_layout)

-    def create_persistable_var(dtype, shape, initializer=None):
-        name = unique_name(".".join([helper.name, "xxxx"]))
-        var = init_program.global_block().create_var(
-            dtype=dtype, shape=shape, name=name, persistable=True)
-        if initializer is not None:
-            initializer(var, var.block)
-        return program.global_block().create_var(
-            name=name, dtype=dtype, shape=shape, persistable=True)
-
     param_shape = [channel_num]

     # create parameter
     scale = helper.create_parameter(
-        attr=helper.param_attr, shape=param_shape, dtype=dtype)
+        attr=helper.param_attr,
+        shape=param_shape,
+        dtype=dtype,
+        initializer=ConstantInitializer(1.0))
     bias = helper.create_parameter(
-        attr=helper.param_attr, shape=param_shape, dtype=dtype)
-
-    # create input
-    mean = create_persistable_var(dtype, param_shape, ConstantInitializer(0.0))
-    variance = create_persistable_var(dtype, param_shape,
-                                      ConstantInitializer(1.0))
+        attr=helper.param_attr,
+        shape=param_shape,
+        dtype=dtype,
+        initializer=ConstantInitializer(0.0))
+
+    mean = helper.create_global_variable(
+        dtype=input.data_type, shape=param_shape, persistable=True)
+    helper.set_variable_initializer(
+        var=mean, initializer=ConstantInitializer(0.0))
+    variance = helper.create_global_variable(
+        dtype=input.data_type, shape=param_shape, persistable=True)
+    helper.set_variable_initializer(
+        var=variance, initializer=ConstantInitializer(1.0))

     # create output
     # mean and mean_out share the same memory
......
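mean and variance are created as persistable global variables because they hold running statistics that batch_norm updates in place on every step (the C++ op above enforces that Mean/MeanOut and Variance/VarianceOut share memory). A sketch of the conventional running-statistics update such buffers receive, using the layer's default momentum of 0.9; the formula is the standard batch-norm one, not code copied from this diff:

    momentum = 0.9                              # default from batch_norm above
    running_mean, batch_mean = 0.0, 1.0         # assumed example values
    running_mean = momentum * running_mean + (1.0 - momentum) * batch_mean
    print(running_mean)                         # 0.1 after one update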
+import numpy as np
 import paddle.v2 as paddle
+import paddle.v2.framework.core as core
 import paddle.v2.framework.layers as layers
 import paddle.v2.framework.nets as nets
-import paddle.v2.framework.core as core
 import paddle.v2.framework.optimizer as optimizer
-from paddle.v2.framework.framework import Program, g_program
 from paddle.v2.framework.executor import Executor
-import numpy as np
+from paddle.v2.framework.framework import g_init_program, g_program
+from paddle.v2.framework.initializer import XavierInitializer


 def resnet_cifar10(input, depth=32, program=None, init_program=None):
@@ -124,7 +123,7 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
     return pool


-def vgg16_bn_drop(input, program, init_program):
+def vgg16_bn_drop(input, program=None, init_program=None):
     def conv_block(input,
                    num_filter,
                    groups,
@@ -155,6 +154,7 @@ def vgg16_bn_drop(input, program, init_program):
     fc1 = layers.fc(input=drop,
                     size=512,
                     act=None,
+                    param_attr={"initializer": XavierInitializer()},
                     program=program,
                     init_program=init_program)
     reshape1 = layers.reshape(
@@ -169,46 +169,34 @@ def vgg16_bn_drop(input, program, init_program):
     fc2 = layers.fc(input=drop2,
                     size=512,
                     act=None,
+                    param_attr={"initializer": XavierInitializer()},
                     program=program,
                     init_program=init_program)
     return fc2


-init_program = Program()
-program = Program()
-
 classdim = 10
 data_shape = [3, 32, 32]

-images = layers.data(
-    name='pixel', shape=data_shape, data_type='float32', program=program)
-label = layers.data(
-    name='label',
-    shape=[1],
-    data_type='int64',
-    program=program,
-    init_program=init_program)
+images = layers.data(name='pixel', shape=data_shape, data_type='float32')
+label = layers.data(name='label', shape=[1], data_type='int64')

 # Add neural network config
 # option 1. resnet
-net = resnet_cifar10(images, 32, program, init_program)
+# net = resnet_cifar10(images, 32)
 # option 2. vgg
-# net = vgg16_bn_drop(images, program, init_program)
+net = vgg16_bn_drop(images)
 # print(program)

-predict = layers.fc(input=net,
-                    size=classdim,
-                    act='softmax',
-                    program=program,
-                    init_program=init_program)
-cost = layers.cross_entropy(
-    input=predict, label=label, program=program, init_program=init_program)
-avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
+predict = layers.fc(input=net, size=classdim, act='softmax')
+cost = layers.cross_entropy(input=predict, label=label)
+avg_cost = layers.mean(x=cost)
+accuracy = layers.accuracy(input=predict, label=label)

-sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
-opts = sgd_optimizer.minimize(avg_cost, init_program)
+# optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
+optimizer = optimizer.AdamOptimizer(learning_rate=0.001)
+opts = optimizer.minimize(avg_cost)

 BATCH_SIZE = 128
 PASS_NUM = 1
@@ -221,7 +209,7 @@ train_reader = paddle.batch(
 place = core.CPUPlace()
 exe = Executor(place)

-exe.run(init_program, feed={}, fetch_list=[])
+exe.run(g_init_program, feed={}, fetch_list=[])

 for pass_id in range(PASS_NUM):
     batch_id = 0
@@ -239,14 +227,15 @@ for pass_id in range(PASS_NUM):
         tensor_img.set(img_data, place)
         tensor_y.set(y_data, place)

-        outs = exe.run(program,
+        outs = exe.run(g_program,
                        feed={"pixel": tensor_img,
                              "label": tensor_y},
-                       fetch_list=[avg_cost])
+                       fetch_list=[avg_cost, accuracy])

         loss = np.array(outs[0])
+        acc = np.array(outs[1])
         print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
-              " loss:" + str(loss))
+              " loss:" + str(loss) + " acc:" + str(acc))
         batch_id = batch_id + 1

         if batch_id > 1:
......
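The fetched acc is the batch accuracy produced by layers.accuracy: the fraction of samples whose top-scoring class matches the label. A NumPy sketch of that computation on made-up data (an illustration, not the op's actual kernel):

    import numpy as np

    predict = np.array([[0.1, 0.9],            # made-up per-class scores
                        [0.8, 0.2],
                        [0.3, 0.7]])
    label = np.array([1, 0, 0])
    acc = float((predict.argmax(axis=1) == label).mean())
    print(acc)                                 # 0.666..., 2 of 3 correct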
@@ -57,6 +57,8 @@ label = layers.data(
 cost = layers.cross_entropy(
     input=predict, label=label, program=program, init_program=init_program)
 avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
+accuracy = layers.accuracy(
+    input=predict, label=label, program=program, init_program=init_program)

 optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
 opts = optimizer.minimize(avg_cost, init_program)
@@ -87,9 +89,9 @@ for pass_id in range(PASS_NUM):
         outs = exe.run(program,
                        feed={'x': tensor_x,
                              'y': tensor_y},
-                       fetch_list=[avg_cost])
+                       fetch_list=[avg_cost, accuracy])
         out = np.array(outs[0])
+        acc = np.array(outs[1])
         if out[0] < 5.0:
             exit(0)  # if avg cost less than 5.0, we think our code is good.
 exit(1)