Unverified commit 906e2565, authored by Qiao Longfei, committed by GitHub

Add acc test to image classification (#5336)

* add acc layer
* memory log level change from 3 to 10
* use gaussian random to init conv parameters
* use initializer
* fix import
* batch_norm use helper to create persistable var
* refine code
* train only 2 batches for test
* use g_program and g_init_program
* use XavierInitializer to init fc parameter
Parent 74849158
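
For quick reference, a minimal sketch of the pattern this commit introduces in the image-classification test: an accuracy layer is built next to the loss and both are fetched from the executor. The snippet is assembled from the diff below; the paddle.v2.framework API of this revision is assumed, and `net`, `label`, `tensor_img`, and `tensor_y` stand in for values built elsewhere in the test.

```python
import numpy as np
import paddle.v2.framework.core as core
import paddle.v2.framework.layers as layers
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import g_init_program, g_program

# `net` is the last feature layer of the model and `label` an int64 data layer,
# both created earlier in the test (see the diff below).
predict = layers.fc(input=net, size=10, act='softmax')
cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost)
accuracy = layers.accuracy(input=predict, label=label)  # the new acc layer

exe = Executor(core.CPUPlace())
exe.run(g_init_program, feed={}, fetch_list=[])  # run parameter initialization once
outs = exe.run(g_program,
               feed={"pixel": tensor_img, "label": tensor_y},
               fetch_list=[avg_cost, accuracy])  # fetch loss and accuracy together
loss, acc = np.array(outs[0]), np.array(outs[1])
```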
@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase {
// Indicate kernel DataType by input data. By default all input data must be
// the same.
virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
VLOG(3) << "Default IndicateDataType " << this->Type();
auto& scope = ctx.scope();
int data_type = -1;
for (auto& input : this->inputs_) {
@@ -425,7 +424,6 @@ class OperatorWithKernel : public OperatorBase {
}
if (t != nullptr) {
int tmp = static_cast<int>(ToDataType(t->type()));
VLOG(3) << "Input " << ipt_name << " with data_type " << tmp;
PADDLE_ENFORCE(tmp == data_type || data_type == -1,
"DataType of Paddle Op %s must be the same.",
Type());
......
@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");
const float epsilon = ctx->Attrs().Get<float>("epsilon");
PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be larger than 0");
PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be too large");
// make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
"Mean and MeanOut should share the same memory");
@@ -297,7 +301,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
framework::DataType IndicateDataType(
const framework::ExecutionContext &ctx) const override {
VLOG(3) << "IndicateDataType " << this->Type();
const auto *var = ctx.InputVar(framework::GradVarName("Y"));
if (var == nullptr) {
PADDLE_THROW("can't find Y@GRAD");
......
@@ -112,9 +112,12 @@ class LayerHelper(object):
raise ValueError("Data Type mismatch")
return dtype
def create_parameter(self, attr, shape, dtype, suffix='w'):
def create_parameter(self, attr, shape, dtype, suffix='w',
initializer=None):
# Deepcopy the attr so that parameters can be shared in program
attr_copy = copy.deepcopy(attr)
if initializer is not None:
attr_copy['initializer'] = initializer
if attr_copy['name'] is None:
attr_copy['name'] = unique_name(".".join([self.name, suffix]))
self.init_program.global_block().create_parameter(
......
from paddle.v2.framework.layer_helper import LayerHelper, unique_name
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \
Operator
from paddle.v2.framework.initializer import ConstantInitializer
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, Operator
from paddle.v2.framework.initializer import ConstantInitializer, NormalInitializer
from paddle.v2.framework.layer_helper import LayerHelper, unique_name
import re
__all__ = [
@@ -344,8 +343,13 @@ def conv2d(input,
input_shape = input.shape
filter_shape = [num_filters, num_filter_channels] + filter_size
std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
filter = helper.create_parameter(
attr=helper.param_attr, shape=filter_shape, dtype=dtype)
attr=helper.param_attr,
shape=filter_shape,
dtype=dtype,
initializer=NormalInitializer(0.0, std, 0))
pre_bias = helper.create_tmp_variable(dtype)
helper.append_op(
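
The `std` computed above appears to follow the He/Kaiming rule std = sqrt(2 / (k^2 * C_in)) for ReLU conv layers, and it is passed to `NormalInitializer` as the distribution's scale. A small worked check, assuming a 3x3 filter over 3 input channels (illustrative values, not taken from the diff):

```python
filter_size = [3, 3]
num_channels = 3
std = (2.0 / (filter_size[0] ** 2 * num_channels)) ** 0.5
print(round(std, 3))  # 0.272 -> used as the scale in NormalInitializer(0.0, std, 0)
```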
@@ -420,7 +424,7 @@ def batch_norm(input,
act=None,
is_test=False,
momentum=0.9,
epsilon=1e05,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
data_layout='NCHW',
@@ -438,27 +442,29 @@ def batch_norm(input,
else:
raise ValueError("unsupported data layout:" + data_layout)
def create_persistable_var(dtype, shape, initializer=None):
name = unique_name(".".join([helper.name, "xxxx"]))
var = init_program.global_block().create_var(
dtype=dtype, shape=shape, name=name, persistable=True)
if initializer is not None:
initializer(var, var.block)
return program.global_block().create_var(
name=name, dtype=dtype, shape=shape, persistable=True)
param_shape = [channel_num]
# create parameter
scale = helper.create_parameter(
attr=helper.param_attr, shape=param_shape, dtype=dtype)
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
initializer=ConstantInitializer(1.0))
bias = helper.create_parameter(
attr=helper.param_attr, shape=param_shape, dtype=dtype)
# create input
mean = create_persistable_var(dtype, param_shape, ConstantInitializer(0.0))
variance = create_persistable_var(dtype, param_shape,
ConstantInitializer(1.0))
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
initializer=ConstantInitializer(0.0))
mean = helper.create_global_variable(
dtype=input.data_type, shape=param_shape, persistable=True)
helper.set_variable_initializer(
var=mean, initializer=ConstantInitializer(0.0))
variance = helper.create_global_variable(
dtype=input.data_type, shape=param_shape, persistable=True)
helper.set_variable_initializer(
var=variance, initializer=ConstantInitializer(1.0))
# create output
# mean and mean_out share the same memory
......
import numpy as np
import paddle.v2 as paddle
import paddle.v2.framework.core as core
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_program
from paddle.v2.framework.executor import Executor
import numpy as np
from paddle.v2.framework.framework import g_init_program, g_program
from paddle.v2.framework.initializer import XavierInitializer
def resnet_cifar10(input, depth=32, program=None, init_program=None):
@@ -124,7 +123,7 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
return pool
def vgg16_bn_drop(input, program, init_program):
def vgg16_bn_drop(input, program=None, init_program=None):
def conv_block(input,
num_filter,
groups,
@@ -155,6 +154,7 @@ def vgg16_bn_drop(input, program, init_program):
fc1 = layers.fc(input=drop,
size=512,
act=None,
param_attr={"initializer": XavierInitializer()},
program=program,
init_program=init_program)
reshape1 = layers.reshape(
@@ -169,46 +169,34 @@ def vgg16_bn_drop(input, program, init_program):
fc2 = layers.fc(input=drop2,
size=512,
act=None,
param_attr={"initializer": XavierInitializer()},
program=program,
init_program=init_program)
return fc2
init_program = Program()
program = Program()
classdim = 10
data_shape = [3, 32, 32]
images = layers.data(
name='pixel', shape=data_shape, data_type='float32', program=program)
label = layers.data(
name='label',
shape=[1],
data_type='int64',
program=program,
init_program=init_program)
images = layers.data(name='pixel', shape=data_shape, data_type='float32')
label = layers.data(name='label', shape=[1], data_type='int64')
# Add neural network config
# option 1. resnet
net = resnet_cifar10(images, 32, program, init_program)
# net = resnet_cifar10(images, 32)
# option 2. vgg
# net = vgg16_bn_drop(images, program, init_program)
net = vgg16_bn_drop(images)
# print(program)
predict = layers.fc(input=net,
size=classdim,
act='softmax',
program=program,
init_program=init_program)
cost = layers.cross_entropy(
input=predict, label=label, program=program, init_program=init_program)
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
predict = layers.fc(input=net, size=classdim, act='softmax')
cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost)
accuracy = layers.accuracy(input=predict, label=label)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, init_program)
# optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
optimizer = optimizer.AdamOptimizer(learning_rate=0.001)
opts = optimizer.minimize(avg_cost)
BATCH_SIZE = 128
PASS_NUM = 1
@@ -221,7 +209,7 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(init_program, feed={}, fetch_list=[])
exe.run(g_init_program, feed={}, fetch_list=[])
for pass_id in range(PASS_NUM):
batch_id = 0
@@ -239,14 +227,15 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place)
tensor_y.set(y_data, place)
outs = exe.run(program,
outs = exe.run(g_program,
feed={"pixel": tensor_img,
"label": tensor_y},
fetch_list=[avg_cost])
fetch_list=[avg_cost, accuracy])
loss = np.array(outs[0])
acc = np.array(outs[1])
print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
" loss:" + str(loss))
" loss:" + str(loss) + " acc:" + str(acc))
batch_id = batch_id + 1
if batch_id > 1:
......
@@ -57,6 +57,8 @@ label = layers.data(
cost = layers.cross_entropy(
input=predict, label=label, program=program, init_program=init_program)
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
accuracy = layers.accuracy(
input=predict, label=label, program=program, init_program=init_program)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, init_program)
@@ -87,9 +89,9 @@ for pass_id in range(PASS_NUM):
outs = exe.run(program,
feed={'x': tensor_x,
'y': tensor_y},
fetch_list=[avg_cost])
fetch_list=[avg_cost, accuracy])
out = np.array(outs[0])
acc = np.array(outs[1])
if out[0] < 5.0:
exit(0) # if avg cost less than 5.0, we think our code is good.
exit(1)