Unverified · Commit a619695b, authored by Yu Yang and committed by GitHub

Feature/enhance evaluator (#5824)

* Stash

* Stash

* Polish Evaluator

* Merge code

* Revert
Parent 1f6002ed
@@ -227,7 +227,6 @@ template struct SelectedRowsAddToTensor<platform::GPUPlace, float>;
template struct SelectedRowsAddToTensor<platform::GPUPlace, double>;
template struct SelectedRowsAddToTensor<platform::GPUPlace, int>;
template struct SelectedRowsAddToTensor<platform::GPUPlace, int64_t>;
}  // namespace math
}  // namespace operators
}  // namespace paddle
import numpy as np

import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program, unique_name, \
    Variable
from paddle.v2.fluid.layer_helper import LayerHelper

__all__ = ['Accuracy']


def _clone_var_(block, var):
    assert isinstance(var, Variable)
    return block.create_var(
        name=var.name,
@@ -16,175 +21,115 @@ def _clone_var_in_block_(block, var):


class Evaluator(object):
    """
    Base Class for all evaluators.

    Args:
        name(str): The name of the evaluator, such as "accuracy". Used to
            generate a temporary variable name.
        main_program(Program, optional): The evaluator should be added to this
            main_program. Default g_main_program.
        startup_program(Program, optional): The parameter should be added to
            this startup_program. Default g_startup_program.

    Attributes:
        states(list): The list of state variables. states will be reset to
            zero when `reset` is invoked.
        metrics(list): The list of metrics variables. They will be calculated
            every mini-batch.
    """

    def __init__(self, name, **kwargs):
        self.states = []
        self.metrics = []
        self.helper = LayerHelper(name, **kwargs)

    def reset(self, executor, reset_program=None):
        """
        Reset metric states at the beginning of each pass/user-specified batch.
        """
        if reset_program is None:
            reset_program = Program()

        for var in self.states:
            assert isinstance(var, Variable)
            g_var = _clone_var_(reset_program.current_block(), var)
            layers.fill_constant(
                shape=g_var.shape,
                value=0.0,
                dtype=g_var.dtype,
                out=g_var,
                main_program=reset_program)

        executor.run(reset_program)

    def eval(self, executor, eval_program=None):
        """
        Evaluate the statistics merged by multiple mini-batches.
        """
        raise NotImplementedError()

    def create_state(self, suffix, dtype, shape):
        """
        Create a state variable.

        NOTE: It is not a public API.

        Args:
            suffix(str): the state suffix.
            dtype(str|core.DataType): the state data type.
            shape(tuple|list): the shape of the state.

        Returns: State variable
        """
        state = self.helper.create_variable(
            name="_".join([unique_name(self.helper.name), suffix]),
            persistable=True,
            dtype=dtype,
            shape=shape)
        self.states.append(state)
        return state


class Accuracy(Evaluator):
    """
    Average Accuracy for multiple mini-batches.
    """

    def __init__(self, input, label, k=1, **kwargs):
        super(Accuracy, self).__init__("accuracy", **kwargs)
        main_program = self.helper.main_program
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        self.total = self.create_state(dtype='int64', shape=[1], suffix='total')
        self.correct = self.create_state(
            dtype='int64', shape=[1], suffix='correct')
        kwargs = {'main_program': main_program}
        total = self.helper.create_tmp_variable(dtype='int')
        correct = self.helper.create_tmp_variable(dtype='int')
        acc = layers.accuracy(
            input=input,
            label=label,
            k=k,
            total=total,
            correct=correct,
            **kwargs)
        total = layers.cast(x=total, dtype='int64', **kwargs)
        correct = layers.cast(x=correct, dtype='int64', **kwargs)
        layers.sums(input=[self.total, total], out=self.total, **kwargs)
        layers.sums(input=[self.correct, correct], out=self.correct, **kwargs)

        self.metrics.append(acc)

    def eval(self, executor, eval_program=None):
        if eval_program is None:
            eval_program = Program()
        block = eval_program.current_block()
        kwargs = {'main_program': eval_program}
        total = _clone_var_(block, self.total)
        correct = _clone_var_(block, self.correct)
        total = layers.cast(total, dtype='float32', **kwargs)
        correct = layers.cast(correct, dtype='float32', **kwargs)
        out = layers.elementwise_div(x=correct, y=total, **kwargs)
        return np.array(executor.run(eval_program, fetch_list=[out])[0])
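The reworked evaluator keeps its accumulators on the Python object: `metrics` lists the per-mini-batch variables to fetch, `reset` zeroes the persistable state variables, and `eval` merges them into a pass-level result. A minimal usage sketch of this pattern, assuming the network variables (`predict`, `label`, `avg_cost`), the executor `exe`, the reader, and the feed tensors are built as in the updated book tests further down:

import numpy as np
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.evaluator as evaluator

# Build once, in the root block of the main program.
accuracy = evaluator.Accuracy(input=predict, label=label)

for pass_id in range(PASS_NUM):
    accuracy.reset(exe)  # zero the total/correct states for this pass
    for data in train_reader():
        # ... build tensor_img / tensor_y from data, as in the tests ...
        outs = exe.run(framework.default_main_program(),
                       feed={"pixel": tensor_img,
                             "label": tensor_y},
                       fetch_list=[avg_cost] + accuracy.metrics)
        batch_acc = np.array(outs[1])  # accuracy on this mini-batch
    pass_acc = accuracy.eval(exe)      # correct / total over the whole pass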
@@ -418,6 +418,7 @@ def _create_op_func_(op_type):
_create_op_func_('mean')
_create_op_func_('mul')
_create_op_func_('elementwise_add')
_create_op_func_('elementwise_div')
_create_op_func_('dropout')
_create_op_func_('reshape')
_create_op_func_('sigmoid')
@@ -457,12 +458,13 @@ def concat(input, axis, main_program=None, startup_program=None):
    return out


def sums(input, out=None, main_program=None, startup_program=None):
    """
    This function takes in the input and performs the sum operation on it
    and returns that as the output.
    """
    helper = LayerHelper('sum', **locals())
    if out is None:
        out = helper.create_tmp_variable(dtype=helper.input_dtype())
    helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out})
    return out
@@ -606,7 +608,7 @@ def square_error_cost(input, label, **kwargs):
    return square_out


def accuracy(input, label, k=1, correct=None, total=None, **kwargs):
    """
    This function computes the accuracy using the input and label.
    The output is the top_k inputs and their indices.
@@ -620,9 +622,10 @@ def accuracy(input, label, k=1, **kwargs):
        outputs={"Out": [topk_out],
                 "Indices": [topk_indices]},
        attrs={"k": k})
    acc_out = helper.create_tmp_variable(dtype="float32")
    if correct is None:
        correct = helper.create_tmp_variable(dtype="int64")
    if total is None:
        total = helper.create_tmp_variable(dtype="int64")
    helper.append_op(
        type="accuracy",
@@ -1355,6 +1358,19 @@ def lod_rank_table(x, level=0, main_program=None):
    return table


def topk(input, k, main_program=None, startup_program=None):
    helper = LayerHelper('topk', **locals())
    topk_out = helper.create_tmp_variable(dtype=input.data_type)
    topk_indices = helper.create_tmp_variable(dtype='int64')
    helper.append_op(
        type='top_k',
        inputs={'X': [input]},
        outputs={'Out': [topk_out],
                 'Indices': [topk_indices]},
        attrs={'k': k})
    return topk_out, topk_indices


def lod_tensor_to_array(x, table, main_program=None):
    """
    This function creates an operator to convert an LOD_Tensor to
@@ -1388,13 +1404,19 @@ def array_to_lod_tensor(x, table, main_program=None):
    return tmp


def fill_constant(shape,
                  dtype,
                  value,
                  out=None,
                  main_program=None,
                  startup_program=None):
    """
    This function creates a tensor with the shape mentioned in the input and
    the specified dtype, and fills it with a constant value that comes in the
    input. It also sets stop_gradient to True.
    """
    helper = LayerHelper("fill_constant", **locals())
    if out is None:
        out = helper.create_tmp_variable(dtype=dtype)
    helper.append_op(
        type='fill_constant',
......
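These layer changes exist to support in-place accumulation: the new optional `out=` argument on `sums` and `fill_constant` writes into an existing variable instead of allocating a fresh temporary, which is how the evaluator resets and accumulates its persistable states. A small illustrative sketch of the pattern; the names `state` and `batch_total` are hypothetical and stand for variables created elsewhere (e.g. via `Evaluator.create_state`):

import paddle.v2.fluid.layers as layers

# `state` is assumed to be a persistable int64 variable of shape [1];
# `batch_total` an int64 temporary holding the current mini-batch's count.
layers.fill_constant(shape=[1], dtype='int64', value=0.0, out=state)  # reset state in place
layers.sums(input=[state, batch_total], out=state)  # state += batch_total, in place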
@@ -5,7 +5,6 @@ import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.evaluator as evaluator
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.initializer import XavierInitializer
from paddle.v2.fluid.optimizer import AdamOptimizer
@@ -110,18 +109,16 @@ avg_cost = layers.mean(x=cost)
optimizer = AdamOptimizer(learning_rate=0.001)
opts = optimizer.minimize(avg_cost)

accuracy = evaluator.Accuracy(input=predict, label=label)

BATCH_SIZE = 128
PASS_NUM = 1

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.cifar.train10(), buf_size=128 * 10),
    batch_size=BATCH_SIZE)

place = core.CPUPlace()
exe = Executor(place)
@@ -147,46 +144,15 @@ for pass_id in range(PASS_NUM):
        outs = exe.run(framework.default_main_program(),
                       feed={"pixel": tensor_img,
                             "label": tensor_y},
                       fetch_list=[avg_cost] + accuracy.metrics)
        loss = np.array(outs[0])
        acc = np.array(outs[1])
        pass_acc = accuracy.eval(exe)
        print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
              " loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
                  pass_acc))
        batch_id = batch_id + 1

        if batch_id > 1:
            # This model is slow, so if we can train two mini-batches, we think it works properly.
......
@@ -31,7 +31,7 @@ avg_cost = layers.mean(x=cost)
optimizer = AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost)

accuracy = evaluator.Accuracy(input=predict, label=label)

BATCH_SIZE = 50
PASS_NUM = 3
@@ -61,7 +61,7 @@ for pass_id in range(PASS_NUM):
        outs = exe.run(framework.default_main_program(),
                       feed={"pixel": tensor_img,
                             "label": tensor_y},
                       fetch_list=[avg_cost] + accuracy.metrics)
        loss = np.array(outs[0])
        acc = np.array(outs[1])
        pass_acc = accuracy.eval(exe)
......
@@ -36,7 +36,7 @@ avg_cost = layers.mean(x=cost)
optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost)

accuracy = evaluator.Accuracy(input=predict, label=label)

train_reader = paddle.batch(
    paddle.reader.shuffle(
@@ -67,15 +67,14 @@ for pass_id in range(PASS_NUM):
        outs = exe.run(framework.default_main_program(),
                       feed={'x': tensor_x,
                             'y': tensor_y},
                       fetch_list=[avg_cost] + accuracy.metrics)
        out = np.array(outs[0])
        acc = np.array(outs[1])
        pass_acc = accuracy.eval(exe)

        test_accuracy = evaluator.Accuracy(input=predict, label=label)
        test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
        inference_program = get_inference_program(test_target)

        test_accuracy.reset(exe)
@@ -93,7 +92,7 @@ for pass_id in range(PASS_NUM):
        outs = exe.run(inference_program,
                       feed={'x': tensor_x,
                             'y': tensor_y},
                       fetch_list=[avg_cost] + test_accuracy.metrics)
        out = np.array(outs[0])
        acc = np.array(outs[1])
......
@@ -32,9 +32,9 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
    cost = layers.cross_entropy(input=prediction, label=label)
    avg_cost = layers.mean(x=cost)
    adam_optimizer = AdamOptimizer(learning_rate=0.002)
    adam_optimizer.minimize(avg_cost)
    accuracy = evaluator.Accuracy(input=prediction, label=label)
    return avg_cost, accuracy, accuracy.metrics[0]


def to_lodtensor(data, place):
......
@@ -41,9 +41,9 @@ def stacked_lstm_net(input_dim,
    cost = layers.cross_entropy(input=prediction, label=label)
    avg_cost = layers.mean(x=cost)
    adam_optimizer = AdamOptimizer(learning_rate=0.002)
    adam_optimizer.minimize(avg_cost)
    accuracy = evaluator.Accuracy(input=prediction, label=label)
    return avg_cost, accuracy, accuracy.metrics[0]


def to_lodtensor(data, place):
......