提交 b5c17665 编写于 作者: W wanghaoshuang

1. Add eval script

2. Add inference script
3. Add load model script
4. Add some functions into ctc_reader
上级 f1fe166c
import paddle.v2.fluid as fluid import paddle.fluid as fluid
def conv_bn_pool(input, def conv_bn_pool(input,
...@@ -7,7 +7,8 @@ def conv_bn_pool(input, ...@@ -7,7 +7,8 @@ def conv_bn_pool(input,
act="relu", act="relu",
param=None, param=None,
bias=None, bias=None,
param_0=None): param_0=None,
is_test=False):
tmp = input tmp = input
for i in xrange(group): for i in xrange(group):
tmp = fluid.layers.conv2d( tmp = fluid.layers.conv2d(
...@@ -19,14 +20,23 @@ def conv_bn_pool(input, ...@@ -19,14 +20,23 @@ def conv_bn_pool(input,
act=None, # LinearActivation act=None, # LinearActivation
use_cudnn=True) use_cudnn=True)
tmp = fluid.layers.batch_norm( tmp = fluid.layers.batch_norm(
input=tmp, act=act, param_attr=param, bias_attr=bias) input=tmp,
act=act,
param_attr=param,
bias_attr=bias,
is_test=is_test)
tmp = fluid.layers.pool2d( tmp = fluid.layers.pool2d(
input=tmp, pool_size=2, pool_type='max', pool_stride=2, use_cudnn=True) input=tmp, pool_size=2, pool_type='max', pool_stride=2, use_cudnn=True)
return tmp return tmp
def ocr_convs(input, num, with_bn, regularizer=None, gradient_clip=None): def ocr_convs(input,
num,
with_bn,
regularizer=None,
gradient_clip=None,
is_test=False):
assert (num % 4 == 0) assert (num % 4 == 0)
b = fluid.ParamAttr( b = fluid.ParamAttr(
...@@ -42,10 +52,11 @@ def ocr_convs(input, num, with_bn, regularizer=None, gradient_clip=None): ...@@ -42,10 +52,11 @@ def ocr_convs(input, num, with_bn, regularizer=None, gradient_clip=None):
gradient_clip=gradient_clip, gradient_clip=gradient_clip,
initializer=fluid.initializer.Normal(0.0, 0.01)) initializer=fluid.initializer.Normal(0.0, 0.01))
tmp = input tmp = input
tmp = conv_bn_pool(tmp, 2, [16, 16], param=w1, bias=b, param_0=w0) tmp = conv_bn_pool(
tmp = conv_bn_pool(tmp, 2, [32, 32], param=w1, bias=b) tmp, 2, [16, 16], param=w1, bias=b, param_0=w0, is_test=is_test)
tmp = conv_bn_pool(tmp, 2, [64, 64], param=w1, bias=b) tmp = conv_bn_pool(tmp, 2, [32, 32], param=w1, bias=b, is_test=is_test)
tmp = conv_bn_pool(tmp, 2, [128, 128], param=w1, bias=b) tmp = conv_bn_pool(tmp, 2, [64, 64], param=w1, bias=b, is_test=is_test)
tmp = conv_bn_pool(tmp, 2, [128, 128], param=w1, bias=b, is_test=is_test)
return tmp return tmp
...@@ -53,9 +64,15 @@ def encoder_net(images, ...@@ -53,9 +64,15 @@ def encoder_net(images,
num_classes, num_classes,
rnn_hidden_size=200, rnn_hidden_size=200,
regularizer=None, regularizer=None,
gradient_clip=None): gradient_clip=None,
is_test=False):
conv_features = ocr_convs( conv_features = ocr_convs(
images, 8, True, regularizer=regularizer, gradient_clip=gradient_clip) images,
8,
True,
regularizer=regularizer,
gradient_clip=gradient_clip,
is_test=is_test)
sliced_feature = fluid.layers.im2sequence( sliced_feature = fluid.layers.im2sequence(
input=conv_features, input=conv_features,
stride=[1, 1], stride=[1, 1],
...@@ -111,12 +128,12 @@ def encoder_net(images, ...@@ -111,12 +128,12 @@ def encoder_net(images,
size=num_classes + 1, size=num_classes + 1,
param_attr=w_attr, param_attr=w_attr,
bias_attr=b_attr) bias_attr=b_attr)
return fc_out return fc_out
def ctc_train_net(images, label, args, num_classes): def ctc_train_net(images, label, args, num_classes):
regularizer = fluid.regularizer.L2Decay(args.l2) regularizer = fluid.regularizer.L2Decay(args.l2)
# gradient_clip=fluid.clip.GradientClipByValue(args.max_clip, args.min_clip)
gradient_clip = None gradient_clip = None
fc_out = encoder_net( fc_out = encoder_net(
images, images,
...@@ -127,7 +144,7 @@ def ctc_train_net(images, label, args, num_classes): ...@@ -127,7 +144,7 @@ def ctc_train_net(images, label, args, num_classes):
cost = fluid.layers.warpctc( cost = fluid.layers.warpctc(
input=fc_out, input=fc_out,
label=label, label=label,
size=num_classes + 1, # size=num_classes + 1,
blank=num_classes, blank=num_classes,
norm_by_times=True) norm_by_times=True)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
...@@ -142,3 +159,27 @@ def ctc_train_net(images, label, args, num_classes): ...@@ -142,3 +159,27 @@ def ctc_train_net(images, label, args, num_classes):
error_evaluator = fluid.evaluator.EditDistance( error_evaluator = fluid.evaluator.EditDistance(
input=decoded_out, label=casted_label) input=decoded_out, label=casted_label)
return avg_cost, error_evaluator return avg_cost, error_evaluator
def ctc_infer(images, num_classes):
    """Build the CTC inference network.

    Runs the CRNN encoder in test mode (batch-norm uses running stats) and
    greedily decodes the per-timestep class logits.
    """
    logits = encoder_net(images, num_classes, is_test=True)
    decoded = fluid.layers.ctc_greedy_decoder(input=logits, blank=num_classes)
    return decoded
def ctc_eval(images, label, num_classes):
    """Build the CTC evaluation network.

    Returns ``(error_evaluator, cost)`` where the evaluator measures the
    edit distance between the greedy decoding and the ground truth, and
    ``cost`` is the warp-CTC loss of the same logits.
    """
    logits = encoder_net(images, num_classes, is_test=True)

    # Edit-distance evaluator over the greedy decoding.
    greedy_out = fluid.layers.ctc_greedy_decoder(
        input=logits, blank=num_classes)
    label_int64 = fluid.layers.cast(x=label, dtype='int64')
    error_evaluator = fluid.evaluator.EditDistance(
        input=greedy_out, label=label_int64)

    # Warp-CTC loss on the raw logits (blank label is the extra class).
    cost = fluid.layers.warpctc(
        input=logits,
        label=label,
        blank=num_classes,
        norm_by_times=True)

    return error_evaluator, cost
...@@ -4,6 +4,10 @@ import numpy as np ...@@ -4,6 +4,10 @@ import numpy as np
from PIL import Image from PIL import Image
from paddle.v2.image import load_image from paddle.v2.image import load_image
import paddle.v2 as paddle
NUM_CLASSES = 10784
DATA_SHAPE = [1, 48, 512]
class DataGenerator(object): class DataGenerator(object):
...@@ -95,3 +99,28 @@ class DataGenerator(object): ...@@ -95,3 +99,28 @@ class DataGenerator(object):
yield img, label yield img, label
return reader return reader
def num_classes():
    """Return the number of character classes in the OCR label set."""
    return NUM_CLASSES
def data_shape():
    """Return the input image shape as [channels, height, width]."""
    return DATA_SHAPE
def train(batch_size,
          images_dir="/home/disk1/wanghaoshuang/models/fluid/ocr_recognition/data/train_images/",
          list_file="/home/disk1/wanghaoshuang/models/fluid/ocr_recognition/data/train.list"):
    """Return a reader over the training split.

    Args:
        batch_size: number of samples per mini-batch.
        images_dir: directory containing the training images. Defaults to
            the previously hard-coded location for backward compatibility.
        list_file: path of the list file describing the training samples.

    Returns:
        Whatever ``DataGenerator.train_reader`` produces for these paths.
    """
    generator = DataGenerator()
    return generator.train_reader(images_dir, list_file, batch_size)
def test(batch_size=1,
         images_dir="/home/disk1/wanghaoshuang/models/fluid/ocr_recognition/data/test_images/",
         list_file="/home/disk1/wanghaoshuang/models/fluid/ocr_recognition/data/test.list"):
    """Return a batched reader over the test split.

    Args:
        batch_size: number of samples per mini-batch (default 1).
        images_dir: directory containing the test images. Defaults to the
            previously hard-coded location for backward compatibility.
        list_file: path of the list file describing the test samples.

    Returns:
        A ``paddle.batch``-wrapped test reader.
    """
    generator = DataGenerator()
    return paddle.batch(
        generator.test_reader(images_dir, list_file), batch_size)
"""Trainer for OCR CTC model.""" """Trainer for OCR CTC model."""
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import dummy_reader import dummy_reader
import ctc_reader
import argparse import argparse
from load_model import load_param
import functools import functools
import sys import sys
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
...@@ -12,7 +14,7 @@ parser = argparse.ArgumentParser(description=__doc__) ...@@ -12,7 +14,7 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.") add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('pass_num', int, 32, "# of training epochs.") add_arg('pass_num', int, 100, "# of training epochs.")
add_arg('log_period', int, 1000, "Log period.") add_arg('log_period', int, 1000, "Log period.")
add_arg('learning_rate', float, 1.0e-3, "Learning rate.") add_arg('learning_rate', float, 1.0e-3, "Learning rate.")
add_arg('l2', float, 0.0004, "L2 regularizer.") add_arg('l2', float, 0.0004, "L2 regularizer.")
...@@ -24,6 +26,15 @@ add_arg('device', int, 0, "Device id.'-1' means running on CPU" ...@@ -24,6 +26,15 @@ add_arg('device', int, 0, "Device id.'-1' means running on CPU"
"while '0' means GPU-0.") "while '0' means GPU-0.")
# yapf: disable # yapf: disable
def load_parameter(place):
    """Copy converted v2 checkpoint parameters into the fluid global scope.

    Reads the name-map and the old checkpoint directory via ``load_param``
    and overwrites each matching variable's tensor on *place*.
    """
    param_dict = load_param('./name.map', './data/model/results_without_avg_window/pass-00000/')
    for param_name in param_dict:
        # print "param: %s" % name
        tensor = fluid.global_scope().find_var(param_name).get_tensor()
        tensor.set(param_dict[param_name], place)
def train(args, data_reader=dummy_reader): def train(args, data_reader=dummy_reader):
"""OCR CTC training""" """OCR CTC training"""
num_classes = data_reader.num_classes() num_classes = data_reader.num_classes()
...@@ -42,38 +53,54 @@ def train(args, data_reader=dummy_reader): ...@@ -42,38 +53,54 @@ def train(args, data_reader=dummy_reader):
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
load_parameter(place)
inference_program = fluid.io.get_inference_program(error_evaluator) inference_program = fluid.io.get_inference_program(error_evaluator)
# evaluate model on test data
error_evaluator.reset(exe)
for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe)
print "\nEnd pass[%d]; Test seq error: %s.\n" % (
-1, str(test_seq_error[0]))
for pass_id in range(args.pass_num): for pass_id in range(args.pass_num):
error_evaluator.reset(exe) error_evaluator.reset(exe)
batch_id = 0 batch_id = 1
total_loss = 0.0
total_seq_error = 0.0
# train a pass # train a pass
for data in train_reader(): for data in train_reader():
loss, batch_edit_distance = exe.run( batch_loss, _, batch_seq_error = exe.run(
fluid.default_main_program(), fluid.default_main_program(),
feed=get_feeder_data(data, place), feed=get_feeder_data(data, place),
fetch_list=[avg_cost] + error_evaluator.metrics) fetch_list=[avg_cost] + error_evaluator.metrics)
if batch_id % args.log_period == 0: total_loss += batch_loss[0]
print "Pass[%d]-batch[%d]; Loss: %s; Word error: %s." % ( total_seq_error += batch_seq_error[0]
pass_id, batch_id, loss[0], batch_edit_distance[0] / float(args.batch_size)) if batch_id % 10 == 1:
print '.',
sys.stdout.flush()
if batch_id % args.log_period == 1:
print "\nPass[%d]-batch[%d]; Avg Warp-CTC loss: %s; Avg seq error: %s." % (
pass_id, batch_id, total_loss / batch_id, total_seq_error / (batch_id * args.batch_size))
sys.stdout.flush() sys.stdout.flush()
batch_id += 1 batch_id += 1
train_edit_distance = error_evaluator.eval(exe)
print "End pass[%d]; Train word error: %s.\n" % (
pass_id, str(train_edit_distance[0]))
# evaluate model on test data # evaluate model on test data
error_evaluator.reset(exe) error_evaluator.reset(exe)
for data in test_reader(): for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place)) exe.run(inference_program, feed=get_feeder_data(data, place))
test_edit_distance = error_evaluator.eval(exe) _, test_seq_error = error_evaluator.eval(exe)
print "End pass[%d]; Test word error: %s.\n" % ( print "\nEnd pass[%d]; Test seq error: %s.\n" % (
pass_id, str(test_edit_distance[0])) pass_id, str(test_seq_error[0]))
def main(): def main():
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
train(args, data_reader=dummy_reader) train(args, data_reader=ctc_reader)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
...@@ -32,7 +32,7 @@ def _read_creater(num_sample=1024, min_seq_len=1, max_seq_len=10): ...@@ -32,7 +32,7 @@ def _read_creater(num_sample=1024, min_seq_len=1, max_seq_len=10):
return reader return reader
def train(batch_size, num_sample=16): def train(batch_size, num_sample=128):
"""Get train dataset reader.""" """Get train dataset reader."""
return paddle.batch(_read_creater(num_sample=num_sample), batch_size) return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
......
import paddle.v2 as paddle
import paddle.fluid as fluid
from load_model import load_param
from utility import get_feeder_data
from crnn_ctc_model import ctc_eval
import ctc_reader
import dummy_reader
def load_parameter(place):
params = load_param('./name.map', './data/model/results/pass-00062/')
for name in params:
print "param: %s" % name
t = fluid.global_scope().find_var(name).get_tensor()
t.set(params[name], place)
def evaluate(eval=ctc_eval, data_reader=dummy_reader):
"""OCR inference"""
num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape()
# define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int32', lod_level=1)
evaluator, cost = eval(images, label, num_classes)
# data reader
test_reader = data_reader.test()
# prepare environment
place = fluid.CUDAPlace(0)
#place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
print fluid.default_main_program()
load_parameter(place)
evaluator.reset(exe)
count = 0
for data in test_reader():
count += 1
print 'Process samples: %d\r' % (count, ),
result, avg_distance, avg_seq_error = exe.run(
fluid.default_main_program(),
feed=get_feeder_data(data, place),
fetch_list=[cost] + evaluator.metrics)
avg_distance, avg_seq_error = evaluator.eval(exe)
print "avg_distance: %s; avg_seq_error: %s" % (avg_distance, avg_seq_error)
def main():
    """Entry point: evaluate the CTC model on the real OCR dataset."""
    evaluate(data_reader=ctc_reader)


if __name__ == "__main__":
    main()
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from load_model import load_param
from utility import get_feeder_data
from crnn_ctc_model import ctc_infer
import ctc_reader
import dummy_reader
def load_parameter(place):
params = load_param('./name.map', './data/model/results/pass-00062/')
for name in params:
print "param: %s" % name
t = fluid.global_scope().find_var(name).get_tensor()
t.set(params[name], place)
def inference(infer=ctc_infer, data_reader=dummy_reader):
"""OCR inference"""
num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape()
# define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
sequence, tmp = infer(images, num_classes)
fluid.layers.Print(tmp)
# data reader
test_reader = data_reader.test()
# prepare environment
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
load_parameter(place)
for data in test_reader():
result = exe.run(fluid.default_main_program(),
feed=get_feeder_data(
data, place, need_label=False),
fetch_list=[tmp])
print "result: %s" % (list(result[0].flatten()), )
def main():
    """Entry point: run inference with the real OCR dataset reader."""
    inference(data_reader=ctc_reader)


if __name__ == "__main__":
    main()
import sys
import numpy as np
import ast
def load_parameter(file_name):
    """Read one Paddle v2 parameter file as a flat float32 array.

    The v2 on-disk format stores a fixed 16-byte header before the raw
    float32 payload; the header is skipped.
    """
    with open(file_name, 'rb') as f:
        f.read(16)  # discard the fixed-size header
        return np.fromfile(f, dtype=np.float32)


def load_param(name_map_file, old_param_dir):
    """Load old-format parameters and rename/reshape them for the new model.

    Each line of *name_map_file* has the form ``old_name=new_name=shape``,
    where ``shape`` is a Python list literal. Returns a dict mapping each
    new name to the reshaped float32 array loaded from *old_param_dir*.
    """
    name_map = {}
    shape_map = {}
    with open(name_map_file, 'r') as map_file:
        for line in map_file:
            old_name, new_name, shape = line.strip().split('=')
            name_map[new_name] = old_name
            shape_map[new_name] = ast.literal_eval(shape)

    result = {}
    for new_name, old_name in name_map.items():
        flat = load_parameter("/".join([old_param_dir, old_name]))
        result[new_name] = flat.reshape(shape_map[new_name])
    return result
if __name__ == "__main__":
name_map_file = "./name.map"
old_param_dir = "./data/model/results/pass-00062/"
result = load_param(name_map_file, old_param_dir)
for p in result:
print "name: %s; param.shape: %s" % (p, result[p].shape)
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import distutils.util import distutils.util
import numpy as np import numpy as np
from paddle.v2.fluid import core from paddle.fluid import core
def print_arguments(args): def print_arguments(args):
...@@ -77,11 +77,14 @@ def to_lodtensor(data, place): ...@@ -77,11 +77,14 @@ def to_lodtensor(data, place):
return res return res
def get_feeder_data(data, place): def get_feeder_data(data, place, need_label=True):
pixel_tensor = core.LoDTensor() pixel_tensor = core.LoDTensor()
pixel_data = None pixel_data = None
pixel_data = np.concatenate( pixel_data = np.concatenate(
map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32") map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32")
pixel_tensor.set(pixel_data, place) pixel_tensor.set(pixel_data, place)
label_tensor = to_lodtensor(map(lambda x: x[1], data), place) label_tensor = to_lodtensor(map(lambda x: x[1], data), place)
if need_label:
return {"pixel": pixel_tensor, "label": label_tensor} return {"pixel": pixel_tensor, "label": label_tensor}
else:
return {"pixel": pixel_tensor}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册