提交 b5c17665 编写于 作者: W wanghaoshuang

1. Add eval script
2. Add inference script
3. Add load model script
4. Add some functions into ctc_reader
上级 f1fe166c
import paddle.v2.fluid as fluid
import paddle.fluid as fluid
def conv_bn_pool(input,
......@@ -7,7 +7,8 @@ def conv_bn_pool(input,
act="relu",
param=None,
bias=None,
param_0=None):
param_0=None,
is_test=False):
tmp = input
for i in xrange(group):
tmp = fluid.layers.conv2d(
......@@ -19,14 +20,23 @@ def conv_bn_pool(input,
act=None, # LinearActivation
use_cudnn=True)
tmp = fluid.layers.batch_norm(
input=tmp, act=act, param_attr=param, bias_attr=bias)
input=tmp,
act=act,
param_attr=param,
bias_attr=bias,
is_test=is_test)
tmp = fluid.layers.pool2d(
input=tmp, pool_size=2, pool_type='max', pool_stride=2, use_cudnn=True)
return tmp
def ocr_convs(input, num, with_bn, regularizer=None, gradient_clip=None):
def ocr_convs(input,
num,
with_bn,
regularizer=None,
gradient_clip=None,
is_test=False):
assert (num % 4 == 0)
b = fluid.ParamAttr(
......@@ -42,10 +52,11 @@ def ocr_convs(input, num, with_bn, regularizer=None, gradient_clip=None):
gradient_clip=gradient_clip,
initializer=fluid.initializer.Normal(0.0, 0.01))
tmp = input
tmp = conv_bn_pool(tmp, 2, [16, 16], param=w1, bias=b, param_0=w0)
tmp = conv_bn_pool(tmp, 2, [32, 32], param=w1, bias=b)
tmp = conv_bn_pool(tmp, 2, [64, 64], param=w1, bias=b)
tmp = conv_bn_pool(tmp, 2, [128, 128], param=w1, bias=b)
tmp = conv_bn_pool(
tmp, 2, [16, 16], param=w1, bias=b, param_0=w0, is_test=is_test)
tmp = conv_bn_pool(tmp, 2, [32, 32], param=w1, bias=b, is_test=is_test)
tmp = conv_bn_pool(tmp, 2, [64, 64], param=w1, bias=b, is_test=is_test)
tmp = conv_bn_pool(tmp, 2, [128, 128], param=w1, bias=b, is_test=is_test)
return tmp
......@@ -53,9 +64,15 @@ def encoder_net(images,
num_classes,
rnn_hidden_size=200,
regularizer=None,
gradient_clip=None):
gradient_clip=None,
is_test=False):
conv_features = ocr_convs(
images, 8, True, regularizer=regularizer, gradient_clip=gradient_clip)
images,
8,
True,
regularizer=regularizer,
gradient_clip=gradient_clip,
is_test=is_test)
sliced_feature = fluid.layers.im2sequence(
input=conv_features,
stride=[1, 1],
......@@ -111,12 +128,12 @@ def encoder_net(images,
size=num_classes + 1,
param_attr=w_attr,
bias_attr=b_attr)
return fc_out
def ctc_train_net(images, label, args, num_classes):
regularizer = fluid.regularizer.L2Decay(args.l2)
# gradient_clip=fluid.clip.GradientClipByValue(args.max_clip, args.min_clip)
gradient_clip = None
fc_out = encoder_net(
images,
......@@ -127,7 +144,7 @@ def ctc_train_net(images, label, args, num_classes):
cost = fluid.layers.warpctc(
input=fc_out,
label=label,
size=num_classes + 1,
# size=num_classes + 1,
blank=num_classes,
norm_by_times=True)
avg_cost = fluid.layers.mean(x=cost)
......@@ -142,3 +159,27 @@ def ctc_train_net(images, label, args, num_classes):
error_evaluator = fluid.evaluator.EditDistance(
input=decoded_out, label=casted_label)
return avg_cost, error_evaluator
def ctc_infer(images, num_classes):
    """Build the CTC inference network.

    Runs the encoder in test mode (batch-norm uses running statistics)
    and greedily decodes the per-timestep class scores.

    :param images: input image variable fed to the encoder.
    :param num_classes: number of real character classes; class index
        ``num_classes`` is used as the CTC blank label.
    :return: result of ``ctc_greedy_decoder`` over the encoder output.
    """
    logits = encoder_net(images, num_classes, is_test=True)
    return fluid.layers.ctc_greedy_decoder(input=logits, blank=num_classes)
def ctc_eval(images, label, num_classes):
    """Build the CTC evaluation network.

    Decodes encoder output greedily, wires an edit-distance evaluator
    against the (int64-cast) labels, and also builds the warp-CTC cost
    for the same logits.

    :param images: input image variable.
    :param label: ground-truth label sequence (lod tensor).
    :param num_classes: number of real classes; ``num_classes`` is blank.
    :return: (EditDistance evaluator, warp-CTC cost variable).
    """
    logits = encoder_net(images, num_classes, is_test=True)
    decoded = fluid.layers.ctc_greedy_decoder(input=logits, blank=num_classes)
    int64_label = fluid.layers.cast(x=label, dtype='int64')
    evaluator = fluid.evaluator.EditDistance(input=decoded, label=int64_label)
    ctc_cost = fluid.layers.warpctc(
        input=logits,
        label=label,
        blank=num_classes,
        norm_by_times=True)
    return evaluator, ctc_cost
......@@ -4,6 +4,10 @@ import numpy as np
from PIL import Image
from paddle.v2.image import load_image
import paddle.v2 as paddle
NUM_CLASSES = 10784
DATA_SHAPE = [1, 48, 512]
class DataGenerator(object):
......@@ -15,10 +19,10 @@ class DataGenerator(object):
Reader interface for training.
:param img_root_dir: The root path of the image for training.
:type file_list: str
:type file_list: str
:param img_label_list: The path of the <image_name, label> file for training.
:type file_list: str
:type file_list: str
'''
......@@ -76,7 +80,7 @@ class DataGenerator(object):
Reader interface for inference.
:param img_root_dir: The root path of the images for training.
:type file_list: str
:type file_list: str
:param img_label_list: The path of the <image_name, label> file for testing.
:type file_list: list
......@@ -95,3 +99,28 @@ class DataGenerator(object):
yield img, label
return reader
def num_classes():
    """Return the number of character classes in the dataset (NUM_CLASSES)."""
    return NUM_CLASSES
def data_shape():
    """Return the input image shape, DATA_SHAPE = [1, 48, 512].

    Presumably [channels, height, width] — confirm against the reader.
    """
    return DATA_SHAPE
def train(batch_size,
          images_dir="/home/disk1/wanghaoshuang/models/fluid/ocr_recognition/data/train_images/",
          list_file="/home/disk1/wanghaoshuang/models/fluid/ocr_recognition/data/train.list"):
    """Return a reader over the training set.

    :param batch_size: number of samples per batch (passed to train_reader).
    :param images_dir: root directory of the training images.
    :param list_file: path of the <image_name, label> list file.

    NOTE(review): the defaults are machine-specific absolute paths, kept
    only for backward compatibility — pass explicit paths elsewhere.
    """
    generator = DataGenerator()
    return generator.train_reader(images_dir, list_file, batch_size)
def test(batch_size=1,
         images_dir="/home/disk1/wanghaoshuang/models/fluid/ocr_recognition/data/test_images/",
         list_file="/home/disk1/wanghaoshuang/models/fluid/ocr_recognition/data/test.list"):
    """Return a batched reader over the test set.

    :param batch_size: number of samples per batch (via paddle.batch).
    :param images_dir: root directory of the test images.
    :param list_file: path of the <image_name, label> list file.

    NOTE(review): the defaults are machine-specific absolute paths, kept
    only for backward compatibility — pass explicit paths elsewhere.
    """
    generator = DataGenerator()
    return paddle.batch(
        generator.test_reader(images_dir, list_file), batch_size)
"""Trainer for OCR CTC model."""
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.fluid as fluid
import dummy_reader
import ctc_reader
import argparse
from load_model import load_param
import functools
import sys
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
......@@ -12,7 +14,7 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('pass_num', int, 32, "# of training epochs.")
add_arg('pass_num', int, 100, "# of training epochs.")
add_arg('log_period', int, 1000, "Log period.")
add_arg('learning_rate', float, 1.0e-3, "Learning rate.")
add_arg('l2', float, 0.0004, "L2 regularizer.")
......@@ -24,6 +26,15 @@ add_arg('device', int, 0, "Device id.'-1' means running on CPU"
"while '0' means GPU-0.")
# yapf: disable
def load_parameter(place):
    """Initialise the fluid global scope from pretrained v2 parameters.

    Reads the name-map and legacy parameter dir via ``load_param`` and
    copies each array into the matching global-scope tensor on ``place``.
    """
    pretrained = load_param(
        './name.map', './data/model/results_without_avg_window/pass-00000/')
    for param_name in pretrained:
        tensor = fluid.global_scope().find_var(param_name).get_tensor()
        tensor.set(pretrained[param_name], place)
def train(args, data_reader=dummy_reader):
"""OCR CTC training"""
num_classes = data_reader.num_classes()
......@@ -42,38 +53,54 @@ def train(args, data_reader=dummy_reader):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
load_parameter(place)
inference_program = fluid.io.get_inference_program(error_evaluator)
# evaluate model on test data
error_evaluator.reset(exe)
for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe)
print "\nEnd pass[%d]; Test seq error: %s.\n" % (
-1, str(test_seq_error[0]))
for pass_id in range(args.pass_num):
error_evaluator.reset(exe)
batch_id = 0
batch_id = 1
total_loss = 0.0
total_seq_error = 0.0
# train a pass
for data in train_reader():
loss, batch_edit_distance = exe.run(
batch_loss, _, batch_seq_error = exe.run(
fluid.default_main_program(),
feed=get_feeder_data(data, place),
fetch_list=[avg_cost] + error_evaluator.metrics)
if batch_id % args.log_period == 0:
print "Pass[%d]-batch[%d]; Loss: %s; Word error: %s." % (
pass_id, batch_id, loss[0], batch_edit_distance[0] / float(args.batch_size))
total_loss += batch_loss[0]
total_seq_error += batch_seq_error[0]
if batch_id % 10 == 1:
print '.',
sys.stdout.flush()
if batch_id % args.log_period == 1:
print "\nPass[%d]-batch[%d]; Avg Warp-CTC loss: %s; Avg seq error: %s." % (
pass_id, batch_id, total_loss / batch_id, total_seq_error / (batch_id * args.batch_size))
sys.stdout.flush()
batch_id += 1
train_edit_distance = error_evaluator.eval(exe)
print "End pass[%d]; Train word error: %s.\n" % (
pass_id, str(train_edit_distance[0]))
# evaluate model on test data
error_evaluator.reset(exe)
for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place))
test_edit_distance = error_evaluator.eval(exe)
print "End pass[%d]; Test word error: %s.\n" % (
pass_id, str(test_edit_distance[0]))
_, test_seq_error = error_evaluator.eval(exe)
print "\nEnd pass[%d]; Test seq error: %s.\n" % (
pass_id, str(test_seq_error[0]))
def main():
    """Parse CLI arguments and launch CTC training on the real OCR reader."""
    args = parser.parse_args()
    print_arguments(args)
    # Diff residue removed: a stale `train(args, data_reader=dummy_reader)`
    # call preceded this line and would have run a second training pass.
    train(args, data_reader=ctc_reader)


if __name__ == "__main__":
    main()
......@@ -32,7 +32,7 @@ def _read_creater(num_sample=1024, min_seq_len=1, max_seq_len=10):
return reader
def train(batch_size, num_sample=128):
    """Get train dataset reader.

    Diff residue removed: a stale duplicate ``def train(batch_size,
    num_sample=16):`` header preceded this definition.

    :param batch_size: number of samples per batch.
    :param num_sample: number of synthetic samples to generate.
    """
    return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
......
import paddle.v2 as paddle
import paddle.fluid as fluid
from load_model import load_param
from utility import get_feeder_data
from crnn_ctc_model import ctc_eval
import ctc_reader
import dummy_reader
def load_parameter(place):
    """Copy pretrained legacy parameters into fluid's global scope.

    ``load_param`` returns ``{new_name: ndarray}`` built from ``name.map``;
    each array is written into the matching global-scope tensor on ``place``.
    """
    params = load_param('./name.map', './data/model/results/pass-00062/')
    for name in params:
        print "param: %s" % name
        t = fluid.global_scope().find_var(name).get_tensor()
        t.set(params[name], place)
def evaluate(eval=ctc_eval, data_reader=dummy_reader):
    """Run CTC model evaluation over the whole test set.

    Builds the eval network, loads pretrained parameters, feeds every
    test batch, and prints the accumulated edit distance / sequence error.

    NOTE(review): the parameter name ``eval`` shadows the builtin.
    """
    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(
        name='label', shape=[1], dtype='int32', lod_level=1)
    evaluator, cost = eval(images, label, num_classes)

    # data reader
    test_reader = data_reader.test()

    # prepare environment
    place = fluid.CUDAPlace(0)
    #place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    print fluid.default_main_program()
    load_parameter(place)

    evaluator.reset(exe)
    count = 0
    for data in test_reader():
        count += 1
        # Trailing comma: overwrite the same console line per sample.
        print 'Process samples: %d\r' % (count, ),
        # Fetching evaluator.metrics accumulates distance/error per batch.
        result, avg_distance, avg_seq_error = exe.run(
            fluid.default_main_program(),
            feed=get_feeder_data(data, place),
            fetch_list=[cost] + evaluator.metrics)
    # Aggregate over all batches seen since reset().
    avg_distance, avg_seq_error = evaluator.eval(exe)
    print "avg_distance: %s; avg_seq_error: %s" % (avg_distance, avg_seq_error)
def main():
    """Entry point: evaluate using the real OCR CTC data reader."""
    evaluate(data_reader=ctc_reader)


if __name__ == "__main__":
    main()
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from load_model import load_param
from utility import get_feeder_data
from crnn_ctc_model import ctc_infer
import ctc_reader
import dummy_reader
def load_parameter(place):
    """Copy pretrained legacy parameters into fluid's global scope.

    Same contract as the eval script's loader: ``load_param`` maps new
    names to legacy arrays, each written to its global-scope tensor.
    """
    params = load_param('./name.map', './data/model/results/pass-00062/')
    for name in params:
        print "param: %s" % name
        t = fluid.global_scope().find_var(name).get_tensor()
        t.set(params[name], place)
def inference(infer=ctc_infer, data_reader=dummy_reader):
    """Run OCR inference over the test set and print raw decoder output."""
    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    sequence, tmp = infer(images, num_classes)
    # Debug op: prints `tmp` (the second decoder output) at runtime.
    fluid.layers.Print(tmp)
    # data reader
    test_reader = data_reader.test()
    # prepare environment
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    load_parameter(place)
    for data in test_reader():
        # need_label=False: feed images only; labels are not required here.
        result = exe.run(fluid.default_main_program(),
                         feed=get_feeder_data(
                             data, place, need_label=False),
                         fetch_list=[tmp])
        print "result: %s" % (list(result[0].flatten()), )
def main():
    """Entry point: run inference using the real OCR CTC data reader."""
    inference(data_reader=ctc_reader)


if __name__ == "__main__":
    main()
import sys
import numpy as np
import ast
def load_parameter(file_name, header_bytes=16):
    """Load a raw Paddle-v2 parameter file as a flat float32 array.

    The legacy format stores a fixed-size binary header followed by the
    raw float32 payload; the header is skipped, not parsed.

    :param file_name: path to the legacy parameter file.
    :param header_bytes: size of the header to skip (16 for v2 files).
    :return: 1-D numpy.ndarray of float32 values.
    """
    with open(file_name, 'rb') as f:
        f.read(header_bytes)  # skip header.
        return np.fromfile(f, dtype=np.float32)
def load_param(name_map_file, old_param_dir):
    """Load all mapped legacy parameters, reshaped to their target shapes.

    Each line of ``name_map_file`` is ``old_name=new_name=shape`` where
    ``shape`` is a Python literal (parsed with ast.literal_eval).

    :param name_map_file: path of the name-map file.
    :param old_param_dir: directory holding the legacy parameter files.
    :return: dict mapping new parameter name -> reshaped numpy array.
    """
    name_map = {}
    shape_map = {}
    with open(name_map_file, 'r') as map_file:
        for line in map_file:
            old_name, new_name, shape = line.strip().split('=')
            name_map[new_name] = old_name
            shape_map[new_name] = ast.literal_eval(shape)
    return {
        new_name: load_parameter(
            "/".join([old_param_dir, name_map[new_name]])
        ).reshape(shape_map[new_name])
        for new_name in name_map
    }
if __name__ == "__main__":
    # Smoke test: load the legacy parameter set and dump each name/shape.
    name_map_file = "./name.map"
    old_param_dir = "./data/model/results/pass-00062/"
    result = load_param(name_map_file, old_param_dir)
    for p in result:
        print "name: %s; param.shape: %s" % (p, result[p].shape)
......@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
from paddle.v2.fluid import core
from paddle.fluid import core
def print_arguments(args):
......@@ -77,11 +77,14 @@ def to_lodtensor(data, place):
return res
def get_feeder_data(data, place, need_label=True):
    """Build the feed dict for one batch.

    Diff residue removed: a stale ``def get_feeder_data(data, place):``
    header, a dead ``pixel_data = None`` assignment, and an old
    unconditional label/return pair preceded the ``need_label`` branch.
    The label tensor is now only built when actually needed.

    :param data: batch of samples; each sample is (image_array, label_seq).
    :param place: device place the tensors are set on.
    :param need_label: include the "label" entry in the feed dict.
    :return: {"pixel": LoDTensor} plus "label" when need_label is True.
    """
    pixel_tensor = core.LoDTensor()
    # Stack each sample's image (x[0]) into one (batch, ...) float32 array.
    pixel_data = np.concatenate(
        map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32")
    pixel_tensor.set(pixel_data, place)
    if need_label:
        label_tensor = to_lodtensor(map(lambda x: x[1], data), place)
        return {"pixel": pixel_tensor, "label": label_tensor}
    else:
        return {"pixel": pixel_tensor}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册