Commit 6552fcf2 authored by root

Merge branch 'develop' of https://github.com/PaddlePaddle/models into ce_image_classification2

...@@ -165,10 +165,10 @@ python widerface_eval.py --infer=True --confs_threshold=0.15
```
The following figures visualize the model's predictions:
<p align="center">
<img src="images/0_Parade_marchingband_1_356.jpg" height=400 width=400 hspace='10'/>
<img src="images/28_Sports_Fan_Sports_Fan_28_770.jpg" height=400 width=400 hspace='10'/>
<img src="images/4_Dancing_Dancing_4_194.jpg" height=400 width=400 hspace='10'/>
<img src="images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg" height=400 width=400 hspace='10'/> <br />
Pyramidbox prediction visualization
</p>
...
...@@ -2,6 +2,7 @@
# This file is only used for continuous evaluation.
export ce_mode=1
rm -rf *_factor.txt
python train.py --use_gpu=True --random_mirror=False --random_scaling=False 1> log
cat log | python _ce.py
...@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
# NOTE kpi.py should be shared across models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
tracking_kpis = [
    train_cost_kpi,
...
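For context, the `kpis <name> <value>` lines that the training scripts print are what `_ce.py` reads from the piped log. A minimal sketch of that protocol, assuming a simple regex-based parser (the real `kpi.py` lives in the CE framework and is not part of this diff):

```
import re
import sys

# Matches the "kpis <name> <value>" lines emitted by train.py.
KPI_PATTERN = re.compile(r"^kpis\s+(\w+)\s+([-+0-9.eE]+)\s*$")

def parse_kpi_log(lines):
    """Collect kpi name -> list of values from a training log."""
    records = {}
    for line in lines:
        match = KPI_PATTERN.match(line.strip())
        if match:
            records.setdefault(match.group(1), []).append(float(match.group(2)))
    return records

if __name__ == "__main__":
    for name, values in parse_kpi_log(sys.stdin).items():
        print("%s: last value %s" % (name, values[-1]))
```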
"""Reader for Cityscape dataset. """Reader for Cityscape dataset.
""" """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
import numpy as np
...@@ -173,8 +176,8 @@ class DataGenerater:
        """
        Scale label according to factor.
        """
        h = label.shape[0] // factor
        w = label.shape[1] // factor
        return cv2.resize(
            label, (h, w), interpolation=cv2.INTER_NEAREST)[:, :, np.newaxis]
...
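The `/` to `//` changes above (and throughout this commit) are Python 3 compatibility fixes: with true division, `/` returns a float even for integer operands, which breaks size arithmetic such as the `cv2.resize` call. A minimal illustration:

```
from __future__ import division  # gives Python 2 the same semantics as Python 3

h = 1024
print(h / 32)   # 32.0 -- a float, which size-taking APIs like cv2.resize reject
print(h // 32)  # 32   -- floor division keeps the old integer behavior
```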
...@@ -64,7 +64,7 @@ def eval(args):
    exe.run(fluid.default_startup_program())
    assert os.path.exists(args.model_path)
    fluid.io.load_params(exe, args.model_path)
    print("loaded model from: %s" % args.model_path)
    sys.stdout.flush()

    fetch_vars = [iou, out_w, out_r]
...@@ -80,11 +80,10 @@ def eval(args):
            fetch_list=fetch_vars)
        out_wrong += result[1]
        out_right += result[2]
        sys.stdout.flush()
    iou = cal_mean_iou(out_wrong, out_right)
    print("\nmean iou: %.3f" % iou)
    print("kpis test_acc %f" % iou)

def main():
...
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
import numpy as np
import sys
...@@ -20,8 +23,8 @@ def conv(input,
    if padding == "SAME":
        padding_h = max(k_h - s_h, 0)
        padding_w = max(k_w - s_w, 0)
        padding_top = padding_h // 2
        padding_left = padding_w // 2
        padding_bottom = padding_h - padding_top
        padding_right = padding_w - padding_left
        padding = [
...@@ -57,8 +60,8 @@ def atrous_conv(input,
    if padding == "SAME":
        padding_h = max(k_h - s_h, 0)
        padding_w = max(k_w - s_w, 0)
        padding_top = padding_h // 2
        padding_left = padding_w // 2
        padding_bottom = padding_h - padding_top
        padding_right = padding_w - padding_left
        padding = [
...@@ -141,15 +144,15 @@ def dilation_convs(input):
def pyramis_pooling(input, input_shape):
    shape = np.ceil(input_shape // 32).astype("int32")
    h, w = shape
    pool1 = avg_pool(input, h, w, h, w)
    pool1_interp = interp(pool1, shape)
    pool2 = avg_pool(input, h // 2, w // 2, h // 2, w // 2)
    pool2_interp = interp(pool2, shape)
    pool3 = avg_pool(input, h // 3, w // 3, h // 3, w // 3)
    pool3_interp = interp(pool3, shape)
    pool4 = avg_pool(input, h // 4, w // 4, h // 4, w // 4)
    pool4_interp = interp(pool4, shape)
    conv5_3_sum = input + pool4_interp + pool3_interp + pool2_interp + pool1_interp
    return conv5_3_sum
...@@ -172,14 +175,14 @@ def shared_convs(image):
def res_block(input, filter_num, padding=0, dilation=None, name=None):
    tmp = conv(input, 1, 1, filter_num // 4, 1, 1, name=name + "_1_1_reduce")
    tmp = bn(tmp, relu=True)
    tmp = zero_padding(tmp, padding=padding)
    if dilation is None:
        tmp = conv(tmp, 3, 3, filter_num // 4, 1, 1, name=name + "_3_3")
    else:
        tmp = atrous_conv(
            tmp, 3, 3, filter_num // 4, dilation, name=name + "_3_3")
    tmp = bn(tmp, relu=True)
    tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase")
    tmp = bn(tmp, relu=False)
...@@ -195,7 +198,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
    proj_bn = bn(proj, relu=False)
    tmp = conv(
        input, 1, 1, filter_num // 4, stride, stride, name=name + "_1_1_reduce")
    tmp = bn(tmp, relu=True)
    tmp = zero_padding(tmp, padding=padding)
...@@ -208,7 +211,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
            tmp,
            3,
            3,
            filter_num // 4,
            1,
            1,
            padding=padding,
...@@ -218,7 +221,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
            tmp,
            3,
            3,
            filter_num // 4,
            dilation,
            padding=padding,
            name=name + "_3_3")
...@@ -232,12 +235,12 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
def sub_net_4(input, input_shape):
    tmp = interp(input, out_shape=np.ceil(input_shape // 32))
    tmp = dilation_convs(tmp)
    tmp = pyramis_pooling(tmp, input_shape)
    tmp = conv(tmp, 1, 1, 256, 1, 1, name="conv5_4_k1")
    tmp = bn(tmp, relu=True)
    tmp = interp(tmp, input_shape // 16)
    return tmp
...@@ -265,7 +268,7 @@ def CCF24(sub2_out, sub4_out, input_shape):
    tmp = bn(tmp, relu=False)
    tmp = tmp + sub2_out
    tmp = fluid.layers.relu(tmp)
    tmp = interp(tmp, input_shape // 8)
    return tmp
...@@ -275,7 +278,7 @@ def CCF124(sub1_out, sub24_out, input_shape):
    tmp = bn(tmp, relu=False)
    tmp = tmp + sub1_out
    tmp = fluid.layers.relu(tmp)
    tmp = interp(tmp, input_shape // 4)
    return tmp
...
"""Infer for ICNet model.""" """Infer for ICNet model."""
from __future__ import print_function
import cityscape
import argparse
import functools
...@@ -101,7 +102,7 @@ def infer(args):
    exe.run(fluid.default_startup_program())
    assert os.path.exists(args.model_path)
    fluid.io.load_params(exe, args.model_path)
    print("loaded model from: %s" % args.model_path)
    sys.stdout.flush()

    if not os.path.isdir(args.out_path):
...
"""Trainer for ICNet model.""" """Trainer for ICNet model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from icnet import icnet
import cityscape
import argparse
import functools
import sys
import os
import time
import paddle.fluid as fluid
import numpy as np
...@@ -11,9 +15,8 @@ from utils import add_arguments, print_arguments, get_feeder_data
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
from paddle.fluid.initializer import init_on_cpu

if 'ce_mode' in os.environ:
    np.random.seed(10)

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
...@@ -87,10 +90,14 @@ def train(args):
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    if 'ce_mode' in os.environ:
        fluid.default_startup_program().random_seed = 90

    exe.run(fluid.default_startup_program())

    if args.init_model is not None:
        print("load model from: %s" % args.init_model)
        sys.stdout.flush()
        fluid.io.load_params(exe, args.init_model)
...@@ -107,7 +114,7 @@ def train(args):
    for data in train_reader():
        if iter_id > TOTAL_STEP:
            end_time = time.time()
            print("kpis train_duration %f" % (end_time - start_time))
            return
        iter_id += 1
        results = exe.run(
...@@ -119,10 +126,10 @@ def train(args):
        sub124_loss += results[3]
        # training log
        if iter_id % LOG_PERIOD == 0:
            print("Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" % (
                iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD,
                sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD))
            print("kpis train_cost %f" % (t_loss / LOG_PERIOD))
            t_loss = 0.
            sub4_loss = 0.
...@@ -133,7 +140,7 @@ def train(args):
        if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None:
            dir_name = args.checkpoint_path + "/" + str(iter_id)
            fluid.io.save_persistables(exe, dirname=dir_name)
            print("Saved checkpoint: %s" % (dir_name))

def main():
...
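The pattern introduced above fixes the random seeds only when `run_ce.sh` exports `ce_mode`, so continuous-evaluation runs are reproducible while ordinary training keeps its randomness. Pulled out on its own (a sketch; the seed values follow the diff above):

```
import os
import numpy as np
import paddle.fluid as fluid

if 'ce_mode' in os.environ:
    np.random.seed(10)  # fix numpy-side randomness (augmentation, shuffling)
    # must be set before the startup program runs so parameter init is fixed
    fluid.default_startup_program().random_seed = 90
```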
...@@ -19,6 +19,7 @@ from __future__ import print_function
import distutils.util
import numpy as np
from paddle.fluid import core
import six

def print_arguments(args):
...@@ -37,7 +38,7 @@ def print_arguments(args):
    :type args: argparse.Namespace
    """
    print("----------- Configuration Arguments -----------")
    for arg, value in sorted(six.iteritems(vars(args))):
        print("%s: %s" % (arg, value))
    print("------------------------------------------------")
...
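`dict.iteritems()` no longer exists in Python 3; `six.iteritems()` dispatches to `iteritems()` on Python 2 and `items()` on Python 3, which is why `print_arguments` now goes through `six`. For example:

```
import six

config = {'batch_size': 64, 'use_gpu': True}
# Works unchanged on Python 2 and Python 3.
for arg, value in sorted(six.iteritems(config)):
    print("%s: %s" % (arg, value))
```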
...@@ -5,6 +5,6 @@ cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py

cudaid=${object_detection_cudaid_m:=0, 1, 2, 3}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py
...@@ -45,7 +45,7 @@ def calc_diff(f1, f2):
        sq_df = np.mean(df * df)
        return max_df, sq_df
    except Exception as e:
        return 1.0, 1.0

def compare(path1, path2, no_exception):
...
...@@ -245,10 +245,18 @@ class Network(object):
    @layer
    def prelu(self, input, channel_shared, name):
        fluid = import_fluid()
        if channel_shared:
            mode = 'all'
        else:
            mode = 'channel'
        prefix = name + '_'
        output = fluid.layers.prelu(
            input,
            mode=mode,
            param_attr=fluid.ParamAttr(name=prefix + 'negslope'))
        return output

    def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
             name):
...
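The new `prelu` layer maps Caffe's `channel_shared` flag onto fluid's modes: `'all'` learns a single negative slope shared across the whole tensor, `'channel'` learns one slope per channel. A usage sketch against the `fluid.layers.prelu` API assumed by this change (layer names and sizes here are illustrative):

```
import paddle.fluid as fluid

image = fluid.layers.data(name='image', shape=[3, 32, 32], dtype='float32')
conv = fluid.layers.conv2d(input=image, num_filters=16, filter_size=3)
# channel_shared=False in Caffe corresponds to mode='channel' here.
act = fluid.layers.prelu(
    conv,
    mode='channel',
    param_attr=fluid.ParamAttr(name='conv1_negslope'))
```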
...@@ -176,6 +176,7 @@ class DataReshaper(object):
            del node.reshaped_data
        return graph

class CropFuser(object):
    '''
    Crop is to return a scalar output Blob for an input Blob of arbitrary size.
...@@ -197,7 +198,8 @@ class CropFuser(object):
            cls._traced_names[fname] = []
        cls._traced_names[fname].append(tname)

    def __init__(self,
                 allowed_parent_types=[NodeKind.Input, NodeKind.DummyData]):
        self.allowed_parent_types = allowed_parent_types

    def __call__(self, graph):
...@@ -232,7 +234,11 @@ class CropFuser(object):
    def merge(self, parent, child):
        '''Merge the parent node into the child.'''
        child.metadata['shape'] = [
            parent.output_shape.batch_size, parent.output_shape.channels,
            parent.output_shape.height, parent.output_shape.width
        ]

class SubNodeFuser(object):
    '''
...@@ -395,6 +401,8 @@ class ParameterNamer(object):
                names = ('scale', )
                if getattr(node.parameters, 'bias_term', False):
                    names = ('scale', 'offset')
            elif node.kind == NodeKind.PReLU:
                names = ('negslope', )
            elif node.kind == "Normalize":
                names = ('scale', )
            else:
...
...@@ -18,6 +18,7 @@ from __future__ import print_function
import numpy as np
import os
import six

import paddle
import paddle.fluid as fluid
...@@ -102,7 +103,7 @@ def infer():
        init_recursive_seq_lens, place)

    # Feed dict for inference
    feed_dict = feeder.feed([[x[0]] for x in data])
    feed_dict['init_ids'] = init_ids
    feed_dict['init_scores'] = init_scores
...@@ -115,7 +116,7 @@ def infer():
    lod_level_1 = fetch_outs[0].lod()[1]
    token_array = np.array(fetch_outs[0])
    result = []
    for i in six.moves.xrange(len(lod_level_1) - 1):
        sentence_list = [
            trg_dict[token]
            for token in token_array[lod_level_1[i]:lod_level_1[i + 1]]
...@@ -125,7 +126,7 @@ def infer():
    lod_level_0 = fetch_outs[0].lod()[0]
    paragraphs = [
        result[lod_level_0[i]:lod_level_0[i + 1]]
        for i in six.moves.xrange(len(lod_level_0) - 1)
    ]

    for paragraph in paragraphs:
...
...@@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared across models in some way!!!!
train_cost_card1_kpi = CostKpi('train_cost_card1', 0.02, 0, actived=True)
test_cost_card1_kpi = CostKpi('test_cost_card1', 0.005, 0, actived=True)
train_duration_card1_kpi = DurationKpi(
    'train_duration_card1', 0.06, 0, actived=True)
...
...@@ -14,6 +14,6 @@ cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py

cudaid=${object_detection_cudaid_m:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
...@@ -8,8 +8,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared across models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=False)
train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=False)
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True)
test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=True)
train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, 0, actived=True)
...
...@@ -22,6 +22,7 @@ import xml.etree.ElementTree
import os
import time
import copy
import six

class Settings(object):
...@@ -151,7 +152,7 @@ def preprocess(img, bbox_labels, mode, settings):
        mirror = int(random.uniform(0, 2))
        if mirror == 1:
            img = img[:, ::-1, :]
            for i in six.moves.xrange(len(sampled_labels)):
                tmp = sampled_labels[i][1]
                sampled_labels[i][1] = 1 - sampled_labels[i][3]
                sampled_labels[i][3] = 1 - tmp
...
...@@ -65,7 +65,6 @@ def train(args,
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    difficult = fluid.layers.data(
        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(
        locs, confs, box, box_var, nms_threshold=args.nms_threshold)
...@@ -88,16 +87,16 @@ def train(args,
    if 'coco' in data_args.dataset:
        # learning rate decay in 12, 19 pass, respectively
        if '2014' in train_file_list:
            epocs = 82783 // batch_size
            boundaries = [epocs * 12, epocs * 19]
        elif '2017' in train_file_list:
            epocs = 118287 // batch_size
            boundaries = [epocs * 12, epocs * 19]
        values = [
            learning_rate, learning_rate * 0.5, learning_rate * 0.25
        ]
    elif 'pascalvoc' in data_args.dataset:
        epocs = 19200 // batch_size
        boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
        values = [
            learning_rate, learning_rate * 0.5, learning_rate * 0.25,
...@@ -126,6 +125,9 @@ def train(args,
        train_reader = paddle.batch(
            reader.train(data_args, train_file_list), batch_size=batch_size)
    else:
        import random
        random.seed(0)
        np.random.seed(0)
        train_reader = paddle.batch(
            reader.train(data_args, train_file_list, False), batch_size=batch_size)
    test_reader = paddle.batch(
...@@ -137,7 +139,7 @@ def train(args,
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path)

    best_map = 0.
...@@ -166,8 +168,6 @@ def train(args,
        start_time = time.time()
        prev_start_time = start_time
        every_pass_loss = []
        for batch_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
...@@ -193,15 +193,15 @@ def train(args,
        total_time += end_time - start_time
        train_avg_loss = np.mean(every_pass_loss)
        if devices_num == 1:
            print("kpis train_cost %s" % train_avg_loss)
            print("kpis test_acc %s" % mean_map)
            print("kpis train_speed %s" % (total_time / epoch_idx))
        else:
            print("kpis train_cost_card%s %s" %
                  (devices_num, train_avg_loss))
            print("kpis test_acc_card%s %s" %
                  (devices_num, mean_map))
            print("kpis train_speed_card%s %f" %
                  (devices_num, total_time / epoch_idx))
...
...@@ -16,8 +16,10 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import distutils.util
import numpy as np
import six
from paddle.fluid import core
...@@ -37,7 +39,7 @@ def print_arguments(args):
    :type args: argparse.Namespace
    """
    print("----------- Configuration Arguments -----------")
    for arg, value in sorted(six.iteritems(vars(args))):
        print("%s: %s" % (arg, value))
    print("------------------------------------------------")
...
export ce_mode=1
python train.py --batch_size=32 --total_step=1 --eval_period=1 --log_period=1 --use_gpu=True 1> ./tmp.log
cat tmp.log | python _ce.py
rm tmp.log
...@@ -5,8 +5,9 @@
## Code Structure
```
├── ctc_reader.py # Downloads, reads, and preprocesses the data.
├── crnn_ctc_model.py # Defines the network structure of the OCR CTC model.
├── attention_model.py # Defines the network structure of the OCR attention model.
├── train.py # Trains the model.
├── infer.py # Loads a trained model and runs prediction on new data.
├── eval.py # Evaluates the model on a given dataset.
└── utils.py # Common utility functions.
```
...@@ -15,9 +16,16 @@
## Introduction
The task in this chapter is to recognize single lines of English text in images. We tackle it with two different models: a CTC model and an attention model.

The two models share the same encoder (sketched below): convolutions first turn the image into a feature map, the `im2sequence op` then converts the feature map into a sequence, and a `bidirectional GRU` learns features over that sequence.

The decoders and loss functions of the two models differ as follows:

- CTC model: training uses the CTC (Connectionist Temporal Classification) loss; inference uses a greedy strategy with CTC decoding.
- Attention model: training uses an attention-based decoding strategy with the cross-entropy loss; inference uses beam search.

For both models, the evaluation metric is the sample-level error rate.
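A rough sketch of the shared encoder described above (layer counts and sizes here are illustrative; the real definitions live in `crnn_ctc_model.py` and `attention_model.py`):

```
import paddle.fluid as fluid

def encoder_sketch(images, rnn_hidden_size=200):
    # convolutions turn the image into a feature map
    conv = fluid.layers.conv2d(
        input=images, num_filters=16, filter_size=3, padding=1, act='relu')
    # im2sequence makes each feature-map column one time step
    seq = fluid.layers.im2sequence(
        input=conv, stride=[1, 1], filter_size=[conv.shape[2], 1])
    fc = fluid.layers.fc(input=seq, size=rnn_hidden_size * 3, bias_attr=False)
    # bidirectional GRU over the sequence of columns
    gru_fwd = fluid.layers.dynamic_gru(input=fc, size=rnn_hidden_size)
    gru_bwd = fluid.layers.dynamic_gru(
        input=fc, size=rnn_hidden_size, is_reverse=True)
    return fluid.layers.concat(input=[gru_fwd, gru_bwd], axis=1)
```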
## Data
...@@ -124,15 +132,23 @@ env OMP_NUM_THREADS=<num_of_physical_cores> python ctc_train.py --use_gpu False
env CUDA_VISIBLE_DEVICES=0,1,2,3 python ctc_train.py --parallel=True
```

The `CTC model` is used by default; switch to the `attention model` with the option `--model="attention"`.

Run `python ctc_train.py --help` to see more usage information and detailed parameter descriptions.

Figure 2 shows the convergence curve of the `CTC model` trained on the default dataset with default parameters. The horizontal axis is the number of training iterations and the vertical axis is the sample-level error rate; the blue line is the error rate on the training set and the red line the error rate on the test set. The lowest error rate on the test set is 22.0%.

<p align="center">
<img src="images/train.jpg" width="400" hspace='10'/> <br/>
<strong>Figure 2</strong>
</p>

Figure 3 shows the convergence curve of the `attention model` trained on the default dataset with default parameters. The horizontal axis is the number of training iterations and the vertical axis is the sample-level error rate; the blue line is the error rate on the training set and the red line the error rate on the test set. The lowest error rate on the test set is 16.25%.

<p align="center">
<img src="images/train_attention.jpg" width="400" hspace='10'/> <br/>
<strong>Figure 3</strong>
</p>

## Testing
...
...@@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi ...@@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
# NOTE kpi.py should shared in models in some way!!!! # NOTE kpi.py should shared in models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.005, 0, actived=True) test_acc_kpi = AccKpi('test_acc', 0.005, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True) train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
train_acc_kpi = AccKpi('train_acc', 0.005, 0, actived=True) train_acc_kpi = AccKpi('train_acc', 0.005, 0, actived=True)
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
import six
decoder_size = 128
word_vector_dim = 128
max_length = 100
sos = 0
eos = 1
gradient_clip = 10
LR = 1.0
beam_size = 2
learning_rate_decay = None
def conv_bn_pool(input,
group,
out_ch,
act="relu",
is_test=False,
pool=True,
use_cudnn=True):
tmp = input
    for i in six.moves.xrange(group):
filter_size = 3
conv_std = (2.0 / (filter_size**2 * tmp.shape[1]))**0.5
conv_param = fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, conv_std))
tmp = fluid.layers.conv2d(
input=tmp,
num_filters=out_ch[i],
filter_size=3,
padding=1,
bias_attr=False,
param_attr=conv_param,
act=None, # LinearActivation
use_cudnn=use_cudnn)
tmp = fluid.layers.batch_norm(input=tmp, act=act, is_test=is_test)
    if pool:
tmp = fluid.layers.pool2d(
input=tmp,
pool_size=2,
pool_type='max',
pool_stride=2,
use_cudnn=use_cudnn,
ceil_mode=True)
return tmp
def ocr_convs(input, is_test=False, use_cudnn=True):
tmp = input
tmp = conv_bn_pool(tmp, 2, [16, 16], is_test=is_test, use_cudnn=use_cudnn)
tmp = conv_bn_pool(tmp, 2, [32, 32], is_test=is_test, use_cudnn=use_cudnn)
tmp = conv_bn_pool(tmp, 2, [64, 64], is_test=is_test, use_cudnn=use_cudnn)
tmp = conv_bn_pool(
tmp, 2, [128, 128], is_test=is_test, pool=False, use_cudnn=use_cudnn)
return tmp
def encoder_net(images, rnn_hidden_size=200, is_test=False, use_cudnn=True):
conv_features = ocr_convs(images, is_test=is_test, use_cudnn=use_cudnn)
sliced_feature = fluid.layers.im2sequence(
input=conv_features,
stride=[1, 1],
filter_size=[conv_features.shape[2], 1])
para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.02))
bias_attr = fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
fc_1 = fluid.layers.fc(input=sliced_feature,
size=rnn_hidden_size * 3,
param_attr=para_attr,
bias_attr=False)
fc_2 = fluid.layers.fc(input=sliced_feature,
size=rnn_hidden_size * 3,
param_attr=para_attr,
bias_attr=False)
gru_forward = fluid.layers.dynamic_gru(
input=fc_1,
size=rnn_hidden_size,
param_attr=para_attr,
bias_attr=bias_attr,
candidate_activation='relu')
gru_backward = fluid.layers.dynamic_gru(
input=fc_2,
size=rnn_hidden_size,
is_reverse=True,
param_attr=para_attr,
bias_attr=bias_attr,
candidate_activation='relu')
encoded_vector = fluid.layers.concat(
input=[gru_forward, gru_backward], axis=1)
encoded_proj = fluid.layers.fc(input=encoded_vector,
size=decoder_size,
bias_attr=False)
return gru_backward, encoded_vector, encoded_proj
def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
decoder_boot, decoder_size, num_classes):
def simple_attention(encoder_vec, encoder_proj, decoder_state):
decoder_state_proj = fluid.layers.fc(input=decoder_state,
size=decoder_size,
bias_attr=False)
decoder_state_expand = fluid.layers.sequence_expand(
x=decoder_state_proj, y=encoder_proj)
concated = encoder_proj + decoder_state_expand
concated = fluid.layers.tanh(x=concated)
attention_weights = fluid.layers.fc(input=concated,
size=1,
act=None,
bias_attr=False)
attention_weights = fluid.layers.sequence_softmax(
input=attention_weights)
        weights_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
        scaled = fluid.layers.elementwise_mul(
            x=encoder_vec, y=weights_reshape, axis=0)
context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return context
rnn = fluid.layers.DynamicRNN()
with rnn.block():
current_word = rnn.step_input(target_embedding)
encoder_vec = rnn.static_input(encoder_vec)
encoder_proj = rnn.static_input(encoder_proj)
hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
context = simple_attention(encoder_vec, encoder_proj, hidden_mem)
fc_1 = fluid.layers.fc(input=context,
size=decoder_size * 3,
bias_attr=False)
fc_2 = fluid.layers.fc(input=current_word,
size=decoder_size * 3,
bias_attr=False)
decoder_inputs = fc_1 + fc_2
h, _, _ = fluid.layers.gru_unit(
input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
rnn.update_memory(hidden_mem, h)
out = fluid.layers.fc(input=h,
size=num_classes + 2,
bias_attr=True,
act='softmax')
rnn.output(out)
return rnn()
def attention_train_net(args, data_shape, num_classes):
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label_in = fluid.layers.data(
name='label_in', shape=[1], dtype='int32', lod_level=1)
label_out = fluid.layers.data(
name='label_out', shape=[1], dtype='int32', lod_level=1)
gru_backward, encoded_vector, encoded_proj = encoder_net(images)
backward_first = fluid.layers.sequence_pool(
input=gru_backward, pool_type='first')
decoder_boot = fluid.layers.fc(input=backward_first,
size=decoder_size,
bias_attr=False,
act="relu")
label_in = fluid.layers.cast(x=label_in, dtype='int64')
trg_embedding = fluid.layers.embedding(
input=label_in,
size=[num_classes + 2, word_vector_dim],
dtype='float32')
prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
encoded_proj, decoder_boot,
decoder_size, num_classes)
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByValue(gradient_clip))
label_out = fluid.layers.cast(x=label_out, dtype='int64')
_, maxid = fluid.layers.topk(input=prediction, k=1)
error_evaluator = fluid.evaluator.EditDistance(
input=maxid, label=label_out, ignored_tokens=[sos, eos])
inference_program = fluid.default_main_program().clone(for_test=True)
cost = fluid.layers.cross_entropy(input=prediction, label=label_out)
sum_cost = fluid.layers.reduce_sum(cost)
if learning_rate_decay == "piecewise_decay":
learning_rate = fluid.layers.piecewise_decay([50000], [LR, LR * 0.01])
else:
learning_rate = LR
optimizer = fluid.optimizer.Adadelta(
learning_rate=learning_rate, epsilon=1.0e-6, rho=0.9)
optimizer.minimize(sum_cost)
model_average = None
if args.average_window > 0:
model_average = fluid.optimizer.ModelAverage(
args.average_window,
min_average_window=args.min_average_window,
max_average_window=args.max_average_window)
return sum_cost, error_evaluator, inference_program, model_average
def simple_attention(encoder_vec, encoder_proj, decoder_state, decoder_size):
decoder_state_proj = fluid.layers.fc(input=decoder_state,
size=decoder_size,
bias_attr=False)
decoder_state_expand = fluid.layers.sequence_expand(
x=decoder_state_proj, y=encoder_proj)
concated = fluid.layers.elementwise_add(encoder_proj, decoder_state_expand)
concated = fluid.layers.tanh(x=concated)
attention_weights = fluid.layers.fc(input=concated,
size=1,
act=None,
bias_attr=False)
attention_weights = fluid.layers.sequence_softmax(input=attention_weights)
    weights_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
    scaled = fluid.layers.elementwise_mul(
        x=encoder_vec, y=weights_reshape, axis=0)
context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return context
def attention_infer(images, num_classes, use_cudnn=True):
max_length = 20
gru_backward, encoded_vector, encoded_proj = encoder_net(
images, is_test=True, use_cudnn=use_cudnn)
backward_first = fluid.layers.sequence_pool(
input=gru_backward, pool_type='first')
decoder_boot = fluid.layers.fc(input=backward_first,
size=decoder_size,
bias_attr=False,
act="relu")
init_state = decoder_boot
array_len = fluid.layers.fill_constant(
shape=[1], dtype='int64', value=max_length)
counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True)
# fill the first element with init_state
state_array = fluid.layers.create_array('float32')
fluid.layers.array_write(init_state, array=state_array, i=counter)
# ids, scores as memory
ids_array = fluid.layers.create_array('int64')
scores_array = fluid.layers.create_array('float32')
init_ids = fluid.layers.data(
name="init_ids", shape=[1], dtype="int64", lod_level=2)
init_scores = fluid.layers.data(
name="init_scores", shape=[1], dtype="float32", lod_level=2)
fluid.layers.array_write(init_ids, array=ids_array, i=counter)
fluid.layers.array_write(init_scores, array=scores_array, i=counter)
cond = fluid.layers.less_than(x=counter, y=array_len)
while_op = fluid.layers.While(cond=cond)
with while_op.block():
pre_ids = fluid.layers.array_read(array=ids_array, i=counter)
pre_state = fluid.layers.array_read(array=state_array, i=counter)
pre_score = fluid.layers.array_read(array=scores_array, i=counter)
pre_ids_emb = fluid.layers.embedding(
input=pre_ids,
size=[num_classes + 2, word_vector_dim],
dtype='float32')
context = simple_attention(encoded_vector, encoded_proj, pre_state,
decoder_size)
# expand the recursive_sequence_lengths of pre_state to be the same with pre_score
pre_state_expanded = fluid.layers.sequence_expand(pre_state, pre_score)
context_expanded = fluid.layers.sequence_expand(context, pre_score)
fc_1 = fluid.layers.fc(input=context_expanded,
size=decoder_size * 3,
bias_attr=False)
fc_2 = fluid.layers.fc(input=pre_ids_emb,
size=decoder_size * 3,
bias_attr=False)
decoder_inputs = fc_1 + fc_2
current_state, _, _ = fluid.layers.gru_unit(
input=decoder_inputs,
hidden=pre_state_expanded,
size=decoder_size * 3)
current_state_with_lod = fluid.layers.lod_reset(
x=current_state, y=pre_score)
# use score to do beam search
current_score = fluid.layers.fc(input=current_state_with_lod,
size=num_classes + 2,
bias_attr=True,
act='softmax')
topk_scores, topk_indices = fluid.layers.topk(
current_score, k=beam_size)
# calculate accumulated scores after topk to reduce computation cost
accu_scores = fluid.layers.elementwise_add(
x=fluid.layers.log(topk_scores),
y=fluid.layers.reshape(
pre_score, shape=[-1]),
axis=0)
selected_ids, selected_scores = fluid.layers.beam_search(
pre_ids,
pre_score,
topk_indices,
accu_scores,
beam_size,
1, # end_id
#level=0
)
fluid.layers.increment(x=counter, value=1, in_place=True)
# update the memories
fluid.layers.array_write(current_state, array=state_array, i=counter)
fluid.layers.array_write(selected_ids, array=ids_array, i=counter)
fluid.layers.array_write(selected_scores, array=scores_array, i=counter)
# update the break condition: up to the max length or all candidates of
# source sentences have ended.
length_cond = fluid.layers.less_than(x=counter, y=array_len)
finish_cond = fluid.layers.logical_not(
fluid.layers.is_empty(x=selected_ids))
fluid.layers.logical_and(x=length_cond, y=finish_cond, out=cond)
ids, scores = fluid.layers.beam_search_decode(ids_array, scores_array,
beam_size, eos)
return ids
def attention_eval(data_shape, num_classes):
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label_in = fluid.layers.data(
name='label_in', shape=[1], dtype='int32', lod_level=1)
label_out = fluid.layers.data(
name='label_out', shape=[1], dtype='int32', lod_level=1)
label_out = fluid.layers.cast(x=label_out, dtype='int64')
label_in = fluid.layers.cast(x=label_in, dtype='int64')
gru_backward, encoded_vector, encoded_proj = encoder_net(
images, is_test=True)
backward_first = fluid.layers.sequence_pool(
input=gru_backward, pool_type='first')
decoder_boot = fluid.layers.fc(input=backward_first,
size=decoder_size,
bias_attr=False,
act="relu")
trg_embedding = fluid.layers.embedding(
input=label_in,
size=[num_classes + 2, word_vector_dim],
dtype='float32')
prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
encoded_proj, decoder_boot,
decoder_size, num_classes)
_, maxid = fluid.layers.topk(input=prediction, k=1)
error_evaluator = fluid.evaluator.EditDistance(
input=maxid, label=label_out, ignored_tokens=[sos, eos])
cost = fluid.layers.cross_entropy(input=prediction, label=label_out)
sum_cost = fluid.layers.reduce_sum(cost)
return error_evaluator, sum_cost
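For orientation, this is roughly how `train.py` elsewhere in this commit wires up `attention_train_net`; the argument values here are placeholders, not the project's defaults:

```
import argparse
import paddle.fluid as fluid
from attention_model import attention_train_net

# Placeholder model-averaging arguments; train.py defines the real flags.
args = argparse.Namespace(
    average_window=0.15, min_average_window=10000, max_average_window=12500)
sum_cost, error_evaluator, inference_program, model_average = \
    attention_train_net(args, data_shape=[1, 48, 512], num_classes=95)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
```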
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
from paddle.fluid.initializer import init_on_cpu
import math
import six

def conv_bn_pool(input,
...@@ -15,7 +19,7 @@ def conv_bn_pool(input,
                 pooling=True,
                 use_cudnn=False):
    tmp = input
    for i in six.moves.xrange(group):
        tmp = fluid.layers.conv2d(
            input=tmp,
            num_filters=out_ch[i],
...@@ -166,13 +170,16 @@ def encoder_net(images,
    return fc_out

def ctc_train_net(args, data_shape, num_classes):
    L2_RATE = 0.0004
    LR = 1.0e-3
    MOMENTUM = 0.9
    learning_rate_decay = None
    regularizer = fluid.regularizer.L2Decay(L2_RATE)

    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(
        name='label', shape=[1], dtype='int32', lod_level=1)
    fc_out = encoder_net(
        images,
        num_classes,
...@@ -189,7 +196,7 @@ def ctc_train_net(images, label, args, num_classes):
    inference_program = fluid.default_main_program().clone(for_test=True)
    if learning_rate_decay == "piecewise_decay":
        learning_rate = fluid.layers.piecewise_decay([
            args.total_step // 4, args.total_step // 2, args.total_step * 3 // 4
        ], [LR, LR * 0.1, LR * 0.01, LR * 0.001])
    else:
        learning_rate = LR
...@@ -211,7 +218,10 @@ def ctc_infer(images, num_classes, use_cudnn):
    return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes)

def ctc_eval(data_shape, num_classes, use_cudnn):
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(
        name='label', shape=[1], dtype='int32', lod_level=1)
    fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
    decoded_out = fluid.layers.ctc_greedy_decoder(
        input=fc_out, blank=num_classes)
...
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
import tarfile
import numpy as np
from PIL import Image
from os import path
from paddle.dataset.image import load_image
import paddle

SOS = 0
EOS = 1
NUM_CLASSES = 95
DATA_SHAPE = [1, 48, 512]
...@@ -22,8 +27,8 @@ TEST_LIST_FILE_NAME = "test.list"
class DataGenerator(object):
    def __init__(self, model="crnn_ctc"):
        self.model = model

    def train_reader(self,
                     img_root_dir,
...@@ -65,11 +70,11 @@ class DataGenerator(object):
            batchsize
        ) + "; i++) print $(4*i+1)\" \"$(4*i+2)\" \"$(4*i+3)\" \"$(4*i+4);}}' > " + to_file
        os.system(cmd)
        print("finish batch shuffle")
        img_label_lines = open(to_file, 'r').readlines()

        def reader():
            sizes = len(img_label_lines) // batchsize
            if sizes == 0:
                raise ValueError('Batch size is bigger than the dataset size.')
            while True:
...@@ -89,7 +94,10 @@ class DataGenerator(object):
                    img = img.resize((sz[0], sz[1]))
                    img = np.array(img) - 127.5
                    img = img[np.newaxis, ...]
                    if self.model == "crnn_ctc":
                        result.append([img, label])
                    else:
                        result.append([img, [SOS] + label, label + [EOS]])
                yield result
                if not cycle:
                    break
...@@ -117,7 +125,10 @@ class DataGenerator(object):
                    'L')
                img = np.array(img) - 127.5
                img = img[np.newaxis, ...]
                if self.model == "crnn_ctc":
                    yield img, label
                else:
                    yield img, [SOS] + label, label + [EOS]

        return reader
...@@ -185,8 +196,12 @@ def data_shape():
    return DATA_SHAPE

def train(batch_size,
          train_images_dir=None,
          train_list_file=None,
          cycle=False,
          model="crnn_ctc"):
    generator = DataGenerator(model)
    if train_images_dir is None:
        data_dir = download_data()
        train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
...@@ -199,8 +214,11 @@ def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False):
        train_images_dir, train_list_file, batch_size, cycle, shuffle=shuffle)

def test(batch_size=1,
         test_images_dir=None,
         test_list_file=None,
         model="crnn_ctc"):
    generator = DataGenerator(model)
    if test_images_dir is None:
        data_dir = download_data()
        test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME)
...@@ -213,8 +231,9 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None):
def inference(batch_size=1,
              infer_images_dir=None,
              infer_list_file=None,
              cycle=False,
              model="crnn_ctc"):
    generator = DataGenerator(model)
    return paddle.batch(
        generator.infer_reader(infer_images_dir, infer_list_file, cycle),
        batch_size)
...
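The `model` switch above changes what each sample carries: `crnn_ctc` yields `(image, label)`, while `attention` yields `(image, [SOS] + label, label + [EOS])` so the decoder gets a shifted input/target pair. A usage sketch (directory arguments are omitted, so this would fall back to downloading the default dataset):

```
import data_reader

# CTC: each sample is [img, label]
ctc_reader = data_reader.train(batch_size=32, model="crnn_ctc")
# Attention: each sample is [img, [SOS] + label, label + [EOS]]
att_reader = data_reader.train(batch_size=32, model="attention")
for batch in att_reader():
    img, label_in, label_out = batch[0]
    break
```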
import paddle.v2 as paddle
import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
from attention_model import attention_eval
from crnn_ctc_model import ctc_eval
import data_reader
import argparse
import functools
import os
...@@ -11,27 +11,34 @@ import os
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model', str, "crnn_ctc", "Which type of network to be used. 'crnn_ctc' or 'attention'")
add_arg('model_path', str, "", "The model path to be used for inference.")
add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.")
add_arg('use_gpu', bool, True, "Whether use GPU to eval.")
# yapf: enable

def evaluate(args):
    """OCR evaluation"""
    if args.model == "crnn_ctc":
        eval = ctc_eval
        get_feeder_data = get_ctc_feeder_data
    else:
        eval = attention_eval
        get_feeder_data = get_attention_feeder_data

    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()
    # define network
    evaluator, cost = eval(data_shape, num_classes)

    # data reader
    test_reader = data_reader.test(
        test_images_dir=args.input_images_dir,
        test_list_file=args.input_images_list,
        model=args.model)

    # prepare environment
    place = fluid.CPUPlace()
...@@ -48,7 +55,7 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
        model_dir = os.path.dirname(args.model_path)
        model_file_name = os.path.basename(args.model_path)
        fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
        print("Init model from: %s." % args.model_path)

    evaluator.reset(exe)
    count = 0
...@@ -56,14 +63,14 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
        count += 1
        exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place))
    avg_distance, avg_seq_error = evaluator.eval(exe)
    print("Read %d samples; avg_distance: %s; avg_seq_error: %s" % (
        count, avg_distance, avg_seq_error))

def main():
    args = parser.parse_args()
    print_arguments(args)
    evaluate(args)

if __name__ == "__main__":
...
from __future__ import print_function
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_for_infer
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer from crnn_ctc_model import ctc_infer
from attention_model import attention_infer
import numpy as np import numpy as np
import ctc_reader import data_reader
import argparse import argparse
import functools import functools
import os import os
...@@ -13,6 +15,7 @@ import time ...@@ -13,6 +15,7 @@ import time
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('model', str, "crnn_ctc", "Which type of network to use: 'crnn_ctc' or 'attention'.")
add_arg('model_path', str, None, "The model path to be used for inference.") add_arg('model_path', str, None, "The model path to be used for inference.")
add_arg('input_images_dir', str, None, "The directory of images.") add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.") add_arg('input_images_list', str, None, "The list file of images.")
...@@ -25,20 +28,28 @@ add_arg('batch_size', int, 1, "The minibatch size.") ...@@ -25,20 +28,28 @@ add_arg('batch_size', int, 1, "The minibatch size.")
# yapf: enable # yapf: enable
def inference(args, infer=ctc_infer, data_reader=ctc_reader): def inference(args):
"""OCR inference""" """OCR inference"""
if args.model == "crnn_ctc":
infer = ctc_infer
get_feeder_data = get_ctc_feeder_data
else:
infer = attention_infer
get_feeder_data = get_attention_feeder_for_infer
eos = 1
sos = 0
num_classes = data_reader.num_classes() num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape() data_shape = data_reader.data_shape()
# define network # define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
sequence = infer( ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False)
images, num_classes, use_cudnn=True if args.use_gpu else False)
# data reader # data reader
infer_reader = data_reader.inference( infer_reader = data_reader.inference(
batch_size=args.batch_size, batch_size=args.batch_size,
infer_images_dir=args.input_images_dir, infer_images_dir=args.input_images_dir,
infer_list_file=args.input_images_list, infer_list_file=args.input_images_list,
cycle=True if args.iterations > 0 else False) cycle=True if args.iterations > 0 else False,
model=args.model)
# prepare environment # prepare environment
place = fluid.CPUPlace() place = fluid.CPUPlace()
if args.use_gpu: if args.use_gpu:
...@@ -54,7 +65,7 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -54,7 +65,7 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
with open(args.dict) as dict_file: with open(args.dict) as dict_file:
for i, word in enumerate(dict_file): for i, word in enumerate(dict_file):
dict_map[i] = word.strip() dict_map[i] = word.strip()
print "Loaded dict from %s" % args.dict print("Loaded dict from %s" % args.dict)
# load init model # load init model
model_dir = args.model_path model_dir = args.model_path
...@@ -63,11 +74,12 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -63,11 +74,12 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
model_dir = os.path.dirname(args.model_path) model_dir = os.path.dirname(args.model_path)
model_file_name = os.path.basename(args.model_path) model_file_name = os.path.basename(args.model_path)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.model_path print("Init model from: %s." % args.model_path)
batch_times = [] batch_times = []
iters = 0 iters = 0
for data in infer_reader(): for data in infer_reader():
feed_dict = get_feeder_data(data, place)
if args.iterations > 0 and iters == args.iterations + args.skip_batch_num: if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
break break
if iters < args.skip_batch_num: if iters < args.skip_batch_num:
...@@ -77,26 +89,25 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -77,26 +89,25 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
start = time.time() start = time.time()
result = exe.run(fluid.default_main_program(), result = exe.run(fluid.default_main_program(),
feed=get_feeder_data( feed=feed_dict,
data, place, need_label=False), fetch_list=[ids],
fetch_list=[sequence],
return_numpy=False) return_numpy=False)
indexes = prune(np.array(result[0]).flatten(), 0, 1)
batch_time = time.time() - start batch_time = time.time() - start
fps = args.batch_size / batch_time fps = args.batch_size / batch_time
batch_times.append(batch_time) batch_times.append(batch_time)
indexes = np.array(result[0]).flatten()
if dict_map is not None: if dict_map is not None:
print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % ( print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters, iters,
batch_time, batch_time,
fps, fps,
[dict_map[index] for index in indexes], ) [dict_map[index] for index in indexes], ))
else: else:
print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % ( print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters, iters,
batch_time, batch_time,
fps, fps,
indexes, ) indexes, ))
iters += 1 iters += 1
...@@ -114,18 +125,29 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): ...@@ -114,18 +125,29 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99)) print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99))
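The benchmark summary above prints an average fps and an fps at 99th-percentile latency, but the computation itself is elided by this hunk. A plausible reconstruction, consistent with the postprocessing shown later in the trainer (the names `latency_pc99` and `fps_pc99` are inferred, not confirmed by the diff):

```python
import numpy as np

# Hypothetical reconstruction of the elided benchmark postprocessing:
latencies = batch_times[args.skip_batch_num:]   # drop warm-up batches
latency_avg = np.average(latencies)             # mean seconds per batch
latency_pc99 = np.percentile(latencies, 99)     # 99th-percentile latency
fps_avg = args.batch_size / latency_avg         # throughput at mean latency
fps_pc99 = args.batch_size / latency_pc99       # throughput at tail latency
```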
def prune(words, sos, eos):
"""Remove unused tokens in prediction result."""
start_index = 0
end_index = len(words)
if sos in words:
start_index = np.where(words == sos)[0][0] + 1
if eos in words:
end_index = np.where(words == eos)[0][0]
return words[start_index:end_index]
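A quick illustration of how `prune` behaves on a toy decoded sequence, using the `sos = 0` and `eos = 1` constants set earlier in `inference`:

```python
import numpy as np

words = np.array([0, 5, 3, 7, 1, 0])  # sos, ids..., eos, padding
# Keeps everything after the first sos and before the first eos:
print(prune(words, 0, 1))  # -> [5 3 7]
```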
def main(): def main():
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
if args.profile: if args.profile:
if args.use_gpu: if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
inference(args, data_reader=ctc_reader) inference(args)
else: else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof: with profiler.profiler("CPU", sorted_key='total') as cpuprof:
inference(args, data_reader=ctc_reader) inference(args)
else: else:
inference(args, data_reader=ctc_reader) inference(args)
if __name__ == "__main__": if __name__ == "__main__":
......
"""Trainer for OCR CTC model.""" """Trainer for OCR CTC or attention model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net from crnn_ctc_model import ctc_train_net
import ctc_reader from attention_model import attention_train_net
import data_reader
import argparse import argparse
import functools import functools
import sys import sys
...@@ -20,6 +24,7 @@ add_arg('log_period', int, 1000, "Log period.") ...@@ -20,6 +24,7 @@ add_arg('log_period', int, 1000, "Log period.")
add_arg('save_model_period', int, 15000, "How often (in iterations) to save the model; '-1' disables saving.") add_arg('save_model_period', int, 15000, "How often (in iterations) to save the model; '-1' disables saving.")
add_arg('eval_period', int, 15000, "How often (in iterations) to evaluate the model; '-1' disables evaluation.") add_arg('eval_period', int, 15000, "How often (in iterations) to evaluate the model; '-1' disables evaluation.")
add_arg('save_model_dir', str, "./models", "The directory the model to be saved to.") add_arg('save_model_dir', str, "./models", "The directory the model to be saved to.")
add_arg('model', str, "crnn_ctc", "Which type of network to use: 'crnn_ctc' or 'attention'.")
add_arg('init_model', str, None, "The init model file of directory.") add_arg('init_model', str, None, "The init model file of directory.")
add_arg('use_gpu', bool, True, "Whether to use GPU for training.") add_arg('use_gpu', bool, True, "Whether to use GPU for training.")
add_arg('min_average_window',int, 10000, "Min average window.") add_arg('min_average_window',int, 10000, "Min average window.")
...@@ -32,8 +37,16 @@ add_arg('skip_test', bool, False, "Whether to skip test phase.") ...@@ -32,8 +37,16 @@ add_arg('skip_test', bool, False, "Whether to skip test phase.")
# yapf: enable # yapf: enable
def train(args, data_reader=ctc_reader): def train(args):
"""OCR CTC training""" """OCR training"""
if args.model == "crnn_ctc":
train_net = ctc_train_net
get_feeder_data = get_ctc_feeder_data
else:
train_net = attention_train_net
get_feeder_data = get_attention_feeder_data
num_classes = None num_classes = None
train_images = None train_images = None
train_list = None train_list = None
...@@ -43,20 +56,18 @@ def train(args, data_reader=ctc_reader): ...@@ -43,20 +56,18 @@ def train(args, data_reader=ctc_reader):
) if num_classes is None else num_classes ) if num_classes is None else num_classes
data_shape = data_reader.data_shape() data_shape = data_reader.data_shape()
# define network # define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') sum_cost, error_evaluator, inference_program, model_average = train_net(
label = fluid.layers.data( args, data_shape, num_classes)
name='label', shape=[1], dtype='int32', lod_level=1)
sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(
images, label, args, num_classes)
# data reader # data reader
train_reader = data_reader.train( train_reader = data_reader.train(
args.batch_size, args.batch_size,
train_images_dir=train_images, train_images_dir=train_images,
train_list_file=train_list, train_list_file=train_list,
cycle=args.total_step > 0) cycle=args.total_step > 0,
model=args.model)
test_reader = data_reader.test( test_reader = data_reader.test(
test_images_dir=test_images, test_list_file=test_list) test_images_dir=test_images, test_list_file=test_list, model=args.model)
# prepare environment # prepare environment
place = fluid.CPUPlace() place = fluid.CPUPlace()
...@@ -77,7 +88,7 @@ def train(args, data_reader=ctc_reader): ...@@ -77,7 +88,7 @@ def train(args, data_reader=ctc_reader):
model_dir = os.path.dirname(args.init_model) model_dir = os.path.dirname(args.init_model)
model_file_name = os.path.basename(args.init_model) model_file_name = os.path.basename(args.init_model)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.init_model print("Init model from: %s." % args.init_model)
train_exe = exe train_exe = exe
error_evaluator.reset(exe) error_evaluator.reset(exe)
...@@ -104,18 +115,18 @@ def train(args, data_reader=ctc_reader): ...@@ -104,18 +115,18 @@ def train(args, data_reader=ctc_reader):
for data in test_reader(): for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place)) exe.run(inference_program, feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe) _, test_seq_error = error_evaluator.eval(exe)
print "\nTime: %s; Iter[%d]; Test seq error: %s.\n" % ( print("\nTime: %s; Iter[%d]; Test seq error: %s.\n" % (
time.time(), iter_num, str(test_seq_error[0])) time.time(), iter_num, str(test_seq_error[0])))
#Note: The following logs are used only for CE monitoring. #Note: The following logs are used only for CE monitoring.
#They can be ignored in all other situations. #They can be ignored in all other situations.
print "kpis test_acc %f" % (1 - test_seq_error[0]) print("kpis test_acc %f" % (1 - test_seq_error[0]))
def save_model(args, exe, iter_num): def save_model(args, exe, iter_num):
filename = "model_%05d" % iter_num filename = "model_%05d" % iter_num
fluid.io.save_params( fluid.io.save_params(
exe, dirname=args.save_model_dir, filename=filename) exe, dirname=args.save_model_dir, filename=filename)
print "Saved model to: %s/%s." % (args.save_model_dir, filename) print("Saved model to: %s/%s." % (args.save_model_dir, filename))
iter_num = 0 iter_num = 0
stop = False stop = False
...@@ -144,18 +155,18 @@ def train(args, data_reader=ctc_reader): ...@@ -144,18 +155,18 @@ def train(args, data_reader=ctc_reader):
iter_num += 1 iter_num += 1
# training log # training log
if iter_num % args.log_period == 0: if iter_num % args.log_period == 0:
print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % ( print("\nTime: %s; Iter[%d]; Avg loss: %.3f; Avg seq err: %.3f" % (
time.time(), iter_num, time.time(), iter_num,
total_loss / (args.log_period * args.batch_size), total_loss / (args.log_period * args.batch_size),
total_seq_error / (args.log_period * args.batch_size)) total_seq_error / (args.log_period * args.batch_size)))
print "kpis train_cost %f" % (total_loss / (args.log_period * print("kpis train_cost %f" % (total_loss / (args.log_period *
args.batch_size)) args.batch_size)))
print "kpis train_acc %f" % ( print("kpis train_acc %f" % (
1 - total_seq_error / (args.log_period * args.batch_size)) 1 - total_seq_error / (args.log_period * args.batch_size)))
total_loss = 0.0 total_loss = 0.0
total_seq_error = 0.0 total_seq_error = 0.0
# evaluate # evaluate
if not args.skip_test and iter_num % args.eval_period == 0: if not args.skip_test and iter_num % args.eval_period == 0:
if model_average: if model_average:
with model_average.apply(exe): with model_average.apply(exe):
...@@ -171,7 +182,7 @@ def train(args, data_reader=ctc_reader): ...@@ -171,7 +182,7 @@ def train(args, data_reader=ctc_reader):
else: else:
save_model(args, exe, iter_num) save_model(args, exe, iter_num)
end_time = time.time() end_time = time.time()
print "kpis train_duration %f" % (end_time - start_time) print("kpis train_duration %f" % (end_time - start_time))
# Postprocess benchmark data # Postprocess benchmark data
latencies = batch_times[args.skip_batch_num:] latencies = batch_times[args.skip_batch_num:]
latency_avg = np.average(latencies) latency_avg = np.average(latencies)
...@@ -195,12 +206,12 @@ def main(): ...@@ -195,12 +206,12 @@ def main():
if args.profile: if args.profile:
if args.use_gpu: if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(args, data_reader=ctc_reader) train(args)
else: else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof: with profiler.profiler("CPU", sorted_key='total') as cpuprof:
train(args, data_reader=ctc_reader) train(args)
else: else:
train(args, data_reader=ctc_reader) train(args)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -19,6 +19,8 @@ from __future__ import print_function ...@@ -19,6 +19,8 @@ from __future__ import print_function
import distutils.util import distutils.util
import numpy as np import numpy as np
from paddle.fluid import core from paddle.fluid import core
import paddle.fluid as fluid
import six
def print_arguments(args): def print_arguments(args):
...@@ -37,7 +39,7 @@ def print_arguments(args): ...@@ -37,7 +39,7 @@ def print_arguments(args):
:type args: argparse.Namespace :type args: argparse.Namespace
""" """
print("----------- Configuration Arguments -----------") print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value)) print("%s: %s" % (arg, value))
print("------------------------------------------------") print("------------------------------------------------")
...@@ -77,14 +79,58 @@ def to_lodtensor(data, place): ...@@ -77,14 +79,58 @@ def to_lodtensor(data, place):
return res return res
def get_feeder_data(data, place, need_label=True): def get_ctc_feeder_data(data, place, need_label=True):
pixel_tensor = core.LoDTensor() pixel_tensor = core.LoDTensor()
pixel_data = None pixel_data = None
pixel_data = np.concatenate( pixel_data = np.concatenate(
map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32") list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
pixel_tensor.set(pixel_data, place) pixel_tensor.set(pixel_data, place)
label_tensor = to_lodtensor(map(lambda x: x[1], data), place) label_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
if need_label: if need_label:
return {"pixel": pixel_tensor, "label": label_tensor} return {"pixel": pixel_tensor, "label": label_tensor}
else: else:
return {"pixel": pixel_tensor} return {"pixel": pixel_tensor}
def get_attention_feeder_data(data, place, need_label=True):
pixel_tensor = core.LoDTensor()
pixel_data = None
pixel_data = np.concatenate(
list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
pixel_tensor.set(pixel_data, place)
label_in_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
label_out_tensor = to_lodtensor(list(map(lambda x: x[2], data)), place)
if need_label:
return {
"pixel": pixel_tensor,
"label_in": label_in_tensor,
"label_out": label_out_tensor
}
else:
return {"pixel": pixel_tensor}
def get_attention_feeder_for_infer(data, place):
batch_size = len(data)
init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64')
init_scores_data = np.array(
[1. for _ in range(batch_size)], dtype='float32')
init_ids_data = init_ids_data.reshape((batch_size, 1))
init_scores_data = init_scores_data.reshape((batch_size, 1))
init_recursive_seq_lens = [1] * batch_size
init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens]
init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens,
place)
init_scores = fluid.create_lod_tensor(init_scores_data,
init_recursive_seq_lens, place)
pixel_tensor = core.LoDTensor()
pixel_data = None
pixel_data = np.concatenate(
list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
pixel_tensor.set(pixel_data, place)
return {
"pixel": pixel_tensor,
"init_ids": init_ids,
"init_scores": init_scores
}
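Concretely, for a batch of three the beam-search bootstrap built above looks like this (values grounded in the function body: every sample starts from id 0 with score 1.0 and a one-entry sequence at both LoD levels):

```python
import numpy as np

batch_size = 3
init_ids_data = np.zeros((batch_size, 1), dtype="int64")      # all start ids = 0
init_scores_data = np.ones((batch_size, 1), dtype="float32")  # all start scores = 1.0
# Two identical LoD levels: one sentence per sample, one candidate per sentence.
init_recursive_seq_lens = [[1, 1, 1], [1, 1, 1]]
```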
...@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi ...@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!! #### NOTE kpi.py should shared in models in some way!!!!
train_acc_kpi = AccKpi('train_precision', 0.005, actived=True) train_acc_kpi = AccKpi('train_precision', 0.005, actived=False)
test_acc_kpi = CostKpi('test_precision', 0.005, actived=True) test_acc_kpi = CostKpi('test_precision', 0.005, actived=False)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True) train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
tracking_kpis = [ tracking_kpis = [
......
...@@ -8,7 +8,7 @@ from kpi import CostKpi, DurationKpi, AccKpi ...@@ -8,7 +8,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!! #### NOTE kpi.py should shared in models in some way!!!!
train_acc_kpi = AccKpi('train_acc', 0.005, actived=True) train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)
train_cost_kpi = CostKpi('train_cost', 0.005, actived=True) train_cost_kpi = CostKpi('train_cost', 0.005, actived=False)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True) train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
tracking_kpis = [ tracking_kpis = [
......
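These `_ce.py` files consume the `kpis ...` lines printed by the trainers above; a minimal sketch of such a scraper (the real `kpi.py` API is not shown here, so this parser and its names are assumptions):

```python
import sys

def parse_kpi_log(lines):
    """Collect {kpi_name: [values]} from lines like 'kpis train_cost 0.123'."""
    records = {}
    for line in lines:
        parts = line.strip().split()
        if len(parts) == 3 and parts[0] == "kpis":
            try:
                value = float(parts[2])
            except ValueError:
                continue
            records.setdefault(parts[1], []).append(value)
    return records

if __name__ == "__main__":
    print(parse_kpi_log(sys.stdin))
```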
...@@ -3,6 +3,7 @@ import contextlib ...@@ -3,6 +3,7 @@ import contextlib
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import six
import sys import sys
import time import time
import os import os
...@@ -46,8 +47,8 @@ def data2tensor(data, place): ...@@ -46,8 +47,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
...@@ -56,8 +57,8 @@ def data2pred(data, place): ...@@ -56,8 +57,8 @@ def data2pred(data, place):
""" """
data2pred data2pred
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq} return {"words": input_seq}
...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): ...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file Save dict into file
""" """
with open(vocab, "w") as fout: with open(vocab, "w") as fout:
for k, v in word_dict.iteritems(): for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr) fout.write(outstr)
...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", ...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict): def scdb_test_data(test_file, w_dict):
""" """
test_set=["car", "lbs", "spot", "weibo", test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"] "baby", "toutiao", "3c", "movie", "haogan"]
""" """
return data_reader(test_file, w_dict) return data_reader(test_file, w_dict)
...@@ -424,7 +425,7 @@ def start_train(train_reader, ...@@ -424,7 +425,7 @@ def start_train(train_reader,
start_exe.run(fluid.default_startup_program()) start_exe.run(fluid.default_startup_program())
exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name) exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name)
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0 total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
cost_val, acc_val = exe.run(feed=feeder.feed(data), cost_val, acc_val = exe.run(feed=feeder.feed(data),
...@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab", ...@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab",
""" """
w_dict = scdb_word_dict(vocab=vocab) w_dict = scdb_word_dict(vocab=vocab)
test_files = [ "./thirdparty" + os.sep + f for f in test_list] test_files = [ "./thirdparty" + os.sep + f for f in test_list]
train_reader = paddle.batch( train_reader = paddle.batch(
scdb_train_data(train_dir, w_dict), scdb_train_data(train_dir, w_dict),
batch_size = 256) batch_size = 256)
......
...@@ -3,6 +3,7 @@ import contextlib ...@@ -3,6 +3,7 @@ import contextlib
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import six
import sys import sys
import time import time
import os import os
...@@ -46,8 +47,8 @@ def data2tensor(data, place): ...@@ -46,8 +47,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
...@@ -56,8 +57,8 @@ def data2pred(data, place): ...@@ -56,8 +57,8 @@ def data2pred(data, place):
""" """
data2pred data2pred
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq} return {"words": input_seq}
...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): ...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file Save dict into file
""" """
with open(vocab, "w") as fout: with open(vocab, "w") as fout:
for k, v in word_dict.iteritems(): for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr) fout.write(outstr)
...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", ...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict): def scdb_test_data(test_file, w_dict):
""" """
test_set=["car", "lbs", "spot", "weibo", test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"] "baby", "toutiao", "3c", "movie", "haogan"]
""" """
return data_reader(test_file, w_dict) return data_reader(test_file, w_dict)
...@@ -422,7 +423,7 @@ def start_train(train_reader, ...@@ -422,7 +423,7 @@ def start_train(train_reader,
feeder = fluid.DataFeeder(feed_list=[data, label], place=place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(), avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
......
import os import os
import six
import sys import sys
import time import time
import unittest import unittest
...@@ -58,7 +59,7 @@ def train(train_reader, ...@@ -58,7 +59,7 @@ def train(train_reader,
if "CE_MODE_X" in os.environ: if "CE_MODE_X" in os.environ:
fluid.default_startup_program().random_seed = 110 fluid.default_startup_program().random_seed = 110
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
pass_start = time.time() pass_start = time.time()
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
......
...@@ -43,8 +43,8 @@ def data2tensor(data, place): ...@@ -43,8 +43,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
......