diff --git a/fluid/face_detection/README_cn.md b/fluid/face_detection/README_cn.md index a160fbd9ce25c4fac4333b9914058a16fda7e58a..c0a32ceb350fee6dd9eda94a9ceb0fa6c8ceee27 100644 --- a/fluid/face_detection/README_cn.md +++ b/fluid/face_detection/README_cn.md @@ -165,10 +165,10 @@ python widerface_eval.py --infer=True --confs_threshold=0.15 ``` 下图可视化了模型的预测结果:

Pyramidbox prediction visualization

diff --git a/fluid/face_detection/images/0_Parade_marchingband_1_356.jpg b/fluid/face_detection/images/0_Parade_marchingband_1_356.jpg index d5175c5a9d57184945ae63ab1cfc0a2621ef9b70..d383deefadae83816693858db93adae2397c75ea 100644 Binary files a/fluid/face_detection/images/0_Parade_marchingband_1_356.jpg and b/fluid/face_detection/images/0_Parade_marchingband_1_356.jpg differ diff --git a/fluid/face_detection/images/28_Sports_Fan_Sports_Fan_28_770.jpg b/fluid/face_detection/images/28_Sports_Fan_Sports_Fan_28_770.jpg index 661ad9abb2d21e6e6b872b2a44e14fc476e732e7..e6a481ec554a53fc433f41dacad01a101e9e930f 100644 Binary files a/fluid/face_detection/images/28_Sports_Fan_Sports_Fan_28_770.jpg and b/fluid/face_detection/images/28_Sports_Fan_Sports_Fan_28_770.jpg differ diff --git a/fluid/face_detection/images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg b/fluid/face_detection/images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg index febe7411ae2ff258a608275fc9050294014e8342..e934fa6c45777c7a66103cc2b3d55e5f4fd18096 100644 Binary files a/fluid/face_detection/images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg and b/fluid/face_detection/images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg differ diff --git a/fluid/face_detection/images/4_Dancing_Dancing_4_194.jpg b/fluid/face_detection/images/4_Dancing_Dancing_4_194.jpg index 8161593708db628c6beb4f32c8133226e19f0f13..52476b8ee84762fc87e7c4e3eb560b567e44435c 100644 Binary files a/fluid/face_detection/images/4_Dancing_Dancing_4_194.jpg and b/fluid/face_detection/images/4_Dancing_Dancing_4_194.jpg differ diff --git a/fluid/icnet/.run_ce.sh b/fluid/icnet/.run_ce.sh index a46081c7978395697b843c5fef95e6091b47e4e5..643c1ed4cd1bd1012935e063cd8b3e3bbfd4f6d0 100755 --- a/fluid/icnet/.run_ce.sh +++ b/fluid/icnet/.run_ce.sh @@ -2,6 +2,7 @@ # This file is only used for continuous evaluation. +export ce_mode=1 rm -rf *_factor.txt -python train.py --use_gpu=True 1> log +python train.py --use_gpu=True --random_mirror=False --random_scaling=False 1> log cat log | python _ce.py diff --git a/fluid/icnet/_ce.py b/fluid/icnet/_ce.py index 3844eefde620f9587d747594ad0d5351999859c8..8953488259d5bc194921637e0b141cb90081e4f9 100644 --- a/fluid/icnet/_ce.py +++ b/fluid/icnet/_ce.py @@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi # NOTE kpi.py should shared in models in some way!!!! -train_cost_kpi = CostKpi('train_cost', 0.02, actived=True) -train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True) +train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True) +train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True) tracking_kpis = [ train_cost_kpi, diff --git a/fluid/icnet/cityscape.py b/fluid/icnet/cityscape.py index 0b862d9a92728fdcc2b12f673c3583ec096d4d25..c5c08afcf3a3c85b9f43c9110e8a8dedc5900d5b 100644 --- a/fluid/icnet/cityscape.py +++ b/fluid/icnet/cityscape.py @@ -1,5 +1,8 @@ """Reader for Cityscape dataset. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import os import cv2 import numpy as np @@ -173,8 +176,8 @@ class DataGenerater: """ Scale label according to factor. 
""" - h = label.shape[0] / factor - w = label.shape[1] / factor + h = label.shape[0] // factor + w = label.shape[1] // factor return cv2.resize( label, (h, w), interpolation=cv2.INTER_NEAREST)[:, :, np.newaxis] diff --git a/fluid/icnet/eval.py b/fluid/icnet/eval.py index bdebe7ad72d799e709bd529711d600a9d692a838..dc2f574359a568de794b07cf05d854f6d388482a 100644 --- a/fluid/icnet/eval.py +++ b/fluid/icnet/eval.py @@ -64,7 +64,7 @@ def eval(args): exe.run(fluid.default_startup_program()) assert os.path.exists(args.model_path) fluid.io.load_params(exe, args.model_path) - print "loaded model from: %s" % args.model_path + print("loaded model from: %s" % args.model_path) sys.stdout.flush() fetch_vars = [iou, out_w, out_r] @@ -80,11 +80,10 @@ def eval(args): fetch_list=fetch_vars) out_wrong += result[1] out_right += result[2] - print "count: %s; current iou: %.3f;\r" % (count, result[0]), sys.stdout.flush() iou = cal_mean_iou(out_wrong, out_right) - print "\nmean iou: %.3f" % iou - print "kpis test_acc %f" % iou + print("\nmean iou: %.3f" % iou) + print("kpis test_acc %f" % iou) def main(): diff --git a/fluid/icnet/icnet.py b/fluid/icnet/icnet.py index afe3fa9d352bd8fbf6b2fad46f24ad4c9841a1ff..d640621eb9def4bfb1411667ea68f5384fbd5489 100644 --- a/fluid/icnet/icnet.py +++ b/fluid/icnet/icnet.py @@ -1,3 +1,6 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import paddle.fluid as fluid import numpy as np import sys @@ -20,8 +23,8 @@ def conv(input, if padding == "SAME": padding_h = max(k_h - s_h, 0) padding_w = max(k_w - s_w, 0) - padding_top = padding_h / 2 - padding_left = padding_w / 2 + padding_top = padding_h // 2 + padding_left = padding_w // 2 padding_bottom = padding_h - padding_top padding_right = padding_w - padding_left padding = [ @@ -57,8 +60,8 @@ def atrous_conv(input, if padding == "SAME": padding_h = max(k_h - s_h, 0) padding_w = max(k_w - s_w, 0) - padding_top = padding_h / 2 - padding_left = padding_w / 2 + padding_top = padding_h // 2 + padding_left = padding_w // 2 padding_bottom = padding_h - padding_top padding_right = padding_w - padding_left padding = [ @@ -141,15 +144,15 @@ def dilation_convs(input): def pyramis_pooling(input, input_shape): - shape = np.ceil(input_shape / 32).astype("int32") + shape = np.ceil(input_shape // 32).astype("int32") h, w = shape pool1 = avg_pool(input, h, w, h, w) pool1_interp = interp(pool1, shape) - pool2 = avg_pool(input, h / 2, w / 2, h / 2, w / 2) + pool2 = avg_pool(input, h // 2, w // 2, h // 2, w // 2) pool2_interp = interp(pool2, shape) - pool3 = avg_pool(input, h / 3, w / 3, h / 3, w / 3) + pool3 = avg_pool(input, h // 3, w // 3, h // 3, w // 3) pool3_interp = interp(pool3, shape) - pool4 = avg_pool(input, h / 4, w / 4, h / 4, w / 4) + pool4 = avg_pool(input, h // 4, w // 4, h // 4, w // 4) pool4_interp = interp(pool4, shape) conv5_3_sum = input + pool4_interp + pool3_interp + pool2_interp + pool1_interp return conv5_3_sum @@ -172,14 +175,14 @@ def shared_convs(image): def res_block(input, filter_num, padding=0, dilation=None, name=None): - tmp = conv(input, 1, 1, filter_num / 4, 1, 1, name=name + "_1_1_reduce") + tmp = conv(input, 1, 1, filter_num // 4, 1, 1, name=name + "_1_1_reduce") tmp = bn(tmp, relu=True) tmp = zero_padding(tmp, padding=padding) if dilation is None: - tmp = conv(tmp, 3, 3, filter_num / 4, 1, 1, name=name + "_3_3") + tmp = conv(tmp, 3, 3, filter_num // 4, 1, 1, name=name + "_3_3") else: tmp = atrous_conv( - tmp, 3, 3, filter_num / 4, dilation, name=name 
+ "_3_3") + tmp, 3, 3, filter_num // 4, dilation, name=name + "_3_3") tmp = bn(tmp, relu=True) tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase") tmp = bn(tmp, relu=False) @@ -195,7 +198,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1, proj_bn = bn(proj, relu=False) tmp = conv( - input, 1, 1, filter_num / 4, stride, stride, name=name + "_1_1_reduce") + input, 1, 1, filter_num // 4, stride, stride, name=name + "_1_1_reduce") tmp = bn(tmp, relu=True) tmp = zero_padding(tmp, padding=padding) @@ -208,7 +211,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1, tmp, 3, 3, - filter_num / 4, + filter_num // 4, 1, 1, padding=padding, @@ -218,7 +221,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1, tmp, 3, 3, - filter_num / 4, + filter_num // 4, dilation, padding=padding, name=name + "_3_3") @@ -232,12 +235,12 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1, def sub_net_4(input, input_shape): - tmp = interp(input, out_shape=np.ceil(input_shape / 32)) + tmp = interp(input, out_shape=np.ceil(input_shape // 32)) tmp = dilation_convs(tmp) tmp = pyramis_pooling(tmp, input_shape) tmp = conv(tmp, 1, 1, 256, 1, 1, name="conv5_4_k1") tmp = bn(tmp, relu=True) - tmp = interp(tmp, input_shape / 16) + tmp = interp(tmp, input_shape // 16) return tmp @@ -265,7 +268,7 @@ def CCF24(sub2_out, sub4_out, input_shape): tmp = bn(tmp, relu=False) tmp = tmp + sub2_out tmp = fluid.layers.relu(tmp) - tmp = interp(tmp, input_shape / 8) + tmp = interp(tmp, input_shape // 8) return tmp @@ -275,7 +278,7 @@ def CCF124(sub1_out, sub24_out, input_shape): tmp = bn(tmp, relu=False) tmp = tmp + sub1_out tmp = fluid.layers.relu(tmp) - tmp = interp(tmp, input_shape / 4) + tmp = interp(tmp, input_shape // 4) return tmp diff --git a/fluid/icnet/infer.py b/fluid/icnet/infer.py index 63fb3268060248f70462cf914c613c53a1fc1f89..f93469f157660a4c5adae7d4ff2bc9b315bce41e 100644 --- a/fluid/icnet/infer.py +++ b/fluid/icnet/infer.py @@ -1,4 +1,5 @@ """Infer for ICNet model.""" +from __future__ import print_function import cityscape import argparse import functools @@ -101,7 +102,7 @@ def infer(args): exe.run(fluid.default_startup_program()) assert os.path.exists(args.model_path) fluid.io.load_params(exe, args.model_path) - print "loaded model from: %s" % args.model_path + print("loaded model from: %s" % args.model_path) sys.stdout.flush() if not os.path.isdir(args.out_path): diff --git a/fluid/icnet/train.py b/fluid/icnet/train.py index b38f08258b9b3e1bd28d808b2779416259f9d827..1059e353337d79f9b879e8aeb0fbb095c9689df9 100644 --- a/fluid/icnet/train.py +++ b/fluid/icnet/train.py @@ -1,9 +1,13 @@ """Trainer for ICNet model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function from icnet import icnet import cityscape import argparse import functools import sys +import os import time import paddle.fluid as fluid import numpy as np @@ -11,9 +15,8 @@ from utils import add_arguments, print_arguments, get_feeder_data from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter from paddle.fluid.initializer import init_on_cpu -SEED = 90 -# random seed must set before configuring the network. 
-fluid.default_startup_program().random_seed = SEED +if 'ce_mode' in os.environ: + np.random.seed(10) parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) @@ -87,10 +90,14 @@ def train(args): if args.use_gpu: place = fluid.CUDAPlace(0) exe = fluid.Executor(place) + + if 'ce_mode' in os.environ: + fluid.default_startup_program().random_seed = 90 + exe.run(fluid.default_startup_program()) if args.init_model is not None: - print "load model from: %s" % args.init_model + print("load model from: %s" % args.init_model) sys.stdout.flush() fluid.io.load_params(exe, args.init_model) @@ -107,7 +114,7 @@ def train(args): for data in train_reader(): if iter_id > TOTAL_STEP: end_time = time.time() - print "kpis train_duration %f" % (end_time - start_time) + print("kpis train_duration %f" % (end_time - start_time)) return iter_id += 1 results = exe.run( @@ -119,10 +126,10 @@ def train(args): sub124_loss += results[3] # training log if iter_id % LOG_PERIOD == 0: - print "Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" % ( + print("Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" % ( iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD, - sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD) - print "kpis train_cost %f" % (t_loss / LOG_PERIOD) + sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD)) + print("kpis train_cost %f" % (t_loss / LOG_PERIOD)) t_loss = 0. sub4_loss = 0. @@ -133,7 +140,7 @@ def train(args): if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None: dir_name = args.checkpoint_path + "/" + str(iter_id) fluid.io.save_persistables(exe, dirname=dir_name) - print "Saved checkpoint: %s" % (dir_name) + print("Saved checkpoint: %s" % (dir_name)) def main(): diff --git a/fluid/icnet/utils.py b/fluid/icnet/utils.py index 699841d65f16ffd0dfae0d27e33c2ec52479826e..7d58060eb96fd95a04f377f8c852eda02e59b5f6 100644 --- a/fluid/icnet/utils.py +++ b/fluid/icnet/utils.py @@ -19,6 +19,7 @@ from __future__ import print_function import distutils.util import numpy as np from paddle.fluid import core +import six def print_arguments(args): @@ -37,7 +38,7 @@ def print_arguments(args): :type args: argparse.Namespace """ print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): + for arg, value in sorted(six.iteritems(vars(args))): print("%s: %s" % (arg, value)) print("------------------------------------------------") diff --git a/fluid/image_classification/.run_ce.sh b/fluid/image_classification/.run_ce.sh index f06fdf769024aef68ab5278fafb956f859228038..be1a37615885f0e7f6700b0399e419ac90afaa33 100755 --- a/fluid/image_classification/.run_ce.sh +++ b/fluid/image_classification/.run_ce.sh @@ -5,6 +5,6 @@ cudaid=${object_detection_cudaid:=0} export CUDA_VISIBLE_DEVICES=$cudaid python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py -cudaid=${object_detection_cudaid:=0, 1, 2, 3} +cudaid=${object_detection_cudaid_m:=0, 1, 2, 3} export CUDA_VISIBLE_DEVICES=$cudaid python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py diff --git a/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py b/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py index 45b1f5303ce77de7c7f5e3a232517c26e159b2fa..c995e6df17a4be068984cece06a9b3a33f6ea4f4 100644 --- a/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py +++ 
b/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py @@ -45,7 +45,7 @@ def calc_diff(f1, f2): sq_df = np.mean(df * df) return max_df, sq_df except Exception as e: - return -1.0, -1.0 + return 1.0, 1.0 def compare(path1, path2, no_exception): diff --git a/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py b/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py index 4b6f38be6647b23a4c79dcc7850aaab55fa6ffa8..0c1354530c6af3b35977bb51e830b89d481de6b6 100644 --- a/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py +++ b/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py @@ -245,10 +245,18 @@ class Network(object): @layer def prelu(self, input, channel_shared, name): - #fluid = import_fluid() - #output = fluid.layers.relu(input) - #return output - raise NotImplementedError('prelu not implemented') + fluid = import_fluid() + if channel_shared: + mode = 'all' + else: + mode = 'channel' + + prefix = name + '_' + output = fluid.layers.prelu( + input, + mode=mode, + param_attr=fluid.ParamAttr(name=prefix + 'negslope')) + return output def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding, name): diff --git a/fluid/image_classification/caffe2fluid/kaffe/transformers.py b/fluid/image_classification/caffe2fluid/kaffe/transformers.py index b92e93bbd84989e39bd658d107166f68e9dc4a27..f436ec8cbfb49394ef2880dbf0cb46aeff7a6ca0 100644 --- a/fluid/image_classification/caffe2fluid/kaffe/transformers.py +++ b/fluid/image_classification/caffe2fluid/kaffe/transformers.py @@ -176,6 +176,7 @@ class DataReshaper(object): del node.reshaped_data return graph + class CropFuser(object): ''' Crop is to return a scalar output Blob for an input Blob of arbitrary size. @@ -197,7 +198,8 @@ class CropFuser(object): cls._traced_names[fname] = [] cls._traced_names[fname].append(tname) - def __init__(self, allowed_parent_types=[NodeKind.Input, NodeKind.DummyData]): + def __init__(self, + allowed_parent_types=[NodeKind.Input, NodeKind.DummyData]): self.allowed_parent_types = allowed_parent_types def __call__(self, graph): @@ -232,7 +234,11 @@ class CropFuser(object): def merge(self, parent, child): '''Merge the parent node into the child.''' - child.metadata['shape'] = [parent.output_shape.batch_size, parent.output_shape.channels, parent.output_shape.height, parent.output_shape.width] + child.metadata['shape'] = [ + parent.output_shape.batch_size, parent.output_shape.channels, + parent.output_shape.height, parent.output_shape.width + ] + class SubNodeFuser(object): ''' @@ -395,6 +401,8 @@ class ParameterNamer(object): names = ('scale', ) if getattr(node.parameters, 'bias_term', False): names = ('scale', 'offset') + elif node.kind == NodeKind.PReLU: + names = ('negslope', ) elif node.kind == "Normalize": names = ('scale', ) else: diff --git a/fluid/neural_machine_translation/rnn_search/infer.py b/fluid/neural_machine_translation/rnn_search/infer.py index 51bdf9cda4694d4d849ff333e5c8e47978fb8815..4345b29c4084f953d7382b740148e5c5db773da6 100644 --- a/fluid/neural_machine_translation/rnn_search/infer.py +++ b/fluid/neural_machine_translation/rnn_search/infer.py @@ -18,6 +18,7 @@ from __future__ import print_function import numpy as np import os +import six import paddle import paddle.fluid as fluid @@ -102,7 +103,7 @@ def infer(): init_recursive_seq_lens, place) # Feed dict for inference - feed_dict = feeder.feed(map(lambda x: [x[0]], data)) + feed_dict = feeder.feed([[x[0]] for x in data]) feed_dict['init_ids'] = init_ids feed_dict['init_scores'] = 
init_scores @@ -115,7 +116,7 @@ def infer(): lod_level_1 = fetch_outs[0].lod()[1] token_array = np.array(fetch_outs[0]) result = [] - for i in xrange(len(lod_level_1) - 1): + for i in six.moves.xrange(len(lod_level_1) - 1): sentence_list = [ trg_dict[token] for token in token_array[lod_level_1[i]:lod_level_1[i + 1]] @@ -125,7 +126,7 @@ def infer(): lod_level_0 = fetch_outs[0].lod()[0] paragraphs = [ result[lod_level_0[i]:lod_level_0[i + 1]] - for i in xrange(len(lod_level_0) - 1) + for i in six.moves.xrange(len(lod_level_0) - 1) ] for paragraph in paragraphs: diff --git a/fluid/neural_machine_translation/transformer/_ce.py b/fluid/neural_machine_translation/transformer/_ce.py index 88886a4bc1068bcfc7197a247f26c7a092aee7c2..447652c4f4d60765011a621371b381e75573612e 100644 --- a/fluid/neural_machine_translation/transformer/_ce.py +++ b/fluid/neural_machine_translation/transformer/_ce.py @@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi #### NOTE kpi.py should shared in models in some way!!!! -train_cost_card1_kpi = CostKpi('train_cost_card1', 0.01, 0, actived=True) +train_cost_card1_kpi = CostKpi('train_cost_card1', 0.02, 0, actived=True) test_cost_card1_kpi = CostKpi('test_cost_card1', 0.005, 0, actived=True) train_duration_card1_kpi = DurationKpi( 'train_duration_card1', 0.06, 0, actived=True) diff --git a/fluid/object_detection/.run_ce.sh b/fluid/object_detection/.run_ce.sh index 50809e77043e0eb0bb5f6bf5a9904d8113c85756..8b3d1a525e2556f2acd0603bec369b3ca99541e1 100755 --- a/fluid/object_detection/.run_ce.sh +++ b/fluid/object_detection/.run_ce.sh @@ -14,6 +14,6 @@ cudaid=${object_detection_cudaid:=0} export CUDA_VISIBLE_DEVICES=$cudaid FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py -cudaid=${object_detection_cudaid:=0,1,2,3} +cudaid=${object_detection_cudaid_m:=0,1,2,3} export CUDA_VISIBLE_DEVICES=$cudaid FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py diff --git a/fluid/object_detection/_ce.py b/fluid/object_detection/_ce.py index 4f17ff324d8c4bb1d0cecca2401e584a7ec5e3af..f90887c9a3b6a67dc8fd2c29dd3fc384237d7e43 100644 --- a/fluid/object_detection/_ce.py +++ b/fluid/object_detection/_ce.py @@ -8,8 +8,8 @@ from kpi import CostKpi, DurationKpi, AccKpi #### NOTE kpi.py should shared in models in some way!!!! 
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) -test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=True) -train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=True) +test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=False) +train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=False) train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True) test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=True) train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, 0, actived=True) diff --git a/fluid/object_detection/reader.py b/fluid/object_detection/reader.py index a30ae797b9415efbb594a9e3238985eded49d421..da0aa788570369d64f1a3db5303fc056984e3c74 100644 --- a/fluid/object_detection/reader.py +++ b/fluid/object_detection/reader.py @@ -22,6 +22,7 @@ import xml.etree.ElementTree import os import time import copy +import six class Settings(object): @@ -151,7 +152,7 @@ def preprocess(img, bbox_labels, mode, settings): mirror = int(random.uniform(0, 2)) if mirror == 1: img = img[:, ::-1, :] - for i in xrange(len(sampled_labels)): + for i in six.moves.xrange(len(sampled_labels)): tmp = sampled_labels[i][1] sampled_labels[i][1] = 1 - sampled_labels[i][3] sampled_labels[i][3] = 1 - tmp diff --git a/fluid/object_detection/train.py b/fluid/object_detection/train.py index 46af235ff7f6c3067e1cc2d35de76ebaf59be885..706a33be1d5c1e4c2a6c9cae49310804962a6315 100644 --- a/fluid/object_detection/train.py +++ b/fluid/object_detection/train.py @@ -65,7 +65,6 @@ def train(args, name='gt_label', shape=[1], dtype='int32', lod_level=1) difficult = fluid.layers.data( name='gt_difficult', shape=[1], dtype='int32', lod_level=1) - locs, confs, box, box_var = mobile_net(num_classes, image, image_shape) nmsed_out = fluid.layers.detection_output( locs, confs, box, box_var, nms_threshold=args.nms_threshold) @@ -88,16 +87,16 @@ def train(args, if 'coco' in data_args.dataset: # learning rate decay in 12, 19 pass, respectively if '2014' in train_file_list: - epocs = 82783 / batch_size + epocs = 82783 // batch_size boundaries = [epocs * 12, epocs * 19] elif '2017' in train_file_list: - epocs = 118287 / batch_size + epocs = 118287 // batch_size boundaries = [epocs * 12, epocs * 19] values = [ learning_rate, learning_rate * 0.5, learning_rate * 0.25 ] elif 'pascalvoc' in data_args.dataset: - epocs = 19200 / batch_size + epocs = 19200 // batch_size boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100] values = [ learning_rate, learning_rate * 0.5, learning_rate * 0.25, @@ -126,6 +125,9 @@ def train(args, train_reader = paddle.batch( reader.train(data_args, train_file_list), batch_size=batch_size) else: + import random + random.seed(0) + np.random.seed(0) train_reader = paddle.batch( reader.train(data_args, train_file_list, False), batch_size=batch_size) test_reader = paddle.batch( @@ -137,7 +139,7 @@ def train(args, model_path = os.path.join(model_save_dir, postfix) if os.path.isdir(model_path): shutil.rmtree(model_path) - print 'save models to %s' % (model_path) + print('save models to %s' % (model_path)) fluid.io.save_persistables(exe, model_path) best_map = 0. 
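The hunks above and below repeatedly apply the same Python 2/3 compatibility pattern: `/` becomes `//` wherever an integer result is required (true division returns a float under `from __future__ import division` and on Python 3), `print` statements become `print()` calls, and `xrange`/`iteritems` are reached through `six`. A minimal standalone sketch of that pattern, for illustration only (none of these lines come from the files in this patch):

```python
from __future__ import division, print_function
import six

# floor division keeps step counts integral on Python 3 (true division yields a float)
steps_per_pass = 19200 // 64
print("steps per pass: %d" % steps_per_pass)

# dict iteration and xrange are routed through six so the same code runs on 2 and 3
args = {"batch_size": 64, "use_gpu": True}
for name, value in sorted(six.iteritems(args)):
    print("%s: %s" % (name, value))
for i in six.moves.xrange(2):
    print("pass", i)
```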
@@ -166,8 +168,6 @@ def train(args, start_time = time.time() prev_start_time = start_time every_pass_loss = [] - iter = 0 - pass_duration = 0.0 for batch_id, data in enumerate(train_reader()): prev_start_time = start_time start_time = time.time() @@ -193,15 +193,15 @@ def train(args, total_time += end_time - start_time train_avg_loss = np.mean(every_pass_loss) if devices_num == 1: - print ("kpis train_cost %s" % train_avg_loss) - print ("kpis test_acc %s" % mean_map) - print ("kpis train_speed %s" % (total_time / epoch_idx)) + print("kpis train_cost %s" % train_avg_loss) + print("kpis test_acc %s" % mean_map) + print("kpis train_speed %s" % (total_time / epoch_idx)) else: - print ("kpis train_cost_card%s %s" % + print("kpis train_cost_card%s %s" % (devices_num, train_avg_loss)) - print ("kpis test_acc_card%s %s" % + print("kpis test_acc_card%s %s" % (devices_num, mean_map)) - print ("kpis train_speed_card%s %f" % + print("kpis train_speed_card%s %f" % (devices_num, total_time / epoch_idx)) diff --git a/fluid/object_detection/utility.py b/fluid/object_detection/utility.py index 506e6007ceb9059caf1163befb6ff594d67b547a..746ce25a73979a9026f740fc8fce483857677082 100644 --- a/fluid/object_detection/utility.py +++ b/fluid/object_detection/utility.py @@ -16,8 +16,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + import distutils.util import numpy as np +import six from paddle.fluid import core @@ -37,7 +39,7 @@ def print_arguments(args): :type args: argparse.Namespace """ print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): + for arg, value in sorted(six.iteritems(vars(args))): print("%s: %s" % (arg, value)) print("------------------------------------------------") diff --git a/fluid/ocr_recognition/.run.sh b/fluid/ocr_recognition/.run.sh deleted file mode 100644 index 6fd313d34afc8441fa8256e909991263317f1dc6..0000000000000000000000000000000000000000 --- a/fluid/ocr_recognition/.run.sh +++ /dev/null @@ -1 +0,0 @@ -python ctc_train.py --batch_size=128 --total_step=10000 --eval_period=10000 --log_period=10000 --use_gpu=True diff --git a/fluid/ocr_recognition/.run_ce.sh b/fluid/ocr_recognition/.run_ce.sh old mode 100644 new mode 100755 index 202c5b483c8f35fd34a5cfa39c9ba11702a9bbd3..90abc143f8953a96ef94146ca9b3b308cc9e930b --- a/fluid/ocr_recognition/.run_ce.sh +++ b/fluid/ocr_recognition/.run_ce.sh @@ -1,5 +1,4 @@ export ce_mode=1 -rm *factor.txt -python ctc_train.py --batch_size=32 --total_step=30000 --eval_period=30000 --log_period=30000 --use_gpu=True 1> ./tmp.log +python train.py --batch_size=32 --total_step=1 --eval_period=1 --log_period=1 --use_gpu=True 1> ./tmp.log cat tmp.log | python _ce.py rm tmp.log diff --git a/fluid/ocr_recognition/README.md b/fluid/ocr_recognition/README.md index 50b72440818384a0d8e80ab214faaabddbd93f90..ad70aa0c904adad72a18987d306973aa5b89d650 100644 --- a/fluid/ocr_recognition/README.md +++ b/fluid/ocr_recognition/README.md @@ -5,8 +5,9 @@ ## 代码结构 ``` ├── ctc_reader.py # 下载、读取、处理数据。 -├── crnn_ctc_model.py # 定义了训练网络、预测网络和evaluate网络。 -├── ctc_train.py # 用于模型的训练。 +├── crnn_ctc_model.py # 定义了OCR CTC model的网络结构。 +├── attention_model.py # 定义了OCR attention model的网络结构。 +├── train.py # 用于模型的训练。 ├── infer.py # 加载训练好的模型文件,对新数据进行预测。 ├── eval.py # 评估模型在指定数据集上的效果。 └── utils.py # 定义通用的函数。 @@ -15,9 +16,16 @@ ## 简介 -本章的任务是识别含有单行汉语字符图片,首先采用卷积将图片转为特征图, 然后使用`im2sequence op`将特征图转为序列,通过`双向GRU`学习到序列特征。训练过程选用的损失函数为CTC(Connectionist Temporal Classification) 
loss; the final evaluation metric is the sample-level error rate.
+The task in this chapter is to recognize a single line of English characters in an image. Here we use two different models, a CTC model and an attention model, to accomplish it.
+The two models share the same encoder: convolution layers first turn the image into feature maps, the `im2sequence op` then converts the feature maps into a sequence, and a bidirectional GRU learns features over that sequence.
+The decoders and the loss functions of the two models differ as follows:
+
+- CTC model: training uses the CTC (Connectionist Temporal Classification) loss; prediction uses a greedy strategy with CTC decoding (a short decoding sketch follows Figure 3 below).
+- Attention model: training uses an attention-based decoder with a cross-entropy loss; prediction uses beam search.
+
+The evaluation metric for training both models is the sample-level error rate.

## Data
@@ -124,15 +132,23 @@
env OMP_NUM_THREADS= python ctc_train.py --use_gpu False
env CUDA_VISIABLE_DEVICES=0,1,2,3 python ctc_train.py --parallel=True
```
+The `CTC model` is used by default; it can be switched to the `attention model` with the option `--model="attention"`.
+
Run `python ctc_train.py --help` to see more usage and detailed parameter descriptions.

-Figure 2 shows the convergence curve obtained by training with the default parameters on the default dataset; the horizontal axis is the training iteration and the vertical axis is the sample-level error rate. The blue line is the error rate on the training set and the red line is the error rate on the test set. Over 60 training passes, the lowest error rate on the test set was 22.0%, reached at pass 32.
+Figure 2 shows the convergence curve of the `CTC model` trained with the default parameters on the default dataset; the horizontal axis is the training iteration and the vertical axis is the sample-level error rate. The blue line is the error rate on the training set and the red line is the error rate on the test set. The lowest error rate on the test set is 22.0%.

Figure 2

+Figure 3 shows the convergence curve of the `attention model` trained with the default parameters on the default dataset; the horizontal axis is the training iteration and the vertical axis is the sample-level error rate. The blue line is the error rate on the training set and the red line is the error rate on the test set. The lowest error rate on the test set is 16.25%.
+
+Figure 3
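As a side note on the greedy CTC decoding mentioned above: conceptually it takes the most probable label at every time step, merges consecutive repeats, and removes the blank label. A minimal numpy sketch of that idea, for illustration only; the repository itself relies on `fluid.layers.ctc_greedy_decoder`, which elsewhere in this patch is called with `blank=num_classes`:

```python
import numpy as np

def ctc_greedy_decode(probs, blank):
    """Best-path CTC decoding: argmax per step, collapse repeats, drop blanks."""
    best_path = np.argmax(probs, axis=1)
    collapsed = [int(l) for i, l in enumerate(best_path)
                 if i == 0 or l != best_path[i - 1]]
    return [l for l in collapsed if l != blank]

# 4 time steps over 3 labels plus the blank at index 3
probs = np.array([[0.10, 0.80, 0.05, 0.05],
                  [0.10, 0.70, 0.10, 0.10],
                  [0.05, 0.05, 0.10, 0.80],
                  [0.05, 0.05, 0.85, 0.05]])
print(ctc_greedy_decode(probs, blank=3))  # -> [1, 2]
```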

## 测试 diff --git a/fluid/ocr_recognition/_ce.py b/fluid/ocr_recognition/_ce.py index 365639f6dac48862fbd3d5f6da32a16c4135a20b..b1b1e365077b18f15e4443b1b374a69f570da64a 100644 --- a/fluid/ocr_recognition/_ce.py +++ b/fluid/ocr_recognition/_ce.py @@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi # NOTE kpi.py should shared in models in some way!!!! -train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) +train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True) test_acc_kpi = AccKpi('test_acc', 0.005, 0, actived=True) train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True) train_acc_kpi = AccKpi('train_acc', 0.005, 0, actived=True) diff --git a/fluid/ocr_recognition/attention_model.py b/fluid/ocr_recognition/attention_model.py new file mode 100755 index 0000000000000000000000000000000000000000..363c03070e98c721a63891ca8c7f35ce7046ac6d --- /dev/null +++ b/fluid/ocr_recognition/attention_model.py @@ -0,0 +1,371 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle.fluid as fluid + +decoder_size = 128 +word_vector_dim = 128 +max_length = 100 +sos = 0 +eos = 1 +gradient_clip = 10 +LR = 1.0 +beam_size = 2 +learning_rate_decay = None + + +def conv_bn_pool(input, + group, + out_ch, + act="relu", + is_test=False, + pool=True, + use_cudnn=True): + tmp = input + for i in xrange(group): + filter_size = 3 + conv_std = (2.0 / (filter_size**2 * tmp.shape[1]))**0.5 + conv_param = fluid.ParamAttr( + initializer=fluid.initializer.Normal(0.0, conv_std)) + tmp = fluid.layers.conv2d( + input=tmp, + num_filters=out_ch[i], + filter_size=3, + padding=1, + bias_attr=False, + param_attr=conv_param, + act=None, # LinearActivation + use_cudnn=use_cudnn) + + tmp = fluid.layers.batch_norm(input=tmp, act=act, is_test=is_test) + if pool == True: + tmp = fluid.layers.pool2d( + input=tmp, + pool_size=2, + pool_type='max', + pool_stride=2, + use_cudnn=use_cudnn, + ceil_mode=True) + + return tmp + + +def ocr_convs(input, is_test=False, use_cudnn=True): + tmp = input + tmp = conv_bn_pool(tmp, 2, [16, 16], is_test=is_test, use_cudnn=use_cudnn) + tmp = conv_bn_pool(tmp, 2, [32, 32], is_test=is_test, use_cudnn=use_cudnn) + tmp = conv_bn_pool(tmp, 2, [64, 64], is_test=is_test, use_cudnn=use_cudnn) + tmp = conv_bn_pool( + tmp, 2, [128, 128], is_test=is_test, pool=False, use_cudnn=use_cudnn) + return tmp + + +def encoder_net(images, rnn_hidden_size=200, is_test=False, use_cudnn=True): + + conv_features = ocr_convs(images, is_test=is_test, use_cudnn=use_cudnn) + + sliced_feature = fluid.layers.im2sequence( + input=conv_features, + stride=[1, 1], + filter_size=[conv_features.shape[2], 1]) + + para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.02)) + bias_attr = fluid.ParamAttr( + initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0) + + fc_1 = fluid.layers.fc(input=sliced_feature, + size=rnn_hidden_size * 3, + param_attr=para_attr, + bias_attr=False) + fc_2 = fluid.layers.fc(input=sliced_feature, + size=rnn_hidden_size * 3, + param_attr=para_attr, + bias_attr=False) + + gru_forward = fluid.layers.dynamic_gru( + input=fc_1, + size=rnn_hidden_size, + param_attr=para_attr, + bias_attr=bias_attr, + candidate_activation='relu') + gru_backward = fluid.layers.dynamic_gru( + input=fc_2, + size=rnn_hidden_size, + is_reverse=True, + param_attr=para_attr, + bias_attr=bias_attr, + candidate_activation='relu') + + encoded_vector = fluid.layers.concat( + input=[gru_forward, gru_backward], axis=1) + 
encoded_proj = fluid.layers.fc(input=encoded_vector, + size=decoder_size, + bias_attr=False) + + return gru_backward, encoded_vector, encoded_proj + + +def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj, + decoder_boot, decoder_size, num_classes): + def simple_attention(encoder_vec, encoder_proj, decoder_state): + decoder_state_proj = fluid.layers.fc(input=decoder_state, + size=decoder_size, + bias_attr=False) + decoder_state_expand = fluid.layers.sequence_expand( + x=decoder_state_proj, y=encoder_proj) + concated = encoder_proj + decoder_state_expand + concated = fluid.layers.tanh(x=concated) + attention_weights = fluid.layers.fc(input=concated, + size=1, + act=None, + bias_attr=False) + attention_weights = fluid.layers.sequence_softmax( + input=attention_weights) + weigths_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1]) + scaled = fluid.layers.elementwise_mul( + x=encoder_vec, y=weigths_reshape, axis=0) + context = fluid.layers.sequence_pool(input=scaled, pool_type='sum') + return context + + rnn = fluid.layers.DynamicRNN() + + with rnn.block(): + current_word = rnn.step_input(target_embedding) + encoder_vec = rnn.static_input(encoder_vec) + encoder_proj = rnn.static_input(encoder_proj) + hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True) + context = simple_attention(encoder_vec, encoder_proj, hidden_mem) + fc_1 = fluid.layers.fc(input=context, + size=decoder_size * 3, + bias_attr=False) + fc_2 = fluid.layers.fc(input=current_word, + size=decoder_size * 3, + bias_attr=False) + decoder_inputs = fc_1 + fc_2 + h, _, _ = fluid.layers.gru_unit( + input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3) + rnn.update_memory(hidden_mem, h) + out = fluid.layers.fc(input=h, + size=num_classes + 2, + bias_attr=True, + act='softmax') + rnn.output(out) + return rnn() + + +def attention_train_net(args, data_shape, num_classes): + + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + label_in = fluid.layers.data( + name='label_in', shape=[1], dtype='int32', lod_level=1) + label_out = fluid.layers.data( + name='label_out', shape=[1], dtype='int32', lod_level=1) + + gru_backward, encoded_vector, encoded_proj = encoder_net(images) + + backward_first = fluid.layers.sequence_pool( + input=gru_backward, pool_type='first') + decoder_boot = fluid.layers.fc(input=backward_first, + size=decoder_size, + bias_attr=False, + act="relu") + + label_in = fluid.layers.cast(x=label_in, dtype='int64') + trg_embedding = fluid.layers.embedding( + input=label_in, + size=[num_classes + 2, word_vector_dim], + dtype='float32') + prediction = gru_decoder_with_attention(trg_embedding, encoded_vector, + encoded_proj, decoder_boot, + decoder_size, num_classes) + fluid.clip.set_gradient_clip(fluid.clip.GradientClipByValue(gradient_clip)) + label_out = fluid.layers.cast(x=label_out, dtype='int64') + + _, maxid = fluid.layers.topk(input=prediction, k=1) + error_evaluator = fluid.evaluator.EditDistance( + input=maxid, label=label_out, ignored_tokens=[sos, eos]) + + inference_program = fluid.default_main_program().clone(for_test=True) + + cost = fluid.layers.cross_entropy(input=prediction, label=label_out) + sum_cost = fluid.layers.reduce_sum(cost) + + if learning_rate_decay == "piecewise_decay": + learning_rate = fluid.layers.piecewise_decay([50000], [LR, LR * 0.01]) + else: + learning_rate = LR + + optimizer = fluid.optimizer.Adadelta( + learning_rate=learning_rate, epsilon=1.0e-6, rho=0.9) + optimizer.minimize(sum_cost) + + model_average = None + if 
args.average_window > 0: + model_average = fluid.optimizer.ModelAverage( + args.average_window, + min_average_window=args.min_average_window, + max_average_window=args.max_average_window) + + return sum_cost, error_evaluator, inference_program, model_average + + +def simple_attention(encoder_vec, encoder_proj, decoder_state, decoder_size): + decoder_state_proj = fluid.layers.fc(input=decoder_state, + size=decoder_size, + bias_attr=False) + decoder_state_expand = fluid.layers.sequence_expand( + x=decoder_state_proj, y=encoder_proj) + concated = fluid.layers.elementwise_add(encoder_proj, decoder_state_expand) + concated = fluid.layers.tanh(x=concated) + attention_weights = fluid.layers.fc(input=concated, + size=1, + act=None, + bias_attr=False) + attention_weights = fluid.layers.sequence_softmax(input=attention_weights) + weigths_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1]) + scaled = fluid.layers.elementwise_mul( + x=encoder_vec, y=weigths_reshape, axis=0) + context = fluid.layers.sequence_pool(input=scaled, pool_type='sum') + return context + + +def attention_infer(images, num_classes, use_cudnn=True): + + max_length = 20 + gru_backward, encoded_vector, encoded_proj = encoder_net( + images, is_test=True, use_cudnn=use_cudnn) + + backward_first = fluid.layers.sequence_pool( + input=gru_backward, pool_type='first') + decoder_boot = fluid.layers.fc(input=backward_first, + size=decoder_size, + bias_attr=False, + act="relu") + init_state = decoder_boot + array_len = fluid.layers.fill_constant( + shape=[1], dtype='int64', value=max_length) + counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True) + + # fill the first element with init_state + state_array = fluid.layers.create_array('float32') + fluid.layers.array_write(init_state, array=state_array, i=counter) + + # ids, scores as memory + ids_array = fluid.layers.create_array('int64') + scores_array = fluid.layers.create_array('float32') + + init_ids = fluid.layers.data( + name="init_ids", shape=[1], dtype="int64", lod_level=2) + init_scores = fluid.layers.data( + name="init_scores", shape=[1], dtype="float32", lod_level=2) + + fluid.layers.array_write(init_ids, array=ids_array, i=counter) + fluid.layers.array_write(init_scores, array=scores_array, i=counter) + + cond = fluid.layers.less_than(x=counter, y=array_len) + while_op = fluid.layers.While(cond=cond) + with while_op.block(): + pre_ids = fluid.layers.array_read(array=ids_array, i=counter) + pre_state = fluid.layers.array_read(array=state_array, i=counter) + pre_score = fluid.layers.array_read(array=scores_array, i=counter) + + pre_ids_emb = fluid.layers.embedding( + input=pre_ids, + size=[num_classes + 2, word_vector_dim], + dtype='float32') + + context = simple_attention(encoded_vector, encoded_proj, pre_state, + decoder_size) + + # expand the recursive_sequence_lengths of pre_state to be the same with pre_score + pre_state_expanded = fluid.layers.sequence_expand(pre_state, pre_score) + context_expanded = fluid.layers.sequence_expand(context, pre_score) + fc_1 = fluid.layers.fc(input=context_expanded, + size=decoder_size * 3, + bias_attr=False) + fc_2 = fluid.layers.fc(input=pre_ids_emb, + size=decoder_size * 3, + bias_attr=False) + + decoder_inputs = fc_1 + fc_2 + current_state, _, _ = fluid.layers.gru_unit( + input=decoder_inputs, + hidden=pre_state_expanded, + size=decoder_size * 3) + + current_state_with_lod = fluid.layers.lod_reset( + x=current_state, y=pre_score) + # use score to do beam search + current_score = 
fluid.layers.fc(input=current_state_with_lod, + size=num_classes + 2, + bias_attr=True, + act='softmax') + topk_scores, topk_indices = fluid.layers.topk( + current_score, k=beam_size) + + # calculate accumulated scores after topk to reduce computation cost + accu_scores = fluid.layers.elementwise_add( + x=fluid.layers.log(topk_scores), + y=fluid.layers.reshape( + pre_score, shape=[-1]), + axis=0) + selected_ids, selected_scores = fluid.layers.beam_search( + pre_ids, + pre_score, + topk_indices, + accu_scores, + beam_size, + 1, # end_id + #level=0 + ) + + fluid.layers.increment(x=counter, value=1, in_place=True) + + # update the memories + fluid.layers.array_write(current_state, array=state_array, i=counter) + fluid.layers.array_write(selected_ids, array=ids_array, i=counter) + fluid.layers.array_write(selected_scores, array=scores_array, i=counter) + + # update the break condition: up to the max length or all candidates of + # source sentences have ended. + length_cond = fluid.layers.less_than(x=counter, y=array_len) + finish_cond = fluid.layers.logical_not( + fluid.layers.is_empty(x=selected_ids)) + fluid.layers.logical_and(x=length_cond, y=finish_cond, out=cond) + + ids, scores = fluid.layers.beam_search_decode(ids_array, scores_array, + beam_size, eos) + return ids + + +def attention_eval(data_shape, num_classes): + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + label_in = fluid.layers.data( + name='label_in', shape=[1], dtype='int32', lod_level=1) + label_out = fluid.layers.data( + name='label_out', shape=[1], dtype='int32', lod_level=1) + label_out = fluid.layers.cast(x=label_out, dtype='int64') + label_in = fluid.layers.cast(x=label_in, dtype='int64') + + gru_backward, encoded_vector, encoded_proj = encoder_net( + images, is_test=True) + + backward_first = fluid.layers.sequence_pool( + input=gru_backward, pool_type='first') + decoder_boot = fluid.layers.fc(input=backward_first, + size=decoder_size, + bias_attr=False, + act="relu") + trg_embedding = fluid.layers.embedding( + input=label_in, + size=[num_classes + 2, word_vector_dim], + dtype='float32') + prediction = gru_decoder_with_attention(trg_embedding, encoded_vector, + encoded_proj, decoder_boot, + decoder_size, num_classes) + _, maxid = fluid.layers.topk(input=prediction, k=1) + error_evaluator = fluid.evaluator.EditDistance( + input=maxid, label=label_out, ignored_tokens=[sos, eos]) + cost = fluid.layers.cross_entropy(input=prediction, label=label_out) + sum_cost = fluid.layers.reduce_sum(cost) + return error_evaluator, sum_cost diff --git a/fluid/ocr_recognition/crnn_ctc_model.py b/fluid/ocr_recognition/crnn_ctc_model.py old mode 100644 new mode 100755 index a5d4c70f868a6c973ff3e8b372a2eb387d1f191f..aa46d4ff086cee1697bafba76becf787ce619de8 --- a/fluid/ocr_recognition/crnn_ctc_model.py +++ b/fluid/ocr_recognition/crnn_ctc_model.py @@ -1,7 +1,11 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import paddle.fluid as fluid from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter from paddle.fluid.initializer import init_on_cpu import math +import six def conv_bn_pool(input, @@ -15,7 +19,7 @@ def conv_bn_pool(input, pooling=True, use_cudnn=False): tmp = input - for i in xrange(group): + for i in six.moves.xrange(group): tmp = fluid.layers.conv2d( input=tmp, num_filters=out_ch[i], @@ -166,13 +170,16 @@ def encoder_net(images, return fc_out -def ctc_train_net(images, label, args, num_classes): +def ctc_train_net(args, 
data_shape, num_classes): L2_RATE = 0.0004 LR = 1.0e-3 MOMENTUM = 0.9 learning_rate_decay = None regularizer = fluid.regularizer.L2Decay(L2_RATE) + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int32', lod_level=1) fc_out = encoder_net( images, num_classes, @@ -189,7 +196,7 @@ def ctc_train_net(images, label, args, num_classes): inference_program = fluid.default_main_program().clone(for_test=True) if learning_rate_decay == "piecewise_decay": learning_rate = fluid.layers.piecewise_decay([ - args.total_step / 4, args.total_step / 2, args.total_step * 3 / 4 + args.total_step // 4, args.total_step // 2, args.total_step * 3 // 4 ], [LR, LR * 0.1, LR * 0.01, LR * 0.001]) else: learning_rate = LR @@ -211,7 +218,10 @@ def ctc_infer(images, num_classes, use_cudnn): return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes) -def ctc_eval(images, label, num_classes, use_cudnn): +def ctc_eval(data_shape, num_classes, use_cudnn): + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int32', lod_level=1) fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn) decoded_out = fluid.layers.ctc_greedy_decoder( input=fc_out, blank=num_classes) diff --git a/fluid/ocr_recognition/ctc_reader.py b/fluid/ocr_recognition/data_reader.py similarity index 87% rename from fluid/ocr_recognition/ctc_reader.py rename to fluid/ocr_recognition/data_reader.py index a272e91a43684e62fdeab31d244989751b20fcc5..1a1c5c87162efd998e73bc19ec2cb511fdabcb79 100644 --- a/fluid/ocr_recognition/ctc_reader.py +++ b/fluid/ocr_recognition/data_reader.py @@ -1,12 +1,17 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import os import cv2 import tarfile import numpy as np from PIL import Image from os import path -from paddle.v2.image import load_image -import paddle.v2 as paddle +from paddle.dataset.image import load_image +import paddle +SOS = 0 +EOS = 1 NUM_CLASSES = 95 DATA_SHAPE = [1, 48, 512] @@ -22,8 +27,8 @@ TEST_LIST_FILE_NAME = "test.list" class DataGenerator(object): - def __init__(self): - pass + def __init__(self, model="crnn_ctc"): + self.model = model def train_reader(self, img_root_dir, @@ -65,11 +70,11 @@ class DataGenerator(object): batchsize ) + "; i++) print $(4*i+1)\" \"$(4*i+2)\" \"$(4*i+3)\" \"$(4*i+4);}}' > " + to_file os.system(cmd) - print "finish batch shuffle" + print("finish batch shuffle") img_label_lines = open(to_file, 'r').readlines() def reader(): - sizes = len(img_label_lines) / batchsize + sizes = len(img_label_lines) // batchsize if sizes == 0: raise ValueError('Batch size is bigger than the dataset size.') while True: @@ -89,7 +94,10 @@ class DataGenerator(object): img = img.resize((sz[0], sz[1])) img = np.array(img) - 127.5 img = img[np.newaxis, ...] - result.append([img, label]) + if self.model == "crnn_ctc": + result.append([img, label]) + else: + result.append([img, [SOS] + label, label + [EOS]]) yield result if not cycle: break @@ -117,7 +125,10 @@ class DataGenerator(object): 'L') img = np.array(img) - 127.5 img = img[np.newaxis, ...] 
- yield img, label + if self.model == "crnn_ctc": + yield img, label + else: + yield img, [SOS] + label, label + [EOS] return reader @@ -185,8 +196,12 @@ def data_shape(): return DATA_SHAPE -def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False): - generator = DataGenerator() +def train(batch_size, + train_images_dir=None, + train_list_file=None, + cycle=False, + model="crnn_ctc"): + generator = DataGenerator(model) if train_images_dir is None: data_dir = download_data() train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME) @@ -199,8 +214,11 @@ def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False): train_images_dir, train_list_file, batch_size, cycle, shuffle=shuffle) -def test(batch_size=1, test_images_dir=None, test_list_file=None): - generator = DataGenerator() +def test(batch_size=1, + test_images_dir=None, + test_list_file=None, + model="crnn_ctc"): + generator = DataGenerator(model) if test_images_dir is None: data_dir = download_data() test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME) @@ -213,8 +231,9 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None): def inference(batch_size=1, infer_images_dir=None, infer_list_file=None, - cycle=False): - generator = DataGenerator() + cycle=False, + model="crnn_ctc"): + generator = DataGenerator(model) return paddle.batch( generator.infer_reader(infer_images_dir, infer_list_file, cycle), batch_size) diff --git a/fluid/ocr_recognition/eval.py b/fluid/ocr_recognition/eval.py index 6924131686a1387a55cdf85136da39a249a369a7..1d553999eb545e3a1134658e78592fb74a4a8c3c 100644 --- a/fluid/ocr_recognition/eval.py +++ b/fluid/ocr_recognition/eval.py @@ -1,9 +1,9 @@ import paddle.v2 as paddle import paddle.fluid as fluid -from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data -from crnn_ctc_model import ctc_infer +from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data +from attention_model import attention_eval from crnn_ctc_model import ctc_eval -import ctc_reader +import data_reader import argparse import functools import os @@ -11,27 +11,34 @@ import os parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable -add_arg('model_path', str, None, "The model path to be used for inference.") +add_arg('model', str, "crnn_ctc", "Which type of network to be used. 
'crnn_ctc' or 'attention'") +add_arg('model_path', str, "", "The model path to be used for inference.") add_arg('input_images_dir', str, None, "The directory of images.") add_arg('input_images_list', str, None, "The list file of images.") add_arg('use_gpu', bool, True, "Whether use GPU to eval.") # yapf: enable -def evaluate(args, eval=ctc_eval, data_reader=ctc_reader): +def evaluate(args): """OCR inference""" + + if args.model == "crnn_ctc": + eval = ctc_eval + get_feeder_data = get_ctc_feeder_data + else: + eval = attention_eval + get_feeder_data = get_attention_feeder_data + num_classes = data_reader.num_classes() data_shape = data_reader.data_shape() # define network - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int32', lod_level=1) - evaluator, cost = eval(images, label, num_classes) + evaluator, cost = eval(data_shape, num_classes) # data reader test_reader = data_reader.test( test_images_dir=args.input_images_dir, - test_list_file=args.input_images_list) + test_list_file=args.input_images_list, + model=args.model) # prepare environment place = fluid.CPUPlace() @@ -48,7 +55,7 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader): model_dir = os.path.dirname(args.model_path) model_file_name = os.path.basename(args.model_path) fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) - print "Init model from: %s." % args.model_path + print("Init model from: %s." % args.model_path) evaluator.reset(exe) count = 0 @@ -56,14 +63,14 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader): count += 1 exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place)) avg_distance, avg_seq_error = evaluator.eval(exe) - print "Read %d samples; avg_distance: %s; avg_seq_error: %s" % ( - count, avg_distance, avg_seq_error) + print("Read %d samples; avg_distance: %s; avg_seq_error: %s" % ( + count, avg_distance, avg_seq_error)) def main(): args = parser.parse_args() print_arguments(args) - evaluate(args, data_reader=ctc_reader) + evaluate(args) if __name__ == "__main__": diff --git a/fluid/ocr_recognition/images/train_attention.jpg b/fluid/ocr_recognition/images/train_attention.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f9c1ce30bb8f5e9e704255e31c896f355727e2fd Binary files /dev/null and b/fluid/ocr_recognition/images/train_attention.jpg differ diff --git a/fluid/ocr_recognition/infer.py b/fluid/ocr_recognition/infer.py old mode 100644 new mode 100755 index 154242c9e3ca8fea26f34b5cda0c2bac5a3d0ef1..5ba77ced81c2802d372c348520d002f4e5b3452e --- a/fluid/ocr_recognition/infer.py +++ b/fluid/ocr_recognition/infer.py @@ -1,10 +1,12 @@ +from __future__ import print_function import paddle.v2 as paddle import paddle.fluid as fluid +from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_for_infer import paddle.fluid.profiler as profiler -from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from crnn_ctc_model import ctc_infer +from attention_model import attention_infer import numpy as np -import ctc_reader +import data_reader import argparse import functools import os @@ -13,6 +15,7 @@ import time parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable +add_arg('model', str, "crnn_ctc", "Which type of network to be used. 
'crnn_ctc' or 'attention'") add_arg('model_path', str, None, "The model path to be used for inference.") add_arg('input_images_dir', str, None, "The directory of images.") add_arg('input_images_list', str, None, "The list file of images.") @@ -25,20 +28,28 @@ add_arg('batch_size', int, 1, "The minibatch size.") # yapf: enable -def inference(args, infer=ctc_infer, data_reader=ctc_reader): +def inference(args): """OCR inference""" + if args.model == "crnn_ctc": + infer = ctc_infer + get_feeder_data = get_ctc_feeder_data + else: + infer = attention_infer + get_feeder_data = get_attention_feeder_for_infer + eos = 1 + sos = 0 num_classes = data_reader.num_classes() data_shape = data_reader.data_shape() # define network images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - sequence = infer( - images, num_classes, use_cudnn=True if args.use_gpu else False) + ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False) # data reader infer_reader = data_reader.inference( batch_size=args.batch_size, infer_images_dir=args.input_images_dir, infer_list_file=args.input_images_list, - cycle=True if args.iterations > 0 else False) + cycle=True if args.iterations > 0 else False, + model=args.model) # prepare environment place = fluid.CPUPlace() if args.use_gpu: @@ -54,7 +65,7 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): with open(args.dict) as dict_file: for i, word in enumerate(dict_file): dict_map[i] = word.strip() - print "Loaded dict from %s" % args.dict + print("Loaded dict from %s" % args.dict) # load init model model_dir = args.model_path @@ -63,11 +74,12 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): model_dir = os.path.dirname(args.model_path) model_file_name = os.path.basename(args.model_path) fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) - print "Init model from: %s." % args.model_path + print("Init model from: %s." 
% args.model_path)
     batch_times = []
     iters = 0
     for data in infer_reader():
+        feed_dict = get_feeder_data(data, place)
         if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
             break
         if iters < args.skip_batch_num:
@@ -77,26 +89,25 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
         start = time.time()
         result = exe.run(fluid.default_main_program(),
-                         feed=get_feeder_data(
-                             data, place, need_label=False),
-                         fetch_list=[sequence],
+                         feed=feed_dict,
+                         fetch_list=[ids],
                          return_numpy=False)
+        indexes = prune(np.array(result[0]).flatten(), 0, 1)
         batch_time = time.time() - start
         fps = args.batch_size / batch_time
         batch_times.append(batch_time)
-        indexes = np.array(result[0]).flatten()
         if dict_map is not None:
-            print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
+            print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
                 iters,
                 batch_time,
                 fps,
-                [dict_map[index] for index in indexes], )
+                [dict_map[index] for index in indexes], ))
         else:
-            print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
+            print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
                 iters,
                 batch_time,
                 fps,
-                indexes, )
+                indexes, ))

         iters += 1
@@ -114,18 +125,29 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
     print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99))


+def prune(words, sos, eos):
+    """Remove unused tokens in prediction result."""
+    start_index = 0
+    end_index = len(words)
+    if sos in words:
+        start_index = np.where(words == sos)[0][0] + 1
+    if eos in words:
+        end_index = np.where(words == eos)[0][0]
+    return words[start_index:end_index]
+
+
 def main():
     args = parser.parse_args()
     print_arguments(args)
     if args.profile:
         if args.use_gpu:
             with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
-                inference(args, data_reader=ctc_reader)
+                inference(args)
         else:
             with profiler.profiler("CPU", sorted_key='total') as cpuprof:
-                inference(args, data_reader=ctc_reader)
+                inference(args)
     else:
-        inference(args, data_reader=ctc_reader)
+        inference(args)


 if __name__ == "__main__":
diff --git a/fluid/ocr_recognition/ctc_train.py b/fluid/ocr_recognition/train.py
old mode 100644
new mode 100755
similarity index 82%
rename from fluid/ocr_recognition/ctc_train.py
rename to fluid/ocr_recognition/train.py
index a1cb52993500322c11d80ede732156d376fbed88..7954d23dc02c93159315e4220ec2db0289fddb44
--- a/fluid/ocr_recognition/ctc_train.py
+++ b/fluid/ocr_recognition/train.py
@@ -1,9 +1,13 @@
-"""Trainer for OCR CTC model."""
+"""Trainer for OCR CTC or attention model."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle.fluid as fluid
+from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
 import paddle.fluid.profiler as profiler
-from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
 from crnn_ctc_model import ctc_train_net
-import ctc_reader
+from attention_model import attention_train_net
+import data_reader
 import argparse
 import functools
 import sys
@@ -20,6 +24,7 @@ add_arg('log_period', int, 1000, "Log period.")
 add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.")
 add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.")
 add_arg('save_model_dir', str, "./models", "The directory the model to be saved to.")
+add_arg('model', str, "crnn_ctc", "Which type of network to be used. 'crnn_ctc' or 'attention'")
 add_arg('init_model', str, None, "The init model file of directory.")
 add_arg('use_gpu', bool, True, "Whether use GPU to train.")
 add_arg('min_average_window',int, 10000, "Min average window.")
@@ -32,8 +37,16 @@ add_arg('skip_test', bool, False, "Whether to skip test phase.")
 # yapf: enable


-def train(args, data_reader=ctc_reader):
-    """OCR CTC training"""
+def train(args):
+    """OCR training"""
+
+    if args.model == "crnn_ctc":
+        train_net = ctc_train_net
+        get_feeder_data = get_ctc_feeder_data
+    else:
+        train_net = attention_train_net
+        get_feeder_data = get_attention_feeder_data
+
     num_classes = None
     train_images = None
     train_list = None
@@ -43,20 +56,18 @@ def train(args, data_reader=ctc_reader):
     ) if num_classes is None else num_classes
     data_shape = data_reader.data_shape()
     # define network
-    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
-    label = fluid.layers.data(
-        name='label', shape=[1], dtype='int32', lod_level=1)
-    sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(
-        images, label, args, num_classes)
+    sum_cost, error_evaluator, inference_program, model_average = train_net(
+        args, data_shape, num_classes)
     # data reader
     train_reader = data_reader.train(
         args.batch_size,
         train_images_dir=train_images,
         train_list_file=train_list,
-        cycle=args.total_step > 0)
+        cycle=args.total_step > 0,
+        model=args.model)
     test_reader = data_reader.test(
-        test_images_dir=test_images, test_list_file=test_list)
+        test_images_dir=test_images, test_list_file=test_list, model=args.model)

     # prepare environment
     place = fluid.CPUPlace()
@@ -77,7 +88,7 @@ def train(args, data_reader=ctc_reader):
         model_dir = os.path.dirname(args.init_model)
         model_file_name = os.path.basename(args.init_model)
         fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
-        print "Init model from: %s." % args.init_model
+        print("Init model from: %s." % args.init_model)

     train_exe = exe
     error_evaluator.reset(exe)
@@ -104,18 +115,18 @@ def train(args, data_reader=ctc_reader):
         for data in test_reader():
             exe.run(inference_program, feed=get_feeder_data(data, place))
         _, test_seq_error = error_evaluator.eval(exe)
-        print "\nTime: %s; Iter[%d]; Test seq error: %s.\n" % (
-            time.time(), iter_num, str(test_seq_error[0]))
+        print("\nTime: %s; Iter[%d]; Test seq error: %s.\n" % (
+            time.time(), iter_num, str(test_seq_error[0])))

         #Note: The following logs are special for CE monitoring.
         #Other situations do not need to care about these logs.
-        print "kpis test_acc %f" % (1 - test_seq_error[0])
+        print("kpis test_acc %f" % (1 - test_seq_error[0]))

     def save_model(args, exe, iter_num):
         filename = "model_%05d" % iter_num
         fluid.io.save_params(
             exe, dirname=args.save_model_dir, filename=filename)
-        print "Saved model to: %s/%s." % (args.save_model_dir, filename)
+        print("Saved model to: %s/%s." % (args.save_model_dir, filename))

     iter_num = 0
     stop = False
@@ -144,18 +155,18 @@ def train(args, data_reader=ctc_reader):
             iter_num += 1
             # training log
             if iter_num % args.log_period == 0:
-                print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % (
+                print("\nTime: %s; Iter[%d]; Avg loss: %.3f; Avg seq err: %.3f" % (
                     time.time(), iter_num,
                     total_loss / (args.log_period * args.batch_size),
-                    total_seq_error / (args.log_period * args.batch_size))
-                print "kpis train_cost %f" % (total_loss / (args.log_period *
-                                                            args.batch_size))
-                print "kpis train_acc %f" % (
-                    1 - total_seq_error / (args.log_period * args.batch_size))
+                    total_seq_error / (args.log_period * args.batch_size)))
+                print("kpis train_cost %f" % (total_loss / (args.log_period *
+                                                            args.batch_size)))
+                print("kpis train_acc %f" % (
+                    1 - total_seq_error / (args.log_period * args.batch_size)))
                 total_loss = 0.0
                 total_seq_error = 0.0

-# evaluate
+            # evaluate
             if not args.skip_test and iter_num % args.eval_period == 0:
                 if model_average:
                     with model_average.apply(exe):
@@ -171,7 +182,7 @@ def train(args, data_reader=ctc_reader):
                 else:
                     save_model(args, exe, iter_num)
     end_time = time.time()
-    print "kpis train_duration %f" % (end_time - start_time)
+    print("kpis train_duration %f" % (end_time - start_time))
     # Postprocess benchmark data
     latencies = batch_times[args.skip_batch_num:]
     latency_avg = np.average(latencies)
@@ -195,12 +206,12 @@ def main():
     if args.profile:
         if args.use_gpu:
             with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
-                train(args, data_reader=ctc_reader)
+                train(args)
         else:
             with profiler.profiler("CPU", sorted_key='total') as cpuprof:
-                train(args, data_reader=ctc_reader)
+                train(args)
     else:
-        train(args, data_reader=ctc_reader)
+        train(args)


 if __name__ == "__main__":
diff --git a/fluid/ocr_recognition/utility.py b/fluid/ocr_recognition/utility.py
old mode 100644
new mode 100755
index 67a5bfa018bad5a4d69ba9d0d3cb63ff59214775..fb8d066c7a389b9fc3356025edbae4e6c0aa5720
--- a/fluid/ocr_recognition/utility.py
+++ b/fluid/ocr_recognition/utility.py
@@ -19,6 +19,8 @@ from __future__ import print_function
 import distutils.util
 import numpy as np
 from paddle.fluid import core
+import paddle.fluid as fluid
+import six


 def print_arguments(args):
@@ -37,7 +39,7 @@ def print_arguments(args):
     :type args: argparse.Namespace
     """
     print("----------- Configuration Arguments -----------")
-    for arg, value in sorted(vars(args).iteritems()):
+    for arg, value in sorted(six.iteritems(vars(args))):
         print("%s: %s" % (arg, value))
     print("------------------------------------------------")

@@ -77,14 +79,58 @@ def to_lodtensor(data, place):
     return res


-def get_feeder_data(data, place, need_label=True):
+def get_ctc_feeder_data(data, place, need_label=True):
     pixel_tensor = core.LoDTensor()
     pixel_data = None
     pixel_data = np.concatenate(
-        map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32")
+        list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
     pixel_tensor.set(pixel_data, place)
-    label_tensor = to_lodtensor(map(lambda x: x[1], data), place)
+    label_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
     if need_label:
         return {"pixel": pixel_tensor, "label": label_tensor}
     else:
         return {"pixel": pixel_tensor}
+
+
+def get_attention_feeder_data(data, place, need_label=True):
+    pixel_tensor = core.LoDTensor()
+    pixel_data = None
+    pixel_data = np.concatenate(
+        list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
+    pixel_tensor.set(pixel_data, place)
+    label_in_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
+    label_out_tensor = to_lodtensor(list(map(lambda x: x[2], data)), place)
+    if need_label:
+        return {
+            "pixel": pixel_tensor,
+            "label_in": label_in_tensor,
+            "label_out": label_out_tensor
+        }
+    else:
+        return {"pixel": pixel_tensor}
+
+
+def get_attention_feeder_for_infer(data, place):
+    batch_size = len(data)
+    init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64')
+    init_scores_data = np.array(
+        [1. for _ in range(batch_size)], dtype='float32')
+    init_ids_data = init_ids_data.reshape((batch_size, 1))
+    init_scores_data = init_scores_data.reshape((batch_size, 1))
+    init_recursive_seq_lens = [1] * batch_size
+    init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens]
+    init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens,
+                                       place)
+    init_scores = fluid.create_lod_tensor(init_scores_data,
+                                          init_recursive_seq_lens, place)
+
+    pixel_tensor = core.LoDTensor()
+    pixel_data = None
+    pixel_data = np.concatenate(
+        list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
+    pixel_tensor.set(pixel_data, place)
+    return {
+        "pixel": pixel_tensor,
+        "init_ids": init_ids,
+        "init_scores": init_scores
+    }
diff --git a/fluid/sequence_tagging_for_ner/_ce.py b/fluid/sequence_tagging_for_ner/_ce.py
index 111a4d566b1cb69543bd7747cd76568f7de4b94c..8afd863032b374f76a644dea15af7999736a0f59 100644
--- a/fluid/sequence_tagging_for_ner/_ce.py
+++ b/fluid/sequence_tagging_for_ner/_ce.py
@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi

 #### NOTE kpi.py should shared in models in some way!!!!

-train_acc_kpi = AccKpi('train_precision', 0.005, actived=True)
-test_acc_kpi = CostKpi('test_precision', 0.005, actived=True)
+train_acc_kpi = AccKpi('train_precision', 0.005, actived=False)
+test_acc_kpi = CostKpi('test_precision', 0.005, actived=False)
 train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)

 tracking_kpis = [
diff --git a/fluid/text_classification/_ce.py b/fluid/text_classification/_ce.py
index 100357204db7f3a8d0c1d3cbcbdc707410b20023..6c0b1ac428d21145ab9f89ef134614b43c3db3e9 100644
--- a/fluid/text_classification/_ce.py
+++ b/fluid/text_classification/_ce.py
@@ -8,7 +8,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
 #### NOTE kpi.py should shared in models in some way!!!!

 train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)
-train_cost_kpi = CostKpi('train_cost', 0.005, actived=True)
+train_cost_kpi = CostKpi('train_cost', 0.005, actived=False)
 train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)

 tracking_kpis = [
diff --git a/fluid/text_classification/clouds/scdb_parallel_executor.py b/fluid/text_classification/clouds/scdb_parallel_executor.py
index 9d7722e9776d11c591f1ff0bd97b3e295205d300..cc5cd4ee9f9c86a0ed3f7c27e482026d6dbf7a13 100644
--- a/fluid/text_classification/clouds/scdb_parallel_executor.py
+++ b/fluid/text_classification/clouds/scdb_parallel_executor.py
@@ -3,6 +3,7 @@ import contextlib
 import paddle
 import paddle.fluid as fluid
 import numpy as np
+import six
 import sys
 import time
 import os
@@ -46,8 +47,8 @@ def data2tensor(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq, "label": y_data}

@@ -56,8 +57,8 @@ def data2pred(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq}

@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
     Save dict into file
     """
     with open(vocab, "w") as fout:
-        for k, v in word_dict.iteritems():
+        for k, v in six.iteritems(word_dict):
             outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
             fout.write(outstr)

@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
 def scdb_test_data(test_file, w_dict):
     """
-    test_set=["car", "lbs", "spot", "weibo", 
+    test_set=["car", "lbs", "spot", "weibo",
             "baby", "toutiao", "3c", "movie", "haogan"]
     """
     return data_reader(test_file, w_dict)

@@ -424,7 +425,7 @@ def start_train(train_reader,
     start_exe.run(fluid.default_startup_program())
     exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name)

-    for pass_id in xrange(pass_num):
+    for pass_id in six.moves.xrange(pass_num):
         total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0
         for data in train_reader():
             cost_val, acc_val = exe.run(feed=feeder.feed(data),
@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab",
     """
     w_dict = scdb_word_dict(vocab=vocab)
     test_files = [ "./thirdparty" + os.sep + f for f in test_list]
-    
+
     train_reader = paddle.batch(
         scdb_train_data(train_dir, w_dict), batch_size = 256)

diff --git a/fluid/text_classification/clouds/scdb_single_card.py b/fluid/text_classification/clouds/scdb_single_card.py
index 9cc39269913ab97341e5389b31ad9a5da2e8af51..3690e92776636d8a7c8cef0d9cd4d72414b7a628 100644
--- a/fluid/text_classification/clouds/scdb_single_card.py
+++ b/fluid/text_classification/clouds/scdb_single_card.py
@@ -3,6 +3,7 @@ import contextlib
 import paddle
 import paddle.fluid as fluid
 import numpy as np
+import six
 import sys
 import time
 import os
@@ -46,8 +47,8 @@ def data2tensor(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq, "label": y_data}

@@ -56,8 +57,8 @@ def data2pred(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq}

@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
     Save dict into file
     """
     with open(vocab, "w") as fout:
-        for k, v in word_dict.iteritems():
+        for k, v in six.iteritems(word_dict):
             outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
             fout.write(outstr)

@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
 def scdb_test_data(test_file, w_dict):
     """
-    test_set=["car", "lbs", "spot", "weibo", 
+    test_set=["car", "lbs", "spot", "weibo",
             "baby", "toutiao", "3c", "movie", "haogan"]
     """
     return data_reader(test_file, w_dict)

@@ -422,7 +423,7 @@ def start_train(train_reader,
     feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
     exe.run(fluid.default_startup_program())

-    for pass_id in xrange(pass_num):
+    for pass_id in six.moves.xrange(pass_num):
         data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
         for data in train_reader():
             avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
diff --git a/fluid/text_classification/train.py b/fluid/text_classification/train.py
index 0ef923544f18d62f5a049ac79353ce4884b8d195..b2ffe4c6723120103c9b3e310b070f4c773aeeb4 100644
--- a/fluid/text_classification/train.py
+++ b/fluid/text_classification/train.py
@@ -1,4 +1,5 @@
 import os
+import six
 import sys
 import time
 import unittest
@@ -58,7 +59,7 @@ def train(train_reader,
     if "CE_MODE_X" in os.environ:
         fluid.default_startup_program().random_seed = 110
     exe.run(fluid.default_startup_program())
-    for pass_id in xrange(pass_num):
+    for pass_id in six.moves.xrange(pass_num):
         pass_start = time.time()
         data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
         for data in train_reader():
diff --git a/fluid/text_classification/utils.py b/fluid/text_classification/utils.py
index 3673946b6f39eade1811dfc4d81c99b0ef9400bb..dce4743d9219aa9ed5ca78b9f690eb1366d92304 100644
--- a/fluid/text_classification/utils.py
+++ b/fluid/text_classification/utils.py
@@ -43,8 +43,8 @@ def data2tensor(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq, "label": y_data}
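
Reviewer note (not part of the patch): a minimal standalone sketch of what the new `prune` helper in the OCR inference path does, assuming the start id 0 and end id 1 that the call site `prune(np.array(result[0]).flatten(), 0, 1)` passes; the token ids in the demo are made up.

```
import numpy as np


def prune(words, sos, eos):
    """Remove unused tokens in prediction result."""
    start_index = 0
    end_index = len(words)
    if sos in words:
        start_index = np.where(words == sos)[0][0] + 1
    if eos in words:
        end_index = np.where(words == eos)[0][0]
    return words[start_index:end_index]


# A hypothetical decoded sequence wrapped in sos=0 and eos=1 markers.
decoded = np.array([0, 27, 153, 38, 1])
print(prune(decoded, 0, 1))  # -> [ 27 153  38]
```

The attention decoder presumably emits these start/end markers itself, which the old CTC-only output never contained, so stripping them keeps the printed result comparable between the two model types.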
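A second note on `get_attention_feeder_for_infer`: the sketch below reproduces the init tensors it feeds, using only calls that appear in the patch (`numpy` and `fluid.create_lod_tensor`). The batch size of 4 and the CPU place are illustrative, and the reading of the two-level LoD is the reviewer's interpretation, not something stated in the patch.

```
import numpy as np
import paddle.fluid as fluid

# Suppose a batch of 4 images is being decoded (batch_size is illustrative).
batch_size = 4

# One start token (id 0) with score 1.0 per image, shaped [batch_size, 1].
init_ids_data = np.zeros((batch_size, 1), dtype='int64')
init_scores_data = np.ones((batch_size, 1), dtype='float32')

# Two identical LoD levels of [1, 1, 1, 1]: every image contributes one
# prefix, and every prefix currently holds exactly one candidate token.
init_recursive_seq_lens = [[1] * batch_size, [1] * batch_size]

place = fluid.CPUPlace()
init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens,
                                   place)
init_scores = fluid.create_lod_tensor(init_scores_data,
                                      init_recursive_seq_lens, place)
# These two tensors are what the feeder returns alongside "pixel".
```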