diff --git a/fluid/face_detection/README_cn.md b/fluid/face_detection/README_cn.md
index a160fbd9ce25c4fac4333b9914058a16fda7e58a..c0a32ceb350fee6dd9eda94a9ceb0fa6c8ceee27 100644
--- a/fluid/face_detection/README_cn.md
+++ b/fluid/face_detection/README_cn.md
@@ -165,10 +165,10 @@ python widerface_eval.py --infer=True --confs_threshold=0.15
```
The following figure visualizes the model's prediction results:
-<img src="images/0_Parade_marchingband_1_356.jpg" />
-<img src="images/28_Sports_Fan_Sports_Fan_28_770.jpg" />
-<img src="images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg" />
-<img src="images/4_Dancing_Dancing_4_194.jpg" />
+<img src="images/0_Parade_marchingband_1_356.jpg" />
+<img src="images/28_Sports_Fan_Sports_Fan_28_770.jpg" />
+<img src="images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg" />
+<img src="images/4_Dancing_Dancing_4_194.jpg" />
Pyramidbox prediction visualization
diff --git a/fluid/face_detection/images/0_Parade_marchingband_1_356.jpg b/fluid/face_detection/images/0_Parade_marchingband_1_356.jpg
index d5175c5a9d57184945ae63ab1cfc0a2621ef9b70..d383deefadae83816693858db93adae2397c75ea 100644
Binary files a/fluid/face_detection/images/0_Parade_marchingband_1_356.jpg and b/fluid/face_detection/images/0_Parade_marchingband_1_356.jpg differ
diff --git a/fluid/face_detection/images/28_Sports_Fan_Sports_Fan_28_770.jpg b/fluid/face_detection/images/28_Sports_Fan_Sports_Fan_28_770.jpg
index 661ad9abb2d21e6e6b872b2a44e14fc476e732e7..e6a481ec554a53fc433f41dacad01a101e9e930f 100644
Binary files a/fluid/face_detection/images/28_Sports_Fan_Sports_Fan_28_770.jpg and b/fluid/face_detection/images/28_Sports_Fan_Sports_Fan_28_770.jpg differ
diff --git a/fluid/face_detection/images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg b/fluid/face_detection/images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg
index febe7411ae2ff258a608275fc9050294014e8342..e934fa6c45777c7a66103cc2b3d55e5f4fd18096 100644
Binary files a/fluid/face_detection/images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg and b/fluid/face_detection/images/2_Demonstration_Demonstration_Or_Protest_2_58.jpg differ
diff --git a/fluid/face_detection/images/4_Dancing_Dancing_4_194.jpg b/fluid/face_detection/images/4_Dancing_Dancing_4_194.jpg
index 8161593708db628c6beb4f32c8133226e19f0f13..52476b8ee84762fc87e7c4e3eb560b567e44435c 100644
Binary files a/fluid/face_detection/images/4_Dancing_Dancing_4_194.jpg and b/fluid/face_detection/images/4_Dancing_Dancing_4_194.jpg differ
diff --git a/fluid/icnet/.run_ce.sh b/fluid/icnet/.run_ce.sh
index a46081c7978395697b843c5fef95e6091b47e4e5..643c1ed4cd1bd1012935e063cd8b3e3bbfd4f6d0 100755
--- a/fluid/icnet/.run_ce.sh
+++ b/fluid/icnet/.run_ce.sh
@@ -2,6 +2,7 @@
# This file is only used for continuous evaluation.
+export ce_mode=1
rm -rf *_factor.txt
-python train.py --use_gpu=True 1> log
+python train.py --use_gpu=True --random_mirror=False --random_scaling=False 1> log
cat log | python _ce.py
diff --git a/fluid/icnet/_ce.py b/fluid/icnet/_ce.py
index 3844eefde620f9587d747594ad0d5351999859c8..8953488259d5bc194921637e0b141cb90081e4f9 100644
--- a/fluid/icnet/_ce.py
+++ b/fluid/icnet/_ce.py
@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
# NOTE kpi.py should be shared across models in some way!!!!
-train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
-train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True)
+train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True)
+train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
tracking_kpis = [
train_cost_kpi,
diff --git a/fluid/icnet/cityscape.py b/fluid/icnet/cityscape.py
index 0b862d9a92728fdcc2b12f673c3583ec096d4d25..c5c08afcf3a3c85b9f43c9110e8a8dedc5900d5b 100644
--- a/fluid/icnet/cityscape.py
+++ b/fluid/icnet/cityscape.py
@@ -1,5 +1,8 @@
"""Reader for Cityscape dataset.
"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
import os
import cv2
import numpy as np
@@ -173,8 +176,8 @@ class DataGenerater:
"""
Scale label according to factor.
"""
- h = label.shape[0] / factor
- w = label.shape[1] / factor
+ h = label.shape[0] // factor
+ w = label.shape[1] // factor
return cv2.resize(
label, (h, w), interpolation=cv2.INTER_NEAREST)[:, :, np.newaxis]
diff --git a/fluid/icnet/eval.py b/fluid/icnet/eval.py
index bdebe7ad72d799e709bd529711d600a9d692a838..dc2f574359a568de794b07cf05d854f6d388482a 100644
--- a/fluid/icnet/eval.py
+++ b/fluid/icnet/eval.py
@@ -64,7 +64,7 @@ def eval(args):
exe.run(fluid.default_startup_program())
assert os.path.exists(args.model_path)
fluid.io.load_params(exe, args.model_path)
- print "loaded model from: %s" % args.model_path
+ print("loaded model from: %s" % args.model_path)
sys.stdout.flush()
fetch_vars = [iou, out_w, out_r]
@@ -80,11 +80,10 @@ def eval(args):
fetch_list=fetch_vars)
out_wrong += result[1]
out_right += result[2]
- print "count: %s; current iou: %.3f;\r" % (count, result[0]),
sys.stdout.flush()
iou = cal_mean_iou(out_wrong, out_right)
- print "\nmean iou: %.3f" % iou
- print "kpis test_acc %f" % iou
+ print("\nmean iou: %.3f" % iou)
+ print("kpis test_acc %f" % iou)
def main():
diff --git a/fluid/icnet/icnet.py b/fluid/icnet/icnet.py
index afe3fa9d352bd8fbf6b2fad46f24ad4c9841a1ff..d640621eb9def4bfb1411667ea68f5384fbd5489 100644
--- a/fluid/icnet/icnet.py
+++ b/fluid/icnet/icnet.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
import paddle.fluid as fluid
import numpy as np
import sys
@@ -20,8 +23,8 @@ def conv(input,
if padding == "SAME":
padding_h = max(k_h - s_h, 0)
padding_w = max(k_w - s_w, 0)
- padding_top = padding_h / 2
- padding_left = padding_w / 2
+ padding_top = padding_h // 2
+ padding_left = padding_w // 2
padding_bottom = padding_h - padding_top
padding_right = padding_w - padding_left
padding = [
@@ -57,8 +60,8 @@ def atrous_conv(input,
if padding == "SAME":
padding_h = max(k_h - s_h, 0)
padding_w = max(k_w - s_w, 0)
- padding_top = padding_h / 2
- padding_left = padding_w / 2
+ padding_top = padding_h // 2
+ padding_left = padding_w // 2
padding_bottom = padding_h - padding_top
padding_right = padding_w - padding_left
padding = [
@@ -141,15 +144,15 @@ def dilation_convs(input):
def pyramis_pooling(input, input_shape):
- shape = np.ceil(input_shape / 32).astype("int32")
+ shape = np.ceil(input_shape // 32).astype("int32")
h, w = shape
pool1 = avg_pool(input, h, w, h, w)
pool1_interp = interp(pool1, shape)
- pool2 = avg_pool(input, h / 2, w / 2, h / 2, w / 2)
+ pool2 = avg_pool(input, h // 2, w // 2, h // 2, w // 2)
pool2_interp = interp(pool2, shape)
- pool3 = avg_pool(input, h / 3, w / 3, h / 3, w / 3)
+ pool3 = avg_pool(input, h // 3, w // 3, h // 3, w // 3)
pool3_interp = interp(pool3, shape)
- pool4 = avg_pool(input, h / 4, w / 4, h / 4, w / 4)
+ pool4 = avg_pool(input, h // 4, w // 4, h // 4, w // 4)
pool4_interp = interp(pool4, shape)
conv5_3_sum = input + pool4_interp + pool3_interp + pool2_interp + pool1_interp
return conv5_3_sum
@@ -172,14 +175,14 @@ def shared_convs(image):
def res_block(input, filter_num, padding=0, dilation=None, name=None):
- tmp = conv(input, 1, 1, filter_num / 4, 1, 1, name=name + "_1_1_reduce")
+ tmp = conv(input, 1, 1, filter_num // 4, 1, 1, name=name + "_1_1_reduce")
tmp = bn(tmp, relu=True)
tmp = zero_padding(tmp, padding=padding)
if dilation is None:
- tmp = conv(tmp, 3, 3, filter_num / 4, 1, 1, name=name + "_3_3")
+ tmp = conv(tmp, 3, 3, filter_num // 4, 1, 1, name=name + "_3_3")
else:
tmp = atrous_conv(
- tmp, 3, 3, filter_num / 4, dilation, name=name + "_3_3")
+ tmp, 3, 3, filter_num // 4, dilation, name=name + "_3_3")
tmp = bn(tmp, relu=True)
tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase")
tmp = bn(tmp, relu=False)
@@ -195,7 +198,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
proj_bn = bn(proj, relu=False)
tmp = conv(
- input, 1, 1, filter_num / 4, stride, stride, name=name + "_1_1_reduce")
+ input, 1, 1, filter_num // 4, stride, stride, name=name + "_1_1_reduce")
tmp = bn(tmp, relu=True)
tmp = zero_padding(tmp, padding=padding)
@@ -208,7 +211,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
tmp,
3,
3,
- filter_num / 4,
+ filter_num // 4,
1,
1,
padding=padding,
@@ -218,7 +221,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
tmp,
3,
3,
- filter_num / 4,
+ filter_num // 4,
dilation,
padding=padding,
name=name + "_3_3")
@@ -232,12 +235,12 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1,
def sub_net_4(input, input_shape):
- tmp = interp(input, out_shape=np.ceil(input_shape / 32))
+ tmp = interp(input, out_shape=np.ceil(input_shape // 32))
tmp = dilation_convs(tmp)
tmp = pyramis_pooling(tmp, input_shape)
tmp = conv(tmp, 1, 1, 256, 1, 1, name="conv5_4_k1")
tmp = bn(tmp, relu=True)
- tmp = interp(tmp, input_shape / 16)
+ tmp = interp(tmp, input_shape // 16)
return tmp
@@ -265,7 +268,7 @@ def CCF24(sub2_out, sub4_out, input_shape):
tmp = bn(tmp, relu=False)
tmp = tmp + sub2_out
tmp = fluid.layers.relu(tmp)
- tmp = interp(tmp, input_shape / 8)
+ tmp = interp(tmp, input_shape // 8)
return tmp
@@ -275,7 +278,7 @@ def CCF124(sub1_out, sub24_out, input_shape):
tmp = bn(tmp, relu=False)
tmp = tmp + sub1_out
tmp = fluid.layers.relu(tmp)
- tmp = interp(tmp, input_shape / 4)
+ tmp = interp(tmp, input_shape // 4)
return tmp
diff --git a/fluid/icnet/infer.py b/fluid/icnet/infer.py
index 63fb3268060248f70462cf914c613c53a1fc1f89..f93469f157660a4c5adae7d4ff2bc9b315bce41e 100644
--- a/fluid/icnet/infer.py
+++ b/fluid/icnet/infer.py
@@ -1,4 +1,5 @@
"""Infer for ICNet model."""
+from __future__ import print_function
import cityscape
import argparse
import functools
@@ -101,7 +102,7 @@ def infer(args):
exe.run(fluid.default_startup_program())
assert os.path.exists(args.model_path)
fluid.io.load_params(exe, args.model_path)
- print "loaded model from: %s" % args.model_path
+ print("loaded model from: %s" % args.model_path)
sys.stdout.flush()
if not os.path.isdir(args.out_path):
diff --git a/fluid/icnet/train.py b/fluid/icnet/train.py
index b38f08258b9b3e1bd28d808b2779416259f9d827..1059e353337d79f9b879e8aeb0fbb095c9689df9 100644
--- a/fluid/icnet/train.py
+++ b/fluid/icnet/train.py
@@ -1,9 +1,13 @@
"""Trainer for ICNet model."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
from icnet import icnet
import cityscape
import argparse
import functools
import sys
+import os
import time
import paddle.fluid as fluid
import numpy as np
@@ -11,9 +15,8 @@ from utils import add_arguments, print_arguments, get_feeder_data
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
from paddle.fluid.initializer import init_on_cpu
-SEED = 90
-# random seed must set before configuring the network.
-fluid.default_startup_program().random_seed = SEED
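+# Seed NumPy only in continuous-evaluation (CE) mode; normal runs stay random.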
+if 'ce_mode' in os.environ:
+ np.random.seed(10)
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
@@ -87,10 +90,14 @@ def train(args):
if args.use_gpu:
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
+
+ if 'ce_mode' in os.environ:
+ fluid.default_startup_program().random_seed = 90
+
exe.run(fluid.default_startup_program())
if args.init_model is not None:
- print "load model from: %s" % args.init_model
+ print("load model from: %s" % args.init_model)
sys.stdout.flush()
fluid.io.load_params(exe, args.init_model)
@@ -107,7 +114,7 @@ def train(args):
for data in train_reader():
if iter_id > TOTAL_STEP:
end_time = time.time()
- print "kpis train_duration %f" % (end_time - start_time)
+ print("kpis train_duration %f" % (end_time - start_time))
return
iter_id += 1
results = exe.run(
@@ -119,10 +126,10 @@ def train(args):
sub124_loss += results[3]
# training log
if iter_id % LOG_PERIOD == 0:
- print "Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" % (
+ print("Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" % (
iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD,
- sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD)
- print "kpis train_cost %f" % (t_loss / LOG_PERIOD)
+ sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD))
+ print("kpis train_cost %f" % (t_loss / LOG_PERIOD))
t_loss = 0.
sub4_loss = 0.
@@ -133,7 +140,7 @@ def train(args):
if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None:
dir_name = args.checkpoint_path + "/" + str(iter_id)
fluid.io.save_persistables(exe, dirname=dir_name)
- print "Saved checkpoint: %s" % (dir_name)
+ print("Saved checkpoint: %s" % (dir_name))
def main():
diff --git a/fluid/icnet/utils.py b/fluid/icnet/utils.py
index 699841d65f16ffd0dfae0d27e33c2ec52479826e..7d58060eb96fd95a04f377f8c852eda02e59b5f6 100644
--- a/fluid/icnet/utils.py
+++ b/fluid/icnet/utils.py
@@ -19,6 +19,7 @@ from __future__ import print_function
import distutils.util
import numpy as np
from paddle.fluid import core
+import six
def print_arguments(args):
@@ -37,7 +38,7 @@ def print_arguments(args):
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
- for arg, value in sorted(vars(args).iteritems()):
+ for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
diff --git a/fluid/image_classification/.run_ce.sh b/fluid/image_classification/.run_ce.sh
index f06fdf769024aef68ab5278fafb956f859228038..be1a37615885f0e7f6700b0399e419ac90afaa33 100755
--- a/fluid/image_classification/.run_ce.sh
+++ b/fluid/image_classification/.run_ce.sh
@@ -5,6 +5,6 @@ cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py
-cudaid=${object_detection_cudaid:=0, 1, 2, 3}
+cudaid=${object_detection_cudaid_m:=0, 1, 2, 3}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py
diff --git a/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py b/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py
index 45b1f5303ce77de7c7f5e3a232517c26e159b2fa..c995e6df17a4be068984cece06a9b3a33f6ea4f4 100644
--- a/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py
+++ b/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py
@@ -45,7 +45,7 @@ def calc_diff(f1, f2):
sq_df = np.mean(df * df)
return max_df, sq_df
except Exception as e:
- return -1.0, -1.0
+ return 1.0, 1.0
def compare(path1, path2, no_exception):
diff --git a/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py b/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
index 4b6f38be6647b23a4c79dcc7850aaab55fa6ffa8..0c1354530c6af3b35977bb51e830b89d481de6b6 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
@@ -245,10 +245,18 @@ class Network(object):
@layer
def prelu(self, input, channel_shared, name):
- #fluid = import_fluid()
- #output = fluid.layers.relu(input)
- #return output
- raise NotImplementedError('prelu not implemented')
+ fluid = import_fluid()
+ if channel_shared:
+ mode = 'all'
+ else:
+ mode = 'channel'
+
+ prefix = name + '_'
+ output = fluid.layers.prelu(
+ input,
+ mode=mode,
+ param_attr=fluid.ParamAttr(name=prefix + 'negslope'))
+ return output
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name):
diff --git a/fluid/image_classification/caffe2fluid/kaffe/transformers.py b/fluid/image_classification/caffe2fluid/kaffe/transformers.py
index b92e93bbd84989e39bd658d107166f68e9dc4a27..f436ec8cbfb49394ef2880dbf0cb46aeff7a6ca0 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/transformers.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/transformers.py
@@ -176,6 +176,7 @@ class DataReshaper(object):
del node.reshaped_data
return graph
+
class CropFuser(object):
'''
Crop is to return a scalar output Blob for an input Blob of arbitrary size.
@@ -197,7 +198,8 @@ class CropFuser(object):
cls._traced_names[fname] = []
cls._traced_names[fname].append(tname)
- def __init__(self, allowed_parent_types=[NodeKind.Input, NodeKind.DummyData]):
+ def __init__(self,
+ allowed_parent_types=[NodeKind.Input, NodeKind.DummyData]):
self.allowed_parent_types = allowed_parent_types
def __call__(self, graph):
@@ -232,7 +234,11 @@ class CropFuser(object):
def merge(self, parent, child):
'''Merge the parent node into the child.'''
- child.metadata['shape'] = [parent.output_shape.batch_size, parent.output_shape.channels, parent.output_shape.height, parent.output_shape.width]
+ child.metadata['shape'] = [
+ parent.output_shape.batch_size, parent.output_shape.channels,
+ parent.output_shape.height, parent.output_shape.width
+ ]
+
class SubNodeFuser(object):
'''
@@ -395,6 +401,8 @@ class ParameterNamer(object):
names = ('scale', )
if getattr(node.parameters, 'bias_term', False):
names = ('scale', 'offset')
+ elif node.kind == NodeKind.PReLU:
+ names = ('negslope', )
elif node.kind == "Normalize":
names = ('scale', )
else:
diff --git a/fluid/neural_machine_translation/rnn_search/infer.py b/fluid/neural_machine_translation/rnn_search/infer.py
index 51bdf9cda4694d4d849ff333e5c8e47978fb8815..4345b29c4084f953d7382b740148e5c5db773da6 100644
--- a/fluid/neural_machine_translation/rnn_search/infer.py
+++ b/fluid/neural_machine_translation/rnn_search/infer.py
@@ -18,6 +18,7 @@ from __future__ import print_function
import numpy as np
import os
+import six
import paddle
import paddle.fluid as fluid
@@ -102,7 +103,7 @@ def infer():
init_recursive_seq_lens, place)
# Feed dict for inference
- feed_dict = feeder.feed(map(lambda x: [x[0]], data))
+ feed_dict = feeder.feed([[x[0]] for x in data])
feed_dict['init_ids'] = init_ids
feed_dict['init_scores'] = init_scores
@@ -115,7 +116,7 @@ def infer():
lod_level_1 = fetch_outs[0].lod()[1]
token_array = np.array(fetch_outs[0])
result = []
- for i in xrange(len(lod_level_1) - 1):
+ for i in six.moves.xrange(len(lod_level_1) - 1):
sentence_list = [
trg_dict[token]
for token in token_array[lod_level_1[i]:lod_level_1[i + 1]]
@@ -125,7 +126,7 @@ def infer():
lod_level_0 = fetch_outs[0].lod()[0]
paragraphs = [
result[lod_level_0[i]:lod_level_0[i + 1]]
- for i in xrange(len(lod_level_0) - 1)
+ for i in six.moves.xrange(len(lod_level_0) - 1)
]
for paragraph in paragraphs:
diff --git a/fluid/neural_machine_translation/transformer/_ce.py b/fluid/neural_machine_translation/transformer/_ce.py
index 88886a4bc1068bcfc7197a247f26c7a092aee7c2..447652c4f4d60765011a621371b381e75573612e 100644
--- a/fluid/neural_machine_translation/transformer/_ce.py
+++ b/fluid/neural_machine_translation/transformer/_ce.py
@@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared across models in some way!!!!
-train_cost_card1_kpi = CostKpi('train_cost_card1', 0.01, 0, actived=True)
+train_cost_card1_kpi = CostKpi('train_cost_card1', 0.02, 0, actived=True)
test_cost_card1_kpi = CostKpi('test_cost_card1', 0.005, 0, actived=True)
train_duration_card1_kpi = DurationKpi(
'train_duration_card1', 0.06, 0, actived=True)
diff --git a/fluid/object_detection/.run_ce.sh b/fluid/object_detection/.run_ce.sh
index 50809e77043e0eb0bb5f6bf5a9904d8113c85756..8b3d1a525e2556f2acd0603bec369b3ca99541e1 100755
--- a/fluid/object_detection/.run_ce.sh
+++ b/fluid/object_detection/.run_ce.sh
@@ -14,6 +14,6 @@ cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
-cudaid=${object_detection_cudaid:=0,1,2,3}
+cudaid=${object_detection_cudaid_m:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
diff --git a/fluid/object_detection/_ce.py b/fluid/object_detection/_ce.py
index 4f17ff324d8c4bb1d0cecca2401e584a7ec5e3af..f90887c9a3b6a67dc8fd2c29dd3fc384237d7e43 100644
--- a/fluid/object_detection/_ce.py
+++ b/fluid/object_detection/_ce.py
@@ -8,8 +8,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared across models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
-test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=True)
-train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=True)
+test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=False)
+train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=False)
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True)
test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=True)
train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, 0, actived=True)
diff --git a/fluid/object_detection/reader.py b/fluid/object_detection/reader.py
index a30ae797b9415efbb594a9e3238985eded49d421..da0aa788570369d64f1a3db5303fc056984e3c74 100644
--- a/fluid/object_detection/reader.py
+++ b/fluid/object_detection/reader.py
@@ -22,6 +22,7 @@ import xml.etree.ElementTree
import os
import time
import copy
+import six
class Settings(object):
@@ -151,7 +152,7 @@ def preprocess(img, bbox_labels, mode, settings):
mirror = int(random.uniform(0, 2))
if mirror == 1:
img = img[:, ::-1, :]
- for i in xrange(len(sampled_labels)):
+ for i in six.moves.xrange(len(sampled_labels)):
tmp = sampled_labels[i][1]
sampled_labels[i][1] = 1 - sampled_labels[i][3]
sampled_labels[i][3] = 1 - tmp
diff --git a/fluid/object_detection/train.py b/fluid/object_detection/train.py
index 46af235ff7f6c3067e1cc2d35de76ebaf59be885..706a33be1d5c1e4c2a6c9cae49310804962a6315 100644
--- a/fluid/object_detection/train.py
+++ b/fluid/object_detection/train.py
@@ -65,7 +65,6 @@ def train(args,
name='gt_label', shape=[1], dtype='int32', lod_level=1)
difficult = fluid.layers.data(
name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
-
locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=args.nms_threshold)
@@ -88,16 +87,16 @@ def train(args,
if 'coco' in data_args.dataset:
# learning rate decay in 12, 19 pass, respectively
if '2014' in train_file_list:
- epocs = 82783 / batch_size
+ epocs = 82783 // batch_size
boundaries = [epocs * 12, epocs * 19]
elif '2017' in train_file_list:
- epocs = 118287 / batch_size
+ epocs = 118287 // batch_size
boundaries = [epocs * 12, epocs * 19]
values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25
]
elif 'pascalvoc' in data_args.dataset:
- epocs = 19200 / batch_size
+ epocs = 19200 // batch_size
boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25,
@@ -126,6 +125,9 @@ def train(args,
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
else:
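+        # Fix the Python and NumPy seeds so continuous-evaluation runs are reproducible.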
+ import random
+ random.seed(0)
+ np.random.seed(0)
train_reader = paddle.batch(
reader.train(data_args, train_file_list, False), batch_size=batch_size)
test_reader = paddle.batch(
@@ -137,7 +139,7 @@ def train(args,
model_path = os.path.join(model_save_dir, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
- print 'save models to %s' % (model_path)
+ print('save models to %s' % (model_path))
fluid.io.save_persistables(exe, model_path)
best_map = 0.
@@ -166,8 +168,6 @@ def train(args,
start_time = time.time()
prev_start_time = start_time
every_pass_loss = []
- iter = 0
- pass_duration = 0.0
for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time
start_time = time.time()
@@ -193,15 +193,15 @@ def train(args,
total_time += end_time - start_time
train_avg_loss = np.mean(every_pass_loss)
if devices_num == 1:
- print ("kpis train_cost %s" % train_avg_loss)
- print ("kpis test_acc %s" % mean_map)
- print ("kpis train_speed %s" % (total_time / epoch_idx))
+ print("kpis train_cost %s" % train_avg_loss)
+ print("kpis test_acc %s" % mean_map)
+ print("kpis train_speed %s" % (total_time / epoch_idx))
else:
- print ("kpis train_cost_card%s %s" %
+ print("kpis train_cost_card%s %s" %
(devices_num, train_avg_loss))
- print ("kpis test_acc_card%s %s" %
+ print("kpis test_acc_card%s %s" %
(devices_num, mean_map))
- print ("kpis train_speed_card%s %f" %
+ print("kpis train_speed_card%s %f" %
(devices_num, total_time / epoch_idx))
diff --git a/fluid/object_detection/utility.py b/fluid/object_detection/utility.py
index 506e6007ceb9059caf1163befb6ff594d67b547a..746ce25a73979a9026f740fc8fce483857677082 100644
--- a/fluid/object_detection/utility.py
+++ b/fluid/object_detection/utility.py
@@ -16,8 +16,10 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+
import distutils.util
import numpy as np
+import six
from paddle.fluid import core
@@ -37,7 +39,7 @@ def print_arguments(args):
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
- for arg, value in sorted(vars(args).iteritems()):
+ for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
diff --git a/fluid/ocr_recognition/.run.sh b/fluid/ocr_recognition/.run.sh
deleted file mode 100644
index 6fd313d34afc8441fa8256e909991263317f1dc6..0000000000000000000000000000000000000000
--- a/fluid/ocr_recognition/.run.sh
+++ /dev/null
@@ -1 +0,0 @@
-python ctc_train.py --batch_size=128 --total_step=10000 --eval_period=10000 --log_period=10000 --use_gpu=True
diff --git a/fluid/ocr_recognition/.run_ce.sh b/fluid/ocr_recognition/.run_ce.sh
old mode 100644
new mode 100755
index 202c5b483c8f35fd34a5cfa39c9ba11702a9bbd3..90abc143f8953a96ef94146ca9b3b308cc9e930b
--- a/fluid/ocr_recognition/.run_ce.sh
+++ b/fluid/ocr_recognition/.run_ce.sh
@@ -1,5 +1,4 @@
export ce_mode=1
-rm *factor.txt
-python ctc_train.py --batch_size=32 --total_step=30000 --eval_period=30000 --log_period=30000 --use_gpu=True 1> ./tmp.log
+python train.py --batch_size=32 --total_step=1 --eval_period=1 --log_period=1 --use_gpu=True 1> ./tmp.log
cat tmp.log | python _ce.py
rm tmp.log
diff --git a/fluid/ocr_recognition/README.md b/fluid/ocr_recognition/README.md
index 50b72440818384a0d8e80ab214faaabddbd93f90..ad70aa0c904adad72a18987d306973aa5b89d650 100644
--- a/fluid/ocr_recognition/README.md
+++ b/fluid/ocr_recognition/README.md
@@ -5,8 +5,9 @@
## Code structure
```
├── ctc_reader.py # Downloads, reads, and preprocesses the data.
-├── crnn_ctc_model.py # Defines the training, inference, and evaluation networks.
-├── ctc_train.py # Trains the model.
+├── crnn_ctc_model.py # Defines the network structure of the OCR CTC model.
+├── attention_model.py # Defines the network structure of the OCR attention model.
+├── train.py # Trains the model.
├── infer.py # Loads a trained model and predicts on new data.
├── eval.py # Evaluates the model on a given dataset.
└── utils.py # Defines common utility functions.
@@ -15,9 +16,16 @@
## Introduction
-The task in this chapter is to recognize images containing a single line of Chinese characters. Convolutions first turn the image into feature maps, the `im2sequence op` then turns the feature maps into a sequence, and a `bidirectional GRU` learns sequence features. Training uses the CTC (Connectionist Temporal Classification) loss, and the final evaluation metric is the sample-level error rate.
+The task in this chapter is to recognize a single line of English characters in an image. We use two different models, a CTC model and an attention model, to accomplish this task.
+The two models share the same encoder: convolutions first turn the image into feature maps, the `im2sequence op` then turns the feature maps into a sequence, and a `bidirectional GRU` learns sequence features.
+The decoders and loss functions of the two models differ as follows:
+
+- CTC model: training uses the CTC (Connectionist Temporal Classification) loss; prediction uses a greedy strategy with CTC decoding.
+- Attention model: training uses an attention-based decoding strategy with a cross-entropy loss; prediction uses beam search.
+
+For both of these models, the evaluation metric is the sample-level error rate.
## Data
@@ -124,15 +132,23 @@ env OMP_NUM_THREADS= python ctc_train.py --use_gpu False
env CUDA_VISIBLE_DEVICES=0,1,2,3 python ctc_train.py --parallel=True
```
+The `CTC model` is used by default; you can switch to the `attention model` with the option `--model="attention"`.
+
Run `python ctc_train.py --help` to see more usage options and detailed parameter descriptions.
-Figure 2 shows the convergence curves for training with the default parameters on the default dataset. The horizontal axis is the number of training iterations and the vertical axis is the sample-level error rate; the blue line is the error rate on the training set and the red line is the error rate on the test set. Over 60 epochs of training, the lowest error rate on the test set was 22.0%, reached at epoch 32.
+Figure 2 shows the convergence curves for the `CTC model` trained with the default parameters on the default dataset. The horizontal axis is the number of training iterations and the vertical axis is the sample-level error rate; the blue line is the error rate on the training set and the red line is the error rate on the test set. The lowest error rate on the test set is 22.0%.
-<img src="images/train.jpg">
+<img src="images/train.jpg">
Figure 2
+Figure 3 shows the convergence curves for the `attention model` trained with the default parameters on the default dataset. The horizontal axis is the number of training iterations and the vertical axis is the sample-level error rate; the blue line is the error rate on the training set and the red line is the error rate on the test set. The lowest error rate on the test set is 16.25%.
+
+<img src="images/train_attention.jpg">
+
+Figure 3
+
## Testing
diff --git a/fluid/ocr_recognition/_ce.py b/fluid/ocr_recognition/_ce.py
index 365639f6dac48862fbd3d5f6da32a16c4135a20b..b1b1e365077b18f15e4443b1b374a69f570da64a 100644
--- a/fluid/ocr_recognition/_ce.py
+++ b/fluid/ocr_recognition/_ce.py
@@ -7,7 +7,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
# NOTE kpi.py should be shared across models in some way!!!!
-train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
+train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.005, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
train_acc_kpi = AccKpi('train_acc', 0.005, 0, actived=True)
diff --git a/fluid/ocr_recognition/attention_model.py b/fluid/ocr_recognition/attention_model.py
new file mode 100755
index 0000000000000000000000000000000000000000..363c03070e98c721a63891ca8c7f35ce7046ac6d
--- /dev/null
+++ b/fluid/ocr_recognition/attention_model.py
@@ -0,0 +1,371 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle.fluid as fluid
+import six
+
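+# Decoder and beam-search hyperparameters; 'sos' and 'eos' are the ids of the
+# start- and end-of-sequence tokens in the label vocabulary.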
+decoder_size = 128
+word_vector_dim = 128
+max_length = 100
+sos = 0
+eos = 1
+gradient_clip = 10
+LR = 1.0
+beam_size = 2
+learning_rate_decay = None
+
+
+def conv_bn_pool(input,
+ group,
+ out_ch,
+ act="relu",
+ is_test=False,
+ pool=True,
+ use_cudnn=True):
+ tmp = input
+    for i in six.moves.xrange(group):
+ filter_size = 3
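+        # He/MSRA-style init: std scales with the fan-in (k^2 * in_channels) of the conv.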
+ conv_std = (2.0 / (filter_size**2 * tmp.shape[1]))**0.5
+ conv_param = fluid.ParamAttr(
+ initializer=fluid.initializer.Normal(0.0, conv_std))
+ tmp = fluid.layers.conv2d(
+ input=tmp,
+ num_filters=out_ch[i],
+ filter_size=3,
+ padding=1,
+ bias_attr=False,
+ param_attr=conv_param,
+ act=None, # LinearActivation
+ use_cudnn=use_cudnn)
+
+ tmp = fluid.layers.batch_norm(input=tmp, act=act, is_test=is_test)
+    if pool:
+ tmp = fluid.layers.pool2d(
+ input=tmp,
+ pool_size=2,
+ pool_type='max',
+ pool_stride=2,
+ use_cudnn=use_cudnn,
+ ceil_mode=True)
+
+ return tmp
+
+
+def ocr_convs(input, is_test=False, use_cudnn=True):
+ tmp = input
+ tmp = conv_bn_pool(tmp, 2, [16, 16], is_test=is_test, use_cudnn=use_cudnn)
+ tmp = conv_bn_pool(tmp, 2, [32, 32], is_test=is_test, use_cudnn=use_cudnn)
+ tmp = conv_bn_pool(tmp, 2, [64, 64], is_test=is_test, use_cudnn=use_cudnn)
+ tmp = conv_bn_pool(
+ tmp, 2, [128, 128], is_test=is_test, pool=False, use_cudnn=use_cudnn)
+ return tmp
+
+
+def encoder_net(images, rnn_hidden_size=200, is_test=False, use_cudnn=True):
+
+ conv_features = ocr_convs(images, is_test=is_test, use_cudnn=use_cudnn)
+
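+    # im2sequence slides a (feature-map-height x 1) window over the feature map,
+    # so each column becomes one timestep of the sequence fed to the GRUs.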
+ sliced_feature = fluid.layers.im2sequence(
+ input=conv_features,
+ stride=[1, 1],
+ filter_size=[conv_features.shape[2], 1])
+
+ para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.02))
+ bias_attr = fluid.ParamAttr(
+ initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
+
+ fc_1 = fluid.layers.fc(input=sliced_feature,
+ size=rnn_hidden_size * 3,
+ param_attr=para_attr,
+ bias_attr=False)
+ fc_2 = fluid.layers.fc(input=sliced_feature,
+ size=rnn_hidden_size * 3,
+ param_attr=para_attr,
+ bias_attr=False)
+
+ gru_forward = fluid.layers.dynamic_gru(
+ input=fc_1,
+ size=rnn_hidden_size,
+ param_attr=para_attr,
+ bias_attr=bias_attr,
+ candidate_activation='relu')
+ gru_backward = fluid.layers.dynamic_gru(
+ input=fc_2,
+ size=rnn_hidden_size,
+ is_reverse=True,
+ param_attr=para_attr,
+ bias_attr=bias_attr,
+ candidate_activation='relu')
+
+ encoded_vector = fluid.layers.concat(
+ input=[gru_forward, gru_backward], axis=1)
+ encoded_proj = fluid.layers.fc(input=encoded_vector,
+ size=decoder_size,
+ bias_attr=False)
+
+ return gru_backward, encoded_vector, encoded_proj
+
+
+def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
+ decoder_boot, decoder_size, num_classes):
+ def simple_attention(encoder_vec, encoder_proj, decoder_state):
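+        # Additive (Bahdanau-style) attention: project the decoder state, add it to
+        # the projected encoder states, apply tanh, then a sequence softmax to get
+        # per-timestep weights for pooling the encoder outputs into a context vector.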
+ decoder_state_proj = fluid.layers.fc(input=decoder_state,
+ size=decoder_size,
+ bias_attr=False)
+ decoder_state_expand = fluid.layers.sequence_expand(
+ x=decoder_state_proj, y=encoder_proj)
+ concated = encoder_proj + decoder_state_expand
+ concated = fluid.layers.tanh(x=concated)
+ attention_weights = fluid.layers.fc(input=concated,
+ size=1,
+ act=None,
+ bias_attr=False)
+ attention_weights = fluid.layers.sequence_softmax(
+ input=attention_weights)
+        weights_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
+        scaled = fluid.layers.elementwise_mul(
+            x=encoder_vec, y=weights_reshape, axis=0)
+ context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
+ return context
+
+ rnn = fluid.layers.DynamicRNN()
+
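+    # Teacher-forced decoder: each step consumes the ground-truth previous token,
+    # attends over the encoder outputs, and updates the GRU hidden state.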
+ with rnn.block():
+ current_word = rnn.step_input(target_embedding)
+ encoder_vec = rnn.static_input(encoder_vec)
+ encoder_proj = rnn.static_input(encoder_proj)
+ hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
+ context = simple_attention(encoder_vec, encoder_proj, hidden_mem)
+ fc_1 = fluid.layers.fc(input=context,
+ size=decoder_size * 3,
+ bias_attr=False)
+ fc_2 = fluid.layers.fc(input=current_word,
+ size=decoder_size * 3,
+ bias_attr=False)
+ decoder_inputs = fc_1 + fc_2
+ h, _, _ = fluid.layers.gru_unit(
+ input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
+ rnn.update_memory(hidden_mem, h)
+ out = fluid.layers.fc(input=h,
+ size=num_classes + 2,
+ bias_attr=True,
+ act='softmax')
+ rnn.output(out)
+ return rnn()
+
+
+def attention_train_net(args, data_shape, num_classes):
+
+ images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+ label_in = fluid.layers.data(
+ name='label_in', shape=[1], dtype='int32', lod_level=1)
+ label_out = fluid.layers.data(
+ name='label_out', shape=[1], dtype='int32', lod_level=1)
+
+ gru_backward, encoded_vector, encoded_proj = encoder_net(images)
+
+ backward_first = fluid.layers.sequence_pool(
+ input=gru_backward, pool_type='first')
+ decoder_boot = fluid.layers.fc(input=backward_first,
+ size=decoder_size,
+ bias_attr=False,
+ act="relu")
+
+ label_in = fluid.layers.cast(x=label_in, dtype='int64')
+ trg_embedding = fluid.layers.embedding(
+ input=label_in,
+ size=[num_classes + 2, word_vector_dim],
+ dtype='float32')
+ prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
+ encoded_proj, decoder_boot,
+ decoder_size, num_classes)
+ fluid.clip.set_gradient_clip(fluid.clip.GradientClipByValue(gradient_clip))
+ label_out = fluid.layers.cast(x=label_out, dtype='int64')
+
+ _, maxid = fluid.layers.topk(input=prediction, k=1)
+ error_evaluator = fluid.evaluator.EditDistance(
+ input=maxid, label=label_out, ignored_tokens=[sos, eos])
+
+ inference_program = fluid.default_main_program().clone(for_test=True)
+
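+    # Cross-entropy against the shifted target sequence (label_out = label + [EOS]),
+    # summed over all timesteps in the batch.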
+ cost = fluid.layers.cross_entropy(input=prediction, label=label_out)
+ sum_cost = fluid.layers.reduce_sum(cost)
+
+ if learning_rate_decay == "piecewise_decay":
+ learning_rate = fluid.layers.piecewise_decay([50000], [LR, LR * 0.01])
+ else:
+ learning_rate = LR
+
+ optimizer = fluid.optimizer.Adadelta(
+ learning_rate=learning_rate, epsilon=1.0e-6, rho=0.9)
+ optimizer.minimize(sum_cost)
+
+ model_average = None
+ if args.average_window > 0:
+ model_average = fluid.optimizer.ModelAverage(
+ args.average_window,
+ min_average_window=args.min_average_window,
+ max_average_window=args.max_average_window)
+
+ return sum_cost, error_evaluator, inference_program, model_average
+
+
+def simple_attention(encoder_vec, encoder_proj, decoder_state, decoder_size):
+ decoder_state_proj = fluid.layers.fc(input=decoder_state,
+ size=decoder_size,
+ bias_attr=False)
+ decoder_state_expand = fluid.layers.sequence_expand(
+ x=decoder_state_proj, y=encoder_proj)
+ concated = fluid.layers.elementwise_add(encoder_proj, decoder_state_expand)
+ concated = fluid.layers.tanh(x=concated)
+ attention_weights = fluid.layers.fc(input=concated,
+ size=1,
+ act=None,
+ bias_attr=False)
+ attention_weights = fluid.layers.sequence_softmax(input=attention_weights)
+    weights_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
+    scaled = fluid.layers.elementwise_mul(
+        x=encoder_vec, y=weights_reshape, axis=0)
+ context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
+ return context
+
+
+def attention_infer(images, num_classes, use_cudnn=True):
+
+ max_length = 20
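+    # Cap decoding at 20 steps here; this shadows the module-level max_length.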
+ gru_backward, encoded_vector, encoded_proj = encoder_net(
+ images, is_test=True, use_cudnn=use_cudnn)
+
+ backward_first = fluid.layers.sequence_pool(
+ input=gru_backward, pool_type='first')
+ decoder_boot = fluid.layers.fc(input=backward_first,
+ size=decoder_size,
+ bias_attr=False,
+ act="relu")
+ init_state = decoder_boot
+ array_len = fluid.layers.fill_constant(
+ shape=[1], dtype='int64', value=max_length)
+ counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True)
+
+ # fill the first element with init_state
+ state_array = fluid.layers.create_array('float32')
+ fluid.layers.array_write(init_state, array=state_array, i=counter)
+
+ # ids, scores as memory
+ ids_array = fluid.layers.create_array('int64')
+ scores_array = fluid.layers.create_array('float32')
+
+ init_ids = fluid.layers.data(
+ name="init_ids", shape=[1], dtype="int64", lod_level=2)
+ init_scores = fluid.layers.data(
+ name="init_scores", shape=[1], dtype="float32", lod_level=2)
+
+ fluid.layers.array_write(init_ids, array=ids_array, i=counter)
+ fluid.layers.array_write(init_scores, array=scores_array, i=counter)
+
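+    # Step-by-step beam search inside a While op: keep beam_size candidates alive
+    # until max_length is reached or every candidate has emitted the end token.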
+ cond = fluid.layers.less_than(x=counter, y=array_len)
+ while_op = fluid.layers.While(cond=cond)
+ with while_op.block():
+ pre_ids = fluid.layers.array_read(array=ids_array, i=counter)
+ pre_state = fluid.layers.array_read(array=state_array, i=counter)
+ pre_score = fluid.layers.array_read(array=scores_array, i=counter)
+
+ pre_ids_emb = fluid.layers.embedding(
+ input=pre_ids,
+ size=[num_classes + 2, word_vector_dim],
+ dtype='float32')
+
+ context = simple_attention(encoded_vector, encoded_proj, pre_state,
+ decoder_size)
+
+ # expand the recursive_sequence_lengths of pre_state to be the same with pre_score
+ pre_state_expanded = fluid.layers.sequence_expand(pre_state, pre_score)
+ context_expanded = fluid.layers.sequence_expand(context, pre_score)
+ fc_1 = fluid.layers.fc(input=context_expanded,
+ size=decoder_size * 3,
+ bias_attr=False)
+ fc_2 = fluid.layers.fc(input=pre_ids_emb,
+ size=decoder_size * 3,
+ bias_attr=False)
+
+ decoder_inputs = fc_1 + fc_2
+ current_state, _, _ = fluid.layers.gru_unit(
+ input=decoder_inputs,
+ hidden=pre_state_expanded,
+ size=decoder_size * 3)
+
+ current_state_with_lod = fluid.layers.lod_reset(
+ x=current_state, y=pre_score)
+ # use score to do beam search
+ current_score = fluid.layers.fc(input=current_state_with_lod,
+ size=num_classes + 2,
+ bias_attr=True,
+ act='softmax')
+ topk_scores, topk_indices = fluid.layers.topk(
+ current_score, k=beam_size)
+
+ # calculate accumulated scores after topk to reduce computation cost
+ accu_scores = fluid.layers.elementwise_add(
+ x=fluid.layers.log(topk_scores),
+ y=fluid.layers.reshape(
+ pre_score, shape=[-1]),
+ axis=0)
+ selected_ids, selected_scores = fluid.layers.beam_search(
+ pre_ids,
+ pre_score,
+ topk_indices,
+ accu_scores,
+ beam_size,
+ 1, # end_id
+ #level=0
+ )
+
+ fluid.layers.increment(x=counter, value=1, in_place=True)
+
+ # update the memories
+ fluid.layers.array_write(current_state, array=state_array, i=counter)
+ fluid.layers.array_write(selected_ids, array=ids_array, i=counter)
+ fluid.layers.array_write(selected_scores, array=scores_array, i=counter)
+
+ # update the break condition: up to the max length or all candidates of
+ # source sentences have ended.
+ length_cond = fluid.layers.less_than(x=counter, y=array_len)
+ finish_cond = fluid.layers.logical_not(
+ fluid.layers.is_empty(x=selected_ids))
+ fluid.layers.logical_and(x=length_cond, y=finish_cond, out=cond)
+
+ ids, scores = fluid.layers.beam_search_decode(ids_array, scores_array,
+ beam_size, eos)
+ return ids
+
+
+def attention_eval(data_shape, num_classes):
+ images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+ label_in = fluid.layers.data(
+ name='label_in', shape=[1], dtype='int32', lod_level=1)
+ label_out = fluid.layers.data(
+ name='label_out', shape=[1], dtype='int32', lod_level=1)
+ label_out = fluid.layers.cast(x=label_out, dtype='int64')
+ label_in = fluid.layers.cast(x=label_in, dtype='int64')
+
+ gru_backward, encoded_vector, encoded_proj = encoder_net(
+ images, is_test=True)
+
+ backward_first = fluid.layers.sequence_pool(
+ input=gru_backward, pool_type='first')
+ decoder_boot = fluid.layers.fc(input=backward_first,
+ size=decoder_size,
+ bias_attr=False,
+ act="relu")
+ trg_embedding = fluid.layers.embedding(
+ input=label_in,
+ size=[num_classes + 2, word_vector_dim],
+ dtype='float32')
+ prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
+ encoded_proj, decoder_boot,
+ decoder_size, num_classes)
+ _, maxid = fluid.layers.topk(input=prediction, k=1)
+ error_evaluator = fluid.evaluator.EditDistance(
+ input=maxid, label=label_out, ignored_tokens=[sos, eos])
+ cost = fluid.layers.cross_entropy(input=prediction, label=label_out)
+ sum_cost = fluid.layers.reduce_sum(cost)
+ return error_evaluator, sum_cost
diff --git a/fluid/ocr_recognition/crnn_ctc_model.py b/fluid/ocr_recognition/crnn_ctc_model.py
old mode 100644
new mode 100755
index a5d4c70f868a6c973ff3e8b372a2eb387d1f191f..aa46d4ff086cee1697bafba76becf787ce619de8
--- a/fluid/ocr_recognition/crnn_ctc_model.py
+++ b/fluid/ocr_recognition/crnn_ctc_model.py
@@ -1,7 +1,11 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
from paddle.fluid.initializer import init_on_cpu
import math
+import six
def conv_bn_pool(input,
@@ -15,7 +19,7 @@ def conv_bn_pool(input,
pooling=True,
use_cudnn=False):
tmp = input
- for i in xrange(group):
+ for i in six.moves.xrange(group):
tmp = fluid.layers.conv2d(
input=tmp,
num_filters=out_ch[i],
@@ -166,13 +170,16 @@ def encoder_net(images,
return fc_out
-def ctc_train_net(images, label, args, num_classes):
+def ctc_train_net(args, data_shape, num_classes):
L2_RATE = 0.0004
LR = 1.0e-3
MOMENTUM = 0.9
learning_rate_decay = None
regularizer = fluid.regularizer.L2Decay(L2_RATE)
+ images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+ label = fluid.layers.data(
+ name='label', shape=[1], dtype='int32', lod_level=1)
fc_out = encoder_net(
images,
num_classes,
@@ -189,7 +196,7 @@ def ctc_train_net(images, label, args, num_classes):
inference_program = fluid.default_main_program().clone(for_test=True)
if learning_rate_decay == "piecewise_decay":
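+        # Decay points sit at 1/4, 1/2, and 3/4 of total_step; boundaries must
+        # be integers, hence the floor division below.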
learning_rate = fluid.layers.piecewise_decay([
- args.total_step / 4, args.total_step / 2, args.total_step * 3 / 4
+ args.total_step // 4, args.total_step // 2, args.total_step * 3 // 4
], [LR, LR * 0.1, LR * 0.01, LR * 0.001])
else:
learning_rate = LR
@@ -211,7 +218,10 @@ def ctc_infer(images, num_classes, use_cudnn):
return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes)
-def ctc_eval(images, label, num_classes, use_cudnn):
+def ctc_eval(data_shape, num_classes, use_cudnn):
+ images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+ label = fluid.layers.data(
+ name='label', shape=[1], dtype='int32', lod_level=1)
fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
decoded_out = fluid.layers.ctc_greedy_decoder(
input=fc_out, blank=num_classes)
diff --git a/fluid/ocr_recognition/ctc_reader.py b/fluid/ocr_recognition/data_reader.py
similarity index 87%
rename from fluid/ocr_recognition/ctc_reader.py
rename to fluid/ocr_recognition/data_reader.py
index a272e91a43684e62fdeab31d244989751b20fcc5..1a1c5c87162efd998e73bc19ec2cb511fdabcb79 100644
--- a/fluid/ocr_recognition/ctc_reader.py
+++ b/fluid/ocr_recognition/data_reader.py
@@ -1,12 +1,17 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
import os
import cv2
import tarfile
import numpy as np
from PIL import Image
from os import path
-from paddle.v2.image import load_image
-import paddle.v2 as paddle
+from paddle.dataset.image import load_image
+import paddle
+SOS = 0
+EOS = 1
NUM_CLASSES = 95
DATA_SHAPE = [1, 48, 512]
@@ -22,8 +27,8 @@ TEST_LIST_FILE_NAME = "test.list"
class DataGenerator(object):
- def __init__(self):
- pass
+ def __init__(self, model="crnn_ctc"):
+ self.model = model
def train_reader(self,
img_root_dir,
@@ -65,11 +70,11 @@ class DataGenerator(object):
batchsize
) + "; i++) print $(4*i+1)\" \"$(4*i+2)\" \"$(4*i+3)\" \"$(4*i+4);}}' > " + to_file
os.system(cmd)
- print "finish batch shuffle"
+ print("finish batch shuffle")
img_label_lines = open(to_file, 'r').readlines()
def reader():
- sizes = len(img_label_lines) / batchsize
+ sizes = len(img_label_lines) // batchsize
if sizes == 0:
raise ValueError('Batch size is bigger than the dataset size.')
while True:
@@ -89,7 +94,10 @@ class DataGenerator(object):
img = img.resize((sz[0], sz[1]))
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
- result.append([img, label])
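+                    # The attention model needs a decoder input ([SOS] + label) and a
+                    # decoder target (label + [EOS]); the CTC model consumes the raw label only.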
+ if self.model == "crnn_ctc":
+ result.append([img, label])
+ else:
+ result.append([img, [SOS] + label, label + [EOS]])
yield result
if not cycle:
break
@@ -117,7 +125,10 @@ class DataGenerator(object):
'L')
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
- yield img, label
+ if self.model == "crnn_ctc":
+ yield img, label
+ else:
+ yield img, [SOS] + label, label + [EOS]
return reader
@@ -185,8 +196,12 @@ def data_shape():
return DATA_SHAPE
-def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False):
- generator = DataGenerator()
+def train(batch_size,
+ train_images_dir=None,
+ train_list_file=None,
+ cycle=False,
+ model="crnn_ctc"):
+ generator = DataGenerator(model)
if train_images_dir is None:
data_dir = download_data()
train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
@@ -199,8 +214,11 @@ def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False):
train_images_dir, train_list_file, batch_size, cycle, shuffle=shuffle)
-def test(batch_size=1, test_images_dir=None, test_list_file=None):
- generator = DataGenerator()
+def test(batch_size=1,
+ test_images_dir=None,
+ test_list_file=None,
+ model="crnn_ctc"):
+ generator = DataGenerator(model)
if test_images_dir is None:
data_dir = download_data()
test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME)
@@ -213,8 +231,9 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None):
def inference(batch_size=1,
infer_images_dir=None,
infer_list_file=None,
- cycle=False):
- generator = DataGenerator()
+ cycle=False,
+ model="crnn_ctc"):
+ generator = DataGenerator(model)
return paddle.batch(
generator.infer_reader(infer_images_dir, infer_list_file, cycle),
batch_size)
diff --git a/fluid/ocr_recognition/eval.py b/fluid/ocr_recognition/eval.py
index 6924131686a1387a55cdf85136da39a249a369a7..1d553999eb545e3a1134658e78592fb74a4a8c3c 100644
--- a/fluid/ocr_recognition/eval.py
+++ b/fluid/ocr_recognition/eval.py
@@ -1,9 +1,9 @@
import paddle.v2 as paddle
import paddle.fluid as fluid
-from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
-from crnn_ctc_model import ctc_infer
+from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
+from attention_model import attention_eval
from crnn_ctc_model import ctc_eval
-import ctc_reader
+import data_reader
import argparse
import functools
import os
@@ -11,27 +11,34 @@ import os
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
-add_arg('model_path', str, None, "The model path to be used for inference.")
+add_arg('model', str, "crnn_ctc", "The type of network to use: 'crnn_ctc' or 'attention'.")
+add_arg('model_path', str, "", "The model path to be used for inference.")
add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.")
add_arg('use_gpu', bool, True, "Whether use GPU to eval.")
# yapf: enable
-def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
+def evaluate(args):
"""OCR inference"""
+
+ if args.model == "crnn_ctc":
+ eval = ctc_eval
+ get_feeder_data = get_ctc_feeder_data
+ else:
+ eval = attention_eval
+ get_feeder_data = get_attention_feeder_data
+
num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape()
# define network
- images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
- label = fluid.layers.data(
- name='label', shape=[1], dtype='int32', lod_level=1)
- evaluator, cost = eval(images, label, num_classes)
+ evaluator, cost = eval(data_shape, num_classes)
# data reader
test_reader = data_reader.test(
test_images_dir=args.input_images_dir,
- test_list_file=args.input_images_list)
+ test_list_file=args.input_images_list,
+ model=args.model)
# prepare environment
place = fluid.CPUPlace()
@@ -48,7 +55,7 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
model_dir = os.path.dirname(args.model_path)
model_file_name = os.path.basename(args.model_path)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
- print "Init model from: %s." % args.model_path
+ print("Init model from: %s." % args.model_path)
evaluator.reset(exe)
count = 0
@@ -56,14 +63,14 @@ def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
count += 1
exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place))
avg_distance, avg_seq_error = evaluator.eval(exe)
- print "Read %d samples; avg_distance: %s; avg_seq_error: %s" % (
- count, avg_distance, avg_seq_error)
+ print("Read %d samples; avg_distance: %s; avg_seq_error: %s" % (
+ count, avg_distance, avg_seq_error))
def main():
args = parser.parse_args()
print_arguments(args)
- evaluate(args, data_reader=ctc_reader)
+ evaluate(args)
if __name__ == "__main__":
diff --git a/fluid/ocr_recognition/images/train_attention.jpg b/fluid/ocr_recognition/images/train_attention.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f9c1ce30bb8f5e9e704255e31c896f355727e2fd
Binary files /dev/null and b/fluid/ocr_recognition/images/train_attention.jpg differ
diff --git a/fluid/ocr_recognition/infer.py b/fluid/ocr_recognition/infer.py
old mode 100644
new mode 100755
index 154242c9e3ca8fea26f34b5cda0c2bac5a3d0ef1..5ba77ced81c2802d372c348520d002f4e5b3452e
--- a/fluid/ocr_recognition/infer.py
+++ b/fluid/ocr_recognition/infer.py
@@ -1,10 +1,12 @@
+from __future__ import print_function
import paddle.v2 as paddle
import paddle.fluid as fluid
+from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_for_infer
import paddle.fluid.profiler as profiler
-from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer
+from attention_model import attention_infer
import numpy as np
-import ctc_reader
+import data_reader
import argparse
import functools
import os
@@ -13,6 +15,7 @@ import time
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
+add_arg('model', str, "crnn_ctc", "The type of network to use: 'crnn_ctc' or 'attention'.")
add_arg('model_path', str, None, "The model path to be used for inference.")
add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.")
@@ -25,20 +28,28 @@ add_arg('batch_size', int, 1, "The minibatch size.")
# yapf: enable
-def inference(args, infer=ctc_infer, data_reader=ctc_reader):
+def inference(args):
"""OCR inference"""
+ if args.model == "crnn_ctc":
+ infer = ctc_infer
+ get_feeder_data = get_ctc_feeder_data
+ else:
+ infer = attention_infer
+ get_feeder_data = get_attention_feeder_for_infer
+ eos = 1
+ sos = 0
num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape()
# define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
- sequence = infer(
- images, num_classes, use_cudnn=True if args.use_gpu else False)
+ ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False)
# data reader
infer_reader = data_reader.inference(
batch_size=args.batch_size,
infer_images_dir=args.input_images_dir,
infer_list_file=args.input_images_list,
- cycle=True if args.iterations > 0 else False)
+ cycle=True if args.iterations > 0 else False,
+ model=args.model)
# prepare environment
place = fluid.CPUPlace()
if args.use_gpu:
@@ -54,7 +65,7 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
with open(args.dict) as dict_file:
for i, word in enumerate(dict_file):
dict_map[i] = word.strip()
- print "Loaded dict from %s" % args.dict
+ print("Loaded dict from %s" % args.dict)
# load init model
model_dir = args.model_path
@@ -63,11 +74,12 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
model_dir = os.path.dirname(args.model_path)
model_file_name = os.path.basename(args.model_path)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
- print "Init model from: %s." % args.model_path
+ print("Init model from: %s." % args.model_path)
batch_times = []
iters = 0
for data in infer_reader():
+ feed_dict = get_feeder_data(data, place)
if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
break
if iters < args.skip_batch_num:
@@ -77,26 +89,25 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
start = time.time()
result = exe.run(fluid.default_main_program(),
- feed=get_feeder_data(
- data, place, need_label=False),
- fetch_list=[sequence],
+ feed=feed_dict,
+ fetch_list=[ids],
return_numpy=False)
+ indexes = prune(np.array(result[0]).flatten(), 0, 1)
batch_time = time.time() - start
fps = args.batch_size / batch_time
batch_times.append(batch_time)
- indexes = np.array(result[0]).flatten()
if dict_map is not None:
- print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
+ print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters,
batch_time,
fps,
- [dict_map[index] for index in indexes], )
+ [dict_map[index] for index in indexes], ))
else:
- print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
+ print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
iters,
batch_time,
fps,
- indexes, )
+ indexes, ))
iters += 1
@@ -114,18 +125,29 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader):
print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99))
+def prune(words, sos, eos):
+ """Remove unused tokens in prediction result."""
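+    # Keep only the tokens strictly between the first sos and the first eos.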
+ start_index = 0
+ end_index = len(words)
+ if sos in words:
+ start_index = np.where(words == sos)[0][0] + 1
+ if eos in words:
+ end_index = np.where(words == eos)[0][0]
+ return words[start_index:end_index]
+
+
def main():
args = parser.parse_args()
print_arguments(args)
if args.profile:
if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
- inference(args, data_reader=ctc_reader)
+ inference(args)
else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof:
- inference(args, data_reader=ctc_reader)
+ inference(args)
else:
- inference(args, data_reader=ctc_reader)
+ inference(args)
if __name__ == "__main__":
diff --git a/fluid/ocr_recognition/ctc_train.py b/fluid/ocr_recognition/train.py
old mode 100644
new mode 100755
similarity index 82%
rename from fluid/ocr_recognition/ctc_train.py
rename to fluid/ocr_recognition/train.py
index a1cb52993500322c11d80ede732156d376fbed88..7954d23dc02c93159315e4220ec2db0289fddb44
--- a/fluid/ocr_recognition/ctc_train.py
+++ b/fluid/ocr_recognition/train.py
@@ -1,9 +1,13 @@
-"""Trainer for OCR CTC model."""
+"""Trainer for OCR CTC or attention model."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
import paddle.fluid as fluid
+from utility import add_arguments, print_arguments, to_lodtensor, get_ctc_feeder_data, get_attention_feeder_data
import paddle.fluid.profiler as profiler
-from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net
-import ctc_reader
+from attention_model import attention_train_net
+import data_reader
import argparse
import functools
import sys
@@ -20,6 +24,7 @@ add_arg('log_period', int, 1000, "Log period.")
add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.")
add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.")
add_arg('save_model_dir', str, "./models", "The directory the model to be saved to.")
+add_arg('model', str, "crnn_ctc", "The type of network to use: 'crnn_ctc' or 'attention'.")
add_arg('init_model', str, None, "The init model file of directory.")
add_arg('use_gpu', bool, True, "Whether use GPU to train.")
add_arg('min_average_window',int, 10000, "Min average window.")
@@ -32,8 +37,16 @@ add_arg('skip_test', bool, False, "Whether to skip test phase.")
# yapf: enable
-def train(args, data_reader=ctc_reader):
- """OCR CTC training"""
+def train(args):
+ """OCR training"""
+
+ if args.model == "crnn_ctc":
+ train_net = ctc_train_net
+ get_feeder_data = get_ctc_feeder_data
+ else:
+ train_net = attention_train_net
+ get_feeder_data = get_attention_feeder_data
+
num_classes = None
train_images = None
train_list = None
@@ -43,20 +56,18 @@ def train(args, data_reader=ctc_reader):
) if num_classes is None else num_classes
data_shape = data_reader.data_shape()
# define network
- images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
- label = fluid.layers.data(
- name='label', shape=[1], dtype='int32', lod_level=1)
- sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(
- images, label, args, num_classes)
+ sum_cost, error_evaluator, inference_program, model_average = train_net(
+ args, data_shape, num_classes)
# data reader
train_reader = data_reader.train(
args.batch_size,
train_images_dir=train_images,
train_list_file=train_list,
- cycle=args.total_step > 0)
+ cycle=args.total_step > 0,
+ model=args.model)
test_reader = data_reader.test(
- test_images_dir=test_images, test_list_file=test_list)
+ test_images_dir=test_images, test_list_file=test_list, model=args.model)
# prepare environment
place = fluid.CPUPlace()
@@ -77,7 +88,7 @@ def train(args, data_reader=ctc_reader):
model_dir = os.path.dirname(args.init_model)
model_file_name = os.path.basename(args.init_model)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
- print "Init model from: %s." % args.init_model
+ print("Init model from: %s." % args.init_model)
train_exe = exe
error_evaluator.reset(exe)
@@ -104,18 +115,18 @@ def train(args, data_reader=ctc_reader):
for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe)
- print "\nTime: %s; Iter[%d]; Test seq error: %s.\n" % (
- time.time(), iter_num, str(test_seq_error[0]))
+ print("\nTime: %s; Iter[%d]; Test seq error: %s.\n" % (
+ time.time(), iter_num, str(test_seq_error[0])))
#Note: The following logs are special for CE monitoring.
#Other situations do not need to care about these logs.
- print "kpis test_acc %f" % (1 - test_seq_error[0])
+ print("kpis test_acc %f" % (1 - test_seq_error[0]))
def save_model(args, exe, iter_num):
filename = "model_%05d" % iter_num
fluid.io.save_params(
exe, dirname=args.save_model_dir, filename=filename)
- print "Saved model to: %s/%s." % (args.save_model_dir, filename)
+ print("Saved model to: %s/%s." % (args.save_model_dir, filename))
iter_num = 0
stop = False
@@ -144,18 +155,18 @@ def train(args, data_reader=ctc_reader):
iter_num += 1
# training log
if iter_num % args.log_period == 0:
- print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % (
+ print("\nTime: %s; Iter[%d]; Avg loss: %.3f; Avg seq err: %.3f" % (
time.time(), iter_num,
total_loss / (args.log_period * args.batch_size),
- total_seq_error / (args.log_period * args.batch_size))
- print "kpis train_cost %f" % (total_loss / (args.log_period *
- args.batch_size))
- print "kpis train_acc %f" % (
- 1 - total_seq_error / (args.log_period * args.batch_size))
+ total_seq_error / (args.log_period * args.batch_size)))
+ print("kpis train_cost %f" % (total_loss / (args.log_period *
+ args.batch_size)))
+ print("kpis train_acc %f" % (
+ 1 - total_seq_error / (args.log_period * args.batch_size)))
total_loss = 0.0
total_seq_error = 0.0
-# evaluate
+ # evaluate
if not args.skip_test and iter_num % args.eval_period == 0:
if model_average:
with model_average.apply(exe):
@@ -171,7 +182,7 @@ def train(args, data_reader=ctc_reader):
else:
save_model(args, exe, iter_num)
end_time = time.time()
- print "kpis train_duration %f" % (end_time - start_time)
+ print("kpis train_duration %f" % (end_time - start_time))
# Postprocess benchmark data
latencies = batch_times[args.skip_batch_num:]
latency_avg = np.average(latencies)
@@ -195,12 +206,12 @@ def main():
if args.profile:
if args.use_gpu:
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
- train(args, data_reader=ctc_reader)
+ train(args)
else:
with profiler.profiler("CPU", sorted_key='total') as cpuprof:
- train(args, data_reader=ctc_reader)
+ train(args)
else:
- train(args, data_reader=ctc_reader)
+ train(args)
if __name__ == "__main__":
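
The if/else dispatch on `--model` above could equally be table-driven, which makes registering further models a one-line change. A runnable sketch with stand-in lambdas (the real mapping would point at `ctc_train_net`/`get_ctc_feeder_data` and `attention_train_net`/`get_attention_feeder_data` as imported in train.py):

```
# stand-in lambdas are hypothetical; they only mark which pair gets selected
MODEL_REGISTRY = {
    "crnn_ctc": (lambda *a: "ctc net", lambda *a: "ctc feeder"),
    "attention": (lambda *a: "attention net", lambda *a: "attention feeder"),
}
train_net, get_feeder_data = MODEL_REGISTRY["attention"]
print(train_net())  # -> attention net
```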
diff --git a/fluid/ocr_recognition/utility.py b/fluid/ocr_recognition/utility.py
old mode 100644
new mode 100755
index 67a5bfa018bad5a4d69ba9d0d3cb63ff59214775..fb8d066c7a389b9fc3356025edbae4e6c0aa5720
--- a/fluid/ocr_recognition/utility.py
+++ b/fluid/ocr_recognition/utility.py
@@ -19,6 +19,8 @@ from __future__ import print_function
import distutils.util
import numpy as np
from paddle.fluid import core
+import paddle.fluid as fluid
+import six
def print_arguments(args):
@@ -37,7 +39,7 @@ def print_arguments(args):
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
- for arg, value in sorted(vars(args).iteritems()):
+ for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
@@ -77,14 +79,58 @@ def to_lodtensor(data, place):
return res
-def get_feeder_data(data, place, need_label=True):
+def get_ctc_feeder_data(data, place, need_label=True):
pixel_tensor = core.LoDTensor()
pixel_data = None
pixel_data = np.concatenate(
- map(lambda x: x[0][np.newaxis, :], data), axis=0).astype("float32")
+ list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
pixel_tensor.set(pixel_data, place)
- label_tensor = to_lodtensor(map(lambda x: x[1], data), place)
+ label_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
if need_label:
return {"pixel": pixel_tensor, "label": label_tensor}
else:
return {"pixel": pixel_tensor}
+
+
+def get_attention_feeder_data(data, place, need_label=True):
+ pixel_tensor = core.LoDTensor()
+ pixel_data = None
+ pixel_data = np.concatenate(
+ list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
+ pixel_tensor.set(pixel_data, place)
+ label_in_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
+ label_out_tensor = to_lodtensor(list(map(lambda x: x[2], data)), place)
+ if need_label:
+ return {
+ "pixel": pixel_tensor,
+ "label_in": label_in_tensor,
+ "label_out": label_out_tensor
+ }
+ else:
+ return {"pixel": pixel_tensor}
+
+
+def get_attention_feeder_for_infer(data, place):
+ batch_size = len(data)
+ init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64')
+ init_scores_data = np.array(
+ [1. for _ in range(batch_size)], dtype='float32')
+ init_ids_data = init_ids_data.reshape((batch_size, 1))
+ init_scores_data = init_scores_data.reshape((batch_size, 1))
+ init_recursive_seq_lens = [1] * batch_size
+ init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens]
+ init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens,
+ place)
+ init_scores = fluid.create_lod_tensor(init_scores_data,
+ init_recursive_seq_lens, place)
+
+ pixel_tensor = core.LoDTensor()
+ pixel_data = None
+ pixel_data = np.concatenate(
+ list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
+ pixel_tensor.set(pixel_data, place)
+ return {
+ "pixel": pixel_tensor,
+ "init_ids": init_ids,
+ "init_scores": init_scores
+ }
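
`get_attention_feeder_for_infer` seeds the decoder with one start id (0) and one score (1.0) per image, using a two-level LoD in which both levels mark singleton sequences. A minimal sketch of just that construction, assuming a Paddle version where `fluid.create_lod_tensor` accepts recursive sequence lengths as the code above does:

```
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
batch_size = 4
seq_lens = [1] * batch_size  # every beam starts as a length-1 sequence
init_ids = fluid.create_lod_tensor(
    np.zeros((batch_size, 1), dtype='int64'), [seq_lens, seq_lens], place)
init_scores = fluid.create_lod_tensor(
    np.ones((batch_size, 1), dtype='float32'), [seq_lens, seq_lens], place)
print(init_ids.lod())  # offset form: [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]
```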
diff --git a/fluid/sequence_tagging_for_ner/_ce.py b/fluid/sequence_tagging_for_ner/_ce.py
index 111a4d566b1cb69543bd7747cd76568f7de4b94c..8afd863032b374f76a644dea15af7999736a0f59 100644
--- a/fluid/sequence_tagging_for_ner/_ce.py
+++ b/fluid/sequence_tagging_for_ner/_ce.py
@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!!
-train_acc_kpi = AccKpi('train_precision', 0.005, actived=True)
-test_acc_kpi = CostKpi('test_precision', 0.005, actived=True)
+train_acc_kpi = AccKpi('train_precision', 0.005, actived=False)
+test_acc_kpi = CostKpi('test_precision', 0.005, actived=False)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
tracking_kpis = [
diff --git a/fluid/text_classification/_ce.py b/fluid/text_classification/_ce.py
index 100357204db7f3a8d0c1d3cbcbdc707410b20023..6c0b1ac428d21145ab9f89ef134614b43c3db3e9 100644
--- a/fluid/text_classification/_ce.py
+++ b/fluid/text_classification/_ce.py
@@ -8,7 +8,7 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!!
train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)
-train_cost_kpi = CostKpi('train_cost', 0.005, actived=True)
+train_cost_kpi = CostKpi('train_cost', 0.005, actived=False)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
tracking_kpis = [
diff --git a/fluid/text_classification/clouds/scdb_parallel_executor.py b/fluid/text_classification/clouds/scdb_parallel_executor.py
index 9d7722e9776d11c591f1ff0bd97b3e295205d300..cc5cd4ee9f9c86a0ed3f7c27e482026d6dbf7a13 100644
--- a/fluid/text_classification/clouds/scdb_parallel_executor.py
+++ b/fluid/text_classification/clouds/scdb_parallel_executor.py
@@ -3,6 +3,7 @@ import contextlib
import paddle
import paddle.fluid as fluid
import numpy as np
+import six
import sys
import time
import os
@@ -46,8 +47,8 @@ def data2tensor(data, place):
"""
data2tensor
"""
- input_seq = to_lodtensor(map(lambda x: x[0], data), place)
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+ input_seq = to_lodtensor([x[0] for x in data], place)
+ y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data}
@@ -56,8 +57,8 @@ def data2pred(data, place):
"""
data2tensor
"""
- input_seq = to_lodtensor(map(lambda x: x[0], data), place)
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+ input_seq = to_lodtensor([x[0] for x in data], place)
+ y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq}
@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file
"""
with open(vocab, "w") as fout:
- for k, v in word_dict.iteritems():
+ for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr)
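
`dict.iteritems()` exists only on Python 2; `six.iteritems()` resolves to `iteritems` there and to `items` on Python 3, so the loop above runs unchanged under either interpreter:

```
import six

word_dict = {"hello": 0, "world": 1}
for k, v in six.iteritems(word_dict):  # items() on Py3, iteritems() on Py2
    print("%s\t%s" % (k, v))
```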
@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict):
"""
- test_set=["car", "lbs", "spot", "weibo",
+ test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"]
"""
return data_reader(test_file, w_dict)
@@ -424,7 +425,7 @@ def start_train(train_reader,
start_exe.run(fluid.default_startup_program())
exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name)
- for pass_id in xrange(pass_num):
+ for pass_id in six.moves.xrange(pass_num):
total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0
for data in train_reader():
cost_val, acc_val = exe.run(feed=feeder.feed(data),
@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab",
"""
w_dict = scdb_word_dict(vocab=vocab)
test_files = [ "./thirdparty" + os.sep + f for f in test_list]
-
+
train_reader = paddle.batch(
scdb_train_data(train_dir, w_dict),
batch_size = 256)
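
The same compatibility pattern covers `xrange`, which Python 3 removed; `six.moves.xrange` aliases the lazy range type of whichever interpreter is running:

```
import six

for pass_id in six.moves.xrange(3):  # xrange on Py2, range on Py3; both lazy
    print("pass %d" % pass_id)
```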
diff --git a/fluid/text_classification/clouds/scdb_single_card.py b/fluid/text_classification/clouds/scdb_single_card.py
index 9cc39269913ab97341e5389b31ad9a5da2e8af51..3690e92776636d8a7c8cef0d9cd4d72414b7a628 100644
--- a/fluid/text_classification/clouds/scdb_single_card.py
+++ b/fluid/text_classification/clouds/scdb_single_card.py
@@ -3,6 +3,7 @@ import contextlib
import paddle
import paddle.fluid as fluid
import numpy as np
+import six
import sys
import time
import os
@@ -46,8 +47,8 @@ def data2tensor(data, place):
"""
data2tensor
"""
- input_seq = to_lodtensor(map(lambda x: x[0], data), place)
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+ input_seq = to_lodtensor([x[0] for x in data], place)
+ y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data}
@@ -56,8 +57,8 @@ def data2pred(data, place):
"""
data2tensor
"""
- input_seq = to_lodtensor(map(lambda x: x[0], data), place)
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+ input_seq = to_lodtensor([x[0] for x in data], place)
+ y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq}
@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file
"""
with open(vocab, "w") as fout:
- for k, v in word_dict.iteritems():
+ for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr)
@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict):
"""
- test_set=["car", "lbs", "spot", "weibo",
+ test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"]
"""
return data_reader(test_file, w_dict)
@@ -422,7 +423,7 @@ def start_train(train_reader,
feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program())
- for pass_id in xrange(pass_num):
+ for pass_id in six.moves.xrange(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader():
avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
diff --git a/fluid/text_classification/train.py b/fluid/text_classification/train.py
index 0ef923544f18d62f5a049ac79353ce4884b8d195..b2ffe4c6723120103c9b3e310b070f4c773aeeb4 100644
--- a/fluid/text_classification/train.py
+++ b/fluid/text_classification/train.py
@@ -1,4 +1,5 @@
import os
+import six
import sys
import time
import unittest
@@ -58,7 +59,7 @@ def train(train_reader,
if "CE_MODE_X" in os.environ:
fluid.default_startup_program().random_seed = 110
exe.run(fluid.default_startup_program())
- for pass_id in xrange(pass_num):
+ for pass_id in six.moves.xrange(pass_num):
pass_start = time.time()
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader():
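
The `CE_MODE_X` guard visible in this hunk pins the startup program's seed so continuous-evaluation runs initialize parameters identically, while normal runs stay random. The pattern in isolation:

```
import os
import paddle.fluid as fluid

# fix parameter initialization only when running under continuous evaluation
if "CE_MODE_X" in os.environ:
    fluid.default_startup_program().random_seed = 110
```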
diff --git a/fluid/text_classification/utils.py b/fluid/text_classification/utils.py
index 3673946b6f39eade1811dfc4d81c99b0ef9400bb..dce4743d9219aa9ed5ca78b9f690eb1366d92304 100644
--- a/fluid/text_classification/utils.py
+++ b/fluid/text_classification/utils.py
@@ -43,8 +43,8 @@ def data2tensor(data, place):
"""
data2tensor
"""
- input_seq = to_lodtensor(map(lambda x: x[0], data), place)
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+ input_seq = to_lodtensor([x[0] for x in data], place)
+ y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data}