diff --git a/fluid/ocr_recognition/crnn_ctc_model.py b/fluid/ocr_recognition/crnn_ctc_model.py
index 719c0158ec0e28c46a2915e42bd81533f848673c..73616ecb36ca2661eb8e4898caf34fc2d91b9bdc 100644
--- a/fluid/ocr_recognition/crnn_ctc_model.py
+++ b/fluid/ocr_recognition/crnn_ctc_model.py
@@ -26,7 +26,12 @@ def conv_bn_pool(input,
             bias_attr=bias,
             is_test=is_test)
     tmp = fluid.layers.pool2d(
-        input=tmp, pool_size=2, pool_type='max', pool_stride=2, use_cudnn=True)
+        input=tmp,
+        pool_size=2,
+        pool_type='max',
+        pool_stride=2,
+        use_cudnn=True,
+        ceil_mode=True)
     return tmp
 
 
@@ -136,26 +141,61 @@ def encoder_net(images,
 def ctc_train_net(images, label, args, num_classes):
     regularizer = fluid.regularizer.L2Decay(args.l2)
     gradient_clip = None
-    fc_out = encoder_net(
-        images,
-        num_classes,
-        regularizer=regularizer,
-        gradient_clip=gradient_clip)
+    if args.parallel:
+        places = fluid.layers.get_places()
+        pd = fluid.layers.ParallelDo(places)
+        with pd.do():
+            images_ = pd.read_input(images)
+            label_ = pd.read_input(label)
+
+            fc_out = encoder_net(
+                images_,
+                num_classes,
+                regularizer=regularizer,
+                gradient_clip=gradient_clip)
+
+            cost = fluid.layers.warpctc(
+                input=fc_out,
+                label=label_,
+                blank=num_classes,
+                norm_by_times=True)
+            sum_cost = fluid.layers.reduce_sum(cost)
+
+            decoded_out = fluid.layers.ctc_greedy_decoder(
+                input=fc_out, blank=num_classes)
+
+            pd.write_output(sum_cost)
+            pd.write_output(decoded_out)
+
+        sum_cost, decoded_out = pd()
+        sum_cost = fluid.layers.reduce_sum(sum_cost)
+
+    else:
+        fc_out = encoder_net(
+            images,
+            num_classes,
+            regularizer=regularizer,
+            gradient_clip=gradient_clip)
+
+        cost = fluid.layers.warpctc(
+            input=fc_out, label=label, blank=num_classes, norm_by_times=True)
+        sum_cost = fluid.layers.reduce_sum(cost)
+        decoded_out = fluid.layers.ctc_greedy_decoder(
+            input=fc_out, blank=num_classes)
 
-    cost = fluid.layers.warpctc(
-        input=fc_out, label=label, blank=num_classes, norm_by_times=True)
-    sum_cost = fluid.layers.reduce_sum(cost)
+    casted_label = fluid.layers.cast(x=label, dtype='int64')
+    error_evaluator = fluid.evaluator.EditDistance(
+        input=decoded_out, label=casted_label)
+
+    inference_program = fluid.default_main_program().clone()
+    with fluid.program_guard(inference_program):
+        inference_program = fluid.io.get_inference_program(error_evaluator)
 
     optimizer = fluid.optimizer.Momentum(
         learning_rate=args.learning_rate, momentum=args.momentum)
-    optimizer.minimize(sum_cost)
+    _, params_grads = optimizer.minimize(sum_cost)
 
-    decoded_out = fluid.layers.ctc_greedy_decoder(
-        input=fc_out, blank=num_classes)
-    casted_label = fluid.layers.cast(x=label, dtype='int64')
-    error_evaluator = fluid.evaluator.EditDistance(
-        input=decoded_out, label=casted_label)
-    return sum_cost, error_evaluator
+    return sum_cost, error_evaluator, inference_program
 
 
 def ctc_infer(images, num_classes):
diff --git a/fluid/ocr_recognition/ctc_train.py b/fluid/ocr_recognition/ctc_train.py
index 85b1d2e708f73d7ac049af276626a38e76d19399..c2d8fd26bbdeb3ad5c9fb2c1ade3b2b22a0dfd44 100644
--- a/fluid/ocr_recognition/ctc_train.py
+++ b/fluid/ocr_recognition/ctc_train.py
@@ -1,5 +1,4 @@
 """Trainer for OCR CTC model."""
-import paddle.v2 as paddle
 import paddle.fluid as fluid
 import dummy_reader
 import ctc_reader
@@ -24,12 +23,12 @@ add_arg('momentum', float, 0.9, "Momentum.")
 add_arg('rnn_hidden_size',int, 200, "Hidden size of rnn layers.")
 add_arg('device', int, 0, "Device id.'-1' means running on CPU"
                           "while '0' means GPU-0.")
+add_arg('parallel', bool, True, "Whether use parallel training.")
 # yapf: disable
 
 def load_parameter(place):
     params = load_param('./name.map', './data/model/results_without_avg_window/pass-00000/')
     for name in params:
-        # print "param: %s" % name
         t = fluid.global_scope().find_var(name).get_tensor()
         t.set(params[name], place)
 
@@ -41,7 +40,8 @@ def train(args, data_reader=dummy_reader):
     # define network
     images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
     label = fluid.layers.data(name='label', shape=[1], dtype='int32', lod_level=1)
-    sum_cost, error_evaluator = ctc_train_net(images, label, args, num_classes)
+    sum_cost, error_evaluator, inference_program = ctc_train_net(images, label, args, num_classes)
+
     # data reader
     train_reader = data_reader.train(args.batch_size)
     test_reader = data_reader.test()
@@ -51,11 +51,8 @@ def train(args, data_reader=dummy_reader):
         place = fluid.CUDAPlace(args.device)
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
-    #load_parameter(place)
 
-    inference_program = fluid.io.get_inference_program(error_evaluator)
 
-
     for pass_id in range(args.pass_num):
         error_evaluator.reset(exe)
         batch_id = 1
@@ -78,7 +75,6 @@ def train(args, data_reader=dummy_reader):
                 sys.stdout.flush()
             batch_id += 1
 
-        # evaluate model on test data
         error_evaluator.reset(exe)
         for data in test_reader():
             exe.run(inference_program, feed=get_feeder_data(data, place))