# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import os
import argparse
import functools

import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from paddle.fluid.dygraph.base import to_variable

import data_reader
from utility import add_arguments, print_arguments, get_attention_feeder_data
from nets import OCRAttention
from eval import evaluate

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size',        int,   32,         "Minibatch size.")
add_arg('epoch_num',         int,   30,         "Epoch number.")
add_arg('lr',                float, 0.001,      "Learning rate.")
add_arg('lr_decay_strategy', str,   "",         "Learning rate decay strategy.")
add_arg('log_period',        int,   200,        "Log period.")
add_arg('save_model_period', int,   2000,       "Save model period. '-1' means never saving the model.")
add_arg('eval_period',       int,   2000,       "Evaluate period. '-1' means never evaluating the model.")
add_arg('save_model_dir',    str,   "./output", "The directory the model is saved to.")
add_arg('train_images',      str,   None,       "The directory of images to be used for training.")
add_arg('train_list',        str,   None,       "The list file of images to be used for training.")
add_arg('test_images',       str,   None,       "The directory of images to be used for testing.")
add_arg('test_list',         str,   None,       "The list file of images to be used for testing.")
add_arg('init_model',        str,   None,       "The init model file or directory.")
add_arg('use_gpu',           bool,  True,       "Whether to use GPU to train.")
add_arg('parallel',          bool,  False,      "Whether to use parallel training.")
add_arg('profile',           bool,  False,      "Whether to use profiling.")
add_arg('skip_batch_num',    int,   0,          "The number of first minibatches to skip as warm-up for better performance test.")
add_arg('skip_test',         bool,  False,      "Whether to skip the test phase.")
# model hyperparameters
add_arg('encoder_size',      int,   200,        "Encoder size.")
add_arg('decoder_size',      int,   128,        "Decoder size.")
add_arg('word_vector_dim',   int,   128,        "Word vector dim.")
add_arg('num_classes',       int,   95,         "Number of classes.")
add_arg('gradient_clip',     float, 5.0,        "Gradient clip value.")
# yapf: enable


def train(args):
    with fluid.dygraph.guard():
        # Sum gradients in a deterministic order so runs are reproducible.
        backward_strategy = fluid.dygraph.BackwardStrategy()
        backward_strategy.sort_sum_gradient = True

        ocr_attention = OCRAttention(
            batch_size=args.batch_size,
            encoder_size=args.encoder_size,
            decoder_size=args.decoder_size,
            num_classes=args.num_classes,
            word_vector_dim=args.word_vector_dim)

        LR = args.lr
        if args.lr_decay_strategy == "piecewise_decay":
            # Decay the learning rate by 10x at step 200k and again at 250k.
            learning_rate = fluid.layers.piecewise_decay(
                [200000, 250000], [LR, LR * 0.1, LR * 0.01])
        else:
            learning_rate = LR
        optimizer = fluid.optimizer.Adam(
            learning_rate=learning_rate,
            parameter_list=ocr_attention.parameters())
        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(
            args.gradient_clip)

        train_reader = data_reader.data_reader(
            args.batch_size,
            shuffle=True,
            images_dir=args.train_images,
            list_file=args.train_list,
            data_type='train')
        test_reader = data_reader.data_reader(
            args.batch_size,
            images_dir=args.test_images,
            list_file=args.test_list,
            data_type="test")

        if not os.path.exists(args.save_model_dir):
            os.makedirs(args.save_model_dir)

        total_step = 0
        epoch_num = args.epoch_num
        for epoch in range(epoch_num):
            batch_id = 0
            total_loss = 0.0
            for data in train_reader():
                total_step += 1
                data_dict = get_attention_feeder_data(data)

                label_in = to_variable(data_dict["label_in"])
                label_out = to_variable(data_dict["label_out"])
                label_out.stop_gradient = True
                img = to_variable(data_dict["pixel"])

                prediction = ocr_attention(img, label_in)
                # Flatten predictions to one row per decoded character so
                # they line up with the flattened labels for cross_entropy.
                prediction = fluid.layers.reshape(
                    prediction,
                    [label_out.shape[0] * label_out.shape[1], -1],
                    inplace=False)
                label_out = fluid.layers.reshape(
                    label_out, [-1, 1], inplace=False)
                loss = fluid.layers.cross_entropy(
                    input=prediction, label=label_out)
                # Zero out the loss at padded positions before summing.
                mask = to_variable(data_dict["mask"])
                loss = fluid.layers.elementwise_mul(loss, mask, axis=0)
                avg_loss = fluid.layers.reduce_sum(loss)

                total_loss += avg_loss.numpy()
                # Apply the deterministic backward strategy configured above.
                avg_loss.backward(backward_strategy)
                optimizer.minimize(avg_loss, grad_clip=grad_clip)
                ocr_attention.clear_gradients()

                if batch_id > 0 and batch_id % args.log_period == 0:
                    print("epoch: {}, batch_id: {}, lr: {}, loss {}".format(
                        epoch, batch_id,
                        optimizer._global_learning_rate().numpy(),
                        total_loss / args.batch_size / args.log_period))
                    total_loss = 0.0

                # Guard on the period being positive so that '-1' really means
                # "never", as documented in the argument help strings.
                if args.save_model_period > 0 and \
                        total_step % args.save_model_period == 0:
                    # Only the first device writes checkpoints in multi-card runs.
                    if fluid.dygraph.parallel.Env().dev_id == 0:
                        model_file = os.path.join(
                            args.save_model_dir, 'step_{}'.format(total_step))
                        fluid.save_dygraph(
                            ocr_attention.state_dict(), model_file)
                        print('step_{}.pdparams saved!'.format(total_step))

                if args.eval_period > 0 and \
                        total_step % args.eval_period == 0:
                    ocr_attention.eval()
                    evaluate(ocr_attention, test_reader, args.batch_size)
                    ocr_attention.train()

                batch_id += 1


if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)
    if args.profile:
        if args.use_gpu:
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(args)
        else:
            with profiler.profiler("CPU", sorted_key='total') as cpuprof:
                train(args)
    else:
        train(args)
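
# Example invocation (a sketch, not repo documentation: the script name
# "train.py" and the dataset paths below are placeholder assumptions; point
# them at your own image directories and list files):
#
#   python train.py \
#       --train_images ./data/train_images --train_list ./data/train.list \
#       --test_images ./data/test_images --test_list ./data/test.list \
#       --use_gpu True --batch_size 32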