# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function import os import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import paddle.fluid.profiler as profiler import paddle.fluid as fluid import data_reader from paddle.fluid.dygraph.base import to_variable import argparse import functools from utility import add_arguments, print_arguments, get_attention_feeder_data from model import Input, set_device from nets import OCRAttention, CrossEntropyCriterion from eval import evaluate parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable add_arg('batch_size', int, 32, "Minibatch size.") add_arg('epoch_num', int, 30, "Epoch number.") add_arg('lr', float, 0.001, "Learning rate.") add_arg('lr_decay_strategy', str, "", "Learning rate decay strategy.") add_arg('log_period', int, 200, "Log period.") add_arg('save_model_period', int, 2000, "Save model period. '-1' means never saving the model.") add_arg('eval_period', int, 2000, "Evaluate period. '-1' means never evaluating the model.") add_arg('save_model_dir', str, "./output", "The directory the model to be saved to.") add_arg('train_images', str, None, "The directory of images to be used for training.") add_arg('train_list', str, None, "The list file of images to be used for training.") add_arg('test_images', str, None, "The directory of images to be used for test.") add_arg('test_list', str, None, "The list file of images to be used for training.") add_arg('init_model', str, None, "The init model file of directory.") add_arg('use_gpu', bool, True, "Whether use GPU to train.") add_arg('parallel', bool, False, "Whether use parallel training.") add_arg('profile', bool, False, "Whether to use profiling.") add_arg('skip_batch_num', int, 0, "The number of first minibatches to skip as warm-up for better performance test.") add_arg('skip_test', bool, False, "Whether to skip test phase.") # model hyper paramters add_arg('encoder_size', int, 200, "Encoder size.") add_arg('decoder_size', int, 128, "Decoder size.") add_arg('word_vector_dim', int, 128, "Word vector dim.") add_arg('num_classes', int, 95, "Number classes.") add_arg('gradient_clip', float, 5.0, "Gradient clip value.") add_arg('dynamic', bool, False, "Whether to use dygraph.") def train(args): device = set_device("gpu" if args.use_gpu else "cpu") fluid.enable_dygraph(device) if args.dynamic else None ocr_attention = OCRAttention(encoder_size=args.encoder_size, decoder_size=args.decoder_size, num_classes=args.num_classes, word_vector_dim=args.word_vector_dim) LR = args.lr if args.lr_decay_strategy == "piecewise_decay": learning_rate = fluid.layers.piecewise_decay([200000, 250000], [LR, LR * 0.1, LR * 0.01]) else: learning_rate = LR optimizer = fluid.optimizer.Adam(learning_rate=learning_rate, parameter_list=ocr_attention.parameters()) # grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(args.gradient_clip) inputs = [ Input([None, 1, 48, 384], "float32", name="pixel"), Input([None, None], "int64", name="label_in"), ] labels = [ Input([None, None], "int64", name="label_out"), Input([None, None], "float32", name="mask")] ocr_attention.prepare(optimizer, CrossEntropyCriterion(), inputs=inputs, labels=labels) train_reader = data_reader.data_reader( args.batch_size, shuffle=True, images_dir=args.train_images, list_file=args.train_list, data_type='train') # test_reader = data_reader.data_reader( # args.batch_size, # images_dir=args.test_images, # list_file=args.test_list, # data_type="test") # if not os.path.exists(args.save_model_dir): # os.makedirs(args.save_model_dir) total_step = 0 epoch_num = args.epoch_num for epoch in range(epoch_num): batch_id = 0 total_loss = 0.0 for data in train_reader(): total_step += 1 data_dict = get_attention_feeder_data(data) pixel = data_dict["pixel"] label_in = data_dict["label_in"].reshape([pixel.shape[0], -1]) label_out = data_dict["label_out"].reshape([pixel.shape[0], -1]) mask = data_dict["mask"].reshape(label_out.shape).astype("float32") avg_loss = ocr_attention.train(inputs=[pixel, label_in], labels=[label_out, mask])[0] total_loss += avg_loss if True:#batch_id > 0 and batch_id % args.log_period == 0: print("epoch: {}, batch_id: {}, loss {}".format(epoch, batch_id, total_loss / args.batch_size / args.log_period)) total_loss = 0.0 batch_id += 1 if __name__ == '__main__': args = parser.parse_args() print_arguments(args) if args.profile: if args.use_gpu: with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: train(args) else: with profiler.profiler("CPU", sorted_key='total') as cpuprof: train(args) else: train(args)