train.py 4.4 KB
Newer Older
L
LDOUBLEV 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
21
# Make this script's directory and its parent importable so that the
# project-local imports further down in this file resolve when the script is
# launched directly.
# NOTE(review): a module-level name `__dir__` is also the hook that `dir()`
# looks up on a module (PEP 562); binding it to a string is harmless for a
# script, but `dir()` on this module would fail. The name is kept as-is
# because other code may refer to it.
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
L
LDOUBLEV 已提交
24 25 26 27 28 29 30 31 32 33


def set_paddle_flags(**kwargs):
    """Export keyword arguments as environment variables, keeping existing ones.

    Each keyword name is used as the variable name and its value is stored
    after conversion with ``str``. A variable that is already present in
    ``os.environ`` is left untouched.
    """
    for flag_name in kwargs:
        # Never override a value the user already exported.
        if flag_name in os.environ:
            continue
        os.environ[flag_name] = str(kwargs[flag_name])


# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they will
# not take effect.
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

39
import tools.program as program
from paddle import fluid
from ppocr.utils.utility import initial_logger
# NOTE(review): the logger is created between the imports; presumably so that
# logging is configured before the modules below are imported. Confirm before
# reordering these lines.
logger = initial_logger()
from ppocr.data.reader_main import reader_main
from ppocr.utils.save_load import init_model
from paddle.fluid.contrib.model_stat import summary
L
LDOUBLEV 已提交
46 47 48


def main():
    """Build the train and eval programs, then run the train/eval loop.

    Reads the module-level names ``config``, ``train_program``,
    ``startup_program``, ``place`` and ``train_alg_type``, which the
    ``__main__`` guard binds from ``program.preprocess()`` before calling
    this function. The final dispatch picks the det / rec / cls runner in
    ``program`` according to ``train_alg_type``.
    """
    # build train program
    train_build_outputs = program.build(
        config, train_program, startup_program, mode='train')
    train_loader = train_build_outputs[0]
    train_fetch_name_list = train_build_outputs[1]
    train_fetch_varname_list = train_build_outputs[2]
    train_opt_loss_name = train_build_outputs[3]
    model_average = train_build_outputs[-1]

    # build eval program
    eval_program = fluid.Program()
    eval_build_outputs = program.build(
        config, eval_program, startup_program, mode='eval')
    eval_fetch_name_list = eval_build_outputs[1]
    eval_fetch_varname_list = eval_build_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    # initialize train reader
    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    # initialize eval reader
    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    # compile program for multi-devices
    train_compile_program = program.create_multi_devices_program(
        train_program, train_opt_loss_name)

    # dump model structure
    if config['Global']['debug']:
        if train_alg_type == 'rec' and 'attention' in config['Global'][
                'loss_type']:
            logger.warning('Does not support dump attention...')
        else:
            summary(train_program)

    init_model(config, train_program, exe)

    train_info_dict = {
        'compile_program': train_compile_program,
        'train_program': train_program,
        'reader': train_loader,
        'fetch_name_list': train_fetch_name_list,
        'fetch_varname_list': train_fetch_varname_list,
        'model_average': model_average,
    }

    eval_info_dict = {
        'program': eval_program,
        'reader': eval_reader,
        'fetch_name_list': eval_fetch_name_list,
        'fetch_varname_list': eval_fetch_varname_list,
    }

    # Any algorithm type other than 'det' or 'rec' falls through to the
    # classifier runner.
    if train_alg_type == 'det':
        program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict)
    elif train_alg_type == 'rec':
        program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)
    else:
        program.train_eval_cls_run(config, exe, train_info_dict, eval_info_dict)
L
LDOUBLEV 已提交
108 109


110
def test_reader():
    """Iterate the training reader once and log per-batch timing.

    Logs the module-level ``config``, builds the train-mode reader with
    ``reader_main`` and, for every batch it yields, logs the running batch
    count, ``len(data)`` and the seconds elapsed since the previous batch.
    An exception raised while reading is logged instead of propagated, and
    the closing log line states whether the pass succeeded or failed.
    """
    import time

    logger.info(config)
    train_reader = reader_main(config=config, mode="train")
    count = 0
    starttime = time.time()
    try:
        for data in train_reader():
            count += 1
            batch_time = time.time() - starttime
            starttime = time.time()
            # Build the message up front: extra positional arguments to
            # logger.info() are %-interpolated into the message, and
            # "reader:" has no placeholders for them.
            logger.info("reader: {} {} {}".format(count,
                                                  len(data), batch_time))
    except Exception as e:
        logger.info(e)
        logger.info("finish reader: {}, Failed!".format(count))
    else:
        # Only claim success when the whole pass completed without an error.
        logger.info("finish reader: {}, Success!".format(count))
126 127


L
LDOUBLEV 已提交
128
if __name__ == '__main__':
    # main() reads all five of these names as module-level globals (and
    # test_reader() reads config), so they must be bound before the call.
    (startup_program, train_program, place, config,
     train_alg_type) = program.preprocess()
    main()
    # To exercise only the data reader, call test_reader() instead of main().