# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import errno
import os
import time
import re
import numpy as np
import paddle
from .download import get_weights_path

from .logger import setup_logger
logger = setup_logger(__name__)


def is_url(path):
    """
    Whether path is a URL.
    Args:
        path (str): the path string to check.
    """
    return path.startswith('http://') or path.startswith('https://')


def get_weight_path(path):
    """
    Download weights from a URL to the local weights cache and return the
    local path. In a multi-trainer run (PADDLE_TRAINERS_NUM and
    PADDLE_TRAINER_ID set), only trainer 0 downloads the weights while the
    other trainers wait on a lock file.
    """
    env = os.environ
    if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        num_trainers = int(env['PADDLE_TRAINERS_NUM'])
        if num_trainers <= 1:
            path = get_weights_path(path)
        else:
            from ppdet.utils.download import map_path, WEIGHTS_HOME
            weight_path = map_path(path, WEIGHTS_HOME)
            lock_path = weight_path + '.lock'
            if not os.path.exists(weight_path):
                try:
                    os.makedirs(os.path.dirname(weight_path))
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
                with open(lock_path, 'w'):  # touch the lock file
                    os.utime(lock_path, None)
                if trainer_id == 0:
                    get_weights_path(path)
                    os.remove(lock_path)
                else:
                    while os.path.exists(lock_path):
                        time.sleep(1)
            path = weight_path
    else:
        path = get_weights_path(path)

    return path
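
# Usage sketch (the URL below is a placeholder, not a real file): callers pass
# an http(s) weight URL and, in a multi-trainer run, the download is
# serialized through the `.lock` file handling above.
#
#   weights = get_weight_path('https://example.org/weights/model.pdparams')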


def _strip_postfix(path):
    path, ext = os.path.splitext(path)
    assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
            "Unknown postfix {} from weights".format(ext)
    return path
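# For example, `_strip_postfix('output/model_final.pdparams')` returns
# 'output/model_final'; an unrecognized extension trips the assertion above.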


def load_weight(model, weight, optimizer=None):
    """
    Load model parameters (and, if `optimizer` is given and a matching
    `.pdopt` file exists, optimizer states) saved by `save_model`. Returns
    the recorded `last_epoch` when optimizer states are restored, otherwise
    returns None.
    """
    if is_url(weight):
        weight = get_weight_path(weight)

    path = _strip_postfix(weight)
    pdparam_path = path + '.pdparams'
    if not os.path.exists(pdparam_path):
        raise ValueError("Model pretrain path {} does not "
                         "exist.".format(pdparam_path))

    param_state_dict = paddle.load(pdparam_path)
    model.set_dict(param_state_dict)

    if optimizer is not None and os.path.exists(path + '.pdopt'):
        last_epoch = 0
        optim_state_dict = paddle.load(path + '.pdopt')
        # to work around a resume bug (may be fixed in Paddle 2.0), fill in
        # any optimizer state keys missing from the checkpoint
        for key in optimizer.state_dict().keys():
            if key not in optim_state_dict:
                optim_state_dict[key] = optimizer.state_dict()[key]
        if 'last_epoch' in optim_state_dict:
            last_epoch = optim_state_dict.pop('last_epoch')
        optimizer.set_state_dict(optim_state_dict)
        return last_epoch
    return
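
# Resume sketch (names are illustrative; `model` and `optimizer` come from the
# caller): reload a checkpoint written by `save_model` and continue training
# from the recorded epoch.
#
#   last_epoch = load_weight(model, 'output/yolov3/model_final', optimizer)
#   start_epoch = (last_epoch or 0) + 1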


def load_pretrain_weight(model,
                         pretrain_weight,
                         load_static_weights=False,
                         weight_type='pretrain'):
    """
    Load pretrained weights. With `weight_type='pretrain'` only the backbone
    is initialized; with `'finetune'` the full model is initialized and
    parameters whose shapes do not match the current model are skipped.
    Set `load_static_weights=True` for weights saved with the static-graph
    API.
    """
    assert weight_type in ['pretrain', 'finetune']
    if is_url(pretrain_weight):
        pretrain_weight = get_weight_path(pretrain_weight)

    path = _strip_postfix(pretrain_weight)
    if not (os.path.isdir(path) or os.path.isfile(path) or
            os.path.exists(path + '.pdparams')):
        raise ValueError("Model pretrain path {} does not "
                         "exist.".format(path))

    model_dict = model.state_dict()

    if load_static_weights:
        pre_state_dict = paddle.static.load_program_state(path)
        param_state_dict = {}
        for key in model_dict.keys():
            weight_name = model_dict[key].name
            if weight_name in pre_state_dict.keys():
                logger.info('Load weight: {}, shape: {}'.format(
                    weight_name, pre_state_dict[weight_name].shape))
                param_state_dict[key] = pre_state_dict[weight_name]
            else:
                param_state_dict[key] = model_dict[key]
        model.set_dict(param_state_dict)
        return

    param_state_dict = paddle.load(path + '.pdparams')
    if weight_type == 'pretrain':
        model.backbone.set_dict(param_state_dict)
    else:
        # finetune: drop checkpoint parameters whose shapes do not match the
        # current model so that set_dict() does not fail on them
        for name, weight in model_dict.items():
            if name in param_state_dict:
                if weight.shape != param_state_dict[name].shape:
                    param_state_dict.pop(name, None)
        model.set_dict(param_state_dict)
    return
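
# Pretrain/finetune sketch (the weight sources are illustrative): initialize
# only the backbone from classification weights, or the whole detector for
# finetuning with shape-mismatched parameters skipped.
#
#   load_pretrain_weight(model, backbone_weights_url, weight_type='pretrain')
#   load_pretrain_weight(model, detector_checkpoint, weight_type='finetune')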


def save_model(model, optimizer, save_dir, save_name, last_epoch):
    """
    save model into disk.
    
    Args:
        model (paddle.nn.Layer): the Layer instalce to save parameters.
        optimizer (paddle.optimizer.Optimizer): the Optimizer instance to
            save optimizer states.
        save_dir (str): the directory to be saved.
        save_name (str): the path to be saved.
        last_epoch (int): the epoch index.
    """
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, save_name)
    paddle.save(model.state_dict(), save_path + ".pdparams")
    state_dict = optimizer.state_dict()
    state_dict['last_epoch'] = last_epoch
    paddle.save(state_dict, save_path + ".pdopt")
    logger.info("Save checkpoint: {}".format(save_dir))
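
# Checkpoint sketch (illustrative values): this writes
# `<save_dir>/<save_name>.pdparams` and `<save_dir>/<save_name>.pdopt`, which
# `load_weight` above can restore.
#
#   save_model(model, optimizer, 'output/yolov3', 'model_final', last_epoch=9)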