# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import importlib
import os
import sys

import yaml
import copy
import collections

# `collections.abc` is a submodule, so import it explicitly instead of relying
# on some other module having loaded it as a side effect. Python 2 has no such
# submodule; there the ABCs are reachable from `collections` itself.
try:
    from collections import abc as collectionsAbc
except ImportError:
    collectionsAbc = collections

from .config.schema import SchemaDict, SharedConfig, extract_schema
from .config.yaml_helpers import serializable

# Public API of this module (the names exported by a star-import).
__all__ = [
    'global_config',
    'load_config',
    'merge_config',
    'get_registered_modules',
    'create',
    'register',
    'serializable',
    'dump_value',
]


def dump_value(value):
    """
    Render a config value as a single-line string.

    Args:
        value: Value to render. Dicts, tuples, lists and any object that has
            a `__dict__` are dumped as flow-style YAML; everything else is
            treated as a primitive.

    Returns: the flow-style YAML text wrapped in single quotes for
        containers/objects, `str(value)` for primitives
    """
    # XXX this is hackish, but collections.abc is not available in python 2
    if hasattr(value, '__dict__') or isinstance(value, (dict, tuple, list)):
        lines = yaml.dump(value, default_flow_style=True).split('\n')
        # Strip only the trailing newline and the YAML document-end marker
        # ('...') that PyYAML may append; a '...' occurring inside the data
        # itself must be kept, so a blanket `replace` is not used here.
        while lines and lines[-1] in ('', '...'):
            lines.pop()
        # Joining without a separator collapses wrapped output to one line.
        return "'{}'".format(''.join(lines))

    # primitive types
    return str(value)


class AttrDict(dict):
    """Dict whose top-level keys can also be read as attributes.

    Only the first level is exposed this way: nested dicts are returned
    unchanged and are not wrapped.
    """

    def __init__(self, **kwargs):
        # Keyword arguments become the initial items of the dict.
        super(AttrDict, self).__init__(**kwargs)

    def __getattr__(self, key):
        # Called only after normal attribute lookup failed, so real dict
        # attributes and methods always take precedence over stored keys.
        if key not in self:
            raise AttributeError("object has no attribute '{}'".format(key))
        return self[key]


# Module-wide config store: `merge_config` merges loaded config values into it
# and `register` stores the extracted schema of each module class under the
# class name.
global_config = AttrDict()

# Top-level config key whose value is the path of a separate reader config
# file; `load_config` merges that file and then removes the key.
READER_KEY = '_READER_'


def load_config(file_path):
    """
    Load config from file.

    The file is parsed as YAML and merged into the global config. If the
    parsed config contains the `_READER_` key, its value is taken as the path
    of a second YAML file (a leading `~` is expanded and a relative path is
    resolved against the directory of `file_path`); that file is merged into
    the global config as well and the key itself is dropped.

    Args:
        file_path (str): Path of the config file to be loaded.

    Returns: global config
    """
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"

    # Merge into a scratch dict first so the reader key can be resolved and
    # removed before anything reaches the global config.
    cfg = merge_config(_load_yaml(file_path), AttrDict())

    if READER_KEY in cfg:
        reader_cfg = cfg[READER_KEY]
        if reader_cfg.startswith("~"):
            reader_cfg = os.path.expanduser(reader_cfg)
        # `isabs` also recognises platform-specific absolute paths, which a
        # plain leading-'/' test would wrongly treat as relative.
        if not os.path.isabs(reader_cfg):
            reader_cfg = os.path.join(os.path.dirname(file_path), reader_cfg)

        merge_config(_load_yaml(reader_cfg))
        del cfg[READER_KEY]

    merge_config(cfg)

    return global_config


def _load_yaml(path):
    """Parse the YAML file at `path`; an empty file yields an empty dict."""
    # NOTE: `yaml.Loader` can construct arbitrary Python objects, so only
    #       config files from trusted sources must be loaded this way.
    with open(path) as f:
        loaded = yaml.load(f, Loader=yaml.Loader)
    return {} if loaded is None else loaded


def dict_merge(dct, merge_dct):
    """ Recursive dict merge. Inspired by :meth:``dict.update()``, instead of
    updating only top-level keys, dict_merge recurses down into dicts nested
    to an arbitrary depth, updating keys. The ``merge_dct`` is merged into
    ``dct``.

    A key is merged recursively only when it already holds a dict in ``dct``
    and the incoming value is a mapping; in every other case the incoming
    value replaces the existing one (it is assigned as is, not copied).

    Args:
        dct: dict onto which the merge is executed
        merge_dct: dct merged into dct

    Returns: dct
    """
    for k, v in merge_dct.items():
        if (k in dct and isinstance(dct[k], dict) and
                isinstance(v, collectionsAbc.Mapping)):
            dict_merge(dct[k], v)
        else:
            dct[k] = v
    return dct


def merge_config(config, another_cfg=None):
    """
    Merge config into global config or another_cfg.

    Args:
        config (dict): Config to be merged.
        another_cfg (dict): Config to merge into instead of the global
            config. When None (the default) the global config is the target.

    Returns: the merge target (global config, or another_cfg when given),
        updated in place
    """
    # `is not None` rather than truthiness: an empty dict is a valid target.
    dct = another_cfg if another_cfg is not None else global_config
    dct = dict_merge(dct, config)

    # NOTE: training batch size defined only in TrainReader, synchronized
    #       batch size config to global, models can get batch size config
    #       from global config when building model.
    #       batch size in evaluation or inference can also be added here
    if 'TrainReader' in dct and 'batch_size' in dct['TrainReader']:
        dct['train_batch_size'] = dct['TrainReader']['batch_size']

    return dct


def get_registered_modules():
    """
    Collect the registered modules from the global config.

    Returns: a new dict holding every global config entry whose value is a
        `SchemaDict`, keyed as in the global config
    """
    modules = {}
    for name, entry in global_config.items():
        if isinstance(entry, SchemaDict):
            modules[name] = entry
    return modules


def make_partial(cls):
    """
    Turn a class declaring `__op__` into a callable wrapper around that op.

    Calling an instance of the wrapped class invokes the op with the given
    positional arguments and with the instance attributes as keyword
    arguments; keyword arguments passed at call time take precedence.

    Args:
        cls (type): Class with an `__op__` attribute, either the dotted path
            of the op as a string or the op callable itself.

    Returns: cls, modified in place; returned untouched (after logging a
        warning) when the op cannot be found in its module
    """
    if isinstance(cls.__op__, str):
        sep = cls.__op__.split('.')
        op_name = sep[-1]
        op_module = importlib.import_module('.'.join(sep[:-1]))
    else:
        op_name = cls.__op__.__name__
        op_module = importlib.import_module(cls.__op__.__module__)

    if not hasattr(op_module, op_name):
        import logging
        logger = logging.getLogger(__name__)
        # `Logger.warn` is a deprecated alias; `warning` is the supported name.
        logger.warning('{} OP not found, maybe a newer version of paddle '
                       'is required.'.format(cls.__op__))
        return cls

    op = getattr(op_module, op_name)
    # Keep a category the class already declares; default to 'op' otherwise.
    cls.__category__ = getattr(cls, '__category__', None) or 'op'

    def partial_apply(self, *args, **kwargs):
        # Instance attributes act as default keyword arguments of the op.
        kwargs_ = self.__dict__.copy()
        kwargs_.update(kwargs)
        return op(*args, **kwargs_)

    # NOTE(review): `__call__` is only installed inside this branch, so a
    #               class with a falsy `__append_doc__` is returned without
    #               the wrapper call -- confirm this is intended.
    if getattr(cls, '__append_doc__', True):  # XXX should default to True?
        if sys.version_info[0] > 2:
            cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
            cls.__init__.__doc__ = op.__doc__
            cls.__call__ = partial_apply
            cls.__call__.__doc__ = op.__doc__
        else:
            # XXX work around for python 2
            partial_apply.__doc__ = op.__doc__
            cls.__call__ = partial_apply
    return cls


def register(cls):
    """
    Add a module class to the global config under its class name.

    Classes that declare `__op__` are first passed through `make_partial`;
    the schema extracted from the class is what gets stored.

    Args:
        cls (type): Module class to be registered.

    Returns: cls

    Raises:
        ValueError: a class with the same name is already registered.
    """
    already_known = cls.__name__ in global_config
    if already_known:
        message = "Module class already registered: {}".format(cls.__name__)
        raise ValueError(message)

    if hasattr(cls, '__op__'):
        cls = make_partial(cls)

    schema = extract_schema(cls)
    global_config[cls.__name__] = schema
    return cls


def create(cls_or_name, **kwargs):
    """
    Create an instance of given module class.

    The constructor arguments are taken from the registered config of the
    module, then completed from its `shared` and `inject` annotations.

    Args:
        cls_or_name (type or str): Class of which to create instance.
        **kwargs: Overrides applied to the registered config of the module
            before it is validated.

    Returns: instance of type `cls_or_name`

    Raises:
        ValueError: an `inject` entry names a key that is missing from the
            global config, or holds a value of an unsupported type.
    """
    # isinstance (not an exact type match) also accepts classes built by a
    # metaclass and subclasses of str.
    assert isinstance(cls_or_name, (type, str)), \
        "should be a class or name of a class"
    if isinstance(cls_or_name, str):
        name = cls_or_name
    else:
        name = cls_or_name.__name__
    assert name in global_config and \
        isinstance(global_config[name], SchemaDict), \
        "the module {} is not registered".format(name)
    # NOTE: `config` is the registered entry itself, not a copy, so the
    #       overrides applied here are written to the global config entry.
    config = global_config[name]
    config.update(kwargs)
    config.validate()
    cls = getattr(config.pymodule, name)

    init_kwargs = {}
    init_kwargs.update(config)

    # parse `shared` annotation of registered modules
    if getattr(config, 'shared', None):
        for k in config.shared:
            target_key = config[k]
            shared_conf = config.schema[k].default
            assert isinstance(shared_conf, SharedConfig)
            if target_key is not None and not isinstance(target_key,
                                                         SharedConfig):
                continue  # value is given for the module
            elif shared_conf.key in global_config:
                # `key` is present in config
                init_kwargs[k] = global_config[shared_conf.key]
            else:
                init_kwargs[k] = shared_conf.default_value

    # parse `inject` annotation of registered modules
    if getattr(config, 'inject', None):
        for k in config.inject:
            target_key = config[k]
            # optional dependency
            if target_key is None:
                continue
            # also accept dictionaries and serialized objects
            if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
                continue
            elif isinstance(target_key, str):
                if target_key not in global_config:
                    raise ValueError(
                        "Missing injection config: {}".format(target_key))
                target = global_config[target_key]
                if isinstance(target, SchemaDict):
                    # the dependency is itself a registered module
                    init_kwargs[k] = create(target_key)
                elif hasattr(target, '__dict__'):  # serialized object
                    init_kwargs[k] = target
            else:
                raise ValueError(
                    "Unsupported injection type: {}".format(target_key))
    # prevent modification of global config values of reference types
    # (e.g., list, dict) from within the created module instances
    init_kwargs = copy.deepcopy(init_kwargs)
    return cls(**init_kwargs)