remove python files outside

98f6b52f · dengkaipeng · df61a05c · df61a05c · df61a05c · df61a05c
Showing 6 changed files with 0 additions and 3355 deletions

callbacks.py callbacks.py +0 -279

distributed.py distributed.py +0 -222

metrics.py metrics.py +0 -127

model.py model.py +0 -1268

progressbar.py progressbar.py +0 -163

text.py text.py +0 -1296

No files found.
--- a/callbacks.py
+++ b/callbacks.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import six
-import copy
-from progressbar import ProgressBar
-from paddle.fluid.dygraph.parallel import ParallelEnv
-def config_callbacks(callbacks=None,
-                     model=None,
-                     batch_size=None,
-                     epochs=None,
-                     steps=None,
-                     log_freq=2,
-                     verbose=2,
-                     save_freq=1,
-                     save_dir=None,
-                     metrics=None,
-                     mode='train'):
-    cbks = callbacks or []
-    cbks = cbks if isinstance(cbks, (list, tuple)) else [cbks]
-    if not any(isinstance(k, ProgBarLogger) for k in cbks) and verbose:
-        cbks = cbks + [ProgBarLogger(log_freq, verbose=verbose)]
-    if not any(isinstance(k, ModelCheckpoint) for k in cbks):
-        cbks = cbks + [ModelCheckpoint(save_freq, save_dir)]
-    cbk_list = CallbackList(cbks)
-    cbk_list.set_model(model)
-    metrics = metrics or [] if mode != 'test' else []
-    params = {
-        'batch_size': batch_size,
-        'epochs': epochs,
-        'steps': steps,
-        'verbose': verbose,
-        'metrics': metrics,
-    }
-    cbk_list.set_params(params)
-    return cbk_list
-class CallbackList(object):
-    def __init__(self, callbacks=None):
-        # copy
-        self.callbacks = [c for c in callbacks]
-        self.params = {}
-        self.model = None
-    def append(self, callback):
-        self.callbacks.append(callback)
-    def __iter__(self):
-        return iter(self.callbacks)
-    def set_params(self, params):
-        for c in self.callbacks:
-            c.set_params(params)
-    def set_model(self, model):
-        for c in self.callbacks:
-            c.set_model(model)
-    def _call(self, name, *args):
-        for c in self.callbacks:
-            func = getattr(c, name)
-            func(*args)
-    def _check_mode(self, mode):
-        assert mode in ['train', 'eval', 'test'], \
-            'mode should be train, eval or test'
-    def on_begin(self, mode, logs=None):
-        self._check_mode(mode)
-        name = 'on_{}_begin'.format(mode)
-        self._call(name, logs)
-    def on_end(self, mode, logs=None):
-        self._check_mode(mode)
-        name = 'on_{}_end'.format(mode)
-        self._call(name, logs)
-    def on_epoch_begin(self, epoch=None, logs=None):
-        self._call('on_epoch_begin', epoch, logs)
-    def on_epoch_end(self, epoch=None, logs=None):
-        self._call('on_epoch_end', epoch, logs)
-    def on_batch_begin(self, mode, step=None, logs=None):
-        self._check_mode(mode)
-        name = 'on_{}_batch_begin'.format(mode)
-        self._call(name, step, logs)
-    def on_batch_end(self, mode, step=None, logs=None):
-        self._check_mode(mode)
-        name = 'on_{}_batch_end'.format(mode)
-        self._call(name, step, logs)
-class Callback(object):
-    def __init__(self):
-        self.model = None
-        self.params = {}
-    def set_params(self, params):
-        self.params = params
-    def set_model(self, model):
-        self.model = model
-    def on_train_begin(self, logs=None):
-        """
-        """
-    def on_train_end(self, logs=None):
-        """
-        """
-    def on_eval_begin(self, logs=None):
-        """
-        """
-    def on_eval_end(self, logs=None):
-        """
-        """
-    def on_test_begin(self, logs=None):
-        """
-        """
-    def on_test_end(self, logs=None):
-        """
-        """
-    def on_epoch_begin(self, epoch, logs=None):
-        """
-        """
-    def on_epoch_end(self, epoch, logs=None):
-        """
-        """
-    def on_train_batch_begin(self, step, logs=None):
-        """
-        """
-    def on_train_batch_end(self, step, logs=None):
-        """
-        """
-    def on_eval_batch_begin(self, step, logs=None):
-        """
-        """
-    def on_eval_batch_end(self, step, logs=None):
-        """
-        """
-    def on_eval_batch_begin(self, step, logs=None):
-        """
-        """
-    def on_eval_batch_end(self, step, logs=None):
-        """
-        """
-class ProgBarLogger(Callback):
-    def __init__(self, log_freq=1, verbose=2):
-        self.epochs = None
-        self.steps = None
-        self.progbar = None
-        self.verbose = verbose
-        self.log_freq = log_freq
-    def on_train_begin(self, logs=None):
-        self.epochs = self.params['epochs']
-        assert self.epochs
-        self.train_metrics = self.params['metrics']
-        assert self.train_metrics
-    def on_epoch_begin(self, epoch=None, logs=None):
-        self.steps = self.params['steps']
-        self.epoch = epoch
-        self.train_step = 0
-        if self.verbose and self.epochs and ParallelEnv().local_rank == 0:
-            print('Epoch %d/%d' % (epoch + 1, self.epochs))
-        self.train_progbar = ProgressBar(num=self.steps, verbose=self.verbose)
-    def _updates(self, logs, mode):
-        values = []
-        metrics = getattr(self, '%s_metrics' % (mode))
-        progbar = getattr(self, '%s_progbar' % (mode))
-        steps = getattr(self, '%s_step' % (mode))
-        for k in metrics:
-            if k in logs:
-                values.append((k, logs[k]))
-        progbar.update(steps, values)
-    def on_train_batch_end(self, step, logs=None):
-        logs = logs or {}
-        self.train_step += 1
-        if self.train_step % self.log_freq == 0 and self.verbose and ParallelEnv(
-        ).local_rank == 0:
-            # if steps is not None, last step will update in on_epoch_end
-            if self.steps and self.train_step < self.steps:
-                self._updates(logs, 'train')
-            else:
-                self._updates(logs, 'train')
-    def on_epoch_end(self, epoch, logs=None):
-        logs = logs or {}
-        if self.verbose and ParallelEnv().local_rank == 0:
-            self._updates(logs, 'train')
-    def on_eval_begin(self, logs=None):
-        self.eval_steps = logs.get('steps', None)
-        self.eval_metrics = logs.get('metrics_name', [])
-        self.eval_step = 0
-        self.evaled_samples = 0
-        self.eval_progbar = ProgressBar(
-            num=self.eval_steps, verbose=self.verbose)
-        if ParallelEnv().local_rank == 0:
-            print('Eval begin...')
-    def on_eval_batch_end(self, step, logs=None):
-        logs = logs or {}
-        self.eval_step = step
-        samples = logs.get('batch_size', 1)
-        self.evaled_samples += samples
-        if self.eval_step % self.log_freq == 0 and self.verbose and ParallelEnv(
-        ).local_rank == 0:
-            # if steps is not None, last step will update in on_epoch_end
-            if self.eval_steps and self.eval_step < self.eval_steps:
-                self._updates(logs, 'eval')
-    def on_eval_end(self, logs=None):
-        logs = logs or {}
-        if self.verbose and ParallelEnv().local_rank == 0:
-            self._updates(logs, 'eval')
-            print('Eval samples: %d' % (self.evaled_samples))
-class ModelCheckpoint(Callback):
-    def __init__(self, save_freq=1, save_dir=None):
-        self.save_freq = save_freq
-        self.save_dir = save_dir
-    def on_epoch_begin(self, epoch=None, logs=None):
-        self.epoch = epoch
-    def _is_save(self):
-        return self.model and self.save_dir and ParallelEnv().local_rank == 0
-    def on_epoch_end(self, epoch, logs=None):
-        if self._is_save() and self.epoch % self.save_freq == 0:
-            path = '{}/{}'.format(self.save_dir, epoch)
-            print('save checkpoint at {}'.format(path))
-            self.model.save(path)
-    def on_train_end(self, logs=None):
-        if self._is_save():
-            path = '{}/final'.format(self.save_dir)
-            print('save checkpoint at {}'.format(path))
-            self.model.save(path)
--- a/distributed.py
+++ b/distributed.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-import sys
-import six
-import time
-import math
-import socket
-import contextlib
-import numpy as np
-from paddle import fluid
-from paddle.fluid.layers import collective
-from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
-from paddle.io import BatchSampler
-_parallel_context_initialized = False
-class DistributedBatchSampler(BatchSampler):
-    """Sampler that restricts data loading to a subset of the dataset.
-    In such case, each process can pass a DistributedBatchSampler instance 
-    as a DataLoader sampler, and load a subset of the original dataset that 
-    is exclusive to it.
-    .. note::
-        Dataset is assumed to be of constant size.
-    Args:
-        data_source: this could be a `paddle.io.Dataset` implement
-                     or other python object which implemented
-                     `__len__` for BatchSampler to get sample
-                     number of data source.
-        batch_size(int): sample indice number in a mini-batch indices.
-        shuffle(bool): whther to shuffle indices order before genrating
-            batch indices. Default False.
-        drop_last(bool): whether drop the last incomplete batch dataset size
-            is not divisible by the batch size. Default False
-    """
-    def __init__(self, dataset, batch_size, shuffle=False, drop_last=False):
-        self.dataset = dataset
-        assert isinstance(batch_size, int) and batch_size > 0, \
-                "batch_size should be a positive integer"
-        self.batch_size = batch_size
-        assert isinstance(shuffle, bool), \
-                "shuffle should be a boolean value"
-        self.shuffle = shuffle
-        assert isinstance(drop_last, bool), \
-                "drop_last should be a boolean number"
-        self.drop_last = drop_last
-        self.nranks = ParallelEnv().nranks
-        self.local_rank = ParallelEnv().local_rank
-        self.epoch = 0
-        self.num_samples = int(
-            math.ceil(len(self.dataset) * 1.0 / self.nranks))
-        self.total_size = self.num_samples * self.nranks
-    def __iter__(self):
-        num_samples = len(self.dataset)
-        indices = np.arange(num_samples).tolist()
-        indices += indices[:(self.total_size - len(indices))]
-        assert len(indices) == self.total_size
-        if self.shuffle:
-            np.random.RandomState(self.epoch).shuffle(indices)
-            self.epoch += 1
-        # subsample
-        def _get_indices_by_batch_size(indices):
-            subsampled_indices = []
-            last_batch_size = self.total_size % (self.batch_size * self.nranks)
-            assert last_batch_size % self.nranks == 0
-            last_local_batch_size = last_batch_size // self.nranks
-            for i in range(self.local_rank * self.batch_size,
-                           len(indices) - last_batch_size,
-                           self.batch_size * self.nranks):
-                subsampled_indices.extend(indices[i:i + self.batch_size])
-            indices = indices[len(indices) - last_batch_size:]
-            subsampled_indices.extend(indices[
-                self.local_rank * last_local_batch_size:(
-                    self.local_rank + 1) * last_local_batch_size])
-            return subsampled_indices
-        if self.nranks > 1:
-            indices = _get_indices_by_batch_size(indices)
-        assert len(indices) == self.num_samples
-        _sample_iter = iter(indices)
-        batch_indices = []
-        for idx in _sample_iter:
-            batch_indices.append(idx)
-            if len(batch_indices) == self.batch_size:
-                yield batch_indices
-                batch_indices = []
-        if not self.drop_last and len(batch_indices) > 0:
-            yield batch_indices
-    def __len__(self):
-        num_samples = self.num_samples
-        num_samples += int(not self.drop_last) * (self.batch_size - 1)
-        return num_samples // self.batch_size
-    def set_epoch(self, epoch):
-        self.epoch = epoch
-def _all_gather(x, nranks, ring_id=0, use_calc_stream=True):
-    return collective._c_allgather(
-        x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream)
-def wait_server_ready(endpoints):
-    assert not isinstance(endpoints, six.string_types)
-    while True:
-        all_ok = True
-        not_ready_endpoints = []
-        for ep in endpoints:
-            ip_port = ep.split(":")
-            with contextlib.closing(
-                    socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
-                sock.settimeout(2)
-                result = sock.connect_ex((ip_port[0], int(ip_port[1])))
-                if result != 0:
-                    all_ok = False
-                    not_ready_endpoints.append(ep)
-        if not all_ok:
-            time.sleep(3)
-        else:
-            break
-def init_communicator(program, rank, nranks, wait_port, current_endpoint,
-                      endpoints):
-    if nranks < 2:
-        return
-    other_endpoints = endpoints[:]
-    other_endpoints.remove(current_endpoint)
-    if rank == 0 and wait_port:
-        wait_server_ready(other_endpoints)
-    block = program.global_block()
-    nccl_id_var = block.create_var(
-        name=fluid.unique_name.generate('nccl_id'),
-        persistable=True,
-        type=fluid.core.VarDesc.VarType.RAW)
-    block.append_op(
-        type='c_gen_nccl_id',
-        inputs={},
-        outputs={'Out': nccl_id_var},
-        attrs={
-            'rank': rank,
-            'endpoint': current_endpoint,
-            'other_endpoints': other_endpoints
-        })
-    block.append_op(
-        type='c_comm_init',
-        inputs={'X': nccl_id_var},
-        outputs={},
-        attrs={
-            'nranks': nranks,
-            'rank': rank,
-            'ring_id': 0,
-        })
-def prepare_distributed_context(place=None):
-    if place is None:
-        place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \
-            else fluid.CUDAPlace(0)
-    strategy = ParallelStrategy()
-    strategy.nranks = ParallelEnv().nranks
-    strategy.local_rank = ParallelEnv().local_rank
-    strategy.trainer_endpoints = ParallelEnv().trainer_endpoints
-    strategy.current_endpoint = ParallelEnv().current_endpoint
-    if strategy.nranks < 2:
-        return
-    global _parallel_context_initialized
-    if not _parallel_context_initialized and isinstance(place,
-                                                        fluid.CUDAPlace):
-        def _init_context():
-            communicator_prog = fluid.Program()
-            init_communicator(communicator_prog, strategy.local_rank,
-                              strategy.nranks, True, strategy.current_endpoint,
-                              strategy.trainer_endpoints)
-            exe = fluid.Executor(place)
-            exe.run(communicator_prog)
-        if fluid.in_dygraph_mode():
-            fluid.disable_dygraph()
-            _init_context()
-            fluid.enable_dygraph(place)
-        else:
-            _init_context()
-    else:
-        assert ("Only support CUDAPlace for now.")
-    _parallel_context_initialized = True
-    return strategy
--- a/metrics.py
+++ b/metrics.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import absolute_import
-import six
-import abc
-import numpy as np
-import paddle.fluid as fluid
-import logging
-FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
-logging.basicConfig(level=logging.INFO, format=FORMAT)
-logger = logging.getLogger(__name__)
-__all__ = ['Metric', 'Accuracy']
-@six.add_metaclass(abc.ABCMeta)
-class Metric(object):
-    """
-    Base class for metric, encapsulates metric logic and APIs
-    Usage:
-    m = SomeMetric()
-    for prediction, label in ...:
-        m.update(prediction, label)
-    m.accumulate()
-    """
-    @abc.abstractmethod
-    def reset(self):
-        """
-        Reset states and result
-        """
-        raise NotImplementedError("function 'reset' not implemented in {}.".
-                                  format(self.__class__.__name__))
-    @abc.abstractmethod
-    def update(self, *args, **kwargs):
-        """
-        Update states for metric
-        """
-        raise NotImplementedError("function 'update' not implemented in {}.".
-                                  format(self.__class__.__name__))
-    @abc.abstractmethod
-    def accumulate(self):
-        """
-        Accumulates statistics, computes and returns the metric value
-        """
-        raise NotImplementedError(
-            "function 'accumulate' not implemented in {}.".format(
-                self.__class__.__name__))
-    @abc.abstractmethod
-    def name(self):
-        """
-        Returns metric name
-        """
-        raise NotImplementedError("function 'name' not implemented in {}.".
-                                  format(self.__class__.__name__))
-    def add_metric_op(self, pred, label):
-        """
-        Add process op for metric in program
-        """
-        return pred, label
-class Accuracy(Metric):
-    """
-    Encapsulates accuracy metric logic
-    """
-    def __init__(self, topk=(1, ), name=None, *args, **kwargs):
-        super(Accuracy, self).__init__(*args, **kwargs)
-        self.topk = topk
-        self.maxk = max(topk)
-        self._init_name(name)
-        self.reset()
-    def add_metric_op(self, pred, label, *args, **kwargs):
-        pred = fluid.layers.argsort(pred[0], descending=True)[1][:, :self.maxk]
-        correct = pred == label[0]
-        return correct
-    def update(self, correct, *args, **kwargs):
-        accs = []
-        for i, k in enumerate(self.topk):
-            num_corrects = correct[:, :k].sum()
-            num_samples = len(correct)
-            accs.append(float(num_corrects) / num_samples)
-            self.total[i] += num_corrects
-            self.count[i] += num_samples
-        return accs
-    def reset(self):
-        self.total = [0.] * len(self.topk)
-        self.count = [0] * len(self.topk)
-    def accumulate(self):
-        res = []
-        for t, c in zip(self.total, self.count):
-            res.append(float(t) / c)
-        return res
-    def _init_name(self, name):
-        name = name or 'acc'
-        if self.maxk != 1:
-            self._name = ['{}_top{}'.format(name, k) for k in self.topk]
-        else:
-            self._name = ['acc']
-    def name(self):
-        return self._name
--- a/model.py
+++ b/model.py
--- a/progressbar.py
+++ b/progressbar.py
-import sys
-import time
-import numpy as np
-class ProgressBar(object):
-    """progress bar """
-    def __init__(self,
-                 num=None,
-                 width=30,
-                 verbose=1,
-                 start=True,
-                 file=sys.stdout):
-        self._num = num
-        if isinstance(num, int) and num <= 0:
-            raise TypeError('num should be None or integer (> 0)')
-        max_width = self._get_max_width()
-        self._width = width if width <= max_width else max_width
-        self._total_width = 0
-        self._verbose = verbose
-        self.file = file
-        self._values = {}
-        self._values_order = []
-        if start:
-            self._start = time.time()
-        self._last_update = 0
-        self._dynamic_display = (
-            (hasattr(self.file, 'isatty') and
-             self.file.isatty()) or 'ipykernel' in sys.modules or
-            'posix' in sys.modules or 'PYCHARM_HOSTED' in os.environ)
-    def _get_max_width(self):
-        if sys.version_info > (3, 3):
-            from shutil import get_terminal_size
-        else:
-            from backports.shutil_get_terminal_size import get_terminal_size
-        terminal_width, _ = get_terminal_size()
-        max_width = min(int(terminal_width * 0.6), terminal_width - 50)
-        return max_width
-    def start(self):
-        self.file.flush()
-        self._start = time.time()
-    def update(self, current_num, values=None):
-        now = time.time()
-        if current_num:
-            time_per_unit = (now - self._start) / current_num
-        else:
-            time_per_unit = 0
-        if time_per_unit >= 1 or time_per_unit == 0:
-            fps = ' - %.0fs/%s' % (time_per_unit, 'step')
-        elif time_per_unit >= 1e-3:
-            fps = ' - %.0fms/%s' % (time_per_unit * 1e3, 'step')
-        else:
-            fps = ' - %.0fus/%s' % (time_per_unit * 1e6, 'step')
-        info = ''
-        if self._verbose == 1:
-            prev_total_width = self._total_width
-            if self._dynamic_display:
-                sys.stdout.write('\b' * prev_total_width)
-                sys.stdout.write('\r')
-            else:
-                sys.stdout.write('\n')
-            if self._num is not None:
-                numdigits = int(np.log10(self._num)) + 1
-                bar_chars = ('step %' + str(numdigits) + 'd/%d [') % (
-                    current_num, self._num)
-                prog = float(current_num) / self._num
-                prog_width = int(self._width * prog)
-                if prog_width > 0:
-                    bar_chars += ('=' * (prog_width - 1))
-                    if current_num < self._num:
-                        bar_chars += '>'
-                    else:
-                        bar_chars += '='
-                bar_chars += ('.' * (self._width - prog_width))
-                bar_chars += ']'
-            else:
-                bar_chars = 'step %3d' % current_num
-            self._total_width = len(bar_chars)
-            sys.stdout.write(bar_chars)
-            for k, val in values:
-                info += ' - %s:' % k
-                val = val if isinstance(val, list) else [val]
-                for i, v in enumerate(val):
-                    if isinstance(v, (float, np.float32, np.float64)):
-                        if abs(v) > 1e-3:
-                            info += ' %.4f' % v
-                        else:
-                            info += ' %.4e' % v
-                    else:
-                        info += ' %s' % v
-            if self._num is not None and current_num < self._num:
-                eta = time_per_unit * (self._num - current_num)
-                if eta > 3600:
-                    eta_format = '%d:%02d:%02d' % (eta // 3600, (eta % 3600) //
-                                                   60, eta % 60)
-                elif eta > 60:
-                    eta_format = '%d:%02d' % (eta // 60, eta % 60)
-                else:
-                    eta_format = '%ds' % eta
-                info += ' - ETA: %s' % eta_format
-            info += fps
-            self._total_width += len(info)
-            if prev_total_width > self._total_width:
-                info += (' ' * (prev_total_width - self._total_width))
-            # newline for another epoch
-            if self._num is not None and current_num >= self._num:
-                info += '\n'
-            if self._num is None:
-                info += '\n'
-            sys.stdout.write(info)
-            sys.stdout.flush()
-            self._last_update = now
-        elif self._verbose == 2:
-            if self._num:
-                numdigits = int(np.log10(self._num)) + 1
-                count = ('step %' + str(numdigits) + 'd/%d') % (current_num,
-                                                                self._num)
-            else:
-                count = 'step %3d' % current_num
-            info = count + info
-            for k, val in values:
-                info += ' - %s:' % k
-                val = val if isinstance(val, list) else [val]
-                for v in val:
-                    if isinstance(v, (float, np.float32, np.float64)):
-                        if abs(v) > 1e-3:
-                            info += ' %.4f' % v
-                        else:
-                            info += ' %.4e' % v
-                    elif isinstance(v, np.ndarray) and \
-                        v.size == 1 and \
-                        isinstance(v.dtype, (np.float32, np.float64)):
-                        if abs(v[0]) > 1e-3:
-                            info += ' %.4f' % v[0]
-                        else:
-                            info += ' %.4e' % v[0]
-                    else:
-                        info += ' %s' % v
-            info += fps
-            info += '\n'
-            sys.stdout.write(info)
-            sys.stdout.flush()
--- a/text.py
+++ b/text.py