提交 27c66cfa 编写于 作者: L LielinJiang

Merge branch 'master' of https://github.com/PaddlePaddle/hapi into style-transfer

......@@ -2,3 +2,6 @@
*.json
output*
*checkpoint*
build
dist
hapi.egg-info
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import copy
from progressbar import ProgressBar
from paddle.fluid.dygraph.parallel import ParallelEnv
def config_callbacks(callbacks=None,
model=None,
batch_size=None,
epochs=None,
steps=None,
log_freq=2,
verbose=2,
save_freq=1,
save_dir=None,
metrics=None,
mode='train'):
cbks = callbacks or []
cbks = cbks if isinstance(cbks, (list, tuple)) else [cbks]
if not any(isinstance(k, ProgBarLogger) for k in cbks) and verbose:
cbks = cbks + [ProgBarLogger(log_freq, verbose=verbose)]
if not any(isinstance(k, ModelCheckpoint) for k in cbks):
cbks = cbks + [ModelCheckpoint(save_freq, save_dir)]
cbk_list = CallbackList(cbks)
cbk_list.set_model(model)
metrics = metrics or [] if mode != 'test' else []
params = {
'batch_size': batch_size,
'epochs': epochs,
'steps': steps,
'verbose': verbose,
'metrics': metrics,
}
cbk_list.set_params(params)
return cbk_list
class CallbackList(object):
def __init__(self, callbacks=None):
# copy
self.callbacks = [c for c in callbacks]
self.params = {}
self.model = None
def append(self, callback):
self.callbacks.append(callback)
def __iter__(self):
return iter(self.callbacks)
def set_params(self, params):
for c in self.callbacks:
c.set_params(params)
def set_model(self, model):
for c in self.callbacks:
c.set_model(model)
def _call(self, name, *args):
for c in self.callbacks:
func = getattr(c, name)
func(*args)
def _check_mode(self, mode):
assert mode in ['train', 'eval', 'test'], \
'mode should be train, eval or test'
def on_begin(self, mode, logs=None):
self._check_mode(mode)
name = 'on_{}_begin'.format(mode)
self._call(name, logs)
def on_end(self, mode, logs=None):
self._check_mode(mode)
name = 'on_{}_end'.format(mode)
self._call(name, logs)
def on_epoch_begin(self, epoch=None, logs=None):
self._call('on_epoch_begin', epoch, logs)
def on_epoch_end(self, epoch=None, logs=None):
self._call('on_epoch_end', epoch, logs)
def on_batch_begin(self, mode, step=None, logs=None):
self._check_mode(mode)
name = 'on_{}_batch_begin'.format(mode)
self._call(name, step, logs)
def on_batch_end(self, mode, step=None, logs=None):
self._check_mode(mode)
name = 'on_{}_batch_end'.format(mode)
self._call(name, step, logs)
class Callback(object):
def __init__(self):
self.model = None
self.params = {}
def set_params(self, params):
self.params = params
def set_model(self, model):
self.model = model
def on_train_begin(self, logs=None):
"""
"""
def on_train_end(self, logs=None):
"""
"""
def on_eval_begin(self, logs=None):
"""
"""
def on_eval_end(self, logs=None):
"""
"""
def on_test_begin(self, logs=None):
"""
"""
def on_test_end(self, logs=None):
"""
"""
def on_epoch_begin(self, epoch, logs=None):
"""
"""
def on_epoch_end(self, epoch, logs=None):
"""
"""
def on_train_batch_begin(self, step, logs=None):
"""
"""
def on_train_batch_end(self, step, logs=None):
"""
"""
def on_eval_batch_begin(self, step, logs=None):
"""
"""
def on_eval_batch_end(self, step, logs=None):
"""
"""
def on_eval_batch_begin(self, step, logs=None):
"""
"""
def on_eval_batch_end(self, step, logs=None):
"""
"""
class ProgBarLogger(Callback):
def __init__(self, log_freq=1, verbose=2):
self.epochs = None
self.steps = None
self.progbar = None
self.verbose = verbose
self.log_freq = log_freq
def on_train_begin(self, logs=None):
self.epochs = self.params['epochs']
assert self.epochs
self.train_metrics = self.params['metrics']
assert self.train_metrics
def on_epoch_begin(self, epoch=None, logs=None):
self.steps = self.params['steps']
self.epoch = epoch
self.train_step = 0
if self.verbose and self.epochs and ParallelEnv().local_rank == 0:
print('Epoch %d/%d' % (epoch + 1, self.epochs))
self.train_progbar = ProgressBar(num=self.steps, verbose=self.verbose)
def _updates(self, logs, mode):
values = []
metrics = getattr(self, '%s_metrics' % (mode))
progbar = getattr(self, '%s_progbar' % (mode))
steps = getattr(self, '%s_step' % (mode))
for k in metrics:
if k in logs:
values.append((k, logs[k]))
progbar.update(steps, values)
def on_train_batch_end(self, step, logs=None):
logs = logs or {}
self.train_step += 1
if self.train_step % self.log_freq == 0 and self.verbose and ParallelEnv(
).local_rank == 0:
# if steps is not None, last step will update in on_epoch_end
if self.steps and self.train_step < self.steps:
self._updates(logs, 'train')
else:
self._updates(logs, 'train')
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
if self.verbose and ParallelEnv().local_rank == 0:
self._updates(logs, 'train')
def on_eval_begin(self, logs=None):
self.eval_steps = logs.get('steps', None)
self.eval_metrics = logs.get('metrics_name', [])
self.eval_step = 0
self.evaled_samples = 0
self.eval_progbar = ProgressBar(
num=self.eval_steps, verbose=self.verbose)
if ParallelEnv().local_rank == 0:
print('Eval begin...')
def on_eval_batch_end(self, step, logs=None):
logs = logs or {}
self.eval_step = step
samples = logs.get('batch_size', 1)
self.evaled_samples += samples
if self.eval_step % self.log_freq == 0 and self.verbose and ParallelEnv(
).local_rank == 0:
# if steps is not None, last step will update in on_epoch_end
if self.eval_steps and self.eval_step < self.eval_steps:
self._updates(logs, 'eval')
def on_eval_end(self, logs=None):
logs = logs or {}
if self.verbose and ParallelEnv().local_rank == 0:
self._updates(logs, 'eval')
print('Eval samples: %d' % (self.evaled_samples))
class ModelCheckpoint(Callback):
def __init__(self, save_freq=1, save_dir=None):
self.save_freq = save_freq
self.save_dir = save_dir
def on_epoch_begin(self, epoch=None, logs=None):
self.epoch = epoch
def _is_save(self):
return self.model and self.save_dir and ParallelEnv().local_rank == 0
def on_epoch_end(self, epoch, logs=None):
if self._is_save() and self.epoch % self.save_freq == 0:
path = '{}/{}'.format(self.save_dir, epoch)
print('save checkpoint at {}'.format(path))
self.model.save(path)
def on_train_end(self, logs=None):
if self._is_save():
path = '{}/final'.format(self.save_dir)
print('save checkpoint at {}'.format(path))
self.model.save(path)
......@@ -30,7 +30,7 @@ IMAGES_ROOT = "./data/" + DATASET + "/"
import paddle.fluid as fluid
class Cityscapes(fluid.io.Dataset):
class Cityscapes(paddle.io.Dataset):
def __init__(self, root_path, file_path, mode='train', return_name=False):
self.root_path = root_path
self.file_path = file_path
......
......@@ -86,13 +86,13 @@ def main():
if FLAGS.resume:
g.load(FLAGS.resume)
loader_A = fluid.io.DataLoader(
loader_A = paddle.io.DataLoader(
data.DataA(),
places=place,
shuffle=True,
return_list=True,
batch_size=FLAGS.batch_size)
loader_B = fluid.io.DataLoader(
loader_B = paddle.io.DataLoader(
data.DataB(),
places=place,
shuffle=True,
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import six
import time
import math
import socket
import contextlib
import numpy as np
from paddle import fluid
from paddle.fluid.layers import collective
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
from paddle.fluid.io import BatchSampler
_parallel_context_initialized = False
class DistributedBatchSampler(BatchSampler):
"""Sampler that restricts data loading to a subset of the dataset.
In such case, each process can pass a DistributedBatchSampler instance
as a DataLoader sampler, and load a subset of the original dataset that
is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Args:
data_source: this could be a `fluid.io.Dataset` implement
or other python object which implemented
`__len__` for BatchSampler to get sample
number of data source.
batch_size(int): sample indice number in a mini-batch indices.
shuffle(bool): whther to shuffle indices order before genrating
batch indices. Default False.
drop_last(bool): whether drop the last incomplete batch dataset size
is not divisible by the batch size. Default False
"""
def __init__(self, dataset, batch_size, shuffle=False, drop_last=False):
self.dataset = dataset
assert isinstance(batch_size, int) and batch_size > 0, \
"batch_size should be a positive integer"
self.batch_size = batch_size
assert isinstance(shuffle, bool), \
"shuffle should be a boolean value"
self.shuffle = shuffle
assert isinstance(drop_last, bool), \
"drop_last should be a boolean number"
self.drop_last = drop_last
self.nranks = ParallelEnv().nranks
self.local_rank = ParallelEnv().local_rank
self.epoch = 0
self.num_samples = int(
math.ceil(len(self.dataset) * 1.0 / self.nranks))
self.total_size = self.num_samples * self.nranks
def __iter__(self):
num_samples = len(self.dataset)
indices = np.arange(num_samples).tolist()
indices += indices[:(self.total_size - len(indices))]
assert len(indices) == self.total_size
if self.shuffle:
np.random.RandomState(self.epoch).shuffle(indices)
self.epoch += 1
# subsample
def _get_indices_by_batch_size(indices):
subsampled_indices = []
last_batch_size = self.total_size % (self.batch_size * self.nranks)
assert last_batch_size % self.nranks == 0
last_local_batch_size = last_batch_size // self.nranks
for i in range(self.local_rank * self.batch_size,
len(indices) - last_batch_size,
self.batch_size * self.nranks):
subsampled_indices.extend(indices[i:i + self.batch_size])
indices = indices[len(indices) - last_batch_size:]
subsampled_indices.extend(indices[
self.local_rank * last_local_batch_size:(
self.local_rank + 1) * last_local_batch_size])
return subsampled_indices
if self.nranks > 1:
indices = _get_indices_by_batch_size(indices)
assert len(indices) == self.num_samples
_sample_iter = iter(indices)
batch_indices = []
for idx in _sample_iter:
batch_indices.append(idx)
if len(batch_indices) == self.batch_size:
yield batch_indices
batch_indices = []
if not self.drop_last and len(batch_indices) > 0:
yield batch_indices
def __len__(self):
num_samples = self.num_samples
num_samples += int(not self.drop_last) * (self.batch_size - 1)
return num_samples // self.batch_size
def set_epoch(self, epoch):
self.epoch = epoch
def _all_gather(x, nranks, ring_id=0, use_calc_stream=True):
return collective._c_allgather(
x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream)
def wait_server_ready(endpoints):
assert not isinstance(endpoints, six.string_types)
while True:
all_ok = True
not_ready_endpoints = []
for ep in endpoints:
ip_port = ep.split(":")
with contextlib.closing(
socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex((ip_port[0], int(ip_port[1])))
if result != 0:
all_ok = False
not_ready_endpoints.append(ep)
if not all_ok:
time.sleep(3)
else:
break
def init_communicator(program, rank, nranks, wait_port, current_endpoint,
endpoints):
if nranks < 2:
return
other_endpoints = endpoints[:]
other_endpoints.remove(current_endpoint)
if rank == 0 and wait_port:
wait_server_ready(other_endpoints)
block = program.global_block()
nccl_id_var = block.create_var(
name=fluid.unique_name.generate('nccl_id'),
persistable=True,
type=fluid.core.VarDesc.VarType.RAW)
block.append_op(
type='c_gen_nccl_id',
inputs={},
outputs={'Out': nccl_id_var},
attrs={
'rank': rank,
'endpoint': current_endpoint,
'other_endpoints': other_endpoints
})
block.append_op(
type='c_comm_init',
inputs={'X': nccl_id_var},
outputs={},
attrs={
'nranks': nranks,
'rank': rank,
'ring_id': 0,
})
def prepare_distributed_context(place=None):
if place is None:
place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \
else fluid.CUDAPlace(0)
strategy = ParallelStrategy()
strategy.nranks = ParallelEnv().nranks
strategy.local_rank = ParallelEnv().local_rank
strategy.trainer_endpoints = ParallelEnv().trainer_endpoints
strategy.current_endpoint = ParallelEnv().current_endpoint
if strategy.nranks < 2:
return
global _parallel_context_initialized
if not _parallel_context_initialized and isinstance(place,
fluid.CUDAPlace):
def _init_context():
communicator_prog = fluid.Program()
init_communicator(communicator_prog, strategy.local_rank,
strategy.nranks, True, strategy.current_endpoint,
strategy.trainer_endpoints)
exe = fluid.Executor(place)
exe.run(communicator_prog)
if fluid.in_dygraph_mode():
fluid.disable_dygraph()
_init_context()
fluid.enable_dygraph(place)
else:
_init_context()
else:
assert ("Only support CUDAPlace for now.")
_parallel_context_initialized = True
return strategy
......@@ -20,7 +20,7 @@ import json
sys.path.append('../')
from metrics import Metric
from hapi.metrics import Metric
from bmn_utils import boundary_choose, bmn_post_processing
......
......@@ -162,56 +162,3 @@ def bmn_post_processing(video_dict, subset, output_path, result_path):
outfile.close()
def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample,
num_sample_perbin):
""" generate sample mask for a boundary-matching pair """
plen = float(seg_xmax - seg_xmin)
plen_sample = plen / (num_sample * num_sample_perbin - 1.0)
total_samples = [
seg_xmin + plen_sample * ii
for ii in range(num_sample * num_sample_perbin)
]
p_mask = []
for idx in range(num_sample):
bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) *
num_sample_perbin]
bin_vector = np.zeros([tscale])
for sample in bin_samples:
sample_upper = math.ceil(sample)
sample_decimal, sample_down = math.modf(sample)
if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0:
bin_vector[int(sample_down)] += 1 - sample_decimal
if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0:
bin_vector[int(sample_upper)] += sample_decimal
bin_vector = 1.0 / num_sample_perbin * bin_vector
p_mask.append(bin_vector)
p_mask = np.stack(p_mask, axis=1)
return p_mask
def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample,
num_sample_perbin):
""" generate sample mask for each point in Boundary-Matching Map """
mask_mat = []
for start_index in range(tscale):
mask_mat_vector = []
for duration_index in range(dscale):
if start_index + duration_index < tscale:
p_xmin = start_index
p_xmax = start_index + duration_index
center_len = float(p_xmax - p_xmin) + 1
sample_xmin = p_xmin - center_len * prop_boundary_ratio
sample_xmax = p_xmax + center_len * prop_boundary_ratio
p_mask = _get_interp1d_bin_mask(sample_xmin, sample_xmax,
tscale, num_sample,
num_sample_perbin)
else:
p_mask = np.zeros([tscale, num_sample])
mask_mat_vector.append(p_mask)
mask_mat_vector = np.stack(mask_mat_vector, axis=2)
mask_mat.append(mask_mat_vector)
mask_mat = np.stack(mask_mat, axis=3)
mask_mat = mask_mat.astype(np.float32)
sample_mask = np.reshape(mask_mat, [tscale, -1])
return sample_mask
......@@ -18,11 +18,9 @@ import sys
import logging
import paddle.fluid as fluid
sys.path.append('../')
from model import set_device, Input
from hapi.model import set_device, Input
from hapi.vision.models import BMN, BmnLoss
from bmn_metric import BmnMetric
from bmn_model import BMN, BmnLoss
from reader import BmnDataset
from config_utils import *
......
......@@ -18,11 +18,9 @@ import os
import logging
import paddle.fluid as fluid
sys.path.append('../')
from model import set_device, Input
from hapi.model import set_device, Input
from hapi.vision.models import BMN, BmnLoss
from bmn_metric import BmnMetric
from bmn_model import BMN, BmnLoss
from reader import BmnDataset
from config_utils import *
......
......@@ -22,7 +22,7 @@ import sys
sys.path.append('../')
from distributed import DistributedBatchSampler
from paddle.fluid.io import Dataset, DataLoader
from paddle.io import Dataset, DataLoader
logger = logging.getLogger(__name__)
......
......@@ -18,10 +18,8 @@ import logging
import sys
import os
sys.path.append('../')
from model import set_device, Input
from bmn_model import BMN, BmnLoss
from hapi.model import set_device, Input
from hapi.vision.models import BMN, BmnLoss
from reader import BmnDataset
from config_utils import *
......
......@@ -18,8 +18,8 @@ import math
import random
import numpy as np
from datasets.folder import DatasetFolder
from transform import transforms
from hapi.datasets import DatasetFolder
from hapi.vision.transforms import transforms
from paddle import fluid
......@@ -45,7 +45,8 @@ class ImageNetDataset(DatasetFolder):
def __getitem__(self, idx):
img_path, label = self.samples[idx]
img = cv2.imread(img_path).astype(np.float32)
return self.transform(img), [label]
label = np.array([label])
return self.transform(img, label)
def __len__(self):
return len(self.samples)
......@@ -24,15 +24,17 @@ sys.path.append('../')
import time
import math
import numpy as np
import models
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.io import BatchSampler, DataLoader
from hapi.model import CrossEntropy, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.metrics import Accuracy
import hapi.vision.models as models
from model import CrossEntropy, Input, set_device
from imagenet_dataset import ImageNetDataset
from distributed import DistributedBatchSampler
from paddle.fluid.dygraph.parallel import ParallelEnv
from metrics import Accuracy
from paddle.fluid.io import BatchSampler, DataLoader
def make_optimizer(step_per_epoch, parameter_list=None):
......
......@@ -19,8 +19,8 @@ import os
import argparse
import numpy as np
from model import Input, set_device
from models import tsm_resnet50
from hapi.model import Input, set_device
from hapi.vision.models import tsm_resnet50
from check import check_gpu, check_version
from kinetics_dataset import KineticsDataset
......
......@@ -26,7 +26,7 @@ except ImportError:
import pickle
from io import BytesIO
from paddle.fluid.io import Dataset
from paddle.io import Dataset
import logging
logger = logging.getLogger(__name__)
......
......@@ -22,9 +22,9 @@ import numpy as np
from paddle import fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from model import Model, CrossEntropy, Input, set_device
from metrics import Accuracy
from models import tsm_resnet50
from hapi.model import Model, CrossEntropy, Input, set_device
from hapi.metrics import Accuracy
from hapi.vision.models import tsm_resnet50
from check import check_gpu, check_version
from kinetics_dataset import KineticsDataset
......
......@@ -17,7 +17,7 @@ import os.path as osp
import sys
import tarfile
from models.download import _download
from hapi.download import _download
import logging
logger = logging.getLogger(__name__)
......
......@@ -22,13 +22,12 @@ from PIL import Image
from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.io import DataLoader
from paddle.io import DataLoader
from model import Model, Input, set_device
from models import yolov3_darknet53, YoloLoss
from hapi.model import Model, Input, set_device
from hapi.vision.models import yolov3_darknet53, YoloLoss
from hapi.vision.transforms import *
from coco import COCODataset
from transforms import *
from visualizer import draw_bbox
import logging
......@@ -65,7 +64,8 @@ def main():
device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None
inputs = [Input([None, 3], 'int32', name='img_info'),
inputs = [Input([None, 1], 'int64', name='img_id'),
Input([None, 2], 'int32', name='img_shape'),
Input([None, 3, None, None], 'float32', name='image')]
cat2name = load_labels(FLAGS.label_list, with_background=False)
......@@ -87,9 +87,10 @@ def main():
img -= np.array(IMAGE_MEAN)
img /= np.array(IMAGE_STD)
img = img.transpose((2, 0, 1))[np.newaxis, :]
img_info = np.array([0, h, w]).astype('int32')[np.newaxis, :]
img_id = np.array([0]).astype('int64')[np.newaxis, :]
img_shape = np.array([h, w]).astype('int32')[np.newaxis, :]
_, bboxes = model.test([img_info, img])
_, bboxes = model.test([img_id, img_shape, img])
vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold)
save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image)
......
......@@ -23,15 +23,15 @@ import numpy as np
from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.io import DataLoader
from paddle.io import DataLoader
from model import Model, Input, set_device
from distributed import DistributedBatchSampler
from models import yolov3_darknet53, YoloLoss
from hapi.model import Model, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.datasets import COCODataset
from hapi.vision.transforms import *
from hapi.vision.models import yolov3_darknet53, YoloLoss
from coco_metric import COCOMetric
from coco import COCODataset
from transforms import *
NUM_MAX_BOXES = 50
......
......@@ -12,4 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from hapi.configure import Config as Config
from hapi.configure import Config
from hapi import callbacks
from hapi import datasets
from hapi import distributed
from hapi import download
from hapi import metrics
from hapi import model
from hapi import progressbar
from hapi import text
from hapi import vision
__all__ = [
'Config',
'callbacks',
'datasets',
'distributed',
'download',
'metrics',
'model',
'progressbar',
'text',
'vision',
]
......@@ -15,7 +15,7 @@
import six
import copy
from hapi.progressbar import ProgressBar
from progressbar import ProgressBar
from paddle.fluid.dygraph.parallel import ParallelEnv
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .folder import *
from .mnist import *
from .flowers import *
from .coco import *
......@@ -20,7 +20,7 @@ import cv2
import numpy as np
from pycocotools.coco import COCO
from paddle.fluid.io import Dataset
from paddle.io import Dataset
import logging
logger = logging.getLogger(__name__)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import io
import tarfile
import numpy as np
import scipy.io as scio
from PIL import Image
from paddle.io import Dataset
from .utils import _check_exists_and_download
__all__ = ["Flowers"]
DATA_URL = 'http://paddlemodels.bj.bcebos.com/flowers/102flowers.tgz'
LABEL_URL = 'http://paddlemodels.bj.bcebos.com/flowers/imagelabels.mat'
SETID_URL = 'http://paddlemodels.bj.bcebos.com/flowers/setid.mat'
DATA_MD5 = '52808999861908f626f3c1f4e79d11fa'
LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
# In official 'readme', tstid is the flag of test data
# and trnid is the flag of train data. But test data is more than train data.
# So we exchange the train data and test data.
MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': "valid"}
class Flowers(Dataset):
"""
Implement of flowers dataset
Args:
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None
label_file(str): path to label file, can be set None if
:attr:`download` is True. Default None
setid_file(str): path to subset index file, can be set
None if :attr:`download` is True. Default None
mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
download(bool): whether auto download mnist dataset if
:attr:`image_path`/:attr:`label_path` unset. Default
True
Examples:
.. code-block:: python
from hapi.vision.datasets import Flowers
flowers = Flowers(mode='test')
for i in range(len(flowers)):
sample = flowers[i]
print(sample[0].shape, sample[1])
"""
def __init__(self,
data_file=None,
label_file=None,
setid_file=None,
mode='train',
transform=None,
download=True):
assert mode.lower() in ['train', 'valid', 'test'], \
"mode should be 'train', 'valid' or 'test', but got {}".format(mode)
self.flag = MODE_FLAG_MAP[mode.lower()]
self.data_file = data_file
if self.data_file is None:
assert download, "data_file not set and auto download disabled"
self.data_file = _check_exists_and_download(
data_file, DATA_URL, DATA_MD5, 'flowers', download)
self.label_file = label_file
if self.label_file is None:
assert download, "label_file not set and auto download disabled"
self.label_file = _check_exists_and_download(
label_file, LABEL_URL, LABEL_MD5, 'flowers', download)
self.setid_file = setid_file
if self.setid_file is None:
assert download, "setid_file not set and auto download disabled"
self.setid_file = _check_exists_and_download(
setid_file, SETID_URL, SETID_MD5, 'flowers', download)
self.transform = transform
# read dataset into memory
self._load_anno()
def _load_anno(self):
self.name2mem = {}
self.data_tar = tarfile.open(self.data_file)
for ele in self.data_tar.getmembers():
self.name2mem[ele.name] = ele
self.labels = scio.loadmat(self.label_file)['labels'][0]
self.indexes = scio.loadmat(self.setid_file)[self.flag][0]
def __getitem__(self, idx):
index = self.indexes[idx]
label = np.array([self.labels[index - 1]])
img_name = "jpg/image_%05d.jpg" % index
img_ele = self.name2mem[img_name]
image = self.data_tar.extractfile(img_ele).read()
image = np.array(Image.open(io.BytesIO(image)))
if self.transform is not None:
image, label = self.transform(image, label)
return image, label
def __len__(self):
return len(self.indexes)
......@@ -16,7 +16,9 @@ import os
import sys
import cv2
from paddle.fluid.io import Dataset
from paddle.io import Dataset
__all__ = ["DatasetFolder"]
def has_valid_extension(filename, extensions):
......@@ -76,8 +78,6 @@ class DatasetFolder(Dataset):
both extensions and is_valid_file should not be passed.
transform (callable|optional): A function/transform that takes in
a sample and returns a transformed version.
target_transform (callable|optional): A function/transform that takes
in the target and transforms it.
is_valid_file (callable|optional): A function that takes path of a file
and check if the file is a valid file (used to check of corrupt files)
both extensions and is_valid_file should not be passed.
......@@ -94,11 +94,9 @@ class DatasetFolder(Dataset):
loader=None,
extensions=None,
transform=None,
target_transform=None,
is_valid_file=None):
self.root = root
self.transform = transform
self.target_transform = target_transform
if extensions is None:
extensions = IMG_EXTENSIONS
classes, class_to_idx = self._find_classes(self.root)
......@@ -152,9 +150,7 @@ class DatasetFolder(Dataset):
path, target = self.samples[index]
sample = self.loader(path)
if self.transform is not None:
sample = self.transform(sample)
if self.target_transform is not None:
target = self.target_transform(target)
sample, target = self.transform(sample, target)
return sample, target
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import gzip
import struct
import numpy as np
import paddle.dataset.common
from paddle.io import Dataset
from .utils import _check_exists_and_download
__all__ = ["MNIST"]
URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/'
TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3'
TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz'
TEST_LABEL_MD5 = 'ec29112dd5afa0611ce80d1b7f02629c'
TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz'
TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873'
TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz'
TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'
class MNIST(Dataset):
"""
Implement of MNIST dataset
Args:
image_path(str): path to image file, can be set None if
:attr:`download` is True. Default None
label_path(str): path to label file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
download(bool): whether auto download mnist dataset if
:attr:`image_path`/:attr:`label_path` unset. Default
True
Returns:
Dataset: MNIST Dataset.
Examples:
.. code-block:: python
from hapi.vision.datasets import MNIST
mnist = MNIST(mode='test')
for i in range(len(mnist)):
sample = mnist[i]
print(sample[0].shape, sample[1])
"""
def __init__(self,
image_path=None,
label_path=None,
mode='train',
transform=None,
download=True):
assert mode.lower() in ['train', 'test'], \
"mode should be 'train' or 'test', but got {}".format(mode)
self.mode = mode.lower()
self.image_path = image_path
if self.image_path is None:
assert download, "image_path not set and auto download disabled"
image_url = TRAIN_IMAGE_URL if mode == 'train' else TEST_IMAGE_URL
image_md5 = TRAIN_IMAGE_MD5 if mode == 'train' else TEST_IMAGE_MD5
self.image_path = _check_exists_and_download(
image_path, image_url, image_md5, 'mnist', download)
self.label_path = label_path
if self.label_path is None:
assert download, "label_path not set and auto download disabled"
label_url = TRAIN_LABEL_URL if mode == 'train' else TEST_LABEL_URL
label_md5 = TRAIN_LABEL_MD5 if mode == 'train' else TEST_LABEL_MD5
self.label_path = _check_exists_and_download(
label_path, label_url, label_md5, 'mnist', download)
self.transform = transform
# read dataset into memory
self._parse_dataset()
def _parse_dataset(self, buffer_size=100):
self.images = []
self.labels = []
with gzip.GzipFile(self.image_path, 'rb') as image_file:
img_buf = image_file.read()
with gzip.GzipFile(self.label_path, 'rb') as label_file:
lab_buf = label_file.read()
step_label = 0
offset_img = 0
# read from Big-endian
# get file info from magic byte
# image file : 16B
magic_byte_img = '>IIII'
magic_img, image_num, rows, cols = struct.unpack_from(
magic_byte_img, img_buf, offset_img)
offset_img += struct.calcsize(magic_byte_img)
offset_lab = 0
# label file : 8B
magic_byte_lab = '>II'
magic_lab, label_num = struct.unpack_from(magic_byte_lab,
lab_buf, offset_lab)
offset_lab += struct.calcsize(magic_byte_lab)
while True:
if step_label >= label_num:
break
fmt_label = '>' + str(buffer_size) + 'B'
labels = struct.unpack_from(fmt_label, lab_buf, offset_lab)
offset_lab += struct.calcsize(fmt_label)
step_label += buffer_size
fmt_images = '>' + str(buffer_size * rows * cols) + 'B'
images_temp = struct.unpack_from(fmt_images, img_buf,
offset_img)
images = np.reshape(images_temp, (buffer_size, rows *
cols)).astype('float32')
offset_img += struct.calcsize(fmt_images)
images = images / 255.0
images = images * 2.0
images = images - 1.0
for i in range(buffer_size):
self.images.append(images[i, :])
self.labels.append(np.array([labels[i]]))
def __getitem__(self, idx):
image, label = self.images[idx], self.labels[idx]
if self.transform is not None:
image, label = self.transform(image, label)
return image, label
def __len__(self):
return len(self.labels)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import paddle.dataset.common
def _check_exists_and_download(path, url, md5, module_name, download=True):
if path and os.path.exists(path):
return path
if download:
return paddle.dataset.common.download(url, module_name, md5)
else:
raise FileNotFoundError(
'{} not exists and auto download disabled'.format(path))
......@@ -23,7 +23,7 @@ import numpy as np
from paddle import fluid
from paddle.fluid.layers import collective
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
from paddle.fluid.io import BatchSampler
from paddle.io import BatchSampler
_parallel_context_initialized = False
......@@ -39,7 +39,7 @@ class DistributedBatchSampler(BatchSampler):
Dataset is assumed to be of constant size.
Args:
data_source: this could be a `fluid.io.Dataset` implement
data_source: this could be a `paddle.io.Dataset` implement
or other python object which implemented
`__len__` for BatchSampler to get sample
number of data source.
......
......@@ -32,7 +32,7 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.layers.utils import flatten
from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
from paddle.fluid.incubate.fleet.base import role_maker
from paddle.fluid.io import DataLoader, Dataset
from paddle.io import DataLoader, Dataset
from hapi.distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
from hapi.metrics import Metric
......@@ -45,6 +45,14 @@ __all__ = [
def set_device(device):
"""
Args:
device (str): specify device type, 'cpu' or 'gpu'.
Returns:
fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place.
"""
assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \
"Expected device in ['cpu', 'gpu'], but got {}".format(device)
......@@ -117,9 +125,9 @@ class Loss(object):
def forward(self, outputs, labels):
raise NotImplementedError()
def __call__(self, outputs, labels):
def __call__(self, outputs, labels=None):
labels = to_list(labels)
if in_dygraph_mode():
if in_dygraph_mode() and labels:
labels = [to_variable(l) for l in labels]
losses = to_list(self.forward(to_list(outputs), labels))
if self.average:
......@@ -366,10 +374,27 @@ class StaticGraphAdapter(object):
metric_list, metric_splits = flatten_list(endpoints['metric'])
fetch_list = endpoints['loss'] + metric_list
num_loss = len(endpoints['loss'])
# if fetch Variable is same as input Variable, do not fetch
# from program, get it from input directly
pruned_fetch_list = []
pruned_fetch_idx_name_map = [""] * len(fetch_list)
for i, fetch_var in enumerate(fetch_list):
if fetch_var.name in feed.keys():
pruned_fetch_idx_name_map[i] = fetch_var.name
else:
pruned_fetch_list.append(fetch_var)
rets = self._executor.run(compiled_prog,
feed=feed,
fetch_list=fetch_list,
fetch_list=pruned_fetch_list,
return_numpy=False)
# restore pruned fetch_list Variable from feeds
for i, name in enumerate(pruned_fetch_idx_name_map):
if len(name) > 0:
rets.insert(i, feed[name])
# LoDTensor cannot be fetch as numpy directly
rets = [np.array(v) for v in rets]
if self.mode == 'test':
......@@ -867,8 +892,6 @@ class Model(fluid.dygraph.Layer):
if not isinstance(inputs, (list, dict, Input)):
raise TypeError(
"'inputs' must be list or dict in static graph mode")
if loss_function and not isinstance(labels, (list, Input)):
raise TypeError("'labels' must be list in static graph mode")
metrics = metrics or []
for metric in to_list(metrics):
......@@ -904,11 +927,11 @@ class Model(fluid.dygraph.Layer):
FIXME: add more comments and usage
Args:
train_data (Dataset|DataLoader): An iterable data loader is used for
train. An instance of paddle.fluid.io.Dataset or
paddle.fluid.io.Dataloader is recomended.
train. An instance of paddle paddle.io.Dataset or
paddle.io.Dataloader is recomended.
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation at the end of epoch. If None, will not do evaluation.
An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader
An instance of paddle.io.Dataset or paddle.io.Dataloader
is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
......@@ -1032,8 +1055,8 @@ class Model(fluid.dygraph.Layer):
FIXME: add more comments and usage
Args:
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation. An instance of paddle.fluid.io.Dataset or
paddle.fluid.io.Dataloader is recomended.
evaluation. An instance of paddle.io.Dataset or
paddle.io.Dataloader is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored.
......@@ -1098,12 +1121,16 @@ class Model(fluid.dygraph.Layer):
return eval_result
def predict(self, test_data, batch_size=1, num_workers=0):
def predict(self,
test_data,
batch_size=1,
num_workers=0,
stack_outputs=True):
"""
FIXME: add more comments and usage
Args:
test_data (Dataset|DataLoader): An iterable data loader is used for
predict. An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader
predict. An instance of paddle.io.Dataset or paddle.io.Dataloader
is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
......@@ -1111,6 +1138,12 @@ class Model(fluid.dygraph.Layer):
num_workers (int): the number of subprocess to load data, 0 for no subprocess
used and loading data in main process. When train_data and eval_data are
both the instance of Dataloader, this parameter will be ignored.
stack_output (bool): whether stack output field like a batch, as for an output
filed of a sample is in shape [X, Y], test_data contains N samples, predict
output field will be in shape [N, X, Y] if stack_output is True, and will
be a length N list in shape [[X, Y], [X, Y], ....[X, Y]] if stack_outputs
is False. stack_outputs as False is used for LoDTensor output situation,
it is recommended set as True if outputs contains no LoDTensor. Default False
"""
if fluid.in_dygraph_mode():
......@@ -1137,19 +1170,16 @@ class Model(fluid.dygraph.Layer):
if not isinstance(test_loader, Iterable):
loader = test_loader()
outputs = None
outputs = []
for data in tqdm.tqdm(loader):
if not fluid.in_dygraph_mode():
data = data[0]
outs = self.test(*data)
data = flatten(data)
outputs.append(self.test(data[:len(self._inputs)]))
if outputs is None:
outputs = outs
else:
outputs = [
np.vstack([x, outs[i]]) for i, x in enumerate(outputs)
]
# NOTE: for lod tensor output, we should not stack outputs
# for stacking may loss its detail info
outputs = list(zip(*outputs))
if stack_outputs:
outputs = [np.stack(outs, axis=0) for outs in outputs]
self._test_dataloader = None
if test_loader is not None and self._adapter._nranks > 1 \
......@@ -1161,8 +1191,8 @@ class Model(fluid.dygraph.Layer):
"""
Args:
eval_data (Dataset|DataLoader|None): An iterable data loader is used for
eval. An instance of paddle.fluid.io.Dataset or
paddle.fluid.io.Dataloader is recomended.
eval. An instance of paddle.io.Dataset or
paddle.io.Dataloader is recomended.
"""
assert isinstance(
eval_data,
......
......@@ -25,7 +25,7 @@ from functools import partial
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import BatchSampler, DataLoader, Dataset
from paddle.io import BatchSampler, DataLoader, Dataset
from hapi.distributed import DistributedBatchSampler
from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor
from hapi.text.bert.batching import prepare_batch_data
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import models
from . import transforms
__all__ = ["models", "transforms"]
......@@ -19,6 +19,7 @@ from . import mobilenetv2
from . import darknet
from . import yolov3
from . import tsm
from . import bmn
from .resnet import *
from .mobilenetv1 import *
......@@ -27,6 +28,7 @@ from .vgg import *
from .darknet import *
from .yolov3 import *
from .tsm import *
from .bmn import *
__all__ = resnet.__all__ \
+ vgg.__all__ \
......@@ -34,4 +36,5 @@ __all__ = resnet.__all__ \
+ mobilenetv2.__all__ \
+ darknet.__all__ \
+ yolov3.__all__ \
+ tsm.__all__
+ tsm.__all__ \
+ bmn.__all__
......@@ -17,12 +17,68 @@ from paddle.fluid import ParamAttr
import numpy as np
import math
from bmn_utils import get_interp1d_mask
from model import Model, Loss
from hapi.model import Model, Loss
__all__ = ["BMN", "BmnLoss"]
DATATYPE = 'float32'
def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample,
num_sample_perbin):
""" generate sample mask for a boundary-matching pair """
plen = float(seg_xmax - seg_xmin)
plen_sample = plen / (num_sample * num_sample_perbin - 1.0)
total_samples = [
seg_xmin + plen_sample * ii
for ii in range(num_sample * num_sample_perbin)
]
p_mask = []
for idx in range(num_sample):
bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) *
num_sample_perbin]
bin_vector = np.zeros([tscale])
for sample in bin_samples:
sample_upper = math.ceil(sample)
sample_decimal, sample_down = math.modf(sample)
if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0:
bin_vector[int(sample_down)] += 1 - sample_decimal
if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0:
bin_vector[int(sample_upper)] += sample_decimal
bin_vector = 1.0 / num_sample_perbin * bin_vector
p_mask.append(bin_vector)
p_mask = np.stack(p_mask, axis=1)
return p_mask
def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample,
num_sample_perbin):
""" generate sample mask for each point in Boundary-Matching Map """
mask_mat = []
for start_index in range(tscale):
mask_mat_vector = []
for duration_index in range(dscale):
if start_index + duration_index < tscale:
p_xmin = start_index
p_xmax = start_index + duration_index
center_len = float(p_xmax - p_xmin) + 1
sample_xmin = p_xmin - center_len * prop_boundary_ratio
sample_xmax = p_xmax + center_len * prop_boundary_ratio
p_mask = _get_interp1d_bin_mask(sample_xmin, sample_xmax,
tscale, num_sample,
num_sample_perbin)
else:
p_mask = np.zeros([tscale, num_sample])
mask_mat_vector.append(p_mask)
mask_mat_vector = np.stack(mask_mat_vector, axis=2)
mask_mat.append(mask_mat_vector)
mask_mat = np.stack(mask_mat, axis=3)
mask_mat = mask_mat.astype(np.float32)
sample_mask = np.reshape(mask_mat, [tscale, -1])
return sample_mask
# Net
class Conv1D(fluid.dygraph.Layer):
def __init__(self,
......
......@@ -18,10 +18,10 @@ from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from model import Model
from .download import get_weights_path
from hapi.model import Model
from hapi.download import get_weights_path
__all__ = ['DarkNet53', 'ConvBNLayer', 'darknet53']
__all__ = ['DarkNet', 'ConvBNLayer', 'darknet53']
# {num_layers: (url, md5)}
pretrain_infos = {
......@@ -136,9 +136,17 @@ class LayerWarp(fluid.dygraph.Layer):
DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}
class DarkNet53(Model):
class DarkNet(Model):
"""DarkNet model from
`"YOLOv3: An Incremental Improvement" <https://arxiv.org/abs/1804.02767>`_
Args:
num_layers (int): layer number of DarkNet, only 53 supported currently, default: 53.
ch_in (int): channel number of input data, default 3.
"""
def __init__(self, num_layers=53, ch_in=3):
super(DarkNet53, self).__init__()
super(DarkNet, self).__init__()
assert num_layers in DarkNet_cfg.keys(), \
"only support num_layers in {} currently" \
.format(DarkNet_cfg.keys())
......@@ -188,7 +196,7 @@ class DarkNet53(Model):
def _darknet(num_layers=53, input_channels=3, pretrained=True):
model = DarkNet53(num_layers, input_channels)
model = DarkNet(num_layers, input_channels)
if pretrained:
assert num_layers in pretrain_infos.keys(), \
"DarkNet{} do not have pretrained weights now, " \
......@@ -201,4 +209,11 @@ def _darknet(num_layers=53, input_channels=3, pretrained=True):
def darknet53(input_channels=3, pretrained=True):
"""DarkNet 53-layer model
Args:
input_channels (bool): channel number of input data, default 3.
pretrained (bool): If True, returns a model pre-trained on ImageNet,
default True.
"""
return _darknet(53, input_channels, pretrained)
......@@ -19,8 +19,8 @@ from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from model import Model
from .download import get_weights_path
from hapi.model import Model
from hapi.download import get_weights_path
__all__ = ['MobileNetV1', 'mobilenet_v1']
......
......@@ -18,8 +18,8 @@ import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from model import Model
from .download import get_weights_path
from hapi.model import Model
from hapi.download import get_weights_path
__all__ = ['MobileNetV2', 'mobilenet_v2']
......
......@@ -22,8 +22,8 @@ from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.container import Sequential
from model import Model
from .download import get_weights_path
from hapi.model import Model
from hapi.download import get_weights_path
__all__ = [
'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
......
......@@ -17,8 +17,8 @@ import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from model import Model
from .download import get_weights_path
from hapi.model import Model
from hapi.download import get_weights_path
__all__ = ["TSM_ResNet", "tsm_resnet50"]
......@@ -201,4 +201,12 @@ def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
def tsm_resnet50(seg_num=8, num_classes=400, pretrained=True):
"""TSM model with 50-layer ResNet as backbone
Args:
seg_num (int): segment number of each video sample. Default 8.
num_classes (int): video class number. Default 400.
pretrained (bool): If True, returns a model with pre-trained model
on COCO, default True
"""
return _tsm_resnet(50, seg_num, num_classes, pretrained)
......@@ -17,8 +17,8 @@ import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.container import Sequential
from model import Model
from .download import get_weights_path
from hapi.model import Model
from hapi.download import get_weights_path
__all__ = [
'VGG',
......
......@@ -20,9 +20,9 @@ from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from model import Model, Loss
from hapi.model import Model, Loss
from hapi.download import get_weights_path
from .darknet import darknet53, ConvBNLayer
from .download import get_weights_path
__all__ = ['YoloLoss', 'YOLOv3', 'yolov3_darknet53']
......@@ -88,6 +88,20 @@ class YoloDetectionBlock(fluid.dygraph.Layer):
class YOLOv3(Model):
"""YOLOv3 model from
`"YOLOv3: An Incremental Improvement" <https://arxiv.org/abs/1804.02767>`_
Args:
num_classes (int): class number, default 80.
model_mode (str): 'train', 'eval', 'test' mode, network structure
will be diffrent in the output layer and data, in 'train' mode,
no output layer append, in 'eval' and 'test', output feature
map will be decode to predictions by 'fluid.layers.yolo_box',
in 'eval' mode, return feature maps and predictions, in 'test'
mode, only return predictions. Default 'train'.
"""
def __init__(self, num_classes=80, model_mode='train'):
super(YOLOv3, self).__init__()
self.num_classes = num_classes
......@@ -245,4 +259,17 @@ def _yolov3_darknet(num_layers=53, num_classes=80,
def yolov3_darknet53(num_classes=80, model_mode='train', pretrained=True):
"""YOLOv3 model with 53-layer DarkNet as backbone
Args:
num_classes (int): class number, default 80.
model_mode (str): 'train', 'eval', 'test' mode, network structure
will be diffrent in the output layer and data, in 'train' mode,
no output layer append, in 'eval' and 'test', output feature
map will be decode to predictions by 'fluid.layers.yolo_box',
in 'eval' mode, return feature maps and predictions, in 'test'
mode, only return predictions. Default 'train'.
pretrained (bool): If True, returns a model with pre-trained model
on COCO, default True
"""
return _yolov3_darknet(53, num_classes, model_mode, pretrained)
......@@ -13,3 +13,5 @@
# limitations under the License.
from .transforms import *
from .functional import *
from .detection_transforms import *
......@@ -19,48 +19,18 @@ import cv2
import traceback
import numpy as np
import logging
logger = logging.getLogger(__name__)
__all__ = ['ColorDistort', 'RandomExpand', 'RandomCrop', 'RandomFlip',
'NormalizeBox', 'PadBox', 'RandomShape', 'NormalizeImage',
'BboxXYXY2XYWH', 'ResizeImage', 'Compose', 'BatchCompose']
class Compose(object):
def __init__(self, transforms=[]):
self.transforms = transforms
def __call__(self, *data):
for f in self.transforms:
try:
data = f(*data)
except Exception as e:
stack_info = traceback.format_exc()
logger.info("fail to perform transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
return data
class BatchCompose(object):
def __init__(self, transforms=[]):
self.transforms = transforms
def __call__(self, data):
for f in self.transforms:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
logger.info("fail to perform batch transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
# sample list to batch data
batch = list(zip(*data))
return batch
__all__ = [
'ColorDistort',
'RandomExpand',
'RandomCrop',
'RandomFlip',
'NormalizeBox',
'PadBox',
'RandomShape',
'NormalizeImage',
'BboxXYXY2XYWH',
'ResizeImage',
]
class ColorDistort(object):
......
......@@ -24,6 +24,7 @@ import numbers
import types
import collections
import warnings
import traceback
from . import functional as F
......@@ -34,6 +35,7 @@ else:
__all__ = [
"Compose",
"BatchCompose",
"Resize",
"RandomResizedCrop",
"CenterCropResize",
......@@ -62,10 +64,16 @@ class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img):
for t in self.transforms:
img = t(img)
return img
def __call__(self, *data):
for f in self.transforms:
try:
data = f(*data)
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
return data
def __repr__(self):
format_string = self.__class__.__name__ + '('
......@@ -76,6 +84,33 @@ class Compose(object):
return format_string
class BatchCompose(object):
"""Composes several batch transforms together
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
these transforms perform on batch data.
"""
def __init__(self, transforms=[]):
self.transforms = transforms
def __call__(self, data):
for f in self.transforms:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform batch transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
# sample list to batch data
batch = list(zip(*data))
return batch
class Resize(object):
"""Resize the input PIL Image to the given size.
......@@ -94,7 +129,7 @@ class Resize(object):
self.size = size
self.interpolation = interpolation
def __call__(self, img):
def __call__(self, img, lbl):
"""
Args:
img (PIL Image): Image to be scaled.
......@@ -102,7 +137,7 @@ class Resize(object):
Returns:
PIL Image: Rescaled image.
"""
return F.resize(img, self.size, self.interpolation)
return F.resize(img, self.size, self.interpolation), lbl
class RandomResizedCrop(object):
......@@ -164,10 +199,10 @@ class RandomResizedCrop(object):
y = (height - h) // 2
return x, y, w, h
def __call__(self, img):
def __call__(self, img, lbl):
x, y, w, h = self._get_params(img)
cropped_img = img[y:y + h, x:x + w]
return F.resize(cropped_img, self.output_size, self.interpolation)
return F.resize(cropped_img, self.output_size, self.interpolation), lbl
class CenterCropResize(object):
......@@ -195,10 +230,10 @@ class CenterCropResize(object):
y = (w + 1 - c) // 2
return c, x, y
def __call__(self, img):
def __call__(self, img, lbl):
c, x, y = self._get_params(img)
cropped_img = img[x:x + c, y:y + c, :]
return F.resize(cropped_img, self.size, self.interpolation)
return F.resize(cropped_img, self.size, self.interpolation), lbl
class CenterCrop(object):
......@@ -222,10 +257,10 @@ class CenterCrop(object):
y = int(round((h - th) / 2.0))
return x, y
def __call__(self, img):
def __call__(self, img, lbl):
x, y = self._get_params(img)
th, tw = self.output_size
return img[y:y + th, x:x + tw]
return img[y:y + th, x:x + tw], lbl
class RandomHorizontalFlip(object):
......@@ -238,10 +273,10 @@ class RandomHorizontalFlip(object):
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, img):
def __call__(self, img, lbl):
if np.random.random() < self.prob:
return F.flip(img, code=1)
return img
return F.flip(img, code=1), lbl
return img, lbl
class RandomVerticalFlip(object):
......@@ -254,10 +289,10 @@ class RandomVerticalFlip(object):
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, img):
def __call__(self, img, lbl):
if np.random.random() < self.prob:
return F.flip(img, code=0)
return img
return F.flip(img, code=0), lbl
return img, lbl
class Normalize(object):
......@@ -282,8 +317,8 @@ class Normalize(object):
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
def __call__(self, img):
return (img - self.mean) / self.std
def __call__(self, img, lbl):
return (img - self.mean) / self.std, lbl
class Permute(object):
......@@ -302,10 +337,10 @@ class Permute(object):
], "Only support 'CHW' mode, but received mode: {}".format(mode)
self.mode = mode
def __call__(self, img):
def __call__(self, img, lbl):
if self.mode == "CHW":
return img.transpose((2, 0, 1))[::-1, ...]
return img
return img.transpose((2, 0, 1))[::-1, ...], lbl
return img, lbl
class GaussianNoise(object):
......@@ -321,11 +356,11 @@ class GaussianNoise(object):
self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32)
def __call__(self, img):
def __call__(self, img, lbl):
dtype = img.dtype
noise = np.random.normal(self.mean, self.std, img.shape) * 255
img = img + noise.astype(np.float32)
return np.clip(img, 0, 255).astype(dtype)
return np.clip(img, 0, 255).astype(dtype), lbl
class BrightnessTransform(object):
......@@ -341,15 +376,15 @@ class BrightnessTransform(object):
raise ValueError("brightness value should be non-negative")
self.value = value
def __call__(self, img):
def __call__(self, img, lbl):
if self.value == 0:
return img
return img, lbl
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha
return img.clip(0, 255).astype(dtype)
return img.clip(0, 255).astype(dtype), lbl
class ContrastTransform(object):
......@@ -365,16 +400,16 @@ class ContrastTransform(object):
raise ValueError("contrast value should be non-negative")
self.value = value
def __call__(self, img):
def __call__(self, img, lbl):
if self.value == 0:
return img
return img, lbl
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * (
1 - alpha)
return img.clip(0, 255).astype(dtype)
return img.clip(0, 255).astype(dtype), lbl
class SaturationTransform(object):
......@@ -390,9 +425,9 @@ class SaturationTransform(object):
raise ValueError("saturation value should be non-negative")
self.value = value
def __call__(self, img):
def __call__(self, img, lbl):
if self.value == 0:
return img
return img, lbl
dtype = img.dtype
img = img.astype(np.float32)
......@@ -400,7 +435,7 @@ class SaturationTransform(object):
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_img = gray_img[..., np.newaxis]
img = img * alpha + gray_img * (1 - alpha)
return img.clip(0, 255).astype(dtype)
return img.clip(0, 255).astype(dtype), lbl
class HueTransform(object):
......@@ -416,9 +451,9 @@ class HueTransform(object):
raise ValueError("hue value should be in [0.0, 0.5]")
self.value = value
def __call__(self, img):
def __call__(self, img, lbl):
if self.value == 0:
return img
return img, lbl
dtype = img.dtype
img = img.astype(np.uint8)
......@@ -431,7 +466,7 @@ class HueTransform(object):
with np.errstate(over="ignore"):
h += np.uint8(alpha * 255)
hsv_img = cv2.merge([h, s, v])
return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype)
return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype), lbl
class ColorJitter(object):
......@@ -466,5 +501,5 @@ class ColorJitter(object):
random.shuffle(transforms)
self.transforms = Compose(transforms)
def __call__(self, img):
return self.transforms(img)
def __call__(self, img, lbl):
return self.transforms(img, lbl)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import six
import abc
import numpy as np
import paddle.fluid as fluid
import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
__all__ = ['Metric', 'Accuracy']
@six.add_metaclass(abc.ABCMeta)
class Metric(object):
"""
Base class for metric, encapsulates metric logic and APIs
Usage:
m = SomeMetric()
for prediction, label in ...:
m.update(prediction, label)
m.accumulate()
"""
@abc.abstractmethod
def reset(self):
"""
Reset states and result
"""
raise NotImplementedError("function 'reset' not implemented in {}.".
format(self.__class__.__name__))
@abc.abstractmethod
def update(self, *args, **kwargs):
"""
Update states for metric
"""
raise NotImplementedError("function 'update' not implemented in {}.".
format(self.__class__.__name__))
@abc.abstractmethod
def accumulate(self):
"""
Accumulates statistics, computes and returns the metric value
"""
raise NotImplementedError(
"function 'accumulate' not implemented in {}.".format(
self.__class__.__name__))
@abc.abstractmethod
def name(self):
"""
Returns metric name
"""
raise NotImplementedError("function 'name' not implemented in {}.".
format(self.__class__.__name__))
def add_metric_op(self, pred, label):
"""
Add process op for metric in program
"""
return pred, label
class Accuracy(Metric):
"""
Encapsulates accuracy metric logic
"""
def __init__(self, topk=(1, ), name=None, *args, **kwargs):
super(Accuracy, self).__init__(*args, **kwargs)
self.topk = topk
self.maxk = max(topk)
self._init_name(name)
self.reset()
def add_metric_op(self, pred, label, *args, **kwargs):
pred = fluid.layers.argsort(pred[0], descending=True)[1][:, :self.maxk]
correct = pred == label[0]
return correct
def update(self, correct, *args, **kwargs):
accs = []
for i, k in enumerate(self.topk):
num_corrects = correct[:, :k].sum()
num_samples = len(correct)
accs.append(float(num_corrects) / num_samples)
self.total[i] += num_corrects
self.count[i] += num_samples
return accs
def reset(self):
self.total = [0.] * len(self.topk)
self.count = [0] * len(self.topk)
def accumulate(self):
res = []
for t, c in zip(self.total, self.count):
res.append(float(t) / c)
return res
def _init_name(self, name):
name = name or 'acc'
if self.maxk != 1:
self._name = ['{}_top{}'.format(name, k) for k in self.topk]
else:
self._name = ['acc']
def name(self):
return self._name
......@@ -24,7 +24,7 @@ import numpy as np
from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.io import MNIST as MnistDataset
from vision.datasets import MNIST as MnistDataset
from model import Model, CrossEntropy, Input, set_device
from metrics import Accuracy
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import inspect
import os
import pickle
import numpy as np
import six
import warnings
import tqdm
from collections import Iterable
from paddle import fluid
from paddle.fluid.framework import in_dygraph_mode, Variable
from paddle.fluid.executor import global_scope
from paddle.fluid.io import is_belong_to_optimizer
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.layers.utils import flatten
from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
from paddle.fluid.incubate.fleet.base import role_maker
from paddle.fluid.io import DataLoader, Dataset
from distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
from metrics import Metric
from callbacks import config_callbacks
__all__ = ['Model', 'Loss', 'CrossEntropy', 'Input', 'set_device']
def set_device(device):
"""
Args:
device (str): specify device type, 'cpu' or 'gpu'.
Returns:
fluid.CUDAPlace or fluid.CPUPlace: Created GPU or CPU place.
"""
assert isinstance(device, six.string_types) and device.lower() in ['cpu', 'gpu'], \
"Expected device in ['cpu', 'gpu'], but got {}".format(device)
place = fluid.CUDAPlace(ParallelEnv().dev_id) \
if device.lower() == 'gpu' and fluid.is_compiled_with_cuda() \
else fluid.CPUPlace()
return place
def to_list(value):
if value is None:
return value
if isinstance(value, (list, tuple)):
return value
return [value]
def to_numpy(var):
assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
if isinstance(var, fluid.core.VarBase):
return var.numpy()
t = global_scope().find_var(var.name).get_tensor()
return np.array(t)
def flatten_list(l):
assert isinstance(l, list), "not a list"
outl = []
splits = []
for sl in l:
assert isinstance(sl, list), "sub content not a list"
splits.append(len(sl))
outl += sl
return outl, splits
def restore_flatten_list(l, splits):
outl = []
for split in splits:
assert len(l) >= split, "list length invalid"
sl, l = l[:split], l[split:]
outl.append(sl)
return outl
def extract_args(func):
if hasattr(inspect, 'getfullargspec'):
return inspect.getfullargspec(func)[0]
else:
return inspect.getargspec(func)[0]
class Input(fluid.dygraph.Layer):
def __init__(self, shape=None, dtype=None, name=None):
super(Input, self).__init__()
self.shape = shape
self.dtype = dtype
self.name = name
def forward(self):
return fluid.data(self.name, shape=self.shape, dtype=self.dtype)
class Loss(object):
def __init__(self, average=True):
super(Loss, self).__init__()
self.average = average
def forward(self, outputs, labels):
raise NotImplementedError()
def __call__(self, outputs, labels=None):
labels = to_list(labels)
if in_dygraph_mode() and labels:
labels = [to_variable(l) for l in labels]
losses = to_list(self.forward(to_list(outputs), labels))
if self.average:
losses = [fluid.layers.reduce_mean(l) for l in losses]
else:
losses = [fluid.layers.reduce_sum(l) for l in losses]
return losses
class CrossEntropy(Loss):
def __init__(self, average=True):
super(CrossEntropy, self).__init__()
def forward(self, outputs, labels):
return [
fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels)
]
class StaticGraphAdapter(object):
def __init__(self, model):
super(StaticGraphAdapter, self).__init__()
self.model = model
# with `_build_once` gone, parameters are now created in `__init__`
# so we need to keep track of the parameters already created
self._startup_prog = fluid.default_startup_program()
self._orig_prog = fluid.default_main_program()
self._label_vars = {} # label variables
self._input_vars = {} # label variables
self._endpoints = {}
self._loss_endpoint = None
self._executor = None
self._progs = {}
self._compiled_progs = {}
self._merge_count = {
'eval_total': 0,
'test_total': 0,
'eval_batch': 0,
'test_batch': 0
}
self._nranks = ParallelEnv().nranks
self._local_rank = ParallelEnv().local_rank
@property
def mode(self):
return self.model.mode
@mode.setter
def mode(self, value):
self.model.mode = value
def train(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
self.mode = 'train'
return self._run(inputs, labels)
def eval(self, inputs, labels=None):
self.mode = 'eval'
return self._run(inputs, labels)
def test(self, inputs):
self.mode = 'test'
return self._run(inputs, None)
def parameters(self, *args, **kwargs):
return super(Model, self.model).parameters(*args, **kwargs)
def save(self, path):
def _save(state, path):
if not state:
return
state = {
k: to_numpy(v) if isinstance(v, Variable) else v
for k, v in state.items()
}
with open(path, 'wb') as f:
pickle.dump(state, f)
base = os.path.basename(path)
assert base != "", "path should be of 'dirname/filename' format"
dir_name = os.path.dirname(path)
if dir_name and not os.path.exists(dir_name):
os.makedirs(dir_name)
param_path = path + ".pdparams"
_save(self.model.state_dict(), param_path)
prog = self._progs.get('train', None)
if prog is None or self.model._optimizer is None:
return
# XXX `optimizer.state_dict()` only work in dygraph mode
optim_path = path + ".pdopt"
optim = {
p.name: p
for p in filter(is_belong_to_optimizer, prog.list_vars())
}
if not optim:
return
_save(optim, optim_path)
def load(self, param_state_pairs, optim_state):
if self._executor is None:
executor = fluid.Executor(fluid.CPUPlace())._default_executor
else:
executor = self._executor._default_executor
# restore parameter states
fluid.core._create_loaded_parameter(
[param for param, state in param_state_pairs],
global_scope(), executor)
for param, state in param_state_pairs:
self._set_var(param, state)
# restore optimizer states
# FIXME what if a different optimizer is used?
if not self.model._optimizer or not optim_state:
return
self._load_optimizer(optim_state, executor)
def _load_optimizer(self, state, executor):
prog = self._progs.get('train', None)
optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
if not optim:
return
fluid.core._create_loaded_parameter(optim, global_scope(), executor)
converted_state = dict(state)
for var in optim:
if var.name in ["@LR_DECAY_COUNTER@", "global_step"]:
# When using learning rate scheduler, dygraph would name the
# global step var as "global_step" to save, while static-graph
# would has a state var named as "@LR_DECAY_COUNTER@".
# NOTE: dygraph saved global_step is 1 larger than that in
# static-graph, since the time of global_step to increase is
# different.
state_val = (
np.array(converted_state.pop("global_step")) - 1
) if "global_step" in converted_state else converted_state.pop(
"@LR_DECAY_COUNTER@", None)
if state_val is not None:
converted_state[var.name] = state_val
elif var.name.startswith("learning_rate_"):
# When using static learning rate, static-graph would make it
# a persistable var named 'unique_name.generate("learning_rate")',
# However, dygraph wouldn't save it.
if var.name not in state:
continue
else:
# moment and other accumulators
if var.name not in converted_state:
# try to convert from dygraph name
opt_name = self.model._optimizer._name
opt_cls_name = self.model._optimizer.__class__.__name__
opt_unq_name = None
for name in self.model._optimizer._accumulators.keys():
accum_name = name if opt_name is None else name[len(
opt_name) + 1:]
for param_name, state_var in self.model._optimizer._accumulators[
name].items():
if opt_unq_name is None:
# can not infer out the exact unique(opt_name),
# thus try to extract rather than generate
for state_key in sorted(
state.keys(),
key=lambda x: len(x),
reverse=True):
prefix = param_name + "_" + (
opt_cls_name if opt_name is None else
opt_name) + "_"
if state_key.startswith(prefix):
prefix_offset = state_key[len(
prefix):].find("_") + len(prefix)
opt_unq_name = state_key[len(
param_name + "_"):prefix_offset]
# TODO: assert
# assert opt_unq_name is None
# gen(param.name + "_" + gen(opt_name) + "_" + accum_name)
# always end with "_0" since the unique optimizer._name
dy_state_name = (param_name + "_" + opt_unq_name +
"_" + accum_name + "_0")
converted_state[
state_var.name] = converted_state.pop(
dy_state_name)
assert var.name in converted_state, \
"variable [{}] is not in optimizer state file".format(var.name)
self._set_var(var, converted_state[var.name])
def _set_var(self, var, ndarray):
t = global_scope().find_var(var.name).get_tensor()
p = t._place()
if p.is_cpu_place():
place = fluid.CPUPlace()
elif p.is_cuda_pinned_place():
place = fluid.CUDAPinnedPlace()
else:
p = fluid.core.Place()
p.set_place(t._place())
place = fluid.CUDAPlace(p.gpu_device_id())
t.set(ndarray, place)
def _run(self, inputs, labels=None):
compiled_prog = self._compiled_progs.get(self.mode, None)
assert compiled_prog, \
"Model is not ready, please call `model.prepare()` first"
inputs = to_list(inputs)
if labels is not None:
labels = to_list(labels)
assert len(inputs) == len(self._input_vars[self.mode]), \
"number of inputs" \
+ " does not match number of arguments of `forward` method"
feed = {}
input_names = [v.name for v in self._input_vars[self.mode]]
for idx, n in enumerate(input_names):
# train and test may take different arguments
if inputs[idx] is not None:
feed[n] = inputs[idx]
if labels is not None:
for idx, v in enumerate(self._label_vars[self.mode]):
feed[v.name] = labels[idx]
endpoints = self._endpoints[self.mode]
if self.mode == 'test':
fetch_list = endpoints['output']
else:
metric_list, metric_splits = flatten_list(endpoints['metric'])
fetch_list = endpoints['loss'] + metric_list
num_loss = len(endpoints['loss'])
# if fetch Variable is same as input Variable, do not fetch
# from program, get it from input directly
pruned_fetch_list = []
pruned_fetch_idx_name_map = [""] * len(fetch_list)
for i, fetch_var in enumerate(fetch_list):
if fetch_var.name in feed.keys():
pruned_fetch_idx_name_map[i] = fetch_var.name
else:
pruned_fetch_list.append(fetch_var)
rets = self._executor.run(compiled_prog,
feed=feed,
fetch_list=pruned_fetch_list,
return_numpy=False)
# restore pruned fetch_list Variable from feeds
for i, name in enumerate(pruned_fetch_idx_name_map):
if len(name) > 0:
rets.insert(i, feed[name])
# LoDTensor cannot be fetch as numpy directly
rets = [np.array(v) for v in rets]
if self.mode == 'test':
return rets[:]
losses = rets[:num_loss]
metric_states = restore_flatten_list(rets[num_loss:], metric_splits)
metrics = []
for metric, state in zip(self.model._metrics, metric_states):
# cut off padding size
if self.mode != 'train' and self.model._test_dataloader is not None \
and isinstance(self.model._test_dataloader, DataLoader) \
and self._nranks > 1:
total_size = len(self.model._test_dataloader.dataset)
# TODO: fixme if have better way to get batch size
samples = state[0].shape[0]
current_count = self._merge_count.get(self.mode + '_total', 0)
if current_count + samples >= total_size:
state = [
s[:total_size - current_count, ...] for s in state
]
self._merge_count[self.mode + '_total'] = 0
self._merge_count[self.mode +
'_batch'] = total_size - current_count
else:
self._merge_count[self.mode + '_total'] += samples
self._merge_count[self.mode + '_batch'] = samples
metrics.append(metric.update(*state))
return (losses, metrics) if len(metrics) > 0 else losses
def prepare(self):
modes = ['train', 'eval', 'test']
for mode in modes:
self._make_program(mode)
self._compile_and_initialize(self._progs[mode], mode)
def _make_program(self, mode):
prog = self._progs.get(mode, None)
if prog is not None:
return
prog = self._orig_prog.clone()
# NOTE: When defining learning rate scheduling in static-graph, ops to
# increase the global step var and calculate learning rate would be
# prepended into _orig_prog. test program maked by `_orig_prog.clone`
# also would include these ops. Thus must prune these ops in test
# program, otherwise the global step would be changed in test.
if mode != 'train':
for op in list(prog.global_block().ops):
prog.global_block()._remove_op(0)
if mode == 'train' and self.model._optimizer \
and self.model._optimizer._learning_rate_map:
# HACK workaround learning rate map issue
lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
new_lr_var = prog.global_block().vars[lr_var.name]
self.model._optimizer._learning_rate_map[prog] = new_lr_var
losses = []
metrics = []
with fluid.program_guard(prog, self._startup_prog):
ins = self.model._inputs
lbls = self.model._labels if self.model._labels else []
inputs = [k.forward() for k in to_list(ins)]
labels = [k.forward() for k in to_list(lbls)]
self._label_vars[mode] = labels
outputs = to_list(self.model.forward(*inputs))
if mode != 'test' and self.model._loss_function:
losses = self.model._loss_function(outputs, labels)
if self._nranks > 1 and mode != 'train':
outputs = [_all_gather(o, self._nranks) for o in outputs]
if mode != 'test':
labels = [_all_gather(l, self._nranks) for l in labels]
if mode != 'test':
for metric in self.model._metrics:
metrics.append(
to_list(metric.add_metric_op(outputs, labels)))
if mode == 'train' and self.model._optimizer:
self._loss_endpoint = fluid.layers.sum(losses)
if self._nranks > 1:
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
dist_strategy = DistributedStrategy()
dist_strategy.mode = "collective"
dist_strategy.collective_mode = "grad_allreduce"
self.model._optimizer = fleet.distributed_optimizer(
self.model._optimizer, strategy=dist_strategy)
self.model._optimizer.minimize(self._loss_endpoint)
if mode != 'train': # clone again to put it in test mode
prog = prog.clone(for_test=True)
self._input_vars[mode] = inputs
self._progs[mode] = prog
self._endpoints[mode] = {
"output": outputs,
"loss": losses,
"metric": metrics
}
def _compile_and_initialize(self, prog, mode):
compiled_prog = self._compiled_progs.get(mode, None)
if compiled_prog is not None:
return compiled_prog
assert self.model._place is not None, \
"device is not set, please call `model.prepare()` first"
place = self.model._place
# XXX *ALL WEIGHTS* should be initialized upon model construction
# even if `forward()` may run different code path for different mode
# therefore startup program only needs to run once
if self._executor is None:
self._executor = fluid.Executor(place)
# XXX incremental initialization
uninitialized = []
for var_py in self._startup_prog.list_vars():
var = fluid.global_scope().find_var(var_py.name)
if not var_py.name.startswith('nccl_id') and var and \
var.get_tensor()._is_initialized():
continue
uninitialized.append(var_py)
if uninitialized:
startup_prog = self._startup_prog._prune(uninitialized)
self._executor.run(startup_prog)
if self._nranks < 2:
compiled_prog = fluid.CompiledProgram(prog)
else:
compiled_prog = prog
self._compiled_progs[mode] = compiled_prog
class DynamicGraphAdapter(object):
def __init__(self, model):
super(DynamicGraphAdapter, self).__init__()
self.model = model
self._nranks = ParallelEnv().nranks
self._local_rank = ParallelEnv().local_rank
self._merge_count = {
'eval_total': 0,
'test_total': 0,
'eval_batch': 0,
'test_batch': 0
}
if self._nranks > 1:
stradegy = fluid.dygraph.parallel.ParallelStrategy()
stradegy.nranks = ParallelEnv().nranks
stradegy.local_rank = ParallelEnv().local_rank
stradegy.trainer_endpoints = ParallelEnv().trainer_endpoints
stradegy.current_endpoint = ParallelEnv().current_endpoint
self.ddp_model = fluid.dygraph.parallel.DataParallel(self.model,
stradegy)
@property
def mode(self):
return self.model.mode
@mode.setter
def mode(self, value):
self.model.mode = value
# TODO multi device in dygraph mode not implemented at present time
def train(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
super(Model, self.model).train()
self.mode = 'train'
inputs = to_list(inputs)
if labels is not None:
labels = [to_variable(l) for l in to_list(labels)]
if self._nranks > 1:
outputs = self.ddp_model.forward(*[to_variable(x) for x in inputs])
losses = self.model._loss_function(outputs, labels)
final_loss = fluid.layers.sum(losses)
final_loss = self.ddp_model.scale_loss(final_loss)
final_loss.backward()
self.ddp_model.apply_collective_grads()
else:
outputs = self.model.forward(*[to_variable(x) for x in inputs])
losses = self.model._loss_function(outputs, labels)
final_loss = fluid.layers.sum(losses)
final_loss.backward()
self.model._optimizer.minimize(final_loss)
self.model.clear_gradients()
metrics = []
for metric in self.model._metrics:
metric_outs = metric.add_metric_op(
to_list(outputs), to_list(labels))
m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
metrics.append(m)
return ([to_numpy(l) for l in losses], metrics) \
if len(metrics) > 0 else [to_numpy(l) for l in losses]
def eval(self, inputs, labels=None):
super(Model, self.model).eval()
self.mode = 'eval'
inputs = to_list(inputs)
if labels is not None:
labels = [to_variable(l) for l in to_list(labels)]
outputs = self.model.forward(*[to_variable(x) for x in inputs])
if self.model._loss_function:
losses = self.model._loss_function(outputs, labels)
else:
losses = []
if self._nranks > 1:
outputs = [_all_gather(o, self._nranks) for o in to_list(outputs)]
labels = [_all_gather(l, self._nranks) for l in labels]
metrics = []
for metric in self.model._metrics:
# cut off padding value.
if self.model._test_dataloader is not None and self._nranks > 1 \
and isinstance(self.model._test_dataloader, DataLoader):
total_size = len(self.model._test_dataloader.dataset)
samples = outputs[0].shape[0]
current_count = self._merge_count.get(self.mode + '_total', 0)
if current_count + samples >= total_size:
outputs = [o[:total_size - current_count] for o in outputs]
labels = [l[:total_size - current_count] for l in labels]
self._merge_count[self.mode + '_total'] = 0
self._merge_count[self.mode +
'_batch'] = total_size - current_count
else:
self._merge_count[self.mode + '_total'] += samples
self._merge_count[self.mode + '_batch'] = samples
metric_outs = metric.add_metric_op(to_list(outputs), labels)
m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
metrics.append(m)
# To be consistent with static graph
# return empty loss if loss_function is None
return ([to_numpy(l) for l in losses], metrics) \
if len(metrics) > 0 else [to_numpy(l) for l in losses]
def test(self, inputs):
super(Model, self.model).eval()
self.mode = 'test'
inputs = [to_variable(x) for x in to_list(inputs)]
outputs = self.model.forward(*inputs)
if self._nranks > 1 and isinstance(self.model._place, fluid.CUDAPlace):
outputs = [_all_gather(o, self._nranks) for o in to_list(outputs)]
return [to_numpy(o) for o in to_list(outputs)]
def parameters(self, *args, **kwargs):
return super(Model, self.model).parameters(*args, **kwargs)
def save(self, path):
params = self.model.state_dict()
fluid.save_dygraph(params, path)
if self.model._optimizer is None:
return
if self.model._optimizer.state_dict():
optim = self.model._optimizer.state_dict()
fluid.save_dygraph(optim, path)
def load(self, param_state_pairs, optim_state):
# restore parameter states
for param, state in param_state_pairs:
param.set_value(state)
# resotre optimizer states
if not self.model._optimizer or not optim_state:
return
# If optimizer performs set_dict when state vars haven't been created,
# which would happen when set_dict before minimize, the state would be
# stored in optimizer._accumulators_holder and loaded lazily.
# To contrive this when loading from static-graph saved states, extend
# state dict to include keys named accoring to dygraph naming rules.
# TODO: if len(self.model._optimizer._accumulators) > 0
converted_state = dict(optim_state)
opt_unq_name = self.model._optimizer._name
opt_cls_name = self.model._optimizer.__class__.__name__
opt_name = opt_unq_name[:opt_unq_name.rfind("_")] # remove suffix idx
param_names = [param.name for param in self.model.parameters()]
for var_name, state_var in sorted(
optim_state.items(), key=lambda x: len(x[0]), reverse=True):
if var_name in ["@LR_DECAY_COUNTER@", "global_step"]:
# NOTE: dygraph saved global_step is 1 larger than that in
# static-graph, since the time of global_step to increase is
# different.
if var_name == "@LR_DECAY_COUNTER@":
converted_state["global_step"] = np.array(
converted_state.pop("@LR_DECAY_COUNTER@")) + 1
else:
# moment and other accumulators
# extend state dict to include promising dygraph names
for param_name in param_names:
if var_name.startswith(param_name + "_" + opt_name):
# when init optimizer with name
accum_name = var_name[len(param_name + "_" + opt_name +
"_"):]
elif var_name.startswith(param_name +
"_") and opt_name == opt_cls_name:
# when init optimizer without name
accum_name = var_name[len(param_name + "_"):]
else:
continue
# remove suffix idx
accum_name = accum_name[:accum_name.rfind("_")]
# state names always end with "_0" in dygraph because of the
# unique optimizer._name
dy_state_name = (param_name + "_" + opt_unq_name + "_" +
accum_name + "_0")
converted_state[dy_state_name] = state_var
self.model._optimizer.set_dict(converted_state)
class Model(fluid.dygraph.Layer):
"""
FIXME: add more comments and usage
"""
def __init__(self):
super(Model, self).__init__(self.__class__.__name__)
self.mode = 'train'
self._inputs = None
self._labels = None
self._loss_function = None
self._loss_weights = None
self._optimizer = None
self._device = None
self._optimizer = None
self._test_dataloader = None
# init backend
if fluid.in_dygraph_mode():
self._adapter = DynamicGraphAdapter(self)
else:
self._adapter = StaticGraphAdapter(self)
def train(self, *args, **kwargs):
return self._adapter.train(*args, **kwargs)
def eval(self, *args, **kwargs):
return self._adapter.eval(*args, **kwargs)
def test(self, *args, **kwargs):
return self._adapter.test(*args, **kwargs)
def save(self, *args, **kwargs):
if ParallelEnv().local_rank == 0:
return self._adapter.save(*args, **kwargs)
def load(self, path, skip_mismatch=False, reset_optimizer=False):
"""
Load from files storing the model states and optimizer states. The file
for optimizer states is not necessary if no need to restore the optimizer.
NOTE: parameters are retrieved out from the file storing model states
accoring to their structured names.
For fine-tuning or transfer-learning models where some of the layers have
changed, keep parameters needed to restore have same structured names in
the pre-trained model and fine-tuning model.
Args:
path (str): The prefix of files storing the model states and
optimizer states. The files would be `path.pdparams` and
`path.pdopt` separately, and the latter is not necessary
when no need to restore.
skip_mismatch (bool): Whether to skip the loading of mismatch
parameter or raise an error when mismatch happens (not found
the parameter in file storing model states of or receives a
mismatch shape).
reset_optimizer (bool): If True, ignore the providing file storing
optimizer states and initialize optimizer states from scratch.
Otherwise, restore optimizer states from `path.pdopt` if
a optimizer has been set to the model. Default False.
"""
def _load_state_from_path(path):
if not os.path.exists(path):
return
with open(path, 'rb') as f:
return pickle.load(f) if six.PY2 else pickle.load(
f, encoding='latin1')
def _check_match(key, param):
state = param_state.get(key, None)
if state is None:
raise ValueError(
"{} is not found in the providing file.".format(key))
if list(state.shape) != list(param.shape):
raise ValueError(
"{} receives a shape {}, but the expected shape is {}.".
format(key, list(state.shape), list(param.shape)))
return param, state
param_state = _load_state_from_path(path + ".pdparams")
assert param_state, "Failed to load parameters, please check path."
matched_param_state = []
for key, param in self.state_dict().items():
try:
match_res = _check_match(key, param)
except ValueError as err:
if skip_mismatch:
warnings.warn(
("Skip loading for {}. ".format(key) + err.message))
# reset optimizer when mismatch happens
reset_optimizer = True
else:
raise err
matched_param_state.append(match_res)
optim_state = None if reset_optimizer else _load_state_from_path(
path + ".pdopt")
return self._adapter.load(matched_param_state, optim_state)
def parameters(self, *args, **kwargs):
return self._adapter.parameters(*args, **kwargs)
def prepare(self,
optimizer=None,
loss_function=None,
metrics=None,
inputs=None,
labels=None,
device=None):
"""
FIXME: add comments
Args:
optimizer (Optimizer|None): optimizer must be set in training
and should be a Optimizer instance. It can be None in eval
and test mode.
loss_function (Loss|None): loss function must be set in training
and should be a Loss instance. It can be None when there is
no loss.
metrics (Metric|list of Metric|None): if metrics is set, all
metric will be calculate and output in train/eval mode.
inputs (Input|list|dict|None): inputs, entry points of network,
could be a Input layer, or lits of Input layers,
or dict (name: Input), or None. For static graph,
inputs must be set. For dynamic graph, it could be None.
labels (Input|list|None): labels, entry points of network,
could be a Input layer or lits of Input layers, or None.
For static graph, if set loss_function in Model.prepare(), it
must be set. Otherwise, it could be None.
device (str|None): specify device type, 'CPU' or 'GPU'.
If None, automatically select device according to
installation package version.
"""
if isinstance(device, fluid.CUDAPlace) or \
(isinstance(device, six.string_types) and device.lower() == 'gpu') \
or (device is None and fluid.is_compiled_with_cuda()):
if isinstance(device, fluid.CUDAPlace):
self._place = device
else:
self._place = fluid.CUDAPlace(ParallelEnv().dev_id) \
if ParallelEnv().nranks > 1 else fluid.CUDAPlace(0)
global _parallel_context_initialized
if ParallelEnv().nranks > 1 and not _parallel_context_initialized:
if fluid.in_dygraph_mode():
fluid.disable_dygraph()
fluid.enable_dygraph(self._place)
fluid.dygraph.parallel.prepare_context()
else:
prepare_distributed_context(self._place)
_parallel_context_initialized = True
elif isinstance(device, fluid.CPUPlace):
self._place = device
elif (isinstance(device, six.string_types) and device.lower() == 'cpu') \
or (device is None):
self._place = fluid.CPUPlace()
else:
raise ValueError(
"Expected device in ('gpu', 'cpu', fluid.CUDAPlace, fluid.CPUPlace, None), \
but got {}".format(device))
self._optimizer = optimizer
if loss_function:
if not isinstance(loss_function, Loss):
raise TypeError(
"'loss_function' must be sub classes of 'Loss'")
self._loss_function = loss_function
if not in_dygraph_mode():
if not isinstance(inputs, (list, dict, Input)):
raise TypeError(
"'inputs' must be list or dict in static graph mode")
metrics = metrics or []
for metric in to_list(metrics):
assert isinstance(metric, Metric), \
"{} is not sub class of Metric".format(
metric.__class__.__name__)
self._metrics = to_list(metrics)
self._inputs = to_list(inputs) if not isinstance(inputs, dict) else [
inputs[n] for n in extract_args(self.forward) if n != 'self'
]
self._labels = to_list(labels)
if not in_dygraph_mode():
self._adapter.prepare()
def fit(
self,
train_data=None,
eval_data=None,
batch_size=1,
epochs=1,
eval_freq=1,
log_freq=10,
save_dir=None,
save_freq=1,
verbose=2,
drop_last=False,
shuffle=True,
num_workers=0,
callbacks=None, ):
"""
FIXME: add more comments and usage
Args:
train_data (Dataset|DataLoader): An iterable data loader is used for
train. An instance of paddle.fluid.io.Dataset or
paddle.fluid.io.Dataloader is recomended.
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation at the end of epoch. If None, will not do evaluation.
An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader
is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored.
epochs (int): Integer number. The number of epochs to train the model.
eval_freq (int): The frequency, in number of epochs, an evalutation
is performed.
log_freq (int): The frequency, in number of steps, the training logs
are printed.
save_dir(str|None): The directory to save checkpoint during training.
If None, will not save checkpoint.
save_freq (int): The frequency, in number of epochs, to save checkpoint.
verbose (int): The verbosity mode, should be 0, 1, or 2.
0 = silent, 1 = progress bar, 2 = one line per epoch.
drop_last (bool): whether drop the last incomplete batch of train_data
when dataset size is not divisible by the batch size. When train_data
is an instance of Dataloader, this parameter will be ignored.
shuffle (bool): whther to shuffle train_data. When train_data is an instance
of Dataloader, this parameter will be ignored.
num_workers (int): the number of subprocess to load data, 0 for no subprocess
used and loading data in main process. When train_data and eval_data are
both the instance of Dataloader, this parameter will be ignored.
callbacks (Callback|None): A list of `Callback` instances to apply
during training. If None, `ProgBarLogger` and `ModelCheckpoint`
are automatically inserted.
"""
assert train_data is not None, \
"train_data must be given!"
if fluid.in_dygraph_mode():
feed_list = None
else:
feed_list = [x.forward() for x in self._inputs + self._labels]
if isinstance(train_data, Dataset):
train_sampler = DistributedBatchSampler(
train_data,
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last)
train_loader = DataLoader(
train_data,
batch_sampler=train_sampler,
places=self._place,
feed_list=feed_list,
num_workers=num_workers,
return_list=True)
else:
train_loader = train_data
if eval_data is not None and isinstance(eval_data, Dataset):
eval_sampler = DistributedBatchSampler(
eval_data, batch_size=batch_size)
eval_loader = DataLoader(
eval_data,
batch_sampler=eval_sampler,
places=self._place,
feed_list=feed_list,
num_workers=num_workers,
return_list=True)
elif eval_data is not None:
eval_loader = eval_data
else:
eval_loader = None
do_eval = eval_loader is not None
self._test_dataloader = eval_loader
metrics_name = self._metrics_name()
steps = len(train_loader) if hasattr(train_loader, '__len__') else None
cbks = config_callbacks(
callbacks,
model=self,
epochs=epochs,
steps=steps,
log_freq=log_freq,
save_freq=save_freq,
save_dir=save_dir,
verbose=verbose,
metrics=self._metrics_name(), )
cbks.on_begin('train')
for epoch in range(epochs):
# FIXME: adapt to DataLoader
loader = train_loader
if not isinstance(train_loader, Iterable):
loader = train_loader()
logs = self._run_one_epoch(
loader, cbks, 'train', metrics_name, epoch=epoch)
if do_eval and epoch % eval_freq == 0:
# FIXME: adapt to DataLoader
loader = eval_loader
if not isinstance(eval_loader, Iterable):
loader = eval_loader()
eval_steps = len(loader) if hasattr(loader,
'__len__') else None
cbks.on_begin('eval', {
'steps': eval_steps,
'metrics_name': metrics_name
})
logs = self._run_one_epoch(loader, cbks, 'eval', metrics_name)
cbks.on_end('eval', logs)
cbks.on_end('train', logs)
self._test_dataloader = None
def evaluate(
self,
eval_data,
batch_size=1,
log_freq=10,
verbose=2,
num_workers=0,
callbacks=None, ):
"""
FIXME: add more comments and usage
Args:
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation. An instance of paddle.fluid.io.Dataset or
paddle.fluid.io.Dataloader is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored.
log_freq (int): The frequency, in number of steps, the eval logs
are printed.
verbose (int): The verbosity mode, should be 0, 1, or 2.
0 = silent, 1 = progress bar, 2 = one line per epoch.
num_workers (int): The number of subprocess to load data, 0 for no subprocess
used and loading data in main process. When train_data and eval_data are
both the instance of Dataloader, this parameter will be ignored.
callbacks (Callback|None): A list of `Callback` instances to apply
during training. If None, `ProgBarLogger` and `ModelCheckpoint`
are automatically inserted.
"""
if fluid.in_dygraph_mode():
feed_list = None
else:
feed_list = [x.forward() for x in self._inputs + self._labels]
if eval_data is not None and isinstance(eval_data, Dataset):
eval_sampler = DistributedBatchSampler(
eval_data, batch_size=batch_size)
eval_loader = DataLoader(
eval_data,
batch_sampler=eval_sampler,
places=self._place,
feed_list=feed_list,
num_workers=num_workers,
return_list=True)
else:
eval_loader = eval_data
self._test_dataloader = eval_loader
metrics_name = self._metrics_name()
cbks = config_callbacks(
callbacks,
model=self,
log_freq=log_freq,
verbose=verbose,
metrics=self._metrics_name(), )
loader = eval_loader
if not isinstance(eval_loader, Iterable):
loader = eval_loader()
eval_steps = len(loader) if hasattr(loader, '__len__') else None
cbks.on_begin('eval',
{'steps': eval_steps,
'metrics_name': metrics_name})
logs = self._run_one_epoch(loader, cbks, 'eval', metrics_name)
cbks.on_end('eval', logs)
self._test_dataloader = None
eval_result = {}
for k in self._metrics_name():
eval_result[k] = logs[k]
return eval_result
def predict(self,
test_data,
batch_size=1,
num_workers=0,
stack_outputs=True):
"""
FIXME: add more comments and usage
Args:
test_data (Dataset|DataLoader): An iterable data loader is used for
predict. An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader
is recomended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored.
num_workers (int): the number of subprocess to load data, 0 for no subprocess
used and loading data in main process. When train_data and eval_data are
both the instance of Dataloader, this parameter will be ignored.
stack_output (bool): whether stack output field like a batch, as for an output
filed of a sample is in shape [X, Y], test_data contains N samples, predict
output field will be in shape [N, X, Y] if stack_output is True, and will
be a length N list in shape [[X, Y], [X, Y], ....[X, Y]] if stack_outputs
is False. stack_outputs as False is used for LoDTensor output situation,
it is recommended set as True if outputs contains no LoDTensor. Default False
"""
if fluid.in_dygraph_mode():
feed_list = None
else:
feed_list = [x.forward() for x in self._inputs + self._labels]
if test_data is not None and isinstance(test_data, Dataset):
test_sampler = DistributedBatchSampler(
test_data, batch_size=batch_size)
test_loader = DataLoader(
test_data,
batch_sampler=test_sampler,
places=self._place,
feed_list=feed_list,
num_workers=num_workers,
return_list=True)
else:
test_loader = test_data
self._test_dataloader = test_loader
loader = test_loader
if not isinstance(test_loader, Iterable):
loader = test_loader()
outputs = []
for data in tqdm.tqdm(loader):
data = flatten(data)
outputs.append(self.test(data[:len(self._inputs)]))
# NOTE: for lod tensor output, we should not stack outputs
# for stacking may loss its detail info
outputs = list(zip(*outputs))
if stack_outputs:
outputs = [np.stack(outs, axis=0) for outs in outputs]
self._test_dataloader = None
if test_loader is not None and self._adapter._nranks > 1 \
and isinstance(test_loader, DataLoader):
outputs = [o[:len(test_loader.dataset)] for o in outputs]
return outputs
def set_eval_data(self, eval_data):
"""
Args:
eval_data (Dataset|DataLoader|None): An iterable data loader is used for
eval. An instance of paddle.fluid.io.Dataset or
paddle.fluid.io.Dataloader is recomended.
"""
assert isinstance(
eval_data,
DataLoader), "eval_data must be a instance of Dataloader!"
self._test_dataloader = eval_data
def _run_one_epoch(self,
data_loader,
callbacks,
mode,
metrics_name,
epoch=None):
size = len(data_loader) if hasattr(data_loader, '__len__') else None
logs = {
'steps': size,
'metrics_name': metrics_name,
}
if mode == 'train':
assert epoch is not None, 'when mode is train, epoch must be given'
callbacks.on_epoch_begin(epoch)
for step, data in enumerate(data_loader):
# data might come from different types of data_loader and have
# different format, as following:
# 1. DataLoader in static graph:
# [[input1, input2, ..., label1, lable2, ...]]
# 2. DataLoader in dygraph
# [input1, input2, ..., label1, lable2, ...]
# 3. custumed iterator yield concated inputs and labels:
# [input1, input2, ..., label1, lable2, ...]
# 4. custumed iterator yield seperated inputs and labels:
# ([input1, input2, ...], [label1, lable2, ...])
# To handle all of these, flatten (nested) list to list.
data = flatten(data)
# LoDTensor.shape is callable, where LoDTensor comes from
# DataLoader in static graph
batch_size = data[0].shape()[0] if callable(data[
0].shape) else data[0].shape[0]
callbacks.on_batch_begin(mode, step, logs)
if mode == 'train':
outs = self.train(data[:len(self._inputs)],
data[len(self._inputs):])
else:
outs = self.eval(data[:len(self._inputs)],
data[len(self._inputs):])
# losses
loss = outs[0] if self._metrics else outs
metrics = [[l[0] for l in loss]]
# metrics
for metric in self._metrics:
res = metric.accumulate()
metrics.extend(to_list(res))
assert len(metrics_name) == len(metrics)
for k, v in zip(metrics_name, metrics):
logs[k] = v
logs['step'] = step
if mode == 'train' or self._adapter._merge_count.get(
mode + '_batch', 0) <= 0:
logs['batch_size'] = batch_size * ParallelEnv().nranks
else:
logs['batch_size'] = self._adapter._merge_count[mode +
'_batch']
callbacks.on_batch_end(mode, step, logs)
self._reset_metrics()
if mode == 'train':
assert epoch is not None, 'when mode is train, epoch must be given'
callbacks.on_epoch_end(epoch)
return logs
def _reset_metrics(self):
for metric in self._metrics:
metric.reset()
def _metrics_name(self):
metrics_name = ['loss']
for m in self._metrics:
metrics_name.extend(to_list(m.name()))
return metrics_name
import sys
import time
import numpy as np
class ProgressBar(object):
"""progress bar """
def __init__(self,
num=None,
width=30,
verbose=1,
start=True,
file=sys.stdout):
self._num = num
if isinstance(num, int) and num <= 0:
raise TypeError('num should be None or integer (> 0)')
max_width = self._get_max_width()
self._width = width if width <= max_width else max_width
self._total_width = 0
self._verbose = verbose
self.file = file
self._values = {}
self._values_order = []
if start:
self._start = time.time()
self._last_update = 0
self._dynamic_display = (
(hasattr(self.file, 'isatty') and
self.file.isatty()) or 'ipykernel' in sys.modules or
'posix' in sys.modules or 'PYCHARM_HOSTED' in os.environ)
def _get_max_width(self):
if sys.version_info > (3, 3):
from shutil import get_terminal_size
else:
from backports.shutil_get_terminal_size import get_terminal_size
terminal_width, _ = get_terminal_size()
max_width = min(int(terminal_width * 0.6), terminal_width - 50)
return max_width
def start(self):
self.file.flush()
self._start = time.time()
def update(self, current_num, values=None):
now = time.time()
if current_num:
time_per_unit = (now - self._start) / current_num
else:
time_per_unit = 0
if time_per_unit >= 1 or time_per_unit == 0:
fps = ' - %.0fs/%s' % (time_per_unit, 'step')
elif time_per_unit >= 1e-3:
fps = ' - %.0fms/%s' % (time_per_unit * 1e3, 'step')
else:
fps = ' - %.0fus/%s' % (time_per_unit * 1e6, 'step')
info = ''
if self._verbose == 1:
prev_total_width = self._total_width
if self._dynamic_display:
sys.stdout.write('\b' * prev_total_width)
sys.stdout.write('\r')
else:
sys.stdout.write('\n')
if self._num is not None:
numdigits = int(np.log10(self._num)) + 1
bar_chars = ('step %' + str(numdigits) + 'd/%d [') % (
current_num, self._num)
prog = float(current_num) / self._num
prog_width = int(self._width * prog)
if prog_width > 0:
bar_chars += ('=' * (prog_width - 1))
if current_num < self._num:
bar_chars += '>'
else:
bar_chars += '='
bar_chars += ('.' * (self._width - prog_width))
bar_chars += ']'
else:
bar_chars = 'step %3d' % current_num
self._total_width = len(bar_chars)
sys.stdout.write(bar_chars)
for k, val in values:
info += ' - %s:' % k
val = val if isinstance(val, list) else [val]
for i, v in enumerate(val):
if isinstance(v, (float, np.float32, np.float64)):
if abs(v) > 1e-3:
info += ' %.4f' % v
else:
info += ' %.4e' % v
else:
info += ' %s' % v
if self._num is not None and current_num < self._num:
eta = time_per_unit * (self._num - current_num)
if eta > 3600:
eta_format = '%d:%02d:%02d' % (eta // 3600, (eta % 3600) //
60, eta % 60)
elif eta > 60:
eta_format = '%d:%02d' % (eta // 60, eta % 60)
else:
eta_format = '%ds' % eta
info += ' - ETA: %s' % eta_format
info += fps
self._total_width += len(info)
if prev_total_width > self._total_width:
info += (' ' * (prev_total_width - self._total_width))
# newline for another epoch
if self._num is not None and current_num >= self._num:
info += '\n'
if self._num is None:
info += '\n'
sys.stdout.write(info)
sys.stdout.flush()
self._last_update = now
elif self._verbose == 2:
if self._num:
numdigits = int(np.log10(self._num)) + 1
count = ('step %' + str(numdigits) + 'd/%d') % (current_num,
self._num)
else:
count = 'step %3d' % current_num
info = count + info
for k, val in values:
info += ' - %s:' % k
val = val if isinstance(val, list) else [val]
for v in val:
if isinstance(v, (float, np.float32, np.float64)):
if abs(v) > 1e-3:
info += ' %.4f' % v
else:
info += ' %.4e' % v
elif isinstance(v, np.ndarray) and \
v.size == 1 and \
isinstance(v.dtype, (np.float32, np.float64)):
if abs(v[0]) > 1e-3:
info += ' %.4f' % v[0]
else:
info += ' %.4e' % v[0]
else:
info += ' %s' % v
info += fps
info += '\n'
sys.stdout.write(info)
sys.stdout.flush()
......@@ -168,13 +168,13 @@ def create_lexnet_data_generator(args, reader, file_name, place, mode="train"):
def create_dataloader(generator, place, feed_list=None):
if not feed_list:
data_loader = fluid.io.DataLoader.from_generator(
data_loader = paddle.io.DataLoader.from_generator(
capacity=50,
use_double_buffer=True,
iterable=True,
return_list=True)
else:
data_loader = fluid.io.DataLoader.from_generator(
data_loader = paddle.io.DataLoader.from_generator(
feed_list=feed_list,
capacity=50,
use_double_buffer=True,
......
......@@ -31,15 +31,26 @@ setuptools.setup(
description="A Paddle High-level API that supports both static and dynamic execution modes (still under development)",
url="https://github.com/PaddlePaddle/hapi",
packages=[
'hapi', 'hapi.text', 'hapi.text.tokenizer', 'hapi.text.bert',
'hapi.text.bert.utils'
'hapi',
'hapi.datasets',
'hapi.text',
'hapi.text.tokenizer',
'hapi.text.bert',
'hapi.text.bert.utils',
'hapi.vision',
'hapi.vision.models',
'hapi.vision.transforms',
],
package_dir={
'hapi': './hapi',
'hapi.datasets': './hapi/datasets',
'hapi.text': './hapi/text',
'hapi.text.tokenizer': './hapi/text/tokenizer',
'hapi.text.bert': './hapi/text/bert',
'hapi.text.bert.utils': './hapi/text/bert/utils',
'hapi.vision': './hapi/vision',
'hapi.vision.models': './hapi/vision/models',
'hapi.vision.transforms': './hapi/vision/transforms',
},
platforms="any",
license='Apache 2.0',
......
......@@ -18,7 +18,7 @@ import unittest
import time
import random
from callbacks import config_callbacks
from hapi.callbacks import config_callbacks
class TestCallbacks(unittest.TestCase):
......
......@@ -14,14 +14,16 @@
# when test, you should add hapi root path to the PYTHONPATH,
# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH
import unittest
import numpy as np
from datasets.folder import DatasetFolder
from hapi.datasets import *
class TestFolderDatasets(unittest.TestCase):
def test_dataset(self):
dataset_folder = DatasetFolder('test_data')
dataset_folder = DatasetFolder('tests/test_data')
for _ in dataset_folder:
pass
......@@ -30,5 +32,71 @@ class TestFolderDatasets(unittest.TestCase):
assert len(dataset_folder.classes) == 2
class TestMNISTTest(unittest.TestCase):
def test_main(self):
mnist = MNIST(mode='test')
self.assertTrue(len(mnist) == 10000)
for i in range(len(mnist)):
image, label = mnist[i]
self.assertTrue(image.shape[0] == 784)
self.assertTrue(label.shape[0] == 1)
self.assertTrue(0 <= int(label) <= 9)
class TestMNISTTrain(unittest.TestCase):
def test_main(self):
mnist = MNIST(mode='train')
self.assertTrue(len(mnist) == 60000)
for i in range(len(mnist)):
image, label = mnist[i]
self.assertTrue(image.shape[0] == 784)
self.assertTrue(label.shape[0] == 1)
self.assertTrue(0 <= int(label) <= 9)
class TestFlowersTrain(unittest.TestCase):
def test_main(self):
flowers = Flowers(mode='train')
self.assertTrue(len(flowers) == 6149)
# traversal whole dataset may cost a
# long time, randomly check 1 sample
idx = np.random.randint(0, 6149)
image, label = flowers[idx]
self.assertTrue(len(image.shape) == 3)
self.assertTrue(image.shape[2] == 3)
self.assertTrue(label.shape[0] == 1)
class TestFlowersValid(unittest.TestCase):
def test_main(self):
flowers = Flowers(mode='valid')
self.assertTrue(len(flowers) == 1020)
# traversal whole dataset may cost a
# long time, randomly check 1 sample
idx = np.random.randint(0, 1020)
image, label = flowers[idx]
self.assertTrue(len(image.shape) == 3)
self.assertTrue(image.shape[2] == 3)
self.assertTrue(label.shape[0] == 1)
class TestFlowersTest(unittest.TestCase):
def test_main(self):
flowers = Flowers(mode='test')
self.assertTrue(len(flowers) == 1020)
# traversal whole dataset may cost a
# long time, randomly check 1 sample
idx = np.random.randint(0, 1020)
image, label = flowers[idx]
self.assertTrue(len(image.shape) == 3)
self.assertTrue(image.shape[2] == 3)
self.assertTrue(label.shape[0] == 1)
if __name__ == '__main__':
unittest.main()
......@@ -28,11 +28,12 @@ import contextlib
import paddle
from paddle import fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from model import Model, CrossEntropy, Input, Loss, set_device
from metrics import Accuracy
from callbacks import ProgBarLogger
from paddle.fluid.io import BatchSampler, DataLoader
from paddle.fluid.io import MNIST as MnistDataset
from paddle.io import BatchSampler, DataLoader
from hapi.model import Model, CrossEntropy, Input, Loss, set_device
from hapi.metrics import Accuracy
from hapi.callbacks import ProgBarLogger
from hapi.datasets import MNIST as MnistDataset
class SimpleImgConvPool(fluid.dygraph.Layer):
......
......@@ -18,7 +18,7 @@ import unittest
import random
import time
from progressbar import ProgressBar
from hapi.progressbar import ProgressBar
class TestProgressBar(unittest.TestCase):
......
......@@ -16,13 +16,13 @@
# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH
import unittest
from datasets.folder import DatasetFolder
from transform import transforms
from hapi.datasets import DatasetFolder
import hapi.vision.transforms as transforms
class TestTransforms(unittest.TestCase):
def do_transform(self, trans):
dataset_folder = DatasetFolder('test_data', transform=trans)
dataset_folder = DatasetFolder('tests/test_data', transform=trans)
for _ in dataset_folder:
pass
......
import collections
import copy
import six
import sys
from functools import partial, reduce
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers.utils as utils
from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
from paddle.fluid.dygraph import to_variable, Embedding, Linear, LayerNorm, GRUUnit
from paddle.fluid.data_feeder import convert_dtype
from paddle.fluid import layers
from paddle.fluid.dygraph import Layer
from paddle.fluid.layers import BeamSearchDecoder
__all__ = [
'RNNCell', 'BasicLSTMCell', 'BasicGRUCell', 'RNN', 'DynamicDecode',
'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
'TransformerDecoder', 'TransformerBeamSearchDecoder', 'DynamicGRU', 'BiGRU',
'Linear_chain_crf', 'Crf_decoding', 'SequenceTagging']
class RNNCell(Layer):
def get_initial_states(self,
batch_ref,
shape=None,
dtype=None,
init_value=0,
batch_dim_idx=0):
"""
Generate initialized states according to provided shape, data type and
value.
Parameters:
batch_ref: A (possibly nested structure of) tensor variable[s].
The first dimension of the tensor will be used as batch size to
initialize states.
shape: A (possiblely nested structure of) shape[s], where a shape is
represented as a list/tuple of integer). -1(for batch size) will
beautomatically inserted if shape is not started with it. If None,
property `state_shape` will be used. The default value is None.
dtype: A (possiblely nested structure of) data type[s]. The structure
must be same as that of `shape`, except when all tensors' in states
has the same data type, a single data type can be used. If None and
property `cell.state_shape` is not available, float32 will be used
as the data type. The default value is None.
init_value: A float value used to initialize states.
Returns:
Variable: tensor variable[s] packed in the same structure provided \
by shape, representing the initialized states.
"""
# TODO: use inputs and batch_size
batch_ref = flatten(batch_ref)[0]
def _is_shape_sequence(seq):
if sys.version_info < (3, ):
integer_types = (
int,
long, )
else:
integer_types = (int, )
"""For shape, list/tuple of integer is the finest-grained objection"""
if (isinstance(seq, list) or isinstance(seq, tuple)):
if reduce(
lambda flag, x: isinstance(x, integer_types) and flag,
seq, True):
return False
# TODO: Add check for the illegal
if isinstance(seq, dict):
return True
return (isinstance(seq, collections.Sequence) and
not isinstance(seq, six.string_types))
class Shape(object):
def __init__(self, shape):
self.shape = shape if shape[0] == -1 else ([-1] + list(shape))
# nested structure of shapes
states_shapes = self.state_shape if shape is None else shape
is_sequence_ori = utils.is_sequence
utils.is_sequence = _is_shape_sequence
states_shapes = map_structure(lambda shape: Shape(shape),
states_shapes)
utils.is_sequence = is_sequence_ori
# nested structure of dtypes
try:
states_dtypes = self.state_dtype if dtype is None else dtype
except NotImplementedError: # use fp32 as default
states_dtypes = "float32"
if len(flatten(states_dtypes)) == 1:
dtype = flatten(states_dtypes)[0]
states_dtypes = map_structure(lambda shape: dtype, states_shapes)
init_states = map_structure(
lambda shape, dtype: fluid.layers.fill_constant_batch_size_like(
input=batch_ref,
shape=shape.shape,
dtype=dtype,
value=init_value,
input_dim_idx=batch_dim_idx), states_shapes, states_dtypes)
return init_states
@property
def state_shape(self):
"""
Abstract method (property).
Used to initialize states.
A (possiblely nested structure of) shape[s], where a shape is represented
as a list/tuple of integers (-1 for batch size would be automatically
inserted into a shape if shape is not started with it).
Not necessary to be implemented if states are not initialized by
`get_initial_states` or the `shape` argument is provided when using
`get_initial_states`.
"""
raise NotImplementedError(
"Please add implementaion for `state_shape` in the used cell.")
@property
def state_dtype(self):
"""
Abstract method (property).
Used to initialize states.
A (possiblely nested structure of) data types[s]. The structure must be
same as that of `shape`, except when all tensors' in states has the same
data type, a signle data type can be used.
Not necessary to be implemented if states are not initialized
by `get_initial_states` or the `dtype` argument is provided when using
`get_initial_states`.
"""
raise NotImplementedError(
"Please add implementaion for `state_dtype` in the used cell.")
class BasicLSTMCell(RNNCell):
"""
****
BasicLSTMUnit class, Using basic operator to build LSTM
The algorithm can be described as the code below.
.. math::
i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)
f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias )
o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)
\\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
h_t &= o_t \odot tanh(c_t)
- $W$ terms denote weight matrices (e.g. $W_{ix}$ is the matrix
of weights from the input gate to the input)
- The b terms denote bias vectors ($bx_i$ and $bh_i$ are the input gate bias vector).
- sigmoid is the logistic sigmoid function.
- $i, f, o$ and $c$ are the input gate, forget gate, output gate,
and cell activation vectors, respectively, all of which have the same size as
the cell output activation vector $h$.
- The :math:`\odot` is the element-wise product of the vectors.
- :math:`tanh` is the activation functions.
- :math:`\\tilde{c_t}` is also called candidate hidden state,
which is computed based on the current input and the previous hidden state.
Args:
name_scope(string) : The name scope used to identify parameter and bias name
hidden_size (integer): The hidden size used in the Unit.
param_attr(ParamAttr|None): The parameter attribute for the learnable
weight matrix. Note:
If it is set to None or one attribute of ParamAttr, lstm_unit will
create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|None): The parameter attribute for the bias
of LSTM unit.
If it is set to None or one attribute of ParamAttr, lstm_unit will
create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized as zero. Default: None.
gate_activation (function|None): The activation function for gates (actGate).
Default: 'fluid.layers.sigmoid'
activation (function|None): The activation function for cells (actNode).
Default: 'fluid.layers.tanh'
forget_bias(float|1.0): forget bias used when computing forget gate
dtype(string): data type used in this unit
"""
def __init__(self,
input_size,
hidden_size,
param_attr=None,
bias_attr=None,
gate_activation=None,
activation=None,
forget_bias=1.0,
dtype='float32'):
super(BasicLSTMCell, self).__init__()
self._hidden_size = hidden_size
self._param_attr = param_attr
self._bias_attr = bias_attr
self._gate_activation = gate_activation or layers.sigmoid
self._activation = activation or layers.tanh
self._forget_bias = layers.fill_constant(
[1], dtype=dtype, value=forget_bias)
self._forget_bias.stop_gradient = False
self._dtype = dtype
self._input_size = input_size
self._weight = self.create_parameter(
attr=self._param_attr,
shape=[
self._input_size + self._hidden_size, 4 * self._hidden_size
],
dtype=self._dtype)
self._bias = self.create_parameter(
attr=self._bias_attr,
shape=[4 * self._hidden_size],
dtype=self._dtype,
is_bias=True)
def forward(self, input, state):
pre_hidden, pre_cell = state
concat_input_hidden = layers.concat([input, pre_hidden], 1)
gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
gate_input = layers.elementwise_add(gate_input, self._bias)
i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
new_cell = layers.elementwise_add(
layers.elementwise_mul(
pre_cell,
layers.sigmoid(layers.elementwise_add(f, self._forget_bias))),
layers.elementwise_mul(layers.sigmoid(i), layers.tanh(j)))
new_hidden = layers.tanh(new_cell) * layers.sigmoid(o)
return new_hidden, [new_hidden, new_cell]
@property
def state_shape(self):
return [[self._hidden_size], [self._hidden_size]]
class BasicGRUCell(RNNCell):
"""
****
BasicGRUUnit class, using basic operators to build GRU
The algorithm can be described as the equations below.
.. math::
u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u)
r_t & = actGate(W_rx xr_{t} + W_rh h_{t-1} + b_r)
m_t & = actNode(W_cx xm_t + W_ch dot(r_t, h_{t-1}) + b_m)
h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t)
Args:
hidden_size (integer): The hidden size used in the Unit.
param_attr(ParamAttr|None): The parameter attribute for the learnable
weight matrix. Note:
If it is set to None or one attribute of ParamAttr, gru_unit will
create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|None): The parameter attribute for the bias
of GRU unit.
If it is set to None or one attribute of ParamAttr, gru_unit will
create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
gate_activation (function|None): The activation function for gates (actGate).
Default: 'fluid.layers.sigmoid'
activation (function|None): The activation function for cell (actNode).
Default: 'fluid.layers.tanh'
dtype(string): data type used in this unit
"""
def __init__(self,
input_size,
hidden_size,
param_attr=None,
bias_attr=None,
gate_activation=None,
activation=None,
dtype='float32'):
super(BasicGRUCell, self).__init__()
self._input_size = input_size
self._hiden_size = hidden_size
self._param_attr = param_attr
self._bias_attr = bias_attr
self._gate_activation = gate_activation or layers.sigmoid
self._activation = activation or layers.tanh
self._dtype = dtype
if self._param_attr is not None and self._param_attr.name is not None:
gate_param_attr = copy.deepcopy(self._param_attr)
candidate_param_attr = copy.deepcopy(self._param_attr)
gate_param_attr.name += "_gate"
candidate_param_attr.name += "_candidate"
else:
gate_param_attr = self._param_attr
candidate_param_attr = self._param_attr
self._gate_weight = self.create_parameter(
attr=gate_param_attr,
shape=[self._input_size + self._hiden_size, 2 * self._hiden_size],
dtype=self._dtype)
self._candidate_weight = self.create_parameter(
attr=candidate_param_attr,
shape=[self._input_size + self._hiden_size, self._hiden_size],
dtype=self._dtype)
if self._bias_attr is not None and self._bias_attr.name is not None:
gate_bias_attr = copy.deepcopy(self._bias_attr)
candidate_bias_attr = copy.deepcopy(self._bias_attr)
gate_bias_attr.name += "_gate"
candidate_bias_attr.name += "_candidate"
else:
gate_bias_attr = self._bias_attr
candidate_bias_attr = self._bias_attr
self._gate_bias = self.create_parameter(
attr=gate_bias_attr,
shape=[2 * self._hiden_size],
dtype=self._dtype,
is_bias=True)
self._candidate_bias = self.create_parameter(
attr=candidate_bias_attr,
shape=[self._hiden_size],
dtype=self._dtype,
is_bias=True)
def forward(self, input, state):
pre_hidden = state
concat_input_hidden = layers.concat([input, pre_hidden], axis=1)
gate_input = layers.matmul(x=concat_input_hidden, y=self._gate_weight)
gate_input = layers.elementwise_add(gate_input, self._gate_bias)
gate_input = self._gate_activation(gate_input)
r, u = layers.split(gate_input, num_or_sections=2, dim=1)
r_hidden = r * pre_hidden
candidate = layers.matmul(
layers.concat([input, r_hidden], 1), self._candidate_weight)
candidate = layers.elementwise_add(candidate, self._candidate_bias)
c = self._activation(candidate)
new_hidden = u * pre_hidden + (1 - u) * c
return new_hidden
@property
def state_shape(self):
return [self._hidden_size]
class RNN(fluid.dygraph.Layer):
def __init__(self, cell, is_reverse=False, time_major=False):
super(RNN, self).__init__()
self.cell = cell
if not hasattr(self.cell, "call"):
self.cell.call = self.cell.forward
self.is_reverse = is_reverse
self.time_major = time_major
self.batch_index, self.time_step_index = (1, 0) if time_major else (0,
1)
def forward(self,
inputs,
initial_states=None,
sequence_length=None,
**kwargs):
if fluid.in_dygraph_mode():
class ArrayWrapper(object):
def __init__(self, x):
self.array = [x]
def append(self, x):
self.array.append(x)
return self
def _maybe_copy(state, new_state, step_mask):
# TODO: use where_op
new_state = fluid.layers.elementwise_mul(
new_state, step_mask,
axis=0) - fluid.layers.elementwise_mul(
state, (step_mask - 1), axis=0)
return new_state
flat_inputs = flatten(inputs)
batch_size, time_steps = (
flat_inputs[0].shape[self.batch_index],
flat_inputs[0].shape[self.time_step_index])
if initial_states is None:
initial_states = self.cell.get_initial_states(
batch_ref=inputs, batch_dim_idx=self.batch_index)
if not self.time_major:
inputs = map_structure(
lambda x: fluid.layers.transpose(x, [1, 0] + list(
range(2, len(x.shape)))), inputs)
if sequence_length:
mask = fluid.layers.sequence_mask(
sequence_length,
maxlen=time_steps,
dtype=flatten(initial_states)[0].dtype)
mask = fluid.layers.transpose(mask, [1, 0])
if self.is_reverse:
inputs = map_structure(
lambda x: fluid.layers.reverse(x, axis=[0]), inputs)
mask = fluid.layers.reverse(
mask, axis=[0]) if sequence_length else None
states = initial_states
outputs = []
for i in range(time_steps):
step_inputs = map_structure(lambda x: x[i], inputs)
step_outputs, new_states = self.cell(step_inputs, states,
**kwargs)
if sequence_length:
new_states = map_structure(
partial(
_maybe_copy, step_mask=mask[i]),
states,
new_states)
states = new_states
outputs = map_structure(
lambda x: ArrayWrapper(x),
step_outputs) if i == 0 else map_structure(
lambda x, x_array: x_array.append(x), step_outputs,
outputs)
final_outputs = map_structure(
lambda x: fluid.layers.stack(x.array,
axis=self.time_step_index),
outputs)
if self.is_reverse:
final_outputs = map_structure(
lambda x: fluid.layers.reverse(x,
axis=self.time_step_index),
final_outputs)
final_states = new_states
else:
final_outputs, final_states = fluid.layers.rnn(
self.cell,
inputs,
initial_states=initial_states,
sequence_length=sequence_length,
time_major=self.time_major,
is_reverse=self.is_reverse,
**kwargs)
return final_outputs, final_states
class DynamicDecode(Layer):
def __init__(self,
decoder,
max_step_num=None,
output_time_major=False,
impute_finished=False,
is_test=False,
return_length=False):
super(DynamicDecode, self).__init__()
self.decoder = decoder
self.max_step_num = max_step_num
self.output_time_major = output_time_major
self.impute_finished = impute_finished
self.is_test = is_test
self.return_length = return_length
def forward(self, inits=None, **kwargs):
if fluid.in_dygraph_mode():
class ArrayWrapper(object):
def __init__(self, x):
self.array = [x]
def append(self, x):
self.array.append(x)
return self
def __getitem__(self, item):
return self.array.__getitem__(item)
def _maybe_copy(state, new_state, step_mask):
# TODO: use where_op
state_dtype = state.dtype
if convert_dtype(state_dtype) in ["bool"]:
state = layers.cast(state, dtype="float32")
new_state = layers.cast(new_state, dtype="float32")
if step_mask.dtype != state.dtype:
step_mask = layers.cast(step_mask, dtype=state.dtype)
# otherwise, renamed bool gradients of would be summed up leading
# to sum(bool) error.
step_mask.stop_gradient = True
new_state = layers.elementwise_mul(
state, step_mask, axis=0) - layers.elementwise_mul(
new_state, (step_mask - 1), axis=0)
if convert_dtype(state_dtype) in ["bool"]:
new_state = layers.cast(new_state, dtype=state_dtype)
return new_state
initial_inputs, initial_states, initial_finished = self.decoder.initialize(
inits)
inputs, states, finished = (initial_inputs, initial_states,
initial_finished)
cond = layers.logical_not((layers.reduce_all(initial_finished)))
sequence_lengths = layers.cast(
layers.zeros_like(initial_finished), "int64")
outputs = None
step_idx = 0
step_idx_tensor = layers.fill_constant(
shape=[1], dtype="int64", value=step_idx)
while cond.numpy():
(step_outputs, next_states, next_inputs,
next_finished) = self.decoder.step(step_idx_tensor, inputs,
states, **kwargs)
if not self.decoder.tracks_own_finished:
# BeamSearchDecoder would track it own finished, since
# beams would be reordered and the finished status of each
# entry might change. Otherwise, perform logical OR which
# would not change the already finished.
next_finished = layers.logical_or(next_finished, finished)
# To confirm states.finished/finished be consistent with
# next_finished.
layers.assign(next_finished, finished)
next_sequence_lengths = layers.elementwise_add(
sequence_lengths,
layers.cast(
layers.logical_not(finished), sequence_lengths.dtype))
if self.impute_finished: # rectify the states for the finished.
next_states = map_structure(
lambda x, y: _maybe_copy(x, y, finished), states,
next_states)
outputs = map_structure(
lambda x: ArrayWrapper(x),
step_outputs) if step_idx == 0 else map_structure(
lambda x, x_array: x_array.append(x), step_outputs,
outputs)
inputs, states, finished, sequence_lengths = (
next_inputs, next_states, next_finished,
next_sequence_lengths)
layers.increment(x=step_idx_tensor, value=1.0, in_place=True)
step_idx += 1
layers.logical_not(layers.reduce_all(finished), cond)
if self.max_step_num is not None and step_idx > self.max_step_num:
break
final_outputs = map_structure(
lambda x: fluid.layers.stack(x.array, axis=0), outputs)
final_states = states
try:
final_outputs, final_states = self.decoder.finalize(
final_outputs, final_states, sequence_lengths)
except NotImplementedError:
pass
if not self.output_time_major:
final_outputs = map_structure(
lambda x: layers.transpose(x, [1, 0] + list(
range(2, len(x.shape)))), final_outputs)
return (final_outputs, final_states,
sequence_lengths) if self.return_length else (
final_outputs, final_states)
else:
return fluid.layers.dynamic_decode(
self.decoder,
inits,
max_step_num=self.max_step_num,
output_time_major=self.output_time_major,
impute_finished=self.impute_finished,
is_test=self.is_test,
return_length=self.return_length,
**kwargs)
class TransfomerCell(object):
"""
Let inputs=(trg_word, trg_pos), states=cache to make Transformer can be
used as RNNCell
"""
def __init__(self, decoder):
self.decoder = decoder
def __call__(self, inputs, states, trg_src_attn_bias, enc_output,
static_caches):
trg_word, trg_pos = inputs
for cache, static_cache in zip(states, static_caches):
cache.update(static_cache)
logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
enc_output, states)
new_states = [{"k": cache["k"], "v": cache["v"]} for cache in states]
return logits, new_states
class TransformerBeamSearchDecoder(layers.BeamSearchDecoder):
def __init__(self, cell, start_token, end_token, beam_size,
var_dim_in_state):
super(TransformerBeamSearchDecoder,
self).__init__(cell, start_token, end_token, beam_size)
self.cell = cell
self.var_dim_in_state = var_dim_in_state
def _merge_batch_beams_with_var_dim(self, x):
# init length of cache is 0, and it increases with decoding carrying on,
# thus need to reshape elaborately
var_dim_in_state = self.var_dim_in_state + 1 # count in beam dim
x = layers.transpose(x,
list(range(var_dim_in_state, len(x.shape))) +
list(range(0, var_dim_in_state)))
x = layers.reshape(
x, [0] * (len(x.shape) - var_dim_in_state
) + [self.batch_size * self.beam_size] +
[int(size) for size in x.shape[-var_dim_in_state + 2:]])
x = layers.transpose(
x,
list(range((len(x.shape) + 1 - var_dim_in_state), len(x.shape))) +
list(range(0, (len(x.shape) + 1 - var_dim_in_state))))
return x
def _split_batch_beams_with_var_dim(self, x):
var_dim_size = layers.shape(x)[self.var_dim_in_state]
x = layers.reshape(
x, [-1, self.beam_size] +
[int(size)
for size in x.shape[1:self.var_dim_in_state]] + [var_dim_size] +
[int(size) for size in x.shape[self.var_dim_in_state + 1:]])
return x
def step(self, time, inputs, states, **kwargs):
# compared to RNN, Transformer has 3D data at every decoding step
inputs = layers.reshape(inputs, [-1, 1]) # token
pos = layers.ones_like(inputs) * time # pos
cell_states = map_structure(self._merge_batch_beams_with_var_dim,
states.cell_states)
cell_outputs, next_cell_states = self.cell((inputs, pos), cell_states,
**kwargs)
cell_outputs = map_structure(self._split_batch_beams, cell_outputs)
next_cell_states = map_structure(self._split_batch_beams_with_var_dim,
next_cell_states)
beam_search_output, beam_search_state = self._beam_search_step(
time=time,
logits=cell_outputs,
next_cell_states=next_cell_states,
beam_state=states)
next_inputs, finished = (beam_search_output.predicted_ids,
beam_search_state.finished)
return (beam_search_output, beam_search_state, next_inputs, finished)
### Transformer Modules ###
class PrePostProcessLayer(Layer):
"""
PrePostProcessLayer
"""
def __init__(self, process_cmd, d_model, dropout_rate):
super(PrePostProcessLayer, self).__init__()
self.process_cmd = process_cmd
self.functors = []
for cmd in self.process_cmd:
if cmd == "a": # add residual connection
self.functors.append(lambda x, y: x + y if y else x)
elif cmd == "n": # add layer normalization
self.functors.append(
self.add_sublayer(
"layer_norm_%d" % len(
self.sublayers(include_sublayers=False)),
LayerNorm(
normalized_shape=d_model,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.)))))
elif cmd == "d": # add dropout
self.functors.append(lambda x: layers.dropout(
x, dropout_prob=dropout_rate, is_test=False)
if dropout_rate else x)
def forward(self, x, residual=None):
for i, cmd in enumerate(self.process_cmd):
if cmd == "a":
x = self.functors[i](x, residual)
else:
x = self.functors[i](x)
return x
class MultiHeadAttention(Layer):
"""
Multi-Head Attention
"""
def __init__(self, d_key, d_value, d_model, n_head=1, dropout_rate=0.):
super(MultiHeadAttention, self).__init__()
self.n_head = n_head
self.d_key = d_key
self.d_value = d_value
self.d_model = d_model
self.dropout_rate = dropout_rate
self.q_fc = Linear(
input_dim=d_model, output_dim=d_key * n_head, bias_attr=False)
self.k_fc = Linear(
input_dim=d_model, output_dim=d_key * n_head, bias_attr=False)
self.v_fc = Linear(
input_dim=d_model, output_dim=d_value * n_head, bias_attr=False)
self.proj_fc = Linear(
input_dim=d_value * n_head, output_dim=d_model, bias_attr=False)
def _prepare_qkv(self, queries, keys, values, cache=None):
if keys is None: # self-attention
keys, values = queries, queries
static_kv = False
else: # cross-attention
static_kv = True
q = self.q_fc(queries)
q = layers.reshape(x=q, shape=[0, 0, self.n_head, self.d_key])
q = layers.transpose(x=q, perm=[0, 2, 1, 3])
if cache is not None and static_kv and "static_k" in cache:
# for encoder-decoder attention in inference and has cached
k = cache["static_k"]
v = cache["static_v"]
else:
k = self.k_fc(keys)
v = self.v_fc(values)
k = layers.reshape(x=k, shape=[0, 0, self.n_head, self.d_key])
k = layers.transpose(x=k, perm=[0, 2, 1, 3])
v = layers.reshape(x=v, shape=[0, 0, self.n_head, self.d_value])
v = layers.transpose(x=v, perm=[0, 2, 1, 3])
if cache is not None:
if static_kv and not "static_k" in cache:
# for encoder-decoder attention in inference and has not cached
cache["static_k"], cache["static_v"] = k, v
elif not static_kv:
# for decoder self-attention in inference
cache_k, cache_v = cache["k"], cache["v"]
k = layers.concat([cache_k, k], axis=2)
v = layers.concat([cache_v, v], axis=2)
cache["k"], cache["v"] = k, v
return q, k, v
def forward(self, queries, keys, values, attn_bias, cache=None):
# compute q ,k ,v
q, k, v = self._prepare_qkv(queries, keys, values, cache)
# scale dot product attention
product = layers.matmul(
x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5)
if attn_bias:
product += attn_bias
weights = layers.softmax(product)
if self.dropout_rate:
weights = layers.dropout(
weights, dropout_prob=self.dropout_rate, is_test=False)
out = layers.matmul(weights, v)
# combine heads
out = layers.transpose(out, perm=[0, 2, 1, 3])
out = layers.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]])
# project to output
out = self.proj_fc(out)
return out
def cal_kv(self, keys, values):
k = self.k_fc(keys)
v = self.v_fc(values)
k = layers.reshape(x=k, shape=[0, 0, self.n_head, self.d_key])
k = layers.transpose(x=k, perm=[0, 2, 1, 3])
v = layers.reshape(x=v, shape=[0, 0, self.n_head, self.d_value])
v = layers.transpose(x=v, perm=[0, 2, 1, 3])
return k, v
class FFN(Layer):
"""
Feed-Forward Network
"""
def __init__(self, d_inner_hid, d_model, dropout_rate):
super(FFN, self).__init__()
self.dropout_rate = dropout_rate
self.fc1 = Linear(
input_dim=d_model, output_dim=d_inner_hid, act="relu")
self.fc2 = Linear(input_dim=d_inner_hid, output_dim=d_model)
def forward(self, x):
hidden = self.fc1(x)
if self.dropout_rate:
hidden = layers.dropout(
hidden, dropout_prob=self.dropout_rate, is_test=False)
out = self.fc2(hidden)
return out
class TransformerEncoderLayer(Layer):
"""
EncoderLayer
"""
def __init__(self,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
preprocess_cmd="n",
postprocess_cmd="da"):
super(TransformerEncoderLayer, self).__init__()
self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model,
prepostprocess_dropout)
self.self_attn = MultiHeadAttention(d_key, d_value, d_model, n_head,
attention_dropout)
self.postprocesser1 = PrePostProcessLayer(postprocess_cmd, d_model,
prepostprocess_dropout)
self.preprocesser2 = PrePostProcessLayer(preprocess_cmd, d_model,
prepostprocess_dropout)
self.ffn = FFN(d_inner_hid, d_model, relu_dropout)
self.postprocesser2 = PrePostProcessLayer(postprocess_cmd, d_model,
prepostprocess_dropout)
def forward(self, enc_input, attn_bias):
attn_output = self.self_attn(
self.preprocesser1(enc_input), None, None, attn_bias)
attn_output = self.postprocesser1(attn_output, enc_input)
ffn_output = self.ffn(self.preprocesser2(attn_output))
ffn_output = self.postprocesser2(ffn_output, attn_output)
return ffn_output
class TransformerEncoder(Layer):
"""
encoder
"""
def __init__(self,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
preprocess_cmd="n",
postprocess_cmd="da"):
super(TransformerEncoder, self).__init__()
self.encoder_layers = list()
for i in range(n_layer):
self.encoder_layers.append(
self.add_sublayer(
"layer_%d" % i,
TransformerEncoderLayer(
n_head, d_key, d_value, d_model, d_inner_hid,
prepostprocess_dropout, attention_dropout,
relu_dropout, preprocess_cmd, postprocess_cmd)))
self.processer = PrePostProcessLayer(preprocess_cmd, d_model,
prepostprocess_dropout)
def forward(self, enc_input, attn_bias):
for encoder_layer in self.encoder_layers:
enc_output = encoder_layer(enc_input, attn_bias)
enc_input = enc_output
return self.processer(enc_output)
class TransformerDecoderLayer(Layer):
"""
decoder
"""
def __init__(self,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
preprocess_cmd="n",
postprocess_cmd="da"):
super(TransformerDecoderLayer, self).__init__()
self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model,
prepostprocess_dropout)
self.self_attn = MultiHeadAttention(d_key, d_value, d_model, n_head,
attention_dropout)
self.postprocesser1 = PrePostProcessLayer(postprocess_cmd, d_model,
prepostprocess_dropout)
self.preprocesser2 = PrePostProcessLayer(preprocess_cmd, d_model,
prepostprocess_dropout)
self.cross_attn = MultiHeadAttention(d_key, d_value, d_model, n_head,
attention_dropout)
self.postprocesser2 = PrePostProcessLayer(postprocess_cmd, d_model,
prepostprocess_dropout)
self.preprocesser3 = PrePostProcessLayer(preprocess_cmd, d_model,
prepostprocess_dropout)
self.ffn = FFN(d_inner_hid, d_model, relu_dropout)
self.postprocesser3 = PrePostProcessLayer(postprocess_cmd, d_model,
prepostprocess_dropout)
def forward(self,
dec_input,
enc_output,
self_attn_bias,
cross_attn_bias,
cache=None):
self_attn_output = self.self_attn(
self.preprocesser1(dec_input), None, None, self_attn_bias, cache)
self_attn_output = self.postprocesser1(self_attn_output, dec_input)
cross_attn_output = self.cross_attn(
self.preprocesser2(self_attn_output), enc_output, enc_output,
cross_attn_bias, cache)
cross_attn_output = self.postprocesser2(cross_attn_output,
self_attn_output)
ffn_output = self.ffn(self.preprocesser3(cross_attn_output))
ffn_output = self.postprocesser3(ffn_output, cross_attn_output)
return ffn_output
class TransformerDecoder(Layer):
"""
decoder
"""
def __init__(self, n_layer, n_head, d_key, d_value, d_model, d_inner_hid,
prepostprocess_dropout, attention_dropout, relu_dropout,
preprocess_cmd, postprocess_cmd):
super(TransformerDecoder, self).__init__()
self.decoder_layers = list()
for i in range(n_layer):
self.decoder_layers.append(
self.add_sublayer(
"layer_%d" % i,
TransformerDecoderLayer(
n_head, d_key, d_value, d_model, d_inner_hid,
prepostprocess_dropout, attention_dropout,
relu_dropout, preprocess_cmd, postprocess_cmd)))
self.processer = PrePostProcessLayer(preprocess_cmd, d_model,
prepostprocess_dropout)
def forward(self,
dec_input,
enc_output,
self_attn_bias,
cross_attn_bias,
caches=None):
for i, decoder_layer in enumerate(self.decoder_layers):
dec_output = decoder_layer(dec_input, enc_output, self_attn_bias,
cross_attn_bias, None
if caches is None else caches[i])
dec_input = dec_output
return self.processer(dec_output)
def prepare_static_cache(self, enc_output):
return [
dict(
zip(("static_k", "static_v"),
decoder_layer.cross_attn.cal_kv(enc_output, enc_output)))
for decoder_layer in self.decoder_layers
]
class DynamicGRU(fluid.dygraph.Layer):
def __init__(self,
size,
h_0=None,
param_attr=None,
bias_attr=None,
is_reverse=False,
gate_activation='sigmoid',
candidate_activation='tanh',
origin_mode=False,
init_size=None):
super(DynamicGRU, self).__init__()
self.gru_unit = GRUUnit(
size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
self.size = size
self.h_0 = h_0
self.is_reverse = is_reverse
def forward(self, inputs):
hidden = self.h_0
res = []
for i in range(inputs.shape[1]):
if self.is_reverse:
i = inputs.shape[1] - 1 - i
input_ = inputs[:, i:i + 1, :]
input_ = fluid.layers.reshape(
input_, [-1, input_.shape[2]], inplace=False)
hidden, reset, gate = self.gru_unit(input_, hidden)
hidden_ = fluid.layers.reshape(
hidden, [-1, 1, hidden.shape[1]], inplace=False)
res.append(hidden_)
if self.is_reverse:
res = res[::-1]
res = fluid.layers.concat(res, axis=1)
return res
class BiGRU(fluid.dygraph.Layer):
def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None):
super(BiGRU, self).__init__()
self.pre_gru = Linear(
input_dim=input_dim,
output_dim=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru = DynamicGRU(
size=grnn_hidden_dim,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.pre_gru_r = Linear(
input_dim=input_dim,
output_dim=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru_r = DynamicGRU(
size=grnn_hidden_dim,
is_reverse=True,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
def forward(self, input_feature):
res_pre_gru = self.pre_gru(input_feature)
res_gru = self.gru(res_pre_gru)
res_pre_gru_r = self.pre_gru_r(input_feature)
res_gru_r = self.gru_r(res_pre_gru_r)
bi_merge = fluid.layers.concat(input=[res_gru, res_gru_r], axis=-1)
return bi_merge
class Linear_chain_crf(fluid.dygraph.Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(Linear_chain_crf, self).__init__()
self._param_attr = param_attr
self._dtype = dtype
self._size = size
self._is_test = is_test
self._transition = self.create_parameter(
attr=self._param_attr,
shape=[self._size + 2, self._size],
dtype=self._dtype)
@property
def weight(self):
return self._transition
@weight.setter
def weight(self, value):
self._transition = value
def forward(self, input, label, length=None):
alpha = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
emission_exps = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
transition_exps = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
log_likelihood = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
"Emission": [input],
"Transition": self._transition,
"Label": [label]
}
if length:
this_inputs['Length'] = [length]
self._helper.append_op(
type='linear_chain_crf',
inputs=this_inputs,
outputs={
"Alpha": [alpha],
"EmissionExps": [emission_exps],
"TransitionExps": transition_exps,
"LogLikelihood": log_likelihood
},
attrs={"is_test": self._is_test, })
return log_likelihood
class Crf_decoding(fluid.dygraph.Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(Crf_decoding, self).__init__()
self._dtype = dtype
self._size = size
self._is_test = is_test
self._param_attr = param_attr
self._transition = self.create_parameter(
attr=self._param_attr,
shape=[self._size + 2, self._size],
dtype=self._dtype)
@property
def weight(self):
return self._transition
@weight.setter
def weight(self, value):
self._transition = value
def forward(self, input, label=None, length=None):
viterbi_path = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
"Emission": [input],
"Transition": self._transition,
"Label": label
}
if length:
this_inputs['Length'] = [length]
self._helper.append_op(
type='crf_decoding',
inputs=this_inputs,
outputs={"ViterbiPath": [viterbi_path]},
attrs={"is_test": self._is_test, })
return viterbi_path
class SequenceTagging(fluid.dygraph.Layer):
def __init__(self,
vocab_size,
num_labels,
batch_size,
word_emb_dim=128,
grnn_hidden_dim=128,
emb_learning_rate=0.1,
crf_learning_rate=0.1,
bigru_num=2,
init_bound=0.1,
length=None):
super(SequenceTagging, self).__init__()
"""
define the sequence tagging network structure
word: stores the input of the model
for_infer: a boolean value, indicating if the model to be created is for training or predicting.
return:
for infer: return the prediction
otherwise: return the prediction
"""
self.word_emb_dim = word_emb_dim
self.vocab_size = vocab_size
self.num_labels = num_labels
self.grnn_hidden_dim = grnn_hidden_dim
self.emb_lr = emb_learning_rate
self.crf_lr = crf_learning_rate
self.bigru_num = bigru_num
self.batch_size = batch_size
self.init_bound = 0.1
self.word_embedding = Embedding(
size=[self.vocab_size, self.word_emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(
learning_rate=self.emb_lr,
name="word_emb",
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound)))
h_0 = fluid.layers.create_global_var(
shape=[self.batch_size, self.grnn_hidden_dim],
value=0.0,
dtype='float32',
persistable=True,
force_cpu=True,
name='h_0')
self.bigru_units = []
for i in range(self.bigru_num):
if i == 0:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
else:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim * 2,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
self.fc = Linear(
input_dim=self.grnn_hidden_dim * 2,
output_dim=self.num_labels,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.linear_chain_crf = Linear_chain_crf(
param_attr=fluid.ParamAttr(
name='linear_chain_crfw', learning_rate=self.crf_lr),
size=self.num_labels)
self.crf_decoding = Crf_decoding(
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=self.crf_lr),
size=self.num_labels)
def forward(self, word, target, lengths):
"""
Configure the network
"""
word_embed = self.word_embedding(word)
input_feature = word_embed
for i in range(self.bigru_num):
bigru_output = self.bigru_units[i](input_feature)
input_feature = bigru_output
emission = self.fc(bigru_output)
crf_cost = self.linear_chain_crf(
input=emission, label=target, length=lengths)
avg_cost = fluid.layers.mean(x=crf_cost)
self.crf_decoding.weight = self.linear_chain_crf.weight
crf_decode = self.crf_decoding(input=emission, length=lengths)
return crf_decode, avg_cost, lengths
......@@ -22,7 +22,7 @@ from functools import partial
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.io import DataLoader
from paddle.io import DataLoader
from paddle.fluid.layers.utils import flatten
from utils.configure import PDConfig
......
......@@ -22,7 +22,7 @@ from functools import partial
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import BatchSampler, DataLoader, Dataset
from paddle.io import BatchSampler, DataLoader, Dataset
def create_data_loader(args, device):
......
......@@ -21,7 +21,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.io import DataLoader
from paddle.io import DataLoader
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册