Commit e246cde7 authored by frankwhzhang

Merge branch 'develop' of https://github.com/PaddlePaddle/models into develop

......@@ -21,6 +21,7 @@ import reduction
custom_layers = get_registered_layers()
def set_args(f, params, node=None):
""" set args for function 'f' using the parameters in node.layer.parameters
......
......@@ -18,12 +18,14 @@ def crop_shape(input_shape, shape=None):
assert len(input_shape) == 2, "the number of crop's inputs must be 2"
return input_shape[1]
    elif shape is not None:
        assert len(shape) == len(
            input_shape.shape), "input_shape differs from output_shape"
        return shape
else:
raise Exception,"crop_shape input error"
raise Exception, "crop_shape input error"
return None
def crop_layer(input, name, shape=None, axis=2, offset=None):
""" build a layer of type 'Crop' using fluid
......@@ -46,23 +48,28 @@ def crop_layer(input, name, shape=None, axis=2, offset=None):
output_shape = input[1].shape
input_tensor = input[0]
    elif shape is not None:
        assert len(shape) == len(
            input.shape), "input_shape differs from output_shape"
        input_shape = input.shape
        output_shape = shape
        input_tensor = input
else:
raise Exception,"crop_layer input error"
raise Exception, "crop_layer input error"
assert len(output_shape) == len(input_shape), "input_shape is diff with output_shape"
assert len(output_shape) == len(
input_shape), "input_shape is diff with output_shape"
if axis < 0:
axis += len(input_shape)
if offset is not None:
        assert (len(input_shape) - axis) == len(
            offset), "invalid offset[%s] in crop layer" % (str(offset))
offset = [0] * axis + offset
import paddle.fluid as fluid
    output = fluid.layers.crop(
        input_tensor, shape=output_shape, offsets=offset, name=name)
return output
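# A minimal usage sketch of crop_layer (the variable names and sizes here are
# illustrative assumptions, not part of the converter): crop input[0] to the
# spatial shape of input[1], mirroring Caffe's two-bottom Crop layer.
#
#     import paddle.fluid as fluid
#     x = fluid.layers.data(name='x', shape=[3, 224, 224])      # (-1, 3, 224, 224)
#     ref = fluid.layers.data(name='ref', shape=[3, 200, 200])  # (-1, 3, 200, 200)
#     y = crop_layer([x, ref], name='crop1', axis=2, offset=[8, 8])
#     # y follows ref's shape from axis 2 on, cropped starting at offset (8, 8)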
......
""" A custom layer for 'detectionout' used in 'SSD' model to produce outputs
    Note: Paddle's implementation of 'detectionout' applies 'flatten' and 'softmax' ops to the 'conf' input,
        while Caffe's implementation does not.
"""
from .register import register
......
......@@ -18,7 +18,7 @@ def reduction_shape(input_shape, axis=0):
axis += len(input_shape) + 1
assert axis <= len(input_shape), 'invalid axis[%d] error' % (axis)
return input_shape[0:axis]
......@@ -35,27 +35,33 @@ def reduction_layer(input, name, axis=0, operation=1, coeff=1.0):
Returns:
output (variable): output variable for this layer
"""
    assert operation >= 1 and operation <= 4, "invalid reduction operation [%s]" % (
        operation)
input_len = len(input.shape)
if axis < 0:
axis += input_len + 1
    dim = list(range(input_len))
import paddle.fluid as fluid
    if operation == 1:  ## operation = SUM
        output = fluid.layers.reduce_sum(
            input, dim=dim[axis:], keep_dim=False, name=name)
    elif operation == 2:  ## operation = ASUM
        absout = fluid.layers.abs(input)
        output = fluid.layers.reduce_sum(
            absout, dim=dim[axis:], keep_dim=False, name=name)
    elif operation == 3:  ## operation = SUMSQ
        powout = fluid.layers.pow(x=input, factor=2.0)
        output = fluid.layers.reduce_sum(
            powout, dim=dim[axis:], keep_dim=False, name=name)
    else:  ## operation = MEAN
        output = fluid.layers.reduce_mean(
            input, dim=dim[axis:], keep_dim=False, name=name)
mulout = fluid.layers.scale(x=output, scale=coeff)
return mulout
register(kind='Reduction', shape=reduction_shape, layer=reduction_layer)
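# A quick reference for the four Caffe ReductionParameter ops handled above,
# on a hand-computed toy case (illustrative only):
#
#     x = [[1., -2.], [3., -4.]], axis=1, coeff=0.5
#     SUM   -> 0.5 * [1 + (-2), 3 + (-4)]       = [-0.5, -0.5]
#     ASUM  -> 0.5 * [|1| + |-2|, |3| + |-4|]   = [ 1.5,  3.5]
#     SUMSQ -> 0.5 * [1 + 4, 9 + 16]            = [ 2.5, 12.5]
#     MEAN  -> 0.5 * [(1 - 2) / 2, (3 - 4) / 2] = [-0.25, -0.25]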
......@@ -23,10 +23,13 @@ def layer(op):
else:
layer_input = list(self.terminals)
self.layer_reverse_trace[name] = layer_input
# Perform the operation and get the output.
layer_output = op(self, layer_input, *args, **kwargs)
# Add to layer LUT.
self.layers[name] = layer_output
self.var2name[layer_output.name] = (name, layer_output)
# This output is now the input for the next layer.
self.feed(layer_output)
# Return self for chained calls.
......@@ -49,12 +52,31 @@ class Network(object):
self.paddle_env = None
self.output_names = []
self.name_trace = None
self.layer_reverse_trace = {}
self.var2name = {}
self.setup()
def setup(self):
'''Construct the network. '''
raise NotImplementedError('Must be implemented by the subclass.')
    def locate_ancestor(self, v, which=[0], ancestor_level=1):
        """ find an ancestor of node 'v' (a fluid variable) by walking
            'ancestor_level' steps back along the layer trace
        """
ancestor = None
which = which * ancestor_level
name = self.var2name[v.name][0]
for i in range(ancestor_level):
v = self.layer_reverse_trace[name]
if type(v) is list:
ancestor = self.var2name[v[which[i]].name]
else:
ancestor = self.var2name[v.name]
name = ancestor[0]
return ancestor
def load(self, data_path, exe=None, place=None, ignore_missing=False):
'''Load network weights.
data_path: The path to the numpy-serialized network weights
......@@ -316,7 +338,8 @@ class Network(object):
s_w,
ceil_mode,
padding,
            name=self.get_unique_output_name(name, 'avg_pool'),
            exclusive=False)
@layer
def sigmoid(self, input, name):
......@@ -395,17 +418,35 @@ class Network(object):
return output
@layer
    def softmax(self, input, axis=2, name=None):
        fluid = import_fluid()
        shape = input.shape
        dims = len(shape)
        axis = axis + dims if axis < 0 else axis
        need_transpose = False
        if axis + 1 != dims:
            need_transpose = True
        if need_transpose:
            # move the softmax axis to the last position
            order = list(range(dims))
            order.remove(axis)
            order.append(axis)
            input = fluid.layers.transpose(
                input,
                perm=order,
                name=self.get_unique_output_name(name, 'transpose'))
        output = fluid.layers.softmax(
            input, name=self.get_unique_output_name(name, 'softmax'))
        if need_transpose:
            # apply the inverse permutation to restore the original layout
            order = list(range(dims))
            order.insert(axis, order.pop(-1))
            output = fluid.layers.transpose(
                output,
                perm=order,
                name=self.get_unique_output_name(name, 'transpose'))
        return output
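    # Why the restore step must apply the true inverse permutation: moving an
    # axis to the end and repeating the same move is only an involution when
    # axis == dims - 2. A quick numpy check (illustrative only):
    #
    #     import numpy as np
    #     dims, axis = 4, 1
    #     x = np.random.rand(2, 5, 3, 4)
    #     fwd = list(range(dims)); fwd.remove(axis); fwd.append(axis)
    #     inv = list(range(dims)); inv.insert(axis, inv.pop(-1))
    #     assert np.array_equal(x.transpose(fwd).transpose(inv), x)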
@layer
......@@ -502,6 +543,13 @@ class Network(object):
def custom_layer(self, inputs, kind, name, *args, **kwargs):
""" make custom layer
"""
        # FIXME: work around the API difference between Caffe's and Paddle's
        # 'DetectionOutput': rewire 'conf' to skip the extra 'softmax' and
        # 'flatten' ops that Paddle applies internally (see the note in the
        # custom layer's docstring)
if kind == "DetectionOutput":
conf_var = inputs[1]
real_conf_var = self.locate_ancestor(conf_var, ancestor_level=2)
inputs[1] = real_conf_var[1]
name = self.get_unique_output_name(name, kind)
layer_factory = self.custom_layer_factory()
return layer_factory(kind, inputs, name, *args, **kwargs)
......@@ -156,7 +156,7 @@ class PaddleMapper(NodeMapper):
return MaybeActivated(node)('fc', node.parameters.num_output)
def map_softmax(self, node):
        return PaddleNode('softmax', node.parameters.axis)
def map_lrn(self, node):
params = node.parameters
......
......@@ -62,9 +62,11 @@ def shape_identity(node):
def shape_scalar(node):
return make_tensor(1, 1, 1, 1)
def shape_crop(node):
    raise KaffeError('crop function has been defined in custom_layers')
def shape_data(node):
if node.output_shape:
# Old-style input specification
......
......@@ -166,7 +166,7 @@ def xception_block(input,
filters = check(filters, repeat_number)
strides = check(strides, repeat_number)
data = input
    results = []
for i in range(repeat_number):
with scope('separable_conv' + str(i + 1)):
if not activation_fn_in_separable_conv:
......@@ -185,9 +185,9 @@ def xception_block(input,
filters[i],
dilation=dilation,
act=relu)
            results.append(data)
if not has_skip:
        return append_op_result(data, 'xception_block'), results
if skip_conv:
with scope('shortcut'):
skip = bn(
......@@ -195,7 +195,7 @@ def xception_block(input,
input, channels[-1], 1, strides[-1], groups=1, padding=0))
else:
skip = input
    return append_op_result(data + skip, 'xception_block'), results
def entry_flow(data):
......@@ -209,10 +209,10 @@ def entry_flow(data):
with scope("block1"):
data, _ = xception_block(data, 128, [1, 1, 2])
with scope("block2"):
        data, results = xception_block(data, 256, [1, 1, 2])
with scope("block3"):
data, _ = xception_block(data, 728, [1, 1, 2])
    return data, results[1]
def middle_flow(data):
......
......@@ -90,21 +90,11 @@ To train the model, [cocoapi](https://github.com/cocodataset/cocoapi) is needed.
* Use momentum optimizer with momentum=0.9.
* Weight decay is 0.0001.
* In the first 500 iterations, the learning rate increases linearly from 0.00333 to 0.01. Then the learning rate is decayed at iterations 120000 and 160000 with multipliers 0.1 and 0.01, and the maximum iteration is 180000. We also released a 2x model, which trains for 360000 iterations with the learning rate decayed at 240000 and 320000. These configurations can be set by max_iter and lr_steps in config.py (see the sketch after this list).
* In non-basic convolutional layers, set the learning rate of bias to twice the global learning rate.
* In basic convolutional layers, the parameters of the affine layers and the res body are not updated.
* Training runs on 8 NVIDIA Tesla V100 GPUs and takes about 40 hours in total.
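The warm-up and piecewise-decay schedule described above can be written out as follows. This is a minimal sketch: the constants mirror config.py, but the helper name `lr_at_iter` is illustrative and not part of the code base.
```python
def lr_at_iter(it, base_lr=0.01, warm_up_iter=500, warm_up_factor=1. / 3,
               lr_steps=(120000, 160000), lr_gamma=0.1):
    """Learning rate at iteration `it`: linear warm-up, then piecewise decay."""
    if it < warm_up_iter:
        # linear warm-up from base_lr * warm_up_factor (~0.00333) to base_lr (0.01)
        start = base_lr * warm_up_factor
        return start + (base_lr - start) * it / float(warm_up_iter)
    lr = base_lr
    for step in lr_steps:
        if it >= step:
            lr *= lr_gamma  # overall multipliers 0.1, then 0.01
    return lr
```
For the 2x schedule, pass `lr_steps=(240000, 320000)` and stop at 360000 iterations.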
## Evaluation
Evaluation measures the performance of a trained model. This sample provides `eval_coco_map.py`, which uses a COCO-specific mAP metric defined by the [COCO committee](http://cocodataset.org/#detections-eval).
......@@ -118,20 +108,18 @@ Evaluation is to evaluate the performance of a trained model. This sample provid
- Set ```export CUDA_VISIBLE_DEVICES=0``` to specify one GPU for evaluation.
Evaluation results are shown below:
| Model | RoI function | Batch size | Max iteration | mAP |
| :--------------- | :--------: | :------------: | :------------------: |------: |
| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.314 |
| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 |
| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.345 |
| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.364 |
* Fluid RoIPool minibatch padding: use RoIPool; images in one batch are padded to the same size. This matches detectron.
* Fluid RoIPool no padding: use RoIPool; images are not padded.
* Fluid RoIAlign no padding: use RoIAlign; images are not padded.
* Fluid RoIAlign no padding 2x: use RoIAlign; images are not padded; trained for 360000 iterations with the learning rate decayed at 240000 and 320000.
## Inference and Visualization
......
......@@ -81,20 +81,10 @@ Faster RCNN object detection model
* When RPN selects anchors, rpn\_fg\_fraction=0.5, rpn\_positive\_overlap=0.7, rpn\_negative\_overlap=0.3.
**Training strategy:**
* Train Faster RCNN with the momentum optimizer, momentum=0.9.
* Weight decay is 0.0001. In the first 500 iterations the learning rate increases linearly from 0.00333 to 0.01; it is then decayed with multipliers 0.1 and 0.01 at iterations 120000 and 160000, and the maximum iteration is 180000. We also provide a 2x model trained for 360000 iterations with the learning rate decayed at 240000 and 320000, other parameters unchanged; the maximum iteration and the learning rate schedule can be set by max_iter and lr_steps in config.py.
* In non-basic convolutional layers, the learning rate of bias is twice the global learning rate.
* In basic convolutional layers, the parameters of affine\_layers and the res2 layer are not updated.
* Training on 8 NVIDIA Tesla V100 GPUs in parallel takes about 40 hours in total.
......@@ -111,24 +101,21 @@ Faster RCNN training loss
- Set `export CUDA_VISIBLE_DEVICES=0` to evaluate on a single GPU.
The table below shows the evaluation results:
| Model | RoI function | Batch size | Max iteration | mAP |
| :--------------- | :--------: | :------------: | :------------------: |------: |
| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.314 |
| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 |
| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.345 |
| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.364 |
* Fluid RoIPool minibatch padding: use RoIPool; images in one batch are padded to the same size. This matches detectron.
* Fluid RoIPool no padding: use RoIPool; images are not padded.
* Fluid RoIAlign no padding: use RoIAlign; images are not padded.
* Fluid RoIAlign no padding 2x: use RoIAlign; images are not padded; trained for 360000 iterations with the learning rate decayed at 240000 and 320000.
## Inference and Visualization
......
......@@ -163,15 +163,17 @@ _C.spatial_scale = 1. / 16.
# derived learning rate to get the final learning rate.
_C.learning_rate = 0.01
# maximum number of iterations, 1x: 180000, 2x: 360000
_C.max_iter = 180000
#_C.max_iter = 360000
# warm up to learning rate
_C.warm_up_iter = 500
_C.warm_up_factor = 1. / 3.
# lr steps_with_decay, 1x: [120000, 160000], 2x: [240000, 320000]
_C.lr_steps = [120000, 160000]
#_C.lr_steps = [240000, 320000]
_C.lr_gamma = 0.1
# L2 regularization hyperparameter
......
# Simple Baselines for Human Pose Estimation in Fluid
## Introduction
This is a simple demonstration of a re-implementation in [PaddlePaddle.Fluid](http://www.paddlepaddle.org/en) of the paper [Simple Baselines for Human Pose Estimation and Tracking](https://arxiv.org/abs/1804.06208) (ECCV'18) from MSRA.
![demo](demo.gif)
> **Video in Demo**: *Bruno Mars - That’s What I Like [Official Video]*.
## Requirements
- Python == 2.7
- PaddlePaddle >= 1.0
- opencv-python >= 3.3
- tqdm >= 4.25
## Environment
The code is developed and tested with 4 Tesla K40 GPU cards on CentOS, with CUDA-9.2/8.0 and cuDNN-7.1 installed.
## Known Issues
- The model does not converge with a large batch\_size (e.g. 32) on Tesla P40 / V100 / P100 GPU cards, because PaddlePaddle uses cuDNN's batch normalization. Reducing batch\_size to 1 image per card during training eases this problem, but the resulting performance has not been verified. The issue can be tracked [here](https://github.com/PaddlePaddle/Paddle/issues/14580).
## Results on MPII Val
| Arch | Head | Shoulder | Elbow | Wrist | Hip | Knee | Ankle | Mean | Mean@0.1| Models |
| ---- |:----:|:--------:|:-----:|:-----:|:---:|:----:|:-----:|:----:|:-------:|:------:|
| 384x384\_pose\_resnet\_50 in PyTorch | 96.658 | 95.754 | 89.790 | 84.614 | 88.523 | 84.666 | 79.287 | 89.066 | 38.046 | - |
| 384x384\_pose\_resnet\_50 in Fluid | 96.248 | 95.346 | 89.807 | 84.873 | 88.298 | 83.679 | 78.649 | 88.767 | 37.374 | [`link`](http://paddlemodels.bj.bcebos.com/pose/pose-resnet-50-384x384-mpii.tar.gz) |
### Notes:
- Flip test is used.
- We did not search hard for the best model; validation simply uses the last saved checkpoint.
## Getting Started
### Prepare Datasets and Pretrained Models
- Follow the [instruction](https://github.com/Microsoft/human-pose-estimation.pytorch#data-preparation) to prepare the datasets.
- Download the pretrained ResNet-50 model in PaddlePaddle.Fluid on ImageNet from [Model Zoo](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/image_classification#supported-models-and-performances).
```bash
wget http://paddle-imagenet-models.bj.bcebos.com/resnet_50_model.tar
```
Then, put them in the folder `pretrained` under the root directory of this repo, so that the layout looks like:
```
${THIS REPO ROOT}
`-- pretrained
`-- resnet_50
|-- 115
`-- data
`-- coco
|-- annotations
|-- images
`-- mpii
|-- annot
|-- images
```
### Install [COCOAPI](https://github.com/cocodataset/cocoapi)
```bash
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# if cython is not installed
pip install Cython
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
```
### Perform Validating
Download the checkpoint of Pose-ResNet-50 trained on the MPII dataset from [here](http://paddlemodels.bj.bcebos.com/pose/pose-resnet-50-384x384-mpii.tar.gz). Extract it into the folder `checkpoints` under the root directory of this repo, then run
```bash
python2 val.py --dataset 'mpii' --checkpoint 'checkpoints/pose-resnet-50-384x384-mpii'
```
### Perform Training
```bash
python2 train.py --dataset 'mpii' # or coco
```
**Note**: Configurations for training are aggregated in `lib/mpii_reader.py` and `lib/coco_reader.py`.
### Perform Test on Images
Put the images into the folder `test` under the root directory of this repo, then run
```bash
python2 test.py --checkpoint 'checkpoints/pose-resnet-50-384x384-mpii'
```
If there are multiple persons in an image, detectors such as [Faster R-CNN](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/faster_rcnn), [SSD](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/object_detection) or others should be used first to crop them out, because the simple baseline for human pose estimation is a top-down method.
## Reference
- Simple Baselines for Human Pose Estimation and Tracking in PyTorch [`code`](https://github.com/Microsoft/human-pose-estimation.pytorch#data-preparation)
## License
This code is released under the Apache License 2.0.
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Libs for data reader."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import cv2
import numpy as np
def visualize(cfg, filename, data_numpy, input, joints, target):
"""
:param cfg: global configurations for dataset
:param filename: the name of image file
:param data_numpy: original numpy image data
:param input: input tensor [b, c, h, w]
:param joints: [num_joints, 3]
:param target: target tensor [b, c, h, w]
"""
TMPDIR = cfg.TMPDIR
NUM_JOINTS = cfg.NUM_JOINTS
    if os.path.exists(TMPDIR):
        shutil.rmtree(TMPDIR)
    os.mkdir(TMPDIR)
f = open(os.path.join(TMPDIR, filename), 'w')
f.close()
cv2.imwrite(os.path.join(TMPDIR, 'flip.jpg'), data_numpy)
cv2.imwrite(os.path.join(TMPDIR, 'input.jpg'), input)
for i in range(NUM_JOINTS):
cv2.imwrite(os.path.join(TMPDIR, 'target_{}.jpg'.format(i)), cv2.applyColorMap(
np.uint8(np.expand_dims(target[i], 2)*255.), cv2.COLORMAP_JET))
cv2.circle(input, (int(joints[i, 0]), int(joints[i, 1])), 5, [170, 255, 0], -1)
cv2.imwrite(os.path.join(TMPDIR, 'input_kps.jpg'), input)
def generate_target(cfg, joints, joints_vis):
"""
:param joints: [num_joints, 3]
:param joints_vis: [num_joints, 3]
:return: target, target_weight(1: visible, 0: invisible)
"""
NUM_JOINTS = cfg.NUM_JOINTS
TARGET_TYPE = cfg.TARGET_TYPE
HEATMAP_SIZE = cfg.HEATMAP_SIZE
IMAGE_SIZE = cfg.IMAGE_SIZE
SIGMA = cfg.SIGMA
target_weight = np.ones((NUM_JOINTS, 1), dtype=np.float32)
target_weight[:, 0] = joints_vis[:, 0]
assert TARGET_TYPE == 'gaussian', \
'Only support gaussian map now!'
if TARGET_TYPE == 'gaussian':
target = np.zeros((NUM_JOINTS,
HEATMAP_SIZE[1],
HEATMAP_SIZE[0]),
dtype=np.float32)
tmp_size = SIGMA * 3
for joint_id in range(NUM_JOINTS):
feat_stride = np.array(IMAGE_SIZE) / np.array(HEATMAP_SIZE)
mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
# Check that any part of the gaussian is in-bounds
ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
if ul[0] >= HEATMAP_SIZE[0] or ul[1] >= HEATMAP_SIZE[1] \
or br[0] < 0 or br[1] < 0:
# If not, just return the image as is
target_weight[joint_id] = 0
continue
# Generate gaussian
size = 2 * tmp_size + 1
x = np.arange(0, size, 1, np.float32)
y = x[:, np.newaxis]
x0 = y0 = size // 2
# The gaussian is not normalized, we want the center value to equal 1
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * SIGMA ** 2))
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], HEATMAP_SIZE[0]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], HEATMAP_SIZE[1]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], HEATMAP_SIZE[0])
img_y = max(0, ul[1]), min(br[1], HEATMAP_SIZE[1])
v = target_weight[joint_id]
if v > 0.5:
target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
return target, target_weight
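if __name__ == '__main__':
    # Minimal self-check of generate_target; the toy config below is an
    # illustrative stand-in for the dataset configs defined in the readers.
    class _ToyCfg(object):
        NUM_JOINTS = 2
        TARGET_TYPE = 'gaussian'
        HEATMAP_SIZE = [96, 96]
        IMAGE_SIZE = [384, 384]
        SIGMA = 3
    joints = np.array([[192., 192., 0.], [10., 10., 0.]])
    joints_vis = np.ones((2, 3), dtype=np.float32)
    target, target_weight = generate_target(_ToyCfg(), joints, joints_vis)
    # feat_stride is 384 / 96 = 4, so a visible joint becomes a gaussian
    # peaking near joints / 4 on its 96x96 heatmap
    assert target.shape == (2, 96, 96) and target_weight.shape == (2, 1)
    assert target[0].argmax() == 48 * 96 + 48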
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Data reader for COCO dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import functools
import numpy as np
import cv2
import random
from utils.transforms import fliplr_joints
from utils.transforms import get_affine_transform
from utils.transforms import affine_transform
from lib.base_reader import visualize, generate_target
from pycocotools.coco import COCO
# NOTE
# -- COCO Datatset --
# "keypoints":
# {
# 0: "nose",
# 1: "left_eye",
# 2: "right_eye",
# 3: "left_ear",
# 4: "right_ear",
# 5: "left_shoulder",
# 6: "right_shoulder",
# 7: "left_elbow",
# 8: "right_elbow",
# 9: "left_wrist",
# 10: "right_wrist",
# 11: "left_hip",
# 12: "right_hip",
# 13: "left_knee",
# 14: "right_knee",
# 15: "left_ankle",
# 16: "right_ankle"
# },
#
# "skeleton":
# [
# [16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13], [6,7],[6,8],
# [7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]
# ]
class Config:
"""Configurations for COCO dataset.
"""
DEBUG = False
TMPDIR = 'tmp_fold_for_debug'
# For reader
BUF_SIZE = 102400
THREAD = 1 if DEBUG else 8 # have to be larger than 0
# Fixed infos of dataset
DATAROOT = 'data/coco'
IMAGEDIR = 'images'
NUM_JOINTS = 17
FLIP_PAIRS = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
PARENT_IDS = None
# CFGS
SCALE_FACTOR = 0.3
ROT_FACTOR = 40
FLIP = True
TARGET_TYPE = 'gaussian'
SIGMA = 3
IMAGE_SIZE = [288, 384]
HEATMAP_SIZE = [72, 96]
ASPECT_RATIO = IMAGE_SIZE[0] * 1.0 / IMAGE_SIZE[1]
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
PIXEL_STD = 200
cfg = Config()
def _box2cs(box):
x, y, w, h = box[:4]
return _xywh2cs(x, y, w, h)
def _xywh2cs(x, y, w, h):
center = np.zeros((2), dtype=np.float32)
center[0] = x + w * 0.5
center[1] = y + h * 0.5
if w > cfg.ASPECT_RATIO * h:
h = w * 1.0 / cfg.ASPECT_RATIO
elif w < cfg.ASPECT_RATIO * h:
w = h * cfg.ASPECT_RATIO
scale = np.array(
[w * 1.0 / cfg.PIXEL_STD, h * 1.0 / cfg.PIXEL_STD],
dtype=np.float32)
if center[0] != -1:
scale = scale * 1.25
return center, scale
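# Worked example (numbers are illustrative): for the box [50, 30, 100, 50]
# given as (x, y, w, h), the center is (100.0, 55.0). Since w > ASPECT_RATIO * h
# (100 > 0.75 * 50), h is raised to w / ASPECT_RATIO = 133.33 to keep the crop
# aspect ratio; the scale is expressed in PIXEL_STD (200 px) units and then
# inflated by 1.25:
#
#     center, scale = _box2cs([50, 30, 100, 50])
#     # center -> [100., 55.], scale -> [0.625, 0.8333]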
def _select_data(db):
db_selected = []
for rec in db:
num_vis = 0
joints_x = 0.0
joints_y = 0.0
for joint, joint_vis in zip(
rec['joints_3d'], rec['joints_3d_vis']):
if joint_vis[0] <= 0:
continue
num_vis += 1
joints_x += joint[0]
joints_y += joint[1]
if num_vis == 0:
continue
joints_x, joints_y = joints_x / num_vis, joints_y / num_vis
area = rec['scale'][0] * rec['scale'][1] * (cfg.PIXEL_STD**2)
joints_center = np.array([joints_x, joints_y])
bbox_center = np.array(rec['center'])
diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2)
ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area))
metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16
if ks > metric:
db_selected.append(rec)
print('=> num db: {}'.format(len(db)))
print('=> num selected db: {}'.format(len(db_selected)))
return db_selected
def _load_coco_keypoint_annotation(image_set_index, coco, _coco_ind_to_class_ind, image_set):
"""Ground truth bbox and keypoints.
"""
print('generating coco gt_db...')
gt_db = []
for index in image_set_index:
im_ann = coco.loadImgs(index)[0]
width = im_ann['width']
height = im_ann['height']
annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
objs = coco.loadAnns(annIds)
# Sanitize bboxes
valid_objs = []
for obj in objs:
x, y, w, h = obj['bbox']
x1 = np.max((0, x))
y1 = np.max((0, y))
x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
obj['clean_bbox'] = [x1, y1, x2-x1, y2-y1]
valid_objs.append(obj)
objs = valid_objs
rec = []
for obj in objs:
cls = _coco_ind_to_class_ind[obj['category_id']]
if cls != 1:
continue
# Ignore objs without keypoints annotation
if max(obj['keypoints']) == 0:
continue
joints_3d = np.zeros((cfg.NUM_JOINTS, 3), dtype=np.float)
joints_3d_vis = np.zeros((cfg.NUM_JOINTS, 3), dtype=np.float)
for ipt in range(cfg.NUM_JOINTS):
joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
joints_3d[ipt, 2] = 0
t_vis = obj['keypoints'][ipt * 3 + 2]
if t_vis > 1:
t_vis = 1
joints_3d_vis[ipt, 0] = t_vis
joints_3d_vis[ipt, 1] = t_vis
joints_3d_vis[ipt, 2] = 0
center, scale = _box2cs(obj['clean_bbox'][:4])
rec.append({
'image': os.path.join(cfg.DATAROOT, cfg.IMAGEDIR, image_set+'2017', '%012d.jpg' % index),
'center': center,
'scale': scale,
'joints_3d': joints_3d,
'joints_3d_vis': joints_3d_vis,
'filename': '%012d.jpg' % index,
'imgnum': 0,
})
gt_db.extend(rec)
return gt_db
def data_augmentation(sample, is_train):
image_file = sample['image']
filename = sample['filename'] if 'filename' in sample else ''
joints = sample['joints_3d']
joints_vis = sample['joints_3d_vis']
c = sample['center']
s = sample['scale']
# score = sample['score'] if 'score' in sample else 1
# imgnum = sample['imgnum'] if 'imgnum' in sample else ''
r = 0
data_numpy = cv2.imread(
image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
if is_train:
sf = cfg.SCALE_FACTOR
rf = cfg.ROT_FACTOR
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
if random.random() <= 0.6 else 0
if cfg.FLIP and random.random() <= 0.5:
data_numpy = data_numpy[:, ::-1, :]
joints, joints_vis = fliplr_joints(
joints, joints_vis, data_numpy.shape[1], cfg.FLIP_PAIRS)
c[0] = data_numpy.shape[1] - c[0] - 1
trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
input = cv2.warpAffine(
data_numpy,
trans,
(int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
flags=cv2.INTER_LINEAR)
for i in range(cfg.NUM_JOINTS):
if joints_vis[i, 0] > 0.0:
joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
# Numpy target
target, target_weight = generate_target(cfg, joints, joints_vis)
if cfg.DEBUG:
visualize(cfg, filename, data_numpy, input.copy(), joints, target)
# Normalization
input = input.astype('float32').transpose((2, 0, 1)) / 255
input -= np.array(cfg.MEAN).reshape((3, 1, 1))
input /= np.array(cfg.STD).reshape((3, 1, 1))
if is_train:
return input, target, target_weight
else:
return input, target, target_weight, c, s
# Create a reader
def _reader_creator(root, image_set, shuffle=False, is_train=False, use_gt_bbox=False):
def reader():
if image_set in ['train', 'val']:
file_name = os.path.join(root, 'annotations', 'person_keypoints_'+image_set+'2017.json')
elif image_set in ['test', 'test-dev']:
file_name = os.path.join(root, 'annotations', 'image_info_'+image_set+'2017.json')
else:
raise ValueError("The dataset '{}' is not supported".format(image_set))
# Load annotations
coco = COCO(file_name)
# Deal with class names
cats = [cat['name']
for cat in coco.loadCats(coco.getCatIds())]
classes = ['__background__'] + cats
print('=> classes: {}'.format(classes))
num_classes = len(classes)
_class_to_ind = dict(zip(classes, range(num_classes)))
_class_to_coco_ind = dict(zip(cats, coco.getCatIds()))
_coco_ind_to_class_ind = dict([(_class_to_coco_ind[cls],
_class_to_ind[cls])
for cls in classes[1:]])
# Load image file names
image_set_index = coco.getImgIds()
num_images = len(image_set_index)
print('=> num_images: {}'.format(num_images))
if is_train or use_gt_bbox:
gt_db = _load_coco_keypoint_annotation(
image_set_index, coco, _coco_ind_to_class_ind, image_set)
gt_db = _select_data(gt_db)
if shuffle:
random.shuffle(gt_db)
for db in gt_db:
yield db
mapper = functools.partial(data_augmentation, is_train=is_train)
return reader, mapper
def train():
reader, mapper = _reader_creator(cfg.DATAROOT, 'train', shuffle=True, is_train=True)
def pop():
for i, x in enumerate(reader()):
yield mapper(x)
return pop
def valid():
reader, mapper = _reader_creator(cfg.DATAROOT, 'val', shuffle=False, is_train=False)
def pop():
for i, x in enumerate(reader()):
yield mapper(x)
return pop
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Data reader for MPII."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import random
import functools
import json
import numpy as np
import cv2
from utils.transforms import fliplr_joints
from utils.transforms import get_affine_transform
from utils.transforms import affine_transform
from lib.base_reader import visualize, generate_target
class Config:
"""Configurations for MPII dataset.
"""
DEBUG = False
TMPDIR = 'tmp_fold_for_debug'
# For reader
BUF_SIZE = 102400
THREAD = 1 if DEBUG else 8 # have to be larger than 0
# Fixed infos of dataset
DATAROOT = 'data/mpii'
IMAGEDIR = 'images'
NUM_JOINTS = 16
FLIP_PAIRS = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
PARENT_IDS = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14]
# CFGS
SCALE_FACTOR = 0.3
ROT_FACTOR = 40
FLIP = True
TARGET_TYPE = 'gaussian'
SIGMA = 3
IMAGE_SIZE = [384, 384]
HEATMAP_SIZE = [96, 96]
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
cfg = Config()
def data_augmentation(sample, is_train):
image_file = sample['image']
filename = sample['filename'] if 'filename' in sample else ''
joints = sample['joints_3d']
joints_vis = sample['joints_3d_vis']
c = sample['center']
s = sample['scale']
score = sample['score'] if 'score' in sample else 1
# imgnum = sample['imgnum'] if 'imgnum' in sample else ''
r = 0
data_numpy = cv2.imread(
image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
if is_train:
sf = cfg.SCALE_FACTOR
rf = cfg.ROT_FACTOR
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
if random.random() <= 0.6 else 0
if cfg.FLIP and random.random() <= 0.5:
data_numpy = data_numpy[:, ::-1, :]
joints, joints_vis = fliplr_joints(
joints, joints_vis, data_numpy.shape[1], cfg.FLIP_PAIRS)
c[0] = data_numpy.shape[1] - c[0] - 1
trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
input = cv2.warpAffine(
data_numpy,
trans,
(int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
flags=cv2.INTER_LINEAR)
for i in range(cfg.NUM_JOINTS):
if joints_vis[i, 0] > 0.0:
joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
# Numpy target
target, target_weight = generate_target(cfg, joints, joints_vis)
if cfg.DEBUG:
visualize(cfg, filename, data_numpy, input.copy(), joints, target)
# Normalization
input = input.astype('float32').transpose((2, 0, 1)) / 255
input -= np.array(cfg.MEAN).reshape((3, 1, 1))
input /= np.array(cfg.STD).reshape((3, 1, 1))
if is_train:
return input, target, target_weight
else:
return input, target, target_weight, c, s, score
def test_data_augmentation(sample):
image_file = sample['image']
filename = sample['filename'] if 'filename' in sample else ''
file_id = int(filename.split('.')[0].split('_')[1])
input = cv2.imread(
image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
input = cv2.resize(input, (int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])))
# Normalization
input = input.astype('float32').transpose((2, 0, 1)) / 255
input -= np.array(cfg.MEAN).reshape((3, 1, 1))
input /= np.array(cfg.STD).reshape((3, 1, 1))
return input, file_id
# Create a reader
def _reader_creator(root, image_set, shuffle=False, is_train=False):
def reader():
if image_set != 'test':
file_name = os.path.join(root, 'annot', image_set+'.json')
with open(file_name) as anno_file:
anno = json.load(anno_file)
print('=> load {} samples of {} dataset'.format(len(anno), image_set))
if shuffle:
random.shuffle(anno)
for a in anno:
image_name = a['image']
c = np.array(a['center'], dtype=np.float)
s = np.array([a['scale'], a['scale']], dtype=np.float)
# Adjust center/scale slightly to avoid cropping limbs
if c[0] != -1:
c[1] = c[1] + 15 * s[1]
s = s * 1.25
# MPII uses matlab format, index is based 1,
# we should first convert to 0-based index
c = c - 1
joints_3d = np.zeros((cfg.NUM_JOINTS, 3), dtype=np.float)
joints_3d_vis = np.zeros((cfg.NUM_JOINTS, 3), dtype=np.float)
joints = np.array(a['joints'])
joints[:, 0:2] = joints[:, 0:2] - 1
joints_vis = np.array(a['joints_vis'])
assert len(joints) == cfg.NUM_JOINTS, \
'joint num diff: {} vs {}'.format(len(joints), cfg.NUM_JOINTS)
joints_3d[:, 0:2] = joints[:, 0:2]
joints_3d_vis[:, 0] = joints_vis[:]
joints_3d_vis[:, 1] = joints_vis[:]
yield dict(
image = os.path.join(cfg.DATAROOT, cfg.IMAGEDIR, image_name),
center = c,
scale = s,
joints_3d = joints_3d,
joints_3d_vis = joints_3d_vis,
filename = image_name,
test_mode = False,
imagenum = 0)
else:
fold = 'test'
for img_name in os.listdir(fold):
yield dict(image = os.path.join(fold, img_name),
filename = img_name)
    if image_set != 'test':
        mapper = functools.partial(data_augmentation, is_train=is_train)
    else:
        mapper = functools.partial(test_data_augmentation)
return reader, mapper
def train():
reader, mapper = _reader_creator(cfg.DATAROOT, 'train', shuffle=True, is_train=True)
def pop():
for i, x in enumerate(reader()):
yield mapper(x)
return pop
def valid():
reader, mapper = _reader_creator(cfg.DATAROOT, 'valid', shuffle=False, is_train=False)
def pop():
for i, x in enumerate(reader()):
yield mapper(x)
return pop
def test():
reader, mapper = _reader_creator(cfg.DATAROOT, 'test')
def pop():
for i, x in enumerate(reader()):
yield mapper(x)
return pop
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Functions for building network."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"]
# Global parameters
BN_MOMENTUM = 0.1
class ResNet():
def __init__(self, layers=50, kps_num=16, test_mode=False):
"""
        :param layers: int, the number of network layers
        :param kps_num: int, the number of keypoints, in accordance with the dataset
        :param test_mode: bool, if True, only output heatmaps are returned, no loss
        :return: loss, output heatmaps
"""
self.k = kps_num
self.layers = layers
self.test_mode = test_mode
def net(self, input, target=None, target_weight=None):
layers = self.layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1)
conv = fluid.layers.conv2d_transpose(
input=conv, num_filters=256,
filter_size=4,
padding=1,
stride=2,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Normal(0., 0.001)),
act=None,
bias_attr=False)
conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM)
conv = fluid.layers.conv2d_transpose(
input=conv, num_filters=256,
filter_size=4,
padding=1,
stride=2,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Normal(0., 0.001)),
act=None,
bias_attr=False)
conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM)
conv = fluid.layers.conv2d_transpose(
input=conv, num_filters=256,
filter_size=4,
padding=1,
stride=2,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Normal(0., 0.001)),
act=None,
bias_attr=False)
conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM)
out = fluid.layers.conv2d(
input=conv,
num_filters=self.k,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Normal(0., 0.001)))
if self.test_mode:
return out
else:
loss = self.calc_loss(out, target, target_weight)
return loss, out
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Normal(0., 0.001)),
act=None,
bias_attr=False)
return fluid.layers.batch_norm(input=conv, act=act, momentum=BN_MOMENTUM)
def shortcut(self, input, ch_out, stride):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride)
else:
return input
def calc_loss(self, heatmap, target, target_weight):
_, c, h, w = heatmap.shape
x = fluid.layers.reshape(heatmap, (-1, self.k, h*w))
y = fluid.layers.reshape(target, (-1, self.k, h*w))
w = fluid.layers.reshape(target_weight, (-1, self.k))
x = fluid.layers.split(x, num_or_sections=self.k, dim=1)
y = fluid.layers.split(y, num_or_sections=self.k, dim=1)
w = fluid.layers.split(w, num_or_sections=self.k, dim=1)
_list = []
for idx in range(self.k):
_tmp = fluid.layers.scale(x=x[idx] - y[idx], scale=1.)
_tmp = _tmp * _tmp
_tmp = fluid.layers.reduce_mean(_tmp, dim=2)
_list.append(_tmp * w[idx])
_loss = fluid.layers.concat(_list, axis=0)
_loss = fluid.layers.reduce_mean(_loss)
return 0.5 * _loss
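    # Numpy reference for calc_loss (illustrative): the value above equals
    #
    #     0.5 * mean over (batch, joint) of
    #               target_weight[b, j] * mean over pixels of (heatmap - target)^2
    #
    # i.e. a per-joint heatmap MSE, masked by joint visibility.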
def bottleneck_block(self, input, num_filters, stride):
conv0 = self.conv_bn_layer(
input=input, num_filters=num_filters, filter_size=1, act='relu')
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 4, filter_size=1, act=None)
short = self.shortcut(input, num_filters * 4, stride)
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def ResNet50():
model = ResNet(layers=50)
return model
def ResNet101():
model = ResNet(layers=101)
return model
def ResNet152():
model = ResNet(layers=152)
return model
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Functions for inference."""
import os
import argparse
import functools
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from tqdm import tqdm
from lib import pose_resnet
from utils.transforms import flip_back
from utils.utility import *
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('dataset', str, 'mpii', "Dataset")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('num_epochs', int, 140, "Number of epochs.")
add_arg('total_images', int, 144406, "Training image number.")
add_arg('kp_dim', int, 16, "Class number.")
add_arg('model_save_dir', str, "output", "Model save directory")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('lr', float, 0.001, "Set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
add_arg('flip_test', bool, True, "Flip test")
add_arg('shift_heatmap', bool, True, "Shift heatmap")
add_arg('post_process', bool, False, "post process")
# yapf: enable
FLIP_PAIRS = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
def test(args):
if args.dataset == 'coco':
import lib.coco_reader as reader
IMAGE_SIZE = [288, 384]
# HEATMAP_SIZE = [72, 96]
FLIP_PAIRS = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
args.kp_dim = 17
args.total_images = 144406 # 149813
elif args.dataset == 'mpii':
import lib.mpii_reader as reader
IMAGE_SIZE = [384, 384]
# HEATMAP_SIZE = [96, 96]
FLIP_PAIRS = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
args.kp_dim = 16
args.total_images = 2958 # validation
else:
raise ValueError('The dataset {} is not supported yet.'.format(args.dataset))
print_arguments(args)
# Image and target
image = layers.data(name='image', shape=[3, IMAGE_SIZE[1], IMAGE_SIZE[0]], dtype='float32')
file_id = layers.data(name='file_id', shape=[1,], dtype='int')
# Build model
model = pose_resnet.ResNet(layers=50, kps_num=args.kp_dim, test_mode=True)
# Output
output = model.net(input=image, target=None, target_weight=None)
# Parameters from model and arguments
params = {}
params["total_images"] = args.total_images
params["lr"] = args.lr
params["num_epochs"] = args.num_epochs
params["learning_strategy"] = {}
params["learning_strategy"]["batch_size"] = args.batch_size
params["learning_strategy"]["name"] = args.lr_strategy
if args.with_mem_opt:
fluid.memory_optimize(fluid.default_main_program(),
skip_opt_set=[output.name])
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
args.pretrained_model = './pretrained/resnet_50/115'
if args.pretrained_model:
def if_exist(var):
exist_flag = os.path.exists(os.path.join(args.pretrained_model, var.name))
return exist_flag
fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
if args.checkpoint is not None:
fluid.io.load_persistables(exe, args.checkpoint)
# Dataloader
test_reader = paddle.batch(reader.test(), batch_size=args.batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, file_id])
test_exe = fluid.ParallelExecutor(
use_cuda=True if args.use_gpu else False,
main_program=fluid.default_main_program().clone(for_test=False),
loss_name=None)
fetch_list = [image.name, output.name]
for batch_id, data in tqdm(enumerate(test_reader())):
num_images = len(data)
file_ids = []
for i in range(num_images):
file_ids.append(data[i][1])
input_image, out_heatmaps = test_exe.run(
fetch_list=fetch_list,
feed=feeder.feed(data))
        if args.flip_test:
            # Flip all the images in the same batch
data_fliped = []
for i in range(num_images):
data_fliped.append((
data[i][0][:, :, ::-1],
data[i][1]))
# Inference again
_, output_flipped = test_exe.run(
fetch_list=fetch_list,
feed=feeder.feed(data_fliped))
# Flip back
output_flipped = flip_back(output_flipped, FLIP_PAIRS)
# Feature is not aligned, shift flipped heatmap for higher accuracy
if args.shift_heatmap:
output_flipped[:, :, :, 1:] = \
output_flipped.copy()[:, :, :, 0:-1]
# Aggregate
out_heatmaps = (out_heatmaps + output_flipped) * 0.5
save_predict_results(input_image, out_heatmaps, file_ids, fold_name='results')
if __name__ == '__main__':
args = parser.parse_args()
test(args)
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Functions for training."""
import os
import numpy as np
import cv2
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import argparse
import functools
from lib import pose_resnet
from utils.utility import *
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('dataset', str, 'mpii', "Dataset")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('num_epochs', int, 140, "Number of epochs.")
add_arg('total_images', int, 144406, "Training image number.")
add_arg('kp_dim', int, 16, "Class number.")
add_arg('model_save_dir', str, "output", "Model save directory")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('lr', float, 0.001, "Set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
# yapf: enable
def optimizer_setting(args, params):
lr_drop_ratio = 0.1
ls = params["learning_strategy"]
if ls["name"] == "piecewise_decay":
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(total_images / batch_size + 1)
ls['epochs'] = [91, 121]
print('=> LR will be dropped at the epoch of {}'.format(ls['epochs']))
bd = [step * e for e in ls["epochs"]]
base_lr = params["lr"]
        lr = [base_lr * (lr_drop_ratio**i) for i in range(len(bd) + 1)]
# AdamOptimizer
optimizer = paddle.fluid.optimizer.AdamOptimizer(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr))
else:
lr = params["lr"]
optimizer = fluid.optimizer.Momentum(
learning_rate=lr,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(0.0005))
return optimizer
def train(args):
if args.dataset == 'coco':
import lib.coco_reader as reader
IMAGE_SIZE = [288, 384]
HEATMAP_SIZE = [72, 96]
args.kp_dim = 17
args.total_images = 144406 # 149813
elif args.dataset == 'mpii':
import lib.mpii_reader as reader
IMAGE_SIZE = [384, 384]
HEATMAP_SIZE = [96, 96]
args.kp_dim = 16
args.total_images = 22246
else:
raise ValueError('The dataset {} is not supported yet.'.format(args.dataset))
print_arguments(args)
# Image and target
image = layers.data(name='image', shape=[3, IMAGE_SIZE[1], IMAGE_SIZE[0]], dtype='float32')
target = layers.data(name='target', shape=[args.kp_dim, HEATMAP_SIZE[1], HEATMAP_SIZE[0]], dtype='float32')
target_weight = layers.data(name='target_weight', shape=[args.kp_dim, 1], dtype='float32')
# Build model
model = pose_resnet.ResNet(layers=50, kps_num=args.kp_dim)
# Output
loss, output = model.net(input=image, target=target, target_weight=target_weight)
# Parameters from model and arguments
params = {}
params["total_images"] = args.total_images
params["lr"] = args.lr
params["num_epochs"] = args.num_epochs
params["learning_strategy"] = {}
params["learning_strategy"]["batch_size"] = args.batch_size
params["learning_strategy"]["name"] = args.lr_strategy
# Initialize optimizer
optimizer = optimizer_setting(args, params)
optimizer.minimize(loss)
if args.with_mem_opt:
fluid.memory_optimize(fluid.default_main_program(),
skip_opt_set=[loss.name, output.name, target.name])
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
args.pretrained_model = './pretrained/resnet_50/115'
if args.pretrained_model:
def if_exist(var):
exist_flag = os.path.exists(os.path.join(args.pretrained_model, var.name))
return exist_flag
fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
if args.checkpoint is not None:
fluid.io.load_persistables(exe, args.checkpoint)
# Dataloader
train_reader = paddle.batch(reader.train(), batch_size=args.batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, target, target_weight])
train_exe = fluid.ParallelExecutor(
use_cuda=True if args.use_gpu else False, loss_name=loss.name)
fetch_list = [image.name, loss.name, output.name]
for pass_id in range(params["num_epochs"]):
for batch_id, data in enumerate(train_reader()):
current_lr = np.array(paddle.fluid.global_scope().find_var('learning_rate').get_tensor())
input_image, loss, out_heatmaps = train_exe.run(
fetch_list, feed=feeder.feed(data))
loss = np.mean(np.array(loss))
            print('Epoch [{:3d}] Batch [{:4d}] LR: {:.10f} '
                  'Loss = {:.5f}'.format(
                      pass_id, batch_id, current_lr[0], loss))
if batch_id % 10 == 0:
save_batch_heatmaps(input_image, out_heatmaps, file_name='visualization@train.jpg', normalize=True)
model_path = os.path.join(args.model_save_dir + '/' + 'simplebase-{}'.format(args.dataset),
str(pass_id))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
if __name__ == '__main__':
args = parser.parse_args()
train(args)
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on
# ------------------------------------------------------------------------------
# https://github.com/Microsoft/human-pose-estimation.pytorch
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------
"""Transforms functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
def flip_back(output_flipped, matched_parts):
"""
    :param output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
"""
assert output_flipped.ndim == 4,\
'output_flipped should be [batch_size, num_joints, height, width]'
output_flipped = output_flipped[:, :, :, ::-1]
for pair in matched_parts:
tmp = output_flipped[:, pair[0], :, :].copy()
output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
output_flipped[:, pair[1], :, :] = tmp
return output_flipped
def fliplr_joints(joints, joints_vis, width, matched_parts):
"""Flip coords.
"""
# Flip horizontal
joints[:, 0] = width - joints[:, 0] - 1
# Change left-right parts
for pair in matched_parts:
joints[pair[0], :], joints[pair[1], :] = \
joints[pair[1], :], joints[pair[0], :].copy()
joints_vis[pair[0], :], joints_vis[pair[1], :] = \
joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
return joints*joints_vis, joints_vis
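# Quick sanity check (illustrative): flipping heatmaps twice restores the
# original, since the width reversal and the left-right joint swaps are both
# involutions acting on different axes.
#
#     heatmaps = np.random.rand(1, 16, 96, 96).astype(np.float32)
#     pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
#     assert np.allclose(flip_back(flip_back(heatmaps, pairs), pairs), heatmaps)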
def transform_preds(coords, center, scale, output_size):
target_coords = np.zeros(coords.shape)
trans = get_affine_transform(center, scale, 0, output_size, inv=1)
for p in range(coords.shape[0]):
target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
return target_coords
def get_affine_transform(center,
scale,
rot,
output_size,
shift=np.array([0, 0], dtype=np.float32),
inv=0):
if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
print(scale)
scale = np.array([scale, scale])
scale_tmp = scale * 200.0
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]
rot_rad = np.pi * rot / 180
src_dir = get_dir([0, src_w * -0.5], rot_rad)
dst_dir = np.array([0, dst_w * -0.5], np.float32)
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
def affine_transform(pt, t):
new_pt = np.array([pt[0], pt[1], 1.]).T
new_pt = np.dot(t, new_pt)
return new_pt[:2]
def get_3rd_point(a, b):
direct = a - b
return b + np.array([-direct[1], direct[0]], dtype=np.float32)
def get_dir(src_point, rot_rad):
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
src_result = [0, 0]
src_result[0] = src_point[0] * cs - src_point[1] * sn
src_result[1] = src_point[0] * sn + src_point[1] * cs
return src_result
def crop(img, center, scale, output_size, rot=0):
trans = get_affine_transform(center, scale, rot, output_size)
dst_img = cv2.warpAffine(img,
trans,
(int(output_size[0]), int(output_size[1])),
flags=cv2.INTER_LINEAR)
return dst_img
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Utility functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import distutils.util
import numpy as np
import cv2
from pathlib import Path
def print_arguments(args):
"""Print argparse's arguments.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
    for arg, value in sorted(vars(args).items()):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
def add_arguments(argname, type, default, help, argparser, **kwargs):
"""Add argparse's argument.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
add_argument("name", str, "Jonh", "User name.", parser)
args = parser.parse_args()
"""
type = distutils.util.strtobool if type == bool else type
argparser.add_argument(
"--" + argname,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
def get_max_preds(batch_heatmaps):
"""Get predictions from score maps.
heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
"""
assert isinstance(batch_heatmaps, np.ndarray), \
'batch_heatmaps should be numpy.ndarray'
assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'
batch_size = batch_heatmaps.shape[0]
num_joints = batch_heatmaps.shape[1]
width = batch_heatmaps.shape[3]
heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
idx = np.argmax(heatmaps_reshaped, 2)
maxvals = np.amax(heatmaps_reshaped, 2)
maxvals = maxvals.reshape((batch_size, num_joints, 1))
idx = idx.reshape((batch_size, num_joints, 1))
preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
preds[:, :, 0] = (preds[:, :, 0]) % width
preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
pred_mask = pred_mask.astype(np.float32)
preds *= pred_mask
return preds, maxvals
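Since the heatmaps are flattened before the argmax, the flat peak index is decoded back into coordinates as idx % width for x and idx // width for y. A tiny hedged example with a hypothetical 1x1x2x3 batch:
```
import numpy as np
hm = np.array([[[[0.1, 0.2, 0.3],
                 [0.4, 0.5, 0.9]]]])
preds, maxvals = get_max_preds(hm)
print(preds)    # [[[2., 1.]]] -- (x, y) of the peak
print(maxvals)  # [[[0.9]]]
```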
def affine_transform(pt, t):
new_pt = np.array([pt[0], pt[1], 1.]).T
new_pt = np.dot(t, new_pt)
return new_pt[:2]
def get_3rd_point(a, b):
direct = a - b
return b + np.array([-direct[1], direct[0]], dtype=np.float32)
def get_dir(src_point, rot_rad):
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
src_result = [0, 0]
src_result[0] = src_point[0] * cs - src_point[1] * sn
src_result[1] = src_point[0] * sn + src_point[1] * cs
return src_result
def crop(img, center, scale, output_size, rot=0):
trans = get_affine_transform(center, scale, rot, output_size)
dst_img = cv2.warpAffine(img,
trans,
(int(output_size[0]), int(output_size[1])),
flags=cv2.INTER_LINEAR)
return dst_img
def get_affine_transform(center,
scale,
rot,
output_size,
shift=np.array([0, 0], dtype=np.float32),
inv=0):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale])
scale_tmp = scale * 200.0
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]
rot_rad = np.pi * rot / 180
src_dir = get_dir([0, src_w * -0.5], rot_rad)
dst_dir = np.array([0, dst_w * -0.5], np.float32)
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
def transform_preds(coords, center, scale, output_size):
target_coords = np.zeros(coords.shape)
trans = get_affine_transform(center, scale, 0, output_size, inv=1)
for p in range(coords.shape[0]):
target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
return target_coords
def get_final_preds(args, batch_heatmaps, center, scale):
coords, maxvals = get_max_preds(batch_heatmaps)
heatmap_height = batch_heatmaps.shape[2]
heatmap_width = batch_heatmaps.shape[3]
# Post-processing
if args.post_process:
for n in range(coords.shape[0]):
for p in range(coords.shape[1]):
hm = batch_heatmaps[n][p]
px = int(math.floor(coords[n][p][0] + 0.5))
py = int(math.floor(coords[n][p][1] + 0.5))
if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1:
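                    # Nudge the peak a quarter pixel toward the higher of its two neighbours (sub-pixel refinement).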
diff = np.array([hm[py][px+1] - hm[py][px-1],
hm[py+1][px]-hm[py-1][px]])
coords[n][p] += np.sign(diff) * .25
preds = coords.copy()
# Transform back
for i in range(coords.shape[0]):
preds[i] = transform_preds(coords[i], center[i], scale[i],
[heatmap_width, heatmap_height])
return preds, maxvals
def calc_dists(preds, target, normalize):
preds = preds.astype(np.float32)
target = target.astype(np.float32)
dists = np.zeros((preds.shape[1], preds.shape[0]))
for n in range(preds.shape[0]):
for c in range(preds.shape[1]):
if target[n, c, 0] > 1 and target[n, c, 1] > 1:
normed_preds = preds[n, c, :] / normalize[n]
normed_targets = target[n, c, :] / normalize[n]
dists[c, n] = np.linalg.norm(normed_preds - normed_targets)
else:
dists[c, n] = -1
return dists
def dist_acc(dists, thr=0.5):
"""Return percentage below threshold while ignoring values with a -1.
"""
dist_cal = np.not_equal(dists, -1)
num_dist_cal = dist_cal.sum()
if num_dist_cal > 0:
return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal
else:
return -1
def accuracy(output, target, hm_type='gaussian', thr=0.5):
"""
Calculate accuracy according to PCK,
but uses ground truth heatmap rather than x,y locations
First value to be returned is average accuracy across 'idxs',
followed by individual accuracies
"""
idx = list(range(output.shape[1]))
norm = 1.0
if hm_type == 'gaussian':
pred, _ = get_max_preds(output)
target, _ = get_max_preds(target)
h = output.shape[2]
w = output.shape[3]
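        # Distances are normalised by one tenth of the heatmap size, so the 0.5 threshold in dist_acc corresponds to the usual PCK tolerance.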
norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10
dists = calc_dists(pred, target, norm)
acc = np.zeros((len(idx) + 1))
avg_acc = 0
cnt = 0
for i in range(len(idx)):
acc[i + 1] = dist_acc(dists[idx[i]])
if acc[i + 1] >= 0:
avg_acc = avg_acc + acc[i + 1]
cnt += 1
avg_acc = avg_acc / cnt if cnt != 0 else 0
if cnt != 0:
acc[0] = avg_acc
return acc, avg_acc, cnt, pred
def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, normalize=True):
"""
:param batch_image: [batch_size, channel, height, width]
    :param batch_heatmaps: [batch_size, num_joints, height, width]
:param file_name: saved file name
"""
if normalize:
min = np.array(batch_image.min(), dtype=np.float)
max = np.array(batch_image.max(), dtype=np.float)
batch_image = np.add(batch_image, -min)
batch_image = np.divide(batch_image, max - min + 1e-5)
batch_size, num_joints, \
heatmap_height, heatmap_width = batch_heatmaps.shape
grid_image = np.zeros((batch_size*heatmap_height,
(num_joints+1)*heatmap_width,
3),
dtype=np.uint8)
preds, maxvals = get_max_preds(batch_heatmaps)
for i in range(batch_size):
image = batch_image[i] * 255
image = image.clip(0, 255).astype(np.uint8)
image = image.transpose(1, 2, 0)
heatmaps = batch_heatmaps[i] * 255
heatmaps = heatmaps.clip(0, 255).astype(np.uint8)
resized_image = cv2.resize(image,
(int(heatmap_width), int(heatmap_height)))
height_begin = heatmap_height * i
height_end = heatmap_height * (i + 1)
for j in range(num_joints):
cv2.circle(resized_image,
(int(preds[i][j][0]), int(preds[i][j][1])),
1, [0, 0, 255], 1)
heatmap = heatmaps[j, :, :]
colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
masked_image = colored_heatmap*0.7 + resized_image*0.3
cv2.circle(masked_image,
(int(preds[i][j][0]), int(preds[i][j][1])),
1, [0, 0, 255], 1)
width_begin = heatmap_width * (j+1)
width_end = heatmap_width * (j+2)
grid_image[height_begin:height_end, width_begin:width_end, :] = \
masked_image
grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image
cv2.imwrite(file_name, grid_image)
def save_predict_results(batch_image, batch_heatmaps, file_ids, fold_name, normalize=True):
"""
:param batch_image: [batch_size, channel, height, width]
    :param batch_heatmaps: [batch_size, num_joints, height, width]
:param fold_name: saved files in this folder
"""
save_dir = Path('./{}'.format(fold_name))
try:
save_dir.mkdir()
except OSError:
pass
if normalize:
min = np.array(batch_image.min(), dtype=np.float)
max = np.array(batch_image.max(), dtype=np.float)
batch_image = np.add(batch_image, -min)
batch_image = np.divide(batch_image, max - min + 1e-5)
batch_size, num_joints, \
heatmap_height, heatmap_width = batch_heatmaps.shape
    # e.g. preds: (32, 16, 2), maxvals: (32, 16, 1)
preds, maxvals = get_max_preds(batch_heatmaps)
    # inner/outer keypoint marker colours (BGR)
    icolor = (255, 137, 0)
    ocolor = (138, 255, 0)
for i in range(batch_size):
image = batch_image[i] * 255
image = image.clip(0, 255).astype(np.uint8)
image = image.transpose(1, 2, 0)
image = cv2.resize(image, (384, 384))
file_id = file_ids[i]
imgname = save_dir.joinpath('rendered_{}.png'.format(str(file_id).zfill(7)))
for j in range(num_joints):
x, y = preds[i][j]
cv2.circle(image, (int(x * 4), int(y * 4)), 3, icolor, -1, 16)
cv2.circle(image, (int(x * 4), int(y * 4)), 6, ocolor, 1, 16)
cv2.imwrite(str(imgname), image)
# Clean format output
def print_name_value(name_value, full_arch_name):
names = name_value.keys()
values = name_value.values()
num_values = len(name_value)
results = []
for value in values:
results.append('| {:.3f}'.format(value))
print(
'| Arch ' +
' '.join(['| {}'.format(name) for name in names]) +
' |'
)
print('|---' * (num_values+1) + '|')
print('| ' + 'SIMPLEBASE RESNET50 ' + ' '.join(results) + ' |')
class AverageMeter(object):
"""Computes and stores the average and current value.
"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count if self.count != 0 else 0
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Functions for validation."""
import os
import argparse
import functools
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from collections import OrderedDict
from scipy.io import loadmat, savemat
from lib import pose_resnet
from utils.transforms import flip_back
from utils.utility import *
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('dataset', str, 'mpii', "Dataset")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('num_epochs', int, 140, "Number of epochs.")
add_arg('total_images', int, 144406, "Training image number.")
add_arg('kp_dim', int, 16, "Class number.")
add_arg('model_save_dir', str, "output", "Model save directory")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('lr', float, 0.001, "Set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
add_arg('flip_test', bool, True, "Flip test")
add_arg('shift_heatmap', bool, True, "Shift heatmap")
add_arg('post_process', bool, True, "Post process")
# yapf: enable
def valid(args):
if args.dataset == 'coco':
import lib.coco_reader as reader
IMAGE_SIZE = [288, 384]
HEATMAP_SIZE = [72, 96]
FLIP_PAIRS = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
args.kp_dim = 17
args.total_images = 144406 # 149813
elif args.dataset == 'mpii':
import lib.mpii_reader as reader
IMAGE_SIZE = [384, 384]
HEATMAP_SIZE = [96, 96]
FLIP_PAIRS = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
args.kp_dim = 16
args.total_images = 2958 # validation
else:
raise ValueError('The dataset {} is not supported yet.'.format(args.dataset))
print_arguments(args)
# Image and target
image = layers.data(name='image', shape=[3, IMAGE_SIZE[1], IMAGE_SIZE[0]], dtype='float32')
target = layers.data(name='target', shape=[args.kp_dim, HEATMAP_SIZE[1], HEATMAP_SIZE[0]], dtype='float32')
target_weight = layers.data(name='target_weight', shape=[args.kp_dim, 1], dtype='float32')
center = layers.data(name='center', shape=[2,], dtype='float32')
scale = layers.data(name='scale', shape=[2,], dtype='float32')
score = layers.data(name='score', shape=[1,], dtype='float32')
# Build model
model = pose_resnet.ResNet(layers=50, kps_num=args.kp_dim)
# Output
loss, output = model.net(input=image, target=target, target_weight=target_weight)
# Parameters from model and arguments
params = {}
params["total_images"] = args.total_images
params["lr"] = args.lr
params["num_epochs"] = args.num_epochs
params["learning_strategy"] = {}
params["learning_strategy"]["batch_size"] = args.batch_size
params["learning_strategy"]["name"] = args.lr_strategy
if args.with_mem_opt:
fluid.memory_optimize(fluid.default_main_program(),
skip_opt_set=[loss.name, output.name, target.name])
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
args.pretrained_model = './pretrained/resnet_50/115'
if args.pretrained_model:
def if_exist(var):
exist_flag = os.path.exists(os.path.join(args.pretrained_model, var.name))
return exist_flag
fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
if args.checkpoint is not None:
fluid.io.load_persistables(exe, args.checkpoint)
# Dataloader
valid_reader = paddle.batch(reader.valid(), batch_size=args.batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, target, target_weight, center, scale, score])
valid_exe = fluid.ParallelExecutor(
use_cuda=True if args.use_gpu else False,
main_program=fluid.default_main_program().clone(for_test=False),
loss_name=loss.name)
fetch_list = [image.name, loss.name, output.name, target.name]
# For validation
acc = AverageMeter()
idx = 0
num_samples = args.total_images
all_preds = np.zeros((num_samples, args.kp_dim, 3),
dtype=np.float32)
all_boxes = np.zeros((num_samples, 6))
for batch_id, data in enumerate(valid_reader()):
num_images = len(data)
centers = []
scales = []
scores = []
for i in range(num_images):
centers.append(data[i][3])
scales.append(data[i][4])
scores.append(data[i][5])
input_image, loss, out_heatmaps, target_heatmaps = valid_exe.run(
fetch_list=fetch_list,
feed=feeder.feed(data))
if args.flip_test:
            # Flip all the images in the same batch
            data_flipped = []
for i in range(num_images):
# Input, target, target_weight, c, s, score
                data_flipped.append((
# np.flip(input_image, 3)[i],
data[i][0][:, :, ::-1],
data[i][1],
data[i][2],
data[i][3],
data[i][4],
data[i][5]))
# Inference again
_, _, output_flipped, _ = valid_exe.run(
fetch_list=fetch_list,
                feed=feeder.feed(data_flipped))
# Flip back
output_flipped = flip_back(output_flipped, FLIP_PAIRS)
# Feature is not aligned, shift flipped heatmap for higher accuracy
if args.shift_heatmap:
output_flipped[:, :, :, 1:] = \
output_flipped.copy()[:, :, :, 0:-1]
# Aggregate
# out_heatmaps.shape: size[b, args.kp_dim, 96, 96]
out_heatmaps = (out_heatmaps + output_flipped) * 0.5
loss = np.mean(np.array(loss))
# Accuracy
_, avg_acc, cnt, pred = accuracy(out_heatmaps, target_heatmaps)
acc.update(avg_acc, cnt)
# Current center, scale, score
centers = np.array(centers)
scales = np.array(scales)
scores = np.array(scores)
preds, maxvals = get_final_preds(
args, out_heatmaps, centers, scales)
all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
all_preds[idx:idx + num_images, :, 2:3] = maxvals
# Double check this all_boxes parts
all_boxes[idx:idx + num_images, 0:2] = centers[:, 0:2]
all_boxes[idx:idx + num_images, 2:4] = scales[:, 0:2]
all_boxes[idx:idx + num_images, 4] = np.prod(scales*200, 1)
all_boxes[idx:idx + num_images, 5] = scores
# image_path.extend(meta['image'])
idx += num_images
        print('Batch [{:4d}] '
'Loss = {:.5f} '
'Acc = {:.5f}'.format(batch_id, loss, acc.avg))
if batch_id % 10 == 0:
save_batch_heatmaps(input_image, out_heatmaps, file_name='visualization@val.jpg', normalize=True)
# Evaluate
args.DATAROOT = 'data/mpii'
args.TEST_SET = 'valid'
output_dir = ''
filenames = []
imgnums = []
image_path = []
name_values, perf_indicator = mpii_evaluate(
args, all_preds, output_dir, all_boxes, image_path,
filenames, imgnums)
print_name_value(name_values, perf_indicator)
def mpii_evaluate(cfg, preds, output_dir, *args, **kwargs):
# Convert 0-based index to 1-based index
preds = preds[:, :, 0:2] + 1.0
if output_dir:
pred_file = os.path.join(output_dir, 'pred.mat')
savemat(pred_file, mdict={'preds': preds})
if 'test' in cfg.TEST_SET:
return {'Null': 0.0}, 0.0
SC_BIAS = 0.6
threshold = 0.5
gt_file = os.path.join(cfg.DATAROOT,
'annot',
'gt_{}.mat'.format(cfg.TEST_SET))
gt_dict = loadmat(gt_file)
dataset_joints = gt_dict['dataset_joints']
jnt_missing = gt_dict['jnt_missing']
pos_gt_src = gt_dict['pos_gt_src']
headboxes_src = gt_dict['headboxes_src']
pos_pred_src = np.transpose(preds, [1, 2, 0])
head = np.where(dataset_joints == 'head')[1][0]
lsho = np.where(dataset_joints == 'lsho')[1][0]
lelb = np.where(dataset_joints == 'lelb')[1][0]
lwri = np.where(dataset_joints == 'lwri')[1][0]
lhip = np.where(dataset_joints == 'lhip')[1][0]
lkne = np.where(dataset_joints == 'lkne')[1][0]
lank = np.where(dataset_joints == 'lank')[1][0]
rsho = np.where(dataset_joints == 'rsho')[1][0]
relb = np.where(dataset_joints == 'relb')[1][0]
rwri = np.where(dataset_joints == 'rwri')[1][0]
rkne = np.where(dataset_joints == 'rkne')[1][0]
rank = np.where(dataset_joints == 'rank')[1][0]
rhip = np.where(dataset_joints == 'rhip')[1][0]
jnt_visible = 1 - jnt_missing
uv_error = pos_pred_src - pos_gt_src
uv_err = np.linalg.norm(uv_error, axis=1)
headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
headsizes = np.linalg.norm(headsizes, axis=0)
headsizes *= SC_BIAS
scale = np.multiply(headsizes, np.ones((len(uv_err), 1)))
scaled_uv_err = np.divide(uv_err, scale)
scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible)
jnt_count = np.sum(jnt_visible, axis=1)
less_than_threshold = np.multiply((scaled_uv_err <= threshold),
jnt_visible)
PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count)
# Save
rng = np.arange(0, 0.5+0.01, 0.01)
pckAll = np.zeros((len(rng), cfg.kp_dim))
for r in range(len(rng)):
threshold = rng[r]
less_than_threshold = np.multiply(scaled_uv_err <= threshold,
jnt_visible)
pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1),
jnt_count)
PCKh = np.ma.array(PCKh, mask=False)
PCKh.mask[6:8] = True
jnt_count = np.ma.array(jnt_count, mask=False)
jnt_count.mask[6:8] = True
jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)
name_value = [
('Head', PCKh[head]),
('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])),
('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])),
('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])),
('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])),
('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])),
('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])),
('Mean', np.sum(PCKh * jnt_ratio)),
('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio))
]
name_value = OrderedDict(name_value)
return name_value, name_value['Mean']
# TODO: coco_evaluate()
if __name__ == '__main__':
args = parser.parse_args()
valid(args)
......@@ -17,87 +17,62 @@Running sample code in this directory requires PaddlePaddle Fluid v0.14.0 and la
## Data preparation
Caltech-UCSD Birds 200 (CUB-200) is an image dataset of 200 bird species. We use it to conduct the metric learning experiments. More details of this dataset can be found at its [official website](http://www.vision.caltech.edu/visipedia/CUB-200.html). First of all, preparation of CUB-200 data can be done as:
The Stanford Online Products (SOP) dataset contains 120,053 images of 22,634 products downloaded from eBay.com. We use it to conduct the metric learning experiments. For training, 59,551 images of 11,318 classes are used, and 11,316 classes (60,502 images) are held out for testing. First of all, preparation of SOP data can be done as:
```
cd data/
sh download_cub200.sh
```
The script ```data/split.py``` is used to split the train/valid sets. In our settings, we use images from the first 100 classes (001-100) as training data while the other 100 classes are validation data. After the splitting, there are two label files which contain train and validation image labels respectively:
* *CUB200_train.txt*: label file of the CUB-200 training set, with fields in each line separated by ```SPACE```, like:
```
current_path/images/097.Orchard_Oriole/Orchard_Oriole_0021_2432168643.jpg 97
current_path/images/097.Orchard_Oriole/Orchard_Oriole_0022_549995638.jpg 97
current_path/images/097.Orchard_Oriole/Orchard_Oriole_0034_2244771004.jpg 97
current_path/images/097.Orchard_Oriole/Orchard_Oriole_0010_2501839798.jpg 97
current_path/images/097.Orchard_Oriole/Orchard_Oriole_0008_491860362.jpg 97
current_path/images/097.Orchard_Oriole/Orchard_Oriole_0015_2545116359.jpg 97
...
```
* *CUB200_val.txt*: label file of the CUB-200 validation set, with fields in each line separated by ```SPACE```, like:
```
current_path/images/154.Red_eyed_Vireo/Red_eyed_Vireo_0029_59210443.jpg 154
current_path/images/154.Red_eyed_Vireo/Red_eyed_Vireo_0021_2693953672.jpg 154
current_path/images/154.Red_eyed_Vireo/Red_eyed_Vireo_0016_2917350638.jpg 154
current_path/images/154.Red_eyed_Vireo/Red_eyed_Vireo_0027_2503540454.jpg 154
current_path/images/154.Red_eyed_Vireo/Red_eyed_Vireo_0026_2502710393.jpg 154
current_path/images/154.Red_eyed_Vireo/Red_eyed_Vireo_0022_2693134681.jpg 154
...
sh download_sop.sh
```
## Training metric learning models
To train a metric learning model, one needs to set the neural network as the backbone and the metric loss function to optimize. One example of training with triplet loss using ResNet-50 is shown below:
To train a metric learning model, one needs to set the neural network as the backbone and the metric loss function to optimize. We first train the model using softmax or [arcmargin](https://arxiv.org/abs/1801.07698) loss, and then fine-tune it using another metric learning loss, such as triplet, [quadruplet](https://arxiv.org/abs/1710.00478) or [eml](https://arxiv.org/abs/1212.6094) loss. One example of training using arcmargin loss is shown below (a hedged numpy sketch of the arcmargin logit adjustment follows the parameter list):
```
python train.py \
python train_elem.py \
--model=ResNet50 \
--lr=0.001 \
--num_epochs=120 \
--train_batch_size=256 \
--test_batch_size=50 \
--lr=0.01 \
--total_iter_num=30000 \
--use_gpu=True \
--train_batch_size=20 \
--test_batch_size=20 \
--loss_name=tripletloss \
--model_save_dir="output_tripletloss"
--pretrained_model=${path_to_pretrain_imagenet_model} \
--model_save_dir=${output_model_path} \
--loss_name=arcmargin \
--arc_scale=80.0 \
--arc_margin=0.15 \
--arc_easy_margin=False
```
**parameter introduction:**
* **model**: name of the model to use. Default: "SE_ResNeXt50_32x4d".
* **num_epochs**: the number of epochs. Default: 120.
* **batch_size**: the size of each mini-batch. Default: 256.
* **model**: name of the model to use. Default: "ResNet50".
* **train_batch_size**: the size of each training mini-batch. Default: 256.
* **test_batch_size**: the size of each testing mini-batch. Default: 50.
* **lr**: initialized learning rate. Default: 0.01.
* **total_iter_num**: total number of training iterations. Default: 30000.
* **use_gpu**: whether to use GPU or not. Default: True.
* **model_save_dir**: the directory to save trained model. Default: "output".
* **lr**: initialized learning rate. Default: 0.1.
* **pretrained_model**: model path for pretraining. Default: None.
**training log:** the log of training ResNet-50 with triplet loss looks like:
```
Pass 0, trainbatch 0, lr 9.99999974738e-05, loss_metric 0.0700866878033, loss_cls 5.23635625839, acc1 0.0, acc5 0.100000008941, time 0.16 sec
Pass 0, trainbatch 10, lr 9.99999974738e-05, loss_metric 0.0752244070172, loss_cls 5.30303478241, acc1 0.0, acc5 0.100000008941, time 0.14 sec
Pass 0, trainbatch 20, lr 9.99999974738e-05, loss_metric 0.0840565115213, loss_cls 5.41880941391, acc1 0.0, acc5 0.0333333350718, time 0.14 sec
Pass 0, trainbatch 30, lr 9.99999974738e-05, loss_metric 0.0698839947581, loss_cls 5.35385560989, acc1 0.0, acc5 0.0333333350718, time 0.14 sec
Pass 0, trainbatch 40, lr 9.99999974738e-05, loss_metric 0.0596057735384, loss_cls 5.34744024277, acc1 0.0, acc5 0.0, time 0.14 sec
Pass 0, trainbatch 50, lr 9.99999974738e-05, loss_metric 0.067836754024, loss_cls 5.37124729156, acc1 0.0, acc5 0.0333333350718, time 0.14 sec
Pass 0, trainbatch 60, lr 9.99999974738e-05, loss_metric 0.0637686774135, loss_cls 5.47412204742, acc1 0.0, acc5 0.0333333350718, time 0.14 sec
Pass 0, trainbatch 70, lr 9.99999974738e-05, loss_metric 0.0772982165217, loss_cls 5.38295936584, acc1 0.0, acc5 0.0, time 0.14 sec
Pass 0, trainbatch 80, lr 9.99999974738e-05, loss_metric 0.0861896127462, loss_cls 5.41250753403, acc1 0.0, acc5 0.0, time 0.14 sec
Pass 0, trainbatch 90, lr 9.99999974738e-05, loss_metric 0.0653102770448, loss_cls 5.53133153915, acc1 0.0, acc5 0.0, time 0.14 sec
...
```
* **model_save_dir**: the directory to save trained model. Default: "output".
* **loss_name**: loss used for training the model. Default: "softmax".
* **arc_scale**: parameter of arcmargin loss. Default: 80.0.
* **arc_margin**: parameter of arcmargin loss. Default: 0.15.
* **arc_easy_margin**: parameter of arcmargin loss. Default: False.
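Conceptually, arcmargin (ArcFace) normalizes both the features and the class weights, then adds the angular margin to the target class before scaling, i.e. the target logit cos(θ) becomes s·cos(θ+m). A minimal numpy sketch of that logit adjustment — a hypothetical helper, ignoring the easy-margin branch, not the repository's actual fluid implementation (see ```losses/arcmarginloss.py```) — might look like:
```
import numpy as np

def arcmargin_logits(cosine, label, m=0.15, s=80.0):
    # cosine: [N, C] cosine similarities between normalized features and class weights
    # label:  [N] integer class ids
    theta = np.arccos(np.clip(cosine, -1.0, 1.0))
    out = cosine.copy()
    rows = np.arange(len(label))
    out[rows, label] = np.cos(theta[rows, label] + m)  # add angular margin to the target class
    return out * s  # scale before softmax / cross-entropy
```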
## Finetuning
Finetuning is to finetune model weights in a specific task by loading pretrained weights. After initializing ```path_to_pretrain_model```, one can finetune a model as:
Finetuning is to finetune model weights in a specific task by loading pretrained weights. After training the model using softmax or arcmargin loss, one can fine-tune it using triplet, quadruplet or eml loss. One example of finetuning using eml loss is shown below:
```
python train.py \
python train_pair.py \
--model=ResNet50 \
--pretrained_model=${path_to_pretrain_model} \
--lr=0.001 \
--num_epochs=120 \
--train_batch_size=160 \
--test_batch_size=50 \
--lr=0.0001 \
--total_iter_num=100000 \
--use_gpu=True \
--train_batch_size=20 \
--test_batch_size=20 \
--loss_name=tripletloss \
--model_save_dir="output_tripletloss"
--pretrained_model=${path_to_pretrain_arcmargin_model} \
--model_save_dir=${output_model_path} \
--loss_name=eml \
--samples_each_class=2
```
## Evaluation
......@@ -105,58 +80,26 @@ Evaluation is to evaluate the performance of a trained model. One can download [
```
python eval.py \
--model=ResNet50 \
--batch_size=50 \
--pretrained_model=${path_to_pretrain_model} \
--batch_size=30 \
--loss_name=tripletloss
```
According to the evaluation configuration, the output log looks like:
```
testbatch 0, loss 17.0384693146, recall 0.133333333333, time 0.08 sec
testbatch 10, loss 15.4248628616, recall 0.2, time 0.07 sec
testbatch 20, loss 19.3986873627, recall 0.0666666666667, time 0.07 sec
testbatch 30, loss 19.8149013519, recall 0.166666666667, time 0.07 sec
testbatch 40, loss 18.7500724792, recall 0.0333333333333, time 0.07 sec
testbatch 50, loss 15.1477527618, recall 0.166666666667, time 0.07 sec
testbatch 60, loss 21.6039619446, recall 0.0666666666667, time 0.07 sec
testbatch 70, loss 16.3203811646, recall 0.1, time 0.08 sec
testbatch 80, loss 17.3300457001, recall 0.133333333333, time 0.14 sec
testbatch 90, loss 17.9943237305, recall 0.0333333333333, time 0.07 sec
testbatch 100, loss 20.4538421631, recall 0.1, time 0.07 sec
End test, test_loss 18.2126255035, test recall 0.573597359736
...
```
## Inference
Inference is used to get prediction scores or image features based on trained models.
```
python infer.py --model=ResNet50 \
--pretrained_model=${path_to_pretrain_model}
```
The output contains the learned feature for each test sample:
```
Test-0-feature: [0.1551965 0.48882252 0.3528545 ... 0.35809007 0.6210782 0.34474897]
Test-1-feature: [0.26215672 0.71406883 0.36118034 ... 0.4711366 0.6783772 0.26591945]
Test-2-feature: [0.26164916 0.46013424 0.38381338 ... 0.47984493 0.5830286 0.22124235]
Test-3-feature: [0.22502825 0.44153655 0.29287377 ... 0.45510024 0.81386226 0.21451607]
Test-4-feature: [0.27748746 0.49068335 0.28269237 ... 0.47356504 0.73254013 0.22317657]
Test-5-feature: [0.17743547 0.5232162 0.35012805 ... 0.38921246 0.80238944 0.26693743]
Test-6-feature: [0.18314484 0.4294481 0.37652573 ... 0.4795592 0.7446839 0.24178651]
Test-7-feature: [0.25836483 0.49866533 0.3469289 ... 0.38316026 0.56015515 0.22388287]
Test-8-feature: [0.30613047 0.5200348 0.2847372 ... 0.5700768 0.76645917 0.26504722]
Test-9-feature: [0.3305695 0.46257797 0.27108437 ... 0.42891273 0.5112956 0.26442713]
Test-10-feature: [0.16024818 0.46871603 0.32608703 ... 0.3341719 0.6876993 0.26097256]
Test-11-feature: [0.37611157 0.6006333 0.3023942 ... 0.4729057 0.53841203 0.19621202]
Test-12-feature: [0.17515017 0.41597834 0.45567667 ... 0.45650777 0.5987687 0.25734115]
...
python infer.py \
--model=ResNet50 \
--batch_size=1 \
--pretrained_model=${path_to_pretrain_model}
```
## Performances
For comparison, metric learning models with different neural networks and loss functions are trained using the corresponding empirical parameters. Recall@Rank-1 is used as the evaluation metric, and the performance is listed in the table below. Pretrained models can be downloaded by clicking the related model names. A short sketch of the Recall@Rank-1 computation follows the table.
|model | ResNet50 | SE-ResNeXt-50
|pretrain model | softmax | arcmargin
|- | - | -:
|[triplet loss]() | 57.36% | 51.62%
|[eml loss]() | 58.84% | 52.94%
|[quadruplet loss]() | 62.67% | 56.40%
|without finetuning | 77.42% | 78.11%
|fine-tuned with triplet | 78.37% | 79.21%
|fine-tuned with quadruplet | 78.10% | 79.59%
|fine-tuned with eml | 79.32% | 80.11%
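Recall@Rank-1 here means: for each test image, find its nearest neighbour in the embedding space (excluding itself) and count it as correct when the neighbour shares the query's label. The ```recall_topk``` helper further down implements this; the following is only a hedged numpy sketch under that definition:
```
import numpy as np

def recall_at_1(features, labels):
    labels = np.asarray(labels)
    f = features / np.linalg.norm(features, axis=1, keepdims=True)
    sq = np.sum(f ** 2, axis=1)
    d = sq[:, None] + sq[None, :] - 2.0 * f.dot(f.T)  # pairwise squared distances
    np.fill_diagonal(d, np.inf)                       # a query cannot match itself
    nn = np.argmin(d, axis=1)                         # nearest neighbour per query
    return float(np.mean(labels[nn] == labels))
```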
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
# NOTE: kpi.py should be shared across models in some way!
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_recall_kpi = AccKpi('test_recall', 0.02, 0, actived=True)
tracking_kpis = [
train_cost_kpi,
test_recall_kpi,
]
def parse_log(log):
    '''
    This method should be implemented by model developers.
    The suggestion:
    each line in the log should be "kpis\t<kpi_name>\t<kpi_value>", for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_recall\t0.5
    "
    '''
for line in log.split('\n'):
fs = line.strip().split('\t')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
kpi_name = fs[1]
kpi_value = float(fs[2])
yield kpi_name, kpi_value
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
log_to_ce(log)
wget http://www.vision.caltech.edu/visipedia-data/CUB-200/images.tgz
tar zxf images.tgz
find images|grep jpg|grep -v "\._" > list.txt
python split.py
rm -rf images.tgz list.txt
wget ftp://cs.stanford.edu/cs/cvgl/Stanford_Online_Products.zip
unzip Stanford_Online_Products.zip
input = open("list.txt", "r").readlines()
fout_train = open("CUB200_train.txt", "w")
fout_valid = open("CUB200_val.txt", "w")
for i, item in enumerate(input):
label = item.strip().split("/")[-2].split(".")[0]
label = int(label)
if label <= 100:
fout = fout_train
else:
fout = fout_valid
fout.write(item.strip() + " " + str(label) + "\n")
fout_train.close()
fout_valid.close()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import time
import sys
import math
import time
import argparse
import functools
import numpy as np
import paddle
import paddle.fluid as fluid
import models
import argparse
import functools
from losses import tripletloss
from losses import quadrupletloss
from losses import emlloss
from losses.metrics import recall_topk
import reader
from utility import add_arguments, print_arguments
import math
from utility import fmt_time, recall_topk
# yapf: disable
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('batch_size', int, 120, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('image_shape', str, "3,224,224", "Input image size.")
add_arg('with_mem_opt', bool, False, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.")
add_arg('loss_name', str, "emlloss", "Loss name.")
add_arg('model', str, "ResNet50", "Set the network to use.")
add_arg('embedding_size', int, 0, "Embedding size.")
add_arg('batch_size', int, 10, "Minibatch size.")
add_arg('image_shape', str, "3,224,224", "Input image size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('with_mem_opt', bool, False, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
# yapf: enable
model_list = [m for m in dir(models) if "__" not in m]
......@@ -34,8 +36,6 @@ def eval(args):
model_name = args.model
pretrained_model = args.pretrained_model
with_memory_optimization = args.with_mem_opt
loss_name = args.loss_name
image_shape = [int(m) for m in args.image_shape.split(",")]
assert model_name in model_list, "{} is not in lists: {}".format(args.model,
......@@ -46,19 +46,8 @@ def eval(args):
# model definition
model = models.__dict__[model_name]()
out = model.net(input=image, class_dim=200)
if loss_name == "tripletloss":
metricloss = tripletloss()
cost = metricloss.loss(out[0])
elif loss_name == "quadrupletloss":
metricloss = quadrupletloss()
cost = metricloss.loss(out[0])
elif loss_name == "emlloss":
metricloss = emlloss()
cost = metricloss.loss(out[0])
avg_cost = fluid.layers.mean(x=cost)
out = model.net(input=image, embedding_size=args.embedding_size)
test_program = fluid.default_main_program().clone(for_test=True)
if with_memory_optimization:
......@@ -75,39 +64,29 @@ def eval(args):
fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
test_reader = paddle.batch(metricloss.test_reader, batch_size=args.batch_size)
test_reader = paddle.batch(reader.test(args), batch_size=args.batch_size, drop_last=False)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
fetch_list = [avg_cost.name, out[0].name]
fetch_list = [out.name]
test_info = [[]]
f = []
l = []
f, l = [], []
for batch_id, data in enumerate(test_reader()):
if len(data) < args.batch_size:
continue
t1 = time.time()
loss, feas = exe.run(test_program,
fetch_list=fetch_list,
feed=feeder.feed(data))
[feas] = exe.run(test_program, fetch_list=fetch_list, feed=feeder.feed(data))
label = np.asarray([x[1] for x in data])
f.append(feas)
l.append(label)
t2 = time.time()
period = t2 - t1
loss = np.mean(np.array(loss))
test_info[0].append(loss)
if batch_id % 20 == 0:
print("testbatch {0}, loss {1}, time {2}".format( \
batch_id, loss, "%2.2f sec" % period))
print("[%s] testbatch %d, time %2.2f sec" % \
(fmt_time(), batch_id, period))
test_loss = np.array(test_info[0]).mean()
f = np.vstack(f)
l = np.hstack(l)
recall = recall_topk(f, l, k=1)
print("End test, test_loss {0}, test recall {1}".format( \
test_loss, recall))
print("[%s] End test %d, test_recall %.5f" % (fmt_time(), len(f), recall))
sys.stdout.flush()
......
""" tools for processing images
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import math
import random
import functools
import numpy as np
#random.seed(0)
def rotate_image(img):
""" rotate_image """
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
angle = random.randint(-10, 10)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (w, h))
return rotated
def random_crop(img, size, scale=None, ratio=None):
""" random_crop """
scale = [0.08, 1.0] if scale is None else scale
ratio = [3. / 4., 4. / 3.] if ratio is None else ratio
aspect_ratio = math.sqrt(random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.shape[1]) / img.shape[0]) / (w ** 2),
(float(img.shape[0]) / img.shape[1]) / (h ** 2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.shape[0] * img.shape[1] * random.uniform(scale_min,
scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = random.randint(0, img.shape[0] - h)
j = random.randint(0, img.shape[1] - w)
img = img[i:i+h, j:j+w, :]
resized = cv2.resize(img, (size, size), interpolation=cv2.INTER_LANCZOS4)
return resized
def distort_color(img):
return img
def resize_short(img, target_size):
""" resize_short """
percent = float(target_size) / min(img.shape[0], img.shape[1])
resized_width = int(round(img.shape[1] * percent))
resized_height = int(round(img.shape[0] * percent))
resized = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_LANCZOS4)
return resized
def crop_image(img, target_size, center):
""" crop_image """
height, width = img.shape[:2]
size = target_size
if center == True:
w_start = (width - size) // 2
h_start = (height - size) // 2
else:
w_start = random.randint(0, width - size)
h_start = random.randint(0, height - size)
w_end = w_start + size
h_end = h_start + size
img = img[h_start:h_end, w_start:w_end, :]
return img
def process_image(sample, mode, color_jitter, rotate,
crop_size=224, mean=None, std=None):
""" process_image """
mean = [0.485, 0.456, 0.406] if mean is None else mean
std = [0.229, 0.224, 0.225] if std is None else std
image_name = sample[0]
img = cv2.imread(image_name) # BGR mode, but need RGB mode
if mode == 'train':
if rotate:
img = rotate_image(img)
if crop_size > 0:
img = random_crop(img, crop_size)
if color_jitter:
img = distort_color(img)
if random.randint(0, 1) == 1:
img = img[:, ::-1, :]
else:
if crop_size > 0:
img = resize_short(img, crop_size)
img = crop_image(img, target_size=crop_size, center=True)
img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255
img_mean = np.array(mean).reshape((3, 1, 1))
img_std = np.array(std).reshape((3, 1, 1))
img -= img_mean
img /= img_std
if mode == 'train' or mode == 'val':
return (img, sample[1])
elif mode == 'test':
return (img, )
def image_mapper(**kwargs):
""" image_mapper """
return functools.partial(process_image, **kwargs)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import time
import sys
import math
import time
import argparse
import functools
import numpy as np
import paddle
import paddle.fluid as fluid
import models
import argparse
import functools
from losses import tripletloss
import reader
from utility import add_arguments, print_arguments
import math
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('image_shape', str, "3,224,224", "Input image size.")
add_arg('with_mem_opt', bool, False, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.")
add_arg('model', str, "ResNet50", "Set the network to use.")
add_arg('embedding_size', int, 0, "Embedding size.")
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('image_shape', str, "3,224,224", "Input image size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('with_mem_opt', bool, False, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
# yapf: enable
model_list = [m for m in dir(models) if "__" not in m]
......@@ -39,7 +44,8 @@ def infer(args):
# model definition
model = models.__dict__[model_name]()
out = model.net(input=image, class_dim=200)
out = model.net(input=image, embedding_size=args.embedding_size)
test_program = fluid.default_main_program().clone(for_test=True)
if with_memory_optimization:
......@@ -56,15 +62,13 @@ def infer(args):
fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
infer_reader = paddle.batch(tripletloss().infer_reader, batch_size=args.batch_size)
infer_reader = paddle.batch(reader.infer(args), batch_size=args.batch_size, drop_last=False)
feeder = fluid.DataFeeder(place=place, feed_list=[image])
fetch_list = [out[0].name]
fetch_list = [out.name]
for batch_id, data in enumerate(infer_reader()):
result = exe.run(test_program,
fetch_list=fetch_list,
feed=feeder.feed(data))
result = exe.run(test_program, fetch_list=fetch_list, feed=feeder.feed(data))
result = result[0][0].reshape(-1)
print("Test-{0}-feature: {1}".format(batch_id, result))
sys.stdout.flush()
......
from .tripletloss import tripletloss
from .quadrupletloss import quadrupletloss
from .emlloss import emlloss
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .softmaxloss import SoftmaxLoss
from .arcmarginloss import ArcMarginLoss
from .tripletloss import TripletLoss
from .quadrupletloss import QuadrupletLoss
from .emlloss import EmlLoss
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
class ArcMarginLoss():
def __init__(self, class_dim, margin=0.15, scale=80.0, easy_margin=False):
self.class_dim = class_dim
self.margin = margin
self.scale = scale
self.easy_margin = easy_margin
def loss(self, input, label):
out = self.arc_margin_product(input, label, self.class_dim, self.margin, self.scale, self.easy_margin)
#loss = fluid.layers.softmax_with_cross_entropy(logits=out, label=label)
out = fluid.layers.softmax(input=out)
loss = fluid.layers.cross_entropy(input=out, label=label)
return loss, out
def arc_margin_product(self, input, label, out_dim, m, s, easy_margin=False):
#input = fluid.layers.l2_normalize(input, axis=1)
input_norm = fluid.layers.sqrt(fluid.layers.reduce_sum(fluid.layers.square(input), dim=1))
input = fluid.layers.elementwise_div(input, input_norm, axis=0)
weight = fluid.layers.create_parameter(
shape=[out_dim, input.shape[1]],
dtype='float32',
name='weight_norm',
attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Xavier()))
#weight = fluid.layers.l2_normalize(weight, axis=1)
weight_norm = fluid.layers.sqrt(fluid.layers.reduce_sum(fluid.layers.square(weight), dim=1))
weight = fluid.layers.elementwise_div(weight, weight_norm, axis=0)
weight = fluid.layers.transpose(weight, perm = [1, 0])
cosine = fluid.layers.mul(input, weight)
sine = fluid.layers.sqrt(1.0 - fluid.layers.square(cosine) + 1e-6)
cos_m = math.cos(m)
sin_m = math.sin(m)
phi = cosine * cos_m - sine * sin_m
th = math.cos(math.pi - m)
mm = math.sin(math.pi - m) * m
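        # If theta + m would exceed pi (i.e. cosine <= cos(pi - m)), fall back to the linear penalty cosine - mm to keep the logit monotonic.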
if easy_margin:
phi = self.paddle_where_more_than(cosine, 0, phi, cosine)
else:
phi = self.paddle_where_more_than(cosine, th, phi, cosine-mm)
one_hot = fluid.layers.one_hot(input=label, depth=out_dim)
output = fluid.layers.elementwise_mul(one_hot, phi) + fluid.layers.elementwise_mul((1.0 - one_hot), cosine)
output = output * s
return output
def paddle_where_more_than(self, target, limit, x, y):
mask = fluid.layers.cast(x=(target>limit), dtype='float32')
output = fluid.layers.elementwise_mul(mask, x) + fluid.layers.elementwise_mul((1.0 - mask), y)
return output
import numpy as np
def recall_topk(fea, lab, k = 1):
fea = np.array(fea)
fea = fea.reshape(fea.shape[0], -1)
n = np.sqrt(np.sum(fea**2, 1)).reshape(-1, 1)
fea = fea/n
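    # Pairwise squared Euclidean distances via ||x||^2 + ||y||^2 - 2*x.y; the diagonal is pushed to a huge value so a sample cannot match itself.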
a = np.sum(fea ** 2, 1).reshape(-1, 1)
b = a.T
ab = np.dot(fea, fea.T)
d = a + b - 2*ab
d = d + np.eye(len(fea)) * 1e8
sorted_index = np.argsort(d, 1)
res = 0
for i in range(len(fea)):
pred = lab[sorted_index[i][0]]
if lab[i] == pred:
res += 1.0
res = res/len(fea)
return res
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import subprocess
import os
def get_gpu_num():
visibledevice = os.getenv('CUDA_VISIBLE_DEVICES')
if visibledevice:
devicenum = len(visibledevice.split(','))
else:
devicenum = subprocess.check_output(
[str.encode('nvidia-smi'), str.encode('-L')]).decode('utf-8').count('\n')
return devicenum
import numpy as np
import paddle
import paddle.fluid as fluid
def generate_index(batch_size, samples_each_class):
a = np.arange(0, batch_size * batch_size)
a = a.reshape(-1, batch_size)
a = np.arange(0, batch_size * batch_size) # N*N x 1
a = a.reshape(-1, batch_size) # N x N
steps = batch_size // samples_each_class
res = []
for i in range(batch_size):
......@@ -72,7 +46,3 @@ def calculate_order_dist_matrix(feature, batch_size, samples_each_class):
d = fluid.layers.gather(d, index=index_var)
d = fluid.layers.reshape(d, shape=[-1, batch_size])
return d
import os
import math
import random
import functools
import numpy as np
import paddle
from PIL import Image, ImageEnhance
random.seed(0)
DATA_DIM = 224
THREAD = 8
BUF_SIZE = 1024000
DATA_DIR = "./data/"
TRAIN_LIST = './data/CUB200_train.txt'
TEST_LIST = './data/CUB200_val.txt'
#DATA_DIR = "./data/CUB200/"
#TRAIN_LIST = './data/CUB200/CUB200_train.txt'
#TEST_LIST = './data/CUB200/CUB200_val.txt'
train_data = {}
test_data = {}
train_list = open(TRAIN_LIST, "r").readlines()
train_image_list = []
for i, item in enumerate(train_list):
path, label = item.strip().split()
label = int(label) - 1
train_image_list.append((path, label))
if label not in train_data:
train_data[label] = []
train_data[label].append(path)
test_list = open(TEST_LIST, "r").readlines()
test_image_list = []
infer_image_list = []
for i, item in enumerate(test_list):
path, label = item.strip().split()
label = int(label) - 1
test_image_list.append((path, label))
infer_image_list.append(path)
if label not in test_data:
test_data[label] = []
test_data[label].append(path)
print("train_data size:", len(train_data))
print("test_data size:", len(test_data))
print("test_data image number:", len(test_image_list))
random.shuffle(test_image_list)
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.BILINEAR)
return img
def Scale(img, size):
w, h = img.size
if (w <= h and w == size) or (h <= w and h == size):
return img
if w < h:
ow = size
oh = int(size * h / w)
return img.resize((ow, oh), Image.BILINEAR)
else:
oh = size
ow = int(size * w / h)
return img.resize((ow, oh), Image.BILINEAR)
def CenterCrop(img, size):
w, h = img.size
th, tw = int(size), int(size)
x1 = int(round((w - tw) / 2.))
y1 = int(round((h - th) / 2.))
return img.crop((x1, y1, x1 + tw, y1 + th))
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
if center == True:
w_start = (width - size) / 2
h_start = (height - size) / 2
else:
w_start = random.randint(0, width - size)
h_start = random.randint(0, height - size)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def RandomResizedCrop(img, size):
for attempt in range(10):
area = img.size[0] * img.size[1]
target_area = random.uniform(0.08, 1.0) * area
aspect_ratio = random.uniform(3. / 4, 4. / 3)
w = int(round(math.sqrt(target_area * aspect_ratio)))
h = int(round(math.sqrt(target_area / aspect_ratio)))
if random.random() < 0.5:
w, h = h, w
if w <= img.size[0] and h <= img.size[1]:
x1 = random.randint(0, img.size[0] - w)
y1 = random.randint(0, img.size[1] - h)
img = img.crop((x1, y1, x1 + w, y1 + h))
assert(img.size == (w, h))
return img.resize((size, size), Image.BILINEAR)
w = min(img.size[0], img.size[1])
i = (img.size[1] - w) // 2
j = (img.size[0] - w) // 2
img = img.crop((i, j, i+w, j+w))
img = img.resize((size, size), Image.BILINEAR)
return img
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
aspect_ratio = math.sqrt(random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.size[0] * img.size[1] * random.uniform(scale_min,
scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = random.randint(0, img.size[0] - w)
j = random.randint(0, img.size[1] - h)
img = img.crop((i, j, i + w, j + h))
img = img.resize((size, size), Image.BILINEAR)
return img
def rotate_image(img):
angle = random.randint(-10, 10)
img = img.rotate(angle)
return img
def distort_color(img):
def random_brightness(img, lower=0.8, upper=1.2):
e = random.uniform(lower, upper)
return ImageEnhance.Brightness(img).enhance(e)
def random_contrast(img, lower=0.8, upper=1.2):
e = random.uniform(lower, upper)
return ImageEnhance.Contrast(img).enhance(e)
def random_color(img, lower=0.8, upper=1.2):
e = random.uniform(lower, upper)
return ImageEnhance.Color(img).enhance(e)
ops = [random_brightness, random_contrast, random_color]
random.shuffle(ops)
img = ops[0](img)
img = ops[1](img)
img = ops[2](img)
return img
def process_image_imagepath(sample, mode, color_jitter, rotate):
imgpath = sample[0]
img = Image.open(imgpath)
if mode == 'train':
if rotate: img = rotate_image(img)
img = RandomResizedCrop(img, DATA_DIM)
else:
img = Scale(img, 256)
img = CenterCrop(img, DATA_DIM)
if mode == 'train':
if color_jitter:
img = distort_color(img)
if random.randint(0, 1) == 1:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
if mode in ['train', 'test']:
return img, sample[1]
elif mode == 'infer':
return [img]
def eml_iterator(data,
mode,
batch_size,
samples_each_class,
iter_size,
shuffle=False,
color_jitter=False,
rotate=False):
def reader():
labs = list(data.keys())
lab_num = len(labs)
ind = list(range(0, lab_num))
assert batch_size % samples_each_class == 0, "batch_size % samples_each_class != 0"
num_class = batch_size // samples_each_class
for i in range(iter_size):
random.shuffle(ind)
for n in range(num_class):
lab_ind = ind[n]
label = labs[lab_ind]
data_list = data[label]
random.shuffle(data_list)
for s in range(samples_each_class):
path = DATA_DIR + data_list[s]
yield path, label
mapper = functools.partial(
process_image_imagepath, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE, order=True)
def quadruplet_iterator(data,
mode,
class_num,
samples_each_class,
iter_size,
shuffle=False,
color_jitter=False,
rotate=False):
def reader():
labs = list(data.keys())
lab_num = len(labs)
ind = list(range(0, lab_num))
for i in range(iter_size):
random.shuffle(ind)
ind_sample = ind[:class_num]
for ind_i in ind_sample:
lab = labs[ind_i]
data_list = data[lab]
data_ind = list(range(0, len(data_list)))
random.shuffle(data_ind)
anchor_ind = data_ind[:samples_each_class]
for anchor_ind_i in anchor_ind:
anchor_path = DATA_DIR + data_list[anchor_ind_i]
yield anchor_path, lab
mapper = functools.partial(
process_image_imagepath, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE, order=True)
def triplet_iterator(data,
mode,
batch_size,
iter_size,
shuffle=False,
color_jitter=False,
rotate=False):
def reader():
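        # Yield images in (anchor, positive, negative) order; TripletLoss later reshapes the batch to [-1, 3, fea_dim] accordingly.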
labs = list(data.keys())
lab_num = len(labs)
ind = list(range(0, lab_num))
for i in range(iter_size):
random.shuffle(ind)
ind_pos, ind_neg = ind[:2]
lab_pos = labs[ind_pos]
pos_data_list = data[lab_pos]
data_ind = list(range(0, len(pos_data_list)))
random.shuffle(data_ind)
anchor_ind, pos_ind = data_ind[:2]
lab_neg = labs[ind_neg]
neg_data_list = data[lab_neg]
neg_ind = random.randint(0, len(neg_data_list) - 1)
anchor_path = DATA_DIR + pos_data_list[anchor_ind]
yield anchor_path, lab_pos
pos_path = DATA_DIR + pos_data_list[pos_ind]
yield pos_path, lab_pos
neg_path = DATA_DIR + neg_data_list[neg_ind]
yield neg_path, lab_neg
mapper = functools.partial(
process_image_imagepath, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE, order=True)
def image_iterator(data,
mode,
shuffle=False,
color_jitter=False,
rotate=False):
def test_reader():
for i in range(len(data)):
path, label = data[i]
path = DATA_DIR + path
yield path, label
def infer_reader():
for i in range(len(data)):
path = data[i]
path = DATA_DIR + path
yield [path]
if mode == "test":
mapper = functools.partial(
process_image_imagepath, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, test_reader, THREAD, BUF_SIZE)
elif mode == "infer":
mapper = functools.partial(
process_image_imagepath, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, infer_reader, THREAD, BUF_SIZE)
def eml_train(batch_size, samples_each_class):
return eml_iterator(train_data, 'train', batch_size, samples_each_class, iter_size = 100, \
shuffle=True, color_jitter=False, rotate=False)
def quadruplet_train(class_num, samples_each_class):
return quadruplet_iterator(train_data, 'train', class_num, samples_each_class, iter_size=100, \
shuffle=True, color_jitter=False, rotate=False)
def triplet_train(batch_size):
assert(batch_size % 3 == 0)
return triplet_iterator(train_data, 'train', batch_size, iter_size = batch_size//3 * 100, \
shuffle=True, color_jitter=False, rotate=False)
def test():
return image_iterator(test_image_list, "test", shuffle=False)
def infer():
return image_iterator(infer_image_list, "infer", shuffle=False)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import paddle.fluid as fluid
from . import datareader as reader
from .metrics import calculate_order_dist_matrix
from .metrics import get_gpu_num
from utility import get_gpu_num
from .commonfunc import calculate_order_dist_matrix
class emlloss():
class EmlLoss():
def __init__(self, train_batch_size = 40, samples_each_class=2):
num_gpus = get_gpu_num()
self.samples_each_class = samples_each_class
self.train_batch_size = train_batch_size
num_gpus = get_gpu_num()
assert(train_batch_size % num_gpus == 0)
self.cal_loss_batch_size = train_batch_size // num_gpus
assert(self.cal_loss_batch_size % samples_each_class == 0)
class_num = train_batch_size // samples_each_class
self.train_reader = reader.eml_train(train_batch_size, samples_each_class)
self.test_reader = reader.test()
def surrogate_function(self, beta, theta, bias):
x = theta * fluid.layers.exp(bias)
......@@ -41,7 +40,10 @@ class emlloss():
def loss(self, input):
samples_each_class = self.samples_each_class
batch_size = self.cal_loss_batch_size
batch_size = self.cal_loss_batch_size
#input = fluid.layers.l2_normalize(input, axis=1)
#input_norm = fluid.layers.sqrt(fluid.layers.reduce_sum(fluid.layers.square(input), dim=1))
#input = fluid.layers.elementwise_div(input, input_norm, axis=0)
d = calculate_order_dist_matrix(input, self.cal_loss_batch_size, self.samples_each_class)
ignore, pos, neg = fluid.layers.split(d, num_or_sections= [1,
samples_each_class-1, batch_size-samples_each_class], dim=1)
......
import numpy as np
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from utility import get_gpu_num
from .commonfunc import calculate_order_dist_matrix
class QuadrupletLoss():
def __init__(self,
train_batch_size = 80,
samples_each_class = 2,
margin = 0.1):
self.margin = margin
self.samples_each_class = samples_each_class
self.train_batch_size = train_batch_size
num_gpus = get_gpu_num()
assert(train_batch_size % num_gpus == 0)
self.cal_loss_batch_size = train_batch_size // num_gpus
assert(self.cal_loss_batch_size % samples_each_class == 0)
def loss(self, input):
#input = fluid.layers.l2_normalize(input, axis=1)
input_norm = fluid.layers.sqrt(fluid.layers.reduce_sum(fluid.layers.square(input), dim=1))
input = fluid.layers.elementwise_div(input, input_norm, axis=0)
samples_each_class = self.samples_each_class
batch_size = self.cal_loss_batch_size
margin = self.margin
d = calculate_order_dist_matrix(input, self.cal_loss_batch_size, self.samples_each_class)
ignore, pos, neg = fluid.layers.split(d, num_or_sections= [1,
samples_each_class-1, batch_size-samples_each_class], dim=1)
ignore.stop_gradient = True
pos_max = fluid.layers.reduce_max(pos)
neg_min = fluid.layers.reduce_min(neg)
pos_max = fluid.layers.sqrt(pos_max)
neg_min = fluid.layers.sqrt(neg_min)
#pos_max = fluid.layers.sqrt(pos_max + 1e-6)
#neg_min = fluid.layers.sqrt(neg_min + 1e-6)
loss = fluid.layers.relu(pos_max - neg_min + margin)
return loss
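# calculate_order_dist_matrix comes from losses/commonfunc.py, which is not
# part of this diff. A minimal NumPy sketch of the layout the split above
# assumes (an assumption for illustration, not the actual implementation):
# row i holds the squared distances from sample i to the whole batch,
# reordered so that column 0 is the self-distance (the 'ignore' slice), the
# next samples_each_class-1 columns are same-class distances ('pos'), and
# the remaining columns are distances to other classes ('neg'); same-class
# samples are contiguous in the batch.
import numpy as np

def order_dist_matrix_ref(feats, batch_size, samples_each_class):
    d = ((feats[:, None, :] - feats[None, :, :]) ** 2).sum(-1)
    out = np.empty_like(d)
    for i in range(batch_size):
        cls = i // samples_each_class
        same = [j for j in range(cls * samples_each_class,
                                 (cls + 1) * samples_each_class) if j != i]
        rest = [j for j in range(batch_size) if j != i and j not in same]
        out[i] = d[i, [i] + same + rest]
    return out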
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
class SoftmaxLoss():
def __init__(self, class_dim):
self.class_dim = class_dim
def loss(self, input, label):
out = self.fc_product(input, self.class_dim)
loss = fluid.layers.cross_entropy(input=out, label=label)
return loss, out
def fc_product(self, input, out_dim):
stdv = 1.0 / math.sqrt(input.shape[1] * 1.0)
out = fluid.layers.fc(input=input,
size=out_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)))
return out
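# Usage sketch (variable names hypothetical): `embedding` is the backbone
# output and `label` an int64 tensor of shape [-1, 1]; loss() returns both
# the cross-entropy cost and the softmax output used for accuracy metrics.
#   softmax_loss = SoftmaxLoss(class_dim=11318)
#   cost, logit = softmax_loss.loss(embedding, label)
#   avg_cost = fluid.layers.mean(x=cost)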
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
class TripletLoss():
def __init__(self, margin=0.1):
self.margin = margin
def loss(self, input):
margin = self.margin
fea_dim = input.shape[1] # number of channels
#input = fluid.layers.l2_normalize(input, axis=1)
input_norm = fluid.layers.sqrt(fluid.layers.reduce_sum(fluid.layers.square(input), dim=1))
input = fluid.layers.elementwise_div(input, input_norm, axis=0)
output = fluid.layers.reshape(input, shape = [-1, 3, fea_dim])
anchor, positive, negative = fluid.layers.split(output, num_or_sections = 3, dim = 1)
anchor = fluid.layers.reshape(anchor, shape = [-1, fea_dim])
......@@ -23,7 +26,7 @@ class tripletloss():
a_n = fluid.layers.square(anchor - negative)
a_p = fluid.layers.reduce_sum(a_p, dim = 1)
a_n = fluid.layers.reduce_sum(a_n, dim = 1)
a_p = fluid.layers.sqrt(a_p)
a_n = fluid.layers.sqrt(a_n)
#a_p = fluid.layers.sqrt(a_p + 1e-6)
#a_n = fluid.layers.sqrt(a_n + 1e-6)
loss = fluid.layers.relu(a_p + margin - a_n)
return loss
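# Batch layout assumed by the reshape/split above: the triplet reader emits
# flat triplets, so a batch of size 3*k stacks as
#   [a_0, p_0, n_0, a_1, p_1, n_1, ...]
# reshape(input, [-1, 3, fea_dim]) regroups each triplet along dim 1, and
# split(..., num_or_sections=3, dim=1) peels off the anchor, positive and
# negative slices; this is also why the triplet readers assert
# batch_size % 3 == 0.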
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .resnet_embedding import ResNet50
from .resnet_embedding import ResNet101
from .resnet_embedding import ResNet152
import paddle
import paddle.fluid as fluid
import math
from paddle.fluid.param_attr import ParamAttr
__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"]
......@@ -22,7 +23,7 @@ class ResNet():
self.params = train_parameters
self.layers = layers
def net(self, input, embedding_size=256):
layers = self.layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
......@@ -37,7 +38,7 @@ class ResNet():
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu', name="conv1")
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
......@@ -47,21 +48,26 @@ class ResNet():
for block in range(len(depth)):
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name="res"+str(block+2)+"a"
else:
conv_name="res"+str(block+2)+"b"+str(i)
else:
conv_name="res"+str(block+2)+chr(97+i)
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1, name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
if embedding_size > 0:
embedding = fluid.layers.fc(input=pool, size=embedding_size)
return embedding
else:
return pool
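# With embedding_size > 0 the network appends a linear projection on top of
# the global average pooling and returns that embedding; with
# embedding_size == 0 (the default in the new train scripts) it returns the
# raw pooled features and leaves any further projection to the loss head
# (e.g. the fc inside SoftmaxLoss).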
def conv_bn_layer(self,
input,
......@@ -69,7 +75,8 @@ class ResNet():
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
......@@ -78,31 +85,44 @@ class ResNet():
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
name=name + '.conv2d.output.1')
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
return fluid.layers.batch_norm(input=conv,
act=act,
name=bn_name+'.output.1',
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',)
def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
else:
return input
def bottleneck_block(self, input, num_filters, stride, name):
conv0 = self.conv_bn_layer(
input=input, num_filters=num_filters, filter_size=1, act='relu', name=name+"_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name+"_branch2b")
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c")
short = self.shortcut(input, num_filters * 4, stride, name=name + "_branch1")
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu', name=name+".add.output.5")
def ResNet50():
......
import paddle
import paddle.fluid as fluid
import math
__all__ = ["SE_ResNeXt", "SE_ResNeXt50_32x4d", "SE_ResNeXt101_32x4d", "SE_ResNeXt152_32x4d"]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class SE_ResNeXt():
def __init__(self, layers = 50):
self.params = train_parameters
self.layers = layers
def net(self, input, class_dim = 1000):
layers = self.layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024]
conv = self.conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 101:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 23, 3]
num_filters = [128, 256, 512, 1024]
conv = self.conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 152:
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
conv = self.conv_bn_layer(
input=input, num_filters=64, filter_size=3, stride=2, act='relu')
conv = self.conv_bn_layer(
input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
conv = self.conv_bn_layer(
input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
conv = fluid.layers.pool2d(
input=conv, pool_size=3, pool_stride=2, pool_padding=1, \
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=cardinality,
reduction_ratio=reduction_ratio)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
drop = fluid.layers.dropout(x=pool, dropout_prob=0.5)
stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
out = fluid.layers.fc(input=drop,
size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)))
return pool, out
def shortcut(self, input, ch_out, stride):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
filter_size = 1
return self.conv_bn_layer(input, ch_out, filter_size, stride)
else:
return input
def bottleneck_block(self, input, num_filters, stride, cardinality, reduction_ratio):
conv0 = self.conv_bn_layer(
input=input, num_filters=num_filters, filter_size=1, act='relu')
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
groups=cardinality,
act='relu')
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 2, filter_size=1, act=None)
scale = self.squeeze_excitation(
input=conv2,
num_channels=num_filters * 2,
reduction_ratio=reduction_ratio)
short = self.shortcut(input, num_filters * 2, stride)
return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1,
act=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False)
return fluid.layers.batch_norm(input=conv, act=act)
def squeeze_excitation(self, input, num_channels, reduction_ratio):
pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
squeeze = fluid.layers.fc(input=pool,
size=num_channels // reduction_ratio,
act='relu',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)))
stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
excitation = fluid.layers.fc(input=squeeze,
size=num_channels,
act='sigmoid',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(
-stdv, stdv)))
scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return scale
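# Squeeze-and-Excitation in brief: global average pooling squeezes each
# channel to a scalar, an fc+relu bottleneck of num_channels/reduction_ratio
# units followed by an fc+sigmoid produces per-channel gates in (0, 1), and
# elementwise_mul(..., axis=0) broadcasts those gates over H and W to
# rescale the input feature map channel by channel.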
def SE_ResNeXt50_32x4d():
model = SE_ResNeXt(layers = 50)
return model
def SE_ResNeXt101_32x4d():
model = SE_ResNeXt(layers = 101)
return model
def SE_ResNeXt152_32x4d():
model = SE_ResNeXt(layers = 152)
return model
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import random
import functools
import numpy as np
import paddle
from imgtool import process_image
random.seed(0)
DATA_DIR = "./data/Stanford_Online_Products/"
TRAIN_LIST = './data/Stanford_Online_Products/Ebay_train.txt'
VAL_LIST = './data/Stanford_Online_Products/Ebay_test.txt'
def init_sop(mode):
if mode == 'train':
train_data = {}
train_image_list = []
train_list = open(TRAIN_LIST, "r").readlines()
for i, item in enumerate(train_list):
items = item.strip().split()
if items[0] == 'image_id':
continue
path = items[3]
label = int(items[1]) - 1
train_image_list.append((path, label))
if label not in train_data:
train_data[label] = []
train_data[label].append(path)
random.shuffle(train_image_list)
print("{} dataset size: {}".format(mode, len(train_data)))
return train_data, train_image_list
else:
val_data = {}
val_image_list = []
test_image_list = []
val_list = open(VAL_LIST, "r").readlines()
for i, item in enumerate(val_list):
items = item.strip().split()
if items[0] == 'image_id':
continue
path = items[3]
label = int(items[1])
val_image_list.append((path, label))
test_image_list.append(path)
if label not in val_data:
val_data[label] = []
val_data[label].append(path)
print("{} dataset size: {}".format(mode, len(val_data)))
if mode == 'val':
return val_data, val_image_list
else:
return test_image_list
def common_iterator(data, settings):
batch_size = settings.train_batch_size
samples_each_class = settings.samples_each_class
assert (batch_size % samples_each_class == 0)
class_num = batch_size // samples_each_class
def train_iterator():
labs = list(data.keys())
lab_num = len(labs)
ind = list(range(0, lab_num))
while True:
random.shuffle(ind)
ind_sample = ind[:class_num]
for ind_i in ind_sample:
lab = labs[ind_i]
data_list = data[lab]
data_ind = list(range(0, len(data_list)))
random.shuffle(data_ind)
anchor_ind = data_ind[:samples_each_class]
for anchor_ind_i in anchor_ind:
anchor_path = DATA_DIR + data_list[anchor_ind_i]
yield anchor_path, lab
return train_iterator
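# Layout produced above: each batch of train_batch_size samples holds
# class_num randomly chosen classes with samples_each_class consecutive
# samples per class. The pair losses (quadruplet/eml) rely on exactly this
# grouping when they carve the distance matrix into pos/neg columns, so the
# reader must preserve this order downstream.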
def triplet_iterator(data, settings):
batch_size = settings.train_batch_size
assert (batch_size % 3 == 0)
def train_iterator():
labs = list(data.keys())
lab_num = len(labs)
ind = list(range(0, lab_num))
while True:
random.shuffle(ind)
ind_pos, ind_neg = ind[:2]
lab_pos = labs[ind_pos]
pos_data_list = data[lab_pos]
data_ind = list(range(0, len(pos_data_list)))
random.shuffle(data_ind)
anchor_ind, pos_ind = data_ind[:2]
lab_neg = labs[ind_neg]
neg_data_list = data[lab_neg]
neg_ind = random.randint(0, len(neg_data_list) - 1)
anchor_path = DATA_DIR + pos_data_list[anchor_ind]
yield anchor_path, lab_pos
pos_path = DATA_DIR + pos_data_list[pos_ind]
yield pos_path, lab_pos
neg_path = DATA_DIR + neg_data_list[neg_ind]
yield neg_path, lab_neg
return train_iterator
def arcmargin_iterator(data, settings):
def train_iterator():
while True:
for items in data:
path, label = items
path = DATA_DIR + path
yield path, label
return train_iterator
def image_iterator(data, mode):
def val_iterator():
for items in data:
path, label = items
path = DATA_DIR + path
yield path, label
def test_iterator():
for item in data:
path = item
path = DATA_DIR + path
yield [path]
if mode == 'val':
return val_iterator
else:
return test_iterator
def createreader(settings, mode):
def metric_reader():
if mode == 'train':
train_data, train_image_list = init_sop('train')
loss_name = settings.loss_name
if loss_name in ["softmax", "arcmargin"]:
return arcmargin_iterator(train_image_list, settings)()
elif loss_name == 'triplet':
return triplet_iterator(train_data, settings)()
else:
return common_iterator(train_data, settings)()
elif mode == 'val':
val_data, val_image_list = init_sop('val')
return image_iterator(val_image_list, 'val')()
else:
test_image_list = init_sop('test')
return image_iterator(test_image_list, 'test')()
image_shape = settings.image_shape.split(',')
assert(image_shape[1] == image_shape[2])
image_size = int(image_shape[2])
keep_order = (mode == 'train') and (settings.loss_name not in ['softmax', 'arcmargin'])
image_mapper = functools.partial(process_image,
mode=mode, color_jitter=False, rotate=False, crop_size=image_size)
reader = paddle.reader.xmap_readers(
image_mapper, metric_reader, 8, 1000, order=keep_order)
return reader
def train(settings):
return createreader(settings, "train")
def test(settings):
return createreader(settings, "val")
def infer(settings):
return createreader(settings, "test")
import os
import sys
import math
import time
import argparse
import functools
import numpy as np
import paddle
import paddle.fluid as fluid
import models
from losses import tripletloss
from losses import quadrupletloss
from losses import emlloss
from losses.metrics import recall_topk
from utility import add_arguments, print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('train_batch_size', int, 80, "Minibatch size.")
add_arg('test_batch_size', int, 10, "Minibatch size.")
add_arg('num_epochs', int, 120, "number of epochs.")
add_arg('image_shape', str, "3,224,224", "input image size")
add_arg('model_save_dir', str, "output", "model save directory")
add_arg('with_mem_opt', bool, True,
"Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('lr', float, 0.1, "set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay",
"Set the learning rate decay strategy.")
add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.")
add_arg('loss_name', str, "tripletloss", "Set the loss type to use.")
add_arg('samples_each_class', int, 2, "Samples each class.")
add_arg('margin', float, 0.1, "margin.")
add_arg('alpha', float, 0.0, "alpha.")
# yapf: enable
model_list = [m for m in dir(models) if "__" not in m]
def optimizer_setting(params):
ls = params["learning_strategy"]
assert ls["name"] == "piecewise_decay", \
"learning rate strategy must be {}, \
but got {}".format("piecewise_decay", lr["name"])
step = 10000
bd = [step * e for e in ls["epochs"]]
base_lr = params["lr"]
lr = [base_lr * (0.1 ** i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
return optimizer
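# Worked example: with the default schedule (epochs [30, 60, 90], lr 0.1)
# and step = 10000 iterations per epoch, bd = [300000, 600000, 900000] and
# lr = [0.1, 0.01, 0.001, 0.0001], i.e. the learning rate drops by 10x at
# each boundary.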
def train(args):
# parameters from arguments
model_name = args.model
checkpoint = args.checkpoint
pretrained_model = args.pretrained_model
with_memory_optimization = args.with_mem_opt
model_save_dir = args.model_save_dir
loss_name = args.loss_name
image_shape = [int(m) for m in args.image_shape.split(",")]
assert model_name in model_list, "{} is not in lists: {}".format(args.model, model_list)
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# model definition
model = models.__dict__[model_name]()
out = model.net(input=image, class_dim=200)
if loss_name == "tripletloss":
metricloss = tripletloss(
train_batch_size = args.train_batch_size,
margin=args.margin)
cost_metric = metricloss.loss(out[0])
avg_cost_metric = fluid.layers.mean(x=cost_metric)
elif loss_name == "quadrupletloss":
metricloss = quadrupletloss(
train_batch_size = args.train_batch_size,
samples_each_class = args.samples_each_class,
margin=args.margin)
cost_metric = metricloss.loss(out[0])
avg_cost_metric = fluid.layers.mean(x=cost_metric)
elif loss_name == "emlloss":
metricloss = emlloss(
train_batch_size = args.train_batch_size,
samples_each_class = args.samples_each_class
)
cost_metric = metricloss.loss(out[0])
avg_cost_metric = fluid.layers.mean(x=cost_metric)
cost_cls = fluid.layers.cross_entropy(input=out[1], label=label)
avg_cost_cls = fluid.layers.mean(x=cost_cls)
acc_top1 = fluid.layers.accuracy(input=out[1], label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out[1], label=label, k=5)
avg_cost = avg_cost_metric + args.alpha*avg_cost_cls
test_program = fluid.default_main_program().clone(for_test=True)
# parameters from model and arguments
params = model.params
params["lr"] = args.lr
params["num_epochs"] = args.num_epochs
params["learning_strategy"]["batch_size"] = args.train_batch_size
params["learning_strategy"]["name"] = args.lr_strategy
# initialize optimizer
optimizer = optimizer_setting(params)
opts = optimizer.minimize(avg_cost)
global_lr = optimizer._global_learning_rate()
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if checkpoint is not None:
fluid.io.load_persistables(exe, checkpoint)
if pretrained_model:
assert(checkpoint is None)
def if_exist(var):
has_var = os.path.exists(os.path.join(pretrained_model, var.name))
if has_var:
print('var: %s found' % (var.name))
return has_var
fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
train_reader = paddle.batch(metricloss.train_reader, batch_size=args.train_batch_size)
test_reader = paddle.batch(metricloss.test_reader, batch_size=args.test_batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
fetch_list_train = [avg_cost_metric.name, avg_cost_cls.name, acc_top1.name, acc_top5.name, global_lr.name]
fetch_list_test = [out[0].name]
if with_memory_optimization:
fluid.memory_optimize(fluid.default_main_program(), skip_opt_set=set(fetch_list_train))
for pass_id in range(params["num_epochs"]):
train_info = [[], [], [], []]
for batch_id, data in enumerate(train_reader()):
t1 = time.time()
loss_metric, loss_cls, acc1, acc5, lr = train_exe.run(fetch_list_train, feed=feeder.feed(data))
t2 = time.time()
period = t2 - t1
loss_metric = np.mean(np.array(loss_metric))
loss_cls = np.mean(np.array(loss_cls))
acc1 = np.mean(np.array(acc1))
acc5 = np.mean(np.array(acc5))
lr = np.mean(np.array(lr))
train_info[0].append(loss_metric)
train_info[1].append(loss_cls)
train_info[2].append(acc1)
train_info[3].append(acc5)
if batch_id % 10 == 0:
print("Pass {0}, trainbatch {1}, lr {2}, loss_metric {3}, loss_cls {4}, acc1 {5}, acc5 {6}, time {7}".format(pass_id, \
batch_id, lr, loss_metric, loss_cls, acc1, acc5, "%2.2f sec" % period))
train_loss_metric = np.array(train_info[0]).mean()
train_loss_cls = np.array(train_info[1]).mean()
train_acc1 = np.array(train_info[2]).mean()
train_acc5 = np.array(train_info[3]).mean()
f = []
l = []
for batch_id, data in enumerate(test_reader()):
if len(data) < args.test_batch_size:
continue
t1 = time.time()
[feas] = exe.run(test_program, fetch_list = fetch_list_test, feed=feeder.feed(data))
label = np.asarray([x[1] for x in data])
f.append(feas)
l.append(label)
t2 = time.time()
period = t2 - t1
if batch_id % 20 == 0:
print("Pass {0}, testbatch {1}, time {2}".format(pass_id, \
batch_id, "%2.2f sec" % period))
f = np.vstack(f)
l = np.hstack(l)
recall = recall_topk(f, l, k = 1)
print("End pass {0}, train_loss_metric {1}, train_loss_cls {2}, train_acc1 {3}, train_acc5 {4}, test_recall {5}".format(pass_id, \
train_loss_metric, train_loss_cls, train_acc1, train_acc5, recall))
sys.stdout.flush()
model_path = os.path.join(model_save_dir, model_name, str(pass_id))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
def main():
args = parser.parse_args()
print_arguments(args)
train(args)
if __name__ == '__main__':
main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import math
import time
import logging
import argparse
import functools
import threading
import subprocess
import numpy as np
import paddle
import paddle.fluid as fluid
import models
import reader
from losses import SoftmaxLoss
from losses import ArcMarginLoss
from utility import add_arguments, print_arguments
from utility import fmt_time, recall_topk, get_gpu_num
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model', str, "ResNet50", "Set the network to use.")
add_arg('embedding_size', int, 0, "Embedding size.")
add_arg('train_batch_size', int, 256, "Minibatch size.")
add_arg('test_batch_size', int, 50, "Minibatch size.")
add_arg('image_shape', str, "3,224,224", "input image size")
add_arg('class_dim', int, 11318, "Class number.")
add_arg('lr', float, 0.01, "set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
add_arg('lr_steps', str, "30000", "step of lr")
add_arg('total_iter_num', int, 30000, "total_iter_num")
add_arg('display_iter_step', int, 10, "display_iter_step.")
add_arg('test_iter_step', int, 1000, "test_iter_step.")
add_arg('save_iter_step', int, 1000, "save_iter_step.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('model_save_dir', str, "output", "model save directory")
add_arg('loss_name', str, "softmax", "Set the loss type to use.")
add_arg('arc_scale', float, 80.0, "arc scale.")
add_arg('arc_margin', float, 0.15, "arc margin.")
add_arg('arc_easy_margin', bool, False, "arc easy margin.")
add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.")
# yapf: enable
model_list = [m for m in dir(models) if "__" not in m]
def optimizer_setting(params):
ls = params["learning_strategy"]
assert ls["name"] == "piecewise_decay", \
"learning rate strategy must be {}, \
but got {}".format("piecewise_decay", lr["name"])
bd = [int(e) for e in ls["lr_steps"].split(',')]
base_lr = params["lr"]
lr = [base_lr * (0.1 ** i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
return optimizer
def net_config(image, label, model, args, is_train):
assert args.model in model_list, "{} is not in lists: {}".format(
args.model, model_list)
out = model.net(input=image, embedding_size=args.embedding_size)
if not is_train:
return None, None, None, out
if args.loss_name == "softmax":
metricloss = SoftmaxLoss(
class_dim=args.class_dim,
)
elif args.loss_name == "arcmargin":
metricloss = ArcMarginLoss(
class_dim = args.class_dim,
margin = args.arc_margin,
scale = args.arc_scale,
easy_margin = args.arc_easy_margin,
)
cost, logit = metricloss.loss(out, label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=logit, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=logit, label=label, k=5)
return avg_cost, acc_top1, acc_top5, out
def build_program(is_train, main_prog, startup_prog, args):
image_shape = [int(m) for m in args.image_shape.split(",")]
model = models.__dict__[args.model]()
with fluid.program_guard(main_prog, startup_prog):
if is_train:
queue_capacity = 64
py_reader = fluid.layers.py_reader(
capacity=queue_capacity,
shapes=[[-1] + image_shape, [-1, 1]],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
use_double_buffer=True)
image, label = fluid.layers.read_file(py_reader)
else:
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
with fluid.unique_name.guard():
avg_cost, acc_top1, acc_top5, out = net_config(image, label, model, args, is_train)
if is_train:
params = model.params
params["lr"] = args.lr
params["learning_strategy"]["lr_steps"] = args.lr_steps
params["learning_strategy"]["name"] = args.lr_strategy
optimizer = optimizer_setting(params)
optimizer.minimize(avg_cost)
global_lr = optimizer._global_learning_rate()
"""
if not is_train:
main_prog = main_prog.clone(for_test=True)
"""
if is_train:
return py_reader, avg_cost, acc_top1, acc_top5, global_lr
else:
return out, image, label
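# Two-program pattern: the training program owns a py_reader and the
# optimizer, while evaluation is built on a separate Program that the caller
# clones with clone(for_test=True); fluid.unique_name.guard() keeps
# parameter names identical across both programs so they share the same
# variables in the executor scope.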
def train_async(args):
# parameters from arguments
logging.debug('enter train')
model_name = args.model
checkpoint = args.checkpoint
pretrained_model = args.pretrained_model
model_save_dir = args.model_save_dir
startup_prog = fluid.Program()
train_prog = fluid.Program()
tmp_prog = fluid.Program()
if args.enable_ce:
assert args.model == "ResNet50"
assert args.loss_name == "arcmargin"
np.random.seed(0)
startup_prog.random_seed = 1000
train_prog.random_seed = 1000
tmp_prog.random_seed = 1000
train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
is_train=True,
main_prog=train_prog,
startup_prog=startup_prog,
args=args)
test_feas, image, label = build_program(
is_train=False,
main_prog=tmp_prog,
startup_prog=startup_prog,
args=args)
test_prog = tmp_prog.clone(for_test=True)
train_fetch_list = [global_lr.name, train_cost.name, train_acc1.name, train_acc5.name]
test_fetch_list = [test_feas.name]
if args.with_mem_opt:
fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
logging.debug('after run startup program')
if checkpoint is not None:
fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)
if pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(pretrained_model, var.name))
fluid.io.load_vars(
exe, pretrained_model, main_program=train_prog, predicate=if_exist)
devicenum = get_gpu_num()
assert (args.train_batch_size % devicenum) == 0
train_batch_size = args.train_batch_size // devicenum
test_batch_size = args.test_batch_size
train_reader = paddle.batch(reader.train(args), batch_size=train_batch_size, drop_last=True)
test_reader = paddle.batch(reader.test(args), batch_size=test_batch_size, drop_last=False)
test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_py_reader.decorate_paddle_reader(train_reader)
train_exe = fluid.ParallelExecutor(
main_program=train_prog,
use_cuda=args.use_gpu,
loss_name=train_cost.name)
totalruntime = 0
train_py_reader.start()
iter_no = 0
train_info = [0, 0, 0, 0]
while iter_no <= args.total_iter_num:
t1 = time.time()
lr, loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
t2 = time.time()
period = t2 - t1
lr = np.mean(np.array(lr))
train_info[0] += np.mean(np.array(loss))
train_info[1] += np.mean(np.array(acc1))
train_info[2] += np.mean(np.array(acc5))
train_info[3] += 1
if iter_no % args.display_iter_step == 0:
avgruntime = totalruntime / args.display_iter_step
avg_loss = train_info[0] / train_info[3]
avg_acc1 = train_info[1] / train_info[3]
avg_acc5 = train_info[2] / train_info[3]
print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
"acc1 %.4f, acc5 %.4f, time %2.2f sec" % \
(fmt_time(), iter_no, lr, avg_loss, avg_acc1, avg_acc5, avgruntime))
sys.stdout.flush()
totalruntime = 0
if iter_no % 1000 == 0:
train_info = [0, 0, 0, 0]
totalruntime += period
if iter_no % args.test_iter_step == 0 and iter_no != 0:
f, l = [], []
for batch_id, data in enumerate(test_reader()):
t1 = time.time()
[feas] = exe.run(test_prog, fetch_list = test_fetch_list, feed=test_feeder.feed(data))
label = np.asarray([x[1] for x in data])
f.append(feas)
l.append(label)
t2 = time.time()
period = t2 - t1
if batch_id % 20 == 0:
print("[%s] testbatch %d, time %2.2f sec" % \
(fmt_time(), batch_id, period))
f = np.vstack(f)
l = np.hstack(l)
recall = recall_topk(f, l, k=1)
print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
(fmt_time(), len(f), iter_no, recall))
sys.stdout.flush()
if iter_no % args.save_iter_step == 0 and iter_no != 0:
model_path = os.path.join(model_save_dir, model_name, str(iter_no))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path, main_program=train_prog)
iter_no += 1
# This is for continuous evaluation only
if args.enable_ce:
# Use the mean cost/acc for training
print("kpis train_cost %s" % (avg_loss))
print("kpis test_recall %s" % (recall))
def initlogging():
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
loglevel = logging.DEBUG
logging.basicConfig(
level=loglevel,
# logger.BASIC_FORMAT,
format=
"%(levelname)s:%(filename)s[%(lineno)s] %(name)s:%(funcName)s->%(message)s",
datefmt='%a, %d %b %Y %H:%M:%S')
def main():
args = parser.parse_args()
print_arguments(args)
train_async(args)
if __name__ == '__main__':
main()
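# recall_topk is imported from utility, which is not part of this diff. A
# plain NumPy sketch of the Recall@K it is assumed to compute: a query
# counts as a hit if any of its k nearest neighbours (excluding itself)
# shares the query's label.
import numpy as np

def recall_topk_ref(fea, lab, k=1):
    d = ((fea[:, None, :] - fea[None, :, :]) ** 2).sum(-1)  # pairwise squared L2
    np.fill_diagonal(d, np.inf)           # never match a query with itself
    nn = np.argsort(d, axis=1)[:, :k]     # k nearest neighbours per query
    hits = [(lab[row] == lab[i]).any() for i, row in enumerate(nn)]
    return float(np.mean(hits))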
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import math
import time
import logging
import argparse
import functools
import threading
import subprocess
import numpy as np
import paddle
import paddle.fluid as fluid
import models
import reader
from losses import TripletLoss
from losses import QuadrupletLoss
from losses import EmlLoss
from utility import add_arguments, print_arguments
from utility import fmt_time, recall_topk, get_gpu_num
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model', str, "ResNet50", "Set the network to use.")
add_arg('embedding_size', int, 0, "Embedding size.")
add_arg('train_batch_size', int, 120, "Minibatch size.")
add_arg('test_batch_size', int, 50, "Minibatch size.")
add_arg('image_shape', str, "3,224,224", "input image size")
add_arg('class_dim', int, 11318, "Class number.")
add_arg('lr', float, 0.0001, "set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
add_arg('lr_steps', str, "100000", "step of lr")
add_arg('total_iter_num', int, 100000, "total_iter_num")
add_arg('display_iter_step', int, 10, "display_iter_step.")
add_arg('test_iter_step', int, 5000, "test_iter_step.")
add_arg('save_iter_step', int, 5000, "save_iter_step.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('model_save_dir', str, "output", "model save directory")
add_arg('loss_name', str, "triplet", "Set the loss type to use.")
add_arg('samples_each_class', int, 2, "samples_each_class.")
add_arg('margin', float, 0.1, "margin.")
# yapf: enable
model_list = [m for m in dir(models) if "__" not in m]
def optimizer_setting(params):
ls = params["learning_strategy"]
assert ls["name"] == "piecewise_decay", \
"learning rate strategy must be {}, \
but got {}".format("piecewise_decay", lr["name"])
bd = [int(e) for e in ls["lr_steps"].split(',')]
base_lr = params["lr"]
lr = [base_lr * (0.1 ** i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
return optimizer
def net_config(image, label, model, args, is_train):
assert args.model in model_list, "{} is not in lists: {}".format(
args.model, model_list)
out = model.net(input=image, embedding_size=args.embedding_size)
if not is_train:
return None, out
if args.loss_name == "triplet":
metricloss = TripletLoss(
margin=args.margin,
)
elif args.loss_name == "quadruplet":
metricloss = QuadrupletLoss(
train_batch_size = args.train_batch_size,
samples_each_class = args.samples_each_class,
margin=args.margin,
)
elif args.loss_name == "eml":
metricloss = EmlLoss(
train_batch_size = args.train_batch_size,
samples_each_class = args.samples_each_class,
)
cost = metricloss.loss(out)
avg_cost = fluid.layers.mean(x=cost)
return avg_cost, out
def build_program(is_train, main_prog, startup_prog, args):
image_shape = [int(m) for m in args.image_shape.split(",")]
model = models.__dict__[args.model]()
with fluid.program_guard(main_prog, startup_prog):
if is_train:
queue_capacity = 64
py_reader = fluid.layers.py_reader(
capacity=queue_capacity,
shapes=[[-1] + image_shape, [-1, 1]],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
use_double_buffer=True)
image, label = fluid.layers.read_file(py_reader)
else:
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
with fluid.unique_name.guard():
avg_cost, out = net_config(image, label, model, args, is_train)
if is_train:
params = model.params
params["lr"] = args.lr
params["learning_strategy"]["lr_steps"] = args.lr_steps
params["learning_strategy"]["name"] = args.lr_strategy
optimizer = optimizer_setting(params)
optimizer.minimize(avg_cost)
global_lr = optimizer._global_learning_rate()
"""
if not is_train:
main_prog = main_prog.clone(for_test=True)
"""
if is_train:
return py_reader, avg_cost, global_lr, out, label
else:
return out, image, label
def train_async(args):
# parameters from arguments
logging.debug('enter train')
model_name = args.model
checkpoint = args.checkpoint
pretrained_model = args.pretrained_model
model_save_dir = args.model_save_dir
startup_prog = fluid.Program()
train_prog = fluid.Program()
tmp_prog = fluid.Program()
train_py_reader, train_cost, global_lr, train_feas, train_label = build_program(
is_train=True,
main_prog=train_prog,
startup_prog=startup_prog,
args=args)
test_feas, image, label = build_program(
is_train=False,
main_prog=tmp_prog,
startup_prog=startup_prog,
args=args)
test_prog = tmp_prog.clone(for_test=True)
train_fetch_list = [global_lr.name, train_cost.name, train_feas.name, train_label.name]
test_fetch_list = [test_feas.name]
if args.with_mem_opt:
fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
logging.debug('after run startup program')
if checkpoint is not None:
fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)
if pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(pretrained_model, var.name))
fluid.io.load_vars(
exe, pretrained_model, main_program=train_prog, predicate=if_exist)
devicenum = get_gpu_num()
assert (args.train_batch_size % devicenum) == 0
train_batch_size = args.train_batch_size // devicenum
test_batch_size = args.test_batch_size
train_reader = paddle.batch(reader.train(args), batch_size=train_batch_size, drop_last=True)
test_reader = paddle.batch(reader.test(args), batch_size=test_batch_size, drop_last=False)
test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_py_reader.decorate_paddle_reader(train_reader)
train_exe = fluid.ParallelExecutor(
main_program=train_prog,
use_cuda=args.use_gpu,
loss_name=train_cost.name)
totalruntime = 0
train_py_reader.start()
iter_no = 0
train_info = [0, 0, 0]
while iter_no <= args.total_iter_num:
t1 = time.time()
lr, loss, feas, label = train_exe.run(fetch_list=train_fetch_list)
t2 = time.time()
period = t2 - t1
lr = np.mean(np.array(lr))
train_info[0] += np.mean(np.array(loss))
train_info[1] += recall_topk(feas, label, k=1)
train_info[2] += 1
if iter_no % args.display_iter_step == 0:
avgruntime = totalruntime / args.display_iter_step
avg_loss = train_info[0] / train_info[2]
avg_recall = train_info[1] / train_info[2]
print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
"recall %.4f, time %2.2f sec" % \
(fmt_time(), iter_no, lr, avg_loss, avg_recall, avgruntime))
sys.stdout.flush()
totalruntime = 0
if iter_no % 1000 == 0:
train_info = [0, 0, 0]
totalruntime += period
if iter_no % args.test_iter_step == 0 and iter_no != 0:
f, l = [], []
for batch_id, data in enumerate(test_reader()):
t1 = time.time()
[feas] = exe.run(test_prog, fetch_list = test_fetch_list, feed=test_feeder.feed(data))
label = np.asarray([x[1] for x in data])
f.append(feas)
l.append(label)
t2 = time.time()
period = t2 - t1
if batch_id % 20 == 0:
print("[%s] testbatch %d, time %2.2f sec" % \
(fmt_time(), batch_id, period))
f = np.vstack(f)
l = np.hstack(l)
recall = recall_topk(f, l, k=1)
print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
(fmt_time(), len(f), iter_no, recall))
sys.stdout.flush()
if iter_no % args.save_iter_step == 0 and iter_no != 0:
model_path = os.path.join(model_save_dir, model_name, str(iter_no))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path, main_program=train_prog)
iter_no += 1
def initlogging():
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
loglevel = logging.DEBUG
logging.basicConfig(
level=loglevel,
# logger.BASIC_FORMAT,
format=
"%(levelname)s:%(filename)s[%(lineno)s] %(name)s:%(funcName)s->%(message)s",
datefmt='%a, %d %b %Y %H:%M:%S')
def main():
args = parser.parse_args()
print_arguments(args)
train_async(args)
if __name__ == '__main__':
main()