未验证 提交 a742f5c6 编写于 作者: Q qingqing01 提交者: GitHub

Refine code and fix some model. (#1693)

* Fix exception when finishing training in detection.
* Fix fluid.Trainer in HiNAS_models.
* Remove coco doc in SSD detection.
上级 ba609980
...@@ -21,6 +21,7 @@ import math ...@@ -21,6 +21,7 @@ import math
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.contrib.trainer import *
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import reader import reader
...@@ -104,7 +105,7 @@ class Model(object): ...@@ -104,7 +105,7 @@ class Model(object):
accs = [] accs = []
def event_handler(event): def event_handler(event):
if isinstance(event, fluid.EndStepEvent): if isinstance(event, EndStepEvent):
costs.append(event.metrics[0]) costs.append(event.metrics[0])
accs.append(event.metrics[1]) accs.append(event.metrics[1])
if event.step % 20 == 0: if event.step % 20 == 0:
...@@ -113,7 +114,7 @@ class Model(object): ...@@ -113,7 +114,7 @@ class Model(object):
del costs[:] del costs[:]
del accs[:] del accs[:]
if isinstance(event, fluid.EndEpochEvent): if isinstance(event, EndEpochEvent):
if event.epoch % 3 == 0 or event.epoch == FLAGS.num_epochs - 1: if event.epoch % 3 == 0 or event.epoch == FLAGS.num_epochs - 1:
avg_cost, accuracy = trainer.test( avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label']) reader=test_reader, feed_order=['pixel', 'label'])
...@@ -126,7 +127,7 @@ class Model(object): ...@@ -126,7 +127,7 @@ class Model(object):
event_handler.best_acc = 0.0 event_handler.best_acc = 0.0
place = fluid.CUDAPlace(0) place = fluid.CUDAPlace(0)
trainer = fluid.Trainer( trainer = Trainer(
train_func=self.train_network, train_func=self.train_network,
optimizer_func=self.optimizer_program, optimizer_func=self.optimizer_program,
place=place) place=place)
......
"""
This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import numpy as np
import threading
import multiprocessing
import traceback
try:
import queue
except ImportError:
import Queue as queue
class GeneratorEnqueuer(object):
    """
    Builds a queue out of a data generator.

    Worker threads (or processes) repeatedly call `next()` on the shared
    generator and put each result on `self.queue`. Consumers either read
    `self.queue` directly or iterate over `get()`.

    Args:
        generator: a generator function which endlessly yields data
        use_multiprocessing (bool): use multiprocessing if True,
            otherwise use threading.
        wait_time (float): time to sleep in-between calls to `put()`.
        random_seed (int): Initial seed for workers,
            will be incremented by one for each workers.
    """

    def __init__(self,
                 generator,
                 use_multiprocessing=False,
                 wait_time=0.05,
                 random_seed=None):
        self.wait_time = wait_time
        self._generator = generator
        self._use_multiprocessing = use_multiprocessing
        self._threads = []
        self._stop_event = None
        self.queue = None
        self._manager = None
        # Only created in threading mode (see `start()`); defined here so the
        # attribute always exists.
        self.genlock = None
        self.seed = random_seed

    def start(self, workers=1, max_queue_size=10):
        """
        Start worker threads which add data from the generator into the queue.

        Args:
            workers (int): number of worker threads
            max_queue_size (int): queue size
                (when full, threads could block on `put()`)
        """

        def data_generator_task(out_queue, stop_event, lock):
            """
            Data generator task.

            The queue, stop event and lock are passed in explicitly instead
            of being read from `self`, because `stop()` resets those
            attributes to None and a worker that is still winding down must
            not trip over that.
            """

            def step():
                # Produce one item if there is room, otherwise back off.
                if out_queue.qsize() < max_queue_size:
                    generator_output = next(self._generator)
                    out_queue.put(generator_output)
                else:
                    time.sleep(self.wait_time)

            while not stop_event.is_set():
                try:
                    if lock is None:
                        step()
                    else:
                        # Threads share one generator object, so calls to
                        # `next()` have to be serialized.
                        with lock:
                            step()
                except StopIteration:
                    # The generator is exhausted: a normal end of data, not
                    # an error, so shut down without printing a traceback.
                    stop_event.set()
                    break
                except Exception:
                    traceback.print_exc()
                    stop_event.set()
                    break

        try:
            if self._use_multiprocessing:
                self._manager = multiprocessing.Manager()
                self.queue = self._manager.Queue(maxsize=max_queue_size)
                self._stop_event = multiprocessing.Event()
                lock = None
            else:
                self.genlock = threading.Lock()
                self.queue = queue.Queue()
                self._stop_event = threading.Event()
                lock = self.genlock

            worker_args = (self.queue, self._stop_event, lock)
            for _ in range(workers):
                if self._use_multiprocessing:
                    # Reset random seed else all children processes
                    # share the same seed
                    np.random.seed(self.seed)
                    thread = multiprocessing.Process(
                        target=data_generator_task, args=worker_args)
                    thread.daemon = True
                    if self.seed is not None:
                        self.seed += 1
                else:
                    thread = threading.Thread(
                        target=data_generator_task, args=worker_args)
                self._threads.append(thread)
                thread.start()
        except BaseException:
            # Tear down whatever was already started, then let the original
            # error (including KeyboardInterrupt) propagate.
            self.stop()
            raise

    def is_running(self):
        """
        Returns:
            bool: Whether the worker threads are running, i.e. `start()` has
                been called and the stop event has not been set.
        """
        return self._stop_event is not None and not self._stop_event.is_set()

    def stop(self, timeout=None):
        """
        Stops running threads and wait for them to exit, if necessary.
        Should be called by the same thread which called `start()`.

        Args:
            timeout(int|None): maximum time to wait on `thread.join()`.
        """
        if self.is_running():
            self._stop_event.set()

        for thread in self._threads:
            if self._use_multiprocessing:
                if thread.is_alive():
                    thread.terminate()
            else:
                thread.join(timeout)

        if self._manager:
            self._manager.shutdown()
            # Drop the reference so a later `stop()` does not touch a
            # manager that has already been shut down.
            self._manager = None

        self._threads = []
        self._stop_event = None
        self.queue = None

    def get(self):
        """
        Creates a generator to extract data from the queue.
        Skip the data if it is `None`.

        Iteration ends as soon as the enqueuer is no longer running; items
        still sitting in the queue at that point are not yielded.

        # Yields
            tuple of data in the queue.
        """
        while self.is_running():
            if not self.queue.empty():
                inputs = self.queue.get()
                if inputs is not None:
                    yield inputs
            else:
                time.sleep(self.wait_time)
...@@ -16,8 +16,6 @@ from __future__ import absolute_import ...@@ -16,8 +16,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import image_util
from paddle.utils.image_util import *
from PIL import Image from PIL import Image
from PIL import ImageDraw from PIL import ImageDraw
import numpy as np import numpy as np
...@@ -28,7 +26,10 @@ import copy ...@@ -28,7 +26,10 @@ import copy
import random import random
import cv2 import cv2
import six import six
from data_util import GeneratorEnqueuer import math
from itertools import islice
import paddle
import image_util
class Settings(object): class Settings(object):
...@@ -199,7 +200,7 @@ def load_file_list(input_txt): ...@@ -199,7 +200,7 @@ def load_file_list(input_txt):
else: else:
file_dict[num_class].append(line_txt) file_dict[num_class].append(line_txt)
return file_dict return file_dict.values()
def expand_bboxes(bboxes, def expand_bboxes(bboxes,
...@@ -227,13 +228,12 @@ def expand_bboxes(bboxes, ...@@ -227,13 +228,12 @@ def expand_bboxes(bboxes,
def train_generator(settings, file_list, batch_size, shuffle=True): def train_generator(settings, file_list, batch_size, shuffle=True):
file_dict = load_file_list(file_list) def reader():
while True:
if shuffle: if shuffle:
np.random.shuffle(file_dict) np.random.shuffle(file_list)
batch_out = [] batch_out = []
for index_image in file_dict.keys(): for item in file_list:
image_name = file_dict[index_image][0] image_name = item[0]
image_path = os.path.join(settings.data_dir, image_name) image_path = os.path.join(settings.data_dir, image_name)
im = Image.open(image_path) im = Image.open(image_path)
if im.mode == 'L': if im.mode == 'L':
...@@ -242,10 +242,10 @@ def train_generator(settings, file_list, batch_size, shuffle=True): ...@@ -242,10 +242,10 @@ def train_generator(settings, file_list, batch_size, shuffle=True):
# layout: label | xmin | ymin | xmax | ymax # layout: label | xmin | ymin | xmax | ymax
bbox_labels = [] bbox_labels = []
for index_box in range(len(file_dict[index_image])): for index_box in range(len(item)):
if index_box >= 2: if index_box >= 2:
bbox_sample = [] bbox_sample = []
temp_info_box = file_dict[index_image][index_box].split(' ') temp_info_box = item[index_box].split(' ')
xmin = float(temp_info_box[0]) xmin = float(temp_info_box[0])
ymin = float(temp_info_box[1]) ymin = float(temp_info_box[1])
w = float(temp_info_box[2]) w = float(temp_info_box[2])
...@@ -277,43 +277,25 @@ def train_generator(settings, file_list, batch_size, shuffle=True): ...@@ -277,43 +277,25 @@ def train_generator(settings, file_list, batch_size, shuffle=True):
yield batch_out yield batch_out
batch_out = [] batch_out = []
return reader
def train(settings,
file_list,
batch_size,
shuffle=True,
use_multiprocessing=True,
num_workers=8,
max_queue=24):
def reader():
try:
enqueuer = GeneratorEnqueuer(
train_generator(settings, file_list, batch_size, shuffle),
use_multiprocessing=use_multiprocessing)
enqueuer.start(max_queue_size=max_queue, workers=num_workers)
generator_output = None
while True:
while enqueuer.is_running():
if not enqueuer.queue.empty():
generator_output = enqueuer.queue.get()
break
else:
time.sleep(0.01)
yield generator_output
generator_output = None
finally:
if enqueuer is not None:
enqueuer.stop()
return reader def train(settings, file_list, batch_size, shuffle=True, num_workers=8):
file_lists = load_file_list(file_list)
n = int(math.ceil(len(file_lists) // num_workers))
split_lists = [file_lists[i:i + n] for i in range(0, len(file_lists), n)]
readers = []
for iterm in split_lists:
readers.append(train_generator(settings, iterm, batch_size, shuffle))
return paddle.reader.multiprocess_reader(readers, False)
def test(settings, file_list): def test(settings, file_list):
file_dict = load_file_list(file_list) file_lists = load_file_list(file_list)
def reader(): def reader():
for index_image in file_dict.keys(): for image in file_lists:
image_name = file_dict[index_image][0] image_name = image[0]
image_path = os.path.join(settings.data_dir, image_name) image_path = os.path.join(settings.data_dir, image_name)
im = Image.open(image_path) im = Image.open(image_path)
if im.mode == 'L': if im.mode == 'L':
......
...@@ -163,9 +163,7 @@ def train(args, config, train_params, train_file_list): ...@@ -163,9 +163,7 @@ def train(args, config, train_params, train_file_list):
train_file_list, train_file_list,
batch_size_per_device, batch_size_per_device,
shuffle = is_shuffle, shuffle = is_shuffle,
use_multiprocessing=True, num_workers = num_workers)
num_workers = num_workers,
max_queue=24)
train_py_reader.decorate_paddle_reader(train_reader) train_py_reader.decorate_paddle_reader(train_reader)
if args.parallel: if args.parallel:
...@@ -182,61 +180,59 @@ def train(args, config, train_params, train_file_list): ...@@ -182,61 +180,59 @@ def train(args, config, train_params, train_file_list):
print('save models to %s' % (model_path)) print('save models to %s' % (model_path))
fluid.io.save_persistables(exe, model_path, main_program=program) fluid.io.save_persistables(exe, model_path, main_program=program)
train_py_reader.start() total_time = 0.0
try: epoch_idx = 0
total_time = 0.0 face_loss = 0
epoch_idx = 0 head_loss = 0
face_loss = 0 for pass_id in range(start_epoc, epoc_num):
head_loss = 0 epoch_idx += 1
for pass_id in range(start_epoc, epoc_num): start_time = time.time()
epoch_idx += 1 prev_start_time = start_time
start_time = time.time() end_time = 0
prev_start_time = start_time batch_id = 0
end_time = 0 train_py_reader.start()
batch_id = 0 while True:
for batch_id in range(iters_per_epoc): try:
prev_start_time = start_time prev_start_time = start_time
start_time = time.time() start_time = time.time()
if args.parallel: if args.parallel:
fetch_vars = train_exe.run(fetch_list= fetch_vars = train_exe.run(fetch_list=
[v.name for v in fetches]) [v.name for v in fetches])
else: else:
fetch_vars = exe.run(train_prog, fetch_vars = exe.run(train_prog, fetch_list=fetches)
fetch_list=fetches)
end_time = time.time() end_time = time.time()
fetch_vars = [np.mean(np.array(v)) for v in fetch_vars] fetch_vars = [np.mean(np.array(v)) for v in fetch_vars]
face_loss = fetch_vars[0]
head_loss = fetch_vars[1]
if batch_id % 10 == 0: if batch_id % 10 == 0:
if not args.use_pyramidbox: if not args.use_pyramidbox:
print("Pass {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format( print("Pass {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
pass_id, batch_id, fetch_vars[0], pass_id, batch_id, face_loss,
start_time - prev_start_time)) start_time - prev_start_time))
else: else:
print("Pass {:d}, batch {:d}, face loss {:.6f}, " \ print("Pass {:d}, batch {:d}, face loss {:.6f}, " \
"head loss {:.6f}, " \ "head loss {:.6f}, " \
"time {:.5f}".format(pass_id, "time {:.5f}".format(pass_id,
batch_id, fetch_vars[0], fetch_vars[1], batch_id, face_loss, head_loss,
start_time - prev_start_time)) start_time - prev_start_time))
face_loss = fetch_vars[0] batch_id += 1
head_loss = fetch_vars[1] except (fluid.core.EOFException, StopIteration):
epoch_end_time = time.time() train_py_reader.reset()
total_time += epoch_end_time - start_time break
if pass_id % 1 == 0 or pass_id == epoc_num - 1: epoch_end_time = time.time()
save_model(str(pass_id), train_prog) total_time += epoch_end_time - start_time
# only for ce save_model(str(pass_id), train_prog)
if args.enable_ce:
gpu_num = get_cards(args) # only for ce
print("kpis\teach_pass_duration_card%s\t%s" % if args.enable_ce:
(gpu_num, total_time / epoch_idx)) gpu_num = get_cards(args)
print("kpis\ttrain_face_loss_card%s\t%s" % print("kpis\teach_pass_duration_card%s\t%s" %
(gpu_num, face_loss)) (gpu_num, total_time / epoch_idx))
print("kpis\ttrain_head_loss_card%s\t%s" % print("kpis\ttrain_face_loss_card%s\t%s" %
(gpu_num, head_loss)) (gpu_num, face_loss))
print("kpis\ttrain_head_loss_card%s\t%s" %
except fluid.core.EOFException: (gpu_num, head_loss))
train_py_reader.reset()
except StopIteration:
train_py_reader.reset()
train_py_reader.reset()
def get_cards(args): def get_cards(args):
......
...@@ -21,9 +21,7 @@ SSD is readily pluggable into a wide variant standard convolutional network, suc ...@@ -21,9 +21,7 @@ SSD is readily pluggable into a wide variant standard convolutional network, suc
### Data Preparation ### Data Preparation
You can use [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) or [MS-COCO dataset](http://cocodataset.org/#download). Please download [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) at first, skip this step if you already have one.
If you want to train a model on PASCAL VOC dataset, please download dataset at first, skip this step if you already have one.
```bash ```bash
cd data/pascalvoc cd data/pascalvoc
...@@ -32,30 +30,18 @@ cd data/pascalvoc ...@@ -32,30 +30,18 @@ cd data/pascalvoc
The command `download.sh` will also create training and testing file lists. The command `download.sh` will also create training and testing file lists.
If you want to train a model on MS-COCO dataset, please download dataset at first, skip this step if you already have one.
```
cd data/coco
./download.sh
```
### Train ### Train
#### Download the Pre-trained Model. #### Download the Pre-trained Model.
We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other datasets, like PASCAL VOC. The other pre-trained model is MobileNet-v1 trained on ImageNet 2012 dataset but removed the last weights and bias in the Fully-Connected layer. We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other datasets, like PASCAL VOC. The other pre-trained model is MobileNet-v1 trained on ImageNet 2012 dataset but removed the last weights and bias in the Fully-Connected layer. Download MobileNet-v1 SSD:
Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md). The MobileNet-v1 model is converted from [Caffe](https://github.com/shicai/MobileNet-Caffe).
We will release the pre-trained models by ourself in the upcoming soon.
- Download MobileNet-v1 SSD:
```bash ```bash
./pretrained/download_coco.sh ./pretrained/download_coco.sh
``` ```
- Download MobileNet-v1:
```bash Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md).
./pretrained/download_imagenet.sh
```
#### Train on PASCAL VOC #### Train on PASCAL VOC
...@@ -64,7 +50,6 @@ We will release the pre-trained models by ourself in the upcoming soon. ...@@ -64,7 +50,6 @@ We will release the pre-trained models by ourself in the upcoming soon.
python -u train.py --batch_size=64 --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/' python -u train.py --batch_size=64 --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/'
``` ```
- Set ```export CUDA_VISIBLE_DEVICES=0,1``` to specify the GPUs you want to use. - Set ```export CUDA_VISIBLE_DEVICES=0,1``` to specify the GPUs you want to use.
- Set ```--dataset='coco2014'``` or ```--dataset='coco2017'``` to train model on MS COCO dataset.
- For more help on arguments: - For more help on arguments:
```bash ```bash
...@@ -88,19 +73,6 @@ You can evaluate your trained model in different metrics like 11point, integral ...@@ -88,19 +73,6 @@ You can evaluate your trained model in different metrics like 11point, integral
python eval.py --dataset='pascalvoc' --model_dir='train_pascal_model/best_model' --data_dir='data/pascalvoc' --test_list='test.txt' --ap_version='11point' --nms_threshold=0.45 python eval.py --dataset='pascalvoc' --model_dir='train_pascal_model/best_model' --data_dir='data/pascalvoc' --test_list='test.txt' --ap_version='11point' --nms_threshold=0.45
``` ```
You can set ```--dataset``` to ```coco2014``` or ```coco2017``` to evaluate COCO dataset. Moreover, we provide `eval_coco_map.py` which uses a COCO-specific mAP metric defined by [COCO committee](http://cocodataset.org/#detections-eval). To use this eval_coco_map.py, [cocoapi](https://github.com/cocodataset/cocoapi) is needed.
Install the cocoapi:
```
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
```
### Infer and Visualize ### Infer and Visualize
`infer.py` is the main caller of the inferring module. Examples of usage are shown below. `infer.py` is the main caller of the inferring module. Examples of usage are shown below.
```bash ```bash
......
...@@ -21,9 +21,8 @@ SSD 可以方便地插入到任何一种标准卷积网络中,比如 VGG、Res ...@@ -21,9 +21,8 @@ SSD 可以方便地插入到任何一种标准卷积网络中,比如 VGG、Res
### 数据准备 ### 数据准备
你可以使用 [PASCAL VOC 数据集](http://host.robots.ox.ac.uk/pascal/VOC/) 或者 [MS-COCO 数据集](http://cocodataset.org/#download)
如果你想在 PASCAL VOC 数据集上进行训练,请先使用下面的命令下载数据集。 请先使用下面的命令下载 [PASCAL VOC 数据集](http://host.robots.ox.ac.uk/pascal/VOC/)
```bash ```bash
cd data/pascalvoc cd data/pascalvoc
...@@ -32,29 +31,19 @@ cd data/pascalvoc ...@@ -32,29 +31,19 @@ cd data/pascalvoc
`download.sh` 命令会自动创建训练和测试用的列表文件。 `download.sh` 命令会自动创建训练和测试用的列表文件。
如果你想在 MS-COCO 数据集上进行训练,请先使用下面的命令下载数据集。
```
cd data/coco
./download.sh
```
### 模型训练 ### 模型训练
#### 下载预训练模型 #### 下载预训练模型
我们提供了两个预训练模型。第一个模型是在 COCO 数据集上预训练的 MobileNet-v1 SSD,我们将它的预测头移除了以便在 COCO 以外的数据集上进行训练。第二个模型是在 ImageNet 2012 数据集上预训练的 MobileNet-v1,我们也将最后的全连接层移除以便进行目标检测训练。 我们提供了两个预训练模型。第一个模型是在 COCO 数据集上预训练的 MobileNet-v1 SSD,我们将它的预测头移除了以便在 COCO 以外的数据集上进行训练。第二个模型是在 ImageNet 2012 数据集上预训练的 MobileNet-v1,我们也将最后的全连接层移除以便进行目标检测训练。下载 MobileNet-v1 SSD:
声明:MobileNet-v1 SSD 模型转换自[TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md)。MobileNet-v1 模型转换自[Caffe](https://github.com/shicai/MobileNet-Caffe)。我们不久也会发布我们自己预训练的模型。
- 下载 MobileNet-v1 SSD:
```bash ```bash
./pretrained/download_coco.sh ./pretrained/download_coco.sh
``` ```
- 下载 MobileNet-v1:
```bash 声明:MobileNet-v1 SSD 模型转换自[TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md)。MobileNet-v1 模型转换自[Caffe](https://github.com/shicai/MobileNet-Caffe)
./pretrained/download_imagenet.sh
```
#### 训练 #### 训练
...@@ -63,7 +52,6 @@ cd data/coco ...@@ -63,7 +52,6 @@ cd data/coco
python -u train.py --batch_size=64 --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/' python -u train.py --batch_size=64 --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/'
``` ```
- 可以通过设置 ```export CUDA_VISIBLE_DEVICES=0,1``` 指定想要使用的GPU数量。 - 可以通过设置 ```export CUDA_VISIBLE_DEVICES=0,1``` 指定想要使用的GPU数量。
- 可以通过设置 ```--dataset='coco2014'``````--dataset='coco2017'``` 指定训练 MS-COCO数据集。
- 更多的可选参数见: - 更多的可选参数见:
```bash ```bash
...@@ -80,25 +68,13 @@ cd data/coco ...@@ -80,25 +68,13 @@ cd data/coco
### 模型评估 ### 模型评估
你可以使用11point、integral等指标在PASCAL VOC 和 COCO 数据集上评估训练好的模型。不失一般性,我们采用相应数据集的测试列表作为样例代码的默认列表,你也可以通过设置```--test_list```来指定自己的测试样本列表。 你可以使用11point、integral等指标在PASCAL VOC 数据集上评估训练好的模型。不失一般性,我们采用相应数据集的测试列表作为样例代码的默认列表,你也可以通过设置```--test_list```来指定自己的测试样本列表。
`eval.py`是评估模块的主要执行程序,调用示例如下: `eval.py`是评估模块的主要执行程序,调用示例如下:
```bash ```bash
python eval.py --dataset='pascalvoc' --model_dir='train_pascal_model/best_model' --data_dir='data/pascalvoc' --test_list='test.txt' --ap_version='11point' --nms_threshold=0.45 python eval.py --dataset='pascalvoc' --model_dir='train_pascal_model/best_model' --data_dir='data/pascalvoc' --test_list='test.txt' --ap_version='11point' --nms_threshold=0.45
``` ```
你可以设置```--dataset``````coco2014``````coco2017```来评估 COCO 数据集。我们也提供了`eval_coco_map.py`以进行[COCO官方评估](http://cocodataset.org/#detections-eval)。若要使用 eval_coco_map.py, 需要首先下载[cocoapi](https://github.com/cocodataset/cocoapi)
```
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
```
### 模型预测以及可视化 ### 模型预测以及可视化
`infer.py`是预测及可视化模块的主要执行程序,调用示例如下: `infer.py`是预测及可视化模块的主要执行程序,调用示例如下:
......
"""
This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
"""
import time
import numpy as np
import threading
import multiprocessing
try:
import queue
except ImportError:
import Queue as queue
class GeneratorEnqueuer(object):
    """
    Builds a queue out of a data generator.

    Worker threads (or processes) repeatedly call `next()` on the shared
    generator and put each result on `self.queue`. Consumers either read
    `self.queue` directly or iterate over `get()`.

    Args:
        generator: a generator function which endlessly yields data
        use_multiprocessing (bool): use multiprocessing if True,
            otherwise use threading.
        wait_time (float): time to sleep in-between calls to `put()`.
        random_seed (int): Initial seed for workers,
            will be incremented by one for each workers.
    """

    def __init__(self,
                 generator,
                 use_multiprocessing=False,
                 wait_time=0.05,
                 random_seed=None):
        self.wait_time = wait_time
        self._generator = generator
        self._use_multiprocessing = use_multiprocessing
        self._threads = []
        self._stop_event = None
        self.queue = None
        self._manager = None
        # Only created in threading mode (see `start()`); defined here so the
        # attribute always exists.
        self.genlock = None
        self.seed = random_seed

    def start(self, workers=1, max_queue_size=10):
        """
        Start worker threads which add data from the generator into the queue.

        Args:
            workers (int): number of worker threads
            max_queue_size (int): queue size
                (when full, threads could block on `put()`)
        """
        # Imported here because this module's import list does not provide
        # `traceback`; without it the error path below would itself fail
        # with NameError and the stop event would never be set.
        import traceback

        def data_generator_task(out_queue, stop_event, lock):
            """
            Data generator task.

            The queue, stop event and lock are passed in explicitly instead
            of being read from `self`, because `stop()` resets those
            attributes to None and a worker that is still winding down must
            not trip over that.
            """

            def step():
                # Produce one item if there is room, otherwise back off.
                if out_queue.qsize() < max_queue_size:
                    generator_output = next(self._generator)
                    out_queue.put(generator_output)
                else:
                    time.sleep(self.wait_time)

            while not stop_event.is_set():
                try:
                    if lock is None:
                        step()
                    else:
                        # Threads share one generator object, so calls to
                        # `next()` have to be serialized.
                        with lock:
                            step()
                except StopIteration:
                    # The generator is exhausted: a normal end of data, not
                    # an error, so shut down without printing a traceback.
                    stop_event.set()
                    break
                except Exception:
                    traceback.print_exc()
                    stop_event.set()
                    break

        try:
            if self._use_multiprocessing:
                self._manager = multiprocessing.Manager()
                self.queue = self._manager.Queue(maxsize=max_queue_size)
                self._stop_event = multiprocessing.Event()
                lock = None
            else:
                self.genlock = threading.Lock()
                self.queue = queue.Queue()
                self._stop_event = threading.Event()
                lock = self.genlock

            worker_args = (self.queue, self._stop_event, lock)
            for _ in range(workers):
                if self._use_multiprocessing:
                    # Reset random seed else all children processes
                    # share the same seed
                    np.random.seed(self.seed)
                    thread = multiprocessing.Process(
                        target=data_generator_task, args=worker_args)
                    thread.daemon = True
                    if self.seed is not None:
                        self.seed += 1
                else:
                    thread = threading.Thread(
                        target=data_generator_task, args=worker_args)
                self._threads.append(thread)
                thread.start()
        except BaseException:
            # Tear down whatever was already started, then let the original
            # error (including KeyboardInterrupt) propagate.
            self.stop()
            raise

    def is_running(self):
        """
        Returns:
            bool: Whether the worker threads are running, i.e. `start()` has
                been called and the stop event has not been set.
        """
        return self._stop_event is not None and not self._stop_event.is_set()

    def stop(self, timeout=None):
        """
        Stops running threads and wait for them to exit, if necessary.
        Should be called by the same thread which called `start()`.

        Args:
            timeout(int|None): maximum time to wait on `thread.join()`.
        """
        if self.is_running():
            self._stop_event.set()

        for thread in self._threads:
            if self._use_multiprocessing:
                if thread.is_alive():
                    thread.terminate()
            else:
                thread.join(timeout)

        if self._manager:
            self._manager.shutdown()
            # Drop the reference so a later `stop()` does not touch a
            # manager that has already been shut down.
            self._manager = None

        self._threads = []
        self._stop_event = None
        self.queue = None

    def get(self):
        """
        Creates a generator to extract data from the queue.
        Skip the data if it is `None`.

        Iteration ends as soon as the enqueuer is no longer running; items
        still sitting in the queue at that point are not yielded.

        # Yields
            tuple of data in the queue.
        """
        while self.is_running():
            if not self.queue.empty():
                inputs = self.queue.get()
                if inputs is not None:
                    yield inputs
            else:
                time.sleep(self.wait_time)
...@@ -52,7 +52,7 @@ def build_program(main_prog, startup_prog, args, data_args): ...@@ -52,7 +52,7 @@ def build_program(main_prog, startup_prog, args, data_args):
nmsed_out = fluid.layers.detection_output( nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=args.nms_threshold) locs, confs, box, box_var, nms_threshold=args.nms_threshold)
with fluid.program_guard(main_prog): with fluid.program_guard(main_prog):
map = fluid.evaluator.DetectionMAP( map = fluid.metrics.DetectionMAP(
nmsed_out, nmsed_out,
gt_label, gt_label,
gt_box, gt_box,
......
...@@ -47,7 +47,7 @@ def eval(args, data_args, test_list, batch_size, model_dir=None): ...@@ -47,7 +47,7 @@ def eval(args, data_args, test_list, batch_size, model_dir=None):
gt_iscrowd = fluid.layers.data( gt_iscrowd = fluid.layers.data(
name='gt_iscrowd', shape=[1], dtype='int32', lod_level=1) name='gt_iscrowd', shape=[1], dtype='int32', lod_level=1)
gt_image_info = fluid.layers.data( gt_image_info = fluid.layers.data(
name='gt_image_id', shape=[3], dtype='int32', lod_level=1) name='gt_image_id', shape=[3], dtype='int32')
locs, confs, box, box_var = mobile_net(num_classes, image, image_shape) locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
nmsed_out = fluid.layers.detection_output( nmsed_out = fluid.layers.detection_output(
...@@ -57,14 +57,14 @@ def eval(args, data_args, test_list, batch_size, model_dir=None): ...@@ -57,14 +57,14 @@ def eval(args, data_args, test_list, batch_size, model_dir=None):
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# yapf: disable # yapf: disable
if model_dir: if model_dir:
def if_exist(var): def if_exist(var):
return os.path.exists(os.path.join(model_dir, var.name)) return os.path.exists(os.path.join(model_dir, var.name))
fluid.io.load_vars(exe, model_dir, predicate=if_exist) fluid.io.load_vars(exe, model_dir, predicate=if_exist)
# yapf: enable # yapf: enable
test_reader = paddle.batch( test_reader = reader.test(data_args, test_list, batch_size)
reader.test(data_args, test_list), batch_size=batch_size)
feeder = fluid.DataFeeder( feeder = fluid.DataFeeder(
place=place, place=place,
feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info]) feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info])
...@@ -146,8 +146,7 @@ if __name__ == '__main__': ...@@ -146,8 +146,7 @@ if __name__ == '__main__':
mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R], mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
apply_distort=False, apply_distort=False,
apply_expand=False, apply_expand=False,
ap_version=args.ap_version, ap_version=args.ap_version)
toy=0)
eval( eval(
args, args,
data_args=data_args, data_args=data_args,
......
...@@ -12,17 +12,17 @@ ...@@ -12,17 +12,17 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import image_util
from paddle.utils.image_util import *
from PIL import Image
from PIL import ImageDraw
import numpy as np
import xml.etree.ElementTree import xml.etree.ElementTree
import os import os
import time import time
import copy import copy
import six import six
from data_util import GeneratorEnqueuer import math
import numpy as np
from PIL import Image
from PIL import ImageDraw
import image_util
import paddle
class Settings(object): class Settings(object):
...@@ -162,26 +162,14 @@ def preprocess(img, bbox_labels, mode, settings): ...@@ -162,26 +162,14 @@ def preprocess(img, bbox_labels, mode, settings):
return img, sampled_labels return img, sampled_labels
def coco(settings, file_list, mode, batch_size, shuffle): def coco(settings, coco_api, file_list, mode, batch_size, shuffle, data_dir):
# cocoapi
from pycocotools.coco import COCO from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
coco = COCO(file_list)
image_ids = coco.getImgIds()
images = coco.loadImgs(image_ids)
print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
def reader(): def reader():
if mode == 'train' and shuffle: if mode == 'train' and shuffle:
np.random.shuffle(images) np.random.shuffle(file_list)
batch_out = [] batch_out = []
if '2014' in file_list: for image in file_list:
sub_dir = "train2014" if model == "train" else "val2014"
elif '2017' in file_list:
sub_dir = "train2017" if mode == "train" else "val2017"
data_dir = os.path.join(settings.data_dir, sub_dir)
for image in images:
image_name = image['file_name'] image_name = image['file_name']
image_path = os.path.join(data_dir, image_name) image_path = os.path.join(data_dir, image_name)
if not os.path.exists(image_path): if not os.path.exists(image_path):
...@@ -195,8 +183,8 @@ def coco(settings, file_list, mode, batch_size, shuffle): ...@@ -195,8 +183,8 @@ def coco(settings, file_list, mode, batch_size, shuffle):
# layout: category_id | xmin | ymin | xmax | ymax | iscrowd # layout: category_id | xmin | ymin | xmax | ymax | iscrowd
bbox_labels = [] bbox_labels = []
annIds = coco.getAnnIds(imgIds=image['id']) annIds = coco_api.getAnnIds(imgIds=image['id'])
anns = coco.loadAnns(annIds) anns = coco_api.loadAnns(annIds)
for ann in anns: for ann in anns:
bbox_sample = [] bbox_sample = []
# start from 1, leave 0 to background # start from 1, leave 0 to background
...@@ -236,16 +224,12 @@ def coco(settings, file_list, mode, batch_size, shuffle): ...@@ -236,16 +224,12 @@ def coco(settings, file_list, mode, batch_size, shuffle):
def pascalvoc(settings, file_list, mode, batch_size, shuffle): def pascalvoc(settings, file_list, mode, batch_size, shuffle):
flist = open(file_list)
images = [line.strip() for line in flist]
print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
def reader(): def reader():
if mode == 'train' and shuffle: if mode == 'train' and shuffle:
np.random.shuffle(images) np.random.shuffle(file_list)
batch_out = [] batch_out = []
cnt = 0 cnt = 0
for image in images: for image in file_list:
image_path, label_path = image.split() image_path, label_path = image.split()
image_path = os.path.join(settings.data_dir, image_path) image_path = os.path.join(settings.data_dir, image_path)
label_path = os.path.join(settings.data_dir, label_path) label_path = os.path.join(settings.data_dir, label_path)
def _split_into_chunks(items, num_chunks):
    """Split ``items`` into at most ``num_chunks`` contiguous, non-empty slices.

    Returns an empty list when ``items`` is empty.
    """
    if not items:
        return []
    # Ceiling division done in integer arithmetic.  The previous
    # ``math.ceil(len(items) // num_chunks)`` floored *before* ceil, which
    # gave a chunk size of 0 (and therefore a ``range()`` step of 0) whenever
    # there were fewer items than workers, and more slices than workers in
    # most other cases.
    chunk_size = (len(items) + num_chunks - 1) // num_chunks
    return [
        items[start:start + chunk_size]
        for start in range(0, len(items), chunk_size)
    ]


def train(settings,
          file_list,
          batch_size,
          shuffle=True,
          num_workers=8,
          enable_ce=False):
    """Build the training reader, sharding the image list across workers.

    The image list is cut into at most ``num_workers`` contiguous shards, one
    per-shard reader is created with ``coco`` or ``pascalvoc``, and the
    per-shard readers are combined with ``paddle.reader.multiprocess_reader``.

    Args:
        settings: object exposing ``data_dir`` and ``dataset``; it is also
            forwarded unchanged to ``coco`` / ``pascalvoc``.
        file_list: path, relative to ``settings.data_dir``, of the COCO
            annotation file or of the Pascal VOC list file.
        batch_size: forwarded to the per-shard reader.
        shuffle: forwarded to the per-shard reader.
        num_workers: upper bound on the number of shards; must be >= 1.
        enable_ce: accepted for backward compatibility with existing callers;
            not used by this function.

    Returns:
        Whatever ``paddle.reader.multiprocess_reader`` returns for the
        per-shard readers.

    Raises:
        ValueError: if ``num_workers`` is smaller than 1, if a COCO list name
            contains neither ``2014`` nor ``2017``, or if the list is empty.
    """
    if num_workers < 1:
        raise ValueError(
            "num_workers must be >= 1, got {}".format(num_workers))

    file_path = os.path.join(settings.data_dir, file_list)
    readers = []
    if 'coco' in settings.dataset:
        # Resolve the image sub-directory first so that an unsupported list
        # name fails before the annotation file is parsed.
        if '2014' in file_list:
            sub_dir = "train2014"
        elif '2017' in file_list:
            sub_dir = "train2017"
        else:
            raise ValueError(
                "cannot infer the COCO image directory from {!r}: expected "
                "'2014' or '2017' in the file name".format(file_list))
        data_dir = os.path.join(settings.data_dir, sub_dir)

        # cocoapi
        from pycocotools.coco import COCO
        coco_api = COCO(file_path)
        image_ids = coco_api.getImgIds()
        images = coco_api.loadImgs(image_ids)
        for image_chunk in _split_into_chunks(images, num_workers):
            readers.append(
                coco(settings, coco_api, image_chunk, 'train', batch_size,
                     shuffle, data_dir))
    else:
        # Close the list file deterministically instead of leaking the handle.
        with open(file_path) as flist:
            images = [line.strip() for line in flist]
        for image_chunk in _split_into_chunks(images, num_workers):
            readers.append(
                pascalvoc(settings, image_chunk, 'train', batch_size, shuffle))

    if not readers:
        raise ValueError("no training images listed in {}".format(file_path))
    # NOTE(review): the second positional argument is presumably ``use_pipe``
    # -- confirm against the installed Paddle version.
    return paddle.reader.multiprocess_reader(readers, False)
def test(settings, file_list, batch_size):
    """Build the (unshuffled) evaluation reader for the configured dataset.

    Args:
        settings: object exposing ``data_dir`` and ``dataset``; it is also
            forwarded unchanged to ``coco`` / ``pascalvoc``.
        file_list: path, relative to ``settings.data_dir``, of the COCO
            annotation file or of the Pascal VOC list file.
        batch_size: forwarded to the dataset reader.

    Returns:
        The value returned by ``coco`` (when ``'coco'`` is in
        ``settings.dataset``) or by ``pascalvoc`` otherwise, created in
        ``'test'`` mode with shuffling disabled.

    Raises:
        ValueError: if a COCO list path contains neither ``2014`` nor
            ``2017``, so the image directory cannot be inferred.
    """
    file_list = os.path.join(settings.data_dir, file_list)
    if 'coco' in settings.dataset:
        # The year is looked up in the joined path (data_dir included).
        # Resolve it before loading annotations so an unsupported name fails
        # early with a clear message instead of an unbound ``sub_dir``.
        if '2014' in file_list:
            sub_dir = "val2014"
        elif '2017' in file_list:
            sub_dir = "val2017"
        else:
            raise ValueError(
                "cannot infer the COCO image directory from {!r}: expected "
                "'2014' or '2017' in the path".format(file_list))
        data_dir = os.path.join(settings.data_dir, sub_dir)

        from pycocotools.coco import COCO
        coco_api = COCO(file_list)
        image_ids = coco_api.getImgIds()
        images = coco_api.loadImgs(image_ids)
        return coco(settings, coco_api, images, 'test', batch_size, False,
                    data_dir)

    # Close the list file deterministically instead of leaking the handle.
    with open(file_list) as flist:
        image_list = [line.strip() for line in flist]
    return pascalvoc(settings, image_list, 'test', batch_size, False)
def infer(settings, image_path): def infer(settings, image_path):
......
...@@ -105,7 +105,7 @@ def build_program(main_prog, startup_prog, train_params, is_train): ...@@ -105,7 +105,7 @@ def build_program(main_prog, startup_prog, train_params, is_train):
with fluid.unique_name.guard("inference"): with fluid.unique_name.guard("inference"):
nmsed_out = fluid.layers.detection_output( nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45) locs, confs, box, box_var, nms_threshold=0.45)
map_eval = fluid.evaluator.DetectionMAP( map_eval = fluid.metrics.DetectionMAP(
nmsed_out, nmsed_out,
gt_label, gt_label,
gt_box, gt_box,
...@@ -156,6 +156,7 @@ def train(args, ...@@ -156,6 +156,7 @@ def train(args,
startup_prog.random_seed = 111 startup_prog.random_seed = 111
train_prog.random_seed = 111 train_prog.random_seed = 111
test_prog.random_seed = 111 test_prog.random_seed = 111
num_workers = 1
train_py_reader, loss = build_program( train_py_reader, loss = build_program(
main_prog=train_prog, main_prog=train_prog,
...@@ -186,9 +187,7 @@ def train(args, ...@@ -186,9 +187,7 @@ def train(args,
train_file_list, train_file_list,
batch_size_per_device, batch_size_per_device,
shuffle=is_shuffle, shuffle=is_shuffle,
use_multiprocessing=True,
num_workers=num_workers, num_workers=num_workers,
max_queue=24,
enable_ce=enable_ce) enable_ce=enable_ce)
test_reader = reader.test(data_args, val_file_list, batch_size) test_reader = reader.test(data_args, val_file_list, batch_size)
train_py_reader.decorate_paddle_reader(train_reader) train_py_reader.decorate_paddle_reader(train_reader)
...@@ -205,7 +204,7 @@ def train(args, ...@@ -205,7 +204,7 @@ def train(args,
def test(epoc_id, best_map): def test(epoc_id, best_map):
_, accum_map = map_eval.get_map_var() _, accum_map = map_eval.get_map_var()
map_eval.reset(exe) map_eval.reset(exe)
every_epoc_map=[] every_epoc_map=[] # for CE
test_py_reader.start() test_py_reader.start()
try: try:
batch_id = 0 batch_id = 0
...@@ -218,22 +217,23 @@ def train(args, ...@@ -218,22 +217,23 @@ def train(args,
except fluid.core.EOFException: except fluid.core.EOFException:
test_py_reader.reset() test_py_reader.reset()
mean_map = np.mean(every_epoc_map) mean_map = np.mean(every_epoc_map)
print("Epoc {0}, test map {1}".format(epoc_id, test_map)) print("Epoc {0}, test map {1}".format(epoc_id, test_map[0]))
if test_map[0] > best_map: if test_map[0] > best_map:
best_map = test_map[0] best_map = test_map[0]
save_model('best_model', test_prog) save_model('best_model', test_prog)
return best_map, mean_map return best_map, mean_map
train_py_reader.start()
total_time = 0.0 total_time = 0.0
try: for epoc_id in range(epoc_num):
for epoc_id in range(epoc_num): epoch_idx = epoc_id + 1
epoch_idx = epoc_id + 1 start_time = time.time()
start_time = time.time() prev_start_time = start_time
prev_start_time = start_time every_epoc_loss = []
every_epoc_loss = [] batch_id = 0
for batch_id in range(iters_per_epoc): train_py_reader.start()
while True:
try:
prev_start_time = start_time prev_start_time = start_time
start_time = time.time() start_time = time.time()
if parallel: if parallel:
...@@ -242,34 +242,35 @@ def train(args, ...@@ -242,34 +242,35 @@ def train(args,
loss_v, = exe.run(train_prog, fetch_list=[loss]) loss_v, = exe.run(train_prog, fetch_list=[loss])
loss_v = np.mean(np.array(loss_v)) loss_v = np.mean(np.array(loss_v))
every_epoc_loss.append(loss_v) every_epoc_loss.append(loss_v)
if batch_id % 20 == 0: if batch_id % 10 == 0:
print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format( print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
epoc_id, batch_id, loss_v, start_time - prev_start_time)) epoc_id, batch_id, loss_v, start_time - prev_start_time))
end_time = time.time() batch_id += 1
total_time += end_time - start_time except (fluid.core.EOFException, StopIteration):
train_reader().close()
best_map, mean_map = test(epoc_id, best_map) train_py_reader.reset()
print("Best test map {0}".format(best_map)) break
if epoc_id % 10 == 0 or epoc_id == epoc_num - 1:
save_model(str(epoc_id), train_prog) end_time = time.time()
total_time += end_time - start_time
if enable_ce and epoc_id == epoc_num - 1: best_map, mean_map = test(epoc_id, best_map)
train_avg_loss = np.mean(every_epoc_loss) print("Best test map {0}".format(best_map))
if devices_num == 1: if epoc_id % 10 == 0 or epoc_id == epoc_num - 1:
print("kpis train_cost %s" % train_avg_loss) save_model(str(epoc_id), train_prog)
print("kpis test_acc %s" % mean_map)
print("kpis train_speed %s" % (total_time / epoch_idx)) if enable_ce:
else: train_avg_loss = np.mean(every_epoc_loss)
print("kpis train_cost_card%s %s" % if devices_num == 1:
(devices_num, train_avg_loss)) print("kpis train_cost %s" % train_avg_loss)
print("kpis test_acc_card%s %s" % print("kpis test_acc %s" % mean_map)
(devices_num, mean_map)) print("kpis train_speed %s" % (total_time / epoch_idx))
print("kpis train_speed_card%s %f" % else:
(devices_num, total_time / epoch_idx)) print("kpis train_cost_card%s %s" %
(devices_num, train_avg_loss))
except (fluid.core.EOFException, StopIteration): print("kpis test_acc_card%s %s" %
train_reader().close() (devices_num, mean_map))
train_py_reader.reset() print("kpis train_speed_card%s %f" %
(devices_num, total_time / epoch_idx))
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册