未验证 提交 a742f5c6 编写于 作者: Q qingqing01 提交者: GitHub

Refine code and fix some model. (#1693)

* Fix exception when finishing training in detection.
* Fix fluid.Trainer in HiNAS_models.
* Remove coco doc in SSD detection.
上级 ba609980
......@@ -21,6 +21,7 @@ import math
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.trainer import *
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import reader
......@@ -104,7 +105,7 @@ class Model(object):
accs = []
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
costs.append(event.metrics[0])
accs.append(event.metrics[1])
if event.step % 20 == 0:
......@@ -113,7 +114,7 @@ class Model(object):
del costs[:]
del accs[:]
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
if event.epoch % 3 == 0 or event.epoch == FLAGS.num_epochs - 1:
avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label'])
......@@ -126,7 +127,7 @@ class Model(object):
event_handler.best_acc = 0.0
place = fluid.CUDAPlace(0)
trainer = fluid.Trainer(
trainer = Trainer(
train_func=self.train_network,
optimizer_func=self.optimizer_program,
place=place)
......
"""
This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import numpy as np
import threading
import multiprocessing
import traceback
try:
import queue
except ImportError:
import Queue as queue
class GeneratorEnqueuer(object):
    """
    Builds a queue out of a data generator.

    Workers launched by `start()` repeatedly call `next()` on the generator
    and put the result on `self.queue`; `get()` drains that queue while the
    workers are running.

    Args:
        generator: a generator function which endlessly yields data
        use_multiprocessing (bool): use multiprocessing if True,
            otherwise use threading.
        wait_time (float): time to sleep in-between calls to `put()`.
        random_seed (int): Initial seed for workers,
            will be incremented by one for each workers.
    """

    def __init__(self,
                 generator,
                 use_multiprocessing=False,
                 wait_time=0.05,
                 random_seed=None):
        self.wait_time = wait_time
        self._generator = generator
        self._use_multiprocessing = use_multiprocessing
        self._threads = []
        self._stop_event = None
        self.queue = None
        self._manager = None
        self.seed = random_seed
        # Lock serialising generator access in threading mode; created by
        # start(), declared here so the attribute always exists.
        self.genlock = None

    def start(self, workers=1, max_queue_size=10):
        """
        Start worker threads which add data from the generator into the queue.

        If anything fails while launching workers, `stop()` is called and the
        original exception is re-raised.

        Args:
            workers (int): number of worker threads
            max_queue_size (int): queue size
                (when full, threads could block on `put()`)
        """

        def task():
            """Move one item from the generator to the queue, or back off."""
            # Bind once so a concurrent stop() resetting self.queue to None
            # cannot slip in between the None check and the qsize() call.
            q = self.queue
            if q is not None and q.qsize() < max_queue_size:
                generator_output = next(self._generator)
                q.put((generator_output))
            else:
                time.sleep(self.wait_time)

        def data_generator_task():
            """
            Data generator task.

            Loops until the stop event is set. Any exception raised by the
            generator (including StopIteration) is printed and stops all
            workers.
            """
            # Keep local references: stop() resets the attributes to None and
            # a worker that outlives a timed-out join must not dereference
            # them afterwards.
            stop_event = self._stop_event
            lock = None if self._use_multiprocessing else self.genlock
            while not stop_event.is_set():
                try:
                    if lock is None:
                        task()
                    else:
                        # Generators are not thread-safe; serialise next().
                        with lock:
                            task()
                except Exception:
                    traceback.print_exc()
                    stop_event.set()
                    break

        try:
            if self._use_multiprocessing:
                self._manager = multiprocessing.Manager()
                self.queue = self._manager.Queue(maxsize=max_queue_size)
                self._stop_event = multiprocessing.Event()
            else:
                self.genlock = threading.Lock()
                self.queue = queue.Queue()
                self._stop_event = threading.Event()

            for _ in range(workers):
                if self._use_multiprocessing:
                    # Reset random seed else all children processes
                    # share the same seed
                    np.random.seed(self.seed)
                    # NOTE(review): the target is a nested function, which
                    # presumably requires the 'fork' start method — confirm
                    # on platforms that default to 'spawn'.
                    thread = multiprocessing.Process(target=data_generator_task)
                    thread.daemon = True
                    if self.seed is not None:
                        self.seed += 1
                else:
                    thread = threading.Thread(target=data_generator_task)
                # Track the worker only once it has actually started, so
                # stop() never tries to join a worker that was never started.
                thread.start()
                self._threads.append(thread)
        except BaseException:
            self.stop()
            raise

    def is_running(self):
        """
        Returns:
            bool: Whether the worker threads are running.
        """
        return self._stop_event is not None and not self._stop_event.is_set()

    def stop(self, timeout=None):
        """
        Stops running threads and wait for them to exit, if necessary.
        Should be called by the same thread which called `start()`.

        Args:
            timeout(int|None): maximum time to wait on `thread.join()`.
        """
        if self.is_running():
            self._stop_event.set()

        for thread in self._threads:
            if self._use_multiprocessing and thread.is_alive():
                thread.terminate()
            # Join in both modes: for processes this reaps the terminated
            # child instead of leaving it behind as a zombie.
            thread.join(timeout)

        if self._manager:
            self._manager.shutdown()
            # Drop the reference so a repeated stop() does not touch a
            # manager that has already been shut down.
            self._manager = None

        self._threads = []
        self._stop_event = None
        self.queue = None

    def get(self):
        """
        Creates a generator to extract data from the queue.
        Skip the data if it is `None`.

        Iteration ends as soon as the workers are no longer running; items
        still sitting in the queue at that point are not yielded.

        # Yields
            tuple of data in the queue.
        """
        while self.is_running():
            if not self.queue.empty():
                inputs = self.queue.get()
                if inputs is not None:
                    yield inputs
            else:
                time.sleep(self.wait_time)
......@@ -16,8 +16,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import image_util
from paddle.utils.image_util import *
from PIL import Image
from PIL import ImageDraw
import numpy as np
......@@ -28,7 +26,10 @@ import copy
import random
import cv2
import six
from data_util import GeneratorEnqueuer
import math
from itertools import islice
import paddle
import image_util
class Settings(object):
......@@ -199,7 +200,7 @@ def load_file_list(input_txt):
else:
file_dict[num_class].append(line_txt)
return file_dict
return file_dict.values()
def expand_bboxes(bboxes,
......@@ -227,13 +228,12 @@ def expand_bboxes(bboxes,
def train_generator(settings, file_list, batch_size, shuffle=True):
file_dict = load_file_list(file_list)
while True:
def reader():
if shuffle:
np.random.shuffle(file_dict)
np.random.shuffle(file_list)
batch_out = []
for index_image in file_dict.keys():
image_name = file_dict[index_image][0]
for item in file_list:
image_name = item[0]
image_path = os.path.join(settings.data_dir, image_name)
im = Image.open(image_path)
if im.mode == 'L':
......@@ -242,10 +242,10 @@ def train_generator(settings, file_list, batch_size, shuffle=True):
# layout: label | xmin | ymin | xmax | ymax
bbox_labels = []
for index_box in range(len(file_dict[index_image])):
for index_box in range(len(item)):
if index_box >= 2:
bbox_sample = []
temp_info_box = file_dict[index_image][index_box].split(' ')
temp_info_box = item[index_box].split(' ')
xmin = float(temp_info_box[0])
ymin = float(temp_info_box[1])
w = float(temp_info_box[2])
......@@ -277,43 +277,25 @@ def train_generator(settings, file_list, batch_size, shuffle=True):
yield batch_out
batch_out = []
return reader
def train(settings,
          file_list,
          batch_size,
          shuffle=True,
          use_multiprocessing=True,
          num_workers=8,
          max_queue=24):
    """
    Build a reader that serves training batches through a GeneratorEnqueuer.

    Args:
        settings: passed through to `train_generator`.
        file_list: passed through to `train_generator`.
        batch_size: passed through to `train_generator`.
        shuffle (bool): passed through to `train_generator`.
        use_multiprocessing (bool): run the enqueuer workers as processes
            instead of threads.
        num_workers (int): number of enqueuer workers.
        max_queue (int): maximum number of batches buffered in the queue.

    Returns:
        A no-argument callable; calling it returns a generator of batches.
    """

    def reader():
        # Bound before the try block so the finally clause never hits an
        # unbound name when constructing the enqueuer itself fails.
        enqueuer = None
        try:
            enqueuer = GeneratorEnqueuer(
                train_generator(settings, file_list, batch_size, shuffle),
                use_multiprocessing=use_multiprocessing)
            enqueuer.start(max_queue_size=max_queue, workers=num_workers)
            while True:
                generator_output = None
                received = False
                # Poll until a batch is available or the workers have stopped.
                while enqueuer.is_running():
                    if not enqueuer.queue.empty():
                        generator_output = enqueuer.queue.get()
                        received = True
                        break
                    else:
                        time.sleep(0.01)
                if not received:
                    # The workers stopped (generator exhausted or failed):
                    # end the stream instead of yielding None forever.
                    return
                yield generator_output
        finally:
            if enqueuer is not None:
                enqueuer.stop()

    return reader
def train(settings, file_list, batch_size, shuffle=True, num_workers=8):
    """
    Build a multi-process training reader over shards of the file list.

    The entries returned by `load_file_list` are split into at most
    `num_workers` contiguous shards; one `train_generator` reader is created
    per shard and the readers are combined with
    `paddle.reader.multiprocess_reader`.

    Args:
        settings: passed through to `train_generator`.
        file_list: passed to `load_file_list` to obtain the sample entries.
        batch_size: passed through to `train_generator`.
        shuffle (bool): passed through to `train_generator`.
        num_workers (int): desired number of shards / reader processes.

    Returns:
        The reader produced by `paddle.reader.multiprocess_reader`.

    Raises:
        ValueError: if the file list contains no entries.
    """
    # load_file_list may hand back a dict view, which cannot be sliced on
    # Python 3; materialise it first.
    file_lists = list(load_file_list(file_list))
    if not file_lists:
        raise ValueError("no training samples found in %r" % (file_list, ))
    # True ceiling division: the previous `ceil(a // b)` floored first, which
    # produced more shards than workers and a zero step (ValueError in
    # range()) when there were fewer samples than workers.
    shard_size = int(math.ceil(len(file_lists) / float(max(1, num_workers))))
    shard_size = max(1, shard_size)
    split_lists = [
        file_lists[i:i + shard_size]
        for i in range(0, len(file_lists), shard_size)
    ]
    readers = [
        train_generator(settings, shard, batch_size, shuffle)
        for shard in split_lists
    ]
    # Second positional argument kept as in the original call; presumably
    # `use_pipe=False` — confirm against the installed paddle version.
    return paddle.reader.multiprocess_reader(readers, False)
def test(settings, file_list):
file_dict = load_file_list(file_list)
file_lists = load_file_list(file_list)
def reader():
for index_image in file_dict.keys():
image_name = file_dict[index_image][0]
for image in file_lists:
image_name = image[0]
image_path = os.path.join(settings.data_dir, image_name)
im = Image.open(image_path)
if im.mode == 'L':
......
......@@ -163,9 +163,7 @@ def train(args, config, train_params, train_file_list):
train_file_list,
batch_size_per_device,
shuffle = is_shuffle,
use_multiprocessing=True,
num_workers = num_workers,
max_queue=24)
num_workers = num_workers)
train_py_reader.decorate_paddle_reader(train_reader)
if args.parallel:
......@@ -182,61 +180,59 @@ def train(args, config, train_params, train_file_list):
print('save models to %s' % (model_path))
fluid.io.save_persistables(exe, model_path, main_program=program)
train_py_reader.start()
try:
total_time = 0.0
epoch_idx = 0
face_loss = 0
head_loss = 0
for pass_id in range(start_epoc, epoc_num):
epoch_idx += 1
start_time = time.time()
prev_start_time = start_time
end_time = 0
batch_id = 0
for batch_id in range(iters_per_epoc):
total_time = 0.0
epoch_idx = 0
face_loss = 0
head_loss = 0
for pass_id in range(start_epoc, epoc_num):
epoch_idx += 1
start_time = time.time()
prev_start_time = start_time
end_time = 0
batch_id = 0
train_py_reader.start()
while True:
try:
prev_start_time = start_time
start_time = time.time()
if args.parallel:
fetch_vars = train_exe.run(fetch_list=
[v.name for v in fetches])
else:
fetch_vars = exe.run(train_prog,
fetch_list=fetches)
fetch_vars = exe.run(train_prog, fetch_list=fetches)
end_time = time.time()
fetch_vars = [np.mean(np.array(v)) for v in fetch_vars]
face_loss = fetch_vars[0]
head_loss = fetch_vars[1]
if batch_id % 10 == 0:
if not args.use_pyramidbox:
print("Pass {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
pass_id, batch_id, fetch_vars[0],
pass_id, batch_id, face_loss,
start_time - prev_start_time))
else:
print("Pass {:d}, batch {:d}, face loss {:.6f}, " \
"head loss {:.6f}, " \
"time {:.5f}".format(pass_id,
batch_id, fetch_vars[0], fetch_vars[1],
batch_id, face_loss, head_loss,
start_time - prev_start_time))
face_loss = fetch_vars[0]
head_loss = fetch_vars[1]
epoch_end_time = time.time()
total_time += epoch_end_time - start_time
if pass_id % 1 == 0 or pass_id == epoc_num - 1:
save_model(str(pass_id), train_prog)
# only for ce
if args.enable_ce:
gpu_num = get_cards(args)
print("kpis\teach_pass_duration_card%s\t%s" %
(gpu_num, total_time / epoch_idx))
print("kpis\ttrain_face_loss_card%s\t%s" %
(gpu_num, face_loss))
print("kpis\ttrain_head_loss_card%s\t%s" %
(gpu_num, head_loss))
except fluid.core.EOFException:
train_py_reader.reset()
except StopIteration:
train_py_reader.reset()
train_py_reader.reset()
batch_id += 1
except (fluid.core.EOFException, StopIteration):
train_py_reader.reset()
break
epoch_end_time = time.time()
total_time += epoch_end_time - start_time
save_model(str(pass_id), train_prog)
# only for ce
if args.enable_ce:
gpu_num = get_cards(args)
print("kpis\teach_pass_duration_card%s\t%s" %
(gpu_num, total_time / epoch_idx))
print("kpis\ttrain_face_loss_card%s\t%s" %
(gpu_num, face_loss))
print("kpis\ttrain_head_loss_card%s\t%s" %
(gpu_num, head_loss))
def get_cards(args):
......
......@@ -21,9 +21,7 @@ SSD is readily pluggable into a wide variant standard convolutional network, suc
### Data Preparation
You can use [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) or [MS-COCO dataset](http://cocodataset.org/#download).
If you want to train a model on PASCAL VOC dataset, please download dataset at first, skip this step if you already have one.
Please download the [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) first; skip this step if you already have it.
```bash
cd data/pascalvoc
......@@ -32,30 +30,18 @@ cd data/pascalvoc
The `download.sh` command will also create the training and testing file lists.
If you want to train a model on MS-COCO dataset, please download dataset at first, skip this step if you already have one.
```
cd data/coco
./download.sh
```
### Train
#### Download the Pre-trained Model.
We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other datasets, like PASCAL VOC. The other pre-trained model is MobileNet-v1 trained on ImageNet 2012 dataset but removed the last weights and bias in the Fully-Connected layer.
Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md). The MobileNet-v1 model is converted from [Caffe](https://github.com/shicai/MobileNet-Caffe).
We will release our own pre-trained models soon.
We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other datasets, like PASCAL VOC. The other pre-trained model is MobileNet-v1 trained on ImageNet 2012 dataset but removed the last weights and bias in the Fully-Connected layer. Download MobileNet-v1 SSD:
- Download MobileNet-v1 SSD:
```bash
./pretrained/download_coco.sh
```
- Download MobileNet-v1:
```bash
./pretrained/download_imagenet.sh
```
Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md).
#### Train on PASCAL VOC
......@@ -64,7 +50,6 @@ We will release the pre-trained models by ourself in the upcoming soon.
python -u train.py --batch_size=64 --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/'
```
- Set ```export CUDA_VISIBLE_DEVICES=0,1``` to specify the GPUs you want to use.
- Set ```--dataset='coco2014'``` or ```--dataset='coco2017'``` to train model on MS COCO dataset.
- For more help on arguments:
```bash
......@@ -88,19 +73,6 @@ You can evaluate your trained model in different metrics like 11point, integral
python eval.py --dataset='pascalvoc' --model_dir='train_pascal_model/best_model' --data_dir='data/pascalvoc' --test_list='test.txt' --ap_version='11point' --nms_threshold=0.45
```
You can set ```--dataset``` to ```coco2014``` or ```coco2017``` to evaluate COCO dataset. Moreover, we provide `eval_coco_map.py` which uses a COCO-specific mAP metric defined by [COCO committee](http://cocodataset.org/#detections-eval). To use this eval_coco_map.py, [cocoapi](https://github.com/cocodataset/cocoapi) is needed.
Install the cocoapi:
```
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
```
### Infer and Visualize
`infer.py` is the main caller of the inferring module. Examples of usage are shown below.
```bash
......
......@@ -21,9 +21,8 @@ SSD 可以方便地插入到任何一种标准卷积网络中,比如 VGG、Res
### 数据准备
你可以使用 [PASCAL VOC 数据集](http://host.robots.ox.ac.uk/pascal/VOC/) 或者 [MS-COCO 数据集](http://cocodataset.org/#download)
如果你想在 PASCAL VOC 数据集上进行训练,请先使用下面的命令下载数据集。
请先使用下面的命令下载 [PASCAL VOC 数据集](http://host.robots.ox.ac.uk/pascal/VOC/)
```bash
cd data/pascalvoc
......@@ -32,29 +31,19 @@ cd data/pascalvoc
`download.sh` 命令会自动创建训练和测试用的列表文件。
如果你想在 MS-COCO 数据集上进行训练,请先使用下面的命令下载数据集。
```
cd data/coco
./download.sh
```
### 模型训练
#### 下载预训练模型
我们提供了两个预训练模型。第一个模型是在 COCO 数据集上预训练的 MobileNet-v1 SSD,我们将它的预测头移除了以便在 COCO 以外的数据集上进行训练。第二个模型是在 ImageNet 2012 数据集上预训练的 MobileNet-v1,我们也将最后的全连接层移除以便进行目标检测训练。
声明:MobileNet-v1 SSD 模型转换自[TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md)。MobileNet-v1 模型转换自[Caffe](https://github.com/shicai/MobileNet-Caffe)。我们不久也会发布我们自己预训练的模型。
我们提供了两个预训练模型。第一个模型是在 COCO 数据集上预训练的 MobileNet-v1 SSD,我们将它的预测头移除了以便在 COCO 以外的数据集上进行训练。第二个模型是在 ImageNet 2012 数据集上预训练的 MobileNet-v1,我们也将最后的全连接层移除以便进行目标检测训练。下载 MobileNet-v1 SSD:
- 下载 MobileNet-v1 SSD:
```bash
./pretrained/download_coco.sh
```
- 下载 MobileNet-v1:
```bash
./pretrained/download_imagenet.sh
```
声明:MobileNet-v1 SSD 模型转换自[TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md)。MobileNet-v1 模型转换自[Caffe](https://github.com/shicai/MobileNet-Caffe)
#### 训练
......@@ -63,7 +52,6 @@ cd data/coco
python -u train.py --batch_size=64 --dataset='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/'
```
- 可以通过设置 ```export CUDA_VISIBLE_DEVICES=0,1``` 指定想要使用的GPU数量。
- 可以通过设置 ```--dataset='coco2014'``` 或 ```--dataset='coco2017'``` 指定训练 MS-COCO数据集。
- 更多的可选参数见:
```bash
......@@ -80,25 +68,13 @@ cd data/coco
### 模型评估
你可以使用11point、integral等指标在PASCAL VOC 和 COCO 数据集上评估训练好的模型。不失一般性,我们采用相应数据集的测试列表作为样例代码的默认列表,你也可以通过设置```--test_list```来指定自己的测试样本列表。
你可以使用11point、integral等指标在PASCAL VOC 数据集上评估训练好的模型。不失一般性,我们采用相应数据集的测试列表作为样例代码的默认列表,你也可以通过设置```--test_list```来指定自己的测试样本列表。
`eval.py`是评估模块的主要执行程序,调用示例如下:
```bash
python eval.py --dataset='pascalvoc' --model_dir='train_pascal_model/best_model' --data_dir='data/pascalvoc' --test_list='test.txt' --ap_version='11point' --nms_threshold=0.45
```
你可以设置 ```--dataset``` 为 ```coco2014``` 或 ```coco2017``` 来评估 COCO 数据集。我们也提供了`eval_coco_map.py`以进行[COCO官方评估](http://cocodataset.org/#detections-eval)。若要使用 eval_coco_map.py, 需要首先下载[cocoapi](https://github.com/cocodataset/cocoapi):
```
# COCOAPI=/path/to/clone/cocoapi
git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
cd $COCOAPI/PythonAPI
# Install into global site-packages
make install
# Alternatively, if you do not have permissions or prefer
# not to install the COCO API into global site-packages
python2 setup.py install --user
```
### 模型预测以及可视化
`infer.py`是预测及可视化模块的主要执行程序,调用示例如下:
......
"""
This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
"""
import time
import numpy as np
import threading
import multiprocessing
try:
import queue
except ImportError:
import Queue as queue
class GeneratorEnqueuer(object):
    """
    Builds a queue out of a data generator.

    Workers launched by `start()` repeatedly call `next()` on the generator
    and put the result on `self.queue`; `get()` drains that queue while the
    workers are running.

    Args:
        generator: a generator function which endlessly yields data
        use_multiprocessing (bool): use multiprocessing if True,
            otherwise use threading.
        wait_time (float): time to sleep in-between calls to `put()`.
        random_seed (int): Initial seed for workers,
            will be incremented by one for each workers.
    """

    def __init__(self,
                 generator,
                 use_multiprocessing=False,
                 wait_time=0.05,
                 random_seed=None):
        self.wait_time = wait_time
        self._generator = generator
        self._use_multiprocessing = use_multiprocessing
        self._threads = []
        self._stop_event = None
        self.queue = None
        self._manager = None
        self.seed = random_seed
        # Lock serialising generator access in threading mode; created by
        # start(), declared here so the attribute always exists.
        self.genlock = None

    def start(self, workers=1, max_queue_size=10):
        """
        Start worker threads which add data from the generator into the queue.

        If anything fails while launching workers, `stop()` is called and the
        original exception is re-raised.

        Args:
            workers (int): number of worker threads
            max_queue_size (int): queue size
                (when full, threads could block on `put()`)
        """
        # Imported locally because this module's import block does not
        # provide `traceback`; without it the worker error path raised
        # NameError, the stop event was never set and consumers spun forever.
        import traceback

        def task():
            """Move one item from the generator to the queue, or back off."""
            # Bind once so a concurrent stop() resetting self.queue to None
            # cannot slip in between the None check and the qsize() call.
            q = self.queue
            if q is not None and q.qsize() < max_queue_size:
                generator_output = next(self._generator)
                q.put((generator_output))
            else:
                time.sleep(self.wait_time)

        def data_generator_task():
            """
            Data generator task.

            Loops until the stop event is set. Any exception raised by the
            generator (including StopIteration) is printed and stops all
            workers.
            """
            # Keep local references: stop() resets the attributes to None and
            # a worker that outlives a timed-out join must not dereference
            # them afterwards.
            stop_event = self._stop_event
            lock = None if self._use_multiprocessing else self.genlock
            while not stop_event.is_set():
                try:
                    if lock is None:
                        task()
                    else:
                        # Generators are not thread-safe; serialise next().
                        with lock:
                            task()
                except Exception:
                    traceback.print_exc()
                    stop_event.set()
                    break

        try:
            if self._use_multiprocessing:
                self._manager = multiprocessing.Manager()
                self.queue = self._manager.Queue(maxsize=max_queue_size)
                self._stop_event = multiprocessing.Event()
            else:
                self.genlock = threading.Lock()
                self.queue = queue.Queue()
                self._stop_event = threading.Event()

            for _ in range(workers):
                if self._use_multiprocessing:
                    # Reset random seed else all children processes
                    # share the same seed
                    np.random.seed(self.seed)
                    # NOTE(review): the target is a nested function, which
                    # presumably requires the 'fork' start method — confirm
                    # on platforms that default to 'spawn'.
                    thread = multiprocessing.Process(target=data_generator_task)
                    thread.daemon = True
                    if self.seed is not None:
                        self.seed += 1
                else:
                    thread = threading.Thread(target=data_generator_task)
                # Track the worker only once it has actually started, so
                # stop() never tries to join a worker that was never started.
                thread.start()
                self._threads.append(thread)
        except BaseException:
            self.stop()
            raise

    def is_running(self):
        """
        Returns:
            bool: Whether the worker threads are running.
        """
        return self._stop_event is not None and not self._stop_event.is_set()

    def stop(self, timeout=None):
        """
        Stops running threads and wait for them to exit, if necessary.
        Should be called by the same thread which called `start()`.

        Args:
            timeout(int|None): maximum time to wait on `thread.join()`.
        """
        if self.is_running():
            self._stop_event.set()

        for thread in self._threads:
            if self._use_multiprocessing and thread.is_alive():
                thread.terminate()
            # Join in both modes: for processes this reaps the terminated
            # child instead of leaving it behind as a zombie.
            thread.join(timeout)

        if self._manager:
            self._manager.shutdown()
            # Drop the reference so a repeated stop() does not touch a
            # manager that has already been shut down.
            self._manager = None

        self._threads = []
        self._stop_event = None
        self.queue = None

    def get(self):
        """
        Creates a generator to extract data from the queue.
        Skip the data if it is `None`.

        Iteration ends as soon as the workers are no longer running; items
        still sitting in the queue at that point are not yielded.

        # Yields
            tuple of data in the queue.
        """
        while self.is_running():
            if not self.queue.empty():
                inputs = self.queue.get()
                if inputs is not None:
                    yield inputs
            else:
                time.sleep(self.wait_time)
......@@ -52,7 +52,7 @@ def build_program(main_prog, startup_prog, args, data_args):
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=args.nms_threshold)
with fluid.program_guard(main_prog):
map = fluid.evaluator.DetectionMAP(
map = fluid.metrics.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
......
......@@ -47,7 +47,7 @@ def eval(args, data_args, test_list, batch_size, model_dir=None):
gt_iscrowd = fluid.layers.data(
name='gt_iscrowd', shape=[1], dtype='int32', lod_level=1)
gt_image_info = fluid.layers.data(
name='gt_image_id', shape=[3], dtype='int32', lod_level=1)
name='gt_image_id', shape=[3], dtype='int32')
locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
nmsed_out = fluid.layers.detection_output(
......@@ -57,14 +57,14 @@ def eval(args, data_args, test_list, batch_size, model_dir=None):
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# yapf: disable
if model_dir:
def if_exist(var):
return os.path.exists(os.path.join(model_dir, var.name))
fluid.io.load_vars(exe, model_dir, predicate=if_exist)
# yapf: enable
test_reader = paddle.batch(
reader.test(data_args, test_list), batch_size=batch_size)
test_reader = reader.test(data_args, test_list, batch_size)
feeder = fluid.DataFeeder(
place=place,
feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info])
......@@ -146,8 +146,7 @@ if __name__ == '__main__':
mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
apply_distort=False,
apply_expand=False,
ap_version=args.ap_version,
toy=0)
ap_version=args.ap_version)
eval(
args,
data_args=data_args,
......
......@@ -12,17 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import image_util
from paddle.utils.image_util import *
from PIL import Image
from PIL import ImageDraw
import numpy as np
import xml.etree.ElementTree
import os
import time
import copy
import six
from data_util import GeneratorEnqueuer
import math
import numpy as np
from PIL import Image
from PIL import ImageDraw
import image_util
import paddle
class Settings(object):
......@@ -162,26 +162,14 @@ def preprocess(img, bbox_labels, mode, settings):
return img, sampled_labels
def coco(settings, file_list, mode, batch_size, shuffle):
# cocoapi
def coco(settings, coco_api, file_list, mode, batch_size, shuffle, data_dir):
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
coco = COCO(file_list)
image_ids = coco.getImgIds()
images = coco.loadImgs(image_ids)
print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
def reader():
if mode == 'train' and shuffle:
np.random.shuffle(images)
np.random.shuffle(file_list)
batch_out = []
if '2014' in file_list:
sub_dir = "train2014" if model == "train" else "val2014"
elif '2017' in file_list:
sub_dir = "train2017" if mode == "train" else "val2017"
data_dir = os.path.join(settings.data_dir, sub_dir)
for image in images:
for image in file_list:
image_name = image['file_name']
image_path = os.path.join(data_dir, image_name)
if not os.path.exists(image_path):
......@@ -195,8 +183,8 @@ def coco(settings, file_list, mode, batch_size, shuffle):
# layout: category_id | xmin | ymin | xmax | ymax | iscrowd
bbox_labels = []
annIds = coco.getAnnIds(imgIds=image['id'])
anns = coco.loadAnns(annIds)
annIds = coco_api.getAnnIds(imgIds=image['id'])
anns = coco_api.loadAnns(annIds)
for ann in anns:
bbox_sample = []
# start from 1, leave 0 to background
......@@ -236,16 +224,12 @@ def coco(settings, file_list, mode, batch_size, shuffle):
def pascalvoc(settings, file_list, mode, batch_size, shuffle):
flist = open(file_list)
images = [line.strip() for line in flist]
print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
def reader():
if mode == 'train' and shuffle:
np.random.shuffle(images)
np.random.shuffle(file_list)
batch_out = []
cnt = 0
for image in images:
for image in file_list:
image_path, label_path = image.split()
image_path = os.path.join(settings.data_dir, image_path)
label_path = os.path.join(settings.data_dir, label_path)
......@@ -299,52 +283,55 @@ def train(settings,
file_list,
batch_size,
shuffle=True,
use_multiprocessing=True,
num_workers=8,
max_queue=24,
enable_ce=False):
file_list = os.path.join(settings.data_dir, file_list)
file_path = os.path.join(settings.data_dir, file_list)
readers = []
if 'coco' in settings.dataset:
generator = coco(settings, file_list, "train", batch_size, shuffle)
else:
generator = pascalvoc(settings, file_list, "train", batch_size, shuffle)
# cocoapi
from pycocotools.coco import COCO
coco_api = COCO(file_path)
image_ids = coco_api.getImgIds()
images = coco_api.loadImgs(image_ids)
n = int(math.ceil(len(images) // num_workers))
image_lists = [images[i:i + n] for i in range(0, len(images), n)]
def infinite_reader():
while True:
for data in generator():
yield data
def reader():
try:
enqueuer = GeneratorEnqueuer(
infinite_reader(), use_multiprocessing=use_multiprocessing)
enqueuer.start(max_queue_size=max_queue, workers=num_workers)
generator_output = None
while True:
while enqueuer.is_running():
if not enqueuer.queue.empty():
generator_output = enqueuer.queue.get()
break
else:
time.sleep(0.02)
yield generator_output
generator_output = None
finally:
if enqueuer is not None:
enqueuer.stop()
if enable_ce:
return infinite_reader
if '2014' in file_list:
sub_dir = "train2014"
elif '2017' in file_list:
sub_dir = "train2017"
data_dir = os.path.join(settings.data_dir, sub_dir)
for l in image_lists:
readers.append(
coco(settings, coco_api, l, 'train', batch_size, shuffle,
data_dir))
else:
return reader
images = [line.strip() for line in open(file_path)]
n = int(math.ceil(len(images) // num_workers))
image_lists = [images[i:i + n] for i in range(0, len(images), n)]
for l in image_lists:
readers.append(pascalvoc(settings, l, 'train', batch_size, shuffle))
return paddle.reader.multiprocess_reader(readers, False)
def test(settings, file_list, batch_size):
file_list = os.path.join(settings.data_dir, file_list)
if 'coco' in settings.dataset:
return coco(settings, file_list, 'test', batch_size, False)
from pycocotools.coco import COCO
coco_api = COCO(file_list)
image_ids = coco_api.getImgIds()
images = coco_api.loadImgs(image_ids)
if '2014' in file_list:
sub_dir = "val2014"
elif '2017' in file_list:
sub_dir = "val2017"
data_dir = os.path.join(settings.data_dir, sub_dir)
return coco(settings, coco_api, images, 'test', batch_size, False,
data_dir)
else:
return pascalvoc(settings, file_list, 'test', batch_size, False)
image_list = [line.strip() for line in open(file_list)]
return pascalvoc(settings, image_list, 'test', batch_size, False)
def infer(settings, image_path):
......
......@@ -105,7 +105,7 @@ def build_program(main_prog, startup_prog, train_params, is_train):
with fluid.unique_name.guard("inference"):
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45)
map_eval = fluid.evaluator.DetectionMAP(
map_eval = fluid.metrics.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
......@@ -156,6 +156,7 @@ def train(args,
startup_prog.random_seed = 111
train_prog.random_seed = 111
test_prog.random_seed = 111
num_workers = 1
train_py_reader, loss = build_program(
main_prog=train_prog,
......@@ -186,9 +187,7 @@ def train(args,
train_file_list,
batch_size_per_device,
shuffle=is_shuffle,
use_multiprocessing=True,
num_workers=num_workers,
max_queue=24,
enable_ce=enable_ce)
test_reader = reader.test(data_args, val_file_list, batch_size)
train_py_reader.decorate_paddle_reader(train_reader)
......@@ -205,7 +204,7 @@ def train(args,
def test(epoc_id, best_map):
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
every_epoc_map=[]
every_epoc_map=[] # for CE
test_py_reader.start()
try:
batch_id = 0
......@@ -218,22 +217,23 @@ def train(args,
except fluid.core.EOFException:
test_py_reader.reset()
mean_map = np.mean(every_epoc_map)
print("Epoc {0}, test map {1}".format(epoc_id, test_map))
print("Epoc {0}, test map {1}".format(epoc_id, test_map[0]))
if test_map[0] > best_map:
best_map = test_map[0]
save_model('best_model', test_prog)
return best_map, mean_map
train_py_reader.start()
total_time = 0.0
try:
for epoc_id in range(epoc_num):
epoch_idx = epoc_id + 1
start_time = time.time()
prev_start_time = start_time
every_epoc_loss = []
for batch_id in range(iters_per_epoc):
for epoc_id in range(epoc_num):
epoch_idx = epoc_id + 1
start_time = time.time()
prev_start_time = start_time
every_epoc_loss = []
batch_id = 0
train_py_reader.start()
while True:
try:
prev_start_time = start_time
start_time = time.time()
if parallel:
......@@ -242,34 +242,35 @@ def train(args,
loss_v, = exe.run(train_prog, fetch_list=[loss])
loss_v = np.mean(np.array(loss_v))
every_epoc_loss.append(loss_v)
if batch_id % 20 == 0:
if batch_id % 10 == 0:
print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
epoc_id, batch_id, loss_v, start_time - prev_start_time))
end_time = time.time()
total_time += end_time - start_time
best_map, mean_map = test(epoc_id, best_map)
print("Best test map {0}".format(best_map))
if epoc_id % 10 == 0 or epoc_id == epoc_num - 1:
save_model(str(epoc_id), train_prog)
if enable_ce and epoc_id == epoc_num - 1:
train_avg_loss = np.mean(every_epoc_loss)
if devices_num == 1:
print("kpis train_cost %s" % train_avg_loss)
print("kpis test_acc %s" % mean_map)
print("kpis train_speed %s" % (total_time / epoch_idx))
else:
print("kpis train_cost_card%s %s" %
(devices_num, train_avg_loss))
print("kpis test_acc_card%s %s" %
(devices_num, mean_map))
print("kpis train_speed_card%s %f" %
(devices_num, total_time / epoch_idx))
except (fluid.core.EOFException, StopIteration):
train_reader().close()
train_py_reader.reset()
batch_id += 1
except (fluid.core.EOFException, StopIteration):
train_reader().close()
train_py_reader.reset()
break
end_time = time.time()
total_time += end_time - start_time
best_map, mean_map = test(epoc_id, best_map)
print("Best test map {0}".format(best_map))
if epoc_id % 10 == 0 or epoc_id == epoc_num - 1:
save_model(str(epoc_id), train_prog)
if enable_ce:
train_avg_loss = np.mean(every_epoc_loss)
if devices_num == 1:
print("kpis train_cost %s" % train_avg_loss)
print("kpis test_acc %s" % mean_map)
print("kpis train_speed %s" % (total_time / epoch_idx))
else:
print("kpis train_cost_card%s %s" %
(devices_num, train_avg_loss))
print("kpis test_acc_card%s %s" %
(devices_num, mean_map))
print("kpis train_speed_card%s %f" %
(devices_num, total_time / epoch_idx))
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册