Unverified commit 2c825d54, authored by kinghuin, committed by GitHub

ready 1.8 (#745)

Parent 95395059
#coding:utf-8
import argparse
import os
import ast
import paddle.fluid as fluid
import paddlehub as hub
import numpy as np
from paddlehub.reader.cv_reader import ObjectDetectionReader
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset
from paddlehub.contrib.ppdet.utils.coco_eval import bbox2out
from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature
from paddlehub.common import detection_config as dconf
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for predict.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=2, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="ssd", help="Module used as a feature extractor.")
parser.add_argument("--dataset", type=str, default="coco10", help="Dataset to finetune.")
# yapf: enable.
module_map = {
    "yolov3": "yolov3_darknet53_coco2017",
    "ssd": "ssd_vgg16_512_coco2017",
    "faster_rcnn": "faster_rcnn_resnet50_coco2017",
}
def predict(args):
    module_name = args.module  # e.g. 'yolov3_darknet53_coco2017'
    model_type = get_model_type(module_name)  # e.g. 'yolo'

    # define data
    ds = hub.dataset.Coco10(model_type)
    print("ds.num_labels", ds.num_labels)
    data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type)

    # define model (program); rcnn modules need separate train and predict contexts
    module = hub.Module(name=module_name)
    if model_type == 'rcnn':
        input_dict, output_dict, program = module.context(
            trainable=True, phase='train')
        input_dict_pred, output_dict_pred, program_pred = module.context(
            trainable=False)
    else:
        input_dict, output_dict, program = module.context(trainable=True)
        input_dict_pred = output_dict_pred = None

    feed_list, pred_feed_list = get_feed_list(module_name, input_dict,
                                              input_dict_pred)
    feature, pred_feature = get_mid_feature(module_name, output_dict,
                                            output_dict_pred)

    config = hub.RunConfig(
        use_data_parallel=False,
        use_pyreader=True,
        use_cuda=args.use_gpu,
        batch_size=args.batch_size,
        enable_memory_optim=False,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.finetune.strategy.DefaultFinetuneStrategy())

    task = hub.DetectionTask(
        data_reader=data_reader,
        num_classes=ds.num_labels,
        feed_list=feed_list,
        feature=feature,
        predict_feed_list=pred_feed_list,
        predict_feature=pred_feature,
        model_type=model_type,
        config=config)

    data = [
        "./test/test_img_bird.jpg",
        "./test/test_img_cat.jpg",
    ]
    label_map = ds.label_dict()
    run_states = task.predict(data=data, accelerate_mode=False)
    results = [run_state.run_results for run_state in run_states]
    for outs in results:
        keys = ['im_shape', 'im_id', 'bbox']
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        print("im_id", res['im_id'])
        is_bbox_normalized = dconf.conf[model_type]['is_bbox_normalized']
        # class ids coincide with COCO category ids for this dataset
        clsid2catid = {k: k for k in label_map}
        bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        print(bbox_results)


if __name__ == "__main__":
    args = parser.parse_args()
    if args.module not in module_map:
        hub.logger.error("module should be one of %s" % list(module_map.keys()))
        exit(1)
    args.module = module_map[args.module]
    predict(args)
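For readers unfamiliar with the output printed at the end of this demo: bbox2out comes from ppdet's coco_eval utilities and, to the best of my knowledge, emits COCO-style result dicts. A minimal sketch of consuming a bbox_results list like the one printed above, assuming those conventional field names:

# Sketch only: 'image_id', 'category_id', 'bbox', 'score' are the usual
# COCO result keys produced by ppdet's bbox2out; treat them as assumptions here.
for det in bbox_results:
    x, y, w, h = det['bbox']  # COCO boxes are [x, y, width, height]
    print(det['image_id'], det['category_id'], round(det['score'], 3), (x, y, w, h))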
# -*- coding:utf8 -*-
import argparse
import os
import ast
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.reader.cv_reader import ObjectDetectionReader
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset
import numpy as np
from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=50, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="ssd", help="Module used as feature extractor.")
parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
# yapf: enable.
module_map = {
    "yolov3": "yolov3_darknet53_coco2017",
    "ssd": "ssd_vgg16_512_coco2017",
    "faster_rcnn": "faster_rcnn_resnet50_coco2017",
}
def finetune(args):
    module_name = args.module  # e.g. 'yolov3_darknet53_coco2017'
    model_type = get_model_type(module_name)  # e.g. 'yolo'

    # define dataset
    ds = hub.dataset.Coco10(model_type)
    # To train on a custom dataset instead, point ObjectDetectionDataset at
    # your own image directories and COCO-style annotation files:
    # base_path = '/home/local3/zhaopenghao/data/detect/paddle-job-84942-0'
    # train_dir = 'train_data/images'
    # train_list = 'train_data/coco/instances_coco.json'
    # val_dir = 'eval_data/images'
    # val_list = 'eval_data/coco/instances_coco.json'
    # ds = ObjectDetectionDataset(base_path, train_dir, train_list, val_dir, val_list, val_dir, val_list, model_type=model_type)
    # print(ds.label_dict())
    print("ds.num_labels", ds.num_labels)

    # define batch reader
    data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type)

    # define model (program); rcnn modules need separate train and predict contexts
    module = hub.Module(name=module_name)
    if model_type == 'rcnn':
        input_dict, output_dict, program = module.context(
            trainable=True, phase='train')
        input_dict_pred, output_dict_pred, program_pred = module.context(
            trainable=False)
    else:
        input_dict, output_dict, program = module.context(trainable=True)
        input_dict_pred = output_dict_pred = None

    print("input_dict keys", input_dict.keys())
    print("output_dict keys", output_dict.keys())
    feed_list, pred_feed_list = get_feed_list(module_name, input_dict,
                                              input_dict_pred)
    print("output_dict length:", len(output_dict))
    print(output_dict.keys())
    if output_dict_pred is not None:
        print(output_dict_pred.keys())
    feature, pred_feature = get_mid_feature(module_name, output_dict,
                                            output_dict_pred)

    config = hub.RunConfig(
        log_interval=10,
        eval_interval=100,
        use_data_parallel=args.use_data_parallel,
        use_pyreader=True,
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        enable_memory_optim=False,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.finetune.strategy.DefaultFinetuneStrategy(
            learning_rate=0.00025, optimizer_name="adam"))

    task = hub.DetectionTask(
        data_reader=data_reader,
        num_classes=ds.num_labels,
        feed_list=feed_list,
        feature=feature,
        predict_feed_list=pred_feed_list,
        predict_feature=pred_feature,
        model_type=model_type,
        config=config)
    task.finetune_and_eval()


if __name__ == "__main__":
    args = parser.parse_args()
    if args.module not in module_map:
        hub.logger.error("module should be one of %s" % list(module_map.keys()))
        exit(1)
    args.module = module_map[args.module]
    finetune(args)
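As a quick illustration of the name mapping both demo scripts rely on (the expected values follow the inline comments in the scripts above):

# module_map expands the short CLI name to a concrete PaddleHub module name;
# get_model_type then maps that to its family ('ssd', 'yolo', or 'rcnn').
print(module_map["yolov3"])                          # yolov3_darknet53_coco2017
print(get_model_type("yolov3_darknet53_coco2017"))   # 'yolo', per the comment above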
@@ -38,4 +38,3 @@ from .flowers import FlowersDataset as Flowers
 from .stanford_dogs import StanfordDogsDataset as StanfordDogs
 from .food101 import Food101Dataset as Food101
 from .indoor67 import Indoor67Dataset as Indoor67
-from .coco10 import Coco10
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import paddlehub as hub
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset
class Coco10(ObjectDetectionDataset):
    def __init__(self, model_type='ssd'):
        dataset_path = os.path.join(hub.common.dir.DATA_HOME, "coco_10")
        # self.base_path = self._download_dataset(
        #     dataset_path=dataset_path,
        #     url="https://bj.bcebos.com/paddlehub-dataset/dog-cat.tar.gz")
        base_path = dataset_path
        # the 10-image subset reuses the validation split for train/val/test
        train_image_dir = 'val'
        train_list_file = 'annotations/val.json'
        validate_image_dir = 'val'
        validate_list_file = 'annotations/val.json'
        test_image_dir = 'val'
        test_list_file = 'annotations/val.json'
        super(Coco10, self).__init__(
            base_path, train_image_dir, train_list_file, validate_image_dir,
            validate_list_file, test_image_dir, test_list_file, model_type)
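A minimal usage sketch, mirroring how the demo scripts above instantiate this dataset. Note the download call is commented out in the class itself, so a coco_10 directory is assumed to already exist under DATA_HOME:

# Sketch: model_type selects preprocessing conventions, not the file layout.
ds = Coco10(model_type='yolo')
print(ds.num_labels)      # label count parsed from annotations/val.json
print(ds.label_dict())    # id -> label-name mapping, as the predict demo uses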
@@ -106,8 +106,9 @@ class BaseDataset(object):
                 "As label_list has been assigned, label_file is noneffective"
             )
-        self.label_index = dict(
-            zip(self.label_list, range(len(self.label_list))))
+        if self.label_list:
+            self.label_index = dict(
+                zip(self.label_list, range(len(self.label_list))))

     def get_train_examples(self):
         return self.train_examples
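The guard matters because label_list can legitimately be None for datasets that define neither label_list nor label_file; without it, building the index raises. A tiny repro sketch of the failure mode the new check avoids:

label_list = None  # dataset constructed without label_list or label_file
try:
    dict(zip(label_list, range(len(label_list))))
except TypeError as e:
    print(e)  # object of type 'NoneType' has no len()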
@@ -35,7 +35,7 @@ import paddle.fluid as fluid
 from visualdl import LogWriter
 import paddlehub as hub
-from paddlehub.reader.nlp_reader import BaseNLPReader
+from paddlehub.reader.nlp_reader import BaseReader, BaseNLPReader
 from paddlehub.common.paddle_helper import dtype_map, clone_program
 from paddlehub.common.utils import mkdir
 from paddlehub.common.dir import tmp_dir
@@ -350,13 +350,14 @@ class BaseTask(object):
         self._base_data_reader = data_reader
         self._base_feed_list = feed_list
-        if isinstance(data_reader, BaseNLPReader):
+        if isinstance(data_reader, BaseReader):
             self._compatible_mode = True
-            logger.warning(
-                "PaddleHub v1.8 has deprecated the reader and feed_list parameters in the nlp Task. We provided an easier usage, "
-                "in which you can use your tokenizer to preprocess dataset and run task in a clear flow. "
-                "New demo see https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.8/demo/text_classification/text_cls.py"
-            )
+            if isinstance(data_reader, BaseNLPReader):
+                logger.warning(
+                    "PaddleHub v1.8 has deprecated the reader and feed_list parameters in the nlp Task. We provided an easier usage, "
+                    "in which you can use your tokenizer to preprocess dataset and run task in a clear flow. "
+                    "New demo see https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.8/demo/text_classification/text_cls.py"
+                )
         else:
             self._compatible_mode = False
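Why the checks nest: the widened outer test turns compatible mode on for any legacy reader, while the deprecation warning stays scoped to NLP readers. A self-contained sketch of that logic, assuming (as the combined import above suggests) that BaseNLPReader subclasses BaseReader:

# Stand-in classes; the real ones live in paddlehub.reader.nlp_reader.
class BaseReader: pass
class BaseNLPReader(BaseReader): pass

reader = BaseNLPReader()
assert isinstance(reader, BaseReader)     # outer check: compatible mode enabled
assert isinstance(reader, BaseNLPReader)  # inner check: deprecation warning fires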
@@ -196,7 +196,6 @@ class TextClassifierTask(ClassifierTask):
         feature(Variable): the `feature` will be used to classify texts. It must be the sentence-level feature, shape as [-1, emb_size]. `token_feature` and `feature` cannot be set at the same time; exactly one of them must be not None. Default None.
         token_feature(Variable): the `token_feature` will be used to connect the pre-defined network. It must be the token-level feature, shape as [-1, seq_len, emb_size]. Default None.
         network(str): the pre-defined network. Choices: 'bilstm', 'bow', 'cnn', 'dpcnn', 'gru' and 'lstm'. Default None. If network is set, then `token_feature` must be set and `feature` must be None.
-        main_program (object): the customized main program, default None.
         startup_program (object): the customized startup program, default None.
         config (RunConfig): run config for the task, such as batch_size, epoch, learning_rate settings and so on. Default None.
         hidden_units(list): the elements of the `hidden_units` list are the sizes of fully connected layers appended to the program. Default None.
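To make the documented constraint concrete, here is a hedged construction sketch; reader, outputs, feed_list, dataset, and config are placeholders rather than anything defined in this diff:

# Exactly one of feature/token_feature may be set; a pre-defined network
# requires the token-level feature.
cls_task = hub.TextClassifierTask(
    data_reader=reader,
    feed_list=feed_list,
    token_feature=outputs["sequence_output"],  # shape [-1, seq_len, emb_size]
    network='bilstm',                          # or 'bow', 'cnn', 'dpcnn', 'gru', 'lstm'
    num_classes=dataset.num_labels,
    config=config)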
@@ -164,21 +164,6 @@ class TextGenerationTask(BaseTask):
             # Define decoder and initialize it.
             dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                             self.dropout)
-            enc_last_step = fluid.layers.slice(
-                self.token_feature,
-                axes=[1],
-                starts=[-1],
-                ends=[self.token_feature.shape[1] + 1])
-            dec_init_cell = fluid.layers.fc(
-                input=enc_last_step,
-                size=self.hidden_size,
-                num_flatten_dims=1,
-                param_attr=fluid.ParamAttr(
-                    name="dec_init_cell_w",
-                    initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
-                bias_attr=fluid.ParamAttr(
-                    name="dec_init_cell_b",
-                    initializer=fluid.initializer.Constant(0.)))
             dec_init_hidden = fluid.layers.fc(
                 input=self.feature,
                 size=self.hidden_size,
@@ -189,12 +174,14 @@ class TextGenerationTask(BaseTask):
                 bias_attr=fluid.ParamAttr(
                     name="dec_init_hidden_b",
                     initializer=fluid.initializer.Constant(0.)))
-            # TODO: maybe dec_init_hidden can use self.feature, and dec_init_cell can be get_initial_states
-            dec_initial_states = [
-                [[dec_init_hidden, dec_init_cell]] * self.num_layers,
-                dec_cell.get_initial_states(
-                    batch_ref=self.token_feature, shape=[self.hidden_size])
-            ]
+            dec_initial_states = [[[
+                dec_init_hidden,
+                dec_cell.get_initial_states(
+                    batch_ref=self.feature, shape=[self.hidden_size])
+            ]] * self.num_layers,
+                                  dec_cell.get_initial_states(
+                                      batch_ref=self.feature,
+                                      shape=[self.hidden_size])]
             tar_vocab_size = len(self._label_list)
             tar_embeder = lambda x: fluid.embedding(
                 input=x,
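For orientation, the structure being built here: an n-layer LSTM decoder takes one [hidden, cell] pair per layer, and after this change every cell state (plus the trailing extra state, presumably the attention/input-feed state) starts from zeros batched against the sentence-level feature instead of an FC projection of the encoder's last step. A standalone sketch of that shape contract, not code from this diff:

import paddle.fluid as fluid

num_layers, hidden_size = 2, 8
feature = fluid.data(name='feature', shape=[-1, hidden_size], dtype='float32')
# Zero tensor whose batch dimension follows `feature`, much as get_initial_states does.
zero_state = fluid.layers.fill_constant_batch_size_like(
    feature, shape=[-1, hidden_size], dtype='float32', value=0.0)
init_states = [[feature, zero_state]] * num_layers  # one [h, c] pair per layer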