Unverified commit 2c825d54, authored by kinghuin, committed by GitHub

ready 1.8 (#745)

Parent 95395059
#coding:utf-8
import argparse
import os
import ast
import paddle.fluid as fluid
import paddlehub as hub
import numpy as np
from paddlehub.reader.cv_reader import ObjectDetectionReader
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset
from paddlehub.contrib.ppdet.utils.coco_eval import bbox2out
from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature
from paddlehub.common import detection_config as dconf
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for predict.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=2, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="ssd", help="Module used as a feature extractor.")
parser.add_argument("--dataset", type=str, default="coco10", help="Dataset to finetune.")
# yapf: enable.
module_map = {
    "yolov3": "yolov3_darknet53_coco2017",
    "ssd": "ssd_vgg16_512_coco2017",
    "faster_rcnn": "faster_rcnn_resnet50_coco2017",
}
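# The keys above are the CLI shorthand accepted by --module; the values are
# the full PaddleHub module names, from which get_model_type() derives the
# architecture family ('ssd', 'yolo' or 'rcnn').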
def predict(args):
    module_name = args.module  # e.g. 'yolov3_darknet53_coco2017'
    model_type = get_model_type(module_name)  # e.g. 'yolo'

    # define data
    ds = hub.dataset.Coco10(model_type)
    print("ds.num_labels", ds.num_labels)
    data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type)

    # define model (program)
    module = hub.Module(name=module_name)
    if model_type == 'rcnn':
        # RCNN modules need separate train and predict programs.
        input_dict, output_dict, program = module.context(
            trainable=True, phase='train')
        input_dict_pred, output_dict_pred, program_pred = module.context(
            trainable=False)
    else:
        input_dict, output_dict, program = module.context(trainable=True)
        input_dict_pred = output_dict_pred = None

    feed_list, pred_feed_list = get_feed_list(module_name, input_dict,
                                              input_dict_pred)
    feature, pred_feature = get_mid_feature(module_name, output_dict,
                                            output_dict_pred)

    config = hub.RunConfig(
        use_data_parallel=False,
        use_pyreader=True,
        use_cuda=args.use_gpu,
        batch_size=args.batch_size,
        enable_memory_optim=False,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.finetune.strategy.DefaultFinetuneStrategy())

    task = hub.DetectionTask(
        data_reader=data_reader,
        num_classes=ds.num_labels,
        feed_list=feed_list,
        feature=feature,
        predict_feed_list=pred_feed_list,
        predict_feature=pred_feature,
        model_type=model_type,
        config=config)

    data = [
        "./test/test_img_bird.jpg",
        "./test/test_img_cat.jpg",
    ]
    label_map = ds.label_dict()
    run_states = task.predict(data=data, accelerate_mode=False)
    results = [run_state.run_results for run_state in run_states]
    for outs in results:
        keys = ['im_shape', 'im_id', 'bbox']
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        print("im_id", res['im_id'])
        is_bbox_normalized = dconf.conf[model_type]['is_bbox_normalized']
        # identity class-id -> category-id mapping
        clsid2catid = {k: k for k in label_map}
        bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        print(bbox_results)


if __name__ == "__main__":
    args = parser.parse_args()
    if args.module not in module_map:
        hub.logger.error("module should be in %s" % list(module_map.keys()))
        exit(1)
    args.module = module_map[args.module]
    predict(args)
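A minimal invocation sketch for the script above (the filename predict.py is an assumption, not part of the commit; the images under ./test/ must exist and --checkpoint_dir must hold a checkpoint produced by the fine-tuning script below):

# python predict.py --module yolov3 --use_gpu False --batch_size 2 \
#     --checkpoint_dir paddlehub_finetune_ckpt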
# -*- coding:utf8 -*-
import argparse
import os
import ast
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.reader.cv_reader import ObjectDetectionReader
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset
import numpy as np
from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=50, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="ssd", help="Module used as feature extractor.")
parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
# yapf: enable.
module_map = {
    "yolov3": "yolov3_darknet53_coco2017",
    "ssd": "ssd_vgg16_512_coco2017",
    "faster_rcnn": "faster_rcnn_resnet50_coco2017",
}
def finetune(args):
    module_name = args.module  # e.g. 'yolov3_darknet53_coco2017'
    model_type = get_model_type(module_name)  # e.g. 'yolo'

    # define dataset
    ds = hub.dataset.Coco10(model_type)
    # To fine-tune on a custom COCO-style dataset instead, build an
    # ObjectDetectionDataset, e.g.:
    # base_path = '/home/local3/zhaopenghao/data/detect/paddle-job-84942-0'
    # train_dir = 'train_data/images'
    # train_list = 'train_data/coco/instances_coco.json'
    # val_dir = 'eval_data/images'
    # val_list = 'eval_data/coco/instances_coco.json'
    # ds = ObjectDetectionDataset(base_path, train_dir, train_list, val_dir,
    #                             val_list, val_dir, val_list,
    #                             model_type=model_type)
    # print(ds.label_dict())
    print("ds.num_labels", ds.num_labels)

    # define batch reader
    data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type)

    # define model (program)
    module = hub.Module(name=module_name)
    if model_type == 'rcnn':
        # RCNN modules need separate train and predict programs.
        input_dict, output_dict, program = module.context(
            trainable=True, phase='train')
        input_dict_pred, output_dict_pred, program_pred = module.context(
            trainable=False)
    else:
        input_dict, output_dict, program = module.context(trainable=True)
        input_dict_pred = output_dict_pred = None

    print("input_dict keys", input_dict.keys())
    print("output_dict keys", output_dict.keys())
    feed_list, pred_feed_list = get_feed_list(module_name, input_dict,
                                              input_dict_pred)
    print("output_dict length:", len(output_dict))
    print(output_dict.keys())
    if output_dict_pred is not None:
        print(output_dict_pred.keys())
    feature, pred_feature = get_mid_feature(module_name, output_dict,
                                            output_dict_pred)

    config = hub.RunConfig(
        log_interval=10,
        eval_interval=100,
        use_data_parallel=args.use_data_parallel,
        use_pyreader=True,
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        enable_memory_optim=False,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.finetune.strategy.DefaultFinetuneStrategy(
            learning_rate=0.00025, optimizer_name="adam"))

    task = hub.DetectionTask(
        data_reader=data_reader,
        num_classes=ds.num_labels,
        feed_list=feed_list,
        feature=feature,
        predict_feed_list=pred_feed_list,
        predict_feature=pred_feature,
        model_type=model_type,
        config=config)
    task.finetune_and_eval()


if __name__ == "__main__":
    args = parser.parse_args()
    if args.module not in module_map:
        hub.logger.error("module should be in %s" % list(module_map.keys()))
        exit(1)
    args.module = module_map[args.module]
    finetune(args)
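A matching fine-tuning invocation sketch (assumed filename train.py; the flags mirror the argparse definitions above):

# python train.py --module ssd --num_epoch 50 --batch_size 8 --use_gpu True \
#     --checkpoint_dir paddlehub_finetune_ckpt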
@@ -38,4 +38,3 @@ from .flowers import FlowersDataset as Flowers
 from .stanford_dogs import StanfordDogsDataset as StanfordDogs
 from .food101 import Food101Dataset as Food101
 from .indoor67 import Indoor67Dataset as Indoor67
-from .coco10 import Coco10
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import paddlehub as hub
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset


class Coco10(ObjectDetectionDataset):
    def __init__(self, model_type='ssd'):
        dataset_path = os.path.join(hub.common.dir.DATA_HOME, "coco_10")
        # Automatic download is disabled; the dataset is expected to be
        # unpacked under DATA_HOME already.
        # self.base_path = self._download_dataset(
        #     dataset_path=dataset_path,
        #     url="https://bj.bcebos.com/paddlehub-dataset/dog-cat.tar.gz")
        base_path = dataset_path
        # All three splits point at the same small validation set.
        train_image_dir = 'val'
        train_list_file = 'annotations/val.json'
        validate_image_dir = 'val'
        validate_list_file = 'annotations/val.json'
        test_image_dir = 'val'
        test_list_file = 'annotations/val.json'
        super(Coco10, self).__init__(
            base_path, train_image_dir, train_list_file, validate_image_dir,
            validate_list_file, test_image_dir, test_list_file, model_type)
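A usage sketch for the dataset class above, assuming coco_10 has already been unpacked under hub.common.dir.DATA_HOME (typically ~/.paddlehub/dataset):

# ds = Coco10(model_type='yolo')
# print(ds.num_labels)    # label count used as num_classes by DetectionTask
# print(ds.label_dict())  # id -> category-name mapping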
@@ -106,6 +106,7 @@ class BaseDataset(object):
                 "As label_list has been assigned, label_file is noneffective"
             )
-        self.label_index = dict(
-            zip(self.label_list, range(len(self.label_list))))
+        if self.label_list:
+            self.label_index = dict(
+                zip(self.label_list, range(len(self.label_list))))
......
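The added guard only builds the index when labels exist; a sketch of the effect (values assumed for illustration):

# label_list = ['cat', 'dog']  ->  label_index == {'cat': 0, 'dog': 1}
# label_list = None or []      ->  label_index is not built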
@@ -35,7 +35,7 @@ import paddle.fluid as fluid
 from visualdl import LogWriter
 import paddlehub as hub
-from paddlehub.reader.nlp_reader import BaseNLPReader
+from paddlehub.reader.nlp_reader import BaseReader, BaseNLPReader
 from paddlehub.common.paddle_helper import dtype_map, clone_program
 from paddlehub.common.utils import mkdir
 from paddlehub.common.dir import tmp_dir
@@ -350,8 +350,9 @@ class BaseTask(object):
         self._base_data_reader = data_reader
         self._base_feed_list = feed_list
-        if isinstance(data_reader, BaseNLPReader):
+        if isinstance(data_reader, BaseReader):
             self._compatible_mode = True
-            logger.warning(
-                "PaddleHub v1.8 has deprecated the reader and feed_list parameters in the nlp Task. We provided an easier usage, "
-                "in which you can use your tokenizer to preprocess dataset and run task in a clear flow. "
+            if isinstance(data_reader, BaseNLPReader):
+                logger.warning(
+                    "PaddleHub v1.8 has deprecated the reader and feed_list parameters in the nlp Task. We provided an easier usage, "
+                    "in which you can use your tokenizer to preprocess dataset and run task in a clear flow. "
......
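A sketch of the resulting dispatch, assuming BaseNLPReader specializes BaseReader (which the joint import above suggests):

# any BaseReader            -> legacy compatible mode is enabled
# a BaseNLPReader, moreover -> also emits the v1.8 deprecation warning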
@@ -196,7 +196,6 @@ class TextClassifierTask(ClassifierTask):
             feature(Variable): the `feature` will be used to classify texts. It must be the sentence-level feature, shape as [-1, emb_size]. `Token_feature` and `feature` couldn't be setted at the same time. One of them must be setted as not None. Default None.
             token_feature(Variable): the `feature` will be used to connect the pre-defined network. It must be the token-level feature, shape as [-1, seq_len, emb_size]. Default None.
             network(str): the pre-defined network. Choices: 'bilstm', 'bow', 'cnn', 'dpcnn', 'gru' and 'lstm'. Default None. If network is setted, then `token_feature` must be setted and `feature` must be None.
-            main_program (object): the customized main program, default None.
             startup_program (object): the customized startup program, default None.
             config (RunConfig): run config for the task, such as batch_size, epoch, learning_rate setting and so on. Default None.
             hidden_units(list): the element of `hidden_units` list is the full-connect layer size. It will add the full-connect layers to the program. Default None.
......
@@ -164,21 +164,6 @@ class TextGenerationTask(BaseTask):
         # Define decoder and initialize it.
         dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                         self.dropout)
-        enc_last_step = fluid.layers.slice(
-            self.token_feature,
-            axes=[1],
-            starts=[-1],
-            ends=[self.token_feature.shape[1] + 1])
-        dec_init_cell = fluid.layers.fc(
-            input=enc_last_step,
-            size=self.hidden_size,
-            num_flatten_dims=1,
-            param_attr=fluid.ParamAttr(
-                name="dec_init_cell_w",
-                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
-            bias_attr=fluid.ParamAttr(
-                name="dec_init_cell_b",
-                initializer=fluid.initializer.Constant(0.)))
         dec_init_hidden = fluid.layers.fc(
             input=self.feature,
             size=self.hidden_size,
@@ -189,12 +174,14 @@ class TextGenerationTask(BaseTask):
                 bias_attr=fluid.ParamAttr(
                     name="dec_init_hidden_b",
                     initializer=fluid.initializer.Constant(0.)))
-        # TODO: maybe dec_init_hidden can use self.feature, and dec_init_cell can be get_initial_states
-        dec_initial_states = [
-            [[dec_init_hidden, dec_init_cell]] * self.num_layers,
+        dec_initial_states = [[[
+            dec_init_hidden,
             dec_cell.get_initial_states(
-                batch_ref=self.token_feature, shape=[self.hidden_size])
-        ]
+                batch_ref=self.feature, shape=[self.hidden_size])
+        ]] * self.num_layers,
+            dec_cell.get_initial_states(
+                batch_ref=self.feature,
+                shape=[self.hidden_size])]
         tar_vocab_size = len(self._label_list)
         tar_embeder = lambda x: fluid.embedding(
             input=x,
......
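With the removed block gone, the decoder no longer derives its initial cell state from an fc projection of the encoder's last token; every cell state now comes from get_initial_states, zero-initialized against self.feature. A structure sketch for num_layers = N and hidden_size = H (the role of the trailing state as the attention input feed is an assumption):

# dec_initial_states ==
#   [ [[dec_init_hidden, zeros([batch, H])]] * N,  # per-layer (hidden, cell)
#     zeros([batch, H]) ]                          # trailing decoder state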