Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
b25dfb58
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 1 年 前同步成功
通知
282
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b25dfb58
编写于
3月 28, 2019
作者:
W
wuzewu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cv reader and dataset
上级
08ef63b6
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
232 addition
and
0 deletion
+232
-0
paddle_hub/dataset/base_cv_dataset.py
paddle_hub/dataset/base_cv_dataset.py
+78
-0
paddle_hub/dataset/cv_reader.py
paddle_hub/dataset/cv_reader.py
+84
-0
paddle_hub/dataset/dogcat.py
paddle_hub/dataset/dogcat.py
+35
-0
paddle_hub/dataset/flowers.py
paddle_hub/dataset/flowers.py
+35
-0
未找到文件。
paddle_hub/dataset/base_cv_dataset.py
0 → 100644
浏览文件 @
b25dfb58
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
paddle_hub
as
hub
from
paddle_hub.tools.downloader
import
default_downloader
class ImageClassificationDataset(object):
    """Base class for image-classification datasets described by list files.

    Subclasses are expected to fill in ``base_path`` (the dataset root
    directory), the three ``*_list_file`` names (joined onto ``base_path``)
    and ``num_labels``. Each non-empty line of a list file has the form
    ``<image path relative to base_path> <label>``, separated by one space.

    The class derives from ``object`` explicitly so that it is a new-style
    class on Python 2 as well; subclasses call ``super(Sub, self).__init__()``,
    which raises ``TypeError`` for old-style classes.
    """

    def __init__(self):
        self.base_path = None
        self.train_list_file = None
        self.test_list_file = None
        self.validate_list_file = None
        self.num_labels = 0

    def _download_dataset(self, dataset_path, url):
        """Make sure the dataset exists locally and return its directory.

        Args:
            dataset_path: Expected local directory of the dataset.
            url: Archive URL to download when ``dataset_path`` is missing.

        Returns:
            ``dataset_path`` unchanged when it already exists, otherwise the
            path reported by the downloader after download and uncompress.
        """
        if not os.path.exists(dataset_path):
            result, tips, dataset_path = default_downloader.download_file_and_uncompress(
                url=url,
                save_path=hub.dir.DATA_HOME,
                print_progress=True,
                replace=True)
            if not result:
                # NOTE(review): terminating the interpreter from library code
                # is kept for backward compatibility; consider raising instead.
                print(tips)
                exit()
        return dataset_path

    def _parse_data(self, data_path, shuffle=False):
        """Return a generator over ``(image_path, label)`` pairs of a list file.

        Args:
            data_path: Path of the list file to read.
            shuffle: When true, the samples are shuffled in memory with
                ``numpy.random.shuffle`` before being yielded.

        Returns:
            A generator yielding ``(image_path, label)`` tuples, where
            ``image_path`` is joined onto ``self.base_path`` and ``label`` is
            the raw string taken from the file. The file is only read once
            iteration starts.
        """

        def _base_reader():
            data = []
            with open(data_path, "r") as list_file:
                for line in list_file:
                    line = line.strip()
                    if not line:
                        # Blank lines (e.g. a trailing newline) carry no
                        # sample; indexing items[1] on them would fail.
                        continue
                    items = line.split(" ")
                    image_path = os.path.join(self.base_path, items[0])
                    data.append((image_path, items[1]))

            if shuffle:
                # Imported here because this module does not import numpy at
                # file level; the bare name ``np`` used to raise NameError.
                import numpy as np
                np.random.shuffle(data)

            for item in data:
                yield item

        return _base_reader()

    def train_data(self, shuffle=True):
        """Return a generator over the training samples (shuffled by default)."""
        train_data_path = os.path.join(self.base_path, self.train_list_file)
        return self._parse_data(train_data_path, shuffle)

    def test_data(self, shuffle=False):
        """Return a generator over the test samples."""
        test_data_path = os.path.join(self.base_path, self.test_list_file)
        return self._parse_data(test_data_path, shuffle)

    def validate_data(self, shuffle=False):
        """Return a generator over the validation samples."""
        validate_data_path = os.path.join(self.base_path,
                                          self.validate_list_file)
        return self._parse_data(validate_data_path, shuffle)
paddle_hub/dataset/cv_reader.py
0 → 100644
浏览文件 @
b25dfb58
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
from
PIL
import
Image
import
paddle_hub.io.augmentation
as
image_augmentation
# Maps a channel-order name to the indices that pick those channels out of an
# RGB-ordered axis: each entry is the position of the letter within "RGB".
color_mode_dict = {
    _mode: ["RGB".index(_channel) for _channel in _mode]
    for _mode in ("RGB", "RBG", "GBR", "GRB", "BGR", "BRG")
}
class ImageClassificationReader(object):
    """Turn a classification dataset into a reader of ``(image, label)`` pairs.

    Args:
        image_width: Target width passed to ``image_augmentation.image_resize``.
        image_height: Target height passed to
            ``image_augmentation.image_resize``.
        dataset: Object providing ``train_data(shuffle)``,
            ``test_data(shuffle)`` and ``validate_data(shuffle)``, each
            returning an iterable of ``(image_path, label)`` pairs.
        color_mode: Channel order of the produced arrays; must be a key of
            ``color_mode_dict``.
        data_augmentation: When true, every image additionally goes through
            ``image_augmentation.image_random_process``.

    Raises:
        ValueError: If ``color_mode`` is unknown, or if either image
            dimension is less than or equal to zero.
    """

    def __init__(self,
                 image_width,
                 image_height,
                 dataset,
                 color_mode="RGB",
                 data_augmentation=False):
        self.image_width = image_width
        self.image_height = image_height
        self.color_mode = color_mode
        self.dataset = dataset
        self.data_augmentation = data_augmentation

        if self.color_mode not in color_mode_dict:
            raise ValueError(
                "Color_mode should in %s." % color_mode_dict.keys())

        if self.image_width <= 0 or self.image_height <= 0:
            raise ValueError("Image width and height should not be negative.")

    def data_generator(self, phase, shuffle=False):
        """Build a reader function for one phase of the dataset.

        Args:
            phase: One of ``"train"``, ``"test"`` or ``"validate"``.
            shuffle: Forwarded to the dataset for the ``"train"`` phase only;
                ``"test"`` and ``"validate"`` always read unshuffled.

        Returns:
            A zero-argument callable. Each call returns a fresh generator of
            ``(image, label)`` pairs, where ``image`` is a channel-first
            numpy array with channels ordered according to ``color_mode``.

        Raises:
            ValueError: If ``phase`` is not one of the supported names.
        """
        if phase == "train":
            load_samples = self.dataset.train_data
        elif phase == "test":
            shuffle = False
            load_samples = self.dataset.test_data
        elif phase == "validate":
            shuffle = False
            load_samples = self.dataset.validate_data
        else:
            # Fail here rather than with a NameError once the reader is
            # iterated, which is what an unbound sample source used to cause.
            raise ValueError(
                "Unknown phase %r, expected 'train', 'test' or 'validate'." %
                (phase, ))

        def _data_reader():
            # Ask the dataset for its samples on every call: the dataset hands
            # back a one-shot generator, so capturing it once would leave any
            # second pass over the reader empty.
            for image_path, label in load_samples(shuffle):
                image = Image.open(image_path)
                image = image_augmentation.image_resize(
                    image, self.image_width, self.image_height)
                if self.data_augmentation:
                    image = image_augmentation.image_random_process(
                        image, enable_resize=False)

                # only support RGB
                image = image.convert('RGB')

                # HWC to CHW
                image = np.array(image)
                if len(image.shape) == 3:
                    image = np.transpose(image, (2, 0, 1))

                # Reorder the channel axis into the requested color mode.
                image = image[color_mode_dict[self.color_mode], :, :]
                yield ((image, label))

        return _data_reader
paddle_hub/dataset/dogcat.py
0 → 100644
浏览文件 @
b25dfb58
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
paddle_hub
as
hub
from
paddle_hub.dataset.base_cv_dataset
import
ImageClassificationDataset
class DogCatDataset(ImageClassificationDataset):
    """Two-label image dataset fetched from the ``dog-cat`` archive.

    On construction the dataset directory under ``hub.dir.DATA_HOME`` is
    resolved (downloading the archive when the directory is missing) and the
    list-file names and label count are set.
    """

    def __init__(self):
        super(DogCatDataset, self).__init__()

        archive_url = "https://paddlehub-dataset.bj.bcebos.com/dog-cat.tar.gz"
        local_dir = os.path.join(hub.dir.DATA_HOME, "dog-cat")
        self.base_path = self._download_dataset(
            dataset_path=local_dir, url=archive_url)

        # List files are resolved relative to base_path by the base class.
        self.num_labels = 2
        self.validate_list_file = "validate_list.txt"
        self.test_list_file = "test_list.txt"
        self.train_list_file = "train_list.txt"
paddle_hub/dataset/flowers.py
0 → 100644
浏览文件 @
b25dfb58
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
paddle_hub
as
hub
from
paddle_hub.dataset.base_cv_dataset
import
ImageClassificationDataset
class FlowersDataset(ImageClassificationDataset):
    """Five-label image dataset fetched from the ``flower_photos`` archive.

    On construction the dataset directory under ``hub.dir.DATA_HOME`` is
    resolved (downloading the archive when the directory is missing) and the
    list-file names and label count are set.
    """

    def __init__(self):
        super(FlowersDataset, self).__init__()

        archive_url = "https://paddlehub-dataset.bj.bcebos.com/flower_photos.tar.gz"
        local_dir = os.path.join(hub.dir.DATA_HOME, "flower_photos")
        self.base_path = self._download_dataset(
            dataset_path=local_dir, url=archive_url)

        # List files are resolved relative to base_path by the base class.
        self.num_labels = 5
        self.validate_list_file = "validate_list.txt"
        self.test_list_file = "test_list.txt"
        self.train_list_file = "train_list.txt"
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录