Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
b92e7760
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b92e7760
编写于
8月 18, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 18, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4208 C++ API Support for Manifest Dataset
Merge pull request !4208 from jiangzhiwen/jzw/c_api_manifest
上级
1d55e4e3
a88273bd
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
304 addition
and
0 deletion
+304
-0
mindspore/ccsrc/minddata/dataset/api/datasets.cc
mindspore/ccsrc/minddata/dataset/api/datasets.cc
+57
-0
mindspore/ccsrc/minddata/dataset/include/datasets.h
mindspore/ccsrc/minddata/dataset/include/datasets.h
+41
-0
tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc
tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc
+206
-0
未找到文件。
mindspore/ccsrc/minddata/dataset/api/datasets.cc
浏览文件 @
b92e7760
...
...
@@ -26,6 +26,7 @@
#include "minddata/dataset/engine/datasetops/source/clue_op.h"
#include "minddata/dataset/engine/datasetops/source/coco_op.h"
#include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
#include "minddata/dataset/engine/datasetops/source/manifest_op.h"
#include "minddata/dataset/engine/datasetops/source/mnist_op.h"
#include "minddata/dataset/engine/datasetops/source/text_file_op.h"
#include "minddata/dataset/engine/datasetops/source/voc_op.h"
...
...
@@ -44,6 +45,7 @@
// Sampler headers (in alphabetical order)
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/util/random.h"
...
...
@@ -164,6 +166,16 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
return
ds
->
ValidateParams
()
?
ds
:
nullptr
;
}
// Function to create a ManifestDataset.
std
::
shared_ptr
<
ManifestDataset
>
Manifest
(
std
::
string
dataset_file
,
std
::
string
usage
,
std
::
shared_ptr
<
SamplerObj
>
sampler
,
const
std
::
map
<
std
::
string
,
int32_t
>
&
class_indexing
,
bool
decode
)
{
auto
ds
=
std
::
make_shared
<
ManifestDataset
>
(
dataset_file
,
usage
,
sampler
,
class_indexing
,
decode
);
// Call derived class validation method.
return
ds
->
ValidateParams
()
?
ds
:
nullptr
;
}
// Function to create a MnistDataset.
std
::
shared_ptr
<
MnistDataset
>
Mnist
(
const
std
::
string
&
dataset_dir
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
)
{
auto
ds
=
std
::
make_shared
<
MnistDataset
>
(
dataset_dir
,
sampler
);
...
...
@@ -877,6 +889,51 @@ std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() {
return
node_ops
;
}
// Constructor for ManifestDataset.
// Only stores the arguments; ValidateParams() and Build() read the stored members.
ManifestDataset::ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr<SamplerObj> sampler,
                                 const std::map<std::string, int32_t> &class_indexing, bool decode)
    // The by-value parameters are sinks: move them into the members rather than copying them again.
    : dataset_file_(std::move(dataset_file)),
      usage_(std::move(usage)),
      decode_(decode),
      class_index_(class_indexing),
      sampler_(std::move(sampler)) {}
bool
ManifestDataset
::
ValidateParams
()
{
Path
manifest_file
(
dataset_file_
);
if
(
!
manifest_file
.
Exists
())
{
MS_LOG
(
ERROR
)
<<
"dataset file: ["
<<
dataset_file_
<<
"] is invalid or not exist"
;
return
false
;
}
std
::
vector
<
std
::
string
>
usage_list
=
{
"train"
,
"eval"
,
"inference"
};
if
(
find
(
usage_list
.
begin
(),
usage_list
.
end
(),
usage_
)
==
usage_list
.
end
())
{
MS_LOG
(
ERROR
)
<<
"usage should be train, eval or inference."
;
return
false
;
}
return
true
;
}
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
ManifestDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_EMPTY_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kCv
,
1
)));
TensorShape
scalar
=
TensorShape
::
CreateScalar
();
RETURN_EMPTY_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"label"
,
DataType
(
DataType
::
DE_UINT32
),
TensorImpl
::
kFlexible
,
0
,
&
scalar
)));
std
::
shared_ptr
<
ManifestOp
>
manifest_op
;
manifest_op
=
std
::
make_shared
<
ManifestOp
>
(
num_workers_
,
rows_per_buffer_
,
dataset_file_
,
connector_que_size_
,
decode_
,
class_index_
,
std
::
move
(
schema
),
std
::
move
(
sampler_
->
Build
()),
usage_
);
node_ops
.
push_back
(
manifest_op
);
return
node_ops
;
}
// Constructor for MnistDataset.
// Only stores the dataset directory and the sampler object as members.
MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler)
    // Both parameters are taken by value, so move them into the members instead of copying again.
    : dataset_dir_(std::move(dataset_dir)), sampler_(std::move(sampler)) {}
...
...
mindspore/ccsrc/minddata/dataset/include/datasets.h
浏览文件 @
b92e7760
...
...
@@ -49,6 +49,7 @@ class Cifar100Dataset;
class
CLUEDataset
;
class
CocoDataset
;
class
ImageFolderDataset
;
class
ManifestDataset
;
class
MnistDataset
;
class
TextFileDataset
;
class
VOCDataset
;
...
...
@@ -154,6 +155,21 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
const
std
::
set
<
std
::
string
>
&
extensions
=
{},
const
std
::
map
<
std
::
string
,
int32_t
>
&
class_indexing
=
{});
/// \brief Function to create a ManifestDataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_file The dataset file to be read
/// \param[in] usage Need "train", "eval" or "inference" data (default="train")
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///     a `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the label
///     names will be sorted alphabetically and each class will be given a unique index starting from 0).
/// \param[in] decode Decode the images after reading (default=false).
/// \return Shared pointer to the current ManifestDataset, or nullptr if the arguments fail validation
std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage = "train",
                                          std::shared_ptr<SamplerObj> sampler = nullptr,
                                          const std::map<std::string, int32_t> &class_indexing = {},
                                          bool decode = false);
/// \brief Function to create a MnistDataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
...
...
@@ -500,6 +516,31 @@ class ImageFolderDataset : public Dataset {
std
::
set
<
std
::
string
>
exts_
;
};
/// \class ManifestDataset
/// \brief Dataset node that holds the arguments for reading a manifest file
class ManifestDataset : public Dataset {
 public:
  /// \brief Constructor; stores the arguments without validating them
  /// \param[in] dataset_file The manifest file to be read
  /// \param[in] usage Which part of the data to use; ValidateParams accepts "train", "eval" or "inference"
  /// \param[in] sampler Sampler object; when null, Build falls back to a default sampler
  /// \param[in] class_indexing A str-to-int mapping from label name to index
  /// \param[in] decode Whether the images are decoded after reading
  ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr<SamplerObj> sampler,
                  const std::map<std::string, int32_t> &class_indexing, bool decode);

  /// \brief Destructor
  ~ManifestDataset() = default;

  /// \brief Base class override that creates the runtime dataset op objects required by this class
  /// \return The list of shared pointers to the newly created DatasetOps
  std::vector<std::shared_ptr<DatasetOp>> Build() override;

  /// \brief Validation of the stored parameters
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_file_;                    // manifest file given to the constructor
  std::string usage_;                           // requested usage string
  bool decode_;                                 // decode flag given to the constructor
  std::map<std::string, int32_t> class_index_;  // copy of the class_indexing argument
  std::shared_ptr<SamplerObj> sampler_;         // may be null until Build assigns a default
};
class
MnistDataset
:
public
Dataset
{
public:
/// \brief Constructor
...
...
tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc
0 → 100644
浏览文件 @
b92e7760
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common.h"
#include "minddata/dataset/include/datasets.h"
using
namespace
mindspore
::
dataset
::
api
;
using
mindspore
::
dataset
::
Tensor
;
// Test fixture for the Manifest C++ API tests; adds nothing on top of UT::DatasetOpTesting.
class MindDataTestPipeline : public UT::DatasetOpTesting {};
// Creates a Manifest dataset with default arguments (usage "train") and expects two rows.
TEST_F(MindDataTestPipeline, TestManifestBasic) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestBasic.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path);
  // Fatal assertion: ds is dereferenced right below, so the test must stop here if creation failed.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal for the same reason: iter is used unconditionally from here on.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // An empty row ends the loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
// Creates a Manifest dataset with decode=true and checks the shape of every returned image.
TEST_F(MindDataTestPipeline, TestManifestDecode) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestDecode.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", nullptr, {}, true);
  // Fatal assertion: ds is dereferenced right below, so the test must stop here if creation failed.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal for the same reason: iter is used unconditionally from here on.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // An empty row ends the loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    auto shape = image->shape();
    MS_LOG(INFO) << "Tensor image shape size: " << shape.Size();
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    EXPECT_GT(shape.Size(), 1);  // Verify decode=true took effect
    iter->GetNextRow(&row);
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
// Creates a Manifest dataset with usage "eval" and expects a single row.
TEST_F(MindDataTestPipeline, TestManifestEval) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestEval.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path, "eval");
  // Fatal assertion: ds is dereferenced right below, so the test must stop here if creation failed.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal for the same reason: iter is used unconditionally from here on.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // An empty row ends the loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  EXPECT_EQ(i, 1);

  // Manually terminate the pipeline
  iter->Stop();
}
// Creates a Manifest dataset with an explicit class_indexing map and checks that every returned
// label is one of the indices assigned to "cat" and "dog".
TEST_F(MindDataTestPipeline, TestManifestClassIndex) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestClassIndex.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Named class_index rather than `map` so the variable does not shadow the std::map template name.
  std::map<std::string, int32_t> class_index;
  class_index["cat"] = 111;
  class_index["dog"] = 222;
  class_index["wrong folder name"] = 1234;  // this is skipped
  std::vector<int> expected_label = {111, 222};

  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", nullptr, class_index, true);
  // Fatal assertion: ds is dereferenced right below, so the test must stop here if creation failed.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal for the same reason: iter is used unconditionally from here on.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // An empty row ends the loop.
  uint64_t i = 0;
  int32_t label_idx = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();

    // Read the scalar label and make sure it is one of the remapped indices.
    row["label"]->GetItemAt<int32_t>(&label_idx, {});
    MS_LOG(INFO) << "Tensor label value: " << label_idx;
    auto label_it = std::find(expected_label.begin(), expected_label.end(), label_idx);
    EXPECT_NE(label_it, expected_label.end());

    iter->GetNextRow(&row);
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
// Creates a Manifest dataset with SequentialSampler(0, 1) and expects a single row.
TEST_F(MindDataTestPipeline, TestManifestNumSamplers) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestNumSamplers.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", SequentialSampler(0, 1), {}, true);
  // Fatal assertion: ds is dereferenced right below, so the test must stop here if creation failed.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal for the same reason: iter is used unconditionally from here on.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // An empty row ends the loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  EXPECT_EQ(i, 1);

  // Manually terminate the pipeline
  iter->Stop();
}
// Checks that Manifest() returns nullptr for a missing file and for an unsupported usage string.
TEST_F(MindDataTestPipeline, TestManifestError) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestError.";

  // Case 1: the manifest path does not exist.
  std::shared_ptr<Dataset> missing_file_ds = Manifest("NotExistFile", "train");
  EXPECT_EQ(missing_file_ds, nullptr);

  // Case 2: the manifest path is the regular test file, but the usage is not a supported value.
  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  std::shared_ptr<Dataset> bad_usage_ds = Manifest(file_path, "invalid_usage");
  EXPECT_EQ(bad_usage_ds, nullptr);
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录