Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2301_77200941
mindspore
提交
716329df
M
mindspore
项目概览
2301_77200941
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
716329df
编写于
8月 27, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 27, 2020
浏览文件
操作
浏览文件
下载
差异文件
!5278 Change default value of sampler in c-api
Merge pull request !5278 from luoyang/c-api
上级
7aaf4118
419478b4
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
356 addition
and
127 deletion
+356
-127
mindspore/ccsrc/minddata/dataset/api/datasets.cc
mindspore/ccsrc/minddata/dataset/api/datasets.cc
+104
-79
mindspore/ccsrc/minddata/dataset/include/datasets.h
mindspore/ccsrc/minddata/dataset/include/datasets.h
+36
-36
tests/ut/cpp/dataset/c_api_dataset_album_test.cc
tests/ut/cpp/dataset/c_api_dataset_album_test.cc
+23
-1
tests/ut/cpp/dataset/c_api_dataset_cifar_test.cc
tests/ut/cpp/dataset/c_api_dataset_cifar_test.cc
+30
-0
tests/ut/cpp/dataset/c_api_dataset_coco_test.cc
tests/ut/cpp/dataset/c_api_dataset_coco_test.cc
+11
-0
tests/ut/cpp/dataset/c_api_dataset_csv_test.cc
tests/ut/cpp/dataset/c_api_dataset_csv_test.cc
+11
-0
tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc
tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc
+11
-2
tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
+46
-0
tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc
tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc
+25
-1
tests/ut/cpp/dataset/c_api_dataset_voc_test.cc
tests/ut/cpp/dataset/c_api_dataset_voc_test.cc
+10
-0
tests/ut/cpp/dataset/c_api_datasets_test.cc
tests/ut/cpp/dataset/c_api_datasets_test.cc
+49
-8
未找到文件。
mindspore/ccsrc/minddata/dataset/api/datasets.cc
浏览文件 @
716329df
...
...
@@ -201,8 +201,8 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
}
// Function to create a ManifestDataset.
std
::
shared_ptr
<
ManifestDataset
>
Manifest
(
std
::
string
dataset_file
,
std
::
string
usage
,
std
::
shared_ptr
<
SamplerObj
>
sampler
,
std
::
shared_ptr
<
ManifestDataset
>
Manifest
(
const
std
::
string
&
dataset_file
,
const
std
::
string
&
usage
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
,
const
std
::
map
<
std
::
string
,
int32_t
>
&
class_indexing
,
bool
decode
)
{
auto
ds
=
std
::
make_shared
<
ManifestDataset
>
(
dataset_file
,
usage
,
sampler
,
class_indexing
,
decode
);
...
...
@@ -590,13 +590,6 @@ bool SchemaObj::from_json(nlohmann::json json_obj) {
// OTHER FUNCTIONS
// Helper function to create default RandomSampler.
std
::
shared_ptr
<
SamplerObj
>
CreateDefaultSampler
()
{
const
int32_t
num_samples
=
0
;
// 0 means to sample all ids.
bool
replacement
=
false
;
return
std
::
make_shared
<
RandomSamplerObj
>
(
replacement
,
num_samples
);
}
// Helper function to compute a default shuffle size
Status
ComputeShuffleSize
(
int64_t
num_files
,
int64_t
num_devices
,
int64_t
num_rows
,
int64_t
total_rows
,
int64_t
*
shuffle_size
)
{
...
...
@@ -692,6 +685,36 @@ bool ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_sha
return
true
;
}
// Helper function to validate dataset sampler parameter
bool
ValidateDatasetSampler
(
const
std
::
string
&
dataset_name
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
)
{
if
(
sampler
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
dataset_name
<<
": Sampler is not constructed correctly, sampler: nullptr"
;
return
false
;
}
return
true
;
}
// Helper function to validate dataset input/output column parameter
bool
ValidateDatasetColumnParam
(
const
std
::
string
&
dataset_name
,
const
std
::
string
&
column_param
,
const
std
::
vector
<
std
::
string
>
&
columns
)
{
if
(
columns
.
empty
())
{
MS_LOG
(
ERROR
)
<<
dataset_name
<<
":"
<<
column_param
<<
" should not be empty"
;
return
false
;
}
for
(
uint32_t
i
=
0
;
i
<
columns
.
size
();
++
i
)
{
if
(
columns
[
i
].
empty
())
{
MS_LOG
(
ERROR
)
<<
dataset_name
<<
":"
<<
column_param
<<
"["
<<
i
<<
"] should not be empty"
;
return
false
;
}
}
std
::
set
<
std
::
string
>
columns_set
(
columns
.
begin
(),
columns
.
end
());
if
(
columns_set
.
size
()
!=
columns
.
size
())
{
MS_LOG
(
ERROR
)
<<
dataset_name
<<
":"
<<
column_param
<<
": Every column name should not be same with others"
;
return
false
;
}
return
true
;
}
/* ####################################### Derived Dataset classes ################################# */
// DERIVED DATASET CLASSES LEAF-NODE DATASETS
...
...
@@ -716,6 +739,16 @@ bool AlbumDataset::ValidateParams() {
return
false
;
}
if
(
!
ValidateDatasetSampler
(
"AlbumDataset"
,
sampler_
))
{
return
false
;
}
if
(
!
column_names_
.
empty
())
{
if
(
!
ValidateDatasetColumnParam
(
"AlbumDataset"
,
"column_names"
,
column_names_
))
{
return
false
;
}
}
return
true
;
}
...
...
@@ -724,11 +757,6 @@ std::vector<std::shared_ptr<DatasetOp>> AlbumDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_EMPTY_IF_ERROR
(
schema
->
LoadSchemaFile
(
schema_path_
,
column_names_
));
...
...
@@ -754,6 +782,9 @@ bool CelebADataset::ValidateParams() {
if
(
!
ValidateDatasetDirParam
(
"CelebADataset"
,
dataset_dir_
))
{
return
false
;
}
if
(
!
ValidateDatasetSampler
(
"CelebADataset"
,
sampler_
))
{
return
false
;
}
std
::
set
<
std
::
string
>
dataset_type_list
=
{
"all"
,
"train"
,
"valid"
,
"test"
};
auto
iter
=
dataset_type_list
.
find
(
dataset_type_
);
if
(
iter
==
dataset_type_list
.
end
())
{
...
...
@@ -768,11 +799,6 @@ std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
std
::
unique_ptr
<
DataSchema
>
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_EMPTY_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kFlexible
,
1
)));
...
...
@@ -789,18 +815,15 @@ std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
Cifar10Dataset
::
Cifar10Dataset
(
const
std
::
string
&
dataset_dir
,
std
::
shared_ptr
<
SamplerObj
>
sampler
)
:
dataset_dir_
(
dataset_dir
),
sampler_
(
sampler
)
{}
bool
Cifar10Dataset
::
ValidateParams
()
{
return
ValidateDatasetDirParam
(
"Cifar10Dataset"
,
dataset_dir_
);
}
// Validates the Cifar10Dataset arguments: the dataset directory is checked first,
// and the sampler is checked only when the directory check passes.
bool Cifar10Dataset::ValidateParams() {
  if (!ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_)) {
    return false;
  }
  return ValidateDatasetSampler("Cifar10Dataset", sampler_);
}
// Function to build CifarOp for Cifar10
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
Cifar10Dataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_EMPTY_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kCv
,
1
)));
...
...
@@ -818,18 +841,16 @@ std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() {
Cifar100Dataset
::
Cifar100Dataset
(
const
std
::
string
&
dataset_dir
,
std
::
shared_ptr
<
SamplerObj
>
sampler
)
:
dataset_dir_
(
dataset_dir
),
sampler_
(
sampler
)
{}
bool
Cifar100Dataset
::
ValidateParams
()
{
return
ValidateDatasetDirParam
(
"Cifar100Dataset"
,
dataset_dir_
);
}
// Validates the Cifar100Dataset arguments: the dataset directory is checked first,
// and the sampler is checked only when the directory check passes.
bool Cifar100Dataset::ValidateParams() {
  if (!ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_)) {
    return false;
  }
  return ValidateDatasetSampler("Cifar100Dataset", sampler_);
}
// Function to build CifarOp for Cifar100
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
Cifar100Dataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_EMPTY_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kCv
,
1
)));
...
...
@@ -1045,6 +1066,9 @@ bool CocoDataset::ValidateParams() {
if
(
!
ValidateDatasetDirParam
(
"CocoDataset"
,
dataset_dir_
))
{
return
false
;
}
if
(
!
ValidateDatasetSampler
(
"CocoDataset"
,
sampler_
))
{
return
false
;
}
Path
annotation_file
(
annotation_file_
);
if
(
!
annotation_file
.
Exists
())
{
MS_LOG
(
ERROR
)
<<
"annotation_file is invalid or not exist"
;
...
...
@@ -1064,11 +1088,6 @@ std::vector<std::shared_ptr<DatasetOp>> CocoDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
CocoOp
::
TaskType
task_type
;
if
(
task_
==
"Detection"
)
{
task_type
=
CocoOp
::
TaskType
::
Detection
;
...
...
@@ -1158,6 +1177,12 @@ bool CSVDataset::ValidateParams() {
return
false
;
}
if
(
!
column_names_
.
empty
())
{
if
(
!
ValidateDatasetColumnParam
(
"CSVDataset"
,
"column_names"
,
column_names_
))
{
return
false
;
}
}
return
true
;
}
...
...
@@ -1218,17 +1243,15 @@ ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std
class_indexing_
(
class_indexing
),
exts_
(
extensions
)
{}
bool
ImageFolderDataset
::
ValidateParams
()
{
return
ValidateDatasetDirParam
(
"ImageFolderDataset"
,
dataset_dir_
);
}
// Validates the ImageFolderDataset arguments: the dataset directory is checked first,
// and the sampler is checked only when the directory check passes.
bool ImageFolderDataset::ValidateParams() {
  if (!ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_)) {
    return false;
  }
  return ValidateDatasetSampler("ImageFolderDataset", sampler_);
}
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
ImageFolderDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
// This arg is exist in ImageFolderOp, but not externalized (in Python API).
std
::
unique_ptr
<
DataSchema
>
schema
=
std
::
make_unique
<
DataSchema
>
();
...
...
@@ -1243,7 +1266,8 @@ std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() {
return
node_ops
;
}
ManifestDataset
::
ManifestDataset
(
std
::
string
dataset_file
,
std
::
string
usage
,
std
::
shared_ptr
<
SamplerObj
>
sampler
,
ManifestDataset
::
ManifestDataset
(
const
std
::
string
&
dataset_file
,
const
std
::
string
&
usage
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
,
const
std
::
map
<
std
::
string
,
int32_t
>
&
class_indexing
,
bool
decode
)
:
dataset_file_
(
dataset_file
),
usage_
(
usage
),
decode_
(
decode
),
class_index_
(
class_indexing
),
sampler_
(
sampler
)
{}
...
...
@@ -1254,6 +1278,10 @@ bool ManifestDataset::ValidateParams() {
return
false
;
}
if
(
!
ValidateDatasetSampler
(
"ManifestDataset"
,
sampler_
))
{
return
false
;
}
std
::
vector
<
std
::
string
>
usage_list
=
{
"train"
,
"eval"
,
"inference"
};
if
(
find
(
usage_list
.
begin
(),
usage_list
.
end
(),
usage_
)
==
usage_list
.
end
())
{
MS_LOG
(
ERROR
)
<<
"usage should be train, eval or inference."
;
...
...
@@ -1267,11 +1295,6 @@ std::vector<std::shared_ptr<DatasetOp>> ManifestDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_EMPTY_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kCv
,
1
)));
...
...
@@ -1291,17 +1314,14 @@ std::vector<std::shared_ptr<DatasetOp>> ManifestDataset::Build() {
MnistDataset
::
MnistDataset
(
std
::
string
dataset_dir
,
std
::
shared_ptr
<
SamplerObj
>
sampler
)
:
dataset_dir_
(
dataset_dir
),
sampler_
(
sampler
)
{}
bool
MnistDataset
::
ValidateParams
()
{
return
ValidateDatasetDirParam
(
"MnistDataset"
,
dataset_dir_
);
}
// Validates the MnistDataset arguments: the dataset directory is checked first,
// and the sampler is checked only when the directory check passes.
bool MnistDataset::ValidateParams() {
  if (!ValidateDatasetDirParam("MnistDataset", dataset_dir_)) {
    return false;
  }
  return ValidateDatasetSampler("MnistDataset", sampler_);
}
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
MnistDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_EMPTY_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kCv
,
1
)));
...
...
@@ -1320,6 +1340,14 @@ bool RandomDataset::ValidateParams() {
MS_LOG
(
ERROR
)
<<
"RandomDataset: total_rows must be greater than 0, now get "
<<
total_rows_
;
return
false
;
}
if
(
!
ValidateDatasetSampler
(
"RandomDataset"
,
sampler_
))
{
return
false
;
}
if
(
!
columns_list_
.
empty
())
{
if
(
!
ValidateDatasetColumnParam
(
"RandomDataset"
,
"columns_list"
,
columns_list_
))
{
return
false
;
}
}
return
true
;
}
...
...
@@ -1342,11 +1370,6 @@ std::vector<std::shared_ptr<DatasetOp>> RandomDataset::Build() {
total_rows_
=
schema_obj
->
get_num_rows
();
}
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
std
::
string
schema_json_string
,
schema_file_path
;
if
(
schema_
!=
nullptr
)
{
schema_
->
set_dataset_type
(
"Random"
);
...
...
@@ -1459,6 +1482,9 @@ bool VOCDataset::ValidateParams() {
MS_LOG
(
ERROR
)
<<
"Invalid dataset path or no dataset path is specified."
;
return
false
;
}
if
(
!
ValidateDatasetSampler
(
"VOCDataset"
,
sampler_
))
{
return
false
;
}
if
(
task_
==
"Segmentation"
)
{
if
(
!
class_index_
.
empty
())
{
MS_LOG
(
ERROR
)
<<
"class_indexing is invalid in Segmentation task."
;
...
...
@@ -1487,11 +1513,6 @@ std::vector<std::shared_ptr<DatasetOp>> VOCDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
VOCOp
::
TaskType
task_type_
;
...
...
@@ -1657,7 +1678,21 @@ bool MapDataset::ValidateParams() {
MS_LOG
(
ERROR
)
<<
"Map: No operation is specified."
;
return
false
;
}
if
(
!
input_columns_
.
empty
())
{
if
(
!
ValidateDatasetColumnParam
(
"MapDataset"
,
"input_columns"
,
input_columns_
))
{
return
false
;
}
}
if
(
!
output_columns_
.
empty
())
{
if
(
!
ValidateDatasetColumnParam
(
"MapDataset"
,
"output_columns"
,
output_columns_
))
{
return
false
;
}
}
if
(
!
project_columns_
.
empty
())
{
if
(
!
ValidateDatasetColumnParam
(
"MapDataset"
,
"project_columns"
,
project_columns_
))
{
return
false
;
}
}
return
true
;
}
...
...
@@ -1686,23 +1721,13 @@ RenameDataset::RenameDataset(const std::vector<std::string> &input_columns,
:
input_columns_
(
input_columns
),
output_columns_
(
output_columns
)
{}
bool
RenameDataset
::
ValidateParams
()
{
if
(
input_columns_
.
empty
()
||
output_columns_
.
empty
())
{
MS_LOG
(
ERROR
)
<<
"input and output columns must be specified"
;
return
false
;
}
if
(
input_columns_
.
size
()
!=
output_columns_
.
size
())
{
MS_LOG
(
ERROR
)
<<
"input and output columns must be the same size"
;
MS_LOG
(
ERROR
)
<<
"
RenameDataset:
input and output columns must be the same size"
;
return
false
;
}
for
(
uint32_t
i
=
0
;
i
<
input_columns_
.
size
();
++
i
)
{
if
(
input_columns_
[
i
].
empty
())
{
MS_LOG
(
ERROR
)
<<
"input_columns: column name should not be empty."
;
return
false
;
}
if
(
output_columns_
[
i
].
empty
())
{
MS_LOG
(
ERROR
)
<<
"output_columns: column name should not be empty."
;
return
false
;
}
if
(
!
ValidateDatasetColumnParam
(
"RenameDataset"
,
"input_columns"
,
input_columns_
)
||
!
ValidateDatasetColumnParam
(
"RenameDataset"
,
"output_columns"
,
output_columns_
))
{
return
false
;
}
return
true
;
}
...
...
mindspore/ccsrc/minddata/dataset/include/datasets.h
浏览文件 @
716329df
...
...
@@ -87,44 +87,44 @@ std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = "");
/// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns.
/// (default = {})
/// \param[in] decode the option to decode the images in dataset (default = false)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`
,
///
A `RandomSampler` will be used to randomly iterate the entire dataset (default = nullptr
)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std
::
shared_ptr
<
AlbumDataset
>
Album
(
const
std
::
string
&
dataset_dir
,
const
std
::
string
&
data_schema
,
const
std
::
vector
<
std
::
string
>
&
column_names
=
{},
bool
decode
=
false
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
nullptr
);
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
);
/// \brief Function to create a CelebADataset
/// \notes The generated dataset has two columns ['image', 'attr'].
// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type.
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'.
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`, A `RandomSampler`
///
will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] decode Decode the images after reading (default=false).
/// \param[in] extensions Set of file extensions to be included in the dataset (default={}).
/// \return Shared pointer to the current Dataset
std
::
shared_ptr
<
CelebADataset
>
CelebA
(
const
std
::
string
&
dataset_dir
,
const
std
::
string
&
dataset_type
=
"all"
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
nullptr
,
bool
decode
=
false
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
,
bool
decode
=
false
,
const
std
::
set
<
std
::
string
>
&
extensions
=
{});
/// \brief Function to create a Cifar10 Dataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`, A `RandomSampler`
///
will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std
::
shared_ptr
<
Cifar10Dataset
>
Cifar10
(
const
std
::
string
&
dataset_dir
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
nullptr
);
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
);
/// \brief Function to create a Cifar100 Dataset
/// \notes The generated dataset has three columns ['image', 'coarse_label', 'fine_label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`, A `RandomSampler`
///
will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std
::
shared_ptr
<
Cifar100Dataset
>
Cifar100
(
const
std
::
string
&
dataset_dir
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
nullptr
);
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
);
/// \brief Function to create a CLUEDataset
/// \notes The generated dataset has a variable number of columns depending on the task and usage
...
...
@@ -161,12 +161,12 @@ std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files,
/// \param[in] annotation_file Path to the annotation json
/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`, A `RandomSampler`
///
will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std
::
shared_ptr
<
CocoDataset
>
Coco
(
const
std
::
string
&
dataset_dir
,
const
std
::
string
&
annotation_file
,
const
std
::
string
&
task
=
"Detection"
,
const
bool
&
decode
=
false
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
nullptr
);
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
);
/// \brief Function to create a CSVDataset
/// \notes The generated dataset has a variable number of columns
...
...
@@ -200,13 +200,13 @@ std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_files, c
/// The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] decode A flag to decode in ImageFolder
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`
,
///
A `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] extensions File extensions to be read
/// \param[in] class_indexing a class name to label map
/// \return Shared pointer to the current ImageFolderDataset
std
::
shared_ptr
<
ImageFolderDataset
>
ImageFolder
(
const
std
::
string
&
dataset_dir
,
bool
decode
=
false
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
nullptr
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
,
const
std
::
set
<
std
::
string
>
&
extensions
=
{},
const
std
::
map
<
std
::
string
,
int32_t
>
&
class_indexing
=
{});
...
...
@@ -214,25 +214,25 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_file The dataset file to be read
/// \param[in] usage Need "train", "eval" or "inference" data (default="train")
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`
,
///
A `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder
/// names will be sorted alphabetically and each class will be given a unique index starting from 0).
/// \param[in] decode Decode the images after reading (default=false).
/// \return Shared pointer to the current ManifestDataset
std
::
shared_ptr
<
ManifestDataset
>
Manifest
(
std
::
string
dataset_file
,
std
::
string
usage
=
"train"
,
std
::
shared_ptr
<
SamplerObj
>
sampler
=
nullptr
,
std
::
shared_ptr
<
ManifestDataset
>
Manifest
(
const
std
::
string
&
dataset_file
,
const
std
::
string
&
usage
=
"train"
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
,
const
std
::
map
<
std
::
string
,
int32_t
>
&
class_indexing
=
{},
bool
decode
=
false
);
/// \brief Function to create a MnistDataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`
,
///
A `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current MnistDataset
std
::
shared_ptr
<
MnistDataset
>
Mnist
(
const
std
::
string
&
dataset_dir
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
nullptr
);
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
);
/// \brief Function to create a ConcatDataset
/// \notes Reload "+" operator to concat two datasets
...
...
@@ -246,14 +246,14 @@ std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &dataset
/// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random)
/// \param[in] schema SchemaObj to set column type, data type and data shape
/// \param[in] columns_list List of columns to be read (default={}, read all columns)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`, A `RandomSampler`
///
will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
template
<
typename
T
=
std
::
shared_ptr
<
SchemaObj
>
>
std
::
shared_ptr
<
RandomDataset
>
RandomData
(
const
int32_t
&
total_rows
=
0
,
T
schema
=
nullptr
,
const
std
::
vector
<
std
::
string
>
&
columns_list
=
{},
std
::
shared_ptr
<
SamplerObj
>
sampler
=
nullptr
)
{
auto
ds
=
std
::
make_shared
<
RandomDataset
>
(
total_rows
,
schema
,
std
::
move
(
columns_list
)
,
std
::
move
(
sampler
));
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
)
{
auto
ds
=
std
::
make_shared
<
RandomDataset
>
(
total_rows
,
schema
,
columns_list
,
std
::
move
(
sampler
));
return
ds
->
ValidateParams
()
?
ds
:
nullptr
;
}
...
...
@@ -286,13 +286,13 @@ std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &datase
/// \param[in] mode Set the data list txt file to be readed
/// \param[in] class_indexing A str-to-int mapping from label name to index
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is
`nullptr`, A `RandomSampler`
///
will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std
::
shared_ptr
<
VOCDataset
>
VOC
(
const
std
::
string
&
dataset_dir
,
const
std
::
string
&
task
=
"Segmentation"
,
const
std
::
string
&
mode
=
"train"
,
const
std
::
map
<
std
::
string
,
int32_t
>
&
class_indexing
=
{},
bool
decode
=
false
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
nullptr
);
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
=
RandomSampler
()
);
/// \brief Function to create a ZipDataset
/// \notes Applies zip to the dataset
...
...
@@ -756,7 +756,7 @@ class ImageFolderDataset : public Dataset {
class
ManifestDataset
:
public
Dataset
{
public:
/// \brief Constructor
ManifestDataset
(
std
::
string
dataset_file
,
std
::
string
usage
,
std
::
shared_ptr
<
SamplerObj
>
sampler
,
ManifestDataset
(
const
std
::
string
&
dataset_file
,
const
std
::
string
&
usage
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
,
const
std
::
map
<
std
::
string
,
int32_t
>
&
class_indexing
,
bool
decode
);
/// \brief Destructor
...
...
@@ -808,7 +808,7 @@ class RandomDataset : public Dataset {
/// \brief Constructor
RandomDataset
(
const
int32_t
&
total_rows
,
std
::
shared_ptr
<
SchemaObj
>
schema
,
const
std
::
vector
<
std
::
string
>
&
columns_list
,
std
::
shared_ptr
<
SamplerObj
>
sampler
)
const
std
::
vector
<
std
::
string
>
&
columns_list
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
)
:
total_rows_
(
total_rows
),
schema_path_
(
""
),
schema_
(
std
::
move
(
schema
)),
...
...
@@ -816,8 +816,8 @@ class RandomDataset : public Dataset {
sampler_
(
std
::
move
(
sampler
))
{}
/// \brief Constructor
RandomDataset
(
const
int32_t
&
total_rows
,
std
::
string
schema_path
,
std
::
vector
<
std
::
string
>
columns_list
,
std
::
shared_ptr
<
SamplerObj
>
sampler
)
RandomDataset
(
const
int32_t
&
total_rows
,
std
::
string
schema_path
,
const
std
::
vector
<
std
::
string
>
&
columns_list
,
const
std
::
shared_ptr
<
SamplerObj
>
&
sampler
)
:
total_rows_
(
total_rows
),
schema_path_
(
schema_path
),
columns_list_
(
columns_list
),
sampler_
(
std
::
move
(
sampler
))
{}
/// \brief Destructor
...
...
tests/ut/cpp/dataset/c_api_dataset_album_test.cc
浏览文件 @
716329df
...
...
@@ -93,7 +93,7 @@ TEST_F(MindDataTestPipeline, TestAlbumDecode) {
TEST_F
(
MindDataTestPipeline
,
TestAlbumNumSamplers
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestAlbumNumSamplers."
;
std
::
string
folder_path
=
datasets_root_path_
+
"/testAlbum/images"
;
std
::
string
schema_file
=
datasets_root_path_
+
"/testAlbum/datasetSchema.json"
;
std
::
vector
<
std
::
string
>
column_names
=
{
"image"
,
"label"
,
"id"
};
...
...
@@ -134,3 +134,25 @@ TEST_F(MindDataTestPipeline, TestAlbumError) {
EXPECT_EQ
(
ds
,
nullptr
);
}
// Album() is expected to return a null dataset when the sampler argument is nullptr.
TEST_F(MindDataTestPipeline, TestAlbumWithNullSampler) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumWithNullSampler.";

  const std::string image_dir = datasets_root_path_ + "/testAlbum/images";
  const std::string schema_path = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  const std::vector<std::string> columns{"image", "label", "id"};

  // Create an Album Dataset with decode enabled and a null sampler
  std::shared_ptr<Dataset> ds = Album(image_dir, schema_path, columns, true, nullptr);

  // Expect failure: sampler can not be nullptr
  EXPECT_EQ(ds, nullptr);
}
TEST_F
(
MindDataTestPipeline
,
TestAlbumDuplicateColumnName
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestAlbumDuplicateColumnName."
;
std
::
string
folder_path
=
datasets_root_path_
+
"/testAlbum/images"
;
std
::
string
schema_file
=
datasets_root_path_
+
"/testAlbum/datasetSchema.json"
;
std
::
vector
<
std
::
string
>
column_names
=
{
"image"
,
"image"
,
"id"
};
// Create an Album Dataset
std
::
shared_ptr
<
Dataset
>
ds
=
Album
(
folder_path
,
schema_file
,
column_names
,
true
);
// Expect failure: duplicate column names
EXPECT_EQ
(
ds
,
nullptr
);
}
tests/ut/cpp/dataset/c_api_dataset_cifar_test.cc
浏览文件 @
716329df
...
...
@@ -107,3 +107,33 @@ TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) {
std
::
shared_ptr
<
Dataset
>
ds
=
Cifar10
(
""
,
RandomSampler
(
false
,
10
));
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestCifar10DatasetWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestCifar10DatasetWithNullSampler."
;
// Create a Cifar10 Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testCifar10Data/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
Cifar10
(
folder_path
,
nullptr
);
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestCifar100DatasetWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestCifar100DatasetWithNullSampler."
;
// Create a Cifar100 Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testCifar100Data/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
Cifar100
(
folder_path
,
nullptr
);
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestCifar100DatasetWithWrongSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestCifar100DatasetWithWrongSampler."
;
// Create a Cifar100 Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testCifar100Data/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
Cifar100
(
folder_path
,
RandomSampler
(
false
,
-
10
));
// Expect failure: sampler is not constructed correctly
EXPECT_EQ
(
ds
,
nullptr
);
}
tests/ut/cpp/dataset/c_api_dataset_coco_test.cc
浏览文件 @
716329df
...
...
@@ -290,3 +290,14 @@ TEST_F(MindDataTestPipeline, TestCocoStuff) {
// Manually terminate the pipeline
iter
->
Stop
();
}
TEST_F
(
MindDataTestPipeline
,
TestCocoWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestCocoWithNullSampler."
;
// Create a Coco Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testCOCO/train"
;
std
::
string
annotation_file
=
datasets_root_path_
+
"/testCOCO/annotations/train.json"
;
std
::
shared_ptr
<
Dataset
>
ds
=
Coco
(
folder_path
,
annotation_file
,
"Detection"
,
false
,
nullptr
);
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
tests/ut/cpp/dataset/c_api_dataset_csv_test.cc
浏览文件 @
716329df
...
...
@@ -533,3 +533,14 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetShuffleGlobal) {
GlobalContext
::
config_manager
()
->
set_seed
(
original_seed
);
GlobalContext
::
config_manager
()
->
set_num_parallel_workers
(
original_num_parallel_workers
);
}
TEST_F
(
MindDataTestPipeline
,
TestCSVDatasetDuplicateColumnName
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestCSVDatasetDuplicateColumnName."
;
// Create a CSVDataset, with single CSV file
std
::
string
train_file
=
datasets_root_path_
+
"/testCSV/1.csv"
;
std
::
vector
<
std
::
string
>
column_names
=
{
"col1"
,
"col1"
,
"col3"
,
"col4"
};
std
::
shared_ptr
<
Dataset
>
ds
=
CSV
({
train_file
},
','
,
{},
column_names
,
-
1
,
ShuffleMode
::
kFalse
);
// Expect failure: duplicate column names
EXPECT_EQ
(
ds
,
nullptr
);
}
tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc
浏览文件 @
716329df
...
...
@@ -59,7 +59,7 @@ TEST_F(MindDataTestPipeline, TestManifestDecode) {
std
::
string
file_path
=
datasets_root_path_
+
"/testManifestData/cpp.json"
;
// Create a Manifest Dataset
std
::
shared_ptr
<
Dataset
>
ds
=
Manifest
(
file_path
,
"train"
,
nullptr
,
{},
true
);
std
::
shared_ptr
<
Dataset
>
ds
=
Manifest
(
file_path
,
"train"
,
RandomSampler
()
,
{},
true
);
EXPECT_NE
(
ds
,
nullptr
);
// Create an iterator over the result of the above dataset
...
...
@@ -130,7 +130,7 @@ TEST_F(MindDataTestPipeline, TestManifestClassIndex) {
std
::
vector
<
int
>
expected_label
=
{
111
,
222
};
// Create a Manifest Dataset
std
::
shared_ptr
<
Dataset
>
ds
=
Manifest
(
file_path
,
"train"
,
nullptr
,
map
,
true
);
std
::
shared_ptr
<
Dataset
>
ds
=
Manifest
(
file_path
,
"train"
,
RandomSampler
()
,
map
,
true
);
EXPECT_NE
(
ds
,
nullptr
);
// Create an iterator over the result of the above dataset
...
...
@@ -204,3 +204,12 @@ TEST_F(MindDataTestPipeline, TestManifestError) {
std
::
shared_ptr
<
Dataset
>
ds1
=
Manifest
(
file_path
,
"invalid_usage"
);
EXPECT_EQ
(
ds1
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestManifestWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestManifestWithNullSampler."
;
std
::
string
file_path
=
datasets_root_path_
+
"/testManifestData/cpp.json"
;
// Create a Manifest Dataset
std
::
shared_ptr
<
Dataset
>
ds
=
Manifest
(
file_path
,
"train"
,
nullptr
);
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
浏览文件 @
716329df
...
...
@@ -311,6 +311,34 @@ TEST_F(MindDataTestPipeline, TestProjectMap) {
iter
->
Stop
();
}
TEST_F
(
MindDataTestPipeline
,
TestMapDuplicateColumn
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestMapDuplicateColumn."
;
// Create an ImageFolder Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testPK/data/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
ImageFolder
(
folder_path
,
true
,
RandomSampler
(
false
,
10
));
EXPECT_NE
(
ds
,
nullptr
);
// Create objects for the tensor ops
std
::
shared_ptr
<
TensorOperation
>
random_vertical_flip_op
=
vision
::
RandomVerticalFlip
(
0.5
);
EXPECT_NE
(
random_vertical_flip_op
,
nullptr
);
// Create a Map operation on ds
auto
ds1
=
ds
->
Map
({
random_vertical_flip_op
},
{
"image"
,
"image"
},
{},
{});
// Expect failure: duplicate input column name
EXPECT_EQ
(
ds1
,
nullptr
);
// Create a Map operation on ds
auto
ds2
=
ds
->
Map
({
random_vertical_flip_op
},
{},
{
"label"
,
"label"
},
{});
// Expect failure: duplicate output column name
EXPECT_EQ
(
ds2
,
nullptr
);
// Create a Map operation on ds
auto
ds3
=
ds
->
Map
({
random_vertical_flip_op
},
{},
{},
{
"image"
,
"image"
});
// Expect failure: duplicate project column name
EXPECT_EQ
(
ds3
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestProjectMapAutoInjection
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline.TestProjectMapAutoInjection"
;
...
...
@@ -395,6 +423,24 @@ TEST_F(MindDataTestPipeline, TestRenameFail2) {
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestRenameFail3
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestRenameFail3."
;
// We expect this test to fail because of duplicate column names
// Create an ImageFolder Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testPK/data/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
ImageFolder
(
folder_path
,
true
,
RandomSampler
(
false
,
10
));
EXPECT_NE
(
ds
,
nullptr
);
// Create a Rename operation on ds
auto
ds1
=
ds
->
Rename
({
"image"
,
"image"
},
{
"col1"
,
"col2"
});
EXPECT_EQ
(
ds1
,
nullptr
);
// Create a Rename operation on ds
auto
ds2
=
ds
->
Rename
({
"image"
,
"label"
},
{
"col1"
,
"col1"
});
EXPECT_EQ
(
ds2
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestRenameSuccess
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestRenameSuccess."
;
...
...
tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc
浏览文件 @
716329df
...
...
@@ -265,4 +265,28 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic4) {
// Manually terminate the pipeline
iter
->
Stop
();
GlobalContext
::
config_manager
()
->
set_seed
(
curr_seed
);
}
\ No newline at end of file
}
TEST_F
(
MindDataTestPipeline
,
TestRandomDatasetWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestRandomDatasetWithNullSampler."
;
// Create a RandomDataset
std
::
shared_ptr
<
SchemaObj
>
schema
=
Schema
();
schema
->
add_column
(
"image"
,
mindspore
::
TypeId
::
kNumberTypeUInt8
,
{
2
});
schema
->
add_column
(
"label"
,
mindspore
::
TypeId
::
kNumberTypeUInt8
,
{
1
});
std
::
shared_ptr
<
Dataset
>
ds
=
RandomData
(
50
,
schema
,
{},
nullptr
);
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestRandomDatasetDuplicateColumnName
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestRandomDatasetDuplicateColumnName."
;
// Create a RandomDataset
std
::
shared_ptr
<
SchemaObj
>
schema
=
Schema
();
schema
->
add_column
(
"image"
,
mindspore
::
TypeId
::
kNumberTypeUInt8
,
{
2
});
schema
->
add_column
(
"label"
,
mindspore
::
TypeId
::
kNumberTypeUInt8
,
{
1
});
std
::
shared_ptr
<
Dataset
>
ds
=
RandomData
(
50
,
schema
,
{
"image"
,
"image"
});
// Expect failure: duplicate column names
EXPECT_EQ
(
ds
,
nullptr
);
}
tests/ut/cpp/dataset/c_api_dataset_voc_test.cc
浏览文件 @
716329df
...
...
@@ -194,3 +194,13 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) {
// Expect nullptr for segmentation task with class_index
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestVOCWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestVOCWithNullSampler."
;
// Create a VOC Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testVOC2012_2"
;
std
::
shared_ptr
<
Dataset
>
ds
=
VOC
(
folder_path
,
"Segmentation"
,
"train"
,
{},
false
,
nullptr
);
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
tests/ut/cpp/dataset/c_api_datasets_test.cc
浏览文件 @
716329df
...
...
@@ -118,24 +118,44 @@ TEST_F(MindDataTestPipeline, TestCelebAException) {
EXPECT_EQ
(
ds1
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
Test
ImageFolderFail1
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-Test
ImageFolderFail1
."
;
TEST_F
(
MindDataTestPipeline
,
Test
CelebADatasetWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-Test
CelebADataset
."
;
// Create an ImageFolder Dataset
std
::
shared_ptr
<
Dataset
>
ds
=
ImageFolder
(
""
,
true
,
nullptr
);
// Create a CelebA Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testCelebAData/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
CelebA
(
folder_path
,
"all"
,
nullptr
,
false
,
{});
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestMnistFail
1
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestMnistFail
1
."
;
TEST_F
(
MindDataTestPipeline
,
TestMnistFail
WithWrongDatasetDir
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestMnistFail
WithWrongDatasetDir
."
;
// Create a Mnist Dataset
std
::
shared_ptr
<
Dataset
>
ds
=
Mnist
(
""
,
RandomSampler
(
false
,
10
));
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestImageFolderFail2
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestImageFolderFail2."
;
TEST_F
(
MindDataTestPipeline
,
TestMnistFailWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestMnistFailWithNullSampler."
;
// Create a Mnist Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testMnistData/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
Mnist
(
folder_path
,
nullptr
);
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
/// Verify that ImageFolder refuses to build a dataset when the dataset directory is empty ("").
TEST_F(MindDataTestPipeline, TestImageFolderWithWrongDatasetDir) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderWithWrongDatasetDir.";

  // Create an ImageFolder Dataset with an empty directory path.
  // A non-null sampler is passed on purpose: a null sampler is expected to be rejected by itself
  // (covered by TestImageFolderFailWithNullSampler), which would let this test pass without the
  // directory check ever being exercised.
  std::shared_ptr<Dataset> ds = ImageFolder("", true, RandomSampler(false, 10));
  // Expect failure: dataset directory is invalid
  EXPECT_EQ(ds, nullptr);
}
TEST_F
(
MindDataTestPipeline
,
TestImageFolderFailWithWrongExtension
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestImageFolderFailWithWrongExtension."
;
// Create an ImageFolder Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testPK/data/"
;
...
...
@@ -150,8 +170,29 @@ TEST_F(MindDataTestPipeline, TestImageFolderFail2) {
// Iterate the dataset and get each row
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
Tensor
>>
row
;
iter
->
GetNextRow
(
&
row
);
// Expect no data: can not find files with specified extension
EXPECT_EQ
(
row
.
size
(),
0
);
// Manually terminate the pipeline
iter
->
Stop
();
}
TEST_F
(
MindDataTestPipeline
,
TestImageFolderFailWithNullSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestImageFolderFailWithNullSampler."
;
// Create an ImageFolder Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testPK/data/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
ImageFolder
(
folder_path
,
true
,
nullptr
);
// Expect failure: sampler can not be nullptr
EXPECT_EQ
(
ds
,
nullptr
);
}
TEST_F
(
MindDataTestPipeline
,
TestImageFolderFailWithWrongSampler
)
{
MS_LOG
(
INFO
)
<<
"Doing MindDataTestPipeline-TestImageFolderFailWithWrongSampler."
;
// Create an ImageFolder Dataset
std
::
string
folder_path
=
datasets_root_path_
+
"/testCifar100Data/"
;
std
::
shared_ptr
<
Dataset
>
ds
=
ImageFolder
(
folder_path
,
true
,
SequentialSampler
(
-
2
,
5
));
// Expect failure: sampler is not constructed correctly
EXPECT_EQ
(
ds
,
nullptr
);
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录