Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
bd5a777f
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bd5a777f
编写于
7月 10, 2020
作者:
E
ervinzhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
introducing new C++ API
上级
2f565f4c
变更
58
展开全部
隐藏空白更改
内联
并排
Showing
58 changed file
with
3500 addition
and
196 deletion
+3500
-196
CMakeLists.txt
CMakeLists.txt
+4
-0
build.sh
build.sh
+9
-2
cmake/options.cmake
cmake/options.cmake
+1
-0
mindspore/ccsrc/dataset/CMakeLists.txt
mindspore/ccsrc/dataset/CMakeLists.txt
+12
-2
mindspore/ccsrc/dataset/api/CMakeLists.txt
mindspore/ccsrc/dataset/api/CMakeLists.txt
+13
-4
mindspore/ccsrc/dataset/api/datasets.cc
mindspore/ccsrc/dataset/api/datasets.cc
+446
-0
mindspore/ccsrc/dataset/api/iterator.cc
mindspore/ccsrc/dataset/api/iterator.cc
+101
-0
mindspore/ccsrc/dataset/api/python_bindings.cc
mindspore/ccsrc/dataset/api/python_bindings.cc
+2
-2
mindspore/ccsrc/dataset/api/samplers.cc
mindspore/ccsrc/dataset/api/samplers.cc
+224
-0
mindspore/ccsrc/dataset/api/transforms.cc
mindspore/ccsrc/dataset/api/transforms.cc
+491
-0
mindspore/ccsrc/dataset/core/CMakeLists.txt
mindspore/ccsrc/dataset/core/CMakeLists.txt
+10
-7
mindspore/ccsrc/dataset/core/client.h
mindspore/ccsrc/dataset/core/client.h
+9
-5
mindspore/ccsrc/dataset/core/constants.h
mindspore/ccsrc/dataset/core/constants.h
+6
-0
mindspore/ccsrc/dataset/core/data_type.cc
mindspore/ccsrc/dataset/core/data_type.cc
+7
-2
mindspore/ccsrc/dataset/core/data_type.h
mindspore/ccsrc/dataset/core/data_type.h
+51
-27
mindspore/ccsrc/dataset/core/tensor.cc
mindspore/ccsrc/dataset/core/tensor.cc
+22
-10
mindspore/ccsrc/dataset/core/tensor.h
mindspore/ccsrc/dataset/core/tensor.h
+25
-13
mindspore/ccsrc/dataset/core/tensor_row.cc
mindspore/ccsrc/dataset/core/tensor_row.cc
+0
-1
mindspore/ccsrc/dataset/core/tensor_shape.cc
mindspore/ccsrc/dataset/core/tensor_shape.cc
+4
-0
mindspore/ccsrc/dataset/core/tensor_shape.h
mindspore/ccsrc/dataset/core/tensor_shape.h
+63
-54
mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt
mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt
+15
-6
mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
+31
-1
mindspore/ccsrc/dataset/engine/datasetops/batch_op.h
mindspore/ccsrc/dataset/engine/datasetops/batch_op.h
+16
-0
mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
...ore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
+20
-7
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt
...c/dataset/engine/datasetops/source/sampler/CMakeLists.txt
+11
-2
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
...ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
+2
-0
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h
.../ccsrc/dataset/engine/datasetops/source/sampler/sampler.h
+3
-1
mindspore/ccsrc/dataset/engine/gnn/graph.cc
mindspore/ccsrc/dataset/engine/gnn/graph.cc
+2
-0
mindspore/ccsrc/dataset/engine/gnn/graph.h
mindspore/ccsrc/dataset/engine/gnn/graph.h
+2
-0
mindspore/ccsrc/dataset/engine/opt/pass.cc
mindspore/ccsrc/dataset/engine/opt/pass.cc
+12
-8
mindspore/ccsrc/dataset/engine/opt/pass.h
mindspore/ccsrc/dataset/engine/opt/pass.h
+12
-8
mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc
mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc
+14
-11
mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h
mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h
+6
-4
mindspore/ccsrc/dataset/include/dataset/core/constants.h
mindspore/ccsrc/dataset/include/dataset/core/constants.h
+1
-0
mindspore/ccsrc/dataset/include/dataset/core/data_type.h
mindspore/ccsrc/dataset/include/dataset/core/data_type.h
+1
-0
mindspore/ccsrc/dataset/include/dataset/core/tensor_shape.h
mindspore/ccsrc/dataset/include/dataset/core/tensor_shape.h
+1
-0
mindspore/ccsrc/dataset/include/dataset/util/status.h
mindspore/ccsrc/dataset/include/dataset/util/status.h
+1
-0
mindspore/ccsrc/dataset/include/datasets.h
mindspore/ccsrc/dataset/include/datasets.h
+357
-0
mindspore/ccsrc/dataset/include/iterator.h
mindspore/ccsrc/dataset/include/iterator.h
+115
-0
mindspore/ccsrc/dataset/include/samplers.h
mindspore/ccsrc/dataset/include/samplers.h
+199
-0
mindspore/ccsrc/dataset/include/status.h
mindspore/ccsrc/dataset/include/status.h
+1
-0
mindspore/ccsrc/dataset/include/tensor.h
mindspore/ccsrc/dataset/include/tensor.h
+1
-0
mindspore/ccsrc/dataset/include/transforms.h
mindspore/ccsrc/dataset/include/transforms.h
+380
-0
mindspore/ccsrc/dataset/include/utils/log_adapter.h
mindspore/ccsrc/dataset/include/utils/log_adapter.h
+1
-0
mindspore/ccsrc/dataset/include/utils/overload.h
mindspore/ccsrc/dataset/include/utils/overload.h
+1
-0
mindspore/ccsrc/dataset/kernels/CMakeLists.txt
mindspore/ccsrc/dataset/kernels/CMakeLists.txt
+10
-4
mindspore/ccsrc/dataset/kernels/data/data_utils.cc
mindspore/ccsrc/dataset/kernels/data/data_utils.cc
+2
-0
mindspore/ccsrc/dataset/kernels/image/image_utils.cc
mindspore/ccsrc/dataset/kernels/image/image_utils.cc
+0
-1
mindspore/ccsrc/dataset/kernels/image/image_utils.h
mindspore/ccsrc/dataset/kernels/image/image_utils.h
+0
-4
mindspore/ccsrc/dataset/kernels/image/pad_op.cc
mindspore/ccsrc/dataset/kernels/image/pad_op.cc
+1
-0
mindspore/ccsrc/dataset/kernels/image/pad_op.h
mindspore/ccsrc/dataset/kernels/image/pad_op.h
+1
-1
mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc
...aset/kernels/image/random_horizontal_flip_with_bbox_op.cc
+0
-1
mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h
...taset/kernels/image/random_horizontal_flip_with_bbox_op.h
+0
-4
mindspore/ccsrc/dataset/text/kernels/ngram_op.h
mindspore/ccsrc/dataset/text/kernels/ngram_op.h
+0
-1
tests/ut/cpp/CMakeLists.txt
tests/ut/cpp/CMakeLists.txt
+9
-1
tests/ut/cpp/dataset/CMakeLists.txt
tests/ut/cpp/dataset/CMakeLists.txt
+1
-0
tests/ut/cpp/dataset/c_api_test.cc
tests/ut/cpp/dataset/c_api_test.cc
+771
-0
tests/ut/cpp/dataset/datatype_test.cc
tests/ut/cpp/dataset/datatype_test.cc
+0
-2
未找到文件。
CMakeLists.txt
浏览文件 @
bd5a777f
...
...
@@ -17,6 +17,10 @@ else()
set
(
CMAKE_CXX_FLAGS_RELEASE
"$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2"
)
endif
()
if
(
ENABLE_PYTHON
)
add_compile_definitions
(
ENABLE_PYTHON
)
endif
()
set
(
CMAKE_CXX_FLAGS_DEBUG
"$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC"
)
...
...
build.sh
浏览文件 @
bd5a777f
...
...
@@ -25,7 +25,7 @@ usage()
echo
"Usage:"
echo
"bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train]
\\
"
echo
" [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu]
\\
"
echo
" [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
echo
" [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]
[-l on|off]
"
echo
""
echo
"Options:"
echo
" -d Debug mode"
...
...
@@ -56,6 +56,7 @@ usage()
echo
" -s Enable serving module, default off"
echo
" -B Enable debugger, default off"
echo
" -E Enable IBVERBS for parameter server, default off"
echo
" -l Compile with python dependency, default on"
}
# check value of input is 'on' or 'off'
...
...
@@ -98,9 +99,10 @@ checkopts()
ENABLE_SERVING
=
"off"
ENABLE_DEBUGGER
=
"off"
ENABLE_IBVERBS
=
"off"
ENABLE_PYTHON
=
"on"
# Process the options
while
getopts
'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E'
opt
while
getopts
'drvj:c:t:hsb:a:g:p:ie:m:
l:
I:LRP:Q:D:zM:V:K:sB:E'
opt
do
OPTARG
=
$(
echo
${
OPTARG
}
|
tr
'[A-Z]'
'[a-z]'
)
case
"
${
opt
}
"
in
...
...
@@ -151,6 +153,10 @@ checkopts()
check_on_off
$OPTARG
p
ENABLE_PROFILE
=
"
$OPTARG
"
;;
l
)
check_on_off
$OPTARG
l
ENABLE_PYTHON
=
"
$OPTARG
"
;;
i
)
INC_BUILD
=
"on"
;;
...
...
@@ -316,6 +322,7 @@ build_mindspore()
CMAKE_ARGS
=
"
${
CMAKE_ARGS
}
-DENABLE_DUMP_E2E=ON"
fi
CMAKE_ARGS
=
"
${
CMAKE_ARGS
}
-DENABLE_DUMP_IR=
${
ENABLE_DUMP_IR
}
"
CMAKE_ARGS
=
"
${
CMAKE_ARGS
}
-DENABLE_PYTHON=
${
ENABLE_PYTHON
}
"
if
[[
"X
$ENABLE_MPI
"
=
"Xon"
]]
;
then
CMAKE_ARGS
=
"
${
CMAKE_ARGS
}
-DENABLE_MPI=ON"
fi
...
...
cmake/options.cmake
浏览文件 @
bd5a777f
...
...
@@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
option
(
ENABLE_AKG
"enable akg"
OFF
)
option
(
ENABLE_DEBUGGER
"enable debugger"
OFF
)
option
(
ENABLE_IBVERBS
"enable IBVERBS for parameter server"
OFF
)
option
(
ENABLE_PYTHON
"Enable python"
ON
)
if
(
CMAKE_CXX_COMPILER_ID STREQUAL
"GNU"
)
if
(
WIN32
)
...
...
mindspore/ccsrc/dataset/CMakeLists.txt
浏览文件 @
bd5a777f
...
...
@@ -39,6 +39,7 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform)
include_directories
(
${
CMAKE_BINARY_DIR
}
)
# for protobuf generated .h
include_directories
(
${
CMAKE_SOURCE_DIR
}
/mindspore/ccsrc/mindrecord/include
)
include_directories
(
${
CMAKE_SOURCE_DIR
}
/mindspore/ccsrc/dataset/include
)
######################################################################
####################### Flags ########################################
...
...
@@ -67,7 +68,10 @@ add_dependencies(engine-gnn core)
add_dependencies
(
engine core
)
add_dependencies
(
text core
)
add_dependencies
(
text-kernels core
)
add_dependencies
(
APItoPython core
)
add_dependencies
(
cpp-API core
)
if
(
ENABLE_PYTHON
)
add_dependencies
(
APItoPython core
)
endif
()
if
(
ENABLE_TDTQUE
)
add_dependencies
(
engine-tdt core
)
endif
()
...
...
@@ -78,7 +82,7 @@ set(submodules
$<TARGET_OBJECTS:kernels>
$<TARGET_OBJECTS:kernels-image>
$<TARGET_OBJECTS:kernels-data>
$<TARGET_OBJECTS:
APItoPython
>
$<TARGET_OBJECTS:
cpp-API
>
$<TARGET_OBJECTS:engine-datasetops-source>
$<TARGET_OBJECTS:engine-datasetops-source-sampler>
$<TARGET_OBJECTS:engine-gnn>
...
...
@@ -90,6 +94,12 @@ set(submodules
$<TARGET_OBJECTS:text-kernels>
)
if
(
ENABLE_PYTHON
)
set
(
submodules
${
submodules
}
$<TARGET_OBJECTS:APItoPython>
)
endif
()
if
(
ENABLE_TDTQUE
)
add_library
(
_c_dataengine SHARED
${
submodules
}
$<TARGET_OBJECTS:engine-tdt>
)
else
()
...
...
mindspore/ccsrc/dataset/api/CMakeLists.txt
浏览文件 @
bd5a777f
file
(
GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"*.cc"
)
set_property
(
SOURCE
${
_CURRENT_SRC_FILES
}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD
)
add_library
(
APItoPython OBJECT
de_pipeline.cc
python_bindings.cc
if
(
ENABLE_PYTHON
)
add_library
(
APItoPython OBJECT
de_pipeline.cc
python_bindings.cc
)
target_include_directories
(
APItoPython PRIVATE
${
pybind11_INCLUDE_DIRS
}
)
endif
()
add_library
(
cpp-API OBJECT
datasets.cc
iterator.cc
transforms.cc
samplers.cc
)
target_include_directories
(
APItoPython PRIVATE
${
pybind11_INCLUDE_DIRS
}
)
mindspore/ccsrc/dataset/api/datasets.cc
0 → 100644
浏览文件 @
bd5a777f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include "dataset/include/datasets.h"
#include "dataset/include/transforms.h"
#include "dataset/include/samplers.h"
#include "dataset/engine/dataset_iterator.h"
#include "dataset/engine/datasetops/source/image_folder_op.h"
#include "dataset/engine/datasetops/source/mnist_op.h"
#include "dataset/engine/datasetops/source/cifar_op.h"
#include "dataset/engine/datasetops/batch_op.h"
#include "dataset/engine/datasetops/map_op.h"
#include "dataset/engine/datasetops/repeat_op.h"
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/engine/datasetops/project_op.h"
#include "dataset/engine/datasetops/source/sampler/sampler.h"
#include "dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "dataset/core/config_manager.h"
#include "dataset/util/random.h"
namespace
mindspore
{
namespace
dataset
{
namespace
api
{
// Evaluates the Status-valued expression `_s` exactly once. If the resulting status
// reports an error, the enclosing function returns nullptr; otherwise execution continues.
// The temporary uses a non-reserved name (identifiers containing a double underscore are
// reserved for the implementation).
#define RETURN_NULL_IF_ERROR(_s)           \
  do {                                     \
    Status return_null_status_ = (_s);     \
    if (return_null_status_.IsError()) {   \
      return nullptr;                      \
    }                                      \
  } while (false)
// Function to create the iterator, which will build and launch the execution tree.
std
::
shared_ptr
<
Iterator
>
Dataset
::
CreateIterator
()
{
std
::
shared_ptr
<
Iterator
>
iter
;
try
{
iter
=
std
::
make_shared
<
Iterator
>
();
Status
rc
=
iter
->
BuildAndLaunchTree
(
shared_from_this
());
if
(
rc
.
IsError
())
{
MS_LOG
(
ERROR
)
<<
"CreateIterator failed."
;
return
nullptr
;
}
return
iter
;
}
catch
(
const
std
::
exception
&
err
)
{
MS_LOG
(
ERROR
)
<<
"CreateIterator: Iterator exception caught: "
<<
err
.
what
();
return
nullptr
;
}
return
iter
;
}
// Constructor: seeds the per-node tuning knobs from the global ConfigManager
// (parallel worker count, rows per buffer and operator connector size).
Dataset::Dataset() {
  auto config = GlobalContext::config_manager();
  num_workers_ = config->num_parallel_workers();
  rows_per_buffer_ = config->rows_per_buffer();
  connector_que_size_ = config->op_connector_size();
}
// Function to create a ImageFolderDataset.
std
::
shared_ptr
<
ImageFolderDataset
>
ImageFolder
(
std
::
string
dataset_dir
,
bool
decode
,
std
::
shared_ptr
<
SamplerObj
>
sampler
,
std
::
set
<
std
::
string
>
extensions
,
std
::
map
<
std
::
string
,
int32_t
>
class_indexing
)
{
// This arg is exist in ImageFolderOp, but not externalized (in Python API). The default value is false.
bool
recursive
=
false
;
// Create logical representation of ImageFolderDataset.
auto
ds
=
std
::
make_shared
<
ImageFolderDataset
>
(
dataset_dir
,
decode
,
sampler
,
recursive
,
extensions
,
class_indexing
);
// Call derived class validation method.
return
ds
->
ValidateParams
()
?
ds
:
nullptr
;
}
// Function to create a MnistDataset.
std
::
shared_ptr
<
MnistDataset
>
Mnist
(
std
::
string
dataset_dir
,
std
::
shared_ptr
<
SamplerObj
>
sampler
)
{
auto
ds
=
std
::
make_shared
<
MnistDataset
>
(
dataset_dir
,
sampler
);
// Call derived class validation method.
return
ds
->
ValidateParams
()
?
ds
:
nullptr
;
}
// Function to create a Cifar10Dataset.
std
::
shared_ptr
<
Cifar10Dataset
>
Cifar10
(
const
std
::
string
&
dataset_dir
,
int32_t
num_samples
,
std
::
shared_ptr
<
SamplerObj
>
sampler
)
{
auto
ds
=
std
::
make_shared
<
Cifar10Dataset
>
(
dataset_dir
,
num_samples
,
sampler
);
// Call derived class validation method.
return
ds
->
ValidateParams
()
?
ds
:
nullptr
;
}
// Function to create a Batch dataset
std
::
shared_ptr
<
BatchDataset
>
Dataset
::
Batch
(
int32_t
batch_size
,
bool
drop_remainder
)
{
// Default values
std
::
vector
<
std
::
string
>
cols_to_map
=
{};
std
::
map
<
std
::
string
,
std
::
pair
<
TensorShape
,
std
::
shared_ptr
<
Tensor
>>>
pad_map
;
bool
pad
=
false
;
auto
ds
=
std
::
make_shared
<
BatchDataset
>
(
batch_size
,
drop_remainder
,
pad
,
cols_to_map
,
pad_map
);
if
(
!
ds
->
ValidateParams
())
{
return
nullptr
;
}
ds
->
children
.
push_back
(
shared_from_this
());
return
ds
;
}
// Creates a RepeatDataset node whose child is this dataset.
// A count of exactly 1 is a no-op: this dataset itself is returned and no repeat node is injected.
// Returns nullptr when the repeat parameters do not validate.
std::shared_ptr<Dataset> Dataset::Repeat(int32_t count) {
  const bool single_pass = (count == 1);
  if (single_pass) {
    return shared_from_this();
  }

  auto repeat_node = std::make_shared<RepeatDataset>(count);
  if (repeat_node->ValidateParams()) {
    repeat_node->children.push_back(shared_from_this());
    return repeat_node;
  }
  return nullptr;
}
// Creates a MapDataset node whose child is this dataset.
// Returns nullptr when the map parameters do not validate.
std::shared_ptr<MapDataset> Dataset::Map(std::vector<std::shared_ptr<TensorOperation>> operations,
                                         std::vector<std::string> input_columns,
                                         std::vector<std::string> output_columns,
                                         const std::vector<std::string> &project_columns) {
  auto map_node = std::make_shared<MapDataset>(operations, input_columns, output_columns, project_columns);
  if (map_node->ValidateParams()) {
    map_node->children.push_back(shared_from_this());
    return map_node;
  }
  return nullptr;
}
// Creates a ShuffleDataset node whose child is this dataset.
// The node is always configured to reshuffle on every epoch.
// Returns nullptr when the shuffle parameters do not validate.
std::shared_ptr<ShuffleDataset> Dataset::Shuffle(int32_t shuffle_size) {
  auto shuffle_node = std::make_shared<ShuffleDataset>(shuffle_size, true);
  if (shuffle_node->ValidateParams()) {
    shuffle_node->children.push_back(shared_from_this());
    return shuffle_node;
  }
  return nullptr;
}
// Creates a ProjectDataset node whose child is this dataset.
// Returns nullptr when the projection parameters do not validate.
std::shared_ptr<ProjectDataset> Dataset::Project(const std::vector<std::string> &columns) {
  auto project_node = std::make_shared<ProjectDataset>(columns);
  // Defer to the derived class validation.
  if (project_node->ValidateParams()) {
    project_node->children.push_back(shared_from_this());
    return project_node;
  }
  return nullptr;
}
// Helper function to create default RandomSampler.
std
::
shared_ptr
<
SamplerObj
>
CreateDefaultSampler
()
{
int32_t
num_samples
=
0
;
// 0 means to sample all ids.
bool
replacement
=
false
;
return
std
::
make_shared
<
RandomSamplerObj
>
(
replacement
,
num_samples
);
}
/* ####################################### Derived Dataset classes ################################# */
// Constructor for ImageFolderDataset.
// All container/string/handle arguments are taken by value and moved into the members,
// so callers passing temporaries pay no extra copy.
ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
                                       bool recursive, std::set<std::string> extensions,
                                       std::map<std::string, int32_t> class_indexing)
    : dataset_dir_(std::move(dataset_dir)),
      decode_(decode),
      sampler_(std::move(sampler)),
      recursive_(recursive),
      class_indexing_(std::move(class_indexing)),
      exts_(std::move(extensions)) {}
bool
ImageFolderDataset
::
ValidateParams
()
{
if
(
dataset_dir_
.
empty
())
{
MS_LOG
(
ERROR
)
<<
"No dataset path is specified."
;
return
false
;
}
return
true
;
}
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
ImageFolderDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
// This arg is exist in ImageFolderOp, but not externalized (in Python API).
std
::
unique_ptr
<
DataSchema
>
schema
=
std
::
make_unique
<
DataSchema
>
();
TensorShape
scalar
=
TensorShape
::
CreateScalar
();
RETURN_NULL_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kFlexible
,
1
)));
RETURN_NULL_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"label"
,
DataType
(
DataType
::
DE_INT32
),
TensorImpl
::
kFlexible
,
0
,
&
scalar
)));
node_ops
.
push_back
(
std
::
make_shared
<
ImageFolderOp
>
(
num_workers_
,
rows_per_buffer_
,
dataset_dir_
,
connector_que_size_
,
recursive_
,
decode_
,
exts_
,
class_indexing_
,
std
::
move
(
schema
),
std
::
move
(
sampler_
->
Build
())));
return
std
::
make_shared
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
(
node_ops
);
}
// Constructor for MnistDataset.
// Both arguments are taken by value and moved into the members to avoid an extra copy.
MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler)
    : dataset_dir_(std::move(dataset_dir)), sampler_(std::move(sampler)) {}
bool
MnistDataset
::
ValidateParams
()
{
if
(
dataset_dir_
.
empty
())
{
MS_LOG
(
ERROR
)
<<
"No dataset path is specified."
;
return
false
;
}
return
true
;
}
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
MnistDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_NULL_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kCv
,
1
)));
TensorShape
scalar
=
TensorShape
::
CreateScalar
();
RETURN_NULL_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"label"
,
DataType
(
DataType
::
DE_UINT32
),
TensorImpl
::
kFlexible
,
0
,
&
scalar
)));
node_ops
.
push_back
(
std
::
make_shared
<
MnistOp
>
(
num_workers_
,
rows_per_buffer_
,
dataset_dir_
,
connector_que_size_
,
std
::
move
(
schema
),
std
::
move
(
sampler_
->
Build
())));
return
std
::
make_shared
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
(
node_ops
);
}
// Constructor for BatchDataset.
// The column list and pad map are taken by value and moved into the members to avoid an extra copy.
BatchDataset::BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map,
                           std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map)
    : batch_size_(batch_size),
      drop_remainder_(drop_remainder),
      pad_(pad),
      cols_to_map_(std::move(cols_to_map)),
      pad_map_(std::move(pad_map)) {}
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
BatchDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
#ifdef ENABLE_PYTHON
py
::
function
noop
;
node_ops
.
push_back
(
std
::
make_shared
<
BatchOp
>
(
batch_size_
,
drop_remainder_
,
pad_
,
connector_que_size_
,
num_workers_
,
cols_to_map_
,
noop
,
noop
,
pad_map_
));
#else
node_ops
.
push_back
(
std
::
make_shared
<
BatchOp
>
(
batch_size_
,
drop_remainder_
,
pad_
,
connector_que_size_
,
num_workers_
,
cols_to_map_
,
pad_map_
));
#endif
return
std
::
make_shared
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
(
node_ops
);
}
bool
BatchDataset
::
ValidateParams
()
{
if
(
batch_size_
<=
0
)
{
return
false
;
}
return
true
;
}
// Constructor for RepeatDataset; stores the requested repeat count.
RepeatDataset::RepeatDataset(uint32_t count)
    : repeat_count_(count) {}
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
RepeatDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
node_ops
.
push_back
(
std
::
make_shared
<
RepeatOp
>
(
repeat_count_
));
return
std
::
make_shared
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
(
node_ops
);
}
bool
RepeatDataset
::
ValidateParams
()
{
if
(
repeat_count_
<=
0
)
{
return
false
;
}
return
true
;
}
// Constructor for MapDataset.
// The by-value vectors are moved into the members to avoid an extra copy;
// project_columns arrives by const reference and is copied.
MapDataset::MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations,
                       std::vector<std::string> input_columns, std::vector<std::string> output_columns,
                       const std::vector<std::string> &project_columns)
    : operations_(std::move(operations)),
      input_columns_(std::move(input_columns)),
      output_columns_(std::move(output_columns)),
      project_columns_(project_columns) {}
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
MapDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// Currently default is true, and this is not exposed to user.
bool
perf_mode
=
true
;
std
::
vector
<
std
::
shared_ptr
<
TensorOp
>>
tensor_ops
;
// Build tensorOp from tensorOperation vector
// This is to ensure each iterator hold its own copy of the tensorOp objects.
(
void
)
std
::
transform
(
operations_
.
begin
(),
operations_
.
end
(),
std
::
back_inserter
(
tensor_ops
),
[](
std
::
shared_ptr
<
TensorOperation
>
operation
)
->
std
::
shared_ptr
<
TensorOp
>
{
return
operation
->
Build
();
});
// This parameter will be removed with next rebase
std
::
vector
<
std
::
string
>
col_orders
;
auto
map_op
=
std
::
make_shared
<
MapOp
>
(
input_columns_
,
output_columns_
,
tensor_ops
,
num_workers_
,
connector_que_size_
,
perf_mode
);
if
(
!
project_columns_
.
empty
())
{
auto
project_op
=
std
::
make_shared
<
ProjectOp
>
(
project_columns_
);
node_ops
.
push_back
(
project_op
);
}
node_ops
.
push_back
(
map_op
);
return
std
::
make_shared
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
(
node_ops
);
}
bool
MapDataset
::
ValidateParams
()
{
if
(
operations_
.
empty
())
{
return
false
;
}
return
true
;
}
// Constructor for ShuffleDataset.
// The shuffle seed is not a parameter; it is obtained from GetSeed() at construction time.
ShuffleDataset::ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch)
    : shuffle_size_(shuffle_size),
      shuffle_seed_(GetSeed()),
      reset_every_epoch_(reset_every_epoch) {}
// Function to build the ShuffleOp
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
ShuffleDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
node_ops
.
push_back
(
std
::
make_shared
<
ShuffleOp
>
(
shuffle_size_
,
shuffle_seed_
,
connector_que_size_
,
reset_every_epoch_
,
rows_per_buffer_
));
return
std
::
make_shared
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
(
node_ops
);
}
// Function to validate the parameters for ShuffleDataset
bool
ShuffleDataset
::
ValidateParams
()
{
if
(
shuffle_size_
<=
1
)
{
MS_LOG
(
ERROR
)
<<
"ShuffleDataset: Invalid input, shuffle_size: "
<<
shuffle_size_
;
return
false
;
}
return
true
;
}
// Constructor for Cifar10Dataset.
// The sampler handle is taken by value and moved into the member, avoiding an extra
// reference-count round trip; dataset_dir arrives by const reference and is copied.
Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples,
                               std::shared_ptr<SamplerObj> sampler)
    : dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(std::move(sampler)) {}
bool
Cifar10Dataset
::
ValidateParams
()
{
if
(
dataset_dir_
.
empty
())
{
MS_LOG
(
ERROR
)
<<
"No dataset path is specified."
;
return
false
;
}
if
(
num_samples_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"Number of samples cannot be negative"
;
return
false
;
}
return
true
;
}
// Function to build CifarOp
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
Cifar10Dataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if
(
sampler_
==
nullptr
)
{
sampler_
=
CreateDefaultSampler
();
}
// Do internal Schema generation.
auto
schema
=
std
::
make_unique
<
DataSchema
>
();
RETURN_NULL_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"image"
,
DataType
(
DataType
::
DE_UINT8
),
TensorImpl
::
kCv
,
1
)));
TensorShape
scalar
=
TensorShape
::
CreateScalar
();
RETURN_NULL_IF_ERROR
(
schema
->
AddColumn
(
ColDescriptor
(
"label"
,
DataType
(
DataType
::
DE_UINT32
),
TensorImpl
::
kFlexible
,
0
,
&
scalar
)));
node_ops
.
push_back
(
std
::
make_shared
<
CifarOp
>
(
CifarOp
::
CifarType
::
kCifar10
,
num_workers_
,
rows_per_buffer_
,
dataset_dir_
,
connector_que_size_
,
std
::
move
(
schema
),
std
::
move
(
sampler_
->
Build
())));
return
std
::
make_shared
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
(
node_ops
);
}
// Constructor for ProjectDataset; stores a copy of the column names to project.
ProjectDataset::ProjectDataset(const std::vector<std::string> &columns)
    : columns_(columns) {}
bool
ProjectDataset
::
ValidateParams
()
{
if
(
columns_
.
empty
())
{
MS_LOG
(
ERROR
)
<<
"No columns are specified."
;
return
false
;
}
return
true
;
}
std
::
shared_ptr
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
ProjectDataset
::
Build
()
{
// A vector containing shared pointer to the Dataset Ops that this object will create
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>
node_ops
;
node_ops
.
push_back
(
std
::
make_shared
<
ProjectOp
>
(
columns_
));
return
std
::
make_shared
<
std
::
vector
<
std
::
shared_ptr
<
DatasetOp
>>>
(
node_ops
);
}
}
// namespace api
}
// namespace dataset
}
// namespace mindspore
mindspore/ccsrc/dataset/api/iterator.cc
0 → 100644
浏览文件 @
bd5a777f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/include/iterator.h"
#include "dataset/core/client.h"
#include "dataset/include/datasets.h"
namespace
mindspore
{
namespace
dataset
{
namespace
api
{
// Get the next row from the data pipeline.
// On any failure the error is logged and, when `row` is usable, it is cleared so the
// caller sees an empty map.
// Guards against a null output pointer and against being called while no underlying
// iterator exists (iterator_ is released by Stop() and only set by a successful launch).
void Iterator::GetNextRow(TensorMap *row) {
  if (row == nullptr) {
    MS_LOG(ERROR) << "GetNextRow: Output row pointer is null.";
    return;
  }
  if (iterator_ == nullptr) {
    MS_LOG(ERROR) << "GetNextRow: Iterator is not running.";
    row->clear();
    return;
  }

  Status rc = iterator_->GetNextAsMap(row);
  if (rc.IsError()) {
    MS_LOG(ERROR) << "GetNextRow: Failed to get next row.";
    row->clear();
  }
}
// Shut down the data pipeline.
void Iterator::Stop() {
  // Drop the iterator_ unique_ptr first; this destroys the object it owns, if any.
  iterator_ = nullptr;
  // Then give up this object's reference to the tree_ shared pointer (decrements the ref count).
  tree_ = nullptr;
}
// Function to build and launch the execution tree.
//
// Converts the Dataset tree rooted at `ds` into runtime DatasetOps with an iterative BFS,
// registers every op with a new ExecutionTree, links parents to children, then prepares and
// launches the tree and creates the DatasetIterator over it.
//
// A Dataset node may build more than one DatasetOp (e.g. MapDataset returns ProjectOp and MapOp
// when project columns are set). Such ops are chained in the order returned: each op becomes the
// child of the previous one, and the last op is the attachment point for the node's own children.
// This applies to the root node as well as to child nodes.
//
// Returns an error status if a node's Build() yields a null or empty op list, if any op is null,
// or if any tree operation fails.
Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds) {
  // One time init
  Status rc;
  rc = GlobalInit();
  RETURN_IF_NOT_OK(rc);

  // Instantiate the execution tree
  tree_ = std::make_shared<ExecutionTree>();

  // BFS queue of (Dataset node, op that the node's children must be attached to).
  std::queue<std::pair<std::shared_ptr<Dataset>, std::shared_ptr<DatasetOp>>> q;

  if (ds != nullptr) {
    // Convert the current root node. Build() may fail (nullptr) or return nothing.
    auto root_ops = ds->Build();
    RETURN_UNEXPECTED_IF_NULL(root_ops);
    std::shared_ptr<DatasetOp> root_op = root_ops->empty() ? nullptr : root_ops->front();
    RETURN_UNEXPECTED_IF_NULL(root_op);
    RETURN_IF_NOT_OK(tree_->AssociateNode(root_op));

    // Chain any additional ops produced by the root node below the first one.
    std::shared_ptr<DatasetOp> root_tail_op = root_op;
    for (size_t i = 1; i < root_ops->size(); ++i) {
      const auto &next_op = (*root_ops)[i];
      RETURN_UNEXPECTED_IF_NULL(next_op);
      RETURN_IF_NOT_OK(tree_->AssociateNode(next_op));
      RETURN_IF_NOT_OK(root_tail_op->AddChild(next_op));
      root_tail_op = next_op;
    }
    q.push(std::make_pair(ds, root_tail_op));

    // Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes)
    while (!q.empty()) {
      auto node_pair = q.front();
      q.pop();

      // Iterate through all the direct children of the first element in our BFS queue
      for (const auto &child : node_pair.first->children) {
        auto child_ops = child->Build();
        RETURN_UNEXPECTED_IF_NULL(child_ops);
        // A node that builds no ops cannot be placed in the tree.
        std::shared_ptr<DatasetOp> child_tail_op = child_ops->empty() ? nullptr : child_ops->back();
        RETURN_UNEXPECTED_IF_NULL(child_tail_op);

        // Associate every returned op with the tree and chain them beneath the parent's op.
        auto node_op = node_pair.second;
        for (const auto &child_op : *child_ops) {
          RETURN_UNEXPECTED_IF_NULL(child_op);
          RETURN_IF_NOT_OK(tree_->AssociateNode(child_op));
          RETURN_IF_NOT_OK(node_op->AddChild(child_op));
          node_op = child_op;
        }

        // The last op of this child is now the leaf of the current chain; its own children attach there.
        q.push(std::make_pair(child, child_tail_op));
      }
    }
    RETURN_IF_NOT_OK(tree_->AssignRoot(root_op));
  }

  // Launch the execution tree.
  RETURN_IF_NOT_OK(tree_->Prepare());
  RETURN_IF_NOT_OK(tree_->Launch());
  iterator_ = std::make_unique<DatasetIterator>(tree_);
  RETURN_UNEXPECTED_IF_NULL(iterator_);

  return rc;
}
}
// namespace api
}
// namespace dataset
}
// namespace mindspore
mindspore/ccsrc/dataset/api/python_bindings.cc
浏览文件 @
bd5a777f
...
...
@@ -297,7 +297,7 @@ void bindTensor(py::module *m) {
}))
.
def_buffer
([](
Tensor
&
tensor
)
{
py
::
buffer_info
info
;
THROW_IF_ERROR
(
Tensor
::
GetBufferInfo
(
tensor
,
&
info
));
THROW_IF_ERROR
(
Tensor
::
GetBufferInfo
(
&
tensor
,
&
info
));
return
info
;
})
.
def
(
"__str__"
,
&
Tensor
::
ToString
)
...
...
@@ -311,7 +311,7 @@ void bindTensor(py::module *m) {
return
res
;
}
py
::
buffer_info
info
;
THROW_IF_ERROR
(
Tensor
::
GetBufferInfo
(
tensor
,
&
info
));
THROW_IF_ERROR
(
Tensor
::
GetBufferInfo
(
&
tensor
,
&
info
));
return
py
::
array
(
pybind11
::
dtype
(
info
),
info
.
shape
,
info
.
strides
,
info
.
ptr
,
t
);
});
...
...
mindspore/ccsrc/dataset/api/samplers.cc
0 → 100644
浏览文件 @
bd5a777f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/include/samplers.h"
#include "dataset/engine/datasetops/source/sampler/sampler.h"
#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/pk_sampler.h"
namespace
mindspore
{
namespace
dataset
{
namespace
api
{
/// Base-class constructor: there is no state to set up here.
SamplerObj::SamplerObj() = default;
/// Function to create a Distributed Sampler.
/// The arguments are stored in a DistributedSamplerObj, which is handed back only when its
/// ValidateParams() accepts them; on rejection nullptr is returned.
std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle,
                                                          int64_t num_samples, uint32_t seed) {
  auto sampler_obj = std::make_shared<DistributedSamplerObj>(num_shards, shard_id, shuffle, num_samples, seed);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  // Input validation failed.
  return nullptr;
}
/// Function to create a PK Sampler.
/// The arguments are stored in a PKSamplerObj, which is handed back only when its
/// ValidateParams() accepts them; on rejection nullptr is returned.
std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) {
  auto sampler_obj = std::make_shared<PKSamplerObj>(num_val, shuffle, num_samples);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  // Input validation failed.
  return nullptr;
}
/// Function to create a Random Sampler.
/// The arguments are stored in a RandomSamplerObj, which is handed back only when its
/// ValidateParams() accepts them; on rejection nullptr is returned.
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement, int64_t num_samples) {
  auto sampler_obj = std::make_shared<RandomSamplerObj>(replacement, num_samples);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  // Input validation failed.
  return nullptr;
}
/// Function to create a Sequential Sampler.
/// The arguments are stored in a SequentialSamplerObj, which is handed back only when its
/// ValidateParams() accepts them; on rejection nullptr is returned.
std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index, int64_t num_samples) {
  auto sampler_obj = std::make_shared<SequentialSamplerObj>(start_index, num_samples);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  // Input validation failed.
  return nullptr;
}
/// Function to create a Subset Random Sampler.
/// The arguments are stored in a SubsetRandomSamplerObj, which is handed back only when its
/// ValidateParams() accepts them; on rejection nullptr is returned.
std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices,
                                                            int64_t num_samples) {
  auto sampler_obj = std::make_shared<SubsetRandomSamplerObj>(indices, num_samples);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  // Input validation failed.
  return nullptr;
}
/// Function to create a Weighted Random Sampler.
/// The arguments are stored in a WeightedRandomSamplerObj, which is handed back only when its
/// ValidateParams() accepts them; on rejection nullptr is returned.
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights,
                                                                int64_t num_samples, bool replacement) {
  auto sampler_obj = std::make_shared<WeightedRandomSamplerObj>(weights, num_samples, replacement);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  // Input validation failed.
  return nullptr;
}
/* ####################################### Derived Sampler classes ################################# */
// DistributedSampler
// The constructor only records its arguments; range checks live in ValidateParams().
DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples,
                                             uint32_t seed)
    : num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {}
bool
DistributedSamplerObj
::
ValidateParams
()
{
if
(
num_shards_
<=
0
)
{
MS_LOG
(
ERROR
)
<<
"DistributedSampler: invalid num_shards: "
<<
num_shards_
;
return
false
;
}
if
(
shard_id_
<
0
||
shard_id_
>=
num_shards_
)
{
MS_LOG
(
ERROR
)
<<
"DistributedSampler: invalid input, shard_id: "
<<
shard_id_
<<
", num_shards: "
<<
num_shards_
;
return
false
;
}
if
(
num_samples_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"DistributedSampler: invalid num_samples: "
<<
num_samples_
;
return
false
;
}
return
true
;
}
// Create the runtime dataset::DistributedSampler from the stored arguments.
// Note the argument order differs from the API object's constructor: num_samples comes first.
std::shared_ptr<Sampler> DistributedSamplerObj::Build() {
  auto runtime_sampler =
    std::make_shared<dataset::DistributedSampler>(num_samples_, num_shards_, shard_id_, shuffle_, seed_);
  return runtime_sampler;
}
// PKSampler
// The constructor only records its arguments; range checks live in ValidateParams().
PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples)
    : num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {}
bool
PKSamplerObj
::
ValidateParams
()
{
if
(
num_val_
<=
0
)
{
MS_LOG
(
ERROR
)
<<
"PKSampler: invalid num_val: "
<<
num_val_
;
return
false
;
}
if
(
num_samples_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"PKSampler: invalid num_samples: "
<<
num_samples_
;
return
false
;
}
return
true
;
}
// Create the runtime dataset::PKSampler from the stored arguments (num_samples is passed first).
std::shared_ptr<Sampler> PKSamplerObj::Build() {
  auto runtime_sampler = std::make_shared<dataset::PKSampler>(num_samples_, num_val_, shuffle_);
  return runtime_sampler;
}
// RandomSampler
// The constructor only records its arguments; range checks live in ValidateParams().
RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples)
    : replacement_(replacement), num_samples_(num_samples) {}
bool
RandomSamplerObj
::
ValidateParams
()
{
if
(
num_samples_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"RandomSampler: invalid num_samples: "
<<
num_samples_
;
return
false
;
}
return
true
;
}
// Create the runtime dataset::RandomSampler from the stored arguments.
std::shared_ptr<Sampler> RandomSamplerObj::Build() {
  // The API exposes no switch for this, so reshuffling on every epoch is always enabled.
  bool reshuffle_each_epoch = true;
  return std::make_shared<dataset::RandomSampler>(num_samples_, replacement_, reshuffle_each_epoch);
}
// SequentialSampler
// The constructor only records its arguments; range checks live in ValidateParams().
SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples)
    : start_index_(start_index), num_samples_(num_samples) {}
bool
SequentialSamplerObj
::
ValidateParams
()
{
if
(
num_samples_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"SequentialSampler: invalid num_samples: "
<<
num_samples_
;
return
false
;
}
if
(
start_index_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"SequentialSampler: invalid start_index: "
<<
start_index_
;
return
false
;
}
return
true
;
}
// Create the runtime dataset::SequentialSampler from the stored arguments (num_samples is passed first).
std::shared_ptr<Sampler> SequentialSamplerObj::Build() {
  return std::make_shared<dataset::SequentialSampler>(num_samples_, start_index_);
}
// SubsetRandomSampler
// The constructor copies the index list and records num_samples; checks live in ValidateParams().
SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples)
    : indices_(indices), num_samples_(num_samples) {}
bool
SubsetRandomSamplerObj
::
ValidateParams
()
{
if
(
num_samples_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"SubsetRandomSampler: invalid num_samples: "
<<
num_samples_
;
return
false
;
}
return
true
;
}
// Create the runtime dataset::SubsetRandomSampler from the stored arguments (num_samples is passed first).
std::shared_ptr<Sampler> SubsetRandomSamplerObj::Build() {
  return std::make_shared<dataset::SubsetRandomSampler>(num_samples_, indices_);
}
// WeightedRandomSampler
// The constructor copies the weights and records the other arguments; checks live in ValidateParams().
WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples,
                                                   bool replacement)
    : weights_(weights), num_samples_(num_samples), replacement_(replacement) {}
bool
WeightedRandomSamplerObj
::
ValidateParams
()
{
if
(
num_samples_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"WeightedRandomSampler: invalid num_samples: "
<<
num_samples_
;
return
false
;
}
return
true
;
}
// Create the runtime dataset::WeightedRandomSampler from the stored arguments (num_samples is passed first).
std::shared_ptr<Sampler> WeightedRandomSamplerObj::Build() {
  return std::make_shared<dataset::WeightedRandomSampler>(num_samples_, weights_, replacement_);
}
}
// namespace api
}
// namespace dataset
}
// namespace mindspore
mindspore/ccsrc/dataset/api/transforms.cc
0 → 100644
浏览文件 @
bd5a777f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/include/transforms.h"
#include "dataset/kernels/image/image_utils.h"
#include "dataset/kernels/image/normalize_op.h"
#include "dataset/kernels/image/decode_op.h"
#include "dataset/kernels/image/resize_op.h"
#include "dataset/kernels/image/random_crop_op.h"
#include "dataset/kernels/image/center_crop_op.h"
#include "dataset/kernels/image/uniform_aug_op.h"
#include "dataset/kernels/image/random_horizontal_flip_op.h"
#include "dataset/kernels/image/random_vertical_flip_op.h"
#include "dataset/kernels/image/random_rotation_op.h"
#include "dataset/kernels/image/cut_out_op.h"
#include "dataset/kernels/image/random_color_adjust_op.h"
#include "dataset/kernels/image/pad_op.h"
namespace
mindspore
{
namespace
dataset
{
namespace
api
{
// Base-class constructor: there is no state to set up here.
TensorOperation::TensorOperation() = default;
// Transform operations for computer vision.
namespace
vision
{
// Function to create NormalizeOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std) {
  auto operation = std::make_shared<NormalizeOperation>(mean, std);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create DecodeOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<DecodeOperation> Decode(bool rgb) {
  auto operation = std::make_shared<DecodeOperation>(rgb);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create ResizeOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size, InterpolationMode interpolation) {
  auto operation = std::make_shared<ResizeOperation>(size, interpolation);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create RandomCropOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding,
                                                bool pad_if_needed, std::vector<uint8_t> fill_value) {
  auto operation = std::make_shared<RandomCropOperation>(size, padding, pad_if_needed, fill_value);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create CenterCropOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size) {
  auto operation = std::make_shared<CenterCropOperation>(size);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create UniformAugOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations,
                                                    int32_t num_ops) {
  auto operation = std::make_shared<UniformAugOperation>(operations, num_ops);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create RandomHorizontalFlipOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob) {
  auto operation = std::make_shared<RandomHorizontalFlipOperation>(prob);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create RandomVerticalFlipOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob) {
  auto operation = std::make_shared<RandomVerticalFlipOperation>(prob);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create RandomRotationOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<RandomRotationOperation> RandomRotation(std::vector<float> degrees, InterpolationMode resample,
                                                        bool expand, std::vector<float> center,
                                                        std::vector<uint8_t> fill_value) {
  auto operation = std::make_shared<RandomRotationOperation>(degrees, resample, expand, center, fill_value);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create PadOperation.
std
::
shared_ptr
<
PadOperation
>
Pad
(
std
::
vector
<
int32_t
>
padding
,
std
::
vector
<
uint8_t
>
fill_value
,
BorderType
padding_mode
)
{
auto
op
=
std
::
make_shared
<
PadOperation
>
(
padding
,
fill_value
,
padding_mode
);
// Input validation
if
(
!
op
->
ValidateParams
())
{
return
nullptr
;
}
return
op
;
}
// Function to create CutOutOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches) {
  auto operation = std::make_shared<CutOutOperation>(length, num_patches);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
// Function to create RandomColorAdjustOperation.
// Returns the operation only when its ValidateParams() accepts the arguments, otherwise nullptr.
std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness,
                                                              std::vector<float> contrast,
                                                              std::vector<float> saturation, std::vector<float> hue) {
  auto operation = std::make_shared<RandomColorAdjustOperation>(brightness, contrast, saturation, hue);
  if (operation->ValidateParams()) {
    return operation;
  }
  // Input validation failed.
  return nullptr;
}
/* ####################################### Derived TensorOperation classes ################################# */
// NormalizeOperation
// The constructor only stores the two vectors; their sizes are checked in ValidateParams().
NormalizeOperation::NormalizeOperation(std::vector<float> mean, std::vector<float> std)
    : mean_(mean), std_(std) {}
bool
NormalizeOperation
::
ValidateParams
()
{
if
(
mean_
.
size
()
!=
3
)
{
MS_LOG
(
ERROR
)
<<
"Normalize: mean vector has incorrect size: "
<<
mean_
.
size
();
return
false
;
}
if
(
std_
.
size
()
!=
3
)
{
MS_LOG
(
ERROR
)
<<
"Normalize: std vector has incorrect size: "
<<
std_
.
size
();
return
false
;
}
return
true
;
}
std
::
shared_ptr
<
TensorOp
>
NormalizeOperation
::
Build
()
{
return
std
::
make_shared
<
NormalizeOp
>
(
mean_
[
0
],
mean_
[
1
],
mean_
[
2
],
std_
[
0
],
std_
[
1
],
std_
[
2
]);
}
// DecodeOperation
// The constructor only records the rgb flag, which Build() forwards to DecodeOp.
DecodeOperation::DecodeOperation(bool rgb)
    : rgb_(rgb) {}
// A bool flag has no invalid value, so there is nothing to reject.
bool DecodeOperation::ValidateParams() {
  return true;
}
std
::
shared_ptr
<
TensorOp
>
DecodeOperation
::
Build
()
{
return
std
::
make_shared
<
DecodeOp
>
(
rgb_
);
}
// ResizeOperation
// The constructor only records its arguments; the size vector is checked in ValidateParams().
ResizeOperation::ResizeOperation(std::vector<int32_t> size, InterpolationMode interpolation)
    : size_(size), interpolation_(interpolation) {}
bool
ResizeOperation
::
ValidateParams
()
{
if
(
size_
.
empty
()
||
size_
.
size
()
>
2
)
{
MS_LOG
(
ERROR
)
<<
"Resize: size vector has incorrect size: "
<<
size_
.
size
();
return
false
;
}
return
true
;
}
// Create the runtime ResizeOp from the stored size and interpolation mode.
std::shared_ptr<TensorOp> ResizeOperation::Build() {
  int32_t height = size_[0];
  // Width stays 0 unless the caller supplied a second value.
  int32_t width = (size_.size() == 2) ? size_[1] : 0;
  return std::make_shared<ResizeOp>(height, width, interpolation_);
}
// RandomCropOperation
// The constructor only records its arguments; vector sizes are checked in ValidateParams().
RandomCropOperation::RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding,
                                         bool pad_if_needed, std::vector<uint8_t> fill_value)
    : size_(size), padding_(padding), pad_if_needed_(pad_if_needed), fill_value_(fill_value) {}
bool
RandomCropOperation
::
ValidateParams
()
{
if
(
size_
.
empty
()
||
size_
.
size
()
>
2
)
{
MS_LOG
(
ERROR
)
<<
"RandomCrop: size vector has incorrect size: "
<<
size_
.
size
();
return
false
;
}
if
(
padding_
.
empty
()
||
padding_
.
size
()
!=
4
)
{
MS_LOG
(
ERROR
)
<<
"RandomCrop: padding vector has incorrect size: padding.size()"
;
return
false
;
}
if
(
fill_value_
.
empty
()
||
fill_value_
.
size
()
!=
3
)
{
MS_LOG
(
ERROR
)
<<
"RandomCrop: fill_value vector has incorrect size: fill_value.size()"
;
return
false
;
}
return
true
;
}
// Create the runtime RandomCropOp from the stored arguments. The border type is always kConstant.
std::shared_ptr<TensorOp> RandomCropOperation::Build() {
  // Crop size: width stays 0 unless the caller supplied a second value.
  int32_t crop_height = size_[0];
  int32_t crop_width = (size_.size() == 2) ? size_[1] : 0;

  // Padding is read in the order top, bottom, left, right.
  int32_t pad_top = padding_[0];
  int32_t pad_bottom = padding_[1];
  int32_t pad_left = padding_[2];
  int32_t pad_right = padding_[3];

  // Fill colour, one value per channel.
  uint8_t fill_r = fill_value_[0];
  uint8_t fill_g = fill_value_[1];
  uint8_t fill_b = fill_value_[2];

  return std::make_shared<RandomCropOp>(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right,
                                        BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b);
}
// CenterCropOperation
// The constructor only stores the size vector; it is checked in ValidateParams().
CenterCropOperation::CenterCropOperation(std::vector<int32_t> size)
    : size_(size) {}
bool
CenterCropOperation
::
ValidateParams
()
{
if
(
size_
.
empty
()
||
size_
.
size
()
>
2
)
{
MS_LOG
(
ERROR
)
<<
"CenterCrop: size vector has incorrect size."
;
return
false
;
}
return
true
;
}
std
::
shared_ptr
<
TensorOp
>
CenterCropOperation
::
Build
()
{
int32_t
crop_height
=
size_
[
0
];
int32_t
crop_width
=
0
;
// User has specified crop_width.
if
(
size_
.
size
()
==
2
)
{
crop_width
=
size_
[
1
];
}
std
::
shared_ptr
<
CenterCropOp
>
tensor_op
=
std
::
make_shared
<
CenterCropOp
>
(
crop_height
,
crop_width
);
return
tensor_op
;
}
// UniformAugOperation
// The constructor only records the candidate operations and num_ops.
UniformAugOperation::UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops)
    : operations_(operations), num_ops_(num_ops) {}
bool
UniformAugOperation
::
ValidateParams
()
{
return
true
;
}
std
::
shared_ptr
<
TensorOp
>
UniformAugOperation
::
Build
()
{
std
::
vector
<
std
::
shared_ptr
<
TensorOp
>>
tensor_ops
;
(
void
)
std
::
transform
(
operations_
.
begin
(),
operations_
.
end
(),
std
::
back_inserter
(
tensor_ops
),
[](
std
::
shared_ptr
<
TensorOperation
>
op
)
->
std
::
shared_ptr
<
TensorOp
>
{
return
op
->
Build
();
});
std
::
shared_ptr
<
UniformAugOp
>
tensor_op
=
std
::
make_shared
<
UniformAugOp
>
(
tensor_ops
,
num_ops_
);
return
tensor_op
;
}
// RandomHorizontalFlipOperation
// The constructor only records the probability, which Build() forwards to RandomHorizontalFlipOp.
RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability)
    : probability_(probability) {}
bool
RandomHorizontalFlipOperation
::
ValidateParams
()
{
return
true
;
}
// Create the runtime RandomHorizontalFlipOp from the stored probability.
std::shared_ptr<TensorOp> RandomHorizontalFlipOperation::Build() {
  return std::make_shared<RandomHorizontalFlipOp>(probability_);
}
// RandomVerticalFlipOperation
// The constructor only records the probability, which Build() forwards to RandomVerticalFlipOp.
RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability)
    : probability_(probability) {}
bool
RandomVerticalFlipOperation
::
ValidateParams
()
{
return
true
;
}
// Create the runtime RandomVerticalFlipOp from the stored probability.
std::shared_ptr<TensorOp> RandomVerticalFlipOperation::Build() {
  return std::make_shared<RandomVerticalFlipOp>(probability_);
}
// RandomRotationOperation
// The constructor only records its arguments; vector sizes are checked in ValidateParams().
RandomRotationOperation::RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode,
                                                 bool expand, std::vector<float> center,
                                                 std::vector<uint8_t> fill_value)
    : degrees_(degrees),
      interpolation_mode_(interpolation_mode),
      expand_(expand),
      center_(center),
      fill_value_(fill_value) {}
bool
RandomRotationOperation
::
ValidateParams
()
{
if
(
degrees_
.
empty
()
||
degrees_
.
size
()
!=
2
)
{
MS_LOG
(
ERROR
)
<<
"RandomRotation: degrees vector has incorrect size: degrees.size()"
;
return
false
;
}
if
(
center_
.
empty
()
||
center_
.
size
()
!=
2
)
{
MS_LOG
(
ERROR
)
<<
"RandomRotation: center vector has incorrect size: center.size()"
;
return
false
;
}
if
(
fill_value_
.
empty
()
||
fill_value_
.
size
()
!=
3
)
{
MS_LOG
(
ERROR
)
<<
"RandomRotation: fill_value vector has incorrect size: fill_value.size()"
;
return
false
;
}
return
true
;
}
// Create the runtime RandomRotationOp. Arguments are passed as: the two degree values, the two
// center values, interpolation mode, expand flag, then the three fill values.
std::shared_ptr<TensorOp> RandomRotationOperation::Build() {
  return std::make_shared<RandomRotationOp>(degrees_[0], degrees_[1], center_[0], center_[1], interpolation_mode_,
                                            expand_, fill_value_[0], fill_value_[1], fill_value_[2]);
}
// PadOperation
// The constructor only records its arguments; vector sizes are checked in ValidateParams().
PadOperation::PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value, BorderType padding_mode)
    : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {}
bool
PadOperation
::
ValidateParams
()
{
if
(
padding_
.
empty
()
||
padding_
.
size
()
==
3
||
padding_
.
size
()
>
4
)
{
MS_LOG
(
ERROR
)
<<
"Pad: padding vector has incorrect size: padding.size()"
;
return
false
;
}
if
(
fill_value_
.
empty
()
||
(
fill_value_
.
size
()
!=
1
&&
fill_value_
.
size
()
!=
3
))
{
MS_LOG
(
ERROR
)
<<
"Pad: fill_value vector has incorrect size: fill_value.size()"
;
return
false
;
}
return
true
;
}
std
::
shared_ptr
<
TensorOp
>
PadOperation
::
Build
()
{
int32_t
pad_top
,
pad_bottom
,
pad_left
,
pad_right
;
switch
(
padding_
.
size
())
{
case
1
:
pad_left
=
padding_
[
0
];
pad_top
=
padding_
[
0
];
pad_right
=
padding_
[
0
];
pad_bottom
=
padding_
[
0
];
break
;
case
2
:
pad_left
=
padding_
[
0
];
pad_top
=
padding_
[
1
];
pad_right
=
padding_
[
0
];
pad_bottom
=
padding_
[
1
];
break
;
default:
pad_left
=
padding_
[
0
];
pad_top
=
padding_
[
1
];
pad_right
=
padding_
[
2
];
pad_bottom
=
padding_
[
3
];
}
uint8_t
fill_r
,
fill_g
,
fill_b
;
fill_r
=
fill_value_
[
0
];
fill_g
=
fill_value_
[
0
];
fill_b
=
fill_value_
[
0
];
if
(
fill_value_
.
size
()
==
3
)
{
fill_r
=
fill_value_
[
0
];
fill_g
=
fill_value_
[
1
];
fill_b
=
fill_value_
[
2
];
}
std
::
shared_ptr
<
PadOp
>
tensor_op
=
std
::
make_shared
<
PadOp
>
(
pad_top
,
pad_bottom
,
pad_left
,
pad_right
,
padding_mode_
,
fill_r
,
fill_g
,
fill_b
);
return
tensor_op
;
}
// CutOutOperation
// The constructor only records its arguments; sign checks live in ValidateParams().
CutOutOperation::CutOutOperation(int32_t length, int32_t num_patches)
    : length_(length), num_patches_(num_patches) {}
bool
CutOutOperation
::
ValidateParams
()
{
if
(
length_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"CutOut: length cannot be negative"
;
return
false
;
}
if
(
num_patches_
<
0
)
{
MS_LOG
(
ERROR
)
<<
"CutOut: number of patches cannot be negative"
;
return
false
;
}
return
true
;
}
// Create the runtime CutOutOp. length_ is passed twice, so the patch is square.
// NOTE(review): the trailing false, 0, 0, 0 arguments are fixed here; their meaning is defined by
// CutOutOp's constructor (presumably a random-colour flag and a fill colour) - confirm there.
std::shared_ptr<TensorOp> CutOutOperation::Build() {
  return std::make_shared<CutOutOp>(length_, length_, num_patches_, false, 0, 0, 0);
}
// RandomColorAdjustOperation.
// The constructor only records the four range vectors; their sizes are checked in ValidateParams().
RandomColorAdjustOperation::RandomColorAdjustOperation(std::vector<float> brightness, std::vector<float> contrast,
                                                       std::vector<float> saturation, std::vector<float> hue)
    : brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {}
bool
RandomColorAdjustOperation
::
ValidateParams
()
{
// Do some input validation.
if
(
brightness_
.
empty
()
||
brightness_
.
size
()
>
2
)
{
MS_LOG
(
ERROR
)
<<
"RandomColorAdjust: brightness must be a vector of one or two values"
;
return
false
;
}
if
(
contrast_
.
empty
()
||
contrast_
.
size
()
>
2
)
{
MS_LOG
(
ERROR
)
<<
"RandomColorAdjust: contrast must be a vector of one or two values"
;
return
false
;
}
if
(
saturation_
.
empty
()
||
saturation_
.
size
()
>
2
)
{
MS_LOG
(
ERROR
)
<<
"RandomColorAdjust: saturation must be a vector of one or two values"
;
return
false
;
}
if
(
hue_
.
empty
()
||
hue_
.
size
()
>
2
)
{
MS_LOG
(
ERROR
)
<<
"RandomColorAdjust: hue must be a vector of one or two values"
;
return
false
;
}
return
true
;
}
std
::
shared_ptr
<
TensorOp
>
RandomColorAdjustOperation
::
Build
()
{
float
brightness_lb
,
brightness_ub
,
contrast_lb
,
contrast_ub
,
saturation_lb
,
saturation_ub
,
hue_lb
,
hue_ub
;
brightness_lb
=
brightness_
[
0
];
brightness_ub
=
brightness_
[
0
];
if
(
brightness_
.
size
()
==
2
)
brightness_ub
=
brightness_
[
1
];
contrast_lb
=
contrast_
[
0
];
contrast_ub
=
contrast_
[
0
];
if
(
contrast_
.
size
()
==
2
)
contrast_ub
=
contrast_
[
1
];
saturation_lb
=
saturation_
[
0
];
saturation_ub
=
saturation_
[
0
];
if
(
saturation_
.
size
()
==
2
)
saturation_ub
=
saturation_
[
1
];
hue_lb
=
hue_
[
0
];
hue_ub
=
hue_
[
0
];
if
(
hue_
.
size
()
==
2
)
hue_ub
=
hue_
[
1
];
std
::
shared_ptr
<
RandomColorAdjustOp
>
tensor_op
=
std
::
make_shared
<
RandomColorAdjustOp
>
(
brightness_lb
,
brightness_ub
,
contrast_lb
,
contrast_ub
,
saturation_lb
,
saturation_ub
,
hue_lb
,
hue_ub
);
return
tensor_op
;
}
}
// namespace vision
}
// namespace api
}
// namespace dataset
}
// namespace mindspore
mindspore/ccsrc/dataset/core/CMakeLists.txt
浏览文件 @
bd5a777f
ms_protobuf_generate
(
EXAMPLE_SRCS EXAMPLE_HDRS example.proto
)
ms_protobuf_generate
(
FEATURE_SRCS FEATURE_HDRS feature.proto
)
file
(
GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"*.cc"
)
set_property
(
SOURCE
${
_CURRENT_SRC_FILES
}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD
)
add_library
(
core OBJECT
${
EXAMPLE_SRCS
}
${
FEATURE_SRCS
}
set
(
DATASET_CORE_SRC_FILES
client.cc
config_manager.cc
cv_tensor.cc
...
...
@@ -13,6 +9,13 @@ add_library(core OBJECT
tensor.cc
tensor_row.cc
tensor_shape.cc
)
)
ms_protobuf_generate
(
EXAMPLE_SRCS EXAMPLE_HDRS example.proto
)
ms_protobuf_generate
(
FEATURE_SRCS FEATURE_HDRS feature.proto
)
add_library
(
core OBJECT
${
DATASET_CORE_SRC_FILES
}
${
EXAMPLE_SRCS
}
${
FEATURE_SRCS
}
)
add_dependencies
(
core mindspore::protobuf
)
target_include_directories
(
core PRIVATE
${
pybind11_INCLUDE_DIRS
}
)
if
(
ENABLE_PYTHON
)
target_include_directories
(
core PRIVATE
${
pybind11_INCLUDE_DIRS
}
)
endif
()
mindspore/ccsrc/dataset/core/client.h
浏览文件 @
bd5a777f
...
...
@@ -25,21 +25,25 @@
#include "dataset/core/tensor_shape.h"
#include "dataset/engine/data_schema.h"
#include "dataset/engine/dataset_iterator.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#ifdef ENABLE_PYTHON
#include "dataset/engine/datasetops/barrier_op.h"
#include "dataset/engine/datasetops/batch_op.h"
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/build_vocab_op.h"
#endif
#include "dataset/engine/datasetops/batch_op.h"
#include "dataset/engine/datasetops/dataset_op.h"
#include "dataset/engine/datasetops/device_queue_op.h"
#include "dataset/engine/datasetops/map_op.h"
#include "dataset/engine/datasetops/project_op.h"
#include "dataset/engine/datasetops/rename_op.h"
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/repeat_op.h"
#include "dataset/engine/datasetops/skip_op.h"
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#include "dataset/engine/datasetops/take_op.h"
#include "dataset/engine/datasetops/zip_op.h"
#include "dataset/engine/datasetops/concat_op.h"
...
...
mindspore/ccsrc/dataset/core/constants.h
浏览文件 @
bd5a777f
...
...
@@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf };
// Possible flavours of Tensor implementations
enum
class
TensorImpl
{
kNone
,
kFlexible
,
kCv
,
kNP
};
// Possible values for Border types
enum
class
BorderType
{
kConstant
=
0
,
kEdge
=
1
,
kReflect
=
2
,
kSymmetric
=
3
};
// Possible interpolation modes
enum
class
InterpolationMode
{
kLinear
=
0
,
kNearestNeighbour
=
1
,
kCubic
=
2
,
kArea
=
3
};
// convenience functions for 32bit int bitmask
// Returns true when every bit set in bitMask is also set in bits.
// An all-zero mask therefore always tests true.
inline bool BitTest(uint32_t bits, uint32_t bitMask) {
  const uint32_t masked_bits = bits & bitMask;
  return masked_bits == bitMask;
}
...
...
mindspore/ccsrc/dataset/core/data_type.cc
浏览文件 @
bd5a777f
...
...
@@ -14,11 +14,12 @@
* limitations under the License.
*/
#include "dataset/core/data_type.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
#endif
#include "utils/log_adapter.h"
#include "dataset/core/pybind_support.h"
namespace
mindspore
{
namespace
dataset
{
...
...
@@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const {
return
0
;
}
#ifdef ENABLE_PYTHON
py
::
dtype
DataType
::
AsNumpyType
()
const
{
if
(
type_
<
DataType
::
NUM_OF_TYPES
)
return
py
::
dtype
(
kTypeInfo
[
type_
].
pybindType_
);
else
return
py
::
dtype
(
"unknown"
);
}
#endif
uint8_t
DataType
::
AsCVType
()
const
{
uint8_t
res
=
kCVInvalidType
;
...
...
@@ -112,6 +115,7 @@ std::string DataType::ToString() const {
return
"unknown"
;
}
#ifdef ENABLE_PYTHON
DataType
DataType
::
FromNpArray
(
const
py
::
array
&
arr
)
{
if
(
py
::
isinstance
<
py
::
array_t
<
bool
>>
(
arr
))
{
return
DataType
(
DataType
::
DE_BOOL
);
...
...
@@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const {
}
return
res
;
}
#endif
}
// namespace dataset
}
// namespace mindspore
mindspore/ccsrc/dataset/core/data_type.h
浏览文件 @
bd5a777f
...
...
@@ -19,14 +19,16 @@
#include <opencv2/core/hal/interface.h>
#include <string>
#ifdef ENABLE_PYTHON
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "dataset/core/constants.h"
#include "dataset/core/pybind_support.h"
namespace
py
=
pybind11
;
#else
#include "Eigen/Core"
using
float16
=
Eigen
::
half
;
#endif
#include "dataset/core/constants.h"
namespace
mindspore
{
namespace
dataset
{
...
...
@@ -59,6 +61,7 @@ class DataType {
const
uint8_t
cvType_
;
// OpenCv matching type
};
#ifdef ENABLE_PYTHON
static
inline
const
TypeInfo
kTypeInfo
[]
=
{
// name, sizeInBytes, pybindType, formatDescriptor, openCV
{
"unknown"
,
0
,
"object"
,
""
,
kCVInvalidType
},
// DE_UNKNOWN
...
...
@@ -76,19 +79,38 @@ class DataType {
{
"float64"
,
8
,
"double"
,
py
::
format_descriptor
<
double
>::
format
(),
CV_64F
},
// DE_FLOAT64
{
"string"
,
0
,
"bytes"
,
"S"
,
kCVInvalidType
}
// DE_STRING
};
#else
static
inline
const
TypeInfo
kTypeInfo
[]
=
{
// name, sizeInBytes, pybindType, formatDescriptor, openCV
{
"unknown"
,
0
,
"object"
,
""
,
kCVInvalidType
},
// DE_UNKNOWN
{
"bool"
,
1
,
"bool"
,
""
,
CV_8U
},
// DE_BOOL
{
"int8"
,
1
,
"int8"
,
""
,
CV_8S
},
// DE_INT8
{
"uint8"
,
1
,
"uint8"
,
""
,
CV_8U
},
// DE_UINT8
{
"int16"
,
2
,
"int16"
,
""
,
CV_16S
},
// DE_INT16
{
"uint16"
,
2
,
"uint16"
,
""
,
CV_16U
},
// DE_UINT16
{
"int32"
,
4
,
"int32"
,
""
,
CV_32S
},
// DE_INT32
{
"uint32"
,
4
,
"uint32"
,
""
,
kCVInvalidType
},
// DE_UINT32
{
"int64"
,
8
,
"int64"
,
""
,
kCVInvalidType
},
// DE_INT64
{
"uint64"
,
8
,
"uint64"
,
""
,
kCVInvalidType
},
// DE_UINT64
{
"float16"
,
2
,
"float16"
,
""
,
CV_16F
},
// DE_FLOAT16
{
"float32"
,
4
,
"float32"
,
""
,
CV_32F
},
// DE_FLOAT32
{
"float64"
,
8
,
"double"
,
""
,
CV_64F
},
// DE_FLOAT64
{
"string"
,
0
,
"bytes"
,
""
,
kCVInvalidType
}
// DE_STRING
};
#endif
// No arg constructor to create an unknown shape
DataType
()
:
type_
(
DE_UNKNOWN
)
{}
// Create a type from a given string
//
@
param type_str
//
/ \
param type_str
explicit
DataType
(
const
std
::
string
&
type_str
);
// Default destructor
~
DataType
()
=
default
;
// Create a type from a given enum
//
@
param d
//
/ \
param d
constexpr
explicit
DataType
(
Type
d
)
:
type_
(
d
)
{}
constexpr
bool
operator
==
(
const
DataType
a
)
const
{
return
type_
==
a
.
type_
;
}
...
...
@@ -100,49 +122,49 @@ class DataType {
constexpr
bool
operator
!=
(
const
Type
a
)
const
{
return
type_
!=
a
;
}
// Disable this usage `if(d)` where d is of type DataType
//
@
return
//
/ \
return
operator
bool
()
=
delete
;
// To be used in Switch/case
//
@
return
//
/ \
return
operator
Type
()
const
{
return
type_
;
}
// The number of bytes needed to store one value of this type
//
@
return
//
/ \
return
uint8_t
SizeInBytes
()
const
;
// Convert from DataType to OpenCV type
//
@
return
//
/ \
return
uint8_t
AsCVType
()
const
;
// Convert from OpenCV type to DataType
//
@
param cv_type
//
@
return
//
/ \
param cv_type
//
/ \
return
static
DataType
FromCVType
(
int
cv_type
);
// Returns a string representation of the type
//
@
return
//
/ \
return
std
::
string
ToString
()
const
;
// returns true if the template type is the same as the Tensor type_
//
@
tparam T
//
@
return true or false
//
/ \
tparam T
//
/ \
return true or false
template
<
typename
T
>
bool
IsCompatible
()
const
{
return
type_
==
FromCType
<
T
>
();
}
// returns true if the template type is the same as the Tensor type_
//
@
tparam T
//
@
return true or false
//
/ \
tparam T
//
/ \
return true or false
template
<
typename
T
>
bool
IsLooselyCompatible
()
const
;
// << Stream output operator overload
//
@
notes This allows you to print the info using stream operators
//
@
param out - reference to the output stream being overloaded
//
@
param rO - reference to the DataType to display
//
@
return - the output stream must be returned
//
/ \
notes This allows you to print the info using stream operators
//
/ \
param out - reference to the output stream being overloaded
//
/ \
param rO - reference to the DataType to display
//
/ \
return - the output stream must be returned
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
DataType
&
so
)
{
out
<<
so
.
ToString
();
return
out
;
...
...
@@ -151,22 +173,24 @@ class DataType {
template
<
typename
T
>
static
DataType
FromCType
();
#ifdef ENABLE_PYTHON
// Convert from DataType to Pybind type
//
@
return
//
/ \
return
py
::
dtype
AsNumpyType
()
const
;
// Convert from NP type to DataType
//
@
param type
//
@
return
//
/ \
param type
//
/ \
return
static
DataType
FromNpType
(
const
py
::
dtype
&
type
);
// Convert from NP array to DataType
//
@
param py array
//
@
return
//
/ \
param py array
//
/ \
return
static
DataType
FromNpArray
(
const
py
::
array
&
arr
);
#endif
// Get the buffer string format of the current type. Used in pybind buffer protocol.
//
@
return
//
/ \
return
std
::
string
GetPybindFormat
()
const
;
bool
IsSignedInt
()
const
{
...
...
mindspore/ccsrc/dataset/core/tensor.cc
浏览文件 @
bd5a777f
...
...
@@ -28,10 +28,12 @@
#include "dataset/core/constants.h"
#include "dataset/core/cv_tensor.h"
#include "dataset/core/global_context.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
namespace
py
=
pybind11
;
#endif
#include "dataset/core/tensor_shape.h"
namespace
py
=
pybind11
;
namespace
mindspore
{
namespace
dataset
{
// Helper macros for printing tensor elements
...
...
@@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape
MS_ASSERT
(
num_bytes
==
0
);
if
(
shape
.
known
())
Tensor
::
Reshape
(
shape
);
}
Tensor
::
Tensor
(
const
dataengine
::
BytesList
&
bytes_list
,
const
TensorShape
&
shape
)
:
Tensor
(
TensorShape
({
static_cast
<
dsize_t
>
(
bytes_list
.
value_size
())}),
DataType
(
DataType
::
DE_STRING
))
{
// total bytes needed = offset array + strings
...
...
@@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape
MS_ASSERT
(
num_bytes
==
0
);
if
(
shape
.
known
())
Tensor
::
Reshape
(
shape
);
}
Status
Tensor
::
CreateTensor
(
std
::
shared_ptr
<
Tensor
>
*
ptr
,
TensorImpl
tensor_impl
,
const
TensorShape
&
shape
,
DataType
type
,
const
unsigned
char
*
data
)
{
if
(
!
shape
.
known
())
{
...
...
@@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl
return
Status
::
OK
();
// returns base-class shared_ptr
}
#ifdef ENABLE_PYTHON
Status
Tensor
::
CreateTensorFromNumpyString
(
std
::
shared_ptr
<
Tensor
>
*
ptr
,
py
::
array
arr
)
{
std
::
vector
<
dsize_t
>
shape
;
for
(
dsize_t
i
=
0
;
i
<
arr
.
ndim
();
i
++
)
{
...
...
@@ -297,6 +302,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
return
Status
::
OK
();
// returns base-class shared_ptr
}
#endif
Status
Tensor
::
CreateTensor
(
std
::
shared_ptr
<
Tensor
>
*
ptr
,
const
std
::
vector
<
std
::
string
>
&
strings
,
const
TensorShape
&
shape
)
{
...
...
@@ -698,21 +704,24 @@ std::vector<dsize_t> Tensor::Strides() {
return
strides
;
}
#ifdef ENABLE_PYTHON
// Fill a pybind11 buffer_info describing the memory of a numeric tensor.
// @param t   tensor to describe; must not be null
// @param out receives the buffer description; must not be null
// @return Status - an unexpected-error Status if either pointer is null, if the tensor is not numeric,
//         or if its type has no pybind format string; Status::OK() otherwise
Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
  RETURN_UNEXPECTED_IF_NULL(t);
  // out is dereferenced below, so reject a null output pointer up front as well.
  RETURN_UNEXPECTED_IF_NULL(out);
  CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");

  std::string format_desc = t->type().GetPybindFormat();
  if (format_desc.empty()) {
    RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format");
  }
  *out = py::buffer_info(t->GetMutableBuffer(),   /* Pointer to buffer */
                         t->type().SizeInBytes(), /* Size of one scalar */
                         format_desc,             /* Python struct-style format descriptor */
                         t->Rank(),               /* Number of dimensions */
                         t->shape().AsVector(),   /* Buffer dimensions */
                         t->Strides());
  return Status::OK();
}
#endif
template
<
typename
T
>
Status
Tensor
::
GetItemAt
(
T
*
o
,
const
std
::
vector
<
dsize_t
>
&
index
)
const
{
...
...
@@ -752,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index)
o
->
swap
(
sv
);
return
Status
::
OK
();
}
#ifdef ENABLE_PYTHON
// return data as numpy, should return status
Status
Tensor
::
GetDataAsNumpy
(
py
::
array
*
data
)
{
RETURN_UNEXPECTED_IF_NULL
(
data_
);
...
...
@@ -815,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
data_allocator_
->
deallocate
(
reinterpret_cast
<
uchar
*>
(
tmp_data
));
return
Status
::
OK
();
}
#endif
void
Tensor
::
Squeeze
()
{
shape_
=
shape_
.
Squeeze
();
}
...
...
mindspore/ccsrc/dataset/core/tensor.h
浏览文件 @
bd5a777f
...
...
@@ -26,20 +26,27 @@
#undef HAVE_STDDEF_H
#undef HAVE_STDLIB_H
#endif
#ifdef ENABLE_PYTHON
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#endif
#include "dataset/core/constants.h"
#include "dataset/core/data_type.h"
#include "dataset/core/tensor_shape.h"
#include "dataset/util/allocator.h"
#include "dataset/util/status.h"
#include "proto/example.pb.h"
#ifdef ENABLE_PYTHON
namespace
py
=
pybind11
;
#endif
namespace
mindspore
{
namespace
dataset
{
class
Tensor
;
template
<
typename
T
>
class
Allocator
;
using
CharAllocPtr
=
std
::
unique_ptr
<
Allocator
<
unsigned
char
>>
;
using
TensorAllocPtr
=
std
::
shared_ptr
<
Allocator
<
Tensor
>>
;
// An allocator shared_ptr for Tensors
...
...
@@ -114,16 +121,17 @@ class Tensor {
static
Status
CreateTensor
(
std
::
shared_ptr
<
Tensor
>
*
,
TensorImpl
tensor_impl
,
const
TensorShape
&
shape
,
DataType
type
,
const
unsigned
char
*
data
=
nullptr
);
//
/
Create a copy of the input tensor
//
/ \
param out [out] output tensor to be generated
//
/ \
param in [in] orginal tensor to be copied
//
/ \
return Status
// Create a copy of the input tensor
//
@
param out [out] output tensor to be generated
//
@
param in [in] orginal tensor to be copied
//
@
return Status
static
Status
CreateTensor
(
std
::
shared_ptr
<
Tensor
>
*
out
,
const
std
::
shared_ptr
<
Tensor
>
&
in
)
{
const
TensorAlloc
*
alloc
=
GlobalContext
::
Instance
()
->
tensor_allocator
();
*
out
=
std
::
allocate_shared
<
Tensor
>
(
*
alloc
,
in
->
shape
(),
in
->
type
(),
in
->
GetBuffer
(),
in
->
SizeInBytes
());
return
Status
::
OK
();
}
#ifdef ENABLE_PYTHON
// A static factory method to create a Tensor from a given py::array.
// @param ptr output argument to hold the created Tensor
// @param arr py::array
...
...
@@ -132,6 +140,7 @@ class Tensor {
// Helper function to create a tensor from Numpy of strings
static
Status
CreateTensorFromNumpyString
(
std
::
shared_ptr
<
Tensor
>
*
ptr
,
py
::
array
arr
);
#endif
// A static factory method to create a Tensor from a given list of strings.
// @param ptr output argument to hold the created Tensor
...
...
@@ -170,6 +179,7 @@ class Tensor {
static
Status
CreateTensor
(
std
::
shared_ptr
<
Tensor
>
*
ptr
,
const
T
&
item
)
{
return
CreateTensor
<
T
>
(
ptr
,
{
item
},
TensorShape
::
CreateScalar
());
}
// Create tensor from protobuf bytelist with uint8 or int8 types
static
Status
CreateTensor
(
std
::
shared_ptr
<
Tensor
>
*
ptr
,
const
dataengine
::
BytesList
&
bytes_list
,
const
TensorShape
&
shape
,
const
DataType
&
type
,
dsize_t
pad_size
);
...
...
@@ -346,12 +356,12 @@ class Tensor {
virtual
void
Squeeze
();
/// Calculates the strides of the Tensor
/// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
/// The strides will be {4,2,1}.
/// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
/// The strides will be {16,8,4}.
/// @return vector of integers
// Calculates the strides of the Tensor
// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
// The strides will be {4,2,1}.
// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
// The strides will be {16,8,4}.
// @return vector of integers
std
::
vector
<
dsize_t
>
Strides
();
std
::
string
ToString
()
{
...
...
@@ -376,6 +386,7 @@ class Tensor {
// Slice string tensors
Status
SliceString
(
std
::
shared_ptr
<
Tensor
>
*
out
,
const
std
::
vector
<
dsize_t
>
&
indices
);
#ifdef ENABLE_PYTHON
// Constructs numpy array from input tensor
// @param data this data is the location of python data
// @return Status code
...
...
@@ -383,7 +394,8 @@ class Tensor {
Status
GetDataAsNumpyStrings
(
py
::
array
*
data
);
static
Status
GetBufferInfo
(
Tensor
&
t
,
py
::
buffer_info
*
out
);
static
Status
GetBufferInfo
(
Tensor
*
t
,
py
::
buffer_info
*
out
);
#endif
// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
Status
Concatenate
(
const
std
::
vector
<
dsize_t
>
&
index
,
const
std
::
shared_ptr
<
Tensor
>
&
input
);
...
...
@@ -570,7 +582,7 @@ class Tensor {
// Return a TensorIterator that points to the start of the Tensor.
// It's the user responsibility to use the correct type that matches the Tensor type
// @
t
param T The type of values in the Tensor
// @tparam T The type of values in the Tensor
// @return TensorIterator
template
<
typename
T
>
TensorIterator
<
T
>
begin
()
{
...
...
mindspore/ccsrc/dataset/core/tensor_row.cc
浏览文件 @
bd5a777f
...
...
@@ -18,7 +18,6 @@
#include "dataset/core/tensor_row.h"
namespace
py
=
pybind11
;
namespace
mindspore
{
namespace
dataset
{
...
...
mindspore/ccsrc/dataset/core/tensor_shape.cc
浏览文件 @
bd5a777f
...
...
@@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape)
known_
=
shape
.
known_
;
// override with the input shape in case of unknown-rank tensor shape.
}
#ifdef ENABLE_PYTHON
TensorShape
::
TensorShape
(
py
::
list
l
)
:
raw_shape_
(
*
GlobalContext
::
Instance
()
->
int_allocator
()),
strides_
(
*
GlobalContext
::
Instance
()
->
int_allocator
())
{
std
::
vector
<
dsize_t
>
list_c
;
...
...
@@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l)
}
AddListToShape
(
list_c
);
}
#endif
TensorShape
::
TensorShape
(
cv
::
MatSize
cv_size
,
uint32_t
type
)
:
raw_shape_
(
*
GlobalContext
::
Instance
()
->
int_allocator
()),
strides_
(
*
GlobalContext
::
Instance
()
->
int_allocator
())
{
...
...
@@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const {
return
TensorShape
(
vec
);
}
#ifdef ENABLE_PYTHON
py
::
list
TensorShape
::
AsPyList
()
{
py
::
list
list
;
for
(
auto
i
:
raw_shape_
)
{
...
...
@@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() {
}
return
list
;
}
#endif
TensorShape
TensorShape
::
Squeeze
()
const
{
std
::
vector
<
dsize_t
>
new_shape
;
...
...
mindspore/ccsrc/dataset/core/tensor_shape.h
浏览文件 @
bd5a777f
...
...
@@ -24,13 +24,16 @@
#include <opencv2/core/mat.hpp>
#ifdef ENABLE_PYTHON
#include "pybind11/pybind11.h"
namespace
py
=
pybind11
;
#endif
#include "dataset/core/constants.h"
#include "dataset/util/status.h"
#include "dataset/core/global_context.h"
#include "dataset/util/allocator.h"
namespace
py
=
pybind11
;
namespace
mindspore
{
namespace
dataset
{
// Class that represents a shape of a Tensor. A shape can be:
...
...
@@ -43,7 +46,8 @@ namespace dataset {
// -# one or more dim is unknown --> not empty vector --> <d1, d2, d2, d3, ...> where di is unknown\n
// Example: <3,?> (the 1st dim is unknown)\n
// <2,?,?,?> (all dims but the 0th dim are unknown)
// TensorShape supports any dim > 0 and < 2^31-1
/// \brief TensorShape supports any dim > 0 and < 2^31-1
class
TensorShape
{
public:
static
constexpr
dsize_t
kDimUnknown
=
-
1
;
// constant for an unknown dimension
...
...
@@ -51,57 +55,59 @@ class TensorShape {
// Force the compiler to not create a no-arg constructor
TensorShape
()
=
delete
;
// Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown
//
@param
list
/// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
///     If one of the dims is set to kDimUnknown, the shape will be flagged as unknown.
/// \param[in] list dimensions of the shape
explicit
TensorShape
(
const
std
::
initializer_list
<
dsize_t
>
&
list
);
// Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown
//
@param
list
/// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
///     If one of the dims is set to kDimUnknown, the shape will be flagged as unknown.
/// \param[in] list dimensions of the shape
explicit
TensorShape
(
const
std
::
vector
<
dsize_t
>
&
list
);
// Copy constructor
//
@param
shape
//
/ \brief
Copy constructor
//
/ \param[in]
shape
TensorShape
(
const
TensorShape
&
shape
);
// construct a TensorShape via a python list
// @param py::list l - a list object from python
#ifdef ENABLE_PYTHON
/// \brief construct a TensorShape via a python list
/// \param[in] py::list l - a list object from python
explicit
TensorShape
(
py
::
list
l
);
#endif
~
TensorShape
()
=
default
;
// Create a scalar Shape (i.e., empty shape with mKnown = true)
//
@
return TensorShape
//
/ \brief
Create a scalar Shape (i.e., empty shape with mKnown = true)
//
/ \
return TensorShape
static
TensorShape
CreateScalar
()
{
return
TensorShape
({});
}
// Create a shape with an unknown rank.
//
@
return TensorShape
//
/ \brief
Create a shape with an unknown rank.
//
/ \
return TensorShape
static
TensorShape
CreateUnknownRankShape
();
// Create a shape with a known rank .
//
@
return TensorShape
//
/ \brief
Create a shape with a known rank .
//
/ \
return TensorShape
static
TensorShape
CreateUnknownShapeWithRank
(
dsize_t
rank
);
// Insert a new dim into a copy of the current shape.
//
@param
dim to be added
//
@param
axis the index where dim should be added
//
@
return New modified shape
//
/ \brief
Insert a new dim into a copy of the current shape.
//
/ \param[in]
dim to be added
//
/ \param[in]
axis the index where dim should be added
//
/ \
return New modified shape
TensorShape
InsertDim
(
dsize_t
axis
,
dsize_t
dim
)
const
;
// Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
//
@param
dim
//
@
return
//
/ \brief
Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
//
/ \param[in]
dim
//
/ \
return
TensorShape
PrependDim
(
dsize_t
dim
)
const
;
// Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
//
@param
dim
//
@
return
//
/ \brief
Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
//
/ \param[in]
dim
//
/ \
return
TensorShape
AppendDim
(
dsize_t
dim
)
const
;
// Create a shape based on OpenCV shape and type
//
@param
cv_size
//
@param
type int that represent the type in OpenCV, example CV_8U, CV_64S
//
/ \brief
Create a shape based on OpenCV shape and type
//
/ \param[in]
cv_size
//
/ \param[in]
type int that represent the type in OpenCV, example CV_8U, CV_64S
TensorShape
(
cv
::
MatSize
cv_size
,
uint32_t
type
);
dsize_t
Size
()
const
{
return
raw_shape_
.
size
();
}
...
...
@@ -123,47 +129,50 @@ class TensorShape {
return
raw_shape_
[
index
];
}
// Return the Shape as a vector
//
@
return
//
/ \brief
Return the Shape as a vector
//
/ \
return
std
::
vector
<
dsize_t
>
AsVector
()
const
;
// Returns the class info as a string
//
@
return
//
/ \brief
Returns the class info as a string
//
/ \
return
std
::
string
ToString
()
const
{
std
::
stringstream
ss
;
ss
<<
*
this
;
return
ss
.
str
();
}
// Actual print function used by operator<<
//
@
param out output string stream
//
/ \brief
Actual print function used by operator<<
//
/ \
param out output string stream
void
Print
(
std
::
ostream
&
out
)
const
;
// << Stream output operator overload
//
@notes
This allows you to print the info using stream operators
//
@param
out - reference to the output stream being overloaded
//
@param
rO - reference to the TensorShape to display
//
@
return - the output stream must be returned
//
/ \brief
<< Stream output operator overload
//
/
This allows you to print the info using stream operators
//
/ \param[in]
out - reference to the output stream being overloaded
//
/ \param[in]
rO - reference to the TensorShape to display
//
/ \
return - the output stream must be returned
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
TensorShape
&
so
)
{
so
.
Print
(
out
);
return
out
;
}
#ifdef ENABLE_PYTHON
py
::
list
AsPyList
();
#endif
// Checks if the given index is a valid index for this tensor.
// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
//
@param
index
//
@
return bool
//
/ \brief
Checks if the given index is a valid index for this tensor.
//
/
For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
//
/ \param[in]
index
//
/ \
return bool
bool
IsValidIndex
(
const
std
::
vector
<
dsize_t
>
&
index
)
const
;
TensorShape
Squeeze
()
const
;
std
::
vector
<
dsize_t
>
Strides
()
const
;
// Returns the location of the item assuming row major memory layout.
// @param index
// @return
/// \brief Returns the location of the item assuming row major memory layout.
/// \param[in] index
/// \param[out] flat_index
/// \return
Status
ToFlatIndex
(
const
std
::
vector
<
dsize_t
>
&
index
,
dsize_t
*
flat_index
)
const
;
private:
...
...
@@ -174,11 +183,11 @@ class TensorShape {
// Vector to keep the strides of the shape. The size is rank+1
std
::
vector
<
dsize_t
,
IntAlloc
>
strides_
;
//
Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape.
//
@tparam T list
//
@param
list Iterable list
//
@
return true if the shape is valid and no overflow would be generated when counting the number of elements.
//
False otherwise.
//
/ \brief Internal utility function to iterate over a list,
//
/ check if the dim is valid and then insert it into the shape.
//
/ \param[in]
list Iterable list
//
/ \
return true if the shape is valid and no overflow would be generated when counting the number of elements.
//
/
False otherwise.
template
<
typename
T
>
void
AddListToShape
(
const
T
&
list
);
};
...
...
mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt
浏览文件 @
bd5a777f
...
...
@@ -2,13 +2,12 @@ add_subdirectory(source)
file
(
GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"*.cc"
)
set_property
(
SOURCE
${
_CURRENT_SRC_FILES
}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD
)
add_library
(
engine-datasetops OBJECT
set
(
DATASET_ENGINE_DATASETOPS_SRC_FILES
dataset_op.cc
parallel_op.cc
pipeline_op.cc
barrier_op.cc
batch_op.cc
bucket_batch_by_length_op.cc
device_queue_op.cc
map_op.cc
project_op.cc
...
...
@@ -18,8 +17,18 @@ add_library(engine-datasetops OBJECT
take_op.cc
shuffle_op.cc
zip_op.cc
concat_op.cc
filter_op.cc
build_vocab_op.cc
concat_op.cc
)
if
(
ENABLE_PYTHON
)
set
(
DATASET_ENGINE_DATASETOPS_SRC_FILES
${
DATASET_ENGINE_DATASETOPS_SRC_FILES
}
bucket_batch_by_length_op.cc
barrier_op.cc
filter_op.cc
build_vocab_op.cc
)
endif
()
add_library
(
engine-datasetops OBJECT
${
DATASET_ENGINE_DATASETOPS_SRC_FILES
}
)
mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
浏览文件 @
bd5a777f
...
...
@@ -19,7 +19,9 @@
#include <iomanip>
#include "common/utils.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
#endif
#include "dataset/engine/data_buffer.h"
#include "dataset/engine/db_connector.h"
#include "dataset/engine/opt/pass.h"
...
...
@@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa
// Validate the builder settings and construct the BatchOp.
// @param ptr output argument that receives the newly created BatchOp
// @return Status - the SanityCheck() error if validation fails, Status::OK() otherwise
Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) {
  // Nothing is constructed unless the collected settings pass validation.
  RETURN_IF_NOT_OK(SanityCheck());
#ifdef ENABLE_PYTHON
  // Python build: forward the per-batch size/map callbacks as well.
  std::shared_ptr<BatchOp> batch_op =
    std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
                              builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_,
                              builder_batch_map_func_, builder_pad_map_);
#else
  // Build without Python: the constructor takes no callback arguments.
  std::shared_ptr<BatchOp> batch_op =
    std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
                              builder_num_workers_, builder_cols_to_map_, builder_pad_map_);
#endif
  *ptr = batch_op;
  return Status::OK();
}
...
...
@@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() {
return
err
.
empty
()
?
Status
::
OK
()
:
Status
(
StatusCode
::
kUnexpectedError
,
__LINE__
,
__FILE__
,
common
::
SafeCStr
(
err
));
}
#ifdef ENABLE_PYTHON
BatchOp
::
BatchOp
(
int32_t
batch_size
,
bool
drop
,
bool
pad
,
int32_t
op_queue_size
,
int32_t
num_workers
,
const
std
::
vector
<
std
::
string
>
&
cols_to_map
,
py
::
function
batch_size_func
,
py
::
function
batch_map_func
,
PadInfo
pad_map
)
...
...
@@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size,
pad_info_
(
pad_map
)
{
worker_queues_
.
Init
(
num_workers
,
op_queue_size
);
}
#else
// BatchOp constructor for builds without Python support (no batch-size / batch-map callbacks).
// @param int32_t batch_size - stored as the starting batch size
// @param bool drop - stored in drop_
// @param bool pad - stored in pad_; enables the padding step
// @param int32_t op_queue_size - queue size passed to ParallelOp and to the worker queues
// @param int32_t num_workers - worker count passed to ParallelOp and to the worker queues
// @param const std::vector<std::string> &cols_to_map - column names kept in pyfunc_column_names_
// @param PadInfo pad_map - padding description kept in pad_info_
BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
                 const std::vector<std::string> &cols_to_map, PadInfo pad_map)
    : ParallelOp(num_workers, op_queue_size),
      start_batch_size_(batch_size),
      drop_(drop),
      pad_(pad),
      pyfunc_column_names_(cols_to_map),
      pad_info_(pad_map) {
  // One internal queue per worker, each with the same capacity as the op connector.
  worker_queues_.Init(num_workers, op_queue_size);
}
#endif
Status
BatchOp
::
operator
()()
{
Status
rc
=
LaunchThreadsAndInitOp
();
...
...
@@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
Status
BatchOp
::
MakeBatchedBuffer
(
std
::
pair
<
std
::
unique_ptr
<
TensorQTable
>
,
CBatchInfo
>
table_pair
,
std
::
unique_ptr
<
DataBuffer
>
*
db
)
{
RETURN_UNEXPECTED_IF_NULL
(
table_pair
.
first
);
if
(
!
pyfunc_column_names_
.
empty
())
RETURN_IF_NOT_OK
(
MapColumns
(
&
table_pair
));
// pass it through pyfunc
#ifdef ENABLE_PYTHON
if
(
!
pyfunc_column_names_
.
empty
())
RETURN_IF_NOT_OK
(
MapColumns
(
&
table_pair
));
// pass it through pyfunc
#endif
if
(
pad_
)
RETURN_IF_NOT_OK
(
PadColumns
(
&
table_pair
.
first
,
pad_info_
,
column_name_id_map_
));
// do padding if needed
(
*
db
)
=
std
::
make_unique
<
DataBuffer
>
(
table_pair
.
second
.
batch_num_
,
DataBuffer
::
kDeBFlagNone
);
std
::
unique_ptr
<
TensorQTable
>
dest_table
=
std
::
make_unique
<
TensorQTable
>
();
...
...
@@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) {
return
Status
::
OK
();
}
#ifdef ENABLE_PYTHON
Status
BatchOp
::
MapColumns
(
std
::
pair
<
std
::
unique_ptr
<
TensorQTable
>
,
CBatchInfo
>
*
table_pair
)
{
TensorBatchTable
input_table
;
input_table
.
reserve
(
pyfunc_column_names_
.
size
());
...
...
@@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
}
return
Status
::
OK
();
}
#endif
// Determine the batch size to use for the batch described by info.
// @param batch_size output argument that receives the batch size
// @param info batch information forwarded to the batch-size callback (Python builds only)
// @return Status - the callback's error if it fails, Status::OK() otherwise
Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) {
#ifdef ENABLE_PYTHON
  // A user-supplied callback, when present, decides the size of each batch.
  if (batch_size_func_ != nullptr) {
    RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info));
    return Status::OK();
  }
#endif
  // No callback (or no Python support): fall back to the fixed batch size given at construction.
  *batch_size = start_batch_size_;
  return Status::OK();
}
#ifdef ENABLE_PYTHON
Status
BatchOp
::
InvokeBatchSizeFunc
(
int32_t
*
batch_size
,
CBatchInfo
info
)
{
{
// Acquire Python GIL
...
...
@@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou
}
return
Status
(
StatusCode
::
kOK
);
}
#endif
Status
BatchOp
::
PadColumns
(
std
::
unique_ptr
<
TensorQTable
>
*
table
,
const
PadInfo
&
pad_info
,
const
std
::
unordered_map
<
std
::
string
,
int32_t
>
&
column_name_id_map
)
{
...
...
mindspore/ccsrc/dataset/engine/datasetops/batch_op.h
浏览文件 @
bd5a777f
...
...
@@ -89,6 +89,7 @@ class BatchOp : public ParallelOp {
return
*
this
;
}
#ifdef ENABLE_PYTHON
// set columns to perform map on
// @param const std::vector<std::string> & cols_to_map - name of columns to perform map on
// @return Builder & reference to builder class object
...
...
@@ -104,6 +105,7 @@ class BatchOp : public ParallelOp {
builder_batch_size_func_
=
batch_size_func
;
return
*
this
;
}
#endif
// @param std::shared_ptr<BatchOp> *ptr pointer to shared_ptr, actual return arg
// @return Status - The error code return
...
...
@@ -121,8 +123,10 @@ class BatchOp : public ParallelOp {
int32_t
builder_op_connector_size_
;
std
::
vector
<
std
::
string
>
builder_cols_to_map_
;
PadInfo
builder_pad_map_
;
#ifdef ENABLE_PYTHON
py
::
function
builder_batch_size_func_
;
py
::
function
builder_batch_map_func_
;
#endif
};
enum
batchCtrl
:
int8_t
{
kNoCtrl
=
0
,
kEOE
=
1
,
kEOF
=
2
,
kQuit
=
3
};
...
...
@@ -144,6 +148,7 @@ class BatchOp : public ParallelOp {
const
int64_t
get_epoch_num
()
const
{
return
epoch_num_
;
}
};
#ifdef ENABLE_PYTHON
// BatchOp constructor
// @param int32_t batch_size
// @param bool drop
...
...
@@ -152,6 +157,10 @@ class BatchOp : public ParallelOp {
// @param int32_t num_workers
BatchOp
(
int32_t
batch_size
,
bool
drop
,
bool
pad
,
int32_t
op_queue_size
,
int32_t
num_workers
,
const
std
::
vector
<
std
::
string
>
&
,
py
::
function
batch_size_func
,
py
::
function
batch_map_func
,
PadInfo
pad_map
);
#else
BatchOp
(
int32_t
batch_size
,
bool
drop
,
bool
pad
,
int32_t
op_queue_size
,
int32_t
num_workers
,
const
std
::
vector
<
std
::
string
>
&
,
PadInfo
pad_map
);
#endif
// BatchOp destructor
~
BatchOp
()
{}
...
...
@@ -219,10 +228,13 @@ class BatchOp : public ParallelOp {
// @return Status - The error code return
Status
MakeBatchedBuffer
(
std
::
pair
<
std
::
unique_ptr
<
TensorQTable
>
,
CBatchInfo
>
table_pair
,
std
::
unique_ptr
<
DataBuffer
>
*
db
);
#ifdef ENABLE_PYTHON
// Function that calls pyfunc to perform map on batch
// @param (std::pair<std::unique_ptr<TensorQTable>, batch_stats> *table_pair - contains un-batched tensor
// @return Status - The error code return
Status
MapColumns
(
std
::
pair
<
std
::
unique_ptr
<
TensorQTable
>
,
CBatchInfo
>
*
table_pair
);
#endif
// @param const PadInfo &pad_info pad info to unpack
// @param const std::unordered_map<std::string, int32_t>& column_name_id_map - column names to index mapping
...
...
@@ -247,6 +259,7 @@ class BatchOp : public ParallelOp {
// @return Status - The error code return
Status
LaunchThreadsAndInitOp
();
#ifdef ENABLE_PYTHON
// Invoke batch size function with current BatchInfo to generate batch size.
// @return Status - The error code return
Status
InvokeBatchSizeFunc
(
int32_t
*
batch_size
,
CBatchInfo
info
);
...
...
@@ -254,6 +267,7 @@ class BatchOp : public ParallelOp {
// Invoke batch map function with current BatchInfo to generate tensors to batch.
// @return Status - The error code return
Status
InvokeBatchMapFunc
(
TensorTable
*
input
,
TensorTable
*
output
,
CBatchInfo
info
);
#endif
int32_t
start_batch_size_
;
bool
drop_
;
// bool for whether to drop remainder or not
...
...
@@ -262,8 +276,10 @@ class BatchOp : public ParallelOp {
PadInfo
pad_info_
;
// column names to perform padding on
std
::
unique_ptr
<
ChildIterator
>
child_iterator_
;
// child iterator for fetching TensorRows 1 by 1
QueueList
<
std
::
pair
<
std
::
unique_ptr
<
TensorQTable
>
,
CBatchInfo
>>
worker_queues_
;
// internal queue for syncing worker
#ifdef ENABLE_PYTHON
py
::
function
batch_size_func_
;
// Function pointer of batch size function
py
::
function
batch_map_func_
;
// Function pointer of per batch map function
#endif
};
}
// namespace dataset
}
// namespace mindspore
...
...
mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
浏览文件 @
bd5a777f
add_subdirectory
(
sampler
)
file
(
GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"*.cc"
)
set_property
(
SOURCE
${
_CURRENT_SRC_FILES
}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD
)
add_library
(
engine-datasetops-source OBJECT
generator_op.cc
set
(
DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
io_block.cc
mindrecord_op.cc
tf_reader_op.cc
image_folder_op.cc
mnist_op.cc
voc_op.cc
coco_op.cc
manifest_op.cc
cifar_op.cc
random_data_op.cc
celeba_op.cc
text_file_op.cc
clue_op.cc
)
\ No newline at end of file
)
set
(
DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
${
DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
}
mindrecord_op.cc
tf_reader_op.cc
)
if
(
ENABLE_PYTHON
)
set
(
DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
${
DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
}
generator_op.cc
voc_op.cc
manifest_op.cc
)
endif
()
add_library
(
engine-datasetops-source OBJECT
${
DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
}
)
\ No newline at end of file
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt
浏览文件 @
bd5a777f
file
(
GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"*.cc"
)
set_property
(
SOURCE
${
_CURRENT_SRC_FILES
}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD
)
add_library
(
engine-datasetops-source-sampler OBJECT
set
(
DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
distributed_sampler.cc
pk_sampler.cc
python_sampler.cc
random_sampler.cc
sampler.cc
sequential_sampler.cc
subset_random_sampler.cc
weighted_random_sampler.cc
)
if
(
ENABLE_PYTHON
)
set
(
DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
${
DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
}
python_sampler.cc
)
endif
()
add_library
(
engine-datasetops-source-sampler OBJECT
${
DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
}
)
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
浏览文件 @
bd5a777f
...
...
@@ -89,6 +89,7 @@ void Sampler::Print(std::ostream &out, bool show_all) const {
}
}
#ifdef ENABLE_PYTHON
Status
Sampler
::
GetAllIdsThenReset
(
py
::
array
*
data
)
{
std
::
unique_ptr
<
DataBuffer
>
db
;
std
::
shared_ptr
<
Tensor
>
sample_ids
;
...
...
@@ -120,6 +121,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) {
RETURN_IF_NOT_OK
(
ResetSampler
());
return
Status
::
OK
();
}
#endif
Status
Sampler
::
SetNumSamples
(
int64_t
num_samples
)
{
CHECK_FAIL_RETURN_UNEXPECTED
(
num_samples
>=
0
,
"num_samples is negative"
);
...
...
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h
浏览文件 @
bd5a777f
...
...
@@ -74,8 +74,11 @@ class Sampler {
// @return - The error code return
virtual
Status
GetNextSample
(
std
::
unique_ptr
<
DataBuffer
>
*
out_buffer
)
=
0
;
// This function only called by python layer. Not needed by Android.
#ifdef ENABLE_PYTHON
// return all ids in one epoch as a numpy array, then call reset
Status
GetAllIdsThenReset
(
py
::
array
*
data
);
#endif
// for next epoch of sampleIds
// @return - The error code return
...
...
@@ -155,5 +158,4 @@ class Sampler {
};
}
// namespace dataset
}
// namespace mindspore
#endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_
mindspore/ccsrc/dataset/engine/gnn/graph.cc
浏览文件 @
bd5a777f
...
...
@@ -387,6 +387,7 @@ Status Graph::GetMetaInfo(MetaInfo *meta_info) {
return
Status
::
OK
();
}
#ifdef ENABLE_PYTHON
Status
Graph
::
GraphInfo
(
py
::
dict
*
out
)
{
MetaInfo
meta_info
;
RETURN_IF_NOT_OK
(
GetMetaInfo
(
&
meta_info
));
...
...
@@ -398,6 +399,7 @@ Status Graph::GraphInfo(py::dict *out) {
(
*
out
)[
"edge_feature_type"
]
=
py
::
cast
(
meta_info
.
edge_feature_type
);
return
Status
::
OK
();
}
#endif
Status
Graph
::
LoadNodeAndEdge
()
{
GraphLoader
gl
(
dataset_file_
,
num_workers_
);
...
...
mindspore/ccsrc/dataset/engine/gnn/graph.h
浏览文件 @
bd5a777f
...
...
@@ -140,8 +140,10 @@ class Graph {
// @return Status - The error code return
Status
GetMetaInfo
(
MetaInfo
*
meta_info
);
#ifdef ENABLE_PYTHON
// Return meta information to python layer
Status
GraphInfo
(
py
::
dict
*
out
);
#endif
Status
Init
();
...
...
mindspore/ccsrc/dataset/engine/opt/pass.cc
浏览文件 @
bd5a777f
...
...
@@ -21,13 +21,15 @@
#include "dataset/engine/datasetops/map_op.h"
#include "dataset/engine/datasetops/project_op.h"
#include "dataset/engine/datasetops/rename_op.h"
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/repeat_op.h"
#include "dataset/engine/datasetops/skip_op.h"
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#ifdef ENABLE_PYTHON
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#endif
#include "dataset/engine/datasetops/source/image_folder_op.h"
#include "dataset/engine/datasetops/take_op.h"
#include "dataset/engine/datasetops/zip_op.h"
...
...
@@ -111,35 +113,37 @@ Status NodePass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) {
return
RunOnNode
(
std
::
static_pointer_cast
<
DatasetOp
>
(
node
),
modified
);
}
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
Filter
Op
>
node
,
bool
*
modified
)
{
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
Skip
Op
>
node
,
bool
*
modified
)
{
// Fallback to base class visitor by default
return
RunOnNode
(
std
::
static_pointer_cast
<
DatasetOp
>
(
node
),
modified
);
}
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
S
kip
Op
>
node
,
bool
*
modified
)
{
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
S
huffle
Op
>
node
,
bool
*
modified
)
{
// Fallback to base class visitor by default
return
RunOnNode
(
std
::
static_pointer_cast
<
DatasetOp
>
(
node
),
modified
);
}
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
Shuffle
Op
>
node
,
bool
*
modified
)
{
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
MindRecord
Op
>
node
,
bool
*
modified
)
{
// Fallback to base class visitor by default
return
RunOnNode
(
std
::
static_pointer_cast
<
DatasetOp
>
(
node
),
modified
);
}
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
Generato
rOp
>
node
,
bool
*
modified
)
{
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
TFReade
rOp
>
node
,
bool
*
modified
)
{
// Fallback to base class visitor by default
return
RunOnNode
(
std
::
static_pointer_cast
<
DatasetOp
>
(
node
),
modified
);
}
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
MindRecordOp
>
node
,
bool
*
modified
)
{
#ifdef ENABLE_PYTHON
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
FilterOp
>
node
,
bool
*
modified
)
{
// Fallback to base class visitor by default
return
RunOnNode
(
std
::
static_pointer_cast
<
DatasetOp
>
(
node
),
modified
);
}
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
TFReade
rOp
>
node
,
bool
*
modified
)
{
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
Generato
rOp
>
node
,
bool
*
modified
)
{
// Fallback to base class visitor by default
return
RunOnNode
(
std
::
static_pointer_cast
<
DatasetOp
>
(
node
),
modified
);
}
#endif
Status
NodePass
::
RunOnNode
(
std
::
shared_ptr
<
TakeOp
>
node
,
bool
*
modified
)
{
// Fallback to base class visitor by default
...
...
mindspore/ccsrc/dataset/engine/opt/pass.h
浏览文件 @
bd5a777f
...
...
@@ -33,18 +33,20 @@ class ProjectOp;
class
RenameOp
;
class
FilterOp
;
class
SkipOp
;
class
ShuffleOp
;
class
GeneratorOp
;
class
MindRecordOp
;
class
TFReaderOp
;
#ifdef ENABLE_PYTHON
class
FilterOp
;
class
GeneratorOp
;
#endif
class
TakeOp
;
class
ZipOp
;
...
...
@@ -122,18 +124,20 @@ class NodePass : public Pass {
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
RenameOp
>
node
,
bool
*
modified
);
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
FilterOp
>
node
,
bool
*
modified
);
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
SkipOp
>
node
,
bool
*
modified
);
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
ShuffleOp
>
node
,
bool
*
modified
);
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
GeneratorOp
>
node
,
bool
*
modified
);
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
MindRecordOp
>
node
,
bool
*
modified
);
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
TFReaderOp
>
node
,
bool
*
modified
);
#ifdef ENABLE_PYTHON
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
FilterOp
>
node
,
bool
*
modified
);
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
GeneratorOp
>
node
,
bool
*
modified
);
#endif
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
TakeOp
>
node
,
bool
*
modified
);
virtual
Status
RunOnNode
(
std
::
shared_ptr
<
ZipOp
>
node
,
bool
*
modified
);
...
...
mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc
浏览文件 @
bd5a777f
...
...
@@ -50,12 +50,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) {
return
Status
::
OK
();
}
Status
PrinterPass
::
RunOnNode
(
std
::
shared_ptr
<
FilterOp
>
node
,
bool
*
modified
)
{
*
modified
=
false
;
std
::
cout
<<
"Visiting FilterOp"
<<
'\n'
;
return
Status
::
OK
();
}
Status
PrinterPass
::
RunOnNode
(
std
::
shared_ptr
<
SkipOp
>
node
,
bool
*
modified
)
{
*
modified
=
false
;
std
::
cout
<<
"Visiting SkipOp"
<<
'\n'
;
...
...
@@ -67,11 +61,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) {
return
Status
::
OK
();
}
Status
PrinterPass
::
RunOnNode
(
std
::
shared_ptr
<
GeneratorOp
>
node
,
bool
*
modified
)
{
*
modified
=
false
;
std
::
cout
<<
"Visiting GeneratorOp"
<<
'\n'
;
return
Status
::
OK
();
}
Status
PrinterPass
::
RunOnNode
(
std
::
shared_ptr
<
MindRecordOp
>
node
,
bool
*
modified
)
{
*
modified
=
false
;
std
::
cout
<<
"Visiting MindRecordOp"
<<
'\n'
;
...
...
@@ -84,6 +73,20 @@ Status PrinterPass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified)
return
Status
::
OK
();
}
#ifdef ENABLE_PYTHON
Status
PrinterPass
::
RunOnNode
(
std
::
shared_ptr
<
FilterOp
>
node
,
bool
*
modified
)
{
*
modified
=
false
;
std
::
cout
<<
"Visiting FilterOp"
<<
'\n'
;
return
Status
::
OK
();
}
Status
PrinterPass
::
RunOnNode
(
std
::
shared_ptr
<
GeneratorOp
>
node
,
bool
*
modified
)
{
*
modified
=
false
;
std
::
cout
<<
"Visiting GeneratorOp"
<<
'\n'
;
return
Status
::
OK
();
}
#endif
Status
PrinterPass
::
RunOnNode
(
std
::
shared_ptr
<
TakeOp
>
node
,
bool
*
modified
)
{
*
modified
=
false
;
std
::
cout
<<
"Visiting TakeOp"
<<
'\n'
;
...
...
mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h
浏览文件 @
bd5a777f
...
...
@@ -35,18 +35,20 @@ class PrinterPass : public NodePass {
Status
RunOnNode
(
std
::
shared_ptr
<
RenameOp
>
node
,
bool
*
modified
)
override
;
Status
RunOnNode
(
std
::
shared_ptr
<
FilterOp
>
node
,
bool
*
modified
)
override
;
Status
RunOnNode
(
std
::
shared_ptr
<
SkipOp
>
node
,
bool
*
modified
)
override
;
Status
RunOnNode
(
std
::
shared_ptr
<
ShuffleOp
>
node
,
bool
*
modified
)
override
;
Status
RunOnNode
(
std
::
shared_ptr
<
GeneratorOp
>
node
,
bool
*
modified
)
override
;
Status
RunOnNode
(
std
::
shared_ptr
<
MindRecordOp
>
node
,
bool
*
modified
)
override
;
Status
RunOnNode
(
std
::
shared_ptr
<
TFReaderOp
>
node
,
bool
*
modified
)
override
;
#ifdef ENABLE_PYTHON
Status
RunOnNode
(
std
::
shared_ptr
<
FilterOp
>
node
,
bool
*
modified
)
override
;
Status
RunOnNode
(
std
::
shared_ptr
<
GeneratorOp
>
node
,
bool
*
modified
)
override
;
#endif
Status
RunOnNode
(
std
::
shared_ptr
<
TakeOp
>
node
,
bool
*
modified
)
override
;
Status
RunOnNode
(
std
::
shared_ptr
<
ZipOp
>
node
,
bool
*
modified
)
override
;
...
...
mindspore/ccsrc/dataset/include/dataset/core/constants.h
0 → 120000
浏览文件 @
bd5a777f
..
/
..
/
..
/
core
/
constants
.
h
\ No newline at end of file
mindspore/ccsrc/dataset/include/dataset/core/data_type.h
0 → 120000
浏览文件 @
bd5a777f
..
/
..
/
..
/
core
/
data_type
.
h
\ No newline at end of file
mindspore/ccsrc/dataset/include/dataset/core/tensor_shape.h
0 → 120000
浏览文件 @
bd5a777f
..
/
..
/
..
/
core
/
tensor_shape
.
h
\ No newline at end of file
mindspore/ccsrc/dataset/include/dataset/util/status.h
0 → 120000
浏览文件 @
bd5a777f
..
/
..
/
..
/
util
/
status
.
h
\ No newline at end of file
mindspore/ccsrc/dataset/include/datasets.h
0 → 100644
浏览文件 @
bd5a777f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_INCLUDE_DATASETS_H_
#define DATASET_INCLUDE_DATASETS_H_
#include <vector>
#include <memory>
#include <set>
#include <map>
#include <utility>
#include <string>
#include "dataset/include/tensor.h"
#include "dataset/include/iterator.h"
#include "dataset/include/samplers.h"
namespace
mindspore
{
namespace
dataset
{
// Forward declare
class
DatasetOp
;
class
DataSchema
;
class
Tensor
;
class
TensorShape
;
namespace
api
{
class
TensorOperation
;
class
SamplerObj
;
class
ImageFolderDataset
;
class
MnistDataset
;
class
BatchDataset
;
class
RepeatDataset
;
class
MapDataset
;
class
ShuffleDataset
;
class
Cifar10Dataset
;
class
ProjectDataset
;
/// \brief Function to create an ImageFolderDataset
/// \notes A source dataset that reads images from a tree of directories
/// All images within one folder have the same label
/// The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] decode A flag to decode in ImageFolder
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
/// A `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] extensions File extensions to be read
/// \param[in] class_indexing a class name to label map
/// \return Shared pointer to the current ImageFolderDataset
std
::
shared_ptr
<
ImageFolderDataset
>
ImageFolder
(
std
::
string
dataset_dir
,
bool
decode
=
false
,
std
::
shared_ptr
<
SamplerObj
>
sampler
=
nullptr
,
std
::
set
<
std
::
string
>
extensions
=
{},
std
::
map
<
std
::
string
,
int32_t
>
class_indexing
=
{});
/// \brief Function to create a MnistDataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
/// A `RandomSampler` will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current MnistDataset
std
::
shared_ptr
<
MnistDataset
>
Mnist
(
std
::
string
dataset_dir
,
std
::
shared_ptr
<
SamplerObj
>
sampler
=
nullptr
);
/// \brief Function to create a Cifar10 Dataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] num_samples The number of images to be included in the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
/// will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std
::
shared_ptr
<
Cifar10Dataset
>
Cifar10
(
const
std
::
string
&
dataset_dir
,
int32_t
num_samples
,
std
::
shared_ptr
<
SamplerObj
>
sampler
);
/// \class Dataset datasets.h
/// \brief A base class to represent a dataset in the data pipeline.
class Dataset : public std::enable_shared_from_this<Dataset> {
 public:
  friend class Iterator;

  /// \brief Constructor
  Dataset();

  /// \brief Destructor
  /// \note Virtual because this class is a polymorphic base (it declares pure virtual functions);
  ///     destroying a derived dataset through a `Dataset` pointer must run the derived destructor.
  virtual ~Dataset() = default;

  /// \brief Pure virtual function to convert a Dataset class into a runtime dataset object
  /// \return shared pointer to the list of newly created DatasetOps
  virtual std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() = 0;

  /// \brief Pure virtual function for derived class to implement parameters validation
  /// \return bool True if all the params are valid
  virtual bool ValidateParams() = 0;

  /// \brief Setter function for runtime number of workers
  /// \param[in] num_workers The number of threads in this operator
  /// \return Shared pointer to the original object
  /// \note Uses shared_from_this(), so the object must already be owned by a std::shared_ptr
  std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers) {
    num_workers_ = num_workers;
    return shared_from_this();
  }

  /// \brief Function to create an Iterator over the Dataset pipeline
  /// \return Shared pointer to the Iterator
  std::shared_ptr<Iterator> CreateIterator();

  /// \brief Function to create a BatchDataset
  /// \note Combines batch_size number of consecutive rows into batches
  /// \param[in] batch_size The number of consecutive rows combined into each batch
  /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
  ///     batch. If true, and if there are less than batch_size rows
  ///     available to make the last batch, then those rows will
  ///     be dropped and not propagated to the next node
  /// \return Shared pointer to the current BatchDataset
  std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);

  /// \brief Function to create a RepeatDataset
  /// \note Repeats this dataset count times. Repeat indefinitely if count is -1
  /// \param[in] count Number of times the dataset should be repeated
  /// \return Shared pointer to the current Dataset
  /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset`
  ///     due to a limitation in the current implementation
  std::shared_ptr<Dataset> Repeat(int32_t count = -1);

  /// \brief Function to create a MapDataset
  /// \note Applies each operation in operations to this dataset
  /// \param[in] operations Vector of operations to be applied on the dataset. Operations are
  ///     applied in the order they appear in this list
  /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
  ///     operation as input. The size of this list must match the number of
  ///     input columns expected by the first operator. The default input_columns
  ///     is the first column
  /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation
  ///     This parameter is mandatory if len(input_columns) != len(output_columns)
  ///     The size of this list must match the number of output columns of the
  ///     last operation. The default output_columns will have the same
  ///     name as the input columns, i.e., the columns will be replaced
  /// \param[in] project_columns A list of column names to project
  /// \return Shared pointer to the current MapDataset
  std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations,
                                  std::vector<std::string> input_columns = {},
                                  std::vector<std::string> output_columns = {},
                                  const std::vector<std::string> &project_columns = {});

  /// \brief Function to create a Shuffle Dataset
  /// \note Randomly shuffles the rows of this dataset
  /// \param[in] shuffle_size The size of the buffer (must be larger than 1) for shuffling
  /// \return Shared pointer to the current ShuffleDataset
  std::shared_ptr<ShuffleDataset> Shuffle(int32_t shuffle_size);

  /// \brief Function to create a Project Dataset
  /// \note Applies project to the dataset
  /// \param[in] columns The name of columns to project
  /// \return Shared pointer to the current Dataset
  std::shared_ptr<ProjectDataset> Project(const std::vector<std::string> &columns);

 protected:
  // Links to neighbouring nodes of the pipeline description.
  // NOTE(review): both links are owning shared_ptrs; if a pair of nodes ever populates both
  // directions this forms a reference cycle - confirm against the implementation file.
  std::vector<std::shared_ptr<Dataset>> children;
  std::shared_ptr<Dataset> parent;

  // Runtime settings; they have no in-class initializers, so the constructor is expected to set them.
  int32_t num_workers_;
  int32_t rows_per_buffer_;
  int32_t connector_que_size_;
};
/* ####################################### Derived Dataset classes ################################# */
/// \class ImageFolderDataset
/// \brief Pipeline node describing an ImageFolder source dataset.
class ImageFolderDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] dataset_dir Path to the root directory that contains the dataset
  /// \param[in] decode A flag to decode in ImageFolder
  /// \param[in] sampler Object used to choose samples from the dataset
  /// \param[in] recursive Flag stored in recursive_ (presumably enables recursive directory scan; confirm in .cc)
  /// \param[in] extensions File extensions to be read
  /// \param[in] class_indexing A class name to label map
  ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
                     std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing);

  /// \brief Destructor
  ~ImageFolderDataset() = default;

  /// \brief Creates the runtime dataset op objects required by this node (overrides Dataset::Build)
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Checks the parameters held by this node
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_dir_;
  bool decode_;
  bool recursive_;
  std::shared_ptr<SamplerObj> sampler_;
  std::map<std::string, int32_t> class_indexing_;
  std::set<std::string> exts_;
};
/// \class MnistDataset
/// \brief Pipeline node describing an MNIST source dataset.
class MnistDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] dataset_dir Path to the root directory that contains the dataset
  /// \param[in] sampler Object used to choose samples from the dataset
  MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler);

  /// \brief Destructor
  ~MnistDataset() = default;

  /// \brief Creates the runtime dataset op objects required by this node (overrides Dataset::Build)
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Checks the parameters held by this node
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_dir_;
  std::shared_ptr<SamplerObj> sampler_;
};
/// \class BatchDataset
/// \brief Pipeline node describing a batch operation.
class BatchDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] batch_size Value stored in batch_size_
  /// \param[in] drop_remainder Value stored in drop_remainder_
  /// \param[in] pad Value stored in pad_ (presumably enables padding; confirm in .cc)
  /// \param[in] cols_to_map Column names stored in cols_to_map_
  /// \param[in] pad_map Per-column (shape, tensor) pairs stored in pad_map_
  BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map,
               std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map);

  /// \brief Destructor
  ~BatchDataset() = default;

  /// \brief Creates the runtime dataset op objects required by this node (overrides Dataset::Build)
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Checks the parameters held by this node
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  int32_t batch_size_;
  bool drop_remainder_;
  bool pad_;
  std::vector<std::string> cols_to_map_;
  std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_;
};
/// \class RepeatDataset
/// \brief Pipeline node describing a repeat operation.
class RepeatDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] count Repeat count stored in repeat_count_
  // NOTE(review): Dataset::Repeat takes a signed count (default -1) while this constructor takes
  // an unsigned one - confirm how the "repeat indefinitely" value is passed through.
  explicit RepeatDataset(uint32_t count);

  /// \brief Destructor
  ~RepeatDataset() = default;

  /// \brief Creates the runtime dataset op objects required by this node (overrides Dataset::Build)
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Checks the parameters held by this node
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  uint32_t repeat_count_;
};
/// \class ShuffleDataset
/// \brief Pipeline node describing a shuffle operation.
class ShuffleDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] shuffle_size Value stored in shuffle_size_
  /// \param[in] reset_every_epoch Value stored in reset_every_epoch_
  ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch);

  /// \brief Destructor
  ~ShuffleDataset() = default;

  /// \brief Creates the runtime dataset op objects required by this node (overrides Dataset::Build)
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Checks the parameters held by this node
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  int32_t shuffle_size_;
  // Not a constructor argument; where the seed comes from is decided in the implementation file.
  uint32_t shuffle_seed_;
  bool reset_every_epoch_;
};
/// \class MapDataset
/// \brief Pipeline node describing a map operation (a list of tensor operations applied to columns).
class MapDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] operations Tensor operations held by this node
  /// \param[in] input_columns Names of the input columns (empty by default)
  /// \param[in] output_columns Names of the output columns (empty by default)
  /// \param[in] columns Names of the columns to project (empty by default)
  MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns = {},
             std::vector<std::string> output_columns = {}, const std::vector<std::string> &columns = {});

  /// \brief Destructor
  ~MapDataset() = default;

  /// \brief Creates the runtime dataset op objects required by this node (overrides Dataset::Build)
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Checks the parameters held by this node
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::vector<std::shared_ptr<TensorOperation>> operations_;
  std::vector<std::string> input_columns_;
  std::vector<std::string> output_columns_;
  std::vector<std::string> project_columns_;
};
/// \class Cifar10Dataset
/// \brief Pipeline node describing a Cifar10 source dataset.
class Cifar10Dataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] dataset_dir Path to the root directory that contains the dataset
  /// \param[in] num_samples The number of images to be included in the dataset
  /// \param[in] sampler Object used to choose samples from the dataset
  Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler);

  /// \brief Destructor
  ~Cifar10Dataset() = default;

  /// \brief Creates the runtime dataset op objects required by this node (overrides Dataset::Build)
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Checks the parameters held by this node
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_dir_;
  int32_t num_samples_;
  std::shared_ptr<SamplerObj> sampler_;
};
/// \class ProjectDataset
/// \brief Pipeline node describing a project operation over a list of column names.
class ProjectDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] columns The name of columns to project
  explicit ProjectDataset(const std::vector<std::string> &columns);

  /// \brief Destructor
  ~ProjectDataset() = default;

  /// \brief Creates the runtime dataset op objects required by this node (overrides Dataset::Build)
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Checks the parameters held by this node
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::vector<std::string> columns_;
};
}
// namespace api
}
// namespace dataset
}
// namespace mindspore
#endif // DATASET_INCLUDE_DATASETS_H_
mindspore/ccsrc/dataset/include/iterator.h
0 → 100644
浏览文件 @
bd5a777f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_INCLUDE_ITERATOR_H_
#define DATASET_INCLUDE_ITERATOR_H_
#include <unordered_map>
#include <memory>
#include <vector>
#include <string>
#include "dataset/include/status.h"
namespace
mindspore
{
namespace
dataset
{
// Forward declare
class
ExecutionTree
;
class
DatasetIterator
;
class
DatasetOp
;
class
Tensor
;
namespace
api
{
class
Dataset
;
using
TensorMap
=
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
Tensor
>>
;
// Abstract class for iterating over the dataset.
class
Iterator
{
public:
/// \brief Constructor
Iterator
()
=
default
;
/// \brief Destructor
~
Iterator
()
=
default
;
/// \brief Method for building and launching the pipeline.
/// \param[in] ops - a vector of DatasetOp in the data pipeline.
/// \return - a Status error code, returns OK if no error encountered.
Status
BuildAndLaunchTree
(
std
::
shared_ptr
<
Dataset
>
ds
);
/// \brief Function to get the next row from the data pipeline.
/// \param[out] row - the output tensor row.
void
GetNextRow
(
TensorMap
*
row
);
/// \brief Function to shut down the data pipeline.
void
Stop
();
class
_Iterator
{
public:
explicit
_Iterator
(
Iterator
*
lt
)
:
lt_
{
lt
},
cur_row_
{
nullptr
}
{
if
(
lt_
)
{
cur_row_
=
new
TensorMap
();
lt_
->
GetNextRow
(
cur_row_
);
}
}
// Destructor
~
_Iterator
()
{
if
(
cur_row_
)
{
delete
cur_row_
;
}
}
_Iterator
&
operator
++
()
{
if
(
lt_
)
{
++
ind_
;
lt_
->
GetNextRow
(
cur_row_
);
}
if
(
cur_row_
&&
cur_row_
->
size
()
==
0
)
{
delete
cur_row_
;
cur_row_
=
nullptr
;
}
return
*
this
;
}
// prefix ++ overload
TensorMap
&
operator
*
()
{
return
*
cur_row_
;
}
// dereference operator
TensorMap
*
operator
->
()
{
return
cur_row_
;
}
bool
operator
!=
(
const
_Iterator
&
rhs
)
{
return
cur_row_
!=
rhs
.
cur_row_
;
}
private:
int
ind_
;
// the cur node our Iterator points to
Iterator
*
lt_
;
TensorMap
*
cur_row_
;
};
_Iterator
begin
()
{
return
_Iterator
(
this
);
}
_Iterator
end
()
{
return
_Iterator
(
nullptr
);
}
private:
// Runtime tree.
// Use shared_ptr instead of unique_ptr because the DatasetIterator constructor takes in a shared_ptr type.
std
::
shared_ptr
<
ExecutionTree
>
tree_
;
// Runtime iterator
std
::
unique_ptr
<
DatasetIterator
>
iterator_
;
};
}
// namespace api
}
// namespace dataset
}
// namespace mindspore
#endif // DATASET_INCLUDE_ITERATOR_H_
mindspore/ccsrc/dataset/include/samplers.h
0 → 100644
浏览文件 @
bd5a777f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_API_SAMPLERS_H_
#define DATASET_API_SAMPLERS_H_
#include <vector>
#include <memory>
namespace
mindspore
{
namespace
dataset
{
// Internal Sampler class forward declaration
class
Sampler
;
namespace
api
{
/// \class SamplerObj
/// \brief Base class of the sampler description objects; each one can build a runtime Sampler.
class SamplerObj : public std::enable_shared_from_this<SamplerObj> {
 public:
  /// \brief Constructor
  SamplerObj();

  /// \brief Destructor
  /// \note Virtual because this class is a polymorphic base (it declares pure virtual functions);
  ///     destroying a derived sampler through a `SamplerObj` pointer must run the derived destructor.
  virtual ~SamplerObj() = default;

  /// \brief Pure virtual function to convert this description into a runtime Sampler object
  /// \return shared pointer to the newly created Sampler
  virtual std::shared_ptr<Sampler> Build() = 0;

  /// \brief Pure virtual function for derived class to implement parameters validation
  /// \return bool true if all the params are valid
  virtual bool ValidateParams() = 0;
};
class
DistributedSamplerObj
;
class
PKSamplerObj
;
class
RandomSamplerObj
;
class
SequentialSamplerObj
;
class
SubsetRandomSamplerObj
;
class
WeightedRandomSamplerObj
;
/// Function to create a Distributed Sampler.
/// \notes A Sampler that accesses a shard of the dataset.
/// \param[in] num_shards - Number of shards to divide the dataset into.
/// \param[in] shard_id - Shard ID of the current shard within num_shards.
/// \param[in] shuffle - If true, the indices are shuffled.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \param[in] seed - The seed in use when shuffle is true.
/// \return Shared pointer to the current Sampler.
std
::
shared_ptr
<
DistributedSamplerObj
>
DistributedSampler
(
int64_t
num_shards
,
int64_t
shard_id
,
bool
shuffle
=
true
,
int64_t
num_samples
=
0
,
uint32_t
seed
=
1
);
/// Function to create a PK Sampler.
/// \notes Samples K elements for each P class in the dataset.
/// This will sample all classes.
/// \param[in] num_val - Number of elements to sample for each class.
/// \param[in] shuffle - If true, the class IDs are shuffled.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std
::
shared_ptr
<
PKSamplerObj
>
PKSampler
(
int64_t
num_val
,
bool
shuffle
=
false
,
int64_t
num_samples
=
0
);
/// Function to create a Random Sampler.
/// \notes Samples the elements randomly.
/// \param[in] replacement - If True, put the sample ID back for the next draw.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std
::
shared_ptr
<
RandomSamplerObj
>
RandomSampler
(
bool
replacement
=
false
,
int64_t
num_samples
=
0
);
/// Function to create a Sequential Sampler.
/// \notes Samples the dataset elements sequentially, same as not having a sampler.
/// \param[in] start_index - Index to start sampling at (default to start at first id).
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std
::
shared_ptr
<
SequentialSamplerObj
>
SequentialSampler
(
int64_t
start_index
=
0
,
int64_t
num_samples
=
0
);
/// Function to create a Subset Random Sampler.
/// \notes Samples the elements randomly from a sequence of indices.
/// \param[in] indices - A vector sequence of indices.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std
::
shared_ptr
<
SubsetRandomSamplerObj
>
SubsetRandomSampler
(
const
std
::
vector
<
int64_t
>
&
indices
,
int64_t
num_samples
=
0
);
/// Function to create a Weighted Random Sampler.
/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given
/// weights (probabilities).
/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \param[in] replacement - If True, put the sample ID back for the next draw.
/// \return Shared pointer to the current Sampler.
std
::
shared_ptr
<
WeightedRandomSamplerObj
>
WeightedRandomSampler
(
const
std
::
vector
<
double
>
&
weights
,
int64_t
num_samples
=
0
,
bool
replacement
=
true
);
/* ####################################### Derived Sampler classes ################################# */
/// \class DistributedSamplerObj
/// \brief Description of a distributed sampler (one shard of the dataset).
class DistributedSamplerObj : public SamplerObj {
 public:
  /// \brief Constructor
  /// \param[in] num_shards Number of shards to divide the dataset into
  /// \param[in] shard_id Shard ID of the current shard within num_shards
  /// \param[in] shuffle If true, the indices are shuffled
  /// \param[in] num_samples The number of samples to draw
  /// \param[in] seed The seed in use when shuffle is true
  DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed);

  /// \brief Destructor
  ~DistributedSamplerObj() = default;

  /// \brief Creates the runtime Sampler for this description (overrides SamplerObj::Build)
  std::shared_ptr<Sampler> Build() override;

  /// \brief Checks the parameters held by this description
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  int64_t num_shards_;
  int64_t shard_id_;
  bool shuffle_;
  int64_t num_samples_;
  uint32_t seed_;
};
/// \class PKSamplerObj
/// \brief Description of a PK sampler (K elements for each of P classes).
class PKSamplerObj : public SamplerObj {
 public:
  /// \brief Constructor
  /// \param[in] num_val Number of elements to sample for each class
  /// \param[in] shuffle If true, the class IDs are shuffled
  /// \param[in] num_samples The number of samples to draw
  PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples);

  /// \brief Destructor
  ~PKSamplerObj() = default;

  /// \brief Creates the runtime Sampler for this description (overrides SamplerObj::Build)
  std::shared_ptr<Sampler> Build() override;

  /// \brief Checks the parameters held by this description
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  int64_t num_val_;
  bool shuffle_;
  int64_t num_samples_;
};
/// \class RandomSamplerObj
/// \brief Description of a random sampler.
class RandomSamplerObj : public SamplerObj {
 public:
  /// \brief Constructor
  /// \param[in] replacement If true, put the sample ID back for the next draw
  /// \param[in] num_samples The number of samples to draw
  RandomSamplerObj(bool replacement, int64_t num_samples);

  /// \brief Destructor
  ~RandomSamplerObj() = default;

  /// \brief Creates the runtime Sampler for this description (overrides SamplerObj::Build)
  std::shared_ptr<Sampler> Build() override;

  /// \brief Checks the parameters held by this description
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  bool replacement_;
  int64_t num_samples_;
};
/// \class SequentialSamplerObj
/// \brief Description of a sequential sampler.
class SequentialSamplerObj : public SamplerObj {
 public:
  /// \brief Constructor
  /// \param[in] start_index Index to start sampling at
  /// \param[in] num_samples The number of samples to draw
  SequentialSamplerObj(int64_t start_index, int64_t num_samples);

  /// \brief Destructor
  ~SequentialSamplerObj() = default;

  /// \brief Creates the runtime Sampler for this description (overrides SamplerObj::Build)
  std::shared_ptr<Sampler> Build() override;

  /// \brief Checks the parameters held by this description
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  int64_t start_index_;
  int64_t num_samples_;
};
/// \class SubsetRandomSamplerObj
/// \brief Description of a sampler that draws randomly from a given sequence of indices.
class SubsetRandomSamplerObj : public SamplerObj {
 public:
  /// \brief Constructor
  /// \param[in] indices A vector sequence of indices; copied into this object
  /// \param[in] num_samples The number of samples to draw
  SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples);

  /// \brief Destructor
  ~SubsetRandomSamplerObj() = default;

  /// \brief Creates the runtime Sampler for this description (overrides SamplerObj::Build)
  std::shared_ptr<Sampler> Build() override;

  /// \brief Checks the parameters held by this description
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  // Held by value, not by reference: this object outlives the constructor call, so a reference
  // member would dangle whenever the caller passed a temporary (e.g. a braced list).
  const std::vector<int64_t> indices_;
  int64_t num_samples_;
};
/// \class WeightedRandomSamplerObj
/// \brief Description of a sampler that draws indices randomly according to given weights.
class WeightedRandomSamplerObj : public SamplerObj {
 public:
  /// \brief Constructor
  /// \param[in] weights A vector sequence of weights; copied into this object
  /// \param[in] num_samples The number of samples to draw (0 by default)
  /// \param[in] replacement If true, put the sample ID back for the next draw (true by default)
  explicit WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples = 0,
                                    bool replacement = true);

  /// \brief Destructor
  ~WeightedRandomSamplerObj() = default;

  /// \brief Creates the runtime Sampler for this description (overrides SamplerObj::Build)
  std::shared_ptr<Sampler> Build() override;

  /// \brief Checks the parameters held by this description
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  // Held by value, not by reference: this object outlives the constructor call, so a reference
  // member would dangle whenever the caller passed a temporary (e.g. a braced list).
  const std::vector<double> weights_;
  int64_t num_samples_;
  bool replacement_;
};
}
// namespace api
}
// namespace dataset
}
// namespace mindspore
#endif // DATASET_API_SAMPLERS_H_
mindspore/ccsrc/dataset/include/status.h
0 → 120000
浏览文件 @
bd5a777f
..
/
util
/
status
.
h
\ No newline at end of file
mindspore/ccsrc/dataset/include/tensor.h
0 → 120000
浏览文件 @
bd5a777f
..
/
core
/
tensor
.
h
\ No newline at end of file
mindspore/ccsrc/dataset/include/transforms.h
0 → 100644
浏览文件 @
bd5a777f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_API_TRANSFORMS_H_
#define DATASET_API_TRANSFORMS_H_
#include <vector>
#include <memory>
#include "dataset/core/constants.h"
namespace
mindspore
{
namespace
dataset
{
class
TensorOp
;
namespace
api
{
// Abstract base class for a tensor operation description; concrete operations derive from it
// and implement Build() and ValidateParams().
class TensorOperation : public std::enable_shared_from_this<TensorOperation> {
 public:
  /// \brief Constructor
  TensorOperation();

  /// \brief Destructor
  /// \note Virtual because this is a polymorphic base: deleting a derived operation through a
  ///     TensorOperation pointer would otherwise be undefined behaviour.
  virtual ~TensorOperation() = default;

  /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object.
  /// \return shared pointer to the newly created TensorOp.
  virtual std::shared_ptr<TensorOp> Build() = 0;

  /// \brief Pure virtual function implemented by each derived operation.
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  virtual bool ValidateParams() = 0;
};
// Transform operations for performing computer vision.
namespace
vision
{
class
NormalizeOperation
;
class
DecodeOperation
;
class
ResizeOperation
;
class
RandomCropOperation
;
class
CenterCropOperation
;
class
UniformAugOperation
;
class
RandomHorizontalFlipOperation
;
class
RandomVerticalFlipOperation
;
class
RandomRotationOperation
;
class
PadOperation
;
class
CutOutOperation
;
class
RandomColorAdjustOperation
;
/// \brief Factory for a Normalize TensorOperation.
/// \note The operation normalizes the input image using a per-channel mean and standard deviation.
/// \param[in] mean Mean value for every channel, listed in channel order.
/// \param[in] std Standard deviation for every channel, listed in channel order.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std);
/// \brief Factory for a Decode TensorOperation.
/// \note The operation decodes the input image.
/// \param[in] rgb Whether decoding is done in RGB mode (default: true).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<DecodeOperation> Decode(bool rgb = true);
/// \brief Factory for a Resize TensorOperation.
/// \note The operation resizes the input image to the given size.
/// \param[in] size Output size of the resized image. With a single value the image is resized to
///     that value while keeping the aspect ratio; with two values they are (height, width).
/// \param[in] interpolation Interpolation mode (default: InterpolationMode::kLinear).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size,
                                        InterpolationMode interpolation = InterpolationMode::kLinear);
/// \brief Factory for a RandomCrop TensorOperation.
/// \note The operation crops the input image at a random location.
/// \param[in] size Output size of the crop. A single value yields a square (size, size) crop;
///     two values are interpreted as (height, width).
/// \param[in] padding Number of pixels to pad the image with. When four values are given they
///     apply to left, top, right and bottom respectively (default: {0, 0, 0, 0}).
/// \param[in] pad_if_needed Whether to pad the image when either side is smaller than the
///     requested output size (default: false).
/// \param[in] fill_value Pixel intensity of the borders, used for the R, G and B channels
///     respectively (default: {0, 0, 0}).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size,
                                                std::vector<int32_t> padding = {0, 0, 0, 0},
                                                bool pad_if_needed = false,
                                                std::vector<uint8_t> fill_value = {0, 0, 0});
/// \brief Factory for a CenterCrop TensorOperation.
/// \note The operation crops the input image at its center to the given size.
/// \param[in] size Output size of the crop. A single value yields a square (size, size) crop;
///     two values are interpreted as (height, width).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size);
/// \brief Factory for a UniformAugment TensorOperation.
/// \note The operation applies randomly selected augmentations.
/// \param[in] operations Candidate TensorOperation objects to choose from.
/// \param[in] num_ops Number of operations to be selected and applied (default: 2).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations,
                                                    int32_t num_ops = 2);
/// \brief Factory for a RandomHorizontalFlip TensorOperation.
/// \note The operation performs a random horizontal flip.
/// \param[in] prob Probability of the flip (default: 0.5).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob = 0.5);
/// \brief Factory for a RandomVerticalFlip TensorOperation.
/// \note The operation performs a random vertical flip.
/// \param[in] prob Probability of the flip (default: 0.5).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob = 0.5);
/// \brief Factory for a RandomRotation TensorOperation.
/// \note The operation rotates the image according to the parameters below.
/// \param[in] degrees Two floats: the starting and the ending degree.
/// \param[in] resample Interpolation mode (default: InterpolationMode::kNearestNeighbour).
/// \param[in] expand Whether the image is expanded after rotation (default: false).
/// \param[in] center Two floats: the x and y coordinates of the rotation center (default: {-1, -1}).
/// \param[in] fill_value Three uint8_t values: the RGB fill color (default: {0, 0, 0}).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomRotationOperation> RandomRotation(
  std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour,
  bool expand = false, std::vector<float> center = {-1, -1}, std::vector<uint8_t> fill_value = {0, 0, 0});
/// \brief Factory for a Pad TensorOperation.
/// \note The operation pads the image according to the padding parameters.
/// \param[in] padding Number of pixels to pad the image with.
///     - one value: every side is padded with that value
///     - two values: left and right use the first, top and bottom use the second
///     - four values: left, top, right and bottom use those values respectively
/// \param[in] fill_value Pixel intensity of the borders when padding_mode is BorderType.kConstant.
///     When three values are given they fill the R, G and B channels respectively (default: {0}).
/// \param[in] padding_mode Padding method (default: BorderType.kConstant). One of
///     - BorderType.kConstant: fill the border with constant values
///     - BorderType.kEdge: pad with the last value on the edge
///     - BorderType.kReflect: reflect the values on the edge, omitting the last value of the edge
///     - BorderType.kSymmetric: reflect the values on the edge, repeating the last value of the edge
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
                                  BorderType padding_mode = BorderType::kConstant);
/// \brief Factory for a CutOut TensorOperation.
/// \note The operation randomly cuts (masks) out a given number of square patches from the input image.
/// \param[in] length Side length of each square patch.
/// \param[in] num_patches Number of patches to cut out of an image (default: 1).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches = 1);
/// \brief Factory for a RandomColorAdjust TensorOperation, which randomly adjusts the brightness,
///     contrast, saturation and hue of the input image.
/// \param[in] brightness Brightness adjustment factor: a vector of one or two values; two values
///     must be given as [min, max] (default: {1, 1}).
/// \param[in] contrast Contrast adjustment factor: a vector of one or two values; two values
///     must be given as [min, max] (default: {1, 1}).
/// \param[in] saturation Saturation adjustment factor: a vector of one or two values; two values
///     must be given as [min, max] (default: {1, 1}).
/// \param[in] hue Hue adjustment factor: a vector of one or two values; two values must be given
///     as [min, max] with -0.5 <= min <= max <= 0.5 (default: {0, 0}).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0},
                                                              std::vector<float> contrast = {1.0, 1.0},
                                                              std::vector<float> saturation = {1.0, 1.0},
                                                              std::vector<float> hue = {0.0, 0.0});
/* ####################################### Derived TensorOperation classes ################################# */
/// \brief TensorOperation subclass returned by the Normalize() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class NormalizeOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] mean Vector of float mean values.
  /// \param[in] std Vector of float standard deviations.
  NormalizeOperation(std::vector<float> mean, std::vector<float> std);

  /// \brief Destructor
  ~NormalizeOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  std::vector<float> mean_;
  std::vector<float> std_;
};
/// \brief TensorOperation subclass returned by the Decode() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class DecodeOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] rgb Boolean flag, true by default.
  explicit DecodeOperation(bool rgb = true);

  /// \brief Destructor
  ~DecodeOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  bool rgb_;
};
/// \brief TensorOperation subclass returned by the Resize() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class ResizeOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] size Vector of int32_t size values.
  /// \param[in] interpolation_mode Interpolation mode, InterpolationMode::kLinear by default.
  explicit ResizeOperation(std::vector<int32_t> size,
                           InterpolationMode interpolation_mode = InterpolationMode::kLinear);

  /// \brief Destructor
  ~ResizeOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  std::vector<int32_t> size_;
  InterpolationMode interpolation_;
};
/// \brief TensorOperation subclass returned by the RandomCrop() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class RandomCropOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] size Vector of int32_t size values.
  /// \param[in] padding Vector of int32_t padding values, {0, 0, 0, 0} by default.
  /// \param[in] pad_if_needed Boolean flag, false by default.
  /// \param[in] fill_value Vector of uint8_t fill values, {0, 0, 0} by default.
  RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
                      bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0});

  /// \brief Destructor
  ~RandomCropOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  std::vector<int32_t> size_;
  std::vector<int32_t> padding_;
  bool pad_if_needed_;
  std::vector<uint8_t> fill_value_;
};
/// \brief TensorOperation subclass returned by the CenterCrop() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class CenterCropOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] size Vector of int32_t size values.
  explicit CenterCropOperation(std::vector<int32_t> size);

  /// \brief Destructor
  ~CenterCropOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  std::vector<int32_t> size_;
};
/// \brief TensorOperation subclass returned by the UniformAugment() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class UniformAugOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] operations Vector of shared pointers to TensorOperation objects.
  /// \param[in] num_ops Integer count, 2 by default.
  explicit UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops = 2);

  /// \brief Destructor
  ~UniformAugOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  std::vector<std::shared_ptr<TensorOperation>> operations_;
  int32_t num_ops_;
};
/// \brief TensorOperation subclass returned by the RandomHorizontalFlip() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class RandomHorizontalFlipOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] probability Float value, 0.5 by default.
  explicit RandomHorizontalFlipOperation(float probability = 0.5);

  /// \brief Destructor
  ~RandomHorizontalFlipOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  float probability_;
};
/// \brief TensorOperation subclass returned by the RandomVerticalFlip() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class RandomVerticalFlipOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] probability Float value, 0.5 by default.
  explicit RandomVerticalFlipOperation(float probability = 0.5);

  /// \brief Destructor
  ~RandomVerticalFlipOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  float probability_;
};
/// \brief TensorOperation subclass returned by the RandomRotation() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class RandomRotationOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] degrees Vector of float degree values.
  /// \param[in] interpolation_mode Interpolation mode.
  /// \param[in] expand Boolean flag.
  /// \param[in] center Vector of float center coordinates.
  /// \param[in] fill_value Vector of uint8_t fill values.
  RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, bool expand,
                          std::vector<float> center, std::vector<uint8_t> fill_value);

  /// \brief Destructor
  ~RandomRotationOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  // Members are initialized in the order declared here (center_ before expand_), which is not
  // the order of the constructor parameters (expand before center).
  std::vector<float> degrees_;
  InterpolationMode interpolation_mode_;
  std::vector<float> center_;
  bool expand_;
  std::vector<uint8_t> fill_value_;
};
/// \brief TensorOperation subclass returned by the Pad() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class PadOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] padding Vector of int32_t padding values.
  /// \param[in] fill_value Vector of uint8_t fill values, {0} by default.
  /// \param[in] padding_mode Border type, BorderType::kConstant by default.
  PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
               BorderType padding_mode = BorderType::kConstant);

  /// \brief Destructor
  ~PadOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  std::vector<int32_t> padding_;
  std::vector<uint8_t> fill_value_;
  BorderType padding_mode_;
};
/// \brief TensorOperation subclass returned by the CutOut() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class CutOutOperation : public TensorOperation {
 public:
  /// \brief Constructor
  /// \param[in] length Integer length value.
  /// \param[in] num_patches Integer patch count, 1 by default.
  explicit CutOutOperation(int32_t length, int32_t num_patches = 1);

  /// \brief Destructor
  ~CutOutOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  int32_t length_;
  int32_t num_patches_;
};
/// \brief TensorOperation subclass returned by the RandomColorAdjust() factory.
/// \note Only declarations live here; the member functions are defined outside this header.
class RandomColorAdjustOperation : public TensorOperation {
 public:
  /// \brief Constructor; every argument has a default, so it also serves as the default constructor.
  /// \param[in] brightness Vector of float values, {1.0, 1.0} by default.
  /// \param[in] contrast Vector of float values, {1.0, 1.0} by default.
  /// \param[in] saturation Vector of float values, {1.0, 1.0} by default.
  /// \param[in] hue Vector of float values, {0.0, 0.0} by default.
  RandomColorAdjustOperation(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
                             std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});

  /// \brief Destructor
  ~RandomColorAdjustOperation() = default;

  /// \brief Override of TensorOperation::Build().
  /// \return Shared pointer to a TensorOp.
  std::shared_ptr<TensorOp> Build() override;

  /// \brief Override of TensorOperation::ValidateParams().
  /// \return Boolean result of the check (presumably true when the parameters are acceptable - confirm).
  bool ValidateParams() override;

 private:
  std::vector<float> brightness_;
  std::vector<float> contrast_;
  std::vector<float> saturation_;
  std::vector<float> hue_;
};
}
// namespace vision
}
// namespace api
}
// namespace dataset
}
// namespace mindspore
#endif // DATASET_API_TRANSFORMS_H_
mindspore/ccsrc/dataset/include/utils/log_adapter.h
0 → 120000
浏览文件 @
bd5a777f
..
/
..
/
..
/
utils
/
log_adapter
.
h
\ No newline at end of file
mindspore/ccsrc/dataset/include/utils/overload.h
0 → 120000
浏览文件 @
bd5a777f
..
/
..
/
..
/
utils
/
overload
.
h
\ No newline at end of file
mindspore/ccsrc/dataset/kernels/CMakeLists.txt
浏览文件 @
bd5a777f
...
...
@@ -2,7 +2,13 @@ add_subdirectory(image)
add_subdirectory
(
data
)
file
(
GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"*.cc"
)
set_property
(
SOURCE
${
_CURRENT_SRC_FILES
}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD
)
add_library
(
kernels OBJECT
py_func_op.cc
tensor_op.cc
)
target_include_directories
(
kernels PRIVATE
${
pybind11_INCLUDE_DIRS
}
)
if
(
ENABLE_PYTHON
)
add_library
(
kernels OBJECT
py_func_op.cc
tensor_op.cc
)
target_include_directories
(
kernels PRIVATE
${
pybind11_INCLUDE_DIRS
}
)
else
()
add_library
(
kernels OBJECT
tensor_op.cc
)
endif
()
mindspore/ccsrc/dataset/kernels/data/data_utils.cc
浏览文件 @
bd5a777f
...
...
@@ -23,7 +23,9 @@
#include "dataset/core/constants.h"
#include "dataset/core/data_type.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
#endif
#include "dataset/core/tensor.h"
#include "dataset/core/tensor_shape.h"
#include "dataset/kernels/data/type_cast_op.h"
...
...
mindspore/ccsrc/dataset/kernels/image/image_utils.cc
浏览文件 @
bd5a777f
...
...
@@ -729,7 +729,6 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
int
num_channels
=
input_cv
->
shape
()[
2
];
if
(
input_cv
->
Rank
()
==
3
&&
num_channels
==
1
&&
output_cv
->
Rank
()
==
2
)
output_cv
->
ExpandDim
(
2
);
*
output
=
std
::
static_pointer_cast
<
Tensor
>
(
output_cv
);
return
Status
::
OK
();
}
catch
(
const
cv
::
Exception
&
e
)
{
RETURN_STATUS_UNEXPECTED
(
"Unexpected error in pad"
);
...
...
mindspore/ccsrc/dataset/kernels/image/image_utils.h
浏览文件 @
bd5a777f
...
...
@@ -35,10 +35,6 @@
namespace
mindspore
{
namespace
dataset
{
// Interpolation modes, each with a fixed explicit integer value.
enum class InterpolationMode {
  kLinear = 0,            // linear
  kNearestNeighbour = 1,  // nearest neighbour
  kCubic = 2,             // cubic
  kArea = 3               // area
};
// Border (padding) types, each with a fixed explicit integer value.
enum class BorderType {
  kConstant = 0,  // constant
  kEdge = 1,      // edge
  kReflect = 2,   // reflect
  kSymmetric = 3  // symmetric
};
// Declaration only; the definition is not part of this excerpt.
// NOTE(review): the name and the j_common_ptr argument suggest a custom libjpeg error-exit
// handler - confirm at the definition.
void JpegErrorExitCustom(j_common_ptr cinfo);
struct
JpegErrorManagerCustom
{
...
...
mindspore/ccsrc/dataset/kernels/image/pad_op.cc
浏览文件 @
bd5a777f
...
...
@@ -16,6 +16,7 @@
#include "dataset/kernels/image/pad_op.h"
#include "dataset/kernels/image/image_utils.h"
#include "dataset/core/constants.h"
#include "dataset/util/status.h"
namespace
mindspore
{
...
...
mindspore/ccsrc/dataset/kernels/image/pad_op.h
浏览文件 @
bd5a777f
...
...
@@ -21,7 +21,7 @@
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
#include "dataset/
kernels/image/image_util
s.h"
#include "dataset/
core/constant
s.h"
#include "dataset/util/status.h"
namespace
mindspore
{
...
...
mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc
浏览文件 @
bd5a777f
...
...
@@ -18,7 +18,6 @@
#include "dataset/kernels/image/image_utils.h"
#include "dataset/util/status.h"
#include "dataset/core/cv_tensor.h"
#include "dataset/core/pybind_support.h"
namespace
mindspore
{
namespace
dataset
{
...
...
mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h
浏览文件 @
bd5a777f
...
...
@@ -16,8 +16,6 @@
#ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_
#define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include <memory>
#include <random>
#include <cstdlib>
...
...
@@ -26,8 +24,6 @@
#include "dataset/kernels/tensor_op.h"
#include "dataset/util/random.h"
#include "dataset/util/status.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl_bind.h"
namespace
mindspore
{
namespace
dataset
{
...
...
mindspore/ccsrc/dataset/text/kernels/ngram_op.h
浏览文件 @
bd5a777f
...
...
@@ -27,7 +27,6 @@
namespace
mindspore
{
namespace
dataset
{
namespace
py
=
pybind11
;
class
NgramOp
:
public
TensorOp
{
public:
...
...
tests/ut/cpp/CMakeLists.txt
浏览文件 @
bd5a777f
...
...
@@ -32,7 +32,15 @@ if(ENABLE_MINDDATA)
endif
()
# fetch ut test files
if
(
ENABLE_MINDDATA
)
file
(
GLOB_RECURSE UT_SRCS ./*.cc
)
file
(
GLOB_RECURSE UT_SRCS RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
./*.cc
)
if
(
NOT ENABLE_PYTHON
)
set
(
PYTHON_RELATED_SRCS
dataset/filter_op_test.cc
dataset/voc_op_test.cc
dataset/manifest_op_test.cc
)
list
(
REMOVE_ITEM UT_SRCS
${
PYTHON_RELATED_SRCS
}
)
endif
()
else
()
file
(
GLOB_RECURSE TEMP_UT_SRCS ./*.cc
)
foreach
(
OBJ
${
TEMP_UT_SRCS
}
)
...
...
tests/ut/cpp/dataset/CMakeLists.txt
浏览文件 @
bd5a777f
...
...
@@ -90,6 +90,7 @@ SET(DE_UT_SRCS
concatenate_op_test.cc
cyclic_array_test.cc
perf_data_test.cc
c_api_test.cc
)
add_executable
(
de_ut_tests
${
DE_UT_SRCS
}
)
...
...
tests/ut/cpp/dataset/c_api_test.cc
0 → 100644
浏览文件 @
bd5a777f
此差异已折叠。
点击以展开。
tests/ut/cpp/dataset/datatype_test.cc
浏览文件 @
bd5a777f
...
...
@@ -23,8 +23,6 @@
using
namespace
mindspore
::
dataset
;
namespace
py
=
pybind11
;
class
MindDataTestDatatype
:
public
UT
::
Common
{
public:
MindDataTestDatatype
()
=
default
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录