Commit 051567ba

Authored May 18, 2020 by mindspore-ci-bot; committed via Gitee on May 18, 2020.

!1192 Support string type

Merge pull request !1192 from h.farahat/string_Tensor

Parents: 4ecc9389, e8ca2433

Showing 43 changed files with 781 additions and 312 deletions (+781 -312).

Changed files:
mindspore/ccsrc/dataset/api/python_bindings.cc  +5 -0
mindspore/ccsrc/dataset/core/cv_tensor.cc  +6 -6
mindspore/ccsrc/dataset/core/data_type.cc  +36 -185
mindspore/ccsrc/dataset/core/data_type.h  +66 -12
mindspore/ccsrc/dataset/core/tensor.cc  +157 -18
mindspore/ccsrc/dataset/core/tensor.h  +199 -19
mindspore/ccsrc/dataset/core/tensor_shape.cc  +12 -0
mindspore/ccsrc/dataset/core/tensor_shape.h  +2 -0
mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc  +6 -2
mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc  +8 -1
mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc  +2 -2
mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h  +0 -1
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc  +1 -1
mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc  +1 -4
mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc  +2 -2
mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc  +1 -1
mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc  +1 -1
mindspore/ccsrc/dataset/kernels/data/data_utils.cc  +2 -2
mindspore/ccsrc/dataset/kernels/image/image_utils.cc  +5 -5
mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc  +2 -2
tests/ut/cpp/dataset/CMakeLists.txt  +1 -0
tests/ut/cpp/dataset/common/cvop_common.cc  +1 -1
tests/ut/cpp/dataset/datatype_test.cc  +11 -11
tests/ut/cpp/dataset/image_folder_op_test.cc  +1 -1
tests/ut/cpp/dataset/map_op_test.cc  +4 -4
tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc  +4 -4
tests/ut/cpp/dataset/stand_alone_samplers_test.cc  +1 -1
tests/ut/cpp/dataset/tensor_string_test.cc  +153 -0
tests/ut/cpp/dataset/tensor_test.cc  +8 -8
tests/ut/python/dataset/test_datasets_textfileop.py  +4 -4
tests/ut/python/dataset/test_flat_map.py  +3 -3
tests/ut/python/dataset/test_tensor_string.py  +65 -0
mindspore/ccsrc/dataset/api/python_bindings.cc

@@ -237,6 +237,11 @@ void bindTensor(py::module *m) {
     .def("type", &Tensor::type)
     .def("as_array", [](py::object &t) {
       auto &tensor = py::cast<Tensor &>(t);
+      if (tensor.type() == DataType::DE_STRING) {
+        py::array res;
+        tensor.GetDataAsNumpyStrings(&res);
+        return res;
+      }
       py::buffer_info info;
       THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
       return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t);
mindspore/ccsrc/dataset/core/cv_tensor.cc

@@ -24,15 +24,15 @@
 namespace mindspore {
 namespace dataset {

 CVTensor::CVTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) {
-  (void)this->MatInit(StartAddr(), shape_, type_, &mat_);
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }

 CVTensor::CVTensor(const TensorShape &shape, const DataType &type, const uchar *data) : Tensor(shape, type, data) {
-  (void)this->MatInit(StartAddr(), shape_, type_, &mat_);
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }

 CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor)) {
-  (void)this->MatInit(StartAddr(), shape_, type_, &mat_);
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }

 std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) {

@@ -83,19 +83,19 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &
 Status CVTensor::Reshape(const TensorShape &shape) {
   RETURN_IF_NOT_OK(Tensor::Reshape(shape));
-  RETURN_IF_NOT_OK(this->MatInit(StartAddr(), shape_, type_, &mat_));
+  RETURN_IF_NOT_OK(this->MatInit(GetMutableBuffer(), shape_, type_, &mat_));
   return Status::OK();
 }

 Status CVTensor::ExpandDim(const dsize_t &axis) {
   RETURN_IF_NOT_OK(Tensor::ExpandDim(axis));
-  RETURN_IF_NOT_OK(this->MatInit(StartAddr(), shape_, type_, &mat_));
+  RETURN_IF_NOT_OK(this->MatInit(GetMutableBuffer(), shape_, type_, &mat_));
   return Status::OK();
 }

 void CVTensor::Squeeze() {
   Tensor::Squeeze();
-  (void)this->MatInit(StartAddr(), shape_, type_, &mat_);
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }
 }  // namespace dataset
 }  // namespace mindspore
mindspore/ccsrc/dataset/core/data_type.cc

@@ -15,116 +15,40 @@
  */
 #include "dataset/core/data_type.h"

 #include <opencv2/core/hal/interface.h>

 #include "utils/log_adapter.h"
 #include "dataset/core/constants.h"
 #include "dataset/core/pybind_support.h"
 #include "dataset/util/de_error.h"

 namespace mindspore {
 namespace dataset {

 uint8_t DataType::SizeInBytes() const {
-  switch (type_) {
-    case DataType::DE_BOOL:
-    case DataType::DE_INT8:
-    case DataType::DE_UINT8:
-      return 1;
-    case DataType::DE_INT16:
-    case DataType::DE_UINT16:
-    case DataType::DE_FLOAT16:
-      return 2;
-    case DataType::DE_INT32:
-    case DataType::DE_UINT32:
-    case DataType::DE_FLOAT32:
-      return 4;
-    case DataType::DE_INT64:
-    case DataType::DE_UINT64:
-    case DataType::DE_FLOAT64:
-      return 8;
-    default:
-      return 0;
-  }
+  if (type_ < DataType::NUM_OF_TYPES)
+    return SIZE_IN_BYTES[type_];
+  else
+    return 0;
 }

 py::dtype DataType::AsNumpyType() const {
-  std::string s;
-  switch (type_) {
-    case DataType::DE_BOOL:    s = "bool";    break;
-    case DataType::DE_INT8:    s = "int8";    break;
-    case DataType::DE_UINT8:   s = "uint8";   break;
-    case DataType::DE_INT16:   s = "int16";   break;
-    case DataType::DE_UINT16:  s = "uint16";  break;
-    case DataType::DE_INT32:   s = "int32";   break;
-    case DataType::DE_UINT32:  s = "uint32";  break;
-    case DataType::DE_INT64:   s = "int64";   break;
-    case DataType::DE_UINT64:  s = "uint64";  break;
-    case DataType::DE_FLOAT16: s = "float16"; break;
-    case DataType::DE_FLOAT32: s = "float32"; break;
-    case DataType::DE_FLOAT64: s = "double";  break;
-    case DataType::DE_UNKNOWN: s = "unknown"; break;
-    default:                   s = "unknown"; break;
-  }
-  return py::dtype(s);
+  if (type_ < DataType::NUM_OF_TYPES)
+    return py::dtype(PYBIND_TYPES[type_]);
+  else
+    return py::dtype("unknown");
 }

 uint8_t DataType::AsCVType() const {
-  switch (type_) {
-    case DataType::DE_BOOL:    return CV_8U;
-    case DataType::DE_INT8:    return CV_8S;
-    case DataType::DE_UINT8:   return CV_8U;
-    case DataType::DE_INT16:   return CV_16S;
-    case DataType::DE_UINT16:  return CV_16U;
-    case DataType::DE_INT32:   return CV_32S;
-    case DataType::DE_FLOAT16: return CV_16F;
-    case DataType::DE_FLOAT32: return CV_32F;
-    case DataType::DE_FLOAT64: return CV_64F;
-    case DataType::DE_UINT32:
-    case DataType::DE_INT64:
-    case DataType::DE_UINT64:
-    default:
-      MS_LOG(ERROR) << "Cannot convert to OpenCV type. Return invalid type!";
-      return kCVInvalidType;
+  uint8_t res = kCVInvalidType;
+  if (type_ < DataType::NUM_OF_TYPES) {
+    res = CV_TYPES[type_];
   }
+  if (res == kCVInvalidType) {
+    MS_LOG(ERROR) << "Cannot convert to OpenCV type. Return invalid type!";
+  }
+  return res;
 }

 DataType DataType::FromCVType(int cv_type) {
   auto depth = static_cast<uchar>(cv_type) & static_cast<uchar>(CV_MAT_DEPTH_MASK);

@@ -176,72 +100,17 @@ DataType::DataType(const std::string &type_str) {
     type_ = DE_FLOAT32;
   else if (type_str == "float64")
     type_ = DE_FLOAT64;
+  else if (type_str == "string")
+    type_ = DE_STRING;
   else
     type_ = DE_UNKNOWN;
 }

 std::string DataType::ToString() const {
-  switch (type_) {
-    case DataType::DE_BOOL:    return "bool";
-    case DataType::DE_INT8:    return "int8";
-    case DataType::DE_UINT8:   return "uint8";
-    case DataType::DE_INT16:   return "int16";
-    case DataType::DE_UINT16:  return "uint16";
-    case DataType::DE_INT32:   return "int32";
-    case DataType::DE_UINT32:  return "uint32";
-    case DataType::DE_INT64:   return "int64";
-    case DataType::DE_UINT64:  return "uint64";
-    case DataType::DE_FLOAT16: return "float16";
-    case DataType::DE_FLOAT32: return "float32";
-    case DataType::DE_FLOAT64: return "float64";
-    case DataType::DE_UNKNOWN: return "unknown";
-    default:                   return "unknown";
-  }
-}
-
-DataType DataType::FromNpType(const py::dtype &type) {
-  if (type.is(py::dtype("bool"))) {
-    return DataType(DataType::DE_BOOL);
-  } else if (type.is(py::dtype("int8"))) {
-    return DataType(DataType::DE_INT8);
-  } else if (type.is(py::dtype("uint8"))) {
-    return DataType(DataType::DE_UINT8);
-  } else if (type.is(py::dtype("int16"))) {
-    return DataType(DataType::DE_INT16);
-  } else if (type.is(py::dtype("uint16"))) {
-    return DataType(DataType::DE_UINT16);
-  } else if (type.is(py::dtype("int32"))) {
-    return DataType(DataType::DE_INT32);
-  } else if (type.is(py::dtype("uint32"))) {
-    return DataType(DataType::DE_UINT32);
-  } else if (type.is(py::dtype("int64"))) {
-    return DataType(DataType::DE_INT64);
-  } else if (type.is(py::dtype("uint64"))) {
-    return DataType(DataType::DE_UINT64);
-  } else if (type.is(py::dtype("float16"))) {
-    return DataType(DataType::DE_FLOAT16);
-  } else if (type.is(py::dtype("float32"))) {
-    return DataType(DataType::DE_FLOAT32);
-  } else if (type.is(py::dtype("double"))) {
-    return DataType(DataType::DE_FLOAT64);
-  } else {
-    MS_LOG(ERROR) << "Cannot convert from numpy type. Unknown data type is returned!";
-    return DataType(DataType::DE_UNKNOWN);
-  }
+  if (type_ < DataType::NUM_OF_TYPES)
+    return TO_STRINGS[type_];
+  else
+    return "unknown";
 }

 DataType DataType::FromNpArray(const py::array &arr) {

@@ -269,6 +138,8 @@ DataType DataType::FromNpArray(const py::array &arr) {
     return DataType(DataType::DE_FLOAT32);
   } else if (py::isinstance<py::array_t<std::double_t>>(arr)) {
     return DataType(DataType::DE_FLOAT64);
+  } else if (arr.dtype().kind() == 'S') {
+    return DataType(DataType::DE_STRING);
   } else {
     MS_LOG(ERROR) << "Cannot convert from numpy type. Unknown data type is returned!";
     return DataType(DataType::DE_UNKNOWN);

@@ -276,36 +147,16 @@ DataType DataType::FromNpArray(const py::array &arr) {
 }

 std::string DataType::GetPybindFormat() const {
-  switch (type_) {
-    case DataType::DE_BOOL:   return py::format_descriptor<bool>::format();
-    case DataType::DE_INT8:   return py::format_descriptor<int8_t>::format();
-    case DataType::DE_UINT8:  return py::format_descriptor<uint8_t>::format();
-    case DataType::DE_INT16:  return py::format_descriptor<int16_t>::format();
-    case DataType::DE_UINT16: return py::format_descriptor<uint16_t>::format();
-    case DataType::DE_INT32:  return py::format_descriptor<int32_t>::format();
-    case DataType::DE_UINT32: return py::format_descriptor<uint32_t>::format();
-    case DataType::DE_INT64:  return py::format_descriptor<int64_t>::format();
-    case DataType::DE_UINT64: return py::format_descriptor<uint64_t>::format();
-    case DataType::DE_FLOAT16:
-      // Eigen 3.3.7 doesn't support py::format_descriptor<Eigen::half>::format()
-      return "e";
-    case DataType::DE_FLOAT32: return py::format_descriptor<float>::format();
-    case DataType::DE_FLOAT64: return py::format_descriptor<double>::format();
-    default:
-      MS_LOG(ERROR) << "Cannot convert from data type to pybind format descriptor!";
-      return "";
+  std::string res;
+  if (type_ < DataType::NUM_OF_TYPES) {
+    res = PYBIND_FORMAT_DESCRIPTOR[type_];
   }
+  if (res.empty()) {
+    MS_LOG(ERROR) << "Cannot convert from data type to pybind format descriptor!";
+  }
+  return res;
 }
 }  // namespace dataset
 }  // namespace mindspore
mindspore/ccsrc/dataset/core/data_type.h

@@ -16,18 +16,25 @@
 #ifndef DATASET_CORE_DATA_TYPE_H_
 #define DATASET_CORE_DATA_TYPE_H_

 #include <opencv2/core/hal/interface.h>

 #include <string>

 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"

 #include "dataset/core/constants.h"
 #include "dataset/core/pybind_support.h"

 namespace py = pybind11;

 namespace mindspore {
 namespace dataset {

 // Class that represents basic data types in DataEngine.
 class DataType {
  public:
   enum Type : uint8_t {
+    DE_UNKNOWN = 0,
     DE_BOOL,
     DE_INT8,
     DE_UINT8,

@@ -40,20 +47,60 @@ class DataType {
     DE_FLOAT16,
     DE_FLOAT32,
     DE_FLOAT64,
-    DE_UNKNOWN
+    DE_STRING,
+    NUM_OF_TYPES
   };

-  static constexpr uint8_t DE_BOOL_SIZE = 1;
-  static constexpr uint8_t DE_UINT8_SIZE = 1;
-  static constexpr uint8_t DE_INT8_SIZE = 1;
-  static constexpr uint8_t DE_UINT16_SIZE = 2;
-  static constexpr uint8_t DE_INT16_SIZE = 2;
-  static constexpr uint8_t DE_UINT32_SIZE = 4;
-  static constexpr uint8_t DE_INT32_SIZE = 4;
-  static constexpr uint8_t DE_INT64_SIZE = 8;
-  static constexpr uint8_t DE_UINT64_SIZE = 8;
-  static constexpr uint8_t DE_FLOAT32_SIZE = 4;
-  static constexpr uint8_t DE_FLOAT64_SIZE = 8;
+  inline static constexpr uint8_t SIZE_IN_BYTES[] = {0,   // DE_UNKNOWN
+                                                     1,   // DE_BOOL
+                                                     1,   // DE_INT8
+                                                     1,   // DE_UINT8
+                                                     2,   // DE_INT16
+                                                     2,   // DE_UINT16
+                                                     4,   // DE_INT32
+                                                     4,   // DE_UINT32
+                                                     8,   // DE_INT64
+                                                     8,   // DE_UINT64
+                                                     2,   // DE_FLOAT16
+                                                     4,   // DE_FLOAT32
+                                                     8,   // DE_FLOAT64
+                                                     0};  // DE_STRING
+
+  inline static const char *TO_STRINGS[] = {"unknown", "bool",   "int8",    "uint8",   "int16",   "uint16",  "int32",
+                                            "uint32",  "int64",  "uint64",  "float16", "float32", "float64", "string"};
+
+  inline static const char *PYBIND_TYPES[] = {"object", "bool",  "int8",   "uint8",   "int16",   "uint16", "int32",
+                                              "uint32", "int64", "uint64", "float16", "float32", "double", "bytes"};
+
+  inline static const std::string PYBIND_FORMAT_DESCRIPTOR[] = {"",                                          // DE_UNKNOWN
+                                                                py::format_descriptor<bool>::format(),       // DE_BOOL
+                                                                py::format_descriptor<int8_t>::format(),     // DE_INT8
+                                                                py::format_descriptor<uint8_t>::format(),    // DE_UINT8
+                                                                py::format_descriptor<int16_t>::format(),    // DE_INT16
+                                                                py::format_descriptor<uint16_t>::format(),   // DE_UINT16
+                                                                py::format_descriptor<int32_t>::format(),    // DE_INT32
+                                                                py::format_descriptor<uint32_t>::format(),   // DE_UINT32
+                                                                py::format_descriptor<int64_t>::format(),    // DE_INT64
+                                                                py::format_descriptor<uint64_t>::format(),   // DE_UINT64
+                                                                "e",                                         // DE_FLOAT16
+                                                                py::format_descriptor<float>::format(),      // DE_FLOAT32
+                                                                py::format_descriptor<double>::format(),     // DE_FLOAT64
+                                                                "S"};                                        // DE_STRING
+
+  inline static constexpr uint8_t CV_TYPES[] = {kCVInvalidType,   // DE_UNKNOWN
+                                                CV_8U,            // DE_BOOL
+                                                CV_8S,            // DE_INT8
+                                                CV_8U,            // DE_UINT8
+                                                CV_16S,           // DE_INT16
+                                                CV_16U,           // DE_UINT16
+                                                CV_32S,           // DE_INT32
+                                                kCVInvalidType,   // DE_UINT32
+                                                kCVInvalidType,   // DE_INT64
+                                                kCVInvalidType,   // DE_UINT64
+                                                CV_16F,           // DE_FLOAT16
+                                                CV_32F,           // DE_FLOAT32
+                                                CV_64F,           // DE_FLOAT64
+                                                kCVInvalidType};  // DE_STRING

   // No arg constructor to create an unknown shape
   DataType() : type_(DE_UNKNOWN) {}
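The core refactor in data_type.{h,cc} is to index compile-time tables by the enum value instead of switching on it, with DE_UNKNOWN pinned to 0 and NUM_OF_TYPES as the bound. A minimal standalone sketch of that pattern, assuming an illustrative enum and table values rather than the MindSpore ones:

```cpp
#include <cstdint>
#include <iostream>

// Illustrative enum mirroring DataType::Type's idea: the unknown value first,
// a NUM_OF_TYPES sentinel last, so every value is a valid lookup-table index.
enum class DeType : uint8_t { kUnknown = 0, kBool, kInt8, kFloat32, kString, kNumOfTypes };

// One entry per enum value, in declaration order; 0 marks "no fixed size".
inline constexpr uint8_t kSizeInBytes[] = {0, 1, 1, 4, 0};
inline constexpr const char *kToString[] = {"unknown", "bool", "int8", "float32", "string"};

uint8_t SizeInBytes(DeType t) {
  auto idx = static_cast<uint8_t>(t);
  return idx < static_cast<uint8_t>(DeType::kNumOfTypes) ? kSizeInBytes[idx] : 0;
}

int main() {
  std::cout << kToString[static_cast<uint8_t>(DeType::kFloat32)] << " has size "
            << int(SizeInBytes(DeType::kFloat32)) << "\n";  // prints: float32 has size 4
  return 0;
}
```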
@@ -160,6 +207,8 @@ class DataType {
   bool IsBool() const { return type_ == DataType::DE_BOOL; }

+  bool IsNumeric() const { return type_ != DataType::DE_STRING; }
+
   Type value() const { return type_; }

  private:

@@ -226,6 +275,11 @@ inline bool DataType::IsCompatible<uint8_t>() const {
   return type_ == DataType::DE_UINT8;
 }

+template <>
+inline bool DataType::IsCompatible<std::string_view>() const {
+  return type_ == DataType::DE_STRING;
+}
+
 template <>
 inline bool DataType::IsLooselyCompatible<bool>() const {
   return type_ == DataType::DE_BOOL;
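IsCompatible<std::string_view>() above is a full specialization of a member function template. A small self-contained sketch of that pattern, with a hypothetical TypeTag type standing in for DataType:

```cpp
#include <iostream>
#include <string_view>

// A generic member template that defaults to "not compatible", plus explicit
// specializations per supported type -- the same shape as IsCompatible<T>().
struct TypeTag {
  int id;  // 0 = numeric, 1 = string; purely illustrative
  template <typename T>
  bool IsCompatible() const { return false; }
};

template <>
inline bool TypeTag::IsCompatible<float>() const { return id == 0; }

template <>
inline bool TypeTag::IsCompatible<std::string_view>() const { return id == 1; }

int main() {
  TypeTag t{1};
  std::cout << std::boolalpha << t.IsCompatible<std::string_view>() << "\n";  // true
  return 0;
}
```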
mindspore/ccsrc/dataset/core/tensor.cc

@@ -15,6 +15,7 @@
  */
 #include "dataset/core/tensor.h"

 #include <algorithm>
 #include <iomanip>
 #include <iostream>
 #include <memory>

@@ -60,7 +61,7 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned ch
   if (data != nullptr) {
     // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
     int64_t byte_size = this->SizeInBytes();
-    static_cast<void>(this->StartAddr());  // Allocates data_ inside itself
+    static_cast<void>(this->GetMutableBuffer());  // Allocates data_ inside itself
     if (data_ != nullptr) {
       int ret_code = memcpy_s(data_, byte_size, data, byte_size);
       if (ret_code != 0) {

@@ -75,7 +76,7 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned ch
 Tensor::Tensor(Tensor &&other) noexcept
     : shape_(other.shape()),
       type_(other.type()),
-      data_(other.StartAddr()),
+      data_(other.GetMutableBuffer()),
       data_allocator_(std::move(other.data_allocator_)) {
   other.Invalidate();
 }

@@ -84,7 +85,7 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
   if (&other != this) {
     shape_ = other.shape();
     type_ = other.type();
-    data_ = other.StartAddr();
+    data_ = other.GetMutableBuffer();
     data_end_ = other.data_end_;
     data_allocator_ = std::move(other.data_allocator_);
     other.Invalidate();

@@ -92,6 +93,37 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
   return *this;
 }

+Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape)
+    : Tensor(TensorShape({static_cast<dsize_t>(strings.size())}), DataType(DataType::DE_STRING)) {
+  auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
+  dsize_t total_length = std::accumulate(strings.begin(), strings.end(), 0, length_sum);
+
+  dsize_t num_bytes = (kOffsetSize + 1) * shape_.NumOfElements() + total_length;
+
+  data_ = data_allocator_->allocate(num_bytes);
+
+  auto offset_arr = reinterpret_cast<offset_t *>(data_);
+  uchar *buf = GetStringsBuffer();
+
+  offset_t offset = -1;
+  uint32_t i = 0;
+  for (const auto &str : strings) {
+    // insert the end index of the string
+    // end index of a string is the end index of previous string + the length (including \0)
+    offset = offset + str.length() + 1;
+    offset_arr[i++] = offset;
+    // total bytes are reduced by kOffsetSize
+    num_bytes -= kOffsetSize;
+    // insert actual string
+    memcpy_s(buf, num_bytes, str.c_str(), str.length() + 1);
+    buf += str.length() + 1;
+    num_bytes -= str.length() + 1;
+  }
+  this->data_end_ = buf;
+  DS_ASSERT(num_bytes == 0);
+  if (shape.known()) Tensor::Reshape(shape);
+}
+
 Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
                             DataType type, const unsigned char *data) {
   if (!shape.known()) {
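The string constructor above lays out the buffer as an offset array followed by the null-terminated string bytes, where each offset records the end index of its string. A standalone sketch of that packing, using illustrative helper names (PackStrings is not part of the diff):

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

using offset_t = uint32_t;

// Pack strings as: [end offset of each string][str1 '\0' str2 '\0' ...],
// where each offset is the index of that string's '\0' inside the string area.
std::vector<unsigned char> PackStrings(const std::vector<std::string> &strings) {
  size_t total = 0;
  for (const auto &s : strings) total += s.length() + 1;  // +1 for the '\0'
  std::vector<unsigned char> buf(sizeof(offset_t) * strings.size() + total);

  auto *offsets = reinterpret_cast<offset_t *>(buf.data());
  unsigned char *out = buf.data() + sizeof(offset_t) * strings.size();
  offset_t end = static_cast<offset_t>(-1);
  for (size_t i = 0; i < strings.size(); ++i) {
    end += static_cast<offset_t>(strings[i].length()) + 1;  // end index includes the '\0'
    offsets[i] = end;
    std::memcpy(out, strings[i].c_str(), strings[i].length() + 1);
    out += strings[i].length() + 1;
  }
  return buf;
}

int main() {
  auto buf = PackStrings({"abc", "de"});
  auto *offsets = reinterpret_cast<offset_t *>(buf.data());
  std::cout << offsets[0] << " " << offsets[1] << "\n";  // prints: 3 6, matching the layout comment in tensor.h
  return 0;
}
```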
@@ -120,8 +152,28 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl
   }
   return Status::OK();  // returns base-class shared_ptr
 }

+std::string to(std::string x) { return x; }
+
+Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) {
+  std::vector<dsize_t> shape;
+  for (dsize_t i = 0; i < arr.ndim(); i++) {
+    shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
+  }
+  arr.resize({arr.size()});
+
+  auto itr = arr.begin();
+  std::vector<std::string> strings;
+  for (; itr != arr.end(); itr++) {
+    std::string s = to(py::cast<py::bytes>(*itr));
+    strings.push_back(s);
+  }
+
+  arr.resize(shape);
+
+  return CreateTensor(ptr, strings, TensorShape{shape});
+}
+
 Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
+  if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
+    return CreateTensorFromNumpyString(ptr, arr);
+  }
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
   *ptr = std::allocate_shared<Tensor>(*alloc, TensorShape({}), DataType(DataType::DE_UNKNOWN));

@@ -138,7 +190,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
   std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
   (*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
-  static_cast<void>((*ptr)->StartAddr());
+  static_cast<void>((*ptr)->GetMutableBuffer());
   int64_t byte_size = (*ptr)->SizeInBytes();
   unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);
   if ((*ptr)->data_ == nullptr) {

@@ -173,6 +225,13 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
   return Status::OK();  // returns base-class shared_ptr
 }

+Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
+                            const TensorShape &shape) {
+  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+  *ptr = std::allocate_shared<Tensor>(*alloc, strings, shape);
+  return Status::OK();
+}
+
 // Memcpy the given strided array's used part to consecutive memory
 // Consider a 3-d array
 // A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]

@@ -264,6 +323,12 @@ void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) c
     CASE_PRINT(DataType::DE_FLOAT64, double);

+    case DataType::DE_STRING: {
+      std::string_view o{""};
+      GetItemAt(&o, index);
+      out << "\"" << o << "\"";
+      break;
+    }
     default: {
       out << "?";
       break;

@@ -324,12 +389,12 @@ Status Tensor::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_inde
   return Status::OK();
 }

-const unsigned char *Tensor::StartAddr() const {
+const unsigned char *Tensor::GetBuffer() const {
   // This version cannot modify anything. data_ could possibly be null.
   return data_;
 }

-unsigned char *Tensor::StartAddr() {
+unsigned char *Tensor::GetMutableBuffer() {
   if (!shape_.known() || type_ == DataType::DE_UNKNOWN) {
     return nullptr;
   }

@@ -381,6 +446,25 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
     dsize_t flat_idx;
     RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx));
     *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());

     return Status::OK();
   } else {
     std::string err = "data type not compatible";
     RETURN_STATUS_UNEXPECTED(err);
   }
 }

+Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
+  if (type_ == DataType::DE_STRING) {
+    if (data_ == nullptr) {
+      std::string err = "Data is not allocated yet";
+      RETURN_STATUS_UNEXPECTED(err);
+    }
+    dsize_t flat_idx;
+    RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx));
+    offset_t length_temp = 0;
+    RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
+    if (length != nullptr) *length = length_temp;
+    return Status::OK();
+  } else {
+    std::string err = "data type not compatible";

@@ -389,23 +473,27 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
 }

 Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
+  if (type() == DataType::DE_STRING) {
+    RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet.");
+  }
+
   dsize_t flat_ind;
   std::vector<dsize_t> t_shape = shape().AsVector();
   std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end());
   *remaining = TensorShape(r);
   ind.resize(this->Rank(), 0);  //  same as -> while (ind.size() < this->Rank()) ind.push_back(0);

   RETURN_IF_NOT_OK(ToFlatIndex(ind, &flat_ind));
-  // check if StartAddr() returns null, we should flag this as an error, this sanity check will only
+  // check if GetBuffer() returns null, we should flag this as an error, this sanity check will only
   // be true is the tensor failed to allocate memory.
-  if (StartAddr() == nullptr) {
-    RETURN_STATUS_UNEXPECTED("Invalid StartAddr in Tensor, got nullptr");
+  if (GetMutableBuffer() == nullptr) {
+    RETURN_STATUS_UNEXPECTED("Invalid GetBuffer in Tensor, got nullptr");
   }
-  *start_addr_of_index = StartAddr() + flat_ind * this->type().SizeInBytes();
+  *start_addr_of_index = GetMutableBuffer() + flat_ind * this->type().SizeInBytes();
   return Status::OK();
 }

 Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor) {
   std::string err_msg;
+  err_msg += (this->type() == DataType::DE_STRING) ? "[Tensor] Cannot batch tensors of type string\n" : "";
   err_msg += (!this->shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : "";
   err_msg += (ind.size() + tensor->Rank() != this->Rank()) ? "[Tensor] incorrect index\n" : "";
   err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : "";

@@ -418,7 +506,8 @@ Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_p
     RETURN_STATUS_UNEXPECTED(err_msg);
   } else {
     if (start_addr_of_ind != nullptr) {
-      int ret_code = memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->StartAddr(), tensor->SizeInBytes());
+      int ret_code =
+        memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes());
       if (ret_code == 0) {
         return Status::OK();
       } else {

@@ -446,21 +535,20 @@ Status Tensor::ExpandDim(const dsize_t &axis) {
 }

 std::vector<dsize_t> Tensor::Strides() {
-  std::vector<dsize_t> strides(Rank());
-  dsize_t count = shape_.NumOfElements();
-  for (dsize_t i = 0; i < Rank(); i++) {
-    count /= shape_[i];
-    strides[i] = type_.SizeInBytes() * count;
-  }
+  std::vector<dsize_t> strides = shape_.Strides();
+  uint8_t size = type_.SizeInBytes();
+  std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
   return strides;
 }

 Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) {
+  CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
+
   std::string format_desc = t.type().GetPybindFormat();
   if (format_desc.empty()) {
     RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format");
   }
-  *out = py::buffer_info(t.StartAddr(),             /* Pointer to buffer */
+  *out = py::buffer_info(t.GetMutableBuffer(),      /* Pointer to buffer */
                          t.type().SizeInBytes(),    /* Size of one scalar */
                          format_desc,               /* Python struct-style format descriptor */
                          t.Rank(),                  /* Number of dimensions */

@@ -495,6 +583,18 @@ Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
   return Status::OK();
 }

+Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const {
+  RETURN_UNEXPECTED_IF_NULL(data_);
+  RETURN_UNEXPECTED_IF_NULL(o);
+  CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not DE_STRING");
+
+  uchar *buf = nullptr;
+  offset_t length = 0;
+  RETURN_IF_NOT_OK(GetItemPtr(&buf, index, &length));
+  std::string_view sv{reinterpret_cast<const char *>(buf), length};
+  o->swap(sv);
+  return Status::OK();
+}
+
 // return data as numpy, should return status
 Status Tensor::GetDataAsNumpy(py::array *data) {
   RETURN_UNEXPECTED_IF_NULL(data_);

@@ -523,11 +623,36 @@ Status Tensor::GetDataAsNumpy(py::array *data) {
     *data = py::array_t<float>(shape_.AsVector(), reinterpret_cast<float *>(data_));
   } else if (type_ == DataType::DE_FLOAT64) {
     *data = py::array_t<double>(shape_.AsVector(), reinterpret_cast<double *>(data_));
+  } else if (type_ == DataType::DE_STRING) {
+    GetDataAsNumpyStrings(data);
   } else {
     RETURN_STATUS_UNEXPECTED("Got unexpected type when returning numpy");
   }
   return Status::OK();
 }

+Status Tensor::GetDataAsNumpyStrings(py::array *data) {
+  auto itr = begin<std::string_view>();
+  uint64_t max = 0;
+  for (; itr != end<std::string_view>(); itr++) {
+    max = std::max((*itr).length(), max);
+  }
+  uint64_t total_size = shape_.NumOfElements() * max;
+  char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size));
+  if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
+  memset(tmp_data, 0, total_size);
+
+  itr = begin<std::string_view>();
+  uint64_t i = 0;
+  for (; itr != end<std::string_view>(); itr++) {
+    (void)memcpy_s(tmp_data + i * max, total_size, (*itr).data(), (*itr).length());
+    i++;
+  }
+  auto strides = shape_.Strides();
+  std::transform(strides.begin(), strides.end(), strides.begin(), [&max](const auto &s) { return s * max; });
+  *data = py::array(py::dtype("S" + std::to_string(max)), shape_.AsVector(), strides, tmp_data);
+  data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
+  return Status::OK();
+}
+
 void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
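GetDataAsNumpyStrings copies each element into a zero-filled block whose row width is the longest string, which is the layout a numpy "S<max>" array expects; the py::array hand-off is then just a dtype, shape, and strides description over that block. A standalone sketch of the padding step only (ToFixedWidth is an illustrative name, not the diff's API):

```cpp
#include <algorithm>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Copy variable-length strings into a contiguous block of fixed-width,
// zero-padded slots -- the layout a numpy "S<max>" array expects.
std::vector<char> ToFixedWidth(const std::vector<std::string> &items, size_t *width) {
  size_t max_len = 0;
  for (const auto &s : items) max_len = std::max(max_len, s.length());
  std::vector<char> block(items.size() * max_len, '\0');  // zero padding up front
  for (size_t i = 0; i < items.size(); ++i) {
    std::memcpy(block.data() + i * max_len, items[i].data(), items[i].length());
  }
  *width = max_len;
  return block;
}

int main() {
  size_t width = 0;
  auto block = ToFixedWidth({"cat", "zebra", "ox"}, &width);
  std::cout << "width=" << width << " bytes=" << block.size() << "\n";  // width=5 bytes=15
  return 0;
}
```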
@@ -647,5 +772,19 @@ Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
   }
   return Status::OK();
 }

+Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const {
+  CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not string");
+  RETURN_UNEXPECTED_IF_NULL(data_);
+  RETURN_UNEXPECTED_IF_NULL(string_start);
+  RETURN_UNEXPECTED_IF_NULL(length);
+  auto *offset_ptr = reinterpret_cast<offset_t *>(data_);  // offsets starts here
+  offset_t end = offset_ptr[index];
+  offset_t start = 0;
+  if (index != 0) start = offset_ptr[index - 1] + 1;  // string starts at where the previous string ends + 1
+  uchar *buf = GetStringsBuffer();  // string data starts here
+  *string_start = buf + start;
+  *length = end - start;
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
mindspore/ccsrc/dataset/core/tensor.h

@@ -47,8 +47,6 @@ using TensorRow = std::vector<std::shared_ptr<Tensor>>;  // A row is a set of
 using TensorTable = std::vector<TensorRow>;  // The table of tensors is a vector of rows
 using TensorQTable = std::deque<TensorRow>;  // A different flavour of tensor table, this one has queue functionality

-// Tensor base class which holds the data in an unsigned char* buffer.
-
 class Tensor {
  public:
   Tensor() = delete;

@@ -74,6 +72,27 @@ class Tensor {
   Tensor &operator=(Tensor &&other) noexcept;

+  // type of offest values to store strings information
+  using offset_t = uint32_t;
+
+  // const of the size of the offset variable
+  static constexpr uint8_t kOffsetSize = sizeof(offset_t);
+
+  // Construct a scalar string Tensor
+  explicit Tensor(const std::string &str) : Tensor(std::vector<std::string>{str}, TensorShape::CreateScalar()) {}
+
+  // Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is
+  // the size of the vector `strings`.
+  // The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
+  // OFFSET1, OFFSET2, ... String1, String2, ...
+  // The value of each offset is the end index of the corresponding string
+  // Offsets is of type offest_t
+  // strings will ne null-terminated
+  // example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
+  //   3   6   a   b   c   \0   d   e   \0
+  explicit Tensor(const std::vector<std::string> &strings,
+                  const TensorShape &shape = TensorShape::CreateUnknownRankShape());
+
   // A static factory method to create the given flavour of derived Tensor
   // Returns the base class reference for the Tensor.
   // @param ptr output argument to hold the created Tensor of given tensor_impl

@@ -91,6 +110,17 @@ class Tensor {
   // @return Status Code
   static Status CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr);

+  // Helper function to create a tensor from Numpy of strings
+  static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
+
+  // A static factory method to create a Tensor from a given list of strings.
+  // @param ptr output argument to hold the created Tensor
+  // @param strings elements of the tensor
+  // @param shape shape of the tensor
+  // @return Status Code
+  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
+                             const TensorShape &shape = TensorShape::CreateUnknownRankShape());
+
   // Copy raw data of a array based on shape and strides to the destination pointer
   // @param dst Pointer to the destination array where the content is to be copied
   // @param src Pointer to the source of strided array to be copied

@@ -116,6 +146,11 @@ class Tensor {
   template <typename T>
   Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;

+  // Get string located at `index`.
+  // @param index vector<dsize_t>
+  // @return return std::string_view specified at index
+  Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;
+
   template <typename T>
   Status GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const;

@@ -131,26 +166,44 @@ class Tensor {
   // @param value of type `T`
   template <typename T>
   Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
-    static_cast<void>(StartAddr());
+    static_cast<void>(GetMutableBuffer());
     T *ptr = nullptr;
     RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
     *ptr = value;
     return Status::OK();
   }

-  // fill tensor with Zeros
+  // set string item at location specified by index
+  // @param index
+  // @param value of type std::string
+  Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) {
+    RETURN_UNEXPECTED_IF_NULL(data_);
+    uchar *ptr = nullptr;
+    offset_t length = 0;
+    RETURN_IF_NOT_OK(GetItemPtr(&ptr, index, &length));
+    if (value.length() != length) {
+      RETURN_STATUS_UNEXPECTED("Length of the new string does not match the item.");
+    }
+    memcpy_s(reinterpret_cast<char *>(ptr), length, value.c_str(), length);
+    return Status::OK();
+  }
+
+  // fill tensor with Zeros. Does not support strings.
   Status Zero() {
+    CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings..");
     dsize_t size = SizeInBytes();
-    CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(StartAddr(), size, 0, size) == 0, "Failed to fill tensor with zeroes.");
+    CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(GetMutableBuffer(), size, 0, size) == 0,
+                                 "Failed to fill tensor with zeroes.");
     return Status::OK();
   }

-  // Fill all elements in the Tensor with the given value of type `T`
+  // Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
   // @tparam T
   // @param value
   template <typename T>
   Status Fill(const T &value) {
-    static_cast<void>(StartAddr());
+    CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings.");
+    static_cast<void>(GetMutableBuffer());
     int64_t cellSize = type_.SizeInBytes();
     if ((data_ != nullptr) && type_.IsCompatible<T>()) {
       for (dsize_t i = 0; i < Size(); i++) {

@@ -177,7 +230,10 @@ class Tensor {
   dsize_t Size() const { return shape().NumOfElements(); }

   // @return the number of bytes this tensor is needs
-  dsize_t SizeInBytes() const { return Size() * type_.SizeInBytes(); }
+  dsize_t SizeInBytes() const {
+    if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements();
+    return data_end_ - data_;
+  }

   // @return the rank of the tensor
   dsize_t Rank() const { return shape().Rank(); }

@@ -185,12 +241,12 @@ class Tensor {
   // Get the starting memory address as a constant for the data of the tensor. This potentially
   // drives an allocation if the data area.
   // @return const unsigned char*
-  const unsigned char *StartAddr() const;
+  const unsigned char *GetBuffer() const;

   // Get the starting memory address for the data of the tensor. This potentially
   // drives an allocation if the data area.
   // @return unsigned char*
-  unsigned char *StartAddr();
+  unsigned char *GetMutableBuffer();

   // Getter of the type
   // @return

@@ -236,12 +292,12 @@ class Tensor {
   virtual void Squeeze();

-  // Calculates the strides of the Tensor
-  // Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
-  // The strides will be {6,2,1}.
-  // Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
-  // The strides will be {24,8,4}.
-  // @return vector of integers
+  /// Calculates the strides of the Tensor
+  /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
+  /// The strides will be {6,2,1}.
+  /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
+  /// The strides will be {24,8,4}.
+  /// @return vector of integers
   std::vector<dsize_t> Strides();

   std::string ToString() {

@@ -255,12 +311,14 @@ class Tensor {
   // @return Status code
   Status GetDataAsNumpy(py::array *data);

+  Status GetDataAsNumpyStrings(py::array *data);
+
   static Status GetBufferInfo(Tensor &t, py::buffer_info *out);

   // TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
   // The order elements is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6
   // @tparam T type of values in the Tensor Iterator
-  template <typename T>
+  template <typename T, bool = true>
   class TensorIterator {
    public:
     using iterator_category = std::random_access_iterator_tag;

@@ -271,11 +329,14 @@ class Tensor {
     explicit TensorIterator(uchar *ptr = nullptr) { ptr_ = reinterpret_cast<T *>(ptr); }

-    TensorIterator(const TensorIterator<T> &raw_iterator) = default;
+    TensorIterator(const TensorIterator<T> &raw_iterator) { ptr_ = raw_iterator.ptr_; }

     ~TensorIterator() = default;

-    TensorIterator<T> &operator=(const TensorIterator<T> &rhs) = default;
+    TensorIterator<T> &operator=(const TensorIterator<T> &rhs) {
+      ptr_ = rhs.ptr_;
+      return *this;
+    }

     TensorIterator<T> &operator=(T *rhs) {
       ptr_ = rhs;

@@ -346,6 +407,99 @@ class Tensor {
     T *ptr_;
   };

+  // Specialization of TensorIterator for strings. It returns std::string_view for every item.
+  // @tparam DUMMY, used to mbe able to specialize the inner class
+  template <bool DUMMY>
+  class TensorIterator<std::string_view, DUMMY> {
+   public:
+    using iterator_category = std::random_access_iterator_tag;
+    using value_type = std::string_view;
+    using difference_type = ptrdiff_t;
+    using pointer = std::string_view *;
+    using reference = std::string_view &;
+
+    explicit TensorIterator(uchar *offset = nullptr, const uchar *buf = nullptr, dsize_t index = 0) {
+      offset_ = reinterpret_cast<offset_t *>(offset);
+      buf_ = reinterpret_cast<const char *>(buf);
+      index_ = index;
+    }
+
+    TensorIterator(const TensorIterator<std::string_view, DUMMY> &raw_iterator) {
+      offset_ = raw_iterator.offset_;
+      buf_ = raw_iterator.buf_;
+      index_ = raw_iterator.index_;
+    }
+
+    ~TensorIterator() = default;
+
+    bool operator==(const TensorIterator<std::string_view> &rhs) {
+      return buf_ == rhs.buf_ && offset_ == rhs.offset_ && index_ == rhs.index_;
+    }
+
+    bool operator!=(const TensorIterator<std::string_view> &rhs) { return !(*this == rhs); }
+
+    operator bool() const { return offset_ != nullptr; }
+
+    std::string_view operator*() const {
+      offset_t start = 0;
+      if (index_ != 0) start = offset_[index_ - 1] + 1;
+      return std::string_view{buf_ + start};
+    }
+
+    TensorIterator<std::string_view> &operator+=(const dsize_t &inc) {
+      index_ += inc;
+      return *this;
+    }
+
+    TensorIterator<std::string_view> &operator-=(const dsize_t &inc) {
+      index_ -= inc;
+      return *this;
+    }
+
+    TensorIterator<std::string_view> &operator++() {
+      ++index_;
+      return *this;
+    }
+
+    TensorIterator<std::string_view> &operator--() {
+      --index_;
+      return *this;
+    }
+
+    TensorIterator<std::string_view> operator++(int) {
+      auto temp(*this);
+      ++index_;
+      return temp;
+    }
+
+    TensorIterator<std::string_view> operator--(int) {
+      auto temp(*this);
+      --index_;
+      return temp;
+    }
+
+    TensorIterator<std::string_view> operator+(const dsize_t &inc) {
+      auto oldPtr = index_;
+      index_ += inc;
+      auto temp(*this);
+      index_ = oldPtr;
+      return temp;
+    }
+
+    TensorIterator<std::string_view> operator-(const dsize_t &inc) {
+      auto oldPtr = index_;
+      index_ -= inc;
+      auto temp(*this);
+      index_ = oldPtr;
+      return temp;
+    }
+
+   protected:
+    dsize_t index_;
+    offset_t *offset_;
+    const char *buf_;
+  };
+
   // Return a TensorIterator that points to the start of the Tensor.
   // It's the user responsibility to use the correct type that matches the Tensor type
   // @tparam T The type of values in the Tensor
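Dereferencing the string iterator above is pure offset arithmetic: element i starts one byte past the end offset of element i-1 and is bounded by its own end offset (or, equivalently, by the null terminator). A standalone sketch of that lookup over the packed layout (ElementAt is an illustrative helper, not the diff's API):

```cpp
#include <cstdint>
#include <iostream>
#include <string_view>
#include <vector>

using offset_t = uint32_t;

// Given the offset array and the string area, element i starts right after the
// '\0' of element i-1 (or at 0 for the first element); its end offset bounds it.
std::string_view ElementAt(const offset_t *offsets, const char *strings, size_t i) {
  offset_t start = (i == 0) ? 0 : offsets[i - 1] + 1;
  offset_t length = offsets[i] - start;
  return std::string_view{strings + start, length};
}

int main() {
  // Layout for {"abc", "de"}: offsets {3, 6}, bytes "abc\0de\0".
  std::vector<offset_t> offsets = {3, 6};
  const char strings[] = "abc\0de";  // 7 chars: both strings null-terminated
  for (size_t i = 0; i < offsets.size(); ++i) {
    std::cout << ElementAt(offsets.data(), strings, i) << "\n";  // prints abc, then de
  }
  return 0;
}
```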
@@ -391,6 +545,22 @@ class Tensor {
   template <typename T>
   Status GetItemPtr(T **, const std::vector<dsize_t> &index) const;

+  // Get pointer to string located at `index` and the length of string
+  // @param index vector<dsize_t>
+  // @return return a pointer to the string specified at index and the length of the string
+  Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;
+
+  // Given a flat index of an item string, return the start and length of the item
+  // @param index flat index of the item
+  // @return start address of the ths string
+  // @return length of the string
+  Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;
+
+  // Skip the offsets and returns the start of the buffer where the real strings is stored. Caller needs to check if the
+  // tensor's type is a string, otherwise undefined address would be returned.
+  // @return address of the first string of the tensor.
+  uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements(); }
+
   // all access to shape_ should be via shape
   TensorShape shape_;
   // data type of tensor

@@ -402,6 +572,16 @@ class Tensor {
   // pointer to the end of the physical data
   unsigned char *data_end_ = nullptr;
 };

+template <>
+inline Tensor::TensorIterator<std::string_view> Tensor::begin<std::string_view>() {
+  uchar *buf = GetStringsBuffer();
+  return TensorIterator<std::string_view>(data_, buf);
+}
+
+template <>
+inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
+  uchar *buf = GetStringsBuffer();
+  return TensorIterator<std::string_view>(data_, buf, shape_.NumOfElements());
+}
+
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // DATASET_CORE_TENSOR_H_
mindspore/ccsrc/dataset/core/tensor_shape.cc

@@ -215,5 +215,17 @@ TensorShape TensorShape::Squeeze() const {
   }
   return TensorShape(new_shape);
 }

+std::vector<dsize_t> TensorShape::Strides() {
+  std::vector<dsize_t> strides(Rank());
+  dsize_t count = NumOfElements();
+  for (dsize_t i = 0; i < Rank(); i++) {
+    if (raw_shape_[i] != 0)
+      count /= raw_shape_[i];
+    else
+      count = 0;
+    strides[i] = count;
+  }
+  return strides;
+}
 }  // namespace dataset
 }  // namespace mindspore
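TensorShape::Strides() above returns row-major strides in units of elements, and Tensor::Strides() (earlier in this diff) multiplies them by the element size to get byte strides. A standalone sketch of both steps with an illustrative helper:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Row-major element strides: stride[i] = product of the dimensions after i.
std::vector<int64_t> ElementStrides(const std::vector<int64_t> &shape) {
  std::vector<int64_t> strides(shape.size());
  int64_t count = 1;
  for (auto d : shape) count *= d;
  for (size_t i = 0; i < shape.size(); ++i) {
    count = shape[i] != 0 ? count / shape[i] : 0;
    strides[i] = count;
  }
  return strides;
}

int main() {
  auto strides = ElementStrides({2, 3, 4});  // element strides {12, 4, 1}
  uint8_t type_size = 4;                     // e.g. a 4-byte element type
  std::transform(strides.begin(), strides.end(), strides.begin(),
                 [&](int64_t s) { return s * type_size; });  // byte strides {48, 16, 4}
  for (auto s : strides) std::cout << s << " ";
  std::cout << "\n";
  return 0;
}
```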
mindspore/ccsrc/dataset/core/tensor_shape.h

@@ -156,6 +156,8 @@ class TensorShape {
   TensorShape Squeeze() const;

+  std::vector<dsize_t> Strides();
+
  private:
   // True if known and valid shape, false otherwise
   bool known_;
mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc

@@ -74,6 +74,10 @@ Status BatchOp::operator()() {
   std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>();
   child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
   RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
+  for (const auto &t : new_row) {
+    CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(),
+                                 "[Batch ERROR] Batch does not support Tensor of type string yet.");
+  }
   RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());  // must come after the first fetch above
   int32_t cur_batch_size = 0;
   RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0)));

@@ -445,8 +449,8 @@ Status BatchOp::PadHelper(std::shared_ptr<Tensor> src, std::shared_ptr<Tensor> d
       src_flat_ind += src_s[i] * cur_ind[i];
       dst_flat_ind += dst_s[i] * cur_ind[i];
     }
-    unsigned char *src_addr = src->StartAddr() + src_flat_ind * type_size;
-    unsigned char *dst_addr = dst->StartAddr() + dst_flat_ind * type_size;
+    unsigned char *src_addr = src->GetMutableBuffer() + src_flat_ind * type_size;
+    unsigned char *dst_addr = dst->GetMutableBuffer() + dst_flat_ind * type_size;
     CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error");
   } else {  // not the last dimension, keep doing recursion
     dsize_t min_ind = std::min(dst->shape()[cur_dim], src->shape()[cur_dim]);

mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc

@@ -85,6 +85,13 @@ Status DeviceQueueOp::operator()() {
 Status DeviceQueueOp::CheckExceptions(const std::unique_ptr<DataBuffer> &buffer) const {
   // this method checks if the buffer meets the conditions to be sent to TDT
+  if (buffer->NumRows() != 0) {
+    TensorRow row;
+    buffer->GetRow(0, &row);
+    for (const auto &item : row) {
+      CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Cannot send tensor of string type to device.");
+    }
+  }
   return Status::OK();
 }

@@ -207,7 +214,7 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items,
       return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed.");
     }
     (void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_);
-    unsigned char *column_data = curr_row[i]->StartAddr();
+    unsigned char *column_data = curr_row[i]->GetMutableBuffer();
     if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data,
                  static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) {
       MS_LOG(ERROR) << "memcpy_s failed!";
mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc

@@ -407,7 +407,7 @@ Status CelebAOp::LoadTensorRow(const std::pair<std::string, std::vector<int32_t>
   RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
                                         TensorShape(std::vector<dsize_t>(1, num_elements)),
                                         data_schema_->column(0).type()));
-  (void)handle.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
+  (void)handle.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
   if (decode_ == true) {
     Status rc = Decode(image, &image);
     if (rc.IsError()) {

mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc

@@ -197,7 +197,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) {
   std::shared_ptr<Tensor> fine_label;
   std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first;
   std::shared_ptr<Tensor> copy_image =
-    std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->StartAddr());
+    std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetMutableBuffer());
   RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
                                         data_schema_->column(1).type(),
                                         reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0])));

@@ -394,7 +394,7 @@ Status CifarOp::ParseCifarData() {
                                           data_schema_->column(0).type()));
     for (int ch = 0; ch < kCifarImageChannel; ++ch) {
       for (int pix = 0; pix < kCifarImageHeight * kCifarImageWidth; ++pix) {
-        (image_tensor->StartAddr())[pix * kCifarImageChannel + ch] = block[cur_block_index++];
+        (image_tensor->GetMutableBuffer())[pix * kCifarImageChannel + ch] = block[cur_block_index++];
       }
     }
     cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels));

mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc

@@ -216,7 +216,7 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) {
   RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
                                         TensorShape(std::vector<dsize_t>(1, num_elements)),
                                         data_schema_->column(0).type(), nullptr));
-  (void)fs.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
+  (void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
   fs.close();
   if (decode_ == true) {
     Status rc = Decode(image, &image);

mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc

@@ -210,7 +210,7 @@ Status ManifestOp::LoadTensorRow(const std::pair<std::string, std::vector<std::s
   RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
                                         TensorShape(std::vector<dsize_t>(1, num_elements)),
                                         data_schema_->column(0).type(), nullptr));
-  (void)fs.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
+  (void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
   if (fs.fail()) {
     fs.close();
     RETURN_STATUS_UNEXPECTED("Fail to read file: " + data.first);

mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc

@@ -170,7 +170,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow)
   int32_t l = mnist_pair.second;
   // make a copy of cached tensor
   RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(),
-                                        mnist_pair.first->type(), mnist_pair.first->StartAddr()));
+                                        mnist_pair.first->type(), mnist_pair.first->GetMutableBuffer()));
   RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
                                         data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l)));
   (*trow) = {std::move(image), std::move(label)};
mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc

@@ -127,7 +127,7 @@ Status RandomDataOp::GenerateSchema() {
     // For each column:
     // - choose a datatype
     // - generate a shape that randomly chooses the number of dimensions and the dimension values.
-    DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(0, kMaxDataType));
+    DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(0, DataType::NUM_OF_TYPES - 2));
     int32_t rank = GenRandomInt(1, kMaxRank);
     std::vector<dsize_t> dims;
     for (int32_t d = 0; d < rank; d++) {

mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h

@@ -43,7 +43,6 @@ class RandomDataOp : public ParallelOp {
   static constexpr int32_t kMaxNumColumns = 4;
   static constexpr int32_t kMaxRank = 4;
   static constexpr int32_t kMaxDimValue = 2048;
-  static constexpr int32_t kMaxDataType = (DataType::DE_UNKNOWN - 1);
   static constexpr int32_t kMaxTotalRows = 1024;

   // A nested builder class to aid in the construction of a RandomDataOp
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc
浏览文件 @
051567ba
...
...
@@ -58,7 +58,7 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer
(
*
out_buffer
)
=
std
::
make_unique
<
DataBuffer
>
(
cnt_
,
DataBuffer
::
kDeBFlagNone
);
std
::
shared_ptr
<
Tensor
>
sample_ids
;
RETURN_IF_NOT_OK
(
CreateSamplerTensor
(
&
sample_ids
,
samples_per_buffer_
));
int64_t
*
id_ptr
=
reinterpret_cast
<
int64_t
*>
(
sample_ids
->
StartAdd
r
());
int64_t
*
id_ptr
=
reinterpret_cast
<
int64_t
*>
(
sample_ids
->
GetMutableBuffe
r
());
while
(
cnt_
<
samples_per_buffer_
)
{
int64_t
next_id
=
(
num_devices_
*
(
cnt_
++
)
+
device_id_
)
%
num_rows_
;
*
(
id_ptr
++
)
=
shuffle_
?
shuffle_vec_
[
static_cast
<
size_t
>
(
next_id
)]
:
next_id
;
...
...
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc
浏览文件 @
051567ba
...
...
@@ -58,7 +58,7 @@ Status PKSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) {
int64_t
last_id
=
(
samples_per_buffer_
+
next_id_
>
num_pk_samples_
)
?
num_pk_samples_
:
samples_per_buffer_
+
next_id_
;
RETURN_IF_NOT_OK
(
CreateSamplerTensor
(
&
sample_ids
,
last_id
-
next_id_
));
int64_t
*
id_ptr
=
reinterpret_cast
<
int64_t
*>
(
sample_ids
->
StartAdd
r
());
int64_t
*
id_ptr
=
reinterpret_cast
<
int64_t
*>
(
sample_ids
->
GetMutableBuffe
r
());
while
(
next_id_
<
last_id
)
{
int64_t
cls_id
=
next_id_
++
/
samples_per_class_
;
const
std
::
vector
<
int64_t
>
&
samples
=
label_to_ids_
[
labels_
[
cls_id
]];
...
...
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc
@@ -38,7 +38,7 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) {
   std::shared_ptr<Tensor> sampleIds;
   int64_t last_id = samples_per_buffer_ + next_id_ > num_samples_ ? num_samples_ : samples_per_buffer_ + next_id_;
   RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_));
-  int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->StartAddr());
+  int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer());
   for (int64_t i = 0; i < (last_id - next_id_); i++) {
     *(id_ptr + i) = replacement_ ? (*dist)(rnd_) : shuffled_ids_[static_cast<size_t>(i + next_id_)];
   }
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
@@ -40,7 +40,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t
   }
   TensorShape shape(std::vector<dsize_t>(1, num_elements));
   RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type()));
-  (void)(*sample_ids)->StartAddr();  // allocate memory in case user forgets!
+  (void)(*sample_ids)->GetMutableBuffer();  // allocate memory in case user forgets!
   return Status::OK();
 }
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc
@@ -31,7 +31,7 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer)
   std::shared_ptr<Tensor> sampleIds;
   int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_;
   RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_));
-  int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->StartAddr());
+  int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer());
   while (next_id_ < lastId) {
     *(idPtr++) = next_id_++;
   }
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc
@@ -78,7 +78,7 @@ Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffe
   RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));
   // Initialize tensor
-  int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->StartAddr());
+  int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer());
   while (sample_id_ < last_id) {
     if (indices_[sample_id_] >= num_rows_) {
       std::string err_msg =
mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc
@@ -111,7 +111,7 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buf
   RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));
   // Initialize tensor.
-  int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->StartAddr());
+  int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer());
   // Assign the data to tensor element.
   while (sample_id_ < last_id) {
     int64_t genId;
mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
@@ -146,10 +146,7 @@ Status TextFileOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTa
   (*tensor_table)->push_back(std::move(tRow));

   std::shared_ptr<Tensor> tensor;
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(
-    &tensor, data_schema_->column(0).tensorImpl(), TensorShape(std::vector<dsize_t>(1, line.size())),
-    data_schema_->column(0).type(),
-    const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(common::SafeCStr(line)))));
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar()));
   (**tensor_table)[row][0] = std::move(tensor);

   return Status::OK();
 }
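The old path copied the raw line bytes into a 1-D uint8 tensor; the new overload builds a scalar string tensor directly from the text. A minimal sketch of the new usage (assuming the dataset headers and the Status/RETURN_IF_NOT_OK helpers from this module are in scope), reading the value back through the std::string_view accessor exercised by the new unit tests further down:

std::shared_ptr<Tensor> tensor;
std::string line = "This is a text file.";
// New string overload: a list of strings plus an explicit shape.
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar()));
std::string_view s;
tensor->GetItemAt(&s, {});  // a view into the tensor's own buffer, no copy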
mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
@@ -759,7 +759,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
   RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type()));
   // Tensors are lazily allocated, this eagerly allocates memory for the tensor.
-  unsigned char *current_tensor_addr = (*tensor)->StartAddr();
+  unsigned char *current_tensor_addr = (*tensor)->GetMutableBuffer();
   int64_t tensor_bytes_remaining = (*num_elements) * pad_size;

   if (current_tensor_addr == nullptr) {

@@ -878,7 +878,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengin
   RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type()));
   // Tensors are lazily allocated, this eagerly allocates memory for the tensor.
-  (void)(*tensor)->StartAddr();
+  (void)(*tensor)->GetMutableBuffer();
   int64_t i = 0;
   auto it = (*tensor)->begin<T>();
mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
@@ -388,7 +388,7 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co
   (void)fs.seekg(0, std::ios::beg);
   RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type()));
-  (void)fs.read(reinterpret_cast<char *>((*tensor)->StartAddr()), num_elements);
+  (void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements);
   fs.close();
   if (decode_ == true) {
     Status rc = Decode(*tensor, tensor);
mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc
@@ -110,7 +110,7 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector<DataItem> &i
   data_item.tensorShape_ = dataShapes;
   data_item.tensorType_ = datatype;
   data_item.dataLen_ = ts->SizeInBytes();
-  data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<void *>(ts->StartAddr()), [](void *elem) {});
+  data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<void *>(ts->GetMutableBuffer()), [](void *elem) {});
   items.emplace_back(data_item);
   MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " << ts->Size() << ".";
mindspore/ccsrc/dataset/kernels/data/data_utils.cc
@@ -162,7 +162,7 @@ void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
 Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
   RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), data_type));
-  static_cast<void>((*output)->StartAddr());
+  static_cast<void>((*output)->GetMutableBuffer());
   switch (input->type().value()) {
     case DataType::DE_BOOL:
       CastFrom<bool>(input, output);

@@ -211,7 +211,7 @@ Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
   // initiate new tensor for type cast
   DataType new_type = DataType("float16");
   RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), new_type));
-  static_cast<void>((*output)->StartAddr());
+  static_cast<void>((*output)->GetMutableBuffer());
   auto in_itr = input->begin<float>();
   auto out_itr = (*output)->begin<float16>();
mindspore/ccsrc/dataset/kernels/image/image_utils.cc
@@ -64,7 +64,7 @@ Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int
   std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
   RETURN_UNEXPECTED_IF_NULL(output_cv);
-  (void)output_cv->StartAddr();
+  (void)output_cv->GetMutableBuffer();
   if (input_cv->mat().data) {
     try {
       cv::flip(input_cv->mat(), output_cv->mat(), flip_code);

@@ -125,10 +125,10 @@ bool HasJpegMagic(const unsigned char *data, size_t data_size) {
 }

 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
-  if (input->StartAddr() == nullptr) {
+  if (input->GetMutableBuffer() == nullptr) {
     RETURN_STATUS_UNEXPECTED("Tensor is nullptr");
   }
-  if (HasJpegMagic(input->StartAddr(), input->SizeInBytes())) {
+  if (HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) {
     return JpegCropAndDecode(input, output);
   } else {
     return DecodeCv(input, output);

@@ -282,7 +282,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
   jerr.pub.error_exit = JpegErrorExitCustom;
   try {
     jpeg_create_decompress(&cinfo);
-    JpegSetSource(&cinfo, input->StartAddr(), input->SizeInBytes());
+    JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
     (void)jpeg_read_header(&cinfo, TRUE);
     RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
     jpeg_calc_output_dimensions(&cinfo);

@@ -311,7 +311,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
   TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
   auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8));
   const int buffer_size = output_tensor->SizeInBytes();
-  JSAMPLE *buffer = static_cast<JSAMPLE *>(output_tensor->StartAddr());
+  JSAMPLE *buffer = static_cast<JSAMPLE *>(output_tensor->GetMutableBuffer());
   const int max_scanlines_to_read = skipped_scanlines + crop_h;
   // stride refers to output tensor, which has 3 components at most
   const int stride = crop_w * kOutNumComponents;
mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc
@@ -31,7 +31,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s
   if (input == nullptr) {
     RETURN_STATUS_UNEXPECTED("input tensor is null");
   }
-  if (!HasJpegMagic(input->StartAddr(), input->SizeInBytes())) {
+  if (!HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) {
     DecodeOp op(true);
     std::shared_ptr<Tensor> decoded;
     RETURN_IF_NOT_OK(op.Compute(input, &decoded));

@@ -43,7 +43,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s
   jerr.pub.error_exit = JpegErrorExitCustom;
   try {
     jpeg_create_decompress(&cinfo);
-    JpegSetSource(&cinfo, input->StartAddr(), input->SizeInBytes());
+    JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
     (void)jpeg_read_header(&cinfo, TRUE);
     jpeg_calc_output_dimensions(&cinfo);
   } catch (std::runtime_error &e) {
tests/ut/cpp/dataset/CMakeLists.txt
@@ -50,6 +50,7 @@ SET(DE_UT_SRCS
     storage_op_test.cc
     task_manager_test.cc
     tensor_test.cc
+    tensor_string_test.cc
     tensorshape_test.cc
     tfReader_op_test.cc
     to_float16_op_test.cc
tests/ut/cpp/dataset/common/cvop_common.cc
@@ -60,7 +60,7 @@ void CVOpCommon::GetInputImage(std::string filename) {
   TensorShape in_shape({file_size});
   raw_input_tensor_ = std::make_shared<Tensor>(in_shape, DataType(DataType::DE_UINT8));
-  file.read(reinterpret_cast<char *>(raw_input_tensor_->StartAddr()), raw_input_tensor_->SizeInBytes());
+  file.read(reinterpret_cast<char *>(raw_input_tensor_->GetMutableBuffer()), raw_input_tensor_->SizeInBytes());
   raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR);
   input_tensor_ = std::dynamic_pointer_cast<Tensor>(std::make_shared<CVTensor>(raw_cv_image_));
   SwapRedAndBlue(input_tensor_, &input_tensor_);
tests/ut/cpp/dataset/datatype_test.cc
@@ -32,47 +32,47 @@ class MindDataTestDatatype : public UT::Common {

 TEST_F(MindDataTestDatatype, TestSizes) {
-  uint8_t x = DataType::DE_BOOL_SIZE;
+  uint8_t x = DataType::SIZE_IN_BYTES[DataType::DE_BOOL];
   DataType d = DataType(DataType::DE_BOOL);
   ASSERT_EQ(x, 1);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_INT8_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_INT8];
   d = DataType(DataType::DE_INT8);
   ASSERT_EQ(x, 1);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_UINT8_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_UINT8];
   d = DataType(DataType::DE_UINT8);
   ASSERT_EQ(x, 1);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_INT16_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_INT16];
   d = DataType(DataType::DE_INT16);
   ASSERT_EQ(x, 2);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_UINT16_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_UINT16];
   d = DataType(DataType::DE_UINT16);
   ASSERT_EQ(x, 2);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_INT32_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_INT32];
   d = DataType(DataType::DE_INT32);
   ASSERT_EQ(x, 4);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_UINT32_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_UINT32];
   d = DataType(DataType::DE_UINT32);
   ASSERT_EQ(x, 4);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_INT64_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_INT64];
   d = DataType(DataType::DE_INT64);
   ASSERT_EQ(x, 8);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_UINT64_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_UINT64];
   d = DataType(DataType::DE_UINT64);
   ASSERT_EQ(x, 8);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_FLOAT32_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_FLOAT32];
   d = DataType(DataType::DE_FLOAT32);
   ASSERT_EQ(x, 4);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::DE_FLOAT64_SIZE;
+  x = DataType::SIZE_IN_BYTES[DataType::DE_FLOAT64];
   d = DataType(DataType::DE_FLOAT64);
   ASSERT_EQ(x, 8);
   ASSERT_EQ(d.SizeInBytes(), x);
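The test now reads element sizes from a DataType::SIZE_IN_BYTES lookup table indexed by the type enum, instead of one DE_*_SIZE constant per type. A standalone sketch of that table pattern (hypothetical enum values and names, not the actual data_type.h):

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the real enum/table in dataset/core/data_type.h.
enum DeType : uint8_t { DE_BOOL = 0, DE_INT8, DE_UINT8, DE_INT16, DE_UINT16, DE_INT32, NUM_OF_TYPES };
constexpr uint8_t SIZE_IN_BYTES[NUM_OF_TYPES] = {1, 1, 1, 2, 2, 4};

int main() {
  assert(SIZE_IN_BYTES[DE_INT16] == 2);  // same lookup shape as the updated test
  return 0;
}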
tests/ut/cpp/dataset/image_folder_op_test.cc
@@ -74,7 +74,7 @@ Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements,
   RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(data_type), data));
   if (data == nullptr) {
-    (*sample_ids)->StartAddr();  // allocate memory in case user forgets!
+    (*sample_ids)->GetMutableBuffer();  // allocate memory in case user forgets!
   }
   return Status::OK();
 }
tests/ut/cpp/dataset/map_op_test.cc
@@ -190,7 +190,7 @@ TEST_F(MindDataTestMapOp, TestByPosition) {
     EXPECT_EQ(tensor_list[i]->type(), golden_types[i]);
     EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]);
     EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]);
-    EXPECT_NE(tensor_list[i]->StartAddr(), nullptr);
+    EXPECT_NE(tensor_list[i]->GetMutableBuffer(), nullptr);
   }
 }

@@ -366,7 +366,7 @@ TEST_F(MindDataTestMapOp, Test1to3) {
     EXPECT_EQ(tensor_list[i]->type(), golden_types[i]);
     EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]);
     EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]);
-    EXPECT_NE(tensor_list[i]->StartAddr(), nullptr);
+    EXPECT_NE(tensor_list[i]->GetMutableBuffer(), nullptr);
   }
   rc = di.FetchNextTensorRow(&tensor_list);
   EXPECT_TRUE(rc.IsOk());

@@ -700,7 +700,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) {
     MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
     EXPECT_TRUE(img_class[(i % 44) / 11] == label);
     // Dump all the image into string, to be used as a comparison later.
-    result.append((char *)tensor_map["image"]->StartAddr(), (int64_t)tensor_map["image"]->Size());
+    result.append((char *)tensor_map["image"]->GetMutableBuffer(), (int64_t)tensor_map["image"]->Size());
     di.GetNextAsMap(&tensor_map);
     i++;
   }

@@ -745,7 +745,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) {
     tensor_map["label"]->GetItemAt<int32_t>(&label, {});
     MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
     EXPECT_TRUE(img_class[(i % 44) / 11] == label);
-    result2.append((char *)tensor_map["image"]->StartAddr(), (int64_t)tensor_map["image"]->Size());
+    result2.append((char *)tensor_map["image"]->GetMutableBuffer(), (int64_t)tensor_map["image"]->Size());
     di2.GetNextAsMap(&tensor_map);
     i++;
   }
tests/ut/cpp/dataset/random_crop_decode_resize_op_test.cc
@@ -57,8 +57,8 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) {
   for (int i = 0; i < 100; i++) {
     (void)crop_and_decode.Compute(raw_input_tensor_, &crop_and_decode_output);
     (void)decode_and_crop.Compute(input_tensor_, &decode_and_crop_output);
-    cv::Mat output1(target_height, target_width, CV_8UC3, crop_and_decode_output->StartAddr());
-    cv::Mat output2(target_height, target_width, CV_8UC3, decode_and_crop_output->StartAddr());
+    cv::Mat output1(target_height, target_width, CV_8UC3, crop_and_decode_output->GetMutableBuffer());
+    cv::Mat output2(target_height, target_width, CV_8UC3, decode_and_crop_output->GetMutableBuffer());
     long int mse_sum = 0;
     long int count = 0;
     int a, b;

@@ -133,8 +133,8 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) {
     crop_and_decode_status = Crop(decoded, &decoded_and_cropped, x, y, crop_width, crop_height);
     decode_and_crop_status = JpegCropAndDecode(raw_input_tensor_, &cropped_and_decoded, x, y, crop_width, crop_height);
     {
-      cv::Mat M1(crop_height, crop_width, CV_8UC3, decoded_and_cropped->StartAddr());
-      cv::Mat M2(crop_height, crop_width, CV_8UC3, cropped_and_decoded->StartAddr());
+      cv::Mat M1(crop_height, crop_width, CV_8UC3, decoded_and_cropped->GetMutableBuffer());
+      cv::Mat M2(crop_height, crop_width, CV_8UC3, cropped_and_decoded->GetMutableBuffer());
       for (int i = 0; i < crop_height; ++i) {
         for (int j = 0; j < crop_width; ++j) {
           m1 = M1.at<cv::Vec3b>(i, j)[1];
tests/ut/cpp/dataset/stand_alone_samplers_test.cc
@@ -34,7 +34,7 @@ Status CreateINT64Tensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elemen
   RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(DataType::DE_INT64), data));
   if (data == nullptr) {
-    (*sample_ids)->StartAddr();  // allocate memory in case user forgets!
+    (*sample_ids)->GetMutableBuffer();  // allocate memory in case user forgets!
   }
   return Status::OK();
 }
tests/ut/cpp/dataset/tensor_string_test.cc
new file (0 → 100644)
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <memory>
#include <string>
#include "dataset/core/client.h"
#include "common/common.h"
#include "gtest/gtest.h"
#include "securec.h"
#include "dataset/core/tensor.h"
#include "dataset/core/cv_tensor.h"
#include "dataset/core/data_type.h"
#include "dataset/util/de_error.h"

using namespace mindspore::dataset;
namespace py = pybind11;

class MindDataTestStringTensorDE : public UT::Common {
 public:
  MindDataTestStringTensorDE() = default;

  void SetUp() override { GlobalInit(); }
};

TEST_F(MindDataTestStringTensorDE, Basics) {
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>("Hi");
  ASSERT_TRUE(t->shape() == TensorShape({}));
  std::string_view s = "";
  t->GetItemAt(&s, {});
  ASSERT_TRUE(s == "Hi");

  std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(std::vector<std::string>{"Hi", "Bye"});
  ASSERT_TRUE(t2->shape() == TensorShape({2}));
  t2->GetItemAt(&s, {0});
  ASSERT_TRUE(s == "Hi");
  t2->GetItemAt(&s, {1});
  ASSERT_TRUE(s == "Bye");

  std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
  std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
  ASSERT_TRUE(t3->shape() == TensorShape({2, 3}));
  uint32_t index = 0;
  for (uint32_t i = 0; i < 2; i++) {
    for (uint32_t j = 0; j < 3; j++) {
      std::string_view s = "";
      t3->GetItemAt(&s, {i, j});
      ASSERT_TRUE(s == strings[index++]);
    }
  }
}

TEST_F(MindDataTestStringTensorDE, Basics2) {
  std::shared_ptr<Tensor> t =
    std::make_shared<Tensor>(std::vector<std::string>{"abc", "defg", "hi", "klmno", "123", "789"}, TensorShape({2, 3}));

  ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 20);
  std::vector<uint32_t> offsets = {3, 8, 11, 17, 21, 25};
  uint32_t ctr = 0;
  for (auto i : offsets) {
    ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(t->GetMutableBuffer() + ctr)) == i);
    ctr += 4;
  }
  const char *buf = reinterpret_cast<char *>(t->GetMutableBuffer()) + 6 * 4;
  std::vector<uint32_t> starts = {0, 4, 9, 12, 18, 22};
  uint32_t index = 0;
  for (uint32_t i = 0; i < 2; i++) {
    for (uint32_t j = 0; j < 3; j++) {
      std::string_view s = "";
      t->GetItemAt(&s, {i, j});
      ASSERT_TRUE(s.data() == buf + starts[index++]);
    }
  }
}

TEST_F(MindDataTestStringTensorDE, Empty) {
  std::vector<std::string> strings{"abc", "defg", "", "", "123", ""};
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
  // abc_defg___123__
  // 0123456789012345
  ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 10);
  std::vector<uint32_t> offsets = {3, 8, 9, 10, 14, 15};
  uint32_t ctr = 0;
  for (auto i : offsets) {
    ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(t->GetMutableBuffer() + ctr)) == i);
    ctr += 4;
  }
  const char *buf = reinterpret_cast<char *>(t->GetMutableBuffer()) + 6 * 4;
  std::vector<uint32_t> starts = {0, 4, 9, 10, 11, 15};
  uint32_t index = 0;
  for (uint32_t i = 0; i < 2; i++) {
    for (uint32_t j = 0; j < 3; j++) {
      std::string_view s = "";
      t->GetItemAt(&s, {i, j});
      ASSERT_TRUE(s.data() == buf + starts[index]);
      ASSERT_TRUE(s == strings[index++]);
    }
  }
}

TEST_F(MindDataTestStringTensorDE, SetItem) {
  std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
  std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
  ASSERT_TRUE(t3->shape() == TensorShape({2, 3}));

  t3->SetItemAt({0, 1}, std::string{"xyzz"});
  strings[1] = "xyzz";

  t3->SetItemAt({0, 2}, std::string{"07"});
  strings[2] = "07";

  t3->SetItemAt({1, 2}, std::string{"987"});
  strings[5] = "987";

  uint32_t index = 0;
  for (uint32_t i = 0; i < 2; i++) {
    for (uint32_t j = 0; j < 3; j++) {
      std::string_view s = "";
      t3->GetItemAt(&s, {i, j});
      ASSERT_TRUE(s == strings[index++]);
    }
  }
}

TEST_F(MindDataTestStringTensorDE, Iterator) {
  std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
  uint32_t index = 0;
  auto itr = t->begin<std::string_view>();
  for (; itr != t->end<std::string_view>(); itr++) {
    ASSERT_TRUE(*itr == strings[index++]);
  }
  index = 0;
  itr = t->begin<std::string_view>();
  for (; itr != t->end<std::string_view>(); itr += 2) {
    ASSERT_TRUE(*itr == strings[index]);
    index += 2;
  }
}
\ No newline at end of file
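The Basics2 and Empty tests above pin down the serialized layout of a string tensor: a header of one 4-byte offset per element (each pointing one past the end of that element's characters), followed by the null-terminated character data, so for these cases SizeInBytes() works out to num_elements * 5 plus the total character count. A standalone sketch of that size rule (a hypothetical helper, not part of the Tensor API):

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Hypothetical helper reproducing the size rule asserted in Basics2/Empty.
int64_t ExpectedStringTensorSize(const std::vector<std::string> &strings) {
  int64_t total_chars = 0;
  for (const auto &s : strings) total_chars += s.size();
  // 4-byte offset + 1-byte terminator per element, then the character payload.
  return static_cast<int64_t>(strings.size()) * (4 + 1) + total_chars;
}

int main() {
  assert(ExpectedStringTensorSize({"abc", "defg", "hi", "klmno", "123", "789"}) == 6 * 5 + 20);
  assert(ExpectedStringTensorSize({"abc", "defg", "", "", "123", ""}) == 6 * 5 + 10);
  return 0;
}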
tests/ut/cpp/dataset/tensor_test.cc
@@ -111,17 +111,17 @@ TEST_F(MindDataTestTensorDE, CopyTensor) {
   int16_t o;
   t->GetItemAt<int16_t>(&o, {});
   ASSERT_EQ(o, -66);
-  unsigned char *addr = t->StartAddr();
+  unsigned char *addr = t->GetMutableBuffer();
   auto t2 = std::make_shared<Tensor>(std::move(*t));
   ASSERT_EQ(t2->shape(), TensorShape({}));
   ASSERT_EQ(t2->type(), DataType::DE_INT16);
   t2->GetItemAt<int16_t>(&o, {});
   ASSERT_EQ(o, -66);
-  unsigned char *new_addr = t2->StartAddr();
+  unsigned char *new_addr = t2->GetMutableBuffer();
   ASSERT_EQ(addr, new_addr);
   ASSERT_EQ(t->shape(), TensorShape::CreateUnknownRankShape());
   ASSERT_EQ(t->type(), DataType::DE_UNKNOWN);
-  ASSERT_EQ(t->StartAddr(), nullptr);
+  ASSERT_EQ(t->GetMutableBuffer(), nullptr);
   Status rc = t->GetItemAt<int16_t>(&o, {});
   ASSERT_TRUE(rc.IsError());
 }

@@ -237,7 +237,7 @@ TEST_F(MindDataTestTensorDE, Strides) {
 void checkCvMat(TensorShape shape, DataType type) {
   std::shared_ptr<CVTensor> t = std::make_shared<CVTensor>(shape, type);
   cv::Mat m = t->mat();
-  ASSERT_EQ(m.data, t->StartAddr());
+  ASSERT_EQ(m.data, t->GetMutableBuffer());
   ASSERT_EQ(static_cast<uchar>(m.type()) & static_cast<uchar>(CV_MAT_DEPTH_MASK), type.AsCVType());
   if (shape.Rank() < 4) {
     if (shape.Rank() > 1) {

@@ -311,15 +311,15 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
 TEST_F(MindDataTestTensorDE, CVTensorAs) {
   std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64));
   t->Fill<double>(2.2);
-  unsigned char *addr = t->StartAddr();
+  unsigned char *addr = t->GetMutableBuffer();
   std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64));
   t2->Fill<double>(4.4);
   std::shared_ptr<CVTensor> ctv = CVTensor::AsCVTensor(t);
-  ASSERT_EQ(t->StartAddr(), nullptr);
-  ASSERT_EQ(ctv->StartAddr(), addr);
+  ASSERT_EQ(t->GetMutableBuffer(), nullptr);
+  ASSERT_EQ(ctv->GetMutableBuffer(), addr);
   cv::Mat m = ctv->mat();
   m = 2 * m;
-  ASSERT_EQ(ctv->StartAddr(), addr);
+  ASSERT_EQ(ctv->GetMutableBuffer(), addr);
   ASSERT_TRUE(*t2 == *ctv);
   MS_LOG(DEBUG) << *t2 << std::endl << *ctv;
 }
tests/ut/python/dataset/test_datasets_textfileop.py
@@ -41,8 +41,8 @@ def test_textline_dataset_totext():
     count = 0
     line = ["This is a text file.", "Another file.", "Be happy every day.", "End of file.", "Good luck to everyone."]
     for i in data.create_dict_iterator():
-        str = nlp.as_text(i["text"])
-        assert (str == line[count])
+        str = i["text"].item().decode("utf8")
+        assert (str == line[count])
         count += 1
     assert (count == 5)

@@ -68,8 +68,8 @@ def test_textline_dataset_repeat():
             "This is a text file.", "Be happy every day.", "Good luck to everyone.",
             "This is a text file.", "Be happy every day.", "Good luck to everyone."]
     for i in data.create_dict_iterator():
-        str = nlp.as_text(i["text"])
-        assert (str == line[count])
+        str = i["text"].item().decode("utf8")
+        assert (str == line[count])
         count += 1
     assert (count == 9)
tests/ut/python/dataset/test_flat_map.py
@@ -26,7 +26,7 @@ def test_flat_map_1():
     import mindspore.dataset.transforms.nlp.utils as nlp

     def flat_map_func(x):
-        data_dir = nlp.as_text(x[0])
+        data_dir = x[0].item().decode('utf8')
         d = ds.ImageFolderDatasetV2(data_dir)
         return d

@@ -47,12 +47,12 @@ def test_flat_map_2():
     import mindspore.dataset.transforms.nlp.utils as nlp

     def flat_map_func_1(x):
-        data_dir = nlp.as_text(x[0])
+        data_dir = x[0].item().decode('utf8')
         d = ds.ImageFolderDatasetV2(data_dir)
         return d

     def flat_map_func_2(x):
-        text_file = nlp.as_text(x[0])
+        text_file = x[0].item().decode('utf8')
         d = ds.TextFileDataset(text_file)
         d = d.flat_map(flat_map_func_1)
         return d
tests/ut/python/dataset/test_tensor_string.py
new file (0 → 100644)
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import mindspore._c_dataengine as cde
import mindspore.dataset as ds

import pytest
import numpy as np


def test_basic():
    x = np.array([["ab", "cde", "121"], ["x", "km", "789"]], dtype='S')
    # x = np.array(["ab", "cde"], dtype='S')
    n = cde.Tensor(x)
    arr = n.as_array()
    y = np.array([1, 2])
    assert all(y == y)
    # assert np.testing.assert_array_equal(y,y)


def compare(strings):
    arr = np.array(strings, dtype='S')

    def gen():
        yield arr,

    data = ds.GeneratorDataset(gen, column_names=["col"])

    for d in data:
        np.testing.assert_array_equal(d[0], arr)


def test_generator():
    compare(["ab"])
    compare(["ab", "cde", "121"])
    compare([["ab", "cde", "121"], ["x", "km", "789"]])


def test_batching_strings():
    def gen():
        yield np.array(["ab", "cde", "121"], dtype='S'),

    data = ds.GeneratorDataset(gen, column_names=["col"]).batch(10)

    with pytest.raises(RuntimeError) as info:
        for _ in data:
            pass
    assert "[Batch ERROR] Batch does not support" in str(info)


if __name__ == '__main__':
    test_generator()
    test_basic()
    test_batching_strings()
\ No newline at end of file