Commit 11826fb2
Authored Jun 18, 2020 by hesham

DuplicateOp

Parent: 90bb9320
Showing 11 changed files with 222 additions and 16 deletions (+222 -16)
mindspore/ccsrc/dataset/api/python_bindings.cc         +4   -0
mindspore/ccsrc/dataset/core/tensor.h                  +10  -0
mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt    +1   -0
mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc   +35  -0
mindspore/ccsrc/dataset/kernels/data/duplicate_op.h    +42  -0
mindspore/dataset/engine/datasets.py                   +4   -4
mindspore/dataset/text/utils.py                        +6   -6
mindspore/dataset/transforms/c_transforms.py           +19  -0
tests/ut/cpp/dataset/duplicate_op_test.cc              +49  -0
tests/ut/python/dataset/test_duplicate_op.py           +40  -0
tests/ut/python/dataset/test_vocab.py                  +12  -6
mindspore/ccsrc/dataset/api/python_bindings.cc
@@ -40,6 +40,7 @@
 #include "dataset/kernels/image/resize_op.h"
 #include "dataset/kernels/image/uniform_aug_op.h"
 #include "dataset/kernels/image/bounding_box_augment_op.h"
+#include "dataset/kernels/data/duplicate_op.h"
 #include "dataset/kernels/data/fill_op.h"
 #include "dataset/kernels/data/mask_op.h"
 #include "dataset/kernels/data/pad_end_op.h"
@@ -443,6 +444,9 @@ void bindTensorOps2(py::module *m) {
                     "Tensor mask operation using relational comparator")
     .def(py::init<RelationalOp, std::shared_ptr<Tensor>, DataType>());
+  (void)py::class_<DuplicateOp, TensorOp, std::shared_ptr<DuplicateOp>>(*m, "DuplicateOp",
+                                                                        "Duplicate tensor.")
+    .def(py::init<>());
   (void)py::class_<TruncateSequencePairOp, TensorOp, std::shared_ptr<TruncateSequencePairOp>>(
     *m, "TruncateSequencePairOp", "Tensor operation to truncate two tensors to a max_length")
     .def(py::init<int64_t>());
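With this binding registered, DuplicateOp becomes constructible from Python. A minimal smoke check, assuming the compiled bindings are importable as mindspore._c_dataengine (the cde alias used by the Python dataset layer):

    # Smoke check of the new binding (assumes the compiled module
    # mindspore._c_dataengine is importable, as elsewhere in this codebase).
    import mindspore._c_dataengine as cde

    op = cde.DuplicateOp()  # default-constructible via the py::init<>() binding above
    print(op)               # pybind11's default repr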
mindspore/ccsrc/dataset/core/tensor.h
@@ -115,6 +115,16 @@ class Tensor {
   static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape,
                              DataType type, const unsigned char *data = nullptr);

+  /// Create a copy of the input tensor
+  /// \param out [out] output tensor to be generated
+  /// \param in [in] original tensor to be copied
+  /// \return Status
+  static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
+    const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+    *out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
+    return Status::OK();
+  }
+
   // A static factory method to create a Tensor from a given py::array.
   // @param ptr output argument to hold the created Tensor
   // @param arr py::array
mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt
@@ -10,4 +10,5 @@ add_library(kernels-data OBJECT
     slice_op.cc
     mask_op.cc
     concatenate_op.cc
+    duplicate_op.cc
     )
mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc (new file, mode 100644)
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "dataset/kernels/data/duplicate_op.h"

#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"

namespace mindspore {
namespace dataset {

Status DuplicateOp::Compute(const TensorRow &input, TensorRow *output) {
  IO_CHECK_VECTOR(input, output);
  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
  std::shared_ptr<Tensor> out;
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, input[0]));
  output->push_back(input[0]);
  output->push_back(out);
  return Status::OK();
}
}  // namespace dataset
}  // namespace mindspore
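Compute passes the input tensor through unchanged and appends a deep copy made with the new Tensor::CreateTensor overload. A rough NumPy analogue of that contract, not part of the commit:

    import numpy as np

    def duplicate_compute(row):
        # Mirrors DuplicateOp::Compute: one tensor in, two tensors out,
        # with the second a deep copy backed by its own buffer.
        assert len(row) == 1, "Input should be one tensor"
        copy = np.array(row[0], copy=True)  # analogue of Tensor::CreateTensor(&out, input[0])
        return [row[0], copy]

    out = duplicate_compute([np.array([1, 2, 3])])
    assert out[0] is not out[1]                    # distinct buffers, as the C++ unit test checks
    np.testing.assert_array_equal(out[0], out[1])  # identical contents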
mindspore/ccsrc/dataset/kernels/data/duplicate_op.h (new file, mode 100644)
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef DATASET_KERNELS_DATA_DUPLICATE_OP_H_
#define DATASET_KERNELS_DATA_DUPLICATE_OP_H_

#include <vector>
#include <memory>

#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"

namespace mindspore {
namespace dataset {
class DuplicateOp : public TensorOp {
 public:
  DuplicateOp() = default;

  ~DuplicateOp() override = default;

  void Print(std::ostream &out) const override { out << "DuplicateOp"; }

  Status Compute(const TensorRow &input, TensorRow *output) override;

  uint32_t NumOutput() override { return 2; }
};
}  // namespace dataset
}  // namespace mindspore
#endif  // DATASET_KERNELS_DATA_DUPLICATE_OP_H_
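Because NumOutput() reports 2, the dataset engine knows this op turns one input column into two output columns; the Python-level Duplicate transform added later in this diff shows the matching map() usage with two output column names.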
mindspore/dataset/engine/datasets.py
@@ -4869,10 +4869,10 @@ class BuildVocabDataset(DatasetOp):
         top_k(int, optional): top_k > 0. Number of words to be built into vocab. top_k most frequent words are
             taken. The top_k is taken after freq_range. If not enough top_k, all words will be taken (default=None,
             all words are included).
-        special_tokens(list): a list of strings, each one is a special token. for e.g. ["<pad>","<unk>"]
-            (default=None, no special tokens will be added).
-        special_first(bool): whether special_tokens will be prepended/appended to vocab, If special_tokens is
-            specified and special_first is set to None, special_tokens will be prepended. (default=None).
+        special_tokens(list, optional): a list of strings, each one is a special token. for example
+            special_tokens=["<pad>","<unk>"] (default=None, no special tokens will be added).
+        special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens
+            is specified and special_first is set to None, special_tokens will be prepended. (default=None).
         prefetch_size (int, optional): prefetch number of records ahead of the user's request (default=None).
     """
mindspore/dataset/text/utils.py
@@ -50,8 +50,8 @@ class Vocab(cde.Vocab):
         top_k(int, optional): top_k > 0. Number of words to be built into vocab. top_k most frequent words are
             taken. top_k is taken after freq_range. If not enough top_k, all words will be taken. (default=None
             all words are included).
-        special_tokens(list): a list of strings, each one is a special token. for e.g. ["<pad>","<unk>"]
-            (default=None, no special tokens will be added).
+        special_tokens(list, optional): a list of strings, each one is a special token. for example
+            special_tokens=["<pad>","<unk>"] (default=None, no special tokens will be added).
         special_first(bool, optional): whether special_tokens will be prepended/appended to vocab. If special_tokens
             is specified and special_first is set to None, special_tokens will be prepended. (default=None).
     return:
@@ -72,8 +72,8 @@ class Vocab(cde.Vocab):
         build a vocab object from a list of word.
         Args:
             word_list(list): a list of string where each element is a word of type string.
-            special_tokens(list): a list of strings, each one is a special token. for e.g. ["<pad>","<unk>"]
-                (default=None, no special tokens will be added).
+            special_tokens(list, optional): a list of strings, each one is a special token. for example
+                special_tokens=["<pad>","<unk>"] (default=None, no special tokens will be added).
             special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens
                 is specified and special_first is set to None, special_tokens will be prepended. (default=None).
         """
@@ -89,8 +89,8 @@ class Vocab(cde.Vocab):
             delimiter(str, optional): a delimiter to break up each line in file, the first element is taken to be
                 the word (default=None).
             vocab_size(int, optional): number of words to read from file_path (default=None, all words are taken).
-            special_tokens(list): a list of strings, each one is a special token. for e.g. ["<pad>","<unk>"]
-                (default=None, no special tokens will be added).
+            special_tokens(list, optional): a list of strings, each one is a special token. for example
+                special_tokens=["<pad>","<unk>"] (default=None, no special tokens will be added).
             special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens
                 is specified and special_first is set to None, special_tokens will be prepended. (default=None).
         """
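As a quick illustration of the documented defaults, a hedged sketch using a hypothetical three-word list:

    import mindspore.dataset.text as text

    # special_first=True prepends the special tokens, so "<pad>" and "<unk>"
    # take ids 0 and 1 and the regular words follow at ids 2, 3, 4.
    vocab = text.Vocab.from_list(["w1", "w2", "w3"],
                                 special_tokens=["<pad>", "<unk>"],
                                 special_first=True)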
mindspore/dataset/transforms/c_transforms.py
@@ -203,3 +203,22 @@ class Concatenate(cde.ConcatenateOp):
     def __init__(self, axis=0, prepend=None, append=None):
         # add some validations here later
         super().__init__(axis, prepend, append)
+
+
+class Duplicate(cde.DuplicateOp):
+    """
+    Duplicate the input tensor to a new output tensor. The input tensor is carried over to the output list.
+
+    Examples:
+        >>> # Data before
+        >>> # |  x      |
+        >>> # +---------+
+        >>> # | [1,2,3] |
+        >>> # +---------+
+        >>> data = data.map(input_columns=["x"], operations=Duplicate(),
+        >>>         output_columns=["x", "y"], output_order=["x", "y"])
+        >>> # Data after
+        >>> # |  x      |  y      |
+        >>> # +---------+---------+
+        >>> # | [1,2,3] | [1,2,3] |
+        >>> # +---------+---------+
+    """
tests/ut/cpp/dataset/duplicate_op_test.cc (new file, mode 100644)
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "dataset/core/client.h"
#include "common/common.h"
#include "gtest/gtest.h"
#include "dataset/core/tensor.h"
#include "dataset/util/de_error.h"
#include "dataset/kernels/data/duplicate_op.h"

using namespace mindspore::dataset;
namespace py = pybind11;

class MindDataTestDuplicateOp : public UT::Common {
 public:
  MindDataTestDuplicateOp() {}

  void SetUp() { GlobalInit(); }
};

TEST_F(MindDataTestDuplicateOp, Basics) {
  std::shared_ptr<Tensor> t;
  Tensor::CreateTensor(&t, std::vector<uint32_t>({1, 2, 3, 4, 5, 6}));
  std::shared_ptr<Tensor> v;
  Tensor::CreateTensor(&v, std::vector<uint32_t>({3}), TensorShape::CreateScalar());
  std::shared_ptr<DuplicateOp> op = std::make_shared<DuplicateOp>();
  TensorRow in;
  in.push_back(t);
  TensorRow out;
  ASSERT_TRUE(op->Compute(in, &out).IsOk());
  ASSERT_TRUE(*t == *out[0]);
  ASSERT_TRUE(*t == *out[1]);
  ASSERT_TRUE(t->GetBuffer() == out[0]->GetBuffer());
  ASSERT_TRUE(t->GetBuffer() != out[1]->GetBuffer());
}
tests/ut/python/dataset/test_duplicate_op.py (new file, mode 100644)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Testing Duplicate op in DE
"""
import numpy as np

import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as ops


def compare(array):
    data = ds.NumpySlicesDataset([array], column_names="x")
    array = np.array(array)
    data = data.map(input_columns=["x"], output_columns=["x", "y"], columns_order=["x", "y"],
                    operations=ops.Duplicate())
    for d in data.create_dict_iterator():
        np.testing.assert_array_equal(array, d["x"])
        np.testing.assert_array_equal(array, d["y"])


def test_duplicate_basics():
    compare([1, 2, 3])
    compare([b"1", b"2", b"3"])


if __name__ == "__main__":
    test_duplicate_basics()
tests/ut/python/dataset/test_vocab.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
+# Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -94,9 +94,10 @@ def test_from_file():
         for word in texts.split(" "):
             yield (np.array(word, dtype='S'),)

-    def test_config(lookup_str, special_tokens, special_first):
+    def test_config(lookup_str, vocab_size, special_tokens, special_first):
         try:
-            vocab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, special_tokens=special_tokens, special_first=special_first)
+            vocab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, vocab_size=vocab_size, special_tokens=special_tokens,
+                                         special_first=special_first)
             data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"])
             data = data.map(input_columns=["text"], operations=text.Lookup(vocab))
             res = []
@@ -106,9 +107,14 @@ def test_from_file():
         except ValueError as e:
             return str(e)

-    assert test_config("w1 w2 w3", ["s1", "s2", "s3"], True) == [3, 4, 5]
-    assert test_config("w1 w2 w3", ["s1", "s2", "s3"], False) == [0, 1, 2]
-    assert "special_tokens contains duplicate" in test_config("w1", ["s1", "s1"], True)
+    # test special tokens are prepended
+    assert test_config("w1 w2 w3 s1 s2 s3", None, ["s1", "s2", "s3"], True) == [3, 4, 5, 0, 1, 2]
+    # test special tokens are appended
+    assert test_config("w1 w2 w3 s1 s2 s3", None, ["s1", "s2", "s3"], False) == [0, 1, 2, 8, 9, 10]
+    # test special tokens are prepended when not all words in file are used
+    assert test_config("w1 w2 w3 s1 s2 s3", 3, ["s1", "s2", "s3"], False) == [0, 1, 2, 3, 4, 5]
+    # text exception special_words contains duplicate words
+    assert "special_tokens contains duplicate" in test_config("w1", None, ["s1", "s1"], True)

 if __name__ == '__main__':
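A quick sanity check of the expected ids, assuming SIMPLE_VOCAB_FILE holds eight regular words: with special_first=False the specials are appended after all file words, so they land at ids 8, 9, 10; with vocab_size=3 only the first three file words are kept (ids 0 through 2) and the appended specials follow at ids 3 through 5, which is what the third assert encodes.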