magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit 387dac58
Authored on Jul 29, 2020 by mindspore-ci-bot
Committed by Gitee on Jul 29, 2020
!3651 change num_samples definition
Merge pull request !3651 from jiangzhiwen/dataset/change_num_samples
Parents: a3e7c4c7 1eda0ef0
Showing 3 changed files with 10 additions and 6 deletions (+10 -6):

mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc  (+2 -2)
mindspore/dataset/engine/datasets.py  (+3 -3)
mindspore/dataset/engine/validators.py  (+5 -1)
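Taken together, the three files change the sentinel that means "read the whole dataset" for CSVDataset's num_samples: the C++ builder default moves from 0 to -1, the Python default moves from None to -1, and the validator now range-checks the argument explicitly. As a rough sketch of the user-visible effect (the file name train.csv below is an assumption for illustration, not part of this commit):

```python
import mindspore.dataset as ds

# Assumed local file; any CSV with a header row would do.
DATA_FILE = "train.csv"

# Default num_samples is now -1, i.e. read the full dataset.
full = ds.CSVDataset(dataset_files=DATA_FILE, shuffle=False)

# A positive num_samples caps the number of rows that are read.
capped = ds.CSVDataset(dataset_files=DATA_FILE, num_samples=10, shuffle=False)

print("all rows :", sum(1 for _ in full.create_dict_iterator()))
print("first 10 :", sum(1 for _ in capped.create_dict_iterator()))
```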
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc

@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace dataset {
 CsvOp::Builder::Builder()
-    : builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(0), builder_shuffle_files_(false) {
+    : builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(-1), builder_shuffle_files_(false) {
   std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
   builder_num_workers_ = config_manager->num_parallel_workers();
   builder_op_connector_size_ = config_manager->op_connector_size();

@@ -451,7 +451,7 @@ Status CsvOp::operator()() {
       RETURN_IF_NOT_OK(jagged_buffer_connector_->Pop(0, &buffer));
       if (buffer->eoe()) {
         workers_done++;
-      } else if (num_samples_ == 0 || rows_read < num_samples_) {
+      } else if (num_samples_ == -1 || rows_read < num_samples_) {
         if ((num_samples_ > 0) && (rows_read + buffer->NumRows() > num_samples_)) {
           int64_t rowsToRemove = buffer->NumRows() - (num_samples_ - rows_read);
           RETURN_IF_NOT_OK(buffer->SliceOff(rowsToRemove));
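With 0 no longer meaning "no limit", the consumption loop above treats -1 as the only "unlimited" value; any positive num_samples_ is a hard cap, and a buffer that would overshoot the cap is trimmed with SliceOff. A small Python sketch of that branch, with hypothetical names (num_samples, rows_read, buffer_rows) standing in for the C++ members, may make the arithmetic easier to follow:

```python
def rows_to_keep(num_samples: int, rows_read: int, buffer_rows: int) -> int:
    """Illustrative sketch of the capping branch in CsvOp::operator()().

    num_samples == -1 means "no limit"; otherwise it caps the total number
    of rows handed downstream.
    """
    if num_samples == -1 or rows_read < num_samples:
        # Mirrors: if ((num_samples_ > 0) && (rows_read + buffer->NumRows() > num_samples_))
        if num_samples > 0 and rows_read + buffer_rows > num_samples:
            rows_to_remove = buffer_rows - (num_samples - rows_read)
            return buffer_rows - rows_to_remove  # i.e. num_samples - rows_read
        return buffer_rows  # whole buffer fits under the cap (or there is no cap)
    return 0  # cap already reached; nothing more is emitted


assert rows_to_keep(-1, 0, 5) == 5   # unlimited: keep the whole buffer
assert rows_to_keep(8, 6, 5) == 2    # only 2 of the 5 buffered rows still fit
assert rows_to_keep(8, 8, 5) == 0    # cap reached
```

One side effect worth noting: under the new condition a literal num_samples of 0 no longer means "unlimited"; since rows_read < 0 is never true, it would appear to read no rows at all.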
mindspore/dataset/engine/datasets.py

@@ -4935,7 +4935,7 @@ class CSVDataset(SourceDataset):
             columns as string type.
         column_names (list[str], optional): List of column names of the dataset (default=None). If this
             is not provided, infers the column_names from the first row of CSV file.
-        num_samples (int, optional): number of samples(rows) to read (default=None, reads the full dataset).
+        num_samples (int, optional): number of samples(rows) to read (default=-1, reads the full dataset).
         num_parallel_workers (int, optional): number of workers to read the data
             (default=None, number set in the config).
         shuffle (Union[bool, Shuffle level], optional): perform reshuffling of the data every epoch

@@ -4959,7 +4959,7 @@ class CSVDataset(SourceDataset):
     """

     @check_csvdataset
-    def __init__(self, dataset_files, field_delim=',', column_defaults=None, column_names=None, num_samples=None,
+    def __init__(self, dataset_files, field_delim=',', column_defaults=None, column_names=None, num_samples=-1,
                  num_parallel_workers=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None):
         super().__init__(num_parallel_workers)
         self.dataset_files = self._find_files(dataset_files)

@@ -5010,7 +5010,7 @@ class CSVDataset(SourceDataset):
         if self._dataset_size is None:
             num_rows = CsvOp.get_num_rows(self.dataset_files, self.column_names is None)
             num_rows = get_num_rows(num_rows, self.num_shards)
-            if self.num_samples is None:
+            if self.num_samples == -1:
                 return num_rows
             return min(self.num_samples, num_rows)
         return self._dataset_size
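The last hunk above (by its shape, the size-reporting method of CSVDataset) makes the reported size agree with the new default: -1 returns the full per-shard row count, anything else returns min(num_samples, num_rows). A minimal usage sketch, again assuming a hypothetical train.csv that is not part of this commit:

```python
import mindspore.dataset as ds

DATA_FILE = "train.csv"  # assumed sample file, for illustration only

# With the default num_samples=-1, the reported size is the full row count.
print(ds.CSVDataset(dataset_files=DATA_FILE).get_dataset_size())

# With an explicit cap, the reported size is min(num_samples, num_rows);
# asking for more rows than the file contains just returns the file's row count.
print(ds.CSVDataset(dataset_files=DATA_FILE, num_samples=100).get_dataset_size())
```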
mindspore/dataset/engine/validators.py

@@ -815,12 +815,16 @@ def check_csvdataset(method):
     def new_method(self, *args, **kwargs):
         _, param_dict = parse_user_args(method, *args, **kwargs)

-        nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id']
+        nreq_param_int = ['num_parallel_workers', 'num_shards', 'shard_id']

         # check dataset_files; required argument
         dataset_files = param_dict.get('dataset_files')
         type_check(dataset_files, (str, list), "dataset files")

+        # check num_samples
+        num_samples = param_dict.get('num_samples')
+        check_value(num_samples, [-1, INT32_MAX], "num_samples")
+
         # check field_delim
         field_delim = param_dict.get('field_delim')
         type_check(field_delim, (str,), 'field delim')
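Because num_samples no longer accepts None, it is dropped from the generic non-required-int list and validated explicitly: check_value constrains it to the interval [-1, INT32_MAX]. The helper below is a hypothetical, standalone restatement of that constraint, written only to show which values pass; the real check lives in validators.py and its exact error type and message may differ:

```python
INT32_MAX = 2**31 - 1  # same upper bound the validator uses

def validate_num_samples(value: int) -> int:
    """Hypothetical sketch of check_value(num_samples, [-1, INT32_MAX], "num_samples")."""
    if not isinstance(value, int) or isinstance(value, bool):
        raise TypeError("num_samples must be an int")
    if not -1 <= value <= INT32_MAX:
        raise ValueError(f"num_samples must be within [-1, {INT32_MAX}], got {value}")
    return value

validate_num_samples(-1)    # the new default: read the whole dataset
validate_num_samples(1000)  # explicit cap: accepted
try:
    validate_num_samples(-2)  # anything below -1 is rejected
except ValueError as err:
    print(err)
```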