Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
2cc6b5cb
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2cc6b5cb
编写于
7月 28, 2020
作者:
J
jiangzhiwen
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix number of columns not match
上级
623d884e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
29 addition
and
13 deletion
+29
-13
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc
...ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc
+24
-10
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.h
.../ccsrc/minddata/dataset/engine/datasetops/source/csv_op.h
+3
-1
tests/ut/python/dataset/test_datasets_csv.py
tests/ut/python/dataset/test_datasets_csv.py
+2
-2
未找到文件。
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc
浏览文件 @
2cc6b5cb
...
...
@@ -100,6 +100,10 @@ Status CsvOp::Init() {
int
CsvOp
::
CsvParser
::
put_record
(
char
c
)
{
std
::
string
s
=
std
::
string
(
str_buf_
.
begin
(),
str_buf_
.
begin
()
+
pos_
);
std
::
shared_ptr
<
Tensor
>
t
;
if
(
cur_col_
>=
column_default_
.
size
())
{
err_message_
=
"Number of file columns does not match the default records"
;
return
-
1
;
}
switch
(
column_default_
[
cur_col_
]
->
type
)
{
case
CsvOp
::
INT
:
Tensor
::
CreateTensor
(
&
t
,
TensorImpl
::
kFlexible
,
TensorShape
::
CreateScalar
(),
DataType
(
DataType
::
DE_INT32
));
...
...
@@ -116,6 +120,10 @@ int CsvOp::CsvParser::put_record(char c) {
Tensor
::
CreateTensor
(
&
t
,
{
s
},
TensorShape
::
CreateScalar
());
break
;
}
if
(
cur_col_
>=
(
*
tensor_table_
)[
cur_row_
].
size
())
{
err_message_
=
"Number of file columns does not match the tensor table"
;
return
-
1
;
}
(
*
tensor_table_
)[
cur_row_
][
cur_col_
]
=
std
::
move
(
t
);
pos_
=
0
;
cur_col_
++
;
...
...
@@ -134,7 +142,11 @@ int CsvOp::CsvParser::put_row(char c) {
return
0
;
}
put_record
(
c
);
int
ret
=
put_record
(
c
);
if
(
ret
<
0
)
{
return
ret
;
}
total_rows_
++
;
cur_row_
++
;
cur_col_
=
0
;
...
...
@@ -265,8 +277,7 @@ Status CsvOp::CsvParser::initCsvParser() {
[
this
](
CsvParser
&
,
char
c
)
->
int
{
this
->
tensor_table_
=
std
::
make_unique
<
TensorQTable
>
();
this
->
tensor_table_
->
push_back
(
TensorRow
(
column_default_
.
size
(),
nullptr
));
this
->
put_record
(
c
);
return
0
;
return
this
->
put_record
(
c
);
}}},
{{
State
::
START_OF_FILE
,
Message
::
MS_QUOTE
},
{
State
::
QUOTE
,
...
...
@@ -367,8 +378,7 @@ Status CsvOp::CsvParser::initCsvParser() {
if
(
this
->
total_rows_
>
this
->
start_offset_
&&
this
->
total_rows_
<=
this
->
end_offset_
)
{
this
->
tensor_table_
->
push_back
(
TensorRow
(
column_default_
.
size
(),
nullptr
));
}
this
->
put_record
(
c
);
return
0
;
return
this
->
put_record
(
c
);
}}},
{{
State
::
END_OF_LINE
,
Message
::
MS_QUOTE
},
{
State
::
QUOTE
,
...
...
@@ -408,15 +418,16 @@ Status CsvOp::LoadFile(const std::string &file, const int64_t start_offset, cons
while
(
ifs
.
good
())
{
char
chr
=
ifs
.
get
();
if
(
csv_parser
.
processMessage
(
chr
)
!=
0
)
{
RETURN_STATUS_UNEXPECTED
(
"Failed to parse CSV file "
+
file
+
":"
+
std
::
to_string
(
csv_parser
.
total_rows_
));
RETURN_STATUS_UNEXPECTED
(
"Failed to parse file "
+
file
+
":"
+
std
::
to_string
(
csv_parser
.
total_rows_
+
1
)
+
". error message: "
+
csv_parser
.
err_message_
);
}
}
}
catch
(
std
::
invalid_argument
&
ia
)
{
std
::
string
err_row
=
std
::
to_string
(
csv_parser
.
total_rows_
);
RETURN_STATUS_UNEXPECTED
(
file
+
":"
+
err_row
+
",
invalid argument of "
+
std
::
string
(
ia
.
what
())
);
std
::
string
err_row
=
std
::
to_string
(
csv_parser
.
total_rows_
+
1
);
RETURN_STATUS_UNEXPECTED
(
file
+
":"
+
err_row
+
",
type does not match"
);
}
catch
(
std
::
out_of_range
&
oor
)
{
std
::
string
err_row
=
std
::
to_string
(
csv_parser
.
total_rows_
);
RETURN_STATUS_UNEXPECTED
(
file
+
":"
+
err_row
+
", out of
Range error: "
+
std
::
string
(
oor
.
what
())
);
std
::
string
err_row
=
std
::
to_string
(
csv_parser
.
total_rows_
+
1
);
RETURN_STATUS_UNEXPECTED
(
file
+
":"
+
err_row
+
", out of
range"
);
}
return
Status
::
OK
();
}
...
...
@@ -763,6 +774,9 @@ Status CsvOp::ComputeColMap() {
column_default_list_
.
push_back
(
std
::
make_shared
<
CsvOp
::
Record
<
std
::
string
>>
(
CsvOp
::
STRING
,
""
));
}
}
if
(
column_default_list_
.
size
()
!=
column_name_id_map_
.
size
())
{
RETURN_STATUS_UNEXPECTED
(
"The number of column names does not match the column defaults"
);
}
return
Status
::
OK
();
}
}
// namespace dataset
...
...
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.h
浏览文件 @
2cc6b5cb
...
...
@@ -76,7 +76,8 @@ class CsvOp : public ParallelOp {
cur_col_
(
0
),
total_rows_
(
0
),
start_offset_
(
0
),
end_offset_
(
std
::
numeric_limits
<
int64_t
>::
max
())
{
end_offset_
(
std
::
numeric_limits
<
int64_t
>::
max
()),
err_message_
(
"unkonw"
)
{
cur_buffer_
=
std
::
make_unique
<
DataBuffer
>
(
0
,
DataBuffer
::
BufferFlags
::
kDeBFlagNone
);
initCsvParser
();
}
...
...
@@ -189,6 +190,7 @@ class CsvOp : public ParallelOp {
std
::
vector
<
char
>
str_buf_
;
std
::
unique_ptr
<
TensorQTable
>
tensor_table_
;
std
::
unique_ptr
<
DataBuffer
>
cur_buffer_
;
std
::
string
err_message_
;
};
class
Builder
{
...
...
tests/ut/python/dataset/test_datasets_csv.py
浏览文件 @
2cc6b5cb
...
...
@@ -205,7 +205,7 @@ def test_csv_dataset_exception():
with
pytest
.
raises
(
Exception
)
as
err
:
for
_
in
data
.
create_dict_iterator
():
pass
assert
"Failed to parse
CSV
file"
in
str
(
err
.
value
)
assert
"Failed to parse file"
in
str
(
err
.
value
)
def
test_csv_dataset_type_error
():
...
...
@@ -218,7 +218,7 @@ def test_csv_dataset_type_error():
with
pytest
.
raises
(
Exception
)
as
err
:
for
_
in
data
.
create_dict_iterator
():
pass
assert
"
invalid argument of stoi
"
in
str
(
err
.
value
)
assert
"
type does not match
"
in
str
(
err
.
value
)
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录