Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
cb2beb47
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
cb2beb47
编写于
2月 16, 2021
作者:
N
Nikita Mikhaylov
提交者:
GitHub
2月 16, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #20516 from nikitamikhaylov/more-checks-filesegmentation-engine
More checks in fileSegmentationEngine
上级
87615c53
d615b8e5
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
29 addition
and
14 deletion
+29
-14
src/Formats/JSONEachRowUtils.cpp
src/Formats/JSONEachRowUtils.cpp
+8
-3
src/IO/BufferWithOwnMemory.h
src/IO/BufferWithOwnMemory.h
+3
-3
src/Processors/Formats/Impl/CSVRowInputFormat.cpp
src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+9
-4
src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
+4
-1
src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+5
-3
未找到文件。
src/Formats/JSONEachRowUtils.cpp
浏览文件 @
cb2beb47
...
@@ -6,6 +6,7 @@ namespace DB
...
@@ -6,6 +6,7 @@ namespace DB
namespace
ErrorCodes
namespace
ErrorCodes
{
{
extern
const
int
INCORRECT_DATA
;
extern
const
int
INCORRECT_DATA
;
extern
const
int
LOGICAL_ERROR
;
}
}
std
::
pair
<
bool
,
size_t
>
fileSegmentationEngineJSONEachRowImpl
(
ReadBuffer
&
in
,
DB
::
Memory
<>
&
memory
,
size_t
min_chunk_size
)
std
::
pair
<
bool
,
size_t
>
fileSegmentationEngineJSONEachRowImpl
(
ReadBuffer
&
in
,
DB
::
Memory
<>
&
memory
,
size_t
min_chunk_size
)
...
@@ -28,7 +29,9 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
...
@@ -28,7 +29,9 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
if
(
quotes
)
if
(
quotes
)
{
{
pos
=
find_first_symbols
<
'\\'
,
'"'
>
(
pos
,
in
.
buffer
().
end
());
pos
=
find_first_symbols
<
'\\'
,
'"'
>
(
pos
,
in
.
buffer
().
end
());
if
(
pos
==
in
.
buffer
().
end
())
if
(
pos
>
in
.
buffer
().
end
())
throw
Exception
(
"Position in buffer is out of bounds. There must be a bug."
,
ErrorCodes
::
LOGICAL_ERROR
);
else
if
(
pos
==
in
.
buffer
().
end
())
continue
;
continue
;
if
(
*
pos
==
'\\'
)
if
(
*
pos
==
'\\'
)
{
{
...
@@ -45,9 +48,11 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
...
@@ -45,9 +48,11 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
else
else
{
{
pos
=
find_first_symbols
<
'{'
,
'}'
,
'\\'
,
'"'
>
(
pos
,
in
.
buffer
().
end
());
pos
=
find_first_symbols
<
'{'
,
'}'
,
'\\'
,
'"'
>
(
pos
,
in
.
buffer
().
end
());
if
(
pos
==
in
.
buffer
().
end
())
if
(
pos
>
in
.
buffer
().
end
())
throw
Exception
(
"Position in buffer is out of bounds. There must be a bug."
,
ErrorCodes
::
LOGICAL_ERROR
);
else
if
(
pos
==
in
.
buffer
().
end
())
continue
;
continue
;
if
(
*
pos
==
'{'
)
else
if
(
*
pos
==
'{'
)
{
{
++
balance
;
++
balance
;
++
pos
;
++
pos
;
...
...
src/IO/BufferWithOwnMemory.h
浏览文件 @
cb2beb47
...
@@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator
...
@@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator
char
*
m_data
=
nullptr
;
char
*
m_data
=
nullptr
;
size_t
alignment
=
0
;
size_t
alignment
=
0
;
Memory
()
{}
Memory
()
=
default
;
/// If alignment != 0, then allocate memory aligned to specified value.
/// If alignment != 0, then allocate memory aligned to specified value.
Memory
(
size_t
size_
,
size_t
alignment_
=
0
)
:
m_capacity
(
size_
),
m_size
(
m_capacity
),
alignment
(
alignment_
)
explicit
Memory
(
size_t
size_
,
size_t
alignment_
=
0
)
:
m_capacity
(
size_
),
m_size
(
m_capacity
),
alignment
(
alignment_
)
{
{
alloc
();
alloc
();
}
}
...
@@ -140,7 +140,7 @@ protected:
...
@@ -140,7 +140,7 @@ protected:
Memory
<>
memory
;
Memory
<>
memory
;
public:
public:
/// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
/// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
BufferWithOwnMemory
(
size_t
size
=
DBMS_DEFAULT_BUFFER_SIZE
,
char
*
existing_memory
=
nullptr
,
size_t
alignment
=
0
)
explicit
BufferWithOwnMemory
(
size_t
size
=
DBMS_DEFAULT_BUFFER_SIZE
,
char
*
existing_memory
=
nullptr
,
size_t
alignment
=
0
)
:
Base
(
nullptr
,
0
),
memory
(
existing_memory
?
0
:
size
,
alignment
)
:
Base
(
nullptr
,
0
),
memory
(
existing_memory
?
0
:
size
,
alignment
)
{
{
Base
::
set
(
existing_memory
?
existing_memory
:
memory
.
data
(),
size
);
Base
::
set
(
existing_memory
?
existing_memory
:
memory
.
data
(),
size
);
...
...
src/Processors/Formats/Impl/CSVRowInputFormat.cpp
浏览文件 @
cb2beb47
...
@@ -15,6 +15,7 @@ namespace ErrorCodes
...
@@ -15,6 +15,7 @@ namespace ErrorCodes
{
{
extern
const
int
BAD_ARGUMENTS
;
extern
const
int
BAD_ARGUMENTS
;
extern
const
int
INCORRECT_DATA
;
extern
const
int
INCORRECT_DATA
;
extern
const
int
LOGICAL_ERROR
;
}
}
...
@@ -436,9 +437,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
...
@@ -436,9 +437,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
if
(
quotes
)
if
(
quotes
)
{
{
pos
=
find_first_symbols
<
'"'
>
(
pos
,
in
.
buffer
().
end
());
pos
=
find_first_symbols
<
'"'
>
(
pos
,
in
.
buffer
().
end
());
if
(
pos
==
in
.
buffer
().
end
())
if
(
pos
>
in
.
buffer
().
end
())
throw
Exception
(
"Position in buffer is out of bounds. There must be a bug."
,
ErrorCodes
::
LOGICAL_ERROR
);
else
if
(
pos
==
in
.
buffer
().
end
())
continue
;
continue
;
if
(
*
pos
==
'"'
)
else
if
(
*
pos
==
'"'
)
{
{
++
pos
;
++
pos
;
if
(
loadAtPosition
(
in
,
memory
,
pos
)
&&
*
pos
==
'"'
)
if
(
loadAtPosition
(
in
,
memory
,
pos
)
&&
*
pos
==
'"'
)
...
@@ -450,9 +453,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
...
@@ -450,9 +453,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
else
else
{
{
pos
=
find_first_symbols
<
'"'
,
'\r'
,
'\n'
>
(
pos
,
in
.
buffer
().
end
());
pos
=
find_first_symbols
<
'"'
,
'\r'
,
'\n'
>
(
pos
,
in
.
buffer
().
end
());
if
(
pos
==
in
.
buffer
().
end
())
if
(
pos
>
in
.
buffer
().
end
())
throw
Exception
(
"Position in buffer is out of bounds. There must be a bug."
,
ErrorCodes
::
LOGICAL_ERROR
);
else
if
(
pos
==
in
.
buffer
().
end
())
continue
;
continue
;
if
(
*
pos
==
'"'
)
else
if
(
*
pos
==
'"'
)
{
{
quotes
=
true
;
quotes
=
true
;
++
pos
;
++
pos
;
...
...
src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
浏览文件 @
cb2beb47
...
@@ -11,6 +11,7 @@ namespace ErrorCodes
...
@@ -11,6 +11,7 @@ namespace ErrorCodes
{
{
extern
const
int
INCORRECT_DATA
;
extern
const
int
INCORRECT_DATA
;
extern
const
int
BAD_ARGUMENTS
;
extern
const
int
BAD_ARGUMENTS
;
extern
const
int
LOGICAL_ERROR
;
}
}
RegexpRowInputFormat
::
RegexpRowInputFormat
(
RegexpRowInputFormat
::
RegexpRowInputFormat
(
...
@@ -182,7 +183,9 @@ static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in,
...
@@ -182,7 +183,9 @@ static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in,
while
(
loadAtPosition
(
in
,
memory
,
pos
)
&&
need_more_data
)
while
(
loadAtPosition
(
in
,
memory
,
pos
)
&&
need_more_data
)
{
{
pos
=
find_first_symbols
<
'\n'
,
'\r'
>
(
pos
,
in
.
buffer
().
end
());
pos
=
find_first_symbols
<
'\n'
,
'\r'
>
(
pos
,
in
.
buffer
().
end
());
if
(
pos
==
in
.
buffer
().
end
())
if
(
pos
>
in
.
buffer
().
end
())
throw
Exception
(
"Position in buffer is out of bounds. There must be a bug."
,
ErrorCodes
::
LOGICAL_ERROR
);
else
if
(
pos
==
in
.
buffer
().
end
())
continue
;
continue
;
// Support DOS-style newline ("\r\n")
// Support DOS-style newline ("\r\n")
...
...
src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
浏览文件 @
cb2beb47
...
@@ -15,6 +15,7 @@ namespace DB
...
@@ -15,6 +15,7 @@ namespace DB
namespace
ErrorCodes
namespace
ErrorCodes
{
{
extern
const
int
INCORRECT_DATA
;
extern
const
int
INCORRECT_DATA
;
extern
const
int
LOGICAL_ERROR
;
}
}
...
@@ -433,10 +434,11 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
...
@@ -433,10 +434,11 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
{
{
pos
=
find_first_symbols
<
'\\'
,
'\r'
,
'\n'
>
(
pos
,
in
.
buffer
().
end
());
pos
=
find_first_symbols
<
'\\'
,
'\r'
,
'\n'
>
(
pos
,
in
.
buffer
().
end
());
if
(
pos
==
in
.
buffer
().
end
())
if
(
pos
>
in
.
buffer
().
end
())
throw
Exception
(
"Position in buffer is out of bounds. There must be a bug."
,
ErrorCodes
::
LOGICAL_ERROR
);
else
if
(
pos
==
in
.
buffer
().
end
())
continue
;
continue
;
else
if
(
*
pos
==
'\\'
)
if
(
*
pos
==
'\\'
)
{
{
++
pos
;
++
pos
;
if
(
loadAtPosition
(
in
,
memory
,
pos
))
if
(
loadAtPosition
(
in
,
memory
,
pos
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录