Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
405a6fb0
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
405a6fb0
编写于
8月 17, 2020
作者:
H
hexiaoting
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
New feature: LineAsString format. #13630
上级
17eb8d24
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
135 additions
and
0 deletions
+135
-0
src/Formats/FormatFactory.cpp
src/Formats/FormatFactory.cpp
+1
-0
src/Formats/FormatFactory.h
src/Formats/FormatFactory.h
+1
-0
src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
+101
-0
src/Processors/Formats/Impl/LineAsStringRowInputFormat.h
src/Processors/Formats/Impl/LineAsStringRowInputFormat.h
+31
-0
src/Processors/ya.make
src/Processors/ya.make
+1
-0
未找到文件。
src/Formats/FormatFactory.cpp
浏览文件 @
405a6fb0
...
...
@@ -365,6 +365,7 @@ FormatFactory::FormatFactory()
registerInputFormatProcessorMsgPack
(
*
this
);
registerOutputFormatProcessorMsgPack
(
*
this
);
registerInputFormatProcessorJSONAsString
(
*
this
);
registerInputFormatProcessorLineAsString
(
*
this
);
registerFileSegmentationEngineTabSeparated
(
*
this
);
registerFileSegmentationEngineCSV
(
*
this
);
...
...
src/Formats/FormatFactory.h
浏览文件 @
405a6fb0
...
...
@@ -210,5 +210,6 @@ void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory);
void
registerInputFormatProcessorCapnProto
(
FormatFactory
&
factory
);
void
registerInputFormatProcessorRegexp
(
FormatFactory
&
factory
);
void
registerInputFormatProcessorJSONAsString
(
FormatFactory
&
factory
);
void
registerInputFormatProcessorLineAsString
(
FormatFactory
&
factory
);
}
src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp
0 → 100644
浏览文件 @
405a6fb0
#include <Processors/Formats/Impl/LineAsStringRowInputFormat.h>
#include <Formats/JSONEachRowUtils.h>
#include <common/find_symbols.h>
#include <IO/ReadHelpers.h>
namespace
DB
{
/// Error codes referenced by this file. They are only declared here (extern);
/// their definitions are not part of this translation unit.
namespace
ErrorCodes
{
/// Thrown by the constructor when the header is not a single String column.
extern
const
int
LOGICAL_ERROR
;
/// Thrown by readLineObject() on malformed or truncated input.
extern
const
int
INCORRECT_DATA
;
}
/// Constructs the format on top of `in_`.
///
/// @param header_  Sample block describing the target table; it must consist of
///                 exactly one column whose type is String.
/// @param in_      Source buffer handed to the IRowInputFormat base; `buf` wraps
///                 the base class's `in`.
/// @param params_  Row input format parameters, forwarded to the base class.
///
/// @throws Exception (LOGICAL_ERROR) if the header does not consist of exactly
///         one String column.
LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_)
    : IRowInputFormat(header_, in_, std::move(params_))
    , buf(in)
{
    /// `!= 1` (rather than `> 1`) also rejects an empty header, so that
    /// getDataTypes()[0] is never evaluated on an empty vector.
    if (header_.columns() != 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String)
    {
        throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::LOGICAL_ERROR);
    }
}
/// Resets the parser state: first the IRowInputFormat base, then the
/// peekable buffer owned by this format.
void LineAsStringRowInputFormat::resetParser()
{
    IRowInputFormat::resetParser();
    buf.reset();
}
/// Reads one Line object from `buf` and appends it to `column` as a single value.
///
/// A Line object starts at the current position with a double quote and runs up
/// to and including the next double quote that is not preceded by a backslash.
/// The inserted value spans from the checkpoint (taken before the opening quote)
/// to the position just past the closing quote, so both quotes are included.
///
/// The caller must ensure the buffer is not at EOF before calling, because the
/// first character is inspected unconditionally.
///
/// @param column  Destination column; receives the raw bytes via insertData().
/// @throws Exception (INCORRECT_DATA) if the object does not start with '"' or
///         the input ends before the closing quote is found.
void LineAsStringRowInputFormat::readLineObject(IColumn & column)
{
    /// Remember where the object starts so its bytes can be taken as one range later.
    PeekableReadBufferCheckpoint checkpoint{buf};

    if (*buf.position() != '"')
        throw Exception("Line object must begin with '\"'.", ErrorCodes::INCORRECT_DATA);

    ++buf.position();

    bool closed = false;
    while (!closed)
    {
        if (buf.eof())
            throw Exception("Unexpected end of file while parsing Line object.", ErrorCodes::INCORRECT_DATA);

        /// Scan forward for the next quote or backslash. find_first_symbols yields
        /// the end of the range when nothing is found, which is what the check
        /// below relies on. (The `_or_null` search-from-the-end variant would
        /// return nullptr here and would also skip over intermediate quotes.)
        buf.position() = find_first_symbols<'"', '\\'>(buf.position(), buf.buffer().end());

        if (buf.position() == buf.buffer().end())
        {
            /// Nothing of interest in the current buffer; the eof() call at the
            /// top of the loop decides whether more data is available.
            continue;
        }

        if (*buf.position() == '"')
        {
            /// Unescaped closing quote: consume it and stop.
            closed = true;
            ++buf.position();
        }
        else if (*buf.position() == '\\')
        {
            /// Skip the backslash and, if present, the character it escapes.
            ++buf.position();
            if (!buf.eof())
                ++buf.position();
        }
    }

    /// NOTE(review): presumably makes the bytes between the checkpoint and the
    /// current position addressable as one contiguous range — confirm against
    /// PeekableReadBuffer.
    buf.makeContinuousMemoryFromCheckpointToPos();
    char * end = buf.position();
    buf.rollbackToCheckpoint();
    column.insertData(buf.position(), end - buf.position());
    /// Move past the object that has just been inserted.
    buf.position() = end;
}
/// Reads at most one Line object into the first column.
///
/// Leading whitespace is skipped; if input remains, one object is read via
/// readLineObject(). Afterwards trailing whitespace, one optional comma and
/// further whitespace are skipped.
///
/// @param columns  Destination columns; only columns[0] is written.
/// @return true if the buffer still has data after the separators were skipped,
///         false if it is at EOF.
bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
    skipWhitespaceIfAny(buf);

    const bool has_object = !buf.eof();
    if (has_object)
        readLineObject(*columns[0]);

    skipWhitespaceIfAny(buf);

    /// A single comma is accepted as a separator after the object.
    if (!buf.eof())
    {
        if (*buf.position() == ',')
            ++buf.position();
    }

    skipWhitespaceIfAny(buf);

    const bool more_input = !buf.eof();
    return more_input;
}
/// Registers the "LineAsString" input format in `factory`.
///
/// The registered creator builds a LineAsStringRowInputFormat from the sample
/// block, the read buffer and the row input parameters; the FormatSettings
/// argument is ignored.
void registerInputFormatProcessorLineAsString(FormatFactory & factory)
{
    auto creator = [](
        ReadBuffer & in,
        const Block & header,
        const RowInputFormatParams & row_params,
        const FormatSettings &)
    {
        return std::make_shared<LineAsStringRowInputFormat>(header, in, row_params);
    };

    factory.registerInputFormatProcessor("LineAsString", creator);
}
}
src/Processors/Formats/Impl/LineAsStringRowInputFormat.h
0 → 100644
浏览文件 @
405a6fb0
#pragma once
#include <Processors/Formats/IRowInputFormat.h>
#include <Formats/FormatFactory.h>
#include <IO/PeekableReadBuffer.h>
namespace
DB
{
class
ReadBuffer
;
/// Row input format that reads every Line object as a whole into one String value.
/// Consecutive Line objects may be separated by whitespace and/or a comma.
/// The format is only suitable for tables with a single column of type String.
class LineAsStringRowInputFormat : public IRowInputFormat
{
public:
    /// @param header_  Sample block of the target table.
    /// @param in_      Buffer to read the data from.
    /// @param params_  Row input format parameters passed on to IRowInputFormat.
    LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);

    /// Name reported for this processor.
    String getName() const override { return "LineAsStringRowInputFormat"; }

    /// Reads at most one Line object into columns[0].
    bool readRow(MutableColumns & columns, RowReadExtension & ext) override;

    /// Resets the base parser state and the internal peekable buffer.
    void resetParser() override;

private:
    /// Reads a single Line object from `buf` and appends it to `column`.
    void readLineObject(IColumn & column);

    /// Peekable wrapper around the input buffer; lets readLineObject() set a
    /// checkpoint at the start of an object and roll back to it.
    PeekableReadBuffer buf;
};
}
src/Processors/ya.make
浏览文件 @
405a6fb0
...
...
@@ -23,6 +23,7 @@ SRCS(
Formats/Impl/ConstantExpressionTemplate.cpp
Formats/Impl/CSVRowInputFormat.cpp
Formats/Impl/CSVRowOutputFormat.cpp
Formats/Impl/LineAsStringRowInputFormat.cpp
Formats/Impl/JSONAsStringRowInputFormat.cpp
Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录