Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
d3ee3a69
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
d3ee3a69
编写于
5月 08, 2019
作者:
A
Alexander Tokmakov
提交者:
Alexander Tokmakov
9月 02, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Batch insert Values with expressions (draft)
上级
804c911d
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
306 addition
and
33 deletion
+306
-33
dbms/src/Common/ErrorCodes.cpp
dbms/src/Common/ErrorCodes.cpp
+3
-0
dbms/src/Formats/ConstantExpressionTemplate.cpp
dbms/src/Formats/ConstantExpressionTemplate.cpp
+166
-0
dbms/src/Formats/ConstantExpressionTemplate.h
dbms/src/Formats/ConstantExpressionTemplate.h
+35
-0
dbms/src/Formats/ValuesBlockInputStream.cpp
dbms/src/Formats/ValuesBlockInputStream.cpp
+94
-33
dbms/src/Formats/ValuesBlockInputStream.h
dbms/src/Formats/ValuesBlockInputStream.h
+8
-0
未找到文件。
dbms/src/Common/ErrorCodes.cpp
浏览文件 @
d3ee3a69
...
...
@@ -448,6 +448,9 @@ namespace ErrorCodes
extern
const
int
SETTINGS_ARE_NOT_SUPPORTED
=
471
;
extern
const
int
READONLY_SETTING
=
472
;
extern
const
int
INVALID_TEMPLATE_FORMAT
=
473
;
extern
const
int
CANNOT_CREATE_EXPRESSION_TEMPLATE
=
474
;
extern
const
int
CANNOT_PARSE_EXPRESSION_USING_TEMPLATE
=
475
;
extern
const
int
CANNOT_EVALUATE_EXPRESSION_TEMPLATE
=
476
;
extern
const
int
KEEPER_EXCEPTION
=
999
;
extern
const
int
POCO_EXCEPTION
=
1000
;
...
...
dbms/src/Formats/ConstantExpressionTemplate.cpp
0 → 100644
浏览文件 @
d3ee3a69
#include <DataTypes/DataTypeFactory.h>
#include <Formats/BlockInputStreamFromRowInputStream.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <IO/ReadHelpers.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Formats/ConstantExpressionTemplate.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ExpressionListParsers.h>
#include <DataTypes/DataTypesNumber.h>
namespace
DB
{
namespace
ErrorCodes
{
extern
const
int
CANNOT_CREATE_EXPRESSION_TEMPLATE
;
extern
const
int
CANNOT_PARSE_EXPRESSION_USING_TEMPLATE
;
extern
const
int
CANNOT_EVALUATE_EXPRESSION_TEMPLATE
;
}
/// Builds a template from the tokens [begin, end) of one expression.
///
/// Number and string literals are replaced with dummy identifiers
/// (see replaceLiteralsWithDummyIdentifiers), the resulting text is parsed again
/// as an expression, wrapped into CAST(expr, '<result type name>') and compiled into
/// actions that take the dummy columns as input.
///
/// Throws CANNOT_CREATE_EXPRESSION_TEMPLATE if the rewritten text cannot be parsed
/// (exceptions from replaceLiteralsWithDummyIdentifiers propagate unchanged).
ConstantExpressionTemplate::ConstantExpressionTemplate(const IDataType & result_column_type, TokenIterator begin, TokenIterator end, const Context & context)
{
    std::pair<String, NamesAndTypesList> expr_template = replaceLiteralsWithDummyIdentifiers(begin, end);

    /// One (initially column-less) entry per literal; `columns` accumulates the parsed values.
    for (const auto & col : expr_template.second)
        literals.insert({nullptr, col.type, col.name});
    columns = literals.cloneEmptyColumns();

    ParserExpression parser;
    Expected expected;
    Tokens template_tokens(expr_template.first.data(), expr_template.first.data() + expr_template.first.size());
    TokenIterator token_iterator1(template_tokens);

    ASTPtr ast_template;
    if (!parser.parse(token_iterator1, ast_template, expected))
        /// Include the generated template text: the message used to end with a dangling ": ".
        throw Exception("Cannot parse template after replacing literals: " + expr_template.first, ErrorCodes::CANNOT_CREATE_EXPRESSION_TEMPLATE);

    addNodesToCastResult(result_column_type, ast_template);
    /// The name is taken after wrapping into CAST, so it identifies the final result column.
    result_column_name = ast_template->getColumnName();

    auto syntax_result = SyntaxAnalyzer(context).analyze(ast_template, expr_template.second);
    actions_on_literals = ExpressionAnalyzer(ast_template, syntax_result, context).getActions(false);
}
/// Reads one expression from `istr` using this template and appends the values of its
/// literals to the accumulated `columns` (one value per literal).
///
/// Fixed (non-literal) tokens must appear in the input exactly as stored in `tokens`,
/// optionally preceded by whitespace; each literal is read with deserializeAsTextQuoted
/// of the type recorded for it in `literals`.
///
/// On a DB::Exception the values already appended for this expression are removed again.
/// If isParseError() accepts the error code, CANNOT_PARSE_EXPRESSION_USING_TEMPLATE is thrown;
/// any other exception is rethrown unchanged.
void ConstantExpressionTemplate::parseExpression(ReadBuffer & istr, const FormatSettings & settings)
{
    size_t cur_column = 0;
    try
    {
        size_t cur_token = 0;
        while (cur_column < literals.columns())
        {
            /// Match all fixed tokens that precede the current literal.
            size_t skip_tokens_until = token_after_literal_idx[cur_column];
            while (cur_token < skip_tokens_until)
            {
                // TODO skip comments
                skipWhitespaceIfAny(istr);
                assertString(tokens[cur_token++], istr);
            }

            skipWhitespaceIfAny(istr);
            const IDataType & type = *literals.getByPosition(cur_column).type;
            type.deserializeAsTextQuoted(*columns[cur_column], istr, settings);
            ++cur_column;
        }

        /// Match the fixed tokens that follow the last literal.
        while (cur_token < tokens.size())
        {
            skipWhitespaceIfAny(istr);
            assertString(tokens[cur_token++], istr);
        }
    }
    catch (DB::Exception & e)
    {
        /// Roll back the literals of this expression that were already appended.
        for (size_t i = 0; i < cur_column; ++i)
            columns[i]->popBack(1);

        if (!isParseError(e.code()))
            throw;

        /// This throw used to be placed after the handler, so it was also reached
        /// when the expression had been parsed successfully.
        throw DB::Exception("Cannot parse expression using template", ErrorCodes::CANNOT_PARSE_EXPRESSION_USING_TEMPLATE);
    }
}
/// Evaluates the template expression over all literal values accumulated so far and
/// returns the resulting column. The accumulated values are consumed: `columns` is
/// replaced with fresh empty columns before the actions are executed.
///
/// Throws LOGICAL_ERROR if the block is empty after evaluation, and
/// CANNOT_EVALUATE_EXPRESSION_TEMPLATE if the result column is missing from it.
ColumnPtr ConstantExpressionTemplate::evaluateAll()
{
    Block block = literals.cloneWithColumns(std::move(columns));
    columns = literals.cloneEmptyColumns();

    actions_on_literals->execute(block);

    const bool nothing_evaluated = !block || block.rows() == 0;
    if (nothing_evaluated)
        throw Exception("Logical error: empty block after evaluation of batch of constant expressions",
                        ErrorCodes::LOGICAL_ERROR);

    if (block.has(result_column_name))
        return block.getByName(result_column_name).column;

    throw Exception("Cannot evaluate template " + result_column_name + ", block structure:\n" + block.dumpStructure(),
                    ErrorCodes::CANNOT_EVALUATE_EXPRESSION_TEMPLATE);
}
/// Converts the token range [begin, end) into the text of a template expression.
///
/// Every Number / StringLiteral token is parsed as a literal and replaced in the text by
/// an identifier `_dummy_<N>`; its type is recorded in the returned list. Every other token
/// is copied verbatim into the text and remembered in `tokens`. For each literal,
/// `token_after_literal_idx` receives the number of fixed tokens seen before it.
/// `begin` is advanced up to `end`.
///
/// Returns the template text (tokens separated by single spaces) and the dummy columns.
/// Throws CANNOT_CREATE_EXPRESSION_TEMPLATE on an error token, on a literal that cannot be
/// parsed, or when the expression contains no literals at all.
std::pair<String, NamesAndTypesList> ConstantExpressionTemplate::replaceLiteralsWithDummyIdentifiers(TokenIterator & begin, TokenIterator & end)
{
    NamesAndTypesList placeholders;
    ParserLiteral literal_parser;
    String template_text;
    size_t fixed_tokens_seen = 0;

    while (begin != end)
    {
        const Token & token = *begin;
        if (token.isError())
            throw DB::Exception("Error in tokens", ErrorCodes::CANNOT_CREATE_EXPRESSION_TEMPLATE);

        // TODO don't convert constant string arguments of functions such as CAST(x, 'type')
        // TODO process Array as one literal to make possible parsing constant arrays of different size
        const bool is_literal = token.type == TokenType::Number || token.type == TokenType::StringLiteral;

        if (!is_literal)
        {
            /// Fixed part of the template: keep its text and step over it.
            tokens.emplace_back(token.begin, token.size());
            template_text.append(tokens.back());
            ++begin;
            ++fixed_tokens_seen;
        }
        else
        {
            Expected expected;
            ASTPtr literal_ast;
            /// The parser itself advances `begin` past the literal.
            if (!literal_parser.parse(begin, literal_ast, expected))
                throw DB::Exception("Cannot determine literal type", ErrorCodes::CANNOT_CREATE_EXPRESSION_TEMPLATE);

            // TODO use nullable type if necessary (e.g. value is not NULL, but result_column_type is nullable and next rows may contain NULLs)
            // TODO parse numbers more carefully: sign is a separate token before number
            Field & literal_value = literal_ast->as<ASTLiteral &>().value;
            DataTypePtr literal_type = DataTypeFactory::instance().get(literal_value.getTypeName());

            // TODO ensure dummy_col_name is unique (there was no _dummy_x identifier in expression)
            String placeholder_name = "_dummy_" + std::to_string(placeholders.size());

            placeholders.push_back(NameAndTypePair(placeholder_name, literal_type));
            token_after_literal_idx.push_back(fixed_tokens_seen);
            template_text.append(placeholder_name);
        }

        template_text.append(" ");
    }

    // TODO
    if (placeholders.empty())
        throw DB::Exception("not implemented yet", ErrorCodes::CANNOT_CREATE_EXPRESSION_TEMPLATE);

    return std::make_pair(template_text, placeholders);
}
void
ConstantExpressionTemplate
::
addNodesToCastResult
(
const
IDataType
&
result_column_type
,
ASTPtr
&
expr
)
{
auto
result_type
=
std
::
make_shared
<
ASTLiteral
>
(
result_column_type
.
getName
());
auto
arguments
=
std
::
make_shared
<
ASTExpressionList
>
();
arguments
->
children
.
push_back
(
std
::
move
(
expr
));
arguments
->
children
.
push_back
(
std
::
move
(
result_type
));
auto
cast
=
std
::
make_shared
<
ASTFunction
>
();
cast
->
name
=
"CAST"
;
cast
->
arguments
=
std
::
move
(
arguments
);
cast
->
children
.
push_back
(
cast
->
arguments
);
expr
=
std
::
move
(
cast
);
}
}
dbms/src/Formats/ConstantExpressionTemplate.h
0 → 100644
浏览文件 @
d3ee3a69
#pragma once
#include <Core/Block.h>
#include <Interpreters/ExpressionActions.h>
#include <Formats/FormatSettings.h>
#include <Parsers/TokenIterator.h>
namespace
DB
{
/// Template of a constant expression in which number and string literals are replaced
/// with dummy columns. Expressions matching the template are parsed with parseExpression(),
/// which only collects their literal values; evaluateAll() then computes the result
/// for all collected rows at once.
class ConstantExpressionTemplate
{
public:
    /// Creates a template from the tokens [begin, end) of an expression; the result of the
    /// expression is wrapped into CAST to `result_column_type`.
    ConstantExpressionTemplate(const IDataType & result_column_type, TokenIterator begin, TokenIterator end, const Context & context);

    /// Reads one expression from `istr` using the template and stores the values of its literals.
    void parseExpression(ReadBuffer & istr, const FormatSettings & settings);

    /// Evaluates the expression for all stored rows and returns the result column.
    ColumnPtr evaluateAll();

private:
    /// Produces the template text and the list of dummy columns; fills `tokens`
    /// and `token_after_literal_idx` along the way.
    std::pair<String, NamesAndTypesList> replaceLiteralsWithDummyIdentifiers(TokenIterator & begin, TokenIterator & end);

    /// Replaces `expr` with CAST(expr, '<result type name>').
    static void addNodesToCastResult(const IDataType & result_column_type, ASTPtr & expr);

    /// Text of the non-literal tokens of the expression, in order of appearance.
    std::vector<String> tokens;

    /// For each literal: how many entries of `tokens` precede it.
    std::vector<size_t> token_after_literal_idx;

    /// Column name of the CAST-wrapped expression, used to find the result after evaluation.
    String result_column_name;

    /// Actions that compute the expression from the dummy columns.
    ExpressionActionsPtr actions_on_literals;

    /// Header with one entry (type and name, no data) per literal.
    Block literals;

    /// Values of literals collected by parseExpression(), one column per literal.
    MutableColumns columns;
};
}
dbms/src/Formats/ValuesBlockInputStream.cpp
浏览文件 @
d3ee3a69
...
...
@@ -26,6 +26,9 @@ namespace ErrorCodes
extern
const
int
CANNOT_PARSE_DATE
;
extern
const
int
SYNTAX_ERROR
;
extern
const
int
VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE
;
extern
const
int
CANNOT_CREATE_EXPRESSION_TEMPLATE
;
extern
const
int
CANNOT_PARSE_EXPRESSION_USING_TEMPLATE
;
extern
const
int
CANNOT_EVALUATE_EXPRESSION_TEMPLATE
;
}
...
...
@@ -34,6 +37,7 @@ ValuesBlockInputStream::ValuesBlockInputStream(ReadBuffer & istr_, const Block &
:
istr
(
istr_
),
header
(
header_
),
context
(
std
::
make_unique
<
Context
>
(
context_
)),
format_settings
(
format_settings
),
max_block_size
(
max_block_size_
)
{
templates
.
resize
(
header
.
columns
());
/// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it.
skipBOMIfExists
(
istr
);
}
...
...
@@ -52,7 +56,6 @@ bool ValuesBlockInputStream::read(MutableColumns & columns)
* But as an exception, it also supports processing arbitrary expressions instead of values.
* This is very inefficient. But if there are no expressions, then there is no overhead.
*/
ParserExpression
parser
;
assertChar
(
'('
,
istr
);
...
...
@@ -66,8 +69,16 @@ bool ValuesBlockInputStream::read(MutableColumns & columns)
bool
rollback_on_exception
=
false
;
try
{
header
.
getByPosition
(
i
).
type
->
deserializeAsTextQuoted
(
*
columns
[
i
],
istr
,
format_settings
);
rollback_on_exception
=
true
;
if
(
templates
[
i
])
{
templates
[
i
].
value
().
parseExpression
(
istr
,
format_settings
);
}
else
{
header
.
getByPosition
(
i
).
type
->
deserializeAsTextQuoted
(
*
columns
[
i
],
istr
,
format_settings
);
rollback_on_exception
=
true
;
}
skipWhitespaceIfAny
(
istr
);
if
(
i
!=
num_columns
-
1
)
...
...
@@ -89,7 +100,8 @@ bool ValuesBlockInputStream::read(MutableColumns & columns)
||
e
.
code
()
==
ErrorCodes
::
CANNOT_PARSE_NUMBER
||
e
.
code
()
==
ErrorCodes
::
CANNOT_PARSE_DATE
||
e
.
code
()
==
ErrorCodes
::
CANNOT_PARSE_DATETIME
||
e
.
code
()
==
ErrorCodes
::
CANNOT_READ_ARRAY_FROM_TEXT
)
||
e
.
code
()
==
ErrorCodes
::
CANNOT_READ_ARRAY_FROM_TEXT
||
e
.
code
()
==
ErrorCodes
::
CANNOT_PARSE_EXPRESSION_USING_TEMPLATE
)
{
/// TODO Case when the expression does not fit entirely in the buffer.
...
...
@@ -100,35 +112,19 @@ bool ValuesBlockInputStream::read(MutableColumns & columns)
if
(
rollback_on_exception
)
columns
[
i
]
->
popBack
(
1
);
const
IDataType
&
type
=
*
header
.
getByPosition
(
i
).
type
;
Expected
expected
;
Tokens
tokens
(
prev_istr_position
,
istr
.
buffer
().
end
());
TokenIterator
token_iterator
(
tokens
);
ASTPtr
ast
;
if
(
!
parser
.
parse
(
token_iterator
,
ast
,
expected
))
throw
Exception
(
"Cannot parse expression of type "
+
type
.
getName
()
+
" here: "
+
String
(
prev_istr_position
,
std
::
min
(
SHOW_CHARS_ON_SYNTAX_ERROR
,
istr
.
buffer
().
end
()
-
prev_istr_position
)),
ErrorCodes
::
SYNTAX_ERROR
);
istr
.
position
()
=
const_cast
<
char
*>
(
token_iterator
->
begin
);
std
::
pair
<
Field
,
DataTypePtr
>
value_raw
=
evaluateConstantExpression
(
ast
,
*
context
);
Field
value
=
convertFieldToType
(
value_raw
.
first
,
type
,
value_raw
.
second
.
get
());
/// Check that we are indeed allowed to insert a NULL.
if
(
value
.
isNull
())
// TODO read(MutableColumns & columns) should not know number of rows in block an should not assign to columns
if
(
likely
(
rows_in_block
))
{
if
(
!
type
.
isNullable
())
throw
Exception
{
"Expression returns value "
+
applyVisitor
(
FieldVisitorToString
(),
value
)
+
", that is out of range of type "
+
type
.
getName
()
+
", at: "
+
String
(
prev_istr_position
,
std
::
min
(
SHOW_CHARS_ON_SYNTAX_ERROR
,
istr
.
buffer
().
end
()
-
prev_istr_position
)),
ErrorCodes
::
VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE
};
if
(
e
.
code
()
==
ErrorCodes
::
CANNOT_PARSE_EXPRESSION_USING_TEMPLATE
)
{
/// Expression in the current row is not match generated on the first row template.
/// Evaluate expressions, which were parsed using this template.
columns
[
i
]
=
std
::
move
(
*
templates
[
i
].
value
().
evaluateAll
()).
mutate
();
/// And do not use the template anymore.
templates
[
i
].
reset
();
}
}
columns
[
i
]
->
insert
(
value
);
parseExpression
(
prev_istr_position
,
columns
,
i
,
rows_in_block
==
0
);
skipWhitespaceIfAny
(
istr
);
...
...
@@ -153,13 +149,13 @@ Block ValuesBlockInputStream::readImpl()
{
MutableColumns
columns
=
header
.
cloneEmptyColumns
();
for
(
size_t
rows
=
0
;
rows
<
max_block_size
;
++
rows
)
for
(
rows_in_block
=
0
;
rows_in_block
<
max_block_size
;
++
rows_in_block
)
{
try
{
++
total_rows
;
if
(
!
read
(
columns
))
break
;
++
total_rows
;
}
catch
(
Exception
&
e
)
{
...
...
@@ -169,12 +165,77 @@ Block ValuesBlockInputStream::readImpl()
}
}
/// Evaluate expressions, which were parsed using this template, if any
for
(
size_t
i
=
0
;
i
<
columns
.
size
();
++
i
)
{
if
(
templates
[
i
])
{
columns
[
i
]
=
std
::
move
(
*
templates
[
i
].
value
().
evaluateAll
()).
mutate
();
templates
[
i
].
reset
();
}
}
if
(
columns
.
empty
()
||
columns
[
0
]
->
empty
())
return
{};
return
header
.
cloneWithColumns
(
std
::
move
(
columns
));
}
/// Parses and evaluates an arbitrary constant expression that starts at `prev_istr_position`
/// and is meant for column `column_idx`; moves `istr` to the end of the expression.
///
/// When `generate_template` is set, tries to build a ConstantExpressionTemplate from the
/// expression and to re-read the expression through it; in that case the value is stored by
/// the template. If that fails with a DB::Exception, or when `generate_template` is not set,
/// the evaluated value is inserted into `columns[column_idx]` directly.
///
/// Returns the evaluated value converted to the column type.
/// Throws SYNTAX_ERROR if the expression cannot be parsed and
/// VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE if it yields NULL for a non-Nullable column.
Field ValuesBlockInputStream::parseExpression(char * prev_istr_position, MutableColumns & columns, size_t column_idx, bool generate_template)
{
    const IDataType & type = *header.getByPosition(column_idx).type;

    /// Beginning of the unparsed input, shown in error messages.
    auto input_excerpt = [&]
    {
        return String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position));
    };

    Expected expected;
    Tokens tokens(prev_istr_position, istr.buffer().end());
    TokenIterator token_iterator(tokens);

    ASTPtr ast;
    if (!parser.parse(token_iterator, ast, expected))
        throw Exception("Cannot parse expression of type " + type.getName() + " here: " + input_excerpt(),
                        ErrorCodes::SYNTAX_ERROR);

    /// Position of the first token after the expression.
    char * const after_expression = const_cast<char *>(token_iterator->begin);
    istr.position() = after_expression;

    std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(ast, *context);
    Field value = convertFieldToType(value_raw.first, type, value_raw.second.get());

    /// Check that we are indeed allowed to insert a NULL.
    if (value.isNull() && !type.isNullable())
        throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value)
                            + ", that is out of range of type " + type.getName()
                            + ", at: " + input_excerpt(),
                        ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE};

    if (!generate_template)
    {
        columns[column_idx]->insert(value);
        return value;
    }

    try
    {
        templates[column_idx] = ConstantExpressionTemplate(type, TokenIterator(tokens), token_iterator, *context);
        /// Read the same expression once more, this time through the template.
        istr.position() = prev_istr_position;
        templates[column_idx].value().parseExpression(istr, format_settings);
    }
    catch (DB::Exception &)
    {
        /// Continue parsing without template
        templates[column_idx].reset();
        columns[column_idx]->insert(value);
        istr.position() = after_expression;
    }

    return value;
}
void
registerInputFormatValues
(
FormatFactory
&
factory
)
{
...
...
dbms/src/Formats/ValuesBlockInputStream.h
浏览文件 @
d3ee3a69
...
...
@@ -3,6 +3,7 @@
#include <Core/Block.h>
#include <DataStreams/IBlockInputStream.h>
#include <Formats/FormatSettings.h>
#include <Formats/ConstantExpressionTemplate.h>
namespace
DB
...
...
@@ -33,15 +34,22 @@ public:
bool
read
(
MutableColumns
&
columns
);
private:
typedef
std
::
vector
<
std
::
optional
<
ConstantExpressionTemplate
>>
ConstantExpressionTemplates
;
Block
readImpl
()
override
;
Field
parseExpression
(
char
*
prev_istr_position
,
MutableColumns
&
columns
,
size_t
column_idx
,
bool
generate_template
);
private:
ReadBuffer
&
istr
;
Block
header
;
std
::
unique_ptr
<
Context
>
context
;
/// pimpl
const
FormatSettings
format_settings
;
UInt64
max_block_size
;
UInt64
rows_in_block
=
0
;
size_t
total_rows
=
0
;
ParserExpression
parser
;
ConstantExpressionTemplates
templates
;
};
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录