Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
629cb44d
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
629cb44d
编写于
1月 28, 2021
作者:
A
Alexander Kuzmenkov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
everything was wrong
上级
0d69249c
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
124 addition
and
219 deletion
+124
-219
src/Parsers/ExpressionElementParsers.cpp
src/Parsers/ExpressionElementParsers.cpp
+1
-1
src/Processors/Transforms/WindowTransform.cpp
src/Processors/Transforms/WindowTransform.cpp
+108
-196
src/Processors/Transforms/WindowTransform.h
src/Processors/Transforms/WindowTransform.h
+15
-22
未找到文件。
src/Parsers/ExpressionElementParsers.cpp
浏览文件 @
629cb44d
...
...
@@ -577,7 +577,7 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
}
}
if
(
node
->
frame
!=
WindowFrame
{}
)
if
(
!
(
node
->
frame
==
WindowFrame
{})
)
{
node
->
frame
.
is_default
=
false
;
}
...
...
src/Processors/Transforms/WindowTransform.cpp
浏览文件 @
629cb44d
...
...
@@ -165,97 +165,103 @@ void WindowTransform::advancePartitionEnd()
partition_etalon
=
RowNumber
{
block_number
,
block_rows
-
1
};
}
void
WindowTransform
::
advanceGroupEnd
()
void
WindowTransform
::
advanceFrameStart
()
{
// Frame start is always UNBOUNDED PRECEDING for now, so we don't have to
// move it. It is initialized when the new partition starts.
}
bool
WindowTransform
::
arePeers
(
const
RowNumber
&
x
,
const
RowNumber
&
y
)
const
{
if
(
group_ended
)
if
(
x
==
y
)
{
return
;
// For convenience, a row is always its own peer.
return
true
;
}
switch
(
window_description
.
frame
.
type
)
if
(
window_description
.
frame
.
type
==
WindowFrame
::
FrameType
::
Rows
)
{
case
WindowFrame
::
FrameType
::
Range
:
case
WindowFrame
::
FrameType
::
Groups
:
advanceGroupEndOrderBy
();
break
;
case
WindowFrame
::
FrameType
::
Rows
:
advanceGroupEndTrivial
();
break
;
// For ROWS frame, row is only peers with itself (checked above);
return
false
;
}
}
void
WindowTransform
::
advanceGroupEndTrivial
()
{
// ROWS mode, peer groups always contain only the current row.
// We cannot advance the groups if the group start is already beyond the
// end of partition.
assert
(
group_start
<
partition_end
);
group_end
=
group_start
;
advanceRowNumber
(
group_end
);
group_ended
=
true
;
}
void
WindowTransform
::
advanceGroupEndOrderBy
()
{
// For RANGE frame, rows that compare equal w/ORDER BY are peers.
assert
(
window_description
.
frame
.
type
==
WindowFrame
::
FrameType
::
Range
);
const
size_t
n
=
order_by_indices
.
size
();
if
(
n
==
0
)
{
// No ORDER BY, so all rows are the same group. The group will end
// with the partition.
group_end
=
partition_end
;
group_ended
=
partition_ended
;
// No ORDER BY, so all rows are peers.
return
true
;
}
// `partition_end` is either end of partition or end of data.
for
(;
group_end
<
partition_end
;
advanceRowNumber
(
group_end
)
)
size_t
i
=
0
;
for
(;
i
<
n
;
i
++
)
{
// Check for group end.
size_t
i
=
0
;
for
(;
i
<
n
;
i
++
)
const
auto
*
column_x
=
inputAt
(
x
)[
order_by_indices
[
i
]].
get
();
const
auto
*
column_y
=
inputAt
(
y
)[
order_by_indices
[
i
]].
get
();
if
(
column_x
->
compareAt
(
x
.
row
,
y
.
row
,
*
column_y
,
1
/* nan_direction_hint */
)
!=
0
)
{
const
auto
*
ref
=
inputAt
(
group_start
)[
order_by_indices
[
i
]].
get
();
const
auto
*
c
=
inputAt
(
group_end
)[
order_by_indices
[
i
]].
get
();
if
(
c
->
compareAt
(
group_end
.
row
,
group_start
.
row
,
*
ref
,
1
/* nan_direction_hint */
)
!=
0
)
{
break
;
}
return
false
;
}
}
if
(
i
<
n
)
return
true
;
}
void
WindowTransform
::
advanceFrameEndCurrentRow
()
{
// We only process one block here, and frame_end must be already in it: if
// we didn't find the end in the previous block, frame_end is now the first
// row of the current block. We need this knowledge to write a simpler loop
// (only loop over rows and not over blocks), that should hopefully be more
// efficient.
// partition_end is either in this new block or past-the-end.
assert
(
frame_end
.
block
==
partition_end
.
block
||
frame_end
.
block
+
1
==
partition_end
.
block
);
if
(
frame_end
==
partition_end
)
{
// The case when we get a new block and find out that the partition has
// ended.
assert
(
partition_ended
);
frame_ended
=
partition_ended
;
return
;
}
const
auto
block_rows
=
blockRowsNumber
(
frame_end
);
// We could retreat the frame_end here, but for some reason I am reluctant
// to do this... It would have better data locality.
auto
reference
=
current_row
;
for
(;
frame_end
.
row
<
block_rows
;
++
frame_end
.
row
)
{
if
(
!
arePeers
(
reference
,
frame_end
))
{
group_ended
=
true
;
//fmt::print(stderr, "{} and {} don't match\n", reference, frame_end);
frame_ended
=
true
;
return
;
}
reference
=
frame_end
;
}
assert
(
group_end
==
partition_end
);
if
(
partition_ended
)
{
// A corner case -- the ORDER BY columns were the same, but the group
// still ended because the partition has ended.
group_ended
=
true
;
}
}
// Got to the end of current block, have to properly update the row number.
++
frame_end
.
block
;
frame_end
.
row
=
0
;
void
WindowTransform
::
advanceFrameStart
()
{
// Frame start is always UNBOUNDED PRECEDING for now, so we don't have to
// move it. It is initialized when the new partition starts.
// Got to the end of partition (frame ended as well then) or end of data.
assert
(
frame_end
==
partition_end
);
frame_ended
=
partition_ended
;
}
void
WindowTransform
::
advanceFrameEnd
()
{
// This should be called when we know the boundaries of the group (probably
// not a fundamental requirement, but currently it's written this way).
assert
(
group_ended
);
// No reason for this function to be called again after it succeeded.
assert
(
!
frame_ended
);
const
auto
frame_end_before
=
frame_end
;
// Frame end is always the current group end, for now.
// In ROWS mode the group is going to contain only the current row.
frame_end
=
group_end
;
frame_ended
=
group_ended
;
// The only frame end we have for now is CURRENT ROW.
advanceFrameEndCurrentRow
();
// Add the columns over which we advanced the frame to the aggregate function
// states.
...
...
@@ -321,13 +327,10 @@ void WindowTransform::advanceFrameEnd()
}
}
void
WindowTransform
::
writeOut
Group
()
void
WindowTransform
::
writeOut
CurrentRow
()
{
// fmt::print(stderr, "write out group [{}..{})\n",
// group_start, group_end);
// Empty groups don't make sense.
assert
(
group_start
<
group_end
);
assert
(
current_row
<
partition_end
);
assert
(
current_row
.
block
>=
first_block_number
);
for
(
size_t
wi
=
0
;
wi
<
workspaces
.
size
();
++
wi
)
{
...
...
@@ -336,93 +339,11 @@ void WindowTransform::writeOutGroup()
const
auto
*
a
=
f
.
aggregate_function
.
get
();
auto
*
buf
=
ws
.
aggregate_function_state
.
data
();
// We'll calculate the value once for the first row in the group, and
// insert its copy for each other row in the group.
IColumn
*
reference_column
=
outputAt
(
group_start
)[
wi
].
get
();
const
size_t
reference_row
=
group_start
.
row
;
IColumn
*
result_column
=
outputAt
(
current_row
)[
wi
].
get
();
// FIXME does it also allocate the result on the arena?
// We'll have to pass it out with blocks then...
a
->
insertResultInto
(
buf
,
*
reference_column
,
arena
.
get
());
// The row we just added to the end of the column must correspond to the
// first row of the group.
assert
(
reference_column
->
size
()
==
reference_row
+
1
);
// fmt::print(stderr, "calculated value of function {} is '{}'\n",
// wi, toString((*reference_column)[reference_row]));
// Now duplicate the calculated value into all other rows.
auto
first_row_to_copy_to
=
group_start
;
advanceRowNumber
(
first_row_to_copy_to
);
// We use two explicit loops here instead of using advanceRowNumber(),
// because we want to batch the inserts per-block.
// Unfortunately this leads to tricky loop conditions, because the
// frame_end might be either a past-the-end block, or a valid block, in
// which case we also have to process its head. We have to avoid stepping
// into the past-the-end block because it might not be valid.
// Moreover, the past-the-end row is not in the past-the-end block, but
// in the block before it.
// And we also have to remember to reset the row number when moving to
// the next block.
uint64_t
past_the_end_block
;
uint64_t
past_the_end_row
;
if
(
group_end
.
row
==
0
)
{
// group_end might not be valid.
past_the_end_block
=
group_end
.
block
;
// Otherwise a group would end at the start of data, this is not
// possible.
assert
(
group_end
.
block
>
0
);
const
size_t
first_valid_block
=
group_end
.
block
-
1
;
assert
(
first_valid_block
>=
first_block_number
);
past_the_end_row
=
blocks
[
first_valid_block
-
first_block_number
]
.
input_columns
[
0
]
->
size
();
}
else
{
past_the_end_block
=
group_end
.
block
+
1
;
past_the_end_row
=
group_end
.
row
;
}
for
(
auto
block_index
=
first_row_to_copy_to
.
block
;
block_index
<
past_the_end_block
;
++
block_index
)
{
const
auto
&
block
=
blocks
[
block_index
-
first_block_number
];
// We process tail of the first block, all rows of intermediate
// blocks, and the head of the last block.
const
auto
block_first_row
=
(
block_index
==
first_row_to_copy_to
.
block
)
?
first_row_to_copy_to
.
row
:
0
;
const
auto
block_last_row
=
((
block_index
+
1
)
==
past_the_end_block
)
?
past_the_end_row
:
block
.
numRows
();
// fmt::print(stderr,
// "group rest [{}, {}), pteb {}, pter {}, cur {}, fr {}, lr {}\n",
// group_start, group_end, past_the_end_block, group_end.row,
// block_index, block_first_row, block_last_row);
// The number of the elements left to insert may be zero, but we must
// notice it on the first block. Other blocks shouldn't be empty,
// because we don't generally have empty block, and advanceRowNumber()
// doesn't generate past-the-end row numbers, so we wouldn't get into
// a block we don't want to process.
if
(
block_first_row
==
block_last_row
)
{
assert
(
block_index
==
first_row_to_copy_to
.
block
);
break
;
}
block
.
output_columns
[
wi
]
->
insertManyFrom
(
*
reference_column
,
reference_row
,
block_last_row
-
block_first_row
);
}
a
->
insertResultInto
(
buf
,
*
result_column
,
arena
.
get
());
}
first_not_ready_row
=
group_end
;
}
void
WindowTransform
::
appendChunk
(
Chunk
&
chunk
)
...
...
@@ -434,6 +355,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
// have it if it's end of data, though.
if
(
!
input_is_finished
)
{
assert
(
chunk
.
hasRows
());
blocks
.
push_back
({});
auto
&
block
=
blocks
.
back
();
block
.
input_columns
=
chunk
.
detachColumns
();
...
...
@@ -470,25 +392,11 @@ void WindowTransform::appendChunk(Chunk & chunk)
assert
(
input_is_finished
);
}
// After that, advance the peer groups. We can advance peer groups until
// the end of partition or current end of data, which is precisely the
// description of `partition_end`.
while
(
group_start
<
partition_end
)
// After that, try to calculate window functions for each next row.
// We can continue until the end of partition or current end of data,
// which is precisely the definition of `partition_end`.
while
(
current_row
<
partition_end
)
{
advanceGroupEnd
();
// fmt::print(stderr, "group [{}, {}), {}\n", group_start, group_end,
// group_ended);
if
(
!
group_ended
)
{
// Wait for more input data to find the end of group.
assert
(
!
input_is_finished
);
assert
(
!
partition_ended
);
return
;
}
// The group ended.
// Advance the frame start, updating the state of the aggregate
// functions.
advanceFrameStart
();
...
...
@@ -496,6 +404,9 @@ void WindowTransform::appendChunk(Chunk & chunk)
// functions.
advanceFrameEnd
();
// fmt::print(stderr, "row {} frame [{}, {}) {}\n",
// current_row, frame_start, frame_end, frame_ended);
if
(
!
frame_ended
)
{
// Wait for more input data to find the end of frame.
...
...
@@ -504,16 +415,16 @@ void WindowTransform::appendChunk(Chunk & chunk)
return
;
}
// Write out the aggregation results
writeOutGroup
(
);
// The frame shouldn't be empty (probably?).
assert
(
frame_start
<
frame_end
);
// Move to the next group.
// The frame will have to be recalculated.
frame_ended
=
false
;
// Write out the aggregation results.
writeOutCurrentRow
();
// Move to the next group.
group_ended
=
false
;
group_start
=
group_end
;
// Move to the next row. The frame will have to be recalculated.
advanceRowNumber
(
current_row
);
first_not_ready_row
=
current_row
;
frame_ended
=
false
;
}
if
(
input_is_finished
)
...
...
@@ -543,10 +454,7 @@ void WindowTransform::appendChunk(Chunk & chunk)
// for now.
frame_start
=
new_partition_start
;
frame_end
=
new_partition_start
;
group_start
=
new_partition_start
;
group_end
=
new_partition_start
;
// The group pointers are already reset to the partition start, see the
// above loop.
assert
(
current_row
==
new_partition_start
);
// fmt::print(stderr, "reinitialize agg data at start of {}\n",
// new_partition_start);
...
...
@@ -653,6 +561,17 @@ IProcessor::Status WindowTransform::prepare()
if
(
!
has_input
&&
input
.
hasData
())
{
input_data
=
input
.
pullData
(
true
/* set_not_needed */
);
// If we got an exception from input, just return it and mark that we're
// finished.
if
(
input_data
.
exception
)
{
output
.
pushData
(
std
::
move
(
input_data
));
output
.
finish
();
return
Status
::
PortFull
;
}
has_input
=
true
;
// Now we have new input and can try to generate more output in work().
...
...
@@ -678,14 +597,8 @@ IProcessor::Status WindowTransform::prepare()
void
WindowTransform
::
work
()
{
if
(
input_data
.
exception
)
{
/// Skip transform in case of exception.
output_data
=
std
::
move
(
input_data
);
has_input
=
false
;
has_output
=
true
;
return
;
}
// Exceptions should be skipped in prepare().
assert
(
!
input_data
.
exception
);
assert
(
has_input
||
input_is_finished
);
...
...
@@ -697,7 +610,6 @@ void WindowTransform::work()
catch
(
DB
::
Exception
&
)
{
output_data
.
exception
=
std
::
current_exception
();
has_output
=
true
;
has_input
=
false
;
return
;
}
...
...
@@ -705,12 +617,12 @@ void WindowTransform::work()
// We don't really have to keep the entire partition, and it can be big, so
// we want to drop the starting blocks to save memory.
// We can drop the old blocks if we already returned them as output, and the
// frame, group and the partition etalon are already past them. Note that the
// frame start can be further than group start for some frame specs (e.g.
// EXCLUDE CURRENT ROW), so we have to check both.
// frame, the current row and the partition etalon are already past them.
// Note that the frame start can be further than current row for some frame
// specs (e.g. EXCLUDE CURRENT ROW), so we have to check both.
const
auto
first_used_block
=
std
::
min
(
next_output_block_number
,
std
::
min
(
frame_start
.
block
,
std
::
min
(
group_start
.
block
,
std
::
min
(
current_row
.
block
,
partition_etalon
.
block
)));
if
(
first_block_number
<
first_used_block
)
{
...
...
@@ -723,7 +635,7 @@ void WindowTransform::work()
assert
(
next_output_block_number
>=
first_block_number
);
assert
(
frame_start
.
block
>=
first_block_number
);
assert
(
group_start
.
block
>=
first_block_number
);
assert
(
current_row
.
block
>=
first_block_number
);
}
}
...
...
src/Processors/Transforms/WindowTransform.h
浏览文件 @
629cb44d
...
...
@@ -61,17 +61,14 @@ struct RowNumber
* be sorted by PARTITION BY (in any order), then by ORDER BY.
* We need to track the following pointers:
* 1) boundaries of partition -- rows that compare equal w/PARTITION BY.
* 2) boundaries of peer group -- rows that compare equal w/ORDER BY (empty
* ORDER BY means all rows are peers).
* 3) boundaries of the frame.
* 2) current row for which we will compute the window functions.
* 3) boundaries of the frame for this row.
* Both the peer group and the frame are inside the partition, but can have any
* position relative to each other.
* All pointers only move forward. For partition and group boundaries, this is
* ensured by the order of input data. This property also trivially holds for
* the ROWS and GROUPS frames. For the RANGE frame, the proof requires the
* additional fact that the ranges are specified in terms of (the single)
* ORDER BY column.
* The value of the window function is the same for all rows of the peer group.
* All pointers only move forward. For partition boundaries, this is ensured by
* the order of input data. This property also trivially holds for the ROWS and
* GROUPS frames. For the RANGE frame, the proof requires the additional fact
* that the ranges are specified in terms of (the single) ORDER BY column.
*/
class
WindowTransform
:
public
IProcessor
/* public ISimpleTransform */
{
...
...
@@ -105,13 +102,11 @@ public:
private:
void
advancePartitionEnd
();
void
advanceGroupEnd
();
void
advanceGroupEndOrderBy
();
void
advanceGroupEndTrivial
();
void
advanceGroupEndRange
();
void
advanceFrameStart
();
void
advanceFrameEnd
();
void
writeOutGroup
();
void
advanceFrameEndCurrentRow
();
bool
arePeers
(
const
RowNumber
&
x
,
const
RowNumber
&
y
)
const
;
void
writeOutCurrentRow
();
Columns
&
inputAt
(
const
RowNumber
&
x
)
{
...
...
@@ -179,7 +174,8 @@ private:
public:
/*
* Data (formerly) inherited from ISimpleTransform.
* Data (formerly) inherited from ISimpleTransform, needed for the
* implementation of the IProcessor interface.
*/
InputPort
&
input
;
OutputPort
&
output
;
...
...
@@ -231,21 +227,18 @@ public:
RowNumber
partition_end
;
bool
partition_ended
=
false
;
// Current peer group is [group_start, group_end) if group_ended,
// [group_start, ?) otherwise.
RowNumber
group_start
;
RowNumber
group_end
;
bool
group_ended
=
false
;
// This is the row for which we are computing the window functions now.
RowNumber
current_row
;
// The frame is [frame_start, frame_end) if frame_ended, and unknown
// otherwise. Note that when we move to the next peer group, both the
// otherwise. Note that when we move to the next row, both the
// frame_start and the frame_end may jump forward by an unknown amount of
// blocks, e.g. if we use a RANGE frame. This means that sometimes we
// know neither frame_end nor frame_start.
// We update the states of the window functions as we track the frame
// boundaries.
// After we have found the final boundaries of the frame, we can immediately
// output the result for the current group, w/o waiting for more data.
// output the result for the current row, w/o waiting for more data.
RowNumber
frame_start
;
RowNumber
frame_end
;
bool
frame_ended
=
false
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录