Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Gpdb
提交
01a819ab
G
Gpdb
项目概览
Greenplum
/
Gpdb
通知
7
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
G
Gpdb
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
01a819ab
编写于
6月 11, 2001
作者:
T
Tom Lane
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Make planner compute the number of hash buckets the same way that
nodeHash.c will compute it (by sharing code).
上级
ccda1a67
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
161 addition
and
127 deletion
+161
-127
src/backend/executor/nodeHash.c
src/backend/executor/nodeHash.c
+119
-94
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/costsize.c
+37
-29
src/include/executor/nodeHash.h
src/include/executor/nodeHash.h
+5
-4
未找到文件。
src/backend/executor/nodeHash.c
浏览文件 @
01a819ab
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* $Id: nodeHash.c,v 1.5
7 2001/05/27 20:42:18
tgl Exp $
* $Id: nodeHash.c,v 1.5
8 2001/06/11 00:17:07
tgl Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -16,14 +16,12 @@
* ExecHash - generate an in-memory hash table of the relation
* ExecInitHash - initialize node and subnodes
* ExecEndHash - shutdown node and subnodes
*
*/
#include "postgres.h"
#include <sys/types.h>
#include <math.h>
#include "postgres.h"
#include "executor/execdebug.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
...
...
@@ -209,111 +207,27 @@ ExecEndHash(Hash *node)
* create a hashtable in shared memory for hashjoin.
* ----------------------------------------------------------------
*/
#define FUDGE_FAC 2.0
HashJoinTable
ExecHashTableCreate
(
Hash
*
node
)
{
Plan
*
outerNode
;
double
ntuples
;
int
tupsize
;
double
inner_rel_bytes
;
double
hash_table_bytes
;
int
nbatch
;
HashJoinTable
hashtable
;
int
nbuckets
;
Plan
*
outerNode
;
int
totalbuckets
;
int
bucketsize
;
int
nbuckets
;
int
nbatch
;
int
i
;
MemoryContext
oldcxt
;
/*
* Get information about the size of the relation to be hashed (it's
* the "outer" subtree of this node, but the inner relation of the
* hashjoin).
*
* Caution: this is only the planner's estimates, and so can't be trusted
* too far. Apply a healthy fudge factor.
* hashjoin). Compute the appropriate size of the hash table.
*/
outerNode
=
outerPlan
(
node
);
ntuples
=
outerNode
->
plan_rows
;
if
(
ntuples
<=
0
.
0
)
/* force a plausible size if no info */
ntuples
=
1000
.
0
;
/*
* estimate tupsize based on footprint of tuple in hashtable... but
* what about palloc overhead?
*/
tupsize
=
MAXALIGN
(
outerNode
->
plan_width
)
+
MAXALIGN
(
sizeof
(
HashJoinTupleData
));
inner_rel_bytes
=
ntuples
*
tupsize
*
FUDGE_FAC
;
/*
* Target hashtable size is SortMem kilobytes, but not less than
* sqrt(estimated inner rel size), so as to avoid horrible
* performance.
*/
hash_table_bytes
=
sqrt
(
inner_rel_bytes
);
if
(
hash_table_bytes
<
(
SortMem
*
1024L
))
hash_table_bytes
=
SortMem
*
1024L
;
/*
* Count the number of hash buckets we want for the whole relation,
* for an average bucket load of NTUP_PER_BUCKET (per virtual
* bucket!).
*/
totalbuckets
=
(
int
)
ceil
(
ntuples
*
FUDGE_FAC
/
NTUP_PER_BUCKET
);
/*
* Count the number of buckets we think will actually fit in the
* target memory size, at a loading of NTUP_PER_BUCKET (physical
* buckets). NOTE: FUDGE_FAC here determines the fraction of the
* hashtable space reserved to allow for nonuniform distribution of
* hash values. Perhaps this should be a different number from the
* other uses of FUDGE_FAC, but since we have no real good way to pick
* either one...
*/
bucketsize
=
NTUP_PER_BUCKET
*
tupsize
;
nbuckets
=
(
int
)
(
hash_table_bytes
/
(
bucketsize
*
FUDGE_FAC
));
if
(
nbuckets
<=
0
)
nbuckets
=
1
;
if
(
totalbuckets
<=
nbuckets
)
{
ExecChooseHashTableSize
(
outerNode
->
plan_rows
,
outerNode
->
plan_width
,
&
totalbuckets
,
&
nbuckets
,
&
nbatch
);
/*
* We have enough space, so no batching. In theory we could even
* reduce nbuckets, but since that could lead to poor behavior if
* estimated ntuples is much less than reality, it seems better to
* make more buckets instead of fewer.
*/
totalbuckets
=
nbuckets
;
nbatch
=
0
;
}
else
{
/*
* Need to batch; compute how many batches we want to use. Note
* that nbatch doesn't have to have anything to do with the ratio
* totalbuckets/nbuckets; in fact, it is the number of groups we
* will use for the part of the data that doesn't fall into the
* first nbuckets hash buckets.
*/
nbatch
=
(
int
)
ceil
((
inner_rel_bytes
-
hash_table_bytes
)
/
hash_table_bytes
);
if
(
nbatch
<=
0
)
nbatch
=
1
;
}
/*
* Now, totalbuckets is the number of (virtual) hashbuckets for the
* whole relation, and nbuckets is the number of physical hashbuckets
* we will use in the first pass. Data falling into the first
* nbuckets virtual hashbuckets gets handled in the first pass;
* everything else gets divided into nbatch batches to be processed in
* additional passes.
*/
#ifdef HJDEBUG
printf
(
"nbatch = %d, totalbuckets = %d, nbuckets = %d
\n
"
,
nbatch
,
totalbuckets
,
nbuckets
);
...
...
@@ -407,6 +321,117 @@ ExecHashTableCreate(Hash *node)
return
hashtable
;
}
/*
 * Compute appropriate size for hashtable given the estimated size of the
 * relation to be hashed (number of rows and average row width).
 *
 * Caution: the input is only the planner's estimates, and so can't be
 * trusted too far.  Apply a healthy fudge factor.
 *
 * This is exported so that the planner's costsize.c can use it.
 *
 * Inputs:
 *	ntuples		estimated number of rows to be hashed; a value <= 0 means
 *				"no info" and is replaced by a default of 1000 rows
 *	tupwidth	estimated average row width, in bytes
 *
 * Outputs (the pointers are dereferenced unconditionally):
 *	*virtualbuckets		number of (virtual) hash buckets for the whole
 *						relation
 *	*physicalbuckets	number of physical hash buckets used in the first
 *						pass
 *	*numbatches			number of batches for the data that does not fall
 *						into the first pass; 0 means no batching is needed
 *
 * All double-to-int conversions saturate at the largest int value, so an
 * absurdly large row estimate yields a huge-but-valid result instead of
 * undefined behavior.
 */

/* Target bucket loading (tuples per bucket) */
#define NTUP_PER_BUCKET			10
/* Fudge factor to allow for inaccuracy of input estimates */
#define FUDGE_FAC				2.0
/*
 * Largest value an int can hold, derived locally so that no extra header is
 * needed.  Assumes unsigned int has exactly one more value bit than int.
 */
#define HASH_MAX_INT			((int) (~0U >> 1))

/*
 * Convert a non-negative double estimate to int, saturating at HASH_MAX_INT.
 * Converting an out-of-range double to int is undefined in C, so the range
 * check has to come before the cast.  (A NaN input also saturates, because
 * the comparison below is false for NaN.)
 */
static int
hash_estimate_to_int(double value)
{
	if (value < (double) HASH_MAX_INT)
		return (int) value;
	return HASH_MAX_INT;
}

void
ExecChooseHashTableSize(double ntuples, int tupwidth,
						int *virtualbuckets,
						int *physicalbuckets,
						int *numbatches)
{
	int			tupsize;
	double		inner_rel_bytes;
	double		hash_table_bytes;
	int			nbatch;
	int			nbuckets;
	int			totalbuckets;
	int			bucketsize;

	/* Force a plausible relation size if no info */
	if (ntuples <= 0.0)
		ntuples = 1000.0;

	/*
	 * Estimate tupsize based on footprint of tuple in hashtable... but
	 * what about palloc overhead?
	 */
	tupsize = MAXALIGN(tupwidth) + MAXALIGN(sizeof(HashJoinTupleData));
	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;

	/*
	 * Target hashtable size is SortMem kilobytes, but not less than
	 * sqrt(estimated inner rel size), so as to avoid horrible
	 * performance.
	 */
	hash_table_bytes = sqrt(inner_rel_bytes);
	if (hash_table_bytes < (SortMem * 1024L))
		hash_table_bytes = SortMem * 1024L;

	/*
	 * Count the number of hash buckets we want for the whole relation,
	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
	 * bucket!).  The count has to fit in an int, so saturate rather than
	 * overflow when the row estimate is enormous.
	 */
	totalbuckets = hash_estimate_to_int(ceil(ntuples * FUDGE_FAC /
											 NTUP_PER_BUCKET));

	/*
	 * Count the number of buckets we think will actually fit in the
	 * target memory size, at a loading of NTUP_PER_BUCKET (physical
	 * buckets). NOTE: FUDGE_FAC here determines the fraction of the
	 * hashtable space reserved to allow for nonuniform distribution of
	 * hash values.  Perhaps this should be a different number from the
	 * other uses of FUDGE_FAC, but since we have no real good way to pick
	 * either one...
	 */
	bucketsize = NTUP_PER_BUCKET * tupsize;
	nbuckets = hash_estimate_to_int(hash_table_bytes /
									(bucketsize * FUDGE_FAC));
	if (nbuckets <= 0)
		nbuckets = 1;

	if (totalbuckets <= nbuckets)
	{
		/*
		 * We have enough space, so no batching.  In theory we could even
		 * reduce nbuckets, but since that could lead to poor behavior if
		 * estimated ntuples is much less than reality, it seems better to
		 * make more buckets instead of fewer.
		 */
		totalbuckets = nbuckets;
		nbatch = 0;
	}
	else
	{
		/*
		 * Need to batch; compute how many batches we want to use. Note
		 * that nbatch doesn't have to have anything to do with the ratio
		 * totalbuckets/nbuckets; in fact, it is the number of groups we
		 * will use for the part of the data that doesn't fall into the
		 * first nbuckets hash buckets.  Saturate here too, for the same
		 * reason as for totalbuckets.
		 */
		nbatch = hash_estimate_to_int(ceil((inner_rel_bytes - hash_table_bytes) /
										   hash_table_bytes));
		if (nbatch <= 0)
			nbatch = 1;
	}

	/*
	 * Now, totalbuckets is the number of (virtual) hashbuckets for the
	 * whole relation, and nbuckets is the number of physical hashbuckets
	 * we will use in the first pass.  Data falling into the first
	 * nbuckets virtual hashbuckets gets handled in the first pass;
	 * everything else gets divided into nbatch batches to be processed in
	 * additional passes.
	 */
	*virtualbuckets = totalbuckets;
	*physicalbuckets = nbuckets;
	*numbatches = nbatch;
}
/* ----------------------------------------------------------------
* ExecHashTableDestroy
*
...
...
src/backend/optimizer/path/costsize.c
浏览文件 @
01a819ab
...
...
@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.7
6 2001/06/10 02:59:35
tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.7
7 2001/06/11 00:17:08
tgl Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -791,19 +791,19 @@ cost_hashjoin(Path *path, Query *root,
* smart enough to figure out how the restrict clauses might change the
* distribution, so this will have to do for now.
*
* The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
* number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
* a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
* be reached only if the data values are uniformly distributed among the
* buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
* data values, and (b) a not-too-skewed data distribution.  Otherwise the
* buckets will be nonuniformly occupied.  If the other relation in the join
* has a similar distribution, the most-loaded buckets are exactly those
* that will be probed most often.  Therefore, the "average" bucket size for
* costing purposes should really be taken as something close to the "worst
* case" bucket size.  We try to estimate this by first scaling up if there
* are too few distinct data values, and then scaling up again by the
* ratio of the most common value's frequency to the average frequency.
* We can get the number of buckets the executor will use for the given
* input relation.  If the data were perfectly distributed, with the same
* number of tuples going into each available bucket, then the bucketsize
* fraction would be 1/nbuckets.  But this happy state of affairs will occur
* only if (a) there are at least nbuckets distinct data values, and (b)
* we have a not-too-skewed data distribution.  Otherwise the buckets will
* be nonuniformly occupied.  If the other relation in the join has a key
* distribution similar to this one's, then the most-loaded buckets are
* exactly those that will be probed most often.  Therefore, the "average"
* bucket size for costing purposes should really be taken as something close
* to the "worst case" bucket size.  We try to estimate this by adjusting the
* fraction if there are too few distinct data values, and then scaling up
* by the ratio of the most common value's frequency to the average frequency.
*
* If no statistics are available, use a default estimate of 0.1. This will
* discourage use of a hash rather strongly if the inner relation is large,
...
...
@@ -815,11 +815,13 @@ estimate_hash_bucketsize(Query *root, Var *var)
{
Oid
relid
;
RelOptInfo
*
rel
;
int
virtualbuckets
;
int
physicalbuckets
;
int
numbatches
;
HeapTuple
tuple
;
Form_pg_statistic
stats
;
double
estfract
,
ndistinct
,
needdistinct
,
mcvfreq
,
avgfreq
;
float4
*
numbers
;
...
...
@@ -841,6 +843,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
if
(
rel
->
tuples
<=
0
.
0
||
rel
->
rows
<=
0
.
0
)
return
0
.
1
;
/* ensure we can divide below */
/* Get hash table size that executor would use for this relation */
ExecChooseHashTableSize
(
rel
->
rows
,
rel
->
width
,
&
virtualbuckets
,
&
physicalbuckets
,
&
numbatches
);
tuple
=
SearchSysCache
(
STATRELATT
,
ObjectIdGetDatum
(
relid
),
Int16GetDatum
(
var
->
varattno
),
...
...
@@ -857,7 +865,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
case
ObjectIdAttributeNumber
:
case
SelfItemPointerAttributeNumber
:
/* these are unique, so buckets should be well-distributed */
return
(
double
)
NTUP_PER_BUCKET
/
rel
->
row
s
;
return
1
.
0
/
(
double
)
virtualbucket
s
;
case
TableOidAttributeNumber
:
/* hashing this is a terrible idea... */
return
1
.
0
;
...
...
@@ -873,6 +881,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
if
(
ndistinct
<
0
.
0
)
ndistinct
=
-
ndistinct
*
rel
->
tuples
;
if
(
ndistinct
<=
0
.
0
)
/* ensure we can divide */
{
ReleaseSysCache
(
tuple
);
return
0
.
1
;
}
/* Also compute avg freq of all distinct data values in raw relation */
avgfreq
=
(
1
.
0
-
stats
->
stanullfrac
)
/
ndistinct
;
...
...
@@ -887,20 +901,14 @@ estimate_hash_bucketsize(Query *root, Var *var)
ndistinct
*=
rel
->
rows
/
rel
->
tuples
;
/*
* Form initial estimate of bucketsize fraction. Here we use rel->rows,
* ie the number of rows after applying restriction clauses, because
* that's what the fraction will eventually be multiplied by in
* cost_hashjoin.
* Initial estimate of bucketsize fraction is 1/nbuckets as long as
* the number of buckets is less than the expected number of distinct
* values; otherwise it is 1/ndistinct.
*/
estfract
=
(
double
)
NTUP_PER_BUCKET
/
rel
->
rows
;
/*
* Adjust estimated bucketsize if too few distinct values (after
* restriction clauses) to fill all the buckets.
*/
needdistinct
=
rel
->
rows
/
(
double
)
NTUP_PER_BUCKET
;
if
(
ndistinct
<
needdistinct
)
estfract
*=
needdistinct
/
ndistinct
;
if
(
ndistinct
>
(
double
)
virtualbuckets
)
estfract
=
1
.
0
/
(
double
)
virtualbuckets
;
else
estfract
=
1
.
0
/
ndistinct
;
/*
* Look up the frequency of the most common value, if available.
...
...
src/include/executor/nodeHash.h
浏览文件 @
01a819ab
...
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: nodeHash.h,v 1.
19 2001/03/22 04:00:44 momjian
Exp $
* $Id: nodeHash.h,v 1.
20 2001/06/11 00:17:07 tgl
Exp $
*
*-------------------------------------------------------------------------
*/
...
...
@@ -16,9 +16,6 @@
#include "nodes/plannodes.h"
/* NTUP_PER_BUCKET is exported because planner wants to see it */
#define NTUP_PER_BUCKET 10
extern
TupleTableSlot
*
ExecHash
(
Hash
*
node
);
extern
bool
ExecInitHash
(
Hash
*
node
,
EState
*
estate
,
Plan
*
parent
);
extern
int
ExecCountSlotsHash
(
Hash
*
node
);
...
...
@@ -35,5 +32,9 @@ extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
ExprContext
*
econtext
);
extern
void
ExecHashTableReset
(
HashJoinTable
hashtable
,
long
ntuples
);
extern
void
ExecReScanHash
(
Hash
*
node
,
ExprContext
*
exprCtxt
,
Plan
*
parent
);
/*
 * ExecChooseHashTableSize
 *		Compute hash table dimensions from the estimated row count (ntuples)
 *		and average row width (tupwidth) of the relation to be hashed.
 *
 * Results are returned through the three pointer arguments: the number of
 * virtual buckets for the whole relation, the number of physical buckets
 * used in the first pass, and the number of batches (0 when no batching
 * is needed).  Declared here so that code outside nodeHash.c, such as the
 * planner's costsize.c, can call it.
 */
extern
void
ExecChooseHashTableSize
(
double
ntuples
,
int
tupwidth
,
int
*
virtualbuckets
,
int
*
physicalbuckets
,
int
*
numbatches
);
#endif
/* NODEHASH_H */
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录