提交 999e9654 编写于 作者: B Bhuvnesh Chaudhary

Fixing setting flow->hashExpr plan references

When flow->hashExpr is created, we ignore relabel nodes while
checking whether the expression already exists in the targetlist. If a
match is not found in the targetlist, hashExpr is added to the targetlist
with resjunk=true.

In create_join_plan
```
  /**
   * If plan has a flow node, ensure all entries of hashExpr
   * are in the targetlist.
   */
  if (plan->flow && plan->flow->hashExpr)
  {
  	plan->targetlist = add_to_flat_tlist(plan->targetlist, plan->flow->hashExpr, true /* resjunk */ );
  }
```
add_to_flat_tlist uses tlist_member_ignore_relabel.
Later, in set_plan_refs, the references of the vars are updated. This
commit ensures that the contract of ignoring relabel nodes is
upheld while updating the references in hashExpr. Prior to this
commit, when set_plan_refs checked for the existence of hashExpr in the
targetlist, relabel nodes were not ignored, so the
corresponding projection element was not found and planning failed with
ERROR: variable not found in subplan target list
fix_upper_expr ensures that the cast/type of the variable in the upper
plan node is not ignored, so it only accepts a plain match for non-Var
expressions. Thus, a new method was added to perform special handling of
flow->hashExpr, as these expressions have to be matched against the target
list at the same level (not that of the child).

This bug has existed in Greenplum 5 and earlier versions for a long time.
This is a Greenplum-specific fix, so the bug may never have been caught before.
上级 ad0f1934
......@@ -76,6 +76,7 @@ typedef struct
indexed_tlist *subplan_itlist;
Index newvarno;
int rtoffset;
bool flow_expr;
} fix_upper_expr_context;
typedef struct
......@@ -124,6 +125,9 @@ static Var *search_indexed_tlist_for_var(Var *var,
static Var *search_indexed_tlist_for_non_var(Node *node,
indexed_tlist *itlist,
Index newvarno);
static Var *search_indexed_tlist_for_non_var_in_flow_expr(Node *node,
indexed_tlist *itlist,
Index newvarno);
static Var *search_indexed_tlist_for_sortgroupref(Node *node,
Index sortgroupref,
indexed_tlist *itlist,
......@@ -151,6 +155,11 @@ static Node *fix_upper_expr(PlannerInfo *root,
indexed_tlist *subplan_itlist,
Index newvarno,
int rtoffset);
static Node *fix_upper_flow_expr(PlannerInfo *root,
Node *node,
indexed_tlist *subplan_itlist,
Index newvarno,
int rtoffset);
static Node *fix_upper_expr_mutator(Node *node,
fix_upper_expr_context *context);
static List *set_returning_clause_references(PlannerInfo *root,
......@@ -589,7 +598,7 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
indexed_tlist *plan_itlist = build_tlist_index(plan->targetlist);
plan->flow->hashExprs =
(List *) fix_upper_expr(root,
(List *) fix_upper_flow_expr(root,
(Node *) plan->flow->hashExprs,
plan_itlist,
OUTER_VAR,
......@@ -2086,6 +2095,30 @@ search_indexed_tlist_for_non_var(Node *node,
return NULL; /* no match */
}
/*
 * search_indexed_tlist_for_non_var_in_flow_expr --- find a non-Var
 * flow->hashExpr expression in an indexed tlist
 *
 * GPDB: counterpart of search_indexed_tlist_for_non_var.  The lookup goes
 * through tlist_member_ignore_relabel(), so relabel nodes are ignored when
 * matching the expression against the tlist entries.
 *
 * If a matching target entry exists, a new Var is built from it with
 * makeVarFromTargetEntry() using newvarno, its varnoold/varoattno are
 * zeroed, and it is returned.  Otherwise NULL is returned.
 */
static Var *
search_indexed_tlist_for_non_var_in_flow_expr(Node *node,
											  indexed_tlist *itlist, Index newvarno)
{
	TargetEntry *match = tlist_member_ignore_relabel(node, itlist->tlist);
	Var		   *result;

	/* Nothing in the tlist corresponds to this expression. */
	if (match == NULL)
		return NULL;

	/* Reference the matching output column of the tlist. */
	result = makeVarFromTargetEntry(newvarno, match);

	/* The expression was never a plain Var, so it has no "old" identity. */
	result->varnoold = 0;
	result->varoattno = 0;

	return result;
}
/*
* search_indexed_tlist_for_sortgroupref --- find a sort/group expression
* (which is assumed not to be just a Var)
......@@ -2417,9 +2450,26 @@ fix_upper_expr(PlannerInfo *root,
context.subplan_itlist = subplan_itlist;
context.newvarno = newvarno;
context.rtoffset = rtoffset;
context.flow_expr = false;
return fix_upper_expr_mutator(node, &context);
}
/*
 * fix_upper_flow_expr
 *		GPDB: variant of fix_upper_expr used for flow hash expressions.
 *
 * Fills in a fix_upper_expr_context from the arguments with flow_expr set
 * to true and runs fix_upper_expr_mutator over 'node'.  With flow_expr set,
 * the mutator looks up non-Var expressions through
 * search_indexed_tlist_for_non_var_in_flow_expr() rather than
 * search_indexed_tlist_for_non_var().
 *
 * Returns the result of fix_upper_expr_mutator.
 */
static Node *
fix_upper_flow_expr(PlannerInfo *root,
					Node *node,
					indexed_tlist *subplan_itlist,
					Index newvarno,
					int rtoffset)
{
	fix_upper_expr_context flow_cxt;

	/* This flag is what distinguishes us from fix_upper_expr. */
	flow_cxt.flow_expr = true;

	/* The remaining fields are passed through from the caller unchanged. */
	flow_cxt.rtoffset = rtoffset;
	flow_cxt.newvarno = newvarno;
	flow_cxt.subplan_itlist = subplan_itlist;
	flow_cxt.root = root;

	return fix_upper_expr_mutator(node, &flow_cxt);
}
static Node *
fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
{
......@@ -2458,9 +2508,18 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
/* Try matching more complex expressions too, if tlist has any */
if (context->subplan_itlist->has_non_vars && !IsA(node, GroupId))
{
newvar = search_indexed_tlist_for_non_var(node,
context->subplan_itlist,
context->newvarno);
if (context->flow_expr)
{
newvar = search_indexed_tlist_for_non_var_in_flow_expr(node,
context->subplan_itlist,
context->newvarno);
}
else
{
newvar = search_indexed_tlist_for_non_var(node,
context->subplan_itlist,
context->newvarno);
}
if (newvar)
return (Node *) newvar;
}
......
......@@ -838,3 +838,101 @@ reset enable_nestloop;
reset enable_material;
reset enable_seqscan;
reset enable_bitmapscan;
-- test that flow->hashExpr variables can be resolved
CREATE TABLE hexpr_t1 (c1 int, c2 character varying(16)) DISTRIBUTED BY (c1);
CREATE TABLE hexpr_t2 (c3 character varying(16)) DISTRIBUTED BY (c3);
INSERT INTO hexpr_t1 SELECT i, i::character varying FROM generate_series(1,10)i;
INSERT INTO hexpr_t2 SELECT i::character varying FROM generate_series(1,10)i;
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
QUERY PLAN
----------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=3.23..6.70 rows=10 width=2)
-> Hash Left Join (cost=3.23..6.70 rows=4 width=2)
Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..3.30 rows=4 width=2)
Hash Key: btrim((hexpr_t1.c2)::text)
-> Seq Scan on hexpr_t1 (cost=0.00..3.10 rows=4 width=2)
-> Hash (cost=3.10..3.10 rows=4 width=2)
-> Seq Scan on hexpr_t2 (cost=0.00..3.10 rows=4 width=2)
Optimizer: Postgres query optimizer
(9 rows)
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
QUERY PLAN
----------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=3.23..6.70 rows=10 width=2)
-> Hash Left Join (cost=3.23..6.70 rows=4 width=2)
Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..3.30 rows=4 width=2)
Hash Key: btrim((hexpr_t1.c2)::text)
-> Seq Scan on hexpr_t1 (cost=0.00..3.10 rows=4 width=2)
-> Hash (cost=3.10..3.10 rows=4 width=2)
-> Seq Scan on hexpr_t2 (cost=0.00..3.10 rows=4 width=2)
Optimizer: Postgres query optimizer
(9 rows)
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
QUERY PLAN
----------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=3.23..6.70 rows=10 width=2)
-> Hash Left Join (cost=3.23..6.70 rows=4 width=2)
Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..3.30 rows=4 width=2)
Hash Key: btrim((hexpr_t1.c2)::text)
-> Seq Scan on hexpr_t1 (cost=0.00..3.10 rows=4 width=2)
-> Hash (cost=3.10..3.10 rows=4 width=2)
-> Seq Scan on hexpr_t2 (cost=0.00..3.10 rows=4 width=2)
Optimizer: Postgres query optimizer
(9 rows)
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
foo
-----
5
6
8
2
3
4
7
9
10
1
(10 rows)
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
foo
-----
2
3
4
7
5
6
8
9
10
1
(10 rows)
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
foo
-----
2
3
4
7
8
5
6
1
9
10
(10 rows)
......@@ -847,3 +847,104 @@ reset enable_nestloop;
reset enable_material;
reset enable_seqscan;
reset enable_bitmapscan;
-- test that flow->hashExpr variables can be resolved
CREATE TABLE hexpr_t1 (c1 int, c2 character varying(16)) DISTRIBUTED BY (c1);
CREATE TABLE hexpr_t2 (c3 character varying(16)) DISTRIBUTED BY (c3);
INSERT INTO hexpr_t1 SELECT i, i::character varying FROM generate_series(1,10)i;
INSERT INTO hexpr_t2 SELECT i::character varying FROM generate_series(1,10)i;
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
QUERY PLAN
------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=5 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..862.00 rows=14 width=2)
-> Hash Left Join (cost=0.00..862.00 rows=5 width=2)
Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=4 width=2)
Hash Key: btrim((hexpr_t1.c2)::text)
-> Seq Scan on hexpr_t1 (cost=0.00..431.00 rows=4 width=2)
-> Hash (cost=431.00..431.00 rows=4 width=2)
-> Seq Scan on hexpr_t2 (cost=0.00..431.00 rows=4 width=2)
Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
QUERY PLAN
------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=5 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..862.00 rows=14 width=2)
-> Hash Left Join (cost=0.00..862.00 rows=5 width=2)
Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=4 width=2)
Hash Key: btrim((hexpr_t1.c2)::text)
-> Seq Scan on hexpr_t1 (cost=0.00..431.00 rows=4 width=2)
-> Hash (cost=431.00..431.00 rows=4 width=2)
-> Seq Scan on hexpr_t2 (cost=0.00..431.00 rows=4 width=2)
Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
QUERY PLAN
------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=5 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..862.00 rows=14 width=2)
-> Hash Left Join (cost=0.00..862.00 rows=5 width=2)
Hash Cond: (btrim((hexpr_t1.c2)::text) = (hexpr_t2.c3)::text)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=4 width=2)
Hash Key: btrim((hexpr_t1.c2)::text)
-> Seq Scan on hexpr_t1 (cost=0.00..431.00 rows=4 width=2)
-> Hash (cost=431.00..431.00 rows=4 width=2)
-> Seq Scan on hexpr_t2 (cost=0.00..431.00 rows=4 width=2)
Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(10 rows)
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
foo
-----
2
3
4
7
5
6
8
9
10
1
(10 rows)
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
foo
-----
8
5
6
1
9
10
2
3
4
7
(10 rows)
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
foo
-----
9
10
1
2
3
4
7
5
6
8
(10 rows)
......@@ -434,3 +434,21 @@ reset enable_nestloop;
reset enable_material;
reset enable_seqscan;
reset enable_bitmapscan;
-- test that flow->hashExpr variables can be resolved
CREATE TABLE hexpr_t1 (c1 int, c2 character varying(16)) DISTRIBUTED BY (c1);
CREATE TABLE hexpr_t2 (c3 character varying(16)) DISTRIBUTED BY (c3);
INSERT INTO hexpr_t1 SELECT i, i::character varying FROM generate_series(1,10)i;
INSERT INTO hexpr_t2 SELECT i::character varying FROM generate_series(1,10)i;
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
EXPLAIN SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
SELECT btrim(hexpr_t1.c2::text)::character varying AS foo FROM hexpr_t1 LEFT JOIN hexpr_t2
ON hexpr_t2.c3::text = btrim(hexpr_t1.c2::text);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册