提交 e7ff3ef1 作者: Ekta Khanna and Jemish Patel 提交者: Dhanashree Kashid

Don't assume a subquery's output is unique if there's a SRF in its tlist

Author: Tom Lane <tgl@sss.pgh.pa.us>
Date:   Tue Jul 8 14:03:32 2014 -0400

    While the x output of "select x from t group by x" can be presumed unique,
    this does not hold for "select x, generate_series(1,10) from t group by x",
    because we may expand the set-returning function after the grouping step.
    (Perhaps that should be re-thought; but considering all the other oddities
    involved with SRFs in targetlists, it seems unlikely we'll change it.)
    Put a check in query_is_distinct_for() so it's not fooled by such cases.

    Back-patch to all supported branches.

    David Rowley

(cherry picked from commit 2e7469dc8b3bac4fe0f9bd042aaf802132efde85)
上级 8bd49b1b
......@@ -23,6 +23,7 @@
#include "executor/executor.h"
#include "miscadmin.h"
#include "optimizer/clauses.h" /* contain_mutable_functions() */
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
......@@ -2147,6 +2148,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
Assert(list_length(colnos) == list_length(opids));
/*
* A set-returning function in the query's targetlist can result in
* returning duplicate rows, if the SRF is evaluated after the
* de-duplication step; so we play it safe and say "no" if there are any
* SRFs. (We could be certain that it's okay if SRFs appear only in the
* specified columns, since those must be evaluated before de-duplication;
* but it doesn't presently seem worth the complication to check that.)
*/
if (expression_returns_set((Node *) query->targetList))
return false;
/*
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
* columns in the DISTINCT clause appear in colnos and operator semantics
......
......@@ -640,3 +640,31 @@ select * from int4_tbl where
0
(1 row)
--
-- Check for incorrect optimization when IN subquery contains a SRF
--
set enable_hashjoin to 0;
explain select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
QUERY PLAN
---------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=1.02..2.03 rows=1 width=4)
-> Seq Scan on int4_tbl o (cost=1.02..2.03 rows=1 width=4)
Filter: (hashed SubPlan 1)
SubPlan 1
-> Materialize (cost=1.02..1.03 rows=1 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.01 rows=1 width=4)
-> Result (cost=0.00..1.01 rows=1 width=4)
-> Seq Scan on int4_tbl i (cost=0.00..1.01 rows=1 width=4)
Settings: enable_hashjoin=off; optimizer=off
Optimizer status: legacy query optimizer
(10 rows)
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
f1
----
0
(1 row)
reset enable_hashjoin;
......@@ -658,3 +658,31 @@ select * from int4_tbl where
0
(1 row)
--
-- Check for incorrect optimization when IN subquery contains a SRF
--
set enable_hashjoin to 0;
explain select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
QUERY PLAN
---------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=1.02..2.03 rows=1 width=4)
-> Seq Scan on int4_tbl o (cost=1.02..2.03 rows=1 width=4)
Filter: (hashed SubPlan 1)
SubPlan 1
-> Materialize (cost=1.02..1.03 rows=1 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.01 rows=1 width=4)
-> Result (cost=0.00..1.01 rows=1 width=4)
-> Seq Scan on int4_tbl i (cost=0.00..1.01 rows=1 width=4)
Settings: enable_hashjoin=off; optimizer=on
Optimizer status: legacy query optimizer
(10 rows)
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
f1
----
0
(1 row)
reset enable_hashjoin;
......@@ -383,3 +383,13 @@ select '1'::text in (select '1'::name union all select '1'::name);
select * from int4_tbl where
(case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
(select ten from tenk1 b);
--
-- Check for incorrect optimization when IN subquery contains a SRF
--
set enable_hashjoin to 0;
explain select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
reset enable_hashjoin;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册