-- count number of certain operators in a given plan -- start_ignore create language plpythonu; -- end_ignore create or replace function count_operator(query text, operator text) returns int as $$ rv = plpy.execute('EXPLAIN ' + query) search_text = operator result = 0 for i in range(len(rv)): cur_line = rv[i]['QUERY PLAN'] if search_text.lower() in cur_line.lower(): result = result+1 return result $$ language plpythonu; --start_ignore DROP TABLE IF EXISTS bfv_subquery_p; NOTICE: table "bfv_subquery_p" does not exist, skipping DROP TABLE IF EXISTS bfv_subquery_r; NOTICE: table "bfv_subquery_r" does not exist, skipping --end_ignore -- subquery over partitioned table CREATE TABLE bfv_subquery_(a int, b int) partition by range(b) (start(1) end(10)); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. NOTICE: CREATE TABLE will create partition "bfv_subquery__1_prt_1" for table "bfv_subquery_" CREATE TABLE bfv_subquery_r (a int, b int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO bfv_subquery_ SELECT i,i FROM generate_series(1,9)i; INSERT INTO bfv_subquery_r SELECT i,i FROM generate_series(1,9)i; SELECT a FROM bfv_subquery_r WHERE b < ( SELECT 0.5 * sum(a) FROM bfv_subquery_ WHERE b >= 3) ORDER BY 1; a --- 1 2 3 4 5 6 7 8 9 (9 rows) --start_ignore drop table if exists bfv_subquery_r2; NOTICE: table "bfv_subquery_r2" does not exist, skipping drop table if exists s; NOTICE: table "s" does not exist, skipping --end_ignore -- subquery with distinct and outer references create table bfv_subquery_r2(a int, b int) distributed by (a); create table bfv_subquery_s2(a int, b int) distributed by (a); insert into bfv_subquery_r2 values (1,1); insert into bfv_subquery_r2 values (2,1); insert into bfv_subquery_r2 values (2,NULL); insert into bfv_subquery_r2 values (NULL,0); insert into bfv_subquery_r2 values (NULL,NULL); insert into bfv_subquery_s2 values (2,2); insert into bfv_subquery_s2 values (1,0); insert into bfv_subquery_s2 values (1,1); select * from bfv_subquery_r2 where a = (select x.a from (select distinct a from bfv_subquery_s2 where bfv_subquery_s2.b = bfv_subquery_r2 .b) x); a | b ---+--- 1 | 1 (1 row) -- start_ignore DROP FUNCTION IF EXISTS csq_f(a int); NOTICE: function csq_f(pg_catalog.int4) does not exist, skipping -- end_ignore CREATE FUNCTION csq_f(a int) RETURNS int AS $$ select $1 $$ LANGUAGE SQL; --start_ignore DROP TABLE IF EXISTS csq_r; NOTICE: table "csq_r" does not exist, skipping --end_ignore CREATE TABLE csq_r(a int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO csq_r VALUES (1); SELECT * FROM csq_r WHERE a IN (SELECT * FROM csq_f(csq_r.a)); a --- 1 (1 row) -- subquery in the select list --start_ignore drop table if exists bfv_subquery_t1; NOTICE: table "bfv_subquery_t1" does not exist, skipping drop table if exists bfv_subquery_t2; NOTICE: table "bfv_subquery_t2" does not exist, skipping --end_ignore create table bfv_subquery_t1(i int, j int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table bfv_subquery_t2(i int, j int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into bfv_subquery_t1 select i, i%5 from generate_series(1,10)i; insert into bfv_subquery_t2 values (1, 10); select count_operator('select bfv_subquery_t1.i, (select bfv_subquery_t1.i from bfv_subquery_t2) from bfv_subquery_t1;', 'Seq Scan') > 0; ?column? ---------- t (1 row) select bfv_subquery_t1.i, (select bfv_subquery_t1.i from bfv_subquery_t2) from bfv_subquery_t1 order by 1, 2; i | i ----+---- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 (10 rows) -- start_ignore drop table if exists bfv_subquery_t3; NOTICE: table "bfv_subquery_t3" does not exist, skipping drop table if exists bfv_subquery_s3; NOTICE: table "bfv_subquery_s3" does not exist, skipping -- end_ignore create table bfv_subquery_t3(a int, b int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into bfv_subquery_t3 values (1,4),(0,3); create table bfv_subquery_s3(i int, j int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- ALL subquery select * from bfv_subquery_t3 where a < all (select i from bfv_subquery_s3 limit 1) order by a; a | b ---+--- 0 | 3 1 | 4 (2 rows) select * from bfv_subquery_t3 where a < all (select i from bfv_subquery_s3) order by a; a | b ---+--- 0 | 3 1 | 4 (2 rows) select * from bfv_subquery_t3 where a < all (select i from bfv_subquery_s3 limit 2) order by a; a | b ---+--- 0 | 3 1 | 4 (2 rows) select * from bfv_subquery_t3 where a < all (select i from bfv_subquery_s3) order by a; a | b ---+--- 0 | 3 1 | 4 (2 rows) -- Direct Dispatch caused reader gang process hanging on start_xact_command DROP TABLE IF EXISTS bfv_subquery_a1; NOTICE: table "bfv_subquery_a1" does not exist, skipping DROP TABLE IF EXISTS bfv_subquery_b1; NOTICE: table "bfv_subquery_b1" does not exist, skipping CREATE TABLE bfv_subquery_a1(i INT, j INT); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO bfv_subquery_a1(SELECT i, i * i FROM generate_series(1, 10) AS i); CREATE TABLE bfv_subquery_b1(i INT, j INT); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO bfv_subquery_b1(SELECT i, i * i FROM generate_series(1, 10) AS i); SELECT bfv_subquery_a1.* FROM bfv_subquery_a1 INNER JOIN bfv_subquery_b1 ON bfv_subquery_a1.i = bfv_subquery_b1.i WHERE bfv_subquery_a1.j NOT IN (SELECT j FROM bfv_subquery_a1 a2 where a2.j = bfv_subquery_b1.j) and bfv_subquery_a1.i = 1; i | j ---+--- (0 rows) DROP TABLE IF EXISTS bfv_subquery_a2; NOTICE: table "bfv_subquery_a2" does not exist, skipping CREATE TABLE bfv_subquery_a2(i INT, j INT); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO bfv_subquery_a2(SELECT i, i * i FROM generate_series(1, 10) AS i); SELECT bfv_subquery_a2.* FROM bfv_subquery_a2 WHERE bfv_subquery_a2.j NOT IN (SELECT j FROM bfv_subquery_a2 a2 where a2.j = bfv_subquery_a2.j) and bfv_subquery_a2.i = 1; i | j ---+--- (0 rows) -- prohibit plans with Motions above outer references --start_ignore drop table if exists bfv_subquery_foo1; NOTICE: table "bfv_subquery_foo1" does not exist, skipping --end_ignore create table bfv_subquery_foo1(a integer, b integer) distributed by (a); insert into bfv_subquery_foo1 values(1,1); insert into bfv_subquery_foo1 values(2,2); select (select a from bfv_subquery_foo1 inner1 where inner1.a=outer1.a union select b from bfv_subquery_foo1 inner2 where inner2.b=outer1.b) from bfv_subquery_foo1 outer1; a --- 1 2 (2 rows) -- using of subqueries with unnest with IN or NOT IN predicates select 1 where 22 not in (SELECT unnest(array[1,2])); ?column? ---------- 1 (1 row) select 1 where 22 in (SELECT unnest(array[1,2])); ?column? ---------- (0 rows) select 1 where 22 in (SELECT unnest(array[1,2,22])); ?column? ---------- 1 (1 row) select 1 where 22 not in (SELECT unnest(array[1,2,22])); ?column? ---------- (0 rows) -- start_ignore drop table if exists mpp_t1; NOTICE: table "mpp_t1" does not exist, skipping drop table if exists mpp_t2; NOTICE: table "mpp_t2" does not exist, skipping drop table if exists mpp_t3; NOTICE: table "mpp_t3" does not exist, skipping create table mpp_t1(a int,b int) distributed by (a); create table mpp_t2(a int,b int) distributed by (b); create table mpp_t3(like mpp_t1); NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table -- end_ignore select * from mpp_t1 where a=1 and a=2 and a > (select mpp_t2.b from mpp_t2); a | b ---+--- (0 rows) select * from mpp_t1 where a<1 and a>2 and a > (select mpp_t2.b from mpp_t2); a | b ---+--- (0 rows) select * from mpp_t3 where a in ( select a from mpp_t1 where a<1 and a>2 and a > (select mpp_t2.b from mpp_t2)); a | b ---+--- (0 rows) select * from mpp_t3 where a <1 and a=1 and a in ( select a from mpp_t1 where a > (select mpp_t2.b from mpp_t2)); a | b ---+--- (0 rows) select * from mpp_t1 where a <1 and a=1 and a in ( select a from mpp_t1 where a > (select mpp_t2.b from mpp_t2)); a | b ---+--- (0 rows) select * from mpp_t1 where a = (select a FROM mpp_t2 where mpp_t2.b > (select max(b) from mpp_t3 group by b) and mpp_t2.b=1 and mpp_t2.b=2); a | b ---+--- (0 rows) -- start_ignore drop table if exists mpp_t1; drop table if exists mpp_t2; drop table if exists mpp_t3; -- end_ignore -- -- Test case for when there is case clause in join filter -- -- start_ignore drop table if exists t_case_subquery1; NOTICE: table "t_case_subquery1" does not exist, skipping -- end_ignore create table t_case_subquery1 (a int, b int, c text); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t_case_subquery1 values(1, 5, NULL), (1, 2, NULL); select t1.* from t_case_subquery1 t1 where t1.b = ( select max(b) from t_case_subquery1 t2 where t1.a = t2.a and t2.b < 5 and case when t1.c is not null and t2.c is not null then t1.c = t2.c end ); a | b | c ---+---+--- (0 rows) -- start_ignore drop table if exists t_case_subquery1; -- end_ignore -- -- Test case for if coalesce is needed for specific cases where a subquery with -- count aggregate has to return 0 or null. Count returns 0 on empty relations -- where other queries return NULL. -- -- start_ignore drop table if exists t_coalesce_count_subquery; NOTICE: table "t_coalesce_count_subquery" does not exist, skipping drop table if exists t_coalesce_count_subquery_empty; NOTICE: table "t_coalesce_count_subquery_empty" does not exist, skipping drop table if exists t_coalesce_count_subquery_empty2; NOTICE: table "t_coalesce_count_subquery_empty2" does not exist, skipping CREATE TABLE t_coalesce_count_subquery(a, b) AS VALUES (1, 1); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE t_coalesce_count_subquery_empty(c int, d int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. CREATE TABLE t_coalesce_count_subquery_empty2(e int, f int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'e' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- end_ignore SELECT (SELECT count(*) FROM t_coalesce_count_subquery_empty where c = a) FROM t_coalesce_count_subquery; count ------- 0 (1 row) SELECT (SELECT COUNT(*) FROM t_coalesce_count_subquery_empty GROUP BY c LIMIT 1) FROM t_coalesce_count_subquery; count ------- (1 row) SELECT (SELECT a1 FROM (SELECT count(*) FROM t_coalesce_count_subquery_empty2 group by e union all SELECT count(*) from t_coalesce_count_subquery_empty group by c) x(a1) LIMIT 1) FROM t_coalesce_count_subquery; a1 ---- (1 row) SELECT (SELECT a1 FROM (SELECT count(*) from t_coalesce_count_subquery_empty group by c union all SELECT count(*) FROM t_coalesce_count_subquery_empty2 group by e) x(a1) LIMIT 1) FROM t_coalesce_count_subquery; a1 ---- (1 row) -- start_ignore drop table if exists t_coalesce_count_subquery; drop table if exists t_coalesce_count_subquery_empty; drop table if exists t_coalesce_count_subquery_empty2; -- start_ignore drop table if exists t_outer; NOTICE: table "t_outer" does not exist, skipping drop table if exists t_inner; NOTICE: table "t_inner" does not exist, skipping create table t_outer (a oid, b tid); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t_inner (c int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- end_ignore SET enable_nestloop=off; SET enable_hashjoin=off; set enable_mergejoin = on; select * from t_outer where t_outer.b not in (select ctid from t_inner); NOTICE: SELECT uses system-defined column "t_inner.ctid" without the necessary companion column "t_inner.gp_segment_id" HINT: To uniquely identify a row within a distributed table, use the "gp_segment_id" column together with the "ctid" column. a | b ---+--- (0 rows) RESET enable_nestloop; RESET enable_hashjoin; RESET enable_mergejoin; -- start_ignore drop table if exists t_outer; drop table if exists t_inner; -- end_ignore -- -- In some cases of a NOT EXISTS subquery, planner mistook one side of the -- predicate as a (derived or direct) attribute on the inner relation, and -- incorrectly decorrelated the subquery into a JOIN -- start_ignore drop table if exists foo; NOTICE: table "foo" does not exist, skipping drop table if exists bar; NOTICE: table "bar" does not exist, skipping create table foo(a, b) as (values (1, 'a'), (2, 'b')); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table bar(c, d) as (values (1, 'a'), (2, 'b')); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -- end_ignore select * from foo where not exists (select * from bar where foo.a + bar.c = 1); a | b ---+--- 1 | a 2 | b (2 rows) select * from foo where not exists (select * from bar where foo.b || bar.d = 'hola'); a | b ---+--- 1 | a 2 | b (2 rows) select * from foo where not exists (select * from bar where foo.a = foo.a + 1); a | b ---+--- 1 | a 2 | b (2 rows) select * from foo where not exists (select * from bar where foo.b = foo.b || 'a'); a | b ---+--- 1 | a 2 | b (2 rows) select * from foo where foo.a = (select min(bar.c) from bar where foo.b || bar.d = 'bb'); a | b ---+--- 2 | b (1 row) drop table foo, bar; -- -- Test subquery with rescan of RESULT node -- create table foo_rescan_result(a, b) as (values (1, 2), (1, 1)); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'column1' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table bar_rescan_result(a, b) as (values (1, 1)); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'column1' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. select * from foo_rescan_result t1 where (select count(*) from bar_rescan_result where t1.a=t1.b) > 0; a | b ---+--- 1 | 1 (1 row) -- -- subqueries with unnest in projectlist -- -- start_ignore DROP TABLE IF EXISTS A; NOTICE: table "a" does not exist, skipping CREATE TABLE A AS SELECT ARRAY[1,2,3] AS X; NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO A VALUES(NULL::int4[]); -- end_ignore SELECT (NOT EXISTS (SELECT UNNEST(X))) AS B FROM A; b --- f t (2 rows) SELECT (EXISTS (SELECT UNNEST(X))) AS B FROM A; b --- t f (2 rows) EXPLAIN SELECT (EXISTS (SELECT UNNEST(X))) AS B FROM A; QUERY PLAN ----------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1.02 rows=1 width=33) -> Seq Scan on a (cost=0.00..1.02 rows=1 width=33) SubPlan 1 -> Result (cost=0.00..0.01 rows=1 width=0) Settings: optimizer=off Optimizer status: Postgres query optimizer (6 rows) DROP TABLE A;