-- Test using an external table in a subquery. -- -- We used to have a bug where the scan on the external table was not -- broadcast to all nodes, so each segment scanned only its own portion -- of the external table, when the scan was in a subquery. In that case, -- the count(*) calculated for each value below was 1, but it should be -- equal to the number of segments, because this external table produces -- the same rows on every segment. CREATE EXTERNAL WEB TABLE echotable (c1 int, c2 int, c3 int) EXECUTE 'echo "1,2,3"; echo "4,5,6";' FORMAT 'TEXT' (DELIMITER ','); create table test_ext_foo (c1 int, c2 int4); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into test_ext_foo select g, g from generate_series(1, 20) g; -- This should return 2 and 5, as the two rows are duplicated in -- every segment (assuming you have at least two segments in your -- cluster). select c2 from echotable group by c2 having count(*) >= 2; c2 ---- 2 5 (2 rows) select * from test_ext_foo as o where (select count(*) from echotable as i where i.c2 = o.c2) >= 2; c1 | c2 ----+---- 5 | 5 2 | 2 (2 rows) -- Planner test to make sure the initplan is not removed for function scan -- VACUUM FULL: To generate a deterministic plan for the query below. VACUUM FULL pg_database; VACUUM FULL pg_authid; explain (costs off) select sess_id from pg_stat_activity where query = (select current_query()) and usename='xxx' and datname='xxx'; QUERY PLAN ----------------------------------------------------------- Hash Join Hash Cond: (s.usesysid = u.oid) InitPlan 1 (returns $0) -> Result -> Hash Join Hash Cond: (s.datid = d.oid) -> Function Scan on pg_stat_get_activity s Filter: (query = $0) -> Hash -> Seq Scan on pg_database d Filter: (datname = 'xxx'::name) -> Hash -> Seq Scan on pg_authid u Filter: (rolname = 'xxx'::name) Optimizer: Postgres query optimizer (15 rows) -- Planner test: constant folding in subplan testexpr produces no error create table subselect_t1 (a int, b int, c int) distributed by (a); create table subselect_t2 (a int, b int, c int) distributed by (a); insert into subselect_t1 values (1,1,1); insert into subselect_t2 values (1,1,1); select * from subselect_t1 where NULL in (select c from subselect_t2); a | b | c ---+---+--- (0 rows) select * from subselect_t1 where NULL in (select c from subselect_t2) and exists (select generate_series(1,2)); a | b | c ---+---+--- (0 rows) -- Planner test to make sure initplan is removed when no param is used select * from subselect_t2 where false and exists (select generate_series(1,2)); a | b | c ---+---+--- (0 rows)