From 0387161bb2ae0ca95f0e872a44abaee497036d39 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 10 Aug 2018 18:29:46 -0700 Subject: [PATCH] Faithfully parse inheritance-recursion for external tables When SQL standard table inheritance was added in upstream (by commit 2fb6cc9045 in Postgres 7.1), mentioning a table in the FROM clause of a query would necessarily mean traversing through the inheritance hierarchy. The need to distinguish between the (legacy, less common, but legitimate nonetheless) intent of not recursing into child tables gave rise to two things: the guc `sql_inheritance` which toggles the default semantics of parent tables, and the `ONLY` keyword used in front of parent table names to explicitly skip descendant tables. ORCA doesn't like queries that skip descendant tables: it falls back to the legacy planner as soon as it detects that intent. Way way back in Greenplum-land, when external tables were given a separate designation in relstorage (RELSTORAGE_EXTERNAL), we seemed to have added code in parser (parse analysis) so that queries on external tables *never* recurse into their child tables, regardless of what the user specifies -- either via `ONLY` or `*` in the query, or via guc `sql_inheritance`. Technically, that process scrubs the range table entries to hard-code "do not recurse". The combination of those two things -- hard coding "do not recurse" in the RTE for the analyzed parse tree and ORCA detecting intent of `ONLY` through RTE -- led ORCA to *always* fall back to planner when an external table is mentioned in the FROM clause. commit 013a6e9dfb tried fixing this by *detecting harder* whether there's an external table. The behavior of the parse-analyzer hard coding a "do not recurse" in the RTE for an external table seems wrong for several reasons: 1. It seems unnecessarily defensive 2. It doesn't seem to belong in the parser. a. While changing "recurse" back to "do not recurse" abounds, all other occurrences happen in the planner as an optimization for childless tables. b. It deprives an optimizer of the actual intent expressed by the user: because of this hardcoding, neither ORCA nor planner would have a way of knowing whether the user specified `ONLY` in the query. c. It deprives the user of the ability to use child tables with an external table, either deliberately or coincidentally. d. A corollary is that any old views created as `SELECT a,b FROM ext_table` will be perpetuated as `SELECT a,b FROM ONLY ext_table`. This commit removes this defensive setting in the parse analyzer. As a consequence, we're able to reinstate the simpler RTE check before commit 013a6e9dfb. Queries and new views will include child tables as expected. Note that this commit will introduce a behavior change: (taken from https://github.com/greenplum-db/gpdb/pull/5455#issuecomment-412247709) 1. a (not external) table by default means "me and my descendants" even if it's childless 2. an external table with child tables previously would never recurse into child tables 2. after this patch you need to use ONLY to exclude descendant tables. (cherry picked from commit 2371cb3bba98860179268afe33065292ad1ba6c1) --- .../gpopt/translate/CTranslatorQueryToDXL.cpp | 16 ++++----- src/backend/parser/parse_relation.c | 3 -- .../regress/expected/qp_orca_fallback.out | 7 ++-- .../expected/qp_orca_fallback_optimizer.out | 36 +++++++++---------- src/test/regress/sql/qp_orca_fallback.sql | 5 ++- 5 files changed, 28 insertions(+), 39 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp index 93ec2826c9..08947cb39a 100644 --- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp @@ -2995,15 +2995,7 @@ CTranslatorQueryToDXL::TranslateRTEToDXLLogicalGet(const RangeTblEntry *rte, ULONG //current_query_level ) { - CDXLTableDescr *table_descr = CTranslatorUtils::GetTableDescr( - m_mp, m_md_accessor, m_colid_counter, rte, &m_has_distributed_tables); - - CDXLLogicalGet *dxlop = NULL; - const IMDRelation *md_rel = m_md_accessor->RetrieveRel(table_descr->MDId()); - - - if (false == rte->inh && - IMDRelation::ErelstorageExternal != md_rel->RetrieveRelStorageType()) + if (false == rte->inh) { GPOS_ASSERT(RTE_RELATION == rte->rtekind); // RangeTblEntry::inh is set to false iff there is ONLY in the FROM @@ -3013,6 +3005,12 @@ CTranslatorQueryToDXL::TranslateRTEToDXLLogicalGet(const RangeTblEntry *rte, } // construct table descriptor for the scan node from the range table entry + CDXLTableDescr *table_descr = CTranslatorUtils::GetTableDescr( + m_mp, m_md_accessor, m_colid_counter, rte, &m_has_distributed_tables); + + CDXLLogicalGet *dxlop = NULL; + const IMDRelation *md_rel = m_md_accessor->RetrieveRel(table_descr->MDId()); + if (IMDRelation::ErelstorageExternal == md_rel->RetrieveRelStorageType()) { dxlop = GPOS_NEW(m_mp) CDXLLogicalExternalGet(m_mp, table_descr); diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 2e000ea7da..5ebb309278 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -822,9 +822,6 @@ addRangeTableEntry(ParseState *pstate, rte->alias = alias; rte->rtekind = RTE_RELATION; - /* external tables don't allow inheritance */ - if (RelationIsExternal(rel)) - inh = false; /* * Build the list of effective column names using user-supplied aliases diff --git a/src/test/regress/expected/qp_orca_fallback.out b/src/test/regress/expected/qp_orca_fallback.out index 40b08b5c68..9bb20e2c10 100644 --- a/src/test/regress/expected/qp_orca_fallback.out +++ b/src/test/regress/expected/qp_orca_fallback.out @@ -105,12 +105,8 @@ SELECT * FROM homer; 4 | 3 | 44 (4 rows) --- ORCA should not fallback when ONLY clause is used on external tables +-- ORCA should not fallback just because external tables are in FROM clause -- start_ignore -DROP TABLE IF EXISTS t1; -NOTICE: table "t1" does not exist, skipping -DROP TABLE IF EXISTS ext_table_no_fallback; -NOTICE: table "ext_table_no_fallback" does not exist, skipping CREATE TABLE heap_t1 (a int, b int) DISTRIBUTED BY (b); CREATE EXTERNAL TABLE ext_table_no_fallback (a int, b int) LOCATION ('gpfdist://myhost:8080/test.csv') FORMAT 'CSV'; -- end_ignore @@ -122,6 +118,7 @@ EXPLAIN SELECT * FROM ext_table_no_fallback; Optimizer: legacy query optimizer (3 rows) +-- ORCA should fallback due to ONLY keyword EXPLAIN SELECT * FROM ONLY ext_table_no_fallback; QUERY PLAN ----------------------------------------------------------------------------------------- diff --git a/src/test/regress/expected/qp_orca_fallback_optimizer.out b/src/test/regress/expected/qp_orca_fallback_optimizer.out index a7d2380e05..e9822cf99d 100644 --- a/src/test/regress/expected/qp_orca_fallback_optimizer.out +++ b/src/test/regress/expected/qp_orca_fallback_optimizer.out @@ -144,12 +144,8 @@ SELECT * FROM homer; 4 | 3 | 44 (4 rows) --- ORCA should not fallback when ONLY clause is used on external tables +-- ORCA should not fallback just because external tables are in FROM clause -- start_ignore -DROP TABLE IF EXISTS t1; -NOTICE: table "t1" does not exist, skipping -DROP TABLE IF EXISTS ext_table_no_fallback; -NOTICE: table "ext_table_no_fallback" does not exist, skipping CREATE TABLE heap_t1 (a int, b int) DISTRIBUTED BY (b); CREATE EXTERNAL TABLE ext_table_no_fallback (a int, b int) LOCATION ('gpfdist://myhost:8080/test.csv') FORMAT 'CSV'; -- end_ignore @@ -161,24 +157,26 @@ EXPLAIN SELECT * FROM ext_table_no_fallback; Optimizer: PQO version 2.67.0 (3 rows) +-- ORCA should fallback due to ONLY keyword EXPLAIN SELECT * FROM ONLY ext_table_no_fallback; - QUERY PLAN ---------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..472.74 rows=1000000 width=8) - -> External Scan on ext_table_no_fallback (cost=0.00..437.97 rows=333334 width=8) - Optimizer: PQO version 2.67.0 +INFO: GPORCA failed to produce a plan, falling back to planner + QUERY PLAN +----------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..11000.00 rows=1000000 width=8) + -> External Scan on ext_table_no_fallback (cost=0.00..11000.00 rows=333334 width=8) + Optimizer: legacy query optimizer (3 rows) EXPLAIN INSERT INTO heap_t1 SELECT * FROM ONLY ext_table_no_fallback; - QUERY PLAN ------------------------------------------------------------------------------------------------------ - Insert (cost=0.00..16080.27 rows=333334 width=8) - -> Result (cost=0.00..455.27 rows=333334 width=12) - -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..451.27 rows=333334 width=8) - Hash Key: ext_table_no_fallback.b - -> External Scan on ext_table_no_fallback (cost=0.00..437.97 rows=333334 width=8) - Optimizer: PQO version 2.67.0 -(6 rows) +INFO: GPORCA failed to produce a plan, falling back to planner + QUERY PLAN +------------------------------------------------------------------------------------------------- + Insert (slice0; segments: 3) (rows=333334 width=8) + -> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..11000.00 rows=333334 width=8) + Hash Key: ext_table_no_fallback.b + -> External Scan on ext_table_no_fallback (cost=0.00..11000.00 rows=333334 width=8) + Optimizer: legacy query optimizer +(5 rows) set optimizer_enable_dml=off; EXPLAIN INSERT INTO homer VALUES (1,0,40),(2,1,43),(3,2,41),(4,3,44); diff --git a/src/test/regress/sql/qp_orca_fallback.sql b/src/test/regress/sql/qp_orca_fallback.sql index 9637f0dc71..ad8965f0e0 100644 --- a/src/test/regress/sql/qp_orca_fallback.sql +++ b/src/test/regress/sql/qp_orca_fallback.sql @@ -75,14 +75,13 @@ SELECT * FROM homer; DELETE FROM ONLY homer WHERE a = 3; SELECT * FROM homer; --- ORCA should not fallback when ONLY clause is used on external tables +-- ORCA should not fallback just because external tables are in FROM clause -- start_ignore -DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS ext_table_no_fallback; CREATE TABLE heap_t1 (a int, b int) DISTRIBUTED BY (b); CREATE EXTERNAL TABLE ext_table_no_fallback (a int, b int) LOCATION ('gpfdist://myhost:8080/test.csv') FORMAT 'CSV'; -- end_ignore EXPLAIN SELECT * FROM ext_table_no_fallback; +-- ORCA should fallback due to ONLY keyword EXPLAIN SELECT * FROM ONLY ext_table_no_fallback; EXPLAIN INSERT INTO heap_t1 SELECT * FROM ONLY ext_table_no_fallback; -- GitLab