From cbb8ea180cf3e6d261032fec8ef88027ac3d9f4a Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 4 Sep 2017 12:13:01 +0300 Subject: [PATCH] Share external URL-mapping code between planner and ORCA. Planner and ORCA translator both implemented the same logic, to assign external table URIs to segments. But I spotted one case where the logic differed: CREATE EXTERNAL TABLE exttab_with_on_master( i int, j text ) LOCATION ('file://@hostname@@abs_srcdir@/data/exttab_few_errors.data') ON MASTER FORMAT 'TEXT' (DELIMITER '|'); SELECT * FROM exttab_with_on_master; ERROR: 'ON MASTER' is not supported by this protocol yet. With ORCA you got a less user-friendly error: set optimizer=on; set optimizer_enable_master_only_queries = on; postgres=# explain SELECT * FROM exttab_with_on_master; ERROR: External scan error: Could not assign a segment database for external file (CTranslatorDXLToPlStmt.cpp:472) The immediate cause of that was that commit fcf82234e7 didn't remember to modify the ORCA translator's copy of the same logic. But really, it's silly and error-prone to duplicate the code, so modify ORCA to use the same code that the planner does. 
--- src/backend/gpopt/gpdbwrappers.cpp | 15 + .../translate/CTranslatorDXLToPlStmt.cpp | 613 +----------------- src/backend/nodes/outfuncs.c | 11 +- src/backend/optimizer/plan/createplan.c | 531 +++++++-------- src/backend/optimizer/util/plancat.c | 15 +- src/backend/optimizer/util/relnode.c | 11 +- src/include/gpopt/gpdbwrappers.h | 3 + .../gpopt/translate/CTranslatorDXLToPlStmt.h | 82 --- src/include/gpopt/utils/gpdbdefs.h | 1 + src/include/nodes/relation.h | 11 +- src/include/optimizer/planmain.h | 3 + 11 files changed, 303 insertions(+), 993 deletions(-) diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp index 753685ea13..fc2da42da2 100644 --- a/src/backend/gpopt/gpdbwrappers.cpp +++ b/src/backend/gpopt/gpdbwrappers.cpp @@ -2463,6 +2463,21 @@ gpdb::Pexttable return NULL; } +List * +gpdb::PlExternalScanUriList + ( + ExtTableEntry *ext, + bool *ismasteronlyp + ) +{ + GP_WRAP_START; + { + return create_external_scan_uri_list(ext, ismasteronlyp); + } + GP_WRAP_END; + return NULL; +} + TargetEntry * gpdb::PteMember ( diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 640a8aee48..dc3908e6fc 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -409,613 +409,6 @@ CTranslatorDXLToPlStmt::SetParamIds(Plan* pplan) } -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsFile -// -// @doc: -// Segment mapping for tables with LOCATION http:// or file:// -// These two protocols are very similar in that they enforce a 1-URI:1-segdb -// relationship. The only difference between them is that file:// URI must -// be assigned to a segdb on a host that is local to that URI. 
-// -//--------------------------------------------------------------------------- -void -CTranslatorDXLToPlStmt::MapLocationsFile - ( - OID oidRel, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ) -{ - // extract file path and name from URI strings and assign them a primary segdb - - ExtTableEntry *extentry = gpdb::Pexttable(oidRel); - - ListCell *plcLocation = NULL; - ForEach (plcLocation, extentry->urilocations) - { - Value* pvLocation = (Value *)lfirst(plcLocation); - CHAR *szUri = pvLocation->val.str; - - Uri *pUri = gpdb::PuriParseExternalTable(szUri); - - BOOL fCandidateFound = false; - BOOL fMatchFound = false; - - // try to find a segment database that can handle this uri - for (int i = 0; i < pcdbCompDB->total_segment_dbs && !fMatchFound; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - INT iSegInd = pcdbCompDBInfo->segindex; - if (SEGMENT_IS_ACTIVE_PRIMARY(pcdbCompDBInfo)) - { - if (URI_FILE == pUri->protocol && - 0 != gpdb::IStrCmpIgnoreCase(pUri->hostname, pcdbCompDBInfo->hostname) && - 0 != gpdb::IStrCmpIgnoreCase(pUri->hostname, pcdbCompDBInfo->address)) - { - continue; - } - - fCandidateFound = true; - if (NULL == rgszSegFileMap[iSegInd]) - { - rgszSegFileMap[iSegInd] = PStrDup(szUri); - fMatchFound = true; - } - } - } - - if (!fMatchFound) - { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtExternalScanError, - GPOS_WSZ_LIT("Could not assign a segment database for external file")); - } - } -} - -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsFdist -// -// @doc: -// Segment mapping for tables with LOCATION gpfdist(s):// or custom protocol -// The user supplied gpfdist(s):// URIs are duplicated so that there is one -// available to every segdb. 
However, in some cases (as determined by -// gp_external_max_segs GUC) we don't want to use *all* segdbs but instead -// figure out how many and pick them randomly (for better performance) -// -//--------------------------------------------------------------------------- -void -CTranslatorDXLToPlStmt::MapLocationsFdist - ( - OID oidRel, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB, - Uri *pUri, - const ULONG ulTotalPrimaries - ) -{ - ULONG ulParticipatingSegments = ulTotalPrimaries; - ULONG ulMaxParticipants = ulParticipatingSegments; - - ExtTableEntry *extentry = gpdb::Pexttable(oidRel); - - const ULONG ulLocations = gpdb::UlListLength(extentry->urilocations); - if (URI_GPFDIST == pUri->protocol || URI_GPFDISTS == pUri->protocol) - { - ulMaxParticipants = ulLocations * gp_external_max_segs; - } - - ULONG ulSkip = 0; - BOOL fSkipRandomly = false; - if (ulParticipatingSegments > ulMaxParticipants) - { - ulSkip = ulParticipatingSegments - ulMaxParticipants; - ulParticipatingSegments = ulMaxParticipants; - fSkipRandomly = true; - } - - if (ulLocations > ulParticipatingSegments) - { - // This should match the same error in createplan.c - char msgbuf[200]; - - snprintf(msgbuf, sizeof(msgbuf), - "There are more external files (URLs) than primary segments that can read them. 
Found %d URLs and %d primary segments.", - ulLocations, ulParticipatingSegments); - - GpdbEreport(ERRCODE_INVALID_TABLE_DEFINITION, // errcode - ERROR, - msgbuf, // errmsg - NULL); // errhint - } - - BOOL fDone = false; - List *plModifiedLocations = NIL; - ULONG ulModifiedLocations = 0; - while (!fDone) - { - ListCell *plcLocation = NULL; - ForEach (plcLocation, extentry->urilocations) - { - Value* pvLocation = (Value *)lfirst(plcLocation); - CHAR *szUri = pvLocation->val.str; - plModifiedLocations = gpdb::PlAppendElement(plModifiedLocations, gpdb::PvalMakeString(szUri)); - ulModifiedLocations ++; - - if (ulModifiedLocations == ulParticipatingSegments) - { - fDone = true; - break; - } - - if (ulModifiedLocations > ulParticipatingSegments) - { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtExternalScanError, - GPOS_WSZ_LIT("External scan location list failed building distribution")); - } - } - } - - BOOL *rgfSkipMap = NULL; - if (fSkipRandomly) - { - rgfSkipMap = gpdb::RgfRandomSegMap(ulTotalPrimaries, ulSkip); - } - - // assign each URI from the new location list a primary segdb - ListCell *plc = NULL; - ForEach (plc, plModifiedLocations) - { - const CHAR *szUri = (CHAR *) strVal(lfirst(plc)); - - BOOL fCandidateFound = false; - BOOL fMatchFound = false; - - for (int i = 0; i < pcdbCompDB->total_segment_dbs && !fMatchFound; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - INT iSegInd = pcdbCompDBInfo->segindex; - - if (SEGMENT_IS_ACTIVE_PRIMARY(pcdbCompDBInfo)) - { - if (fSkipRandomly) - { - GPOS_ASSERT(iSegInd < (INT) ulTotalPrimaries); - if (rgfSkipMap[iSegInd]) - { - continue; - } - } - - fCandidateFound = true; - if (NULL == rgszSegFileMap[iSegInd]) - { - rgszSegFileMap[iSegInd] = PStrDup(szUri); - fMatchFound = true; - } - } - } - - if (!fMatchFound) - { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtExternalScanError, - GPOS_WSZ_LIT("Unable to assign segments for gpfdist(s)")); - } - } -} - 
-//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsExecute -// -// @doc: -// Segment mapping for tables with EXECUTE 'cmd' ON. -// In here we don't have URI's. We have a single command string and a -// specification of the segdb granularity it should get executed on (the -// ON clause). Depending on the ON clause specification we could go many -// different ways, for example: assign the command to all segdb, or one -// command per host, or assign to 5 random segments, etc... -// -//--------------------------------------------------------------------------- -void -CTranslatorDXLToPlStmt::MapLocationsExecute - ( - OID oidRel, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB, - const ULONG ulTotalPrimaries - ) -{ - ExtTableEntry *extentry = gpdb::Pexttable(oidRel); - CHAR *szCommand = extentry->command; - const CHAR *szPrefix = "execute:"; - - StringInfo si = gpdb::SiMakeStringInfo(); - gpdb::AppendStringInfo(si, szPrefix, szCommand); - CHAR *szPrefixedCommand = PStrDup(si->data); - - gpdb::GPDBFree(si->data); - gpdb::GPDBFree(si); - si = NULL; - - // get the ON clause (execute location) information - Value *pvOnClause = (Value *) gpdb::PvListNth(extentry->execlocations, 0); - CHAR *szOnClause = pvOnClause->val.str; - - if (0 == gpos::clib::IStrCmp(szOnClause, "ALL_SEGMENTS")) - { - MapLocationsExecuteAllSegments(szPrefixedCommand, rgszSegFileMap, pcdbCompDB); - } - else if (0 == gpos::clib::IStrCmp(szOnClause, "PER_HOST")) - { - MapLocationsExecutePerHost(szPrefixedCommand, rgszSegFileMap, pcdbCompDB); - } - else if (0 == gpos::clib::IStrNCmp(szOnClause, "HOST:", gpos::clib::UlStrLen("HOST:"))) - { - CHAR *szHostName = szOnClause + gpos::clib::UlStrLen("HOST:"); - MapLocationsExecuteOneHost(szHostName, szPrefixedCommand, rgszSegFileMap, pcdbCompDB); - } - else if (0 == gpos::clib::IStrNCmp(szOnClause, "SEGMENT_ID:", gpos::clib::UlStrLen("SEGMENT_ID:"))) - { - CHAR *pcEnd = 
NULL; - INT iTargetSegInd = (INT) gpos::clib::LStrToL(szOnClause + gpos::clib::UlStrLen("SEGMENT_ID:"), &pcEnd, 10); - MapLocationsExecuteOneSegment(iTargetSegInd, szPrefixedCommand, rgszSegFileMap, pcdbCompDB); - } - else if (0 == gpos::clib::IStrNCmp(szOnClause, "TOTAL_SEGS:", gpos::clib::UlStrLen("TOTAL_SEGS:"))) - { - // total n segments selected randomly - CHAR *pcEnd = NULL; - ULONG ulSegsToUse = gpos::clib::LStrToL(szOnClause + gpos::clib::UlStrLen("TOTAL_SEGS:"), &pcEnd, 10); - - MapLocationsExecuteRandomSegments(ulSegsToUse, ulTotalPrimaries, szPrefixedCommand, rgszSegFileMap, pcdbCompDB); - } - else if (0 == gpos::clib::IStrCmp(szOnClause, "MASTER_ONLY")) - { - rgszSegFileMap[0] = PStrDup(szPrefixedCommand); - } - else - { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtExternalScanError, - GPOS_WSZ_LIT("Invalid ON clause")); - } - - gpdb::GPDBFree(szPrefixedCommand); -} - -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsExecuteAllSegments -// -// @doc: -// Segment mapping for tables with EXECUTE 'cmd' on all segments -// -//--------------------------------------------------------------------------- -void -CTranslatorDXLToPlStmt::MapLocationsExecuteAllSegments - ( - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ) -{ - for (int i = 0; i < pcdbCompDB->total_segment_dbs; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - INT iSegInd = pcdbCompDBInfo->segindex; - if (SEGMENT_IS_ACTIVE_PRIMARY(pcdbCompDBInfo)) - { - rgszSegFileMap[iSegInd] = PStrDup(szPrefixedCommand); - } - } -} - -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsExecutePerHost -// -// @doc: -// Segment mapping for tables with EXECUTE 'cmd' per host -// -//--------------------------------------------------------------------------- -void 
-CTranslatorDXLToPlStmt::MapLocationsExecutePerHost - ( - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ) -{ - List *plVisitedHosts = NIL; - for (int i = 0; i < pcdbCompDB->total_segment_dbs; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - INT iSegInd = pcdbCompDBInfo->segindex; - if (SEGMENT_IS_ACTIVE_PRIMARY(pcdbCompDBInfo)) - { - BOOL fHostTaken = false; - ListCell *plc = NULL; - ForEach (plc, plVisitedHosts) - { - const CHAR *szHostName = (CHAR *) strVal(lfirst(plc)); - if (0 == gpdb::IStrCmpIgnoreCase(szHostName, pcdbCompDBInfo->hostname)) - { - fHostTaken = true; - break; - } - } - - if (!fHostTaken) - { - rgszSegFileMap[iSegInd] = PStrDup(szPrefixedCommand); - plVisitedHosts = gpdb::PlAppendElement - ( - plVisitedHosts, - gpdb::PvalMakeString(PStrDup(pcdbCompDBInfo->hostname)) - ); - } - } - } -} - -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsExecuteOneHost -// -// @doc: -// Segment mapping for tables with EXECUTE 'cmd' on a given host -// -//--------------------------------------------------------------------------- -void -CTranslatorDXLToPlStmt::MapLocationsExecuteOneHost - ( - CHAR *szHostName, - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ) -{ - BOOL fMatchFound = false; - for (int i = 0; i < pcdbCompDB->total_segment_dbs; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - INT iSegInd = pcdbCompDBInfo->segindex; - - if (SEGMENT_IS_ACTIVE_PRIMARY(pcdbCompDBInfo) && - 0 == gpdb::IStrCmpIgnoreCase(szHostName, pcdbCompDBInfo->hostname)) - { - rgszSegFileMap[iSegInd] = PStrDup(szPrefixedCommand); - fMatchFound = true; - } - } - - if (!fMatchFound) - { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtExternalScanError, - GPOS_WSZ_LIT("Could not assign a segment database for given command. 
No valid primary segment was found in the requested host name.")); - } -} - -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsExecuteOneSegment -// -// @doc: -// Segment mapping for tables with EXECUTE 'cmd' on a given segment -// -//--------------------------------------------------------------------------- -void -CTranslatorDXLToPlStmt::MapLocationsExecuteOneSegment - ( - INT iTargetSegInd, - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ) -{ - BOOL fMatchFound = false; - for (int i = 0; i < pcdbCompDB->total_segment_dbs; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - INT iSegInd = pcdbCompDBInfo->segindex; - if (SEGMENT_IS_ACTIVE_PRIMARY(pcdbCompDBInfo) && iSegInd == iTargetSegInd) - { - rgszSegFileMap[iSegInd] = PStrDup(szPrefixedCommand); - fMatchFound = true; - } - } - - if(!fMatchFound) - { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtExternalScanError, - GPOS_WSZ_LIT("Could not assign a segment database for given command. 
The requested segment id is not a valid primary segment.")); - } -} - -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsExecuteRandomSegments -// -// @doc: -// Segment mapping for tables with EXECUTE 'cmd' on N random segments -// -//--------------------------------------------------------------------------- -void -CTranslatorDXLToPlStmt::MapLocationsExecuteRandomSegments - ( - ULONG ulSegments, - const ULONG ulTotalPrimaries, - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ) -{ - if (ulSegments > ulTotalPrimaries) - { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtExternalScanError, - GPOS_WSZ_LIT("More segments in table definition than valid primary segments in the database.")); - } - - ULONG ulSkip = ulTotalPrimaries - ulSegments; - BOOL *rgfSkipMap = gpdb::RgfRandomSegMap(ulTotalPrimaries, ulSkip); - - for (int i = 0; i < pcdbCompDB->total_segment_dbs; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - INT iSegInd = pcdbCompDBInfo->segindex; - if (SEGMENT_IS_ACTIVE_PRIMARY(pcdbCompDBInfo)) - { - GPOS_ASSERT(iSegInd < (INT) ulTotalPrimaries); - if (rgfSkipMap[iSegInd]) - { - continue; - } - rgszSegFileMap[iSegInd] = PStrDup(szPrefixedCommand); - } - } -} - -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::MapLocationsHdfs -// -// @doc: -// Segment mapping for tables with LOCATION gphdfs:// -// The file chuck division and assignment will be done in the external -// Java program. We simply assign the location to all the segdbs. 
-// -//--------------------------------------------------------------------------- -void -CTranslatorDXLToPlStmt::MapLocationsHdfs - ( - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB, - CHAR *szFirstUri - ) -{ - for (int i = 0; i < pcdbCompDB->total_segment_dbs; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - rgszSegFileMap[pcdbCompDBInfo->segindex] = PStrDup(szFirstUri); - } -} - -//--------------------------------------------------------------------------- -// @function: -// CTranslatorDXLToPlStmt::PlExternalScanUriList -// -// @doc: -// List of URIs for external scan -// -//--------------------------------------------------------------------------- -List* -CTranslatorDXLToPlStmt::PlExternalScanUriList - ( - OID oidRel - ) -{ - ExtTableEntry *extentry = gpdb::Pexttable(oidRel); - - if (extentry->iswritable) - { - // This should match the same error in createplan.c - GpdbEreport(ERRCODE_WRONG_OBJECT_TYPE, // errcode - ERROR, - "cannot read from a WRITABLE external table", // errmsg - "Create the table as READABLE instead."); // errhint - } - - //get the total valid primary segdb count - CdbComponentDatabases *pcdbCompDB = gpdb::PcdbComponentDatabases(); - ULONG ulTotalPrimaries = 0; - for (int i = 0; i < pcdbCompDB->total_segment_dbs; i++) - { - CdbComponentDatabaseInfo *pcdbCompDBInfo = &pcdbCompDB->segment_db_info[i]; - if (SEGMENT_IS_ACTIVE_PRIMARY(pcdbCompDBInfo)) - { - ulTotalPrimaries++; - } - } - - char **rgszSegFileMap = NULL; - rgszSegFileMap = (char **) gpdb::GPDBAlloc(ulTotalPrimaries * sizeof(char *)); - gpos::clib::PvMemSet(rgszSegFileMap, 0, ulTotalPrimaries * sizeof(char *)); - - // is this an EXECUTE table or a LOCATION (URI) table - BOOL fUsingExecute = false; - BOOL fUsingLocation = false; - const CHAR *szCommand = extentry->command; - if (NULL != szCommand) - { - if (!gp_external_enable_exec) - { - // This should match the same error in createplan.c - 
GpdbEreport(ERRCODE_GP_FEATURE_NOT_CONFIGURED, // errcode - ERROR, - "Using external tables with OS level commands (EXECUTE clause) is disabled", // errmsg - "To enable set gp_external_enable_exec=on"); // errhint - } - fUsingExecute = true; - } - else - { - fUsingLocation = true; - } - - GPOS_ASSERT(0 < gpdb::UlListLength(extentry->urilocations)); - - CHAR *szFirstUri = NULL; - Uri *pUri = NULL; - if (!fUsingExecute) - { - szFirstUri = ((Value *) gpdb::PvListNth(extentry->urilocations, 0))->val.str; - pUri = gpdb::PuriParseExternalTable(szFirstUri); - } - - if (fUsingLocation && (URI_FILE == pUri->protocol || URI_HTTP == pUri->protocol)) - { - MapLocationsFile(oidRel, rgszSegFileMap, pcdbCompDB); - } - else if (fUsingLocation && (URI_GPFDIST == pUri->protocol || URI_GPFDISTS == pUri->protocol || URI_CUSTOM == pUri->protocol)) - { - MapLocationsFdist(oidRel, rgszSegFileMap, pcdbCompDB, pUri, ulTotalPrimaries); - } - else if (fUsingExecute) - { - MapLocationsExecute(oidRel, rgszSegFileMap, pcdbCompDB, ulTotalPrimaries); - } - else if (fUsingLocation && URI_GPHDFS == pUri->protocol) - { - MapLocationsHdfs(rgszSegFileMap, pcdbCompDB, szFirstUri); - } - else - { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtExternalScanError, GPOS_WSZ_LIT("Unsupported protocol and/or file location")); - } - - // convert array map to a list so it can be serialized as part of the plan - List *plFileNames = NIL; - for (ULONG ul = 0; ul < ulTotalPrimaries; ul++) - { - Value *pval = NULL; - if (NULL != rgszSegFileMap[ul]) - { - pval = gpdb::PvalMakeString(rgszSegFileMap[ul]); - } - else - { - // no file for this segdb. 
add a null entry - pval = MakeNode(Value); - pval->type = T_Null; - } - plFileNames = gpdb::PlAppendElement(plFileNames, pval); - } - - return plFileNames; -} - - //--------------------------------------------------------------------------- // @function: // CTranslatorDXLToPlStmt::PtsFromDXLTblScan @@ -1055,15 +448,17 @@ CTranslatorDXLToPlStmt::PtsFromDXLTblScan const IMDRelationExternal *pmdrelext = dynamic_cast<const IMDRelationExternal*>(pmdrel); OID oidRel = CMDIdGPDB::PmdidConvert(pmdrel->Pmdid())->OidObjectId(); ExtTableEntry *pextentry = gpdb::Pexttable(oidRel); + bool isMasterOnly; // create external scan node ExternalScan *pes = MakeNode(ExternalScan); pes->scan.scanrelid = iRel; - pes->uriList = PlExternalScanUriList(oidRel); + pes->uriList = gpdb::PlExternalScanUriList(pextentry, &isMasterOnly); Value *pval = gpdb::PvalMakeString(pextentry->fmtopts); pes->fmtOpts = ListMake1(pval); pes->fmtType = pextentry->fmtcode; - pes->isMasterOnly = (IMDRelation::EreldistrMasterOnly == pmdrelext->Ereldistribution()); + pes->isMasterOnly = isMasterOnly; + GPOS_ASSERT((IMDRelation::EreldistrMasterOnly == pmdrelext->Ereldistribution()) == isMasterOnly); pes->rejLimit = pmdrelext->IRejectLimit(); pes->rejLimitInRows = pmdrelext->FRejLimitInRows(); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 4fca0be3b8..c276779a5f 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2044,16 +2044,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node) WRITE_UINT_FIELD(pages); WRITE_FLOAT_FIELD(tuples, "%.0f"); WRITE_NODE_FIELD(subplan); - WRITE_NODE_FIELD(urilocationlist); - WRITE_NODE_FIELD(execlocationlist); - WRITE_STRING_FIELD(execcommand); - WRITE_CHAR_FIELD(fmttype); - WRITE_STRING_FIELD(fmtopts); - WRITE_INT_FIELD(rejectlimit); - WRITE_CHAR_FIELD(rejectlimittype); - WRITE_OID_FIELD(fmterrtbl); - WRITE_INT_FIELD(ext_encoding); - WRITE_BOOL_FIELD(writable); + /* no output function for ExtTableEntry */ WRITE_NODE_FIELD(subrtable); 
WRITE_NODE_FIELD(baserestrictinfo); WRITE_NODE_FIELD(joininfo); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 46c175c4ae..2ad68d182b 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -20,6 +20,7 @@ #include +#include "catalog/pg_exttable.h" #include "catalog/pg_type.h" /* INT8OID */ #include "access/skey.h" #include "nodes/makefuncs.h" @@ -1197,36 +1198,13 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, ExternalScan *scan_plan; Index scan_relid = best_path->parent->relid; RelOptInfo *rel = best_path->parent; - CdbComponentDatabases *db_info; - Uri *uri = NULL; - List *filenames = NIL; - List *fmtopts = NIL; - List *modifiedloclist = NIL; - ListCell *c = NULL; - char **segdb_file_map = NULL; - char *first_uri_str = NULL; + List *filenames; bool ismasteronly = false; bool islimitinrows = false; int rejectlimit = -1; - int encoding = -1; - int total_primaries = 0; - int i; Oid fmtErrTblOid = InvalidOid; - char *on_clause = NULL; - - /* various processing flags */ - bool using_execute = false; /* true if EXECUTE is used */ - bool using_location = false; /* true if LOCATION is used */ - bool found_candidate = false; - bool found_match = false; - bool done = false; - - /* gpfdist(s) or EXECUTE specific variables */ - int total_to_skip = 0; - int max_participants_allowed = 0; - int num_segs_participating = 0; - bool *skip_map = NULL; - bool should_skip_randomly = false; + ExtTableEntry *ext = rel->extEntry; + List *fmtopts; /* it should be an external rel... 
*/ Assert(scan_relid > 0); @@ -1238,30 +1216,84 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ scan_clauses = extract_actual_clauses(scan_clauses, false); - /* get the total valid primary segdb count */ - db_info = getCdbComponentDatabases(); - for (i = 0; i < db_info->total_segment_dbs; i++) + Assert(ext->execlocations != NIL); + + if (ext->rejectlimit != -1) { - CdbComponentDatabaseInfo *p = &db_info->segment_db_info[i]; + /* + * single row error handling is requested, make sure reject limit and + * error table (if requested) are valid. + * + * NOTE: this should never happen unless somebody modified the catalog + * manually. We are just being pedantic here. + */ + VerifyRejectLimit(ext->rejectlimittype, ext->rejectlimit); + } - if (SEGMENT_IS_ACTIVE_PRIMARY(p)) - total_primaries++; + /* assign Uris to segments. */ + filenames = create_external_scan_uri_list(ext, &ismasteronly); + + /* data format description */ + Assert(ext->fmtopts); + fmtopts = list_make1(makeString(pstrdup(ext->fmtopts))); + + /* single row error handling */ + if (ext->rejectlimit != -1) + { + islimitinrows = (ext->rejectlimittype == 'r' ? true : false); + rejectlimit = ext->rejectlimit; + fmtErrTblOid = ext->fmterrtbl; } - /* - * initialize a file-to-segdb mapping. segdb_file_map string array indexes - * segindex and the entries are the external file path is assigned to this - * segment datbase. For example if segdb_file_map[2] has "/tmp/emp.1" then - * this file is assigned to primary segdb 2. if an entry has NULL then - * that segdb isn't assigned any file. 
- */ - segdb_file_map = (char **) palloc(total_primaries * sizeof(char *)); - MemSet(segdb_file_map, 0, total_primaries * sizeof(char *)); + scan_plan = make_externalscan(tlist, + scan_clauses, + scan_relid, + filenames, + fmtopts, + ext->fmtcode, + ismasteronly, + rejectlimit, + islimitinrows, + fmtErrTblOid, + ext->encoding); + + copy_path_costsize(root, &scan_plan->scan.plan, best_path); + + return scan_plan; +} + +List * +create_external_scan_uri_list(ExtTableEntry *ext, bool *ismasteronly) +{ + ListCell *c; + List *modifiedloclist = NIL; + int i; + CdbComponentDatabases *db_info; + int total_primaries; + char **segdb_file_map; + + /* various processing flags */ + bool using_execute = false; /* true if EXECUTE is used */ + bool using_location; /* true if LOCATION is used */ + bool found_candidate = false; + bool found_match = false; + bool done = false; + List *filenames; + + /* gpfdist(s) or EXECUTE specific variables */ + int total_to_skip = 0; + int max_participants_allowed = 0; + int num_segs_participating = 0; + bool *skip_map = NULL; + bool should_skip_randomly = false; - Assert(rel->execlocationlist != NIL); + Uri *uri; + char *on_clause; + + *ismasteronly = false; /* is this an EXECUTE table or a LOCATION (URI) table */ - if (rel->execcommand) + if (ext->command) { using_execute = true; using_location = false; @@ -1272,52 +1304,65 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, using_location = true; } - /* various validations */ - - if (rel->writable) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot read from a WRITABLE external table"), - errhint("Create the table as READABLE instead."))); - - - if (rel->rejectlimit != -1) + /* is this an EXECUTE table or a LOCATION (URI) table */ + if (ext->command && !gp_external_enable_exec) { - /* - * single row error handling is requested, make sure reject limit and - * error table (if requested) are valid. 
- * - * NOTE: this should never happen unless somebody modified the catalog - * manually. We are just being pedantic here. - */ - VerifyRejectLimit(rel->rejectlimittype, rel->rejectlimit); - } - - if (using_execute && !gp_external_enable_exec) ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_CONFIGURED), /* any better errcode? */ errmsg("Using external tables with OS level commands " "(EXECUTE clause) is disabled"), errhint("To enable set gp_external_enable_exec=on"))); + } + + /* various validations */ + if (ext->iswritable) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot read from a WRITABLE external table"), + errhint("Create the table as READABLE instead."))); /* * take a peek at the first URI so we know which protocol we'll deal with */ if (!using_execute) { - first_uri_str = (char *) strVal(lfirst(list_head(rel->urilocationlist))); + char *first_uri_str; + + first_uri_str = strVal(linitial(ext->urilocations)); uri = ParseExternalTableUri(first_uri_str); } + else + uri = NULL; /* get the ON clause information, and restrict 'ON MASTER' to custom * protocols only */ - on_clause = (char *) strVal(lfirst(list_head(rel->execlocationlist))); + on_clause = (char *) strVal(linitial(ext->execlocations)); if ((strcmp(on_clause, "MASTER_ONLY") == 0) && using_location && (uri->protocol != URI_CUSTOM)) { ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("\'ON MASTER\' is not supported by this protocol yet."))); } + /* get the total valid primary segdb count */ + db_info = getCdbComponentDatabases(); + total_primaries = 0; + for (i = 0; i < db_info->total_segment_dbs; i++) + { + CdbComponentDatabaseInfo *p = &db_info->segment_db_info[i]; + + if (SEGMENT_IS_ACTIVE_PRIMARY(p)) + total_primaries++; + } + + /* + * initialize a file-to-segdb mapping. segdb_file_map string array indexes + * segindex and the entries are the external file path is assigned to this + * segment database. 
For example if segdb_file_map[2] has "/tmp/emp.1" then + * this file is assigned to primary segdb 2. if an entry has NULL then + * that segdb isn't assigned any file. + */ + segdb_file_map = (char **) palloc0(total_primaries * sizeof(char *)); + /* * Now we do the actual assignment of work to the segment databases (where * work is either a URI to open or a command to execute). Due to the big @@ -1367,7 +1412,7 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, * extract file path and name from URI strings and assign them a * primary segdb */ - foreach(c, rel->urilocationlist) + foreach(c, ext->urilocations) { const char *uri_str = (char *) strVal(lfirst(c)); @@ -1376,7 +1421,6 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, found_candidate = false; found_match = false; - /* * look through our segment database list and try to find a * database that can handle this uri. @@ -1464,195 +1508,198 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, uri->protocol == URI_GPFDISTS || uri->protocol == URI_CUSTOM)) { - if ((strcmp(on_clause, "MASTER_ONLY") == 0) && (uri->protocol == URI_CUSTOM)) { - const char *uri_str = (char *) strVal(lfirst(list_head(rel->urilocationlist))); - segdb_file_map[0] = pstrdup(uri_str); - ismasteronly = true; - } else { - /* - * Re-write the location list for GPFDIST or GPFDISTS before mapping to segments. - * - * If we happen to be dealing with URI's with the 'gpfdist' (or 'gpfdists') protocol - * we do an extra step here. - * - * (*) We modify the urilocationlist so that every - * primary segdb will get a URI (therefore we duplicate the existing - * URI's until the list is of size = total_primaries). - * Example: 2 URIs, 7 total segdbs. - * Original LocationList: URI1->URI2 - * Modified LocationList: URI1->URI2->URI1->URI2->URI1->URI2->URI1 - * - * (**) We also make sure that we don't allocate more segdbs than - * (# of URIs x gp_external_max_segs). 
- * Example: 2 URIs, 7 total segdbs, gp_external_max_segs = 3 - * Original LocationList: URI1->URI2 - * Modified LocationList: URI1->URI2->URI1->URI2->URI1->URI2 (6 total). - * - * (***) In that case that we need to allocate only a subset of primary - * segdbs and not all we then also create a random map of segments to skip. - * Using the previous example a we create a map of 7 entries and need to - * randomly select 1 segdb to skip (7 - 6 = 1). so it may look like this: - * [F F T F F F F] - in which case we know to skip the 3rd segment only. - */ - - /* total num of segs that will participate in the external operation */ - num_segs_participating = total_primaries; - - /* max num of segs that are allowed to participate in the operation */ - if ((uri->protocol == URI_GPFDIST) || (uri->protocol == URI_GPFDISTS)) + if ((strcmp(on_clause, "MASTER_ONLY") == 0) && (uri->protocol == URI_CUSTOM)) { - max_participants_allowed = list_length(rel->urilocationlist) * - gp_external_max_segs; + const char *uri_str = strVal(linitial(ext->urilocations)); + segdb_file_map[0] = pstrdup(uri_str); + *ismasteronly = true; } else { /* - * for custom protocol, set max_participants_allowed to - * num_segs_participating so that assignment to segments will use - * all available segments + * Re-write the location list for GPFDIST or GPFDISTS before mapping to segments. + * + * If we happen to be dealing with URI's with the 'gpfdist' (or 'gpfdists') protocol + * we do an extra step here. + * + * (*) We modify the urilocationlist so that every + * primary segdb will get a URI (therefore we duplicate the existing + * URI's until the list is of size = total_primaries). + * Example: 2 URIs, 7 total segdbs. + * Original LocationList: URI1->URI2 + * Modified LocationList: URI1->URI2->URI1->URI2->URI1->URI2->URI1 + * + * (**) We also make sure that we don't allocate more segdbs than + * (# of URIs x gp_external_max_segs). 
+ * Example: 2 URIs, 7 total segdbs, gp_external_max_segs = 3 + * Original LocationList: URI1->URI2 + * Modified LocationList: URI1->URI2->URI1->URI2->URI1->URI2 (6 total). + * + * (***) In that case that we need to allocate only a subset of primary + * segdbs and not all we then also create a random map of segments to skip. + * Using the previous example a we create a map of 7 entries and need to + * randomly select 1 segdb to skip (7 - 6 = 1). so it may look like this: + * [F F T F F F F] - in which case we know to skip the 3rd segment only. */ - max_participants_allowed = num_segs_participating; - } - elog(DEBUG5, - "num_segs_participating = %d. max_participants_allowed = %d. number of URIs = %d", - num_segs_participating, max_participants_allowed, list_length(rel->urilocationlist)); + /* total num of segs that will participate in the external operation */ + num_segs_participating = total_primaries; - /* see (**) above */ - if (num_segs_participating > max_participants_allowed) - { - total_to_skip = num_segs_participating - max_participants_allowed; - num_segs_participating = max_participants_allowed; - should_skip_randomly = true; - - elog(NOTICE, "External scan %s will utilize %d out " - "of %d segment databases", - (uri->protocol == URI_GPFDIST ? 
"from gpfdist(s) server" : "using custom protocol"), - num_segs_participating, - total_primaries); - } + /* max num of segs that are allowed to participate in the operation */ + if ((uri->protocol == URI_GPFDIST) || (uri->protocol == URI_GPFDISTS)) + { + max_participants_allowed = list_length(ext->urilocations) * + gp_external_max_segs; + } + else + { + /* + * for custom protocol, set max_participants_allowed to + * num_segs_participating so that assignment to segments will use + * all available segments + */ + max_participants_allowed = num_segs_participating; + } - if (list_length(rel->urilocationlist) > num_segs_participating) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TABLE_DEFINITION), - errmsg("There are more external files (URLs) than primary " - "segments that can read them. Found %d URLs and " - "%d primary segments.",list_length(rel->urilocationlist), - num_segs_participating))); + elog(DEBUG5, + "num_segs_participating = %d. max_participants_allowed = %d. number of URIs = %d", + num_segs_participating, max_participants_allowed, list_length(ext->urilocations)); - /* - * restart location list and fill in new list until number of - * locations equals the number of segments participating in this - * action (see (*) above for more details). - */ - while (!done) - { - foreach(c, rel->urilocationlist) + /* see (**) above */ + if (num_segs_participating > max_participants_allowed) { - char *uri_str = (char *) strVal(lfirst(c)); - - /* append to a list of Value nodes, size nelems */ - modifiedloclist = lappend(modifiedloclist, makeString(pstrdup(uri_str))); + total_to_skip = num_segs_participating - max_participants_allowed; + num_segs_participating = max_participants_allowed; + should_skip_randomly = true; + + elog(NOTICE, "External scan %s will utilize %d out " + "of %d segment databases", + (uri->protocol == URI_GPFDIST ? 
"from gpfdist(s) server" : "using custom protocol"), + num_segs_participating, + total_primaries); + } - if (list_length(modifiedloclist) == num_segs_participating) - { - done = true; - break; - } + if (list_length(ext->urilocations) > num_segs_participating) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("There are more external files (URLs) than primary " + "segments that can read them. Found %d URLs and " + "%d primary segments.", + list_length(ext->urilocations), + num_segs_participating))); - if (list_length(modifiedloclist) > num_segs_participating) + /* + * restart location list and fill in new list until number of + * locations equals the number of segments participating in this + * action (see (*) above for more details). + */ + while (!done) + { + foreach(c, ext->urilocations) { - elog(ERROR, "External scan location list failed building distribution."); - } - } - } + char *uri_str = (char *) strVal(lfirst(c)); - /* See (***) above for details */ - if (should_skip_randomly) - skip_map = makeRandomSegMap(total_primaries, total_to_skip); + /* append to a list of Value nodes, size nelems */ + modifiedloclist = lappend(modifiedloclist, makeString(pstrdup(uri_str))); - /* - * assign each URI from the new location list a primary segdb - */ - foreach(c, modifiedloclist) - { - const char *uri_str = (char *) strVal(lfirst(c)); + if (list_length(modifiedloclist) == num_segs_participating) + { + done = true; + break; + } - uri = ParseExternalTableUri(uri_str); + if (list_length(modifiedloclist) > num_segs_participating) + { + elog(ERROR, "External scan location list failed building distribution."); + } + } + } - found_candidate = false; - found_match = false; + /* See (***) above for details */ + if (should_skip_randomly) + skip_map = makeRandomSegMap(total_primaries, total_to_skip); /* - * look through our segment database list and try to find a - * database that can handle this uri. 
+ * assign each URI from the new location list a primary segdb */ - for (i = 0; i < db_info->total_segment_dbs && !found_match; i++) + foreach(c, modifiedloclist) { - CdbComponentDatabaseInfo *p = &db_info->segment_db_info[i]; - int segind = p->segindex; + const char *uri_str = strVal(lfirst(c)); - /* - * Assign mapping of external file to this segdb only if: - * 1) This segdb is a valid primary. - * 2) An external file wasn't already assigned to it. - */ - if (SEGMENT_IS_ACTIVE_PRIMARY(p)) + uri = ParseExternalTableUri(uri_str); + + found_candidate = false; + found_match = false; + + /* + * look through our segment database list and try to find a + * database that can handle this uri. + */ + for (i = 0; i < db_info->total_segment_dbs && !found_match; i++) { + CdbComponentDatabaseInfo *p = &db_info->segment_db_info[i]; + int segind = p->segindex; + /* - * skip this segdb if skip_map for this seg index tells us - * to skip it (set to 'true'). + * Assign mapping of external file to this segdb only if: + * 1) This segdb is a valid primary. + * 2) An external file wasn't already assigned to it. */ - if (should_skip_randomly) + if (SEGMENT_IS_ACTIVE_PRIMARY(p)) { - Assert(segind < total_primaries); + /* + * skip this segdb if skip_map for this seg index tells us + * to skip it (set to 'true'). + */ + if (should_skip_randomly) + { + Assert(segind < total_primaries); - if (skip_map[segind]) - continue; /* skip it */ - } + if (skip_map[segind]) + continue; /* skip it */ + } - /* a valid primary segdb exist on this host */ - found_candidate = true; + /* a valid primary segdb exist on this host */ + found_candidate = true; - if (segdb_file_map[segind] == NULL) - { - /* segdb not taken yet. assign this URI to this segdb */ - segdb_file_map[segind] = pstrdup(uri_str); - found_match = true; - } + if (segdb_file_map[segind] == NULL) + { + /* segdb not taken yet. 
assign this URI to this segdb */ + segdb_file_map[segind] = pstrdup(uri_str); + found_match = true; + } - /* - * too bad. this segdb already has an external source - * assigned - */ + /* + * too bad. this segdb already has an external source + * assigned + */ + } } - } - /* We failed to find a segdb for this gpfdist(s) URI */ - if (!found_match) - { - /* should never happen */ - elog(LOG, "external tables gpfdist(s) allocation error. " - "total_primaries: %d, num_segs_participating %d " - "max_participants_allowed %d, total_to_skip %d", - total_primaries, num_segs_participating, - max_participants_allowed, total_to_skip); + /* We failed to find a segdb for this gpfdist(s) URI */ + if (!found_match) + { + /* should never happen */ + elog(LOG, "external tables gpfdist(s) allocation error. " + "total_primaries: %d, num_segs_participating %d " + "max_participants_allowed %d, total_to_skip %d", + total_primaries, num_segs_participating, + max_participants_allowed, total_to_skip); - ereport(ERROR, - (errcode(ERRCODE_GP_INTERNAL_ERROR), - errmsg("Internal error in createplan for external tables" - " when trying to assign segments for gpfdist(s)"))); - } + ereport(ERROR, + (errcode(ERRCODE_GP_INTERNAL_ERROR), + errmsg("Internal error in createplan for external tables" + " when trying to assign segments for gpfdist(s)"))); + } + } } } - } /* (3) */ else if (using_execute) { - const char *command = rel->execcommand; + const char *command = ext->command; const char *prefix = "execute:"; - char *prefixed_command = NULL; - bool match_found = false; + char *prefixed_command; /* build the command string for the executor - 'execute:command' */ StringInfo buf = makeStringInfo(); @@ -1693,7 +1740,7 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, /* 1 seg per host */ List *visited_hosts = NIL; - ListCell *lc = NULL; + ListCell *lc; for (i = 0; i < db_info->total_segment_dbs; i++) { @@ -1706,7 +1753,7 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, 
foreach(lc, visited_hosts) { - const char *hostname = (char *) strVal(lfirst(lc)); + const char *hostname = strVal(lfirst(lc)); if (pg_strcasecmp(hostname, p->hostname) == 0) { @@ -1733,6 +1780,7 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, { /* all segs on the specified host get copy of the command */ char *hostname = on_clause + strlen("HOST:"); + bool match_found = false; for (i = 0; i < db_info->total_segment_dbs; i++) { @@ -1758,8 +1806,8 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, else if (strncmp(on_clause, "SEGMENT_ID:", strlen("SEGMENT_ID:")) == 0) { /* 1 seg with specified id gets a copy of the command */ - int target_segid = atoi(on_clause + strlen("SEGMENT_ID:")); + bool match_found = false; for (i = 0; i < db_info->total_segment_dbs; i++) { @@ -1819,7 +1867,7 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, * meant for the master segment (not seg o). */ segdb_file_map[0] = pstrdup(prefixed_command); - ismasteronly = true; + *ismasteronly = true; } else { @@ -1832,7 +1880,7 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, /* (4) */ else if (using_location && uri->protocol == URI_GPHDFS) { - const char *uri_str = (char *) strVal(lfirst(list_head(rel->urilocationlist))); + const char *uri_str = strVal(linitial(ext->urilocations)); for (i = 0; i < db_info->total_segment_dbs; i++) { @@ -1850,10 +1898,10 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, errmsg("Internal error in createplan for external tables"))); } - /* * convert array map to a list so it can be serialized as part of the plan */ + filenames = NIL; for (i = 0; i < total_primaries; i++) { if (segdb_file_map[i] != NULL) @@ -1868,45 +1916,12 @@ create_externalscan_plan(PlannerInfo *root, Path *best_path, } } - /* data format description */ - Assert(rel->fmtopts); - fmtopts = lappend(fmtopts, makeString(pstrdup(rel->fmtopts))); - - /* single row error handling */ - if (rel->rejectlimit != -1) - { - islimitinrows = 
(rel->rejectlimittype == 'r' ? true : false); - rejectlimit = rel->rejectlimit; - fmtErrTblOid = rel->fmterrtbl; - } - - /* data encoding */ - encoding = rel->ext_encoding; - - scan_plan = make_externalscan(tlist, - scan_clauses, - scan_relid, - filenames, - fmtopts, - rel->fmttype, - ismasteronly, - rejectlimit, - islimitinrows, - fmtErrTblOid, - encoding); - - copy_path_costsize(root, &scan_plan->scan.plan, best_path); - - pfree(segdb_file_map); - - if (skip_map) - pfree(skip_map); - freeCdbComponentDatabases(db_info); - return scan_plan; + return filenames; } + /* * create_indexscan_plan * Returns an indexscan plan for the base relation scanned by 'best_path' diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 841b5ee3a1..f1aa7fa782 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -375,8 +375,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, static void get_external_relation_info(Relation relation, RelOptInfo *rel) { - ExtTableEntry *extentry; - /* * Get partitioning key info for distributed relation. 
*/ @@ -385,18 +383,7 @@ get_external_relation_info(Relation relation, RelOptInfo *rel) /* * Get the pg_exttable fields for this table */ - extentry = GetExtTableEntry(RelationGetRelid(relation)); - - rel->urilocationlist = extentry->urilocations; - rel->execlocationlist = extentry->execlocations; - rel->execcommand = extentry->command; - rel->fmttype = extentry->fmtcode; - rel->fmtopts = extentry->fmtopts; - rel->rejectlimit = extentry->rejectlimit; - rel->rejectlimittype = extentry->rejectlimittype; - rel->fmterrtbl = extentry->fmterrtbl; - rel->ext_encoding = extentry->encoding; - rel->writable = extentry->iswritable; + rel->extEntry = GetExtTableEntry(RelationGetRelid(relation)); } /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index e670d18552..72c28ab57c 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -92,16 +92,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->tuples = 0; rel->subplan = NULL; rel->subrtable = NIL; - rel->urilocationlist = NIL; - rel->execlocationlist = NIL; - rel->execcommand = NULL; - rel->fmttype = '\0'; - rel->fmtopts = NULL; - rel->rejectlimit = -1; - rel->rejectlimittype = '\0'; - rel->fmterrtbl = InvalidOid; - rel->ext_encoding = -1; - rel->writable = false; + rel->extEntry = NULL; rel->baserestrictinfo = NIL; rel->baserestrictcost.startup = 0; rel->baserestrictcost.per_tuple = 0; diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h index 524a78bd71..54b48a3c68 100644 --- a/src/include/gpopt/gpdbwrappers.h +++ b/src/include/gpopt/gpdbwrappers.h @@ -535,6 +535,9 @@ namespace gpdb { // get external table entry with given oid ExtTableEntry *Pexttable(Oid relationId); + // list of URIs for external scan of the given external table entry + List *PlExternalScanUriList(ExtTableEntry *ext, bool *isMasterOnlyP); + // return the first member of the given targetlist whose expression is // equal to the given expression, or 
NULL if no such member exists TargetEntry *PteMember(Node *node, List *targetlist); diff --git a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h index bc960d3c69..7e686535eb 100644 --- a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h +++ b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h @@ -160,88 +160,6 @@ namespace gpdxl // private copy ctor CTranslatorDXLToPlStmt(const CTranslatorDXLToPlStmt&); - // segment mapping for tables with LOCATION http:// or file:// - void MapLocationsFile - ( - OID oidRel, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ); - - // segment mapping for tables with LOCATION gpfdist(s):// or custom protocol - void MapLocationsFdist - ( - OID oidRel, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB, - Uri *pUri, - const ULONG ulTotalPrimaries - ); - - // segment mapping for tables with EXECUTE 'cmd' ON. - void MapLocationsExecute - ( - OID oidRel, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB, - const ULONG ulTotalPrimaries - ); - - // segment mapping for tables with EXECUTE 'cmd' on all segments - void MapLocationsExecuteAllSegments - ( - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ); - - // segment mapping for tables with EXECUTE 'cmd' per host - void MapLocationsExecutePerHost - ( - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ); - - // segment mapping for tables with EXECUTE 'cmd' on a given host - void MapLocationsExecuteOneHost - ( - CHAR *szHostName, - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ); - - //segment mapping for tables with EXECUTE 'cmd' on N random segments - void MapLocationsExecuteRandomSegments - ( - ULONG ulSegments, - const ULONG ulTotalPrimaries, - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ); - - // segment mapping for tables with 
EXECUTE 'cmd' on a given segment - void MapLocationsExecuteOneSegment - ( - INT iTargetSegInd, - CHAR *szPrefixedCommand, - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB - ); - - // segment mapping for tables with LOCATION gphdfs:// - void MapLocationsHdfs - ( - char **rgszSegFileMap, - CdbComponentDatabases *pcdbCompDB, - CHAR *szFirstUri - ); - - // list of URIs for external scan - List* PlExternalScanUriList(OID oidRel); - // walker to set index var attno's static BOOL FSetIndexVarAttno(Node *pnode, SContextIndexVarAttno *pctxtidxvarattno); diff --git a/src/include/gpopt/utils/gpdbdefs.h b/src/include/gpopt/utils/gpdbdefs.h index b3a77a65ef..11c421574f 100644 --- a/src/include/gpopt/utils/gpdbdefs.h +++ b/src/include/gpopt/utils/gpdbdefs.h @@ -51,6 +51,7 @@ extern "C" { #include "utils/typcache.h" #include "utils/numeric.h" #include "optimizer/tlist.h" +#include "optimizer/planmain.h" #include "nodes/makefuncs.h" #include "catalog/pg_operator.h" #include "lib/stringinfo.h" diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 36e02d9688..07cd793bc3 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -492,16 +492,7 @@ typedef struct RelOptInfo List *subrtable; /* if subquery */ /* used by external scan */ - List *urilocationlist; - List *execlocationlist; - char *execcommand; - char fmttype; - char *fmtopts; - int32 rejectlimit; - char rejectlimittype; - Oid fmterrtbl; - int32 ext_encoding; - bool writable; /* true for writable, false for readable ext tables*/ + struct ExtTableEntry *extEntry; /* used by various scans and joins: */ List *baserestrictinfo; /* RestrictInfo structures (if base diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index d680c0ac73..546c9b784a 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -19,6 +19,7 @@ #include "nodes/plannodes.h" #include "nodes/relation.h" #include "optimizer/clauses.h" /* 
AggClauseCounts */ +#include "utils/uri.h" /* GUC parameters */ #define DEFAULT_CURSOR_TUPLE_FRACTION 1.0 /* assume all rows will be fetched */ @@ -209,6 +210,8 @@ extern Plan *add_agg_cost(PlannerInfo *root, Plan *plan, int numAggs, int transSpace); extern Plan *plan_pushdown_tlist(PlannerInfo *root, Plan *plan, List *tlist); /*CDB*/ +extern List *create_external_scan_uri_list(struct ExtTableEntry *extEntry, bool *ismasteronly); + /* * prototypes for plan/initsplan.c */ -- GitLab