diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c index fd1e60a00463e6bb6fb672df97e67f12be5a122a..5d715a29130de618356211b00284b49b4a01f9e6 100644 --- a/src/backend/cdb/cdbmutate.c +++ b/src/backend/cdb/cdbmutate.c @@ -66,12 +66,10 @@ static bool replace_shareinput_targetlists_walker(Node *node, PlannerInfo *root, Motion * -make_union_motion(Plan *lefttree, int numsegments) +make_union_motion(Plan *lefttree) { Motion *motion; - Assert(numsegments > 0); - motion = make_motion(NULL, lefttree, 0, NULL, NULL, NULL, NULL /* no ordering */); @@ -85,13 +83,10 @@ make_union_motion(Plan *lefttree, int numsegments) Motion * make_sorted_union_motion(PlannerInfo *root, Plan *lefttree, int numSortCols, AttrNumber *sortColIdx, Oid *sortOperators, - Oid *collations, bool *nullsFirst, - int numsegments) + Oid *collations, bool *nullsFirst) { Motion *motion; - Assert(numsegments > 0); - motion = make_motion(root, lefttree, numSortCols, sortColIdx, sortOperators, collations, nullsFirst); motion->motionType = MOTIONTYPE_GATHER; @@ -105,7 +100,7 @@ Motion * make_hashed_motion(Plan *lefttree, List *hashExprs, List *hashOpfamilies, - int numsegments) + int numHashSegments) { Motion *motion; Oid *hashFuncs; @@ -113,7 +108,7 @@ make_hashed_motion(Plan *lefttree, ListCell *opf_cell; int i; - Assert(numsegments > 0); + Assert(numHashSegments > 0); Assert(list_length(hashExprs) == list_length(hashOpfamilies)); /* Look up the right hash functions for the hash expressions */ @@ -133,17 +128,16 @@ make_hashed_motion(Plan *lefttree, motion->motionType = MOTIONTYPE_HASH; motion->hashExprs = hashExprs; motion->hashFuncs = hashFuncs; + motion->numHashSegments = numHashSegments; return motion; } Motion * -make_broadcast_motion(Plan *lefttree, int numsegments) +make_broadcast_motion(Plan *lefttree) { Motion *motion; - Assert(numsegments > 0); - motion = make_motion(NULL, lefttree, 0, NULL, NULL, NULL, NULL /* no ordering */); motion->motionType = MOTIONTYPE_BROADCAST; @@ -154,14 
+148,13 @@ make_broadcast_motion(Plan *lefttree, int numsegments) } Plan * -make_explicit_motion(PlannerInfo *root, Plan *lefttree, AttrNumber segidColIdx, int numsegments) +make_explicit_motion(PlannerInfo *root, Plan *lefttree, AttrNumber segidColIdx) { Motion *motion; plan_tree_base_prefix base; base.node = (Node *) root; - Assert(numsegments > 0); Assert(segidColIdx > 0 && segidColIdx <= list_length(lefttree->targetlist)); motion = make_motion(NULL, lefttree, diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 517f82a16d5ab6fb0f0b609cb283e3ba6fd07eb0..da2dbc7907ab9a79f56e9ca4a36bf0fec4001f85 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1422,7 +1422,7 @@ ExplainNode(PlanState *planstate, List *ancestors, Assert(plan->lefttree); motion_snd = list_length(es->currentSlice->segments); - motion_recv = (parentSlice == NULL ? 1 : list_length(parentSlice->segments)); + motion_recv = parentSlice == NULL ? 1 : list_length(parentSlice->segments); /* scale the number of rows by the number of segments sending data */ scaleFactor = motion_snd; @@ -1450,6 +1450,7 @@ ExplainNode(PlanState *planstate, List *ancestors, break; default: sname = "???"; + motion_recv = -1; break; } @@ -2101,6 +2102,15 @@ ExplainNode(PlanState *planstate, List *ancestors, pMotion->sortColIdx, "Merge Key", ancestors, es); + if (pMotion->motionType == MOTIONTYPE_HASH && + pMotion->numHashSegments != motion_recv) + { + Assert(pMotion->numHashSegments < motion_recv); + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, + "Hash Module: %d\n", + pMotion->numHashSegments); + } } break; case T_AssertOp: diff --git a/src/backend/executor/nodeMotion.c b/src/backend/executor/nodeMotion.c index d7dfad96f7b5cc368d781a4dec292c50980c4d35..49ae4e5f5c7bc48f61ba34d4562b96d4e0a70bc8 100644 --- a/src/backend/executor/nodeMotion.c +++ b/src/backend/executor/nodeMotion.c @@ -242,7 +242,7 @@ execMotionSender(MotionState *node) 
done = true; } else if (motion->motionType == MOTIONTYPE_GATHER_SINGLE && - GpIdentity.segindex != (gp_session_id % node->numHashSegments)) + GpIdentity.segindex != (gp_session_id % node->numInputSegs)) { /* * For explicit gather motion, receiver gets data from one @@ -621,6 +621,7 @@ ExecInitMotion(Motion *node, EState *estate, int eflags) Assert(node->motionID > 0); Assert(node->motionID < sliceTable->numSlices); + AssertImply(node->motionType == MOTIONTYPE_HASH, node->numHashSegments > 0); parentIndex = estate->currentSliceId; estate->currentSliceId = node->motionID; @@ -759,13 +760,15 @@ ExecInitMotion(Motion *node, EState *estate, int eflags) } motionstate->ps.ps_ProjInfo = NULL; + motionstate->numHashSegments = node->numHashSegments; /* Set up motion send data structures */ - motionstate->numHashSegments = recvSlice->planNumSegments; if (motionstate->mstype == MOTIONSTATE_SEND && node->motionType == MOTIONTYPE_HASH) { int nkeys; + Assert(node->numHashSegments > 0); + Assert(node->numHashSegments <= recvSlice->planNumSegments); nkeys = list_length(node->hashExprs); if (nkeys > 0) diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 96f8c9546523f6d37850576b0dfcabdaae331ccf..6ee6643371e528ecad7511115ff0713e88e3526c 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -2000,6 +2000,8 @@ CTranslatorDXLToPlStmt::TranslateDXLMotion case EdxlopPhysicalMotionRandom: { motion->motionType = MOTIONTYPE_HASH; + motion->numHashSegments = (int)motion_dxlop->GetOutputSegIdsArray()->Size(); + GPOS_ASSERT(motion->numHashSegments > 0); break; } case EdxlopPhysicalMotionBroadcast: diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index d76e4b966e9ff2576855e94a58d7a907212fa59a..3ded79aafccd63fa00e6a851862c1300234e6e2a 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ 
-1428,6 +1428,7 @@ _copyMotion(const Motion *from) COPY_POINTER_FIELD(nullsFirst, from->numSortCols * sizeof(bool)); COPY_SCALAR_FIELD(segidColIdx); + COPY_SCALAR_FIELD(numHashSegments); if (from->senderSliceInfo) { diff --git a/src/backend/nodes/outfast.c b/src/backend/nodes/outfast.c index fcb6e75f192fd2e83f7542581c4258d82ad517a1..62d66ef17f21bcde33c8b1bedffec747d6714feb 100644 --- a/src/backend/nodes/outfast.c +++ b/src/backend/nodes/outfast.c @@ -537,6 +537,7 @@ _outMotion(StringInfo str, Motion *node) WRITE_BOOL_ARRAY(nullsFirst, node->numSortCols); WRITE_INT_FIELD(segidColIdx); + WRITE_INT_FIELD(numHashSegments); _outPlanInfo(str, (Plan *) node); } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index ef550fec848128bb1d5cd419d2a7b772a0d0be95..7ba7dbc628aa2ac6ea21d23b4e21d85cfa12a8c4 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1317,6 +1317,8 @@ _outMotion(StringInfo str, const Motion *node) WRITE_INT_FIELD(segidColIdx); + WRITE_INT_FIELD(numHashSegments); + /* senderSliceInfo is intentionally omitted. 
It's only used during planning */ _outPlanInfo(str, (Plan *) node); diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c index f64e81396341a4fc31604a920534a6b4aa05909c..0dd470fbd602393b57d94d5403b8425f88804c04 100644 --- a/src/backend/nodes/readfast.c +++ b/src/backend/nodes/readfast.c @@ -1570,6 +1570,7 @@ _readMotion(void) READ_BOOL_ARRAY(nullsFirst, local_node->numSortCols); READ_INT_FIELD(segidColIdx); + READ_INT_FIELD(numHashSegments); ReadCommonPlan(&local_node->plan); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index c6fe4f12189eab17b96e6efe6bd715373dada3ed..cd62fd21b9a527571912faaf90c0cbf80e163795 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -7369,13 +7369,14 @@ cdbpathtoplan_create_motion_plan(PlannerInfo *root, { Motion *motion = NULL; Path *subpath = path->subpath; - int numsegments; - - if (CdbPathLocus_IsOuterQuery(path->path.locus) || - CdbPathLocus_IsEntry(path->path.locus)) - numsegments = 1; /* dummy numsegments */ - else - numsegments = CdbPathLocus_NumSegments(path->path.locus); + /* + * numHashSegments is only used for hash motion. + * It's the modulus of the cdb hash; its value + * is not necessarily the same as the number of segments + * in the parent slice. 
+ */ + int numHashSegments; + numHashSegments = CdbPathLocus_NumSegments(path->path.locus); if (path->is_explicit_motion) { @@ -7398,8 +7399,7 @@ cdbpathtoplan_create_motion_plan(PlannerInfo *root, segmentid_tle = find_junk_tle(subplan->targetlist, "gp_segment_id"); if (!segmentid_tle) elog(ERROR, "could not find gp_segment_id in subplan's targetlist"); - motion = (Motion *) make_explicit_motion(root, subplan, segmentid_tle->resno, - numsegments); + motion = (Motion *) make_explicit_motion(root, subplan, segmentid_tle->resno); } else if (path->policy) { @@ -7421,11 +7421,11 @@ cdbpathtoplan_create_motion_plan(PlannerInfo *root, motion = make_hashed_motion(subplan, hashExprs, hashOpfamilies, - numsegments); + numHashSegments); } else if (CdbPathLocus_IsOuterQuery(path->path.locus)) { - motion = make_union_motion(subplan, numsegments); + motion = make_union_motion(subplan); motion->motionType = MOTIONTYPE_OUTER_QUERY; } /* Send all tuples to a single process? */ @@ -7470,25 +7470,25 @@ cdbpathtoplan_create_motion_plan(PlannerInfo *root, */ subplan = prep; motion = make_sorted_union_motion(root, subplan, numSortCols, sortColIdx, sortOperators, collations, - nullsFirst, numsegments); + nullsFirst); } else { /* Degenerate ordering... build unordered Union Receive */ - motion = make_union_motion(subplan, numsegments); + motion = make_union_motion(subplan); } } /* Unordered Union Receive */ else { - motion = make_union_motion(subplan, numsegments); + motion = make_union_motion(subplan); } } /* Send all of the tuples to all of the QEs in gang above... */ else if (CdbPathLocus_IsReplicated(path->path.locus)) - motion = make_broadcast_motion(subplan, numsegments); + motion = make_broadcast_motion(subplan); /* Hashed redistribution to all QEs in gang above... 
*/ else if (CdbPathLocus_IsHashed(path->path.locus) || @@ -7507,7 +7507,7 @@ cdbpathtoplan_create_motion_plan(PlannerInfo *root, motion = make_hashed_motion(subplan, hashExprs, hashOpfamilies, - numsegments); + numHashSegments); } /* Hashed redistribution to all QEs in gang above... */ else if (CdbPathLocus_IsStrewn(path->path.locus)) @@ -7515,7 +7515,7 @@ cdbpathtoplan_create_motion_plan(PlannerInfo *root, motion = make_hashed_motion(subplan, NIL, NIL, - numsegments); + numHashSegments); } else elog(ERROR, "unexpected target locus type %d for Motion node", path->path.locus.locustype); diff --git a/src/include/cdb/cdbmutate.h b/src/include/cdb/cdbmutate.h index c47f41d2e720ebf73cb9e0d45bf3d60c696a5e3f..cd280edd551733d0418cc59f38024a3bb26444ec 100644 --- a/src/include/cdb/cdbmutate.h +++ b/src/include/cdb/cdbmutate.h @@ -20,21 +20,19 @@ #include "nodes/relation.h" #include "optimizer/walkers.h" -extern Motion *make_union_motion(Plan *lefttree, int numsegments); +extern Motion *make_union_motion(Plan *lefttree); extern Motion *make_sorted_union_motion(PlannerInfo *root, Plan *lefttree, int numSortCols, AttrNumber *sortColIdx, Oid *sortOperators, - Oid *collations, bool *nullsFirst, int numsegments); + Oid *collations, bool *nullsFirst); extern Motion *make_hashed_motion(Plan *lefttree, List *hashExpr, List *hashOpfamilies, - int numsegments); + int numHashSegments); -extern Motion *make_broadcast_motion(Plan *lefttree, - int numsegments); +extern Motion *make_broadcast_motion(Plan *lefttree); extern Plan *make_explicit_motion(PlannerInfo *root, Plan *lefttree, - AttrNumber segidColIdx, - int numsegments); + AttrNumber segidColIdx); void cdbmutate_warn_ctid_without_segid(struct PlannerInfo *root, struct RelOptInfo *rel); diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 7bee12236c0a2574f1a198fee4fa3db9a4398e4d..59cd91103b2080124b82e77a93e2c18c775e76cd 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ 
-1321,6 +1321,7 @@ typedef struct Motion /* For Hash */ List *hashExprs; /* list of hash expressions */ Oid *hashFuncs; /* corresponding hash functions */ + int numHashSegments; /* the modulus of the hash function */ /* For Explicit */ AttrNumber segidColIdx; /* index of the segid column in the target list */ diff --git a/src/test/regress/expected/union_gp.out b/src/test/regress/expected/union_gp.out index e850311ceda95bd05b6cc4583d901c501a1bb692..4ec411f4a8a9b5bcaaa192962fc875f06a68378f 100644 --- a/src/test/regress/expected/union_gp.out +++ b/src/test/regress/expected/union_gp.out @@ -2026,6 +2026,195 @@ select from t2_ncols union select * from t2_ncols; ERROR: each UNION query must have the same number of columns LINE 1: select from t2_ncols union select * from t2_ncols; ^ +-- Test the result of union of 2 tables distributed on different number of segments +-- start_ignore +drop schema if exists union_schema CASCADE; +NOTICE: schema "union_schema" does not exist, skipping +-- end_ignore +create schema union_schema; +create table union_schema.t1(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table union_schema.t2(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table union_schema.t3(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. +set allow_system_table_mods = on; +update gp_distribution_policy set numsegments = 1 + where localoid = 'union_schema.t1'::regclass::oid; +update gp_distribution_policy set numsegments = 2 + where localoid = 'union_schema.t2'::regclass::oid; +select relname, policytype, numsegments, distkey + from pg_class, gp_distribution_policy, pg_namespace ns + where pg_class.oid = localoid and relnamespace = ns.oid + and nspname = 'union_schema' + and relname in ('t1', 't2', 't3') + order by relname; + relname | policytype | numsegments | distkey +---------+------------+-------------+--------- + t1 | p | 1 | 1 + t2 | p | 2 | 1 + t3 | p | 3 | 1 +(3 rows) + +insert into union_schema.t1 select i, i from generate_series(1,10)i; +insert into union_schema.t2 select i, i from generate_series(1,20)i; +analyze union_schema.t1; +analyze union_schema.t2; +explain + select * from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=1.23..4.20 rows=10 width=16) + -> Hash Join (cost=1.23..4.00 rows=10 width=16) + Hash Cond: (t2.b = t1.a) + -> Redistribute Motion 2:1 (slice2; segments: 2) (cost=0.00..2.60 rows=10 width=8) + Hash Key: t2.b + -> Seq Scan on t2 (cost=0.00..2.20 rows=10 width=8) + -> Hash (cost=1.10..1.10 rows=10 width=8) + -> Seq Scan on t1 (cost=0.00..1.10 rows=10 width=8) + Optimizer: Postgres query optimizer +(9 rows) + +explain + select union_schema.t1.a, union_schema.t2.b + from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b + union all + select * from union_schema.t3; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=1.23..2687.30 rows=86110 width=8) + -> Append 
(cost=1.23..965.10 rows=28704 width=8) + -> Hash Join (cost=1.23..4.00 rows=4 width=8) + Hash Cond: (t2.b = t1.a) + -> Redistribute Motion 2:3 (slice2; segments: 2) (cost=0.00..2.60 rows=10 width=4) + Hash Key: t2.b + Hash Module: 1 + -> Seq Scan on t2 (cost=0.00..2.20 rows=10 width=4) + -> Hash (cost=1.10..1.10 rows=4 width=4) + -> Seq Scan on t1 (cost=0.00..1.10 rows=4 width=4) + -> Seq Scan on t3 (cost=0.00..961.00 rows=28700 width=8) + Optimizer: Postgres query optimizer +(12 rows) + +select * from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b; + a | b | a | b +----+----+----+---- + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 6 | 6 | 6 | 6 + 7 | 7 | 7 | 7 + 8 | 8 | 8 | 8 + 9 | 9 | 9 | 9 + 10 | 10 | 10 | 10 + 1 | 1 | 1 | 1 + 5 | 5 | 5 | 5 +(10 rows) + +select union_schema.t1.a, union_schema.t2.b + from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b +union all +select * from union_schema.t3; + a | b +----+---- + 2 | 2 + 3 | 3 + 4 | 4 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 + 1 | 1 + 5 | 5 +(10 rows) + +truncate union_schema.t1, union_schema.t2; +insert into union_schema.t1 select i, i from generate_series(1,20)i; +insert into union_schema.t2 select i, i from generate_series(1,10)i; +analyze union_schema.t1; +analyze union_schema.t2; +explain + select * from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=2.43..4.00 rows=10 width=16) + -> Hash Join (cost=2.43..3.80 rows=10 width=16) + Hash Cond: (t1.a = t2.b) + -> Seq Scan on t1 (cost=0.00..1.20 rows=20 width=8) + -> Hash (cost=2.30..2.30 rows=10 width=8) + -> Redistribute Motion 2:1 (slice2; segments: 2) (cost=0.00..2.30 rows=5 width=8) + Hash Key: t2.b + -> Seq Scan on t2 (cost=0.00..2.10 rows=5 width=8) + Optimizer: Postgres query optimizer +(9 
rows) + +explain + select union_schema.t1.a, union_schema.t2.b + from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b + union all + select * from union_schema.t3; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2.43..2687.10 rows=86110 width=8) + -> Append (cost=2.43..964.90 rows=28704 width=8) + -> Hash Join (cost=2.43..3.80 rows=4 width=8) + Hash Cond: (t1.a = t2.b) + -> Seq Scan on t1 (cost=0.00..1.20 rows=7 width=4) + -> Hash (cost=2.30..2.30 rows=4 width=4) + -> Redistribute Motion 2:3 (slice2; segments: 2) (cost=0.00..2.30 rows=5 width=4) + Hash Key: t2.b + Hash Module: 1 + -> Seq Scan on t2 (cost=0.00..2.10 rows=5 width=4) + -> Seq Scan on t3 (cost=0.00..961.00 rows=28700 width=8) + Optimizer: Postgres query optimizer +(12 rows) + +select * from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b; + a | b | a | b +----+----+----+---- + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 5 | 5 | 5 | 5 + 6 | 6 | 6 | 6 + 7 | 7 | 7 | 7 + 8 | 8 | 8 | 8 + 9 | 9 | 9 | 9 + 10 | 10 | 10 | 10 +(10 rows) + +select union_schema.t1.a, union_schema.t2.b + from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b +union all +select * from union_schema.t3; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +reset allow_system_table_mods; -- -- Clean up -- @@ -2035,3 +2224,8 @@ DROP TABLE IF EXISTS T_random CASCADE; DROP VIEW IF EXISTS v1_ncols CASCADE; DROP TABLE IF EXISTS t1_ncols CASCADE; DROP TABLE IF EXISTS t2_ncols CASCADE; +DROP SCHEMA IF EXISTS union_schema CASCADE; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table union_schema.t1 +drop cascades to table union_schema.t2 +drop cascades to table union_schema.t3 diff --git a/src/test/regress/sql/union_gp.sql 
b/src/test/regress/sql/union_gp.sql index a592f739ed08f904dc60f977ad022c8d2594293c..4b2c4061efbdbe35813c2bf8824bb0c044c70d12 100644 --- a/src/test/regress/sql/union_gp.sql +++ b/src/test/regress/sql/union_gp.sql @@ -594,6 +594,75 @@ select x.aa/100 aaa, x.c, y.c from cte1 x join cte1 y on x.aa=y.aa; select from t2_ncols union select * from t2_ncols; +-- Test the result of union of 2 tables distributed on different number of segments +-- start_ignore +drop schema if exists union_schema CASCADE; +-- end_ignore +create schema union_schema; +create table union_schema.t1(a int, b int); +create table union_schema.t2(a int, b int); +create table union_schema.t3(a int, b int); + +set allow_system_table_mods = on; +update gp_distribution_policy set numsegments = 1 + where localoid = 'union_schema.t1'::regclass::oid; +update gp_distribution_policy set numsegments = 2 + where localoid = 'union_schema.t2'::regclass::oid; +select relname, policytype, numsegments, distkey + from pg_class, gp_distribution_policy, pg_namespace ns + where pg_class.oid = localoid and relnamespace = ns.oid + and nspname = 'union_schema' + and relname in ('t1', 't2', 't3') + order by relname; + +insert into union_schema.t1 select i, i from generate_series(1,10)i; +insert into union_schema.t2 select i, i from generate_series(1,20)i; +analyze union_schema.t1; +analyze union_schema.t2; + +explain + select * from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b; +explain + select union_schema.t1.a, union_schema.t2.b + from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b + union all + select * from union_schema.t3; + +select * from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b; +select union_schema.t1.a, union_schema.t2.b + from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b +union all +select * from union_schema.t3; + +truncate union_schema.t1, union_schema.t2; +insert into 
union_schema.t1 select i, i from generate_series(1,20)i; +insert into union_schema.t2 select i, i from generate_series(1,10)i; +analyze union_schema.t1; +analyze union_schema.t2; + +explain + select * from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b; +explain + select union_schema.t1.a, union_schema.t2.b + from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b + union all + select * from union_schema.t3; + +select * from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b; +select union_schema.t1.a, union_schema.t2.b + from union_schema.t1 join union_schema.t2 + on union_schema.t1.a = union_schema.t2.b +union all +select * from union_schema.t3; + +reset allow_system_table_mods; -- -- Clean up -- @@ -604,3 +673,4 @@ DROP TABLE IF EXISTS T_random CASCADE; DROP VIEW IF EXISTS v1_ncols CASCADE; DROP TABLE IF EXISTS t1_ncols CASCADE; DROP TABLE IF EXISTS t2_ncols CASCADE; +DROP SCHEMA IF EXISTS union_schema CASCADE;