From 10052f48d9bdf3122d3cedc901ddcd51fafcbcbf Mon Sep 17 00:00:00 2001 From: Zhenghua Lyu Date: Sun, 30 Jun 2019 10:54:58 +0800 Subject: [PATCH] Improve select statement with locking clause With GDD enabled, and under some simple cases (refer the commit 6ebce73 and the function checkCanOptSelectLockingClause for details), we might also do some optimizations for the select statement with locking clause and limit clause. Greenplum generates two-stage-merge sort or limit plan to implement sort or limit and we can only lock tuples on segments. We prefer locking more tuples on segments rather than locking the whole table. Without the whole table lock, performance for OLTP should be improved. Also, after lockrows data cannot be assumed in order, but we do merge gather after lockrows. It is reasonable because even for postgres: `select * from t order by c for update` cannot guarantee result's order. --- src/backend/executor/execAmi.c | 1 + src/backend/optimizer/plan/planner.c | 102 ++++++++++----------- src/backend/parser/analyze.c | 7 -- src/test/isolation2/expected/lockmodes.out | 35 +++---- 4 files changed, 66 insertions(+), 79 deletions(-) diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 129daac0a1..28409eed19 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -647,6 +647,7 @@ ExecSquelchNode(PlanState *node) case T_BitmapOrState: case T_DynamicBitmapHeapScanState: case T_LimitState: + case T_LockRowsState: case T_NestLoopState: case T_MergeJoinState: case T_RepeatState: diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 8a17e8205e..0c328e3233 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3064,28 +3064,38 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } /* - * Greenplum specific behavior: - * In Postgres, the process order is sort -> rowmarks -> limit, - * the reason I think is that we should only lock those tuples after - * limit. Based on the one tuple one time model, the following plan - * limit - * -> lockrows - * -> sort - * -> scan - * will only lock one tuple even the sort node sort all tuples. - * - * But for Greenplum, we might not emit lockrows plannode due to many - * limitations in MPP architecture. Greenplum can only behave like - * postgres in some simple cases (refer the function - * `parser/analyze.c:checkCanOptSelectLockingClause` for details). - * Select-statement with limit clause and locking clause is not a - * simple case, because we can only know which tuples will be output - * on QD, but lock rows have to work on QEs. So for select-statement - * with limit clause and locking clause will not emit lockrows plannode, - * that is, in such case, it will ignore the rowmarks. - * - * So Greenplum changes the process order as: rowmarks(maybe) -> sort -> limit. + * If ORDER BY was given and we were not able to make the plan come out in + * the right order, add an explicit sort step. */ + if (parse->sortClause) + { + if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + { + result_plan = (Plan *) make_sort_from_pathkeys(root, + result_plan, + root->sort_pathkeys, + limit_tuples, + true); + mark_sort_locus(result_plan); + current_pathkeys = root->sort_pathkeys; + result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree); + } + + if (must_gather && + result_plan->flow->flotype != FLOW_SINGLETON && + !parse->canOptSelectLockingClause) /* We can only lock rows on segments */ + { + /* + * current_pathkeys might contain unneeded columns that have been + * eliminated from the final target list, and we cannot maintain + * such an order in the Motion anymore. So use root->sortpathkeys + * rather than current_pathkeys here. (See similar case in LIMIT + * handling below. + */ + current_pathkeys = root->sort_pathkeys; + result_plan = (Plan *) make_motion_gather(root, result_plan, current_pathkeys); + } + } /* * Greenplum specific behavior: @@ -3127,6 +3137,22 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) SS_assign_special_param(root)); result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree); + + if (must_gather && result_plan->flow->flotype != FLOW_SINGLETON) + { + /* + * Greeplum specific behavior: + * Add the postponed gather motion of sorting here. + *`The SQL can reach here has the pattern: select xxx from t order by yyy for update (see + * above must_gather's assignment for details). For read committed isolation level, + * The statement `select order by for update` cannot guarantee order (see discussion + * https://www.postgresql.org/message-id/CAO0i4_QCf8LUCO9xDgDpJ0zdsyM7q83APtqHamdsswQ6NVT3ZQ%40mail.gmail.com + * for details) even in postgres. So it is OK to generate two-stage merge sort plan here. And + * document the limitations in Greenplum. + */ + result_plan = (Plan *) make_motion_gather(root, result_plan, current_pathkeys); + } + /* * The result can no longer be assumed sorted, since locking might * cause the sort key columns to be replaced with new values. @@ -3135,38 +3161,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } } - /* - * If ORDER BY was given and we were not able to make the plan come out in - * the right order, add an explicit sort step. - */ - if (parse->sortClause) - { - if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) - { - result_plan = (Plan *) make_sort_from_pathkeys(root, - result_plan, - root->sort_pathkeys, - limit_tuples, - true); - mark_sort_locus(result_plan); - current_pathkeys = root->sort_pathkeys; - result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree); - } - - if (must_gather && result_plan->flow->flotype != FLOW_SINGLETON) - { - /* - * current_pathkeys might contain unneeded columns that have been - * eliminated from the final target list, and we cannot maintain - * such an order in the Motion anymore. So use root->sortpathkeys - * rather than current_pathkeys here. (See similar case in LIMIT - * handling below. - */ - current_pathkeys = root->sort_pathkeys; - result_plan = (Plan *) make_motion_gather(root, result_plan, current_pathkeys); - } - } - /* * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. */ @@ -3190,8 +3184,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ if (parse->sortClause) { - if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) - elog(ERROR, "invalid result order generated for ORDER BY + LIMIT"); current_pathkeys = root->sort_pathkeys; result_plan = (Plan *) make_motion_gather_to_QE(root, result_plan, current_pathkeys); diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index d584d5e8f8..658a7beb34 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -3815,13 +3815,6 @@ checkCanOptSelectLockingClause(SelectStmt *stmt) if (!IsA(linitial(stmt->fromClause), RangeVar)) return false; - /* - * In Greenplum, the results of limit can only be known on QD, - * so we cannot lock correct tuples on QEs. - */ - if (stmt->limitCount != NULL || stmt->limitOffset != NULL) - return false; - if (!stmt->lockingClause) return false; diff --git a/src/test/isolation2/expected/lockmodes.out b/src/test/isolation2/expected/lockmodes.out index 9ee9081538..33491072de 100644 --- a/src/test/isolation2/expected/lockmodes.out +++ b/src/test/isolation2/expected/lockmodes.out @@ -1784,17 +1784,18 @@ ABORT 1: begin; BEGIN 1: explain select * from t_lockmods order by c limit 1 for update; - QUERY PLAN ------------------------------------------------------------------------------------ - Limit (cost=2.07..2.10 rows=1 width=10) - -> Gather Motion 3:1 (slice1; segments: 3) (cost=2.07..2.10 rows=1 width=10) - Merge Key: c - -> Limit (cost=2.07..2.08 rows=1 width=10) - -> Sort (cost=2.07..2.09 rows=2 width=10) - Sort Key: c - -> Seq Scan on t_lockmods (cost=0.00..2.05 rows=2 width=10) - Optimizer: Postgres query optimizer -(8 rows) + QUERY PLAN +----------------------------------------------------------------------------------------- + Limit (cost=3.07..3.11 rows=1 width=10) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=3.07..3.11 rows=1 width=10) + Merge Key: c + -> Limit (cost=3.07..3.09 rows=1 width=10) + -> LockRows (cost=3.07..3.14 rows=2 width=10) + -> Sort (cost=3.07..3.09 rows=2 width=10) + Sort Key: c + -> Seq Scan on t_lockmods (cost=0.00..3.05 rows=2 width=10) + Optimizer: Postgres query optimizer +(9 rows) 1: select * from t_lockmods order by c limit 1 for update; c --- @@ -1804,7 +1805,7 @@ BEGIN locktype | mode | granted | relation ----------+-----------------+---------+------------ relation | AccessShareLock | t | t_lockmods - relation | ExclusiveLock | t | t_lockmods + relation | RowShareLock | t | t_lockmods (2 rows) 1: abort; ABORT @@ -1838,12 +1839,12 @@ BEGIN 1: explain select * from t_lockmods order by c for update; QUERY PLAN ----------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=2.16..2.17 rows=5 width=10) + Gather Motion 3:1 (slice1; segments: 3) (cost=3.11..3.17 rows=5 width=10) Merge Key: c - -> Sort (cost=2.16..2.17 rows=2 width=10) - Sort Key: c - -> LockRows (cost=0.00..2.10 rows=2 width=10) - -> Seq Scan on t_lockmods (cost=0.00..2.05 rows=2 width=10) + -> LockRows (cost=3.11..3.17 rows=2 width=10) + -> Sort (cost=3.11..3.12 rows=2 width=10) + Sort Key: c + -> Seq Scan on t_lockmods (cost=0.00..3.05 rows=2 width=10) Optimizer: Postgres query optimizer (7 rows) 1: select * from t_lockmods order by c for update; -- GitLab