From 8df08c84894001d3d3f5d10b3290a1063a453316 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 22 Mar 2011 00:34:31 -0400 Subject: [PATCH] Reimplement planner's handling of MIN/MAX aggregate optimization (again). Instead of playing cute games with pathkeys, just build a direct representation of the intended sub-select, and feed it through query_planner to get a Path for the index access. This is a bit slower than 9.1's previous method, since we'll duplicate most of the overhead of query_planner; but since the whole optimization only applies to rather simple single-table queries, that probably won't be much of a problem in practice. The advantage is that we get to do the right thing when there's a partial index that needs the implicit IS NOT NULL clause to be usable. Also, although this makes planagg.c be a bit more closely tied to the ordering of operations in grouping_planner, we can get rid of some coupling to lower-level parts of the planner. Per complaint from Marti Raudsepp. --- src/backend/nodes/copyfuncs.c | 19 - src/backend/nodes/equalfuncs.c | 14 - src/backend/nodes/outfuncs.c | 5 +- src/backend/optimizer/path/indxpath.c | 14 +- src/backend/optimizer/path/pathkeys.c | 96 +--- src/backend/optimizer/plan/planagg.c | 585 +++++++++-------------- src/backend/optimizer/plan/planmain.c | 9 - src/backend/optimizer/plan/planner.c | 5 +- src/include/nodes/relation.h | 8 +- src/include/optimizer/paths.h | 16 - src/test/regress/expected/aggregates.out | 30 +- src/test/regress/sql/aggregates.sql | 7 +- 12 files changed, 263 insertions(+), 545 deletions(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 6e52d36a17..b903e457a0 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1930,22 +1930,6 @@ _copyPlaceHolderInfo(PlaceHolderInfo *from) return newnode; } -/* - * _copyMinMaxAggInfo - */ -static MinMaxAggInfo * -_copyMinMaxAggInfo(MinMaxAggInfo *from) -{ - MinMaxAggInfo *newnode = makeNode(MinMaxAggInfo); - - COPY_SCALAR_FIELD(aggfnoid); - COPY_SCALAR_FIELD(aggsortop); - COPY_NODE_FIELD(target); - COPY_NODE_FIELD(pathkeys); - - return newnode; -} - /* **************************************************************** * parsenodes.h copy functions * **************************************************************** @@ -4129,9 +4113,6 @@ copyObject(void *from) case T_PlaceHolderInfo: retval = _copyPlaceHolderInfo(from); break; - case T_MinMaxAggInfo: - retval = _copyMinMaxAggInfo(from); - break; /* * VALUE NODES diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 7340aa0525..10b160f537 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -886,17 +886,6 @@ _equalPlaceHolderInfo(PlaceHolderInfo *a, PlaceHolderInfo *b) return true; } -static bool -_equalMinMaxAggInfo(MinMaxAggInfo *a, MinMaxAggInfo *b) -{ - COMPARE_SCALAR_FIELD(aggfnoid); - COMPARE_SCALAR_FIELD(aggsortop); - COMPARE_NODE_FIELD(target); - COMPARE_NODE_FIELD(pathkeys); - - return true; -} - /* * Stuff from parsenodes.h @@ -2690,9 +2679,6 @@ equal(void *a, void *b) case T_PlaceHolderInfo: retval = _equalPlaceHolderInfo(a, b); break; - case T_MinMaxAggInfo: - retval = _equalMinMaxAggInfo(a, b); - break; case T_List: case T_IntList: diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index db4a33c30d..46435ab21c 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1914,7 +1914,10 @@ _outMinMaxAggInfo(StringInfo str, MinMaxAggInfo *node) WRITE_OID_FIELD(aggfnoid); WRITE_OID_FIELD(aggsortop); WRITE_NODE_FIELD(target); - WRITE_NODE_FIELD(pathkeys); + /* We intentionally omit subroot --- too large, not interesting enough */ + WRITE_NODE_FIELD(path); + WRITE_FLOAT_FIELD(pathcost, "%.2f"); + WRITE_NODE_FIELD(param); } static void diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 1ac0ff6ee8..925155c64d 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -41,6 +41,13 @@ #define IsBooleanOpfamily(opfamily) \ ((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID) +/* Whether to use ScalarArrayOpExpr to build index qualifications */ +typedef enum +{ + SAOP_FORBID, /* Do not use ScalarArrayOpExpr */ + SAOP_ALLOW, /* OK to use ScalarArrayOpExpr */ + SAOP_REQUIRE /* Require ScalarArrayOpExpr */ +} SaOpControl; /* Whether we are looking for plain indexscan, bitmap scan, or either */ typedef enum @@ -78,6 +85,11 @@ static PathClauseUsage *classify_index_clause_usage(Path *path, List **clauselist); static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds); static int find_list_position(Node *node, List **nodelist); +static List *group_clauses_by_indexkey(IndexOptInfo *index, + List *clauses, List *outer_clauses, + Relids outer_relids, + SaOpControl saop_control, + bool *found_clause); static bool match_clause_to_indexcol(IndexOptInfo *index, int indexcol, RestrictInfo *rinfo, @@ -1060,7 +1072,7 @@ find_list_position(Node *node, List **nodelist) * from multiple places. Defend against redundant outputs by using * list_append_unique_ptr (pointer equality should be good enough). */ -List * +static List * group_clauses_by_indexkey(IndexOptInfo *index, List *clauses, List *outer_clauses, Relids outer_relids, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index de3e4ac74e..34c772356b 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -905,39 +905,6 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, return pathkeys; } -/**************************************************************************** - * PATHKEYS AND AGGREGATES - ****************************************************************************/ - -/* - * make_pathkeys_for_aggregate - * Generate a pathkeys list (always a 1-item list) that represents - * the sort order needed by a MIN/MAX aggregate - * - * This is only called before EquivalenceClass merging, so we can assume - * we are not supposed to canonicalize. - */ -List * -make_pathkeys_for_aggregate(PlannerInfo *root, - Expr *aggtarget, - Oid aggsortop) -{ - PathKey *pathkey; - - /* - * We arbitrarily set nulls_first to false. Actually, a MIN/MAX agg can - * use either nulls ordering option, but that is dealt with elsewhere. - */ - pathkey = make_pathkey_from_sortop(root, - aggtarget, - aggsortop, - false, /* nulls_first */ - 0, - true, - false); - return list_make1(pathkey); -} - /**************************************************************************** * PATHKEYS AND MERGECLAUSES ****************************************************************************/ @@ -1407,11 +1374,10 @@ make_inner_pathkeys_for_merge(PlannerInfo *root, * PATHKEY USEFULNESS CHECKS * * We only want to remember as many of the pathkeys of a path as have some - * potential use, which can include subsequent mergejoins, meeting the query's - * requested output ordering, or implementing MIN/MAX aggregates. This - * ensures that add_path() won't consider a path to have a usefully different - * ordering unless it really is useful. These routines check for usefulness - * of given pathkeys. + * potential use, either for subsequent mergejoins or for meeting the query's + * requested output ordering. This ensures that add_path() won't consider + * a path to have a usefully different ordering unless it really is useful. + * These routines check for usefulness of given pathkeys. ****************************************************************************/ /* @@ -1553,50 +1519,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return 0; /* path ordering not useful */ } -/* - * pathkeys_useful_for_minmax - * Count the number of pathkeys that are useful for implementing - * some MIN/MAX aggregate. - * - * Like pathkeys_useful_for_ordering, this is a yes-or-no affair, but - * there could be several MIN/MAX aggregates and we can match to any one. - * - * We can't use pathkeys_contained_in() because we would like to match - * pathkeys regardless of the nulls_first setting. However, we know that - * MIN/MAX aggregates will have at most one item in their pathkeys, so it's - * not too complicated to match by brute force. - */ -static int -pathkeys_useful_for_minmax(PlannerInfo *root, List *pathkeys) -{ - PathKey *pathkey; - ListCell *lc; - - if (pathkeys == NIL) - return 0; /* unordered path */ - pathkey = (PathKey *) linitial(pathkeys); - - foreach(lc, root->minmax_aggs) - { - MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); - PathKey *mmpathkey; - - /* Ignore minmax agg if its pathkey turned out to be redundant */ - if (mminfo->pathkeys == NIL) - continue; - - Assert(list_length(mminfo->pathkeys) == 1); - mmpathkey = (PathKey *) linitial(mminfo->pathkeys); - - if (mmpathkey->pk_eclass == pathkey->pk_eclass && - mmpathkey->pk_opfamily == pathkey->pk_opfamily && - mmpathkey->pk_strategy == pathkey->pk_strategy) - return 1; - } - - return 0; /* path ordering not useful */ -} - /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -1608,15 +1530,11 @@ truncate_useless_pathkeys(PlannerInfo *root, { int nuseful; int nuseful2; - int nuseful3; nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; - nuseful3 = pathkeys_useful_for_minmax(root, pathkeys); - if (nuseful3 > nuseful) - nuseful = nuseful3; /* * Note: not safe to modify input list destructively, but we can avoid @@ -1642,8 +1560,8 @@ truncate_useless_pathkeys(PlannerInfo *root, * * We could make the test more complex, for example checking to see if any of * the joinclauses are really mergejoinable, but that likely wouldn't win - * often enough to repay the extra cycles. Queries with no join, sort, or - * aggregate at all are reasonably common, so this much work seems worthwhile. + * often enough to repay the extra cycles. Queries with neither a join nor + * a sort are reasonably common, though, so this much work seems worthwhile. */ bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) @@ -1652,7 +1570,5 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) return true; /* might be able to use pathkeys for merging */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ - if (root->minmax_aggs != NIL) - return true; /* might be able to use them for MIN/MAX */ return false; /* definitely useless */ } diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index f885385296..39203703e6 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -5,7 +5,10 @@ * * This module tries to replace MIN/MAX aggregate functions by subqueries * of the form - * (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1) + * (SELECT col FROM tab + * WHERE col IS NOT NULL AND existing-quals + * ORDER BY col ASC/DESC + * LIMIT 1) * Given a suitable index on tab.col, this can be much faster than the * generic scan-all-the-rows aggregation plan. We can handle multiple * MIN/MAX aggregates by generating multiple subqueries, and their @@ -26,41 +29,25 @@ #include "postgres.h" #include "catalog/pg_aggregate.h" -#include "catalog/pg_am.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" -#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" -#include "optimizer/prep.h" -#include "optimizer/restrictinfo.h" #include "optimizer/subselect.h" #include "parser/parsetree.h" +#include "parser/parse_clause.h" #include "utils/lsyscache.h" #include "utils/syscache.h" -/* Per-aggregate info during optimize_minmax_aggregates() */ -typedef struct -{ - MinMaxAggInfo *mminfo; /* info gathered by preprocessing */ - Path *path; /* access path for ordered scan */ - Cost pathcost; /* estimated cost to fetch first row */ - Param *param; /* param for subplan's output */ -} PrivateMMAggInfo; - static bool find_minmax_aggs_walker(Node *node, List **context); -static PrivateMMAggInfo *find_minmax_path(PlannerInfo *root, RelOptInfo *rel, - MinMaxAggInfo *mminfo); -static bool path_usable_for_agg(Path *path); -static void make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, - PrivateMMAggInfo *info); -static void add_notnull_qual(PlannerInfo *root, RelOptInfo *rel, - PrivateMMAggInfo *info, Path *path); -static Node *replace_aggs_with_params_mutator(Node *node, List **context); +static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, + Oid eqop, Oid sortop, bool nulls_first); +static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *mminfo); +static Node *replace_aggs_with_params_mutator(Node *node, PlannerInfo *root); static Oid fetch_agg_sort_op(Oid aggfnoid); @@ -100,7 +87,10 @@ preprocess_minmax_aggregates(PlannerInfo *root, List *tlist) * * We don't handle GROUP BY or windowing, because our current * implementations of grouping require looking at all the rows anyway, and - * so there's not much point in optimizing MIN/MAX. + * so there's not much point in optimizing MIN/MAX. (Note: relaxing + * this would likely require some restructuring in grouping_planner(), + * since it performs assorted processing related to these features between + * calling preprocess_minmax_aggregates and optimize_minmax_aggregates.) */ if (parse->groupClause || parse->hasWindowFuncs) return; @@ -139,24 +129,48 @@ preprocess_minmax_aggregates(PlannerInfo *root, List *tlist) /* * OK, there is at least the possibility of performing the optimization. - * Build pathkeys (and thereby EquivalenceClasses) for each aggregate. - * The existence of the EquivalenceClasses will prompt the path generation - * logic to try to build paths matching the desired sort ordering(s). - * - * Note: the pathkeys are non-canonical at this point. They'll be fixed - * later by canonicalize_all_pathkeys(). + * Build an access path for each aggregate. (We must do this now because + * we need to call query_planner with a pristine copy of the current query + * tree; it'll be too late when optimize_minmax_aggregates gets called.) + * If any of the aggregates prove to be non-indexable, give up; there is + * no point in optimizing just some of them. */ foreach(lc, aggs_list) { MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + Oid eqop; + bool reverse; + + /* + * We'll need the equality operator that goes with the aggregate's + * ordering operator. + */ + eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse); + if (!OidIsValid(eqop)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + mminfo->aggsortop); - mminfo->pathkeys = make_pathkeys_for_aggregate(root, - mminfo->target, - mminfo->aggsortop); + /* + * We can use either an ordering that gives NULLS FIRST or one that + * gives NULLS LAST; furthermore there's unlikely to be much + * performance difference between them, so it doesn't seem worth + * costing out both ways if we get a hit on the first one. NULLS + * FIRST is more likely to be available if the operator is a + * reverse-sort operator, so try that first if reverse. + */ + if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse)) + continue; + if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse)) + continue; + + /* No indexable path for this aggregate, so fail */ + return; } /* * We're done until path generation is complete. Save info for later. + * (Setting root->minmax_aggs non-NIL signals we succeeded in making + * index access paths for all the aggregates.) */ root->minmax_aggs = aggs_list; } @@ -164,11 +178,15 @@ preprocess_minmax_aggregates(PlannerInfo *root, List *tlist) /* * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes * - * Check to see whether all the aggregates are in fact optimizable into - * indexscans. If so, and the result is estimated to be cheaper than the - * generic aggregate method, then generate and return a Plan that does it + * Check to see whether using the aggregate indexscans is cheaper than the + * generic aggregate method. If so, generate and return a Plan that does it * that way. Otherwise, return NULL. * + * Note: it seems likely that the generic method will never be cheaper + * in practice, except maybe for tiny tables where it'd hardly matter. + * Should we skip even trying to build the standard plan, if + * preprocess_minmax_aggregates succeeds? + * * We are passed the preprocessed tlist, as well as the best path devised for * computing the input of a standard Agg node. */ @@ -176,59 +194,34 @@ Plan * optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) { Query *parse = root->parse; - FromExpr *jtnode; - RangeTblRef *rtr; - RelOptInfo *rel; - List *aggs_list; - ListCell *lc; Cost total_cost; Path agg_p; Plan *plan; Node *hqual; QualCost tlist_cost; + ListCell *lc; /* Nothing to do if preprocess_minmax_aggs rejected the query */ if (root->minmax_aggs == NIL) return NULL; - /* Re-locate the one real table identified by preprocess_minmax_aggs */ - jtnode = parse->jointree; - while (IsA(jtnode, FromExpr)) - { - Assert(list_length(jtnode->fromlist) == 1); - jtnode = linitial(jtnode->fromlist); - } - Assert(IsA(jtnode, RangeTblRef)); - rtr = (RangeTblRef *) jtnode; - rel = find_base_rel(root, rtr->rtindex); - /* - * Examine each agg to see if we can find a suitable ordered path for it. - * Give up if any agg isn't indexable. + * Now we have enough info to compare costs against the generic aggregate + * implementation. + * + * Note that we don't include evaluation cost of the tlist here; this is + * OK since it isn't included in best_path's cost either, and should be + * the same in either case. */ - aggs_list = NIL; total_cost = 0; foreach(lc, root->minmax_aggs) { MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); - PrivateMMAggInfo *info; - info = find_minmax_path(root, rel, mminfo); - if (!info) - return NULL; - aggs_list = lappend(aggs_list, info); - total_cost += info->pathcost; + total_cost += mminfo->pathcost; } - /* - * Now we have enough info to compare costs against the generic aggregate - * implementation. - * - * Note that we don't include evaluation cost of the tlist here; this is - * OK since it isn't included in best_path's cost either, and should be - * the same in either case. - */ - cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list), + cost_agg(&agg_p, root, AGG_PLAIN, list_length(root->minmax_aggs), 0, 0, best_path->startup_cost, best_path->total_cost, best_path->parent->rows); @@ -241,18 +234,18 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) * * First, generate a subplan and output Param node for each agg. */ - foreach(lc, aggs_list) + foreach(lc, root->minmax_aggs) { - make_agg_subplan(root, rel, (PrivateMMAggInfo *) lfirst(lc)); + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); + + make_agg_subplan(root, mminfo); } /* * Modify the targetlist and HAVING qual to reference subquery outputs */ - tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist, - &aggs_list); - hqual = replace_aggs_with_params_mutator(parse->havingQual, - &aggs_list); + tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist, root); + hqual = replace_aggs_with_params_mutator(parse->havingQual, root); /* * We have to replace Aggrefs with Params in equivalence classes too, else @@ -264,7 +257,7 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) */ mutate_eclass_expressions(root, replace_aggs_with_params_mutator, - &aggs_list); + (void *) root); /* * Generate the output plan --- basically just a Result @@ -340,7 +333,10 @@ find_minmax_aggs_walker(Node *node, List **context) mminfo->aggfnoid = aggref->aggfnoid; mminfo->aggsortop = aggsortop; mminfo->target = curTarget->expr; - mminfo->pathkeys = NIL; /* don't compute pathkeys yet */ + mminfo->subroot = NULL; /* don't compute path yet */ + mminfo->path = NULL; + mminfo->pathcost = 0; + mminfo->param = NULL; *context = lappend(*context, mminfo); @@ -356,198 +352,162 @@ find_minmax_aggs_walker(Node *node, List **context) } /* - * find_minmax_path - * Given a MIN/MAX aggregate, try to find an ordered Path it can be + * build_minmax_path + * Given a MIN/MAX aggregate, try to build an indexscan Path it can be * optimized with. * - * If successful, build and return a PrivateMMAggInfo struct. Otherwise, - * return NULL. + * If successful, stash the best path in *mminfo and return TRUE. + * Otherwise, return FALSE. */ -static PrivateMMAggInfo * -find_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *mminfo) +static bool +build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, + Oid eqop, Oid sortop, bool nulls_first) { - PrivateMMAggInfo *info; - Path *best_path = NULL; - Cost best_cost = 0; + PlannerInfo *subroot; + Query *parse; + TargetEntry *tle; + NullTest *ntest; + SortGroupClause *sortcl; + Path *cheapest_path; + Path *sorted_path; + double dNumGroups; + Cost path_cost; double path_fraction; - PathKey *mmpathkey; - ListCell *lc; - /* - * Punt if the aggregate's pathkey turned out to be redundant, ie its - * pathkeys list is now empty. This would happen with something like - * "SELECT max(x) ... WHERE x = constant". There's no need to try to - * optimize such a case, because if there is an index that would help, - * it should already have been used with the WHERE clause. + /*---------- + * Generate modified query of the form + * (SELECT col FROM tab + * WHERE col IS NOT NULL AND existing-quals + * ORDER BY col ASC/DESC + * LIMIT 1) + *---------- */ - if (mminfo->pathkeys == NIL) - return NULL; + subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); + memcpy(subroot, root, sizeof(PlannerInfo)); + subroot->parse = parse = (Query *) copyObject(root->parse); + /* make sure subroot planning won't change root->init_plans contents */ + subroot->init_plans = list_copy(root->init_plans); + /* There shouldn't be any OJ info to translate, as yet */ + Assert(subroot->join_info_list == NIL); + /* and we haven't created PlaceHolderInfos, either */ + Assert(subroot->placeholder_list == NIL); + + /* single tlist entry that is the aggregate target */ + tle = makeTargetEntry(copyObject(mminfo->target), + (AttrNumber) 1, + pstrdup("agg_target"), + false); + parse->targetList = list_make1(tle); + + /* No HAVING, no DISTINCT, no aggregates anymore */ + parse->havingQual = NULL; + subroot->hasHavingQual = false; + parse->distinctClause = NIL; + parse->hasDistinctOn = false; + parse->hasAggs = false; + + /* Build "target IS NOT NULL" expression */ + ntest = makeNode(NullTest); + ntest->nulltesttype = IS_NOT_NULL; + ntest->arg = copyObject(mminfo->target); + /* we checked it wasn't a rowtype in find_minmax_aggs_walker */ + ntest->argisrow = false; + + /* User might have had that in WHERE already */ + if (!list_member((List *) parse->jointree->quals, ntest)) + parse->jointree->quals = (Node *) + lcons(ntest, (List *) parse->jointree->quals); + + /* Build suitable ORDER BY clause */ + sortcl = makeNode(SortGroupClause); + sortcl->tleSortGroupRef = assignSortGroupRef(tle, parse->targetList); + sortcl->eqop = eqop; + sortcl->sortop = sortop; + sortcl->nulls_first = nulls_first; + sortcl->hashable = false; /* no need to make this accurate */ + parse->sortClause = list_make1(sortcl); + + /* set up expressions for LIMIT 1 */ + parse->limitOffset = NULL; + parse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); /* - * Search the paths that were generated for the rel to see if there are - * any with the desired ordering. There could be multiple such paths, - * in which case take the cheapest (as measured according to how fast it - * will be to fetch the first row). - * - * We can't use pathkeys_contained_in() to check the ordering, because we - * would like to match pathkeys regardless of the nulls_first setting. - * However, we know that MIN/MAX aggregates will have at most one item in - * their pathkeys, so it's not too complicated to match by brute force. - * - * Note: this test ignores the possible costs associated with skipping - * NULL tuples. We assume that adding the not-null criterion to the - * indexqual doesn't really cost anything. + * Set up requested pathkeys. */ - if (rel->rows > 1.0) - path_fraction = 1.0 / rel->rows; - else - path_fraction = 1.0; + subroot->group_pathkeys = NIL; + subroot->window_pathkeys = NIL; + subroot->distinct_pathkeys = NIL; - Assert(list_length(mminfo->pathkeys) == 1); - mmpathkey = (PathKey *) linitial(mminfo->pathkeys); + subroot->sort_pathkeys = + make_pathkeys_for_sortclauses(subroot, + parse->sortClause, + parse->targetList, + false); - foreach(lc, rel->pathlist) - { - Path *path = (Path *) lfirst(lc); - PathKey *pathkey; - Cost path_cost; + subroot->query_pathkeys = subroot->sort_pathkeys; - if (path->pathkeys == NIL) - continue; /* unordered path */ - pathkey = (PathKey *) linitial(path->pathkeys); + /* + * Generate the best paths for this query, telling query_planner that + * we have LIMIT 1. + */ + query_planner(subroot, parse->targetList, 1.0, 1.0, + &cheapest_path, &sorted_path, &dNumGroups); - if (mmpathkey->pk_eclass == pathkey->pk_eclass && - mmpathkey->pk_opfamily == pathkey->pk_opfamily && - mmpathkey->pk_strategy == pathkey->pk_strategy) - { - /* - * OK, it has the right ordering; is it acceptable otherwise? - * (We test in this order because the pathkey check is cheap.) - */ - if (path_usable_for_agg(path)) - { - /* - * It'll work; but is it the cheapest? - * - * Note: cost calculation here should match - * compare_fractional_path_costs(). - */ - path_cost = path->startup_cost + - path_fraction * (path->total_cost - path->startup_cost); - - if (best_path == NULL || path_cost < best_cost) - { - best_path = path; - best_cost = path_cost; - } - } - } + /* + * Fail if no presorted path. However, if query_planner determines that + * the presorted path is also the cheapest, it will set sorted_path to + * NULL ... don't be fooled. (This is kind of a pain here, but it + * simplifies life for grouping_planner, so leave it be.) + */ + if (!sorted_path) + { + if (cheapest_path && + pathkeys_contained_in(subroot->sort_pathkeys, + cheapest_path->pathkeys)) + sorted_path = cheapest_path; + else + return false; } - /* Fail if no suitable path */ - if (best_path == NULL) - return NULL; - - /* Construct private state for further processing */ - info = (PrivateMMAggInfo *) palloc(sizeof(PrivateMMAggInfo)); - info->mminfo = mminfo; - info->path = best_path; - info->pathcost = best_cost; - info->param = NULL; /* will be set later */ - - return info; -} + /* + * Determine cost to get just the first row of the presorted path. + * + * Note: cost calculation here should match + * compare_fractional_path_costs(). + */ + if (sorted_path->parent->rows > 1.0) + path_fraction = 1.0 / sorted_path->parent->rows; + else + path_fraction = 1.0; -/* - * To be usable, a Path needs to be an IndexPath on a btree index, or be a - * MergeAppendPath of such IndexPaths. This restriction is mainly because - * we need to be sure the index can handle an added NOT NULL constraint at - * minimal additional cost. If you wish to relax it, you'll need to improve - * add_notnull_qual() too. - */ -static bool -path_usable_for_agg(Path *path) -{ - if (IsA(path, IndexPath)) - { - IndexPath *ipath = (IndexPath *) path; + path_cost = sorted_path->startup_cost + + path_fraction * (sorted_path->total_cost - sorted_path->startup_cost); - /* OK if it's a btree index */ - if (ipath->indexinfo->relam == BTREE_AM_OID) - return true; - } - else if (IsA(path, MergeAppendPath)) - { - MergeAppendPath *mpath = (MergeAppendPath *) path; - ListCell *lc; + /* Save state for further processing */ + mminfo->subroot = subroot; + mminfo->path = sorted_path; + mminfo->pathcost = path_cost; - foreach(lc, mpath->subpaths) - { - if (!path_usable_for_agg((Path *) lfirst(lc))) - return false; - } - return true; - } - return false; + return true; } /* * Construct a suitable plan for a converted aggregate query */ static void -make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info) +make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *mminfo) { - PlannerInfo subroot; - Query *subparse; + PlannerInfo *subroot = mminfo->subroot; + Query *subparse = subroot->parse; Plan *plan; - TargetEntry *tle; - - /* - * Generate a suitably modified query. Much of the work here is probably - * unnecessary in the normal case, but we want to make it look good if - * someone tries to EXPLAIN the result. - */ - memcpy(&subroot, root, sizeof(PlannerInfo)); - subroot.parse = subparse = (Query *) copyObject(root->parse); - subparse->commandType = CMD_SELECT; - subparse->resultRelation = 0; - subparse->returningList = NIL; - subparse->utilityStmt = NULL; - subparse->intoClause = NULL; - subparse->hasAggs = false; - subparse->hasDistinctOn = false; - subparse->groupClause = NIL; - subparse->havingQual = NULL; - subparse->distinctClause = NIL; - subparse->sortClause = NIL; - subroot.hasHavingQual = false; - - /* single tlist entry that is the aggregate target */ - tle = makeTargetEntry(copyObject(info->mminfo->target), - 1, - pstrdup("agg_target"), - false); - subparse->targetList = list_make1(tle); - - /* set up expressions for LIMIT 1 */ - subparse->limitOffset = NULL; - subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64), - Int64GetDatum(1), false, - FLOAT8PASSBYVAL); - - /* - * Modify the ordered Path to add an indexed "target IS NOT NULL" - * condition to each scan. We need this to ensure we don't return a NULL, - * which'd be contrary to the standard behavior of MIN/MAX. We insist on - * it being indexed, else the Path might not be as cheap as we thought. - */ - add_notnull_qual(root, rel, info, info->path); /* * Generate the plan for the subquery. We already have a Path, but we have * to convert it to a Plan and attach a LIMIT node above it. */ - plan = create_plan(&subroot, info->path); + plan = create_plan(subroot, mminfo->path); plan->targetlist = subparse->targetList; @@ -559,137 +519,28 @@ make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info) /* * Convert the plan into an InitPlan, and make a Param for its result. */ - info->param = SS_make_initplan_from_plan(&subroot, plan, - exprType((Node *) tle->expr), - -1, - exprCollation((Node *) tle->expr)); + mminfo->param = + SS_make_initplan_from_plan(subroot, plan, + exprType((Node *) mminfo->target), + -1, + exprCollation((Node *) mminfo->target)); /* - * Put the updated list of InitPlans back into the outer PlannerInfo. + * Make sure the initplan gets into the outer PlannerInfo, along with + * any other initplans generated by the sub-planning run. We had to + * include the outer PlannerInfo's pre-existing initplans into the + * inner one's init_plans list earlier, so make sure we don't put back + * any duplicate entries. */ - root->init_plans = subroot.init_plans; -} - -/* - * Attach a suitable NOT NULL qual to the IndexPath, or each of the member - * IndexPaths. Note we assume we can modify the paths in-place. - */ -static void -add_notnull_qual(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info, - Path *path) -{ - if (IsA(path, IndexPath)) - { - IndexPath *ipath = (IndexPath *) path; - Expr *target; - NullTest *ntest; - RestrictInfo *rinfo; - List *newquals; - bool found_clause; - - /* - * If we are looking at a child of the original rel, we have to adjust - * the agg target expression to match the child. - */ - if (ipath->path.parent != rel) - { - AppendRelInfo *appinfo = NULL; - ListCell *lc; - - /* Search for the appropriate AppendRelInfo */ - foreach(lc, root->append_rel_list) - { - appinfo = (AppendRelInfo *) lfirst(lc); - if (appinfo->parent_relid == rel->relid && - appinfo->child_relid == ipath->path.parent->relid) - break; - appinfo = NULL; - } - if (!appinfo) - elog(ERROR, "failed to find AppendRelInfo for child rel"); - target = (Expr *) - adjust_appendrel_attrs((Node *) info->mminfo->target, - appinfo); - } - else - { - /* Otherwise, just make a copy (may not be necessary) */ - target = copyObject(info->mminfo->target); - } - - /* Build "target IS NOT NULL" expression */ - ntest = makeNode(NullTest); - ntest->nulltesttype = IS_NOT_NULL; - ntest->arg = target; - /* we checked it wasn't a rowtype in find_minmax_aggs_walker */ - ntest->argisrow = false; - - /* - * We can skip adding the NOT NULL qual if it duplicates either an - * already-given index condition, or a clause of the index predicate. - */ - if (list_member(get_actual_clauses(ipath->indexquals), ntest) || - list_member(ipath->indexinfo->indpred, ntest)) - return; - - /* Wrap it in a RestrictInfo and prepend to existing indexquals */ - rinfo = make_restrictinfo((Expr *) ntest, - true, - false, - false, - NULL, - NULL); - - newquals = list_concat(list_make1(rinfo), ipath->indexquals); - - /* - * We can't just stick the IS NOT NULL at the front of the list, - * though. It has to go in the right position corresponding to its - * index column, which might not be the first one. Easiest way to fix - * this is to run the quals through group_clauses_by_indexkey again. - */ - newquals = group_clauses_by_indexkey(ipath->indexinfo, - newquals, - NIL, - NULL, - SAOP_FORBID, - &found_clause); - - newquals = flatten_clausegroups_list(newquals); - - /* Trouble if we lost any quals */ - if (list_length(newquals) != list_length(ipath->indexquals) + 1) - elog(ERROR, "add_notnull_qual failed to add NOT NULL qual"); - - /* - * And update the path's indexquals. Note we don't bother adding - * to indexclauses, which is OK since this is like a generated - * index qual. - */ - ipath->indexquals = newquals; - } - else if (IsA(path, MergeAppendPath)) - { - MergeAppendPath *mpath = (MergeAppendPath *) path; - ListCell *lc; - - foreach(lc, mpath->subpaths) - { - add_notnull_qual(root, rel, info, (Path *) lfirst(lc)); - } - } - else - { - /* shouldn't get here, because of path_usable_for_agg checks */ - elog(ERROR, "add_notnull_qual failed"); - } + root->init_plans = list_concat_unique_ptr(root->init_plans, + subroot->init_plans); } /* * Replace original aggregate calls with subplan output Params */ static Node * -replace_aggs_with_params_mutator(Node *node, List **context) +replace_aggs_with_params_mutator(Node *node, PlannerInfo *root) { if (node == NULL) return NULL; @@ -697,21 +548,21 @@ replace_aggs_with_params_mutator(Node *node, List **context) { Aggref *aggref = (Aggref *) node; TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args); - ListCell *l; + ListCell *lc; - foreach(l, *context) + foreach(lc, root->minmax_aggs) { - PrivateMMAggInfo *info = (PrivateMMAggInfo *) lfirst(l); + MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); - if (info->mminfo->aggfnoid == aggref->aggfnoid && - equal(info->mminfo->target, curTarget->expr)) - return (Node *) info->param; + if (mminfo->aggfnoid == aggref->aggfnoid && + equal(mminfo->target, curTarget->expr)) + return (Node *) mminfo->param; } - elog(ERROR, "failed to re-find PrivateMMAggInfo record"); + elog(ERROR, "failed to re-find MinMaxAggInfo record"); } Assert(!IsA(node, SubLink)); return expression_tree_mutator(node, replace_aggs_with_params_mutator, - (void *) context); + (void *) root); } /* diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index d0e872b646..3dc23662e7 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -440,18 +440,9 @@ query_planner(PlannerInfo *root, List *tlist, static void canonicalize_all_pathkeys(PlannerInfo *root) { - ListCell *lc; - root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys); root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys); root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys); - - foreach(lc, root->minmax_aggs) - { - MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); - - mminfo->pathkeys = canonicalize_pathkeys(root, mminfo->pathkeys); - } } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 38112f1501..56d25abc6d 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1042,7 +1042,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) count_agg_clauses(parse->havingQual, &agg_counts); /* - * Preprocess MIN/MAX aggregates, if any. + * Preprocess MIN/MAX aggregates, if any. Note: be careful about + * adding logic between here and the optimize_minmax_aggregates + * call. Anything that is needed in MIN/MAX-optimizable cases + * will have to be duplicated in planagg.c. */ preprocess_minmax_aggregates(root, tlist); } diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 7fee3f1464..f340f18b41 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1387,9 +1387,6 @@ typedef struct PlaceHolderInfo /* * For each potentially index-optimizable MIN/MAX aggregate function, * root->minmax_aggs stores a MinMaxAggInfo describing it. - * - * Note: a MIN/MAX agg doesn't really care about the nulls_first property, - * so the pathkey's nulls_first flag should be ignored. */ typedef struct MinMaxAggInfo { @@ -1398,7 +1395,10 @@ typedef struct MinMaxAggInfo Oid aggfnoid; /* pg_proc Oid of the aggregate */ Oid aggsortop; /* Oid of its sort operator */ Expr *target; /* expression we are aggregating on */ - List *pathkeys; /* pathkeys representing needed sort order */ + PlannerInfo *subroot; /* modified "root" for planning the subquery */ + Path *path; /* access path for subquery */ + Cost pathcost; /* estimated cost to fetch first row */ + Param *param; /* param for subplan's output */ } MinMaxAggInfo; /* diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 06aed5f317..5ff951e67e 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -42,14 +42,6 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); * indxpath.c * routines to generate index paths */ -typedef enum -{ - /* Whether to use ScalarArrayOpExpr to build index qualifications */ - SAOP_FORBID, /* Do not use ScalarArrayOpExpr */ - SAOP_ALLOW, /* OK to use ScalarArrayOpExpr */ - SAOP_REQUIRE /* Require ScalarArrayOpExpr */ -} SaOpControl; - extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel); extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *outer_clauses, @@ -59,11 +51,6 @@ extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, Path **cheapest_startup, Path **cheapest_total); extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist); -extern List *group_clauses_by_indexkey(IndexOptInfo *index, - List *clauses, List *outer_clauses, - Relids outer_relids, - SaOpControl saop_control, - bool *found_clause); extern bool eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em, RelOptInfo *rel); @@ -176,9 +163,6 @@ extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist, bool canonicalize); -extern List *make_pathkeys_for_aggregate(PlannerInfo *root, - Expr *aggtarget, - Oid aggsortop); extern void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); extern void update_mergeclause_eclasses(PlannerInfo *root, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 82407bc9fd..48610066bb 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -690,32 +690,19 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; 9999 | 1 (3 rows) --- this is an interesting special case as of 9.1 -explain (costs off) - select min(unique2) from tenk1 where unique2 = 42; - QUERY PLAN ------------------------------------------------ - Aggregate - -> Index Scan using tenk1_unique2 on tenk1 - Index Cond: (unique2 = 42) -(3 rows) - -select min(unique2) from tenk1 where unique2 = 42; - min ------ - 42 -(1 row) - -- try it on an inheritance tree create table minmaxtest(f1 int); create table minmaxtest1() inherits (minmaxtest); create table minmaxtest2() inherits (minmaxtest); +create table minmaxtest3() inherits (minmaxtest); create index minmaxtesti on minmaxtest(f1); create index minmaxtest1i on minmaxtest1(f1); create index minmaxtest2i on minmaxtest2(f1 desc); +create index minmaxtest3i on minmaxtest3(f1) where f1 is not null; insert into minmaxtest values(11), (12); insert into minmaxtest1 values(13), (14); insert into minmaxtest2 values(15), (16); +insert into minmaxtest3 values(17), (18); explain (costs off) select min(f1), max(f1) from minmaxtest; QUERY PLAN @@ -731,6 +718,8 @@ explain (costs off) Index Cond: (f1 IS NOT NULL) -> Index Scan Backward using minmaxtest2i on minmaxtest2 minmaxtest Index Cond: (f1 IS NOT NULL) + -> Index Scan using minmaxtest3i on minmaxtest3 minmaxtest + Index Cond: (f1 IS NOT NULL) InitPlan 2 (returns $1) -> Limit -> Merge Append @@ -741,18 +730,21 @@ explain (costs off) Index Cond: (f1 IS NOT NULL) -> Index Scan using minmaxtest2i on minmaxtest2 minmaxtest Index Cond: (f1 IS NOT NULL) -(21 rows) + -> Index Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest + Index Cond: (f1 IS NOT NULL) +(25 rows) select min(f1), max(f1) from minmaxtest; min | max -----+----- - 11 | 16 + 11 | 18 (1 row) drop table minmaxtest cascade; -NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table minmaxtest1 drop cascades to table minmaxtest2 +drop cascades to table minmaxtest3 -- -- Test combinations of DISTINCT and/or ORDER BY -- diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 232649937a..04ec67b33a 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -258,22 +258,21 @@ select max(unique2) from tenk1 order by max(unique2)+1; explain (costs off) select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; --- this is an interesting special case as of 9.1 -explain (costs off) - select min(unique2) from tenk1 where unique2 = 42; -select min(unique2) from tenk1 where unique2 = 42; -- try it on an inheritance tree create table minmaxtest(f1 int); create table minmaxtest1() inherits (minmaxtest); create table minmaxtest2() inherits (minmaxtest); +create table minmaxtest3() inherits (minmaxtest); create index minmaxtesti on minmaxtest(f1); create index minmaxtest1i on minmaxtest1(f1); create index minmaxtest2i on minmaxtest2(f1 desc); +create index minmaxtest3i on minmaxtest3(f1) where f1 is not null; insert into minmaxtest values(11), (12); insert into minmaxtest1 values(13), (14); insert into minmaxtest2 values(15), (16); +insert into minmaxtest3 values(17), (18); explain (costs off) select min(f1), max(f1) from minmaxtest; -- GitLab