提交 addc42c3 编写于 作者: T Tom Lane

Create the planner mechanism for optimizing simple MIN and MAX queries

into indexscans on matching indexes.  For the moment, it only handles
int4 and text datatypes; next step is to add a column to pg_aggregate
so that all MIN/MAX aggregates can be handled.  Per my recent proposal.
上级 c3294f1c
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.172 2005/03/28 00:58:22 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.173 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -54,7 +54,6 @@
((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID)
static List *group_clauses_by_indexkey(IndexOptInfo *index);
static List *group_clauses_by_indexkey_for_join(Query *root,
IndexOptInfo *index,
Relids outer_relids,
......@@ -72,8 +71,6 @@ static bool pred_test_simple_clause(Expr *predicate, Node *clause);
static Relids indexable_outerrelids(IndexOptInfo *index);
static Path *make_innerjoin_index_path(Query *root, IndexOptInfo *index,
List *clausegroups);
static bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
static bool match_boolean_index_clause(Node *clause, int indexcol,
IndexOptInfo *index);
static bool match_special_index_operator(Expr *clause, Oid opclass,
......@@ -234,7 +231,7 @@ create_index_paths(Query *root, RelOptInfo *rel)
* clauses matching column C, because the executor couldn't use them anyway.
* Therefore, there are no empty sublists in the result.
*/
static List *
List *
group_clauses_by_indexkey(IndexOptInfo *index)
{
List *clausegroup_list = NIL;
......@@ -1774,7 +1771,7 @@ make_expr_from_indexclauses(List *indexclauses)
* indexcol: the column number of the index (counting from 0)
* index: the index of interest
*/
static bool
bool
match_index_to_operand(Node *operand,
int indexcol,
IndexOptInfo *index)
......
......@@ -4,7 +4,7 @@
# Makefile for optimizer/plan
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.12 2003/11/29 19:51:50 pgsql Exp $
# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.13 2005/04/11 23:06:55 tgl Exp $
#
#-------------------------------------------------------------------------
......@@ -12,7 +12,8 @@ subdir = src/backend/optimizer/plan
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = createplan.o initsplan.o planmain.o planner.o setrefs.o subselect.o
OBJS = createplan.o initsplan.o planagg.o planmain.o planner.o \
setrefs.o subselect.o
all: SUBSYS.o
......
/*-------------------------------------------------------------------------
*
* planagg.c
* Special planning for aggregate queries.
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.1 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/skey.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/subselect.h"
#include "parser/parsetree.h"
#include "parser/parse_clause.h"
#include "parser/parse_expr.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
typedef struct
{
Oid aggfnoid; /* pg_proc Oid of the aggregate */
Oid aggsortop; /* Oid of its sort operator */
Expr *target; /* expression we are aggregating on */
IndexPath *path; /* access path for index scan */
Cost pathcost; /* estimated cost to fetch first row */
Param *param; /* param for subplan's output */
} MinMaxAggInfo;
static bool find_minmax_aggs_walker(Node *node, List **context);
static bool build_minmax_path(Query *root, RelOptInfo *rel,
MinMaxAggInfo *info);
static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
IndexOptInfo *index, int indexcol);
static void make_agg_subplan(Query *root, MinMaxAggInfo *info,
List *constant_quals);
static Node *replace_aggs_with_params_mutator(Node *node, List **context);
static Oid fetch_agg_sort_op(Oid aggfnoid);
/*
* optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
*
* This checks to see if we can replace MIN/MAX aggregate functions by
* subqueries of the form
* (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
* Given a suitable index on tab.col, this can be much faster than the
* generic scan-all-the-rows plan.
*
* We are passed the Query, the preprocessed tlist, and the best path
* devised for computing the input of a standard Agg node. If we are able
* to optimize all the aggregates, and the result is estimated to be cheaper
* than the generic aggregate method, then generate and return a Plan that
* does it that way. Otherwise, return NULL.
*/
Plan *
optimize_minmax_aggregates(Query *root, List *tlist, Path *best_path)
{
RangeTblRef *rtr;
RangeTblEntry *rte;
RelOptInfo *rel;
List *aggs_list;
ListCell *l;
Cost total_cost;
Path agg_p;
Plan *plan;
Node *hqual;
QualCost tlist_cost;
List *constant_quals;
/* Nothing to do if query has no aggregates */
if (!root->hasAggs)
return NULL;
Assert(!root->setOperations); /* shouldn't get here if a setop */
Assert(root->rowMarks == NIL); /* nor if FOR UPDATE */
/*
* Reject unoptimizable cases.
*
* We don't handle GROUP BY, because our current implementations of
* grouping require looking at all the rows anyway, and so there's not
* much point in optimizing MIN/MAX.
*/
if (root->groupClause)
return NULL;
/*
* We also restrict the query to reference exactly one table, since
* join conditions can't be handled reasonably. (We could perhaps
* handle a query containing cartesian-product joins, but it hardly
* seems worth the trouble.)
*/
Assert(root->jointree != NULL && IsA(root->jointree, FromExpr));
if (list_length(root->jointree->fromlist) != 1)
return NULL;
rtr = (RangeTblRef *) linitial(root->jointree->fromlist);
if (!IsA(rtr, RangeTblRef))
return NULL;
rte = rt_fetch(rtr->rtindex, root->rtable);
if (rte->rtekind != RTE_RELATION)
return NULL;
rel = find_base_rel(root, rtr->rtindex);
/*
* Also reject cases with subplans or volatile functions in WHERE.
* This may be overly paranoid, but it's not entirely clear if the
* transformation is safe then.
*/
if (contain_subplans(root->jointree->quals) ||
contain_volatile_functions(root->jointree->quals))
return NULL;
/*
* Since this optimization is not applicable all that often, we want
* to fall out before doing very much work if possible. Therefore
* we do the work in several passes. The first pass scans the tlist
* and HAVING qual to find all the aggregates and verify that
* each of them is a MIN/MAX aggregate. If that succeeds, the second
* pass looks at each aggregate to see if it is optimizable; if so
* we make an IndexPath describing how we would scan it. (We do not
* try to optimize if only some aggs are optimizable, since that means
* we'll have to scan all the rows anyway.) If that succeeds, we have
* enough info to compare costs against the generic implementation.
* Only if that test passes do we build a Plan.
*/
/* Pass 1: find all the aggregates */
aggs_list = NIL;
if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
return NULL;
if (find_minmax_aggs_walker(root->havingQual, &aggs_list))
return NULL;
/* Pass 2: see if each one is optimizable */
total_cost = 0;
foreach(l, aggs_list)
{
MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
if (!build_minmax_path(root, rel, info))
return NULL;
total_cost += info->pathcost;
}
/*
* Make the cost comparison.
*
* Note that we don't include evaluation cost of the tlist here;
* this is OK since it isn't included in best_path's cost either,
* and should be the same in either case.
*/
cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
0, 0,
best_path->startup_cost, best_path->total_cost,
best_path->parent->rows);
if (total_cost > agg_p.total_cost)
return NULL; /* too expensive */
/*
* OK, we are going to generate an optimized plan. The first thing we
* need to do is look for any non-variable WHERE clauses that query_planner
* might have removed from the basic plan. (Normal WHERE clauses will
* be properly incorporated into the sub-plans by create_plan.) If there
* are any, they will be in a gating Result node atop the best_path.
* They have to be incorporated into a gating Result in each sub-plan
* in order to produce the semantically correct result.
*/
if (IsA(best_path, ResultPath))
{
Assert(((ResultPath *) best_path)->subpath != NULL);
constant_quals = ((ResultPath *) best_path)->constantqual;
}
else
constant_quals = NIL;
/* Pass 3: generate subplans and output Param nodes */
foreach(l, aggs_list)
{
make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l), constant_quals);
}
/*
* Modify the targetlist and HAVING qual to reference subquery outputs
*/
tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
&aggs_list);
hqual = replace_aggs_with_params_mutator(root->havingQual,
&aggs_list);
/*
* Generate the output plan --- basically just a Result
*/
plan = (Plan *) make_result(tlist, hqual, NULL);
/* Account for evaluation cost of the tlist (make_result did the rest) */
cost_qual_eval(&tlist_cost, tlist);
plan->startup_cost += tlist_cost.startup;
plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple;
return plan;
}
/*
* find_minmax_aggs_walker
* Recursively scan the Aggref nodes in an expression tree, and check
* that each one is a MIN/MAX aggregate. If so, build a list of the
* distinct aggregate calls in the tree.
*
* Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
* (This seemingly-backward definition is used because expression_tree_walker
* aborts the scan on TRUE return, which is what we want.)
*
* Found aggregates are added to the list at *context; it's up to the caller
* to initialize the list to NIL.
*
* This does not descend into subqueries, and so should be used only after
* reduction of sublinks to subplans. There mustn't be outer-aggregate
* references either.
*/
static bool
find_minmax_aggs_walker(Node *node, List **context)
{
if (node == NULL)
return false;
if (IsA(node, Aggref))
{
Aggref *aggref = (Aggref *) node;
Oid aggsortop;
MinMaxAggInfo *info;
ListCell *l;
Assert(aggref->agglevelsup == 0);
if (aggref->aggstar)
return true; /* foo(*) is surely not optimizable */
/* note: we do not care if DISTINCT is mentioned ... */
aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
if (!OidIsValid(aggsortop))
return true; /* not a MIN/MAX aggregate */
/*
* Check whether it's already in the list, and add it if not.
*/
foreach(l, *context)
{
info = (MinMaxAggInfo *) lfirst(l);
if (info->aggfnoid == aggref->aggfnoid &&
equal(info->target, aggref->target))
return false;
}
info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
info->aggfnoid = aggref->aggfnoid;
info->aggsortop = aggsortop;
info->target = aggref->target;
*context = lappend(*context, info);
/*
* We need not recurse into the argument, since it can't contain
* any aggregates.
*/
return false;
}
Assert(!IsA(node, SubLink));
return expression_tree_walker(node, find_minmax_aggs_walker,
(void *) context);
}
/*
* build_minmax_path
* Given a MIN/MAX aggregate, try to find an index it can be optimized
* with. Build a Path describing the best such index path.
*
* Returns TRUE if successful, FALSE if not. In the TRUE case, info->path
* is filled in.
*
* XXX look at sharing more code with indxpath.c.
*
* Note: check_partial_indexes() must have been run previously.
*/
static bool
build_minmax_path(Query *root, RelOptInfo *rel, MinMaxAggInfo *info)
{
IndexPath *best_path = NULL;
Cost best_cost = 0;
ListCell *l;
foreach(l, rel->indexlist)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
ScanDirection indexscandir = NoMovementScanDirection;
int indexcol;
int prevcol;
List *restrictclauses;
IndexPath *new_path;
Cost new_cost;
/* Ignore non-btree indexes */
if (index->relam != BTREE_AM_OID)
continue;
/* Ignore partial indexes that do not match the query */
if (index->indpred != NIL && !index->predOK)
continue;
/*
* Look for a match to one of the index columns. (In a stupidly
* designed index, there could be multiple matches, but we only
* care about the first one.)
*/
for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
{
indexscandir = match_agg_to_index_col(info, index, indexcol);
if (!ScanDirectionIsNoMovement(indexscandir))
break;
}
if (ScanDirectionIsNoMovement(indexscandir))
continue;
/*
* If the match is not at the first index column, we have to verify
* that there are "x = something" restrictions on all the earlier
* index columns. Since we'll need the restrictclauses list anyway
* to build the path, it's convenient to extract that first and then
* look through it for the equality restrictions.
*/
restrictclauses = group_clauses_by_indexkey(index);
if (list_length(restrictclauses) < indexcol)
continue; /* definitely haven't got enough */
for (prevcol = 0; prevcol < indexcol; prevcol++)
{
List *rinfos = (List *) list_nth(restrictclauses, prevcol);
ListCell *ll;
foreach(ll, rinfos)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
int strategy;
Assert(is_opclause(rinfo->clause));
strategy =
get_op_opclass_strategy(((OpExpr *) rinfo->clause)->opno,
index->classlist[prevcol]);
if (strategy == BTEqualStrategyNumber)
break;
}
if (ll == NULL)
break; /* none are Equal for this index col */
}
if (prevcol < indexcol)
continue; /* didn't find all Equal clauses */
/*
* Build the access path. We don't bother marking it with pathkeys.
*/
new_path = create_index_path(root, index,
restrictclauses,
NIL,
indexscandir);
/*
* Estimate actual cost of fetching just one row.
*/
if (new_path->rows > 1.0)
new_cost = new_path->path.startup_cost +
(new_path->path.total_cost - new_path->path.startup_cost)
* 1.0 / new_path->rows;
else
new_cost = new_path->path.total_cost;
/*
* Keep if first or if cheaper than previous best.
*/
if (best_path == NULL || new_cost < best_cost)
{
best_path = new_path;
best_cost = new_cost;
}
}
info->path = best_path;
info->pathcost = best_cost;
return (best_path != NULL);
}
/*
* match_agg_to_index_col
* Does an aggregate match an index column?
*
* It matches if its argument is equal to the index column's data and its
* sortop is either the LessThan or GreaterThan member of the column's opclass.
*
* We return ForwardScanDirection if match the LessThan member,
* BackwardScanDirection if match the GreaterThan member,
* and NoMovementScanDirection if there's no match.
*/
static ScanDirection
match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
{
int strategy;
/* Check for data match */
if (!match_index_to_operand((Node *) info->target, indexcol, index))
return NoMovementScanDirection;
/* Look up the operator in the opclass */
strategy = get_op_opclass_strategy(info->aggsortop,
index->classlist[indexcol]);
if (strategy == BTLessStrategyNumber)
return ForwardScanDirection;
if (strategy == BTGreaterStrategyNumber)
return BackwardScanDirection;
return NoMovementScanDirection;
}
/*
* Construct a suitable plan for a converted aggregate query
*/
static void
make_agg_subplan(Query *root, MinMaxAggInfo *info, List *constant_quals)
{
Query *subquery;
Path *path;
Plan *plan;
TargetEntry *tle;
SortClause *sortcl;
/*
* Generate a suitably modified Query node. Much of the work here is
* probably unnecessary in the normal case, but we want to make it look
* good if someone tries to EXPLAIN the result.
*/
subquery = (Query *) copyObject(root);
subquery->commandType = CMD_SELECT;
subquery->resultRelation = 0;
subquery->resultRelations = NIL;
subquery->into = NULL;
subquery->hasAggs = false;
subquery->groupClause = NIL;
subquery->havingQual = NULL;
subquery->hasHavingQual = false;
subquery->distinctClause = NIL;
/* single tlist entry that is the aggregate target */
tle = makeTargetEntry(copyObject(info->target),
1,
pstrdup("agg_target"),
false);
subquery->targetList = list_make1(tle);
/* set up the appropriate ORDER BY entry */
sortcl = makeNode(SortClause);
sortcl->tleSortGroupRef = assignSortGroupRef(tle, subquery->targetList);
sortcl->sortop = info->aggsortop;
subquery->sortClause = list_make1(sortcl);
/* set up LIMIT 1 */
subquery->limitOffset = NULL;
subquery->limitCount = (Node *) makeConst(INT4OID, sizeof(int4),
Int32GetDatum(1),
false, true);
/*
* Generate the plan for the subquery. We already have a Path for
* the basic indexscan, but we have to convert it to a Plan and
* attach a LIMIT node above it. We might need a gating Result, too,
* which is most easily added at the Path stage.
*/
path = (Path *) info->path;
if (constant_quals)
path = (Path *) create_result_path(NULL,
path,
copyObject(constant_quals));
plan = create_plan(subquery, path);
plan->targetlist = copyObject(subquery->targetList);
plan = (Plan *) make_limit(plan,
subquery->limitOffset,
subquery->limitCount);
/*
* Convert the plan into an InitPlan, and make a Param for its result.
*/
info->param = SS_make_initplan_from_plan(subquery, plan,
exprType((Node *) tle->expr),
-1);
}
/*
* Replace original aggregate calls with subplan output Params
*/
static Node *
replace_aggs_with_params_mutator(Node *node, List **context)
{
if (node == NULL)
return NULL;
if (IsA(node, Aggref))
{
Aggref *aggref = (Aggref *) node;
ListCell *l;
foreach(l, *context)
{
MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
if (info->aggfnoid == aggref->aggfnoid &&
equal(info->target, aggref->target))
return (Node *) info->param;
}
elog(ERROR, "failed to re-find aggregate info record");
}
Assert(!IsA(node, SubLink));
return expression_tree_mutator(node, replace_aggs_with_params_mutator,
(void *) context);
}
/*
* Get the OID of the sort operator, if any, associated with an aggregate.
* Returns InvalidOid if there is no such operator.
*/
static Oid
fetch_agg_sort_op(Oid aggfnoid)
{
#ifdef NOT_YET
HeapTuple aggTuple;
Form_pg_aggregate aggform;
Oid aggsortop;
/* fetch aggregate entry from pg_aggregate */
aggTuple = SearchSysCache(AGGFNOID,
ObjectIdGetDatum(aggfnoid),
0, 0, 0);
if (!HeapTupleIsValid(aggTuple))
return InvalidOid;
aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
aggsortop = aggform->aggsortop;
ReleaseSysCache(aggTuple);
return aggsortop;
#else
/*
* XXX stub implementation for testing: hardwire a few cases.
*/
if (aggfnoid == 2132) /* min(int4) -> int4lt */
return 97;
if (aggfnoid == 2116) /* max(int4) -> int4gt */
return 521;
if (aggfnoid == 2145) /* min(text) -> text_lt */
return 664;
if (aggfnoid == 2129) /* max(text) -> text_gt */
return 666;
return InvalidOid;
#endif
}
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.184 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -362,43 +362,12 @@ subquery_planner(Query *parse, double tuple_fraction)
/*
* If any subplans were generated, or if we're inside a subplan, build
* initPlan list and extParam/allParam sets for plan nodes.
* initPlan list and extParam/allParam sets for plan nodes, and attach
* the initPlans to the top plan node.
*/
if (PlannerPlanId != saved_planid || PlannerQueryLevel > 1)
{
Cost initplan_cost = 0;
/* Prepare extParam/allParam sets for all nodes in tree */
SS_finalize_plan(plan, parse->rtable);
/*
* SS_finalize_plan doesn't handle initPlans, so we have to
* manually attach them to the topmost plan node, and add their
* extParams to the topmost node's, too.
*
* We also add the total_cost of each initPlan to the startup cost of
* the top node. This is a conservative overestimate, since in
* fact each initPlan might be executed later than plan startup,
* or even not at all.
*/
plan->initPlan = PlannerInitPlan;
foreach(l, plan->initPlan)
{
SubPlan *initplan = (SubPlan *) lfirst(l);
plan->extParam = bms_add_members(plan->extParam,
initplan->plan->extParam);
/* allParam must include all members of extParam */
plan->allParam = bms_add_members(plan->allParam,
plan->extParam);
initplan_cost += initplan->plan->total_cost;
}
plan->startup_cost += initplan_cost;
plan->total_cost += initplan_cost;
}
/* Return to outer subquery context */
PlannerQueryLevel--;
PlannerInitPlan = saved_initplan;
......@@ -692,6 +661,7 @@ grouping_planner(Query *parse, double tuple_fraction)
double sub_tuple_fraction;
Path *cheapest_path;
Path *sorted_path;
Path *best_path;
double dNumGroups = 0;
long numGroups = 0;
AggClauseCounts agg_counts;
......@@ -959,114 +929,175 @@ grouping_planner(Query *parse, double tuple_fraction)
}
/*
* Select the best path and create a plan to execute it.
*
* If we are doing hashed grouping, we will always read all the input
* tuples, so use the cheapest-total path. Otherwise, trust
* query_planner's decision about which to use.
* Select the best path. If we are doing hashed grouping, we will
* always read all the input tuples, so use the cheapest-total
* path. Otherwise, trust query_planner's decision about which to use.
*/
if (sorted_path && !use_hashed_grouping)
{
result_plan = create_plan(parse, sorted_path);
current_pathkeys = sorted_path->pathkeys;
}
if (use_hashed_grouping || !sorted_path)
best_path = cheapest_path;
else
{
result_plan = create_plan(parse, cheapest_path);
current_pathkeys = cheapest_path->pathkeys;
}
best_path = sorted_path;
/*
* create_plan() returns a plan with just a "flat" tlist of
* required Vars. Usually we need to insert the sub_tlist as the
* tlist of the top plan node. However, we can skip that if we
* determined that whatever query_planner chose to return will be
* good enough.
* Check to see if it's possible to optimize MIN/MAX aggregates.
* If so, we will forget all the work we did so far to choose a
* "regular" path ... but we had to do it anyway to be able to
* tell which way is cheaper.
*/
if (need_tlist_eval)
result_plan = optimize_minmax_aggregates(parse,
tlist,
best_path);
if (result_plan != NULL)
{
/*
* optimize_minmax_aggregates generated the full plan, with
* the right tlist, and it has no sort order.
*/
current_pathkeys = NIL;
}
else
{
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist.
* Normal case --- create a plan according to query_planner's
* results.
*/
if (!is_projection_capable_plan(result_plan))
result_plan = create_plan(parse, best_path);
current_pathkeys = best_path->pathkeys;
/*
* create_plan() returns a plan with just a "flat" tlist of
* required Vars. Usually we need to insert the sub_tlist as the
* tlist of the top plan node. However, we can skip that if we
* determined that whatever query_planner chose to return will be
* good enough.
*/
if (need_tlist_eval)
{
result_plan = (Plan *) make_result(sub_tlist, NULL,
result_plan);
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist.
*/
if (!is_projection_capable_plan(result_plan))
{
result_plan = (Plan *) make_result(sub_tlist, NULL,
result_plan);
}
else
{
/*
* Otherwise, just replace the subplan's flat tlist with
* the desired tlist.
*/
result_plan->targetlist = sub_tlist;
}
/*
* Also, account for the cost of evaluation of the sub_tlist.
*
* Up to now, we have only been dealing with "flat" tlists,
* containing just Vars. So their evaluation cost is zero
* according to the model used by cost_qual_eval() (or if you
* prefer, the cost is factored into cpu_tuple_cost). Thus we
* can avoid accounting for tlist cost throughout
* query_planner() and subroutines. But now we've inserted a
* tlist that might contain actual operators, sub-selects, etc
* --- so we'd better account for its cost.
*
* Below this point, any tlist eval cost for added-on nodes
* should be accounted for as we create those nodes.
* Presently, of the node types we can add on, only Agg and
* Group project new tlists (the rest just copy their input
* tuples) --- so make_agg() and make_group() are responsible
* for computing the added cost.
*/
cost_qual_eval(&tlist_cost, sub_tlist);
result_plan->startup_cost += tlist_cost.startup;
result_plan->total_cost += tlist_cost.startup +
tlist_cost.per_tuple * result_plan->plan_rows;
}
else
{
/*
* Otherwise, just replace the subplan's flat tlist with
* the desired tlist.
* Since we're using query_planner's tlist and not the one
* make_subplanTargetList calculated, we have to refigure any
* grouping-column indexes make_subplanTargetList computed.
*/
result_plan->targetlist = sub_tlist;
locate_grouping_columns(parse, tlist, result_plan->targetlist,
groupColIdx);
}
/*
* Also, account for the cost of evaluation of the sub_tlist.
*
* Up to now, we have only been dealing with "flat" tlists,
* containing just Vars. So their evaluation cost is zero
* according to the model used by cost_qual_eval() (or if you
* prefer, the cost is factored into cpu_tuple_cost). Thus we
* can avoid accounting for tlist cost throughout
* query_planner() and subroutines. But now we've inserted a
* tlist that might contain actual operators, sub-selects, etc
* --- so we'd better account for its cost.
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
* Below this point, any tlist eval cost for added-on nodes
* should be accounted for as we create those nodes.
* Presently, of the node types we can add on, only Agg and
* Group project new tlists (the rest just copy their input
* tuples) --- so make_agg() and make_group() are responsible
* for computing the added cost.
*/
cost_qual_eval(&tlist_cost, sub_tlist);
result_plan->startup_cost += tlist_cost.startup;
result_plan->total_cost += tlist_cost.startup +
tlist_cost.per_tuple * result_plan->plan_rows;
}
else
{
/*
* Since we're using query_planner's tlist and not the one
* make_subplanTargetList calculated, we have to refigure any
* grouping-column indexes make_subplanTargetList computed.
* HAVING clause, if any, becomes qual of the Agg or Group node.
*/
locate_grouping_columns(parse, tlist, result_plan->targetlist,
groupColIdx);
}
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
AGG_HASHED,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else if (parse->hasAggs)
{
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy;
/*
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
* HAVING clause, if any, becomes qual of the Agg or Group node.
*/
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
AGG_HASHED,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else if (parse->hasAggs)
{
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy;
if (parse->groupClause)
{
if (!pathkeys_contained_in(group_pathkeys,
current_pathkeys))
{
result_plan = (Plan *)
make_sort_from_groupcols(parse,
parse->groupClause,
groupColIdx,
result_plan);
current_pathkeys = group_pathkeys;
}
aggstrategy = AGG_SORTED;
if (parse->groupClause)
/*
* The AGG node will not change the sort ordering of its
* groups, so current_pathkeys describes the result too.
*/
}
else
{
aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
aggstrategy,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
}
else if (parse->groupClause)
{
/*
* GROUP BY without aggregation, so insert a group node (plus
* the appropriate sort node, if necessary).
*
* Add an explicit sort if we couldn't make the path come
* out the way the GROUP node needs it.
*/
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
......@@ -1076,75 +1107,34 @@ grouping_planner(Query *parse, double tuple_fraction)
result_plan);
current_pathkeys = group_pathkeys;
}
aggstrategy = AGG_SORTED;
/*
* The AGG node will not change the sort ordering of its
* groups, so current_pathkeys describes the result too.
*/
result_plan = (Plan *) make_group(parse,
tlist,
(List *) parse->havingQual,
numGroupCols,
groupColIdx,
dNumGroups,
result_plan);
/* The Group node won't change sort ordering */
}
else
else if (parse->hasHavingQual)
{
aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
aggstrategy,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
}
else if (parse->groupClause)
{
/*
* GROUP BY without aggregation, so insert a group node (plus the
* appropriate sort node, if necessary).
*
* Add an explicit sort if we couldn't make the path come
* out the way the GROUP node needs it.
*/
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
make_sort_from_groupcols(parse,
parse->groupClause,
groupColIdx,
result_plan);
current_pathkeys = group_pathkeys;
/*
* No aggregates, and no GROUP BY, but we have a HAVING qual.
* This is a degenerate case in which we are supposed to emit
* either 0 or 1 row depending on whether HAVING succeeds.
* Furthermore, there cannot be any variables in either HAVING
* or the targetlist, so we actually do not need the FROM table
* at all! We can just throw away the plan-so-far and generate
* a Result node. This is a sufficiently unusual corner case
* that it's not worth contorting the structure of this routine
* to avoid having to generate the plan in the first place.
*/
result_plan = (Plan *) make_result(tlist,
parse->havingQual,
NULL);
}
result_plan = (Plan *) make_group(parse,
tlist,
(List *) parse->havingQual,
numGroupCols,
groupColIdx,
dNumGroups,
result_plan);
/* The Group node won't change sort ordering */
}
else if (parse->hasHavingQual)
{
/*
* No aggregates, and no GROUP BY, but we have a HAVING qual.
* This is a degenerate case in which we are supposed to emit
* either 0 or 1 row depending on whether HAVING succeeds.
* Furthermore, there cannot be any variables in either HAVING
* or the targetlist, so we actually do not need the FROM table
* at all! We can just throw away the plan-so-far and generate
* a Result node. This is a sufficiently unusual corner case
* that it's not worth contorting the structure of this routine
* to avoid having to generate the plan in the first place.
*/
result_plan = (Plan *) make_result(tlist,
parse->havingQual,
NULL);
}
} /* end of non-minmax-aggregate case */
} /* end of if (setOperations) */
/*
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.95 2005/04/06 16:34:05 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.96 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -915,14 +915,16 @@ process_sublinks_mutator(Node *node, bool *isTopQual)
/*
* SS_finalize_plan - do final sublink processing for a completed Plan.
*
* This recursively computes the extParam and allParam sets
* for every Plan node in the given plan tree.
* This recursively computes the extParam and allParam sets for every Plan
* node in the given plan tree. It also attaches any generated InitPlans
* to the top plan node.
*/
void
SS_finalize_plan(Plan *plan, List *rtable)
{
Bitmapset *outer_params = NULL;
Bitmapset *valid_params = NULL;
Cost initplan_cost = 0;
int paramid;
ListCell *l;
......@@ -959,6 +961,33 @@ SS_finalize_plan(Plan *plan, List *rtable)
bms_free(outer_params);
bms_free(valid_params);
/*
* Finally, attach any initPlans to the topmost plan node,
* and add their extParams to the topmost node's, too.
*
* We also add the total_cost of each initPlan to the startup cost of
* the top node. This is a conservative overestimate, since in
* fact each initPlan might be executed later than plan startup,
* or even not at all.
*/
plan->initPlan = PlannerInitPlan;
PlannerInitPlan = NIL; /* make sure they're not attached twice */
foreach(l, plan->initPlan)
{
SubPlan *initplan = (SubPlan *) lfirst(l);
plan->extParam = bms_add_members(plan->extParam,
initplan->plan->extParam);
/* allParam must include all members of extParam */
plan->allParam = bms_add_members(plan->allParam,
plan->extParam);
initplan_cost += initplan->plan->total_cost;
}
plan->startup_cost += initplan_cost;
plan->total_cost += initplan_cost;
}
/*
......@@ -1165,3 +1194,75 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
return expression_tree_walker(node, finalize_primnode,
(void *) context);
}
/*
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
*
* The plan is expected to return a scalar value of the indicated type.
* We build an EXPR_SUBLINK SubPlan node and put it into the initplan
* list for the current query level. A Param that represents the initplan's
* output is returned.
*
* We assume the plan hasn't been put through SS_finalize_plan.
*/
Param *
SS_make_initplan_from_plan(Query *root, Plan *plan,
Oid resulttype, int32 resulttypmod)
{
List *saved_initplan = PlannerInitPlan;
SubPlan *node;
Param *prm;
Bitmapset *tmpset;
int paramid;
/*
* Set up for a new level of subquery. This is just to keep
* SS_finalize_plan from becoming confused.
*/
PlannerQueryLevel++;
PlannerInitPlan = NIL;
/*
* Build extParam/allParam sets for plan nodes.
*/
SS_finalize_plan(plan, root->rtable);
/* Return to outer subquery context */
PlannerQueryLevel--;
PlannerInitPlan = saved_initplan;
/*
* Create a SubPlan node and add it to the outer list of InitPlans.
*/
node = makeNode(SubPlan);
node->subLinkType = EXPR_SUBLINK;
node->plan = plan;
node->plan_id = PlannerPlanId++; /* Assign unique ID to this
* SubPlan */
node->rtable = root->rtable;
PlannerInitPlan = lappend(PlannerInitPlan, node);
/*
* Make parParam list of params that current query level will pass to
* this child plan. (In current usage there probably aren't any.)
*/
tmpset = bms_copy(plan->extParam);
while ((paramid = bms_first_member(tmpset)) >= 0)
{
PlannerParamItem *pitem = list_nth(PlannerParamList, paramid);
if (pitem->abslevel == PlannerQueryLevel)
node->parParam = lappend_int(node->parParam, paramid);
}
bms_free(tmpset);
/*
* Make a Param that will be the subplan's output.
*/
prm = generate_new_param(resulttype, resulttypmod);
node->setParam = list_make1_int(prm->paramid);
return prm;
}
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.122 2005/03/31 22:46:14 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.123 2005/04/11 23:06:56 tgl Exp $
*
* NOTES
* Eventually, the index information should go through here, too.
......@@ -53,6 +53,31 @@ op_in_opclass(Oid opno, Oid opclass)
0, 0);
}
/*
* get_op_opclass_strategy
*
* Get the operator's strategy number within the specified opclass,
* or 0 if it's not a member of the opclass.
*/
int
get_op_opclass_strategy(Oid opno, Oid opclass)
{
HeapTuple tp;
Form_pg_amop amop_tup;
int result;
tp = SearchSysCache(AMOPOPID,
ObjectIdGetDatum(opno),
ObjectIdGetDatum(opclass),
0, 0);
if (!HeapTupleIsValid(tp))
return 0;
amop_tup = (Form_pg_amop) GETSTRUCT(tp);
result = amop_tup->amopstrategy;
ReleaseSysCache(tp);
return result;
}
/*
* get_op_opclass_properties
*
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.80 2005/03/27 06:29:49 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -38,8 +38,11 @@ extern void debug_print_rel(Query *root, RelOptInfo *rel);
extern void create_index_paths(Query *root, RelOptInfo *rel);
extern Path *best_inner_indexscan(Query *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype);
extern List *group_clauses_by_indexkey(IndexOptInfo *index);
extern List *group_clauses_by_indexkey_for_or(IndexOptInfo *index,
Expr *orsubclause);
extern bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
extern List *expand_indexqual_conditions(IndexOptInfo *index,
List *clausegroups);
extern void check_partial_indexes(Query *root, RelOptInfo *rel);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.80 2005/03/10 23:21:25 tgl Exp $
* $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,6 +23,12 @@
extern void query_planner(Query *root, List *tlist, double tuple_fraction,
Path **cheapest_path, Path **sorted_path);
/*
* prototypes for plan/planagg.c
*/
extern Plan *optimize_minmax_aggregates(Query *root, List *tlist,
Path *best_path);
/*
* prototypes for plan/createplan.c
*/
......
......@@ -5,7 +5,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.23 2004/12/31 22:03:36 pgsql Exp $
* $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.24 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -24,5 +24,7 @@ extern Node *convert_IN_to_join(Query *parse, SubLink *sublink);
extern Node *SS_replace_correlation_vars(Node *expr);
extern Node *SS_process_sublinks(Node *expr, bool isQual);
extern void SS_finalize_plan(Plan *plan, List *rtable);
extern Param *SS_make_initplan_from_plan(Query *root, Plan *plan,
Oid resulttype, int32 resulttypmod);
#endif /* SUBSELECT_H */
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.96 2005/03/31 22:46:27 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.97 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -25,6 +25,7 @@ typedef enum IOFuncSelector
} IOFuncSelector;
extern bool op_in_opclass(Oid opno, Oid opclass);
extern int get_op_opclass_strategy(Oid opno, Oid opclass);
extern void get_op_opclass_properties(Oid opno, Oid opclass,
int *strategy, Oid *subtype,
bool *recheck);
......
......@@ -293,3 +293,58 @@ FROM bool_test;
t | t | f | | f | t
(1 row)
--
-- Test several cases that should be optimized into indexscans instead of
-- the generic aggregate implementation. We can't actually verify that they
-- are done as indexscans, but we can check that the results are correct.
--
-- Basic cases
select max(unique1) from tenk1;
max
------
9999
(1 row)
select max(unique1) from tenk1 where unique1 < 42;
max
-----
41
(1 row)
select max(unique1) from tenk1 where unique1 > 42;
max
------
9999
(1 row)
select max(unique1) from tenk1 where unique1 > 42000;
max
-----
(1 row)
-- multi-column index (uses tenk1_thous_tenthous)
select max(tenthous) from tenk1 where thousand = 33;
max
------
9033
(1 row)
select min(tenthous) from tenk1 where thousand = 33;
min
-----
33
(1 row)
-- check parameter propagation into an indexscan subquery
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
f1 | gt
-------------+----
0 | 1
123456 |
-123456 | 0
2147483647 |
-2147483647 | 0
(5 rows)
......@@ -12,6 +12,7 @@ CREATE INDEX onek_stringu1 ON onek USING btree(stringu1 name_ops);
CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops);
CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops);
......
......@@ -180,3 +180,23 @@ SELECT
BOOL_OR(NOT b2) AS "f",
BOOL_OR(NOT b3) AS "t"
FROM bool_test;
--
-- Test several cases that should be optimized into indexscans instead of
-- the generic aggregate implementation. We can't actually verify that they
-- are done as indexscans, but we can check that the results are correct.
--
-- Basic cases
select max(unique1) from tenk1;
select max(unique1) from tenk1 where unique1 < 42;
select max(unique1) from tenk1 where unique1 > 42;
select max(unique1) from tenk1 where unique1 > 42000;
-- multi-column index (uses tenk1_thous_tenthous)
select max(tenthous) from tenk1 where thousand = 33;
select min(tenthous) from tenk1 where thousand = 33;
-- check parameter propagation into an indexscan subquery
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
from int4_tbl;
......@@ -20,6 +20,8 @@ CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册