Reimplement planner's handling of MIN/MAX aggregate optimization (again).

Instead of playing cute games with pathkeys, just build a direct representation of the intended sub-select, and feed it through query_planner to get a Path for the index access. This is a bit slower than 9.1's previous method, since we'll duplicate most of the overhead of query_planner; but since the whole optimization only applies to rather simple single-table queries, that probably won't be much of a problem in practice. The advantage is that we get to do the right thing when there's a partial index that needs the implicit IS NOT NULL clause to be usable. Also, although this makes planagg.c be a bit more closely tied to the ordering of operations in grouping_planner, we can get rid of some coupling to lower-level parts of the planner. Per complaint from Marti Raudsepp.

Reimplement planner's handling of MIN/MAX aggregate optimization (again).
Instead of playing cute games with pathkeys, just build a direct representation of the intended sub-select, and feed it through query_planner to get a Path for the index access. This is a bit slower than 9.1's previous method, since we'll duplicate most of the overhead of query_planner; but since the whole optimization only applies to rather simple single-table queries, that probably won't be much of a problem in practice. The advantage is that we get to do the right thing when there's a partial index that needs the implicit IS NOT NULL clause to be usable. Also, although this makes planagg.c be a bit more closely tied to the ordering of operations in grouping_planner, we can get rid of some coupling to lower-level parts of the planner. Per complaint from Marti Raudsepp.
8df08c84 · Tom Lane · 6d8096e2 · 8df08c84 · 8df08c84 · 8df08c84
12 changed files
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -1930,22 +1930,6 @@ _copyPlaceHolderInfo(PlaceHolderInfo *from)
 	return newnode;
 }

-/*
- * _copyMinMaxAggInfo
- */
-static MinMaxAggInfo *
-_copyMinMaxAggInfo(MinMaxAggInfo *from)
-{
-	MinMaxAggInfo *newnode = makeNode(MinMaxAggInfo);
-
-	COPY_SCALAR_FIELD(aggfnoid);
-	COPY_SCALAR_FIELD(aggsortop);
-	COPY_NODE_FIELD(target);
-	COPY_NODE_FIELD(pathkeys);
-
-	return newnode;
-}
-
 /* ****************************************************************
 *					parsenodes.h copy functions
 * ****************************************************************
@@ -4129,9 +4113,6 @@ copyObject(void *from)
 		case T_PlaceHolderInfo:
 			retval = _copyPlaceHolderInfo(from);
 			break;
-		case T_MinMaxAggInfo:
-			retval = _copyMinMaxAggInfo(from);
-			break;

 			/*
 			 * VALUE NODES

--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -886,17 +886,6 @@ _equalPlaceHolderInfo(PlaceHolderInfo *a, PlaceHolderInfo *b)
 	return true;
 }

-static bool
-_equalMinMaxAggInfo(MinMaxAggInfo *a, MinMaxAggInfo *b)
-{
-	COMPARE_SCALAR_FIELD(aggfnoid);
-	COMPARE_SCALAR_FIELD(aggsortop);
-	COMPARE_NODE_FIELD(target);
-	COMPARE_NODE_FIELD(pathkeys);
-
-	return true;
-}
-

 /*
 * Stuff from parsenodes.h
@@ -2690,9 +2679,6 @@ equal(void *a, void *b)
 		case T_PlaceHolderInfo:
 			retval = _equalPlaceHolderInfo(a, b);
 			break;
-		case T_MinMaxAggInfo:
-			retval = _equalMinMaxAggInfo(a, b);
-			break;

 		case T_List:
 		case T_IntList:

--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1914,7 +1914,10 @@ _outMinMaxAggInfo(StringInfo str, MinMaxAggInfo *node)
 	WRITE_OID_FIELD(aggfnoid);
 	WRITE_OID_FIELD(aggsortop);
 	WRITE_NODE_FIELD(target);
-	WRITE_NODE_FIELD(pathkeys);
+	/* We intentionally omit subroot --- too large, not interesting enough */
+	WRITE_NODE_FIELD(path);
+	WRITE_FLOAT_FIELD(pathcost, "%.2f");
+	WRITE_NODE_FIELD(param);
 }

 static void

--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -41,6 +41,13 @@
 #define IsBooleanOpfamily(opfamily) \
 	((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)

+/* Whether to use ScalarArrayOpExpr to build index qualifications */
+typedef enum
+{
+	SAOP_FORBID,				/* Do not use ScalarArrayOpExpr */
+	SAOP_ALLOW,					/* OK to use ScalarArrayOpExpr */
+	SAOP_REQUIRE				/* Require ScalarArrayOpExpr */
+} SaOpControl;

 /* Whether we are looking for plain indexscan, bitmap scan, or either */
 typedef enum
@@ -78,6 +85,11 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
 							List **clauselist);
 static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
 static int	find_list_position(Node *node, List **nodelist);
+static List *group_clauses_by_indexkey(IndexOptInfo *index,
+						  List *clauses, List *outer_clauses,
+						  Relids outer_relids,
+						  SaOpControl saop_control,
+						  bool *found_clause);
 static bool match_clause_to_indexcol(IndexOptInfo *index,
 						 int indexcol,
 						 RestrictInfo *rinfo,
@@ -1060,7 +1072,7 @@ find_list_position(Node *node, List **nodelist)
 * from multiple places.  Defend against redundant outputs by using
 * list_append_unique_ptr (pointer equality should be good enough).
 */
-List *
+static List *
 group_clauses_by_indexkey(IndexOptInfo *index,
 						  List *clauses, List *outer_clauses,
 						  Relids outer_relids,

--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -905,39 +905,6 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 	return pathkeys;
 }

-/****************************************************************************
- *		PATHKEYS AND AGGREGATES
- ****************************************************************************/
-
-/*
- * make_pathkeys_for_aggregate
- *		Generate a pathkeys list (always a 1-item list) that represents
- *		the sort order needed by a MIN/MAX aggregate
- *
- * This is only called before EquivalenceClass merging, so we can assume
- * we are not supposed to canonicalize.
- */
-List *
-make_pathkeys_for_aggregate(PlannerInfo *root,
-							Expr *aggtarget,
-							Oid aggsortop)
-{
-	PathKey    *pathkey;
-
-	/*
-	 * We arbitrarily set nulls_first to false.  Actually, a MIN/MAX agg can
-	 * use either nulls ordering option, but that is dealt with elsewhere.
-	 */
-	pathkey = make_pathkey_from_sortop(root,
-									   aggtarget,
-									   aggsortop,
-									   false,	/* nulls_first */
-									   0,
-									   true,
-									   false);
-	return list_make1(pathkey);
-}
-
 /****************************************************************************
 *		PATHKEYS AND MERGECLAUSES
 ****************************************************************************/
@@ -1407,11 +1374,10 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
 *		PATHKEY USEFULNESS CHECKS
 *
 * We only want to remember as many of the pathkeys of a path as have some
- * potential use, which can include subsequent mergejoins, meeting the query's
- * requested output ordering, or implementing MIN/MAX aggregates.  This
- * ensures that add_path() won't consider a path to have a usefully different
- * ordering unless it really is useful.  These routines check for usefulness
- * of given pathkeys.
+ * potential use, either for subsequent mergejoins or for meeting the query's
+ * requested output ordering.  This ensures that add_path() won't consider
+ * a path to have a usefully different ordering unless it really is useful.
+ * These routines check for usefulness of given pathkeys.
 ****************************************************************************/

 /*
@@ -1553,50 +1519,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return 0;					/* path ordering not useful */
 }

-/*
- * pathkeys_useful_for_minmax
- *		Count the number of pathkeys that are useful for implementing
- *		some MIN/MAX aggregate.
- *
- * Like pathkeys_useful_for_ordering, this is a yes-or-no affair, but
- * there could be several MIN/MAX aggregates and we can match to any one.
- *
- * We can't use pathkeys_contained_in() because we would like to match
- * pathkeys regardless of the nulls_first setting.  However, we know that
- * MIN/MAX aggregates will have at most one item in their pathkeys, so it's
- * not too complicated to match by brute force.
- */
-static int
-pathkeys_useful_for_minmax(PlannerInfo *root, List *pathkeys)
-{
-	PathKey    *pathkey;
-	ListCell   *lc;
-
-	if (pathkeys == NIL)
-		return 0;				/* unordered path */
-	pathkey = (PathKey *) linitial(pathkeys);
-
-	foreach(lc, root->minmax_aggs)
-	{
-		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
-		PathKey    *mmpathkey;
-
-		/* Ignore minmax agg if its pathkey turned out to be redundant */
-		if (mminfo->pathkeys == NIL)
-			continue;
-
-		Assert(list_length(mminfo->pathkeys) == 1);
-		mmpathkey = (PathKey *) linitial(mminfo->pathkeys);
-
-		if (mmpathkey->pk_eclass == pathkey->pk_eclass &&
-			mmpathkey->pk_opfamily == pathkey->pk_opfamily &&
-			mmpathkey->pk_strategy == pathkey->pk_strategy)
-			return 1;
-	}
-
-	return 0;					/* path ordering not useful */
-}
-
 /*
 * truncate_useless_pathkeys
 *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1608,15 +1530,11 @@ truncate_useless_pathkeys(PlannerInfo *root,
 {
 	int			nuseful;
 	int			nuseful2;
-	int			nuseful3;

 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
-	nuseful3 = pathkeys_useful_for_minmax(root, pathkeys);
-	if (nuseful3 > nuseful)
-		nuseful = nuseful3;

 	/*
 	 * Note: not safe to modify input list destructively, but we can avoid
@@ -1642,8 +1560,8 @@ truncate_useless_pathkeys(PlannerInfo *root,
 *
 * We could make the test more complex, for example checking to see if any of
 * the joinclauses are really mergejoinable, but that likely wouldn't win
- * often enough to repay the extra cycles.	Queries with no join, sort, or
- * aggregate at all are reasonably common, so this much work seems worthwhile.
+ * often enough to repay the extra cycles.	Queries with neither a join nor
+ * a sort are reasonably common, though, so this much work seems worthwhile.
 */
 bool
 has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
@@ -1652,7 +1570,5 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 		return true;			/* might be able to use pathkeys for merging */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
-	if (root->minmax_aggs != NIL)
-		return true;			/* might be able to use them for MIN/MAX */
 	return false;				/* definitely useless */
 }
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -440,18 +440,9 @@ query_planner(PlannerInfo *root, List *tlist,
 static void
 canonicalize_all_pathkeys(PlannerInfo *root)
 {
-	ListCell   *lc;
-
 	root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
 	root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
 	root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys);
 	root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
 	root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
-
-	foreach(lc, root->minmax_aggs)
-	{
-		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
-
-		mminfo->pathkeys = canonicalize_pathkeys(root, mminfo->pathkeys);
-	}
 }
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1042,7 +1042,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 			count_agg_clauses(parse->havingQual, &agg_counts);

 			/*
-			 * Preprocess MIN/MAX aggregates, if any.
+			 * Preprocess MIN/MAX aggregates, if any.  Note: be careful about
+			 * adding logic between here and the optimize_minmax_aggregates
+			 * call.  Anything that is needed in MIN/MAX-optimizable cases
+			 * will have to be duplicated in planagg.c.
 			 */
 			preprocess_minmax_aggregates(root, tlist);
 		}

--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1387,9 +1387,6 @@ typedef struct PlaceHolderInfo
 /*
 * For each potentially index-optimizable MIN/MAX aggregate function,
 * root->minmax_aggs stores a MinMaxAggInfo describing it.
- *
- * Note: a MIN/MAX agg doesn't really care about the nulls_first property,
- * so the pathkey's nulls_first flag should be ignored.
 */
 typedef struct MinMaxAggInfo
 {
@@ -1398,7 +1395,10 @@ typedef struct MinMaxAggInfo
 	Oid			aggfnoid;		/* pg_proc Oid of the aggregate */
 	Oid			aggsortop;		/* Oid of its sort operator */
 	Expr	   *target;			/* expression we are aggregating on */
-	List	   *pathkeys;		/* pathkeys representing needed sort order */
+	PlannerInfo *subroot;		/* modified "root" for planning the subquery */
+	Path	   *path;			/* access path for subquery */
+	Cost		pathcost;		/* estimated cost to fetch first row */
+	Param	   *param;			/* param for subplan's output */
 } MinMaxAggInfo;

 /*

--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -42,14 +42,6 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
 * indxpath.c
 *	  routines to generate index paths
 */
-typedef enum
-{
-	/* Whether to use ScalarArrayOpExpr to build index qualifications */
-	SAOP_FORBID,				/* Do not use ScalarArrayOpExpr */
-	SAOP_ALLOW,					/* OK to use ScalarArrayOpExpr */
-	SAOP_REQUIRE				/* Require ScalarArrayOpExpr */
-} SaOpControl;
-
 extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
 extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
 						 List *clauses, List *outer_clauses,
@@ -59,11 +51,6 @@ extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
 					 Path **cheapest_startup, Path **cheapest_total);
 extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 							  List *restrictlist);
-extern List *group_clauses_by_indexkey(IndexOptInfo *index,
-						  List *clauses, List *outer_clauses,
-						  Relids outer_relids,
-						  SaOpControl saop_control,
-						  bool *found_clause);
 extern bool eclass_matches_any_index(EquivalenceClass *ec,
 						 EquivalenceMember *em,
 						 RelOptInfo *rel);
@@ -176,9 +163,6 @@ extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
 							  List *sortclauses,
 							  List *tlist,
 							  bool canonicalize);
-extern List *make_pathkeys_for_aggregate(PlannerInfo *root,
-										 Expr *aggtarget,
-										 Oid aggsortop);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,

--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -690,32 +690,19 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 9999 | 1
 (3 rows)

-- this is an interesting special case as of 9.1
-explain (costs off)
-  select min(unique2) from tenk1 where unique2 = 42;
-                  QUERY PLAN                   
-----------------------------------------------
- Aggregate
-   ->  Index Scan using tenk1_unique2 on tenk1
-         Index Cond: (unique2 = 42)
-(3 rows)
-
-select min(unique2) from tenk1 where unique2 = 42;
- min 
-----
-  42
-(1 row)
-
 -- try it on an inheritance tree
 create table minmaxtest(f1 int);
 create table minmaxtest1() inherits (minmaxtest);
 create table minmaxtest2() inherits (minmaxtest);
+create table minmaxtest3() inherits (minmaxtest);
 create index minmaxtesti on minmaxtest(f1);
 create index minmaxtest1i on minmaxtest1(f1);
 create index minmaxtest2i on minmaxtest2(f1 desc);
+create index minmaxtest3i on minmaxtest3(f1) where f1 is not null;
 insert into minmaxtest values(11), (12);
 insert into minmaxtest1 values(13), (14);
 insert into minmaxtest2 values(15), (16);
+insert into minmaxtest3 values(17), (18);
 explain (costs off)
  select min(f1), max(f1) from minmaxtest;
                                      QUERY PLAN                                      
@@ -731,6 +718,8 @@ explain (costs off)
                       Index Cond: (f1 IS NOT NULL)
                 ->  Index Scan Backward using minmaxtest2i on minmaxtest2 minmaxtest
                       Index Cond: (f1 IS NOT NULL)
+                 ->  Index Scan using minmaxtest3i on minmaxtest3 minmaxtest
+                       Index Cond: (f1 IS NOT NULL)
   InitPlan 2 (returns $1)
     ->  Limit
           ->  Merge Append
@@ -741,18 +730,21 @@ explain (costs off)
                       Index Cond: (f1 IS NOT NULL)
                 ->  Index Scan using minmaxtest2i on minmaxtest2 minmaxtest
                       Index Cond: (f1 IS NOT NULL)
-(21 rows)
+                 ->  Index Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest
+                       Index Cond: (f1 IS NOT NULL)
+(25 rows)

 select min(f1), max(f1) from minmaxtest;
 min | max 
 -----+-----
-  11 |  16
+  11 |  18
 (1 row)

 drop table minmaxtest cascade;
-NOTICE:  drop cascades to 2 other objects
+NOTICE:  drop cascades to 3 other objects
 DETAIL:  drop cascades to table minmaxtest1
 drop cascades to table minmaxtest2
+drop cascades to table minmaxtest3
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --

--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -258,22 +258,21 @@ select max(unique2) from tenk1 order by max(unique2)+1;
 explain (costs off)
  select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
-- this is an interesting special case as of 9.1
-explain (costs off)
-  select min(unique2) from tenk1 where unique2 = 42;
-select min(unique2) from tenk1 where unique2 = 42;

 -- try it on an inheritance tree
 create table minmaxtest(f1 int);
 create table minmaxtest1() inherits (minmaxtest);
 create table minmaxtest2() inherits (minmaxtest);
+create table minmaxtest3() inherits (minmaxtest);
 create index minmaxtesti on minmaxtest(f1);
 create index minmaxtest1i on minmaxtest1(f1);
 create index minmaxtest2i on minmaxtest2(f1 desc);
+create index minmaxtest3i on minmaxtest3(f1) where f1 is not null;

 insert into minmaxtest values(11), (12);
 insert into minmaxtest1 values(13), (14);
 insert into minmaxtest2 values(15), (16);
+insert into minmaxtest3 values(17), (18);

 explain (costs off)
  select min(f1), max(f1) from minmaxtest;