提交 cea49fe8 编写于 作者: T Tom Lane

Dept of second thoughts: improve the API for AnalyzeForeignTable.

If we make the initially-called function return the table physical-size
estimate, acquire_inherited_sample_rows will be able to use that to
allocate numbers of samples among child tables, when the day comes that
we want to support foreign tables in inheritance trees.
上级 263d9de6
......@@ -125,7 +125,9 @@ static void fileBeginForeignScan(ForeignScanState *node, int eflags);
static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
static void fileReScanForeignScan(ForeignScanState *node);
static void fileEndForeignScan(ForeignScanState *node);
static AcquireSampleRowsFunc fileAnalyzeForeignTable(Relation relation);
static bool fileAnalyzeForeignTable(Relation relation,
AcquireSampleRowsFunc *func,
BlockNumber *totalpages);
/*
* Helper functions
......@@ -141,8 +143,7 @@ static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
Cost *startup_cost, Cost *total_cost);
static int file_acquire_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows,
BlockNumber *totalpages);
double *totalrows, double *totaldeadrows);
/*
......@@ -656,10 +657,39 @@ fileEndForeignScan(ForeignScanState *node)
* fileAnalyzeForeignTable
* Test whether analyzing this foreign table is supported
*/
static AcquireSampleRowsFunc
fileAnalyzeForeignTable(Relation relation)
static bool
fileAnalyzeForeignTable(Relation relation,
AcquireSampleRowsFunc *func,
BlockNumber *totalpages)
{
return file_acquire_sample_rows;
char *filename;
List *options;
struct stat stat_buf;
/* Fetch options of foreign table */
fileGetOptions(RelationGetRelid(relation), &filename, &options);
/*
* Get size of the file. (XXX if we fail here, would it be better to
* just return false to skip analyzing the table?)
*/
if (stat(filename, &stat_buf) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
filename)));
/*
* Convert size to pages. Must return at least 1 so that we can tell
* later on that pg_class.relpages is not default.
*/
*totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
if (*totalpages < 1)
*totalpages = 1;
*func = file_acquire_sample_rows;
return true;
}
/*
......@@ -780,8 +810,7 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
* which must have at least targrows entries.
* The actual number of rows selected is returned as the function result.
* We also count the total number of rows in the file and return it into
* *totalrows, and return the file's physical size in *totalpages.
* Note that *totaldeadrows is always set to 0.
* *totalrows. Note that *totaldeadrows is always set to 0.
*
* Note that the returned list of rows is not always in order by physical
* position in the file. Therefore, correlation estimates derived later
......@@ -791,8 +820,7 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
static int
file_acquire_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows,
BlockNumber *totalpages)
double *totalrows, double *totaldeadrows)
{
int numrows = 0;
double rowstoskip = -1; /* -1 means not set yet */
......@@ -802,7 +830,6 @@ file_acquire_sample_rows(Relation onerel, int elevel,
bool *nulls;
bool found;
char *filename;
struct stat stat_buf;
List *options;
CopyState cstate;
ErrorContextCallback errcontext;
......@@ -819,22 +846,6 @@ file_acquire_sample_rows(Relation onerel, int elevel,
/* Fetch options of foreign table */
fileGetOptions(RelationGetRelid(onerel), &filename, &options);
/*
* Get size of the file.
*/
if (stat(filename, &stat_buf) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
filename)));
/*
* Convert size to pages for use in I/O cost estimate.
*/
*totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
if (*totalpages < 1)
*totalpages = 1;
/*
* Create CopyState from FDW options.
*/
......@@ -931,10 +942,10 @@ file_acquire_sample_rows(Relation onerel, int elevel,
* Emit some interesting relation info
*/
ereport(elevel,
(errmsg("\"%s\": scanned %u pages containing %.0f rows; "
(errmsg("\"%s\": file contains %.0f rows; "
"%d rows in sample",
RelationGetRelationName(onerel),
*totalpages, *totalrows, numrows)));
*totalrows, numrows)));
return numrows;
}
......@@ -279,15 +279,19 @@ EndForeignScan (ForeignScanState *node);
<para>
<programlisting>
AcquireSampleRowsFunc
AnalyzeForeignTable (Relation relation);
bool
AnalyzeForeignTable (Relation relation,
AcquireSampleRowsFunc *func,
BlockNumber *totalpages);
</programlisting>
This function is called when <xref linkend="sql-analyze"> is executed on
a foreign table. If the FDW supports collecting statistics for this
foreign table, it should return a pointer to a function that will collect
sample rows from the table. Otherwise, return <literal>NULL</>. If the
FDW does not support collecting statistics for any tables, the
a foreign table. If the FDW can collect statistics for this
foreign table, it should return <literal>true</>, and provide a pointer
to a function that will collect sample rows from the table in
<parameter>func</>, plus the estimated size of the table in pages in
<parameter>totalpages</>. Otherwise, return <literal>false</>.
If the FDW does not support collecting statistics for any tables, the
<function>AnalyzeForeignTable</> pointer can be set to <literal>NULL</>.
</para>
......@@ -298,18 +302,16 @@ int
AcquireSampleRowsFunc (Relation relation, int elevel,
HeapTuple *rows, int targrows,
double *totalrows,
double *totaldeadrows,
BlockNumber *totalpages);
double *totaldeadrows);
</programlisting>
A random sample of up to <parameter>targrows</> rows should be collected
from the table and stored into the caller-provided <parameter>rows</>
array. The actual number of rows collected must be returned. In
addition, store estimates of the total numbers of live rows, dead rows,
and pages in the table into the output parameters
<parameter>totalrows</>, <parameter>totaldeadrows</>, and
<parameter>totalpages</>. These numbers will be recorded in the table's
<structname>pg_class</> entry for future use.
addition, store estimates of the total numbers of live and dead rows in
the table into the output parameters <parameter>totalrows</> and
<parameter>totaldeadrows</>. (Set <parameter>totaldeadrows</> to zero
if the FDW does not have any concept of dead rows.)
</para>
<para>
......
......@@ -84,7 +84,8 @@ static BufferAccessStrategy vac_strategy;
static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
AcquireSampleRowsFunc acquirefunc, bool inh, int elevel);
AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
bool inh, int elevel);
static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
int samplesize);
static bool BlockSampler_HasMore(BlockSampler bs);
......@@ -97,8 +98,7 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum,
Node *index_expr);
static int acquire_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows,
BlockNumber *totalpages);
double *totalrows, double *totaldeadrows);
static int compare_rows(const void *a, const void *b);
static int acquire_inherited_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
......@@ -117,7 +117,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
{
Relation onerel;
int elevel;
AcquireSampleRowsFunc acquirefunc;
AcquireSampleRowsFunc acquirefunc = NULL;
BlockNumber relpages = 0;
/* Select logging level */
if (vacstmt->options & VACOPT_VERBOSE)
......@@ -182,6 +183,27 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
return;
}
/*
* Silently ignore tables that are temp tables of other backends ---
* trying to analyze these is rather pointless, since their contents are
* probably not up-to-date on disk. (We don't throw a warning here; it
* would just lead to chatter during a database-wide ANALYZE.)
*/
if (RELATION_IS_OTHER_TEMP(onerel))
{
relation_close(onerel, ShareUpdateExclusiveLock);
return;
}
/*
* We can ANALYZE any table except pg_statistic. See update_attstats
*/
if (RelationGetRelid(onerel) == StatisticRelationId)
{
relation_close(onerel, ShareUpdateExclusiveLock);
return;
}
/*
* Check that it's a plain table or foreign table; we used to do this
* in get_rel_oids() but seems safer to check after we've locked the
......@@ -191,6 +213,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
{
/* Regular table, so we'll use the regular row acquisition function */
acquirefunc = acquire_sample_rows;
/* Also get regular table's size */
relpages = RelationGetNumberOfBlocks(onerel);
}
else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
......@@ -199,15 +223,16 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
* supports analysis.
*/
FdwRoutine *fdwroutine;
bool ok = false;
fdwroutine = GetFdwRoutineByRelId(RelationGetRelid(onerel));
if (fdwroutine->AnalyzeForeignTable != NULL)
acquirefunc = fdwroutine->AnalyzeForeignTable(onerel);
else
acquirefunc = NULL;
ok = fdwroutine->AnalyzeForeignTable(onerel,
&acquirefunc,
&relpages);
if (acquirefunc == NULL)
if (!ok)
{
ereport(WARNING,
(errmsg("skipping \"%s\" --- cannot analyze this foreign table",
......@@ -227,27 +252,6 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
return;
}
/*
* Silently ignore tables that are temp tables of other backends ---
* trying to analyze these is rather pointless, since their contents are
* probably not up-to-date on disk. (We don't throw a warning here; it
* would just lead to chatter during a database-wide ANALYZE.)
*/
if (RELATION_IS_OTHER_TEMP(onerel))
{
relation_close(onerel, ShareUpdateExclusiveLock);
return;
}
/*
* We can ANALYZE any table except pg_statistic. See update_attstats
*/
if (RelationGetRelid(onerel) == StatisticRelationId)
{
relation_close(onerel, ShareUpdateExclusiveLock);
return;
}
/*
* OK, let's do it. First let other backends know I'm in ANALYZE.
*/
......@@ -258,13 +262,13 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
/*
* Do the normal non-recursive ANALYZE.
*/
do_analyze_rel(onerel, vacstmt, acquirefunc, false, elevel);
do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, false, elevel);
/*
* If there are child tables, do recursive ANALYZE.
*/
if (onerel->rd_rel->relhassubclass)
do_analyze_rel(onerel, vacstmt, acquirefunc, true, elevel);
do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, true, elevel);
/*
* Close source relation now, but keep lock so that no one deletes it
......@@ -293,7 +297,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
*/
static void
do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
AcquireSampleRowsFunc acquirefunc, bool inh, int elevel)
AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
bool inh, int elevel)
{
int attr_cnt,
tcnt,
......@@ -308,7 +313,6 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
numrows;
double totalrows,
totaldeadrows;
BlockNumber totalpages;
HeapTuple *rows;
PGRUsage ru0;
TimestampTz starttime = 0;
......@@ -485,17 +489,13 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
*/
rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
if (inh)
{
numrows = acquire_inherited_sample_rows(onerel, elevel,
rows, targrows,
&totalrows, &totaldeadrows);
totalpages = 0; /* not needed in this path */
}
else
numrows = (*acquirefunc) (onerel, elevel,
rows, targrows,
&totalrows, &totaldeadrows,
&totalpages);
&totalrows, &totaldeadrows);
/*
* Compute the statistics. Temporary results during the calculations for
......@@ -576,7 +576,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
*/
if (!inh)
vac_update_relstats(onerel,
totalpages,
relpages,
totalrows,
visibilitymap_count(onerel),
hasindex,
......@@ -1032,7 +1032,6 @@ BlockSampler_Next(BlockSampler bs)
* The actual number of rows selected is returned as the function result.
* We also estimate the total numbers of live and dead rows in the table,
* and return them into *totalrows and *totaldeadrows, respectively.
* Also, the number of pages in the relation is returned into *totalpages.
*
* The returned list of tuples is in order by physical position in the table.
* (We will rely on this later to derive correlation estimates.)
......@@ -1061,8 +1060,7 @@ BlockSampler_Next(BlockSampler bs)
static int
acquire_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows,
BlockNumber *totalpages)
double *totalrows, double *totaldeadrows)
{
int numrows = 0; /* # rows now in reservoir */
double samplerows = 0; /* total # rows collected */
......@@ -1077,7 +1075,6 @@ acquire_sample_rows(Relation onerel, int elevel,
Assert(targrows > 0);
totalblocks = RelationGetNumberOfBlocks(onerel);
*totalpages = totalblocks;
/* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
......@@ -1438,7 +1435,7 @@ compare_rows(const void *a, const void *b)
/*
* acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
*
* This has largely the same API as acquire_sample_rows, except that rows are
* This has the same API as acquire_sample_rows, except that rows are
* collected from all inheritance children as well as the specified table.
* We fail and return zero if there are no inheritance children.
*/
......@@ -1481,11 +1478,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
/*
* Count the blocks in all the relations. The result could overflow
* BlockNumber, so we use double arithmetic.
*
* XXX eventually we will probably want to allow child tables that are
* foreign tables. Since we can't do RelationGetNumberOfBlocks on a
* foreign table, it's not very clear what fraction of the total to assign
* to it here.
*/
rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
......@@ -1540,7 +1532,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
int childrows;
double trows,
tdrows;
BlockNumber tpages;
/* Fetch a random sample of the child's rows */
childrows = acquire_sample_rows(childrel,
......@@ -1548,8 +1539,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
rows + numrows,
childtargrows,
&trows,
&tdrows,
&tpages);
&tdrows);
/* We may need to convert from child's rowtype to parent's */
if (childrows > 0 &&
......
......@@ -53,11 +53,11 @@ typedef void (*EndForeignScan_function) (ForeignScanState *node);
typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
HeapTuple *rows, int targrows,
double *totalrows,
double *totaldeadrows,
BlockNumber *totalpages);
typedef AcquireSampleRowsFunc (*AnalyzeForeignTable_function) (Relation relation);
double *totaldeadrows);
typedef bool (*AnalyzeForeignTable_function) (Relation relation,
AcquireSampleRowsFunc *func,
BlockNumber *totalpages);
/*
* FdwRoutine is the struct returned by a foreign-data wrapper's handler
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册