Commit 8492807f authored by Bhuvnesh Chaudhary and Ekta Khanna, committed by Bhuvnesh

Ignore very wide columns in analyze sample

ANALYZE collects a sample from the table; if the sample contains
columns holding very wide values, memory usage can grow high enough
that the query is cancelled.

This commit masks wide values, i.e. `pg_column_size(col) > WIDTH_THRESHOLD
(1024)`, in variable-length columns to avoid high memory
usage while collecting the sample. Column values exceeding
WIDTH_THRESHOLD are masked as NULL and excluded from the collected
sample tuples while computing stats on the relation.

In the case of expression/predicate indexes on the relation, the wide columns are
treated as NULL and are not filtered out. It is rare to have such
indexes on very wide columns, so the effect on stats (nullfrac etc.) will be minimal.
Signed-off-by: Omer Arap <oarap@pivotal.io>
Parent 038457a5
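
The change works by rewriting the select list of the internal sampling query. As a minimal sketch of the pattern (assuming a hypothetical table t with a single text column c; the names are illustrative, not from the commit), each variable-length column contributes two select-list entries:

-- Sketch: the first entry masks the value to NULL when it is wider than
-- WIDTH_THRESHOLD; the second is a supplementary boolean recording whether
-- the raw value was non-NULL. A NULL in the first entry paired with true
-- in the second means "masked because too wide" rather than "genuinely NULL".
SELECT
    (CASE WHEN pg_column_size(Ta.c) > 1024 THEN NULL ELSE Ta.c END) AS c,
    (CASE WHEN Ta.c IS NULL THEN false ELSE true END)
FROM t AS Ta;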
@@ -51,6 +51,16 @@
#include "utils/tuplesort.h"
#include "utils/tuplesort_mk.h"
/*
* To avoid consuming too much memory during analysis and/or too much space
* in the resulting pg_statistic rows, we ignore varlena datums that are wider
* than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
* and distinct-value calculations since a wide value is unlikely to be
* duplicated at all, much less be a most-common value. For the same reason,
* ignoring wide values will not affect our estimates of histogram bin
* boundaries very much.
*/
#define WIDTH_THRESHOLD 1024
/* Data structure for Algorithm S from Knuth 3.4.2 */
typedef struct
@@ -72,6 +82,16 @@ typedef struct AnlIndexData
int attr_cnt;
} AnlIndexData;
/*
* Maintain the row indexes for large datums which must not be considered
* for samples while calculating statistics. The sample values at those
* row indexes for a column are masked as NULL.
*/
typedef struct RowIndexes
{
bool* rows;
int toowide_cnt;
} RowIndexes;
/* Default statistics target (GUC parameter) */
int default_statistics_target = 10;
@@ -96,7 +116,7 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum);
static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
int targrows, double *totalrows, double *totaldeadrows);
static int acquire_sample_rows_by_query(Relation onerel, int nattrs, VacAttrStats **attrstats, HeapTuple **rows,
int targrows, double *totalrows, double *totaldeadrows, BlockNumber *totalpages, bool rootonly, RowIndexes **colLargeRowIndexes /* Maintain information if the row of a column exceeds WIDTH_THRESHOLD */);
static double random_fract(void);
static double init_selection_state(int n);
static double get_next_S(double t, int n, double *stateptr);
@@ -173,6 +193,7 @@ analyze_rel_internal(Oid relid, VacuumStmt *vacstmt,
Oid save_userid;
int save_sec_context;
int save_nestlevel;
RowIndexes **colLargeRowIndexes;
if (vacstmt->verbose)
elevel = INFO;
@@ -421,11 +442,16 @@ analyze_rel_internal(Oid relid, VacuumStmt *vacstmt,
}
}
/*
* Maintain information on whether a column's value in a given sample row
* exceeds WIDTH_THRESHOLD.
*/
colLargeRowIndexes = (RowIndexes **) palloc(sizeof(RowIndexes *) * attr_cnt);
/*
* Acquire the sample rows
*/
numrows = acquire_sample_rows_by_query(onerel, attr_cnt, vacattrstats, &rows, targrows,
&totalrows, &totaldeadrows, &totalpages, vacstmt->rootonly, colLargeRowIndexes);
/*
* Compute the statistics. Temporary results during the calculations for
@@ -435,6 +461,7 @@ analyze_rel_internal(Oid relid, VacuumStmt *vacstmt,
*/
if (numrows > 0)
{
HeapTuple *validRows = (HeapTuple *) palloc(numrows * sizeof(HeapTuple));
MemoryContext col_context,
old_context;
@@ -448,16 +475,45 @@ analyze_rel_internal(Oid relid, VacuumStmt *vacstmt,
for (i = 0; i < attr_cnt; i++)
{
VacAttrStats *stats = vacattrstats[i];
RowIndexes *rowIndexes = colLargeRowIndexes[i];
int validRowsLength = numrows - rowIndexes->toowide_cnt;
/*
* If there are too-wide rows in the sample, remove them
* from the sample being sent for stats collection.
*/
if (rowIndexes->toowide_cnt > 0)
{
int validRowsIdx = 0;
for (int rownum = 0; rownum < numrows; rownum++)
{
if (rowIndexes->rows[rownum]) // if the row is too wide, leave it out of the sample
continue;
validRows[validRowsIdx] = rows[rownum];
validRowsIdx++;
}
stats->rows = validRows;
validRowsLength = validRowsIdx;
}
else
{
stats->rows = rows;
validRowsLength = numrows;
}
stats->tupDesc = onerel->rd_att;
(*stats->compute_stats) (stats,
std_fetch_func,
validRowsLength, // number of rows in the sample, excluding too-wide rows if any
totalrows);
stats->rows = rows; // Reset to original rows
MemoryContextResetAndDeleteChildren(col_context);
}
/*
* Datums exceeding WIDTH_THRESHOLD are masked as NULL in the sample, and
* are used as-is to evaluate index statistics. It is unlikely to have
* indexes on very wide columns, so the effect will be minimal.
*/
if (hasindex)
compute_index_stats(onerel, totalrows,
indexdata, nindexes,
@@ -1342,7 +1398,7 @@ compare_rows(const void *a, const void *b)
static int
acquire_sample_rows_by_query(Relation onerel, int nattrs, VacAttrStats **attrstats,
HeapTuple **rows, int targrows,
double *totalrows, double *totaldeadrows, BlockNumber *totalblocks, bool rootonly, RowIndexes **colLargeRowIndexes)
{
StringInfoData str;
StringInfoData columnStr;
@@ -1358,6 +1414,7 @@ acquire_sample_rows_by_query(Relation onerel, int nattrs, VacAttrStats **attrsta
Datum *vals;
bool *nulls;
MemoryContext oldcxt;
bool *isVarlenaCol = (bool *) palloc(sizeof(bool)*nattrs);
Assert(targrows > 0.0);
@@ -1389,13 +1446,44 @@ acquire_sample_rows_by_query(Relation onerel, int nattrs, VacAttrStats **attrsta
{
for (i = 0; i < nattrs; i++)
{
isVarlenaCol[i] = false;
const char *attname = quote_identifier(NameStr(attrstats[i]->attr->attname));
bool is_varlena = (!attrstats[i]->attr->attbyval &&
attrstats[i]->attr->attlen == -1);
bool is_varwidth = (!attrstats[i]->attr->attbyval &&
attrstats[i]->attr->attlen < 0);
if (is_varlena || is_varwidth)
{
appendStringInfo(&columnStr,
"(case when pg_column_size(Ta.%s) > %d then NULL else Ta.%s end) as %s, ",
attname,
WIDTH_THRESHOLD,
attname,
attname);
appendStringInfo(&columnStr,
"(case when Ta.%s is NULL then %s else %s end)",
attname,
"false", // raw value is NULL: the masked NULL is genuine
"true"); // raw value is non-NULL: a masked NULL means it exceeded WIDTH_THRESHOLD
isVarlenaCol[i] = true;
}
else
{
appendStringInfo(&columnStr, "Ta.%s ", attname);
}
if (i != nattrs - 1)
{
appendStringInfo(&columnStr, ", ");
}
}
}
else
{
appendStringInfo(&columnStr, "NULL");
}
/*
* If table is partitioned, we create a sample over all parts.
@@ -1474,21 +1562,52 @@ acquire_sample_rows_by_query(Relation onerel, int nattrs, VacAttrStats **attrsta
nulls[i] = true;
}
/* Initialize the arrays to hold information about column width */
for (i = 0; i < nattrs; i++)
{
colLargeRowIndexes[i] = (RowIndexes *) palloc0(sizeof(RowIndexes));
colLargeRowIndexes[i]->rows = (bool *) palloc(sizeof(bool) * sampleTuples);
colLargeRowIndexes[i]->toowide_cnt = 0;
}
*rows = (HeapTuple *) palloc(sampleTuples * sizeof(HeapTuple));
for (i = 0; i < sampleTuples; i++)
{
HeapTuple sampletup = SPI_tuptable->vals[i];
int j;
int index = 0;
for (j = 0; j < nattrs; j++)
{
colLargeRowIndexes[j]->rows[i] = false;
int tupattnum = attrstats[j]->tupattnum;
Assert(tupattnum >= 1 && tupattnum <= RelationGetNumberOfAttributes(onerel));
vals[tupattnum - 1] = heap_getattr(sampletup, index + 1,
SPI_tuptable->tupdesc,
&nulls[tupattnum - 1]);
if (isVarlenaCol[j])
{
index++; /* Move the index to the supplementary column */
if (nulls[tupattnum - 1])
{
bool dummyNull = false;
Datum dummyVal = heap_getattr(sampletup, index + 1,
SPI_tuptable->tupdesc,
&dummyNull);
/*
* If Datum is too large, mark the index position as true
* and increase the too wide count
*/
if (DatumGetInt32(dummyVal))
{
colLargeRowIndexes[j]->rows[i] = true;
colLargeRowIndexes[j]->toowide_cnt++;
}
}
}
index++; /* Move index to the next table attribute */
}
(*rows)[i] = heap_form_tuple(onerel->rd_att, vals, nulls);
}
@@ -1859,18 +1978,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
*==========================================================================
*/
#define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
#define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)
......
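
Putting the pieces of acquire_sample_rows_by_query together: for a table mixing variable-length and fixed-width columns, the generated select list pairs every varlena column with its supplementary flag, while fixed-width columns pass through untouched. A sketch under assumed names (hypothetical table with columns name text and id int; the real query also carries the sampling predicate and partition handling):

-- Sketch: 'name' is varlena, so it expands to a masked value plus a flag
-- column; 'id' is fixed-width and is emitted as-is. This is why the
-- consuming loop above walks a separate 'index' over the result columns,
-- advancing it an extra step after each varlena column.
SELECT
    (CASE WHEN pg_column_size(Ta.name) > 1024 THEN NULL ELSE Ta.name END) AS name,
    (CASE WHEN Ta.name IS NULL THEN false ELSE true END),
    Ta.id
FROM some_table AS Ta;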
@@ -791,6 +791,38 @@ select * from pg_stats where tablename like 'p3_sales%' order by tablename, attn
public | p3_sales | year | 0 | 4 | -0.5 | {2002} | {1} | | 1
(5 rows)
---
--- Test statistics collection on very large datums. In the current implementation,
--- they are left out of the sample, to avoid running out of memory for the main relation
--- statistics. In case of indexes on the relation, large datums are masked as NULLs in the sample
--- and are evaluated as NULL in index stats collection.
--- Expression / partial indexes are not commonly used, and it's rare to have them on wide columns, so the
--- effect of considering them as NULL is minimal.
---
CREATE TABLE foo_stats (a text, b bytea, c varchar, d int) DISTRIBUTED RANDOMLY;
CREATE INDEX expression_idx_foo_stats ON foo_stats (upper(a));
INSERT INTO foo_stats values ('aaa', 'bbbbb', 'cccc', 2);
INSERT INTO foo_stats values ('aaa', 'bbbbb', 'cccc', 2);
--- Insert large datum values
INSERT INTO foo_stats values (repeat('a', 3000), 'bbbbb2', 'cccc2', 3);
INSERT INTO foo_stats values (repeat('a', 3000), 'bbbbb2', 'cccc2', 3);
ANALYZE foo_stats;
SELECT schemaname, tablename, attname, null_frac, avg_width, n_distinct, most_common_vals, most_common_freqs, histogram_bounds FROM pg_stats WHERE tablename='foo_stats' ORDER BY attname;
schemaname | tablename | attname | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds
------------+-----------+---------+-----------+-----------+------------+------------------+-------------------+------------------
public | foo_stats | a | 0 | 4 | -0.25 | {aaa} | {1} |
public | foo_stats | b | 0 | 6 | -0.5 | {bbbbb,bbbbb2} | {0.5,0.5} |
public | foo_stats | c | 0 | 5 | -0.5 | {cccc,cccc2} | {0.5,0.5} |
public | foo_stats | d | 0 | 4 | -0.5 | {2,3} | {0.5,0.5} |
(4 rows)
SELECT schemaname, tablename, attname, null_frac, avg_width, n_distinct, most_common_vals, most_common_freqs, histogram_bounds FROM pg_stats WHERE tablename='expression_idx_foo_stats' ORDER BY attname;
schemaname | tablename | attname | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds
------------+--------------------------+-----------------+-----------+-----------+------------+------------------+-------------------+------------------
public | expression_idx_foo_stats | pg_expression_1 | 0.5 | 7 | -0.25 | {AAA} | {0.5} |
(1 row)
DROP TABLE IF EXISTS foo_stats;
-- start_ignore
DROP TABLE IF EXISTS p3_sales;
-- end_ignore
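
For context on why the test's repeat('a', 3000) values trip the threshold: pg_column_size on a freshly computed text value reports its uncompressed varlena size, payload plus a 4-byte header, which is well above WIDTH_THRESHOLD (1024). A quick probe (expected result assumes the standard varlena layout):

-- 3000 bytes of payload + 4-byte varlena header = 3004, > 1024
SELECT pg_column_size(repeat('a', 3000));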
@@ -364,6 +364,26 @@ analyze rootpartition p3_sales;
select relname, reltuples, relpages from pg_class where relname like 'p3_sales%' order by relname;
select * from pg_stats where tablename like 'p3_sales%' order by tablename, attname;
---
--- Test statistics collection on very large datums. In the current implementation,
--- they are left out of the sample, to avoid running out of memory for the main relation
--- statistics. In case of indexes on the relation, large datums are masked as NULLs in the sample
--- and are evaluated as NULL in index stats collection.
--- Expression / partial indexes are not commonly used, and it's rare to have them on wide columns, so the
--- effect of considering them as NULL is minimal.
---
CREATE TABLE foo_stats (a text, b bytea, c varchar, d int) DISTRIBUTED RANDOMLY;
CREATE INDEX expression_idx_foo_stats ON foo_stats (upper(a));
INSERT INTO foo_stats values ('aaa', 'bbbbb', 'cccc', 2);
INSERT INTO foo_stats values ('aaa', 'bbbbb', 'cccc', 2);
--- Insert large datum values
INSERT INTO foo_stats values (repeat('a', 3000), 'bbbbb2', 'cccc2', 3);
INSERT INTO foo_stats values (repeat('a', 3000), 'bbbbb2', 'cccc2', 3);
ANALYZE foo_stats;
SELECT schemaname, tablename, attname, null_frac, avg_width, n_distinct, most_common_vals, most_common_freqs, histogram_bounds FROM pg_stats WHERE tablename='foo_stats' ORDER BY attname;
SELECT schemaname, tablename, attname, null_frac, avg_width, n_distinct, most_common_vals, most_common_freqs, histogram_bounds FROM pg_stats WHERE tablename='expression_idx_foo_stats' ORDER BY attname;
DROP TABLE IF EXISTS foo_stats;
-- start_ignore
DROP TABLE IF EXISTS p3_sales;
-- end_ignore