提交 f8a80aeb 编写于 作者: H Heikki Linnakangas

Fix and clean up db/relation/tablespace size functions.

This fixes several small bugs:

- Schema-qualify the functions in all queries.

- Quote database and tablespace names correctly in the dispatched
  queries.

- In the variants that take OID, also dispatch the OID rather than the
  resolved name. This avoids having to deal with quoting schema and table
  names in the query, and seems like the right thing to do anyway.

- Dispatch the pg_table_size() and pg_indexes_size() variants. These were
  added in PostgreSQL 9.0, but we missed modifying them in the merge, the
  same way that we have modified all the other variants.

Also, refactor the internal function used to dispatch the pg_*_size()
calls to use CdbDispatchCommand directly, instead of using SPI and the
gp_dist_random('gp_id') trick. Seems more straightforward, although I
believe that trick should've worked, too.

Add tests. We didn't have any dedicated tests for these functions, although
we used some of the variants in other tests.
Reviewed-by: Daniel Gustafsson <dgustafsson@pivotal.io>
上级 4b31e46f
......@@ -15,8 +15,6 @@
#include <sys/stat.h>
#include <glob.h>
#include "lib/stringinfo.h"
#include "access/heapam.h"
#include "access/appendonlywriter.h"
#include "access/aocssegfiles.h"
......@@ -40,82 +38,65 @@
#include "utils/relmapper.h"
#include "utils/syscache.h"
#include "libpq-fe.h"
#include "cdb/cdbdisp_query.h"
#include "cdb/cdbdispatchresult.h"
#include "cdb/cdbvars.h"
static int64 calculate_total_relation_size(Relation rel);
/*
* Helper function to dispatch a size-returning command.
*
* Dispatches the given SQL query to segments, and sums up the results.
* The query is expected to return one int8 value.
*
* cmd: text of the SQL command handed to CdbDispatchCommand().
*
* Returns the sum of the values found in the individual dispatch results.
* A result whose single value is NULL contributes 0 to the sum.
*
* Raises an ERROR if any result's status is not PGRES_TUPLES_OK, or if a
* result is not exactly one row by one column. The dispatch results are
* released before the error is raised.
*
* Asserts that Gp_role == GP_ROLE_DISPATCH.
*
* NOTE(review): this listing interleaves lines of the removed SPI-based
* implementation with the CdbDispatchCommand-based one that replaces it
* (see the duplicated signature and 'result' declarations). The contract
* described above is that of the CdbDispatchCommand-based code.
*/
static int64
get_size_from_segDBs(const char * cmd)
get_size_from_segDBs(const char *cmd)
{
int spiresult;
bool succeeded = false;
int64 result = 0;
volatile bool connected = false;
int64 result;
CdbPgResults cdb_pgresults = {NULL, 0};
int i;
Assert(Gp_role == GP_ROLE_DISPATCH);
PG_TRY();
CdbDispatchCommand(cmd, DF_WITH_SNAPSHOT, &cdb_pgresults);
result = 0;
/* Accumulate the single value carried by each dispatch result. */
for (i = 0; i < cdb_pgresults.numResults; i++)
{
HeapTuple tup;
TupleDesc tupdesc;
bool isnull;
Datum size;
Datum value;
struct pg_result *pgresult = cdb_pgresults.pg_results[i];
do
/* Anything other than a successful tuple-returning result is an error. */
if (PQresultStatus(pgresult) != PGRES_TUPLES_OK)
{
/* Establish an SPI session as a client of myself. */
if (SPI_connect() != SPI_OK_CONNECT)
break;
connected = true;
/* Do the query. */
spiresult = SPI_execute(cmd, false, 0);
/* Did the query succeed? */
if (spiresult != SPI_OK_SELECT)
break;
if (SPI_processed < 1)
break;
tup = SPI_tuptable->vals[0];
tupdesc = SPI_tuptable->tupdesc;
size = heap_getattr(SPI_tuptable->vals[0], 1, SPI_tuptable->tupdesc, &isnull);
if (isnull)
break;
result = DatumGetInt64(size);
succeeded = true;
cdbdisp_clearCdbPgResults(&cdb_pgresults);
ereport(ERROR,
(errmsg("unexpected result from segment: %d",
PQresultStatus(pgresult))));
}
while (0);
/* End recursive session. */
connected = false;
SPI_finish();
if (!succeeded)
elog(ERROR, "Unable to get sizes from segments");
/* Each result must consist of exactly one row with one column. */
if (PQntuples(pgresult) != 1 || PQnfields(pgresult) != 1)
{
cdbdisp_clearCdbPgResults(&cdb_pgresults);
ereport(ERROR,
(errmsg("unexpected shape of result from segment (%d rows, %d cols)",
PQntuples(pgresult), PQnfields(pgresult))));
}
/* A NULL value counts as zero; otherwise parse the value with int8in. */
if (PQgetisnull(pgresult, 0, 0))
value = 0;
else
value = DirectFunctionCall1(int8in,
CStringGetDatum(PQgetvalue(pgresult, 0, 0)));
result += value;
}
/* Clean up in case of error. */
PG_CATCH();
{
/* End recursive session. */
if (connected)
SPI_finish();
/* Carry on with error handling. */
PG_RE_THROW();
}
PG_END_TRY();
/* Release the dispatch results now that they have been summed. */
cdbdisp_clearCdbPgResults(&cdb_pgresults);
return result;
}
/* Return physical size of directory contents, or 0 if dir doesn't exist */
int64
static int64
db_dir_size(const char *path)
{
int64 dirsize = 0;
......@@ -211,20 +192,18 @@ calculate_database_size(Oid dbOid)
Datum
pg_database_size_oid(PG_FUNCTION_ARGS)
{
int64 size;
Oid dbOid = PG_GETARG_OID(0);
int64 size;
size = calculate_database_size(dbOid);
if (Gp_role == GP_ROLE_DISPATCH)
{
StringInfoData buffer;
initStringInfo(&buffer);
char *sql;
appendStringInfo(&buffer, "select sum(pg_database_size(%u))::int8 from gp_dist_random('gp_id');", dbOid);
sql = psprintf("select pg_catalog.pg_database_size(%u)", dbOid);
size += get_size_from_segDBs(buffer.data);
size += get_size_from_segDBs(sql);
}
if (size == 0)
......@@ -236,21 +215,20 @@ pg_database_size_oid(PG_FUNCTION_ARGS)
Datum
pg_database_size_name(PG_FUNCTION_ARGS)
{
int64 size;
Name dbName = PG_GETARG_NAME(0);
Oid dbOid = get_database_oid(NameStr(*dbName), false);
int64 size;
size = calculate_database_size(dbOid);
if (Gp_role == GP_ROLE_DISPATCH)
{
StringInfoData buffer;
initStringInfo(&buffer);
char *sql;
appendStringInfo(&buffer, "select sum(pg_database_size('%s'))::int8 from gp_dist_random('gp_id');", NameStr(*dbName));
sql = psprintf("select pg_catalog.pg_database_size(%s)",
quote_literal_cstr(NameStr(*dbName)));
size += get_size_from_segDBs(buffer.data);
size += get_size_from_segDBs(sql);
}
if (size == 0)
......@@ -340,16 +318,14 @@ pg_tablespace_size_oid(PG_FUNCTION_ARGS)
int64 size;
size = calculate_tablespace_size(tblspcOid);
if (Gp_role == GP_ROLE_DISPATCH)
{
StringInfoData buffer;
initStringInfo(&buffer);
char *sql;
appendStringInfo(&buffer, "select sum(pg_tablespace_size(%u))::int8 from gp_dist_random('gp_id');", tblspcOid);
sql = psprintf("select pg_catalog.pg_tablespace_size(%u)", tblspcOid);
size += get_size_from_segDBs(buffer.data);
size += get_size_from_segDBs(sql);
}
if (size < 0)
......@@ -366,16 +342,15 @@ pg_tablespace_size_name(PG_FUNCTION_ARGS)
int64 size;
size = calculate_tablespace_size(tblspcOid);
if (Gp_role == GP_ROLE_DISPATCH)
{
StringInfoData buffer;
initStringInfo(&buffer);
char *sql;
appendStringInfo(&buffer, "select sum(pg_tablespace_size('%s'))::int8 from gp_dist_random('gp_id');", NameStr(*tblspcName));
sql = psprintf("select pg_catalog.pg_tablespace_size(%s)",
quote_literal_cstr(NameStr(*tblspcName)));
size += get_size_from_segDBs(buffer.data);
size += get_size_from_segDBs(sql);
}
if (size < 0)
......@@ -384,13 +359,14 @@ pg_tablespace_size_name(PG_FUNCTION_ARGS)
PG_RETURN_INT64(size);
}
/*
* calculate size of (one fork of) a relation
*
* Iterate over all files belonging to the relation and stat each one.
* The obviously better way is to use glob. For whatever reason,
* glob is extremely slow if there are lots of relations in the
* database. So we handle all cases, instead.
* database. So we handle all cases, instead.
*/
static int64
calculate_relation_size(Relation rel, ForkNumber forknum)
......@@ -447,8 +423,8 @@ else if (forknum == MAIN_FORKNUM)
}
}
/* RELSTORAGE_VIRTUAL has no space usage */
return totalsize;
/* RELSTORAGE_VIRTUAL has no space usage */
return totalsize;
}
Datum
......@@ -495,22 +471,11 @@ pg_relation_size(PG_FUNCTION_ARGS)
if (Gp_role == GP_ROLE_DISPATCH)
{
StringInfoData buffer;
char *schemaName;
char *relName;
schemaName = get_namespace_name(get_rel_namespace(relOid));
if (schemaName == NULL)
elog(ERROR, "Cannot find schema for oid %d", relOid);
relName = get_rel_name(relOid);
if (relName == NULL)
elog(ERROR, "Cannot find relation for oid %d", relOid);
char *sql;
initStringInfo(&buffer);
sql = psprintf("select pg_catalog.pg_relation_size(%u)", relOid);
appendStringInfo(&buffer, "select sum(pg_relation_size('%s.%s'))::int8 from gp_dist_random('gp_id');", quote_identifier(schemaName), quote_identifier(relName));
size += get_size_from_segDBs(buffer.data);
size += get_size_from_segDBs(sql);
}
relation_close(rel, AccessShareLock);
......@@ -662,6 +627,15 @@ pg_table_size(PG_FUNCTION_ARGS)
size = calculate_table_size(rel);
if (Gp_role == GP_ROLE_DISPATCH)
{
char *sql;
sql = psprintf("select pg_catalog.pg_table_size(%u)", relOid);
size += get_size_from_segDBs(sql);
}
relation_close(rel, AccessShareLock);
PG_RETURN_INT64(size);
......@@ -681,6 +655,15 @@ pg_indexes_size(PG_FUNCTION_ARGS)
size = calculate_indexes_size(rel);
if (Gp_role == GP_ROLE_DISPATCH)
{
char *sql;
sql = psprintf("select pg_catalog.pg_indexes_size(%u)", relOid);
size += get_size_from_segDBs(sql);
}
relation_close(rel, AccessShareLock);
PG_RETURN_INT64(size);
......@@ -730,31 +713,15 @@ pg_total_relation_size(PG_FUNCTION_ARGS)
PG_RETURN_NULL();
size = calculate_total_relation_size(rel);
if (Gp_role == GP_ROLE_DISPATCH)
{
StringInfoData buffer;
char *schemaName;
char *relName;
schemaName = get_namespace_name(get_rel_namespace(relOid));
if (schemaName == NULL)
{
elog(ERROR, "Cannot find schema for oid %d", relOid);
}
relName = get_rel_name(relOid);
if (relName == NULL)
{
elog(ERROR, "Cannot find relation for oid %d", relOid);
}
initStringInfo(&buffer);
char *sql;
appendStringInfo(&buffer, "select pg_catalog.sum(pg_catalog.pg_total_relation_size('%s.%s'))::int8 from gp_dist_random('gp_id');",
quote_identifier(schemaName), quote_identifier(relName));
sql = psprintf("select pg_catalog.pg_total_relation_size(%u)",
relOid);
size += get_size_from_segDBs(buffer.data);
size += get_size_from_segDBs(sql);
}
relation_close(rel, AccessShareLock);
......
......@@ -504,8 +504,6 @@ extern void process_local_preload_libraries(void);
extern void pg_bindtextdomain(const char *domain);
extern bool is_authenticated_user_replication_role(void);
extern int64 db_dir_size(const char *path); /* implemented in dbsize.c */
/*
* Auxiliary-process type identifiers. These used to be in bootstrap.h
* but it seems saner to have them here, with the ProcessingMode stuff.
......
--
-- Tests on the pg_database_size(), pg_tablespace_size(), pg_relation_size(), etc.
-- functions.
--
-- These functions exist in PostgreSQL, but they have been modified in GPDB,
-- to collect the totals across segments, and to support AO / AOCS tables.
-- Hence, we better have extra tests for those things.
--
-- The total depends on the number of segments, and will also change whenever
-- the built-in objects change, so be lenient.
-- As of this writing, the total size of template0 database, across three segments,
-- is 67307536 bytes.
select pg_database_size('template0'::name) between 40000000 and 200000000;
?column?
----------
t
(1 row)
select pg_database_size(12510::oid) = pg_database_size('template0'::name);
?column?
----------
t
(1 row)
-- 19713632 bytes, as of this writing
select pg_tablespace_size('pg_global'::name) between 10000000 and 50000000;
?column?
----------
t
(1 row)
select pg_tablespace_size(1664::oid) between 10000000 and 50000000;
?column?
----------
t
(1 row)
select pg_tablespace_size('pg_global'::name) = pg_tablespace_size(1664::oid);
?column?
----------
t
(1 row)
-- Non-existent name/OID. These should return NULL or throw an error,
-- depending on the variant.
select pg_database_size('nonexistent');
ERROR: database "nonexistent" does not exist
select pg_database_size(9999);
pg_database_size
------------------
(1 row)
select pg_tablespace_size('nonexistent');
ERROR: tablespace "nonexistent" does not exist
select pg_tablespace_size(9999);
pg_tablespace_size
--------------------
(1 row)
select pg_relation_size(9999);
pg_relation_size
------------------
(1 row)
select pg_table_size(9999);
pg_table_size
---------------
(1 row)
select pg_indexes_size(9999);
pg_indexes_size
-----------------
(1 row)
select pg_total_relation_size(9999);
pg_total_relation_size
------------------------
(1 row)
-- Test on relations that have no storage (pg_tables is a view)
select pg_relation_size('pg_tables');
pg_relation_size
------------------
0
(1 row)
select pg_table_size('pg_tables');
pg_table_size
---------------
0
(1 row)
select pg_indexes_size('pg_tables');
pg_indexes_size
-----------------
0
(1 row)
select pg_total_relation_size('pg_tables');
pg_total_relation_size
------------------------
0
(1 row)
--
-- Tests on the table and index size variants.
--
CREATE TABLE heapsizetest (a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- First test with an empty table and no indexes. Should be all zeros.
select pg_relation_size('heapsizetest');
pg_relation_size
------------------
0
(1 row)
select pg_table_size('heapsizetest');
pg_table_size
---------------
0
(1 row)
select pg_indexes_size('heapsizetest');
pg_indexes_size
-----------------
0
(1 row)
select pg_total_relation_size('heapsizetest');
pg_total_relation_size
------------------------
0
(1 row)
-- Now test with a non-empty table (still no indexes, though).
insert into heapsizetest select generate_series(1, 100000);
vacuum heapsizetest;
-- Check that the values are in an expected ranges.
select pg_relation_size('heapsizetest') between 3000000 and 5000000; -- 3637248
?column?
----------
t
(1 row)
select pg_table_size('heapsizetest') between 3000000 and 5000000; -- 4030464
?column?
----------
t
(1 row)
select pg_table_size('heapsizetest') > pg_relation_size('heapsizetest');
?column?
----------
t
(1 row)
select pg_indexes_size('heapsizetest');
pg_indexes_size
-----------------
0
(1 row)
select pg_total_relation_size('heapsizetest') between 3000000 and 5000000; -- 4030464
?column?
----------
t
(1 row)
select pg_total_relation_size('heapsizetest') = pg_table_size('heapsizetest');
?column?
----------
t
(1 row)
-- Now also indexes
create index on heapsizetest(a);
select pg_relation_size('heapsizetest') between 3000000 and 5000000; -- 3637248
?column?
----------
t
(1 row)
select pg_table_size('heapsizetest') between 3000000 and 5000000; -- 4030464
?column?
----------
t
(1 row)
select pg_indexes_size('heapsizetest') between 2000000 and 3000000; -- 2490368
?column?
----------
t
(1 row)
select pg_total_relation_size('heapsizetest') between 6000000 and 7000000; -- 6520832
?column?
----------
t
(1 row)
select pg_total_relation_size('heapsizetest') = pg_table_size('heapsizetest') + pg_indexes_size('heapsizetest');
?column?
----------
t
(1 row)
-- Test on AO and AOCS tables
CREATE TABLE aosizetest (a int) WITH (appendonly=true, orientation=row);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into aosizetest select generate_series(1, 100000);
select pg_relation_size('aosizetest') between 750000 and 1500000; --1001648
?column?
----------
t
(1 row)
select pg_table_size('aosizetest') between 1000000 and 1500000; -- 1263792
?column?
----------
t
(1 row)
select pg_table_size('aosizetest') > pg_relation_size('aosizetest');
?column?
----------
t
(1 row)
select pg_total_relation_size('aosizetest') between 1000000 and 1500000; -- 1263792
?column?
----------
t
(1 row)
select pg_total_relation_size('aosizetest') = pg_table_size('aosizetest');
?column?
----------
t
(1 row)
CREATE TABLE aocssizetest (a int, col1 int, col2 text) WITH (appendonly=true, orientation=column);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into aocssizetest select g, g, 'x' || g from generate_series(1, 100000) g;
select pg_relation_size('aocssizetest') between 1000000 and 2000000; -- 1491240
?column?
----------
t
(1 row)
select pg_table_size('aocssizetest') between 1500000 and 3000000; -- 1884456
?column?
----------
t
(1 row)
select pg_table_size('aocssizetest') > pg_relation_size('aocssizetest');
?column?
----------
t
(1 row)
select pg_total_relation_size('aocssizetest') between 1500000 and 3000000; -- 1884456
?column?
----------
t
(1 row)
select pg_total_relation_size('aocssizetest') = pg_table_size('aocssizetest');
?column?
----------
t
(1 row)
--
-- Tests on the pg_database_size(), pg_tablespace_size(), pg_relation_size(), etc.
-- functions.
--
-- These functions exist in PostgreSQL, but they have been modified in GPDB,
-- to collect the totals across segments, and to support AO / AOCS tables.
-- Hence, we better have extra tests for those things.
--
-- The total depends on the number of segments, and will also change whenever
-- the built-in objects change, so be lenient.
-- As of this writing, the total size of template0 database, across three segments,
-- is 67307536 bytes.
select pg_database_size('template0'::name) between 40000000 and 200000000;
select pg_database_size(12510::oid) = pg_database_size('template0'::name);
-- 19713632 bytes, as of this writing
select pg_tablespace_size('pg_global'::name) between 10000000 and 50000000;
select pg_tablespace_size(1664::oid) between 10000000 and 50000000;
select pg_tablespace_size('pg_global'::name) = pg_tablespace_size(1664::oid);
-- Non-existent name/OID. These should return NULL or throw an error,
-- depending on the variant.
select pg_database_size('nonexistent');
select pg_database_size(9999);
select pg_tablespace_size('nonexistent');
select pg_tablespace_size(9999);
select pg_relation_size(9999);
select pg_table_size(9999);
select pg_indexes_size(9999);
select pg_total_relation_size(9999);
-- Test on relations that have no storage (pg_tables is a view)
select pg_relation_size('pg_tables');
select pg_table_size('pg_tables');
select pg_indexes_size('pg_tables');
select pg_total_relation_size('pg_tables');
--
-- Tests on the table and index size variants.
--
CREATE TABLE heapsizetest (a int);
-- First test with an empty table and no indexes. Should be all zeros.
select pg_relation_size('heapsizetest');
select pg_table_size('heapsizetest');
select pg_indexes_size('heapsizetest');
select pg_total_relation_size('heapsizetest');
-- Now test with a non-empty table (still no indexes, though).
insert into heapsizetest select generate_series(1, 100000);
vacuum heapsizetest;
-- Check that the values are in an expected ranges.
select pg_relation_size('heapsizetest') between 3000000 and 5000000; -- 3637248
select pg_table_size('heapsizetest') between 3000000 and 5000000; -- 4030464
select pg_table_size('heapsizetest') > pg_relation_size('heapsizetest');
select pg_indexes_size('heapsizetest');
select pg_total_relation_size('heapsizetest') between 3000000 and 5000000; -- 4030464
select pg_total_relation_size('heapsizetest') = pg_table_size('heapsizetest');
-- Now also indexes
create index on heapsizetest(a);
select pg_relation_size('heapsizetest') between 3000000 and 5000000; -- 3637248
select pg_table_size('heapsizetest') between 3000000 and 5000000; -- 4030464
select pg_indexes_size('heapsizetest') between 2000000 and 3000000; -- 2490368
select pg_total_relation_size('heapsizetest') between 6000000 and 7000000; -- 6520832
select pg_total_relation_size('heapsizetest') = pg_table_size('heapsizetest') + pg_indexes_size('heapsizetest');
-- Test on AO and AOCS tables
CREATE TABLE aosizetest (a int) WITH (appendonly=true, orientation=row);
insert into aosizetest select generate_series(1, 100000);
select pg_relation_size('aosizetest') between 750000 and 1500000; --1001648
select pg_table_size('aosizetest') between 1000000 and 1500000; -- 1263792
select pg_table_size('aosizetest') > pg_relation_size('aosizetest');
select pg_total_relation_size('aosizetest') between 1000000 and 1500000; -- 1263792
select pg_total_relation_size('aosizetest') = pg_table_size('aosizetest');
CREATE TABLE aocssizetest (a int, col1 int, col2 text) WITH (appendonly=true, orientation=column);
insert into aocssizetest select g, g, 'x' || g from generate_series(1, 100000) g;
select pg_relation_size('aocssizetest') between 1000000 and 2000000; -- 1491240
select pg_table_size('aocssizetest') between 1500000 and 3000000; -- 1884456
select pg_table_size('aocssizetest') > pg_relation_size('aocssizetest');
select pg_total_relation_size('aocssizetest') between 1500000 and 3000000; -- 1884456
select pg_total_relation_size('aocssizetest') = pg_table_size('aocssizetest');
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册