提交 8838ac98 编写于 作者: A Ashwin Agrawal

Efficient deletion of AO/CO files.

Previous algorithm scans entire directory to find specific relfilenode
extensions to be deleted. This is not optimal for large directory sizes. This
patch introduces extra logic based on the table extension pattern which helps
to avoid directory scan.

The algorithm assumes that, for CO tables, at a given concurrency level either
all columns have a file or none do. It also relies on the following file
extension pattern:

  Heap Tables: contiguous extensions, no upper bound
  AO Tables: non contiguous extensions [.0 - .127]
  CO Tables: non contiguous extensions
         [  .0 - .127] for first column
         [.128 - .255] for second column
         [.256 - .383] for third column
         etc

  AO file format can be treated as a special case of CO tables with 1 column.

High level logic:
 1) Finds for which concurrency levels the table has files. This is
    calculated based off the first column. It performs 127
    (MAX_AOREL_CONCURRENCY) unlink().
 2) Iterates over the single column and deletes all concurrency level files.
    For AO tables this will exit fast.

This algorithm can be used for heap tables as well, however to prevent merge
conflicts it currently is only used for CO/AO tables.
Co-authored-by: David Kimura <dkimura@pivotal.io>
上级 85fee736
......@@ -30,8 +30,8 @@
#include <sys/file.h>
#include "utils/guc.h"
#include "access/appendonlytid.h"
#include "cdb/cdbappendonlystorage.h"
#include "access/appendonlytid.h"
#include "access/appendonlywriter.h"
#include "cdb/cdbappendonlyxlog.h"
......@@ -197,6 +197,118 @@ TruncateAOSegmentFile(File fd, Relation rel, int32 segFileNum, int64 offset)
xlog_ao_truncate(rel->rd_node, segFileNum, offset);
}
/*
 * mdunlink_ao() -- remove every segment file extension of an AO or AOCS table.
 *
 * Ideally the logic works even for heap tables, but it is currently used only
 * for AO and AOCS tables to avoid merge conflicts.
 *
 * There are different rules for the naming of the files, depending on
 * the type of table:
 *
 *   Heap Tables: contiguous extensions, no upper bound
 *   AO Tables:   non contiguous extensions [.1 - .127]
 *   CO Tables:   non contiguous extensions
 *                [  .1 - .127] for first column
 *                [.129 - .255] for second column
 *                [.257 - .383] for third column
 *                etc
 *
 * The algorithm is coded with the assumption, for CO tables, that for a given
 * concurrency level either all columns have the file or none do.
 *
 * 1) Find the concurrency levels for which the table has files. This is
 *    derived from the first column only, by attempting one unlink() per
 *    concurrency level in [1, MAX_AOREL_CONCURRENCY).
 * 2) For every further column, delete the files of exactly those concurrency
 *    levels, stopping at the first column that has no file. For AO tables
 *    (a single column) this exits after one failed unlink().
 *
 * 'path' is the base file path; only "<path>.<N>" files are removed here, the
 * base file itself is never unlinked by this function. unlink() failures
 * other than ENOENT are reported as WARNINGs and do not abort the cleanup.
 */
void
mdunlink_ao(const char *path)
{
	int			path_size = strlen(path);

	/* 12 extra bytes: '.', up to 10 digits of extension, terminating NUL */
	char	   *segpath = (char *) palloc(path_size + 12);

	/*
	 * Sized by the same bound as the collecting loop below, so the number of
	 * recorded concurrency levels can never exceed the array.
	 */
	int			segNumberArray[MAX_AOREL_CONCURRENCY];
	int			segNumberArraySize;
	char	   *segpath_suffix_position = segpath + path_size;

	/* Copy the base path once; every probe below only rewrites the suffix. */
	memcpy(segpath, path, path_size);
	segpath[path_size] = '\0';

	/*
	 * The 0 based extensions such as .128, .256, ... for CO tables are
	 * created by ALTER table or utility mode insert. These also need to be
	 * deleted; however, they may not exist hence are treated separately
	 * here. Column 0 concurrency level 0 file is always
	 * present. MaxHeapAttributeNumber is used as a sanity check; we expect
	 * the loop to terminate based on unlink return value.
	 */
	for (int colnum = 1; colnum <= MaxHeapAttributeNumber; colnum++)
	{
		sprintf(segpath_suffix_position, ".%u", colnum*AOTupleId_MultiplierSegmentFileNum);
		if (unlink(segpath) == 0)
			continue;

		/* ENOENT is expected after the end of the extensions */
		if (errno == ENOENT)
			break;

		ereport(WARNING,
				(errcode_for_file_access(),
				 errmsg("could not remove file \"%s\": %m", segpath)));
	}

	/* Collect all the segment numbers in [1..127] that exist for column 0. */
	segNumberArraySize = 0;
	for (int concurrency_index = 1; concurrency_index < MAX_AOREL_CONCURRENCY;
		 concurrency_index++)
	{
		sprintf(segpath_suffix_position, ".%u", concurrency_index);
		if (unlink(segpath) != 0)
		{
			/* A missing file just means this level was never used. */
			if (errno != ENOENT)
				ereport(WARNING,
						(errcode_for_file_access(),
						 errmsg("could not remove file \"%s\": %m", segpath)));
			continue;
		}
		segNumberArray[segNumberArraySize] = concurrency_index;
		segNumberArraySize++;
	}

	/* No concurrency level had a file: nothing left to look for. */
	if (segNumberArraySize == 0)
	{
		pfree(segpath);
		return;
	}

	for (int colnum = 1; colnum <= MaxHeapAttributeNumber; colnum++)
	{
		/*
		 * Must be declared at this level only: the inner loop sets it and the
		 * check after the inner loop reads it. Re-declaring it inside the
		 * inner loop would shadow it and the early exit would never trigger.
		 */
		bool		finished = false;

		for (int i = 0; i < segNumberArraySize; i++)
		{
			sprintf(segpath_suffix_position, ".%u",
					colnum*AOTupleId_MultiplierSegmentFileNum + segNumberArray[i]);
			if (unlink(segpath) != 0)
			{
				/* ENOENT is expected after the end of the extensions */
				if (errno != ENOENT)
					ereport(WARNING,
							(errcode_for_file_access(),
							 errmsg("could not remove file \"%s\": %m", segpath)));
				else
				{
					finished = true;
					break;
				}
			}
		}

		/* First column without files: we are past the last column. */
		if (finished)
			break;
	}

	pfree(segpath);
}
static void
copy_file(char *srcsegpath, char* dstsegpath,
RelFileNode dst, int segfilenum, bool use_wal)
......
......@@ -3,10 +3,67 @@
#include <setjmp.h>
#include "cmockery.h"
#include "../aomd.c"
#include "postgres.h"
#include "access/appendonlywriter.h"
#include "catalog/pg_tablespace.h"
#define PATH_TO_DATA_FILE "/tmp/md_test/1234"
/*
 * Mock state shared by mock_unlink() and the mdunlink_ao tests.
 *
 * file_present[N] tells mock_unlink() whether the file with numeric
 * extension ".N" is treated as existing (unlink succeeds) or missing
 * (unlink fails with ENOENT). The tests populate it starting at index 1.
 *
 * num_unlink_called counts the mocked unlink() calls that succeeded.
 *
 * unlink_passing is reset to true by setup_test_structures() and asserted by
 * the tests; nothing in the visible code ever sets it to false.
 */
static bool file_present[MAX_AOREL_CONCURRENCY * MaxHeapAttributeNumber + 2];
static int num_unlink_called = 0;
static bool unlink_passing = true;
/*
 * Reset all mock state so each test starts from "no file exists, no unlink
 * has succeeded yet".
 */
static void
setup_test_structures()
{
	/* mark every tracked file as absent */
	memset(file_present, 0, sizeof file_present);

	unlink_passing = true;
	num_unlink_called = 0;
}
/*
*******************************************************************************
* Mocking access and unlink for unittesting
*******************************************************************************
*/
#undef unlink
#define unlink mock_unlink

/*
 * mock_unlink() -- test double substituted for unlink() via the macro above.
 *
 * The numeric extension following "PATH_TO_DATA_FILE." is parsed from 'path'
 * and used as an index into file_present[]:
 *   - entry is true:  returns 0 and increments num_unlink_called;
 *   - entry is false, or the index lies outside file_present[]: returns -1
 *     with errno set to ENOENT.
 *
 * A path with no extension (or an empty one) maps to index 0. The mock never
 * clears file_present[], so "removing" a file does not change later results.
 */
int mock_unlink(const char * path)
{
	const size_t base_len = strlen(PATH_TO_DATA_FILE);
	const size_t nfiles = sizeof(file_present) / sizeof(file_present[0]);
	unsigned int segfile = 0;
	int ec = 0;

	/*
	 * Only look past the '.' separator when something actually follows it;
	 * otherwise we would read beyond the end of the string.
	 */
	if (strlen(path) > base_len + 1)
		segfile = (unsigned int) atoi(path + base_len + 1);

	/* Out-of-range extensions are simply files that do not exist. */
	if (segfile >= nfiles || !file_present[segfile])
	{
		ec = -1;
		errno = ENOENT;
	}
	else
	{
		num_unlink_called++;
	}

#if 0
	elog(WARNING, "UNLINK %s %u num_times_called=%d unlink_passing %d\n",
		 path, segfile, num_unlink_called, unlink_passing);
#endif

	return ec;
}
/*
*******************************************************************************
*/
#include "../aomd.c"
void
test__AOSegmentFilePathNameLen(void **state)
{
......@@ -96,6 +153,104 @@ test__MakeAOSegmentFileName(void **state)
assert_int_equal(fileSegNo, 256);
}
/*
 * With no segment file marked present, every mocked unlink() fails with
 * ENOENT, so mdunlink_ao() must not record a single successful removal.
 */
void
test_mdunlink_co_no_file_exists(void **state)
{
	setup_test_structures();

	mdunlink_ao(PATH_TO_DATA_FILE);

	/* nothing exists, hence nothing can have been unlinked */
	assert_int_equal(num_unlink_called, 0);
}
/*
 * Four columns, a single concurrency level (1): expects exactly one
 * successful unlink per column.
 */
void
test_mdunlink_co_4_columns_1_concurrency(void **state)
{
	setup_test_structures();

	/* first column, concurrency level 1 */
	file_present[1] = true;

	/* same concurrency level for the three following columns */
	file_present[(1*AOTupleId_MultiplierSegmentFileNum) + 1] = true;
	file_present[(2*AOTupleId_MultiplierSegmentFileNum) + 1] = true;
	file_present[(3*AOTupleId_MultiplierSegmentFileNum) + 1] = true;

	mdunlink_ao(PATH_TO_DATA_FILE);

	assert_int_equal(num_unlink_called, 4);
	assert_true(unlink_passing);
}
/*
 * Three columns, two concurrency levels (1 and 5): expects 3 * 2 = 6
 * successful unlinks.
 */
void
test_mdunlink_co_3_columns_2_concurrency(void **state)
{
	setup_test_structures();

	/* first column: concurrency levels 1 and 5 */
	file_present[1] = true;
	file_present[5] = true;

	/* concurrency level 1 files of the second and third column */
	file_present[(1*AOTupleId_MultiplierSegmentFileNum) + 1] = true;
	file_present[(2*AOTupleId_MultiplierSegmentFileNum) + 1] = true;

	/* concurrency level 5 files of the second and third column */
	file_present[(1*AOTupleId_MultiplierSegmentFileNum) + 5] = true;
	file_present[(2*AOTupleId_MultiplierSegmentFileNum) + 5] = true;

	mdunlink_ao(PATH_TO_DATA_FILE);

	assert_int_equal(num_unlink_called, 6);
	assert_true(unlink_passing);
}
/*
 * Every tracked file is present except the final sentinel entry; expects
 * MaxHeapAttributeNumber * MAX_AOREL_CONCURRENCY successful unlinks.
 */
void
test_mdunlink_co_all_columns_full_concurrency(void **state)
{
	setup_test_structures();

	memset(file_present, true, sizeof(file_present));

	/* keep the last entry absent so the mocked unlink eventually fails */
	file_present[MAX_AOREL_CONCURRENCY * MaxHeapAttributeNumber + 1] = false;

	mdunlink_ao(PATH_TO_DATA_FILE);

	assert_int_equal(num_unlink_called,
					 MaxHeapAttributeNumber * MAX_AOREL_CONCURRENCY);
	assert_true(unlink_passing);
}
/*
 * Single column with a single concurrency level (1): expects exactly one
 * successful unlink.
 */
void
test_mdunlink_co_one_columns_one_concurrency(void **state)
{
	setup_test_structures();

	file_present[1] = true;

	mdunlink_ao(PATH_TO_DATA_FILE);

	assert_int_equal(num_unlink_called, 1);
	assert_true(unlink_passing);
}
/*
 * Single column with a file at every concurrency level in
 * [1, MAX_AOREL_CONCURRENCY): expects one successful unlink per file set up.
 */
void
test_mdunlink_co_one_columns_full_concurrency(void **state)
{
	setup_test_structures();

	for (int filenum = 1; filenum < MAX_AOREL_CONCURRENCY; filenum++)
		file_present[filenum] = true;

	mdunlink_ao(PATH_TO_DATA_FILE);

	/* same count as the loop above marked present */
	assert_int_equal(num_unlink_called, MAX_AOREL_CONCURRENCY - 1);
	assert_true(unlink_passing);
}
int
main(int argc, char *argv[])
......@@ -105,7 +260,13 @@ main(int argc, char *argv[])
const UnitTest tests[] = {
unit_test(test__AOSegmentFilePathNameLen),
unit_test(test__FormatAOSegmentFileName),
unit_test(test__MakeAOSegmentFileName)
unit_test(test__MakeAOSegmentFileName),
unit_test(test_mdunlink_co_one_columns_full_concurrency),
unit_test(test_mdunlink_co_one_columns_one_concurrency),
unit_test(test_mdunlink_co_all_columns_full_concurrency),
unit_test(test_mdunlink_co_3_columns_2_concurrency),
unit_test(test_mdunlink_co_4_columns_1_concurrency),
unit_test(test_mdunlink_co_no_file_exists)
};
MemoryContextInit();
......
......@@ -20,6 +20,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include "access/aomd.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "portability/instr_time.h"
......@@ -348,7 +349,6 @@ mdcreate_ao(RelFileNodeBackend rnode, int32 segmentFileNum, bool isRedo)
pfree(path);
}
/*
* mdunlink() -- Unlink a relation.
*
......@@ -452,6 +452,14 @@ mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo, char relstor
*/
if (ret >= 0)
{
if (relstorage_is_ao(relstorage))
{
Assert(forkNum == MAIN_FORKNUM);
mdunlink_ao(path);
pfree(path);
return;
}
char *segpath = (char *) palloc(strlen(path) + 12);
BlockNumber segno;
......@@ -468,97 +476,10 @@ mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo, char relstor
if (errno != ENOENT)
ereport(WARNING,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m", segpath)));
errmsg("could not remove file \"%s\": %m", segpath)));
break;
}
}
/*
* Delete All segment file extensions, in case it was an AO or AOCS
* table.
*
* WALREP_FIXME: This currently works by scanning the directory, looking
* for the pattern "<relfilenode>.<segno>". That is slow. We used to do
* do this before, and had to switch over to the information from the
* persistent tables for performance reasons somewhere around GPDB 3.X
* or 4.X. Persistent tables are no more, so we had to go back to
* scanning the directory, but we know that's going to be unacceptably
* slow if there are a lot of files in the directory.
*
* There are different rules for the naming of the files, depending on
* the type of table:
*
* Heap Tables: contiguous extensions, no upper bound
* AO Tables: non contiguous extensions [.1 - .127]
* CO Tables: non contiguous extensions
* [ .1 - .127] for first column
* [.128 - .255] for second column
* [.256 - .283] for third column
* etc
*
* However, we don't try to be smart here, we just always scan the
* directory. We don't know what kind of a table it was down here.
*
* NOTE: If you find a smarter way to do this than by scanning the dir,
* consider changing copy_append_only_data(), in tablecmds.c, to also
* use the smarter way.
*/
if (forkNum == MAIN_FORKNUM)
{
DIR *dir;
struct dirent *de;
char *dirpart;
char *filepart;
char *filedot;
/*
* The base path is like "<path>/<rnode>". Split it into
* path and filename parts.
*/
reldir_and_filename(rnode.node, InvalidBackendId, forkNum, &dirpart, &filepart);
filedot = psprintf("%s.", filepart);
/* Scan the directory */
dir = AllocateDir(dirpart);
while ((de = ReadDir(dir, dirpart)) != NULL)
{
char *suffix;
if (strcmp(de->d_name, ".") == 0 ||
strcmp(de->d_name, "..") == 0)
continue;
/* Does it begin with the relfilenode? */
if (strlen(de->d_name) <= strlen(filedot) ||
strncmp(de->d_name, filedot, strlen(filedot)) != 0)
continue;
/*
* Does it have a digits-only suffix? (This is not really
* necessary to check, but better be conservative when deleting
* files.)
*/
suffix = de->d_name + strlen(filedot);
if (strspn(suffix, "0123456789") != strlen(suffix) ||
strlen(suffix) > 10)
continue;
/* Looks like a match. Go ahead and delete it. */
sprintf(segpath, "%s.%s", path, suffix);
if (unlink(segpath) < 0)
{
ereport(WARNING,
(errcode_for_file_access(),
errmsg("could not remove segment %s of relation %s: %m",
suffix, path)));
}
}
FreeDir(dir);
pfree(filedot);
pfree(filepart);
pfree(dirpart);
}
pfree(segpath);
}
......
......@@ -312,6 +312,10 @@ smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo, char relstorage
/*
* Get rid of any remaining buffers for the relation. bufmgr will just
* drop them without bothering to write the contents.
*
* XXX: Is any relstorage type other than RELSTORAGE_HEAP expected to have
* buffers in shared memory? If not, the check below could simply test for
* RELSTORAGE_HEAP.
*/
if ((relstorage != RELSTORAGE_AOROWS) &&
(relstorage != RELSTORAGE_AOCOLS))
......
......@@ -47,6 +47,8 @@ TruncateAOSegmentFile(File fd,
int32 segmentFileNum,
int64 offset);
/*
 * Remove the "<path>.<N>" segment files of an AO/AOCS relation whose base
 * file path is 'path'.
 */
extern void
mdunlink_ao(const char *path);
extern void
copy_append_only_data(RelFileNode src, RelFileNode dst, BackendId backendid, char relpersistence);
#endif /* AOMD_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册