提交 ce4586a7 编写于 作者: P Pengcheng Tang 提交者: Jimmy Yih

Backup and restore to report errors found from segments' status file

Gpcrondump and gpdbrestore redirect stderr into status file for segments,
errors should be reflected into the master's report file.
Warnings will be given if process completed but errors were generated in
the report file.

Also made some refactor of the code.

This commit also reverts 08f4ada7.

Authors: Pengcheng Tang, Nikhil Kak
上级 339ff865
......@@ -343,7 +343,7 @@ Feature: Validate command line arguments
And the database "bkdb" does not exist
And database "bkdb" exists
And the user runs gp_restore with the the stored timestamp and subdir in "bkdb"
And gp_restore should return a return code of 0
And gp_restore should return a return code of 2
And verify that partitioned tables "ao_part_table, co_part_table, heap_part_table" in "bkdb" have 6 partitions
And verify that partitioned tables "ao_part_table_comp, co_part_table_comp" in "bkdb" have 6 partitions
And verify that partitioned tables "part_external" in "bkdb" have 5 partitions in partition level "0"
......@@ -431,7 +431,7 @@ Feature: Validate command line arguments
And the database "bkdb" does not exist
And database "bkdb" exists
And the user runs gp_restore with the stored timestamp and subdir in "bkdb" and backup_dir "/tmp"
And gp_restore should return a return code of 0
And gp_restore should return a return code of 2
And verify that partitioned tables "ao_part_table, co_part_table, heap_part_table" in "bkdb" have 6 partitions
And verify that partitioned tables "ao_part_table_comp, co_part_table_comp" in "bkdb" have 6 partitions
And verify that partitioned tables "part_external" in "bkdb" have 5 partitions in partition level "0"
......
......@@ -283,7 +283,7 @@ Feature: NetBackup Integration with GPDB
And the database "nbubkdb" does not exist
And database "nbubkdb" exists
When the user runs gp_restore with the the stored timestamp and subdir in "nbubkdb" using netbackup
Then gp_restore should return a return code of 0
Then gp_restore should return a return code of 2
And verify that the data of "68" tables in "nbubkdb" is validated after restore
@nbuall
......@@ -300,7 +300,7 @@ Feature: NetBackup Integration with GPDB
And the database "nbubkdb" does not exist
And database "nbubkdb" exists
When the user runs "gp_restore -i --gp-k 20131228111527 --gp-d db_dumps --gp-i --gp-r db_dumps --gp-l=p -d nbubkdb --gp-c" using netbackup
Then gp_restore should return a return code of 0
Then gp_restore should return a return code of 2
And verify that the data of "68" tables in "nbubkdb" is validated after restore
@nbusmoke
......
......@@ -80,7 +80,7 @@ def impl(context, dbname):
@then('database "{dbname}" is created if not exists on host "{HOST}" with port "{PORT}" with user "{USER}"')
def impl(context, dbname, HOST, PORT, USER):
host = os.environ.get(HOST)
port = int(os.environ.get(PORT))
port = 0 if os.environ.get(PORT) == None else int(os.environ.get(PORT))
user = os.environ.get(USER)
create_database_if_not_exists(context, dbname, host, port, user)
......
......@@ -35,8 +35,6 @@
int optreset;
#endif
#define DUMP_PREFIX (dump_prefix==NULL?"":dump_prefix)
/*
* static helper functions. See each function body for a description.
*/
......@@ -87,7 +85,6 @@ static char *selectSchemaName = NULL; /* name of a single schema to dump */
static char *tableFileName = NULL; /* file name with tables to dump (--table-file) */
static char *schemaFileName = NULL; /* file name with tables to dump (--schema-file) */
static char *excludeTableFileName = NULL; /* file name with tables to exclude (--exclude-table-file) */
static char *dump_prefix = NULL;
/* NetBackup related variables */
static char *netbackup_service_host = NULL;
......@@ -132,6 +129,7 @@ static pthread_mutex_t MyMutex2 = PTHREAD_MUTEX_INITIALIZER;
static bool g_b_SendCancelMessage = false;
static const char *pszAgent = "gp_dump_agent";
PQExpBuffer dump_prefix_buf = NULL;
#ifdef USE_DDBOOST
#include "ddp_api.h"
......@@ -143,6 +141,7 @@ main(int argc, char **argv)
{
int rc = 0; /* return code */
int remote_version;
dump_prefix_buf = createPQExpBuffer();
InputOptions inputOpts; /* command line parameters */
SegmentDatabaseArray segDBAr; /* array of the segdbs from the master
......@@ -240,6 +239,8 @@ cleanup:
if (master_db_conn != NULL)
PQfinish(master_db_conn);
destroyPQExpBuffer(dump_prefix_buf);
exit(rc);
}
......@@ -1213,8 +1214,8 @@ fillInputOptions(int argc, char **argv, InputOptions * pInputOpts)
no_expand_children = true;
break;
case 12:
dump_prefix = pg_strdup(optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("prefix", DUMP_PREFIX, pInputOpts->pszPassThroughParms);
appendPQExpBuffer(dump_prefix_buf, "%s", optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("prefix", dump_prefix_buf->data, pInputOpts->pszPassThroughParms);
break;
case 13:
incremental_filter = pg_strdup(optarg);
......@@ -1693,7 +1694,7 @@ reportBackupResults(InputOptions inputopts, ThreadParmArray *pParmAr)
else
pszFormat = "%s%sgp_dump_%s.rpt";
pszReportPathName = MakeString(pszFormat, pszReportDirectory, DUMP_PREFIX, pParmAr->pData[0].pOptionsData->pszKey);
pszReportPathName = MakeString(pszFormat, pszReportDirectory, dump_prefix_buf == NULL ? "" : dump_prefix_buf->data, pParmAr->pData[0].pOptionsData->pszKey);
fRptFile = fopen(pszReportPathName, "w");
if (fRptFile == NULL)
......@@ -1947,11 +1948,17 @@ spinOffThreads(PGconn *pConn,
if (pParm->pTargetSegDBData->role == ROLE_MASTER && no_lock)
{
char *dumpkey = pParmAr->pData[0].pOptionsData->pszKey;
char *filedir = pg_strdup(pInputOpts->pszReportDirectory);
if (filedir == NULL)
char *filedir = NULL;
if (pInputOpts->pszReportDirectory == NULL)
{
if (getenv("MASTER_DATA_DIRECTORY") != NULL)
filedir = pg_strdup(getenv("MASTER_DATA_DIRECTORY"));
else
filedir = pg_strdup("./");
}
else
filedir = pg_strdup(pInputOpts->pszReportDirectory);
char *signalFileName = MakeString("%s/gp_lockfile_%s", filedir, &dumpkey[8]);
mpp_msg(logInfo, progname, "Signal filename is %s.\n", signalFileName);
bool success = false;
......@@ -2282,8 +2289,9 @@ threadProc(void *arg)
BFT_BACKUP_STATUS, progname, pqBuffer);
if(status != 0)
{
pParm->pszErrorMsg = pqBuffer->data;
pParm->pszErrorMsg = MakeString("%s", pqBuffer->data);
}
destroyPQExpBuffer(pqBuffer);
}
/*
......
......@@ -69,8 +69,8 @@ static const char *logError = "ERROR";
static const char *logFatal = "FATAL";
const char *progname;
static char * addPassThroughLongParm(const char *Parm, const char *pszValue, char *pszPassThroughParmString);
static char *dump_prefix = NULL;
static char *status_file_dir = NULL;
PQExpBuffer dir_buf = NULL;
PQExpBuffer dump_prefix_buf = NULL;
/* NetBackup related variable */
static char *netbackup_service_host = NULL;
......@@ -92,6 +92,9 @@ main(int argc, char **argv)
SegmentDatabase *sourceSegDB = NULL;
SegmentDatabase *targetSegDB = NULL;
dir_buf = createPQExpBuffer();
dump_prefix_buf = createPQExpBuffer();
progname = get_progname(argv[0]);
/* This struct holds the values of the command line parameters */
......@@ -239,10 +242,12 @@ main(int argc, char **argv)
failCount = reportRestoreResults(inputOpts.pszReportDirectory, &masterParm, &parmAr);
cleanup:
FreeRestorePairArray(&restorePairAr);
FreeInputOptions(&inputOpts);
if (opts != NULL)
free(opts);
FreeRestorePairArray(&restorePairAr);
freeThreadParmArray(&parmAr);
if (masterParm.pszErrorMsg)
......@@ -254,6 +259,9 @@ cleanup:
if (pConn != NULL)
PQfinish(pConn);
destroyPQExpBuffer(dir_buf);
destroyPQExpBuffer(dump_prefix_buf);
return failCount;
}
......@@ -734,25 +742,29 @@ fillInputOptions(int argc, char **argv, InputOptions * pInputOpts)
bAoStats = false;
break;
case 13:
dump_prefix = Safe_strdup(optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("prefix", DUMP_PREFIX, pInputOpts->pszPassThroughParms);
appendPQExpBuffer(dump_prefix_buf, "%s", optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("prefix", dump_prefix_buf->data, pInputOpts->pszPassThroughParms);
break;
case 14:
status_file_dir = Safe_strdup(optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("status", status_file_dir, pInputOpts->pszPassThroughParms);
appendPQExpBuffer(dir_buf, "%s", optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("status", dir_buf->data, pInputOpts->pszPassThroughParms);
break;
case 15:
netbackup_service_host = Safe_strdup(optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("netbackup-service-host", netbackup_service_host, pInputOpts->pszPassThroughParms);
if (netbackup_service_host != NULL)
free(netbackup_service_host);
break;
case 16:
netbackup_block_size = Safe_strdup(optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("netbackup-block-size", netbackup_block_size, pInputOpts->pszPassThroughParms);
if (netbackup_block_size != NULL)
free(netbackup_block_size);
break;
case 17:
change_schema = Safe_strdup(optarg);
pInputOpts->pszPassThroughParms = addPassThroughLongParm("change-schema", change_schema, pInputOpts->pszPassThroughParms);
if (change_schema!= NULL)
if (change_schema != NULL)
free(change_schema);
break;
default:
......@@ -1138,6 +1150,11 @@ threadProc(void *arg)
pszKey, sSegDB->dbid, tSegDB->dbid);
mpp_err_msg(logError, progname, pParm->pszErrorMsg);
PQfinish(pConn);
free(pszNotifyRelName);
free(pszNotifyRelNameStart);
free(pszNotifyRelNameSucceed);
free(pszNotifyRelNameFail);
free(pollInput);
return NULL;
}
......@@ -1148,6 +1165,11 @@ threadProc(void *arg)
pParm->pszErrorMsg = MakeString("connection went down for backup key %s, source dbid %d, target dbid %d\n",
pszKey, sSegDB->dbid, tSegDB->dbid);
mpp_err_msg(logFatal, progname, pParm->pszErrorMsg);
free(pszNotifyRelName);
free(pszNotifyRelNameStart);
free(pszNotifyRelNameSucceed);
free(pszNotifyRelNameFail);
free(pollInput);
PQfinish(pConn);
return NULL;
}
......@@ -1180,8 +1202,9 @@ threadProc(void *arg)
pParm->bSuccess = false;
pqBuffer = createPQExpBuffer();
/* Make call to get error message from file on server */
ReadBackendBackupFileError(pConn, status_file_dir, pszKey, BFT_RESTORE_STATUS, progname, pqBuffer);
pParm->pszErrorMsg = pqBuffer->data;
ReadBackendBackupFileError(pConn, dir_buf->data, pszKey, BFT_RESTORE_STATUS, progname, pqBuffer);
pParm->pszErrorMsg = MakeString("%s", pqBuffer->data);
destroyPQExpBuffer(pqBuffer);
mpp_err_msg(logError, progname, "restore failed for source dbid %d, target dbid %d on host %s\n",
sSegDB->dbid, tSegDB->dbid, StringNotNull(sSegDB->pszHost, "localhost"));
......@@ -1217,11 +1240,12 @@ threadProc(void *arg)
if(pParm->bSuccess || pParm->pszErrorMsg == NULL)
{
pqBuffer = createPQExpBuffer();
int status = ReadBackendBackupFileError(pConn, status_file_dir, pszKey, BFT_RESTORE_STATUS, progname, pqBuffer);
int status = ReadBackendBackupFileError(pConn, dir_buf == NULL ? "" : dir_buf->data, pszKey, BFT_RESTORE_STATUS, progname, pqBuffer);
if (status != 0)
{
pParm->pszErrorMsg = pqBuffer->data;
pParm->pszErrorMsg = MakeString("%s", pqBuffer->data);
}
destroyPQExpBuffer(pqBuffer);
}
if (pszNotifyRelName != NULL)
......@@ -1627,7 +1651,7 @@ reportRestoreResults(const char *pszReportDirectory, const ThreadParm * pMasterP
* report directory not set by user - default to
* $MASTER_DATA_DIRECTORY
*/
pszReportDirectory = Safe_strdup(getenv("MASTER_DATA_DIRECTORY"));
pszReportDirectory = getenv("MASTER_DATA_DIRECTORY");
}
else
{
......@@ -1642,7 +1666,13 @@ reportRestoreResults(const char *pszReportDirectory, const ThreadParm * pMasterP
else
pszFormat = "%s%sgp_restore_%s.rpt";
pszReportPathName = MakeString(pszFormat, pszReportDirectory, DUMP_PREFIX, pOptions->pszKey);
pszReportPathName = MakeString(pszFormat, pszReportDirectory, dump_prefix_buf == NULL ? "" : dump_prefix_buf->data, pOptions->pszKey);
if (pszReportPathName == NULL)
{
mpp_err_msg(logError, progname, "Cannot allocate memory for report file path\n");
exit(-1);
}
fRptFile = fopen(pszReportPathName, "w");
if (fRptFile == NULL)
......@@ -1795,7 +1825,7 @@ reportMasterError(InputOptions inputopts, const ThreadParm * pMasterParm, const
* report directory not set by user - default to
* $MASTER_DATA_DIRECTORY
*/
pszReportDirectory = Safe_strdup(getenv("MASTER_DATA_DIRECTORY"));
pszReportDirectory = getenv("MASTER_DATA_DIRECTORY");
}
else
{
......@@ -1812,6 +1842,12 @@ reportMasterError(InputOptions inputopts, const ThreadParm * pMasterParm, const
pszReportPathName = MakeString(pszFormat, pszReportDirectory, pOptions->pszKey);
if (pszReportPathName == NULL)
{
mpp_err_msg(logError, progname, "Cannot allocate memory for report file path\n");
exit(-1);
}
fRptFile = fopen(pszReportPathName, "w");
if (fRptFile == NULL)
mpp_err_msg(logWarn, progname, "Cannot open report file %s for writing. Will use stdout instead.\n",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册