提交 c8a03d20 编写于 作者: H Hao Wang 提交者: Wang Hao

gpperfmon: fix long query text cannot load into queries_history

1. When harvesting, raise gp_max_csv_line_length to its
maximum legal value at the session level.
2. For queries longer than gp_max_csv_line_length, this workaround
replaces line breaks in the query text with spaces to prevent load
failure. This may change a long query statement when it is loaded into
the history table, but that is still better than failing to load or
truncating the query text.

Co-authored-by: Teng Zhang tezhang@pivotal.io
Co-authored-by: Hao Wang haowang@pivotal.io
上级 49a2b32c
......@@ -1391,6 +1391,7 @@ static int get_and_print_next_query_file_kvp(FILE* outfd, FILE* queryfd, char* q
char *p = NULL;
int field_len = 0;
int retCode = 0;
bool replace_line_breaks = false;
p = fgets(line, line_size, queryfd);
line[line_size-1] = 0; // in case libc is buggy
......@@ -1421,6 +1422,10 @@ static int get_and_print_next_query_file_kvp(FILE* outfd, FILE* queryfd, char* q
return APR_NOTFOUND;
}
if (field_len >= MAX_GP_MAX_CSV_LINE_LENGTH - HARVEST_CSV_SAFEGUARD) {
replace_line_breaks = true;
}
fprintf(outfd, "\"");
(*bytes_written)++;
......@@ -1437,6 +1442,19 @@ static int get_and_print_next_query_file_kvp(FILE* outfd, FILE* queryfd, char* q
(*bytes_written)++;
}
/**
* MPP-29418 COPY/External table have a limit on CSV line length.
* If a row being loaded contains line breaks and its length exceeds
* gp_max_csv_line_length, the CSV is treated as badly formatted
* and the query fails.
* This workaround checks the field length and replaces line breaks
* with spaces to prevent the load failure.
* It may slightly change a long query statement, but that is
* still better than failing to load or truncating the query text.
*/
if (replace_line_breaks && (*p == '\n' || *p == '\r'))
*p = ' ';
fputc(*p, outfd);
(*bytes_written)++;
......
......@@ -58,11 +58,8 @@ static const bool gpdb_exec_ddl(PGconn* conn, const char* ddl_query)
return errmsg == NULL;
}
// creates a connection and then runs the query
static const char* gpdb_exec(PGconn** pconn, PGresult** pres, const char* query)
static const char* gpdb_exec_with_connstr(PGconn** pconn, PGresult** pres, const char* query, const char* connstr)
{
const char *connstr = "dbname='" GPMON_DB "' user='" GPMON_DBUSER
"' connect_timeout='30'";
PGconn *conn = NULL;
conn = PQconnectdb(connstr);
......@@ -75,6 +72,26 @@ static const char* gpdb_exec(PGconn** pconn, PGresult** pres, const char* query)
return gpdb_exec_only(conn, pres, query);
}
// Opens a connection using the default connection string (database GPMON_DB,
// user GPMON_DBUSER, 30 second connect timeout) and then runs the query on it.
// The work itself is delegated to gpdb_exec_with_connstr.
static const char* gpdb_exec(PGconn** pconn, PGresult** pres, const char* query)
{
	static const char default_connstr[] =
		"dbname='" GPMON_DB "' user='" GPMON_DBUSER "' connect_timeout='30'";

	return gpdb_exec_with_connstr(pconn, pres, query, default_connstr);
}
// Creates a connection for harvesting and then runs the query.
// Same as gpdb_exec, except that the connection string also passes
// "-c gp_max_csv_line_length=MAX_GP_MAX_CSV_LINE_LENGTH" through the
// "options" keyword, so the setting is raised for this connection only.
// Returns whatever gpdb_exec_with_connstr returns.
static const char* gpdb_exec_harvest(PGconn** pconn, PGresult** pres, const char* query)
{
	// An enum constant is an integer constant expression in C, so connstr is
	// an ordinary fixed-size array. A "const int" size would make it a
	// variable-length array, which is an optional feature in C11.
	enum { CONNBUFSIZ = 256 };
	char connstr[CONNBUFSIZ];

	snprintf(connstr, sizeof(connstr), "dbname='" GPMON_DB "' user='" GPMON_DBUSER
		"' connect_timeout='30' options='-c gp_max_csv_line_length=%d'", MAX_GP_MAX_CSV_LINE_LENGTH);

	return gpdb_exec_with_connstr(pconn, pres, query, connstr);
}
// persistant_conn is optional if you are already holding an open connection
// return 1 if more than 0 rows are returned from query
// return 0 if zero rows are returned from query
......@@ -920,7 +937,7 @@ static apr_status_t harvest(const char* tbl, apr_pool_t* pool, PGconn* conN)
snprintf(qrybuf, QRYBUFSIZ, QRYFMT, tbl, tbl);
errmsg = gpdb_exec(&conn, &result, qrybuf);
errmsg = gpdb_exec_harvest(&conn, &result, qrybuf);
if (errmsg)
{
res = 1;
......
......@@ -4,6 +4,7 @@
#include "apr_general.h"
#include "apr_md5.h"
#include "apr_hash.h"
#include "cdb/cdbcsv.h"
/**
* Validate that the gpperfmon database is correct and
......@@ -97,5 +98,12 @@ void process_line_in_hadoop_cluster_info(apr_pool_t*, apr_hash_t*, char*, char*,
int get_hadoop_hosts_and_add_to_hosts(apr_pool_t*, apr_hash_t*, mmon_options_t*);
apr_status_t truncate_file(char*, apr_pool_t*);
/**
* MPP-29418: work around the COPY/external table limitation that a CSV line
* containing line breaks cannot exceed gp_max_csv_line_length. We subtract 1K
* from that limit to leave room for the fixed-length columns, for data load
* safety.
*/
#define HARVEST_CSV_SAFEGUARD (1024)
#endif /* GPMONDB_H */
......@@ -21,6 +21,7 @@
#include "access/url.h"
#include "access/xlog_internal.h"
#include "cdb/cdbappendonlyam.h"
#include "cdb/cdbcsv.h"
#include "cdb/cdbdisp.h"
#include "cdb/cdbfilerep.h"
#include "cdb/cdbsreh.h"
......@@ -3642,7 +3643,7 @@ struct config_int ConfigureNamesInt_gp[] =
GUC_GPDB_ADDOPT
},
&gp_max_csv_line_length,
1 * 1024 * 1024, 32 * 1024, 4 * 1024 * 1024, NULL, NULL
1 * 1024 * 1024, 32 * 1024, MAX_GP_MAX_CSV_LINE_LENGTH, NULL, NULL
},
/*
......
/*-------------------------------------------------------------------------
*
* cdbcsv.h
* Header file for copy/external table process csv file format
*
* Portions Copyright (c) 2005-2008, Greenplum inc
* Portions Copyright (c) 2012-Present Pivotal Software, Inc.
*
*
* IDENTIFICATION
* src/include/cdb/cdbcsv.h
*
*-------------------------------------------------------------------------
*/
#ifndef CDBCSV_H
#define CDBCSV_H
/*
 * Maximum legal value of the gp_max_csv_line_length setting. Defined here so
 * that the GUC table and other code (e.g. gpperfmon harvesting) share one
 * definition of the limit.
 */
#define MAX_GP_MAX_CSV_LINE_LENGTH (4 * 1024 * 1024)
#endif /* CDBCSV_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册