提交 589b7e7d 编写于 作者: H Haisheng Yuan

Fix minirepro bug that prevents minirepro from dumping complete SQL

This patch changes gp_dump_query_oids to traverse the string form of
the parsed query tree instead of traversing the query tree struct, which
has too many node types and corner cases to consider. Even though it is a
little risky to traverse the parsed query tree string, we haven't seen any
sign that PostgreSQL upstream is going to change the format.

In addition, we also fix a bug in the minirepro Python script: when column
stats have text-type most common values containing a single quote,
minirepro failed to escape that text, which caused a SQL syntax error
and prevented those statistics from being inserted.

Update minirepro to handle error messages correctly, use PGUSER as the
default user, and let the output file accept a relative path.
Also update the minirepro behave test so that it passes.

Closes #1024
上级 c296bdb3
......@@ -39,16 +39,22 @@ Feature: Dump minimum database objects that is related to the query
Then minirepro error should contain relation "tbl_none" does not exist
@minirepro_core
Scenario: Query parse error with multiple queries
Scenario: Query parse with multiple queries
Given the file "/tmp/in.sql" exists and contains "select * from t1; delete from t2;"
When the user runs "minirepro minireprodb -q /tmp/in.sql -f /tmp/out.sql"
Then minirepro error should contain Error while running gp_toolkit.gp_dump_query_oids
Then the output file "/tmp/out.sql" should exist
And the output file "/tmp/out.sql" should contain "CREATE TABLE t1"
And the output file "/tmp/out.sql" should contain "CREATE TABLE t2"
And the output file "/tmp/out.sql" should contain "WHERE relname = 't1'"
And the output file "/tmp/out.sql" should contain "WHERE relname = 't2'"
And the output file "/tmp/out.sql" should be loaded to database "minidb_tmp" without error
And the file "/tmp/in.sql" should be executed in database "minidb_tmp" without error
@minirepro_core
Scenario: Query parse error with wrong syntax query
Given the file "/tmp/in.sql" exists and contains "delete * from t1"
When the user runs "minirepro minireprodb -q /tmp/in.sql -f /tmp/out.sql"
Then minirepro error should contain Error while running gp_toolkit.gp_dump_query_oids
Then minirepro error should contain Error when executing function gp_toolkit.gp_dump_query_oids
@minirepro_core
Scenario: Dump database objects related with select query
......
......@@ -35,7 +35,8 @@ PARAMETERS
-U <username>
Greenplum Database user name to log into the database and run the
SQL command. Default is the OS user name running the utility.
SQL command. Default is the PGUSER environment variable. If PGUSER
is not defined, OS user name running the utility is used.
-p <port>
Port that is used to connect to Greenplum Database.
......@@ -65,6 +66,7 @@ from datetime import datetime
version = '1.10'
PATH_PREFIX = '/tmp/'
PGDUMP_FILE = 'pg_dump_out.sql'
sysnslist = "('pg_toast', 'pg_bitmapindex', 'pg_catalog', 'information_schema', 'gp_toolkit')"
pgoptions = '-c gp_session_role=utility'
......@@ -136,7 +138,12 @@ def dump_query(connectionInfo, query_file):
errormsg = p.communicate()[1]
sys.stderr.writelines('\nError when executing function gp_toolkit.gp_dump_query_oids.\n\n' + errormsg + '\n\n')
sys.exit(1)
return p.communicate()[0]
outmsg, errormsg = p.communicate()
if errormsg:
sys.stderr.writelines('\nError when executing function gp_toolkit.gp_dump_query_oids.\n\n' + errormsg + '\n\n')
sys.exit(1)
return outmsg
# relation and function oids will be extracted from the dump string
def parse_oids(cursor, json_oids):
......@@ -168,7 +175,7 @@ def parse_oids(cursor, json_oids):
return result
def pg_dump_object(mr_query, connectionInfo, envOpts):
out_file = PATH_PREFIX + 'minirepro.dp.sql'
out_file = PATH_PREFIX + PGDUMP_FILE
dmp_cmd = 'pg_dump -h %s -p %s -U %s -sxO %s' % connectionInfo
dmp_cmd = "%s --relation-oids %s --function-oids %s -f %s" % \
(dmp_cmd, mr_query.relids, mr_query.funcids, E(out_file))
......@@ -178,16 +185,6 @@ def pg_dump_object(mr_query, connectionInfo, envOpts):
sys.stderr.write('\nError while dumping schema.\n\n' + p.communicate()[1] + '\n\n')
sys.exit(1)
def print_obj_ddl(filename, f_out):
if filename.endswith('minirepro.dp.sql'):
f_path = os.path.join(PATH_PREFIX, filename)
with open(f_path, 'r') as f_opened:
line_no = 1
for line in f_opened:
if line_no == 12 or line_no > 16:
f_out.writelines(line)
line_no += 1
def dump_tuple_count(cur, oid_str, f_out):
stmt = "SELECT pgc.relname, pgn.nspname, pgc.relpages, pgc.reltuples FROM pg_class pgc, pg_namespace pgn " \
"WHERE pgc.relnamespace = pgn.oid and pgc.oid in (%s) and pgn.nspname NOT LIKE 'pg_temp_%%' " \
......@@ -257,7 +254,7 @@ def dump_stats(cur, oid_str, f_out):
if val is None:
val = 'NULL'
elif isinstance(val, (str, unicode)) and val[0] == '{':
val = "E'%s'" % val
val = "E'%s'" % E(val)
rowVals.append('\t{0}::{1}'.format(val, typ))
f_out.writelines(pstring.format(E(vals[0]), E(vals[2]), ',\n'.join(rowVals)))
......@@ -272,8 +269,8 @@ def main():
envOpts = os.environ
db = args[0]
host = options.host or platform.node()
user = options.user or os.getlogin()
port = options.port or envOpts['PGPORT'] or '5432'
user = options.user or ('PGUSER' in envOpts and envOpts['PGUSER']) or os.getlogin()
port = options.port or ('PGPORT' in envOpts and envOpts['PGPORT']) or '5432'
query_file = options.query_file
output_file = options.output_file
......@@ -286,6 +283,7 @@ def main():
if not os.path.isfile(query_file):
parser.error('Query file %s does not exist.' % query_file)
exit(1)
output_file = os.path.abspath(output_file)
timestamp = generate_timestamp()
global PATH_PREFIX
......@@ -349,8 +347,8 @@ def main():
# write relation and function DDLs
print "Writing relation and function DDLs ..."
for f in os.listdir(PATH_PREFIX):
print_obj_ddl(f, f_out)
with open(PATH_PREFIX + PGDUMP_FILE, 'r') as f_pgdump:
f_out.writelines(f_pgdump)
# explicitly allow editing of these pg_class & pg_statistic tables
f_out.writelines(['\n-- ',
......
......@@ -11,107 +11,69 @@
* AS '$libdir/gpoptutils', 'gp_dump_query_oids'
* LANGUAGE C STRICT;
*/
#include "postgres_fe.h"
#include "postgres.h"
#include "funcapi.h"
#include "utils/builtins.h"
#include "gpopt/utils/nodeutils.h"
#include "rewrite/rewriteHandler.h"
#include "tcop/tcopprot.h"
#include "c.h"
extern
List *pg_parse_and_rewrite(const char *query_string, Oid *paramTypes, int iNumParams);
extern
List *QueryRewrite(Query *parsetree);
static
void traverseQueryOids(Query *pquery, HTAB *relhtab, StringInfoData *relbuf, HTAB *funchtab, StringInfoData *funcbuf);
#define atooid(x) ((Oid) strtoul((x), NULL, 10))
Datum gp_dump_query_oids(PG_FUNCTION_ARGS);
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
PG_FUNCTION_INFO_V1(gp_dump_query_oids);
static void traverseQueryOids
static void
traverseQueryOids
(
Query *pquery,
HTAB *relhtab,
Query *pquery,
HTAB *relhtab,
StringInfoData *relbuf,
HTAB *funchtab,
HTAB *funchtab,
StringInfoData *funcbuf
)
{
ListCell *plc;
bool relFound, funcFound;
foreach (plc, pquery->rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(plc);
bool found;
const char *whitespace = " \t\n\r";
char *query = nodeToString(pquery);
char *token = strtok(query, whitespace);
switch (rte->rtekind)
while (token)
{
if (pg_strcasecmp(token, ":relid") == 0)
{
case RTE_RELATION:
token = strtok(NULL, whitespace);
if (token)
{
hash_search(relhtab, (void *)&rte->relid, HASH_ENTER, &relFound);
if (!relFound)
Oid relid = atooid(token);
hash_search(relhtab, (void *)&relid, HASH_ENTER, &found);
if (!found)
{
if (0 != relbuf->len)
if (relbuf->len != 0)
appendStringInfo(relbuf, "%s", ",");
appendStringInfo(relbuf, "%u", rte->relid);
appendStringInfo(relbuf, "%u", relid);
}
}
break;
case RTE_FUNCTION:
}
else if (pg_strcasecmp(token, ":funcid") == 0)
{
token = strtok(NULL, whitespace);
if (token)
{
FuncExpr *node = (FuncExpr *)rte->funcexpr;
hash_search(funchtab, (void *)&node->funcid, HASH_ENTER, &funcFound);
if (!funcFound)
Oid funcid = atooid(token);
hash_search(funchtab, (void *)&funcid, HASH_ENTER, &found);
if (!found)
{
if (0 != funcbuf->len)
if (funcbuf->len != 0)
appendStringInfo(funcbuf, "%s", ",");
appendStringInfo(funcbuf, "%u", node->funcid);
appendStringInfo(funcbuf, "%u", funcid);
}
}
break;
case RTE_SUBQUERY:
traverseQueryOids(rte->subquery, relhtab, relbuf, funchtab, funcbuf);
break;
default:
break;
}
}
foreach (plc, pquery->targetList)
{
Expr *expr = ((TargetEntry *) lfirst(plc))->expr;
if (expr->type == T_FuncExpr)
{
// expression node for a function call, i.e. select f();
FuncExpr *node = (FuncExpr *)expr;
hash_search(funchtab, (void *)&node->funcid, HASH_ENTER, &funcFound);
if (!funcFound)
{
if (0 != funcbuf->len)
appendStringInfo(funcbuf, "%s", ",");
appendStringInfo(funcbuf, "%u", node->funcid);
}
}
else if(expr->type == T_SubLink)
{
// subselect appearing in an expression
SubLink *sublink = (SubLink *)expr;
traverseQueryOids((Query *)sublink->subselect, relhtab, relbuf, funchtab, funcbuf);
}
}
foreach(plc, pquery->cteList)
{
CommonTableExpr *cte = (CommonTableExpr *) lfirst(plc);
traverseQueryOids((Query *)cte->ctequery, relhtab, relbuf, funchtab, funcbuf);
token = strtok(NULL, whitespace);
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册