提交 d4081ea1 编写于 作者: W Wang Hao

Fix gp_max_csv_line_length not taking effect in INSERT INTO ... SELECT ...

gp_max_csv_line_length is a session-level GUC. When changed within a
session, it affects statements like SELECT * FROM <external_table>,
but it does not work for INSERT INTO table SELECT * FROM <external_table>.
For such statements, the scan of the external table happens in a QE backend
process, not in the QD.
This fix adds the GUC_GPDB_ADDOPT flag so that setting this GUC at the
session level affects both the QD and the QE processes.
上级 b97bec43
......@@ -3639,6 +3639,7 @@ struct config_int ConfigureNamesInt_gp[] =
{"gp_max_csv_line_length", PGC_USERSET, EXTERNAL_TABLES,
gettext_noop("Maximum allowed length of a csv input data row in bytes"),
NULL,
GUC_GPDB_ADDOPT
},
&gp_max_csv_line_length,
1 * 1024 * 1024, 32 * 1024, 4 * 1024 * 1024, NULL, NULL
......
......@@ -1574,3 +1574,32 @@ SET gp_log_gang TO DEFAULT;
DROP FUNCTION exttab_error_context_callback_func();
DROP EXTERNAL TABLE exttab_error_context_callback;
-- --------------------------------------
-- gp_max_csv_line_length
-- --------------------------------------
-- Regression test: a session-level change of gp_max_csv_line_length must be
-- propagated to the QE backends (via GUC_GPDB_ADDOPT), so that
-- INSERT INTO ... SELECT FROM <external table> honors the new limit, not
-- only a plain SELECT executed on the QD.
CREATE EXTERNAL TABLE gp_max_csv_line_length_issue (word text)
LOCATION ('file://@hostname@/tmp/long_text.csv')
FORMAT 'CSV';
CREATE TABLE gp_max_csv_line_length_target (word text);
-- generate a csv with single line > 3MB with line breaks
-- The generated value is 3584 chunks of 1023 random letters plus an embedded
-- newline each (~3.5 MB total). Because the field contains newlines, COPY CSV
-- emits it as a single quoted, multi-line record — i.e. one logical CSV line
-- larger than the 1 MB default of gp_max_csv_line_length.
COPY (
SELECT array_to_string(ARRAY(
    SELECT array_to_string(ARRAY(
        SELECT chr((65 + round(random() * 25)) :: integer)
        FROM generate_series(1, 1023)
    ), '') || chr(10)
    FROM generate_series(1, 3*1024+512)), '')
) TO '/tmp/long_text.csv' WITH CSV;
-- This will fail as data too long
-- (default limit is 1 MB; the ERROR is raised by the QE scanning the file)
INSERT INTO gp_max_csv_line_length_target SELECT * FROM gp_max_csv_line_length_issue;
-- 4194304 = 4 MB, the maximum allowed value of this GUC; large enough for the
-- ~3.5 MB record. Without GUC_GPDB_ADDOPT this SET would only reach the QD.
SET gp_max_csv_line_length TO 4194304;
-- This should pass the gp_max_csv_line_length
INSERT INTO gp_max_csv_line_length_target SELECT * FROM gp_max_csv_line_length_issue;
-- NOTE(review): the GUC is not RESET and /tmp/long_text.csv is not removed;
-- later tests in this suite inherit the 4 MB limit — confirm this is intended.
DROP TABLE gp_max_csv_line_length_target;
DROP EXTERNAL TABLE gp_max_csv_line_length_issue;
......@@ -2971,3 +2971,30 @@ CONTEXT: PL/pgSQL function "exttab_error_context_callback_func" line 4 at FOR o
SET gp_log_gang TO DEFAULT;
DROP FUNCTION exttab_error_context_callback_func();
DROP EXTERNAL TABLE exttab_error_context_callback;
-- --------------------------------------
-- gp_max_csv_line_length
-- --------------------------------------
CREATE EXTERNAL TABLE gp_max_csv_line_length_issue (word text)
LOCATION ('file://@hostname@/tmp/long_text.csv')
FORMAT 'CSV';
CREATE TABLE gp_max_csv_line_length_target (word text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'word' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- generate a csv with single line > 3MB with line breaks
COPY (
SELECT array_to_string(ARRAY(
SELECT array_to_string(ARRAY(
SELECT chr((65 + round(random() * 25)) :: integer)
FROM generate_series(1, 1023)
), '') || chr(10)
FROM generate_series(1, 3*1024+512)), '')
) TO '/tmp/long_text.csv' WITH CSV;
-- This will fail as data too long
INSERT INTO gp_max_csv_line_length_target SELECT * FROM gp_max_csv_line_length_issue;
ERROR: data line too long. likely due to invalid csv data (seg0 slice1 @hostname@:25432 pid=2592)
DETAIL: External table gp_max_csv_line_length_issue, line 1024 of file file://@hostname@/tmp/long_text.csv
SET gp_max_csv_line_length TO 4194304;
-- This should pass the gp_max_csv_line_length
INSERT INTO gp_max_csv_line_length_target SELECT * FROM gp_max_csv_line_length_issue;
DROP TABLE gp_max_csv_line_length_target;
DROP EXTERNAL TABLE gp_max_csv_line_length_issue;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册