diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c
index 87ecdab0ff720378ce67ac853401b3306c0d4300..276b54fe2caad2c84b4f08c9f60c925bed799731 100644
--- a/src/backend/utils/misc/guc_gp.c
+++ b/src/backend/utils/misc/guc_gp.c
@@ -3639,6 +3639,7 @@ struct config_int ConfigureNamesInt_gp[] =
 		{"gp_max_csv_line_length", PGC_USERSET, EXTERNAL_TABLES,
 			gettext_noop("Maximum allowed length of a csv input data row in bytes"),
 			NULL,
+			GUC_GPDB_ADDOPT
 		},
 		&gp_max_csv_line_length,
 		1 * 1024 * 1024, 32 * 1024, 4 * 1024 * 1024, NULL, NULL
diff --git a/src/test/regress/input/external_table.source b/src/test/regress/input/external_table.source
index b2e888542398da1a10436cbeccdf1d7ecf3ffb39..d6227c327244204db4940a9dc4589bc6cfc96169 100644
--- a/src/test/regress/input/external_table.source
+++ b/src/test/regress/input/external_table.source
@@ -1574,3 +1574,32 @@
 SET gp_log_gang TO DEFAULT;
 DROP FUNCTION exttab_error_context_callback_func();
 DROP EXTERNAL TABLE exttab_error_context_callback;
+
+-- --------------------------------------
+-- gp_max_csv_line_length
+-- --------------------------------------
+CREATE EXTERNAL TABLE gp_max_csv_line_length_issue (word text)
+LOCATION ('file://@hostname@/tmp/long_text.csv')
+FORMAT 'CSV';
+
+CREATE TABLE gp_max_csv_line_length_target (word text);
+
+-- generate a csv file with a single line > 3MB that contains line breaks
+COPY (
+    SELECT array_to_string(ARRAY(
+        SELECT array_to_string(ARRAY(
+            SELECT chr((65 + round(random() * 25)) :: integer)
+            FROM generate_series(1, 1023)
+        ), '') || chr(10)
+        FROM generate_series(1, 3*1024+512)), '')
+) TO '/tmp/long_text.csv' WITH CSV;
+
+-- This will fail because the data line is too long
+INSERT INTO gp_max_csv_line_length_target SELECT * FROM gp_max_csv_line_length_issue;
+
+SET gp_max_csv_line_length TO 4194304;
+-- This should pass with the increased gp_max_csv_line_length
+INSERT INTO gp_max_csv_line_length_target SELECT * FROM gp_max_csv_line_length_issue;
+
+DROP TABLE gp_max_csv_line_length_target;
+DROP EXTERNAL TABLE gp_max_csv_line_length_issue;
diff --git a/src/test/regress/output/external_table.source b/src/test/regress/output/external_table.source
index ec3079eb30521e35f3b46e053fd37d7cb2dc0b82..7e2ba662ee392965e50aedee8125eb7f587060e6 100644
--- a/src/test/regress/output/external_table.source
+++ b/src/test/regress/output/external_table.source
@@ -2971,3 +2971,30 @@ CONTEXT:  PL/pgSQL function "exttab_error_context_callback_func" line 4 at FOR o
 SET gp_log_gang TO DEFAULT;
 DROP FUNCTION exttab_error_context_callback_func();
 DROP EXTERNAL TABLE exttab_error_context_callback;
+-- --------------------------------------
+-- gp_max_csv_line_length
+-- --------------------------------------
+CREATE EXTERNAL TABLE gp_max_csv_line_length_issue (word text)
+LOCATION ('file://@hostname@/tmp/long_text.csv')
+FORMAT 'CSV';
+CREATE TABLE gp_max_csv_line_length_target (word text);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'word' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- generate a csv file with a single line > 3MB that contains line breaks
+COPY (
+    SELECT array_to_string(ARRAY(
+        SELECT array_to_string(ARRAY(
+            SELECT chr((65 + round(random() * 25)) :: integer)
+            FROM generate_series(1, 1023)
+        ), '') || chr(10)
+        FROM generate_series(1, 3*1024+512)), '')
+) TO '/tmp/long_text.csv' WITH CSV;
+-- This will fail because the data line is too long
+INSERT INTO gp_max_csv_line_length_target SELECT * FROM gp_max_csv_line_length_issue;
+ERROR:  data line too long. likely due to invalid csv data  (seg0 slice1 @hostname@:25432 pid=2592)
+DETAIL:  External table gp_max_csv_line_length_issue, line 1024 of file file://@hostname@/tmp/long_text.csv
+SET gp_max_csv_line_length TO 4194304;
+-- This should pass with the increased gp_max_csv_line_length
+INSERT INTO gp_max_csv_line_length_target SELECT * FROM gp_max_csv_line_length_issue;
+DROP TABLE gp_max_csv_line_length_target;
+DROP EXTERNAL TABLE gp_max_csv_line_length_issue;
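
Note on the fix: as I understand it, the GUC_GPDB_ADDOPT flag marks a setting as one of the options the coordinator dispatches to segment (QE) processes; presumably, before this change a session-level SET of gp_max_csv_line_length on the coordinator did not reach the segments that actually parse the external file, so they kept enforcing the 1MB default. With the flag added, the SET propagates, which is why the first INSERT in the test fails and the second succeeds. A minimal session-level usage sketch under that assumption (the table and file names below are hypothetical, not part of this patch):

    -- raise the per-session limit to the 4MB maximum before scanning a wide CSV
    SET gp_max_csv_line_length TO 4194304;
    CREATE EXTERNAL TABLE wide_rows_ext (payload text)
    LOCATION ('file://localhost/tmp/wide_rows.csv')
    FORMAT 'CSV';
    -- with GUC_GPDB_ADDOPT in place, the segment readers honor the 4MB limit for this session
    SELECT count(*) FROM wide_rows_ext;
    DROP EXTERNAL TABLE wide_rows_ext;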