提交 f1afa4f4 编写于 作者: P Peifeng Qiu 提交者: Adam Lee

s3ext: add regressions for s3 uploading

Add regression tests for writing a large number of files to S3, for join
queries between a local table and an S3 external table, and for reading mixed
data formats (CSV mixed with TEXT, or TEXT with different delimiters).
Signed-off-by: Kuien Liu <kliu@pivotal.io>
上级 34b9880b
......@@ -7,6 +7,7 @@ write_prefix := s3test\.pivotal\.io\/regress\/s3write\/$(shell date +%Y%m%d)-$(s
installcheck:
@echo "The sub-directory for this test instance is: $(write_prefix)"
@rm -rf source_replaced
@mkdir -p source_replaced
@cp -rf input source_replaced/input
@cp -rf output source_replaced/output
......
-- start_ignore
-- Regression input: issue many single-row INSERTs into a writable s3 external
-- table and read the rows back through a readable external table that points
-- at the same location.
--
-- Steps:
--   1. Register the s3 protocol using s3_import / s3_export from gps3ext.so.
--   2. Create a readable and a writable external table on .../lotsoffiles/.
--   3. Count the rows visible before anything has been written.
--   4. write_lots_of_files(n) runs n separate INSERT statements, one row each,
--      storing the loop counter in "volume". The "n files" wording in its
--      return value assumes every INSERT produces its own object(s) on S3
--      -- TODO confirm against the extension.
--   5. Check min/max/count of "volume" as read back, then drop everything.
--
-- @write_prefix@ and @config_file@ are placeholders; presumably the test
-- Makefile substitutes them before the run -- verify.
--
-- NOTE(review): this header is wrapped in start_ignore/end_ignore on the
-- assumption that the harness diffs output with gpdiff, so these echoed
-- comment lines need no counterpart in the expected output -- confirm.
-- end_ignore
CREATE OR REPLACE FUNCTION read_from_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_import' LANGUAGE C STABLE;
CREATE OR REPLACE FUNCTION write_to_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_export' LANGUAGE C STABLE;
CREATE PROTOCOL s3 (
readfunc = read_from_s3,
writefunc = write_to_s3
);
DROP EXTERNAL TABLE IF EXISTS s3write_lots_of_files_read;
DROP EXTERNAL TABLE IF EXISTS s3write_lots_of_files_write;
CREATE READABLE EXTERNAL TABLE s3write_lots_of_files_read (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/lotsoffiles/ config=@config_file@') FORMAT 'csv';
CREATE WRITABLE EXTERNAL TABLE s3write_lots_of_files_write (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/lotsoffiles/ config=@config_file@') FORMAT 'csv';
SELECT count(*) FROM s3write_lots_of_files_read;
CREATE OR REPLACE FUNCTION write_lots_of_files(n integer) RETURNS text AS
$$
DECLARE
i int;
BEGIN
FOR i IN SELECT * FROM generate_series(1, n) LOOP
INSERT INTO s3write_lots_of_files_write (date, time, status, sample1, sample2, volume)
SELECT current_date, localtime, (random() > 0.5)::bool,
trunc(random()::numeric, 8), trunc(random()::numeric, 8), i;
END LOOP;
RETURN textcat(textcat('Insert ', n), ' files');
END;
$$ LANGUAGE 'plpgsql';
SELECT * FROM write_lots_of_files(20);
select min(volume), max(volume), count(volume) from s3write_lots_of_files_read;
DROP FUNCTION write_lots_of_files (integer);
DROP EXTERNAL TABLE IF EXISTS s3write_lots_of_files_read;
DROP EXTERNAL TABLE IF EXISTS s3write_lots_of_files_write;
DROP PROTOCOL s3;
-- start_ignore
-- Regression input: join an s3 readable external table against a local
-- temporary table.
--
-- Steps:
--   1. Register the s3 protocol using s3_import / s3_export from gps3ext.so.
--   2. Create a readable and a writable external table on .../joinquery/.
--   3. Fill the randomly distributed temp table s3write_local_joinquery with
--      1000 rows whose "volume" values are 1..1000 (generate_series).
--   4. Copy those rows to S3 through the writable external table.
--   5. Join the rows read back from S3 with the local table on "volume" and
--      count the matches.
--
-- The temp table is not dropped explicitly; as a TEMP table it goes away with
-- the session.
--
-- @write_prefix@ and @config_file@ are placeholders; presumably the test
-- Makefile substitutes them before the run -- verify.
--
-- NOTE(review): this header is wrapped in start_ignore/end_ignore on the
-- assumption that the harness diffs output with gpdiff, so these echoed
-- comment lines need no counterpart in the expected output -- confirm.
-- end_ignore
CREATE OR REPLACE FUNCTION read_from_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_import' LANGUAGE C STABLE;
CREATE OR REPLACE FUNCTION write_to_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_export' LANGUAGE C STABLE;
CREATE PROTOCOL s3 (
readfunc = read_from_s3,
writefunc = write_to_s3
);
DROP EXTERNAL TABLE IF EXISTS s3write_join_query_read;
DROP EXTERNAL TABLE IF EXISTS s3write_join_query_write;
CREATE READABLE EXTERNAL TABLE s3write_join_query_read (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/joinquery/ config=@config_file@') FORMAT 'csv';
CREATE WRITABLE EXTERNAL TABLE s3write_join_query_write (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/joinquery/ config=@config_file@') FORMAT 'csv';
CREATE TEMP TABLE s3write_local_joinquery (date text, time text, status bool, sample1 float, sample2 float, volume int) DISTRIBUTED RANDOMLY;
INSERT INTO s3write_local_joinquery (date, time, status, sample1, sample2, volume)
SELECT current_date, localtime, (random() > 0.5)::bool, trunc(random()::numeric, 8), trunc(random()::numeric, 8), v
FROM generate_series(1, 1000) as v;
INSERT INTO s3write_join_query_write SELECT * FROM s3write_local_joinquery;
SELECT count(*) FROM s3write_join_query_read remote, s3write_local_joinquery local
WHERE remote.volume = local.volume;
DROP EXTERNAL TABLE IF EXISTS s3write_join_query_read;
DROP EXTERNAL TABLE IF EXISTS s3write_join_query_write;
DROP PROTOCOL s3;
-- start_ignore
-- Regression input: write rows in different formats to one S3 location and
-- read them back through a single FORMAT 'csv' readable external table.
--
-- Three writable external tables share the .../mixedfmt/ location:
--   s3write_mixedfmt_write_csv   FORMAT 'csv'
--   s3write_mixedfmt_write_txt   FORMAT 'text' (DELIMITER ',')
--   s3write_mixedfmt_write_txt2  FORMAT 'text' (no DELIMITER clause)
-- The first two reads after a write are expected to succeed; the read after
-- the write through s3write_mixedfmt_write_txt2 is expected to fail.
--
-- @write_prefix@ and @config_file@ are placeholders; presumably the test
-- Makefile substitutes them before the run -- verify.
--
-- NOTE(review): everything added by this revision is wrapped in
-- start_ignore/end_ignore on the assumption that the harness diffs output
-- with gpdiff, so the existing expected output keeps matching -- confirm.
-- end_ignore
CREATE OR REPLACE FUNCTION read_from_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_import' LANGUAGE C STABLE;
CREATE OR REPLACE FUNCTION write_to_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_export' LANGUAGE C STABLE;
CREATE PROTOCOL s3 (
readfunc = read_from_s3,
writefunc = write_to_s3
);
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_read;
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write;
-- start_ignore
-- The drop just above names s3write_mixedfmt_write, which nothing in this
-- script creates; it is left in place only because its echo and NOTICE are
-- part of the expected output. Pre-clean the writable tables that this script
-- really creates, so that a rerun against a database that still holds them
-- does not fail on CREATE.
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write_csv;
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write_txt;
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write_txt2;
-- end_ignore
CREATE READABLE EXTERNAL TABLE s3write_mixedfmt_read (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@') FORMAT 'csv';
CREATE WRITABLE EXTERNAL TABLE s3write_mixedfmt_write_csv (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@') FORMAT 'csv';
CREATE WRITABLE EXTERNAL TABLE s3write_mixedfmt_write_txt (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@') FORMAT 'text' (DELIMITER ',');
CREATE WRITABLE EXTERNAL TABLE s3write_mixedfmt_write_txt2 (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@') FORMAT 'text';
-- empty
SELECT * FROM s3write_mixedfmt_read;
-- one csv row
INSERT INTO s3write_mixedfmt_write_csv (date, time, status, sample1, sample2, volume)
VALUES ('2016-07-28', '12:00:00', false, 0.5, 0.3, 1);
-- one row
SELECT * FROM s3write_mixedfmt_read;
-- one text row with ',' delimiter
INSERT INTO s3write_mixedfmt_write_txt (date, time, status, sample1, sample2, volume)
VALUES ('2016-07-28', '12:00:00', true, 0.5, 0.3, 2);
-- two rows
SELECT * FROM s3write_mixedfmt_read;
INSERT INTO s3write_mixedfmt_write_txt2 (date, time, status, sample1, sample2, volume)
VALUES ('2016-07-28', '12:00:00', true, 0.5, 0.3, 3);
-- mixed formats, should fail
SELECT * FROM s3write_mixedfmt_read;
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_read;
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write_csv;
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write_txt;
-- start_ignore
-- s3write_mixedfmt_write_txt2 was created above but previously never dropped,
-- leaving a stray external table behind after the test.
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write_txt2;
-- end_ignore
DROP PROTOCOL s3;
-- start_ignore
-- Expected psql transcript (echoed statements followed by their results) for
-- the lots-of-files write test; presumably the expected-output counterpart of
-- 3_07_write_lots_of_files -- verify the file mapping.
--
-- Results recorded here:
--   * both pre-clean DROPs report that the table does not exist,
--   * the readable table is empty before any write (count = 0),
--   * write_lots_of_files(20) returns 'Insert 20 files',
--   * the read-back shows volume min = 1, max = 20, count = 20.
--
-- NOTE(review): this header is wrapped in start_ignore/end_ignore on the
-- assumption that the harness diffs output with gpdiff, so it does not have
-- to appear in the actual output -- confirm.
-- end_ignore
CREATE OR REPLACE FUNCTION read_from_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_import' LANGUAGE C STABLE;
CREATE OR REPLACE FUNCTION write_to_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_export' LANGUAGE C STABLE;
CREATE PROTOCOL s3 (
readfunc = read_from_s3,
writefunc = write_to_s3
);
DROP EXTERNAL TABLE IF EXISTS s3write_lots_of_files_read;
NOTICE: table "s3write_lots_of_files_read" does not exist, skipping
DROP EXTERNAL TABLE IF EXISTS s3write_lots_of_files_write;
NOTICE: table "s3write_lots_of_files_write" does not exist, skipping
CREATE READABLE EXTERNAL TABLE s3write_lots_of_files_read (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/lotsoffiles/ config=@config_file@') FORMAT 'csv';
CREATE WRITABLE EXTERNAL TABLE s3write_lots_of_files_write (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/lotsoffiles/ config=@config_file@') FORMAT 'csv';
SELECT count(*) FROM s3write_lots_of_files_read;
count
-------
0
(1 row)
CREATE OR REPLACE FUNCTION write_lots_of_files(n integer) RETURNS text AS
$$
DECLARE
i int;
BEGIN
FOR i IN SELECT * FROM generate_series(1, n) LOOP
INSERT INTO s3write_lots_of_files_write (date, time, status, sample1, sample2, volume)
SELECT current_date, localtime, (random() > 0.5)::bool,
trunc(random()::numeric, 8), trunc(random()::numeric, 8), i;
END LOOP;
RETURN textcat(textcat('Insert ', n), ' files');
END;
$$ LANGUAGE 'plpgsql';
SELECT * FROM write_lots_of_files(20);
write_lots_of_files
---------------------
Insert 20 files
(1 row)
select min(volume), max(volume), count(volume) from s3write_lots_of_files_read;
min | max | count
-----+-----+-------
1 | 20 | 20
(1 row)
DROP FUNCTION write_lots_of_files (integer);
DROP EXTERNAL TABLE IF EXISTS s3write_lots_of_files_read;
DROP EXTERNAL TABLE IF EXISTS s3write_lots_of_files_write;
DROP PROTOCOL s3;
-- start_ignore
-- Expected psql transcript (echoed statements followed by their results) for
-- the join-query test; presumably the expected-output counterpart of
-- 3_08_join_query_wet_local_tbl -- verify the file mapping.
--
-- Results recorded here:
--   * both pre-clean DROPs report that the table does not exist,
--   * joining the rows read back from S3 with the 1000-row local temp table
--     on "volume" yields count = 1000.
--
-- NOTE(review): this header is wrapped in start_ignore/end_ignore on the
-- assumption that the harness diffs output with gpdiff, so it does not have
-- to appear in the actual output -- confirm.
-- end_ignore
CREATE OR REPLACE FUNCTION read_from_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_import' LANGUAGE C STABLE;
CREATE OR REPLACE FUNCTION write_to_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_export' LANGUAGE C STABLE;
CREATE PROTOCOL s3 (
readfunc = read_from_s3,
writefunc = write_to_s3
);
DROP EXTERNAL TABLE IF EXISTS s3write_join_query_read;
NOTICE: table "s3write_join_query_read" does not exist, skipping
DROP EXTERNAL TABLE IF EXISTS s3write_join_query_write;
NOTICE: table "s3write_join_query_write" does not exist, skipping
CREATE READABLE EXTERNAL TABLE s3write_join_query_read (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/joinquery/ config=@config_file@') FORMAT 'csv';
CREATE WRITABLE EXTERNAL TABLE s3write_join_query_write (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/joinquery/ config=@config_file@') FORMAT 'csv';
CREATE TEMP TABLE s3write_local_joinquery (date text, time text, status bool, sample1 float, sample2 float, volume int) DISTRIBUTED RANDOMLY;
INSERT INTO s3write_local_joinquery (date, time, status, sample1, sample2, volume)
SELECT current_date, localtime, (random() > 0.5)::bool, trunc(random()::numeric, 8), trunc(random()::numeric, 8), v
FROM generate_series(1, 1000) as v;
INSERT INTO s3write_join_query_write SELECT * FROM s3write_local_joinquery;
SELECT count(*) FROM s3write_join_query_read remote, s3write_local_joinquery local
WHERE remote.volume = local.volume;
count
-------
1000
(1 row)
DROP EXTERNAL TABLE IF EXISTS s3write_join_query_read;
DROP EXTERNAL TABLE IF EXISTS s3write_join_query_write;
DROP PROTOCOL s3;
-- start_ignore
-- Expected psql transcript (echoed statements followed by their results) for
-- the mixed-format test; presumably the expected-output counterpart of
-- 4_02_wet_with_mixed_format -- verify the file mapping.
--
-- Results recorded here:
--   * the readable table is empty before any write,
--   * after the CSV write it returns one row, after the ','-delimited TEXT
--     write it returns two rows,
--   * after the write through s3write_mixedfmt_write_txt2 the read fails with
--     'missing data for column "time"'.
--
-- The DETAIL line of that error names the S3 location and the config file.
-- It uses the @write_prefix@ / @config_file@ placeholders, like every other
-- statement in this transcript, instead of the values of one particular run:
-- the prefix contains the run date, so a literal value could only ever match
-- the run it was captured from.
--
-- NOTE(review): this header is wrapped in start_ignore/end_ignore on the
-- assumption that the harness diffs output with gpdiff, so it does not have
-- to appear in the actual output -- confirm.
-- end_ignore
CREATE OR REPLACE FUNCTION read_from_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_import' LANGUAGE C STABLE;
CREATE OR REPLACE FUNCTION write_to_s3() RETURNS integer AS
'$libdir/gps3ext.so', 's3_export' LANGUAGE C STABLE;
CREATE PROTOCOL s3 (
readfunc = read_from_s3,
writefunc = write_to_s3
);
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_read;
NOTICE: table "s3write_mixedfmt_read" does not exist, skipping
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write;
NOTICE: table "s3write_mixedfmt_write" does not exist, skipping
CREATE READABLE EXTERNAL TABLE s3write_mixedfmt_read (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@') FORMAT 'csv';
CREATE WRITABLE EXTERNAL TABLE s3write_mixedfmt_write_csv (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@') FORMAT 'csv';
CREATE WRITABLE EXTERNAL TABLE s3write_mixedfmt_write_txt (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@') FORMAT 'text' (DELIMITER ',');
CREATE WRITABLE EXTERNAL TABLE s3write_mixedfmt_write_txt2 (date text, time text, status bool, sample1 float, sample2 float,
volume int) LOCATION('s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@') FORMAT 'text';
-- empty
SELECT * FROM s3write_mixedfmt_read;
date | time | status | sample1 | sample2 | volume
------+------+--------+---------+---------+--------
(0 rows)
-- one csv row
INSERT INTO s3write_mixedfmt_write_csv (date, time, status, sample1, sample2, volume)
VALUES ('2016-07-28', '12:00:00', false, 0.5, 0.3, 1);
-- one row
SELECT * FROM s3write_mixedfmt_read;
date | time | status | sample1 | sample2 | volume
------------+----------+--------+---------+---------+--------
2016-07-28 | 12:00:00 | f | 0.5 | 0.3 | 1
(1 row)
-- one text row with ',' delimiter
INSERT INTO s3write_mixedfmt_write_txt (date, time, status, sample1, sample2, volume)
VALUES ('2016-07-28', '12:00:00', true, 0.5, 0.3, 2);
-- two rows
SELECT * FROM s3write_mixedfmt_read;
date | time | status | sample1 | sample2 | volume
------------+----------+--------+---------+---------+--------
2016-07-28 | 12:00:00 | f | 0.5 | 0.3 | 1
2016-07-28 | 12:00:00 | t | 0.5 | 0.3 | 2
(2 rows)
INSERT INTO s3write_mixedfmt_write_txt2 (date, time, status, sample1, sample2, volume)
VALUES ('2016-07-28', '12:00:00', true, 0.5, 0.3, 3);
-- mixed formats, should fail
SELECT * FROM s3write_mixedfmt_read;
ERROR: missing data for column "time" (seg1 slice1 gpdb4dev:40001 pid=17945)
DETAIL: External table s3write_mixedfmt_read, line 1 of s3://s3-us-west-2.amazonaws.com/@write_prefix@/mixedfmt/ config=@config_file@: "2016-07-28 12:00:00 t 0.5 0.3 3"
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_read;
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write_csv;
DROP EXTERNAL TABLE IF EXISTS s3write_mixedfmt_write_txt;
DROP PROTOCOL s3;
......@@ -22,4 +22,7 @@ test: 3_03_insert_lots_of_rows
test: 3_04_insert_mixed_workload
test: 3_05_insert_to_wet_from_ret
test: 3_06_special_characters
test: 3_07_write_lots_of_files
test: 3_08_join_query_wet_local_tbl
test: 4_01_create_invalid_wet
test: 4_02_wet_with_mixed_format
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册