diff --git a/concourse/scripts/regression_tests_gphdfs.bash b/concourse/scripts/regression_tests_gphdfs.bash index 8bc060b18bbe554614d6f8b9228845923e5b423f..e894af8be48d9effea6a77b9533cf91f8e9e63ea 100755 --- a/concourse/scripts/regression_tests_gphdfs.bash +++ b/concourse/scripts/regression_tests_gphdfs.bash @@ -42,6 +42,7 @@ function gen_env(){ tar zxf /tmp/hadoop-2.7.3.tar.gz -C /tmp export HADOOP_HOME=/tmp/hadoop-2.7.3 + wget -O \${HADOOP_HOME}/share/hadoop/common/lib/parquet-hadoop-bundle-1.7.0.jar http://central.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.7.0/parquet-hadoop-bundle-1.7.0.jar cat > "\${HADOOP_HOME}/etc/hadoop/core-site.xml" <<-EOFF diff --git a/gpAux/extensions/gphdfs/regression/gphdfs_regress_schedule b/gpAux/extensions/gphdfs/regression/gphdfs_regress_schedule index adaf7f74a527aa728ee51ea18173be6ba97b4bef..9b1da335bd8c77b91f9618f24009c0da2b140061 100644 --- a/gpAux/extensions/gphdfs/regression/gphdfs_regress_schedule +++ b/gpAux/extensions/gphdfs/regression/gphdfs_regress_schedule @@ -45,3 +45,4 @@ test: varchar_Extreadwrite test: varchar_ExtwriteToHdfsToExtread test: varchar_hdfsTextToGPDB #test: MPP16780 +test: int_ExtreadwriteParquet \ No newline at end of file diff --git a/gpAux/extensions/gphdfs/regression/input/int_ExtreadwriteParquet.source b/gpAux/extensions/gphdfs/regression/input/int_ExtreadwriteParquet.source new file mode 100644 index 0000000000000000000000000000000000000000..e5a9c1ad296ee96c4cf85e37f3a3022a0e8815fa --- /dev/null +++ b/gpAux/extensions/gphdfs/regression/input/int_ExtreadwriteParquet.source @@ -0,0 +1,23 @@ +-- start_ignore +drop external table int_heap; +drop external table int_readhdfs_parquet; +drop external table int_writehdfs_parquet; +-- end_ignore + +create readable external table int_heap(datatype_int varchar,xcount_int bigint, max_int int, min_int int, x_int int, reverse_int int, increment_int int, nullcol_int int) location ('gpfdist://%localhost%:%gpfdistPort%/int.txt')format 'TEXT'; + +create readable external table int_readhdfs_parquet +( + like int_heap +) location ('gphdfs://%HDFSaddr%/extwrite/int_parquet') format 'PARQUET'; +-- start_ignore +create writable external table int_writehdfs_parquet +( + like int_heap +) location ('gphdfs://%HDFSaddr%/extwrite/int_parquet') format 'PARQUET'; +-- end_ignore + +insert into int_writehdfs_parquet +select * from int_heap; + +select count(*) from int_readhdfs_parquet; diff --git a/gpAux/extensions/gphdfs/regression/output/int_ExtreadwriteParquet.source b/gpAux/extensions/gphdfs/regression/output/int_ExtreadwriteParquet.source new file mode 100644 index 0000000000000000000000000000000000000000..cad7d2408e43c4d0fc6f1de45a0c3549d3c08ffb --- /dev/null +++ b/gpAux/extensions/gphdfs/regression/output/int_ExtreadwriteParquet.source @@ -0,0 +1,34 @@ +-- start_ignore +drop external table int_heap; +ERROR: table "int_heap" does not exist +drop external table int_readhdfs_parquet; +ERROR: table "int_readhdfs_parquet" does not exist +drop external table int_writehdfs_parquet; +ERROR: table "int_writehdfs_parquet" does not exist +-- end_ignore +create readable external table int_heap(datatype_int varchar,xcount_int bigint, max_int int, min_int int, x_int int, reverse_int int, increment_int int, nullcol_int int) location ('gphdfs://localhost:9000/plaintext/int.txt')format 'TEXT'; +create readable external table int_readhdfs_parquet +( + like int_heap +) location ('gphdfs://localhost:9000/extwrite/int_parquet') format 'PARQUET'; +-- start_ignore +create writable external table int_writehdfs_parquet +( + like int_heap +) location ('gphdfs://localhost:9000/extwrite/int_parquet') format 'PARQUET'; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table +--end_ignore +insert into int_writehdfs_parquet +select * from int_heap; +select count(*) from int_readhdfs_parquet; + count +------- + 5000 +(1 row) + +--start_ignore +\!/tmp/hadoop-2.7.3/bin/hadoop fs -rm -r /mapreduce/* +rm: `/mapreduce/*': No such file or directory +\!/tmp/hadoop-2.7.3/bin/hadoop fs -rm -r /mapred/* +rm: `/mapred/*': No such file or directory +--end_ignore \ No newline at end of file diff --git a/gpAux/extensions/gphdfs/src/java/1.2/com/emc/greenplum/gpdb/hadoop/formathandler/GpdbParquetFileReader.java b/gpAux/extensions/gphdfs/src/java/1.2/com/emc/greenplum/gpdb/hadoop/formathandler/GpdbParquetFileReader.java index cd2c033a6eee5fa2afd79a634ed0afa7b7e038d3..b27a927cea9ba2e90b8e97461e3a1ec426741184 100644 --- a/gpAux/extensions/gphdfs/src/java/1.2/com/emc/greenplum/gpdb/hadoop/formathandler/GpdbParquetFileReader.java +++ b/gpAux/extensions/gphdfs/src/java/1.2/com/emc/greenplum/gpdb/hadoop/formathandler/GpdbParquetFileReader.java @@ -1,5 +1,6 @@ package com.emc.greenplum.gpdb.hadoop.formathandler; +import java.io.BufferedOutputStream; import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -103,7 +104,7 @@ public class GpdbParquetFileReader { Collections.sort(toReadFileList); - DataOutputStream dos = new DataOutputStream(out); + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(out)); int counter = 0; for (FileStatus toRead : toReadFileList) {