Commit bec4705e authored by: Oleksandr Diachenko

Enabled buffering in GPHDFS for a Parquet reader.

Parent cc799db4
......@@ -42,6 +42,7 @@ function gen_env(){
tar zxf /tmp/hadoop-2.7.3.tar.gz -C /tmp
export HADOOP_HOME=/tmp/hadoop-2.7.3
wget -O \${HADOOP_HOME}/share/hadoop/common/lib/parquet-hadoop-bundle-1.7.0.jar http://central.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.7.0/parquet-hadoop-bundle-1.7.0.jar
cat > "\${HADOOP_HOME}/etc/hadoop/core-site.xml" <<-EOFF
<configuration>
<property>
......
......@@ -45,3 +45,4 @@ test: varchar_Extreadwrite
test: varchar_ExtwriteToHdfsToExtread
test: varchar_hdfsTextToGPDB
#test: MPP16780
test: int_ExtreadwriteParquet
\ No newline at end of file
-- start_ignore
drop external table int_heap;
drop external table int_readhdfs_parquet;
drop external table int_writehdfs_parquet;
-- end_ignore
create readable external table int_heap(datatype_int varchar,xcount_int bigint, max_int int, min_int int, x_int int, reverse_int int, increment_int int, nullcol_int int) location ('gpfdist://%localhost%:%gpfdistPort%/int.txt')format 'TEXT';
create readable external table int_readhdfs_parquet
(
like int_heap
) location ('gphdfs://%HDFSaddr%/extwrite/int_parquet') format 'PARQUET';
-- start_ignore
create writable external table int_writehdfs_parquet
(
like int_heap
) location ('gphdfs://%HDFSaddr%/extwrite/int_parquet') format 'PARQUET';
-- end_ignore
insert into int_writehdfs_parquet
select * from int_heap;
select count(*) from int_readhdfs_parquet;
-- start_ignore
drop external table int_heap;
ERROR: table "int_heap" does not exist
drop external table int_readhdfs_parquet;
ERROR: table "int_readhdfs_parquet" does not exist
drop external table int_writehdfs_parquet;
ERROR: table "int_writehdfs_parquet" does not exist
-- end_ignore
create readable external table int_heap(datatype_int varchar,xcount_int bigint, max_int int, min_int int, x_int int, reverse_int int, increment_int int, nullcol_int int) location ('gphdfs://localhost:9000/plaintext/int.txt')format 'TEXT';
create readable external table int_readhdfs_parquet
(
like int_heap
) location ('gphdfs://localhost:9000/extwrite/int_parquet') format 'PARQUET';
-- start_ignore
create writable external table int_writehdfs_parquet
(
like int_heap
) location ('gphdfs://localhost:9000/extwrite/int_parquet') format 'PARQUET';
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table
--end_ignore
insert into int_writehdfs_parquet
select * from int_heap;
select count(*) from int_readhdfs_parquet;
count
-------
5000
(1 row)
--start_ignore
\!/tmp/hadoop-2.7.3/bin/hadoop fs -rm -r /mapreduce/*
rm: `/mapreduce/*': No such file or directory
\!/tmp/hadoop-2.7.3/bin/hadoop fs -rm -r /mapred/*
rm: `/mapred/*': No such file or directory
--end_ignore
\ No newline at end of file
package com.emc.greenplum.gpdb.hadoop.formathandler;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
......@@ -103,7 +104,7 @@ public class GpdbParquetFileReader {
Collections.sort(toReadFileList);
DataOutputStream dos = new DataOutputStream(out);
DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(out));
int counter = 0;
for (FileStatus toRead : toReadFileList) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register