diff --git a/internal/storage/cwrapper/CMakeLists.txt b/internal/storage/cwrapper/CMakeLists.txt index ef18d29a1f9d2295b9ced0b0fcc9054bd2b62105..00c5cd59dd43da92b8c99eebdca4b5d2484a0154 100644 --- a/internal/storage/cwrapper/CMakeLists.txt +++ b/internal/storage/cwrapper/CMakeLists.txt @@ -42,7 +42,8 @@ macro( build_arrow ) set( ARROW_CMAKE_ARGS "-DARROW_WITH_LZ4=OFF" - "-DARROW_WITH_ZSTD=OFF" + "-DARROW_WITH_ZSTD=ON" + "-Dzstd_SOURCE=BUNDLED" "-DARROW_WITH_BROTLI=OFF" "-DARROW_WITH_SNAPPY=OFF" "-DARROW_WITH_ZLIB=OFF" diff --git a/internal/storage/cwrapper/ParquetWrapper.cpp b/internal/storage/cwrapper/ParquetWrapper.cpp index c9e4ccdd7a9c6b9694675685c2c49dd17b590ea3..cc73d7b9af9ef11bdecf6f8f45d07e0d50c6636e 100644 --- a/internal/storage/cwrapper/ParquetWrapper.cpp +++ b/internal/storage/cwrapper/ParquetWrapper.cpp @@ -302,6 +302,7 @@ CStatus FinishPayloadWriter(CPayloadWriter payloadWriter) { st.error_msg = ErrorMsg("arrow builder is nullptr"); return st; } + if (p->output == nullptr) { std::shared_ptr array; auto ast = p->builder->Finish(&array); @@ -310,10 +311,14 @@ CStatus FinishPayloadWriter(CPayloadWriter payloadWriter) { st.error_msg = ErrorMsg(ast.message()); return st; } + auto table = arrow::Table::Make(p->schema, {array}); p->output = std::make_shared(); auto mem_pool = arrow::default_memory_pool(); - ast = parquet::arrow::WriteTable(*table, mem_pool, p->output, 1024 * 1024 * 1024); + ast = parquet::arrow::WriteTable(*table, mem_pool, p->output, 1024 * 1024 * 1024, + parquet::WriterProperties::Builder().compression(arrow::Compression::ZSTD) + ->compression_level(3)->build()); + if (!ast.ok()) { st.error_code = static_cast(ErrorCode::UNEXPECTED_ERROR); st.error_msg = ErrorMsg(ast.message());