未验证 提交 af3c52d5 编写于 作者: A alexey-milovidov 提交者: GitHub

Merge pull request #8334 from maxulan/parquet_list_reading_fix

Parquet list reading fix
......@@ -128,6 +128,9 @@
[submodule "contrib/icu"]
path = contrib/icu
url = https://github.com/unicode-org/icu.git
[submodule "contrib/flatbuffers"]
path = contrib/flatbuffers
url = https://github.com/google/flatbuffers.git
[submodule "contrib/libc-headers"]
path = contrib/libc-headers
url = https://github.com/ClickHouse-Extras/libc-headers.git
......@@ -54,10 +54,12 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
endif()
if(${USE_STATIC_LIBRARIES})
set(FLATBUFFERS_LIBRARY flatbuffers)
set(ARROW_LIBRARY arrow_static)
set(PARQUET_LIBRARY parquet_static)
set(THRIFT_LIBRARY thrift_static)
else()
set(FLATBUFFERS_LIBRARY flatbuffers_shared)
set(ARROW_LIBRARY arrow_shared)
set(PARQUET_LIBRARY parquet_shared)
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
......@@ -74,7 +76,7 @@ endif()
endif()
# Report the outcome of Parquet detection. A single status line lists every
# library variable involved, including the FlatBuffers one, instead of printing
# the shorter line and then the longer superset of it.
if(USE_PARQUET)
    message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY} ; ${FLATBUFFERS_LIBRARY}")
else()
    message(STATUS "Building without Parquet support")
endif()
......@@ -159,6 +159,8 @@ if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
set (ARROW_PARQUET ON CACHE INTERNAL "")
set (ARROW_VERBOSE_THIRDPARTY_BUILD ON CACHE INTERNAL "")
set (ARROW_BUILD_SHARED 1 CACHE INTERNAL "")
set (ARROW_BUILD_UTILITIES OFF CACHE INTERNAL "")
set (ARROW_BUILD_INTEGRATION OFF CACHE INTERNAL "")
set (ARROW_BOOST_HEADER_ONLY ON CACHE INTERNAL "")
set (Boost_FOUND 1 CACHE INTERNAL "")
if (MAKE_STATIC_LIBRARIES)
......
Subproject commit 87ac6fddaf21d0b4ee8b8090533ff293db0da1b4
Subproject commit b789226ccb2124285792107c758bb3b40b3d082a
include(ExternalProject)
# === thrift
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp)
......@@ -34,13 +36,13 @@ set(thriftcpp_SOURCES
${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp
${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp
${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp
)
# Thrift concurrency sources. They are passed to add_library() together with
# thriftcpp_SOURCES. The list is defined exactly once: an empty set() ahead of
# it and a trailing unmatched ")" have been dropped.
set(thriftcpp_threads_SOURCES
    ${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp
    ${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp
    ${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp
    ${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp
)
# Build the Thrift library from the core and concurrency source lists.
add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
# Pin this target to C++14; the trailing note marks the setting as temporary.
set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641
# Thrift headers are exported to consumers as SYSTEM includes; Boost headers are used only while building Thrift itself.
target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src PRIVATE ${Boost_INCLUDE_DIRS})
......@@ -70,13 +72,78 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
--cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
"${PROTO_DIR}/orc_proto.proto")
# === flatbuffers

# ------------------------------------------------------------------
# fbs, step 1: build the flatbuffers library and the flatc compiler
# ------------------------------------------------------------------

# Paths into the bundled FlatBuffers checkout and its build tree.
set(FLATBUFFERS_SRC_DIR ${ClickHouse_SOURCE_DIR}/contrib/flatbuffers)
set(FLATBUFFERS_INCLUDE_DIR ${FLATBUFFERS_SRC_DIR}/include)
set(FLATBUFFERS_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/flatbuffers)
set(FLATBUFFERS_COMPILER "${FLATBUFFERS_BINARY_DIR}/flatc")

# Options read by the FlatBuffers build: the compiler is always built, the
# tests never are.
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests")
set(FLATBUFFERS_BUILD_FLATC ON CACHE BOOL "Build flatbuffers compiler")

# Exactly one library flavour is enabled, chosen by USE_STATIC_LIBRARIES.
if (${USE_STATIC_LIBRARIES})
    set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library")
    set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library")
else ()
    set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "Disable the build of the flatbuffers library")
    set(FLATBUFFERS_BUILD_SHAREDLIB ON CACHE BOOL "Enable the build of the flatbuffers shared library")
endif ()

# Pull the FlatBuffers project into this build with the options above.
add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")
###################################
# fbs - Step 2: compile *.fbs files
###################################
# Runs flatc over Arrow's schema files and exposes the generated headers through
# the `metadata_fbs` target, which the Arrow library depends on.
set(ARROW_IPC_SRC_DIR ${ARROW_SRC_DIR}/arrow/ipc)
set(ARROW_FORMAT_SRC_DIR ${ARROW_SRC_DIR}/../../format)
set(FLATBUFFERS_COMPILED_OUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/arrow/ipc)

set(FBS_SRC
    ${ARROW_FORMAT_SRC_DIR}/Message.fbs
    ${ARROW_FORMAT_SRC_DIR}/File.fbs
    ${ARROW_FORMAT_SRC_DIR}/Schema.fbs
    ${ARROW_FORMAT_SRC_DIR}/Tensor.fbs
    ${ARROW_FORMAT_SRC_DIR}/SparseTensor.fbs
    ${ARROW_IPC_SRC_DIR}/feather.fbs)

# Start from empty lists so a value inherited from an enclosing scope cannot leak in.
set(ABS_FBS_SRC)
set(FBS_OUTPUT_FILES)
foreach (FIL ${FBS_SRC})
    get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
    list(APPEND ABS_FBS_SRC ${ABS_FIL})

    # flatc writes one <schema>_generated.h per input schema. Declaring all of
    # them as outputs (not just File/Message/feather) lets the build re-run
    # flatc when any is missing or stale; Message_generated.h includes the
    # Schema, Tensor and SparseTensor headers.
    get_filename_component(FIL_WE ${FIL} NAME_WE)
    list(APPEND FBS_OUTPUT_FILES "${FLATBUFFERS_COMPILED_OUT_DIR}/${FIL_WE}_generated.h")
endforeach ()

message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}, FLATBUFFERS_COMPILER: ${FLATBUFFERS_COMPILER}")
message(STATUS "FLATBUFFERS_COMPILED_OUT_DIR: ${FLATBUFFERS_COMPILED_OUT_DIR}")
message(STATUS "flatc: ${FLATBUFFERS_COMPILER} -c -o ${FLATBUFFERS_COMPILED_OUT_DIR}/ ${ABS_FBS_SRC}")

# NOTE(review): the command is invoked via a fixed path inside the FlatBuffers
# build tree; confirm that this matches where the `flatc` target is written for
# every generator in use.
add_custom_command(OUTPUT ${FBS_OUTPUT_FILES}
    COMMAND ${FLATBUFFERS_COMPILER}
    -c
    -o
    ${FLATBUFFERS_COMPILED_OUT_DIR}/
    ${ABS_FBS_SRC}
    DEPENDS flatc ${ABS_FBS_SRC}
    COMMENT "Running flatc compiler on ${ABS_FBS_SRC}"
    VERBATIM)

add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES})
add_dependencies(metadata_fbs flatc)
# The arrow-cmake CMake file calls an ORC CMake subroutine that detects certain
# compiler features. Apple Clang fails to compile those checks unless the C++11
# standard is specified, so the features are reported as absent and ORC itself
# then fails to compile. The ORC makefile contains code that sets the flags, but
# arrow-cmake ignores them, so the flag is set here - for Apple Clang only.
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
    set(CXX11_FLAGS "-std=c++0x")
endif ()
include(${ClickHouse_SOURCE_DIR}/contrib/orc/cmake_modules/CheckSourceCompiles.cmake)
include(orc_check.cmake)
......@@ -86,6 +153,7 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A
set(ORC_SRCS
${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc
${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc
${ORC_SOURCE_SRC_DIR}/Exceptions.cc
${ORC_SOURCE_SRC_DIR}/OrcFile.cc
${ORC_SOURCE_SRC_DIR}/Reader.cc
......@@ -119,126 +187,165 @@ set(ORC_SRCS
# === arrow
# LIBRARY_DIR now refers to the Arrow C++ sources; the source lists below are relative to it.
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow)
# Instantiate Arrow's config.h from its template.
# NOTE(review): the output path is under CMAKE_CURRENT_SOURCE_DIR, i.e. the file is written into the source tree - confirm this is intended.
configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/arrow/util/config.h")
# arrow/cpp/src/arrow/CMakeLists.txt
#
# Sources of the Arrow core library, grouped by subdirectory. Each file is
# listed once. Where the same file appeared under both a hyphenated and an
# underscored name (bit-util/bit_util, cpu-info/cpu_info, ...), only the
# underscored spelling is kept, matching the other multi-word names here.
# Commented-out entries are sources that are deliberately not compiled.
set(ARROW_SRCS
    ${LIBRARY_DIR}/array.cc
    ${LIBRARY_DIR}/buffer.cc
    ${LIBRARY_DIR}/builder.cc
    ${LIBRARY_DIR}/compare.cc
    ${LIBRARY_DIR}/extension_type.cc
    ${LIBRARY_DIR}/memory_pool.cc
    ${LIBRARY_DIR}/pretty_print.cc
    ${LIBRARY_DIR}/record_batch.cc
    ${LIBRARY_DIR}/result.cc
    ${LIBRARY_DIR}/scalar.cc
    ${LIBRARY_DIR}/sparse_tensor.cc
    ${LIBRARY_DIR}/status.cc
    ${LIBRARY_DIR}/table_builder.cc
    ${LIBRARY_DIR}/table.cc
    ${LIBRARY_DIR}/tensor.cc
    ${LIBRARY_DIR}/type.cc
    ${LIBRARY_DIR}/visitor.cc

    ${LIBRARY_DIR}/array/builder_adaptive.cc
    ${LIBRARY_DIR}/array/builder_base.cc
    ${LIBRARY_DIR}/array/builder_binary.cc
    ${LIBRARY_DIR}/array/builder_decimal.cc
    ${LIBRARY_DIR}/array/builder_dict.cc
    ${LIBRARY_DIR}/array/builder_nested.cc
    ${LIBRARY_DIR}/array/builder_primitive.cc
    ${LIBRARY_DIR}/array/builder_union.cc
    ${LIBRARY_DIR}/array/concatenate.cc
    ${LIBRARY_DIR}/array/dict_internal.cc
    ${LIBRARY_DIR}/array/diff.cc

    ${LIBRARY_DIR}/csv/converter.cc
    ${LIBRARY_DIR}/csv/chunker.cc
    ${LIBRARY_DIR}/csv/column_builder.cc
    ${LIBRARY_DIR}/csv/options.cc
    ${LIBRARY_DIR}/csv/parser.cc
    ${LIBRARY_DIR}/csv/reader.cc

    ${LIBRARY_DIR}/ipc/dictionary.cc
    ${LIBRARY_DIR}/ipc/feather.cc
    # ${LIBRARY_DIR}/ipc/file_to_stream.cc
    ${LIBRARY_DIR}/ipc/message.cc
    ${LIBRARY_DIR}/ipc/metadata_internal.cc
    ${LIBRARY_DIR}/ipc/options.cc
    ${LIBRARY_DIR}/ipc/reader.cc
    # ${LIBRARY_DIR}/ipc/stream_to_file.cc
    ${LIBRARY_DIR}/ipc/writer.cc

    ${LIBRARY_DIR}/io/buffered.cc
    ${LIBRARY_DIR}/io/compressed.cc
    ${LIBRARY_DIR}/io/file.cc
    ${LIBRARY_DIR}/io/interfaces.cc
    ${LIBRARY_DIR}/io/memory.cc
    ${LIBRARY_DIR}/io/readahead.cc
    ${LIBRARY_DIR}/io/slow.cc

    ${LIBRARY_DIR}/util/basic_decimal.cc
    ${LIBRARY_DIR}/util/bit_util.cc
    # ${LIBRARY_DIR}/util/compression_brotli.cc
    # ${LIBRARY_DIR}/util/compression_bz2.cc
    ${LIBRARY_DIR}/util/compression.cc
    ${LIBRARY_DIR}/util/compression_lz4.cc
    ${LIBRARY_DIR}/util/compression_snappy.cc
    ${LIBRARY_DIR}/util/compression_zlib.cc
    ${LIBRARY_DIR}/util/compression_zstd.cc
    ${LIBRARY_DIR}/util/cpu_info.cc
    ${LIBRARY_DIR}/util/decimal.cc
    ${LIBRARY_DIR}/util/int_util.cc
    ${LIBRARY_DIR}/util/io_util.cc
    ${LIBRARY_DIR}/util/key_value_metadata.cc
    ${LIBRARY_DIR}/util/logging.cc
    ${LIBRARY_DIR}/util/memory.cc
    ${LIBRARY_DIR}/util/string_builder.cc
    ${LIBRARY_DIR}/util/string.cc
    ${LIBRARY_DIR}/util/task_group.cc
    ${LIBRARY_DIR}/util/thread_pool.cc
    ${LIBRARY_DIR}/util/trie.cc
    # ${LIBRARY_DIR}/util/uri.cc
    ${LIBRARY_DIR}/util/utf8.cc

    ${LIBRARY_DIR}/vendored/base64.cpp
    ${ORC_SRCS}
)
# Compute kernels built into the Arrow library. The internal-utility source is
# listed once, under its underscored name (it previously appeared as both
# util-internal.cc and util_internal.cc, followed by an unmatched ")").
list(APPEND ARROW_SRCS
    ${LIBRARY_DIR}/compute/context.cc
    ${LIBRARY_DIR}/compute/kernels/boolean.cc
    ${LIBRARY_DIR}/compute/kernels/cast.cc
    ${LIBRARY_DIR}/compute/kernels/hash.cc
    ${LIBRARY_DIR}/compute/kernels/util_internal.cc
)
# Enable each compression codec only when both its include directory and its
# library variable are set. Every condition is opened and closed exactly once.
if (LZ4_INCLUDE_DIR AND LZ4_LIBRARY)
    set(ARROW_WITH_LZ4 1)
endif ()

if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
    set(ARROW_WITH_SNAPPY 1)
endif ()

if (ZLIB_INCLUDE_DIR AND ZLIB_LIBRARIES)
    set(ARROW_WITH_ZLIB 1)
endif ()

if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY)
    set(ARROW_WITH_ZSTD 1)
endif ()
# For each enabled codec, define the matching ARROW_WITH_* macro for this
# directory and put the codec's implementation file at the front of ARROW_SRCS.
# Each block is closed by a single endif.
# NOTE(review): the base ARROW_SRCS list already names the compression_*.cc
# files unconditionally; confirm whether these conditional additions are still needed.
if (ARROW_WITH_LZ4)
    add_definitions(-DARROW_WITH_LZ4)
    set(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS})
endif ()

if (ARROW_WITH_SNAPPY)
    add_definitions(-DARROW_WITH_SNAPPY)
    set(ARROW_SRCS ${LIBRARY_DIR}/util/compression_snappy.cc ${ARROW_SRCS})
endif ()

if (ARROW_WITH_ZLIB)
    add_definitions(-DARROW_WITH_ZLIB)
    set(ARROW_SRCS ${LIBRARY_DIR}/util/compression_zlib.cc ${ARROW_SRCS})
endif ()

if (ARROW_WITH_ZSTD)
    add_definitions(-DARROW_WITH_ZSTD)
    set(ARROW_SRCS ${LIBRARY_DIR}/util/compression_zstd.cc ${ARROW_SRCS})
endif ()
# Define the Arrow library target and wire up its dependencies.
add_library(${ARROW_LIBRARY} ${ARROW_SRCS})

# Arrow dependencies
# The generated FlatBuffers headers (metadata_fbs) and the FlatBuffers library
# must exist before Arrow sources are compiled.
add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs)

target_link_libraries(${ARROW_LIBRARY} PRIVATE boost_system_internal boost_filesystem_internal boost_regex_internal)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY})

if (USE_INTERNAL_PROTOBUF_LIBRARY)
    add_dependencies(${ARROW_LIBRARY} protoc)
endif ()

target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS})
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY})

# Link each compression library only when its codec was enabled.
if (ARROW_WITH_LZ4)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY})
endif ()
if (ARROW_WITH_SNAPPY)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${SNAPPY_LIBRARY})
endif ()
if (ARROW_WITH_ZLIB)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZLIB_LIBRARIES})
endif ()
if (ARROW_WITH_ZSTD)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZSTD_LIBRARY})
endif ()

# SYSTEM has to precede the scope keyword. Written as "PRIVATE SYSTEM <dir>",
# the word SYSTEM is taken as an include directory of its own and <dir> is not
# marked as a system include.
target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_INCLUDE_DIR})
target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_SOURCE_SRC_DIR})
......@@ -248,52 +355,54 @@ target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_SRC_DIR})
# Private include directories for building Arrow with ORC and FlatBuffers.
# SYSTEM has to precede the scope keyword. Written as "PRIVATE SYSTEM <dir>",
# the word SYSTEM is taken as an include directory of its own and <dir> is not
# marked as a system include.
target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_BUILD_INCLUDE_DIR})
target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ORC_ADDITION_SOURCE_DIR})
target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${ARROW_SRC_DIR})
target_include_directories(${ARROW_LIBRARY} SYSTEM PRIVATE ${FLATBUFFERS_INCLUDE_DIR})
# === parquet
# LIBRARY_DIR now refers to the Parquet C++ sources inside the Arrow checkout.
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet)

# arrow/cpp/src/parquet/CMakeLists.txt
# The list is closed by a single ")"; a second, unmatched one was removed.
# NOTE(review): no entry is a duplicate of another, so all are kept as found.
# Confirm that each file (in particular arrow/record_reader.cc,
# util/comparison.cc and util/memory.cc) exists in the pinned Arrow revision.
set(PARQUET_SRCS
    ${LIBRARY_DIR}/arrow/reader.cc
    ${LIBRARY_DIR}/arrow/record_reader.cc
    ${LIBRARY_DIR}/arrow/reader_internal.cc
    ${LIBRARY_DIR}/arrow/schema.cc
    ${LIBRARY_DIR}/arrow/writer.cc
    ${LIBRARY_DIR}/bloom_filter.cc
    ${LIBRARY_DIR}/column_reader.cc
    ${LIBRARY_DIR}/column_scanner.cc
    ${LIBRARY_DIR}/column_writer.cc
    ${LIBRARY_DIR}/deprecated_io.cc
    ${LIBRARY_DIR}/encoding.cc
    ${LIBRARY_DIR}/file_reader.cc
    ${LIBRARY_DIR}/file_writer.cc
    ${LIBRARY_DIR}/metadata.cc
    ${LIBRARY_DIR}/murmur3.cc
    ${LIBRARY_DIR}/platform.cc
    ${LIBRARY_DIR}/printer.cc
    ${LIBRARY_DIR}/properties.cc
    ${LIBRARY_DIR}/schema.cc
    ${LIBRARY_DIR}/statistics.cc
    ${LIBRARY_DIR}/types.cc
    ${LIBRARY_DIR}/util/comparison.cc
    ${LIBRARY_DIR}/util/memory.cc
)
#list(TRANSFORM PARQUET_SRCS PREPEND ${LIBRARY_DIR}/) # cmake 3.12
# Append the Parquet thrift sources kept alongside this CMake file. The call is
# closed by a single ")".
list(APPEND PARQUET_SRCS
    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_constants.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_types.cpp
)
# Define the Parquet library target. Arrow is a PUBLIC dependency (propagated
# to consumers); Thrift and Boost.Regex are PRIVATE (implementation only).
add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} ${Boost_REGEX_LIBRARY})
target_include_directories(${PARQUET_LIBRARY} PRIVATE ${Boost_INCLUDE_DIRS})

# When the build uses the "undefined" sanitizer, switch it off for these two
# third-party libraries. The condition is written once instead of being nested
# inside an identical copy of itself.
if (SANITIZE STREQUAL "undefined")
    target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined)
    target_compile_options(${ARROW_LIBRARY} PRIVATE -fno-sanitize=undefined)
endif ()
# === tools
# Build the Parquet command-line tools, one executable per source file in
# TOOLS_DIR, each linked against the Parquet library. A single loop declares
# each target once; the previous nested pair of identical loops would have
# declared every executable several times. The underscored tool names are the
# ones that were in effect for the inner loop, so they are the ones kept.
set(TOOLS_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/tools/parquet)
set(PARQUET_TOOLS parquet_dump_schema parquet_reader parquet_scan)
foreach (TOOL ${PARQUET_TOOLS})
    add_executable(${TOOL} ${TOOLS_DIR}/${TOOL}.cc)
    target_link_libraries(${TOOL} PRIVATE ${PARQUET_LIBRARY})
endforeach ()
// automatically generated by the FlatBuffers compiler, do not modify
#ifndef FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
#define FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
#include "flatbuffers/flatbuffers.h"
#include "Schema_generated.h"
namespace org {
namespace apache {
namespace arrow {
namespace flatbuf {
struct Footer;
struct Block;
/// Fixed-layout struct holding three values (offset, metaDataLength,
/// bodyLength) plus one explicit padding member. Members are stored via
/// flatbuffers::EndianScalar and read back through the same function.
FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Block FLATBUFFERS_FINAL_CLASS {
private:
int64_t offset_;
int32_t metaDataLength_;
// Explicit filler between the 32-bit member above and the 64-bit member below.
int32_t padding0__;
int64_t bodyLength_;
public:
/// Zero-fills the whole object, padding included.
Block() {
memset(static_cast<void *>(this), 0, sizeof(Block));
}
/// Stores each argument after passing it through flatbuffers::EndianScalar;
/// the padding member is set to 0.
Block(int64_t _offset, int32_t _metaDataLength, int64_t _bodyLength)
: offset_(flatbuffers::EndianScalar(_offset)),
metaDataLength_(flatbuffers::EndianScalar(_metaDataLength)),
padding0__(0),
bodyLength_(flatbuffers::EndianScalar(_bodyLength)) {
// References the padding member; presumably silences unused-member warnings.
(void)padding0__;
}
/// Index to the start of the RecordBlock (note this is past the Message header)
int64_t offset() const {
return flatbuffers::EndianScalar(offset_);
}
/// Length of the metadata
int32_t metaDataLength() const {
return flatbuffers::EndianScalar(metaDataLength_);
}
/// Length of the data (this is aligned so there can be a gap between this and
/// the metadata).
int64_t bodyLength() const {
return flatbuffers::EndianScalar(bodyLength_);
}
};
// The macro receives the struct name and 24; presumably the expected size in
// bytes (8 + 4 + 4 + 8) - confirm against the flatbuffers headers.
FLATBUFFERS_STRUCT_END(Block, 24);
/// ----------------------------------------------------------------------
/// Arrow File metadata
///
/// Read-only table view with four fields: version, schema, dictionaries and
/// recordBatches. Each accessor looks its field up by the vtable offset
/// declared in FlatBuffersVTableOffset.
struct Footer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_VERSION = 4,
VT_SCHEMA = 6,
VT_DICTIONARIES = 8,
VT_RECORDBATCHES = 10
};
/// Version field, read as int16_t with 0 passed as the default and cast to MetadataVersion.
MetadataVersion version() const {
return static_cast<MetadataVersion>(GetField<int16_t>(VT_VERSION, 0));
}
/// Pointer to the Schema table, as returned by GetPointer for VT_SCHEMA.
const Schema *schema() const {
return GetPointer<const Schema *>(VT_SCHEMA);
}
/// Pointer to the vector of Block structs stored under VT_DICTIONARIES.
const flatbuffers::Vector<const Block *> *dictionaries() const {
return GetPointer<const flatbuffers::Vector<const Block *> *>(VT_DICTIONARIES);
}
/// Pointer to the vector of Block structs stored under VT_RECORDBATCHES.
const flatbuffers::Vector<const Block *> *recordBatches() const {
return GetPointer<const flatbuffers::Vector<const Block *> *>(VT_RECORDBATCHES);
}
/// Runs the verifier over every field in declaration order. The checks are
/// chained with &&, so evaluation stops at the first one that fails.
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int16_t>(verifier, VT_VERSION) &&
VerifyOffset(verifier, VT_SCHEMA) &&
verifier.VerifyTable(schema()) &&
VerifyOffset(verifier, VT_DICTIONARIES) &&
verifier.VerifyVector(dictionaries()) &&
VerifyOffset(verifier, VT_RECORDBATCHES) &&
verifier.VerifyVector(recordBatches()) &&
verifier.EndTable();
}
};
/// Incremental writer for a Footer table: the constructor starts a table on
/// the given FlatBufferBuilder, the add_* methods write individual fields, and
/// Finish() ends the table.
struct FooterBuilder {
// Builder that receives the table; held by reference.
flatbuffers::FlatBufferBuilder &fbb_;
// Value returned by StartTable(), later handed to EndTable().
flatbuffers::uoffset_t start_;
/// Writes the version as int16_t; 0 is passed as the default value.
void add_version(MetadataVersion version) {
fbb_.AddElement<int16_t>(Footer::VT_VERSION, static_cast<int16_t>(version), 0);
}
/// Writes the offset of an already-built Schema.
void add_schema(flatbuffers::Offset<Schema> schema) {
fbb_.AddOffset(Footer::VT_SCHEMA, schema);
}
/// Writes the offset of an already-built vector of Block structs.
void add_dictionaries(flatbuffers::Offset<flatbuffers::Vector<const Block *>> dictionaries) {
fbb_.AddOffset(Footer::VT_DICTIONARIES, dictionaries);
}
/// Writes the offset of an already-built vector of Block structs.
void add_recordBatches(flatbuffers::Offset<flatbuffers::Vector<const Block *>> recordBatches) {
fbb_.AddOffset(Footer::VT_RECORDBATCHES, recordBatches);
}
/// Binds to `_fbb` and starts a new table on it.
explicit FooterBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
// Copy assignment is declared here without a body.
FooterBuilder &operator=(const FooterBuilder &);
/// Ends the table and returns its offset typed as Offset<Footer>.
flatbuffers::Offset<Footer> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<Footer>(end);
return o;
}
};
/// Builds a complete Footer table in `_fbb` from already-serialized parts and
/// returns its offset. Every parameter after `_fbb` has a default, so callers
/// may omit trailing fields.
inline flatbuffers::Offset<Footer> CreateFooter(
flatbuffers::FlatBufferBuilder &_fbb,
MetadataVersion version = MetadataVersion_V1,
flatbuffers::Offset<Schema> schema = 0,
flatbuffers::Offset<flatbuffers::Vector<const Block *>> dictionaries = 0,
flatbuffers::Offset<flatbuffers::Vector<const Block *>> recordBatches = 0) {
FooterBuilder builder_(_fbb);
// Fields are added in the reverse of their declaration order; keep the
// sequence as emitted by the generator.
builder_.add_recordBatches(recordBatches);
builder_.add_dictionaries(dictionaries);
builder_.add_schema(schema);
builder_.add_version(version);
return builder_.Finish();
}
/// Convenience wrapper around CreateFooter that accepts the Block lists as
/// std::vector pointers. A non-null vector is serialized into `_fbb` first
/// (dictionaries before recordBatches); a null pointer yields a zero offset.
inline flatbuffers::Offset<Footer> CreateFooterDirect(
    flatbuffers::FlatBufferBuilder &_fbb,
    MetadataVersion version = MetadataVersion_V1,
    flatbuffers::Offset<Schema> schema = 0,
    const std::vector<Block> *dictionaries = nullptr,
    const std::vector<Block> *recordBatches = nullptr) {
  typedef flatbuffers::Offset<flatbuffers::Vector<const Block *>> BlockVectorOffset;

  BlockVectorOffset dictionaries_offset = 0;
  if (dictionaries) {
    dictionaries_offset = _fbb.CreateVectorOfStructs<Block>(*dictionaries);
  }

  BlockVectorOffset record_batches_offset = 0;
  if (recordBatches) {
    record_batches_offset = _fbb.CreateVectorOfStructs<Block>(*recordBatches);
  }

  return org::apache::arrow::flatbuf::CreateFooter(
      _fbb, version, schema, dictionaries_offset, record_batches_offset);
}
/// Returns the Footer found at the root of `buf` (via flatbuffers::GetRoot).
inline const org::apache::arrow::flatbuf::Footer *GetFooter(const void *buf) {
return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Footer>(buf);
}
/// Same as GetFooter, but goes through flatbuffers::GetSizePrefixedRoot.
inline const org::apache::arrow::flatbuf::Footer *GetSizePrefixedFooter(const void *buf) {
return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Footer>(buf);
}
/// Verifies the verifier's buffer as a Footer; nullptr is passed as the
/// VerifyBuffer argument.
inline bool VerifyFooterBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Footer>(nullptr);
}
/// Size-prefixed counterpart of VerifyFooterBuffer.
inline bool VerifySizePrefixedFooterBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Footer>(nullptr);
}
/// Finishes `fbb` with `root` as its root object.
inline void FinishFooterBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Footer> root) {
fbb.Finish(root);
}
/// Size-prefixed counterpart of FinishFooterBuffer.
inline void FinishSizePrefixedFooterBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Footer> root) {
fbb.FinishSizePrefixed(root);
}
} // namespace flatbuf
} // namespace arrow
} // namespace apache
} // namespace org
#endif // FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
// automatically generated by the FlatBuffers compiler, do not modify
#ifndef FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
#define FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
#include "flatbuffers/flatbuffers.h"
#include "Schema_generated.h"
#include "SparseTensor_generated.h"
#include "Tensor_generated.h"
namespace org {
namespace apache {
namespace arrow {
namespace flatbuf {
struct FieldNode;
struct RecordBatch;
struct DictionaryBatch;
struct Message;
/// ----------------------------------------------------------------------
/// The root Message type
/// This union enables us to easily send different message types without
/// redundant storage, and in the future we can easily add new message types.
///
/// Arrow implementations do not need to implement all of the message types,
/// which may include experimental metadata types. For maximum compatibility,
/// it is best to send data using RecordBatch
///
/// Enumerator values are consecutive from 0 to 5; the _MIN and _MAX entries
/// alias the first and last real enumerators.
enum MessageHeader {
MessageHeader_NONE = 0,
MessageHeader_Schema = 1,
MessageHeader_DictionaryBatch = 2,
MessageHeader_RecordBatch = 3,
MessageHeader_Tensor = 4,
MessageHeader_SparseTensor = 5,
MessageHeader_MIN = MessageHeader_NONE,
MessageHeader_MAX = MessageHeader_SparseTensor
};
/// Returns a reference to a static array of the six MessageHeader enumerators,
/// listed in ascending value order.
inline const MessageHeader (&EnumValuesMessageHeader())[6] {
static const MessageHeader values[] = {
MessageHeader_NONE,
MessageHeader_Schema,
MessageHeader_DictionaryBatch,
MessageHeader_RecordBatch,
MessageHeader_Tensor,
MessageHeader_SparseTensor
};
return values;
}
/// Returns a static, nullptr-terminated table of enumerator names. The entry
/// at index i names the enumerator whose value is i.
inline const char * const *EnumNamesMessageHeader() {
static const char * const names[] = {
"NONE",
"Schema",
"DictionaryBatch",
"RecordBatch",
"Tensor",
"SparseTensor",
nullptr
};
return names;
}
/// Looks up the name of `e` in the table returned by EnumNamesMessageHeader().
/// Values outside [MessageHeader_NONE, MessageHeader_SparseTensor] yield "".
inline const char *EnumNameMessageHeader(MessageHeader e) {
  const bool out_of_range =
      (e < MessageHeader_NONE) || (e > MessageHeader_SparseTensor);
  if (out_of_range) {
    return "";
  }
  const char * const *name_table = EnumNamesMessageHeader();
  return name_table[static_cast<size_t>(e)];
}
/// Compile-time mapping from a header table type to its MessageHeader
/// enumerator. The primary template yields MessageHeader_NONE; each
/// specialization below yields the enumerator named after its type.
template<typename T> struct MessageHeaderTraits {
static const MessageHeader enum_value = MessageHeader_NONE;
};
template<> struct MessageHeaderTraits<Schema> {
static const MessageHeader enum_value = MessageHeader_Schema;
};
template<> struct MessageHeaderTraits<DictionaryBatch> {
static const MessageHeader enum_value = MessageHeader_DictionaryBatch;
};
template<> struct MessageHeaderTraits<RecordBatch> {
static const MessageHeader enum_value = MessageHeader_RecordBatch;
};
template<> struct MessageHeaderTraits<Tensor> {
static const MessageHeader enum_value = MessageHeader_Tensor;
};
template<> struct MessageHeaderTraits<SparseTensor> {
static const MessageHeader enum_value = MessageHeader_SparseTensor;
};
// Declarations only: verification of a single header object of the given type,
// and of a vector of header objects paired with a vector of type codes.
// Message::Verify calls the first of these; the definitions are not in this
// part of the file.
bool VerifyMessageHeader(flatbuffers::Verifier &verifier, const void *obj, MessageHeader type);
bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
/// ----------------------------------------------------------------------
/// Data structures for describing a table row batch (a collection of
/// equal-length Arrow arrays)
/// Metadata about a field at some level of a nested type tree (but not
/// its children).
///
/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
/// null_count: 0} for its Int16 node, as separate FieldNode structs
///
/// Both members are stored via flatbuffers::EndianScalar and read back through
/// the same function.
FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) FieldNode FLATBUFFERS_FINAL_CLASS {
private:
int64_t length_;
int64_t null_count_;
public:
/// Zero-fills the whole object.
FieldNode() {
memset(static_cast<void *>(this), 0, sizeof(FieldNode));
}
/// Stores both arguments after passing them through flatbuffers::EndianScalar.
FieldNode(int64_t _length, int64_t _null_count)
: length_(flatbuffers::EndianScalar(_length)),
null_count_(flatbuffers::EndianScalar(_null_count)) {
}
/// The number of value slots in the Arrow array at this level of a nested
/// tree
int64_t length() const {
return flatbuffers::EndianScalar(length_);
}
/// The number of observed nulls. Fields with null_count == 0 may choose not
/// to write their physical validity bitmap out as a materialized buffer,
/// instead setting the length of the bitmap buffer to 0.
int64_t null_count() const {
return flatbuffers::EndianScalar(null_count_);
}
};
// The macro receives the struct name and 16; presumably the expected size in
// bytes (8 + 8) - confirm against the flatbuffers headers.
FLATBUFFERS_STRUCT_END(FieldNode, 16);
/// A data header describing the shared memory layout of a "record" or "row"
/// batch. Some systems call this a "row batch" internally and others a "record
/// batch".
///
/// Read-only table view with three fields: length, nodes and buffers. Each
/// accessor looks its field up by the vtable offset declared below.
struct RecordBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_LENGTH = 4,
VT_NODES = 6,
VT_BUFFERS = 8
};
/// number of records / rows. The arrays in the batch should all have this
/// length
int64_t length() const {
return GetField<int64_t>(VT_LENGTH, 0);
}
/// Nodes correspond to the pre-ordered flattened logical schema
const flatbuffers::Vector<const FieldNode *> *nodes() const {
return GetPointer<const flatbuffers::Vector<const FieldNode *> *>(VT_NODES);
}
/// Buffers correspond to the pre-ordered flattened buffer tree
///
/// The number of buffers appended to this list depends on the schema. For
/// example, most primitive arrays will have 2 buffers, 1 for the validity
/// bitmap and 1 for the values. For struct arrays, there will only be a
/// single buffer for the validity (nulls) bitmap
const flatbuffers::Vector<const Buffer *> *buffers() const {
return GetPointer<const flatbuffers::Vector<const Buffer *> *>(VT_BUFFERS);
}
/// Runs the verifier over every field in declaration order. The checks are
/// chained with &&, so evaluation stops at the first one that fails.
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int64_t>(verifier, VT_LENGTH) &&
VerifyOffset(verifier, VT_NODES) &&
verifier.VerifyVector(nodes()) &&
VerifyOffset(verifier, VT_BUFFERS) &&
verifier.VerifyVector(buffers()) &&
verifier.EndTable();
}
};
/// Incremental writer for a RecordBatch table: the constructor starts a table
/// on the given FlatBufferBuilder, the add_* methods write individual fields,
/// and Finish() ends the table.
struct RecordBatchBuilder {
// Builder that receives the table; held by reference.
flatbuffers::FlatBufferBuilder &fbb_;
// Value returned by StartTable(), later handed to EndTable().
flatbuffers::uoffset_t start_;
/// Writes the length as int64_t; 0 is passed as the default value.
void add_length(int64_t length) {
fbb_.AddElement<int64_t>(RecordBatch::VT_LENGTH, length, 0);
}
/// Writes the offset of an already-built vector of FieldNode structs.
void add_nodes(flatbuffers::Offset<flatbuffers::Vector<const FieldNode *>> nodes) {
fbb_.AddOffset(RecordBatch::VT_NODES, nodes);
}
/// Writes the offset of an already-built vector of Buffer structs.
void add_buffers(flatbuffers::Offset<flatbuffers::Vector<const Buffer *>> buffers) {
fbb_.AddOffset(RecordBatch::VT_BUFFERS, buffers);
}
/// Binds to `_fbb` and starts a new table on it.
explicit RecordBatchBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
// Copy assignment is declared here without a body.
RecordBatchBuilder &operator=(const RecordBatchBuilder &);
/// Ends the table and returns its offset typed as Offset<RecordBatch>.
flatbuffers::Offset<RecordBatch> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<RecordBatch>(end);
return o;
}
};
/// Builds a complete RecordBatch table in `_fbb` from already-serialized parts
/// and returns its offset. Every parameter after `_fbb` has a default.
inline flatbuffers::Offset<RecordBatch> CreateRecordBatch(
flatbuffers::FlatBufferBuilder &_fbb,
int64_t length = 0,
flatbuffers::Offset<flatbuffers::Vector<const FieldNode *>> nodes = 0,
flatbuffers::Offset<flatbuffers::Vector<const Buffer *>> buffers = 0) {
RecordBatchBuilder builder_(_fbb);
// The 64-bit scalar is added first, then the two offsets in reverse
// declaration order; keep the sequence as emitted by the generator.
builder_.add_length(length);
builder_.add_buffers(buffers);
builder_.add_nodes(nodes);
return builder_.Finish();
}
/// Convenience wrapper around CreateRecordBatch that accepts the node and
/// buffer lists as std::vector pointers. A non-null vector is serialized into
/// `_fbb` first (nodes before buffers); a null pointer yields a zero offset.
inline flatbuffers::Offset<RecordBatch> CreateRecordBatchDirect(
    flatbuffers::FlatBufferBuilder &_fbb,
    int64_t length = 0,
    const std::vector<FieldNode> *nodes = nullptr,
    const std::vector<Buffer> *buffers = nullptr) {
  flatbuffers::Offset<flatbuffers::Vector<const FieldNode *>> nodes_offset = 0;
  if (nodes) {
    nodes_offset = _fbb.CreateVectorOfStructs<FieldNode>(*nodes);
  }

  flatbuffers::Offset<flatbuffers::Vector<const Buffer *>> buffers_offset = 0;
  if (buffers) {
    buffers_offset = _fbb.CreateVectorOfStructs<Buffer>(*buffers);
  }

  return org::apache::arrow::flatbuf::CreateRecordBatch(
      _fbb, length, nodes_offset, buffers_offset);
}
/// For sending dictionary encoding information. Any Field can be
/// dictionary-encoded, but in this case none of its children may be
/// dictionary-encoded.
/// There is one vector / column per dictionary, but that vector / column
/// may be spread across multiple dictionary batches by using the isDelta
/// flag
///
/// Read-only table view with three fields: id, data and isDelta.
struct DictionaryBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_ID = 4,
VT_DATA = 6,
VT_ISDELTA = 8
};
/// The id field, read as int64_t with 0 passed as the default.
int64_t id() const {
return GetField<int64_t>(VT_ID, 0);
}
/// Pointer to the nested RecordBatch table stored under VT_DATA.
const RecordBatch *data() const {
return GetPointer<const RecordBatch *>(VT_DATA);
}
/// If isDelta is true the values in the dictionary are to be appended to a
/// dictionary with the indicated id
bool isDelta() const {
return GetField<uint8_t>(VT_ISDELTA, 0) != 0;
}
/// Runs the verifier over every field in declaration order, including the
/// nested RecordBatch table. Evaluation stops at the first failing check.
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int64_t>(verifier, VT_ID) &&
VerifyOffset(verifier, VT_DATA) &&
verifier.VerifyTable(data()) &&
VerifyField<uint8_t>(verifier, VT_ISDELTA) &&
verifier.EndTable();
}
};
/// Incremental writer for a DictionaryBatch table: the constructor starts a
/// table on the given FlatBufferBuilder, the add_* methods write individual
/// fields, and Finish() ends the table.
struct DictionaryBatchBuilder {
// Builder that receives the table; held by reference.
flatbuffers::FlatBufferBuilder &fbb_;
// Value returned by StartTable(), later handed to EndTable().
flatbuffers::uoffset_t start_;
/// Writes the id as int64_t; 0 is passed as the default value.
void add_id(int64_t id) {
fbb_.AddElement<int64_t>(DictionaryBatch::VT_ID, id, 0);
}
/// Writes the offset of an already-built RecordBatch.
void add_data(flatbuffers::Offset<RecordBatch> data) {
fbb_.AddOffset(DictionaryBatch::VT_DATA, data);
}
/// Writes the flag as a uint8_t; 0 is passed as the default value.
void add_isDelta(bool isDelta) {
fbb_.AddElement<uint8_t>(DictionaryBatch::VT_ISDELTA, static_cast<uint8_t>(isDelta), 0);
}
/// Binds to `_fbb` and starts a new table on it.
explicit DictionaryBatchBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
// Copy assignment is declared here without a body.
DictionaryBatchBuilder &operator=(const DictionaryBatchBuilder &);
/// Ends the table and returns its offset typed as Offset<DictionaryBatch>.
flatbuffers::Offset<DictionaryBatch> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<DictionaryBatch>(end);
return o;
}
};
/// Builds a complete DictionaryBatch table in `_fbb` and returns its offset.
/// Every parameter after `_fbb` has a default.
inline flatbuffers::Offset<DictionaryBatch> CreateDictionaryBatch(
flatbuffers::FlatBufferBuilder &_fbb,
int64_t id = 0,
flatbuffers::Offset<RecordBatch> data = 0,
bool isDelta = false) {
DictionaryBatchBuilder builder_(_fbb);
// Keep the add_* sequence exactly as emitted by the generator.
builder_.add_id(id);
builder_.add_data(data);
builder_.add_isDelta(isDelta);
return builder_.Finish();
}
/// Read-only table view of a message: a version, a typed header (a type tag
/// plus an untyped pointer), a body length and a custom key/value metadata
/// vector.
struct Message FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_VERSION = 4,
VT_HEADER_TYPE = 6,
VT_HEADER = 8,
VT_BODYLENGTH = 10,
VT_CUSTOM_METADATA = 12
};
/// Version field, read as int16_t with 0 passed as the default and cast to MetadataVersion.
MetadataVersion version() const {
return static_cast<MetadataVersion>(GetField<int16_t>(VT_VERSION, 0));
}
/// Tag identifying the concrete type behind header(), read as uint8_t.
MessageHeader header_type() const {
return static_cast<MessageHeader>(GetField<uint8_t>(VT_HEADER_TYPE, 0));
}
/// Untyped pointer to the header object; use a header_as_* accessor for a typed view.
const void *header() const {
return GetPointer<const void *>(VT_HEADER);
}
/// Typed header access by template argument; specialized after this struct.
template<typename T> const T *header_as() const;
// Each header_as_X() returns the header cast to X when header_type() is the
// matching enumerator, and nullptr otherwise.
const Schema *header_as_Schema() const {
return header_type() == MessageHeader_Schema ? static_cast<const Schema *>(header()) : nullptr;
}
const DictionaryBatch *header_as_DictionaryBatch() const {
return header_type() == MessageHeader_DictionaryBatch ? static_cast<const DictionaryBatch *>(header()) : nullptr;
}
const RecordBatch *header_as_RecordBatch() const {
return header_type() == MessageHeader_RecordBatch ? static_cast<const RecordBatch *>(header()) : nullptr;
}
const Tensor *header_as_Tensor() const {
return header_type() == MessageHeader_Tensor ? static_cast<const Tensor *>(header()) : nullptr;
}
const SparseTensor *header_as_SparseTensor() const {
return header_type() == MessageHeader_SparseTensor ? static_cast<const SparseTensor *>(header()) : nullptr;
}
/// The bodyLength field, read as int64_t with 0 passed as the default.
int64_t bodyLength() const {
return GetField<int64_t>(VT_BODYLENGTH, 0);
}
/// Pointer to the vector of KeyValue tables stored under VT_CUSTOM_METADATA.
const flatbuffers::Vector<flatbuffers::Offset<KeyValue>> *custom_metadata() const {
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<KeyValue>> *>(VT_CUSTOM_METADATA);
}
/// Runs the verifier over every field in declaration order. The header is
/// checked through VerifyMessageHeader using its type tag. Evaluation stops
/// at the first failing check.
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int16_t>(verifier, VT_VERSION) &&
VerifyField<uint8_t>(verifier, VT_HEADER_TYPE) &&
VerifyOffset(verifier, VT_HEADER) &&
VerifyMessageHeader(verifier, header(), header_type()) &&
VerifyField<int64_t>(verifier, VT_BODYLENGTH) &&
VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
verifier.VerifyVector(custom_metadata()) &&
verifier.VerifyVectorOfTables(custom_metadata()) &&
verifier.EndTable();
}
};
/// Explicit specializations of Message::header_as<T>().
/// Each one forwards to the matching header_as_<T>() accessor of Message.
template<> inline const Schema *Message::header_as<Schema>() const {
return header_as_Schema();
}
/// header_as<DictionaryBatch>() forwards to header_as_DictionaryBatch().
template<> inline const DictionaryBatch *Message::header_as<DictionaryBatch>() const {
return header_as_DictionaryBatch();
}
/// header_as<RecordBatch>() forwards to header_as_RecordBatch().
template<> inline const RecordBatch *Message::header_as<RecordBatch>() const {
return header_as_RecordBatch();
}
/// header_as<Tensor>() forwards to header_as_Tensor().
template<> inline const Tensor *Message::header_as<Tensor>() const {
return header_as_Tensor();
}
/// header_as<SparseTensor>() forwards to header_as_SparseTensor().
template<> inline const SparseTensor *Message::header_as<SparseTensor>() const {
return header_as_SparseTensor();
}
/// Step-by-step writer for a Message table.
///
/// The constructor opens a table on the supplied FlatBufferBuilder, each
/// add_* call records one field, and Finish() closes the table and returns
/// its offset.
struct MessageBuilder {
  flatbuffers::FlatBufferBuilder &fbb_;  ///< builder the table is written to
  flatbuffers::uoffset_t start_;         ///< value returned by StartTable()

  /// Records the version field (stored as int16_t, default 0).
  void add_version(MetadataVersion version) {
    const int16_t raw = static_cast<int16_t>(version);
    fbb_.AddElement<int16_t>(Message::VT_VERSION, raw, 0);
  }

  /// Records the header union tag (stored as uint8_t, default 0).
  void add_header_type(MessageHeader header_type) {
    const uint8_t raw = static_cast<uint8_t>(header_type);
    fbb_.AddElement<uint8_t>(Message::VT_HEADER_TYPE, raw, 0);
  }

  /// Records the offset of the header union value.
  void add_header(flatbuffers::Offset<void> header) {
    fbb_.AddOffset(Message::VT_HEADER, header);
  }

  /// Records the body length (default 0).
  void add_bodyLength(int64_t bodyLength) {
    fbb_.AddElement<int64_t>(Message::VT_BODYLENGTH, bodyLength, 0);
  }

  /// Records the offset of the custom metadata vector.
  void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<KeyValue>>> custom_metadata) {
    fbb_.AddOffset(Message::VT_CUSTOM_METADATA, custom_metadata);
  }

  /// Opens a new table on `_fbb`.
  explicit MessageBuilder(flatbuffers::FlatBufferBuilder &_fbb)
      : fbb_(_fbb), start_(_fbb.StartTable()) {}

  /// Declared but not defined here.
  MessageBuilder &operator=(const MessageBuilder &);

  /// Closes the table and returns its offset.
  flatbuffers::Offset<Message> Finish() {
    return flatbuffers::Offset<Message>(fbb_.EndTable(start_));
  }
};
/// Builds a complete Message table inside `_fbb` and returns its offset.
///
/// @param _fbb            builder that receives the new table
/// @param version         metadata version to record
/// @param header_type     union tag describing `header`
/// @param header          offset of the already-serialized header value
/// @param bodyLength      body length to record
/// @param custom_metadata offset of an already-serialized KeyValue vector
/// @return the offset produced by MessageBuilder::Finish()
///
/// Fields are added in the sequence bodyLength, custom_metadata, header,
/// version, header_type; that sequence is kept exactly as generated.
inline flatbuffers::Offset<Message> CreateMessage(
    flatbuffers::FlatBufferBuilder &_fbb,
    MetadataVersion version = MetadataVersion_V1,
    MessageHeader header_type = MessageHeader_NONE,
    flatbuffers::Offset<void> header = 0,
    int64_t bodyLength = 0,
    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<KeyValue>>> custom_metadata = 0) {
  MessageBuilder msg(_fbb);
  msg.add_bodyLength(bodyLength);
  msg.add_custom_metadata(custom_metadata);
  msg.add_header(header);
  msg.add_version(version);
  msg.add_header_type(header_type);
  const flatbuffers::Offset<Message> finished = msg.Finish();
  return finished;
}
/// Convenience overload of CreateMessage that accepts the custom metadata as
/// a std::vector instead of a pre-serialized offset.
///
/// @param custom_metadata optional vector of KeyValue offsets; when nullptr,
///                        a zero offset is passed on to CreateMessage
/// @return the offset returned by CreateMessage
///
/// The remaining parameters are forwarded to CreateMessage unchanged.
inline flatbuffers::Offset<Message> CreateMessageDirect(
    flatbuffers::FlatBufferBuilder &_fbb,
    MetadataVersion version = MetadataVersion_V1,
    MessageHeader header_type = MessageHeader_NONE,
    flatbuffers::Offset<void> header = 0,
    int64_t bodyLength = 0,
    const std::vector<flatbuffers::Offset<KeyValue>> *custom_metadata = nullptr) {
  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<KeyValue>>> metadata_offset = 0;
  if (custom_metadata) {
    // Serialize the vector first; the table itself is written afterwards.
    metadata_offset = _fbb.CreateVector<flatbuffers::Offset<KeyValue>>(*custom_metadata);
  }
  return org::apache::arrow::flatbuf::CreateMessage(
      _fbb, version, header_type, header, bodyLength, metadata_offset);
}
/// Verifies a MessageHeader union value according to its tag.
///
/// @param verifier verifier used to check the table
/// @param obj      untyped pointer to the union value
/// @param type     union tag that selects how `obj` is interpreted
/// @return true for MessageHeader_NONE (obj is not inspected), the result of
///         VerifyTable for each known table type, and false for any other tag
inline bool VerifyMessageHeader(flatbuffers::Verifier &verifier, const void *obj, MessageHeader type) {
  switch (type) {
    case MessageHeader_NONE:
      return true;
    case MessageHeader_Schema:
      return verifier.VerifyTable(reinterpret_cast<const Schema *>(obj));
    case MessageHeader_DictionaryBatch:
      return verifier.VerifyTable(reinterpret_cast<const DictionaryBatch *>(obj));
    case MessageHeader_RecordBatch:
      return verifier.VerifyTable(reinterpret_cast<const RecordBatch *>(obj));
    case MessageHeader_Tensor:
      return verifier.VerifyTable(reinterpret_cast<const Tensor *>(obj));
    case MessageHeader_SparseTensor:
      return verifier.VerifyTable(reinterpret_cast<const SparseTensor *>(obj));
    default:
      return false;
  }
}
/// Verifies a vector of MessageHeader union values against its parallel
/// vector of union tags.
///
/// @param values vector of untyped union value offsets (may be nullptr)
/// @param types  vector of union tags (may be nullptr)
/// @return true when both vectors are absent, or when both are present, have
///         equal size and every element passes VerifyMessageHeader; false
///         otherwise
inline bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
  const bool has_values = values != nullptr;
  const bool has_types = types != nullptr;
  if (!has_values || !has_types) {
    // A missing vector is acceptable only if its counterpart is missing too.
    return !has_values && !has_types;
  }
  if (values->size() != types->size()) {
    return false;
  }
  for (flatbuffers::uoffset_t idx = 0; idx < values->size(); ++idx) {
    const MessageHeader tag = types->GetEnum<MessageHeader>(idx);
    if (!VerifyMessageHeader(verifier, values->Get(idx), tag)) {
      return false;
    }
  }
  return true;
}
/// Returns the root of `buf` interpreted as a Message, via
/// flatbuffers::GetRoot. No verification is performed here.
inline const org::apache::arrow::flatbuf::Message *GetMessage(const void *buf) {
return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Message>(buf);
}
/// Returns the root of `buf` interpreted as a Message, via
/// flatbuffers::GetSizePrefixedRoot. No verification is performed here.
inline const org::apache::arrow::flatbuf::Message *GetSizePrefixedMessage(const void *buf) {
return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Message>(buf);
}
/// Runs `verifier` over its buffer with Message as the expected root type.
/// nullptr is passed as VerifyBuffer's argument — presumably meaning "no file
/// identifier to check"; confirm against the FlatBuffers Verifier API.
inline bool VerifyMessageBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Message>(nullptr);
}
/// Size-prefixed counterpart of VerifyMessageBuffer: delegates to
/// Verifier::VerifySizePrefixedBuffer with Message as the expected root type.
/// nullptr is passed as its argument — presumably "no file identifier";
/// confirm against the FlatBuffers Verifier API.
inline bool VerifySizePrefixedMessageBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Message>(nullptr);
}
/// Finishes the buffer held by `fbb` with `root` as its root Message by
/// calling FlatBufferBuilder::Finish.
inline void FinishMessageBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Message> root) {
fbb.Finish(root);
}
/// Finishes the buffer held by `fbb` with `root` as its root Message by
/// calling FlatBufferBuilder::FinishSizePrefixed.
inline void FinishSizePrefixedMessageBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Message> root) {
fbb.FinishSizePrefixed(root);
}
} // namespace flatbuf
} // namespace arrow
} // namespace apache
} // namespace org
#endif // FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
// automatically generated by the FlatBuffers compiler, do not modify
#ifndef FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
#define FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
#include "flatbuffers/flatbuffers.h"
#include "Schema_generated.h"
namespace org {
namespace apache {
namespace arrow {
namespace flatbuf {
struct TensorDim;
struct Tensor;
/// ----------------------------------------------------------------------
/// Data structures for dense tensors
/// Shape data for a single axis in a tensor: a length and an optional name.
struct TensorDim FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
  /// Vtable offsets of the individual fields.
  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
    VT_SIZE = 4,
    VT_NAME = 6
  };

  /// Length of dimension; reads as 0 when the field is absent.
  int64_t size() const {
    const int64_t length = GetField<int64_t>(VT_SIZE, 0);
    return length;
  }

  /// Name of the dimension, optional.
  const flatbuffers::String *name() const {
    return GetPointer<const flatbuffers::String *>(VT_NAME);
  }

  /// Verifies every field of this table with `verifier`.
  ///
  /// Checks run in a fixed sequence and stop at the first failure.
  bool Verify(flatbuffers::Verifier &verifier) const {
    if (!VerifyTableStart(verifier)) return false;
    if (!VerifyField<int64_t>(verifier, VT_SIZE)) return false;
    if (!VerifyOffset(verifier, VT_NAME)) return false;
    if (!verifier.VerifyString(name())) return false;
    return verifier.EndTable();
  }
};
/// Step-by-step writer for a TensorDim table.
///
/// The constructor opens a table on the supplied FlatBufferBuilder, each
/// add_* call records one field, and Finish() closes the table and returns
/// its offset.
struct TensorDimBuilder {
  flatbuffers::FlatBufferBuilder &fbb_;  ///< builder the table is written to
  flatbuffers::uoffset_t start_;         ///< value returned by StartTable()

  /// Records the dimension length (default 0).
  void add_size(int64_t size) {
    fbb_.AddElement<int64_t>(TensorDim::VT_SIZE, size, 0);
  }

  /// Records the offset of the dimension's name string.
  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
    fbb_.AddOffset(TensorDim::VT_NAME, name);
  }

  /// Opens a new table on `_fbb`.
  explicit TensorDimBuilder(flatbuffers::FlatBufferBuilder &_fbb)
      : fbb_(_fbb), start_(_fbb.StartTable()) {}

  /// Declared but not defined here.
  TensorDimBuilder &operator=(const TensorDimBuilder &);

  /// Closes the table and returns its offset.
  flatbuffers::Offset<TensorDim> Finish() {
    return flatbuffers::Offset<TensorDim>(fbb_.EndTable(start_));
  }
};
/// Builds a TensorDim table inside `_fbb` and returns its offset.
///
/// @param _fbb builder that receives the new table
/// @param size dimension length to record
/// @param name offset of an already-serialized name string
/// @return the offset produced by TensorDimBuilder::Finish()
///
/// Fields are added in the order size, name.
inline flatbuffers::Offset<TensorDim> CreateTensorDim(
    flatbuffers::FlatBufferBuilder &_fbb,
    int64_t size = 0,
    flatbuffers::Offset<flatbuffers::String> name = 0) {
  TensorDimBuilder dim(_fbb);
  dim.add_size(size);
  dim.add_name(name);
  const flatbuffers::Offset<TensorDim> finished = dim.Finish();
  return finished;
}
/// Convenience overload of CreateTensorDim that accepts the name as a C
/// string instead of a pre-serialized offset.
///
/// @param size dimension length, forwarded unchanged
/// @param name optional C string; when nullptr, a zero offset is passed on
/// @return the offset returned by CreateTensorDim
inline flatbuffers::Offset<TensorDim> CreateTensorDimDirect(
    flatbuffers::FlatBufferBuilder &_fbb,
    int64_t size = 0,
    const char *name = nullptr) {
  flatbuffers::Offset<flatbuffers::String> name_offset = 0;
  if (name) {
    // Serialize the string first; the table itself is written afterwards.
    name_offset = _fbb.CreateString(name);
  }
  return org::apache::arrow::flatbuf::CreateTensorDim(_fbb, size, name_offset);
}
/// Read-only accessor view over a serialized Tensor table.
///
/// A Tensor carries a tagged union describing its value type, a shape
/// (vector of TensorDim), a strides vector and a Buffer struct locating the
/// data.
struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
  /// Vtable offsets of the individual fields.
  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
    VT_TYPE_TYPE = 4,
    VT_TYPE = 6,
    VT_SHAPE = 8,
    VT_STRIDES = 10,
    VT_DATA = 12
  };

  /// Union tag of the value type, stored as uint8_t; reads as 0 when absent.
  Type type_type() const {
    const uint8_t raw = GetField<uint8_t>(VT_TYPE_TYPE, 0);
    return static_cast<Type>(raw);
  }

  /// The type of data contained in a value cell. Currently only fixed-width
  /// value types are supported, no strings or nested types
  const void *type() const {
    return GetPointer<const void *>(VT_TYPE);
  }

  /// Typed access to the type union; explicit specializations are provided
  /// outside the struct for each union member.
  template<typename T> const T *type_as() const;

  // Each type_as_X() accessor below returns the union value cast to X when
  // type_type() equals Type_X, and nullptr otherwise.
  const Null *type_as_Null() const {
    return type_type() != Type_Null ? nullptr : static_cast<const Null *>(type());
  }
  const Int *type_as_Int() const {
    return type_type() != Type_Int ? nullptr : static_cast<const Int *>(type());
  }
  const FloatingPoint *type_as_FloatingPoint() const {
    return type_type() != Type_FloatingPoint ? nullptr : static_cast<const FloatingPoint *>(type());
  }
  const Binary *type_as_Binary() const {
    return type_type() != Type_Binary ? nullptr : static_cast<const Binary *>(type());
  }
  const Utf8 *type_as_Utf8() const {
    return type_type() != Type_Utf8 ? nullptr : static_cast<const Utf8 *>(type());
  }
  const Bool *type_as_Bool() const {
    return type_type() != Type_Bool ? nullptr : static_cast<const Bool *>(type());
  }
  const Decimal *type_as_Decimal() const {
    return type_type() != Type_Decimal ? nullptr : static_cast<const Decimal *>(type());
  }
  const Date *type_as_Date() const {
    return type_type() != Type_Date ? nullptr : static_cast<const Date *>(type());
  }
  const Time *type_as_Time() const {
    return type_type() != Type_Time ? nullptr : static_cast<const Time *>(type());
  }
  const Timestamp *type_as_Timestamp() const {
    return type_type() != Type_Timestamp ? nullptr : static_cast<const Timestamp *>(type());
  }
  const Interval *type_as_Interval() const {
    return type_type() != Type_Interval ? nullptr : static_cast<const Interval *>(type());
  }
  const List *type_as_List() const {
    return type_type() != Type_List ? nullptr : static_cast<const List *>(type());
  }
  const Struct_ *type_as_Struct_() const {
    return type_type() != Type_Struct_ ? nullptr : static_cast<const Struct_ *>(type());
  }
  const Union *type_as_Union() const {
    return type_type() != Type_Union ? nullptr : static_cast<const Union *>(type());
  }
  const FixedSizeBinary *type_as_FixedSizeBinary() const {
    return type_type() != Type_FixedSizeBinary ? nullptr : static_cast<const FixedSizeBinary *>(type());
  }
  const FixedSizeList *type_as_FixedSizeList() const {
    return type_type() != Type_FixedSizeList ? nullptr : static_cast<const FixedSizeList *>(type());
  }
  const Map *type_as_Map() const {
    return type_type() != Type_Map ? nullptr : static_cast<const Map *>(type());
  }
  const Duration *type_as_Duration() const {
    return type_type() != Type_Duration ? nullptr : static_cast<const Duration *>(type());
  }
  const LargeBinary *type_as_LargeBinary() const {
    return type_type() != Type_LargeBinary ? nullptr : static_cast<const LargeBinary *>(type());
  }
  const LargeUtf8 *type_as_LargeUtf8() const {
    return type_type() != Type_LargeUtf8 ? nullptr : static_cast<const LargeUtf8 *>(type());
  }
  const LargeList *type_as_LargeList() const {
    return type_type() != Type_LargeList ? nullptr : static_cast<const LargeList *>(type());
  }

  /// The dimensions of the tensor, optionally named
  const flatbuffers::Vector<flatbuffers::Offset<TensorDim>> *shape() const {
    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<TensorDim>> *>(VT_SHAPE);
  }

  /// Non-negative byte offsets to advance one value cell along each dimension
  const flatbuffers::Vector<int64_t> *strides() const {
    return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_STRIDES);
  }

  /// The location and size of the tensor's data
  const Buffer *data() const {
    return GetStruct<const Buffer *>(VT_DATA);
  }

  /// Verifies every field of this table with `verifier`.
  ///
  /// Checks run in a fixed sequence and stop at the first failure.
  bool Verify(flatbuffers::Verifier &verifier) const {
    if (!VerifyTableStart(verifier)) return false;
    if (!VerifyField<uint8_t>(verifier, VT_TYPE_TYPE)) return false;
    if (!VerifyOffset(verifier, VT_TYPE)) return false;
    if (!VerifyType(verifier, type(), type_type())) return false;
    if (!VerifyOffset(verifier, VT_SHAPE)) return false;
    if (!verifier.VerifyVector(shape())) return false;
    if (!verifier.VerifyVectorOfTables(shape())) return false;
    if (!VerifyOffset(verifier, VT_STRIDES)) return false;
    if (!verifier.VerifyVector(strides())) return false;
    if (!VerifyField<Buffer>(verifier, VT_DATA)) return false;
    return verifier.EndTable();
  }
};
/// Explicit specializations of Tensor::type_as<T>(), one per member of the
/// Type union. Each one forwards to the matching type_as_<T>() accessor of
/// Tensor.
template<> inline const Null *Tensor::type_as<Null>() const {
return type_as_Null();
}
template<> inline const Int *Tensor::type_as<Int>() const {
return type_as_Int();
}
template<> inline const FloatingPoint *Tensor::type_as<FloatingPoint>() const {
return type_as_FloatingPoint();
}
template<> inline const Binary *Tensor::type_as<Binary>() const {
return type_as_Binary();
}
template<> inline const Utf8 *Tensor::type_as<Utf8>() const {
return type_as_Utf8();
}
template<> inline const Bool *Tensor::type_as<Bool>() const {
return type_as_Bool();
}
template<> inline const Decimal *Tensor::type_as<Decimal>() const {
return type_as_Decimal();
}
template<> inline const Date *Tensor::type_as<Date>() const {
return type_as_Date();
}
template<> inline const Time *Tensor::type_as<Time>() const {
return type_as_Time();
}
template<> inline const Timestamp *Tensor::type_as<Timestamp>() const {
return type_as_Timestamp();
}
template<> inline const Interval *Tensor::type_as<Interval>() const {
return type_as_Interval();
}
template<> inline const List *Tensor::type_as<List>() const {
return type_as_List();
}
template<> inline const Struct_ *Tensor::type_as<Struct_>() const {
return type_as_Struct_();
}
template<> inline const Union *Tensor::type_as<Union>() const {
return type_as_Union();
}
template<> inline const FixedSizeBinary *Tensor::type_as<FixedSizeBinary>() const {
return type_as_FixedSizeBinary();
}
template<> inline const FixedSizeList *Tensor::type_as<FixedSizeList>() const {
return type_as_FixedSizeList();
}
template<> inline const Map *Tensor::type_as<Map>() const {
return type_as_Map();
}
template<> inline const Duration *Tensor::type_as<Duration>() const {
return type_as_Duration();
}
template<> inline const LargeBinary *Tensor::type_as<LargeBinary>() const {
return type_as_LargeBinary();
}
template<> inline const LargeUtf8 *Tensor::type_as<LargeUtf8>() const {
return type_as_LargeUtf8();
}
template<> inline const LargeList *Tensor::type_as<LargeList>() const {
return type_as_LargeList();
}
/// Step-by-step writer for a Tensor table.
///
/// The constructor opens a table on the supplied FlatBufferBuilder, each
/// add_* call records one field, and Finish() closes the table and returns
/// its offset.
struct TensorBuilder {
  flatbuffers::FlatBufferBuilder &fbb_;  ///< builder the table is written to
  flatbuffers::uoffset_t start_;         ///< value returned by StartTable()

  /// Records the type union tag (stored as uint8_t, default 0).
  void add_type_type(Type type_type) {
    const uint8_t raw = static_cast<uint8_t>(type_type);
    fbb_.AddElement<uint8_t>(Tensor::VT_TYPE_TYPE, raw, 0);
  }

  /// Records the offset of the type union value.
  void add_type(flatbuffers::Offset<void> type) {
    fbb_.AddOffset(Tensor::VT_TYPE, type);
  }

  /// Records the offset of the shape vector.
  void add_shape(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<TensorDim>>> shape) {
    fbb_.AddOffset(Tensor::VT_SHAPE, shape);
  }

  /// Records the offset of the strides vector.
  void add_strides(flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides) {
    fbb_.AddOffset(Tensor::VT_STRIDES, strides);
  }

  /// Records the Buffer struct that locates the tensor data.
  void add_data(const Buffer *data) {
    fbb_.AddStruct(Tensor::VT_DATA, data);
  }

  /// Opens a new table on `_fbb`.
  explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
      : fbb_(_fbb), start_(_fbb.StartTable()) {}

  /// Declared but not defined here.
  TensorBuilder &operator=(const TensorBuilder &);

  /// Closes the table and returns its offset.
  flatbuffers::Offset<Tensor> Finish() {
    return flatbuffers::Offset<Tensor>(fbb_.EndTable(start_));
  }
};
/// Builds a complete Tensor table inside `_fbb` and returns its offset.
///
/// @param _fbb      builder that receives the new table
/// @param type_type union tag describing `type`
/// @param type      offset of the already-serialized type union value
/// @param shape     offset of an already-serialized TensorDim vector
/// @param strides   offset of an already-serialized int64 vector
/// @param data      pointer to the Buffer struct to record
/// @return the offset produced by TensorBuilder::Finish()
///
/// Fields are added in the sequence data, strides, shape, type, type_type;
/// that sequence is kept exactly as generated.
inline flatbuffers::Offset<Tensor> CreateTensor(
    flatbuffers::FlatBufferBuilder &_fbb,
    Type type_type = Type_NONE,
    flatbuffers::Offset<void> type = 0,
    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<TensorDim>>> shape = 0,
    flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides = 0,
    const Buffer *data = 0) {
  TensorBuilder tensor(_fbb);
  tensor.add_data(data);
  tensor.add_strides(strides);
  tensor.add_shape(shape);
  tensor.add_type(type);
  tensor.add_type_type(type_type);
  const flatbuffers::Offset<Tensor> finished = tensor.Finish();
  return finished;
}
/// Convenience overload of CreateTensor that accepts shape and strides as
/// std::vector instead of pre-serialized offsets.
///
/// @param shape   optional vector of TensorDim offsets; nullptr yields a zero
///                offset
/// @param strides optional vector of int64 strides; nullptr yields a zero
///                offset
/// @return the offset returned by CreateTensor
///
/// The remaining parameters are forwarded to CreateTensor unchanged. The
/// shape vector is serialized before the strides vector.
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
    flatbuffers::FlatBufferBuilder &_fbb,
    Type type_type = Type_NONE,
    flatbuffers::Offset<void> type = 0,
    const std::vector<flatbuffers::Offset<TensorDim>> *shape = nullptr,
    const std::vector<int64_t> *strides = nullptr,
    const Buffer *data = 0) {
  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<TensorDim>>> shape_offset = 0;
  if (shape) {
    shape_offset = _fbb.CreateVector<flatbuffers::Offset<TensorDim>>(*shape);
  }
  flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides_offset = 0;
  if (strides) {
    strides_offset = _fbb.CreateVector<int64_t>(*strides);
  }
  return org::apache::arrow::flatbuf::CreateTensor(
      _fbb, type_type, type, shape_offset, strides_offset, data);
}
/// Returns the root of `buf` interpreted as a Tensor, via
/// flatbuffers::GetRoot. No verification is performed here.
inline const org::apache::arrow::flatbuf::Tensor *GetTensor(const void *buf) {
return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Tensor>(buf);
}
/// Returns the root of `buf` interpreted as a Tensor, via
/// flatbuffers::GetSizePrefixedRoot. No verification is performed here.
inline const org::apache::arrow::flatbuf::Tensor *GetSizePrefixedTensor(const void *buf) {
return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Tensor>(buf);
}
/// Runs `verifier` over its buffer with Tensor as the expected root type.
/// nullptr is passed as VerifyBuffer's argument — presumably meaning "no file
/// identifier to check"; confirm against the FlatBuffers Verifier API.
inline bool VerifyTensorBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Tensor>(nullptr);
}
/// Size-prefixed counterpart of VerifyTensorBuffer: delegates to
/// Verifier::VerifySizePrefixedBuffer with Tensor as the expected root type.
/// nullptr is passed as its argument — presumably "no file identifier";
/// confirm against the FlatBuffers Verifier API.
inline bool VerifySizePrefixedTensorBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Tensor>(nullptr);
}
/// Finishes the buffer held by `fbb` with `root` as its root Tensor by
/// calling FlatBufferBuilder::Finish.
inline void FinishTensorBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Tensor> root) {
fbb.Finish(root);
}
/// Finishes the buffer held by `fbb` with `root` as its root Tensor by
/// calling FlatBufferBuilder::FinishSizePrefixed.
inline void FinishSizePrefixedTensorBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Tensor> root) {
fbb.FinishSizePrefixed(root);
}
} // namespace flatbuf
} // namespace arrow
} // namespace apache
} // namespace org
#endif // FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Arrow version components.
// NOTE(review): these three macros are defined with no value here, so
// ARROW_VERSION cannot be evaluated until they expand to integers.
// Presumably they are placeholders that are filled in when this header is
// configured — confirm.
#define ARROW_VERSION_MAJOR
#define ARROW_VERSION_MINOR
#define ARROW_VERSION_PATCH
// Single-integer encoding of the version: (MAJOR * 1000 + MINOR) * 1000 + PATCH.
// The whole expansion is parenthesized so the macro acts as one operand when
// used inside a larger expression (e.g. `2 * ARROW_VERSION` or
// `ARROW_VERSION % 1000`); without the outer parentheses the surrounding
// operator would bind to only part of the expansion.
#define ARROW_VERSION (((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH)
/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */
/* #undef GRPCPP_PP_INCLUDE */
/**
* Autogenerated by Thrift Compiler (0.11.0)
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
......
/**
* Autogenerated by Thrift Compiler (0.11.0)
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
......
Subproject commit bf9eb67ab9371755c6bcece13cadc7693bcbf264
......@@ -45,9 +45,11 @@ namespace DB
buffer = std::make_unique<arrow::Buffer>(file_data);
// TODO: maybe use parquet::RandomAccessSource?
auto reader = parquet::ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(*buffer));
file_reader = std::make_unique<parquet::arrow::FileReader>(::arrow::default_memory_pool(),
std::move(reader));
auto status = parquet::arrow::FileReader::Make(
::arrow::default_memory_pool(),
parquet::ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(*buffer)),
&file_reader);
row_group_total = file_reader->num_row_groups();
row_group_current = 0;
}
......
......@@ -21,9 +21,10 @@
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/util/decimal.h>
#include <arrow/util/memory.h>
#include <parquet/arrow/writer.h>
#include <parquet/exception.h>
#include <parquet/util/memory.h>
#include <parquet/deprecated_io.h>
namespace DB
......@@ -238,22 +239,39 @@ static const PaddedPODArray<UInt8> * extractNullBytemapPtr(ColumnPtr column)
}
class OstreamOutputStream : public parquet::OutputStream
class OstreamOutputStream : public arrow::io::OutputStream
{
public:
explicit OstreamOutputStream(WriteBuffer & ostr_) : ostr(ostr_) {}
virtual ~OstreamOutputStream() {}
virtual void Close() {}
virtual int64_t Tell() { return total_length; }
virtual void Write(const uint8_t * data, int64_t length)
explicit OstreamOutputStream(WriteBuffer & ostr_) : ostr(ostr_) { is_open = true; }
~OstreamOutputStream() override {}
// FileInterface
::arrow::Status Close() override
{
is_open = false;
return ::arrow::Status::OK();
}
::arrow::Status Tell(int64_t* position) const override
{
*position = total_length;
return ::arrow::Status::OK();
}
bool closed() const override { return !is_open; }
// Writable
::arrow::Status Write(const void* data, int64_t length) override
{
ostr.write(reinterpret_cast<const char *>(data), length);
total_length += length;
return ::arrow::Status::OK();
}
private:
WriteBuffer & ostr;
int64_t total_length = 0;
bool is_open = false;
PARQUET_DISALLOW_COPY_AND_ASSIGN(OstreamOutputStream);
};
......@@ -396,7 +414,6 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
arrow::default_memory_pool(),
sink,
props, /*parquet::default_writer_properties(),*/
parquet::arrow::default_arrow_writer_properties(),
&file_writer);
if (!status.ok())
throw Exception{"Error while opening a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
......
......@@ -171,19 +171,19 @@ Code: 33. DB::Ex---tion: Error while reading Parquet data: IOError: Arrow error:
Code: 8. DB::Ex---tion: Column "element" is not presented in input data
=== Try load data from nested_maps.snappy.parquet
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Currently only nesting with Lists is supported.
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: key_value: list<key_value: struct<key: string not null, value: struct<key_value: list<key_value: struct<key: int32 not null, value: bool not null> not null> not null>> not null> not null
=== Try load data from nonnullable.impala.parquet
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Currently only nesting with Lists is supported.
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: map: list<map: struct<key: string not null, value: int32 not null> not null> not null
=== Try load data from nullable.impala.parquet
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Currently only nesting with Lists is supported.
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: map: list<map: struct<key: string not null, value: int32> not null> not null
=== Try load data from nulls.snappy.parquet
Code: 8. DB::Ex---tion: Column "b_c_int" is not presented in input data
=== Try load data from repeated_no_annotation.parquet
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Currently only nesting with Lists is supported.
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: phone: list<phone: struct<number: int64 not null, kind: string> not null> not null
=== Try load data from userdata1.parquet
1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册