Unverified commit af3c52d5, authored by alexey-milovidov, committed by GitHub

Merge pull request #8334 from maxulan/parquet_list_reading_fix

Parquet list reading fix
......@@ -128,6 +128,9 @@
[submodule "contrib/icu"]
path = contrib/icu
url = https://github.com/unicode-org/icu.git
[submodule "contrib/flatbuffers"]
path = contrib/flatbuffers
url = https://github.com/google/flatbuffers.git
[submodule "contrib/libc-headers"]
path = contrib/libc-headers
url = https://github.com/ClickHouse-Extras/libc-headers.git
......@@ -54,10 +54,12 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
endif()
if(${USE_STATIC_LIBRARIES})
set(FLATBUFFERS_LIBRARY flatbuffers)
set(ARROW_LIBRARY arrow_static)
set(PARQUET_LIBRARY parquet_static)
set(THRIFT_LIBRARY thrift_static)
else()
set(FLATBUFFERS_LIBRARY flatbuffers_shared)
set(ARROW_LIBRARY arrow_shared)
set(PARQUET_LIBRARY parquet_shared)
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
......@@ -74,7 +76,7 @@ endif()
endif()
if(USE_PARQUET)
message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY}")
message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY} ; ${FLATBUFFERS_LIBRARY}")
else()
message(STATUS "Building without Parquet support")
endif()
......@@ -159,6 +159,8 @@ if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
set (ARROW_PARQUET ON CACHE INTERNAL "")
set (ARROW_VERBOSE_THIRDPARTY_BUILD ON CACHE INTERNAL "")
set (ARROW_BUILD_SHARED 1 CACHE INTERNAL "")
set (ARROW_BUILD_UTILITIES OFF CACHE INTERNAL "")
set (ARROW_BUILD_INTEGRATION OFF CACHE INTERNAL "")
set (ARROW_BOOST_HEADER_ONLY ON CACHE INTERNAL "")
set (Boost_FOUND 1 CACHE INTERNAL "")
if (MAKE_STATIC_LIBRARIES)
......
Subproject commit 87ac6fddaf21d0b4ee8b8090533ff293db0da1b4
Subproject commit b789226ccb2124285792107c758bb3b40b3d082a
This diff is collapsed.
// automatically generated by the FlatBuffers compiler, do not modify
#ifndef FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
#define FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
#include "flatbuffers/flatbuffers.h"
#include "Schema_generated.h"
namespace org {
namespace apache {
namespace arrow {
namespace flatbuf {
struct Footer;
struct Block;
FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Block FLATBUFFERS_FINAL_CLASS {
private:
int64_t offset_;
int32_t metaDataLength_;
int32_t padding0__;
int64_t bodyLength_;
public:
Block() {
memset(static_cast<void *>(this), 0, sizeof(Block));
}
Block(int64_t _offset, int32_t _metaDataLength, int64_t _bodyLength)
: offset_(flatbuffers::EndianScalar(_offset)),
metaDataLength_(flatbuffers::EndianScalar(_metaDataLength)),
padding0__(0),
bodyLength_(flatbuffers::EndianScalar(_bodyLength)) {
(void)padding0__;
}
/// Index to the start of the RecordBlock (note this is past the Message header)
int64_t offset() const {
return flatbuffers::EndianScalar(offset_);
}
/// Length of the metadata
int32_t metaDataLength() const {
return flatbuffers::EndianScalar(metaDataLength_);
}
/// Length of the data (this is aligned so there can be a gap between this and
/// the metadata).
int64_t bodyLength() const {
return flatbuffers::EndianScalar(bodyLength_);
}
};
FLATBUFFERS_STRUCT_END(Block, 24);
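// Illustrative usage (not part of the generated header): Block is a fixed
// 24-byte inline struct, so it can be built directly and stored in vectors:
//   Block b(/*offset=*/8, /*metaDataLength=*/256, /*bodyLength=*/4096);
//   int64_t body = b.bodyLength();  // accessors go through EndianScalar, so reads are endian-safe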
/// ----------------------------------------------------------------------
/// Arrow File metadata
///
struct Footer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_VERSION = 4,
VT_SCHEMA = 6,
VT_DICTIONARIES = 8,
VT_RECORDBATCHES = 10
};
MetadataVersion version() const {
return static_cast<MetadataVersion>(GetField<int16_t>(VT_VERSION, 0));
}
const Schema *schema() const {
return GetPointer<const Schema *>(VT_SCHEMA);
}
const flatbuffers::Vector<const Block *> *dictionaries() const {
return GetPointer<const flatbuffers::Vector<const Block *> *>(VT_DICTIONARIES);
}
const flatbuffers::Vector<const Block *> *recordBatches() const {
return GetPointer<const flatbuffers::Vector<const Block *> *>(VT_RECORDBATCHES);
}
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int16_t>(verifier, VT_VERSION) &&
VerifyOffset(verifier, VT_SCHEMA) &&
verifier.VerifyTable(schema()) &&
VerifyOffset(verifier, VT_DICTIONARIES) &&
verifier.VerifyVector(dictionaries()) &&
VerifyOffset(verifier, VT_RECORDBATCHES) &&
verifier.VerifyVector(recordBatches()) &&
verifier.EndTable();
}
};
struct FooterBuilder {
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_version(MetadataVersion version) {
fbb_.AddElement<int16_t>(Footer::VT_VERSION, static_cast<int16_t>(version), 0);
}
void add_schema(flatbuffers::Offset<Schema> schema) {
fbb_.AddOffset(Footer::VT_SCHEMA, schema);
}
void add_dictionaries(flatbuffers::Offset<flatbuffers::Vector<const Block *>> dictionaries) {
fbb_.AddOffset(Footer::VT_DICTIONARIES, dictionaries);
}
void add_recordBatches(flatbuffers::Offset<flatbuffers::Vector<const Block *>> recordBatches) {
fbb_.AddOffset(Footer::VT_RECORDBATCHES, recordBatches);
}
explicit FooterBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
FooterBuilder &operator=(const FooterBuilder &);
flatbuffers::Offset<Footer> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<Footer>(end);
return o;
}
};
inline flatbuffers::Offset<Footer> CreateFooter(
flatbuffers::FlatBufferBuilder &_fbb,
MetadataVersion version = MetadataVersion_V1,
flatbuffers::Offset<Schema> schema = 0,
flatbuffers::Offset<flatbuffers::Vector<const Block *>> dictionaries = 0,
flatbuffers::Offset<flatbuffers::Vector<const Block *>> recordBatches = 0) {
FooterBuilder builder_(_fbb);
builder_.add_recordBatches(recordBatches);
builder_.add_dictionaries(dictionaries);
builder_.add_schema(schema);
builder_.add_version(version);
return builder_.Finish();
}
inline flatbuffers::Offset<Footer> CreateFooterDirect(
flatbuffers::FlatBufferBuilder &_fbb,
MetadataVersion version = MetadataVersion_V1,
flatbuffers::Offset<Schema> schema = 0,
const std::vector<Block> *dictionaries = nullptr,
const std::vector<Block> *recordBatches = nullptr) {
auto dictionaries__ = dictionaries ? _fbb.CreateVectorOfStructs<Block>(*dictionaries) : 0;
auto recordBatches__ = recordBatches ? _fbb.CreateVectorOfStructs<Block>(*recordBatches) : 0;
return org::apache::arrow::flatbuf::CreateFooter(
_fbb,
version,
schema,
dictionaries__,
recordBatches__);
}
inline const org::apache::arrow::flatbuf::Footer *GetFooter(const void *buf) {
return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Footer>(buf);
}
inline const org::apache::arrow::flatbuf::Footer *GetSizePrefixedFooter(const void *buf) {
return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Footer>(buf);
}
inline bool VerifyFooterBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Footer>(nullptr);
}
inline bool VerifySizePrefixedFooterBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Footer>(nullptr);
}
inline void FinishFooterBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Footer> root) {
fbb.Finish(root);
}
inline void FinishSizePrefixedFooterBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Footer> root) {
fbb.FinishSizePrefixed(root);
}
} // namespace flatbuf
} // namespace arrow
} // namespace apache
} // namespace org
#endif // FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_
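A minimal usage sketch (not part of the generated file) of the Footer helpers above, assuming File_generated.h and its Schema_generated.h dependency are included; the function name and all values are illustrative:
inline void exampleBuildFooter()
{
    namespace fb = org::apache::arrow::flatbuf;
    flatbuffers::FlatBufferBuilder fbb;
    // One record batch: 8 bytes into the file, 256 bytes of metadata, 4096 bytes of body.
    std::vector<fb::Block> batches{fb::Block(8, 256, 4096)};
    auto footer = fb::CreateFooterDirect(
        fbb, fb::MetadataVersion_V1, /*schema=*/0, /*dictionaries=*/nullptr, &batches);
    fb::FinishFooterBuffer(fbb, footer);
    // Re-read what was just built.
    const fb::Footer * parsed = fb::GetFooter(fbb.GetBufferPointer());
    (void)parsed->recordBatches()->Get(0)->bodyLength();  // 4096
}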
// automatically generated by the FlatBuffers compiler, do not modify
#ifndef FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
#define FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
#include "flatbuffers/flatbuffers.h"
#include "Schema_generated.h"
#include "SparseTensor_generated.h"
#include "Tensor_generated.h"
namespace org {
namespace apache {
namespace arrow {
namespace flatbuf {
struct FieldNode;
struct RecordBatch;
struct DictionaryBatch;
struct Message;
/// ----------------------------------------------------------------------
/// The root Message type
/// This union enables us to easily send different message types without
/// redundant storage, and in the future we can easily add new message types.
///
/// Arrow implementations do not need to implement all of the message types,
/// which may include experimental metadata types. For maximum compatibility,
/// it is best to send data using RecordBatch
enum MessageHeader {
MessageHeader_NONE = 0,
MessageHeader_Schema = 1,
MessageHeader_DictionaryBatch = 2,
MessageHeader_RecordBatch = 3,
MessageHeader_Tensor = 4,
MessageHeader_SparseTensor = 5,
MessageHeader_MIN = MessageHeader_NONE,
MessageHeader_MAX = MessageHeader_SparseTensor
};
inline const MessageHeader (&EnumValuesMessageHeader())[6] {
static const MessageHeader values[] = {
MessageHeader_NONE,
MessageHeader_Schema,
MessageHeader_DictionaryBatch,
MessageHeader_RecordBatch,
MessageHeader_Tensor,
MessageHeader_SparseTensor
};
return values;
}
inline const char * const *EnumNamesMessageHeader() {
static const char * const names[] = {
"NONE",
"Schema",
"DictionaryBatch",
"RecordBatch",
"Tensor",
"SparseTensor",
nullptr
};
return names;
}
inline const char *EnumNameMessageHeader(MessageHeader e) {
if (e < MessageHeader_NONE || e > MessageHeader_SparseTensor) return "";
const size_t index = static_cast<size_t>(e);
return EnumNamesMessageHeader()[index];
}
template<typename T> struct MessageHeaderTraits {
static const MessageHeader enum_value = MessageHeader_NONE;
};
template<> struct MessageHeaderTraits<Schema> {
static const MessageHeader enum_value = MessageHeader_Schema;
};
template<> struct MessageHeaderTraits<DictionaryBatch> {
static const MessageHeader enum_value = MessageHeader_DictionaryBatch;
};
template<> struct MessageHeaderTraits<RecordBatch> {
static const MessageHeader enum_value = MessageHeader_RecordBatch;
};
template<> struct MessageHeaderTraits<Tensor> {
static const MessageHeader enum_value = MessageHeader_Tensor;
};
template<> struct MessageHeaderTraits<SparseTensor> {
static const MessageHeader enum_value = MessageHeader_SparseTensor;
};
bool VerifyMessageHeader(flatbuffers::Verifier &verifier, const void *obj, MessageHeader type);
bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
/// ----------------------------------------------------------------------
/// Data structures for describing a table row batch (a collection of
/// equal-length Arrow arrays)
/// Metadata about a field at some level of a nested type tree (but not
/// its children).
///
/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
/// null_count: 0} for its Int16 node, as separate FieldNode structs
FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) FieldNode FLATBUFFERS_FINAL_CLASS {
private:
int64_t length_;
int64_t null_count_;
public:
FieldNode() {
memset(static_cast<void *>(this), 0, sizeof(FieldNode));
}
FieldNode(int64_t _length, int64_t _null_count)
: length_(flatbuffers::EndianScalar(_length)),
null_count_(flatbuffers::EndianScalar(_null_count)) {
}
/// The number of value slots in the Arrow array at this level of a nested
/// tree
int64_t length() const {
return flatbuffers::EndianScalar(length_);
}
/// The number of observed nulls. Fields with null_count == 0 may choose not
/// to write their physical validity bitmap out as a materialized buffer,
/// instead setting the length of the bitmap buffer to 0.
int64_t null_count() const {
return flatbuffers::EndianScalar(null_count_);
}
};
FLATBUFFERS_STRUCT_END(FieldNode, 16);
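// Illustrative usage (not part of the generated header): the List<Int16>
// example in the comment above maps to two FieldNode structs, one per level
// of the flattened type tree:
//   FieldNode list_node(/*length=*/5, /*null_count=*/2);
//   FieldNode values_node(/*length=*/6, /*null_count=*/0);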
/// A data header describing the shared memory layout of a "record" or "row"
/// batch. Some systems call this a "row batch" internally and others a "record
/// batch".
struct RecordBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_LENGTH = 4,
VT_NODES = 6,
VT_BUFFERS = 8
};
/// number of records / rows. The arrays in the batch should all have this
/// length
int64_t length() const {
return GetField<int64_t>(VT_LENGTH, 0);
}
/// Nodes correspond to the pre-ordered flattened logical schema
const flatbuffers::Vector<const FieldNode *> *nodes() const {
return GetPointer<const flatbuffers::Vector<const FieldNode *> *>(VT_NODES);
}
/// Buffers correspond to the pre-ordered flattened buffer tree
///
/// The number of buffers appended to this list depends on the schema. For
/// example, most primitive arrays will have 2 buffers, 1 for the validity
/// bitmap and 1 for the values. For struct arrays, there will only be a
/// single buffer for the validity (nulls) bitmap
const flatbuffers::Vector<const Buffer *> *buffers() const {
return GetPointer<const flatbuffers::Vector<const Buffer *> *>(VT_BUFFERS);
}
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int64_t>(verifier, VT_LENGTH) &&
VerifyOffset(verifier, VT_NODES) &&
verifier.VerifyVector(nodes()) &&
VerifyOffset(verifier, VT_BUFFERS) &&
verifier.VerifyVector(buffers()) &&
verifier.EndTable();
}
};
struct RecordBatchBuilder {
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_length(int64_t length) {
fbb_.AddElement<int64_t>(RecordBatch::VT_LENGTH, length, 0);
}
void add_nodes(flatbuffers::Offset<flatbuffers::Vector<const FieldNode *>> nodes) {
fbb_.AddOffset(RecordBatch::VT_NODES, nodes);
}
void add_buffers(flatbuffers::Offset<flatbuffers::Vector<const Buffer *>> buffers) {
fbb_.AddOffset(RecordBatch::VT_BUFFERS, buffers);
}
explicit RecordBatchBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
RecordBatchBuilder &operator=(const RecordBatchBuilder &);
flatbuffers::Offset<RecordBatch> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<RecordBatch>(end);
return o;
}
};
inline flatbuffers::Offset<RecordBatch> CreateRecordBatch(
flatbuffers::FlatBufferBuilder &_fbb,
int64_t length = 0,
flatbuffers::Offset<flatbuffers::Vector<const FieldNode *>> nodes = 0,
flatbuffers::Offset<flatbuffers::Vector<const Buffer *>> buffers = 0) {
RecordBatchBuilder builder_(_fbb);
builder_.add_length(length);
builder_.add_buffers(buffers);
builder_.add_nodes(nodes);
return builder_.Finish();
}
inline flatbuffers::Offset<RecordBatch> CreateRecordBatchDirect(
flatbuffers::FlatBufferBuilder &_fbb,
int64_t length = 0,
const std::vector<FieldNode> *nodes = nullptr,
const std::vector<Buffer> *buffers = nullptr) {
auto nodes__ = nodes ? _fbb.CreateVectorOfStructs<FieldNode>(*nodes) : 0;
auto buffers__ = buffers ? _fbb.CreateVectorOfStructs<Buffer>(*buffers) : 0;
return org::apache::arrow::flatbuf::CreateRecordBatch(
_fbb,
length,
nodes__,
buffers__);
}
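// Illustrative usage (not part of the generated header; offsets are made up):
// per the buffers() comment, a primitive Int16 column of 5 rows with 2 nulls
// contributes one FieldNode and two Buffers (validity bitmap, then values):
//   std::vector<FieldNode> nodes{FieldNode(5, 2)};
//   std::vector<Buffer> buffers{Buffer(/*offset=*/0, /*length=*/1),
//                               Buffer(/*offset=*/8, /*length=*/10)};
//   auto rb = CreateRecordBatchDirect(fbb, /*length=*/5, &nodes, &buffers);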
/// For sending dictionary encoding information. Any Field can be
/// dictionary-encoded, but in this case none of its children may be
/// dictionary-encoded.
/// There is one vector / column per dictionary, but that vector / column
/// may be spread across multiple dictionary batches by using the isDelta
/// flag
struct DictionaryBatch FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_ID = 4,
VT_DATA = 6,
VT_ISDELTA = 8
};
int64_t id() const {
return GetField<int64_t>(VT_ID, 0);
}
const RecordBatch *data() const {
return GetPointer<const RecordBatch *>(VT_DATA);
}
/// If isDelta is true the values in the dictionary are to be appended to a
/// dictionary with the indicated id
bool isDelta() const {
return GetField<uint8_t>(VT_ISDELTA, 0) != 0;
}
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int64_t>(verifier, VT_ID) &&
VerifyOffset(verifier, VT_DATA) &&
verifier.VerifyTable(data()) &&
VerifyField<uint8_t>(verifier, VT_ISDELTA) &&
verifier.EndTable();
}
};
struct DictionaryBatchBuilder {
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_id(int64_t id) {
fbb_.AddElement<int64_t>(DictionaryBatch::VT_ID, id, 0);
}
void add_data(flatbuffers::Offset<RecordBatch> data) {
fbb_.AddOffset(DictionaryBatch::VT_DATA, data);
}
void add_isDelta(bool isDelta) {
fbb_.AddElement<uint8_t>(DictionaryBatch::VT_ISDELTA, static_cast<uint8_t>(isDelta), 0);
}
explicit DictionaryBatchBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
DictionaryBatchBuilder &operator=(const DictionaryBatchBuilder &);
flatbuffers::Offset<DictionaryBatch> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<DictionaryBatch>(end);
return o;
}
};
inline flatbuffers::Offset<DictionaryBatch> CreateDictionaryBatch(
flatbuffers::FlatBufferBuilder &_fbb,
int64_t id = 0,
flatbuffers::Offset<RecordBatch> data = 0,
bool isDelta = false) {
DictionaryBatchBuilder builder_(_fbb);
builder_.add_id(id);
builder_.add_data(data);
builder_.add_isDelta(isDelta);
return builder_.Finish();
}
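// Illustrative usage (not part of the generated header): with isDelta set,
// the entries in `batch` are appended to the existing dictionary with id 1
// rather than replacing it:
//   auto delta = CreateDictionaryBatch(fbb, /*id=*/1, batch, /*isDelta=*/true);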
struct Message FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_VERSION = 4,
VT_HEADER_TYPE = 6,
VT_HEADER = 8,
VT_BODYLENGTH = 10,
VT_CUSTOM_METADATA = 12
};
MetadataVersion version() const {
return static_cast<MetadataVersion>(GetField<int16_t>(VT_VERSION, 0));
}
MessageHeader header_type() const {
return static_cast<MessageHeader>(GetField<uint8_t>(VT_HEADER_TYPE, 0));
}
const void *header() const {
return GetPointer<const void *>(VT_HEADER);
}
template<typename T> const T *header_as() const;
const Schema *header_as_Schema() const {
return header_type() == MessageHeader_Schema ? static_cast<const Schema *>(header()) : nullptr;
}
const DictionaryBatch *header_as_DictionaryBatch() const {
return header_type() == MessageHeader_DictionaryBatch ? static_cast<const DictionaryBatch *>(header()) : nullptr;
}
const RecordBatch *header_as_RecordBatch() const {
return header_type() == MessageHeader_RecordBatch ? static_cast<const RecordBatch *>(header()) : nullptr;
}
const Tensor *header_as_Tensor() const {
return header_type() == MessageHeader_Tensor ? static_cast<const Tensor *>(header()) : nullptr;
}
const SparseTensor *header_as_SparseTensor() const {
return header_type() == MessageHeader_SparseTensor ? static_cast<const SparseTensor *>(header()) : nullptr;
}
int64_t bodyLength() const {
return GetField<int64_t>(VT_BODYLENGTH, 0);
}
const flatbuffers::Vector<flatbuffers::Offset<KeyValue>> *custom_metadata() const {
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<KeyValue>> *>(VT_CUSTOM_METADATA);
}
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int16_t>(verifier, VT_VERSION) &&
VerifyField<uint8_t>(verifier, VT_HEADER_TYPE) &&
VerifyOffset(verifier, VT_HEADER) &&
VerifyMessageHeader(verifier, header(), header_type()) &&
VerifyField<int64_t>(verifier, VT_BODYLENGTH) &&
VerifyOffset(verifier, VT_CUSTOM_METADATA) &&
verifier.VerifyVector(custom_metadata()) &&
verifier.VerifyVectorOfTables(custom_metadata()) &&
verifier.EndTable();
}
};
template<> inline const Schema *Message::header_as<Schema>() const {
return header_as_Schema();
}
template<> inline const DictionaryBatch *Message::header_as<DictionaryBatch>() const {
return header_as_DictionaryBatch();
}
template<> inline const RecordBatch *Message::header_as<RecordBatch>() const {
return header_as_RecordBatch();
}
template<> inline const Tensor *Message::header_as<Tensor>() const {
return header_as_Tensor();
}
template<> inline const SparseTensor *Message::header_as<SparseTensor>() const {
return header_as_SparseTensor();
}
struct MessageBuilder {
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_version(MetadataVersion version) {
fbb_.AddElement<int16_t>(Message::VT_VERSION, static_cast<int16_t>(version), 0);
}
void add_header_type(MessageHeader header_type) {
fbb_.AddElement<uint8_t>(Message::VT_HEADER_TYPE, static_cast<uint8_t>(header_type), 0);
}
void add_header(flatbuffers::Offset<void> header) {
fbb_.AddOffset(Message::VT_HEADER, header);
}
void add_bodyLength(int64_t bodyLength) {
fbb_.AddElement<int64_t>(Message::VT_BODYLENGTH, bodyLength, 0);
}
void add_custom_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<KeyValue>>> custom_metadata) {
fbb_.AddOffset(Message::VT_CUSTOM_METADATA, custom_metadata);
}
explicit MessageBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
MessageBuilder &operator=(const MessageBuilder &);
flatbuffers::Offset<Message> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<Message>(end);
return o;
}
};
inline flatbuffers::Offset<Message> CreateMessage(
flatbuffers::FlatBufferBuilder &_fbb,
MetadataVersion version = MetadataVersion_V1,
MessageHeader header_type = MessageHeader_NONE,
flatbuffers::Offset<void> header = 0,
int64_t bodyLength = 0,
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<KeyValue>>> custom_metadata = 0) {
MessageBuilder builder_(_fbb);
builder_.add_bodyLength(bodyLength);
builder_.add_custom_metadata(custom_metadata);
builder_.add_header(header);
builder_.add_version(version);
builder_.add_header_type(header_type);
return builder_.Finish();
}
inline flatbuffers::Offset<Message> CreateMessageDirect(
flatbuffers::FlatBufferBuilder &_fbb,
MetadataVersion version = MetadataVersion_V1,
MessageHeader header_type = MessageHeader_NONE,
flatbuffers::Offset<void> header = 0,
int64_t bodyLength = 0,
const std::vector<flatbuffers::Offset<KeyValue>> *custom_metadata = nullptr) {
auto custom_metadata__ = custom_metadata ? _fbb.CreateVector<flatbuffers::Offset<KeyValue>>(*custom_metadata) : 0;
return org::apache::arrow::flatbuf::CreateMessage(
_fbb,
version,
header_type,
header,
bodyLength,
custom_metadata__);
}
inline bool VerifyMessageHeader(flatbuffers::Verifier &verifier, const void *obj, MessageHeader type) {
switch (type) {
case MessageHeader_NONE: {
return true;
}
case MessageHeader_Schema: {
auto ptr = reinterpret_cast<const Schema *>(obj);
return verifier.VerifyTable(ptr);
}
case MessageHeader_DictionaryBatch: {
auto ptr = reinterpret_cast<const DictionaryBatch *>(obj);
return verifier.VerifyTable(ptr);
}
case MessageHeader_RecordBatch: {
auto ptr = reinterpret_cast<const RecordBatch *>(obj);
return verifier.VerifyTable(ptr);
}
case MessageHeader_Tensor: {
auto ptr = reinterpret_cast<const Tensor *>(obj);
return verifier.VerifyTable(ptr);
}
case MessageHeader_SparseTensor: {
auto ptr = reinterpret_cast<const SparseTensor *>(obj);
return verifier.VerifyTable(ptr);
}
default: return false;
}
}
inline bool VerifyMessageHeaderVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
if (!values || !types) return !values && !types;
if (values->size() != types->size()) return false;
for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
if (!VerifyMessageHeader(
verifier, values->Get(i), types->GetEnum<MessageHeader>(i))) {
return false;
}
}
return true;
}
inline const org::apache::arrow::flatbuf::Message *GetMessage(const void *buf) {
return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Message>(buf);
}
inline const org::apache::arrow::flatbuf::Message *GetSizePrefixedMessage(const void *buf) {
return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Message>(buf);
}
inline bool VerifyMessageBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Message>(nullptr);
}
inline bool VerifySizePrefixedMessageBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Message>(nullptr);
}
inline void FinishMessageBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Message> root) {
fbb.Finish(root);
}
inline void FinishSizePrefixedMessageBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Message> root) {
fbb.FinishSizePrefixed(root);
}
} // namespace flatbuf
} // namespace arrow
} // namespace apache
} // namespace org
#endif // FLATBUFFERS_GENERATED_MESSAGE_ORG_APACHE_ARROW_FLATBUF_H_
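A minimal sketch (not part of the generated file) of wrapping a RecordBatch header in a Message through the union accessors above; the function name and values are illustrative:
inline void exampleBuildMessage()
{
    namespace fb = org::apache::arrow::flatbuf;
    flatbuffers::FlatBufferBuilder fbb;
    std::vector<fb::FieldNode> nodes{fb::FieldNode(/*length=*/5, /*null_count=*/2)};
    auto batch = fb::CreateRecordBatchDirect(fbb, /*length=*/5, &nodes, /*buffers=*/nullptr);
    auto message = fb::CreateMessage(
        fbb, fb::MetadataVersion_V1, fb::MessageHeader_RecordBatch,
        batch.Union(), /*bodyLength=*/4096);
    fb::FinishMessageBuffer(fbb, message);
    // header_as_RecordBatch() is non-null only because header_type matches.
    const fb::Message * parsed = fb::GetMessage(fbb.GetBufferPointer());
    (void)parsed->header_as_RecordBatch();
}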
// automatically generated by the FlatBuffers compiler, do not modify
#ifndef FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
#define FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
#include "flatbuffers/flatbuffers.h"
#include "Schema_generated.h"
namespace org {
namespace apache {
namespace arrow {
namespace flatbuf {
struct TensorDim;
struct Tensor;
/// ----------------------------------------------------------------------
/// Data structures for dense tensors
/// Shape data for a single axis in a tensor
struct TensorDim FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_SIZE = 4,
VT_NAME = 6
};
/// Length of dimension
int64_t size() const {
return GetField<int64_t>(VT_SIZE, 0);
}
/// Name of the dimension, optional
const flatbuffers::String *name() const {
return GetPointer<const flatbuffers::String *>(VT_NAME);
}
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int64_t>(verifier, VT_SIZE) &&
VerifyOffset(verifier, VT_NAME) &&
verifier.VerifyString(name()) &&
verifier.EndTable();
}
};
struct TensorDimBuilder {
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_size(int64_t size) {
fbb_.AddElement<int64_t>(TensorDim::VT_SIZE, size, 0);
}
void add_name(flatbuffers::Offset<flatbuffers::String> name) {
fbb_.AddOffset(TensorDim::VT_NAME, name);
}
explicit TensorDimBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
TensorDimBuilder &operator=(const TensorDimBuilder &);
flatbuffers::Offset<TensorDim> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<TensorDim>(end);
return o;
}
};
inline flatbuffers::Offset<TensorDim> CreateTensorDim(
flatbuffers::FlatBufferBuilder &_fbb,
int64_t size = 0,
flatbuffers::Offset<flatbuffers::String> name = 0) {
TensorDimBuilder builder_(_fbb);
builder_.add_size(size);
builder_.add_name(name);
return builder_.Finish();
}
inline flatbuffers::Offset<TensorDim> CreateTensorDimDirect(
flatbuffers::FlatBufferBuilder &_fbb,
int64_t size = 0,
const char *name = nullptr) {
auto name__ = name ? _fbb.CreateString(name) : 0;
return org::apache::arrow::flatbuf::CreateTensorDim(
_fbb,
size,
name__);
}
struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_TYPE_TYPE = 4,
VT_TYPE = 6,
VT_SHAPE = 8,
VT_STRIDES = 10,
VT_DATA = 12
};
Type type_type() const {
return static_cast<Type>(GetField<uint8_t>(VT_TYPE_TYPE, 0));
}
/// The type of data contained in a value cell. Currently only fixed-width
/// value types are supported, no strings or nested types
const void *type() const {
return GetPointer<const void *>(VT_TYPE);
}
template<typename T> const T *type_as() const;
const Null *type_as_Null() const {
return type_type() == Type_Null ? static_cast<const Null *>(type()) : nullptr;
}
const Int *type_as_Int() const {
return type_type() == Type_Int ? static_cast<const Int *>(type()) : nullptr;
}
const FloatingPoint *type_as_FloatingPoint() const {
return type_type() == Type_FloatingPoint ? static_cast<const FloatingPoint *>(type()) : nullptr;
}
const Binary *type_as_Binary() const {
return type_type() == Type_Binary ? static_cast<const Binary *>(type()) : nullptr;
}
const Utf8 *type_as_Utf8() const {
return type_type() == Type_Utf8 ? static_cast<const Utf8 *>(type()) : nullptr;
}
const Bool *type_as_Bool() const {
return type_type() == Type_Bool ? static_cast<const Bool *>(type()) : nullptr;
}
const Decimal *type_as_Decimal() const {
return type_type() == Type_Decimal ? static_cast<const Decimal *>(type()) : nullptr;
}
const Date *type_as_Date() const {
return type_type() == Type_Date ? static_cast<const Date *>(type()) : nullptr;
}
const Time *type_as_Time() const {
return type_type() == Type_Time ? static_cast<const Time *>(type()) : nullptr;
}
const Timestamp *type_as_Timestamp() const {
return type_type() == Type_Timestamp ? static_cast<const Timestamp *>(type()) : nullptr;
}
const Interval *type_as_Interval() const {
return type_type() == Type_Interval ? static_cast<const Interval *>(type()) : nullptr;
}
const List *type_as_List() const {
return type_type() == Type_List ? static_cast<const List *>(type()) : nullptr;
}
const Struct_ *type_as_Struct_() const {
return type_type() == Type_Struct_ ? static_cast<const Struct_ *>(type()) : nullptr;
}
const Union *type_as_Union() const {
return type_type() == Type_Union ? static_cast<const Union *>(type()) : nullptr;
}
const FixedSizeBinary *type_as_FixedSizeBinary() const {
return type_type() == Type_FixedSizeBinary ? static_cast<const FixedSizeBinary *>(type()) : nullptr;
}
const FixedSizeList *type_as_FixedSizeList() const {
return type_type() == Type_FixedSizeList ? static_cast<const FixedSizeList *>(type()) : nullptr;
}
const Map *type_as_Map() const {
return type_type() == Type_Map ? static_cast<const Map *>(type()) : nullptr;
}
const Duration *type_as_Duration() const {
return type_type() == Type_Duration ? static_cast<const Duration *>(type()) : nullptr;
}
const LargeBinary *type_as_LargeBinary() const {
return type_type() == Type_LargeBinary ? static_cast<const LargeBinary *>(type()) : nullptr;
}
const LargeUtf8 *type_as_LargeUtf8() const {
return type_type() == Type_LargeUtf8 ? static_cast<const LargeUtf8 *>(type()) : nullptr;
}
const LargeList *type_as_LargeList() const {
return type_type() == Type_LargeList ? static_cast<const LargeList *>(type()) : nullptr;
}
/// The dimensions of the tensor, optionally named
const flatbuffers::Vector<flatbuffers::Offset<TensorDim>> *shape() const {
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<TensorDim>> *>(VT_SHAPE);
}
/// Non-negative byte offsets to advance one value cell along each dimension
const flatbuffers::Vector<int64_t> *strides() const {
return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_STRIDES);
}
/// The location and size of the tensor's data
const Buffer *data() const {
return GetStruct<const Buffer *>(VT_DATA);
}
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<uint8_t>(verifier, VT_TYPE_TYPE) &&
VerifyOffset(verifier, VT_TYPE) &&
VerifyType(verifier, type(), type_type()) &&
VerifyOffset(verifier, VT_SHAPE) &&
verifier.VerifyVector(shape()) &&
verifier.VerifyVectorOfTables(shape()) &&
VerifyOffset(verifier, VT_STRIDES) &&
verifier.VerifyVector(strides()) &&
VerifyField<Buffer>(verifier, VT_DATA) &&
verifier.EndTable();
}
};
template<> inline const Null *Tensor::type_as<Null>() const {
return type_as_Null();
}
template<> inline const Int *Tensor::type_as<Int>() const {
return type_as_Int();
}
template<> inline const FloatingPoint *Tensor::type_as<FloatingPoint>() const {
return type_as_FloatingPoint();
}
template<> inline const Binary *Tensor::type_as<Binary>() const {
return type_as_Binary();
}
template<> inline const Utf8 *Tensor::type_as<Utf8>() const {
return type_as_Utf8();
}
template<> inline const Bool *Tensor::type_as<Bool>() const {
return type_as_Bool();
}
template<> inline const Decimal *Tensor::type_as<Decimal>() const {
return type_as_Decimal();
}
template<> inline const Date *Tensor::type_as<Date>() const {
return type_as_Date();
}
template<> inline const Time *Tensor::type_as<Time>() const {
return type_as_Time();
}
template<> inline const Timestamp *Tensor::type_as<Timestamp>() const {
return type_as_Timestamp();
}
template<> inline const Interval *Tensor::type_as<Interval>() const {
return type_as_Interval();
}
template<> inline const List *Tensor::type_as<List>() const {
return type_as_List();
}
template<> inline const Struct_ *Tensor::type_as<Struct_>() const {
return type_as_Struct_();
}
template<> inline const Union *Tensor::type_as<Union>() const {
return type_as_Union();
}
template<> inline const FixedSizeBinary *Tensor::type_as<FixedSizeBinary>() const {
return type_as_FixedSizeBinary();
}
template<> inline const FixedSizeList *Tensor::type_as<FixedSizeList>() const {
return type_as_FixedSizeList();
}
template<> inline const Map *Tensor::type_as<Map>() const {
return type_as_Map();
}
template<> inline const Duration *Tensor::type_as<Duration>() const {
return type_as_Duration();
}
template<> inline const LargeBinary *Tensor::type_as<LargeBinary>() const {
return type_as_LargeBinary();
}
template<> inline const LargeUtf8 *Tensor::type_as<LargeUtf8>() const {
return type_as_LargeUtf8();
}
template<> inline const LargeList *Tensor::type_as<LargeList>() const {
return type_as_LargeList();
}
struct TensorBuilder {
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
void add_type_type(Type type_type) {
fbb_.AddElement<uint8_t>(Tensor::VT_TYPE_TYPE, static_cast<uint8_t>(type_type), 0);
}
void add_type(flatbuffers::Offset<void> type) {
fbb_.AddOffset(Tensor::VT_TYPE, type);
}
void add_shape(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<TensorDim>>> shape) {
fbb_.AddOffset(Tensor::VT_SHAPE, shape);
}
void add_strides(flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides) {
fbb_.AddOffset(Tensor::VT_STRIDES, strides);
}
void add_data(const Buffer *data) {
fbb_.AddStruct(Tensor::VT_DATA, data);
}
explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
TensorBuilder &operator=(const TensorBuilder &);
flatbuffers::Offset<Tensor> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<Tensor>(end);
return o;
}
};
inline flatbuffers::Offset<Tensor> CreateTensor(
flatbuffers::FlatBufferBuilder &_fbb,
Type type_type = Type_NONE,
flatbuffers::Offset<void> type = 0,
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<TensorDim>>> shape = 0,
flatbuffers::Offset<flatbuffers::Vector<int64_t>> strides = 0,
const Buffer *data = 0) {
TensorBuilder builder_(_fbb);
builder_.add_data(data);
builder_.add_strides(strides);
builder_.add_shape(shape);
builder_.add_type(type);
builder_.add_type_type(type_type);
return builder_.Finish();
}
inline flatbuffers::Offset<Tensor> CreateTensorDirect(
flatbuffers::FlatBufferBuilder &_fbb,
Type type_type = Type_NONE,
flatbuffers::Offset<void> type = 0,
const std::vector<flatbuffers::Offset<TensorDim>> *shape = nullptr,
const std::vector<int64_t> *strides = nullptr,
const Buffer *data = 0) {
auto shape__ = shape ? _fbb.CreateVector<flatbuffers::Offset<TensorDim>>(*shape) : 0;
auto strides__ = strides ? _fbb.CreateVector<int64_t>(*strides) : 0;
return org::apache::arrow::flatbuf::CreateTensor(
_fbb,
type_type,
type,
shape__,
strides__,
data);
}
inline const org::apache::arrow::flatbuf::Tensor *GetTensor(const void *buf) {
return flatbuffers::GetRoot<org::apache::arrow::flatbuf::Tensor>(buf);
}
inline const org::apache::arrow::flatbuf::Tensor *GetSizePrefixedTensor(const void *buf) {
return flatbuffers::GetSizePrefixedRoot<org::apache::arrow::flatbuf::Tensor>(buf);
}
inline bool VerifyTensorBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifyBuffer<org::apache::arrow::flatbuf::Tensor>(nullptr);
}
inline bool VerifySizePrefixedTensorBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifySizePrefixedBuffer<org::apache::arrow::flatbuf::Tensor>(nullptr);
}
inline void FinishTensorBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Tensor> root) {
fbb.Finish(root);
}
inline void FinishSizePrefixedTensorBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<org::apache::arrow::flatbuf::Tensor> root) {
fbb.FinishSizePrefixed(root);
}
} // namespace flatbuf
} // namespace arrow
} // namespace apache
} // namespace org
#endif // FLATBUFFERS_GENERATED_TENSOR_ORG_APACHE_ARROW_FLATBUF_H_
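A minimal sketch (not part of the generated file) describing a 2 x 3 row-major int32 tensor with the helpers above; CreateInt and Buffer are assumed to come from the companion Schema_generated.h, and all values are illustrative:
inline void exampleBuildTensor()
{
    namespace fb = org::apache::arrow::flatbuf;
    flatbuffers::FlatBufferBuilder fbb;
    std::vector<flatbuffers::Offset<fb::TensorDim>> shape{
        fb::CreateTensorDimDirect(fbb, 2, "row"),
        fb::CreateTensorDimDirect(fbb, 3, "col")};
    std::vector<int64_t> strides{12, 4};           // byte strides per dimension (row-major int32)
    fb::Buffer data(/*offset=*/0, /*length=*/24);  // where the 24 value bytes sit in the message body
    auto type = fb::CreateInt(fbb, /*bitWidth=*/32, /*is_signed=*/true);
    auto tensor = fb::CreateTensorDirect(
        fbb, fb::Type_Int, type.Union(), &shape, &strides, &data);
    fb::FinishTensorBuffer(fbb, tensor);
}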
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#define ARROW_VERSION_MAJOR
#define ARROW_VERSION_MINOR
#define ARROW_VERSION_PATCH
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */
/* #undef GRPCPP_PP_INCLUDE */
/**
* Autogenerated by Thrift Compiler (0.11.0)
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
......
/**
* Autogenerated by Thrift Compiler (0.11.0)
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
......
Subproject commit bf9eb67ab9371755c6bcece13cadc7693bcbf264
......@@ -45,9 +45,11 @@ namespace DB
buffer = std::make_unique<arrow::Buffer>(file_data);
// TODO: maybe use parquet::RandomAccessSource?
auto reader = parquet::ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(*buffer));
file_reader = std::make_unique<parquet::arrow::FileReader>(::arrow::default_memory_pool(),
std::move(reader));
auto status = parquet::arrow::FileReader::Make(
::arrow::default_memory_pool(),
parquet::ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(*buffer)),
&file_reader);
row_group_total = file_reader->num_row_groups();
row_group_current = 0;
}
......
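For reference, a standalone sketch of the new construction path shown in the hunk above: parquet::arrow::FileReader::Make replaces the removed FileReader constructor and reports failures via arrow::Status. The real class keeps `buffer` alive as a member; this sketch assumes the caller does the same, and the function name is hypothetical:
#include <memory>
#include <stdexcept>
#include <arrow/buffer.h>
#include <arrow/io/memory.h>
#include <arrow/memory_pool.h>
#include <parquet/arrow/reader.h>
#include <parquet/file_reader.h>

std::unique_ptr<parquet::arrow::FileReader> openParquetReader(const arrow::Buffer & buffer)
{
    std::unique_ptr<parquet::arrow::FileReader> file_reader;
    // Make() returns a Status instead of throwing, so check it explicitly.
    auto status = parquet::arrow::FileReader::Make(
        ::arrow::default_memory_pool(),
        parquet::ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer)),
        &file_reader);
    if (!status.ok())
        throw std::runtime_error("Cannot open Parquet file: " + status.ToString());
    return file_reader;
}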