diff --git a/.gitmodules b/.gitmodules index 7d975031c5433567c184b5f04cf4f8bc6245c46d..6075b7e924327d99cc7703b45e2bfbf0d6dd7d76 100644 --- a/.gitmodules +++ b/.gitmodules @@ -128,6 +128,9 @@ [submodule "contrib/icu"] path = contrib/icu url = https://github.com/unicode-org/icu.git +[submodule "contrib/flatbuffers"] + path = contrib/flatbuffers + url = https://github.com/google/flatbuffers.git [submodule "contrib/libc-headers"] path = contrib/libc-headers url = https://github.com/ClickHouse-Extras/libc-headers.git diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake index d302fd64e1e876da84b24b329ef7a17ce8e2db10..6501a05d7bb38fbb48450a62f64841d7681542a1 100644 --- a/cmake/find/parquet.cmake +++ b/cmake/find/parquet.cmake @@ -54,10 +54,12 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD) endif() if(${USE_STATIC_LIBRARIES}) + set(FLATBUFFERS_LIBRARY flatbuffers) set(ARROW_LIBRARY arrow_static) set(PARQUET_LIBRARY parquet_static) set(THRIFT_LIBRARY thrift_static) else() + set(FLATBUFFERS_LIBRARY flatbuffers_shared) set(ARROW_LIBRARY arrow_shared) set(PARQUET_LIBRARY parquet_shared) if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) @@ -74,7 +76,7 @@ endif() endif() if(USE_PARQUET) - message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY}") + message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY} ; ${FLATBUFFERS_LIBRARY}") else() message(STATUS "Building without Parquet support") endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 415d3a88703d5b9370f7fe33ed8024e0a768c171..80ccd8cce585d2443758d124aefbe2ff1ee1c9f7 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -159,6 +159,8 @@ if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) set (ARROW_PARQUET ON CACHE INTERNAL "") set (ARROW_VERBOSE_THIRDPARTY_BUILD ON CACHE INTERNAL "") set (ARROW_BUILD_SHARED 1 CACHE INTERNAL "") + set (ARROW_BUILD_UTILITIES OFF CACHE INTERNAL "") + set (ARROW_BUILD_INTEGRATION OFF CACHE INTERNAL "") set (ARROW_BOOST_HEADER_ONLY ON CACHE INTERNAL "") set (Boost_FOUND 1 CACHE INTERNAL "") if (MAKE_STATIC_LIBRARIES) diff --git a/contrib/arrow b/contrib/arrow index 87ac6fddaf21d0b4ee8b8090533ff293db0da1b4..b789226ccb2124285792107c758bb3b40b3d082a 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 87ac6fddaf21d0b4ee8b8090533ff293db0da1b4 +Subproject commit b789226ccb2124285792107c758bb3b40b3d082a diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 3ba24f49498d8f2cc7bd4c2f1e8de45bc7133393..ccf3021d143901b0cb1f27c8a6e5bfbccb1f1bd9 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -1,46 +1,48 @@ +include(ExternalProject) + # === thrift set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp) # contrib/thrift/lib/cpp/CMakeLists.txt set(thriftcpp_SOURCES - ${LIBRARY_DIR}/src/thrift/TApplicationException.cpp - ${LIBRARY_DIR}/src/thrift/TOutput.cpp - ${LIBRARY_DIR}/src/thrift/async/TAsyncChannel.cpp - ${LIBRARY_DIR}/src/thrift/async/TAsyncProtocolProcessor.cpp - ${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.h - ${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.cpp - ${LIBRARY_DIR}/src/thrift/concurrency/ThreadManager.cpp - ${LIBRARY_DIR}/src/thrift/concurrency/TimerManager.cpp - ${LIBRARY_DIR}/src/thrift/concurrency/Util.cpp - ${LIBRARY_DIR}/src/thrift/processor/PeekProcessor.cpp - ${LIBRARY_DIR}/src/thrift/protocol/TBase64Utils.cpp - ${LIBRARY_DIR}/src/thrift/protocol/TDebugProtocol.cpp - ${LIBRARY_DIR}/src/thrift/protocol/TJSONProtocol.cpp - ${LIBRARY_DIR}/src/thrift/protocol/TMultiplexedProtocol.cpp - ${LIBRARY_DIR}/src/thrift/protocol/TProtocol.cpp - ${LIBRARY_DIR}/src/thrift/transport/TTransportException.cpp - ${LIBRARY_DIR}/src/thrift/transport/TFDTransport.cpp - ${LIBRARY_DIR}/src/thrift/transport/TSimpleFileTransport.cpp - ${LIBRARY_DIR}/src/thrift/transport/THttpTransport.cpp - ${LIBRARY_DIR}/src/thrift/transport/THttpClient.cpp - ${LIBRARY_DIR}/src/thrift/transport/THttpServer.cpp - ${LIBRARY_DIR}/src/thrift/transport/TSocket.cpp - ${LIBRARY_DIR}/src/thrift/transport/TSocketPool.cpp - ${LIBRARY_DIR}/src/thrift/transport/TServerSocket.cpp - ${LIBRARY_DIR}/src/thrift/transport/TTransportUtils.cpp - ${LIBRARY_DIR}/src/thrift/transport/TBufferTransports.cpp - ${LIBRARY_DIR}/src/thrift/server/TConnectedClient.cpp - ${LIBRARY_DIR}/src/thrift/server/TServerFramework.cpp - ${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp - ${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp - ${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp -) -set( thriftcpp_threads_SOURCES - ${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp - ${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp - ${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp - ${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp -) + ${LIBRARY_DIR}/src/thrift/TApplicationException.cpp + ${LIBRARY_DIR}/src/thrift/TOutput.cpp + ${LIBRARY_DIR}/src/thrift/async/TAsyncChannel.cpp + ${LIBRARY_DIR}/src/thrift/async/TAsyncProtocolProcessor.cpp + ${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.h + ${LIBRARY_DIR}/src/thrift/async/TConcurrentClientSyncInfo.cpp + ${LIBRARY_DIR}/src/thrift/concurrency/ThreadManager.cpp + ${LIBRARY_DIR}/src/thrift/concurrency/TimerManager.cpp + ${LIBRARY_DIR}/src/thrift/concurrency/Util.cpp + ${LIBRARY_DIR}/src/thrift/processor/PeekProcessor.cpp + ${LIBRARY_DIR}/src/thrift/protocol/TBase64Utils.cpp + ${LIBRARY_DIR}/src/thrift/protocol/TDebugProtocol.cpp + ${LIBRARY_DIR}/src/thrift/protocol/TJSONProtocol.cpp + ${LIBRARY_DIR}/src/thrift/protocol/TMultiplexedProtocol.cpp + ${LIBRARY_DIR}/src/thrift/protocol/TProtocol.cpp + ${LIBRARY_DIR}/src/thrift/transport/TTransportException.cpp + ${LIBRARY_DIR}/src/thrift/transport/TFDTransport.cpp + ${LIBRARY_DIR}/src/thrift/transport/TSimpleFileTransport.cpp + ${LIBRARY_DIR}/src/thrift/transport/THttpTransport.cpp + ${LIBRARY_DIR}/src/thrift/transport/THttpClient.cpp + ${LIBRARY_DIR}/src/thrift/transport/THttpServer.cpp + ${LIBRARY_DIR}/src/thrift/transport/TSocket.cpp + ${LIBRARY_DIR}/src/thrift/transport/TSocketPool.cpp + ${LIBRARY_DIR}/src/thrift/transport/TServerSocket.cpp + ${LIBRARY_DIR}/src/thrift/transport/TTransportUtils.cpp + ${LIBRARY_DIR}/src/thrift/transport/TBufferTransports.cpp + ${LIBRARY_DIR}/src/thrift/server/TConnectedClient.cpp + ${LIBRARY_DIR}/src/thrift/server/TServerFramework.cpp + ${LIBRARY_DIR}/src/thrift/server/TSimpleServer.cpp + ${LIBRARY_DIR}/src/thrift/server/TThreadPoolServer.cpp + ${LIBRARY_DIR}/src/thrift/server/TThreadedServer.cpp + ) +set(thriftcpp_threads_SOURCES + ${LIBRARY_DIR}/src/thrift/concurrency/ThreadFactory.cpp + ${LIBRARY_DIR}/src/thrift/concurrency/Thread.cpp + ${LIBRARY_DIR}/src/thrift/concurrency/Monitor.cpp + ${LIBRARY_DIR}/src/thrift/concurrency/Mutex.cpp + ) add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES}) set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641 target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src PRIVATE ${Boost_INCLUDE_DIRS}) @@ -70,22 +72,88 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc --cpp_out="${CMAKE_CURRENT_BINARY_DIR}" "${PROTO_DIR}/orc_proto.proto") + +# === flatbuffers + +############################################################## +# fbs - Step 1: build flatbuffers lib and flatc compiler +############################################################## +set(FLATBUFFERS_SRC_DIR ${ClickHouse_SOURCE_DIR}/contrib/flatbuffers) +set(FLATBUFFERS_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/flatbuffers) +set(FLATBUFFERS_INCLUDE_DIR ${FLATBUFFERS_SRC_DIR}/include) +set(FLATBUFFERS_COMPILER "${FLATBUFFERS_BINARY_DIR}/flatc") + +# set flatbuffers CMake options +if (${USE_STATIC_LIBRARIES}) + set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library") + set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library") +else () + set(FLATBUFFERS_BUILD_SHAREDLIB ON CACHE BOOL "Enable the build of the flatbuffers shared library") + set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "Disable the build of the flatbuffers library") +endif () +set(FLATBUFFERS_BUILD_FLATC ON CACHE BOOL "Build flatbuffers compiler") +set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests") + +add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}") + +################################### +# fbs - Step 2: compile *.fbs files +################################### +set(ARROW_IPC_SRC_DIR ${ARROW_SRC_DIR}/arrow/ipc) +set(ARROW_FORMAT_SRC_DIR ${ARROW_SRC_DIR}/../../format) + +set(FLATBUFFERS_COMPILED_OUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/arrow/ipc) + +set(FBS_OUTPUT_FILES "${FLATBUFFERS_COMPILED_OUT_DIR}/File_generated.h" "${FLATBUFFERS_COMPILED_OUT_DIR}/Message_generated.h" + "${FLATBUFFERS_COMPILED_OUT_DIR}/feather_generated.h") + +set(FBS_SRC + ${ARROW_FORMAT_SRC_DIR}/Message.fbs + ${ARROW_FORMAT_SRC_DIR}/File.fbs + ${ARROW_FORMAT_SRC_DIR}/Schema.fbs + ${ARROW_FORMAT_SRC_DIR}/Tensor.fbs + ${ARROW_FORMAT_SRC_DIR}/SparseTensor.fbs + ${ARROW_IPC_SRC_DIR}/feather.fbs) + +foreach (FIL ${FBS_SRC}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + list(APPEND ABS_FBS_SRC ${ABS_FIL}) +endforeach () + +message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}, FLATBUFFERS_COMPILER: ${FLATBUFFERS_COMPILER}") +message(STATUS "FLATBUFFERS_COMPILED_OUT_DIR: ${FLATBUFFERS_COMPILED_OUT_DIR}") +message(STATUS "flatc: ${FLATBUFFERS_COMPILER} -c -o ${FLATBUFFERS_COMPILED_OUT_DIR}/ ${ABS_FBS_SRC}") + +add_custom_command(OUTPUT ${FBS_OUTPUT_FILES} + COMMAND ${FLATBUFFERS_COMPILER} + -c + -o + ${FLATBUFFERS_COMPILED_OUT_DIR}/ + ${ABS_FBS_SRC} + DEPENDS flatc ${ABS_FBS_SRC} + COMMENT "Running flatc compiler on ${ABS_FBS_SRC}" + VERBATIM) + +add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES}) +add_dependencies(metadata_fbs flatc) + # arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features. # Apple Clang compiler failed to compile this code without specifying c++11 standard. # As result these compiler features detected as absent. In result it failed to compile orc itself. # In orc makefile there is code that sets flags, but arrow-cmake ignores these flags. if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") - set (CXX11_FLAGS "-std=c++0x") -endif() + set(CXX11_FLAGS "-std=c++0x") +endif () include(${ClickHouse_SOURCE_DIR}/contrib/orc/cmake_modules/CheckSourceCompiles.cmake) include(orc_check.cmake) -configure_file("${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" "${ORC_BUILD_INCLUDE_DIR}/orc/orc-config.hh") +configure_file("${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" "${ORC_BUILD_INCLUDE_DIR}/orc/orc-config.hh") configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/Adaptor.hh") set(ORC_SRCS ${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc + ${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc ${ORC_SOURCE_SRC_DIR}/Exceptions.cc ${ORC_SOURCE_SRC_DIR}/OrcFile.cc ${ORC_SOURCE_SRC_DIR}/Reader.cc @@ -119,126 +187,165 @@ set(ORC_SRCS # === arrow set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow) + +configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/arrow/util/config.h") + # arrow/cpp/src/arrow/CMakeLists.txt set(ARROW_SRCS - ${LIBRARY_DIR}/array.cc - - ${LIBRARY_DIR}/builder.cc - ${LIBRARY_DIR}/array/builder_adaptive.cc - ${LIBRARY_DIR}/array/builder_base.cc - ${LIBRARY_DIR}/array/builder_binary.cc - ${LIBRARY_DIR}/array/builder_decimal.cc - ${LIBRARY_DIR}/array/builder_dict.cc - ${LIBRARY_DIR}/array/builder_nested.cc - ${LIBRARY_DIR}/array/builder_primitive.cc - - ${LIBRARY_DIR}/buffer.cc - ${LIBRARY_DIR}/compare.cc - ${LIBRARY_DIR}/memory_pool.cc - ${LIBRARY_DIR}/pretty_print.cc - ${LIBRARY_DIR}/record_batch.cc - ${LIBRARY_DIR}/status.cc - ${LIBRARY_DIR}/table.cc - ${LIBRARY_DIR}/table_builder.cc - ${LIBRARY_DIR}/tensor.cc - ${LIBRARY_DIR}/sparse_tensor.cc - ${LIBRARY_DIR}/type.cc - ${LIBRARY_DIR}/visitor.cc - - ${LIBRARY_DIR}/csv/converter.cc - ${LIBRARY_DIR}/csv/chunker.cc - ${LIBRARY_DIR}/csv/column-builder.cc - ${LIBRARY_DIR}/csv/options.cc - ${LIBRARY_DIR}/csv/parser.cc - ${LIBRARY_DIR}/csv/reader.cc - - ${LIBRARY_DIR}/io/buffered.cc - ${LIBRARY_DIR}/io/compressed.cc - ${LIBRARY_DIR}/io/file.cc - ${LIBRARY_DIR}/io/interfaces.cc - ${LIBRARY_DIR}/io/memory.cc - ${LIBRARY_DIR}/io/readahead.cc - - ${LIBRARY_DIR}/util/bit-util.cc - ${LIBRARY_DIR}/util/compression.cc - ${LIBRARY_DIR}/util/cpu-info.cc - ${LIBRARY_DIR}/util/decimal.cc - ${LIBRARY_DIR}/util/int-util.cc - ${LIBRARY_DIR}/util/io-util.cc - ${LIBRARY_DIR}/util/logging.cc - ${LIBRARY_DIR}/util/key_value_metadata.cc - ${LIBRARY_DIR}/util/task-group.cc - ${LIBRARY_DIR}/util/thread-pool.cc - ${LIBRARY_DIR}/util/trie.cc - ${LIBRARY_DIR}/util/utf8.cc - ${ORC_SRCS} -) + ${LIBRARY_DIR}/array.cc + ${LIBRARY_DIR}/buffer.cc + ${LIBRARY_DIR}/builder.cc + ${LIBRARY_DIR}/compare.cc + ${LIBRARY_DIR}/extension_type.cc + ${LIBRARY_DIR}/memory_pool.cc + ${LIBRARY_DIR}/pretty_print.cc + ${LIBRARY_DIR}/record_batch.cc + ${LIBRARY_DIR}/result.cc + ${LIBRARY_DIR}/scalar.cc + ${LIBRARY_DIR}/sparse_tensor.cc + ${LIBRARY_DIR}/status.cc + ${LIBRARY_DIR}/table_builder.cc + ${LIBRARY_DIR}/table.cc + ${LIBRARY_DIR}/tensor.cc + ${LIBRARY_DIR}/type.cc + ${LIBRARY_DIR}/visitor.cc + + ${LIBRARY_DIR}/array/builder_adaptive.cc + ${LIBRARY_DIR}/array/builder_base.cc + ${LIBRARY_DIR}/array/builder_binary.cc + ${LIBRARY_DIR}/array/builder_decimal.cc + ${LIBRARY_DIR}/array/builder_dict.cc + ${LIBRARY_DIR}/array/builder_nested.cc + ${LIBRARY_DIR}/array/builder_primitive.cc + ${LIBRARY_DIR}/array/builder_union.cc + ${LIBRARY_DIR}/array/concatenate.cc + ${LIBRARY_DIR}/array/dict_internal.cc + ${LIBRARY_DIR}/array/diff.cc + + ${LIBRARY_DIR}/csv/converter.cc + ${LIBRARY_DIR}/csv/chunker.cc + ${LIBRARY_DIR}/csv/column_builder.cc + ${LIBRARY_DIR}/csv/options.cc + ${LIBRARY_DIR}/csv/parser.cc + ${LIBRARY_DIR}/csv/reader.cc + + ${LIBRARY_DIR}/ipc/dictionary.cc + ${LIBRARY_DIR}/ipc/feather.cc +# ${LIBRARY_DIR}/ipc/file_to_stream.cc + ${LIBRARY_DIR}/ipc/message.cc + ${LIBRARY_DIR}/ipc/metadata_internal.cc + ${LIBRARY_DIR}/ipc/options.cc + ${LIBRARY_DIR}/ipc/reader.cc +# ${LIBRARY_DIR}/ipc/stream_to_file.cc + ${LIBRARY_DIR}/ipc/writer.cc + + ${LIBRARY_DIR}/io/buffered.cc + ${LIBRARY_DIR}/io/compressed.cc + ${LIBRARY_DIR}/io/file.cc + ${LIBRARY_DIR}/io/interfaces.cc + ${LIBRARY_DIR}/io/memory.cc + ${LIBRARY_DIR}/io/readahead.cc + ${LIBRARY_DIR}/io/slow.cc + + ${LIBRARY_DIR}/util/basic_decimal.cc + ${LIBRARY_DIR}/util/bit_util.cc + # ${LIBRARY_DIR}/util/compression_brotli.cc +# ${LIBRARY_DIR}/util/compression_bz2.cc + ${LIBRARY_DIR}/util/compression.cc + ${LIBRARY_DIR}/util/compression_lz4.cc + ${LIBRARY_DIR}/util/compression_snappy.cc + ${LIBRARY_DIR}/util/compression_zlib.cc + ${LIBRARY_DIR}/util/compression_zstd.cc + ${LIBRARY_DIR}/util/cpu_info.cc + ${LIBRARY_DIR}/util/decimal.cc + ${LIBRARY_DIR}/util/int_util.cc + ${LIBRARY_DIR}/util/io_util.cc + ${LIBRARY_DIR}/util/key_value_metadata.cc + ${LIBRARY_DIR}/util/logging.cc + ${LIBRARY_DIR}/util/memory.cc + ${LIBRARY_DIR}/util/string_builder.cc + ${LIBRARY_DIR}/util/string.cc + ${LIBRARY_DIR}/util/task_group.cc + ${LIBRARY_DIR}/util/thread_pool.cc + ${LIBRARY_DIR}/util/trie.cc + # ${LIBRARY_DIR}/util/uri.cc + ${LIBRARY_DIR}/util/utf8.cc + + ${LIBRARY_DIR}/vendored/base64.cpp + ${ORC_SRCS} + ) set(ARROW_SRCS ${ARROW_SRCS} - ${LIBRARY_DIR}/compute/context.cc - ${LIBRARY_DIR}/compute/kernels/boolean.cc - ${LIBRARY_DIR}/compute/kernels/cast.cc - ${LIBRARY_DIR}/compute/kernels/hash.cc - ${LIBRARY_DIR}/compute/kernels/util-internal.cc -) + ${LIBRARY_DIR}/compute/context.cc + ${LIBRARY_DIR}/compute/kernels/boolean.cc + ${LIBRARY_DIR}/compute/kernels/cast.cc + ${LIBRARY_DIR}/compute/kernels/hash.cc + ${LIBRARY_DIR}/compute/kernels/util_internal.cc + ) if (LZ4_INCLUDE_DIR AND LZ4_LIBRARY) set(ARROW_WITH_LZ4 1) -endif() +endif () -if(SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) +if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) set(ARROW_WITH_SNAPPY 1) -endif() +endif () -if(ZLIB_INCLUDE_DIR AND ZLIB_LIBRARIES) +if (ZLIB_INCLUDE_DIR AND ZLIB_LIBRARIES) set(ARROW_WITH_ZLIB 1) -endif() +endif () if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY) set(ARROW_WITH_ZSTD 1) -endif() +endif () if (ARROW_WITH_LZ4) - add_definitions(-DARROW_WITH_LZ4) - SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS}) -endif() + add_definitions(-DARROW_WITH_LZ4) + SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS}) +endif () if (ARROW_WITH_SNAPPY) - add_definitions(-DARROW_WITH_SNAPPY) - SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_snappy.cc ${ARROW_SRCS}) -endif() + add_definitions(-DARROW_WITH_SNAPPY) + SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_snappy.cc ${ARROW_SRCS}) +endif () if (ARROW_WITH_ZLIB) - add_definitions(-DARROW_WITH_ZLIB) - SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_zlib.cc ${ARROW_SRCS}) -endif() + add_definitions(-DARROW_WITH_ZLIB) + SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_zlib.cc ${ARROW_SRCS}) +endif () if (ARROW_WITH_ZSTD) - add_definitions(-DARROW_WITH_ZSTD) - SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_zstd.cc ${ARROW_SRCS}) -endif() + add_definitions(-DARROW_WITH_ZSTD) + SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_zstd.cc ${ARROW_SRCS}) +endif () add_library(${ARROW_LIBRARY} ${ARROW_SRCS}) +# Arrow dependencies +add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs) + +target_link_libraries(${ARROW_LIBRARY} PRIVATE boost_system_internal boost_filesystem_internal boost_regex_internal) +target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY}) + if (USE_INTERNAL_PROTOBUF_LIBRARY) add_dependencies(${ARROW_LIBRARY} protoc) -endif() +endif () target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS}) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY}) if (ARROW_WITH_LZ4) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY}) -endif() +endif () if (ARROW_WITH_SNAPPY) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${SNAPPY_LIBRARY}) -endif() +endif () if (ARROW_WITH_ZLIB) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZLIB_LIBRARIES}) -endif() +endif () if (ARROW_WITH_ZSTD) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZSTD_LIBRARY}) -endif() +endif () target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_INCLUDE_DIR}) target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_SOURCE_SRC_DIR}) @@ -248,52 +355,54 @@ target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_SRC_DIR}) target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_INCLUDE_DIR}) target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_ADDITION_SOURCE_DIR}) target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_SRC_DIR}) - +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${FLATBUFFERS_INCLUDE_DIR}) # === parquet set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet) # arrow/cpp/src/parquet/CMakeLists.txt set(PARQUET_SRCS - ${LIBRARY_DIR}/arrow/reader.cc - ${LIBRARY_DIR}/arrow/record_reader.cc - ${LIBRARY_DIR}/arrow/schema.cc - ${LIBRARY_DIR}/arrow/writer.cc - ${LIBRARY_DIR}/bloom_filter.cc - ${LIBRARY_DIR}/column_reader.cc - ${LIBRARY_DIR}/column_scanner.cc - ${LIBRARY_DIR}/column_writer.cc - ${LIBRARY_DIR}/file_reader.cc - ${LIBRARY_DIR}/file_writer.cc - ${LIBRARY_DIR}/metadata.cc - ${LIBRARY_DIR}/murmur3.cc - ${LIBRARY_DIR}/printer.cc - ${LIBRARY_DIR}/schema.cc - ${LIBRARY_DIR}/statistics.cc - ${LIBRARY_DIR}/types.cc - ${LIBRARY_DIR}/util/comparison.cc - ${LIBRARY_DIR}/util/memory.cc -) + ${LIBRARY_DIR}/arrow/reader.cc + ${LIBRARY_DIR}/arrow/reader_internal.cc + ${LIBRARY_DIR}/arrow/schema.cc + ${LIBRARY_DIR}/arrow/writer.cc + ${LIBRARY_DIR}/bloom_filter.cc + ${LIBRARY_DIR}/column_reader.cc + ${LIBRARY_DIR}/column_scanner.cc + ${LIBRARY_DIR}/column_writer.cc + ${LIBRARY_DIR}/deprecated_io.cc + ${LIBRARY_DIR}/encoding.cc + ${LIBRARY_DIR}/file_reader.cc + ${LIBRARY_DIR}/file_writer.cc + ${LIBRARY_DIR}/metadata.cc + ${LIBRARY_DIR}/murmur3.cc + ${LIBRARY_DIR}/platform.cc + ${LIBRARY_DIR}/printer.cc + ${LIBRARY_DIR}/properties.cc + ${LIBRARY_DIR}/schema.cc + ${LIBRARY_DIR}/statistics.cc + ${LIBRARY_DIR}/types.cc + ) #list(TRANSFORM PARQUET_SRCS PREPEND ${LIBRARY_DIR}/) # cmake 3.12 list(APPEND PARQUET_SRCS - ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_constants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_types.cpp -) + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_constants.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_types.cpp + ) add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS}) target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src) include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} ${Boost_REGEX_LIBRARY}) target_include_directories(${PARQUET_LIBRARY} PRIVATE ${Boost_INCLUDE_DIRS}) -if(SANITIZE STREQUAL "undefined") +if (SANITIZE STREQUAL "undefined") target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined) target_compile_options(${ARROW_LIBRARY} PRIVATE -fno-sanitize=undefined) -endif() +endif () # === tools set(TOOLS_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/tools/parquet) -set(PARQUET_TOOLS parquet-dump-schema parquet-reader parquet-scan) -foreach(TOOL ${PARQUET_TOOLS}) +set(PARQUET_TOOLS parquet_dump_schema parquet_reader parquet_scan) +foreach (TOOL ${PARQUET_TOOLS}) add_executable(${TOOL} ${TOOLS_DIR}/${TOOL}.cc) target_link_libraries(${TOOL} PRIVATE ${PARQUET_LIBRARY}) -endforeach() +endforeach () diff --git a/contrib/arrow-cmake/cpp/src/arrow/ipc/File_generated.h b/contrib/arrow-cmake/cpp/src/arrow/ipc/File_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..37e2f9c7ea4a9206d70c4bf45f5ccffaaec9d1f5 --- /dev/null +++ b/contrib/arrow-cmake/cpp/src/arrow/ipc/File_generated.h @@ -0,0 +1,181 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_ +#define FLATBUFFERS_GENERATED_FILE_ORG_APACHE_ARROW_FLATBUF_H_ + +#include "flatbuffers/flatbuffers.h" + +#include "Schema_generated.h" + +namespace org { +namespace apache { +namespace arrow { +namespace flatbuf { + +struct Footer; + +struct Block; + +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Block FLATBUFFERS_FINAL_CLASS { + private: + int64_t offset_; + int32_t metaDataLength_; + int32_t padding0__; + int64_t bodyLength_; + + public: + Block() { + memset(static_cast(this), 0, sizeof(Block)); + } + Block(int64_t _offset, int32_t _metaDataLength, int64_t _bodyLength) + : offset_(flatbuffers::EndianScalar(_offset)), + metaDataLength_(flatbuffers::EndianScalar(_metaDataLength)), + padding0__(0), + bodyLength_(flatbuffers::EndianScalar(_bodyLength)) { + (void)padding0__; + } + /// Index to the start of the RecordBlock (note this is past the Message header) + int64_t offset() const { + return flatbuffers::EndianScalar(offset_); + } + /// Length of the metadata + int32_t metaDataLength() const { + return flatbuffers::EndianScalar(metaDataLength_); + } + /// Length of the data (this is aligned so there can be a gap between this and + /// the metatdata). + int64_t bodyLength() const { + return flatbuffers::EndianScalar(bodyLength_); + } +}; +FLATBUFFERS_STRUCT_END(Block, 24); + +/// ---------------------------------------------------------------------- +/// Arrow File metadata +/// +struct Footer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VERSION = 4, + VT_SCHEMA = 6, + VT_DICTIONARIES = 8, + VT_RECORDBATCHES = 10 + }; + MetadataVersion version() const { + return static_cast(GetField(VT_VERSION, 0)); + } + const Schema *schema() const { + return GetPointer(VT_SCHEMA); + } + const flatbuffers::Vector *dictionaries() const { + return GetPointer *>(VT_DICTIONARIES); + } + const flatbuffers::Vector *recordBatches() const { + return GetPointer *>(VT_RECORDBATCHES); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VERSION) && + VerifyOffset(verifier, VT_SCHEMA) && + verifier.VerifyTable(schema()) && + VerifyOffset(verifier, VT_DICTIONARIES) && + verifier.VerifyVector(dictionaries()) && + VerifyOffset(verifier, VT_RECORDBATCHES) && + verifier.VerifyVector(recordBatches()) && + verifier.EndTable(); + } +}; + +struct FooterBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_version(MetadataVersion version) { + fbb_.AddElement(Footer::VT_VERSION, static_cast(version), 0); + } + void add_schema(flatbuffers::Offset schema) { + fbb_.AddOffset(Footer::VT_SCHEMA, schema); + } + void add_dictionaries(flatbuffers::Offset> dictionaries) { + fbb_.AddOffset(Footer::VT_DICTIONARIES, dictionaries); + } + void add_recordBatches(flatbuffers::Offset> recordBatches) { + fbb_.AddOffset(Footer::VT_RECORDBATCHES, recordBatches); + } + explicit FooterBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + FooterBuilder &operator=(const FooterBuilder &); + flatbuffers::Offset