Commit 244687bf authored by Alexander Kuzmenkov

Merge remote-tracking branch 'origin/master' into HEAD

codecov:
max_report_age: off
strict_yaml_branch: "master"
ignore:
- "contrib"
- "docs"
- "benchmark"
- "tests"
- "docker"
- "debian"
- "cmake"
comment: false
github_checks:
annotations: false
\ No newline at end of file
......@@ -186,3 +186,4 @@
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl
branch = cyrus-sasl-2.1
......@@ -17,5 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse for Edge Analytics](https://ones2020.sched.com/event/bWPs) on September 29, 2020.
* [ClickHouse online meetup (in Russian)](https://clck.ru/R2zB9) on October 1, 2020.
......@@ -38,10 +38,10 @@ bool hasInputData()
}
LineReader::Suggest::WordsRange LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const
std::optional<LineReader::Suggest::WordsRange> LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) const
{
if (!ready)
return std::make_pair(words.end(), words.end());
return std::nullopt;
std::string_view last_word;
......
......@@ -4,6 +4,7 @@
#include <atomic>
#include <vector>
#include <optional>
class LineReader
{
......@@ -18,7 +19,7 @@ public:
std::atomic<bool> ready{false};
/// Get iterators for the matched range of words if any.
WordsRange getCompletions(const String & prefix, size_t prefix_length) const;
std::optional<WordsRange> getCompletions(const String & prefix, size_t prefix_length) const;
};
using Patterns = std::vector<const char *>;
......
......@@ -30,7 +30,8 @@ static LineReader::Suggest::Words::const_iterator end;
static void findRange(const char * prefix, size_t prefix_length)
{
std::string prefix_str(prefix);
std::tie(pos, end) = suggest->getCompletions(prefix_str, prefix_length);
if (auto completions = suggest->getCompletions(prefix_str, prefix_length))
std::tie(pos, end) = *completions;
}
/// Iterates through matched range.
......
......@@ -70,8 +70,9 @@ ReplxxLineReader::ReplxxLineReader(
auto callback = [&suggest] (const String & context, size_t context_size)
{
auto range = suggest.getCompletions(context, context_size);
return Replxx::completions_t(range.first, range.second);
if (auto range = suggest.getCompletions(context, context_size))
return Replxx::completions_t(range->first, range->second);
return Replxx::completions_t();
};
rx.set_completion_callback(callback);
......
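The three hunks above replace the `(end, end)` sentinel pair with `std::optional`, so every caller must now handle the "no completions" case explicitly. A minimal self-contained sketch of the pattern — the `Suggest` body here is a simplified stand-in for illustration, not the actual ClickHouse implementation:

``` cpp
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct Suggest
{
    using Words = std::vector<std::string>;
    using WordsRange = std::pair<Words::const_iterator, Words::const_iterator>;

    Words words;
    bool ready = false;

    /// Was: return std::make_pair(words.end(), words.end()) when not ready.
    std::optional<WordsRange> getCompletions() const
    {
        if (!ready)
            return std::nullopt;
        return std::make_pair(words.begin(), words.end());
    }
};

/// Caller side, as in the ReplxxLineReader callback: an empty optional
/// maps to an empty completion list instead of a dereferenceable sentinel.
std::size_t countCompletions(const Suggest & suggest)
{
    if (auto range = suggest.getCompletions())
        return static_cast<std::size_t>(range->second - range->first);
    return 0;
}
```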
......@@ -436,7 +436,54 @@ private:
}
template <typename T>
constexpr static auto multiply(const integer<Bits, Signed> & lhs, const T & rhs)
constexpr static integer<Bits, Signed>
multiply(const integer<Bits, Signed> & lhs, const T & rhs)
{
if constexpr (Bits == 256 && sizeof(base_type) == 8)
{
/// @sa https://github.com/abseil/abseil-cpp/blob/master/absl/numeric/int128.h
using HalfType = unsigned __int128;
HalfType a01 = (HalfType(lhs.items[little(1)]) << 64) + lhs.items[little(0)];
HalfType a23 = (HalfType(lhs.items[little(3)]) << 64) + lhs.items[little(2)];
HalfType a0 = lhs.items[little(0)];
HalfType a1 = lhs.items[little(1)];
HalfType b01 = rhs;
uint64_t b0 = b01;
uint64_t b1 = 0;
HalfType b23 = 0;
if constexpr (sizeof(T) > 8)
b1 = b01 >> 64;
if constexpr (sizeof(T) > 16)
b23 = (HalfType(rhs.items[little(3)]) << 64) + rhs.items[little(2)];
HalfType r23 = a23 * b01 + a01 * b23 + a1 * b1;
HalfType r01 = a0 * b0;
HalfType r12 = (r01 >> 64) + (r23 << 64);
HalfType r12_x = a1 * b0;
integer<Bits, Signed> res;
res.items[little(0)] = r01;
res.items[little(3)] = r23 >> 64;
if constexpr (sizeof(T) > 8)
{
HalfType r12_y = a0 * b1;
r12_x += r12_y;
if (r12_x < r12_y)
++res.items[little(3)];
}
r12 += r12_x;
if (r12 < r12_x)
++res.items[little(3)];
res.items[little(1)] = r12;
res.items[little(2)] = r12 >> 64;
return res;
}
else
{
integer<Bits, Signed> res{};
#if 1
......@@ -477,6 +524,7 @@ private:
return res;
}
}
public:
constexpr static integer<Bits, Signed> operator_unary_tilda(const integer<Bits, Signed> & lhs) noexcept
......
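The 256-bit fast path above works on 64-bit limbs and detects limb-addition overflow through unsigned wraparound: after `x += y`, `x < y` implies a carry. A self-contained sketch of the same bookkeeping at half width — a 128×128 → 256-bit schoolbook multiply. `U256` and `mul_full` are hypothetical names for illustration; `unsigned __int128` is a GCC/Clang extension, as in the snippet above:

``` cpp
#include <cassert>
#include <cstdint>

using u128 = unsigned __int128;

/// 256-bit product stored as two 128-bit halves.
struct U256 { u128 lo, hi; };

static U256 mul_full(u128 a, u128 b)
{
    const uint64_t a0 = uint64_t(a), a1 = uint64_t(a >> 64);
    const uint64_t b0 = uint64_t(b), b1 = uint64_t(b >> 64);

    u128 lo  = u128(a0) * b0;   // limbs 0..1
    u128 hi  = u128(a1) * b1;   // limbs 2..3
    u128 mid = u128(a1) * b0;   // cross products, weight 2^64 (limbs 1..2)

    const u128 mid2 = u128(a0) * b1;
    mid += mid2;
    if (mid < mid2)             // wraparound => carry into limb 3
        hi += u128(1) << 64;

    const u128 mid_lo = mid << 64;
    lo += mid_lo;
    if (lo < mid_lo)            // carry out of limb 1 into limb 2
        ++hi;
    hi += mid >> 64;            // upper half of the cross products

    return {lo, hi};
}

int main()
{
    // (2^64 + 2) * (3 * 2^64 + 4) = 3 * 2^128 + 10 * 2^64 + 8
    const U256 r = mul_full((u128(1) << 64) + 2, (u128(3) << 64) + 4);
    assert(r.hi == 3 && r.lo == (u128(10) << 64) + 8);
}
```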
......@@ -23,6 +23,7 @@ ExtendedLogMessage ExtendedLogMessage::getFrom(const Poco::Message & base)
msg_ext.time_seconds = static_cast<UInt32>(tv.tv_sec);
msg_ext.time_microseconds = static_cast<UInt32>(tv.tv_usec);
msg_ext.time_in_microseconds = static_cast<UInt64>((tv.tv_sec) * 1000000U + (tv.tv_usec));
if (current_thread)
{
......
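The timestamp added above folds a POSIX `timeval` into a single microsecond counter. A defensive sketch of the same conversion (an illustration, not the committed code): widening `tv_sec` before the multiply keeps the arithmetic 64-bit even on platforms where `time_t` is 32 bits.

``` cpp
#include <cstdint>
#include <sys/time.h>

/// timeval -> microseconds since the epoch, widened before multiplying
/// so the product cannot wrap in a narrower intermediate type.
inline uint64_t toMicroseconds(const timeval & tv)
{
    return static_cast<uint64_t>(tv.tv_sec) * 1000000ULL
         + static_cast<uint64_t>(tv.tv_usec);
}
```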
......@@ -23,6 +23,7 @@ public:
uint32_t time_seconds = 0;
uint32_t time_microseconds = 0;
uint64_t time_in_microseconds = 0;
uint64_t thread_id = 0;
std::string query_id;
......
......@@ -76,6 +76,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg)
TextLogElement elem;
elem.event_time = msg_ext.time_seconds;
elem.event_time_microseconds = msg_ext.time_in_microseconds;
elem.microseconds = msg_ext.time_microseconds;
elem.thread_name = getThreadName();
......
......@@ -6,7 +6,7 @@ if (ENABLE_CLANG_TIDY)
message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.")
endif()
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
if (CLANG_TIDY_PATH)
message(STATUS
......
......@@ -2,17 +2,21 @@ option(ENABLE_PROTOBUF "Enable protobuf" ${ENABLE_LIBRARIES})
if(NOT ENABLE_PROTOBUF)
if(USE_INTERNAL_PROTOBUF_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf with ENABLE_PROTOBUF=OFF")
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf with ENABLE_PROTOBUF=OFF")
endif()
return()
endif()
option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ${NOT_UNBUNDLED})
# Normally we use the internal protobuf library.
# You can set USE_INTERNAL_PROTOBUF_LIBRARY to OFF to force using the external protobuf library, which should be installed in the system in this case.
# The external protobuf library can be installed in the system by running
# sudo apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev
option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled. (Experimental. Set to OFF on your own risk)" ${NOT_UNBUNDLED})
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt")
if(USE_INTERNAL_PROTOBUF_LIBRARY)
message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf")
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf")
set(USE_INTERNAL_PROTOBUF_LIBRARY 0)
endif()
set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1)
......@@ -20,25 +24,28 @@ endif()
if(NOT USE_INTERNAL_PROTOBUF_LIBRARY)
find_package(Protobuf)
if (Protobuf_LIBRARY AND Protobuf_INCLUDE_DIR AND Protobuf_PROTOC_EXECUTABLE)
if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY)
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system protobuf library")
set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 0)
elseif(NOT Protobuf_PROTOC_EXECUTABLE)
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system protobuf compiler")
set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 0)
else()
set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 1)
set(USE_PROTOBUF 1)
else()
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system protobuf")
set(EXTERNAL_PROTOBUF_LIBRARY_FOUND 0)
endif()
endif()
if (NOT EXTERNAL_PROTOBUF_LIBRARY_FOUND AND NOT MISSING_INTERNAL_PROTOBUF_LIBRARY)
if(NOT EXTERNAL_PROTOBUF_LIBRARY_FOUND AND NOT MISSING_INTERNAL_PROTOBUF_LIBRARY)
set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")
set(USE_PROTOBUF 1)
set(USE_INTERNAL_PROTOBUF_LIBRARY 1)
set(Protobuf_LIBRARY libprotobuf)
set(Protobuf_PROTOC_EXECUTABLE "$<TARGET_FILE:protoc>")
set(Protobuf_PROTOC_LIBRARY libprotoc)
set(Protobuf_LITE_LIBRARY libprotobuf-lite)
set(Protobuf_PROTOC_EXECUTABLE "$<TARGET_FILE:protoc>")
include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake")
set(USE_INTERNAL_PROTOBUF_LIBRARY 1)
set(USE_PROTOBUF 1)
endif()
if(OS_FREEBSD AND SANITIZE STREQUAL "address")
......@@ -47,11 +54,9 @@ if(OS_FREEBSD AND SANITIZE STREQUAL "address")
if(LLVM_INCLUDE_DIRS)
set(Protobuf_INCLUDE_DIR "${Protobuf_INCLUDE_DIR}" ${LLVM_INCLUDE_DIRS})
else()
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use protobuf on FreeBSD with address sanitizer without LLVM")
message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use protobuf on FreeBSD with address sanitizer without LLVM")
set(USE_PROTOBUF 0)
endif()
endif()
include ("${ClickHouse_SOURCE_DIR}/cmake/protobuf_generate_cpp.cmake")
message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY} : ${Protobuf_PROTOC_EXECUTABLE}")
message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY} : ${Protobuf_PROTOC_EXECUTABLE} : ${Protobuf_PROTOC_LIBRARY}")
......@@ -14,10 +14,10 @@ if (NOT ENABLE_RDKAFKA)
return()
endif()
if (NOT ARCH_ARM AND USE_LIBGSASL)
if (NOT ARCH_ARM)
option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
elseif(USE_INTERNAL_RDKAFKA_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal librdkafka with ARCH_ARM=${ARCH_ARM} AND USE_LIBGSASL=${USE_LIBGSASL}")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal librdkafka with ARCH_ARM=${ARCH_ARM}")
endif ()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cppkafka/CMakeLists.txt")
......
# This file declares functions adding custom commands for generating C++ files from *.proto files:
# function (protobuf_generate_cpp SRCS HDRS)
# function (protobuf_generate_grpc_cpp SRCS HDRS)
if (NOT USE_PROTOBUF)
message (WARNING "Could not use protobuf_generate_cpp() without the protobuf library")
return()
endif()
if (NOT DEFINED PROTOBUF_PROTOC_EXECUTABLE)
set (PROTOBUF_PROTOC_EXECUTABLE "$<TARGET_FILE:protoc>")
endif()
if (NOT DEFINED GRPC_CPP_PLUGIN_EXECUTABLE)
set (GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>)
endif()
if (NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH)
set (PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE)
endif()
function(protobuf_generate_cpp_impl SRCS HDRS MODES OUTPUT_FILE_EXTS PLUGIN)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate_cpp() called without any proto files")
return()
endif()
if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
# Create an include path for each file specified
foreach(FIL ${ARGN})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(ABS_PATH ${ABS_FIL} PATH)
list(FIND protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
else()
set(protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
endif()
if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
endif()
if(DEFINED Protobuf_IMPORT_DIRS)
foreach(DIR ${Protobuf_IMPORT_DIRS})
get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
list(FIND protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
endif()
set (intermediate_dir ${CMAKE_CURRENT_BINARY_DIR}/intermediate)
file (MAKE_DIRECTORY ${intermediate_dir})
set (protoc_args)
foreach (mode ${MODES})
list (APPEND protoc_args "--${mode}_out" ${intermediate_dir})
endforeach()
if (PLUGIN)
list (APPEND protoc_args "--plugin=${PLUGIN}")
endif()
set(srcs)
set(hdrs)
set(all_intermediate_outputs)
foreach(input_name ${ARGN})
get_filename_component(abs_name ${input_name} ABSOLUTE)
get_filename_component(name ${input_name} NAME_WE)
set (intermediate_outputs)
foreach (ext ${OUTPUT_FILE_EXTS})
set (filename "${name}${ext}")
set (output "${CMAKE_CURRENT_BINARY_DIR}/${filename}")
set (intermediate_output "${intermediate_dir}/${filename}")
list (APPEND intermediate_outputs "${intermediate_output}")
list (APPEND all_intermediate_outputs "${intermediate_output}")
if (${ext} MATCHES ".*\\.h")
list(APPEND hdrs "${output}")
else()
list(APPEND srcs "${output}")
endif()
add_custom_command(
OUTPUT ${output}
COMMAND ${CMAKE_COMMAND} -DPROTOBUF_GENERATE_CPP_SCRIPT_MODE=1 -DUSE_PROTOBUF=1 -DDIR=${CMAKE_CURRENT_BINARY_DIR} -DFILENAME=${filename} -DCOMPILER_ID=${CMAKE_CXX_COMPILER_ID} -P ${ClickHouse_SOURCE_DIR}/cmake/protobuf_generate_cpp.cmake
DEPENDS ${intermediate_output})
endforeach()
add_custom_command(
OUTPUT ${intermediate_outputs}
COMMAND ${Protobuf_PROTOC_EXECUTABLE}
ARGS ${protobuf_include_path} ${protoc_args} ${abs_name}
DEPENDS ${abs_name} ${Protobuf_PROTOC_EXECUTABLE} ${PLUGIN}
COMMENT "Running C++ protocol buffer compiler on ${name}"
VERBATIM )
endforeach()
set_source_files_properties(${srcs} ${hdrs} ${all_intermediate_outputs} PROPERTIES GENERATED TRUE)
set(${SRCS} ${srcs} PARENT_SCOPE)
set(${HDRS} ${hdrs} PARENT_SCOPE)
endfunction()
if (PROTOBUF_GENERATE_CPP_SCRIPT_MODE)
set (output "${DIR}/${FILENAME}")
set (intermediate_dir ${DIR}/intermediate)
set (intermediate_output "${intermediate_dir}/${FILENAME}")
if (COMPILER_ID MATCHES "Clang")
set (pragma_push "#pragma clang diagnostic push\n")
set (pragma_pop "#pragma clang diagnostic pop\n")
set (pragma_disable_warnings "#pragma clang diagnostic ignored \"-Weverything\"\n")
elseif (COMPILER_ID MATCHES "GNU")
set (pragma_push "#pragma GCC diagnostic push\n")
set (pragma_pop "#pragma GCC diagnostic pop\n")
set (pragma_disable_warnings "#pragma GCC diagnostic ignored \"-Wall\"\n"
"#pragma GCC diagnostic ignored \"-Wextra\"\n"
"#pragma GCC diagnostic ignored \"-Warray-bounds\"\n"
"#pragma GCC diagnostic ignored \"-Wold-style-cast\"\n"
"#pragma GCC diagnostic ignored \"-Wshadow\"\n"
"#pragma GCC diagnostic ignored \"-Wsuggest-override\"\n"
"#pragma GCC diagnostic ignored \"-Wcast-qual\"\n"
"#pragma GCC diagnostic ignored \"-Wunused-parameter\"\n")
endif()
if (${FILENAME} MATCHES ".*\\.h")
file(WRITE "${output}"
"#pragma once\n"
${pragma_push}
${pragma_disable_warnings}
"#include \"${intermediate_output}\"\n"
${pragma_pop}
)
else()
file(WRITE "${output}"
${pragma_disable_warnings}
"#include \"${intermediate_output}\"\n"
)
endif()
return()
endif()
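For reference, the script-mode branch above does nothing more than write a thin wrapper around the real generated file. Under Clang, a generated header would look roughly like this (the file name and include path are illustrative):

``` cpp
// Hypothetical ${CMAKE_CURRENT_BINARY_DIR}/foo.pb.h written by the
// script-mode branch: silence all warnings in the protoc output only.
#pragma once
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
#include "/path/to/build/intermediate/foo.pb.h"
#pragma clang diagnostic pop
```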
function(protobuf_generate_cpp SRCS HDRS)
set (modes cpp)
set (output_file_exts ".pb.cc" ".pb.h")
set (plugin)
protobuf_generate_cpp_impl(srcs hdrs "${modes}" "${output_file_exts}" "${plugin}" ${ARGN})
set(${SRCS} ${srcs} PARENT_SCOPE)
set(${HDRS} ${hdrs} PARENT_SCOPE)
endfunction()
function(protobuf_generate_grpc_cpp SRCS HDRS)
set (modes cpp grpc)
set (output_file_exts ".pb.cc" ".pb.h" ".grpc.pb.cc" ".grpc.pb.h")
set (plugin "protoc-gen-grpc=${GRPC_CPP_PLUGIN_EXECUTABLE}")
protobuf_generate_cpp_impl(srcs hdrs "${modes}" "${output_file_exts}" "${plugin}" ${ARGN})
set(${SRCS} ${srcs} PARENT_SCOPE)
set(${HDRS} ${hdrs} PARENT_SCOPE)
endfunction()
Subproject commit 6054630889fd1cd8d0659573d69badcee1e23a00
Subproject commit 9995bf9d8e14f58934d9313ac64f13780d6dd3c9
Subproject commit 297fc905e166392156f83b96aaa5f44e8a6a35c4
Subproject commit 757d947235b307675cff964f29b19d388140a9eb
# The code in this file was copied from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
#[[
Add custom commands to process ``.proto`` files to C++::
protobuf_generate_cpp (<SRCS> <HDRS>
[DESCRIPTORS <DESC>] [EXPORT_MACRO <MACRO>] [<ARGN>...])
``SRCS``
Variable to define with autogenerated source files
``HDRS``
Variable to define with autogenerated header files
``DESCRIPTORS``
Variable to define with autogenerated descriptor files, if requested.
``EXPORT_MACRO``
is a macro which should expand to ``__declspec(dllexport)`` or
``__declspec(dllimport)`` depending on what is being compiled.
``ARGN``
``.proto`` files
#]]
function(PROTOBUF_GENERATE_CPP SRCS HDRS)
cmake_parse_arguments(protobuf_generate_cpp "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN})
set(_proto_files "${protobuf_generate_cpp_UNPARSED_ARGUMENTS}")
if(NOT _proto_files)
message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
return()
endif()
if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
set(_append_arg APPEND_PATH)
endif()
if(protobuf_generate_cpp_DESCRIPTORS)
set(_descriptors DESCRIPTORS)
endif()
if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
endif()
if(DEFINED Protobuf_IMPORT_DIRS)
set(_import_arg IMPORT_DIRS ${Protobuf_IMPORT_DIRS})
endif()
set(_outvar)
protobuf_generate(${_append_arg} ${_descriptors} LANGUAGE cpp EXPORT_MACRO ${protobuf_generate_cpp_EXPORT_MACRO} OUT_VAR _outvar ${_import_arg} PROTOS ${_proto_files})
set(${SRCS})
set(${HDRS})
if(protobuf_generate_cpp_DESCRIPTORS)
set(${protobuf_generate_cpp_DESCRIPTORS})
endif()
foreach(_file ${_outvar})
if(_file MATCHES "cc$")
list(APPEND ${SRCS} ${_file})
elseif(_file MATCHES "desc$")
list(APPEND ${protobuf_generate_cpp_DESCRIPTORS} ${_file})
else()
list(APPEND ${HDRS} ${_file})
endif()
endforeach()
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
set(${HDRS} ${${HDRS}} PARENT_SCOPE)
if(protobuf_generate_cpp_DESCRIPTORS)
set(${protobuf_generate_cpp_DESCRIPTORS} "${${protobuf_generate_cpp_DESCRIPTORS}}" PARENT_SCOPE)
endif()
endfunction()
# By default have PROTOBUF_GENERATE_CPP macro pass -I to protoc
# for each directory where a proto file is referenced.
if(NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH)
set(PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE)
endif()
function(protobuf_generate)
set(_options APPEND_PATH DESCRIPTORS)
set(_singleargs LANGUAGE OUT_VAR EXPORT_MACRO PROTOC_OUT_DIR)
if(COMMAND target_sources)
list(APPEND _singleargs TARGET)
endif()
set(_multiargs PROTOS IMPORT_DIRS GENERATE_EXTENSIONS)
cmake_parse_arguments(protobuf_generate "${_options}" "${_singleargs}" "${_multiargs}" "${ARGN}")
if(NOT protobuf_generate_PROTOS AND NOT protobuf_generate_TARGET)
message(SEND_ERROR "Error: protobuf_generate called without any targets or source files")
return()
endif()
if(NOT protobuf_generate_OUT_VAR AND NOT protobuf_generate_TARGET)
message(SEND_ERROR "Error: protobuf_generate called without a target or output variable")
return()
endif()
if(NOT protobuf_generate_LANGUAGE)
set(protobuf_generate_LANGUAGE cpp)
endif()
string(TOLOWER ${protobuf_generate_LANGUAGE} protobuf_generate_LANGUAGE)
if(NOT protobuf_generate_PROTOC_OUT_DIR)
set(protobuf_generate_PROTOC_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
endif()
if(protobuf_generate_EXPORT_MACRO AND protobuf_generate_LANGUAGE STREQUAL cpp)
set(_dll_export_decl "dllexport_decl=${protobuf_generate_EXPORT_MACRO}:")
endif()
if(NOT protobuf_generate_GENERATE_EXTENSIONS)
if(protobuf_generate_LANGUAGE STREQUAL cpp)
set(protobuf_generate_GENERATE_EXTENSIONS .pb.h .pb.cc)
elseif(protobuf_generate_LANGUAGE STREQUAL python)
set(protobuf_generate_GENERATE_EXTENSIONS _pb2.py)
else()
message(SEND_ERROR "Error: protobuf_generate given unknown Language ${LANGUAGE}, please provide a value for GENERATE_EXTENSIONS")
return()
endif()
endif()
if(protobuf_generate_TARGET)
get_target_property(_source_list ${protobuf_generate_TARGET} SOURCES)
foreach(_file ${_source_list})
if(_file MATCHES "proto$")
list(APPEND protobuf_generate_PROTOS ${_file})
endif()
endforeach()
endif()
if(NOT protobuf_generate_PROTOS)
message(SEND_ERROR "Error: protobuf_generate could not find any .proto files")
return()
endif()
if(protobuf_generate_APPEND_PATH)
# Create an include path for each file specified
foreach(_file ${protobuf_generate_PROTOS})
get_filename_component(_abs_file ${_file} ABSOLUTE)
get_filename_component(_abs_path ${_abs_file} PATH)
list(FIND _protobuf_include_path ${_abs_path} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${_abs_path})
endif()
endforeach()
else()
set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
endif()
foreach(DIR ${protobuf_generate_IMPORT_DIRS})
get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
set(_generated_srcs_all)
foreach(_proto ${protobuf_generate_PROTOS})
get_filename_component(_abs_file ${_proto} ABSOLUTE)
get_filename_component(_abs_dir ${_abs_file} DIRECTORY)
get_filename_component(_basename ${_proto} NAME_WE)
file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir})
set(_possible_rel_dir)
if (NOT protobuf_generate_APPEND_PATH)
set(_possible_rel_dir ${_rel_dir}/)
endif()
set(_generated_srcs)
foreach(_ext ${protobuf_generate_GENERATE_EXTENSIONS})
list(APPEND _generated_srcs "${protobuf_generate_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}${_ext}")
endforeach()
if(protobuf_generate_DESCRIPTORS AND protobuf_generate_LANGUAGE STREQUAL cpp)
set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc")
set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}")
list(APPEND _generated_srcs ${_descriptor_file})
endif()
list(APPEND _generated_srcs_all ${_generated_srcs})
add_custom_command(
OUTPUT ${_generated_srcs}
COMMAND protobuf::protoc
ARGS --${protobuf_generate_LANGUAGE}_out ${_dll_export_decl}${protobuf_generate_PROTOC_OUT_DIR} ${_dll_desc_out} ${_protobuf_include_path} ${_abs_file}
DEPENDS ${_abs_file} protobuf::protoc
COMMENT "Running ${protobuf_generate_LANGUAGE} protocol buffer compiler on ${_proto}"
VERBATIM )
endforeach()
set_source_files_properties(${_generated_srcs_all} PROPERTIES GENERATED TRUE)
if(protobuf_generate_OUT_VAR)
set(${protobuf_generate_OUT_VAR} ${_generated_srcs_all} PARENT_SCOPE)
endif()
if(protobuf_generate_TARGET)
target_sources(${protobuf_generate_TARGET} PRIVATE ${_generated_srcs_all})
endif()
endfunction()
#!/usr/bin/env bash
set -e
#ccache -s # uncomment to display CCache statistics
mkdir -p /server/build_docker
cd /server/build_docker
cmake -G Ninja /server -DCMAKE_C_COMPILER=`which gcc-9` -DCMAKE_CXX_COMPILER=`which g++-9`
cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v gcc-9)" "-DCMAKE_CXX_COMPILER=$(command -v g++-9)"
# Set the number of build jobs to half the number of virtual CPU cores (rounded up).
# By default, ninja uses all virtual CPU cores, which leads to very high memory consumption without much improvement in build time.
......
......@@ -133,6 +133,10 @@
"name": "yandex/clickhouse-postgresql-java-client",
"dependent": []
},
"docker/test/integration/kerberos_kdc": {
"name": "yandex/clickhouse-kerberos-kdc",
"dependent": []
},
"docker/test/base": {
"name": "yandex/clickhouse-test-base",
"dependent": [
......
......@@ -17,7 +17,8 @@ ccache --show-stats ||:
ccache --zero-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
rm -f CMakeCache.txt
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER -DENABLE_CHECK_HEAVY_BUILDS=1 $CMAKE_FLAGS ..
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "$CMAKE_FLAGS" ..
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS clickhouse-bundle
mv ./programs/clickhouse* /output
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
......
......@@ -4,16 +4,16 @@ set -x -e
ccache --show-stats ||:
ccache --zero-stats ||:
build/release --no-pbuilder $ALIEN_PKGS | ts '%Y-%m-%d %H:%M:%S'
build/release --no-pbuilder "$ALIEN_PKGS" | ts '%Y-%m-%d %H:%M:%S'
mv /*.deb /output
mv *.changes /output
mv *.buildinfo /output
mv -- *.changes /output
mv -- *.buildinfo /output
mv /*.rpm /output ||: # if exists
mv /*.tgz /output ||: # if exists
if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;}
then
echo Place $BINARY_OUTPUT to output
echo "Place $BINARY_OUTPUT to output"
mkdir /output/binary ||: # if exists
mv /build/obj-*/programs/clickhouse* /output/binary
if [ "$BINARY_OUTPUT" = "tests" ]
......
......@@ -4,10 +4,10 @@ set -x -e
ccache --show-stats ||:
ccache --zero-stats ||:
build/release --no-pbuilder $ALIEN_PKGS | ts '%Y-%m-%d %H:%M:%S'
build/release --no-pbuilder "$ALIEN_PKGS" | ts '%Y-%m-%d %H:%M:%S'
mv /*.deb /output
mv *.changes /output
mv *.buildinfo /output
mv -- *.changes /output
mv -- *.buildinfo /output
mv /*.rpm /output ||: # if exists
mv /*.tgz /output ||: # if exists
......
......@@ -48,10 +48,15 @@ RUN apt-get update \
tzdata \
--yes --no-install-recommends
# Sanitizer options
# Sanitizer options for services (clickhouse-server)
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "MSAN_OPTIONS='abort_on_error=1'" >> /etc/environment; \
ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1'
CMD sleep 1
......@@ -58,7 +58,7 @@ function watchdog
echo "Fuzzing run has timed out"
killall clickhouse-client ||:
for x in {1..10}
for _ in {1..10}
do
if ! pgrep -f clickhouse-client
then
......@@ -81,6 +81,9 @@ function fuzz
echo Server started
fuzzer_exit_code=0
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames.
# They are all alphanumeric.
# shellcheck disable=SC2012
./clickhouse-client --query-fuzzer-runs=1000 \
< <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \
> >(tail -10000 > fuzzer.log) \
......
......@@ -16,7 +16,8 @@ RUN apt-get update \
odbc-postgresql \
sqlite3 \
curl \
tar
tar \
krb5-user
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
......
# docker build -t yandex/clickhouse-kerberos-kdc .
FROM centos:6.6
# old OS to make it faster and smaller
RUN yum install -y krb5-server krb5-libs krb5-auth-dialog krb5-workstation
EXPOSE 88 749
RUN touch /config.sh
# should be overwritten e.g. via docker_compose volumes
# volumes: /some_path/my_kerberos_config.sh:/config.sh:ro
ENTRYPOINT ["/bin/bash", "/config.sh"]
version: '2.3'
services:
kafka_kerberized_zookeeper:
image: confluentinc/cp-zookeeper:5.2.0
# restart: always
hostname: kafka_kerberized_zookeeper
environment:
ZOOKEEPER_SERVER_ID: 1
ZOOKEEPER_CLIENT_PORT: 2181
ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888"
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true"
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
- /dev/urandom:/dev/random
depends_on:
- kafka_kerberos
security_opt:
- label:disable
kerberized_kafka1:
image: confluentinc/cp-kafka:5.2.0
# restart: always
hostname: kerberized_kafka1
ports:
- "9092:9092"
- "9093:9093"
environment:
KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093
KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093
# KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092
# KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092
KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI
KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI
KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT,
KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181"
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
- /dev/urandom:/dev/random
depends_on:
- kafka_kerberized_zookeeper
- kafka_kerberos
security_opt:
- label:disable
kafka_kerberos:
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
hostname: kafka_kerberos
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
- ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh
- /dev/urandom:/dev/random
ports: [88, 749]
......@@ -7,7 +7,7 @@ set +e
reties=0
while true; do
docker info &>/dev/null && break
reties=$[$reties+1]
reties=$((reties+1))
if [[ $reties -ge 100 ]]; then # 10 sec max
echo "Can't start docker daemon, timeout exceeded." >&2
exit 1;
......@@ -27,6 +27,7 @@ export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}
export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
cd /ClickHouse/tests/integration
exec "$@"
......@@ -9,7 +9,7 @@ right_version=${2}
if [ "$left_version" == "" ] || [ "$right_version" == "" ]
then
>&2 echo Usage: $(basename "$0") left_version right_version
>&2 echo "Usage: $(basename "$0") left_version right_version"
exit 1
fi
......
......@@ -181,6 +181,9 @@ function run_tests
# Randomize test order.
test_files=$(for f in $test_files; do echo "$f"; done | sort -R)
# Limit profiling time to 10 minutes so that the tests don't run for too long.
profile_seconds_left=600
# Run the tests.
test_name="<none>"
for test in $test_files
......@@ -194,15 +197,24 @@ function run_tests
test_name=$(basename "$test" ".xml")
echo test "$test_name"
# Don't profile if we're past the time limit.
# Use awk because bash doesn't support floating point arithmetic.
profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }")
TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
# The grep is to filter out set -x output and keep only time output.
# The '2>&1 >/dev/null' redirects stderr to stdout, and discards stdout.
{ \
time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 \
--runs "$CHPC_RUNS" --max-queries "$CHPC_MAX_QUERIES" \
--profile-seconds "$profile_seconds" \
-- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; \
} 2>&1 >/dev/null | tee >(grep -v ^+ >> "wall-clock-times.tsv") \
|| echo "Test $test_name failed with error code $?" >> "$test_name-err.log"
profile_seconds_left=$(awk -F' ' \
'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \
"$test_name-raw.tsv")
done
unset TIMEFORMAT
......@@ -294,6 +306,7 @@ for test_file in *-raw.tsv
do
test_name=$(basename "$test_file" "-raw.tsv")
sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv"
sed -n "s/^profile\t/$test_name\t/p" < "$test_file" >> "analyze/query-profiles.tsv"
sed -n "s/^client-time\t/$test_name\t/p" < "$test_file" >> "analyze/client-times.tsv"
sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv"
......@@ -658,13 +671,15 @@ create view test_runs as
group by test
;
create table test_times_report engine File(TSV, 'report/test-times.tsv') as
select wall_clock_time_per_test.test, real,
toDecimal64(total_client_time, 3),
create view test_times_view as
select
wall_clock_time_per_test.test test,
real,
total_client_time,
queries,
toDecimal64(query_max, 3),
toDecimal64(real / queries, 3) avg_real_per_query,
toDecimal64(query_min, 3),
query_max,
real / queries avg_real_per_query,
query_min,
runs
from test_time
-- wall clock times are also measured for skipped tests, so don't
......@@ -673,7 +688,43 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') as
on wall_clock_time_per_test.test = test_time.test
full join test_runs
on test_runs.test = test_time.test
order by avg_real_per_query desc;
;
-- WITH TOTALS doesn't work with INSERT SELECT, so we have to jump through these
-- hoops: https://github.com/ClickHouse/ClickHouse/issues/15227
create view test_times_view_total as
select
'Total' test,
sum(real),
sum(total_client_time),
sum(queries),
max(query_max),
sum(real) / sum(queries) avg_real_per_query,
min(query_min),
-- Totaling the number of runs doesn't make sense, but use the max so
-- that the reporting script doesn't complain about queries being too
-- long.
max(runs)
from test_times_view
;
create table test_times_report engine File(TSV, 'report/test-times.tsv') as
select
test,
toDecimal64(real, 3),
toDecimal64(total_client_time, 3),
queries,
toDecimal64(query_max, 3),
toDecimal64(avg_real_per_query, 3),
toDecimal64(query_min, 3),
runs
from (
select * from test_times_view
union all
select * from test_times_view_total
)
order by test = 'Total' desc, avg_real_per_query desc
;
-- report for all queries page, only main metric
create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as
......@@ -694,13 +745,12 @@ create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as
test, query_index, query_display_name
from queries order by test, query_index;
-- queries for which we will build flamegraphs (see below)
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
-- Report of queries that have inconsistent 'short' markings:
-- 1) have short duration, but are not marked as 'short'
-- 2) the reverse -- marked 'short' but take too long.
-- The threshold for 2) is significantly larger than the threshold for 1), to
-- avoid jitter.
create view shortness
as select
(test, query_index) in
......@@ -718,11 +768,6 @@ create view shortness
and times.query_index = query_display_names.query_index
;
-- Report of queries that have inconsistent 'short' markings:
-- 1) have short duration, but are not marked as 'short'
-- 2) the reverse -- marked 'short' but take too long.
-- The threshold for 2) is significantly larger than the threshold for 1), to
-- avoid jitter.
create table inconsistent_short_marking_report
engine File(TSV, 'report/unexpected-query-duration.tsv')
as select
......@@ -759,18 +804,15 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts
" 2> >(tee -a report/errors.log 1>&2)
# Prepare source data for metrics and flamegraphs for unstable queries.
# Prepare source data for metrics and flamegraphs for queries that were profiled
# by perf.py.
for version in {right,left}
do
rm -rf data
clickhouse-local --query "
create view queries_for_flamegraph as
select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes,
'test text, query_index int');
create view query_runs as
create view query_profiles as
with 0 as left, 1 as right
select * from file('analyze/query-runs.tsv', TSV,
select * from file('analyze/query-profiles.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float')
where version = $version
;
......@@ -782,15 +824,12 @@ create view query_display_names as select * from
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select query_runs.test test, query_runs.query_index query_index,
select query_profiles.test test, query_profiles.query_index query_index,
query_display_name, query_id
from query_runs
join queries_for_flamegraph on
query_runs.test = queries_for_flamegraph.test
and query_runs.query_index = queries_for_flamegraph.query_index
from query_profiles
left join query_display_names on
query_runs.test = query_display_names.test
and query_runs.query_index = query_display_names.query_index
query_profiles.test = query_display_names.test
and query_profiles.query_index = query_display_names.query_index
;
create view query_log as select *
......
......@@ -10,7 +10,7 @@ mkdir left ||:
left_pr=$1
left_sha=$2
right_pr=$3
# right_pr=$3 not used for now
right_sha=$4
datasets=${CHPC_DATASETS:-"hits1 hits10 hits100 values"}
......
......@@ -18,9 +18,22 @@ import xml.etree.ElementTree as et
from threading import Thread
from scipy import stats
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds
current = time.perf_counter()
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
stage_start_seconds = current
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
parser = argparse.ArgumentParser(description='Run performance test.')
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
......@@ -29,16 +42,21 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
args = parser.parse_args()
reportStageEnd('start')
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
reportStageEnd('parse')
# Process query parameters
subst_elems = root.findall('substitutions/substitution')
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
......@@ -112,15 +130,21 @@ if not args.long:
sys.exit(0)
# Print report threshold for the test if it is set.
ignored_relative_change = 0.05
if 'max_ignored_relative_change' in root.attrib:
print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
print(f'report-threshold\t{ignored_relative_change}')
reportStageEnd('before-connect')
# Open connections
servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
all_connections = [clickhouse_driver.Client(**server) for server in servers]
for s in servers:
print('server\t{}\t{}'.format(s['host'], s['port']))
for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
reportStageEnd('connect')
# Run drop queries, ignoring errors. Do this before all other activity, because
# clickhouse_driver disconnects on error (this is not configurable), and the new
......@@ -135,6 +159,8 @@ for conn_index, c in enumerate(all_connections):
except:
pass
reportStageEnd('drop-1')
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
......@@ -152,6 +178,8 @@ for conn_index, c in enumerate(all_connections):
except:
print(traceback.format_exc(), file=sys.stderr)
reportStageEnd('settings')
# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
for t in tables:
......@@ -164,6 +192,8 @@ for t in tables:
print(f'skipped\t{tsv_escape(skipped_message)}')
sys.exit(0)
reportStageEnd('preconditions')
# Run create and fill queries. We will run them simultaneously for both servers,
# to save time.
# The weird search is to keep the relative order of elements, which matters, and
......@@ -194,6 +224,9 @@ for t in threads:
for t in threads:
t.join()
reportStageEnd('create')
# By default, test all queries.
queries_to_run = range(0, len(test_queries))
if args.max_queries:
......@@ -205,6 +238,7 @@ if args.queries_to_run:
queries_to_run = args.queries_to_run
# Run test queries.
profile_total_seconds = 0
for query_index in queries_to_run:
q = test_queries[query_index]
query_prefix = f'{test_name}.query{query_index}'
......@@ -324,14 +358,30 @@ for query_index in queries_to_run:
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
#print(all_server_times)
#print(stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue)
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much
if len(all_server_times) == 2 and stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue < 0.1:
if len(all_server_times) != 2:
continue
if len(all_server_times[0]) < 3:
# Don't fail if for some reason there are not enough measurements.
continue
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
continue
# Perform profile runs for fixed amount of time. Don't limit the number
# of runs, because we also have short queries.
profile_start_seconds = time.perf_counter()
run = 0
while True:
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
run_id = f'{query_prefix}.profile{run}'
for conn_index, c in enumerate(this_query_connections):
......@@ -344,14 +394,13 @@ for query_index in queries_to_run:
e.message = run_id + ': ' + e.message
raise
elapsed = c.last_query.elapsed
profile_seconds += elapsed
run += 1
# Don't spend too much time for profile runs
if run > args.runs or profile_seconds > 10:
break
# And don't bother with short queries
profile_total_seconds += time.perf_counter() - profile_start_seconds
print(f'profile-total\t{profile_total_seconds}')
reportStageEnd('run')
# Run drop queries
drop_queries = substitute_parameters(drop_query_templates)
......@@ -359,3 +408,5 @@ for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
reportStageEnd('drop-2')
......@@ -487,7 +487,7 @@ if args.report == 'main':
for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}'
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
if float(r[5]) > allowed_average_run_time * total_runs:
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[5] = f'style="background: {color_bad}"'
......@@ -495,7 +495,7 @@ if args.report == 'main':
else:
attrs[5] = ''
if float(r[4]) > allowed_single_run_time * total_runs:
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[4] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
......
......@@ -26,11 +26,12 @@ function start()
fi
timeout 120 service clickhouse-server start
sleep 0.5
counter=$(($counter + 1))
counter=$((counter + 1))
done
}
start
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "SHOW DATABASES"
......@@ -43,8 +44,8 @@ clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
clickhouse-client --query "SHOW TABLES FROM test"
if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test ; then
SKIP_LIST_OPT="--use-skip-list"
fi
clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
#!/bin/bash
kill_clickhouse () {
kill `pgrep -u clickhouse` 2>/dev/null
kill "$(pgrep -u clickhouse)" 2>/dev/null
for i in {1..10}
for _ in {1..10}
do
if ! kill -0 `pgrep -u clickhouse`; then
if ! kill -0 "$(pgrep -u clickhouse)"; then
echo "No clickhouse process"
break
else
echo "Process" `pgrep -u clickhouse` "still alive"
echo "Process $(pgrep -u clickhouse) still alive"
sleep 10
fi
done
......@@ -20,19 +20,19 @@ start_clickhouse () {
}
wait_llvm_profdata () {
while kill -0 `pgrep llvm-profdata-10`;
while kill -0 "$(pgrep llvm-profdata-10)"
do
echo "Waiting for profdata" `pgrep llvm-profdata-10` "still alive"
echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive"
sleep 3
done
}
merge_client_files_in_background () {
client_files=`ls /client_*profraw 2>/dev/null`
if [ ! -z "$client_files" ]
client_files=$(ls /client_*profraw 2>/dev/null)
if [ -n "$client_files" ]
then
llvm-profdata-10 merge -sparse $client_files -o merged_client_`date +%s`.profraw
rm $client_files
llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw"
rm "$client_files"
fi
}
......@@ -66,12 +66,13 @@ function start()
fi
timeout 120 service clickhouse-server start
sleep 0.5
counter=$(($counter + 1))
counter=$((counter + 1))
done
}
start
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
if ! /s3downloader --dataset-names $DATASETS; then
echo "Cannot download datatsets"
exit 1
......@@ -100,11 +101,11 @@ LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TA
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
kill_clickhouse
......
......@@ -13,8 +13,8 @@ dpkg -i package_folder/clickhouse-test_*.deb
service clickhouse-server start && sleep 5
if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
......@@ -13,8 +13,8 @@ dpkg -i package_folder/clickhouse-test_*.deb
service clickhouse-server start && sleep 5
if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
#!/bin/bash
kill_clickhouse () {
echo "clickhouse pids" `ps aux | grep clickhouse` | ts '%Y-%m-%d %H:%M:%S'
kill `pgrep -u clickhouse` 2>/dev/null
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
kill "$(pgrep -u clickhouse)" 2>/dev/null
for i in {1..10}
for _ in {1..10}
do
if ! kill -0 `pgrep -u clickhouse`; then
if ! kill -0 "$(pgrep -u clickhouse)"; then
echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S'
break
else
echo "Process" `pgrep -u clickhouse` "still alive" | ts '%Y-%m-%d %H:%M:%S'
echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S'
sleep 10
fi
done
echo "Will try to send second kill signal for sure"
kill `pgrep -u clickhouse` 2>/dev/null
kill "$(pgrep -u clickhouse)" 2>/dev/null
sleep 5
echo "clickhouse pids" `ps aux | grep clickhouse` | ts '%Y-%m-%d %H:%M:%S'
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
}
start_clickhouse () {
......@@ -47,11 +47,11 @@ start_clickhouse
sleep 10
if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
kill_clickhouse
......
......@@ -13,7 +13,7 @@ function stop()
timeout 120 service clickhouse-server stop
# Wait for process to disappear from processlist and also try to kill zombies.
while kill -9 $(pidof clickhouse-server)
while kill -9 "$(pidof clickhouse-server)"
do
echo "Killed clickhouse-server"
sleep 0.5
......@@ -35,17 +35,21 @@ function start()
fi
timeout 120 service clickhouse-server start
sleep 0.5
counter=$(($counter + 1))
counter=$((counter + 1))
done
}
# install test configs
/usr/share/clickhouse-test/config/install.sh
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
# for clickhouse-client
export ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'
start
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
......
......@@ -2,17 +2,19 @@
set -e -x
# Not sure why shellcheck complains that rc is not assigned before it is referenced.
# shellcheck disable=SC2154
trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT
# CLI option to prevent rebuilding images, just re-run tests with images left over from the previous run
readonly NO_REBUILD_FLAG="--no-rebuild"
readonly CLICKHOUSE_DOCKER_DIR="$(realpath ${1})"
readonly CLICKHOUSE_DOCKER_DIR="$(realpath "${1}")"
readonly CLICKHOUSE_PACKAGES_ARG="${2}"
CLICKHOUSE_SERVER_IMAGE="${3}"
if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then
readonly CLICKHOUSE_PACKAGES_DIR="$(realpath ${2})" # or --no-rebuild
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
readonly CLICKHOUSE_PACKAGES_DIR="$(realpath "${2}")" # or --no-rebuild
fi
......@@ -25,7 +27,7 @@ fi
# TODO: optionally mount most recent clickhouse-test and queries directory from local machine
if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
docker build --network=host \
-f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \
--target clickhouse-test-runner-base \
......@@ -49,7 +51,7 @@ fi
if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then
CLICKHOUSE_SERVER_IMAGE="yandex/clickhouse-server:local"
if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
docker build --network=host \
-f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \
--target clickhouse-server-base \
......
......@@ -7,7 +7,7 @@ set +e
reties=0
while true; do
docker info &>/dev/null && break
reties=$[$reties+1]
reties=$((reties+1))
if [[ $reties -ge 100 ]]; then # 10 sec max
echo "Can't start docker daemon, timeout exceeded." >&2
exit 1;
......
......@@ -165,6 +165,22 @@ Similar to GraphiteMergeTree, the Kafka engine supports extended configuration u
For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
### Kerberos support {#kafka-kerberos-support}
To deal with Kerberos-aware Kafka, add a `security_protocol` child element with the `sasl_plaintext` value. It is enough if a Kerberos ticket-granting ticket is obtained and cached by OS facilities.
ClickHouse is able to maintain Kerberos credentials using a keytab file. Consider the `sasl_kerberos_service_name`, `sasl_kerberos_keytab`, `sasl_kerberos_principal` and `sasl_kerberos_kinit_cmd` child elements.
Example:
``` xml
<!-- Kerberos-aware Kafka -->
<kafka>
<security_protocol>SASL_PLAINTEXT</security_protocol>
<sasl_kerberos_keytab>/home/kafkauser/kafkauser.keytab</sasl_kerberos_keytab>
<sasl_kerberos_principal>kafkauser/kafkahost@EXAMPLE.COM</sasl_kerberos_principal>
</kafka>
```
## Virtual Columns {#virtual-columns}
- `_topic` — Kafka topic.
......
......@@ -33,6 +33,7 @@ Columns:
- `'ExceptionWhileProcessing' = 4` — Exception during the query execution.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Query starting time with microseconds precision.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds.
......@@ -84,16 +85,18 @@ Columns:
**Example**
``` sql
SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical;
SELECT * FROM system.query_log LIMIT 1 \G
```
``` text
Row 1:
──────
type: QueryStart
event_date: 2020-05-13
event_time: 2020-05-13 14:02:28
query_start_time: 2020-05-13 14:02:28
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.063321
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063321
query_duration_ms: 0
read_rows: 0
read_bytes: 0
......@@ -102,36 +105,37 @@ written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
query: SELECT 1
current_database: default
query: INSERT INTO test1 VALUES
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
address: ::ffff:127.0.0.1
port: 57720
port: 33452
initial_user: default
initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_address: ::ffff:127.0.0.1
initial_port: 57720
initial_port: 33452
interface: 1
os_user: bayonet
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse client
client_revision: 54434
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 4
client_version_patch: 1
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
quota_key:
revision: 54434
revision: 54440
thread_ids: []
ProfileEvents.Names: []
ProfileEvents.Values: []
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage']
Settings.Values: ['0','random','1','10000000000']
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage','allow_introspection_functions']
Settings.Values: ['0','random','1','10000000000','1']
```
**See Also**
......
......@@ -15,6 +15,7 @@ Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query, with microsecond precision.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
......@@ -63,50 +64,51 @@ Columns:
**Example**
``` sql
SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical
SELECT * FROM system.query_thread_log LIMIT 1 \G
```
``` text
Row 1:
──────
event_date: 2020-05-13
event_time: 2020-05-13 14:02:28
query_start_time: 2020-05-13 14:02:28
query_duration_ms: 0
read_rows: 1
read_bytes: 1
written_rows: 0
written_bytes: 0
memory_usage: 0
peak_memory_usage: 0
thread_name: QueryPipelineEx
thread_id: 28952
master_thread_id: 28924
query: SELECT 1
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.134042
query_start_time: 2020-09-11 10:08:17
query_start_time_microseconds: 2020-09-11 10:08:17.063150
query_duration_ms: 70
read_rows: 0
read_bytes: 0
written_rows: 1
written_bytes: 12
memory_usage: 4300844
peak_memory_usage: 4300844
thread_name: TCPHandler
thread_id: 638133
master_thread_id: 638133
query: INSERT INTO test1 VALUES
is_initial_query: 1
user: default
query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
address: ::ffff:127.0.0.1
port: 57720
port: 33452
initial_user: default
initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef
initial_address: ::ffff:127.0.0.1
initial_port: 57720
initial_port: 33452
interface: 1
os_user: bayonet
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse client
client_revision: 54434
os_user: bharatnc
client_hostname: tower
client_name: ClickHouse
client_revision: 54437
client_version_major: 20
client_version_minor: 4
client_version_patch: 1
client_version_minor: 7
client_version_patch: 2
http_method: 0
http_user_agent:
quota_key:
revision: 54434
ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds']
ProfileEvents.Values: [1,97,81,5,81]
...
revision: 54440
ProfileEvents.Names: ['Query','InsertQuery','FileOpen','WriteBufferFromFileDescriptorWrite','WriteBufferFromFileDescriptorWriteBytes','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','FunctionExecute','CreatedWriteBufferOrdinary','DiskWriteElapsedMicroseconds','NetworkReceiveElapsedMicroseconds','NetworkSendElapsedMicroseconds','InsertedRows','InsertedBytes','SelectedRows','SelectedBytes','MergeTreeDataWriterRows','MergeTreeDataWriterUncompressedBytes','MergeTreeDataWriterCompressedBytes','MergeTreeDataWriterBlocks','MergeTreeDataWriterBlocksAlreadySorted','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSReadChars','OSWriteChars']
ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,1,12,1,12,1,12,189,1,1,10,2,70853,2748,49,2747,45056,422,1520]
```
**See Also**
......
......@@ -6,6 +6,7 @@ Columns:
- `event_date` (Date) — Date of the entry.
- `event_time` (DateTime) — Time of the entry.
- `event_time_microseconds` (DateTime) — Time of the entry with microsecond precision.
- `microseconds` (UInt32) — Microseconds of the entry.
- `thread_name` (String) — Name of the thread from which the logging was done.
- `thread_id` (UInt64) — OS thread ID.
......@@ -25,4 +26,28 @@ Columns:
- `source_file` (LowCardinality(String)) — Source file from which the logging was done.
- `source_line` (UInt64) — Source line from which the logging was done.
**Example**
``` sql
SELECT * FROM system.text_log LIMIT 1 \G
```
``` text
Row 1:
──────
event_date: 2020-09-10
event_time: 2020-09-10 11:23:07
event_time_microseconds: 2020-09-10 11:23:07.871397
microseconds: 871397
thread_name: clickhouse-serv
thread_id: 564917
level: Information
query_id:
logger_name: DNSCacheUpdater
message: Update period 15 seconds
revision: 54440
source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void DB::DNSCacheUpdater::start()
source_line: 45
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/text_log) <!--hide-->
\ No newline at end of file
......@@ -12,6 +12,8 @@ Columns:
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment.
- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment with microsecond precision.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
......@@ -38,13 +40,16 @@ SELECT * FROM system.trace_log LIMIT 1 \G
``` text
Row 1:
──────
event_date: 2019-11-15
event_time: 2019-11-15 15:09:38
revision: 54428
timer_type: Real
thread_number: 48
query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915
trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935]
event_date: 2020-09-10
event_time: 2020-09-10 11:23:09
event_time_microseconds: 2020-09-10 11:23:09.872924
timestamp_ns: 1599762189872924510
revision: 54440
trace_type: Memory
thread_id: 564963
query_id:
trace: [371912858,371912789,371798468,371799717,371801313,371790250,624462773,566365041,566440261,566445834,566460071,566459914,566459842,566459580,566459469,566459389,566459341,566455774,371993941,371988245,372158848,372187428,372187309,372187093,372185478,140222123165193,140222122205443]
size: 5244400
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/trace_log) <!--hide-->
\ No newline at end of file
......@@ -357,7 +357,7 @@ SELECT date_trunc('hour', now())
## now {#now}
Accepts zero arguments and returns the current time at one of the moments of request execution.
Accepts zero or one argument (the timezone) and returns the current time at one of the moments of request execution. If the `timezone` argument is provided, it returns the current time in that timezone at one of the moments of request execution.
This function returns a constant, even if the request took a long time to complete.
## today {#today}
......
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
machine_translated: false
machine_translated_rev:
toc_priority: 0
toc_title: "Descripci\xF3n"
toc_title: "Descripción"
---
# What is ClickHouse? {#what-is-clickhouse}
ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP).
ClickHouse is a database management system (DBMS), column-oriented, for the online analytical processing of queries (OLAP).
In a “normal” row-oriented DBMS, data is stored in this order:
In a “normal”, row-oriented DBMS, data is stored in this order:
| Row | Argument | JavaEnable | Title | GoodEvent | EventTime |
|------|-------------|------------|---------------------------|-----------|---------------------|
......@@ -36,7 +36,7 @@ Estos ejemplos solo muestran el orden en el que se organizan los datos. Los valo
Examples of a column-oriented DBMS: Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, and kdb+.
Different orders for storing data are better suited to different scenarios. The data access scenario refers to what queries are made, how often, and in what proportion; how much data is read for each type of query – rows, columns, and bytes; the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used, and how isolated they are; requirements for data replication and logical integrity; requirements for latency and throughput for each type of query, and so on.
The different ways of ordering data when storing it are better suited to different scenarios. The data access scenario refers to which queries are made, how often, and in what proportion; how much data is read for each type of query - rows, columns, and bytes; the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used and how isolated they are; requirements for data replication and logical integrity; and requirements for latency and throughput for each type of query, and so on.
The higher the load on the system, the more important it is to customize the configured system to match the requirements of the usage scenario, and the finer this customization becomes. No system is equally well suited to significantly different scenarios. If a system is adaptable to a wide set of scenarios, then under high load it will handle all scenarios equally poorly, or will work well for only one or a few of the possible scenarios.
......
# INTO OUTFILE Clause {#into-outfile-clause}
To redirect the output of a `SELECT` query into the specified file on the client side, add an `INTO OUTFILE filename` clause to it (where filenam is a string literal).
To redirect the output of a `SELECT` query into the specified file on the client side, add an `INTO OUTFILE filename` clause to it (where filename is a string literal).
## Implementation Details {#implementation-details}
......
......@@ -185,7 +185,7 @@ def build(args):
test.test_templates(args.website_dir)
if not args.skip_docs:
generate_cmake_flags_files(os.path.join(os.path.dirname(__file__), '..', '..'))
generate_cmake_flags_files()
build_docs(args)
from github import build_releases
......
......@@ -55,7 +55,7 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
anchor=make_anchor(name),
name=name,
path=path,
line=line if line > 0 else 1)
line=line)
formatted_description: str = "".join(description.split("\n"))
......@@ -66,8 +66,8 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
entities[name] = path, formatted_entity
def process_file(root_path: str, input_name: str) -> None:
with open(os.path.join(root_path, input_name), 'r') as cmake_file:
def process_file(root_path: str, file_path: str, file_name: str) -> None:
with open(os.path.join(file_path, file_name), 'r') as cmake_file:
contents: str = cmake_file.read()
def get_line_and_comment(target: str) -> Tuple[int, str]:
......@@ -75,7 +75,7 @@ def process_file(root_path: str, input_name: str) -> None:
comment: str = ""
for n, line in enumerate(contents_list):
if line.find(target) == -1:
if 'option' not in line.lower() or target not in line:
continue
for maybe_comment_line in contents_list[n - 1::-1]:
......@@ -84,27 +84,35 @@ def process_file(root_path: str, input_name: str) -> None:
comment = re.sub("\s*#\s*", "", maybe_comment_line) + " " + comment
return n, comment
# line numbering starts with 1
return n + 1, comment
matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE)
file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name)
if file_rel_path_with_name.startswith('/'):
file_rel_path_with_name = file_rel_path_with_name[1:]
if matches:
for entity in matches:
build_entity(os.path.join(root_path[6:], input_name), entity, get_line_and_comment(entity[0]))
build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0]))
def process_folder(root_path:str, name: str) -> None:
def process_folder(root_path: str, name: str) -> None:
for root, _, files in os.walk(os.path.join(root_path, name)):
for f in files:
if f == "CMakeLists.txt" or ".cmake" in f:
process_file(root, f)
process_file(root_path, root, f)
def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), '..', '..')
def generate_cmake_flags_files(root_path: str) -> None:
output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md")
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md")
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
process_file(root_path, "CMakeLists.txt")
process_file(root_path, "programs/CMakeLists.txt")
process_file(root_path, root_path, "CMakeLists.txt")
process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt")
process_folder(root_path, "base")
process_folder(root_path, "cmake")
......@@ -132,14 +140,14 @@ def generate_cmake_flags_files(root_path: str) -> None:
f.write(entities[k][1] + "\n")
ignored_keys.append(k)
f.write("\n### External libraries system/bundled mode\n" + table_header)
f.write("\n\n### External libraries system/bundled mode\n" + table_header)
for k in sorted_keys:
if k.startswith("USE_INTERNAL_"):
f.write(entities[k][1] + "\n")
ignored_keys.append(k)
f.write("\n### Other flags\n" + table_header)
f.write("\n\n### Other flags\n" + table_header)
for k in sorted(set(sorted_keys).difference(set(ignored_keys))):
f.write(entities[k][1] + "\n")
......@@ -149,4 +157,4 @@ def generate_cmake_flags_files(root_path: str) -> None:
if __name__ == '__main__':
generate_cmake_flags_files("../../")
generate_cmake_flags_files()
......@@ -18,7 +18,7 @@ Markdown==3.2.1
MarkupSafe==1.1.1
mkdocs==1.1.2
mkdocs-htmlproofer-plugin==0.0.3
mkdocs-macros-plugin==0.4.13
mkdocs-macros-plugin==0.4.17
nltk==3.5
nose==1.3.7
protobuf==3.13.0
......
......@@ -7,9 +7,11 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B"
# Applying a CatBoost Model in ClickHouse {#applying-catboost-model-in-clickhouse}
[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed [Yandex](https://yandex.com/company/) for machine learning.
[CatBoost](https://catboost.ai) is a free, open-source gradient boosting library for machine learning, developed by [Yandex](https://yandex.com/company/).
With this guide, you will learn how to apply a running model inferred beforehand from SQL to ClickHouse as a trained model.
With this instruction, you will learn to apply a pre-trained model in ClickHouse by running model inference from SQL.
To apply a CatBoost model in ClickHouse:
......@@ -18,18 +20,18 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B"
3. [Integrate CatBoost into ClickHouse](#integrate-catboost-into-clickhouse) (optional step).
4. [Run model inference from SQL](#run-model-inference).
For details on training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training).
For details on training CatBoost models, see [Training and using models](https://catboost.ai/docs/features/training.html#training).
## Prerequisites {#prerequisites}
If you do not have [Docker](https://docs.docker.com/install/) yet, install it.
Please install [Docker](https://docs.docker.com/install/) first.
!!! note "Note"
[Docker](https://www.docker.com) is a software platform that lets you create containers, isolating a CatBoost and ClickHouse installation from the rest of the system.
Before applying a CatBoost model:
**1.** [Dock window image](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) from the registry:
**1.** Pull the docker image from the container registry (https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse):
``` bash
$ docker pull yandex/tutorial-catboost-clickhouse
......@@ -126,15 +128,15 @@ FROM amazon_train
Steps to integrate CatBoost into ClickHouse:
**1.** Build the evaluation library
**1.** Build the test library file
The fastest way to evaluate a CatBoost model is to compile the `libcatboostmodel.<so|dll|dylib>` library. For details on how to build the library, see the [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
The fastest way to test a CatBoost model is to compile the `libcatboostmodel.<so|dll|dylib>` library file. For details on how to build the library file, see the [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
**2.** Create a new directory anywhere, with any name, for example, `data`, and put the created library into it. The Docker image already contains the library `data/libcatboostmodel.so`.
**2.** Create a new directory with any name, e.g. `data`, and put the created library file into it. The Docker image already contains the library `data/libcatboostmodel.so`.
**3.** Create a new directory for the model config anywhere, with any name, for example, `models`.
**3.** Create a new directory with any name to hold the model configs, e.g. `models`.
**4.** Create a model configuration file with any name, for example, `models/amazon_model.xml`.
**4.** Create a model configuration file with any name, e.g. `models/amazon_model.xml`.
**5.** Describe the model configuration:
......@@ -153,7 +155,7 @@ CatBoost集成到ClickHouse步骤:
</models>
```
**6.** Add the CatBoost path and the model configuration to the ClickHouse configuration:
**6.** Add the path of the CatBoost library file and the model configuration to the ClickHouse configuration:
``` xml
<!-- File etc/clickhouse-server/config.d/models_config.xml. -->
......@@ -161,11 +163,11 @@ CatBoost集成到ClickHouse步骤:
<models_config>/home/catboost/models/*_model.xml</models_config>
```
## 4. Run Model Inference from SQL {#run-model-inference}
## 4. Run the Model Inferred from SQL {#run-model-inference}
For testing the model, run the ClickHouse client `$ clickhouse client`.
To test that the model works properly, run the ClickHouse client `$ clickhouse client`.
Let's make sure the model is working:
Let's make sure the model is working:
``` sql
:) SELECT
......@@ -185,7 +187,7 @@ LIMIT 10
```
!!! note "Note"
The function [model value](../sql-reference/functions/other-functions.md#function-modelevaluate) returns a tuple with raw per-class predictions for multiclass models.
The function [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) returns a tuple with raw per-class predictions for multiclass models.
Let's make a prediction:
......@@ -208,7 +210,7 @@ LIMIT 10
```
!!! note "Note"
More information about the [exp()](../sql-reference/functions/math-functions.md) function.
See the description of the [exp()](../sql-reference/functions/math-functions.md) function.
Let's calculate LogLoss on the sample:
......@@ -234,6 +236,6 @@ FROM
```
!!! note "Note"
More information about the [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
See the descriptions of the [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
[原始文章](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) <!--hide-->
......@@ -552,7 +552,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
#if defined(__linux__)
fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
std::string command = fmt::format("command setcap && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {}", main_bin_path.string());
std::string command = fmt::format("command -v setcap && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {}", main_bin_path.string());
fmt::print(" {}\n", command);
executeScript(command);
#endif
......
......@@ -210,7 +210,7 @@ try
/// Maybe useless
if (config().has("macros"))
context->setMacros(std::make_unique<Macros>(config(), "macros"));
context->setMacros(std::make_unique<Macros>(config(), "macros", log));
/// Skip networking
......
......@@ -534,7 +534,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros"));
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
/// Initialize main config reloader.
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
......@@ -559,7 +559,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
//setTextLog(global_context->getTextLog());
//buildLoggers(*config, logger());
global_context->setClustersConfig(config);
global_context->setMacros(std::make_unique<Macros>(*config, "macros"));
global_context->setMacros(std::make_unique<Macros>(*config, "macros", log));
global_context->setExternalAuthenticatorsConfig(*config);
/// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default)
......
......@@ -115,7 +115,7 @@ public:
: IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>(arguments, {})
{
const auto x_arg = arguments.at(0).get();
const auto y_arg = arguments.at(0).get();
const auto y_arg = arguments.at(1).get();
if (!x_arg->isValueRepresentedByNumber() || !y_arg->isValueRepresentedByNumber())
throw Exception("Illegal types of arguments of aggregate function " + getName() + ", must have number representation.",
......
......@@ -2,6 +2,7 @@
#include <Common/Macros.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
#include <common/logger_useful.h>
namespace DB
......@@ -12,19 +13,32 @@ namespace ErrorCodes
extern const int SYNTAX_ERROR;
}
Macros::Macros(const Poco::Util::AbstractConfiguration & config, const String & root_key)
Macros::Macros(const Poco::Util::AbstractConfiguration & config, const String & root_key, Poco::Logger * log)
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(root_key, keys);
for (const String & key : keys)
{
macros[key] = config.getString(root_key + "." + key);
if (key == "database" || key == "table" || key == "uuid")
{
if (log)
LOG_WARNING(log,
"Config file contains '{}' macro. This macro has special meaning "
"and it's explicit definition is not recommended. Implicit unfolding for "
"'database', 'table' and 'uuid' macros will be disabled.",
key);
enable_special_macros = false;
}
}
}
String Macros::expand(const String & s,
MacroExpansionInfo & info) const
{
/// Do not allow recursion if we expand only special macros, because it would be an infinite recursion
assert(info.level == 0 || !info.expand_special_macros_only);
if (s.find('{') == String::npos)
return s;
......@@ -34,6 +48,10 @@ String Macros::expand(const String & s,
if (info.level >= 10)
throw Exception("Too deep recursion while expanding macros: '" + s + "'", ErrorCodes::SYNTAX_ERROR);
/// If config file contains explicit special macro, then we do not expand it in this mode.
if (!enable_special_macros && info.expand_special_macros_only)
return s;
String res;
size_t pos = 0;
while (true)
......@@ -59,15 +77,21 @@ String Macros::expand(const String & s,
auto it = macros.find(macro_name);
/// Prefer explicit macros over implicit.
if (it != macros.end())
if (it != macros.end() && !info.expand_special_macros_only)
res += it->second;
else if (macro_name == "database" && !info.database_name.empty())
res += info.database_name;
else if (macro_name == "table" && !info.table_name.empty())
res += info.table_name;
else if (macro_name == "database" && !info.table_id.database_name.empty())
{
res += info.table_id.database_name;
info.expanded_database = true;
}
else if (macro_name == "table" && !info.table_id.table_name.empty())
{
res += info.table_id.table_name;
info.expanded_table = true;
}
else if (macro_name == "uuid")
{
if (info.uuid == UUIDHelpers::Nil)
if (info.table_id.uuid == UUIDHelpers::Nil)
throw Exception("Macro 'uuid' and empty arguments of ReplicatedMergeTree "
"are supported only for ON CLUSTER queries with Atomic database engine",
ErrorCodes::SYNTAX_ERROR);
......@@ -76,12 +100,16 @@ String Macros::expand(const String & s,
/// It becomes impossible to check if {uuid} is contained inside some unknown macro.
if (info.level)
throw Exception("Macro 'uuid' should not be inside another macro", ErrorCodes::SYNTAX_ERROR);
res += toString(info.uuid);
res += toString(info.table_id.uuid);
info.expanded_uuid = true;
}
else if (info.ignore_unknown)
else if (info.ignore_unknown || info.expand_special_macros_only)
{
if (info.expand_special_macros_only)
res += '{';
res += macro_name;
if (info.expand_special_macros_only)
res += '}';
info.has_unknown = true;
}
else
......@@ -93,6 +121,9 @@ String Macros::expand(const String & s,
}
++info.level;
if (info.expand_special_macros_only)
return res;
return expand(res, info);
}
......@@ -113,9 +144,9 @@ String Macros::expand(const String & s) const
String Macros::expand(const String & s, const StorageID & table_id, bool allow_uuid) const
{
MacroExpansionInfo info;
info.database_name = table_id.database_name;
info.table_name = table_id.table_name;
info.uuid = allow_uuid ? table_id.uuid : UUIDHelpers::Nil;
info.table_id = table_id;
if (!allow_uuid)
info.table_id.uuid = UUIDHelpers::Nil;
return expand(s, info);
}
......
......@@ -13,6 +13,7 @@ namespace Poco
{
class AbstractConfiguration;
}
class Logger;
}
......@@ -25,18 +26,19 @@ class Macros
{
public:
Macros() = default;
Macros(const Poco::Util::AbstractConfiguration & config, const String & key);
Macros(const Poco::Util::AbstractConfiguration & config, const String & key, Poco::Logger * log = nullptr);
struct MacroExpansionInfo
{
/// Settings
String database_name;
String table_name;
UUID uuid = UUIDHelpers::Nil;
StorageID table_id = StorageID::createEmpty();
bool ignore_unknown = false;
bool expand_special_macros_only = false;
/// Information about macro expansion
size_t level = 0;
bool expanded_database = false;
bool expanded_table = false;
bool expanded_uuid = false;
bool has_unknown = false;
};
......@@ -64,6 +66,7 @@ public:
private:
MacroMap macros;
bool enable_special_macros = true;
};
......
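A minimal sketch of the two-phase expansion introduced above, assuming a simplified string-based macro map (`expand` and `ExpansionInfo` are invented for illustration, not the real `Macros` API):
``` cpp
#include <cassert>
#include <map>
#include <string>

struct ExpansionInfo
{
    std::string database;
    std::string table;
    bool expand_special_macros_only = false;
};

/// Expand "{name}" placeholders: explicit config entries win, except in the
/// special-only phase, where {database}/{table} come from the query and every
/// other macro is kept as "{name}" for the second expansion phase.
std::string expand(const std::map<std::string, std::string> & macros,
                   const std::string & s, const ExpansionInfo & info)
{
    std::string res;
    size_t pos = 0;
    while (true)
    {
        size_t begin = s.find('{', pos);
        if (begin == std::string::npos)
        {
            res += s.substr(pos);
            break;
        }
        size_t end = s.find('}', begin);
        assert(end != std::string::npos);
        res += s.substr(pos, begin - pos);
        std::string name = s.substr(begin + 1, end - begin - 1);
        auto it = macros.find(name);
        if (it != macros.end() && !info.expand_special_macros_only)
            res += it->second;
        else if (name == "database")
            res += info.database;
        else if (name == "table")
            res += info.table;
        else if (info.expand_special_macros_only)
            res += "{" + name + "}";   /// leave unknown macros for phase two
        pos = end + 1;
    }
    return res;
}

int main()
{
    std::map<std::string, std::string> macros{{"shard", "01"}};
    ExpansionInfo info;
    info.database = "db";
    info.table = "t";
    info.expand_special_macros_only = true;
    /// Phase one only unfolds the special macros:
    assert(expand(macros, "/tables/{shard}/{database}.{table}", info)
           == "/tables/{shard}/db.t");
}
```
Running the special-only phase first is what lets the warning above disable implicit unfolding when `database`, `table` or `uuid` are defined explicitly in the config.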
......@@ -35,12 +35,14 @@ namespace ErrorCodes
extern const int CANNOT_CREATE_CHILD_PROCESS;
}
ShellCommand::ShellCommand(pid_t pid_, int in_fd_, int out_fd_, int err_fd_, bool terminate_in_destructor_)
ShellCommand::ShellCommand(pid_t pid_, int & in_fd_, int & out_fd_, int & err_fd_, bool terminate_in_destructor_)
: pid(pid_)
, terminate_in_destructor(terminate_in_destructor_)
, in(in_fd_)
, out(out_fd_)
, err(err_fd_) {}
, err(err_fd_)
{
}
Poco::Logger * ShellCommand::getLogger()
{
......@@ -144,12 +146,6 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor));
LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid);
/// Now the ownership of the file descriptors is passed to the result.
pipe_stdin.fds_rw[1] = -1;
pipe_stdout.fds_rw[0] = -1;
pipe_stderr.fds_rw[0] = -1;
return res;
}
......
......@@ -30,7 +30,7 @@ private:
bool wait_called = false;
bool terminate_in_destructor;
ShellCommand(pid_t pid_, int in_fd_, int out_fd_, int err_fd_, bool terminate_in_destructor_);
ShellCommand(pid_t pid_, int & in_fd_, int & out_fd_, int & err_fd_, bool terminate_in_destructor_);
static Poco::Logger * getLogger();
......
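The switch to `int &` parameters lets the constructor both adopt a descriptor and neutralize the caller's copy in one place, replacing the manual `fds_rw[...] = -1` lines deleted above. A minimal sketch of that ownership-transfer idea, with all names invented:
``` cpp
#include <unistd.h>

/// Hypothetical sketch: an RAII wrapper that adopts a descriptor passed by
/// reference and resets the source, so the original pipe object's destructor
/// will not close an fd whose ownership has already moved.
struct OwnedFD
{
    int fd = -1;

    explicit OwnedFD(int & source) : fd(source) { source = -1; }
    ~OwnedFD() { if (fd >= 0) ::close(fd); }

    OwnedFD(const OwnedFD &) = delete;
    OwnedFD & operator=(const OwnedFD &) = delete;
};

int main()
{
    int fds[2];
    if (::pipe(fds) != 0)
        return 1;
    {
        OwnedFD write_end(fds[1]);  /// fds[1] becomes -1 here
    }                               /// write end closed exactly once
    ::close(fds[0]);
}
```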
......@@ -13,7 +13,6 @@ inline UInt64 clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC)
return UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec);
}
/** Differs from Poco::Stopwatch only by using 'clock_gettime' instead of 'gettimeofday',
* returns nanoseconds instead of microseconds, and also by other minor differencies.
*/
......
......@@ -234,10 +234,16 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive);
job();
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job is destroyed before wait() returns.
job = {};
}
catch (...)
{
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job is destroyed before wait() returns.
job = {};
{
std::unique_lock lock(mutex);
if (!first_exception)
......
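The point of the early `job = {};`: `wait()` may return as soon as `scheduled_jobs` drops to zero, so everything the job captured must already be gone by then. A reduced sketch of the destruction-ordering requirement, using only standard types:
``` cpp
#include <cassert>
#include <functional>
#include <memory>

int main()
{
    auto resource = std::make_shared<int>(42);
    std::function<void()> job = [resource] { /* use *resource */ };

    job();      /// run the task
    job = {};   /// drop the captured shared_ptr *before* signalling completion
    /// ... here the pool would decrement scheduled_jobs and notify wait() ...

    /// If job were reset only later, a caller returning from wait() could
    /// still race with the destruction of `resource`.
    assert(resource.use_count() == 1);
}
```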
......@@ -11,6 +11,7 @@
#include <Poco/Event.h>
#include <Common/ThreadStatus.h>
#include <ext/scope_guard.h>
/** Very simple thread pool similar to boost::threadpool.
......@@ -161,21 +162,19 @@ public:
GlobalThreadPool::instance().scheduleOrThrow([
state = state,
func = std::forward<Function>(func),
args = std::make_tuple(std::forward<Args>(args)...)]
{
try
args = std::make_tuple(std::forward<Args>(args)...)]() mutable /// mutable is needed to destroy capture
{
SCOPE_EXIT(state->set());
/// These moves are needed to destroy the function and arguments before exit.
/// It guarantees that after ThreadFromGlobalPool::join all captured params are destroyed.
auto function = std::move(func);
auto arguments = std::move(args);
/// Thread status holds raw pointer on query context, thus it always must be destroyed
/// before sending signal that permits to join this thread.
DB::ThreadStatus thread_status;
std::apply(func, args);
}
catch (...)
{
state->set();
throw;
}
state->set();
std::apply(function, arguments);
});
}
......
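The same destruction-ordering concern, reduced to standard-library pieces: the guard is declared first so it fires last, and the moves into locals empty the lambda's captures before the completion signal. A hypothetical sketch (`run_detached` and `ScopeGuard` are invented stand-ins for the pool and `SCOPE_EXIT`):
``` cpp
#include <functional>
#include <future>
#include <memory>
#include <thread>
#include <tuple>
#include <utility>

/// Tiny stand-in for SCOPE_EXIT: runs its action on scope exit.
struct ScopeGuard
{
    std::function<void()> on_exit;
    ~ScopeGuard() { on_exit(); }
};

template <typename Function, typename... Args>
std::future<void> run_detached(Function && func, Args &&... args)
{
    auto done = std::make_shared<std::promise<void>>();
    auto future = done->get_future();

    std::thread([done,
                 func = std::forward<Function>(func),
                 args = std::make_tuple(std::forward<Args>(args)...)]() mutable
    {
        ScopeGuard guard{[&] { done->set_value(); }};   /// fires last

        /// Locals declared after the guard are destroyed before it fires,
        /// so the captured state is gone by the time the waiter wakes up.
        auto function = std::move(func);
        auto arguments = std::move(args);
        std::apply(function, arguments);
    }).detach();

    return future;
}

int main()
{
    run_detached([](int) {}, 1).wait();
}
```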
......@@ -141,8 +141,14 @@ void TraceCollector::run()
if (trace_log)
{
UInt64 time = clock_gettime_ns(CLOCK_REALTIME);
TraceLogElement element{time_t(time / 1000000000), time, trace_type, thread_id, query_id, trace, size};
// time and time_in_microseconds are both constructed from the same timespec so that the
// times will be equal up to the precision of a second.
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
UInt64 time = UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec);
UInt64 time_in_microseconds = UInt64((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000));
TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size};
trace_log->add(element);
}
}
......
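The arithmetic above, checked in isolation: both values are derived from one `timespec`, so their whole-second parts must agree. A standalone sketch:
``` cpp
#include <cassert>
#include <cstdint>
#include <time.h>

int main()
{
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);

    uint64_t time_ns = uint64_t(ts.tv_sec * 1000000000LL + ts.tv_nsec);
    uint64_t time_us = uint64_t((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000));

    /// Derived from the same sample, so the whole-second parts must match.
    assert(time_ns / 1000000000 == uint64_t(ts.tv_sec));
    assert(time_us / 1000000 == uint64_t(ts.tv_sec));
}
```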
......@@ -663,7 +663,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
{
WaitForDisappearStatePtr state = std::make_shared<WaitForDisappearState>();
auto callback = [state](const Coordination::ExistsResponse & response)
auto callback = [state](const Coordination::GetResponse & response)
{
state->code = int32_t(response.error);
if (state->code)
......@@ -683,8 +683,9 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
while (!condition || !condition())
{
/// NOTE: if the node doesn't exist, the watch will leak.
impl->exists(path, callback, watch);
/// Use getData instead of exists to avoid a watch leak.
impl->get(path, callback, watch);
if (!condition)
state->event.wait();
else if (!state->event.tryWait(1000))
......
......@@ -422,6 +422,18 @@ void ZooKeeperRequest::write(WriteBuffer & out) const
}
static void removeRootPath(String & path, const String & root_path)
{
if (root_path.empty())
return;
if (path.size() <= root_path.size())
throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY);
path = path.substr(root_path.size());
}
struct ZooKeeperResponse : virtual Response
{
virtual ~ZooKeeperResponse() override = default;
......@@ -1092,8 +1104,6 @@ void ZooKeeper::sendThread()
{
info.request->has_watch = true;
CurrentMetrics::add(CurrentMetrics::ZooKeeperWatch);
std::lock_guard lock(watches_mutex);
watches[info.request->getPath()].emplace_back(std::move(info.watch));
}
if (expired)
......@@ -1278,6 +1288,30 @@ void ZooKeeper::receiveEvent()
response->removeRootPath(root_path);
}
/// Instead of setting the watch in sendEvent, set it in receiveEvent because we need to check the response.
/// The watch shouldn't be set if the node does not exist and will never exist, as with sequential ephemeral nodes.
/// By using getData() instead of exists(), a watch won't be set if the node doesn't exist.
if (request_info.watch)
{
bool add_watch = false;
/// 3 indicates the ZooKeeperExistsRequest.
// For exists, we set the watch in both the node-exists and node-does-not-exist cases.
// For other cases like getData, we only set the watch when the node exists.
if (request_info.request->getOpNum() == 3)
add_watch = (response->error == Error::ZOK || response->error == Error::ZNONODE);
else
add_watch = response->error == Error::ZOK;
if (add_watch)
{
/// The key of watches should exclude the root_path
String req_path = request_info.request->getPath();
removeRootPath(req_path, root_path);
std::lock_guard lock(watches_mutex);
watches[req_path].emplace_back(std::move(request_info.watch));
}
}
int32_t actual_length = in->count() - count_before_event;
if (length != actual_length)
throw Exception("Response length doesn't match. Expected: " + toString(length) + ", actual: " + toString(actual_length), Error::ZMARSHALLINGERROR);
......
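The watch decision above, factored into a standalone predicate (the opcode value 3 for `exists` comes from the diff; the other names and the sample opcode 4 are illustrative):
``` cpp
#include <cassert>

enum class Error { ZOK, ZNONODE, ZOTHER };

/// exists() sets a watch whether or not the node is there;
/// data reads (getData etc.) set one only when the node exists.
bool shouldAddWatch(int op_num, Error error)
{
    const int exists_op = 3;
    if (op_num == exists_op)
        return error == Error::ZOK || error == Error::ZNONODE;
    return error == Error::ZOK;
}

int main()
{
    assert(shouldAddWatch(3, Error::ZNONODE));    /// exists: watch even on a missing node
    assert(!shouldAddWatch(4, Error::ZNONODE));   /// getData-style op: no watch, no leak
    assert(shouldAddWatch(4, Error::ZOK));
}
```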
......@@ -136,7 +136,7 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
namespace
{
UInt8 getDeltaBytesSize(DataTypePtr column_type)
UInt8 getDeltaBytesSize(const IDataType * column_type)
{
if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is not applicable for {} because the data type is not of fixed size",
......@@ -155,7 +155,7 @@ UInt8 getDeltaBytesSize(DataTypePtr column_type)
void registerCodecDelta(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::Delta);
factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
UInt8 delta_bytes_size = 0;
......
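For context, Delta picks its element width from the column type's fixed value size; a simplified standalone version of that selection (the real function additionally rejects types without a fixed-size representation, as the exception above shows):
``` cpp
#include <cassert>
#include <cstddef>
#include <cstdint>

/// Delta works on 1/2/4/8-byte elements; anything else degrades to bytewise.
uint8_t deltaBytesSizeFor(size_t fixed_value_size)
{
    if (fixed_value_size == 1 || fixed_value_size == 2
        || fixed_value_size == 4 || fixed_value_size == 8)
        return uint8_t(fixed_value_size);
    return 1;
}

int main()
{
    assert(deltaBytesSizeFor(sizeof(uint32_t)) == 4);  /// e.g. a UInt32 column
    assert(deltaBytesSizeFor(sizeof(uint64_t)) == 8);  /// e.g. a UInt64 column
    assert(deltaBytesSizeFor(16) == 1);                /// e.g. UUID-sized values
}
```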
......@@ -307,7 +307,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
}
}
UInt8 getDataBytesSize(DataTypePtr column_type)
UInt8 getDataBytesSize(const IDataType * column_type)
{
if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec DoubleDelta is not applicable for {} because the data type is not of fixed size",
......@@ -413,7 +413,7 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::DoubleDelta);
factory.registerCompressionCodecWithType("DoubleDelta", method_code,
[&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
[&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
if (arguments)
throw Exception("Codec DoubleDelta does not accept any arguments", ErrorCodes::BAD_ARGUMENTS);
......
......@@ -222,7 +222,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
}
}
UInt8 getDataBytesSize(DataTypePtr column_type)
UInt8 getDataBytesSize(const IDataType * column_type)
{
if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Gorilla is not applicable for {} because the data type is not of fixed size",
......@@ -329,7 +329,7 @@ void registerCodecGorilla(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::Gorilla);
factory.registerCompressionCodecWithType("Gorilla", method_code,
[&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
[&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
if (arguments)
throw Exception("Codec Gorilla does not accept any arguments", ErrorCodes::BAD_ARGUMENTS);
......
......@@ -136,7 +136,7 @@ TypeIndex baseType(TypeIndex type_idx)
return TypeIndex::Nothing;
}
TypeIndex typeIdx(const DataTypePtr & data_type)
TypeIndex typeIdx(const IDataType * data_type)
{
if (!data_type)
return TypeIndex::Nothing;
......@@ -656,7 +656,7 @@ void CompressionCodecT64::updateHash(SipHash & hash) const
void registerCodecT64(CompressionCodecFactory & factory)
{
auto reg_func = [&](const ASTPtr & arguments, DataTypePtr type) -> CompressionCodecPtr
auto reg_func = [&](const ASTPtr & arguments, const IDataType * type) -> CompressionCodecPtr
{
Variant variant = Variant::Byte;
......@@ -683,7 +683,7 @@ void registerCodecT64(CompressionCodecFactory & factory)
auto type_idx = typeIdx(type);
if (type && type_idx == TypeIndex::Nothing)
throw Exception("T64 codec is not supported for specified type", ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
throw Exception("T64 codec is not supported for specified type " + type->getName(), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
return std::make_shared<CompressionCodecT64>(type_idx, variant);
};
......
......@@ -6,6 +6,7 @@
#include <IO/ReadBuffer.h>
#include <Parsers/queryToString.h>
#include <Compression/CompressionCodecMultiple.h>
#include <Compression/CompressionCodecNone.h>
#include <IO/WriteHelpers.h>
#include <boost/algorithm/string/join.hpp>
......@@ -57,7 +58,7 @@ void CompressionCodecFactory::validateCodec(const String & family_name, std::opt
}
}
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr & ast, DataTypePtr column_type, bool sanity_check) const
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const IDataType * column_type, bool sanity_check) const
{
if (const auto * func = ast->as<ASTFunction>())
{
......@@ -67,6 +68,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
bool has_none = false;
std::optional<size_t> generic_compression_codec_pos;
bool can_substitute_codec_arguments = true;
for (size_t i = 0; i < func->arguments->children.size(); ++i)
{
const auto & inner_codec_ast = func->arguments->children[i];
......@@ -99,7 +101,34 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
}
else
{
result_codec = getImpl(codec_family_name, codec_arguments, column_type);
if (column_type)
{
CompressionCodecPtr prev_codec;
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & substream_type)
{
if (IDataType::isSpecialCompressionAllowed(substream_path))
{
result_codec = getImpl(codec_family_name, codec_arguments, &substream_type);
/// Case for column Tuple, which is compressed with a codec that depends on the data type, like Delta.
/// We cannot substitute parameters for such codecs.
if (prev_codec && prev_codec->getHash() != result_codec->getHash())
can_substitute_codec_arguments = false;
prev_codec = result_codec;
}
};
IDataType::SubstreamPath stream_path;
column_type->enumerateStreams(callback, stream_path);
if (!result_codec)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());
}
else
{
result_codec = getImpl(codec_family_name, codec_arguments, nullptr);
}
codecs_descriptions->children.emplace_back(result_codec->getCodecDesc());
}
......@@ -140,16 +169,30 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS);
}
/// For columns with nested types like Tuple(UInt32, UInt64) we
/// obviously cannot substitute parameters for codecs which depend on
/// data type, because for the first column Delta(4) is suitable and
/// Delta(8) for the second. So we should leave codec description as is
/// and deduce them in get method for each subtype separately. For all
/// other types it's better to substitute parameters, for better
/// readability and backward compatibility.
if (can_substitute_codec_arguments)
{
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
result->name = "CODEC";
result->arguments = codecs_descriptions;
return result;
}
else
{
return ast;
}
}
throw Exception("Unknown codec family: " + queryToString(ast), ErrorCodes::UNKNOWN_CODEC);
}
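The substitution rule, reduced to a model: parameters may be baked into the stored description only when every substream resolves to the same codec parameters; the `Tuple(UInt32, UInt64)` case from the comment above yields Delta(4) versus Delta(8) and must keep the description as written. A sketch under that assumption:
``` cpp
#include <cassert>
#include <cstddef>
#include <vector>

/// Stand-in for comparing codec hashes: for Delta the only parameter is the
/// byte width, derived from each substream's fixed value size.
bool canSubstituteCodecArguments(const std::vector<size_t> & substream_value_sizes)
{
    bool first = true;
    size_t prev_width = 0;
    for (size_t size : substream_value_sizes)
    {
        size_t width = (size == 1 || size == 2 || size == 4 || size == 8) ? size : 1;
        if (!first && width != prev_width)
            return false;   /// Delta(4) vs Delta(8): keep the description as written
        prev_width = width;
        first = false;
    }
    return true;
}

int main()
{
    assert(canSubstituteCodecArguments({4, 4}));    /// Tuple(UInt32, UInt32)
    assert(!canSubstituteCodecArguments({4, 8}));   /// Tuple(UInt32, UInt64)
}
```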
CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr column_type, CompressionCodecPtr current_default) const
CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const
{
if (current_default == nullptr)
current_default = default_codec;
......@@ -175,10 +218,16 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr
else
throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
CompressionCodecPtr codec;
if (codec_family_name == DEFAULT_CODEC_NAME)
codecs.emplace_back(current_default);
codec = current_default;
else
codecs.emplace_back(getImpl(codec_family_name, codec_arguments, column_type));
codec = getImpl(codec_family_name, codec_arguments, column_type);
if (only_generic && !codec->isGenericCompression())
continue;
codecs.emplace_back(codec);
}
CompressionCodecPtr res;
......@@ -187,6 +236,8 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr
return codecs.back();
else if (codecs.size() > 1)
return std::make_shared<CompressionCodecMultiple>(codecs);
else
return std::make_shared<CompressionCodecNone>();
}
throw Exception("Unexpected AST structure for compression codec: " + queryToString(ast), ErrorCodes::UNEXPECTED_AST_STRUCTURE);
......@@ -203,7 +254,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const uint8_t byte_code) const
}
CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const
CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const
{
if (family_name == "Multiple")
throw Exception("Codec Multiple cannot be specified directly", ErrorCodes::UNKNOWN_CODEC);
......@@ -235,7 +286,7 @@ void CompressionCodecFactory::registerCompressionCodecWithType(
void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, Creator creator)
{
registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, DataTypePtr /* data_type */)
registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, const IDataType * /* data_type */)
{
return creator(ast);
});
......
......@@ -26,7 +26,7 @@ class CompressionCodecFactory final : private boost::noncopyable
{
protected:
using Creator = std::function<CompressionCodecPtr(const ASTPtr & parameters)>;
using CreatorWithType = std::function<CompressionCodecPtr(const ASTPtr & parameters, DataTypePtr column_type)>;
using CreatorWithType = std::function<CompressionCodecPtr(const ASTPtr & parameters, const IDataType * column_type)>;
using SimpleCreator = std::function<CompressionCodecPtr()>;
using CompressionCodecsDictionary = std::unordered_map<String, CreatorWithType>;
using CompressionCodecsCodeDictionary = std::unordered_map<uint8_t, CreatorWithType>;
......@@ -38,7 +38,13 @@ public:
CompressionCodecPtr getDefaultCodec() const;
/// Validate codecs AST specified by user and parses codecs description (substitute default parameters)
ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, DataTypePtr column_type, bool sanity_check) const;
ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const IDataType * column_type, bool sanity_check) const;
/// Just wrapper for previous method.
ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check) const
{
return validateCodecAndGetPreprocessedAST(ast, column_type.get(), sanity_check);
}
/// Validate codecs AST specified by user
void validateCodec(const String & family_name, std::optional<int> level, bool sanity_check) const;
......@@ -47,8 +53,18 @@ public:
/// information about type to improve inner settings, but every codec should
/// be able to work without information about type. Also AST can contain
/// codec, which can be alias to current default codec, which can be changed
/// in runtime.
CompressionCodecPtr get(const ASTPtr & ast, DataTypePtr column_type, CompressionCodecPtr current_default = nullptr) const;
/// in runtime. If only_generic is true then the method will filter out all
/// isGenericCompression() == false codecs from the result. If nothing is found
/// it will return codec NONE. It's useful for auxiliary parts of complex columns
/// like Nullable, Array and so on. If all codecs are non-generic and
/// only_generic = true, then codec NONE will be returned.
CompressionCodecPtr get(const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default = nullptr, bool only_generic = false) const;
/// Just wrapper for previous method.
CompressionCodecPtr get(const ASTPtr & ast, const DataTypePtr & column_type, CompressionCodecPtr current_default = nullptr, bool only_generic = false) const
{
return get(ast, column_type.get(), current_default, only_generic);
}
/// Get codec by method byte (no params available)
CompressionCodecPtr get(const uint8_t byte_code) const;
......@@ -65,7 +81,7 @@ public:
void registerSimpleCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, SimpleCreator creator);
protected:
CompressionCodecPtr getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const;
CompressionCodecPtr getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const;
private:
CompressionCodecsDictionary family_name_with_codec;
......
......@@ -7,6 +7,7 @@
#include <Common/Exception.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTIdentifier.h>
#include <Compression/CompressionCodecMultiple.h>
namespace DB
......
......@@ -17,7 +17,6 @@ using CompressionCodecPtr = std::shared_ptr<ICompressionCodec>;
using Codecs = std::vector<CompressionCodecPtr>;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
/**
* Represents interface for compression codecs like LZ4, ZSTD, etc.
......
......@@ -27,7 +27,7 @@ bool ColumnWithTypeAndName::operator==(const ColumnWithTypeAndName & other) cons
}
void ColumnWithTypeAndName::dumpStructure(WriteBuffer & out) const
void ColumnWithTypeAndName::dumpNameAndType(WriteBuffer & out) const
{
out << name;
......@@ -35,6 +35,11 @@ void ColumnWithTypeAndName::dumpStructure(WriteBuffer & out) const
out << ' ' << type->getName();
else
out << " nullptr";
}
void ColumnWithTypeAndName::dumpStructure(WriteBuffer & out) const
{
dumpNameAndType(out);
if (column)
out << ' ' << column->dumpStructure();
......
......@@ -33,6 +33,7 @@ struct ColumnWithTypeAndName
ColumnWithTypeAndName cloneEmpty() const;
bool operator==(const ColumnWithTypeAndName & other) const;
void dumpNameAndType(WriteBuffer & out) const;
void dumpStructure(WriteBuffer & out) const;
String dumpStructure() const;
};
......
......@@ -86,7 +86,7 @@ struct MultiEnum
return right.operator==(left);
}
template <typename L>
template <typename L, typename = typename std::enable_if<!std::is_same_v<L, MultiEnum>>::type>
friend bool operator!=(L left, MultiEnum right)
{
return !(right.operator==(left));
......
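Why the added constraint: the unconstrained template would also instantiate with `L` equal to the enum holder itself, so same-type comparisons could route through the mixed-type overload instead of the dedicated one. A reduced demonstration of constraining such a friend template (the `Flags` type is invented):
``` cpp
#include <cassert>
#include <cstdint>
#include <type_traits>

struct Flags
{
    uint64_t value = 0;

    bool operator==(Flags rhs) const { return value == rhs.value; }
    bool operator!=(Flags rhs) const { return value != rhs.value; }

    /// Mixed comparison with a plain value. The enable_if excludes L = Flags,
    /// so Flags-vs-Flags comparisons can only ever pick the member overload
    /// above instead of routing through this template.
    template <typename L, typename = std::enable_if_t<!std::is_same_v<L, Flags>>>
    friend bool operator!=(L left, Flags right)
    {
        return uint64_t(left) != right.value;
    }
};

int main()
{
    Flags a{1};
    Flags b{2};
    assert(a != b);   /// member overload
    assert(3 != b);   /// friend template, L = int
}
```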
......@@ -353,6 +353,7 @@ class IColumn;
\
M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
......@@ -369,7 +370,6 @@ class IColumn;
\
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, false, "Changes format of directories names for distributed table insert parts.", 0) \
M(UInt64, multiple_joins_rewriter_version, 2, "1 or 2. Second rewriter version knows about table columns and keep not clashed names as is.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
......@@ -400,6 +400,7 @@ class IColumn;
M(UInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(Bool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \
M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \
\
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \
......@@ -463,7 +464,7 @@ class IColumn;
M(String, format_custom_result_after_delimiter, "", "Suffix after result set (for CustomSeparated format)", 0) \
\
M(String, format_regexp, "", "Regular expression (for Regexp format)", 0) \
M(String, format_regexp_escaping_rule, "Escaped", "Field escaping rule (for Regexp format)", 0) \
M(String, format_regexp_escaping_rule, "Raw", "Field escaping rule (for Regexp format)", 0) \
M(Bool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format", 0) \
\
M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \
......
......@@ -151,7 +151,7 @@ namespace
void DataTypeArray::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::ArraySizes);
callback(path);
callback(path, *this);
path.back() = Substream::ArrayElements;
nested->enumerateStreams(callback, path);
path.pop_back();
......
......@@ -54,7 +54,7 @@ void DataTypeLowCardinality::enumerateStreams(const StreamCallback & callback, S
path.push_back(Substream::DictionaryKeys);
dictionary_type->enumerateStreams(callback, path);
path.back() = Substream::DictionaryIndexes;
callback(path);
callback(path, *this);
path.pop_back();
}
......@@ -774,7 +774,7 @@ void DataTypeLowCardinality::deserializeTextQuoted(IColumn & column, ReadBuffer
void DataTypeLowCardinality::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeImpl(column, &IDataType::deserializeAsTextEscaped, istr, settings);
deserializeImpl(column, &IDataType::deserializeAsWholeText, istr, settings);
}
void DataTypeLowCardinality::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
......
......@@ -44,7 +44,7 @@ bool DataTypeNullable::onlyNull() const
void DataTypeNullable::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::NullMap);
callback(path);
callback(path, *this);
path.back() = Substream::NullableElements;
nested_data_type->enumerateStreams(callback, path);
path.pop_back();
......
......@@ -130,6 +130,18 @@ String IDataType::getFileNameForStream(const String & column_name, const IDataTy
}
bool IDataType::isSpecialCompressionAllowed(const SubstreamPath & path)
{
for (const Substream & elem : path)
{
if (elem.type == Substream::NullMap
|| elem.type == Substream::ArraySizes
|| elem.type == Substream::DictionaryIndexes)
return false;
}
return true;
}
void IDataType::insertDefaultInto(IColumn & column) const
{
column.insertDefault();
......
......@@ -104,10 +104,11 @@ public:
using SubstreamPath = std::vector<Substream>;
using StreamCallback = std::function<void(const SubstreamPath &)>;
using StreamCallback = std::function<void(const SubstreamPath &, const IDataType &)>;
virtual void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
{
callback(path);
callback(path, *this);
}
void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const { enumerateStreams(callback, path); }
void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); }
......@@ -442,6 +443,10 @@ public:
static String getFileNameForStream(const String & column_name, const SubstreamPath & path);
/// Substream path supports special compression methods like codec Delta.
/// For all other substreams (like ArraySizes, NullMasks, etc.) we use only
/// generic compression codecs like LZ4.
static bool isSpecialCompressionAllowed(const SubstreamPath & path);
private:
friend class DataTypeFactory;
/// Customize this DataType
......@@ -685,4 +690,3 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime> = t
template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime64> = true;
}
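A reduced model of the enumeration contract after this change: the callback now receives the substream's data type along with the path, which is what lets callers (such as the codec factory hunk above) treat auxiliary streams differently from element streams. A sketch with invented mini-types:
``` cpp
#include <functional>
#include <iostream>
#include <string>

/// Invented mini-model of the enumerateStreams contract: each composite type
/// reports an auxiliary stream (sizes / null map) as itself, then recurses
/// into its nested element type.
struct MiniType
{
    std::string name;
    std::string aux_stream;            /// e.g. "NullMap", "ArraySizes"; empty for scalars
    const MiniType * nested = nullptr;

    using Callback = std::function<void(const std::string & path, const MiniType &)>;

    void enumerate(const Callback & callback, const std::string & path = "") const
    {
        if (nested)
        {
            callback(path + "/" + aux_stream, *this);   /// mirrors callback(path, *this)
            nested->enumerate(callback, path + "/elements");
        }
        else
            callback(path.empty() ? "/" : path, *this);
    }
};

int main()
{
    MiniType element{"UInt64", "", nullptr};
    MiniType array{"Array(UInt64)", "ArraySizes", &element};

    /// Prints the ArraySizes stream typed as the Array (generic codecs only)
    /// and the element stream typed as UInt64 (Delta etc. allowed).
    array.enumerate([](const std::string & path, const MiniType & type)
    {
        std::cout << path << " -> " << type.name << '\n';
    });
}
```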
......@@ -83,7 +83,7 @@ void DatabaseAtomic::attachTable(const String & name, const StoragePtr & table,
assert(relative_table_path != data_path && !relative_table_path.empty());
DetachedTables not_in_use;
std::unique_lock lock(mutex);
not_in_use = cleenupDetachedTables();
not_in_use = cleanupDetachedTables();
auto table_id = table->getStorageID();
assertDetachedTableNotInUse(table_id.uuid);
DatabaseWithDictionaries::attachTableUnlocked(name, table, lock);
......@@ -97,7 +97,7 @@ StoragePtr DatabaseAtomic::detachTable(const String & name)
auto table = DatabaseWithDictionaries::detachTableUnlocked(name, lock);
table_name_to_path.erase(name);
detached_tables.emplace(table->getStorageID().uuid, table);
not_in_use = cleenupDetachedTables();
not_in_use = cleanupDetachedTables();
return table;
}
......@@ -207,11 +207,13 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot move dictionary to other database");
StoragePtr table = getTableUnlocked(table_name, db_lock);
table->checkTableCanBeRenamed();
assert_can_move_mat_view(table);
StoragePtr other_table;
if (exchange)
{
other_table = other_db.getTableUnlocked(to_table_name, other_db_lock);
other_table->checkTableCanBeRenamed();
assert_can_move_mat_view(other_table);
}
......@@ -261,7 +263,7 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
if (query.database != database_name)
throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed to `{}`, cannot create table in `{}`",
database_name, query.database);
not_in_use = cleenupDetachedTables();
not_in_use = cleanupDetachedTables();
assertDetachedTableNotInUse(query.uuid);
renameNoReplace(table_metadata_tmp_path, table_metadata_path);
attachTableUnlocked(query.table, table, lock); /// Should never throw
......@@ -304,7 +306,7 @@ void DatabaseAtomic::assertDetachedTableNotInUse(const UUID & uuid)
", because it was detached but still used by some query. Retry later.", ErrorCodes::TABLE_ALREADY_EXISTS);
}
DatabaseAtomic::DetachedTables DatabaseAtomic::cleenupDetachedTables()
DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables()
{
DetachedTables not_in_use;
auto it = detached_tables.begin();
......@@ -322,14 +324,14 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleenupDetachedTables()
return not_in_use;
}
void DatabaseAtomic::assertCanBeDetached(bool cleenup)
void DatabaseAtomic::assertCanBeDetached(bool cleanup)
{
if (cleenup)
if (cleanup)
{
DetachedTables not_in_use;
{
std::lock_guard lock(mutex);
not_in_use = cleenupDetachedTables();
not_in_use = cleanupDetachedTables();
}
}
std::lock_guard lock(mutex);
......@@ -498,6 +500,28 @@ void DatabaseAtomic::renameDictionaryInMemoryUnlocked(const StorageID & old_name
const auto & dict = dynamic_cast<const IDictionaryBase &>(*result.object);
dict.updateDictionaryName(new_name);
}
void DatabaseAtomic::waitDetachedTableNotInUse(const UUID & uuid)
{
{
std::lock_guard lock{mutex};
if (detached_tables.count(uuid) == 0)
return;
}
/// The table is in use while its shared_ptr counter is greater than 1.
/// We cannot trigger a condvar on shared_ptr destruction, so it's a busy wait.
while (true)
{
DetachedTables not_in_use;
{
std::lock_guard lock{mutex};
not_in_use = cleanupDetachedTables();
if (detached_tables.count(uuid) == 0)
return;
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
}
}
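A reduced model of that busy wait: since shared_ptr destruction cannot signal a condition variable, the code polls the reference count under the lock with a sleep. A sketch using only standard types:
``` cpp
#include <cassert>
#include <chrono>
#include <memory>
#include <mutex>
#include <thread>

std::mutex mutex;
std::shared_ptr<int> detached_table;   /// stand-in for a detached_tables entry

/// Poll until no one else holds a reference, then clean up.
void waitNotInUse()
{
    while (true)
    {
        {
            std::lock_guard lock(mutex);
            if (detached_table.use_count() <= 1)
            {
                detached_table.reset();   /// final cleanup
                return;
            }
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
}

int main()
{
    detached_table = std::make_shared<int>(1);
    auto user = detached_table;   /// a "query" still using the detached table

    std::thread t([&]
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(250));
        user.reset();             /// the "query" releases its reference
    });
    waitNotInUse();
    t.join();
    assert(!detached_table);
}
```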
......@@ -51,13 +51,15 @@ public:
void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override;
/// Atomic database cannot be detached if there is detached table which still in use
void assertCanBeDetached(bool cleenup);
void assertCanBeDetached(bool cleanup);
UUID tryGetTableUUID(const String & table_name) const override;
void tryCreateSymlink(const String & table_name, const String & actual_data_path);
void tryRemoveSymlink(const String & table_name);
void waitDetachedTableNotInUse(const UUID & uuid);
private:
void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path) override;
void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
......@@ -65,7 +67,7 @@ private:
void assertDetachedTableNotInUse(const UUID & uuid);
typedef std::unordered_map<UUID, StoragePtr> DetachedTables;
[[nodiscard]] DetachedTables cleenupDetachedTables();
[[nodiscard]] DetachedTables cleanupDetachedTables();
void tryCreateMetadataSymlink();
......
......@@ -19,6 +19,7 @@
#if USE_MYSQL
# include <Core/MySQL/MySQLClient.h>
# include <Databases/MySQL/ConnectionMySQLSettings.h>
# include <Databases/MySQL/DatabaseConnectionMySQL.h>
# include <Databases/MySQL/MaterializeMySQLSettings.h>
# include <Databases/MySQL/DatabaseMaterializeMySQL.h>
......@@ -83,7 +84,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
if (engine_define->engine->parameters || engine_define->partition_by || engine_define->primary_key || engine_define->order_by ||
engine_define->sample_by || (engine_name != "MaterializeMySQL" && engine_define->settings))
engine_define->sample_by || (!endsWith(engine_name, "MySQL") && engine_define->settings))
throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
......@@ -133,8 +134,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
, std::move(materialize_mode_settings));
}
auto mysql_database_settings = std::make_unique<ConnectionMySQLSettings>();
mysql_database_settings->loadFromQueryContext(context);
mysql_database_settings->loadFromQuery(*engine_define); /// higher priority
return std::make_shared<DatabaseConnectionMySQL>(
context, database_name, metadata_path, engine_define, mysql_database_name, std::move(mysql_pool));
context, database_name, metadata_path, engine_define, mysql_database_name, std::move(mysql_database_settings), std::move(mysql_pool));
}
catch (...)
{
......
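The two load calls apply settings in priority order: query-context values first, then the CREATE DATABASE query's SETTINGS clause overriding them. A standalone sketch of that layering, using a hypothetical flat settings map instead of the real settings classes:

#include <map>
#include <string>

using Settings = std::map<std::string, std::string>; /// hypothetical flat settings map

/// Later layers win: start from context defaults, then overlay the query's clause.
Settings resolveSettings(const Settings & from_context, const Settings & from_query)
{
    Settings result = from_context;
    for (const auto & [name, value] : from_query)
        result[name] = value; /// higher priority
    return result;
}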
#include <Databases/MySQL/ConnectionMySQLSettings.h>
#include <Core/SettingsFields.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTCreateQuery.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_SETTING;
extern const int BAD_ARGUMENTS;
}
IMPLEMENT_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_CONNECTION_MYSQL_SETTINGS)
void ConnectionMySQLSettings::loadFromQuery(ASTStorage & storage_def)
{
if (storage_def.settings)
{
try
{
applyChanges(storage_def.settings->changes);
}
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNKNOWN_SETTING)
throw Exception(e.message() + " for database " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS);
else
e.rethrow();
}
}
else
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
storage_def.set(storage_def.settings, settings_ast);
}
SettingsChanges & changes = storage_def.settings->changes;
#define ADD_IF_ABSENT(NAME) \
if (std::find_if(changes.begin(), changes.end(), \
[](const SettingChange & c) { return c.name == #NAME; }) \
== changes.end()) \
changes.push_back(SettingChange{#NAME, static_cast<Field>(NAME)});
APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(ADD_IF_ABSENT)
#undef ADD_IF_ABSENT
}
void ConnectionMySQLSettings::loadFromQueryContext(const Context & context)
{
if (!context.hasQueryContext())
return;
const Settings & settings = context.getQueryContext().getSettingsRef();
if (settings.mysql_datatypes_support_level.value != mysql_datatypes_support_level.value)
set("mysql_datatypes_support_level", settings.mysql_datatypes_support_level.toString());
}
}
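The ADD_IF_ABSENT macro above persists a default value into the stored SETTINGS clause only when the user did not set that name explicitly. The same logic without the preprocessor, sketched with a hypothetical SettingChange type:

#include <algorithm>
#include <string>
#include <vector>

struct SettingChange { std::string name; std::string value; }; /// hypothetical stand-in
using SettingsChanges = std::vector<SettingChange>;

/// Append a default only if no change with that name is present yet.
void addIfAbsent(SettingsChanges & changes, const std::string & name, const std::string & default_value)
{
    auto found = std::find_if(changes.begin(), changes.end(),
        [&](const SettingChange & c) { return c.name == name; });
    if (found == changes.end())
        changes.push_back({name, default_value});
}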
#pragma once
#include <Core/Defines.h>
#include <Core/BaseSettings.h>
#include <Core/SettingsEnums.h>
namespace DB
{
class Context;
class ASTStorage;
#define LIST_OF_CONNECTION_MYSQL_SETTINGS(M) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
/// Settings that should not change after the creation of a database.
#define APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(M) \
M(mysql_datatypes_support_level)
DECLARE_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_CONNECTION_MYSQL_SETTINGS)
/** Settings for the MySQL database engine.
* Could be loaded from a CREATE DATABASE query (SETTINGS clause) and Query settings.
*/
struct ConnectionMySQLSettings : public BaseSettings<ConnectionMySQLSettingsTraits>
{
void loadFromQuery(ASTStorage & storage_def);
void loadFromQueryContext(const Context & context);
};
}
......@@ -45,13 +45,13 @@ static constexpr const std::chrono::seconds cleaner_sleep_time{30};
static const std::chrono::seconds lock_acquire_timeout{10};
DatabaseConnectionMySQL::DatabaseConnectionMySQL(const Context & context, const String & database_name_, const String & metadata_path_,
const ASTStorage * database_engine_define_, const String & database_name_in_mysql_, mysqlxx::Pool && pool)
const ASTStorage * database_engine_define_, const String & database_name_in_mysql_, std::unique_ptr<ConnectionMySQLSettings> settings_, mysqlxx::Pool && pool)
: IDatabase(database_name_)
, global_context(context.getGlobalContext())
, metadata_path(metadata_path_)
, database_engine_define(database_engine_define_->clone())
, database_name_in_mysql(database_name_in_mysql_)
, mysql_datatypes_support_level(context.getQueryContext().getSettingsRef().mysql_datatypes_support_level)
, database_settings(std::move(settings_))
, mysql_pool(std::move(pool))
{
empty(); /// check that the database works fine.
......@@ -133,9 +133,20 @@ static ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr
columns_expression_list->children.emplace_back(column_declaration);
}
ASTStorage * ast_storage = table_storage_define->as<ASTStorage>();
ASTs storage_children = ast_storage->children;
auto storage_engine_arguments = ast_storage->engine->arguments;
/// Add table_name to engine arguments
auto mysql_table_name = std::make_shared<ASTLiteral>(table_id.table_name);
auto storage_engine_arguments = table_storage_define->as<ASTStorage>()->engine->arguments;
storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 2, mysql_table_name);
/// Unset settings
storage_children.erase(
std::remove_if(storage_children.begin(), storage_children.end(),
[&](const ASTPtr & element) { return element.get() == ast_storage->settings; }),
storage_children.end());
ast_storage->settings = nullptr;
}
return create_table_query;
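The "unset settings" step above is the standard erase-remove idiom, here matching a single child node by pointer identity. A minimal standalone sketch with a hypothetical node type in place of ASTPtr:

#include <algorithm>
#include <memory>
#include <vector>

using NodePtr = std::shared_ptr<int>; /// hypothetical stand-in for ASTPtr
using Nodes = std::vector<NodePtr>;

/// Remove the one child that is the given node, comparing by address.
void removeChild(Nodes & children, const int * target)
{
    children.erase(
        std::remove_if(children.begin(), children.end(),
            [&](const NodePtr & element) { return element.get() == target; }),
        children.end());
}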
......@@ -273,7 +284,7 @@ std::map<String, NamesAndTypesList> DatabaseConnectionMySQL::fetchTablesColumnsL
database_name_in_mysql,
tables_name,
settings.external_table_functions_use_nulls,
mysql_datatypes_support_level);
database_settings->mysql_datatypes_support_level);
}
void DatabaseConnectionMySQL::shutdown()
......
......@@ -8,6 +8,7 @@
#include <Core/MultiEnum.h>
#include <Common/ThreadPool.h>
#include <Databases/DatabasesCommon.h>
#include <Databases/MySQL/ConnectionMySQLSettings.h>
#include <Parsers/ASTCreateQuery.h>
#include <atomic>
......@@ -36,7 +37,8 @@ public:
DatabaseConnectionMySQL(
const Context & context, const String & database_name, const String & metadata_path,
const ASTStorage * database_engine_define, const String & database_name_in_mysql, mysqlxx::Pool && pool);
const ASTStorage * database_engine_define, const String & database_name_in_mysql, std::unique_ptr<ConnectionMySQLSettings> settings_,
mysqlxx::Pool && pool);
String getEngineName() const override { return "MySQL"; }
......@@ -76,9 +78,7 @@ private:
String metadata_path;
ASTPtr database_engine_define;
String database_name_in_mysql;
// Cache setting for later from query context upon creation,
// so column types depend on the settings set at query-level.
MultiEnum<MySQLDataTypesSupport> mysql_datatypes_support_level;
std::unique_ptr<ConnectionMySQLSettings> database_settings;
std::atomic<bool> quit{false};
std::condition_variable cond;
......
......@@ -17,6 +17,7 @@ SRCS(
DatabaseOrdinary.cpp
DatabasesCommon.cpp
DatabaseWithDictionaries.cpp
MySQL/ConnectionMySQLSettings.cpp
MySQL/DatabaseConnectionMySQL.cpp
MySQL/DatabaseMaterializeMySQL.cpp
MySQL/FetchTablesColumnsList.cpp
......
......@@ -368,6 +368,8 @@ void registerInputFormatProcessorArrow(FormatFactory & factory);
void registerOutputFormatProcessorArrow(FormatFactory & factory);
void registerInputFormatProcessorAvro(FormatFactory & factory);
void registerOutputFormatProcessorAvro(FormatFactory & factory);
void registerInputFormatProcessorRawBLOB(FormatFactory & factory);
void registerOutputFormatProcessorRawBLOB(FormatFactory & factory);
/// Output only (presentational) formats.
......@@ -428,6 +430,9 @@ FormatFactory::FormatFactory()
registerOutputFormatProcessorTemplate(*this);
registerInputFormatProcessorMsgPack(*this);
registerOutputFormatProcessorMsgPack(*this);
registerInputFormatProcessorRawBLOB(*this);
registerOutputFormatProcessorRawBLOB(*this);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorORC(*this);
registerOutputFormatProcessorORC(*this);
......@@ -458,6 +463,7 @@ FormatFactory::FormatFactory()
registerInputFormatProcessorRegexp(*this);
registerInputFormatProcessorJSONAsString(*this);
registerInputFormatProcessorLineAsString(*this);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorCapnProto(*this);
#endif
......
......@@ -5,11 +5,19 @@
#include <Functions/FunctionFactory.h>
#include <Core/Field.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <time.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
......@@ -35,7 +43,7 @@ private:
class FunctionBaseNow : public IFunctionBaseImpl
{
public:
explicit FunctionBaseNow(time_t time_) : time_value(time_), return_type(std::make_shared<DataTypeDateTime>()) {}
explicit FunctionBaseNow(time_t time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {}
String getName() const override { return "now"; }
......@@ -72,14 +80,44 @@ public:
bool isDeterministic() const override { return false; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique<NowOverloadResolver>(); }
DataTypePtr getReturnType(const DataTypes &) const override { return std::make_shared<DataTypeDateTime>(); }
DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() > 1)
{
throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type))
{
throw Exception(
"Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (arguments.size() == 1)
{
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0));
}
return std::make_shared<DataTypeDateTime>();
}
FunctionBaseImplPtr build(const ColumnsWithTypeAndName &, const DataTypePtr &) const override
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override
{
return std::make_unique<FunctionBaseNow>(time(nullptr));
if (arguments.size() > 1)
{
throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type))
{
throw Exception(
"Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (arguments.size() == 1)
return std::make_unique<FunctionBaseNow>(
time(nullptr), std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)));
return std::make_unique<FunctionBaseNow>(time(nullptr), std::make_shared<DataTypeDateTime>());
}
};
......
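Both getReturnType() and build() above repeat the same check: zero arguments, or a single string naming the time zone. A standalone analogue of that validation, using hypothetical types rather than the ClickHouse function API:

#include <stdexcept>
#include <string>
#include <vector>

enum class ArgType { String, Other }; /// hypothetical stand-in for argument types

/// A function taking an optional time zone must see 0 or 1 String arguments.
void checkOptionalTimeZoneArg(const std::vector<ArgType> & args, const std::string & name)
{
    if (args.size() > 1)
        throw std::invalid_argument("Number of arguments for function " + name + " should be 0 or 1");
    if (args.size() == 1 && args[0] != ArgType::String)
        throw std::invalid_argument("Argument of function " + name + " should be String or FixedString");
}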
......@@ -54,7 +54,7 @@ ReadBufferFromFile::ReadBufferFromFile(
ReadBufferFromFile::ReadBufferFromFile(
int fd_,
int & fd_,
const std::string & original_file_name,
size_t buf_size,
char * existing_memory,
......@@ -63,6 +63,7 @@ ReadBufferFromFile::ReadBufferFromFile(
ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
{
fd_ = -1;
}
......
......@@ -29,7 +29,10 @@ public:
char * existing_memory = nullptr, size_t alignment = 0);
/// Use pre-opened file descriptor.
ReadBufferFromFile(int fd, const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
ReadBufferFromFile(
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
const std::string & original_file_name = {},
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr, size_t alignment = 0);
~ReadBufferFromFile() override;
......
......@@ -59,7 +59,7 @@ WriteBufferFromFile::WriteBufferFromFile(
/// Use pre-opened file descriptor.
WriteBufferFromFile::WriteBufferFromFile(
int fd_,
int & fd_,
const std::string & original_file_name,
size_t buf_size,
char * existing_memory,
......@@ -68,6 +68,7 @@ WriteBufferFromFile::WriteBufferFromFile(
WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
{
fd_ = -1;
}
......
......@@ -39,7 +39,7 @@ public:
/// Use pre-opened file descriptor.
WriteBufferFromFile(
int fd,
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
const std::string & original_file_name = {},
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
......
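Taking the descriptor by non-const reference and resetting it to -1 makes the ownership transfer explicit: after a successful construction only the buffer may close() the fd. A minimal sketch of the idiom with a hypothetical FdHolder, not a ClickHouse class:

#include <stdexcept>
#include <unistd.h>

struct FdHolder
{
    int fd = -1;

    /// On success the caller's variable is reset to -1, so exactly one owner remains.
    explicit FdHolder(int & fd_) : fd(fd_)
    {
        if (fd < 0)
            throw std::invalid_argument("bad file descriptor");
        fd_ = -1; /// constructor didn't throw: ownership is transferred
    }

    ~FdHolder()
    {
        if (fd >= 0)
            ::close(fd);
    }

    FdHolder(const FdHolder &) = delete;
    FdHolder & operator=(const FdHolder &) = delete;
};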
......@@ -56,7 +56,6 @@ WriteBufferFromS3::WriteBufferFromS3(
initiate();
}
void WriteBufferFromS3::nextImpl()
{
if (!offset())
......@@ -79,23 +78,31 @@ void WriteBufferFromS3::nextImpl()
}
}
void WriteBufferFromS3::finalize()
{
finalizeImpl();
}
void WriteBufferFromS3::finalizeImpl()
{
if (!finalized)
{
next();
if (is_multipart)
writePart(temporary_buffer->str());
complete();
}
finalized = true;
}
}
WriteBufferFromS3::~WriteBufferFromS3()
{
try
{
next();
finalizeImpl();
}
catch (...)
{
......@@ -103,7 +110,6 @@ WriteBufferFromS3::~WriteBufferFromS3()
}
}
void WriteBufferFromS3::initiate()
{
Aws::S3::Model::CreateMultipartUploadRequest req;
......
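The finalize()/finalizeImpl() split above is the usual idempotent-finalize idiom: callers may finalize explicitly and see exceptions, while the destructor finalizes implicitly and must swallow them. A condensed sketch with a hypothetical class:

#include <iostream>

class UploadBuffer
{
public:
    void finalize() { finalizeImpl(); } /// explicit path: exceptions propagate to the caller

    ~UploadBuffer()
    {
        try
        {
            finalizeImpl(); /// implicit path: a destructor must not throw
        }
        catch (...)
        {
            std::cerr << "finalize failed\n"; /// log and swallow
        }
    }

private:
    void finalizeImpl()
    {
        if (finalized)
            return;       /// repeated calls are no-ops
        completeUpload();
        finalized = true; /// set only after the work succeeded
    }

    void completeUpload() { /* write the last part, complete the multipart upload */ }

    bool finalized = false;
};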
(Diffs for the remaining 54 files are collapsed.)