提交 bec14b19 编写于 作者: A Alexey Milovidov

Merge branch 'master' into add_symlinks_for_lld

......@@ -52,12 +52,12 @@ IncludeCategories:
ReflowComments: false
AlignEscapedNewlinesLeft: false
AlignEscapedNewlines: DontAlign
AlignTrailingComments: true
# Not changed:
AccessModifierOffset: -4
AlignConsecutiveAssignments: false
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
......
......@@ -15,6 +15,7 @@
/docs/build
/docs/publish
/docs/edit
/docs/website
/docs/tools/venv/
/docs/en/single.md
/docs/ru/single.md
......
......@@ -104,6 +104,10 @@
[submodule "contrib/sparsehash-c11"]
path = contrib/sparsehash-c11
url = https://github.com/sparsehash/sparsehash-c11.git
[submodule "contrib/grpc"]
path = contrib/grpc
url = https://github.com/grpc/grpc.git
branch = v1.25.0
[submodule "contrib/aws"]
path = contrib/aws
url = https://github.com/aws/aws-sdk-cpp.git
......
......@@ -341,6 +341,7 @@ endif()
include (cmake/find/libxml2.cmake)
include (cmake/find/brotli.cmake)
include (cmake/find/protobuf.cmake)
include (cmake/find/grpc.cmake)
include (cmake/find/pdqsort.cmake)
include (cmake/find/hdfs3.cmake) # uses protobuf
include (cmake/find/s3.cmake)
......
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.yandex)
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.tech)
ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
## Useful Links
* [Official website](https://clickhouse.yandex/) has quick high-level overview of ClickHouse on main page.
* [Tutorial](https://clickhouse.yandex/tutorial.html) shows how to set up and query small ClickHouse cluster.
* [Documentation](https://clickhouse.yandex/docs/en/) provides more in-depth information.
* [Official website](https://clickhouse.tech/) has quick high-level overview of ClickHouse on main page.
* [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster.
* [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events.
* [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
## Upcoming Events
## Upcoming Events
* [ClickHouse Meetup in San Francisco](https://www.eventbrite.com/e/clickhouse-february-meetup-registration-88496227599) on February 5.
* [ClickHouse Meetup in New York](https://www.meetup.com/Uber-Engineering-Events-New-York/events/268328663/) on February 11.
* [ClickHouse Meetup in Athens](https://www.meetup.com/Athens-Big-Data/events/268379195/) on March 5.
set(_PROTOBUF_PROTOC $<TARGET_FILE:protoc>)
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>)
function(PROTOBUF_GENERATE_GRPC_CPP SRCS HDRS)
if(NOT ARGN)
message(SEND_ERROR "Error: PROTOBUF_GENERATE_GRPC_CPP() called without any proto files")
return()
endif()
if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
foreach(FIL ${ARGN})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(ABS_PATH ${ABS_FIL} PATH)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
else()
set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
endif()
if(DEFINED PROTOBUF_IMPORT_DIRS)
foreach(DIR ${Protobuf_IMPORT_DIRS})
get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
endif()
set(${SRCS})
set(${HDRS})
foreach(FIL ${ARGN})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(FIL_WE ${FIL} NAME_WE)
list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc")
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h")
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out=${CMAKE_CURRENT_BINARY_DIR}
--plugin=protoc-gen-grpc=${_GRPC_CPP_PLUGIN_EXECUTABLE}
${_protobuf_include_path} ${ABS_FIL}
DEPENDS ${ABS_FIL}
COMMENT "Running gRPC C++ protocol buffer compiler on ${FIL}"
VERBATIM)
endforeach()
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
set(${HDRS} ${${HDRS}} PARENT_SCOPE)
endfunction()
if (NOT APPLE AND NOT ARCH_32)
if (NOT ARCH_32)
option (USE_INTERNAL_LIBGSASL_LIBRARY "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED})
endif ()
......@@ -16,7 +16,7 @@ if (NOT USE_INTERNAL_LIBGSASL_LIBRARY)
endif ()
if (LIBGSASL_LIBRARY AND LIBGSASL_INCLUDE_DIR)
elseif (NOT MISSING_INTERNAL_LIBGSASL_LIBRARY AND NOT APPLE AND NOT ARCH_32)
elseif (NOT MISSING_INTERNAL_LIBGSASL_LIBRARY AND NOT ARCH_32)
set (LIBGSASL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libgsasl/src ${ClickHouse_SOURCE_DIR}/contrib/libgsasl/linux_x86_64/include)
set (USE_INTERNAL_LIBGSASL_LIBRARY 1)
set (LIBGSASL_LIBRARY libgsasl)
......
# Freebsd: contrib/cppkafka/include/cppkafka/detail/endianness.h:53:23: error: 'betoh16' was not declared in this scope
if (NOT ARCH_ARM AND NOT ARCH_32 AND NOT APPLE AND NOT OS_FREEBSD AND OPENSSL_FOUND)
if (NOT ARCH_ARM AND NOT ARCH_32 AND NOT OS_FREEBSD AND OPENSSL_FOUND)
option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES})
endif ()
......@@ -10,7 +10,7 @@ endif ()
if (ENABLE_RDKAFKA)
if (OS_LINUX AND NOT ARCH_ARM AND USE_LIBGSASL)
if (NOT ARCH_ARM AND USE_LIBGSASL)
option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
endif ()
......
......@@ -336,4 +336,6 @@ if (USE_FASTOPS)
add_subdirectory (fastops-cmake)
endif()
add_subdirectory(grpc-cmake)
add_subdirectory(replxx-cmake)
Subproject commit 5b2752041c8d2f75eb5c1dbec8b4c25fc0e24d12
Subproject commit 6cfcf6c24293af100d523b89b61d1ab216fa4735
......@@ -146,3 +146,5 @@ target_compile_definitions(curl PRIVATE HAVE_CONFIG_H BUILDING_LIBCURL CURL_HIDD
target_include_directories(curl PUBLIC ${CURL_DIR}/include ${CURL_DIR}/lib .)
target_compile_definitions(curl PRIVATE OS="${CMAKE_SYSTEM_NAME}")
target_link_libraries(curl PRIVATE ssl)
#define CURL_CA_BUNDLE "/etc/ssl/certs/ca-certificates.crt"
#define CURL_DISABLE_FTP
#define CURL_DISABLE_TFTP
#define CURL_DISABLE_LDAP
......@@ -9,9 +10,14 @@
#define SIZEOF_CURL_OFF_T 8
#define SIZEOF_SIZE_T 8
#define HAVE_ALARM
#define HAVE_FCNTL_O_NONBLOCK
#define HAVE_GETADDRINFO
#define HAVE_LONGLONG
#define HAVE_POLL_FINE
#define HAVE_SIGACTION
#define HAVE_SIGNAL
#define HAVE_SIGSETJMP
#define HAVE_SOCKET
#define HAVE_STRUCT_TIMEVAL
......@@ -34,5 +40,11 @@
#define HAVE_ERRNO_H
#define HAVE_FCNTL_H
#define HAVE_NETDB_H
#define HAVE_NETINET_IN_H
#define HAVE_SETJMP_H
#define HAVE_SYS_STAT_H
#define HAVE_UNISTD_H
#define ENABLE_IPV6
#define USE_OPENSSL
#define USE_THREADS_POSIX
Subproject commit c1d176528fd8da9dd4066d16554bcd216d29033f
此差异已折叠。
Subproject commit 3b8948a4042e34fb00b4fb987535dc9e02e39040
Subproject commit 42ef20687042637252e64df1934b6d47771486d1
Subproject commit 6160ec275a5bb0a4088ede3c5f2afde638bbef65
Subproject commit 4ffe54b4f59ee5ae3767f9f25dc14651a3384d62
......@@ -23,6 +23,8 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c
${RDKAFKA_SOURCE_DIR}/rdkafka_mock.c
${RDKAFKA_SOURCE_DIR}/rdkafka_mock_handlers.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msg.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_reader.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_writer.c
......
......@@ -75,8 +75,18 @@
#define HAVE_STRNDUP 1
// strerror_r
#define HAVE_STRERROR_R 1
#ifdef __APPLE__
// pthread_setname_np
#define HAVE_PTHREAD_SETNAME_DARWIN 1
#if (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <= 101400)
#define _TTHREAD_EMULATE_TIMESPEC_GET_
#endif
#else
// pthread_setname_gnu
#define HAVE_PTHREAD_SETNAME_GNU 1
#endif
// python
//#define HAVE_PYTHON 1
// disable C11 threads for compatibility with old libc
......
Subproject commit d478f62bd93c9cd14eb343756ef73a4ae622ddf5
Subproject commit d805cf5ca4cf8bdc642261cfcbe7a0a241cb7298
......@@ -575,6 +575,7 @@ void HTTPHandler::processQuery(
try
{
char b;
//FIXME looks like MSG_DONTWAIT is useless because of POCO_BROKEN_TIMEOUTS
int status = socket.receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK);
if (status == 0)
context.killCurrentQuery();
......
......@@ -2,6 +2,7 @@
#include <Common/config.h>
#include <Poco/Net/TCPServerConnection.h>
#include <Common/getFQDNOrHostName.h>
#include <Common/CurrentMetrics.h>
#include <Core/MySQLProtocol.h>
#include "IServer.h"
......@@ -9,6 +10,11 @@
#include <Poco/Net/SecureStreamSocket.h>
#endif
namespace CurrentMetrics
{
extern const Metric MySQLConnection;
}
namespace DB
{
/// Handler for MySQL wire protocol connections. Allows to connect to ClickHouse using MySQL client.
......@@ -20,6 +26,8 @@ public:
void run() final;
private:
CurrentMetrics::Increment metric_increment{CurrentMetrics::MySQLConnection};
/// Enables SSL, if client requested.
void finishHandshake(MySQLProtocol::HandshakeResponse &);
......
......@@ -76,6 +76,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
}
else
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_SERVICE_UNAVAILABLE);
response.send() << message.rdbuf();
}
}
......
......@@ -578,6 +578,25 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->initializeTraceCollector();
#endif
/// Describe multiple reasons when query profiler cannot work.
#if !USE_UNWIND
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they cannot work without bundled unwind (stack unwinding) library.");
#endif
#if WITH_COVERAGE
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage.");
#endif
#if defined(SANITIZER)
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they cannot work under sanitizers"
" when two different stack unwinding methods will interfere with each other.");
#endif
if (!hasPHDRCache())
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created"
" (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe).");
global_context->setCurrentDatabase(default_database);
if (has_zookeeper && config().has("distributed_ddl"))
......
......@@ -900,6 +900,10 @@ void TCPHandler::receiveQuery()
client_info.initial_query_id = client_info.current_query_id;
client_info.initial_address = client_info.current_address;
}
else
{
query_context->switchRowPolicy();
}
}
/// Per query settings.
......
......@@ -185,7 +185,7 @@
<mlock_executable>false</mlock_executable>
<!-- Configuration of clusters that could be used in Distributed tables.
https://clickhouse.yandex/docs/en/table_engines/distributed/
https://clickhouse.tech/docs/en/operations/table_engines/distributed/
-->
<remote_servers incl="clickhouse_remote_servers" >
<!-- Test only shard config for testing distributed storage -->
......
......@@ -74,7 +74,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
current_resolved_address = DNSResolver::instance().resolveAddress(host, port);
socket->connect(*current_resolved_address, timeouts.connection_timeout);
const auto & connection_timeout = static_cast<bool>(secure) ? timeouts.secure_connection_timeout : timeouts.connection_timeout;
socket->connect(*current_resolved_address, connection_timeout);
socket->setReceiveTimeout(timeouts.receive_timeout);
socket->setSendTimeout(timeouts.send_timeout);
socket->setNoDelay(true);
......
......@@ -440,6 +440,8 @@ XMLDocumentPtr ConfigProcessor::processConfig(
zkutil::ZooKeeperNodeCache * zk_node_cache,
const zkutil::EventPtr & zk_changed_event)
{
LOG_DEBUG(log, "Processing configuration file '" + path + "'.");
XMLDocumentPtr config = dom_parser.parse(path);
std::vector<std::string> contributing_files;
......@@ -449,6 +451,8 @@ XMLDocumentPtr ConfigProcessor::processConfig(
{
try
{
LOG_DEBUG(log, "Merging configuration file '" + merge_file + "'.");
XMLDocumentPtr with = dom_parser.parse(merge_file);
merge(config, with);
contributing_files.push_back(merge_file);
......@@ -484,6 +488,8 @@ XMLDocumentPtr ConfigProcessor::processConfig(
}
if (!include_from_path.empty())
{
LOG_DEBUG(log, "Including configuration file '" + include_from_path + "'.");
contributing_files.push_back(include_from_path);
include_from = dom_parser.parse(include_from_path);
}
......@@ -613,6 +619,7 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
Poco::File(preprocessed_path_parent).createDirectories();
}
DOMWriter().writeNode(preprocessed_path, loaded_config.preprocessed_xml);
LOG_DEBUG(log, "Saved preprocessed configuration to '" << preprocessed_path << "'.");
}
catch (Poco::Exception & e)
{
......
......@@ -15,7 +15,8 @@
M(DiskSpaceReservedForMerge, "Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts.") \
M(DistributedSend, "Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous mode.") \
M(QueryPreempted, "Number of queries that are stopped and waiting due to 'priority' setting.") \
M(TCPConnection, "Number of connections to TCP server (clients with native interface)") \
M(TCPConnection, "Number of connections to TCP server (clients with native interface), also included server-server distributed query connections") \
M(MySQLConnection, "Number of client connections using MySQL protocol") \
M(HTTPConnection, "Number of connections to HTTP server") \
M(InterserverConnection, "Number of connections from other replicas to fetch parts") \
M(OpenFileForRead, "Number of files open for reading") \
......
......@@ -95,6 +95,7 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_
* with_stacktrace - prints stack trace for DB::Exception.
* check_embedded_stacktrace - if DB::Exception has embedded stacktrace then
* only this stack trace will be printed.
* with_extra_info - add information about the filesystem in case of "No space left on device" and similar.
*/
std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace = false,
bool with_extra_info = true);
......
#include <cstdlib>
#include "MemoryTracker.h"
#include <common/likely.h>
#include <common/logger_useful.h>
#include <IO/WriteHelpers.h>
#include "Common/TraceCollector.h"
#include <Common/CurrentThread.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
#include <Common/CurrentThread.h>
#include <IO/WriteHelpers.h>
#include <common/likely.h>
#include <common/logger_useful.h>
#include <ext/singleton.h>
#include <atomic>
#include <cmath>
#include <cstdlib>
namespace DB
......@@ -73,7 +78,7 @@ void MemoryTracker::alloc(Int64 size)
return;
/** Using memory_order_relaxed means that if allocations are done simultaneously,
* we allow exception about memory limit exceeded to be thrown only on next allocation.
* we allow exception about memory limit exceeded to be thrown only on next allocation.
* So, we allow over-allocations.
*/
Int64 will_be = size + amount.fetch_add(size, std::memory_order_relaxed);
......@@ -81,7 +86,8 @@ void MemoryTracker::alloc(Int64 size)
if (metric != CurrentMetrics::end())
CurrentMetrics::add(metric, size);
Int64 current_limit = limit.load(std::memory_order_relaxed);
Int64 current_hard_limit = hard_limit.load(std::memory_order_relaxed);
Int64 current_profiler_limit = profiler_limit.load(std::memory_order_relaxed);
/// Using non-thread-safe random number generator. Joint distribution in different threads would not be uniform.
/// In this case, it doesn't matter.
......@@ -98,12 +104,19 @@ void MemoryTracker::alloc(Int64 size)
message << " " << description;
message << ": fault injected. Would use " << formatReadableSizeWithBinarySuffix(will_be)
<< " (attempt to allocate chunk of " << size << " bytes)"
<< ", maximum: " << formatReadableSizeWithBinarySuffix(current_limit);
<< ", maximum: " << formatReadableSizeWithBinarySuffix(current_hard_limit);
throw DB::Exception(message.str(), DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED);
}
if (unlikely(current_limit && will_be > current_limit))
if (unlikely(current_profiler_limit && will_be > current_profiler_limit))
{
auto no_track = blocker.cancel();
ext::Singleton<DB::TraceCollector>()->collect(size);
setOrRaiseProfilerLimit(current_profiler_limit + Int64(std::ceil((will_be - current_profiler_limit) / profiler_step)) * profiler_step);
}
if (unlikely(current_hard_limit && will_be > current_hard_limit))
{
free(size);
......@@ -116,7 +129,7 @@ void MemoryTracker::alloc(Int64 size)
message << " " << description;
message << " exceeded: would use " << formatReadableSizeWithBinarySuffix(will_be)
<< " (attempt to allocate chunk of " << size << " bytes)"
<< ", maximum: " << formatReadableSizeWithBinarySuffix(current_limit);
<< ", maximum: " << formatReadableSizeWithBinarySuffix(current_hard_limit);
throw DB::Exception(message.str(), DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED);
}
......@@ -174,7 +187,8 @@ void MemoryTracker::resetCounters()
{
amount.store(0, std::memory_order_relaxed);
peak.store(0, std::memory_order_relaxed);
limit.store(0, std::memory_order_relaxed);
hard_limit.store(0, std::memory_order_relaxed);
profiler_limit.store(0, std::memory_order_relaxed);
}
......@@ -187,11 +201,20 @@ void MemoryTracker::reset()
}
void MemoryTracker::setOrRaiseLimit(Int64 value)
void MemoryTracker::setOrRaiseHardLimit(Int64 value)
{
/// This is just atomic set to maximum.
Int64 old_value = hard_limit.load(std::memory_order_relaxed);
while (old_value < value && !hard_limit.compare_exchange_weak(old_value, value))
;
}
void MemoryTracker::setOrRaiseProfilerLimit(Int64 value)
{
/// This is just atomic set to maximum.
Int64 old_value = limit.load(std::memory_order_relaxed);
while (old_value < value && !limit.compare_exchange_weak(old_value, value))
Int64 old_value = profiler_limit.load(std::memory_order_relaxed);
while (old_value < value && !profiler_limit.compare_exchange_weak(old_value, value))
;
}
......@@ -207,7 +230,7 @@ namespace CurrentMemoryTracker
if (untracked > untracked_memory_limit)
{
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
/// more. It could be usefull for enlarge Exception message in rethrow logic.
/// more. It could be useful to enlarge Exception message in rethrow logic.
Int64 tmp = untracked;
untracked = 0;
memory_tracker->alloc(tmp);
......@@ -218,10 +241,7 @@ namespace CurrentMemoryTracker
void realloc(Int64 old_size, Int64 new_size)
{
Int64 addition = new_size - old_size;
if (addition > 0)
alloc(addition);
else
free(-addition);
addition > 0 ? alloc(addition) : free(-addition);
}
void free(Int64 size)
......
......@@ -15,7 +15,10 @@ class MemoryTracker
{
std::atomic<Int64> amount {0};
std::atomic<Int64> peak {0};
std::atomic<Int64> limit {0};
std::atomic<Int64> hard_limit {0};
std::atomic<Int64> profiler_limit {0};
Int64 profiler_step = 0;
/// To test exception safety of calling code, memory tracker throws an exception on each memory allocation with specified probability.
double fault_probability = 0;
......@@ -32,7 +35,6 @@ class MemoryTracker
public:
MemoryTracker(VariableContext level_ = VariableContext::Thread) : level(level_) {}
MemoryTracker(Int64 limit_, VariableContext level_ = VariableContext::Thread) : limit(limit_), level(level_) {}
MemoryTracker(MemoryTracker * parent_, VariableContext level_ = VariableContext::Thread) : parent(parent_), level(level_) {}
~MemoryTracker();
......@@ -66,21 +68,22 @@ public:
return peak.load(std::memory_order_relaxed);
}
void setLimit(Int64 limit_)
{
limit.store(limit_, std::memory_order_relaxed);
}
/** Set limit if it was not set.
* Otherwise, set limit to new value, if new value is greater than previous limit.
*/
void setOrRaiseLimit(Int64 value);
void setOrRaiseHardLimit(Int64 value);
void setOrRaiseProfilerLimit(Int64 value);
void setFaultProbability(double value)
{
fault_probability = value;
}
void setProfilerStep(Int64 value)
{
profiler_step = value;
}
/// next should be changed only once: from nullptr to some value.
/// NOTE: It is not true in MergeListElement
void setParent(MemoryTracker * elem)
......
#include "QueryProfiler.h"
#include <random>
#include <common/phdr_cache.h>
#include <common/config_common.h>
#include <common/StringRef.h>
#include <common/logger_useful.h>
#include <Common/PipeFDs.h>
#include <Common/StackTrace.h>
#include <Common/CurrentThread.h>
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <Common/StackTrace.h>
#include <Common/TraceCollector.h>
#include <Common/thread_local_rng.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
#include <common/StringRef.h>
#include <common/config_common.h>
#include <common/logger_useful.h>
#include <common/phdr_cache.h>
#include <ext/singleton.h>
#include <random>
namespace ProfileEvents
{
extern const Event QueryProfilerSignalOverruns;
}
namespace DB
{
extern LazyPipeFDs trace_pipe;
namespace
{
/// Normally query_id is a UUID (string with a fixed length) but user can provide custom query_id.
/// Thus upper bound on query_id length should be introduced to avoid buffer overflow in signal handler.
constexpr size_t QUERY_ID_MAX_LEN = 1024;
#if defined(OS_LINUX)
thread_local size_t write_trace_iteration = 0;
#endif
void writeTraceInfo(TimerType timer_type, int /* sig */, siginfo_t * info, void * context)
void writeTraceInfo(TraceType trace_type, int /* sig */, siginfo_t * info, void * context)
{
int overrun_count = 0;
#if defined(OS_LINUX)
/// Quickly drop if signal handler is called too frequently.
/// Otherwise we may end up infinitelly processing signals instead of doing any useful work.
++write_trace_iteration;
if (info && info->si_overrun > 0)
{
/// But pass with some frequency to avoid drop of all traces.
if (write_trace_iteration % info->si_overrun == 0)
{
ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, info->si_overrun);
}
else
{
ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, info->si_overrun + 1);
return;
}
}
if (info)
overrun_count = info->si_overrun;
#else
UNUSED(info);
#endif
constexpr size_t buf_size = sizeof(char) + // TraceCollector stop flag
8 * sizeof(char) + // maximum VarUInt length for string size
QUERY_ID_MAX_LEN * sizeof(char) + // maximum query_id length
sizeof(UInt8) + // number of stack frames
sizeof(StackTrace::Frames) + // collected stack trace, maximum capacity
sizeof(TimerType) + // timer type
sizeof(UInt64); // thread_id
char buffer[buf_size];
WriteBufferFromFileDescriptorDiscardOnFailure out(trace_pipe.fds_rw[1], buf_size, buffer);
StringRef query_id = CurrentThread::getQueryId();
query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN);
UInt64 thread_id = CurrentThread::get().thread_id;
const auto signal_context = *reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(signal_context);
writeChar(false, out);
writeStringBinary(query_id, out);
size_t stack_trace_size = stack_trace.getSize();
size_t stack_trace_offset = stack_trace.getOffset();
writeIntBinary(UInt8(stack_trace_size - stack_trace_offset), out);
for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
writePODBinary(stack_trace.getFrames()[i], out);
writePODBinary(timer_type, out);
writePODBinary(thread_id, out);
out.next();
ext::Singleton<TraceCollector>()->collect(trace_type, stack_trace, overrun_count);
}
[[maybe_unused]] const UInt32 TIMER_PRECISION = 1e9;
......@@ -135,11 +81,11 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const UInt64 thread_id, const
sev.sigev_notify = SIGEV_THREAD_ID;
sev.sigev_signo = pause_signal;
#if defined(__FreeBSD__)
# if defined(__FreeBSD__)
sev._sigev_un._threadid = thread_id;
#else
# else
sev._sigev_un._tid = thread_id;
#endif
# endif
if (timer_create(clock_type, &sev, &timer_id))
{
/// In Google Cloud Run, the function "timer_create" is implemented incorrectly as of 2020-01-25.
......@@ -206,7 +152,7 @@ QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period
void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)
{
writeTraceInfo(TimerType::Real, sig, info, context);
writeTraceInfo(TraceType::REAL_TIME, sig, info, context);
}
QueryProfilerCpu::QueryProfilerCpu(const UInt64 thread_id, const UInt32 period)
......@@ -215,7 +161,7 @@ QueryProfilerCpu::QueryProfilerCpu(const UInt64 thread_id, const UInt32 period)
void QueryProfilerCpu::signalHandler(int sig, siginfo_t * info, void * context)
{
writeTraceInfo(TimerType::Cpu, sig, info, context);
writeTraceInfo(TraceType::CPU_TIME, sig, info, context);
}
}
......@@ -15,12 +15,6 @@ namespace Poco
namespace DB
{
enum class TimerType : UInt8
{
Real,
Cpu,
};
/**
* Query profiler implementation for selected thread.
*
......
......@@ -57,7 +57,7 @@ ShellCommand::~ShellCommand()
std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor)
{
/** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs,
* because of the resolving of characters in the shared library
* because of the resolving of symbols in the shared library
* http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html
* Therefore, separate the resolving of the symbol from the call.
*/
......
#include "TraceCollector.h"
#include <Core/Field.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/TraceLog.h>
#include <Poco/Logger.h>
#include <Common/Exception.h>
#include <Common/PipeFDs.h>
#include <Common/StackTrace.h>
#include <common/logger_useful.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <Common/Exception.h>
#include <Interpreters/TraceLog.h>
#include <unistd.h>
#include <fcntl.h>
namespace ProfileEvents
{
extern const Event QueryProfilerSignalOverruns;
}
namespace DB
{
LazyPipeFDs trace_pipe;
namespace
{
/// Normally query_id is a UUID (string with a fixed length) but user can provide custom query_id.
/// Thus upper bound on query_id length should be introduced to avoid buffer overflow in signal handler.
constexpr size_t QUERY_ID_MAX_LEN = 1024;
thread_local size_t write_trace_iteration = 0;
}
namespace ErrorCodes
{
......@@ -27,20 +40,15 @@ namespace ErrorCodes
extern const int THREAD_IS_NOT_JOINABLE;
}
TraceCollector::TraceCollector(std::shared_ptr<TraceLog> & trace_log_)
: log(&Poco::Logger::get("TraceCollector"))
, trace_log(trace_log_)
TraceCollector::TraceCollector()
{
if (trace_log == nullptr)
throw Exception("Invalid trace log pointer passed", ErrorCodes::NULL_POINTER_DEREFERENCE);
trace_pipe.open();
pipe.open();
/** Turn write end of pipe to non-blocking mode to avoid deadlocks
* when QueryProfiler is invoked under locks and TraceCollector cannot pull data from pipe.
*/
trace_pipe.setNonBlocking();
trace_pipe.tryIncreaseSize(1 << 20);
pipe.setNonBlocking();
pipe.tryIncreaseSize(1 << 20);
thread = ThreadFromGlobalPool(&TraceCollector::run, this);
}
......@@ -48,14 +56,101 @@ TraceCollector::TraceCollector(std::shared_ptr<TraceLog> & trace_log_)
TraceCollector::~TraceCollector()
{
if (!thread.joinable())
LOG_ERROR(log, "TraceCollector thread is malformed and cannot be joined");
LOG_ERROR(&Poco::Logger::get("TraceCollector"), "TraceCollector thread is malformed and cannot be joined");
else
{
TraceCollector::notifyToStop();
stop();
thread.join();
}
trace_pipe.close();
pipe.close();
}
void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trace, int overrun_count)
{
/// Quickly drop if signal handler is called too frequently.
/// Otherwise we may end up infinitelly processing signals instead of doing any useful work.
++write_trace_iteration;
if (overrun_count)
{
/// But pass with some frequency to avoid drop of all traces.
if (write_trace_iteration % overrun_count == 0)
{
ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, overrun_count);
}
else
{
ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, overrun_count + 1);
return;
}
}
constexpr size_t buf_size = sizeof(char) + // TraceCollector stop flag
8 * sizeof(char) + // maximum VarUInt length for string size
QUERY_ID_MAX_LEN * sizeof(char) + // maximum query_id length
sizeof(UInt8) + // number of stack frames
sizeof(StackTrace::Frames) + // collected stack trace, maximum capacity
sizeof(TraceType) + // trace type
sizeof(UInt64) + // thread_id
sizeof(UInt64); // size
char buffer[buf_size];
WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer);
StringRef query_id = CurrentThread::getQueryId();
query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN);
auto thread_id = CurrentThread::get().thread_id;
writeChar(false, out);
writeStringBinary(query_id, out);
size_t stack_trace_size = stack_trace.getSize();
size_t stack_trace_offset = stack_trace.getOffset();
writeIntBinary(UInt8(stack_trace_size - stack_trace_offset), out);
for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
writePODBinary(stack_trace.getFrames()[i], out);
writePODBinary(trace_type, out);
writePODBinary(thread_id, out);
writePODBinary(UInt64(0), out);
out.next();
}
void TraceCollector::collect(UInt64 size)
{
constexpr size_t buf_size = sizeof(char) + // TraceCollector stop flag
8 * sizeof(char) + // maximum VarUInt length for string size
QUERY_ID_MAX_LEN * sizeof(char) + // maximum query_id length
sizeof(UInt8) + // number of stack frames
sizeof(StackTrace::Frames) + // collected stack trace, maximum capacity
sizeof(TraceType) + // trace type
sizeof(UInt64) + // thread_id
sizeof(UInt64); // size
char buffer[buf_size];
WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer);
StringRef query_id = CurrentThread::getQueryId();
query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN);
auto thread_id = CurrentThread::get().thread_id;
writeChar(false, out);
writeStringBinary(query_id, out);
const auto & stack_trace = StackTrace();
size_t stack_trace_size = stack_trace.getSize();
size_t stack_trace_offset = stack_trace.getOffset();
writeIntBinary(UInt8(stack_trace_size - stack_trace_offset), out);
for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
writePODBinary(stack_trace.getFrames()[i], out);
writePODBinary(TraceType::MEMORY, out);
writePODBinary(thread_id, out);
writePODBinary(size, out);
out.next();
}
/**
......@@ -68,16 +163,16 @@ TraceCollector::~TraceCollector()
* NOTE: TraceCollector will NOT stop immediately as there may be some data left in the pipe
* before stop message.
*/
void TraceCollector::notifyToStop()
void TraceCollector::stop()
{
WriteBufferFromFileDescriptor out(trace_pipe.fds_rw[1]);
WriteBufferFromFileDescriptor out(pipe.fds_rw[1]);
writeChar(true, out);
out.next();
}
void TraceCollector::run()
{
ReadBufferFromFileDescriptor in(trace_pipe.fds_rw[0]);
ReadBufferFromFileDescriptor in(pipe.fds_rw[0]);
while (true)
{
......@@ -89,27 +184,33 @@ void TraceCollector::run()
std::string query_id;
readStringBinary(query_id, in);
UInt8 size = 0;
readIntBinary(size, in);
UInt8 trace_size = 0;
readIntBinary(trace_size, in);
Array trace;
trace.reserve(size);
trace.reserve(trace_size);
for (size_t i = 0; i < size; i++)
for (size_t i = 0; i < trace_size; i++)
{
uintptr_t addr = 0;
readPODBinary(addr, in);
trace.emplace_back(UInt64(addr));
}
TimerType timer_type;
readPODBinary(timer_type, in);
TraceType trace_type;
readPODBinary(trace_type, in);
UInt64 thread_id;
readPODBinary(thread_id, in);
TraceLogElement element{std::time(nullptr), timer_type, thread_id, query_id, trace};
trace_log->add(element);
UInt64 size;
readPODBinary(size, in);
if (trace_log)
{
TraceLogElement element{std::time(nullptr), trace_type, thread_id, query_id, trace, size};
trace_log->add(element);
}
}
}
......
#pragma once
#include "Common/PipeFDs.h"
#include <Common/ThreadPool.h>
class StackTrace;
namespace Poco
{
class Logger;
......@@ -12,21 +15,31 @@ namespace DB
class TraceLog;
enum class TraceType : UInt8
{
REAL_TIME,
CPU_TIME,
MEMORY,
};
class TraceCollector
{
public:
TraceCollector();
~TraceCollector();
void setTraceLog(const std::shared_ptr<TraceLog> & trace_log_) { trace_log = trace_log_; }
void collect(TraceType type, const StackTrace & stack_trace, int overrun_count = 0);
void collect(UInt64 size);
private:
Poco::Logger * log;
std::shared_ptr<TraceLog> trace_log;
ThreadFromGlobalPool thread;
LazyPipeFDs pipe;
void run();
static void notifyToStop();
public:
TraceCollector(std::shared_ptr<TraceLog> & trace_log_);
~TraceCollector();
void stop();
};
}
#if defined(OS_LINUX)
#include <malloc.h>
#elif defined(OS_DARWIN)
#include <malloc/malloc.h>
#endif
#include <new>
#include <common/config_common.h>
#include <common/memory.h>
#include <Common/MemoryTracker.h>
#include <iostream>
#include <new>
#if defined(OS_LINUX)
# include <malloc.h>
#elif defined(OS_DARWIN)
# include <malloc/malloc.h>
#endif
/// Replace default new/delete with memory tracking versions.
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new
/// https://en.cppreference.com/w/cpp/memory/new/operator_delete
......@@ -29,7 +31,7 @@ ALWAYS_INLINE void trackMemory(std::size_t size)
#endif
}
ALWAYS_INLINE bool trackMemoryNoExept(std::size_t size) noexcept
ALWAYS_INLINE bool trackMemoryNoExcept(std::size_t size) noexcept
{
try
{
......@@ -54,11 +56,11 @@ ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [
#else
if (size)
CurrentMemoryTracker::free(size);
#ifdef _GNU_SOURCE
# ifdef _GNU_SOURCE
/// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size.
else
CurrentMemoryTracker::free(malloc_usable_size(ptr));
#endif
# endif
#endif
}
catch (...)
......@@ -83,14 +85,14 @@ void * operator new[](std::size_t size)
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
{
if (likely(Memory::trackMemoryNoExept(size)))
if (likely(Memory::trackMemoryNoExcept(size)))
return Memory::newNoExept(size);
return nullptr;
}
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
{
if (likely(Memory::trackMemoryNoExept(size)))
if (likely(Memory::trackMemoryNoExcept(size)))
return Memory::newNoExept(size);
return nullptr;
}
......
#pragma once
#include <Interpreters/Context.h>
inline DB::Context createContext()
{
auto context = DB::Context::createGlobal();
context.makeGlobalContext();
context.setPath("./");
return context;
}
inline const DB::Context & getContext()
{
static DB::Context global_context = createContext();
return global_context;
}
......@@ -6,6 +6,7 @@
#define DBMS_DEFAULT_HTTP_PORT 8123
#define DBMS_DEFAULT_CONNECT_TIMEOUT_SEC 10
#define DBMS_DEFAULT_CONNECT_TIMEOUT_WITH_FAILOVER_MS 50
#define DBMS_DEFAULT_CONNECT_TIMEOUT_WITH_FAILOVER_SECURE_MS 100
#define DBMS_DEFAULT_SEND_TIMEOUT_SEC 300
#define DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC 300
/// Timeout for synchronous request-result protocol call (like Ping or TablesStatus).
......
......@@ -62,6 +62,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingUInt64, interactive_delay, 100000, "The interval in microseconds to check if the request is cancelled, and to send progress info.", 0) \
M(SettingSeconds, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connection timeout if there are no replicas.", 0) \
M(SettingMilliseconds, connect_timeout_with_failover_ms, DBMS_DEFAULT_CONNECT_TIMEOUT_WITH_FAILOVER_MS, "Connection timeout for selecting first healthy replica.", 0) \
M(SettingMilliseconds, connect_timeout_with_failover_secure_ms, DBMS_DEFAULT_CONNECT_TIMEOUT_WITH_FAILOVER_SECURE_MS, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \
M(SettingSeconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "", 0) \
M(SettingSeconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, "", 0) \
M(SettingSeconds, tcp_keep_alive_timeout, 0, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \
......@@ -331,6 +332,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingUInt64, max_memory_usage, 0, "Maximum memory usage for processing of single query. Zero means unlimited.", 0) \
M(SettingUInt64, max_memory_usage_for_user, 0, "Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.", 0) \
M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Maximum memory usage for processing all concurrently running queries on the server. Zero means unlimited.", 0) \
M(SettingUInt64, memory_profiler_step, 0, "Every number of bytes the memory profiler will dump the allocating stacktrace. Zero means disabled memory profiler.", 0) \
\
M(SettingUInt64, max_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for a query. Zero means unlimited.", 0) \
M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.", 0) \
......@@ -382,7 +384,6 @@ struct Settings : public SettingsCollection<Settings>
\
M(SettingBool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
M(SettingSeconds, live_view_heartbeat_interval, DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate live query is alive.", 0) \
M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
M(SettingUInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \
M(SettingUInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
\
......@@ -406,6 +407,8 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, merge_tree_uniform_read_distribution, true, "Obsolete setting, does nothing. Will be removed after 2020-05-20", 0) \
M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(SettingUInt64, max_parser_depth, 1000, "Maximum parser depth.", 0) \
M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
......
#include <DataStreams/ExpressionBlockInputStream.h>
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
#include <Parsers/formatAST.h>
#include <Interpreters/ExpressionActions.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Common/assert_cast.h>
......
......@@ -34,7 +34,7 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction()
unit.is_last = !have_more_data;
unit.status = READY_TO_PARSE;
scheduleParserThreadForUnitWithNumber(current_unit_number);
scheduleParserThreadForUnitWithNumber(segmentator_ticket_number);
++segmentator_ticket_number;
if (!have_more_data)
......@@ -49,12 +49,13 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction()
}
}
void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_unit_number)
void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_ticket_number)
{
try
{
setThreadName("ChunkParser");
const auto current_unit_number = current_ticket_number % processing_units.size();
auto & unit = processing_units[current_unit_number];
/*
......@@ -64,9 +65,9 @@ void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_unit_n
* can use it from multiple threads simultaneously.
*/
ReadBuffer read_buffer(unit.segment.data(), unit.segment.size(), 0);
auto parser = std::make_unique<InputStreamFromInputFormat>(
input_processor_creator(read_buffer, header,
row_input_format_params, format_settings));
auto format = input_processor_creator(read_buffer, header, row_input_format_params, format_settings);
format->setCurrentUnitNumber(current_ticket_number);
auto parser = std::make_unique<InputStreamFromInputFormat>(std::move(format));
unit.block_ext.block.clear();
unit.block_ext.block_missing_values.clear();
......
......@@ -213,9 +213,9 @@ private:
std::deque<ProcessingUnit> processing_units;
void scheduleParserThreadForUnitWithNumber(size_t unit_number)
void scheduleParserThreadForUnitWithNumber(size_t ticket_number)
{
pool.scheduleOrThrowOnError(std::bind(&ParallelParsingBlockInputStream::parserThreadFunction, this, unit_number));
pool.scheduleOrThrowOnError(std::bind(&ParallelParsingBlockInputStream::parserThreadFunction, this, ticket_number));
}
void finishAndWait()
......
......@@ -8,7 +8,6 @@
#include <Parsers/ASTInsertQuery.h>
#include <Common/CurrentThread.h>
#include <Common/setThreadName.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/ThreadPool.h>
#include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
#include <Storages/StorageValues.h>
......@@ -51,8 +50,10 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
ASTPtr query;
BlockOutputStreamPtr out;
if (auto * materialized_view = dynamic_cast<const StorageMaterializedView *>(dependent_table.get()))
if (auto * materialized_view = dynamic_cast<StorageMaterializedView *>(dependent_table.get()))
{
addTableLock(materialized_view->lockStructureForShare(true, context.getInitialQueryId()));
StoragePtr inner_table = materialized_view->getTargetTable();
auto inner_table_id = inner_table->getStorageID();
query = materialized_view->getInnerQuery();
......
......@@ -252,20 +252,23 @@ void DatabaseOrdinary::alterTable(
ast->replace(ast_create_query.select, metadata.select);
}
ASTStorage & storage_ast = *ast_create_query.storage;
/// ORDER BY may change, but cannot appear, it's required construction
if (metadata.order_by_ast && storage_ast.order_by)
storage_ast.set(storage_ast.order_by, metadata.order_by_ast);
if (metadata.primary_key_ast)
storage_ast.set(storage_ast.primary_key, metadata.primary_key_ast);
/// MaterializedView is one type of CREATE query without storage.
if (ast_create_query.storage)
{
ASTStorage & storage_ast = *ast_create_query.storage;
/// ORDER BY may change, but cannot appear, it's required construction
if (metadata.order_by_ast && storage_ast.order_by)
storage_ast.set(storage_ast.order_by, metadata.order_by_ast);
if (metadata.ttl_for_table_ast)
storage_ast.set(storage_ast.ttl_table, metadata.ttl_for_table_ast);
if (metadata.primary_key_ast)
storage_ast.set(storage_ast.primary_key, metadata.primary_key_ast);
if (metadata.settings_ast)
storage_ast.set(storage_ast.settings, metadata.settings_ast);
if (metadata.ttl_for_table_ast)
storage_ast.set(storage_ast.ttl_table, metadata.ttl_for_table_ast);
if (metadata.settings_ast)
storage_ast.set(storage_ast.settings, metadata.settings_ast);
}
statement = getObjectDefinitionFromCreateQuery(ast);
{
......
......@@ -74,6 +74,11 @@ AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
return AttributeUnderlyingType::utDecimal128;
}
// Temporary hack to allow arrays in keys, since they are never retrieved for polygon dictionaries.
// TODO: This should be fixed by fully supporting arrays in dictionaries.
if (type.find("Array") == 0)
return AttributeUnderlyingType::utString;
throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE};
}
......
此差异已折叠。
#pragma once
#include <atomic>
#include <variant>
#include <Core/Block.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <boost/geometry.hpp>
#include <boost/geometry/geometries/multi_polygon.hpp>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace DB
{
namespace bg = boost::geometry;
/** An interface for polygon dictionaries.
* Polygons are read and stored as multi_polygons from boost::geometry in Euclidean coordinates.
* An implementation should inherit from this base class and preprocess the data upon construction if needed.
* It must override the find method of this class which retrieves the polygon containing a single point.
*/
class IPolygonDictionary : public IDictionaryBase
{
public:
/** Controls the different types of polygons allowed as input.
* The structure of a multi-polygon is as follows:
* - A multi-polygon is represented by a nonempty array of polygons.
* - A polygon is represented by a nonempty array of rings. The first element represents the outer ring. Zero
* or more following rings are cut out from the polygon.
* - A ring is represented by a nonempty array of points.
* - A point is represented by its coordinates stored in an according structure (see below).
* A simple polygon is represented by an one-dimensional array of points, stored in the according structure.
*/
enum class InputType
{
MultiPolygon,
SimplePolygon
};
/** Controls the different types allowed for providing the coordinates of points.
* Right now a point can be represented by either an array or a tuple of two Float64 values.
*/
enum class PointType
{
Array,
Tuple,
};
IPolygonDictionary(
const std::string & database_,
const std::string & name_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
DictionaryLifetime dict_lifetime_,
InputType input_type_,
PointType point_type_);
const std::string & getDatabase() const override;
const std::string & getName() const override;
const std::string & getFullName() const override;
std::string getTypeName() const override;
std::string getKeyDescription() const;
size_t getBytesAllocated() const override;
size_t getQueryCount() const override;
double getHitRate() const override;
size_t getElementCount() const override;
double getLoadFactor() const override;
const IDictionarySource * getSource() const override;
const DictionaryStructure & getStructure() const override;
const DictionaryLifetime & getLifetime() const override;
bool isInjective(const std::string & attribute_name) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
/** Functions used to retrieve attributes of specific type by key. */
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes &, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes &,
const ColumnString * const def,
ColumnString * const out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes &, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
/** Checks whether or not a point can be found in one of the polygons in the dictionary.
* The check is performed for multiple points represented by columns of their x and y coordinates.
* The boolean result is written to out.
*/
// TODO: Refactor the whole dictionary design to perform stronger checks, i.e. make this an override.
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
/** A two-dimensional point in Euclidean coordinates. */
using Point = bg::model::point<Float64, 2, bg::cs::cartesian>;
/** A polygon in boost is a an outer ring of points with zero or more cut out inner rings. */
using Polygon = bg::model::polygon<Point>;
protected:
/** Returns true if the given point can be found in the polygon dictionary.
* If true id is set to the index of a polygon containing the given point.
* Overridden in different implementations of this interface.
*/
virtual bool find(const Point & point, size_t & id) const = 0;
std::vector<Polygon> polygons;
/** Since the original data may have been in the form of multi-polygons, an id is stored for each single polygon
* corresponding to the row in which any other attributes for this entry are located.
*/
std::vector<size_t> ids;
const std::string database;
const std::string name;
const std::string full_name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
const DictionaryLifetime dict_lifetime;
const InputType input_type;
const PointType point_type;
private:
/** Helper functions for loading the data from the configuration.
* The polygons serving as keys are extracted into boost types.
* All other values are stored in one column per attribute.
*/
void createAttributes();
void blockToAttributes(const Block & block);
void loadData();
void calculateBytesAllocated();
/** Checks whether a given attribute exists and returns its index */
size_t getAttributeIndex(const std::string & attribute_name) const;
/** Helper functions to retrieve and instantiate the provided null value of an attribute.
* Since a null value is obligatory for every attribute they are simply appended to null_values defined below.
*/
template <typename T>
void appendNullValueImpl(const Field & null_value);
void appendNullValue(AttributeUnderlyingType type, const Field & value);
/** Helper function for retrieving the value of an attribute by key. */
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl(size_t attribute_ind, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
/** A mapping from the names of the attributes to their index in the two vectors defined below. */
std::map<std::string, size_t> attribute_index_by_name;
/** A vector of columns storing the values of each attribute. */
Columns attributes;
/** A vector of null values corresponding to each attribute. */
std::vector<std::variant<
UInt8,
UInt16,
UInt32,
UInt64,
UInt128,
Int8,
Int16,
Int32,
Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>> null_values;
size_t bytes_allocated = 0;
size_t element_count = 0;
mutable std::atomic<size_t> query_count{0};
/** Extracts a list of polygons from a column according to input_type and point_type.
* The polygons are appended to the dictionary with the corresponding ids.
*/
void extractPolygons(const ColumnPtr & column);
/** Extracts a list of points from two columns representing their x and y coordinates. */
static std::vector<Point> extractPoints(const Columns &key_columns);
};
/** Simple implementation of the polygon dictionary. Doesn't generate anything during its construction.
* Iterates over all stored polygons for each query, checking each of them in linear time.
* Retrieves the polygon with the smallest area containing the given point. If there is more than one any such polygon
* may be returned.
*/
class SimplePolygonDictionary : public IPolygonDictionary
{
public:
SimplePolygonDictionary(
const std::string & database_,
const std::string & name_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
DictionaryLifetime dict_lifetime_,
InputType input_type_,
PointType point_type_);
std::shared_ptr<const IExternalLoadable> clone() const override;
private:
bool find(const Point & point, size_t & id) const override;
};
}
......@@ -29,6 +29,7 @@ void registerDictionaries()
registerDictionaryFlat(factory);
registerDictionaryHashed(factory);
registerDictionaryCache(factory);
registerDictionaryPolygon(factory);
}
}
......
......@@ -24,6 +24,7 @@ void registerDictionaryTrie(DictionaryFactory & factory);
void registerDictionaryFlat(DictionaryFactory & factory);
void registerDictionaryHashed(DictionaryFactory & factory);
void registerDictionaryCache(DictionaryFactory & factory);
void registerDictionaryPolygon(DictionaryFactory & factory);
void registerDictionaries();
}
......@@ -145,9 +145,19 @@ BlockInputStreamPtr FormatFactory::getInput(
// Doesn't make sense to use parallel parsing with less than four threads
// (segmentator + two parsers + reader).
if (settings.input_format_parallel_parsing
&& file_segmentation_engine
&& settings.max_threads >= 4)
bool parallel_parsing = settings.input_format_parallel_parsing && file_segmentation_engine && settings.max_threads >= 4;
if (parallel_parsing && name == "JSONEachRow")
{
/// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix()
/// For JSONEachRow we can safely skip whitespace characters
skipWhitespaceIfAny(buf);
if (buf.eof() || *buf.position() == '[')
parallel_parsing = false; /// Disable it for JSONEachRow if data is in square brackets (see JSONEachRowRowInputFormat)
}
if (parallel_parsing)
{
const auto & input_getter = getCreators(name).input_processor_creator;
if (!input_getter)
......
#pragma once
#include <Core/Types.h>
#include <Columns/IColumn.h>
#include <DataStreams/IBlockStream_fwd.h>
#include <IO/BufferWithOwnMemory.h>
......@@ -9,7 +10,6 @@
#include <unordered_map>
#include <boost/noncopyable.hpp>
namespace DB
{
......@@ -53,7 +53,9 @@ public:
/// This callback allows to perform some additional actions after writing a single row.
/// It's initial purpose was to flush Kafka message for each row.
using WriteCallback = std::function<void()>;
using WriteCallback = std::function<void(
const Columns & columns,
size_t row)>;
private:
using InputCreator = std::function<BlockInputStreamPtr(
......
......@@ -45,7 +45,7 @@ try
BlockInputStreamPtr block_input = std::make_shared<InputStreamFromInputFormat>(std::move(input_format));
BlockOutputStreamPtr block_output = std::make_shared<OutputStreamToOutputFormat>(
std::make_shared<TabSeparatedRowOutputFormat>(out_buf, sample, false, false, [] {}, format_settings));
std::make_shared<TabSeparatedRowOutputFormat>(out_buf, sample, false, false, [](const Columns & /* columns */, size_t /* row */){}, format_settings));
copyData(*block_input, *block_output);
return 0;
......
......@@ -2,9 +2,11 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/Native.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnFixedString.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/castTypeToEither.h>
......@@ -51,6 +53,18 @@ struct UnaryOperationImpl
};
template <typename Op>
struct FixedStringUnaryOperationImpl
{
static void NO_INLINE vector(const ColumnFixedString::Chars & a, ColumnFixedString::Chars & c)
{
size_t size = a.size();
for (size_t i = 0; i < size; ++i)
c[i] = Op::apply(a[i]);
}
};
template <typename FunctionName>
struct FunctionUnaryArithmeticMonotonicity;
......@@ -65,6 +79,7 @@ template <template <typename> class Op, typename Name, bool is_injective>
class FunctionUnaryArithmetic : public IFunction
{
static constexpr bool allow_decimal = std::is_same_v<Op<Int8>, NegateImpl<Int8>> || std::is_same_v<Op<Int8>, AbsImpl<Int8>>;
static constexpr bool allow_fixed_string = Op<UInt8>::allow_fixed_string;
template <typename F>
static bool castType(const IDataType * type, F && f)
......@@ -82,7 +97,8 @@ class FunctionUnaryArithmetic : public IFunction
DataTypeFloat64,
DataTypeDecimal<Decimal32>,
DataTypeDecimal<Decimal64>,
DataTypeDecimal<Decimal128>
DataTypeDecimal<Decimal128>,
DataTypeFixedString
>(type, std::forward<F>(f));
}
......@@ -106,16 +122,25 @@ public:
bool valid = castType(arguments[0].get(), [&](const auto & type)
{
using DataType = std::decay_t<decltype(type)>;
using T0 = typename DataType::FieldType;
if constexpr (IsDataTypeDecimal<DataType>)
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
{
if constexpr (!allow_decimal)
if constexpr (!Op<DataTypeFixedString>::allow_fixed_string)
return false;
result = std::make_shared<DataType>(type.getPrecision(), type.getScale());
result = std::make_shared<DataType>(type.getN());
}
else
result = std::make_shared<DataTypeNumber<typename Op<T0>::ResultType>>();
{
using T0 = typename DataType::FieldType;
if constexpr (IsDataTypeDecimal<DataType>)
{
if constexpr (!allow_decimal)
return false;
result = std::make_shared<DataType>(type.getPrecision(), type.getScale());
}
else
result = std::make_shared<DataTypeNumber<typename Op<T0>::ResultType>>();
}
return true;
});
if (!valid)
......@@ -129,10 +154,25 @@ public:
bool valid = castType(block.getByPosition(arguments[0]).type.get(), [&](const auto & type)
{
using DataType = std::decay_t<decltype(type)>;
using T0 = typename DataType::FieldType;
if constexpr (IsDataTypeDecimal<DataType>)
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
{
if constexpr (allow_fixed_string)
{
if (auto col = checkAndGetColumn<ColumnFixedString>(block.getByPosition(arguments[0]).column.get()))
{
auto col_res = ColumnFixedString::create(col->getN());
auto & vec_res = col_res->getChars();
vec_res.resize(col->size() * col->getN());
FixedStringUnaryOperationImpl<Op<UInt8>>::vector(col->getChars(), vec_res);
block.getByPosition(result).column = std::move(col_res);
return true;
}
}
}
else if constexpr (IsDataTypeDecimal<DataType>)
{
using T0 = typename DataType::FieldType;
if constexpr (allow_decimal)
{
if (auto col = checkAndGetColumn<ColumnDecimal<T0>>(block.getByPosition(arguments[0]).column.get()))
......@@ -148,6 +188,7 @@ public:
}
else
{
using T0 = typename DataType::FieldType;
if (auto col = checkAndGetColumn<ColumnVector<T0>>(block.getByPosition(arguments[0]).column.get()))
{
auto col_res = ColumnVector<typename Op<T0>::ResultType>::create();
......@@ -171,7 +212,10 @@ public:
return castType(arguments[0].get(), [&](const auto & type)
{
using DataType = std::decay_t<decltype(type)>;
return !IsDataTypeDecimal<DataType> && Op<typename DataType::FieldType>::compilable;
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
return false;
else
return !IsDataTypeDecimal<DataType> && Op<typename DataType::FieldType>::compilable;
});
}
......@@ -181,14 +225,19 @@ public:
castType(types[0].get(), [&](const auto & type)
{
using DataType = std::decay_t<decltype(type)>;
using T0 = typename DataType::FieldType;
using T1 = typename Op<T0>::ResultType;
if constexpr (!std::is_same_v<T1, InvalidType> && !IsDataTypeDecimal<DataType> && Op<T0>::compilable)
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
return false;
else
{
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * v = nativeCast(b, types[0], values[0](), std::make_shared<DataTypeNumber<T1>>());
result = Op<T0>::compile(b, v, is_signed_v<T1>);
return true;
using T0 = typename DataType::FieldType;
using T1 = typename Op<T0>::ResultType;
if constexpr (!std::is_same_v<T1, InvalidType> && !IsDataTypeDecimal<DataType> && Op<T0>::compilable)
{
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * v = nativeCast(b, types[0], values[0](), std::make_shared<DataTypeNumber<T1>>());
result = Op<T0>::compile(b, v, is_signed_v<T1>);
return true;
}
}
return false;
});
......
......@@ -35,7 +35,6 @@
#include <Columns/ColumnsCommon.h>
#include <Common/FieldVisitors.h>
#include <Common/assert_cast.h>
#include <Interpreters/ExpressionActions.h>
#include <Functions/IFunctionAdaptors.h>
#include <Functions/FunctionsMiscellaneous.h>
#include <Functions/FunctionHelpers.h>
......
......@@ -33,6 +33,7 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
#include <Dictionaries/RangeHashedDictionary.h>
#include <Dictionaries/TrieDictionary.h>
#include <Dictionaries/PolygonDictionary.h>
#include <ext/range.h>
......@@ -134,7 +135,8 @@ private:
!executeDispatchSimple<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr))
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict_ptr))
throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
......@@ -305,6 +307,7 @@ private:
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchRange<RangeHashedDictionary>(block, arguments, result, dict_ptr))
throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
......@@ -485,6 +488,7 @@ private:
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr))
throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
......@@ -823,6 +827,7 @@ private:
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchRange<RangeHashedDictionary>(block, arguments, result, dict_ptr))
throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
......@@ -1081,6 +1086,7 @@ private:
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr))
throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
......
......@@ -261,7 +261,7 @@ using FunctionReinterpretAsDate = FunctionReinterpretStringAs<DataTypeDate,
using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs<DataTypeDateTime, NameReinterpretAsDateTime>;
using FunctionReinterpretAsString = FunctionReinterpretAsStringImpl<NameReinterpretAsString>;
using FunctionReinterpretAsFixedString = FunctionReinterpretAsStringImpl<NameReinterpretAsFixedString>;
using FunctionReinterpretAsFixedString = FunctionReinterpretAsFixedStringImpl<NameReinterpretAsFixedString>;
}
......@@ -10,6 +10,7 @@ template <typename A>
struct AbsImpl
{
using ResultType = std::conditional_t<IsDecimalNumber<A>, A, typename NumberTraits::ResultOfAbs<A>::Type>;
static const constexpr bool allow_fixed_string = false;
static inline NO_SANITIZE_UNDEFINED ResultType apply(A a)
{
......
......@@ -9,6 +9,7 @@ template <typename A, typename B>
struct BitAndImpl
{
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
static constexpr const bool allow_fixed_string = true;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......@@ -29,7 +30,7 @@ struct BitAndImpl
};
struct NameBitAnd { static constexpr auto name = "bitAnd"; };
using FunctionBitAnd = FunctionBinaryArithmetic<BitAndImpl, NameBitAnd>;
using FunctionBitAnd = FunctionBinaryArithmetic<BitAndImpl, NameBitAnd, true>;
void registerFunctionBitAnd(FunctionFactory & factory)
{
......
......@@ -18,6 +18,7 @@ namespace DB
struct BitBoolMaskAndImpl
{
using ResultType = UInt8;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A left, B right)
......
......@@ -18,6 +18,7 @@ namespace DB
struct BitBoolMaskOrImpl
{
using ResultType = UInt8;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A left, B right)
......
......@@ -10,6 +10,7 @@ template <typename A>
struct BitCountImpl
{
using ResultType = UInt8;
static constexpr bool allow_fixed_string = false;
static inline ResultType apply(A a)
{
......
......@@ -10,6 +10,7 @@ template <typename A>
struct BitNotImpl
{
using ResultType = typename NumberTraits::ResultOfBitNot<A>::Type;
static const constexpr bool allow_fixed_string = true;
static inline ResultType apply(A a)
{
......
......@@ -8,6 +8,7 @@ template <typename A, typename B>
struct BitOrImpl
{
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
static constexpr const bool allow_fixed_string = true;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......@@ -28,7 +29,7 @@ struct BitOrImpl
};
struct NameBitOr { static constexpr auto name = "bitOr"; };
using FunctionBitOr = FunctionBinaryArithmetic<BitOrImpl, NameBitOr>;
using FunctionBitOr = FunctionBinaryArithmetic<BitOrImpl, NameBitOr, true>;
void registerFunctionBitOr(FunctionFactory & factory)
{
......
......@@ -8,6 +8,7 @@ template <typename A, typename B>
struct BitRotateLeftImpl
{
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
......
......@@ -8,6 +8,7 @@ template <typename A, typename B>
struct BitRotateRightImpl
{
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
......
......@@ -8,6 +8,7 @@ template <typename A, typename B>
struct BitShiftLeftImpl
{
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
......
......@@ -8,6 +8,7 @@ template <typename A, typename B>
struct BitShiftRightImpl
{
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
......
......@@ -15,6 +15,7 @@ namespace DB
struct BitSwapLastTwoImpl
{
using ResultType = UInt8;
static constexpr const bool allow_fixed_string = false;
static inline ResultType NO_SANITIZE_UNDEFINED apply(A a)
{
......
......@@ -10,6 +10,7 @@ template <typename A, typename B>
struct BitTestImpl
{
using ResultType = UInt8;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
NO_SANITIZE_UNDEFINED static inline Result apply(A a, B b)
......
......@@ -16,6 +16,7 @@ namespace DB
struct BitWrapperFuncImpl
{
using ResultType = UInt8;
static constexpr const bool allow_fixed_string = false;
static inline ResultType NO_SANITIZE_UNDEFINED apply(A a)
{
......
......@@ -8,6 +8,7 @@ template <typename A, typename B>
struct BitXorImpl
{
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
static constexpr bool allow_fixed_string = true;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......@@ -28,7 +29,7 @@ struct BitXorImpl
};
struct NameBitXor { static constexpr auto name = "bitXor"; };
using FunctionBitXor = FunctionBinaryArithmetic<BitXorImpl, NameBitXor>;
using FunctionBitXor = FunctionBinaryArithmetic<BitXorImpl, NameBitXor, true>;
void registerFunctionBitXor(FunctionFactory & factory)
{
......
......@@ -9,6 +9,7 @@ struct DivideFloatingImpl
{
using ResultType = typename NumberTraits::ResultOfFloatingPointDivision<A, B>::Type;
static const constexpr bool allow_decimal = true;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
......
......@@ -10,6 +10,7 @@ template <typename A, typename B>
struct GCDImpl
{
using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......
......@@ -9,6 +9,7 @@ template <typename A, typename B>
struct GreatestBaseImpl
{
using ResultType = NumberTraits::ResultOfGreatest<A, B>;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......@@ -34,6 +35,7 @@ template <typename A, typename B>
struct GreatestSpecialImpl
{
using ResultType = std::make_unsigned_t<A>;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......
......@@ -19,6 +19,7 @@ struct DivideIntegralByConstantImpl
: BinaryOperationImplBase<A, B, DivideIntegralImpl<A, B>>
{
using ResultType = typename DivideIntegralImpl<A, B>::ResultType;
static const constexpr bool allow_fixed_string = false;
static void vector_constant(const PaddedPODArray<A> & a, B b, PaddedPODArray<ResultType> & c)
{
......
......@@ -50,6 +50,7 @@ template <typename A, typename B>
struct DivideIntegralImpl
{
using ResultType = typename NumberTraits::ResultOfIntegerDivision<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......
......@@ -11,6 +11,7 @@ template <typename A, typename B>
struct DivideIntegralOrZeroImpl
{
using ResultType = typename NumberTraits::ResultOfIntegerDivision<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......
......@@ -10,6 +10,7 @@ template <typename A>
struct IntExp10Impl
{
using ResultType = UInt64;
static constexpr const bool allow_fixed_string = false;
static inline ResultType apply(A a)
{
......
......@@ -10,6 +10,7 @@ template <typename A>
struct IntExp2Impl
{
using ResultType = UInt64;
static constexpr const bool allow_fixed_string = false;
static inline ResultType apply(A a)
{
......
......@@ -10,6 +10,8 @@ template <typename A, typename B>
struct LCMImpl
{
using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......
......@@ -9,6 +9,7 @@ template <typename A, typename B>
struct LeastBaseImpl
{
using ResultType = NumberTraits::ResultOfLeast<A, B>;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......@@ -34,6 +35,7 @@ template <typename A, typename B>
struct LeastSpecialImpl
{
using ResultType = std::make_signed_t<A>;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......
......@@ -10,6 +10,7 @@ struct MinusImpl
{
using ResultType = typename NumberTraits::ResultOfSubtraction<A, B>::Type;
static const constexpr bool allow_decimal = true;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
......
......@@ -20,6 +20,7 @@ template <typename A, typename B>
struct ModuloImpl
{
using ResultType = typename NumberTraits::ResultOfModulo<A, B>::Type;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
......@@ -38,6 +39,7 @@ struct ModuloByConstantImpl
: BinaryOperationImplBase<A, B, ModuloImpl<A, B>>
{
using ResultType = typename ModuloImpl<A, B>::ResultType;
static const constexpr bool allow_fixed_string = false;
static void vector_constant(const PaddedPODArray<A> & a, B b, PaddedPODArray<ResultType> & c)
{
......
......@@ -10,6 +10,7 @@ struct MultiplyImpl
{
using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
static const constexpr bool allow_decimal = true;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
......
......@@ -9,6 +9,7 @@ template <typename A>
struct NegateImpl
{
using ResultType = std::conditional_t<IsDecimalNumber<A>, A, typename NumberTraits::ResultOfNegate<A>::Type>;
static constexpr const bool allow_fixed_string = false;
static inline NO_SANITIZE_UNDEFINED ResultType apply(A a)
{
......
......@@ -10,6 +10,7 @@ struct PlusImpl
{
using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
static const constexpr bool allow_decimal = true;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
......
......@@ -8,6 +8,7 @@ template <typename A>
struct RoundAgeImpl
{
using ResultType = UInt8;
static constexpr const bool allow_fixed_string = false;
static inline ResultType apply(A x)
{
......
......@@ -8,6 +8,7 @@ template <typename A>
struct RoundDurationImpl
{
using ResultType = UInt16;
static constexpr const bool allow_fixed_string = false;
static inline ResultType apply(A x)
{
......
......@@ -49,6 +49,7 @@ template <typename T>
struct RoundToExp2Impl
{
using ResultType = T;
static constexpr const bool allow_fixed_string = false;
static inline T apply(T x)
{
......
......@@ -16,6 +16,7 @@ struct ConnectionTimeouts
Poco::Timespan receive_timeout;
Poco::Timespan tcp_keep_alive_timeout;
Poco::Timespan http_keep_alive_timeout;
Poco::Timespan secure_connection_timeout;
ConnectionTimeouts() = default;
......@@ -26,7 +27,8 @@ struct ConnectionTimeouts
send_timeout(send_timeout_),
receive_timeout(receive_timeout_),
tcp_keep_alive_timeout(0),
http_keep_alive_timeout(0)
http_keep_alive_timeout(0),
secure_connection_timeout(connection_timeout)
{
}
......@@ -38,7 +40,8 @@ struct ConnectionTimeouts
send_timeout(send_timeout_),
receive_timeout(receive_timeout_),
tcp_keep_alive_timeout(tcp_keep_alive_timeout_),
http_keep_alive_timeout(0)
http_keep_alive_timeout(0),
secure_connection_timeout(connection_timeout)
{
}
ConnectionTimeouts(const Poco::Timespan & connection_timeout_,
......@@ -50,10 +53,25 @@ struct ConnectionTimeouts
send_timeout(send_timeout_),
receive_timeout(receive_timeout_),
tcp_keep_alive_timeout(tcp_keep_alive_timeout_),
http_keep_alive_timeout(http_keep_alive_timeout_)
http_keep_alive_timeout(http_keep_alive_timeout_),
secure_connection_timeout(connection_timeout)
{
}
ConnectionTimeouts(const Poco::Timespan & connection_timeout_,
const Poco::Timespan & send_timeout_,
const Poco::Timespan & receive_timeout_,
const Poco::Timespan & tcp_keep_alive_timeout_,
const Poco::Timespan & http_keep_alive_timeout_,
const Poco::Timespan & secure_connection_timeout_)
: connection_timeout(connection_timeout_),
send_timeout(send_timeout_),
receive_timeout(receive_timeout_),
tcp_keep_alive_timeout(tcp_keep_alive_timeout_),
http_keep_alive_timeout(http_keep_alive_timeout_),
secure_connection_timeout(secure_connection_timeout_)
{
}
static Poco::Timespan saturate(const Poco::Timespan & timespan, const Poco::Timespan & limit)
{
......@@ -69,7 +87,8 @@ struct ConnectionTimeouts
saturate(send_timeout, limit),
saturate(receive_timeout, limit),
saturate(tcp_keep_alive_timeout, limit),
saturate(http_keep_alive_timeout, limit));
saturate(http_keep_alive_timeout, limit),
saturate(secure_connection_timeout, limit));
}
/// Timeouts for the case when we have just single attempt to connect.
......@@ -81,7 +100,7 @@ struct ConnectionTimeouts
/// Timeouts for the case when we will try many addresses in a loop.
static ConnectionTimeouts getTCPTimeoutsWithFailover(const Settings & settings)
{
return ConnectionTimeouts(settings.connect_timeout_with_failover_ms, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout);
return ConnectionTimeouts(settings.connect_timeout_with_failover_ms, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout, 0, settings.connect_timeout_with_failover_secure_ms);
}
static ConnectionTimeouts getHTTPTimeouts(const Context & context)
......
......@@ -6,17 +6,28 @@
#include <port/unistd.h>
#include <IO/ReadBufferAIO.h>
#include <fstream>
#include <string>
namespace
{
std::string createTmpFileForEOFtest()
{
char pattern[] = "/tmp/fileXXXXXX";
char * dir = ::mkdtemp(pattern);
return std::string(dir) + "/foo";
if (char * dir = ::mkdtemp(pattern); dir)
{
return std::string(dir) + "/foo";
}
else
{
/// We have no tmp in docker
/// So we have to use root
std::string almost_rand_dir = std::string{"/"} + std::to_string(rand()) + "foo";
return almost_rand_dir;
}
}
void prepare_for_eof(std::string & filename, std::string & buf)
void prepareForEOF(std::string & filename, std::string & buf)
{
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
......@@ -28,7 +39,7 @@ void prepare_for_eof(std::string & filename, std::string & buf)
for (size_t i = 0; i < n; ++i)
buf += symbols[i % symbols.length()];
std::ofstream out(filename.c_str());
std::ofstream out(filename);
out << buf;
}
......@@ -39,7 +50,7 @@ TEST(ReadBufferAIOTest, TestReadAfterAIO)
using namespace DB;
std::string data;
std::string file_path;
prepare_for_eof(file_path, data);
prepareForEOF(file_path, data);
ReadBufferAIO testbuf(file_path);
std::string newdata;
......
#include "Common/quoteString.h"
#include <Common/typeid_cast.h>
#include <Common/PODArray.h>
#include <Core/Row.h>
......@@ -334,7 +335,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast,
found = true;
if (found)
throw Exception("Column " + column_name.get(ast) + " is not under aggregate function and not in GROUP BY.",
throw Exception("Column " + backQuote(column_name.get(ast)) + " is not under aggregate function and not in GROUP BY",
ErrorCodes::NOT_AN_AGGREGATE);
/// Special check for WITH statement alias. Add alias action to be able to use this alias.
......
......@@ -2,7 +2,6 @@
#include <Parsers/IAST.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/SubqueryForSet.h>
#include <Interpreters/InDepthNodeVisitor.h>
......@@ -13,6 +12,9 @@ namespace DB
class Context;
class ASTFunction;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
/// The case of an explicit enumeration of values.
SetPtr makeExplicitSet(
const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
......
此差异已折叠。
......@@ -57,6 +57,9 @@ public:
UInt16 port;
String user;
String password;
UInt32 shard_index{}; /// shard serial number in configuration file, starting from 1.
UInt32 replica_index{}; /// replica serial number in this shard, starting from 1; zero means no replicas.
/// This database is selected when no database is specified for Distributed table
String default_database;
/// The locality is determined at the initialization, and is not changed even if DNS is changed
......@@ -67,7 +70,7 @@ public:
Protocol::Secure secure = Protocol::Secure::Disable;
Address() = default;
Address(const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
Address(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, UInt32 shard_index_ = 0, UInt32 replica_index_ = 0);
Address(const String & host_port_, const String & user_, const String & password_, UInt16 clickhouse_port, bool secure_ = false);
/// Returns 'escaped_host_name:port'
......@@ -80,8 +83,10 @@ public:
static std::pair<String, UInt16> fromString(const String & host_port_string);
/// Retrurns escaped user:password@resolved_host_address:resolved_host_port#default_database
/// Returns escaped shard{shard_index}_replica{replica_index}
String toFullString() const;
/// Returns address with only shard index and replica index or full address without shard index and replica index
static Address fromFullString(const String & address_full_string);
/// Returns resolved address if it does resolve.
......
此差异已折叠。
......@@ -253,6 +253,10 @@ public:
std::shared_ptr<QuotaContext> getQuota() const { return quota; }
std::shared_ptr<RowPolicyContext> getRowPolicy() const { return row_policy; }
/// TODO: we need much better code for switching policies, quotas, access rights for initial user
/// Switches row policy in case we have initial user in client info
void switchRowPolicy();
/** Take the list of users, quotas and configuration profiles from this config.
* The list of users is completely replaced.
* The accumulated quota values are not reset if the quota is not deleted.
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册