Fixed translation errors; miscellaneous changes [#CLICKHOUSE-3].

7bf7242a · Alexey Milovidov · ff02af98 · 7bf7242a · 7bf7242a · 7bf7242a
23 changed files
--- a/dbms/src/Common/CombinedCardinalityEstimator.h
+++ b/dbms/src/Common/CombinedCardinalityEstimator.h
@@ -23,7 +23,7 @@ static inline ContainerType max(const ContainerType & lhs, const ContainerType &

 }

-/** For a small number of keys - an array of fixed size "on the stack."
+/** For a small number of keys - an array of fixed size "on the stack".
  * For the average, HashSet is allocated.
  * For large, HyperLogLog is allocated.
  */

--- a/dbms/src/Common/HashTable/Hash.h
+++ b/dbms/src/Common/HashTable/Hash.h
@@ -5,7 +5,7 @@

 /** Hash functions that are better than the trivial function std::hash.
  *
-  * Example: when aggregated by the visitor ID, the performance increase is more than 5 times.
+  * Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
  * This is because of following reasons:
  * - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
  * - in typical implementation of standard library, hash function for integers is trivial and just use lower bits;

--- a/dbms/src/Common/HashTable/HashTable.h
+++ b/dbms/src/Common/HashTable/HashTable.h
@@ -695,7 +695,7 @@ public:


    /** Insert the key,
-      * return the iterator to a position that can be used for `placement new` of value,
+      * return an iterator to a position that can be used for `placement new` of value,
      * as well as the flag - whether a new key was inserted.
      *
      * You have to make `placement new` of value if you inserted a new key,

--- a/dbms/src/Common/HashTable/SmallTable.h
+++ b/dbms/src/Common/HashTable/SmallTable.h
@@ -212,7 +212,7 @@ public:


    /** Insert the key,
-      * return the iterator to a position that can be used for `placement new` of value,
+      * return an iterator to a position that can be used for `placement new` of value,
      * as well as the flag - whether a new key was inserted.
      *
      * You have to make `placement new` of value if you inserted a new key,

--- a/dbms/src/Common/HashTable/TwoLevelHashTable.h
+++ b/dbms/src/Common/HashTable/TwoLevelHashTable.h
@@ -9,9 +9,9 @@
  *
  * Usually works a little slower than a simple hash table.
  * However, it has advantages in some cases:
-  * - if you need to measure two hash tables together, then you can easily parallelize them by buckets;
-  * - lag during resizes is spread, since the small hash tables will be resized separately;
-  * - in theory, the cache resize is local in a larger range of sizes.
+  * - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
+  * - delay during resizes is amortized, since the small hash tables will be resized separately;
+  * - in theory, resizes are cache-local in a larger range of sizes.
  */

 template <size_t initial_size_degree = 8>
@@ -52,7 +52,7 @@ public:

    size_t hash(const Key & x) const { return Hash::operator()(x); }

-    /// NOTE Bad for hash tables for more than 2^32 cells.
+    /// NOTE Bad for hash tables with more than 2^32 cells.
    static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }

 protected:
@@ -95,7 +95,7 @@ public:
    {
        typename Source::const_iterator it = src.begin();

-        /// It is assumed that the zero key (stored separately) when iterating is first.
+        /// It is assumed that the zero key (stored separately) is first in iteration order.
        if (it != src.end() && it.getPtr()->isZero(src))
        {
            insert(*it);
@@ -221,7 +221,7 @@ public:


    /** Insert the key,
-      * return the iterator to a position that can be used for `placement new` value,
+      * return an iterator to a position that can be used for `placement new` of value,
      * as well as the flag - whether a new key was inserted.
      *
      * You have to make `placement new` values if you inserted a new key,

--- a/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h
+++ b/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h
@@ -9,7 +9,7 @@ namespace DB
 {


-/** For a small number of keys - an array of fixed size "on the stack."
+/** For a small number of keys - an array of fixed size "on the stack".
  * For large, HyperLogLog is allocated.
  * See also the more practical implementation in CombinedCardinalityEstimator.h,
  *  where a hash table is also used for medium-sized sets.

--- a/dbms/src/Common/Increment.h
+++ b/dbms/src/Common/Increment.h
@@ -3,8 +3,8 @@
 #include <Common/CounterInFile.h>


-/** Lets you receive an auto-increment number, storing it in a file.
-  * Designed for rare calls (not designed for performance).
+/** Allows getting an auto-increment number, storing it in a file.
+  * Intended for rare calls (not designed for performance).
  */
 class Increment
 {
@@ -39,13 +39,13 @@ public:
        return getBunch(0, create_if_need);
    }

-    /** Get the next number and increase the count by `count`.
-     * If the `create_if_need` parameter is not set to true, then
-     * the file should already have a number written (if not - create the file manually with zero).
-     *
-     * To protect against race conditions between different processes, file locks are used.
-     * (But when the first file is created, the race condition is possible, so it's better to create the file in advance.)
-     */
+    /** Get the next number and increase the counter by `count`.
+      * If the `create_if_need` parameter is not set to true, then
+      *  the file should already have a number written (if not - create the file manually with zero).
+      *
+      * To protect against race conditions between different processes, file locks are used.
+      * (But when the first file is created, the race condition is possible, so it's better to create the file in advance.)
+      */
    UInt64 getBunch(UInt64 count, bool create_if_need = false)
    {
        return static_cast<UInt64>(counter.add(static_cast<Int64>(count), create_if_need) - count + 1);

--- a/dbms/src/Common/Macros.h
+++ b/dbms/src/Common/Macros.h
@@ -4,10 +4,11 @@
 #include <Poco/Util/AbstractConfiguration.h>
 #include <map>

+
 namespace DB
 {

-/** Apply the macros from the config in the line.
+/** Apply substitutions from the macros in config to the string.
  */
 class Macros
 {

--- a/dbms/src/Common/MemoryTracker.h
+++ b/dbms/src/Common/MemoryTracker.h
@@ -102,10 +102,10 @@ public:
 };


-/** The MemoryTracker object is quite difficult to drag to all places where significant amounts of memory are allocated.
-  * Therefore, a thread-local pointer to used MemoryTracker or nullptr is used, if it does not need to be used.
-  * This pointer is set when memory consumption is monitored in this thread.
-  * So, you just need to drag it to all the threads that handle one request.
+/** The MemoryTracker object is quite difficult to pass to all places where significant amounts of memory are allocated.
+  * Therefore, a thread-local pointer to the MemoryTracker in use is set, or nullptr if MemoryTracker does not need to be used.
+  * This pointer is set when memory consumption is monitored in the current thread.
+  * So, you just need to pass it to all the threads that handle one request.
  */
 extern __thread MemoryTracker * current_memory_tracker;


--- a/dbms/src/Common/OptimizedRegularExpression.inl.h
+++ b/dbms/src/Common/OptimizedRegularExpression.inl.h
@@ -8,8 +8,9 @@
 #define MIN_LENGTH_FOR_STRSTR 3
 #define MAX_SUBPATTERNS 5

-template <bool b>
-void OptimizedRegularExpressionImpl<b>::analyze(
+
+template <bool thread_safe>
+void OptimizedRegularExpressionImpl<thread_safe>::analyze(
    const std::string & regexp,
    std::string & required_substring,
    bool & is_trivial,
@@ -20,7 +21,8 @@ void OptimizedRegularExpressionImpl<b>::analyze(
      *  a string outside parentheses,
      *  in which all metacharacters are escaped,
      *  and also if there are no '|' outside the brackets,
-      *  and also avoid substrings of the form `http://` or `www`.
+      *  and also avoid substrings of the form `http://` or `www` and some others
+      *   (this is a hack for a typical use case in Yandex.Metrica).
      */
    const char * begin = regexp.data();
    const char * pos = begin;
@@ -32,9 +34,9 @@ void OptimizedRegularExpressionImpl<b>::analyze(
    bool has_alternative_on_depth_0 = false;

    /// Substring with a position.
-    typedef std::pair<std::string, size_t> Substring;
+    using Substring = std::pair<std::string, size_t>;
+    using Substrings = std::vector<Substring>;

-    typedef std::vector<Substring> Substrings;
    Substrings trivial_substrings(1);
    Substring * last_substring = &trivial_substrings.back();

@@ -157,7 +159,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
                ++pos;
                break;

-             /// Quantifiers that allow a zero number.
+            /// Quantifiers that allow a zero number of occurrences.
            case '{':
                in_curly_braces = true;
            case '?': case '*':
@@ -208,7 +210,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
            {
                if (((it->second == 0 && candidate_it->second != 0)
                        || ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
-                    /// Tuning for the domain
+                    /// Tuning for typical usage domain
                    && (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
                    && (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
                    && (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
@@ -241,12 +243,12 @@ void OptimizedRegularExpressionImpl<b>::analyze(
 }


-template <bool b>
-OptimizedRegularExpressionImpl<b>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
+template <bool thread_safe>
+OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
 {
    analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);

-    /// 3 options are supported
+    /// Only the following three options are supported
    if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
        throw Poco::Exception("OptimizedRegularExpression: Unsupported option.");

@@ -280,8 +282,8 @@ OptimizedRegularExpressionImpl<b>::OptimizedRegularExpressionImpl(const std::str
 }


-template <bool b>
-bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size) const
+template <bool thread_safe>
+bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size) const
 {
    if (is_trivial)
    {
@@ -309,8 +311,8 @@ bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subje
 }


-template <bool b>
-bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size, Match & match) const
+template <bool thread_safe>
+bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, Match & match) const
 {
    if (is_trivial)
    {
@@ -357,8 +359,8 @@ bool OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subje
 }


-template <bool b>
-unsigned OptimizedRegularExpressionImpl<b>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
+template <bool thread_safe>
+unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
 {
    matches.clear();


--- a/dbms/src/Common/PODArray.h
+++ b/dbms/src/Common/PODArray.h
@@ -24,7 +24,7 @@ namespace DB
  * To be more precise - for use in ColumnVector.
  * It differs from std::vector in that it does not initialize the elements.
  *
-  * Made uncopable so that there are no random copies. You can copy the data using `assign` method.
+  * Made noncopyable so that there are no accidental copies. You can copy the data using the `assign` method.
  *
  * Only part of the std::vector interface is supported.
  *
@@ -40,20 +40,20 @@ template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocato
 class PODArray : private boost::noncopyable, private TAllocator    /// empty base optimization
 {
 private:
-    /// Round padding up to an integer number of elements to simplify arithmetic.
+    /// Round padding up to a whole number of elements to simplify arithmetic.
    static constexpr size_t pad_right = (pad_right_ + sizeof(T) - 1) / sizeof(T) * sizeof(T);

-    char * c_start             = nullptr;
-    char * c_end             = nullptr;
+    char * c_start          = nullptr;
+    char * c_end            = nullptr;
    char * c_end_of_storage = nullptr;    /// Does not include pad_right.

-    T * t_start()                         { return reinterpret_cast<T *>(c_start); }
-    T * t_end()                         { return reinterpret_cast<T *>(c_end); }
-    T * t_end_of_storage()                 { return reinterpret_cast<T *>(c_end_of_storage); }
+    T * t_start()                      { return reinterpret_cast<T *>(c_start); }
+    T * t_end()                        { return reinterpret_cast<T *>(c_end); }
+    T * t_end_of_storage()             { return reinterpret_cast<T *>(c_end_of_storage); }

-    const T * t_start() const             { return reinterpret_cast<const T *>(c_start); }
-    const T * t_end() const             { return reinterpret_cast<const T *>(c_end); }
-    const T * t_end_of_storage() const     { return reinterpret_cast<const T *>(c_end_of_storage); }
+    const T * t_start() const          { return reinterpret_cast<const T *>(c_start); }
+    const T * t_end() const            { return reinterpret_cast<const T *>(c_end); }
+    const T * t_end_of_storage() const { return reinterpret_cast<const T *>(c_end_of_storage); }

    /// The amount of memory occupied by the num_elements of the elements.
    static size_t byte_size(size_t num_elements) { return num_elements * sizeof(T); }
@@ -173,16 +173,16 @@ public:
    const T & operator[] (size_t n) const     { return t_start()[n]; }

    T & front()             { return t_start()[0]; }
-    T & back()                 { return t_end()[-1]; }
+    T & back()              { return t_end()[-1]; }
    const T & front() const { return t_start()[0]; }
    const T & back() const  { return t_end()[-1]; }

-    iterator begin()                 { return t_start(); }
-    iterator end()                     { return t_end(); }
-    const_iterator begin() const    { return t_start(); }
-    const_iterator end() const        { return t_end(); }
-    const_iterator cbegin() const    { return t_start(); }
-    const_iterator cend() const        { return t_end(); }
+    iterator begin()              { return t_start(); }
+    iterator end()                { return t_end(); }
+    const_iterator begin() const  { return t_start(); }
+    const_iterator end() const    { return t_end(); }
+    const_iterator cbegin() const { return t_start(); }
+    const_iterator cend() const   { return t_end(); }

    void reserve(size_t n)
    {
@@ -209,7 +209,7 @@ public:
        c_end = c_start + byte_size(n);
    }

-    /// Same as resize, but zeros new elements.
+    /// Same as resize, but zeroes new elements.
    void resize_fill(size_t n)
    {
        size_t old_size = size();
@@ -261,7 +261,7 @@ public:
        c_end -= byte_size(1);
    }

-    /// Do not insert a piece of yourself into the array. Because with the resize, the iterators on themselves can be invalidated.
+    /// Do not insert a piece of the array into itself, because on resize the iterators pointing into it can be invalidated.
    template <typename It1, typename It2>
    void insert(It1 from_begin, It2 from_end)
    {

--- a/dbms/src/Common/PoolBase.h
+++ b/dbms/src/Common/PoolBase.h
@@ -8,8 +8,17 @@
 #include <common/logger_useful.h>
 #include <Common/Exception.h>

+
+namespace DB
+{
+    namespace ErrorCodes
+    {
+        extern const int LOGICAL_ERROR;
+    }
+}
+
 /** A class from which you can inherit and get a pool of something. Used for database connection pools.
-  * The heir must provide a method for creating a new object to place in the pool.
+  * Descendant class must provide a method for creating a new object to place in the pool.
  */

 template <typename TObject>
@@ -63,27 +72,27 @@ public:
        Entry() {}    /// For deferred initialization.

        /** The `Entry` object protects the resource from being used by another thread.
-         * The following methods are forbidden for `rvalue`, so you can not write a similar to
-         *
-         * auto q = pool.Get()->query("SELECT .."); // Oops, after this line Entry was destroyed
-         * q.execute (); // Someone else can use this Connection
-         */
+          * The following methods are forbidden for `rvalue`, so you can not write a similar to
+          *
+          * auto q = pool.Get()->query("SELECT .."); // Oops, after this line Entry was destroyed
+          * q.execute (); // Someone else can use this Connection
+          */
        Object * operator->() && = delete;
        const Object * operator->() const && = delete;
        Object & operator*() && = delete;
        const Object & operator*() const && = delete;

-        Object * operator->() &            { return &*data->data.object; }
-        const Object * operator->() const &    { return &*data->data.object; }
-        Object & operator*() &                { return *data->data.object; }
-        const Object & operator*() const &    { return *data->data.object; }
+        Object * operator->() &             { return &*data->data.object; }
+        const Object * operator->() const & { return &*data->data.object; }
+        Object & operator*() &              { return *data->data.object; }
+        const Object & operator*() const &  { return *data->data.object; }

        bool isNull() const { return data == nullptr; }

        PoolBase * getPool() const
        {
            if (!data)
-                throw DB::Exception("attempt to get pool from uninitialized entry");
+                throw DB::Exception("Attempt to get pool from uninitialized entry", DB::ErrorCodes::LOGICAL_ERROR);
            return &data->data.pool;
        }

@@ -95,7 +104,7 @@ public:

    virtual ~PoolBase() {}

-    /** Allocates the object for the job. With timeout < 0, the timeout is infinite. */
+    /** Allocates the object. Waits for a free object in the pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */
    Entry get(Poco::Timespan::TimeDiff timeout)
    {
        std::unique_lock<std::mutex> lock(mutex);
@@ -137,7 +146,7 @@ private:
    /** Pool. */
    Objects items;

-    /** Block to access the pool. */
+    /** Lock to access the pool. */
    std::mutex mutex;
    std::condition_variable available;

@@ -151,7 +160,7 @@ protected:
        items.reserve(max_items);
    }

-    /** Creates a new object to put in the pool. */
+    /** Creates a new object to put into the pool. */
    virtual ObjectPtr allocObject() = 0;
 };

--- a/dbms/src/Common/RadixSort.h
+++ b/dbms/src/Common/RadixSort.h
@@ -13,10 +13,10 @@
 #include <Core/Defines.h>


-/** Bitwise sort, has the following functionality:
+/** Radix sort, has the following functionality:
  * Can sort unsigned, signed numbers, and floats.
  * Can sort an array of fixed length elements that contain something else besides the key.
-  * Customizable digit size.
+  * Customizable radix size.
  *
  * LSB, stable.
  * NOTE For some applications it makes sense to add MSB-radix-sort,
@@ -49,7 +49,7 @@ struct RadixSortMallocAllocator
 template <typename KeyBits>
 struct RadixSortFloatTransform
 {
-    /// Is it worth writing the result in memory, or is it better to do it every time again?
+    /// Is it worth writing the result in memory, or is it better to do calculation every time again?
    static constexpr bool transform_is_simple = false;

    static KeyBits forward(KeyBits x)
@@ -74,7 +74,7 @@ struct RadixSortFloatTraits
    /// The type to which the key is transformed to do bit operations. This UInt is the same size as the key.
    using KeyBits = typename std::conditional<sizeof(Float) == 8, uint64_t, uint32_t>::type;

-    static constexpr size_t PART_SIZE_BITS = 8;    /// With what pieces of the key, it bits, to do one pass - reshuffle of the array.
+    static constexpr size_t PART_SIZE_BITS = 8;    /// With what pieces of the key, in bits, to do one pass - reshuffle of the array.

    /// Converting a key into KeyBits is such that the order relation over the key corresponds to the order relation over KeyBits.
    using Transform = RadixSortFloatTransform<KeyBits>;
@@ -95,7 +95,7 @@ struct RadixSortIdentityTransform
    static constexpr bool transform_is_simple = true;

    static KeyBits forward(KeyBits x)     { return x; }
-    static KeyBits backward(KeyBits x)     { return x; }
+    static KeyBits backward(KeyBits x)    { return x; }
 };


@@ -105,7 +105,7 @@ struct RadixSortSignedTransform
    static constexpr bool transform_is_simple = true;

    static KeyBits forward(KeyBits x)     { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
-    static KeyBits backward(KeyBits x)     { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
+    static KeyBits backward(KeyBits x)    { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
 };


@@ -150,7 +150,7 @@ struct RadixSort
 private:
    using Element     = typename Traits::Element;
    using Key         = typename Traits::Key;
-    using CountType = typename Traits::CountType;
+    using CountType   = typename Traits::CountType;
    using KeyBits     = typename Traits::KeyBits;

    static constexpr size_t HISTOGRAM_SIZE = 1 << Traits::PART_SIZE_BITS;
@@ -174,9 +174,9 @@ public:
    {
        /// If the array is smaller than 256, then it is better to use another algorithm.

-        /// There are loops of NUM_PASSES. It is very important that they unfold in compile-time.
+        /// There are loops of NUM_PASSES. It is very important that they are unfolded at compile-time.

-        /// For each of the NUM_PASSES bits of the key, consider how many times each value of this piece met.
+        /// For each of the NUM_PASSES bit ranges of the key, count how many times each value of this bit range occurs.
        CountType histograms[HISTOGRAM_SIZE * NUM_PASSES] = {0};

        typename Traits::Allocator allocator;
@@ -230,6 +230,7 @@ public:
        }

        /// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
+        /// NOTE Sometimes it would be better to provide a non-destructive interface that does not modify the original array.
        if (NUM_PASSES % 2)
            memcpy(arr, swap_buffer, size * sizeof(Element));


--- a/dbms/src/Common/ShellCommand.h
+++ b/dbms/src/Common/ShellCommand.h
@@ -10,8 +10,8 @@ namespace DB


 /** Lets you run the command,
- *   read it stdout, stderr, write to stdin,
- *   wait for completion.
+  *  read its stdout and stderr; write to stdin;
+  *  wait for completion.
  *
  * The implementation is similar to the popen function from POSIX (see libc source code).
  *
@@ -20,8 +20,8 @@ namespace DB
  *  with some overcommit settings, if the address space of the process is more than half the amount of available memory.
  * Also, changing memory maps - a fairly resource-intensive operation.
  *
-  * The second difference - allows to work simultaneously with stdin, and with stdout, and with stderr running process,
-  *  and also find out the code and the completion status.
+  * The second difference - allows to work simultaneously with stdin, and with stdout, and with stderr of running process,
+  *  and also to obtain the return code and completion status.
  */
 class ShellCommand
 {

--- a/dbms/src/Common/SimpleCache.h
+++ b/dbms/src/Common/SimpleCache.h
@@ -7,9 +7,9 @@


 /** The simplest cache for a free function.
-  * You can also pass a static class method or lambda without capturing.
-  * The size is unlimited. Values are not obsolete.
-  * To synchronize, use mutex.
+  * You can also pass a static class method or lambda without captures.
+  * The size is unlimited. Values are stored permanently and never evicted.
+  * Mutex is used for synchronization.
  * Suitable only for the simplest cases.
  *
  * Usage

--- a/dbms/src/Common/SipHash.h
+++ b/dbms/src/Common/SipHash.h
@@ -3,17 +3,17 @@
 /** SipHash is a fast cryptographic hash function for short strings.
  * Taken from here: https://www.131002.net/siphash/
  *
+  * This is SipHash 2-4 variant.
+  *
  * Two changes are made:
-  * - returns 128 bits, not 64;
+  * - can also return 128 bits, not only 64;
  * - done streaming (can be calculated in parts).
  *
  * On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
  * (~ 700 MB/sec, 15 million strings per second)
  */

-#include <cstdint>
-#include <cstddef>
-#include <Core/Types.h>
+#include <common/Types.h>

 #define ROTL(x,b) static_cast<u64>( ((x) << (b)) | ( (x) >> (64 - (b))) )

@@ -30,23 +30,20 @@
 class SipHash
 {
 private:
-    using u64 = DB::UInt64;
-    using u8 = DB::UInt8;
-
-    /// Status.
-    u64 v0;
-    u64 v1;
-    u64 v2;
-    u64 v3;
+    /// State.
+    UInt64 v0;
+    UInt64 v1;
+    UInt64 v2;
+    UInt64 v3;

    /// How many bytes have been processed.
-    u64 cnt;
+    UInt64 cnt;

    /// The current 8 bytes of input data.
    union
    {
-        u64 current_word;
-        u8 current_bytes[8];
+        UInt64 current_word;
+        UInt8 current_bytes[8];
    };

    void finalize()
@@ -68,7 +65,7 @@ private:

 public:
    /// Arguments - seed.
-    SipHash(u64 k0 = 0, u64 k1 = 0)
+    SipHash(UInt64 k0 = 0, UInt64 k1 = 0)
    {
        /// Initialize the state with some random bytes and seed.
        v0 = 0x736f6d6570736575ULL ^ k0;
@@ -80,7 +77,7 @@ public:
        current_word = 0;
    }

-    void update(const char * data, u64 size)
+    void update(const char * data, UInt64 size)
    {
        const char * end = data + size;

@@ -94,7 +91,7 @@ public:
                ++cnt;
            }

-            /// If you still do not have enough bytes to an 8-byte word.
+            /// If we still do not have enough bytes for an 8-byte word.
            if (cnt & 7)
                return;

@@ -108,7 +105,7 @@ public:

        while (data + 8 <= end)
        {
-            current_word = *reinterpret_cast<const u64 *>(data);
+            current_word = *reinterpret_cast<const UInt64 *>(data);

            v3 ^= current_word;
            SIPROUND;
@@ -138,18 +135,18 @@ public:
    void get128(char * out)
    {
        finalize();
-        reinterpret_cast<u64 *>(out)[0] = v0 ^ v1;
-        reinterpret_cast<u64 *>(out)[1] = v2 ^ v3;
+        reinterpret_cast<UInt64 *>(out)[0] = v0 ^ v1;
+        reinterpret_cast<UInt64 *>(out)[1] = v2 ^ v3;
    }

-    void get128(u64 & lo, u64 & hi)
+    void get128(UInt64 & lo, UInt64 & hi)
    {
        finalize();
        lo = v0 ^ v1;
        hi = v2 ^ v3;
    }

-    u64 get64()
+    UInt64 get64()
    {
        finalize();
        return v0 ^ v1 ^ v2 ^ v3;
@@ -160,6 +157,7 @@ public:
 #undef ROTL
 #undef SIPROUND

+#include <cstddef>

 inline void sipHash128(const char * data, const size_t size, char * out)
 {
@@ -168,7 +166,7 @@ inline void sipHash128(const char * data, const size_t size, char * out)
    hash.get128(out);
 }

-inline DB::UInt64 sipHash64(const char * data, const size_t size)
+inline UInt64 sipHash64(const char * data, const size_t size)
 {
    SipHash hash;
    hash.update(data, size);
@@ -177,7 +175,7 @@ inline DB::UInt64 sipHash64(const char * data, const size_t size)

 #include <string>

-inline DB::UInt64 sipHash64(const std::string & s)
+inline UInt64 sipHash64(const std::string & s)
 {
    return sipHash64(s.data(), s.size());
 }
--- a/dbms/src/Common/StringSearcher.h
+++ b/dbms/src/Common/StringSearcher.h
@@ -19,15 +19,14 @@
 namespace DB
 {

-
 namespace ErrorCodes
 {
    extern const int UNSUPPORTED_PARAMETER;
 }


-/** Variants for finding a substring in a string.
-  * In most cases, less productive than Volnitsky (see Volnitsky.h).
+/** Variants for searching a substring in a string.
+  * In most cases, performance is lower than that of Volnitsky (see Volnitsky.h).
  */


@@ -37,7 +36,7 @@ struct StringSearcherBase
    static constexpr auto n = sizeof(__m128i);
    const int page_size = getpagesize();

-    bool page_safe(const void * const ptr) const
+    bool pageSafe(const void * const ptr) const
    {
        return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
    }
@@ -55,7 +54,7 @@ class StringSearcher<false, false> : private StringSearcherBase
 private:
    using UTF8SequenceBuffer = UInt8[6];

-    /// string to be searched for
+    /// substring to be searched for
    const UInt8 * const needle;
    const std::size_t needle_size;
    const UInt8 * const needle_end = needle + needle_size;
@@ -135,8 +134,7 @@ public:
            if (!(dst_l_len == dst_u_len && dst_u_len == src_len))
                throw DB::Exception{
                    "UTF8 sequences with different lowercase and uppercase lengths are not supported",
-                    DB::ErrorCodes::UNSUPPORTED_PARAMETER
-                };
+                    DB::ErrorCodes::UNSUPPORTED_PARAMETER};

            cache_actual_len += src_len;
            if (cache_actual_len < n)
@@ -165,7 +163,7 @@ public:
        static const Poco::UTF8Encoding utf8;

 #if __SSE4_1__
-        if (page_safe(pos))
+        if (pageSafe(pos))
        {
            const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
            const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
@@ -230,7 +228,7 @@ public:
        while (haystack < haystack_end)
        {
 #if __SSE4_1__
-            if (haystack + n <= haystack_end && page_safe(haystack))
+            if (haystack + n <= haystack_end && pageSafe(haystack))
            {
                const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
                const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
@@ -249,7 +247,7 @@ public:
                const auto offset = __builtin_ctz(mask);
                haystack += offset;

-                if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
+                if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
                {
                    const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
                    const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
@@ -377,7 +375,7 @@ public:
    bool compare(const UInt8 * pos) const
    {
 #if __SSE4_1__
-        if (page_safe(pos))
+        if (pageSafe(pos))
        {
            const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
            const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
@@ -429,7 +427,7 @@ public:
        while (haystack < haystack_end)
        {
 #if __SSE4_1__
-            if (haystack + n <= haystack_end && page_safe(haystack))
+            if (haystack + n <= haystack_end && pageSafe(haystack))
            {
                const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
                const auto v_against_l = _mm_cmpeq_epi8(v_haystack, patl);
@@ -447,7 +445,7 @@ public:
                const auto offset = __builtin_ctz(mask);
                haystack += offset;

-                if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
+                if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
                {
                    const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
                    const auto v_against_l = _mm_cmpeq_epi8(v_haystack, cachel);
@@ -559,7 +557,7 @@ public:
    bool compare(const UInt8 * pos) const
    {
 #if __SSE4_1__
-        if (page_safe(pos))
+        if (pageSafe(pos))
        {
            const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
            const auto v_against_cache = _mm_cmpeq_epi8(v_haystack, cache);
@@ -609,7 +607,7 @@ public:
        while (haystack < haystack_end)
        {
 #if __SSE4_1__
-            if (haystack + n <= haystack_end && page_safe(haystack))
+            if (haystack + n <= haystack_end && pageSafe(haystack))
            {
                /// find first character
                const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
@@ -627,7 +625,7 @@ public:
                const auto offset = __builtin_ctz(mask);
                haystack += offset;

-                if (haystack < haystack_end && haystack + n <= haystack_end && page_safe(haystack))
+                if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
                {
                    /// check for first 16 octets
                    const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
@@ -694,9 +692,9 @@ using UTF8CaseInsensitiveStringSearcher = StringSearcher<false, false>;


 /** Uses functions from libc.
-  * It makes sense to use short strings when cheap initialization is required.
-  * There is no option for register-independent search for UTF-8 strings.
-  * It is required that the end of the lines be zero byte.
+  * It makes sense to use these only with short haystacks, when cheap initialization is required.
+  * There is no option for case-insensitive search for UTF-8 strings.
+  * It is required that strings are zero-terminated.
  */

 struct LibCASCIICaseSensitiveStringSearcher

--- a/dbms/src/Common/Throttler.h
+++ b/dbms/src/Common/Throttler.h
 #pragma once

+#include <time.h>   /// nanosleep
 #include <mutex>
 #include <memory>
 #include <Common/Stopwatch.h>
 #include <Common/Exception.h>
 #include <IO/WriteHelpers.h>

+
 namespace DB
 {

@@ -15,12 +17,12 @@ namespace ErrorCodes
 }


-/** Allows you to limit the speed of something (in pieces per second) using sleep.
+/** Allows you to limit the speed of something (in entities per second) using sleep.
  * Specifics of work:
  * - only the average speed is considered, from the moment of the first call of `add` function;
  *   if there were periods with low speed, then during some time after them, the speed will be higher;
  *
-  * Also allows you to set a limit on the maximum number of pieces. If you exceed, an exception is thrown.
+  * Also allows you to set a limit on the maximum number of entities. If exceeded, an exception will be thrown.
  */
 class Throttler
 {
@@ -56,7 +58,7 @@ public:

        if (max_speed)
        {
-            /// How much time would have gone for the speed to become `max_speed`.
+            /// How much time to wait for the average speed to become `max_speed`.
            UInt64 desired_ns = new_count * 1000000000 / max_speed;

            if (desired_ns > elapsed_ns)
@@ -65,7 +67,7 @@ public:
                timespec sleep_ts;
                sleep_ts.tv_sec = sleep_ns / 1000000000;
                sleep_ts.tv_nsec = sleep_ns % 1000000000;
-                nanosleep(&sleep_ts, nullptr);    /// NOTE Ends early in case of a signal. This is considered normal.
+                nanosleep(&sleep_ts, nullptr);    /// NOTE Returns early in case of a signal. This is considered normal.
            }
        }
    }

--- a/dbms/src/Common/VirtualColumnUtils.h
+++ b/dbms/src/Common/VirtualColumnUtils.h
@@ -16,10 +16,10 @@ class Context;
 namespace VirtualColumnUtils
 {

-/// Calculate the minimum numeric suffix to add to the row so that it is not present in the set
+/// Calculate the minimum numeric suffix to add to the string so that it is not present in the set
 String chooseSuffix(const NamesAndTypesList & columns, const String & name);

-/// Calculate the minimum total numeric suffix to add to each row,
+/// Calculate the minimum total numeric suffix to add to each string,
 /// so that none is present in the set.
 String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector<String> & names);


--- a/dbms/src/Common/Volnitsky.h
+++ b/dbms/src/Common/Volnitsky.h
@@ -2,6 +2,7 @@

 #include <Common/StringSearcher.h>
 #include <Common/StringUtils.h>
+#include <Core/Types.h>
 #include <Poco/UTF8Encoding.h>
 #include <Poco/Unicode.h>
 #include <ext/range.hpp>
@@ -12,7 +13,7 @@
 /** Search for a substring in a string by Volnitsky's algorithm
  * http://volnitsky.com/project/str_search/
  *
-  * `haystack` and `needle` can contain null bytes.
+  * `haystack` and `needle` can contain zero bytes.
  *
  * Algorithm:
  * - if the `needle` is too small or too large, or too small `haystack`, use std::search or memchr;
@@ -23,7 +24,7 @@
  * - bigrams can be inserted several times if they occur in the needle several times;
  * - when searching, take from haystack bigram, which should correspond to the last bigram of needle (comparing from the end);
  * - look for it in the hash table, if found - get the offset from the hash table and compare the string bytewise;
-  * - if it did not work, we check the next cell of the hash table from the collision resolution chain;
+  * - if it did not match, we check the next cell of the hash table from the collision resolution chain;
  * - if not found, skip to haystack almost the size of the needle bytes;
  *
  * Unaligned memory access is used.
@@ -39,34 +40,35 @@ template <typename CRTP>
 class VolnitskyBase
 {
 protected:
-    using offset_t = uint8_t;    /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
-    using ngram_t = uint16_t;    /// n-gram (2 bytes).
+    using Offset = UInt8;    /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
+    using Ngram = UInt16;    /// n-gram (2 bytes).

    const UInt8 * const needle;
    const size_t needle_size;
    const UInt8 * const needle_end = needle + needle_size;
    /// For how long we move, if the n-gram from haystack is not found in the hash table.
-    const size_t step = needle_size - sizeof(ngram_t) + 1;
+    const size_t step = needle_size - sizeof(Ngram) + 1;

    /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
-     *    storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
-    static const size_t hash_size = 64 * 1024;    /// Fits into the L2 cache.
-    offset_t hash[hash_size];    /// Hash table.
+      *  storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
+    static const size_t hash_size = 64 * 1024;    /// Fits into the L2 cache (of common Intel CPUs).
+    Offset hash[hash_size];    /// Hash table.

    /// min haystack size to use main algorithm instead of fallback
    static constexpr auto min_haystack_size_for_algorithm = 20000;
-    const bool fallback;                /// Do I need to use the fallback algorithm.
+    const bool fallback; /// Do we need to use the fallback algorithm.

 public:
-    /** haystack_size_hint - the expected total size of the haystack for `search` calls. Can not specify.
+    /** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
      * If you specify it small enough, the fallback algorithm will be used,
      *  since it is considered that it's useless to waste time initializing the hash table.
      */
    VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
    : needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
      fallback{
-          needle_size < 2 * sizeof(ngram_t) || needle_size >= std::numeric_limits<offset_t>::max() ||
-          (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
+          needle_size < 2 * sizeof(Ngram)
+          || needle_size >= std::numeric_limits<Offset>::max()
+          || (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
    {
        if (fallback)
            return;
@@ -74,7 +76,7 @@ public:
        memset(hash, 0, sizeof(hash));

        /// int is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
-        for (auto i = static_cast<int>(needle_size - sizeof(ngram_t)); i >= 0; --i)
+        for (auto i = static_cast<int>(needle_size - sizeof(Ngram)); i >= 0; --i)
            self().putNGram(this->needle + i, i + 1, this->needle);
    }

@@ -91,7 +93,7 @@ public:
            return self().search_fallback(haystack, haystack_end);

        /// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
-        const auto * pos = haystack + needle_size - sizeof(ngram_t);
+        const auto * pos = haystack + needle_size - sizeof(Ngram);
        for (; pos <= haystack_end - needle_size; pos += step)
        {
            /// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
@@ -119,12 +121,12 @@ protected:
    CRTP & self() { return static_cast<CRTP &>(*this); }
    const CRTP & self() const { return const_cast<VolnitskyBase *>(this)->self(); }

-    static const ngram_t & toNGram(const UInt8 * const pos)
+    static const Ngram & toNGram(const UInt8 * const pos)
    {
-        return *reinterpret_cast<const ngram_t *>(pos);
+        return *reinterpret_cast<const Ngram *>(pos);
    }

-    void putNGramBase(const ngram_t ngram, const int offset)
+    void putNGramBase(const Ngram ngram, const int offset)
    {
        /// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
        size_t cell_num = ngram % hash_size;
@@ -145,7 +147,7 @@ protected:

        union
        {
-            ngram_t n;
+            Ngram n;
            Chars chars;
        };

@@ -260,7 +262,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal

        union
        {
-            ngram_t n;
+            Ngram n;
            Chars chars;
        };

@@ -277,10 +279,12 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
              *  or intersect with two code points.
              *
              * In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
-              *  and in the second case - up to four alternatives - fragments of two code points in all combinations of registers.
+              *  and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
              *
-              * It does not take into account the dependence of the transformation between the registers from the locale (for example - Turkish `Ii`)
+              * It does not take into account the dependence of the case transformation on the locale (for example - Turkish `Ii`)
              *  as well as composition / decomposition and other features.
+              *
+              * It also does not work if the lower-case and upper-case forms of a character are represented by a different number of bytes or code points.
              */

            using Seq = UInt8[6];
@@ -302,12 +306,12 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
                    putNGramBase(n, offset);
                else
                {
-                    /// where is the given ngram in respect to UTF-8 sequence start?
+                    /// where is the given ngram with respect to the start of the UTF-8 sequence?
                    const auto seq_ngram_offset = pos - seq_pos;

                    Seq seq;

-                    /// put ngram from lowercase
+                    /// put ngram for lowercase
                    utf8.convert(l_u32, seq, sizeof(seq));
                    chars.c0 = seq[seq_ngram_offset];
                    chars.c1 = seq[seq_ngram_offset + 1];
@@ -326,7 +330,7 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
                /// first sequence may start before u_pos if it is not ASCII
                auto first_seq_pos = pos;
                UTF8::syncBackward(first_seq_pos, begin);
-                /// where is the given ngram in respect to the first UTF-8 sequence start?
+                /// where is the given ngram with respect to the start of the first UTF-8 sequence?
                const auto seq_ngram_offset = pos - first_seq_pos;

                const auto first_u32 = utf8.convert(first_seq_pos);

--- a/dbms/src/Common/formatReadable.h
+++ b/dbms/src/Common/formatReadable.h
@@ -4,11 +4,11 @@
 #include <IO/WriteBuffer.h>


-/// Displays the transmitted size in bytes as 123.45 GiB.
+/// Displays the passed size in bytes as 123.45 GiB.
 void formatReadableSizeWithBinarySuffix(double value, DB::WriteBuffer & out, int precision = 2);
 std::string formatReadableSizeWithBinarySuffix(double value, int precision = 2);

-/// Displays the transmitted size in bytes as 132.55 GB.
+/// Displays the passed size in bytes as 132.55 GB.
 void formatReadableSizeWithDecimalSuffix(double value, DB::WriteBuffer & out, int precision = 2);
 std::string formatReadableSizeWithDecimalSuffix(double value, int precision = 2);


--- a/dbms/src/Common/getFQDNOrHostName.h
+++ b/dbms/src/Common/getFQDNOrHostName.h
@@ -2,7 +2,7 @@

 #include <string>

-/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the hostname utility with the -f flag.
-  * If it does not work, return hostname - similar to calling hostname without flags or uname -n.
+/** Get the FQDN for the local server by resolving DNS hostname - similar to calling the 'hostname' tool with the -f flag.
+  * If it does not work, return hostname - similar to calling 'hostname' without flags or 'uname -n'.
  */
 const std::string & getFQDNOrHostName();
--- a/dbms/src/Common/typeid_cast.h
+++ b/dbms/src/Common/typeid_cast.h
@@ -16,7 +16,7 @@ namespace DB
 }


-/** Checks match of type by comparing typeid.
+/** Checks type by comparing typeid.
  * The exact match of the type is checked. That is, cast in the ancestor will be unsuccessful.
  * In the rest, behaves like a dynamic_cast.
  */