translate comments

46db4545 · f1yegor · alexey-milovidov · 4a24d4f3 · 46db4545 · 46db4545
46 changed file
--- a/dbms/src/Common/AIO.h
+++ b/dbms/src/Common/AIO.h
@@ -15,7 +15,7 @@
 #include <unistd.h>


-/** Небольшие обёртки для асинхронного ввода-вывода.
+/** Small wrappers for asynchronous I/O.
  */



--- a/dbms/src/Common/Allocator.cpp
+++ b/dbms/src/Common/Allocator.cpp
@@ -22,15 +22,15 @@ namespace ErrorCodes
 }


-/** Многие современные аллокаторы (например, tcmalloc) не умеют делать mremap для realloc,
-  *  даже в случае достаточно больших кусков памяти.
-  * Хотя это позволяет увеличить производительность и уменьшить потребление памяти во время realloc-а.
-  * Чтобы это исправить, делаем mremap самостоятельно, если кусок памяти достаточно большой.
-  * Порог (64 МБ) выбран достаточно большим, так как изменение адресного пространства
-  *  довольно сильно тормозит, особенно в случае наличия большого количества потоков.
-  * Рассчитываем, что набор операций mmap/что-то сделать/mremap может выполняться всего лишь около 1000 раз в секунду.
+/** Many modern allocators (for example, tcmalloc) do not know how to do a mremap for realloc,
+  *  even in case of large enough chunks of memory.
+  * Yet doing so would increase performance and reduce memory consumption during realloc.
+  * To fix this, we do mremap ourselves if the chunk of memory is large enough.
+  * The threshold (64 MB) is chosen quite large, since changing the address space is
+  *  rather slow, especially in the case of a large number of threads.
+  * We expect that the sequence of operations mmap / do something / mremap can be performed only about 1000 times per second.
  *
-  * PS. Также это требуется, потому что tcmalloc не может выделить кусок памяти больше 16 GB.
+  * PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
  */
 static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20);
 static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;

--- a/dbms/src/Common/Allocator.h
+++ b/dbms/src/Common/Allocator.h
@@ -3,13 +3,13 @@
 #include <string.h>


-/** Отвечает за выделение/освобождение памяти. Используется, например, в PODArray, Arena.
-  * Также используется в хэш-таблицах.
-  * Интерфейс отличается от std::allocator
-  * - наличием метода realloc, который для больших кусков памяти использует mremap;
-  * - передачей размера в метод free;
-  * - наличием аргумента alignment;
-  * - возможностью зануления памяти (используется в хэш-таблицах);
+/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
+  * Also used in hash tables.
+  * The interface differs from std::allocator by:
+  * - the presence of the `realloc` method, which uses mremap for large chunks of memory;
+  * - passing the size to the `free` method;
+  * - the presence of the `alignment` argument;
+  * - the possibility of zeroing memory (used in hash tables);
  */
 template <bool clear_memory_>
 class Allocator
@@ -38,9 +38,9 @@ protected:
 };


-/** При использовании AllocatorWithStackMemory, размещённом на стеке,
-  *  GCC 4.9 ошибочно делает предположение, что мы можем вызывать free от указателя на стек.
-  * На самом деле, комбинация условий внутри AllocatorWithStackMemory этого не допускает.
+/** When using AllocatorWithStackMemory, located on the stack,
+  *  GCC 4.9 mistakenly assumes that we can call `free` on a pointer to the stack.
+  * In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
  */
 #if !__clang__
 #pragma GCC diagnostic push

--- a/dbms/src/Common/ArenaWithFreeLists.h
+++ b/dbms/src/Common/ArenaWithFreeLists.h
@@ -8,40 +8,40 @@ namespace DB
 {


-/** В отличие от Arena, позволяет освобождать (для последующего повторного использования)
-  *  выделенные ранее (не обязательно только что) куски памяти.
-  * Для этого, запрашиваемый размер округляется вверх до степени двух
-  *  (или до 8, если меньше; или используется выделение памяти вне Arena, если размер больше 65536).
-  * При освобождении памяти, для каждого размера (всего 14 вариантов: 8, 16... 65536),
-  *  поддерживается односвязный список свободных блоков.
-  * При аллокации, мы берём голову списка свободных блоков,
-  *  либо, если список пуст - выделяем новый блок, используя Arena.
+/** Unlike Arena, allows you to release (for later re-use)
+  *  previously allocated (not necessarily just recently) chunks of memory.
+  * For this, the requested size is rounded up to a power of two
+  *  (or up to 8, if less; or memory is allocated outside of Arena if the size is greater than 65536).
+  * When freeing memory, for each size (14 options in all: 8, 16 ... 65536),
+  *  a singly-linked list of free blocks is maintained.
+  * When allocating, we take the head of the list of free blocks,
+  *  or, if the list is empty - allocate a new block using Arena.
  */
 class ArenaWithFreeLists : private Allocator<false>, private boost::noncopyable
 {
 private:
-    /// Если блок свободен, то в его начале хранится указатель на следующий свободный блок, либо nullptr, если свободных блоков больше нет.
-    /// Если блок используется, то в нём хранятся какие-то данные.
+    /// If the block is free, then the pointer to the next free block is stored at its beginning, or nullptr, if there are no more free blocks.
+    /// If the block is used, then some data is stored in it.
    union Block
    {
        Block * next;
        char data[0];
    };

-    /// Максимальный размер куска памяти, который выделяется с помощью Arena. Иначе используем Allocator напрямую.
+    /// The maximum size of a piece of memory that is allocated with Arena. Otherwise, we use Allocator directly.
    static constexpr size_t max_fixed_block_size = 65536;

-    /// Получить индекс в массиве freelist-ов для заданного размера.
+    /// Get the index in the freelist array for the specified size.
    static size_t findFreeListIndex(const size_t size)
    {
        return size <= 8 ? 2 : bitScanReverse(size - 1);
    }

-    /// Для выделения блоков не слишком большого размера используется Arena.
+    /// Arena is used to allocate blocks that are not too large.
    Arena pool;

-    /// Списки свободных блоков. Каждый элемент указывает на голову соответствующего списка, либо равен nullptr.
-    /// Первые два элемента не используются, а предназначены для упрощения арифметики.
+    /// Lists of free blocks. Each element points to the head of the corresponding list, or is nullptr.
+    /// The first two elements are not used, but are intended to simplify arithmetic.
    Block * free_lists[16] {};

 public:
@@ -60,10 +60,10 @@ public:
        /// find list of required size
        const auto list_idx = findFreeListIndex(size);

-        /// Если есть свободный блок.
+        /// If there is a free block.
        if (auto & free_block_ptr = free_lists[list_idx])
        {
-            /// Возьмём его. И поменяем голову списка на следующий элемент списка.
+            /// Take it, and change the head of the list to the next item in the list.
            const auto res = free_block_ptr->data;
            free_block_ptr = free_block_ptr->next;
            return res;
@@ -81,14 +81,14 @@ public:
        /// find list of required size
        const auto list_idx = findFreeListIndex(size);

-        /// Вставим освобождённый блок в голову списка.
+        /// Insert the released block into the head of the list.
        auto & free_block_ptr = free_lists[list_idx];
        const auto old_head = free_block_ptr;
        free_block_ptr = reinterpret_cast<Block *>(ptr);
        free_block_ptr->next = old_head;
    }

-    /// Размер выделенного пула в байтах
+    /// Size of the allocated pool in bytes
    size_t size() const
    {
        return pool.size();

--- a/dbms/src/Common/AutoArray.h
+++ b/dbms/src/Common/AutoArray.h
@@ -8,30 +8,30 @@
 namespace DB
 {

-/** Массив (почти) неизменяемого размера:
-  *  размер задаётся в конструкторе;
-  *  метод resize приводит к удалению старых данных и нужен лишь для того,
-  *   чтобы можно было сначала создать пустой объект, используя конструктор по-умолчанию,
-  *   а потом уже определиться с размером.
+/** An array of (almost) unchangeable size:
+  *  the size is specified in the constructor;
+  *  the `resize` method removes old data and is needed only
+  *   so that you can first create an empty object using the default constructor,
+  *   and then decide on the size.
  *
-  * Есть возможность не инициализировать элементы по-умолчанию, а создавать их inplace.
-  * Деструкторы элементов вызываются автоматически.
+  * It is possible not to default-initialize the elements, but to create them in place.
+  * Element destructors are called automatically.
  *
-  * sizeof равен размеру одного указателя.
+  * `sizeof` is equal to the size of one pointer.
  *
-  * Не exception-safe.
-  * Копирование не поддерживается. Перемещение опустошает исходный объект.
-  * То есть, использовать этот массив во многих случаях неудобно.
+  * Not exception-safe.
+  * Copying is not supported. Moving empties the original object.
+  * That is, it is inconvenient to use this array in many cases.
  *
-  * Предназначен для ситуаций, в которых создаётся много массивов одинакового небольшого размера,
-  *  но при этом размер не известен во время компиляции.
-  * Также даёт существенное преимущество в случаях, когда важно, чтобы sizeof был минимальным.
-  * Например, если массивы кладутся в open-addressing хэш-таблицу с inplace хранением значений (как HashMap)
+  * Designed for situations in which many arrays of the same small size are created,
+  *  but the size is not known at compile time.
+  * Also gives a significant advantage in cases where it is important that `sizeof` is minimal.
+  * For example, if arrays are put in an open-addressing hash table with inplace storage of values (like HashMap)
  *
-  * В этом случае, по сравнению с std::vector:
-  * - для массивов размером в 1 элемент - преимущество примерно в 2 раза;
-  * - для массивов размером в 5 элементов - преимущество примерно в 1.5 раза
-  *   (в качестве T использовались DB::Field, содержащие UInt64 и String);
+  * In this case, compared to std::vector:
+  * - for arrays of 1 element - an advantage of about 2 times;
+  * - for arrays of 5 elements - an advantage of about 1.5 times
+  *   (DB::Field, containing UInt64 and String, were used as T);
  */

 const size_t empty_auto_array_helper = 0;
@@ -42,7 +42,7 @@ template <typename T>
 class AutoArray
 {
 public:
-    /// Для отложенного создания.
+    /// For deferred creation.
    AutoArray()
    {
        setEmpty();
@@ -53,16 +53,16 @@ public:
        init(size_, false);
    }

-    /** Не будут вызваны конструкторы по-умолчанию для элементов.
-      * В этом случае, вы должны вставить все элементы с помощью функции place и placement new,
-      *  так как для них потом будут вызваны деструкторы.
+    /** The default constructors for elements will not be called.
+      * In this case, you must insert all elements using the `place` function and placement new,
+      *  since destructors will later be called for them.
      */
    AutoArray(size_t size_, const DontInitElemsTag & tag)
    {
        init(size_, true);
    }

-    /** Инициализирует все элементы копирующим конструктором с параметром value.
+    /** Initializes all elements with a copy constructor with the `value` parameter.
      */
    AutoArray(size_t size_, const T & value)
    {
@@ -74,7 +74,7 @@ public:
        }
    }

-    /** resize удаляет все существующие элементы.
+    /** `resize` removes all existing items.
      */
    void resize(size_t size_, bool dont_init_elems = false)
    {
@@ -82,7 +82,7 @@ public:
        init(size_, dont_init_elems);
    }

-    /** Премещение.
+    /** Moving.
      */
    AutoArray(AutoArray && src)
    {
@@ -125,10 +125,10 @@ public:
        setEmpty();
    }

-    /** Можно читать и модифицировать элементы с помощью оператора []
-      *  только если элементы были инициализированы
-      *  (то есть, в конструктор не был передан DontInitElemsTag,
-      *   или вы их инициализировали с помощью place и placement new).
+    /** You can read and modify elements using the [] operator
+      *  only if the elements have been initialized
+      *  (that is, DontInitElemsTag was not passed to the constructor,
+      *   or you initialized them using `place` and placement new).
      */
    T & operator[](size_t i)
    {
@@ -140,9 +140,9 @@ public:
        return elem(i);
    }

-    /** Получить кусок памяти, в котором должен быть расположен элемент.
-      * Функция предназначена, чтобы инициализировать элемент,
-      *  который ещё не был инициализирован:
+    /** Get the piece of memory in which the element should be located.
+      * The function is intended for initializing an element
+      *  that has not yet been initialized:
      * new (arr.place(i)) T(args);
      */
    char * place(size_t i)

--- a/dbms/src/Common/CombinedCardinalityEstimator.h
+++ b/dbms/src/Common/CombinedCardinalityEstimator.h
@@ -23,9 +23,9 @@ static inline ContainerType max(const ContainerType & lhs, const ContainerType &

 }

-/** Для маленького количества ключей - массив фиксированного размера "на стеке".
-  * Для среднего - выделяется HashSet.
-  * Для большого - выделяется HyperLogLog.
+/** For a small number of keys - an array of fixed size "on the stack".
+  * For a medium number - a HashSet is allocated.
+  * For a large number - a HyperLogLog is allocated.
  */
 template
 <
@@ -146,7 +146,7 @@ public:
            getContainer<Large>().merge(rhs.getContainer<Large>());
    }

-    /// Можно вызывать только для пустого объекта.
+    /// Can be called only for an empty object.
    void read(DB::ReadBuffer & in)
    {
        UInt8 v;
@@ -171,8 +171,8 @@ public:
    {
        auto container_type = getContainerType();

-        /// Если readAndMerge вызывается с пустым состоянием, просто десериализуем
-        /// состояние задано в качестве параметра.
+        /// If readAndMerge is called with an empty state, just deserialize
+        /// the state given as a parameter.
        if ((container_type == details::ContainerType::SMALL) && small.empty())
        {
            read(in);

--- a/dbms/src/Common/CompactArray.h
+++ b/dbms/src/Common/CompactArray.h
@@ -15,11 +15,11 @@ namespace ErrorCodes
 }


-/** Компактный массив для хранения данных, размер content_width, в битах, которых составляет
-  * меньше одного байта. Вместо того, чтобы хранить каждое значение в отдельный
-  * байт, что приводит к растрате 37.5% пространства для content_width=5, CompactArray хранит
-  * смежные content_width-битные значения в массиве байтов, т.е. фактически CompactArray
-  * симулирует массив content_width-битных значений.
+/** Compact array for storing values whose size `content_width`, in bits, is
+  * less than one byte. Instead of storing each value in a separate
+  * byte, which leads to a waste of 37.5% of the space for content_width = 5, CompactArray stores
+  * adjacent `content_width`-bit values in a byte array, i.e. CompactArray effectively
+  * simulates an array of `content_width`-bit values.
  */
 template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
 class __attribute__ ((packed)) CompactArray final
@@ -76,12 +76,12 @@ public:
    }

 private:
-    /// число байт в битсете
+    /// number of bytes in bitset
    static constexpr size_t BITSET_SIZE = (static_cast<size_t>(bucket_count) * content_width + 7) / 8;
    UInt8 bitset[BITSET_SIZE] = { 0 };
 };

-/** Класс для последовательного чтения ячеек из компактного массива на диске.
+/** A class for sequentially reading cells from a compact array on a disk.
  */
 template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
 class CompactArray<BucketIndex, content_width, bucket_count>::Reader final
@@ -135,7 +135,7 @@ public:
        return true;
    }

-    /** Вернуть текущий номер ячейки и соответствующее содержание.
+    /** Return the current cell number and the corresponding content.
      */
    inline std::pair<BucketIndex, UInt8> get() const
    {
@@ -150,26 +150,26 @@ public:

 private:
    ReadBuffer & in;
-    /// Физическое расположение текущей ячейки.
+    /// The physical location of the current cell.
    Locus locus;
-    /// Текущая позиция в файле в виде номера ячейки.
+    /// The current position in the file as a cell number.
    BucketIndex current_bucket_index = 0;
-    /// Количество прочитанных байтов.
+    /// The number of bytes read.
    size_t read_count = 0;
-    /// Содержание в текущей позиции.
+    /// The content in the current position.
    UInt8 value_l;
    UInt8 value_r;
    ///
    bool is_eof = false;
-    /// Влезает ли ячейка полностью в один байт?
+    /// Does the cell fully fit into one byte?
    bool fits_in_byte;
 };

-/** Структура Locus содержит необходимую информацию, чтобы найти для каждой ячейки
-  * соответствующие байт и смещение, в битах, от начала ячейки. Поскольку в общем
-  * случае размер одного байта не делится на размер одной ячейки, возможны случаи,
-  * когда одна ячейка перекрывает два байта. Поэтому структура Locus содержит две
-  * пары (индекс, смещение).
+/** The `Locus` structure contains the information necessary to find, for each cell,
+  * the corresponding byte and offset, in bits, from the beginning of the cell. Since, in the general
+  * case, the size of one byte is not divisible by the size of one cell, it is possible
+  * that one cell overlaps two bytes. Therefore, the `Locus` structure contains two
+  * (index, offset) pairs.
  */
 template <typename BucketIndex, UInt8 content_width, size_t bucket_count>
 class CompactArray<BucketIndex, content_width, bucket_count>::Locus final
@@ -190,13 +190,13 @@ public:
    {
        if ((index_l == index_r) || (index_l == (BITSET_SIZE - 1)))
        {
-            /// Ячейка полностью влезает в один байт.
+            /// The cell completely fits into one byte.
            *content_l &= ~(((1 << content_width) - 1) << offset_l);
            *content_l |= content << offset_l;
        }
        else
        {
-            /// Ячейка перекрывает два байта.
+            /// The cell overlaps two bytes.
            size_t left = 8 - offset_l;

            *content_l &= ~(((1 << left) - 1) << offset_l);
@@ -230,13 +230,13 @@ private:

    UInt8 ALWAYS_INLINE read(UInt8 value_l) const
    {
-        /// Ячейка полностью влезает в один байт.
+        /// The cell completely fits into one byte.
        return (value_l >> offset_l) & ((1 << content_width) - 1);
    }

    UInt8 ALWAYS_INLINE read(UInt8 value_l, UInt8 value_r) const
    {
-        /// Ячейка перекрывает два байта.
+        /// The cell overlaps two bytes.
        return ((value_l >> offset_l) & ((1 << (8 - offset_l)) - 1))
            | ((value_r & ((1 << offset_r) - 1)) << (8 - offset_l));
    }
@@ -250,7 +250,7 @@ private:
    UInt8 * content_l;
    UInt8 * content_r;

-    /// Проверки
+    /// Checks
    static_assert((content_width > 0) && (content_width < 8), "Invalid parameter value");
    static_assert(bucket_count <= (std::numeric_limits<size_t>::max() / content_width), "Invalid parameter value");
 };

--- a/dbms/src/Common/ConcurrentBoundedQueue.h
+++ b/dbms/src/Common/ConcurrentBoundedQueue.h
@@ -38,9 +38,9 @@ namespace detail
    }
 };

-/** Очень простая thread-safe очередь ограниченной длины.
-  * Если пытаться вынуть элемент из пустой очереди, то поток блокируется, пока очередь не станет непустой.
-  * Если пытаться вставить элемент в переполненную очередь, то поток блокируется, пока в очереди не появится элемент.
+/** A very simple thread-safe queue of limited length.
+  * If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty.
+  * If you try to push an element into an overflowed queue, the thread is blocked until space appears in the queue.
  */
 template <typename T>
 class ConcurrentBoundedQueue

--- a/dbms/src/Common/CounterInFile.h
+++ b/dbms/src/Common/CounterInFile.h
@@ -22,24 +22,24 @@
 #define SMALL_READ_WRITE_BUFFER_SIZE 16


-/** Хранит в файле число.
- * Предназначен для редких вызовов (не рассчитан на производительность).
+/** Stores a number in the file.
+ * Designed for rare calls (not designed for performance).
 */
 class CounterInFile
 {
 public:
-    /// path - имя файла, включая путь
+    /// path - the name of the file, including the path
    CounterInFile(const std::string & path_) : path(path_) {}

-    /** Добавить delta к числу в файле и вернуть новое значение.
-     * Если параметр create_if_need не установлен в true, то
-     *  в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
+    /** Add `delta` to the number in the file and return the new value.
+     * If the `create_if_need` parameter is not set to true, then
+     *  the file should already have a number written (if not - create the file manually with zero).
     *
-     * Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
-     * (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
+     * To protect against race conditions between different processes, file locks are used.
+     * (But a race condition is possible when the file is first created, so it's better to create the file in advance.)
     *
-     * locked_callback вызывается при заблокированном файле со счетчиком. В него передается новое значение.
-     * locked_callback можно использовать, чтобы делать что-нибудь атомарно с увеличением счетчика (например, переименовывать файлы).
+     * `locked_callback` is called while the counter file is locked. The new value is passed to it.
+     * `locked_callback` can be used to do something atomically together with incrementing the counter (for example, renaming files).
     */
    template <typename Callback>
    Int64 add(Int64 delta, Callback && locked_callback, bool create_if_need = false)
@@ -74,7 +74,7 @@ public:
                }
                catch (const DB::Exception & e)
                {
-                    /// Более понятное сообщение об ошибке.
+                    /// A more understandable error message.
                    if (e.code() == DB::ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == DB::ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
                        throw DB::Exception("File " + path + " is empty. You must fill it manually with appropriate value.", e.code());
                    else
@@ -118,13 +118,13 @@ public:
        return path;
    }

-    /// Изменить путь к файлу.
+    /// Change the path to the file.
    void setPath(std::string path_)
    {
        path = path_;
    }

-    // Не thread-safe и не синхронизирован между процессами.
+    // Not thread-safe and not synchronized between processes.
    void fixIfBroken(UInt64 value)
    {
        bool file_exists = Poco::File(path).exists();

--- a/dbms/src/Common/Exception.h
+++ b/dbms/src/Common/Exception.h
@@ -35,7 +35,7 @@ public:
    DB::Exception * clone() const override { return new DB::Exception(*this); }
    void rethrow() const override { throw *this; }

-    /// Дописать к существующему сообщению что-нибудь ещё.
+    /// Add something to the existing message.
    void addMessage(const std::string & arg) { extendedMessage(arg); }

    const StackTrace & getStackTrace() const { return trace; }
@@ -45,7 +45,7 @@ private:
 };


-/// Содержит дополнительный член saved_errno. См. функцию throwFromErrno.
+/// Contains an additional member `saved_errno`. See the throwFromErrno function.
 class ErrnoException : public Exception
 {
 public:
@@ -73,8 +73,8 @@ using Exceptions = std::vector<std::exception_ptr>;
 void throwFromErrno(const std::string & s, int code = 0, int the_errno = errno);


-/** Попробовать записать исключение в лог (и забыть про него).
-  * Можно использовать в деструкторах в блоке catch (...).
+/** Try to write an exception to the log (and forget about it).
+  * Can be used in destructors, in a catch (...) block.
  */
 void tryLogCurrentException(const char * log_name, const std::string & start_of_message = "");
 void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = "");

--- a/dbms/src/Common/ExternalTable.h
+++ b/dbms/src/Common/ExternalTable.h
@@ -25,16 +25,16 @@ namespace ErrorCodes
 }


-/// Базовый класс содержащий основную информацию о внешней таблице и
-/// основные функции для извлечения этой информации из текстовых полей.
+/// The base class containing the basic information about external table and
+/// basic functions for extracting this information from text fields.
 class BaseExternalTable
 {
 public:
-    std::string file;         /// Файл с данными или '-' если stdin
-    std::string name;         /// Имя таблицы
-    std::string format;     /// Название формата хранения данных
+    std::string file;       /// File with data or '-' if stdin
+    std::string name;       /// The name of the table
+    std::string format;     /// Name of the data storage format

-    /// Описание структуры таблицы: (имя столбца, имя типа данных)
+    /// Description of the table structure: (column name, data type name)
    std::vector<std::pair<std::string, std::string> > structure;

    std::unique_ptr<ReadBuffer> read_buffer;
@@ -42,10 +42,10 @@ public:

    virtual ~BaseExternalTable() {};

-    /// Инициализировать read_buffer в зависимости от источника данных. По умолчанию не делает ничего.
+    /// Initialize read_buffer, depending on the data source. By default, does nothing.
    virtual void initReadBuffer() {};

-    /// Инициализировать sample_block по структуре таблицы сохраненной в structure
+    /// Initialize sample_block according to the table structure stored in `structure`
    virtual void initSampleBlock(const Context & context)
    {
        const DataTypeFactory & data_type_factory = DataTypeFactory::instance();
@@ -60,7 +60,7 @@ public:
        }
    }

-    /// Получить данные таблицы - пару (поток с содержимым таблицы, имя таблицы)
+    /// Get the table data - a pair (a stream with the contents of the table, the name of the table)
    virtual ExternalTableData getData(const Context & context)
    {
        initReadBuffer();
@@ -71,7 +71,7 @@ public:
    }

 protected:
-    /// Очистить всю накопленную информацию
+    /// Clear all accumulated information
    void clean()
    {
        name = "";
@@ -82,7 +82,7 @@ protected:
        read_buffer.reset();
    }

-    /// Функция для отладочного вывода информации
+    /// Function for debugging information output
    void write()
    {
        std::cerr << "file " << file << std::endl;
@@ -100,7 +100,7 @@ protected:
        return res;
    }

-    /// Построить вектор structure по текстовому полю structure
+    /// Construct the `structure` vector from the text field `structure`
    virtual void parseStructureFromStructureField(const std::string & argument)
    {
        std::vector<std::string> vals = split(argument, " ,");
@@ -112,7 +112,7 @@ protected:
            structure.emplace_back(vals[i], vals[i + 1]);
    }

-    /// Построить вектор structure по текстовому полю types
+    /// Construct the `structure` vector from the text field `types`
    virtual void parseStructureFromTypesField(const std::string & argument)
    {
        std::vector<std::string> vals = split(argument, " ,");
@@ -123,7 +123,7 @@ protected:
 };


-/// Парсинг внешей таблицы, используемый в tcp клиенте.
+/// Parsing of external table used in the tcp client.
 class ExternalTable : public BaseExternalTable
 {
 public:
@@ -135,7 +135,7 @@ public:
            read_buffer = std::make_unique<ReadBufferFromFile>(file);
    }

-    /// Извлечение параметров из variables_map, которая строится по командной строке клиента
+    /// Extract parameters from variables_map, which is built from the client's command line
    ExternalTable(const boost::program_options::variables_map & external_options)
    {
        if (external_options.count("file"))
@@ -162,9 +162,9 @@ public:
    }
 };

-/// Парсинг внешей таблицы, используемый при отправке таблиц через http
-/// Функция handlePart будет вызываться для каждой переданной таблицы,
-/// поэтому так же необходимо вызывать clean в конце handlePart.
+/// Parsing of external table used when sending tables via http.
+/// The `handlePart` function will be called for each table passed,
+/// so it's also necessary to call `clean` at the end of `handlePart`.
 class ExternalTablesHandler : public Poco::Net::PartHandler, BaseExternalTable
 {
 public:
@@ -174,15 +174,15 @@ public:

    void handlePart(const Poco::Net::MessageHeader & header, std::istream & stream)
    {
-        /// Буфер инициализируется здесь, а не в виртуальной функции initReadBuffer
+        /// The buffer is initialized here, not in the virtual function initReadBuffer
        read_buffer = std::make_unique<ReadBufferFromIStream>(stream);

-        /// Извлекаем коллекцию параметров из MessageHeader
+        /// Retrieve a collection of parameters from MessageHeader
        Poco::Net::NameValueCollection content;
        std::string label;
        Poco::Net::MessageHeader::splitParameters(header.get("Content-Disposition"), label, content);

-        /// Получаем параметры
+        /// Get parameters
        name = content.get("name", "_data");
        format = params.get(name + "_format", "TabSeparated");

@@ -195,13 +195,13 @@ public:

        ExternalTableData data = getData(context);

-        /// Создаем таблицу
+        /// Create table
        NamesAndTypesListPtr columns = std::make_shared<NamesAndTypesList>(sample_block.getColumnsList());
        StoragePtr storage = StorageMemory::create(data.second, columns);
        context.addExternalTable(data.second, storage);
        BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef());

-        /// Записываем данные
+        /// Write data
        data.first->readPrefix();
        output->writePrefix();
        while(Block block = data.first->read())
@@ -210,7 +210,7 @@ public:
        output->writeSuffix();

        names.push_back(name);
-        /// Подготавливаемся к приему следующего файла, для этого очищаем всю полученную информацию
+        /// Prepare to receive the next file: to do this, clear all the information received
        clean();
    }


--- a/dbms/src/Common/FileChecker.h
+++ b/dbms/src/Common/FileChecker.h
@@ -8,11 +8,11 @@
 namespace DB
 {

-/// хранит размеры всех столбцов, и может проверять не побились ли столбцы
+/// stores the sizes of all columns, and can check whether the columns are corrupted
 class FileChecker
 {
 private:
-    /// Имя файла -> размер.
+    /// File name -> size.
    using Map = std::map<std::string, size_t>;

 public:
@@ -23,7 +23,7 @@ public:
    void update(const Poco::File & file);
    void update(const Files::const_iterator & begin, const Files::const_iterator & end);

-    /// Проверяем файлы, параметры которых указаны в sizes.json
+    /// Check the files whose parameters are specified in sizes.json
    bool check() const;

 private:
@@ -35,7 +35,7 @@ private:
    std::string files_info_path;
    std::string tmp_files_info_path;

-    /// Данные из файла читаются лениво.
+    /// The data from the file is read lazily.
    Map map;
    bool initialized = false;


--- a/dbms/src/Common/HashTable/ClearableHashSet.h
+++ b/dbms/src/Common/HashTable/ClearableHashSet.h
@@ -4,12 +4,12 @@
 #include <Common/HashTable/HashSet.h>


-/** Хеш-таблица, позволяющая очищать таблицу за O(1).
-  * Еще более простая, чем HashSet: Key и Mapped должны быть POD-типами.
+/** A hash table that allows you to clear the table in O(1).
+  * Even simpler than HashSet: Key and Mapped must be POD-types.
  *
-  * Вместо этого класса можно было бы просто использовать в HashSet в качестве ключа пару <версия, ключ>,
-  * но тогда таблица накапливала бы все ключи, которые в нее когда-либо складывали, и неоправданно росла.
-  * Этот класс идет на шаг дальше и считает ключи со старой версией пустыми местами в хеш-таблице.
+  * Instead of this class, you could just use the pair <version, key> as the key in a HashSet,
+  * but then the table would accumulate all the keys ever stored in it and would grow unreasonably.
+  * This class goes a step further and treats keys with an old version as empty cells in the hash table.
  */


@@ -17,11 +17,11 @@ struct ClearableHashSetState
 {
    UInt32 version = 1;

-    /// Сериализация, в бинарном и текстовом виде.
+    /// Serialization, in binary and text form.
    void write(DB::WriteBuffer & wb) const         { DB::writeBinary(version, wb); }
    void writeText(DB::WriteBuffer & wb) const     { DB::writeText(version, wb); }

-    /// Десериализация, в бинарном и текстовом виде.
+    /// Deserialization, in binary and text form.
    void read(DB::ReadBuffer & rb)                 { DB::readBinary(version, rb); }
    void readText(DB::ReadBuffer & rb)             { DB::readText(version, rb); }
 };
@@ -38,10 +38,10 @@ struct ClearableHashTableCell : public BaseCell
    bool isZero(const State & state) const { return version != state.version; }
    static bool isZero(const Key & key, const State & state) { return false; }

-    /// Установить значение ключа в ноль.
+    /// Set the key value to zero.
    void setZero() { version = 0; }

-    /// Нужно ли хранить нулевой ключ отдельно (то есть, могут ли в хэш-таблицу вставить нулевой ключ).
+    /// Whether the zero key needs to be stored separately (that is, whether a zero key can be inserted into the hash table).
    static constexpr bool need_zero_value_storage = false;

    ClearableHashTableCell() {}

--- a/dbms/src/Common/HashTable/Hash.h
+++ b/dbms/src/Common/HashTable/Hash.h
@@ -3,12 +3,12 @@
 #include <Core/Types.h>


-/** Хэш функции, которые лучше чем тривиальная функция std::hash.
-  * (при агрегации по идентификатору посетителя, прирост производительности более чем в 5 раз)
+/** Hash functions that are better than the trivial function std::hash.
+  * (when aggregated by the visitor ID, the performance increase is more than 5 times)
  */

-/** Взято из MurmurHash.
-  * Быстрее, чем intHash32 при вставке в хэш-таблицу UInt64 -> UInt64, где ключ - идентификатор посетителя.
+/** Taken from MurmurHash.
+  * Faster than intHash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
  */
 inline DB::UInt64 intHash64(DB::UInt64 x)
 {
@@ -21,12 +21,12 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
    return x;
 }

-/** CRC32C является не очень качественной в роли хэш функции,
-  *  согласно avalanche и bit independence тестам, а также малым количеством бит,
-  *  но может вести себя хорошо при использовании в хэш-таблицах,
-  *  за счёт высокой скорости (latency 3 + 1 такт, througput 1 такт).
-  * Работает только при поддержке SSE 4.2.
-  * Используется asm вместо интринсика, чтобы не обязательно было собирать весь проект с -msse4.
+/** CRC32C is not very high-quality as a hash function,
+  *  according to avalanche and bit independence tests, as well as a small number of bits,
+  *  but can behave well when used in hash tables,
+  *  due to high speed (latency 3 + 1 clock cycle, throughput 1 clock cycle).
+  * Works only with SSE 4.2 support.
+  * asm is used instead of intrinsics, so that you do not have to build the entire project with -msse4.
  */
 inline DB::UInt64 intHashCRC32(DB::UInt64 x)
 {
@@ -35,7 +35,7 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x)
    asm("crc32q %[x], %[crc]\n" : [crc] "+r" (crc) : [x] "rm" (x));
    return crc;
 #else
-    /// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
+    /// On other platforms we do not necessarily use CRC32. NOTE This can be confusing.
    return intHash64(x);
 #endif
 }
@@ -117,7 +117,7 @@ DEFINE_HASH(DB::Float64)
 #undef DEFINE_HASH


-/// Разумно использовать для UInt8, UInt16 при достаточном размере хэш-таблицы.
+/// It is reasonable to use for UInt8, UInt16 with sufficient hash table size.
 struct TrivialHash
 {
    template <typename T>
@@ -128,17 +128,17 @@ struct TrivialHash
 };


-/** Сравнительно неплохая некриптографическая хэш функция из UInt64 в UInt32.
-  * Но хуже (и по качеству и по скорости), чем просто срезка intHash64.
-  * Взята отсюда: http://www.concentric.net/~ttwang/tech/inthash.htm
+/** A relatively good non-cryptographic hash function from UInt64 to UInt32.
+  * But worse (both in quality and speed) than just truncating intHash64.
+  * Taken from here: http://www.concentric.net/~ttwang/tech/inthash.htm
  *
-  * Немного изменена по сравнению с функцией по ссылке: сдвиги вправо случайно заменены на цикличесвие сдвиги вправо.
-  * Это изменение никак не повлияло на результаты тестов smhasher.
+  * Slightly changed compared to the function at the link: shifts to the right were accidentally replaced by cyclic shifts to the right.
+  * This change did not affect the smhasher test results.
  *
-  * Рекомендуется для разных задач использовать разные salt.
-  * А то был случай, что в БД значения сортировались по хэшу (для некачественного псевдослучайного разбрасывания),
-  *  а в другом месте, в агрегатной функции, в хэш таблице использовался такой же хэш,
-  *  в результате чего, эта агрегатная функция чудовищно тормозила из-за коллизий.
+  * It is recommended to use different salts for different tasks.
+  * There was a case where values in the database were sorted by hash (for low-quality pseudo-random spread),
+  *  and in another place, in an aggregate function, the same hash was used in the hash table;
+  *  as a result, this aggregate function was monstrously slow due to collisions.
  */
 template <DB::UInt64 salt>
 inline DB::UInt32 intHash32(DB::UInt64 key)
@@ -156,7 +156,7 @@ inline DB::UInt32 intHash32(DB::UInt64 key)
 }


-/// Для контейнеров.
+/// For containers.
 template <typename T, DB::UInt64 salt = 0>
 struct IntHash32
 {

--- a/dbms/src/Common/HashTable/HashMap.h
+++ b/dbms/src/Common/HashTable/HashMap.h
@@ -13,7 +13,7 @@

 struct NoInitTag {};

-/// Пара, которая не инициализирует элементы, если не нужно.
+/// A pair that does not initialize the elements, if not needed.
 template <typename First, typename Second>
 struct PairNoInit
 {
@@ -60,18 +60,18 @@ struct HashMapCell
    bool isZero(const State & state) const { return isZero(value.first, state); }
    static bool isZero(const Key & key, const State & state) { return ZeroTraits::check(key); }

-    /// Установить значение ключа в ноль.
+    /// Set the key value to zero.
    void setZero() { ZeroTraits::set(value.first); }

-    /// Нужно ли хранить нулевой ключ отдельно (то есть, могут ли в хэш-таблицу вставить нулевой ключ).
+    /// Whether the zero key needs to be stored separately (that is, whether a zero key can be inserted into the hash table).
    static constexpr bool need_zero_value_storage = true;

-    /// Является ли ячейка удалённой.
+    /// Whether the cell is removed.
    bool isDeleted() const { return false; }

    void setMapped(const value_type & value_) { value.second = value_.second; }

-    /// Сериализация, в бинарном и текстовом виде.
+    /// Serialization, in binary and text form.
    void write(DB::WriteBuffer & wb) const
    {
        DB::writeBinary(value.first, wb);
@@ -85,7 +85,7 @@ struct HashMapCell
        DB::writeDoubleQuoted(value.second, wb);
    }

-    /// Десериализация, в бинарном и текстовом виде.
+    /// Deserialization, in binary and text form.
    void read(DB::ReadBuffer & rb)
    {
        DB::readBinary(value.first, rb);
@@ -141,19 +141,19 @@ public:
        bool inserted;
        this->emplace(x, it, inserted);

-        /** Может показаться, что инициализация не обязательна для POD-типов (или __has_trivial_constructor),
-          *  так как кусок памяти для хэш-таблицы изначально инициализирован нулями.
-          * Но, на самом деле, пустая ячейка может быть не инициализирована нулями в следующих случаях:
-          * - ZeroValueStorage (в нём зануляется только ключ);
-          * - после ресайза и переноса части ячеек в новую половину хэш-таблицы, у старых ячеек, тоже зануляется только ключ.
+        /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor),
+          *  since the hash table memory is initially initialized with zeros.
+          * But, in fact, an empty cell may not be initialized with zeros in the following cases:
+          * - ZeroValueStorage (it only zeros the key);
+          * - after resizing and moving a part of the cells to the new half of the hash table, only the key is zeroed in the old cells as well.
          *
-          * По производительности, разницы почти всегда нет, за счёт того, что it->second как правило присваивается сразу
-          *  после вызова operator[], и так как operator[] инлайнится, компилятор убирает лишнюю инициализацию.
+          * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately
+          *  after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization.
          *
-          * Иногда из-за инициализации, производительность даже растёт. Это происходит в коде вида ++map[key].
-          * Когда мы делаем инициализацию, то для новых ячеек, достаточно сразу сделать store 1.
-          * А если бы мы не делали инициализацию, то не смотря на то, что в ячейке был ноль,
-          *  компилятор не может об этом догадаться, и генерирует код load, increment, store.
+          * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`.
+          * When we do the initialization, for new cells, it's enough to make `store 1` right away.
+          * And if we did not initialize, then even though there was zero in the cell,
+          *  the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
          */
        if (inserted)
            new(&it->second) mapped_type();

--- a/dbms/src/Common/HashTable/HashTable.h
+++ b/dbms/src/Common/HashTable/HashTable.h
--- a/dbms/src/Common/HashTable/SmallTable.h
+++ b/dbms/src/Common/HashTable/SmallTable.h
@@ -3,15 +3,15 @@
 #include <Common/HashTable/HashMap.h>


-/** Замена хэш-таблицы для маленького количества (единицы) ключей.
-  * Реализована в виде массива с линейным поиском.
-  * Массив расположен внутри объекта.
-  * Интерфейс является подмножеством интерфейса HashTable.
+/** Replacement of the hash table for a small number (<10) of keys.
+  * Implemented as an array with linear search.
+  * The array is located inside the object.
+  * The interface is a subset of the HashTable interface.
  *
-  * Вставка возможна только если метод full возвращает false.
-  * При неизвестном количестве различных ключей,
-  *  вы должны проверять, не заполнена ли таблица,
-  *  и делать fallback в этом случае (например, использовать полноценную хэш-таблицу).
+  * Insert is possible only if the `full` method returns false.
+  * With an unknown number of different keys,
+  *  you should check if the table is not full,
+  *  and do a `fallback` in this case (for example, use a real hash table).
  */

 template
@@ -32,11 +32,11 @@ protected:
    using Self = SmallTable<Key, Cell, capacity>;
    using cell_type = Cell;

-    size_t m_size = 0;        /// Количество элементов.
-    Cell buf[capacity];        /// Кусок памяти для всех элементов.
+    size_t m_size = 0;        /// Number of elements.
+    Cell buf[capacity];       /// A piece of memory for all elements.


-    /// Найти ячейку с тем же ключём или пустую ячейку, начиная с заданного места и далее по цепочке разрешения коллизий.
+    /// Find a cell with the same key or an empty cell, starting from the specified position and then by the collision resolution chain.
    const Cell * ALWAYS_INLINE findCell(const Key & x) const
    {
        const Cell * it = buf;
@@ -188,8 +188,8 @@ protected:


 public:
-    /** Таблица переполнена.
-      * В переполненную таблицу ничего нельзя вставлять.
+    /** The table is full.
+      * You can not insert anything into the full table.
      */
    bool full()
    {
@@ -197,7 +197,7 @@ public:
    }


-    /// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
+    /// Insert the value. In the case of any more complex values, it is better to use the `emplace` function.
    std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
    {
        std::pair<iterator, bool> res;
@@ -211,14 +211,14 @@ public:
    }


-    /** Вставить ключ,
-      * вернуть итератор на позицию, которую можно использовать для placement new значения,
-      * а также флаг - был ли вставлен новый ключ.
+    /** Insert the key,
+      * return the iterator to a position that can be used for `placement new` of value,
+      * as well as the flag - whether a new key was inserted.
      *
-      * Вы обязаны сделать placement new значения, если был вставлен новый ключ,
-      * так как при уничтожении хэш-таблицы для него будет вызываться деструктор!
+      * You have to make `placement new` of value if you inserted a new key,
+      * since when destroying a hash table, a destructor will be called for it!
      *
-      * Пример использования:
+      * Example usage:
      *
      * Map::iterator it;
      * bool inserted;
@@ -239,7 +239,7 @@ public:
    }


-    /// То же самое, но вернуть false, если переполнено.
+    /// Same, but return false if it's full.
    bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted)
    {
        Cell * res = findCell(x);
@@ -257,7 +257,7 @@ public:
    }


-    /// Скопировать ячейку из другой хэш-таблицы. Предполагается, что такого ключа в таблице ещё не было.
+    /// Copy the cell from another hash table. It is assumed that there was no such key in the table yet.
    void ALWAYS_INLINE insertUnique(const Cell * cell)
    {
        memcpy(&buf[m_size], cell, sizeof(*cell));

--- a/dbms/src/Common/HashTable/TwoLevelHashTable.h
+++ b/dbms/src/Common/HashTable/TwoLevelHashTable.h
@@ -3,21 +3,21 @@
 #include <Common/HashTable/HashTable.h>


-/** Двухуровневая хэш-таблица.
-  * Представляет собой 256 (или 1 << BITS_FOR_BUCKET) маленьких хэш-таблиц (bucket-ов первого уровня).
-  * Для определения, какую из них использовать, берётся один из байтов хэш-функции.
+/** Two-level hash table.
+  * Represents 256 (or 1 << BITS_FOR_BUCKET) small hash tables (buckets of the first level).
+  * To determine which one to use, one of the bytes of the hash function is taken.
  *
-  * Обычно работает чуть-чуть медленнее простой хэш-таблицы.
-  * Тем не менее, обладает преимуществами в некоторых случаях:
-  * - если надо мерджить две хэш-таблицы вместе, то это можно легко распараллелить по bucket-ам;
-  * - лаг при ресайзах размазан, так как маленькие хэш-таблицы ресайзятся по-отдельности;
-  * - по идее, ресайзы кэш-локальны в большем диапазоне размеров.
+  * Usually works a little slower than a simple hash table.
+  * However, it has advantages in some cases:
+  * - if you need to merge two hash tables together, then you can easily parallelize it by buckets;
+  * - lag during resizes is spread out, since the small hash tables are resized separately;
+  * - in theory, resizes are cache-local in a larger range of sizes.
  */

 template <size_t initial_size_degree = 8>
 struct TwoLevelHashTableGrower : public HashTableGrower<initial_size_degree>
 {
-    /// Увеличить размер хэш-таблицы.
+    /// Increase the size of the hash table.
    void increaseSize()
    {
        this->size_degree += this->size_degree >= 15 ? 1 : 2;
@@ -52,7 +52,7 @@ public:

    size_t hash(const Key & x) const { return Hash::operator()(x); }

-    /// NOTE Плохо для хэш-таблиц больше чем на 2^32 ячеек.
+    /// NOTE Bad for hash tables with more than 2^32 cells.
    static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }

 protected:
@@ -89,13 +89,13 @@ public:

    TwoLevelHashTable() {}

-    /// Скопировать данные из другой (обычной) хэш-таблицы. У неё должна быть такая же хэш-функция.
+    /// Copy the data from another (normal) hash table. It should have the same hash function.
    template <typename Source>
    TwoLevelHashTable(const Source & src)
    {
        typename Source::const_iterator it = src.begin();

-        /// Предполагается, что нулевой ключ (хранящийся отдельно) при итерировании идёт первым.
+        /// It is assumed that the zero key (stored separately) comes first when iterating.
        if (it != src.end() && it.getPtr()->isZero(src))
        {
            insert(*it);
@@ -205,7 +205,7 @@ public:
    iterator end()                     { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }


-    /// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
+    /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
    std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
    {
        size_t hash_value = hash(Cell::getKey(x));
@@ -220,14 +220,14 @@ public:
    }


-    /** Вставить ключ,
-      * вернуть итератор на позицию, которую можно использовать для placement new значения,
-      * а также флаг - был ли вставлен новый ключ.
+    /** Insert the key,
+      * return the iterator to a position that can be used for `placement new` of value,
+      * as well as the flag - whether a new key was inserted.
      *
-      * Вы обязаны сделать placement new значения, если был вставлен новый ключ,
-      * так как при уничтожении хэш-таблицы для него будет вызываться деструктор!
+      * You have to make `placement new` of value if you inserted a new key,
+      * since when destroying a hash table, the destructor will be invoked for it!
      *
-      * Пример использования:
+      * Example usage:
      *
      * Map::iterator it;
      * bool inserted;
@@ -242,7 +242,7 @@ public:
    }


-    /// То же самое, но с заранее вычисленным значением хэш-функции.
+    /// Same, but with a precalculated value of the hash function.
    void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
    {
        size_t buck = getBucketFromHash(hash_value);

--- a/dbms/src/Common/HyperLogLogBiasEstimator.h
+++ b/dbms/src/Common/HyperLogLogBiasEstimator.h
@@ -7,10 +7,10 @@
 #include <tuple>
 #include <type_traits>

-/** Этот класс предоставляет способ, чтобы оценить погрешность результата применения алгоритма HyperLogLog.
-  * Эмирические наблюдения показывают, что большие погрешности возникают при E < 5 * 2^precision, где
-  * E - возвращаемое значение алгоритмом HyperLogLog, и precision - параметр точности HyperLogLog.
-  * См. "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm".
+/** This class provides a way to evaluate the error in the result of applying the HyperLogLog algorithm.
+  * Empirical observations show that large errors occur at E < 5 * 2^precision, where
+  * E is the return value of the HyperLogLog algorithm, and `precision` is the HyperLogLog precision parameter.
+  * See "HyperLogLog in Practice: Algorithmic Engineering of a State of the Art Cardinality Estimation Algorithm".
  * (S. Heule et al., Proceedings of the EDBT 2013 Conference).
  */
 template <typename BiasData>
@@ -22,14 +22,14 @@ public:
        return false;
    }

-    /// Предельное количество уникальных значений до которого должна примениться поправка
-    /// из алгоритма LinearCounting.
+    /// Maximum number of unique values up to which the correction
+    /// from the LinearCounting algorithm should be applied.
    static double getThreshold()
    {
        return BiasData::getThreshold();
    }

-    /// Вернуть оценку погрешности.
+    /// Return the error estimate.
    static double getBias(double raw_estimate)
    {
        const auto & estimates = BiasData::getRawEstimates();
@@ -52,7 +52,7 @@ public:
        }
        else
        {
-            /// Получаем оценку погрешности путём линейной интерполяции.
+            /// We get the error estimate by linear interpolation.
            size_t index = std::distance(estimates.begin(), it);

            double estimate1 = estimates[index - 1];
@@ -60,7 +60,7 @@ public:

            double bias1 = biases[index - 1];
            double bias2 = biases[index];
-            /// Предполагается, что условие estimate1 < estimate2 всегда выполнено.
+            /// It is assumed that the estimate1 < estimate2 condition is always satisfied.
            double slope = (bias2 - bias1) / (estimate2 - estimate1);

            return bias1 + slope * (raw_estimate - estimate1);
@@ -68,7 +68,7 @@ public:
    }

 private:
-    /// Статические проверки.
+    /// Static checks.
    using TRawEstimatesRef = decltype(BiasData::getRawEstimates());
    using TRawEstimates = typename std::remove_reference<TRawEstimatesRef>::type;

@@ -82,10 +82,10 @@ private:
                  "Bias estimator has inconsistent data");
 };

-/** Тривиальный случай HyperLogLogBiasEstimator: употребляется, если не хотим исправить
-  * погрешность. Это имеет смысль при маленьких значениях параметра точности, например 5 или 12.
-  * Тогда применяются поправки из оригинальной версии алгоритма HyperLogLog.
-  * См. "HyperLogLog: The analysis of a near-optimal cardinality estimation algorithm"
+/** Trivial case of HyperLogLogBiasEstimator: used if we do not want to correct
+  * the error. This makes sense for small values of the precision parameter, for example 5 or 12.
+  * Then the corrections from the original version of the HyperLogLog algorithm are applied.
+  * See "HyperLogLog: The analysis of a near-optimal cardinality estimation algorithm"
  * (P. Flajolet et al., AOFA '07: Proceedings of the 2007 International Conference on Analysis
  * of Algorithms)
  */

--- a/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h
+++ b/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h
@@ -9,10 +9,10 @@ namespace DB
 {


-/** Для маленького количества ключей - массив фиксированного размера "на стеке".
-  * Для большого - выделяется HyperLogLog.
-  * Смотрите также более практичную реализацию в CombinedCardinalityEstimator.h,
-  *  где используется также хэш-таблица для множеств среднего размера.
+/** For a small number of keys - an array of fixed size "on the stack".
+  * For a large number of keys - a HyperLogLog is allocated.
+  * See also the more practical implementation in CombinedCardinalityEstimator.h,
+  *  where a hash table is also used for medium-sized sets.
  */
 template
 <
@@ -39,7 +39,7 @@ private:
    {
        CurrentMemoryTracker::alloc(sizeof(large));

-        /// На время копирования данных из tiny, устанавливать значение large ещё нельзя (иначе оно перезатрёт часть данных).
+        /// At the time of copying data from `tiny`, setting the value of `large` is still not possible (otherwise it will overwrite some data).
        Large * tmp_large = new Large;

        for (const auto & x : small)
@@ -99,7 +99,7 @@ public:
        }
    }

-    /// Можно вызывать только для пустого объекта.
+    /// Can only be called for an empty object.
    void read(DB::ReadBuffer & in)
    {
        bool is_large;

--- a/dbms/src/Common/Increment.h
+++ b/dbms/src/Common/Increment.h
@@ -3,24 +3,24 @@
 #include <Common/CounterInFile.h>


-/** Позволяет получать авто-инкрементное число, храня его в файле.
-  * Предназначен для редких вызовов (не рассчитан на производительность).
+/** Lets you receive an auto-increment number, storing it in a file.
+  * Designed for rare calls (not designed for performance).
  */
 class Increment
 {
 public:
-    /// path - имя файла, включая путь
+    /// path - the name of the file, including the path
    Increment(const std::string & path_) : counter(path_) {}

-    /** Получить следующее число.
-      * Если параметр create_if_need не установлен в true, то
-      *  в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
+    /** Get the next number.
+      * If the `create_if_need` parameter is not set to true, then
+      *  the file must already have a number written (if not - create the file manually with zero).
      *
-      * Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
-      * (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
+      * To protect against race conditions between different processes, file locks are used.
+      * (But when the file is first created, a race condition is possible, so it's better to create the file in advance.)
      *
-      * locked_callback вызывается при заблокированном файле со счетчиком. В него передается новое значение.
-      * locked_callback можно использовать, чтобы делать что-нибудь атомарно с увеличением счетчика (например, переименовывать файлы).
+      * `locked_callback` is called when the counter file is locked. A new value is passed to it.
+      * `locked_callback` can be used to do something atomically with the increment of the counter (for example, rename files).
      */
    template <typename Callback>
    UInt64 get(Callback && locked_callback, bool create_if_need = false)
@@ -33,25 +33,25 @@ public:
        return getBunch(1, create_if_need);
    }

-    /// Посмотреть следующее значение.
+    /// Peek the next value.
    UInt64 peek(bool create_if_need = false)
    {
        return getBunch(0, create_if_need);
    }

-    /** Получить следующее число и увеличить счетчик на count.
-     * Если параметр create_if_need не установлен в true, то
-     *  в файле уже должно быть записано какое-нибудь число (если нет - создайте файл вручную с нулём).
+    /** Get the next number and increase the counter by `count`.
+     * If the `create_if_need` parameter is not set to true, then
+     * the file should already have a number written (if not - create the file manually with zero).
     *
-     * Для защиты от race condition-ов между разными процессами, используются файловые блокировки.
-     * (Но при первом создании файла race condition возможен, так что лучше создать файл заранее.)
+     * To protect against race conditions between different processes, file locks are used.
+     * (But when the file is first created, a race condition is possible, so it's better to create the file in advance.)
     */
    UInt64 getBunch(UInt64 count, bool create_if_need = false)
    {
        return static_cast<UInt64>(counter.add(static_cast<Int64>(count), create_if_need) - count + 1);
    }

-    /// Изменить путь к файлу.
+    /// Change the path to the file.
    void setPath(std::string path_)
    {
        counter.setPath(path_);
@@ -67,7 +67,7 @@ private:
 };


-/** То же самое, но без хранения в файле.
+/** The same, but without storing it in a file.
  */
 struct SimpleIncrement : private boost::noncopyable
 {

--- a/dbms/src/Common/Macros.h
+++ b/dbms/src/Common/Macros.h
@@ -7,7 +7,7 @@
 namespace DB
 {

-/** Раскрывает в строке макросы из конфига.
+/** Expands macros from the config in a string.
  */
 class Macros
 {
@@ -15,8 +15,8 @@ public:
    Macros();
    Macros(const Poco::Util::AbstractConfiguration & config, const String & key);

-    /** Заменить в строке подстроки вида {macro_name} на значение для macro_name, полученное из конфига.
-      * level - уровень рекурсии.
+    /** Replace substrings of the form {macro_name} in the string with the value for macro_name obtained from the config.
+      * level - the level of recursion.
      */
    String expand(const String & s, size_t level = 0) const;


--- a/dbms/src/Common/MemoryTracker.h
+++ b/dbms/src/Common/MemoryTracker.h
@@ -102,10 +102,10 @@ public:
 };


-/** Объект MemoryTracker довольно трудно протащить во все места, где выделяются существенные объёмы памяти.
-  * Поэтому, используется thread-local указатель на используемый MemoryTracker или nullptr, если его не нужно использовать.
-  * Этот указатель выставляется, когда в данном потоке следует отслеживать потребление памяти.
-  * Таким образом, его нужно всего-лишь протащить во все потоки, в которых обрабатывается один запрос.
+/** The MemoryTracker object is quite difficult to drag to all places where significant amounts of memory are allocated.
+  * Therefore, a thread-local pointer to the MemoryTracker in use is kept, or nullptr if none should be used.
+  * This pointer is set when memory consumption should be tracked in this thread.
+  * So, you just need to pass it to all the threads that process a single query.
  */
 extern __thread MemoryTracker * current_memory_tracker;


--- a/dbms/src/Common/OptimizedRegularExpression.h
+++ b/dbms/src/Common/OptimizedRegularExpression.h
@@ -12,20 +12,20 @@
 #endif


-/** Использует два способа оптимизации регулярного выражения:
-  * 1. Если регулярное выражение является тривиальным (сводится к поиску подстроки в строке),
-  *     то заменяет поиск на strstr или strcasestr.
-  * 2. Если регулярное выражение содержит безальтернативную подстроку достаточной длины,
-  *     то перед проверкой используется strstr или strcasestr достаточной длины;
-  *     регулярное выражение проверяется полностью только если подстрока найдена.
-  * 3. В остальных случаях, используется движок re2.
+/** Uses two ways to optimize a regular expression:
+  * 1. If the regular expression is trivial (reduces to finding a substring in a string),
+  *     then replaces the search with strstr or strcasestr.
+  * 2. If the regular expression contains a non-alternative substring of sufficient length,
+  *     then before testing, strstr or strcasestr of sufficient length is used;
+  *     regular expression is only fully checked if a substring is found.
+  * 3. In other cases, the re2 engine is used.
  *
-  * Это имеет смысл, так как strstr и strcasestr в libc под Linux хорошо оптимизированы.
+  * This makes sense, since strstr and strcasestr in libc for Linux are well optimized.
  *
-  * Подходит, если одновременно выполнены следующие условия:
-  * - если в большинстве вызовов, регулярное выражение не матчится;
-  * - если регулярное выражение совместимо с движком re2;
-  * - можете использовать на свой риск, так как, возможно, не все случаи учтены.
+  * Suitable if the following conditions are simultaneously met:
+  * - if in most calls, the regular expression does not match;
+  * - if the regular expression is compatible with the re2 engine;
+  * - you can use at your own risk, since, probably, not all cases are taken into account.
  */

 namespace OptimizedRegularExpressionDetails
@@ -82,7 +82,7 @@ public:

    unsigned getNumberOfSubpatterns() const { return number_of_subpatterns; }

-    /// Получить регексп re2 или nullptr, если шаблон тривиален (для вывода в лог).
+    /// Get the regexp re2 or nullptr if the pattern is trivial (for output to the log).
    const std::unique_ptr<RegexType>& getRE2() const { return re2; }

    static void analyze(const std::string & regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix);

--- a/dbms/src/Common/OptimizedRegularExpression.inl
+++ b/dbms/src/Common/OptimizedRegularExpression.inl
@@ -15,12 +15,12 @@ void OptimizedRegularExpressionImpl<b>::analyze(
 	bool & is_trivial,
 	bool & required_substring_is_prefix)
 {
-	/** Выражение тривиально, если в нём все метасимволы эскейплены.
-	  * Безальтернативная строка - это
-	  *  строка вне скобок,
-	  *  в которой все метасимволы эскейплены,
-	  *  а также если вне скобок нет '|',
-	  *  а также избегаются подстроки вида http:// или www.
+	/** The expression is trivial if all the metacharacters in it are escaped.
+	  * The non-alternative string is
+	  *  a string outside parentheses,
+	  *  in which all metacharacters are escaped,
+	  *  and also if there are no '|' outside the parentheses,
+	  *  and also substrings of the form `http://` or `www` are avoided.
 	  */
 	const char * begin = regexp.data();
 	const char * pos = begin;
@@ -31,7 +31,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
 	required_substring.clear();
 	bool has_alternative_on_depth_0 = false;

-	/// Подстрока с позицией.
+ 	/// Substring with a position.
 	typedef std::pair<std::string, size_t> Substring;

 	typedef std::vector<Substring> Substrings;
@@ -66,7 +66,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
 						}
 						break;
 					default:
-						/// все остальные escape-последовательности не поддерживаем
+ 						/// all other escape sequences are not supported
 						is_trivial = false;
 						if (!last_substring->first.empty())
 						{
@@ -157,7 +157,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
 				++pos;
 				break;

-			/// Квантификаторы, допускающие нулевое количество.
+			/// Quantifiers that allow zero occurrences.
 			case '{':
 				in_curly_braces = true;
 			case '?': case '*':
@@ -179,7 +179,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
 				++pos;
 				break;

-			ordinary:	/// Обычный, не заэскейпленный символ.
+ 			ordinary:   /// Normal, not escaped symbol.
 			default:
 				if (depth == 0 && !in_curly_braces && !in_square_braces)
 				{
@@ -199,8 +199,8 @@ void OptimizedRegularExpressionImpl<b>::analyze(
 	{
 		if (!has_alternative_on_depth_0)
 		{
-			/** Выберем безальтернативную подстроку максимальной длины, среди префиксов,
-			  *  или безальтернативную подстроку максимальной длины.
+			/** Choose the longest non-alternative substring among the prefixes,
+			  *  or, failing that, the longest non-alternative substring overall.
 			  */
 			size_t max_length = 0;
 			Substrings::const_iterator candidate_it = trivial_substrings.begin();
@@ -208,7 +208,7 @@ void OptimizedRegularExpressionImpl<b>::analyze(
 			{
 				if (((it->second == 0 && candidate_it->second != 0)
 						|| ((it->second == 0) == (candidate_it->second == 0) && it->first.size() > max_length))
-					/// Тюнинг для предметной области
+ 					/// Tuning for the domain
 					&& (it->first.size() > strlen("://") || strncmp(it->first.data(), "://", strlen("://")))
 					&& (it->first.size() > strlen("http://") || strncmp(it->first.data(), "http", strlen("http")))
 					&& (it->first.size() > strlen("www.") || strncmp(it->first.data(), "www", strlen("www")))
@@ -246,7 +246,7 @@ OptimizedRegularExpressionImpl<b>::OptimizedRegularExpressionImpl(const std::str
 {
 	analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);

-	/// Поддерживаются 3 опции
+ 	/// 3 options are supported
 	if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))
 		throw Poco::Exception("OptimizedRegularExpression: Unsupported option.");

@@ -257,7 +257,7 @@ OptimizedRegularExpressionImpl<b>::OptimizedRegularExpressionImpl(const std::str
 	number_of_subpatterns = 0;
 	if (!is_trivial)
 	{
-		/// Скомпилируем регулярное выражение re2.
+ 		/// Compile the re2 regular expression.
 		typename RegexType::Options options;

 		if (is_case_insensitive)

--- a/dbms/src/Common/PODArray.h
+++ b/dbms/src/Common/PODArray.h
@@ -19,33 +19,33 @@
 namespace DB
 {

-/** Динамический массив для POD-типов.
-  * Предназначен для небольшого количества больших массивов (а не большого количества маленьких).
-  * А точнее - для использования в ColumnVector.
-  * Отличается от std::vector тем, что не инициализирует элементы.
+/** A dynamic array for POD types.
+  * Designed for a small number of large arrays (rather than a lot of small ones).
+  * To be more precise - for use in ColumnVector.
+  * It differs from std::vector in that it does not initialize the elements.
  *
-  * Сделан некопируемым, чтобы не было случайных копий. Скопировать данные можно с помощью метода assign.
+  * Made noncopyable so that there are no accidental copies. You can copy the data using the `assign` method.
  *
-  * Поддерживается только часть интерфейса std::vector.
+  * Only part of the std::vector interface is supported.
  *
-  * Конструктор по-умолчанию создаёт пустой объект, который не выделяет память.
-  * Затем выделяется память минимум в INITIAL_SIZE байт.
+  * The default constructor creates an empty object that does not allocate memory.
+  * After that, at least INITIAL_SIZE bytes of memory are allocated.
  *
-  * Если вставлять элементы push_back-ом, не делая reserve, то PODArray примерно в 2.5 раза быстрее std::vector.
+  * If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
  *
-  * Шаблонный параметр pad_right - всегда выделять в конце массива столько неиспользуемых байт.
-  * Может использоваться для того, чтобы делать оптимистичное чтение, запись, копирование невыровненными SIMD-инструкциями.
+  * The template parameter `pad_right_` - always allocate this many unused bytes at the end of the array.
+  * Can be used for optimistic reads, writes and copies with unaligned SIMD instructions.
  */
 template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>, size_t pad_right_ = 0>
 class PODArray : private boost::noncopyable, private TAllocator    /// empty base optimization
 {
 private:
-    /// Округление padding-а вверх до целого количества элементов, чтобы упростить арифметику.
+    /// Round padding up to an integer number of elements to simplify arithmetic.
    static constexpr size_t pad_right = (pad_right_ + sizeof(T) - 1) / sizeof(T) * sizeof(T);

    char * c_start             = nullptr;
    char * c_end             = nullptr;
-    char * c_end_of_storage = nullptr;    /// Не включает в себя pad_right.
+    char * c_end_of_storage = nullptr;    /// Does not include pad_right.

    T * t_start()                         { return reinterpret_cast<T *>(c_start); }
    T * t_end()                         { return reinterpret_cast<T *>(c_end); }
@@ -55,10 +55,10 @@ private:
    const T * t_end() const             { return reinterpret_cast<const T *>(c_end); }
    const T * t_end_of_storage() const     { return reinterpret_cast<const T *>(c_end_of_storage); }

-    /// Количество памяти, занимаемое num_elements элементов.
+    /// The amount of memory occupied by num_elements elements.
    static size_t byte_size(size_t num_elements) { return num_elements * sizeof(T); }

-    /// Минимальное количество памяти, которое нужно выделить для num_elements элементов, включая padding.
+    /// Minimum amount of memory to allocate for num_elements, including padding.
    static size_t minimum_memory_for_elements(size_t num_elements) { return byte_size(num_elements) + pad_right; }

    void alloc_for_num_elements(size_t num_elements)
@@ -112,7 +112,7 @@ public:

    size_t allocated_size() const { return c_end_of_storage - c_start + pad_right; }

-    /// Просто typedef нельзя, так как возникает неоднозначность для конструкторов и функций assign.
+    /// You can not just use `typedef`, because there is ambiguity for the constructors and `assign` functions.
    struct iterator : public boost::iterator_adaptor<iterator, T*>
    {
        iterator() {}
@@ -209,7 +209,7 @@ public:
        c_end = c_start + byte_size(n);
    }

-    /// Как resize, но обнуляет новые элементы.
+    /// Same as resize, but zeros new elements.
    void resize_fill(size_t n)
    {
        size_t old_size = size();
@@ -261,7 +261,7 @@ public:
        c_end -= byte_size(1);
    }

-    /// Не вставляйте в массив кусок самого себя. Потому что при ресайзе, итераторы на самого себя могут инвалидироваться.
+    /// Do not insert a part of the array into itself, because on resize the iterators pointing into it may be invalidated.
    template <typename It1, typename It2>
    void insert(It1 from_begin, It2 from_end)
    {
@@ -458,7 +458,7 @@ void swap(PODArray<T, INITIAL_SIZE, TAllocator, pad_right_> & lhs, PODArray<T, I
    lhs.swap(rhs);
 }

-/** Для столбцов. Padding-а хватает, чтобы читать и писать xmm-регистр по адресу последнего элемента. */
+/** For columns. Padding is enough to read and write xmm-register at the address of the last element. */
 template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>>
 using PaddedPODArray = PODArray<T, INITIAL_SIZE, TAllocator, 15>;


--- a/dbms/src/Common/PoolBase.h
+++ b/dbms/src/Common/PoolBase.h
@@ -8,8 +8,8 @@
 #include <common/logger_useful.h>
 #include <Common/Exception.h>

-/** Класс, от которого можно унаследоваться и получить пул чего-нибудь. Используется для пулов соединений с БД.
-  * Наследник должен предоставить метод для создания нового объекта для помещения в пул.
+/** A class from which you can inherit and get a pool of something. Used for database connection pools.
+  * The derived class must provide a method for creating a new object to place in the pool.
  */

 template <typename TObject>
@@ -22,7 +22,7 @@ public:

 private:

-    /** Объект с флагом, используется ли он сейчас. */
+    /** An object with a flag indicating whether it is currently in use. */
    struct PooledObject
    {
        PooledObject(ObjectPtr object_, PoolBase & pool_)
@@ -37,8 +37,8 @@ private:

    using Objects = std::vector<std::shared_ptr<PooledObject>>;

-    /** Помощник, который устанавливает флаг использования объекта, а в деструкторе - снимает,
-      *  а также уведомляет о событии с помощью condvar-а.
+    /** A helper that sets the object's in-use flag and clears it in the destructor,
+      *  and also notifies about the event using a condvar.
      */
    struct PoolEntryHelper
    {
@@ -54,19 +54,19 @@ private:
    };

 public:
-    /** То, что выдаётся пользователю. */
+    /** What is given to the user. */
    class Entry
    {
    public:
        friend class PoolBase<Object>;

-        Entry() {}    /// Для отложенной инициализации.
+        Entry() {}    /// For deferred initialization.

-        /** Объект Entry защищает ресурс от использования другим потоком.
-         * Следующие методы запрещены для rvalue, чтобы нельзя было написать подобное
+        /** The `Entry` object protects the resource from being used by another thread.
+         * The following methods are forbidden for rvalues, so that you cannot write something like this:
         *
-         * auto q = pool.Get()->query("SELECT .."); // Упс, после этой строчки Entry уничтожился
-         * q.execute();  // Кто-то еще может использовать этот Connection
+         * auto q = pool.Get()->query("SELECT .."); // Oops, after this line Entry was destroyed
+         * q.execute();  // Someone else may already be using this Connection
         */
        Object * operator->() && = delete;
        const Object * operator->() const && = delete;
@@ -95,7 +95,7 @@ public:

    virtual ~PoolBase() {}

-    /** Выделяет объект для работы. При timeout < 0 таймаут бесконечный. */
+    /** Allocates the object for the job. With timeout < 0, the timeout is infinite. */
    Entry get(Poco::Timespan::TimeDiff timeout)
    {
        std::unique_lock<std::mutex> lock(mutex);
@@ -131,13 +131,13 @@ public:
    }

 private:
-    /** Максимальный размер пула. */
+    /** The maximum size of the pool. */
    unsigned max_items;

-    /** Пул. */
+    /** Pool. */
    Objects items;

-    /** Блокировка для доступа к пулу. */
+    /** Lock for accessing the pool. */
    std::mutex mutex;
    std::condition_variable available;

@@ -151,7 +151,7 @@ protected:
        items.reserve(max_items);
    }

-    /** Создает новый объект для помещения в пул. */
+    /** Creates a new object to put in the pool. */
    virtual ObjectPtr allocObject() = 0;
 };

--- a/dbms/src/Common/RadixSort.h
+++ b/dbms/src/Common/RadixSort.h
@@ -13,18 +13,18 @@
 #include <Core/Defines.h>


-/** Поразрядная сортировка, обладает следующей функциональностью:
-  * Может сортировать unsigned, signed числа, а также float-ы.
-  * Может сортировать массив элементов фиксированной длины, которые содержат что-то ещё кроме ключа.
-  * Настраиваемый размер разряда.
+/** Radix sort, has the following functionality:
+  * Can sort unsigned, signed numbers, and floats.
+  * Can sort an array of fixed length elements that contain something else besides the key.
+  * Customizable digit size.
  *
  * LSB, stable.
-  * NOTE Для некоторых приложений имеет смысл добавить MSB-radix-sort,
-  *  а также алгоритмы radix-select, radix-partial-sort, radix-get-permutation на его основе.
+  * NOTE For some applications it makes sense to add MSB-radix-sort,
+  *  as well as radix-select, radix-partial-sort, radix-get-permutation algorithms based on it.
  */


-/** Используется в качестве параметра шаблона. См. ниже.
+/** Used as a template parameter. See below.
  */
 struct RadixSortMallocAllocator
 {
@@ -40,16 +40,16 @@ struct RadixSortMallocAllocator
 };


-/** Преобразование, которое переводит битовое представление ключа в такое целое беззнаковое число,
-  *  что отношение порядка над ключами будет соответствовать отношению порядка над полученными беззнаковыми числами.
-  * Для float-ов это преобразование делает следующее:
-  *  если выставлен знаковый бит, то переворачивает все остальные биты.
-  * При этом, NaN-ы оказываются больше всех нормальных чисел.
+/** A transformation that maps the bit representation of a key to an unsigned integer
+  *  such that the order relation over the keys matches the order relation over the resulting unsigned numbers.
+  * For floats this conversion does the following:
+  *  if the sign bit is set, it flips all other bits.
+  * In this case, NaNs turn out to be greater than all normal numbers.
  */
 template <typename KeyBits>
 struct RadixSortFloatTransform
 {
-    /// Стоит ли записывать результат в память, или лучше делать его каждый раз заново?
+    /// Is it worth writing the result in memory, or is it better to do it every time again?
    static constexpr bool transform_is_simple = false;

    static KeyBits forward(KeyBits x)
@@ -67,24 +67,24 @@ struct RadixSortFloatTransform
 template <typename Float>
 struct RadixSortFloatTraits
 {
-    using Element = Float;        /// Тип элемента. Это может быть структура с ключём и ещё каким-то payload-ом. Либо просто ключ.
-    using Key = Float;            /// Ключ, по которому нужно сортировать.
-    using CountType = uint32_t;    /// Тип для подсчёта гистограмм. В случае заведомо маленького количества элементов, может быть меньше чем size_t.
+    using Element = Float;        /// The type of the element. It can be a structure with a key and some other payload. Or just a key.
+    using Key = Float;            /// The key to sort.
+    using CountType = uint32_t;   /// Type for calculating histograms. In the case of a known small number of elements, it can be less than size_t.

-    /// Тип, в который переводится ключ, чтобы делать битовые операции. Это UInt такого же размера, как ключ.
+    /// The type the key is converted to in order to do bit operations. It is a UInt of the same size as the key.
    using KeyBits = typename std::conditional<sizeof(Float) == 8, uint64_t, uint32_t>::type;

-    static constexpr size_t PART_SIZE_BITS = 8;    /// Какими кусочками ключа в количестве бит делать один проход - перестановку массива.
+    static constexpr size_t PART_SIZE_BITS = 8;    /// The size, in bits, of the key pieces processed in one pass (one reshuffle of the array).

-    /// Преобразования ключа в KeyBits такое, что отношение порядка над ключём соответствует отношению порядка над KeyBits.
+    /// Conversion of a key to KeyBits such that the order relation over keys corresponds to the order relation over KeyBits.
    using Transform = RadixSortFloatTransform<KeyBits>;

-    /// Объект с функциями allocate и deallocate.
-    /// Может быть использован, например, чтобы выделить память для временного массива на стеке.
-    /// Для этого сам аллокатор создаётся на стеке.
+    /// An object with the functions allocate and deallocate.
+    /// Can be used, for example, to allocate memory for a temporary array on the stack.
+    /// To do this, the allocator itself is created on the stack.
    using Allocator = RadixSortMallocAllocator;

-    /// Функция получения ключа из элемента массива.
+    /// The function to get the key from an array element.
    static Key & extractKey(Element & elem) { return elem; }
 };

@@ -122,7 +122,7 @@ struct RadixSortUIntTraits
    using Transform = RadixSortIdentityTransform<KeyBits>;
    using Allocator = RadixSortMallocAllocator;

-    /// Функция получения ключа из элемента массива.
+    /// The function to get the key from an array element.
    static Key & extractKey(Element & elem) { return elem; }
 };

@@ -139,7 +139,7 @@ struct RadixSortIntTraits
    using Transform = RadixSortSignedTransform<KeyBits>;
    using Allocator = RadixSortMallocAllocator;

-    /// Функция получения ключа из элемента массива.
+    /// The function to get the key from an array element.
    static Key & extractKey(Element & elem) { return elem; }
 };

@@ -172,19 +172,19 @@ private:
 public:
    static void execute(Element * arr, size_t size)
    {
-        /// Если массив имеет размер меньше 256, то лучше использовать другой алгоритм.
+        /// If the array is smaller than 256, then it is better to use another algorithm.

-        /// Здесь есть циклы по NUM_PASSES. Очень важно, что они разворачиваются в compile-time.
+        /// There are loops over NUM_PASSES here. It is very important that they are unrolled at compile time.

-        /// Для каждого из NUM_PASSES кусков бит ключа, считаем, сколько раз каждое значение этого куска встретилось.
+        /// For each of the NUM_PASSES bit pieces of the key, count how many times each value of that piece occurred.
        CountType histograms[HISTOGRAM_SIZE * NUM_PASSES] = {0};

        typename Traits::Allocator allocator;

-        /// Будем делать несколько проходов по массиву. На каждом проходе, данные перекладываются в другой массив. Выделим этот временный массив.
+        /// We will do several passes through the array. On each pass, the data is transferred to another array. Let's allocate this temporary array.
        Element * swap_buffer = reinterpret_cast<Element *>(allocator.allocate(size * sizeof(Element)));

-        /// Трансформируем массив и вычисляем гистограмму.
+        /// Transform the array and calculate the histogram.
        for (size_t i = 0; i < size; ++i)
        {
            if (!Traits::Transform::transform_is_simple)
@@ -195,7 +195,7 @@ public:
        }

        {
-            /// Заменяем гистограммы на суммы с накоплением: значение в позиции i равно сумме в предыдущих позициях минус один.
+            /// Replace the histograms with the accumulated sums: the value in position i is the sum of the previous positions minus one.
            size_t sums[NUM_PASSES] = {0};

            for (size_t i = 0; i < HISTOGRAM_SIZE; ++i)
@@ -209,7 +209,7 @@ public:
            }
        }

-        /// Перекладываем элементы в порядке начиная от младшего куска бит, и далее делаем несколько проходов по количеству кусков.
+        /// Move the elements in order, starting from the least significant piece of bits, and then make as many passes as there are pieces.
        for (size_t j = 0; j < NUM_PASSES; ++j)
        {
            Element * writer = j % 2 ? arr : swap_buffer;
@@ -219,17 +219,17 @@ public:
            {
                size_t pos = getPart(j, keyToBits(Traits::extractKey(reader[i])));

-                /// Размещаем элемент на следующей свободной позиции.
+                /// Place the element on the next free position.
                auto & dest = writer[++histograms[j * HISTOGRAM_SIZE + pos]];
                dest = reader[i];

-                /// На последнем перекладывании, делаем обратную трансформацию.
+                /// On the last pass, we do the reverse transformation.
                if (!Traits::Transform::transform_is_simple && j == NUM_PASSES - 1)
                    Traits::extractKey(dest) = bitsToKey(Traits::Transform::backward(keyToBits(Traits::extractKey(reader[i]))));
            }
        }

-        /// Если число проходов нечётное, то результирующий массив находится во временном буфере. Скопируем его на место исходного массива.
+        /// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
        if (NUM_PASSES % 2)
            memcpy(arr, swap_buffer, size * sizeof(Element));


--- a/dbms/src/Common/ShellCommand.h
+++ b/dbms/src/Common/ShellCommand.h
@@ -9,19 +9,19 @@ namespace DB
 {


-/** Позволяет запустить команду,
-  *  читать её stdout, stderr, писать в stdin,
-  *  дождаться завершения.
+/** Lets you run a command,
+  *  read its stdout and stderr, write to its stdin,
+  *  and wait for its completion.
  *
-  * Реализация похожа на функцию popen из POSIX (посмотреть можно в исходниках libc).
+  * The implementation is similar to the popen function from POSIX (see libc source code).
  *
-  * Наиболее важное отличие: использует vfork вместо fork.
-  * Это сделано, потому что fork не работает (с ошибкой о нехватке памяти),
-  *  при некоторых настройках overcommit-а, если размер адресного пространства процесса больше половины количества доступной памяти.
-  * Также, изменение memory map-ов - довольно ресурсоёмкая операция.
+  * The most important difference: uses vfork instead of fork.
+  * This is done because fork fails (with an out-of-memory error)
+  *  with some overcommit settings, if the address space of the process is more than half the amount of available memory.
+  * Also, changing memory maps is a fairly resource-intensive operation.
  *
-  * Второе отличие - позволяет работать одновременно и с stdin, и с stdout, и с stderr запущенного процесса,
-  *  а также узнать код и статус завершения.
+  * The second difference: it allows working with stdin, stdout and stderr of the running process simultaneously,
+  *  and also finding out the exit code and the completion status.
  */
 class ShellCommand
 {
@@ -34,20 +34,20 @@ private:
    static std::unique_ptr<ShellCommand> executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only);

 public:
-    WriteBufferFromFile in;        /// Если команда читает из stdin, то не забудьте вызвать in.close() после записи туда всех данных.
+    WriteBufferFromFile in;        /// If the command reads from stdin, do not forget to call in.close() after writing all the data there.
    ReadBufferFromFile out;
    ReadBufferFromFile err;

-    /// Выполнить команду с использованием /bin/sh -c
+    /// Run the command using /bin/sh -c
    static std::unique_ptr<ShellCommand> execute(const std::string & command, bool pipe_stdin_only = false);

-    /// Выполнить исполняемый файл с указаннами аргументами. arguments - без argv[0].
+    /// Run the executable with the specified arguments. `arguments` - without argv[0].
    static std::unique_ptr<ShellCommand> executeDirect(const std::string & path, const std::vector<std::string> & arguments);

-    /// Подождать завершения процесса, кинуть исключение, если код не 0 или если процесс был завершён не самостоятельно.
+    /// Wait for the process to end, throw an exception if the code is not 0 or if the process was not completed by itself.
    void wait();

-    /// Подождать завершения процесса, узнать код возврата. Кинуть исключение, если процесс был завершён не самостоятельно.
+    /// Wait for the process to finish and get the return code. Throw an exception if the process did not terminate on its own.
    int tryWait();
 };


--- a/dbms/src/Common/SimpleCache.h
+++ b/dbms/src/Common/SimpleCache.h
@@ -6,13 +6,13 @@
 #include <ext/function_traits.hpp>


-/** Простейший кэш для свободной функции.
-  * Можете также передать статический метод класса или лямбду без захвата.
-  * Размер неограничен. Значения не устаревают.
-  * Для синхронизации используется mutex.
-  * Подходит только для простейших случаев.
+/** The simplest cache for a free function.
+  * You can also pass a static class method or lambda without capturing.
+  * The size is unlimited. Values never expire.
+  * A mutex is used for synchronization.
+  * Suitable only for the simplest cases.
  *
-  * Использование:
+  * Usage:
  *
  * SimpleCache<decltype(func), &func> func_cached;
  * std::cerr << func_cached(args...);
@@ -41,7 +41,7 @@ public:
                return it->second;
        }

-        /// Сами вычисления делаются не под mutex-ом.
+        /// The calculations themselves are not done under mutex.
        Result res = f(std::forward<Args>(args)...);

        {

--- a/dbms/src/Common/SipHash.h
+++ b/dbms/src/Common/SipHash.h
 #pragma once

-/** SipHash - быстрая криптографическая хэш функция для коротких строк.
-  * Взято отсюда: https://www.131002.net/siphash/
+/** SipHash is a fast cryptographic hash function for short strings.
+  * Taken from here: https://www.131002.net/siphash/
  *
-  * Сделано два изменения:
-  * - возвращает 128 бит, а не 64;
-  * - сделано потоковой (можно вычислять по частям).
+  * Two changes are made:
+  * - returns 128 bits, not 64;
+  * - made streaming (can be calculated in parts).
  *
-  * На коротких строках (URL, поисковые фразы) более чем в 3 раза быстрее MD5 от OpenSSL.
-  * (~ 700 МБ/сек., 15 млн. строк в секунду)
+  * On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
+  * (~ 700 MB/sec, 15 million strings per second)
  */

 #include <cstdint>
@@ -33,16 +33,16 @@ private:
    using u64 = DB::UInt64;
    using u8 = DB::UInt8;

-    /// Состояние.
+    /// State.
    u64 v0;
    u64 v1;
    u64 v2;
    u64 v3;

-    /// Сколько байт обработано.
+    /// How many bytes have been processed.
    u64 cnt;

-    /// Текущие 8 байт входных данных.
+    /// The current 8 bytes of input data.
    union
    {
        u64 current_word;
@@ -51,7 +51,7 @@ private:

    void finalize()
    {
-        /// В последний свободный байт пишем остаток от деления длины на 256.
+        /// In the last free byte, we write the remainder of the length divided by 256.
        current_bytes[7] = cnt;

        v3 ^= current_word;
@@ -67,10 +67,10 @@ private:
    }

 public:
-    /// Аргументы - seed.
+    /// Arguments - seed.
    SipHash(u64 k0 = 0, u64 k1 = 0)
    {
-        /// Инициализируем состояние некоторыми случайными байтами и seed-ом.
+        /// Initialize the state with some random bytes and seed.
        v0 = 0x736f6d6570736575ULL ^ k0;
        v1 = 0x646f72616e646f6dULL ^ k1;
        v2 = 0x6c7967656e657261ULL ^ k0;
@@ -84,7 +84,7 @@ public:
    {
        const char * end = data + size;

-        /// Дообработаем остаток от предыдущего апдейта, если есть.
+        /// Finish processing the remainder of the previous update, if any.
        if (cnt & 7)
        {
            while (cnt & 7 && data < end)
@@ -94,7 +94,7 @@ public:
                ++cnt;
            }

-            /// Если всё ещё не хватает байт до восьмибайтового слова.
+            /// If there are still not enough bytes to fill an 8-byte word.
            if (cnt & 7)
                return;

@@ -118,7 +118,7 @@ public:
            data += 8;
        }

-        /// Заполняем остаток, которого не хватает до восьмибайтового слова.
+        /// Pad the remainder, which is missing up to an 8-byte word.
        current_word = 0;
        switch (end - data)
        {
@@ -133,7 +133,7 @@ public:
        }
    }

-    /// Получить результат в некотором виде. Это можно сделать только один раз!
+    /// Get the result in some form. This can only be done once!

    void get128(char * out)
    {

--- a/dbms/src/Common/SmallObjectPool.h
+++ b/dbms/src/Common/SmallObjectPool.h
@@ -73,7 +73,7 @@ public:
        free_list = block;
    }

-    /// Размер выделенного пула в байтах
+    /// The size of the allocated pool in bytes
    size_t size() const
    {
        return pool.size();

--- a/dbms/src/Common/StackTrace.h
+++ b/dbms/src/Common/StackTrace.h
@@ -6,14 +6,14 @@
 #define STACK_TRACE_MAX_DEPTH 32


-/// Позволяет получить стек-трейс
+/// Lets you get a stacktrace
 class StackTrace
 {
 public:
-    /// Стектрейс снимается в момент создания объекта
+    /// The stacktrace is captured when the object is created
    StackTrace();

-    /// Вывести в строку
+    /// Print to string
    std::string toString() const;

 private:

--- a/dbms/src/Common/StringSearcher.h
+++ b/dbms/src/Common/StringSearcher.h
@@ -26,8 +26,8 @@ namespace ErrorCodes
 }


-/** Варианты поиска подстроки в строке.
-  * В большинстве случаев, менее производительные, чем Volnitsky (см. Volnitsky.h).
+/** Variants for finding a substring in a string.
+  * In most cases, less performant than Volnitsky (see Volnitsky.h).
  */


@@ -693,10 +693,10 @@ using UTF8CaseSensitiveStringSearcher = StringSearcher<true, false>;
 using UTF8CaseInsensitiveStringSearcher = StringSearcher<false, false>;


-/** Используют функции из libc.
-  * Имеет смысл использовать для коротких строк, когда требуется дешёвая инициализация.
-  * Нет варианта для регистронезависимого поиска UTF-8 строк.
-  * Требуется, чтобы за концом строк был нулевой байт.
+/** Uses functions from libc.
+  * It makes sense to use for short strings, when cheap initialization is required.
+  * There is no variant for case-insensitive search in UTF-8 strings.
+  * It is required that there is a zero byte after the end of the strings.
  */

 struct LibCASCIICaseSensitiveStringSearcher

--- a/dbms/src/Common/Throttler.h
+++ b/dbms/src/Common/Throttler.h
@@ -15,12 +15,12 @@ namespace ErrorCodes
 }


-/** Позволяет ограничить скорость чего либо (в штуках в секунду) с помощью sleep.
-  * Особенности работы:
-  * - считается только средняя скорость, от момента первого вызова функции add;
-  *   если были периоды с низкой скоростью, то в течение промежутка времени после них, скорость будет выше;
+/** Allows you to limit the speed of something (in pieces per second) using sleep.
+  * Specifics of work:
+  * - only the average speed is considered, from the moment of the first call of `add` function;
+  *   if there were periods with low speed, then during some time after them, the speed will be higher;
  *
-  * Также позволяет задать ограничение на максимальное количество в штуках. При превышении кидается исключение.
+  * Also allows you to set a limit on the maximum number of pieces. If the limit is exceeded, an exception is thrown.
  */
 class Throttler
 {
@@ -56,7 +56,7 @@ public:

        if (max_speed)
        {
-            /// Сколько должно было бы пройти времени, если бы скорость была равна max_speed.
+            /// How much time should have passed if the speed were equal to `max_speed`.
            UInt64 desired_ns = new_count * 1000000000 / max_speed;

            if (desired_ns > elapsed_ns)
@@ -65,7 +65,7 @@ public:
                timespec sleep_ts;
                sleep_ts.tv_sec = sleep_ns / 1000000000;
                sleep_ts.tv_nsec = sleep_ns % 1000000000;
-                nanosleep(&sleep_ts, nullptr);    /// NOTE Завершается раньше в случае сигнала. Это считается нормальным.
+                nanosleep(&sleep_ts, nullptr);    /// NOTE Ends early in case of a signal. This is considered normal.
            }
        }
    }
@@ -73,7 +73,7 @@ public:
 private:
    size_t max_speed = 0;
    size_t count = 0;
-    size_t limit = 0;        /// 0 - не ограничено.
+    size_t limit = 0;        /// 0 - not limited.
    const char * limit_exceeded_exception_message = nullptr;
    Stopwatch watch {CLOCK_MONOTONIC_COARSE};
    std::mutex mutex;

--- a/dbms/src/Common/UInt128.h
+++ b/dbms/src/Common/UInt128.h
@@ -9,7 +9,7 @@ namespace DB
 {


-/// Для агрегации по SipHash или конкатенации нескольких полей.
+/// For aggregation by SipHash or concatenation of several fields.
 struct UInt128
 {
 /// Suppress gcc7 warnings: 'prev_key.DB::UInt128::first' may be used uninitialized in this function
@@ -57,7 +57,7 @@ struct UInt128HashCRC32

 #else

-/// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
+/// On other platforms we do not necessarily use CRC32. NOTE This can be confusing.
 struct UInt128HashCRC32 : public UInt128Hash {};

 #endif
@@ -71,7 +71,7 @@ inline void readBinary(UInt128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
 inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }


-/** Используется при агрегации, для укладки большого количества ключей постоянной длины в хэш-таблицу.
+/** Used for aggregation, for putting a large number of constant-length keys in a hash table.
  */
 struct UInt256
 {
@@ -91,7 +91,7 @@ struct UInt256
    {
        return a == rhs.a && b == rhs.b && c == rhs.c && d == rhs.d;

-    /* Так получается не лучше.
+    /* This way it turns out to be no better.
        return 0xFFFF == _mm_movemask_epi8(_mm_and_si128(
            _mm_cmpeq_epi8(
                _mm_loadu_si128(reinterpret_cast<const __m128i *>(&a)),
@@ -139,13 +139,13 @@ struct UInt256HashCRC32

 #else

-/// На других платформах используем не обязательно CRC32. NOTE Это может сбить с толку.
+/// On other platforms we do not necessarily use CRC32. NOTE This can be confusing.
 struct UInt256HashCRC32
 {
    DefaultHash<UInt64> hash64;
    size_t operator()(UInt256 x) const
    {
-        /// TODO Это не оптимально.
+        /// TODO This is not optimal.
        return hash64(hash64(hash64(hash64(x.a) ^ x.b) ^ x.c) ^ x.d);
    }
 };

--- a/dbms/src/Common/UnicodeBar.h
+++ b/dbms/src/Common/UnicodeBar.h
@@ -8,7 +8,7 @@
 #define UNICODE_BAR_CHAR_SIZE (strlen("█"))


-/** Позволяет нарисовать unicode-art полоску, ширина которой отображается с разрешением 1/8 символа.
+/** Allows you to draw a unicode-art bar whose width is displayed with a resolution of 1/8 character.
  */


@@ -32,7 +32,7 @@ namespace UnicodeBar
        return ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE;
    }

-    /// В dst должно быть место для barWidthInBytes(width) символов и завершающего нуля.
+    /// In `dst` there must be a space for barWidthInBytes(width) characters and a trailing zero.
    inline void render(double width, char * dst)
    {
        size_t floor_width = floor(width);

--- a/dbms/src/Common/VirtualColumnUtils.h
+++ b/dbms/src/Common/VirtualColumnUtils.h
@@ -16,23 +16,23 @@ class Context;
 namespace VirtualColumnUtils
 {

-/// Вычислить минимальный числовый суффикс, который надо добавить к строке, чтобы она не присутствовала в множестве
+/// Calculate the minimum numeric suffix that must be appended to the string so that it is not present in the set.
 String chooseSuffix(const NamesAndTypesList & columns, const String & name);

-/// Вычислить минимальный общий числовый суффикс, который надо добавить к каждой строке,
-/// чтобы ни одна не присутствовала в множестве.
+/// Calculate the minimum common numeric suffix that must be appended to each string,
+/// so that none of them is present in the set.
 String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector<String> & names);

-/// Добавляет в селект запрос секцию select column_name as value
-/// Например select _port as 9000.
+/// Adds the section `select column_name as value` to the select query.
+/// For example, select _port as 9000.
 void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value);

-/// Оставить в блоке только строки, подходящие под секции WHERE и PREWHERE запроса.
-/// Рассматриваются только элементы внешней конъюнкции, зависящие только от столбцов, присутствующих в блоке.
-/// Возвращает true, если хоть одна строка выброшена.
+/// Leave in the block only the rows that match the WHERE and PREWHERE clauses of the query.
+/// Only the elements of the outer conjunction that depend solely on the columns present in the block are considered.
+/// Returns true if at least one row is discarded.
 bool filterBlockWithQuery(ASTPtr query, Block & block, const Context & context);

-/// Извлечь из входного потока множество значений столбца name
+/// Extract from the input stream a set of `name` column values
 template<typename T1>
 std::multiset<T1> extractSingleValueFromBlock(const Block & block, const String & name)
 {

--- a/dbms/src/Common/Volnitsky.h
+++ b/dbms/src/Common/Volnitsky.h
@@ -9,24 +9,24 @@
 #include <string.h>


-/** Поиск подстроки в строке по алгоритму Вольницкого:
+/** Search for a substring in a string by Volnitsky's algorithm
  * http://volnitsky.com/project/str_search/
  *
-  * haystack и needle могут содержать нулевые байты.
+  * `haystack` and `needle` can contain null bytes.
  *
-  * Алгоритм:
-  * - при слишком маленьком или слишком большом размере needle, или слишком маленьком haystack, используем std::search или memchr;
-  * - при инициализации, заполняем open-addressing linear probing хэш-таблицу вида:
-  *    хэш от биграммы из needle -> позиция этой биграммы в needle + 1.
-  *    (прибавлена единица только чтобы отличить смещение ноль от пустой ячейки)
-  * - в хэш-таблице ключи не хранятся, хранятся только значения;
-  * - биграммы могут быть вставлены несколько раз, если они встречаются в needle несколько раз;
-  * - при поиске, берём из haystack биграмму, которая должна соответствовать последней биграмме needle (сравниваем с конца);
-  * - ищем её в хэш-таблице, если нашли - достаём смещение из хэш-таблицы и сравниваем строку побайтово;
-  * - если сравнить не получилось - проверяем следующую ячейку хэш-таблицы из цепочки разрешения коллизий;
-  * - если не нашли, пропускаем в haystack почти размер needle байт;
+  * Algorithm:
+  * - if the `needle` is too small or too large, or the `haystack` is too small, use std::search or memchr;
+  * - when initializing, fill in an open-addressing linear probing hash table of the form
+  *    hash from the bigram of needle -> the position of this bigram in needle + 1.
+  *    (one is added only to distinguish zero offset from an empty cell)
+  * - the keys are not stored in the hash table, only the values are stored;
+  * - bigrams can be inserted several times if they occur in the needle several times;
+  * - when searching, take the bigram from the haystack that should correspond to the last bigram of the needle (comparing from the end);
+  * - look it up in the hash table; if found, get the offset from the hash table and compare the string bytewise;
+  * - if the comparison fails, check the next cell of the hash table in the collision resolution chain;
+  * - if not found, skip almost needle-size bytes in the haystack;
  *
-  * Используется невыровненный доступ к памяти.
+  * Unaligned memory access is used.
  */


@@ -39,28 +39,28 @@ template <typename CRTP>
 class VolnitskyBase
 {
 protected:
-    using offset_t = uint8_t;    /// Смещение в needle. Для основного алгоритма, длина needle не должна быть больше 255.
-    using ngram_t = uint16_t;    /// n-грамма (2 байта).
+    using offset_t = uint8_t;    /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
+    using ngram_t = uint16_t;    /// n-gram (2 bytes).

    const UInt8 * const needle;
    const size_t needle_size;
    const UInt8 * const needle_end = needle + needle_size;
-    /// На сколько двигаемся, если n-грамма из haystack не нашлась в хэш-таблице.
+    /// How far we advance if the n-gram from the haystack is not found in the hash table.
    const size_t step = needle_size - sizeof(ngram_t) + 1;

    /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
     *    storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
-    static const size_t hash_size = 64 * 1024;    /// Помещается в L2-кэш.
-    offset_t hash[hash_size];    /// Хэш-таблица.
+    static const size_t hash_size = 64 * 1024;    /// Fits into the L2 cache.
+    offset_t hash[hash_size];    /// Hash table.

    /// min haystack size to use main algorithm instead of fallback
    static constexpr auto min_haystack_size_for_algorithm = 20000;
-    const bool fallback;                /// Нужно ли использовать fallback алгоритм.
+    const bool fallback;                /// Whether the fallback algorithm should be used.

 public:
-    /** haystack_size_hint - ожидаемый суммарный размер haystack при вызовах search. Можно не указывать.
-      * Если указать его достаточно маленьким, то будет использован fallback алгоритм,
-      *  так как считается, что тратить время на инициализацию хэш-таблицы не имеет смысла.
+    /** haystack_size_hint - the expected total size of the haystack for `search` calls. May be omitted.
+      * If you specify it small enough, the fallback algorithm will be used,
+      *  since it is considered that it's useless to waste time initializing the hash table.
      */
    VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
    : needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
@@ -79,7 +79,7 @@ public:
    }


-    /// Если не найдено - возвращается конец haystack.
+    /// If not found, the end of the haystack is returned.
    const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
    {
        if (needle_size == 0)
@@ -90,15 +90,15 @@ public:
        if (needle_size == 1 || fallback || haystack_size <= needle_size)
            return self().search_fallback(haystack, haystack_end);

-        /// Будем "прикладывать" needle к haystack и сравнивать n-грам из конца needle.
+        /// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
        const auto * pos = haystack + needle_size - sizeof(ngram_t);
        for (; pos <= haystack_end - needle_size; pos += step)
        {
-            /// Смотрим все ячейки хэш-таблицы, которые могут соответствовать n-граму из haystack.
+            /// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
            for (size_t cell_num = toNGram(pos) % hash_size; hash[cell_num];
                 cell_num = (cell_num + 1) % hash_size)
            {
-                /// Когда нашли - сравниваем побайтово, используя смещение из хэш-таблицы.
+                /// When found - compare bytewise, using the offset from the hash table.
                const auto res = pos - (hash[cell_num] - 1);

                if (self().compare(res))
@@ -106,7 +106,7 @@ public:
            }
        }

-        /// Оставшийся хвостик.
+        /// The remaining tail.
        return self().search_fallback(pos - step + 1, haystack_end);
    }

@@ -126,11 +126,11 @@ protected:

    void putNGramBase(const ngram_t ngram, const int offset)
    {
-        /// Кладём смещение для n-грама в соответствующую ему ячейку или ближайшую свободную.
+        /// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
        size_t cell_num = ngram % hash_size;

        while (hash[cell_num])
-            cell_num = (cell_num + 1) % hash_size; /// Поиск следующей свободной ячейки.
+            cell_num = (cell_num + 1) % hash_size; /// Search for the next free cell.

        hash[cell_num] = offset;
    }
@@ -272,15 +272,15 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
        }
        else
        {
-            /** n-грам (в случае n = 2)
-              *  может быть целиком расположен внутри одной кодовой точки,
-              *  либо пересекаться с двумя кодовыми точками.
+            /** n-gram (in the case of n = 2)
+              *  can be entirely located within one code point,
+              *  or intersect with two code points.
              *
-              * В первом случае, нужно рассматривать до двух альтернатив - эта кодовая точка в верхнем и нижнем регистре,
-              *  а во втором случае - до четырёх альтернатив - фрагменты двух кодовых точек во всех комбинациях регистров.
+              * In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
+              *  and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
              *
-              * При этом не учитывается зависимость перевода между регистрами от локали (пример - турецкие Ii)
-              *  а также композиция/декомпозиция и другие особенности.
+              * The dependence of case conversion on the locale is not taken into account (for example, Turkish `Ii`),
+              *  nor are composition / decomposition and other features.
              */

            using Seq = UInt8[6];

--- a/dbms/src/Common/formatReadable.h
+++ b/dbms/src/Common/formatReadable.h
@@ -4,14 +4,14 @@
 #include <IO/WriteBuffer.h>


-/// Выводит переданный размер в байтах в виде 123.45 GiB.
+/// Prints the given size in bytes as 123.45 GiB.
 void formatReadableSizeWithBinarySuffix(double value, DB::WriteBuffer & out, int precision = 2);
 std::string formatReadableSizeWithBinarySuffix(double value, int precision = 2);

-/// Выводит переданный размер в байтах в виде 132.55 GB.
+/// Prints the given size in bytes as 132.55 GB.
 void formatReadableSizeWithDecimalSuffix(double value, DB::WriteBuffer & out, int precision = 2);
 std::string formatReadableSizeWithDecimalSuffix(double value, int precision = 2);

-/// Выводит число в виде 123.45 billion.
+/// Prints the number as 123.45 billion.
 void formatReadableQuantity(double value, DB::WriteBuffer & out, int precision = 2);
 std::string formatReadableQuantity(double value, int precision = 2);
--- a/dbms/src/Common/getFQDNOrHostName.h
+++ b/dbms/src/Common/getFQDNOrHostName.h
@@ -2,7 +2,7 @@

 #include <string>

-/** Получить FQDN для локального сервера путём DNS-резолвинга hostname - аналогично вызову утилиты hostname с флагом -f.
-  * Если не получилось отрезолвить, то вернуть hostname - аналогично вызову утилиты hostname без флагов или uname -n.
+/** Get the FQDN of the local server by DNS-resolving the hostname - similar to calling the hostname utility with the -f flag.
+  * If the resolution fails, return the hostname - similar to calling the hostname utility without flags, or uname -n.
  */
 const std::string & getFQDNOrHostName();
--- a/dbms/src/Common/isLocalAddress.h
+++ b/dbms/src/Common/isLocalAddress.h
@@ -12,13 +12,13 @@ namespace Poco
 namespace DB
 {

-    /** Позволяет проверить, похож ли адрес на localhost.
-     * Цель этой проверки обычно состоит в том, чтобы сделать предположение,
-     *  что при хождении на этот адрес через интернет, мы попадём на себя.
-     * Следует иметь ввиду, что эта проверка делается неточно:
-     * - адрес просто сравнивается с адресами сетевых интерфейсов;
-     * - для каждого сетевого интерфейса берётся только первый адрес;
-     * - не проверяются правила маршрутизации, которые влияют, через какой сетевой интерфейс мы пойдём на заданный адрес.
+    /** Allows checking whether the address looks like `localhost`.
+     * The purpose of this check is usually to make an assumption
+     *  that when connecting to this address over the network, we will reach ourselves.
+     * Please note that this check is not accurate:
+     * - the address is simply compared to the addresses of the network interfaces;
+     * - only the first address is taken for each network interface;
+     * - the routing rules, which affect through which network interface we reach the specified address, are not checked.
     */
    bool isLocalAddress(const Poco::Net::SocketAddress & address);


--- a/dbms/src/Common/localBackup.h
+++ b/dbms/src/Common/localBackup.h
@@ -3,14 +3,14 @@
 #include <Poco/Path.h>


-/** Создаёт локальный (в той же точке монтирования) бэкап (снэпшот) директории.
+/** Creates a local (at the same mount point) backup (snapshot) of a directory.
  *
-  * В указанной destination-директории создаёт hard link-и на все файлы source-директории
-  *  и во всех вложенных директориях, с сохранением (созданием) всех относительных путей;
-  *  а также делает chown, снимая разрешение на запись.
+  * In the specified destination directory, creates hard links to all files of the source directory
+  *  and in all nested directories, preserving (creating) all relative paths;
+  *  and also performs `chown`, removing the write permission.
  *
-  * Это защищает данные от случайного удаления или модификации,
-  *  и предназначено для использования как простое средство защиты от человеческой или программной ошибки,
-  *  но не от аппаратного сбоя.
+  * This protects data from accidental deletion or modification,
+  *  and is intended to be used as a simple means of protection against a human or program error,
+  *  but not from a hardware failure.
  */
 void localBackup(Poco::Path source_path, Poco::Path destination_path);
--- a/dbms/src/Common/setThreadName.h
+++ b/dbms/src/Common/setThreadName.h
 #pragma once

-/** Устанавливает имя потока (максимальная длина - 15 байт),
-  *  которое будет видно в ps, gdb, /proc,
-  *  для удобства наблюдений и отладки.
+/** Sets the thread name (maximum length is 15 bytes),
+  *  which will be visible in ps, gdb, /proc,
+  *  for convenience of observation and debugging.
  */
 void setThreadName(const char * name);
--- a/dbms/src/Common/typeid_cast.h
+++ b/dbms/src/Common/typeid_cast.h
@@ -16,9 +16,9 @@ namespace DB
 }


-/** Проверяет совпадение типа путём сравнения typeid-ов.
-  * Проверяется точное совпадение типа. То есть, cast в предка будет неуспешным.
-  * В остальном, ведёт себя как dynamic_cast.
+/** Checks whether the type matches by comparing typeids.
+  * An exact type match is checked. That is, a cast to an ancestor will fail.
+  * Otherwise, behaves like dynamic_cast.
  */
 template <typename To, typename From>
 typename std::enable_if<std::is_reference<To>::value, To>::type typeid_cast(From & from)

--- a/release_lib.sh
+++ b/release_lib.sh
@@ -8,8 +8,8 @@ function make_control {
    true
 }

-# Генерируем номер ревизии.
-# выставляются переменные окружения REVISION, AUTHOR
+# Generate the revision number.
+# Sets the environment variables REVISION, AUTHOR
 function gen_revision_author {
    REVISION=$(get_revision)

@@ -87,8 +87,8 @@ function get_revision_author {
    export AUTHOR
 }

-# Генерируем changelog из changelog.in.
-# изменяет
+# Generate the changelog from changelog.in.
+# modifies
 #   programs/CMakeLists.txt
 #   dbms/src/CMakeLists.txt
 function gen_changelog {
@@ -105,11 +105,11 @@ function gen_changelog {
        < $CHLOG.in > $CHLOG
 }

-# Загрузка в репозитории Метрики
-# рабочая директория - где лежит сам скрипт
+# Upload to the Metrica repository
+# working directory - the one where the script itself is located
 function upload_debs {
    REVISION="$1"
-    # Определим репозиторий, в который надо загружать пакеты. Он соответствует версии Ubuntu.
+    # Determine the repository to which the packages should be uploaded. It corresponds to the Ubuntu version.
    source /etc/lsb-release

    if [ "$DISTRIB_CODENAME" == "precise" ]; then
@@ -122,7 +122,7 @@ function upload_debs {
        echo -e "\n\e[0;31mUnknown Ubuntu version $DISTRIB_CODENAME \e[0;0m\n"
    fi

-    # Загрузка в репозиторий Метрики.
+    # Upload to Metrica repository.

    cd ../
    DUPLOAD_CONF=dupload.conf