translate comments

5ffa35cf · f1yegor · alexey-milovidov · e9d9abf3 · 5ffa35cf · 5ffa35cf
12 changed files
--- a/dbms/src/Core/Block.h
+++ b/dbms/src/Core/Block.h
@@ -42,20 +42,20 @@ public:
    Block(std::initializer_list<ColumnWithTypeAndName> il);
    Block(const ColumnsWithTypeAndName & data_);

-    /// вставить столбец в заданную позицию
+    /// insert the column at the specified position
    void insert(size_t position, const ColumnWithTypeAndName & elem);
    void insert(size_t position, ColumnWithTypeAndName && elem);
-    /// вставить столбец в конец
+    /// insert the column at the end
    void insert(const ColumnWithTypeAndName & elem);
    void insert(ColumnWithTypeAndName && elem);
-    /// вставить столбец в конец, если столбца с таким именем ещё нет
+    /// insert the column at the end, if there is no column with that name yet
    void insertUnique(const ColumnWithTypeAndName & elem);
    void insertUnique(ColumnWithTypeAndName && elem);
-    /// удалить столбец в заданной позиции
+    /// remove the column at the specified position
    void erase(size_t position);
-    /// удалить столбец с заданным именем
+    /// remove the column with the specified name
    void erase(const String & name);
-    /// Добавляет в блок недостающие столбцы со значениями по-умолчанию
+    /// Adds missing columns to the block with default values
    void addDefaults(const NamesAndTypesList & required_columns);

    /// References are invalidated after calling functions above.
@@ -90,23 +90,23 @@ public:
    operator bool() const { return !data.empty(); }
    bool operator!() const { return data.empty(); }

-    /** Получить список имён столбцов через запятую. */
+    /** Get a list of column names separated by commas. */
    std::string dumpNames() const;

-    /** Список имен, типов и длин столбцов. Предназначен для отладки. */
+    /** List of names, types and lengths of columns. Designed for debugging. */
    std::string dumpStructure() const;

-    /** Получить такой же блок, но пустой. */
+    /** Get the same block, but empty. */
    Block cloneEmpty() const;

-    /** Получить блок со столбцами, переставленными в порядке их имён. */
+    /** Get a block with columns that have been rearranged in the order of their names. */
    Block sortColumns() const;

-    /** Заменяет столбцы смещений внутри вложенных таблиц на один общий для таблицы.
-     *  Кидает исключение, если эти смещения вдруг оказались неодинаковы.
+    /** Replaces the offset columns within the nested tables by one common for the table.
+     *  Throws an exception if these offsets suddenly turn out to be different.
     */
    void optimizeNestedArraysOffsets();
-    /** Тоже самое, только без замены смещений. */
+    /** The same, only without changing the offsets. */
    void checkNestedArraysOffsets() const;

    void clear();
@@ -128,15 +128,15 @@ using Blocks = std::vector<Block>;
 using BlocksList = std::list<Block>;


-/// Сравнить типы столбцов у блоков. Порядок столбцов имеет значение. Имена не имеют значения.
+/// Compare column types for blocks. The order of the columns matters. Names do not matter.
 bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs);

 /// Calculate difference in structure of blocks and write description into output strings.
 void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff);


-/** Дополнительные данные к блокам. Они пока нужны только для запроса
-  * DESCRIBE TABLE с Distributed-таблицами.
+/** Additional data for blocks. So far it is only needed for the
+  * DESCRIBE TABLE query with Distributed tables.
  */
 struct BlockExtraInfo
 {

--- a/dbms/src/Core/BlockInfo.h
+++ b/dbms/src/Core/BlockInfo.h
@@ -9,20 +9,20 @@ namespace DB
 class ReadBuffer;
 class WriteBuffer;

-/** Дополнительная информация о блоке.
+/** Additional information about the block.
  */
 struct BlockInfo
 {
    /** is_overflows:
-      * После выполнения GROUP BY ... WITH TOTALS с настройками max_rows_to_group_by и group_by_overflow_mode = 'any',
-      *  в отдельный блок засовывается строчка с аргегированными значениями, не прошедшими max_rows_to_group_by.
-      * Если это такой блок, то для него is_overflows выставляется в true.
+      * After running GROUP BY ... WITH TOTALS with the max_rows_to_group_by and group_by_overflow_mode = 'any' settings,
+      *  a row with the aggregated values that did not pass max_rows_to_group_by is put into a separate block.
+      * If it is such a block, then is_overflows is set to true for it.
      */

    /** bucket_num:
-      * При использовании двухуровневого метода агрегации, данные с разными группами ключей раскидываются по разным корзинам.
-      * В таком случае здесь указывается номер корзины. Он используется для оптимизации слияния при распределённой аргегации.
-      * Иначе - -1.
+      * When using the two-level aggregation method, data with different key groups are scattered across different buckets.
+      * In this case, the bucket number is indicated here. It is used to optimize the merge for distributed aggregation.
+      * Otherwise -1.
      */

 #define APPLY_FOR_BLOCK_INFO_FIELDS(M) \
@@ -36,10 +36,10 @@ struct BlockInfo

 #undef DECLARE_FIELD

-    /// Записать значения в бинарном виде. NOTE: Можно было бы использовать protobuf, но он был бы overkill для данного случая.
+    /// Write the values in binary form. NOTE: You could use protobuf, but it would be overkill for this case.
    void write(WriteBuffer & out) const;

-    /// Прочитать значения в бинарном виде.
+    /// Read the values in binary form.
    void read(ReadBuffer & in);
 };


--- a/dbms/src/Core/Defines.h
+++ b/dbms/src/Core/Defines.h
@@ -16,30 +16,30 @@
 #define DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC                   5
 #define DBMS_DEFAULT_POLL_INTERVAL                                 10

-/// Размер буфера ввода-вывода по-умолчанию.
+/// The size of the I/O buffer by default.
 #define DBMS_DEFAULT_BUFFER_SIZE                                 1048576ULL

-/// При записи данных, для сжатия выделяется буфер размером max_compress_block_size. При переполнении буфера или если в буфер
-/// записано данных больше или равно, чем min_compress_block_size, то при очередной засечке, данные так же будут сжиматься
-/// В результате, для маленьких столбцов (числа 1-8 байт), при index_granularity = 8192, размер блока будет 64 KБ.
-/// А для больших столбцов (Title - строка ~100 байт), размер блока будет ~819 КБ. За счёт этого, коэффициент сжатия почти не ухудшится.
+/// When writing data, a buffer of `max_compress_block_size` size is allocated for compression. When the buffer overflows, or if
+/// the amount of data written to the buffer is greater than or equal to `min_compress_block_size`, the data will also be compressed at the next mark.
+/// As a result, for small columns (numbers 1-8 bytes), with index_granularity = 8192, the block size will be 64 KB.
+/// And for large columns (Title - string ~100 bytes), the block size will be ~819 KB. Due to this, the compression ratio almost does not get worse.
 #define DEFAULT_MIN_COMPRESS_BLOCK_SIZE                            65536
 #define DEFAULT_MAX_COMPRESS_BLOCK_SIZE                         1048576

-/** Какими блоками по-умолчанию читаются данные (в числе строк).
-  * Меньшие значения дают лучшую кэш-локальность, меньшее потребление оперативки, но больший оверхед на обработку запроса.
+/** The size of blocks (in number of rows) in which data is read by default.
+  * Smaller values give better cache locality and lower RAM consumption, but more overhead for query processing.
  */
 #define DEFAULT_BLOCK_SIZE                                         65536

-/** Какие блоки следует формировать для вставки в таблицу, если мы управляем формированием блоков.
-  * (Иногда в таблицу вставляются ровно такие блоки, какие были считаны / переданы извне, и на их размер этот параметр не влияет.)
-  * Больше, чем DEFAULT_BLOCK_SIZE, так как в некоторых таблицах на каждый блок создаётся кусок данных на диске (довольно большая штука),
-  *  и если бы куски были маленькими, то их было бы накладно потом объединять.
+/** The size of blocks that should be formed for insertion into the table, if we control the formation of blocks.
+  * (Sometimes the blocks inserted into the table are exactly the blocks that were read / passed from outside, and this parameter does not affect their size.)
+  * Larger than DEFAULT_BLOCK_SIZE, because in some tables a data part on disk is created for each block (quite a big thing),
+  *  and if the parts were small, it would be costly to merge them later.
  */
 #define DEFAULT_INSERT_BLOCK_SIZE                                1048576

-/** То же самое, но для операций слияния. Меньше DEFAULT_BLOCK_SIZE для экономии оперативки (так как читаются все столбцы).
-  * Сильно меньше, так как бывают 10-way слияния.
+/** The same, but for merge operations. Less than DEFAULT_BLOCK_SIZE to save RAM (since all the columns are read).
+  * Significantly less, since there are 10-way merges.
  */
 #define DEFAULT_MERGE_BLOCK_SIZE                                 8192

@@ -49,16 +49,16 @@
 #define DEFAULT_INTERACTIVE_DELAY                                100000
 #define DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE             1024
 #define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES     3
-/// каждый период уменьшаем счетчик ошибок в 2 раза
-/// слишком маленький период может приводить, что ошибки исчезают сразу после создания.
+/// Each period, the error counter is halved.
+/// Too short a period can cause errors to disappear immediately after creation.
 #define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD     (2 * DBMS_DEFAULT_SEND_TIMEOUT_SEC)
-#define DEFAULT_QUERIES_QUEUE_WAIT_TIME_MS                         5000    /// Максимальное время ожидания в очереди запросов.
+#define DEFAULT_QUERIES_QUEUE_WAIT_TIME_MS                         5000    /// Maximum waiting time in the request queue.
 #define DBMS_DEFAULT_BACKGROUND_POOL_SIZE                        16

-/// Используется в методе reserve, когда известно число строк, но неизвестны их размеры.
+/// Used in the `reserve` method, when the number of rows is known, but their sizes are unknown.
 #define DBMS_APPROX_STRING_SIZE 64

-/// Суффикс имени для столбца, содержащего смещения массива.
+/// Name suffix for the column containing the array offsets.
 #define ARRAY_SIZES_COLUMN_NAME_SUFFIX                             ".size"

 #define DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES                    50264
@@ -74,7 +74,7 @@

 #define DBMS_DISTRIBUTED_DIRECTORY_MONITOR_SLEEP_TIME_MS        100

-/// Граница, на которых должны быть выровнены блоки для асинхронных файловых операций.
+/// The boundary on which the blocks for asynchronous file operations should be aligned.
 #define DEFAULT_AIO_FILE_BLOCK_SIZE                                4096

 #define DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS        7500

--- a/dbms/src/Core/Field.h
+++ b/dbms/src/Core/Field.h
@@ -31,13 +31,13 @@ STRONG_TYPEDEF(TupleBackend, Tuple); /// Array and Tuple are different types wit
 #define DBMS_MIN_FIELD_SIZE 32


-/** Discriminated union из нескольких типов.
-  * Сделан для замены boost::variant:
-  *  является не обобщённым,
-  *  зато несколько более эффективным, и более простым.
+/** Discriminated union of several types.
+  * Made as a replacement for `boost::variant`:
+  *  it is not generic,
+  *  but somewhat more efficient, and simpler.
  *
-  * Используется для представления единичного значения одного из нескольких типов в оперативке.
-  * Внимание! Предпочтительно вместо единичных значений хранить кусочки столбцов. См. Column.h
+  * Used to represent a single value of one of several types in RAM.
+  * Warning! It is preferable to store chunks of columns instead of single values. See Column.h
  */
 class Field
 {
@@ -80,7 +80,7 @@ public:
    };


-    /// Позволяет получить идентификатор для типа или наоборот.
+    /// Returns an identifier for the type or vice versa.
    template <typename T> struct TypeToEnum;
    template <Types::Which which> struct EnumToType;

@@ -90,8 +90,8 @@ public:
    {
    }

-    /** Не смотря на наличие шаблонного конструктора, этот конструктор всё-равно нужен,
-      *  так как при его отсутствии, компилятор всё-равно сгенерирует конструктор по-умолчанию.
+    /** Despite the presence of a template constructor, this constructor is still needed,
+      *  since, in its absence, the compiler will still generate an implicit copy constructor.
      */
    Field(const Field & rhs)
    {
@@ -110,7 +110,7 @@ public:
        createConcrete(std::forward<T>(rhs));
    }

-    /// Создать строку inplace.
+    /// Create a string in place.
    Field(const char * data, size_t size)
    {
        create(data, size);
@@ -491,13 +491,13 @@ typename NearestFieldType<T>::Type nearestFieldType(const T & x)
 class ReadBuffer;
 class WriteBuffer;

-/// Предполагается что у всех элементов массива одинаковый тип.
+/// It is assumed that all elements of the array have the same type.
 void readBinary(Array & x, ReadBuffer & buf);

 inline void readText(Array & x, ReadBuffer & buf)             { throw Exception("Cannot read Array.", ErrorCodes::NOT_IMPLEMENTED); }
 inline void readQuoted(Array & x, ReadBuffer & buf)         { throw Exception("Cannot read Array.", ErrorCodes::NOT_IMPLEMENTED); }

-/// Предполагается что у всех элементов массива одинаковый тип.
+/// It is assumed that all elements of the array have the same type.
 void writeBinary(const Array & x, WriteBuffer & buf);

 void writeText(const Array & x, WriteBuffer & buf);

--- a/dbms/src/Core/NamesAndTypes.h
+++ b/dbms/src/Core/NamesAndTypes.h
@@ -44,22 +44,22 @@ public:
    String toString() const;
    static NamesAndTypesList parse(const String & s);

-    /// Все элементы rhs должны быть различны.
+    /// All `rhs` elements must be different.
    bool isSubsetOf(const NamesAndTypesList & rhs) const;

-    /// Расстояние Хемминга между множествами
-    ///  (иными словами, добавленные и удаленные столбцы считаются один раз; столбцы, изменившие тип, - дважды).
+    /// Hamming distance between sets
+    ///  (in other words, the added and deleted columns are counted once, the columns that changed the type - twice).
    size_t sizeOfDifference(const NamesAndTypesList & rhs) const;

    Names getNames() const;

-    /// Оставить только столбцы, имена которых есть в names. В names могут быть лишние столбцы.
+    /// Keep only the columns whose names are in `names`. `names` may contain extra columns.
    NamesAndTypesList filter(const NameSet & names) const;

-    /// Оставить только столбцы, имена которых есть в names. В names могут быть лишние столбцы.
+    /// Keep only the columns whose names are in `names`. `names` may contain extra columns.
    NamesAndTypesList filter(const Names & names) const;

-    /// В отличие от filter, возвращает столбцы в том порядке, в котором они идут в names.
+    /// Unlike `filter`, returns columns in the order in which they appear in `names`.
    NamesAndTypesList addTypes(const Names & names) const;
 };


--- a/dbms/src/Core/QueryProcessingStage.h
+++ b/dbms/src/Core/QueryProcessingStage.h
@@ -6,15 +6,15 @@
 namespace DB
 {

-/// До какой стадии выполнен или нужно выполнить SELECT запрос.
+/// Up to what stage the SELECT query is executed or needs to be executed.
 namespace QueryProcessingStage
 {
-    /// Номера имеют значение - более поздняя стадия имеет больший номер.
+    /// Numbers matter - the later stage has a larger number.
    enum Enum
    {
-        FetchColumns        = 0,    /// Только прочитать/прочитаны указанные в запросе столбцы.
-        WithMergeableState     = 1,    /// До стадии, когда результаты обработки на разных серверах можно объединить.
-        Complete             = 2,    /// Полностью.
+        FetchColumns        = 0,    /// Only read (or have read) the columns specified in the query.
+        WithMergeableState  = 1,    /// Until the stage where the results of processing on different servers can be combined.
+        Complete            = 2,    /// Completely.
    };

    inline const char * toString(UInt64 stage)

--- a/dbms/src/Core/Row.h
+++ b/dbms/src/Core/Row.h
@@ -9,8 +9,8 @@
 namespace DB
 {

-/** Тип данных для представления одной строки таблицы в оперативке.
-  * Внимание! Предпочтительно вместо единичных строк хранить блоки столбцов. См. Block.h
+/** The data type for representing one row of the table in RAM.
+  * Warning! It is preferable to store column blocks instead of single rows. See Block.h
  */

 using Row = AutoArray<Field>;

--- a/dbms/src/Core/SortCursor.h
+++ b/dbms/src/Core/SortCursor.h
@@ -35,7 +35,7 @@ struct SortCursorImpl
    /** Should we use Collator to sort a column? */
    NeedCollationFlags need_collation;

-    /** Есть ли хотя бы один столбец с Collator. */
+    /** Is there at least one column with Collator. */
    bool has_collation = false;

    SortCursorImpl() {}
@@ -48,7 +48,7 @@ struct SortCursorImpl

    bool empty() const { return rows == 0; }

-    /// Установить курсор в начало нового блока.
+    /// Set the cursor to the beginning of the new block.
    void reset(const Block & block)
    {
        all_columns.clear();
@@ -81,7 +81,7 @@ struct SortCursorImpl
 };


-/// Для лёгкости копирования.
+/// For easy copying.
 struct SortCursor
 {
    SortCursorImpl * impl;
@@ -90,7 +90,7 @@ struct SortCursor
    SortCursorImpl * operator-> () { return impl; }
    const SortCursorImpl * operator-> () const { return impl; }

-    /// Указанная строка данного курсора больше указанной строки другого курсора.
+    /// The specified row of this cursor is greater than the specified row of another cursor.
    bool greaterAt(const SortCursor & rhs, size_t lhs_pos, size_t rhs_pos) const
    {
        for (size_t i = 0; i < impl->sort_columns_size; ++i)
@@ -106,13 +106,13 @@ struct SortCursor
        return impl->order > rhs.impl->order;
    }

-    /// Проверяет, что все строки в текущем блоке данного курсора меньше или равны, чем все строки текущего блока другого курсора.
+    /// Checks that all rows in the current block of this cursor are less than or equal to all the rows of the current block of another cursor.
    bool totallyLessOrEquals(const SortCursor & rhs) const
    {
        if (impl->rows == 0 || rhs.impl->rows == 0)
            return false;

-        /// Последняя строка данного курсора не больше первой строки другого.
+        /// The last row of this cursor is no larger than the first row of the other cursor.
        return !greaterAt(rhs, impl->rows - 1, 0);
    }

@@ -121,7 +121,7 @@ struct SortCursor
        return greaterAt(rhs, impl->pos, rhs.impl->pos);
    }

-    /// Инвертировано, чтобы из priority queue элементы вынимались в порядке по возрастанию.
+    /// Inverted so that the priority queue elements are removed in ascending order.
    bool operator< (const SortCursor & rhs) const
    {
        return greater(rhs);
@@ -129,7 +129,7 @@ struct SortCursor
 };


-/// Отдельный компаратор для locale-sensitive сравнения строк
+/// Separate comparator for locale-sensitive string comparisons
 struct SortCursorWithCollation
 {
    SortCursorImpl * impl;
@@ -167,7 +167,7 @@ struct SortCursorWithCollation
        if (impl->rows == 0 || rhs.impl->rows == 0)
            return false;

-        /// Последняя строка данного курсора не больше первой строки другого.
+        /// The last row of this cursor is no larger than the first row of the other cursor.
        return !greaterAt(rhs, impl->rows - 1, 0);
    }


--- a/dbms/src/Core/SortDescription.h
+++ b/dbms/src/Core/SortDescription.h
@@ -11,11 +11,11 @@ class Collator;
 namespace DB
 {

-/// Описание правила сортировки по одному столбцу.
+/// Description of the sorting rule by one column.
 struct SortColumnDescription
 {
-    std::string column_name;                        /// Имя столбца.
-    size_t column_number;                    /// Номер столбца (используется, если не задано имя).
+    std::string column_name;                        /// The name of the column.
+    size_t column_number;                    /// Column number (used if no name is given).
    int direction;                            /// 1 - ascending, -1 - descending.
    int nulls_direction;                    /// 1 - NULLs and NaNs are greater, -1 - less.
                                            /// To achieve NULLS LAST, set it equal to direction, to achieve NULLS FIRST, set it opposite.
@@ -31,7 +31,7 @@ struct SortColumnDescription
    std::string getID() const;
 };

-/// Описание правила сортировки по нескольким столбцам.
+/// Description of the sorting rule for several columns.
 using SortDescription = std::vector<SortColumnDescription>;

 }

--- a/dbms/src/Core/StringRef.h
+++ b/dbms/src/Core/StringRef.h
@@ -15,7 +15,7 @@
 #include <Common/unaligned.h>


-/// Штука, чтобы не создавать строки для поиска подстроки в хэш таблице.
+/// A helper to avoid creating strings when looking up substrings in a hash table.
 struct StringRef
 {
    const char * data = nullptr;
@@ -37,9 +37,9 @@ using UInt64 = DB::UInt64;

 #if __SSE2__

-/** Сравнение строк на равенство.
-  * Подход является спорным и выигрывает не во всех случаях.
-  * Подробнее смотрите hash_map_string_2.cpp
+/** Compare strings for equality.
+  * The approach is controversial and does not win in all cases.
+  * For more information, see hash_map_string_2.cpp
  */

 inline bool compareSSE2(const char * p1, const char * p2)
@@ -153,12 +153,12 @@ inline bool operator> (StringRef lhs, StringRef rhs)
 }


-/** Хэш-функции.
-  * Можно использовать либо CityHash64,
-  *  либо функцию на основе инструкции crc32,
-  *  которая является заведомо менее качественной, но на реальных наборах данных,
-  *  при использовании в хэш-таблице, работает существенно быстрее.
-  * Подробнее см. hash_map_string_3.cpp
+/** Hash functions.
+  * You can use either CityHash64,
+  *  or a function based on the crc32 instruction,
+  *  which is known to be of lower quality, but on real data sets,
+  *  when used in a hash table, works much faster.
+  * For more information, see hash_map_string_3.cpp
  */

 struct StringRefHash64
@@ -183,7 +183,7 @@ inline UInt64 _mm_crc32_u64(UInt64 crc, UInt64 value)

 #endif

-/// Кусочки взяты из CityHash.
+/// Parts are taken from CityHash.

 inline UInt64 hashLen16(UInt64 u, UInt64 v)
 {
@@ -262,7 +262,7 @@ struct CRC32Hash
            pos += 8;
        } while (pos + 8 < end);

-        UInt64 word = unalignedLoad<UInt64>(end - 8);    /// Не уверен, что это нормально.
+        UInt64 word = unalignedLoad<UInt64>(end - 8);    /// I'm not sure if this is normal.
        res = _mm_crc32_u64(res, word);

        return res;

--- a/dbms/src/Core/Types.h
+++ b/dbms/src/Core/Types.h
@@ -9,7 +9,7 @@
 namespace DB
 {

-/** Типы данных для представления значений из БД в оперативке.
+/** Data types for representing values from a database in RAM.
  */

 STRONG_TYPEDEF(char, Null);
@@ -74,7 +74,7 @@ template <> struct TypeName<Float32>     { static std::string get() { return "Fl
 template <> struct TypeName<Float64>     { static std::string get() { return "Float64";     } };
 template <> struct TypeName<String>     { static std::string get() { return "String";     } };

-/// Этот тип не поддерживается СУБД, но используется в некоторых внутренних преобразованиях.
+/// This type is not supported by the DBMS, but is used in some internal transformations.
 template <> struct TypeName<long double>{ static std::string get() { return "long double";     } };

 }
--- a/dbms/src/Core/toField.h
+++ b/dbms/src/Core/toField.h
@@ -12,7 +12,7 @@
 namespace DB
 {

-/// Перевести что угодно в Field.
+/// Convert anything to Field.
 template <typename T>
 inline Field toField(const T & x)
 {