Commit 3339b8e0 authored by Alexey Milovidov

Make "mark_cache_min_lifetime" setting obsolete

Parent 700e727f
@@ -23,11 +23,10 @@ struct TrivialWeightFunction
 };

-/// Thread-safe cache that evicts entries which are not used for a long time or are expired.
+/// Thread-safe cache that evicts entries which are not used for a long time.
 /// WeightFunction is a functor that takes Mapped as a parameter and returns "weight" (approximate size)
 /// of that value.
-/// Cache starts to evict entries when their total weight exceeds max_size and when expiration time of these
-/// entries is due.
+/// Cache starts to evict entries when their total weight exceeds max_size.
 /// Value weight should not change after insertion.
 template <typename TKey, typename TMapped, typename HashFunction = std::hash<TKey>, typename WeightFunction = TrivialWeightFunction<TMapped>>
 class LRUCache
@@ -36,15 +35,13 @@ public:
     using Key = TKey;
     using Mapped = TMapped;
     using MappedPtr = std::shared_ptr<Mapped>;
-    using Delay = std::chrono::seconds;

 private:
     using Clock = std::chrono::steady_clock;
-    using Timestamp = Clock::time_point;

 public:
-    LRUCache(size_t max_size_, const Delay & expiration_delay_ = Delay::zero())
-        : max_size(std::max(static_cast<size_t>(1), max_size_)), expiration_delay(expiration_delay_) {}
+    LRUCache(size_t max_size_)
+        : max_size(std::max(static_cast<size_t>(1), max_size_)) {}

     MappedPtr get(const Key & key)
     {
@@ -167,16 +164,9 @@ protected:
     struct Cell
     {
-        bool expired(const Timestamp & last_timestamp, const Delay & delay) const
-        {
-            return (delay == Delay::zero()) ||
-                ((last_timestamp > timestamp) && ((last_timestamp - timestamp) > delay));
-        }
-
         MappedPtr value;
         size_t size;
         LRUQueueIterator queue_iterator;
-        Timestamp timestamp;
     };

     using Cells = std::unordered_map<Key, Cell, HashFunction>;
@@ -257,7 +247,6 @@ private:
     /// Total weight of values.
     size_t current_size = 0;
     const size_t max_size;
-    const Delay expiration_delay;

     std::atomic<size_t> hits {0};
     std::atomic<size_t> misses {0};
@@ -273,7 +262,6 @@ private:
         }

         Cell & cell = it->second;
-        updateCellTimestamp(cell);

         /// Move the key to the end of the queue. The iterator remains valid.
         queue.splice(queue.end(), queue, cell.queue_iterator);
@@ -303,18 +291,11 @@ private:
         cell.value = mapped;
         cell.size = cell.value ? weight_function(*cell.value) : 0;
         current_size += cell.size;
-        updateCellTimestamp(cell);
-
-        removeOverflow(cell.timestamp);
-    }
-
-    void updateCellTimestamp(Cell & cell)
-    {
-        if (expiration_delay != Delay::zero())
-            cell.timestamp = Clock::now();
+        removeOverflow();
     }

-    void removeOverflow(const Timestamp & last_timestamp)
+    void removeOverflow()
     {
         size_t current_weight_lost = 0;
         size_t queue_size = cells.size();
@@ -330,8 +311,6 @@ private:
             }

             const auto & cell = it->second;
-            if (!cell.expired(last_timestamp, expiration_delay))
-                break;

             current_size -= cell.size;
             current_weight_lost += cell.size;
......
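After this change, the cache's only eviction policy is the size-bounded LRU described in the comment above: eviction starts from the least recently used entry as soon as the total weight exceeds max_size, and at least one entry is always kept. Below is a minimal, self-contained sketch of that policy; `TinyLRU`, `StringWeight` and all other names are illustrative stand-ins, not ClickHouse code.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <list>
#include <memory>
#include <string>
#include <unordered_map>

/// Weight of a value = its approximate size. For strings, just the length.
struct StringWeight
{
    size_t operator()(const std::string & s) const { return s.size(); }
};

template <typename Key, typename Value, typename WeightFunction>
class TinyLRU
{
public:
    explicit TinyLRU(size_t max_size_) : max_size(std::max<size_t>(1, max_size_)) {}

    std::shared_ptr<Value> get(const Key & key)
    {
        auto it = cells.find(key);
        if (it == cells.end())
            return nullptr;
        /// Move the key to the back of the queue: it is now most recently used.
        queue.splice(queue.end(), queue, it->second.queue_iterator);
        return it->second.value;
    }

    void set(const Key & key, std::shared_ptr<Value> value)
    {
        auto [it, inserted] = cells.try_emplace(key);
        Cell & cell = it->second;
        if (inserted)
            cell.queue_iterator = queue.insert(queue.end(), key);
        else
        {
            current_size -= cell.size;
            queue.splice(queue.end(), queue, cell.queue_iterator);
        }
        cell.value = std::move(value);
        cell.size = cell.value ? WeightFunction()(*cell.value) : 0;
        current_size += cell.size;

        /// Evict least recently used entries until the total weight fits,
        /// always keeping at least one entry. No expiration check anywhere.
        while (current_size > max_size && queue.size() > 1)
        {
            auto old = cells.find(queue.front());
            current_size -= old->second.size;
            cells.erase(old);
            queue.pop_front();
        }
    }

private:
    struct Cell
    {
        std::shared_ptr<Value> value;
        size_t size = 0;
        typename std::list<Key>::iterator queue_iterator;
    };

    std::list<Key> queue;                 /// LRU order: oldest at the front.
    std::unordered_map<Key, Cell> cells;
    size_t current_size = 0;              /// Total weight of values.
    const size_t max_size;
};

int main()
{
    TinyLRU<int, std::string, StringWeight> cache(10);
    cache.set(1, std::make_shared<std::string>("aaaaaa"));  /// weight 6
    cache.set(2, std::make_shared<std::string>("bbbbbb"));  /// total 12 > 10: key 1 evicted
    assert(!cache.get(1));
    assert(cache.get(2) != nullptr);
}
```

Note that, exactly as in the diff above, the cell carries no timestamp and the eviction loop performs no expiration check.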
@@ -131,8 +131,6 @@ struct Settings : public SettingsCollection<Settings>
     M(SettingBool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
     M(SettingBool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
     \
-    M(SettingUInt64, mark_cache_min_lifetime, 10000, "If the maximum size of mark_cache is exceeded, delete only records older than mark_cache_min_lifetime seconds.", 0) \
-    \
     M(SettingFloat, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \
     M(SettingFloat, max_streams_multiplier_for_merge_tables, 5, "Ask more streams when reading from Merge table. Streams will be spread across tables that Merge table will use. This allows more even distribution of work across threads and especially helpful when merged tables differ in size.", 0) \
     \
@@ -393,6 +391,7 @@ struct Settings : public SettingsCollection<Settings>
     M(SettingBool, allow_experimental_cross_to_join_conversion, true, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
     M(SettingBool, allow_experimental_data_skipping_indices, true, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
     M(SettingBool, merge_tree_uniform_read_distribution, true, "Obsolete setting, does nothing. Will be removed after 2020-05-20", 0) \
+    M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \

 DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
......
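The second hunk shows how the setting is retired: rather than being deleted outright, it is redeclared among the obsolete settings so that existing configs and queries that still set it do not start failing. A rough sketch of the same compatibility pattern, assuming a hypothetical `Options` struct and `applySetting` helper (not ClickHouse's actual settings machinery):

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

/// Hypothetical settings struct, for illustration only.
struct Options
{
    uint64_t mark_cache_min_lifetime = 0;   /// obsolete: still declared, never read
    uint64_t max_query_size = 262144;       /// a live setting, for contrast
};

void applySetting(Options & opts, const std::string & name, const std::string & value)
{
    if (name == "mark_cache_min_lifetime")
        opts.mark_cache_min_lifetime = std::stoull(value);  /// accepted and stored, but nothing consumes it
    else if (name == "max_query_size")
        opts.max_query_size = std::stoull(value);
    else
        throw std::invalid_argument("Unknown setting: " + name);  /// unknown names still fail loudly
}

int main()
{
    Options opts;
    applySetting(opts, "mark_cache_min_lifetime", "10000");  /// an old client's SET keeps working
    std::cout << "accepted, max_query_size = " << opts.max_query_size << "\n";
}
```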
@@ -1436,7 +1436,7 @@ void Context::setMarkCache(size_t cache_size_in_bytes)
     if (shared->mark_cache)
         throw Exception("Mark cache has been already created.", ErrorCodes::LOGICAL_ERROR);

-    shared->mark_cache = std::make_shared<MarkCache>(cache_size_in_bytes, std::chrono::seconds(settings.mark_cache_min_lifetime));
+    shared->mark_cache = std::make_shared<MarkCache>(cache_size_in_bytes);
 }
......
@@ -38,8 +38,8 @@ private:
     using Base = LRUCache<UInt128, MarksInCompressedFile, UInt128TrivialHash, MarksWeightFunction>;

 public:
-    MarkCache(size_t max_size_in_bytes, const Delay & expiration_delay_)
-        : Base(max_size_in_bytes, expiration_delay_) {}
+    MarkCache(size_t max_size_in_bytes)
+        : Base(max_size_in_bytes) {}

     /// Calculate key from path to file and offset.
     static UInt128 hash(const String & path_to_file)
......
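The call-site change is mechanical: the expiration-delay argument simply disappears. A compilable sketch of the migration, with `BaseCache` and `MarkCacheSketch` as hypothetical stand-ins for `LRUCache` and `MarkCache`:

```cpp
#include <cstddef>
#include <memory>

/// Stand-in for the LRUCache base after the change: a size-only constructor.
struct BaseCache
{
    explicit BaseCache(size_t max_size_) : max_size(max_size_) {}
    const size_t max_size;
};

/// Stand-in for MarkCache (the real class is not reproduced here).
struct MarkCacheSketch : public BaseCache
{
    explicit MarkCacheSketch(size_t max_size_in_bytes) : BaseCache(max_size_in_bytes) {}
};

int main()
{
    /// Before the change the call site passed an expiration delay as well:
    ///     std::make_shared<MarkCacheSketch>(5368709120, std::chrono::seconds(10000));
    /// After the change only the byte budget remains:
    auto cache = std::make_shared<MarkCacheSketch>(5368709120ULL);
    return cache->max_size == 5368709120ULL ? 0 : 1;
}
```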
@@ -372,9 +372,6 @@ Approximate size (in bytes) of the cache of marks used by table engines of the [
 The cache is shared for the server and memory is allocated as needed. The cache size must be at least 5368709120.

-!!! warning "Warning"
-    This parameter could be exceeded by the [mark_cache_min_lifetime](../settings/settings.md#settings-mark_cache_min_lifetime) setting.

 **Example**

 ```xml
......
@@ -218,11 +218,11 @@ Ok.
 Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse will try to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. For the following query:

 ```sql
 INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ...
 ```

 - if `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions will be interpreted separately for each row (this is very slow for a large number of rows)
 - if `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second and third rows will be parsed using the template `lower(String)` and interpreted together; the expression in the fourth row will be parsed with another template (`upper(String)`)
 - if `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in the previous case, but also allows falling back to interpreting expressions separately if it's not possible to deduce a template.

 Enabled by default.

 ## input_format_values_accurate_types_of_literals {#settings-input_format_values_accurate_types_of_literals}
@@ -232,7 +232,7 @@ This setting is used only when `input_format_values_deduce_templates_of_expressi
 (..., abs(3.141592654), ...), -- Float64 literal
 (..., abs(-1), ...), -- Int64 literal
 ```

 When this setting is enabled, ClickHouse will check the actual type of a literal and will use an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`.
 When disabled, ClickHouse may use a more general type for some literals (e.g. `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues.

 Enabled by default.
@@ -477,7 +477,7 @@ Default value: 8.
 ## merge_tree_max_rows_to_use_cache {#setting-merge_tree_max_rows_to_use_cache}

 If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it doesn't use the cache of uncompressed blocks.

 The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from thrashing by queries that read a large amount of data. The [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
@@ -591,12 +591,6 @@ We are writing a URL column with the String type (average size of 60 bytes per v
 There usually isn't any reason to change this setting.

-## mark_cache_min_lifetime {#settings-mark_cache_min_lifetime}
-
-If the value of [mark_cache_size](../server_settings/settings.md#server-mark-cache-size) setting is exceeded, delete only records older than mark_cache_min_lifetime seconds. If your hosts have low amount of RAM, it makes sense to lower this parameter.
-
-Default value: 10000 seconds.
-
 ## max_query_size {#settings-max_query_size}

 The maximum part of a query that can be taken to RAM for parsing with the SQL parser.
@@ -960,7 +954,7 @@ Possible values:
 - 1 — skipping enabled.

   If a shard is unavailable, ClickHouse returns a result based on partial data and doesn't report node availability issues.

 - 0 — skipping disabled.
@@ -1067,7 +1061,7 @@ Possible values:
 - Positive integer number, in nanoseconds.

   Recommended values:

   - 10000000 nanoseconds (100 times a second) and less for single queries.
   - 1000000000 nanoseconds (once a second) for cluster-wide profiling.
@@ -1090,7 +1084,7 @@ Possible values:
 - Positive integer number of nanoseconds.

   Recommended values:

   - 10000000 nanoseconds (100 times a second) and more for single queries.
   - 1000000000 nanoseconds (once a second) for cluster-wide profiling.
......
@@ -370,10 +370,8 @@ ClickHouse will check the conditions `min_part_size` and `min_part_size_rat
 Approximate size (in bytes) of the cache of marks used by table engines of the [MergeTree](../../operations/table_engines/mergetree.md) family.

-The cache is shared for the server and memory is allocated as needed. The cache cannot be smaller than 5368709120.
+The cache is shared for the server and memory is allocated as needed.

-!!! warning "Warning"
-    This parameter can be exceeded by a large value of the [mark_cache_min_lifetime](../settings/settings.md#settings-mark_cache_min_lifetime) setting.

 **Example**
......
@@ -566,12 +566,6 @@ ClickHouse uses this parameter when reading d
 As a rule, there is no reason to change this setting.

-## mark_cache_min_lifetime {#settings-mark_cache_min_lifetime}
-
-If the value of the [mark_cache_size](../server_settings/settings.md#server-mark-cache-size) parameter is exceeded, only records older than the value of this parameter will be deleted. It makes sense to lower this parameter if the host systems have a small amount of RAM.
-
-Default value: 10000 seconds.
-
 ## max_query_size {#settings-max_query_size}

 The maximum part of a query that will be read into RAM for parsing by the SQL parser.
......