diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
index 6a15d8e390c1fc0f909e3a311cedcd18ab6adabd..2291a08d33d93b5942ed16188dee77f05994fd1f 100644
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@@ -345,8 +345,8 @@ struct Settings : public SettingsCollection<Settings>
     M(SettingBool, check_query_single_value_result, true, "Return check query result as single 1/0 value") \
     M(SettingBool, allow_drop_detached, false, "Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries") \
     \
-    M(SettingSeconds, replica_error_decrease_period, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.") \
-    M(SettingUInt64, replica_error_max_count, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up increadible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.") \
+    M(SettingSeconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.") \
+    M(SettingUInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up increadible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.") \
     \
     M(SettingBool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.") \
     M(SettingSeconds, live_view_heartbeat_interval, DEFAULT_LIVE_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate live query is alive.") \
diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp
index b7bda653cfaef0c17d4cd3d223466b4f0ee6852c..12ba5850750738ca14cb04331eeaa26b75305a6c 100644
--- a/dbms/src/Interpreters/Cluster.cpp
+++ b/dbms/src/Interpreters/Cluster.cpp
@@ -348,7 +348,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting
 
             ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
                         all_replicas_pools, settings.load_balancing,
-                        settings.replica_error_decrease_period.totalSeconds(), settings.replica_error_max_count);
+                        settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
 
             if (weight)
                 slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
@@ -400,7 +400,7 @@ Cluster::Cluster(const Settings & settings, const std::vector<std::vector<String
 
         ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
                 all_replicas, settings.load_balancing,
-                settings.replica_error_decrease_period.totalSeconds(), settings.replica_error_max_count);
+                settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
 
         slot_to_shard.insert(std::end(slot_to_shard), default_weight, shards_info.size());
         shards_info.push_back({{}, current_shard_num, default_weight, std::move(shard_local_addresses), std::move(shard_pool),
diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
index e74ed612dcbc384de38fb5b2cd12c52e62b541b1..4d002b1bcf72b9b158d971d84351172c1ef7c763 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -191,7 +191,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri
 
     const auto settings = storage.global_context.getSettings();
     return pools.size() == 1 ? pools.front() : std::make_shared<ConnectionPoolWithFailover>(pools, LoadBalancing::RANDOM,
-        settings.replica_error_decrease_period.totalSeconds(), settings.replica_error_max_count);
+        settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
 }
 
 
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 02d5871894bc4285ede4efeda246a24588db4577..27da2d8deb62dbace09182573c03ab5ed688c041 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -861,4 +861,29 @@ Possible values:
 
 Default value: 0.
 
+## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
+
+- Type: seconds
+- Default value: 60 seconds
+
+Controls how fast errors of distributed tables are zeroed. For example, if a replica was unavailable for some time and accumulated 5 errors, and `distributed_replica_error_half_life` is set to 1 second, then the replica is considered back to normal 3 seconds after the last error.
+
+**See also**
+
+- [Table engine Distributed](../../operations/table_engines/distributed.md)
+- [`distributed_replica_error_cap`](#settings-distributed_replica_error_cap)
+
+
+## distributed_replica_error_cap {#settings-distributed_replica_error_cap}
+
+- Type: unsigned int
+- Default value: 1000
+
+The error count of each replica is capped at this value, preventing a single replica from accumulating too many errors.
+
+**See also**
+
+- [Table engine Distributed](../../operations/table_engines/distributed.md)
+- [`distributed_replica_error_half_life`](#settings-distributed_replica_error_half_life)
+
 [Original article](https://clickhouse.yandex/docs/en/operations/settings/settings/) <!-- hide -->
diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index 0f0917a876c93b2e5c8e10cf9c20267271c6cb6e..70b2364a2109e20c79e8828a5834faee57b1b4e7 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -45,18 +45,28 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
 ## system.clusters
 
 Contains information about clusters available in the config file and the servers in them.
+
 Columns:
 
-```
-cluster String — The cluster name.
-shard_num UInt32 — The shard number in the cluster, starting from 1.
-shard_weight UInt32 — The relative weight of the shard when writing data.
-replica_num UInt32 — The replica number in the shard, starting from 1.
-host_name String — The host name, as specified in the config.
-String host_address — The host IP address obtained from DNS.
-port UInt16 — The port to use for connecting to the server.
-user String — The name of the user for connecting to the server.
-```
+- `cluster` (String) — The cluster name.
+- `shard_num` (UInt32) — The shard number in the cluster, starting from 1.
+- `shard_weight` (UInt32) — The relative weight of the shard when writing data.
+- `replica_num` (UInt32) — The replica number in the shard, starting from 1.
+- `host_name` (String) — The host name, as specified in the config.
+- `host_address` (String) — The host IP address obtained from DNS.
+- `port` (UInt16) — The port to use for connecting to the server.
+- `user` (String) — The name of the user for connecting to the server.
+- `errors_count` (UInt32) — The number of times this host failed to reach the replica.
+- `estimated_recovery_time` (UInt32) — Seconds left until the replica error count is zeroed and the replica is considered to be back to normal.
+
+
+Please note that `errors_count` is updated once per query to the cluster, but `estimated_recovery_time` is recalculated on demand. So there can be a case of a non-zero `errors_count` and a zero `estimated_recovery_time`; in that case, the next query will zero `errors_count` and try to use the replica as if it had no errors.
+
+**See also**
+
+- [Table engine Distributed](../../operations/table_engines/distributed.md)
+- [distributed_replica_error_cap setting](../settings/settings.md#settings-distributed_replica_error_cap)
+- [distributed_replica_error_half_life setting](../settings/settings.md#settings-distributed_replica_error_half_life)
 
 ## system.columns