# Pegasus server configuration template.
# Tokens of the form %{name} are placeholders; presumably they are substituted
# by deployment tooling before this file is loaded -- confirm against the
# deploy scripts.

# ---------------------------------------------------------------------------
# Applications hosted by the process. Each [apps.<name>] section lists the
# listening port and the thread pools that application uses.
# ---------------------------------------------------------------------------
[apps..default]
run = true
count = 1

[apps.meta]
type = meta
name = meta
arguments =
ports = 34601
pools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_META_STATE,THREAD_POOL_FD,THREAD_POOL_DLOCK,THREAD_POOL_FDS_SERVICE
run = true
count = 1

[apps.replica]
type = replica
name = replica
arguments =
ports = 34801
pools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_FDS_SERVICE,THREAD_POOL_COMPACT
run = true
count = 1

[apps.collector]
name = collector
type = collector
arguments =
ports = 34101
pools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION
run = true
count = 1

[apps.mimic]
name = mimic
type = dsn.app.mimic
arguments =
pools = THREAD_POOL_DEFAULT
run = true
count = 1
delay_seconds = 30

# ---------------------------------------------------------------------------
# Runtime core and logging.
# ---------------------------------------------------------------------------
[core]
data_dir = %{app.dir}
tool = nativerun
;toollets = tracer, profiler, fault_injector
toollets = profiler
pause_on_start = false
enable_default_app_mimic = true
tls_trans_memory_KB = 1024
tcmalloc_release_rate = 1.0
logging_start_level = LOG_LEVEL_DEBUG
logging_factory_name = dsn::tools::simple_logger
logging_flush_on_exit = true

[tools.simple_logger]
short_header = false
fast_flush = false
max_number_of_log_files_on_disk = 500
stderr_start_level = LOG_LEVEL_ERROR

[nfs]
nfs_copy_block_bytes = 4194304
max_concurrent_remote_copy_requests = 50
max_concurrent_local_writes = 5
max_buffered_local_writes = 500
high_priority_speed_rate = 2
file_close_expire_time_ms = 60000
file_close_timer_interval_ms_on_server = 30000
max_file_copy_request_count_per_file = 10

[network]
primary_interface =
; how many network threads for network library(used by asio)
io_service_worker_count = 4
; how many connections can be established from one ip address to a server(both replica and meta), 0 means no threshold
conn_threshold_per_ip = 0

# ---------------------------------------------------------------------------
# Thread pools.
# ---------------------------------------------------------------------------
; specification for each thread pool
# NOTE(review): THREAD_POOL_META_SERVER is listed under [apps.meta] but has no
# section of its own here; presumably it takes its settings from
# [threadpool..default] -- confirm.
[threadpool..default]
worker_count = 4

[threadpool.THREAD_POOL_DEFAULT]
name = default
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
# The worker count in THREAD_POOL_DEFAULT must be >= 5.
# The info collector server runs four timer tasks (LPC_PEGASUS_APP_STAT_TIMER,
# LPC_PEGASUS_STORAGE_SIZE_STAT_TIMER, LPC_DETECT_AVAILABLE and
# LPC_PEGASUS_CAPACITY_UNIT_STAT_TIMER), each of which occupies a thread in
# THREAD_POOL_DEFAULT. Each timer task issues an RPC to the meta server and
# then waits for that RPC's callback to execute. With worker_count <= 4 every
# thread is held by a timer task, so the callbacks can never get a thread to
# run: a deadlock (the timer tasks wait for the callbacks, and the callbacks
# wait for a timer task to release a thread).
worker_count = 8

[threadpool.THREAD_POOL_REPLICATION]
name = replica
partitioned = true
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 24

[threadpool.THREAD_POOL_META_STATE]
name = meta_state
partitioned = true
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 1

[threadpool.THREAD_POOL_DLOCK]
name = dist_lock
partitioned = true
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 1

[threadpool.THREAD_POOL_FD]
name = fd
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 2

[threadpool.THREAD_POOL_LOCAL_APP]
name = local_app
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 24

[threadpool.THREAD_POOL_REPLICATION_LONG]
name = rep_long
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 8

[threadpool.THREAD_POOL_FDS_SERVICE]
name = fds_service
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 8

[threadpool.THREAD_POOL_COMPACT]
name = compact
partitioned = false
worker_priority = THREAD_xPRIORITY_NORMAL
worker_count = 8

# ---------------------------------------------------------------------------
# Meta server.
# ---------------------------------------------------------------------------
[meta_server]
server_list = %{meta.server.list}
cluster_root = /pegasus/%{cluster.name}
distributed_lock_service_type = distributed_lock_service_zookeeper
distributed_lock_service_parameters = /pegasus/%{cluster.name}/lock
meta_state_service_type = meta_state_service_zookeeper
meta_state_service_parameters =

node_live_percentage_threshold_for_update = 50
min_live_node_count_for_unfreeze = 3
meta_function_level_on_start = steady
recover_from_replica_server = false
hold_seconds_for_dropped_app = 604800

add_secondary_enable_flow_control = true
add_secondary_max_count_for_one_node = 20
stable_rs_min_running_seconds = 600
max_succssive_unstable_restart = 5

server_load_balancer_type = greedy_load_balancer
replica_assign_delay_ms_for_dropouts = 600000
max_replicas_in_group = 3
balancer_in_turn = false
only_primary_balancer = false
only_move_primary = false

cold_backup_disabled = false

enable_white_list = false
replica_white_list =

# ---------------------------------------------------------------------------
# Replica server.
# ---------------------------------------------------------------------------
[replication]
slog_dir = %{slog.dir}
data_dirs = %{data.dirs}
data_dirs_black_list_file = %{home.dir}/.pegasus_data_dirs_black_list
cluster_name = %{cluster.name}

deny_client_on_start = false
verbose_client_log_on_start = false
verbose_commit_log_on_start = false
delay_for_fd_timeout_on_start = false
empty_write_disabled = false
allow_non_idempotent_write = false

prepare_timeout_ms_for_secondaries = 3000
prepare_timeout_ms_for_potential_secondaries = 5000
prepare_decree_gap_for_debug_logging = 10000

batch_write_disabled = false
staleness_for_commit = 20
max_mutation_count_in_prepare_list = 110
mutation_2pc_min_replica_count = 2

group_check_disabled = false
group_check_interval_ms = 100000

checkpoint_disabled = false
checkpoint_interval_seconds = 300
checkpoint_min_decree_gap = 10000
checkpoint_max_interval_hours = 2

gc_disabled = false
gc_interval_ms = 30000
gc_memory_replica_interval_ms = 600000
gc_disk_error_replica_interval_seconds = 86400
gc_disk_garbage_replica_interval_seconds = 86400

disk_stat_disabled = false
disk_stat_interval_seconds = 600

fd_disabled = false
fd_check_interval_seconds = 2
fd_beacon_interval_seconds = 3
fd_lease_seconds = 20
fd_grace_seconds = 22

log_private_file_size_mb = 32
# NOTE(review): 7 KB looks unusually small next to the other log buffer
# settings in this section -- confirm this value is intended.
log_private_batch_buffer_kb = 7
log_private_batch_buffer_count = 512
log_private_batch_buffer_flush_interval_ms = 30000
log_private_reserve_max_size_mb = 1000
log_private_reserve_max_time_seconds = 36000

log_shared_file_size_mb = 128
log_shared_file_count_limit = 100
log_shared_batch_buffer_kb = 0
log_shared_force_flush = false
log_shared_pending_size_throttling_threshold_kb = 0
log_shared_pending_size_throttling_delay_ms = 0

config_sync_disabled = false
config_sync_interval_ms = 30000

;; WARNING: memory release may incur major performance downgrade when improperly configured.
;; ensure this feature is only enabled when it's necessary.
mem_release_enabled = false
mem_release_check_interval_ms = 3600000
mem_release_max_reserved_mem_percentage = 10

lb_interval_ms = 10000

learn_app_max_concurrent_count = 5

;; the prefix of the path used to save backup data on the cold backup media
;; recommend using the cluster name as the root
cold_backup_root = %{cluster.name}
max_concurrent_uploading_file_count = 10

# ---------------------------------------------------------------------------
# Pegasus storage engine, metrics and collector.
# ---------------------------------------------------------------------------
[pegasus.server]
rocksdb_verbose_log = false

# get: {100ms,1MB} ; multiGet: {100ms,10MB,1000}
rocksdb_slow_query_threshold_ns = 100000000
rocksdb_abnormal_get_size_threshold = 1000000
rocksdb_abnormal_multi_get_size_threshold = 10000000
rocksdb_abnormal_multi_get_iterate_count_threshold = 1000

rocksdb_write_buffer_size = 67108864
rocksdb_max_write_buffer_number = 3
rocksdb_max_background_flushes = 4
rocksdb_max_background_compactions = 12
rocksdb_num_levels = 6
rocksdb_target_file_size_base = 67108864
rocksdb_target_file_size_multiplier = 1
rocksdb_max_bytes_for_level_base = 671088640
rocksdb_max_bytes_for_level_multiplier = 10
rocksdb_level0_file_num_compaction_trigger = 4
rocksdb_level0_slowdown_writes_trigger = 30
rocksdb_level0_stop_writes_trigger = 60
rocksdb_compression_type = lz4

rocksdb_disable_table_block_cache = false
rocksdb_block_cache_capacity = 10737418240
rocksdb_block_cache_num_shard_bits = -1

rocksdb_disable_bloom_filter = false
# Bloom filter type, should be either 'common' or 'prefix'
rocksdb_filter_type = prefix

checkpoint_reserve_min_count = 2
checkpoint_reserve_time_seconds = 1800
update_rdb_stat_interval = 600
manual_compact_min_interval_seconds = 600

perf_counter_update_interval_seconds = 10
perf_counter_enable_logging = false

# Where the metrics are collected. If no value is given, no sink is used.
# Options:
# - falcon
# - prometheus
perf_counter_sink =

perf_counter_read_capacity_unit_size = 4096
perf_counter_write_capacity_unit_size = 4096

falcon_host = 127.0.0.1
falcon_port = 1988
falcon_path = /v1/push

# The HTTP port exposed to Prometheus for pulling metrics from pegasus server.
prometheus_port = 9091

[pegasus.collector]
available_detect_app = temp
available_detect_alert_script_dir = ./package/bin
available_detect_alert_email_address =
available_detect_interval_seconds = 3
available_detect_alert_fail_count = 30
available_detect_timeout = 5000

app_stat_interval_seconds = 10

usage_stat_app = stat
capacity_unit_fetch_interval_seconds = 8
storage_size_fetch_interval_seconds = 3600

[pegasus.clusters]
%{cluster.name} = %{meta.server.list}

[components.pegasus_perf_counter_number_percentile_atomic]
counter_computation_interval_seconds = 10

[zookeeper]
hosts_list = %{zk.server.list}
timeout_ms = 10000
logfile = zoo.log

# ---------------------------------------------------------------------------
# Per-task settings. Most sections below only carry a disabled
# ";is_profile = true" line, kept so profiling can be switched on per task.
# ---------------------------------------------------------------------------
[task..default]
is_trace = false
is_profile = false
allow_inline = false
rpc_call_channel = RPC_CHANNEL_TCP
rpc_call_header_format = NET_HDR_DSN
rpc_message_crc_required = false
rpc_call_header_format_name = dsn
rpc_timeout_milliseconds = 5000

[task.LPC_REPLICATION_INIT_LOAD]
;is_profile = true

[task.RPC_REPLICATION_WRITE_EMPTY]
;is_profile = true

[task.RPC_REPLICATION_WRITE_EMPTY_ACK]
;is_profile = true

[task.LPC_PER_REPLICA_CHECKPOINT_TIMER]
;is_profile = true

[task.LPC_PER_REPLICA_COLLECT_INFO_TIMER]
;is_profile = true

[task.LPC_GROUP_CHECK]
;is_profile = true

[task.LPC_CM_DISCONNECTED_SCATTER]
;is_profile = true

[task.LPC_QUERY_NODE_CONFIGURATION_SCATTER]
;is_profile = true

[task.LPC_QUERY_NODE_CONFIGURATION_SCATTER2]
;is_profile = true

[task.LPC_DELAY_UPDATE_CONFIG]
;is_profile = true

[task.LPC_DELAY_LEARN]
;is_profile = true

[task.LPC_LEARN_REMOTE_DELTA_FILES_COMPLETED]
;is_profile = true

[task.LPC_CHECKPOINT_REPLICA_COMPLETED]
;is_profile = true

[task.LPC_SIM_UPDATE_PARTITION_CONFIGURATION_REPLY]
;is_profile = true

[task.LPC_WRITE_REPLICATION_LOG]
;is_profile = true

[task.LPC_REPLICATION_ERROR]
;is_profile = true

# NOTE(review): "LERARN" looks like a misspelling of "LEARN"; the section name
# must match the task code registered by the server, so confirm before renaming.
[task.LPC_LERARN_REMOTE_DISK_STATE]
;is_profile = true

[task.RPC_CONFIG_PROPOSAL]
;is_profile = true

[task.RPC_CONFIG_PROPOSAL_ACK]
;is_profile = true

[task.RPC_QUERY_PN_DECREE]
;is_profile = true

[task.RPC_QUERY_PN_DECREE_ACK]
;is_profile = true

[task.RPC_QUERY_REPLICA_INFO]
;is_profile = true

[task.RPC_QUERY_REPLICA_INFO_ACK]
;is_profile = true

[task.RPC_PREPARE]
is_profile = true

[task.RPC_PREPARE_ACK]
is_profile = true

[task.LPC_DELAY_PREPARE]
;is_profile = true

[task.RPC_GROUP_CHECK]
;is_profile = true

[task.RPC_GROUP_CHECK_ACK]
;is_profile = true

[task.RPC_QUERY_APP_INFO]
;is_profile = true

[task.RPC_QUERY_APP_INFO_ACK]
;is_profile = true

[task.RPC_LEARN]
;is_profile = true

[task.RPC_LEARN_ACK]
;is_profile = true

[task.RPC_LEARN_COMPLETION_NOTIFY]
;is_profile = true

[task.RPC_LEARN_COMPLETION_NOTIFY_ACK]
;is_profile = true

[task.RPC_LEARN_ADD_LEARNER]
;is_profile = true

[task.RPC_LEARN_ADD_LEARNER_ACK]
;is_profile = true

[task.RPC_REMOVE_REPLICA]
;is_profile = true

[task.RPC_REMOVE_REPLICA_ACK]
;is_profile = true

[task.RPC_REPLICA_COPY_LAST_CHECKPOINT]
;is_profile = true

[task.RPC_REPLICA_COPY_LAST_CHECKPOINT_ACK]
;is_profile = true

[task.LPC_REPLICA_COPY_LAST_CHECKPOINT_DONE]
;is_profile = true

[task.RPC_COLD_BACKUP]
;is_profile = true

[task.RPC_COLD_BACKUP_ACK]
;is_profile = true

[task.LPC_REPLICATION_COLD_BACKUP]
;is_profile = true

[task.RPC_RRDB_RRDB_PUT]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
profiler::size.request.server = true

[task.RPC_RRDB_RRDB_PUT_ACK]
is_profile = true
# RRDB request/response task settings.
# Every request task in this group uses the same delay-based throttling
# settings and has profiling enabled; each matching *_ACK task only enables
# profiling. MULTI_PUT additionally profiles the request size, GET and
# MULTI_GET the response size.

[task.RPC_RRDB_RRDB_MULTI_PUT]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
profiler::size.request.server = true

[task.RPC_RRDB_RRDB_MULTI_PUT_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_REMOVE]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_REMOVE_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_MULTI_REMOVE]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_MULTI_REMOVE_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_INCR]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_INCR_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_CHECK_AND_SET]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_CHECK_AND_SET_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_CHECK_AND_MUTATE]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_CHECK_AND_MUTATE_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_GET]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
profiler::size.response.server = true

[task.RPC_RRDB_RRDB_GET_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_MULTI_GET]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true
profiler::size.response.server = true

[task.RPC_RRDB_RRDB_MULTI_GET_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_SORTKEY_COUNT]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_SORTKEY_COUNT_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_TTL]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_TTL_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_GET_SCANNER]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_GET_SCANNER_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_SCAN]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_SCAN_ACK]
is_profile = true

[task.RPC_RRDB_RRDB_CLEAR_SCANNER]
rpc_request_throttling_mode = TM_DELAY
rpc_request_delays_milliseconds = 50, 50, 50, 50, 50, 100
is_profile = true

[task.RPC_RRDB_RRDB_CLEAR_SCANNER_ACK]
is_profile = true

# Failure-detector ping and its ACK: sent over the UDP channel with message
# CRC required; profiling is left disabled.

[task.RPC_FD_FAILURE_DETECTOR_PING]
rpc_call_header_format = NET_HDR_DSN
rpc_call_channel = RPC_CHANNEL_UDP
rpc_message_crc_required = true
;is_profile = true

[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]
rpc_call_header_format = NET_HDR_DSN
rpc_call_channel = RPC_CHANNEL_UDP
rpc_message_crc_required = true
;is_profile = true