#
# Copyright 2016 Yahoo Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

## Bookie settings

# Port that the bookie server listens on
bookiePort=3181

# Set the network interface that the bookie should listen on.
# If not set, the bookie will listen on all interfaces.
#listeningInterface=eth0

# Whether the bookie is allowed to use a loopback interface as its primary
# interface (i.e. the interface it uses to establish its identity).
# By default, loopback interfaces are not allowed as the primary
# interface.
# Using a loopback interface as the primary interface usually indicates
# a configuration error. For example, it's fairly common in some VPS setups
# to not configure a hostname, or to have the hostname resolve to
# 127.0.0.1. If this is the case, then all bookies in the cluster will
# establish their identities as 127.0.0.1:3181, and only one will be able
# to join the cluster. For VPSs configured like this, you should explicitly
# set the listening interface.
#allowLoopback=false

# Directory where BookKeeper outputs its write-ahead log
journalDirectory=data/bookkeeper/journal

# Directory where BookKeeper outputs ledger snapshots
# Multiple directories can be defined to store snapshots, separated by ','
# For example:
# ledgerDirectories=/tmp/bk1-data,/tmp/bk2-data
#
# Ideally the ledger dirs and the journal dir are each on a different device,
# which reduces contention between random I/O and sequential writes.
# It is possible to run with a single disk, but performance will be significantly lower.
ledgerDirectories=data/bookkeeper/ledgers
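
# Illustrative layout, assuming the journal and ledgers are on two separate
# devices (hypothetical mount points, not defaults):
# journalDirectory=/mnt/journal-ssd/bookkeeper/journal
# ledgerDirectories=/mnt/data-disk/bookkeeper/ledgers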
# Directories to store index files. If not specified, ledgerDirectories will be used.
# indexDirectories=data/bookkeeper/ledgers

# Ledger Manager Class
# What kind of ledger manager is used to manage how ledgers are stored, managed,
# and garbage collected. See 'BookKeeper Internals' for detailed info.
ledgerManagerType=hierarchical

# Root zookeeper path to store ledger metadata
# This parameter is used by zookeeper-based ledger manager as a root znode to
# store all ledgers.
# zkLedgersRootPath=/ledgers

# Ledger storage implementation class
ledgerStorageClass=org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage

# Enable/Disable entry logger preallocation
# entryLogFilePreallocationEnabled=true

# Max file size of the entry logger, in bytes
# A new entry log file will be created when the old one reaches the file size limit
# logSizeLimit=2147483648

# Threshold for minor compaction
# Entry log files whose remaining size percentage falls below
# this threshold will be compacted in a minor compaction.
# If it is set to less than zero, minor compaction is disabled.
# minorCompactionThreshold=0.2
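
# For example (illustrative): with minorCompactionThreshold=0.2, an entry log
# whose remaining live data has fallen below 20% of its size becomes a
# candidate for minor compaction.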

# Interval to run minor compaction, in seconds
# If it is set to less than zero, minor compaction is disabled.
# minorCompactionInterval=3600

# Threshold for major compaction
# Entry log files whose remaining size percentage falls below
# this threshold will be compacted in a major compaction.
# Entry log files whose remaining size percentage is still
# higher than the threshold will never be compacted.
# If it is set to less than zero, major compaction is disabled.
majorCompactionThreshold=0.5

# Interval to run major compaction, in seconds
# If it is set to less than zero, the major compaction is disabled.
# majorCompactionInterval=86400
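
# Worked example with the thresholds above (illustrative): an entry log with
# 40% of its data still live is skipped by minor compaction (0.4 > 0.2) but
# eligible for major compaction (0.4 < 0.5); one with 60% live data is never
# compacted (0.6 > 0.5).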

# Set the maximum number of entries which can be compacted without flushing.
# When compacting, the entries are written to the entrylog and the new offsets
# are cached in memory. Once the entrylog is flushed the index is updated with
# the new offsets. This parameter controls the number of entries added to the
# entrylog before a flush is forced. A higher value for this parameter means
# more memory will be used for offsets. Each offset consists of 3 longs.
# This parameter should _not_ be modified unless you know what you're doing.
# The default is 100,000.
#compactionMaxOutstandingRequests=100000
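
# Rough arithmetic (an estimate, assuming 8 bytes per long): 100,000
# outstanding entries * 3 longs * 8 bytes is ~2.4 MB of memory for cached offsets.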

# Set the rate at which compaction will re-add entries. The unit is adds per second.
#compactionRate=1000

# Throttle compaction by bytes or by entries.
#isThrottleByBytes=false

# Set the rate at which compaction will re-add entries. The unit is adds per second.
#compactionRateByEntries=1000

# Set the rate at which compaction will re-add entries. The unit is bytes added per second.
#compactionRateByBytes=1000000
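
# Illustrative alternative (hypothetical values, not defaults): throttle
# compaction by bytes at roughly 5 MB/s instead of by entry count.
# isThrottleByBytes=true
# compactionRateByBytes=5242880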

# Max file size of a journal file, in megabytes
# A new journal file will be created when the old one reaches the file size limit
#
# journalMaxSizeMB=2048

# Max number of old journal files to keep
# Keeping a number of old journal files helps data recovery in special cases
#
# journalMaxBackups=5

# How much space should we pre-allocate at a time in the journal
# journalPreAllocSizeMB=16

# Size of the write buffers used for the journal
# journalWriteBufferSizeKB=64

# Should we remove pages from page cache after force write
journalRemoveFromPageCache=true

# Should we group journal force writes, which optimizes group commit
# for higher throughput
# journalAdaptiveGroupWrites=true

# Maximum latency to impose on a journal write to achieve grouping
journalMaxGroupWaitMSec=1
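
# Illustrative trade-off (not a recommendation): raising this wait, e.g.
# journalMaxGroupWaitMSec=2, batches more writes per fsync for higher
# throughput at the cost of added per-write latency.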

# All journal writes and commits should be aligned to the given size
journalAlignmentSize=4096

# Maximum writes to buffer to achieve grouping
# journalBufferedWritesThreshold=524288

# Whether we should flush the journal when the journal queue is empty
# journalFlushWhenQueueEmpty=false

# The number of threads that should handle journal callbacks
numJournalCallbackThreads=8

# The max number of entries to keep in a fragment for re-replication
rereplicationEntryBatchSize=5000

# Interval at which to trigger the next garbage collection, in milliseconds
# Since garbage collection runs in the background, too-frequent GC
# will hurt performance. It is better to use a higher GC
# interval if there is enough disk capacity.
gcWaitTime=900000

# Interval at which to trigger the next garbage collection of over-replicated
# ledgers, in milliseconds [Default: 1 day]. This should not run very frequently, since we read
# the metadata for all the ledgers on the bookie from ZooKeeper
# gcOverreplicatedLedgerWaitTime=86400000

# Interval at which to flush ledger index pages to disk, in milliseconds
# Flushing index files introduces a lot of random disk I/O.
# If the journal dir and ledger dirs are each on different devices,
# flushing will not affect performance. But if the journal dir
# and ledger dirs are on the same device, performance degrades significantly
# with too-frequent flushing. You can consider increasing the flush interval
# to get better performance, but you will pay with a longer bookie
# server restart after a failure.
#
flushInterval=60000

# Interval at which to check whether the bookie is dead or not, in milliseconds
#
# bookieDeathWatchInterval=1000

## zookeeper client settings

# A list of one or more servers on which ZooKeeper is running.
# The server list can be comma separated values, for example:
# zkServers=zk1:2181,zk2:2181,zk3:2181
zkServers=localhost:2181

# ZooKeeper client session timeout in milliseconds
# The bookie server will exit if it receives SESSION_EXPIRED because it
# was partitioned off from ZooKeeper for longer than the session timeout.
# JVM garbage collection pauses and disk I/O can cause SESSION_EXPIRED.
# Increasing this value can help avoid this issue.
zkTimeout=30000
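
# For example (illustrative): with zkTimeout=30000, a GC pause or disk stall
# that keeps the bookie from heartbeating ZooKeeper for more than ~30 seconds
# can expire the session and cause the bookie to exit.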

## NIO Server settings

# This setting is used to enable/disable Nagle's algorithm, which is a means of
# improving the efficiency of TCP/IP networks by reducing the number of packets
# that need to be sent over the network.
# If you are sending many small messages, such that more than one can fit in
# a single IP packet, setting serverTcpNoDelay to false to enable Nagle's
# algorithm can provide better performance.
# Default value is true.
#
# serverTcpNoDelay=true

## ledger cache settings

# Max number of ledger index files that can be opened in the bookie server
# If the number of ledger index files reaches this limit, the bookie
# server starts to swap some ledgers from memory to disk.
# Too-frequent swapping will affect performance. You can tune this number
# to gain performance according to your requirements.
openFileLimit=0

# Size of an index page in the ledger cache, in bytes
# A larger index page can improve the performance of writing pages to disk,
# which is efficient when you have a small number of ledgers and these
# ledgers have a similar number of entries.
# If you have a large number of ledgers and each ledger has fewer entries,
# a smaller index page will improve memory usage.
# pageSize=8192

# How many index pages are provided in the ledger cache
# If the number of index pages reaches this limit, the bookie server
# starts to swap some ledgers from memory to disk. You can increase
# this value when you find swapping becoming more frequent. But make sure
# pageLimit*pageSize is not more than the JVM max memory limit,
# otherwise you will get an OutOfMemoryException.
# In general, increasing pageLimit and using a smaller index page gives
# better performance when there is a large number of ledgers with fewer entries.
# If pageLimit is -1, the bookie server will use 1/3 of the JVM memory to compute
# the limit on the number of index pages.
pageLimit=0
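
# Worked example (hypothetical values): pageLimit=4096 with the default
# pageSize=8192 caps the index page cache at 4096 * 8192 bytes = 32 MB.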

# If all configured ledger directories are full, then support only read requests from clients.
# If 'readOnlyModeEnabled=true', then when all ledger disks are full the bookie will be
# converted to read-only mode and serve only read requests. Otherwise the bookie will be
# shut down. By default this is disabled.
readOnlyModeEnabled=true

# For each ledger dir, the maximum disk space which can be used.
# Default is 0.95f, i.e. at most 95% of the disk can be used, after which
# nothing will be written to that partition. If all ledger dir partitions are
# full, then the bookie will turn to read-only mode if 'readOnlyModeEnabled=true'
# is set, else it will shut down.
# Valid values should be between 0 and 1 (exclusive).
#diskUsageThreshold=0.95
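
# For example (illustrative): with diskUsageThreshold=0.95 on a 1 TB ledger
# partition, writes to that partition stop once roughly 950 GB is used.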

# Disk check interval in milliseconds; the interval at which to check the ledger dirs usage.
# Default is 10000
#diskCheckInterval=10000

# Interval at which the auditor will do a check of all ledgers in the cluster.
# By default this runs once a week. The interval is set in seconds.
# To disable the periodic check completely, set this to 0.
# Note that periodic checking will put extra load on the cluster, so it should
# not be run more frequently than once a day.
#auditorPeriodicCheckInterval=604800

# The interval between auditor bookie checks.
# The auditor bookie check inspects ledger metadata to see which bookies should
# contain entries for each ledger. If a bookie which should contain entries is
# unavailable, then the ledger containing that entry is marked for recovery.
# Setting this to 0 disables the periodic check. Bookie checks will still
# run when a bookie fails.
# The interval is specified in seconds.
#auditorPeriodicBookieCheckInterval=86400

# Number of threads that should handle write requests. If zero, writes will
# be handled by Netty threads directly.
numAddWorkerThreads=0

# Number of threads that should handle read requests. If zero, reads will
# be handled by Netty threads directly.
numReadWorkerThreads=8

# If read worker threads are enabled, limit the number of pending requests to
# keep the executor queue from growing indefinitely
maxPendingReadRequestsPerThread=2500

# The number of bytes we should use as capacity for BufferedReadChannel. Default is 512 bytes.
readBufferSizeBytes=4096

# The number of bytes used as capacity for the write buffer. Default is 64KB.
# writeBufferSizeBytes=65536

# Whether the bookie should use its hostname to register with the
# coordination service (e.g. ZooKeeper).
# When false, the bookie will use its IP address for the registration.
# Defaults to false.
#useHostNameAsBookieID=false

# Stats Provider Class
#statsProviderClass=org.apache.bookkeeper.stats.CodahaleMetricsProvider
#codahaleStatsJmxEndpoint=metrics


## DB Ledger storage configuration

# Size of the write cache. Memory is allocated from JVM direct memory.
# The write cache is used to buffer entries before flushing into the entry log.
# For good performance, it should be big enough to hold a substantial
# amount of entries over the flush interval
dbStorage_writeCacheMaxSizeMb=512

# Size of the read cache. Memory is allocated from JVM direct memory.
# This read cache is pre-filled by doing read-ahead whenever a cache miss happens
dbStorage_readAheadCacheMaxSizeMb=256

# How many entries to pre-fill in cache after a read cache miss
dbStorage_readAheadCacheBatchSize=1000
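
# For example (illustrative): with a batch size of 1000, a read cache miss
# triggers a read-ahead that pre-fills the cache with up to 1000 subsequent
# entries.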

## RocksDB specific configurations
## DbLedgerStorage uses RocksDB to store the indexes from
## (ledgerId, entryId) -> (entryLog, offset)

# Size of the RocksDB block cache. For best performance, this cache
# should be big enough to hold a significant portion of the index
# database, which can reach ~2GB in some cases
# dbStorage_rocksDB_blockCacheSize=268435456 # 256 MBytes

# dbStorage_rocksDB_writeBufferSizeMB=64
# dbStorage_rocksDB_sstSizeInMB=64
# dbStorage_rocksDB_blockSize=65536
# dbStorage_rocksDB_bloomFilterBitsPerKey=10
# dbStorage_rocksDB_numLevels=-1
# dbStorage_rocksDB_numFilesInLevel0=4
# dbStorage_rocksDB_maxSizeInLevel1MB=256