diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 15b04ec97b20182022468f94a2689e42a46d0a2c..342388f921105bd8ee8509cfd1dea64f46f9d59b 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1461,6 +1461,9 @@ IO Interface Files The IO cost model isn't expected to be accurate in absolute sense and is scaled to the device behavior dynamically. + If needed, tools/cgroup/iocost_coef_gen.py can be used to + generate device-specific coefficients. + io.weight A read-write flat-keyed file which exists on non-root cgroups. The default is "default 100". diff --git a/block/blk-iocost.c b/block/blk-iocost.c index f9ef38b65810a813716f8c8c89fb283e873f948e..ed1d767051d9a1b6f9424b63c404311a8f331af4 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -46,6 +46,9 @@ * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate * device-specific coefficients. * + * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate + * device-specific coefficients. + * * 2. Control Strategy * * The device virtual time (vtime) is used as the primary control metric. diff --git a/tools/cgroup/iocost_coef_gen.py b/tools/cgroup/iocost_coef_gen.py new file mode 100644 index 0000000000000000000000000000000000000000..df17a2ae80e5f900576dc0a55af0c2544db6365b --- /dev/null +++ b/tools/cgroup/iocost_coef_gen.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2019 Tejun Heo +# Copyright (C) 2019 Andy Newell +# Copyright (C) 2019 Facebook + +desc = """ +Generate linear IO cost model coefficients used by the blk-iocost +controller. If the target raw testdev is specified, destructive tests +are performed against the whole device; otherwise, on +./iocost-coef-fio.testfile. The result can be written directly to +/sys/fs/cgroup/io.cost.model. + +On high performance devices, --numjobs > 1 is needed to achieve +saturation. + +See Documentation/admin-guide/cgroup-v2.rst and block/blk-iocost.c +for more details. +""" + +import argparse +import re +import json +import glob +import os +import sys +import atexit +import shutil +import tempfile +import subprocess + +parser = argparse.ArgumentParser(description=desc, + formatter_class=argparse.RawTextHelpFormatter) +parser.add_argument('--testdev', metavar='DEV', + help='Raw block device to use for testing, ignores --testfile-size') +parser.add_argument('--testfile-size-gb', type=float, metavar='GIGABYTES', default=16, + help='Testfile size in gigabytes (default: %(default)s)') +parser.add_argument('--duration', type=int, metavar='SECONDS', default=120, + help='Individual test run duration in seconds (default: %(default)s)') +parser.add_argument('--seqio-block-mb', metavar='MEGABYTES', type=int, default=128, + help='Sequential test block size in megabytes (default: %(default)s)') +parser.add_argument('--seq-depth', type=int, metavar='DEPTH', default=64, + help='Sequential test queue depth (default: %(default)s)') +parser.add_argument('--rand-depth', type=int, metavar='DEPTH', default=64, + help='Random test queue depth (default: %(default)s)') +parser.add_argument('--numjobs', type=int, metavar='JOBS', default=1, + help='Number of parallel fio jobs to run (default: %(default)s)') +parser.add_argument('--quiet', action='store_true') +parser.add_argument('--verbose', action='store_true') + +def info(msg): + if not args.quiet: + print(msg) + +def dbg(msg): + if args.verbose and not args.quiet: + print(msg) + +# determine ('DEVNAME', 'MAJ:MIN') for @path +def dir_to_dev(path): + # find the block device the current directory is on + devname = subprocess.run(f'findmnt -nvo SOURCE -T{path}', + stdout=subprocess.PIPE, shell=True).stdout + devname = os.path.basename(devname).decode('utf-8').strip() + + # partition -> whole device + parents = glob.glob('/sys/block/*/' + devname) + if len(parents): + devname = os.path.basename(os.path.dirname(parents[0])) + rdev = os.stat(f'/dev/{devname}').st_rdev + return (devname, f'{os.major(rdev)}:{os.minor(rdev)}') + +def create_testfile(path, size): + global args + + if os.path.isfile(path) and os.stat(path).st_size == size: + return + + info(f'Creating testfile {path}') + subprocess.check_call(f'rm -f {path}', shell=True) + subprocess.check_call(f'touch {path}', shell=True) + subprocess.call(f'chattr +C {path}', shell=True) + subprocess.check_call( + f'pv -s {size} -pr /dev/urandom {"-q" if args.quiet else ""} | ' + f'dd of={path} count={size} ' + f'iflag=count_bytes,fullblock oflag=direct bs=16M status=none', + shell=True) + +def run_fio(testfile, duration, iotype, iodepth, blocksize, jobs): + global args + + eta = 'never' if args.quiet else 'always' + outfile = tempfile.NamedTemporaryFile() + cmd = (f'fio --direct=1 --ioengine=libaio --name=coef ' + f'--filename={testfile} --runtime={round(duration)} ' + f'--readwrite={iotype} --iodepth={iodepth} --blocksize={blocksize} ' + f'--eta={eta} --output-format json --output={outfile.name} ' + f'--time_based --numjobs={jobs}') + if args.verbose: + dbg(f'Running {cmd}') + subprocess.check_call(cmd, shell=True) + with open(outfile.name, 'r') as f: + d = json.loads(f.read()) + return sum(j['read']['bw_bytes'] + j['write']['bw_bytes'] for j in d['jobs']) + +def restore_elevator_nomerges(): + global elevator_path, nomerges_path, elevator, nomerges + + info(f'Restoring elevator to {elevator} and nomerges to {nomerges}') + with open(elevator_path, 'w') as f: + f.write(elevator) + with open(nomerges_path, 'w') as f: + f.write(nomerges) + + +args = parser.parse_args() + +missing = False +for cmd in [ 'findmnt', 'pv', 'dd', 'fio' ]: + if not shutil.which(cmd): + print(f'Required command "{cmd}" is missing', file=sys.stderr) + missing = True +if missing: + sys.exit(1) + +if args.testdev: + devname = os.path.basename(args.testdev) + rdev = os.stat(f'/dev/{devname}').st_rdev + devno = f'{os.major(rdev)}:{os.minor(rdev)}' + testfile = f'/dev/{devname}' + info(f'Test target: {devname}({devno})') +else: + devname, devno = dir_to_dev('.') + testfile = 'iocost-coef-fio.testfile' + testfile_size = int(args.testfile_size_gb * 2 ** 30) + create_testfile(testfile, testfile_size) + info(f'Test target: {testfile} on {devname}({devno})') + +elevator_path = f'/sys/block/{devname}/queue/scheduler' +nomerges_path = f'/sys/block/{devname}/queue/nomerges' + +with open(elevator_path, 'r') as f: + elevator = re.sub(r'.*\[(.*)\].*', r'\1', f.read().strip()) +with open(nomerges_path, 'r') as f: + nomerges = f.read().strip() + +info(f'Temporarily disabling elevator and merges') +atexit.register(restore_elevator_nomerges) +with open(elevator_path, 'w') as f: + f.write('none') +with open(nomerges_path, 'w') as f: + f.write('1') + +info('Determining rbps...') +rbps = run_fio(testfile, args.duration, 'read', + 1, args.seqio_block_mb * (2 ** 20), args.numjobs) +info(f'\nrbps={rbps}, determining rseqiops...') +rseqiops = round(run_fio(testfile, args.duration, 'read', + args.seq_depth, 4096, args.numjobs) / 4096) +info(f'\nrseqiops={rseqiops}, determining rrandiops...') +rrandiops = round(run_fio(testfile, args.duration, 'randread', + args.rand_depth, 4096, args.numjobs) / 4096) +info(f'\nrrandiops={rrandiops}, determining wbps...') +wbps = run_fio(testfile, args.duration, 'write', + 1, args.seqio_block_mb * (2 ** 20), args.numjobs) +info(f'\nwbps={wbps}, determining wseqiops...') +wseqiops = round(run_fio(testfile, args.duration, 'write', + args.seq_depth, 4096, args.numjobs) / 4096) +info(f'\nwseqiops={wseqiops}, determining wrandiops...') +wrandiops = round(run_fio(testfile, args.duration, 'randwrite', + args.rand_depth, 4096, args.numjobs) / 4096) +info(f'\nwrandiops={wrandiops}') +restore_elevator_nomerges() +atexit.unregister(restore_elevator_nomerges) +info('') + +print(f'{devno} rbps={rbps} rseqiops={rseqiops} rrandiops={rrandiops} ' + f'wbps={wbps} wseqiops={wseqiops} wrandiops={wrandiops}')