xfs_filestream.c 8.7 KB
Newer Older
D
Dave Chinner 已提交
1
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
 * Copyright (c) 2014 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
8
#include "xfs_shared.h"
9
#include "xfs_format.h"
10 11 12
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
13 14 15 16
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_mru_cache.h"
C
Christoph Hellwig 已提交
17
#include "xfs_trace.h"
18
#include "xfs_ag.h"
19
#include "xfs_ag_resv.h"
20
#include "xfs_trans.h"
D
Darrick J. Wong 已提交
21
#include "xfs_filestream.h"
22

23 24 25 26 27 28 29 30 31
/*
 * One filestream association, stored in the mount's filestream MRU cache and
 * keyed by the inode number of the directory it was created for.  The cache
 * only sees the embedded @mru element; container_of() on that element
 * recovers the enclosing item.
 */
struct xfs_fstrm_item {
	struct xfs_mru_cache_elem	mru;
	xfs_agnumber_t			ag; /* AG in use for this directory */
};

/* Flag bits accepted by xfs_filestream_pick_ag() in its @flags argument. */
enum xfs_fstrm_alloc {
	/* The allocation is for user data (XFS_ALLOC_USERDATA was set). */
	XFS_PICK_USERDATA	= (1 << 0),
	/* Low space mode: also accept AGs that would otherwise be avoided. */
	XFS_PICK_LOWSPACE	= (1 << 1),
};
32

33 34
/*
 * Allocation group filestream associations are tracked with per-ag atomic
35
 * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
36
 * particular AG already has active filestreams associated with it.
37
 */
38
int
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
xfs_filestream_peek_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	struct xfs_perag *pag;
	int		ret;

	pag = xfs_perag_get(mp, agno);
	ret = atomic_read(&pag->pagf_fstrms);
	xfs_perag_put(pag);
	return ret;
}

/*
 * Take a filestream reference on @agno by incrementing its pagf_fstrms
 * counter.  Returns the counter value after the increment, so a return of 1
 * means the caller's reference is the only one.
 */
static int
xfs_filestream_get_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	struct xfs_perag	*pag = xfs_perag_get(mp, agno);
	int			nr_streams = atomic_inc_return(&pag->pagf_fstrms);

	xfs_perag_put(pag);
	return nr_streams;
}

/*
 * Drop a filestream reference on @agno by decrementing its pagf_fstrms
 * counter.
 */
static void
xfs_filestream_put_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	struct xfs_perag	*pag = xfs_perag_get(mp, agno);

	atomic_dec(&pag->pagf_fstrms);
	xfs_perag_put(pag);
}
77

78 79
static void
xfs_fstrm_free_func(
80
	void			*data,
81 82
	struct xfs_mru_cache_elem *mru)
{
83
	struct xfs_mount	*mp = data;
84 85 86
	struct xfs_fstrm_item	*item =
		container_of(mru, struct xfs_fstrm_item, mru);

87 88
	xfs_filestream_put_ag(mp, item->ag);
	trace_xfs_filestream_free(mp, mru->key, item->ag);
89

90
	kmem_free(item);
91 92
}

93 94 95 96 97
/*
 * Scan the AGs starting at startag looking for an AG that isn't in use and has
 * at least minlen blocks free.
 */
static int
98 99 100 101 102 103
xfs_filestream_pick_ag(
	struct xfs_inode	*ip,
	xfs_agnumber_t		startag,
	xfs_agnumber_t		*agp,
	int			flags,
	xfs_extlen_t		minlen)
104
{
105 106 107
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_fstrm_item	*item;
	struct xfs_perag	*pag;
108
	xfs_extlen_t		longest, free = 0, minfree, maxfree = 0;
109 110 111
	xfs_agnumber_t		ag, max_ag = NULLAGNUMBER;
	int			err, trylock, nscan;

D
Dave Chinner 已提交
112
	ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
113 114 115 116 117 118 119 120 121 122 123

	/* 2% of an AG's blocks must be free for it to be chosen. */
	minfree = mp->m_sb.sb_agblocks / 50;

	ag = startag;
	*agp = NULLAGNUMBER;

	/* For the first pass, don't sleep trying to init the per-AG. */
	trylock = XFS_ALLOC_FLAG_TRYLOCK;

	for (nscan = 0; 1; nscan++) {
124
		trace_xfs_filestream_scan(mp, ip->i_ino, ag);
125

126
		pag = xfs_perag_get(mp, ag);
127 128 129

		if (!pag->pagf_init) {
			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
130
			if (err) {
131
				xfs_perag_put(pag);
132 133 134 135
				if (err != -EAGAIN)
					return err;
				/* Couldn't lock the AGF, skip this AG. */
				continue;
136
			}
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
		}

		/* Keep track of the AG with the most free blocks. */
		if (pag->pagf_freeblks > maxfree) {
			maxfree = pag->pagf_freeblks;
			max_ag = ag;
		}

		/*
		 * The AG reference count does two things: it enforces mutual
		 * exclusion when examining the suitability of an AG in this
		 * loop, and it guards against two filestreams being established
		 * in the same AG as each other.
		 */
		if (xfs_filestream_get_ag(mp, ag) > 1) {
			xfs_filestream_put_ag(mp, ag);
			goto next_ag;
		}

156
		longest = xfs_alloc_longest_free_extent(pag,
157 158
				xfs_alloc_min_freelist(mp, pag),
				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
159 160 161 162 163 164 165
		if (((minlen && longest >= minlen) ||
		     (!minlen && pag->pagf_freeblks >= minfree)) &&
		    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
		     (flags & XFS_PICK_LOWSPACE))) {

			/* Break out, retaining the reference on the AG. */
			free = pag->pagf_freeblks;
166
			xfs_perag_put(pag);
167 168 169 170 171 172 173
			*agp = ag;
			break;
		}

		/* Drop the reference on this AG, it's not usable. */
		xfs_filestream_put_ag(mp, ag);
next_ag:
174
		xfs_perag_put(pag);
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
		/* Move to the next AG, wrapping to AG 0 if necessary. */
		if (++ag >= mp->m_sb.sb_agcount)
			ag = 0;

		/* If a full pass of the AGs hasn't been done yet, continue. */
		if (ag != startag)
			continue;

		/* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
		if (trylock != 0) {
			trylock = 0;
			continue;
		}

		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
		if (!(flags & XFS_PICK_LOWSPACE)) {
			flags |= XFS_PICK_LOWSPACE;
			continue;
		}

		/*
		 * Take the AG with the most free space, regardless of whether
		 * it's already in use by another filestream.
		 */
		if (max_ag != NULLAGNUMBER) {
			xfs_filestream_get_ag(mp, max_ag);
			free = maxfree;
			*agp = max_ag;
			break;
		}

		/* take AG 0 if none matched */
207
		trace_xfs_filestream_pick(ip, *agp, free, nscan);
208 209 210 211
		*agp = 0;
		return 0;
	}

212
	trace_xfs_filestream_pick(ip, *agp, free, nscan);
213

214
	if (*agp == NULLAGNUMBER)
215 216
		return 0;

D
Dave Chinner 已提交
217
	err = -ENOMEM;
218
	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
219
	if (!item)
220
		goto out_put_ag;
221

222
	item->ag = *agp;
223

224
	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
225
	if (err) {
D
Dave Chinner 已提交
226
		if (err == -EEXIST)
227 228
			err = 0;
		goto out_free_item;
229 230 231 232
	}

	return 0;

233
out_free_item:
234
	kmem_free(item);
235 236 237
out_put_ag:
	xfs_filestream_put_ag(mp, *agp);
	return err;
238 239
}

240 241 242
static struct xfs_inode *
xfs_filestream_get_parent(
	struct xfs_inode	*ip)
243
{
244 245
	struct inode		*inode = VFS_I(ip), *dir = NULL;
	struct dentry		*dentry, *parent;
246

247 248 249
	dentry = d_find_alias(inode);
	if (!dentry)
		goto out;
250

251 252 253
	parent = dget_parent(dentry);
	if (!parent)
		goto out_dput;
254

255
	dir = igrab(d_inode(parent));
256
	dput(parent);
257

258 259 260 261
out_dput:
	dput(dentry);
out:
	return dir ? XFS_I(dir) : NULL;
262 263 264
}

/*
265 266 267 268
 * Find the right allocation group for a file, either by finding an
 * existing file stream or creating a new one.
 *
 * Returns NULLAGNUMBER in case of an error.
269 270 271
 */
xfs_agnumber_t
xfs_filestream_lookup_ag(
272
	struct xfs_inode	*ip)
273
{
274 275
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_inode	*pip = NULL;
276
	xfs_agnumber_t		startag, ag = NULLAGNUMBER;
277
	struct xfs_mru_cache_elem *mru;
278

D
Dave Chinner 已提交
279
	ASSERT(S_ISREG(VFS_I(ip)->i_mode));
280

281 282
	pip = xfs_filestream_get_parent(ip);
	if (!pip)
283
		return NULLAGNUMBER;
284 285

	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
286
	if (mru) {
287
		ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
288
		xfs_mru_cache_done(mp->m_filestream);
289

290
		trace_xfs_filestream_lookup(mp, ip->i_ino, ag);
291
		goto out;
292 293 294 295 296 297 298
	}

	/*
	 * Set the starting AG using the rotor for inode32, otherwise
	 * use the directory inode's AG.
	 */
	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
299
		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
300 301 302 303 304 305
		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
		mp->m_agfrotor = (mp->m_agfrotor + 1) %
		                 (mp->m_sb.sb_agcount * rotorstep);
	} else
		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);

306 307 308
	if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
		ag = NULLAGNUMBER;
out:
309
	xfs_irele(pip);
310
	return ag;
311 312 313
}

/*
 * Pick a new allocation group for the current file and its file stream.
 *
 * This is called when the allocator can't find a suitable extent in the
 * current AG, and we have to move the stream into a new AG with more space.
 *
 * On return *agp always holds a usable AG number: it falls back to 0 when no
 * parent directory could be found or no AG was picked.  Returns 0 or the
 * error from xfs_filestream_pick_ag().
 */
int
xfs_filestream_new_ag(
	struct xfs_bmalloca	*ap,
	xfs_agnumber_t		*agp)
{
	struct xfs_inode	*ip = ap->ip;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_mru_cache_elem *old;
	struct xfs_inode	*parent;
	xfs_agnumber_t		first_ag = 0;
	int			pick_flags = 0;
	int			error;

	*agp = NULLAGNUMBER;

	parent = xfs_filestream_get_parent(ip);
	if (!parent) {
		*agp = 0;
		return 0;
	}

	/* If a stream exists, detach it and start the scan after its AG. */
	old = xfs_mru_cache_remove(mp->m_filestream, parent->i_ino);
	if (old) {
		struct xfs_fstrm_item	*old_item;

		old_item = container_of(old, struct xfs_fstrm_item, mru);
		first_ag = (old_item->ag + 1) % mp->m_sb.sb_agcount;
	}

	if (ap->datatype & XFS_ALLOC_USERDATA)
		pick_flags |= XFS_PICK_USERDATA;
	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
		pick_flags |= XFS_PICK_LOWSPACE;

	error = xfs_filestream_pick_ag(parent, first_ag, agp, pick_flags,
				       ap->length);

	/*
	 * Only free the item here so we skip over the old AG earlier.
	 */
	if (old)
		xfs_fstrm_free_func(mp, old);

	xfs_irele(parent);

	if (*agp == NULLAGNUMBER)
		*agp = 0;
	return error;
}
364

365 366 367 368 369 370
void
xfs_filestream_deassociate(
	struct xfs_inode	*ip)
{
	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
}
371

372 373 374 375
/*
 * Create the per-mount filestream MRU cache.  Items dropped from the cache
 * are released through xfs_fstrm_free_func(), which is handed @mp as its
 * data pointer.  Returns the result of xfs_mru_cache_create().
 */
int
xfs_filestream_mount(
	xfs_mount_t	*mp)
{
	int		error;

	/*
	 * The filestream timer tunable is currently fixed within the range of
	 * one second to four minutes, with five seconds being the default.  The
	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
	 * timer tunable to within about 10 percent.  This requires at least 10
	 * groups.
	 */
	error = xfs_mru_cache_create(&mp->m_filestream, mp,
				     xfs_fstrm_centisecs * 10, 10,
				     xfs_fstrm_free_func);
	return error;
}
386

387 388 389 390 391 392
/*
 * Destroy the filestream MRU cache that xfs_filestream_mount() created for
 * @mp.
 */
void
xfs_filestream_unmount(
	xfs_mount_t	*mp)
{
	xfs_mru_cache_destroy(mp->m_filestream);
}