xfs_filestream.c 8.7 KB
Newer Older
D
Dave Chinner 已提交
1
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
 * Copyright (c) 2014 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
8
#include "xfs_shared.h"
9
#include "xfs_format.h"
10 11 12
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
13 14 15 16
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_mru_cache.h"
C
Christoph Hellwig 已提交
17
#include "xfs_trace.h"
18
#include "xfs_ag.h"
19
#include "xfs_ag_resv.h"
20
#include "xfs_trans.h"
D
Darrick J. Wong 已提交
21
#include "xfs_filestream.h"
22

23 24 25 26 27 28 29 30 31
/*
 * Filestream MRU cache entry.  The generic cache element is embedded so that
 * container_of() can recover the item from the element handed back by the
 * MRU cache; entries are inserted keyed by a directory's inode number.
 */
struct xfs_fstrm_item {
	struct xfs_mru_cache_elem	mru;
	xfs_agnumber_t			ag; /* AG in use for this directory */
};

/*
 * Flag bits passed to xfs_filestream_pick_ag() to steer AG selection.
 */
enum xfs_fstrm_alloc {
	/* Allocation is for user data: AGs with pagf_metadata set are skipped. */
	XFS_PICK_USERDATA = 1,
	/* Low space mode: lifts the pagf_metadata restriction above. */
	XFS_PICK_LOWSPACE = 2,
};
32

33 34
/*
 * Allocation group filestream associations are tracked with per-ag atomic
35
 * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
36
 * particular AG already has active filestreams associated with it.
37
 */
38
int
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
xfs_filestream_peek_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	struct xfs_perag *pag;
	int		ret;

	pag = xfs_perag_get(mp, agno);
	ret = atomic_read(&pag->pagf_fstrms);
	xfs_perag_put(pag);
	return ret;
}

/*
 * Bump the filestream association count of AG @agno and return the new
 * value.  A result greater than one means the AG was already in use.
 */
static int
xfs_filestream_get_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	struct xfs_perag *pag = xfs_perag_get(mp, agno);
	int		nr_streams = atomic_inc_return(&pag->pagf_fstrms);

	xfs_perag_put(pag);
	return nr_streams;
}

/*
 * Drop one filestream association from AG @agno.
 */
static void
xfs_filestream_put_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	struct xfs_perag *pag = xfs_perag_get(mp, agno);

	atomic_dec(&pag->pagf_fstrms);
	xfs_perag_put(pag);
}
77

78 79
static void
xfs_fstrm_free_func(
80
	void			*data,
81 82
	struct xfs_mru_cache_elem *mru)
{
83
	struct xfs_mount	*mp = data;
84 85 86
	struct xfs_fstrm_item	*item =
		container_of(mru, struct xfs_fstrm_item, mru);

87 88
	xfs_filestream_put_ag(mp, item->ag);
	trace_xfs_filestream_free(mp, mru->key, item->ag);
89

90
	kmem_free(item);
91 92
}

93 94 95 96 97
/*
 * Scan the AGs starting at startag looking for an AG that isn't in use and has
 * at least minlen blocks free.
 */
static int
98 99 100 101 102 103
xfs_filestream_pick_ag(
	struct xfs_inode	*ip,
	xfs_agnumber_t		startag,
	xfs_agnumber_t		*agp,
	int			flags,
	xfs_extlen_t		minlen)
104
{
105 106 107
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_fstrm_item	*item;
	struct xfs_perag	*pag;
108
	xfs_extlen_t		longest, free = 0, minfree, maxfree = 0;
109 110 111
	xfs_agnumber_t		ag, max_ag = NULLAGNUMBER;
	int			err, trylock, nscan;

D
Dave Chinner 已提交
112
	ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
113 114 115 116 117 118 119 120 121 122 123

	/* 2% of an AG's blocks must be free for it to be chosen. */
	minfree = mp->m_sb.sb_agblocks / 50;

	ag = startag;
	*agp = NULLAGNUMBER;

	/* For the first pass, don't sleep trying to init the per-AG. */
	trylock = XFS_ALLOC_FLAG_TRYLOCK;

	for (nscan = 0; 1; nscan++) {
124
		trace_xfs_filestream_scan(mp, ip->i_ino, ag);
125

126
		pag = xfs_perag_get(mp, ag);
127 128

		if (!pag->pagf_init) {
D
Dave Chinner 已提交
129
			err = xfs_alloc_read_agf(mp, NULL, ag, trylock, NULL);
130
			if (err) {
131 132
				if (err != -EAGAIN) {
					xfs_perag_put(pag);
133
					return err;
134
				}
135
				/* Couldn't lock the AGF, skip this AG. */
136
				goto next_ag;
137
			}
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
		}

		/* Keep track of the AG with the most free blocks. */
		if (pag->pagf_freeblks > maxfree) {
			maxfree = pag->pagf_freeblks;
			max_ag = ag;
		}

		/*
		 * The AG reference count does two things: it enforces mutual
		 * exclusion when examining the suitability of an AG in this
		 * loop, and it guards against two filestreams being established
		 * in the same AG as each other.
		 */
		if (xfs_filestream_get_ag(mp, ag) > 1) {
			xfs_filestream_put_ag(mp, ag);
			goto next_ag;
		}

157
		longest = xfs_alloc_longest_free_extent(pag,
158 159
				xfs_alloc_min_freelist(mp, pag),
				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
160 161 162 163 164 165 166
		if (((minlen && longest >= minlen) ||
		     (!minlen && pag->pagf_freeblks >= minfree)) &&
		    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
		     (flags & XFS_PICK_LOWSPACE))) {

			/* Break out, retaining the reference on the AG. */
			free = pag->pagf_freeblks;
167
			xfs_perag_put(pag);
168 169 170 171 172 173 174
			*agp = ag;
			break;
		}

		/* Drop the reference on this AG, it's not usable. */
		xfs_filestream_put_ag(mp, ag);
next_ag:
175
		xfs_perag_put(pag);
176 177 178 179 180 181 182 183
		/* Move to the next AG, wrapping to AG 0 if necessary. */
		if (++ag >= mp->m_sb.sb_agcount)
			ag = 0;

		/* If a full pass of the AGs hasn't been done yet, continue. */
		if (ag != startag)
			continue;

D
Dave Chinner 已提交
184
		/* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
		if (trylock != 0) {
			trylock = 0;
			continue;
		}

		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
		if (!(flags & XFS_PICK_LOWSPACE)) {
			flags |= XFS_PICK_LOWSPACE;
			continue;
		}

		/*
		 * Take the AG with the most free space, regardless of whether
		 * it's already in use by another filestream.
		 */
		if (max_ag != NULLAGNUMBER) {
			xfs_filestream_get_ag(mp, max_ag);
			free = maxfree;
			*agp = max_ag;
			break;
		}

		/* take AG 0 if none matched */
208
		trace_xfs_filestream_pick(ip, *agp, free, nscan);
209 210 211 212
		*agp = 0;
		return 0;
	}

213
	trace_xfs_filestream_pick(ip, *agp, free, nscan);
214

215
	if (*agp == NULLAGNUMBER)
216 217
		return 0;

D
Dave Chinner 已提交
218
	err = -ENOMEM;
219
	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
220
	if (!item)
221
		goto out_put_ag;
222

223
	item->ag = *agp;
224

225
	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
226
	if (err) {
D
Dave Chinner 已提交
227
		if (err == -EEXIST)
228 229
			err = 0;
		goto out_free_item;
230 231 232 233
	}

	return 0;

234
out_free_item:
235
	kmem_free(item);
236 237 238
out_put_ag:
	xfs_filestream_put_ag(mp, *agp);
	return err;
239 240
}

241 242 243
static struct xfs_inode *
xfs_filestream_get_parent(
	struct xfs_inode	*ip)
244
{
245 246
	struct inode		*inode = VFS_I(ip), *dir = NULL;
	struct dentry		*dentry, *parent;
247

248 249 250
	dentry = d_find_alias(inode);
	if (!dentry)
		goto out;
251

252 253 254
	parent = dget_parent(dentry);
	if (!parent)
		goto out_dput;
255

256
	dir = igrab(d_inode(parent));
257
	dput(parent);
258

259 260 261 262
out_dput:
	dput(dentry);
out:
	return dir ? XFS_I(dir) : NULL;
263 264 265
}

/*
266 267 268 269
 * Find the right allocation group for a file, either by finding an
 * existing file stream or creating a new one.
 *
 * Returns NULLAGNUMBER in case of an error.
270 271 272
 */
xfs_agnumber_t
xfs_filestream_lookup_ag(
273
	struct xfs_inode	*ip)
274
{
275 276
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_inode	*pip = NULL;
277
	xfs_agnumber_t		startag, ag = NULLAGNUMBER;
278
	struct xfs_mru_cache_elem *mru;
279

D
Dave Chinner 已提交
280
	ASSERT(S_ISREG(VFS_I(ip)->i_mode));
281

282 283
	pip = xfs_filestream_get_parent(ip);
	if (!pip)
284
		return NULLAGNUMBER;
285 286

	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
287
	if (mru) {
288
		ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
289
		xfs_mru_cache_done(mp->m_filestream);
290

291
		trace_xfs_filestream_lookup(mp, ip->i_ino, ag);
292
		goto out;
293 294 295 296 297 298
	}

	/*
	 * Set the starting AG using the rotor for inode32, otherwise
	 * use the directory inode's AG.
	 */
299
	if (xfs_is_inode32(mp)) {
300
		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
301 302 303 304 305 306
		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
		mp->m_agfrotor = (mp->m_agfrotor + 1) %
		                 (mp->m_sb.sb_agcount * rotorstep);
	} else
		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);

307 308 309
	if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
		ag = NULLAGNUMBER;
out:
310
	xfs_irele(pip);
311
	return ag;
312 313 314
}

/*
 * Move the file stream of the file being allocated to a new allocation group.
 *
 * Called when the allocator cannot find a suitable extent in the stream's
 * current AG, so the stream has to be re-homed in an AG with more space.
 * The scan starts at the AG after the one the stream was using.  *@agp is
 * always set on return and falls back to AG 0 when no AG was selected.
 *
 * Returns 0 or the error from xfs_filestream_pick_ag().
 */
int
xfs_filestream_new_ag(
	struct xfs_bmalloca	*ap,
	xfs_agnumber_t		*agp)
{
	struct xfs_inode	*ip = ap->ip;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_inode	*pip;
	struct xfs_mru_cache_elem *old;
	xfs_extlen_t		minlen = ap->length;
	xfs_agnumber_t		startag = 0;
	int			pick_flags = 0;
	int			error = 0;

	*agp = NULLAGNUMBER;

	pip = xfs_filestream_get_parent(ip);
	if (pip) {
		/* Detach the existing stream and resume after its AG. */
		old = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
		if (old) {
			struct xfs_fstrm_item *prev =
				container_of(old, struct xfs_fstrm_item, mru);

			startag = (prev->ag + 1) % mp->m_sb.sb_agcount;
		}

		if (ap->datatype & XFS_ALLOC_USERDATA)
			pick_flags |= XFS_PICK_USERDATA;
		if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
			pick_flags |= XFS_PICK_LOWSPACE;

		error = xfs_filestream_pick_ag(pip, startag, agp, pick_flags,
				minlen);

		/*
		 * The old item is only freed now: while it still held its
		 * AG reference, the scan above skipped over the old AG.
		 */
		if (old)
			xfs_fstrm_free_func(mp, old);

		xfs_irele(pip);
	}

	if (*agp == NULLAGNUMBER)
		*agp = 0;
	return error;
}
365

366 367 368 369 370 371
void
xfs_filestream_deassociate(
	struct xfs_inode	*ip)
{
	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
}
372

373 374 375 376
/*
 * Create the per-mount filestream MRU cache.  Returns the result of
 * xfs_mru_cache_create().
 */
int
xfs_filestream_mount(
	xfs_mount_t	*mp)
{
	int		error;

	/*
	 * The filestream timer tunable is currently fixed within the range of
	 * one second to four minutes, with five seconds being the default.
	 * The group count is somewhat arbitrary, but keeping expiry within
	 * about 10 percent of the timer tunable needs at least 10 groups.
	 */
	error = xfs_mru_cache_create(&mp->m_filestream, mp,
			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
	return error;
}
387

388 389 390 391 392 393
/*
 * Destroy the filestream MRU cache that xfs_filestream_mount() created for
 * this mount.
 */
void
xfs_filestream_unmount(
	xfs_mount_t	*mp)
{
	xfs_mru_cache_destroy(mp->m_filestream);
}