xfs_dir2_leaf.c 55.3 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2 3
 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
L
Linus Torvalds 已提交
4
 *
5 6
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
L
Linus Torvalds 已提交
7 8
 * published by the Free Software Foundation.
 *
9 10 11 12
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
L
Linus Torvalds 已提交
13
 *
14 15 16
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
L
Linus Torvalds 已提交
17 18
 */
#include "xfs.h"
19
#include "xfs_fs.h"
L
Linus Torvalds 已提交
20
#include "xfs_types.h"
21
#include "xfs_bit.h"
L
Linus Torvalds 已提交
22 23 24 25 26
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
27
#include "xfs_da_btree.h"
L
Linus Torvalds 已提交
28 29 30 31
#include "xfs_bmap_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
C
Christoph Hellwig 已提交
32 33
#include "xfs_dir2_format.h"
#include "xfs_dir2_priv.h"
L
Linus Torvalds 已提交
34
#include "xfs_error.h"
C
Christoph Hellwig 已提交
35
#include "xfs_trace.h"
L
Linus Torvalds 已提交
36 37 38 39 40

/*
 * Local function declarations.
 */
#ifdef DEBUG
41
static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
L
Linus Torvalds 已提交
42 43 44
#else
#define	xfs_dir2_leaf_check(dp, bp)
#endif
45 46 47
static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
				    int *indexp, struct xfs_buf **dbpp);
static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
48
				    int first, int last);
49
static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
50

D
Dave Chinner 已提交
51 52 53 54 55 56 57 58 59 60 61 62 63 64
static void
xfs_dir2_leaf_verify(
	struct xfs_buf		*bp,
	__be16			magic)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_dir2_leaf_hdr *hdr = bp->b_addr;
	int			block_ok = 0;

	block_ok = hdr->info.magic == magic;
	if (!block_ok) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
65
}
D
Dave Chinner 已提交
66

67
static void
68
xfs_dir2_leaf1_read_verify(
69 70 71 72 73 74
	struct xfs_buf	*bp)
{
	xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
}

static void
75
xfs_dir2_leaf1_write_verify(
76 77 78
	struct xfs_buf	*bp)
{
	xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
D
Dave Chinner 已提交
79 80
}

81
void
82
xfs_dir2_leafn_read_verify(
83
	struct xfs_buf	*bp)
D
Dave Chinner 已提交
84
{
85
	xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
D
Dave Chinner 已提交
86 87
}

D
Dave Chinner 已提交
88
void
89
xfs_dir2_leafn_write_verify(
90
	struct xfs_buf	*bp)
D
Dave Chinner 已提交
91 92 93 94
{
	xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
}

95 96 97 98 99 100 101 102 103 104
static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = {
	.verify_read = xfs_dir2_leaf1_read_verify,
	.verify_write = xfs_dir2_leaf1_write_verify,
};

const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = {
	.verify_read = xfs_dir2_leafn_read_verify,
	.verify_write = xfs_dir2_leafn_write_verify,
};

D
Dave Chinner 已提交
105 106 107 108 109 110 111 112 113
static int
xfs_dir2_leaf_read(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dablk_t		fbno,
	xfs_daddr_t		mappedbno,
	struct xfs_buf		**bpp)
{
	return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
114
				XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops);
D
Dave Chinner 已提交
115 116 117 118 119 120 121 122 123 124 125
}

int
xfs_dir2_leafn_read(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dablk_t		fbno,
	xfs_daddr_t		mappedbno,
	struct xfs_buf		**bpp)
{
	return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
126
				XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops);
D
Dave Chinner 已提交
127
}
L
Linus Torvalds 已提交
128 129 130 131 132 133 134

/*
 * Convert a block form directory to a leaf form directory.
 */
int						/* error */
xfs_dir2_block_to_leaf(
	xfs_da_args_t		*args,		/* operation arguments */
135
	struct xfs_buf		*dbp)		/* input block's buffer */
L
Linus Torvalds 已提交
136
{
137
	__be16			*bestsp;	/* leaf's bestsp entries */
L
Linus Torvalds 已提交
138
	xfs_dablk_t		blkno;		/* leaf block's bno */
139
	xfs_dir2_data_hdr_t	*hdr;		/* block header */
L
Linus Torvalds 已提交
140 141 142 143
	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
	xfs_dir2_block_tail_t	*btp;		/* block's tail */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			error;		/* error return code */
144
	struct xfs_buf		*lbp;		/* leaf block's buffer */
L
Linus Torvalds 已提交
145 146 147 148 149 150 151
	xfs_dir2_db_t		ldb;		/* leaf block's bno */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			needlog;	/* need to log block header */
	int			needscan;	/* need to rescan bestfree */
	xfs_trans_t		*tp;		/* transaction pointer */
152
	struct xfs_dir2_data_free *bf;
L
Linus Torvalds 已提交
153

C
Christoph Hellwig 已提交
154 155
	trace_xfs_dir2_block_to_leaf(args);

L
Linus Torvalds 已提交
156 157 158 159 160 161 162 163 164 165 166
	dp = args->dp;
	mp = dp->i_mount;
	tp = args->trans;
	/*
	 * Add the leaf block to the inode.
	 * This interface will only put blocks in the leaf/node range.
	 * Since that's empty now, we'll get the root (block 0 in range).
	 */
	if ((error = xfs_da_grow_inode(args, &blkno))) {
		return error;
	}
167
	ldb = xfs_dir2_da_to_db(mp, blkno);
L
Linus Torvalds 已提交
168 169 170 171 172 173 174 175
	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
	/*
	 * Initialize the leaf block, get a buffer for it.
	 */
	if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) {
		return error;
	}
	ASSERT(lbp != NULL);
176 177
	leaf = lbp->b_addr;
	hdr = dbp->b_addr;
178
	xfs_dir3_data_check(dp, dbp);
179
	btp = xfs_dir2_block_tail_p(mp, hdr);
180
	blp = xfs_dir2_block_leaf_p(btp);
181
	bf = xfs_dir3_data_bestfree_p(hdr);
L
Linus Torvalds 已提交
182 183 184
	/*
	 * Set the counts in the leaf header.
	 */
185 186
	leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count));
	leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale));
L
Linus Torvalds 已提交
187 188 189 190
	/*
	 * Could compact these but I think we always do the conversion
	 * after squeezing out stale entries.
	 */
191
	memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
192
	xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1);
L
Linus Torvalds 已提交
193 194 195 196 197 198 199
	needscan = 0;
	needlog = 1;
	/*
	 * Make the space formerly occupied by the leaf entries and block
	 * tail be free.
	 */
	xfs_dir2_data_make_free(tp, dbp,
200 201
		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
		(xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
L
Linus Torvalds 已提交
202 203 204 205 206
				       (char *)blp),
		&needlog, &needscan);
	/*
	 * Fix up the block header, make it a data block.
	 */
207 208 209 210 211 212
	dbp->b_ops = &xfs_dir3_data_buf_ops;
	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
		hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
	else
		hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);

L
Linus Torvalds 已提交
213
	if (needscan)
214
		xfs_dir2_data_freescan(mp, hdr, &needlog);
L
Linus Torvalds 已提交
215 216 217
	/*
	 * Set up leaf tail and bests table.
	 */
218
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
219
	ltp->bestcount = cpu_to_be32(1);
220
	bestsp = xfs_dir2_leaf_bests_p(ltp);
221
	bestsp[0] =  bf[0].length;
L
Linus Torvalds 已提交
222 223 224 225 226 227
	/*
	 * Log the data header and leaf bests table.
	 */
	if (needlog)
		xfs_dir2_data_log_header(tp, dbp);
	xfs_dir2_leaf_check(dp, lbp);
228
	xfs_dir3_data_check(dp, dbp);
L
Linus Torvalds 已提交
229 230 231 232
	xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
	return 0;
}

233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
STATIC void
xfs_dir2_leaf_find_stale(
	struct xfs_dir2_leaf	*leaf,
	int			index,
	int			*lowstale,
	int			*highstale)
{
	/*
	 * Find the first stale entry before our index, if any.
	 */
	for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
		if (leaf->ents[*lowstale].address ==
		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			break;
	}

	/*
	 * Find the first stale entry at or after our index, if any.
	 * Stop if the result would require moving more entries than using
	 * lowstale.
	 */
	for (*highstale = index;
	     *highstale < be16_to_cpu(leaf->hdr.count);
	     ++*highstale) {
		if (leaf->ents[*highstale].address ==
		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			break;
		if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
			break;
	}
}

265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
struct xfs_dir2_leaf_entry *
xfs_dir2_leaf_find_entry(
	xfs_dir2_leaf_t		*leaf,		/* leaf structure */
	int			index,		/* leaf table position */
	int			compact,	/* need to compact leaves */
	int			lowstale,	/* index of prev stale leaf */
	int			highstale,	/* index of next stale leaf */
	int			*lfloglow,	/* low leaf logging index */
	int			*lfloghigh)	/* high leaf logging index */
{
	if (!leaf->hdr.stale) {
		xfs_dir2_leaf_entry_t	*lep;	/* leaf entry table pointer */

		/*
		 * Now we need to make room to insert the leaf entry.
		 *
		 * If there are no stale entries, just insert a hole at index.
		 */
		lep = &leaf->ents[index];
		if (index < be16_to_cpu(leaf->hdr.count))
			memmove(lep + 1, lep,
				(be16_to_cpu(leaf->hdr.count) - index) *
				 sizeof(*lep));

		/*
		 * Record low and high logging indices for the leaf.
		 */
		*lfloglow = index;
		*lfloghigh = be16_to_cpu(leaf->hdr.count);
		be16_add_cpu(&leaf->hdr.count, 1);
		return lep;
	}

	/*
	 * There are stale entries.
	 *
	 * We will use one of them for the new entry.  It's probably not at
	 * the right location, so we'll have to shift some up or down first.
	 *
	 * If we didn't compact before, we need to find the nearest stale
	 * entries before and after our insertion point.
	 */
307 308
	if (compact == 0)
		xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
309 310 311 312 313 314 315 316

	/*
	 * If the low one is better, use it.
	 */
	if (lowstale >= 0 &&
	    (highstale == be16_to_cpu(leaf->hdr.count) ||
	     index - lowstale - 1 < highstale - index)) {
		ASSERT(index - lowstale - 1 >= 0);
317 318
		ASSERT(leaf->ents[lowstale].address ==
		       cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339

		/*
		 * Copy entries up to cover the stale entry and make room
		 * for the new entry.
		 */
		if (index - lowstale - 1 > 0) {
			memmove(&leaf->ents[lowstale],
				&leaf->ents[lowstale + 1],
				(index - lowstale - 1) *
				sizeof(xfs_dir2_leaf_entry_t));
		}
		*lfloglow = MIN(lowstale, *lfloglow);
		*lfloghigh = MAX(index - 1, *lfloghigh);
		be16_add_cpu(&leaf->hdr.stale, -1);
		return &leaf->ents[index - 1];
	}

	/*
	 * The high one is better, so use that one.
	 */
	ASSERT(highstale - index >= 0);
340 341
	ASSERT(leaf->ents[highstale].address ==
	       cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357

	/*
	 * Copy entries down to cover the stale entry and make room for the
	 * new entry.
	 */
	if (highstale - index > 0) {
		memmove(&leaf->ents[index + 1],
			&leaf->ents[index],
			(highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
	}
	*lfloglow = MIN(index, *lfloglow);
	*lfloghigh = MAX(highstale, *lfloghigh);
	be16_add_cpu(&leaf->hdr.stale, -1);
	return &leaf->ents[index];
}

L
Linus Torvalds 已提交
358 359 360 361 362 363 364
/*
 * Add an entry to a leaf form directory.
 */
int						/* error */
xfs_dir2_leaf_addname(
	xfs_da_args_t		*args)		/* operation arguments */
{
365
	__be16			*bestsp;	/* freespace table in leaf */
L
Linus Torvalds 已提交
366
	int			compact;	/* need to compact leaves */
367
	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
368
	struct xfs_buf		*dbp;		/* data block buffer */
L
Linus Torvalds 已提交
369 370 371 372 373 374 375 376
	xfs_dir2_data_entry_t	*dep;		/* data block entry */
	xfs_inode_t		*dp;		/* incore directory inode */
	xfs_dir2_data_unused_t	*dup;		/* data unused entry */
	int			error;		/* error return value */
	int			grown;		/* allocated new data block */
	int			highstale;	/* index of next stale leaf */
	int			i;		/* temporary, index */
	int			index;		/* leaf table position */
377
	struct xfs_buf		*lbp;		/* leaf's buffer */
L
Linus Torvalds 已提交
378 379 380 381 382 383 384 385 386 387 388
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	int			length;		/* length of new entry */
	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry table pointer */
	int			lfloglow;	/* low leaf logging index */
	int			lfloghigh;	/* high leaf logging index */
	int			lowstale;	/* index of prev stale leaf */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			needbytes;	/* leaf block bytes needed */
	int			needlog;	/* need to log data header */
	int			needscan;	/* need to rescan data free */
389
	__be16			*tagp;		/* end of data entry */
L
Linus Torvalds 已提交
390 391
	xfs_trans_t		*tp;		/* transaction pointer */
	xfs_dir2_db_t		use_block;	/* data block number */
392
	struct xfs_dir2_data_free *bf;		/* bestfree table */
L
Linus Torvalds 已提交
393

C
Christoph Hellwig 已提交
394 395
	trace_xfs_dir2_leaf_addname(args);

L
Linus Torvalds 已提交
396 397 398
	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;
D
Dave Chinner 已提交
399 400

	error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
401
	if (error)
L
Linus Torvalds 已提交
402
		return error;
D
Dave Chinner 已提交
403

L
Linus Torvalds 已提交
404 405 406 407 408 409 410
	/*
	 * Look up the entry by hash value and name.
	 * We know it's not there, our caller has already done a lookup.
	 * So the index is of the entry to insert in front of.
	 * But if there are dup hash values the index is of the first of those.
	 */
	index = xfs_dir2_leaf_search_hash(args, lbp);
411
	leaf = lbp->b_addr;
412 413 414
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
	bestsp = xfs_dir2_leaf_bests_p(ltp);
	length = xfs_dir2_data_entsize(args->namelen);
L
Linus Torvalds 已提交
415 416 417 418 419 420 421
	/*
	 * See if there are any entries with the same hash value
	 * and space in their block for the new entry.
	 * This is good because it puts multiple same-hash value entries
	 * in a data block, improving the lookup of those entries.
	 */
	for (use_block = -1, lep = &leaf->ents[index];
422
	     index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
L
Linus Torvalds 已提交
423
	     index++, lep++) {
424
		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
L
Linus Torvalds 已提交
425
			continue;
426
		i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
427
		ASSERT(i < be32_to_cpu(ltp->bestcount));
428
		ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
429
		if (be16_to_cpu(bestsp[i]) >= length) {
L
Linus Torvalds 已提交
430 431 432 433 434 435 436 437
			use_block = i;
			break;
		}
	}
	/*
	 * Didn't find a block yet, linear search all the data blocks.
	 */
	if (use_block == -1) {
438
		for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
L
Linus Torvalds 已提交
439 440 441
			/*
			 * Remember a block we see that's missing.
			 */
442 443
			if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
			    use_block == -1)
L
Linus Torvalds 已提交
444
				use_block = i;
445
			else if (be16_to_cpu(bestsp[i]) >= length) {
L
Linus Torvalds 已提交
446 447 448 449 450 451 452 453
				use_block = i;
				break;
			}
		}
	}
	/*
	 * How many bytes do we need in the leaf block?
	 */
454 455 456 457 458 459
	needbytes = 0;
	if (!leaf->hdr.stale)
		needbytes += sizeof(xfs_dir2_leaf_entry_t);
	if (use_block == -1)
		needbytes += sizeof(xfs_dir2_data_off_t);

L
Linus Torvalds 已提交
460 461 462 463
	/*
	 * Now kill use_block if it refers to a missing block, so we
	 * can use it as an indication of allocation needed.
	 */
464
	if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
L
Linus Torvalds 已提交
465 466 467 468 469
		use_block = -1;
	/*
	 * If we don't have enough free bytes but we can make enough
	 * by compacting out stale entries, we'll do that.
	 */
470 471
	if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
				needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
L
Linus Torvalds 已提交
472 473 474 475 476 477
		compact = 1;
	}
	/*
	 * Otherwise if we don't have enough free bytes we need to
	 * convert to node form.
	 */
478 479
	else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
						leaf->hdr.count)] < needbytes) {
L
Linus Torvalds 已提交
480 481 482
		/*
		 * Just checking or no space reservation, give up.
		 */
483 484
		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
							args->total == 0) {
485
			xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
			return XFS_ERROR(ENOSPC);
		}
		/*
		 * Convert to node form.
		 */
		error = xfs_dir2_leaf_to_node(args, lbp);
		if (error)
			return error;
		/*
		 * Then add the new entry.
		 */
		return xfs_dir2_node_addname(args);
	}
	/*
	 * Otherwise it will fit without compaction.
	 */
	else
		compact = 0;
	/*
	 * If just checking, then it will fit unless we needed to allocate
	 * a new data block.
	 */
508
	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
509
		xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
510 511 512 513 514 515 516
		return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
	}
	/*
	 * If no allocations are allowed, return now before we've
	 * changed anything.
	 */
	if (args->total == 0 && use_block == -1) {
517
		xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533
		return XFS_ERROR(ENOSPC);
	}
	/*
	 * Need to compact the leaf entries, removing stale ones.
	 * Leave one stale entry behind - the one closest to our
	 * insertion index - and we'll shift that one to our insertion
	 * point later.
	 */
	if (compact) {
		xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
			&lfloglow, &lfloghigh);
	}
	/*
	 * There are stale entries, so we'll need log-low and log-high
	 * impossibly bad values later.
	 */
534 535
	else if (be16_to_cpu(leaf->hdr.stale)) {
		lfloglow = be16_to_cpu(leaf->hdr.count);
L
Linus Torvalds 已提交
536 537 538 539 540 541 542 543 544 545 546 547
		lfloghigh = -1;
	}
	/*
	 * If there was no data block space found, we need to allocate
	 * a new one.
	 */
	if (use_block == -1) {
		/*
		 * Add the new data block.
		 */
		if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
				&use_block))) {
548
			xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
549 550 551 552 553
			return error;
		}
		/*
		 * Initialize the block.
		 */
554
		if ((error = xfs_dir3_data_init(args, use_block, &dbp))) {
555
			xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
556 557 558 559 560 561
			return error;
		}
		/*
		 * If we're adding a new data block on the end we need to
		 * extend the bests table.  Copy it up one entry.
		 */
562
		if (use_block >= be32_to_cpu(ltp->bestcount)) {
L
Linus Torvalds 已提交
563 564
			bestsp--;
			memmove(&bestsp[0], &bestsp[1],
565
				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
566
			be32_add_cpu(&ltp->bestcount, 1);
L
Linus Torvalds 已提交
567
			xfs_dir2_leaf_log_tail(tp, lbp);
568
			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
L
Linus Torvalds 已提交
569 570 571 572 573 574
		}
		/*
		 * If we're filling in a previously empty block just log it.
		 */
		else
			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
575
		hdr = dbp->b_addr;
576 577
		bf = xfs_dir3_data_bestfree_p(hdr);
		bestsp[use_block] = bf[0].length;
L
Linus Torvalds 已提交
578
		grown = 1;
579 580 581 582 583
	} else {
		/*
		 * Already had space in some data block.
		 * Just read that one in.
		 */
584
		error = xfs_dir3_data_read(tp, dp,
585 586
					   xfs_dir2_db_to_da(mp, use_block),
					   -1, &dbp);
587
		if (error) {
588
			xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
589 590
			return error;
		}
591
		hdr = dbp->b_addr;
592
		bf = xfs_dir3_data_bestfree_p(hdr);
L
Linus Torvalds 已提交
593 594 595 596 597 598
		grown = 0;
	}
	/*
	 * Point to the biggest freespace in our data block.
	 */
	dup = (xfs_dir2_data_unused_t *)
599
	      ((char *)hdr + be16_to_cpu(bf[0].offset));
600
	ASSERT(be16_to_cpu(dup->length) >= length);
L
Linus Torvalds 已提交
601 602 603 604 605
	needscan = needlog = 0;
	/*
	 * Mark the initial part of our freespace in use for the new entry.
	 */
	xfs_dir2_data_use_free(tp, dbp, dup,
606
		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
L
Linus Torvalds 已提交
607 608 609 610 611
		&needlog, &needscan);
	/*
	 * Initialize our new entry (at last).
	 */
	dep = (xfs_dir2_data_entry_t *)dup;
612
	dep->inumber = cpu_to_be64(args->inumber);
L
Linus Torvalds 已提交
613 614
	dep->namelen = args->namelen;
	memcpy(dep->name, args->name, dep->namelen);
615
	tagp = xfs_dir2_data_entry_tag_p(dep);
616
	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
L
Linus Torvalds 已提交
617 618 619 620
	/*
	 * Need to scan fix up the bestfree table.
	 */
	if (needscan)
621
		xfs_dir2_data_freescan(mp, hdr, &needlog);
L
Linus Torvalds 已提交
622 623 624 625 626 627 628 629 630 631
	/*
	 * Need to log the data block's header.
	 */
	if (needlog)
		xfs_dir2_data_log_header(tp, dbp);
	xfs_dir2_data_log_entry(tp, dbp, dep);
	/*
	 * If the bests table needs to be changed, do it.
	 * Log the change unless we've already done that.
	 */
632 633
	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
		bestsp[use_block] = bf[0].length;
L
Linus Torvalds 已提交
634 635 636
		if (!grown)
			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
	}
637 638 639 640

	lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
				       highstale, &lfloglow, &lfloghigh);

L
Linus Torvalds 已提交
641 642 643
	/*
	 * Fill in the new leaf entry.
	 */
644
	lep->hashval = cpu_to_be32(args->hashval);
645
	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
646
				be16_to_cpu(*tagp)));
L
Linus Torvalds 已提交
647 648 649 650 651 652
	/*
	 * Log the leaf fields and give up the buffers.
	 */
	xfs_dir2_leaf_log_header(tp, lbp);
	xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
	xfs_dir2_leaf_check(dp, lbp);
653
	xfs_dir3_data_check(dp, dbp);
L
Linus Torvalds 已提交
654 655 656 657 658 659 660 661
	return 0;
}

#ifdef DEBUG
/*
 * Check the internal consistency of a leaf1 block.
 * Pop an assert if something is wrong.
 */
H
Hannes Eder 已提交
662
STATIC void
L
Linus Torvalds 已提交
663
xfs_dir2_leaf_check(
664 665
	struct xfs_inode	*dp,		/* incore directory inode */
	struct xfs_buf		*bp)		/* leaf's buffer */
L
Linus Torvalds 已提交
666 667 668 669 670 671 672
{
	int			i;		/* leaf index */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			stale;		/* count of stale leaves */

673
	leaf = bp->b_addr;
L
Linus Torvalds 已提交
674
	mp = dp->i_mount;
675
	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
L
Linus Torvalds 已提交
676 677 678 679 680
	/*
	 * This value is not restrictive enough.
	 * Should factor in the size of the bests table as well.
	 * We can deduce a value for that from di_size.
	 */
681 682
	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
L
Linus Torvalds 已提交
683 684 685
	/*
	 * Leaves and bests don't overlap.
	 */
686
	ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
687
	       (char *)xfs_dir2_leaf_bests_p(ltp));
L
Linus Torvalds 已提交
688 689 690
	/*
	 * Check hash value order, count stale entries.
	 */
691 692
	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
		if (i + 1 < be16_to_cpu(leaf->hdr.count))
693 694
			ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
			       be32_to_cpu(leaf->ents[i + 1].hashval));
695
		if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
L
Linus Torvalds 已提交
696 697
			stale++;
	}
698
	ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
L
Linus Torvalds 已提交
699 700 701 702 703 704 705 706 707 708
}
#endif	/* DEBUG */

/*
 * Compact out any stale entries in the leaf.
 * Log the header and changed leaf entries, if any.
 */
void
xfs_dir2_leaf_compact(
	xfs_da_args_t	*args,		/* operation arguments */
709
	struct xfs_buf	*bp)		/* leaf buffer */
L
Linus Torvalds 已提交
710 711 712 713 714 715
{
	int		from;		/* source leaf index */
	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
	int		loglow;		/* first leaf entry to log */
	int		to;		/* target leaf index */

716
	leaf = bp->b_addr;
L
Linus Torvalds 已提交
717 718 719 720 721 722
	if (!leaf->hdr.stale) {
		return;
	}
	/*
	 * Compress out the stale entries in place.
	 */
723
	for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
724 725
		if (leaf->ents[from].address ==
		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
L
Linus Torvalds 已提交
726 727 728 729 730 731 732 733 734 735 736 737 738 739
			continue;
		/*
		 * Only actually copy the entries that are different.
		 */
		if (from > to) {
			if (loglow == -1)
				loglow = to;
			leaf->ents[to] = leaf->ents[from];
		}
		to++;
	}
	/*
	 * Update and log the header, log the leaf entries.
	 */
740
	ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to);
741
	be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
L
Linus Torvalds 已提交
742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757
	leaf->hdr.stale = 0;
	xfs_dir2_leaf_log_header(args->trans, bp);
	if (loglow != -1)
		xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1);
}

/*
 * Compact the leaf entries, removing stale ones.
 * Leave one stale entry behind - the one closest to our
 * insertion index - and the caller will shift that one to our insertion
 * point later.
 * Return new insertion index, where the remaining stale entry is,
 * and leaf logging indices.
 */
void
xfs_dir2_leaf_compact_x1(
758
	struct xfs_buf	*bp,		/* leaf buffer */
L
Linus Torvalds 已提交
759 760 761 762 763 764 765 766 767 768 769 770 771 772 773
	int		*indexp,	/* insertion index */
	int		*lowstalep,	/* out: stale entry before us */
	int		*highstalep,	/* out: stale entry after us */
	int		*lowlogp,	/* out: low log index */
	int		*highlogp)	/* out: high log index */
{
	int		from;		/* source copy index */
	int		highstale;	/* stale entry at/after index */
	int		index;		/* insertion index */
	int		keepstale;	/* source index of kept stale */
	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
	int		lowstale;	/* stale entry before index */
	int		newindex=0;	/* new insertion index */
	int		to;		/* destination copy index */

774
	leaf = bp->b_addr;
775
	ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
L
Linus Torvalds 已提交
776
	index = *indexp;
777 778 779

	xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);

L
Linus Torvalds 已提交
780 781 782 783
	/*
	 * Pick the better of lowstale and highstale.
	 */
	if (lowstale >= 0 &&
784
	    (highstale == be16_to_cpu(leaf->hdr.count) ||
L
Linus Torvalds 已提交
785 786 787 788 789 790 791 792
	     index - lowstale <= highstale - index))
		keepstale = lowstale;
	else
		keepstale = highstale;
	/*
	 * Copy the entries in place, removing all the stale entries
	 * except keepstale.
	 */
793
	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
L
Linus Torvalds 已提交
794 795 796 797 798 799
		/*
		 * Notice the new value of index.
		 */
		if (index == from)
			newindex = to;
		if (from != keepstale &&
800 801
		    leaf->ents[from].address ==
		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
L
Linus Torvalds 已提交
802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
			if (from == to)
				*lowlogp = to;
			continue;
		}
		/*
		 * Record the new keepstale value for the insertion.
		 */
		if (from == keepstale)
			lowstale = highstale = to;
		/*
		 * Copy only the entries that have moved.
		 */
		if (from > to)
			leaf->ents[to] = leaf->ents[from];
		to++;
	}
	ASSERT(from > to);
	/*
	 * If the insertion point was past the last entry,
	 * set the new insertion point accordingly.
	 */
	if (index == from)
		newindex = to;
	*indexp = newindex;
	/*
	 * Adjust the leaf header values.
	 */
829
	be16_add_cpu(&leaf->hdr.count, -(from - to));
830
	leaf->hdr.stale = cpu_to_be16(1);
L
Linus Torvalds 已提交
831 832 833 834 835 836 837
	/*
	 * Remember the low/high stale value only in the "right"
	 * direction.
	 */
	if (lowstale >= newindex)
		lowstale = -1;
	else
838 839
		highstale = be16_to_cpu(leaf->hdr.count);
	*highlogp = be16_to_cpu(leaf->hdr.count) - 1;
L
Linus Torvalds 已提交
840 841 842 843
	*lowstalep = lowstale;
	*highstalep = highstale;
}

844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976
struct xfs_dir2_leaf_map_info {
	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
	xfs_dablk_t	map_off;	/* last mapped file offset */
	int		map_size;	/* total entries in *map */
	int		map_valid;	/* valid entries in *map */
	int		nmap;		/* mappings to ask xfs_bmapi */
	xfs_dir2_db_t	curdb;		/* db for current block */
	int		ra_current;	/* number of read-ahead blks */
	int		ra_index;	/* *map index for read-ahead */
	int		ra_offset;	/* map entry offset for ra */
	int		ra_want;	/* readahead count wanted */
	struct xfs_bmbt_irec map[];	/* map vector for blocks */
};

STATIC int
xfs_dir2_leaf_readbuf(
	struct xfs_inode	*dp,
	size_t			bufsize,
	struct xfs_dir2_leaf_map_info *mip,
	xfs_dir2_off_t		*curoff,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_buf		*bp = *bpp;
	struct xfs_bmbt_irec	*map = mip->map;
	int			error = 0;
	int			length;
	int			i;
	int			j;

	/*
	 * If we have a buffer, we need to release it and
	 * take it out of the mapping.
	 */

	if (bp) {
		xfs_trans_brelse(NULL, bp);
		bp = NULL;
		mip->map_blocks -= mp->m_dirblkfsbs;
		/*
		 * Loop to get rid of the extents for the
		 * directory block.
		 */
		for (i = mp->m_dirblkfsbs; i > 0; ) {
			j = min_t(int, map->br_blockcount, i);
			map->br_blockcount -= j;
			map->br_startblock += j;
			map->br_startoff += j;
			/*
			 * If mapping is done, pitch it from
			 * the table.
			 */
			if (!map->br_blockcount && --mip->map_valid)
				memmove(&map[0], &map[1],
					sizeof(map[0]) * mip->map_valid);
			i -= j;
		}
	}

	/*
	 * Recalculate the readahead blocks wanted.
	 */
	mip->ra_want = howmany(bufsize + mp->m_dirblksize,
			       mp->m_sb.sb_blocksize) - 1;
	ASSERT(mip->ra_want >= 0);

	/*
	 * If we don't have as many as we want, and we haven't
	 * run out of data blocks, get some more mappings.
	 */
	if (1 + mip->ra_want > mip->map_blocks &&
	    mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
		/*
		 * Get more bmaps, fill in after the ones
		 * we already have in the table.
		 */
		mip->nmap = mip->map_size - mip->map_valid;
		error = xfs_bmapi_read(dp, mip->map_off,
				xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
								mip->map_off,
				&map[mip->map_valid], &mip->nmap, 0);

		/*
		 * Don't know if we should ignore this or try to return an
		 * error.  The trouble with returning errors is that readdir
		 * will just stop without actually passing the error through.
		 */
		if (error)
			goto out;	/* XXX */

		/*
		 * If we got all the mappings we asked for, set the final map
		 * offset based on the last bmap value received.  Otherwise,
		 * we've reached the end.
		 */
		if (mip->nmap == mip->map_size - mip->map_valid) {
			i = mip->map_valid + mip->nmap - 1;
			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
		} else
			mip->map_off = xfs_dir2_byte_to_da(mp,
							XFS_DIR2_LEAF_OFFSET);

		/*
		 * Look for holes in the mapping, and eliminate them.  Count up
		 * the valid blocks.
		 */
		for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
			if (map[i].br_startblock == HOLESTARTBLOCK) {
				mip->nmap--;
				length = mip->map_valid + mip->nmap - i;
				if (length)
					memmove(&map[i], &map[i + 1],
						sizeof(map[i]) * length);
			} else {
				mip->map_blocks += map[i].br_blockcount;
				i++;
			}
		}
		mip->map_valid += mip->nmap;
	}

	/*
	 * No valid mappings, so no more data blocks.
	 */
	if (!mip->map_valid) {
		*curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
		goto out;
	}

	/*
	 * Read the directory block starting at the first mapping.
	 */
	mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
977
	error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
978
			map->br_blockcount >= mp->m_dirblkfsbs ?
979
			    XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005

	/*
	 * Should just skip over the data block instead of giving up.
	 */
	if (error)
		goto out;	/* XXX */

	/*
	 * Adjust the current amount of read-ahead: we just read a block that
	 * was previously ra.
	 */
	if (mip->ra_current)
		mip->ra_current -= mp->m_dirblkfsbs;

	/*
	 * Do we need more readahead?
	 */
	for (mip->ra_index = mip->ra_offset = i = 0;
	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
	     i += mp->m_dirblkfsbs) {
		ASSERT(mip->ra_index < mip->map_valid);
		/*
		 * Read-ahead a contiguous directory block.
		 */
		if (i > mip->ra_current &&
		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
1006
			xfs_dir3_data_readahead(NULL, dp,
1007
				map[mip->ra_index].br_startoff + mip->ra_offset,
1008 1009
				XFS_FSB_TO_DADDR(mp,
					map[mip->ra_index].br_startblock +
1010
							mip->ra_offset));
1011 1012 1013 1014 1015 1016 1017 1018
			mip->ra_current = i;
		}

		/*
		 * Read-ahead a non-contiguous directory block.  This doesn't
		 * use our mapping, but this is a very rare case.
		 */
		else if (i > mip->ra_current) {
1019
			xfs_dir3_data_readahead(NULL, dp,
1020
					map[mip->ra_index].br_startoff +
1021
							mip->ra_offset, -1);
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053
			mip->ra_current = i;
		}

		/*
		 * Advance offset through the mapping table.
		 */
		for (j = 0; j < mp->m_dirblkfsbs; j++) {
			/*
			 * The rest of this extent but not more than a dir
			 * block.
			 */
			length = min_t(int, mp->m_dirblkfsbs,
					map[mip->ra_index].br_blockcount -
							mip->ra_offset);
			j += length;
			mip->ra_offset += length;

			/*
			 * Advance to the next mapping if this one is used up.
			 */
			if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
				mip->ra_offset = 0;
				mip->ra_index++;
			}
		}
	}

out:
	*bpp = bp;
	return error;
}

L
Linus Torvalds 已提交
1054 1055 1056 1057 1058 1059 1060
/*
 * Getdents (readdir) for leaf and node directories.
 * This reads the data blocks only, so is the same for both forms.
 */
int						/* error */
xfs_dir2_leaf_getdents(
	xfs_inode_t		*dp,		/* incore directory inode */
C
Christoph Hellwig 已提交
1061 1062 1063 1064
	void			*dirent,
	size_t			bufsize,
	xfs_off_t		*offset,
	filldir_t		filldir)
L
Linus Torvalds 已提交
1065
{
1066
	struct xfs_buf		*bp = NULL;	/* data block buffer */
1067
	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
L
Linus Torvalds 已提交
1068 1069
	xfs_dir2_data_entry_t	*dep;		/* data entry */
	xfs_dir2_data_unused_t	*dup;		/* unused entry */
1070
	int			error = 0;	/* error return value */
L
Linus Torvalds 已提交
1071 1072
	int			length;		/* temporary length value */
	xfs_mount_t		*mp;		/* filesystem mount point */
1073 1074
	int			byteoff;	/* offset in current block */
	xfs_dir2_off_t		curoff;		/* current overall offset */
L
Linus Torvalds 已提交
1075
	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
1076
	char			*ptr = NULL;	/* pointer to current data */
1077
	struct xfs_dir2_leaf_map_info *map_info;
L
Linus Torvalds 已提交
1078 1079 1080

	/*
	 * If the offset is at or past the largest allowed value,
C
Christoph Hellwig 已提交
1081
	 * give up right away.
L
Linus Torvalds 已提交
1082
	 */
C
Christoph Hellwig 已提交
1083
	if (*offset >= XFS_DIR2_MAX_DATAPTR)
L
Linus Torvalds 已提交
1084
		return 0;
C
Christoph Hellwig 已提交
1085

L
Linus Torvalds 已提交
1086
	mp = dp->i_mount;
C
Christoph Hellwig 已提交
1087

L
Linus Torvalds 已提交
1088 1089 1090 1091 1092
	/*
	 * Set up to bmap a number of blocks based on the caller's
	 * buffer size, the directory block size, and the filesystem
	 * block size.
	 */
1093 1094 1095 1096 1097 1098
	length = howmany(bufsize + mp->m_dirblksize,
				     mp->m_sb.sb_blocksize);
	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
				(length * sizeof(struct xfs_bmbt_irec)),
			       KM_SLEEP);
	map_info->map_size = length;
C
Christoph Hellwig 已提交
1099

L
Linus Torvalds 已提交
1100 1101 1102 1103
	/*
	 * Inside the loop we keep the main offset value as a byte offset
	 * in the directory file.
	 */
C
Christoph Hellwig 已提交
1104 1105
	curoff = xfs_dir2_dataptr_to_byte(mp, *offset);

L
Linus Torvalds 已提交
1106 1107 1108 1109
	/*
	 * Force this conversion through db so we truncate the offset
	 * down to get the start of the data block.
	 */
1110 1111 1112
	map_info->map_off = xfs_dir2_db_to_da(mp,
					      xfs_dir2_byte_to_db(mp, curoff));

L
Linus Torvalds 已提交
1113 1114 1115 1116 1117 1118 1119 1120 1121
	/*
	 * Loop over directory entries until we reach the end offset.
	 * Get more blocks and readahead as necessary.
	 */
	while (curoff < XFS_DIR2_LEAF_OFFSET) {
		/*
		 * If we have no buffer, or we're off the end of the
		 * current buffer, need to get another one.
		 */
1122
		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
C
Christoph Hellwig 已提交
1123

1124 1125 1126
			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
						      &curoff, &bp);
			if (error || !map_info->map_valid)
L
Linus Torvalds 已提交
1127
				break;
1128

L
Linus Torvalds 已提交
1129 1130 1131
			/*
			 * Having done a read, we need to set a new offset.
			 */
1132
			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
L
Linus Torvalds 已提交
1133 1134 1135 1136 1137 1138 1139 1140 1141
			/*
			 * Start of the current block.
			 */
			if (curoff < newoff)
				curoff = newoff;
			/*
			 * Make sure we're in the right block.
			 */
			else if (curoff > newoff)
1142
				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1143
				       map_info->curdb);
1144
			hdr = bp->b_addr;
1145
			xfs_dir3_data_check(dp, bp);
L
Linus Torvalds 已提交
1146 1147 1148
			/*
			 * Find our position in the block.
			 */
1149
			ptr = (char *)xfs_dir3_data_entry_p(hdr);
1150
			byteoff = xfs_dir2_byte_to_off(mp, curoff);
L
Linus Torvalds 已提交
1151 1152 1153 1154
			/*
			 * Skip past the header.
			 */
			if (byteoff == 0)
1155
				curoff += xfs_dir3_data_entry_offset(hdr);
L
Linus Torvalds 已提交
1156 1157 1158 1159
			/*
			 * Skip past entries until we reach our offset.
			 */
			else {
1160
				while ((char *)ptr - (char *)hdr < byteoff) {
L
Linus Torvalds 已提交
1161 1162
					dup = (xfs_dir2_data_unused_t *)ptr;

1163
					if (be16_to_cpu(dup->freetag)
L
Linus Torvalds 已提交
1164 1165
						  == XFS_DIR2_DATA_FREE_TAG) {

1166
						length = be16_to_cpu(dup->length);
L
Linus Torvalds 已提交
1167 1168 1169 1170 1171
						ptr += length;
						continue;
					}
					dep = (xfs_dir2_data_entry_t *)ptr;
					length =
1172
					   xfs_dir2_data_entsize(dep->namelen);
L
Linus Torvalds 已提交
1173 1174 1175 1176 1177 1178
					ptr += length;
				}
				/*
				 * Now set our real offset.
				 */
				curoff =
1179 1180
					xfs_dir2_db_off_to_byte(mp,
					    xfs_dir2_byte_to_db(mp, curoff),
1181 1182
					    (char *)ptr - (char *)hdr);
				if (ptr >= (char *)hdr + mp->m_dirblksize) {
L
Linus Torvalds 已提交
1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194
					continue;
				}
			}
		}
		/*
		 * We have a pointer to an entry.
		 * Is it a live one?
		 */
		dup = (xfs_dir2_data_unused_t *)ptr;
		/*
		 * No, it's unused, skip over it.
		 */
1195 1196
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			length = be16_to_cpu(dup->length);
L
Linus Torvalds 已提交
1197 1198 1199 1200 1201 1202
			ptr += length;
			curoff += length;
			continue;
		}

		dep = (xfs_dir2_data_entry_t *)ptr;
C
Christoph Hellwig 已提交
1203
		length = xfs_dir2_data_entsize(dep->namelen);
L
Linus Torvalds 已提交
1204

1205
		if (filldir(dirent, (char *)dep->name, dep->namelen,
1206
			    xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
C
Christoph Hellwig 已提交
1207
			    be64_to_cpu(dep->inumber), DT_UNKNOWN))
L
Linus Torvalds 已提交
1208
			break;
C
Christoph Hellwig 已提交
1209

L
Linus Torvalds 已提交
1210 1211 1212 1213 1214
		/*
		 * Advance to next entry in the block.
		 */
		ptr += length;
		curoff += length;
1215 1216
		/* bufsize may have just been a guess; don't go negative */
		bufsize = bufsize > length ? bufsize - length : 0;
L
Linus Torvalds 已提交
1217 1218 1219 1220 1221
	}

	/*
	 * All done.  Set output offset value to current offset.
	 */
1222
	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
1223
		*offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
L
Linus Torvalds 已提交
1224
	else
1225
		*offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
1226
	kmem_free(map_info);
L
Linus Torvalds 已提交
1227
	if (bp)
1228
		xfs_trans_brelse(NULL, bp);
L
Linus Torvalds 已提交
1229 1230 1231 1232 1233 1234 1235 1236 1237 1238
	return error;
}

/*
 * Initialize a new leaf block, leaf1 or leafn magic accepted.
 */
int
xfs_dir2_leaf_init(
	xfs_da_args_t		*args,		/* operation arguments */
	xfs_dir2_db_t		bno,		/* directory block number */
1239
	struct xfs_buf		**bpp,		/* out: leaf buffer */
L
Linus Torvalds 已提交
1240 1241
	int			magic)		/* magic number for block */
{
1242
	struct xfs_buf		*bp;		/* leaf buffer */
L
Linus Torvalds 已提交
1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258
	xfs_inode_t		*dp;		/* incore directory inode */
	int			error;		/* error return code */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
	xfs_mount_t		*mp;		/* filesystem mount point */
	xfs_trans_t		*tp;		/* transaction pointer */

	dp = args->dp;
	ASSERT(dp != NULL);
	tp = args->trans;
	mp = dp->i_mount;
	ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
	       bno < XFS_DIR2_FREE_FIRSTDB(mp));
	/*
	 * Get the buffer for the block.
	 */
1259
	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1260 1261
			       XFS_DATA_FORK);
	if (error)
L
Linus Torvalds 已提交
1262
		return error;
1263

L
Linus Torvalds 已提交
1264 1265 1266
	/*
	 * Initialize the header.
	 */
1267
	leaf = bp->b_addr;
1268
	leaf->hdr.info.magic = cpu_to_be16(magic);
L
Linus Torvalds 已提交
1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279
	leaf->hdr.info.forw = 0;
	leaf->hdr.info.back = 0;
	leaf->hdr.count = 0;
	leaf->hdr.stale = 0;
	xfs_dir2_leaf_log_header(tp, bp);
	/*
	 * If it's a leaf-format directory initialize the tail.
	 * In this case our caller has the real bests table to copy into
	 * the block.
	 */
	if (magic == XFS_DIR2_LEAF1_MAGIC) {
1280
		bp->b_ops = &xfs_dir2_leaf1_buf_ops;
1281
		ltp = xfs_dir2_leaf_tail_p(mp, leaf);
L
Linus Torvalds 已提交
1282 1283
		ltp->bestcount = 0;
		xfs_dir2_leaf_log_tail(tp, bp);
1284
	} else
1285
		bp->b_ops = &xfs_dir2_leafn_buf_ops;
L
Linus Torvalds 已提交
1286 1287 1288 1289 1290 1291 1292
	*bpp = bp;
	return 0;
}

/*
 * Log the bests entries indicated from a leaf1 block.
 */
1293
static void
L
Linus Torvalds 已提交
1294 1295
xfs_dir2_leaf_log_bests(
	xfs_trans_t		*tp,		/* transaction pointer */
1296
	struct xfs_buf		*bp,		/* leaf buffer */
L
Linus Torvalds 已提交
1297 1298 1299
	int			first,		/* first entry to log */
	int			last)		/* last entry to log */
{
1300 1301
	__be16			*firstb;	/* pointer to first entry */
	__be16			*lastb;		/* pointer to last entry */
L
Linus Torvalds 已提交
1302 1303 1304
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */

1305
	leaf = bp->b_addr;
1306
	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1307 1308 1309
	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1310
	xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
L
Linus Torvalds 已提交
1311 1312 1313 1314 1315 1316 1317 1318 1319
		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
}

/*
 * Log the leaf entries indicated from a leaf1 or leafn block.
 */
void
xfs_dir2_leaf_log_ents(
	xfs_trans_t		*tp,		/* transaction pointer */
1320
	struct xfs_buf		*bp,		/* leaf buffer */
L
Linus Torvalds 已提交
1321 1322 1323 1324 1325 1326 1327
	int			first,		/* first entry to log */
	int			last)		/* last entry to log */
{
	xfs_dir2_leaf_entry_t	*firstlep;	/* pointer to first entry */
	xfs_dir2_leaf_entry_t	*lastlep;	/* pointer to last entry */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */

1328
	leaf = bp->b_addr;
1329 1330
	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
L
Linus Torvalds 已提交
1331 1332
	firstlep = &leaf->ents[first];
	lastlep = &leaf->ents[last];
1333
	xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
L
Linus Torvalds 已提交
1334 1335 1336 1337 1338 1339 1340 1341
		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
}

/*
 * Log the header of the leaf1 or leafn block.
 */
void
xfs_dir2_leaf_log_header(
1342 1343
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
L
Linus Torvalds 已提交
1344 1345 1346
{
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */

1347
	leaf = bp->b_addr;
1348 1349
	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1350
	xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
L
Linus Torvalds 已提交
1351 1352 1353 1354 1355 1356
		(uint)(sizeof(leaf->hdr) - 1));
}

/*
 * Log the tail of the leaf1 block.
 */
1357
STATIC void
L
Linus Torvalds 已提交
1358
xfs_dir2_leaf_log_tail(
1359 1360
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
L
Linus Torvalds 已提交
1361 1362 1363 1364 1365 1366
{
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
	xfs_mount_t		*mp;		/* filesystem mount point */

	mp = tp->t_mountp;
1367
	leaf = bp->b_addr;
1368
	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1369
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1370
	xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
L
Linus Torvalds 已提交
1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382
		(uint)(mp->m_dirblksize - 1));
}

/*
 * Look up the entry referred to by args in the leaf format directory.
 * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
 * is also used by the node-format code.
 */
int
xfs_dir2_leaf_lookup(
	xfs_da_args_t		*args)		/* operation arguments */
{
1383
	struct xfs_buf		*dbp;		/* data block buffer */
L
Linus Torvalds 已提交
1384 1385 1386 1387
	xfs_dir2_data_entry_t	*dep;		/* data block entry */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			error;		/* error return code */
	int			index;		/* found entry index */
1388
	struct xfs_buf		*lbp;		/* leaf buffer */
L
Linus Torvalds 已提交
1389 1390 1391 1392
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
	xfs_trans_t		*tp;		/* transaction pointer */

C
Christoph Hellwig 已提交
1393 1394
	trace_xfs_dir2_leaf_lookup(args);

L
Linus Torvalds 已提交
1395 1396 1397 1398 1399 1400 1401 1402 1403
	/*
	 * Look up name in the leaf block, returning both buffers and index.
	 */
	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
		return error;
	}
	tp = args->trans;
	dp = args->dp;
	xfs_dir2_leaf_check(dp, lbp);
1404
	leaf = lbp->b_addr;
L
Linus Torvalds 已提交
1405 1406 1407 1408 1409 1410 1411 1412
	/*
	 * Get to the leaf entry and contained data entry address.
	 */
	lep = &leaf->ents[index];
	/*
	 * Point to the data entry.
	 */
	dep = (xfs_dir2_data_entry_t *)
1413
	      ((char *)dbp->b_addr +
1414
	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
L
Linus Torvalds 已提交
1415
	/*
1416
	 * Return the found inode number & CI name if appropriate
L
Linus Torvalds 已提交
1417
	 */
1418
	args->inumber = be64_to_cpu(dep->inumber);
1419
	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1420 1421
	xfs_trans_brelse(tp, dbp);
	xfs_trans_brelse(tp, lbp);
1422
	return XFS_ERROR(error);
L
Linus Torvalds 已提交
1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433
}

/*
 * Look up name/hash in the leaf block.
 * Fill in indexp with the found index, and dbpp with the data buffer.
 * If not found dbpp will be NULL, and ENOENT comes back.
 * lbpp will always be filled in with the leaf buffer unless there's an error.
 */
static int					/* error */
xfs_dir2_leaf_lookup_int(
	xfs_da_args_t		*args,		/* operation arguments */
1434
	struct xfs_buf		**lbpp,		/* out: leaf buffer */
L
Linus Torvalds 已提交
1435
	int			*indexp,	/* out: index in leaf block */
1436
	struct xfs_buf		**dbpp)		/* out: data buffer */
L
Linus Torvalds 已提交
1437
{
1438
	xfs_dir2_db_t		curdb = -1;	/* current data block number */
1439
	struct xfs_buf		*dbp = NULL;	/* data buffer */
L
Linus Torvalds 已提交
1440 1441 1442 1443
	xfs_dir2_data_entry_t	*dep;		/* data entry */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			error;		/* error return code */
	int			index;		/* index in leaf block */
1444
	struct xfs_buf		*lbp;		/* leaf buffer */
L
Linus Torvalds 已提交
1445 1446 1447 1448 1449
	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_mount_t		*mp;		/* filesystem mount point */
	xfs_dir2_db_t		newdb;		/* new data block number */
	xfs_trans_t		*tp;		/* transaction pointer */
1450
	xfs_dir2_db_t		cidb = -1;	/* case match data block no. */
1451
	enum xfs_dacmp		cmp;		/* name compare result */
L
Linus Torvalds 已提交
1452 1453 1454 1455

	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;
D
Dave Chinner 已提交
1456 1457

	error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
1458
	if (error)
L
Linus Torvalds 已提交
1459
		return error;
D
Dave Chinner 已提交
1460

L
Linus Torvalds 已提交
1461
	*lbpp = lbp;
1462
	leaf = lbp->b_addr;
L
Linus Torvalds 已提交
1463 1464 1465 1466 1467 1468 1469 1470 1471
	xfs_dir2_leaf_check(dp, lbp);
	/*
	 * Look for the first leaf entry with our hash value.
	 */
	index = xfs_dir2_leaf_search_hash(args, lbp);
	/*
	 * Loop over all the entries with the right hash value
	 * looking to match the name.
	 */
1472
	for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1473 1474
				be32_to_cpu(lep->hashval) == args->hashval;
				lep++, index++) {
L
Linus Torvalds 已提交
1475 1476 1477
		/*
		 * Skip over stale leaf entries.
		 */
1478
		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
L
Linus Torvalds 已提交
1479 1480 1481 1482
			continue;
		/*
		 * Get the new data block number.
		 */
1483
		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
L
Linus Torvalds 已提交
1484 1485 1486 1487 1488
		/*
		 * If it's not the same as the old data block number,
		 * need to pitch the old one and read the new one.
		 */
		if (newdb != curdb) {
1489
			if (dbp)
1490
				xfs_trans_brelse(tp, dbp);
1491
			error = xfs_dir3_data_read(tp, dp,
1492 1493
						   xfs_dir2_db_to_da(mp, newdb),
						   -1, &dbp);
1494
			if (error) {
1495
				xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
1496 1497 1498 1499 1500 1501 1502
				return error;
			}
			curdb = newdb;
		}
		/*
		 * Point to the data entry.
		 */
1503
		dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
1504
			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
L
Linus Torvalds 已提交
1505
		/*
1506 1507 1508
		 * Compare name and if it's an exact match, return the index
		 * and buffer. If it's the first case-insensitive match, store
		 * the index and buffer and continue looking for an exact match.
L
Linus Torvalds 已提交
1509
		 */
1510 1511 1512
		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
			args->cmpresult = cmp;
L
Linus Torvalds 已提交
1513
			*indexp = index;
1514
			/* case exact match: return the current buffer. */
1515 1516 1517 1518
			if (cmp == XFS_CMP_EXACT) {
				*dbpp = dbp;
				return 0;
			}
1519
			cidb = curdb;
L
Linus Torvalds 已提交
1520 1521
		}
	}
1522
	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1523
	/*
1524 1525 1526
	 * Here, we can only be doing a lookup (not a rename or remove).
	 * If a case-insensitive match was found earlier, re-read the
	 * appropriate data block if required and return it.
1527 1528
	 */
	if (args->cmpresult == XFS_CMP_CASE) {
1529 1530
		ASSERT(cidb != -1);
		if (cidb != curdb) {
1531
			xfs_trans_brelse(tp, dbp);
1532
			error = xfs_dir3_data_read(tp, dp,
1533 1534
						   xfs_dir2_db_to_da(mp, cidb),
						   -1, &dbp);
1535
			if (error) {
1536
				xfs_trans_brelse(tp, lbp);
1537 1538 1539 1540
				return error;
			}
		}
		*dbpp = dbp;
1541 1542
		return 0;
	}
L
Linus Torvalds 已提交
1543 1544 1545
	/*
	 * No match found, return ENOENT.
	 */
1546
	ASSERT(cidb == -1);
L
Linus Torvalds 已提交
1547
	if (dbp)
1548 1549
		xfs_trans_brelse(tp, dbp);
	xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
1550 1551 1552 1553 1554 1555 1556 1557 1558 1559
	return XFS_ERROR(ENOENT);
}

/*
 * Remove an entry from a leaf format directory.
 */
int						/* error */
xfs_dir2_leaf_removename(
	xfs_da_args_t		*args)		/* operation arguments */
{
1560
	__be16			*bestsp;	/* leaf block best freespace */
1561
	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
L
Linus Torvalds 已提交
1562
	xfs_dir2_db_t		db;		/* data block number */
1563
	struct xfs_buf		*dbp;		/* data block buffer */
L
Linus Torvalds 已提交
1564 1565 1566 1567 1568
	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			error;		/* error return code */
	xfs_dir2_db_t		i;		/* temporary data block # */
	int			index;		/* index into leaf entries */
1569
	struct xfs_buf		*lbp;		/* leaf buffer */
L
Linus Torvalds 已提交
1570 1571 1572 1573 1574 1575 1576 1577
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			needlog;	/* need to log data header */
	int			needscan;	/* need to rescan data frees */
	xfs_dir2_data_off_t	oldbest;	/* old value of best free */
	xfs_trans_t		*tp;		/* transaction pointer */
1578
	struct xfs_dir2_data_free *bf;		/* bestfree table */
L
Linus Torvalds 已提交
1579

C
Christoph Hellwig 已提交
1580 1581
	trace_xfs_dir2_leaf_removename(args);

L
Linus Torvalds 已提交
1582 1583 1584 1585 1586 1587 1588 1589 1590
	/*
	 * Lookup the leaf entry, get the leaf and data blocks read in.
	 */
	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
		return error;
	}
	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;
1591 1592
	leaf = lbp->b_addr;
	hdr = dbp->b_addr;
1593 1594
	bf = xfs_dir3_data_bestfree_p(hdr);
	xfs_dir3_data_check(dp, dbp);
L
Linus Torvalds 已提交
1595 1596 1597 1598
	/*
	 * Point to the leaf entry, use that to point to the data entry.
	 */
	lep = &leaf->ents[index];
1599
	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
L
Linus Torvalds 已提交
1600
	dep = (xfs_dir2_data_entry_t *)
1601
	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
L
Linus Torvalds 已提交
1602
	needscan = needlog = 0;
1603
	oldbest = be16_to_cpu(bf[0].length);
1604 1605
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
	bestsp = xfs_dir2_leaf_bests_p(ltp);
1606
	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
L
Linus Torvalds 已提交
1607 1608 1609 1610
	/*
	 * Mark the former data entry unused.
	 */
	xfs_dir2_data_make_free(tp, dbp,
1611
		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
1612
		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
L
Linus Torvalds 已提交
1613 1614 1615
	/*
	 * We just mark the leaf entry stale by putting a null in it.
	 */
1616
	be16_add_cpu(&leaf->hdr.stale, 1);
L
Linus Torvalds 已提交
1617
	xfs_dir2_leaf_log_header(tp, lbp);
1618
	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
L
Linus Torvalds 已提交
1619 1620 1621 1622 1623 1624
	xfs_dir2_leaf_log_ents(tp, lbp, index, index);
	/*
	 * Scan the freespace in the data block again if necessary,
	 * log the data block header if necessary.
	 */
	if (needscan)
1625
		xfs_dir2_data_freescan(mp, hdr, &needlog);
L
Linus Torvalds 已提交
1626 1627 1628 1629 1630 1631
	if (needlog)
		xfs_dir2_data_log_header(tp, dbp);
	/*
	 * If the longest freespace in the data block has changed,
	 * put the new value in the bests table and log that.
	 */
1632 1633
	if (be16_to_cpu(bf[0].length) != oldbest) {
		bestsp[db] = bf[0].length;
L
Linus Torvalds 已提交
1634 1635
		xfs_dir2_leaf_log_bests(tp, lbp, db, db);
	}
1636
	xfs_dir3_data_check(dp, dbp);
L
Linus Torvalds 已提交
1637 1638 1639
	/*
	 * If the data block is now empty then get rid of the data block.
	 */
1640 1641
	if (be16_to_cpu(bf[0].length) ==
			mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)) {
L
Linus Torvalds 已提交
1642 1643 1644 1645 1646 1647 1648 1649
		ASSERT(db != mp->m_dirdatablk);
		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
			/*
			 * Nope, can't get rid of it because it caused
			 * allocation of a bmap btree block to do so.
			 * Just go on, returning success, leaving the
			 * empty block in place.
			 */
1650
			if (error == ENOSPC && args->total == 0)
L
Linus Torvalds 已提交
1651 1652 1653 1654 1655 1656 1657 1658 1659
				error = 0;
			xfs_dir2_leaf_check(dp, lbp);
			return error;
		}
		dbp = NULL;
		/*
		 * If this is the last data block then compact the
		 * bests table by getting rid of entries.
		 */
1660
		if (db == be32_to_cpu(ltp->bestcount) - 1) {
L
Linus Torvalds 已提交
1661 1662 1663 1664
			/*
			 * Look for the last active entry (i).
			 */
			for (i = db - 1; i > 0; i--) {
1665
				if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
L
Linus Torvalds 已提交
1666 1667 1668 1669 1670 1671 1672
					break;
			}
			/*
			 * Copy the table down so inactive entries at the
			 * end are removed.
			 */
			memmove(&bestsp[db - i], bestsp,
1673
				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1674
			be32_add_cpu(&ltp->bestcount, -(db - i));
L
Linus Torvalds 已提交
1675
			xfs_dir2_leaf_log_tail(tp, lbp);
1676
			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
L
Linus Torvalds 已提交
1677
		} else
1678
			bestsp[db] = cpu_to_be16(NULLDATAOFF);
L
Linus Torvalds 已提交
1679 1680 1681 1682
	}
	/*
	 * If the data block was not the first one, drop it.
	 */
1683
	else if (db != mp->m_dirdatablk)
L
Linus Torvalds 已提交
1684
		dbp = NULL;
1685

L
Linus Torvalds 已提交
1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699
	xfs_dir2_leaf_check(dp, lbp);
	/*
	 * See if we can convert to block form.
	 */
	return xfs_dir2_leaf_to_block(args, lbp, dbp);
}

/*
 * Replace the inode number in a leaf format directory entry.
 */
int						/* error */
xfs_dir2_leaf_replace(
	xfs_da_args_t		*args)		/* operation arguments */
{
1700
	struct xfs_buf		*dbp;		/* data block buffer */
L
Linus Torvalds 已提交
1701 1702 1703 1704
	xfs_dir2_data_entry_t	*dep;		/* data block entry */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			error;		/* error return code */
	int			index;		/* index of leaf entry */
1705
	struct xfs_buf		*lbp;		/* leaf buffer */
L
Linus Torvalds 已提交
1706 1707 1708 1709
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
	xfs_trans_t		*tp;		/* transaction pointer */

C
Christoph Hellwig 已提交
1710 1711
	trace_xfs_dir2_leaf_replace(args);

L
Linus Torvalds 已提交
1712 1713 1714 1715 1716 1717 1718
	/*
	 * Look up the entry.
	 */
	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
		return error;
	}
	dp = args->dp;
1719
	leaf = lbp->b_addr;
L
Linus Torvalds 已提交
1720 1721 1722 1723 1724 1725 1726 1727
	/*
	 * Point to the leaf entry, get data address from it.
	 */
	lep = &leaf->ents[index];
	/*
	 * Point to the data entry.
	 */
	dep = (xfs_dir2_data_entry_t *)
1728
	      ((char *)dbp->b_addr +
1729
	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1730
	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
L
Linus Torvalds 已提交
1731 1732 1733
	/*
	 * Put the new inode number in, log it.
	 */
1734
	dep->inumber = cpu_to_be64(args->inumber);
L
Linus Torvalds 已提交
1735 1736 1737
	tp = args->trans;
	xfs_dir2_data_log_entry(tp, dbp, dep);
	xfs_dir2_leaf_check(dp, lbp);
1738
	xfs_trans_brelse(tp, lbp);
L
Linus Torvalds 已提交
1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749
	return 0;
}

/*
 * Return index in the leaf block (lbp) which is either the first
 * one with this hash value, or if there are none, the insert point
 * for that hash value.
 */
int						/* index value */
xfs_dir2_leaf_search_hash(
	xfs_da_args_t		*args,		/* operation arguments */
1750
	struct xfs_buf		*lbp)		/* leaf buffer */
L
Linus Torvalds 已提交
1751 1752 1753 1754 1755 1756 1757 1758 1759
{
	xfs_dahash_t		hash=0;		/* hash from this entry */
	xfs_dahash_t		hashwant;	/* hash value looking for */
	int			high;		/* high leaf index */
	int			low;		/* low leaf index */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
	int			mid=0;		/* current leaf index */

1760
	leaf = lbp->b_addr;
L
Linus Torvalds 已提交
1761 1762 1763 1764 1765 1766 1767 1768
#ifndef __KERNEL__
	if (!leaf->hdr.count)
		return 0;
#endif
	/*
	 * Note, the table cannot be empty, so we have to go through the loop.
	 * Binary search the leaf entries looking for our hash value.
	 */
1769
	for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1,
L
Linus Torvalds 已提交
1770 1771 1772
		hashwant = args->hashval;
	     low <= high; ) {
		mid = (low + high) >> 1;
1773
		if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
L
Linus Torvalds 已提交
1774 1775 1776 1777 1778 1779 1780 1781 1782 1783
			break;
		if (hash < hashwant)
			low = mid + 1;
		else
			high = mid - 1;
	}
	/*
	 * Found one, back up through all the equal hash values.
	 */
	if (hash == hashwant) {
1784
		while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
L
Linus Torvalds 已提交
1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802
			mid--;
		}
	}
	/*
	 * Need to point to an entry higher than ours.
	 */
	else if (hash < hashwant)
		mid++;
	return mid;
}

/*
 * Trim off a trailing data block.  We know it's empty since the leaf
 * freespace table says so.
 */
int						/* error */
xfs_dir2_leaf_trim_data(
	xfs_da_args_t		*args,		/* operation arguments */
1803
	struct xfs_buf		*lbp,		/* leaf buffer */
L
Linus Torvalds 已提交
1804 1805
	xfs_dir2_db_t		db)		/* data block number */
{
1806
	__be16			*bestsp;	/* leaf bests table */
1807
	struct xfs_buf		*dbp;		/* data block buffer */
L
Linus Torvalds 已提交
1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820
	xfs_inode_t		*dp;		/* incore directory inode */
	int			error;		/* error return value */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
	xfs_mount_t		*mp;		/* filesystem mount point */
	xfs_trans_t		*tp;		/* transaction pointer */

	dp = args->dp;
	mp = dp->i_mount;
	tp = args->trans;
	/*
	 * Read the offending data block.  We need its buffer.
	 */
1821
	error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
1822
	if (error)
L
Linus Torvalds 已提交
1823 1824
		return error;

1825
	leaf = lbp->b_addr;
1826
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1827 1828 1829

#ifdef DEBUG
{
1830
	struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
1831
	struct xfs_dir2_data_free *bf = xfs_dir3_data_bestfree_p(hdr);
1832

1833 1834 1835 1836
	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
	ASSERT(be16_to_cpu(bf[0].length) ==
	       mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr));
1837
	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1838 1839 1840
}
#endif

L
Linus Torvalds 已提交
1841 1842 1843 1844 1845
	/*
	 * Get rid of the data block.
	 */
	if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
		ASSERT(error != ENOSPC);
1846
		xfs_trans_brelse(tp, dbp);
L
Linus Torvalds 已提交
1847 1848 1849 1850 1851
		return error;
	}
	/*
	 * Eliminate the last bests entry from the table.
	 */
1852
	bestsp = xfs_dir2_leaf_bests_p(ltp);
1853
	be32_add_cpu(&ltp->bestcount, -1);
1854
	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
L
Linus Torvalds 已提交
1855
	xfs_dir2_leaf_log_tail(tp, lbp);
1856
	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
L
Linus Torvalds 已提交
1857 1858 1859
	return 0;
}

1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873
static inline size_t
xfs_dir2_leaf_size(
	struct xfs_dir2_leaf_hdr	*hdr,
	int				counts)
{
	int			entries;

	entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
	return sizeof(xfs_dir2_leaf_hdr_t) +
	    entries * sizeof(xfs_dir2_leaf_entry_t) +
	    counts * sizeof(xfs_dir2_data_off_t) +
	    sizeof(xfs_dir2_leaf_tail_t);
}

L
Linus Torvalds 已提交
1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885
/*
 * Convert node form directory to leaf form directory.
 * The root of the node form dir needs to already be a LEAFN block.
 * Just return if we can't do anything.
 */
int						/* error */
xfs_dir2_node_to_leaf(
	xfs_da_state_t		*state)		/* directory operation state */
{
	xfs_da_args_t		*args;		/* operation arguments */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			error;		/* error return code */
1886
	struct xfs_buf		*fbp;		/* buffer for freespace block */
L
Linus Torvalds 已提交
1887 1888
	xfs_fileoff_t		fo;		/* freespace file offset */
	xfs_dir2_free_t		*free;		/* freespace structure */
1889
	struct xfs_buf		*lbp;		/* buffer for leaf block */
L
Linus Torvalds 已提交
1890 1891 1892 1893 1894
	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			rval;		/* successful free trim? */
	xfs_trans_t		*tp;		/* transaction pointer */
1895
	struct xfs_dir3_icfree_hdr freehdr;
L
Linus Torvalds 已提交
1896 1897 1898 1899 1900 1901 1902 1903

	/*
	 * There's more than a leaf level in the btree, so there must
	 * be multiple leafn blocks.  Give up.
	 */
	if (state->path.active > 1)
		return 0;
	args = state->args;
C
Christoph Hellwig 已提交
1904 1905 1906

	trace_xfs_dir2_node_to_leaf(args);

L
Linus Torvalds 已提交
1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943
	mp = state->mp;
	dp = args->dp;
	tp = args->trans;
	/*
	 * Get the last offset in the file.
	 */
	if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
		return error;
	}
	fo -= mp->m_dirblkfsbs;
	/*
	 * If there are freespace blocks other than the first one,
	 * take this opportunity to remove trailing empty freespace blocks
	 * that may have been left behind during no-space-reservation
	 * operations.
	 */
	while (fo > mp->m_dirfreeblk) {
		if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
			return error;
		}
		if (rval)
			fo -= mp->m_dirblkfsbs;
		else
			return 0;
	}
	/*
	 * Now find the block just before the freespace block.
	 */
	if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
		return error;
	}
	/*
	 * If it's not the single leaf block, give up.
	 */
	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
		return 0;
	lbp = state->path.blk[0].bp;
1944
	leaf = lbp->b_addr;
1945
	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
L
Linus Torvalds 已提交
1946 1947 1948
	/*
	 * Read the freespace block.
	 */
D
Dave Chinner 已提交
1949
	error = xfs_dir2_free_read(tp, dp,  mp->m_dirfreeblk, &fbp);
1950
	if (error)
L
Linus Torvalds 已提交
1951
		return error;
1952
	free = fbp->b_addr;
1953 1954 1955
	xfs_dir3_free_hdr_from_disk(&freehdr, free);

	ASSERT(!freehdr.firstdb);
1956

L
Linus Torvalds 已提交
1957 1958 1959 1960
	/*
	 * Now see if the leafn and free data will fit in a leaf1.
	 * If not, release the buffer and give up.
	 */
1961
	if (xfs_dir2_leaf_size(&leaf->hdr, freehdr.nvalid) > mp->m_dirblksize) {
1962
		xfs_trans_brelse(tp, fbp);
L
Linus Torvalds 已提交
1963 1964
		return 0;
	}
1965

L
Linus Torvalds 已提交
1966 1967 1968 1969
	/*
	 * If the leaf has any stale entries in it, compress them out.
	 * The compact routine will log the header.
	 */
1970
	if (be16_to_cpu(leaf->hdr.stale))
L
Linus Torvalds 已提交
1971 1972 1973
		xfs_dir2_leaf_compact(args, lbp);
	else
		xfs_dir2_leaf_log_header(tp, lbp);
1974

1975
	lbp->b_ops = &xfs_dir2_leaf1_buf_ops;
1976
	leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC);
1977

L
Linus Torvalds 已提交
1978 1979 1980
	/*
	 * Set up the leaf tail from the freespace block.
	 */
1981
	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1982
	ltp->bestcount = cpu_to_be32(freehdr.nvalid);
L
Linus Torvalds 已提交
1983 1984 1985
	/*
	 * Set up the leaf bests table.
	 */
1986 1987
	memcpy(xfs_dir2_leaf_bests_p(ltp), xfs_dir3_free_bests_p(mp, free),
		freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
1988
	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
L
Linus Torvalds 已提交
1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
	xfs_dir2_leaf_log_tail(tp, lbp);
	xfs_dir2_leaf_check(dp, lbp);
	/*
	 * Get rid of the freespace block.
	 */
	error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
	if (error) {
		/*
		 * This can't fail here because it can only happen when
		 * punching out the middle of an extent, and this is an
		 * isolated block.
		 */
		ASSERT(error != ENOSPC);
		return error;
	}
	fbp = NULL;
	/*
	 * Now see if we can convert the single-leaf directory
	 * down to a block form directory.
	 * This routine always kills the dabuf for the leaf, so
	 * eliminate it from the path.
	 */
	error = xfs_dir2_leaf_to_block(args, lbp, NULL);
	state->path.blk[0].bp = NULL;
	return error;
}