inode.c 12.2 KB
Newer Older
M
Mike Marshall 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

/*
 *  Linux VFS inode operations.
 */

#include "protocol.h"
12 13
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
M
Mike Marshall 已提交
14 15 16 17 18 19 20 21 22

static int read_one_page(struct page *page)
{
	int ret;
	int max_block;
	ssize_t bytes_read = 0;
	struct inode *inode = page->mapping->host;
	const __u32 blocksize = PAGE_CACHE_SIZE;	/* inode->i_blksize */
	const __u32 blockbits = PAGE_CACHE_SHIFT;	/* inode->i_blkbits */
23 24 25 26
	struct iov_iter to;
	struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};

	iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
M
Mike Marshall 已提交
27 28

	gossip_debug(GOSSIP_INODE_DEBUG,
29
		    "orangefs_readpage called with page %p\n",
M
Mike Marshall 已提交
30 31 32 33 34 35 36
		     page);

	max_block = ((inode->i_size / blocksize) + 1);

	if (page->index < max_block) {
		loff_t blockptr_offset = (((loff_t) page->index) << blockbits);

37 38 39 40
		bytes_read = orangefs_inode_read(inode,
						 &to,
						 &blockptr_offset,
						 inode->i_size);
M
Mike Marshall 已提交
41
	}
42 43
	/* this will only zero remaining unread portions of the page data */
	iov_iter_zero(~0U, &to);
M
Mike Marshall 已提交
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
	/* takes care of potential aliasing */
	flush_dcache_page(page);
	if (bytes_read < 0) {
		ret = bytes_read;
		SetPageError(page);
	} else {
		SetPageUptodate(page);
		if (PageError(page))
			ClearPageError(page);
		ret = 0;
	}
	/* unlock the page after the ->readpage() routine completes */
	unlock_page(page);
	return ret;
}

60
/*
 * ->readpage() entry of orangefs_address_operations.  The file argument
 * is not used; the work is done by read_one_page(), whose result is
 * returned unchanged.
 */
static int orangefs_readpage(struct file *file, struct page *page)
{
	int status = read_one_page(page);

	return status;
}

65
static int orangefs_readpages(struct file *file,
M
Mike Marshall 已提交
66 67 68 69 70 71 72
			   struct address_space *mapping,
			   struct list_head *pages,
			   unsigned nr_pages)
{
	int page_idx;
	int ret;

73
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpages called\n");
M
Mike Marshall 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page;

		page = list_entry(pages->prev, struct page, lru);
		list_del(&page->lru);
		if (!add_to_page_cache(page,
				       mapping,
				       page->index,
				       GFP_KERNEL)) {
			ret = read_one_page(page);
			gossip_debug(GOSSIP_INODE_DEBUG,
				"failure adding page to cache, read_one_page returned: %d\n",
				ret);
	      } else {
			page_cache_release(page);
	      }
	}
	BUG_ON(!list_empty(pages));
	return 0;
}

96
static void orangefs_invalidatepage(struct page *page,
M
Mike Marshall 已提交
97 98 99 100
				 unsigned int offset,
				 unsigned int length)
{
	gossip_debug(GOSSIP_INODE_DEBUG,
101
		     "orangefs_invalidatepage called on page %p "
M
Mike Marshall 已提交
102 103 104 105 106 107 108 109 110 111
		     "(offset is %u)\n",
		     page,
		     offset);

	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	return;

}

112
/*
 * ->releasepage() entry of orangefs_address_operations.  Only logs the
 * call; the gfp argument is ignored and the result is always 0.
 */
static int orangefs_releasepage(struct page *page, gfp_t foo)
{
	gossip_debug(GOSSIP_INODE_DEBUG,
		     "orangefs_releasepage called on page %p\n", page);

	return 0;
}

/*
 * Having a direct_IO entry point in the address_space_operations
 * struct causes the kernel to allow us to use O_DIRECT on
 * open. Nothing will ever call this thing, but in the future we
 * will need to be able to use O_DIRECT on open in order to support
 * AIO. Modeled after NFS, they do this too.
 */
/*
 * static ssize_t orangefs_direct_IO(int rw,
 *			struct kiocb *iocb,
 *			struct iov_iter *iter,
 *			loff_t offset)
 *{
 *	gossip_debug(GOSSIP_INODE_DEBUG,
 *		     "orangefs_direct_IO: %s\n",
 *		     iocb->ki_filp->f_path.dentry->d_name.name);
 *
 *	return -EINVAL;
 *}
 */
M
Mike Marshall 已提交
140

141 142
struct backing_dev_info orangefs_backing_dev_info = {
	.name = "orangefs",
M
Mike Marshall 已提交
143 144 145 146
	.ra_pages = 0,
	.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

147 148 149 150 151 152
/* ORANGEFS implementation of the address space operations. */
const struct address_space_operations orangefs_address_operations = {
	.readpage	= orangefs_readpage,
	.readpages	= orangefs_readpages,
	.invalidatepage	= orangefs_invalidatepage,
	.releasepage	= orangefs_releasepage,
/*	.direct_IO = orangefs_direct_IO */
};

156
static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
M
Mike Marshall 已提交
157
{
158 159
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
M
Mike Marshall 已提交
160 161 162 163 164 165 166
	loff_t orig_size = i_size_read(inode);
	int ret = -EINVAL;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
		     __func__,
		     get_khandle_from_ino(inode),
167 168
		     &orangefs_inode->refn.khandle,
		     orangefs_inode->refn.fs_id,
M
Mike Marshall 已提交
169 170 171 172
		     iattr->ia_size);

	truncate_setsize(inode, iattr->ia_size);

173
	new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE);
M
Mike Marshall 已提交
174 175 176
	if (!new_op)
		return -ENOMEM;

177
	new_op->upcall.req.truncate.refn = orangefs_inode->refn;
M
Mike Marshall 已提交
178 179 180 181 182 183 184 185 186 187
	new_op->upcall.req.truncate.size = (__s64) iattr->ia_size;

	ret = service_operation(new_op, __func__,
				get_interruptible_flag(inode));

	/*
	 * the truncate has no downcall members to retrieve, but
	 * the status value tells us if it went through ok or not
	 */
	gossip_debug(GOSSIP_INODE_DEBUG,
188
		     "orangefs: orangefs_truncate got return value of %d\n",
M
Mike Marshall 已提交
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
		     ret);

	op_release(new_op);

	if (ret != 0)
		return ret;

	/*
	 * Only change the c/mtime if we are changing the size or we are
	 * explicitly asked to change it.  This handles the semantic difference
	 * between truncate() and ftruncate() as implemented in the VFS.
	 *
	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
	 * special case where we need to update the times despite not having
	 * these flags set.  For all other operations the VFS set these flags
	 * explicitly if it wants a timestamp update.
	 */
	if (orig_size != i_size_read(inode) &&
	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
		iattr->ia_ctime = iattr->ia_mtime =
			current_fs_time(inode->i_sb);
		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
	}

	return ret;
}

/*
 * Change attributes of an object referenced by dentry.
 */
219
int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
M
Mike Marshall 已提交
220 221 222 223 224
{
	int ret = -EINVAL;
	struct inode *inode = dentry->d_inode;

	gossip_debug(GOSSIP_INODE_DEBUG,
225
		     "orangefs_setattr: called on %s\n",
M
Mike Marshall 已提交
226 227 228 229 230 231 232 233
		     dentry->d_name.name);

	ret = inode_change_ok(inode, iattr);
	if (ret)
		goto out;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
234
		ret = orangefs_setattr_size(inode, iattr);
M
Mike Marshall 已提交
235 236 237 238 239 240 241
		if (ret)
			goto out;
	}

	setattr_copy(inode, iattr);
	mark_inode_dirty(inode);

242
	ret = orangefs_inode_setattr(inode, iattr);
M
Mike Marshall 已提交
243
	gossip_debug(GOSSIP_INODE_DEBUG,
244
		     "orangefs_setattr: inode_setattr returned %d\n",
M
Mike Marshall 已提交
245 246 247 248 249 250 251
		     ret);

	if (!ret && (iattr->ia_valid & ATTR_MODE))
		/* change mod on a file that has ACLs */
		ret = posix_acl_chmod(inode, inode->i_mode);

out:
252
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", ret);
M
Mike Marshall 已提交
253 254 255 256 257 258
	return ret;
}

/*
 * Obtain attributes of an object given a dentry
 */
259
int orangefs_getattr(struct vfsmount *mnt,
M
Mike Marshall 已提交
260 261 262 263 264
		  struct dentry *dentry,
		  struct kstat *kstat)
{
	int ret = -ENOENT;
	struct inode *inode = dentry->d_inode;
265
	struct orangefs_inode_s *orangefs_inode = NULL;
M
Mike Marshall 已提交
266 267

	gossip_debug(GOSSIP_INODE_DEBUG,
268
		     "orangefs_getattr: called on %s\n",
M
Mike Marshall 已提交
269 270
		     dentry->d_name.name);

M
Martin Brandenburg 已提交
271
	ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0);
M
Mike Marshall 已提交
272 273
	if (ret == 0) {
		generic_fillattr(inode, kstat);
274

M
Mike Marshall 已提交
275
		/* override block size reported to stat */
276 277
		orangefs_inode = ORANGEFS_I(inode);
		kstat->blksize = orangefs_inode->blksize;
278 279

		inode->i_link = ORANGEFS_I(dentry->d_inode)->link_target;
M
Mike Marshall 已提交
280 281 282 283 284 285 286
	} else {
		/* assume an I/O error and flag inode as bad */
		gossip_debug(GOSSIP_INODE_DEBUG,
			     "%s:%s:%d calling make bad inode\n",
			     __FILE__,
			     __func__,
			     __LINE__);
287
		orangefs_make_bad_inode(inode);
M
Mike Marshall 已提交
288 289 290 291
	}
	return ret;
}

292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
/*
 * Permission check for an inode.
 *
 * Requests carrying MAY_NOT_BLOCK are refused with -ECHILD before
 * anything else happens.  Otherwise the inode attributes are refreshed
 * with orangefs_inode_getattr() and the decision is left to
 * generic_permission().
 *
 * Returns -ECHILD, a negative getattr result, or the result of
 * generic_permission().
 */
int orangefs_permission(struct inode *inode, int mask)
{
	int err;

	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);

	/* Make sure the permission (and other common attrs) are up to date. */
	err = orangefs_inode_getattr(inode,
				     ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE,
				     0);

	return (err < 0) ? err : generic_permission(inode, mask);
}

310 311 312 313 314 315
/* ORANGEDS2 implementation of VFS inode operations for files */
struct inode_operations orangefs_file_inode_operations = {
	.get_acl = orangefs_get_acl,
	.set_acl = orangefs_set_acl,
	.setattr = orangefs_setattr,
	.getattr = orangefs_getattr,
M
Mike Marshall 已提交
316 317
	.setxattr = generic_setxattr,
	.getxattr = generic_getxattr,
318
	.listxattr = orangefs_listxattr,
M
Mike Marshall 已提交
319
	.removexattr = generic_removexattr,
320
	.permission = orangefs_permission,
M
Mike Marshall 已提交
321 322
};

323
static int orangefs_init_iops(struct inode *inode)
M
Mike Marshall 已提交
324
{
325
	inode->i_mapping->a_ops = &orangefs_address_operations;
M
Mike Marshall 已提交
326 327 328

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
329 330
		inode->i_op = &orangefs_file_inode_operations;
		inode->i_fop = &orangefs_file_operations;
M
Mike Marshall 已提交
331 332 333
		inode->i_blkbits = PAGE_CACHE_SHIFT;
		break;
	case S_IFLNK:
334
		inode->i_op = &orangefs_symlink_inode_operations;
M
Mike Marshall 已提交
335 336
		break;
	case S_IFDIR:
337 338
		inode->i_op = &orangefs_dir_inode_operations;
		inode->i_fop = &orangefs_dir_operations;
M
Mike Marshall 已提交
339 340 341 342 343 344 345 346 347 348 349 350
		break;
	default:
		gossip_debug(GOSSIP_INODE_DEBUG,
			     "%s: unsupported mode\n",
			     __func__);
		return -EINVAL;
	}

	return 0;
}

/*
351
 * Given a ORANGEFS object identifier (fsid, handle), convert it into a ino_t type
M
Mike Marshall 已提交
352 353 354
 * that will be used as a hash-index from where the handle will
 * be searched for in the VFS hash table of inodes.
 */
355
static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref)
M
Mike Marshall 已提交
356 357 358
{
	if (!ref)
		return 0;
359
	return orangefs_khandle_to_ino(&(ref->khandle));
M
Mike Marshall 已提交
360 361 362 363 364
}

/*
 * Called to set up an inode from iget5_locked.
 */
365
static int orangefs_set_inode(struct inode *inode, void *data)
M
Mike Marshall 已提交
366
{
367 368
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	struct orangefs_inode_s *orangefs_inode = NULL;
M
Mike Marshall 已提交
369 370 371 372

	/* Make sure that we have sane parameters */
	if (!data || !inode)
		return 0;
373 374
	orangefs_inode = ORANGEFS_I(inode);
	if (!orangefs_inode)
M
Mike Marshall 已提交
375
		return 0;
376 377
	orangefs_inode->refn.fs_id = ref->fs_id;
	orangefs_inode->refn.khandle = ref->khandle;
M
Mike Marshall 已提交
378 379 380 381 382 383
	return 0;
}

/*
 * Called to determine if handles match.
 */
384
static int orangefs_test_inode(struct inode *inode, void *data)
M
Mike Marshall 已提交
385
{
386 387
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	struct orangefs_inode_s *orangefs_inode = NULL;
M
Mike Marshall 已提交
388

389 390 391
	orangefs_inode = ORANGEFS_I(inode);
	return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle), &(ref->khandle))
		&& orangefs_inode->refn.fs_id == ref->fs_id);
M
Mike Marshall 已提交
392 393 394
}

/*
395
 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
M
Mike Marshall 已提交
396 397 398
 * file handle.
 *
 * @sb: the file system super block instance.
399
 * @ref: The ORANGEFS object for which we are trying to locate an inode structure.
M
Mike Marshall 已提交
400
 */
401
struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref *ref)
M
Mike Marshall 已提交
402 403 404 405 406
{
	struct inode *inode = NULL;
	unsigned long hash;
	int error;

407 408
	hash = orangefs_handle_hash(ref);
	inode = iget5_locked(sb, hash, orangefs_test_inode, orangefs_set_inode, ref);
M
Mike Marshall 已提交
409 410 411
	if (!inode || !(inode->i_state & I_NEW))
		return inode;

412 413
	error = orangefs_inode_getattr(inode,
	    ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0);
M
Mike Marshall 已提交
414 415 416 417 418 419
	if (error) {
		iget_failed(inode);
		return ERR_PTR(error);
	}

	inode->i_ino = hash;	/* needed for stat etc */
420
	orangefs_init_iops(inode);
M
Mike Marshall 已提交
421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
	unlock_new_inode(inode);

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
		     &ref->khandle,
		     ref->fs_id,
		     hash,
		     inode->i_ino);

	return inode;
}

/*
 * Allocate an inode for a newly created file and insert it into the inode hash.
 */
436 437
struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
		int mode, dev_t dev, struct orangefs_object_kref *ref)
M
Mike Marshall 已提交
438
{
439
	unsigned long hash = orangefs_handle_hash(ref);
M
Mike Marshall 已提交
440 441 442 443
	struct inode *inode;
	int error;

	gossip_debug(GOSSIP_INODE_DEBUG,
444 445
		     "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
		     __func__,
M
Mike Marshall 已提交
446 447 448 449 450 451 452 453 454
		     sb,
		     MAJOR(dev),
		     MINOR(dev),
		     mode);

	inode = new_inode(sb);
	if (!inode)
		return NULL;

455
	orangefs_set_inode(inode, ref);
M
Mike Marshall 已提交
456 457
	inode->i_ino = hash;	/* needed for stat etc */

458 459
	error = orangefs_inode_getattr(inode,
	    ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE, 0);
M
Mike Marshall 已提交
460 461 462
	if (error)
		goto out_iput;

463
	orangefs_init_iops(inode);
M
Mike Marshall 已提交
464 465 466 467 468 469 470 471

	inode->i_mode = mode;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_size = PAGE_CACHE_SIZE;
	inode->i_rdev = dev;

472
	error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref);
M
Mike Marshall 已提交
473 474 475 476 477 478
	if (error < 0)
		goto out_iput;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "Initializing ACL's for inode %pU\n",
		     get_khandle_from_ino(inode));
479
	orangefs_init_acl(inode, dir);
M
Mike Marshall 已提交
480 481 482 483 484 485
	return inode;

out_iput:
	iput(inode);
	return ERR_PTR(error);
}