inode.c 11.9 KB
Newer Older
M
Mike Marshall 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

/*
 *  Linux VFS inode operations.
 */

#include "protocol.h"
12 13
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
M
Mike Marshall 已提交
14 15 16 17 18 19 20 21 22

static int read_one_page(struct page *page)
{
	int ret;
	int max_block;
	ssize_t bytes_read = 0;
	struct inode *inode = page->mapping->host;
	const __u32 blocksize = PAGE_CACHE_SIZE;	/* inode->i_blksize */
	const __u32 blockbits = PAGE_CACHE_SHIFT;	/* inode->i_blkbits */
23 24 25 26
	struct iov_iter to;
	struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};

	iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
M
Mike Marshall 已提交
27 28

	gossip_debug(GOSSIP_INODE_DEBUG,
29
		    "orangefs_readpage called with page %p\n",
M
Mike Marshall 已提交
30 31 32 33 34 35 36
		     page);

	max_block = ((inode->i_size / blocksize) + 1);

	if (page->index < max_block) {
		loff_t blockptr_offset = (((loff_t) page->index) << blockbits);

37 38 39 40
		bytes_read = orangefs_inode_read(inode,
						 &to,
						 &blockptr_offset,
						 inode->i_size);
M
Mike Marshall 已提交
41
	}
42 43
	/* this will only zero remaining unread portions of the page data */
	iov_iter_zero(~0U, &to);
M
Mike Marshall 已提交
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
	/* takes care of potential aliasing */
	flush_dcache_page(page);
	if (bytes_read < 0) {
		ret = bytes_read;
		SetPageError(page);
	} else {
		SetPageUptodate(page);
		if (PageError(page))
			ClearPageError(page);
		ret = 0;
	}
	/* unlock the page after the ->readpage() routine completes */
	unlock_page(page);
	return ret;
}

60
/*
 * ->readpage() entry point.  @file is not used; the page is filled,
 * flagged and unlocked by read_one_page(), whose result is returned.
 */
static int orangefs_readpage(struct file *file, struct page *page)
{
	int rc = read_one_page(page);

	return rc;
}

65
static int orangefs_readpages(struct file *file,
M
Mike Marshall 已提交
66 67 68 69 70 71 72
			   struct address_space *mapping,
			   struct list_head *pages,
			   unsigned nr_pages)
{
	int page_idx;
	int ret;

73
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpages called\n");
M
Mike Marshall 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page;

		page = list_entry(pages->prev, struct page, lru);
		list_del(&page->lru);
		if (!add_to_page_cache(page,
				       mapping,
				       page->index,
				       GFP_KERNEL)) {
			ret = read_one_page(page);
			gossip_debug(GOSSIP_INODE_DEBUG,
				"failure adding page to cache, read_one_page returned: %d\n",
				ret);
	      } else {
			page_cache_release(page);
	      }
	}
	BUG_ON(!list_empty(pages));
	return 0;
}

96
static void orangefs_invalidatepage(struct page *page,
M
Mike Marshall 已提交
97 98 99 100
				 unsigned int offset,
				 unsigned int length)
{
	gossip_debug(GOSSIP_INODE_DEBUG,
101
		     "orangefs_invalidatepage called on page %p "
M
Mike Marshall 已提交
102 103 104 105 106 107 108 109 110 111
		     "(offset is %u)\n",
		     page,
		     offset);

	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	return;

}

112
/*
 * ->releasepage() entry point: only logs the call and always returns 0.
 * The gfp argument is ignored.
 */
static int orangefs_releasepage(struct page *page, gfp_t foo)
{
	gossip_debug(GOSSIP_INODE_DEBUG,
		     "orangefs_releasepage called on page %p\n", page);

	return 0;
}

/*
 * Having a direct_IO entry point in the address_space_operations
 * struct causes the kernel to allow us to use O_DIRECT on
 * open. Nothing will ever call this thing, but in the future we
 * will need to be able to use O_DIRECT on open in order to support
 * AIO. Modeled after NFS, they do this too.
 */
/*
 * static ssize_t orangefs_direct_IO(int rw,
 *			struct kiocb *iocb,
 *			struct iov_iter *iter,
 *			loff_t offset)
 *{
 *	gossip_debug(GOSSIP_INODE_DEBUG,
 *		     "orangefs_direct_IO: %s\n",
 *		     iocb->ki_filp->f_path.dentry->d_name.name);
 *
 *	return -EINVAL;
 *}
 */
M
Mike Marshall 已提交
140

141 142
struct backing_dev_info orangefs_backing_dev_info = {
	.name = "orangefs",
M
Mike Marshall 已提交
143 144 145 146
	.ra_pages = 0,
	.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

147 148 149 150 151 152
/*
 * Address space operations for orangefs inodes.  A direct_IO hook is
 * intentionally left unset (see the commented-out line below).
 */
const struct address_space_operations orangefs_address_operations = {
	.readpage	= orangefs_readpage,
	.readpages	= orangefs_readpages,
	.releasepage	= orangefs_releasepage,
	.invalidatepage	= orangefs_invalidatepage,
/*	.direct_IO = orangefs_direct_IO */
};

156
static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
M
Mike Marshall 已提交
157
{
158 159
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
M
Mike Marshall 已提交
160 161 162 163 164 165 166
	loff_t orig_size = i_size_read(inode);
	int ret = -EINVAL;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
		     __func__,
		     get_khandle_from_ino(inode),
167 168
		     &orangefs_inode->refn.khandle,
		     orangefs_inode->refn.fs_id,
M
Mike Marshall 已提交
169 170 171 172
		     iattr->ia_size);

	truncate_setsize(inode, iattr->ia_size);

173
	new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE);
M
Mike Marshall 已提交
174 175 176
	if (!new_op)
		return -ENOMEM;

177
	new_op->upcall.req.truncate.refn = orangefs_inode->refn;
M
Mike Marshall 已提交
178 179 180 181 182 183 184 185 186 187
	new_op->upcall.req.truncate.size = (__s64) iattr->ia_size;

	ret = service_operation(new_op, __func__,
				get_interruptible_flag(inode));

	/*
	 * the truncate has no downcall members to retrieve, but
	 * the status value tells us if it went through ok or not
	 */
	gossip_debug(GOSSIP_INODE_DEBUG,
188
		     "orangefs: orangefs_truncate got return value of %d\n",
M
Mike Marshall 已提交
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
		     ret);

	op_release(new_op);

	if (ret != 0)
		return ret;

	/*
	 * Only change the c/mtime if we are changing the size or we are
	 * explicitly asked to change it.  This handles the semantic difference
	 * between truncate() and ftruncate() as implemented in the VFS.
	 *
	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
	 * special case where we need to update the times despite not having
	 * these flags set.  For all other operations the VFS set these flags
	 * explicitly if it wants a timestamp update.
	 */
	if (orig_size != i_size_read(inode) &&
	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
		iattr->ia_ctime = iattr->ia_mtime =
			current_fs_time(inode->i_sb);
		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
	}

	return ret;
}

/*
 * Change attributes of an object referenced by dentry.
 */
219
int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
M
Mike Marshall 已提交
220 221 222 223 224
{
	int ret = -EINVAL;
	struct inode *inode = dentry->d_inode;

	gossip_debug(GOSSIP_INODE_DEBUG,
225
		     "orangefs_setattr: called on %s\n",
M
Mike Marshall 已提交
226 227 228 229 230 231 232 233
		     dentry->d_name.name);

	ret = inode_change_ok(inode, iattr);
	if (ret)
		goto out;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
234
		ret = orangefs_setattr_size(inode, iattr);
M
Mike Marshall 已提交
235 236 237 238 239 240 241
		if (ret)
			goto out;
	}

	setattr_copy(inode, iattr);
	mark_inode_dirty(inode);

242
	ret = orangefs_inode_setattr(inode, iattr);
M
Mike Marshall 已提交
243
	gossip_debug(GOSSIP_INODE_DEBUG,
244
		     "orangefs_setattr: inode_setattr returned %d\n",
M
Mike Marshall 已提交
245 246 247 248 249 250 251
		     ret);

	if (!ret && (iattr->ia_valid & ATTR_MODE))
		/* change mod on a file that has ACLs */
		ret = posix_acl_chmod(inode, inode->i_mode);

out:
252
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", ret);
M
Mike Marshall 已提交
253 254 255 256 257 258
	return ret;
}

/*
 * Obtain attributes of an object given a dentry
 */
259
int orangefs_getattr(struct vfsmount *mnt,
M
Mike Marshall 已提交
260 261 262 263 264
		  struct dentry *dentry,
		  struct kstat *kstat)
{
	int ret = -ENOENT;
	struct inode *inode = dentry->d_inode;
265
	struct orangefs_inode_s *orangefs_inode = NULL;
M
Mike Marshall 已提交
266 267

	gossip_debug(GOSSIP_INODE_DEBUG,
268
		     "orangefs_getattr: called on %s\n",
M
Mike Marshall 已提交
269 270 271 272 273 274 275
		     dentry->d_name.name);

	/*
	 * Similar to the above comment, a getattr also expects that all
	 * fields/attributes of the inode would be refreshed. So again, we
	 * dont have too much of a choice but refresh all the attributes.
	 */
M
Martin Brandenburg 已提交
276
	ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0);
M
Mike Marshall 已提交
277 278 279
	if (ret == 0) {
		generic_fillattr(inode, kstat);
		/* override block size reported to stat */
280 281
		orangefs_inode = ORANGEFS_I(inode);
		kstat->blksize = orangefs_inode->blksize;
M
Mike Marshall 已提交
282 283 284 285 286 287 288
	} else {
		/* assume an I/O error and flag inode as bad */
		gossip_debug(GOSSIP_INODE_DEBUG,
			     "%s:%s:%d calling make bad inode\n",
			     __FILE__,
			     __func__,
			     __LINE__);
289
		orangefs_make_bad_inode(inode);
M
Mike Marshall 已提交
290 291 292 293
	}
	return ret;
}

294 295 296 297 298 299
/* ORANGEDS2 implementation of VFS inode operations for files */
struct inode_operations orangefs_file_inode_operations = {
	.get_acl = orangefs_get_acl,
	.set_acl = orangefs_set_acl,
	.setattr = orangefs_setattr,
	.getattr = orangefs_getattr,
M
Mike Marshall 已提交
300 301
	.setxattr = generic_setxattr,
	.getxattr = generic_getxattr,
302
	.listxattr = orangefs_listxattr,
M
Mike Marshall 已提交
303 304 305
	.removexattr = generic_removexattr,
};

306
static int orangefs_init_iops(struct inode *inode)
M
Mike Marshall 已提交
307
{
308
	inode->i_mapping->a_ops = &orangefs_address_operations;
M
Mike Marshall 已提交
309 310 311

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
312 313
		inode->i_op = &orangefs_file_inode_operations;
		inode->i_fop = &orangefs_file_operations;
M
Mike Marshall 已提交
314 315 316
		inode->i_blkbits = PAGE_CACHE_SHIFT;
		break;
	case S_IFLNK:
317
		inode->i_op = &orangefs_symlink_inode_operations;
M
Mike Marshall 已提交
318 319
		break;
	case S_IFDIR:
320 321
		inode->i_op = &orangefs_dir_inode_operations;
		inode->i_fop = &orangefs_dir_operations;
M
Mike Marshall 已提交
322 323 324 325 326 327 328 329 330 331 332 333
		break;
	default:
		gossip_debug(GOSSIP_INODE_DEBUG,
			     "%s: unsupported mode\n",
			     __func__);
		return -EINVAL;
	}

	return 0;
}

/*
334
 * Given a ORANGEFS object identifier (fsid, handle), convert it into a ino_t type
M
Mike Marshall 已提交
335 336 337
 * that will be used as a hash-index from where the handle will
 * be searched for in the VFS hash table of inodes.
 */
338
static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref)
M
Mike Marshall 已提交
339 340 341
{
	if (!ref)
		return 0;
342
	return orangefs_khandle_to_ino(&(ref->khandle));
M
Mike Marshall 已提交
343 344 345 346 347
}

/*
 * Called to set up an inode from iget5_locked.
 */
348
static int orangefs_set_inode(struct inode *inode, void *data)
M
Mike Marshall 已提交
349
{
350 351
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	struct orangefs_inode_s *orangefs_inode = NULL;
M
Mike Marshall 已提交
352 353 354 355

	/* Make sure that we have sane parameters */
	if (!data || !inode)
		return 0;
356 357
	orangefs_inode = ORANGEFS_I(inode);
	if (!orangefs_inode)
M
Mike Marshall 已提交
358
		return 0;
359 360
	orangefs_inode->refn.fs_id = ref->fs_id;
	orangefs_inode->refn.khandle = ref->khandle;
M
Mike Marshall 已提交
361 362 363 364 365 366
	return 0;
}

/*
 * Called to determine if handles match.
 */
367
static int orangefs_test_inode(struct inode *inode, void *data)
M
Mike Marshall 已提交
368
{
369 370
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	struct orangefs_inode_s *orangefs_inode = NULL;
M
Mike Marshall 已提交
371

372 373 374
	orangefs_inode = ORANGEFS_I(inode);
	return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle), &(ref->khandle))
		&& orangefs_inode->refn.fs_id == ref->fs_id);
M
Mike Marshall 已提交
375 376 377
}

/*
378
 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
M
Mike Marshall 已提交
379 380 381
 * file handle.
 *
 * @sb: the file system super block instance.
382
 * @ref: The ORANGEFS object for which we are trying to locate an inode structure.
M
Mike Marshall 已提交
383
 */
384
struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref *ref)
M
Mike Marshall 已提交
385 386 387 388 389
{
	struct inode *inode = NULL;
	unsigned long hash;
	int error;

390 391
	hash = orangefs_handle_hash(ref);
	inode = iget5_locked(sb, hash, orangefs_test_inode, orangefs_set_inode, ref);
M
Mike Marshall 已提交
392 393 394
	if (!inode || !(inode->i_state & I_NEW))
		return inode;

M
Martin Brandenburg 已提交
395
	error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0);
M
Mike Marshall 已提交
396 397 398 399 400 401
	if (error) {
		iget_failed(inode);
		return ERR_PTR(error);
	}

	inode->i_ino = hash;	/* needed for stat etc */
402
	orangefs_init_iops(inode);
M
Mike Marshall 已提交
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417
	unlock_new_inode(inode);

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
		     &ref->khandle,
		     ref->fs_id,
		     hash,
		     inode->i_ino);

	return inode;
}

/*
 * Allocate an inode for a newly created file and insert it into the inode hash.
 */
418 419
struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
		int mode, dev_t dev, struct orangefs_object_kref *ref)
M
Mike Marshall 已提交
420
{
421
	unsigned long hash = orangefs_handle_hash(ref);
M
Mike Marshall 已提交
422 423 424 425
	struct inode *inode;
	int error;

	gossip_debug(GOSSIP_INODE_DEBUG,
426
		     "orangefs_get_custom_inode_common: called\n"
M
Mike Marshall 已提交
427 428 429 430 431 432 433 434 435 436
		     "(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
		     sb,
		     MAJOR(dev),
		     MINOR(dev),
		     mode);

	inode = new_inode(sb);
	if (!inode)
		return NULL;

437
	orangefs_set_inode(inode, ref);
M
Mike Marshall 已提交
438 439
	inode->i_ino = hash;	/* needed for stat etc */

M
Martin Brandenburg 已提交
440
	error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0);
M
Mike Marshall 已提交
441 442 443
	if (error)
		goto out_iput;

444
	orangefs_init_iops(inode);
M
Mike Marshall 已提交
445 446 447 448 449 450 451 452

	inode->i_mode = mode;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_size = PAGE_CACHE_SIZE;
	inode->i_rdev = dev;

453
	error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref);
M
Mike Marshall 已提交
454 455 456 457 458 459
	if (error < 0)
		goto out_iput;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "Initializing ACL's for inode %pU\n",
		     get_khandle_from_ino(inode));
460
	orangefs_init_acl(inode, dir);
M
Mike Marshall 已提交
461 462 463 464 465 466
	return inode;

out_iput:
	iput(inode);
	return ERR_PTR(error);
}