inode.c 11.6 KB
Newer Older
M
Mike Marshall 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

/*
 *  Linux VFS inode operations.
 */

#include "protocol.h"
12 13
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
M
Mike Marshall 已提交
14 15 16 17 18 19 20 21 22

static int read_one_page(struct page *page)
{
	int ret;
	int max_block;
	ssize_t bytes_read = 0;
	struct inode *inode = page->mapping->host;
	const __u32 blocksize = PAGE_CACHE_SIZE;	/* inode->i_blksize */
	const __u32 blockbits = PAGE_CACHE_SHIFT;	/* inode->i_blkbits */
23 24 25 26
	struct iov_iter to;
	struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};

	iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
M
Mike Marshall 已提交
27 28

	gossip_debug(GOSSIP_INODE_DEBUG,
29
		    "orangefs_readpage called with page %p\n",
M
Mike Marshall 已提交
30 31 32 33 34 35 36
		     page);

	max_block = ((inode->i_size / blocksize) + 1);

	if (page->index < max_block) {
		loff_t blockptr_offset = (((loff_t) page->index) << blockbits);

37 38 39 40
		bytes_read = orangefs_inode_read(inode,
						 &to,
						 &blockptr_offset,
						 inode->i_size);
M
Mike Marshall 已提交
41
	}
42 43
	/* this will only zero remaining unread portions of the page data */
	iov_iter_zero(~0U, &to);
M
Mike Marshall 已提交
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
	/* takes care of potential aliasing */
	flush_dcache_page(page);
	if (bytes_read < 0) {
		ret = bytes_read;
		SetPageError(page);
	} else {
		SetPageUptodate(page);
		if (PageError(page))
			ClearPageError(page);
		ret = 0;
	}
	/* unlock the page after the ->readpage() routine completes */
	unlock_page(page);
	return ret;
}

60
/*
 * ->readpage() entry point: @file is not used, the work is done by
 * read_one_page() and its return value is passed straight back.
 */
static int orangefs_readpage(struct file *file, struct page *page)
{
	int rc = read_one_page(page);

	return rc;
}

65
static int orangefs_readpages(struct file *file,
M
Mike Marshall 已提交
66 67 68 69 70 71 72
			   struct address_space *mapping,
			   struct list_head *pages,
			   unsigned nr_pages)
{
	int page_idx;
	int ret;

73
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpages called\n");
M
Mike Marshall 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page;

		page = list_entry(pages->prev, struct page, lru);
		list_del(&page->lru);
		if (!add_to_page_cache(page,
				       mapping,
				       page->index,
				       GFP_KERNEL)) {
			ret = read_one_page(page);
			gossip_debug(GOSSIP_INODE_DEBUG,
				"failure adding page to cache, read_one_page returned: %d\n",
				ret);
	      } else {
			page_cache_release(page);
	      }
	}
	BUG_ON(!list_empty(pages));
	return 0;
}

96
static void orangefs_invalidatepage(struct page *page,
M
Mike Marshall 已提交
97 98 99 100
				 unsigned int offset,
				 unsigned int length)
{
	gossip_debug(GOSSIP_INODE_DEBUG,
101
		     "orangefs_invalidatepage called on page %p "
M
Mike Marshall 已提交
102 103 104 105 106 107 108 109 110 111
		     "(offset is %u)\n",
		     page,
		     offset);

	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	return;

}

112
/*
 * ->releasepage() entry point: logs the call and unconditionally
 * returns 0.  The gfp argument is ignored.
 */
static int orangefs_releasepage(struct page *page, gfp_t foo)
{
	gossip_debug(GOSSIP_INODE_DEBUG,
		     "orangefs_releasepage called on page %p\n", page);

	return 0;
}

/*
 * Having a direct_IO entry point in the address_space_operations
 * struct causes the kernel to allow us to use O_DIRECT on
 * open. Nothing will ever call this thing, but in the future we
 * will need to be able to use O_DIRECT on open in order to support
 * AIO. Modeled after NFS, they do this too.
 */
/*
128
 * static ssize_t orangefs_direct_IO(int rw,
129 130 131 132 133
 *			struct kiocb *iocb,
 *			struct iov_iter *iter,
 *			loff_t offset)
 *{
 *	gossip_debug(GOSSIP_INODE_DEBUG,
134
 *		     "orangefs_direct_IO: %s\n",
135 136 137 138 139
 *		     iocb->ki_filp->f_path.dentry->d_name.name);
 *
 *	return -EINVAL;
 *}
 */
M
Mike Marshall 已提交
140

141 142
struct backing_dev_info orangefs_backing_dev_info = {
	.name = "orangefs",
M
Mike Marshall 已提交
143 144 145 146
	.ra_pages = 0,
	.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

147 148 149 150 151 152
/*
 * ORANGEFS implementation of the address space operations.  direct_IO
 * is intentionally left unset for now (see the commented-out entry).
 */
const struct address_space_operations orangefs_address_operations = {
	.readpages = orangefs_readpages,
	.readpage = orangefs_readpage,
	.releasepage = orangefs_releasepage,
	.invalidatepage = orangefs_invalidatepage,
/*	.direct_IO = orangefs_direct_IO */
};

156
static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
M
Mike Marshall 已提交
157
{
158 159
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
M
Mike Marshall 已提交
160 161 162 163 164 165 166
	loff_t orig_size = i_size_read(inode);
	int ret = -EINVAL;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
		     __func__,
		     get_khandle_from_ino(inode),
167 168
		     &orangefs_inode->refn.khandle,
		     orangefs_inode->refn.fs_id,
M
Mike Marshall 已提交
169 170 171 172
		     iattr->ia_size);

	truncate_setsize(inode, iattr->ia_size);

173
	new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE);
M
Mike Marshall 已提交
174 175 176
	if (!new_op)
		return -ENOMEM;

177
	new_op->upcall.req.truncate.refn = orangefs_inode->refn;
M
Mike Marshall 已提交
178 179 180 181 182 183 184 185 186 187
	new_op->upcall.req.truncate.size = (__s64) iattr->ia_size;

	ret = service_operation(new_op, __func__,
				get_interruptible_flag(inode));

	/*
	 * the truncate has no downcall members to retrieve, but
	 * the status value tells us if it went through ok or not
	 */
	gossip_debug(GOSSIP_INODE_DEBUG,
188
		     "orangefs: orangefs_truncate got return value of %d\n",
M
Mike Marshall 已提交
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
		     ret);

	op_release(new_op);

	if (ret != 0)
		return ret;

	/*
	 * Only change the c/mtime if we are changing the size or we are
	 * explicitly asked to change it.  This handles the semantic difference
	 * between truncate() and ftruncate() as implemented in the VFS.
	 *
	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
	 * special case where we need to update the times despite not having
	 * these flags set.  For all other operations the VFS set these flags
	 * explicitly if it wants a timestamp update.
	 */
	if (orig_size != i_size_read(inode) &&
	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
		iattr->ia_ctime = iattr->ia_mtime =
			current_fs_time(inode->i_sb);
		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
	}

	return ret;
}

/*
 * Change attributes of an object referenced by dentry.
 */
219
int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
M
Mike Marshall 已提交
220 221 222 223 224
{
	int ret = -EINVAL;
	struct inode *inode = dentry->d_inode;

	gossip_debug(GOSSIP_INODE_DEBUG,
225
		     "orangefs_setattr: called on %s\n",
M
Mike Marshall 已提交
226 227 228 229 230 231 232 233
		     dentry->d_name.name);

	ret = inode_change_ok(inode, iattr);
	if (ret)
		goto out;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
234
		ret = orangefs_setattr_size(inode, iattr);
M
Mike Marshall 已提交
235 236 237 238 239 240 241
		if (ret)
			goto out;
	}

	setattr_copy(inode, iattr);
	mark_inode_dirty(inode);

242
	ret = orangefs_inode_setattr(inode, iattr);
M
Mike Marshall 已提交
243
	gossip_debug(GOSSIP_INODE_DEBUG,
244
		     "orangefs_setattr: inode_setattr returned %d\n",
M
Mike Marshall 已提交
245 246 247 248 249 250 251
		     ret);

	if (!ret && (iattr->ia_valid & ATTR_MODE))
		/* change mod on a file that has ACLs */
		ret = posix_acl_chmod(inode, inode->i_mode);

out:
252
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", ret);
M
Mike Marshall 已提交
253 254 255 256 257 258
	return ret;
}

/*
 * Obtain attributes of an object given a dentry
 */
259
int orangefs_getattr(struct vfsmount *mnt,
M
Mike Marshall 已提交
260 261 262 263 264
		  struct dentry *dentry,
		  struct kstat *kstat)
{
	int ret = -ENOENT;
	struct inode *inode = dentry->d_inode;
265
	struct orangefs_inode_s *orangefs_inode = NULL;
M
Mike Marshall 已提交
266 267

	gossip_debug(GOSSIP_INODE_DEBUG,
268
		     "orangefs_getattr: called on %s\n",
M
Mike Marshall 已提交
269 270
		     dentry->d_name.name);

271
	ret = orangefs_inode_getattr(inode, 0, 1);
M
Mike Marshall 已提交
272 273
	if (ret == 0) {
		generic_fillattr(inode, kstat);
274

M
Mike Marshall 已提交
275
		/* override block size reported to stat */
276 277
		orangefs_inode = ORANGEFS_I(inode);
		kstat->blksize = orangefs_inode->blksize;
M
Mike Marshall 已提交
278 279 280 281
	}
	return ret;
}

282 283 284 285 286 287 288 289 290 291
/*
 * Permission check for @inode.
 *
 * Returns -ECHILD straight away when MAY_NOT_BLOCK is set in @mask.
 * Otherwise the attributes are fetched with orangefs_inode_getattr();
 * a negative result is returned as is, and on success the decision is
 * left to generic_permission().
 */
int orangefs_permission(struct inode *inode, int mask)
{
	int err;

	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);

	/* Make sure the permission (and other common attrs) are up to date. */
	err = orangefs_inode_getattr(inode, 0, 0);

	return err < 0 ? err : generic_permission(inode, mask);
}

299 300 301 302 303 304
/* ORANGEDS2 implementation of VFS inode operations for files */
struct inode_operations orangefs_file_inode_operations = {
	.get_acl = orangefs_get_acl,
	.set_acl = orangefs_set_acl,
	.setattr = orangefs_setattr,
	.getattr = orangefs_getattr,
M
Mike Marshall 已提交
305 306
	.setxattr = generic_setxattr,
	.getxattr = generic_getxattr,
307
	.listxattr = orangefs_listxattr,
M
Mike Marshall 已提交
308
	.removexattr = generic_removexattr,
309
	.permission = orangefs_permission,
M
Mike Marshall 已提交
310 311
};

312
static int orangefs_init_iops(struct inode *inode)
M
Mike Marshall 已提交
313
{
314
	inode->i_mapping->a_ops = &orangefs_address_operations;
M
Mike Marshall 已提交
315 316 317

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
318 319
		inode->i_op = &orangefs_file_inode_operations;
		inode->i_fop = &orangefs_file_operations;
M
Mike Marshall 已提交
320 321 322
		inode->i_blkbits = PAGE_CACHE_SHIFT;
		break;
	case S_IFLNK:
323
		inode->i_op = &orangefs_symlink_inode_operations;
M
Mike Marshall 已提交
324 325
		break;
	case S_IFDIR:
326 327
		inode->i_op = &orangefs_dir_inode_operations;
		inode->i_fop = &orangefs_dir_operations;
M
Mike Marshall 已提交
328 329 330 331 332 333 334 335 336 337 338 339
		break;
	default:
		gossip_debug(GOSSIP_INODE_DEBUG,
			     "%s: unsupported mode\n",
			     __func__);
		return -EINVAL;
	}

	return 0;
}

/*
340
 * Given a ORANGEFS object identifier (fsid, handle), convert it into a ino_t type
M
Mike Marshall 已提交
341 342 343
 * that will be used as a hash-index from where the handle will
 * be searched for in the VFS hash table of inodes.
 */
344
static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref)
M
Mike Marshall 已提交
345 346 347
{
	if (!ref)
		return 0;
348
	return orangefs_khandle_to_ino(&(ref->khandle));
M
Mike Marshall 已提交
349 350 351 352 353
}

/*
 * Called to set up an inode from iget5_locked.
 */
354
static int orangefs_set_inode(struct inode *inode, void *data)
M
Mike Marshall 已提交
355
{
356
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
357 358
	ORANGEFS_I(inode)->refn.fs_id = ref->fs_id;
	ORANGEFS_I(inode)->refn.khandle = ref->khandle;
M
Mike Marshall 已提交
359 360 361 362 363 364
	return 0;
}

/*
 * Called to determine if handles match.
 */
365
static int orangefs_test_inode(struct inode *inode, void *data)
M
Mike Marshall 已提交
366
{
367 368
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	struct orangefs_inode_s *orangefs_inode = NULL;
M
Mike Marshall 已提交
369

370 371 372
	orangefs_inode = ORANGEFS_I(inode);
	return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle), &(ref->khandle))
		&& orangefs_inode->refn.fs_id == ref->fs_id);
M
Mike Marshall 已提交
373 374 375
}

/*
376
 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
M
Mike Marshall 已提交
377 378 379
 * file handle.
 *
 * @sb: the file system super block instance.
380
 * @ref: The ORANGEFS object for which we are trying to locate an inode structure.
M
Mike Marshall 已提交
381
 */
382
struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref *ref)
M
Mike Marshall 已提交
383 384 385 386 387
{
	struct inode *inode = NULL;
	unsigned long hash;
	int error;

388 389
	hash = orangefs_handle_hash(ref);
	inode = iget5_locked(sb, hash, orangefs_test_inode, orangefs_set_inode, ref);
M
Mike Marshall 已提交
390 391 392
	if (!inode || !(inode->i_state & I_NEW))
		return inode;

393
	error = orangefs_inode_getattr(inode, 1, 0);
M
Mike Marshall 已提交
394 395 396 397 398 399
	if (error) {
		iget_failed(inode);
		return ERR_PTR(error);
	}

	inode->i_ino = hash;	/* needed for stat etc */
400
	orangefs_init_iops(inode);
M
Mike Marshall 已提交
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
	unlock_new_inode(inode);

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
		     &ref->khandle,
		     ref->fs_id,
		     hash,
		     inode->i_ino);

	return inode;
}

/*
 * Allocate an inode for a newly created file and insert it into the inode hash.
 */
416 417
/*
 * Build an inode for the object described by @ref and hash it.
 *
 * A fresh inode is allocated on @sb, tagged with @ref, has its attributes
 * fetched and its operation tables installed, and is then stamped with
 * @mode, the caller's fsuid/fsgid, the current time, a page-sized i_size
 * and @dev before being inserted into the inode hash.  ACLs are set up
 * against @dir last.
 *
 * Returns the inode on success, NULL if new_inode() fails, or an
 * ERR_PTR() if fetching attributes or inserting the inode fails (the
 * inode is released with iput() in that case).
 */
struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
		int mode, dev_t dev, struct orangefs_object_kref *ref)
{
	unsigned long hash = orangefs_handle_hash(ref);
	struct inode *inode;
	int rc;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
		     __func__,
		     sb,
		     MAJOR(dev),
		     MINOR(dev),
		     mode);

	inode = new_inode(sb);
	if (!inode)
		return NULL;

	orangefs_set_inode(inode, ref);
	inode->i_ino = hash;	/* needed for stat etc */

	rc = orangefs_inode_getattr(inode, 1, 0);
	if (rc)
		goto fail;

	orangefs_init_iops(inode);

	/* override what was fetched with the values for a brand new object */
	inode->i_mode = mode;
	inode->i_rdev = dev;
	inode->i_size = PAGE_CACHE_SIZE;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	rc = insert_inode_locked4(inode, hash, orangefs_test_inode, ref);
	if (rc < 0)
		goto fail;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "Initializing ACL's for inode %pU\n",
		     get_khandle_from_ino(inode));
	orangefs_init_acl(inode, dir);
	return inode;

fail:
	iput(inode);
	return ERR_PTR(rc);
}