orangefs-kernel.h 23.5 KB
Newer Older
M
Mike Marshall 已提交
1 2 3 4 5 6 7
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

/*
 *  The ORANGEFS Linux kernel support allows ORANGEFS volumes to be mounted and
 *  accessed through the Linux VFS (i.e. using standard I/O system calls).
 *  This support is only needed on clients that wish to mount the file system.
 *
 */

/*
 *  Declarations and macros for the ORANGEFS Linux kernel support.
 */

18 19
#ifndef __ORANGEFSKERNEL_H
#define __ORANGEFSKERNEL_H
M
Mike Marshall 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55

#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/statfs.h>
#include <linux/backing-dev.h>
#include <linux/device.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>

#include <linux/aio.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/uio.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/wait.h>
#include <linux/dcache.h>
#include <linux/pagemap.h>
#include <linux/poll.h>
#include <linux/rwsem.h>
#include <linux/xattr.h>
#include <linux/exportfs.h>

#include <asm/unaligned.h>

56
#include "orangefs-dev-proto.h"
M
Mike Marshall 已提交
57

58 59
/*
 * Default operation timeout, in seconds.  Builds with
 * ORANGEFS_KERNEL_DEBUG defined use the shorter value.
 */
#ifdef ORANGEFS_KERNEL_DEBUG
#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       10
#else
#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       20
#endif

64
/* timeouts, in seconds */
#define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS   30

#define ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS     900	/* 15 minutes */

/* name of the request device */
#define ORANGEFS_REQDEVICE_NAME          "pvfs2-req"

/* miscellaneous protocol constants and limits */
#define ORANGEFS_DEVREQ_MAGIC             0x20030529
#define ORANGEFS_LINK_MAX                 0x000000FF
#define ORANGEFS_PURGE_RETRY_COUNT     0x00000005
#define ORANGEFS_SEEK_END              0x00000002
#define ORANGEFS_MAX_NUM_OPTIONS          0x00000004
#define ORANGEFS_MAX_MOUNT_OPT_LEN        0x00000080
#define ORANGEFS_MAX_FSKEY_LEN            64
M
Mike Marshall 已提交
77 78

/*
 * Byte counts made of two __s32 values, one __u64 value and the
 * upcall (respectively downcall) structure.
 * NOTE(review): presumably the fixed fields that frame a device request -
 * confirm against the device read/write code.
 */
#define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) +   \
sizeof(__u64) + sizeof(struct orangefs_upcall_s))
#define MAX_DEV_REQ_DOWNSIZE (2 * sizeof(__s32) + \
sizeof(__u64) + sizeof(struct orangefs_downcall_s))
M
Mike Marshall 已提交
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106

/* number of bytes in a long */
#define BITS_PER_LONG_DIV_8 (BITS_PER_LONG / 8)

/*
 * borrowed from irda.h
 *
 * Converts milliseconds to jiffies, rounding up.
 */
#ifndef MSECS_TO_JIFFIES
#define MSECS_TO_JIFFIES(ms) ((999 + (ms) * HZ) / 1000)
#endif

/*
 * MAX_DEV_REQ_UPSIZE / MAX_DEV_REQ_DOWNSIZE rounded up to the next multiple
 * of BITS_PER_LONG_DIV_8.  Note that a size which is already a multiple
 * still grows by one full BITS_PER_LONG_DIV_8.
 */
#define MAX_ALIGNED_DEV_REQ_UPSIZE					\
		(((MAX_DEV_REQ_UPSIZE / (BITS_PER_LONG_DIV_8)) + 1) *	\
			(BITS_PER_LONG_DIV_8))

#define MAX_ALIGNED_DEV_REQ_DOWNSIZE					\
		(((MAX_DEV_REQ_DOWNSIZE / (BITS_PER_LONG_DIV_8)) + 1) *	\
			(BITS_PER_LONG_DIV_8))

/*
 * valid orangefs kernel operation states
 *
 * unknown  - op was just initialized
 * waiting  - op is on request_list (upward bound)
 * inprogr  - op is in progress (waiting for downcall)
 * serviced - op has matching downcall; ok
 * purged   - op has to start a timer since client-core
 *            exited uncleanly before servicing op
 *
 * Apart from "unknown" (0) every state is a distinct bit, so the state
 * test macros in this file can check them with a bitwise AND.
 */
enum orangefs_vfs_op_states {
	OP_VFS_STATE_UNKNOWN = 0,
	OP_VFS_STATE_WAITING = 1,
	OP_VFS_STATE_INPROGR = 2,
	OP_VFS_STATE_SERVICED = 4,
	OP_VFS_STATE_PURGED = 8,
};

/*
 * State setters.  waiting/inprogress/serviced overwrite op_state with a
 * single state; purged is OR-ed in, so whatever state was set before is
 * kept alongside OP_VFS_STATE_PURGED.
 */
#define set_op_state_waiting(op)     ((op)->op_state = OP_VFS_STATE_WAITING)
#define set_op_state_inprogress(op)  ((op)->op_state = OP_VFS_STATE_INPROGR)
#define set_op_state_serviced(op)    ((op)->op_state = OP_VFS_STATE_SERVICED)
#define set_op_state_purged(op)      ((op)->op_state |= OP_VFS_STATE_PURGED)

/*
 * State tests.  Each masks op_state with one state bit, so the result is
 * either zero or that bit's value (not necessarily 1).
 */
#define op_state_waiting(op)     ((op)->op_state & OP_VFS_STATE_WAITING)
#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR)
#define op_state_serviced(op)    ((op)->op_state & OP_VFS_STATE_SERVICED)
#define op_state_purged(op)      ((op)->op_state & OP_VFS_STATE_PURGED)

/*
 * get_op - take a reference on an op: increments aio_ref_count and logs
 * the op pointer and tag.  The argument is evaluated more than once.
 */
#define get_op(op)					\
	do {						\
		atomic_inc(&(op)->aio_ref_count);	\
		gossip_debug(GOSSIP_DEV_DEBUG,		\
			"(get) Alloced OP (%p:%llu)\n",	\
			(op),				\
			llu((op)->tag));		\
	} while (0)

/*
 * put_op - drop a reference on an op.  When the decrement brings
 * aio_ref_count to zero the op is logged and handed to op_release().
 * The argument is evaluated more than once.
 */
#define put_op(op)							\
	do {								\
		if (atomic_dec_and_test(&(op)->aio_ref_count)) {	\
			gossip_debug(GOSSIP_DEV_DEBUG,			\
				"(put) Releasing OP (%p:%llu)\n",	\
				(op),					\
				llu((op)->tag));			\
			op_release(op);					\
		}							\
	} while (0)

/* op_wait - 1 while more than two references are held on op, else 0 */
#define op_wait(op) (atomic_read(&(op)->aio_ref_count) > 2)

/*
 * Defines for controlling whether I/O upcalls are for async or sync operations
 */
enum ORANGEFS_async_io_type {
	ORANGEFS_VFS_SYNC_IO = 0,
	ORANGEFS_VFS_ASYNC_IO = 1,
};

/*
 * An array of client_debug_mask will be built to hold debug keyword/mask
 * values fetched from userspace.
 *
 * Each entry pairs one keyword string with two 64-bit mask words.
 * NOTE(review): presumably mask1/mask2 are the two halves of one wider
 * mask - confirm against the code that fills the array.
 */
struct client_debug_mask {
	char *keyword;
	__u64 mask1;
	__u64 mask2;
};

/*
 * orangefs kernel memory related flags
 *
 * Cache creation flags are SLAB_RED_ZONE only when both
 * ORANGEFS_KERNEL_DEBUG and CONFIG_DEBUG_SLAB are defined, otherwise 0.
 */

#if defined(ORANGEFS_KERNEL_DEBUG) && defined(CONFIG_DEBUG_SLAB)
#define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
#else
#define ORANGEFS_CACHE_CREATE_FLAGS 0
#endif /* ORANGEFS_KERNEL_DEBUG && CONFIG_DEBUG_SLAB */
M
Mike Marshall 已提交
183

184 185 186
/* allocation flags; all three currently expand to GFP_KERNEL */
#define ORANGEFS_CACHE_ALLOC_FLAGS (GFP_KERNEL)
#define ORANGEFS_GFP_FLAGS (GFP_KERNEL)
#define ORANGEFS_BUFMAP_GFP_FLAGS (GFP_KERNEL)
M
Mike Marshall 已提交
187

188 189 190 191 192
/* orangefs xattr and acl related defines */
#define ORANGEFS_XATTR_INDEX_POSIX_ACL_ACCESS  1
#define ORANGEFS_XATTR_INDEX_POSIX_ACL_DEFAULT 2
#define ORANGEFS_XATTR_INDEX_TRUSTED           3
#define ORANGEFS_XATTR_INDEX_DEFAULT           4

/*
 * The ACL attribute names are taken from POSIX_ACL_XATTR_ACCESS and
 * POSIX_ACL_XATTR_DEFAULT, which this header does not define itself.
 * NOTE(review): presumably supplied by <linux/posix_acl_xattr.h>, which is
 * included above - confirm for the target kernel version.
 */
#define ORANGEFS_XATTR_NAME_ACL_ACCESS  POSIX_ACL_XATTR_ACCESS
#define ORANGEFS_XATTR_NAME_ACL_DEFAULT POSIX_ACL_XATTR_DEFAULT
#define ORANGEFS_XATTR_NAME_TRUSTED_PREFIX "trusted."
#define ORANGEFS_XATTR_NAME_DEFAULT_PREFIX ""
M
Mike Marshall 已提交
207

208
/* these functions are defined in orangefs-utils.c */
int orangefs_prepare_cdm_array(char *debug_array_string);
int orangefs_prepare_debugfs_help_string(int);

/* defined in orangefs-debugfs.c */
int orangefs_client_debug_init(void);

void debug_string_to_mask(char *, void *, int);
void do_c_mask(int, char *, struct client_debug_mask **);
void do_k_mask(int, char *, __u64 **);

void debug_mask_to_string(void *, int);
void do_k_string(void *, int);
void do_c_string(void *, int);
int check_amalgam_keyword(void *, int);
int keyword_is_amalgam(char *);

225 226 227 228
/* these variables are defined in orangefs-mod.c */
extern char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
extern char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
extern char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
extern unsigned int kernel_mask_set_mod_init;

/* ACL and extended attribute entry points */
extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
extern const struct xattr_handler *orangefs_xattr_handlers[];

extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
M
Mike Marshall 已提交
236 237 238 239 240 241 242 243 244 245 246

/*
 * Redefine xtvec structure so that we could move helper functions out of
 * the define
 *
 * Holds one offset (xtv_off) / length (xtv_len) pair.
 */
struct xtvec {
	__kernel_off_t xtv_off;		/* must be off_t */
	__kernel_size_t xtv_len;	/* must be size_t */
};

/*
247
 * orangefs data structures
M
Mike Marshall 已提交
248
 */
249 250
struct orangefs_kernel_op_s {
	enum orangefs_vfs_op_states op_state;
M
Mike Marshall 已提交
251 252 253 254 255 256 257 258 259
	__u64 tag;

	/*
	 * Set uses_shared_memory to 1 if this operation uses shared memory.
	 * If true, then a retry on the op must also get a new shared memory
	 * buffer and re-populate it.
	 */
	int uses_shared_memory;

260 261
	struct orangefs_upcall_s upcall;
	struct orangefs_downcall_s downcall;
M
Mike Marshall 已提交
262 263 264 265 266 267 268 269 270

	wait_queue_head_t waitq;
	spinlock_t lock;

	int io_completed;
	wait_queue_head_t io_completion_waitq;

	/* VFS aio fields */

271
	/* used by the async I/O code to stash the orangefs_kiocb_s structure */
M
Mike Marshall 已提交
272 273 274 275 276 277 278 279 280 281
	void *priv;

	/* used again for the async I/O code for deallocation */
	atomic_t aio_ref_count;

	int attempts;

	struct list_head list;
};

282 283 284 285
/* per inode private orangefs info */
struct orangefs_inode_s {
	struct orangefs_object_kref refn;
	char link_target[ORANGEFS_NAME_MAX];
M
Mike Marshall 已提交
286 287 288
	__s64 blksize;
	/*
	 * Reading/Writing Extended attributes need to acquire the appropriate
289
	 * reader/writer semaphore on the orangefs_inode_s structure.
M
Mike Marshall 已提交
290 291 292 293 294 295 296 297 298 299 300 301
	 */
	struct rw_semaphore xattr_sem;

	struct inode vfs_inode;
	sector_t last_failed_block_index_read;

	/*
	 * State of in-memory attributes not yet flushed to disk associated
	 * with this object
	 */
	unsigned long pinode_flags;

302
	/* All allocated orangefs_inode_s objects are chained to a list */
M
Mike Marshall 已提交
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
	struct list_head list;
};

/* bit numbers within orangefs_inode_s.pinode_flags */
#define P_ATIME_FLAG 0
#define P_MTIME_FLAG 1
#define P_CTIME_FLAG 2
#define P_MODE_FLAG  3

/*
 * Clear/Set/test helpers for each pinode_flags bit; pinode is a pointer to
 * a structure with a pinode_flags member.
 */
#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
#define SetAtimeFlag(pinode)   set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
#define AtimeFlag(pinode)      test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)

#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
#define SetMtimeFlag(pinode)   set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
#define MtimeFlag(pinode)      test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)

#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
#define SetCtimeFlag(pinode)   set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
#define CtimeFlag(pinode)      test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)

#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
#define SetModeFlag(pinode)   set_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
#define ModeFlag(pinode)      test_bit(P_MODE_FLAG, &(pinode)->pinode_flags)

327 328 329
/* per superblock private orangefs info */
struct orangefs_sb_info_s {
	struct orangefs_khandle root_khandle;
M
Mike Marshall 已提交
330 331 332
	__s32 fs_id;
	int id;
	int flags;
333 334 335
#define ORANGEFS_OPT_INTR	0x01
#define ORANGEFS_OPT_LOCAL_LOCK	0x02
	char devname[ORANGEFS_MAX_SERVER_ADDR_LEN];
M
Mike Marshall 已提交
336 337 338 339 340 341 342 343 344 345 346
	struct super_block *sb;
	int mount_pending;
	struct list_head list;
};

/*
 * structure that holds the state of any async I/O operation issued
 * through the VFS. Needed especially to handle cancellation requests
 * or even completion notification so that the VFS client-side daemon
 * can free up its vfs_request slots.
 */
347
struct orangefs_kiocb_s {
M
Mike Marshall 已提交
348 349 350 351 352 353 354
	/* the pointer to the task that initiated the AIO */
	struct task_struct *tsk;

	/* pointer to the kiocb that kicked this operation */
	struct kiocb *kiocb;

	/* buffer index that was used for the I/O */
355
	struct orangefs_bufmap *bufmap;
M
Mike Marshall 已提交
356 357
	int buffer_index;

358 359
	/* orangefs kernel operation type */
	struct orangefs_kernel_op_s *op;
M
Mike Marshall 已提交
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379

	/* The user space buffers from/to which I/O is being staged */
	struct iovec *iov;

	/* number of elements in the iovector */
	unsigned long nr_segs;

	/* set to indicate the type of the operation */
	int rw;

	/* file offset */
	loff_t offset;

	/* and the count in bytes */
	size_t bytes_to_be_copied;

	ssize_t bytes_copied;
	int needs_cleanup;
};

380
/* event counters; a single global instance is declared below */
struct orangefs_stats {
	unsigned long cache_hits;
	unsigned long cache_misses;
	unsigned long reads;
	unsigned long writes;
};

extern struct orangefs_stats g_orangefs_stats;
M
Mike Marshall 已提交
388 389

/*
390 391 392
 * NOTE: See Documentation/filesystems/porting for information
 * on implementing FOO_I and properly accessing fs private data
 */
393
static inline struct orangefs_inode_s *ORANGEFS_I(struct inode *inode)
M
Mike Marshall 已提交
394
{
395
	return container_of(inode, struct orangefs_inode_s, vfs_inode);
M
Mike Marshall 已提交
396 397
}

398
static inline struct orangefs_sb_info_s *ORANGEFS_SB(struct super_block *sb)
M
Mike Marshall 已提交
399
{
400
	return (struct orangefs_sb_info_s *) sb->s_fs_info;
M
Mike Marshall 已提交
401 402 403
}

/* ino_t descends from "unsigned long", 8 bytes, 64 bits. */
404
static inline ino_t orangefs_khandle_to_ino(struct orangefs_khandle *khandle)
M
Mike Marshall 已提交
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
{
	union {
		unsigned char u[8];
		__u64 ino;
	} ihandle;

	ihandle.u[0] = khandle->u[0] ^ khandle->u[4];
	ihandle.u[1] = khandle->u[1] ^ khandle->u[5];
	ihandle.u[2] = khandle->u[2] ^ khandle->u[6];
	ihandle.u[3] = khandle->u[3] ^ khandle->u[7];
	ihandle.u[4] = khandle->u[12] ^ khandle->u[8];
	ihandle.u[5] = khandle->u[13] ^ khandle->u[9];
	ihandle.u[6] = khandle->u[14] ^ khandle->u[10];
	ihandle.u[7] = khandle->u[15] ^ khandle->u[11];

	return ihandle.ino;
}

423
static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
M
Mike Marshall 已提交
424
{
425
	return &(ORANGEFS_I(inode)->refn.khandle);
M
Mike Marshall 已提交
426 427 428 429
}

static inline __s32 get_fsid_from_ino(struct inode *inode)
{
430
	return ORANGEFS_I(inode)->refn.fs_id;
M
Mike Marshall 已提交
431 432 433 434
}

/* Computes the inode number that corresponds to inode's khandle. */
static inline ino_t get_ino_from_khandle(struct inode *inode)
{
	return orangefs_khandle_to_ino(get_khandle_from_ino(inode));
}

/* Inode number, derived from the khandle, of dentry's parent directory. */
static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry)
{
	struct inode *parent_inode = dentry->d_parent->d_inode;

	return get_ino_from_khandle(parent_inode);
}

/*
 * Returns 1 when ORANGEFS_khandle_cmp() reports 0 for inode's khandle
 * against the root khandle recorded for its superblock, and 0 otherwise.
 */
static inline int is_root_handle(struct inode *inode)
{
	gossip_debug(GOSSIP_DCACHE_DEBUG,
		     "%s: root handle: %pU, this handle: %pU:\n",
		     __func__,
		     &ORANGEFS_SB(inode->i_sb)->root_khandle,
		     get_khandle_from_ino(inode));

	if (ORANGEFS_khandle_cmp(&(ORANGEFS_SB(inode->i_sb)->root_khandle),
				 get_khandle_from_ino(inode)))
		return 0;

	return 1;
}

463
static inline int match_handle(struct orangefs_khandle resp_handle,
M
Mike Marshall 已提交
464 465 466 467 468 469 470 471
			       struct inode *inode)
{
	gossip_debug(GOSSIP_DCACHE_DEBUG,
		     "%s: one handle: %pU, another handle:%pU:\n",
		     __func__,
		     &resp_handle,
		     get_khandle_from_ino(inode));

472
	if (ORANGEFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode)))
M
Mike Marshall 已提交
473 474 475 476 477 478
		return 0;
	else
		return 1;
}

/*
479
 * defined in orangefs-cache.c
M
Mike Marshall 已提交
480 481 482
 */
int op_cache_initialize(void);
int op_cache_finalize(void);
483 484 485
struct orangefs_kernel_op_s *op_alloc(__s32 type);
char *get_opname_string(struct orangefs_kernel_op_s *new_op);
void op_release(struct orangefs_kernel_op_s *op);
M
Mike Marshall 已提交
486 487 488 489 490 491

int dev_req_cache_initialize(void);
int dev_req_cache_finalize(void);
void *dev_req_alloc(void);
void dev_req_release(void *);

492 493
int orangefs_inode_cache_initialize(void);
int orangefs_inode_cache_finalize(void);
M
Mike Marshall 已提交
494 495 496

int kiocb_cache_initialize(void);
int kiocb_cache_finalize(void);
497 498
struct orangefs_kiocb_s *kiocb_alloc(void);
void kiocb_release(struct orangefs_kiocb_s *ptr);
M
Mike Marshall 已提交
499 500

/*
 * defined in orangefs-mod.c
 */
void purge_inprogress_ops(void);

/*
 * defined in waitqueue.c
 */
int wait_for_matching_downcall(struct orangefs_kernel_op_s *op);
int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op);
void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op);
void purge_waiting_ops(void);

/*
 * defined in super.c
 */
struct dentry *orangefs_mount(struct file_system_type *fst,
			      int flags,
			      const char *devname,
			      void *data);

void orangefs_kill_sb(struct super_block *sb);
int orangefs_remount(struct super_block *sb);

int fsid_key_table_initialize(void);
void fsid_key_table_finalize(void);

/*
 * defined in inode.c
 */
530 531
__u32 convert_to_orangefs_mask(unsigned long lite_mask);
struct inode *orangefs_new_inode(struct super_block *sb,
M
Mike Marshall 已提交
532 533 534
			      struct inode *dir,
			      int mode,
			      dev_t dev,
535
			      struct orangefs_object_kref *ref);
M
Mike Marshall 已提交
536

537
int orangefs_setattr(struct dentry *dentry, struct iattr *iattr);
M
Mike Marshall 已提交
538

539
int orangefs_getattr(struct vfsmount *mnt,
M
Mike Marshall 已提交
540 541 542 543 544 545
		  struct dentry *dentry,
		  struct kstat *kstat);

/*
 * defined in xattr.c
 */
546
int orangefs_setxattr(struct dentry *dentry,
M
Mike Marshall 已提交
547 548 549 550 551
		   const char *name,
		   const void *value,
		   size_t size,
		   int flags);

552
ssize_t orangefs_getxattr(struct dentry *dentry,
M
Mike Marshall 已提交
553 554 555 556
		       const char *name,
		       void *buffer,
		       size_t size);

557
ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size);
M
Mike Marshall 已提交
558 559 560 561

/*
 * defined in namei.c
 */
562 563
struct inode *orangefs_iget(struct super_block *sb,
			 struct orangefs_object_kref *ref);
M
Mike Marshall 已提交
564

565 566 567 568
ssize_t orangefs_inode_read(struct inode *inode,
			    struct iov_iter *iter,
			    loff_t *offset,
			    loff_t readahead_size);
M
Mike Marshall 已提交
569 570

/*
 * defined in devorangefs-req.c
 */
int orangefs_dev_init(void);
void orangefs_dev_cleanup(void);
int is_daemon_in_service(void);
int fs_mount_pending(__s32 fsid);

/*
579
 * defined in orangefs-utils.c
M
Mike Marshall 已提交
580
 */
581
__s32 fsid_of_op(struct orangefs_kernel_op_s *op);
M
Mike Marshall 已提交
582

583
int orangefs_flush_inode(struct inode *inode);
M
Mike Marshall 已提交
584

585
ssize_t orangefs_inode_getxattr(struct inode *inode,
M
Mike Marshall 已提交
586 587 588 589 590
			     const char *prefix,
			     const char *name,
			     void *buffer,
			     size_t size);

591
int orangefs_inode_setxattr(struct inode *inode,
M
Mike Marshall 已提交
592 593 594 595 596 597
			 const char *prefix,
			 const char *name,
			 const void *value,
			 size_t size,
			 int flags);

598
int orangefs_inode_getattr(struct inode *inode, __u32 mask);
M
Mike Marshall 已提交
599

600
int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
M
Mike Marshall 已提交
601

602
void orangefs_op_initialize(struct orangefs_kernel_op_s *op);
M
Mike Marshall 已提交
603

604
void orangefs_make_bad_inode(struct inode *inode);
M
Mike Marshall 已提交
605

606
void block_signals(sigset_t *);
M
Mike Marshall 已提交
607

608
void set_signals(sigset_t *);
M
Mike Marshall 已提交
609

610
int orangefs_unmount_sb(struct super_block *sb);
M
Mike Marshall 已提交
611

612
int orangefs_cancel_op_in_progress(__u64 tag);
M
Mike Marshall 已提交
613

614
static inline __u64 orangefs_convert_time_field(const struct timespec *ts)
615 616 617
{
	return (__u64)ts->tv_sec;
}
M
Mike Marshall 已提交
618

619
int orangefs_normalize_to_errno(__s32 error_code);

/* global state shared between the orangefs source files */
extern struct mutex devreq_mutex;
extern struct mutex request_mutex;
extern int debug;
extern int op_timeout_secs;
extern int slot_timeout_secs;
extern struct list_head orangefs_superblocks;
extern spinlock_t orangefs_superblocks_lock;
extern struct list_head orangefs_request_list;
extern spinlock_t orangefs_request_list_lock;
extern wait_queue_head_t orangefs_request_list_waitq;
extern struct list_head *htable_ops_in_progress;
extern spinlock_t htable_ops_in_progress_lock;
extern int hash_table_size;

/* VFS operation tables */
extern const struct address_space_operations orangefs_address_operations;
extern struct backing_dev_info orangefs_backing_dev_info;
extern struct inode_operations orangefs_file_inode_operations;
extern const struct file_operations orangefs_file_operations;
extern struct inode_operations orangefs_symlink_inode_operations;
extern struct inode_operations orangefs_dir_inode_operations;
extern const struct file_operations orangefs_dir_operations;
extern const struct dentry_operations orangefs_dentry_operations;
extern const struct file_operations orangefs_devreq_file_operations;

extern wait_queue_head_t orangefs_bufmap_init_waitq;
M
Mike Marshall 已提交
646 647 648 649 650 651

/*
 * misc convenience macros
 */

/*
 * add_op_to_request_list - mark op as waiting and append it to the tail of
 * orangefs_request_list, then wake sleepers on orangefs_request_list_waitq.
 *
 * Takes orangefs_request_list_lock first and op->lock second; both are
 * released before the wake-up.  op is evaluated more than once.
 */
#define add_op_to_request_list(op)				\
do {								\
	spin_lock(&orangefs_request_list_lock);			\
	spin_lock(&(op)->lock);					\
	set_op_state_waiting(op);				\
	list_add_tail(&(op)->list, &orangefs_request_list);	\
	spin_unlock(&orangefs_request_list_lock);		\
	spin_unlock(&(op)->lock);				\
	wake_up_interruptible(&orangefs_request_list_waitq);	\
} while (0)

/*
 * add_priority_op_to_request_list - like add_op_to_request_list(), but the
 * op is inserted at the head of orangefs_request_list instead of the tail.
 * op is evaluated more than once.
 */
#define add_priority_op_to_request_list(op)				\
	do {								\
		spin_lock(&orangefs_request_list_lock);			\
		spin_lock(&(op)->lock);					\
		set_op_state_waiting(op);				\
									\
		list_add(&(op)->list, &orangefs_request_list);		\
		spin_unlock(&orangefs_request_list_lock);		\
		spin_unlock(&(op)->lock);				\
		wake_up_interruptible(&orangefs_request_list_waitq);	\
	} while (0)

/*
 * remove_op_from_request_list - unlink op from orangefs_request_list.
 *
 * Walks the list under orangefs_request_list_lock and deletes op only if
 * it is actually found there, so an op that is not queued is left alone.
 * op is evaluated once, before the lock is taken; the macro's own locals
 * carry a __rm_ prefix so they cannot shadow the caller's argument.
 */
#define remove_op_from_request_list(op)					\
	do {								\
		struct orangefs_kernel_op_s *__rm_op = (op);		\
		struct list_head *__rm_pos = NULL;			\
		struct list_head *__rm_next = NULL;			\
									\
		spin_lock(&orangefs_request_list_lock);			\
		list_for_each_safe(__rm_pos, __rm_next,			\
				   &orangefs_request_list) {		\
			if (list_entry(__rm_pos,			\
				       struct orangefs_kernel_op_s,	\
				       list) == __rm_op) {		\
				list_del(&__rm_op->list);		\
				break;					\
			}						\
		}							\
		spin_unlock(&orangefs_request_list_lock);		\
	} while (0)

692 693 694 695 696
/*
 * Single-bit flag values; NOTE(review): presumably OR-ed together and
 * passed as the "flags" argument of service_operation() - confirm.
 */
#define ORANGEFS_OP_INTERRUPTIBLE 1   /* service_operation() is interruptible */
#define ORANGEFS_OP_PRIORITY      2   /* service_operation() is high priority */
#define ORANGEFS_OP_CANCELLATION  4   /* this is a cancellation */
#define ORANGEFS_OP_NO_SEMAPHORE  8   /* don't acquire semaphore */
#define ORANGEFS_OP_ASYNC         16  /* Queue it, but don't wait */
M
Mike Marshall 已提交
697

698
int service_operation(struct orangefs_kernel_op_s *op,
		      const char *op_name,
		      int flags);

/*
 * handles two possible error cases, depending on context.
 *
 * by design, our vfs i/o errors need to be handled in one of two ways,
 * depending on where the error occurred.
 *
 * if the error happens in the waitqueue code because we either timed
 * out or a signal was raised while waiting, we need to cancel the
 * userspace i/o operation and free the op manually.  this is done to
 * avoid having the device start writing application data to our shared
 * bufmap pages without us expecting it.
 *
 * FIXME: POSSIBLE OPTIMIZATION:
 * However, if we timed out or if we got a signal AND our upcall was never
 * picked off the queue (i.e. we were in OP_VFS_STATE_WAITING), then we don't
 * need to send a cancellation upcall. The way we can handle this is
 * set error_exit to 2 in such cases and 1 whenever cancellation has to be
 * sent and have handle_error
 * take care of this situation as well..
 *
 * if an orangefs sysint level error occurred and i/o has been completed,
 * there is no need to cancel the operation, as the user has finished
 * using the bufmap page and so there is no danger in this case.  in
 * this case, we wake up the device normally so that it may free the
 * op, as normal.
 *
 * note the only reason this is a macro is because both read and write
 * cases need the exact same handling code.
 *
 * The macro takes no arguments: it reads and writes the caller's local
 * variables new_op, bufmap and buffer_index.  On exit new_op is NULL and
 * buffer_index is -1.
 */
#define handle_io_error()					\
do {								\
	if (!op_state_serviced(new_op)) {			\
		orangefs_cancel_op_in_progress(new_op->tag);	\
		op_release(new_op);				\
	} else {						\
		wake_up_daemon_for_return(new_op);		\
	}							\
	new_op = NULL;						\
	orangefs_bufmap_put(bufmap, buffer_index);		\
	buffer_index = -1;					\
} while (0)

/*
 * Evaluates to ORANGEFS_OP_INTERRUPTIBLE when ORANGEFS_OPT_INTR is set in
 * the flags of inode's superblock info, and to 0 otherwise.
 */
#define get_interruptible_flag(inode) \
	((ORANGEFS_SB((inode)->i_sb)->flags & ORANGEFS_OPT_INTR) ? \
		ORANGEFS_OP_INTERRUPTIBLE : 0)
M
Mike Marshall 已提交
747

748
/*
 * add_orangefs_sb - append the orangefs_sb_info_s of sb to the tail of
 * orangefs_superblocks, under orangefs_superblocks_lock.
 * sb is evaluated more than once.
 */
#define add_orangefs_sb(sb)						\
do {									\
	gossip_debug(GOSSIP_SUPER_DEBUG,				\
		     "Adding SB %p to orangefs superblocks\n",		\
		     ORANGEFS_SB(sb));					\
	spin_lock(&orangefs_superblocks_lock);				\
	list_add_tail(&ORANGEFS_SB(sb)->list, &orangefs_superblocks);	\
	spin_unlock(&orangefs_superblocks_lock);			\
} while (0)

758
/*
 * remove_orangefs_sb - unlink the orangefs_sb_info_s whose ->sb equals the
 * given super_block pointer from orangefs_superblocks, under
 * orangefs_superblocks_lock.  Nothing happens if no entry matches.
 *
 * The macro parameter is deliberately not called "sb": a parameter of that
 * name would also be substituted into the "->sb" member access below.  The
 * argument is evaluated once, and the macro's locals carry a __rm_ prefix
 * so they cannot shadow it.
 */
#define remove_orangefs_sb(_sb)						\
do {									\
	struct super_block *__rm_sb = (_sb);				\
	struct list_head *__rm_pos = NULL;				\
	struct list_head *__rm_next = NULL;				\
	struct orangefs_sb_info_s *__rm_sbi = NULL;			\
									\
	spin_lock(&orangefs_superblocks_lock);				\
	list_for_each_safe(__rm_pos, __rm_next, &orangefs_superblocks) { \
		__rm_sbi = list_entry(__rm_pos,				\
				      struct orangefs_sb_info_s,	\
				      list);				\
		if (__rm_sbi->sb == __rm_sb) {				\
			gossip_debug(GOSSIP_SUPER_DEBUG,		\
			    "Removing SB %p from orangefs superblocks\n", \
			    __rm_sbi);					\
			list_del(&__rm_sbi->list);			\
			break;						\
		}							\
	}								\
	spin_unlock(&orangefs_superblocks_lock);			\
} while (0)

780 781
/* take / release the i_lock spinlock of inode */
#define orangefs_lock_inode(inode) spin_lock(&(inode)->i_lock)
#define orangefs_unlock_inode(inode) spin_unlock(&(inode)->i_lock)
M
Mike Marshall 已提交
782 783 784 785 786 787

/*
 * fill_default_sys_attrs - initialise an attribute structure (passed as an
 * lvalue, not a pointer): owner and group come from the current fsuid and
 * fsgid mapped through the current user namespace, size is 0, perms is the
 * translated mode, objtype is type, and mask is
 * ORANGEFS_ATTR_SYS_ALL_SETABLE.  sys_attr is evaluated more than once.
 */
#define fill_default_sys_attrs(sys_attr, type, mode)			\
do {									\
	(sys_attr).owner = from_kuid(current_user_ns(), current_fsuid()); \
	(sys_attr).group = from_kgid(current_user_ns(), current_fsgid()); \
	(sys_attr).size = 0;						\
	(sys_attr).perms = ORANGEFS_util_translate_mode(mode);		\
	(sys_attr).objtype = (type);					\
	(sys_attr).mask = ORANGEFS_ATTR_SYS_ALL_SETABLE;		\
} while (0)

793
/* take / release the i_mutex of the inode pointed to by __i */
#define orangefs_inode_lock(__i)  mutex_lock(&(__i)->i_mutex)

#define orangefs_inode_unlock(__i) mutex_unlock(&(__i)->i_mutex)
M
Mike Marshall 已提交
796

797
/*
 * Stores i_size into inode through i_size_write().  On 32-bit SMP builds
 * the store is wrapped in orangefs_inode_lock()/orangefs_inode_unlock();
 * on every other configuration it is a plain i_size_write() call.
 */
static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
{
#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
	orangefs_inode_lock(inode);
#endif
	i_size_write(inode, i_size);
#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
	orangefs_inode_unlock(inode);
#endif
}

/*
 * Returns end - begin in microseconds.
 *
 * The subtraction is done in place: on return *end holds the difference
 * (seconds and microseconds), not the original end time.  *begin is only
 * read.
 */
static inline unsigned int diff(struct timeval *end, struct timeval *begin)
{
	/* borrow one second so the microsecond subtraction cannot go negative */
	if (begin->tv_usec > end->tv_usec) {
		end->tv_sec -= 1;
		end->tv_usec += 1000000;
	}

	end->tv_usec -= begin->tv_usec;
	end->tv_sec -= begin->tv_sec;

	return end->tv_usec + (end->tv_sec * 1000000);
}

819
#endif /* __ORANGEFSKERNEL_H */