/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

#include "protocol.h"
8 9
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
M
Mike Marshall 已提交
10 11

/*
12 13 14 15 16 17 18 19
 * decode routine used by kmod to deal with the blob sent from
 * userspace for readdirs. The blob contains zero or more of these
 * sub-blobs:
 *   __u32 - represents length of the character string that follows.
 *   string - between 1 and ORANGEFS_NAME_MAX bytes long.
 *   padding - (if needed) to cause the __u32 plus the string to be
 *             eight byte aligned.
 *   khandle - sizeof(khandle) bytes.
M
Mike Marshall 已提交
20
 */
21
static long decode_dirents(char *ptr, size_t size,
22
                           struct orangefs_readdir_response_s *readdir)
M
Mike Marshall 已提交
23 24
{
	int i;
25 26
	struct orangefs_readdir_response_s *rd =
		(struct orangefs_readdir_response_s *) ptr;
M
Mike Marshall 已提交
27
	char *buf = ptr;
28 29 30 31 32 33 34 35 36
	int khandle_size = sizeof(struct orangefs_khandle);
	size_t offset = offsetof(struct orangefs_readdir_response_s,
				dirent_array);
	/* 8 reflects eight byte alignment */
	int smallest_blob = khandle_size + 8;
	__u32 len;
	int aligned_len;
	int sizeof_u32 = sizeof(__u32);
	long ret;
M
Mike Marshall 已提交
37

38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
	gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);

	/* size is = offset on empty dirs, > offset on non-empty dirs... */
	if (size < offset) {
		gossip_err("%s: size:%zu: offset:%zu:\n",
			   __func__,
			   size,
			   offset);
		ret = -EINVAL;
		goto out;
	}

        if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
		gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
			   __func__,
			   size,
			   readdir->orangefs_dirent_outcount);
		ret = -EINVAL;
		goto out;
	}
58

M
Mike Marshall 已提交
59
	readdir->token = rd->token;
60 61
	readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
	readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
M
Mike Marshall 已提交
62 63
					sizeof(*readdir->dirent_array),
					GFP_KERNEL);
64 65 66 67 68
	if (readdir->dirent_array == NULL) {
		gossip_err("%s: kcalloc failed.\n", __func__);
		ret = -ENOMEM;
		goto out;
	}
69

70 71
	buf += offset;
	size -= offset;
72

73
	for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
74 75 76 77 78 79 80 81
		if (size < smallest_blob) {
			gossip_err("%s: size:%zu: smallest_blob:%d:\n",
				   __func__,
				   size,
				   smallest_blob);
			ret = -EINVAL;
			goto free;
		}
82 83

		len = *(__u32 *)buf;
84 85 86 87 88 89 90 91 92 93 94 95 96
		if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
			gossip_err("%s: len:%d:\n", __func__, len);
			ret = -EINVAL;
			goto free;
		}

		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: size:%zu: len:%d:\n",
			     __func__,
			     size,
			     len);

		readdir->dirent_array[i].d_name = buf + sizeof_u32;
97
		readdir->dirent_array[i].d_length = len;
98

99
		/*
100 101 102 103 104 105 106 107 108 109 110 111
		 * Calculate "aligned" length of this string and its
		 * associated __u32 descriptor.
		 */
		aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: aligned_len:%d:\n",
			     __func__,
			     aligned_len);

		/*
		 * The end of the blob should coincide with the end
		 * of the last sub-blob.
112
		 */
113 114 115 116 117 118 119
		if (size < aligned_len + khandle_size) {
			gossip_err("%s: ran off the end of the blob.\n",
				   __func__);
			ret = -EINVAL;
			goto free;
		}
		size -= aligned_len + khandle_size;
120

121
		buf += aligned_len;
122

M
Mike Marshall 已提交
123
		readdir->dirent_array[i].khandle =
124
			*(struct orangefs_khandle *) buf;
125
		buf += khandle_size;
M
Mike Marshall 已提交
126
	}
127 128 129 130 131
	ret = buf - ptr;
	gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
	goto out;

free:
132 133
	kfree(readdir->dirent_array);
	readdir->dirent_array = NULL;
134 135 136

out:
	return ret;
M
Mike Marshall 已提交
137 138 139 140 141
}

/*
 * Read directory entries from an instance of an open directory.
 */
142
static int orangefs_readdir(struct file *file, struct dir_context *ctx)
M
Mike Marshall 已提交
143 144 145
{
	int ret = 0;
	int buffer_index;
146 147 148 149
	/*
	 * ptoken supports Orangefs' distributed directory logic, added
	 * in 2.9.2.
	 */
M
Mike Marshall 已提交
150 151 152 153
	__u64 *ptoken = file->private_data;
	__u64 pos = 0;
	ino_t ino = 0;
	struct dentry *dentry = file->f_path.dentry;
154 155
	struct orangefs_kernel_op_s *new_op = NULL;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
M
Mike Marshall 已提交
156
	int buffer_full = 0;
A
Al Viro 已提交
157 158
	struct orangefs_readdir_response_s readdir_response;
	void *dents_buf;
M
Mike Marshall 已提交
159 160 161 162 163 164
	int i = 0;
	int len = 0;
	ino_t current_ino = 0;
	char *current_entry = NULL;
	long bytes_decoded;

165 166 167 168 169
	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: ctx->pos:%lld, ptoken = %llu\n",
		     __func__,
		     lld(ctx->pos),
		     llu(*ptoken));
M
Mike Marshall 已提交
170 171 172 173

	pos = (__u64) ctx->pos;

	/* are we done? */
174
	if (pos == ORANGEFS_READDIR_END) {
M
Mike Marshall 已提交
175 176 177 178 179 180
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "Skipping to termination path\n");
		return 0;
	}

	gossip_debug(GOSSIP_DIR_DEBUG,
181
		     "orangefs_readdir called on %s (pos=%llu)\n",
M
Mike Marshall 已提交
182 183
		     dentry->d_name.name, llu(pos));

A
Al Viro 已提交
184
	memset(&readdir_response, 0, sizeof(readdir_response));
M
Mike Marshall 已提交
185

186
	new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
M
Mike Marshall 已提交
187 188 189
	if (!new_op)
		return -ENOMEM;

190 191 192 193
	/*
	 * Only the indices are shared. No memory is actually shared, but the
	 * mechanism is used.
	 */
M
Mike Marshall 已提交
194
	new_op->uses_shared_memory = 1;
195
	new_op->upcall.req.readdir.refn = orangefs_inode->refn;
196 197
	new_op->upcall.req.readdir.max_dirent_count =
	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
M
Mike Marshall 已提交
198 199 200 201 202 203 204 205 206

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
		     __func__,
		     &new_op->upcall.req.readdir.refn.khandle);

	new_op->upcall.req.readdir.token = *ptoken;

get_new_buffer_index:
207 208 209
	buffer_index = orangefs_readdir_index_get();
	if (buffer_index < 0) {
		ret = buffer_index;
210
		gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
M
Mike Marshall 已提交
211 212 213 214 215 216
			    ret);
		goto out_free_op;
	}
	new_op->upcall.req.readdir.buf_index = buffer_index;

	ret = service_operation(new_op,
217
				"orangefs_readdir",
M
Mike Marshall 已提交
218 219 220 221 222 223 224
				get_interruptible_flag(dentry->d_inode));

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "Readdir downcall status is %d.  ret:%d\n",
		     new_op->downcall.status,
		     ret);

225 226
	orangefs_readdir_index_put(buffer_index);

M
Mike Marshall 已提交
227
	if (ret == -EAGAIN && op_state_purged(new_op)) {
228
		/* Client-core indices are invalid after it restarted. */
M
Mike Marshall 已提交
229 230 231 232 233 234 235 236 237
		gossip_debug(GOSSIP_DIR_DEBUG,
			"%s: Getting new buffer_index for retry of readdir..\n",
			 __func__);
		goto get_new_buffer_index;
	}

	if (ret == -EIO && op_state_purged(new_op)) {
		gossip_err("%s: Client is down. Aborting readdir call.\n",
			__func__);
A
Al Viro 已提交
238
		goto out_slot;
M
Mike Marshall 已提交
239 240 241 242 243 244 245 246
	}

	if (ret < 0 || new_op->downcall.status != 0) {
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "Readdir request failed.  Status:%d\n",
			     new_op->downcall.status);
		if (ret >= 0)
			ret = new_op->downcall.status;
A
Al Viro 已提交
247 248 249 250 251 252 253 254
		goto out_slot;
	}

	dents_buf = new_op->downcall.trailer_buf;
	if (dents_buf == NULL) {
		gossip_err("Invalid NULL buffer in readdir response\n");
		ret = -ENOMEM;
		goto out_slot;
M
Mike Marshall 已提交
255 256
	}

A
Al Viro 已提交
257 258
	bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
					&readdir_response);
M
Mike Marshall 已提交
259 260
	if (bytes_decoded < 0) {
		ret = bytes_decoded;
A
Al Viro 已提交
261 262
		gossip_err("Could not decode readdir from buffer %d\n", ret);
		goto out_vfree;
M
Mike Marshall 已提交
263 264 265
	}

	if (bytes_decoded != new_op->downcall.trailer_size) {
266
		gossip_err("orangefs_readdir: # bytes decoded (%ld) "
267 268 269
			   "!= trailer size (%ld)\n",
			   bytes_decoded,
			   (long)new_op->downcall.trailer_size);
M
Mike Marshall 已提交
270 271 272 273
		ret = -EINVAL;
		goto out_destroy_handle;
	}

274
	/*
275
	 *  orangefs doesn't actually store dot and dot-dot, but
276 277
	 *  we need to have them represented.
	 */
M
Mike Marshall 已提交
278 279 280 281 282 283 284
	if (pos == 0) {
		ino = get_ino_from_khandle(dentry->d_inode);
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: calling dir_emit of \".\" with pos = %llu\n",
			     __func__,
			     llu(pos));
		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
285
		pos += 1;
M
Mike Marshall 已提交
286 287 288 289 290 291 292 293 294
	}

	if (pos == 1) {
		ino = get_parent_ino_from_dentry(dentry);
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
			     __func__,
			     llu(pos));
		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
295
		pos += 1;
M
Mike Marshall 已提交
296 297
	}

298
	/*
299
	 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
300 301 302
	 * to prevent "finding" dot and dot-dot on any iteration
	 * other than the first.
	 */
303
	if (ctx->pos == ORANGEFS_ITERATE_NEXT)
304 305
		ctx->pos = 0;

306 307 308
	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: dirent_outcount:%d:\n",
		     __func__,
A
Al Viro 已提交
309
		     readdir_response.orangefs_dirent_outcount);
310
	for (i = ctx->pos;
A
Al Viro 已提交
311
	     i < readdir_response.orangefs_dirent_outcount;
312
	     i++) {
A
Al Viro 已提交
313 314
		len = readdir_response.dirent_array[i].d_length;
		current_entry = readdir_response.dirent_array[i].d_name;
315
		current_ino = orangefs_khandle_to_ino(
A
Al Viro 已提交
316
			&readdir_response.dirent_array[i].khandle);
M
Mike Marshall 已提交
317 318

		gossip_debug(GOSSIP_DIR_DEBUG,
319 320
			     "calling dir_emit for %s with len %d"
			     ", ctx->pos %ld\n",
M
Mike Marshall 已提交
321 322
			     current_entry,
			     len,
323 324 325 326 327 328
			     (unsigned long)ctx->pos);
		/*
		 * type is unknown. We don't return object type
		 * in the dirent_array. This leaves getdents
		 * clueless about type.
		 */
M
Mike Marshall 已提交
329 330
		ret =
		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
331 332
		if (!ret)
			break;
M
Mike Marshall 已提交
333
		ctx->pos++;
334
		gossip_debug(GOSSIP_DIR_DEBUG,
M
Mike Marshall 已提交
335 336 337 338 339 340
			      "%s: ctx->pos:%lld\n",
			      __func__,
			      lld(ctx->pos));

	}

341
	/*
342 343 344 345
	 * we ran all the way through the last batch, set up for
	 * getting another batch...
	 */
	if (ret) {
A
Al Viro 已提交
346
		*ptoken = readdir_response.token;
347
		ctx->pos = ORANGEFS_ITERATE_NEXT;
M
Mike Marshall 已提交
348 349 350 351 352
	}

	/*
	 * Did we hit the end of the directory?
	 */
A
Al Viro 已提交
353
	if (readdir_response.token == ORANGEFS_READDIR_END &&
M
Mike Marshall 已提交
354
	    !buffer_full) {
355
		gossip_debug(GOSSIP_DIR_DEBUG,
356 357
		"End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
		ctx->pos = ORANGEFS_READDIR_END;
M
Mike Marshall 已提交
358 359 360
	}

out_destroy_handle:
A
Al Viro 已提交
361 362 363 364 365 366 367
	/* kfree(NULL) is safe */
	kfree(readdir_response.dirent_array);
out_vfree:
	gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
	vfree(dents_buf);
out_slot:
	orangefs_readdir_index_put(buffer_index);
M
Mike Marshall 已提交
368 369
out_free_op:
	op_release(new_op);
370
	gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
M
Mike Marshall 已提交
371 372 373
	return ret;
}

374
static int orangefs_dir_open(struct inode *inode, struct file *file)
M
Mike Marshall 已提交
375 376 377 378 379 380 381 382
{
	__u64 *ptoken;

	file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
	if (!file->private_data)
		return -ENOMEM;

	ptoken = file->private_data;
383
	*ptoken = ORANGEFS_READDIR_START;
M
Mike Marshall 已提交
384 385 386
	return 0;
}

387
static int orangefs_dir_release(struct inode *inode, struct file *file)
M
Mike Marshall 已提交
388
{
389
	orangefs_flush_inode(inode);
M
Mike Marshall 已提交
390 391 392 393
	kfree(file->private_data);
	return 0;
}

394 395
/** ORANGEFS implementation of VFS directory operations */
const struct file_operations orangefs_dir_operations = {
M
Mike Marshall 已提交
396
	.read = generic_read_dir,
397 398 399
	.iterate = orangefs_readdir,
	.open = orangefs_dir_open,
	.release = orangefs_dir_release,
M
Mike Marshall 已提交
400
};