dir.c 10.0 KB
Newer Older
M
Mike Marshall 已提交
1 2 3 4 5 6 7
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

#include "protocol.h"
8 9
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
M
Mike Marshall 已提交
10 11

/*
12 13 14 15 16 17 18 19
 * decode routine used by kmod to deal with the blob sent from
 * userspace for readdirs. The blob contains zero or more of these
 * sub-blobs:
 *   __u32 - represents length of the character string that follows.
 *   string - between 1 and ORANGEFS_NAME_MAX bytes long.
 *   padding - (if needed) to cause the __u32 plus the string to be
 *             eight byte aligned.
 *   khandle - sizeof(khandle) bytes.
M
Mike Marshall 已提交
20
 */
21
static long decode_dirents(char *ptr, size_t size,
22
                           struct orangefs_readdir_response_s *readdir)
M
Mike Marshall 已提交
23 24
{
	int i;
25 26
	struct orangefs_readdir_response_s *rd =
		(struct orangefs_readdir_response_s *) ptr;
M
Mike Marshall 已提交
27
	char *buf = ptr;
28 29 30 31 32 33 34 35 36
	int khandle_size = sizeof(struct orangefs_khandle);
	size_t offset = offsetof(struct orangefs_readdir_response_s,
				dirent_array);
	/* 8 reflects eight byte alignment */
	int smallest_blob = khandle_size + 8;
	__u32 len;
	int aligned_len;
	int sizeof_u32 = sizeof(__u32);
	long ret;
M
Mike Marshall 已提交
37

38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
	gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);

	/* size is = offset on empty dirs, > offset on non-empty dirs... */
	if (size < offset) {
		gossip_err("%s: size:%zu: offset:%zu:\n",
			   __func__,
			   size,
			   offset);
		ret = -EINVAL;
		goto out;
	}

        if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
		gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
			   __func__,
			   size,
			   readdir->orangefs_dirent_outcount);
		ret = -EINVAL;
		goto out;
	}
58

M
Mike Marshall 已提交
59
	readdir->token = rd->token;
60 61
	readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
	readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
M
Mike Marshall 已提交
62 63
					sizeof(*readdir->dirent_array),
					GFP_KERNEL);
64 65 66 67 68
	if (readdir->dirent_array == NULL) {
		gossip_err("%s: kcalloc failed.\n", __func__);
		ret = -ENOMEM;
		goto out;
	}
69

70 71
	buf += offset;
	size -= offset;
72

73
	for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
74 75 76 77 78 79 80 81
		if (size < smallest_blob) {
			gossip_err("%s: size:%zu: smallest_blob:%d:\n",
				   __func__,
				   size,
				   smallest_blob);
			ret = -EINVAL;
			goto free;
		}
82 83

		len = *(__u32 *)buf;
84 85 86 87 88 89 90 91 92 93 94 95 96
		if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
			gossip_err("%s: len:%d:\n", __func__, len);
			ret = -EINVAL;
			goto free;
		}

		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: size:%zu: len:%d:\n",
			     __func__,
			     size,
			     len);

		readdir->dirent_array[i].d_name = buf + sizeof_u32;
97
		readdir->dirent_array[i].d_length = len;
98

99
		/*
100 101 102 103 104 105 106 107 108 109 110 111
		 * Calculate "aligned" length of this string and its
		 * associated __u32 descriptor.
		 */
		aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: aligned_len:%d:\n",
			     __func__,
			     aligned_len);

		/*
		 * The end of the blob should coincide with the end
		 * of the last sub-blob.
112
		 */
113 114 115 116 117 118 119
		if (size < aligned_len + khandle_size) {
			gossip_err("%s: ran off the end of the blob.\n",
				   __func__);
			ret = -EINVAL;
			goto free;
		}
		size -= aligned_len + khandle_size;
120

121
		buf += aligned_len;
122

M
Mike Marshall 已提交
123
		readdir->dirent_array[i].khandle =
124
			*(struct orangefs_khandle *) buf;
125
		buf += khandle_size;
M
Mike Marshall 已提交
126
	}
127 128 129 130 131
	ret = buf - ptr;
	gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
	goto out;

free:
132 133
	kfree(readdir->dirent_array);
	readdir->dirent_array = NULL;
134 135 136

out:
	return ret;
M
Mike Marshall 已提交
137 138 139 140 141
}

/*
 * Read directory entries from an instance of an open directory.
 */
142
static int orangefs_readdir(struct file *file, struct dir_context *ctx)
M
Mike Marshall 已提交
143 144 145
{
	int ret = 0;
	int buffer_index;
146 147 148 149
	/*
	 * ptoken supports Orangefs' distributed directory logic, added
	 * in 2.9.2.
	 */
M
Mike Marshall 已提交
150 151 152 153
	__u64 *ptoken = file->private_data;
	__u64 pos = 0;
	ino_t ino = 0;
	struct dentry *dentry = file->f_path.dentry;
154 155
	struct orangefs_kernel_op_s *new_op = NULL;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
A
Al Viro 已提交
156 157
	struct orangefs_readdir_response_s readdir_response;
	void *dents_buf;
M
Mike Marshall 已提交
158 159 160 161 162 163
	int i = 0;
	int len = 0;
	ino_t current_ino = 0;
	char *current_entry = NULL;
	long bytes_decoded;

164 165 166 167 168
	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: ctx->pos:%lld, ptoken = %llu\n",
		     __func__,
		     lld(ctx->pos),
		     llu(*ptoken));
M
Mike Marshall 已提交
169 170 171 172

	pos = (__u64) ctx->pos;

	/* are we done? */
173
	if (pos == ORANGEFS_READDIR_END) {
M
Mike Marshall 已提交
174 175 176 177 178 179
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "Skipping to termination path\n");
		return 0;
	}

	gossip_debug(GOSSIP_DIR_DEBUG,
180
		     "orangefs_readdir called on %s (pos=%llu)\n",
M
Mike Marshall 已提交
181 182
		     dentry->d_name.name, llu(pos));

A
Al Viro 已提交
183
	memset(&readdir_response, 0, sizeof(readdir_response));
M
Mike Marshall 已提交
184

185
	new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
M
Mike Marshall 已提交
186 187 188
	if (!new_op)
		return -ENOMEM;

189 190 191 192
	/*
	 * Only the indices are shared. No memory is actually shared, but the
	 * mechanism is used.
	 */
M
Mike Marshall 已提交
193
	new_op->uses_shared_memory = 1;
194
	new_op->upcall.req.readdir.refn = orangefs_inode->refn;
195 196
	new_op->upcall.req.readdir.max_dirent_count =
	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
M
Mike Marshall 已提交
197 198 199 200 201 202 203 204 205

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
		     __func__,
		     &new_op->upcall.req.readdir.refn.khandle);

	new_op->upcall.req.readdir.token = *ptoken;

get_new_buffer_index:
206 207 208
	buffer_index = orangefs_readdir_index_get();
	if (buffer_index < 0) {
		ret = buffer_index;
209
		gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
M
Mike Marshall 已提交
210 211 212 213 214 215
			    ret);
		goto out_free_op;
	}
	new_op->upcall.req.readdir.buf_index = buffer_index;

	ret = service_operation(new_op,
216
				"orangefs_readdir",
M
Mike Marshall 已提交
217 218 219 220 221 222 223
				get_interruptible_flag(dentry->d_inode));

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "Readdir downcall status is %d.  ret:%d\n",
		     new_op->downcall.status,
		     ret);

224 225
	orangefs_readdir_index_put(buffer_index);

M
Mike Marshall 已提交
226
	if (ret == -EAGAIN && op_state_purged(new_op)) {
227
		/* Client-core indices are invalid after it restarted. */
M
Mike Marshall 已提交
228 229 230 231 232 233 234 235 236
		gossip_debug(GOSSIP_DIR_DEBUG,
			"%s: Getting new buffer_index for retry of readdir..\n",
			 __func__);
		goto get_new_buffer_index;
	}

	if (ret == -EIO && op_state_purged(new_op)) {
		gossip_err("%s: Client is down. Aborting readdir call.\n",
			__func__);
237
		goto out_free_op;
M
Mike Marshall 已提交
238 239 240 241 242 243 244 245
	}

	if (ret < 0 || new_op->downcall.status != 0) {
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "Readdir request failed.  Status:%d\n",
			     new_op->downcall.status);
		if (ret >= 0)
			ret = new_op->downcall.status;
246
		goto out_free_op;
A
Al Viro 已提交
247 248 249 250 251 252
	}

	dents_buf = new_op->downcall.trailer_buf;
	if (dents_buf == NULL) {
		gossip_err("Invalid NULL buffer in readdir response\n");
		ret = -ENOMEM;
253
		goto out_free_op;
M
Mike Marshall 已提交
254 255
	}

A
Al Viro 已提交
256 257
	bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
					&readdir_response);
M
Mike Marshall 已提交
258 259
	if (bytes_decoded < 0) {
		ret = bytes_decoded;
A
Al Viro 已提交
260 261
		gossip_err("Could not decode readdir from buffer %d\n", ret);
		goto out_vfree;
M
Mike Marshall 已提交
262 263 264
	}

	if (bytes_decoded != new_op->downcall.trailer_size) {
265
		gossip_err("orangefs_readdir: # bytes decoded (%ld) "
266 267 268
			   "!= trailer size (%ld)\n",
			   bytes_decoded,
			   (long)new_op->downcall.trailer_size);
M
Mike Marshall 已提交
269 270 271 272
		ret = -EINVAL;
		goto out_destroy_handle;
	}

273
	/*
274
	 *  orangefs doesn't actually store dot and dot-dot, but
275 276
	 *  we need to have them represented.
	 */
M
Mike Marshall 已提交
277 278 279 280 281 282 283
	if (pos == 0) {
		ino = get_ino_from_khandle(dentry->d_inode);
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: calling dir_emit of \".\" with pos = %llu\n",
			     __func__,
			     llu(pos));
		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
284
		pos += 1;
M
Mike Marshall 已提交
285 286 287 288 289 290 291 292 293
	}

	if (pos == 1) {
		ino = get_parent_ino_from_dentry(dentry);
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
			     __func__,
			     llu(pos));
		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
294
		pos += 1;
M
Mike Marshall 已提交
295 296
	}

297
	/*
298
	 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
299 300 301
	 * to prevent "finding" dot and dot-dot on any iteration
	 * other than the first.
	 */
302
	if (ctx->pos == ORANGEFS_ITERATE_NEXT)
303 304
		ctx->pos = 0;

305 306 307
	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: dirent_outcount:%d:\n",
		     __func__,
A
Al Viro 已提交
308
		     readdir_response.orangefs_dirent_outcount);
309
	for (i = ctx->pos;
A
Al Viro 已提交
310
	     i < readdir_response.orangefs_dirent_outcount;
311
	     i++) {
A
Al Viro 已提交
312 313
		len = readdir_response.dirent_array[i].d_length;
		current_entry = readdir_response.dirent_array[i].d_name;
314
		current_ino = orangefs_khandle_to_ino(
A
Al Viro 已提交
315
			&readdir_response.dirent_array[i].khandle);
M
Mike Marshall 已提交
316 317

		gossip_debug(GOSSIP_DIR_DEBUG,
318 319
			     "calling dir_emit for %s with len %d"
			     ", ctx->pos %ld\n",
M
Mike Marshall 已提交
320 321
			     current_entry,
			     len,
322 323 324 325 326 327
			     (unsigned long)ctx->pos);
		/*
		 * type is unknown. We don't return object type
		 * in the dirent_array. This leaves getdents
		 * clueless about type.
		 */
M
Mike Marshall 已提交
328 329
		ret =
		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
330 331
		if (!ret)
			break;
M
Mike Marshall 已提交
332
		ctx->pos++;
333
		gossip_debug(GOSSIP_DIR_DEBUG,
M
Mike Marshall 已提交
334 335 336 337 338 339
			      "%s: ctx->pos:%lld\n",
			      __func__,
			      lld(ctx->pos));

	}

340
	/*
341 342 343 344
	 * we ran all the way through the last batch, set up for
	 * getting another batch...
	 */
	if (ret) {
A
Al Viro 已提交
345
		*ptoken = readdir_response.token;
346
		ctx->pos = ORANGEFS_ITERATE_NEXT;
M
Mike Marshall 已提交
347 348 349 350 351
	}

	/*
	 * Did we hit the end of the directory?
	 */
352
	if (readdir_response.token == ORANGEFS_READDIR_END) {
353
		gossip_debug(GOSSIP_DIR_DEBUG,
354 355
		"End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
		ctx->pos = ORANGEFS_READDIR_END;
M
Mike Marshall 已提交
356 357 358
	}

out_destroy_handle:
A
Al Viro 已提交
359 360 361 362 363
	/* kfree(NULL) is safe */
	kfree(readdir_response.dirent_array);
out_vfree:
	gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
	vfree(dents_buf);
M
Mike Marshall 已提交
364 365
out_free_op:
	op_release(new_op);
366
	gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
M
Mike Marshall 已提交
367 368 369
	return ret;
}

370
static int orangefs_dir_open(struct inode *inode, struct file *file)
M
Mike Marshall 已提交
371 372 373 374 375 376 377 378
{
	__u64 *ptoken;

	file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
	if (!file->private_data)
		return -ENOMEM;

	ptoken = file->private_data;
379
	*ptoken = ORANGEFS_READDIR_START;
M
Mike Marshall 已提交
380 381 382
	return 0;
}

383
static int orangefs_dir_release(struct inode *inode, struct file *file)
M
Mike Marshall 已提交
384
{
385
	orangefs_flush_inode(inode);
M
Mike Marshall 已提交
386 387 388 389
	kfree(file->private_data);
	return 0;
}

390 391
/** ORANGEFS implementation of VFS directory operations */
const struct file_operations orangefs_dir_operations = {
M
Mike Marshall 已提交
392
	.read = generic_read_dir,
393 394 395
	.iterate = orangefs_readdir,
	.open = orangefs_dir_open,
	.release = orangefs_dir_release,
M
Mike Marshall 已提交
396
};