/*
 * Copyright (C) 2005, 2006
 * Avishay Traeger (avishay@gmail.com)
 * Copyright (C) 2008, 2009
 * Boaz Harrosh <bharrosh@panasas.com>
 *
 * This file is part of exofs.
 *
 * exofs is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation.  Since it is based on ext2, and the only
 * valid version of GPL for the Linux kernel is version 2, the only valid
 * version of GPL for exofs is version 2.
 *
 * exofs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with exofs; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

25
#include <linux/slab.h>
26
#include <scsi/scsi_device.h>
B
Boaz Harrosh 已提交
27
#include <asm/div64.h>
28 29 30

#include "exofs.h"

B
Boaz Harrosh 已提交
31 32 33
/*
 * Second-level debug prints are compiled out: EXOFS_DBGMSG2() expands to an
 * empty statement. Swap in the commented-out definition below to route them
 * to EXOFS_DBGMSG instead.
 */
#define EXOFS_DBGMSG2(M...) do {} while (0)
/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */

34
/*
 * Fill @cred_a with a credential for @obj.
 *
 * Thin wrapper: all the work is delegated to
 * osd_sec_init_nosec_doall_caps(), always with the flag pair (false, true).
 */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
{
	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
}

/*
 * Read @length bytes starting at @offset of object @obj on device @od into
 * the kernel buffer @p, using credential @cred.
 *
 * The request is started, set up, finalized and executed in this call and is
 * always released before returning.
 *
 * Returns 0 on success, -ENOMEM if the request could not be started, or the
 * error reported by the failing osd_* step.
 */
int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
		    u64 offset, void *p, unsigned length)
{
	struct osd_request *req;
	int err;

	req = osd_start_request(od, GFP_KERNEL);
	if (unlikely(!req)) {
		EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
		return -ENOMEM;
	}

	err = osd_req_read_kern(req, obj, offset, p, length);
	if (unlikely(err)) {
		EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
		goto release;
	}

	err = osd_finalize_request(req, 0, cred, NULL);
	if (unlikely(err)) {
		EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", err);
		goto release;
	}

	err = osd_execute_request(req);
	if (unlikely(err))
		EXOFS_DBGMSG("osd_execute_request() => %d\n", err);

release:
	/* Every path that got a request must hand it back */
	osd_end_request(req);
	return err;
}

72 73
/*
 * Allocate a zeroed io_state sized for layout->s_numdevs devices and
 * initialize it for a read (@is_reading true) or write of @length bytes
 * at @offset.
 *
 * On success *@pios points at the new state and 0 is returned. On allocation
 * failure *@pios is set to NULL and -ENOMEM is returned.
 */
int  exofs_get_rw_state(struct exofs_layout *layout, bool is_reading,
			u64 offset, u64 length, struct exofs_io_state **pios)
{
	struct exofs_io_state *state;

	/* TODO: Maybe use a kmem_cache per sbi of size
	 * exofs_io_state_size(layout->s_numdevs)
	 */
	state = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
	if (likely(state)) {
		state->layout = layout;
		state->obj.partition = layout->s_pid;
		state->offset = offset;
		state->length = length;
		state->reading = is_reading;

		*pios = state;
		return 0;
	}

	EXOFS_DBGMSG("Failed kzalloc bytes=%d\n",
		     exofs_io_state_size(layout->s_numdevs));
	*pios = NULL;
	return -ENOMEM;
}

98 99 100 101 102 103
int  exofs_get_io_state(struct exofs_layout *layout,
			struct exofs_io_state **ios)
{
	return exofs_get_rw_state(layout, true, 0, 0, ios);
}

104
void exofs_put_io_state(struct exofs_io_state *ios)
105
{
106 107
	if (ios) {
		unsigned i;
108

109 110 111 112 113 114 115 116 117 118
		for (i = 0; i < ios->numdevs; i++) {
			struct exofs_per_dev_state *per_dev = &ios->per_dev[i];

			if (per_dev->or)
				osd_end_request(per_dev->or);
			if (per_dev->bio)
				bio_put(per_dev->bio);
		}

		kfree(ios);
119
	}
120
}
121

122 123 124
/*
 * Map a layout-relative component index to an index into the layout's
 * device table.
 *
 * The starting device rotates with the object number: the result is
 * (layout_index + obj_no * mirrors_p1) modulo s_numdevs, with @obj_no first
 * narrowed to unsigned. This is the only mapping implemented; the original
 * author left a commented-out sketch for selecting other layout functions.
 */
unsigned exofs_layout_od_id(struct exofs_layout *layout,
			    osd_id obj_no, unsigned layout_index)
{
	unsigned obj_seed = obj_no;

	return (obj_seed * layout->mirrors_p1 + layout_index) %
							      layout->s_numdevs;
}

/*
 * Return the osd device that serves component @layout_index of the object
 * this io_state operates on (ios->obj.id).
 */
static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
					   unsigned layout_index)
{
	unsigned od_index = exofs_layout_od_id(ios->layout, ios->obj.id,
					       layout_index);

	return ios->layout->s_ods[od_index];
}

145 146 147
/*
 * Done-callback installed by exofs_io_execute() for synchronous IO:
 * @p is the completion the submitter is waiting on.
 */
static void _sync_done(struct exofs_io_state *ios, void *p)
{
	complete((struct completion *)p);
}

/*
 * kref release function: runs when the last reference on ios->kref is
 * dropped and invokes the io_state's done callback with its private data.
 */
static void _last_io(struct kref *kref)
{
	struct exofs_io_state *ios;

	ios = container_of(kref, struct exofs_io_state, kref);
	ios->done(ios, ios->private);
}

/*
 * Per-request completion callback passed to osd_execute_request_async():
 * drops the reference this request held on the io_state (@p).
 */
static void _done_io(struct osd_request *or, void *p)
{
	struct exofs_io_state *owner = p;

	kref_put(&owner->kref, _last_io);
}

/*
 * Finalize and submit every request attached to @ios.
 *
 * If ios->done is NULL the call is synchronous: an on-stack completion is
 * installed as the done callback, the function waits for it and returns the
 * result of exofs_check_io(). Otherwise it returns 0 right after submission
 * and the caller's done callback fires once the last request completes.
 *
 * A failing osd_finalize_request() aborts before anything is submitted and
 * its error is returned.
 */
static int exofs_io_execute(struct exofs_io_state *ios)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	const bool is_sync = !ios->done;
	int idx, err;

	if (is_sync) {
		ios->done = _sync_done;
		ios->private = &wait;
	}

	/* Pass 1: finalize every populated slot; nothing is in flight yet */
	for (idx = 0; idx < ios->numdevs; idx++) {
		struct osd_request *req = ios->per_dev[idx].or;

		if (unlikely(!req))
			continue;

		err = osd_finalize_request(req, 0, ios->cred, NULL);
		if (likely(!err))
			continue;

		EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n",
			     err);
		return err;
	}

	/* The initial reference is held by this function during submission */
	kref_init(&ios->kref);

	/* Pass 2: one extra reference per submitted request */
	for (idx = 0; idx < ios->numdevs; idx++) {
		struct osd_request *req = ios->per_dev[idx].or;

		if (unlikely(!req))
			continue;

		kref_get(&ios->kref);
		osd_execute_request_async(req, _done_io, ios);
	}

	/* Drop the submitter's reference; @ios is not touched again if async */
	kref_put(&ios->kref, _last_io);

	if (!is_sync)
		return 0;

	wait_for_completion(&wait);
	return exofs_check_io(ios, NULL);
}

212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
/*
 * Zero the data area of every segment of @bio. Segments that span exactly
 * one page are cleared with clear_highpage(); others are zeroed over
 * [bv_offset, bv_offset + bv_len) with zero_user().
 */
static void _clear_bio(struct bio *bio)
{
	struct bio_vec *seg;
	unsigned idx;

	__bio_for_each_segment(seg, bio, idx, 0) {
		unsigned seg_len = seg->bv_len;

		if (unlikely(PAGE_SIZE != seg_len)) {
			zero_user(seg->bv_page, seg->bv_offset, seg_len);
			continue;
		}
		clear_highpage(seg->bv_page);
	}
}

227
/*
 * Collect the result of all requests of @ios.
 *
 * Each populated per-device request is decoded with osd_req_decode_sense().
 * An OSD_ERR_PRI_CLEAR_PAGES error is treated as recovered: the device's bio
 * is zeroed and the error is dropped. Of the remaining errors, the one with
 * the highest osd_err_pri (the last one on ties) is returned.
 *
 * If @resid is not NULL it is set to 0 when no error is returned, or to
 * ios->length otherwise.
 */
int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
{
	enum osd_err_priority worst_pri = 0;
	int worst_err = 0;
	int i;

	for (i = 0; i < ios->numdevs; i++) {
		struct exofs_per_dev_state *pd = &ios->per_dev[i];
		struct osd_sense_info osi;
		int ret;

		if (unlikely(!pd->or))
			continue;

		ret = osd_req_decode_sense(pd->or, &osi);
		if (likely(!ret))
			continue;

		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
			/* read started past the end of the object: hand back zeros */
			_clear_bio(pd->bio);
			EXOFS_DBGMSG("start read offset passed end of file "
				"offset=0x%llx, length=0x%llx\n",
				_LLU(pd->offset),
				_LLU(pd->length));

			continue; /* we recovered */
		}

		if (osi.osd_err_pri >= worst_pri) {
			worst_pri = osi.osd_err_pri;
			worst_err = ret;
		}
	}

	/* TODO: raid specific residual calculations */
	if (resid) {
		if (worst_err)
			*resid = ios->length;
		else
			*resid = 0;
	}

	return worst_err;
}

B
Boaz Harrosh 已提交
273 274 275
/*
 * L - logical offset into the file
 *
 * U - The number of bytes in a stripe within a group
 *
 *	U = stripe_unit * group_width
 *
 * T - The number of bytes striped within a group of component objects
 *     (before advancing to the next group)
 *
 *	T = stripe_unit * group_width * group_depth
 *
 * S - The number of bytes striped across all component objects
 *     before the pattern repeats
 *
 *	S = stripe_unit * group_width * group_depth * group_count
 *
 * M - The "major" (i.e., across all components) stripe number
 *
 *	M = L / S
 *
 * G - Counts the groups from the beginning of the major stripe
 *
 *	G = (L - (M * S)) / T	[or (L % S) / T]
 *
 * H - The byte offset within the group
 *
 *	H = (L - (M * S)) % T	[or (L % S) % T]
 *
 * N - The "minor" (i.e., across the group) stripe number
 *
 *	N = H / U
 *
 * C - The component index corresponding to L
 *
 *	C = (H - (N * U)) / stripe_unit + G * group_width
 *	[or (L % U) / stripe_unit + G * group_width]
 *
 * O - The component offset corresponding to L
 *
 *	O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
 */
/* Result of _calc_stripe_info() for one logical file offset */
struct _striping_info {
	u64 obj_offset;		/* offset inside the component object */
	u64 group_length;	/* bytes from the offset to the end of its group */
	u64 M; /* for truncate */
	unsigned dev;		/* component index, already scaled by mirrors_p1 */
	unsigned unit_off;	/* file offset modulo stripe_unit */
};

323
/*
 * Translate the logical @file_offset into striping coordinates for @layout,
 * following the L/U/T/S/M/G/H/N/C/O definitions documented above
 * struct _striping_info.
 *
 * On return @si holds:
 *   dev          - component index C, multiplied by layout->mirrors_p1
 *   unit_off     - file_offset % stripe_unit
 *   obj_offset   - offset inside the component object (O)
 *   group_length - bytes left from this offset to the end of its group (T - H)
 *   M            - the major stripe number
 *
 * N and stripe_unit/U are all 32-bit, so their products are widened to u64
 * before multiplying: N can be as large as group_depth - 1, and
 * group_depth is carried as a 64-bit quantity here, so a 32-bit product
 * could wrap and yield a too-small object offset.
 */
static void _calc_stripe_info(struct exofs_layout *layout, u64 file_offset,
			      struct _striping_info *si)
{
	u32	stripe_unit = layout->stripe_unit;
	u32	group_width = layout->group_width;
	u64	group_depth = layout->group_depth;

	u32	U = stripe_unit * group_width;
	u64	T = U * group_depth;
	u64	S = T * layout->group_count;
	u64	M = div64_u64(file_offset, S);

	/*
	G = (L - (M * S)) / T
	H = (L - (M * S)) % T
	*/
	u64	LmodS = file_offset - M * S;
	u32	G = div64_u64(LmodS, T);
	u64	H = LmodS - G * T;

	u32	N = div_u64(H, U);

	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
	si->dev = (u32)(H - ((u64)N * U)) / stripe_unit + G * group_width;
	si->dev *= layout->mirrors_p1;

	div_u64_rem(file_offset, stripe_unit, &si->unit_off);

	si->obj_offset = si->unit_off + ((u64)N * stripe_unit) +
				  (M * group_depth * stripe_unit);

	si->group_length = T - H;
	si->M = M;
}

358 359 360
static int _add_stripe_unit(struct exofs_io_state *ios,  unsigned *cur_pg,
		unsigned pgbase, struct exofs_per_dev_state *per_dev,
		int cur_len)
B
Boaz Harrosh 已提交
361
{
362
	unsigned pg = *cur_pg;
B
Boaz Harrosh 已提交
363 364 365 366 367 368 369 370
	struct request_queue *q =
			osd_request_queue(exofs_ios_od(ios, per_dev->dev));

	per_dev->length += cur_len;

	if (per_dev->bio == NULL) {
		unsigned pages_in_stripe = ios->layout->group_width *
					(ios->layout->stripe_unit / PAGE_SIZE);
371
		unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
B
Boaz Harrosh 已提交
372 373 374 375
						ios->layout->group_width;

		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
		if (unlikely(!per_dev->bio)) {
P
Paul Bolle 已提交
376
			EXOFS_DBGMSG("Failed to allocate BIO size=%u\n",
B
Boaz Harrosh 已提交
377 378 379 380 381 382
				     bio_size);
			return -ENOMEM;
		}
	}

	while (cur_len > 0) {
383 384
		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
		unsigned added_len;
B
Boaz Harrosh 已提交
385

386 387
		BUG_ON(ios->nr_pages <= pg);
		cur_len -= pglen;
B
Boaz Harrosh 已提交
388

389 390 391
		added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
					    pglen, pgbase);
		if (unlikely(pglen != added_len))
B
Boaz Harrosh 已提交
392
			return -ENOMEM;
393 394
		pgbase = 0;
		++pg;
B
Boaz Harrosh 已提交
395 396 397
	}
	BUG_ON(cur_len);

398
	*cur_pg = pg;
B
Boaz Harrosh 已提交
399 400 401
	return 0;
}

B
Boaz Harrosh 已提交
402
/*
 * Distribute @length bytes, starting at the position described by @si, over
 * the devices of one group.
 *
 * Devices are visited round-robin in steps of mirrors_p1, wrapping inside
 * the group that contains si->dev. The first time a device slot is used its
 * dev and object offset are set up relative to si->dev; every visit adds up
 * to one stripe unit of pages through _add_stripe_unit().
 *
 * ios->numdevs and ios->pages_consumed are updated on exit, on both success
 * and failure. Returns 0 or the error from _add_stripe_unit().
 */
static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
			      struct _striping_info *si)
{
	unsigned stripe_unit = ios->layout->stripe_unit;
	unsigned mirrors_p1 = ios->layout->mirrors_p1;
	unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
	unsigned dev = si->dev;
	unsigned first_dev = dev - (dev % devs_in_group);
	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
	unsigned cur_pg = ios->pages_consumed;
	int ret = 0;

	while (length) {
		struct exofs_per_dev_state *per_dev = &ios->per_dev[dev];
		unsigned cur_len = stripe_unit;
		unsigned page_off = 0;

		if (!per_dev->length) {
			/* First use of this slot: fix its device and offset */
			per_dev->dev = dev;
			if (dev == si->dev) {
				/* the IO starts in the middle of this unit */
				per_dev->offset = si->obj_offset;
				cur_len = stripe_unit - si->unit_off;
				page_off = si->unit_off & ~PAGE_MASK;
				BUG_ON(page_off && (page_off != ios->pgbase));
			} else if (dev < si->dev) {
				/* reached after wrapping: starts at next unit */
				per_dev->offset = si->obj_offset + stripe_unit -
								   si->unit_off;
			} else { /* dev > si->dev */
				per_dev->offset = si->obj_offset - si->unit_off;
			}

			if (max_comp < dev)
				max_comp = dev;
		}

		if (cur_len >= length)
			cur_len = length;

		ret = _add_stripe_unit(ios, &cur_pg, page_off, per_dev,
				       cur_len);
		if (unlikely(ret))
			break;

		/* next device of the group, wrapping at the group's end */
		dev = ((dev + mirrors_p1) % devs_in_group) + first_dev;

		length -= cur_len;
	}

	ios->numdevs = max_comp + mirrors_p1;
	ios->pages_consumed = cur_pg;
	return ret;
}

B
Boaz Harrosh 已提交
458 459
static int _prepare_for_striping(struct exofs_io_state *ios)
{
B
Boaz Harrosh 已提交
460
	u64 length = ios->length;
461
	u64 offset = ios->offset;
B
Boaz Harrosh 已提交
462
	struct _striping_info si;
B
Boaz Harrosh 已提交
463
	int ret = 0;
B
Boaz Harrosh 已提交
464 465 466 467 468

	if (!ios->pages) {
		if (ios->kern_buff) {
			struct exofs_per_dev_state *per_dev = &ios->per_dev[0];

469
			_calc_stripe_info(ios->layout, ios->offset, &si);
B
Boaz Harrosh 已提交
470 471 472 473 474 475 476 477 478 479 480 481
			per_dev->offset = si.obj_offset;
			per_dev->dev = si.dev;

			/* no cross device without page array */
			BUG_ON((ios->layout->group_width > 1) &&
			       (si.unit_off + ios->length >
				ios->layout->stripe_unit));
		}
		ios->numdevs = ios->layout->mirrors_p1;
		return 0;
	}

B
Boaz Harrosh 已提交
482
	while (length) {
483
		_calc_stripe_info(ios->layout, offset, &si);
484

B
Boaz Harrosh 已提交
485 486 487
		if (length < si.group_length)
			si.group_length = length;

488
		ret = _prepare_one_group(ios, si.group_length, &si);
B
Boaz Harrosh 已提交
489 490 491
		if (unlikely(ret))
			goto out;

492
		offset += si.group_length;
B
Boaz Harrosh 已提交
493 494 495 496 497
		length -= si.group_length;
	}

out:
	return ret;
B
Boaz Harrosh 已提交
498 499
}

500 501 502 503
int exofs_sbi_create(struct exofs_io_state *ios)
{
	int i, ret;

504
	for (i = 0; i < ios->layout->s_numdevs; i++) {
505 506
		struct osd_request *or;

507
		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527
		if (unlikely(!or)) {
			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		ios->per_dev[i].or = or;
		ios->numdevs++;

		osd_req_create_object(or, &ios->obj);
	}
	ret = exofs_io_execute(ios);

out:
	return ret;
}

int exofs_sbi_remove(struct exofs_io_state *ios)
{
	int i, ret;

528
	for (i = 0; i < ios->layout->s_numdevs; i++) {
529 530
		struct osd_request *or;

531
		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
		if (unlikely(!or)) {
			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		ios->per_dev[i].or = or;
		ios->numdevs++;

		osd_req_remove_object(or, &ios->obj);
	}
	ret = exofs_io_execute(ios);

out:
	return ret;
}

B
Boaz Harrosh 已提交
548
static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
549
{
B
Boaz Harrosh 已提交
550 551 552 553
	struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
	unsigned dev = ios->per_dev[cur_comp].dev;
	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
	int ret = 0;
554

B
Boaz Harrosh 已提交
555 556 557
	if (ios->pages && !master_dev->length)
		return 0; /* Just an empty slot */

B
Boaz Harrosh 已提交
558 559
	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
		struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
560 561
		struct osd_request *or;

B
Boaz Harrosh 已提交
562
		or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL);
563 564 565 566 567
		if (unlikely(!or)) {
			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
B
Boaz Harrosh 已提交
568 569
		per_dev->or = or;
		per_dev->offset = master_dev->offset;
570

571
		if (ios->pages) {
572 573
			struct bio *bio;

B
Boaz Harrosh 已提交
574
			if (per_dev != master_dev) {
B
Boaz Harrosh 已提交
575
				bio = bio_kmalloc(GFP_KERNEL,
B
Boaz Harrosh 已提交
576
						  master_dev->bio->bi_max_vecs);
B
Boaz Harrosh 已提交
577
				if (unlikely(!bio)) {
B
Boaz Harrosh 已提交
578
					EXOFS_DBGMSG(
P
Paul Bolle 已提交
579
					      "Failed to allocate BIO size=%u\n",
B
Boaz Harrosh 已提交
580
					      master_dev->bio->bi_max_vecs);
B
Boaz Harrosh 已提交
581 582 583 584
					ret = -ENOMEM;
					goto out;
				}

B
Boaz Harrosh 已提交
585
				__bio_clone(bio, master_dev->bio);
B
Boaz Harrosh 已提交
586 587
				bio->bi_bdev = NULL;
				bio->bi_next = NULL;
B
Boaz Harrosh 已提交
588 589 590
				per_dev->length = master_dev->length;
				per_dev->bio =  bio;
				per_dev->dev = dev;
B
Boaz Harrosh 已提交
591
			} else {
B
Boaz Harrosh 已提交
592 593
				bio = master_dev->bio;
				/* FIXME: bio_set_dir() */
594
				bio->bi_rw |= REQ_WRITE;
B
Boaz Harrosh 已提交
595
			}
596

B
Boaz Harrosh 已提交
597 598
			osd_req_write(or, &ios->obj, per_dev->offset, bio,
				      per_dev->length);
B
Boaz Harrosh 已提交
599 600
			EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
				      "length=0x%llx dev=%d\n",
B
Boaz Harrosh 已提交
601 602
				     _LLU(ios->obj.id), _LLU(per_dev->offset),
				     _LLU(per_dev->length), dev);
603
		} else if (ios->kern_buff) {
B
Boaz Harrosh 已提交
604
			ret = osd_req_write_kern(or, &ios->obj, per_dev->offset,
605
					   ios->kern_buff, ios->length);
B
Boaz Harrosh 已提交
606 607
			if (unlikely(ret))
				goto out;
B
Boaz Harrosh 已提交
608 609
			EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
				      "length=0x%llx dev=%d\n",
B
Boaz Harrosh 已提交
610 611
				     _LLU(ios->obj.id), _LLU(per_dev->offset),
				     _LLU(ios->length), dev);
612 613
		} else {
			osd_req_set_attributes(or, &ios->obj);
B
Boaz Harrosh 已提交
614
			EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
B
Boaz Harrosh 已提交
615
				     _LLU(ios->obj.id), ios->out_attr_len, dev);
616 617 618 619 620 621 622 623 624
		}

		if (ios->out_attr)
			osd_req_add_set_attr_list(or, ios->out_attr,
						  ios->out_attr_len);

		if (ios->in_attr)
			osd_req_add_get_attr_list(or, ios->in_attr,
						  ios->in_attr_len);
625
	}
626 627 628 629 630

out:
	return ret;
}

B
Boaz Harrosh 已提交
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650
/*
 * Write the range described by @ios: lay it out over the devices, build the
 * requests of every mirror set (one set every mirrors_p1 slots) and execute
 * them. Returns 0 or the first error encountered.
 */
int exofs_sbi_write(struct exofs_io_state *ios)
{
	int comp;
	int err;

	err = _prepare_for_striping(ios);
	if (unlikely(err))
		return err;

	for (comp = 0; comp < ios->numdevs; comp += ios->layout->mirrors_p1) {
		err = _sbi_write_mirror(ios, comp);
		if (unlikely(err))
			return err;
	}

	return exofs_io_execute(ios);
}

static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
651
{
B
Boaz Harrosh 已提交
652
	struct osd_request *or;
B
Boaz Harrosh 已提交
653
	struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
B
Boaz Harrosh 已提交
654
	unsigned first_dev = (unsigned)ios->obj.id;
655

B
Boaz Harrosh 已提交
656 657 658
	if (ios->pages && !per_dev->length)
		return 0; /* Just an empty slot */

B
Boaz Harrosh 已提交
659
	first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
660
	or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
B
Boaz Harrosh 已提交
661 662 663 664 665 666
	if (unlikely(!or)) {
		EXOFS_ERR("%s: osd_start_request failed\n", __func__);
		return -ENOMEM;
	}
	per_dev->or = or;

667
	if (ios->pages) {
B
Boaz Harrosh 已提交
668 669
		osd_req_read(or, &ios->obj, per_dev->offset,
				per_dev->bio, per_dev->length);
B
Boaz Harrosh 已提交
670 671
		EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
			     " dev=%d\n", _LLU(ios->obj.id),
B
Boaz Harrosh 已提交
672
			     _LLU(per_dev->offset), _LLU(per_dev->length),
B
Boaz Harrosh 已提交
673 674
			     first_dev);
	} else if (ios->kern_buff) {
B
Boaz Harrosh 已提交
675
		int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset,
B
Boaz Harrosh 已提交
676 677 678
					    ios->kern_buff, ios->length);
		EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
			      "length=0x%llx dev=%d ret=>%d\n",
B
Boaz Harrosh 已提交
679
			      _LLU(ios->obj.id), _LLU(per_dev->offset),
B
Boaz Harrosh 已提交
680 681 682 683 684 685 686 687 688 689
			      _LLU(ios->length), first_dev, ret);
		if (unlikely(ret))
			return ret;
	} else {
		osd_req_get_attributes(or, &ios->obj);
		EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
			      _LLU(ios->obj.id), ios->in_attr_len, first_dev);
	}
	if (ios->out_attr)
		osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
690

B
Boaz Harrosh 已提交
691 692
	if (ios->in_attr)
		osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
693

B
Boaz Harrosh 已提交
694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713
	return 0;
}

/*
 * Read the range described by @ios: lay it out over the devices, build one
 * read request per mirror set (one set every mirrors_p1 slots) and execute
 * them. Returns 0 or the first error encountered.
 */
int exofs_sbi_read(struct exofs_io_state *ios)
{
	int comp;
	int err;

	err = _prepare_for_striping(ios);
	if (unlikely(err))
		return err;

	for (comp = 0; comp < ios->numdevs; comp += ios->layout->mirrors_p1) {
		err = _sbi_read_mirror(ios, comp);
		if (unlikely(err))
			return err;
	}

	return exofs_io_execute(ios);
}

716
int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
717 718 719 720 721 722 723
{
	struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
	void *iter = NULL;
	int nelem;

	do {
		nelem = 1;
724 725
		osd_req_decode_get_attr_list(ios->per_dev[0].or,
					     &cur_attr, &nelem, &iter);
726 727 728 729 730 731 732 733 734 735
		if ((cur_attr.attr_page == attr->attr_page) &&
		    (cur_attr.attr_id == attr->attr_id)) {
			attr->len = cur_attr.len;
			attr->val_ptr = cur_attr.val_ptr;
			return 0;
		}
	} while (iter);

	return -EIO;
}
736

B
Boaz Harrosh 已提交
737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
/*
 * Start a set-attributes request carrying the single attribute @attr for
 * each of the mirrors_p1 slots beginning at @cur_comp, and store each
 * request in its per_dev slot.
 *
 * Returns 0, or -ENOMEM if a request cannot be started (requests already
 * attached stay on @ios).
 */
static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp,
			     struct osd_attr *attr)
{
	int end_comp = cur_comp + ios->layout->mirrors_p1;

	while (cur_comp < end_comp) {
		struct osd_request *req;

		req = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL);
		if (unlikely(!req)) {
			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
			return -ENOMEM;
		}
		ios->per_dev[cur_comp].or = req;

		osd_req_set_attributes(req, &ios->obj);
		osd_req_add_set_attr_list(req, attr, 1);

		++cur_comp;
	}

	return 0;
}

760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784
/* Truncate geometry, filled in by _calc_trunk_info() for a new file size */
struct _trunc_info {
	struct _striping_info si;	/* striping info of the new size */
	u64 prev_group_obj_off;		/* size for devices below first_group_dev */
	u64 next_group_obj_off;		/* size for devices at/after nex_group_dev */

	unsigned first_group_dev;	/* si.dev rounded down to group_width */
	unsigned nex_group_dev;		/* first_group_dev + group_width */
	unsigned max_devs;		/* group_width * group_count */
};

/*
 * Compute the truncate geometry of @layout for the new size @file_offset.
 *
 * Runs _calc_stripe_info() into ti->si, then derives the object offsets
 * used for devices outside the group of si.dev (M * stripe_unit, and
 * (M - 1) * stripe_unit or 0 when M is 0), the device range of that group,
 * and the number of devices to visit.
 */
void _calc_trunk_info(struct exofs_layout *layout, u64 file_offset,
		       struct _trunc_info *ti)
{
	struct _striping_info *si = &ti->si;
	unsigned stripe_unit = layout->stripe_unit;

	_calc_stripe_info(layout, file_offset, si);

	ti->prev_group_obj_off = si->M * stripe_unit;
	if (si->M)
		ti->next_group_obj_off = (si->M - 1) * stripe_unit;
	else
		ti->next_group_obj_off = 0;

	ti->first_group_dev = si->dev - (si->dev % layout->group_width);
	ti->nex_group_dev = ti->first_group_dev + layout->group_width;
	ti->max_devs = layout->group_width * layout->group_count;
}

785 786 787 788
/*
 * Truncate the object of inode @oi to @size bytes.
 *
 * For each of the ti.max_devs components the new per-object length is
 * derived from the truncate geometry and sent as a g_attr_logical_length
 * attribute to that component and its mirrors; all requests are then
 * executed together.
 *
 * The attribute array must outlive the requests, so it is freed only after
 * exofs_io_execute() returns. Returns 0, -ENOMEM, or the error of the
 * failing step.
 */
int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
{
	struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
	struct exofs_io_state *ios;
	struct exofs_trunc_attr {
		struct osd_attr attr;
		__be64 newsize;
	} *size_attrs;
	struct _trunc_info ti;
	int i, ret;

	ret = exofs_get_io_state(&sbi->layout, &ios);
	if (unlikely(ret))
		return ret;

	_calc_trunk_info(ios->layout, size, &ti);

	size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs),
			     GFP_KERNEL);
	if (unlikely(!size_attrs)) {
		ret = -ENOMEM;
		goto out;
	}

	ios->obj.id = exofs_oi_objno(oi);
	ios->cred = oi->i_cred;
	ios->numdevs = ios->layout->s_numdevs;

	for (i = 0; i < ti.max_devs; ++i) {
		struct exofs_trunc_attr *size_attr = &size_attrs[i];
		u64 obj_size;

		/* pick the new length of component i from its position */
		if (i < ti.first_group_dev) {
			obj_size = ti.prev_group_obj_off;
		} else if (i >= ti.nex_group_dev) {
			obj_size = ti.next_group_obj_off;
		} else if (i < ti.si.dev) { /* dev within this group */
			obj_size = ti.si.obj_offset +
				      ios->layout->stripe_unit - ti.si.unit_off;
		} else if (i == ti.si.dev) {
			obj_size = ti.si.obj_offset;
		} else { /* i > ti.dev */
			obj_size = ti.si.obj_offset - ti.si.unit_off;
		}

		size_attr->newsize = cpu_to_be64(obj_size);
		size_attr->attr = g_attr_logical_length;
		size_attr->attr.val_ptr = &size_attr->newsize;

		EXOFS_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
			     _LLU(ios->obj.id), _LLU(obj_size), i);
		ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
					&size_attr->attr);
		if (unlikely(ret))
			goto out;
	}
	ret = exofs_io_execute(ios);

out:
	kfree(size_attrs);
	exofs_put_io_state(ios);
	return ret;
}