zstd.c 19.1 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
N
Nick Terrell 已提交
2 3 4 5 6
/*
 * Copyright (c) 2016-present, Facebook, Inc.
 * All rights reserved.
 *
 */
7

N
Nick Terrell 已提交
8
#include <linux/bio.h>
9
#include <linux/bitmap.h>
N
Nick Terrell 已提交
10 11 12 13
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
14
#include <linux/sched/mm.h>
N
Nick Terrell 已提交
15 16 17 18 19 20
#include <linux/pagemap.h>
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/zstd.h>
#include "compression.h"
21
#include "ctree.h"
N
Nick Terrell 已提交
22 23 24 25

/* Upper bound applied to the windowLog returned by ZSTD_getParams() */
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
/* 2^ZSTD_BTRFS_MAX_WINDOWLOG bytes; larger inputs trigger a WARN_ON */
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
/* Level chosen by zstd_set_level() when the requested level is 0 */
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
26
/* Highest supported level; also sizes the per-level workspace arrays */
#define ZSTD_BTRFS_MAX_LEVEL 15
27 28
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
N
Nick Terrell 已提交
29

30 31
/*
 * zstd_get_btrfs_parameters - zstd parameters adjusted for btrfs
 * @level:   compression level handed to ZSTD_getParams()
 * @src_len: size of the input in bytes
 *
 * The window log reported by zstd is capped at ZSTD_BTRFS_MAX_WINDOWLOG.
 * A warning is emitted if @src_len is larger than ZSTD_BTRFS_MAX_INPUT.
 *
 * Returns the (possibly adjusted) parameter set by value.
 */
static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level,
						 size_t src_len)
{
	ZSTD_parameters zparams = ZSTD_getParams(level, src_len, 0);

	zparams.cParams.windowLog = min_t(unsigned int,
					  zparams.cParams.windowLog,
					  ZSTD_BTRFS_MAX_WINDOWLOG);
	WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);

	return zparams;
}

struct workspace {
	void *mem;
	size_t size;
	char *buf;
45
	unsigned int level;
46
	unsigned int req_level;
47
	unsigned long last_used; /* jiffies */
N
Nick Terrell 已提交
48
	struct list_head list;
49
	struct list_head lru_list;
50 51
	ZSTD_inBuffer in_buf;
	ZSTD_outBuffer out_buf;
N
Nick Terrell 已提交
52 53
};

54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
/*
 * Zstd Workspace Management
 *
 * Zstd workspaces have different memory requirements depending on the level.
 * The zstd workspaces are managed by having individual lists for each level
 * and a global lru.  Forward progress is maintained by protecting a max level
 * workspace.
 *
 * Getting a workspace is done by using the bitmap to identify the levels that
 * have available workspaces and scans up.  This lets us recycle higher level
 * workspaces because of the monotonic memory guarantee.  A workspace's
 * last_used is only updated if it is being used by the corresponding memory
 * level.  Putting a workspace involves adding it back to the appropriate places
 * and adding it back to the lru if necessary.
 *
 * A timer is used to reclaim workspaces if they have not been used for
 * ZSTD_BTRFS_RECLAIM_JIFFIES.  This helps keep only active workspaces around.
 * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
 */

struct zstd_workspace_manager {
	/* Set to &btrfs_zstd_compress by zstd_init_workspace_manager() */
	const struct btrfs_compress_op *ops;
	/* Taken (BH-disabling) around updates to the lists and active_map */
	spinlock_t lock;
	/* Workspaces the reclaim timer may free; newest entries at the head */
	struct list_head lru_list;
	/* Idle workspaces, one list per level, indexed by level - 1 */
	struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
	/* Bit (level - 1) is set while idle_ws[level - 1] holds a workspace */
	unsigned long active_map;
	/* Sleepers from zstd_get_workspace() whose allocation failed */
	wait_queue_head_t wait;
	/* Runs zstd_reclaim_timer_fn() */
	struct timer_list timer;
};

/* The single, file-wide zstd workspace manager instance */
static struct zstd_workspace_manager wsm;
85

86 87
/* Workspace memory needed per level, indexed by level - 1 (non-decreasing) */
static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];

88 89 90 91 92
/* Map a workspace's idle-list link (its ->list member) back to the workspace */
static inline struct workspace *list_to_workspace(struct list_head *list)
{
	return list_entry(list, struct workspace, list);
}

93 94 95
/* Forward declarations: both are called before their definitions below */
static void zstd_free_workspace(struct list_head *ws);
static struct list_head *zstd_alloc_workspace(unsigned int level);

96 97 98 99 100 101 102 103 104 105 106 107
/*
 * zstd_reclaim_timer_fn - reclaim timer
 * @t: timer
 *
 * This scans the lru_list and attempts to reclaim any workspace that hasn't
 * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
 */
static void zstd_reclaim_timer_fn(struct timer_list *timer)
{
	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
	struct list_head *pos, *next;

108
	spin_lock_bh(&wsm.lock);
109 110

	if (list_empty(&wsm.lru_list)) {
111
		spin_unlock_bh(&wsm.lock);
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
		return;
	}

	list_for_each_prev_safe(pos, next, &wsm.lru_list) {
		struct workspace *victim = container_of(pos, struct workspace,
							lru_list);
		unsigned int level;

		if (time_after(victim->last_used, reclaim_threshold))
			break;

		/* workspace is in use */
		if (victim->req_level)
			continue;

		level = victim->level;
		list_del(&victim->lru_list);
		list_del(&victim->list);
130
		zstd_free_workspace(&victim->list);
131 132 133 134 135 136 137 138 139

		if (list_empty(&wsm.idle_ws[level - 1]))
			clear_bit(level - 1, &wsm.active_map);

	}

	if (!list_empty(&wsm.lru_list))
		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);

140
	spin_unlock_bh(&wsm.lock);
141 142
}

143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
/*
 * zstd_calc_ws_mem_sizes - fill zstd_ws_mem_sizes[] with monotonic bounds
 *
 * For every level the required size is the larger of the compression stream
 * bound for that level and the decompression stream bound for
 * ZSTD_BTRFS_MAX_INPUT.  A running maximum is stored instead of the raw value
 * so that a higher level never records a smaller size than a lower one, which
 * is what allows a higher level workspace to serve a lower level request.
 */
static void zstd_calc_ws_mem_sizes(void)
{
	size_t running_max = 0;
	unsigned int idx;

	for (idx = 0; idx < ZSTD_BTRFS_MAX_LEVEL; idx++) {
		ZSTD_parameters params =
			zstd_get_btrfs_parameters(idx + 1, ZSTD_BTRFS_MAX_INPUT);
		size_t needed =
			max_t(size_t,
			      ZSTD_CStreamWorkspaceBound(params.cParams),
			      ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));

		if (needed > running_max)
			running_max = needed;
		zstd_ws_mem_sizes[idx] = running_max;
	}
}

170 171
static void zstd_init_workspace_manager(void)
{
172 173 174
	struct list_head *ws;
	int i;

175 176
	zstd_calc_ws_mem_sizes();

177 178 179 180 181 182 183 184 185
	wsm.ops = &btrfs_zstd_compress;
	spin_lock_init(&wsm.lock);
	init_waitqueue_head(&wsm.wait);
	timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);

	INIT_LIST_HEAD(&wsm.lru_list);
	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
		INIT_LIST_HEAD(&wsm.idle_ws[i]);

186
	ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
187 188 189 190 191 192 193
	if (IS_ERR(ws)) {
		pr_warn(
		"BTRFS: cannot preallocate zstd compression workspace\n");
	} else {
		set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
		list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
	}
194 195 196 197
}

static void zstd_cleanup_workspace_manager(void)
{
198 199 200
	struct workspace *workspace;
	int i;

201
	spin_lock_bh(&wsm.lock);
202 203 204 205 206 207
	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
		while (!list_empty(&wsm.idle_ws[i])) {
			workspace = container_of(wsm.idle_ws[i].next,
						 struct workspace, list);
			list_del(&workspace->list);
			list_del(&workspace->lru_list);
208
			zstd_free_workspace(&workspace->list);
209 210
		}
	}
211
	spin_unlock_bh(&wsm.lock);
212 213

	del_timer_sync(&wsm.timer);
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
}

/*
 * zstd_find_workspace - find workspace
 * @level: compression level
 *
 * This iterates over the set bits in the active_map beginning at the requested
 * compression level.  This lets us utilize already allocated workspaces before
 * allocating a new one.  If the workspace is of a larger size, it is used, but
 * the place in the lru_list and last_used times are not updated.  This is to
 * offer the opportunity to reclaim the workspace in favor of allocating an
 * appropriately sized one in the future.
 */
static struct list_head *zstd_find_workspace(unsigned int level)
{
	struct list_head *ws;
	struct workspace *workspace;
	int i = level - 1;

233
	spin_lock_bh(&wsm.lock);
234 235 236 237 238 239 240 241 242 243 244
	for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
		if (!list_empty(&wsm.idle_ws[i])) {
			ws = wsm.idle_ws[i].next;
			workspace = list_to_workspace(ws);
			list_del_init(ws);
			/* keep its place if it's a lower level using this */
			workspace->req_level = level;
			if (level == workspace->level)
				list_del(&workspace->lru_list);
			if (list_empty(&wsm.idle_ws[i]))
				clear_bit(i, &wsm.active_map);
245
			spin_unlock_bh(&wsm.lock);
246 247 248
			return ws;
		}
	}
249
	spin_unlock_bh(&wsm.lock);
250 251

	return NULL;
252 253
}

254 255 256 257 258 259 260 261 262
/*
 * zstd_get_workspace - zstd's get_workspace
 * @level: compression level
 *
 * If @level is 0, then any compression level can be used.  Therefore, we begin
 * scanning from 1.  We first scan through possible workspaces and then after
 * attempt to allocate a new workspace.  If we fail to allocate one due to
 * memory pressure, go to sleep waiting for the max level workspace to free up.
 */
263
static struct list_head *zstd_get_workspace(unsigned int level)
264
{
265 266
	struct list_head *ws;
	unsigned int nofs_flag;
267

268 269 270 271 272 273 274 275 276 277
	/* level == 0 means we can use any workspace */
	if (!level)
		level = 1;

again:
	ws = zstd_find_workspace(level);
	if (ws)
		return ws;

	nofs_flag = memalloc_nofs_save();
278
	ws = zstd_alloc_workspace(level);
279 280 281 282 283 284 285 286 287 288 289
	memalloc_nofs_restore(nofs_flag);

	if (IS_ERR(ws)) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
		schedule();
		finish_wait(&wsm.wait, &wait);

		goto again;
	}
290 291

	return ws;
292 293
}

294 295 296 297 298 299 300 301 302 303
/*
 * zstd_put_workspace - zstd put_workspace
 * @ws: list_head for the workspace
 *
 * When putting back a workspace, we only need to update the LRU if we are of
 * the requested compression level.  Here is where we continue to protect the
 * max level workspace or update last_used accordingly.  If the reclaim timer
 * isn't set, it is also set here.  Only the max level workspace tries and wakes
 * up waiting workspaces.
 */
304 305
static void zstd_put_workspace(struct list_head *ws)
{
306 307
	struct workspace *workspace = list_to_workspace(ws);

308
	spin_lock_bh(&wsm.lock);
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327

	/* A node is only taken off the lru if we are the corresponding level */
	if (workspace->req_level == workspace->level) {
		/* Hide a max level workspace from reclaim */
		if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
			INIT_LIST_HEAD(&workspace->lru_list);
		} else {
			workspace->last_used = jiffies;
			list_add(&workspace->lru_list, &wsm.lru_list);
			if (!timer_pending(&wsm.timer))
				mod_timer(&wsm.timer,
					  jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
		}
	}

	set_bit(workspace->level - 1, &wsm.active_map);
	list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
	workspace->req_level = 0;

328
	spin_unlock_bh(&wsm.lock);
329 330 331

	if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
		cond_wake_up(&wsm.wait);
332 333
}

N
Nick Terrell 已提交
334 335 336 337 338 339 340 341 342
/*
 * zstd_free_workspace - release a workspace and the buffers it owns
 * @ws: the workspace's ->list link
 *
 * The caller is responsible for unlinking the workspace from any list first.
 */
static void zstd_free_workspace(struct list_head *ws)
{
	struct workspace *doomed = list_to_workspace(ws);

	kvfree(doomed->mem);
	kfree(doomed->buf);
	kfree(doomed);
}

343
static struct list_head *zstd_alloc_workspace(unsigned int level)
N
Nick Terrell 已提交
344 345 346 347 348 349 350
{
	struct workspace *workspace;

	workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
	if (!workspace)
		return ERR_PTR(-ENOMEM);

351
	workspace->size = zstd_ws_mem_sizes[level - 1];
352 353 354
	workspace->level = level;
	workspace->req_level = level;
	workspace->last_used = jiffies;
N
Nick Terrell 已提交
355 356 357 358 359 360
	workspace->mem = kvmalloc(workspace->size, GFP_KERNEL);
	workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!workspace->mem || !workspace->buf)
		goto fail;

	INIT_LIST_HEAD(&workspace->list);
361
	INIT_LIST_HEAD(&workspace->lru_list);
N
Nick Terrell 已提交
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387

	return &workspace->list;
fail:
	zstd_free_workspace(&workspace->list);
	return ERR_PTR(-ENOMEM);
}

/*
 * zstd_compress_pages - compress a range of page cache pages with zstd
 * @ws:        workspace obtained from zstd_get_workspace()
 * @mapping:   address space the input pages are looked up in
 * @start:     byte offset of the first input byte
 * @pages:     array receiving the newly allocated output pages
 * @out_pages: in: capacity of @pages; out: number of pages stored in @pages
 * @total_in:  out: number of input bytes consumed (0 on error)
 * @total_out: in: number of input bytes to compress; out: number of
 *             compressed bytes produced (0 on error)
 *
 * Output pages are allocated here and recorded in @pages even when an error
 * is returned; @out_pages tells the caller how many were stored.
 *
 * Returns 0 on success, -E2BIG if the output does not fit or is not smaller
 * than the input, -ENOMEM if an output page cannot be allocated, -ENOENT if
 * an input page is not found in @mapping, and -EIO on zstd errors.
 */
static int zstd_compress_pages(struct list_head *ws,
		struct address_space *mapping,
		u64 start,
		struct page **pages,
		unsigned long *out_pages,
		unsigned long *total_in,
		unsigned long *total_out)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	ZSTD_CStream *stream;
	int ret = 0;
	int nr_pages = 0;
	struct page *in_page = NULL;  /* The current page to read */
	struct page *out_page = NULL; /* The current page to write to */
	unsigned long tot_in = 0;
	unsigned long tot_out = 0;
	unsigned long len = *total_out;
	const unsigned long nr_dest_pages = *out_pages;
	unsigned long max_out = nr_dest_pages * PAGE_SIZE;
	ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
							   len);

	*out_pages = 0;
	*total_out = 0;
	*total_in = 0;

	/* Initialize the stream */
	stream = ZSTD_initCStream(params, len, workspace->mem,
			workspace->size);
	if (!stream) {
		pr_warn("BTRFS: ZSTD_initCStream failed\n");
		ret = -EIO;
		goto out;
	}

	/* map in the first page of input data */
	in_page = find_get_page(mapping, start >> PAGE_SHIFT);
	if (!in_page) {
		/* Nothing to map; bail out instead of kmap()ing NULL */
		ret = -ENOENT;
		goto out;
	}
	workspace->in_buf.src = kmap(in_page);
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);


	/* Allocate and map in the output buffer */
	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (out_page == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	pages[nr_pages++] = out_page;
	workspace->out_buf.dst = kmap(out_page);
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);

	while (1) {
		size_t ret2;

		ret2 = ZSTD_compressStream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (ZSTD_isError(ret2)) {
			pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
					ZSTD_getErrorCode(ret2));
			ret = -EIO;
			goto out;
		}

		/* Check to see if we are making it bigger */
		if (tot_in + workspace->in_buf.pos > 8192 &&
				tot_in + workspace->in_buf.pos <
				tot_out + workspace->out_buf.pos) {
			ret = -E2BIG;
			goto out;
		}

		/* We've reached the end of our output range */
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		/* Check if we need more output space */
		if (workspace->out_buf.pos == workspace->out_buf.size) {
			tot_out += PAGE_SIZE;
			max_out -= PAGE_SIZE;
			kunmap(out_page);
			if (nr_pages == nr_dest_pages) {
				/* Already unmapped; keep cleanup from redoing it */
				out_page = NULL;
				ret = -E2BIG;
				goto out;
			}
			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
			if (out_page == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			pages[nr_pages++] = out_page;
			workspace->out_buf.dst = kmap(out_page);
			workspace->out_buf.pos = 0;
			workspace->out_buf.size = min_t(size_t, max_out,
							PAGE_SIZE);
		}

		/* We've reached the end of the input */
		if (workspace->in_buf.pos >= len) {
			tot_in += workspace->in_buf.pos;
			break;
		}

		/* Check if we need more input */
		if (workspace->in_buf.pos == workspace->in_buf.size) {
			tot_in += PAGE_SIZE;
			kunmap(in_page);
			put_page(in_page);

			start += PAGE_SIZE;
			len -= PAGE_SIZE;
			in_page = find_get_page(mapping, start >> PAGE_SHIFT);
			if (!in_page) {
				/*
				 * The previous input page is already released
				 * and in_page is NULL, so cleanup skips it.
				 */
				ret = -ENOENT;
				goto out;
			}
			workspace->in_buf.src = kmap(in_page);
			workspace->in_buf.pos = 0;
			workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
		}
	}
	/* All input consumed: flush what zstd still holds and end the frame */
	while (1) {
		size_t ret2;

		ret2 = ZSTD_endStream(stream, &workspace->out_buf);
		if (ZSTD_isError(ret2)) {
			pr_debug("BTRFS: ZSTD_endStream returned %d\n",
					ZSTD_getErrorCode(ret2));
			ret = -EIO;
			goto out;
		}
		if (ret2 == 0) {
			tot_out += workspace->out_buf.pos;
			break;
		}
		if (workspace->out_buf.pos >= max_out) {
			tot_out += workspace->out_buf.pos;
			ret = -E2BIG;
			goto out;
		}

		tot_out += PAGE_SIZE;
		max_out -= PAGE_SIZE;
		kunmap(out_page);
		if (nr_pages == nr_dest_pages) {
			out_page = NULL;
			ret = -E2BIG;
			goto out;
		}
		out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
		if (out_page == NULL) {
			ret = -ENOMEM;
			goto out;
		}
		pages[nr_pages++] = out_page;
		workspace->out_buf.dst = kmap(out_page);
		workspace->out_buf.pos = 0;
		workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
	}

	/* Compression that does not shrink the data is reported as too big */
	if (tot_out >= tot_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
	*total_in = tot_in;
	*total_out = tot_out;
out:
	*out_pages = nr_pages;
	/* Cleanup */
	if (in_page) {
		kunmap(in_page);
		put_page(in_page);
	}
	if (out_page)
		kunmap(out_page);
	return ret;
}

static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	struct page **pages_in = cb->compressed_pages;
	u64 disk_start = cb->start;
	struct bio *orig_bio = cb->orig_bio;
	size_t srclen = cb->compressed_len;
	ZSTD_DStream *stream;
	int ret = 0;
	unsigned long page_in_index = 0;
	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
	unsigned long buf_start;
	unsigned long total_out = 0;

	stream = ZSTD_initDStream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (!stream) {
		pr_debug("BTRFS: ZSTD_initDStream failed\n");
		ret = -EIO;
		goto done;
	}

572 573 574
	workspace->in_buf.src = kmap(pages_in[page_in_index]);
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
N
Nick Terrell 已提交
575

576 577 578
	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = PAGE_SIZE;
N
Nick Terrell 已提交
579 580 581 582

	while (1) {
		size_t ret2;

583 584
		ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
				&workspace->in_buf);
N
Nick Terrell 已提交
585 586 587 588 589 590 591
		if (ZSTD_isError(ret2)) {
			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
					ZSTD_getErrorCode(ret2));
			ret = -EIO;
			goto done;
		}
		buf_start = total_out;
592 593
		total_out += workspace->out_buf.pos;
		workspace->out_buf.pos = 0;
N
Nick Terrell 已提交
594

595 596
		ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
				buf_start, total_out, disk_start, orig_bio);
N
Nick Terrell 已提交
597 598 599
		if (ret == 0)
			break;

600
		if (workspace->in_buf.pos >= srclen)
N
Nick Terrell 已提交
601 602 603 604 605 606
			break;

		/* Check if we've hit the end of a frame */
		if (ret2 == 0)
			break;

607
		if (workspace->in_buf.pos == workspace->in_buf.size) {
N
Nick Terrell 已提交
608 609
			kunmap(pages_in[page_in_index++]);
			if (page_in_index >= total_pages_in) {
610
				workspace->in_buf.src = NULL;
N
Nick Terrell 已提交
611 612 613 614
				ret = -EIO;
				goto done;
			}
			srclen -= PAGE_SIZE;
615 616 617
			workspace->in_buf.src = kmap(pages_in[page_in_index]);
			workspace->in_buf.pos = 0;
			workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
N
Nick Terrell 已提交
618 619 620 621 622
		}
	}
	ret = 0;
	zero_fill_bio(orig_bio);
done:
623
	if (workspace->in_buf.src)
N
Nick Terrell 已提交
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650
		kunmap(pages_in[page_in_index]);
	return ret;
}

/*
 * zstd_decompress - decompress a buffer into (part of) a single page
 * @ws:         workspace obtained from zstd_get_workspace()
 * @data_in:    compressed input
 * @dest_page:  page receiving the decompressed bytes
 * @start_byte: offset in the decompressed stream at which copying starts
 * @srclen:     length of @data_in in bytes
 * @destlen:    number of bytes wanted; clamped to PAGE_SIZE
 *
 * Whatever part of the first @destlen bytes of @dest_page was not filled with
 * decompressed data is zeroed, on success and on failure alike.
 *
 * Returns 0 on success or -EIO on a zstd error or if the frame ends while
 * more output is still wanted.
 */
static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
		struct page *dest_page,
		unsigned long start_byte,
		size_t srclen, size_t destlen)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	ZSTD_DStream *stream;
	int ret = 0;
	size_t ret2;
	unsigned long total_out = 0;
	unsigned long pg_offset = 0;
	char *kaddr;

	/*
	 * Clamp before anything can fail: the zero-fill at "finish" uses
	 * destlen and must never reach past the end of dest_page.
	 */
	destlen = min_t(size_t, destlen, PAGE_SIZE);

	stream = ZSTD_initDStream(
			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
	if (!stream) {
		pr_warn("BTRFS: ZSTD_initDStream failed\n");
		ret = -EIO;
		goto finish;
	}

	workspace->in_buf.src = data_in;
	workspace->in_buf.pos = 0;
	workspace->in_buf.size = srclen;

	workspace->out_buf.dst = workspace->buf;
	workspace->out_buf.pos = 0;
	workspace->out_buf.size = PAGE_SIZE;

	/* Non-zero so the "frame is over" check passes on the first round */
	ret2 = 1;
	while (pg_offset < destlen
	       && workspace->in_buf.pos < workspace->in_buf.size) {
		unsigned long buf_start;
		unsigned long buf_offset;
		unsigned long bytes;

		/* Check if the frame is over and we still need more input */
		if (ret2 == 0) {
			pr_debug("BTRFS: ZSTD_decompressStream ended early\n");
			ret = -EIO;
			goto finish;
		}
		ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
				&workspace->in_buf);
		if (ZSTD_isError(ret2)) {
			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
					ZSTD_getErrorCode(ret2));
			ret = -EIO;
			goto finish;
		}

		buf_start = total_out;
		total_out += workspace->out_buf.pos;
		workspace->out_buf.pos = 0;

		/* Everything produced so far lies before the wanted range */
		if (total_out <= start_byte)
			continue;

		/* Skip the leading part of the buffer that precedes start_byte */
		if (total_out > start_byte && buf_start < start_byte)
			buf_offset = start_byte - buf_start;
		else
			buf_offset = 0;

		bytes = min_t(unsigned long, destlen - pg_offset,
				workspace->out_buf.size - buf_offset);

		kaddr = kmap_atomic(dest_page);
		memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
				bytes);
		kunmap_atomic(kaddr);

		pg_offset += bytes;
	}
	ret = 0;
finish:
	if (pg_offset < destlen) {
		kaddr = kmap_atomic(dest_page);
		memset(kaddr + pg_offset, 0, destlen - pg_offset);
		kunmap_atomic(kaddr);
	}
	return ret;
}

713
static unsigned int zstd_set_level(unsigned int level)
714
{
715 716 717 718
	if (!level)
		return ZSTD_BTRFS_DEFAULT_LEVEL;

	return min_t(unsigned int, level, ZSTD_BTRFS_MAX_LEVEL);
719 720
}

N
Nick Terrell 已提交
721
const struct btrfs_compress_op btrfs_zstd_compress = {
722 723 724 725
	.init_workspace_manager = zstd_init_workspace_manager,
	.cleanup_workspace_manager = zstd_cleanup_workspace_manager,
	.get_workspace = zstd_get_workspace,
	.put_workspace = zstd_put_workspace,
N
Nick Terrell 已提交
726 727 728 729 730
	.alloc_workspace = zstd_alloc_workspace,
	.free_workspace = zstd_free_workspace,
	.compress_pages = zstd_compress_pages,
	.decompress_bio = zstd_decompress_bio,
	.decompress = zstd_decompress,
731
	.set_level = zstd_set_level,
732 733
	.max_level	= ZSTD_BTRFS_MAX_LEVEL,
	.default_level	= ZSTD_BTRFS_DEFAULT_LEVEL,
N
Nick Terrell 已提交
734
};