/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2015 Intel Corporation
 */

#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>

#include <drm/i915_drm.h>

#include "i915_trace.h"

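/*
 * Acquire struct_mutex on behalf of the shrinker. If the caller already
 * holds the mutex (recursive entry from within i915) we proceed without
 * taking it again; if the trylock fails we only wait for the lock when
 * shrinking the active list, using a killable nested acquisition.
 * *unlock tells the caller whether shrinker_unlock() must drop the mutex.
 */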
static bool shrinker_lock(struct drm_i915_private *i915,
			  unsigned int flags,
			  bool *unlock)
{
	struct mutex *m = &i915->drm.struct_mutex;

	switch (mutex_trylock_recursive(m)) {
	case MUTEX_TRYLOCK_RECURSIVE:
		*unlock = false;
		return true;

	case MUTEX_TRYLOCK_FAILED:
		*unlock = false;
		if (flags & I915_SHRINK_ACTIVE &&
		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
			*unlock = true;
		return *unlock;

	case MUTEX_TRYLOCK_SUCCESS:
		*unlock = true;
		return true;
	}

	BUG();
}

static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
{
	if (!unlock)
		return;

	mutex_unlock(&i915->drm.struct_mutex);
}

static bool swap_available(void)
{
	return get_nr_swap_pages() > 0;
}

static bool can_release_pages(struct drm_i915_gem_object *obj)
{
	/* Consider only shrinkable objects. */
	if (!i915_gem_object_is_shrinkable(obj))
		return false;

	/* Only report true if by unbinding the object and putting its pages
	 * we can actually make forward progress towards freeing physical
	 * pages.
	 *
	 * If the pages are pinned for any other reason than being bound
	 * to the GPU, simply unbinding from the GPU is not going to succeed
	 * in releasing our pin count on the pages themselves.
	 */
	if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
		return false;

	/* If any vma are "permanently" pinned, it will prevent us from
	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
	 * a permanent pin, along with a few others like the context objects.
	 * To simplify the scan, and to avoid walking the list of vma under the
	 * object, we just check its count of permanent pins.
	 */
	if (READ_ONCE(obj->pin_global))
		return false;

	/* We can only return physical pages to the system if we can either
	 * discard the contents (because the user has marked them as being
	 * purgeable) or if we can move their contents out to swap.
	 */
	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}

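/*
 * Unbind the object from the GPU (also waiting on active users when
 * I915_SHRINK_ACTIVE is set) and, if that succeeds, release its backing
 * pages. Returns true if the object ends up with no pages attached.
 */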
static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
			      unsigned long shrink)
{
	unsigned long flags;

	flags = 0;
	if (shrink & I915_SHRINK_ACTIVE)
		flags = I915_GEM_OBJECT_UNBIND_ACTIVE;

	if (i915_gem_object_unbind(obj, flags) == 0)
		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);

	return !i915_gem_object_has_pages(obj);
}

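/*
 * Purgeable objects are truncated immediately (their contents may be
 * discarded); objects that have already been purged need no further work.
 * For everything else, start writeback of the shmem backing store when
 * I915_SHRINK_WRITEBACK is requested so the pages can be reclaimed.
 */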
static void try_to_writeback(struct drm_i915_gem_object *obj,
			     unsigned int flags)
{
	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		/* fall through */
	case __I915_MADV_PURGED:
		return;
	}

	if (flags & I915_SHRINK_WRITEBACK)
		i915_gem_object_writeback(obj);
}

/**
 * i915_gem_shrink - Shrink buffer object caches
 * @i915: i915 device
 * @target: amount of memory to make available, in pages
 * @nr_scanned: optional output for number of pages scanned (incremental)
 * @shrink: control flags for selecting cache types
 *
 * This function is the main interface to the shrinker. It will try to release
 * up to @target pages of main memory backing storage from buffer objects.
 * Selection of the specific caches can be done with @shrink. This is e.g.
 * useful when purgeable objects should be removed from caches preferentially.
 *
 * Note that it's not guaranteed that the released amount is actually available
 * as free system memory - the pages might still be in use due to other reasons
 * (like cpu mmaps) or the mm core has reused them before we could grab them.
 * Therefore code that needs to explicitly shrink buffer object caches (e.g. to
 * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
 *
 * Also note that any kind of pinning (both per-vma address space pins and
 * backing storage pins at the buffer object level) results in the shrinker
 * code having to skip the object.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long
i915_gem_shrink(struct drm_i915_private *i915,
		unsigned long target,
		unsigned long *nr_scanned,
		unsigned int shrink)
{
	const struct {
		struct list_head *list;
		unsigned int bit;
	} phases[] = {
		{ &i915->mm.purge_list, ~0u },
		{
			&i915->mm.shrink_list,
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
		},
		{ NULL, 0 },
	}, *phase;
	intel_wakeref_t wakeref = 0;
	unsigned long count = 0;
	unsigned long scanned = 0;
	bool unlock;

	if (!shrinker_lock(i915, shrink, &unlock))
		return 0;

	/*
	 * When shrinking the active list, we should also consider active
	 * contexts. Active contexts are pinned until they are retired, and
	 * so can not be simply unbound to retire and unpin their pages. To
	 * shrink the contexts, we must wait until the gpu is idle and
	 * completed its switch to the kernel context. In short, we do
	 * not have a good mechanism for idling a specific context.
	 */

	trace_i915_gem_shrink(i915, target, shrink);

	/*
	 * Unbinding of objects will require HW access; let us not wake the
	 * device just to recover a little memory. If absolutely necessary,
	 * we will force the wake during oom-notifier.
	 */
	if (shrink & I915_SHRINK_BOUND) {
		wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
		if (!wakeref)
			shrink &= ~I915_SHRINK_BOUND;
	}

	/*
	 * As we may completely rewrite the (un)bound list whilst unbinding
	 * (due to retiring requests) we have to strictly process only
	 * one element of the list at the time, and recheck the list
	 * on every iteration.
	 *
	 * In particular, we must hold a reference whilst removing the
	 * object as we may end up waiting for and/or retiring the objects.
	 * This might release the final reference (held by the active list)
	 * and result in the object being freed from under us. This is
	 * similar to the precautions the eviction code must take whilst
	 * removing objects.
	 *
	 * Also note that although these lists do not hold a reference to
	 * the object we can safely grab one here: The final object
	 * unreferencing and the bound_list are both protected by the
	 * dev->struct_mutex and so we won't ever be able to observe an
	 * object on the bound_list with a reference count equal to 0.
	 */
	for (phase = phases; phase->list; phase++) {
		struct list_head still_in_list;
		struct drm_i915_gem_object *obj;
		unsigned long flags;

		if ((shrink & phase->bit) == 0)
			continue;

		INIT_LIST_HEAD(&still_in_list);

		/*
		 * We serialize our access to unreferenced objects through
		 * the use of the struct_mutex. While the objects are not
		 * yet freed (due to RCU then a workqueue) we still want
		 * to be able to shrink their pages, so they remain on
		 * the unbound/bound list until actually freed.
		 */
		spin_lock_irqsave(&i915->mm.obj_lock, flags);
		while (count < target &&
		       (obj = list_first_entry_or_null(phase->list,
						       typeof(*obj),
						       mm.link))) {
			list_move_tail(&obj->mm.link, &still_in_list);

			if (shrink & I915_SHRINK_VMAPS &&
			    !is_vmalloc_addr(obj->mm.mapping))
				continue;

			if (!(shrink & I915_SHRINK_ACTIVE) &&
			    i915_gem_object_is_framebuffer(obj))
				continue;

			if (!(shrink & I915_SHRINK_BOUND) &&
			    atomic_read(&obj->bind_count))
				continue;

			if (!can_release_pages(obj))
				continue;

			if (!kref_get_unless_zero(&obj->base.refcount))
				continue;

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

			if (unsafe_drop_pages(obj, shrink)) {
				/* May arrive from get_pages on another bo */
				mutex_lock_nested(&obj->mm.lock,
						  I915_MM_SHRINKER);
				if (!i915_gem_object_has_pages(obj)) {
					try_to_writeback(obj, shrink);
					count += obj->base.size >> PAGE_SHIFT;
				}
				mutex_unlock(&obj->mm.lock);
			}

			scanned += obj->base.size >> PAGE_SHIFT;
			i915_gem_object_put(obj);

			spin_lock_irqsave(&i915->mm.obj_lock, flags);
		}
		list_splice_tail(&still_in_list, phase->list);
		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}

	if (shrink & I915_SHRINK_BOUND)
		intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	shrinker_unlock(i915, unlock);

	if (nr_scanned)
		*nr_scanned += scanned;
	return count;
}

/**
 * i915_gem_shrink_all - Shrink buffer object caches completely
 * @i915: i915 device
 *
 * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
 * caches completely. It also first waits for and retires all outstanding
 * requests to also be able to release backing storage for active objects.
 *
 * This should only be used in code to intentionally quiesce the gpu or as a
 * last-ditch effort when memory seems to have run out.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;
	unsigned long freed = 0;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		freed = i915_gem_shrink(i915, -1UL, NULL,
					I915_SHRINK_BOUND |
					I915_SHRINK_UNBOUND |
					I915_SHRINK_ACTIVE);
	}

	return freed;
}

/*
 * shrinker->count_objects callback: report how many pages the shrinker
 * could plausibly reclaim, based on the total size of the objects on the
 * shrink lists, and tune the preferred batch size for the next scan.
 */
static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long num_objects;
	unsigned long count;

	count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
	num_objects = READ_ONCE(i915->mm.shrink_count);

	/*
	 * Update our preferred vmscan batch size for the next pass.
	 * Our rough guess for an effective batch size is roughly 2
	 * available GEM objects worth of pages. That is, we don't want
	 * the shrinker to fire until it is worth the cost of freeing an
	 * entire GEM object.
	 */
	if (num_objects) {
		unsigned long avg = 2 * count / num_objects;

		i915->mm.shrinker.batch =
			max((i915->mm.shrinker.batch + avg) >> 1,
			    128ul /* default SHRINK_BATCH */);
	}

	return count;
}

/*
 * shrinker->scan_objects callback: attempt to release up to sc->nr_to_scan
 * pages. When running as kswapd and the first pass falls short, take a
 * runtime-pm wakeref and also shrink active objects.
 */
static unsigned long
i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long freed;
	bool unlock;

	sc->nr_scanned = 0;

	if (!shrinker_lock(i915, 0, &unlock))
		return SHRINK_STOP;

	freed = i915_gem_shrink(i915,
				sc->nr_to_scan,
				&sc->nr_scanned,
				I915_SHRINK_BOUND |
				I915_SHRINK_UNBOUND |
				I915_SHRINK_WRITEBACK);
	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
			freed += i915_gem_shrink(i915,
						 sc->nr_to_scan - sc->nr_scanned,
						 &sc->nr_scanned,
						 I915_SHRINK_ACTIVE |
						 I915_SHRINK_BOUND |
						 I915_SHRINK_UNBOUND |
						 I915_SHRINK_WRITEBACK);
		}
	}

	shrinker_unlock(i915, unlock);

	return sc->nr_scanned ? freed : SHRINK_STOP;
}

/*
 * OOM notifier: as a last resort, forcibly wake the device and purge as
 * much GEM memory as possible, reporting how many pages were freed and
 * how many remain pinned.
 */
static int
i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.oom_notifier);
	struct drm_i915_gem_object *obj;
	unsigned long unevictable, available, freed_pages;
	intel_wakeref_t wakeref;
	unsigned long flags;

	freed_pages = 0;
	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_WRITEBACK);

	/* Because we may be allocating inside our own driver, we cannot
	 * assert that there are no objects with pinned pages that are not
	 * being pointed to by hardware.
	 */
	available = unevictable = 0;
	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		if (!can_release_pages(obj))
			unevictable += obj->base.size >> PAGE_SHIFT;
		else
			available += obj->base.size >> PAGE_SHIFT;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

	if (freed_pages || available)
		pr_info("Purging GPU memory, %lu pages freed, "
			"%lu pages still pinned, %lu pages left available.\n",
			freed_pages, unevictable, available);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

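/*
 * vmap purge notifier: called when the kernel runs low on vmap address
 * space. Drop vmapped GEM pages and release any cached GGTT iomaps
 * (which wrap vmap space) belonging to idle vma.
 */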
static int
i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
	struct i915_vma *vma, *next;
	unsigned long freed_pages = 0;
	intel_wakeref_t wakeref;
	bool unlock;

	if (!shrinker_lock(i915, 0, &unlock))
		return NOTIFY_DONE;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_VMAPS);

	/* We also want to clear any cached iomaps as they wrap vmap */
	mutex_lock(&i915->ggtt.vm.mutex);
	list_for_each_entry_safe(vma, next,
				 &i915->ggtt.vm.bound_list, vm_link) {
		unsigned long count = vma->node.size >> PAGE_SHIFT;

		if (!vma->iomap || i915_vma_is_active(vma))
			continue;

		mutex_unlock(&i915->ggtt.vm.mutex);
		if (i915_vma_unbind(vma) == 0)
			freed_pages += count;
		mutex_lock(&i915->ggtt.vm.mutex);
	}
	mutex_unlock(&i915->ggtt.vm.mutex);

	shrinker_unlock(i915, unlock);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

/**
 * i915_gem_shrinker_register - Register the i915 shrinker
 * @i915: i915 device
 *
 * This function registers and sets up the i915 shrinker, OOM notifier and
 * vmap purge notifier.
 */
void i915_gem_shrinker_register(struct drm_i915_private *i915)
{
	i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
	i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
	i915->mm.shrinker.seeks = DEFAULT_SEEKS;
	i915->mm.shrinker.batch = 4096;
	WARN_ON(register_shrinker(&i915->mm.shrinker));

	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
	WARN_ON(register_oom_notifier(&i915->mm.oom_notifier));

	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
	WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier));
}

/**
 * i915_gem_shrinker_unregister - Unregister the i915 shrinker
 * @i915: i915 device
 *
 * This function unregisters the i915 shrinker, OOM notifier and vmap purge
 * notifier.
 */
void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
{
	WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
	unregister_shrinker(&i915->mm.shrinker);
}

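/*
 * Teach lockdep about the relationship between @mutex and the locks the
 * shrinker may take (struct_mutex, fs_reclaim), so that potential ABBA
 * deadlocks against the shrinker are reported without having to hit them
 * at runtime.
 */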
void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
				    struct mutex *mutex)
{
	bool unlock = false;

	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
		mutex_acquire(&i915->drm.struct_mutex.dep_map,
			      I915_MM_NORMAL, 0, _RET_IP_);
		unlock = true;
	}

	fs_reclaim_acquire(GFP_KERNEL);

	/*
	 * As we invariably rely on the struct_mutex within the shrinker,
	 * but have a complicated recursion dance, taint all the mutexes used
	 * within the shrinker with the struct_mutex. For completeness, we
	 * taint with all subclasses of struct_mutex, even though we should
	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
	 * deadlocks from using struct_mutex inside @mutex.
	 */
	mutex_acquire(&i915->drm.struct_mutex.dep_map,
		      I915_MM_SHRINKER, 0, _RET_IP_);

	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
	mutex_release(&mutex->dep_map, 0, _RET_IP_);

	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);

	fs_reclaim_release(GFP_KERNEL);

	if (unlock)
		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
}

#define obj_to_i915(obj__) to_i915((obj__)->base.dev)

void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
{
	/*
	 * We can only be called while the pages are pinned or when
	 * the pages are released. If pinned, we should only be called
	 * from a single caller under controlled conditions; and on release
	 * only one caller may release us. Neither the two may cross.
	 */
	if (!list_empty(&obj->mm.link)) { /* pinned by caller */
		struct drm_i915_private *i915 = obj_to_i915(obj);
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);
		GEM_BUG_ON(list_empty(&obj->mm.link));

		list_del_init(&obj->mm.link);
		i915->mm.shrink_count--;
		i915->mm.shrink_memory -= obj->base.size;

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
					      struct list_head *head)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
	GEM_BUG_ON(!list_empty(&obj->mm.link));

	if (i915_gem_object_is_shrinkable(obj)) {
		struct drm_i915_private *i915 = obj_to_i915(obj);
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);
		GEM_BUG_ON(!kref_read(&obj->base.refcount));

		list_add_tail(&obj->mm.link, head);
		i915->mm.shrink_count++;
		i915->mm.shrink_memory += obj->base.size;

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

/*
 * Put the object on the normal shrink list: its pages become candidates
 * for reclaim under memory pressure.
 */
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
{
	__i915_gem_object_make_shrinkable(obj,
					  &obj_to_i915(obj)->mm.shrink_list);
}

/*
 * Put the object on the purge list: the user has marked it as purgeable
 * (I915_MADV_DONTNEED), so the shrinker may discard its contents outright
 * rather than swapping them out.
 */
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
{
	__i915_gem_object_make_shrinkable(obj,
					  &obj_to_i915(obj)->mm.purge_list);
}