prio_tree.c 6.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * mm/prio_tree.c - priority search tree for mapping->i_mmap
 *
 * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
 *
 * This file is released under the GPL v2.
 *
 * Based on the radix priority search tree proposed by Edward M. McCreight
 * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
 *
 * 02Feb2004	Initial version
 */

#include <linux/mm.h>
#include <linux/prio_tree.h>
16
#include <linux/prefetch.h>
L
Linus Torvalds 已提交
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37

/*
 * See lib/prio_tree.c for details on the general radix priority search tree
 * code.
 */

/*
 * The following #defines are mirrored from lib/prio_tree.c. They're only used
 * for debugging, and should be removed (along with the debugging code using
 * them) when switching also VMAs to the regular prio_tree code.
 */

#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
#define VMA_SIZE(vma)	  (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
/* avoid overflow */
#define HEAP_INDEX(vma)   ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))

/*
 * Radix priority search tree for address_space->i_mmap
 *
 * For each vma that map a unique set of file pages i.e., unique [radix_index,
S
Simon Arlott 已提交
38
 * heap_index] value, we have a corresponding priority search tree node. If
L
Linus Torvalds 已提交
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
 * multiple vmas have identical [radix_index, heap_index] value, then one of
 * them is used as a tree node and others are stored in a vm_set list. The tree
 * node points to the first vma (head) of the list using vm_set.head.
 *
 * prio_tree_root
 *      |
 *      A       vm_set.head
 *     / \      /
 *    L   R -> H-I-J-K-M-N-O-P-Q-S
 *    ^   ^    <-- vm_set.list -->
 *  tree nodes
 *
 * We need some way to identify whether a vma is a tree node, head of a vm_set
 * list, or just a member of a vm_set list. We cannot use vm_flags to store
 * such information. The reason is, in the above figure, it is possible that
 * vm_flags' of R and H are covered by the different mmap_sems. When R is
 * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
 * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
 * That's why some trick involving shared.vm_set.parent is used for identifying
 * tree nodes and list head nodes.
 *
 * vma radix priority search tree node rules:
 *
 * vma->shared.vm_set.parent != NULL    ==> a tree node
 *      vma->shared.vm_set.head != NULL ==> list of others mapping same range
 *      vma->shared.vm_set.head == NULL ==> no others map the same range
 *
 * vma->shared.vm_set.parent == NULL
 * 	vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range
 * 	vma->shared.vm_set.head == NULL ==> a list node
 */

/*
 * Add a new vma known to map the same set of pages as the old vma:
 * useful for fork's dup_mmap as well as vma_prio_tree_insert below.
 * Note that it just happens to work correctly on i_mmap_nonlinear too.
 */
void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
{
	/* Leave these BUG_ONs till prio_tree patch stabilizes */
	BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
	BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));

	vma->shared.vm_set.head = NULL;
	vma->shared.vm_set.parent = NULL;

	if (!old->shared.vm_set.parent)
		list_add(&vma->shared.vm_set.list,
				&old->shared.vm_set.list);
	else if (old->shared.vm_set.head)
		list_add_tail(&vma->shared.vm_set.list,
				&old->shared.vm_set.head->shared.vm_set.list);
	else {
		INIT_LIST_HEAD(&vma->shared.vm_set.list);
		vma->shared.vm_set.head = old;
		old->shared.vm_set.head = vma;
	}
}

void vma_prio_tree_insert(struct vm_area_struct *vma,
			  struct prio_tree_root *root)
{
	struct prio_tree_node *ptr;
	struct vm_area_struct *old;

	vma->shared.vm_set.head = NULL;

	ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node);
	if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) {
		old = prio_tree_entry(ptr, struct vm_area_struct,
					shared.prio_tree_node);
		vma_prio_tree_add(vma, old);
	}
}

void vma_prio_tree_remove(struct vm_area_struct *vma,
			  struct prio_tree_root *root)
{
	struct vm_area_struct *node, *head, *new_head;

	if (!vma->shared.vm_set.head) {
		if (!vma->shared.vm_set.parent)
			list_del_init(&vma->shared.vm_set.list);
		else
			raw_prio_tree_remove(root, &vma->shared.prio_tree_node);
	} else {
		/* Leave this BUG_ON till prio_tree patch stabilizes */
		BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
		if (vma->shared.vm_set.parent) {
			head = vma->shared.vm_set.head;
			if (!list_empty(&head->shared.vm_set.list)) {
				new_head = list_entry(
					head->shared.vm_set.list.next,
					struct vm_area_struct,
					shared.vm_set.list);
				list_del_init(&head->shared.vm_set.list);
			} else
				new_head = NULL;

			raw_prio_tree_replace(root, &vma->shared.prio_tree_node,
					&head->shared.prio_tree_node);
			head->shared.vm_set.head = new_head;
			if (new_head)
				new_head->shared.vm_set.head = head;

		} else {
			node = vma->shared.vm_set.head;
			if (!list_empty(&vma->shared.vm_set.list)) {
				new_head = list_entry(
					vma->shared.vm_set.list.next,
					struct vm_area_struct,
					shared.vm_set.list);
				list_del_init(&vma->shared.vm_set.list);
				node->shared.vm_set.head = new_head;
				new_head->shared.vm_set.head = node;
			} else
				node->shared.vm_set.head = NULL;
		}
	}
}

/*
 * Helper function to enumerate vmas that map a given file page or a set of
 * contiguous file pages. The function returns vmas that at least map a single
 * page in the given range of contiguous file pages.
 */
struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
					struct prio_tree_iter *iter)
{
	struct prio_tree_node *ptr;
	struct vm_area_struct *next;

	if (!vma) {
		/*
		 * First call is with NULL vma
		 */
		ptr = prio_tree_next(iter);
		if (ptr) {
			next = prio_tree_entry(ptr, struct vm_area_struct,
						shared.prio_tree_node);
			prefetch(next->shared.vm_set.head);
			return next;
		} else
			return NULL;
	}

	if (vma->shared.vm_set.parent) {
		if (vma->shared.vm_set.head) {
			next = vma->shared.vm_set.head;
			prefetch(next->shared.vm_set.list.next);
			return next;
		}
	} else {
		next = list_entry(vma->shared.vm_set.list.next,
				struct vm_area_struct, shared.vm_set.list);
		if (!next->shared.vm_set.head) {
			prefetch(next->shared.vm_set.list.next);
			return next;
		}
	}

	ptr = prio_tree_next(iter);
	if (ptr) {
		next = prio_tree_entry(ptr, struct vm_area_struct,
					shared.prio_tree_node);
		prefetch(next->shared.vm_set.head);
		return next;
	} else
		return NULL;
}