sched_cpupri.c 5.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
/*
 *  kernel/sched_cpupri.c
 *
 *  CPU priority management
 *
 *  Copyright (C) 2007-2008 Novell
 *
 *  Author: Gregory Haskins <ghaskins@novell.com>
 *
 *  This code tracks the priority of each CPU so that global migration
 *  decisions are easy to calculate.  Each CPU can be in a state as follows:
 *
 *                 (INVALID), IDLE, NORMAL, RT1, ... RT99
 *
 *  going from the lowest priority to the highest.  CPUs in the INVALID state
 *  are not eligible for routing.  The system maintains this state with
 *  a 2 dimensional bitmap (the first for priority class, the second for cpus
 *  in that class).  Therefore a typical application without affinity
 *  restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
 *  searches).  For tasks with affinity restrictions, the algorithm has a
 *  worst case complexity of O(min(102, nr_domcpus)), though the scenario that
 *  yields the worst case search is fairly contrived.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; version 2
 *  of the License.
 */

#include "sched_cpupri.h"

/* Convert between a 140 based task->prio, and our 102 based cpupri */
static int convert_prio(int prio)
{
	int cpupri;

	if (prio == CPUPRI_INVALID)
		cpupri = CPUPRI_INVALID;
	else if (prio == MAX_PRIO)
		cpupri = CPUPRI_IDLE;
	else if (prio >= MAX_RT_PRIO)
		cpupri = CPUPRI_NORMAL;
	else
		cpupri = MAX_RT_PRIO - prio + 1;

	return cpupri;
}

#define for_each_cpupri_active(array, idx)                    \
  for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES);     \
       idx < CPUPRI_NR_PRIORITIES;                            \
       idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1))

/**
 * cpupri_find - find the best (lowest-pri) CPU in the system
 * @cp: The cpupri context
 * @p: The task
58
 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
59 60 61 62 63 64 65 66 67 68 69
 *
 * Note: This function returns the recommended CPUs as calculated during the
 * current invokation.  By the time the call returns, the CPUs may have in
 * fact changed priorities any number of times.  While not ideal, it is not
 * an issue of correctness since the normal rebalancer logic will correct
 * any discrepancies created by racing against the uncertainty of the current
 * priority configuration.
 *
 * Returns: (int)bool - CPUs were found
 */
int cpupri_find(struct cpupri *cp, struct task_struct *p,
70
		struct cpumask *lowest_mask)
71 72 73 74 75 76 77 78 79 80
{
	int                  idx      = 0;
	int                  task_pri = convert_prio(p->prio);

	for_each_cpupri_active(cp->pri_active, idx) {
		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];

		if (idx >= task_pri)
			break;

81
		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
82 83
			continue;

84
		if (lowest_mask) {
85
			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
86 87 88 89 90 91 92 93 94 95 96 97 98

			/*
			 * We have to ensure that we have at least one bit
			 * still set in the array, since the map could have
			 * been concurrently emptied between the first and
			 * second reads of vec->mask.  If we hit this
			 * condition, simply act as though we never hit this
			 * priority level and continue on.
			 */
			if (cpumask_any(lowest_mask) >= nr_cpu_ids)
				continue;
		}

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
		return 1;
	}

	return 0;
}

/**
 * cpupri_set - update the cpu priority setting
 * @cp: The cpupri context
 * @cpu: The target cpu
 * @pri: The priority (INVALID-RT99) to assign to this CPU
 *
 * Note: Assumes cpu_rq(cpu)->lock is locked
 *
 * Returns: (void)
 */
void cpupri_set(struct cpupri *cp, int cpu, int newpri)
{
	int                 *currpri = &cp->cpu_to_pri[cpu];
	int                  oldpri  = *currpri;
	unsigned long        flags;

	newpri = convert_prio(newpri);

	BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);

	if (newpri == oldpri)
		return;

	/*
	 * If the cpu was currently mapped to a different value, we
	 * first need to unmap the old value
	 */
	if (likely(oldpri != CPUPRI_INVALID)) {
		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];

		spin_lock_irqsave(&vec->lock, flags);

		vec->count--;
		if (!vec->count)
			clear_bit(oldpri, cp->pri_active);
140
		cpumask_clear_cpu(cpu, vec->mask);
141 142 143 144 145 146 147 148 149

		spin_unlock_irqrestore(&vec->lock, flags);
	}

	if (likely(newpri != CPUPRI_INVALID)) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];

		spin_lock_irqsave(&vec->lock, flags);

150
		cpumask_set_cpu(cpu, vec->mask);
151 152 153 154 155 156 157 158 159 160 161 162 163
		vec->count++;
		if (vec->count == 1)
			set_bit(newpri, cp->pri_active);

		spin_unlock_irqrestore(&vec->lock, flags);
	}

	*currpri = newpri;
}

/**
 * cpupri_init - initialize the cpupri structure
 * @cp: The cpupri context
164
 * @bootmem: true if allocations need to use bootmem
165
 *
166
 * Returns: -ENOMEM if memory fails.
167
 */
L
Li Zefan 已提交
168
int cpupri_init(struct cpupri *cp, bool bootmem)
169
{
P
Pekka Enberg 已提交
170
	gfp_t gfp = GFP_KERNEL;
171 172
	int i;

P
Pekka Enberg 已提交
173 174 175
	if (bootmem)
		gfp = GFP_NOWAIT;

176 177 178 179 180 181 182
	memset(cp, 0, sizeof(*cp));

	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[i];

		spin_lock_init(&vec->lock);
		vec->count = 0;
P
Pekka Enberg 已提交
183
		if (!zalloc_cpumask_var(&vec->mask, gfp))
184
			goto cleanup;
185 186 187 188
	}

	for_each_possible_cpu(i)
		cp->cpu_to_pri[i] = CPUPRI_INVALID;
189 190 191 192 193 194
	return 0;

cleanup:
	for (i--; i >= 0; i--)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
	return -ENOMEM;
195 196
}

197 198 199 200 201 202 203
/**
 * cpupri_cleanup - clean up the cpupri structure
 * @cp: The cpupri context
 */
void cpupri_cleanup(struct cpupri *cp)
{
	int i;
204

205 206 207
	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
}