/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31	NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"

#define SPU_MIN_TIMESLICE 	(100 * HZ / 1000)

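/*
 * One runqueue per priority level; bit MAX_PRIO in the bitmap is kept
 * set as a sentinel (see spu_sched_init()) so that sched_find_first_bit()
 * returns MAX_PRIO when no context is runnable.
 */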
#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
struct spu_prio_array {
	unsigned long bitmap[SPU_BITMAP_SIZE];
	struct list_head runq[MAX_PRIO];
	spinlock_t runq_lock;
	struct list_head active_list[MAX_NUMNODES];
	struct mutex active_mutex[MAX_NUMNODES];
};

static struct spu_prio_array *spu_prio;

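/**
 * node_allowed - check whether the current task may use spus on a node
 * @node:	node to check
 *
 * Returns 1 if @node has cpus and the current task's cpu affinity mask
 * intersects that node, 0 otherwise.
 */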
static inline int node_allowed(int node)
{
	cpumask_t mask;

	if (!nr_cpus_node(node))
		return 0;
	mask = node_to_cpumask(node);
	if (!cpus_intersects(mask, current->cpus_allowed))
		return 0;
	return 1;
}

/**
 * spu_add_to_active_list - add spu to active list
 * @spu:	spu to add to the active list
 */
static void spu_add_to_active_list(struct spu *spu)
{
	mutex_lock(&spu_prio->active_mutex[spu->node]);
	list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
	mutex_unlock(&spu_prio->active_mutex[spu->node]);
}

/**
 * spu_remove_from_active_list - remove spu from active list
 * @spu:       spu to remove from the active list
 */
static void spu_remove_from_active_list(struct spu *spu)
{
	int node = spu->node;

	mutex_lock(&spu_prio->active_mutex[node]);
	list_del_init(&spu->list);
	mutex_unlock(&spu_prio->active_mutex[node]);
}

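/**
 * mm_needs_global_tlbie - force global TLB invalidates for an mm
 * @mm:	address space that is now also used by an SPE
 *
 * Marking the mm as active on more than one cpu makes the TLB flush
 * code fall back to broadcast tlbie instead of the cpu-local tlbiel,
 * which is needed because SPE translations are not covered by
 * cpu_vm_mask.
 */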
static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;

	/* Global TLBIE broadcast required with SPEs. */
	__cpus_setall(&mm->cpu_vm_mask, nr);
}

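/*
 * Notifier chain fired on every spu context switch.  The event value
 * passed to subscribers is ctx->object_id, or 0 when a context is
 * unbound from the spu.
 */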
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
	blocking_notifier_call_chain(&spu_switch_notifier,
			    ctx ? ctx->object_id : 0, spu);
}

int spu_switch_event_register(struct notifier_block * n)
{
	return blocking_notifier_chain_register(&spu_switch_notifier, n);
}

int spu_switch_event_unregister(struct notifier_block * n)
{
	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}

/**
 * spu_bind_context - bind spu context to physical spu
 * @spu:	physical spu to bind to
 * @ctx:	context to bind
 */
static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
		 spu->number, spu->node);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;
	spu->pid = current->pid;
	spu->mm = ctx->owner;
	mm_needs_global_tlbie(spu->mm);
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	spu->stop_callback = spufs_stop_callback;
	spu->mfc_callback = spufs_mfc_callback;
	spu->dma_callback = spufs_dma_callback;
	mb();
	spu_unmap_mappings(ctx);
	spu_restore(&ctx->csa, spu);
	spu->timestamp = jiffies;
	spu_cpu_affinity_set(spu, raw_smp_processor_id());
	spu_switch_notify(spu, ctx);
	spu_add_to_active_list(spu);
	ctx->state = SPU_STATE_RUNNABLE;
}

/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:	physical spu to unbind from
 * @ctx:	context to unbind
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
		 spu->pid, spu->number, spu->node);

	spu_remove_from_active_list(spu);
	spu_switch_notify(spu, NULL);
	spu_unmap_mappings(ctx);
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->dma_callback = NULL;
	spu->mm = NULL;
	spu->pid = 0;
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	spu->flags = 0;
	spu->ctx = NULL;
}

/**
 * spu_add_to_rq - add a context to the runqueue
 * @ctx:       context to add
 */
static void spu_add_to_rq(struct spu_context *ctx)
{
	spin_lock(&spu_prio->runq_lock);
	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
	set_bit(ctx->prio, spu_prio->bitmap);
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_del_from_rq - remove a context from the runqueue
 * @ctx:       context to remove
 */
static void spu_del_from_rq(struct spu_context *ctx)
{
	spin_lock(&spu_prio->runq_lock);
	list_del_init(&ctx->rq);
	if (list_empty(&spu_prio->runq[ctx->prio]))
		clear_bit(ctx->prio, spu_prio->bitmap);
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_grab_context - find the first context for a given priority
 * @prio:      priority of the context to look up
 *
 * This function returns the first context queued on the runqueue for
 * priority @prio without removing it from the queue; the woken context
 * removes itself via spu_del_from_rq() once it has been scheduled.
 *
 * Returns the spu_context found, or NULL if the runqueue for @prio is
 * empty.
 *
 * Must be called with spu_prio->runq_lock held.
 */
static struct spu_context *spu_grab_context(int prio)
{
	struct list_head *rq = &spu_prio->runq[prio];

	if (list_empty(rq))
		return NULL;
	return list_entry(rq->next, struct spu_context, rq);
}

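/**
 * spu_prio_wait - sleep until woken up by spu_reschedule
 * @ctx:	spu context to wait for
 *
 * Sleeps on ctx->stop_wq with SPU_SCHED_WAKE set so that spu_reschedule()
 * will wake the context once an spu becomes available.  ctx->state_mutex
 * is dropped across the actual sleep and retaken afterwards.
 */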
static void spu_prio_wait(struct spu_context *ctx)
{
	DEFINE_WAIT(wait);

	set_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		mutex_unlock(&ctx->state_mutex);
		schedule();
		mutex_lock(&ctx->state_mutex);
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->stop_wq, &wait);
	clear_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
}

/**
 * spu_reschedule - try to find a runnable context for a spu
 * @spu:       spu that has become available
 *
 * This function is called whenever a spu becomes idle.  It frees the
 * spu and wakes the highest priority context waiting on the runqueue
 * so that it can claim the freed spu.
 */
static void spu_reschedule(struct spu *spu)
{
	int best;

	spu_free(spu);

	spin_lock(&spu_prio->runq_lock);
	best = sched_find_first_bit(spu_prio->bitmap);
	if (best < MAX_PRIO) {
		struct spu_context *ctx = spu_grab_context(best);
		if (ctx && test_bit(SPU_SCHED_WAKE, &ctx->sched_flags))
			wake_up(&ctx->stop_wq);
	}
	spin_unlock(&spu_prio->runq_lock);
}

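/**
 * spu_get_idle - try to allocate an idle spu for a context
 * @ctx:	spu context to find an spu for
 *
 * Walks the NUMA nodes starting at the local one and returns the first
 * idle spu found on a node the current task is allowed to run on, or
 * NULL if no idle spu is available.
 */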
static struct spu *spu_get_idle(struct spu_context *ctx)
{
	struct spu *spu = NULL;
	int node = cpu_to_node(raw_smp_processor_id());
	int n;

	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;
		spu = spu_alloc_node(node);
		if (spu)
			break;
	}
	return spu;
}

/**
 * spu_activate - find a free spu for a context and execute it
 * @ctx:	spu context to schedule
 * @flags:	flags (currently only SPU_ACTIVATE_NOWAKE is honoured)
 *
 * Tries to find a free spu to run @ctx.  If no free spu is available
 * the context is added to the runqueue and, unless SPU_ACTIVATE_NOWAKE
 * is set, the caller sleeps until an spu becomes available.
 */
int spu_activate(struct spu_context *ctx, unsigned long flags)
{

	if (ctx->spu)
		return 0;

	do {
		struct spu *spu;

		spu = spu_get_idle(ctx);
		if (spu) {
			spu_bind_context(spu, ctx);
			return 0;
		}

		spu_add_to_rq(ctx);
		if (!(flags & SPU_ACTIVATE_NOWAKE))
			spu_prio_wait(ctx);
		spu_del_from_rq(ctx);
	} while (!signal_pending(current));

	return -ERESTARTSYS;
}

/**
 * spu_deactivate - unbind a context from its physical spu
 * @ctx:	spu context to unbind
 *
 * Unbind @ctx from the physical spu it is running on and wake the
 * highest priority waiting context to run on the freed physical spu.
 */
void spu_deactivate(struct spu_context *ctx)
{
	struct spu *spu = ctx->spu;

	if (spu) {
		spu_unbind_context(spu, ctx);
		spu_reschedule(spu);
	}
}

/**
 * spu_yield - yield a physical spu if others are waiting
 * @ctx:	spu context to yield
 *
 * Check if another context is waiting on the runqueue and, if so,
 * unbind @ctx from its physical spu so that the waiting context can
 * run on the freed physical spu instead.
 */
void spu_yield(struct spu_context *ctx)
{
	struct spu *spu;
	int need_yield = 0;

	if (mutex_trylock(&ctx->state_mutex)) {
		if ((spu = ctx->spu) != NULL) {
			int best = sched_find_first_bit(spu_prio->bitmap);
			if (best < MAX_PRIO) {
				pr_debug("%s: yielding SPU %d NODE %d\n",
					 __FUNCTION__, spu->number, spu->node);
				spu_deactivate(ctx);
				need_yield = 1;
			}
		}
		mutex_unlock(&ctx->state_mutex);
	}
	if (unlikely(need_yield))
		yield();
}

int __init spu_sched_init(void)
{
	int i;

	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
	if (!spu_prio) {
		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
		       __FUNCTION__);
		return 1;
	}
	for (i = 0; i < MAX_PRIO; i++) {
		INIT_LIST_HEAD(&spu_prio->runq[i]);
		__clear_bit(i, spu_prio->bitmap);
	}
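	/* bit MAX_PRIO stays set as a sentinel for sched_find_first_bit() */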
	__set_bit(MAX_PRIO, spu_prio->bitmap);
	for (i = 0; i < MAX_NUMNODES; i++) {
		mutex_init(&spu_prio->active_mutex[i]);
		INIT_LIST_HEAD(&spu_prio->active_list[i]);
	}
	spin_lock_init(&spu_prio->runq_lock);
	return 0;
}

void __exit spu_sched_exit(void)
{
	struct spu *spu, *tmp;
	int node;

	for (node = 0; node < MAX_NUMNODES; node++) {
		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
					 list) {
			list_del_init(&spu->list);
			spu_free(spu);
		}
		mutex_unlock(&spu_prio->active_mutex[node]);
	}
	kfree(spu_prio);
}