提交 bc2d968f 编写于 作者: L Linus Torvalds

Merge branch 'parisc-for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux

Pull parisc updates from Helge Deller:
 "Main fixes and updates in this patch series are:
   - we faced kernel stack corruptions because of multiple delivery of
     interrupts
   - added kernel stack overflow checks
   - added possibility to use dedicated stacks for irq processing
   - initial support for page sizes > 4k
   - more information in /proc/interrupts (e.g.  TLB flushes and number
     of IPI calls)
   - documented how the parisc gateway page works
   - and of course quite some other smaller cleanups and fixes."

* 'parisc-for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: tlb flush counting fix for SMP and UP
  parisc: more irq statistics in /proc/interrupts
  parisc: implement irq stacks
  parisc: add kernel stack overflow check
  parisc: only re-enable interrupts if we need to schedule or deliver signals when returning to userspace
  parisc: implement atomic64_dec_if_positive()
  parisc: use long branch in fork_like macro
  parisc: fix NATIVE set up in build
  parisc: document the parisc gateway page
  parisc: fix partly 16/64k PAGE_SIZE boot
  parisc: Provide default implementation for dma_{alloc, free}_attrs
  parisc: fix whitespace errors in arch/parisc/kernel/traps.c
  parisc: remove the second argument of kmap_atomic
......@@ -13,6 +13,7 @@ config PARISC
select BUG
select HAVE_PERF_EVENTS
select GENERIC_ATOMIC64 if !64BIT
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_GENERIC_HARDIRQS
select BROKEN_RODATA
select GENERIC_IRQ_PROBE
......@@ -242,6 +243,14 @@ config SMP
If you don't know what to do here, say N.
config IRQSTACKS
bool "Use separate kernel stacks when processing interrupts"
default n
help
If you say Y here the kernel will use separate kernel stacks
for handling hard and soft interrupts. This can help avoid
overflowing the process kernel stacks.
config HOTPLUG_CPU
bool
default y if SMP
......
......@@ -13,3 +13,14 @@ config DEBUG_RODATA
If in doubt, say "N".
endmenu
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
default y
depends on DEBUG_KERNEL
---help---
Say Y here if you want to check the overflows of kernel, IRQ
and exception stacks. This option will cause messages of the
stacks in detail when free stack space drops below a certain
limit.
If in doubt, say "N".
......@@ -24,9 +24,7 @@ CHECKFLAGS += -D__hppa__=1
LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
MACHINE := $(shell uname -m)
ifeq ($(MACHINE),parisc*)
NATIVE := 1
endif
NATIVE := $(if $(filter parisc%,$(MACHINE)),1,0)
ifdef CONFIG_64BIT
UTS_MACHINE := parisc64
......
......@@ -229,6 +229,29 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
/*
* atomic64_dec_if_positive - decrement by 1 if old value positive
* @v: pointer of type atomic_t
*
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
long c, old, dec;
c = atomic64_read(v);
for (;;) {
dec = c - 1;
if (unlikely(dec < 0))
break;
old = atomic64_cmpxchg((v), c, dec);
if (likely(old == c))
break;
c = old;
}
return dec;
}
#endif /* !CONFIG_64BIT */
......
......@@ -46,6 +46,9 @@ extern struct hppa_dma_ops pcx_dma_ops;
extern struct hppa_dma_ops *hppa_dma_ops;
#define dma_alloc_attrs(d, s, h, f, a) dma_alloc_coherent(d, s, h, f)
#define dma_free_attrs(d, s, h, f, a) dma_free_coherent(d, s, h, f)
static inline void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flag)
......
/* hardirq.h: PA-RISC hard IRQ support.
*
* Copyright (C) 2001 Matthew Wilcox <matthew@wil.cx>
* Copyright (C) 2013 Helge Deller <deller@gmx.de>
*/
#ifndef _PARISC_HARDIRQ_H
#define _PARISC_HARDIRQ_H
#include <asm-generic/hardirq.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/irq.h>
typedef struct {
unsigned int __softirq_pending;
#ifdef CONFIG_DEBUG_STACKOVERFLOW
unsigned int kernel_stack_usage;
#endif
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
#endif
unsigned int irq_tlb_count;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define __ARCH_IRQ_STAT
#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending)
#define __ARCH_SET_SOFTIRQ_PENDING
#define set_softirq_pending(x) \
this_cpu_write(irq_stat.__softirq_pending, (x))
#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x))
#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector %02x\n", irq)
#endif /* _PARISC_HARDIRQ_H */
......@@ -20,8 +20,6 @@
#endif /* __ASSEMBLY__ */
#define KERNEL_STACK_SIZE (4*PAGE_SIZE)
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
......@@ -60,6 +58,23 @@
#ifndef __ASSEMBLY__
/*
* IRQ STACK - used for irq handler
*/
#ifdef __KERNEL__
#define IRQ_STACK_SIZE (4096 << 2) /* 16k irq stack size */
union irq_stack_union {
unsigned long stack[IRQ_STACK_SIZE/sizeof(unsigned long)];
};
DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
void call_on_stack(unsigned long p1, void *func, unsigned long new_stack);
#endif /* __KERNEL__ */
/*
* Data detected about CPUs at boot time which is the same for all CPU's.
* HP boxes are SMP - ie identical processors.
......@@ -97,7 +112,6 @@ struct cpuinfo_parisc {
unsigned long txn_addr; /* MMIO addr of EIR or id_eid */
#ifdef CONFIG_SMP
unsigned long pending_ipi; /* bitmap of type ipi_message_type */
unsigned long ipi_count; /* number ipi Interrupts */
#endif
unsigned long bh_count; /* number of times bh was invoked */
unsigned long prof_counter; /* per CPU profiling support */
......
......@@ -40,7 +40,7 @@ struct thread_info {
/* thread information allocation */
#define THREAD_SIZE_ORDER 2
#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
/* Be sure to hunt all references to this down when you change the size of
* the kernel stack */
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
......
......@@ -22,6 +22,8 @@ extern spinlock_t pa_tlb_lock;
extern void flush_tlb_all(void);
extern void flush_tlb_all_local(void *);
#define smp_flush_tlb_all() flush_tlb_all()
/*
* flush_tlb_mm()
*
......
......@@ -606,7 +606,7 @@ void clear_user_highpage(struct page *page, unsigned long vaddr)
/* Clear using TMPALIAS region. The page doesn't need to
be flushed but the kernel mapping needs to be purged. */
vto = kmap_atomic(page, KM_USER0);
vto = kmap_atomic(page);
/* The PA-RISC 2.0 Architecture book states on page F-6:
"Before a write-capable translation is enabled, *all*
......@@ -641,8 +641,8 @@ void copy_user_highpage(struct page *to, struct page *from,
the `to' page must be flushed in copy_user_page_asm since
it can be used to bring in executable code. */
vfrom = kmap_atomic(from, KM_USER0);
vto = kmap_atomic(to, KM_USER1);
vfrom = kmap_atomic(from);
vto = kmap_atomic(to);
purge_kernel_dcache_page_asm((unsigned long)vto);
purge_tlb_start(flags);
......
......@@ -400,7 +400,15 @@
#if PT_NLEVELS == 3
extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
#else
# if defined(CONFIG_64BIT)
extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
#else
# if PAGE_SIZE > 4096
extru \va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index
# else
extru \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
# endif
# endif
#endif
dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */
copy %r0,\pte
......@@ -615,7 +623,7 @@
.text
.align PAGE_SIZE
.align 4096
ENTRY(fault_vector_20)
/* First vector is invalid (0) */
......@@ -825,11 +833,6 @@ ENTRY(syscall_exit_rfi)
STREG %r19,PT_SR7(%r16)
intr_return:
/* NOTE: Need to enable interrupts incase we schedule. */
ssm PSW_SM_I, %r0
intr_check_resched:
/* check for reschedule */
mfctl %cr30,%r1
LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
......@@ -856,6 +859,11 @@ intr_check_sig:
LDREG PT_IASQ1(%r16), %r20
cmpib,COND(=),n 0,%r20,intr_restore /* backward */
/* NOTE: We need to enable interrupts if we have to deliver
* signals. We used to do this earlier but it caused kernel
* stack overflows. */
ssm PSW_SM_I, %r0
copy %r0, %r25 /* long in_syscall = 0 */
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
......@@ -907,6 +915,10 @@ intr_do_resched:
cmpib,COND(=) 0, %r20, intr_do_preempt
nop
/* NOTE: We need to enable interrupts if we schedule. We used
* to do this earlier but it caused kernel stack overflows. */
ssm PSW_SM_I, %r0
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
......@@ -1694,7 +1706,8 @@ ENTRY(sys_\name\()_wrapper)
ldo TASK_REGS(%r1),%r1
reg_save %r1
mfctl %cr27, %r28
b sys_\name
ldil L%sys_\name, %r31
be R%sys_\name(%sr4,%r31)
STREG %r28, PT_CR27(%r1)
ENDPROC(sys_\name\()_wrapper)
.endm
......@@ -1997,6 +2010,47 @@ ftrace_stub:
ENDPROC(return_to_handler)
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_IRQSTACKS
/* void call_on_stack(unsigned long param1, void *func,
unsigned long new_stack) */
ENTRY(call_on_stack)
copy %sp, %r1
/* Regarding the HPPA calling conventions for function pointers,
we assume the PIC register is not changed across call. For
CONFIG_64BIT, the argument pointer is left to point at the
argument region allocated for the call to call_on_stack. */
# ifdef CONFIG_64BIT
/* Switch to new stack. We allocate two 128 byte frames. */
ldo 256(%arg2), %sp
/* Save previous stack pointer and return pointer in frame marker */
STREG %rp, -144(%sp)
/* Calls always use function descriptor */
LDREG 16(%arg1), %arg1
bve,l (%arg1), %rp
STREG %r1, -136(%sp)
LDREG -144(%sp), %rp
bve (%rp)
LDREG -136(%sp), %sp
# else
/* Switch to new stack. We allocate two 64 byte frames. */
ldo 128(%arg2), %sp
/* Save previous stack pointer and return pointer in frame marker */
STREG %r1, -68(%sp)
STREG %rp, -84(%sp)
/* Calls use function descriptor if PLABEL bit is set */
bb,>=,n %arg1, 30, 1f
depwi 0,31,2, %arg1
LDREG 0(%arg1), %arg1
1:
be,l 0(%sr4,%arg1), %sr0, %r31
copy %r31, %rp
LDREG -84(%sp), %rp
bv (%rp)
LDREG -68(%sp), %sp
# endif /* CONFIG_64BIT */
ENDPROC(call_on_stack)
#endif /* CONFIG_IRQSTACKS */
get_register:
/*
......
......@@ -55,13 +55,13 @@
* IODC requires 7K byte stack. That leaves 1K byte for os_hpmc.
*/
.align PAGE_SIZE
.align 4096
hpmc_stack:
.block 16384
#define HPMC_IODC_BUF_SIZE 0x8000
.align PAGE_SIZE
.align 4096
hpmc_iodc_buf:
.block HPMC_IODC_BUF_SIZE
......
......@@ -152,6 +152,39 @@ static struct irq_chip cpu_interrupt_type = {
.irq_retrigger = NULL,
};
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define irq_stats(x) (&per_cpu(irq_stat, x))
/*
* /proc/interrupts printing for arch specific interrupts
*/
int arch_show_interrupts(struct seq_file *p, int prec)
{
int j;
#ifdef CONFIG_DEBUG_STACKOVERFLOW
seq_printf(p, "%*s: ", prec, "STK");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->kernel_stack_usage);
seq_printf(p, " Kernel stack usage\n");
#endif
#ifdef CONFIG_SMP
seq_printf(p, "%*s: ", prec, "RES");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
seq_printf(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_printf(p, " Function call interrupts\n");
#endif
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
seq_printf(p, " TLB shootdowns\n");
return 0;
}
int show_interrupts(struct seq_file *p, void *v)
{
int i = *(loff_t *) v, j;
......@@ -219,6 +252,9 @@ int show_interrupts(struct seq_file *p, void *v)
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
if (i == NR_IRQS)
arch_show_interrupts(p, 3);
return 0;
}
......@@ -330,6 +366,66 @@ static inline int eirr_to_irq(unsigned long eirr)
return (BITS_PER_LONG - bit) + TIMER_IRQ;
}
int sysctl_panic_on_stackoverflow = 1;
static inline void stack_overflow_check(struct pt_regs *regs)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
#define STACK_MARGIN (256*6)
/* Our stack starts directly behind the thread_info struct. */
unsigned long stack_start = (unsigned long) current_thread_info();
unsigned long sp = regs->gr[30];
unsigned long stack_usage;
unsigned int *last_usage;
/* if sr7 != 0, we interrupted a userspace process which we do not want
* to check for stack overflow. We will only check the kernel stack. */
if (regs->sr[7])
return;
/* calculate kernel stack usage */
stack_usage = sp - stack_start;
last_usage = &per_cpu(irq_stat.kernel_stack_usage, smp_processor_id());
if (unlikely(stack_usage > *last_usage))
*last_usage = stack_usage;
if (likely(stack_usage < (THREAD_SIZE - STACK_MARGIN)))
return;
pr_emerg("stackcheck: %s will most likely overflow kernel stack "
"(sp:%lx, stk bottom-top:%lx-%lx)\n",
current->comm, sp, stack_start, stack_start + THREAD_SIZE);
if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
#endif
}
#ifdef CONFIG_IRQSTACKS
DEFINE_PER_CPU(union irq_stack_union, irq_stack_union);
static void execute_on_irq_stack(void *func, unsigned long param1)
{
unsigned long *irq_stack_start;
unsigned long irq_stack;
int cpu = smp_processor_id();
irq_stack_start = &per_cpu(irq_stack_union, cpu).stack[0];
irq_stack = (unsigned long) irq_stack_start;
irq_stack = ALIGN(irq_stack, 16); /* align for stack frame usage */
BUG_ON(*irq_stack_start); /* report bug if we were called recursive. */
*irq_stack_start = 1;
/* This is where we switch to the IRQ stack. */
call_on_stack(param1, func, irq_stack);
*irq_stack_start = 0;
}
#endif /* CONFIG_IRQSTACKS */
/* ONLY called from entry.S:intr_extint() */
void do_cpu_irq_mask(struct pt_regs *regs)
{
......@@ -364,7 +460,13 @@ void do_cpu_irq_mask(struct pt_regs *regs)
goto set_out;
}
#endif
stack_overflow_check(regs);
#ifdef CONFIG_IRQSTACKS
execute_on_irq_stack(&generic_handle_irq, irq);
#else
generic_handle_irq(irq);
#endif /* CONFIG_IRQSTACKS */
out:
irq_exit();
......@@ -420,6 +522,4 @@ void __init init_IRQ(void)
cpu_eiem = EIEM_MASK(TIMER_IRQ);
#endif
set_eiem(cpu_eiem); /* EIEM : enable all external intr */
}
......@@ -563,6 +563,15 @@ ENDPROC(copy_page_asm)
* %r23 physical page (shifted for tlb insert) of "from" translation
*/
/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
#define PAGE_ADD_SHIFT (PAGE_SHIFT-12)
.macro convert_phys_for_tlb_insert20 phys
extrd,u \phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys
#if _PAGE_SIZE_ENCODING_DEFAULT
depdi _PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys
#endif
.endm
/*
* We can't do this since copy_user_page is used to bring in
* file data that might have instructions. Since the data would
......@@ -589,15 +598,14 @@ ENTRY(copy_user_page_asm)
sub %r25, %r1, %r23
ldil L%(TMPALIAS_MAP_START), %r28
/* FIXME for different page sizes != 4k */
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
depdi 0, 31,32, %r28 /* clear any sign extension */
#endif
extrd,u %r26,56,32, %r26 /* convert phys addr to tlb insert format */
extrd,u %r23,56,32, %r23 /* convert phys addr to tlb insert format */
convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */
convert_phys_for_tlb_insert20 %r23 /* convert phys addr to tlb insert format */
depd %r24,63,22, %r28 /* Form aliased virtual address 'to' */
depdi 0, 63,12, %r28 /* Clear any offset bits */
depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */
copy %r28, %r29
depdi 1, 41,1, %r29 /* Form aliased virtual address 'from' */
#else
......@@ -747,11 +755,10 @@ ENTRY(clear_user_page_asm)
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
depdi 0, 31,32, %r28 /* clear any sign extension */
/* FIXME: page size dependend */
#endif
extrd,u %r26, 56,32, %r26 /* convert phys addr to tlb insert format */
convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */
depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */
depdi 0, 63,12, %r28 /* Clear any offset bits */
depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */
#else
extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */
depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */
......@@ -832,11 +839,10 @@ ENTRY(flush_dcache_page_asm)
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
depdi 0, 31,32, %r28 /* clear any sign extension */
/* FIXME: page size dependend */
#endif
extrd,u %r26, 56,32, %r26 /* convert phys addr to tlb insert format */
convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */
depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */
depdi 0, 63,12, %r28 /* Clear any offset bits */
depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */
#else
extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */
depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */
......@@ -909,11 +915,10 @@ ENTRY(flush_icache_page_asm)
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
depdi 0, 31,32, %r28 /* clear any sign extension */
/* FIXME: page size dependend */
#endif
extrd,u %r26, 56,32, %r26 /* convert phys addr to tlb insert format */
convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */
depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */
depdi 0, 63,12, %r28 /* Clear any offset bits */
depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */
#else
extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */
depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */
......@@ -959,7 +964,7 @@ ENTRY(flush_icache_page_asm)
fic,m %r1(%sr4,%r28)
fic,m %r1(%sr4,%r28)
fic,m %r1(%sr4,%r28)
cmpb,COND(<<) %r28, %r25,1b
cmpb,COND(<<) %r28, %r25,1b
fic,m %r1(%sr4,%r28)
sync
......
......@@ -129,6 +129,8 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_INFO "The 32-bit Kernel has started...\n");
#endif
printk(KERN_INFO "Default page size is %dKB.\n", (int)(PAGE_SIZE / 1024));
pdc_console_init();
#ifdef CONFIG_64BIT
......
......@@ -127,7 +127,7 @@ ipi_interrupt(int irq, void *dev_id)
unsigned long flags;
/* Count this now; we may make a call that never returns. */
p->ipi_count++;
inc_irq_stat(irq_call_count);
mb(); /* Order interrupt and bit testing. */
......@@ -155,6 +155,7 @@ ipi_interrupt(int irq, void *dev_id)
case IPI_RESCHEDULE:
smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu);
inc_irq_stat(irq_resched_count);
scheduler_ipi();
break;
......@@ -262,17 +263,6 @@ void arch_send_call_function_single_ipi(int cpu)
send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
}
/*
* Flush all other CPU's tlb and then mine. Do this with on_each_cpu()
* as we want to ensure all TLB's flushed before proceeding.
*/
void
smp_flush_tlb_all(void)
{
on_each_cpu(flush_tlb_all_local, NULL, 1);
}
/*
* Called by secondaries to update state and initialize CPU registers.
*/
......
/*
* Linux/PA-RISC Project (http://www.parisc-linux.org/)
*
* System call entry code Copyright (c) Matthew Wilcox 1999 <willy@bofh.ai>
* System call entry code / Linux gateway page
* Copyright (c) Matthew Wilcox 1999 <willy@bofh.ai>
* Licensed under the GNU GPL.
* thanks to Philipp Rumpf, Mike Shaver and various others
* sorry about the wall, puffin..
*/
/*
How does the Linux gateway page on PA-RISC work?
------------------------------------------------
The Linux gateway page on PA-RISC is "special".
It actually has PAGE_GATEWAY bits set (this is linux terminology; in parisc
terminology it's Execute, promote to PL0) in the page map. So anything
executing on this page executes with kernel level privilege (there's more to it
than that: to have this happen, you also have to use a branch with a ,gate
completer to activate the privilege promotion). The upshot is that everything
that runs on the gateway page runs at kernel privilege but with the current
user process address space (although you have access to kernel space via %sr2).
For the 0x100 syscall entry, we redo the space registers to point to the kernel
address space (preserving the user address space in %sr3), move to wide mode if
required, save the user registers and branch into the kernel syscall entry
point. For all the other functions, we execute at kernel privilege but don't
flip address spaces. The basic upshot of this is that these code snippets are
executed atomically (because the kernel can't be pre-empted) and they may
perform architecturally forbidden (to PL3) operations (like setting control
registers).
*/
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/errno.h>
......@@ -15,6 +38,7 @@
#include <asm/thread_info.h>
#include <asm/assembly.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <linux/linkage.h>
......@@ -643,7 +667,7 @@ ENTRY(end_linux_gateway_page)
.section .rodata,"a"
.align PAGE_SIZE
.align 8
/* Light-weight-syscall table */
/* Start of lws table. */
ENTRY(lws_table)
......@@ -652,13 +676,13 @@ ENTRY(lws_table)
END(lws_table)
/* End of lws table */
.align PAGE_SIZE
.align 8
ENTRY(sys_call_table)
#include "syscall_table.S"
END(sys_call_table)
#ifdef CONFIG_64BIT
.align PAGE_SIZE
.align 8
ENTRY(sys_call_table64)
#define SYSCALL_TABLE_64BIT
#include "syscall_table.S"
......@@ -674,7 +698,7 @@ END(sys_call_table64)
with ldcw.
*/
.section .data
.align PAGE_SIZE
.align L1_CACHE_BYTES
ENTRY(lws_lock_start)
/* lws locks */
.rept 16
......
......@@ -522,10 +522,10 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
*/
if (((unsigned long)regs->iaoq[0] & 3) &&
((unsigned long)regs->iasq[0] != (unsigned long)regs->sr[7])) {
/* Kill the user process later */
regs->iaoq[0] = 0 | 3;
/* Kill the user process later */
regs->iaoq[0] = 0 | 3;
regs->iaoq[1] = regs->iaoq[0] + 4;
regs->iasq[0] = regs->iasq[1] = regs->sr[7];
regs->iasq[0] = regs->iasq[1] = regs->sr[7];
regs->gr[0] &= ~PSW_B;
return;
}
......@@ -541,8 +541,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
/* set up a new led state on systems shipped with a LED State panel */
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_HPMC);
parisc_terminate("High Priority Machine Check (HPMC)",
parisc_terminate("High Priority Machine Check (HPMC)",
regs, code, 0);
/* NOT REACHED */
......@@ -584,13 +584,13 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
/* Break instruction trap */
handle_break(regs);
return;
case 10:
/* Privileged operation trap */
die_if_kernel("Privileged operation", regs, code);
si.si_code = ILL_PRVOPC;
goto give_sigill;
case 11:
/* Privileged register trap */
if ((regs->iir & 0xffdfffe0) == 0x034008a0) {
......@@ -634,7 +634,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
if(user_mode(regs)){
si.si_signo = SIGFPE;
/* Set to zero, and let the userspace app figure it out from
the insn pointed to by si_addr */
the insn pointed to by si_addr */
si.si_code = 0;
si.si_addr = (void __user *) regs->iaoq[0];
force_sig_info(SIGFPE, &si, current);
......@@ -648,7 +648,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
die_if_kernel("Floating point exception", regs, 0); /* quiet */
handle_fpe(regs);
return;
case 15:
/* Data TLB miss fault/Data page fault */
/* Fall through */
......@@ -660,15 +660,15 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
case 17:
/* Non-access data TLB miss fault/Non-access data page fault */
/* FIXME:
Still need to add slow path emulation code here!
If the insn used a non-shadow register, then the tlb
Still need to add slow path emulation code here!
If the insn used a non-shadow register, then the tlb
handlers could not have their side-effect (e.g. probe
writing to a target register) emulated since rfir would
erase the changes to said register. Instead we have to
setup everything, call this function we are in, and emulate
by hand. Technically we need to emulate:
fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw
*/
*/
fault_address = regs->ior;
fault_space = regs->isr;
break;
......
......@@ -95,7 +95,7 @@ SECTIONS
NOTES
/* Data */
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, PAGE_SIZE)
/* PA-RISC locks requires 16-byte alignment */
. = ALIGN(16);
......
......@@ -1069,6 +1069,7 @@ void flush_tlb_all(void)
{
int do_recycle;
inc_irq_stat(irq_tlb_count);
do_recycle = 0;
spin_lock(&sid_lock);
if (dirty_space_ids > RECYCLE_THRESHOLD) {
......@@ -1089,6 +1090,7 @@ void flush_tlb_all(void)
#else
void flush_tlb_all(void)
{
inc_irq_stat(irq_tlb_count);
spin_lock(&sid_lock);
flush_tlb_all_local(NULL);
recycle_sids();
......
......@@ -575,7 +575,7 @@ sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
mtsp(sid,1);
asm("lci 0(%%sr1, %1), %0" : "=r" (ci) : "r" (vba));
pa |= (ci >> 12) & 0xff; /* move CI (8 bits) into lowest byte */
pa |= (ci >> PAGE_SHIFT) & 0xff; /* move CI (8 bits) into lowest byte */
pa |= SBA_PDIR_VALID_BIT; /* set "valid" bit */
*pdir_ptr = cpu_to_le64(pa); /* swap and store into I/O Pdir */
......@@ -1376,7 +1376,7 @@ static void
sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
{
u32 iova_space_size, iova_space_mask;
unsigned int pdir_size, iov_order;
unsigned int pdir_size, iov_order, tcnfg;
/*
** Determine IOVA Space size from memory size.
......@@ -1468,8 +1468,19 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa+IOC_IBASE);
WRITE_REG(ioc->imask, ioc->ioc_hpa+IOC_IMASK);
/* Set I/O PDIR Page size to 4K */
WRITE_REG(0, ioc->ioc_hpa+IOC_TCNFG);
/* Set I/O PDIR Page size to system page size */
switch (PAGE_SHIFT) {
case 12: tcnfg = 0; break; /* 4K */
case 13: tcnfg = 1; break; /* 8K */
case 14: tcnfg = 2; break; /* 16K */
case 16: tcnfg = 3; break; /* 64K */
default:
panic(__FILE__ "Unsupported system page size %d",
1 << PAGE_SHIFT);
break;
}
/* Set I/O PDIR Page size to PAGE_SIZE (4k/16k/...) */
WRITE_REG(tcnfg, ioc->ioc_hpa+IOC_TCNFG);
/*
** Clear I/O TLB of any possible entries.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册