diff --git a/libcpu/arm/cortex-m0/context_iar.S b/libcpu/arm/cortex-m0/context_iar.S
new file mode 100644
index 0000000000000000000000000000000000000000..500e46d065c872a7c0c986f733eaf02edce107c1
--- /dev/null
+++ b/libcpu/arm/cortex-m0/context_iar.S
@@ -0,0 +1,201 @@
+;/*
+; * File      : context_iar.S
+; * This file is part of RT-Thread RTOS
+; * COPYRIGHT (C) 2009, RT-Thread Development Team
+; *
+; * The license and distribution terms for this file may be
+; * found in the file LICENSE in this distribution or at
+; * http://www.rt-thread.org/license/LICENSE
+; *
+; * Change Logs:
+; * Date           Author       Notes
+; * 2010-01-25     Bernard      first version
+; * 2012-06-01     aozima       set pendsv priority to 0xFF.
+; * 2012-08-17     aozima       fixed bug: store r8 - r11.
+; */
+
+;/**
+; * @addtogroup CORTEX-M0
+; */
+;/*@{*/
+
+NVIC_INT_CTRL   EQU     0xE000ED04               ; interrupt control and state register (ICSR)
+NVIC_SHPR3      EQU     0xE000ED20               ; system handler priority register 3 (SHPR3)
+NVIC_PENDSV_PRI EQU     0x00FF0000               ; mask ORed into SHPR3: PendSV priority byte = 0xFF (lowest)
+NVIC_PENDSVSET  EQU     0x10000000               ; bit 28, written to NVIC_INT_CTRL to pend PendSV
+
+    SECTION    .text:CODE(2)
+    THUMB
+    REQUIRE8
+    PRESERVE8
+
+    IMPORT rt_thread_switch_interrupt_flag
+    IMPORT rt_interrupt_from_thread
+    IMPORT rt_interrupt_to_thread
+
+;/* rt_base_t rt_hw_interrupt_disable();
+; * Reads PRIMASK into r0 (the return value), then masks interrupts with CPSID I.
+; */
+    EXPORT rt_hw_interrupt_disable
+rt_hw_interrupt_disable:
+    MRS     r0, PRIMASK             ; r0 = PRIMASK as it was before masking
+    CPSID   I                       ; mask interrupts
+    BX      LR                      ; return the previous PRIMASK in r0
+
+;/* void rt_hw_interrupt_enable(rt_base_t level);
+; * r0 --> level, written to PRIMASK as-is (pairs with the value rt_hw_interrupt_disable returns)
+; */
+    EXPORT  rt_hw_interrupt_enable
+rt_hw_interrupt_enable:
+    MSR     PRIMASK, r0             ; PRIMASK = level
+    BX      LR
+
+;/*
+; * void rt_hw_context_switch(rt_uint32 from, rt_uint32 to);
+; * r0 --> from, recorded in rt_interrupt_from_thread unless a switch is already pending
+; * r1 --> to, always recorded in rt_interrupt_to_thread; the switch itself is done by PendSV
+; */
+    EXPORT rt_hw_context_switch_interrupt
+    EXPORT rt_hw_context_switch
+rt_hw_context_switch_interrupt:             ; both exported names share this body
+rt_hw_context_switch:
+    ; if rt_thread_switch_interrupt_flag is already 1, keep the recorded from-thread
+    LDR     r2, =rt_thread_switch_interrupt_flag
+    LDR     r3, [r2]
+    CMP     r3, #1
+    BEQ     _reswitch                       ; switch already pending: only update the target
+    MOVS    r3, #0x1
+    STR     r3, [r2]                        ; flag = 1: a switch is now pending
+
+    LDR     r2, =rt_interrupt_from_thread   ; set rt_interrupt_from_thread
+    STR     r0, [r2]
+
+_reswitch
+    LDR     r2, =rt_interrupt_to_thread     ; set rt_interrupt_to_thread
+    STR     r1, [r2]
+
+    LDR     r0, =NVIC_INT_CTRL              ; trigger the PendSV exception (causes context switch)
+    LDR     r1, =NVIC_PENDSVSET
+    STR     r1, [r0]                        ; r0/r1 are reused as scratch; the arguments are already stored
+    BX      LR
+
+; PendSV_Handler: performs the switch requested through rt_thread_switch_interrupt_flag
+; r0/r1 are scratch, r2 keeps the entry PRIMASK; {r4 - r11} are saved/restored on the thread (PSP) stack
+; psr, pc, lr, r12, r3, r2, r1, r0 are pushed into [from] stack by hardware on exception entry
+    EXPORT PendSV_Handler
+PendSV_Handler:
+
+    ; disable interrupt to protect context switch
+    MRS     r2, PRIMASK             ; r2 = entry PRIMASK, restored at pendsv_exit
+    CPSID   I
+
+    ; get rt_thread_switch_interrupt_flag
+    LDR     r0, =rt_thread_switch_interrupt_flag
+    LDR     r1, [r0]
+    CMP     r1, #0x00
+    BEQ     pendsv_exit                ; pendsv already handled
+
+    ; clear rt_thread_switch_interrupt_flag to 0
+    MOVS    r1, #0x00
+    STR     r1, [r0]
+
+    LDR     r0, =rt_interrupt_from_thread
+    LDR     r1, [r0]
+    CMP     r1, #0x00
+    BEQ     swtich_to_thread        ; from == 0 (first switch): nothing to save
+
+    MRS     r1, psp                 ; get from thread stack pointer
+
+    SUBS    r1, r1, #0x20           ; space for {r4 - r7} and {r8 - r11}
+    LDR     r0, [r0]                ; r0 = value of rt_interrupt_from_thread (where the sp is stored)
+    STR     r1, [r0]                ; update from thread stack pointer
+
+    STMIA   r1!, {r4 - r7}          ; store thread {r4 - r7} at the lowest addresses of the new area
+
+    MOV     r4, r8                  ; mov thread {r8 - r11} to {r4 - r7}
+    MOV     r5, r9
+    MOV     r6, r10
+    MOV     r7, r11
+    STMIA   r1!, {r4 - r7}          ; store thread {r8 - r11} right above the saved {r4 - r7}
+
+swtich_to_thread
+    LDR     r1, =rt_interrupt_to_thread
+    LDR     r1, [r1]
+    LDR     r1, [r1]                ; load thread stack pointer
+
+    LDMIA   r1!, {r4 - r7}          ; pop thread {r4 - r7} register from thread stack
+    PUSH    {r4 - r7}               ; park them on the handler stack while r4 - r7 carry {r8 - r11}
+
+    LDMIA   r1!, {r4 - r7}          ; pop thread {r8 - r11} high register from thread stack to {r4 - r7}
+    MOV     r8,  r4                 ; mov {r4 - r7} to {r8 - r11}
+    MOV     r9,  r5
+    MOV     r10, r6
+    MOV     r11, r7
+
+    POP     {r4 - r7}               ; pop {r4 - r7} from MSP
+
+    MSR     psp, r1                 ; psp now points at the to-thread's hardware-stacked frame
+
+pendsv_exit
+    ; restore interrupt
+    MSR     PRIMASK, r2
+
+    MOVS    r0, #0x03               ; build EXC_RETURN without touching the handler stack
+    RSBS    r0, r0, #0x00           ; r0 = 0 - 3 = 0xFFFFFFFD: return to Thread mode using PSP
+    BX      r0                      ; was 0xFFFFFFFC (#0x04), which is not a defined EXC_RETURN value
+
+;/*
+; * void rt_hw_context_switch_to(rt_uint32 to);
+; * r0 --> to
+; * this function is used to perform the first thread switch (from-thread is set to 0)
+; */
+    EXPORT rt_hw_context_switch_to
+rt_hw_context_switch_to:
+    ; set to thread
+    LDR     r1, =rt_interrupt_to_thread
+    STR     r0, [r1]
+
+    ; set from thread to 0, so PendSV_Handler skips the register save
+    LDR     r1, =rt_interrupt_from_thread
+    MOVS    r0, #0x0
+    STR     r0, [r1]
+
+    ; set interrupt flag to 1
+    LDR     r1, =rt_thread_switch_interrupt_flag
+    MOVS    r0, #1
+    STR     r0, [r1]
+
+    ; set the PendSV exception priority
+    LDR     r0, =NVIC_SHPR3
+    LDR     r1, =NVIC_PENDSV_PRI
+    LDR     r2, [r0,#0x00]       ; read
+    ORRS    r1,r1,r2             ; modify: OR in NVIC_PENDSV_PRI, other bits unchanged
+    STR     r1, [r0]             ; write-back
+
+    ; trigger the PendSV exception (causes context switch)
+    LDR     r0, =NVIC_INT_CTRL
+    LDR     r1, =NVIC_PENDSVSET
+    STR     r1, [r0]
+    NOP
+
+    ; enable interrupts at processor level
+    CPSIE   I
+
+    ; never reach here! NOTE(review): no branch follows, so any further execution falls into the BX lr below
+
+; compatible with old version: exported entry point that does nothing but return
+    EXPORT rt_hw_interrupt_thread_switch
+rt_hw_interrupt_thread_switch:
+    BX      lr                      ; return immediately
+
+    IMPORT rt_hw_hard_fault_exception
+    EXPORT HardFault_Handler
+HardFault_Handler:
+    ; hand the PSP value to rt_hw_hard_fault_exception in r0, then return from the exception
+    ; NOTE(review): PSP is read unconditionally -- presumably the fault came from thread code; confirm
+    MRS     r0, psp                 ; get fault thread stack pointer
+    PUSH    {r4, lr}                ; r4 is padding only: an even push keeps SP 8-byte aligned for the C call
+    BL      rt_hw_hard_fault_exception
+    POP     {r4, pc}                ; restore r4 and load the saved lr (EXC_RETURN) into pc
+
+    END
diff --git a/libcpu/arm/cortex-m0/context_rvds.S b/libcpu/arm/cortex-m0/context_rvds.S
index 0b0043760918aef33275b793905f5e4725f8bb7f..29bef74684c94b45cf4e4012ea64095a7c2c3d2c 100644
--- a/libcpu/arm/cortex-m0/context_rvds.S
+++ b/libcpu/arm/cortex-m0/context_rvds.S
@@ -11,6 +11,7 @@
 ; * Date           Author       Notes
 ; * 2010-01-25     Bernard      first version
 ; * 2012-06-01     aozima       set pendsv priority to 0xFF.
+; * 2012-08-17     aozima       fixed bug: store r8 - r11.
 ; */
 
 ;/**
@@ -66,7 +67,7 @@ rt_hw_context_switch    PROC
     LDR     r3, [r2]
     CMP     r3, #1
     BEQ     _reswitch
-    MOVS    r3, #0x1
+    MOVS    r3, #0x01
     STR     r3, [r2]
 
     LDR     r2, =rt_interrupt_from_thread   ; set rt_interrupt_from_thread
@@ -108,17 +109,35 @@ PendSV_Handler    PROC
     BEQ     swtich_to_thread        ; skip register save at the first time
 
     MRS     r1, psp                 ; get from thread stack pointer
-    SUBS    r1, r1, #0x10
+
+    SUBS    r1, r1, #0x20           ; space for {r4 - r7} and {r8 - r11}
     LDR     r0, [r0]
     STR     r1, [r0]                ; update from thread stack pointer
-    STMIA   r1!, {r4 - r7}          ; push r4 - r7 register
+
+    STMIA   r1!, {r4 - r7}          ; push thread {r4 - r7} register to thread stack
+
+    MOV     r4, r8                  ; mov thread {r8 - r11} to {r4 - r7}
+    MOV     r5, r9
+    MOV     r6, r10
+    MOV     r7, r11
+    STMIA   r1!, {r4 - r7}          ; push thread {r8 - r11} high register to thread stack
 
 swtich_to_thread
     LDR     r1, =rt_interrupt_to_thread
     LDR     r1, [r1]
     LDR     r1, [r1]                ; load thread stack pointer
 
-    LDMIA   r1!, {r4 - r7}          ; pop r4 - r7 register
+    LDMIA   r1!, {r4 - r7}          ; pop thread {r4 - r7} register from thread stack
+    PUSH    {r4 - r7}               ; push {r4 - r7} to MSP for copy {r8 - r11}
+
+    LDMIA   r1!, {r4 - r7}          ; pop thread {r8 - r11} high register from thread stack to {r4 - r7}
+    MOV     r8,  r4                 ; mov {r4 - r7} to {r8 - r11}
+    MOV     r9,  r5
+    MOV     r10, r6
+    MOV     r11, r7
+
+    POP     {r4 - r7}               ; pop {r4 - r7} from MSP
+
     MSR     psp, r1                 ; update stack pointer
 
 pendsv_exit
@@ -126,7 +145,7 @@ pendsv_exit
     MSR     PRIMASK, r2
 
     MOVS    r0, #0x04
-    RSBS    r0, #0
+    RSBS    r0, r0, #0x00
     BX      r0
     ENDP
 
diff --git a/libcpu/arm/cortex-m0/cpuport.c b/libcpu/arm/cortex-m0/cpuport.c
index 61f77c6df9ff366d254e0b89183de7d156f23b03..2000b1175dc01b154e912432fb47a2b769268c83 100644
--- a/libcpu/arm/cortex-m0/cpuport.c
+++ b/libcpu/arm/cortex-m0/cpuport.c
@@ -11,11 +11,12 @@
  * Date           Author       Notes
  * 2010-01-25     Bernard      first version
  * 2012-05-31     aozima       Merge all of the C source code into cpuport.c
+ * 2012-08-17     aozima       fixed bug: store r8 - r11.
  */
 
 #include <rtthread.h>
 
-struct stack_contex
+struct exception_stack_frame
 {
     rt_uint32_t r0;
     rt_uint32_t r1;
@@ -27,6 +28,23 @@ struct stack_contex
     rt_uint32_t psr;
 };
 
+struct stack_frame  /* complete saved thread context; rt_hw_stack_init builds one on a new stack */
+{
+    /* r4 ~ r7 low registers (first, i.e. lowest addresses of the frame) */
+    rt_uint32_t r4;
+    rt_uint32_t r5;
+    rt_uint32_t r6;
+    rt_uint32_t r7;
+
+    /* r8 ~ r11 high registers, stored directly above r4 ~ r7 */
+    rt_uint32_t r8;
+    rt_uint32_t r9;
+    rt_uint32_t r10;
+    rt_uint32_t r11;
+	
+    struct exception_stack_frame exception_stack_frame; /* r0 ... psr part, sits above r4 ~ r11 */
+};
+
 /* flag in interrupt handling */
 rt_uint32_t rt_interrupt_from_thread, rt_interrupt_to_thread;
 rt_uint32_t rt_thread_switch_interrupt_flag;
@@ -44,29 +62,37 @@ rt_uint32_t rt_thread_switch_interrupt_flag;
 rt_uint8_t *rt_hw_stack_init(void *tentry, void *parameter,
                              rt_uint8_t *stack_addr, void *texit)
 {
-    unsigned long *stk;
+    struct stack_frame * stack_frame;
+    rt_uint8_t * stk;
+    unsigned long i;
+
+    stk = stack_addr + sizeof(rt_uint32_t);
+
+    stk -= sizeof(struct stack_frame);
+    stack_frame = (struct stack_frame *)stk;
+	
+    /* init all register */
+    for(i=0; i<sizeof(struct stack_frame)/sizeof(rt_uint32_t); i++)
+    {
+        ((rt_uint32_t*)stack_frame)[i] = 0xdeadbeef;
+    }
 
-    stk 	 = (unsigned long *)stack_addr;
-    *(stk)   = 0x01000000L;					/* PSR */
-    *(--stk) = (unsigned long)tentry;		/* entry point, pc */
-    *(--stk) = (unsigned long)texit;		/* lr */
-    *(--stk) = 0;							/* r12 */
-    *(--stk) = 0;							/* r3 */
-    *(--stk) = 0;							/* r2 */
-    *(--stk) = 0;							/* r1 */
-    *(--stk) = (unsigned long)parameter;	/* r0 : argument */
-    *(--stk) = 0;							/* r7 */
-    *(--stk) = 0;							/* r6 */
-    *(--stk) = 0;							/* r5 */
-    *(--stk) = 0;							/* r4 */
+    stack_frame->exception_stack_frame.r0  = (unsigned long)parameter; /* r0 : argument */
+    stack_frame->exception_stack_frame.r1  = 0;                        /* r1 */
+    stack_frame->exception_stack_frame.r2  = 0;                        /* r2 */
+    stack_frame->exception_stack_frame.r3  = 0;                        /* r3 */
+    stack_frame->exception_stack_frame.r12 = 0;                        /* r12 */
+    stack_frame->exception_stack_frame.lr  = (unsigned long)texit;     /* lr */
+    stack_frame->exception_stack_frame.pc  = (unsigned long)tentry;    /* entry point, pc */
+    stack_frame->exception_stack_frame.psr = 0x01000000L;              /* PSR */
 
     /* return task's current stack address */
-    return (rt_uint8_t *)stk;
+    return stk;
 }
 
 extern long list_thread(void);
 extern rt_thread_t rt_current_thread;
-void rt_hw_hard_fault_exception(struct stack_contex* contex)
+void rt_hw_hard_fault_exception(struct exception_stack_frame* contex)
 {
     rt_kprintf("psr: 0x%08x\n", contex->psr);
     rt_kprintf(" pc: 0x%08x\n", contex->pc);