diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c index faa8f86c0db490718d5dbb2c6948f11d3fff28d1..0901b2f14e15e9b2b396e923641de5c2a28eb90d 100644 --- a/arch/sh/lib/delay.c +++ b/arch/sh/lib/delay.c @@ -10,6 +10,16 @@ void __delay(unsigned long loops) { __asm__ __volatile__( + /* + * ST40-300 appears to have an issue with this code, + * normally taking two cycles each loop, as with all + * other SH variants. If however the branch and the + * delay slot straddle an 8 byte boundary, this increases + * to 3 cycles. + * This align directive ensures this doesn't occur. + */ + ".balign 8\n\t" + "tst %0, %0\n\t" "1:\t" "bf/s 1b\n\t"