提交 a71a29de 编写于 作者: Y Yoshinori Sato

h8300: library functions

Signed-off-by: Yoshinori Sato <ysato@users.sourceforge.jp>
上级 bbeb79ac
#
# Makefile for H8/300-specific library files.
#
# lib-y lists the library objects: memory and string helpers
# (memcpy, memset, strncpy), abs, the libgcc-style integer helpers
# (32-bit multiply/divide/modulo, 64-bit multiply/shift/compare) and
# the delay loops.
#
lib-y = memcpy.o memset.o abs.o strncpy.o \
mulsi3.o udivsi3.o muldi3.o moddivsi3.o \
ashldi3.o lshrdi3.o ashrdi3.o ucmpdi2.o \
delay.o
;;; abs.S
#include <asm/linkage.h>
#if defined(CONFIG_CPU_H8300H)
.h8300h
#endif
#if defined(CONFIG_CPU_H8S)
.h8300s
#endif
.text
.global _abs
;;; int abs(int n)
_abs:
mov.l er0,er0
bpl 1f
neg.l er0
1:
rts
#include "libgcc.h"
/*
 * __ashldi3 - shift a double-word value left.
 * @u: value to shift
 * @b: shift count in bits
 *
 * Returns @u << @b.  The value is handled as two words; a count of at
 * least one word moves the low word into the high word and clears the
 * low word, a smaller count carries the top bits of the low word into
 * the high word.
 */
DWtype
__ashldi3(DWtype u, word_type b)
{
	const word_type word_bits = sizeof(Wtype) * BITS_PER_UNIT;
	DWunion in, out;

	if (b == 0)
		return u;

	in.ll = u;
	if (b >= word_bits) {
		/* Everything left in the result comes from the low word. */
		out.s.low = 0;
		out.s.high = (UWtype) in.s.low << (b - word_bits);
	} else {
		/* Bits leaving the low word enter the bottom of the high word. */
		const UWtype spill = (UWtype) in.s.low >> (word_bits - b);

		out.s.low = (UWtype) in.s.low << b;
		out.s.high = ((UWtype) in.s.high << b) | spill;
	}
	return out.ll;
}
#include "libgcc.h"
/*
 * __ashrdi3 - arithmetic (sign-propagating) right shift of a double word.
 * @u: value to shift
 * @b: shift count in bits
 *
 * Returns @u >> @b with the sign bit replicated into the vacated bits.
 */
DWtype __ashrdi3(DWtype u, word_type b)
{
	const word_type word_bits = sizeof(Wtype) * BITS_PER_UNIT;
	DWunion in, out;

	if (b == 0)
		return u;

	in.ll = u;
	if (b >= word_bits) {
		/* The high word becomes all copies of the sign bit. */
		out.s.high = in.s.high >> (word_bits - 1);
		out.s.low = in.s.high >> (b - word_bits);
	} else {
		/* Bits leaving the high word enter the top of the low word. */
		const UWtype spill = (UWtype) in.s.high << (word_bits - b);

		out.s.high = in.s.high >> b;
		out.s.low = ((UWtype) in.s.low >> b) | spill;
	}
	return out.ll;
}
/*
* delay loops
*
* Copyright (C) 2015 Yoshinori Sato
*/
#include <linux/module.h>
#include <linux/delay.h>
#include <asm/param.h>
#include <asm/processor.h>
#include <asm/timex.h>
/*
 * __delay - busy-wait for a number of loop iterations.
 * @cycles: iterations of the two-instruction decrement/branch loop
 *
 * A count of zero returns immediately.  Without the guard the first
 * dec.l would wrap the counter to 0xffffffff and the loop would spin
 * for roughly 2^32 iterations; __const_udelay() passes zero whenever
 * the scaled delay is shorter than one loop iteration.
 */
void __delay(unsigned long cycles)
{
	if (!cycles)
		return;

	/* dec.l updates the condition codes, so declare them clobbered. */
	__asm__ volatile ("1: dec.l #1,%0\n\t"
			  "bne 1b"
			  : "=r" (cycles)
			  : "0" (cycles)
			  : "cc");
}
EXPORT_SYMBOL(__delay);
void __const_udelay(unsigned long xloops)
{
u64 loops;
loops = (u64)xloops * loops_per_jiffy * HZ;
__delay(loops >> 32);
}
EXPORT_SYMBOL(__const_udelay);
/*
 * __udelay - busy-wait for a number of microseconds.
 * @usecs: microseconds to wait
 */
void __udelay(unsigned long usecs)
{
	/* 2**32 / 1000000 (rounded up) */
	const unsigned long xloops_per_usec = 0x10C7UL;

	__const_udelay(usecs * xloops_per_usec);
}
EXPORT_SYMBOL(__udelay);
/*
 * __ndelay - busy-wait for a number of nanoseconds.
 * @nsecs: nanoseconds to wait
 */
void __ndelay(unsigned long nsecs)
{
	/* 2**32 / 1000000000 (rounded up) */
	const unsigned long xloops_per_nsec = 0x5UL;

	__const_udelay(nsecs * xloops_per_nsec);
}
EXPORT_SYMBOL(__ndelay);
/*
 * Shared definitions for the libgcc-style helpers in this directory.
 *
 * Assembly sources get symbolic register names; C sources get the
 * word / double-word types and the DWunion used to split a double
 * word into its two halves.
 */
#ifndef __H8300_LIBGCC_H__
#define __H8300_LIBGCC_H__

#ifdef __ASSEMBLY__

/* A0-A3 name r0-r3 together with their low/high byte halves. */
#define A0 r0
#define A0L r0l
#define A0H r0h
#define A1 r1
#define A1L r1l
#define A1H r1h
#define A2 r2
#define A2L r2l
#define A2H r2h
#define A3 r3
#define A3L r3l
#define A3H r3h

/*
 * S0-S2 name r4-r6.  The assembly in this directory pushes S2P
 * before using it and pops it again before returning.
 */
#define S0 r4
#define S0L r4l
#define S0H r4h
#define S1 r5
#define S1L r5l
#define S1H r5h
#define S2 r6
#define S2L r6l
#define S2H r6h

/* 32-bit push/pop. */
#define PUSHP push.l
#define POPP pop.l

/* Full 32-bit (erN) views of the registers above. */
#define A0P er0
#define A1P er1
#define A2P er2
#define A3P er3
#define S0P er4
#define S1P er5
#define S2P er6

/* Upper 16-bit halves (eN) of A0P-A3P. */
#define A0E e0
#define A1E e1
#define A2E e2
#define A3E e3

#else /* !__ASSEMBLY__ */

/* Bits per byte and bits per word (a word is 4 bytes here). */
#define BITS_PER_UNIT (8)
#define W_TYPE_SIZE (4 * BITS_PER_UNIT)

/*
 * Word, half-word and double-word type names used by the helpers.
 * Half-word values are held in full 32-bit words.
 */
#define Wtype SItype
#define UWtype USItype
#define HWtype SItype
#define UHWtype USItype
#define DWtype DItype
#define UDWtype UDItype

typedef int SItype __attribute__ ((mode (SI)));
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef unsigned int UDItype __attribute__ ((mode (DI)));

/*
 * The two words of a double word, high word first.
 * NOTE(review): this order assumes a big-endian target - confirm.
 */
struct DWstruct {
	Wtype high, low;
};

/* A double word, viewed either whole (ll) or as its two words (s). */
typedef union {
	struct DWstruct s;
	DWtype ll;
} DWunion;

/* Type used for shift counts and comparison results. */
typedef int word_type __attribute__ ((mode (__word__)));

#endif /* __ASSEMBLY__ */
#endif /* __H8300_LIBGCC_H__ */
#include "libgcc.h"
/*
 * __lshrdi3 - logical (zero-filling) right shift of a double word.
 * @u: value to shift
 * @b: shift count in bits
 *
 * Returns @u >> @b with zeros shifted in at the top.
 */
DWtype __lshrdi3(DWtype u, word_type b)
{
	const word_type word_bits = sizeof(Wtype) * BITS_PER_UNIT;
	DWunion in, out;

	if (b == 0)
		return u;

	in.ll = u;
	if (b >= word_bits) {
		/* Only bits of the high word survive, in the low word. */
		out.s.high = 0;
		out.s.low = (UWtype) in.s.high >> (b - word_bits);
	} else {
		/* Bits leaving the high word enter the top of the low word. */
		const UWtype spill = (UWtype) in.s.high << (word_bits - b);

		out.s.high = (UWtype) in.s.high >> b;
		out.s.low = ((UWtype) in.s.low >> b) | spill;
	}
	return out.ll;
}
;;; memcpy.S
#include <asm/linkage.h>
#if defined(CONFIG_CPU_H8300H)
.h8300h
#endif
#if defined(CONFIG_CPU_H8S)
.h8300s
#endif
.text
.global memcpy
;;; void *memcpy(void *to, void *from, size_t n)
;;;
;;; Register use, as read from the code below:
;;;   in:   er0 = to, er1 = from, er2 = n
;;;   out:  er0 = to (saved and restored, or never modified)
;;;   er3 is scratch; er4-er6 hold their entry values again on return.
;;;
;;; Two strategies:
;;;   - to and from have the same bit 0: optional leading byte, then
;;;     long (4-byte) moves, then trailing bytes.
;;;   - bit 0 differs: block move with eepmov.w (label 4).
memcpy:
;; n == 0: return at once with er0 untouched
mov.l er2,er2
bne 1f
rts
1:
;; address check
;; C = bit 0 of to XOR bit 0 of from; C set means exactly one of the
;; two addresses is odd, so they can never both be even: go to 4
bld #0,r0l
bxor #0,r1l
bcs 4f
;; same parity: save er4 (used below as the constant 4) and er0
;; (the return value)
mov.l er4,@-sp
mov.l er0,@-sp
btst #0,r0l
beq 1f
;; (aligned even) odd address
;; both addresses are odd: copy one byte so that both become even
mov.b @er1,r3l
mov.b r3l,@er0
adds #1,er1
adds #1,er0
dec.l #1,er2
beq 3f
1:
;; n < sizeof(unsigned long) check
sub.l er4,er4
adds #4,er4 ; loop count check value
cmp.l er4,er2
blo 2f
;; unsigned long copy
;; moves 4 bytes per pass while at least 4 bytes remain; adds/subs do
;; not touch the condition codes, so the cmp.l alone decides the branch
1:
mov.l @er1,er3
mov.l er3,@er0
adds #4,er0
adds #4,er1
subs #4,er2
cmp.l er4,er2
bcc 1b
;; rest
;; 0 to 3 bytes are left in er2
2:
mov.l er2,er2
beq 3f
1:
mov.b @er1,r3l
mov.b r3l,@er0
adds #1,er1
adds #1,er0
dec.l #1,er2
bne 1b
;; common exit of the same-parity path: restore to and er4
3:
mov.l @sp+,er0
mov.l @sp+,er4
rts
;; odd <- even / even <- odd
;; eepmov.w takes its byte count in r4, its source in er5 and its
;; destination in er6 (H8/300H instruction set).  The old er4, er5 and
;; er6 are parked in er3, er2 and er1 while the block move runs.
4:
mov.l er4,er3
mov.l er2,er4
mov.l er5,er2
mov.l er1,er5
mov.l er6,er1
mov.l er0,er6
1:
eepmov.w
;; run eepmov.w again while r4 is still non-zero - presumably this
;; covers a block move that stopped early; TODO confirm
mov.w r4,r4
bne 1b
;; e4 holds the upper 16 bits of n: one more pass per 64 KiB unit.
;; NOTE(review): if eepmov.w moves nothing when r4 == 0, as the
;; programming manual appears to specify, these extra passes copy no
;; data and a mixed-parity copy of 64 KiB or more stops after the
;; low 16 bits of n - verify against the manual / hardware.
dec.w #1,e4
bpl 1b
;; put er6, er5 and er4 back; er0 still holds to
mov.l er1,er6
mov.l er2,er5
mov.l er3,er4
rts
.end
/* memset.S */
#include <asm/linkage.h>
#if defined(CONFIG_CPU_H8300H)
.h8300h
#endif
#if defined(CONFIG_CPU_H8S)
.h8300s
#endif
.text
.global memset
.global clear_user
;;void *memset(*ptr, int c, size_t count)
;; ptr = er0
;; c = er1(r1l)
;; count = er2
memset:
btst #0,r0l
beq 2f
;; odd address
1:
mov.b r1l,@er0
adds #1,er0
dec.l #1,er2
beq 6f
;; even address
2:
mov.l er2,er3
cmp.l #4,er2
blo 4f
;; count>=4 -> count/4
#if defined(CONFIG_CPU_H8300H)
shlr.l er2
shlr.l er2
#endif
#if defined(CONFIG_CPU_H8S)
shlr.l #2,er2
#endif
;; byte -> long
mov.b r1l,r1h
mov.w r1,e1
3:
mov.l er1,@er0
adds #4,er0
dec.l #1,er2
bne 3b
4:
;; count % 4
and.b #3,r3l
beq 6f
5:
mov.b r1l,@er0
adds #1,er0
dec.b r3l
bne 5b
6:
rts
clear_user:
mov.l er1, er2
sub.l er1, er1
bra memset
.end
#include "libgcc.h"
; Signed 32-bit divide and modulo, plus unsigned modulo.
;
; Register use (names from libgcc.h):
;   A0P = numerator on entry, result on return
;   A1P = denominator on entry
;   S2L = bit 3 holds the sign the result must get; S2P is pushed
;         on entry and popped before returning
; __udivsi3 does the unsigned work and leaves the remainder in er3.

; __modsi3: A0P = A0P % A1P; the result takes the sign of the numerator
	.global	__modsi3
__modsi3:
	PUSHP	S2P
	bsr	modsi3_abs		; S2L bit 3 = sign of the numerator
	bsr	__divsi3		; pushes and pops S2P itself, so the
					; sign kept in S2L survives the call
	mov.l	er3,er0			; the remainder is the result
	bra	divsi3_fix_sign

; __umodsi3: A0P = A0P % A1P, unsigned
	.global	__umodsi3
__umodsi3:
	bsr	__udivsi3:16
	mov.l	er3,er0
	rts

; __divsi3: A0P = A0P / A1P, signed
	.global	__divsi3
__divsi3:
	PUSHP	S2P
	bsr	divsi3_abs
	bsr	__udivsi3:16
	; fall through: apply the sign recorded in S2L
divsi3_fix_sign:
	btst	#3,S2L
	beq	divsi3_ret
	neg.l	A0P			; the result has to be negative
divsi3_ret:
	POPP	S2P
	rts

; Make both operands non-negative for a division.
; S2L bit 3 ends up set when exactly one operand was negative.
divsi3_abs:
	mov.l	A0P,A0P			; test the numerator
	stc	ccr,S2L			; N flag lands in bit 3 of S2L
	bge	divsi3_num_ok
	neg.l	A0P
divsi3_num_ok:
	mov.l	A1P,A1P			; test the denominator
	bge	divsi3_den_ok
	neg.l	A1P
	xor.b	#0x08,S2L		; a negative denominator flips the sign
divsi3_den_ok:
	rts

; Same for a modulo, except that the denominator does not flip the
; recorded sign: S2L bit 3 is the sign of the numerator alone.
modsi3_abs:
	mov.l	A0P,A0P			; test the numerator
	stc	ccr,S2L			; N flag lands in bit 3 of S2L
	bge	modsi3_num_ok
	neg.l	A0P
modsi3_num_ok:
	mov.l	A1P,A1P			; test the denominator
	bge	modsi3_den_ok
	neg.l	A1P
modsi3_den_ok:
	rts
	.end
#include "libgcc.h"
; Signed 32-bit divide and modulo, plus unsigned modulo.
; numerator in A0P, denominator in A1P (the registers the code below
; tests and negates); the result is returned in A0P (er0).
; S2L bit 3 carries the sign the result must get.
;
; NOTE(review): this file defines the same global symbols as
; moddivsi3.S and differs from it only in how calls are encoded
; (jsr divnorm, bsr __udivsi3 without :16).  The Makefile lists
; moddivsi3.o and no modsi3.o - confirm whether this file is needed.

; __modsi3: A0P = A0P % A1P; the result takes the sign of the numerator
.global __modsi3
__modsi3:
PUSHP S2P
bsr modnorm
; __divsi3 pushes and pops S2P itself, so the sign kept in S2L by
; modnorm is still there when it returns
bsr __divsi3
; the unsigned divide left the remainder in er3
mov.l er3,er0
bra exitdiv
; __umodsi3: A0P = A0P % A1P, unsigned
.global __umodsi3
__umodsi3:
bsr __udivsi3
mov.l er3,er0
rts
; __divsi3: A0P = A0P / A1P, signed
.global __divsi3
__divsi3:
PUSHP S2P
jsr divnorm
bsr __udivsi3
; examine what the sign should be
exitdiv:
btst #3,S2L
beq reti
; should be negative
neg.l A0P
reti:
POPP S2P
rts
; divnorm: make both operands non-negative; S2L bit 3 ends up set when
; exactly one of them was negative
divnorm:
mov.l A0P,A0P ; is the numerator negative?
stc ccr,S2L ; keep the sign in bit 3 of S2L
bge postive
neg.l A0P ; negate arg
postive:
mov.l A1P,A1P ; is the denominator negative?
bge postive2
neg.l A1P ; negate arg
xor.b #0x08,S2L ; toggle the result sign
postive2:
rts
;; Basically the same, except that the denominator does not toggle the
;; recorded sign: S2L bit 3 is the sign of the numerator alone.
modnorm:
mov.l A0P,A0P ; is the numerator negative?
stc ccr,S2L ; keep the sign in bit 3 of S2L
bge mpostive
neg.l A0P ; negate arg
mpostive:
mov.l A1P,A1P ; is the denominator negative?
bge mpostive2
neg.l A1P ; negate arg
mpostive2:
rts
.end
#include "libgcc.h"
/*
 * Half-word helpers: __ll_B is 2 ** (W_TYPE_SIZE / 2); __ll_lowpart()
 * and __ll_highpart() return the low and high half of a word.
 */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
/*
 * umul_ppmm(w1, w0, u, v) - multiply the words u and v and store the
 * double-word product as high word w1 and low word w0.
 *
 * Both operands are split into half words and the four partial
 * products are summed.  __x1 collects the two middle products plus
 * the high half of the low product; when adding __x2 wraps __x1
 * around, the lost carry (worth __ll_B) is added to the high product.
 */
#define umul_ppmm(w1, w0, u, v) \
do { \
UWtype __x0, __x1, __x2, __x3; \
UHWtype __ul, __vl, __uh, __vh; \
__ul = __ll_lowpart(u); \
__uh = __ll_highpart(u); \
__vl = __ll_lowpart(v); \
__vh = __ll_highpart(v); \
__x0 = (UWtype) __ul * __vl; \
__x1 = (UWtype) __ul * __vh; \
__x2 = (UWtype) __uh * __vl; \
__x3 = (UWtype) __uh * __vh; \
__x1 += __ll_highpart(__x0); \
__x1 += __x2; \
if (__x1 < __x2) \
__x3 += __ll_B; \
(w1) = __x3 + __ll_highpart(__x1); \
(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \
} while (0)
/*
 * __umulsidi3(u, v) - unsigned word x word -> double-word product,
 * written as a GCC statement expression that evaluates to a DWtype.
 */
#define __umulsidi3(u, v) ( \
{ \
DWunion __w; \
umul_ppmm(__w.s.high, __w.s.low, u, v); \
__w.ll; } \
)
/*
 * __muldi3 - multiply two double-word values.
 * @u: first factor
 * @v: second factor
 *
 * Returns the low double word of @u * @v: the full product of the two
 * low words, plus both cross products added into the high word.  The
 * product of the two high words lies entirely above the result and is
 * not computed.
 */
DWtype __muldi3(DWtype u, DWtype v)
{
	DWunion lhs, rhs, product;
	UWtype cross;

	lhs.ll = u;
	rhs.ll = v;

	product.ll = __umulsidi3(lhs.s.low, rhs.s.low);

	cross = (UWtype) lhs.s.low * (UWtype) rhs.s.high;
	cross += (UWtype) lhs.s.high * (UWtype) rhs.s.low;
	product.s.high += cross;

	return product.ll;
}
;
; __mulsi3 - 32 x 32 -> 32 bit multiply for H8/300H,
; based on the Renesas SH implementation
;
; by Toshiyasu Morita
;
; Operands: er0 = a:b (e0 = a, r0 = b)
;           er1 = c:d (e1 = c, r1 = d)
; Result:   er0 = b*d + ((a*d + c*b) << 16), kept to 32 bits
; Uses:     er2, er3
;
; A cross product is skipped when its upper-half factor (a or c)
; is zero.
;
; Cost of the old code:
;
;   16b * 16b = 372 states (worst case)
;   32b * 32b = 724 states (worst case)
;
; Cost of this code:
;
;   16b * 16b = 48 states
;   16b * 32b = 72 states
;   32b * 32b = 92 states
;
	.global	__mulsi3
__mulsi3:
	mov.w	r1,r2			; ( 2 states) er2 = b * d
	mulxu	r0,er2			; (22 states)

	mov.w	e0,r3			; ( 2 states) a * d, skipped when a == 0
	beq	mulsi3_no_ad		; ( 4 states)
	mulxu	r1,er3			; (22 states)
	add.w	r3,e2			; ( 2 states) add into the upper half

mulsi3_no_ad:
	mov.w	e1,r3			; ( 2 states) c * b, skipped when c == 0
	beq	mulsi3_no_cb		; ( 4 states)
	mulxu	r0,er3			; (22 states)
	add.w	r3,e2			; ( 2 states) add into the upper half

mulsi3_no_cb:
	mov.l	er2,er0			; ( 2 states) return the product
	rts				; (10 states)
	.end
;;; strncpy.S
#include <asm/linkage.h>
.text
.global strncpy_from_user
;;; long strncpy_from_user(void *to, void *from, size_t n)
strncpy_from_user:
mov.l er2,er2
bne 1f
sub.l er0,er0
rts
1:
mov.l er4,@-sp
sub.l er3,er3
2:
mov.b @er1+,r4l
mov.b r4l,@er0
adds #1,er0
beq 3f
inc.l #1,er3
dec.l #1,er2
bne 2b
3:
dec.l #1,er2
4:
mov.b r4l,@er0
adds #1,er0
dec.l #1,er2
bne 4b
mov.l er3,er0
mov.l @sp+,er4
rts
#include "libgcc.h"
/*
 * __ucmpdi2 - unsigned three-way comparison of two double words.
 * @a: left operand
 * @b: right operand
 *
 * Returns 0 when @a < @b, 1 when they are equal and 2 when @a > @b,
 * comparing the high words first and the low words on a tie.
 */
word_type __ucmpdi2(DWtype a, DWtype b)
{
	const DWunion lhs = {.ll = a};
	const DWunion rhs = {.ll = b};
	const UWtype lhs_high = (UWtype) lhs.s.high;
	const UWtype rhs_high = (UWtype) rhs.s.high;
	const UWtype lhs_low = (UWtype) lhs.s.low;
	const UWtype rhs_low = (UWtype) rhs.s.low;

	if (lhs_high != rhs_high)
		return lhs_high < rhs_high ? 0 : 2;
	if (lhs_low != rhs_low)
		return lhs_low < rhs_low ? 0 : 2;
	return 1;
}
#include "libgcc.h"
;; __udivsi3 - unsigned 32-bit division
;;   in:   A0P (er0) = numerator, A1P (er1) = denominator
;;   out:  er0 = quotient
;; This function also computes the remainder and stores it in er3.
;; er2 is used as scratch on both paths.
.global __udivsi3
__udivsi3:
mov.w A1E,A1E ; denominator top word 0?
bne DenHighNonZero
; do it the easy way, see page 107 in manual
; the denominator fits in 16 bits: two chained 32-by-16 divides, the
; remainder of the first one becoming the upper half of the second
; dividend
; er2 = upper 16 bits of the numerator, zero-extended
mov.w A0E,A2
extu.l A2P
; r2 = upper half of the quotient, e2 = remainder
divxu.w A1,A2P
; er0 = that remainder : lower 16 bits of the numerator
mov.w A2E,A0E
; r0 = lower half of the quotient, e0 = final remainder
divxu.w A1,A0P
; er3 = remainder (zero-extended below), e0 = upper half of the quotient
mov.w A0E,A3
mov.w A2,A0E
extu.l A3P
rts
; er0 = er0 / er1
; er3 = er0 % er1
; trashes er1 er2
; expects er1 >= 2^16
DenHighNonZero:
; er3 keeps the full dividend, er2 becomes the shifted divisor
mov.l er0,er3
mov.l er1,er2
#ifdef CONFIG_CPU_H8300H
; shift dividend and divisor right one bit at a time until the divisor
; fits in 16 bits
divmod_L21:
shlr.l er0
shlr.l er2 ; make divisor < 2^16
mov.w e2,e2
bne divmod_L21
#else
; this variant shifts two bits at a time (shlr.l #2) - presumably the
; H8S build; TODO confirm.  The dividend shift trails the divisor shift
; and is brought level at divmod_L22A..divmod_L24.
shlr.l #2,er2 ; make divisor < 2^16
mov.w e2,e2
beq divmod_L22A
divmod_L21:
shlr.l #2,er0
divmod_L22:
shlr.l #2,er2 ; make divisor < 2^16
mov.w e2,e2
bne divmod_L21
divmod_L22A:
; carry = top bit of r2: if it is clear, r2 stays shifted left by one
; (undoing one bit of the last shift) and the dividend is shifted by
; one bit; otherwise r2 is rotated back and the dividend is shifted by
; two bits
rotxl.w r2
bcs divmod_L23
shlr.l er0
bra divmod_L24
divmod_L23:
rotxr.w r2
shlr.l #2,er0
divmod_L24:
#endif
;; At this point,
;; er0 contains shifted dividend
;; er1 contains divisor
;; er2 contains shifted divisor
;; er3 contains dividend, later remainder
divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ)
extu.l er0
; AQ == 0: skip the multiply, er3 still holds the whole dividend
beq divmod_L25
subs #1,er0 ; er0 = AQ - 1
mov.w e1,r2
mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor
sub.w r2,e3 ; dividend - 65536 * er2
mov.w r1,r2
mulxu.w r0,er2 ; compute er3 = remainder (tentative)
sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor
divmod_L25:
; a single correction step: if the tentative remainder is not below
; the divisor, add one to the quotient and take the divisor off the
; remainder
cmp.l er1,er3 ; is remainder >= divisor?
blo divmod_L26
adds #1,er0
sub.l er1,er3 ; correct the remainder
divmod_L26:
rts
.end
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册