diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 0d5d8244ccad617fed999285b06b3f62745696e9..07e48c0c29b9a6edfc61858b9c5be202191f872d 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -184,7 +184,10 @@ enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_EFLAGS, /* all cc are explicitely computed, CC_SRC = flags */ - CC_OP_MUL, /* modify all flags, C, O = (CC_SRC != 0) */ + + CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */ + CC_OP_MULW, + CC_OP_MULL, CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ CC_OP_ADDW, diff --git a/target-i386/op.c b/target-i386/op.c index 8988e061cb739d7348228de4d837844cf4331813..5423be5a3a6f7dd0989d29fe1c4658e62cfee641 100644 --- a/target-i386/op.c +++ b/target-i386/op.c @@ -169,11 +169,16 @@ void OPPROTO op_bswapl_T0(void) } /* multiply/divide */ + +/* XXX: add eflags optimizations */ +/* XXX: add non P4 style flags */ + void OPPROTO op_mulb_AL_T0(void) { unsigned int res; res = (uint8_t)EAX * (uint8_t)T0; EAX = (EAX & 0xffff0000) | res; + CC_DST = res; CC_SRC = (res & 0xff00); } @@ -182,6 +187,7 @@ void OPPROTO op_imulb_AL_T0(void) int res; res = (int8_t)EAX * (int8_t)T0; EAX = (EAX & 0xffff0000) | (res & 0xffff); + CC_DST = res; CC_SRC = (res != (int8_t)res); } @@ -191,6 +197,7 @@ void OPPROTO op_mulw_AX_T0(void) res = (uint16_t)EAX * (uint16_t)T0; EAX = (EAX & 0xffff0000) | (res & 0xffff); EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff); + CC_DST = res; CC_SRC = res >> 16; } @@ -200,6 +207,7 @@ void OPPROTO op_imulw_AX_T0(void) res = (int16_t)EAX * (int16_t)T0; EAX = (EAX & 0xffff0000) | (res & 0xffff); EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff); + CC_DST = res; CC_SRC = (res != (int16_t)res); } @@ -209,6 +217,7 @@ void OPPROTO op_mull_EAX_T0(void) res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0); EAX = res; EDX = res >> 32; + CC_DST = res; CC_SRC = res >> 32; } @@ -218,6 +227,7 @@ void OPPROTO op_imull_EAX_T0(void) res = (int64_t)((int32_t)EAX) * 
(int64_t)((int32_t)T0); EAX = res; EDX = res >> 32; + CC_DST = res; CC_SRC = (res != (int32_t)res); } @@ -226,6 +236,7 @@ void OPPROTO op_imulw_T0_T1(void) int res; res = (int16_t)T0 * (int16_t)T1; T0 = res; + CC_DST = res; CC_SRC = (res != (int16_t)res); } @@ -234,6 +245,7 @@ void OPPROTO op_imull_T0_T1(void) int64_t res; res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); T0 = res; + CC_DST = res; CC_SRC = (res != (int32_t)res); } @@ -1293,31 +1305,14 @@ static int compute_c_eflags(void) return CC_SRC & CC_C; } -static int compute_c_mul(void) -{ - int cf; - cf = (CC_SRC != 0); - return cf; -} - -static int compute_all_mul(void) -{ - int cf, pf, af, zf, sf, of; - cf = (CC_SRC != 0); - pf = 0; /* undefined */ - af = 0; /* undefined */ - zf = 0; /* undefined */ - sf = 0; /* undefined */ - of = cf << 11; - return cf | pf | af | zf | sf | of; -} - CCTable cc_table[CC_OP_NB] = { [CC_OP_DYNAMIC] = { /* should never happen */ }, [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags }, - [CC_OP_MUL] = { compute_all_mul, compute_c_mul }, + [CC_OP_MULB] = { compute_all_mulb, compute_c_mull }, + [CC_OP_MULW] = { compute_all_mulw, compute_c_mull }, + [CC_OP_MULL] = { compute_all_mull, compute_c_mull }, [CC_OP_ADDB] = { compute_all_addb, compute_c_addb }, [CC_OP_ADDW] = { compute_all_addw, compute_c_addw }, diff --git a/target-i386/ops_template.h b/target-i386/ops_template.h index 064881558baf7963554fc9229822714e3164681c..a486d20818e27777829fc6c4e0d7eaae2b8f5dad 100644 --- a/target-i386/ops_template.h +++ b/target-i386/ops_template.h @@ -229,6 +229,29 @@ static int glue(compute_all_sar, SUFFIX)(void) return cf | pf | af | zf | sf | of; } +#if DATA_BITS == 32 +static int glue(compute_c_mul, SUFFIX)(void) +{ + int cf; + cf = (CC_SRC != 0); + return cf; +} +#endif + +/* NOTE: we compute the flags like the P4. On older CPUs, only OF and + CF are modified and it is slower to do that. 
*/ +static int glue(compute_all_mul, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = (CC_SRC != 0); + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = cf << 11; + return cf | pf | af | zf | sf | of; +} + /* various optimized jumps cases */ void OPPROTO glue(op_jb_sub, SUFFIX)(void) diff --git a/target-i386/translate.c b/target-i386/translate.c index 9c5c52e4b2370327ba3b70f69fec97f53d40dfb4..35067bff62a06eadb1f1bac34d8941695c71192e 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -2016,31 +2016,35 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start) switch(ot) { case OT_BYTE: gen_op_mulb_AL_T0(); + s->cc_op = CC_OP_MULB; break; case OT_WORD: gen_op_mulw_AX_T0(); + s->cc_op = CC_OP_MULW; break; default: case OT_LONG: gen_op_mull_EAX_T0(); + s->cc_op = CC_OP_MULL; break; } - s->cc_op = CC_OP_MUL; break; case 5: /* imul */ switch(ot) { case OT_BYTE: gen_op_imulb_AL_T0(); + s->cc_op = CC_OP_MULB; break; case OT_WORD: gen_op_imulw_AX_T0(); + s->cc_op = CC_OP_MULW; break; default: case OT_LONG: gen_op_imull_EAX_T0(); + s->cc_op = CC_OP_MULL; break; } - s->cc_op = CC_OP_MUL; break; case 6: /* div */ switch(ot) { @@ -2235,7 +2239,7 @@ static uint8_t *disas_insn(DisasContext *s, uint8_t *pc_start) gen_op_imulw_T0_T1(); } gen_op_mov_reg_T0[ot][reg](); - s->cc_op = CC_OP_MUL; + s->cc_op = CC_OP_MULB + ot; break; case 0x1c0: case 0x1c1: /* xadd Ev, Gv */