32x, optimize poll detection and sh2 drc, fix constant memory address access calculation

This commit is contained in:
root
2021-06-21 08:42:45 +02:00
parent fa5f425ee0
commit 5986ec32c8
2 changed files with 10 additions and 9 deletions

View File

@@ -2612,8 +2612,8 @@ static uptr split_address(uptr la, uptr mask, s32 *offs)
uptr sign = (mask>>1) + 1; // sign bit in offset
*offs = (la & mask) | (la & sign ? ~mask : 0); // offset part, sign extended
la = (la & ~mask) + ((la & sign) << 1); // base part, corrected for offs sign
if (~mask && la == ~mask && !(*offs & sign)) { // special case la=-1 & offs>0
*offs = -*offs;
if (~mask && la == ~mask && *offs > 0) { // special case la=-1&~mask && offs>0
*offs -= mask+1;
la = 0;
}
return la;
@@ -2676,8 +2676,9 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, s32 *offs)
// known fixed host address
la = split_address(la + ((a + *offs) & mask), omask, offs);
if (la == 0) {
la = *offs;
*offs = 0;
// offset only. optimize for hosts having short indexed addressing
la = *offs & ~0x7f; // keep the lower bits for endianness handling
*offs &= 0x7f;
}
hr = rcache_get_tmp();
emith_move_r_ptr_imm(hr, la);

View File

@@ -119,7 +119,7 @@ void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt)
// reading 2 consecutive 16bit values is probably a 32bit access. detect this
// by checking address (max 2 bytes away) and cycles (max 2 cycles later).
// no polling if more than 20 cycles have passed since last detect call.
if (a - sh2->poll_addr <= 2 && CYCLES_GE(20, cycles_diff)) {
if (a - sh2->poll_addr <= 2 && CYCLES_GE(10, cycles_diff)) {
if (CYCLES_GT(cycles_diff, 2) && ++sh2->poll_cnt >= maxcnt) {
if (!(sh2->state & flags))
elprintf_sh2(sh2, EL_32X, "state: %02x->%02x",
@@ -736,7 +736,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0]
| Pico32x.sh2irq_mask[sh2->is_slave];
case 0x04/2: // H count (often as comm too)
p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 5);
cycles = sh2_cycles_done_m68k(sh2);
sh2s_sync_on_read(sh2, cycles);
return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2);
@@ -770,7 +770,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2)
case 0x2a/2:
case 0x2c/2:
case 0x2e/2:
p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9);
p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 5);
cycles = sh2_cycles_done_m68k(sh2);
sh2s_sync_on_read(sh2, cycles);
return sh2_poll_read(a, r[a / 2], cycles, sh2);
@@ -1457,7 +1457,7 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2)
if ((a & 0x3fff0) == 0x4100) {
d = p32x_vdp_read16(a);
p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9);
p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 7);
goto out_16to8;
}
@@ -1520,7 +1520,7 @@ static u32 REGPARM(2) sh2_read16_cs0(u32 a, SH2 *sh2)
if ((a & 0x3fff0) == 0x4100) {
d = p32x_vdp_read16(a);
p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9);
p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 7);
goto out;
}