From e008a4cf592cc727f96db1da08d4c3b39d43b6cc Mon Sep 17 00:00:00 2001 From: root Date: Tue, 29 Jun 2021 11:16:28 +0200 Subject: [PATCH] Last irixxxx's optimizations --- cpu/cz80/cz80.c | 3 ++- cpu/sh2/compiler.c | 16 ++++++++++------ cpu/sh2/mame/sh2.c | 1 + cpu/sh2/mame/sh2pico.c | 6 ++++-- cpu/sh2/sh2.h | 3 +++ pico/32x/memory.c | 30 +++++++++++++++++------------- pico/pico_int.h | 4 ++++ platform/common/emu.c | 5 +++-- 8 files changed, 44 insertions(+), 24 deletions(-) diff --git a/cpu/cz80/cz80.c b/cpu/cz80/cz80.c index 7f432bda..82de13f8 100644 --- a/cpu/cz80/cz80.c +++ b/cpu/cz80/cz80.c @@ -211,7 +211,8 @@ void Cz80_Init(cz80_struc *CPU) void Cz80_Reset(cz80_struc *CPU) { - memset(CPU, 0, (FPTR)&CPU->BasePC - (FPTR)CPU); + // I, R, CPU and interrupts logic is reset, registers are untouched + memset(&CPU->R, 0, (FPTR)&CPU->BasePC - (FPTR)&CPU->R); Cz80_Set_Reg(CPU, CZ80_PC, 0); } diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 08b2c80f..bb2206c1 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -68,7 +68,6 @@ // 200 - compare trace // 400 - block entry backtrace on exit // 800 - state dump on exit -// { #ifndef DRC_DEBUG #define DRC_DEBUG 0//x847 #endif @@ -218,7 +217,7 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) if (block != NULL) { #if defined PDB dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave?'s':'m', - sh2->pc, block, (signed int)sr >> 12); + sh2->pc, block, ((signed int)sr >> 12)+1); pdb_step(sh2, sh2->pc); #elif (DRC_DEBUG & 8) if (lastpc != sh2->pc) { @@ -2612,10 +2611,14 @@ static uptr split_address(uptr la, uptr mask, s32 *offs) uptr sign = (mask>>1) + 1; // sign bit in offset *offs = (la & mask) | (la & sign ? ~mask : 0); // offset part, sign extended la = (la & ~mask) + ((la & sign) << 1); // base part, corrected for offs sign - if (~mask && la == ~mask && *offs > 0) { // special case la=-1&~mask && offs>0 - *offs -= mask+1; - la = 0; +#ifdef __arm__ + // arm32 offset has an add/sub flag and an unsigned 8 bit value, which only + // allows values of [-255...255]. the value -256 thus can't be used. + if (*offs + sign == 0) { + la += sign; + *offs += sign; } +#endif return la; } @@ -4414,6 +4417,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) EMITH_HINT_COND(DCOND_EQ); emith_subf_r_r_imm(tmp, tmp2, 1); emith_set_t_cond(sr, DCOND_EQ); + emith_or_r_imm(sr, SH2_NO_POLLING); goto end_op; } goto default_; @@ -5013,7 +5017,7 @@ end_op: // can't resolve branch locally, make a block exit bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); if (cond != -1) { -#if 1 +#ifndef __arm__ if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) { // conditional jumps get a blx stub for the far jump bl->type = BL_JCCBLX; diff --git a/cpu/sh2/mame/sh2.c b/cpu/sh2/mame/sh2.c index fa49153a..17b96a31 100644 --- a/cpu/sh2/mame/sh2.c +++ b/cpu/sh2/mame/sh2.c @@ -800,6 +800,7 @@ INLINE void DT(sh2_state *sh2, UINT32 n) sh2->sr |= T; else sh2->sr &= ~T; + sh2->no_polling = SH2_NO_POLLING; #if BUSY_LOOP_HACKS { UINT32 next_opcode = (UINT32)(UINT16)RW( sh2, sh2->ppc & AM ); diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index f4ae85cb..65f4757e 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -24,9 +24,10 @@ typedef u8 UINT8; static __inline unsigned int name(SH2 *sh2, unsigned int a) \ { \ unsigned int ret; \ - sh2->sr |= sh2->icount << 12; \ + sh2->sr |= (sh2->icount << 12) | (sh2->no_polling); \ ret = cname(a, sh2); \ sh2->icount = (signed int)sh2->sr >> 12; \ + sh2->no_polling = (sh2->sr & SH2_NO_POLLING); \ sh2->sr &= 0x3f3; \ return ret; \ } @@ -34,9 +35,10 @@ static __inline unsigned int name(SH2 *sh2, unsigned int a) \ #define MAKE_WRITEFUNC(name, cname) \ static __inline void name(SH2 *sh2, unsigned int a, unsigned int d) \ { \ - sh2->sr |= sh2->icount << 12; \ + sh2->sr |= (sh2->icount << 12) | (sh2->no_polling); \ cname(a, d, sh2); \ sh2->icount = (signed int)sh2->sr >> 12; \ + sh2->no_polling = (sh2->sr & SH2_NO_POLLING); \ sh2->sr &= 0x3f3; \ } diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 614e7de1..bb8debe0 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -56,6 +56,9 @@ typedef struct SH2_ uint32_t poll_addr; int poll_cycles; int poll_cnt; +// NB MUST be a bit unused in SH2 SR, see also cpu/sh2/compiler.c! +#define SH2_NO_POLLING (1 << 10) // poll detection control + int no_polling; // DRC branch cache. size must be 2^n and <=128 int rts_cache_idx; diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 0b13d7c7..d49cd2a5 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -120,7 +120,8 @@ void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) // by checking address (max 2 bytes away) and cycles (max 2 cycles later). // no polling if more than 20 cycles have passed since last detect call. if (a - sh2->poll_addr <= 2 && CYCLES_GE(20, cycles_diff)) { - if (CYCLES_GT(cycles_diff, 2) && ++sh2->poll_cnt >= maxcnt) { + if (!sh2_not_polling(sh2) && CYCLES_GT(cycles_diff, 2) && + ++sh2->poll_cnt >= maxcnt) { if (!(sh2->state & flags)) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state | flags); @@ -144,6 +145,7 @@ void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) sh2->poll_addr = a; } sh2->poll_cycles = cycles_done; + sh2_set_polling(sh2); } void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) @@ -511,16 +513,17 @@ static void p32x_reg_write8(u32 a, u32 d) case 0x2d: case 0x2e: case 0x2f: - if (REG8IN16(r, a) != (u8)d) { - unsigned int cycles = SekCyclesDone(); + { unsigned int cycles = SekCyclesDone(); if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64)) p32x_sync_sh2s(cycles); - REG8IN16(r, a) = d; - p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); - p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - sh2_poll_write(a & ~1, r[a / 2], cycles, NULL); + if (REG8IN16(r, a) != (u8)d) { + REG8IN16(r, a) = d; + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); + sh2_poll_write(a & ~1, r[a / 2], cycles, NULL); + } } return; case 0x30: @@ -608,16 +611,17 @@ static void p32x_reg_write16(u32 a, u32 d) case 0x2a/2: case 0x2c/2: case 0x2e/2: - if (r[a / 2] != (u16)d) { - unsigned int cycles = SekCyclesDone(); + { unsigned int cycles = SekCyclesDone(); if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64)) p32x_sync_sh2s(cycles); - r[a / 2] = d; - p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); - p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - sh2_poll_write(a, (u16)d, cycles, NULL); + if (r[a / 2] != (u16)d) { + r[a / 2] = d; + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); + sh2_poll_write(a, (u16)d, cycles, NULL); + } } return; case 0x30/2: // PWM control diff --git a/pico/pico_int.h b/pico/pico_int.h index 18e33418..83e81418 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -224,6 +224,8 @@ extern SH2 sh2s[2]; # define sh2_cycles_left(sh2) (sh2)->icount # define sh2_burn_cycles(sh2, n) (sh2)->icount -= n # define sh2_pc(sh2) (sh2)->ppc +# define sh2_not_polling(sh2) (sh2)->no_polling +# define sh2_set_polling(sh2) (sh2)->no_polling = 0 #else # define sh2_end_run(sh2, after_) do { \ int left_ = ((signed int)(sh2)->sr >> 12) - (after_); \ @@ -235,6 +237,8 @@ extern SH2 sh2s[2]; # define sh2_cycles_left(sh2) ((signed int)(sh2)->sr >> 12) # define sh2_burn_cycles(sh2, n) (sh2)->sr -= ((n) << 12) # define sh2_pc(sh2) (sh2)->pc +# define sh2_not_polling(sh2) ((sh2)->sr & SH2_NO_POLLING) +# define sh2_set_polling(sh2) ((sh2)->sr &= ~SH2_NO_POLLING) #endif #define sh2_cycles_done(sh2) (unsigned)((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) diff --git a/platform/common/emu.c b/platform/common/emu.c index 627374f6..3a5ee5f2 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1683,6 +1683,7 @@ static void emu_loop_prep(void) /* our tick here is 1 us right now */ #define ms_to_ticks(x) (unsigned int)(x * 1000) #define get_ticks() plat_get_ticks_us() +#define vsync_delay_x3 3*ms_to_ticks(1) void emu_loop(void) { @@ -1860,13 +1861,13 @@ void emu_loop(void) diff = timestamp_aim_x3 - timestamp * 3; // sleep or vsync if we are still too fast - if (diff > target_frametime_x3 && (currentConfig.EmuOpt & EOPT_VSYNC)) { + if (diff > target_frametime_x3 + vsync_delay_x3 && (currentConfig.EmuOpt & EOPT_VSYNC)) { // we are too fast plat_video_wait_vsync(); timestamp = get_ticks(); diff = timestamp * 3 - timestamp_aim_x3; } - if (diff > target_frametime_x3) { + if (diff > target_frametime_x3 + vsync_delay_x3) { // still too fast plat_wait_till_us(timestamp + (diff - target_frametime_x3) / 3); }