Last irixxxx's optimizations

This commit is contained in:
root 2021-06-29 11:16:28 +02:00
parent 5110c84bee
commit e008a4cf59
8 changed files with 44 additions and 24 deletions

View File

@ -211,7 +211,8 @@ void Cz80_Init(cz80_struc *CPU)
/*
 * Reset the Z80 core while leaving the general-purpose registers alone.
 *
 * Only the bytes of *CPU from the R field up to (but not including) BasePC
 * are zeroed; whatever is stored ahead of R in the structure is preserved.
 * Afterwards the program counter is set to 0 via Cz80_Set_Reg().
 *
 * Clearing from the start of the structure would also wipe the registers
 * and would make the narrower clear below pointless, so it must not be done.
 */
void Cz80_Reset(cz80_struc *CPU)
{
    // I, R, CPU and interrupts logic is reset, registers are untouched
    // NOTE(review): relies on the fields to be reset lying between R and
    // BasePC in cz80_struc -- confirm against the struct layout.
    memset(&CPU->R, 0, (FPTR)&CPU->BasePC - (FPTR)&CPU->R);
    Cz80_Set_Reg(CPU, CZ80_PC, 0);
}

View File

@ -68,7 +68,6 @@
// 200 - compare trace
// 400 - block entry backtrace on exit
// 800 - state dump on exit
// {
#ifndef DRC_DEBUG
#define DRC_DEBUG 0//x847
#endif
@ -218,7 +217,7 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
if (block != NULL) {
#if defined PDB
dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave?'s':'m',
sh2->pc, block, (signed int)sr >> 12);
sh2->pc, block, ((signed int)sr >> 12)+1);
pdb_step(sh2, sh2->pc);
#elif (DRC_DEBUG & 8)
if (lastpc != sh2->pc) {
@ -2612,10 +2611,14 @@ static uptr split_address(uptr la, uptr mask, s32 *offs)
uptr sign = (mask>>1) + 1; // sign bit in offset
*offs = (la & mask) | (la & sign ? ~mask : 0); // offset part, sign extended
la = (la & ~mask) + ((la & sign) << 1); // base part, corrected for offs sign
if (~mask && la == ~mask && *offs > 0) { // special case la=-1&~mask && offs>0
*offs -= mask+1;
la = 0;
#ifdef __arm__
// arm32 offset has an add/sub flag and an unsigned 8 bit value, which only
// allows values of [-255...255]. the value -256 thus can't be used.
if (*offs + sign == 0) {
la += sign;
*offs += sign;
}
#endif
return la;
}
@ -4414,6 +4417,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
EMITH_HINT_COND(DCOND_EQ);
emith_subf_r_r_imm(tmp, tmp2, 1);
emith_set_t_cond(sr, DCOND_EQ);
emith_or_r_imm(sr, SH2_NO_POLLING);
goto end_op;
}
goto default_;
@ -5013,7 +5017,7 @@ end_op:
// can't resolve branch locally, make a block exit
bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id);
if (cond != -1) {
#if 1
#ifndef __arm__
if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) {
// conditional jumps get a blx stub for the far jump
bl->type = BL_JCCBLX;

View File

@ -800,6 +800,7 @@ INLINE void DT(sh2_state *sh2, UINT32 n)
sh2->sr |= T;
else
sh2->sr &= ~T;
sh2->no_polling = SH2_NO_POLLING;
#if BUSY_LOOP_HACKS
{
UINT32 next_opcode = (UINT32)(UINT16)RW( sh2, sh2->ppc & AM );

View File

@ -24,9 +24,10 @@ typedef u8 UINT8;
static __inline unsigned int name(SH2 *sh2, unsigned int a) \
{ \
unsigned int ret; \
sh2->sr |= sh2->icount << 12; \
sh2->sr |= (sh2->icount << 12) | (sh2->no_polling); \
ret = cname(a, sh2); \
sh2->icount = (signed int)sh2->sr >> 12; \
sh2->no_polling = (sh2->sr & SH2_NO_POLLING); \
sh2->sr &= 0x3f3; \
return ret; \
}
@ -34,9 +35,10 @@ static __inline unsigned int name(SH2 *sh2, unsigned int a) \
/*
 * MAKE_WRITEFUNC(name, cname): define a static inline wrapper `name` around
 * the write handler `cname(a, d, sh2)`.
 *
 * Before the call, icount (shifted left by 12) and the no_polling flag are
 * ORed into SR in a single step. After the call, icount is recovered from
 * SR >> 12, no_polling from the SH2_NO_POLLING bit, and SR is masked back
 * to 0x3f3.
 */
#define MAKE_WRITEFUNC(name, cname) \
static __inline void name(SH2 *sh2, unsigned int a, unsigned int d) \
{ \
    sh2->sr |= (sh2->icount << 12) | (sh2->no_polling); \
    cname(a, d, sh2); \
    sh2->icount = (signed int)sh2->sr >> 12; \
    sh2->no_polling = (sh2->sr & SH2_NO_POLLING); \
    sh2->sr &= 0x3f3; \
}

View File

@ -56,6 +56,9 @@ typedef struct SH2_
uint32_t poll_addr;
int poll_cycles;
int poll_cnt;
// NB MUST be a bit unused in SH2 SR, see also cpu/sh2/compiler.c!
#define SH2_NO_POLLING (1 << 10) // poll detection control
int no_polling;
// DRC branch cache. size must be 2^n and <=128
int rts_cache_idx;

View File

@ -120,7 +120,8 @@ void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt)
// by checking address (max 2 bytes away) and cycles (max 2 cycles later).
// no polling if more than 20 cycles have passed since last detect call.
if (a - sh2->poll_addr <= 2 && CYCLES_GE(20, cycles_diff)) {
if (CYCLES_GT(cycles_diff, 2) && ++sh2->poll_cnt >= maxcnt) {
if (!sh2_not_polling(sh2) && CYCLES_GT(cycles_diff, 2) &&
++sh2->poll_cnt >= maxcnt) {
if (!(sh2->state & flags))
elprintf_sh2(sh2, EL_32X, "state: %02x->%02x",
sh2->state, sh2->state | flags);
@ -144,6 +145,7 @@ void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt)
sh2->poll_addr = a;
}
sh2->poll_cycles = cycles_done;
sh2_set_polling(sh2);
}
void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles)
@ -511,16 +513,17 @@ static void p32x_reg_write8(u32 a, u32 d)
case 0x2d:
case 0x2e:
case 0x2f:
if (REG8IN16(r, a) != (u8)d) {
unsigned int cycles = SekCyclesDone();
{ unsigned int cycles = SekCyclesDone();
if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64))
p32x_sync_sh2s(cycles);
REG8IN16(r, a) = d;
p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles);
p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles);
sh2_poll_write(a & ~1, r[a / 2], cycles, NULL);
if (REG8IN16(r, a) != (u8)d) {
REG8IN16(r, a) = d;
p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles);
p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles);
sh2_poll_write(a & ~1, r[a / 2], cycles, NULL);
}
}
return;
case 0x30:
@ -608,16 +611,17 @@ static void p32x_reg_write16(u32 a, u32 d)
case 0x2a/2:
case 0x2c/2:
case 0x2e/2:
if (r[a / 2] != (u16)d) {
unsigned int cycles = SekCyclesDone();
{ unsigned int cycles = SekCyclesDone();
if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64))
p32x_sync_sh2s(cycles);
r[a / 2] = d;
p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles);
p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles);
sh2_poll_write(a, (u16)d, cycles, NULL);
if (r[a / 2] != (u16)d) {
r[a / 2] = d;
p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles);
p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles);
sh2_poll_write(a, (u16)d, cycles, NULL);
}
}
return;
case 0x30/2: // PWM control

View File

@ -224,6 +224,8 @@ extern SH2 sh2s[2];
# define sh2_cycles_left(sh2) (sh2)->icount
# define sh2_burn_cycles(sh2, n) (sh2)->icount -= n
# define sh2_pc(sh2) (sh2)->ppc
# define sh2_not_polling(sh2) (sh2)->no_polling
# define sh2_set_polling(sh2) (sh2)->no_polling = 0
#else
# define sh2_end_run(sh2, after_) do { \
int left_ = ((signed int)(sh2)->sr >> 12) - (after_); \
@ -235,6 +237,8 @@ extern SH2 sh2s[2];
# define sh2_cycles_left(sh2) ((signed int)(sh2)->sr >> 12)
# define sh2_burn_cycles(sh2, n) (sh2)->sr -= ((n) << 12)
# define sh2_pc(sh2) (sh2)->pc
# define sh2_not_polling(sh2) ((sh2)->sr & SH2_NO_POLLING)
# define sh2_set_polling(sh2) ((sh2)->sr &= ~SH2_NO_POLLING)
#endif
// cycles_timeslice minus the cycles still left (sh2_cycles_left), converted
// to unsigned; presumably the cycles executed so far in the current timeslice
#define sh2_cycles_done(sh2) (unsigned)((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2))

View File

@ -1683,6 +1683,7 @@ static void emu_loop_prep(void)
/* our tick here is 1 us right now */
/* milliseconds -> ticks; argument and result are parenthesized so that
 * compound arguments such as ms_to_ticks(a + b) are scaled as a whole */
#define ms_to_ticks(x) ((unsigned int)((x) * 1000))
#define get_ticks() plat_get_ticks_us()
/* three times the tick count of 1 ms (presumably matching the *_x3
 * timestamps it is added to); parenthesized so it is safe as an operand */
#define vsync_delay_x3 (3 * ms_to_ticks(1))
void emu_loop(void)
{
@ -1860,13 +1861,13 @@ void emu_loop(void)
diff = timestamp_aim_x3 - timestamp * 3;
// sleep or vsync if we are still too fast
if (diff > target_frametime_x3 && (currentConfig.EmuOpt & EOPT_VSYNC)) {
if (diff > target_frametime_x3 + vsync_delay_x3 && (currentConfig.EmuOpt & EOPT_VSYNC)) {
// we are too fast
plat_video_wait_vsync();
timestamp = get_ticks();
diff = timestamp * 3 - timestamp_aim_x3;
}
if (diff > target_frametime_x3) {
if (diff > target_frametime_x3 + vsync_delay_x3) {
// still too fast
plat_wait_till_us(timestamp + (diff - target_frametime_x3) / 3);
}