Implement the Palette for PS2

This commit is contained in:
Francisco Javier Trujillo Mata 2019-02-03 12:57:15 +01:00
parent 03f663dd1a
commit 3bc15e631d
9 changed files with 3208 additions and 15 deletions

View File

@ -259,7 +259,9 @@ else ifeq ($(platform), ps2)
TARGET := $(TARGET_NAME)_libretro_$(platform).a
CC = ee-gcc$(EXE_EXT)
AR = ee-ar$(EXE_EXT)
CFLAGS += -G0 -Wall -DPS2 -DUSE_BGR555 -DUTYPES_DEFINED -DFAMEC_NO_GOTOS -fsingle-precision-constant
CFLAGS += -G0 -Wall -DPS2 -DUSE_BGR555 -DUTYPES_DEFINED -DFAMEC_NO_GOTOS -DRENDER_GSKIT_PS2 -fsingle-precision-constant
CFLAGS += -Dmemcpy=mips_memcpy -Dmemset=mips_memset
# CFLAGS += -D_ASM_MISC_C_AMIPS -D_ASM_DRAW_C_AMIPS -D_ASM_MEMORY_C_AMIPS
CFLAGS += -I$(PS2SDK)/ports/include -I$(PS2DEV)/gsKit/include -I$(PS2SDK)/ee/include -I$(PS2SDK)/common/include -Iplatform/ps2
STATIC_LINKING = 1
NO_MMAP = 1
@ -277,6 +279,9 @@ else ifeq ($(platform), ps2)
use_drz80 = 0
use_cz80 = 1
OBJS += platform/ps2/asm.o platform/ps2/SMS_Utils.o
# OBJS += platform/ps2/Draw_mips_r5900.o platform/ps2/Memory_mips_r5900.o pico/misc_amips.o
# CTR (3DS)
else ifeq ($(platform), ctr)
TARGET := $(TARGET_NAME)_libretro_$(platform).a

View File

@ -284,10 +284,16 @@ static void FinalizeLine8bitM4(int line)
{
unsigned char *pd = Pico.est.DrawLineDest;
if (!(PicoIn.opt & POPT_DIS_32C_BORDER))
#if defined(RENDER_GSKIT_PS2)
memcpy(pd, Pico.est.HighCol, 328);
#else
if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) {
pd += 32;
memcpy(pd, Pico.est.HighCol + 8, 256);
memcpy(pd, Pico.est.HighCol + 8, 256);
} else {
memcpy(pd, Pico.est.HighCol + 8, 320);
}
#endif
}
void PicoDrawSetOutputMode4(pdso_t which)

View File

@ -0,0 +1,68 @@
/* Copyright (C) 2010-2018 The RetroArch team
*
* ---------------------------------------------------------------------------------------------
* The following license statement only applies to this libretro API header (libretro_gskit_ps2.h)
* ---------------------------------------------------------------------------------------------
*
* Permission is hereby granted, free of charge,
* to any person obtaining a copy of this software and associated documentation files (the
* "Software"),
* to deal in the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or
* substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef LIBRETRO_GSKIT_PS2_H_
#define LIBRETRO_GSKIT_PS2_H_
#include "libretro.h"
#if defined(PS2)
#include <gsKit.h>
#define RETRO_HW_RENDER_INTERFACE_GSKIT_PS2_VERSION 1
/* Per-edge insets, in the order top / left / bottom / right, that a core
 * reports next to its texture via retro_hw_render_interface_gskit_ps2::padding.
 * NOTE(review): presumably expressed in texture pixels that the frontend crops
 * from each edge -- confirm against the frontend's gsKit video driver. */
struct retro_hw_ps2_insets
{
float top;
float left;
float bottom;
float right;
};
/* Compound literal with all four insets set to zero. */
#define empty_ps2_insets (struct retro_hw_ps2_insets){0.f, 0.f, 0.f, 0.f}
/* Hardware render interface shared between the frontend and a core that
 * renders through gsKit on the PS2. The core obtains a pointer to it with
 * RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE and then writes the fields below. */
struct retro_hw_render_interface_gskit_ps2
{
/* Must be set to RETRO_HW_RENDER_INTERFACE_GSKIT_PS2. */
enum retro_hw_render_interface_type interface_type;
/* Must be set to RETRO_HW_RENDER_INTERFACE_GSKIT_PS2_VERSION. */
unsigned interface_version;
/* gsKit texture describing the core's frame. It is not an opaque handle:
* the core writes its Width, Height, PSM, ClutPSM, Filter, Clut and Mem
* members directly.
*/
GSTEXTURE *coreTexture;
/* Set to true by the core when the video mode (and so the texture contents)
 * changed. NOTE(review): presumably consumed and reset by the frontend. */
bool clearTexture;
/* Set to true by the core after it rewrote the CLUT pointed to by
 * coreTexture->Clut. NOTE(review): presumably consumed and reset by the frontend. */
bool updatedPalette;
/* Insets of the visible picture inside coreTexture (see retro_hw_ps2_insets). */
struct retro_hw_ps2_insets padding;
};
/* NOTE: "INTEFACE" is misspelled, but the name is already used by cores,
 * so it has to stay as it is. */
typedef struct retro_hw_render_interface_gskit_ps2 RETRO_HW_RENDER_INTEFACE_GSKIT_PS2;
#endif
#endif /* LIBRETRO_GSKIT_PS2_H_ */

View File

@ -35,6 +35,11 @@
#include "file_stream_transforms.h"
#endif
#if defined(RENDER_GSKIT_PS2)
#include "libretro-common/include/libretro_gskit_ps2.h"
#include "../ps2/asm.h"
#endif
#ifdef _3DS
#include "3ds/3ds_utils.h"
#define MEMOP_MAP 4
@ -74,8 +79,14 @@ static retro_input_state_t input_state_cb;
static retro_environment_t environ_cb;
static retro_audio_sample_batch_t audio_batch_cb;
#if defined(RENDER_GSKIT_PS2)
#define VOUT_MAX_WIDTH 328
#else
#define VOUT_MAX_WIDTH 320
#define VOUT_32BIT_WIDTH 256
#endif
#define VOUT_MAX_HEIGHT 240
#define SND_RATE 44100
static const float VOUT_PAR = 0.0;
static const float VOUT_4_3 = (224.0f * (4.0f / 3.0f));
@ -87,7 +98,13 @@ static void *vout_buf;
static int vout_width, vout_height, vout_offset;
static float user_vout_width = 0.0;
static short ALIGNED(4) sndBuffer[2*44100/50];
#if defined(RENDER_GSKIT_PS2)
RETRO_HW_RENDER_INTEFACE_GSKIT_PS2 *ps2 = NULL;
static void *retro_palette;
static struct retro_hw_ps2_insets padding;
#endif
static short ALIGNED(4) sndBuffer[2*SND_RATE/50];
static void snd_write(int len);
@ -492,14 +509,33 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols)
{
struct retro_system_av_info av_info;
memset(vout_buf, 0, 320 * 240 * 2);
vout_width = is_32cols ? 256 : 320;
#if defined(RENDER_GSKIT_PS2)
if (is_32cols) {
padding = (struct retro_hw_ps2_insets){start_line, 16.0f, VOUT_MAX_HEIGHT - line_count - start_line, 64.0f};
} else {
padding = (struct retro_hw_ps2_insets){start_line, 16.0f, VOUT_MAX_HEIGHT - line_count - start_line, 0.0f};
}
vout_width = VOUT_MAX_WIDTH;
vout_height = VOUT_MAX_HEIGHT;
memset(vout_buf, 0, vout_width * VOUT_MAX_HEIGHT);
memset(retro_palette, 0, gsKit_texture_size_ee(16, 16, GS_PSM_CT16));
PicoDrawSetOutBuf(vout_buf, vout_width);
if (ps2) {
ps2->clearTexture = true;
}
#else
vout_width = is_32cols ? VOUT_32BIT_WIDTH : VOUT_MAX_WIDTH;
memset(vout_buf, 0, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2);
PicoDrawSetOutBuf(vout_buf, vout_width * 2);
if (show_overscan == true) line_count += 16;
if (show_overscan == true) line_count += 16;
if (show_overscan == true) start_line -= 8;
vout_height = line_count;
vout_offset = vout_width * start_line;
#endif
// Update the geometry
retro_get_system_av_info(&av_info);
@ -593,7 +629,7 @@ void retro_get_system_av_info(struct retro_system_av_info *info)
memset(info, 0, sizeof(*info));
info->timing.fps = Pico.m.pal ? 50 : 60;
info->timing.sample_rate = 44100;
info->timing.sample_rate = SND_RATE;
info->geometry.base_width = vout_width;
info->geometry.base_height = vout_height;
info->geometry.max_width = vout_width;
@ -1360,6 +1396,7 @@ void retro_run(void)
{
bool updated = false;
int pad, i;
static void *buff;
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated) && updated)
update_variables();
@ -1375,7 +1412,93 @@ void retro_run(void)
PicoPatchApply();
PicoFrame();
video_cb((short *)vout_buf + vout_offset,
#if defined(RENDER_GSKIT_PS2)
   /* PS2/gsKit path: the frame is described through ps2->coreTexture, so
    * video_cb only receives the "hardware frame buffer is valid" marker. */
   buff = (uint32_t *)RETRO_HW_FRAME_BUFFER_VALID;

   if (!ps2) {
      /* Lazily fetch the frontend's render interface on the first frame. */
      if (!environ_cb(RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE, (void **)&ps2) || !ps2) {
         printf("Failed to get HW rendering interface!\n");
         ps2 = NULL; /* never keep a pointer from a failed call */
         return;
      }

      if (ps2->interface_version != RETRO_HW_RENDER_INTERFACE_GSKIT_PS2_VERSION) {
         printf("HW render interface mismatch, expected %u, got %u!\n",
                (unsigned)RETRO_HW_RENDER_INTERFACE_GSKIT_PS2_VERSION, ps2->interface_version);
         /* Forget the rejected interface; otherwise the next frame would skip
          * this check and write into a structure with an unknown layout. */
         ps2 = NULL;
         return;
      }

      /* One-time texture description: 8-bit indexed pixels with a 16-bit CLUT. */
      ps2->coreTexture->Width = vout_width;
      ps2->coreTexture->Height = vout_height;
      ps2->coreTexture->PSM = GS_PSM_T8;
      ps2->coreTexture->ClutPSM = GS_PSM_CT16;
      ps2->coreTexture->Filter = GS_FILTER_LINEAR;
      ps2->coreTexture->Clut = retro_palette;
   }

   if (Pico.m.dirtyPal) {
      int i;
      unsigned short int *pal=(void *)ps2->coreTexture->Clut;

      if (PicoIn.AHW & PAHW_SMS) {
         // SMS
         unsigned int *spal=(void *)PicoMem.cram;
         unsigned int *dpal=(void *)pal;
         unsigned int t;

         /* cram is always stored as shorts, even though real hardware probably uses bytes */
         /* Two colours are converted per 32-bit word, 0x20 colours in total. */
         for (i = 0x20/2; i > 0; i--, spal++, dpal++) {
            t = *spal;
            t = ((t & 0x00030003)<< 3) | ((t & 0x000c000c)<<6) | ((t & 0x00300030)<<9);
            t |= t >> 2;
            t |= (t >> 4) & 0x08610861;
            *dpal = t;
         }
         pal[0xe0] = 0;
      } else if (PicoIn.AHW & PAHW_32X) {
         // MCD+32X
         /* NOTE(review): no palette conversion is done for this hardware here. */
      } else if (PicoIn.AHW & PAHW_MCD) {
         // MCD
         /* NOTE(review): no palette conversion is done for this hardware here. */
      } else {
         // MD
         if(Pico.video.reg[0xC]&8){
            /* Shadow/highlight enabled: build the normal, shadow and highlight banks. */
            do_pal_convert_with_shadows(pal, PicoMem.cram);
         } else {
            do_pal_convert(pal, PicoMem.cram);
            if (Pico.est.rendstatus & PDRAW_SONIC_MODE) {
               /* Mirror the 0x40 converted colours at entry 0x80. */
               memcpy(&pal[0x80], pal, 0x40*2);
            }
         }
      }

      //Rotate CLUT.
      /* Swap each run of 8 entries whose index has bits 3..4 == 01 with the
       * run 8 entries above it. */
      for (i = 0; i < 256; i++) {
         if ((i&0x18) == 8) {
            unsigned short int tmp = pal[i];
            pal[i] = pal[i+8];
            pal[i+8] = tmp;
         }
      }

      Pico.m.dirtyPal = 0;
      ps2->updatedPalette = true;
   }

   /* SMS output goes to our own buffer; otherwise the core's internal
    * 8-bit frame buffer is handed over directly. */
   if (PicoIn.AHW & PAHW_SMS) {
      ps2->coreTexture->Mem = vout_buf;
   } else {
      ps2->coreTexture->Mem = Pico.est.Draw2FB;
   }

   ps2->padding = padding;
#else
   /* vout_offset counts 16-bit pixels, so the pointer has to be offset as
    * short*; plain void* arithmetic would advance by bytes (half as far). */
   buff = (short *)vout_buf + vout_offset;
#endif
video_cb((short *)buff,
vout_width, vout_height, vout_width * 2);
}
@ -1410,20 +1533,29 @@ void retro_init(void)
#endif
PicoIn.opt |= POPT_EN_DRC;
#endif
PicoIn.sndRate = 44100;
PicoIn.sndRate = SND_RATE;
PicoIn.autoRgnOrder = 0x184; // US, EU, JP
vout_width = 320;
vout_height = 240;
vout_width = VOUT_MAX_WIDTH;
vout_height = VOUT_MAX_HEIGHT;
/* Allocate the output frame buffer. The 3DS and generic builds reserve
 * 2 bytes per pixel; the PS2/gsKit build reserves 1 byte per pixel
 * (8-bit output) and additionally allocates the palette buffer. */
#ifdef _3DS
vout_buf = linearMemAlign(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2, 0x80);
#elif defined(RENDER_GSKIT_PS2)
/* Both PS2 buffers are requested with 128-byte alignment. */
vout_buf = memalign(128, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT);
/* Palette sized as a 16x16 GS_PSM_CT16 texture. */
retro_palette = memalign(128, gsKit_texture_size_ee(16, 16, GS_PSM_CT16));
#else
vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2);
#endif
/* NOTE(review): none of the allocations above is checked for failure. */
PicoInit();
#if defined(RENDER_GSKIT_PS2)
/* PS2: no pixel format conversion by the core; the line pitch equals the
 * width because each pixel is one byte. Mode 4 output is set to 8-bit too. */
PicoDrawSetOutFormat(PDF_NONE, 0);
PicoDrawSetOutBuf(vout_buf, vout_width);
PicoDrawSetOutputMode4(PDF_8BIT);
#else
/* Other platforms: RGB555 output, two bytes per pixel. */
PicoDrawSetOutFormat(PDF_RGB555, 0);
PicoDrawSetOutBuf(vout_buf, vout_width * 2);
#endif
//PicoIn.osdMessage = plat_status_msg_busy_next;
PicoIn.mcdTrayOpen = disk_tray_open;
@ -1436,11 +1568,13 @@ void retro_deinit(void)
{
   /* Release the buffers allocated in retro_init() with the matching
    * deallocator for each platform, and clear every pointer that was freed
    * so nothing dangling survives until the next retro_init(). */
#ifdef _3DS
   linearFree(vout_buf);
#elif defined(RENDER_GSKIT_PS2)
   free(vout_buf);
   free(retro_palette);
   retro_palette = NULL;
   /* Force retro_run() to fetch the render interface again. */
   ps2 = NULL;
#else
   free(vout_buf);
#endif
   vout_buf = NULL;
   PicoExit();
}
// vim:shiftwidth=3:ts=3:expandtab

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,847 @@
# vim:filetype=mips
# memory handlers with banking support for SSF II - The New Challengers
# mostly based on Gens code
# For the MIPS R5900. Based on Memory_amips.s by Notaz.
# (c) Copyright 2007, Grazvydas "notaz" Ignotas
# All Rights Reserved
.set noreorder
.set noat #$at is currently used in the substitute macro instructions (and some replacement code).
.text
.align 4
# Substitute for an "ext" (extract bit field) instruction:
#   dest = (src >> pos) & ((1 << size) - 1)
# Clobbers $at, which holds the mask (hence the ".set noat" above).
.macro ext dest,src,pos,size
srl \dest, \src, \pos
li $at, ((1 << \size) - 1)
and \dest, $at
.endm
# default jump tables
m_read8_def_table:
.long m_read8_rom0 # 0x000000 - 0x07FFFF
.long m_read8_rom1 # 0x080000 - 0x0FFFFF
.long m_read8_rom2 # 0x100000 - 0x17FFFF
.long m_read8_rom3 # 0x180000 - 0x1FFFFF
.long m_read8_rom4 # 0x200000 - 0x27FFFF
.long m_read8_rom5 # 0x280000 - 0x2FFFFF
.long m_read8_rom6 # 0x300000 - 0x37FFFF
.long m_read8_rom7 # 0x380000 - 0x3FFFFF
.long m_read8_rom8 # 0x400000 - 0x47FFFF - for all those large ROM hacks
.long m_read8_rom9 # 0x480000 - 0x4FFFFF
.long m_read8_romA # 0x500000 - 0x57FFFF
.long m_read8_romB # 0x580000 - 0x5FFFFF
.long m_read8_romC # 0x600000 - 0x67FFFF
.long m_read8_romD # 0x680000 - 0x6FFFFF
.long m_read8_romE # 0x700000 - 0x77FFFF
.long m_read8_romF # 0x780000 - 0x7FFFFF
.long m_read8_rom10 # 0x800000 - 0x87FFFF
.long m_read8_rom11 # 0x880000 - 0x8FFFFF
.long m_read8_rom12 # 0x900000 - 0x97FFFF
.long m_read8_rom13 # 0x980000 - 0x9FFFFF
.long m_read8_misc # 0xA00000 - 0xA7FFFF
.long m_read_null # 0xA80000 - 0xAFFFFF
.long m_read_null # 0xB00000 - 0xB7FFFF
.long m_read_null # 0xB80000 - 0xBFFFFF
.long m_read8_vdp # 0xC00000 - 0xC7FFFF
.long m_read8_vdp # 0xC80000 - 0xCFFFFF
.long m_read8_vdp # 0xD00000 - 0xD7FFFF
.long m_read8_vdp # 0xD80000 - 0xDFFFFF
.long m_read8_ram # 0xE00000 - 0xE7FFFF
.long m_read8_ram # 0xE80000 - 0xEFFFFF
.long m_read8_ram # 0xF00000 - 0xF7FFFF
.long m_read8_ram # 0xF80000 - 0xFFFFFF
m_read16_def_table:
.long m_read16_rom0 # 0x000000 - 0x07FFFF
.long m_read16_rom1 # 0x080000 - 0x0FFFFF
.long m_read16_rom2 # 0x100000 - 0x17FFFF
.long m_read16_rom3 # 0x180000 - 0x1FFFFF
.long m_read16_rom4 # 0x200000 - 0x27FFFF
.long m_read16_rom5 # 0x280000 - 0x2FFFFF
.long m_read16_rom6 # 0x300000 - 0x37FFFF
.long m_read16_rom7 # 0x380000 - 0x3FFFFF
.long m_read16_rom8 # 0x400000 - 0x47FFFF
.long m_read16_rom9 # 0x480000 - 0x4FFFFF
.long m_read16_romA # 0x500000 - 0x57FFFF
.long m_read16_romB # 0x580000 - 0x5FFFFF
.long m_read16_romC # 0x600000 - 0x67FFFF
.long m_read16_romD # 0x680000 - 0x6FFFFF
.long m_read16_romE # 0x700000 - 0x77FFFF
.long m_read16_romF # 0x780000 - 0x7FFFFF
.long m_read16_rom10 # 0x800000 - 0x87FFFF
.long m_read16_rom11 # 0x880000 - 0x8FFFFF
.long m_read16_rom12 # 0x900000 - 0x97FFFF
.long m_read16_rom13 # 0x980000 - 0x9FFFFF
.long m_read16_misc # 0xA00000 - 0xA7FFFF
.long m_read_null # 0xA80000 - 0xAFFFFF
.long m_read_null # 0xB00000 - 0xB7FFFF
.long m_read_null # 0xB80000 - 0xBFFFFF
.long m_read16_vdp # 0xC00000 - 0xC7FFFF
.long m_read16_vdp # 0xC80000 - 0xCFFFFF
.long m_read16_vdp # 0xD00000 - 0xD7FFFF
.long m_read16_vdp # 0xD80000 - 0xDFFFFF
.long m_read16_ram # 0xE00000 - 0xE7FFFF
.long m_read16_ram # 0xE80000 - 0xEFFFFF
.long m_read16_ram # 0xF00000 - 0xF7FFFF
.long m_read16_ram # 0xF80000 - 0xFFFFFF
m_read32_def_table:
.long m_read32_rom0 # 0x000000 - 0x07FFFF
.long m_read32_rom1 # 0x080000 - 0x0FFFFF
.long m_read32_rom2 # 0x100000 - 0x17FFFF
.long m_read32_rom3 # 0x180000 - 0x1FFFFF
.long m_read32_rom4 # 0x200000 - 0x27FFFF
.long m_read32_rom5 # 0x280000 - 0x2FFFFF
.long m_read32_rom6 # 0x300000 - 0x37FFFF
.long m_read32_rom7 # 0x380000 - 0x3FFFFF
.long m_read32_rom8 # 0x400000 - 0x47FFFF
.long m_read32_rom9 # 0x480000 - 0x4FFFFF
.long m_read32_romA # 0x500000 - 0x57FFFF
.long m_read32_romB # 0x580000 - 0x5FFFFF
.long m_read32_romC # 0x600000 - 0x67FFFF
.long m_read32_romD # 0x680000 - 0x6FFFFF
.long m_read32_romE # 0x700000 - 0x77FFFF
.long m_read32_romF # 0x780000 - 0x7FFFFF
.long m_read32_rom10 # 0x800000 - 0x87FFFF
.long m_read32_rom11 # 0x880000 - 0x8FFFFF
.long m_read32_rom12 # 0x900000 - 0x97FFFF
.long m_read32_rom13 # 0x980000 - 0x9FFFFF
.long m_read32_misc # 0xA00000 - 0xA7FFFF
.long m_read_null # 0xA80000 - 0xAFFFFF
.long m_read_null # 0xB00000 - 0xB7FFFF
.long m_read_null # 0xB80000 - 0xBFFFFF
.long m_read32_vdp # 0xC00000 - 0xC7FFFF
.long m_read32_vdp # 0xC80000 - 0xCFFFFF
.long m_read32_vdp # 0xD00000 - 0xD7FFFF
.long m_read32_vdp # 0xD80000 - 0xDFFFFF
.long m_read32_ram # 0xE00000 - 0xE7FFFF
.long m_read32_ram # 0xE80000 - 0xEFFFFF
.long m_read32_ram # 0xF00000 - 0xF7FFFF
.long m_read32_ram # 0xF80000 - 0xFFFFFF
# #############################################################################
.bss
.align 4
# used tables
m_read8_table:
.skip 32*4
m_read16_table:
.skip 32*4
m_read32_table:
.skip 32*4
# #############################################################################
.text
.align 4
.global PicoMemReset
.global PicoRead8
.global PicoRead16
.global PicoRead32
.global PicoWriteRomHW_SSF2
.global m_read8_def_table
.global m_read8_table
.macro PicoMemResetCopyDef dst_table src_table
lui $t0, %hi(\dst_table)
addiu $t0, %lo(\dst_table)
lui $t1, %hi(\src_table)
addiu $t1, %lo(\src_table)
li $t2, 32
1:
lw $t3, 0($t1)
sw $t3, 0($t0)
addiu $t2, -1
addiu $t1, 4
nop #Workaround for the R5900 short-loop bug.
bnez $t2, 1b
addiu $t0, 4
.endm
# $t4 = 4
.macro PicoMemResetRomArea dst_table ar_label
lui $t0, %hi(\dst_table)
addiu $t0, %lo(\dst_table)
lui $t1, %hi(\ar_label)
addiu $t1, %lo(\ar_label)
li $t2, 20
1:
beq $t2, $v1, 2f
addiu $t2, -1
sll $t3, $t2, 2
beq $t2, $t4, 1b # do not touch the SRAM area
addu $t3, $t0
j 1b
sw $t1, 0($t3)
2:
.endm
PicoMemReset:
lui $v1, %hi(Pico+0x22204)
lw $v1, %lo(Pico+0x22204)($v1) # romsize
lui $t0, 8
addu $v1, $t0
addiu $v1, -1
srl $v1, 19
PicoMemResetCopyDef m_read8_table m_read8_def_table
PicoMemResetCopyDef m_read16_table m_read16_def_table
PicoMemResetCopyDef m_read32_table m_read32_def_table
# update memhandlers according to ROM size
li $t4, 4
PicoMemResetRomArea m_read8_table m_read8_above_rom
PicoMemResetRomArea m_read16_table m_read16_above_rom
PicoMemResetRomArea m_read32_table m_read32_above_rom
jr $ra
nop
# #############################################################################
# Dispatch a read to the handler registered for the address region.
#   in:  $a0 = address
#   The handler is taken from \table[(a0 >> 19) & 0x1F], i.e. one entry per
#   0x80000-byte region. It is entered with a plain jump (jr), so it returns
#   straight to our caller, and it receives $a0 with the top 8 bits cleared
#   (the final srl executes in the jr delay slot).
#   Clobbers $t0, $t1 and $at.
# NOTE(review): the AND with 0xFFFFFF83 only clears bits 2..6, which are
# already zero right after the lui -- looks redundant, confirm before removing.
.macro PicoReadJump table
lui $t0, %hi(\table)
srl $t1, $a0, 19
li $at, 0xFFFFFF83
and $t0, $t0, $at
andi $t1, $t1, 0x1F
sll $t1, $t1, 2
or $t0, $t0, $t1
lw $t0, %lo(\table)($t0)
sll $a0, $a0, 8
jr $t0
srl $a0, $a0, 8
.endm
# Public entry points: one dispatcher per access width.
PicoRead8: # u32 a
PicoReadJump m_read8_table
PicoRead16: # u32 a
PicoReadJump m_read16_table
PicoRead32: # u32 a
PicoReadJump m_read32_table
# #############################################################################
m_read_null:
jr $ra
li $v0, 0
m_read_neg1:
jr $ra
addiu $v0, $0, 0xffff
# Try to satisfy a read from SRAM before falling back to ROM.
#   \is200000  non-zero: rebuild $a0 as 0x200000 | (a0 & 0x7FFFF) first
#   \size      access width: 8, 16, or anything else for 32 bits
# If SRam.start <= $a0 <= SRam.end and (Pico.m.sram_reg & 5) != 0 the read is
# served by SRAMRead / SRAMRead16 and the macro returns to the caller of the
# handler. Otherwise execution falls through to local label 1 with
# $t3 = &Pico.rom (loads &Pico.rom to $t3). Clobbers $t1, $t2, $t4.
.macro m_read_rom_try_sram is200000 size
    lui     $t2, %hi(SRam)
    addiu   $t2, %lo(SRam)
    lui     $t3, %hi(Pico+0x22200)
    lw      $t1, 8($t2)             # SRam.end
.if \is200000
    sll     $a0, $a0, 13
    srl     $a0, $a0, 13
    lui     $t4, 0x20
    or      $a0, $t4
.endif
    subu    $t4, $a0, $t1
    bgtz    $t4, 1f                 # above SRam.end -> not SRAM
    addiu   $t3, %lo(Pico+0x22200)  # (delay slot) finish $t3 = &Pico.rom
    lw      $t1, 4($t2)             # SRam.start
    subu    $t4, $t1, $a0
    bgtz    $t4, 1f                 # below SRam.start -> not SRAM
    nop
    lb      $t1, 0x11($t3)          # Pico.m.sram_reg
    andi    $t4, $t1, 5
    beqz    $t4, 1f                 # neither of bits 0/2 set -> not SRAM
    nop
.if \size == 8
    j       SRAMRead                # tail call, returns to our caller
    nop
.elseif \size == 16
    sd      $ra, -16($sp)
    jal     SRAMRead16
    addiu   $sp, -16
    ld      $ra, 0($sp)
    jr      $ra
    addiu   $sp, 16
.else
    # 32-bit read = two 16-bit reads: the word at $a0 is the high half,
    # the word at $a0+2 is the low half.
    addiu   $sp, -32
    sd      $ra, 0($sp)
    sd      $a0, 16($sp)
    jal     SRAMRead16
    nop
    ld      $a0, 16($sp)
    sd      $v0, 16($sp)            # keep the first (high) half-word
    jal     SRAMRead16
    addiu   $a0, 2
    ld      $v1, 16($sp)
    ld      $ra, 0($sp)
    andi    $v0, $v0, 0xFFFF
    sll     $v1, $v1, 16            # was srl, which discarded the high half
    or      $v0, $v1
    jr      $ra
    addiu   $sp, 32
.endif
# m_read_nosram:
1:
.endm
.macro m_read8_rom sect
lui $t0, %hi(Pico+0x22200)
lw $t0, %lo(Pico+0x22200)($t0) # rom
xori $a0, 1
sll $a0, $a0, 13
srl $a0, $a0, 13
.if \sect
lui $t1, 8*\sect
addu $a0, $t1
.endif
addu $t0, $a0
jr $ra
lb $v0, 0($t0)
.endm
m_read8_rom0: # 0x000000 - 0x07ffff
m_read8_rom 0
m_read8_rom1: # 0x080000 - 0x0fffff
m_read8_rom 1
m_read8_rom2: # 0x100000 - 0x17ffff
m_read8_rom 2
m_read8_rom3: # 0x180000 - 0x1fffff
m_read8_rom 3
m_read8_rom4: # 0x200000 - 0x27ffff, SRAM area
m_read_rom_try_sram 1 8
lw $t1, 4($t3) # romsize
subu $t4, $t1, $a0
blez $t4, m_read_null
lw $t1, 0($t3) # rom
xori $a0, 1
addu $t1, $a0
jr $ra
lb $v0, 0($t1)
m_read8_rom5: # 0x280000 - 0x2fffff
m_read8_rom 5
m_read8_rom6: # 0x300000 - 0x37ffff
m_read8_rom 6
m_read8_rom7: # 0x380000 - 0x3fffff
m_read8_rom 7
m_read8_rom8: # 0x400000 - 0x47ffff
m_read8_rom 8
m_read8_rom9: # 0x480000 - 0x4fffff
m_read8_rom 9
m_read8_romA: # 0x500000 - 0x57ffff
m_read8_rom 0xA
m_read8_romB: # 0x580000 - 0x5fffff
m_read8_rom 0xB
m_read8_romC: # 0x600000 - 0x67ffff
m_read8_rom 0xC
m_read8_romD: # 0x680000 - 0x6fffff
m_read8_rom 0xD
m_read8_romE: # 0x700000 - 0x77ffff
m_read8_rom 0xE
m_read8_romF: # 0x780000 - 0x7fffff
m_read8_rom 0xF
m_read8_rom10: # 0x800000 - 0x87ffff
m_read8_rom 0x10
m_read8_rom11: # 0x880000 - 0x8fffff
m_read8_rom 0x11
m_read8_rom12: # 0x900000 - 0x97ffff
m_read8_rom 0x12
m_read8_rom13: # 0x980000 - 0x9fffff
m_read8_rom 0x13
m_read8_misc:
srl $t0, $a0, 5
sll $t0, $t0, 5
lui $t1, 0xa1
bne $t0, $t1, m_read8_misc2
andi $t0, $a0, 0x1e
m_read8_misc_io:
beqz $t0, m_read8_misc_hwreg
sub $t1, $t0, 4
bgtz $t1, m_read8_misc_ioports
nop
slti $a0, $t0, 4
xori $a0, 1
j PadRead
nop
m_read8_misc_hwreg:
lui $v0, %hi(Pico+0x2220f)
jr $ra
lb $v0, %lo(Pico+0x2220f)($v0)
m_read8_misc_ioports:
lui $v0, %hi(Pico+0x22000)
sra $v0, $v0, 5
sll $v0, $v0, 5
andi $t0, $t0, 0x1F
or $v0, $v0, $t0
jr $ra
lb $v0, %lo(Pico+0x22000)($v0)
m_read8_misc2:
lui $t0, 0xa1
ori $t0, 0x1100
bne $a0, $t0, m_read8_misc3
srl $t0, $a0, 16
j z80ReadBusReq
m_read8_misc3:
addiu $t0, 0xff60 # expecting 0xa0 to get 0
bnez $t0, m_read8_misc4
# z80 area
andi $t0, $a0, 0x4000
bnez $t0, m_read8_z80_misc
andi $t0, $a0, 0x6000
j z80Read8 # z80 RAM
m_read8_z80_misc:
addiu $t0, 0xc000 # expecting 0x4000 to get 0
bnez $t0, m_read_neg1 # invalid
nop
j ym2612_read_local_68k
nop
m_read8_fake_ym2612:
lb $v0, %lo(Pico+0x22208)($t0) # Pico.m.rotate
addiu $t1, $v0, 1
jr $ra
sb $t1, %lo(Pico+0x22208)($t0)
# delay slot friendly
.macro m_read8_call16 funcname is_func_ptr=0
.if \is_func_ptr
lui $t1, %hi(\funcname)
lw $t1, %lo(\funcname)($t1)
.endif
andi $t0, $a0, 1
beqz $t0, 1f
li $a1, 8 # not always needed, but shouln't cause problems
.if \is_func_ptr
jr $t1
.else
j \funcname # odd address
.endif
nop
1:
addiu $sp, -16
sd $ra, 0($sp)
.if \is_func_ptr
jalr $t1
.else
jal \funcname
.endif
xori $a0, 1
ld $ra, 0($sp)
addiu $sp, 16
jr $ra
srl $v0, 8
.endm
m_read8_misc4:
# if everything else fails, use generic handler
m_read8_call16 OtherRead16
m_read8_vdp:
ext $t0, $a0, 16, 3
andi $t1, $a0, 0xe0
or $t0, $t1
bnez $t0, m_read_null # invalid address
nop
j PicoVideoRead8
nop
m_read8_ram:
lui $t0, %hi(Pico)
andi $a0, $a0, 0xFFFF
sra $t0, $t0, 16
sll $t0, $t0, 16
or $t0, $t0, $a0
xori $t0, 1
jr $ra
lb $v0, %lo(Pico)($t0)
m_read8_above_rom:
# might still be SRam (Micro Machines, HardBall '95)
m_read_rom_try_sram 0 8
m_read8_call16 PicoRead16Hook 1
# #############################################################################
.macro m_read16_rom sect
lui $t0, %hi(Pico+0x22200)
lw $t0, %lo(Pico+0x22200)($t0) # rom
li $at, 0x0007FFFE
and $a0, $a0, $at
.if \sect
lui $t1, 8*\sect
addu $a0, $t1
.endif
addu $t0, $a0
jr $ra
lh $v0, 0($t0)
.endm
m_read16_rom0: # 0x000000 - 0x07ffff
m_read16_rom 0
m_read16_rom1: # 0x080000 - 0x0fffff
m_read16_rom 1
m_read16_rom2: # 0x100000 - 0x17ffff
m_read16_rom 2
m_read16_rom3: # 0x180000 - 0x1fffff
m_read16_rom 3
m_read16_rom4: # 0x200000 - 0x27ffff, SRAM area
m_read_rom_try_sram 1 16
lw $t1, 4($t3) # romsize
subu $t4, $t1, $a0
blez $t4, m_read_null
lw $t1, 0($t3) # rom
sra $a0, $a0, 1
sll $a0, $a0, 1
addu $t1, $a0
jr $ra
lh $v0, 0($t1)
m_read16_rom5: # 0x280000 - 0x2fffff
m_read16_rom 5
m_read16_rom6: # 0x300000 - 0x37ffff
m_read16_rom 6
m_read16_rom7: # 0x380000 - 0x3fffff
m_read16_rom 7
m_read16_rom8: # 0x400000 - 0x47ffff
m_read16_rom 8
m_read16_rom9: # 0x480000 - 0x4fffff
m_read16_rom 9
m_read16_romA: # 0x500000 - 0x57ffff
m_read16_rom 0xA
m_read16_romB: # 0x580000 - 0x5fffff
m_read16_rom 0xB
m_read16_romC: # 0x600000 - 0x67ffff
m_read16_rom 0xC
m_read16_romD: # 0x680000 - 0x6fffff
m_read16_rom 0xD
m_read16_romE: # 0x700000 - 0x77ffff
m_read16_rom 0xE
m_read16_romF: # 0x780000 - 0x7fffff
m_read16_rom 0xF
m_read16_rom10: # 0x800000 - 0x87ffff
m_read16_rom 0x10
m_read16_rom11: # 0x880000 - 0x8fffff
m_read16_rom 0x11
m_read16_rom12: # 0x900000 - 0x97ffff
m_read16_rom 0x12
m_read16_rom13: # 0x980000 - 0x9fffff
m_read16_rom 0x13
m_read16_misc:
sra $a0, $a0, 1
sll $a0, $a0, 1
j OtherRead16
li $a1, 16
m_read16_vdp:
ext $t0, $a0, 16, 3
andi $t1, $a0, 0xe0
or $t0, $t1
bnez $t0, m_read_null # invalid address
sra $a0, $a0, 1
j PicoVideoRead
sll $a0, $a0, 1
m_read16_ram:
lui $t0, %hi(Pico)
srl $a0, $a0, 1
sll $a0, $a0, 17
srl $a0, $a0, 16
sra $t0, $t0, 16
sll $t0, $t0, 16
or $t0, $t0, $a0
jr $ra
lh $v0, %lo(Pico)($t0)
m_read16_above_rom:
# might still be SRam
m_read_rom_try_sram 0 16
lui $t1, %hi(PicoRead16Hook)
lw $t1, %lo(PicoRead16Hook)($t1)
sra $a0, $a0, 1
jr $t1
sll $a0, $a0, 1
# #############################################################################
.macro m_read32_rom sect
lui $t0, %hi(Pico+0x22200)
lw $t0, %lo(Pico+0x22200)($t0) # rom
li $at, 0x0007FFFE
and $a0, $a0, $at
.if \sect
lui $t1, 8*\sect
addu $a0, $t1
.endif
addu $t0, $a0
lh $v1, 0($t0)
lh $v0, 2($t0)
andi $v0, $v0, 0xFFFF
sll $v1, $v1, 16
jr $ra
or $v0, $v0, $v1
.endm
m_read32_rom0: # 0x000000 - 0x07ffff
m_read32_rom 0
m_read32_rom1: # 0x080000 - 0x0fffff
m_read32_rom 1
m_read32_rom2: # 0x100000 - 0x17ffff
m_read32_rom 2
m_read32_rom3: # 0x180000 - 0x1fffff
m_read32_rom 3
m_read32_rom4: # 0x200000 - 0x27ffff, SRAM area
m_read_rom_try_sram 1 32
lw $t1, 4($t3) # romsize
subu $t4, $t1, $a0
blez $t4, m_read_null
lw $t1, 0($t3) # rom
sra $a0, $a0, 1
sll $a0, $a0, 1
addu $t1, $a0
lh $v1, 0($t1)
lh $v0, 2($t1)
andi $v0, $v0, 0xFFFF
sll $v1, $v1, 16
jr $ra
or $v0, $v0, $v1
m_read32_rom5: # 0x280000 - 0x2fffff
m_read32_rom 5
m_read32_rom6: # 0x300000 - 0x37ffff
m_read32_rom 6
m_read32_rom7: # 0x380000 - 0x3fffff
m_read32_rom 7
m_read32_rom8: # 0x400000 - 0x47ffff
m_read32_rom 8
m_read32_rom9: # 0x480000 - 0x4fffff
m_read32_rom 9
m_read32_romA: # 0x500000 - 0x57ffff
m_read32_rom 0xA
m_read32_romB: # 0x580000 - 0x5fffff
m_read32_rom 0xB
m_read32_romC: # 0x600000 - 0x67ffff
m_read32_rom 0xC
m_read32_romD: # 0x680000 - 0x6fffff
m_read32_rom 0xD
m_read32_romE: # 0x700000 - 0x77ffff
m_read32_rom 0xE
m_read32_romF: # 0x780000 - 0x7fffff
m_read32_rom 0xF
m_read32_rom10: # 0x800000 - 0x87ffff
m_read32_rom 0x10
m_read32_rom11: # 0x880000 - 0x8fffff
m_read32_rom 0x11
m_read32_rom12: # 0x900000 - 0x97ffff
m_read32_rom 0x12
m_read32_rom13: # 0x980000 - 0x9fffff
m_read32_rom 0x13
.macro m_read32_call16 func need_a1=0
addiu $sp, -32
sd $ra, 0($sp)
sd $s0, 16($sp)
.if \need_a1
li $a1, 16
.endif
jal \func
move $s0, $a0
addu $a0, $s0, 2
.if \need_a1
li $a1, 16
.endif
jal \func
move $s0, $v0
sll $s0, $s0, 16
andi $v0, $v0, 0xFFFF
or $v0, $v0, $s0
ld $ra, 0($sp)
ld $s0, 16($sp)
jr $ra
addiu $sp, 32
.endm
m_read32_misc:
sra $a0, $a0, 1
sll $a0, $a0, 1
m_read32_call16 OtherRead16, 1
m_read32_vdp:
ext $t0, $a0, 16, 3
andi $t1, $a0, 0xe0
or $t0, $t1
bnez $t0, m_read_null # invalid address
sra $a0, $a0, 1
sll $a0, $a0, 1
m_read32_call16 PicoVideoRead
m_read32_ram:
lui $t0, %hi(Pico)
sra $a0, $a0, 1
sll $a0, $a0, 17
srl $a0, $a0, 16
sra $t0, $t0, 16
sll $t0, $t0, 16
or $t0, $t0, $a0
lh $v1, %lo(Pico)($t0)
lh $v0, %lo(Pico+2)($t0)
andi $v0, $v0, 0xFFFF
sll $v1, $v1, 16
jr $ra
or $v0, $v0, $v1
m_read32_above_rom:
# might still be SRam
m_read_rom_try_sram 0 32
sra $a0, $a0, 1
sll $a0, $a0, 1
lui $t1, %hi(PicoRead16Hook)
lw $t1, %lo(PicoRead16Hook)($t1)
addiu $sp, -16*3
sd $ra, 0($sp)
sd $s0, 16($sp)
sd $t1, 32($sp)
jalr $t1
move $s0, $a0
ld $t1, 32($sp)
addu $a0, $s0, 2
jalr $t1
move $s0, $v0
andi $v0, $v0, 0xFFFF
sll $s0, $s0, 16
or $v0, $v0, $s0
ld $ra, 0($sp)
ld $s0, 16($sp)
jr $ra
addiu $sp, 16*3
# #############################################################################
.macro PicoWriteRomHW_SSF2_ls def_table table
lui $t3, %hi(\def_table)
li $at, 0xFFFFFF83
and $t3, $t3, $at
andi $at, $a1, 0x1F
sll $at, $at, 2
or $t3, $t3, $at
lw $t0, %lo(\def_table)($t3)
lui $t2, %hi(\table)
li $at, 0xFFFFFFE3
and $t2, $t2, $at
andi $at, $a0, 0x07
sll $at, $at, 2
or $t2, $t2, $at
sw $t0, %lo(\table)($t2)
.endm
PicoWriteRomHW_SSF2: # u32 a, u32 d
ext $a0, $a0, 1, 3
bnez $a0, pwr_banking
# sram register
lui $t0, %hi(Pico+0x22211)
lb $t1, %lo(Pico+0x22211)($t0) # Pico.m.sram_reg
sra $t1, $t1, 2
sll $t1, $t1, 2
andi $a1, $a1, 3
or $t1, $t1, $a1
jr $ra
sb $t1, %lo(Pico+0x22211)($t0)
pwr_banking:
andi $a1, 0x1f
PicoWriteRomHW_SSF2_ls m_read8_def_table m_read8_table
PicoWriteRomHW_SSF2_ls m_read16_def_table m_read16_table
PicoWriteRomHW_SSF2_ls m_read32_def_table m_read32_table
jr $ra
nop

202
platform/ps2/SMS_Utils.s Normal file
View File

@ -0,0 +1,202 @@
/*
# ___ _ _ ___
# | | | | |
# ___| | | ___| PS2DEV Open Source Project.
#----------------------------------------------------------
# MUL64 is pulled from some binary library (I don't remember which one).
# mips_memcpy routine is pulled from 'sde' library from MIPS.
#
*/
.set noat
.set noreorder
.set nomacro
.globl MUL64
.globl mips_memcpy
.globl mips_memset
.text
# MUL64: multiply the two 64-bit values in $a0 and $a1, result in $v0.
# Built from three partial products: lo(a0)*lo(a1) as a full 64-bit product,
# plus the sum of the two cross products (low words of a0*hi(a1) and
# hi(a0)*a1) shifted left by 32.
# NOTE(review): this appears to yield the low 64 bits of the product -- confirm
# against the EE Core documentation for pmultuw/mult/mult1.
# Clobbers $a2 and $v1.
MUL64:
pmultuw $v0, $a0, $a1
dsra32 $a2, $a0, 0
dsra32 $v1, $a1, 0
mult $v1, $a0, $v1
mult1 $a2, $a2, $a1
addu $v1, $v1, $a2
dsll32 $v1, $v1, 0
jr $ra
daddu $v0, $v0, $v1
# mips_memcpy: copy $a2 bytes from $a1 (src) to $a0 (dest); returns dest in $v0.
# Code runs with ".set noreorder": the instruction after every branch is its
# delay slot and executes whether or not the branch is taken.
# Strategy:
#   - fewer than 12 bytes: plain byte loop (label 2)
#   - otherwise align dest to 8 bytes, then copy 32-byte blocks, then 8-byte
#     blocks, then the remaining bytes
#   - when src and dest differ in their low 3 address bits, src is read with
#     ldr/ldl pairs (unaligned loads); when they agree, plain ld is used
# Clobbers $a0-$a3, $v1, $t0-$t2.
mips_memcpy:
addu $v0, $a0, $zero
beqz $a2, 1f
sltiu $t2, $a2, 12
bnez $t2, 2f
xor $v1, $a1, $a0
andi $v1, $v1, 7
negu $a3, $a0
beqz $v1, 3f
andi $a3, $a3, 7
# Different relative alignment. $a3 = bytes needed to 8-byte align dest.
beqz $a3, 4f
subu $a2, $a2, $a3
ldr $v1, 0($a1)
ldl $v1, 7($a1)
addu $a1, $a1, $a3
sdr $v1, 0($a0)
addu $a0, $a0, $a3
# Split the remaining count: $a3 = bytes in whole 32-byte blocks, $a2 = rest.
4:
andi $v1, $a2, 31
subu $a3, $a2, $v1
beqz $a3, 5f
addu $a2, $v1, $zero
addu $a3, $a3, $a1
# Copy 32 bytes per iteration, unaligned loads and aligned stores.
6:
ldr $v1, 0($a1)
ldl $v1, 7($a1)
ldr $t0, 8($a1)
ldl $t0, 15($a1)
ldr $t1, 16($a1)
ldl $t1, 23($a1)
ldr $t2, 24($a1)
ldl $t2, 31($a1)
sd $v1, 0($a0)
sd $t0, 8($a0)
sd $t1, 16($a0)
addiu $a1, $a1, 32
addiu $a0, $a0, 32
bne $a1, $a3, 6b
sd $t2, -8($a0)
# Split again: $a3 = bytes in whole 8-byte blocks, $a2 = trailing bytes.
5:
andi $v1, $a2, 7
subu $a3, $a2, $v1
beqz $a3, 2f
addu $a2, $v1, $zero
addu $a3, $a3, $a1
# Copy 8 bytes per iteration with an unaligned load.
# NOTE(review): the nop is presumably padding for the R5900 short-loop
# erratum -- confirm before removing.
7:
ldr $v1, 0($a1)
ldl $v1, 7($a1)
addiu $a1, $a1, 8
addiu $a0, $a0, 8
nop
bne $a1, $a3, 7b
sd $v1, -8($a0)
beq $zero, $zero, 2f
nop
# Same relative alignment. $a3 = bytes needed to 8-byte align both pointers.
3:
beqz $a3, 8f
subu $a2, $a2, $a3
ldr $v1, 0($a1)
addu $a1, $a1, $a3
sdr $v1, 0($a0)
addu $a0, $a0, $a3
# Split the remaining count into 32-byte blocks and the rest.
8:
andi $v1, $a2, 31
subu $a3, $a2, $v1
beqz $a3, 9f
addu $a2, $v1, $zero
addu $a3, $a3, $a1
# Copy 32 bytes per iteration with aligned loads and stores.
10:
ld $v1, 0($a1)
ld $t0, 8($a1)
ld $t1, 16($a1)
ld $t2, 24($a1)
sd $v1, 0($a0)
sd $t0, 8($a0)
sd $t1, 16($a0)
addiu $a1, $a1, 32
addiu $a0, $a0, 32
bne $a1, $a3, 10b
sd $t2, -8($a0)
# Split again into 8-byte blocks and trailing bytes.
9:
andi $v1, $a2, 7
subu $a3, $a2, $v1
beqz $a3, 2f
addu $a2, $v1, $zero
addu $a3, $a3, $a1
# Copy 8 bytes per iteration, aligned.
11:
ld $v1, 0($a1)
addiu $a1, $a1, 8
addiu $a0, $a0, 8
nop
nop
bne $a1, $a3, 11b
sd $v1, -8($a0)
# Byte tail (also the whole copy when fewer than 12 bytes were requested).
2:
beqz $a2, 1f
addu $a3, $a2, $a1
12:
lbu $v1, 0($a1)
addiu $a1, $a1, 1
addiu $a0, $a0, 1
nop
nop
bne $a1, $a3, 12b
sb $v1, -1($a0)
# Return; $v0 still holds the original dest.
1:
jr $ra
nop
# mips_memset: fill $a2 bytes at $a0 (dest) with the low byte of $a1.
# Returns dest in $v0, as memset() requires; the build maps memset to this
# routine with -Dmemset=mips_memset, so callers may rely on the return value.
# Code runs with ".set noreorder": the instruction after every branch is its
# delay slot and executes whether or not the branch is taken.
# Strategy: fewer than 16 bytes -> byte loop; otherwise replicate the byte
# across 64 bits, align dest to 8 bytes, store 32-byte blocks, then 8-byte
# blocks, then the remaining bytes.
# Clobbers $a0-$a3, $v1, $at.
mips_memset:
    move    $v0, $a0            # return value: original dest (was never set)
    beqz    $a2, 1f
    sltiu   $at, $a2, 16
    bnez    $at, 2f
    andi    $a1, $a1, 0xFF
    # Replicate the fill byte into all 8 bytes of $a1.
    dsll    $at, $a1, 0x8
    or      $a1, $a1, $at
    dsll    $at, $a1, 0x10
    or      $a1, $a1, $at
    dsll32  $at, $a1, 0x0
    or      $a1, $a1, $at
    # Align dest to 8 bytes with one partial store.
    andi    $v1, $a0, 0x7
    beqz    $v1, 3f
    li      $a3, 8
    subu    $a3, $a3, $v1
    subu    $a2, $a2, $a3
    sdr     $a1, 0($a0)
    addu    $a0, $a0, $a3
3:
    # $a3 = bytes in whole 32-byte blocks, $a2 = rest.
    andi    $v1, $a2, 0x1f
    subu    $a3, $a2, $v1
    beqz    $a3, 4f
    move    $a2, $v1
    addu    $a3, $a3, $a0
5:
    sd      $a1, 0($a0)
    sd      $a1, 8($a0)
    sd      $a1, 16($a0)
    addiu   $a0, $a0, 32
    sd      $a1, -8($a0)
    bne     $a0, $a3, 5b
4:
    # The andi below doubles as the delay slot of the bne above.
    andi    $v1, $a2, 0x7
    # $a3 = bytes in whole 8-byte blocks, $a2 = trailing bytes.
    subu    $a3, $a2, $v1
    beqz    $a3, 2f
    move    $a2, $v1
    addu    $a3, $a3, $a0
6:
    # Unrolled three times; each store sits in a branch delay slot.
    addiu   $a0, $a0, 8
    beq     $a0, $a3, 2f
    sd      $a1, -8($a0)
    addiu   $a0, $a0, 8
    beq     $a0, $a3, 2f
    sd      $a1, -8($a0)
    addiu   $a0, $a0, 8
    bne     $a0, $a3, 6b
    sd      $a1, -8($a0)
2:
    # Byte tail (also the whole fill when fewer than 16 bytes were requested).
    beqz    $a2, 1f
    addu    $a3, $a2, $a0
7:
    addiu   $a0, $a0, 1
    beq     $a0, $a3, 1f
    sb      $a1, -1($a0)
    addiu   $a0, $a0, 1
    beq     $a0, $a3, 1f
    sb      $a1, -1($a0)
    addiu   $a0, $a0, 1
    bne     $a0, $a3, 7b
    sb      $a1, -1($a0)
1:
    jr      $ra
    nop

3
platform/ps2/asm.h Normal file
View File

@ -0,0 +1,3 @@
#ifndef PLATFORM_PS2_ASM_H
#define PLATFORM_PS2_ASM_H

/*
 * Palette conversion helpers implemented in platform/ps2/asm.s.
 *
 * Both routines convert the CRAM palette at `src` into 16-bit A1B5G5R5
 * entries at `dest`.
 *
 * Alignment: by right, both pointers should be quad-word (16-byte) aligned.
 * Some of what Picodrive passes as `src` is not aligned to that degree, so
 * `src` is read with two double-word loads and only needs 8-byte alignment.
 * `dest` is written with quad-word stores and therefore still has to be
 * 16-byte aligned.
 *
 * `dest` should point at a full 256-entry (512-byte) palette, which is what
 * the libretro front end in this tree passes.
 */

/* Convert the palette without shadow/highlight variants. */
void do_pal_convert(unsigned short *dest, const unsigned short *src);

/* Convert the palette and also emit its shadow and highlight variants. */
void do_pal_convert_with_shadows(unsigned short *dest, const unsigned short *src);

#endif /* PLATFORM_PS2_ASM_H */

129
platform/ps2/asm.s Normal file
View File

@ -0,0 +1,129 @@
# vim:filetype=mips
# some asm utils for the Sony Emotion Engine (MIPS R5900)
.set push
.set noreorder
.text
.align 4
# void do_pal_convert(unsigned short *dest, const unsigned short *src)
#   $a0 = dest, written with quad-word stores (must be 16-byte aligned)
#   $a1 = src,  read with two double-word loads (must be 8-byte aligned)
# Converts 64 colours, 8 per iteration, into the output format
# A1B5G5R5 abbb bbgg gggr rrrr
# with the alpha bit always set. Clobbers $t0-$t7, $a0, $a1.
.global do_pal_convert # dest, src
.ent do_pal_convert
do_pal_convert:
    li      $t0, 0x8000800080008000 #A
    li      $t1, 0x000E000E000E000E #R
    li      $t2, 0x00E000E000E000E0 #G
    li      $t3, 0x0E000E000E000E00 #B
    li      $t4, 64 # 64 16-bit colours
    #Duplicate the lower dword into the upper dword of each mask (0-63 to 64-127).
    pcpyld  $t0, $t0
    pcpyld  $t1, $t1
    pcpyld  $t2, $t2
    pcpyld  $t3, $t3
    # I couldn't do this with qword loads and stores in C (There's no 128-bit literal data type definition), but here's the 16-bit (1 colour per iteration) equivalent in C for a reference.
    # PalRow=in_palette[i];
    # palette[i]=((PalRow&0x000E)<< 1)|((PalRow&0x00E0)<<2)|((PalRow&0x0E00)<<3) | 0x8000;
pal_convert_loop:
    ld      $t5, 8($a1)
    ld      $t6, 0($a1)
    pcpyld  $t5, $t5, $t6
    # lq $t5, 0($a1) #This won't work because the CRAM palette may not be aligned to a 128-bit address (And unless the source code of Picodrive is modified for that purpose, use two dword loads instead). :(
    #Blue
    pand    $t6, $t5, $t3
    psllh   $t6, $t6, 3
    #Green
    pand    $t7, $t5, $t2
    psllh   $t7, $t7, 2
    #Red
    pand    $t5, $t5, $t1
    psllh   $t5, $t5, 1
    por     $t5, $t5, $t0 #Logical OR in the alpha channel
    por     $t5, $t5, $t6 #Logical OR in the blue channel
    por     $t5, $t5, $t7 #Logical OR in the green channel
    sq      $t5, ($a0)
    addiu   $a1, $a1, 16
    addiu   $t4, $t4, -8 #8 16-bit colours were processed.
    # Loop only while colours remain. The previous "bgez" also branched when
    # the counter reached 0, running a 9th iteration that read and wrote
    # 8 entries past the 64-colour palette.
    bgtz    $t4, pal_convert_loop
    addiu   $a0, $a0, 16 # (delay slot) advance dest
    jr      $ra
    nop
.end do_pal_convert
# void do_pal_convert_with_shadows(unsigned short *dest, const unsigned short *src)
#   $a0 = dest, written with quad-word stores (must be 16-byte aligned)
#   $a1 = src,  read with two double-word loads (must be 8-byte aligned)
# Converts 64 colours, 8 per iteration, to A1B5G5R5 and fills four banks of
# 64 entries each in dest:
#   entries 0x00-0x3F  normal colours      (byte offset 0x000)
#   entries 0x40-0x7F  shadowed colours    (byte offset 0x080)
#   entries 0x80-0xBF  highlighted colours (byte offset 0x100)
#   entries 0xC0-0xFF  shadowed colours    (byte offset 0x180)
# Entries are 2 bytes wide, so the byte offset of a bank is twice its first
# entry index. Clobbers $t0-$t7, $a0-$a3.
.global do_pal_convert_with_shadows # dest, src
.ent do_pal_convert_with_shadows
do_pal_convert_with_shadows:
    li      $t0, 0x8000800080008000 #A mask
    li      $t1, 0x000E000E000E000E #R mask
    li      $t2, 0x00E000E000E000E0 #G mask
    li      $t3, 0x0E000E000E000E00 #B mask
    li      $a2, 0x39CE39CE39CE39CE #Shadow mask
    li      $a3, 0x4210421042104210 #Highlight mask
    li      $t4, 64 # 64 16-bit colours
    # $t5 will contain the raw converted colour, without alpha. This will be also used for conversion into the shadow alternate colours.
    # Duplicate the lower dword into the upper dword of each mask (0-63 to 64-127).
    pcpyld  $t0, $t0
    pcpyld  $t1, $t1
    pcpyld  $t2, $t2
    pcpyld  $t3, $t3
    pcpyld  $a2, $a2
    pcpyld  $a3, $a3
    # I couldn't do this with qword loads and stores in C (There's no 128-bit literal data type definition), but here's the 16-bit (1 colour per iteration) equivalent in C for a reference.
    # PalRow=in_palette[i];
    # palette[i]=((PalRow&0x000E)<< 1)|((PalRow&0x00E0)<<2)|((PalRow&0x0E00)<<3) | 0x8000;
pal_convert_loop_sh:
    ld      $t5, 8($a1)
    ld      $t6, 0($a1)
    pcpyld  $t5, $t5, $t6
    # lq $t5, 0($a1) #This won't work because the CRAM palette may not be aligned to a 128-bit address (And unless the source code of Picodrive is modified for that purpose, use two dword loads instead). :(
    #Blue
    pand    $t6, $t5, $t3
    psllh   $t6, $t6, 3
    #Green
    pand    $t7, $t5, $t2
    psllh   $t7, $t7, 2
    #Red
    pand    $t5, $t5, $t1
    psllh   $t5, $t5, 1
    por     $t5, $t5, $t6 #Logical OR in the blue channel
    por     $t5, $t5, $t7 #Logical OR in the green channel
    por     $t6, $t5, $t0 #Logical OR in the alpha channel
    sq      $t6, ($a0) #Normal
    #Highlights
    por     $t6, $t6, $a3
    # The bank offsets used to be 0x80/0x40/0xC0, i.e. entry indices taken as
    # byte offsets, which overwrote the second half of the normal bank.
    sq      $t6, 0x100($a0) # entries 0x80 + i
    #Shadows
    psrlh   $t5, $t5, 1
    pand    $t5, $t5, $a2
    por     $t5, $t5, $t0 #Logical OR in the alpha channel
    sq      $t5, 0x080($a0) # entries 0x40 + i
    sq      $t5, 0x180($a0) # entries 0xC0 + i
    addiu   $a1, $a1, 16
    addiu   $t4, $t4, -8 #8 16-bit colours were processed.
    # Loop only while colours remain. The previous "bgez" ran a 9th iteration
    # that read past the source palette and spilled 8 entries into the next bank.
    bgtz    $t4, pal_convert_loop_sh
    addiu   $a0, $a0, 16 # (delay slot) advance dest
    jr      $ra
    nop
.end do_pal_convert_with_shadows
.set pop