Initial commit

This commit is contained in:
Gericom
2025-11-22 11:08:28 +01:00
commit 9cf3ffbfcf
358 changed files with 58350 additions and 0 deletions

39
common/ndsabi/macros.inc Normal file
View File

@@ -0,0 +1,39 @@
// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// ARM assembly support macros
@ Shift and test upper two bits, clobbering \reg
@ Use mi for first bit, cs for second bit
@ The shifted value is written back to \reg and the flags are set from it:
@ mi (N) reflects bit 31 of the result, cs (C) the last bit shifted out.
@ NOTE(review): with the default shift of #0 nothing is shifted out, so the
@ carry flag should keep its previous value - confirm before relying on cs
@ when no explicit shift is passed.
.macro joaobapt_test_lsl reg shift = #0
movs \reg, \reg, lsl \shift
.endm
@ Test the two least-significant bits of \reg; \reg is destroyed
@ Afterwards mi is true when bit 0 was set and cs is true when bit 1 was set
@ (the value is shifted left by 31 so bit 0 lands in N and bit 1 in C)
.macro joaobapt_test reg
    movs    \reg, \reg, lsl #31
.endm
@ Test lowest two bits of \src, result stored in \dst
@ Use mi for low bit, cs for high bit
@ \dst receives \src shifted left by 31 (only bit 0 of \src survives, in bit 31);
@ \src itself is left untouched unless it is the same register as \dst
.macro joaobapt_test_into dst, src
movs \dst, \src, lsl #31
.endm
@ Two-way dispatch on bits 0 and 1 of \reg (\reg is destroyed by the test)
@ Jumps to \b_mi when bit 0 is set, otherwise to \b_cs when bit 1 is set,
@ and falls through when both bits are clear.
@ Bit 0 takes priority when both bits are set.
.macro joaobapt_switch reg, b_mi, b_cs
    movs    \reg, \reg, lsl #31
    bmi     \b_mi
    bcs     \b_cs
.endm
@ Dispatch on the relative alignment of \a and \b; \scratch is destroyed
@ \scratch = \a xor \b, so a set bit 0 means the two values can never be
@ halfword aligned together (jump to \b_byte), and a set bit 1 with clear
@ bit 0 means they can be halfword but not word aligned together (jump to
@ \b_half). Falls through when the low two bits of \a and \b agree.
.macro align_switch a, b, scratch, b_byte, b_half
    eor     \scratch, \a, \b
    joaobapt_test \scratch
    bmi     \b_byte
    bcs     \b_half
.endm

126
common/ndsabi/memcpy.s Normal file
View File

@@ -0,0 +1,126 @@
// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// ABI:
// __aeabi_memcpy, __aeabi_memcpy4, __aeabi_memcpy8
// Standard:
// memcpy
// Support:
// __ndsabi_memcpy2, __ndsabi_memcpy1
#include <nds/asminc.h>
#include "macros.inc"
.syntax unified
.arm
@ __aeabi_memcpy: forward copy with no alignment assumption
@ In: r0 = destination, r1 = source, r2 = byte count
@ Clobbers r2, r3 and the flags; r0 and r1 are advanced as data is copied
@ (r4-r10 are used by the 32-byte loop but saved and restored around it)
@ NOTE(review): the count is tested with signed conditions (ble/blt/bgt/ge),
@ so a count of 0x80000000 or more would be treated as negative - confirm
@ that callers never pass such sizes.
@ NOTE(review): BEGIN_ASM_FUNC / BEGIN_ASM_FUNC_NO_SECTION come from
@ nds/asminc.h, which is not visible here; the fallthrough between entry
@ points assumes the NO_SECTION variant only defines a label - confirm.
BEGIN_ASM_FUNC __aeabi_memcpy
@ >6-bytes is roughly the threshold when byte-by-byte copy is slower
cmp r2, #6
ble __ndsabi_memcpy1
@ Dispatch on r0 xor r1: a difference in bit 0 forces a byte copy, a
@ difference in bit 1 only allows a halfword copy, otherwise fall through
align_switch r0, r1, r3, __ndsabi_memcpy1, .Lcopy_halves
@ Check if r0 (or r1) needs word aligning
@ r3 = 4 - r0; its low two bits give the 0-3 bytes up to the next word
@ boundary. After the test: mi = copy one byte, cs = copy one halfword
rsbs r3, r0, #4
joaobapt_test r3
@ Copy byte head to align
@ (the loads below overwrite r3 but leave the flags from the test intact)
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
submi r2, r2, #1
@ r0, r1 are now half aligned
@ Copy half head to align
ldrhcs r3, [r1], #2
strhcs r3, [r0], #2
subcs r2, r2, #2
@ r0, r1 are now word aligned
@ __aeabi_memcpy8 / __aeabi_memcpy4: entry points that skip the head
@ alignment above and go straight to the word copy
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy4
cmp r2, #32
blt .Lcopy_words
@ Word aligned, 32-byte copy
@ r3-r10 carry eight words per pass, so r4-r10 must be preserved
push {r4-r10}
.Lloop_32:
@ Count down first; the ge-conditional transfer only runs while at least
@ 32 bytes were still pending before the subtraction
subs r2, r2, #32
ldmiage r1!, {r3-r10}
stmiage r0!, {r3-r10}
bgt .Lloop_32
@ pop leaves the flags alone, so eq below still reflects the last subs
pop {r4-r10}
bxeq lr
@ < 32 bytes remaining to be copied
@ r2 went negative in the loop; undo the final subtraction
add r2, r2, #32
.Lcopy_words:
@ Fewer than 4 bytes left: finish in the halfword/byte path
cmp r2, #4
blt .Lcopy_halves
.Lloop_4:
@ Same count-down pattern as the 32-byte loop, one word per pass
subs r2, r2, #4
ldrge r3, [r1], #4
strge r3, [r0], #4
bgt .Lloop_4
bxeq lr
@ Copy byte & half tail
@ This test still works when r2 is negative
@ (r2 is -3..-1 here and shares its low two bits with the 1..3 bytes left)
joaobapt_test r2
@ Copy half
ldrhcs r3, [r1], #2
strhcs r3, [r0], #2
@ Copy byte
ldrbmi r3, [r1]
strbmi r3, [r0]
bx lr
@ Halfword copy path. Reached from align_switch when r0 and r1 agree in
@ bit 0 but differ in bit 1, and from .Lcopy_words when fewer than 4 bytes
@ remain.
.Lcopy_halves:
@ Copy byte head to align
@ (only taken when r0 is odd; r1 is moved by the same amount)
tst r0, #1
ldrbne r3, [r1], #1
strbne r3, [r0], #1
subne r2, r2, #1
@ r0, r1 are now half aligned
@ __ndsabi_memcpy2: forward copy, one halfword per pass
@ In: r0 = destination, r1 = source, r2 = byte count
@ Clobbers r2, r3 and the flags; r0 and r1 are advanced
BEGIN_ASM_FUNC_NO_SECTION __ndsabi_memcpy2
@ Count down first; ge means at least 2 bytes were still pending
subs r2, r2, #2
ldrhge r3, [r1], #2
strhge r3, [r0], #2
@ The loop branches back to the entry label itself
bgt __ndsabi_memcpy2
bxeq lr
@ Copy byte tail
@ r2 is negative here; adding 2 back gives the leftover count (0 or 1)
adds r2, r2, #2
ldrbne r3, [r1]
strbne r3, [r0]
bx lr
@ __ndsabi_memcpy1: forward byte-by-byte copy, no alignment requirement
@ In: r0 = destination, r1 = source, r2 = byte count
@ The count is tested with signed conditions, so zero or a negative r2
@ copies nothing
@ Clobbers r2, r3 and the flags; r0 and r1 end up past the last byte copied
BEGIN_ASM_FUNC __ndsabi_memcpy1
@ Decrement first: ge means at least one byte was still pending
subs r2, r2, #1
ldrbge r3, [r1], #1
strbge r3, [r0], #1
@ Loop back to the entry label while more bytes remain
bgt __ndsabi_memcpy1
bx lr
@ Standard C memcpy wrapper around __aeabi_memcpy
@ In: r0 = destination, r1 = source, r2 = byte count
@ Out: r0 = the original destination pointer
@ r0 is stacked across the call because __aeabi_memcpy advances it while
@ copying; lr is stacked because bl overwrites it
BEGIN_ASM_FUNC memcpy
    stmfd   sp!, {r0, lr}
    bl      __aeabi_memcpy
    ldmfd   sp!, {r0, lr}
    bx      lr

45
common/ndsabi/memmove.s Normal file
View File

@@ -0,0 +1,45 @@
// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// ABI:
// __aeabi_memmove, __aeabi_memmove4, __aeabi_memmove8
// Standard:
// memmove
// Support:
// __ndsabi_memmove1
#include <nds/asminc.h>
.syntax unified
.arm
@ __aeabi_memmove: overlap-safe copy with no alignment assumption
@ In: r0 = destination, r1 = source, r2 = byte count
@ Tail-calls the reverse copy when the destination lies above the source
@ (so an overlapping tail is not overwritten before it is read) and the
@ forward copy otherwise.
BEGIN_ASM_FUNC __aeabi_memmove
    cmp     r0, r1
    @ Addresses are unsigned, so use hi rather than the signed gt; gt would
    @ pick the wrong direction when the two pointers differ in bit 31
    bhi     __ndsabi_rmemcpy
    b       __aeabi_memcpy
@ __aeabi_memmove8 / __aeabi_memmove4: overlap-safe copy for callers that
@ promise aligned pointers
@ In: r0 = destination, r1 = source, r2 = byte count
@ Tail-calls the reverse copy when the destination lies above the source
@ and the word-aligned forward copy otherwise.
BEGIN_ASM_FUNC __aeabi_memmove8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memmove4
    cmp     r0, r1
    @ Addresses are unsigned, so use hi rather than the signed gt; gt would
    @ pick the wrong direction when the two pointers differ in bit 31
    bhi     __ndsabi_rmemcpy
    b       __aeabi_memcpy4
@ __ndsabi_memmove1: overlap-safe copy that always moves single bytes
@ In: r0 = destination, r1 = source, r2 = byte count
@ Tail-calls the reverse byte copy when the destination lies above the
@ source and the forward byte copy otherwise.
BEGIN_ASM_FUNC __ndsabi_memmove1
    cmp     r0, r1
    @ Addresses are unsigned, so use hi rather than the signed gt; gt would
    @ pick the wrong direction when the two pointers differ in bit 31
    bhi     __ndsabi_rmemcpy1
    b       __ndsabi_memcpy1
@ Standard C memmove wrapper around __aeabi_memmove
@ In: r0 = destination, r1 = source, r2 = byte count
@ Out: r0 = the original destination pointer
@ r0 is stacked across the call so it can be handed back as the return
@ value; lr is stacked because bl overwrites it
BEGIN_ASM_FUNC memmove
    stmfd   sp!, {r0, lr}
    bl      __aeabi_memmove
    ldmfd   sp!, {r0, lr}
    bx      lr

120
common/ndsabi/memset.s Normal file
View File

@@ -0,0 +1,120 @@
// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// ABI:
// __aeabi_memclr, __aeabi_memclr4, __aeabi_memclr8,
// __aeabi_memset, __aeabi_memset4, __aeabi_memset8
// Standard:
// memset
// Support:
// __ndsabi_wordset4, __ndsabi_lwordset4, __ndsabi_memset1
#include <nds/asminc.h>
#include "macros.inc"
.syntax unified
.arm
@ __aeabi_memclr: zero-fill with no alignment assumption
@ In: r0 = destination, r1 = byte count
@ Loads the fill value 0 into r2 and tail-calls __aeabi_memset, which takes
@ the count in r1 and the value in r2
BEGIN_ASM_FUNC __aeabi_memclr
mov r2, #0
b __aeabi_memset
@ __aeabi_memclr8 / __aeabi_memclr4: zero-fill for callers that promise an
@ aligned destination
@ In: r0 = destination, r1 = byte count
@ r2 = 0 is already a full fill word, so this jumps to __ndsabi_wordset4
@ and skips the byte replication done at __aeabi_memset4
BEGIN_ASM_FUNC __aeabi_memclr8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memclr4
mov r2, #0
b __ndsabi_wordset4
@ __aeabi_memset: fill with no alignment assumption
@ In: r0 = destination, r1 = byte count, r2 = fill value (low byte is used)
@ Clobbers r1, r2, r3 and the flags; r0 is advanced as data is stored
@ (r4-r9 are used by the 8-word loop but saved and restored around it)
@ Execution falls through the entry points below in order; each later
@ entry expects more of the setup to have been done by its caller.
@ NOTE(review): the count is tested with signed conditions (blt/bgt/ge),
@ so a count of 0x80000000 or more would be treated as negative - confirm
@ that callers never pass such sizes.
BEGIN_ASM_FUNC __aeabi_memset
@ < 8 bytes probably won't be aligned: go byte-by-byte
cmp r1, #8
blt __ndsabi_memset1
@ Copy head to align to next word
@ r3 = 4 - r0; its low two bits give the 0-3 bytes up to the next word
@ boundary. After the test: mi = store one byte, cs = store two bytes
rsb r3, r0, #4
joaobapt_test r3
strbmi r2, [r0], #1
submi r1, r1, #1
@ Two byte stores rather than one halfword store: r2 has not been
@ replicated yet, so only its low byte holds the fill value
strbcs r2, [r0], #1
strbcs r2, [r0], #1
subcs r1, r1, #2
@ __aeabi_memset8 / __aeabi_memset4: entry points that skip the head
@ alignment above
@ In: r0 = destination, r1 = byte count, r2 = fill byte
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memset8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memset4
@ Replicate the low byte of r2 into all four bytes of the word:
@ move it to the top byte, then fold it down by 8 and by 16 bits
lsl r2, r2, #24
orr r2, r2, r2, lsr #8
orr r2, r2, r2, lsr #16
@ __ndsabi_wordset4: r2 is already the full 32-bit fill word
BEGIN_ASM_FUNC_NO_SECTION __ndsabi_wordset4
mov r3, r2
@ __ndsabi_lwordset4: r2 and r3 together form the two-word fill pattern
@ (r2 is stored first, r3 second). The tail after the last full pair is
@ written from r2 only.
BEGIN_ASM_FUNC_NO_SECTION __ndsabi_lwordset4
@ 16 words is roughly the threshold when lwordset is slower
@ (below 64 bytes the 8-word block setup is skipped entirely)
cmp r1, #64
blt .Lset_2_words
@ 8 word set
@ r4-r9 are loaded with alternating copies of r2 and r3 so that r2-r9
@ hold four repetitions of the pair
push {r4-r9}
mov r4, r2
mov r5, r3
mov r6, r2
mov r7, r3
mov r8, r2
mov r9, r3
.Lset_8_words:
@ Count down first; ge means at least 32 bytes were still pending
subs r1, r1, #32
stmiage r0!, {r2-r9}
bgt .Lset_8_words
@ pop leaves the flags alone, so eq below still reflects the last subs
pop {r4-r9}
bxeq lr
@ Fixup remaining
@ r1 went negative in the loop; undo the final subtraction
add r1, r1, #32
.Lset_2_words:
@ Same count-down pattern, one pair of words per pass
subs r1, r1, #8
stmiage r0!, {r2-r3}
bgt .Lset_2_words
bxeq lr
@ Test for remaining word
@ r1 is -7..-1 here; adding 4 is non-negative when 4 or more bytes remain
@ and exactly zero when precisely one word remains
adds r1, r1, #4
strge r2, [r0], #4
bxeq lr
@ Set tail
@ The low two bits of r1 now equal the 1-3 bytes still to store:
@ cs = store a halfword, mi = store a byte
joaobapt_test r1
strhcs r2, [r0], #2
strbmi r2, [r0], #1
bx lr
@ __ndsabi_memset1: byte-by-byte fill, no alignment requirement
@ In: r0 = destination, r1 = byte count, r2 = fill value (low byte stored)
@ The count is tested with signed conditions, so zero or a negative r1
@ stores nothing
@ Clobbers r1 and the flags; r0 ends up past the last byte stored
BEGIN_ASM_FUNC __ndsabi_memset1
@ Decrement first: ge means at least one byte was still pending
subs r1, r1, #1
strbge r2, [r0], #1
@ Loop back to the entry label while more bytes remain
bgt __ndsabi_memset1
bx lr
@ Standard C memset wrapper around __aeabi_memset
@ In: r0 = destination, r1 = fill value, r2 = byte count
@ Out: r0 = the original destination pointer
@ __aeabi_memset takes the count in r1 and the value in r2, so the two
@ arguments are exchanged through r3 before the call
BEGIN_ASM_FUNC memset
    @ Preserve the return value and the return address across the call
    stmfd   sp!, {r0, lr}
    @ Exchange r1 and r2 using r3 as the temporary
    mov     r3, r1
    mov     r1, r2
    mov     r2, r3
    bl      __aeabi_memset
    ldmfd   sp!, {r0, lr}
    bx      lr

106
common/ndsabi/rmemcpy.s Normal file
View File

@@ -0,0 +1,106 @@
// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// Support:
// __ndsabi_rmemcpy, __ndsabi_rmemcpy1
#include <nds/asminc.h>
#include "macros.inc"
.syntax unified
.arm
@ __ndsabi_rmemcpy: reverse copy (highest address first), no alignment
@ assumption
@ In: r0 = destination, r1 = source, r2 = byte count
@ r2 doubles as the offset of the end of the region still to be copied;
@ r0 and r1 keep their entry values on return (the 32-byte loop moves
@ them but restores both from the stack)
@ Clobbers r2, r3 and the flags
@ NOTE(review): the count is tested with signed conditions (ble/blt/bgt/ge),
@ so a count of 0x80000000 or more would be treated as negative - confirm
@ that callers never pass such sizes.
BEGIN_ASM_FUNC __ndsabi_rmemcpy
@ >6-bytes is roughly the threshold when byte-by-byte copy is slower
cmp r2, #6
ble __ndsabi_rmemcpy1
@ Dispatch on r0 xor r1: a difference in bit 0 forces a byte copy, a
@ difference in bit 1 only allows a halfword copy, otherwise fall through
align_switch r0, r1, r3, __ndsabi_rmemcpy1, .Lcopy_halves
@ Check if end needs word aligning
@ r3 = end address of the destination; mi = end is odd, cs = bit 1 set
add r3, r0, r2
joaobapt_test r3
@ Copy byte tail to align
@ (the loads below overwrite r3 but leave the flags from the test intact)
submi r2, r2, #1
ldrbmi r3, [r1, r2]
strbmi r3, [r0, r2]
@ r2 is now half aligned
@ Copy half tail to align
subcs r2, r2, #2
ldrhcs r3, [r1, r2]
strhcs r3, [r0, r2]
@ r2 is now word aligned
@ (that is, r0 + r2 and r1 + r2 now sit on a word boundary)
cmp r2, #32
blt .Lcopy_words
@ Word aligned, 32-byte copy
@ r0 and r1 are stacked because the loop walks them down from the end;
@ r4-r10 are stacked because r3-r10 carry eight words per pass
push {r0-r1, r4-r10}
add r0, r0, r2
add r1, r1, r2
.Lloop_32:
@ Count down first; ge means at least 32 bytes were still pending
subs r2, r2, #32
ldmdbge r1!, {r3-r10}
stmdbge r0!, {r3-r10}
bgt .Lloop_32
@ pop leaves the flags alone, so eq below still reflects the last subs
pop {r0-r1, r4-r10}
bxeq lr
@ < 32 bytes remaining to be copied
@ r2 went negative in the loop; undo the final subtraction
add r2, r2, #32
.Lcopy_words:
@ r2 is decremented first and then used as the offset of the word to move
subs r2, r2, #4
ldrge r3, [r1, r2]
strge r3, [r0, r2]
bgt .Lcopy_words
bxeq lr
@ Copy byte & half head
@ r2 is -3..-1 here and shares its low two bits with the 1..3 bytes left;
@ the test result goes to r3 so that r2 stays usable as an offset
joaobapt_test_into r3, r2
@ Copy half
@ Adding 2 turns r2 into the offset of the halfword (0 or 1)
addcs r2, r2, #2
ldrhcs r3, [r1, r2]
strhcs r3, [r0, r2]
@ Copy byte
@ The odd byte, if any, is always the very first one
ldrbmi r3, [r1]
strbmi r3, [r0]
bx lr
@ Halfword reverse-copy path of __ndsabi_rmemcpy. Reached from its
@ align_switch when r0 and r1 agree in bit 0 but differ in bit 1, so r2 is
@ greater than 6 on entry.
@ r2 is used as the offset of the end of the region still to be copied.
.Lcopy_halves:
@ Copy byte tail to align
@ (only taken when the destination end address r0 + r2 is odd)
add r3, r0, r2
tst r3, #1
subne r2, r2, #1
ldrbne r3, [r1, r2]
strbne r3, [r0, r2]
@ r2 is now half aligned
.Lloop_2:
@ r2 is decremented first and then used as the offset of the halfword
subs r2, r2, #2
ldrhge r3, [r1, r2]
strhge r3, [r0, r2]
bgt .Lloop_2
bxeq lr
@ Copy byte head
@ The loop ended with r2 below zero, so one byte remains at offset 0
ldrb r3, [r1]
strb r3, [r0]
bx lr
@ __ndsabi_rmemcpy1: reverse byte-by-byte copy, no alignment requirement
@ In: r0 = destination, r1 = source, r2 = byte count
@ Bytes are moved from the highest offset down to offset 0; r0 and r1 are
@ not modified
@ The count is tested with signed conditions, so zero or a negative r2
@ copies nothing
@ Clobbers r2, r3 and the flags
BEGIN_ASM_FUNC_NO_SECTION __ndsabi_rmemcpy1
@ r2 is decremented first and then used as the offset of the byte to move
subs r2, r2, #1
ldrbge r3, [r1, r2]
strbge r3, [r0, r2]
@ Loop back to the entry label while more bytes remain
bgt __ndsabi_rmemcpy1
bx lr