mirror of
https://github.com/LNH-team/pico-loader.git
synced 2026-06-02 09:16:49 +02:00
Initial commit
This commit is contained in:
39
common/ndsabi/macros.inc
Normal file
39
common/ndsabi/macros.inc
Normal file
@@ -0,0 +1,39 @@
|
||||
// SPDX-License-Identifier: Zlib
|
||||
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
|
||||
//
|
||||
// Copyright (C) 2021-2023 agbabi contributors
|
||||
//
|
||||
// ARM assembly support macros
|
||||
|
||||
@ joaobapt_test_lsl: flag-setting left shift of \reg by \shift (default #0).
@ \reg is overwritten with the shifted value.
@ After a non-zero shift: mi <=> the bit now in position 31 is set,
@                         cs <=> the last bit shifted out was set.
.macro joaobapt_test_lsl reg, shift=#0
    movs    \reg, \reg, lsl \shift
.endm
|
||||
|
||||
@ joaobapt_test: move the two lowest bits of \reg into the flags.
@ \reg is overwritten (shifted left by 31).
@ Afterwards: mi <=> bit 0 was set, cs <=> bit 1 was set.
.macro joaobapt_test reg
    movs    \reg, \reg, lsl #31
.endm
|
||||
|
||||
@ joaobapt_test_into: move the two lowest bits of \src into the flags.
@ The shifted value is written to \dst; \src is left untouched when the
@ two registers differ.
@ Afterwards: mi <=> bit 0 of \src was set, cs <=> bit 1 of \src was set.
.macro joaobapt_test_into dst, src
    lsls    \dst, \src, #31
.endm
|
||||
|
||||
@ joaobapt_switch: two-way dispatch on the two lowest bits of \reg.
@ \reg is overwritten (shifted left by 31).
@   bit 0 set            -> branch to \b_mi (checked first)
@   bit 0 clear, bit 1 set -> branch to \b_cs
@   both clear           -> fall through
.macro joaobapt_switch reg, b_mi, b_cs
    movs    \reg, \reg, lsl #31
    bmi     \b_mi
    bcs     \b_cs
.endm
|
||||
|
||||
@ align_switch: dispatch on the relative alignment of \a and \b.
@ \scratch receives \a ^ \b and is then shifted left by 31.
@   \a and \b differ in bit 0             -> branch to \b_byte
@   same bit 0 but differ in bit 1        -> branch to \b_half
@   same low two bits                     -> fall through
.macro align_switch a, b, scratch, b_byte, b_half
    eor     \scratch, \a, \b
    movs    \scratch, \scratch, lsl #31
    bmi     \b_byte
    bcs     \b_half
.endm
|
||||
126
common/ndsabi/memcpy.s
Normal file
126
common/ndsabi/memcpy.s
Normal file
@@ -0,0 +1,126 @@
|
||||
// SPDX-License-Identifier: Zlib
|
||||
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
|
||||
//
|
||||
// Copyright (C) 2021-2023 agbabi contributors
|
||||
//
|
||||
// ABI:
|
||||
// __aeabi_memcpy, __aeabi_memcpy4, __aeabi_memcpy8
|
||||
// Standard:
|
||||
// memcpy
|
||||
// Support:
|
||||
// __ndsabi_memcpy2, __ndsabi_memcpy1
|
||||
|
||||
#include <nds/asminc.h>
|
||||
|
||||
#include "macros.inc"
|
||||
|
||||
.syntax unified
|
||||
|
||||
.arm
|
||||
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __aeabi_memcpy

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Uses r3 as scratch; r0, r1 and r2 are advanced/consumed as bytes are copied.

    @ Small copies (r2 <= 6, signed compare) use the plain byte loop
    cmp     r2, #6
    ble     __ndsabi_memcpy1

    @ Pick a strategy from r0 ^ r1:
    @   pointers differ in bit 0  -> byte loop
    @   differ only in bit 1      -> halfword path
    @   otherwise                 -> word path below
    align_switch r0, r1, r3, __ndsabi_memcpy1, .Lcopy_halves

    @ r3 = 4 - r0; its two low bits give the head bytes needed to reach
    @ the next word boundary (mi = 1 byte, cs = 2 bytes)
    rsbs    r3, r0, #4
    joaobapt_test r3

    @ Head: single byte
    ldrbmi  r3, [r1], #1
    strbmi  r3, [r0], #1
    submi   r2, #1
    @ both pointers are halfword aligned from here on

    @ Head: single halfword
    ldrhcs  r3, [r1], #2
    strhcs  r3, [r0], #2
    subcs   r2, #2
    @ both pointers are word aligned from here on;
    @ execution continues into the code that follows
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy4

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Word-sized loads/stores are used, so both pointers are expected to be
    @ word aligned on entry. r3 is scratch; r4-r10 are saved and restored.

    cmp     r2, #32
    blt     .Lcopy_words

    @ Bulk phase: 8 words (32 bytes) per iteration through r3-r10
    stmfd   sp!, {r4-r10}
.Lblock_loop:
    subs    r2, #32
    ldmge   r1!, {r3-r10}
    stmge   r0!, {r3-r10}
    bgt     .Lblock_loop
    ldmfd   sp!, {r4-r10}
    bxeq    lr                      @ count hit exactly zero: finished

    @ The last subs overshot below zero; undo it so r2 = bytes left (< 32)
    add     r2, #32

.Lcopy_words:
    cmp     r2, #4
    blt     .Lcopy_halves
.Lword_loop:
    subs    r2, #4
    ldrge   r3, [r1], #4
    strge   r3, [r0], #4
    bgt     .Lword_loop
    bxeq    lr                      @ count hit exactly zero: finished

    @ 1-3 bytes remain. r2 is negative here, but its two low bits still
    @ equal those of the remaining count (r2 + 4), so test them directly.
    joaobapt_test r2
    @ cs: trailing halfword
    ldrhcs  r3, [r1], #2
    strhcs  r3, [r0], #2
    @ mi: trailing byte
    ldrbmi  r3, [r1]
    strbmi  r3, [r0]
    bx      lr
|
||||
|
||||
.Lcopy_halves:
    @ If the destination address is odd, copy one leading byte so the
    @ halfword loop below starts on an even address
    tst     r0, #1
    ldrbne  r3, [r1], #1
    strbne  r3, [r0], #1
    subne   r2, #1
    @ continues into __ndsabi_memcpy2

BEGIN_ASM_FUNC_NO_SECTION __ndsabi_memcpy2

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Copies one halfword per iteration; r3 is scratch.
    subs    r2, #2
    ldrhge  r3, [r1], #2
    strhge  r3, [r0], #2
    bgt     __ndsabi_memcpy2
    bxeq    lr                      @ count hit exactly zero: finished

    @ Undo the final overshoot; a non-zero result means one byte is left
    adds    r2, #2
    ldrbne  r3, [r1]
    strbne  r3, [r0]
    bx      lr
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __ndsabi_memcpy1

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Forward byte-by-byte copy; r3 is scratch.
    @ Nothing is copied when the decremented count is negative (signed).
    subs    r2, #1
    ldrbge  r3, [r1], #1
    strbge  r3, [r0], #1
    bgt     __ndsabi_memcpy1
    bx      lr
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC memcpy

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Out: r0 = destination as passed in
    @ __aeabi_memcpy advances r0 while copying, so the original value is
    @ kept on the stack (together with lr) and reloaded before returning.
    stmfd   sp!, {r0, lr}
    bl      __aeabi_memcpy
    ldmfd   sp!, {r0, lr}
    bx      lr
|
||||
45
common/ndsabi/memmove.s
Normal file
45
common/ndsabi/memmove.s
Normal file
@@ -0,0 +1,45 @@
|
||||
// SPDX-License-Identifier: Zlib
|
||||
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
|
||||
//
|
||||
// Copyright (C) 2021-2023 agbabi contributors
|
||||
//
|
||||
// ABI:
|
||||
// __aeabi_memmove, __aeabi_memmove4, __aeabi_memmove8
|
||||
// Standard:
|
||||
// memmove
|
||||
|
||||
#include <nds/asminc.h>
|
||||
|
||||
.syntax unified
|
||||
|
||||
.arm
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __aeabi_memmove

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Chooses the copy direction so overlapping regions are handled:
    @   destination above source -> copy backwards (__ndsabi_rmemcpy)
    @   otherwise                -> copy forwards  (__aeabi_memcpy)
    @ Addresses are unsigned, so the comparison must use HI rather than GT;
    @ a signed compare picks the wrong direction when the two pointers
    @ lie on opposite sides of 0x80000000.
    cmp     r0, r1
    bhi     __ndsabi_rmemcpy
    b       __aeabi_memcpy
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __aeabi_memmove8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memmove4

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Same direction choice as __aeabi_memmove, but the forward case goes
    @ to the __aeabi_memcpy4 entry point:
    @   destination above source -> copy backwards (__ndsabi_rmemcpy)
    @   otherwise                -> copy forwards  (__aeabi_memcpy4)
    @ Addresses are unsigned, so the comparison must use HI rather than GT;
    @ a signed compare picks the wrong direction when the two pointers
    @ lie on opposite sides of 0x80000000.
    cmp     r0, r1
    bhi     __ndsabi_rmemcpy
    b       __aeabi_memcpy4
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __ndsabi_memmove1

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Byte-granular move that tolerates overlap:
    @   destination above source -> backwards byte loop (__ndsabi_rmemcpy1)
    @   otherwise                -> forwards byte loop  (__ndsabi_memcpy1)
    @ Addresses are unsigned, so the comparison must use HI rather than GT;
    @ a signed compare picks the wrong direction when the two pointers
    @ lie on opposite sides of 0x80000000.
    cmp     r0, r1
    bhi     __ndsabi_rmemcpy1
    b       __ndsabi_memcpy1
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC memmove

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Out: r0 = destination as passed in
    @ r0 is stashed on the stack next to lr across the call to
    @ __aeabi_memmove and reloaded for the return value.
    stmfd   sp!, {r0, lr}
    bl      __aeabi_memmove
    ldmfd   sp!, {r0, lr}
    bx      lr
|
||||
120
common/ndsabi/memset.s
Normal file
120
common/ndsabi/memset.s
Normal file
@@ -0,0 +1,120 @@
|
||||
// SPDX-License-Identifier: Zlib
|
||||
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
|
||||
//
|
||||
// Copyright (C) 2021-2023 agbabi contributors
|
||||
//
|
||||
// ABI:
|
||||
// __aeabi_memclr, __aeabi_memclr4, __aeabi_memclr8,
|
||||
// __aeabi_memset, __aeabi_memset4, __aeabi_memset8
|
||||
// Standard:
|
||||
// memset
|
||||
// Support:
|
||||
// __ndsabi_wordset4, __ndsabi_lwordset4, __ndsabi_memset1
|
||||
|
||||
#include <nds/asminc.h>
|
||||
|
||||
#include "macros.inc"
|
||||
|
||||
.syntax unified
|
||||
|
||||
.arm
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __aeabi_memclr

    @ In:  r0 = destination, r1 = byte count
    @ Zero fill: load a zero fill value into r2, then tail-branch to
    @ __aeabi_memset.
    mov     r2, #0
    b       __aeabi_memset
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __aeabi_memclr8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memclr4

    @ In:  r0 = destination, r1 = byte count
    @ Zero fill through the word-set entry point: r2 = 0 is already a
    @ full fill word, so no byte replication is required.
    mov     r2, #0
    b       __ndsabi_wordset4
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __aeabi_memset

    @ In:  r0 = destination, r1 = byte count, r2 = fill value (low byte used)
    @ r3 is scratch; r0 and r1 are advanced/consumed as bytes are stored.

    @ Fewer than 8 bytes (signed compare): use the byte loop
    cmp     r1, #8
    blt     __ndsabi_memset1

    @ r3 = 4 - r0; its two low bits give the head bytes needed to reach
    @ the next word boundary (mi = 1 byte, cs = 2 bytes)
    rsb     r3, r0, #4
    joaobapt_test r3
    @ mi: one byte
    strbmi  r2, [r0], #1
    submi   r1, #1
    @ cs: two bytes, stored individually
    strbcs  r2, [r0], #1
    strbcs  r2, [r0], #1
    subcs   r1, #2
    @ execution continues into the code that follows
|
||||
|
||||
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memset8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memset4

    @ In:  r0 = destination, r1 = byte count, r2 = fill value (low byte used)
    @ Replicate the low byte of r2 into all four bytes of r2:
    @   000000bb -> bb000000 -> bbbb0000 -> bbbbbbbb
    mov     r2, r2, lsl #24
    orr     r2, r2, r2, lsr #8
    orr     r2, r2, r2, lsr #16

BEGIN_ASM_FUNC_NO_SECTION __ndsabi_wordset4

    @ In:  r0 = destination, r1 = byte count, r2 = 32-bit fill word
    @ Duplicate the fill word into r3 so r2:r3 form the pair used below
    mov     r3, r2
    @ execution continues into the code that follows
|
||||
|
||||
BEGIN_ASM_FUNC_NO_SECTION __ndsabi_lwordset4

    @ In:  r0 = destination, r1 = byte count, r2:r3 = pair of fill words
    @ Stores alternate r2, r3, r2, r3, ... ; r4-r9 are saved and restored.

    @ Below 64 bytes (16 words) skip the 8-word loop entirely
    cmp     r1, #64
    blt     .Lstore_2_words

    @ Spread the r2:r3 pair across r4-r9 so one stm writes 8 words
    stmfd   sp!, {r4-r9}
    mov     r4, r2
    mov     r6, r2
    mov     r8, r2
    mov     r5, r3
    mov     r7, r3
    mov     r9, r3

.Lstore_8_words:
    subs    r1, #32
    stmge   r0!, {r2-r9}
    bgt     .Lstore_8_words
    ldmfd   sp!, {r4-r9}
    bxeq    lr                      @ count hit exactly zero: finished

    @ The last subs overshot below zero; undo it so r1 = bytes left (< 32)
    add     r1, #32

.Lstore_2_words:
    subs    r1, #8
    stmge   r0!, {r2-r3}
    bgt     .Lstore_2_words
    bxeq    lr                      @ count hit exactly zero: finished

    @ r1 is now in -7..-1; adding 4 tells whether a whole word still fits
    adds    r1, #4
    strge   r2, [r0], #4
    bxeq    lr

    @ 1-3 bytes remain; the two low bits of r1 equal those of the
    @ remaining count whether r1 is positive or negative
    joaobapt_test r1
    strhcs  r2, [r0], #2            @ cs: trailing halfword
    strbmi  r2, [r0], #1            @ mi: trailing byte
    bx      lr
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __ndsabi_memset1

    @ In:  r0 = destination, r1 = byte count, r2 = fill value (low byte stored)
    @ Byte-by-byte fill. Nothing is stored when the decremented count is
    @ negative (signed).
    subs    r1, #1
    strbge  r2, [r0], #1
    bgt     __ndsabi_memset1
    bx      lr
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC memset

    @ In:  r0 = destination, r1 = fill value, r2 = byte count
    @ Out: r0 = destination as passed in
    @ __aeabi_memset takes the count in r1 and the value in r2, so the two
    @ are exchanged through r3 before the call.
    mov     r3, r1
    mov     r1, r2
    mov     r2, r3

    @ r0 is advanced by the callee; keep the original for the return value
    stmfd   sp!, {r0, lr}
    bl      __aeabi_memset
    ldmfd   sp!, {r0, lr}
    bx      lr
|
||||
106
common/ndsabi/rmemcpy.s
Normal file
106
common/ndsabi/rmemcpy.s
Normal file
@@ -0,0 +1,106 @@
|
||||
// SPDX-License-Identifier: Zlib
|
||||
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
|
||||
//
|
||||
// Copyright (C) 2021-2023 agbabi contributors
|
||||
//
|
||||
// Support:
|
||||
// __ndsabi_rmemcpy, __ndsabi_rmemcpy1
|
||||
|
||||
#include <nds/asminc.h>
|
||||
|
||||
#include "macros.inc"
|
||||
|
||||
.syntax unified
|
||||
|
||||
.arm
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC __ndsabi_rmemcpy

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Copies from the end of the buffers towards the start. r0 and r1 keep
    @ pointing at the buffer starts; r2 is the offset of the next chunk and
    @ counts down. r3 is scratch; r4-r10 are saved and restored.

    @ Small copies (r2 <= 6, signed compare) use the plain byte loop
    cmp     r2, #6
    ble     __ndsabi_rmemcpy1

    @ Pick a strategy from r0 ^ r1:
    @   pointers differ in bit 0  -> byte loop
    @   differ only in bit 1      -> halfword path
    @   otherwise                 -> word path below
    align_switch r0, r1, r3, __ndsabi_rmemcpy1, .Lcopy_halves

    @ Low two bits of the end address (r0 + r2) = tail bytes to peel off
    @ before the end is word aligned (mi = 1 byte, cs = 2 bytes)
    add     r3, r0, r2
    joaobapt_test r3

    @ Tail: single byte
    submi   r2, #1
    ldrbmi  r3, [r1, r2]
    strbmi  r3, [r0, r2]
    @ end offset is halfword aligned from here on

    @ Tail: single halfword
    subcs   r2, #2
    ldrhcs  r3, [r1, r2]
    strhcs  r3, [r0, r2]
    @ end offset is word aligned from here on

    cmp     r2, #32
    blt     .Lcopy_words

    @ Bulk phase: 8 words (32 bytes) per iteration, descending.
    @ r0/r1 are temporarily moved to the buffer ends; the saved start
    @ pointers are restored by the matching ldmfd.
    stmfd   sp!, {r0-r1, r4-r10}
    add     r0, r0, r2
    add     r1, r1, r2
.Lblock_loop:
    subs    r2, #32
    ldmdbge r1!, {r3-r10}
    stmdbge r0!, {r3-r10}
    bgt     .Lblock_loop
    ldmfd   sp!, {r0-r1, r4-r10}
    bxeq    lr                      @ count hit exactly zero: finished

    @ The last subs overshot below zero; undo it so r2 = bytes left (< 32)
    add     r2, #32

.Lcopy_words:
    subs    r2, #4
    ldrge   r3, [r1, r2]
    strge   r3, [r0, r2]
    bgt     .Lcopy_words
    bxeq    lr                      @ count hit exactly zero: finished

    @ 1-3 head bytes remain. r2 is negative, but its two low bits equal
    @ those of the remaining count (r2 + 4). Flags go via r3 so r2 is kept.
    joaobapt_test_into r3, r2
    @ cs: halfword; r2 + 2 is its offset from the buffer start (0 or 1)
    addcs   r2, #2
    ldrhcs  r3, [r1, r2]
    strhcs  r3, [r0, r2]
    @ mi: first byte of the buffer
    ldrbmi  r3, [r1]
    strbmi  r3, [r0]
    bx      lr
|
||||
|
||||
.Lcopy_halves:
    @ Halfword path of the reverse copy.
    @ r0 = destination start, r1 = source start, r2 = bytes still to copy.

    @ If the destination end address (r0 + r2) is odd, peel off one tail byte
    add     r3, r0, r2
    tst     r3, #1
    subne   r2, #1
    ldrbne  r3, [r1, r2]
    strbne  r3, [r0, r2]
    @ end offset is halfword aligned from here on

.Lhalf_loop:
    subs    r2, #2
    ldrhge  r3, [r1, r2]
    strhge  r3, [r0, r2]
    bgt     .Lhalf_loop
    bxeq    lr                      @ count hit exactly zero: finished

    @ Count went negative: copy the single byte at the buffer start
    ldrb    r3, [r1]
    strb    r3, [r0]
    bx      lr
|
||||
|
||||
|
||||
BEGIN_ASM_FUNC_NO_SECTION __ndsabi_rmemcpy1

    @ In:  r0 = destination, r1 = source, r2 = byte count
    @ Reverse byte-by-byte copy: r2 doubles as the offset of the byte being
    @ moved, counting down to 0. r0 and r1 are not modified; r3 is scratch.
    subs    r2, #1
    ldrbge  r3, [r1, r2]
    strbge  r3, [r0, r2]
    bgt     __ndsabi_rmemcpy1
    bx      lr
|
||||
Reference in New Issue
Block a user