mirror of
https://github.com/clockworkpi/PicoCalc.git
synced 2026-03-23 12:32:38 +01:00
add pico_multi_booter code
This commit is contained in:
62
Code/pico_multi_booter/picomite/assember.S
Normal file
62
Code/pico_multi_booter/picomite/assember.S
Normal file
@@ -0,0 +1,62 @@
|
||||
.syntax unified
|
||||
|
||||
@ High-throughput block transfer using the FPU register set as a 128-byte
|
||||
@ buffer.
|
||||
@
|
||||
@ This implementation exploits the IT Folding feature of the Cortex-M4.
|
||||
@ An IT instruction following a 16-bit Thumb instruction, when both are
|
||||
@ aligned into a 32-bit memory word, takes no additional cycles to issue.
|
||||
@
|
||||
@ Arguments:
|
||||
@ r0 source address
|
||||
@ r1 destination address
|
||||
@ r2 number of words to transfer.
|
||||
.section .time_critical,"ax",%progbits
|
||||
.balign 4
|
||||
.global _Z10copy_wordsPKmPmm
|
||||
.thumb_func
|
||||
_Z10copy_wordsPKmPmm:
|
||||
@ Name our registers.
|
||||
src .req r0
|
||||
dst .req r1
|
||||
count .req r2
|
||||
@ The caller may have been using floating point.
|
||||
@ Save the callee-save portion of the register file.
|
||||
vpush {s16 - s31} @ 17
|
||||
@ 'Warm up' the transfer engine, which wants to operate in units of
|
||||
@ 128 bytes, by making smaller transfers until count a multiple of
|
||||
@ Special-case the single word transfer; the macro below won't work.
|
||||
lsrs.n count, #1 @ 1
|
||||
itt cs @ 0 (aligned)
|
||||
vldmcs.32 src!, {s0} @ 2
|
||||
vstmcs.32 dst!, {s0} @ 2
|
||||
@ Transfer n+1 words.
|
||||
.macro XFER n @ 5 + 2*n
|
||||
lsrs.n count, #1 @ 1
|
||||
itt cs @ 0 (aligned)
|
||||
vldmcs.32 src!, {s0 - s\n} @ 1+1+n
|
||||
vstmcs.32 dst!, {s0 - s\n} @ 1+1+n
|
||||
.endm
|
||||
XFER 1 @ 7
|
||||
XFER 3 @ 11
|
||||
XFER 7 @ 19
|
||||
XFER 15 @ 35
|
||||
@ Handle the case where we've been asked to transfer <32 words.
|
||||
@ In such a case, count will now be zero, and the Z flag will still
|
||||
@ be set from the last XFER.
|
||||
@
|
||||
@ Force the branch to use a 32-bit instruction to preserve alignment
|
||||
@ of the loop branch below; this saves a cycle per time that branch
|
||||
@ is taken.
|
||||
@
|
||||
@ Note that the target of this branch (at 1 below) is also aligned,
|
||||
@ saving a cycle on the rare escape path.
|
||||
beq.w 1f @ 1 (n.t.)
|
||||
@ All warmed up, transfer in units of 128 bytes.
|
||||
0: vldm.32 src!, {s0 - s31} @ 33
|
||||
vstm.32 dst!, {s0 - s31} @ 33
|
||||
subs.n count, #1 @ 1
|
||||
bne.n 0b @ ~3 (taken)
|
||||
@ Restore FPU state.
|
||||
1: vpop {s16 - s31} @ 17
|
||||
bx lr @ 1-3??
|
||||
Reference in New Issue
Block a user