big-endian off
+! Stack frame
+! https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=vs-2019
+
+! x0 Volatile Parameter/scratch register 1, result register
+! x1-x7 Volatile Parameter/scratch register 2-8
+! x8-x15 Volatile Scratch registers
+! x16-x17 Volatile Intra-procedure-call scratch registers
+! x18 Non-volatile Platform register: in kernel mode, points to KPCR for the current processor;
+! in user mode, points to TEB
+! x19-x28 Non-volatile Scratch registers
+! x29/fp Non-volatile Frame pointer
+! x30/lr Non-volatile Link register
+
+! varargs https://developer.arm.com/documentation/ihi0055/d/?lang=en
+! Base size of a JIT stack frame, expressed as 8 bootstrap cells.
+! JIT-PROLOG/JIT-EPILOG add two more cells on top of this value.
+: stack-frame-size ( -- n ) 8 bootstrap-cells ;
+! Registers a callee may clobber: X0-X17, matching the volatile rows of the table above.
+: volatile-regs ( -- seq ) { X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 } ;
+! windows arm - X18 is non-volatile https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-160
+! Registers that must be preserved across calls: X18-X30.
+! NOTE(review): X18 is listed here per the Windows convention linked above, but the
+! wikichip notes later in this file describe X18 as volatile -- confirm per target platform.
+: nv-regs ( -- seq ) { X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 } ;
+
+! callee-save = non-volatile aka call-preserved
+
+! x30 is the link register (used to return from subroutines)
+! x29 is the frame register
+! x19 to x29 are callee-saved
+! x18 is the 'platform register', used for some operating-system-specific special purpose,
+! or an additional caller-saved register
+! x16 and x17 are the Intra-Procedure-call scratch register
+! x9 to x15: used to hold local variables (caller saved)
+! x8: used to hold indirect return value address
+! x0 to x7: used to hold argument values passed to a subroutine, and also hold
+! results returned from a subroutine
+
+
+! https://en.wikichip.org/wiki/arm/aarch64
+! Generally, X0 through X18 (volatile, can corrupt) while X19-X29 must be preserved (non-volatile)
+! Volatile registers' content may change over a subroutine call
+! non-volatile register is a type of register with contents that must be preserved over subroutine calls
+! Register Role Requirement
+! X0 - X7 Parameter/result registers Can Corrupt (volatile)
+! X8 Indirect result location register (volatile)
+! X9 - X15 Temporary registers (volatile)
+! X16 - X17 Intra-procedure call temporary (volatile)
+! x16 - syscall number register with the SVC instruction on Apple platforms (the commented Linux samples in this file use X8)
+! X18 Platform register, otherwise temporary, DONT USE (volatile)
+
+! X19 - X29 Callee-saved register Must preserve (non-volatile)
+! X29 - frame pointer register, must always be valid
+! X30 Link Register LR Can Corrupt
+! X31 Stack Pointer SP
+! 16-byte stack alignment
+
+! stack walking - {fp, lr} pairs if compiled with frame pointers enabled
+
+! First four parameter registers, X0-X3, taken from the x0-x7 parameter/scratch
+! set described in the register table above.
+: arg1 ( -- reg ) X0 ;
+: arg2 ( -- reg ) X1 ;
+: arg3 ( -- reg ) X2 ;
+: arg4 ( -- reg ) X3 ;
+
+! Red zone
+! windows arm64: 16 bytes https://devblogs.microsoft.com/oldnewthing/20190111-00/?p=100685
+! windows arm32: 8 bytes
+! x86/x64: 0 bytes
+! Apple arm64: 128 bytes https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms?language=objc
+! Red zone size in bytes used by this backend.
+: red-zone-size ( -- n ) 16 ; ! 16 matches the Windows arm64 figure above; Apple arm64 lists 128 -- TODO confirm the value per target (may need to be 0)
+! 0 or 16 likely
+! no red zone on x86/x64 windows
+
+
+! https://github.com/MicrosoftDocs/cpp-docs/blob/master/docs/build/arm64-windows-abi-conventions.md
+
: shift-arg ( -- reg ) X1 ; ! same register as arg2
: div-arg ( -- reg ) X0 ; ! same register as arg1 and return-reg
: mod-arg ( -- reg ) X2 ; ! same register as arg3
: temp3 ( -- reg ) X12 ; ! inside the x8-x15 volatile scratch range
-
! : pic-tail-reg ( -- reg ) RBX ;
-! : return-reg ( -- reg ) RAX ;
-! : nv-reg ( -- reg ) RBX ;
-! : stack-reg ( -- reg ) RSP ;
-! : frame-reg ( -- reg ) RBP ;
-! : link-reg ( -- reg ) R11 ;
+! Result register: X0, which is also arg1.
+: return-reg ( -- reg ) X0 ;
+! Stack pointer.
+: stack-reg ( -- reg ) SP ;
+! https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Link-registers
+: link-reg ( -- reg ) X30 ; ! LR
+: stack-frame-reg ( -- reg ) X29 ; ! FP
+! X26-X28 fall in the x19-x28 non-volatile range listed at the top of this file.
+! Presumably: vm-reg holds the VM pointer, ds-reg the data stack pointer and
+! rs-reg the retain stack pointer -- confirm against the VM sources.
+: vm-reg ( -- reg ) X28 ;
+: ds-reg ( -- reg ) X27 ;
+: rs-reg ( -- reg ) X26 ;
! : ctx-reg ( -- reg ) R12 ;
-! : vm-reg ( -- reg ) R13 ;
-: ds-reg ( -- reg ) X5 ;
-: rs-reg ( -- reg ) X6 ;
! : fixnum>slot@ ( -- ) temp0 1 SAR ;
! : rex-length ( -- n ) 1 ;
-: jit-call ( name -- ) drop ;
+! rc-absolute-cell is just CONSTANT: 0
+! Emit a call to the C function `name`; its address is patched in through a
+! dlsym relocation (rel-dlsym) placed right after the immediate load.
+! The target address goes in X16 (intra-procedure-call scratch), not X0:
+! X0 is arg1/return-reg, and jit-call-1arg/jit-call-2arg load it immediately
+! before calling this word, so using X0 here would destroy the first argument.
+: jit-call ( name -- )
+ 0 X16 MOVwi64
+ f rc-absolute-cell rel-dlsym
+ X16 BLR ;
! RAX 0 MOV f rc-absolute-cell rel-dlsym
! RAX CALL ;
-:: jit-call-1arg ( arg1s name -- ) 2drop ;
- ! arg1 arg1s MOV
+! Emit a move of register arg1s into the first argument register, followed
+! by a call to the C function `name`.
+: jit-call-1arg ( arg1s name -- )
+ [ arg1 MOVr64 ] dip
+ jit-call ;
+ ! arg1 arg1s MOVr64
! name jit-call ;
-:: jit-call-2arg ( arg1s arg2s name -- ) 3drop ;
+! Emit moves of arg1s and arg2s into the first and second argument registers
+! (in that order), followed by a call to the C function `name`.
+: jit-call-2arg ( arg1s arg2s name -- )
+ [ [ arg1 MOVr64 ] dip arg2 MOVr64 ] dip
+ jit-call ;
! arg1 arg1s MOV
! arg2 arg2s MOV
! name jit-call ;
! ctx-reg context-retainstack-offset [+] rs-reg MOV ;
! ctx-reg must already have been loaded
-: jit-restore-context ( -- )
+! Currently a no-op on arm64: the body is empty. The x64 loads of ds-reg and
+! rs-reg from ctx-reg are kept below only as commented-out reference.
+: jit-restore-context ( -- ) ;
! ds-reg ctx-reg context-datastack-offset [+] MOV
! rs-reg ctx-reg context-retainstack-offset [+] MOV ;
[
+
! ! ctx-reg is preserved across the call because it is non-volatile
! ! in the C ABI
! jit-save-context
] JIT-PRIMITIVE jit-define
-: jit-jump-quot ( -- ) ;
+! Emit a jump (no link) to the quotation entry point, using arg1 as the branch register.
+! NOTE(review): ADR forms an address relative to the program counter, whereas the
+! x64 reference line below jumps through the memory slot at arg1 + quot-entry-point-offset.
+! Looks like a load from [arg1 + offset] is intended here -- confirm.
+: jit-jump-quot ( -- )
+ quot-entry-point-offset arg1 ADR
+ arg1 BR ;
! arg1 quot-entry-point-offset [+] JMP ;
-: jit-call-quot ( -- ) ;
+! Emit a branch-with-link to the quotation entry point, using arg1 as the branch register.
+! NOTE(review): ADR forms an address relative to the program counter, whereas the
+! x64 reference line below calls through the memory slot at arg1 + quot-entry-point-offset.
+! Looks like a load from [arg1 + offset] is intended here -- confirm.
+: jit-call-quot ( -- )
+ quot-entry-point-offset arg1 ADR
+ arg1 BLR ;
! arg1 quot-entry-point-offset [+] CALL ;
: signal-handler-save-regs ( -- regs ) { } ;
] JIT-SAFEPOINT jit-define
! # All arm.64 subprimitives
-
{
+ { c-to-factor [
+ ! Set up the datastack and retainstack registers
+ ! and jump into the quotation
+ ! NOTE(review): ds-reg/rs-reg are not loaded in this body yet, and
+ ! jit-restore-context is an empty stub -- confirm this is intentional.
+
+
+ ! write()
+ ! 68 X8 MOVwi64
+ ! X2 MOVwi64
+ ! 0 SVC
+
+ ! exit(42)
+
+ ! 9999 BRK
+ ! 42 X0 MOVwi64
+ ! 93 X8 MOVwi64
+ ! 0 SVC
+
+ ! Rn Rd MOVr64 ! comment
+ ! Source first, destination last: copy the incoming arg1 into arg2 so it
+ ! becomes the second argument of begin_callback (presumably the quotation -- confirm).
+ arg1 arg2 MOVr64
+ vm-reg "begin_callback" jit-call-1arg
+
+ ! return-reg and arg1 are both X0, so this move leaves the value in place;
+ ! it mirrors the x64 version where the two registers differ.
+ return-reg arg1 MOVr64 ! arg1 is return
+ jit-call-quot
+
+ vm-reg "end_callback" jit-call-1arg
+ ] }
+} define-sub-primitives
+
+
+! {
! ## Contexts
! { (set-context) [ jit-set-context ] }
! { (set-context-and-delete) [
! { (start-context-and-delete) [ jit-start-context-and-delete ] }
! ## Entry points
- { c-to-factor [ ] }
- { unwind-native-frames [ ] }
+ ! { c-to-factor [
+ ! ! dst src MOV
+ ! ! arg2 arg1 MOV
+ ! ! vm-reg "begin_callback" jit-call-1arg
+
+ ! ! ! call the quotation
+ ! ! arg1 return-reg MOV
+ ! ! jit-call-quot
+
+ ! ! vm-reg "end_callback" jit-call-1arg
+
+ ! [
+
+ ! ! write()
+ ! ! 68 X8 MOVwi64
+ ! ! X2 MOVwi64
+ ! ! 0 SVC
+
+ ! ! exit(42)
+ ! 9999 BRK
+ ! 42 X0 MOVwi64
+ ! 93 X8 MOVwi64
+ ! 0 SVC
+
+
+
+ ! ! Rn Rd MOVr64
+ ! ! arg1 arg2 MOVr64
+ ! ! vm-reg "begin_callback" jit-call-1arg
+
+ ! ! return-reg arg1 MOVr64 ! arg1 is return
+ ! ! jit-call-quot
+
+ ! ! vm-reg "end_callback" jit-call-1arg
+
+ ! ] assemble-arm %
+
+ ! ] }
+ ! { unwind-native-frames [ ] }
! ## Math
! { fixnum+ [ [ ADD ] "overflow_fixnum_add" jit-overflow ] }
! ! Return with new callstack
! 0 RET
! ] }
-} define-sub-primitives
+! } define-sub-primitives
! C to Factor entry point
[
+
+ 9999 BRK
! ! Optimizing compiler's side of callback accesses
! ! arguments that are on the stack via the frame pointer.
! ! On x86-32 fastcall, and x86-64, some arguments are passed
! temp0 word-entry-point-offset [+] JMP
] JIT-EXECUTE jit-define
+
+! https://elixir.bootlin.com/linux/latest/source/arch/arm64/kernel/stacktrace.c#L22
[
- ! stack-reg stack-frame-size bootstrap-cell - SUB
+ ! x64 ! stack-reg stack-frame-size bootstrap-cell - SUB
+
+
+ ! : link-reg ( -- reg ) X30 ; ! LR
+ ! : stack-frame-reg ( -- reg ) X29 ; ! FP
+
+ ! ! make room for LR plus magic number of callback, 16byte align
+ ! Step 1: lower stack-reg by stack-frame-size plus two cells.
+ stack-frame-size bootstrap-cell 2 * + stack-reg stack-reg SUBi64
+ ! link-reg X29 stack-reg STP
+ ! Step 2: store the X29/link-reg pair with a pre-indexed offset of -16,
+ ! so SP ends up 16 bytes lower still. JIT-EPILOG must undo both steps in reverse.
+ -16 SP link-reg X29 STP-pre
] JIT-PROLOG jit-define
[
- ! stack-reg stack-frame-size bootstrap-cell - ADD
+ ! x64 ! stack-reg stack-frame-size bootstrap-cell - ADD
+ ! Undo JIT-PROLOG in reverse order. The prolog stored the X29/link-reg pair
+ ! with a -16 pre-index, so the pair sits at the current SP: reload it from
+ ! there and release its 16 bytes with a +16 post-index. (A -16 pre-indexed
+ ! load would move SP further down and read the wrong slot.)
+ 16 SP link-reg X29 LDP-post
+ ! Then release the stack-frame-size + two cells reserved by the prolog.
+ stack-frame-size bootstrap-cell 2 * + stack-reg stack-reg ADDi64
] JIT-EPILOG jit-define
[
- ! 0 RET
+ ! Return to the caller. f is passed in place of a register operand;
+ ! presumably the assembler then uses the default link register (X30) -- confirm.
+ f RET
] JIT-RETURN jit-define
! ! ! Polymorphic inline caches
! Load a value from a stack position
[
- temp1 ds-reg 0x7f [+] MOV f rc-absolute-1 rel-untagged
+ ! temp1 ds-reg 0x7f [+] MOV f rc-absolute-1 rel-untagged
] PIC-LOAD jit-define
-[ temp1/32 tag-mask get AND ] PIC-TAG jit-define
+[
+ ! temp1/32 tag-mask get AND
+] PIC-TAG jit-define
[
- temp0 temp1 MOV
- temp1/32 tag-mask get AND
- temp1/32 tuple type-number CMP
- [ JNE ]
- [ temp1 temp0 tuple-class-offset [+] MOV ]
- jit-conditional
+ ! temp0 temp1 MOV
+ ! temp1/32 tag-mask get AND
+ ! temp1/32 tuple type-number CMP
+ ! [ JNE ]
+ ! [ temp1 temp0 tuple-class-offset [+] MOV ]
+ ! jit-conditional
] PIC-TUPLE jit-define
[
- temp1/32 0x7f CMP f rc-absolute-1 rel-untagged
+ ! temp1/32 0x7f CMP f rc-absolute-1 rel-untagged
] PIC-CHECK-TAG jit-define
-[ 0 JE f rc-relative rel-word ] PIC-HIT jit-define
+[
+ ! 0 JE f rc-relative rel-word
+] PIC-HIT jit-define
! ! ! Megamorphic caches