generic.single.private kernel kernel.private layouts
locals.backend math math.private namespaces slots.private
strings.private threads.private vocabs ;
+RENAME: STRuoff cpu.arm.assembler.32 => STRuoff32
IN: bootstrap.assembler.arm
8 \ cell set
: pop-link-reg ( -- ) 16 stack-reg link-reg LDRpost ;
: load0 ( -- ) 0 ds-reg temp0 LDRuoff ;
+: load1 ( -- ) -8 ds-reg temp1 LDUR ;
+: load2 ( -- ) -16 ds-reg temp2 LDUR ;
: load1/0 ( -- ) -8 ds-reg temp0 temp1 LDPsoff ;
: load2/1 ( -- ) -16 ds-reg temp1 temp2 LDPsoff ;
: load2/1* ( -- ) -8 ds-reg temp1 temp2 LDPsoff ;
:: tag ( reg -- ) tag-bits get reg reg LSLi ;
:: untag ( reg -- ) tag-bits get reg reg ASRi ;
-: tagged>offset ( -- ) 1 temp0 temp0 ASRi ;
+: tagged>offset0 ( -- ) 1 temp0 temp0 ASRi ;
: >r ( -- ) pop0 pushr ;
: r> ( -- ) popr push0 ;
-: jit-call ( name -- )
- ! RAX 0 MOV f rc-absolute-cell rel-dlsym
- ! RAX CALL ;
+: absolute-jump ( -- word class )
+ 2 words temp0 LDRl
+ temp0 BR
+ NOP NOP f rc-absolute-cell ;
+
+: absolute-call ( -- word class )
+ 5 words temp0 LDRl
push-link-reg
- 3 words temp0 LDRl
temp0 BLR
+ pop-link-reg
3 words Br
- NOP NOP f rc-absolute-cell rel-dlsym
- pop-link-reg ;
+ NOP NOP f rc-absolute-cell ;
+
+[
+ ! pic-tail-reg 5 [RIP+] LEA
+ ! why do we store the address after JMP in EBX, where is it
+ ! picked up?
+ ! 0 JMP f rc-relative rel-word-pic-tail
+ absolute-jump rel-word-pic-tail
+] JIT-WORD-JUMP jit-define
+
+[
+ ! 0 CALL f rc-relative rel-word-pic
+ absolute-call rel-word-pic
+] JIT-WORD-CALL jit-define
+
+: jit-call ( name -- )
+ ! RAX 0 MOV f rc-absolute-cell rel-dlsym
+ ! RAX CALL ;
+ absolute-call rel-dlsym ;
:: jit-call-1arg ( arg1s name -- )
! arg1 arg1s MOVr
arg2s arg2 MOVr
name jit-call ;
-[
- ! pic-tail-reg 5 [RIP+] LEA
- ! 0 JMP f rc-relative rel-word-pic-tail
- 4 words temp0 LDRl32
- 4 words temp1 ADR
- temp1 temp0 temp0 ADDr
- temp0 BR
- NOP f rc-relative rel-word-pic-tail
-] JIT-WORD-JUMP jit-define
-
: jit-load-vm ( -- ) ;
: jit-load-context ( -- )
context-retainstack-offset ctx-reg rs-reg LDRuoff ;
[
- push-link-reg
! ! ctx-reg is preserved across the call because it is non-volatile
! ! in the C ABI
jit-save-context
! RAX 0 MOV f f rc-absolute-cell rel-dlsym
! RAX CALL
vm-reg arg1 MOVr
- 3 words temp0 LDRl
- temp0 BLR
- 3 words Br
- NOP NOP f f rc-absolute-cell rel-dlsym
+ f jit-call
jit-restore-context
- pop-link-reg
] JIT-PRIMITIVE jit-define
: jit-jump-quot ( -- )
! arg1 quot-entry-point-offset [+] JMP ;
- quot-entry-point-offset arg1 temp0 LDRpre
+ quot-entry-point-offset arg1 temp0 LDUR
temp0 BR ;
: jit-call-quot ( -- )
! arg1 quot-entry-point-offset [+] CALL ;
push-link-reg
- quot-entry-point-offset arg1 temp0 LDRpre
+ quot-entry-point-offset arg1 temp0 LDUR
temp0 BLR
pop-link-reg ;
vm-reg arg2 MOVr
! RAX 0 MOV rc-absolute-cell rel-inline-cache-miss
! RAX CALL
- 5 words temp0 LDRl
- push-link-reg
- temp0 BLR
- pop-link-reg
- 3 words Br
- NOP NOP rc-absolute-cell rel-inline-cache-miss
+ absolute-call nip rel-inline-cache-miss
jit-load-context
jit-restore-context ;
-[ jit-load-return-address jit-inline-cache-miss ]
-[
+[ jit-load-return-address jit-inline-cache-miss ] [
! RAX CALL
push-link-reg
temp0 BLR
pop-link-reg
-]
-[
+] [
! RAX JMP
temp0 BR
-]
-\ inline-cache-miss define-combinator-primitive
+] \ inline-cache-miss define-combinator-primitive
-[ jit-inline-cache-miss ]
-[
+[ jit-inline-cache-miss ] [
! RAX CALL
push-link-reg
temp0 BLR
pop-link-reg
-]
-[
+] [
! RAX JMP
temp0 BR
-]
-\ inline-cache-miss-tail define-combinator-primitive
-
-! Overflowing fixnum arithmetic
-: jit-overflow ( insn func -- )
- ! ds-reg 8 SUB
- jit-save-context
- ! arg1 ds-reg [] MOV
- ! arg2 ds-reg 8 [+] MOV
- load-arg1/2
- ! arg3 arg1 MOV
- ! [ [ arg3 arg2 ] dip call ] dip
- [ [ arg2 arg1 arg3 ] dip call ] dip
- ! ds-reg [] arg3 MOV
- push-down-arg3
- ! [ JNO ]
- [ VC B.cond ] [
- ! arg3 vm-reg MOV
- vm-reg arg3 MOVr
- jit-call
- ] jit-conditional ; inline
+] \ inline-cache-miss-tail define-combinator-primitive
! Contexts
: jit-switch-context ( reg -- )
[
! 0 [RIP+] EAX MOV rc-relative rel-safepoint
- 4 words temp0 LDRl32
- 4 words temp1 ADR
- temp1 temp0 W0 STRr32
- 2 words Br
- NOP rc-relative rel-safepoint
+ 3 words temp0 LDRl
+ 0 temp0 W0 STRuoff32
+ 3 words Br
+ NOP NOP rc-absolute-cell rel-safepoint
] JIT-SAFEPOINT jit-define
-! # All arm.64 subprimitives
-{
- ! ## Contexts
- { (set-context) [ jit-set-context ] }
- { (set-context-and-delete) [
- jit-delete-current-context
- jit-set-context
- ] }
- { (start-context) [ jit-start-context ] }
- { (start-context-and-delete) [ jit-start-context-and-delete ] }
-
- ! ## Entry points
- { c-to-factor [
- arg1 arg2 MOVr
- vm-reg "begin_callback" jit-call-1arg
-
- jit-call-quot
-
- vm-reg "end_callback" jit-call-1arg
- ] }
- { unwind-native-frames [
- ! ! unwind-native-frames is marked as "special" in
- ! ! vm/quotations.cpp so it does not have a standard prolog
- ! ! Unwind stack frames
- ! RSP arg2 MOV
- arg2 stack-reg MOVsp
- ! ! Load VM pointer into vm-reg, since we're entering from
- ! ! C code
- ! vm-reg 0 MOV 0 rc-absolute-cell rel-vm
- 2 words vm-reg LDRl
- 3 words Br
- NOP NOP 0 rc-absolute-cell rel-vm
- ! ! Load ds and rs registers
- jit-load-context
- jit-restore-context
- ! ! Clear the fault flag
- ! vm-reg vm-fault-flag-offset [+] 0 MOV
- vm-fault-flag-offset vm-reg XZR STRuoff
- ! ! Call quotation
- jit-jump-quot
- ] }
-
- ! ## Math
- { fixnum+ [ [ ADDr ] "overflow_fixnum_add" jit-overflow ] }
- { fixnum- [ [ SUBr ] "overflow_fixnum_subtract" jit-overflow ] }
- { fixnum* [
- ! ds-reg 8 SUB
- jit-save-context
- ! RCX ds-reg [] MOV
- ! RBX ds-reg 8 [+] MOV
- load1/0
- ! RBX tag-bits get SAR
- temp0 untag
- ! RAX RCX MOV
- ! RBX IMUL
- ! RAX * RBX = RDX:RAX
- temp1 temp0 temp0 MUL
- ! ds-reg [] RAX MOV
- 1 push-down0
- ! [ JNO ]
- [ VC B.cond ] [
- ! arg1 RCX MOV
- temp1 arg1 MOVr
- ! arg1 tag-bits get SAR
- temp1 untag
- ! arg2 RBX MOV
- temp0 arg2 MOVr
- ! arg3 vm-reg MOV
- vm-reg arg3 MOVr
- "overflow_fixnum_multiply" jit-call
- ] jit-conditional
- ] }
-
- ! ## Misc
- { fpu-state [
- ! RSP 2 SUB
- ! RSP [] FNSTCW
- ! FNINIT
- ! AX RSP [] MOV
- ! RSP 2 ADD
- FPSR XZR MSRr
- FPCR arg1 MRS
- ] }
- { set-fpu-state [
- ! RSP 2 SUB
- ! RSP [] arg1 16-bit-version-of MOV
- ! RSP [] FLDCW
- ! RSP 2 ADD
- FPCR arg1 MSRr
- ] }
- { set-callstack [
- ! ! Load callstack object
- ! arg4 ds-reg [] MOV
- ! ds-reg bootstrap-cell SUB
- pop0
- ! ! Get ctx->callstack_bottom
- jit-load-context
- ! arg1 ctx-reg context-callstack-bottom-offset [+] MOV
- context-callstack-bottom-offset ctx-reg arg1 LDRuoff
- ! ! Get top of callstack object -- 'src' for memcpy
- ! arg2 arg4 callstack-top-offset [+] LEA
- callstack-top-offset temp0 arg2 ADDr
- ! ! Get callstack length, in bytes --- 'len' for memcpy
- ! arg3 arg4 callstack-length-offset [+] MOV
- 2 temp0 temp0 SUBi ! callstack-length-offset
- 0 temp0 arg3 LDRuoff
- ! arg3 tag-bits get SHR
- tag-bits get arg3 arg3 LSRi
- ! ! Compute new stack pointer -- 'dst' for memcpy
- ! arg1 arg3 SUB
- arg3 arg1 arg1 SUBr
- ! ! Install new stack pointer
- ! RSP arg1 MOV
- arg1 stack-reg MOVsp
- ! ! Call memcpy; arguments are now in the correct registers
- ! ! Create register shadow area for Win64
- ! RSP 32 SUB
- 32 stack-reg stack-reg SUBi
- "factor_memcpy" jit-call
- ! ! Tear down register shadow area
- ! RSP 32 ADD
- 32 stack-reg stack-reg ADDi
- ! ! Return with new callstack
- ! 0 RET
- f RET
- ] }
-} define-sub-primitives
-
! C to Factor entry point
[
0xabcd BRK
push0
] JIT-PUSH-LITERAL jit-define
-: call-relative ( -- word class )
- push-link-reg
- 5 words temp0 LDRl32
- 5 words temp1 ADR
- temp1 temp0 temp0 ADDr
- temp0 BLR
- 2 words Br
- NOP f rc-relative
- pop-link-reg ;
-
-[
- ! 0 CALL f rc-relative rel-word-pic
- call-relative rel-word-pic
-] JIT-WORD-CALL jit-define
-
! The *-signal-handler subprimitives are special-cased in vm/quotations.cpp
! not to trigger generation of a stack frame, so they can
! peform their own prolog/epilog preserving registers.
\ f type-number temp0 CMPi
! ! jump to true branch if not equal
! 0 JNE f rc-relative rel-word
- 6 words EQ B.cond
- 4 words temp0 LDRl32
- 4 words temp1 ADR
- temp1 temp0 temp0 ADDr
- temp0 BR
- NOP f rc-relative rel-word
+ 5 words EQ B.cond
+ absolute-jump rel-word
! ! jump to false branch if equal
! 0 JMP f rc-relative rel-word
- 4 words temp0 LDRl32
- 4 words temp1 ADR
- temp1 temp0 temp0 ADDr
- temp0 BR
- NOP f rc-relative rel-word
+ absolute-jump rel-word
] JIT-IF jit-define
[
>r
! 0 CALL f rc-relative rel-word
- call-relative rel-word
+ absolute-call rel-word
r>
] JIT-DIP jit-define
[
>r >r
! 0 CALL f rc-relative rel-word
- call-relative rel-word
+ absolute-call rel-word
r> r>
] JIT-2DIP jit-define
[
>r >r >r
! 0 CALL f rc-relative rel-word
- call-relative rel-word
+ absolute-call rel-word
r> r> r>
] JIT-3DIP jit-define
! ! pop stack
! ds-reg bootstrap-cell SUB
pop0
-]
-[
+] [
! temp0 word-entry-point-offset [+] CALL
push-link-reg
temp0 BLR
pop-link-reg
-]
-[
+] [
! temp0 word-entry-point-offset [+] JMP
temp0 BR
-]
-\ (execute) define-combinator-primitive
+] \ (execute) define-combinator-primitive
[
! temp0 ds-reg [] MOV
[
! ! make room for LR plus magic number of callback, 16byte align
! x64 ! stack-reg stack-frame-size bootstrap-cell - SUB
- stack-frame-size bootstrap-cell 2 * + stack-reg stack-reg SUBi
- -16 SP link-reg stack-frame-reg STPpre
+ stack-frame-size stack-reg stack-reg SUBi
+ -16 SP link-reg STRpre
] JIT-PROLOG jit-define
[
! x64 ! stack-reg stack-frame-size bootstrap-cell - ADD
- ! -16 SP link-reg X29 LDPpre
- 16 SP link-reg stack-frame-reg LDPpost
- stack-frame-size bootstrap-cell 2 * + stack-reg stack-reg ADDi
+ 16 SP link-reg LDRpost
+ stack-frame-size stack-reg stack-reg ADDi
] JIT-EPILOG jit-define
[ f RET ] JIT-RETURN jit-define
[
! temp1/32 tag-mask get AND
- tag-mask get temp1 temp1 ANDi32
+ tag-mask get temp1 temp1 ANDi
] PIC-TAG jit-define
[
! temp0 temp1 MOV
temp1 temp0 MOVr
! temp1/32 tag-mask get AND
- tag-mask get temp1 temp1 ANDi32
+ tag-mask get temp1 temp1 ANDi
! temp1/32 tuple type-number CMP
tuple type-number temp1 CMPi
! [ JNE ]
! [ temp1 temp0 tuple-class-offset [+] MOV ]
[ NE B.cond ] [
- ! tuple-class-offset temp0 temp1 LDRuoff
- 1 temp0 temp0 ADDi
- 0 temp0 temp1 LDRuoff
+ tuple-class-offset temp0 temp1 LDUR
] jit-conditional
] PIC-TUPLE jit-define
[
! 0 JE f rc-relative rel-word
- 6 words NE B.cond
- 4 words temp3 LDRl32
- 4 words temp2 ADR
- temp3 temp2 temp2 ADDr
- temp2 BR
- NOP f rc-relative rel-word
+ 5 words NE B.cond
+ absolute-jump rel-word
] PIC-HIT jit-define
! ! ! Megamorphic caches
! temp0 temp1 MOV
temp1 temp0 MOVr
! temp1/32 tag-mask get AND
- tag-mask get temp1 temp1 ANDi32
+ tag-mask get temp1 temp1 ANDi
! temp1/32 tag-bits get SHL
temp1 tag
! temp1/32 tuple type-number tag-fixnum CMP
! [ JNE ]
! [ temp1 temp0 tuple-class-offset [+] MOV ]
[ NE B.cond ] [
- ! tuple-class-offset temp0 temp1 LDRuoff
- 1 temp0 temp0 ADDi
- 0 temp0 temp1 LDRuoff
+ tuple-class-offset temp0 temp1 LDUR
] jit-conditional
! ! cache = ...
! temp0 0 MOV f rc-absolute-cell rel-literal
3 words Br
NOP NOP rc-absolute-cell rel-megamorphic-cache-hits
! temp1 [] 1 ADD
- 0 temp1 temp2 LDRuoff
- 1 temp2 temp2 ADDi
- 0 temp1 temp2 STRuoff
+ 1 temp3 MOVwi
+ temp3 temp1 STADD
! ! goto get(cache + bootstrap-cell)
! temp0 temp0 bootstrap-cell [+] MOV
bootstrap-cell temp0 temp0 LDRuoff
1 push-down0 ;
! Math
+
+! Overflowing fixnum arithmetic
+: jit-overflow ( insn func -- )
+ ! ds-reg 8 SUB
+ jit-save-context
+ ! arg1 ds-reg [] MOV
+ ! arg2 ds-reg 8 [+] MOV
+ load-arg1/2
+ ! arg3 arg1 MOV
+ ! [ [ arg3 arg2 ] dip call ] dip
+ [ [ arg2 arg1 arg3 ] dip call ] dip
+ ! ds-reg [] arg3 MOV
+ push-down-arg3
+ ! [ JNO ]
+ [ VC B.cond ] [
+ ! arg3 vm-reg MOV
+ vm-reg arg3 MOVr
+ jit-call
+ ] jit-conditional ; inline
+
: jit-math ( insn -- )
! ! load second input
! temp0 ds-reg [] MOV
temp0 temp1 temp2 SDIV
temp1 temp0 temp2 temp0 MSUB ;
-! # Rest of arm64 subprimitives
+! # All arm.64 subprimitives
{
+ ! ## Contexts
+ { (set-context) [ jit-set-context ] }
+ { (set-context-and-delete) [
+ jit-delete-current-context
+ jit-set-context
+ ] }
+ { (start-context) [ jit-start-context ] }
+ { (start-context-and-delete) [ jit-start-context-and-delete ] }
+
+ ! ## Entry points
+ { c-to-factor [
+ arg1 arg2 MOVr
+ vm-reg "begin_callback" jit-call-1arg
+
+ jit-call-quot
+
+ vm-reg "end_callback" jit-call-1arg
+ ] }
+ { unwind-native-frames [
+ ! ! unwind-native-frames is marked as "special" in
+ ! ! vm/quotations.cpp so it does not have a standard prolog
+ ! ! Unwind stack frames
+ ! RSP arg2 MOV
+ arg2 stack-reg MOVsp
+ ! ! Load VM pointer into vm-reg, since we're entering from
+ ! ! C code
+ ! vm-reg 0 MOV 0 rc-absolute-cell rel-vm
+ 2 words vm-reg LDRl
+ 3 words Br
+ NOP NOP 0 rc-absolute-cell rel-vm
+ ! ! Load ds and rs registers
+ jit-load-context
+ jit-restore-context
+ ! ! Clear the fault flag
+ ! vm-reg vm-fault-flag-offset [+] 0 MOV
+ vm-fault-flag-offset vm-reg XZR STRuoff
+ ! ! Call quotation
+ jit-jump-quot
+ ] }
+
+ ! ## Math
+ { fixnum+ [ [ ADDr ] "overflow_fixnum_add" jit-overflow ] }
+ { fixnum- [ [ SUBr ] "overflow_fixnum_subtract" jit-overflow ] }
+ { fixnum* [
+ ! ds-reg 8 SUB
+ jit-save-context
+ ! RCX ds-reg [] MOV
+ ! RBX ds-reg 8 [+] MOV
+ load1/0
+ ! RBX tag-bits get SAR
+ temp0 untag
+ ! RAX RCX MOV
+ ! RBX IMUL
+ ! RAX * RBX = RDX:RAX
+ temp1 temp0 temp0 MUL
+ ! ds-reg [] RAX MOV
+ 1 push-down0
+ ! [ JNO ]
+ [ VC B.cond ] [
+ ! arg1 RCX MOV
+ temp1 arg1 MOVr
+ ! arg1 tag-bits get SAR
+ temp1 untag
+ ! arg2 RBX MOV
+ temp0 arg2 MOVr
+ ! arg3 vm-reg MOV
+ vm-reg arg3 MOVr
+ "overflow_fixnum_multiply" jit-call
+ ] jit-conditional
+ ] }
+
+ ! ## Misc
+ { fpu-state [
+ ! RSP 2 SUB
+ ! RSP [] FNSTCW
+ ! FNINIT
+ ! AX RSP [] MOV
+ ! RSP 2 ADD
+ FPSR XZR MSRr
+ FPCR arg1 MRS
+ ] }
+ { set-fpu-state [
+ ! RSP 2 SUB
+ ! RSP [] arg1 16-bit-version-of MOV
+ ! RSP [] FLDCW
+ ! RSP 2 ADD
+ FPCR arg1 MSRr
+ ] }
+ { set-callstack [
+ ! ! Load callstack object
+ ! arg4 ds-reg [] MOV
+ ! ds-reg bootstrap-cell SUB
+ pop0
+ ! ! Get ctx->callstack_bottom
+ jit-load-context
+ ! arg1 ctx-reg context-callstack-bottom-offset [+] MOV
+ context-callstack-bottom-offset ctx-reg arg1 LDRuoff
+ ! ! Get top of callstack object -- 'src' for memcpy
+ ! arg2 arg4 callstack-top-offset [+] LEA
+ callstack-top-offset temp0 arg2 ADDr
+ ! ! Get callstack length, in bytes --- 'len' for memcpy
+ ! arg3 arg4 callstack-length-offset [+] MOV
+ 2 temp0 temp0 SUBi ! callstack-length-offset
+ 0 temp0 arg3 LDRuoff
+ ! arg3 tag-bits get SHR
+ tag-bits get arg3 arg3 LSRi
+ ! ! Compute new stack pointer -- 'dst' for memcpy
+ ! arg1 arg3 SUB
+ arg3 arg1 arg1 SUBr
+ ! ! Install new stack pointer
+ ! RSP arg1 MOV
+ arg1 stack-reg MOVsp
+ ! ! Call memcpy; arguments are now in the correct registers
+ ! ! Create register shadow area for Win64
+ ! RSP 32 SUB
+ 32 stack-reg stack-reg SUBi
+ "factor_memcpy" jit-call
+ ! ! Tear down register shadow area
+ ! RSP 32 ADD
+ 32 stack-reg stack-reg ADDi
+ ! ! Return with new callstack
+ ! 0 RET
+ f RET
+ ] }
+
! ## Fixnums
! ### Add
{ fixnum+fast [ \ ADDr jit-math ] }
- ! ### Bit stuff
+ ! ### Bit manipulation
{ fixnum-bitand [ \ ANDr jit-math ] }
{ fixnum-bitnot [
! ! complement
! temp3 CL SAR
temp0 temp2 temp2 ASRr
! temp3 tag-mask get bitnot AND
- tag-mask get temp3 MOVwi
- temp3 temp2 temp3 BIC
+ tag-mask get bitnot temp2 temp2 ANDi
! ! if shift count was negative, move temp3 to temp2
! shift-arg 0 CMP
! temp2 temp3 CMOVGE
! ds-reg bootstrap-cell SUB
pop0
! ! turn local number into offset
- tagged>offset
+ tagged>offset0
! ! decrement retain stack pointer
! rs-reg temp0 SUB
temp0 rs-reg rs-reg SUBr
! temp0 ds-reg [] MOV
load0
! ! turn local number into offset
- tagged>offset
+ tagged>offset0
! ! load local value
! temp0 rs-reg temp0 [+] MOV
temp0 rs-reg temp0 LDRr
! temp1 ds-reg [] MOV
load1/0
! ! turn slot number into offset
- tagged>offset
+ tagged>offset0
! ! mask off tag
! temp1 tag-bits get SHR
- temp1 untag
! temp1 tag-bits get SHL
- temp1 tag
+ tag-mask get bitnot temp1 temp1 ANDi
! ! load slot value
! temp0 temp1 temp0 [+] MOV
temp1 temp0 temp0 LDRr
load0
! ! compute tag
! temp0/32 tag-mask get AND
- tag-mask get temp0 temp0 ANDi32
+ tag-mask get temp0 temp0 ANDi
! ! tag the tag
! temp0/32 tag-bits get SHL
temp0 tag
! ! ### Dups
{ dup [ load0 push0 ] }
{ 2dup [ load1/0 push1 push0 ] }
- { 3dup [ load3/2 load1/0 push2 push1 push0 ] }
+ { 3dup [ load2 load1/0 push2 push1 push0 ] }
{ 4dup [ load3/2 load1/0 push3 push2 push1 push0 ] }
{ dupd [ load1/0 store1 push0 ] }
! ! ### Misc shufflers
- { over [ load1/0 push1 ] }
- { pick [ load3/2 push2 ] }
+ { over [ load1 push1 ] }
+ { pick [ load2 push2 ] }
! ! ### Nips
{ nip [ load0 1 push-down0 ] }
! Pop the fake leaf frame along with our return address
! leaf-stack-frame-size bootstrap-cell - RET
leaf-stack-frame-size bootstrap-cell - SP SP ADDr
+ f RET
] }
{ signal-handler [
jit-signal-handler-prolog
math.bitwise ;
IN: cpu.arm.assembler.64
+: encode-bitmask ( imm64 -- Nimmrimms ) 64 (encode-bitmask) ;
+
: ADC ( Rm Rn Rd -- ) ADC64-encode ;
: ADCS ( Rm Rn Rd -- ) ADCS64-encode ;
-: ADDi ( imm12 Rn Rd -- )
- [ 12 prepare-split-imm 1 0 ? swap ] 2dip
- ADDi64-encode ;
-
+: ADDi ( imm12 Rn Rd -- ) [ split-imm ] 2dip ADDi64-encode ;
: ADDr ( Rm Rn Rd -- ) [ 0 0 ] 2dip ADDer64-encode ;
-: ANDi ( imm13 Rn Rd -- ) [ 13 bits ] 2dip ANDi64-encode ;
-: ANDi32 ( imm12 Rn Rd -- ) [ 12 bits ] 2dip ANDi32-encode ;
+: ANDi ( imm64 Rn Rd -- ) [ encode-bitmask ] 2dip ANDi64-encode ;
: ANDr ( Rm Rn Rd -- ) [ [ 0 ] dip 0 ] 2dip ANDsr64-encode ;
-: ASRi ( imm6 Rn Rd -- ) [ 6 ?bits ] 2dip ASRi64-encode ;
+: ASRi ( imm6 Rn Rd -- ) [ 6 ?ubits ] 2dip ASRi64-encode ;
: ASRr ( Rm Rn Rd -- ) ASRr64-encode ;
: BIC ( Rm Rn Rd -- ) [ [ 0 ] dip 0 ] 2dip BIC64-encode ;
-: CBNZ ( imm19 Rt -- ) [ 19 ?bits ] dip CBNZ64-encode ;
-
-: CMPi ( imm12 Rd -- )
- [ 12 prepare-split-imm 1 0 ? swap ] dip
- CMPi64-encode ;
+: CBNZ ( imm21 Rt -- ) [ 4 / 19 ?ubits ] dip CBNZ64-encode ;
+: CMPi ( imm12 Rd -- ) [ split-imm ] dip CMPi64-encode ;
: CMPr ( Rm Rn -- ) [ 3 0 ] dip CMPer64-encode ;
! cond4 is EQ NE CS HS CC LO MI PL VS VC HI LS GE LT GT LE AL NV
: CSET ( Rd cond4 -- ) swap CSET64-encode ;
: CSETM ( Rd cond4 -- ) swap CSETM64-encode ;
-: EORi ( imm13 Rn Rd -- ) [ 13 bits ] 2dip EORi64-encode ;
+: EORi ( imm64 Rn Rd -- ) [ encode-bitmask ] 2dip EORi64-encode ;
: EORr ( Rm Rn Rd -- ) [ [ 0 ] dip 0 ] 2dip EORsr64-encode ;
: FPCR ( -- op0 op1 CRn CRm op2 ) 3 3 4 4 0 ;
: FPSR ( -- op0 op1 CRn CRm op2 ) 3 3 4 4 1 ;
-: LDPpre ( imm10 Rn Rt2 Rt -- )
- [ 8 / 7 bits ] 3dip swapd LDPpre64-encode ;
+: LDPpost ( imm10 Rn Rt2 Rt -- ) [ 8 / 7 ?sbits ] 3dip swapd LDPpost64-encode ;
+: LDPpre ( imm10 Rn Rt2 Rt -- ) [ 8 / 7 ?sbits ] 3dip swapd LDPpre64-encode ;
+: LDPsoff ( imm10 Rn Rt2 Rt -- ) [ 8 / 7 ?sbits ] 3dip swapd LDPsoff64-encode ;
-: LDPpost ( imm10 Rn Rt2 Rt -- )
- [ 8 / 7 bits ] 3dip swapd LDPpost64-encode ;
-
-: LDPsoff ( imm10 Rn Rt2 Rt -- )
- [ 8 / 7 bits ] 3dip swapd LDPsoff64-encode ;
-
-: LDRpre ( imm9 Rn Rt -- ) [ 9 bits ] 2dip LDRpre64-encode ;
-: LDRpost ( imm9 Rn Rt -- ) [ 9 bits ] 2dip LDRpost64-encode ;
-: LDRuoff ( imm15 Rn Rt -- ) [ 8 / 12 ?bits ] 2dip LDRuoff64-encode ;
-: LDRl ( imm21 Rt -- ) [ 4 / 19 bits ] dip LDRl64-encode ;
-: LDRl32 ( imm21 Rt -- ) [ 4 / 19 bits ] dip LDRl32-encode ;
+: LDRl ( imm21 Rt -- ) [ 4 / 19 ?sbits ] dip LDRl64-encode ;
+: LDRpost ( imm12 Rn Rt -- ) [ 8 / 9 ?sbits ] 2dip LDRpost64-encode ;
+: LDRpre ( imm12 Rn Rt -- ) [ 8 / 9 ?sbits ] 2dip LDRpre64-encode ;
: LDRr ( Rm Rn Rt -- ) [ 3 0 ] 2dip LDRr64-encode ;
+: LDRuoff ( imm15 Rn Rt -- ) [ 8 / 12 ?ubits ] 2dip LDRuoff64-encode ;
: LDRBr ( Rm Rn Rt -- ) [ 0 ] 2dip LDRBsr-encode ;
+: LDRBuoff ( imm12 Rn Rt -- ) [ 12 ?ubits ] 2dip LDRBuoff-encode ;
-: LDRBuoff ( imm12 Rn Rt -- )
- [ 12 ?bits ] 2dip LDRBuoff-encode ;
+: LDRHuoff ( imm13 Rn Rt -- ) [ 2 / 12 ?ubits ] 2dip LDRHuoff-encode ;
-: LDRHuoff ( imm14 Rn Rt -- )
- [ 2 / 12 ?bits ] 2dip LDRHuoff-encode ;
+: LDUR ( imm9 Rn Rt -- ) [ 9 ?sbits ] 2dip LDUR64-encode ;
-: LSLi ( imm6 Rn Rd -- ) [ 6 ?bits ] 2dip LSLi64-encode ;
+: LSLi ( imm6 Rn Rd -- ) [ 6 ?ubits ] 2dip LSLi64-encode ;
: LSLr ( Rm Rn Rd -- ) LSLr64-encode ;
-: LSRi ( imm6 Rn Rd -- ) [ 6 ?bits ] 2dip LSRi64-encode ;
+: LSRi ( imm6 Rn Rd -- ) [ 6 ?ubits ] 2dip LSRi64-encode ;
-: MOVwi ( imm Rt -- ) [ 0 ] 2dip MOVwi64-encode ;
: MOVr ( Rn Rd -- ) MOVr64-encode ;
: MOVsp ( Rn Rd -- ) [ 0 ] 2dip MOVsp64-encode ;
+: MOVwi ( imm Rt -- ) [ 0 ] 2dip MOVwi64-encode ;
: MRS ( op0 op1 CRn CRm op2 Rt -- ) MRS-encode ;
: SDIV ( Rm Rn Rd -- ) SDIV64-encode ;
-: STPpre ( imm10 Rn Rt2 Rt -- )
- [ 8 / 7 bits ] 3dip swapd STPpre64-encode ;
-
-: STPpost ( imm10 Rn Rt2 Rt -- )
- [ 8 / 7 bits ] 3dip swapd STPpost64-encode ;
-
-: STPsoff ( imm10 Rn Rt2 Rt -- )
- [ 8 / 7 bits ] 3dip swapd STPsoff64-encode ;
+: STADD ( Rs Rn -- ) STADD64-encode ;
-: STRpre ( imm9 Rn Rt -- ) [ 9 bits ] 2dip STRpre64-encode ;
-
-: STRpost ( imm9 Rn Rt -- ) [ 9 bits ] 2dip STRpost64-encode ;
+: STPpost ( imm10 Rn Rt2 Rt -- ) [ 8 / 7 ?sbits ] 3dip swapd STPpost64-encode ;
+: STPpre ( imm10 Rn Rt2 Rt -- ) [ 8 / 7 ?sbits ] 3dip swapd STPpre64-encode ;
+: STPsoff ( imm10 Rn Rt2 Rt -- ) [ 8 / 7 ?sbits ] 3dip swapd STPsoff64-encode ;
+: STRpre ( imm12 Rn Rt -- ) [ 8 / 9 ?sbits ] 2dip STRpre64-encode ;
+: STRpost ( imm12 Rn Rt -- ) [ 8 / 9 ?sbits ] 2dip STRpost64-encode ;
: STRr ( Rm Rn Rt -- ) [ 3 0 ] 2dip STRr64-encode ;
-: STRr32 ( Rm Rn Rt -- ) [ 3 0 ] 2dip STRr32-encode ;
-
-: STRuoff ( imm15 Rn Rt -- )
- [ 8 / 12 ?bits ] 2dip STRuoff64-encode ;
-
-: SUBi ( imm12 Rn Rd -- )
- [ 12 prepare-split-imm 1 0 ? swap ] 2dip
- SUBi64-encode ;
+: STRuoff ( imm15 Rn Rt -- ) [ 8 / 12 ?ubits ] 2dip STRuoff64-encode ;
+: SUBi ( imm12 Rn Rd -- ) [ split-imm ] 2dip SUBi64-encode ;
: SUBr ( Rm Rn Rd -- ) [ 3 0 ] 2dip SUBer64-encode ;
-: TSTi ( imm13 Rn -- ) [ 13 bits ] dip TSTi64-encode ;
+: TSTi ( imm64 Rn -- ) [ encode-bitmask ] dip TSTi64-encode ;
! Copyright (C) 2020 Doug Coleman.
! See https://factorcode.org/license.txt for BSD license.
-USING: accessors combinators cpu.arm.assembler.opcodes
-kernel make math math.bitwise namespaces sequences ;
+USING: combinators cpu.arm.assembler.opcodes grouping kernel
+math math.bitwise math.parser sequences ;
IN: cpu.arm.assembler
! pre-index mode: computed addres is the base-register + offset
! in both modes, the base-register is updated
ERROR: arm64-encoding-imm original n-bits-requested truncated ;
-: ?bits ( x n -- x ) 2dup bits dup reach = [ 2drop ] [ arm64-encoding-imm ] if ; inline
+: ?ubits ( x n -- x )
+ 2dup bits dup reach =
+ [ 2drop ] [ arm64-encoding-imm ] if ; inline
-! : ip ( -- address ) arm64-assembler get ip>> ;
+: ?sbits ( x n -- x )
+ 2dup >signed dup reach =
+ [ drop bits ] [ arm64-encoding-imm ] if ; inline
! Some instructions allow an immediate literal of n bits
! or n bits shifted. This means there are invalid immediate
! values, e.g. imm12 of 1, 4096, but not 4097
ERROR: imm-out-of-range imm n ;
-: imm-lower? ( imm n -- ? )
- on-bits unmask 0 > not ;
+: imm-lower? ( imm n -- ? ) on-bits unmask 0 > not ;
: imm-upper? ( imm n -- ? )
[ on-bits ] [ shift ] bi unmask 0 > not ;
-: prepare-split-imm ( imm n -- imm upper? )
+: (split-imm) ( imm n -- imm upper? )
{
{ [ 2dup imm-lower? ] [ drop f ] }
{ [ 2dup imm-upper? ] [ drop t ] }
[ imm-out-of-range ]
} cond ;
-: ADR ( imm21 Rd -- )
- [ [ 2 bits ] [ -2 shift 19 ?bits ] bi ] dip ADR-encode ;
+: split-imm ( imm -- shift imm' ) 12 (split-imm) 1 0 ? swap ;
-: ADRP ( imm21 Rd -- )
- [ [ 2 bits ] [ -2 shift 19 ?bits ] bi ] dip ADRP-encode ;
+ERROR: illegal-bitmask-immediate n ;
+: ?bitmask ( imm imm-size -- imm )
+ dupd on-bits 0 [ = ] bi-curry@ bi or
+ [ dup illegal-bitmask-immediate ] when ;
+
+: element-size ( imm imm-size -- imm element-size )
+ [ 2dup 2/ [ neg shift ] 2keep '[ _ on-bits bitand ] same? ]
+ [ 2/ ] while ;
+
+: bit-transitions ( imm element-size -- seq )
+ [ >bin ] dip CHAR: 0 pad-head 2 circular-clump ;
+
+ERROR: illegal-bitmask-element n ;
+: ?element ( imm element-size -- element )
+ [ bits ] keep dupd bit-transitions
+ [ first2 = not ] count 2 =
+ [ dup illegal-bitmask-element ] unless ;
+
+: >Nimms ( element element-size -- N imms )
+ [ bit-count 1 - ] [ log2 1 + ] bi*
+ 7 [ on-bits ] bi@ bitxor bitor
+ 6 toggle-bit [ -6 shift ] [ 6 bits ] bi ;
+
+: >immr ( element element-size -- immr )
+ [ bit-transitions "10" swap index 1 + ] keep mod ;
+
+: (encode-bitmask) ( imm imm-size -- (N)immrimms )
+ [ bits ] [ ?bitmask ] [ element-size ] tri
+ [ ?element ] keep [ >Nimms ] [ >immr ] 2bi
+ { 12 0 6 } bitfield* ;
+
+: ADR ( imm21 Rd -- ) [ [ 2 bits ] [ -2 shift 19 ?sbits ] bi ] dip ADR-encode ;
+
+: ADRP ( imm21 Rd -- ) [ 4096 / [ 2 bits ] [ -2 shift 19 ?sbits ] bi ] dip ADRP-encode ;
: RET ( register/f -- ) X30 or RET-encode ;
-: SVC ( imm16 -- ) 16 ?bits SVC-encode ;
+: SVC ( imm16 -- ) 16 ?ubits SVC-encode ;
-: BRK ( imm16 -- ) 16 ?bits BRK-encode ;
-: HLT ( imm16 -- ) 16 ?bits HLT-encode ;
+: BRK ( imm16 -- ) 16 ?ubits BRK-encode ;
+: HLT ( imm16 -- ) 16 ?ubits HLT-encode ;
! B but that is breakpoint
-: Br ( imm26 -- ) 4 / 26 bits B-encode ;
-: B.cond ( imm19 cond4 -- ) [ 4 / 19 bits ] dip B.cond-encode ;
-: BL ( imm26 -- ) 4 / 26 bits BL-encode ;
+: Br ( imm28 -- ) 4 / 26 ?sbits B-encode ;
+: B.cond ( imm21 cond4 -- ) [ 4 / 19 ?sbits ] dip B.cond-encode ;
+: BL ( imm28 -- ) 4 / 26 ?sbits BL-encode ;
: BR ( Rn -- ) BR-encode ;
: BLR ( Rn -- ) BLR-encode ;