M: x86 %fixnum-mul ( label dst src1 src2 -- )
int-rep two-operand swap IMUL2 JO ;
-M: x86 %add-float double-rep two-operand ADDSD ;
-M: x86 %sub-float double-rep two-operand SUBSD ;
-M: x86 %mul-float double-rep two-operand MULSD ;
-M: x86 %div-float double-rep two-operand DIVSD ;
-M: x86 %min-float double-rep two-operand MINSD ;
-M: x86 %max-float double-rep two-operand MAXSD ;
-M: x86 %sqrt SQRTSD ;
-
-M: x86 %single>double-float CVTSS2SD ;
-M: x86 %double>single-float CVTSD2SS ;
-
-M: x86 %integer>float CVTSI2SD ;
-M: x86 %float>integer CVTTSD2SI ;
-
-M: x86 %unbox-float ( dst src -- )
- float-offset [+] MOVSD ;
-
-M:: x86 %box-float ( dst src temp -- )
- dst 16 float temp %allot
- dst float-offset [+] src MOVSD ;
-
-M:: x86 %box-vector ( dst src rep temp -- )
- dst rep rep-size 2 cells + byte-array temp %allot
- 16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
- dst byte-array-offset [+]
- src rep %copy ;
+M: x86 %unbox-alien ( dst src -- )
+ alien-offset [+] MOV ;
-M:: x86 %unbox-vector ( dst src rep -- )
- dst src byte-array-offset [+]
- rep %copy ;
+M:: x86 %unbox-any-c-ptr ( dst src temp -- )
+ [
+ { "is-byte-array" "end" "start" } [ define-label ] each
+ dst 0 MOV
+ temp src MOV
+ ! We come back here with displaced aliens
+ "start" resolve-label
+ ! Is the object f?
+ temp \ f tag-number CMP
+ "end" get JE
+ ! Is the object an alien?
+ temp header-offset [+] alien type-number tag-fixnum CMP
+ "is-byte-array" get JNE
+ ! If so, load the offset and add it to the address
+ dst temp alien-offset [+] ADD
+ ! Now recurse on the underlying alien
+ temp temp underlying-alien-offset [+] MOV
+ "start" get JMP
+ "is-byte-array" resolve-label
+ ! Add byte array address to address being computed
+ dst temp ADD
+ ! Add an offset to start of byte array's data
+ dst byte-array-offset ADD
+ "end" resolve-label
+ ] with-scope ;
-MACRO: available-reps ( alist -- )
- ! Each SSE version adds new representations and supports
- ! all old ones
- unzip { } [ append ] accumulate rest swap suffix
- [ [ 1quotation ] map ] bi@ zip
- reverse [ { } ] suffix
- '[ _ cond ] ;
+: alien@ ( reg n -- op ) cells alien tag-number - [+] ;
-M: x86 %broadcast-vector ( dst src rep -- )
- {
- { float-4-rep [ [ float-4-rep %copy ] [ drop dup 0 SHUFPS ] 2bi ] }
- { double-2-rep [ [ double-2-rep %copy ] [ drop dup UNPCKLPD ] 2bi ] }
- } case ;
+:: %allot-alien ( dst displacement base temp -- )
+ dst 4 cells alien temp %allot
+ dst 1 alien@ base MOV ! alien
+ dst 2 alien@ \ f tag-number MOV ! expired
+ dst 3 alien@ displacement MOV ! displacement
+ ;
-M: x86 %broadcast-vector-reps
- {
- ! Can't do this with sse1 since it will want to unbox
- ! a double-precision float and convert to single precision
- { sse2? { float-4-rep double-2-rep } }
- } available-reps ;
+M:: x86 %box-alien ( dst src temp -- )
+ [
+ "end" define-label
+ dst \ f tag-number MOV
+ src 0 CMP
+ "end" get JE
+ dst src \ f tag-number temp %allot-alien
+ "end" resolve-label
+ ] with-scope ;
-M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
- rep {
- {
- float-4-rep
- [
- dst src1 float-4-rep %copy
- dst src2 UNPCKLPS
- src3 src4 UNPCKLPS
- dst src3 MOVLHPS
- ]
- }
- } case ;
+M:: x86 %box-displaced-alien ( dst displacement base displacement' base' base-class -- )
+ [
+ "end" define-label
+ "ok" define-label
+ ! If displacement is zero, return the base
+ dst base MOV
+ displacement 0 CMP
+ "end" get JE
+ ! Quickly use displacement' before its needed for real, as allot temporary
+ dst 4 cells alien displacement' %allot
+ ! If base is already a displaced alien, unpack it
+ base' base MOV
+ displacement' displacement MOV
+ base \ f tag-number CMP
+ "ok" get JE
+ base header-offset [+] alien type-number tag-fixnum CMP
+ "ok" get JNE
+ ! displacement += base.displacement
+ displacement' base 3 alien@ ADD
+ ! base = base.base
+ base' base 1 alien@ MOV
+ "ok" resolve-label
+ dst 1 alien@ base' MOV ! alien
+ dst 2 alien@ \ f tag-number MOV ! expired
+ dst 3 alien@ displacement' MOV ! displacement
+ "end" resolve-label
+ ] with-scope ;
-M: x86 %gather-vector-4-reps
- {
- ! Can't do this with sse1 since it will want to unbox
- ! double-precision floats and convert to single precision
- { sse2? { float-4-rep } }
- } available-reps ;
+! The 'small-reg' mess is pretty crappy, but its only used on x86-32.
+! On x86-64, all registers have 8-bit versions. However, a similar
+! problem arises for shifts, where the shift count must be in CL, and
+! so one day I will fix this properly by adding precoloring to the
+! register allocator.
-M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
- rep {
- {
- double-2-rep
- [
- dst src1 double-2-rep %copy
- dst src2 UNPCKLPD
- ]
- }
- } case ;
+HOOK: has-small-reg? cpu ( reg size -- ? )
-M: x86 %gather-vector-2-reps
- {
- { sse2? { double-2-rep } }
- } available-reps ;
+CONSTANT: have-byte-regs { EAX ECX EDX EBX }
-M: x86 %add-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
+M: x86.32 has-small-reg?
{
- { float-4-rep [ ADDPS ] }
- { double-2-rep [ ADDPD ] }
- { char-16-rep [ PADDB ] }
- { uchar-16-rep [ PADDB ] }
- { short-8-rep [ PADDW ] }
- { ushort-8-rep [ PADDW ] }
- { int-4-rep [ PADDD ] }
- { uint-4-rep [ PADDD ] }
- { longlong-2-rep [ PADDQ ] }
- { ulonglong-2-rep [ PADDQ ] }
+ { 8 [ have-byte-regs memq? ] }
+ { 16 [ drop t ] }
+ { 32 [ drop t ] }
} case ;
-M: x86 %add-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+M: x86.64 has-small-reg? 2drop t ;
-M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { char-16-rep [ PADDSB ] }
- { uchar-16-rep [ PADDUSB ] }
- { short-8-rep [ PADDSW ] }
- { ushort-8-rep [ PADDUSW ] }
- } case ;
+: small-reg-that-isn't ( exclude -- reg' )
+ [ have-byte-regs ] dip
+ [ native-version-of ] map
+ '[ _ memq? not ] find nip ;
-M: x86 %saturated-add-vector-reps
- {
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
- } available-reps ;
+: with-save/restore ( reg quot -- )
+ [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline
-M: x86 %add-sub-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { float-4-rep [ ADDSUBPS ] }
- { double-2-rep [ ADDSUBPD ] }
- } case ;
+:: with-small-register ( dst exclude size quot: ( new-dst -- ) -- )
+ ! If the destination register overlaps a small register with
+ ! 'size' bits, we call the quot with that. Otherwise, we find a
+ ! small register that is not in exclude, and call quot, saving and
+ ! restoring the small register.
+ dst size has-small-reg? [ dst quot call ] [
+ exclude small-reg-that-isn't
+ [ quot call ] with-save/restore
+ ] if ; inline
-M: x86 %add-sub-vector-reps
- {
- { sse3? { float-4-rep double-2-rep } }
- } available-reps ;
+M:: x86 %string-nth ( dst src index temp -- )
+ ! We request a small-reg of size 8 since those of size 16 are
+ ! a superset.
+ "end" define-label
+ dst { src index temp } 8 [| new-dst |
+ ! Load the least significant 7 bits into new-dst.
+ ! 8th bit indicates whether we have to load from
+ ! the aux vector or not.
+ temp src index [+] LEA
+ new-dst 8-bit-version-of temp string-offset [+] MOV
+ new-dst new-dst 8-bit-version-of MOVZX
+ ! Do we have to look at the aux vector?
+ new-dst HEX: 80 CMP
+ "end" get JL
+ ! Yes, this is a non-ASCII character. Load aux vector
+ temp src string-aux-offset [+] MOV
+ new-dst temp XCHG
+ ! Compute index
+ new-dst index ADD
+ new-dst index ADD
+ ! Load high 16 bits
+ new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV
+ new-dst new-dst 16-bit-version-of MOVZX
+ new-dst 7 SHL
+ ! Compute code point
+ new-dst temp XOR
+ "end" resolve-label
+ dst new-dst int-rep %copy
+ ] with-small-register ;
-M: x86 %sub-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { float-4-rep [ SUBPS ] }
- { double-2-rep [ SUBPD ] }
- { char-16-rep [ PSUBB ] }
- { uchar-16-rep [ PSUBB ] }
- { short-8-rep [ PSUBW ] }
- { ushort-8-rep [ PSUBW ] }
- { int-4-rep [ PSUBD ] }
- { uint-4-rep [ PSUBD ] }
- { longlong-2-rep [ PSUBQ ] }
- { ulonglong-2-rep [ PSUBQ ] }
- } case ;
+M:: x86 %set-string-nth-fast ( ch str index temp -- )
+ ch { index str temp } 8 [| new-ch |
+ new-ch ch int-rep %copy
+ temp str index [+] LEA
+ temp string-offset [+] new-ch 8-bit-version-of MOV
+ ] with-small-register ;
-M: x86 %sub-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+:: %alien-integer-getter ( dst src size quot -- )
+ dst { src } size [| new-dst |
+ new-dst dup size n-bit-version-of dup src [] MOV
+ quot call
+ dst new-dst int-rep %copy
+ ] with-small-register ; inline
-M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { char-16-rep [ PSUBSB ] }
- { uchar-16-rep [ PSUBUSB ] }
- { short-8-rep [ PSUBSW ] }
- { ushort-8-rep [ PSUBUSW ] }
- } case ;
+: %alien-unsigned-getter ( dst src size -- )
+ [ MOVZX ] %alien-integer-getter ; inline
-M: x86 %saturated-sub-vector-reps
- {
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
- } available-reps ;
+M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ;
+M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ;
+M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ;
-M: x86 %mul-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { float-4-rep [ MULPS ] }
- { double-2-rep [ MULPD ] }
- { short-8-rep [ PMULLW ] }
- { ushort-8-rep [ PMULLW ] }
- { int-4-rep [ PMULLD ] }
- { uint-4-rep [ PMULLD ] }
- } case ;
+: %alien-signed-getter ( dst src size -- )
+ [ MOVSX ] %alien-integer-getter ; inline
-M: x86 %mul-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep short-8-rep ushort-8-rep } }
- { sse4.1? { int-4-rep uint-4-rep } }
- } available-reps ;
+M: x86 %alien-signed-1 8 %alien-signed-getter ;
+M: x86 %alien-signed-2 16 %alien-signed-getter ;
+M: x86 %alien-signed-4 32 %alien-signed-getter ;
-M: x86 %saturated-mul-vector-reps
- ! No multiplication with saturation on x86
- { } ;
+M: x86 %alien-cell [] MOV ;
+M: x86 %alien-float [] MOVSS ;
+M: x86 %alien-double [] MOVSD ;
+M: x86 %alien-vector [ [] ] dip %copy ;
-M: x86 %div-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { float-4-rep [ DIVPS ] }
- { double-2-rep [ DIVPD ] }
- } case ;
+:: %alien-integer-setter ( ptr value size -- )
+ value { ptr } size [| new-value |
+ new-value value int-rep %copy
+ ptr [] new-value size n-bit-version-of MOV
+ ] with-small-register ; inline
-M: x86 %div-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep } }
- } available-reps ;
+M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
+M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
+M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
+M: x86 %set-alien-cell [ [] ] dip MOV ;
+M: x86 %set-alien-float [ [] ] dip MOVSS ;
+M: x86 %set-alien-double [ [] ] dip MOVSD ;
+M: x86 %set-alien-vector [ [] ] 2dip %copy ;
-M: x86 %min-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { char-16-rep [ PMINSB ] }
- { uchar-16-rep [ PMINUB ] }
- { short-8-rep [ PMINSW ] }
- { ushort-8-rep [ PMINUW ] }
- { int-4-rep [ PMINSD ] }
- { uint-4-rep [ PMINUD ] }
- { float-4-rep [ MINPS ] }
- { double-2-rep [ MINPD ] }
- } case ;
+: shift-count? ( reg -- ? ) { ECX RCX } memq? ;
-M: x86 %min-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
- { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
- } available-reps ;
+:: emit-shift ( dst src quot -- )
+ src shift-count? [
+ dst CL quot call
+ ] [
+ dst shift-count? [
+ dst src XCHG
+ src CL quot call
+ dst src XCHG
+ ] [
+ ECX native-version-of [
+ CL src MOV
+ drop dst CL quot call
+ ] with-save/restore
+ ] if
+ ] if ; inline
-M: x86 %max-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { char-16-rep [ PMAXSB ] }
- { uchar-16-rep [ PMAXUB ] }
- { short-8-rep [ PMAXSW ] }
- { ushort-8-rep [ PMAXUW ] }
- { int-4-rep [ PMAXSD ] }
- { uint-4-rep [ PMAXUD ] }
- { float-4-rep [ MAXPS ] }
- { double-2-rep [ MAXPD ] }
- } case ;
+M: x86 %shl int-rep two-operand [ SHL ] emit-shift ;
+M: x86 %shr int-rep two-operand [ SHR ] emit-shift ;
+M: x86 %sar int-rep two-operand [ SAR ] emit-shift ;
-M: x86 %max-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
- { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
- } available-reps ;
+M: x86 %vm-field-ptr ( dst field -- )
+ [ drop 0 MOV rc-absolute-cell rt-vm rel-fixup ]
+ [ vm-field-offset ADD ] 2bi ;
-M: x86 %horizontal-add-vector ( dst src rep -- )
- {
- { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
- { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
- } case ;
+: load-zone-ptr ( reg -- )
+ #! Load pointer to start of zone array
+ "nursery" %vm-field-ptr ;
-M: x86 %horizontal-add-vector-reps
- {
- { sse3? { float-4-rep double-2-rep } }
- } available-reps ;
+: load-allot-ptr ( nursery-ptr allot-ptr -- )
+ [ drop load-zone-ptr ] [ swap cell [+] MOV ] 2bi ;
-M: x86 %horizontal-shl-vector ( dst src1 src2 rep -- )
- two-operand PSLLDQ ;
+: inc-allot-ptr ( nursery-ptr n -- )
+ [ cell [+] ] dip 8 align ADD ;
-M: x86 %horizontal-shl-vector-reps
- {
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+: store-header ( temp class -- )
+ [ [] ] [ type-number tag-fixnum ] bi* MOV ;
-M: x86 %horizontal-shr-vector ( dst src1 src2 rep -- )
- two-operand PSRLDQ ;
+: store-tagged ( dst tag -- )
+ tag-number OR ;
-M: x86 %horizontal-shr-vector-reps
- {
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+M:: x86 %allot ( dst size class nursery-ptr -- )
+ nursery-ptr dst load-allot-ptr
+ dst class store-header
+ dst class store-tagged
+ nursery-ptr size inc-allot-ptr ;
-M: x86 %abs-vector ( dst src rep -- )
- {
- { char-16-rep [ PABSB ] }
- { short-8-rep [ PABSW ] }
- { int-4-rep [ PABSD ] }
- } case ;
+M:: x86 %write-barrier ( src card# table -- )
+ #! Mark the card pointed to by vreg.
+ ! Mark the card
+ card# src MOV
+ card# card-bits SHR
+ table "cards_offset" %vm-field-ptr
+ table table [] MOV
+ table card# [+] card-mark <byte> MOV
-M: x86 %abs-vector-reps
- {
- { ssse3? { char-16-rep short-8-rep int-4-rep } }
- } available-reps ;
+ ! Mark the card deck
+ card# deck-bits card-bits - SHR
+ table "decks_offset" %vm-field-ptr
+ table table [] MOV
+ table card# [+] card-mark <byte> MOV ;
-M: x86 %sqrt-vector ( dst src rep -- )
- {
- { float-4-rep [ SQRTPS ] }
- { double-2-rep [ SQRTPD ] }
- } case ;
+M:: x86 %check-nursery ( label temp1 temp2 -- )
+ temp1 load-zone-ptr
+ temp2 temp1 cell [+] MOV
+ temp2 1024 ADD
+ temp1 temp1 3 cells [+] MOV
+ temp2 temp1 CMP
+ label JLE ;
-M: x86 %sqrt-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep } }
- } available-reps ;
+M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ;
-M: x86 %and-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { float-4-rep [ ANDPS ] }
- { double-2-rep [ ANDPD ] }
- [ drop PAND ]
- } case ;
+M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
-M: x86 %and-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+M: x86 %alien-global ( dst symbol library -- )
+ [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;
-M: x86 %andn-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { float-4-rep [ ANDNPS ] }
- { double-2-rep [ ANDNPD ] }
- [ drop PANDN ]
- } case ;
+M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
-M: x86 %andn-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+:: %boolean ( dst temp word -- )
+ dst \ f tag-number MOV
+ temp 0 MOV \ t rc-absolute-cell rel-immediate
+ dst temp word execute ; inline
-M: x86 %or-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { float-4-rep [ ORPS ] }
- { double-2-rep [ ORPD ] }
- [ drop POR ]
+M:: x86 %compare ( dst src1 src2 cc temp -- )
+ src1 src2 CMP
+ cc order-cc {
+ { cc< [ dst temp \ CMOVL %boolean ] }
+ { cc<= [ dst temp \ CMOVLE %boolean ] }
+ { cc> [ dst temp \ CMOVG %boolean ] }
+ { cc>= [ dst temp \ CMOVGE %boolean ] }
+ { cc= [ dst temp \ CMOVE %boolean ] }
+ { cc/= [ dst temp \ CMOVNE %boolean ] }
} case ;
-M: x86 %or-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+M: x86 %compare-imm ( dst src1 src2 cc temp -- )
+ %compare ;
-M: x86 %xor-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { float-4-rep [ XORPS ] }
- { double-2-rep [ XORPD ] }
- [ drop PXOR ]
+M:: x86 %compare-branch ( label src1 src2 cc -- )
+ src1 src2 CMP
+ cc order-cc {
+ { cc< [ label JL ] }
+ { cc<= [ label JLE ] }
+ { cc> [ label JG ] }
+ { cc>= [ label JGE ] }
+ { cc= [ label JE ] }
+ { cc/= [ label JNE ] }
} case ;
-M: x86 %xor-vector-reps
- {
- { sse? { float-4-rep } }
- { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+M: x86 %compare-imm-branch ( label src1 src2 cc -- )
+ %compare-branch ;
-M: x86 %shl-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { short-8-rep [ PSLLW ] }
- { ushort-8-rep [ PSLLW ] }
- { int-4-rep [ PSLLD ] }
- { uint-4-rep [ PSLLD ] }
- { longlong-2-rep [ PSLLQ ] }
- { ulonglong-2-rep [ PSLLQ ] }
- } case ;
+M: x86 %add-float double-rep two-operand ADDSD ;
+M: x86 %sub-float double-rep two-operand SUBSD ;
+M: x86 %mul-float double-rep two-operand MULSD ;
+M: x86 %div-float double-rep two-operand DIVSD ;
+M: x86 %min-float double-rep two-operand MINSD ;
+M: x86 %max-float double-rep two-operand MAXSD ;
+M: x86 %sqrt SQRTSD ;
-M: x86 %shl-vector-reps
- {
- { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
- } available-reps ;
+M: x86 %single>double-float CVTSS2SD ;
+M: x86 %double>single-float CVTSD2SS ;
-M: x86 %shr-vector ( dst src1 src2 rep -- )
- [ two-operand ] keep
- {
- { short-8-rep [ PSRAW ] }
- { ushort-8-rep [ PSRLW ] }
- { int-4-rep [ PSRAD ] }
- { uint-4-rep [ PSRLD ] }
- { ulonglong-2-rep [ PSRLQ ] }
- } case ;
+M: x86 %integer>float CVTSI2SD ;
+M: x86 %float>integer CVTTSD2SI ;
-M: x86 %shr-vector-reps
- {
- { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
- } available-reps ;
+M: x86 %unbox-float ( dst src -- )
+ float-offset [+] MOVSD ;
-M: x86 %integer>scalar drop MOVD ;
+M:: x86 %box-float ( dst src temp -- )
+ dst 16 float temp %allot
+ dst float-offset [+] src MOVSD ;
-M: x86 %scalar>integer drop MOVD ;
+: %cmov-float= ( dst src -- )
+ [
+ "no-move" define-label
-M: x86 %unbox-alien ( dst src -- )
- alien-offset [+] MOV ;
+ "no-move" get [ JNE ] [ JP ] bi
+ MOV
+ "no-move" resolve-label
+ ] with-scope ;
-M:: x86 %unbox-any-c-ptr ( dst src temp -- )
+: %cmov-float/= ( dst src -- )
[
- { "is-byte-array" "end" "start" } [ define-label ] each
- dst 0 MOV
- temp src MOV
- ! We come back here with displaced aliens
- "start" resolve-label
- ! Is the object f?
- temp \ f tag-number CMP
- "end" get JE
- ! Is the object an alien?
- temp header-offset [+] alien type-number tag-fixnum CMP
- "is-byte-array" get JNE
- ! If so, load the offset and add it to the address
- dst temp alien-offset [+] ADD
- ! Now recurse on the underlying alien
- temp temp underlying-alien-offset [+] MOV
- "start" get JMP
- "is-byte-array" resolve-label
- ! Add byte array address to address being computed
- dst temp ADD
- ! Add an offset to start of byte array's data
- dst byte-array-offset ADD
- "end" resolve-label
+ "no-move" define-label
+ "move" define-label
+
+ "move" get JP
+ "no-move" get JE
+ "move" resolve-label
+ MOV
+ "no-move" resolve-label
] with-scope ;
-: alien@ ( reg n -- op ) cells alien tag-number - [+] ;
+:: (%compare-float) ( dst src1 src2 cc temp compare -- )
+ cc {
+ { cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
+ { cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
+ { cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
+ { cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
+ { cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] }
+ { cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] }
+ { cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] }
+ { cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
+ { cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
+ { cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
+ { cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
+ { cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] }
+ { cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE %boolean ] }
+ { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP %boolean ] }
+ } case ; inline
-:: %allot-alien ( dst displacement base temp -- )
- dst 4 cells alien temp %allot
- dst 1 alien@ base MOV ! alien
- dst 2 alien@ \ f tag-number MOV ! expired
- dst 3 alien@ displacement MOV ! displacement
- ;
+M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
+ \ COMISD (%compare-float) ;
-M:: x86 %box-alien ( dst src temp -- )
- [
- "end" define-label
- dst \ f tag-number MOV
- src 0 CMP
- "end" get JE
- dst src \ f tag-number temp %allot-alien
- "end" resolve-label
- ] with-scope ;
+M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
+ \ UCOMISD (%compare-float) ;
-M:: x86 %box-displaced-alien ( dst displacement base displacement' base' base-class -- )
+: %jump-float= ( label -- )
[
- "end" define-label
- "ok" define-label
- ! If displacement is zero, return the base
- dst base MOV
- displacement 0 CMP
- "end" get JE
- ! Quickly use displacement' before its needed for real, as allot temporary
- dst 4 cells alien displacement' %allot
- ! If base is already a displaced alien, unpack it
- base' base MOV
- displacement' displacement MOV
- base \ f tag-number CMP
- "ok" get JE
- base header-offset [+] alien type-number tag-fixnum CMP
- "ok" get JNE
- ! displacement += base.displacement
- displacement' base 3 alien@ ADD
- ! base = base.base
- base' base 1 alien@ MOV
- "ok" resolve-label
- dst 1 alien@ base' MOV ! alien
- dst 2 alien@ \ f tag-number MOV ! expired
- dst 3 alien@ displacement' MOV ! displacement
- "end" resolve-label
+ "no-jump" define-label
+ "no-jump" get JP
+ JE
+ "no-jump" resolve-label
] with-scope ;
-! The 'small-reg' mess is pretty crappy, but its only used on x86-32.
-! On x86-64, all registers have 8-bit versions. However, a similar
-! problem arises for shifts, where the shift count must be in CL, and
-! so one day I will fix this properly by adding precoloring to the
-! register allocator.
+: %jump-float/= ( label -- )
+ [ JNE ] [ JP ] bi ;
-HOOK: has-small-reg? cpu ( reg size -- ? )
+:: (%compare-float-branch) ( label src1 src2 cc compare -- )
+ cc {
+ { cc< [ src2 src1 \ compare execute( a b -- ) label JA ] }
+ { cc<= [ src2 src1 \ compare execute( a b -- ) label JAE ] }
+ { cc> [ src1 src2 \ compare execute( a b -- ) label JA ] }
+ { cc>= [ src1 src2 \ compare execute( a b -- ) label JAE ] }
+ { cc= [ src1 src2 \ compare execute( a b -- ) label %jump-float= ] }
+ { cc<> [ src1 src2 \ compare execute( a b -- ) label JNE ] }
+ { cc<>= [ src1 src2 \ compare execute( a b -- ) label JNP ] }
+ { cc/< [ src2 src1 \ compare execute( a b -- ) label JBE ] }
+ { cc/<= [ src2 src1 \ compare execute( a b -- ) label JB ] }
+ { cc/> [ src1 src2 \ compare execute( a b -- ) label JBE ] }
+ { cc/>= [ src1 src2 \ compare execute( a b -- ) label JB ] }
+ { cc/= [ src1 src2 \ compare execute( a b -- ) label %jump-float/= ] }
+ { cc/<> [ src1 src2 \ compare execute( a b -- ) label JE ] }
+ { cc/<>= [ src1 src2 \ compare execute( a b -- ) label JP ] }
+ } case ;
-CONSTANT: have-byte-regs { EAX ECX EDX EBX }
+M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
+ \ COMISD (%compare-float-branch) ;
-M: x86.32 has-small-reg?
- {
- { 8 [ have-byte-regs memq? ] }
- { 16 [ drop t ] }
- { 32 [ drop t ] }
- } case ;
+M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
+ \ UCOMISD (%compare-float-branch) ;
-M: x86.64 has-small-reg? 2drop t ;
+M:: x86 %box-vector ( dst src rep temp -- )
+ dst rep rep-size 2 cells + byte-array temp %allot
+ 16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
+ dst byte-array-offset [+]
+ src rep %copy ;
-: small-reg-that-isn't ( exclude -- reg' )
- [ have-byte-regs ] dip
- [ native-version-of ] map
- '[ _ memq? not ] find nip ;
+M:: x86 %unbox-vector ( dst src rep -- )
+ dst src byte-array-offset [+]
+ rep %copy ;
-: with-save/restore ( reg quot -- )
- [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline
+MACRO: available-reps ( alist -- )
+ ! Each SSE version adds new representations and supports
+ ! all old ones
+ unzip { } [ append ] accumulate rest swap suffix
+ [ [ 1quotation ] map ] bi@ zip
+ reverse [ { } ] suffix
+ '[ _ cond ] ;
-:: with-small-register ( dst exclude size quot: ( new-dst -- ) -- )
- ! If the destination register overlaps a small register with
- ! 'size' bits, we call the quot with that. Otherwise, we find a
- ! small register that is not in exclude, and call quot, saving and
- ! restoring the small register.
- dst size has-small-reg? [ dst quot call ] [
- exclude small-reg-that-isn't
- [ quot call ] with-save/restore
- ] if ; inline
+M: x86 %broadcast-vector ( dst src rep -- )
+ {
+ { float-4-rep [ [ float-4-rep %copy ] [ drop dup 0 SHUFPS ] 2bi ] }
+ { double-2-rep [ [ double-2-rep %copy ] [ drop dup UNPCKLPD ] 2bi ] }
+ } case ;
-M:: x86 %string-nth ( dst src index temp -- )
- ! We request a small-reg of size 8 since those of size 16 are
- ! a superset.
- "end" define-label
- dst { src index temp } 8 [| new-dst |
- ! Load the least significant 7 bits into new-dst.
- ! 8th bit indicates whether we have to load from
- ! the aux vector or not.
- temp src index [+] LEA
- new-dst 8-bit-version-of temp string-offset [+] MOV
- new-dst new-dst 8-bit-version-of MOVZX
- ! Do we have to look at the aux vector?
- new-dst HEX: 80 CMP
- "end" get JL
- ! Yes, this is a non-ASCII character. Load aux vector
- temp src string-aux-offset [+] MOV
- new-dst temp XCHG
- ! Compute index
- new-dst index ADD
- new-dst index ADD
- ! Load high 16 bits
- new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV
- new-dst new-dst 16-bit-version-of MOVZX
- new-dst 7 SHL
- ! Compute code point
- new-dst temp XOR
- "end" resolve-label
- dst new-dst int-rep %copy
- ] with-small-register ;
+M: x86 %broadcast-vector-reps
+ {
+ ! Can't do this with sse1 since it will want to unbox
+ ! a double-precision float and convert to single precision
+ { sse2? { float-4-rep double-2-rep } }
+ } available-reps ;
-M:: x86 %set-string-nth-fast ( ch str index temp -- )
- ch { index str temp } 8 [| new-ch |
- new-ch ch int-rep %copy
- temp str index [+] LEA
- temp string-offset [+] new-ch 8-bit-version-of MOV
- ] with-small-register ;
+M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
+ rep {
+ {
+ float-4-rep
+ [
+ dst src1 float-4-rep %copy
+ dst src2 UNPCKLPS
+ src3 src4 UNPCKLPS
+ dst src3 MOVLHPS
+ ]
+ }
+ } case ;
-:: %alien-integer-getter ( dst src size quot -- )
- dst { src } size [| new-dst |
- new-dst dup size n-bit-version-of dup src [] MOV
- quot call
- dst new-dst int-rep %copy
- ] with-small-register ; inline
+M: x86 %gather-vector-4-reps
+ {
+ ! Can't do this with sse1 since it will want to unbox
+ ! double-precision floats and convert to single precision
+ { sse2? { float-4-rep } }
+ } available-reps ;
-: %alien-unsigned-getter ( dst src size -- )
- [ MOVZX ] %alien-integer-getter ; inline
+M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
+ rep {
+ {
+ double-2-rep
+ [
+ dst src1 double-2-rep %copy
+ dst src2 UNPCKLPD
+ ]
+ }
+ } case ;
-M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ;
-M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ;
-M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ;
+M: x86 %gather-vector-2-reps
+ {
+ { sse2? { double-2-rep } }
+ } available-reps ;
-: %alien-signed-getter ( dst src size -- )
- [ MOVSX ] %alien-integer-getter ; inline
+M: x86 %add-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ ADDPS ] }
+ { double-2-rep [ ADDPD ] }
+ { char-16-rep [ PADDB ] }
+ { uchar-16-rep [ PADDB ] }
+ { short-8-rep [ PADDW ] }
+ { ushort-8-rep [ PADDW ] }
+ { int-4-rep [ PADDD ] }
+ { uint-4-rep [ PADDD ] }
+ { longlong-2-rep [ PADDQ ] }
+ { ulonglong-2-rep [ PADDQ ] }
+ } case ;
-M: x86 %alien-signed-1 8 %alien-signed-getter ;
-M: x86 %alien-signed-2 16 %alien-signed-getter ;
-M: x86 %alien-signed-4 32 %alien-signed-getter ;
+M: x86 %add-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
-M: x86 %alien-cell [] MOV ;
-M: x86 %alien-float [] MOVSS ;
-M: x86 %alien-double [] MOVSD ;
-M: x86 %alien-vector [ [] ] dip %copy ;
+M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { char-16-rep [ PADDSB ] }
+ { uchar-16-rep [ PADDUSB ] }
+ { short-8-rep [ PADDSW ] }
+ { ushort-8-rep [ PADDUSW ] }
+ } case ;
-:: %alien-integer-setter ( ptr value size -- )
- value { ptr } size [| new-value |
- new-value value int-rep %copy
- ptr [] new-value size n-bit-version-of MOV
- ] with-small-register ; inline
+M: x86 %saturated-add-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
+ } available-reps ;
-M: x86 %set-alien-integer-1 8 %alien-integer-setter ;
-M: x86 %set-alien-integer-2 16 %alien-integer-setter ;
-M: x86 %set-alien-integer-4 32 %alien-integer-setter ;
-M: x86 %set-alien-cell [ [] ] dip MOV ;
-M: x86 %set-alien-float [ [] ] dip MOVSS ;
-M: x86 %set-alien-double [ [] ] dip MOVSD ;
-M: x86 %set-alien-vector [ [] ] 2dip %copy ;
+M: x86 %add-sub-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ ADDSUBPS ] }
+ { double-2-rep [ ADDSUBPD ] }
+ } case ;
-: shift-count? ( reg -- ? ) { ECX RCX } memq? ;
+M: x86 %add-sub-vector-reps
+ {
+ { sse3? { float-4-rep double-2-rep } }
+ } available-reps ;
-:: emit-shift ( dst src1 src2 quot -- )
- src2 shift-count? [
- dst CL quot call
- ] [
- dst shift-count? [
- dst src2 XCHG
- src2 CL quot call
- dst src2 XCHG
- ] [
- ECX native-version-of [
- CL src2 MOV
- drop dst CL quot call
- ] with-save/restore
- ] if
- ] if ; inline
+M: x86 %sub-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ SUBPS ] }
+ { double-2-rep [ SUBPD ] }
+ { char-16-rep [ PSUBB ] }
+ { uchar-16-rep [ PSUBB ] }
+ { short-8-rep [ PSUBW ] }
+ { ushort-8-rep [ PSUBW ] }
+ { int-4-rep [ PSUBD ] }
+ { uint-4-rep [ PSUBD ] }
+ { longlong-2-rep [ PSUBQ ] }
+ { ulonglong-2-rep [ PSUBQ ] }
+ } case ;
-M: x86 %shl [ SHL ] emit-shift ;
-M: x86 %shr [ SHR ] emit-shift ;
-M: x86 %sar [ SAR ] emit-shift ;
+M: x86 %sub-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
-M: x86 %vm-field-ptr ( dst field -- )
- [ drop 0 MOV rc-absolute-cell rt-vm rel-fixup ]
- [ vm-field-offset ADD ] 2bi ;
+M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { char-16-rep [ PSUBSB ] }
+ { uchar-16-rep [ PSUBUSB ] }
+ { short-8-rep [ PSUBSW ] }
+ { ushort-8-rep [ PSUBUSW ] }
+ } case ;
-: load-zone-ptr ( reg -- )
- #! Load pointer to start of zone array
- "nursery" %vm-field-ptr ;
+M: x86 %saturated-sub-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
+ } available-reps ;
-: load-allot-ptr ( nursery-ptr allot-ptr -- )
- [ drop load-zone-ptr ] [ swap cell [+] MOV ] 2bi ;
+M: x86 %mul-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ MULPS ] }
+ { double-2-rep [ MULPD ] }
+ { short-8-rep [ PMULLW ] }
+ { ushort-8-rep [ PMULLW ] }
+ { int-4-rep [ PMULLD ] }
+ { uint-4-rep [ PMULLD ] }
+ } case ;
-: inc-allot-ptr ( nursery-ptr n -- )
- [ cell [+] ] dip 8 align ADD ;
+M: x86 %mul-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep short-8-rep ushort-8-rep } }
+ { sse4.1? { int-4-rep uint-4-rep } }
+ } available-reps ;
-: store-header ( temp class -- )
- [ [] ] [ type-number tag-fixnum ] bi* MOV ;
+M: x86 %saturated-mul-vector-reps
+ ! No multiplication with saturation on x86
+ { } ;
-: store-tagged ( dst tag -- )
- tag-number OR ;
+M: x86 %div-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ DIVPS ] }
+ { double-2-rep [ DIVPD ] }
+ } case ;
-M:: x86 %allot ( dst size class nursery-ptr -- )
- nursery-ptr dst load-allot-ptr
- dst class store-header
- dst class store-tagged
- nursery-ptr size inc-allot-ptr ;
+M: x86 %div-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep } }
+ } available-reps ;
+M: x86 %min-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { char-16-rep [ PMINSB ] }
+ { uchar-16-rep [ PMINUB ] }
+ { short-8-rep [ PMINSW ] }
+ { ushort-8-rep [ PMINUW ] }
+ { int-4-rep [ PMINSD ] }
+ { uint-4-rep [ PMINUD ] }
+ { float-4-rep [ MINPS ] }
+ { double-2-rep [ MINPD ] }
+ } case ;
+
+M: x86 %min-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
+ { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
+ } available-reps ;
-M:: x86 %write-barrier ( src card# table -- )
- #! Mark the card pointed to by vreg.
- ! Mark the card
- card# src MOV
- card# card-bits SHR
- table "cards_offset" %vm-field-ptr
- table table [] MOV
- table card# [+] card-mark <byte> MOV
+M: x86 %max-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { char-16-rep [ PMAXSB ] }
+ { uchar-16-rep [ PMAXUB ] }
+ { short-8-rep [ PMAXSW ] }
+ { ushort-8-rep [ PMAXUW ] }
+ { int-4-rep [ PMAXSD ] }
+ { uint-4-rep [ PMAXUD ] }
+ { float-4-rep [ MAXPS ] }
+ { double-2-rep [ MAXPD ] }
+ } case ;
- ! Mark the card deck
- card# deck-bits card-bits - SHR
- table "decks_offset" %vm-field-ptr
- table table [] MOV
- table card# [+] card-mark <byte> MOV ;
+M: x86 %max-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
+ { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
+ } available-reps ;
-M:: x86 %check-nursery ( label temp1 temp2 -- )
- temp1 load-zone-ptr
- temp2 temp1 cell [+] MOV
- temp2 1024 ADD
- temp1 temp1 3 cells [+] MOV
- temp2 temp1 CMP
- label JLE ;
+M: x86 %horizontal-add-vector ( dst src rep -- )
+ {
+ { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
+ { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
+ } case ;
-M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ;
+M: x86 %horizontal-add-vector-reps
+ {
+ { sse3? { float-4-rep double-2-rep } }
+ } available-reps ;
-M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
+M: x86 %horizontal-shl-vector ( dst src1 src2 rep -- )
+ two-operand PSLLDQ ;
-M: x86 %alien-global ( dst symbol library -- )
- [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;
+M: x86 %horizontal-shl-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
-M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
+M: x86 %horizontal-shr-vector ( dst src1 src2 rep -- )
+ two-operand PSRLDQ ;
-:: %boolean ( dst temp word -- )
- dst \ f tag-number MOV
- temp 0 MOV \ t rc-absolute-cell rel-immediate
- dst temp word execute ; inline
+M: x86 %horizontal-shr-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
-M:: x86 %compare ( dst src1 src2 cc temp -- )
- src1 src2 CMP
- cc order-cc {
- { cc< [ dst temp \ CMOVL %boolean ] }
- { cc<= [ dst temp \ CMOVLE %boolean ] }
- { cc> [ dst temp \ CMOVG %boolean ] }
- { cc>= [ dst temp \ CMOVGE %boolean ] }
- { cc= [ dst temp \ CMOVE %boolean ] }
- { cc/= [ dst temp \ CMOVNE %boolean ] }
+M: x86 %abs-vector ( dst src rep -- )
+ {
+ { char-16-rep [ PABSB ] }
+ { short-8-rep [ PABSW ] }
+ { int-4-rep [ PABSD ] }
} case ;
-M: x86 %compare-imm ( dst src1 src2 cc temp -- )
- %compare ;
+M: x86 %abs-vector-reps
+ {
+ { ssse3? { char-16-rep short-8-rep int-4-rep } }
+ } available-reps ;
-: %cmov-float= ( dst src -- )
- [
- "no-move" define-label
+M: x86 %sqrt-vector ( dst src rep -- )
+ {
+ { float-4-rep [ SQRTPS ] }
+ { double-2-rep [ SQRTPD ] }
+ } case ;
- "no-move" get [ JNE ] [ JP ] bi
- MOV
- "no-move" resolve-label
- ] with-scope ;
+M: x86 %sqrt-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep } }
+ } available-reps ;
-: %cmov-float/= ( dst src -- )
- [
- "no-move" define-label
- "move" define-label
+M: x86 %and-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ ANDPS ] }
+ { double-2-rep [ ANDPD ] }
+ [ drop PAND ]
+ } case ;
- "move" get JP
- "no-move" get JE
- "move" resolve-label
- MOV
- "no-move" resolve-label
- ] with-scope ;
+M: x86 %and-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
-:: (%compare-float) ( dst src1 src2 cc temp compare -- )
- cc {
- { cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
- { cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
- { cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
- { cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
- { cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] }
- { cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] }
- { cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] }
- { cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
- { cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
- { cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
- { cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
- { cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] }
- { cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE %boolean ] }
- { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP %boolean ] }
- } case ; inline
+M: x86 %andn-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ ANDNPS ] }
+ { double-2-rep [ ANDNPD ] }
+ [ drop PANDN ]
+ } case ;
-M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
- \ COMISD (%compare-float) ;
+M: x86 %andn-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
-M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
- \ UCOMISD (%compare-float) ;
+M: x86 %or-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ ORPS ] }
+ { double-2-rep [ ORPD ] }
+ [ drop POR ]
+ } case ;
-M:: x86 %compare-branch ( label src1 src2 cc -- )
- src1 src2 CMP
- cc order-cc {
- { cc< [ label JL ] }
- { cc<= [ label JLE ] }
- { cc> [ label JG ] }
- { cc>= [ label JGE ] }
- { cc= [ label JE ] }
- { cc/= [ label JNE ] }
+M: x86 %or-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %xor-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ XORPS ] }
+ { double-2-rep [ XORPD ] }
+ [ drop PXOR ]
} case ;
-M: x86 %compare-imm-branch ( label src1 src2 cc -- )
- %compare-branch ;
+M: x86 %xor-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
-: %jump-float= ( label -- )
- [
- "no-jump" define-label
- "no-jump" get JP
- JE
- "no-jump" resolve-label
- ] with-scope ;
+M: x86 %shl-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { short-8-rep [ PSLLW ] }
+ { ushort-8-rep [ PSLLW ] }
+ { int-4-rep [ PSLLD ] }
+ { uint-4-rep [ PSLLD ] }
+ { longlong-2-rep [ PSLLQ ] }
+ { ulonglong-2-rep [ PSLLQ ] }
+ } case ;
-: %jump-float/= ( label -- )
- [ JNE ] [ JP ] bi ;
+M: x86 %shl-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
-:: (%compare-float-branch) ( label src1 src2 cc compare -- )
- cc {
- { cc< [ src2 src1 \ compare execute( a b -- ) label JA ] }
- { cc<= [ src2 src1 \ compare execute( a b -- ) label JAE ] }
- { cc> [ src1 src2 \ compare execute( a b -- ) label JA ] }
- { cc>= [ src1 src2 \ compare execute( a b -- ) label JAE ] }
- { cc= [ src1 src2 \ compare execute( a b -- ) label %jump-float= ] }
- { cc<> [ src1 src2 \ compare execute( a b -- ) label JNE ] }
- { cc<>= [ src1 src2 \ compare execute( a b -- ) label JNP ] }
- { cc/< [ src2 src1 \ compare execute( a b -- ) label JBE ] }
- { cc/<= [ src2 src1 \ compare execute( a b -- ) label JB ] }
- { cc/> [ src1 src2 \ compare execute( a b -- ) label JBE ] }
- { cc/>= [ src1 src2 \ compare execute( a b -- ) label JB ] }
- { cc/= [ src1 src2 \ compare execute( a b -- ) label %jump-float/= ] }
- { cc/<> [ src1 src2 \ compare execute( a b -- ) label JE ] }
- { cc/<>= [ src1 src2 \ compare execute( a b -- ) label JP ] }
+M: x86 %shr-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { short-8-rep [ PSRAW ] }
+ { ushort-8-rep [ PSRLW ] }
+ { int-4-rep [ PSRAD ] }
+ { uint-4-rep [ PSRLD ] }
+ { ulonglong-2-rep [ PSRLQ ] }
} case ;
-M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
- \ COMISD (%compare-float-branch) ;
+M: x86 %shr-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
+ } available-reps ;
-M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
- \ UCOMISD (%compare-float-branch) ;
+M: x86 %integer>scalar drop MOVD ;
+
+M: x86 %scalar>integer drop MOVD ;
M:: x86 %spill ( src rep dst -- ) dst src rep %copy ;
M:: x86 %reload ( dst rep src -- ) dst src rep %copy ;