! On x86, floating point registers are really vector registers
SINGLETONS:
-float-4-rep
-double-2-rep
char-16-rep
uchar-16-rep
short-8-rep
ushort-8-rep
int-4-rep
-uint-4-rep ;
+uint-4-rep
+longlong-2-rep
+ulonglong-2-rep ;
-UNION: vector-rep
+! Scalar values in the high component of a vector register
+SINGLETONS:
+char-scalar-rep
+uchar-scalar-rep
+short-scalar-rep
+ushort-scalar-rep
+int-scalar-rep
+uint-scalar-rep
+longlong-scalar-rep
+ulonglong-scalar-rep ;
+
+SINGLETONS:
float-4-rep
-double-2-rep
+double-2-rep ;
+
+UNION: int-vector-rep
char-16-rep
uchar-16-rep
short-8-rep
ushort-8-rep
int-4-rep
-uint-4-rep ;
+uint-4-rep
+longlong-2-rep
+ulonglong-2-rep ;
+
+UNION: scalar-rep
+char-scalar-rep
+uchar-scalar-rep
+short-scalar-rep
+ushort-scalar-rep
+int-scalar-rep
+uint-scalar-rep
+longlong-scalar-rep
+ulonglong-scalar-rep ;
+
+UNION: float-vector-rep
+float-4-rep
+double-2-rep ;
+
+UNION: vector-rep
+int-vector-rep
+float-vector-rep ;
UNION: representation
any-rep
int-rep
float-rep
double-rep
-vector-rep ;
+vector-rep
+scalar-rep ;
! Register classes
SINGLETONS: int-regs float-regs ;
! A pseudo-register class for parameters spilled on the stack
SINGLETON: stack-params
+! On x86, vectors and floats are stored in the same register bank
+! On PowerPC they are distinct
+HOOK: vector-regs cpu ( -- reg-class )
+
GENERIC: reg-class-of ( rep -- reg-class )
M: tagged-rep reg-class-of drop int-regs ;
M: int-rep reg-class-of drop int-regs ;
M: float-rep reg-class-of drop float-regs ;
M: double-rep reg-class-of drop float-regs ;
-M: vector-rep reg-class-of drop float-regs ;
+M: vector-rep reg-class-of drop vector-regs ;
+M: scalar-rep reg-class-of drop vector-regs ;
M: stack-params reg-class-of drop stack-params ;
GENERIC: rep-size ( rep -- n ) foldable
M: stack-params rep-size drop cell ;
M: vector-rep rep-size drop 16 ;
+GENERIC: rep-component-type ( rep -- n )
+
+! Methods defined in alien.c-types
+
GENERIC: scalar-rep-of ( rep -- rep' )
M: float-4-rep scalar-rep-of drop float-rep ;
M: double-2-rep scalar-rep-of drop double-rep ;
+M: char-16-rep scalar-rep-of drop char-scalar-rep ;
+M: uchar-16-rep scalar-rep-of drop uchar-scalar-rep ;
+M: short-8-rep scalar-rep-of drop short-scalar-rep ;
+M: ushort-8-rep scalar-rep-of drop ushort-scalar-rep ;
+M: int-4-rep scalar-rep-of drop int-scalar-rep ;
+M: uint-4-rep scalar-rep-of drop uint-scalar-rep ;
+M: longlong-2-rep scalar-rep-of drop longlong-scalar-rep ;
+M: ulonglong-2-rep scalar-rep-of drop ulonglong-scalar-rep ;
! Mapping from register class to machine registers
HOOK: machine-registers cpu ( -- assoc )
HOOK: %dispatch cpu ( src temp -- )
- HOOK: %slot cpu ( dst obj slot tag temp -- )
+ HOOK: %slot cpu ( dst obj slot -- )
HOOK: %slot-imm cpu ( dst obj slot tag -- )
- HOOK: %set-slot cpu ( src obj slot tag temp -- )
+ HOOK: %set-slot cpu ( src obj slot -- )
HOOK: %set-slot-imm cpu ( src obj slot tag -- )
HOOK: %string-nth cpu ( dst obj index temp -- )
HOOK: %broadcast-vector cpu ( dst src rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
-
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
+HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
+HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
+HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
+HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
+HOOK: %abs-vector cpu ( dst src rep -- )
+HOOK: %and-vector cpu ( dst src1 src2 rep -- )
+HOOK: %or-vector cpu ( dst src1 src2 rep -- )
+HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
+HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
+HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
+
+HOOK: %integer>scalar cpu ( dst src rep -- )
+HOOK: %scalar>integer cpu ( dst src rep -- )
+
+HOOK: %broadcast-vector-reps cpu ( -- reps )
+HOOK: %gather-vector-2-reps cpu ( -- reps )
+HOOK: %gather-vector-4-reps cpu ( -- reps )
+HOOK: %add-vector-reps cpu ( -- reps )
+HOOK: %saturated-add-vector-reps cpu ( -- reps )
+HOOK: %add-sub-vector-reps cpu ( -- reps )
+HOOK: %sub-vector-reps cpu ( -- reps )
+HOOK: %saturated-sub-vector-reps cpu ( -- reps )
+HOOK: %mul-vector-reps cpu ( -- reps )
+HOOK: %saturated-mul-vector-reps cpu ( -- reps )
+HOOK: %div-vector-reps cpu ( -- reps )
+HOOK: %min-vector-reps cpu ( -- reps )
+HOOK: %max-vector-reps cpu ( -- reps )
+HOOK: %sqrt-vector-reps cpu ( -- reps )
+HOOK: %horizontal-add-vector-reps cpu ( -- reps )
+HOOK: %abs-vector-reps cpu ( -- reps )
+HOOK: %and-vector-reps cpu ( -- reps )
+HOOK: %or-vector-reps cpu ( -- reps )
+HOOK: %xor-vector-reps cpu ( -- reps )
+HOOK: %shl-vector-reps cpu ( -- reps )
+HOOK: %shr-vector-reps cpu ( -- reps )
HOOK: %unbox-alien cpu ( dst src -- )
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
HOOK: %check-nursery cpu ( label temp1 temp2 -- )
HOOK: %save-gc-root cpu ( gc-root register -- )
HOOK: %load-gc-root cpu ( gc-root register -- )
-HOOK: %call-gc cpu ( gc-root-count -- )
+HOOK: %call-gc cpu ( gc-root-count temp1 -- )
HOOK: %prologue cpu ( n -- )
HOOK: %epilogue cpu ( n -- )
HOOK: %alien-invoke cpu ( function library -- )
-HOOK: %vm-invoke-1st-arg cpu ( function -- )
-HOOK: %vm-invoke-3rd-arg cpu ( function -- )
-
HOOK: %cleanup cpu ( params -- )
M: object %cleanup ( params -- ) drop ;
HOOK: %callback-value cpu ( ctype -- )
+HOOK: %nest-stacks cpu ( -- )
+
+HOOK: %unnest-stacks cpu ( -- )
+
! Return to caller with stdcall unwinding (only for x86)
HOOK: %callback-return cpu ( params -- )
M: ppc %vm-field-ptr ( dst field -- ) %load-vm-field-addr ;
-M: ppc %vm-invoke-1st-arg ( function -- ) f %alien-invoke ;
-M: ppc %vm-invoke-3rd-arg ( function -- ) f %alien-invoke ;
-
M: ppc machine-registers
{
{ int-regs $[ 2 12 [a,b] 15 29 [a,b] append ] }
temp MTCTR
BCTR ;
- :: (%slot) ( obj slot tag temp -- reg offset )
- temp slot obj ADD
- temp tag neg ; inline
-
: (%slot-imm) ( obj slot tag -- reg offset )
[ cells ] dip - ; inline
- M: ppc %slot ( dst obj slot tag temp -- ) (%slot) LWZ ;
+ M: ppc %slot ( dst obj slot -- ) LWZX ;
M: ppc %slot-imm ( dst obj slot tag -- ) (%slot-imm) LWZ ;
- M: ppc %set-slot ( src obj slot tag temp -- ) (%slot) STW ;
+ M: ppc %set-slot ( src obj slot -- ) STWX ;
M: ppc %set-slot-imm ( src obj slot tag -- ) (%slot-imm) STW ;
M:: ppc %string-nth ( dst src index temp -- )
dst 1 4 scratch@ LWZ ;
M: ppc %copy ( dst src rep -- )
- {
- { int-rep [ MR ] }
- { double-rep [ FMR ] }
- } case ;
+ 2over eq? [ 3drop ] [
+ {
+ { int-rep [ MR ] }
+ { double-rep [ FMR ] }
+ } case
+ ] if ;
M: ppc %unbox-float ( dst src -- ) float-offset LFD ;
[ float-regs param-regs nth 1 ] [ n>> spill@ ] bi* LFD ;
: float-function-return ( reg -- )
- float-regs return-reg 2dup = [ 2drop ] [ FMR ] if ;
+ float-regs return-reg double-rep %copy ;
M:: ppc %unary-float-function ( dst src func -- )
0 src float-function-param
dst float-function-return ;
! Internal format is always double-precision on PowerPC
-M: ppc %single>double-float FMR ;
-
-M: ppc %double>single-float FMR ;
+M: ppc %single>double-float double-rep %copy ;
+M: ppc %double>single-float double-rep %copy ;
+
+! VMX/AltiVec not supported yet
+M: ppc %broadcast-vector-reps { } ;
+M: ppc %gather-vector-2-reps { } ;
+M: ppc %gather-vector-4-reps { } ;
+M: ppc %add-vector-reps { } ;
+M: ppc %saturated-add-vector-reps { } ;
+M: ppc %add-sub-vector-reps { } ;
+M: ppc %sub-vector-reps { } ;
+M: ppc %saturated-sub-vector-reps { } ;
+M: ppc %mul-vector-reps { } ;
+M: ppc %saturated-mul-vector-reps { } ;
+M: ppc %div-vector-reps { } ;
+M: ppc %min-vector-reps { } ;
+M: ppc %max-vector-reps { } ;
+M: ppc %sqrt-vector-reps { } ;
+M: ppc %horizontal-add-vector-reps { } ;
+M: ppc %abs-vector-reps { } ;
+M: ppc %and-vector-reps { } ;
+M: ppc %or-vector-reps { } ;
+M: ppc %xor-vector-reps { } ;
+M: ppc %shl-vector-reps { } ;
+M: ppc %shr-vector-reps { } ;
M: ppc %unbox-alien ( dst src -- )
alien-offset LWZ ;
M:: ppc %load-gc-root ( gc-root register -- )
register 1 gc-root gc-root@ LWZ ;
-M:: ppc %call-gc ( gc-root-count -- )
+M:: ppc %call-gc ( gc-root-count temp -- )
3 1 gc-root-base local@ ADDI
gc-root-count 4 LI
"inline_gc" f %alien-invoke ;
4 3 4 LWZ
3 3 0 LWZ ;
+M: ppc %nest-stacks ( -- )
+ "nest_stacks" f %alien-invoke ;
+
+M: ppc %unnest-stacks ( -- )
+ "unnest_stacks" f %alien-invoke ;
+
M: ppc %unbox-small-struct ( size -- )
#! Alien must be in EAX.
heap-size cell align cell /i {
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs alien alien.c-types arrays strings
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
-cpu.architecture kernel kernel.private math memory namespaces make
-sequences words system layouts combinators math.order fry locals
-compiler.constants vm byte-arrays
+cpu.x86.features cpu.x86.features.private cpu.architecture kernel
+kernel.private math memory namespaces make sequences words system
+layouts combinators math.order fry locals compiler.constants
+byte-arrays io macros quotations compiler compiler.units init vm
compiler.cfg.registers
compiler.cfg.instructions
compiler.cfg.intrinsics
M: x86 two-operand? t ;
+M: x86 vector-regs float-regs ;
+
HOOK: stack-reg cpu ( -- reg )
HOOK: reserved-area-size cpu ( -- n )
! use in calls in and out of C
HOOK: temp-reg cpu ( -- reg )
-! Fastcall calling convention
-HOOK: param-reg-1 cpu ( -- reg )
-HOOK: param-reg-2 cpu ( -- reg )
-
HOOK: pic-tail-reg cpu ( -- reg )
M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
: align-code ( n -- )
0 <repetition> % ;
- :: (%slot) ( obj slot tag temp -- op )
- temp slot obj [+] LEA
- temp tag neg [+] ; inline
-
:: (%slot-imm) ( obj slot tag -- op )
obj slot cells tag - [+] ; inline
- M: x86 %slot ( dst obj slot tag temp -- ) (%slot) MOV ;
+ M: x86 %slot ( dst obj slot -- ) [+] MOV ;
M: x86 %slot-imm ( dst obj slot tag -- ) (%slot-imm) MOV ;
- M: x86 %set-slot ( src obj slot tag temp -- ) (%slot) swap MOV ;
+ M: x86 %set-slot ( src obj slot -- ) [+] swap MOV ;
M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ;
M: x86 %add 2over eq? [ nip ADD ] [ [+] LEA ] if ;
M: double-2-rep copy-register* drop MOVUPD ;
M: vector-rep copy-register* drop MOVDQU ;
-: copy-register ( dst src rep -- )
+M: x86 %copy ( dst src rep -- )
2over eq? [ 3drop ] [ copy-register* ] if ;
-M: x86 %copy ( dst src rep -- ) copy-register ;
-
:: overflow-template ( label dst src1 src2 insn -- )
src1 src2 insn call
label JO ; inline
dst rep rep-size 2 cells + byte-array temp %allot
16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
dst byte-array-offset [+]
- src rep copy-register ;
+ src rep %copy ;
M:: x86 %unbox-vector ( dst src rep -- )
dst src byte-array-offset [+]
- rep copy-register ;
+ rep %copy ;
+
+MACRO: available-reps ( alist -- )
+ ! Each SSE version adds new representations and supports
+ ! all old ones
+ unzip { } [ append ] accumulate rest swap suffix
+ [ [ 1quotation ] map ] bi@ zip
+ reverse [ { } ] suffix
+ '[ _ cond ] ;
M: x86 %broadcast-vector ( dst src rep -- )
{
- { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
- { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
+ { float-4-rep [ [ float-4-rep %copy ] [ drop dup 0 SHUFPS ] 2bi ] }
+ { double-2-rep [ [ double-2-rep %copy ] [ drop dup UNPCKLPD ] 2bi ] }
} case ;
+M: x86 %broadcast-vector-reps
+ {
+ ! Can't do this with sse1 since it will want to unbox
+ ! a double-precision float and convert to single precision
+ { sse2? { float-4-rep double-2-rep } }
+ } available-reps ;
+
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
rep {
{
float-4-rep
[
- dst src1 MOVSS
+ dst src1 float-4-rep %copy
dst src2 UNPCKLPS
src3 src4 UNPCKLPS
dst src3 MOVLHPS
}
} case ;
+M: x86 %gather-vector-4-reps
+ {
+ ! Can't do this with sse1 since it will want to unbox
+ ! double-precision floats and convert to single precision
+ { sse2? { float-4-rep } }
+ } available-reps ;
+
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
rep {
{
double-2-rep
[
- dst src1 MOVSD
+ dst src1 double-2-rep %copy
dst src2 UNPCKLPD
]
}
} case ;
+M: x86 %gather-vector-2-reps
+ {
+ { sse2? { double-2-rep } }
+ } available-reps ;
+
M: x86 %add-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ADDPS ] }
{ ushort-8-rep [ PADDW ] }
{ int-4-rep [ PADDD ] }
{ uint-4-rep [ PADDD ] }
+ { longlong-2-rep [ PADDQ ] }
+ { ulonglong-2-rep [ PADDQ ] }
+ } case drop ;
+
+M: x86 %add-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
+ {
+ { char-16-rep [ PADDSB ] }
+ { uchar-16-rep [ PADDUSB ] }
+ { short-8-rep [ PADDSW ] }
+ { ushort-8-rep [ PADDUSW ] }
} case drop ;
+M: x86 %saturated-add-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
+ } available-reps ;
+
+M: x86 %add-sub-vector ( dst src1 src2 rep -- )
+ {
+ { float-4-rep [ ADDSUBPS ] }
+ { double-2-rep [ ADDSUBPD ] }
+ } case drop ;
+
+M: x86 %add-sub-vector-reps
+ {
+ { sse3? { float-4-rep double-2-rep } }
+ } available-reps ;
+
M: x86 %sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ SUBPS ] }
{ ushort-8-rep [ PSUBW ] }
{ int-4-rep [ PSUBD ] }
{ uint-4-rep [ PSUBD ] }
+ { longlong-2-rep [ PSUBQ ] }
+ { ulonglong-2-rep [ PSUBQ ] }
} case drop ;
+M: x86 %sub-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
+ {
+ { char-16-rep [ PSUBSB ] }
+ { uchar-16-rep [ PSUBUSB ] }
+ { short-8-rep [ PSUBSW ] }
+ { ushort-8-rep [ PSUBUSW ] }
+ } case drop ;
+
+M: x86 %saturated-sub-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
+ } available-reps ;
+
M: x86 %mul-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ MULPS ] }
{ double-2-rep [ MULPD ] }
- { int-4-rep [ PMULLW ] }
+ { short-8-rep [ PMULLW ] }
+ { ushort-8-rep [ PMULLW ] }
+ { int-4-rep [ PMULLD ] }
+ { uint-4-rep [ PMULLD ] }
} case drop ;
+M: x86 %mul-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep short-8-rep ushort-8-rep } }
+ { sse4.1? { int-4-rep uint-4-rep } }
+ } available-reps ;
+
+M: x86 %saturated-mul-vector-reps
+ ! No multiplication with saturation on x86
+ { } ;
+
M: x86 %div-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ DIVPS ] }
{ double-2-rep [ DIVPD ] }
} case drop ;
+M: x86 %div-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep } }
+ } available-reps ;
+
M: x86 %min-vector ( dst src1 src2 rep -- )
{
+ { char-16-rep [ PMINSB ] }
+ { uchar-16-rep [ PMINUB ] }
+ { short-8-rep [ PMINSW ] }
+ { ushort-8-rep [ PMINUW ] }
+ { int-4-rep [ PMINSD ] }
+ { uint-4-rep [ PMINUD ] }
{ float-4-rep [ MINPS ] }
{ double-2-rep [ MINPD ] }
} case drop ;
+M: x86 %min-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
+ { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
+ } available-reps ;
+
M: x86 %max-vector ( dst src1 src2 rep -- )
{
+ { char-16-rep [ PMAXSB ] }
+ { uchar-16-rep [ PMAXUB ] }
+ { short-8-rep [ PMAXSW ] }
+ { ushort-8-rep [ PMAXUW ] }
+ { int-4-rep [ PMAXSD ] }
+ { uint-4-rep [ PMAXUD ] }
{ float-4-rep [ MAXPS ] }
{ double-2-rep [ MAXPD ] }
} case drop ;
+M: x86 %max-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
+ { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
+ } available-reps ;
+
+M: x86 %horizontal-add-vector ( dst src rep -- )
+ {
+ { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
+ { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
+ } case ;
+
+M: x86 %horizontal-add-vector-reps
+ {
+ { sse3? { float-4-rep double-2-rep } }
+ } available-reps ;
+
+M: x86 %abs-vector ( dst src rep -- )
+ {
+ { char-16-rep [ PABSB ] }
+ { short-8-rep [ PABSW ] }
+ { int-4-rep [ PABSD ] }
+ } case ;
+
+M: x86 %abs-vector-reps
+ {
+ { ssse3? { char-16-rep short-8-rep int-4-rep } }
+ } available-reps ;
+
M: x86 %sqrt-vector ( dst src rep -- )
{
{ float-4-rep [ SQRTPS ] }
{ double-2-rep [ SQRTPD ] }
} case ;
-M: x86 %horizontal-add-vector ( dst src rep -- )
+M: x86 %sqrt-vector-reps
{
- { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
- { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
- } case ;
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep } }
+ } available-reps ;
+
+M: x86 %and-vector ( dst src1 src2 rep -- )
+ {
+ { float-4-rep [ ANDPS ] }
+ { double-2-rep [ ANDPD ] }
+ [ drop PAND ]
+ } case drop ;
+
+M: x86 %and-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %or-vector ( dst src1 src2 rep -- )
+ {
+ { float-4-rep [ ORPS ] }
+ { double-2-rep [ ORPD ] }
+ [ drop POR ]
+ } case drop ;
+
+M: x86 %or-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %xor-vector ( dst src1 src2 rep -- )
+ {
+ { float-4-rep [ XORPS ] }
+ { double-2-rep [ XORPD ] }
+ [ drop PXOR ]
+ } case drop ;
+
+M: x86 %xor-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %shl-vector ( dst src1 src2 rep -- )
+ {
+ { short-8-rep [ PSLLW ] }
+ { ushort-8-rep [ PSLLW ] }
+ { int-4-rep [ PSLLD ] }
+ { uint-4-rep [ PSLLD ] }
+ { longlong-2-rep [ PSLLQ ] }
+ { ulonglong-2-rep [ PSLLQ ] }
+ } case drop ;
+
+M: x86 %shl-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %shr-vector ( dst src1 src2 rep -- )
+ {
+ { short-8-rep [ PSRAW ] }
+ { ushort-8-rep [ PSRLW ] }
+ { int-4-rep [ PSRAD ] }
+ { uint-4-rep [ PSRLD ] }
+ { ulonglong-2-rep [ PSRLQ ] }
+ } case drop ;
+
+M: x86 %shr-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %integer>scalar drop MOVD ;
+
+M: x86 %scalar>integer drop MOVD ;
M: x86 %unbox-alien ( dst src -- )
alien-offset [+] MOV ;
[ quot call ] with-save/restore
] if ; inline
-: ?MOV ( dst src -- )
- 2dup = [ 2drop ] [ MOV ] if ; inline
-
M:: x86 %string-nth ( dst src index temp -- )
! We request a small-reg of size 8 since those of size 16 are
! a superset.
! Compute code point
new-dst temp XOR
"end" resolve-label
- dst new-dst ?MOV
+ dst new-dst int-rep %copy
] with-small-register ;
M:: x86 %set-string-nth-fast ( ch str index temp -- )
ch { index str temp } 8 [| new-ch |
- new-ch ch ?MOV
+ new-ch ch int-rep %copy
temp str index [+] LEA
temp string-offset [+] new-ch 8-bit-version-of MOV
] with-small-register ;
dst { src } size [| new-dst |
new-dst dup size n-bit-version-of dup src [] MOV
quot call
- dst new-dst ?MOV
+ dst new-dst int-rep %copy
] with-small-register ; inline
: %alien-unsigned-getter ( dst src size -- )
M: x86 %alien-cell [] MOV ;
M: x86 %alien-float [] MOVSS ;
M: x86 %alien-double [] MOVSD ;
-M: x86 %alien-vector [ [] ] dip copy-register ;
+M: x86 %alien-vector [ [] ] dip %copy ;
:: %alien-integer-setter ( ptr value size -- )
value { ptr } size [| new-value |
- new-value value ?MOV
+ new-value value int-rep %copy
ptr [] new-value size n-bit-version-of MOV
] with-small-register ; inline
M: x86 %set-alien-cell [ [] ] dip MOV ;
M: x86 %set-alien-float [ [] ] dip MOVSS ;
M: x86 %set-alien-double [ [] ] dip MOVSD ;
-M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
+M: x86 %set-alien-vector [ [] ] 2dip %copy ;
: shift-count? ( reg -- ? ) { ECX RCX } memq? ;
M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
-M:: x86 %call-gc ( gc-root-count -- )
- ! Pass pointer to start of GC roots as first parameter
- param-reg-1 gc-root-base param@ LEA
- ! Pass number of roots as second parameter
- param-reg-2 gc-root-count MOV
- ! Call GC
- "inline_gc" %vm-invoke-3rd-arg ;
-
M: x86 %alien-global ( dst symbol library -- )
[ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;
\ UCOMISD (%compare-float-branch) ;
M:: x86 %spill ( src rep n -- )
- n spill@ src rep copy-register ;
+ n spill@ src rep %copy ;
M:: x86 %reload ( dst rep n -- )
- dst n spill@ rep copy-register ;
+ dst n spill@ rep %copy ;
M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
#! set up by the caller.
stack-frame get total-size>> + stack@ ;
-: enable-sse2 ( -- )
- enable-float-intrinsics
- enable-fsqrt
- enable-float-min/max
- enable-sse2-simd ;
-
-: enable-sse3 ( -- )
- enable-sse2
- enable-sse3-simd ;
-
+enable-simd
enable-min/max
enable-fixnum-log2
+
+:: install-sse2-check ( -- )
+ [
+ sse-version 20 < [
+ "This image was built to use SSE2 but your CPU does not support it." print
+ "You will need to bootstrap Factor again." print
+ flush
+ 1 exit
+ ] when
+ ] "cpu.x86" add-init-hook ;
+
+: enable-sse2 ( version -- )
+ 20 >= [
+ enable-float-intrinsics
+ enable-fsqrt
+ enable-float-min/max
+ install-sse2-check
+ ] when ;
+
+: check-sse ( -- )
+ [ { sse_version } compile ] with-optimizer
+ "Checking for multimedia extensions: " write sse-version
+ [ sse-string write " detected" print ] [ enable-sse2 ] bi ;