Merge branch 'slots' of git://factorcode.org/git/factor into slots

author sheeple <sheeple@oberon.local>

Sat, 26 Sep 2009 08:12:42 +0000 (03:12 -0500)

committer sheeple <sheeple@oberon.local>

Sat, 26 Sep 2009 08:12:42 +0000 (03:12 -0500)
author sheeple <sheeple@oberon.local>
Sat, 26 Sep 2009 08:12:42 +0000 (03:12 -0500)
committer sheeple <sheeple@oberon.local>
Sat, 26 Sep 2009 08:12:42 +0000 (03:12 -0500)
diff --combined basis/compiler/cfg/instructions/instructions.factor

index 8f0a5d5402f7d5c3b03b5997d41781247db69323,5f46f833ee3c850bce461240c10416255dfff4b5..6f5a05c672f298f15fd2bac0749283d111477346
--- 1/basis/compiler/cfg/instructions/instructions.factor
--- 2/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@@ -63,9 -63,7 +63,7 @@@ temp: temp/int-rep 
   ! Slot access
   INSN: ##slot
   def: dst/int-rep
- use: obj/int-rep slot/int-rep
- literal: tag
- temp: temp/int-rep ;
+ use: obj/int-rep slot/int-rep ;
   
   INSN: ##slot-imm
   def: dst/int-rep
@@@ -73,9 -71,7 +71,7 @@@ use: obj/int-re
   literal: slot tag ;
   
   INSN: ##set-slot
- use: src/int-rep obj/int-rep slot/int-rep
- literal: tag
- temp: temp/int-rep ;
+ use: src/int-rep obj/int-rep slot/int-rep ;
   
   INSN: ##set-slot-imm
   use: src/int-rep obj/int-rep
@@@ -305,36 -301,16 +301,36 @@@ def: ds
   use: src1 src2
   literal: rep ;
   
+ +PURE-INSN: ##saturated-add-vector
+ +def: dst
+ +use: src1 src2
+ +literal: rep ;
+ +
+ +PURE-INSN: ##add-sub-vector
+ +def: dst
+ +use: src1 src2
+ +literal: rep ;
+ +
   PURE-INSN: ##sub-vector
   def: dst
   use: src1 src2
   literal: rep ;
   
+ +PURE-INSN: ##saturated-sub-vector
+ +def: dst
+ +use: src1 src2
+ +literal: rep ;
+ +
   PURE-INSN: ##mul-vector
   def: dst
   use: src1 src2
   literal: rep ;
   
+ +PURE-INSN: ##saturated-mul-vector
+ +def: dst
+ +use: src1 src2
+ +literal: rep ;
+ +
   PURE-INSN: ##div-vector
   def: dst
   use: src1 src2
@@@ -350,57 -326,16 +346,57 @@@ def: ds
   use: src1 src2
   literal: rep ;
   
+ +PURE-INSN: ##horizontal-add-vector
+ +def: dst/scalar-rep
+ +use: src
+ +literal: rep ;
+ +
+ +PURE-INSN: ##abs-vector
+ +def: dst
+ +use: src
+ +literal: rep ;
+ +
   PURE-INSN: ##sqrt-vector
   def: dst
   use: src
   literal: rep ;
   
- -PURE-INSN: ##horizontal-add-vector
- -def: dst/scalar-rep
+ +PURE-INSN: ##and-vector
+ +def: dst
+ +use: src1 src2
+ +literal: rep ;
+ +
+ +PURE-INSN: ##or-vector
+ +def: dst
+ +use: src1 src2
+ +literal: rep ;
+ +
+ +PURE-INSN: ##xor-vector
+ +def: dst
+ +use: src1 src2
+ +literal: rep ;
+ +
+ +PURE-INSN: ##shl-vector
+ +def: dst
+ +use: src1 src2/scalar-rep
+ +literal: rep ;
+ +
+ +PURE-INSN: ##shr-vector
+ +def: dst
+ +use: src1 src2/scalar-rep
+ +literal: rep ;
+ +
+ +! Scalar/integer conversion
+ +PURE-INSN: ##scalar>integer
+ +def: dst/int-rep
   use: src
   literal: rep ;
   
+ +PURE-INSN: ##integer>scalar
+ +def: dst
+ +use: src/int-rep
+ +literal: rep ;
+ +
   ! Boxing and unboxing aliens
   PURE-INSN: ##box-alien
   def: dst/int-rep
@@@ -513,7 -448,7 +509,7 @@@ literal: symbol library 
   
   INSN: ##vm-field-ptr
   def: dst/int-rep
- -literal: fieldname ;
+ +literal: field-name ;
   
   ! FFI
   INSN: ##alien-invoke
diff --combined basis/cpu/architecture/architecture.factor

index eb3c43210183a3243bf9f6ddf0707e18bd1170f3,6b41613c007e259ea212aab2ee52bd876dc56e2f..c27aacb875ae7d622699ce544fd365b1b914af22
--- 1/basis/cpu/architecture/architecture.factor
--- 2/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@@ -22,57 -22,24 +22,57 @@@ SINGLETONS: float-rep double-rep 
   
   ! On x86, floating point registers are really vector registers
   SINGLETONS:
- -float-4-rep
- -double-2-rep
   char-16-rep
   uchar-16-rep
   short-8-rep
   ushort-8-rep
   int-4-rep
- -uint-4-rep ;
+ +uint-4-rep
+ +longlong-2-rep
+ +ulonglong-2-rep ;
   
- -UNION: vector-rep
+ +! Scalar values in the high component of a vector register
+ +SINGLETONS:
+ +char-scalar-rep
+ +uchar-scalar-rep
+ +short-scalar-rep
+ +ushort-scalar-rep
+ +int-scalar-rep
+ +uint-scalar-rep
+ +longlong-scalar-rep
+ +ulonglong-scalar-rep ;
+ +
+ +SINGLETONS:
   float-4-rep
- -double-2-rep
+ +double-2-rep ;
+ +
+ +UNION: int-vector-rep
   char-16-rep
   uchar-16-rep
   short-8-rep
   ushort-8-rep
   int-4-rep
- -uint-4-rep ;
+ +uint-4-rep
+ +longlong-2-rep
+ +ulonglong-2-rep ;
+ +
+ +UNION: scalar-rep
+ +char-scalar-rep
+ +uchar-scalar-rep
+ +short-scalar-rep
+ +ushort-scalar-rep
+ +int-scalar-rep
+ +uint-scalar-rep
+ +longlong-scalar-rep
+ +ulonglong-scalar-rep ;
+ +
+ +UNION: float-vector-rep
+ +float-4-rep
+ +double-2-rep ;
+ +
+ +UNION: vector-rep
+ +int-vector-rep
+ +float-vector-rep ;
   
   UNION: representation
   any-rep
@@@ -80,8 -47,7 +80,8 @@@ tagged-re
   int-rep
   float-rep
   double-rep
- -vector-rep ;
+ +vector-rep
+ +scalar-rep ;
   
   ! Register classes
   SINGLETONS: int-regs float-regs ;
@@@ -92,18 -58,13 +92,18 @@@ CONSTANT: reg-classes { int-regs float-
   ! A pseudo-register class for parameters spilled on the stack
   SINGLETON: stack-params
   
+ +! On x86, vectors and floats are stored in the same register bank
+ +! On PowerPC they are distinct
+ +HOOK: vector-regs cpu ( -- reg-class )
+ +
   GENERIC: reg-class-of ( rep -- reg-class )
   
   M: tagged-rep reg-class-of drop int-regs ;
   M: int-rep reg-class-of drop int-regs ;
   M: float-rep reg-class-of drop float-regs ;
   M: double-rep reg-class-of drop float-regs ;
- -M: vector-rep reg-class-of drop float-regs ;
+ +M: vector-rep reg-class-of drop vector-regs ;
+ +M: scalar-rep reg-class-of drop vector-regs ;
   M: stack-params reg-class-of drop stack-params ;
   
   GENERIC: rep-size ( rep -- n ) foldable
@@@ -115,22 -76,10 +115,22 @@@ M: double-rep rep-size drop 8 
   M: stack-params rep-size drop cell ;
   M: vector-rep rep-size drop 16 ;
   
+ +GENERIC: rep-component-type ( rep -- n )
+ +
+ +! Methods defined in alien.c-types
+ +
   GENERIC: scalar-rep-of ( rep -- rep' )
   
   M: float-4-rep scalar-rep-of drop float-rep ;
   M: double-2-rep scalar-rep-of drop double-rep ;
+ +M: char-16-rep scalar-rep-of drop char-scalar-rep ;
+ +M: uchar-16-rep scalar-rep-of drop uchar-scalar-rep ;
+ +M: short-8-rep scalar-rep-of drop short-scalar-rep ;
+ +M: ushort-8-rep scalar-rep-of drop ushort-scalar-rep ;
+ +M: int-4-rep scalar-rep-of drop int-scalar-rep ;
+ +M: uint-4-rep scalar-rep-of drop uint-scalar-rep ;
+ +M: longlong-2-rep scalar-rep-of drop longlong-scalar-rep ;
+ +M: ulonglong-2-rep scalar-rep-of drop ulonglong-scalar-rep ;
   
   ! Mapping from register class to machine registers
   HOOK: machine-registers cpu ( -- assoc )
@@@ -153,9 -102,9 +153,9 @@@ HOOK: %return cpu ( -- 
   
   HOOK: %dispatch cpu ( src temp -- )
   
- HOOK: %slot cpu ( dst obj slot tag temp -- )
+ HOOK: %slot cpu ( dst obj slot -- )
   HOOK: %slot-imm cpu ( dst obj slot tag -- )
- HOOK: %set-slot cpu ( src obj slot tag temp -- )
+ HOOK: %set-slot cpu ( src obj slot -- )
   HOOK: %set-slot-imm cpu ( src obj slot tag -- )
   
   HOOK: %string-nth cpu ( dst obj index temp -- )
@@@ -218,49 -167,15 +218,49 @@@ HOOK: %unbox-vector cpu ( dst src rep -
   HOOK: %broadcast-vector cpu ( dst src rep -- )
   HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
   HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
- -
   HOOK: %add-vector cpu ( dst src1 src2 rep -- )
+ +HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
+ +HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
   HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
+ +HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
   HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
+ +HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
   HOOK: %div-vector cpu ( dst src1 src2 rep -- )
   HOOK: %min-vector cpu ( dst src1 src2 rep -- )
   HOOK: %max-vector cpu ( dst src1 src2 rep -- )
   HOOK: %sqrt-vector cpu ( dst src rep -- )
   HOOK: %horizontal-add-vector cpu ( dst src rep -- )
+ +HOOK: %abs-vector cpu ( dst src rep -- )
+ +HOOK: %and-vector cpu ( dst src1 src2 rep -- )
+ +HOOK: %or-vector cpu ( dst src1 src2 rep -- )
+ +HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
+ +HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
+ +HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
+ +
+ +HOOK: %integer>scalar cpu ( dst src rep -- )
+ +HOOK: %scalar>integer cpu ( dst src rep -- )
+ +
+ +HOOK: %broadcast-vector-reps cpu ( -- reps )
+ +HOOK: %gather-vector-2-reps cpu ( -- reps )
+ +HOOK: %gather-vector-4-reps cpu ( -- reps )
+ +HOOK: %add-vector-reps cpu ( -- reps )
+ +HOOK: %saturated-add-vector-reps cpu ( -- reps )
+ +HOOK: %add-sub-vector-reps cpu ( -- reps )
+ +HOOK: %sub-vector-reps cpu ( -- reps )
+ +HOOK: %saturated-sub-vector-reps cpu ( -- reps )
+ +HOOK: %mul-vector-reps cpu ( -- reps )
+ +HOOK: %saturated-mul-vector-reps cpu ( -- reps )
+ +HOOK: %div-vector-reps cpu ( -- reps )
+ +HOOK: %min-vector-reps cpu ( -- reps )
+ +HOOK: %max-vector-reps cpu ( -- reps )
+ +HOOK: %sqrt-vector-reps cpu ( -- reps )
+ +HOOK: %horizontal-add-vector-reps cpu ( -- reps )
+ +HOOK: %abs-vector-reps cpu ( -- reps )
+ +HOOK: %and-vector-reps cpu ( -- reps )
+ +HOOK: %or-vector-reps cpu ( -- reps )
+ +HOOK: %xor-vector-reps cpu ( -- reps )
+ +HOOK: %shl-vector-reps cpu ( -- reps )
+ +HOOK: %shr-vector-reps cpu ( -- reps )
   
   HOOK: %unbox-alien cpu ( dst src -- )
   HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
@@@ -296,7 -211,7 +296,7 @@@ HOOK: %write-barrier cpu ( src card# ta
   HOOK: %check-nursery cpu ( label temp1 temp2 -- )
   HOOK: %save-gc-root cpu ( gc-root register -- )
   HOOK: %load-gc-root cpu ( gc-root register -- )
- -HOOK: %call-gc cpu ( gc-root-count -- )
+ +HOOK: %call-gc cpu ( gc-root-count temp1 -- )
   
   HOOK: %prologue cpu ( n -- )
   HOOK: %epilogue cpu ( n -- )
@@@ -383,6 -298,9 +383,6 @@@ M: object %prepare-var-args 
   
   HOOK: %alien-invoke cpu ( function library -- )
   
- -HOOK: %vm-invoke-1st-arg cpu ( function -- )
- -HOOK: %vm-invoke-3rd-arg cpu ( function -- )
- -
   HOOK: %cleanup cpu ( params -- )
   
   M: object %cleanup ( params -- ) drop ;
@@@ -395,10 -313,6 +395,10 @@@ HOOK: %alien-callback cpu ( quot -- 
   
   HOOK: %callback-value cpu ( ctype -- )
   
+ +HOOK: %nest-stacks cpu ( -- )
+ +
+ +HOOK: %unnest-stacks cpu ( -- )
+ +
   ! Return to caller with stdcall unwinding (only for x86)
   HOOK: %callback-return cpu ( params -- )
   
diff --combined basis/cpu/ppc/ppc.factor

index bcd52206a0caeebab86cdea25e96fa91b5e60db1,4ae92e72301281aa6fcbc787df5e04a95cd9c1f2..5461002dc86a3465a371fd2384010714d6990989
--- 1/basis/cpu/ppc/ppc.factor
--- 2/basis/cpu/ppc/ppc.factor
+++ b/basis/cpu/ppc/ppc.factor
@@@ -40,6 -40,9 +40,6 @@@ enable-float-intrinsic
   
   M: ppc %vm-field-ptr ( dst field -- ) %load-vm-field-addr ;
   
- -M: ppc %vm-invoke-1st-arg ( function -- ) f %alien-invoke ;
- -M: ppc %vm-invoke-3rd-arg ( function -- ) f %alien-invoke ;
- -
   M: ppc machine-registers
       {
           { int-regs $[ 2 12 [a,b] 15 29 [a,b] append ] }
@@@ -139,16 -142,12 +139,12 @@@ M:: ppc %dispatch ( src temp -- 
       temp MTCTR
       BCTR ;
   
- :: (%slot) ( obj slot tag temp -- reg offset )
-     temp slot obj ADD
-     temp tag neg ; inline
- 
   : (%slot-imm) ( obj slot tag -- reg offset )
       [ cells ] dip - ; inline
   
- M: ppc %slot ( dst obj slot tag temp -- ) (%slot) LWZ ;
+ M: ppc %slot ( dst obj slot -- ) LWZX ;
   M: ppc %slot-imm ( dst obj slot tag -- ) (%slot-imm) LWZ ;
- M: ppc %set-slot ( src obj slot tag temp -- ) (%slot) STW ;
+ M: ppc %set-slot ( src obj slot -- ) STWX ;
   M: ppc %set-slot-imm ( src obj slot tag -- ) (%slot-imm) STW ;
   
   M:: ppc %string-nth ( dst src index temp -- )
@@@ -281,12 -280,10 +277,12 @@@ M:: ppc %float>integer ( dst src -- 
       dst 1 4 scratch@ LWZ ;
   
   M: ppc %copy ( dst src rep -- )
- -    {
- -        { int-rep [ MR ] }
- -        { double-rep [ FMR ] }
- -    } case ;
+ +    2over eq? [ 3drop ] [
+ +        {
+ +            { int-rep [ MR ] }
+ +            { double-rep [ FMR ] }
+ +        } case
+ +    ] if ;
   
   M: ppc %unbox-float ( dst src -- ) float-offset LFD ;
   
@@@ -298,7 -295,7 +294,7 @@@ M:: ppc %box-float ( dst src temp -- 
       [ float-regs param-regs nth 1 ] [ n>> spill@ ] bi* LFD ;
   
   : float-function-return ( reg -- )
- -    float-regs return-reg 2dup = [ 2drop ] [ FMR ] if ;
+ +    float-regs return-reg double-rep %copy ;
   
   M:: ppc %unary-float-function ( dst src func -- )
       0 src float-function-param
@@@ -312,31 -309,9 +308,31 @@@ M:: ppc %binary-float-function ( dst sr
       dst float-function-return ;
   
   ! Internal format is always double-precision on PowerPC
- -M: ppc %single>double-float FMR ;
- -
- -M: ppc %double>single-float FMR ;
+ +M: ppc %single>double-float double-rep %copy ;
+ +M: ppc %double>single-float double-rep %copy ;
+ +
+ +! VMX/AltiVec not supported yet
+ +M: ppc %broadcast-vector-reps { } ;
+ +M: ppc %gather-vector-2-reps { } ;
+ +M: ppc %gather-vector-4-reps { } ;
+ +M: ppc %add-vector-reps { } ;
+ +M: ppc %saturated-add-vector-reps { } ;
+ +M: ppc %add-sub-vector-reps { } ;
+ +M: ppc %sub-vector-reps { } ;
+ +M: ppc %saturated-sub-vector-reps { } ;
+ +M: ppc %mul-vector-reps { } ;
+ +M: ppc %saturated-mul-vector-reps { } ;
+ +M: ppc %div-vector-reps { } ;
+ +M: ppc %min-vector-reps { } ;
+ +M: ppc %max-vector-reps { } ;
+ +M: ppc %sqrt-vector-reps { } ;
+ +M: ppc %horizontal-add-vector-reps { } ;
+ +M: ppc %abs-vector-reps { } ;
+ +M: ppc %and-vector-reps { } ;
+ +M: ppc %or-vector-reps { } ;
+ +M: ppc %xor-vector-reps { } ;
+ +M: ppc %shl-vector-reps { } ;
+ +M: ppc %shr-vector-reps { } ;
   
   M: ppc %unbox-alien ( dst src -- )
       alien-offset LWZ ;
@@@ -510,7 -485,7 +506,7 @@@ M:: ppc %save-gc-root ( gc-root registe
   M:: ppc %load-gc-root ( gc-root register -- )
       register 1 gc-root gc-root@ LWZ ;
   
- -M:: ppc %call-gc ( gc-root-count -- )
+ +M:: ppc %call-gc ( gc-root-count temp -- )
       3 1 gc-root-base local@ ADDI
       gc-root-count 4 LI
       "inline_gc" f %alien-invoke ;
@@@ -778,12 -753,6 +774,12 @@@ M: ppc %box-small-struct ( c-type -- 
       4 3 4 LWZ
       3 3 0 LWZ ;
   
+ +M: ppc %nest-stacks ( -- )
+ +    "nest_stacks" f %alien-invoke ;
+ +
+ +M: ppc %unnest-stacks ( -- )
+ +    "unnest_stacks" f %alien-invoke ;
+ +
   M: ppc %unbox-small-struct ( size -- )
       #! Alien must be in EAX.
       heap-size cell align cell /i {
diff --combined basis/cpu/x86/x86.factor

index d6bf8feaa1b38b3a1453347fe4ff3cafe70be4a1,fc89e1cfd692233344ecd1399030b6da98628fc4..d89e360d09a32ebe8897b0b2936fcb32200e2a27
--- 1/basis/cpu/x86/x86.factor
--- 2/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@@ -2,10 -2,9 +2,10 @@@
   ! See http://factorcode.org/license.txt for BSD license.
   USING: accessors assocs alien alien.c-types arrays strings
   cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
- -cpu.architecture kernel kernel.private math memory namespaces make
- -sequences words system layouts combinators math.order fry locals
- -compiler.constants vm byte-arrays
+ +cpu.x86.features cpu.x86.features.private cpu.architecture kernel
+ +kernel.private math memory namespaces make sequences words system
+ +layouts combinators math.order fry locals compiler.constants
+ +byte-arrays io macros quotations compiler compiler.units init vm
   compiler.cfg.registers
   compiler.cfg.instructions
   compiler.cfg.intrinsics
@@@ -22,8 -21,6 +22,8 @@@ M: label JUMPcc [ 0 ] dip JUMPcc rc-rel
   
   M: x86 two-operand? t ;
   
+ +M: x86 vector-regs float-regs ;
+ +
   HOOK: stack-reg cpu ( -- reg )
   
   HOOK: reserved-area-size cpu ( -- n )
@@@ -52,6 -49,10 +52,6 @@@ M: x86 stack-frame-size ( stack-frame -
   ! use in calls in and out of C
   HOOK: temp-reg cpu ( -- reg )
   
- -! Fastcall calling convention
- -HOOK: param-reg-1 cpu ( -- reg )
- -HOOK: param-reg-2 cpu ( -- reg )
- -
   HOOK: pic-tail-reg cpu ( -- reg )
   
   M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
@@@ -94,16 -95,12 +94,12 @@@ M: x86 %return ( -- ) 0 RET 
   : align-code ( n -- )
       0 <repetition> % ;
   
- :: (%slot) ( obj slot tag temp -- op )
-     temp slot obj [+] LEA
-     temp tag neg [+] ; inline
- 
   :: (%slot-imm) ( obj slot tag -- op )
       obj slot cells tag - [+] ; inline
   
- M: x86 %slot ( dst obj slot tag temp -- ) (%slot) MOV ;
+ M: x86 %slot ( dst obj slot -- ) [+] MOV ;
   M: x86 %slot-imm ( dst obj slot tag -- ) (%slot-imm) MOV ;
- M: x86 %set-slot ( src obj slot tag temp -- ) (%slot) swap MOV ;
+ M: x86 %set-slot ( src obj slot -- ) [+] swap MOV ;
   M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ;
   
   M: x86 %add     2over eq? [ nip ADD ] [ [+] LEA ] if ;
@@@ -138,9 -135,11 +134,9 @@@ M: float-4-rep copy-register* drop MOVU
   M: double-2-rep copy-register* drop MOVUPD ;
   M: vector-rep copy-register* drop MOVDQU ;
   
- -: copy-register ( dst src rep -- )
+ +M: x86 %copy ( dst src rep -- )
       2over eq? [ 3drop ] [ copy-register* ] if ;
   
- -M: x86 %copy ( dst src rep -- ) copy-register ;
- -
   :: overflow-template ( label dst src1 src2 insn -- )
       src1 src2 insn call
       label JO ; inline
@@@ -239,39 -238,24 +235,39 @@@ M:: x86 %box-vector ( dst src rep temp 
       dst rep rep-size 2 cells + byte-array temp %allot
       16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm
       dst byte-array-offset [+]
- -    src rep copy-register ;
+ +    src rep %copy ;
   
   M:: x86 %unbox-vector ( dst src rep -- )
       dst src byte-array-offset [+]
- -    rep copy-register ;
+ +    rep %copy ;
+ +
+ +MACRO: available-reps ( alist -- )
+ +    ! Each SSE version adds new representations and supports
+ +    ! all old ones
+ +    unzip { } [ append ] accumulate rest swap suffix
+ +    [ [ 1quotation ] map ] bi@ zip
+ +    reverse [ { } ] suffix
+ +    '[ _ cond ] ;
   
   M: x86 %broadcast-vector ( dst src rep -- )
       {
- -        { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
- -        { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
+ +        { float-4-rep [ [ float-4-rep %copy ] [ drop dup 0 SHUFPS ] 2bi ] }
+ +        { double-2-rep [ [ double-2-rep %copy ] [ drop dup UNPCKLPD ] 2bi ] }
       } case ;
   
+ +M: x86 %broadcast-vector-reps
+ +    {
+ +        ! Can't do this with sse1 since it will want to unbox
+ +        ! a double-precision float and convert to single precision
+ +        { sse2? { float-4-rep double-2-rep } }
+ +    } available-reps ;
+ +
   M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
       rep {
           {
               float-4-rep
               [
- -                dst src1 MOVSS
+ +                dst src1 float-4-rep %copy
                   dst src2 UNPCKLPS
                   src3 src4 UNPCKLPS
                   dst src3 MOVLHPS
@@@ -279,29 -263,17 +275,29 @@@
           }
       } case ;
   
+ +M: x86 %gather-vector-4-reps
+ +    {
+ +        ! Can't do this with sse1 since it will want to unbox
+ +        ! double-precision floats and convert to single precision
+ +        { sse2? { float-4-rep } }
+ +    } available-reps ;
+ +
   M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
       rep {
           {
               double-2-rep
               [
- -                dst src1 MOVSD
+ +                dst src1 double-2-rep %copy
                   dst src2 UNPCKLPD
               ]
           }
       } case ;
   
+ +M: x86 %gather-vector-2-reps
+ +    {
+ +        { sse2? { double-2-rep } }
+ +    } available-reps ;
+ +
   M: x86 %add-vector ( dst src1 src2 rep -- )
       {
           { float-4-rep [ ADDPS ] }
@@@ -312,40 -284,8 +308,40 @@@
           { ushort-8-rep [ PADDW ] }
           { int-4-rep [ PADDD ] }
           { uint-4-rep [ PADDD ] }
+ +        { longlong-2-rep [ PADDQ ] }
+ +        { ulonglong-2-rep [ PADDQ ] }
+ +    } case drop ;
+ +
+ +M: x86 %add-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
+ +    {
+ +        { char-16-rep [ PADDSB ] }
+ +        { uchar-16-rep [ PADDUSB ] }
+ +        { short-8-rep [ PADDSW ] }
+ +        { ushort-8-rep [ PADDUSW ] }
       } case drop ;
   
+ +M: x86 %saturated-add-vector-reps
+ +    {
+ +        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %add-sub-vector ( dst src1 src2 rep -- )
+ +    {
+ +        { float-4-rep [ ADDSUBPS ] }
+ +        { double-2-rep [ ADDSUBPD ] }
+ +    } case drop ;
+ +
+ +M: x86 %add-sub-vector-reps
+ +    {
+ +        { sse3? { float-4-rep double-2-rep } }
+ +    } available-reps ;
+ +
   M: x86 %sub-vector ( dst src1 src2 rep -- )
       {
           { float-4-rep [ SUBPS ] }
@@@ -356,206 -296,44 +352,206 @@@
           { ushort-8-rep [ PSUBW ] }
           { int-4-rep [ PSUBD ] }
           { uint-4-rep [ PSUBD ] }
+ +        { longlong-2-rep [ PSUBQ ] }
+ +        { ulonglong-2-rep [ PSUBQ ] }
       } case drop ;
   
+ +M: x86 %sub-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
+ +    {
+ +        { char-16-rep [ PSUBSB ] }
+ +        { uchar-16-rep [ PSUBUSB ] }
+ +        { short-8-rep [ PSUBSW ] }
+ +        { ushort-8-rep [ PSUBUSW ] }
+ +    } case drop ;
+ +
+ +M: x86 %saturated-sub-vector-reps
+ +    {
+ +        { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
+ +    } available-reps ;
+ +
   M: x86 %mul-vector ( dst src1 src2 rep -- )
       {
           { float-4-rep [ MULPS ] }
           { double-2-rep [ MULPD ] }
- -        { int-4-rep [ PMULLW ] }
+ +        { short-8-rep [ PMULLW ] }
+ +        { ushort-8-rep [ PMULLW ] }
+ +        { int-4-rep [ PMULLD ] }
+ +        { uint-4-rep [ PMULLD ] }
       } case drop ;
   
+ +M: x86 %mul-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { double-2-rep short-8-rep ushort-8-rep } }
+ +        { sse4.1? { int-4-rep uint-4-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %saturated-mul-vector-reps
+ +    ! No multiplication with saturation on x86
+ +    { } ;
+ +
   M: x86 %div-vector ( dst src1 src2 rep -- )
       {
           { float-4-rep [ DIVPS ] }
           { double-2-rep [ DIVPD ] }
       } case drop ;
   
+ +M: x86 %div-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { double-2-rep } }
+ +    } available-reps ;
+ +
   M: x86 %min-vector ( dst src1 src2 rep -- )
       {
+ +        { char-16-rep [ PMINSB ] }
+ +        { uchar-16-rep [ PMINUB ] }
+ +        { short-8-rep [ PMINSW ] }
+ +        { ushort-8-rep [ PMINUW ] }
+ +        { int-4-rep [ PMINSD ] }
+ +        { uint-4-rep [ PMINUD ] }
           { float-4-rep [ MINPS ] }
           { double-2-rep [ MINPD ] }
       } case drop ;
   
+ +M: x86 %min-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
+ +        { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
+ +    } available-reps ;
+ +
   M: x86 %max-vector ( dst src1 src2 rep -- )
       {
+ +        { char-16-rep [ PMAXSB ] }
+ +        { uchar-16-rep [ PMAXUB ] }
+ +        { short-8-rep [ PMAXSW ] }
+ +        { ushort-8-rep [ PMAXUW ] }
+ +        { int-4-rep [ PMAXSD ] }
+ +        { uint-4-rep [ PMAXUD ] }
           { float-4-rep [ MAXPS ] }
           { double-2-rep [ MAXPD ] }
       } case drop ;
   
+ +M: x86 %max-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } }
+ +        { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %horizontal-add-vector ( dst src rep -- )
+ +    {
+ +        { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
+ +        { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
+ +    } case ;
+ +
+ +M: x86 %horizontal-add-vector-reps
+ +    {
+ +        { sse3? { float-4-rep double-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %abs-vector ( dst src rep -- )
+ +    {
+ +        { char-16-rep [ PABSB ] }
+ +        { short-8-rep [ PABSW ] }
+ +        { int-4-rep [ PABSD ] }
+ +    } case ;
+ +
+ +M: x86 %abs-vector-reps
+ +    {
+ +        { ssse3? { char-16-rep short-8-rep int-4-rep } }
+ +    } available-reps ;
+ +
   M: x86 %sqrt-vector ( dst src rep -- )
       {
           { float-4-rep [ SQRTPS ] }
           { double-2-rep [ SQRTPD ] }
       } case ;
   
- -M: x86 %horizontal-add-vector ( dst src rep -- )
+ +M: x86 %sqrt-vector-reps
       {
- -        { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
- -        { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
- -    } case ;
+ +        { sse? { float-4-rep } }
+ +        { sse2? { double-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %and-vector ( dst src1 src2 rep -- )
+ +    {
+ +        { float-4-rep [ ANDPS ] }
+ +        { double-2-rep [ ANDPD ] }
+ +        [ drop PAND ]
+ +    } case drop ;
+ +
+ +M: x86 %and-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %or-vector ( dst src1 src2 rep -- )
+ +    {
+ +        { float-4-rep [ ORPS ] }
+ +        { double-2-rep [ ORPD ] }
+ +        [ drop POR ]
+ +    } case drop ;
+ +
+ +M: x86 %or-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %xor-vector ( dst src1 src2 rep -- )
+ +    {
+ +        { float-4-rep [ XORPS ] }
+ +        { double-2-rep [ XORPD ] }
+ +        [ drop PXOR ]
+ +    } case drop ;
+ +
+ +M: x86 %xor-vector-reps
+ +    {
+ +        { sse? { float-4-rep } }
+ +        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %shl-vector ( dst src1 src2 rep -- )
+ +    {
+ +        { short-8-rep [ PSLLW ] }
+ +        { ushort-8-rep [ PSLLW ] }
+ +        { int-4-rep [ PSLLD ] }
+ +        { uint-4-rep [ PSLLD ] }
+ +        { longlong-2-rep [ PSLLQ ] }
+ +        { ulonglong-2-rep [ PSLLQ ] }
+ +    } case drop ;
+ +
+ +M: x86 %shl-vector-reps
+ +    {
+ +        { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %shr-vector ( dst src1 src2 rep -- )
+ +    {
+ +        { short-8-rep [ PSRAW ] }
+ +        { ushort-8-rep [ PSRLW ] }
+ +        { int-4-rep [ PSRAD ] }
+ +        { uint-4-rep [ PSRLD ] }
+ +        { ulonglong-2-rep [ PSRLQ ] }
+ +    } case drop ;
+ +
+ +M: x86 %shr-vector-reps
+ +    {
+ +        { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
+ +    } available-reps ;
+ +
+ +M: x86 %integer>scalar drop MOVD ;
+ +
+ +M: x86 %scalar>integer drop MOVD ;
   
   M: x86 %unbox-alien ( dst src -- )
       alien-offset [+] MOV ;
@@@ -670,6 -448,9 +666,6 @@@ M: x86.64 has-small-reg? 2drop t 
           [ quot call ] with-save/restore
       ] if ; inline
   
- -: ?MOV ( dst src -- )
- -    2dup = [ 2drop ] [ MOV ] if ; inline
- -
   M:: x86 %string-nth ( dst src index temp -- )
       ! We request a small-reg of size 8 since those of size 16 are
       ! a superset.
@@@ -697,12 -478,12 +693,12 @@@
           ! Compute code point
           new-dst temp XOR
           "end" resolve-label
- -        dst new-dst ?MOV
+ +        dst new-dst int-rep %copy
       ] with-small-register ;
   
   M:: x86 %set-string-nth-fast ( ch str index temp -- )
       ch { index str temp } 8 [| new-ch |
- -        new-ch ch ?MOV
+ +        new-ch ch int-rep %copy
           temp str index [+] LEA
           temp string-offset [+] new-ch 8-bit-version-of MOV
       ] with-small-register ;
@@@ -711,7 -492,7 +707,7 @@@
       dst { src } size [| new-dst |
           new-dst dup size n-bit-version-of dup src [] MOV
           quot call
- -        dst new-dst ?MOV
+ +        dst new-dst int-rep %copy
       ] with-small-register ; inline
   
   : %alien-unsigned-getter ( dst src size -- )
@@@ -731,11 -512,11 +727,11 @@@ M: x86 %alien-signed-4 32 %alien-signed
   M: x86 %alien-cell [] MOV ;
   M: x86 %alien-float [] MOVSS ;
   M: x86 %alien-double [] MOVSD ;
- -M: x86 %alien-vector [ [] ] dip copy-register ;
+ +M: x86 %alien-vector [ [] ] dip %copy ;
   
   :: %alien-integer-setter ( ptr value size -- )
       value { ptr } size [| new-value |
- -        new-value value ?MOV
+ +        new-value value int-rep %copy
           ptr [] new-value size n-bit-version-of MOV
       ] with-small-register ; inline
   
@@@ -745,7 -526,7 +741,7 @@@ M: x86 %set-alien-integer-4 32 %alien-i
   M: x86 %set-alien-cell [ [] ] dip MOV ;
   M: x86 %set-alien-float [ [] ] dip MOVSS ;
   M: x86 %set-alien-double [ [] ] dip MOVSD ;
- -M: x86 %set-alien-vector [ [] ] 2dip copy-register ;
+ +M: x86 %set-alien-vector [ [] ] 2dip %copy ;
   
   : shift-count? ( reg -- ? ) { ECX RCX } memq? ;
   
@@@ -823,6 -604,14 +819,6 @@@ M: x86 %save-gc-root ( gc-root registe
   
   M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
   
- -M:: x86 %call-gc ( gc-root-count -- )
- -    ! Pass pointer to start of GC roots as first parameter
- -    param-reg-1 gc-root-base param@ LEA
- -    ! Pass number of roots as second parameter
- -    param-reg-2 gc-root-count MOV
- -    ! Call GC
- -    "inline_gc" %vm-invoke-3rd-arg ; 
- -
   M: x86 %alien-global ( dst symbol library -- )
       [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;    
   
@@@ -942,10 -731,10 +938,10 @@@ M: x86 %compare-float-unordered-branch 
       \ UCOMISD (%compare-float-branch) ;
   
   M:: x86 %spill ( src rep n -- )
- -    n spill@ src rep copy-register ;
+ +    n spill@ src rep %copy ;
   
   M:: x86 %reload ( dst rep n -- )
- -    dst n spill@ rep copy-register ;
+ +    dst n spill@ rep %copy ;
   
   M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
   
@@@ -974,29 -763,15 +970,29 @@@ M: x86 small-enough? ( n -- ? 
       #! set up by the caller.
       stack-frame get total-size>> + stack@ ;
   
- -: enable-sse2 ( -- )
- -    enable-float-intrinsics
- -    enable-fsqrt
- -    enable-float-min/max
- -    enable-sse2-simd ;
- -
- -: enable-sse3 ( -- )
- -    enable-sse2
- -    enable-sse3-simd ;
- -
+ +enable-simd
   enable-min/max
   enable-fixnum-log2
+ +
+ +:: install-sse2-check ( -- )
+ +    [
+ +        sse-version 20 < [
+ +            "This image was built to use SSE2 but your CPU does not support it." print
+ +            "You will need to bootstrap Factor again." print
+ +            flush
+ +            1 exit
+ +        ] when
+ +    ] "cpu.x86" add-init-hook ;
+ +
+ +: enable-sse2 ( version -- )
+ +    20 >= [
+ +        enable-float-intrinsics
+ +        enable-fsqrt
+ +        enable-float-min/max
+ +        install-sse2-check
+ +    ] when ;
+ +
+ +: check-sse ( -- )
+ +    [ { sse_version } compile ] with-optimizer
+ +    "Checking for multimedia extensions: " write sse-version
+ +    [ sse-string write " detected" print ] [ enable-sse2 ] bi ;
author	sheeple <sheeple@oberon.local>
	Sat, 26 Sep 2009 08:12:42 +0000 (03:12 -0500)
committer	sheeple <sheeple@oberon.local>
	Sat, 26 Sep 2009 08:12:42 +0000 (03:12 -0500)
		1	2
basis/compiler/cfg/instructions/instructions.factor	patch \|	diff1 \|	diff2 \|	blob \| history
basis/cpu/architecture/architecture.factor	patch \|	diff1 \|	diff2 \|	blob \| history
basis/cpu/ppc/ppc.factor	patch \|	diff1 \|	diff2 \|	blob \| history
basis/cpu/x86/x86.factor	patch \|	diff1 \|	diff2 \|	blob \| history