More SIMD work

author Slava Pestov <slava@slava-pestovs-macbook-pro.local>

Fri, 4 Sep 2009 01:58:56 +0000 (20:58 -0500)

committer Slava Pestov <slava@slava-pestovs-macbook-pro.local>

Fri, 4 Sep 2009 01:58:56 +0000 (20:58 -0500)
author Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Fri, 4 Sep 2009 01:58:56 +0000 (20:58 -0500)
committer Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Fri, 4 Sep 2009 01:58:56 +0000 (20:58 -0500)
diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor

index ac0bbf68b3c489e45e821cbd247597470f46d5f4..d087296c3e793f05bbbf10aeacf98ee0ab78f21f 100755 (executable)
--- a/basis/alien/c-types/c-types.factor
+++ b/basis/alien/c-types/c-types.factor
@@ -549,7 +549,7 @@ CONSTANT: primitive-types
          4 >>align
          "box_float" >>boxer
          "to_float" >>unboxer
-        single-float-rep >>rep
+        float-rep >>rep
          [ >float ] >>unboxer-quot
          "float" set-array-class
      "float" define-primitive-type
@@ -563,7 +563,7 @@ CONSTANT: primitive-types
          8 >>align
          "box_double" >>boxer
          "to_double" >>unboxer
-        double-float-rep >>rep
+        double-rep >>rep
          [ >float ] >>unboxer-quot
          "double" set-array-class
      "double" define-primitive-type
diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor

index a9c2b4e2c0803dea2fabb4a8204a4b767ab5a8fb..5a6c2a710650b1a7111465bcca75b71a9ea4824f 100644 (file)
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -207,135 +207,135 @@ temp: temp/int-rep ;
  
  ! Float arithmetic
  PURE-INSN: ##unbox-float
-def: dst/double-float-rep
+def: dst/double-rep
  use: src/int-rep ;
  
  PURE-INSN: ##box-float
  def: dst/int-rep
-use: src/double-float-rep
+use: src/double-rep
  temp: temp/int-rep ;
  
  PURE-INSN: ##add-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
  
  PURE-INSN: ##sub-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
  
  PURE-INSN: ##mul-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
  
  PURE-INSN: ##div-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
  
  PURE-INSN: ##min-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
  
  PURE-INSN: ##max-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
  
  PURE-INSN: ##sqrt
-def: dst/double-float-rep
-use: src/double-float-rep ;
+def: dst/double-rep
+use: src/double-rep ;
  
  ! libc intrinsics
  PURE-INSN: ##unary-float-function
-def: dst/double-float-rep
-use: src/double-float-rep
+def: dst/double-rep
+use: src/double-rep
  literal: func ;
  
  PURE-INSN: ##binary-float-function
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep
+def: dst/double-rep
+use: src1/double-rep src2/double-rep
  literal: func ;
  
  ! Single/double float conversion
  PURE-INSN: ##single>double-float
-def: dst/double-float-rep
-use: src/single-float-rep ;
+def: dst/double-rep
+use: src/float-rep ;
  
  PURE-INSN: ##double>single-float
-def: dst/single-float-rep
-use: src/double-float-rep ;
+def: dst/float-rep
+use: src/double-rep ;
  
  ! Float/integer conversion
  PURE-INSN: ##float>integer
  def: dst/int-rep
-use: src/double-float-rep ;
+use: src/double-rep ;
  
  PURE-INSN: ##integer>float
-def: dst/double-float-rep
+def: dst/double-rep
  use: src/int-rep ;
  
  ! SIMD operations
  
-INSN: ##box-vector
+PURE-INSN: ##box-vector
  def: dst/int-rep
  use: src
  literal: rep
  temp: temp/int-rep ;
  
-INSN: ##unbox-vector
+PURE-INSN: ##unbox-vector
  def: dst
  use: src/int-rep
  literal: rep ;
  
-INSN: ##broadcast-vector
+PURE-INSN: ##broadcast-vector
  def: dst
  use: src/scalar-rep
  literal: rep ;
  
-INSN: ##gather-vector-2
+PURE-INSN: ##gather-vector-2
  def: dst
  use: src1/scalar-rep src2/scalar-rep
  literal: rep ;
  
-INSN: ##gather-vector-4
+PURE-INSN: ##gather-vector-4
  def: dst
  use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep
  literal: rep ;
  
-INSN: ##add-vector
+PURE-INSN: ##add-vector
  def: dst
  use: src1 src2
  literal: rep ;
  
-INSN: ##sub-vector
+PURE-INSN: ##sub-vector
  def: dst
  use: src1 src2
  literal: rep ;
  
-INSN: ##mul-vector
+PURE-INSN: ##mul-vector
  def: dst
  use: src1 src2
  literal: rep ;
  
-INSN: ##div-vector
+PURE-INSN: ##div-vector
  def: dst
  use: src1 src2
  literal: rep ;
  
-INSN: ##min-vector
+PURE-INSN: ##min-vector
  def: dst
  use: src1 src2
  literal: rep ;
  
-INSN: ##max-vector
+PURE-INSN: ##max-vector
  def: dst
  use: src1 src2
  literal: rep ;
  
-INSN: ##sqrt-vector
+PURE-INSN: ##sqrt-vector
  def: dst
  use: src
  literal: rep ;
  
-INSN: ##horizontal-add-vector
+PURE-INSN: ##horizontal-add-vector
  def: dst/scalar-rep
  use: src
  literal: rep ;
@@ -399,11 +399,11 @@ def: dst/int-rep
  use: src/int-rep ;
  
  INSN: ##alien-float
-def: dst/single-float-rep
+def: dst/float-rep
  use: src/int-rep ;
  
  INSN: ##alien-double
-def: dst/double-float-rep
+def: dst/double-rep
  use: src/int-rep ;
  
  INSN: ##alien-vector
@@ -424,10 +424,10 @@ INSN: ##set-alien-cell
  use: src/int-rep value/int-rep ;
  
  INSN: ##set-alien-float
-use: src/int-rep value/single-float-rep ;
+use: src/int-rep value/float-rep ;
  
  INSN: ##set-alien-double
-use: src/int-rep value/double-float-rep ;
+use: src/int-rep value/double-rep ;
  
  INSN: ##set-alien-vector
  use: src/int-rep value
@@ -494,12 +494,12 @@ literal: cc
  temp: temp/int-rep ;
  
  INSN: ##compare-float-branch
-use: src1/double-float-rep src2/double-float-rep
+use: src1/double-rep src2/double-rep
  literal: cc ;
  
  PURE-INSN: ##compare-float
  def: dst/int-rep
-use: src1/double-float-rep src2/double-float-rep
+use: src1/double-rep src2/double-rep
  literal: cc
  temp: temp/int-rep ;
  
diff --git a/basis/compiler/cfg/intrinsics/alien/alien.factor b/basis/compiler/cfg/intrinsics/alien/alien.factor

index 440a453173661c9d9f7fe0b0750175e3b8169787..e9fe8965023abc36ae987ddd4374c9af37cd0826 100644 (file)
--- a/basis/compiler/cfg/intrinsics/alien/alien.factor
+++ b/basis/compiler/cfg/intrinsics/alien/alien.factor
@@ -98,15 +98,15 @@ IN: compiler.cfg.intrinsics.alien
  : emit-alien-float-getter ( node rep -- )
      '[
          _ {
-            { single-float-rep [ ^^alien-float ] }
-            { double-float-rep [ ^^alien-double ] }
+            { float-rep [ ^^alien-float ] }
+            { double-rep [ ^^alien-double ] }
          } case
      ] inline-alien-getter ;
  
  : emit-alien-float-setter ( node rep -- )
      '[
          _ {
-            { single-float-rep [ ##set-alien-float ] }
-            { double-float-rep [ ##set-alien-double ] }
+            { float-rep [ ##set-alien-float ] }
+            { double-rep [ ##set-alien-double ] }
          } case
      ] inline-alien-float-setter ;
diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor

index caa322fc4616a22bd8ec16e9d1e13b12f9f79f5d..612e9dcdc44d47563c806c856231d4b9b69e2106 100644 (file)
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@@ -93,10 +93,10 @@ IN: compiler.cfg.intrinsics
          { math.private:float= [ drop cc= emit-float-comparison ] }
          { math.private:float>fixnum [ drop emit-float>fixnum ] }
          { math.private:fixnum>float [ drop emit-fixnum>float ] }
-        { alien.accessors:alien-float [ single-float-rep emit-alien-float-getter ] }
-        { alien.accessors:set-alien-float [ single-float-rep emit-alien-float-setter ] }
-        { alien.accessors:alien-double [ double-float-rep emit-alien-float-getter ] }
-        { alien.accessors:set-alien-double [ double-float-rep emit-alien-float-setter ] }
+        { alien.accessors:alien-float [ float-rep emit-alien-float-getter ] }
+        { alien.accessors:set-alien-float [ float-rep emit-alien-float-setter ] }
+        { alien.accessors:alien-double [ double-rep emit-alien-float-getter ] }
+        { alien.accessors:set-alien-double [ double-rep emit-alien-float-setter ] }
      } enable-intrinsics ;
  
  : enable-fsqrt ( -- )
diff --git a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor

index 062c62adab6b97045aa923848f80c672bd24a516..f09fe403e66a691a982650a059e00716a8d305bf 100644 (file)
--- a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor
+++ b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor
@@ -80,9 +80,9 @@ cfg new 0 >>spill-area-size cfg set
  H{ } spill-slots set
  
  H{
-    { 1 single-float-rep }
-    { 2 single-float-rep }
-    { 3 single-float-rep }
+    { 1 float-rep }
+    { 2 float-rep }
+    { 3 float-rep }
  } representations set
  
  [
diff --git a/basis/compiler/cfg/representations/representations-tests.factor b/basis/compiler/cfg/representations/representations-tests.factor

index 29f0fa064ffbd7c4c477948cd1c21e7888b8e980..c50cfc4c86d4678798af618b6e49c52931a12cdc 100644 (file)
--- a/basis/compiler/cfg/representations/representations-tests.factor
+++ b/basis/compiler/cfg/representations/representations-tests.factor
@@ -3,7 +3,7 @@ compiler.cfg.registers compiler.cfg.instructions
  compiler.cfg.representations.preferred ;
  IN: compiler.cfg.representations
  
-[ { double-float-rep double-float-rep } ] [
+[ { double-rep double-rep } ] [
      T{ ##add-float
         { dst 5 }
         { src1 3 }
@@ -11,7 +11,7 @@ IN: compiler.cfg.representations
      } uses-vreg-reps
  ] unit-test
  
-[ double-float-rep ] [
+[ double-rep ] [
      T{ ##alien-double
         { dst 5 }
         { src 3 }
diff --git a/basis/compiler/cfg/representations/representations.factor b/basis/compiler/cfg/representations/representations.factor

index 4c417442ea229a7dd37248d313c73278340d9ad9..6fe28caf817682fb8f906fcdb8d508fedaefbcb2 100644 (file)
--- a/basis/compiler/cfg/representations/representations.factor
+++ b/basis/compiler/cfg/representations/representations.factor
@@ -22,21 +22,21 @@ ERROR: bad-conversion dst src dst-rep src-rep ;
  GENERIC: emit-box ( dst src rep -- )
  GENERIC: emit-unbox ( dst src rep -- )
  
-M: single-float-rep emit-box
+M: float-rep emit-box
      drop
-    [ double-float-rep next-vreg-rep dup ] dip ##single>double-float
+    [ double-rep next-vreg-rep dup ] dip ##single>double-float
      int-rep next-vreg-rep ##box-float ;
  
-M: single-float-rep emit-unbox
+M: float-rep emit-unbox
      drop
-    [ double-float-rep next-vreg-rep dup ] dip ##unbox-float
+    [ double-rep next-vreg-rep dup ] dip ##unbox-float
      ##double>single-float ;
  
-M: double-float-rep emit-box
+M: double-rep emit-box
      drop
      int-rep next-vreg-rep ##box-float ;
  
-M: double-float-rep emit-unbox
+M: double-rep emit-unbox
      drop ##unbox-float ;
  
  M: vector-rep emit-box
@@ -52,8 +52,8 @@ M: vector-rep emit-unbox
          { [ over int-rep eq? ] [ nip emit-box ] }
          [
              2array {
-                { { double-float-rep single-float-rep } [ ##single>double-float ] }
-                { { single-float-rep double-float-rep } [ ##double>single-float ] }
+                { { double-rep float-rep } [ ##single>double-float ] }
+                { { float-rep double-rep } [ ##double>single-float ] }
                  [ first2 bad-conversion ]
              } case
          ]
diff --git a/basis/compiler/cfg/two-operand/two-operand-tests.factor b/basis/compiler/cfg/two-operand/two-operand-tests.factor

index 09d88a29598c676fe569f66f3eac837821ee239a..41094cfac41f4e9f9e657b2004e5e1144edb2988 100644 (file)
--- a/basis/compiler/cfg/two-operand/two-operand-tests.factor
+++ b/basis/compiler/cfg/two-operand/two-operand-tests.factor
@@ -22,14 +22,14 @@ IN: compiler.cfg.two-operand.tests
  
  [
      V{
-        T{ ##copy f 1 2 double-float-rep }
+        T{ ##copy f 1 2 double-rep }
          T{ ##sub-float f 1 1 3 }
      }
  ] [
      H{
-        { 1 double-float-rep }
-        { 2 double-float-rep }
-        { 3 double-float-rep }
+        { 1 double-rep }
+        { 2 double-rep }
+        { 3 double-rep }
      } clone representations set
      {
          T{ ##sub-float f 1 2 3 }
@@ -38,13 +38,13 @@ IN: compiler.cfg.two-operand.tests
  
  [
      V{
-        T{ ##copy f 1 2 double-float-rep }
+        T{ ##copy f 1 2 double-rep }
          T{ ##mul-float f 1 1 1 }
      }
  ] [
      H{
-        { 1 double-float-rep }
-        { 2 double-float-rep }
+        { 1 double-rep }
+        { 2 double-rep }
      } clone representations set
      {
          T{ ##mul-float f 1 2 2 }
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor

index 8cbede2d4824f2a001176dc49cbb01b8e5c7533d..51f304b76306ed83827b998ac057d33c40aef44f 100755 (executable)
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -279,10 +279,10 @@ GENERIC: next-fastcall-param ( rep -- )
  M: int-rep next-fastcall-param
      int-regs inc [ ?dummy-stack-params ] [ ?dummy-fp-params ] bi ;
  
-M: single-float-rep next-fastcall-param
+M: float-rep next-fastcall-param
      float-regs inc [ ?dummy-stack-params ] [ ?dummy-int-params ] bi ;
  
-M: double-float-rep next-fastcall-param
+M: double-rep next-fastcall-param
      float-regs inc [ ?dummy-stack-params ] [ ?dummy-int-params ] bi ;
  
  GENERIC: reg-class-full? ( reg-class -- ? )
diff --git a/basis/compiler/tests/low-level-ir.factor b/basis/compiler/tests/low-level-ir.factor

index d67aaef43b92621a5c5292934216ad98a3eca47d..e2fc26e94bea23d842c5b2f27b174d63a64a31ac 100644 (file)
--- a/basis/compiler/tests/low-level-ir.factor
+++ b/basis/compiler/tests/low-level-ir.factor
@@ -53,7 +53,7 @@ IN: compiler.tests.low-level-ir
          V{
              T{ ##load-reference f 4 1.5 }
              T{ ##unbox-float f 1 4 }
-            T{ ##copy f 2 1 double-float-rep }
+            T{ ##copy f 2 1 double-rep }
              T{ ##box-float f 3 2 }
              T{ ##copy f 0 3 int-rep }
          } compile-test-bb
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor

index 9f63e747b97b8c713e69e1f877d00271af2bc14a..47cfd74de220f309e8d222a52de450e192f752ae 100644 (file)
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -22,8 +22,8 @@ IN: compiler.tree.propagation.simd
  \ (simd-sum) [
      nip dup literal?>> [
          literal>> scalar-rep-of {
-            { single-float-rep [ float ] }
-            { double-float-rep [ float ] }
+            { float-rep [ float ] }
+            { double-rep [ float ] }
          } case
      ] [ drop real ] if
      <class-info>
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor

index eaa4bc394a3b9da8dcf9a62480e7a6e33d43bb6b..283e37d4150a0b2e2f09e855213f3e9acd3fc098 100644 (file)
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -18,34 +18,35 @@ SINGLETONS: tagged-rep int-rep ;
  
  ! Floating point registers can contain data with
  ! one of these representations
-SINGLETONS: single-float-rep double-float-rep ;
+SINGLETONS: float-rep double-rep ;
  
+! On x86, floating point registers are really vector registers
  SINGLETONS:
-4float-array-rep
-2double-array-rep
-16char-array-rep
-16uchar-array-rep
-8short-array-rep
-8ushort-array-rep
-4int-array-rep
-4uint-array-rep ;
+float-4-rep
+double-2-rep
+char-16-rep
+uchar-16-rep
+short-8-rep
+ushort-8-rep
+int-4-rep
+uint-4-rep ;
  
  UNION: vector-rep
-4float-array-rep
-2double-array-rep
-16char-array-rep
-16uchar-array-rep
-8short-array-rep
-8ushort-array-rep
-4int-array-rep
-4uint-array-rep ;
+float-4-rep
+double-2-rep
+char-16-rep
+uchar-16-rep
+short-8-rep
+ushort-8-rep
+int-4-rep
+uint-4-rep ;
  
  UNION: representation
  any-rep
  tagged-rep
  int-rep
-single-float-rep
-double-float-rep
+float-rep
+double-rep
  vector-rep ;
  
  ! Register classes
@@ -61,8 +62,8 @@ GENERIC: reg-class-of ( rep -- reg-class )
  
  M: tagged-rep reg-class-of drop int-regs ;
  M: int-rep reg-class-of drop int-regs ;
-M: single-float-rep reg-class-of drop float-regs ;
-M: double-float-rep reg-class-of drop float-regs ;
+M: float-rep reg-class-of drop float-regs ;
+M: double-rep reg-class-of drop float-regs ;
  M: vector-rep reg-class-of drop float-regs ;
  M: stack-params reg-class-of drop stack-params ;
  
@@ -70,15 +71,15 @@ GENERIC: rep-size ( rep -- n )
  
  M: tagged-rep rep-size drop cell ;
  M: int-rep rep-size drop cell ;
-M: single-float-rep rep-size drop 4 ;
-M: double-float-rep rep-size drop 8 ;
+M: float-rep rep-size drop 4 ;
+M: double-rep rep-size drop 8 ;
  M: stack-params rep-size drop cell ;
  M: vector-rep rep-size drop 16 ;
  
  GENERIC: scalar-rep-of ( rep -- rep' )
  
-M: 4float-array-rep scalar-rep-of drop single-float-rep ;
-M: 2double-array-rep scalar-rep-of drop double-float-rep ;
+M: float-4-rep scalar-rep-of drop float-rep ;
+M: double-2-rep scalar-rep-of drop double-rep ;
  
  ! Mapping from register class to machine registers
  HOOK: machine-registers cpu ( -- assoc )
diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor

index 26bdae74e4a75e424d44bd372c17356b9a24d4d7..c0961734a2e95357dfb517aeae07f77b52c9c09c 100644 (file)
--- a/basis/cpu/ppc/ppc.factor
+++ b/basis/cpu/ppc/ppc.factor
@@ -272,7 +272,7 @@ M:: ppc %float>integer ( dst src -- )
  M: ppc %copy ( dst src rep -- )
      {
          { int-rep [ MR ] }
-        { double-float-rep [ FMR ] }
+        { double-rep [ FMR ] }
      } case ;
  
  M: ppc %unbox-float ( dst src -- ) float-offset LFD ;
@@ -535,8 +535,8 @@ M: ppc %compare-float-branch [ (%compare-float) ] 2dip %branch ;
  : load-from-frame ( dst n rep -- )
      {
          { int-rep [ [ 1 ] dip LWZ ] }
-        { single-float-rep [ [ 1 ] dip LFS ] }
-        { double-float-rep [ [ 1 ] dip LFD ] }
+        { float-rep [ [ 1 ] dip LFS ] }
+        { double-rep [ [ 1 ] dip LFD ] }
          { stack-params [ [ 0 1 ] dip LWZ [ 0 1 ] dip param@ STW ] }
      } case ;
  
@@ -545,8 +545,8 @@ M: ppc %compare-float-branch [ (%compare-float) ] 2dip %branch ;
  : store-to-frame ( src n rep -- )
      {
          { int-rep [ [ 1 ] dip STW ] }
-        { single-float-rep [ [ 1 ] dip STFS ] }
-        { double-float-rep [ [ 1 ] dip STFD ] }
+        { float-rep [ [ 1 ] dip STFS ] }
+        { double-rep [ [ 1 ] dip STFD ] }
          { stack-params [ [ [ 0 1 ] dip next-param@ LWZ 0 1 ] dip STW ] }
      } case ;
  
diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor

index 2ed461927c7b1a081cc02dc506df410e0e3a98e3..ce1993ece49e322e11238fe01fd07071b61a2c92 100755 (executable)
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@@ -70,13 +70,13 @@ M: int-rep push-return-reg drop EAX PUSH ;
  M: int-rep load-return-reg drop EAX swap next-stack@ MOV ;
  M: int-rep store-return-reg drop stack@ EAX MOV ;
  
-M: single-float-rep push-return-reg drop ESP 4 SUB ESP [] FSTPS ;
-M: single-float-rep load-return-reg drop next-stack@ FLDS ;
-M: single-float-rep store-return-reg drop stack@ FSTPS ;
+M: float-rep push-return-reg drop ESP 4 SUB ESP [] FSTPS ;
+M: float-rep load-return-reg drop next-stack@ FLDS ;
+M: float-rep store-return-reg drop stack@ FSTPS ;
  
-M: double-float-rep push-return-reg drop ESP 8 SUB ESP [] FSTPL ;
-M: double-float-rep load-return-reg drop next-stack@ FLDL ;
-M: double-float-rep store-return-reg drop stack@ FSTPL ;
+M: double-rep push-return-reg drop ESP 8 SUB ESP [] FSTPL ;
+M: double-rep load-return-reg drop next-stack@ FLDL ;
+M: double-rep store-return-reg drop stack@ FSTPL ;
  
  : align-sub ( n -- )
      [ align-stack ] keep - decr-stack-reg ;
diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor

index df9287162cfcea7d030d569e65cf0e0ab7a6ca94..7cfcb7c5574c3f39a101dd25dd66a263b03da910 100644 (file)
--- a/basis/cpu/x86/64/64.factor
+++ b/basis/cpu/x86/64/64.factor
@@ -201,7 +201,7 @@ M: x86.64 %callback-value ( ctype -- )
      [ float-regs param-regs nth ] [ n>> spill@ ] bi* MOVSD ;
  
  : float-function-return ( reg -- )
-    float-regs return-reg double-float-rep copy-register ;
+    float-regs return-reg double-rep copy-register ;
  
  M:: x86.64 %unary-float-function ( dst src func -- )
      0 src float-function-param
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor

index 602dfd65412f28bd12adfea2dbfef256861513b5..ff658f0b6032fa28593bbb2c46547ae94f00c1ce 100644 (file)
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -134,10 +134,10 @@ GENERIC: copy-register* ( dst src rep -- )
  
  M: int-rep copy-register* drop MOV ;
  M: tagged-rep copy-register* drop MOV ;
-M: single-float-rep copy-register* drop MOVSS ;
-M: double-float-rep copy-register* drop MOVSD ;
-M: 4float-array-rep copy-register* drop MOVUPS ;
-M: 2double-array-rep copy-register* drop MOVUPD ;
+M: float-rep copy-register* drop MOVSS ;
+M: double-rep copy-register* drop MOVSD ;
+M: float-4-rep copy-register* drop MOVUPS ;
+M: double-2-rep copy-register* drop MOVUPD ;
  M: vector-rep copy-register* drop MOVDQU ;
  
  : copy-register ( dst src rep -- )
@@ -251,14 +251,14 @@ M:: x86 %unbox-vector ( dst src rep -- )
  
  M: x86 %broadcast-vector ( dst src rep -- )
      {
-        { 4float-array-rep [ [ MOVAPS ] [ drop dup 0 SHUFPS ] 2bi ] }
-        { 2double-array-rep [ [ MOVAPD ] [ drop dup 0 SHUFPD ] 2bi ] }
+        { float-4-rep [ [ MOVAPS ] [ drop dup 0 SHUFPS ] 2bi ] }
+        { double-2-rep [ [ MOVAPD ] [ drop dup 0 SHUFPD ] 2bi ] }
      } case ;
  
  M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
      rep {
          {
-            4float-array-rep
+            float-4-rep
              [
                  dst src1 MOVSS
                  dst src2 UNPCKLPS
@@ -271,7 +271,7 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
  M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
      rep {
          {
-            2double-array-rep
+            double-2-rep
              [
                  dst src1 MOVAPD
                  dst src2 0 SHUFPD
@@ -281,63 +281,63 @@ M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
  
  M: x86 %add-vector ( dst src1 src2 rep -- )
      {
-        { 4float-array-rep [ ADDPS ] }
-        { 2double-array-rep [ ADDPD ] }
-        { 16char-array-rep [ PADDB ] }
-        { 16uchar-array-rep [ PADDB ] }
-        { 8short-array-rep [ PADDW ] }
-        { 8ushort-array-rep [ PADDW ] }
-        { 4int-array-rep [ PADDD ] }
-        { 4uint-array-rep [ PADDD ] }
+        { float-4-rep [ ADDPS ] }
+        { double-2-rep [ ADDPD ] }
+        { char-16-rep [ PADDB ] }
+        { uchar-16-rep [ PADDB ] }
+        { short-8-rep [ PADDW ] }
+        { ushort-8-rep [ PADDW ] }
+        { int-4-rep [ PADDD ] }
+        { uint-4-rep [ PADDD ] }
      } case drop ;
  
  M: x86 %sub-vector ( dst src1 src2 rep -- )
      {
-        { 4float-array-rep [ SUBPS ] }
-        { 2double-array-rep [ SUBPD ] }
-        { 16char-array-rep [ PSUBB ] }
-        { 16uchar-array-rep [ PSUBB ] }
-        { 8short-array-rep [ PSUBW ] }
-        { 8ushort-array-rep [ PSUBW ] }
-        { 4int-array-rep [ PSUBD ] }
-        { 4uint-array-rep [ PSUBD ] }
+        { float-4-rep [ SUBPS ] }
+        { double-2-rep [ SUBPD ] }
+        { char-16-rep [ PSUBB ] }
+        { uchar-16-rep [ PSUBB ] }
+        { short-8-rep [ PSUBW ] }
+        { ushort-8-rep [ PSUBW ] }
+        { int-4-rep [ PSUBD ] }
+        { uint-4-rep [ PSUBD ] }
      } case drop ;
  
  M: x86 %mul-vector ( dst src1 src2 rep -- )
      {
-        { 4float-array-rep [ MULPS ] }
-        { 2double-array-rep [ MULPD ] }
-        { 4int-array-rep [ PMULLW ] }
+        { float-4-rep [ MULPS ] }
+        { double-2-rep [ MULPD ] }
+        { int-4-rep [ PMULLW ] }
      } case drop ;
  
  M: x86 %div-vector ( dst src1 src2 rep -- )
      {
-        { 4float-array-rep [ DIVPS ] }
-        { 2double-array-rep [ DIVPD ] }
+        { float-4-rep [ DIVPS ] }
+        { double-2-rep [ DIVPD ] }
      } case drop ;
  
  M: x86 %min-vector ( dst src1 src2 rep -- )
      {
-        { 4float-array-rep [ MINPS ] }
-        { 2double-array-rep [ MINPD ] }
+        { float-4-rep [ MINPS ] }
+        { double-2-rep [ MINPD ] }
      } case drop ;
  
  M: x86 %max-vector ( dst src1 src2 rep -- )
      {
-        { 4float-array-rep [ MAXPS ] }
-        { 2double-array-rep [ MAXPD ] }
+        { float-4-rep [ MAXPS ] }
+        { double-2-rep [ MAXPD ] }
      } case drop ;
  
  M: x86 %sqrt-vector ( dst src rep -- )
      {
-        { 4float-array-rep [ SQRTPS ] }
-        { 2double-array-rep [ SQRTPD ] }
+        { float-4-rep [ SQRTPS ] }
+        { double-2-rep [ SQRTPD ] }
      } case ;
  
  M: x86 %horizontal-add-vector ( dst src rep -- )
      {
-        { 4float-array-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
-        { 2double-array-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
+        { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
+        { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
      } case ;
  
  M:: x86 %unbox-any-c-ptr ( dst src temp -- )
diff --git a/basis/math/vectors/simd/alien/alien-tests.factor b/basis/math/vectors/simd/alien/alien-tests.factor

index afdada3c8e3dbae862380fbe07f41c8b6a6bdd15..52b8db9bd1d9f0be38e97229f23e7e8b08925a48 100644 (file)
--- a/basis/math/vectors/simd/alien/alien-tests.factor
+++ b/basis/math/vectors/simd/alien/alien-tests.factor
@@ -2,14 +2,14 @@ IN: math.vectors.simd.alien.tests
  USING: cpu.architecture math.vectors.simd
  math.vectors.simd.intrinsics accessors math.vectors.simd.alien
  kernel classes.struct tools.test compiler sequences byte-arrays
-alien math kernel.private specialized-arrays.float ;
+alien math kernel.private specialized-arrays.float combinators ;
  
  ! Vector alien intrinsics
-[ 4float-array{ 1 2 3 4 } ] [
+[ float-4{ 1 2 3 4 } ] [
      [
-        4float-array{ 1 2 3 4 }
-        underlying>> 0 4float-array-rep alien-vector
-    ] compile-call 4float-array boa
+        float-4{ 1 2 3 4 }
+        underlying>> 0 float-4-rep alien-vector
+    ] compile-call float-4 boa
  ] unit-test
  
  [ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
@@ -17,7 +17,7 @@ alien math kernel.private specialized-arrays.float ;
      [
          0 [
              { byte-array c-ptr fixnum } declare
-            4float-array-rep set-alien-vector
+            float-4-rep set-alien-vector
          ] compile-call
      ] keep
  ] unit-test
@@ -26,31 +26,44 @@ alien math kernel.private specialized-arrays.float ;
      [
          float-array{ 1 2 3 4 } underlying>>
          float-array{ 4 3 2 1 } clone
-        [ underlying>> 0 4float-array-rep set-alien-vector ] keep
+        [ underlying>> 0 float-4-rep set-alien-vector ] keep
      ] compile-call
  ] unit-test
  
  STRUCT: simd-struct
-{ x 4float-array }
-{ y 2double-array }
-{ z 4double-array } ;
+{ x float-4 }
+{ y double-2 }
+{ z double-4 }
+{ w float-8 } ;
  
  [ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
  
-[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
+[
+    float-4{ 1 2 3 4 }
+    double-2{ 2 1 }
+    double-4{ 4 3 2 1 }
+    float-8{ 1 2 3 4 5 6 7 8 }
+] [
      simd-struct <struct>
-    4float-array{ 1 2 3 4 } >>x
-    2double-array{ 2 1 } >>y
-    4double-array{ 4 3 2 1 } >>z
-    [ x>> ] [ y>> ] [ z>> ] tri
+    float-4{ 1 2 3 4 } >>x
+    double-2{ 2 1 } >>y
+    double-4{ 4 3 2 1 } >>z
+    float-8{ 1 2 3 4 5 6 7 8 } >>w
+    { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
  ] unit-test
  
-[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
+[
+    float-4{ 1 2 3 4 }
+    double-2{ 2 1 }
+    double-4{ 4 3 2 1 }
+    float-8{ 1 2 3 4 5 6 7 8 }
+] [
      [
          simd-struct <struct>
-        4float-array{ 1 2 3 4 } >>x
-        2double-array{ 2 1 } >>y
-        4double-array{ 4 3 2 1 } >>z
-        [ x>> ] [ y>> ] [ z>> ] tri
+        float-4{ 1 2 3 4 } >>x
+        double-2{ 2 1 } >>y
+        double-4{ 4 3 2 1 } >>z
+        float-8{ 1 2 3 4 5 6 7 8 } >>w
+        { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
      ] compile-call
  ] unit-test
diff --git a/basis/math/vectors/simd/alien/alien.factor b/basis/math/vectors/simd/alien/alien.factor

index 102559898ed2fda21cf11cb32fb771bbeb8eda3b..1486f6d0af4af9924c8e3fd23f38b53ae8c46bd8 100644 (file)
--- a/basis/math/vectors/simd/alien/alien.factor
+++ b/basis/math/vectors/simd/alien/alien.factor
@@ -5,39 +5,38 @@ cpu.architecture locals kernel math math.vectors.simd
  math.vectors.simd.intrinsics ;
  IN: math.vectors.simd.alien
  
-:: define-simd-type ( class rep -- )
+:: define-simd-128-type ( class rep -- )
      <c-type>
          byte-array >>class
          class >>boxed-class
-        [ rep alien-vector ] >>getter
+        [ rep alien-vector class boa ] >>getter
          [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
          16 >>size
          8 >>align
          rep >>rep
-        [ class boa ] >>boxer-quot
-        [ underlying>> ] >>unboxer-quot
      class name>> typedef ;
  
-: define-4double-array-type ( -- )
+:: define-simd-256-type ( class rep -- )
      <c-type>
-        4double-array >>class
-        4double-array >>boxed-class
+        class >>class
+        class >>boxed-class
          [
-            [ 2double-array-rep alien-vector ]
-            [ 16 + >fixnum 2double-array-rep alien-vector ] 2bi
-            4double-array boa
+            [ rep alien-vector ]
+            [ 16 + >fixnum rep alien-vector ] 2bi
+            class boa
          ] >>getter
          [
-            [ [ underlying1>> ] 2dip 2double-array-rep set-alien-vector ]
-            [ [ underlying2>> ] 2dip 16 + >fixnum 2double-array-rep set-alien-vector ]
+            [ [ underlying1>> ] 2dip rep set-alien-vector ]
+            [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
              3bi
          ] >>setter
          32 >>size
          8 >>align
-        2double-array-rep >>rep
-    "4double-array" typedef ;
+        rep >>rep
+    class name>> typedef ;
  [
-    4float-array 4float-array-rep define-simd-type
-    2double-array 2double-array-rep define-simd-type
-    define-4double-array-type
+    float-4 float-4-rep define-simd-128-type
+    double-2 double-2-rep define-simd-128-type
+    float-8 float-4-rep define-simd-256-type
+    double-4 double-2-rep define-simd-256-type
  ] with-compilation-unit
diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor

index 9273f1108edbbdfd76da02705bd490e5bfec7a59..356ffe8245df0fde785de5d361bb9ab5931bdf8a 100644 (file)
--- a/basis/math/vectors/simd/functor/functor.factor
+++ b/basis/math/vectors/simd/functor/functor.factor
@@ -2,14 +2,16 @@
  ! See http://factorcode.org/license.txt for BSD license.
  USING: accessors alien.c-types byte-arrays classes functors
  kernel math parser prettyprint.custom sequences
-sequences.private ;
+sequences.private literals ;
  IN: math.vectors.simd.functor
  
  ERROR: bad-length got expected ;
  
-FUNCTOR: define-simd-type ( T N -- )
+FUNCTOR: define-simd-128 ( T -- )
  
-A            DEFINES-CLASS ${N}${T}-array
+N            [ 16 T heap-size /i ]
+
+A            DEFINES-CLASS ${T}-${N}
  <A>          DEFINES <${A}>
  (A)          DEFINES (${A})
  >A           DEFINES >${A}
@@ -18,17 +20,18 @@ A{           DEFINES ${A}{
  NTH          [ T dup c-type-getter-boxer array-accessor ]
  SET-NTH      [ T dup c-setter array-accessor ]
  
-BYTES        [ T heap-size N * ]
-INITIAL      [ BYTES <byte-array> ]
+A-rep        IS ${A}-rep
+A-vv->v-op   DEFINES ${A}-vv->v-op
+A-v->n-op    DEFINES ${A}-v->n-op
  
  WHERE
  
  TUPLE: A
-{ underlying byte-array read-only initial: INITIAL } ;
+{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
  
-: <A> ( -- simd-array ) BYTES <byte-array> A boa ; inline
+: <A> ( -- simd-array ) 16 <byte-array> \ A boa ; inline
  
-: (A) ( -- simd-array ) BYTES (byte-array) A boa ; inline
+: (A) ( -- simd-array ) 16 (byte-array) \ A boa ; inline
  
  M: A clone underlying>> clone \ A boa ; inline
  
@@ -58,4 +61,103 @@ SYNTAX: A{ \ } [ >A ] parse-literal ;
  
  INSTANCE: A sequence
  
+<PRIVATE
+
+: A-vv->v-op ( v1 v2 quot -- v3 )
+    [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
+
+: A-v->n-op ( v quot -- n )
+    [ underlying>> A-rep ] dip call ; inline
+
+PRIVATE>
+
+;FUNCTOR
+
+! Synthesize 256-bit vectors from a pair of 128-bit vectors
+FUNCTOR: define-simd-256 ( T -- )
+
+N            [ 32 T heap-size /i ]
+
+N/2          [ N 2 / ]
+A/2          IS ${T}-${N/2}
+
+A            DEFINES-CLASS ${T}-${N}
+<A>          DEFINES <${A}>
+(A)          DEFINES (${A})
+>A           DEFINES >${A}
+A{           DEFINES ${A}{
+
+A-deref      DEFINES ${A}-deref
+
+A-rep        IS ${A/2}-rep
+A-vv->v-op   DEFINES ${A}-vv->v-op
+A-v->n-op    DEFINES ${A}-v->n-op
+
+WHERE
+
+SLOT: underlying1
+SLOT: underlying2
+
+TUPLE: A
+{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
+{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
+
+: <A> ( -- simd-array )
+    16 <byte-array> 16 <byte-array> \ A boa ; inline
+
+: (A) ( -- simd-array )
+    16 (byte-array) 16 (byte-array) \ A boa ; inline
+
+M: A clone
+    [ underlying1>> clone ] [ underlying2>> clone ] bi
+    \ A boa ; inline
+
+M: A length drop N ; inline
+
+<PRIVATE
+
+: A-deref ( n seq -- n' seq' )
+    over N/2 < [ underlying1>> ] [ [ N/2 - ] dip underlying2>> ] if A/2 boa ; inline
+
+PRIVATE>
+
+M: A nth-unsafe A-deref nth-unsafe ; inline
+
+M: A set-nth-unsafe A-deref set-nth-unsafe ; inline
+
+: >A ( seq -- simd-array ) \ A new clone-like ;
+
+M: A like drop dup \ A instance? [ >A ] unless ; inline
+
+M: A new-sequence drop dup N = [ drop (A) ] [ N bad-length ] if ; inline
+
+M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
+
+M: A byte-length drop 32 ; inline
+
+SYNTAX: A{ \ } [ >A ] parse-literal ;
+
+M: A pprint-delims drop \ A{ \ } ;
+
+M: A >pprint-sequence ;
+
+M: A pprint* pprint-object ;
+
+INSTANCE: A sequence
+
+<PRIVATE
+
+: A-vv->v-op ( v1 v2 quot -- v3 )
+    [ [ [ underlying1>> ] bi@ A-rep ] dip call ]
+    [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
+    \ A boa ; inline
+
+: A-v->n-op ( v1 quot scalar-quot -- v2 )
+    [
+        [ [ underlying1>> A-rep ] dip call ]
+        [ [ underlying2>> A-rep ] dip call ] 2bi
+    ] dip call ; inline
+
+PRIVATE>
+
  ;FUNCTOR
diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor

index 58a5373713b1d79e88443fc6417968e5700746ed..532ec8476487e727c9248bc2985b870e383eff85 100644 (file)
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@@ -3,299 +3,348 @@ USING: math math.vectors.simd math.vectors.simd.private
  math.vectors math.functions kernel.private compiler sequences
  tools.test compiler.tree.debugger accessors kernel ;
  
-[ 4float-array{ 0 0 0 0 } ] [ 4float-array new ] unit-test
+[ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test
  
-[ V{ float } ] [ [ { 4float-array } declare norm-sq ] final-classes ] unit-test
+[ float-4{ 0 0 0 0 } ] [ [ float-4 new ] compile-call ] unit-test
  
-[ V{ float } ] [ [ { 4float-array } declare norm ] final-classes ] unit-test
+[ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test
  
-[ 4float-array{ 12 12 12 12 } ] [
-    12 [ 4float-array-with ] compile-call
+[ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
+
+[ float-4{ 12 12 12 12 } ] [
+    12 [ float-4-with ] compile-call
  ] unit-test
  
-[ 4float-array{ 1 2 3 4 } ] [
-    1 2 3 4 [ 4float-array-boa ] compile-call
+[ float-4{ 1 2 3 4 } ] [
+    1 2 3 4 [ float-4-boa ] compile-call
  ] unit-test
  
-[ 4float-array{ 11 22 33 44 } ] [
-    4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
-    [ { 4float-array 4float-array } declare v+ ] compile-call
+[ float-4{ 11 22 33 44 } ] [
+    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
+    [ { float-4 float-4 } declare v+ ] compile-call
  ] unit-test
  
-[ 4float-array{ -9 -18 -27 -36 } ] [
-    4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
-    [ { 4float-array 4float-array } declare v- ] compile-call
+[ float-4{ -9 -18 -27 -36 } ] [
+    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
+    [ { float-4 float-4 } declare v- ] compile-call
  ] unit-test
  
-[ 4float-array{ 10 40 90 160 } ] [
-    4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
-    [ { 4float-array 4float-array } declare v* ] compile-call
+[ float-4{ 10 40 90 160 } ] [
+    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
+    [ { float-4 float-4 } declare v* ] compile-call
  ] unit-test
  
-[ 4float-array{ 10 100 1000 10000 } ] [
-    4float-array{ 100 2000 30000 400000 } 4float-array{ 10 20 30 40 }
-    [ { 4float-array 4float-array } declare v/ ] compile-call
+[ float-4{ 10 100 1000 10000 } ] [
+    float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 }
+    [ { float-4 float-4 } declare v/ ] compile-call
  ] unit-test
  
-[ 4float-array{ -10 -20 -30 -40 } ] [
-    4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
-    [ { 4float-array 4float-array } declare vmin ] compile-call
+[ float-4{ -10 -20 -30 -40 } ] [
+    float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
+    [ { float-4 float-4 } declare vmin ] compile-call
  ] unit-test
  
-[ 4float-array{ 10 20 30 40 } ] [
-    4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
-    [ { 4float-array 4float-array } declare vmax ] compile-call
+[ float-4{ 10 20 30 40 } ] [
+    float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
+    [ { float-4 float-4 } declare vmax ] compile-call
  ] unit-test
  
  [ 10.0 ] [
-    4float-array{ 1 2 3 4 }
-    [ { 4float-array } declare sum ] compile-call
+    float-4{ 1 2 3 4 }
+    [ { float-4 } declare sum ] compile-call
  ] unit-test
  
  [ 13.0 ] [
-    4float-array{ 1 2 3 4 }
-    [ { 4float-array } declare sum 3.0 + ] compile-call
+    float-4{ 1 2 3 4 }
+    [ { float-4 } declare sum 3.0 + ] compile-call
  ] unit-test
  
  [ 8.0 ] [
-    4float-array{ 1 2 3 4 } 4float-array{ 2 0 2 0 }
-    [ { 4float-array 4float-array } declare v. ] compile-call
+    float-4{ 1 2 3 4 } float-4{ 2 0 2 0 }
+    [ { float-4 float-4 } declare v. ] compile-call
  ] unit-test
  
-[ 4float-array{ 5 10 15 20 } ] [
-    5.0 4float-array{ 1 2 3 4 }
-    [ { float 4float-array } declare n*v ] compile-call
+[ float-4{ 5 10 15 20 } ] [
+    5.0 float-4{ 1 2 3 4 }
+    [ { float float-4 } declare n*v ] compile-call
  ] unit-test
  
-[ 4float-array{ 5 10 15 20 } ] [
-    4float-array{ 1 2 3 4 } 5.0
-    [ { float 4float-array } declare v*n ] compile-call
+[ float-4{ 5 10 15 20 } ] [
+    float-4{ 1 2 3 4 } 5.0
+    [ { float float-4 } declare v*n ] compile-call
  ] unit-test
  
-[ 4float-array{ 10 5 2 5 } ] [
-    10.0 4float-array{ 1 2 5 2 }
-    [ { float 4float-array } declare n/v ] compile-call
+[ float-4{ 10 5 2 5 } ] [
+    10.0 float-4{ 1 2 5 2 }
+    [ { float float-4 } declare n/v ] compile-call
  ] unit-test
  
-[ 4float-array{ 0.5 1 1.5 2 } ] [
-    4float-array{ 1 2 3 4 } 2
-    [ { float 4float-array } declare v/n ] compile-call
+[ float-4{ 0.5 1 1.5 2 } ] [
+    float-4{ 1 2 3 4 } 2
+    [ { float float-4 } declare v/n ] compile-call
  ] unit-test
  
-[ 4float-array{ 1 0 0 0 } ] [
-    4float-array{ 10 0 0 0 }
-    [ { 4float-array } declare normalize ] compile-call
+[ float-4{ 1 0 0 0 } ] [
+    float-4{ 10 0 0 0 }
+    [ { float-4 } declare normalize ] compile-call
  ] unit-test
  
  [ 30.0 ] [
-    4float-array{ 1 2 3 4 }
-    [ { 4float-array } declare norm-sq ] compile-call
+    float-4{ 1 2 3 4 }
+    [ { float-4 } declare norm-sq ] compile-call
  ] unit-test
  
  [ t ] [
-    4float-array{ 1 0 0 0 }
-    4float-array{ 0 1 0 0 }
-    [ { 4float-array 4float-array } declare distance ] compile-call
+    float-4{ 1 0 0 0 }
+    float-4{ 0 1 0 0 }
+    [ { float-4 float-4 } declare distance ] compile-call
      2 sqrt 1.0e-6 ~
  ] unit-test
  
-[ 2double-array{ 12 12 } ] [
-    12 [ 2double-array-with ] compile-call
+[ double-2{ 12 12 } ] [
+    12 [ double-2-with ] compile-call
  ] unit-test
  
-[ 2double-array{ 1 2 } ] [
-    1 2 [ 2double-array-boa ] compile-call
+[ double-2{ 1 2 } ] [
+    1 2 [ double-2-boa ] compile-call
  ] unit-test
  
-[ 2double-array{ 11 22 } ] [
-    2double-array{ 1 2 } 2double-array{ 10 20 }
-    [ { 2double-array 2double-array } declare v+ ] compile-call
+[ double-2{ 11 22 } ] [
+    double-2{ 1 2 } double-2{ 10 20 }
+    [ { double-2 double-2 } declare v+ ] compile-call
  ] unit-test
  
-[ 2double-array{ -9 -18 } ] [
-    2double-array{ 1 2 } 2double-array{ 10 20 }
-    [ { 2double-array 2double-array } declare v- ] compile-call
+[ double-2{ -9 -18 } ] [
+    double-2{ 1 2 } double-2{ 10 20 }
+    [ { double-2 double-2 } declare v- ] compile-call
  ] unit-test
  
-[ 2double-array{ 10 40 } ] [
-    2double-array{ 1 2 } 2double-array{ 10 20 }
-    [ { 2double-array 2double-array } declare v* ] compile-call
+[ double-2{ 10 40 } ] [
+    double-2{ 1 2 } double-2{ 10 20 }
+    [ { double-2 double-2 } declare v* ] compile-call
  ] unit-test
  
-[ 2double-array{ 10 100 } ] [
-    2double-array{ 100 2000 } 2double-array{ 10 20 }
-    [ { 2double-array 2double-array } declare v/ ] compile-call
+[ double-2{ 10 100 } ] [
+    double-2{ 100 2000 } double-2{ 10 20 }
+    [ { double-2 double-2 } declare v/ ] compile-call
  ] unit-test
  
-[ 2double-array{ -10 -20 } ] [
-    2double-array{ -10 20 } 2double-array{ 10 -20 }
-    [ { 2double-array 2double-array } declare vmin ] compile-call
+[ double-2{ -10 -20 } ] [
+    double-2{ -10 20 } double-2{ 10 -20 }
+    [ { double-2 double-2 } declare vmin ] compile-call
  ] unit-test
  
-[ 2double-array{ 10 20 } ] [
-    2double-array{ -10 20 } 2double-array{ 10 -20 }
-    [ { 2double-array 2double-array } declare vmax ] compile-call
+[ double-2{ 10 20 } ] [
+    double-2{ -10 20 } double-2{ 10 -20 }
+    [ { double-2 double-2 } declare vmax ] compile-call
  ] unit-test
  
  [ 3.0 ] [
-    2double-array{ 1 2 }
-    [ { 2double-array } declare sum ] compile-call
+    double-2{ 1 2 }
+    [ { double-2 } declare sum ] compile-call
  ] unit-test
  
  [ 7.0 ] [
-    2double-array{ 1 2 }
-    [ { 2double-array } declare sum 4.0 + ] compile-call
+    double-2{ 1 2 }
+    [ { double-2 } declare sum 4.0 + ] compile-call
  ] unit-test
  
  [ 16.0 ] [
-    2double-array{ 1 2 } 2double-array{ 2 7 }
-    [ { 2double-array 2double-array } declare v. ] compile-call
+    double-2{ 1 2 } double-2{ 2 7 }
+    [ { double-2 double-2 } declare v. ] compile-call
  ] unit-test
  
-[ 2double-array{ 5 10 } ] [
-    5.0 2double-array{ 1 2 }
-    [ { float 2double-array } declare n*v ] compile-call
+[ double-2{ 5 10 } ] [
+    5.0 double-2{ 1 2 }
+    [ { float double-2 } declare n*v ] compile-call
  ] unit-test
  
-[ 2double-array{ 5 10 } ] [
-    2double-array{ 1 2 } 5.0
-    [ { float 2double-array } declare v*n ] compile-call
+[ double-2{ 5 10 } ] [
+    double-2{ 1 2 } 5.0
+    [ { float double-2 } declare v*n ] compile-call
  ] unit-test
  
-[ 2double-array{ 10 5 } ] [
-    10.0 2double-array{ 1 2 }
-    [ { float 2double-array } declare n/v ] compile-call
+[ double-2{ 10 5 } ] [
+    10.0 double-2{ 1 2 }
+    [ { float double-2 } declare n/v ] compile-call
  ] unit-test
  
-[ 2double-array{ 0.5 1 } ] [
-    2double-array{ 1 2 } 2
-    [ { float 2double-array } declare v/n ] compile-call
+[ double-2{ 0.5 1 } ] [
+    double-2{ 1 2 } 2
+    [ { float double-2 } declare v/n ] compile-call
  ] unit-test
  
-[ 2double-array{ 0 0 } ] [ 2double-array new ] unit-test
+[ double-2{ 0 0 } ] [ double-2 new ] unit-test
  
-[ 2double-array{ 1 0 } ] [
-    2double-array{ 10 0 }
-    [ { 2double-array } declare normalize ] compile-call
+[ double-2{ 1 0 } ] [
+    double-2{ 10 0 }
+    [ { double-2 } declare normalize ] compile-call
  ] unit-test
  
  [ 5.0 ] [
-    2double-array{ 1 2 }
-    [ { 2double-array } declare norm-sq ] compile-call
+    double-2{ 1 2 }
+    [ { double-2 } declare norm-sq ] compile-call
  ] unit-test
  
  [ t ] [
-    2double-array{ 1 0 }
-    2double-array{ 0 1 }
-    [ { 2double-array 2double-array } declare distance ] compile-call
+    double-2{ 1 0 }
+    double-2{ 0 1 }
+    [ { double-2 double-2 } declare distance ] compile-call
      2 sqrt 1.0e-6 ~
  ] unit-test
  
-[ 4double-array{ 0 0 0 0 } ] [ 4double-array new ] unit-test
+[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test
  
-[ 4double-array{ 1 2 3 4 } ] [
-    1 2 3 4 4double-array-boa
+[ double-4{ 1 2 3 4 } ] [
+    1 2 3 4 double-4-boa
  ] unit-test
  
-[ 4double-array{ 1 1 1 1 } ] [
-    1 4double-array-with
+[ double-4{ 1 1 1 1 } ] [
+    1 double-4-with
  ] unit-test
  
-[ 4double-array{ 0 1 2 3 } ] [
-    1 4double-array-with [ * ] map-index
+[ double-4{ 0 1 2 3 } ] [
+    1 double-4-with [ * ] map-index
  ] unit-test
  
-[ V{ float } ] [ [ { 4double-array } declare norm-sq ] final-classes ] unit-test
+[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test
  
-[ V{ float } ] [ [ { 4double-array } declare norm ] final-classes ] unit-test
+[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test
  
-[ 4double-array{ 12 12 12 12 } ] [
-    12 [ 4double-array-with ] compile-call
+[ double-4{ 12 12 12 12 } ] [
+    12 [ double-4-with ] compile-call
  ] unit-test
  
-[ 4double-array{ 1 2 3 4 } ] [
-    1 2 3 4 [ 4double-array-boa ] compile-call
+[ double-4{ 1 2 3 4 } ] [
+    1 2 3 4 [ double-4-boa ] compile-call
  ] unit-test
  
-[ 4double-array{ 11 22 33 44 } ] [
-    4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
-    [ { 4double-array 4double-array } declare v+ ] compile-call
+[ double-4{ 11 22 33 44 } ] [
+    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
+    [ { double-4 double-4 } declare v+ ] compile-call
  ] unit-test
  
-[ 4double-array{ -9 -18 -27 -36 } ] [
-    4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
-    [ { 4double-array 4double-array } declare v- ] compile-call
+[ double-4{ -9 -18 -27 -36 } ] [
+    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
+    [ { double-4 double-4 } declare v- ] compile-call
  ] unit-test
  
-[ 4double-array{ 10 40 90 160 } ] [
-    4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
-    [ { 4double-array 4double-array } declare v* ] compile-call
+[ double-4{ 10 40 90 160 } ] [
+    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
+    [ { double-4 double-4 } declare v* ] compile-call
  ] unit-test
  
-[ 4double-array{ 10 100 1000 10000 } ] [
-    4double-array{ 100 2000 30000 400000 } 4double-array{ 10 20 30 40 }
-    [ { 4double-array 4double-array } declare v/ ] compile-call
+[ double-4{ 10 100 1000 10000 } ] [
+    double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 }
+    [ { double-4 double-4 } declare v/ ] compile-call
  ] unit-test
  
-[ 4double-array{ -10 -20 -30 -40 } ] [
-    4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
-    [ { 4double-array 4double-array } declare vmin ] compile-call
+[ double-4{ -10 -20 -30 -40 } ] [
+    double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
+    [ { double-4 double-4 } declare vmin ] compile-call
  ] unit-test
  
-[ 4double-array{ 10 20 30 40 } ] [
-    4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
-    [ { 4double-array 4double-array } declare vmax ] compile-call
+[ double-4{ 10 20 30 40 } ] [
+    double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
+    [ { double-4 double-4 } declare vmax ] compile-call
  ] unit-test
  
  [ 10.0 ] [
-    4double-array{ 1 2 3 4 }
-    [ { 4double-array } declare sum ] compile-call
+    double-4{ 1 2 3 4 }
+    [ { double-4 } declare sum ] compile-call
  ] unit-test
  
  [ 13.0 ] [
-    4double-array{ 1 2 3 4 }
-    [ { 4double-array } declare sum 3.0 + ] compile-call
+    double-4{ 1 2 3 4 }
+    [ { double-4 } declare sum 3.0 + ] compile-call
  ] unit-test
  
  [ 8.0 ] [
-    4double-array{ 1 2 3 4 } 4double-array{ 2 0 2 0 }
-    [ { 4double-array 4double-array } declare v. ] compile-call
+    double-4{ 1 2 3 4 } double-4{ 2 0 2 0 }
+    [ { double-4 double-4 } declare v. ] compile-call
  ] unit-test
  
-[ 4double-array{ 5 10 15 20 } ] [
-    5.0 4double-array{ 1 2 3 4 }
-    [ { float 4double-array } declare n*v ] compile-call
+[ double-4{ 5 10 15 20 } ] [
+    5.0 double-4{ 1 2 3 4 }
+    [ { float double-4 } declare n*v ] compile-call
  ] unit-test
  
-[ 4double-array{ 5 10 15 20 } ] [
-    4double-array{ 1 2 3 4 } 5.0
-    [ { float 4double-array } declare v*n ] compile-call
+[ double-4{ 5 10 15 20 } ] [
+    double-4{ 1 2 3 4 } 5.0
+    [ { float double-4 } declare v*n ] compile-call
  ] unit-test
  
-[ 4double-array{ 10 5 2 5 } ] [
-    10.0 4double-array{ 1 2 5 2 }
-    [ { float 4double-array } declare n/v ] compile-call
+[ double-4{ 10 5 2 5 } ] [
+    10.0 double-4{ 1 2 5 2 }
+    [ { float double-4 } declare n/v ] compile-call
  ] unit-test
  
-[ 4double-array{ 0.5 1 1.5 2 } ] [
-    4double-array{ 1 2 3 4 } 2
-    [ { float 4double-array } declare v/n ] compile-call
+[ double-4{ 0.5 1 1.5 2 } ] [
+    double-4{ 1 2 3 4 } 2
+    [ { float double-4 } declare v/n ] compile-call
  ] unit-test
  
-[ 4double-array{ 1 0 0 0 } ] [
-    4double-array{ 10 0 0 0 }
-    [ { 4double-array } declare normalize ] compile-call
+[ double-4{ 1 0 0 0 } ] [
+    double-4{ 10 0 0 0 }
+    [ { double-4 } declare normalize ] compile-call
  ] unit-test
  
  [ 30.0 ] [
-    4double-array{ 1 2 3 4 }
-    [ { 4double-array } declare norm-sq ] compile-call
+    double-4{ 1 2 3 4 }
+    [ { double-4 } declare norm-sq ] compile-call
  ] unit-test
  
  [ t ] [
-    4double-array{ 1 0 0 0 }
-    4double-array{ 0 1 0 0 }
-    [ { 4double-array 4double-array } declare distance ] compile-call
+    double-4{ 1 0 0 0 }
+    double-4{ 0 1 0 0 }
+    [ { double-4 double-4 } declare distance ] compile-call
      2 sqrt 1.0e-6 ~
  ] unit-test
  
+[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test
+
+[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test
+
+[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test
+
+[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test
+
+[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test
+
+[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test
+
+[ float-8{ 3 6 9 12 15 18 21 24 } ] [
+    float-8{ 1 2 3 4 5 6 7 8 }
+    float-8{ 2 4 6 8 10 12 14 16 }
+    [ { float-8 float-8 } declare v+ ] compile-call
+] unit-test
+
+[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+    float-8{ 1 2 3 4 5 6 7 8 }
+    float-8{ 2 4 6 8 10 12 14 16 }
+    [ { float-8 float-8 } declare v- ] compile-call
+] unit-test
+
+[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+    -0.5
+    float-8{ 2 4 6 8 10 12 14 16 }
+    [ { float float-8 } declare n*v ] compile-call
+] unit-test
+
+[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+    float-8{ 2 4 6 8 10 12 14 16 }
+    -0.5
+    [ { float-8 float } declare v*n ] compile-call
+] unit-test
+
+[ float-8{ 256 128 64 32 16 8 4 2 } ] [
+    256.0
+    float-8{ 1 2 4 8 16 32 64 128 }
+    [ { float float-8 } declare n/v ] compile-call
+] unit-test
+
+[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+    float-8{ 2 4 6 8 10 12 14 16 }
+    -2.0
+    [ { float-8 float } declare v/n ] compile-call
+] unit-test
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor

index c487e5954384a092bb904f3b1f84e7ab2439eb59..cf237bb5c2a2037864db50b1be79a4ea42ccd777 100644 (file)
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -1,51 +1,59 @@
  ! Copyright (C) 2009 Slava Pestov.
  ! See http://factorcode.org/license.txt for BSD license.
  USING: accessors alien.c-types byte-arrays cpu.architecture
-generalizations kernel math math.functions math.vectors
+kernel math math.functions math.vectors
  math.vectors.simd.functor math.vectors.simd.intrinsics
  math.vectors.specialization parser prettyprint.custom sequences
  sequences.private specialized-arrays.double locals assocs
-literals words fry ;
+words fry ;
  IN: math.vectors.simd
  
  <<
  
-DEFER: 4float-array
-DEFER: 2double-array
+DEFER: float-4
+DEFER: double-2
+DEFER: float-8
+DEFER: double-4
  
-"double" 2 define-simd-type
-"float" 4 define-simd-type
+"double" define-simd-128
+"float" define-simd-128
+"double" define-simd-256
+"float" define-simd-256
  
  >>
  
-! Constructors
-: 4float-array-with ( x -- simd-array )
-    >float 4float-array-rep (simd-broadcast) 4float-array boa ; inline
+: float-4-with ( x -- simd-array )
+    >float float-4-rep (simd-broadcast) float-4 boa ; inline
  
-: 4float-array-boa ( a b c d -- simd-array )
-    [ >float ] 4 napply 4float-array-rep (simd-gather-4) 4float-array boa ; inline
+:: float-4-boa ( a b c d -- simd-array )
+    a >float b >float c >float d >float
+    float-4-rep (simd-gather-4) float-4 boa ; inline
  
-: 2double-array-with ( x -- simd-array )
-    >float 2double-array-rep (simd-broadcast) 2double-array boa ; inline
+: double-2-with ( x -- simd-array )
+    >float double-2-rep (simd-broadcast) double-2 boa ; inline
  
-: 2double-array-boa ( a b -- simd-array )
-    [ >float ] bi@ 2double-array-rep (simd-gather-2) 2double-array boa ; inline
+: double-2-boa ( a b -- simd-array )
+    [ >float ] bi@ double-2-rep (simd-gather-2) double-2 boa ; inline
  
-<PRIVATE
-
-: 4float-array-vv->v-op ( v1 v2 quot -- v3 )
-    [ [ underlying>> ] bi@ 4float-array-rep ] dip call 4float-array boa ; inline
+: float-8-with ( x -- simd-array )
+    [ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@
+    float-8 boa ; inline
  
-: 4float-array-v->n-op ( v1 quot -- v2 )
-    [ underlying>> 4float-array-rep ] dip call ; inline
+:: float-8-boa ( a b c d e f g h -- simd-array )
+    a b c d float-4-boa
+    e f g h float-4-boa
+    [ underlying>> ] bi@
+    float-8 boa ; inline
  
-: 2double-array-vv->v-op ( v1 v2 quot -- v3 )
-    [ [ underlying>> ] bi@ 2double-array-rep ] dip call 2double-array boa ; inline
+: double-4-with ( x -- simd-array )
+    [ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@
+    double-4 boa ; inline
  
-: 2double-array-v->n-op ( v1 quot -- v2 )
-    [ underlying>> 2double-array-rep ] dip call ; inline
-
-PRIVATE>
+:: double-4-boa ( a b c d -- simd-array )
+    a b double-2-boa
+    c d double-2-boa
+    [ underlying>> ] bi@
+    double-4 boa ; inline
  
  <<
  
@@ -64,6 +72,7 @@ PRIVATE>
      '[ drop _ key? ] assoc-filter ;
  
  :: high-level-ops ( ctor -- assoc )
+    ! Some SIMD operations are defined in terms of others.
      {
          { vneg [ [ dup v- ] keep v- ] }
          { v. [ v* sum ] }
@@ -87,121 +96,44 @@ PRIVATE>
  
  PRIVATE>
  
-\ 4float-array \ 4float-array-with float H{
-    { v+ [ [ (simd-v+) ] 4float-array-vv->v-op ] }
-    { v- [ [ (simd-v-) ] 4float-array-vv->v-op ] }
-    { v* [ [ (simd-v*) ] 4float-array-vv->v-op ] }
-    { v/ [ [ (simd-v/) ] 4float-array-vv->v-op ] }
-    { vmin [ [ (simd-vmin) ] 4float-array-vv->v-op ] }
-    { vmax [ [ (simd-vmax) ] 4float-array-vv->v-op ] }
-    { sum [ [ (simd-sum) ] 4float-array-v->n-op ] }
+\ float-4 \ float-4-with float H{
+    { v+ [ [ (simd-v+) ] float-4-vv->v-op ] }
+    { v- [ [ (simd-v-) ] float-4-vv->v-op ] }
+    { v* [ [ (simd-v*) ] float-4-vv->v-op ] }
+    { v/ [ [ (simd-v/) ] float-4-vv->v-op ] }
+    { vmin [ [ (simd-vmin) ] float-4-vv->v-op ] }
+    { vmax [ [ (simd-vmax) ] float-4-vv->v-op ] }
+    { sum [ [ (simd-sum) ] float-4-v->n-op ] }
  } simd-vector-words
  
-\ 2double-array \ 2double-array-with float H{
-    { v+ [ [ (simd-v+) ] 2double-array-vv->v-op ] }
-    { v- [ [ (simd-v-) ] 2double-array-vv->v-op ] }
-    { v* [ [ (simd-v*) ] 2double-array-vv->v-op ] }
-    { v/ [ [ (simd-v/) ] 2double-array-vv->v-op ] }
-    { vmin [ [ (simd-vmin) ] 2double-array-vv->v-op ] }
-    { vmax [ [ (simd-vmax) ] 2double-array-vv->v-op ] }
-    { sum [ [ (simd-sum) ] 2double-array-v->n-op ] }
+\ double-2 \ double-2-with float H{
+    { v+ [ [ (simd-v+) ] double-2-vv->v-op ] }
+    { v- [ [ (simd-v-) ] double-2-vv->v-op ] }
+    { v* [ [ (simd-v*) ] double-2-vv->v-op ] }
+    { v/ [ [ (simd-v/) ] double-2-vv->v-op ] }
+    { vmin [ [ (simd-vmin) ] double-2-vv->v-op ] }
+    { vmax [ [ (simd-vmax) ] double-2-vv->v-op ] }
+    { sum [ [ (simd-sum) ] double-2-v->n-op ] }
  } simd-vector-words
  
->>
-
-! Synthesize 256-bit vectors from a pair of 128-bit vectors
-! Functorize this later so that we can do it for integers, etc
-TUPLE: 4double-array
-{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
-{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
-
-: <4double-array> ( -- simd-array )
-    16 <byte-array> 16 <byte-array> 4double-array boa ; inline
-
-: (4double-array) ( -- simd-array )
-    16 (byte-array) 16 (byte-array) 4double-array boa ; inline
-
-M: 4double-array clone
-    [ underlying1>> clone ] [ underlying2>> clone ] bi
-    4double-array boa ; inline
-
-M: 4double-array length drop 4 ; inline
-
-<PRIVATE
-
-: 4double-array-deref ( n seq -- n' seq' )
-    over 2 < [ underlying1>> ] [ [ 2 - ] dip underlying2>> ] if
-    2 swap double-array boa ; inline
-
-PRIVATE>
-
-M: 4double-array nth-unsafe
-    4double-array-deref nth-unsafe ; inline
-
-M: 4double-array set-nth-unsafe
-    4double-array-deref set-nth-unsafe ; inline
-
-: >4double-array ( seq -- simd-array )
-    4double-array new clone-like ;
-
-M: 4double-array like
-    drop dup 4double-array? [ >4double-array ] unless ; inline
-
-M: 4double-array new-sequence
-    drop dup 4 = [ drop (4double-array) ] [ 4 bad-length ] if ; inline
-
-M: 4double-array equal?
-    over 4double-array? [ sequence= ] [ 2drop f ] if ;
-
-M: 4double-array byte-length drop 32 ; inline
-
-SYNTAX: 4double-array{
-    \ } [ >4double-array ] parse-literal ;
-
-M: 4double-array pprint-delims
-    drop \ 4double-array{ \ } ;
-
-M: 4double-array >pprint-sequence ;
-
-M: 4double-array pprint* pprint-object ;
-
-INSTANCE: 4double-array sequence
-
-: 4double-array-with ( x -- simd-array )
-    dup [ >float 2double-array-rep (simd-broadcast) ] bi@
-    4double-array boa ; inline
-
-: 4double-array-boa ( a b c d -- simd-array )
-    [ >float ] 4 napply [ 2double-array-rep (simd-gather-2) ] 2bi@
-    4double-array boa ; inline
-
-! SIMD operations on 4double-arrays
-
-<PRIVATE
-
-: 4double-array-vv->v-op ( v1 v2 quot -- v3 )
-    [ [ [ underlying1>> ] bi@ 2double-array-rep ] dip call ]
-    [ [ [ underlying2>> ] bi@ 2double-array-rep ] dip call ] 3bi
-    4double-array boa ; inline
-
-: 4double-array-v->n-op ( v1 quot scalar-quot -- v2 )
-    [
-        [ [ underlying1>> 2double-array-rep ] dip call ]
-        [ [ underlying2>> 2double-array-rep ] dip call ] 2bi
-    ] dip call ; inline
-
-PRIVATE>
-
-<<
+\ float-8 \ float-8-with float H{
+    { v+ [ [ (simd-v+) ] float-8-vv->v-op ] }
+    { v- [ [ (simd-v-) ] float-8-vv->v-op ] }
+    { v* [ [ (simd-v*) ] float-8-vv->v-op ] }
+    { v/ [ [ (simd-v/) ] float-8-vv->v-op ] }
+    { vmin [ [ (simd-vmin) ] float-8-vv->v-op ] }
+    { vmax [ [ (simd-vmax) ] float-8-vv->v-op ] }
+    { sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] }
+} simd-vector-words
  
-\ 4double-array \ 4double-array-with float H{
-    { v+ [ [ (simd-v+) ] 4double-array-vv->v-op ] }
-    { v- [ [ (simd-v-) ] 4double-array-vv->v-op ] }
-    { v* [ [ (simd-v*) ] 4double-array-vv->v-op ] }
-    { v/ [ [ (simd-v/) ] 4double-array-vv->v-op ] }
-    { vmin [ [ (simd-vmin) ] 4double-array-vv->v-op ] }
-    { vmax [ [ (simd-vmax) ] 4double-array-vv->v-op ] }
-    { sum [ [ (simd-sum) ] [ + ] 4double-array-v->n-op ] }
+\ double-4 \ double-4-with float H{
+    { v+ [ [ (simd-v+) ] double-4-vv->v-op ] }
+    { v- [ [ (simd-v-) ] double-4-vv->v-op ] }
+    { v* [ [ (simd-v*) ] double-4-vv->v-op ] }
+    { v/ [ [ (simd-v/) ] double-4-vv->v-op ] }
+    { vmin [ [ (simd-vmin) ] double-4-vv->v-op ] }
+    { vmax [ [ (simd-vmax) ] double-4-vv->v-op ] }
+    { sum [ [ (simd-sum) ] [ + ] double-4-v->n-op ] }
  } simd-vector-words
  
  >>
author	Slava Pestov <slava@slava-pestovs-macbook-pro.local>
	Fri, 4 Sep 2009 01:58:56 +0000 (20:58 -0500)
committer	Slava Pestov <slava@slava-pestovs-macbook-pro.local>
	Fri, 4 Sep 2009 01:58:56 +0000 (20:58 -0500)
basis/alien/c-types/c-types.factor		patch \| blob \| history
basis/compiler/cfg/instructions/instructions.factor		patch \| blob \| history
basis/compiler/cfg/intrinsics/alien/alien.factor		patch \| blob \| history
basis/compiler/cfg/intrinsics/intrinsics.factor		patch \| blob \| history
basis/compiler/cfg/linear-scan/linear-scan-tests.factor		patch \| blob \| history
basis/compiler/cfg/representations/representations-tests.factor		patch \| blob \| history
basis/compiler/cfg/representations/representations.factor		patch \| blob \| history
basis/compiler/cfg/two-operand/two-operand-tests.factor		patch \| blob \| history
basis/compiler/codegen/codegen.factor		patch \| blob \| history
basis/compiler/tests/low-level-ir.factor		patch \| blob \| history
basis/compiler/tree/propagation/simd/simd.factor		patch \| blob \| history
basis/cpu/architecture/architecture.factor		patch \| blob \| history
basis/cpu/ppc/ppc.factor		patch \| blob \| history
basis/cpu/x86/32/32.factor		patch \| blob \| history
basis/cpu/x86/64/64.factor		patch \| blob \| history
basis/cpu/x86/x86.factor		patch \| blob \| history
basis/math/vectors/simd/alien/alien-tests.factor		patch \| blob \| history
basis/math/vectors/simd/alien/alien.factor		patch \| blob \| history
basis/math/vectors/simd/functor/functor.factor		patch \| blob \| history
basis/math/vectors/simd/simd-tests.factor		patch \| blob \| history
basis/math/vectors/simd/simd.factor		patch \| blob \| history