math.vectors.simd: add fast intrinsic for 'nth', replace broadcast primitive with...

author Slava Pestov <slava@slava-pestovs-macbook-pro.local>

Tue, 29 Sep 2009 09:46:38 +0000 (04:46 -0500)

committer Slava Pestov <slava@slava-pestovs-macbook-pro.local>

Tue, 29 Sep 2009 09:48:11 +0000 (04:48 -0500)
author Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Tue, 29 Sep 2009 09:46:38 +0000 (04:46 -0500)
committer Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Tue, 29 Sep 2009 09:48:11 +0000 (04:48 -0500)
diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor

index 2e9d2863c3373a41669461078e29ec1b1bf00aec..716ae46592e0725b8625b52582322851beeed229 100644 (file)
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -278,11 +278,6 @@ PURE-INSN: ##zero-vector
  def: dst
  literal: rep ;
  
-PURE-INSN: ##broadcast-vector
-def: dst
-use: src/scalar-rep
-literal: rep ;
-
  PURE-INSN: ##gather-vector-2
  def: dst
  use: src1/scalar-rep src2/scalar-rep
@@ -298,11 +293,6 @@ def: dst
  use: src
  literal: shuffle rep ;
  
-PURE-INSN: ##select-vector
-def: dst
-use: src
-literal: n rep ;
-
  PURE-INSN: ##add-vector
  def: dst
  use: src1 src2
@@ -418,7 +408,7 @@ def: dst
  use: src1 src2/scalar-rep
  literal: rep ;
  
-! Scalar/integer conversion
+! Scalar/vector conversion
  PURE-INSN: ##scalar>integer
  def: dst/int-rep
  use: src
@@ -429,6 +419,16 @@ def: dst
  use: src/int-rep
  literal: rep ;
  
+PURE-INSN: ##vector>scalar
+def: dst/scalar-rep
+use: src
+literal: rep ;
+
+PURE-INSN: ##scalar>vector
+def: dst
+use: src/scalar-rep
+literal: rep ;
+
  ! Boxing and unboxing aliens
  PURE-INSN: ##box-alien
  def: dst/int-rep
diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor

index 9784855b6dd8b753533e43ad45bc8e5fb0c7d500..c9cbc71a8ce378e09971383542221b4d403bd6d0 100644 (file)
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@@ -175,7 +175,7 @@ IN: compiler.cfg.intrinsics
          { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] }
          { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] }
          { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] }
-        { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-broadcast) [ emit-broadcast-vector ] }
          { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
          { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
          { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor

index 7f393fdc830a5f693de6ac9e11d3c49ba988887b..07ee55fc3c6521d1a1141114efa11dec505a71ae 100644 (file)
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@@ -1,10 +1,10 @@
  ! Copyright (C) 2009 Slava Pestov.
  ! See http://factorcode.org/license.txt for BSD license.
  USING: accessors byte-arrays fry cpu.architecture kernel math
-sequences macros generalizations combinators
-combinators.short-circuit arrays compiler.tree.propagation.info
-compiler.cfg.builder.blocks compiler.cfg.stacks
-compiler.cfg.stacks.local compiler.cfg.hats
+sequences math.vectors.simd.intrinsics macros generalizations
+combinators combinators.short-circuit arrays
+compiler.tree.propagation.info compiler.cfg.builder.blocks
+compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
  compiler.cfg.instructions compiler.cfg.registers
  compiler.cfg.intrinsics.alien ;
  IN: compiler.cfg.intrinsics.simd
@@ -70,6 +70,19 @@ MACRO: if-literals-match ( quots -- )
      [ [ -2 inc-d ds-pop ] 2dip ^^shuffle-vector ds-push ]
      { [ shuffle? ] [ representation? ] } if-literals-match ; inline
  
+: ^^broadcast-vector ( src rep -- dst )
+    [ ^^scalar>vector ] keep
+    [ rep-components 0 <array> ] keep
+    ^^shuffle-vector ;
+
+: emit-broadcast-vector ( node -- )
+    [ ^^broadcast-vector ] emit-unary-vector-op ;
+
+: ^^select-vector ( src n rep -- dst )
+    [ rep-components swap <array> ] keep
+    [ ^^shuffle-vector ] keep
+    ^^vector>scalar ;
+
  : emit-select-vector ( node -- )
      [ [ -2 inc-d ds-pop ] 2dip ^^select-vector ds-push ]
      { [ integer? ] [ representation? ] } if-literals-match ; inline
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor

index 12cf303b4cd4d4d5784ed10b3dc454b823965e3f..8e99f79b36e58a41bab3b3faed6d425a1be08481 100755 (executable)
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -162,11 +162,9 @@ CODEGEN: ##integer>float %integer>float
  CODEGEN: ##float>integer %float>integer
  CODEGEN: ##unbox-vector %unbox-vector
  CODEGEN: ##zero-vector %zero-vector
-CODEGEN: ##broadcast-vector %broadcast-vector
  CODEGEN: ##gather-vector-2 %gather-vector-2
  CODEGEN: ##gather-vector-4 %gather-vector-4
  CODEGEN: ##shuffle-vector %shuffle-vector
-CODEGEN: ##select-vector %select-vector
  CODEGEN: ##box-vector %box-vector
  CODEGEN: ##add-vector %add-vector
  CODEGEN: ##saturated-add-vector %saturated-add-vector
@@ -193,6 +191,8 @@ CODEGEN: ##shl-vector %shl-vector
  CODEGEN: ##shr-vector %shr-vector
  CODEGEN: ##integer>scalar %integer>scalar
  CODEGEN: ##scalar>integer %scalar>integer
+CODEGEN: ##vector>scalar %vector>scalar
+CODEGEN: ##scalar>vector %scalar>vector
  CODEGEN: ##box-alien %box-alien
  CODEGEN: ##box-displaced-alien %box-displaced-alien
  CODEGEN: ##unbox-alien %unbox-alien
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor

index 552ab799bab9102c1ac4d3efd17840583360ca6e..f29b534deb11465ba8e25a10d42041ab3a140b44 100644 (file)
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -28,7 +28,6 @@ IN: compiler.tree.propagation.simd
      (simd-broadcast)
      (simd-gather-2)
      (simd-gather-4)
-    (simd-select)
      alien-vector
  } [ { byte-array } "default-output-classes" set-word-prop ] each
  
@@ -46,6 +45,8 @@ IN: compiler.tree.propagation.simd
  
  \ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop
  
+\ (simd-select) [ 2nip scalar-output-class ] "outputs" set-word-prop
+
  \ assert-positive [
      real [0,inf] <class/interval-info> value-info-intersect
  ] "outputs" set-word-prop
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor

index 9222fcc17cb10019b07116eedc5653c70b79d03f..9d6f8fd66251d551c7c27e30b1e0b25002a2656f 100644 (file)
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -212,11 +212,9 @@ HOOK: %box-vector cpu ( dst src temp rep -- )
  HOOK: %unbox-vector cpu ( dst src rep -- )
  
  HOOK: %zero-vector cpu ( dst rep -- )
-HOOK: %broadcast-vector cpu ( dst src rep -- )
  HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
  HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
  HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
-HOOK: %select-vector cpu ( dst src n rep -- )
  HOOK: %add-vector cpu ( dst src1 src2 rep -- )
  HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
  HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
@@ -243,13 +241,13 @@ HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- )
  
  HOOK: %integer>scalar cpu ( dst src rep -- )
  HOOK: %scalar>integer cpu ( dst src rep -- )
+HOOK: %vector>scalar cpu ( dst src rep -- )
+HOOK: %scalar>vector cpu ( dst src rep -- )
  
  HOOK: %zero-vector-reps cpu ( -- reps )
-HOOK: %broadcast-vector-reps cpu ( -- reps )
  HOOK: %gather-vector-2-reps cpu ( -- reps )
  HOOK: %gather-vector-4-reps cpu ( -- reps )
  HOOK: %shuffle-vector-reps cpu ( -- reps )
-HOOK: %select-vector-reps cpu ( -- reps )
  HOOK: %add-vector-reps cpu ( -- reps )
  HOOK: %saturated-add-vector-reps cpu ( -- reps )
  HOOK: %add-sub-vector-reps cpu ( -- reps )
diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor

index d5ee166486e0242f61285a794f5a46eff190bd0b..006d38f3849c21c2d3026de1ceb98a7fe76426fc 100644 (file)
--- a/basis/cpu/ppc/ppc.factor
+++ b/basis/cpu/ppc/ppc.factor
@@ -184,6 +184,7 @@ M: ppc %shr-imm swapd SRWI ;
  M: ppc %sar     SRAW ;
  M: ppc %sar-imm SRAWI ;
  M: ppc %not     NOT ;
+M: ppc %neg     NEG ;
  
  :: overflow-template ( label dst src1 src2 insn -- )
      0 0 LI
@@ -262,9 +263,10 @@ M: ppc %single>double-float double-rep %copy ;
  M: ppc %double>single-float double-rep %copy ;
  
  ! VMX/AltiVec not supported yet
-M: ppc %broadcast-vector-reps { } ;
+M: ppc %zero-vector-reps { } ;
  M: ppc %gather-vector-2-reps { } ;
  M: ppc %gather-vector-4-reps { } ;
+M: ppc %shuffle-vector-reps { } ;
  M: ppc %add-vector-reps { } ;
  M: ppc %saturated-add-vector-reps { } ;
  M: ppc %add-sub-vector-reps { } ;
@@ -275,14 +277,19 @@ M: ppc %saturated-mul-vector-reps { } ;
  M: ppc %div-vector-reps { } ;
  M: ppc %min-vector-reps { } ;
  M: ppc %max-vector-reps { } ;
+M: ppc %dot-vector-reps { } ;
  M: ppc %sqrt-vector-reps { } ;
  M: ppc %horizontal-add-vector-reps { } ;
+M: ppc %horizontal-sub-vector-reps { } ;
  M: ppc %abs-vector-reps { } ;
  M: ppc %and-vector-reps { } ;
+M: ppc %andn-vector-reps { } ;
  M: ppc %or-vector-reps { } ;
  M: ppc %xor-vector-reps { } ;
  M: ppc %shl-vector-reps { } ;
  M: ppc %shr-vector-reps { } ;
+M: ppc %horizontal-shl-vector-reps { } ;
+M: ppc %horizontal-shr-vector-reps { } ;
  
  M: ppc %unbox-alien ( dst src -- )
      alien-offset LWZ ;
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor

index 397a26a4649728a71c1f75ba5651e4da8dbb8075..14a382d6cf09f1f820fe6ba5eb6278c9ecf7dd16 100644 (file)
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -600,42 +600,42 @@ M: x86 %zero-vector-reps
          { uchar-16-rep    char-16-rep }
      } ?at drop ;
  
-M:: x86 %broadcast-vector ( dst src rep -- )
-    rep unsign-rep {
-        { float-4-rep [
-            dst src float-4-rep %copy
-            dst dst { 0 0 0 0 } SHUFPS
-        ] }
-        { double-2-rep [
-            dst src MOVDDUP
-        ] }
-        { longlong-2-rep [
-            dst src =
-            [ dst dst PUNPCKLQDQ ]
-            [ dst src { 0 1 0 1 } PSHUFD ]
-            if
-        ] }
-        { int-4-rep [
-            dst src { 0 0 0 0 } PSHUFD
-        ] }
-        { short-8-rep [
-            dst src { 0 0 0 0 } PSHUFLW 
-            dst dst PUNPCKLQDQ 
-        ] }
-        { char-16-rep [
-            dst src char-16-rep %copy
-            dst dst PUNPCKLBW
-            dst dst { 0 0 0 0 } PSHUFLW
-            dst dst PUNPCKLQDQ
-        ] }
-    } case ;
-
-M: x86 %broadcast-vector-reps
-    {
-        ! Can't do this with sse1 since it will want to unbox
-        ! a double-precision float and convert to single precision
-        { sse2? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
-    } available-reps ;
+! M:: x86 %broadcast-vector ( dst src rep -- )
+!     rep unsign-rep {
+!         { float-4-rep [
+!             dst src float-4-rep %copy
+!             dst dst { 0 0 0 0 } SHUFPS
+!         ] }
+!         { double-2-rep [
+!             dst src MOVDDUP
+!         ] }
+!         { longlong-2-rep [
+!             dst src =
+!             [ dst dst PUNPCKLQDQ ]
+!             [ dst src { 0 1 0 1 } PSHUFD ]
+!             if
+!         ] }
+!         { int-4-rep [
+!             dst src { 0 0 0 0 } PSHUFD
+!         ] }
+!         { short-8-rep [
+!             dst src { 0 0 0 0 } PSHUFLW 
+!             dst dst PUNPCKLQDQ 
+!         ] }
+!         { char-16-rep [
+!             dst src char-16-rep %copy
+!             dst dst PUNPCKLBW
+!             dst dst { 0 0 0 0 } PSHUFLW
+!             dst dst PUNPCKLQDQ
+!         ] }
+!     } case ;
+! 
+! M: x86 %broadcast-vector-reps
+!     {
+!         ! Can't do this with sse1 since it will want to unbox
+!         ! a double-precision float and convert to single precision
+!         { sse2? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
+!     } available-reps ;
  
  M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
      rep unsign-rep {
@@ -721,11 +721,10 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- )
  
  M: x86 %shuffle-vector-reps
      {
-        { sse2? { double-2-rep float-4-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+        { sse? { float-4-rep } }
+        { sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
      } available-reps ;
  
-M: x86 %select-vector-reps { } ;
-
  M: x86 %add-vector ( dst src1 src2 rep -- )
      [ two-operand ] keep
      {
@@ -1044,8 +1043,9 @@ M: x86 %shr-vector-reps
      } available-reps ;
  
  M: x86 %integer>scalar drop MOVD ;
-
  M: x86 %scalar>integer drop MOVD ;
+M: x86 %vector>scalar %copy ;
+M: x86 %scalar>vector %copy ;
  
  M:: x86 %spill ( src rep dst -- ) dst src rep %copy ;
  M:: x86 %reload ( dst rep src -- ) dst src rep %copy ;
diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor

index ba045cda600239fd8089a64764e4788941be7b2a..89ab07384568f5b5c88540dd9baa6c6c47617d20 100644 (file)
--- a/basis/math/vectors/simd/functor/functor.factor
+++ b/basis/math/vectors/simd/functor/functor.factor
@@ -5,7 +5,7 @@ functors generalizations kernel literals locals math math.functions
  math.vectors math.vectors.private math.vectors.simd.intrinsics
  math.vectors.specialization parser prettyprint.custom sequences
  sequences.private strings words definitions macros cpu.architecture
-namespaces arrays quotations ;
+namespaces arrays quotations combinators sets ;
  QUALIFIED-WITH: math m
  IN: math.vectors.simd.functor
  
@@ -28,11 +28,23 @@ MACRO: simd-boa ( rep class -- simd-array )
  :: define-with-custom-inlining ( word rep class -- )
      word [
          drop
-        rep \ (simd-broadcast) supported-simd-op? [
+        rep \ (simd-vshuffle) supported-simd-op? [
              [ rep rep-coerce rep (simd-broadcast) class boa ]
          ] [ word def>> ] if
      ] "custom-inlining" set-word-prop ;
  
+: simd-nth-fast ( rep -- quot )
+    [ rep-components ] keep
+    '[ swap _ '[ _ _ (simd-select) ] 2array ] map-index
+    '[ swap >fixnum _ case ] ;
+
+: simd-nth-slow ( rep -- quot )
+    rep-component-type dup c-type-getter-boxer array-accessor ;
+
+MACRO: simd-nth ( rep -- x )
+    dup \ (simd-vshuffle) supported-simd-op?
+    [ simd-nth-fast ] [ simd-nth-slow ] if ;
+
  : boa-effect ( rep n -- effect )
      [ rep-components ] dip *
      [ CHAR: a + 1string ] map
@@ -45,8 +57,8 @@ MACRO: simd-boa ( rep class -- simd-array )
  
  ERROR: bad-schema schema ;
  
-: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist )
-    [ simd-ops get ] dip '[
+: low-level-ops ( simd-ops alist -- alist' )
+    '[
          1quotation
          over word-schema _ ?at [ bad-schema ] unless
          [ ] 2sequence
@@ -73,21 +85,17 @@ ERROR: bad-schema schema ;
      ! in the general case.
      elt-class m:float = [ { distance [ v- norm ] } suffix ] when ;
  
-:: simd-vector-words ( class ctor rep vv->v vn->v vv->n v->v v->n -- )
-    rep rep-component-type c-type-boxed-class :> elt-class
-    class
-    elt-class
+TUPLE: simd class elt-class ops wrappers ctor rep ;
+
+: define-simd ( simd -- )
+    dup rep>> rep-component-type c-type-boxed-class >>elt-class
      {
-        { { +vector+ +vector+ -> +vector+ } vv->v }
-        { { +vector+ +scalar+ -> +vector+ } vn->v }
-        { { +vector+ +literal+ -> +vector+ } vn->v }
-        { { +vector+ +vector+ -> +scalar+ } vv->n }
-        { { +vector+ -> +vector+ } v->v }
-        { { +vector+ -> +scalar+ } v->n }
-        { { +vector+ -> +nonnegative+ } v->n }
-    } low-level-ops
-    rep supported-simd-ops
-    ctor elt-class high-level-ops assoc-union
+        [ class>> ]
+        [ elt-class>> ]
+        [ [ ops>> ] [ wrappers>> ] bi low-level-ops ]
+        [ rep>> supported-simd-ops ]
+        [ [ ctor>> ] [ elt-class>> ] bi high-level-ops assoc-union ]
+    } cleave
      specialize-vector-words ;
  
  :: define-simd-128-type ( class rep -- )
@@ -101,6 +109,11 @@ ERROR: bad-schema schema ;
          rep >>rep
      class typedef ;
  
+: (define-simd-128) ( simd -- )
+    simd-ops get >>ops
+    [ define-simd ]
+    [ [ class>> ] [ rep>> ] bi define-simd-128-type ] bi ;
+
  FUNCTOR: define-simd-128 ( T -- )
  
  N            [ 16 T heap-size /i ]
@@ -112,7 +125,6 @@ A-cast       DEFINES ${A}-cast
  >A           DEFINES >${A}
  A{           DEFINES ${A}{
  
-NTH          [ T dup c-type-getter-boxer array-accessor ]
  SET-NTH      [ T dup c-setter array-accessor ]
  
  A-rep        [ A name>> "-rep" append "cpu.architecture" lookup ]
@@ -131,7 +143,7 @@ M: A clone underlying>> clone \ A boa ; inline
  
  M: A length drop N ; inline
  
-M: A nth-unsafe underlying>> NTH call ; inline
+M: A nth-unsafe underlying>> A-rep simd-nth ; inline
  
  M: A set-nth-unsafe underlying>> SET-NTH call ; inline
  
@@ -193,8 +205,20 @@ INSTANCE: A sequence
  : A-v->n-op ( v quot -- n )
      [ underlying>> A-rep ] dip call ; inline
  
-\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words
-\ A \ A-rep define-simd-128-type
+simd new
+    \ A >>class
+    \ A-with >>ctor
+    \ A-rep >>rep
+    {
+        { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
+        { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
+        { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
+        { { +vector+ +vector+ -> +scalar+ } A-vv->n-op }
+        { { +vector+ -> +vector+ } A-v->v-op }
+        { { +vector+ -> +scalar+ } A-v->n-op }
+        { { +vector+ -> +nonnegative+ } A-v->n-op }
+    } >>wrappers
+(define-simd-128)
  
  PRIVATE>
  
@@ -223,6 +247,11 @@ SLOT: underlying2
          rep >>rep
      class typedef ;
  
+: (define-simd-256) ( simd -- )
+    simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops
+    [ define-simd ]
+    [ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
+
  FUNCTOR: define-simd-256 ( T -- )
  
  N            [ 32 T heap-size /i ]
@@ -332,7 +361,19 @@ INSTANCE: A sequence
  : A-v->n-op ( v1 combine-quot -- v2 )
      [ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
  
-\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-vv->n-op \ A-v->v-op \ A-v->n-op simd-vector-words
-\ A \ A-rep define-simd-256-type
+simd new
+    \ A >>class
+    \ A-with >>ctor
+    \ A-rep >>rep
+    {
+        { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
+        { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
+        { { +vector+ +literal+ -> +vector+ } A-vn->v-op }
+        { { +vector+ +vector+ -> +scalar+ } A-vv->n-op }
+        { { +vector+ -> +vector+ } A-v->v-op }
+        { { +vector+ -> +scalar+ } A-v->n-op }
+        { { +vector+ -> +nonnegative+ } A-v->n-op }
+    } >>wrappers
+(define-simd-256)
  
  ;FUNCTOR
diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor

index 78301ae3a53d060cf7e1fe6dbe1b093f5405d786..522488d804cb6d52eac76092b44114abcd32cca7 100644 (file)
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@@ -126,8 +126,6 @@ M: vector-rep supported-simd-op?
          { \ (simd-hlshift)   [ %horizontal-shl-vector-reps ] }
          { \ (simd-hrshift)   [ %horizontal-shr-vector-reps ] }
          { \ (simd-vshuffle)  [ %shuffle-vector-reps        ] }
-        { \ (simd-broadcast) [ %broadcast-vector-reps      ] }
          { \ (simd-gather-2)  [ %gather-vector-2-reps       ] }
          { \ (simd-gather-4)  [ %gather-vector-4-reps       ] }
-        { \ (simd-select)    [ %select-vector-reps         ] }
      } case member? ;
diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor

index 7a3ba727e06e1493bed0cf2c757ea676226faa9c..541e5b5c22a4922111864e53bfdf9ae0c46a23df 100644 (file)
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@@ -21,13 +21,13 @@ ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operat
  $nl
  "SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
  $nl
-"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD in missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
+"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD is missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "."
  $nl
  "SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "."
  $nl
  "SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "."
  $nl
-"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types."
+"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types, a faster instruction for " { $link v. } ", and a few other things."
  $nl
  "On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
  $nl
@@ -183,7 +183,7 @@ $nl
  ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives"
  "No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks."
  $nl
-"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal opeartions include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
+"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal operations include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ;
  
  ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
  "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor

index 588ef8381623e535bafd8db4080b67767f417489..3f43a21c1078a5272401171a264a0c53cc8cbd96 100644 (file)
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@@ -5,7 +5,8 @@ math.vectors.simd.private prettyprint random sequences system
  tools.test vocabs assocs compiler.cfg.debugger words
  locals math.vectors.specialization combinators cpu.architecture
  math.vectors.simd.intrinsics namespaces byte-arrays alien
-specialized-arrays classes.struct eval ;
+specialized-arrays classes.struct eval classes.algebra sets
+quotations ;
  QUALIFIED-WITH: alien.c-types c
  SPECIALIZED-ARRAY: c:float
  SIMD: c:char
@@ -34,6 +35,20 @@ IN: math.vectors.simd.tests
  
  [ V{ float-4 } ] [ [ { float-4 float-4 } declare v+ ] final-classes ] unit-test
  
+[ V{ float } ] [ [ { float-4 } declare second ] final-classes ] unit-test
+
+[ V{ int-4 } ] [ [ { int-4 int-4 } declare v+ ] final-classes ] unit-test
+
+[ t ] [ [ { int-4 } declare second ] final-classes first integer class<= ] unit-test
+
+[ V{ longlong-2 } ] [ [ { longlong-2 longlong-2 } declare v+ ] final-classes ] unit-test
+
+[ V{ integer } ] [ [ { longlong-2 } declare second ] final-classes ] unit-test
+
+[ V{ int-8 } ] [ [ { int-8 int-8 } declare v+ ] final-classes ] unit-test
+
+[ t ] [ [ { int-8 } declare second ] final-classes first integer class<= ] unit-test
+
  ! Test puns; only on x86
  cpu x86? [
      [ double-2{ 4 1024 } ] [
@@ -78,9 +93,10 @@ CONSTANT: simd-classes
  : boa-ctors ( -- seq )
      simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
  
-: check-optimizer ( seq inputs quot eq-quot -- )
+: check-optimizer ( seq quot eq-quot -- failures )
      '[
          @
+        [ dup [ class ] { } map-as ] dip '[ _ declare @ ]
          {
              [ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
              [ "print-checks" get [ [ . ] bi@ ] [ 2drop ] if ]
@@ -104,7 +120,7 @@ CONSTANT: simd-classes
  
  [ { } ] [
      with-ctors [
-        [ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ]
+        [ 1000 random '[ _ ] ] dip '[ _ execute ]
      ] [ = ] check-optimizer
  ] unit-test
  
@@ -112,10 +128,8 @@ CONSTANT: simd-classes
  
  [ { } ] [
      boa-ctors [
-        dup stack-effect in>> length
-        [ nip [ 1000 random ] [ ] replicate-as ]
-        [ fixnum <array> swap '[ _ declare _ execute ] ]
-        2bi
+        [ stack-effect in>> length [ 1000 random ] [ ] replicate-as ] keep
+        '[ _ execute ]
      ] [ = ] check-optimizer
  ] unit-test
  
@@ -126,31 +140,22 @@ CONSTANT: simd-classes
  
  :: check-vector-op ( word inputs class elt-class -- inputs quot )
      inputs [
-        [
-            {
-                { +vector+ [ class random-vector ] }
-                { +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
-            } case
-        ] [ ] map-as
-    ] [
-        [
-            {
-                { +vector+ [ class ] }
-                { +scalar+ [ elt-class ] }
-            } case
-        ] map
-    ] bi
-    word '[ _ declare _ execute ] ;
+        {
+            { +vector+ [ class random-vector ] }
+            { +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
+        } case
+    ] [ ] map-as
+    word '[ _ execute ] ;
  
  : remove-float-words ( alist -- alist' )
-    [ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ;
+    { vsqrt n/v v/n v/ normalize } unique assoc-diff ;
  
  : remove-integer-words ( alist -- alist' )
-    [ drop { vlshift vrshift } member? not ] assoc-filter ;
+    { vlshift vrshift } unique assoc-diff ;
  
  : remove-special-words ( alist -- alist' )
      ! These have their own tests later
-    [ drop { hlshift hrshift vshuffle } member? not ] assoc-filter ;
+    { hlshift hrshift vshuffle } unique assoc-diff ;
  
  : ops-to-check ( elt-class -- alist )
      [ vector-words >alist ] dip
@@ -189,13 +194,89 @@ simd-classes&reps [
      [ [ { } ] ] dip first3 '[ _ _ _ check-vector-ops ] unit-test
  ] each
  
-! Other regressions
-[ 8000000 ] [
-    int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
-    [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
-] unit-test
+"== Checking shifts and permutations" print
+
+[ int-4{ 256 512 1024 2048 } ]
+[ int-4{ 1 2 4 8 } 1 hlshift ] unit-test
+
+[ int-4{ 256 512 1024 2048 } ]
+[ int-4{ 1 2 4 8 } [ { int-4 } declare 1 hlshift ] compile-call ] unit-test
+
+[ int-4{ 1 2 4 8 } ]
+[ int-4{ 256 512 1024 2048 } 1 hrshift ] unit-test
+
+[ int-4{ 1 2 4 8 } ]
+[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
+
+! Shuffles
+: shuffles-for ( n -- shuffles )
+    {
+        { 2 [
+            {
+                { 0 1 }
+                { 1 1 }
+                { 1 0 }
+                { 0 0 }
+            }
+        ] }
+        { 4 [
+            {
+                { 1 2 3 0 }
+                { 0 1 2 3 }
+                { 1 1 2 2 }
+                { 0 0 1 1 }
+                { 2 2 3 3 }
+                { 0 1 0 1 }
+                { 2 3 2 3 }
+                { 0 0 2 2 }
+                { 1 1 3 3 }
+                { 0 1 0 1 }
+                { 2 2 3 3 }
+            }
+        ] }
+        { 8 [
+            4 shuffles-for
+            4 shuffles-for
+            [ [ 4 + ] map ] map
+            [ append ] 2map
+        ] }
+        [ dup '[ _ random ] replicate 1array ]
+    } case ;
+
+simd-classes [
+    [ [ { } ] ] dip
+    [ new length shuffles-for ] keep
+    '[
+        _ [ [ _ new [ length iota ] keep like 1quotation ] dip '[ _ vshuffle ] ]
+        [ = ] check-optimizer
+    ] unit-test
+] each
+
+"== Checking element access" print
+
+! Test element access -- it should box bignums for int-4 on x86
+: test-accesses ( seq -- failures )
+    [ length >array ] keep
+    '[ [ _ 1quotation ] dip '[ _ swap nth ] ] [ = ] check-optimizer ; inline
+
+[ { } ] [ float-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test
+[ { } ] [ int-4{ HEX: 7fffffff 3 4 -8 } test-accesses ] unit-test
+[ { } ] [ uint-4{ HEX: ffffffff 2 3 4 } test-accesses ] unit-test
+
+[ { } ] [ double-2{ 1.0 2.0 } test-accesses ] unit-test
+[ { } ] [ longlong-2{ 1 2 } test-accesses ] unit-test
+[ { } ] [ ulonglong-2{ 1 2 } test-accesses ] unit-test
+
+[ { } ] [ float-8{ 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 } test-accesses ] unit-test
+[ { } ] [ int-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
+[ { } ] [ uint-8{ 1 2 3 4 5 6 7 8 } test-accesses ] unit-test
+
+[ { } ] [ double-4{ 1.0 2.0 3.0 4.0 } test-accesses ] unit-test
+[ { } ] [ longlong-4{ 1 2 3 4 } test-accesses ] unit-test
+[ { } ] [ ulonglong-4{ 1 2 3 4 } test-accesses ] unit-test
+
+"== Checking alien operations" print
  
-! Vector alien intrinsics
  [ float-4{ 1 2 3 4 } ] [
      [
          float-4{ 1 2 3 4 }
@@ -259,60 +340,12 @@ STRUCT: simd-struct
      ] compile-call
  ] unit-test
  
-[ ] [ char-16 new 1array stack. ] unit-test
-
-[ int-4{ 256 512 1024 2048 } ]
-[ int-4{ 1 2 4 8 } 1 hlshift ] unit-test
-
-[ int-4{ 256 512 1024 2048 } ]
-[ int-4{ 1 2 4 8 } [ { int-4 } declare 1 hlshift ] compile-call ] unit-test
-
-[ int-4{ 1 2 4 8 } ]
-[ int-4{ 256 512 1024 2048 } 1 hrshift ] unit-test
-
-[ int-4{ 1 2 4 8 } ]
-[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
-
-! Shuffles
-: test-shuffle ( input shuffle -- failures )
-    [ dup class 1array ] dip
-    '[ _ declare _ vshuffle ]
-    [ call ] [ compile-call ] 2bi = not ; inline
-
-: shuffles-for ( seq -- shuffles )
-    length {
-        { 2 [
-            {
-                { 0 1 }
-                { 1 1 }
-                { 1 0 }
-                { 0 0 }
-            }
-        ] }
-        { 4 [
-            {
-                { 1 2 3 0 }
-                { 0 1 2 3 }
-                { 1 1 2 2 }
-                { 0 0 1 1 }
-                { 2 2 3 3 }
-                { 0 1 0 1 }
-                { 2 3 2 3 }
-                { 0 0 2 2 }
-                { 1 1 3 3 }
-                { 0 1 0 1 }
-                { 2 2 3 3 }
-            }
-        ] }
-    } case ;
-
-: test-shuffles ( input -- failures )
-    dup shuffles-for [ test-shuffle ] with filter ; inline
+"== Misc tests" print
  
-[ { } ] [ float-4{ 1.0 2.0 3.0 4.0 } test-shuffles ] unit-test
-[ { } ] [ int-4{ 1 2 3 4 } test-shuffles ] unit-test
-[ { } ] [ uint-4{ 1 2 3 4 } test-shuffles ] unit-test
+[ ] [ char-16 new 1array stack. ] unit-test
  
-[ { } ] [ double-2{ 1.0 2.0 } test-shuffles ] unit-test
-[ { } ] [ longlong-2{ 1 2 } test-shuffles ] unit-test
-[ { } ] [ ulonglong-2{ 1 2 } test-shuffles ] unit-test
+! Other regressions
+[ 8000000 ] [
+    int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
+    [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
+] unit-test
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor

index 230f43029a761b4d5f10df40ef9329789418ef7f..af04e283f0d48b586d03a8db8abc3d56747bf793 100644 (file)
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -2,7 +2,8 @@
  ! See http://factorcode.org/license.txt for BSD license.
  USING: alien.c-types combinators fry kernel parser math math.parser
  math.vectors.simd.functor sequences splitting vocabs.generated
-vocabs.loader vocabs.parser words accessors ;
+vocabs.loader vocabs.parser words accessors vocabs compiler.units
+definitions ;
  QUALIFIED-WITH: alien.c-types c
  IN: math.vectors.simd
  
@@ -17,6 +18,12 @@ ERROR: bad-base-type type ;
      dup { c:char c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double } memq?
      [ bad-base-type ] unless ;
  
+: forget-instances ( -- )
+    [
+        "math.vectors.simd.instances" child-vocabs
+        [ forget-vocab ] each
+    ] with-compilation-unit ;
+
  PRIVATE>
  
  : define-simd-vocab ( type -- vocab )
@@ -29,3 +36,4 @@ PRIVATE>
  
  SYNTAX: SIMD:
      scan-word define-simd-vocab use-vocab ;
+
author	Slava Pestov <slava@slava-pestovs-macbook-pro.local>
	Tue, 29 Sep 2009 09:46:38 +0000 (04:46 -0500)
committer	Slava Pestov <slava@slava-pestovs-macbook-pro.local>
	Tue, 29 Sep 2009 09:48:11 +0000 (04:48 -0500)
basis/compiler/cfg/instructions/instructions.factor		patch \| blob \| history
basis/compiler/cfg/intrinsics/intrinsics.factor		patch \| blob \| history
basis/compiler/cfg/intrinsics/simd/simd.factor		patch \| blob \| history
basis/compiler/codegen/codegen.factor		patch \| blob \| history
basis/compiler/tree/propagation/simd/simd.factor		patch \| blob \| history
basis/cpu/architecture/architecture.factor		patch \| blob \| history
basis/cpu/ppc/ppc.factor		patch \| blob \| history
basis/cpu/x86/x86.factor		patch \| blob \| history
basis/math/vectors/simd/functor/functor.factor		patch \| blob \| history
basis/math/vectors/simd/intrinsics/intrinsics.factor		patch \| blob \| history
basis/math/vectors/simd/simd-docs.factor		patch \| blob \| history
basis/math/vectors/simd/simd-tests.factor		patch \| blob \| history
basis/math/vectors/simd/simd.factor		patch \| blob \| history