From 47d87633406c48f1e7af4f2b5f6cdf21896ea35b Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Sun, 20 Sep 2009 16:48:17 -0500
Subject: [PATCH] More integer SIMD work - move generated vocab support from
 specialized-arrays to vocabs.generated - add fuzz testing to
 math.vectors.simd - add alien type support for integer SIMD vectors - SIMD:
 parsing word generates a SIMD type, instead of pre-generating them all in
 math.vectors.simd

---
 .../tree/propagation/simd/simd.factor         |   1 +
 basis/cpu/x86/x86.factor                      |   3 +-
 .../vectors/simd/alien/alien-tests.factor     |  70 ---
 basis/math/vectors/simd/alien/alien.factor    |  42 --
 .../math/vectors/simd/functor/functor.factor  |  60 +-
 basis/math/vectors/simd/simd-docs.factor      |  67 ++-
 basis/math/vectors/simd/simd-tests.factor     | 534 +++++++-----------
 basis/math/vectors/simd/simd.factor           |  33 +-
 .../specialized-arrays.factor                 |  11 +-
 .../specialized-vectors.factor                |   2 +-
 .../alien => vocabs/generated}/authors.txt    |   0
 basis/vocabs/generated/generated.factor       |  13 +
 extra/benchmark/nbody-simd/nbody-simd.factor  |   1 +
 .../raytracer-simd/raytracer-simd.factor      |   1 +
 extra/benchmark/simd-1/simd-1.factor          |   1 +
 15 files changed, 342 insertions(+), 497 deletions(-)
 delete mode 100644 basis/math/vectors/simd/alien/alien-tests.factor
 delete mode 100644 basis/math/vectors/simd/alien/alien.factor
 rename basis/{math/vectors/simd/alien => vocabs/generated}/authors.txt (100%)
 create mode 100644 basis/vocabs/generated/generated.factor
diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor
index 3baa7cdcbf..42c1f35617 100644
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@@ -24,6 +24,7 @@ IN: compiler.tree.propagation.simd
         literal>> scalar-rep-of {
             { float-rep [ float ] }
             { double-rep [ float ] }
+            { int-rep [ integer ] }
         } case
     ] [ drop real ] if
     <class-info>
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index 4d80862ed3..322b123d99 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -417,8 +417,7 @@ M: x86 %horizontal-add-vector ( dst src rep -- )
 
 M: x86 %horizontal-add-vector-reps
     {
-        { sse? { float-4-rep } }
-        { sse2? { double-2-rep short-8-rep uchar-16-rep } }
+        { sse3? { float-4-rep double-2-rep } }
     } available-reps ;
 
 M: x86 %unbox-alien ( dst src -- )
diff --git a/basis/math/vectors/simd/alien/alien-tests.factor b/basis/math/vectors/simd/alien/alien-tests.factor
deleted file mode 100644
index 87540dd9a5..0000000000
--- a/basis/math/vectors/simd/alien/alien-tests.factor
+++ /dev/null
@@ -1,70 +0,0 @@
-USING: cpu.architecture math.vectors.simd
-math.vectors.simd.intrinsics accessors math.vectors.simd.alien
-kernel classes.struct tools.test compiler sequences byte-arrays
-alien math kernel.private specialized-arrays combinators ;
-SPECIALIZED-ARRAY: float
-IN: math.vectors.simd.alien.tests
-
-! Vector alien intrinsics
-[ float-4{ 1 2 3 4 } ] [
-    [
-        float-4{ 1 2 3 4 }
-        underlying>> 0 float-4-rep alien-vector
-    ] compile-call float-4 boa
-] unit-test
-
-[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
-    16 [ 1 ] B{ } replicate-as 16 <byte-array>
-    [
-        0 [
-            { byte-array c-ptr fixnum } declare
-            float-4-rep set-alien-vector
-        ] compile-call
-    ] keep
-] unit-test
-
-[ float-array{ 1 2 3 4 } ] [
-    [
-        float-array{ 1 2 3 4 } underlying>>
-        float-array{ 4 3 2 1 } clone
-        [ underlying>> 0 float-4-rep set-alien-vector ] keep
-    ] compile-call
-] unit-test
-
-STRUCT: simd-struct
-{ x float-4 }
-{ y double-2 }
-{ z double-4 }
-{ w float-8 } ;
-
-[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
-
-[
-    float-4{ 1 2 3 4 }
-    double-2{ 2 1 }
-    double-4{ 4 3 2 1 }
-    float-8{ 1 2 3 4 5 6 7 8 }
-] [
-    simd-struct <struct>
-    float-4{ 1 2 3 4 } >>x
-    double-2{ 2 1 } >>y
-    double-4{ 4 3 2 1 } >>z
-    float-8{ 1 2 3 4 5 6 7 8 } >>w
-    { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
-] unit-test
-
-[
-    float-4{ 1 2 3 4 }
-    double-2{ 2 1 }
-    double-4{ 4 3 2 1 }
-    float-8{ 1 2 3 4 5 6 7 8 }
-] [
-    [
-        simd-struct <struct>
-        float-4{ 1 2 3 4 } >>x
-        double-2{ 2 1 } >>y
-        double-4{ 4 3 2 1 } >>z
-        float-8{ 1 2 3 4 5 6 7 8 } >>w
-        { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
-    ] compile-call
-] unit-test
diff --git a/basis/math/vectors/simd/alien/alien.factor b/basis/math/vectors/simd/alien/alien.factor
deleted file mode 100644
index 1486f6d0af..0000000000
--- a/basis/math/vectors/simd/alien/alien.factor
+++ /dev/null
@@ -1,42 +0,0 @@
-! Copyright (C) 2009 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: alien accessors alien.c-types byte-arrays compiler.units
-cpu.architecture locals kernel math math.vectors.simd
-math.vectors.simd.intrinsics ;
-IN: math.vectors.simd.alien
-
-:: define-simd-128-type ( class rep -- )
-    <c-type>
-        byte-array >>class
-        class >>boxed-class
-        [ rep alien-vector class boa ] >>getter
-        [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
-        16 >>size
-        8 >>align
-        rep >>rep
-    class name>> typedef ;
-
-:: define-simd-256-type ( class rep -- )
-    <c-type>
-        class >>class
-        class >>boxed-class
-        [
-            [ rep alien-vector ]
-            [ 16 + >fixnum rep alien-vector ] 2bi
-            class boa
-        ] >>getter
-        [
-            [ [ underlying1>> ] 2dip rep set-alien-vector ]
-            [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
-            3bi
-        ] >>setter
-        32 >>size
-        8 >>align
-        rep >>rep
-    class name>> typedef ;
-[
-    float-4 float-4-rep define-simd-128-type
-    double-2 double-2-rep define-simd-128-type
-    float-8 float-4-rep define-simd-256-type
-    double-4 double-2-rep define-simd-256-type
-] with-compilation-unit
diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor
index a97dc192be..57126f1bf8 100644
--- a/basis/math/vectors/simd/functor/functor.factor
+++ b/basis/math/vectors/simd/functor/functor.factor
@@ -5,6 +5,7 @@ effects fry functors generalizations kernel literals locals
 math math.functions math.vectors math.vectors.simd.intrinsics
 math.vectors.specialization parser prettyprint.custom sequences
 sequences.private strings words definitions macros cpu.architecture ;
+QUALIFIED-WITH: math m
 IN: math.vectors.simd.functor
 
 ERROR: bad-length got expected ;
@@ -51,11 +52,10 @@ MACRO: simd-boa ( rep class -- simd-array )
     '[ nip _ swap supported-simd-op? ] assoc-filter
     '[ drop _ key? ] assoc-filter ;
 
-:: high-level-ops ( ctor -- assoc )
+:: high-level-ops ( ctor elt-class -- assoc )
     ! Some SIMD operations are defined in terms of others.
     {
         { vneg [ [ dup v- ] keep v- ] }
-        { v. [ v* sum ] }
         { n+v [ [ ctor execute ] dip v+ ] }
         { v+n [ ctor execute v+ ] }
         { n-v [ [ ctor execute ] dip v- ] }
@@ -67,16 +67,36 @@ MACRO: simd-boa ( rep class -- simd-array )
         { norm-sq [ dup v. assert-positive ] }
         { norm [ norm-sq sqrt ] }
         { normalize [ dup norm v/n ] }
-        { distance [ v- norm ] }
-    } ;
+    }
+    ! To compute dot product and distance with integer vectors, we
+    ! have to do things less efficiently, with integer overflow checks,
+    ! in the general case.
+    elt-class m:float = [
+        {
+            { distance [ v- norm ] }
+            { v. [ v* sum ] }
+        } append
+    ] when ;
 
 :: simd-vector-words ( class ctor rep assoc -- )
+    rep rep-component-type c-type-boxed-class :> elt-class
     class
-    rep rep-component-type c-type-boxed-class
+    elt-class
     assoc rep supported-simd-ops
-    ctor high-level-ops assoc-union
+    ctor elt-class high-level-ops assoc-union
     specialize-vector-words ;
 
+:: define-simd-128-type ( class rep -- )
+    <c-type>
+        byte-array >>class
+        class >>boxed-class
+        [ rep alien-vector class boa ] >>getter
+        [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
+        16 >>size
+        8 >>align
+        rep >>rep
+    class typedef ;
+
 FUNCTOR: define-simd-128 ( T -- )
 
 N            [ 16 T heap-size /i ]
@@ -159,11 +179,35 @@ INSTANCE: A sequence
     { sum [ [ (simd-sum) ] \ A-v->n-op execute ] }
 } simd-vector-words
 
+\ A \ A-rep define-simd-128-type
+
 PRIVATE>
 
 ;FUNCTOR
 
 ! Synthesize 256-bit vectors from a pair of 128-bit vectors
+SLOT: underlying1
+SLOT: underlying2
+
+:: define-simd-256-type ( class rep -- )
+    <c-type>
+        class >>class
+        class >>boxed-class
+        [
+            [ rep alien-vector ]
+            [ 16 + >fixnum rep alien-vector ] 2bi
+            class boa
+        ] >>getter
+        [
+            [ [ underlying1>> ] 2dip rep set-alien-vector ]
+            [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
+            3bi
+        ] >>setter
+        32 >>size
+        8 >>align
+        rep >>rep
+    class typedef ;
+
 FUNCTOR: define-simd-256 ( T -- )
 
 N            [ 32 T heap-size /i ]
@@ -235,7 +279,7 @@ M: A pprint* pprint-object ;
 
 : A-boa ( ... -- simd-array )
     [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
-    \ A boa ;
+    \ A boa ; inline
 
 \ A-rep 2 boa-effect \ A-boa set-stack-effect
 
@@ -260,4 +304,6 @@ INSTANCE: A sequence
     { sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] }
 } simd-vector-words
 
+\ A \ A-rep define-simd-256-type
+
 ;FUNCTOR
diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor
index d6131b3a71..42512feb6f 100644
--- a/basis/math/vectors/simd/simd-docs.factor
+++ b/basis/math/vectors/simd/simd-docs.factor
@@ -17,23 +17,45 @@ $nl
 "There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ;
 
 ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
-"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs."
+"At present, the SIMD support makes use of SSE, SSE2 and a few SSE3 instructions on x86 CPUs."
 $nl
-"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance."
+"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
 $nl
-"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
+"SSE2 introduces double-precision and integer SIMD."
+$nl
+"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words."
+$nl
+"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
 $nl
 "The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ;
 
 ARTICLE: "math.vectors.simd.types" "SIMD vector types"
-"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8."
-$nl
-"The following vector types are defined:"
-{ $subsection float-4 }
-{ $subsection double-2 }
-{ $subsection float-8 }
-{ $subsection double-4 }
-"For each vector type, several words are defined:"
+"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
+$nl
+"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
+{ $subsection POSTPONE: SIMD: }
+"The following vector types are supported:"
+{ $code
+    "char-16"
+    "uchar-16"
+    "char-32"
+    "uchar-32"
+    "short-8"
+    "ushort-8"
+    "short-16"
+    "ushort-16"
+    "int-4"
+    "uint-4"
+    "int-8"
+    "uint-8"
+    "float-4"
+    "float-8"
+    "double-2"
+    "double-4"
+} ;
+
+ARTICLE: "math.vectors.simd.words" "SIMD vector words"
+"For each SIMD vector type, several words are defined:"
 { $table
     { "Word" "Stack effect" "Description" }
     { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
@@ -41,8 +63,6 @@ $nl
     { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
     { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
 }
-"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers."
-$nl
 "To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
 { $see-also "c-types-specs" } ;
 
@@ -68,6 +88,8 @@ SYMBOLS: x y ;
 { $code
 <" USING: compiler.tree.debugger kernel.private
 math.vectors math.vectors.simd ;
+SIMD: float-4
+IN: simd-demo
 
 : interpolate ( v a b -- w )
     { float-4 float-4 float-4 } declare
@@ -80,6 +102,8 @@ $nl
 { $code
 <" USING: compiler.tree.debugger hints
 math.vectors math.vectors.simd ;
+SIMD: float-4
+IN: simd-demo
 
 : interpolate ( v a b -- w )
     [ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
@@ -94,6 +118,7 @@ $nl
 "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
 { $code
 <" USING: compiler.tree.debugger math.vectors math.vectors.simd ;
+SIMD: float-4
 IN: simd-demo
 
 STRUCT: actor
@@ -151,21 +176,23 @@ $nl
 "For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ;
 
 ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes"
-"Struct classes may contain fields which store SIMD data; use one of the following C type names:"
-{ $code
-<" float-4
-double-2
-float-8
-double-4"> }
-"Passing SIMD data as function parameters is not yet supported." ;
+"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name."
+$nl
+"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ;
 
 ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
 "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
 { $subsection "math.vectors.simd.intro" }
 { $subsection "math.vectors.simd.types" }
+{ $subsection "math.vectors.simd.words" }
 { $subsection "math.vectors.simd.support" }
 { $subsection "math.vectors.simd.efficiency" }
 { $subsection "math.vectors.simd.alien" }
 { $subsection "math.vectors.simd.intrinsics" } ;
 
+HELP: SIMD:
+{ $syntax "SIMD: type-length" }
+{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } }
+{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
+
 ABOUT: "math.vectors.simd"
diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor
index f5318c341f..39afe3cb03 100644
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@@ -1,8 +1,30 @@
+USING: accessors arrays classes compiler compiler.tree.debugger
+effects fry io kernel kernel.private math math.functions
+math.private math.vectors math.vectors.simd
+math.vectors.simd.private prettyprint random sequences system
+tools.test vocabs assocs compiler.cfg.debugger words
+locals math.vectors.specialization combinators cpu.architecture
+math.vectors.simd.intrinsics namespaces byte-arrays alien
+specialized-arrays classes.struct ;
+FROM: alien.c-types => c-type-boxed-class ;
+SPECIALIZED-ARRAY: float
+SIMD: char-16
+SIMD: uchar-16
+SIMD: char-32
+SIMD: uchar-32
+SIMD: short-8
+SIMD: ushort-8
+SIMD: short-16
+SIMD: ushort-16
+SIMD: int-4
+SIMD: uint-4
+SIMD: int-8
+SIMD: uint-8
+SIMD: float-4
+SIMD: float-8
+SIMD: double-2
+SIMD: double-4
 IN: math.vectors.simd.tests
-USING: math math.vectors.simd math.vectors.simd.private
-math.vectors math.functions math.private kernel.private compiler
-sequences tools.test compiler.tree.debugger accessors kernel
-system ;
 
 [ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test
 
@@ -12,353 +34,191 @@ system ;
 
 [ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
 
-[ float-4{ 12 12 12 12 } ] [
-    12 [ float-4-with ] compile-call
-] unit-test
-
-[ float-4{ 1 2 3 4 } ] [
-    1 2 3 4 [ float-4-boa ] compile-call
-] unit-test
-
-[ float-4{ 11 22 33 44 } ] [
-    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
-    [ { float-4 float-4 } declare v+ ] compile-call
-] unit-test
-
-[ float-4{ -9 -18 -27 -36 } ] [
-    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
-    [ { float-4 float-4 } declare v- ] compile-call
-] unit-test
-
-[ float-4{ 10 40 90 160 } ] [
-    float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
-    [ { float-4 float-4 } declare v* ] compile-call
-] unit-test
-
-[ float-4{ 10 100 1000 10000 } ] [
-    float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 }
-    [ { float-4 float-4 } declare v/ ] compile-call
-] unit-test
-
-[ float-4{ -10 -20 -30 -40 } ] [
-    float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
-    [ { float-4 float-4 } declare vmin ] compile-call
-] unit-test
-
-[ float-4{ 10 20 30 40 } ] [
-    float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
-    [ { float-4 float-4 } declare vmax ] compile-call
-] unit-test
-
-[ 10.0 ] [
-    float-4{ 1 2 3 4 }
-    [ { float-4 } declare sum ] compile-call
-] unit-test
+! Test puns; only on x86
+cpu x86? [
+    [ double-2{ 4 1024 } ] [
+        float-4{ 0 1 0 2 }
+        [ { float-4 } declare dup v+ underlying>> double-2 boa dup v+ ] compile-call
+    ] unit-test
+    
+    [ 33.0 ] [
+        double-2{ 1 2 } double-2{ 10 20 }
+        [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call
+    ] unit-test
+] when
 
-[ 13.0 ] [
-    float-4{ 1 2 3 4 }
-    [ { float-4 } declare sum 3.0 + ] compile-call
-] unit-test
+! Fuzz testing
+CONSTANT: simd-classes
+    {
+        char-16
+        uchar-16
+        char-32
+        uchar-32
+        short-8
+        ushort-8
+        short-16
+        ushort-16
+        int-4
+        uint-4
+        int-8
+        uint-8
+        float-4
+        float-8
+        double-2
+        double-4
+    }
+
+: with-ctors ( -- seq )
+    simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
+
+: boa-ctors ( -- seq )
+    simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
+
+: check-optimizer ( seq inputs quot -- )
+    [
+        [ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
+        [ [ call ] dip call ]
+        [ [ call ] dip compile-call ] 2tri = not
+    ] compose filter ; inline
+
+"== Checking -new constructors" print
+
+[ { } ] [
+    simd-classes [ [ [ ] ] dip '[ _ new ] ] check-optimizer
+] unit-test
+
+[ { } ] [
+    simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter
+] unit-test
+
+"== Checking -with constructors" print
+
+[ { } ] [
+    with-ctors [
+        [ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ]
+    ] check-optimizer
+] unit-test
+
+"== Checking -boa constructors" print
+
+[ { } ] [
+    boa-ctors [
+        dup stack-effect in>> length
+        [ nip [ 1000 random ] [ ] replicate-as ]
+        [ fixnum <array> swap '[ _ declare _ execute ] ]
+        2bi
+    ] check-optimizer
+] unit-test
+
+"== Checking vector operations" print
+
+: random-vector ( class -- vec )
+    new [ drop 1000 random ] map ;
+
+:: check-vector-op ( word inputs class elt-class -- inputs quot )
+    inputs [
+        [
+            {
+                { +vector+ [ class random-vector ] }
+                { +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
+            } case
+        ] [ ] map-as
+    ] [
+        [
+            {
+                { +vector+ [ class ] }
+                { +scalar+ [ elt-class ] }
+            } case
+        ] map
+    ] bi
+    word '[ _ declare _ execute ] ;
+
+: ops-to-check ( elt-class -- alist )
+    [ vector-words >alist ] dip float = [
+        [ drop { n/v v/n v/ normalize } member? not ] assoc-filter
+    ] unless ;
+
+: check-vector-ops ( class elt-class -- )
+    [ nip ops-to-check ] 2keep
+    '[ first2 inputs _ _ check-vector-op ] check-optimizer ; inline
+
+: simd-classes&reps ( -- alist )
+    simd-classes [
+        dup name>> [ "float" head? ] [ "double" head? ] bi or
+        float fixnum ?
+    ] { } map>assoc ;
+
+simd-classes&reps [
+    [ [ { } ] ] 2dip '[ _ _ check-vector-ops ] unit-test
+] assoc-each
+
+! Other regressions
+[ 8000000 ] [
+    int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
+    [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
+] unit-test
 
-[ 8.0 ] [
-    float-4{ 1 2 3 4 } float-4{ 2 0 2 0 }
-    [ { float-4 float-4 } declare v. ] compile-call
-] unit-test
 
-[ float-4{ 5 10 15 20 } ] [
-    5.0 float-4{ 1 2 3 4 }
-    [ { float float-4 } declare n*v ] compile-call
+! Vector alien intrinsics
+[ float-4{ 1 2 3 4 } ] [
+    [
+        float-4{ 1 2 3 4 }
+        underlying>> 0 float-4-rep alien-vector
+    ] compile-call float-4 boa
 ] unit-test
 
-[ float-4{ 5 10 15 20 } ] [
-    float-4{ 1 2 3 4 } 5.0
-    [ { float float-4 } declare v*n ] compile-call
+[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
+    16 [ 1 ] B{ } replicate-as 16 <byte-array>
+    [
+        0 [
+            { byte-array c-ptr fixnum } declare
+            float-4-rep set-alien-vector
+        ] compile-call
+    ] keep
 ] unit-test
 
-[ float-4{ 10 5 2 5 } ] [
-    10.0 float-4{ 1 2 5 2 }
-    [ { float float-4 } declare n/v ] compile-call
+[ float-array{ 1 2 3 4 } ] [
+    [
+        float-array{ 1 2 3 4 } underlying>>
+        float-array{ 4 3 2 1 } clone
+        [ underlying>> 0 float-4-rep set-alien-vector ] keep
+    ] compile-call
 ] unit-test
 
-[ float-4{ 0.5 1 1.5 2 } ] [
-    float-4{ 1 2 3 4 } 2
-    [ { float float-4 } declare v/n ] compile-call
-] unit-test
+STRUCT: simd-struct
+{ x float-4 }
+{ y double-2 }
+{ z double-4 }
+{ w float-8 } ;
 
-[ float-4{ 1 0 0 0 } ] [
-    float-4{ 10 0 0 0 }
-    [ { float-4 } declare normalize ] compile-call
-] unit-test
+[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
 
-[ 30.0 ] [
+[
     float-4{ 1 2 3 4 }
-    [ { float-4 } declare norm-sq ] compile-call
-] unit-test
-
-[ t ] [
-    float-4{ 1 0 0 0 }
-    float-4{ 0 1 0 0 }
-    [ { float-4 float-4 } declare distance ] compile-call
-    2 sqrt 1.0e-6 ~
-] unit-test
-
-[ double-2{ 12 12 } ] [
-    12 [ double-2-with ] compile-call
-] unit-test
-
-[ double-2{ 1 2 } ] [
-    1 2 [ double-2-boa ] compile-call
-] unit-test
-
-[ double-2{ 11 22 } ] [
-    double-2{ 1 2 } double-2{ 10 20 }
-    [ { double-2 double-2 } declare v+ ] compile-call
-] unit-test
-
-[ double-2{ -9 -18 } ] [
-    double-2{ 1 2 } double-2{ 10 20 }
-    [ { double-2 double-2 } declare v- ] compile-call
-] unit-test
-
-[ double-2{ 10 40 } ] [
-    double-2{ 1 2 } double-2{ 10 20 }
-    [ { double-2 double-2 } declare v* ] compile-call
-] unit-test
-
-[ double-2{ 10 100 } ] [
-    double-2{ 100 2000 } double-2{ 10 20 }
-    [ { double-2 double-2 } declare v/ ] compile-call
-] unit-test
-
-[ double-2{ -10 -20 } ] [
-    double-2{ -10 20 } double-2{ 10 -20 }
-    [ { double-2 double-2 } declare vmin ] compile-call
-] unit-test
-
-[ double-2{ 10 20 } ] [
-    double-2{ -10 20 } double-2{ 10 -20 }
-    [ { double-2 double-2 } declare vmax ] compile-call
-] unit-test
-
-[ 3.0 ] [
-    double-2{ 1 2 }
-    [ { double-2 } declare sum ] compile-call
-] unit-test
-
-[ 7.0 ] [
-    double-2{ 1 2 }
-    [ { double-2 } declare sum 4.0 + ] compile-call
-] unit-test
-
-[ 16.0 ] [
-    double-2{ 1 2 } double-2{ 2 7 }
-    [ { double-2 double-2 } declare v. ] compile-call
-] unit-test
-
-[ double-2{ 5 10 } ] [
-    5.0 double-2{ 1 2 }
-    [ { float double-2 } declare n*v ] compile-call
-] unit-test
-
-[ double-2{ 5 10 } ] [
-    double-2{ 1 2 } 5.0
-    [ { float double-2 } declare v*n ] compile-call
-] unit-test
-
-[ double-2{ 10 5 } ] [
-    10.0 double-2{ 1 2 }
-    [ { float double-2 } declare n/v ] compile-call
-] unit-test
-
-[ double-2{ 0.5 1 } ] [
-    double-2{ 1 2 } 2
-    [ { float double-2 } declare v/n ] compile-call
-] unit-test
-
-[ double-2{ 0 0 } ] [ double-2 new ] unit-test
-
-[ double-2{ 1 0 } ] [
-    double-2{ 10 0 }
-    [ { double-2 } declare normalize ] compile-call
-] unit-test
-
-[ 5.0 ] [
-    double-2{ 1 2 }
-    [ { double-2 } declare norm-sq ] compile-call
-] unit-test
-
-[ t ] [
-    double-2{ 1 0 }
-    double-2{ 0 1 }
-    [ { double-2 double-2 } declare distance ] compile-call
-    2 sqrt 1.0e-6 ~
-] unit-test
-
-[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test
-
-[ double-4{ 1 2 3 4 } ] [
-    1 2 3 4 double-4-boa
-] unit-test
-
-[ double-4{ 1 1 1 1 } ] [
-    1 double-4-with
-] unit-test
-
-[ double-4{ 0 1 2 3 } ] [
-    1 double-4-with [ * ] map-index
-] unit-test
-
-[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test
-
-[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test
-
-[ double-4{ 12 12 12 12 } ] [
-    12 [ double-4-with ] compile-call
-] unit-test
-
-[ double-4{ 1 2 3 4 } ] [
-    1 2 3 4 [ double-4-boa ] compile-call
-] unit-test
-
-[ double-4{ 11 22 33 44 } ] [
-    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
-    [ { double-4 double-4 } declare v+ ] compile-call
-] unit-test
-
-[ double-4{ -9 -18 -27 -36 } ] [
-    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
-    [ { double-4 double-4 } declare v- ] compile-call
-] unit-test
-
-[ double-4{ 10 40 90 160 } ] [
-    double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
-    [ { double-4 double-4 } declare v* ] compile-call
-] unit-test
-
-[ double-4{ 10 100 1000 10000 } ] [
-    double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 }
-    [ { double-4 double-4 } declare v/ ] compile-call
-] unit-test
-
-[ double-4{ -10 -20 -30 -40 } ] [
-    double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
-    [ { double-4 double-4 } declare vmin ] compile-call
-] unit-test
-
-[ double-4{ 10 20 30 40 } ] [
-    double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
-    [ { double-4 double-4 } declare vmax ] compile-call
-] unit-test
-
-[ 10.0 ] [
-    double-4{ 1 2 3 4 }
-    [ { double-4 } declare sum ] compile-call
-] unit-test
-
-[ 13.0 ] [
-    double-4{ 1 2 3 4 }
-    [ { double-4 } declare sum 3.0 + ] compile-call
-] unit-test
-
-[ 8.0 ] [
-    double-4{ 1 2 3 4 } double-4{ 2 0 2 0 }
-    [ { double-4 double-4 } declare v. ] compile-call
-] unit-test
-
-[ double-4{ 5 10 15 20 } ] [
-    5.0 double-4{ 1 2 3 4 }
-    [ { float double-4 } declare n*v ] compile-call
-] unit-test
-
-[ double-4{ 5 10 15 20 } ] [
-    double-4{ 1 2 3 4 } 5.0
-    [ { float double-4 } declare v*n ] compile-call
-] unit-test
-
-[ double-4{ 10 5 2 5 } ] [
-    10.0 double-4{ 1 2 5 2 }
-    [ { float double-4 } declare n/v ] compile-call
-] unit-test
-
-[ double-4{ 0.5 1 1.5 2 } ] [
-    double-4{ 1 2 3 4 } 2
-    [ { float double-4 } declare v/n ] compile-call
-] unit-test
-
-[ double-4{ 1 0 0 0 } ] [
-    double-4{ 10 0 0 0 }
-    [ { double-4 } declare normalize ] compile-call
-] unit-test
-
-[ 30.0 ] [
-    double-4{ 1 2 3 4 }
-    [ { double-4 } declare norm-sq ] compile-call
-] unit-test
-
-[ t ] [
-    double-4{ 1 0 0 0 }
-    double-4{ 0 1 0 0 }
-    [ { double-4 double-4 } declare distance ] compile-call
-    2 sqrt 1.0e-6 ~
-] unit-test
-
-[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test
-
-[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test
-
-[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test
-
-[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test
-
-[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test
-
-[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test
-
-[ float-8{ 3 6 9 12 15 18 21 24 } ] [
+    double-2{ 2 1 }
+    double-4{ 4 3 2 1 }
     float-8{ 1 2 3 4 5 6 7 8 }
-    float-8{ 2 4 6 8 10 12 14 16 }
-    [ { float-8 float-8 } declare v+ ] compile-call
+] [
+    simd-struct <struct>
+    float-4{ 1 2 3 4 } >>x
+    double-2{ 2 1 } >>y
+    double-4{ 4 3 2 1 } >>z
+    float-8{ 1 2 3 4 5 6 7 8 } >>w
+    { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
 ] unit-test
 
-[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+[
+    float-4{ 1 2 3 4 }
+    double-2{ 2 1 }
+    double-4{ 4 3 2 1 }
     float-8{ 1 2 3 4 5 6 7 8 }
-    float-8{ 2 4 6 8 10 12 14 16 }
-    [ { float-8 float-8 } declare v- ] compile-call
-] unit-test
-
-[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
-    -0.5
-    float-8{ 2 4 6 8 10 12 14 16 }
-    [ { float float-8 } declare n*v ] compile-call
+] [
+    [
+        simd-struct <struct>
+        float-4{ 1 2 3 4 } >>x
+        double-2{ 2 1 } >>y
+        double-4{ 4 3 2 1 } >>z
+        float-8{ 1 2 3 4 5 6 7 8 } >>w
+        { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
+    ] compile-call
 ] unit-test
-
-[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
-    float-8{ 2 4 6 8 10 12 14 16 }
-    -0.5
-    [ { float-8 float } declare v*n ] compile-call
-] unit-test
-
-[ float-8{ 256 128 64 32 16 8 4 2 } ] [
-    256.0
-    float-8{ 1 2 4 8 16 32 64 128 }
-    [ { float float-8 } declare n/v ] compile-call
-] unit-test
-
-[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
-    float-8{ 2 4 6 8 10 12 14 16 }
-    -2.0
-    [ { float-8 float } declare v/n ] compile-call
-] unit-test
-
-! Test puns; only on x86
-cpu x86? [
-    [ double-2{ 4 1024 } ] [
-        float-4{ 0 1 0 2 }
-        [ { float-4 } declare dup v+ underlying>> double-2 boa dup v+ ] compile-call
-    ] unit-test
-    
-    [ 33.0 ] [
-        double-2{ 1 2 } double-2{ 10 20 }
-        [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call
-    ] unit-test
-] when
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index c5e7d6f75d..fe043032b8 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -1,15 +1,32 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: alien.c-types cpu.architecture kernel
-math.vectors.simd.functor vocabs.loader ;
-FROM: sequences => each ;
+USING: alien.c-types combinators fry kernel lexer math math.parser
+math.vectors.simd.functor sequences splitting vocabs.generated
+vocabs.loader vocabs.parser words ;
 IN: math.vectors.simd
 
-<<
+ERROR: bad-vector-size bits ;
 
-{ double float char uchar short ushort int uint }
-[ [ define-simd-128 ] [ define-simd-256 ] bi ] each
+<PRIVATE
 
->>
+: simd-vocab ( type -- vocab )
+    "math.vectors.simd.instances." prepend ;
 
-"math.vectors.simd.alien" require
+: parse-simd-name ( string -- c-type quot )
+    "-" split1
+    [ "alien.c-types" lookup dup heap-size ] [ string>number ] bi*
+    * 8 * {
+        { 128 [ [ define-simd-128 ] ] }
+        { 256 [ [ define-simd-256 ] ] }
+        [ bad-vector-size ]
+    } case ;
+
+PRIVATE>
+
+: define-simd-vocab ( type -- vocab )
+    [ simd-vocab ]
+    [ '[ _ parse-simd-name call( type -- ) ] ] bi
+    generate-vocab ;
+
+SYNTAX: SIMD:
+    scan define-simd-vocab use-vocab ;
diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor
index 6931c83677..a64d052fd1 100755
--- a/basis/specialized-arrays/specialized-arrays.factor
+++ b/basis/specialized-arrays/specialized-arrays.factor
@@ -4,7 +4,7 @@ USING: accessors alien alien.c-types alien.data alien.parser assocs
 byte-arrays classes compiler.units functors kernel lexer libc math
 math.vectors.specialization namespaces parser prettyprint.custom
 sequences sequences.private strings summary vocabs vocabs.loader
-vocabs.parser words fry combinators ;
+vocabs.parser vocabs.generated words fry combinators ;
 IN: specialized-arrays
 
 MIXIN: specialized-array
@@ -123,15 +123,6 @@ M: word (underlying-type) "c-type" word-prop ;
 
 PRIVATE>
 
-: generate-vocab ( vocab-name quot -- vocab )
-    [ dup vocab [ ] ] dip '[
-        [
-            [
-                 _ with-current-vocab
-            ] with-compilation-unit
-        ] keep
-    ] ?if ; inline
-
 : define-array-vocab ( type -- vocab )
     underlying-type-name
     [ specialized-array-vocab ] [ '[ _ define-array ] ] bi
diff --git a/basis/specialized-vectors/specialized-vectors.factor b/basis/specialized-vectors/specialized-vectors.factor
index 58fb97764b..7cda026cb3 100644
--- a/basis/specialized-vectors/specialized-vectors.factor
+++ b/basis/specialized-vectors/specialized-vectors.factor
@@ -3,7 +3,7 @@
 USING: accessors alien.c-types assocs compiler.units functors
 growable kernel lexer namespaces parser prettyprint.custom
 sequences specialized-arrays specialized-arrays.private strings
-vocabs vocabs.parser fry ;
+vocabs vocabs.parser vocabs.generated fry ;
 QUALIFIED: vectors.functor
 IN: specialized-vectors
 
diff --git a/basis/math/vectors/simd/alien/authors.txt b/basis/vocabs/generated/authors.txt
similarity index 100%
rename from basis/math/vectors/simd/alien/authors.txt
rename to basis/vocabs/generated/authors.txt
diff --git a/basis/vocabs/generated/generated.factor b/basis/vocabs/generated/generated.factor
new file mode 100644
index 0000000000..1ddcc73db2
--- /dev/null
+++ b/basis/vocabs/generated/generated.factor
@@ -0,0 +1,13 @@
+! Copyright (C) 2009 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: compiler.units fry kernel vocabs vocabs.parser ;
+IN: vocabs.generated
+
+: generate-vocab ( vocab-name quot -- vocab )
+    [ dup vocab [ ] ] dip '[
+        [
+            [
+                 _ with-current-vocab
+            ] with-compilation-unit
+        ] keep
+    ] ?if ; inline
diff --git a/extra/benchmark/nbody-simd/nbody-simd.factor b/extra/benchmark/nbody-simd/nbody-simd.factor
index e8bef58923..3aedffed91 100644
--- a/extra/benchmark/nbody-simd/nbody-simd.factor
+++ b/extra/benchmark/nbody-simd/nbody-simd.factor
@@ -4,6 +4,7 @@ USING: accessors fry kernel locals math math.constants
 math.functions math.vectors math.vectors.simd prettyprint
 combinators.smart sequences hints classes.struct
 specialized-arrays ;
+SIMD: double-4
 IN: benchmark.nbody-simd
 
 : solar-mass ( -- x ) 4 pi sq * ; inline
diff --git a/extra/benchmark/raytracer-simd/raytracer-simd.factor b/extra/benchmark/raytracer-simd/raytracer-simd.factor
index 3712972862..2d16c8cd1f 100644
--- a/extra/benchmark/raytracer-simd/raytracer-simd.factor
+++ b/extra/benchmark/raytracer-simd/raytracer-simd.factor
@@ -5,6 +5,7 @@ USING: arrays accessors io io.files io.files.temp
 io.encodings.binary kernel math math.constants math.functions
 math.vectors math.vectors.simd math.parser make sequences
 sequences.private words hints classes.struct ;
+SIMD: double-4
 IN: benchmark.raytracer-simd
 
 ! parameters
diff --git a/extra/benchmark/simd-1/simd-1.factor b/extra/benchmark/simd-1/simd-1.factor
index 4f57cca0bb..1e753a331d 100644
--- a/extra/benchmark/simd-1/simd-1.factor
+++ b/extra/benchmark/simd-1/simd-1.factor
@@ -2,6 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel io math math.functions math.parser math.vectors
 math.vectors.simd sequences specialized-arrays ;
+SIMD: float-4
 SPECIALIZED-ARRAY: float-4
 IN: benchmark.simd-1
 
-- 
2.34.1