From 47d87633406c48f1e7af4f2b5f6cdf21896ea35b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 20 Sep 2009 16:48:17 -0500 Subject: [PATCH] More integer SIMD work - move generated vocab support from specialized-arrays to vocabs.generated - add fuzz testing to math.vectors.simd - add alien type support for integer SIMD vectors - SIMD: parsing word generates a SIMD type, instead of pre-generating them all in math.vectors.simd --- .../tree/propagation/simd/simd.factor | 1 + basis/cpu/x86/x86.factor | 3 +- .../vectors/simd/alien/alien-tests.factor | 70 --- basis/math/vectors/simd/alien/alien.factor | 42 -- .../math/vectors/simd/functor/functor.factor | 60 +- basis/math/vectors/simd/simd-docs.factor | 67 ++- basis/math/vectors/simd/simd-tests.factor | 534 +++++++----------- basis/math/vectors/simd/simd.factor | 33 +- .../specialized-arrays.factor | 11 +- .../specialized-vectors.factor | 2 +- .../alien => vocabs/generated}/authors.txt | 0 basis/vocabs/generated/generated.factor | 13 + extra/benchmark/nbody-simd/nbody-simd.factor | 1 + .../raytracer-simd/raytracer-simd.factor | 1 + extra/benchmark/simd-1/simd-1.factor | 1 + 15 files changed, 342 insertions(+), 497 deletions(-) delete mode 100644 basis/math/vectors/simd/alien/alien-tests.factor delete mode 100644 basis/math/vectors/simd/alien/alien.factor rename basis/{math/vectors/simd/alien => vocabs/generated}/authors.txt (100%) create mode 100644 basis/vocabs/generated/generated.factor diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 3baa7cdcbf..42c1f35617 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -24,6 +24,7 @@ IN: compiler.tree.propagation.simd literal>> scalar-rep-of { { float-rep [ float ] } { double-rep [ float ] } + { int-rep [ integer ] } } case ] [ drop real ] if diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 4d80862ed3..322b123d99 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -417,8 +417,7 @@ M: x86 %horizontal-add-vector ( dst src rep -- ) M: x86 %horizontal-add-vector-reps { - { sse? { float-4-rep } } - { sse2? { double-2-rep short-8-rep uchar-16-rep } } + { sse3? { float-4-rep double-2-rep } } } available-reps ; M: x86 %unbox-alien ( dst src -- ) diff --git a/basis/math/vectors/simd/alien/alien-tests.factor b/basis/math/vectors/simd/alien/alien-tests.factor deleted file mode 100644 index 87540dd9a5..0000000000 --- a/basis/math/vectors/simd/alien/alien-tests.factor +++ /dev/null @@ -1,70 +0,0 @@ -USING: cpu.architecture math.vectors.simd -math.vectors.simd.intrinsics accessors math.vectors.simd.alien -kernel classes.struct tools.test compiler sequences byte-arrays -alien math kernel.private specialized-arrays combinators ; -SPECIALIZED-ARRAY: float -IN: math.vectors.simd.alien.tests - -! Vector alien intrinsics -[ float-4{ 1 2 3 4 } ] [ - [ - float-4{ 1 2 3 4 } - underlying>> 0 float-4-rep alien-vector - ] compile-call float-4 boa -] unit-test - -[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [ - 16 [ 1 ] B{ } replicate-as 16 - [ - 0 [ - { byte-array c-ptr fixnum } declare - float-4-rep set-alien-vector - ] compile-call - ] keep -] unit-test - -[ float-array{ 1 2 3 4 } ] [ - [ - float-array{ 1 2 3 4 } underlying>> - float-array{ 4 3 2 1 } clone - [ underlying>> 0 float-4-rep set-alien-vector ] keep - ] compile-call -] unit-test - -STRUCT: simd-struct -{ x float-4 } -{ y double-2 } -{ z double-4 } -{ w float-8 } ; - -[ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test - -[ - float-4{ 1 2 3 4 } - double-2{ 2 1 } - double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } -] [ - simd-struct - float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y - double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w - { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave -] unit-test - -[ - float-4{ 1 2 3 4 } - double-2{ 2 1 } - double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } -] [ - [ - simd-struct - float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y - double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w - { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave - ] compile-call -] unit-test diff --git a/basis/math/vectors/simd/alien/alien.factor b/basis/math/vectors/simd/alien/alien.factor deleted file mode 100644 index 1486f6d0af..0000000000 --- a/basis/math/vectors/simd/alien/alien.factor +++ /dev/null @@ -1,42 +0,0 @@ -! Copyright (C) 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: alien accessors alien.c-types byte-arrays compiler.units -cpu.architecture locals kernel math math.vectors.simd -math.vectors.simd.intrinsics ; -IN: math.vectors.simd.alien - -:: define-simd-128-type ( class rep -- ) - - byte-array >>class - class >>boxed-class - [ rep alien-vector class boa ] >>getter - [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter - 16 >>size - 8 >>align - rep >>rep - class name>> typedef ; - -:: define-simd-256-type ( class rep -- ) - - class >>class - class >>boxed-class - [ - [ rep alien-vector ] - [ 16 + >fixnum rep alien-vector ] 2bi - class boa - ] >>getter - [ - [ [ underlying1>> ] 2dip rep set-alien-vector ] - [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ] - 3bi - ] >>setter - 32 >>size - 8 >>align - rep >>rep - class name>> typedef ; -[ - float-4 float-4-rep define-simd-128-type - double-2 double-2-rep define-simd-128-type - float-8 float-4-rep define-simd-256-type - double-4 double-2-rep define-simd-256-type -] with-compilation-unit diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index a97dc192be..57126f1bf8 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -5,6 +5,7 @@ effects fry functors generalizations kernel literals locals math math.functions math.vectors math.vectors.simd.intrinsics math.vectors.specialization parser prettyprint.custom sequences sequences.private strings words definitions macros cpu.architecture ; +QUALIFIED-WITH: math m IN: math.vectors.simd.functor ERROR: bad-length got expected ; @@ -51,11 +52,10 @@ MACRO: simd-boa ( rep class -- simd-array ) '[ nip _ swap supported-simd-op? ] assoc-filter '[ drop _ key? ] assoc-filter ; -:: high-level-ops ( ctor -- assoc ) +:: high-level-ops ( ctor elt-class -- assoc ) ! Some SIMD operations are defined in terms of others. { { vneg [ [ dup v- ] keep v- ] } - { v. [ v* sum ] } { n+v [ [ ctor execute ] dip v+ ] } { v+n [ ctor execute v+ ] } { n-v [ [ ctor execute ] dip v- ] } @@ -67,16 +67,36 @@ MACRO: simd-boa ( rep class -- simd-array ) { norm-sq [ dup v. assert-positive ] } { norm [ norm-sq sqrt ] } { normalize [ dup norm v/n ] } - { distance [ v- norm ] } - } ; + } + ! To compute dot product and distance with integer vectors, we + ! have to do things less efficiently, with integer overflow checks, + ! in the general case. + elt-class m:float = [ + { + { distance [ v- norm ] } + { v. [ v* sum ] } + } append + ] when ; :: simd-vector-words ( class ctor rep assoc -- ) + rep rep-component-type c-type-boxed-class :> elt-class class - rep rep-component-type c-type-boxed-class + elt-class assoc rep supported-simd-ops - ctor high-level-ops assoc-union + ctor elt-class high-level-ops assoc-union specialize-vector-words ; +:: define-simd-128-type ( class rep -- ) + + byte-array >>class + class >>boxed-class + [ rep alien-vector class boa ] >>getter + [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter + 16 >>size + 8 >>align + rep >>rep + class typedef ; + FUNCTOR: define-simd-128 ( T -- ) N [ 16 T heap-size /i ] @@ -159,11 +179,35 @@ INSTANCE: A sequence { sum [ [ (simd-sum) ] \ A-v->n-op execute ] } } simd-vector-words +\ A \ A-rep define-simd-128-type + PRIVATE> ;FUNCTOR ! Synthesize 256-bit vectors from a pair of 128-bit vectors +SLOT: underlying1 +SLOT: underlying2 + +:: define-simd-256-type ( class rep -- ) + + class >>class + class >>boxed-class + [ + [ rep alien-vector ] + [ 16 + >fixnum rep alien-vector ] 2bi + class boa + ] >>getter + [ + [ [ underlying1>> ] 2dip rep set-alien-vector ] + [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ] + 3bi + ] >>setter + 32 >>size + 8 >>align + rep >>rep + class typedef ; + FUNCTOR: define-simd-256 ( T -- ) N [ 32 T heap-size /i ] @@ -235,7 +279,7 @@ M: A pprint* pprint-object ; : A-boa ( ... -- simd-array ) [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@ - \ A boa ; + \ A boa ; inline \ A-rep 2 boa-effect \ A-boa set-stack-effect @@ -260,4 +304,6 @@ INSTANCE: A sequence { sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] } } simd-vector-words +\ A \ A-rep define-simd-256-type + ;FUNCTOR diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index d6131b3a71..42512feb6f 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -17,23 +17,45 @@ $nl "There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ; ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations" -"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs." +"At present, the SIMD support makes use of SSE, SSE2 and a few SSE3 instructions on x86 CPUs." $nl -"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance." +"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")." $nl -"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance." +"SSE2 introduces double-precision and integer SIMD." +$nl +"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words." +$nl +"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance." $nl "The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ; ARTICLE: "math.vectors.simd.types" "SIMD vector types" -"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8." -$nl -"The following vector types are defined:" -{ $subsection float-4 } -{ $subsection double-2 } -{ $subsection float-8 } -{ $subsection double-4 } -"For each vector type, several words are defined:" +"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension." +$nl +"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":" +{ $subsection POSTPONE: SIMD: } +"The following vector types are supported:" +{ $code + "char-16" + "uchar-16" + "char-32" + "uchar-32" + "short-8" + "ushort-8" + "short-16" + "ushort-16" + "int-4" + "uint-4" + "int-8" + "uint-8" + "float-4" + "float-8" + "double-2" + "double-4" +} ; + +ARTICLE: "math.vectors.simd.words" "SIMD vector words" +"For each SIMD vector type, several words are defined:" { $table { "Word" "Stack effect" "Description" } { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" } @@ -41,8 +63,6 @@ $nl { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" } { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" } } -"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers." -$nl "To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words." { $see-also "c-types-specs" } ; @@ -68,6 +88,8 @@ SYMBOLS: x y ; { $code <" USING: compiler.tree.debugger kernel.private math.vectors math.vectors.simd ; +SIMD: float-4 +IN: simd-demo : interpolate ( v a b -- w ) { float-4 float-4 float-4 } declare @@ -80,6 +102,8 @@ $nl { $code <" USING: compiler.tree.debugger hints math.vectors math.vectors.simd ; +SIMD: float-4 +IN: simd-demo : interpolate ( v a b -- w ) [ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ; @@ -94,6 +118,7 @@ $nl "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:" { $code <" USING: compiler.tree.debugger math.vectors math.vectors.simd ; +SIMD: float-4 IN: simd-demo STRUCT: actor @@ -151,21 +176,23 @@ $nl "For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ; ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes" -"Struct classes may contain fields which store SIMD data; use one of the following C type names:" -{ $code -<" float-4 -double-2 -float-8 -double-4"> } -"Passing SIMD data as function parameters is not yet supported." ; +"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name." +$nl +"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ; ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors." { $subsection "math.vectors.simd.intro" } { $subsection "math.vectors.simd.types" } +{ $subsection "math.vectors.simd.words" } { $subsection "math.vectors.simd.support" } { $subsection "math.vectors.simd.efficiency" } { $subsection "math.vectors.simd.alien" } { $subsection "math.vectors.simd.intrinsics" } ; +HELP: SIMD: +{ $syntax "SIMD: type-length" } +{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } } +{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; + ABOUT: "math.vectors.simd" diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index f5318c341f..39afe3cb03 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -1,8 +1,30 @@ +USING: accessors arrays classes compiler compiler.tree.debugger +effects fry io kernel kernel.private math math.functions +math.private math.vectors math.vectors.simd +math.vectors.simd.private prettyprint random sequences system +tools.test vocabs assocs compiler.cfg.debugger words +locals math.vectors.specialization combinators cpu.architecture +math.vectors.simd.intrinsics namespaces byte-arrays alien +specialized-arrays classes.struct ; +FROM: alien.c-types => c-type-boxed-class ; +SPECIALIZED-ARRAY: float +SIMD: char-16 +SIMD: uchar-16 +SIMD: char-32 +SIMD: uchar-32 +SIMD: short-8 +SIMD: ushort-8 +SIMD: short-16 +SIMD: ushort-16 +SIMD: int-4 +SIMD: uint-4 +SIMD: int-8 +SIMD: uint-8 +SIMD: float-4 +SIMD: float-8 +SIMD: double-2 +SIMD: double-4 IN: math.vectors.simd.tests -USING: math math.vectors.simd math.vectors.simd.private -math.vectors math.functions math.private kernel.private compiler -sequences tools.test compiler.tree.debugger accessors kernel -system ; [ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test @@ -12,353 +34,191 @@ system ; [ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test -[ float-4{ 12 12 12 12 } ] [ - 12 [ float-4-with ] compile-call -] unit-test - -[ float-4{ 1 2 3 4 } ] [ - 1 2 3 4 [ float-4-boa ] compile-call -] unit-test - -[ float-4{ 11 22 33 44 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v+ ] compile-call -] unit-test - -[ float-4{ -9 -18 -27 -36 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v- ] compile-call -] unit-test - -[ float-4{ 10 40 90 160 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v* ] compile-call -] unit-test - -[ float-4{ 10 100 1000 10000 } ] [ - float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v/ ] compile-call -] unit-test - -[ float-4{ -10 -20 -30 -40 } ] [ - float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 } - [ { float-4 float-4 } declare vmin ] compile-call -] unit-test - -[ float-4{ 10 20 30 40 } ] [ - float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 } - [ { float-4 float-4 } declare vmax ] compile-call -] unit-test - -[ 10.0 ] [ - float-4{ 1 2 3 4 } - [ { float-4 } declare sum ] compile-call -] unit-test +! Test puns; only on x86 +cpu x86? [ + [ double-2{ 4 1024 } ] [ + float-4{ 0 1 0 2 } + [ { float-4 } declare dup v+ underlying>> double-2 boa dup v+ ] compile-call + ] unit-test + + [ 33.0 ] [ + double-2{ 1 2 } double-2{ 10 20 } + [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call + ] unit-test +] when -[ 13.0 ] [ - float-4{ 1 2 3 4 } - [ { float-4 } declare sum 3.0 + ] compile-call -] unit-test +! Fuzz testing +CONSTANT: simd-classes + { + char-16 + uchar-16 + char-32 + uchar-32 + short-8 + ushort-8 + short-16 + ushort-16 + int-4 + uint-4 + int-8 + uint-8 + float-4 + float-8 + double-2 + double-4 + } + +: with-ctors ( -- seq ) + simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ; + +: boa-ctors ( -- seq ) + simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ; + +: check-optimizer ( seq inputs quot -- ) + [ + [ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ] + [ [ call ] dip call ] + [ [ call ] dip compile-call ] 2tri = not + ] compose filter ; inline + +"== Checking -new constructors" print + +[ { } ] [ + simd-classes [ [ [ ] ] dip '[ _ new ] ] check-optimizer +] unit-test + +[ { } ] [ + simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter +] unit-test + +"== Checking -with constructors" print + +[ { } ] [ + with-ctors [ + [ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ] + ] check-optimizer +] unit-test + +"== Checking -boa constructors" print + +[ { } ] [ + boa-ctors [ + dup stack-effect in>> length + [ nip [ 1000 random ] [ ] replicate-as ] + [ fixnum swap '[ _ declare _ execute ] ] + 2bi + ] check-optimizer +] unit-test + +"== Checking vector operations" print + +: random-vector ( class -- vec ) + new [ drop 1000 random ] map ; + +:: check-vector-op ( word inputs class elt-class -- inputs quot ) + inputs [ + [ + { + { +vector+ [ class random-vector ] } + { +scalar+ [ 1000 random elt-class float = [ >float ] when ] } + } case + ] [ ] map-as + ] [ + [ + { + { +vector+ [ class ] } + { +scalar+ [ elt-class ] } + } case + ] map + ] bi + word '[ _ declare _ execute ] ; + +: ops-to-check ( elt-class -- alist ) + [ vector-words >alist ] dip float = [ + [ drop { n/v v/n v/ normalize } member? not ] assoc-filter + ] unless ; + +: check-vector-ops ( class elt-class -- ) + [ nip ops-to-check ] 2keep + '[ first2 inputs _ _ check-vector-op ] check-optimizer ; inline + +: simd-classes&reps ( -- alist ) + simd-classes [ + dup name>> [ "float" head? ] [ "double" head? ] bi or + float fixnum ? + ] { } map>assoc ; + +simd-classes&reps [ + [ [ { } ] ] 2dip '[ _ _ check-vector-ops ] unit-test +] assoc-each + +! Other regressions +[ 8000000 ] [ + int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 } + [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call +] unit-test -[ 8.0 ] [ - float-4{ 1 2 3 4 } float-4{ 2 0 2 0 } - [ { float-4 float-4 } declare v. ] compile-call -] unit-test -[ float-4{ 5 10 15 20 } ] [ - 5.0 float-4{ 1 2 3 4 } - [ { float float-4 } declare n*v ] compile-call +! Vector alien intrinsics +[ float-4{ 1 2 3 4 } ] [ + [ + float-4{ 1 2 3 4 } + underlying>> 0 float-4-rep alien-vector + ] compile-call float-4 boa ] unit-test -[ float-4{ 5 10 15 20 } ] [ - float-4{ 1 2 3 4 } 5.0 - [ { float float-4 } declare v*n ] compile-call +[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [ + 16 [ 1 ] B{ } replicate-as 16 + [ + 0 [ + { byte-array c-ptr fixnum } declare + float-4-rep set-alien-vector + ] compile-call + ] keep ] unit-test -[ float-4{ 10 5 2 5 } ] [ - 10.0 float-4{ 1 2 5 2 } - [ { float float-4 } declare n/v ] compile-call +[ float-array{ 1 2 3 4 } ] [ + [ + float-array{ 1 2 3 4 } underlying>> + float-array{ 4 3 2 1 } clone + [ underlying>> 0 float-4-rep set-alien-vector ] keep + ] compile-call ] unit-test -[ float-4{ 0.5 1 1.5 2 } ] [ - float-4{ 1 2 3 4 } 2 - [ { float float-4 } declare v/n ] compile-call -] unit-test +STRUCT: simd-struct +{ x float-4 } +{ y double-2 } +{ z double-4 } +{ w float-8 } ; -[ float-4{ 1 0 0 0 } ] [ - float-4{ 10 0 0 0 } - [ { float-4 } declare normalize ] compile-call -] unit-test +[ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test -[ 30.0 ] [ +[ float-4{ 1 2 3 4 } - [ { float-4 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - float-4{ 1 0 0 0 } - float-4{ 0 1 0 0 } - [ { float-4 float-4 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ double-2{ 12 12 } ] [ - 12 [ double-2-with ] compile-call -] unit-test - -[ double-2{ 1 2 } ] [ - 1 2 [ double-2-boa ] compile-call -] unit-test - -[ double-2{ 11 22 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v+ ] compile-call -] unit-test - -[ double-2{ -9 -18 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v- ] compile-call -] unit-test - -[ double-2{ 10 40 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v* ] compile-call -] unit-test - -[ double-2{ 10 100 } ] [ - double-2{ 100 2000 } double-2{ 10 20 } - [ { double-2 double-2 } declare v/ ] compile-call -] unit-test - -[ double-2{ -10 -20 } ] [ - double-2{ -10 20 } double-2{ 10 -20 } - [ { double-2 double-2 } declare vmin ] compile-call -] unit-test - -[ double-2{ 10 20 } ] [ - double-2{ -10 20 } double-2{ 10 -20 } - [ { double-2 double-2 } declare vmax ] compile-call -] unit-test - -[ 3.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare sum ] compile-call -] unit-test - -[ 7.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare sum 4.0 + ] compile-call -] unit-test - -[ 16.0 ] [ - double-2{ 1 2 } double-2{ 2 7 } - [ { double-2 double-2 } declare v. ] compile-call -] unit-test - -[ double-2{ 5 10 } ] [ - 5.0 double-2{ 1 2 } - [ { float double-2 } declare n*v ] compile-call -] unit-test - -[ double-2{ 5 10 } ] [ - double-2{ 1 2 } 5.0 - [ { float double-2 } declare v*n ] compile-call -] unit-test - -[ double-2{ 10 5 } ] [ - 10.0 double-2{ 1 2 } - [ { float double-2 } declare n/v ] compile-call -] unit-test - -[ double-2{ 0.5 1 } ] [ - double-2{ 1 2 } 2 - [ { float double-2 } declare v/n ] compile-call -] unit-test - -[ double-2{ 0 0 } ] [ double-2 new ] unit-test - -[ double-2{ 1 0 } ] [ - double-2{ 10 0 } - [ { double-2 } declare normalize ] compile-call -] unit-test - -[ 5.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - double-2{ 1 0 } - double-2{ 0 1 } - [ { double-2 double-2 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test - -[ double-4{ 1 2 3 4 } ] [ - 1 2 3 4 double-4-boa -] unit-test - -[ double-4{ 1 1 1 1 } ] [ - 1 double-4-with -] unit-test - -[ double-4{ 0 1 2 3 } ] [ - 1 double-4-with [ * ] map-index -] unit-test - -[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test - -[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test - -[ double-4{ 12 12 12 12 } ] [ - 12 [ double-4-with ] compile-call -] unit-test - -[ double-4{ 1 2 3 4 } ] [ - 1 2 3 4 [ double-4-boa ] compile-call -] unit-test - -[ double-4{ 11 22 33 44 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v+ ] compile-call -] unit-test - -[ double-4{ -9 -18 -27 -36 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v- ] compile-call -] unit-test - -[ double-4{ 10 40 90 160 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v* ] compile-call -] unit-test - -[ double-4{ 10 100 1000 10000 } ] [ - double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v/ ] compile-call -] unit-test - -[ double-4{ -10 -20 -30 -40 } ] [ - double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 } - [ { double-4 double-4 } declare vmin ] compile-call -] unit-test - -[ double-4{ 10 20 30 40 } ] [ - double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 } - [ { double-4 double-4 } declare vmax ] compile-call -] unit-test - -[ 10.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare sum ] compile-call -] unit-test - -[ 13.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare sum 3.0 + ] compile-call -] unit-test - -[ 8.0 ] [ - double-4{ 1 2 3 4 } double-4{ 2 0 2 0 } - [ { double-4 double-4 } declare v. ] compile-call -] unit-test - -[ double-4{ 5 10 15 20 } ] [ - 5.0 double-4{ 1 2 3 4 } - [ { float double-4 } declare n*v ] compile-call -] unit-test - -[ double-4{ 5 10 15 20 } ] [ - double-4{ 1 2 3 4 } 5.0 - [ { float double-4 } declare v*n ] compile-call -] unit-test - -[ double-4{ 10 5 2 5 } ] [ - 10.0 double-4{ 1 2 5 2 } - [ { float double-4 } declare n/v ] compile-call -] unit-test - -[ double-4{ 0.5 1 1.5 2 } ] [ - double-4{ 1 2 3 4 } 2 - [ { float double-4 } declare v/n ] compile-call -] unit-test - -[ double-4{ 1 0 0 0 } ] [ - double-4{ 10 0 0 0 } - [ { double-4 } declare normalize ] compile-call -] unit-test - -[ 30.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - double-4{ 1 0 0 0 } - double-4{ 0 1 0 0 } - [ { double-4 double-4 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test - -[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test - -[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test - -[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test - -[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test - -[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test - -[ float-8{ 3 6 9 12 15 18 21 24 } ] [ + double-2{ 2 1 } + double-4{ 4 3 2 1 } float-8{ 1 2 3 4 5 6 7 8 } - float-8{ 2 4 6 8 10 12 14 16 } - [ { float-8 float-8 } declare v+ ] compile-call +] [ + simd-struct + float-4{ 1 2 3 4 } >>x + double-2{ 2 1 } >>y + double-4{ 4 3 2 1 } >>z + float-8{ 1 2 3 4 5 6 7 8 } >>w + { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave ] unit-test -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ +[ + float-4{ 1 2 3 4 } + double-2{ 2 1 } + double-4{ 4 3 2 1 } float-8{ 1 2 3 4 5 6 7 8 } - float-8{ 2 4 6 8 10 12 14 16 } - [ { float-8 float-8 } declare v- ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - -0.5 - float-8{ 2 4 6 8 10 12 14 16 } - [ { float float-8 } declare n*v ] compile-call +] [ + [ + simd-struct + float-4{ 1 2 3 4 } >>x + double-2{ 2 1 } >>y + double-4{ 4 3 2 1 } >>z + float-8{ 1 2 3 4 5 6 7 8 } >>w + { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave + ] compile-call ] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - float-8{ 2 4 6 8 10 12 14 16 } - -0.5 - [ { float-8 float } declare v*n ] compile-call -] unit-test - -[ float-8{ 256 128 64 32 16 8 4 2 } ] [ - 256.0 - float-8{ 1 2 4 8 16 32 64 128 } - [ { float float-8 } declare n/v ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - float-8{ 2 4 6 8 10 12 14 16 } - -2.0 - [ { float-8 float } declare v/n ] compile-call -] unit-test - -! Test puns; only on x86 -cpu x86? [ - [ double-2{ 4 1024 } ] [ - float-4{ 0 1 0 2 } - [ { float-4 } declare dup v+ underlying>> double-2 boa dup v+ ] compile-call - ] unit-test - - [ 33.0 ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call - ] unit-test -] when diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index c5e7d6f75d..fe043032b8 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,15 +1,32 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.c-types cpu.architecture kernel -math.vectors.simd.functor vocabs.loader ; -FROM: sequences => each ; +USING: alien.c-types combinators fry kernel lexer math math.parser +math.vectors.simd.functor sequences splitting vocabs.generated +vocabs.loader vocabs.parser words ; IN: math.vectors.simd -<< +ERROR: bad-vector-size bits ; -{ double float char uchar short ushort int uint } -[ [ define-simd-128 ] [ define-simd-256 ] bi ] each +> +: simd-vocab ( type -- vocab ) + "math.vectors.simd.instances." prepend ; -"math.vectors.simd.alien" require +: parse-simd-name ( string -- c-type quot ) + "-" split1 + [ "alien.c-types" lookup dup heap-size ] [ string>number ] bi* + * 8 * { + { 128 [ [ define-simd-128 ] ] } + { 256 [ [ define-simd-256 ] ] } + [ bad-vector-size ] + } case ; + +PRIVATE> + +: define-simd-vocab ( type -- vocab ) + [ simd-vocab ] + [ '[ _ parse-simd-name call( type -- ) ] ] bi + generate-vocab ; + +SYNTAX: SIMD: + scan define-simd-vocab use-vocab ; diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index 6931c83677..a64d052fd1 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -4,7 +4,7 @@ USING: accessors alien alien.c-types alien.data alien.parser assocs byte-arrays classes compiler.units functors kernel lexer libc math math.vectors.specialization namespaces parser prettyprint.custom sequences sequences.private strings summary vocabs vocabs.loader -vocabs.parser words fry combinators ; +vocabs.parser vocabs.generated words fry combinators ; IN: specialized-arrays MIXIN: specialized-array @@ -123,15 +123,6 @@ M: word (underlying-type) "c-type" word-prop ; PRIVATE> -: generate-vocab ( vocab-name quot -- vocab ) - [ dup vocab [ ] ] dip '[ - [ - [ - _ with-current-vocab - ] with-compilation-unit - ] keep - ] ?if ; inline - : define-array-vocab ( type -- vocab ) underlying-type-name [ specialized-array-vocab ] [ '[ _ define-array ] ] bi diff --git a/basis/specialized-vectors/specialized-vectors.factor b/basis/specialized-vectors/specialized-vectors.factor index 58fb97764b..7cda026cb3 100644 --- a/basis/specialized-vectors/specialized-vectors.factor +++ b/basis/specialized-vectors/specialized-vectors.factor @@ -3,7 +3,7 @@ USING: accessors alien.c-types assocs compiler.units functors growable kernel lexer namespaces parser prettyprint.custom sequences specialized-arrays specialized-arrays.private strings -vocabs vocabs.parser fry ; +vocabs vocabs.parser vocabs.generated fry ; QUALIFIED: vectors.functor IN: specialized-vectors diff --git a/basis/math/vectors/simd/alien/authors.txt b/basis/vocabs/generated/authors.txt similarity index 100% rename from basis/math/vectors/simd/alien/authors.txt rename to basis/vocabs/generated/authors.txt diff --git a/basis/vocabs/generated/generated.factor b/basis/vocabs/generated/generated.factor new file mode 100644 index 0000000000..1ddcc73db2 --- /dev/null +++ b/basis/vocabs/generated/generated.factor @@ -0,0 +1,13 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: compiler.units fry kernel vocabs vocabs.parser ; +IN: vocabs.generated + +: generate-vocab ( vocab-name quot -- vocab ) + [ dup vocab [ ] ] dip '[ + [ + [ + _ with-current-vocab + ] with-compilation-unit + ] keep + ] ?if ; inline diff --git a/extra/benchmark/nbody-simd/nbody-simd.factor b/extra/benchmark/nbody-simd/nbody-simd.factor index e8bef58923..3aedffed91 100644 --- a/extra/benchmark/nbody-simd/nbody-simd.factor +++ b/extra/benchmark/nbody-simd/nbody-simd.factor @@ -4,6 +4,7 @@ USING: accessors fry kernel locals math math.constants math.functions math.vectors math.vectors.simd prettyprint combinators.smart sequences hints classes.struct specialized-arrays ; +SIMD: double-4 IN: benchmark.nbody-simd : solar-mass ( -- x ) 4 pi sq * ; inline diff --git a/extra/benchmark/raytracer-simd/raytracer-simd.factor b/extra/benchmark/raytracer-simd/raytracer-simd.factor index 3712972862..2d16c8cd1f 100644 --- a/extra/benchmark/raytracer-simd/raytracer-simd.factor +++ b/extra/benchmark/raytracer-simd/raytracer-simd.factor @@ -5,6 +5,7 @@ USING: arrays accessors io io.files io.files.temp io.encodings.binary kernel math math.constants math.functions math.vectors math.vectors.simd math.parser make sequences sequences.private words hints classes.struct ; +SIMD: double-4 IN: benchmark.raytracer-simd ! parameters diff --git a/extra/benchmark/simd-1/simd-1.factor b/extra/benchmark/simd-1/simd-1.factor index 4f57cca0bb..1e753a331d 100644 --- a/extra/benchmark/simd-1/simd-1.factor +++ b/extra/benchmark/simd-1/simd-1.factor @@ -2,6 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel io math math.functions math.parser math.vectors math.vectors.simd sequences specialized-arrays ; +SIMD: float-4 SPECIALIZED-ARRAY: float-4 IN: benchmark.simd-1 -- 2.34.1