use: src
literal: rep ;
+PURE-INSN: ##horizontal-sub-vector
+def: dst/scalar-rep
+use: src
+literal: rep ;
+
+PURE-INSN: ##horizontal-shl-vector
+def: dst
+use: src1
+literal: src2 rep ;
+
+PURE-INSN: ##horizontal-shr-vector
+def: dst
+use: src1
+literal: src2 rep ;
+
PURE-INSN: ##abs-vector
def: dst
use: src
use: src1 src2
literal: rep ;
+PURE-INSN: ##andn-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##or-vector
def: dst
use: src1 src2
{ math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] }
+ { math.vectors.simd.intrinsics:(simd-hrshift) [ [ ^^horizontal-shr-vector ] emit-horizontal-shift ] }
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors byte-arrays fry cpu.architecture kernel
+USING: accessors byte-arrays fry cpu.architecture kernel math
sequences compiler.tree.propagation.info
compiler.cfg.builder.blocks compiler.cfg.stacks
compiler.cfg.stacks.local compiler.cfg.hats
IN: compiler.cfg.intrinsics.simd
: emit-vector-op ( node quot: ( rep -- ) -- )
- [ dup node-input-infos last literal>> ] dip over representation?
- [ [ drop ] 2dip call ] [ 2drop emit-primitive ] if ; inline
+ [ dup node-input-infos last literal>> dup representation? ] dip
+ '[ nip @ ] [ drop emit-primitive ] if ; inline
: emit-binary-vector-op ( node quot -- )
'[ [ ds-drop 2inputs ] dip @ ds-push ] emit-vector-op ; inline
: emit-unary-vector-op ( node quot -- )
'[ [ ds-drop ds-pop ] dip @ ds-push ] emit-vector-op ; inline
+: emit-horizontal-shift ( node quot -- )
+ [
+ dup node-input-infos
+ [ second literal>> ] [ third literal>> ] bi
+ 2dup [ integer? ] [ representation? ] bi* and
+ ] dip
+ '[ [ drop ds-drop ds-drop ds-pop ] 2dip @ ds-push ]
+ [ 2drop emit-primitive ]
+ if ; inline
+
: emit-gather-vector-2 ( node -- )
[ ^^gather-vector-2 ] emit-binary-vector-op ;
CODEGEN: ##max-vector %max-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
+CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
+CODEGEN: ##horizontal-shl-vector %horizontal-shl-vector
+CODEGEN: ##horizontal-shr-vector %horizontal-shr-vector
CODEGEN: ##abs-vector %abs-vector
CODEGEN: ##and-vector %and-vector
+CODEGEN: ##andn-vector %andn-vector
CODEGEN: ##or-vector %or-vector
CODEGEN: ##xor-vector %xor-vector
CODEGEN: ##shl-vector %shl-vector
(simd-vabs)
(simd-vsqrt)
(simd-vbitand)
+ (simd-vbitandn)
(simd-vbitor)
(simd-vbitxor)
(simd-vlshift)
(simd-vrshift)
+ (simd-hlshift)
+ (simd-hrshift)
(simd-broadcast)
(simd-gather-2)
(simd-gather-4)
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
+HOOK: %horizontal-sub-vector cpu ( dst src rep -- )
HOOK: %abs-vector cpu ( dst src rep -- )
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
+HOOK: %andn-vector cpu ( dst src1 src2 rep -- )
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
+HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- )
+HOOK: %horizontal-shr-vector cpu ( dst src1 src2 rep -- )
HOOK: %integer>scalar cpu ( dst src rep -- )
HOOK: %scalar>integer cpu ( dst src rep -- )
HOOK: %max-vector-reps cpu ( -- reps )
HOOK: %sqrt-vector-reps cpu ( -- reps )
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
+HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
HOOK: %abs-vector-reps cpu ( -- reps )
HOOK: %and-vector-reps cpu ( -- reps )
+HOOK: %andn-vector-reps cpu ( -- reps )
HOOK: %or-vector-reps cpu ( -- reps )
HOOK: %xor-vector-reps cpu ( -- reps )
HOOK: %shl-vector-reps cpu ( -- reps )
HOOK: %shr-vector-reps cpu ( -- reps )
+HOOK: %horizontal-shl-vector-reps cpu ( -- reps )
+HOOK: %horizontal-shr-vector-reps cpu ( -- reps )
HOOK: %unbox-alien cpu ( dst src -- )
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
+M: x86 %horizontal-shl-vector ( dst src1 src2 rep -- )
+ two-operand PSLLDQ ;
+
+M: x86 %horizontal-shl-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %horizontal-shr-vector ( dst src1 src2 rep -- )
+ two-operand PSRLDQ ;
+
+M: x86 %horizontal-shr-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
M: x86 %abs-vector ( dst src rep -- )
{
{ char-16-rep [ PABSB ] }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
+M: x86 %andn-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { float-4-rep [ ANDNPS ] }
+ { double-2-rep [ ANDNPD ] }
+ [ drop PANDN ]
+ } case ;
+
+M: x86 %andn-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
M: x86 %or-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
SIMD-OP: sum
SIMD-OP: vabs
SIMD-OP: vbitand
+SIMD-OP: vbitandn
SIMD-OP: vbitor
SIMD-OP: vbitxor
SIMD-OP: vlshift
SIMD-OP: vrshift
+SIMD-OP: hlshift
+SIMD-OP: hrshift
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
: (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
+
: assert-positive ( x -- y ) ;
: alien-vector ( c-ptr n rep -- value )
{ \ (simd-sum) [ %horizontal-add-vector-reps ] }
{ \ (simd-vabs) [ %abs-vector-reps ] }
{ \ (simd-vbitand) [ %and-vector-reps ] }
+ { \ (simd-vbitandn) [ %andn-vector-reps ] }
{ \ (simd-vbitor) [ %or-vector-reps ] }
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
{ \ (simd-vlshift) [ %shl-vector-reps ] }
{ \ (simd-vrshift) [ %shr-vector-reps ] }
+ { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
+ { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
} ;
ARTICLE: "math.vectors.simd.words" "SIMD vector words"
-"For each SIMD vector type, several words are defined:"
+"For each SIMD vector type, several words are defined, where " { $snippet "type" } " is the type in question:"
{ $table
{ "Word" "Stack effect" "Description" }
{ { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
{ { $snippet "type-boa" } { $snippet "( ... -- simd-array )" } "creates a new instance where components are read from the stack" }
- { { $snipept "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" }
+ { { $snippet "type-cast" } { $snippet "( simd-array -- simd-array' )" } "creates a new SIMD array where the underlying data is taken from another SIMD array, with no format conversion" }
{ { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
{ { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
}
: remove-integer-words ( alist -- alist' )
[ drop { vlshift vrshift } member? not ] assoc-filter ;
+: remove-horizontal-shifts ( alist -- alist' )
+ [ drop { hlshift hrshift } member? not ] assoc-filter ;
+
: ops-to-check ( elt-class -- alist )
[ vector-words >alist ] dip
- float = [ remove-integer-words ] [ remove-float-words ] if ;
+ float = [ remove-integer-words ] [ remove-float-words ] if
+ remove-horizontal-shifts ;
: check-vector-ops ( class elt-class compare-quot -- )
[
] unit-test
[ ] [ char-16 new 1array stack. ] unit-test
+
+[ int-4{ 256 512 1024 2048 } ]
+[ int-4{ 1 2 4 8 } 1 hlshift ] unit-test
+
+[ int-4{ 256 512 1024 2048 } ]
+[ int-4{ 1 2 4 8 } [ { int-4 } declare 1 hlshift ] compile-call ] unit-test
+
+[ int-4{ 1 2 4 8 } ]
+[ int-4{ 256 512 1024 2048 } 1 hrshift ] unit-test
+
+[ int-4{ 1 2 4 8 } ]
+[ int-4{ 256 512 1024 2048 } [ { int-4 } declare 1 hrshift ] compile-call ] unit-test
{ vabs { +vector+ -> +vector+ } }
{ vsqrt { +vector+ -> +vector+ } }
{ vbitand { +vector+ +vector+ -> +vector+ } }
+ { vbitandn { +vector+ +vector+ -> +vector+ } }
{ vbitor { +vector+ +vector+ -> +vector+ } }
{ vbitxor { +vector+ +vector+ -> +vector+ } }
{ vlshift { +vector+ +scalar+ -> +vector+ } }
{ vrshift { +vector+ +scalar+ -> +vector+ } }
+ { hlshift { +vector+ +scalar+ -> +vector+ } }
+ { hrshift { +vector+ +scalar+ -> +vector+ } }
}
PREDICATE: vector-word < word vector-words key? ;
:: input-signature ( word array-type elt-type -- signature )
array-type elt-type word word-schema inputs signature-for-schema ;
-: vector-words-for-type ( elt-type -- alist )
+: vector-words-for-type ( elt-type -- words )
{
! Can't do shifts on floats
{ [ dup float class<= ] [ vector-words keys { vlshift vrshift } diff ] }
! Can't compute square root of complex numbers (vsqrt uses fsqrt not sqrt)
{ [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
[ { } ]
- } cond nip ;
+ } cond
+ ! Don't specialize horizontal shifts at all, they're only for SIMD
+ { hlshift hrshift } diff
+ nip ;
:: specialize-vector-words ( array-type elt-type simd -- )
- elt-type vector-words-for-type [
+ elt-type vector-words-for-type simd keys union [
[ array-type elt-type simd specialize-vector-word ]
[ array-type elt-type input-signature ]
[ ]
{ $subsection vmin }
"Bitwise operations:"
{ $subsection vbitand }
+{ $subsection vbitandn }
{ $subsection vbitor }
{ $subsection vbitxor }
{ $subsection vlshift }
{ $description "Takes the bitwise and of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
{ $notes "Unlike " { $link bitand } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
+HELP: vbitandn
+{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
+{ $description "Takes the bitwise and-not of " { $snippet "u" } " and " { $snippet "v" } " component-wise, where " { $snippet "x and-not y" } " is defined as " { $snippet "not(x) and y" } "." }
+{ $notes "This word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
+
HELP: vbitor
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
{ $description "Takes the bitwise or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
{ $description "Shifts each element of " { $snippet "u" } " to the right by " { $snippet "n" } " bits." }
{ $notes "Undefined behavior will result if " { $snippet "n" } " is negative." } ;
+HELP: hlshift
+{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
+{ $description "Shifts the entire SIMD array to the left by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
+
+HELP: hrshift
+{ $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "w" "a SIMD array" } }
+{ $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;
+
HELP: norm-sq
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Computes the squared length of a mathematical vector." } ;
! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays alien.c-types kernel sequences math math.functions
-hints math.order math.libm fry combinators ;
+hints math.order math.libm fry combinators byte-arrays accessors ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors
[ drop call ]
} case ; inline
+: bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline
+
PRIVATE>
: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ;
+: vbitandn ( u v -- w ) over '[ _ [ bitandn ] fp-bitwise-op ] 2map ;
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
: vlshift ( u n -- w ) '[ _ shift ] map ;
: vrshift ( u n -- w ) neg '[ _ shift ] map ;
+: hlshift ( u n -- w )
+ [ clone ] dip
+ '[ _ <byte-array> append 16 tail* ] change-underlying ;
+
+: hrshift ( u n -- w )
+ [ clone ] dip
+ '[ _ <byte-array> prepend 16 head* ] change-underlying ;
+
: vfloor ( u -- v ) [ floor ] map ;
: vceiling ( u -- v ) [ ceiling ] map ;
: vtruncate ( u -- v ) [ truncate ] map ;