use: src1 src2
literal: rep ;
+PURE-INSN: ##add-sub-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##mul-vector
def: dst
use: src1 src2
{
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
##max-float
##add-vector
##sub-vector
+ ##add-sub-vector
##mul-vector
##div-vector
##min-vector
CODEGEN: ##box-vector %box-vector
CODEGEN: ##add-vector %add-vector
CODEGEN: ##sub-vector %sub-vector
+CODEGEN: ##add-sub-vector %add-sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors byte-arrays combinators fry
+USING: accessors byte-arrays combinators fry sequences
compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd.intrinsics ;
IN: compiler.tree.propagation.simd
-\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
+{
+ (simd-v+)
+ (simd-v-)
+ (simd-v+-)
+ (simd-v*)
+ (simd-v/)
+ (simd-vmin)
+ (simd-vmax)
+ (simd-vsqrt)
+ (simd-broadcast)
+ (simd-gather-2)
+ (simd-gather-4)
+ alien-vector
+} [ { byte-array } "default-output-classes" set-word-prop ] each
\ (simd-sum) [
nip dup literal?>> [
<class-info>
] "outputs" set-word-prop
-\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
-
\ assert-positive [
real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop
-\ alien-vector { byte-array } "default-output-classes" set-word-prop
-
! If SIMD is not available, inline alien-vector and set-alien-vector
! to get a speedup
: inline-unless-intrinsic ( word -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
+HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4-reps cpu ( -- reps )
HOOK: %add-vector-reps cpu ( -- reps )
HOOK: %sub-vector-reps cpu ( -- reps )
+HOOK: %add-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )
4 "double" c-type (>>align)
] unless
-"cpu.x86.features" require
+check-sse
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
} cond
-"cpu.x86.features" require
+check-sse
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
+M: x86 %add-sub-vector ( dst src1 src2 rep -- )
+ {
+ { float-4-rep [ ADDSUBPS ] }
+ { double-2-rep [ ADDSUBPD ] }
+ } case drop ;
+
+M: x86 %add-sub-vector-reps
+ {
+ { sse3? { float-4-rep double-2-rep } }
+ } available-reps ;
+
M: x86 %mul-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ MULPS ] }
{ 42 [ enable-sse3 ] }
} case ;
-[ { sse_version } compile ] with-optimizer
+: check-sse ( -- )
+ [ { sse_version } compile ] with-optimizer
-"Checking for multimedia extensions: " write sse-version 30 min
-[ sse-string write " detected" print ]
-[ install-sse-check ]
-[ enable-sse ] tri
+ "Checking for multimedia extensions: " write sse-version 30 min
+ [ sse-string write " detected" print ]
+ [ install-sse-check ]
+ [ enable-sse ] tri ;
{
{ v+ (simd-v+) }
{ v- (simd-v-) }
+ { v+- (simd-v+-) }
{ v* (simd-v*) }
{ v/ (simd-v/) }
{ vmin (simd-vmin) }
\ A \ A-with \ A-rep H{
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
+ { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
\ A \ A-with \ A-rep H{
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
+ { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
+: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
M: vector-rep supported-simd-op?
{
{ \ (simd-v+) [ %add-vector-reps ] }
+ { \ (simd-v+-) [ %add-sub-vector-reps ] }
{ \ (simd-v-) [ %sub-vector-reps ] }
{ \ (simd-v*) [ %mul-vector-reps ] }
{ \ (simd-v/) [ %div-vector-reps ] }
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
{ $subsection (simd-v+) }
{ $subsection (simd-v-) }
+{ $subsection (simd-v+-) }
{ $subsection (simd-v/) }
{ $subsection (simd-vmin) }
{ $subsection (simd-vmax) }
{ v* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
+ { v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }
"Combining two vectors to form another vector with " { $link 2map } ":"
{ $subsection v+ }
{ $subsection v- }
+{ $subsection v+- }
{ $subsection v* }
{ $subsection v/ }
{ $subsection vmax }
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ;
+HELP: v+-
+{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
+{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." }
+{ $examples
+ { $example
+ "USING: math.vectors prettyprint ;"
+ "{ 1 2 3 } { 2 3 2 } v+- ."
+ "{ -1 5 1 }"
+ }
+} ;
+
HELP: [v-]
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ;
[ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test
-[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
\ No newline at end of file
+[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
+
+[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test
\ No newline at end of file
: vmax ( u v -- w ) [ max ] 2map ;
: vmin ( u v -- w ) [ min ] 2map ;
+: v+- ( u v -- w )
+ [ t ] 2dip
+ [ [ not ] 2dip pick [ + ] [ - ] if ] 2map
+ nip ;
+
: vfloor ( v -- _v_ ) [ floor ] map ;
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
: vtruncate ( v -- -v- ) [ truncate ] map ;