use: src1 src2
literal: rep ;
+! Vector instruction with one output slot (dst), two input slots
+! (src1, src2) and a literal rep slot. The x86 backend implements it
+! with PMULHW / PMULHUW.
+PURE-INSN: ##mul-high-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+! Vector instruction with one output slot (dst), two input slots
+! (src1, src2) and a literal rep slot. The x86 backend implements it
+! with PMADDUBSW / PMADDWD.
+PURE-INSN: ##mul-horizontal-add-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##saturated-mul-vector
def: dst
use: src1 src2
use: src1 src2
literal: rep ;
+! Vector instruction with one output slot (dst), two input slots
+! (src1, src2) and a literal rep slot. The x86 backend implements it
+! with PAVGB / PAVGW.
+PURE-INSN: ##avg-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+! Two input slots (src1, src2) and a literal rep slot. Unlike the
+! neighbouring vector instructions, the output slot is declared as
+! dst/scalar-rep rather than plain dst.
PURE-INSN: ##dot-vector
def: dst/scalar-rep
use: src1 src2
literal: rep ;
+! Vector instruction with one output slot, two input slots (src1, src2)
+! and a literal rep slot. The output is a plain dst (not dst/scalar-rep
+! as in ##dot-vector); emit-simd-vsad applies ^^vector>scalar to it.
+PURE-INSN: ##sad-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##horizontal-add-vector
def: dst
use: src1 src2
{ longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
} case ;
+: ^load-half-vector ( rep -- dst )
+    ! Pick the packed all-0.5 literal that matches rep; taking the
+    ! underlying bytes and loading them as a constant is the same for
+    ! both branches, so it is done once after the case.
+    {
+        { float-4-rep [ float-array{ 0.5 0.5 0.5 0.5 } ] }
+        { double-2-rep [ double-array{ 0.5 0.5 } ] }
+    } case underlying>> ^^load-constant ;
+
: >variable-shuffle ( shuffle rep -- shuffle' )
rep-component-type heap-size
[ dup <repetition> >byte-array ]
[ ^^mul-vector ]
} emit-vv-vector-op ;
+! Intrinsic expansion for (simd-v*high): the only candidate handed to
+! emit-vv-vector-op is a direct ^^mul-high-vector; no alternative
+! expansion is listed.
+: emit-simd-v*high ( node -- )
+ {
+ [ ^^mul-high-vector ]
+ } emit-vv-vector-op ;
+
+! Intrinsic expansion for (simd-v*hs+): the only candidate handed to
+! emit-vv-vector-op is a direct ^^mul-horizontal-add-vector; no
+! alternative expansion is listed.
+: emit-simd-v*hs+ ( node -- )
+ {
+ [ ^^mul-horizontal-add-vector ]
+ } emit-vv-vector-op ;
+
: emit-simd-v/ ( node -- )
{
[ ^^div-vector ]
]
} emit-vv-vector-op ;
+: emit-simd-vavg ( node -- )
+    ! Candidates handed to emit-vv-vector-op: a direct ^^avg-vector, or,
+    ! for float-vector-rep, the sum of both inputs multiplied by a
+    ! vector of 0.5s.
+    {
+        [ ^^avg-vector ]
+        { float-vector-rep [| a b rep |
+            a b rep ^^add-vector
+            rep ^load-half-vector
+            rep ^^mul-vector
+        ] }
+    } emit-vv-vector-op ;
+
+! Candidates handed to emit-vv-vector-op: a direct ^^dot-vector, or,
+! for float-vector-rep, a ^^mul-vector whose product is then reduced
+! with ^sum-vector (bi passes the same rep to both steps).
: emit-simd-v. ( node -- )
{
[ ^^dot-vector ]
{ float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
} emit-vv-vector-op ;
+: emit-simd-vsad ( node -- )
+    ! The x86 implementation of ##sad-vector is PSADBW, which leaves one
+    ! partial sum in each 64-bit half of the result. Extracting the
+    ! scalar straight away would return only the low half's sum, so the
+    ! halves are first swapped ({ 2 3 0 1 } viewed as int-4-rep) and
+    ! added, putting the full total in the low lane. bi then hands the
+    ! original rep to the second quotation, which widens it and
+    ! extracts that lane as the scalar result.
+    ! NOTE(review): relies on ^^shuffle-vector-imm being in scope in this
+    ! vocabulary -- confirm.
+    {
+        [
+            [
+                ^^sad-vector
+                dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm
+                int-4-rep ^^add-vector
+            ]
+            [ widen-vector-rep ^^vector>scalar ] bi
+        ]
+    } emit-vv-vector-op ;
+
: emit-simd-vsqrt ( node -- )
{
[ ^^sqrt-vector ]
{ (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] }
+ { (simd-v*high) [ emit-simd-v*high ] }
+ { (simd-v*hs+) [ emit-simd-v*hs+ ] }
{ (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] }
+ { (simd-vavg) [ emit-simd-vavg ] }
{ (simd-v.) [ emit-simd-v. ] }
+ { (simd-vsad) [ emit-simd-vsad ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] }
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector
+! The high-half and horizontal-add multiplies follow the same
+! ##name / %name pairing as the neighbouring lines.
+CODEGEN: ##mul-high-vector %mul-high-vector
+CODEGEN: ##mul-horizontal-add-vector %mul-horizontal-add-vector
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
+! Average: paired with the same-named % word.
+CODEGEN: ##avg-vector %avg-vector
CODEGEN: ##dot-vector %dot-vector
+! Sum of absolute differences: paired with the same-named % word.
+CODEGEN: ##sad-vector %sad-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
(simd-vs-)
(simd-vs*)
(simd-v*)
+ (simd-v*high)
+ (simd-v*hs+)
(simd-v/)
(simd-vmin)
(simd-vmax)
+ (simd-vavg)
(simd-vsqrt)
(simd-vabs)
(simd-vbitand)
CONSTANT: vector-other-intrinsics
{
(simd-v.)
+ (simd-vsad)
(simd-sum)
(simd-vany?)
(simd-vall?)
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
+! Hooks dispatching on the cpu variable, with the same
+! ( dst src1 src2 rep -- ) effect as the other binary vector hooks.
+HOOK: %mul-high-vector cpu ( dst src1 src2 rep -- )
+HOOK: %mul-horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
+! Average hook; same binary effect as its neighbours.
+HOOK: %avg-vector cpu ( dst src1 src2 rep -- )
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
+! Sum-of-absolute-differences hook; same binary effect as its neighbours.
+HOOK: %sad-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
+! Each -reps hook dispatches on the cpu variable, takes no inputs and
+! returns reps -- presumably the representations the backend supports
+! for the matching operation (confirm against the callers).
+HOOK: %mul-high-vector-reps cpu ( -- reps )
+HOOK: %mul-horizontal-add-vector-reps cpu ( -- reps )
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps )
+! -reps hook for %avg-vector.
+HOOK: %avg-vector-reps cpu ( -- reps )
HOOK: %dot-vector-reps cpu ( -- reps )
+! -reps hook for %sad-vector.
+HOOK: %sad-vector-reps cpu ( -- reps )
HOOK: %sqrt-vector-reps cpu ( -- reps )
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
{ sse4.1? { int-4-rep uint-4-rep } }
} available-reps ;
+! x86 method for %mul-high-vector. two-operand runs on the full
+! dst src1 src2 rep stack and keep puts rep back on top, so the case
+! can select PMULHW for short-8-rep or PMULHUW for ushort-8-rep.
+! No other rep has a branch.
+M: x86 %mul-high-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { short-8-rep [ PMULHW ] }
+ { ushort-8-rep [ PMULHUW ] }
+ } case ;
+
+! Reps listed for %mul-high-vector: short-8-rep and ushort-8-rep, both
+! under the sse2? predicate. These are exactly the reps the emitter
+! has case branches for.
+M: x86 %mul-high-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep } }
+ } available-reps ;
+
+! x86 method for %mul-horizontal-add-vector. After two-operand (rep is
+! restored by keep), rep selects the opcode: PMADDUBSW for both 16-byte
+! reps, PMADDWD for both 8-short reps.
+! NOTE(review): the signed and unsigned rep of each width share one
+! opcode -- confirm against the Intel manual that the result is the
+! intended one for both signednesses.
+M: x86 %mul-horizontal-add-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { char-16-rep [ PMADDUBSW ] }
+ { uchar-16-rep [ PMADDUBSW ] }
+ { short-8-rep [ PMADDWD ] }
+ { ushort-8-rep [ PMADDWD ] }
+ } case ;
+
+! Reps listed for %mul-horizontal-add-vector: the 8-short reps (the
+! PMADDWD branches) under sse2?, the 16-byte reps (the PMADDUBSW
+! branches) under ssse3?.
+M: x86 %mul-horizontal-add-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep } }
+ { ssse3? { char-16-rep uchar-16-rep } }
+ } available-reps ;
+
M: x86 %div-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
+! x86 method for %avg-vector. After two-operand (rep is restored by
+! keep), rep selects PAVGB for uchar-16-rep or PAVGW for ushort-8-rep.
+! Only these two unsigned reps have a branch.
+M: x86 %avg-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { uchar-16-rep [ PAVGB ] }
+ { ushort-8-rep [ PAVGW ] }
+ } case ;
+
+! Reps listed for %avg-vector: uchar-16-rep and ushort-8-rep, both
+! under the sse2? predicate, matching the emitter's two branches.
+M: x86 %avg-vector-reps
+ {
+ { sse2? { uchar-16-rep ushort-8-rep } }
+ } available-reps ;
+
M: x86 %dot-vector
[ two-operand ] keep
{
{ sse4.1? { float-4-rep double-2-rep } }
} available-reps ;
+! x86 method for %sad-vector. After two-operand (rep is restored by
+! keep), both 16-byte reps select PSADBW.
+! NOTE(review): PSADBW is documented as operating on unsigned bytes --
+! confirm that the char-16-rep branch gives the intended result.
+M: x86 %sad-vector
+ [ two-operand ] keep
+ {
+ { char-16-rep [ PSADBW ] }
+ { uchar-16-rep [ PSADBW ] }
+ } case ;
+
+M: x86 %sad-vector-reps
+    ! PSADBW takes absolute differences of *unsigned* bytes, so only
+    ! uchar-16-rep is listed. Signed char-16 inputs would be summed
+    ! incorrectly (e.g. -1 vs 1 counts as 254, not 2), so char-16-rep
+    ! is deliberately left out of the advertised reps.
+    {
+        { sse2? { uchar-16-rep } }
+    } available-reps ;
+
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
signed-rep {
M: A vs* \ A-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline
M: A v* \ A-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline
M: A v*high \ A-rep [ (simd-v*high) ] [ call-next-method ] vv->v-op ; inline
-M: A v*hs+ \ A-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ; inline
M: A v/ \ A-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline
M: A vavg \ A-rep [ (simd-vavg) ] [ call-next-method ] vv->v-op ; inline
M: A vmin \ A-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline
+! vshuffle on any simd-128 value delegates directly to vshuffle-bytes.
M: simd-128 vshuffle ( u perm -- v )
vshuffle-bytes ; inline
+! v*hs+ is defined per concrete type instead of once in the functor
+! because each method casts its result to the type with elements twice
+! as wide: uchar-16 to ushort-8, ushort-8 to uint-4, uint-4 to
+! ulonglong-2, and char-16 to short-8, short-8 to int-4, int-4 to
+! longlong-2. Each method otherwise has the functor's shape:
+! (simd-v*hs+) with the type's rep, and call-next-method as the
+! second quotation given to vv->v-op.
+M: uchar-16 v*hs+
+ uchar-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ushort-8-cast ; inline
+M: ushort-8 v*hs+
+ ushort-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op uint-4-cast ; inline
+M: uint-4 v*hs+
+ uint-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ulonglong-2-cast ; inline
+M: char-16 v*hs+
+ char-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
+M: short-8 v*hs+
+ short-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op int-4-cast ; inline
+M: int-4 v*hs+
+ int-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op longlong-2-cast ; inline
+
"mirrors" vocab [
"math.vectors.simd.mirrors" require
] when