use: src1 src2
literal: rep ;
+PURE-INSN: ##mul-high-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+PURE-INSN: ##mul-horizontal-add-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##saturated-mul-vector
def: dst
use: src1 src2
use: src1 src2
literal: rep ;
+PURE-INSN: ##avg-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##dot-vector
def: dst/scalar-rep
use: src1 src2
literal: rep ;
+PURE-INSN: ##sad-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##horizontal-add-vector
def: dst
use: src1 src2
[ horizontal-cpu float-4-rep [ emit-simd-sum ] test-emit ]
unit-test
-[ {
- ##unpack-vector-head ##unpack-vector-tail ##add-vector
- ##horizontal-add-vector
- ##vector>scalar
-} ]
-[ horizontal-cpu int-4-rep [ emit-simd-sum ] test-emit ]
-unit-test
-
[ {
##unpack-vector-head ##unpack-vector-tail ##add-vector
##horizontal-add-vector ##horizontal-add-vector
! with
[ { ##scalar>vector ##shuffle-vector-imm } ]
-[ shuffle-imm-cpu int-4-rep [ emit-simd-with ] test-emit ]
+[ shuffle-imm-cpu float-4-rep [ emit-simd-with ] test-emit ]
unit-test
! gather-2
[ { ##gather-vector-2 } ]
-[ simple-ops-cpu longlong-2-rep [ emit-simd-gather-2 ] test-emit ]
+[ simple-ops-cpu double-2-rep [ emit-simd-gather-2 ] test-emit ]
unit-test
! gather-4
[ { ##gather-vector-4 } ]
-[ simple-ops-cpu int-4-rep [ emit-simd-gather-4 ] test-emit ]
+[ simple-ops-cpu float-4-rep [ emit-simd-gather-4 ] test-emit ]
unit-test
! select
[ { ##shuffle-vector-imm ##vector>scalar } ]
-[ shuffle-imm-cpu 1 int-4-rep [ emit-simd-select ] test-emit-literal ]
+[ shuffle-imm-cpu 1 float-4-rep [ emit-simd-select ] test-emit-literal ]
unit-test
! test with nonliteral/invalid reps
{ longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
} case ;
+: ^load-half-vector ( rep -- dst )
+ {
+ { float-4-rep [ float-array{ 0.5 0.5 0.5 0.5 } underlying>> ^^load-constant ] }
+ { double-2-rep [ double-array{ 0.5 0.5 } underlying>> ^^load-constant ] }
+ } case ;
+
: >variable-shuffle ( shuffle rep -- shuffle' )
rep-component-type heap-size
[ dup <repetition> >byte-array ]
[ ^^mul-vector ]
} emit-vv-vector-op ;
+: emit-simd-v*high ( node -- )
+ {
+ [ ^^mul-high-vector ]
+ } emit-vv-vector-op ;
+
+: emit-simd-v*hs+ ( node -- )
+ {
+ [ ^^mul-horizontal-add-vector ]
+ } emit-vv-vector-op ;
+
: emit-simd-v/ ( node -- )
{
[ ^^div-vector ]
]
} emit-vv-vector-op ;
+: emit-simd-vavg ( node -- )
+ {
+ [ ^^avg-vector ]
+ { float-vector-rep [| src1 src2 rep |
+ src1 src2 rep ^^add-vector
+ rep ^load-half-vector rep ^^mul-vector
+ ] }
+ } emit-vv-vector-op ;
+
: emit-simd-v. ( node -- )
{
[ ^^dot-vector ]
{ float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
} emit-vv-vector-op ;
+: emit-simd-vsad ( node -- )
+ {
+ [
+ [ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ]
+ [ widen-vector-rep ^^vector>scalar ] bi
+ ]
+ } emit-vv-vector-op ;
+
: emit-simd-vsqrt ( node -- )
{
[ ^^sqrt-vector ]
{ (simd-vs-) [ emit-simd-vs- ] }
{ (simd-vs*) [ emit-simd-vs* ] }
{ (simd-v*) [ emit-simd-v* ] }
+ { (simd-v*high) [ emit-simd-v*high ] }
+ { (simd-v*hs+) [ emit-simd-v*hs+ ] }
{ (simd-v/) [ emit-simd-v/ ] }
{ (simd-vmin) [ emit-simd-vmin ] }
{ (simd-vmax) [ emit-simd-vmax ] }
+ { (simd-vavg) [ emit-simd-vavg ] }
{ (simd-v.) [ emit-simd-v. ] }
+ { (simd-vsad) [ emit-simd-vsad ] }
{ (simd-vsqrt) [ emit-simd-vsqrt ] }
{ (simd-sum) [ emit-simd-sum ] }
{ (simd-vabs) [ emit-simd-vabs ] }
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector
+CODEGEN: ##mul-high-vector %mul-high-vector
+CODEGEN: ##mul-horizontal-add-vector %mul-horizontal-add-vector
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
+CODEGEN: ##avg-vector %avg-vector
CODEGEN: ##dot-vector %dot-vector
+CODEGEN: ##sad-vector %sad-vector
CODEGEN: ##sqrt-vector %sqrt-vector
CODEGEN: ##horizontal-add-vector %horizontal-add-vector
CODEGEN: ##horizontal-sub-vector %horizontal-sub-vector
(simd-vs-)
(simd-vs*)
(simd-v*)
+ (simd-v*high)
+ (simd-v*hs+)
(simd-v/)
(simd-vmin)
(simd-vmax)
+ (simd-vavg)
(simd-vsqrt)
(simd-vabs)
(simd-vbitand)
CONSTANT: vector-other-intrinsics
{
(simd-v.)
+ (simd-vsad)
(simd-sum)
(simd-vany?)
(simd-vall?)
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
+HOOK: %mul-high-vector cpu ( dst src1 src2 rep -- )
+HOOK: %mul-horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
+HOOK: %avg-vector cpu ( dst src1 src2 rep -- )
HOOK: %dot-vector cpu ( dst src1 src2 rep -- )
+HOOK: %sad-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %horizontal-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
+HOOK: %mul-high-vector-reps cpu ( -- reps )
+HOOK: %mul-horizontal-add-vector-reps cpu ( -- reps )
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps )
+HOOK: %avg-vector-reps cpu ( -- reps )
HOOK: %dot-vector-reps cpu ( -- reps )
+HOOK: %sad-vector-reps cpu ( -- reps )
HOOK: %sqrt-vector-reps cpu ( -- reps )
HOOK: %horizontal-add-vector-reps cpu ( -- reps )
HOOK: %horizontal-sub-vector-reps cpu ( -- reps )
{ sse4.1? { int-4-rep uint-4-rep } }
} available-reps ;
+M: x86 %mul-high-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { short-8-rep [ PMULHW ] }
+ { ushort-8-rep [ PMULHUW ] }
+ } case ;
+
+M: x86 %mul-high-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep } }
+ } available-reps ;
+
+M: x86 %mul-horizontal-add-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { char-16-rep [ PMADDUBSW ] }
+ { uchar-16-rep [ PMADDUBSW ] }
+ { short-8-rep [ PMADDWD ] }
+ } case ;
+
+M: x86 %mul-horizontal-add-vector-reps
+ {
+ { sse2? { short-8-rep } }
+ { ssse3? { char-16-rep uchar-16-rep } }
+ } available-reps ;
+
M: x86 %div-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
+M: x86 %avg-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ {
+ { uchar-16-rep [ PAVGB ] }
+ { ushort-8-rep [ PAVGW ] }
+ } case ;
+
+M: x86 %avg-vector-reps
+ {
+ { sse2? { uchar-16-rep ushort-8-rep } }
+ } available-reps ;
+
M: x86 %dot-vector
[ two-operand ] keep
{
{ sse4.1? { float-4-rep double-2-rep } }
} available-reps ;
+M: x86 %sad-vector
+ [ two-operand ] keep
+ {
+ { uchar-16-rep [ PSADBW ] }
+ } case ;
+
+M: x86 %sad-vector-reps
+ {
+ { sse2? { uchar-16-rep } }
+ } available-reps ;
+
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
signed-rep {
M: x86 %integer>scalar drop MOVD ;
-M:: x86 %scalar>integer ( dst src rep -- )
+:: %scalar>integer-32 ( dst src rep -- )
rep {
{ int-scalar-rep [
dst 32-bit-version-of src MOVD
] }
} case ;
+M: x86.32 %scalar>integer ( dst src rep -- ) %scalar>integer-32 ;
+M: x86.64 %scalar>integer ( dst src rep -- )
+ {
+ { longlong-scalar-rep [ MOVD ] }
+ { ulonglong-scalar-rep [ MOVD ] }
+ [ %scalar>integer-32 ]
+ } case ;
+
M: x86 %vector>scalar %copy ;
M: x86 %scalar>vector %copy ;
! Copyright (c) 2007-2009 Slava Pestov, Doug Coleman, Aaron Schaefer.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs binary-search fry kernel locals math math.order
- math.ranges mirrors namespaces sequences sorting ;
+ math.ranges namespaces sequences sorting ;
IN: math.combinatorics
<PRIVATE
! (c)2009 Slava Pestov, Joe Groff bsd license
USING: accessors alien alien.c-types alien.data combinators
-sequences.cords cpu.architecture fry generalizations kernel
-libc locals math math.libm math.order math.ranges math.vectors
-sequences sequences.private specialized-arrays vocabs.loader ;
+sequences.cords cpu.architecture fry generalizations grouping
+kernel libc locals math math.libm math.order math.ranges
+math.vectors sequences sequences.private specialized-arrays
+vocabs.loader ;
QUALIFIED-WITH: alien.c-types c
SPECIALIZED-ARRAYS:
c:char c:short c:int c:longlong
: (simd-vs*) ( a b rep -- c )
dup rep-component-type '[ * _ c-type-clamp ] components-2map ;
: (simd-v*) ( a b rep -- c ) [ * ] components-2map ;
+: (simd-v*high) ( a b rep -- c )
+ dup rep-component-type heap-size -8 * '[ * _ shift ] components-2map ;
+:: (simd-v*hs+) ( a b rep -- c )
+ rep { char-16-rep uchar-16-rep } member-eq?
+ [ uchar-16-rep char-16-rep ]
+ [ rep rep ] if :> ( a-rep b-rep )
+ b-rep widen-vector-rep signed-rep :> wide-rep
+ wide-rep rep-component-type :> wide-type
+ a a-rep >rep-array 2 <groups> :> a'
+ b b-rep >rep-array 2 <groups> :> b'
+ a' b' [
+ [ [ first ] bi@ * ]
+ [ [ second ] bi@ * ] 2bi +
+ wide-type c-type-clamp
+ ] wide-rep <rep-array> 2map-as underlying>> ;
: (simd-v/) ( a b rep -- c ) [ / ] components-2map ;
+: (simd-vavg) ( a b rep -- c )
+ [ + dup integer? [ 1 + -1 shift ] [ 0.5 * ] if ] components-2map ;
: (simd-vmin) ( a b rep -- c ) [ min ] components-2map ;
: (simd-vmax) ( a b rep -- c ) [ max ] components-2map ;
: (simd-v.) ( a b rep -- n )
[ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
: (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ;
+: (simd-vsad) ( a b rep -- c ) 2>rep-array [ - abs ] [ + ] 2map-reduce ;
: (simd-sum) ( a rep -- n ) [ + ] components-reduce ;
: (simd-vabs) ( a rep -- c ) [ abs ] components-map ;
: (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
{ v* { +vector+ +vector+ -> +vector+ } }
{ vs* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
+ { v*high { +vector+ +vector+ -> +vector+ } }
+ { v*hs+ { +vector+ +vector+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
{ vs+ { +vector+ +vector+ -> +vector+ } }
{ v+- { +vector+ +vector+ -> +vector+ } }
{ vs- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }
{ v. { +vector+ +vector+ -> +scalar+ } }
+ { vsad { +vector+ +vector+ -> +scalar+ } }
{ v/ { +vector+ +vector+ -> +vector+ } }
{ v/n { +vector+ +scalar+ -> +vector+ } }
{ vceiling { +vector+ -> +vector+ } }
{ vfloor { +vector+ -> +vector+ } }
{ vmax { +vector+ +vector+ -> +vector+ } }
{ vmin { +vector+ +vector+ -> +vector+ } }
+ { vavg { +vector+ +vector+ -> +vector+ } }
{ vneg { +vector+ -> +vector+ } }
{ vtruncate { +vector+ -> +vector+ } }
{ sum { +vector+ -> +scalar+ } }
{ vsqrt n/v v/n v/ normalize } unique assoc-diff ;
: remove-integer-words ( alist -- alist' )
- { vlshift vrshift } unique assoc-diff ;
+ { vlshift vrshift v*high v*hs+ } unique assoc-diff ;
: boolean-ops ( -- words )
{ vand vandn vor vxor vnot } ;
ERROR: bad-simd-length got expected ;
+ERROR: bad-simd-vector obj ;
+
<<
<PRIVATE
! Primitive SIMD constructors
GENERIC: simd-element-type ( obj -- c-type )
GENERIC: simd-rep ( simd -- rep )
+GENERIC: simd-with ( n exemplar -- v )
M: object simd-element-type drop f ;
M: object simd-rep drop f ;
>>
<<
+
+! SIMD vectors as sequences
+
+M: simd-128 hashcode* underlying>> hashcode* ; inline
+M: simd-128 clone [ clone ] change-underlying ; inline
+M: simd-128 c:byte-length drop 16 ; inline
+
+M: simd-128 new-sequence
+ 2dup length =
+ [ nip [ 16 (byte-array) ] make-underlying ]
+ [ length bad-simd-length ] if ; inline
+
+M: simd-128 equal?
+ dup simd-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
+
+! SIMD primitive operations
+
+M: simd-128 v+
+ dup simd-rep [ (simd-v+) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v-
+ dup simd-rep [ (simd-v-) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vneg
+ dup simd-rep [ (simd-vneg) ] [ call-next-method ] v->v-op ; inline
+M: simd-128 v+-
+ dup simd-rep [ (simd-v+-) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vs+
+ dup simd-rep [ (simd-vs+) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vs-
+ dup simd-rep [ (simd-vs-) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vs*
+ dup simd-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v*
+ dup simd-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v*high
+ dup simd-rep [ (simd-v*high) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v/
+ dup simd-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vavg
+ dup simd-rep [ (simd-vavg) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vmin
+ dup simd-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vmax
+ dup simd-rep [ (simd-vmax) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v.
+ dup simd-rep [ (simd-v.) ] [ call-next-method ] vv->n-op ; inline
+M: simd-128 vsad
+ dup simd-rep [ (simd-vsad) ] [ call-next-method ] vv->n-op ; inline
+M: simd-128 vsqrt
+ dup simd-rep [ (simd-vsqrt) ] [ call-next-method ] v->v-op ; inline
+M: simd-128 sum
+ dup simd-rep [ (simd-sum) ] [ call-next-method ] v->n-op ; inline
+M: simd-128 vabs
+ dup simd-rep [ (simd-vabs) ] [ call-next-method ] v->v-op ; inline
+M: simd-128 vbitand
+ dup simd-rep [ (simd-vbitand) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vbitandn
+ dup simd-rep [ (simd-vbitandn) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vbitor
+ dup simd-rep [ (simd-vbitor) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vbitxor
+ dup simd-rep [ (simd-vbitxor) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vbitnot
+ dup simd-rep [ (simd-vbitnot) ] [ call-next-method ] v->v-op ; inline
+M: simd-128 vand
+ dup simd-rep [ (simd-vand) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vandn
+ dup simd-rep [ (simd-vandn) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vor
+ dup simd-rep [ (simd-vor) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vxor
+ dup simd-rep [ (simd-vxor) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vnot
+ dup simd-rep [ (simd-vnot) ] [ call-next-method ] v->v-op ; inline
+M: simd-128 vlshift
+ over simd-rep [ (simd-vlshift) ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 vrshift
+ over simd-rep [ (simd-vrshift) ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 hlshift
+ over simd-rep [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 hrshift
+ over simd-rep [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 vshuffle-elements
+ over simd-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
+M: simd-128 vshuffle-bytes
+ dup simd-rep [ (simd-vshuffle-bytes) ] [ call-next-method ] vv'->v-op ; inline
+M: simd-128 (vmerge-head)
+ dup simd-rep [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 (vmerge-tail)
+ dup simd-rep [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v<=
+ dup simd-rep [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v<
+ dup simd-rep [ (simd-v<) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v=
+ dup simd-rep [ (simd-v=) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v>
+ dup simd-rep [ (simd-v>) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 v>=
+ dup simd-rep [ (simd-v>=) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vunordered?
+ dup simd-rep [ (simd-vunordered?) ] [ call-next-method ] vv->v-op ; inline
+M: simd-128 vany?
+ dup simd-rep [ (simd-vany?) ] [ call-next-method ] v->n-op ; inline
+M: simd-128 vall?
+ dup simd-rep [ (simd-vall?) ] [ call-next-method ] v->n-op ; inline
+M: simd-128 vnone?
+ dup simd-rep [ (simd-vnone?) ] [ call-next-method ] v->n-op ; inline
+
+! SIMD high-level specializations
+
+M: simd-128 vbroadcast swap [ nth ] [ simd-with ] bi ; inline
+M: simd-128 n+v [ simd-with ] keep v+ ; inline
+M: simd-128 n-v [ simd-with ] keep v- ; inline
+M: simd-128 n*v [ simd-with ] keep v* ; inline
+M: simd-128 n/v [ simd-with ] keep v/ ; inline
+M: simd-128 v+n over simd-with v+ ; inline
+M: simd-128 v-n over simd-with v- ; inline
+M: simd-128 v*n over simd-with v* ; inline
+M: simd-128 v/n over simd-with v/ ; inline
+M: simd-128 norm-sq dup v. assert-positive ; inline
+M: simd-128 distance v- norm ; inline
+
+M: simd-128 >pprint-sequence ;
+M: simd-128 pprint* pprint-object ;
+
<PRIVATE
! SIMD concrete type functor
M: A new-underlying drop \ A boa ; inline
M: A simd-rep drop A-rep ; inline
M: A simd-element-type drop ELT ; inline
+M: A simd-with drop A-with ; inline
+M: A nth-unsafe
+ swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline
M: A set-nth-unsafe
[ ELT boolean>element ] 2dip
underlying>> SET-NTH call ; inline
: A-with ( n -- v ) COERCER call \ A-rep (simd-with) \ A boa ; inline
: A-cast ( v -- v' ) underlying>> \ A boa ; inline
-! SIMD vectors as sequences
-
-M: A hashcode* underlying>> hashcode* ; inline
-M: A clone [ clone ] change-underlying ; inline
M: A length drop N ; inline
-M: A nth-unsafe
- swap \ A-rep [ (simd-select) ] [ call-next-method ] vn->n-op ; inline
-M: A c:byte-length drop 16 ; inline
-
-M: A new-sequence
- 2dup length =
- [ nip [ 16 (byte-array) ] make-underlying ]
- [ length bad-simd-length ] if ; inline
-
-M: A equal?
- \ A-rep [ drop v= vall? ] [ 3drop f ] if-both-vectors-match ; inline
-
-! SIMD primitive operations
-
-M: A v+ \ A-rep [ (simd-v+) ] [ call-next-method ] vv->v-op ; inline
-M: A v- \ A-rep [ (simd-v-) ] [ call-next-method ] vv->v-op ; inline
-M: A vneg \ A-rep [ (simd-vneg) ] [ call-next-method ] v->v-op ; inline
-M: A v+- \ A-rep [ (simd-v+-) ] [ call-next-method ] vv->v-op ; inline
-M: A vs+ \ A-rep [ (simd-vs+) ] [ call-next-method ] vv->v-op ; inline
-M: A vs- \ A-rep [ (simd-vs-) ] [ call-next-method ] vv->v-op ; inline
-M: A vs* \ A-rep [ (simd-vs*) ] [ call-next-method ] vv->v-op ; inline
-M: A v* \ A-rep [ (simd-v*) ] [ call-next-method ] vv->v-op ; inline
-M: A v/ \ A-rep [ (simd-v/) ] [ call-next-method ] vv->v-op ; inline
-M: A vmin \ A-rep [ (simd-vmin) ] [ call-next-method ] vv->v-op ; inline
-M: A vmax \ A-rep [ (simd-vmax) ] [ call-next-method ] vv->v-op ; inline
-M: A v. \ A-rep [ (simd-v.) ] [ call-next-method ] vv->n-op ; inline
-M: A vsqrt \ A-rep [ (simd-vsqrt) ] [ call-next-method ] v->v-op ; inline
-M: A sum \ A-rep [ (simd-sum) ] [ call-next-method ] v->n-op ; inline
-M: A vabs \ A-rep [ (simd-vabs) ] [ call-next-method ] v->v-op ; inline
-M: A vbitand \ A-rep [ (simd-vbitand) ] [ call-next-method ] vv->v-op ; inline
-M: A vbitandn \ A-rep [ (simd-vbitandn) ] [ call-next-method ] vv->v-op ; inline
-M: A vbitor \ A-rep [ (simd-vbitor) ] [ call-next-method ] vv->v-op ; inline
-M: A vbitxor \ A-rep [ (simd-vbitxor) ] [ call-next-method ] vv->v-op ; inline
-M: A vbitnot \ A-rep [ (simd-vbitnot) ] [ call-next-method ] v->v-op ; inline
-M: A vand \ A-rep [ (simd-vand) ] [ call-next-method ] vv->v-op ; inline
-M: A vandn \ A-rep [ (simd-vandn) ] [ call-next-method ] vv->v-op ; inline
-M: A vor \ A-rep [ (simd-vor) ] [ call-next-method ] vv->v-op ; inline
-M: A vxor \ A-rep [ (simd-vxor) ] [ call-next-method ] vv->v-op ; inline
-M: A vnot \ A-rep [ (simd-vnot) ] [ call-next-method ] v->v-op ; inline
-M: A vlshift \ A-rep [ (simd-vlshift) ] [ call-next-method ] vn->v-op ; inline
-M: A vrshift \ A-rep [ (simd-vrshift) ] [ call-next-method ] vn->v-op ; inline
-M: A hlshift \ A-rep [ (simd-hlshift) ] [ call-next-method ] vn->v-op ; inline
-M: A hrshift \ A-rep [ (simd-hrshift) ] [ call-next-method ] vn->v-op ; inline
-M: A vshuffle-elements \ A-rep [ (simd-vshuffle-elements) ] [ call-next-method ] vn->v-op ; inline
-M: A vshuffle-bytes \ A-rep [ (simd-vshuffle-bytes) ] [ call-next-method ] vv'->v-op ; inline
-M: A (vmerge-head) \ A-rep [ (simd-vmerge-head) ] [ call-next-method ] vv->v-op ; inline
-M: A (vmerge-tail) \ A-rep [ (simd-vmerge-tail) ] [ call-next-method ] vv->v-op ; inline
-M: A v<= \ A-rep [ (simd-v<=) ] [ call-next-method ] vv->v-op ; inline
-M: A v< \ A-rep [ (simd-v<) ] [ call-next-method ] vv->v-op ; inline
-M: A v= \ A-rep [ (simd-v=) ] [ call-next-method ] vv->v-op ; inline
-M: A v> \ A-rep [ (simd-v>) ] [ call-next-method ] vv->v-op ; inline
-M: A v>= \ A-rep [ (simd-v>=) ] [ call-next-method ] vv->v-op ; inline
-M: A vunordered? \ A-rep [ (simd-vunordered?) ] [ call-next-method ] vv->v-op ; inline
-M: A vany? \ A-rep [ (simd-vany?) ] [ call-next-method ] v->n-op ; inline
-M: A vall? \ A-rep [ (simd-vall?) ] [ call-next-method ] v->n-op ; inline
-M: A vnone? \ A-rep [ (simd-vnone?) ] [ call-next-method ] v->n-op ; inline
-
-! SIMD high-level specializations
-
-M: A vbroadcast swap nth A-with ; inline
-M: A n+v [ A-with ] dip v+ ; inline
-M: A n-v [ A-with ] dip v- ; inline
-M: A n*v [ A-with ] dip v* ; inline
-M: A n/v [ A-with ] dip v/ ; inline
-M: A v+n A-with v+ ; inline
-M: A v-n A-with v- ; inline
-M: A v*n A-with v* ; inline
-M: A v/n A-with v/ ; inline
-M: A norm-sq dup v. assert-positive ; inline
-M: A distance v- norm ; inline
-
-M: A >pprint-sequence ;
-M: A pprint* pprint-object ;
\ A-boa
[ COERCER N napply ] N {
M: A pprint-delims drop \ A{ \ } ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
+INSTANCE: A sequence
+
c:<c-type>
byte-array >>class
A >>boxed-class
{ A-rep alien-vector A boa } >quotation >>getter
- { [ underlying>> ] 2dip A-rep set-alien-vector } >quotation >>setter
+ {
+ [ dup simd-128? [ bad-simd-vector ] unless underlying>> ] 2dip
+ A-rep set-alien-vector
+ } >quotation >>setter
16 >>size
16 >>align
A-rep >>rep
>>
-INSTANCE: simd-128 sequence
-
! SIMD instances
SIMD-128: char-16
M: simd-128 vshuffle ( u perm -- v )
vshuffle-bytes ; inline
+M: uchar-16 v*hs+
+ uchar-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
+M: ushort-8 v*hs+
+ ushort-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op uint-4-cast ; inline
+M: uint-4 v*hs+
+ uint-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op ulonglong-2-cast ; inline
+M: char-16 v*hs+
+ char-16-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op short-8-cast ; inline
+M: short-8 v*hs+
+ short-8-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op int-4-cast ; inline
+M: int-4 v*hs+
+ int-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op longlong-2-cast ; inline
+
"mirrors" vocab [
"math.vectors.simd.mirrors" require
] when
"Processor SIMD units supported by the " { $vocab-link "math.vectors.simd" } " vocabulary represent boolean values as bitmasks, where a true result's binary representation is all ones and a false representation is all zeroes. This is the format in which results from comparison words such as " { $link v= } " return their results and in which logic and test words such as " { $link vand } " and " { $link vall? } " take their inputs when working with SIMD types. For a float vector, false will manifest itself as " { $snippet "0.0" } " and true as a " { $link POSTPONE: NAN: } " literal with a string of set bits in its payload:"
{ $example
"""USING: math.vectors math.vectors.simd prettyprint ;
-FROM: alien.c-types => float ;
-SIMD: float
float-4{ 1.0 2.0 3.0 0/0. } float-4{ 1.0 -2.0 3.0 0/0. } v= ."""
"""float-4{ NAN: fffffe0000000 0.0 NAN: fffffe0000000 0.0 }"""
"For an integer vector, false will manifest as " { $snippet "0" } " and true as " { $snippet "-1" } " (for signed vectors) or the largest representable value of the element type (for unsigned vectors):"
{ $example
"""USING: math.vectors math.vectors.simd prettyprint alien.c-types ;
-SIMD: int
-SIMD: uchar
int-4{ 1 2 3 0 } int-4{ 1 -2 3 4 } v=
uchar-16{ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 }
"This differs from Factor's native representation of boolean values, where " { $link f } " is false and every other value (including " { $snippet "0" } " and " { $snippet "0.0" } ") is true. To make it easy to construct literal SIMD masks, " { $link t } " and " { $link f } " are accepted inside SIMD literal syntax and expand to the proper true or false representation for the underlying type:"
{ $example
"""USING: math.vectors math.vectors.simd prettyprint alien.c-types ;
-SIMD: int
int-4{ f f t f } ."""
"""int-4{ 0 0 -1 0 }""" }
{ $description "Truncates each element of " { $snippet "u" } "." } ;
HELP: n+v
-{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
+{ $values { "n" "a number" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ;
HELP: v+n
-{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
+{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "w" "a sequence of numbers" } }
{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ;
HELP: n-v
-{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
+{ $values { "n" "a number" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Subtracts each element of " { $snippet "u" } " from " { $snippet "n" } "." } ;
HELP: v-n
-{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
+{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "w" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "n" } " from each element of " { $snippet "u" } "." } ;
HELP: n*v
-{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
+{ $values { "n" "a number" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Multiplies each element of " { $snippet "u" } " by " { $snippet "n" } "." } ;
HELP: v*n
-{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
+{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "w" "a sequence of numbers" } }
{ $description "Multiplies each element of " { $snippet "u" } " by " { $snippet "n" } "." } ;
HELP: n/v
-{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } }
+{ $values { "n" "a number" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." }
{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;
HELP: v/n
-{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } }
+{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "w" "a sequence of numbers" } }
{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." }
{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ;
HELP: v+-
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
-{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." }
+{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise. Elements at even indexes are subtracted, while elements at odd indexes are added." }
{ $examples
{ $example
"USING: math.vectors prettyprint ;"
{ $examples
{ $example
"USING: alien.c-types math.vectors math.vectors.simd" "prettyprint ;"
- "SIMD: int"
"int-4{ 69 42 911 13 } 2 vbroadcast ."
"int-4{ 911 911 911 911 }"
}
{ $examples
{ $example
"USING: alien.c-types math.vectors math.vectors.simd" "prettyprint ;"
- "SIMD: int"
"int-4{ 69 42 911 13 } { 1 3 2 3 } vshuffle ."
"int-4{ 42 13 911 13 }"
}
! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays alien.c-types assocs kernel sequences math math.functions
-hints math.order math.libm math.floats.private fry combinators
+grouping hints math.order math.libm math.floats.private fry combinators
byte-arrays accessors locals ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors
GENERIC: vneg ( u -- v )
M: object vneg [ neg ] map ;
-GENERIC# v+n 1 ( u n -- v )
+GENERIC# v+n 1 ( u n -- w )
M: object v+n [ + ] curry map ;
GENERIC: n+v ( n v -- w )
GENERIC: n-v ( n v -- w )
M: object n-v [ - ] with map ;
-GENERIC# v*n 1 ( u n -- v )
+GENERIC# v*n 1 ( u n -- w )
M: object v*n [ * ] curry map ;
GENERIC: n*v ( n v -- w )
M: object n*v [ * ] with map ;
-GENERIC# v/n 1 ( u n -- v )
+GENERIC# v/n 1 ( u n -- w )
M: object v/n [ / ] curry map ;
GENERIC: n/v ( n v -- w )
GENERIC: v* ( u v -- w )
M: object v* [ * ] 2map ;
+GENERIC: v*high ( u v -- w )
+
+<PRIVATE
+: (h+) ( u -- w ) 2 <groups> [ first2 + ] map ;
+: (h-) ( u -- w ) 2 <groups> [ first2 - ] map ;
+PRIVATE>
+
+GENERIC: v*hs+ ( u v -- w )
+M: object v*hs+ [ * ] 2map (h+) ;
+
GENERIC: v/ ( u v -- w )
M: object v/ [ / ] 2map ;
PRIVATE>
+GENERIC: vavg ( u v -- w )
+M: object vavg [ + 2 / ] 2map ;
+
GENERIC: vmax ( u v -- w )
M: object vmax [ [ float-max ] [ max ] if-both-floats ] 2map ;
GENERIC: vsqrt ( u -- v )
M: object vsqrt [ >float fsqrt ] map ;
+GENERIC: vsad ( u v -- n )
+M: object vsad [ - abs ] [ + ] 2map-reduce ;
+
<PRIVATE
: bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline
SPECIALIZED-ARRAY: int
SPECIALIZED-ARRAYS: bool ushort char uint float ulonglong ;
-[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test
-
[ t ] [ { 1 2 3 } >int-array int-array? ] unit-test
[ t ] [ int-array{ 1 2 3 } int-array? ] unit-test
INSTANCE: A specialized-array
+M: A vs+ [ + \ T c-type-clamp ] 2map ;
+M: A vs- [ - \ T c-type-clamp ] 2map ;
+M: A vs* [ * \ T c-type-clamp ] 2map ;
+
+M: A v*high [ * \ T heap-size neg shift ] 2map ;
+
;FUNCTOR
GENERIC: (underlying-type) ( c-type -- c-type' )
"slots"
"special"
"specializer"
- "specializations"
"struct-slots"
! UI needs this
! "superclass"
#define rs_bot (ctx->retainstack_region->start)
#define rs_top (ctx->retainstack_region->end)
-DEFPUSHPOP(d,ds)
-DEFPUSHPOP(r,rs)
+inline cell dpeek()
+{
+ return *(cell *)ds;
+}
+
+inline void drepl(cell tagged)
+{
+ *(cell *)ds = tagged;
+}
+
+inline cell dpop()
+{
+ cell value = dpeek();
+ ds -= sizeof(cell);
+ return value;
+}
+
+inline void dpush(cell tagged)
+{
+ ds += sizeof(cell);
+ drepl(tagged);
+}
VM_C_API void nest_stacks(stack_frame *magic_frame, factor_vm *vm);
VM_C_API void unnest_stacks(factor_vm *vm);
}
-
clear_decks(gen);
}
-bool data_heap::low_memory_p()
+bool data_heap::high_fragmentation_p()
{
return (tenured->largest_free_block() <= nursery->size + aging->size);
}
+bool data_heap::low_memory_p()
+{
+ return (tenured->free_space() <= nursery->size + aging->size);
+}
+
void data_heap::mark_all_cards()
{
memset(cards,-1,cards_end - cards);
void reset_generation(nursery_space *gen);
void reset_generation(aging_space *gen);
void reset_generation(tenured_space *gen);
+ bool high_fragmentation_p();
bool low_memory_p();
void mark_all_cards();
};
{
collect_mark_impl(trace_contexts_p);
collect_sweep_impl();
+
if(data->low_memory_p())
+ {
+ current_gc->op = collect_growing_heap_op;
+ current_gc->event->op = collect_growing_heap_op;
+ collect_growing_heap(0,trace_contexts_p);
+ }
+ else if(data->high_fragmentation_p())
{
current_gc->op = collect_compact_op;
current_gc->event->op = collect_compact_op;
collect_compact_impl(trace_contexts_p);
}
+
code->flush_icache();
}
break;
case collect_aging_op:
collect_aging();
- if(data->low_memory_p())
+ if(data->high_fragmentation_p())
{
current_gc->op = collect_full_op;
current_gc->event->op = collect_full_op;
break;
case collect_to_tenured_op:
collect_to_tenured();
- if(data->low_memory_p())
+ if(data->high_fragmentation_p())
{
current_gc->op = collect_full_op;
current_gc->event->op = collect_full_op;
#include "layouts.hpp"
#include "platform.hpp"
#include "primitives.hpp"
-#include "stacks.hpp"
#include "segments.hpp"
#include "contexts.hpp"
#include "run.hpp"
+++ /dev/null
-namespace factor
-{
-
-#define DEFPUSHPOP(prefix,ptr) \
- inline cell prefix##peek() { return *(cell *)ptr; } \
- inline void prefix##repl(cell tagged) { *(cell *)ptr = tagged; } \
- inline cell prefix##pop() \
- { \
- cell value = prefix##peek(); \
- ptr -= sizeof(cell); \
- return value; \
- } \
- inline void prefix##push(cell tagged) \
- { \
- ptr += sizeof(cell); \
- prefix##repl(tagged); \
- }
-
-}