USING: alien alien.syntax alien.c-types kernel tools.test
-sequences system libc alien.strings io.encodings.utf8 ;
+sequences system libc alien.strings io.encodings.utf8
+math.constants ;
IN: alien.c-types.tests
CONSTANT: xyz 123
os windows? cpu x86.64? and [
[ -2147467259 ] [ 2147500037 <long> *long ] unit-test
] when
+
+[ 0 ] [ -10 uchar c-type-clamp ] unit-test
+[ 12 ] [ 12 uchar c-type-clamp ] unit-test
+[ -10 ] [ -10 char c-type-clamp ] unit-test
+[ 127 ] [ 230 char c-type-clamp ] unit-test
+[ t ] [ pi dup float c-type-clamp = ] unit-test
! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: byte-arrays arrays assocs kernel kernel.private math
-namespaces make parser sequences strings words splitting math.parser
-cpu.architecture alien alien.accessors alien.strings quotations
-layouts system compiler.units io io.files io.encodings.binary
-io.streams.memory accessors combinators effects continuations fry
-classes vocabs vocabs.loader words.symbol ;
+math.order math.parser namespaces make parser sequences strings
+words splitting cpu.architecture alien alien.accessors
+alien.strings quotations layouts system compiler.units io
+io.files io.encodings.binary io.streams.memory accessors
+combinators effects continuations fry classes vocabs
+vocabs.loader words.symbol ;
QUALIFIED: math
IN: alien.c-types
\ ulong \ size_t typedef
] with-compilation-unit
+M: char-16-rep rep-component-type drop char ;
+M: uchar-16-rep rep-component-type drop uchar ;
+M: short-8-rep rep-component-type drop short ;
+M: ushort-8-rep rep-component-type drop ushort ;
+M: int-4-rep rep-component-type drop int ;
+M: uint-4-rep rep-component-type drop uint ;
+M: float-4-rep rep-component-type drop float ;
+M: double-2-rep rep-component-type drop double ;
+
+: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable
+: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable
+: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable
+: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable
+
+: c-type-interval ( c-type -- from to )
+ {
+ { [ dup { float double } memq? ] [ drop -1/0. 1/0. ] }
+ { [ dup { char short int long longlong } memq? ] [ signed-interval ] }
+ { [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] }
+ } cond ; foldable
+
+: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline
use: src1 src2
literal: rep ;
+PURE-INSN: ##saturated-add-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+PURE-INSN: ##add-sub-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##sub-vector
def: dst
use: src1 src2
literal: rep ;
+PURE-INSN: ##saturated-sub-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##mul-vector
def: dst
use: src1 src2
literal: rep ;
+PURE-INSN: ##saturated-mul-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
PURE-INSN: ##div-vector
def: dst
use: src1 src2
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
} enable-intrinsics ;
-: enable-sse2-simd ( -- )
+: enable-simd ( -- )
{
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
+ { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ;
-: enable-sse3-simd ( -- )
- {
- { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
- } enable-intrinsics ;
-
: emit-intrinsic ( node word -- )
"intrinsic" word-prop call( node -- ) ;
##min-float
##max-float
##add-vector
+ ##saturated-add-vector
+ ##add-sub-vector
##sub-vector
+ ##saturated-sub-vector
##mul-vector
+ ##saturated-mul-vector
##div-vector
##min-vector
##max-vector ;
CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##box-vector %box-vector
CODEGEN: ##add-vector %add-vector
+CODEGEN: ##saturated-add-vector %saturated-add-vector
+CODEGEN: ##add-sub-vector %add-sub-vector
CODEGEN: ##sub-vector %sub-vector
+CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector
+CODEGEN: ##saturated-mul-vector %saturated-mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector
compiler.tree.propagation.call-effect
compiler.tree.propagation.transforms
compiler.tree.propagation.simd ;
+FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
IN: compiler.tree.propagation.known-words
{ + - * / }
alien-unsigned-8
} [
dup name>> {
- {
- [ "alien-signed-" ?head ]
- [ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ]
- }
- {
- [ "alien-unsigned-" ?head ]
- [ string>number 8 * 2^ 1 - 0 swap [a,b] ]
- }
- } cond
+ { [ "alien-signed-" ?head ] [ string>number (signed-interval) ] }
+ { [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] }
+ } cond [a,b]
[ fits-in-fixnum? fixnum integer ? ] keep <class/interval-info>
'[ 2drop _ ] "outputs" set-word-prop
] each
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors byte-arrays combinators fry
+USING: accessors byte-arrays combinators fry sequences
compiler.tree.propagation.info cpu.architecture kernel words math
math.intervals math.vectors.simd.intrinsics ;
IN: compiler.tree.propagation.simd
-\ (simd-v+) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-v-) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-v*) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-v/) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop
+{
+ (simd-v+)
+ (simd-v-)
+ (simd-v+-)
+ (simd-v*)
+ (simd-v/)
+ (simd-vmin)
+ (simd-vmax)
+ (simd-vsqrt)
+ (simd-broadcast)
+ (simd-gather-2)
+ (simd-gather-4)
+ alien-vector
+} [ { byte-array } "default-output-classes" set-word-prop ] each
\ (simd-sum) [
nip dup literal?>> [
literal>> scalar-rep-of {
{ float-rep [ float ] }
{ double-rep [ float ] }
+ { int-rep [ integer ] }
} case
] [ drop real ] if
<class-info>
] "outputs" set-word-prop
-\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop
-
-\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop
-
\ assert-positive [
real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop
-\ alien-vector { byte-array } "default-output-classes" set-word-prop
-
! If SIMD is not available, inline alien-vector and set-alien-vector
! to get a speedup
: inline-unless-intrinsic ( word -- )
! On x86, floating point registers are really vector registers
SINGLETONS:
-float-4-rep
-double-2-rep
char-16-rep
uchar-16-rep
short-8-rep
int-4-rep
uint-4-rep ;
-UNION: vector-rep
+SINGLETONS:
float-4-rep
-double-2-rep
+double-2-rep ;
+
+UNION: int-vector-rep
char-16-rep
uchar-16-rep
short-8-rep
int-4-rep
uint-4-rep ;
+UNION: float-vector-rep
+float-4-rep
+double-2-rep ;
+
+UNION: vector-rep
+int-vector-rep
+float-vector-rep ;
+
UNION: representation
any-rep
tagged-rep
M: stack-params rep-size drop cell ;
M: vector-rep rep-size drop 16 ;
+GENERIC: rep-component-type ( rep -- n )
+
+! Methods defined in alien.c-types
+
GENERIC: scalar-rep-of ( rep -- rep' )
M: float-4-rep scalar-rep-of drop float-rep ;
M: double-2-rep scalar-rep-of drop double-rep ;
+M: int-vector-rep scalar-rep-of drop int-rep ;
! Mapping from register class to machine registers
HOOK: machine-registers cpu ( -- assoc )
HOOK: %broadcast-vector cpu ( dst src rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
-
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
+HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
+HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
+HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
+HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
HOOK: %sqrt-vector cpu ( dst src rep -- )
HOOK: %horizontal-add-vector cpu ( dst src rep -- )
+HOOK: %broadcast-vector-reps cpu ( -- reps )
+HOOK: %gather-vector-2-reps cpu ( -- reps )
+HOOK: %gather-vector-4-reps cpu ( -- reps )
+HOOK: %add-vector-reps cpu ( -- reps )
+HOOK: %saturated-add-vector-reps cpu ( -- reps )
+HOOK: %add-sub-vector-reps cpu ( -- reps )
+HOOK: %sub-vector-reps cpu ( -- reps )
+HOOK: %saturated-sub-vector-reps cpu ( -- reps )
+HOOK: %mul-vector-reps cpu ( -- reps )
+HOOK: %saturated-mul-vector-reps cpu ( -- reps )
+HOOK: %div-vector-reps cpu ( -- reps )
+HOOK: %min-vector-reps cpu ( -- reps )
+HOOK: %max-vector-reps cpu ( -- reps )
+HOOK: %sqrt-vector-reps cpu ( -- reps )
+HOOK: %horizontal-add-vector-reps cpu ( -- reps )
+
HOOK: %unbox-alien cpu ( dst src -- )
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
HOOK: %box-alien cpu ( dst src temp -- )
4 "double" c-type (>>align)
] unless
-"cpu.x86.features" require
+check-sse
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
} cond
-"cpu.x86.features" require
+check-sse
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: system kernel math math.order math.parser namespaces
-alien.c-types alien.syntax combinators locals init io cpu.x86
+USING: system kernel memoize math math.order math.parser
+namespaces alien.c-types alien.syntax combinators locals init io
compiler compiler.units accessors ;
IN: cpu.x86.features
PRIVATE>
-ALIAS: sse-version sse_version
+: sse-version ( -- n )
+ sse_version
+ "sse-version" get string>number [ min ] when* ; foldable
+
+: sse? ( -- ? ) sse-version 10 >= ; foldable
+: sse2? ( -- ? ) sse-version 20 >= ; foldable
+: sse3? ( -- ? ) sse-version 30 >= ; foldable
+: ssse3? ( -- ? ) sse-version 33 >= ; foldable
+: sse4.1? ( -- ? ) sse-version 41 >= ; foldable
+: sse4.2? ( -- ? ) sse-version 42 >= ; foldable
: sse-string ( version -- string )
{
: count-instructions ( quot -- n )
instruction-count [ call ] dip instruction-count swap - ; inline
-
-USING: cpu.x86.features cpu.x86.features.private ;
-
-:: install-sse-check ( version -- )
- [
- sse-version version < [
- "This image was built to use " write
- version sse-string write
- " but your CPU only supports " write
- sse-version sse-string write "." print
- "You will need to bootstrap Factor again." print
- flush
- 1 exit
- ] when
- ] "cpu.x86" add-init-hook ;
-
-: enable-sse ( version -- )
- {
- { 00 [ ] }
- { 10 [ ] }
- { 20 [ enable-sse2 ] }
- { 30 [ enable-sse3 ] }
- { 33 [ enable-sse3 ] }
- { 41 [ enable-sse3 ] }
- { 42 [ enable-sse3 ] }
- } case ;
-
-[ { sse_version } compile ] with-optimizer
-
-"Checking for multimedia extensions: " write sse-version
-"sse-version" get [ string>number min ] when*
-[ sse-string write " detected" print ]
-[ install-sse-check ]
-[ enable-sse ] tri
cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands
cpu.architecture kernel kernel.private math memory namespaces make
sequences words system layouts combinators math.order fry locals
-compiler.constants vm byte-arrays
+compiler.constants byte-arrays io macros quotations cpu.x86.features
+cpu.x86.features.private compiler compiler.units init vm
compiler.cfg.registers
compiler.cfg.instructions
compiler.cfg.intrinsics
dst src byte-array-offset [+]
rep copy-register ;
+MACRO: available-reps ( alist -- )
+ ! Each SSE version adds new representations and supports
+ ! all old ones
+ unzip { } [ append ] accumulate rest swap suffix
+ [ [ 1quotation ] map ] bi@ zip
+ reverse [ { } ] suffix
+ '[ _ cond ] ;
+
M: x86 %broadcast-vector ( dst src rep -- )
{
{ float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] }
{ double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] }
} case ;
+M: x86 %broadcast-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep } }
+ } available-reps ;
+
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
rep {
{
}
} case ;
+M: x86 %gather-vector-4-reps
+ {
+ { sse? { float-4-rep } }
+ } available-reps ;
+
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
rep {
{
}
} case ;
+M: x86 %gather-vector-2-reps
+ {
+ { sse2? { double-2-rep } }
+ } available-reps ;
+
M: x86 %add-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ADDPS ] }
{ uint-4-rep [ PADDD ] }
} case drop ;
+M: x86 %add-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
+ } available-reps ;
+
+M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
+ {
+ { char-16-rep [ PADDSB ] }
+ { uchar-16-rep [ PADDUSB ] }
+ { short-8-rep [ PADDSW ] }
+ { ushort-8-rep [ PADDUSW ] }
+ } case drop ;
+
+M: x86 %saturated-add-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
+ } available-reps ;
+
+M: x86 %add-sub-vector ( dst src1 src2 rep -- )
+ {
+ { float-4-rep [ ADDSUBPS ] }
+ { double-2-rep [ ADDSUBPD ] }
+ } case drop ;
+
+M: x86 %add-sub-vector-reps
+ {
+ { sse3? { float-4-rep double-2-rep } }
+ } available-reps ;
+
M: x86 %sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ SUBPS ] }
{ uint-4-rep [ PSUBD ] }
} case drop ;
+M: x86 %sub-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
+ } available-reps ;
+
+M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
+ {
+ { char-16-rep [ PSUBSB ] }
+ { uchar-16-rep [ PSUBUSB ] }
+ { short-8-rep [ PSUBSW ] }
+ { ushort-8-rep [ PSUBUSW ] }
+ } case drop ;
+
+M: x86 %saturated-sub-vector-reps
+ {
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
+ } available-reps ;
+
M: x86 %mul-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ MULPS ] }
{ double-2-rep [ MULPD ] }
- { int-4-rep [ PMULLW ] }
+ { short-8-rep [ PMULLW ] }
+ { ushort-8-rep [ PMULLW ] }
+ { int-4-rep [ PMULLD ] }
+ { uint-4-rep [ PMULLD ] }
} case drop ;
+M: x86 %mul-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
+ } available-reps ;
+
+M: x86 %saturated-mul-vector-reps
+ ! No multiplication with saturation on x86
+ { } ;
+
M: x86 %div-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ DIVPS ] }
{ double-2-rep [ DIVPD ] }
} case drop ;
+M: x86 %div-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep } }
+ } available-reps ;
+
M: x86 %min-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ MINPS ] }
{ double-2-rep [ MINPD ] }
+ { uchar-16-rep [ PMINUB ] }
+ { short-8-rep [ PMINSW ] }
} case drop ;
+M: x86 %min-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep short-8-rep uchar-16-rep } }
+ } available-reps ;
+
M: x86 %max-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ MAXPS ] }
{ double-2-rep [ MAXPD ] }
+ { uchar-16-rep [ PMAXUB ] }
+ { short-8-rep [ PMAXSW ] }
} case drop ;
+M: x86 %max-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep short-8-rep uchar-16-rep } }
+ } available-reps ;
+
M: x86 %sqrt-vector ( dst src rep -- )
{
{ float-4-rep [ SQRTPS ] }
{ double-2-rep [ SQRTPD ] }
} case ;
+M: x86 %sqrt-vector-reps
+ {
+ { sse? { float-4-rep } }
+ { sse2? { double-2-rep } }
+ } available-reps ;
+
M: x86 %horizontal-add-vector ( dst src rep -- )
{
{ float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
{ double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
} case ;
+M: x86 %horizontal-add-vector-reps
+ {
+ { sse3? { float-4-rep double-2-rep } }
+ } available-reps ;
+
M: x86 %unbox-alien ( dst src -- )
alien-offset [+] MOV ;
#! set up by the caller.
stack-frame get total-size>> + stack@ ;
-: enable-sse2 ( -- )
- enable-float-intrinsics
- enable-fsqrt
- enable-float-min/max
- enable-sse2-simd ;
+enable-simd
+enable-min/max
+enable-fixnum-log2
-: enable-sse3 ( -- )
- enable-sse2
- enable-sse3-simd ;
+:: install-sse2-check ( -- )
+ [
+ sse-version 20 < [
+ "This image was built to use SSE2 but your CPU does not support it." print
+ "You will need to bootstrap Factor again." print
+ flush
+ 1 exit
+ ] when
+ ] "cpu.x86" add-init-hook ;
+
+: enable-sse2 ( version -- )
+ 20 >= [
+ enable-float-intrinsics
+ enable-fsqrt
+ enable-float-min/max
+ install-sse2-check
+ ] when ;
-enable-min/max
-enable-fixnum-log2
\ No newline at end of file
+: check-sse ( -- )
+ [ { sse_version } compile ] with-optimizer
+ "Checking for multimedia extensions: " write sse-version
+ [ sse-string write " detected" print ] [ enable-sse2 ] bi ;
set_x87_env ;
M: x86 (fp-env-registers)
- sse-version 20 >=
- [ <sse-env> <x87-env> 2array ]
- [ <x87-env> 1array ] if ;
+ sse2? [ <sse-env> <x87-env> 2array ] [ <x87-env> 1array ] if ;
CONSTANT: sse-exception-flag-bits HEX: 3f
CONSTANT: sse-exception-flag>bit
+++ /dev/null
-USING: cpu.architecture math.vectors.simd
-math.vectors.simd.intrinsics accessors math.vectors.simd.alien
-kernel classes.struct tools.test compiler sequences byte-arrays
-alien math kernel.private specialized-arrays combinators ;
-SPECIALIZED-ARRAY: float
-IN: math.vectors.simd.alien.tests
-
-! Vector alien intrinsics
-[ float-4{ 1 2 3 4 } ] [
- [
- float-4{ 1 2 3 4 }
- underlying>> 0 float-4-rep alien-vector
- ] compile-call float-4 boa
-] unit-test
-
-[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
- 16 [ 1 ] B{ } replicate-as 16 <byte-array>
- [
- 0 [
- { byte-array c-ptr fixnum } declare
- float-4-rep set-alien-vector
- ] compile-call
- ] keep
-] unit-test
-
-[ float-array{ 1 2 3 4 } ] [
- [
- float-array{ 1 2 3 4 } underlying>>
- float-array{ 4 3 2 1 } clone
- [ underlying>> 0 float-4-rep set-alien-vector ] keep
- ] compile-call
-] unit-test
-
-STRUCT: simd-struct
-{ x float-4 }
-{ y double-2 }
-{ z double-4 }
-{ w float-8 } ;
-
-[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
-
-[
- float-4{ 1 2 3 4 }
- double-2{ 2 1 }
- double-4{ 4 3 2 1 }
- float-8{ 1 2 3 4 5 6 7 8 }
-] [
- simd-struct <struct>
- float-4{ 1 2 3 4 } >>x
- double-2{ 2 1 } >>y
- double-4{ 4 3 2 1 } >>z
- float-8{ 1 2 3 4 5 6 7 8 } >>w
- { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
-] unit-test
-
-[
- float-4{ 1 2 3 4 }
- double-2{ 2 1 }
- double-4{ 4 3 2 1 }
- float-8{ 1 2 3 4 5 6 7 8 }
-] [
- [
- simd-struct <struct>
- float-4{ 1 2 3 4 } >>x
- double-2{ 2 1 } >>y
- double-4{ 4 3 2 1 } >>z
- float-8{ 1 2 3 4 5 6 7 8 } >>w
- { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
- ] compile-call
-] unit-test
+++ /dev/null
-! Copyright (C) 2009 Slava Pestov.
-! See http://factorcode.org/license.txt for BSD license.
-USING: alien accessors alien.c-types byte-arrays compiler.units
-cpu.architecture locals kernel math math.vectors.simd
-math.vectors.simd.intrinsics ;
-IN: math.vectors.simd.alien
-
-:: define-simd-128-type ( class rep -- )
- <c-type>
- byte-array >>class
- class >>boxed-class
- [ rep alien-vector class boa ] >>getter
- [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
- 16 >>size
- 8 >>align
- rep >>rep
- class name>> typedef ;
-
-:: define-simd-256-type ( class rep -- )
- <c-type>
- class >>class
- class >>boxed-class
- [
- [ rep alien-vector ]
- [ 16 + >fixnum rep alien-vector ] 2bi
- class boa
- ] >>getter
- [
- [ [ underlying1>> ] 2dip rep set-alien-vector ]
- [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
- 3bi
- ] >>setter
- 32 >>size
- 8 >>align
- rep >>rep
- class name>> typedef ;
-[
- float-4 float-4-rep define-simd-128-type
- double-2 double-2-rep define-simd-128-type
- float-8 float-4-rep define-simd-256-type
- double-4 double-2-rep define-simd-256-type
-] with-compilation-unit
+++ /dev/null
-Slava Pestov
\ No newline at end of file
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors alien.c-types byte-arrays classes functors
-kernel math parser prettyprint.custom sequences
-sequences.private literals ;
+USING: accessors alien.c-types assocs byte-arrays classes
+effects fry functors generalizations kernel literals locals
+math math.functions math.vectors math.vectors.simd.intrinsics
+math.vectors.specialization parser prettyprint.custom sequences
+sequences.private strings words definitions macros cpu.architecture ;
+QUALIFIED-WITH: math m
IN: math.vectors.simd.functor
ERROR: bad-length got expected ;
-FUNCTOR: define-simd-128 ( T -- )
+MACRO: simd-boa ( rep class -- simd-array )
+ [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ;
+
+:: define-boa-custom-inlining ( word rep class -- )
+ word [
+ drop
+ rep rep rep-gather-word supported-simd-op? [
+ [ rep (simd-boa) class boa ]
+ ] [ word def>> ] if
+ ] "custom-inlining" set-word-prop ;
+
+: simd-with ( rep class x -- simd-array )
+ [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
+
+:: define-with-custom-inlining ( word rep class -- )
+ word [
+ drop
+ rep \ (simd-broadcast) supported-simd-op? [
+ [ rep rep-coerce rep (simd-broadcast) class boa ]
+ ] [ word def>> ] if
+ ] "custom-inlining" set-word-prop ;
+
+: boa-effect ( rep n -- effect )
+ [ rep-components ] dip *
+ [ CHAR: a + 1string ] map
+ { "simd-vector" } <effect> ;
+
+: supported-simd-ops ( assoc rep -- assoc' )
+ [
+ {
+ { v+ (simd-v+) }
+ { vs+ (simd-vs+) }
+ { v+- (simd-v+-) }
+ { v- (simd-v-) }
+ { vs- (simd-vs-) }
+ { v* (simd-v*) }
+ { vs* (simd-vs*) }
+ { v/ (simd-v/) }
+ { vmin (simd-vmin) }
+ { vmax (simd-vmax) }
+ { sum (simd-sum) }
+ }
+ ] dip
+ '[ nip _ swap supported-simd-op? ] assoc-filter
+ '[ drop _ key? ] assoc-filter ;
+
+:: high-level-ops ( ctor elt-class -- assoc )
+ ! Some SIMD operations are defined in terms of others.
+ {
+ { vneg [ [ dup v- ] keep v- ] }
+ { n+v [ [ ctor execute ] dip v+ ] }
+ { v+n [ ctor execute v+ ] }
+ { n-v [ [ ctor execute ] dip v- ] }
+ { v-n [ ctor execute v- ] }
+ { n*v [ [ ctor execute ] dip v* ] }
+ { v*n [ ctor execute v* ] }
+ { n/v [ [ ctor execute ] dip v/ ] }
+ { v/n [ ctor execute v/ ] }
+ { norm-sq [ dup v. assert-positive ] }
+ { norm [ norm-sq sqrt ] }
+ { normalize [ dup norm v/n ] }
+ }
+ ! To compute dot product and distance with integer vectors, we
+ ! have to do things less efficiently, with integer overflow checks,
+ ! in the general case.
+ elt-class m:float = [
+ {
+ { distance [ v- norm ] }
+ { v. [ v* sum ] }
+ } append
+ ] when ;
+
+:: simd-vector-words ( class ctor rep assoc -- )
+ rep rep-component-type c-type-boxed-class :> elt-class
+ class
+ elt-class
+ assoc rep supported-simd-ops
+ ctor elt-class high-level-ops assoc-union
+ specialize-vector-words ;
+
+:: define-simd-128-type ( class rep -- )
+ <c-type>
+ byte-array >>class
+ class >>boxed-class
+ [ rep alien-vector class boa ] >>getter
+ [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
+ 16 >>size
+ 8 >>align
+ rep >>rep
+ class typedef ;
-T-TYPE IS ${T}
+FUNCTOR: define-simd-128 ( T -- )
-N [ 16 T-TYPE heap-size /i ]
+N [ 16 T heap-size /i ]
A DEFINES-CLASS ${T}-${N}
+A-boa DEFINES ${A}-boa
+A-with DEFINES ${A}-with
>A DEFINES >${A}
A{ DEFINES ${A}{
-NTH [ T-TYPE dup c-type-getter-boxer array-accessor ]
-SET-NTH [ T-TYPE dup c-setter array-accessor ]
+NTH [ T dup c-type-getter-boxer array-accessor ]
+SET-NTH [ T dup c-setter array-accessor ]
-A-rep IS ${A}-rep
+A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
M: A byte-length underlying>> length ; inline
+M: A element-type drop A-rep rep-component-type ;
+
M: A pprint-delims drop \ A{ \ } ;
M: A >pprint-sequence ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
+: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ;
+
+\ A-with \ A-rep \ A define-with-custom-inlining
+
+\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared
+
+\ A-rep rep-gather-word [
+ \ A-boa \ A-rep \ A define-boa-custom-inlining
+] when
+
INSTANCE: A sequence
<PRIVATE
: A-v->n-op ( v quot -- n )
[ underlying>> A-rep ] dip call ; inline
+\ A \ A-with \ A-rep H{
+ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
+ { vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
+ { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
+ { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
+ { vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
+ { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
+ { vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
+ { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
+ { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
+ { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
+ { sum [ [ (simd-sum) ] \ A-v->n-op execute ] }
+} simd-vector-words
+
+\ A \ A-rep define-simd-128-type
+
PRIVATE>
;FUNCTOR
! Synthesize 256-bit vectors from a pair of 128-bit vectors
-FUNCTOR: define-simd-256 ( T -- )
+SLOT: underlying1
+SLOT: underlying2
+
+:: define-simd-256-type ( class rep -- )
+ <c-type>
+ class >>class
+ class >>boxed-class
+ [
+ [ rep alien-vector ]
+ [ 16 + >fixnum rep alien-vector ] 2bi
+ class boa
+ ] >>getter
+ [
+ [ [ underlying1>> ] 2dip rep set-alien-vector ]
+ [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
+ 3bi
+ ] >>setter
+ 32 >>size
+ 8 >>align
+ rep >>rep
+ class typedef ;
-T-TYPE IS ${T}
+FUNCTOR: define-simd-256 ( T -- )
-N [ 32 T-TYPE heap-size /i ]
+N [ 32 T heap-size /i ]
N/2 [ N 2 / ]
A/2 IS ${T}-${N/2}
+A/2-boa IS ${A/2}-boa
+A/2-with IS ${A/2}-with
A DEFINES-CLASS ${T}-${N}
+A-boa DEFINES ${A}-boa
+A-with DEFINES ${A}-with
>A DEFINES >${A}
A{ DEFINES ${A}{
A-deref DEFINES-PRIVATE ${A}-deref
-A-rep IS ${A/2}-rep
+A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
M: A byte-length drop 32 ; inline
+M: A element-type drop A-rep rep-component-type ;
+
SYNTAX: A{ \ } [ >A ] parse-literal ;
M: A pprint-delims drop \ A{ \ } ;
M: A pprint* pprint-object ;
+: A-with ( x -- simd-array )
+ [ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@
+ \ A boa ; inline
+
+: A-boa ( ... -- simd-array )
+ [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@
+ \ A boa ; inline
+
+\ A-rep 2 boa-effect \ A-boa set-stack-effect
+
INSTANCE: A sequence
: A-vv->v-op ( v1 v2 quot -- v3 )
[ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ]
dip call ; inline
+\ A \ A-with \ A-rep H{
+ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
+ { vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
+ { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
+ { vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
+ { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
+ { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
+ { vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
+ { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
+ { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
+ { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
+ { sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] }
+} simd-vector-words
+
+\ A \ A-rep define-simd-256-type
+
;FUNCTOR
--- /dev/null
+IN: math.vectors.simd.intrinsics.tests
+USING: math.vectors.simd.intrinsics cpu.architecture tools.test ;
+
+[ 16 ] [ uchar-16-rep rep-components ] unit-test
+[ 16 ] [ char-16-rep rep-components ] unit-test
+[ 8 ] [ ushort-8-rep rep-components ] unit-test
+[ 8 ] [ short-8-rep rep-components ] unit-test
+[ 4 ] [ uint-4-rep rep-components ] unit-test
+[ 4 ] [ int-4-rep rep-components ] unit-test
+[ 4 ] [ float-4-rep rep-components ] unit-test
+[ 2 ] [ double-2-rep rep-components ] unit-test
+
+{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as
+{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as
+{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as
+{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as
+
+
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel alien alien.data cpu.architecture libc ;
+USING: alien alien.c-types alien.data assocs combinators
+cpu.architecture fry generalizations kernel libc macros math
+sequences ;
IN: math.vectors.simd.intrinsics
ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
+: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
+: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ;
+: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ;
+: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
! Inefficient version for when intrinsics are missing
[ swap <displaced-alien> swap ] dip rep-size memcpy ;
+<<
+
+: rep-components ( rep -- n )
+ 16 swap rep-component-type heap-size /i ; foldable
+
+: rep-coercer ( rep -- quot )
+ {
+ { [ dup int-vector-rep? ] [ [ >fixnum ] ] }
+ { [ dup float-vector-rep? ] [ [ >float ] ] }
+ } cond nip ; foldable
+
+: rep-coerce ( value rep -- value' )
+ rep-coercer call( value -- value' ) ; inline
+
+CONSTANT: rep-gather-words
+ {
+ { 2 (simd-gather-2) }
+ { 4 (simd-gather-4) }
+ }
+
+: rep-gather-word ( rep -- word )
+ rep-components rep-gather-words at ;
+
+>>
+
+MACRO: (simd-boa) ( rep -- quot )
+ {
+ [ rep-coercer ]
+ [ rep-components ]
+ [ ]
+ [ rep-gather-word ]
+ } cleave
+ '[ _ _ napply _ _ execute ] ;
+
+GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
+
+M: vector-rep supported-simd-op?
+ {
+ { \ (simd-v+) [ %add-vector-reps ] }
+ { \ (simd-vs+) [ %saturated-add-vector-reps ] }
+ { \ (simd-v+-) [ %add-sub-vector-reps ] }
+ { \ (simd-v-) [ %sub-vector-reps ] }
+ { \ (simd-vs-) [ %saturated-sub-vector-reps ] }
+ { \ (simd-v*) [ %mul-vector-reps ] }
+ { \ (simd-vs*) [ %saturated-mul-vector-reps ] }
+ { \ (simd-v/) [ %div-vector-reps ] }
+ { \ (simd-vmin) [ %min-vector-reps ] }
+ { \ (simd-vmax) [ %max-vector-reps ] }
+ { \ (simd-vsqrt) [ %sqrt-vector-reps ] }
+ { \ (simd-sum) [ %horizontal-add-vector-reps ] }
+ { \ (simd-broadcast) [ %broadcast-vector-reps ] }
+ { \ (simd-gather-2) [ %gather-vector-2-reps ] }
+ { \ (simd-gather-4) [ %gather-vector-4-reps ] }
+ } case member? ;
"There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ;
ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations"
-"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs."
+"At present, the SIMD support makes use of SSE, SSE2 and a few SSE3 instructions on x86 CPUs."
$nl
-"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance."
+"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")."
$nl
-"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
+"SSE2 introduces double-precision and integer SIMD."
+$nl
+"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words."
+$nl
+"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance."
$nl
"The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ;
ARTICLE: "math.vectors.simd.types" "SIMD vector types"
-"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8."
-$nl
-"The following vector types are defined:"
-{ $subsection float-4 }
-{ $subsection double-2 }
-{ $subsection float-8 }
-{ $subsection double-4 }
-"For each vector type, several words are defined:"
+"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension."
+$nl
+"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":"
+{ $subsection POSTPONE: SIMD: }
+"The following vector types are supported:"
+{ $code
+ "char-16"
+ "uchar-16"
+ "char-32"
+ "uchar-32"
+ "short-8"
+ "ushort-8"
+ "short-16"
+ "ushort-16"
+ "int-4"
+ "uint-4"
+ "int-8"
+ "uint-8"
+ "float-4"
+ "float-8"
+ "double-2"
+ "double-4"
+} ;
+
+ARTICLE: "math.vectors.simd.words" "SIMD vector words"
+"For each SIMD vector type, several words are defined:"
{ $table
{ "Word" "Stack effect" "Description" }
{ { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" }
{ { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" }
{ { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" }
}
-"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers."
-$nl
-"Operations on " { $link float-4 } " instances:"
-{ $subsection float-4-with }
-{ $subsection float-4-boa }
-{ $subsection POSTPONE: float-4{ }
-"Operations on " { $link double-2 } " instances:"
-{ $subsection double-2-with }
-{ $subsection double-2-boa }
-{ $subsection POSTPONE: double-2{ }
-"Operations on " { $link float-8 } " instances:"
-{ $subsection float-8-with }
-{ $subsection float-8-boa }
-{ $subsection POSTPONE: float-8{ }
-"Operations on " { $link double-4 } " instances:"
-{ $subsection double-4-with }
-{ $subsection double-4-boa }
-{ $subsection POSTPONE: double-4{ }
"To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words."
{ $see-also "c-types-specs" } ;
{ $code
<" USING: compiler.tree.debugger kernel.private
math.vectors math.vectors.simd ;
+SIMD: float-4
+IN: simd-demo
: interpolate ( v a b -- w )
{ float-4 float-4 float-4 } declare
{ $code
<" USING: compiler.tree.debugger hints
math.vectors math.vectors.simd ;
+SIMD: float-4
+IN: simd-demo
: interpolate ( v a b -- w )
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
"In the " { $snippet "interpolate" } " word, there is still a call to the " { $link <tuple-boa> } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:"
{ $code
<" USING: compiler.tree.debugger math.vectors math.vectors.simd ;
+SIMD: float-4
IN: simd-demo
STRUCT: actor
$nl
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
{ $subsection (simd-v+) }
+{ $subsection (simd-vs+) }
+{ $subsection (simd-v+-) }
{ $subsection (simd-v-) }
+{ $subsection (simd-vs-) }
+{ $subsection (simd-v*) }
+{ $subsection (simd-vs*) }
{ $subsection (simd-v/) }
{ $subsection (simd-vmin) }
{ $subsection (simd-vmax) }
"For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ;
ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes"
-"Struct classes may contain fields which store SIMD data; use one of the following C type names:"
-{ $code
-<" float-4
-double-2
-float-8
-double-4"> }
-"Passing SIMD data as function parameters is not yet supported." ;
+"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name."
+$nl
+"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ;
ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
"The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors."
{ $subsection "math.vectors.simd.intro" }
{ $subsection "math.vectors.simd.types" }
+{ $subsection "math.vectors.simd.words" }
{ $subsection "math.vectors.simd.support" }
{ $subsection "math.vectors.simd.efficiency" }
{ $subsection "math.vectors.simd.alien" }
{ $subsection "math.vectors.simd.intrinsics" } ;
-! ! ! float-4
-
-HELP: float-4
-{ $class-description "A sequence of four single-precision floating point values. New instances can be created with " { $link float-4-with } " or " { $link float-4-boa } "." } ;
-
-HELP: float-4-with
-{ $values { "x" float } { "simd-array" float-4 } }
-{ $description "Creates a new vector with all four components equal to a scalar." } ;
-
-HELP: float-4-boa
-{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" float-4 } }
-{ $description "Creates a new vector from four scalar components." } ;
-
-HELP: float-4{
-{ $syntax "float-4{ a b c d }" }
-{ $description "Literal syntax for a " { $link float-4 } "." } ;
-
-! ! ! double-2
-
-HELP: double-2
-{ $class-description "A sequence of two double-precision floating point values. New instances can be created with " { $link double-2-with } " or " { $link double-2-boa } "." } ;
-
-HELP: double-2-with
-{ $values { "x" float } { "simd-array" double-2 } }
-{ $description "Creates a new vector with both components equal to a scalar." } ;
-
-HELP: double-2-boa
-{ $values { "a" float } { "b" float } { "simd-array" double-2 } }
-{ $description "Creates a new vector from two scalar components." } ;
-
-HELP: double-2{
-{ $syntax "double-2{ a b }" }
-{ $description "Literal syntax for a " { $link double-2 } "." } ;
-
-! ! ! float-8
-
-HELP: float-8
-{ $class-description "A sequence of eight single-precision floating point values. New instances can be created with " { $link float-8-with } " or " { $link float-8-boa } "." } ;
-
-HELP: float-8-with
-{ $values { "x" float } { "simd-array" float-8 } }
-{ $description "Creates a new vector with all eight components equal to a scalar." } ;
-
-HELP: float-8-boa
-{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "e" float } { "f" float } { "g" float } { "h" float } { "simd-array" float-8 } }
-{ $description "Creates a new vector from eight scalar components." } ;
-
-HELP: float-8{
-{ $syntax "float-8{ a b c d e f g h }" }
-{ $description "Literal syntax for a " { $link float-8 } "." } ;
-
-! ! ! double-4
-
-HELP: double-4
-{ $class-description "A sequence of four double-precision floating point values. New instances can be created with " { $link double-4-with } " or " { $link double-4-boa } "." } ;
-
-HELP: double-4-with
-{ $values { "x" float } { "simd-array" double-4 } }
-{ $description "Creates a new vector with all four components equal to a scalar." } ;
-
-HELP: double-4-boa
-{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" double-4 } }
-{ $description "Creates a new vector from four scalar components." } ;
-
-HELP: double-4{
-{ $syntax "double-4{ a b c d }" }
-{ $description "Literal syntax for a " { $link double-4 } "." } ;
+HELP: SIMD:
+{ $syntax "SIMD: type-length" }
+{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } }
+{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
ABOUT: "math.vectors.simd"
+USING: accessors arrays classes compiler compiler.tree.debugger
+effects fry io kernel kernel.private math math.functions
+math.private math.vectors math.vectors.simd
+math.vectors.simd.private prettyprint random sequences system
+tools.test vocabs assocs compiler.cfg.debugger words
+locals math.vectors.specialization combinators cpu.architecture
+math.vectors.simd.intrinsics namespaces byte-arrays alien
+specialized-arrays classes.struct ;
+FROM: alien.c-types => c-type-boxed-class ;
+SPECIALIZED-ARRAY: float
+SIMD: char-16
+SIMD: uchar-16
+SIMD: char-32
+SIMD: uchar-32
+SIMD: short-8
+SIMD: ushort-8
+SIMD: short-16
+SIMD: ushort-16
+SIMD: int-4
+SIMD: uint-4
+SIMD: int-8
+SIMD: uint-8
+SIMD: float-4
+SIMD: float-8
+SIMD: double-2
+SIMD: double-4
IN: math.vectors.simd.tests
-USING: math math.vectors.simd math.vectors.simd.private
-math.vectors math.functions math.private kernel.private compiler
-sequences tools.test compiler.tree.debugger accessors kernel
-system ;
[ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test
[ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
-[ float-4{ 12 12 12 12 } ] [
- 12 [ float-4-with ] compile-call
-] unit-test
-
-[ float-4{ 1 2 3 4 } ] [
- 1 2 3 4 [ float-4-boa ] compile-call
-] unit-test
-
-[ float-4{ 11 22 33 44 } ] [
- float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
- [ { float-4 float-4 } declare v+ ] compile-call
-] unit-test
-
-[ float-4{ -9 -18 -27 -36 } ] [
- float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
- [ { float-4 float-4 } declare v- ] compile-call
-] unit-test
-
-[ float-4{ 10 40 90 160 } ] [
- float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
- [ { float-4 float-4 } declare v* ] compile-call
-] unit-test
-
-[ float-4{ 10 100 1000 10000 } ] [
- float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 }
- [ { float-4 float-4 } declare v/ ] compile-call
-] unit-test
-
-[ float-4{ -10 -20 -30 -40 } ] [
- float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
- [ { float-4 float-4 } declare vmin ] compile-call
-] unit-test
-
-[ float-4{ 10 20 30 40 } ] [
- float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
- [ { float-4 float-4 } declare vmax ] compile-call
-] unit-test
-
-[ 10.0 ] [
- float-4{ 1 2 3 4 }
- [ { float-4 } declare sum ] compile-call
-] unit-test
+! Test puns; only on x86
+cpu x86? [
+ [ double-2{ 4 1024 } ] [
+ float-4{ 0 1 0 2 }
+ [ { float-4 } declare dup v+ underlying>> double-2 boa dup v+ ] compile-call
+ ] unit-test
+
+ [ 33.0 ] [
+ double-2{ 1 2 } double-2{ 10 20 }
+ [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call
+ ] unit-test
+] when
-[ 13.0 ] [
- float-4{ 1 2 3 4 }
- [ { float-4 } declare sum 3.0 + ] compile-call
-] unit-test
+! Fuzz testing
+CONSTANT: simd-classes
+ {
+ char-16
+ uchar-16
+ char-32
+ uchar-32
+ short-8
+ ushort-8
+ short-16
+ ushort-16
+ int-4
+ uint-4
+ int-8
+ uint-8
+ float-4
+ float-8
+ double-2
+ double-4
+ }
+
+: with-ctors ( -- seq )
+ simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ;
+
+: boa-ctors ( -- seq )
+ simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ;
+
+: check-optimizer ( seq inputs quot -- )
+ [
+ [ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ]
+ [ [ call ] dip call ]
+ [ [ call ] dip compile-call ] 2tri = not
+ ] compose filter ; inline
+
+"== Checking -new constructors" print
+
+[ { } ] [
+ simd-classes [ [ [ ] ] dip '[ _ new ] ] check-optimizer
+] unit-test
+
+[ { } ] [
+ simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter
+] unit-test
+
+"== Checking -with constructors" print
+
+[ { } ] [
+ with-ctors [
+ [ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ]
+ ] check-optimizer
+] unit-test
+
+"== Checking -boa constructors" print
+
+[ { } ] [
+ boa-ctors [
+ dup stack-effect in>> length
+ [ nip [ 1000 random ] [ ] replicate-as ]
+ [ fixnum <array> swap '[ _ declare _ execute ] ]
+ 2bi
+ ] check-optimizer
+] unit-test
+
+"== Checking vector operations" print
+
+: random-vector ( class -- vec )
+ new [ drop 1000 random ] map ;
+
+:: check-vector-op ( word inputs class elt-class -- inputs quot )
+ inputs [
+ [
+ {
+ { +vector+ [ class random-vector ] }
+ { +scalar+ [ 1000 random elt-class float = [ >float ] when ] }
+ } case
+ ] [ ] map-as
+ ] [
+ [
+ {
+ { +vector+ [ class ] }
+ { +scalar+ [ elt-class ] }
+ } case
+ ] map
+ ] bi
+ word '[ _ declare _ execute ] ;
+
+: ops-to-check ( elt-class -- alist )
+ [ vector-words >alist ] dip float = [
+ [ drop { n/v v/n v/ normalize } member? not ] assoc-filter
+ ] unless ;
+
+: check-vector-ops ( class elt-class -- )
+ [ nip ops-to-check ] 2keep
+ '[ first2 inputs _ _ check-vector-op ] check-optimizer ; inline
+
+: simd-classes&reps ( -- alist )
+ simd-classes [
+ dup name>> [ "float" head? ] [ "double" head? ] bi or
+ float fixnum ?
+ ] { } map>assoc ;
+
+simd-classes&reps [
+ [ [ { } ] ] 2dip '[ _ _ check-vector-ops ] unit-test
+] assoc-each
+
+! Other regressions
+[ 8000000 ] [
+ int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 }
+ [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call
+] unit-test
-[ 8.0 ] [
- float-4{ 1 2 3 4 } float-4{ 2 0 2 0 }
- [ { float-4 float-4 } declare v. ] compile-call
-] unit-test
-[ float-4{ 5 10 15 20 } ] [
- 5.0 float-4{ 1 2 3 4 }
- [ { float float-4 } declare n*v ] compile-call
+! Vector alien intrinsics
+[ float-4{ 1 2 3 4 } ] [
+ [
+ float-4{ 1 2 3 4 }
+ underlying>> 0 float-4-rep alien-vector
+ ] compile-call float-4 boa
] unit-test
-[ float-4{ 5 10 15 20 } ] [
- float-4{ 1 2 3 4 } 5.0
- [ { float float-4 } declare v*n ] compile-call
+[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
+ 16 [ 1 ] B{ } replicate-as 16 <byte-array>
+ [
+ 0 [
+ { byte-array c-ptr fixnum } declare
+ float-4-rep set-alien-vector
+ ] compile-call
+ ] keep
] unit-test
-[ float-4{ 10 5 2 5 } ] [
- 10.0 float-4{ 1 2 5 2 }
- [ { float float-4 } declare n/v ] compile-call
+[ float-array{ 1 2 3 4 } ] [
+ [
+ float-array{ 1 2 3 4 } underlying>>
+ float-array{ 4 3 2 1 } clone
+ [ underlying>> 0 float-4-rep set-alien-vector ] keep
+ ] compile-call
] unit-test
-[ float-4{ 0.5 1 1.5 2 } ] [
- float-4{ 1 2 3 4 } 2
- [ { float float-4 } declare v/n ] compile-call
-] unit-test
+STRUCT: simd-struct
+{ x float-4 }
+{ y double-2 }
+{ z double-4 }
+{ w float-8 } ;
-[ float-4{ 1 0 0 0 } ] [
- float-4{ 10 0 0 0 }
- [ { float-4 } declare normalize ] compile-call
-] unit-test
+[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
-[ 30.0 ] [
+[
float-4{ 1 2 3 4 }
- [ { float-4 } declare norm-sq ] compile-call
-] unit-test
-
-[ t ] [
- float-4{ 1 0 0 0 }
- float-4{ 0 1 0 0 }
- [ { float-4 float-4 } declare distance ] compile-call
- 2 sqrt 1.0e-6 ~
-] unit-test
-
-[ double-2{ 12 12 } ] [
- 12 [ double-2-with ] compile-call
-] unit-test
-
-[ double-2{ 1 2 } ] [
- 1 2 [ double-2-boa ] compile-call
-] unit-test
-
-[ double-2{ 11 22 } ] [
- double-2{ 1 2 } double-2{ 10 20 }
- [ { double-2 double-2 } declare v+ ] compile-call
-] unit-test
-
-[ double-2{ -9 -18 } ] [
- double-2{ 1 2 } double-2{ 10 20 }
- [ { double-2 double-2 } declare v- ] compile-call
-] unit-test
-
-[ double-2{ 10 40 } ] [
- double-2{ 1 2 } double-2{ 10 20 }
- [ { double-2 double-2 } declare v* ] compile-call
-] unit-test
-
-[ double-2{ 10 100 } ] [
- double-2{ 100 2000 } double-2{ 10 20 }
- [ { double-2 double-2 } declare v/ ] compile-call
-] unit-test
-
-[ double-2{ -10 -20 } ] [
- double-2{ -10 20 } double-2{ 10 -20 }
- [ { double-2 double-2 } declare vmin ] compile-call
-] unit-test
-
-[ double-2{ 10 20 } ] [
- double-2{ -10 20 } double-2{ 10 -20 }
- [ { double-2 double-2 } declare vmax ] compile-call
-] unit-test
-
-[ 3.0 ] [
- double-2{ 1 2 }
- [ { double-2 } declare sum ] compile-call
-] unit-test
-
-[ 7.0 ] [
- double-2{ 1 2 }
- [ { double-2 } declare sum 4.0 + ] compile-call
-] unit-test
-
-[ 16.0 ] [
- double-2{ 1 2 } double-2{ 2 7 }
- [ { double-2 double-2 } declare v. ] compile-call
-] unit-test
-
-[ double-2{ 5 10 } ] [
- 5.0 double-2{ 1 2 }
- [ { float double-2 } declare n*v ] compile-call
-] unit-test
-
-[ double-2{ 5 10 } ] [
- double-2{ 1 2 } 5.0
- [ { float double-2 } declare v*n ] compile-call
-] unit-test
-
-[ double-2{ 10 5 } ] [
- 10.0 double-2{ 1 2 }
- [ { float double-2 } declare n/v ] compile-call
-] unit-test
-
-[ double-2{ 0.5 1 } ] [
- double-2{ 1 2 } 2
- [ { float double-2 } declare v/n ] compile-call
-] unit-test
-
-[ double-2{ 0 0 } ] [ double-2 new ] unit-test
-
-[ double-2{ 1 0 } ] [
- double-2{ 10 0 }
- [ { double-2 } declare normalize ] compile-call
-] unit-test
-
-[ 5.0 ] [
- double-2{ 1 2 }
- [ { double-2 } declare norm-sq ] compile-call
-] unit-test
-
-[ t ] [
- double-2{ 1 0 }
- double-2{ 0 1 }
- [ { double-2 double-2 } declare distance ] compile-call
- 2 sqrt 1.0e-6 ~
-] unit-test
-
-[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test
-
-[ double-4{ 1 2 3 4 } ] [
- 1 2 3 4 double-4-boa
-] unit-test
-
-[ double-4{ 1 1 1 1 } ] [
- 1 double-4-with
-] unit-test
-
-[ double-4{ 0 1 2 3 } ] [
- 1 double-4-with [ * ] map-index
-] unit-test
-
-[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test
-
-[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test
-
-[ double-4{ 12 12 12 12 } ] [
- 12 [ double-4-with ] compile-call
-] unit-test
-
-[ double-4{ 1 2 3 4 } ] [
- 1 2 3 4 [ double-4-boa ] compile-call
-] unit-test
-
-[ double-4{ 11 22 33 44 } ] [
- double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
- [ { double-4 double-4 } declare v+ ] compile-call
-] unit-test
-
-[ double-4{ -9 -18 -27 -36 } ] [
- double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
- [ { double-4 double-4 } declare v- ] compile-call
-] unit-test
-
-[ double-4{ 10 40 90 160 } ] [
- double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
- [ { double-4 double-4 } declare v* ] compile-call
-] unit-test
-
-[ double-4{ 10 100 1000 10000 } ] [
- double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 }
- [ { double-4 double-4 } declare v/ ] compile-call
-] unit-test
-
-[ double-4{ -10 -20 -30 -40 } ] [
- double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
- [ { double-4 double-4 } declare vmin ] compile-call
-] unit-test
-
-[ double-4{ 10 20 30 40 } ] [
- double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
- [ { double-4 double-4 } declare vmax ] compile-call
-] unit-test
-
-[ 10.0 ] [
- double-4{ 1 2 3 4 }
- [ { double-4 } declare sum ] compile-call
-] unit-test
-
-[ 13.0 ] [
- double-4{ 1 2 3 4 }
- [ { double-4 } declare sum 3.0 + ] compile-call
-] unit-test
-
-[ 8.0 ] [
- double-4{ 1 2 3 4 } double-4{ 2 0 2 0 }
- [ { double-4 double-4 } declare v. ] compile-call
-] unit-test
-
-[ double-4{ 5 10 15 20 } ] [
- 5.0 double-4{ 1 2 3 4 }
- [ { float double-4 } declare n*v ] compile-call
-] unit-test
-
-[ double-4{ 5 10 15 20 } ] [
- double-4{ 1 2 3 4 } 5.0
- [ { float double-4 } declare v*n ] compile-call
-] unit-test
-
-[ double-4{ 10 5 2 5 } ] [
- 10.0 double-4{ 1 2 5 2 }
- [ { float double-4 } declare n/v ] compile-call
-] unit-test
-
-[ double-4{ 0.5 1 1.5 2 } ] [
- double-4{ 1 2 3 4 } 2
- [ { float double-4 } declare v/n ] compile-call
-] unit-test
-
-[ double-4{ 1 0 0 0 } ] [
- double-4{ 10 0 0 0 }
- [ { double-4 } declare normalize ] compile-call
-] unit-test
-
-[ 30.0 ] [
- double-4{ 1 2 3 4 }
- [ { double-4 } declare norm-sq ] compile-call
-] unit-test
-
-[ t ] [
- double-4{ 1 0 0 0 }
- double-4{ 0 1 0 0 }
- [ { double-4 double-4 } declare distance ] compile-call
- 2 sqrt 1.0e-6 ~
-] unit-test
-
-[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test
-
-[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test
-
-[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test
-
-[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test
-
-[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test
-
-[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test
-
-[ float-8{ 3 6 9 12 15 18 21 24 } ] [
+ double-2{ 2 1 }
+ double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
- float-8{ 2 4 6 8 10 12 14 16 }
- [ { float-8 float-8 } declare v+ ] compile-call
+] [
+ simd-struct <struct>
+ float-4{ 1 2 3 4 } >>x
+ double-2{ 2 1 } >>y
+ double-4{ 4 3 2 1 } >>z
+ float-8{ 1 2 3 4 5 6 7 8 } >>w
+ { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
-[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+[
+ float-4{ 1 2 3 4 }
+ double-2{ 2 1 }
+ double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
- float-8{ 2 4 6 8 10 12 14 16 }
- [ { float-8 float-8 } declare v- ] compile-call
-] unit-test
-
-[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
- -0.5
- float-8{ 2 4 6 8 10 12 14 16 }
- [ { float float-8 } declare n*v ] compile-call
+] [
+ [
+ simd-struct <struct>
+ float-4{ 1 2 3 4 } >>x
+ double-2{ 2 1 } >>y
+ double-4{ 4 3 2 1 } >>z
+ float-8{ 1 2 3 4 5 6 7 8 } >>w
+ { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
+ ] compile-call
] unit-test
-
-[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
- float-8{ 2 4 6 8 10 12 14 16 }
- -0.5
- [ { float-8 float } declare v*n ] compile-call
-] unit-test
-
-[ float-8{ 256 128 64 32 16 8 4 2 } ] [
- 256.0
- float-8{ 1 2 4 8 16 32 64 128 }
- [ { float float-8 } declare n/v ] compile-call
-] unit-test
-
-[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
- float-8{ 2 4 6 8 10 12 14 16 }
- -2.0
- [ { float-8 float } declare v/n ] compile-call
-] unit-test
-
-! Test puns; only on x86
-cpu x86? [
- [ double-2{ 4 1024 } ] [
- float-4{ 0 1 0 2 }
- [ { float-4 } declare dup v+ underlying>> double-2 boa dup v+ ] compile-call
- ] unit-test
-
- [ 33.0 ] [
- double-2{ 1 2 } double-2{ 10 20 }
- [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call
- ] unit-test
-] when
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors alien.c-types byte-arrays cpu.architecture
-kernel math math.functions math.vectors
-math.vectors.simd.functor math.vectors.simd.intrinsics
-math.vectors.specialization parser prettyprint.custom sequences
-sequences.private locals assocs words fry ;
-FROM: alien.c-types => float ;
-QUALIFIED-WITH: math m
+USING: alien.c-types combinators fry kernel lexer math math.parser
+math.vectors.simd.functor sequences splitting vocabs.generated
+vocabs.loader vocabs.parser words ;
IN: math.vectors.simd
-<<
-
-DEFER: float-4
-DEFER: double-2
-DEFER: float-8
-DEFER: double-4
-
-"double" define-simd-128
-"float" define-simd-128
-"double" define-simd-256
-"float" define-simd-256
-
->>
-
-: float-4-with ( x -- simd-array )
- [ 4 ] dip >float '[ _ ] \ float-4 new replicate-as ;
-
-: float-4-boa ( a b c d -- simd-array )
- \ float-4 new 4sequence ;
-
-: double-2-with ( x -- simd-array )
- [ 2 ] dip >float '[ _ ] \ double-2 new replicate-as ;
-
-: double-2-boa ( a b -- simd-array )
- \ double-2 new 2sequence ;
-
-! More efficient expansions for the above, used when SIMD is
-! actually available.
-
-<<
-
-\ float-4-with [
- drop
- \ (simd-broadcast) "intrinsic" word-prop [
- [ >float float-4-rep (simd-broadcast) \ float-4 boa ]
- ] [ \ float-4-with def>> ] if
-] "custom-inlining" set-word-prop
-
-\ float-4-boa [
- drop
- \ (simd-gather-4) "intrinsic" word-prop [
- [| a b c d |
- a >float b >float c >float d >float
- float-4-rep (simd-gather-4) \ float-4 boa
- ]
- ] [ \ float-4-boa def>> ] if
-] "custom-inlining" set-word-prop
-
-\ double-2-with [
- drop
- \ (simd-broadcast) "intrinsic" word-prop [
- [ >float double-2-rep (simd-broadcast) \ double-2 boa ]
- ] [ \ double-2-with def>> ] if
-] "custom-inlining" set-word-prop
-
-\ double-2-boa [
- drop
- \ (simd-gather-4) "intrinsic" word-prop [
- [ [ >float ] bi@ double-2-rep (simd-gather-2) \ double-2 boa ]
- ] [ \ double-2-boa def>> ] if
-] "custom-inlining" set-word-prop
-
->>
-
-: float-8-with ( x -- simd-array )
- [ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@
- \ float-8 boa ; inline
-
-:: float-8-boa ( a b c d e f g h -- simd-array )
- a b c d float-4-boa
- e f g h float-4-boa
- [ underlying>> ] bi@
- \ float-8 boa ; inline
-
-: double-4-with ( x -- simd-array )
- [ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@
- \ double-4 boa ; inline
-
-:: double-4-boa ( a b c d -- simd-array )
- a b double-2-boa
- c d double-2-boa
- [ underlying>> ] bi@
- \ double-4 boa ; inline
-
-<<
+ERROR: bad-vector-size bits ;
<PRIVATE
-! Filter out operations that are not available, eg horizontal adds
-! on SSE2. Fallback code in math.vectors is used in that case.
-
-: supported-simd-ops ( assoc -- assoc' )
- {
- { v+ (simd-v+) }
- { v- (simd-v-) }
- { v* (simd-v*) }
- { v/ (simd-v/) }
- { vmin (simd-vmin) }
- { vmax (simd-vmax) }
- { sum (simd-sum) }
- } [ nip "intrinsic" word-prop ] assoc-filter
- '[ drop _ key? ] assoc-filter ;
-
-! Some SIMD operations are defined in terms of others.
+: simd-vocab ( type -- vocab )
+ "math.vectors.simd.instances." prepend ;
-:: high-level-ops ( ctor -- assoc )
- {
- { vneg [ [ dup v- ] keep v- ] }
- { v. [ v* sum ] }
- { n+v [ [ ctor execute ] dip v+ ] }
- { v+n [ ctor execute v+ ] }
- { n-v [ [ ctor execute ] dip v- ] }
- { v-n [ ctor execute v- ] }
- { n*v [ [ ctor execute ] dip v* ] }
- { v*n [ ctor execute v* ] }
- { n/v [ [ ctor execute ] dip v/ ] }
- { v/n [ ctor execute v/ ] }
- { norm-sq [ dup v. assert-positive ] }
- { norm [ norm-sq sqrt ] }
- { normalize [ dup norm v/n ] }
- { distance [ v- norm ] }
- } ;
-
-:: simd-vector-words ( class ctor elt-type assoc -- )
- class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
- specialize-vector-words ;
+: parse-simd-name ( string -- c-type quot )
+ "-" split1
+ [ "alien.c-types" lookup dup heap-size ] [ string>number ] bi*
+ * 8 * {
+ { 128 [ [ define-simd-128 ] ] }
+ { 256 [ [ define-simd-256 ] ] }
+ [ bad-vector-size ]
+ } case ;
PRIVATE>
-\ float-4 \ float-4-with m:float H{
- { v+ [ [ (simd-v+) ] float-4-vv->v-op ] }
- { v- [ [ (simd-v-) ] float-4-vv->v-op ] }
- { v* [ [ (simd-v*) ] float-4-vv->v-op ] }
- { v/ [ [ (simd-v/) ] float-4-vv->v-op ] }
- { vmin [ [ (simd-vmin) ] float-4-vv->v-op ] }
- { vmax [ [ (simd-vmax) ] float-4-vv->v-op ] }
- { sum [ [ (simd-sum) ] float-4-v->n-op ] }
-} simd-vector-words
-
-\ double-2 \ double-2-with m:float H{
- { v+ [ [ (simd-v+) ] double-2-vv->v-op ] }
- { v- [ [ (simd-v-) ] double-2-vv->v-op ] }
- { v* [ [ (simd-v*) ] double-2-vv->v-op ] }
- { v/ [ [ (simd-v/) ] double-2-vv->v-op ] }
- { vmin [ [ (simd-vmin) ] double-2-vv->v-op ] }
- { vmax [ [ (simd-vmax) ] double-2-vv->v-op ] }
- { sum [ [ (simd-sum) ] double-2-v->n-op ] }
-} simd-vector-words
-
-\ float-8 \ float-8-with m:float H{
- { v+ [ [ (simd-v+) ] float-8-vv->v-op ] }
- { v- [ [ (simd-v-) ] float-8-vv->v-op ] }
- { v* [ [ (simd-v*) ] float-8-vv->v-op ] }
- { v/ [ [ (simd-v/) ] float-8-vv->v-op ] }
- { vmin [ [ (simd-vmin) ] float-8-vv->v-op ] }
- { vmax [ [ (simd-vmax) ] float-8-vv->v-op ] }
- { sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] }
-} simd-vector-words
-
-\ double-4 \ double-4-with m:float H{
- { v+ [ [ (simd-v+) ] double-4-vv->v-op ] }
- { v- [ [ (simd-v-) ] double-4-vv->v-op ] }
- { v* [ [ (simd-v*) ] double-4-vv->v-op ] }
- { v/ [ [ (simd-v/) ] double-4-vv->v-op ] }
- { vmin [ [ (simd-vmin) ] double-4-vv->v-op ] }
- { vmax [ [ (simd-vmax) ] double-4-vv->v-op ] }
- { sum [ [ (simd-v+) ] [ (simd-sum) ] double-4-v->n-op ] }
-} simd-vector-words
-
->>
-
-USE: vocabs.loader
+: define-simd-vocab ( type -- vocab )
+ [ simd-vocab ]
+ [ '[ _ parse-simd-name call( type -- ) ] ] bi
+ generate-vocab ;
-"math.vectors.simd.alien" require
+SYNTAX: SIMD:
+ scan define-simd-vocab use-vocab ;
--- /dev/null
+Single-instruction-multiple-data parallel vector operations
{ norm-sq { +vector+ -> +nonnegative+ } }
{ normalize { +vector+ -> +vector+ } }
{ v* { +vector+ +vector+ -> +vector+ } }
+ { vs* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
+ { vs+ { +vector+ +vector+ -> +vector+ } }
+ { v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } }
+ { vs- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }
{ v. { +vector+ +vector+ -> +scalar+ } }
{ v/ { +vector+ +vector+ -> +vector+ } }
"Combining two vectors to form another vector with " { $link 2map } ":"
{ $subsection v+ }
{ $subsection v- }
+{ $subsection v+- }
{ $subsection v* }
{ $subsection v/ }
{ $subsection vmax }
{ $subsection v. }
{ $subsection norm }
{ $subsection norm-sq }
-{ $subsection normalize } ;
+{ $subsection normalize }
+"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:"
+{ $subsection vs+ }
+{ $subsection vs- }
+{ $subsection vs* } ;
ABOUT: "math-vectors"
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ;
+HELP: v+-
+{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
+{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." }
+{ $examples
+ { $example
+ "USING: math.vectors prettyprint ;"
+ "{ 1 2 3 } { 2 3 2 } v+- ."
+ "{ -1 5 1 }"
+ }
+} ;
+
HELP: [v-]
{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ;
{ $snippet "0 [ conjugate * + ] 2reduce" }
} ;
+HELP: vs+
+{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
+{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." }
+{ $examples
+ "With saturation:"
+ { $example
+ "USING: math.vectors prettyprint specialized-arrays ;"
+ "SPECIALIZED-ARRAY: uchar"
+ "uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ."
+ "uchar-array{ 170 255 220 }"
+ }
+ "Without saturation:"
+ { $example
+ "USING: math.vectors prettyprint specialized-arrays ;"
+ "SPECIALIZED-ARRAY: uchar"
+ "uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ."
+ "uchar-array{ 170 14 220 }"
+ }
+} ;
+
+HELP: vs-
+{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
+{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ;
+
+HELP: vs*
+{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
+{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ;
+
HELP: norm-sq
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Computes the squared length of a mathematical vector." } ;
{ 2map v+ v- v* v/ } related-words
{ 2reduce v. } related-words
+
+{ vs+ vs- vs* } related-words
[ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test
-[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
\ No newline at end of file
+[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test
+
+[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test
\ No newline at end of file
! Copyright (C) 2005, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: arrays kernel sequences math math.functions hints
-math.order ;
+USING: arrays alien.c-types kernel sequences math math.functions
+hints math.order fry ;
IN: math.vectors
+GENERIC: element-type ( obj -- c-type )
+
: vneg ( u -- v ) [ neg ] map ;
: v+n ( u n -- v ) [ + ] curry map ;
: vmax ( u v -- w ) [ max ] 2map ;
: vmin ( u v -- w ) [ min ] 2map ;
+: v+- ( u v -- w )
+ [ t ] 2dip
+ [ [ not ] 2dip pick [ + ] [ - ] if ] 2map
+ nip ;
+
+: 2saturate-map ( u v quot -- w )
+ pick element-type '[ @ _ c-type-clamp ] 2map ; inline
+
+: vs+ ( u v -- w ) [ + ] 2saturate-map ;
+: vs- ( u v -- w ) [ - ] 2saturate-map ;
+: vs* ( u v -- w ) [ * ] 2saturate-map ;
+
: vfloor ( v -- _v_ ) [ floor ] map ;
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
: vtruncate ( v -- -v- ) [ truncate ] map ;
kernel arrays combinators compiler compiler.units classes.struct
combinators.smart compiler.tree.debugger math libc destructors
sequences.private multiline eval words vocabs namespaces
-assocs prettyprint alien.data ;
+assocs prettyprint alien.data math.vectors ;
FROM: alien.c-types => float ;
SPECIALIZED-ARRAY: int
SPECIALIZED-ARRAY: char
SPECIALIZED-ARRAY: uint
SPECIALIZED-ARRAY: float
+SPECIALIZED-ARRAY: ulonglong
+
+[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test
[ t ] [ { 1 2 3 } >int-array int-array? ] unit-test
! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors alien alien.c-types alien.data alien.parser assocs
-byte-arrays classes compiler.units functors kernel lexer libc math
-math.vectors.specialization namespaces parser prettyprint.custom
-sequences sequences.private strings summary vocabs vocabs.loader
-vocabs.parser words fry combinators ;
+USING: accessors alien alien.c-types alien.data alien.parser
+assocs byte-arrays classes compiler.units functors kernel lexer
+libc math math.vectors math.vectors.specialization namespaces
+parser prettyprint.custom sequences sequences.private strings
+summary vocabs vocabs.loader vocabs.parser vocabs.generated
+words fry combinators present ;
IN: specialized-arrays
MIXIN: specialized-array
: <direct-A> ( alien len -- specialized-array ) A boa ; inline
-: <A> ( n -- specialized-array ) [ T <underlying> ] keep <direct-A> ; inline
+: <A> ( n -- specialized-array ) [ \ T <underlying> ] keep <direct-A> ; inline
-: (A) ( n -- specialized-array ) [ T (underlying) ] keep <direct-A> ; inline
+: (A) ( n -- specialized-array ) [ \ T (underlying) ] keep <direct-A> ; inline
-: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep <direct-A> ; inline
+: malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep <direct-A> ; inline
: byte-array>A ( byte-array -- specialized-array )
- dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless
+ dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless
<direct-A> ; inline
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
M: A resize
[
- [ T heap-size * ] [ underlying>> ] bi*
+ [ \ T heap-size * ] [ underlying>> ] bi*
resize-byte-array
] [ drop ] 2bi
<direct-A> ; inline
-M: A byte-length length T heap-size * ; inline
+M: A byte-length length \ T heap-size * ; inline
+
+M: A element-type drop \ T ; inline
M: A direct-array-syntax drop \ A@ ;
} cond ;
: underlying-type-name ( c-type -- name )
- underlying-type dup word? [ name>> ] when ;
+ underlying-type present ;
: specialized-array-vocab ( c-type -- vocab )
- "specialized-arrays.instances." prepend ;
+ present "specialized-arrays.instances." prepend ;
PRIVATE>
-: generate-vocab ( vocab-name quot -- vocab )
- [ dup vocab [ ] ] dip '[
- [
- [
- _ with-current-vocab
- ] with-compilation-unit
- ] keep
- ] ?if ; inline
-
: define-array-vocab ( type -- vocab )
- underlying-type-name
+ underlying-type
[ specialized-array-vocab ] [ '[ _ define-array ] ] bi
generate-vocab ;
USING: accessors alien.c-types assocs compiler.units functors
growable kernel lexer namespaces parser prettyprint.custom
sequences specialized-arrays specialized-arrays.private strings
-vocabs vocabs.parser fry ;
+vocabs vocabs.parser vocabs.generated fry ;
QUALIFIED: vectors.functor
IN: specialized-vectors
--- /dev/null
+Slava Pestov
\ No newline at end of file
--- /dev/null
+! Copyright (C) 2009 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: compiler.units fry kernel vocabs vocabs.parser ;
+IN: vocabs.generated
+
+: generate-vocab ( vocab-name quot -- vocab )
+ [ dup vocab [ ] ] dip '[
+ [
+ [
+ _ with-current-vocab
+ ] with-compilation-unit
+ ] keep
+ ] ?if ; inline
math.functions math.vectors math.vectors.simd prettyprint
combinators.smart sequences hints classes.struct
specialized-arrays ;
+SIMD: double-4
IN: benchmark.nbody-simd
: solar-mass ( -- x ) 4 pi sq * ; inline
io.encodings.binary kernel math math.constants math.functions
math.vectors math.vectors.simd math.parser make sequences
sequences.private words hints classes.struct ;
+SIMD: double-4
IN: benchmark.raytracer-simd
! parameters
! See http://factorcode.org/license.txt for BSD license.
USING: kernel io math math.functions math.parser math.vectors
math.vectors.simd sequences specialized-arrays ;
+SIMD: float-4
SPECIALIZED-ARRAY: float-4
IN: benchmark.simd-1
] with-scope
] unit-test
-[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" "-sse-version=30" } ] [
+[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" } ] [
[
"winnt" target-os set
"x86.32" target-cpu set
factor-vm ,
"-i=" boot-image-name append ,
"-no-user-init" ,
- target-cpu get { "x86.32" "x86.64" } member? [ "-sse-version=30" , ] when
] { } make ;
: boot ( -- )
DEF(bool,sse_version,(void)):
mov $0x1,RETURN_REG
cpuid
- /* test $0x100000,%ecx
+ test $0x100000,%ecx
jnz sse_42
test $0x80000,%ecx
jnz sse_41
test $0x200,%ecx
- jnz ssse_3 */
+ jnz ssse_3
test $0x1,%ecx
jnz sse_3
test $0x4000000,%edx