- Rename SIMD types and register representations: <type>-<count> rather than <count><type>-array
- Make a functor to define 256-bit vector types, use it to define float-8 type
- Make SIMD instructions pure-insns so that they participate in value numbering
4 >>align
"box_float" >>boxer
"to_float" >>unboxer
- single-float-rep >>rep
+ float-rep >>rep
[ >float ] >>unboxer-quot
"float" set-array-class
"float" define-primitive-type
8 >>align
"box_double" >>boxer
"to_double" >>unboxer
- double-float-rep >>rep
+ double-rep >>rep
[ >float ] >>unboxer-quot
"double" set-array-class
"double" define-primitive-type
! Float arithmetic
PURE-INSN: ##unbox-float
-def: dst/double-float-rep
+def: dst/double-rep
use: src/int-rep ;
PURE-INSN: ##box-float
def: dst/int-rep
-use: src/double-float-rep
+use: src/double-rep
temp: temp/int-rep ;
PURE-INSN: ##add-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
PURE-INSN: ##sub-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
PURE-INSN: ##mul-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
PURE-INSN: ##div-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
PURE-INSN: ##min-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
PURE-INSN: ##max-float
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep ;
+def: dst/double-rep
+use: src1/double-rep src2/double-rep ;
PURE-INSN: ##sqrt
-def: dst/double-float-rep
-use: src/double-float-rep ;
+def: dst/double-rep
+use: src/double-rep ;
! libc intrinsics
PURE-INSN: ##unary-float-function
-def: dst/double-float-rep
-use: src/double-float-rep
+def: dst/double-rep
+use: src/double-rep
literal: func ;
PURE-INSN: ##binary-float-function
-def: dst/double-float-rep
-use: src1/double-float-rep src2/double-float-rep
+def: dst/double-rep
+use: src1/double-rep src2/double-rep
literal: func ;
! Single/double float conversion
PURE-INSN: ##single>double-float
-def: dst/double-float-rep
-use: src/single-float-rep ;
+def: dst/double-rep
+use: src/float-rep ;
PURE-INSN: ##double>single-float
-def: dst/single-float-rep
-use: src/double-float-rep ;
+def: dst/float-rep
+use: src/double-rep ;
! Float/integer conversion
PURE-INSN: ##float>integer
def: dst/int-rep
-use: src/double-float-rep ;
+use: src/double-rep ;
PURE-INSN: ##integer>float
-def: dst/double-float-rep
+def: dst/double-rep
use: src/int-rep ;
! SIMD operations
-INSN: ##box-vector
+PURE-INSN: ##box-vector
def: dst/int-rep
use: src
literal: rep
temp: temp/int-rep ;
-INSN: ##unbox-vector
+PURE-INSN: ##unbox-vector
def: dst
use: src/int-rep
literal: rep ;
-INSN: ##broadcast-vector
+PURE-INSN: ##broadcast-vector
def: dst
use: src/scalar-rep
literal: rep ;
-INSN: ##gather-vector-2
+PURE-INSN: ##gather-vector-2
def: dst
use: src1/scalar-rep src2/scalar-rep
literal: rep ;
-INSN: ##gather-vector-4
+PURE-INSN: ##gather-vector-4
def: dst
use: src1/scalar-rep src2/scalar-rep src3/scalar-rep src4/scalar-rep
literal: rep ;
-INSN: ##add-vector
+PURE-INSN: ##add-vector
def: dst
use: src1 src2
literal: rep ;
-INSN: ##sub-vector
+PURE-INSN: ##sub-vector
def: dst
use: src1 src2
literal: rep ;
-INSN: ##mul-vector
+PURE-INSN: ##mul-vector
def: dst
use: src1 src2
literal: rep ;
-INSN: ##div-vector
+PURE-INSN: ##div-vector
def: dst
use: src1 src2
literal: rep ;
-INSN: ##min-vector
+PURE-INSN: ##min-vector
def: dst
use: src1 src2
literal: rep ;
-INSN: ##max-vector
+PURE-INSN: ##max-vector
def: dst
use: src1 src2
literal: rep ;
-INSN: ##sqrt-vector
+PURE-INSN: ##sqrt-vector
def: dst
use: src
literal: rep ;
-INSN: ##horizontal-add-vector
+PURE-INSN: ##horizontal-add-vector
def: dst/scalar-rep
use: src
literal: rep ;
use: src/int-rep ;
INSN: ##alien-float
-def: dst/single-float-rep
+def: dst/float-rep
use: src/int-rep ;
INSN: ##alien-double
-def: dst/double-float-rep
+def: dst/double-rep
use: src/int-rep ;
INSN: ##alien-vector
use: src/int-rep value/int-rep ;
INSN: ##set-alien-float
-use: src/int-rep value/single-float-rep ;
+use: src/int-rep value/float-rep ;
INSN: ##set-alien-double
-use: src/int-rep value/double-float-rep ;
+use: src/int-rep value/double-rep ;
INSN: ##set-alien-vector
use: src/int-rep value
temp: temp/int-rep ;
INSN: ##compare-float-branch
-use: src1/double-float-rep src2/double-float-rep
+use: src1/double-rep src2/double-rep
literal: cc ;
PURE-INSN: ##compare-float
def: dst/int-rep
-use: src1/double-float-rep src2/double-float-rep
+use: src1/double-rep src2/double-rep
literal: cc
temp: temp/int-rep ;
: emit-alien-float-getter ( node rep -- )
'[
_ {
- { single-float-rep [ ^^alien-float ] }
- { double-float-rep [ ^^alien-double ] }
+ { float-rep [ ^^alien-float ] }
+ { double-rep [ ^^alien-double ] }
} case
] inline-alien-getter ;
: emit-alien-float-setter ( node rep -- )
'[
_ {
- { single-float-rep [ ##set-alien-float ] }
- { double-float-rep [ ##set-alien-double ] }
+ { float-rep [ ##set-alien-float ] }
+ { double-rep [ ##set-alien-double ] }
} case
] inline-alien-float-setter ;
{ math.private:float= [ drop cc= emit-float-comparison ] }
{ math.private:float>fixnum [ drop emit-float>fixnum ] }
{ math.private:fixnum>float [ drop emit-fixnum>float ] }
- { alien.accessors:alien-float [ single-float-rep emit-alien-float-getter ] }
- { alien.accessors:set-alien-float [ single-float-rep emit-alien-float-setter ] }
- { alien.accessors:alien-double [ double-float-rep emit-alien-float-getter ] }
- { alien.accessors:set-alien-double [ double-float-rep emit-alien-float-setter ] }
+ { alien.accessors:alien-float [ float-rep emit-alien-float-getter ] }
+ { alien.accessors:set-alien-float [ float-rep emit-alien-float-setter ] }
+ { alien.accessors:alien-double [ double-rep emit-alien-float-getter ] }
+ { alien.accessors:set-alien-double [ double-rep emit-alien-float-setter ] }
} enable-intrinsics ;
: enable-fsqrt ( -- )
H{ } spill-slots set
H{
- { 1 single-float-rep }
- { 2 single-float-rep }
- { 3 single-float-rep }
+ { 1 float-rep }
+ { 2 float-rep }
+ { 3 float-rep }
} representations set
[
compiler.cfg.representations.preferred ;
IN: compiler.cfg.representations
-[ { double-float-rep double-float-rep } ] [
+[ { double-rep double-rep } ] [
T{ ##add-float
{ dst 5 }
{ src1 3 }
} uses-vreg-reps
] unit-test
-[ double-float-rep ] [
+[ double-rep ] [
T{ ##alien-double
{ dst 5 }
{ src 3 }
GENERIC: emit-box ( dst src rep -- )
GENERIC: emit-unbox ( dst src rep -- )
-M: single-float-rep emit-box
+M: float-rep emit-box
drop
- [ double-float-rep next-vreg-rep dup ] dip ##single>double-float
+ [ double-rep next-vreg-rep dup ] dip ##single>double-float
int-rep next-vreg-rep ##box-float ;
-M: single-float-rep emit-unbox
+M: float-rep emit-unbox
drop
- [ double-float-rep next-vreg-rep dup ] dip ##unbox-float
+ [ double-rep next-vreg-rep dup ] dip ##unbox-float
##double>single-float ;
-M: double-float-rep emit-box
+M: double-rep emit-box
drop
int-rep next-vreg-rep ##box-float ;
-M: double-float-rep emit-unbox
+M: double-rep emit-unbox
drop ##unbox-float ;
M: vector-rep emit-box
{ [ over int-rep eq? ] [ nip emit-box ] }
[
2array {
- { { double-float-rep single-float-rep } [ ##single>double-float ] }
- { { single-float-rep double-float-rep } [ ##double>single-float ] }
+ { { double-rep float-rep } [ ##single>double-float ] }
+ { { float-rep double-rep } [ ##double>single-float ] }
[ first2 bad-conversion ]
} case
]
[
V{
- T{ ##copy f 1 2 double-float-rep }
+ T{ ##copy f 1 2 double-rep }
T{ ##sub-float f 1 1 3 }
}
] [
H{
- { 1 double-float-rep }
- { 2 double-float-rep }
- { 3 double-float-rep }
+ { 1 double-rep }
+ { 2 double-rep }
+ { 3 double-rep }
} clone representations set
{
T{ ##sub-float f 1 2 3 }
[
V{
- T{ ##copy f 1 2 double-float-rep }
+ T{ ##copy f 1 2 double-rep }
T{ ##mul-float f 1 1 1 }
}
] [
H{
- { 1 double-float-rep }
- { 2 double-float-rep }
+ { 1 double-rep }
+ { 2 double-rep }
} clone representations set
{
T{ ##mul-float f 1 2 2 }
M: int-rep next-fastcall-param
int-regs inc [ ?dummy-stack-params ] [ ?dummy-fp-params ] bi ;
-M: single-float-rep next-fastcall-param
+M: float-rep next-fastcall-param
float-regs inc [ ?dummy-stack-params ] [ ?dummy-int-params ] bi ;
-M: double-float-rep next-fastcall-param
+M: double-rep next-fastcall-param
float-regs inc [ ?dummy-stack-params ] [ ?dummy-int-params ] bi ;
GENERIC: reg-class-full? ( reg-class -- ? )
V{
T{ ##load-reference f 4 1.5 }
T{ ##unbox-float f 1 4 }
- T{ ##copy f 2 1 double-float-rep }
+ T{ ##copy f 2 1 double-rep }
T{ ##box-float f 3 2 }
T{ ##copy f 0 3 int-rep }
} compile-test-bb
\ (simd-sum) [
nip dup literal?>> [
literal>> scalar-rep-of {
- { single-float-rep [ float ] }
- { double-float-rep [ float ] }
+ { float-rep [ float ] }
+ { double-rep [ float ] }
} case
] [ drop real ] if
<class-info>
! Floating point registers can contain data with
! one of these representations
-SINGLETONS: single-float-rep double-float-rep ;
+SINGLETONS: float-rep double-rep ;
+! On x86, floating point registers are really vector registers
SINGLETONS:
-4float-array-rep
-2double-array-rep
-16char-array-rep
-16uchar-array-rep
-8short-array-rep
-8ushort-array-rep
-4int-array-rep
-4uint-array-rep ;
+float-4-rep
+double-2-rep
+char-16-rep
+uchar-16-rep
+short-8-rep
+ushort-8-rep
+int-4-rep
+uint-4-rep ;
UNION: vector-rep
-4float-array-rep
-2double-array-rep
-16char-array-rep
-16uchar-array-rep
-8short-array-rep
-8ushort-array-rep
-4int-array-rep
-4uint-array-rep ;
+float-4-rep
+double-2-rep
+char-16-rep
+uchar-16-rep
+short-8-rep
+ushort-8-rep
+int-4-rep
+uint-4-rep ;
UNION: representation
any-rep
tagged-rep
int-rep
-single-float-rep
-double-float-rep
+float-rep
+double-rep
vector-rep ;
! Register classes
M: tagged-rep reg-class-of drop int-regs ;
M: int-rep reg-class-of drop int-regs ;
-M: single-float-rep reg-class-of drop float-regs ;
-M: double-float-rep reg-class-of drop float-regs ;
+M: float-rep reg-class-of drop float-regs ;
+M: double-rep reg-class-of drop float-regs ;
M: vector-rep reg-class-of drop float-regs ;
M: stack-params reg-class-of drop stack-params ;
M: tagged-rep rep-size drop cell ;
M: int-rep rep-size drop cell ;
-M: single-float-rep rep-size drop 4 ;
-M: double-float-rep rep-size drop 8 ;
+M: float-rep rep-size drop 4 ;
+M: double-rep rep-size drop 8 ;
M: stack-params rep-size drop cell ;
M: vector-rep rep-size drop 16 ;
GENERIC: scalar-rep-of ( rep -- rep' )
-M: 4float-array-rep scalar-rep-of drop single-float-rep ;
-M: 2double-array-rep scalar-rep-of drop double-float-rep ;
+M: float-4-rep scalar-rep-of drop float-rep ;
+M: double-2-rep scalar-rep-of drop double-rep ;
! Mapping from register class to machine registers
HOOK: machine-registers cpu ( -- assoc )
M: ppc %copy ( dst src rep -- )
{
{ int-rep [ MR ] }
- { double-float-rep [ FMR ] }
+ { double-rep [ FMR ] }
} case ;
M: ppc %unbox-float ( dst src -- ) float-offset LFD ;
: load-from-frame ( dst n rep -- )
{
{ int-rep [ [ 1 ] dip LWZ ] }
- { single-float-rep [ [ 1 ] dip LFS ] }
- { double-float-rep [ [ 1 ] dip LFD ] }
+ { float-rep [ [ 1 ] dip LFS ] }
+ { double-rep [ [ 1 ] dip LFD ] }
{ stack-params [ [ 0 1 ] dip LWZ [ 0 1 ] dip param@ STW ] }
} case ;
: store-to-frame ( src n rep -- )
{
{ int-rep [ [ 1 ] dip STW ] }
- { single-float-rep [ [ 1 ] dip STFS ] }
- { double-float-rep [ [ 1 ] dip STFD ] }
+ { float-rep [ [ 1 ] dip STFS ] }
+ { double-rep [ [ 1 ] dip STFD ] }
{ stack-params [ [ [ 0 1 ] dip next-param@ LWZ 0 1 ] dip STW ] }
} case ;
M: int-rep load-return-reg drop EAX swap next-stack@ MOV ;
M: int-rep store-return-reg drop stack@ EAX MOV ;
-M: single-float-rep push-return-reg drop ESP 4 SUB ESP [] FSTPS ;
-M: single-float-rep load-return-reg drop next-stack@ FLDS ;
-M: single-float-rep store-return-reg drop stack@ FSTPS ;
+M: float-rep push-return-reg drop ESP 4 SUB ESP [] FSTPS ;
+M: float-rep load-return-reg drop next-stack@ FLDS ;
+M: float-rep store-return-reg drop stack@ FSTPS ;
-M: double-float-rep push-return-reg drop ESP 8 SUB ESP [] FSTPL ;
-M: double-float-rep load-return-reg drop next-stack@ FLDL ;
-M: double-float-rep store-return-reg drop stack@ FSTPL ;
+M: double-rep push-return-reg drop ESP 8 SUB ESP [] FSTPL ;
+M: double-rep load-return-reg drop next-stack@ FLDL ;
+M: double-rep store-return-reg drop stack@ FSTPL ;
: align-sub ( n -- )
[ align-stack ] keep - decr-stack-reg ;
[ float-regs param-regs nth ] [ n>> spill@ ] bi* MOVSD ;
: float-function-return ( reg -- )
- float-regs return-reg double-float-rep copy-register ;
+ float-regs return-reg double-rep copy-register ;
M:: x86.64 %unary-float-function ( dst src func -- )
0 src float-function-param
M: int-rep copy-register* drop MOV ;
M: tagged-rep copy-register* drop MOV ;
-M: single-float-rep copy-register* drop MOVSS ;
-M: double-float-rep copy-register* drop MOVSD ;
-M: 4float-array-rep copy-register* drop MOVUPS ;
-M: 2double-array-rep copy-register* drop MOVUPD ;
+M: float-rep copy-register* drop MOVSS ;
+M: double-rep copy-register* drop MOVSD ;
+M: float-4-rep copy-register* drop MOVUPS ;
+M: double-2-rep copy-register* drop MOVUPD ;
M: vector-rep copy-register* drop MOVDQU ;
: copy-register ( dst src rep -- )
M: x86 %broadcast-vector ( dst src rep -- )
{
- { 4float-array-rep [ [ MOVAPS ] [ drop dup 0 SHUFPS ] 2bi ] }
- { 2double-array-rep [ [ MOVAPD ] [ drop dup 0 SHUFPD ] 2bi ] }
+ { float-4-rep [ [ MOVAPS ] [ drop dup 0 SHUFPS ] 2bi ] }
+ { double-2-rep [ [ MOVAPD ] [ drop dup 0 SHUFPD ] 2bi ] }
} case ;
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
rep {
{
- 4float-array-rep
+ float-4-rep
[
dst src1 MOVSS
dst src2 UNPCKLPS
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
rep {
{
- 2double-array-rep
+ double-2-rep
[
dst src1 MOVAPD
dst src2 0 SHUFPD
M: x86 %add-vector ( dst src1 src2 rep -- )
{
- { 4float-array-rep [ ADDPS ] }
- { 2double-array-rep [ ADDPD ] }
- { 16char-array-rep [ PADDB ] }
- { 16uchar-array-rep [ PADDB ] }
- { 8short-array-rep [ PADDW ] }
- { 8ushort-array-rep [ PADDW ] }
- { 4int-array-rep [ PADDD ] }
- { 4uint-array-rep [ PADDD ] }
+ { float-4-rep [ ADDPS ] }
+ { double-2-rep [ ADDPD ] }
+ { char-16-rep [ PADDB ] }
+ { uchar-16-rep [ PADDB ] }
+ { short-8-rep [ PADDW ] }
+ { ushort-8-rep [ PADDW ] }
+ { int-4-rep [ PADDD ] }
+ { uint-4-rep [ PADDD ] }
} case drop ;
M: x86 %sub-vector ( dst src1 src2 rep -- )
{
- { 4float-array-rep [ SUBPS ] }
- { 2double-array-rep [ SUBPD ] }
- { 16char-array-rep [ PSUBB ] }
- { 16uchar-array-rep [ PSUBB ] }
- { 8short-array-rep [ PSUBW ] }
- { 8ushort-array-rep [ PSUBW ] }
- { 4int-array-rep [ PSUBD ] }
- { 4uint-array-rep [ PSUBD ] }
+ { float-4-rep [ SUBPS ] }
+ { double-2-rep [ SUBPD ] }
+ { char-16-rep [ PSUBB ] }
+ { uchar-16-rep [ PSUBB ] }
+ { short-8-rep [ PSUBW ] }
+ { ushort-8-rep [ PSUBW ] }
+ { int-4-rep [ PSUBD ] }
+ { uint-4-rep [ PSUBD ] }
} case drop ;
M: x86 %mul-vector ( dst src1 src2 rep -- )
{
- { 4float-array-rep [ MULPS ] }
- { 2double-array-rep [ MULPD ] }
- { 4int-array-rep [ PMULLW ] }
+ { float-4-rep [ MULPS ] }
+ { double-2-rep [ MULPD ] }
+ { int-4-rep [ PMULLW ] }
} case drop ;
M: x86 %div-vector ( dst src1 src2 rep -- )
{
- { 4float-array-rep [ DIVPS ] }
- { 2double-array-rep [ DIVPD ] }
+ { float-4-rep [ DIVPS ] }
+ { double-2-rep [ DIVPD ] }
} case drop ;
M: x86 %min-vector ( dst src1 src2 rep -- )
{
- { 4float-array-rep [ MINPS ] }
- { 2double-array-rep [ MINPD ] }
+ { float-4-rep [ MINPS ] }
+ { double-2-rep [ MINPD ] }
} case drop ;
M: x86 %max-vector ( dst src1 src2 rep -- )
{
- { 4float-array-rep [ MAXPS ] }
- { 2double-array-rep [ MAXPD ] }
+ { float-4-rep [ MAXPS ] }
+ { double-2-rep [ MAXPD ] }
} case drop ;
M: x86 %sqrt-vector ( dst src rep -- )
{
- { 4float-array-rep [ SQRTPS ] }
- { 2double-array-rep [ SQRTPD ] }
+ { float-4-rep [ SQRTPS ] }
+ { double-2-rep [ SQRTPD ] }
} case ;
M: x86 %horizontal-add-vector ( dst src rep -- )
{
- { 4float-array-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
- { 2double-array-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
+ { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] }
+ { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] }
} case ;
M:: x86 %unbox-any-c-ptr ( dst src temp -- )
USING: cpu.architecture math.vectors.simd
math.vectors.simd.intrinsics accessors math.vectors.simd.alien
kernel classes.struct tools.test compiler sequences byte-arrays
-alien math kernel.private specialized-arrays.float ;
+alien math kernel.private specialized-arrays.float combinators ;
! Vector alien intrinsics
-[ 4float-array{ 1 2 3 4 } ] [
+[ float-4{ 1 2 3 4 } ] [
[
- 4float-array{ 1 2 3 4 }
- underlying>> 0 4float-array-rep alien-vector
- ] compile-call 4float-array boa
+ float-4{ 1 2 3 4 }
+ underlying>> 0 float-4-rep alien-vector
+ ] compile-call float-4 boa
] unit-test
[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [
[
0 [
{ byte-array c-ptr fixnum } declare
- 4float-array-rep set-alien-vector
+ float-4-rep set-alien-vector
] compile-call
] keep
] unit-test
[
float-array{ 1 2 3 4 } underlying>>
float-array{ 4 3 2 1 } clone
- [ underlying>> 0 4float-array-rep set-alien-vector ] keep
+ [ underlying>> 0 float-4-rep set-alien-vector ] keep
] compile-call
] unit-test
STRUCT: simd-struct
-{ x 4float-array }
-{ y 2double-array }
-{ z 4double-array } ;
+{ x float-4 }
+{ y double-2 }
+{ z double-4 }
+{ w float-8 } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
-[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
+[
+ float-4{ 1 2 3 4 }
+ double-2{ 2 1 }
+ double-4{ 4 3 2 1 }
+ float-8{ 1 2 3 4 5 6 7 8 }
+] [
simd-struct <struct>
- 4float-array{ 1 2 3 4 } >>x
- 2double-array{ 2 1 } >>y
- 4double-array{ 4 3 2 1 } >>z
- [ x>> ] [ y>> ] [ z>> ] tri
+ float-4{ 1 2 3 4 } >>x
+ double-2{ 2 1 } >>y
+ double-4{ 4 3 2 1 } >>z
+ float-8{ 1 2 3 4 5 6 7 8 } >>w
+ { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
-[ 4float-array{ 1 2 3 4 } 2double-array{ 2 1 } 4double-array{ 4 3 2 1 } ] [
+[
+ float-4{ 1 2 3 4 }
+ double-2{ 2 1 }
+ double-4{ 4 3 2 1 }
+ float-8{ 1 2 3 4 5 6 7 8 }
+] [
[
simd-struct <struct>
- 4float-array{ 1 2 3 4 } >>x
- 2double-array{ 2 1 } >>y
- 4double-array{ 4 3 2 1 } >>z
- [ x>> ] [ y>> ] [ z>> ] tri
+ float-4{ 1 2 3 4 } >>x
+ double-2{ 2 1 } >>y
+ double-4{ 4 3 2 1 } >>z
+ float-8{ 1 2 3 4 5 6 7 8 } >>w
+ { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call
] unit-test
math.vectors.simd.intrinsics ;
IN: math.vectors.simd.alien
-:: define-simd-type ( class rep -- )
+:: define-simd-128-type ( class rep -- )
<c-type>
byte-array >>class
class >>boxed-class
- [ rep alien-vector ] >>getter
+ [ rep alien-vector class boa ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
8 >>align
rep >>rep
- [ class boa ] >>boxer-quot
- [ underlying>> ] >>unboxer-quot
class name>> typedef ;
-: define-4double-array-type ( -- )
+:: define-simd-256-type ( class rep -- )
<c-type>
- 4double-array >>class
- 4double-array >>boxed-class
+ class >>class
+ class >>boxed-class
[
- [ 2double-array-rep alien-vector ]
- [ 16 + >fixnum 2double-array-rep alien-vector ] 2bi
- 4double-array boa
+ [ rep alien-vector ]
+ [ 16 + >fixnum rep alien-vector ] 2bi
+ class boa
] >>getter
[
- [ [ underlying1>> ] 2dip 2double-array-rep set-alien-vector ]
- [ [ underlying2>> ] 2dip 16 + >fixnum 2double-array-rep set-alien-vector ]
+ [ [ underlying1>> ] 2dip rep set-alien-vector ]
+ [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ]
3bi
] >>setter
32 >>size
8 >>align
- 2double-array-rep >>rep
- "4double-array" typedef ;
+ rep >>rep
+ class name>> typedef ;
[
- 4float-array 4float-array-rep define-simd-type
- 2double-array 2double-array-rep define-simd-type
- define-4double-array-type
+ float-4 float-4-rep define-simd-128-type
+ double-2 double-2-rep define-simd-128-type
+ float-8 float-4-rep define-simd-256-type
+ double-4 double-2-rep define-simd-256-type
] with-compilation-unit
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types byte-arrays classes functors
kernel math parser prettyprint.custom sequences
-sequences.private ;
+sequences.private literals ;
IN: math.vectors.simd.functor
ERROR: bad-length got expected ;
-FUNCTOR: define-simd-type ( T N -- )
+FUNCTOR: define-simd-128 ( T -- )
-A DEFINES-CLASS ${N}${T}-array
+N [ 16 T heap-size /i ]
+
+A DEFINES-CLASS ${T}-${N}
<A> DEFINES <${A}>
(A) DEFINES (${A})
>A DEFINES >${A}
NTH [ T dup c-type-getter-boxer array-accessor ]
SET-NTH [ T dup c-setter array-accessor ]
-BYTES [ T heap-size N * ]
-INITIAL [ BYTES <byte-array> ]
+A-rep IS ${A}-rep
+A-vv->v-op DEFINES ${A}-vv->v-op
+A-v->n-op DEFINES ${A}-v->n-op
WHERE
TUPLE: A
-{ underlying byte-array read-only initial: INITIAL } ;
+{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
-: <A> ( -- simd-array ) BYTES <byte-array> A boa ; inline
+: <A> ( -- simd-array ) 16 <byte-array> \ A boa ; inline
-: (A) ( -- simd-array ) BYTES (byte-array) A boa ; inline
+: (A) ( -- simd-array ) 16 (byte-array) \ A boa ; inline
M: A clone underlying>> clone \ A boa ; inline
INSTANCE: A sequence
+<PRIVATE
+
+: A-vv->v-op ( v1 v2 quot -- v3 )
+ [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
+
+: A-v->n-op ( v quot -- n )
+ [ underlying>> A-rep ] dip call ; inline
+
+PRIVATE>
+
+;FUNCTOR
+
+! Synthesize 256-bit vectors from a pair of 128-bit vectors
+FUNCTOR: define-simd-256 ( T -- )
+
+N [ 32 T heap-size /i ]
+
+N/2 [ N 2 / ]
+A/2 IS ${T}-${N/2}
+
+A DEFINES-CLASS ${T}-${N}
+<A> DEFINES <${A}>
+(A) DEFINES (${A})
+>A DEFINES >${A}
+A{ DEFINES ${A}{
+
+A-deref DEFINES ${A}-deref
+
+A-rep IS ${A/2}-rep
+A-vv->v-op DEFINES ${A}-vv->v-op
+A-v->n-op DEFINES ${A}-v->n-op
+
+WHERE
+
+SLOT: underlying1
+SLOT: underlying2
+
+TUPLE: A
+{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
+{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
+
+: <A> ( -- simd-array )
+ 16 <byte-array> 16 <byte-array> \ A boa ; inline
+
+: (A) ( -- simd-array )
+ 16 (byte-array) 16 (byte-array) \ A boa ; inline
+
+M: A clone
+ [ underlying1>> clone ] [ underlying2>> clone ] bi
+ \ A boa ; inline
+
+M: A length drop N ; inline
+
+<PRIVATE
+
+: A-deref ( n seq -- n' seq' )
+ over N/2 < [ underlying1>> ] [ [ N/2 - ] dip underlying2>> ] if A/2 boa ; inline
+
+PRIVATE>
+
+M: A nth-unsafe A-deref nth-unsafe ; inline
+
+M: A set-nth-unsafe A-deref set-nth-unsafe ; inline
+
+: >A ( seq -- simd-array ) \ A new clone-like ;
+
+M: A like drop dup \ A instance? [ >A ] unless ; inline
+
+M: A new-sequence drop dup N = [ drop (A) ] [ N bad-length ] if ; inline
+
+M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
+
+M: A byte-length drop 32 ; inline
+
+SYNTAX: A{ \ } [ >A ] parse-literal ;
+
+M: A pprint-delims drop \ A{ \ } ;
+
+M: A >pprint-sequence ;
+
+M: A pprint* pprint-object ;
+
+INSTANCE: A sequence
+
+<PRIVATE
+
+: A-vv->v-op ( v1 v2 quot -- v3 )
+ [ [ [ underlying1>> ] bi@ A-rep ] dip call ]
+ [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
+ \ A boa ; inline
+
+: A-v->n-op ( v1 quot scalar-quot -- v2 )
+ [
+ [ [ underlying1>> A-rep ] dip call ]
+ [ [ underlying2>> A-rep ] dip call ] 2bi
+ ] dip call ; inline
+
+PRIVATE>
+
;FUNCTOR
math.vectors math.functions kernel.private compiler sequences
tools.test compiler.tree.debugger accessors kernel ;
-[ 4float-array{ 0 0 0 0 } ] [ 4float-array new ] unit-test
+[ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test
-[ V{ float } ] [ [ { 4float-array } declare norm-sq ] final-classes ] unit-test
+[ float-4{ 0 0 0 0 } ] [ [ float-4 new ] compile-call ] unit-test
-[ V{ float } ] [ [ { 4float-array } declare norm ] final-classes ] unit-test
+[ V{ float } ] [ [ { float-4 } declare norm-sq ] final-classes ] unit-test
-[ 4float-array{ 12 12 12 12 } ] [
- 12 [ 4float-array-with ] compile-call
+[ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test
+
+[ float-4{ 12 12 12 12 } ] [
+ 12 [ float-4-with ] compile-call
] unit-test
-[ 4float-array{ 1 2 3 4 } ] [
- 1 2 3 4 [ 4float-array-boa ] compile-call
+[ float-4{ 1 2 3 4 } ] [
+ 1 2 3 4 [ float-4-boa ] compile-call
] unit-test
-[ 4float-array{ 11 22 33 44 } ] [
- 4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
- [ { 4float-array 4float-array } declare v+ ] compile-call
+[ float-4{ 11 22 33 44 } ] [
+ float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
+ [ { float-4 float-4 } declare v+ ] compile-call
] unit-test
-[ 4float-array{ -9 -18 -27 -36 } ] [
- 4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
- [ { 4float-array 4float-array } declare v- ] compile-call
+[ float-4{ -9 -18 -27 -36 } ] [
+ float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
+ [ { float-4 float-4 } declare v- ] compile-call
] unit-test
-[ 4float-array{ 10 40 90 160 } ] [
- 4float-array{ 1 2 3 4 } 4float-array{ 10 20 30 40 }
- [ { 4float-array 4float-array } declare v* ] compile-call
+[ float-4{ 10 40 90 160 } ] [
+ float-4{ 1 2 3 4 } float-4{ 10 20 30 40 }
+ [ { float-4 float-4 } declare v* ] compile-call
] unit-test
-[ 4float-array{ 10 100 1000 10000 } ] [
- 4float-array{ 100 2000 30000 400000 } 4float-array{ 10 20 30 40 }
- [ { 4float-array 4float-array } declare v/ ] compile-call
+[ float-4{ 10 100 1000 10000 } ] [
+ float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 }
+ [ { float-4 float-4 } declare v/ ] compile-call
] unit-test
-[ 4float-array{ -10 -20 -30 -40 } ] [
- 4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
- [ { 4float-array 4float-array } declare vmin ] compile-call
+[ float-4{ -10 -20 -30 -40 } ] [
+ float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
+ [ { float-4 float-4 } declare vmin ] compile-call
] unit-test
-[ 4float-array{ 10 20 30 40 } ] [
- 4float-array{ -10 20 -30 40 } 4float-array{ 10 -20 30 -40 }
- [ { 4float-array 4float-array } declare vmax ] compile-call
+[ float-4{ 10 20 30 40 } ] [
+ float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 }
+ [ { float-4 float-4 } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
- 4float-array{ 1 2 3 4 }
- [ { 4float-array } declare sum ] compile-call
+ float-4{ 1 2 3 4 }
+ [ { float-4 } declare sum ] compile-call
] unit-test
[ 13.0 ] [
- 4float-array{ 1 2 3 4 }
- [ { 4float-array } declare sum 3.0 + ] compile-call
+ float-4{ 1 2 3 4 }
+ [ { float-4 } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
- 4float-array{ 1 2 3 4 } 4float-array{ 2 0 2 0 }
- [ { 4float-array 4float-array } declare v. ] compile-call
+ float-4{ 1 2 3 4 } float-4{ 2 0 2 0 }
+ [ { float-4 float-4 } declare v. ] compile-call
] unit-test
-[ 4float-array{ 5 10 15 20 } ] [
- 5.0 4float-array{ 1 2 3 4 }
- [ { float 4float-array } declare n*v ] compile-call
+[ float-4{ 5 10 15 20 } ] [
+ 5.0 float-4{ 1 2 3 4 }
+ [ { float float-4 } declare n*v ] compile-call
] unit-test
-[ 4float-array{ 5 10 15 20 } ] [
- 4float-array{ 1 2 3 4 } 5.0
- [ { float 4float-array } declare v*n ] compile-call
+[ float-4{ 5 10 15 20 } ] [
+ float-4{ 1 2 3 4 } 5.0
+ [ { float float-4 } declare v*n ] compile-call
] unit-test
-[ 4float-array{ 10 5 2 5 } ] [
- 10.0 4float-array{ 1 2 5 2 }
- [ { float 4float-array } declare n/v ] compile-call
+[ float-4{ 10 5 2 5 } ] [
+ 10.0 float-4{ 1 2 5 2 }
+ [ { float float-4 } declare n/v ] compile-call
] unit-test
-[ 4float-array{ 0.5 1 1.5 2 } ] [
- 4float-array{ 1 2 3 4 } 2
- [ { float 4float-array } declare v/n ] compile-call
+[ float-4{ 0.5 1 1.5 2 } ] [
+ float-4{ 1 2 3 4 } 2
+ [ { float float-4 } declare v/n ] compile-call
] unit-test
-[ 4float-array{ 1 0 0 0 } ] [
- 4float-array{ 10 0 0 0 }
- [ { 4float-array } declare normalize ] compile-call
+[ float-4{ 1 0 0 0 } ] [
+ float-4{ 10 0 0 0 }
+ [ { float-4 } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
- 4float-array{ 1 2 3 4 }
- [ { 4float-array } declare norm-sq ] compile-call
+ float-4{ 1 2 3 4 }
+ [ { float-4 } declare norm-sq ] compile-call
] unit-test
[ t ] [
- 4float-array{ 1 0 0 0 }
- 4float-array{ 0 1 0 0 }
- [ { 4float-array 4float-array } declare distance ] compile-call
+ float-4{ 1 0 0 0 }
+ float-4{ 0 1 0 0 }
+ [ { float-4 float-4 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
-[ 2double-array{ 12 12 } ] [
- 12 [ 2double-array-with ] compile-call
+[ double-2{ 12 12 } ] [
+ 12 [ double-2-with ] compile-call
] unit-test
-[ 2double-array{ 1 2 } ] [
- 1 2 [ 2double-array-boa ] compile-call
+[ double-2{ 1 2 } ] [
+ 1 2 [ double-2-boa ] compile-call
] unit-test
-[ 2double-array{ 11 22 } ] [
- 2double-array{ 1 2 } 2double-array{ 10 20 }
- [ { 2double-array 2double-array } declare v+ ] compile-call
+[ double-2{ 11 22 } ] [
+ double-2{ 1 2 } double-2{ 10 20 }
+ [ { double-2 double-2 } declare v+ ] compile-call
] unit-test
-[ 2double-array{ -9 -18 } ] [
- 2double-array{ 1 2 } 2double-array{ 10 20 }
- [ { 2double-array 2double-array } declare v- ] compile-call
+[ double-2{ -9 -18 } ] [
+ double-2{ 1 2 } double-2{ 10 20 }
+ [ { double-2 double-2 } declare v- ] compile-call
] unit-test
-[ 2double-array{ 10 40 } ] [
- 2double-array{ 1 2 } 2double-array{ 10 20 }
- [ { 2double-array 2double-array } declare v* ] compile-call
+[ double-2{ 10 40 } ] [
+ double-2{ 1 2 } double-2{ 10 20 }
+ [ { double-2 double-2 } declare v* ] compile-call
] unit-test
-[ 2double-array{ 10 100 } ] [
- 2double-array{ 100 2000 } 2double-array{ 10 20 }
- [ { 2double-array 2double-array } declare v/ ] compile-call
+[ double-2{ 10 100 } ] [
+ double-2{ 100 2000 } double-2{ 10 20 }
+ [ { double-2 double-2 } declare v/ ] compile-call
] unit-test
-[ 2double-array{ -10 -20 } ] [
- 2double-array{ -10 20 } 2double-array{ 10 -20 }
- [ { 2double-array 2double-array } declare vmin ] compile-call
+[ double-2{ -10 -20 } ] [
+ double-2{ -10 20 } double-2{ 10 -20 }
+ [ { double-2 double-2 } declare vmin ] compile-call
] unit-test
-[ 2double-array{ 10 20 } ] [
- 2double-array{ -10 20 } 2double-array{ 10 -20 }
- [ { 2double-array 2double-array } declare vmax ] compile-call
+[ double-2{ 10 20 } ] [
+ double-2{ -10 20 } double-2{ 10 -20 }
+ [ { double-2 double-2 } declare vmax ] compile-call
] unit-test
[ 3.0 ] [
- 2double-array{ 1 2 }
- [ { 2double-array } declare sum ] compile-call
+ double-2{ 1 2 }
+ [ { double-2 } declare sum ] compile-call
] unit-test
[ 7.0 ] [
- 2double-array{ 1 2 }
- [ { 2double-array } declare sum 4.0 + ] compile-call
+ double-2{ 1 2 }
+ [ { double-2 } declare sum 4.0 + ] compile-call
] unit-test
[ 16.0 ] [
- 2double-array{ 1 2 } 2double-array{ 2 7 }
- [ { 2double-array 2double-array } declare v. ] compile-call
+ double-2{ 1 2 } double-2{ 2 7 }
+ [ { double-2 double-2 } declare v. ] compile-call
] unit-test
-[ 2double-array{ 5 10 } ] [
- 5.0 2double-array{ 1 2 }
- [ { float 2double-array } declare n*v ] compile-call
+[ double-2{ 5 10 } ] [
+ 5.0 double-2{ 1 2 }
+ [ { float double-2 } declare n*v ] compile-call
] unit-test
-[ 2double-array{ 5 10 } ] [
- 2double-array{ 1 2 } 5.0
- [ { float 2double-array } declare v*n ] compile-call
+[ double-2{ 5 10 } ] [
+ double-2{ 1 2 } 5.0
+ [ { float double-2 } declare v*n ] compile-call
] unit-test
-[ 2double-array{ 10 5 } ] [
- 10.0 2double-array{ 1 2 }
- [ { float 2double-array } declare n/v ] compile-call
+[ double-2{ 10 5 } ] [
+ 10.0 double-2{ 1 2 }
+ [ { float double-2 } declare n/v ] compile-call
] unit-test
-[ 2double-array{ 0.5 1 } ] [
- 2double-array{ 1 2 } 2
- [ { float 2double-array } declare v/n ] compile-call
+[ double-2{ 0.5 1 } ] [
+ double-2{ 1 2 } 2
+ [ { float double-2 } declare v/n ] compile-call
] unit-test
-[ 2double-array{ 0 0 } ] [ 2double-array new ] unit-test
+[ double-2{ 0 0 } ] [ double-2 new ] unit-test
-[ 2double-array{ 1 0 } ] [
- 2double-array{ 10 0 }
- [ { 2double-array } declare normalize ] compile-call
+[ double-2{ 1 0 } ] [
+ double-2{ 10 0 }
+ [ { double-2 } declare normalize ] compile-call
] unit-test
[ 5.0 ] [
- 2double-array{ 1 2 }
- [ { 2double-array } declare norm-sq ] compile-call
+ double-2{ 1 2 }
+ [ { double-2 } declare norm-sq ] compile-call
] unit-test
[ t ] [
- 2double-array{ 1 0 }
- 2double-array{ 0 1 }
- [ { 2double-array 2double-array } declare distance ] compile-call
+ double-2{ 1 0 }
+ double-2{ 0 1 }
+ [ { double-2 double-2 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
-[ 4double-array{ 0 0 0 0 } ] [ 4double-array new ] unit-test
+[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test
-[ 4double-array{ 1 2 3 4 } ] [
- 1 2 3 4 4double-array-boa
+[ double-4{ 1 2 3 4 } ] [
+ 1 2 3 4 double-4-boa
] unit-test
-[ 4double-array{ 1 1 1 1 } ] [
- 1 4double-array-with
+[ double-4{ 1 1 1 1 } ] [
+ 1 double-4-with
] unit-test
-[ 4double-array{ 0 1 2 3 } ] [
- 1 4double-array-with [ * ] map-index
+[ double-4{ 0 1 2 3 } ] [
+ 1 double-4-with [ * ] map-index
] unit-test
-[ V{ float } ] [ [ { 4double-array } declare norm-sq ] final-classes ] unit-test
+[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test
-[ V{ float } ] [ [ { 4double-array } declare norm ] final-classes ] unit-test
+[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test
-[ 4double-array{ 12 12 12 12 } ] [
- 12 [ 4double-array-with ] compile-call
+[ double-4{ 12 12 12 12 } ] [
+ 12 [ double-4-with ] compile-call
] unit-test
-[ 4double-array{ 1 2 3 4 } ] [
- 1 2 3 4 [ 4double-array-boa ] compile-call
+[ double-4{ 1 2 3 4 } ] [
+ 1 2 3 4 [ double-4-boa ] compile-call
] unit-test
-[ 4double-array{ 11 22 33 44 } ] [
- 4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
- [ { 4double-array 4double-array } declare v+ ] compile-call
+[ double-4{ 11 22 33 44 } ] [
+ double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
+ [ { double-4 double-4 } declare v+ ] compile-call
] unit-test
-[ 4double-array{ -9 -18 -27 -36 } ] [
- 4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
- [ { 4double-array 4double-array } declare v- ] compile-call
+[ double-4{ -9 -18 -27 -36 } ] [
+ double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
+ [ { double-4 double-4 } declare v- ] compile-call
] unit-test
-[ 4double-array{ 10 40 90 160 } ] [
- 4double-array{ 1 2 3 4 } 4double-array{ 10 20 30 40 }
- [ { 4double-array 4double-array } declare v* ] compile-call
+[ double-4{ 10 40 90 160 } ] [
+ double-4{ 1 2 3 4 } double-4{ 10 20 30 40 }
+ [ { double-4 double-4 } declare v* ] compile-call
] unit-test
-[ 4double-array{ 10 100 1000 10000 } ] [
- 4double-array{ 100 2000 30000 400000 } 4double-array{ 10 20 30 40 }
- [ { 4double-array 4double-array } declare v/ ] compile-call
+[ double-4{ 10 100 1000 10000 } ] [
+ double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 }
+ [ { double-4 double-4 } declare v/ ] compile-call
] unit-test
-[ 4double-array{ -10 -20 -30 -40 } ] [
- 4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
- [ { 4double-array 4double-array } declare vmin ] compile-call
+[ double-4{ -10 -20 -30 -40 } ] [
+ double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
+ [ { double-4 double-4 } declare vmin ] compile-call
] unit-test
-[ 4double-array{ 10 20 30 40 } ] [
- 4double-array{ -10 20 -30 40 } 4double-array{ 10 -20 30 -40 }
- [ { 4double-array 4double-array } declare vmax ] compile-call
+[ double-4{ 10 20 30 40 } ] [
+ double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 }
+ [ { double-4 double-4 } declare vmax ] compile-call
] unit-test
[ 10.0 ] [
- 4double-array{ 1 2 3 4 }
- [ { 4double-array } declare sum ] compile-call
+ double-4{ 1 2 3 4 }
+ [ { double-4 } declare sum ] compile-call
] unit-test
[ 13.0 ] [
- 4double-array{ 1 2 3 4 }
- [ { 4double-array } declare sum 3.0 + ] compile-call
+ double-4{ 1 2 3 4 }
+ [ { double-4 } declare sum 3.0 + ] compile-call
] unit-test
[ 8.0 ] [
- 4double-array{ 1 2 3 4 } 4double-array{ 2 0 2 0 }
- [ { 4double-array 4double-array } declare v. ] compile-call
+ double-4{ 1 2 3 4 } double-4{ 2 0 2 0 }
+ [ { double-4 double-4 } declare v. ] compile-call
] unit-test
-[ 4double-array{ 5 10 15 20 } ] [
- 5.0 4double-array{ 1 2 3 4 }
- [ { float 4double-array } declare n*v ] compile-call
+[ double-4{ 5 10 15 20 } ] [
+ 5.0 double-4{ 1 2 3 4 }
+ [ { float double-4 } declare n*v ] compile-call
] unit-test
-[ 4double-array{ 5 10 15 20 } ] [
- 4double-array{ 1 2 3 4 } 5.0
- [ { float 4double-array } declare v*n ] compile-call
+[ double-4{ 5 10 15 20 } ] [
+ double-4{ 1 2 3 4 } 5.0
+ [ { float double-4 } declare v*n ] compile-call
] unit-test
-[ 4double-array{ 10 5 2 5 } ] [
- 10.0 4double-array{ 1 2 5 2 }
- [ { float 4double-array } declare n/v ] compile-call
+[ double-4{ 10 5 2 5 } ] [
+ 10.0 double-4{ 1 2 5 2 }
+ [ { float double-4 } declare n/v ] compile-call
] unit-test
-[ 4double-array{ 0.5 1 1.5 2 } ] [
- 4double-array{ 1 2 3 4 } 2
- [ { float 4double-array } declare v/n ] compile-call
+[ double-4{ 0.5 1 1.5 2 } ] [
+ double-4{ 1 2 3 4 } 2
+ [ { float double-4 } declare v/n ] compile-call
] unit-test
-[ 4double-array{ 1 0 0 0 } ] [
- 4double-array{ 10 0 0 0 }
- [ { 4double-array } declare normalize ] compile-call
+[ double-4{ 1 0 0 0 } ] [
+ double-4{ 10 0 0 0 }
+ [ { double-4 } declare normalize ] compile-call
] unit-test
[ 30.0 ] [
- 4double-array{ 1 2 3 4 }
- [ { 4double-array } declare norm-sq ] compile-call
+ double-4{ 1 2 3 4 }
+ [ { double-4 } declare norm-sq ] compile-call
] unit-test
[ t ] [
- 4double-array{ 1 0 0 0 }
- 4double-array{ 0 1 0 0 }
- [ { 4double-array 4double-array } declare distance ] compile-call
+ double-4{ 1 0 0 0 }
+ double-4{ 0 1 0 0 }
+ [ { double-4 double-4 } declare distance ] compile-call
2 sqrt 1.0e-6 ~
] unit-test
+[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test
+
+[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test
+
+[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test
+
+[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test
+
+[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test
+
+[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test
+
+[ float-8{ 3 6 9 12 15 18 21 24 } ] [
+ float-8{ 1 2 3 4 5 6 7 8 }
+ float-8{ 2 4 6 8 10 12 14 16 }
+ [ { float-8 float-8 } declare v+ ] compile-call
+] unit-test
+
+[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+ float-8{ 1 2 3 4 5 6 7 8 }
+ float-8{ 2 4 6 8 10 12 14 16 }
+ [ { float-8 float-8 } declare v- ] compile-call
+] unit-test
+
+[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+ -0.5
+ float-8{ 2 4 6 8 10 12 14 16 }
+ [ { float float-8 } declare n*v ] compile-call
+] unit-test
+
+[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+ float-8{ 2 4 6 8 10 12 14 16 }
+ -0.5
+ [ { float-8 float } declare v*n ] compile-call
+] unit-test
+
+[ float-8{ 256 128 64 32 16 8 4 2 } ] [
+ 256.0
+ float-8{ 1 2 4 8 16 32 64 128 }
+ [ { float float-8 } declare n/v ] compile-call
+] unit-test
+
+[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [
+ float-8{ 2 4 6 8 10 12 14 16 }
+ -2.0
+ [ { float-8 float } declare v/n ] compile-call
+] unit-test
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types byte-arrays cpu.architecture
-generalizations kernel math math.functions math.vectors
+kernel math math.functions math.vectors
math.vectors.simd.functor math.vectors.simd.intrinsics
math.vectors.specialization parser prettyprint.custom sequences
sequences.private specialized-arrays.double locals assocs
-literals words fry ;
+words fry ;
IN: math.vectors.simd
<<
-DEFER: 4float-array
-DEFER: 2double-array
+DEFER: float-4
+DEFER: double-2
+DEFER: float-8
+DEFER: double-4
-"double" 2 define-simd-type
-"float" 4 define-simd-type
+"double" define-simd-128
+"float" define-simd-128
+"double" define-simd-256
+"float" define-simd-256
>>
-! Constructors
-: 4float-array-with ( x -- simd-array )
- >float 4float-array-rep (simd-broadcast) 4float-array boa ; inline
+: float-4-with ( x -- simd-array )
+ >float float-4-rep (simd-broadcast) float-4 boa ; inline
-: 4float-array-boa ( a b c d -- simd-array )
- [ >float ] 4 napply 4float-array-rep (simd-gather-4) 4float-array boa ; inline
+:: float-4-boa ( a b c d -- simd-array )
+ a >float b >float c >float d >float
+ float-4-rep (simd-gather-4) float-4 boa ; inline
-: 2double-array-with ( x -- simd-array )
- >float 2double-array-rep (simd-broadcast) 2double-array boa ; inline
+: double-2-with ( x -- simd-array )
+ >float double-2-rep (simd-broadcast) double-2 boa ; inline
-: 2double-array-boa ( a b -- simd-array )
- [ >float ] bi@ 2double-array-rep (simd-gather-2) 2double-array boa ; inline
+: double-2-boa ( a b -- simd-array )
+ [ >float ] bi@ double-2-rep (simd-gather-2) double-2 boa ; inline
-<PRIVATE
-
-: 4float-array-vv->v-op ( v1 v2 quot -- v3 )
- [ [ underlying>> ] bi@ 4float-array-rep ] dip call 4float-array boa ; inline
+: float-8-with ( x -- simd-array )
+ [ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@
+ float-8 boa ; inline
-: 4float-array-v->n-op ( v1 quot -- v2 )
- [ underlying>> 4float-array-rep ] dip call ; inline
+:: float-8-boa ( a b c d e f g h -- simd-array )
+ a b c d float-4-boa
+ e f g h float-4-boa
+ [ underlying>> ] bi@
+ float-8 boa ; inline
-: 2double-array-vv->v-op ( v1 v2 quot -- v3 )
- [ [ underlying>> ] bi@ 2double-array-rep ] dip call 2double-array boa ; inline
+: double-4-with ( x -- simd-array )
+ [ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@
+ double-4 boa ; inline
-: 2double-array-v->n-op ( v1 quot -- v2 )
- [ underlying>> 2double-array-rep ] dip call ; inline
-
-PRIVATE>
+:: double-4-boa ( a b c d -- simd-array )
+ a b double-2-boa
+ c d double-2-boa
+ [ underlying>> ] bi@
+ double-4 boa ; inline
<<
'[ drop _ key? ] assoc-filter ;
:: high-level-ops ( ctor -- assoc )
+ ! Some SIMD operations are defined in terms of others.
{
{ vneg [ [ dup v- ] keep v- ] }
{ v. [ v* sum ] }
PRIVATE>
-\ 4float-array \ 4float-array-with float H{
- { v+ [ [ (simd-v+) ] 4float-array-vv->v-op ] }
- { v- [ [ (simd-v-) ] 4float-array-vv->v-op ] }
- { v* [ [ (simd-v*) ] 4float-array-vv->v-op ] }
- { v/ [ [ (simd-v/) ] 4float-array-vv->v-op ] }
- { vmin [ [ (simd-vmin) ] 4float-array-vv->v-op ] }
- { vmax [ [ (simd-vmax) ] 4float-array-vv->v-op ] }
- { sum [ [ (simd-sum) ] 4float-array-v->n-op ] }
+\ float-4 \ float-4-with float H{
+ { v+ [ [ (simd-v+) ] float-4-vv->v-op ] }
+ { v- [ [ (simd-v-) ] float-4-vv->v-op ] }
+ { v* [ [ (simd-v*) ] float-4-vv->v-op ] }
+ { v/ [ [ (simd-v/) ] float-4-vv->v-op ] }
+ { vmin [ [ (simd-vmin) ] float-4-vv->v-op ] }
+ { vmax [ [ (simd-vmax) ] float-4-vv->v-op ] }
+ { sum [ [ (simd-sum) ] float-4-v->n-op ] }
} simd-vector-words
-\ 2double-array \ 2double-array-with float H{
- { v+ [ [ (simd-v+) ] 2double-array-vv->v-op ] }
- { v- [ [ (simd-v-) ] 2double-array-vv->v-op ] }
- { v* [ [ (simd-v*) ] 2double-array-vv->v-op ] }
- { v/ [ [ (simd-v/) ] 2double-array-vv->v-op ] }
- { vmin [ [ (simd-vmin) ] 2double-array-vv->v-op ] }
- { vmax [ [ (simd-vmax) ] 2double-array-vv->v-op ] }
- { sum [ [ (simd-sum) ] 2double-array-v->n-op ] }
+\ double-2 \ double-2-with float H{
+ { v+ [ [ (simd-v+) ] double-2-vv->v-op ] }
+ { v- [ [ (simd-v-) ] double-2-vv->v-op ] }
+ { v* [ [ (simd-v*) ] double-2-vv->v-op ] }
+ { v/ [ [ (simd-v/) ] double-2-vv->v-op ] }
+ { vmin [ [ (simd-vmin) ] double-2-vv->v-op ] }
+ { vmax [ [ (simd-vmax) ] double-2-vv->v-op ] }
+ { sum [ [ (simd-sum) ] double-2-v->n-op ] }
} simd-vector-words
->>
-
-! Synthesize 256-bit vectors from a pair of 128-bit vectors
-! Functorize this later so that we can do it for integers, etc
-TUPLE: 4double-array
-{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
-{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
-
-: <4double-array> ( -- simd-array )
- 16 <byte-array> 16 <byte-array> 4double-array boa ; inline
-
-: (4double-array) ( -- simd-array )
- 16 (byte-array) 16 (byte-array) 4double-array boa ; inline
-
-M: 4double-array clone
- [ underlying1>> clone ] [ underlying2>> clone ] bi
- 4double-array boa ; inline
-
-M: 4double-array length drop 4 ; inline
-
-<PRIVATE
-
-: 4double-array-deref ( n seq -- n' seq' )
- over 2 < [ underlying1>> ] [ [ 2 - ] dip underlying2>> ] if
- 2 swap double-array boa ; inline
-
-PRIVATE>
-
-M: 4double-array nth-unsafe
- 4double-array-deref nth-unsafe ; inline
-
-M: 4double-array set-nth-unsafe
- 4double-array-deref set-nth-unsafe ; inline
-
-: >4double-array ( seq -- simd-array )
- 4double-array new clone-like ;
-
-M: 4double-array like
- drop dup 4double-array? [ >4double-array ] unless ; inline
-
-M: 4double-array new-sequence
- drop dup 4 = [ drop (4double-array) ] [ 4 bad-length ] if ; inline
-
-M: 4double-array equal?
- over 4double-array? [ sequence= ] [ 2drop f ] if ;
-
-M: 4double-array byte-length drop 32 ; inline
-
-SYNTAX: 4double-array{
- \ } [ >4double-array ] parse-literal ;
-
-M: 4double-array pprint-delims
- drop \ 4double-array{ \ } ;
-
-M: 4double-array >pprint-sequence ;
-
-M: 4double-array pprint* pprint-object ;
-
-INSTANCE: 4double-array sequence
-
-: 4double-array-with ( x -- simd-array )
- dup [ >float 2double-array-rep (simd-broadcast) ] bi@
- 4double-array boa ; inline
-
-: 4double-array-boa ( a b c d -- simd-array )
- [ >float ] 4 napply [ 2double-array-rep (simd-gather-2) ] 2bi@
- 4double-array boa ; inline
-
-! SIMD operations on 4double-arrays
-
-<PRIVATE
-
-: 4double-array-vv->v-op ( v1 v2 quot -- v3 )
- [ [ [ underlying1>> ] bi@ 2double-array-rep ] dip call ]
- [ [ [ underlying2>> ] bi@ 2double-array-rep ] dip call ] 3bi
- 4double-array boa ; inline
-
-: 4double-array-v->n-op ( v1 quot scalar-quot -- v2 )
- [
- [ [ underlying1>> 2double-array-rep ] dip call ]
- [ [ underlying2>> 2double-array-rep ] dip call ] 2bi
- ] dip call ; inline
-
-PRIVATE>
-
-<<
+\ float-8 \ float-8-with float H{
+ { v+ [ [ (simd-v+) ] float-8-vv->v-op ] }
+ { v- [ [ (simd-v-) ] float-8-vv->v-op ] }
+ { v* [ [ (simd-v*) ] float-8-vv->v-op ] }
+ { v/ [ [ (simd-v/) ] float-8-vv->v-op ] }
+ { vmin [ [ (simd-vmin) ] float-8-vv->v-op ] }
+ { vmax [ [ (simd-vmax) ] float-8-vv->v-op ] }
+ { sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] }
+} simd-vector-words
-\ 4double-array \ 4double-array-with float H{
- { v+ [ [ (simd-v+) ] 4double-array-vv->v-op ] }
- { v- [ [ (simd-v-) ] 4double-array-vv->v-op ] }
- { v* [ [ (simd-v*) ] 4double-array-vv->v-op ] }
- { v/ [ [ (simd-v/) ] 4double-array-vv->v-op ] }
- { vmin [ [ (simd-vmin) ] 4double-array-vv->v-op ] }
- { vmax [ [ (simd-vmax) ] 4double-array-vv->v-op ] }
- { sum [ [ (simd-sum) ] [ + ] 4double-array-v->n-op ] }
+\ double-4 \ double-4-with float H{
+ { v+ [ [ (simd-v+) ] double-4-vv->v-op ] }
+ { v- [ [ (simd-v-) ] double-4-vv->v-op ] }
+ { v* [ [ (simd-v*) ] double-4-vv->v-op ] }
+ { v/ [ [ (simd-v/) ] double-4-vv->v-op ] }
+ { vmin [ [ (simd-vmin) ] double-4-vv->v-op ] }
+ { vmax [ [ (simd-vmax) ] double-4-vv->v-op ] }
+ { sum [ [ (simd-sum) ] [ + ] double-4-v->n-op ] }
} simd-vector-words
>>