1 ! (c)2009 Slava Pestov, Joe Groff bsd license
2 USING: accessors alien alien.c-types alien.data combinators
3 cords cpu.architecture fry generalizations kernel libc locals
4 math math.libm math.order math.ranges math.vectors sequences
5 sequences.private specialized-arrays vocabs.loader ;
6 QUALIFIED-WITH: alien.c-types c
8 c:char c:short c:int c:longlong
9 c:uchar c:ushort c:uint c:ulonglong
11 IN: math.vectors.simd.intrinsics
! No-op placeholder: the body is empty, so the input is left on the
! stack unchanged and no positivity check is actually performed.
13 : assert-positive ( x -- y ) ;
! Rep translation used by the bitwise-components-* words: the visible
! rows send float-4-rep to uint-4-rep and double-2-rep to
! ulonglong-2-rep.
! NOTE(review): only the table rows are visible in this listing; the
! enclosing dispatch form and any fall-through clause for other reps are
! not shown -- confirm against the complete file.
17 : >bitwise-vector-rep ( rep -- rep' )
19 { float-4-rep [ uint-4-rep ] }
20 { double-2-rep [ ulonglong-2-rep ] }
! Rep translation from each signed integer rep to the unsigned rep with
! the same component count (longlong-2 -> ulonglong-2, int-4 -> uint-4,
! short-8 -> ushort-8, char-16 -> uchar-16).
! NOTE(review): only the table rows are visible in this listing; the
! enclosing dispatch form and any fall-through clause are not shown --
! confirm against the complete file.
24 : >uint-vector-rep ( rep -- rep' )
26 { longlong-2-rep [ ulonglong-2-rep ] }
27 { int-4-rep [ uint-4-rep ] }
28 { short-8-rep [ ushort-8-rep ] }
29 { char-16-rep [ uchar-16-rep ] }
! Rep translation from a float rep to the signed integer rep with the
! same component count (float-4 -> int-4, double-2 -> longlong-2).
! NOTE(review): the enclosing dispatch form is not visible in this
! listing -- confirm how reps outside the table are handled.
33 : >int-vector-rep ( rep -- rep' )
35 { float-4-rep [ int-4-rep ] }
36 { double-2-rep [ longlong-2-rep ] }
! Rep translation from a signed integer rep to the float rep with the
! same component count (int-4 -> float-4, longlong-2 -> double-2).
! NOTE(review): the enclosing dispatch form is not visible in this
! listing -- confirm how reps outside the table are handled.
39 : >float-vector-rep ( rep -- rep' )
41 { int-4-rep [ float-4-rep ] }
42 { longlong-2-rep [ double-2-rep ] }
! Lookup table from a vector rep to a one-word quotation wrapping the
! matching byte-array>TYPE-array word. The declared output is named
! `class`, but every visible row yields a quotation, which >rep-array
! and 2>rep-array then invoke with call( a -- a' ).
! NOTE(review): the enclosing dispatch form is not visible in this
! listing -- confirm against the complete file.
45 : [byte>rep-array] ( rep -- class )
47 { char-16-rep [ [ byte-array>char-array ] ] }
48 { uchar-16-rep [ [ byte-array>uchar-array ] ] }
49 { short-8-rep [ [ byte-array>short-array ] ] }
50 { ushort-8-rep [ [ byte-array>ushort-array ] ] }
51 { int-4-rep [ [ byte-array>int-array ] ] }
52 { uint-4-rep [ [ byte-array>uint-array ] ] }
53 { longlong-2-rep [ [ byte-array>longlong-array ] ] }
54 { ulonglong-2-rep [ [ byte-array>ulonglong-array ] ] }
55 { float-4-rep [ [ byte-array>float-array ] ] }
56 { double-2-rep [ [ byte-array>double-array ] ] }
! Lookup table from a vector rep to a one-word quotation wrapping the
! matching >TYPE-array conversion word. The declared output is named
! `class`, but every visible row yields a quotation; callers in this
! file invoke it with call( ... ).
! NOTE(review): the enclosing dispatch form is not visible in this
! listing -- confirm against the complete file.
59 : [>rep-array] ( rep -- class )
61 { char-16-rep [ [ >char-array ] ] }
62 { uchar-16-rep [ [ >uchar-array ] ] }
63 { short-8-rep [ [ >short-array ] ] }
64 { ushort-8-rep [ [ >ushort-array ] ] }
65 { int-4-rep [ [ >int-array ] ] }
66 { uint-4-rep [ [ >uint-array ] ] }
67 { longlong-2-rep [ [ >longlong-array ] ] }
68 { ulonglong-2-rep [ [ >ulonglong-array ] ] }
69 { float-4-rep [ [ >float-array ] ] }
70 { double-2-rep [ [ >double-array ] ] }
! Lookup table from a vector rep to a quotation that pushes the rep's
! component count (16, 8, 4 or 2) and calls the matching (TYPE-array)
! constructor. The result is a quotation, not an array; <rep-array>
! is the word that actually calls it.
! NOTE(review): the enclosing dispatch form is not visible in this
! listing -- confirm against the complete file.
73 : [<rep-array>] ( rep -- class )
75 { char-16-rep [ [ 16 (char-array) ] ] }
76 { uchar-16-rep [ [ 16 (uchar-array) ] ] }
77 { short-8-rep [ [ 8 (short-array) ] ] }
78 { ushort-8-rep [ [ 8 (ushort-array) ] ] }
79 { int-4-rep [ [ 4 (int-array) ] ] }
80 { uint-4-rep [ [ 4 (uint-array) ] ] }
81 { longlong-2-rep [ [ 2 (longlong-array) ] ] }
82 { ulonglong-2-rep [ [ 2 (ulonglong-array) ] ] }
83 { float-4-rep [ [ 4 (float-array) ] ] }
84 { double-2-rep [ [ 2 (double-array) ] ] }
! Pushes the two component values the comparison words select between
! with `?`: ( -1 bits>double, 0.0 ) for float reps, ( -1, 0 ) otherwise.
: rep-tf-values ( rep -- t f )
    float-vector-rep? not
    [ -1 0 ]
    [ -1 bits>double 0.0 ]
    if ;
! Converts a to the specialized array for rep by looking up the
! conversion quotation with [byte>rep-array] and calling it on a.
: >rep-array ( a rep -- a' )
    [byte>rep-array]
    call( bytes -- elements ) ; inline
! Two-operand form of >rep-array: the conversion quotation for rep is
! looked up once and then applied to both a and b.
: 2>rep-array ( a b rep -- a' b' )
    [byte>rep-array]
    [ call( bytes -- elements ) ] curry
    bi@ ; inline
! Allocates a new specialized array for rep by calling the constructor
! quotation that [<rep-array>] returns.
: <rep-array> ( rep -- a' )
    [<rep-array>]
    call( -- fresh ) ; inline
! Applies quot to every component of a (viewed through rep) and returns
! the underlying storage of the mapped array.
:: components-map ( a rep quot -- c )
    a rep >rep-array
    quot map
    underlying>> ; inline
! Applies the binary quot to corresponding components of a and b (both
! viewed through rep) and returns the underlying storage of the result.
:: components-2map ( a b rep quot -- c )
    a b rep 2>rep-array
    quot 2map
    underlying>> ; inline
! Folds the components of a (viewed through rep) with quot; the mapping
! stage of map-reduce is the empty quotation, so components are combined
! as they are.
:: components-reduce ( a rep quot -- x )
    a rep >rep-array
    [ ] quot map-reduce ; inline
! Same as components-map, except that rep is first translated with
! >bitwise-vector-rep so quot sees the components of that rep.
: bitwise-components-map ( a rep quot -- c )
    [ >bitwise-vector-rep ] dip
    components-map ; inline
! Same as components-2map, except that rep is first translated with
! >bitwise-vector-rep so quot sees the components of that rep.
: bitwise-components-2map ( a b rep quot -- c )
    [ >bitwise-vector-rep ] dip
    components-2map ; inline
! Same as components-reduce, except that rep is first translated with
! >bitwise-vector-rep so quot sees the components of that rep.
: bitwise-components-reduce ( a rep quot -- x )
    [ >bitwise-vector-rep ] dip
    components-reduce ; inline
! Shared shuffle helper (inline). From the visible lines: a is converted
! to a rep array (a'), a fresh result array (c') is allocated, a source
! index `from` is masked with (rep-length - 1), and the word returns the
! underlying storage of c'.
! NOTE(review): the loop over elts that binds `from`, the fetch from a'
! and the store into c' are not visible in this listing -- confirm
! against the complete file.
111 :: (vshuffle) ( a elts rep -- c )
112 a rep >rep-array :> a'
113 rep <rep-array> :> c'
115 from rep rep-length 1 - bitand
119 c' underlying>> ; inline
! Component-wise add, subtract and negate. The binary words hand the
! scalar operator to components-2map, the unary one to components-map.
123 : (simd-v+) ( a b rep -- c ) [ + ] components-2map ;
124 : (simd-v-) ( a b rep -- c ) [ - ] components-2map ;
125 : (simd-vneg) ( a rep -- c ) [ neg ] components-map ;
! Alternating subtract/add: even-indexed components of the result hold
! a - b and odd-indexed components hold a + b.
! NOTE(review): the store of the difference, the `] each` terminator and
! the final result expression were absent from the listing; they are
! restored here to match the visible store for the odd component and
! the `c' underlying>>` ending used by (vshuffle).
:: (simd-v+-) ( a b rep -- c )
    a b rep 2>rep-array :> ( a' b' )
    rep <rep-array> :> c'
    ! Step through the even indices 0, 2, ... The upper bound must come
    ! from rep-length: rep is a representation, not a sequence, so
    ! `length` does not apply to it.
    0 rep rep-length 1 - 2 <range> [| n |
        n a' nth-unsafe n b' nth-unsafe -
        n c' set-nth-unsafe

        n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
        n 1 + c' set-nth-unsafe
    ] each
    c' underlying>> ;
! Saturating add, subtract and multiply: each component result is
! passed through c-type-clamp together with the rep's component type.
: (simd-vs+) ( a b rep -- c )
    dup rep-component-type
    [ [ + ] dip c-type-clamp ] curry
    components-2map ;
: (simd-vs-) ( a b rep -- c )
    dup rep-component-type
    [ [ - ] dip c-type-clamp ] curry
    components-2map ;
: (simd-vs*) ( a b rep -- c )
    dup rep-component-type
    [ [ * ] dip c-type-clamp ] curry
    components-2map ;
! Component-wise multiply, divide, minimum and maximum of two vectors;
! each word hands the scalar operator to components-2map.
143 : (simd-v*) ( a b rep -- c ) [ * ] components-2map ;
144 : (simd-v/) ( a b rep -- c ) [ / ] components-2map ;
145 : (simd-vmin) ( a b rep -- c ) [ min ] components-2map ;
146 : (simd-vmax) ( a b rep -- c ) [ max ] components-2map ;
! Dot product. The first line converts both operands to rep arrays and
! seeds the running total with the product of their first components,
! keeping the arrays and rep on the stack. The second line walks the
! indices 1 .. rep-length - 1, fetching the component at each index from
! both arrays (without bounds checks), multiplying them and adding the
! product to the total.
147 : (simd-v.) ( a b rep -- n )
148 [ 2>rep-array [ [ first ] bi@ * ] 2keep ] keep
149 1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] with with each ;
! vsqrt and vabs map fsqrt / abs over the components; sum folds the
! components with +; vbitand combines the components of the
! bitwise-translated rep with bitand.
150 : (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ;
151 : (simd-sum) ( a rep -- n ) [ + ] components-reduce ;
152 : (simd-vabs) ( a rep -- c ) [ abs ] components-map ;
153 : (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
! Bitwise and-not: each result component is (bitnot a) bitand b.
! The complement must be the integer `bitnot`; the boolean `not` would
! turn every component into f and break the following bitand.
: (simd-vbitandn) ( a b rep -- c )
    [ [ bitnot ] dip bitand ] bitwise-components-2map ;
! Bitwise or, xor and complement over the components of the
! bitwise-translated rep, followed by vand, which is defined with the
! same bitand quotation as vbitand.
155 : (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
156 : (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
157 : (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
158 : (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
! And-not: each result component is (bitnot a) bitand b.
! The complement must be the integer `bitnot`; the boolean `not` would
! turn every component into f and break the following bitand.
: (simd-vandn) ( a b rep -- c )
    [ [ bitnot ] dip bitand ] bitwise-components-2map ;
! vor, vxor and vnot use the same quotations as vbitor, vbitxor and
! vbitnot. vlshift applies `shift` with count n to every component of
! the bitwise-translated rep; vrshift applies `shift` with the count
! negated.
160 : (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
161 : (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
162 : (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
163 : (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
164 : (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
! Whole-vector shift on the raw elements of a: the last n elements are
! cut off and zeros are padded at the front up to 16 elements. rep is
! accepted but not used.
:: (simd-hlshift) ( a n rep -- c )
    a n head-slice*
    16 0 pad-head ;
! Whole-vector shift on the raw elements of a: the first n elements are
! cut off and zeros are padded at the end up to 16 elements. rep is
! accepted but not used.
:: (simd-hrshift) ( a n rep -- c )
    a n tail-slice
    16 0 pad-tail ;
! Element shuffle: the index sequence passed as n is padded at the end
! with zeros up to rep-length entries, then handed to (vshuffle).
:: (simd-vshuffle-elements) ( a n rep -- c )
    a
    n rep rep-length 0 pad-tail
    rep (vshuffle) ;
! Byte shuffle: the supplied rep is ignored and (vshuffle) is always
! run with uchar-16-rep.
:: (simd-vshuffle-bytes) ( a b rep -- c )
    a b uchar-16-rep (vshuffle) ;
! Interleaves the first halves of a and b: for each n below
! rep-length / 2, a'[n] is stored at c'[2n] and b'[n] at c'[2n + 1].
! NOTE(review): the loop terminator and the result expression are not
! visible in this listing -- confirm against the complete file.
171 :: (simd-vmerge-head) ( a b rep -- c )
172 a b rep 2>rep-array :> ( a' b' )
173 rep <rep-array> :> c'
174 rep rep-length 2 /i iota [| n |
175 n a' nth-unsafe n 2 * c' set-nth-unsafe
176 n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
! Interleaves the second halves of a and b: with len = rep-length / 2,
! a'[n + len] is stored at c'[2n] and b'[n + len] at c'[2n + 1].
! NOTE(review): the loop header that binds n, the loop terminator and
! the result expression are not visible in this listing -- confirm
! against the complete file.
179 :: (simd-vmerge-tail) ( a b rep -- c )
180 a b rep 2>rep-array :> ( a' b' )
181 rep <rep-array> :> c'
182 rep rep-length 2 /i :> len
184 n len + a' nth-unsafe n 2 * c' set-nth-unsafe
185 n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
! Component-wise comparisons. Each word fetches the rep's true/false
! component values with rep-tf-values and maps a quotation of the form
! [ <test> t f ? ] over the paired components, so every result
! component is one of those two values.
: (simd-v<=) ( a b rep -- c )
    dup rep-tf-values [ ? ] 2curry
    [ <= ] prepose components-2map ;
: (simd-v<) ( a b rep -- c )
    dup rep-tf-values [ ? ] 2curry
    [ < ] prepose components-2map ;
: (simd-v=) ( a b rep -- c )
    dup rep-tf-values [ ? ] 2curry
    [ = ] prepose components-2map ;
: (simd-v>) ( a b rep -- c )
    dup rep-tf-values [ ? ] 2curry
    [ > ] prepose components-2map ;
: (simd-v>=) ( a b rep -- c )
    dup rep-tf-values [ ? ] 2curry
    [ >= ] prepose components-2map ;
: (simd-vunordered?) ( a b rep -- c )
    dup rep-tf-values [ ? ] 2curry
    [ unordered? ] prepose components-2map ;
! Mask tests over the components of the bitwise-translated rep.
! vany?  : the bitor of all components is non-zero.
! vall?  : the bitand of all components is non-zero.
! vnone? : the bitor of all components is zero.
200 : (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
201 : (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
202 : (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
! Converts the components of a to the float rep that >float-vector-rep
! pairs with rep: a is viewed as a rep array, the >TYPE-array quotation
! for the float rep is called on it, and the underlying storage is
! returned.
:: (simd-v>float) ( a rep -- c )
    a rep >rep-array
    rep >float-vector-rep [>rep-array] call( i -- f )
    underlying>> ;
! Converts the components of a to the integer rep that >int-vector-rep
! pairs with rep: a is viewed as a rep array, the >TYPE-array quotation
! for the integer rep is called on it, and the underlying storage is
! returned.
:: (simd-v>integer) ( a rep -- c )
    a rep >rep-array
    rep >int-vector-rep [>rep-array] call( i -- f )
    underlying>> ;
! Packs a and b into one vector of the narrower rep: the components of
! both operands are concatenated, each is passed through c-type-clamp
! with the narrow rep's component type, and the results are collected
! into an array of the narrow rep.
: (simd-vpack-signed) ( a b rep -- c )
    [ 2>rep-array cord-append ]
    ! map-as needs an actual array as its exemplar, so call <rep-array>;
    ! [<rep-array>] would only supply the constructor quotation.
    [ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
    '[ _ c-type-clamp ] swap map-as underlying>> ;
! Packs a and b into one vector of the narrower unsigned rep: the
! components of both operands are concatenated, each is passed through
! c-type-clamp with the unsigned narrow rep's component type, and the
! results are collected into an array of that rep.
: (simd-vpack-unsigned) ( a b rep -- c )
    [ 2>rep-array cord-append ]
    ! map-as needs an actual array as its exemplar, so call <rep-array>;
    ! [<rep-array>] would only supply the constructor quotation.
    [ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
    '[ _ c-type-clamp ] swap map-as underlying>> ;
! Widens the leading components of a: the first N components (N is the
! rep-length of the widened rep) are sliced off and converted with the
! widened rep's >TYPE-array quotation; the underlying storage is
! returned.
:: (simd-vunpack-head) ( a rep -- c )
    a rep >rep-array :> elements
    rep widen-vector-rep :> wide
    elements wide rep-length head-slice
    wide [>rep-array] call( a' -- c' )
    underlying>> ;
! Widens the trailing components of a: the first N components (N is the
! rep-length of the widened rep) are skipped and the remainder is
! converted with the widened rep's >TYPE-array quotation; the underlying
! storage is returned.
:: (simd-vunpack-tail) ( a rep -- c )
    a rep >rep-array :> elements
    rep widen-vector-rep :> wide
    elements wide rep-length tail-slice
    wide [>rep-array] call( a' -- c' )
    underlying>> ;
! Broadcast: builds a rep array in which every component is n, by
! replicating the constant quotation [ n ] over rep-length indices into
! an array obtained from <rep-array>.
! NOTE(review): the end of this definition is not visible in this
! listing; the neighbouring gather words finish with `underlying>>` --
! confirm against the complete file.
221 : (simd-with) ( n rep -- v )
222 [ rep-length iota swap '[ _ ] ] [ <rep-array> ] bi replicate-as
! Builds a vector from 2 (resp. 4) scalars: a fresh rep array is
! allocated, set-firstn stores the scalars into its leading slots, and
! the array's underlying storage is returned.
224 : (simd-gather-2) ( m n rep -- v ) <rep-array> [ 2 set-firstn ] keep underlying>> ;
225 : (simd-gather-4) ( m n o p rep -- v ) <rep-array> [ 4 set-firstn ] keep underlying>> ;
! Extracts component n of a (viewed through rep); the access uses
! nth-unsafe, so n is not bounds-checked.
:: (simd-select) ( a n rep -- x )
    n
    a rep >rep-array
    nth-unsafe ;
! Reads one vector from memory: rep-size bytes starting n bytes past
! c-ptr are copied into a new byte array.
:: alien-vector ( c-ptr n rep -- value )
    n c-ptr <displaced-alien>
    rep rep-size
    memory>byte-array ;
! Writes one vector to memory: rep-size bytes are copied with memcpy
! from value to the address n bytes past c-ptr.
:: set-alien-vector ( value c-ptr n rep -- )
    n c-ptr <displaced-alien>
    value
    rep rep-size
    memcpy ;
! Loads the SIMD-related compiler vocabularies by name once the words
! above have been defined.
! NOTE(review): presumably these vocabularies refer to the words in this
! file, which is why they are required last -- confirm.
233 "compiler.cfg.intrinsics.simd" require
234 "compiler.tree.propagation.simd" require
235 "compiler.cfg.value-numbering.simd" require