1 ! (c)2009 Slava Pestov, Joe Groff bsd license
2 USING: accessors alien alien.data combinators cpu.architecture fry
3 grouping kernel libc locals math math.libm math.order math.ranges
4 sequences sequences.cords sequences.generalizations sequences.private
5 sequences.unrolled sequences.unrolled.private specialized-arrays
7 QUALIFIED-WITH: alien.c-types c
9 c:char c:short c:int c:longlong
10 c:uchar c:ushort c:uint c:ulonglong
12 IN: math.vectors.simd.intrinsics
! Identity word: the body is empty, so the value on the stack is left as is.
14 : assert-positive ( x -- y ) ;
! Maps a float rep to the unsigned integer rep listed beside it
! (float-4-rep to uint-4-rep, double-2-rep to ulonglong-2-rep).
! NOTE(review): the enclosing dispatch form and its default branch are not
! visible in this listing; confirm how non-float reps are handled.
18 : >bitwise-vector-rep ( rep -- rep' )
20 { float-4-rep [ uint-4-rep ] }
21 { double-2-rep [ ulonglong-2-rep ] }
! Maps each signed integer rep to the unsigned rep listed beside it
! (longlong-2, int-4, short-8 and char-16 to their u-prefixed counterparts).
! NOTE(review): the enclosing dispatch form and its default branch are not
! visible in this listing; confirm how other reps are handled.
25 : >uint-vector-rep ( rep -- rep' )
27 { longlong-2-rep [ ulonglong-2-rep ] }
28 { int-4-rep [ uint-4-rep ] }
29 { short-8-rep [ ushort-8-rep ] }
30 { char-16-rep [ uchar-16-rep ] }
! Maps a float rep to the signed integer rep listed beside it
! (float-4-rep to int-4-rep, double-2-rep to longlong-2-rep).
! NOTE(review): the enclosing dispatch form is not visible in this listing.
34 : >int-vector-rep ( rep -- rep' )
36 { float-4-rep [ int-4-rep ] }
37 { double-2-rep [ longlong-2-rep ] }
! Maps a signed integer rep to the float rep listed beside it
! (int-4-rep to float-4-rep, longlong-2-rep to double-2-rep).
! NOTE(review): the enclosing dispatch form is not visible in this listing.
40 : >float-vector-rep ( rep -- rep' )
42 { int-4-rep [ float-4-rep ] }
43 { longlong-2-rep [ double-2-rep ] }
! For each rep, calls <c-direct-array> on the byte-array with the element
! count and C type matching that rep (e.g. 16 c:char for char-16-rep,
! 2 c:double for double-2-rep).
! NOTE(review): presumably the result is a typed view over the same bytes
! rather than a copy; confirm against the alien.data documentation.
! The enclosing dispatch form is not visible in this listing.
46 : byte>rep-array ( byte-array rep -- array )
48 { char-16-rep [ 16 c:char <c-direct-array> ] }
49 { uchar-16-rep [ 16 c:uchar <c-direct-array> ] }
50 { short-8-rep [ 8 c:short <c-direct-array> ] }
51 { ushort-8-rep [ 8 c:ushort <c-direct-array> ] }
52 { int-4-rep [ 4 c:int <c-direct-array> ] }
53 { uint-4-rep [ 4 c:uint <c-direct-array> ] }
54 { longlong-2-rep [ 2 c:longlong <c-direct-array> ] }
55 { ulonglong-2-rep [ 2 c:ulonglong <c-direct-array> ] }
56 { float-4-rep [ 4 c:float <c-direct-array> ] }
57 { double-2-rep [ 2 c:double <c-direct-array> ] }
! For each rep, converts seq with >c-array using the C element type that
! matches the rep (c:char for char-16-rep, ..., c:double for double-2-rep).
! No length is passed; the result length comes from seq.
! The enclosing dispatch form is not visible in this listing.
60 : >rep-array ( seq rep -- array )
62 { char-16-rep [ c:char >c-array ] }
63 { uchar-16-rep [ c:uchar >c-array ] }
64 { short-8-rep [ c:short >c-array ] }
65 { ushort-8-rep [ c:ushort >c-array ] }
66 { int-4-rep [ c:int >c-array ] }
67 { uint-4-rep [ c:uint >c-array ] }
68 { longlong-2-rep [ c:longlong >c-array ] }
69 { ulonglong-2-rep [ c:ulonglong >c-array ] }
70 { float-4-rep [ c:float >c-array ] }
71 { double-2-rep [ c:double >c-array ] }
! For each rep, creates a new C array via (c-array) with the element count
! and C type matching the rep (e.g. 16 c:char, 4 c:float, 2 c:double).
! NOTE(review): (c-array) presumably leaves the contents uninitialized, so
! callers are expected to fill every element; confirm.
! The enclosing dispatch form is not visible in this listing.
74 : <rep-array> ( rep -- array )
76 { char-16-rep [ 16 c:char (c-array) ] }
77 { uchar-16-rep [ 16 c:uchar (c-array) ] }
78 { short-8-rep [ 8 c:short (c-array) ] }
79 { ushort-8-rep [ 8 c:ushort (c-array) ] }
80 { int-4-rep [ 4 c:int (c-array) ] }
81 { uint-4-rep [ 4 c:uint (c-array) ] }
82 { longlong-2-rep [ 2 c:longlong (c-array) ] }
83 { ulonglong-2-rep [ 2 c:ulonglong (c-array) ] }
84 { float-4-rep [ 4 c:float (c-array) ] }
85 { double-2-rep [ 2 c:double (c-array) ] }
! Pushes the pair of lane values used to encode "true" and "false" for rep:
! -1 bits>double and 0.0 when float-vector-rep? holds, otherwise the
! integers -1 and 0. The comparison words in this file fry these two
! values into their per-lane quotations.
88 : rep-tf-values ( rep -- t f )
89 float-vector-rep? [ -1 bits>double 0.0 ] [ -1 0 ] if ;
! Applies byte>rep-array with the same rep to both a and b.
91 : 2byte>rep-array ( a b rep -- a' b' )
92 '[ _ byte>rep-array ] bi@ ; inline
! Converts a with byte>rep-array, applies quot to rep-length elements via
! unrolled-map-unsafe, and returns underlying>> of the mapped result.
94 : components-map ( a rep quot -- c )
95 [ [ byte>rep-array ] [ rep-length ] bi ] dip unrolled-map-unsafe underlying>> ; inline
! Two-input form of components-map: converts a and b with 2byte>rep-array,
! applies quot pairwise to rep-length elements via unrolled-2map-unsafe, and
! returns underlying>> of the result.
96 : components-2map ( a b rep quot -- c )
97 [ [ 2byte>rep-array ] [ rep-length ] bi ] dip unrolled-2map-unsafe underlying>> ; inline
! Converts a with byte>rep-array and combines its elements with quot using
! map-reduce; the mapping quotation is empty, so elements are combined
! unchanged.
98 : components-reduce ( a rep quot -- x )
99 [ byte>rep-array [ ] ] dip map-reduce ; inline
! Like components-map, but rep is first passed through >bitwise-vector-rep
! before a is converted and rep-length is taken.
101 : bitwise-components-map ( a rep quot -- c )
102 [ >bitwise-vector-rep [ byte>rep-array ] [ rep-length ] bi ] dip
103 unrolled-map-unsafe underlying>> ; inline
! Like components-2map, but rep is first passed through >bitwise-vector-rep
! before a and b are converted and rep-length is taken.
104 : bitwise-components-2map ( a b rep quot -- c )
105 [ >bitwise-vector-rep [ 2byte>rep-array ] [ rep-length ] bi ] dip
106 unrolled-2map-unsafe underlying>> ; inline
! Like components-reduce, but rep is first passed through
! >bitwise-vector-rep before a is converted.
107 : bitwise-components-reduce ( a rep quot -- x )
108 [ >bitwise-vector-rep byte>rep-array [ ] ] dip map-reduce ; inline
! Variant of bitwise-components-reduce that takes an explicit starting value:
! a is converted using the >bitwise-vector-rep of rep, then folded with
! reduce starting from identity.
109 : bitwise-components-reduce* ( a rep identity quot -- x )
110 [ >bitwise-vector-rep byte>rep-array ] 2dip reduce ; inline
! Single-input shuffle. a is converted with byte>rep-array (a'), and a fresh
! rep array c' is created. The loop visits each entry of elts together with
! its index (bound as from and to) and masks from with rep-length minus 1.
! The word returns underlying>> of c'.
! NOTE(review): the loop-body lines between the mask and the closing bracket
! are not visible in this listing; presumably they read a' at the masked
! index and store into c' at to. Confirm against the full source.
112 :: (vshuffle) ( a elts rep -- c )
113 a rep byte>rep-array :> a'
114 rep <rep-array> :> c'
115 elts rep rep-length [| from to |
116 from rep rep-length 1 - bitand
119 ] unrolled-each-index-unsafe
120 c' underlying>> ; inline
! Two-input shuffle. a and b are converted with byte>rep-array and joined
! with cord-append into ab'. The loop visits each entry of elts together with
! its index (bound as from and to) and masks from with twice rep-length
! minus 1. The word returns underlying>> of the fresh rep array c'.
! NOTE(review): the loop-body lines between the mask and the closing bracket
! are not visible in this listing; presumably they read ab' at the masked
! index and store into c' at to. Confirm against the full source.
122 :: (vshuffle2) ( a b elts rep -- c )
123 a rep byte>rep-array :> a'
124 b rep byte>rep-array :> b'
125 a' b' cord-append :> ab'
126 rep <rep-array> :> c'
127 elts rep rep-length [| from to |
128 from rep rep-length dup + 1 - bitand
131 ] unrolled-each-index-unsafe
132 c' underlying>> ; inline
! Division that dispatches on the class of the top stack item: the integer
! method uses /i, the float method uses /f.
134 GENERIC: native/ ( x y -- x/y )
136 M: integer native/ /i ; inline
137 M: float native/ /f ; inline
! Folds the components of a (converted using the bitwise form of rep) into
! an integer, starting from 0. Each step shifts the accumulator left by one
! bit and ORs in 0 if the component is zero, 1 otherwise.
139 : (vgetmask) ( a rep -- b )
140 0 [ [ 1 shift ] [ zero? 0 1 ? ] bi* bitor ] bitwise-components-reduce* ; inline
! Component-wise sum of a and b.
144 : (simd-v+) ( a b rep -- c ) [ + ] components-2map ;
! Component-wise difference (component of a minus component of b).
145 : (simd-v-) ( a b rep -- c ) [ - ] components-2map ;
! Component-wise negation of a.
146 : (simd-vneg) ( a rep -- c ) [ neg ] components-map ;
! Alternating subtract/add. a and b are converted with 2byte>rep-array and a
! fresh rep array c' is created. The loop runs over n drawn from the range
! 0 .. rep-length - 1 in steps of 2, for rep-length / 2 iterations.
! At each n it computes a'[n] - b'[n], then stores a'[n+1] + b'[n+1] into
! c' at n + 1.
! NOTE(review): the lines that store the difference and that end the word
! are not visible in this listing; presumably the difference goes to c' at n
! and the word returns underlying>> of c'. Confirm against the full source.
147 :: (simd-v+-) ( a b rep -- c )
148 a b rep 2byte>rep-array :> ( a' b' )
149 rep <rep-array> :> c'
150 0 rep rep-length [ 1 - 2 <range> ] [ 2 /i ] bi [| n |
151 n a' nth-unsafe n b' nth-unsafe -
154 n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
155 n 1 + c' set-nth-unsafe
156 ] unrolled-each-unsafe
! Saturating add: each component sum is passed through c:c-type-clamp with
! the rep's component type.
158 : (simd-vs+) ( a b rep -- c )
159 dup rep-component-type '[ + _ c:c-type-clamp ] components-2map ;
! Saturating subtract: each component difference is clamped the same way.
160 : (simd-vs-) ( a b rep -- c )
161 dup rep-component-type '[ - _ c:c-type-clamp ] components-2map ;
! Saturating multiply: each component product is clamped the same way.
162 : (simd-vs*) ( a b rep -- c )
163 dup rep-component-type '[ * _ c:c-type-clamp ] components-2map ;
! Component-wise product of a and b.
164 : (simd-v*) ( a b rep -- c ) [ * ] components-2map ;
! Multiplies components, then applies shift with a count of -8 times the
! heap-size of the rep's component type (a negative count, so the product
! is shifted right by that many bits).
165 : (simd-v*high) ( a b rep -- c )
166 dup rep-component-type c:heap-size -8 * '[ * _ shift ] components-2map ;
! Pairwise multiply with a saturating horizontal add.
! Rep selection: when rep is char-16-rep or uchar-16-rep, a is read as
! uchar-16-rep and b as char-16-rep; otherwise both are read as rep.
! The output rep (wide-rep) is signed-rep applied to the widened b-rep, and
! wide-type is its component type.
! a and b are each split into groups of two components. For every pair of
! groups the quotation adds the product of the second components to a value
! produced by the preceding line, then clamps the sum to wide-type. Results
! fill a fresh wide-rep array, and the word returns its underlying>>.
! NOTE(review): the first line of the quotation is not visible in this
! listing; presumably it multiplies the first components. Confirm against
! the full source.
167 :: (simd-v*hs+) ( a b rep -- c )
168 rep { char-16-rep uchar-16-rep } member-eq?
169 [ uchar-16-rep char-16-rep ]
170 [ rep rep ] if :> ( a-rep b-rep )
171 b-rep widen-vector-rep signed-rep :> wide-rep
172 wide-rep rep-component-type :> wide-type
173 a a-rep byte>rep-array 2 <groups> :> a'
174 b b-rep byte>rep-array 2 <groups> :> b'
175 a' b' rep rep-length 2 /i [
177 [ [ second ] bi@ * ] 2bi +
178 wide-type c:c-type-clamp
179 ] wide-rep <rep-array> unrolled-2map-as-unsafe underlying>> ;
! Component-wise division using native/ (which uses /i for integers and /f
! for floats).
180 : (simd-v/) ( a b rep -- c ) [ native/ ] components-2map ;
! Component-wise average. The two components are summed; an integer sum
! has 1 added and is shifted right by one bit, any other sum is multiplied
! by 0.5.
181 : (simd-vavg) ( a b rep -- c )
182 [ + dup integer? [ 1 + -1 shift ] [ 0.5 * ] if ] components-2map ;
! Component-wise minimum of a and b.
183 : (simd-vmin) ( a b rep -- c ) [ min ] components-2map ;
! Component-wise maximum of a and b.
184 : (simd-vmax) ( a b rep -- c ) [ max ] components-2map ;
! Dot product. Starts with the product of the first components of a and b,
! then for every index from 1 up to (but not including) rep-length adds the
! product of the components at that index.
186 : (simd-v.) ( a b rep -- n )
187 [ 2byte>rep-array [ [ first ] bi@ * ] 2keep ] keep
188 1 swap rep-length [a,b) [ '[ _ swap nth-unsafe ] bi@ * + ] 2with each ;
! Component-wise square root via fsqrt.
189 : (simd-vsqrt) ( a rep -- c ) [ fsqrt ] components-map ;
! Sum of absolute differences: maps [ - abs ] over component pairs and adds
! up the results. The reduction value is returned directly; underlying>> is
! not applied here.
190 : (simd-vsad) ( a b rep -- c ) 2byte>rep-array [ - abs ] [ + ] 2map-reduce ;
! Sum of all components of a.
191 : (simd-sum) ( a rep -- n ) [ + ] components-reduce ;
! Component-wise absolute value.
192 : (simd-vabs) ( a rep -- c ) [ abs ] components-map ;
! Bitwise operations on components, using the bitwise form of rep
! (via bitwise-components-map / bitwise-components-2map).
! AND of a and b.
193 : (simd-vbitand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
! AND-NOT: the component of a is complemented, then ANDed with that of b.
194 : (simd-vbitandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
! OR of a and b.
195 : (simd-vbitor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
! XOR of a and b.
196 : (simd-vbitxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
! Complement of a.
197 : (simd-vbitnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
! The five words below have bodies identical to the vbit* words above.
198 : (simd-vand) ( a b rep -- c ) [ bitand ] bitwise-components-2map ;
199 : (simd-vandn) ( a b rep -- c ) [ [ bitnot ] dip bitand ] bitwise-components-2map ;
200 : (simd-vor) ( a b rep -- c ) [ bitor ] bitwise-components-2map ;
201 : (simd-vxor) ( a b rep -- c ) [ bitxor ] bitwise-components-2map ;
202 : (simd-vnot) ( a rep -- c ) [ bitnot ] bitwise-components-map ;
! Shifts every component (in the bitwise form of rep) by n using shift.
203 : (simd-vlshift) ( a n rep -- c ) swap '[ _ shift ] bitwise-components-map ;
! Same, but the shift count is negated first, giving the opposite direction.
204 : (simd-vrshift) ( a n rep -- c ) swap '[ _ neg shift ] bitwise-components-map ;
! Whole-vector shift at element granularity of a itself; rep is dropped.
! head-slice* removes the last n elements of a, and pad-head then pads the
! front with 0 until the length is 16.
206 : (simd-hlshift) ( a n rep -- c )
207 drop head-slice* 16 0 pad-head ;
! Whole-vector shift in the opposite direction; rep is dropped.
! tail-slice removes the first n elements of a, and pad-tail then pads the
! end with 0 until the length is 16.
209 : (simd-hrshift) ( a n rep -- c )
210 drop tail-slice 16 0 pad-tail ;
! Shuffle by an element list. The argument named n is the sequence of
! source indices; it is padded at the end with 0 to rep-length entries and
! then handed to (vshuffle).
211 : (simd-vshuffle-elements) ( a n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle) ;
! Two-input version of the above, delegating to (vshuffle2).
212 : (simd-vshuffle2-elements) ( a b n rep -- c ) [ rep-length 0 pad-tail ] keep (vshuffle2) ;
! Byte shuffle: the given rep is dropped and (vshuffle) is called with
! uchar-16-rep, using b as the index list.
213 : (simd-vshuffle-bytes) ( a b rep -- c ) drop uchar-16-rep (vshuffle) ;
! Interleaves the first halves of a and b. For n from 0 below
! rep-length / 2, a'[n] is stored at c'[2n] and b'[n] at c'[2n + 1].
! NOTE(review): the final line of the word is not visible in this listing;
! presumably it returns underlying>> of c'. Confirm against the full source.
214 :: (simd-vmerge-head) ( a b rep -- c )
215 a b rep 2byte>rep-array :> ( a' b' )
216 rep <rep-array> :> c'
217 rep rep-length 2 /i [| n |
218 n a' nth-unsafe n 2 * c' set-nth-unsafe
219 n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
220 ] unrolled-each-integer
! Interleaves the second halves of a and b. With len = rep-length / 2,
! a'[n + len] is stored at c'[2n] and b'[n + len] at c'[2n + 1].
! NOTE(review): the line that opens the loop (binding n) and the final line
! of the word are not visible in this listing; presumably n runs from 0
! below len and the word returns underlying>> of c'. Confirm against the
! full source.
222 :: (simd-vmerge-tail) ( a b rep -- c )
223 a b rep 2byte>rep-array :> ( a' b' )
224 rep <rep-array> :> c'
225 rep rep-length 2 /i :> len
227 n len + a' nth-unsafe n 2 * c' set-nth-unsafe
228 n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
229 ] unrolled-each-integer
! Component-wise comparisons. Each word fetches the rep's true/false lane
! values with rep-tf-values and produces, per component pair, the true value
! when the test holds and the false value otherwise.
! Test: less than or equal.
231 : (simd-v<=) ( a b rep -- c )
232 dup rep-tf-values '[ <= _ _ ? ] components-2map ;
! Test: less than.
233 : (simd-v<) ( a b rep -- c )
234 dup rep-tf-values '[ < _ _ ? ] components-2map ;
! Test: equal.
235 : (simd-v=) ( a b rep -- c )
236 dup rep-tf-values '[ = _ _ ? ] components-2map ;
! Test: greater than.
237 : (simd-v>) ( a b rep -- c )
238 dup rep-tf-values '[ > _ _ ? ] components-2map ;
! Test: greater than or equal.
239 : (simd-v>=) ( a b rep -- c )
240 dup rep-tf-values '[ >= _ _ ? ] components-2map ;
! Test: unordered? applied to the pair.
241 : (simd-vunordered?) ( a b rep -- c )
242 dup rep-tf-values '[ unordered? _ _ ? ] components-2map ;
! True when the bitwise OR of all components is non-zero.
243 : (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
! True when the bitwise AND of all components is non-zero.
244 : (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
! True when the bitwise OR of all components is zero.
245 : (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
! Builds an integer mask from a with (vgetmask). When rep is float-4-rep or
! double-2-rep the vector is read as uint-4-rep (four components); for every
! other rep it is read as uchar-16-rep (sixteen components).
! NOTE(review): double-2-rep is also read as four 32-bit components, so a
! two-lane vector contributes four mask bits. Confirm this is intended.
246 : (simd-vgetmask) ( a rep -- n )
247 { float-4-rep double-2-rep } member?
248 [ uint-4-rep (vgetmask) ] [ uchar-16-rep (vgetmask) ] if ;
! Converts components to floats. a is read with rep, >float is applied to
! rep-length elements, and the results are stored in a fresh array whose rep
! is >float-vector-rep of rep. Returns underlying>> of that array.
249 : (simd-v>float) ( a rep -- c )
250 [ [ byte>rep-array ] [ rep-length ] bi [ >float ] ]
251 [ >float-vector-rep <rep-array> ] bi unrolled-map-as-unsafe underlying>> ;
! Converts components to integers. a is read with rep, >integer is applied
! to rep-length elements, and the results are stored in a fresh array whose
! rep is >int-vector-rep of rep. Returns underlying>> of that array.
252 : (simd-v>integer) ( a rep -- c )
253 [ [ byte>rep-array ] [ rep-length ] bi [ >integer ] ]
254 [ >int-vector-rep <rep-array> ] bi unrolled-map-as-unsafe underlying>> ;
! Packs a and b into one vector of narrower components. Both are read with
! rep and joined with cord-append, giving 2 * rep-length elements. Each is
! clamped with c:c-type-clamp to the component type of narrow-vector-rep and
! stored in a fresh array of that narrowed rep. Returns its underlying>>.
255 : (simd-vpack-signed) ( a b rep -- c )
256 [ [ 2byte>rep-array cord-append ] [ rep-length 2 * ] bi ]
257 [ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
258 '[ _ c:c-type-clamp ] swap unrolled-map-as-unsafe underlying>> ;
! Same packing as (simd-vpack-signed), except that the narrowed rep is
! additionally passed through >uint-vector-rep, so both the clamp type and
! the output array use that rep.
259 : (simd-vpack-unsigned) ( a b rep -- c )
260 [ [ 2byte>rep-array cord-append ] [ rep-length 2 * ] bi ]
261 [ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ] bi
262 '[ _ c:c-type-clamp ] swap unrolled-map-as-unsafe underlying>> ;
! Widens the leading components of a. a is read with rep; head-slice keeps
! the first N elements, where N is the rep-length of widen-vector-rep of rep.
! The slice is converted with >rep-array to the widened rep, and the word
! returns underlying>> of the result.
263 : (simd-vunpack-head) ( a rep -- c )
264 [ byte>rep-array ] [ widen-vector-rep [ rep-length ] [ '[ _ >rep-array ] ] bi ] bi
265 [ head-slice ] dip call( a' -- c' ) underlying>> ;
! Widens the trailing components of a. a is read with rep; tail-slice drops
! the first N elements, where N is the rep-length of widen-vector-rep of rep.
! The remainder is converted with >rep-array to the widened rep, and the
! word returns underlying>> of the result.
266 : (simd-vunpack-tail) ( a rep -- c )
267 [ byte>rep-array ] [ widen-vector-rep [ rep-length ] [ '[ _ >rep-array ] ] bi ] bi
268 [ tail-slice ] dip call( a' -- c' ) underlying>> ;
! Broadcast: uses replicate-as to build a sequence of rep-length copies of n,
! with a fresh rep array as the exemplar.
! NOTE(review): the final line of the word is not visible in this listing;
! presumably it takes underlying>> of the result. Confirm against the full
! source.
269 : (simd-with) ( n rep -- v )
270 [ rep-length swap '[ _ ] ] [ <rep-array> ] bi replicate-as
! Creates a fresh rep array, stores the two stack values into its first two
! elements with set-firstn-unsafe, and returns its underlying>>.
272 : (simd-gather-2) ( m n rep -- v ) <rep-array> [ 2 set-firstn-unsafe ] keep underlying>> ;
! Four-value version of the above.
273 : (simd-gather-4) ( m n o p rep -- v ) <rep-array> [ 4 set-firstn-unsafe ] keep underlying>> ;
! Returns the component at index n of a, read with rep. nth-unsafe is used,
! so the index is not bounds-checked here.
274 : (simd-select) ( a n rep -- x ) swapd byte>rep-array nth-unsafe ;
! Reads a vector from memory: displaces c-ptr by n with <displaced-alien>,
! then copies rep-size bytes from that address into a new byte-array using
! memory>byte-array.
276 : alien-vector ( c-ptr n rep -- value )
277 [ swap <displaced-alien> ] dip rep-size memory>byte-array ;
! Writes a vector to memory: displaces c-ptr by n with <displaced-alien>,
! then memcpy copies rep-size bytes from value to that address (the
! displaced pointer is the destination, value the source).
278 : set-alien-vector ( value c-ptr n rep -- )
279 [ swap <displaced-alien> swap ] dip rep-size memcpy ;
! Load three compiler vocabularies by name once this vocabulary's
! definitions exist.
! NOTE(review): presumably these register the compiler-side SIMD support
! for the words defined in this file; confirm against those vocabularies.
281 "compiler.cfg.intrinsics.simd" require
282 "compiler.tree.propagation.simd" require
283 "compiler.cfg.value-numbering.simd" require