1 ! (c)2009 Slava Pestov, Joe Groff bsd license
2 USING: accessors alien alien.data combinators
3 sequences.cords cpu.architecture fry generalizations grouping
4 kernel libc locals macros math math.libm math.order
5 math.ranges math.vectors sequences sequences.generalizations
6 sequences.private sequences.unrolled sequences.unrolled.private
7 specialized-arrays vocabs words effects.parser locals.parser ;
8 QUALIFIED-WITH: alien.c-types c
10 c:char c:short c:int c:longlong
11 c:uchar c:ushort c:uint c:ulonglong
13 IN: math.vectors.simd.intrinsics
! Builds a quotation that, when called, runs DEF through call-effect
! with EFFECT as the declared stack effect.
: simd-intrinsic-body ( def effect -- def' )
    [ call-effect ] 2curry ;
! Defines WORD with the declared EFFECT, using DEF wrapped by
! simd-intrinsic-body as its definition.
: define-simd-intrinsic ( word def effect -- )
    ! Keep a copy of the effect on top while the body is wrapped.
    dup [ simd-intrinsic-body ] dip
    define-declared ;
! Parsing words used further down to declare the SIMD fallback words.
! SIMD-INTRINSIC:: reads its definition with (::) and installs the result
! with define-declared.
22 SYNTAX: SIMD-INTRINSIC:
24 SYNTAX: SIMD-INTRINSIC::
25 (::) define-declared ;
! The body is empty, so the value on the stack is passed through untouched;
! despite the name, no check is performed here.
29 : assert-positive ( x -- y ) ;
! Picks the rep used by the bitwise-components-* helpers. The cases listed
! here send float-4-rep to uint-4-rep and double-2-rep to ulonglong-2-rep.
33 : >bitwise-vector-rep ( rep -- rep' )
35 { float-4-rep [ uint-4-rep ] }
36 { double-2-rep [ ulonglong-2-rep ] }
! Maps a signed integer rep to the unsigned rep with the same component
! count: longlong-2 -> ulonglong-2, int-4 -> uint-4, short-8 -> ushort-8,
! char-16 -> uchar-16. Used by (simd-vpack-unsigned).
40 : >uint-vector-rep ( rep -- rep' )
42 { longlong-2-rep [ ulonglong-2-rep ] }
43 { int-4-rep [ uint-4-rep ] }
44 { short-8-rep [ ushort-8-rep ] }
45 { char-16-rep [ uchar-16-rep ] }
! Maps a float rep to the integer rep with the same component count:
! float-4 -> int-4 and double-2 -> longlong-2. Used by (simd-v>integer)
! to choose the result array type.
49 : >int-vector-rep ( rep -- rep' )
51 { float-4-rep [ int-4-rep ] }
52 { double-2-rep [ longlong-2-rep ] }
! Maps an integer rep to the float rep with the same component count:
! int-4 -> float-4 and longlong-2 -> double-2. Used by (simd-v>float)
! to choose the result array type.
55 : >float-vector-rep ( rep -- rep' )
57 { int-4-rep [ float-4-rep ] }
58 { longlong-2-rep [ double-2-rep ] }
! Looks up, per rep, a quotation that calls <c-direct-array> with that rep's
! component count and C type (16 chars, 8 shorts, 4 ints/floats,
! 2 longlongs/doubles). The output is named `class` in the stack effect, but
! >rep-array and 2>rep-array run it with call( a -- a' ), so it is a quotation.
! NOTE(review): presumably <c-direct-array> views the input bytes in place
! rather than copying -- confirm against alien.data.
61 : [byte>rep-array] ( rep -- class )
63 { char-16-rep [ [ 16 c:char <c-direct-array> ] ] }
64 { uchar-16-rep [ [ 16 c:uchar <c-direct-array> ] ] }
65 { short-8-rep [ [ 8 c:short <c-direct-array> ] ] }
66 { ushort-8-rep [ [ 8 c:ushort <c-direct-array> ] ] }
67 { int-4-rep [ [ 4 c:int <c-direct-array> ] ] }
68 { uint-4-rep [ [ 4 c:uint <c-direct-array> ] ] }
69 { longlong-2-rep [ [ 2 c:longlong <c-direct-array> ] ] }
70 { ulonglong-2-rep [ [ 2 c:ulonglong <c-direct-array> ] ] }
71 { float-4-rep [ [ 4 c:float <c-direct-array> ] ] }
72 { double-2-rep [ [ 2 c:double <c-direct-array> ] ] }
! Looks up, per rep, a quotation that converts a sequence with >c-array
! using the rep's component C type. The unpack intrinsics run the result
! with call( a' -- c' ), so the value named `class` is a quotation.
75 : [>rep-array] ( rep -- class )
77 { char-16-rep [ [ c:char >c-array ] ] }
78 { uchar-16-rep [ [ c:uchar >c-array ] ] }
79 { short-8-rep [ [ c:short >c-array ] ] }
80 { ushort-8-rep [ [ c:ushort >c-array ] ] }
81 { int-4-rep [ [ c:int >c-array ] ] }
82 { uint-4-rep [ [ c:uint >c-array ] ] }
83 { longlong-2-rep [ [ c:longlong >c-array ] ] }
84 { ulonglong-2-rep [ [ c:ulonglong >c-array ] ] }
85 { float-4-rep [ [ c:float >c-array ] ] }
86 { double-2-rep [ [ c:double >c-array ] ] }
! Looks up, per rep, a quotation that calls (c-array) with the rep's
! component count and C type. <rep-array> runs the result with
! call( -- a' ), so the value named `class` is a quotation.
! NOTE(review): presumably (c-array) allocates without initializing the
! contents -- confirm against alien.data.
89 : [<rep-array>] ( rep -- class )
91 { char-16-rep [ [ 16 c:char (c-array) ] ] }
92 { uchar-16-rep [ [ 16 c:uchar (c-array) ] ] }
93 { short-8-rep [ [ 8 c:short (c-array) ] ] }
94 { ushort-8-rep [ [ 8 c:ushort (c-array) ] ] }
95 { int-4-rep [ [ 4 c:int (c-array) ] ] }
96 { uint-4-rep [ [ 4 c:uint (c-array) ] ] }
97 { longlong-2-rep [ [ 2 c:longlong (c-array) ] ] }
98 { ulonglong-2-rep [ [ 2 c:ulonglong (c-array) ] ] }
99 { float-4-rep [ [ 4 c:float (c-array) ] ] }
100 { double-2-rep [ [ 2 c:double (c-array) ] ] }
! Pushes the component values the comparison intrinsics store for "true"
! and "false". Float reps get the double whose bit pattern is -1 and 0.0;
! every other rep gets the integers -1 and 0.
: rep-tf-values ( rep -- t f )
    float-vector-rep?
    [ -1 bits>double 0.0 ]
    [ -1 0 ]
    if ;
! Converts A with the quotation that [byte>rep-array] selects for REP.
: >rep-array ( a rep -- a' )
    [byte>rep-array]
    call( a -- a' ) ; inline
! Two-input form of >rep-array: the quotation selected for REP is applied
! to A and to B in turn.
: 2>rep-array ( a b rep -- a' b' )
    [byte>rep-array]
    [ call( a -- a' ) ] curry
    bi@ ; inline
! Makes a new array for REP by running the quotation that [<rep-array>]
! selects for it.
: <rep-array> ( rep -- a' )
    [<rep-array>]
    call( -- a' ) ; inline
! Applies QUOT to each component of A (viewed through REP) with the
! unrolled map, then returns the underlying storage of the result.
: components-map ( a rep quot -- c )
    ! Under QUOT: turn ( a rep ) into ( a' length ).
    [ [ >rep-array ] keep rep-length ] dip
    unrolled-map-unsafe
    underlying>> ; inline
! Applies QUOT to corresponding components of A and B (viewed through REP)
! with the unrolled 2map, then returns the underlying storage of the result.
: components-2map ( a b rep quot -- c )
    ! Under QUOT: turn ( a b rep ) into ( a' b' length ).
    [ [ 2>rep-array ] keep rep-length ] dip
    unrolled-2map-unsafe
    underlying>> ; inline
! Folds the components of A (viewed through REP) with QUOT. map-reduce is
! given an empty mapping quotation, so components are combined unchanged.
: components-reduce ( a rep quot -- x )
    [ >rep-array ] dip
    [ ] swap
    map-reduce ; inline
! Same as components-map, except REP is first replaced by its
! >bitwise-vector-rep counterpart.
: bitwise-components-map ( a rep quot -- c )
    [ >bitwise-vector-rep ] dip
    components-map ; inline
! Same as components-2map, except REP is first replaced by its
! >bitwise-vector-rep counterpart.
: bitwise-components-2map ( a b rep quot -- c )
    [ >bitwise-vector-rep ] dip
    components-2map ; inline
! Same as components-reduce, except REP is first replaced by its
! >bitwise-vector-rep counterpart.
: bitwise-components-reduce ( a rep quot -- x )
    [ >bitwise-vector-rep ] dip
    components-reduce ; inline
! Single-input shuffle helper: A is viewed through REP, a new REP array c'
! is made, ELTS is walked together with its index for rep-length steps,
! and the underlying storage of c' is returned.
131 :: (vshuffle) ( a elts rep -- c )
132 a rep >rep-array :> a'
133 rep <rep-array> :> c'
134 elts rep rep-length [| from to |
! Mask the element taken from ELTS with rep-length - 1.
135 from rep rep-length 1 - bitand
138 ] unrolled-each-index-unsafe
139 c' underlying>> ; inline
! Two-input shuffle helper: A and B are viewed through REP and joined with
! cord-append into ab'; a new REP array c' is made, ELTS is walked together
! with its index for rep-length steps, and the underlying storage of c' is
! returned.
141 :: (vshuffle2) ( a b elts rep -- c )
142 a rep >rep-array :> a'
143 b rep >rep-array :> b'
144 a' b' cord-append :> ab'
145 rep <rep-array> :> c'
146 elts rep rep-length [| from to |
! Mask the element taken from ELTS with 2 * rep-length - 1.
147 from rep rep-length dup + 1 - bitand
150 ] unrolled-each-index-unsafe
151 c' underlying>> ; inline
! Division chosen by the class of the top operand: integers use /i,
! floats use /f. Used by (simd-v/).
GENERIC: native/ ( x y -- x/y )

M: integer native/
    /i ; inline

M: float native/
    /f ; inline
! Component-wise add, subtract and negate, built on the components-*map
! helpers.
SIMD-INTRINSIC: (simd-v+) ( a b rep -- c )
    [ + ] components-2map ;

SIMD-INTRINSIC: (simd-v-) ( a b rep -- c )
    [ - ] components-2map ;

SIMD-INTRINSIC: (simd-vneg) ( a rep -- c )
    [ neg ] components-map ;
! Alternating subtract/add of A and B, handled in pairs of components
! (n, n + 1): a difference is formed for component n and a sum for n + 1.
163 SIMD-INTRINSIC:: (simd-v+-) ( a b rep -- c )
164 a b rep 2>rep-array :> ( a' b' )
165 rep <rep-array> :> c'
! Sequence: 0, 2, 4, ... up to rep-length - 1. Count passed to the
! unrolled loop: rep-length 2 /i.
166 0 rep rep-length [ 1 - 2 <range> ] [ 2 /i ] bi [| n |
! Difference of the components at index n.
167 n a' nth-unsafe n b' nth-unsafe -
! Sum of the components at index n + 1, stored at index n + 1 of c'.
170 n 1 + a' nth-unsafe n 1 + b' nth-unsafe +
171 n 1 + c' set-nth-unsafe
172 ] unrolled-each-unsafe
! Saturating add, subtract and multiply: each component result is passed
! through c:c-type-clamp with the rep's component type.
SIMD-INTRINSIC: (simd-vs+) ( a b rep -- c )
    dup rep-component-type
    '[ + _ c:c-type-clamp ]
    components-2map ;

SIMD-INTRINSIC: (simd-vs-) ( a b rep -- c )
    dup rep-component-type
    '[ - _ c:c-type-clamp ]
    components-2map ;

SIMD-INTRINSIC: (simd-vs*) ( a b rep -- c )
    dup rep-component-type
    '[ * _ c:c-type-clamp ]
    components-2map ;
! Component-wise multiply.
SIMD-INTRINSIC: (simd-v*) ( a b rep -- c )
    [ * ] components-2map ;

! Multiply, then shift each product right by the component width in bits
! (heap-size of the component type times 8).
SIMD-INTRINSIC: (simd-v*high) ( a b rep -- c )
    dup rep-component-type c:heap-size 8 * neg
    '[ * _ shift ]
    components-2map ;
! Multiplies A and B pairwise and adds within groups of two components,
! clamping each sum to the component type of the widened, signed rep.
183 SIMD-INTRINSIC:: (simd-v*hs+) ( a b rep -- c )
! For char-16-rep or uchar-16-rep, A is read as uchar-16 and B as char-16;
! any other rep is used unchanged for both inputs.
184 rep { char-16-rep uchar-16-rep } member-eq?
185 [ uchar-16-rep char-16-rep ]
186 [ rep rep ] if :> ( a-rep b-rep )
187 b-rep widen-vector-rep signed-rep :> wide-rep
188 wide-rep rep-component-type :> wide-type
! Both inputs are split into groups of two adjacent components.
189 a a-rep >rep-array 2 <groups> :> a'
190 b b-rep >rep-array 2 <groups> :> b'
! One output component per group: rep-length 2 /i of them.
191 a' b' rep rep-length 2 /i [
193 [ [ second ] bi@ * ] 2bi +
194 wide-type c:c-type-clamp
! The result is collected into a new wide-rep array.
195 ] wide-rep <rep-array> unrolled-2map-as-unsafe underlying>> ;
! Component-wise division through the native/ generic.
SIMD-INTRINSIC: (simd-v/) ( a b rep -- c )
    [ native/ ] components-2map ;

! Component-wise average: integer sums get 1 added and are shifted right
! by one bit; other sums are multiplied by 0.5.
SIMD-INTRINSIC: (simd-vavg) ( a b rep -- c )
    [
        + dup integer?
        [ 1 + -1 shift ]
        [ 0.5 * ]
        if
    ] components-2map ;

! Component-wise minimum and maximum.
SIMD-INTRINSIC: (simd-vmin) ( a b rep -- c )
    [ min ] components-2map ;

SIMD-INTRINSIC: (simd-vmax) ( a b rep -- c )
    [ max ] components-2map ;
! Dot product: starts from the product of the first components, then adds
! the products of components 1 .. rep-length - 1.
SIMD-INTRINSIC: (simd-v.) ( a b rep -- n )
    [
        2>rep-array
        [ [ first ] bi@ * ] 2keep
    ] keep
    1 swap rep-length [a,b)
    [ '[ _ swap nth-unsafe ] bi@ * + ] with with
    each ;
! Component-wise square root via fsqrt.
SIMD-INTRINSIC: (simd-vsqrt) ( a rep -- c )
    [ fsqrt ] components-map ;

! Sum of the absolute differences of corresponding components.
SIMD-INTRINSIC: (simd-vsad) ( a b rep -- c )
    2>rep-array
    [ - abs ] [ + ] 2map-reduce ;

! Sum of all components.
SIMD-INTRINSIC: (simd-sum) ( a rep -- n )
    [ + ] components-reduce ;

! Component-wise absolute value.
SIMD-INTRINSIC: (simd-vabs) ( a rep -- c )
    [ abs ] components-map ;
! Bitwise operations, applied per component through the
! bitwise-components-* helpers.
SIMD-INTRINSIC: (simd-vbitand) ( a b rep -- c )
    [ bitand ] bitwise-components-2map ;

! AND of B with the complement of A.
SIMD-INTRINSIC: (simd-vbitandn) ( a b rep -- c )
    [ swap bitnot bitand ] bitwise-components-2map ;

SIMD-INTRINSIC: (simd-vbitor) ( a b rep -- c )
    [ bitor ] bitwise-components-2map ;

SIMD-INTRINSIC: (simd-vbitxor) ( a b rep -- c )
    [ bitxor ] bitwise-components-2map ;

SIMD-INTRINSIC: (simd-vbitnot) ( a rep -- c )
    [ bitnot ] bitwise-components-map ;
! Logical vector operations; the bodies are the same bitwise operations
! used by the (simd-vbit*) words.
SIMD-INTRINSIC: (simd-vand) ( a b rep -- c )
    [ bitand ] bitwise-components-2map ;

! AND of B with the complement of A.
SIMD-INTRINSIC: (simd-vandn) ( a b rep -- c )
    [ swap bitnot bitand ] bitwise-components-2map ;

SIMD-INTRINSIC: (simd-vor) ( a b rep -- c )
    [ bitor ] bitwise-components-2map ;

SIMD-INTRINSIC: (simd-vxor) ( a b rep -- c )
    [ bitxor ] bitwise-components-2map ;

SIMD-INTRINSIC: (simd-vnot) ( a rep -- c )
    [ bitnot ] bitwise-components-map ;
! Shifts every component left by N bits.
SIMD-INTRINSIC: (simd-vlshift) ( a n rep -- c )
    swap [ shift ] curry
    bitwise-components-map ;

! Shifts every component right by N bits (a shift by the negated count).
SIMD-INTRINSIC: (simd-vrshift) ( a n rep -- c )
    swap neg [ shift ] curry
    bitwise-components-map ;
! Whole-vector shifts on the 16-element input; REP is ignored.
! hlshift drops the last N elements and pads the head with 0 back to 16.
SIMD-INTRINSIC: (simd-hlshift) ( a n rep -- c )
    drop
    head-slice*
    16 0 pad-head ;

! hrshift drops the first N elements and pads the tail with 0 back to 16.
SIMD-INTRINSIC: (simd-hrshift) ( a n rep -- c )
    drop
    tail-slice
    16 0 pad-tail ;
! Pads the element list N with 0 up to rep-length, then shuffles.
SIMD-INTRINSIC: (simd-vshuffle-elements) ( a n rep -- c )
    dup [ rep-length 0 pad-tail ] dip
    (vshuffle) ;

! Two-input variant of the above, using (vshuffle2).
SIMD-INTRINSIC: (simd-vshuffle2-elements) ( a b n rep -- c )
    dup [ rep-length 0 pad-tail ] dip
    (vshuffle2) ;

! Shuffle with B as the element list; REP is dropped and uchar-16-rep is
! used instead.
SIMD-INTRINSIC: (simd-vshuffle-bytes) ( a b rep -- c )
    drop
    uchar-16-rep (vshuffle) ;
! Interleaves the leading components of A and B into a new REP array c'.
230 SIMD-INTRINSIC:: (simd-vmerge-head) ( a b rep -- c )
231 a b rep 2>rep-array :> ( a' b' )
232 rep <rep-array> :> c'
! The loop count is rep-length 2 /i.
233 rep rep-length 2 /i [| n |
! Component n of a' goes to index 2n of c'.
234 n a' nth-unsafe n 2 * c' set-nth-unsafe
! Component n of b' goes to index 2n + 1 of c'.
235 n b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
236 ] unrolled-each-integer
! Interleaves components of A and B taken at an offset of len
! (rep-length 2 /i) into a new REP array c'.
238 SIMD-INTRINSIC:: (simd-vmerge-tail) ( a b rep -- c )
239 a b rep 2>rep-array :> ( a' b' )
240 rep <rep-array> :> c'
241 rep rep-length 2 /i :> len
! Component n + len of a' goes to index 2n of c'.
243 n len + a' nth-unsafe n 2 * c' set-nth-unsafe
! Component n + len of b' goes to index 2n + 1 of c'.
244 n len + b' nth-unsafe n 2 * 1 + c' set-nth-unsafe
245 ] unrolled-each-integer
! Component-wise comparisons. Each result component is one of the two
! values from rep-tf-values: the first when the test holds, the second
! otherwise.
SIMD-INTRINSIC: (simd-v<=) ( a b rep -- c )
    dup rep-tf-values
    '[ <= _ _ ? ]
    components-2map ;

SIMD-INTRINSIC: (simd-v<) ( a b rep -- c )
    dup rep-tf-values
    '[ < _ _ ? ]
    components-2map ;

SIMD-INTRINSIC: (simd-v=) ( a b rep -- c )
    dup rep-tf-values
    '[ = _ _ ? ]
    components-2map ;

SIMD-INTRINSIC: (simd-v>) ( a b rep -- c )
    dup rep-tf-values
    '[ > _ _ ? ]
    components-2map ;

SIMD-INTRINSIC: (simd-v>=) ( a b rep -- c )
    dup rep-tf-values
    '[ >= _ _ ? ]
    components-2map ;

SIMD-INTRINSIC: (simd-vunordered?) ( a b rep -- c )
    dup rep-tf-values
    '[ unordered? _ _ ? ]
    components-2map ;
! True when the bitwise OR of all components is non-zero.
SIMD-INTRINSIC: (simd-vany?) ( a rep -- ? )
    [ bitor ] bitwise-components-reduce
    zero? not ;

! True when the bitwise AND of all components is non-zero.
SIMD-INTRINSIC: (simd-vall?) ( a rep -- ? )
    [ bitand ] bitwise-components-reduce
    zero? not ;

! True when the bitwise OR of all components is zero.
SIMD-INTRINSIC: (simd-vnone?) ( a rep -- ? )
    [ bitor ] bitwise-components-reduce
    zero? ;
! Converts every component with >float, collecting into a new array of the
! rep given by >float-vector-rep.
SIMD-INTRINSIC: (simd-v>float) ( a rep -- c )
    [ [ >rep-array ] keep rep-length [ >float ] ]
    [ >float-vector-rep <rep-array> ]
    bi unrolled-map-as-unsafe
    underlying>> ;

! Converts every component with >integer, collecting into a new array of
! the rep given by >int-vector-rep.
SIMD-INTRINSIC: (simd-v>integer) ( a rep -- c )
    [ [ >rep-array ] keep rep-length [ >integer ] ]
    [ >int-vector-rep <rep-array> ]
    bi unrolled-map-as-unsafe
    underlying>> ;
! Packs A followed by B into one vector of the narrowed rep; every
! component is clamped to the narrowed component type.
SIMD-INTRINSIC: (simd-vpack-signed) ( a b rep -- c )
    [ [ 2>rep-array cord-append ] keep rep-length 2 * ]
    [ narrow-vector-rep [ <rep-array> ] [ rep-component-type ] bi ]
    bi
    '[ _ c:c-type-clamp ] swap
    unrolled-map-as-unsafe
    underlying>> ;

! As above, with the narrowed rep also passed through >uint-vector-rep.
SIMD-INTRINSIC: (simd-vpack-unsigned) ( a b rep -- c )
    [ [ 2>rep-array cord-append ] keep rep-length 2 * ]
    [ narrow-vector-rep >uint-vector-rep [ <rep-array> ] [ rep-component-type ] bi ]
    bi
    '[ _ c:c-type-clamp ] swap
    unrolled-map-as-unsafe
    underlying>> ;
! Takes the first N components of A, where N is the length of the widened
! rep, and converts them with that rep's [>rep-array] quotation.
SIMD-INTRINSIC: (simd-vunpack-head) ( a rep -- c )
    [ >rep-array ]
    [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ]
    bi
    [ head-slice ] dip
    call( a' -- c' )
    underlying>> ;

! Same conversion, applied to what tail-slice returns for that N.
SIMD-INTRINSIC: (simd-vunpack-tail) ( a rep -- c )
    [ >rep-array ]
    [ widen-vector-rep [ rep-length ] [ [>rep-array] ] bi ]
    bi
    [ tail-slice ] dip
    call( a' -- c' )
    underlying>> ;
! Fills a vector with N: replicate-as is given rep-length as the count,
! a quotation that pushes N, and a new REP array as the exemplar.
285 SIMD-INTRINSIC: (simd-with) ( n rep -- v )
286 [ rep-length swap '[ _ ] ] [ <rep-array> ] bi replicate-as
! Stores M and N as the first two components of a new REP array.
SIMD-INTRINSIC: (simd-gather-2) ( m n rep -- v )
    <rep-array>
    [ 2 set-firstn-unsafe ] keep
    underlying>> ;

! Stores M, N, O and P as the first four components of a new REP array.
SIMD-INTRINSIC: (simd-gather-4) ( m n o p rep -- v )
    <rep-array>
    [ 4 set-firstn-unsafe ] keep
    underlying>> ;

! Reads component N of A, viewed through REP.
SIMD-INTRINSIC: (simd-select) ( a n rep -- x )
    swapd >rep-array
    nth-unsafe ;
! Copies rep-size bytes starting N bytes past C-PTR into a new byte array.
SIMD-INTRINSIC: alien-vector ( c-ptr n rep -- value )
    [ swap <displaced-alien> ] dip
    rep-size
    memory>byte-array ;

! Copies rep-size bytes of VALUE to the address N bytes past C-PTR.
SIMD-INTRINSIC: set-alien-vector ( value c-ptr n rep -- )
    [ swap <displaced-alien> swap ] dip
    rep-size
    memcpy ;
297 "compiler.cfg.intrinsics.simd" require
298 "compiler.tree.propagation.simd" require
299 "compiler.cfg.value-numbering.simd" require