1 ! Copyright (C) 2009 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien byte-arrays fry classes.algebra
4 cpu.architecture kernel math sequences math.vectors
5 math.vectors.simd macros generalizations combinators
6 combinators.short-circuit arrays locals
7 compiler.tree.propagation.info compiler.cfg.builder.blocks
8 compiler.cfg.comparisons
9 compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
10 compiler.cfg.instructions compiler.cfg.registers
11 compiler.cfg.intrinsics.alien
13 FROM: alien.c-types => heap-size char short int longlong float double ;
14 SPECIALIZED-ARRAYS: char short int longlong float double ;
15 IN: compiler.cfg.intrinsics.simd
! Loads a vector constant in which every lane is negative zero.
! Per-rep branches take the underlying bytes of a float-array / double-array
! literal and hand them to ^^load-constant.
19 : ^load-neg-zero-vector ( rep -- dst )
21 { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
22 { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
! Loads the per-rep constant used by emit-simd-v+- to negate alternate lanes.
! Even-indexed lanes hold -0.0 (float reps) or -1 (integer reps); odd-indexed
! lanes hold zero.  The literal's underlying bytes go to ^^load-constant.
25 : ^load-add-sub-vector ( rep -- dst )
27 { float-4-rep [ float-array{ -0.0 0.0 -0.0 0.0 } underlying>> ^^load-constant ] }
28 { double-2-rep [ double-array{ -0.0 0.0 } underlying>> ^^load-constant ] }
29 { char-16-rep [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
30 { short-8-rep [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
31 { int-4-rep [ int-array{ -1 0 -1 0 } underlying>> ^^load-constant ] }
32 { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
! Expands an element-index shuffle into a byte-index shuffle.
! With `width` = byte size of one component of rep, every element index e in
! `shuffle` becomes the run of bytes  e*width, e*width+1, ... e*width+width-1;
! the runs are concatenated in order.
:: >variable-shuffle ( shuffle rep -- shuffle' )
    rep rep-component-type heap-size :> width
    ! `width` copies of `width`: scaled by e this gives the base byte offset
    width width <repetition> >byte-array :> strides
    ! 0 .. width-1: byte offsets within one element
    width iota >byte-array :> offsets
    shuffle [ strides n*v offsets v+ ] map concat ;
! Converts an element shuffle to its byte-level form with >variable-shuffle
! and loads the result as a constant.
:: ^load-immediate-shuffle ( shuffle rep -- dst )
    shuffle rep >variable-shuffle ^^load-constant ;
! Lane-wise select driven by `mask` (callers pass a ^compare-vector result).
! First masks `true` with `mask`, then applies ^^andn-vector to `mask` and
! `false`.
! NOTE(review): assumes ^^andn-vector complements its first operand, i.e.
! yields (NOT mask) AND false -- confirm against the instruction definition.
44 :: ^blend-vector ( mask true false rep -- dst )
45 true mask rep ^^and-vector
46 mask false rep ^^andn-vector
! Emits a comparison of src1 against src2 under condition code cc for rep.
! Used by the v<, v<=, v=, v>, v>=, vunordered?, vmin, vmax, vabs and
! vunpack emitters in this vocabulary.
49 : ^compare-vector ( src1 src2 rep cc -- dst )
! Shift-right-by-immediate performed on the widened form of rep.
! Callers pass the ORIGINAL rep; widening happens here, exactly once.
:: ^widened-shr-vector-imm ( src shift rep -- dst )
    src shift rep widen-vector-rep ^^shr-vector-imm ;
! Emitter registered for the (simd-v+) intrinsic.
57 : emit-simd-v+ ( node -- )
! Emitter registered for the (simd-v-) intrinsic.
62 : emit-simd-v- ( node -- )
! Emitter registered for the (simd-vneg) intrinsic: negates every lane.
! Float reps compute (-0.0) - src, integer reps compute 0 - src.
! Each branch starts with ( src rep ); the first quotation leaves the constant
! on TOP of src, so it is swapped underneath before ^^sub-vector runs
! ( src1 src2 rep -- dst, src1 - src2 ).  Without the swap the result would be
! src - 0, i.e. the unmodified input.
67 : emit-simd-vneg ( node -- )
69 { float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
70 { int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
! Emitter registered for the (simd-v+-) intrinsic.
! Both visible branches negate alternate lanes of src2 using the constant from
! ^load-add-sub-vector and then add the result to src1.
73 : emit-simd-v+- ( node -- )
! Float: XOR with -0.0 flips the sign bit of the selected lanes.
76 { float-vector-rep [| src1 src2 rep |
77 rep ^load-add-sub-vector :> signs
78 src2 signs rep ^^xor-vector :> src2'
79 src1 src2' rep ^^add-vector
! Integer: XOR with -1 then subtracting -1 is two's-complement negation
! (~x + 1) in the selected lanes; lanes where signs is 0 are left unchanged.
81 { int-vector-rep [| src1 src2 rep |
82 rep ^load-add-sub-vector :> signs
83 src2 signs rep ^^xor-vector :> src2'
84 src2' signs rep ^^sub-vector :> src2''
85 src1 src2'' rep ^^add-vector
! Emitter registered for the (simd-vs+) intrinsic.
! Float reps use the plain ^^add-vector; integer reps use
! ^^saturated-add-vector.
89 : emit-simd-vs+ ( node -- )
91 { float-vector-rep [ ^^add-vector ] }
92 { int-vector-rep [ ^^saturated-add-vector ] }
! Emitter registered for the (simd-vs-) intrinsic.
! Float reps use the plain ^^sub-vector; integer reps use
! ^^saturated-sub-vector.
95 : emit-simd-vs- ( node -- )
97 { float-vector-rep [ ^^sub-vector ] }
98 { int-vector-rep [ ^^saturated-sub-vector ] }
! Emitter registered for the (simd-vs*) intrinsic.
! Float reps use the plain ^^mul-vector; integer reps use
! ^^saturated-mul-vector.  The alternatives go to emit-vv-vector-op.
101 : emit-simd-vs* ( node -- )
103 { float-vector-rep [ ^^mul-vector ] }
104 { int-vector-rep [ ^^saturated-mul-vector ] }
105 } emit-vv-vector-op ;
! Emitter registered for the (simd-v*) intrinsic; hands its list of
! alternatives to emit-vv-vector-op.
107 : emit-simd-v* ( node -- )
110 } emit-vv-vector-op ;
! Emitter registered for the (simd-v/) intrinsic; hands its list of
! alternatives to emit-vv-vector-op.
112 : emit-simd-v/ ( node -- )
115 } emit-vv-vector-op ;
! Emitter registered for the (simd-vmin) intrinsic.
! The visible alternative runs on ( src1 src2 rep ): 3bi first builds a
! src1 < src2 mask, then calls ^blend-vector with ( mask src1 src2 rep ),
! selecting src1 where the mask is set and src2 elsewhere.
117 : emit-simd-vmin ( node -- )
121 [ cc< ^compare-vector ]
122 [ ^blend-vector ] 3bi
124 } emit-vv-vector-op ;
! Emitter registered for the (simd-vmax) intrinsic.
! The visible alternative runs on ( src1 src2 rep ): 3bi first builds a
! src1 > src2 mask, then calls ^blend-vector with ( mask src1 src2 rep ),
! selecting src1 where the mask is set and src2 elsewhere.
126 : emit-simd-vmax ( node -- )
130 [ cc> ^compare-vector ]
131 [ ^blend-vector ] 3bi
133 } emit-vv-vector-op ;
! Emitter registered for the (simd-v.) intrinsic.
! Has separate float and integer alternatives, each binding
! ( src1 src2 rep ) as locals; they are passed to emit-vv-vector-op.
135 : emit-simd-v. ( node -- )
138 { float-vector-rep [| src1 src2 rep |
141 { int-vector-rep [| src1 src2 rep |
144 } emit-vv-vector-op ;
! Emitter registered for the (simd-vsqrt) intrinsic.
146 : emit-simd-vsqrt ( node -- )
! Emitter registered for the (simd-sum) intrinsic.
151 : emit-simd-sum ( node -- )
! Emitter registered for the (simd-vabs) intrinsic.
154 : emit-simd-vabs ( node -- )
! Unsigned lanes are already non-negative: drop the rep, keep src as is.
156 { unsigned-int-vector-rep [ drop ] }
! Float: apply ^^andn-vector to ( neg-zero src ); presumably this clears the
! sign bit by computing (NOT -0.0) AND src -- TODO confirm operand roles.
158 { float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
! Signed integer: compute 0 - src and a mask of lanes where 0 > src, then
! blend so masked lanes take the negated value and the rest take src.
159 { int-vector-rep [| src rep |
160 rep ^^zero-vector :> zero
161 zero src rep ^^sub-vector :> -src
162 zero src rep cc> ^compare-vector :> sign
163 sign -src src rep ^blend-vector
! Emitter registered for both (simd-vbitand) and (simd-vand); hands its list
! of alternatives to emit-vv-vector-op.
167 : emit-simd-vand ( node -- )
170 } emit-vv-vector-op ;
! Emitter registered for both (simd-vbitandn) and (simd-vandn); hands its
! list of alternatives to emit-vv-vector-op.
172 : emit-simd-vandn ( node -- )
175 } emit-vv-vector-op ;
! Emitter registered for both (simd-vbitor) and (simd-vor); hands its list of
! alternatives to emit-vv-vector-op.
177 : emit-simd-vor ( node -- )
180 } emit-vv-vector-op ;
! Emitter registered for both (simd-vbitxor) and (simd-vxor); hands its list
! of alternatives to emit-vv-vector-op.
182 : emit-simd-vxor ( node -- )
185 } emit-vv-vector-op ;
! Emitter registered for both (simd-vbitnot) and (simd-vnot).
! The visible alternative runs on ( src rep ): it creates a vector with
! ^^fill-vector and XORs src with it.
187 : emit-simd-vnot ( node -- )
190 [ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
! Emitter registered for the (simd-vlshift) intrinsic; hands its list of
! alternatives to emit-vn-or-vl-vector-op.
193 : emit-simd-vlshift ( node -- )
198 } emit-vn-or-vl-vector-op ;
! Emitter registered for the (simd-vrshift) intrinsic; hands its list of
! alternatives to emit-vn-or-vl-vector-op.
200 : emit-simd-vrshift ( node -- )
205 } emit-vn-or-vl-vector-op ;
! Emitter registered for the (simd-hlshift) intrinsic: emits
! ^^horizontal-shl-vector-imm through emit-vl-vector-op.
207 : emit-simd-hlshift ( node -- )
209 [ ^^horizontal-shl-vector-imm ]
210 } emit-vl-vector-op ;
! Emitter registered for the (simd-hrshift) intrinsic: emits
! ^^horizontal-shr-vector-imm through emit-vl-vector-op.
212 : emit-simd-hrshift ( node -- )
214 [ ^^horizontal-shr-vector-imm ]
215 } emit-vl-vector-op ;
! Emitter registered for the (simd-vshuffle-elements) intrinsic.
! Alternatives, handed to emit-vl-vector-op:
!   1. ^^shuffle-vector-imm with the literal shuffle.
!   2. Fallback on ( src shuffle rep ): load the byte-level shuffle as a
!      constant with ^load-immediate-shuffle, then ^^shuffle-vector.
!      `bi` is required so both quotations are actually applied to rep;
!      without it the alternative would only push two quotations and emit
!      nothing.
217 : emit-simd-vshuffle-elements ( node -- )
219 [ ^^shuffle-vector-imm ]
220 [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
221 } emit-vl-vector-op ;
! Emitter registered for the (simd-vshuffle-bytes) intrinsic; hands its list
! of alternatives to emit-vv-vector-op.
223 : emit-simd-vshuffle-bytes ( node -- )
226 } emit-vv-vector-op ;
! Emitter registered for the (simd-vmerge-head) intrinsic: emits
! ^^merge-vector-head through emit-vv-vector-op.
228 : emit-simd-vmerge-head ( node -- )
230 [ ^^merge-vector-head ]
231 } emit-vv-vector-op ;
! Emitter registered for the (simd-vmerge-tail) intrinsic: emits
! ^^merge-vector-tail through emit-vv-vector-op.
233 : emit-simd-vmerge-tail ( node -- )
235 [ ^^merge-vector-tail ]
236 } emit-vv-vector-op ;
! Emitter registered for (simd-v<=): ^compare-vector with condition code
! cc<=, run through (emit-vv-vector-op).
238 : emit-simd-v<= ( node -- )
239 [ cc<= ^compare-vector ] (emit-vv-vector-op) ;
! Emitter registered for (simd-v<): ^compare-vector with condition code
! cc<, run through (emit-vv-vector-op).
240 : emit-simd-v< ( node -- )
241 [ cc< ^compare-vector ] (emit-vv-vector-op) ;
! Emitter registered for (simd-v=): ^compare-vector with condition code
! cc=, run through (emit-vv-vector-op).
242 : emit-simd-v= ( node -- )
243 [ cc= ^compare-vector ] (emit-vv-vector-op) ;
! Emitter registered for (simd-v>): ^compare-vector with condition code
! cc>, run through (emit-vv-vector-op).
244 : emit-simd-v> ( node -- )
245 [ cc> ^compare-vector ] (emit-vv-vector-op) ;
! Emitter registered for (simd-v>=): ^compare-vector with condition code
! cc>=, run through (emit-vv-vector-op).
246 : emit-simd-v>= ( node -- )
247 [ cc>= ^compare-vector ] (emit-vv-vector-op) ;
! Emitter registered for (simd-vunordered?): ^compare-vector with condition
! code cc/<>=, run through (emit-vv-vector-op).
248 : emit-simd-vunordered? ( node -- )
249 [ cc/<>= ^compare-vector ] (emit-vv-vector-op) ;
! Emitter registered for (simd-vany?): ^test-vector with vcc-any, run
! through (emit-vv-vector-op).
251 : emit-simd-vany? ( node -- )
252 [ vcc-any ^test-vector ] (emit-vv-vector-op) ;
! Emitter registered for (simd-vall?): ^test-vector with vcc-all, run
! through (emit-vv-vector-op).
253 : emit-simd-vall? ( node -- )
254 [ vcc-all ^test-vector ] (emit-vv-vector-op) ;
! Emitter registered for (simd-vnone?): ^test-vector with vcc-none, run
! through (emit-vv-vector-op).
255 : emit-simd-vnone? ( node -- )
256 [ vcc-none ^test-vector ] (emit-vv-vector-op) ;
! Emitter registered for the (simd-v>float) intrinsic.
! A float rep needs no conversion, so that branch only drops the top stack
! item; integer reps emit ^^integer>float-vector.
258 : emit-simd-v>float ( node -- )
260 { float-vector-rep [ drop ] }
261 { int-vector-rep [ ^^integer>float-vector ] }
262 } emit-vv-vector-op ;
! Emitter registered for the (simd-v>integer) intrinsic.
! Float reps emit ^^float>integer-vector.  An integer rep needs no
! conversion, so that branch drops the top stack item, mirroring the
! float-vector-rep branch of emit-simd-v>float.  (`dup` here would leave an
! extra item on the stack instead of a single result.)
264 : emit-simd-v>integer ( node -- )
266 { float-vector-rep [ ^^float>integer-vector ] }
267 { int-vector-rep [ drop ] }
268 } emit-vv-vector-op ;
! Emitter registered for the (simd-vpack-signed) intrinsic: emits
! ^^signed-pack-vector through emit-vv-vector-op.
270 : emit-simd-vpack-signed ( node -- )
272 [ ^^signed-pack-vector ]
273 } emit-vv-vector-op ;
! Emitter registered for the (simd-vpack-unsigned) intrinsic: emits
! ^^unsigned-pack-vector through emit-vv-vector-op.
275 : emit-simd-vpack-unsigned ( node -- )
277 [ ^^unsigned-pack-vector ]
278 } emit-vv-vector-op ;
280 ! XXX shr vector rep is widened!
! Emitter registered for the (simd-vunpack-head) intrinsic.
281 : emit-simd-vunpack-head ( node -- )
! Direct form.
283 [ ^^unpack-vector-head ]
! Unsigned: merge src with a zero vector.
284 { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
! Signed, first form: merge src with itself, then shift the widened lanes
! right by the bit width of one original component (heap-size * 8).
! ^widened-shr-vector-imm widens rep itself, so the original rep is passed.
285 { signed-int-vector-rep [| src rep |
286 src src rep ^^merge-vector-head :> merged
287 rep rep-component-type heap-size 8 * :> bits
288 merged bits rep ^widened-shr-vector-imm
! Signed, second form: merge src with a mask of the lanes where 0 > src.
290 { signed-int-vector-rep [| src rep |
291 rep ^^zero-vector :> zero
292 zero src rep cc> ^compare-vector :> sign
293 src sign rep ^^merge-vector-head
! Emitter registered for the (simd-vunpack-tail) intrinsic.
297 : emit-simd-vunpack-tail ( node -- )
! Direct form.
299 [ ^^unpack-vector-tail ]
! Move the tail into the head position, then unpack the head.
300 [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
! Unsigned: merge src with a zero vector.
301 { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
! Signed, first form: merge src with itself, then shift the widened lanes
! right by the bit width of one original component (heap-size * 8).
! ^widened-shr-vector-imm already applies widen-vector-rep, so the ORIGINAL
! rep is passed; widening it here as well would run the shift on a
! twice-widened rep.
302 { signed-int-vector-rep [| src rep |
303 src src rep ^^merge-vector-tail :> merged
304 rep rep-component-type heap-size 8 * :> bits
305 merged bits rep ^widened-shr-vector-imm
! Signed, second form: merge src with a mask of the lanes where 0 > src.
307 { signed-int-vector-rep [| src rep |
308 rep ^^zero-vector :> zero
309 zero src rep cc> ^compare-vector :> sign
310 src sign rep ^^merge-vector-tail
! Emitter registered for the (simd-with) intrinsic.
314 : emit-simd-with ( node -- )
! Emitter registered for the (simd-gather-2) intrinsic.
315 : emit-simd-gather-2 ( node -- )
! Emitter registered for the (simd-gather-4) intrinsic.
316 : emit-simd-gather-4 ( node -- )
! Emitter registered for the (simd-select) intrinsic.
317 : emit-simd-select ( node -- )
! Emitter registered for both alien-vector and alien-vector-aligned.
318 : emit-alien-vector ( node -- )
! Emitter registered for both set-alien-vector and set-alien-vector-aligned.
319 : emit-set-alien-vector ( node -- )
! NOTE(review): not referenced by the intrinsic table in this file, where
! alien-vector-aligned is mapped to emit-alien-vector -- confirm intent.
320 : emit-alien-vector-aligned ( node -- )
! NOTE(review): not referenced by the intrinsic table in this file, where
! set-alien-vector-aligned is mapped to emit-set-alien-vector -- confirm
! intent.
321 : emit-set-alien-vector-aligned ( node -- )
! Intrinsic table: pairs each SIMD primitive word with the emit-* word that
! handles it; the whole table is handed to enable-intrinsics.
325 { (simd-v+) [ emit-simd-v+ ] }
326 { (simd-v-) [ emit-simd-v- ] }
327 { (simd-vneg) [ emit-simd-vneg ] }
328 { (simd-v+-) [ emit-simd-v+- ] }
329 { (simd-vs+) [ emit-simd-vs+ ] }
330 { (simd-vs-) [ emit-simd-vs- ] }
331 { (simd-vs*) [ emit-simd-vs* ] }
332 { (simd-v*) [ emit-simd-v* ] }
333 { (simd-v/) [ emit-simd-v/ ] }
334 { (simd-vmin) [ emit-simd-vmin ] }
335 { (simd-vmax) [ emit-simd-vmax ] }
336 { (simd-v.) [ emit-simd-v. ] }
337 { (simd-vsqrt) [ emit-simd-vsqrt ] }
338 { (simd-sum) [ emit-simd-sum ] }
339 { (simd-vabs) [ emit-simd-vabs ] }
! The (simd-vbit*) words and the (simd-vand)/(simd-vandn)/(simd-vor)/
! (simd-vxor)/(simd-vnot) words share the same five emitters.
340 { (simd-vbitand) [ emit-simd-vand ] }
341 { (simd-vbitandn) [ emit-simd-vandn ] }
342 { (simd-vbitor) [ emit-simd-vor ] }
343 { (simd-vbitxor) [ emit-simd-vxor ] }
344 { (simd-vbitnot) [ emit-simd-vnot ] }
345 { (simd-vand) [ emit-simd-vand ] }
346 { (simd-vandn) [ emit-simd-vandn ] }
347 { (simd-vor) [ emit-simd-vor ] }
348 { (simd-vxor) [ emit-simd-vxor ] }
349 { (simd-vnot) [ emit-simd-vnot ] }
350 { (simd-vlshift) [ emit-simd-vlshift ] }
351 { (simd-vrshift) [ emit-simd-vrshift ] }
352 { (simd-hlshift) [ emit-simd-hlshift ] }
353 { (simd-hrshift) [ emit-simd-hrshift ] }
354 { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
355 { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
356 { (simd-vmerge-head) [ emit-simd-vmerge-head ] }
357 { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
358 { (simd-v<=) [ emit-simd-v<= ] }
359 { (simd-v<) [ emit-simd-v< ] }
360 { (simd-v=) [ emit-simd-v= ] }
361 { (simd-v>) [ emit-simd-v> ] }
362 { (simd-v>=) [ emit-simd-v>= ] }
363 { (simd-vunordered?) [ emit-simd-vunordered? ] }
364 { (simd-vany?) [ emit-simd-vany? ] }
365 { (simd-vall?) [ emit-simd-vall? ] }
366 { (simd-vnone?) [ emit-simd-vnone? ] }
367 { (simd-v>float) [ emit-simd-v>float ] }
368 { (simd-v>integer) [ emit-simd-v>integer ] }
369 { (simd-vpack-signed) [ emit-simd-vpack-signed ] }
370 { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
371 { (simd-vunpack-head) [ emit-simd-vunpack-head ] }
372 { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
373 { (simd-with) [ emit-simd-with ] }
374 { (simd-gather-2) [ emit-simd-gather-2 ] }
375 { (simd-gather-4) [ emit-simd-gather-4 ] }
376 { (simd-select) [ emit-simd-select ] }
377 { alien-vector [ emit-alien-vector ] }
378 { set-alien-vector [ emit-set-alien-vector ] }
! NOTE(review): the aligned accessors are mapped to the unaligned emitters
! even though emit-alien-vector-aligned / emit-set-alien-vector-aligned are
! declared in this file -- confirm whether this is intentional.
379 { alien-vector-aligned [ emit-alien-vector ] }
380 { set-alien-vector-aligned [ emit-set-alien-vector ] }
381 } enable-intrinsics ;