1 ! Copyright (C) 2009 Slava Pestov, Joe Groff.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien alien.c-types byte-arrays fry
4 classes.algebra cpu.architecture kernel layouts math sequences
5 math.vectors math.vectors.simd.intrinsics
6 macros generalizations combinators combinators.short-circuit
7 arrays locals compiler.tree.propagation.info
8 compiler.cfg.builder.blocks
9 compiler.cfg.comparisons
10 compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
11 compiler.cfg.instructions compiler.cfg.registers
12 compiler.cfg.intrinsics
13 compiler.cfg.intrinsics.alien
14 compiler.cfg.intrinsics.simd.backend
16 FROM: alien.c-types => heap-size char short int longlong float double ;
17 SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
18 IN: compiler.cfg.intrinsics.simd
! Sign-bit constant for an integer vector rep. Each case below lists one
! lane-sized value with only its most significant bit set (HEX: 80,
! HEX: 8000, HEX: 8000,0000, HEX: 8000,0000,0000,0000), repeated to fill
! 16 bytes. ^compare-vector loads this mask for unsigned comparisons.
22 : sign-bit-mask ( rep -- byte-array )
24 { char-16-rep [ uchar-array{
25 HEX: 80 HEX: 80 HEX: 80 HEX: 80
26 HEX: 80 HEX: 80 HEX: 80 HEX: 80
27 HEX: 80 HEX: 80 HEX: 80 HEX: 80
28 HEX: 80 HEX: 80 HEX: 80 HEX: 80
30 { short-8-rep [ ushort-array{
31 HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
32 HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
34 { int-4-rep [ uint-array{
35 HEX: 8000,0000 HEX: 8000,0000
36 HEX: 8000,0000 HEX: 8000,0000
38 { longlong-2-rep [ ulonglong-array{
39 HEX: 8000,0000,0000,0000
40 HEX: 8000,0000,0000,0000
! Loads, via ^^load-constant, a float vector constant holding -0.0 in every
! lane: four lanes for float-4-rep, two lanes for double-2-rep.
44 : ^load-neg-zero-vector ( rep -- dst )
46 { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
47 { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
! Loads, via ^^load-constant, an alternating per-lane constant: -0.0 0.0 ...
! for float reps and -1 0 ... for integer reps. emit-simd-v+- xors its
! second operand with this constant.
50 : ^load-add-sub-vector ( rep -- dst )
52 { float-4-rep [ float-array{ -0.0 0.0 -0.0 0.0 } underlying>> ^^load-constant ] }
53 { double-2-rep [ double-array{ -0.0 0.0 } underlying>> ^^load-constant ] }
54 { char-16-rep [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
55 { short-8-rep [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
56 { int-4-rep [ int-array{ -1 0 -1 0 } underlying>> ^^load-constant ] }
57 { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
! Expands an element-index shuffle into a byte-index shuffle for rep.
! With s the byte size of one lane, element index i becomes the s
! consecutive byte indices i*s, i*s+1, ..., i*s+(s-1); the per-element
! byte arrays are concatenated in shuffle order.
:: >variable-shuffle ( shuffle rep -- shuffle' )
    rep rep-component-type heap-size :> size
    ! Multiplier: the lane size repeated once per byte of a lane.
    size size <repetition> >byte-array :> scale
    ! Byte offsets 0 .. size-1 within a lane.
    size iota >byte-array :> offsets
    shuffle [ scale n*v offsets v+ ] map concat ;
! Converts an element-index shuffle to its byte-level form with
! >variable-shuffle and emits the result through ^^load-constant.
:: ^load-immediate-shuffle ( shuffle rep -- dst )
    shuffle rep >variable-shuffle ^^load-constant ;
! Mask-driven selection between the vectors `true` and `false`: `true` is
! combined with mask through ^^and-vector, and `false` is combined with mask
! through ^^andn-vector.
69 :: ^blend-vector ( mask true false rep -- dst )
70 true mask rep ^^and-vector
71 mask false rep ^^andn-vector
! Bitwise complement of src. The alternative shown xors src against the
! vector produced by ^^fill-vector.
! NOTE(review): presumably ^^fill-vector yields all bits set -- confirm.
74 : ^not-vector ( src rep -- dst )
77 [ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
! Emits one ^^compare-vector. {cc,swap} is a two-element sequence holding a
! condition code and a swap flag; the two quotations below emit the
! comparison with the operands exchanged (src2 src1) or in the given order
! (src1 src2).
80 :: ^((compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
81 {cc,swap} first2 :> ( cc swap? )
83 [ src2 src1 rep cc ^^compare-vector ]
84 [ src1 src2 rep cc ^^compare-vector ] if ;
! Emits a vector comparison for orig-cc. %compare-vector-ccs supplies a
! sequence of {cc,swap} entries plus a not? flag. The first entry is compared
! with ^((compare-vector)); a further quotation compares again and combines
! with ^^or-vector; ^not-vector is applied when not? is set.
86 :: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
87 rep orig-cc %compare-vector-ccs :> ( ccs not? )
! Constant result: ^^fill-vector when not? is set, ^^zero-vector otherwise.
! NOTE(review): presumably the branch taken when ccs is empty -- confirm.
90 [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
92 ccs unclip :> ( rest-ccs first-cc )
93 src1 src2 rep first-cc ^((compare-vector)) :> first-dst
96 [ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ]
99 not? [ rep ^not-vector ] when
! Builds ordering comparisons from min/max plus an (in)equality test against
! src1, one case per condition code:
!   cc<  : max(src1,src2) /= src1      cc<= : min(src1,src2) = src1
!   cc>  : min(src1,src2) /= src1      cc>= : max(src1,src2) = src1
102 :: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
104 { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
105 { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] }
106 { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
107 { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] }
! Emits a vector comparison using one of the alternatives listed below:
! a direct ^(compare-vector), the min/max-based ^minmax-compare-vector, and,
! for unsigned integer reps, a comparison in the signed rep after xor-ing
! both operands with the rep's sign-bit mask.
110 : ^compare-vector ( src1 src2 rep cc -- dst )
112 [ ^(compare-vector) ]
113 [ ^minmax-compare-vector ]
114 { unsigned-int-vector-rep [| src1 src2 rep cc |
115 rep sign-bit-mask ^^load-constant :> sign-bits
116 src1 sign-bits rep ^^xor-vector
117 src2 sign-bits rep ^^xor-vector
118 rep signed-rep cc ^(compare-vector)
! Unpacks the head of src. Alternatives listed below:
!  - the ^^unpack-vector-head instruction;
!  - unsigned ints: ^^merge-vector-head of src with a zero vector;
!  - signed ints: merge src with itself, then ^^shr-vector-imm by the lane
!    width in bits (lane byte size * 8) using the widened rep;
!  - signed ints: merge src with a mask computed by comparing (0 > src).
122 : ^unpack-vector-head ( src rep -- dst )
124 [ ^^unpack-vector-head ]
125 { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
126 { signed-int-vector-rep [| src rep |
127 src src rep ^^merge-vector-head :> merged
128 rep rep-component-type heap-size 8 * :> bits
129 merged bits rep widen-vector-rep ^^shr-vector-imm
131 { signed-int-vector-rep [| src rep |
132 rep ^^zero-vector :> zero
133 zero src rep cc> ^compare-vector :> sign
134 src sign rep ^^merge-vector-head
! Unpacks the tail of src. Alternatives listed below:
!  - the ^^unpack-vector-tail instruction;
!  - ^^tail>head-vector followed by ^^unpack-vector-head;
!  - unsigned ints: ^^merge-vector-tail of src with a zero vector;
!  - signed ints: merge src with itself, then ^^shr-vector-imm by the lane
!    width in bits (lane byte size * 8) using the widened rep;
!  - signed ints: merge src with a mask computed by comparing (0 > src).
138 : ^unpack-vector-tail ( src rep -- dst )
140 [ ^^unpack-vector-tail ]
141 [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
142 { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
143 { signed-int-vector-rep [| src rep |
144 src src rep ^^merge-vector-tail :> merged
145 rep rep-component-type heap-size 8 * :> bits
146 merged bits rep widen-vector-rep ^^shr-vector-imm
148 { signed-int-vector-rep [| src rep |
149 rep ^^zero-vector :> zero
150 zero src rep cc> ^compare-vector :> sign
151 src sign rep ^^merge-vector-tail
! Integer vector reps whose lane type occupies fewer bytes than one machine
! cell. NOTE(review): presumably so that every lane value fits in a
! fixnum -- confirm.
PREDICATE: fixnum-vector-rep < int-vector-rep
    rep-component-type heap-size cell swap > ;
! Reduction step for a 2-lane vector. Alternatives listed below: a single
! ^^horizontal-add-vector of src with itself, or merging src with itself
! (head and tail) and adding the two merged vectors.
158 : ^(sum-vector-2) ( src rep -- dst )
160 [ dupd ^^horizontal-add-vector ]
162 src src rep ^^merge-vector-head :> head
163 src src rep ^^merge-vector-tail :> tail
164 head tail rep ^^add-vector
! Reduction step for a 4-lane vector. Alternatives listed below: two
! successive ^^horizontal-add-vector operations, or two rounds of
! merge-head/merge-tail + add, the second round merging in the widened rep.
168 : ^(sum-vector-4) ( src rep -- dst )
171 [ dupd ^^horizontal-add-vector ]
172 [ dupd ^^horizontal-add-vector ] bi
175 src src rep ^^merge-vector-head :> head
176 src src rep ^^merge-vector-tail :> tail
177 head tail rep ^^add-vector :> src'
179 rep widen-vector-rep :> rep'
180 src' src' rep' ^^merge-vector-head :> head'
181 src' src' rep' ^^merge-vector-tail :> tail'
! The merge uses the widened rep', but the add is done in the original rep.
182 head' tail' rep ^^add-vector
! Reduction step for an 8-lane vector. Alternatives listed below: three
! successive ^^horizontal-add-vector operations, or three rounds of
! merge-head/merge-tail + add, widening the merge rep after each round
! while every add stays in the original rep.
186 : ^(sum-vector-8) ( src rep -- dst )
189 [ dupd ^^horizontal-add-vector ]
190 [ dupd ^^horizontal-add-vector ]
191 [ dupd ^^horizontal-add-vector ] tri
194 src src rep ^^merge-vector-head :> head
195 src src rep ^^merge-vector-tail :> tail
196 head tail rep ^^add-vector :> src'
198 rep widen-vector-rep :> rep'
199 src' src' rep' ^^merge-vector-head :> head'
200 src' src' rep' ^^merge-vector-tail :> tail'
201 head' tail' rep ^^add-vector :> src''
203 rep' widen-vector-rep :> rep''
204 src'' src'' rep'' ^^merge-vector-head :> head''
205 src'' src'' rep'' ^^merge-vector-tail :> tail''
206 head'' tail'' rep ^^add-vector
! Reduction step for a 16-lane vector. Alternatives listed below: four
! ^^horizontal-add-vector operations, or four rounds of
! merge-head/merge-tail + add, widening the merge rep after each round
! while every add stays in the original rep.
210 : ^(sum-vector-16) ( src rep -- dst )
214 [ dupd ^^horizontal-add-vector ]
215 [ dupd ^^horizontal-add-vector ]
216 [ dupd ^^horizontal-add-vector ]
217 [ dupd ^^horizontal-add-vector ]
221 src src rep ^^merge-vector-head :> head
222 src src rep ^^merge-vector-tail :> tail
223 head tail rep ^^add-vector :> src'
225 rep widen-vector-rep :> rep'
226 src' src' rep' ^^merge-vector-head :> head'
227 src' src' rep' ^^merge-vector-tail :> tail'
228 head' tail' rep ^^add-vector :> src''
230 rep' widen-vector-rep :> rep''
231 src'' src'' rep'' ^^merge-vector-head :> head''
232 src'' src'' rep'' ^^merge-vector-tail :> tail''
233 head'' tail'' rep ^^add-vector :> src'''
235 rep'' widen-vector-rep :> rep'''
236 src''' src''' rep''' ^^merge-vector-head :> head'''
237 src''' src''' rep''' ^^merge-vector-tail :> tail'''
238 head''' tail''' rep ^^add-vector
! Sums the lanes of src into a scalar: dispatches on 2, 4, 8 or 16 to the
! matching ^(sum-vector-N) helper, then extracts the result with
! ^^vector>scalar.
! NOTE(review): the dispatch key is presumably the rep's lane count -- confirm.
242 : ^(sum-vector) ( src rep -- dst )
245 { 2 [ ^(sum-vector-2) ] }
246 { 4 [ ^(sum-vector-4) ] }
247 { 8 [ ^(sum-vector-8) ] }
248 { 16 [ ^(sum-vector-16) ] }
250 ] [ ^^vector>scalar ] bi ;
! Lane sum of src. Float reps go straight to ^(sum-vector). For
! fixnum-vector-rep the head and tail of src are first unpacked and added
! together in the widened rep, so the reduction continues on wide-rep.
252 : ^sum-vector ( src rep -- dst )
254 { float-vector-rep [ ^(sum-vector) ] }
255 { fixnum-vector-rep [| src rep |
256 src rep ^unpack-vector-head :> head
257 src rep ^unpack-vector-tail :> tail
258 rep widen-vector-rep :> wide-rep
259 head tail wide-rep ^^add-vector wide-rep
! True when obj is an array whose elements are all integers (an empty array
! qualifies); false for any non-array object.
: shuffle? ( obj -- ? )
    dup array? [ [ integer? ] all? ] [ drop f ] if ;
! Shuffles src1 by a literal element-index shuffle. The shuffle is first
! padded at the tail with 0 up to the rep's lane count. Alternatives listed
! below: the ^^shuffle-vector-imm instruction, or loading the shuffle as a
! constant (^load-immediate-shuffle) and using ^^shuffle-vector.
266 : ^shuffle-vector-imm ( src1 shuffle rep -- dst )
267 [ rep-length 0 pad-tail ] keep {
268 [ ^^shuffle-vector-imm ]
269 [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
! Replicates lane n of src into every lane: builds a shuffle array of
! rep-length entries, all equal to n, and hands it to ^shuffle-vector-imm.
:: ^broadcast-vector ( src n rep -- dst )
    src
    rep rep-length n <array>
    rep ^shuffle-vector-imm ;
! Fills a vector with a scalar: converts src with ^^scalar>vector, then
! broadcasts lane 0 of the result across all lanes.
:: ^with-vector ( src rep -- dst )
    src rep ^^scalar>vector
    0 rep ^broadcast-vector ;
! Extracts lane n of src as a scalar: broadcasts lane n across the vector,
! then converts the result with ^^vector>scalar.
:: ^select-vector ( src n rep -- dst )
    src n rep ^broadcast-vector
    rep ^^vector>scalar ;
! Intrinsic emitter registered for (simd-v+); its alternatives are handed
! to emit-vv-vector-op.
284 : emit-simd-v+ ( node -- )
287 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v-); its alternatives are handed
! to emit-vv-vector-op.
289 : emit-simd-v- ( node -- )
292 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vneg). Negation is emitted as a
! subtraction: (-0.0 vector) - src for float reps, (zero vector) - src for
! integer reps.
294 : emit-simd-vneg ( node -- )
296 { float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
297 { int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
! Intrinsic emitter registered for (simd-v+-). Both visible alternatives
! negate the lanes of src2 selected by the ^load-add-sub-vector constant and
! then add the result to src1.
300 : emit-simd-v+- ( node -- )
! Float: xor with the -0.0/0.0 pattern flips the sign bit of alternate lanes.
303 { float-vector-rep [| src1 src2 rep |
304 rep ^load-add-sub-vector :> signs
305 src2 signs rep ^^xor-vector :> src2'
306 src1 src2' rep ^^add-vector
! Integer: (x xor -1) - (-1) negates the lanes where signs is -1; lanes
! where signs is 0 are left unchanged.
308 { int-vector-rep [| src1 src2 rep |
309 rep ^load-add-sub-vector :> signs
310 src2 signs rep ^^xor-vector :> src2'
311 src2' signs rep ^^sub-vector :> src2''
312 src1 src2'' rep ^^add-vector
314 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vs+): float reps use the plain
! ^^add-vector, integer reps use ^^saturated-add-vector.
316 : emit-simd-vs+ ( node -- )
318 { float-vector-rep [ ^^add-vector ] }
319 { int-vector-rep [ ^^saturated-add-vector ] }
320 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vs-): float reps use the plain
! ^^sub-vector, integer reps use ^^saturated-sub-vector.
322 : emit-simd-vs- ( node -- )
324 { float-vector-rep [ ^^sub-vector ] }
325 { int-vector-rep [ ^^saturated-sub-vector ] }
326 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vs*): float reps use the plain
! ^^mul-vector, integer reps use ^^saturated-mul-vector.
328 : emit-simd-vs* ( node -- )
330 { float-vector-rep [ ^^mul-vector ] }
331 { int-vector-rep [ ^^saturated-mul-vector ] }
332 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v*); its alternatives are handed
! to emit-vv-vector-op.
334 : emit-simd-v* ( node -- )
337 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v/); its alternatives are handed
! to emit-vv-vector-op.
339 : emit-simd-v/ ( node -- )
342 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vmin). The visible alternative
! builds the result from a cc< comparison (^compare-vector) followed by
! ^blend-vector.
344 : emit-simd-vmin ( node -- )
348 [ cc< ^compare-vector ]
349 [ ^blend-vector ] 3bi
351 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vmax). The visible alternative
! builds the result from a cc> comparison (^compare-vector) followed by
! ^blend-vector.
353 : emit-simd-vmax ( node -- )
357 [ cc> ^compare-vector ]
358 [ ^blend-vector ] 3bi
360 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v.). For float reps the visible
! alternative multiplies the operands lane-wise (^^mul-vector) and then sums
! the lanes with ^sum-vector.
362 : emit-simd-v. ( node -- )
365 { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
366 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vsqrt).
368 : emit-simd-vsqrt ( node -- )
! Intrinsic emitter registered for (simd-sum).
373 : emit-simd-sum ( node -- )
! Intrinsic emitter registered for (simd-vabs). Visible alternatives:
!  - unsigned ints: the rep is dropped and the source is left as the result;
!  - floats: ^^andn-vector of the -0.0 constant with src
!    (NOTE(review): presumably clears each lane's sign bit -- confirm the
!    operand order of ^^andn-vector);
!  - ints: blend between (0 - src) and src using the mask (0 > src).
378 : emit-simd-vabs ( node -- )
380 { unsigned-int-vector-rep [ drop ] }
382 { float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
383 { int-vector-rep [| src rep |
384 rep ^^zero-vector :> zero
385 zero src rep ^^sub-vector :> -src
386 zero src rep cc> ^compare-vector :> sign
387 sign -src src rep ^blend-vector
! Intrinsic emitter registered for both (simd-vbitand) and (simd-vand); its
! alternatives are handed to emit-vv-vector-op.
391 : emit-simd-vand ( node -- )
394 } emit-vv-vector-op ;
! Intrinsic emitter registered for both (simd-vbitandn) and (simd-vandn);
! its alternatives are handed to emit-vv-vector-op.
396 : emit-simd-vandn ( node -- )
399 } emit-vv-vector-op ;
! Intrinsic emitter registered for both (simd-vbitor) and (simd-vor); its
! alternatives are handed to emit-vv-vector-op.
401 : emit-simd-vor ( node -- )
404 } emit-vv-vector-op ;
! Intrinsic emitter registered for both (simd-vbitxor) and (simd-vxor); its
! alternatives are handed to emit-vv-vector-op.
406 : emit-simd-vxor ( node -- )
409 } emit-vv-vector-op ;
! Intrinsic emitter registered for both (simd-vbitnot) and (simd-vnot).
411 : emit-simd-vnot ( node -- )
! Intrinsic emitter registered for (simd-vlshift). Its alternatives go to
! emit-vv-or-vl-vector-op together with an integer? test quotation.
416 : emit-simd-vlshift ( node -- )
421 } [ integer? ] emit-vv-or-vl-vector-op ;
! Intrinsic emitter registered for (simd-vrshift). Its alternatives go to
! emit-vv-or-vl-vector-op together with an integer? test quotation.
423 : emit-simd-vrshift ( node -- )
428 } [ integer? ] emit-vv-or-vl-vector-op ;
! Intrinsic emitter registered for (simd-hlshift): emits
! ^^horizontal-shl-vector-imm through emit-vl-vector-op, which is given an
! integer? test quotation.
430 : emit-simd-hlshift ( node -- )
432 [ ^^horizontal-shl-vector-imm ]
433 } [ integer? ] emit-vl-vector-op ;
! Intrinsic emitter registered for (simd-hrshift): emits
! ^^horizontal-shr-vector-imm through emit-vl-vector-op, which is given an
! integer? test quotation.
435 : emit-simd-hrshift ( node -- )
437 [ ^^horizontal-shr-vector-imm ]
438 } [ integer? ] emit-vl-vector-op ;
! Intrinsic emitter registered for (simd-vshuffle-elements): emits
! ^shuffle-vector-imm through emit-vl-vector-op, which is given the shuffle?
! test quotation (an array of integers).
440 : emit-simd-vshuffle-elements ( node -- )
442 [ ^shuffle-vector-imm ]
443 } [ shuffle? ] emit-vl-vector-op ;
! Intrinsic emitter registered for (simd-vshuffle-bytes); its alternatives
! are handed to emit-vv-vector-op.
445 : emit-simd-vshuffle-bytes ( node -- )
448 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vmerge-head): emits
! ^^merge-vector-head through emit-vv-vector-op.
450 : emit-simd-vmerge-head ( node -- )
452 [ ^^merge-vector-head ]
453 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vmerge-tail): emits
! ^^merge-vector-tail through emit-vv-vector-op.
455 : emit-simd-vmerge-tail ( node -- )
457 [ ^^merge-vector-tail ]
458 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v<=): ^compare-vector with cc<=.
460 : emit-simd-v<= ( node -- )
462 [ cc<= ^compare-vector ]
463 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v<): ^compare-vector with cc<.
464 : emit-simd-v< ( node -- )
466 [ cc< ^compare-vector ]
467 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v=): ^compare-vector with cc=.
468 : emit-simd-v= ( node -- )
470 [ cc= ^compare-vector ]
471 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v>): ^compare-vector with cc>.
472 : emit-simd-v> ( node -- )
474 [ cc> ^compare-vector ]
475 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-v>=): ^compare-vector with cc>=.
476 : emit-simd-v>= ( node -- )
478 [ cc>= ^compare-vector ]
479 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vunordered?): ^compare-vector with
! the cc/<>= condition code.
480 : emit-simd-vunordered? ( node -- )
482 [ cc/<>= ^compare-vector ]
483 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vany?): emits ^^test-vector with
! the vcc-any condition.
485 : emit-simd-vany? ( node -- )
487 [ vcc-any ^^test-vector ]
! Intrinsic emitter registered for (simd-vall?): emits ^^test-vector with
! the vcc-all condition.
489 : emit-simd-vall? ( node -- )
491 [ vcc-all ^^test-vector ]
! Intrinsic emitter registered for (simd-vnone?): emits ^^test-vector with
! the vcc-none condition.
493 : emit-simd-vnone? ( node -- )
495 [ vcc-none ^^test-vector ]
! Intrinsic emitter registered for (simd-v>float): float reps drop the rep
! and leave the source unchanged; integer reps emit ^^integer>float-vector.
498 : emit-simd-v>float ( node -- )
500 { float-vector-rep [ drop ] }
501 { int-vector-rep [ ^^integer>float-vector ] }
! Intrinsic emitter registered for (simd-v>integer): float reps emit
! ^^float>integer-vector; integer reps drop the rep and leave the source
! unchanged.
504 : emit-simd-v>integer ( node -- )
506 { float-vector-rep [ ^^float>integer-vector ] }
507 { int-vector-rep [ drop ] }
! Intrinsic emitter registered for (simd-vpack-signed): emits
! ^^signed-pack-vector through emit-vv-vector-op.
510 : emit-simd-vpack-signed ( node -- )
512 [ ^^signed-pack-vector ]
513 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vpack-unsigned): emits
! ^^unsigned-pack-vector through emit-vv-vector-op.
515 : emit-simd-vpack-unsigned ( node -- )
517 [ ^^unsigned-pack-vector ]
518 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-vunpack-head): delegates to
! ^unpack-vector-head.
520 : emit-simd-vunpack-head ( node -- )
522 [ ^unpack-vector-head ]
! Intrinsic emitter registered for (simd-vunpack-tail): delegates to
! ^unpack-vector-tail.
525 : emit-simd-vunpack-tail ( node -- )
527 [ ^unpack-vector-tail ]
! Intrinsic emitter registered for (simd-with): ^with-vector for
! fixnum-vector-rep and for float-vector-rep.
530 : emit-simd-with ( node -- )
532 { fixnum-vector-rep [ ^with-vector ] }
533 { float-vector-rep [ ^with-vector ] }
! Intrinsic emitter registered for (simd-gather-2): ^^gather-vector-2 for
! fixnum-vector-rep and float-vector-rep, through emit-vv-vector-op.
536 : emit-simd-gather-2 ( node -- )
538 { fixnum-vector-rep [ ^^gather-vector-2 ] }
539 { float-vector-rep [ ^^gather-vector-2 ] }
540 } emit-vv-vector-op ;
! Intrinsic emitter registered for (simd-gather-4): ^^gather-vector-4 for
! fixnum-vector-rep and float-vector-rep, through emit-vvvv-vector-op.
542 : emit-simd-gather-4 ( node -- )
544 { fixnum-vector-rep [ ^^gather-vector-4 ] }
545 { float-vector-rep [ ^^gather-vector-4 ] }
546 } emit-vvvv-vector-op ;
! Intrinsic emitter registered for (simd-select): ^select-vector for
! fixnum-vector-rep and float-vector-rep, through emit-vl-vector-op with an
! integer? test quotation.
548 : emit-simd-select ( node -- )
550 { fixnum-vector-rep [ ^select-vector ] }
551 { float-vector-rep [ ^select-vector ] }
552 } [ integer? ] emit-vl-vector-op ;
! Intrinsic emitter registered for alien-vector. Guarded by
! if-literals-match with a test that the literal is a member of
! %alien-vector-reps. The inlined path drops the top data-stack entry,
! prepares the alien getter, emits ^^alien-vector and pushes its result;
! inline-alien is given the inline-alien-getter? check.
554 : emit-alien-vector ( node -- )
557 ds-drop prepare-alien-getter
558 _ ^^alien-vector ds-push
560 [ inline-alien-getter? ] inline-alien
561 ] with { [ %alien-vector-reps member? ] } if-literals-match ;
! Intrinsic emitter registered for set-alien-vector. Guarded by
! if-literals-match with a test that the literal is a member of
! %alien-vector-reps. The inlined path drops the top data-stack entry,
! prepares the alien setter and pops the next data-stack entry; the
! applicability check is inline-alien-setter? with byte-array.
563 : emit-set-alien-vector ( node -- )
566 ds-drop prepare-alien-setter ds-pop
569 [ byte-array inline-alien-setter? ]
571 ] with { [ %alien-vector-reps member? ] } if-literals-match ;
575 { (simd-v+) [ emit-simd-v+ ] }
576 { (simd-v-) [ emit-simd-v- ] }
577 { (simd-vneg) [ emit-simd-vneg ] }
578 { (simd-v+-) [ emit-simd-v+- ] }
579 { (simd-vs+) [ emit-simd-vs+ ] }
580 { (simd-vs-) [ emit-simd-vs- ] }
581 { (simd-vs*) [ emit-simd-vs* ] }
582 { (simd-v*) [ emit-simd-v* ] }
583 { (simd-v/) [ emit-simd-v/ ] }
584 { (simd-vmin) [ emit-simd-vmin ] }
585 { (simd-vmax) [ emit-simd-vmax ] }
586 { (simd-v.) [ emit-simd-v. ] }
587 { (simd-vsqrt) [ emit-simd-vsqrt ] }
588 { (simd-sum) [ emit-simd-sum ] }
589 { (simd-vabs) [ emit-simd-vabs ] }
590 { (simd-vbitand) [ emit-simd-vand ] }
591 { (simd-vbitandn) [ emit-simd-vandn ] }
592 { (simd-vbitor) [ emit-simd-vor ] }
593 { (simd-vbitxor) [ emit-simd-vxor ] }
594 { (simd-vbitnot) [ emit-simd-vnot ] }
595 { (simd-vand) [ emit-simd-vand ] }
596 { (simd-vandn) [ emit-simd-vandn ] }
597 { (simd-vor) [ emit-simd-vor ] }
598 { (simd-vxor) [ emit-simd-vxor ] }
599 { (simd-vnot) [ emit-simd-vnot ] }
600 { (simd-vlshift) [ emit-simd-vlshift ] }
601 { (simd-vrshift) [ emit-simd-vrshift ] }
602 { (simd-hlshift) [ emit-simd-hlshift ] }
603 { (simd-hrshift) [ emit-simd-hrshift ] }
604 { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
605 { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
606 { (simd-vmerge-head) [ emit-simd-vmerge-head ] }
607 { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
608 { (simd-v<=) [ emit-simd-v<= ] }
609 { (simd-v<) [ emit-simd-v< ] }
610 { (simd-v=) [ emit-simd-v= ] }
611 { (simd-v>) [ emit-simd-v> ] }
612 { (simd-v>=) [ emit-simd-v>= ] }
613 { (simd-vunordered?) [ emit-simd-vunordered? ] }
614 { (simd-vany?) [ emit-simd-vany? ] }
615 { (simd-vall?) [ emit-simd-vall? ] }
616 { (simd-vnone?) [ emit-simd-vnone? ] }
617 { (simd-v>float) [ emit-simd-v>float ] }
618 { (simd-v>integer) [ emit-simd-v>integer ] }
619 { (simd-vpack-signed) [ emit-simd-vpack-signed ] }
620 { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
621 { (simd-vunpack-head) [ emit-simd-vunpack-head ] }
622 { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
623 { (simd-with) [ emit-simd-with ] }
624 { (simd-gather-2) [ emit-simd-gather-2 ] }
625 { (simd-gather-4) [ emit-simd-gather-4 ] }
626 { (simd-select) [ emit-simd-select ] }
627 { alien-vector [ emit-alien-vector ] }
628 { set-alien-vector [ emit-set-alien-vector ] }
629 } enable-intrinsics ;