1 ! Copyright (C) 2009 Slava Pestov, Joe Groff.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien alien.c-types byte-arrays fry
4 classes.algebra cpu.architecture kernel layouts math sequences
5 math.vectors math.vectors.simd.intrinsics
6 macros generalizations combinators combinators.short-circuit
7 arrays locals compiler.tree.propagation.info
8 compiler.cfg.builder.blocks
9 compiler.cfg.comparisons
10 compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
11 compiler.cfg.instructions compiler.cfg.registers
12 compiler.cfg.intrinsics
13 compiler.cfg.intrinsics.alien
14 compiler.cfg.intrinsics.simd.backend
16 FROM: alien.c-types => heap-size char short int longlong float double ;
17 SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
18 IN: compiler.cfg.intrinsics.simd
! Maps a vector representation to the bytes of an unsigned array literal.
! The visible elements (0x8000 for short-8-rep, 0x8000,0000 for int-4-rep)
! have only their top bit set.  ^compare-vector XORs both operands with
! this literal before doing a signed compare on unsigned reps.
! NOTE(review): this definition is truncated in this copy of the file --
! the dispatch wrapper, the char and longlong element values and the
! closing tokens are not present; restore them from upstream.
22 : sign-bit-mask ( rep -- byte-array )
24 { char-16-rep [ uchar-array{
30 { short-8-rep [ ushort-array{
31 0x8000 0x8000 0x8000 0x8000
32 0x8000 0x8000 0x8000 0x8000
34 { int-4-rep [ uint-array{
35 0x8000,0000 0x8000,0000
36 0x8000,0000 0x8000,0000
38 { longlong-2-rep [ ulonglong-array{
! For float-4-rep and double-2-rep, emits ^^load-literal on the bytes of
! an array whose elements are all -0.0.  Used by the float branches of
! emit-simd-vneg and emit-simd-vabs.
! NOTE(review): the opening brace and the closing dispatch line are
! missing from this copy of the file.
44 : ^load-neg-zero-vector ( rep -- dst )
46 { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-literal ] }
47 { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-literal ] }
! Emits ^^load-literal on an alternating literal: elements at even
! positions are -0.0 (float reps) or -1 (integer reps), elements at odd
! positions are 0.  emit-simd-v+- XORs its second operand with this
! vector.
! NOTE(review): the line that opens the dispatch table and the closing
! line are missing from this copy of the file.
50 : ^load-add-sub-vector ( rep -- dst )
52 { float-4-rep [ float-array{ -0.0 0.0 -0.0 0.0 } underlying>> ^^load-literal ] }
53 { double-2-rep [ double-array{ -0.0 0.0 } underlying>> ^^load-literal ] }
54 { char-16-rep [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-literal ] }
55 { short-8-rep [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-literal ] }
56 { int-4-rep [ int-array{ -1 0 -1 0 } underlying>> ^^load-literal ] }
57 { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-literal ] }
! For float-4-rep and double-2-rep, emits ^^load-literal on the bytes of
! an array whose elements are all 0.5.  emit-simd-vavg multiplies the sum
! of its operands by this vector.
! NOTE(review): the opening brace and the closing dispatch line are
! missing from this copy of the file.
60 : ^load-half-vector ( rep -- dst )
62 { float-4-rep [ float-array{ 0.5 0.5 0.5 0.5 } underlying>> ^^load-literal ] }
63 { double-2-rep [ double-array{ 0.5 0.5 } underlying>> ^^load-literal ] }
! Expands an element-level shuffle into a byte-level shuffle for rep.
! With w = byte size of one element of rep, every index i in shuffle
! becomes the w byte indices i*w, i*w+1, ..., i*w+(w-1); the per-element
! runs are concatenated in shuffle order.
:: >variable-shuffle ( shuffle rep -- shuffle' )
    rep rep-component-type heap-size :> width
    ! width copies of width: scaling by an index gives its base byte offset
    width width <repetition> >byte-array :> stride
    ! 0 .. width-1: byte offsets inside one element
    width iota >byte-array :> offsets
    shuffle [ stride n*v offsets v+ ] map concat ;
! Converts an element shuffle to its byte-level form with
! >variable-shuffle and emits ^^load-literal on the result.
:: ^load-immediate-shuffle ( shuffle rep -- dst )
    shuffle rep >variable-shuffle ^^load-literal ;
! Builds a blend of `true` and `false` under `mask` out of bitwise ops.
! The visible part emits ^^and-vector on ( true mask ) and
! ^^andn-vector on ( mask false ).
! NOTE(review): the final step that combines the two intermediate values
! and ends the definition is missing from this copy of the file.
75 :: ^blend-vector ( mask true false rep -- dst )
76 true mask rep ^^and-vector
77 mask false rep ^^andn-vector
! Bitwise NOT of a vector.  The visible alternative XORs src with the
! vector produced by ^^fill-vector for the same rep.
! NOTE(review): the surrounding alternatives list and the closing line
! are missing from this copy of the file.
80 : ^not-vector ( src rep -- dst )
83 [ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
! Emits one ^^compare-vector from a { cc swap? } pair.  The first
! quotation passes the operands as ( src2 src1 ), the second as
! ( src1 src2 ); `if` chooses between them.
! NOTE(review): the line that pushes the condition for `if` is missing
! from this copy of the file (presumably swap? -- confirm upstream).
86 :: ^((compare-vector)) ( src1 src2 rep {cc,swap} -- dst )
87 {cc,swap} first2 :> ( cc swap? )
89 [ src2 src1 rep cc ^^compare-vector ]
90 [ src1 src2 rep cc ^^compare-vector ] if ;
! Lowers a comparison with condition orig-cc into the primitive compares
! reported by %compare-vector-ccs, which yields a sequence of ccs plus a
! not? flag.  Visible pieces: a branch producing ^^fill-vector or
! ^^zero-vector depending on not?; a compare for the first cc; a
! quotation that ORs a further compare into the accumulated result; and
! a final ^not-vector applied when not? is true.
! NOTE(review): the control-flow lines joining these pieces and the
! closing line are missing from this copy of the file.
92 :: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
93 rep orig-cc %compare-vector-ccs :> ( ccs not? )
96 [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
98 ccs unclip :> ( rest-ccs first-cc )
99 src1 src2 rep first-cc ^((compare-vector)) :> first-dst
102 [ [ src1 src2 rep ] dip ^((compare-vector)) rep ^^or-vector ]
105 not? [ rep ^not-vector ] when
! Expresses an ordering compare through min/max plus an (in)equality
! compare against src1:
!   cc<  : max(src1,src2) cc/= src1      cc<= : min(src1,src2) cc= src1
!   cc>  : min(src1,src2) cc/= src1      cc>= : max(src1,src2) cc= src1
! NOTE(review): the line that opens the table and the closing dispatch
! line are missing from this copy of the file.
108 :: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
110 { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
111 { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] }
112 { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
113 { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] }
! Vector comparison entry point.  Visible alternatives: ^(compare-vector),
! ^minmax-compare-vector, and for unsigned-int-vector-rep a variant that
! XORs both operands with the sign-bit-mask literal and then compares
! them under the signed rep.
! NOTE(review): the alternatives-list delimiters and the closing line
! are missing from this copy of the file.
116 : ^compare-vector ( src1 src2 rep cc -- dst )
118 [ ^(compare-vector) ]
119 [ ^minmax-compare-vector ]
120 { unsigned-int-vector-rep [| src1 src2 rep cc |
121 rep sign-bit-mask ^^load-literal :> sign-bits
122 src1 sign-bits rep ^^xor-vector
123 src2 sign-bits rep ^^xor-vector
124 rep signed-rep cc ^(compare-vector)
! Widens the head half of a vector.  Visible alternatives:
!   - the ^^unpack-vector-head instruction;
!   - unsigned ints: ^^merge-vector-head of src with a zero vector;
!   - signed ints: merge src with itself, then ^^shr-vector-imm by the
!     element width in bits on the widened rep;
!   - signed ints: merge src with the mask from comparing zero cc> src.
! NOTE(review): closing delimiters of the alternatives and the final
! line are missing from this copy of the file.
128 : ^unpack-vector-head ( src rep -- dst )
130 [ ^^unpack-vector-head ]
131 { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
132 { signed-int-vector-rep [| src rep |
133 src src rep ^^merge-vector-head :> merged
134 rep rep-component-type heap-size 8 * :> bits
135 merged bits rep widen-vector-rep ^^shr-vector-imm
137 { signed-int-vector-rep [| src rep |
138 rep ^^zero-vector :> zero
139 zero src rep cc> ^compare-vector :> sign
140 src sign rep ^^merge-vector-head
! Widens the tail half of a vector.  Visible alternatives:
!   - the ^^unpack-vector-tail instruction;
!   - ^^tail>head-vector followed by ^^unpack-vector-head;
!   - unsigned ints: ^^merge-vector-tail of src with a zero vector;
!   - signed ints: merge src with itself, then ^^shr-vector-imm by the
!     element width in bits on the widened rep;
!   - signed ints: merge src with the mask from comparing zero cc> src.
! NOTE(review): closing delimiters of the alternatives and the final
! line are missing from this copy of the file.
144 : ^unpack-vector-tail ( src rep -- dst )
146 [ ^^unpack-vector-tail ]
147 [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
148 { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
149 { signed-int-vector-rep [| src rep |
150 src src rep ^^merge-vector-tail :> merged
151 rep rep-component-type heap-size 8 * :> bits
152 merged bits rep widen-vector-rep ^^shr-vector-imm
154 { signed-int-vector-rep [| src rep |
155 rep ^^zero-vector :> zero
156 zero src rep cc> ^compare-vector :> sign
157 src sign rep ^^merge-vector-tail
! An int-vector-rep whose element byte size is strictly smaller than the
! machine cell size.
PREDICATE: fixnum-vector-rep < int-vector-rep
    rep-component-type heap-size cell swap > ;
! One reduction step for a 2-element vector.  Visible alternatives:
! ^^horizontal-add-vector of src with itself, or merging src with itself
! head-wise and tail-wise and adding the two merged vectors.
! NOTE(review): the alternatives-list delimiters, the locals header of
! the second alternative and the closing line are missing from this copy.
164 : ^(sum-vector-2) ( src rep -- dst )
166 [ dupd ^^horizontal-add-vector ]
168 src src rep ^^merge-vector-head :> head
169 src src rep ^^merge-vector-tail :> tail
170 head tail rep ^^add-vector
! Two reduction steps for a 4-element vector.  Visible alternatives: two
! successive self horizontal-adds, or two merge-head/merge-tail/add
! rounds where the second round merges under the widened rep but still
! adds under the original rep.
! NOTE(review): the alternatives-list delimiters, the locals header and
! the closing line are missing from this copy of the file.
174 : ^(sum-vector-4) ( src rep -- dst )
177 [ dupd ^^horizontal-add-vector ]
178 [ dupd ^^horizontal-add-vector ] bi
181 src src rep ^^merge-vector-head :> head
182 src src rep ^^merge-vector-tail :> tail
183 head tail rep ^^add-vector :> src'
185 rep widen-vector-rep :> rep'
186 src' src' rep' ^^merge-vector-head :> head'
187 src' src' rep' ^^merge-vector-tail :> tail'
188 head' tail' rep ^^add-vector
! Three reduction steps for an 8-element vector.  Visible alternatives:
! three successive self horizontal-adds, or three merge-head/merge-tail/
! add rounds; each later round merges under a further-widened rep while
! every add uses the original rep.
! NOTE(review): the alternatives-list delimiters, the locals header and
! the closing line are missing from this copy of the file.
192 : ^(sum-vector-8) ( src rep -- dst )
195 [ dupd ^^horizontal-add-vector ]
196 [ dupd ^^horizontal-add-vector ]
197 [ dupd ^^horizontal-add-vector ] tri
200 src src rep ^^merge-vector-head :> head
201 src src rep ^^merge-vector-tail :> tail
202 head tail rep ^^add-vector :> src'
204 rep widen-vector-rep :> rep'
205 src' src' rep' ^^merge-vector-head :> head'
206 src' src' rep' ^^merge-vector-tail :> tail'
207 head' tail' rep ^^add-vector :> src''
209 rep' widen-vector-rep :> rep''
210 src'' src'' rep'' ^^merge-vector-head :> head''
211 src'' src'' rep'' ^^merge-vector-tail :> tail''
212 head'' tail'' rep ^^add-vector
! Four reduction steps for a 16-element vector.  Visible alternatives:
! four self horizontal-adds, or four merge-head/merge-tail/add rounds;
! each later round merges under a further-widened rep while every add
! uses the original rep.
! NOTE(review): the alternatives-list delimiters, the combinator that
! sequences the four horizontal adds, the locals header and the closing
! line are missing from this copy of the file.
216 : ^(sum-vector-16) ( src rep -- dst )
220 [ dupd ^^horizontal-add-vector ]
221 [ dupd ^^horizontal-add-vector ]
222 [ dupd ^^horizontal-add-vector ]
223 [ dupd ^^horizontal-add-vector ]
227 src src rep ^^merge-vector-head :> head
228 src src rep ^^merge-vector-tail :> tail
229 head tail rep ^^add-vector :> src'
231 rep widen-vector-rep :> rep'
232 src' src' rep' ^^merge-vector-head :> head'
233 src' src' rep' ^^merge-vector-tail :> tail'
234 head' tail' rep ^^add-vector :> src''
236 rep' widen-vector-rep :> rep''
237 src'' src'' rep'' ^^merge-vector-head :> head''
238 src'' src'' rep'' ^^merge-vector-tail :> tail''
239 head'' tail'' rep ^^add-vector :> src'''
241 rep'' widen-vector-rep :> rep'''
242 src''' src''' rep''' ^^merge-vector-head :> head'''
243 src''' src''' rep''' ^^merge-vector-tail :> tail'''
244 head''' tail''' rep ^^add-vector
! Sums all elements of a vector: picks ^(sum-vector-2/4/8/16) by a key of
! 2, 4, 8 or 16, then extracts the scalar with ^^vector>scalar (both
! quotations are applied through `bi`).
! NOTE(review): the lines computing the dispatch key and opening the
! first quotation are missing from this copy (presumably the key is the
! rep's element count -- confirm upstream).
248 : ^(sum-vector) ( src rep -- dst )
251 { 2 [ ^(sum-vector-2) ] }
252 { 4 [ ^(sum-vector-4) ] }
253 { 8 [ ^(sum-vector-8) ] }
254 { 16 [ ^(sum-vector-16) ] }
256 ] [ ^^vector>scalar ] bi ;
! Element sum with rep-specific handling.  Float reps go straight to
! ^(sum-vector).  For fixnum-vector-rep the visible code unpacks the head
! and tail halves, adds them under the widened rep and leaves that wide
! rep on the stack for whatever follows.
! NOTE(review): the remainder of the fixnum branch and the closing lines
! are missing from this copy of the file.
258 : ^sum-vector ( src rep -- dst )
260 { float-vector-rep [ ^(sum-vector) ] }
261 { fixnum-vector-rep [| src rep |
262 src rep ^unpack-vector-head :> head
263 src rep ^unpack-vector-tail :> tail
264 rep widen-vector-rep :> wide-rep
265 head tail wide-rep ^^add-vector wide-rep
! True when obj is an array all of whose elements are integers (an empty
! array qualifies); false for anything else.
: shuffle? ( obj -- ? )
    dup array? [ [ integer? ] all? ] [ drop f ] if ;
! Shuffles one vector by a literal index sequence.  The shuffle is first
! padded at the end with 0 up to rep-length.  Visible alternatives: the
! ^^shuffle-vector-imm instruction, or loading the shuffle as a literal
! with ^load-immediate-shuffle and using ^^shuffle-vector.
! NOTE(review): the closing line of the alternatives list is missing
! from this copy of the file.
272 : ^shuffle-vector-imm ( src1 shuffle rep -- dst )
273 [ rep-length 0 pad-tail ] keep {
274 [ ^^shuffle-vector-imm ]
275 [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
! Shuffles elements drawn from two vectors; only a double-2-rep case is
! visible.  Indices i and j are reduced mod 4:
!   - both below 2: a plain shuffle of src1;
!   - both 2 or above: a shuffle of src2 with every index lowered by 2;
!   - the two remaining quotations emit ^^shuffle-vector-halves-imm with
!     ( src1 src2 ) and ( src2 src1 ), lowering the index taken from src2.
! NOTE(review): the condition guarding the third case, the cond
! delimiters and the closing lines are missing from this copy.
278 : ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst )
279 [ rep-length 0 pad-tail ] keep {
280 { double-2-rep [| src1 src2 shuffle rep |
281 shuffle first2 [ 4 mod ] bi@ :> ( i j )
283 { [ i j [ 2 < ] both? ] [
284 src1 shuffle rep ^shuffle-vector-imm
286 { [ i j [ 2 >= ] both? ] [
287 src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm
290 src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm
293 [ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ]
! Replicates element n of src into every position: builds a shuffle of
! rep-length copies of n and applies it with ^shuffle-vector-imm.
:: ^broadcast-vector ( src n rep -- dst )
    src
    rep rep-length n <array>
    rep ^shuffle-vector-imm ;
! Turns a scalar into a vector with ^^scalar>vector, then broadcasts
! element 0 of that vector with ^broadcast-vector.
:: ^with-vector ( src rep -- dst )
    src rep ^^scalar>vector
    0 rep ^broadcast-vector ;
! Extracts element n of a vector as a scalar.  The visible alternative
! broadcasts element n with ^broadcast-vector and then applies
! ^^vector>scalar under the same rep.
! NOTE(review): the other alternatives and the closing line are missing
! from this copy of the file.
305 : ^select-vector ( src n rep -- dst )
308 [ [ ^broadcast-vector ] keep ^^vector>scalar ]
! Emitter registered for (simd-v+) in the intrinsics table at the end of
! this file; hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
313 : emit-simd-v+ ( node -- )
316 } emit-vv-vector-op ;
! Emitter registered for (simd-v-) in the intrinsics table at the end of
! this file; hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
318 : emit-simd-v- ( node -- )
321 } emit-vv-vector-op ;
! Emitter registered for (simd-vneg).  Float reps: ^^sub-vector of src
! from a vector of -0.0.  Int reps: ^^sub-vector of src from a zero
! vector.
! NOTE(review): the opening brace and the closing line are missing from
! this copy of the file.
323 : emit-simd-vneg ( node -- )
325 { float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
326 { int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
! Emitter registered for (simd-v+-).  Both visible branches XOR src2
! with the alternating ^load-add-sub-vector literal and then add src1.
! The int branch also subtracts the same literal from the XOR result
! before adding.
! NOTE(review): other alternatives and the closing delimiters of the
! two branches are missing from this copy of the file.
329 : emit-simd-v+- ( node -- )
332 { float-vector-rep [| src1 src2 rep |
333 rep ^load-add-sub-vector :> signs
334 src2 signs rep ^^xor-vector :> src2'
335 src1 src2' rep ^^add-vector
337 { int-vector-rep [| src1 src2 rep |
338 rep ^load-add-sub-vector :> signs
339 src2 signs rep ^^xor-vector :> src2'
340 src2' signs rep ^^sub-vector :> src2''
341 src1 src2'' rep ^^add-vector
343 } emit-vv-vector-op ;
! Emitter registered for (simd-vs+): ^^add-vector for float reps,
! ^^saturated-add-vector for int reps.
! NOTE(review): the opening brace of the table is missing from this copy.
345 : emit-simd-vs+ ( node -- )
347 { float-vector-rep [ ^^add-vector ] }
348 { int-vector-rep [ ^^saturated-add-vector ] }
349 } emit-vv-vector-op ;
! Emitter registered for (simd-vs-): ^^sub-vector for float reps,
! ^^saturated-sub-vector for int reps.
! NOTE(review): the opening brace of the table is missing from this copy.
351 : emit-simd-vs- ( node -- )
353 { float-vector-rep [ ^^sub-vector ] }
354 { int-vector-rep [ ^^saturated-sub-vector ] }
355 } emit-vv-vector-op ;
! Emitter registered for (simd-vs*): ^^mul-vector for float reps,
! ^^saturated-mul-vector for int reps.
! NOTE(review): the opening brace of the table is missing from this copy.
357 : emit-simd-vs* ( node -- )
359 { float-vector-rep [ ^^mul-vector ] }
360 { int-vector-rep [ ^^saturated-mul-vector ] }
361 } emit-vv-vector-op ;
! Emitter registered for (simd-v*) in the intrinsics table at the end of
! this file; hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
363 : emit-simd-v* ( node -- )
366 } emit-vv-vector-op ;
! Emitter registered for (simd-v*high): emits ^^mul-high-vector through
! emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
368 : emit-simd-v*high ( node -- )
370 [ ^^mul-high-vector ]
371 } emit-vv-vector-op ;
! Emitter registered for (simd-v*hs+): emits ^^mul-horizontal-add-vector
! through emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
373 : emit-simd-v*hs+ ( node -- )
375 [ ^^mul-horizontal-add-vector ]
376 } emit-vv-vector-op ;
! Emitter registered for (simd-v/) in the intrinsics table at the end of
! this file; hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
378 : emit-simd-v/ ( node -- )
381 } emit-vv-vector-op ;
! Emitter registered for (simd-vmin).  The visible fragment runs a cc<
! ^compare-vector and ^blend-vector as the last two quotations of a 3bi.
! NOTE(review): the first 3bi quotation, any other alternatives and the
! surrounding delimiters are missing from this copy of the file.
383 : emit-simd-vmin ( node -- )
387 [ cc< ^compare-vector ]
388 [ ^blend-vector ] 3bi
390 } emit-vv-vector-op ;
! Emitter registered for (simd-vmax).  The visible fragment runs a cc>
! ^compare-vector and ^blend-vector as the last two quotations of a 3bi.
! NOTE(review): the first 3bi quotation, any other alternatives and the
! surrounding delimiters are missing from this copy of the file.
392 : emit-simd-vmax ( node -- )
396 [ cc> ^compare-vector ]
397 [ ^blend-vector ] 3bi
399 } emit-vv-vector-op ;
! Emitter registered for (simd-vavg).  The visible float branch adds the
! two operands and multiplies the sum by a vector of 0.5.
! NOTE(review): other alternatives and the closing delimiter of the
! float branch are missing from this copy of the file.
401 : emit-simd-vavg ( node -- )
404 { float-vector-rep [| src1 src2 rep |
405 src1 src2 rep ^^add-vector
406 rep ^load-half-vector rep ^^mul-vector
408 } emit-vv-vector-op ;
! Emitter registered for (simd-v.).  The visible float branch multiplies
! the operands element-wise and reduces the product with ^sum-vector.
! NOTE(review): earlier alternatives and the opening brace are missing
! from this copy of the file.
410 : emit-simd-v. ( node -- )
413 { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
414 } emit-vv-vector-op ;
! Emitter registered for (simd-vsad).  The visible fragment adds the
! ^^sad-vector result to a { 2 3 0 1 } shuffle of itself (both as
! int-4-rep), then takes ^^vector>scalar under the widened rep.
! NOTE(review): the lines enclosing these two quotations are missing
! from this copy of the file.
416 : emit-simd-vsad ( node -- )
419 [ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ]
420 [ widen-vector-rep ^^vector>scalar ] bi
422 } emit-vv-vector-op ;
! Emitter registered for (simd-vsqrt) in the intrinsics table at the end
! of this file.
! NOTE(review): only the header survives in this copy; the body is
! missing and must be restored from upstream.
424 : emit-simd-vsqrt ( node -- )
! Emitter registered for (simd-sum) in the intrinsics table at the end
! of this file.
! NOTE(review): only the header survives in this copy; the body is
! missing and must be restored from upstream.
429 : emit-simd-sum ( node -- )
! Emitter registered for (simd-vabs).  Visible branches:
!   - unsigned int reps: drops rep and leaves src untouched;
!   - float reps: ^^andn-vector of a -0.0 vector with src;
!   - int reps: computes zero - src and blends -src / src using the mask
!     from comparing zero cc> src.
! NOTE(review): other alternatives, closing delimiters and the final
! line are missing from this copy of the file.
434 : emit-simd-vabs ( node -- )
436 { unsigned-int-vector-rep [ drop ] }
438 { float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
439 { int-vector-rep [| src rep |
440 rep ^^zero-vector :> zero
441 zero src rep ^^sub-vector :> -src
442 zero src rep cc> ^compare-vector :> sign
443 sign -src src rep ^blend-vector
! Emitter registered for both (simd-vbitand) and (simd-vand) in the
! intrinsics table; hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
447 : emit-simd-vand ( node -- )
450 } emit-vv-vector-op ;
! Emitter registered for both (simd-vbitandn) and (simd-vandn) in the
! intrinsics table; hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
452 : emit-simd-vandn ( node -- )
455 } emit-vv-vector-op ;
! Emitter registered for both (simd-vbitor) and (simd-vor) in the
! intrinsics table; hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
457 : emit-simd-vor ( node -- )
460 } emit-vv-vector-op ;
! Emitter registered for both (simd-vbitxor) and (simd-vxor) in the
! intrinsics table; hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
462 : emit-simd-vxor ( node -- )
465 } emit-vv-vector-op ;
! Emitter registered for both (simd-vbitnot) and (simd-vnot) in the
! intrinsics table at the end of this file.
! NOTE(review): only the header survives in this copy; the body is
! missing and must be restored from upstream.
467 : emit-simd-vnot ( node -- )
! Emitter registered for (simd-vlshift); finishes with
! emit-vv-or-vl-vector-op, passing [ integer? ] as the literal test.
! NOTE(review): the alternatives themselves are missing from this copy.
472 : emit-simd-vlshift ( node -- )
477 } [ integer? ] emit-vv-or-vl-vector-op ;
! Emitter registered for (simd-vrshift); finishes with
! emit-vv-or-vl-vector-op, passing [ integer? ] as the literal test.
! NOTE(review): the alternatives themselves are missing from this copy.
479 : emit-simd-vrshift ( node -- )
484 } [ integer? ] emit-vv-or-vl-vector-op ;
! Emitter registered for (simd-hlshift): emits
! ^^horizontal-shl-vector-imm via emit-vl-vector-op with an [ integer? ]
! literal test.
! NOTE(review): the opening brace of the list is missing from this copy.
486 : emit-simd-hlshift ( node -- )
488 [ ^^horizontal-shl-vector-imm ]
489 } [ integer? ] emit-vl-vector-op ;
! Emitter registered for (simd-hrshift): emits
! ^^horizontal-shr-vector-imm via emit-vl-vector-op with an [ integer? ]
! literal test.
! NOTE(review): the opening brace of the list is missing from this copy.
491 : emit-simd-hrshift ( node -- )
493 [ ^^horizontal-shr-vector-imm ]
494 } [ integer? ] emit-vl-vector-op ;
! Emitter registered for (simd-vshuffle-elements): uses
! ^shuffle-vector-imm via emit-vl-vector-op, with shuffle? as the
! literal test.
! NOTE(review): the opening brace of the list is missing from this copy.
496 : emit-simd-vshuffle-elements ( node -- )
498 [ ^shuffle-vector-imm ]
499 } [ shuffle? ] emit-vl-vector-op ;
! Emitter registered for (simd-vshuffle2-elements): uses
! ^shuffle-2-vectors-imm via emit-vvl-vector-op, with shuffle? as the
! literal test.
! NOTE(review): the opening brace of the list is missing from this copy.
501 : emit-simd-vshuffle2-elements ( node -- )
503 [ ^shuffle-2-vectors-imm ]
504 } [ shuffle? ] emit-vvl-vector-op ;
! Emitter registered for (simd-vshuffle-bytes) in the intrinsics table;
! hands its alternatives to emit-vv-vector-op.
! NOTE(review): the alternatives themselves are missing from this copy.
506 : emit-simd-vshuffle-bytes ( node -- )
509 } emit-vv-vector-op ;
! Emitter registered for (simd-vmerge-head): emits ^^merge-vector-head
! through emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
511 : emit-simd-vmerge-head ( node -- )
513 [ ^^merge-vector-head ]
514 } emit-vv-vector-op ;
! Emitter registered for (simd-vmerge-tail): emits ^^merge-vector-tail
! through emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
516 : emit-simd-vmerge-tail ( node -- )
518 [ ^^merge-vector-tail ]
519 } emit-vv-vector-op ;
! Emitter registered for (simd-v<=): ^compare-vector with cc<=, through
! emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
521 : emit-simd-v<= ( node -- )
523 [ cc<= ^compare-vector ]
524 } emit-vv-vector-op ;
! Emitter registered for (simd-v<): ^compare-vector with cc<, through
! emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
525 : emit-simd-v< ( node -- )
527 [ cc< ^compare-vector ]
528 } emit-vv-vector-op ;
! Emitter registered for (simd-v=): ^compare-vector with cc=, through
! emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
529 : emit-simd-v= ( node -- )
531 [ cc= ^compare-vector ]
532 } emit-vv-vector-op ;
! Emitter registered for (simd-v>): ^compare-vector with cc>, through
! emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
533 : emit-simd-v> ( node -- )
535 [ cc> ^compare-vector ]
536 } emit-vv-vector-op ;
! Emitter registered for (simd-v>=): ^compare-vector with cc>=, through
! emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
537 : emit-simd-v>= ( node -- )
539 [ cc>= ^compare-vector ]
540 } emit-vv-vector-op ;
! Emitter registered for (simd-vunordered?): ^compare-vector with
! cc/<>=, through emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
541 : emit-simd-vunordered? ( node -- )
543 [ cc/<>= ^compare-vector ]
544 } emit-vv-vector-op ;
! Emitter registered for (simd-vany?): emits ^^test-vector with vcc-any.
! NOTE(review): the enclosing list and the closing line naming the emit
! helper are missing from this copy of the file.
546 : emit-simd-vany? ( node -- )
548 [ vcc-any ^^test-vector ]
! Emitter registered for (simd-vall?): emits ^^test-vector with vcc-all.
! NOTE(review): the enclosing list and the closing line naming the emit
! helper are missing from this copy of the file.
550 : emit-simd-vall? ( node -- )
552 [ vcc-all ^^test-vector ]
! Emitter registered for (simd-vnone?): emits ^^test-vector with
! vcc-none.
! NOTE(review): the enclosing list and the closing line naming the emit
! helper are missing from this copy of the file.
554 : emit-simd-vnone? ( node -- )
556 [ vcc-none ^^test-vector ]
! Emitter registered for (simd-vgetmask): emits ^^move-vector-mask.
! NOTE(review): the enclosing list and the closing line naming the emit
! helper are missing from this copy of the file.
558 : emit-simd-vgetmask ( node -- )
560 [ ^^move-vector-mask ]
! Emitter registered for (simd-v>float).  Float reps: drops rep and
! leaves the source as is.  Int reps: emits ^^integer>float-vector.
! NOTE(review): the opening brace and the closing line naming the emit
! helper are missing from this copy of the file.
563 : emit-simd-v>float ( node -- )
565 { float-vector-rep [ drop ] }
566 { int-vector-rep [ ^^integer>float-vector ] }
! Emitter registered for (simd-v>integer).  Float reps: emits
! ^^float>integer-vector.  Int reps: drops rep and leaves the source as
! is.
! NOTE(review): the opening brace and the closing line naming the emit
! helper are missing from this copy of the file.
569 : emit-simd-v>integer ( node -- )
571 { float-vector-rep [ ^^float>integer-vector ] }
572 { int-vector-rep [ drop ] }
! Emitter registered for (simd-vpack-signed).  double-2-rep: packs each
! source with ^^float-pack-vector, then combines the two results with
! ^^shuffle-vector-halves-imm { 0 1 0 1 } under float-4-rep.  Int reps:
! ^^signed-pack-vector.
! NOTE(review): the opening brace and the closing delimiter of the
! double-2-rep branch are missing from this copy of the file.
575 : emit-simd-vpack-signed ( node -- )
577 { double-2-rep [| src1 src2 rep |
578 src1 double-2-rep ^^float-pack-vector :> dst-head
579 src2 double-2-rep ^^float-pack-vector :> dst-tail
580 dst-head dst-tail { 0 1 0 1 } float-4-rep ^^shuffle-vector-halves-imm
582 { int-vector-rep [ ^^signed-pack-vector ] }
583 } emit-vv-vector-op ;
! Emitter registered for (simd-vpack-unsigned): emits
! ^^unsigned-pack-vector through emit-vv-vector-op.
! NOTE(review): the opening brace of the list is missing from this copy.
585 : emit-simd-vpack-unsigned ( node -- )
587 [ ^^unsigned-pack-vector ]
588 } emit-vv-vector-op ;
! Emitter registered for (simd-vunpack-head): uses ^unpack-vector-head.
! NOTE(review): the enclosing list and the closing line naming the emit
! helper are missing from this copy of the file.
590 : emit-simd-vunpack-head ( node -- )
592 [ ^unpack-vector-head ]
! Emitter registered for (simd-vunpack-tail): uses ^unpack-vector-tail.
! NOTE(review): the enclosing list and the closing line naming the emit
! helper are missing from this copy of the file.
595 : emit-simd-vunpack-tail ( node -- )
597 [ ^unpack-vector-tail ]
! Emitter registered for (simd-with): uses ^with-vector for both
! fixnum-vector-rep and float-vector-rep.
! NOTE(review): the opening brace and the closing line naming the emit
! helper are missing from this copy of the file.
600 : emit-simd-with ( node -- )
602 { fixnum-vector-rep [ ^with-vector ] }
603 { float-vector-rep [ ^with-vector ] }
! Emitter registered for (simd-gather-2).  fixnum-vector-rep is listed
! twice on purpose-looking grounds: first with ^^gather-int-vector-2,
! then with ^^gather-vector-2; float reps use ^^gather-vector-2.
! NOTE(review): presumably the entries are tried in order and the second
! fixnum entry is a fallback -- confirm against emit-vv-vector-op.  The
! opening brace of the table is missing from this copy of the file.
606 : emit-simd-gather-2 ( node -- )
608 { fixnum-vector-rep [ ^^gather-int-vector-2 ] }
609 { fixnum-vector-rep [ ^^gather-vector-2 ] }
610 { float-vector-rep [ ^^gather-vector-2 ] }
611 } emit-vv-vector-op ;
! Emitter registered for (simd-gather-4).  fixnum-vector-rep is listed
! twice: first with ^^gather-int-vector-4, then with ^^gather-vector-4;
! float reps use ^^gather-vector-4.
! NOTE(review): presumably the entries are tried in order and the second
! fixnum entry is a fallback -- confirm against emit-vvvv-vector-op.
! The opening brace of the table is missing from this copy of the file.
613 : emit-simd-gather-4 ( node -- )
615 { fixnum-vector-rep [ ^^gather-int-vector-4 ] }
616 { fixnum-vector-rep [ ^^gather-vector-4 ] }
617 { float-vector-rep [ ^^gather-vector-4 ] }
618 } emit-vvvv-vector-op ;
! Emitter registered for (simd-select): uses ^select-vector for both
! fixnum-vector-rep and float-vector-rep, via emit-vl-vector-op with an
! [ integer? ] literal test.
! NOTE(review): the opening brace of the table is missing from this copy.
620 : emit-simd-select ( node -- )
622 { fixnum-vector-rep [ ^select-vector ] }
623 { float-vector-rep [ ^select-vector ] }
624 } [ integer? ] emit-vl-vector-op ;
! Emitter registered for alien-vector.  The whole expansion is guarded by
! if-literals-match, which requires the literal input to be a member of
! %alien-vector-reps.  The visible inner code drops a data-stack item,
! calls prepare-load-memory, emits ^^load-memory-imm with the captured
! value and f, and pushes the result; it is wrapped in inline-accessor
! with [ inline-load-memory? ] as its test.
! NOTE(review): the lines opening the outer and inner quotations are
! missing from this copy of the file.
626 : emit-alien-vector ( node -- )
629 ds-drop prepare-load-memory
630 _ f ^^load-memory-imm ds-push
632 [ inline-load-memory? ] inline-accessor
633 ] with { [ %alien-vector-reps member? ] } if-literals-match ;
! Emitter registered for set-alien-vector.  The whole expansion is
! guarded by if-literals-match, which requires the literal input to be a
! member of %alien-vector-reps.  The visible inner code drops a
! data-stack item, calls prepare-store-memory and emits
! ##store-memory-imm, with the captured value and f; the accessor test is
! [ byte-array inline-store-memory? ].
! NOTE(review): the lines opening the quotations and the call that
! consumes the accessor test are missing from this copy of the file.
635 : emit-set-alien-vector ( node -- )
638 ds-drop prepare-store-memory
639 _ f ##store-memory-imm,
641 [ byte-array inline-store-memory? ]
643 ] with { [ %alien-vector-reps member? ] } if-literals-match ;
! Table pairing each SIMD intrinsic word with the emitter quotation that
! expands it; the whole table is passed to enable-intrinsics.  The
! (simd-vbit*) and (simd-v*) logical words share the same emitters.
! assert-positive is mapped to [ drop ], i.e. its emitter discards the
! node and emits nothing.
! NOTE(review): the word header (name and stack effect) and the opening
! brace of this table are missing from this copy of the file; restore
! them from upstream.
647 { (simd-v+) [ emit-simd-v+ ] }
648 { (simd-v-) [ emit-simd-v- ] }
649 { (simd-vneg) [ emit-simd-vneg ] }
650 { (simd-v+-) [ emit-simd-v+- ] }
651 { (simd-vs+) [ emit-simd-vs+ ] }
652 { (simd-vs-) [ emit-simd-vs- ] }
653 { (simd-vs*) [ emit-simd-vs* ] }
654 { (simd-v*) [ emit-simd-v* ] }
655 { (simd-v*high) [ emit-simd-v*high ] }
656 { (simd-v*hs+) [ emit-simd-v*hs+ ] }
657 { (simd-v/) [ emit-simd-v/ ] }
658 { (simd-vmin) [ emit-simd-vmin ] }
659 { (simd-vmax) [ emit-simd-vmax ] }
660 { (simd-vavg) [ emit-simd-vavg ] }
661 { (simd-v.) [ emit-simd-v. ] }
662 { (simd-vsad) [ emit-simd-vsad ] }
663 { (simd-vsqrt) [ emit-simd-vsqrt ] }
664 { (simd-sum) [ emit-simd-sum ] }
665 { (simd-vabs) [ emit-simd-vabs ] }
666 { (simd-vbitand) [ emit-simd-vand ] }
667 { (simd-vbitandn) [ emit-simd-vandn ] }
668 { (simd-vbitor) [ emit-simd-vor ] }
669 { (simd-vbitxor) [ emit-simd-vxor ] }
670 { (simd-vbitnot) [ emit-simd-vnot ] }
671 { (simd-vand) [ emit-simd-vand ] }
672 { (simd-vandn) [ emit-simd-vandn ] }
673 { (simd-vor) [ emit-simd-vor ] }
674 { (simd-vxor) [ emit-simd-vxor ] }
675 { (simd-vnot) [ emit-simd-vnot ] }
676 { (simd-vlshift) [ emit-simd-vlshift ] }
677 { (simd-vrshift) [ emit-simd-vrshift ] }
678 { (simd-hlshift) [ emit-simd-hlshift ] }
679 { (simd-hrshift) [ emit-simd-hrshift ] }
680 { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
681 { (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] }
682 { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
683 { (simd-vmerge-head) [ emit-simd-vmerge-head ] }
684 { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
685 { (simd-v<=) [ emit-simd-v<= ] }
686 { (simd-v<) [ emit-simd-v< ] }
687 { (simd-v=) [ emit-simd-v= ] }
688 { (simd-v>) [ emit-simd-v> ] }
689 { (simd-v>=) [ emit-simd-v>= ] }
690 { (simd-vunordered?) [ emit-simd-vunordered? ] }
691 { (simd-vany?) [ emit-simd-vany? ] }
692 { (simd-vall?) [ emit-simd-vall? ] }
693 { (simd-vnone?) [ emit-simd-vnone? ] }
694 { (simd-v>float) [ emit-simd-v>float ] }
695 { (simd-v>integer) [ emit-simd-v>integer ] }
696 { (simd-vpack-signed) [ emit-simd-vpack-signed ] }
697 { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
698 { (simd-vunpack-head) [ emit-simd-vunpack-head ] }
699 { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
700 { (simd-with) [ emit-simd-with ] }
701 { (simd-gather-2) [ emit-simd-gather-2 ] }
702 { (simd-gather-4) [ emit-simd-gather-4 ] }
703 { (simd-select) [ emit-simd-select ] }
704 { alien-vector [ emit-alien-vector ] }
705 { set-alien-vector [ emit-set-alien-vector ] }
706 { assert-positive [ drop ] }
707 { (simd-vgetmask) [ emit-simd-vgetmask ] }
708 } enable-intrinsics ;