1 ! Copyright (C) 2009 Slava Pestov, Joe Groff.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien.c-types arrays assocs byte-arrays combinators
4 combinators.short-circuit compiler.cfg.comparisons
5 compiler.cfg.hats compiler.cfg.instructions
6 compiler.cfg.intrinsics compiler.cfg.intrinsics.alien
7 compiler.cfg.intrinsics.simd.backend compiler.cfg.stacks
8 cpu.architecture fry kernel layouts locals math math.vectors
9 math.vectors.simd.intrinsics sequences specialized-arrays ;
10 FROM: alien.c-types => heap-size char short int longlong float double ;
11 SPECIALIZED-ARRAYS: char uchar short ushort int uint longlong ulonglong float double ;
12 IN: compiler.cfg.intrinsics.simd
! Sign-bit masks keyed by vector representation; read by sign-bit-mask.
! The visible values (0x8000 and 0x8000,0000) each set only the top bit
! of a 16-bit and a 32-bit lane respectively.
! NOTE(review): several rows and brackets of this literal are not visible
! in this listing -- confirm against the full source.
16 CONSTANT: rep>bit-mask {
18 char-16-rep uchar-array{
26 short-8-rep ushort-array{
27 0x8000 0x8000 0x8000 0x8000
28 0x8000 0x8000 0x8000 0x8000
33 0x8000,0000 0x8000,0000
34 0x8000,0000 0x8000,0000
38 longlong-2-rep ulonglong-array{
! Looks up the rep>bit-mask entry for the signed counterpart of rep and
! returns the storage behind it (its underlying>> slot).
45 :: sign-bit-mask ( rep -- byte-array )
46 rep signed-rep rep>bit-mask at underlying>> ;
! Vectors holding -0.0 in every lane, keyed by float representation.
! Looked up by ^load-neg-zero-vector.
48 CONSTANT: rep>neg-zero {
49 { float-4-rep float-array{ -0.0 -0.0 -0.0 -0.0 } }
50 { double-2-rep double-array{ -0.0 -0.0 } }
! Emits a literal load of the all -0.0 vector registered for rep in
! rep>neg-zero.
53 :: ^load-neg-zero-vector ( rep -- dst )
54 rep rep>neg-zero at underlying>> ^^load-literal ;
! Alternating-lane patterns keyed by representation: -0.0 / 0.0 for the
! float reps and -1 / 0 for the integer reps, with the non-zero marker in
! the even-indexed lanes. Looked up by ^load-add-sub-vector.
56 CONSTANT: rep>add-sub {
57 { float-4-rep float-array{ -0.0 0.0 -0.0 0.0 } }
58 { double-2-rep double-array{ -0.0 0.0 } }
59 { char-16-rep char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } }
60 { short-8-rep short-array{ -1 0 -1 0 -1 0 -1 0 } }
61 { int-4-rep int-array{ -1 0 -1 0 } }
62 { longlong-2-rep longlong-array{ -1 0 } }
! Emits a literal load of the rep>add-sub pattern registered for the
! signed counterpart of rep.
65 :: ^load-add-sub-vector ( rep -- dst )
66 rep signed-rep rep>add-sub at underlying>> ^^load-literal ;
! Rows pairing each float representation with a vector of 0.5 in every lane.
! NOTE(review): presumably the rows of the rep>half table that
! ^load-half-vector reads; the table's header is not visible in this listing.
69 { float-4-rep float-array{ 0.5 0.5 0.5 0.5 } }
70 { double-2-rep double-array{ 0.5 0.5 } }
! Emits a literal load of the vector registered for rep in rep>half.
73 :: ^load-half-vector ( rep -- dst )
74 rep rep>half at underlying>> ^^load-literal ;
! Expands an element-index shuffle into a byte-index shuffle. With size
! being the byte size of rep's component type, every index n in shuffle
! becomes the size bytes n*size+0 .. n*size+(size-1); the per-element
! byte arrays are then concatenated.
76 :: >variable-shuffle ( shuffle rep -- shuffle' )
77 rep rep-component-type heap-size :> size
78 size size <repetition> >byte-array :> scale
79 size iota >byte-array :> offsets
80 shuffle [ scale n*v offsets v+ ] map concat ;
! Converts shuffle to its byte-index form for rep (>variable-shuffle) and
! emits a literal load of the result.
82 :: ^load-immediate-shuffle ( shuffle rep -- dst )
83 shuffle rep >variable-shuffle ^^load-literal ;
! Combines true and false under mask. The visible lines emit
! ^^and-vector on true and mask, and ^^andn-vector on mask and false.
! NOTE(review): the step that combines those two results is not visible
! in this listing.
85 :: ^blend-vector ( mask true false rep -- dst )
86 true mask rep ^^and-vector
87 mask false rep ^^andn-vector
! Vector NOT. The visible alternative builds a ^^fill-vector for rep and
! XORs src with it (presumably an all-ones vector -- confirm against
! ^^fill-vector). Other alternatives and the dispatching word are not
! visible in this listing.
90 : ^not-vector ( src rep -- dst )
93 [ [ ^^fill-vector ] [ ^^xor-vector ] bi ]
! Emits ^^compare-vector for a { cc swap? } pair: one branch passes the
! operands as src2 src1, the other as src1 src2.
! NOTE(review): the value tested by `if` is presumably swap?; that line
! is not visible in this listing.
96 :: ^swap-compare-vector ( src1 src2 rep {cc,swap} -- dst )
97 {cc,swap} first2 :> ( cc swap? )
99 [ src2 src1 rep cc ^^compare-vector ]
100 [ src1 src2 rep cc ^^compare-vector ] if ;
! Lowers a comparison with orig-cc. %compare-vector-ccs supplies a
! sequence of condition codes (ccs) plus a negate flag (not?).
! Visible pieces: a branch that yields ^^fill-vector or ^^zero-vector
! depending on not?; otherwise the first cc is compared with
! ^swap-compare-vector, each remaining cc is compared and ORed into the
! result, and ^not-vector is applied when not? is set.
! NOTE(review): the condition selecting the first branch and the
! combinator that folds over rest-ccs are not visible in this listing.
102 :: ^(compare-vector) ( src1 src2 rep orig-cc -- dst )
103 rep orig-cc %compare-vector-ccs :> ( ccs not? )
106 [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
108 ccs unclip :> ( rest-ccs first-cc )
109 src1 src2 rep first-cc ^swap-compare-vector :> first-dst
112 [ [ src1 src2 rep ] dip ^swap-compare-vector rep ^^or-vector ]
115 not? [ rep ^not-vector ] when
! Expresses ordered comparisons through min/max plus an (in)equality
! test against src1:
!   cc<   max(src1,src2) /= src1
!   cc<=  min(src1,src2)  = src1
!   cc>   min(src1,src2) /= src1
!   cc>=  max(src1,src2)  = src1
! NOTE(review): the expression producing the dispatch key and the
! dispatching word are not visible in this listing.
118 :: ^minmax-compare-vector ( src1 src2 rep cc -- dst )
120 { cc< [ src1 src2 rep ^^max-vector src1 rep cc/= ^(compare-vector) ] }
121 { cc<= [ src1 src2 rep ^^min-vector src1 rep cc= ^(compare-vector) ] }
122 { cc> [ src1 src2 rep ^^min-vector src1 rep cc/= ^(compare-vector) ] }
123 { cc>= [ src1 src2 rep ^^max-vector src1 rep cc= ^(compare-vector) ] }
! Vector comparison with several visible alternatives:
!   - ^(compare-vector) directly;
!   - ^minmax-compare-vector;
!   - for unsigned-int-vector-rep: XOR both operands with the sign-bit
!     mask and compare them under the signed representation.
! NOTE(review): the word that chooses among the alternatives is not
! visible in this listing.
126 : ^compare-vector ( src1 src2 rep cc -- dst )
128 [ ^(compare-vector) ]
129 [ ^minmax-compare-vector ]
130 { unsigned-int-vector-rep [| src1 src2 rep cc |
131 rep sign-bit-mask ^^load-literal :> sign-bits
132 src1 sign-bits rep ^^xor-vector
133 src2 sign-bits rep ^^xor-vector
134 rep signed-rep cc ^(compare-vector)
! Unpacks the head of src. Visible alternatives:
!   - ^^unpack-vector-head directly;
!   - unsigned: ^^merge-vector-head of src with a zero vector;
!   - signed: merge src with itself, then ^^shr-vector-imm by the
!     component width in bits, using the widened representation;
!   - signed: build a sign vector with (0 > src) and merge src with it.
! NOTE(review): the dispatching word is not visible in this listing.
138 : ^unpack-vector-head ( src rep -- dst )
140 [ ^^unpack-vector-head ]
141 { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-head ] bi ] }
142 { signed-int-vector-rep [| src rep |
143 src src rep ^^merge-vector-head :> merged
144 rep rep-component-type heap-size 8 * :> bits
145 merged bits rep widen-vector-rep ^^shr-vector-imm
147 { signed-int-vector-rep [| src rep |
148 rep ^^zero-vector :> zero
149 zero src rep cc> ^compare-vector :> sign
150 src sign rep ^^merge-vector-head
! Unpacks the tail of src. Visible alternatives:
!   - ^^unpack-vector-tail directly;
!   - ^^tail>head-vector followed by ^^unpack-vector-head;
!   - unsigned: ^^merge-vector-tail of src with a zero vector;
!   - signed: merge src with itself, then ^^shr-vector-imm by the
!     component width in bits, using the widened representation;
!   - signed: build a sign vector with (0 > src) and merge src with it.
! NOTE(review): the dispatching word is not visible in this listing.
154 : ^unpack-vector-tail ( src rep -- dst )
156 [ ^^unpack-vector-tail ]
157 [ [ ^^tail>head-vector ] [ ^^unpack-vector-head ] bi ]
158 { unsigned-int-vector-rep [ [ ^^zero-vector ] [ ^^merge-vector-tail ] bi ] }
159 { signed-int-vector-rep [| src rep |
160 src src rep ^^merge-vector-tail :> merged
161 rep rep-component-type heap-size 8 * :> bits
162 merged bits rep widen-vector-rep ^^shr-vector-imm
164 { signed-int-vector-rep [| src rep |
165 rep ^^zero-vector :> zero
166 zero src rep cc> ^compare-vector :> sign
167 src sign rep ^^merge-vector-tail
! An int-vector-rep whose component type occupies fewer bytes than a cell.
171 PREDICATE: fixnum-vector-rep < int-vector-rep
172 rep-component-type heap-size cell < ;
! Reduction step for 2-element vectors. Visible alternatives: one
! ^^horizontal-add-vector of src with itself, or merging src with itself
! head-wise and tail-wise and adding the two results.
! NOTE(review): the dispatching word is not visible in this listing.
174 : ^(sum-vector-2) ( src rep -- dst )
176 [ dupd ^^horizontal-add-vector ]
178 src src rep ^^merge-vector-head :> head
179 src src rep ^^merge-vector-tail :> tail
180 head tail rep ^^add-vector
! Reduction step for 4-element vectors. Visible alternatives: two
! successive ^^horizontal-add-vector steps, or two merge-head/merge-tail
! rounds. The second round merges in the widened representation rep',
! and every addition uses the original rep.
! NOTE(review): the dispatching word is not visible in this listing.
184 : ^(sum-vector-4) ( src rep -- dst )
187 [ dupd ^^horizontal-add-vector ]
188 [ dupd ^^horizontal-add-vector ] bi
191 src src rep ^^merge-vector-head :> head
192 src src rep ^^merge-vector-tail :> tail
193 head tail rep ^^add-vector :> src'
195 rep widen-vector-rep :> rep'
196 src' src' rep' ^^merge-vector-head :> head'
197 src' src' rep' ^^merge-vector-tail :> tail'
198 head' tail' rep ^^add-vector
! Reduction step for 8-element vectors. Visible alternatives: three
! successive ^^horizontal-add-vector steps, or three merge-head/merge-tail
! rounds. Each round merges in a further widened representation
! (rep, rep', rep''), and every addition uses the original rep.
! NOTE(review): the dispatching word is not visible in this listing.
202 : ^(sum-vector-8) ( src rep -- dst )
205 [ dupd ^^horizontal-add-vector ]
206 [ dupd ^^horizontal-add-vector ]
207 [ dupd ^^horizontal-add-vector ] tri
210 src src rep ^^merge-vector-head :> head
211 src src rep ^^merge-vector-tail :> tail
212 head tail rep ^^add-vector :> src'
214 rep widen-vector-rep :> rep'
215 src' src' rep' ^^merge-vector-head :> head'
216 src' src' rep' ^^merge-vector-tail :> tail'
217 head' tail' rep ^^add-vector :> src''
219 rep' widen-vector-rep :> rep''
220 src'' src'' rep'' ^^merge-vector-head :> head''
221 src'' src'' rep'' ^^merge-vector-tail :> tail''
222 head'' tail'' rep ^^add-vector
! Reduction step for 16-element vectors. Visible alternatives: four
! ^^horizontal-add-vector steps, or four merge-head/merge-tail rounds.
! Each round merges in a further widened representation
! (rep, rep', rep'', rep'''), and every addition uses the original rep.
! NOTE(review): the combinator joining the four horizontal adds and the
! dispatching word are not visible in this listing.
226 : ^(sum-vector-16) ( src rep -- dst )
230 [ dupd ^^horizontal-add-vector ]
231 [ dupd ^^horizontal-add-vector ]
232 [ dupd ^^horizontal-add-vector ]
233 [ dupd ^^horizontal-add-vector ]
237 src src rep ^^merge-vector-head :> head
238 src src rep ^^merge-vector-tail :> tail
239 head tail rep ^^add-vector :> src'
241 rep widen-vector-rep :> rep'
242 src' src' rep' ^^merge-vector-head :> head'
243 src' src' rep' ^^merge-vector-tail :> tail'
244 head' tail' rep ^^add-vector :> src''
246 rep' widen-vector-rep :> rep''
247 src'' src'' rep'' ^^merge-vector-head :> head''
248 src'' src'' rep'' ^^merge-vector-tail :> tail''
249 head'' tail'' rep ^^add-vector :> src'''
251 rep'' widen-vector-rep :> rep'''
252 src''' src''' rep''' ^^merge-vector-head :> head'''
253 src''' src''' rep''' ^^merge-vector-tail :> tail'''
254 head''' tail''' rep ^^add-vector
! Sums the lanes of src: selects ^(sum-vector-2/4/8/16) by a key of
! 2, 4, 8 or 16, then extracts the scalar with ^^vector>scalar.
! NOTE(review): the key is presumably the rep's element count; the
! expression computing it is not visible in this listing.
258 : ^(sum-vector) ( src rep -- dst )
261 { 2 [ ^(sum-vector-2) ] }
262 { 4 [ ^(sum-vector-4) ] }
263 { 8 [ ^(sum-vector-8) ] }
264 { 16 [ ^(sum-vector-16) ] }
266 ] [ ^^vector>scalar ] bi ;
! Lane sum by representation. Float reps go straight to ^(sum-vector).
! Fixnum reps first unpack the head and tail of src and add them in the
! widened representation.
! NOTE(review): what follows that addition, and the dispatching word,
! are not visible in this listing.
268 : ^sum-vector ( src rep -- dst )
270 { float-vector-rep [ ^(sum-vector) ] }
271 { fixnum-vector-rep [| src rep |
272 src rep ^unpack-vector-head :> head
273 src rep ^unpack-vector-tail :> tail
274 rep widen-vector-rep :> wide-rep
275 head tail wide-rep ^^add-vector wide-rep
! True only for an array whose elements are all integers.
280 : shuffle? ( obj -- ? ) dup array? [ [ integer? ] all? ] [ drop f ] if ;
! Shuffles src1 by a literal index sequence. The shuffle is first padded
! with zeros up to rep-length. Visible alternatives: ^^shuffle-vector-imm,
! or loading the shuffle as a literal byte-index vector
! (^load-immediate-shuffle) and using ^^shuffle-vector.
! NOTE(review): the dispatching word is not visible in this listing.
282 : ^shuffle-vector-imm ( src1 shuffle rep -- dst )
283 [ rep-length 0 pad-tail ] keep {
284 [ ^^shuffle-vector-imm ]
285 [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
! Two-input shuffle by a literal index sequence, padded with zeros up to
! rep-length. Visible double-2-rep handling, with i and j the two indices
! taken mod 4:
!   - both below 2: shuffle src1 alone;
!   - both 2 or above: shuffle src2 alone with every index reduced by 2;
!   - mixed: ^^shuffle-vector-halves-imm on (src1, src2) with { i j-2 },
!     or on (src2, src1) with { i-2 j }.
! NOTE(review): the test choosing between the two mixed cases and the
! dispatching words are not visible in this listing.
288 : ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst )
289 [ rep-length 0 pad-tail ] keep {
290 { double-2-rep [| src1 src2 shuffle rep |
291 shuffle first2 [ 4 mod ] bi@ :> ( i j )
293 { [ i j [ 2 < ] both? ] [
294 src1 shuffle rep ^shuffle-vector-imm
296 { [ i j [ 2 >= ] both? ] [
297 src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm
300 src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm
303 [ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ]
! Replicates element n of src into every lane: builds a shuffle of
! rep-length copies of n and applies it with ^shuffle-vector-imm.
308 :: ^broadcast-vector ( src n rep -- dst )
309 src rep rep-length n <array>
310 rep ^shuffle-vector-imm ;
! Turns a scalar into a vector with ^^scalar>vector, then broadcasts
! element 0 of that vector to every lane.
312 :: ^with-vector ( src rep -- dst )
313 src rep ^^scalar>vector 0 rep ^broadcast-vector ;
! Extracts element n of src as a scalar. The visible alternative
! broadcasts element n to every lane and then applies ^^vector>scalar.
! NOTE(review): other alternatives and the dispatching word are not
! visible in this listing.
315 : ^select-vector ( src n rep -- dst )
318 [ [ ^broadcast-vector ] keep ^^vector>scalar ]
! Emitter for (simd-v+): hands an op table to emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
323 : emit-simd-v+ ( node -- )
326 } emit-vv-vector-op ;
! Emitter for (simd-v-): hands an op table to emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
328 : emit-simd-v- ( node -- )
331 } emit-vv-vector-op ;
! Emitter for (simd-vneg). Float reps compute (-0.0) - src; integer reps
! compute 0 - src.
! NOTE(review): the word consuming this table is not visible in this listing.
333 : emit-simd-vneg ( node -- )
335 { float-vector-rep [ [ ^load-neg-zero-vector swap ] [ ^^sub-vector ] bi ] }
336 { int-vector-rep [ [ ^^zero-vector swap ] [ ^^sub-vector ] bi ] }
! Emitter for (simd-v+-). Both visible rows adjust src2 with the
! rep>add-sub pattern and then add it to src1:
!   - float: XOR src2 with the -0.0 / 0.0 pattern, which flips the sign
!     of the lanes marked -0.0;
!   - int: XOR src2 with the -1 / 0 pattern and then subtract that same
!     pattern, which negates the lanes marked -1 (two's complement).
339 : emit-simd-v+- ( node -- )
342 { float-vector-rep [| src1 src2 rep |
343 rep ^load-add-sub-vector :> signs
344 src2 signs rep ^^xor-vector :> src2'
345 src1 src2' rep ^^add-vector
347 { int-vector-rep [| src1 src2 rep |
348 rep ^load-add-sub-vector :> signs
349 src2 signs rep ^^xor-vector :> src2'
350 src2' signs rep ^^sub-vector :> src2''
351 src1 src2'' rep ^^add-vector
353 } emit-vv-vector-op ;
! Emitter for (simd-vs+): float reps use the plain ^^add-vector, integer
! reps use ^^saturated-add-vector.
355 : emit-simd-vs+ ( node -- )
357 { float-vector-rep [ ^^add-vector ] }
358 { int-vector-rep [ ^^saturated-add-vector ] }
359 } emit-vv-vector-op ;
! Emitter for (simd-vs-): float reps use the plain ^^sub-vector, integer
! reps use ^^saturated-sub-vector.
361 : emit-simd-vs- ( node -- )
363 { float-vector-rep [ ^^sub-vector ] }
364 { int-vector-rep [ ^^saturated-sub-vector ] }
365 } emit-vv-vector-op ;
! Emitter for (simd-vs*): float reps use the plain ^^mul-vector, integer
! reps use ^^saturated-mul-vector.
367 : emit-simd-vs* ( node -- )
369 { float-vector-rep [ ^^mul-vector ] }
370 { int-vector-rep [ ^^saturated-mul-vector ] }
371 } emit-vv-vector-op ;
! Emitter for (simd-v*): hands an op table to emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
373 : emit-simd-v* ( node -- )
376 } emit-vv-vector-op ;
! Emitter for (simd-v*high): emits ^^mul-high-vector through
! emit-vv-vector-op.
378 : emit-simd-v*high ( node -- )
380 [ ^^mul-high-vector ]
381 } emit-vv-vector-op ;
! Emitter for (simd-v*hs+): emits ^^mul-horizontal-add-vector through
! emit-vv-vector-op.
383 : emit-simd-v*hs+ ( node -- )
385 [ ^^mul-horizontal-add-vector ]
386 } emit-vv-vector-op ;
! Emitter for (simd-v/): hands an op table to emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
388 : emit-simd-v/ ( node -- )
391 } emit-vv-vector-op ;
! Emitter for (simd-vmin). The visible fallback compares the operands
! with cc< and feeds the result to ^blend-vector.
! NOTE(review): the other table rows and the first quotation given to
! 3bi are not visible in this listing.
393 : emit-simd-vmin ( node -- )
397 [ cc< ^compare-vector ]
398 [ ^blend-vector ] 3bi
400 } emit-vv-vector-op ;
! Emitter for (simd-vmax). The visible fallback compares the operands
! with cc> and feeds the result to ^blend-vector.
! NOTE(review): the other table rows and the first quotation given to
! 3bi are not visible in this listing.
402 : emit-simd-vmax ( node -- )
406 [ cc> ^compare-vector ]
407 [ ^blend-vector ] 3bi
409 } emit-vv-vector-op ;
! Emitter for (simd-vavg). The visible float row computes
! (src1 + src2) * 0.5 using the vector from ^load-half-vector.
! NOTE(review): any other rows are not visible in this listing.
411 : emit-simd-vavg ( node -- )
414 { float-vector-rep [| src1 src2 rep |
415 src1 src2 rep ^^add-vector
416 rep ^load-half-vector rep ^^mul-vector
418 } emit-vv-vector-op ;
! Emitter for (simd-v.) (dot product). The visible float row multiplies
! the operands lane-wise and reduces the product with ^sum-vector.
! NOTE(review): any other rows are not visible in this listing.
420 : emit-simd-v. ( node -- )
423 { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
424 } emit-vv-vector-op ;
! Emitter for (simd-vsad). Emits ^^sad-vector, adds the result to a copy
! of itself shuffled by { 2 3 0 1 } as int-4-rep, then applies
! ^^vector>scalar in the widened representation.
! NOTE(review): the row header for these quotations is not visible in
! this listing.
426 : emit-simd-vsad ( node -- )
429 [ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ]
430 [ widen-vector-rep ^^vector>scalar ] bi
432 } emit-vv-vector-op ;
! Emitter registered for (simd-vsqrt) in the intrinsics table.
! NOTE(review): the body of this word is not visible in this listing.
434 : emit-simd-vsqrt ( node -- )
! Emitter registered for (simd-sum) in the intrinsics table.
! NOTE(review): the body of this word is not visible in this listing.
439 : emit-simd-sum ( node -- )
! Emitter for (simd-vabs). Visible rows:
!   - unsigned: the rep is dropped and src is left as the result;
!   - float: ^^andn-vector with the all -0.0 vector (presumably clearing
!     the sign bit -- confirm the operand order of ^^andn-vector);
!   - int: blend between (0 - src) and src, selected by the (0 > src) mask.
! NOTE(review): the word consuming this table is not visible in this listing.
444 : emit-simd-vabs ( node -- )
446 { unsigned-int-vector-rep [ drop ] }
448 { float-vector-rep [ [ ^load-neg-zero-vector ] [ swapd ^^andn-vector ] bi ] }
449 { int-vector-rep [| src rep |
450 rep ^^zero-vector :> zero
451 zero src rep ^^sub-vector :> -src
452 zero src rep cc> ^compare-vector :> sign
453 sign -src src rep ^blend-vector
! Emitter shared by (simd-vbitand) and (simd-vand): hands an op table to
! emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
457 : emit-simd-vand ( node -- )
460 } emit-vv-vector-op ;
! Emitter shared by (simd-vbitandn) and (simd-vandn): hands an op table
! to emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
462 : emit-simd-vandn ( node -- )
465 } emit-vv-vector-op ;
! Emitter shared by (simd-vbitor) and (simd-vor): hands an op table to
! emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
467 : emit-simd-vor ( node -- )
470 } emit-vv-vector-op ;
! Emitter shared by (simd-vbitxor) and (simd-vxor): hands an op table to
! emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
472 : emit-simd-vxor ( node -- )
475 } emit-vv-vector-op ;
! Emitter shared by (simd-vbitnot) and (simd-vnot) in the intrinsics table.
! NOTE(review): the body of this word is not visible in this listing.
477 : emit-simd-vnot ( node -- )
! Emitter for (simd-vlshift): hands an op table and an [ integer? ]
! quotation to emit-vv-or-vl-vector-op (presumably the test applied to a
! literal shift count -- confirm against that word).
! NOTE(review): the table's rows are not visible in this listing.
482 : emit-simd-vlshift ( node -- )
487 } [ integer? ] emit-vv-or-vl-vector-op ;
! Emitter for (simd-vrshift): hands an op table and an [ integer? ]
! quotation to emit-vv-or-vl-vector-op (presumably the test applied to a
! literal shift count -- confirm against that word).
! NOTE(review): the table's rows are not visible in this listing.
489 : emit-simd-vrshift ( node -- )
494 } [ integer? ] emit-vv-or-vl-vector-op ;
! Emitter for (simd-hlshift): emits ^^horizontal-shl-vector-imm through
! emit-vl-vector-op, passing [ integer? ] alongside the op table.
496 : emit-simd-hlshift ( node -- )
498 [ ^^horizontal-shl-vector-imm ]
499 } [ integer? ] emit-vl-vector-op ;
! Emitter for (simd-hrshift): emits ^^horizontal-shr-vector-imm through
! emit-vl-vector-op, passing [ integer? ] alongside the op table.
501 : emit-simd-hrshift ( node -- )
503 [ ^^horizontal-shr-vector-imm ]
504 } [ integer? ] emit-vl-vector-op ;
! Emitter for (simd-vshuffle-elements): uses ^shuffle-vector-imm through
! emit-vl-vector-op, passing [ shuffle? ] alongside the op table.
506 : emit-simd-vshuffle-elements ( node -- )
508 [ ^shuffle-vector-imm ]
509 } [ shuffle? ] emit-vl-vector-op ;
! Emitter for (simd-vshuffle2-elements): uses ^shuffle-2-vectors-imm
! through emit-vvl-vector-op, passing [ shuffle? ] alongside the op table.
511 : emit-simd-vshuffle2-elements ( node -- )
513 [ ^shuffle-2-vectors-imm ]
514 } [ shuffle? ] emit-vvl-vector-op ;
! Emitter for (simd-vshuffle-bytes): hands an op table to
! emit-vv-vector-op.
! NOTE(review): the table's rows are not visible in this listing.
516 : emit-simd-vshuffle-bytes ( node -- )
519 } emit-vv-vector-op ;
! Emitter for (simd-vmerge-head): emits ^^merge-vector-head through
! emit-vv-vector-op.
521 : emit-simd-vmerge-head ( node -- )
523 [ ^^merge-vector-head ]
524 } emit-vv-vector-op ;
! Emitter for (simd-vmerge-tail): emits ^^merge-vector-tail through
! emit-vv-vector-op.
526 : emit-simd-vmerge-tail ( node -- )
528 [ ^^merge-vector-tail ]
529 } emit-vv-vector-op ;
! Emitter for (simd-v<=): ^compare-vector with cc<=.
531 : emit-simd-v<= ( node -- )
533 [ cc<= ^compare-vector ]
534 } emit-vv-vector-op ;
! Emitter for (simd-v<): ^compare-vector with cc<.
535 : emit-simd-v< ( node -- )
537 [ cc< ^compare-vector ]
538 } emit-vv-vector-op ;
! Emitter for (simd-v=): ^compare-vector with cc=.
539 : emit-simd-v= ( node -- )
541 [ cc= ^compare-vector ]
542 } emit-vv-vector-op ;
! Emitter for (simd-v>): ^compare-vector with cc>.
543 : emit-simd-v> ( node -- )
545 [ cc> ^compare-vector ]
546 } emit-vv-vector-op ;
! Emitter for (simd-v>=): ^compare-vector with cc>=.
547 : emit-simd-v>= ( node -- )
549 [ cc>= ^compare-vector ]
550 } emit-vv-vector-op ;
! Emitter for (simd-vunordered?): ^compare-vector with cc/<>=.
551 : emit-simd-vunordered? ( node -- )
553 [ cc/<>= ^compare-vector ]
554 } emit-vv-vector-op ;
! Emitter for (simd-vany?): emits ^^test-vector with vcc-any.
! NOTE(review): the word consuming this table is not visible in this listing.
556 : emit-simd-vany? ( node -- )
558 [ vcc-any ^^test-vector ]
! Emitter for (simd-vall?): emits ^^test-vector with vcc-all.
! NOTE(review): the word consuming this table is not visible in this listing.
560 : emit-simd-vall? ( node -- )
562 [ vcc-all ^^test-vector ]
! Emitter for (simd-vnone?): emits ^^test-vector with vcc-none.
! NOTE(review): the word consuming this table is not visible in this listing.
564 : emit-simd-vnone? ( node -- )
566 [ vcc-none ^^test-vector ]
! Emitter for (simd-vgetmask): emits ^^move-vector-mask.
! NOTE(review): the word consuming this table is not visible in this listing.
568 : emit-simd-vgetmask ( node -- )
570 [ ^^move-vector-mask ]
! Emitter for (simd-v>float): float reps pass the source through
! unchanged (only the rep is dropped); integer reps emit
! ^^integer>float-vector.
! NOTE(review): the word consuming this table is not visible in this listing.
573 : emit-simd-v>float ( node -- )
575 { float-vector-rep [ drop ] }
576 { int-vector-rep [ ^^integer>float-vector ] }
! Emitter for (simd-v>integer): float reps emit ^^float>integer-vector;
! integer reps pass the source through unchanged (only the rep is dropped).
! NOTE(review): the word consuming this table is not visible in this listing.
579 : emit-simd-v>integer ( node -- )
581 { float-vector-rep [ ^^float>integer-vector ] }
582 { int-vector-rep [ drop ] }
! Emitter for (simd-vpack-signed).
!   - double-2-rep: ^^float-pack-vector each input, then combine the two
!     packed results with ^^shuffle-vector-halves-imm { 0 1 0 1 } as
!     float-4-rep;
!   - int reps: ^^signed-pack-vector.
585 : emit-simd-vpack-signed ( node -- )
587 { double-2-rep [| src1 src2 rep |
588 src1 double-2-rep ^^float-pack-vector :> dst-head
589 src2 double-2-rep ^^float-pack-vector :> dst-tail
590 dst-head dst-tail { 0 1 0 1 } float-4-rep ^^shuffle-vector-halves-imm
592 { int-vector-rep [ ^^signed-pack-vector ] }
593 } emit-vv-vector-op ;
! Emitter for (simd-vpack-unsigned): emits ^^unsigned-pack-vector through
! emit-vv-vector-op.
595 : emit-simd-vpack-unsigned ( node -- )
597 [ ^^unsigned-pack-vector ]
598 } emit-vv-vector-op ;
! Emitter for (simd-vunpack-head): uses ^unpack-vector-head.
! NOTE(review): the word consuming this table is not visible in this listing.
600 : emit-simd-vunpack-head ( node -- )
602 [ ^unpack-vector-head ]
! Emitter for (simd-vunpack-tail): uses ^unpack-vector-tail.
! NOTE(review): the word consuming this table is not visible in this listing.
605 : emit-simd-vunpack-tail ( node -- )
607 [ ^unpack-vector-tail ]
! Emitter for (simd-with): fixnum and float vector reps both use
! ^with-vector.
! NOTE(review): the word consuming this table is not visible in this listing.
610 : emit-simd-with ( node -- )
612 { fixnum-vector-rep [ ^with-vector ] }
613 { float-vector-rep [ ^with-vector ] }
! Emitter for (simd-gather-2). fixnum-vector-rep is listed twice, first
! with ^^gather-int-vector-2 and then with ^^gather-vector-2; float reps
! use ^^gather-vector-2.
! NOTE(review): presumably the rows are tried in order, making the second
! fixnum row a fallback -- confirm against emit-vv-vector-op.
616 : emit-simd-gather-2 ( node -- )
618 { fixnum-vector-rep [ ^^gather-int-vector-2 ] }
619 { fixnum-vector-rep [ ^^gather-vector-2 ] }
620 { float-vector-rep [ ^^gather-vector-2 ] }
621 } emit-vv-vector-op ;
! Emitter for (simd-gather-4). fixnum-vector-rep is listed twice, first
! with ^^gather-int-vector-4 and then with ^^gather-vector-4; float reps
! use ^^gather-vector-4.
! NOTE(review): presumably the rows are tried in order, making the second
! fixnum row a fallback -- confirm against emit-vvvv-vector-op.
623 : emit-simd-gather-4 ( node -- )
625 { fixnum-vector-rep [ ^^gather-int-vector-4 ] }
626 { fixnum-vector-rep [ ^^gather-vector-4 ] }
627 { float-vector-rep [ ^^gather-vector-4 ] }
628 } emit-vvvv-vector-op ;
! Emitter for (simd-select): fixnum and float vector reps both use
! ^select-vector, through emit-vl-vector-op with [ integer? ] passed
! alongside the op table.
630 : emit-simd-select ( node -- )
632 { fixnum-vector-rep [ ^select-vector ] }
633 { float-vector-rep [ ^select-vector ] }
634 } [ integer? ] emit-vl-vector-op ;
! Emitter for alien-vector. Guarded by if-literals-match with a
! [ %alien-vector-reps member? ] test. The visible code drops the top of
! the data stack, prepares the load address, emits ^^load-memory-imm and
! pushes the result; inline-accessor is given [ inline-load-memory? ].
! NOTE(review): the dropped stack item is presumably the literal rep;
! the opening lines of the quotations are not visible in this listing.
636 : emit-alien-vector ( block node -- block' )
639 ds-drop prepare-load-memory
640 _ f ^^load-memory-imm ds-push
642 [ inline-load-memory? ] inline-accessor
643 ] with { [ %alien-vector-reps member? ] } if-literals-match ;
! Emitter for set-alien-vector. Guarded by if-literals-match with a
! [ %alien-vector-reps member? ] test. The visible code drops the top of
! the data stack, prepares the store address and emits
! ##store-memory-imm,; inline-accessor is given
! [ byte-array inline-store-memory? ].
! NOTE(review): the dropped stack item is presumably the literal rep;
! the opening lines of the quotations are not visible in this listing.
645 : emit-set-alien-vector ( block node -- block' )
648 ds-drop prepare-store-memory
649 _ f ##store-memory-imm,
651 [ byte-array inline-store-memory? ] inline-accessor
652 ] with { [ %alien-vector-reps member? ] } if-literals-match ;
! Table pairing each SIMD intrinsic word with the quotation that emits
! code for it; the whole table is handed to enable-intrinsics.
! The (simd-vbit*) words share emitters with their (simd-vand/andn/or/
! xor/not) counterparts, and assert-positive maps to a plain drop.
! NOTE(review): the header of the word that encloses this table is not
! visible in this listing.
656 { (simd-v+) [ emit-simd-v+ ] }
657 { (simd-v-) [ emit-simd-v- ] }
658 { (simd-vneg) [ emit-simd-vneg ] }
659 { (simd-v+-) [ emit-simd-v+- ] }
660 { (simd-vs+) [ emit-simd-vs+ ] }
661 { (simd-vs-) [ emit-simd-vs- ] }
662 { (simd-vs*) [ emit-simd-vs* ] }
663 { (simd-v*) [ emit-simd-v* ] }
664 { (simd-v*high) [ emit-simd-v*high ] }
665 { (simd-v*hs+) [ emit-simd-v*hs+ ] }
666 { (simd-v/) [ emit-simd-v/ ] }
667 { (simd-vmin) [ emit-simd-vmin ] }
668 { (simd-vmax) [ emit-simd-vmax ] }
669 { (simd-vavg) [ emit-simd-vavg ] }
670 { (simd-v.) [ emit-simd-v. ] }
671 { (simd-vsad) [ emit-simd-vsad ] }
672 { (simd-vsqrt) [ emit-simd-vsqrt ] }
673 { (simd-sum) [ emit-simd-sum ] }
674 { (simd-vabs) [ emit-simd-vabs ] }
675 { (simd-vbitand) [ emit-simd-vand ] }
676 { (simd-vbitandn) [ emit-simd-vandn ] }
677 { (simd-vbitor) [ emit-simd-vor ] }
678 { (simd-vbitxor) [ emit-simd-vxor ] }
679 { (simd-vbitnot) [ emit-simd-vnot ] }
680 { (simd-vand) [ emit-simd-vand ] }
681 { (simd-vandn) [ emit-simd-vandn ] }
682 { (simd-vor) [ emit-simd-vor ] }
683 { (simd-vxor) [ emit-simd-vxor ] }
684 { (simd-vnot) [ emit-simd-vnot ] }
685 { (simd-vlshift) [ emit-simd-vlshift ] }
686 { (simd-vrshift) [ emit-simd-vrshift ] }
687 { (simd-hlshift) [ emit-simd-hlshift ] }
688 { (simd-hrshift) [ emit-simd-hrshift ] }
689 { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements ] }
690 { (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements ] }
691 { (simd-vshuffle-bytes) [ emit-simd-vshuffle-bytes ] }
692 { (simd-vmerge-head) [ emit-simd-vmerge-head ] }
693 { (simd-vmerge-tail) [ emit-simd-vmerge-tail ] }
694 { (simd-v<=) [ emit-simd-v<= ] }
695 { (simd-v<) [ emit-simd-v< ] }
696 { (simd-v=) [ emit-simd-v= ] }
697 { (simd-v>) [ emit-simd-v> ] }
698 { (simd-v>=) [ emit-simd-v>= ] }
699 { (simd-vunordered?) [ emit-simd-vunordered? ] }
700 { (simd-vany?) [ emit-simd-vany? ] }
701 { (simd-vall?) [ emit-simd-vall? ] }
702 { (simd-vnone?) [ emit-simd-vnone? ] }
703 { (simd-v>float) [ emit-simd-v>float ] }
704 { (simd-v>integer) [ emit-simd-v>integer ] }
705 { (simd-vpack-signed) [ emit-simd-vpack-signed ] }
706 { (simd-vpack-unsigned) [ emit-simd-vpack-unsigned ] }
707 { (simd-vunpack-head) [ emit-simd-vunpack-head ] }
708 { (simd-vunpack-tail) [ emit-simd-vunpack-tail ] }
709 { (simd-with) [ emit-simd-with ] }
710 { (simd-gather-2) [ emit-simd-gather-2 ] }
711 { (simd-gather-4) [ emit-simd-gather-4 ] }
712 { (simd-select) [ emit-simd-select ] }
713 { alien-vector [ emit-alien-vector ] }
714 { set-alien-vector [ emit-set-alien-vector ] }
715 { assert-positive [ drop ] }
716 { (simd-vgetmask) [ emit-simd-vgetmask ] }
717 } enable-intrinsics ;