gitweb.factorcode.org Git - factor.git/blobdiff - basis/compiler/cfg/intrinsics/simd/simd.factor
use radix literals
[factor.git] / basis / compiler / cfg / intrinsics / simd / simd.factor
index a64c6575562fdbe40012fa1d3973680a41b3d3b5..b35efc0d970450be6e1f8b0f5c4408490e5b0346 100644 (file)
@@ -1,7 +1,7 @@
 ! Copyright (C) 2009 Slava Pestov, Joe Groff.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors alien alien.c-types byte-arrays fry
-classes.algebra cpu.architecture kernel math sequences
+classes.algebra cpu.architecture kernel layouts math sequences
 math.vectors math.vectors.simd.intrinsics
 macros generalizations combinators combinators.short-circuit
 arrays locals compiler.tree.propagation.info
@@ -22,39 +22,45 @@ IN: compiler.cfg.intrinsics.simd
 : sign-bit-mask ( rep -- byte-array )
+    ! Converts rep with signed-rep, then returns the underlying>> bytes of a
+    ! literal unsigned array in which every element is that width's top-bit
+    ! constant (0x80, 0x8000, 0x8000,0000 or 0x8000,0000,0000,0000).
     signed-rep {
         { char-16-rep [ uchar-array{
-            HEX: 80 HEX: 80 HEX: 80 HEX: 80
-            HEX: 80 HEX: 80 HEX: 80 HEX: 80
-            HEX: 80 HEX: 80 HEX: 80 HEX: 80
-            HEX: 80 HEX: 80 HEX: 80 HEX: 80
+            0x80 0x80 0x80 0x80
+            0x80 0x80 0x80 0x80
+            0x80 0x80 0x80 0x80
+            0x80 0x80 0x80 0x80
         } underlying>> ] }
         { short-8-rep [ ushort-array{
-            HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
-            HEX: 8000 HEX: 8000 HEX: 8000 HEX: 8000
+            0x8000 0x8000 0x8000 0x8000
+            0x8000 0x8000 0x8000 0x8000
         } underlying>> ] }
         { int-4-rep [ uint-array{
-            HEX: 8000,0000 HEX: 8000,0000
-            HEX: 8000,0000 HEX: 8000,0000
+            0x8000,0000 0x8000,0000
+            0x8000,0000 0x8000,0000
         } underlying>> ] }
         { longlong-2-rep [ ulonglong-array{
-            HEX: 8000,0000,0000,0000
-            HEX: 8000,0000,0000,0000
+            0x8000,0000,0000,0000
+            0x8000,0000,0000,0000
         } underlying>> ] }
     } case ;
 
 : ^load-neg-zero-vector ( rep -- dst )
+    ! Loads, via ^^load-literal, the underlying>> bytes of a literal array
+    ! whose lanes are all -0.0. Only float-4-rep and double-2-rep have a case.
     {
-        { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-constant ] }
-        { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-constant ] }
+        { float-4-rep [ float-array{ -0.0 -0.0 -0.0 -0.0 } underlying>> ^^load-literal ] }
+        { double-2-rep [ double-array{ -0.0 -0.0 } underlying>> ^^load-literal ] }
     } case ;
 
 : ^load-add-sub-vector ( rep -- dst )
+    ! After signed-rep, picks a literal array whose lanes alternate between a
+    ! negative value (-0.0 for float reps, -1 for integer reps) and zero, and
+    ! loads its underlying>> bytes with ^^load-literal.
     signed-rep {
-        { float-4-rep    [ float-array{ -0.0  0.0 -0.0  0.0 } underlying>> ^^load-constant ] }
-        { double-2-rep   [ double-array{ -0.0  0.0 } underlying>> ^^load-constant ] }
-        { char-16-rep    [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
-        { short-8-rep    [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-constant ] }
-        { int-4-rep      [ int-array{ -1 0 -1 0 } underlying>> ^^load-constant ] }
-        { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-constant ] }
+        { float-4-rep    [ float-array{ -0.0  0.0 -0.0  0.0 } underlying>> ^^load-literal ] }
+        { double-2-rep   [ double-array{ -0.0  0.0 } underlying>> ^^load-literal ] }
+        { char-16-rep    [ char-array{ -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-literal ] }
+        { short-8-rep    [ short-array{ -1 0 -1 0 -1 0 -1 0 } underlying>> ^^load-literal ] }
+        { int-4-rep      [ int-array{ -1 0 -1 0 } underlying>> ^^load-literal ] }
+        { longlong-2-rep [ longlong-array{ -1 0 } underlying>> ^^load-literal ] }
+    } case ;
+
+: ^load-half-vector ( rep -- dst )
+    ! Loads, via ^^load-literal, the underlying>> bytes of a literal array
+    ! whose lanes are all 0.5. Only float-4-rep and double-2-rep have a case.
+    {
+        { float-4-rep  [ float-array{  0.5 0.5 0.5 0.5 } underlying>> ^^load-literal ] }
+        { double-2-rep [ double-array{ 0.5 0.5 }         underlying>> ^^load-literal ] }
     } case ;
 
 : >variable-shuffle ( shuffle rep -- shuffle' )
@@ -64,7 +70,7 @@ IN: compiler.cfg.intrinsics.simd
     '[ _ n*v _ v+ ] map concat ;
 
 : ^load-immediate-shuffle ( shuffle rep -- dst )
+    ! Converts the shuffle with >variable-shuffle and loads the result with
+    ! ^^load-literal.
-    >variable-shuffle ^^load-constant ;
+    >variable-shuffle ^^load-literal ;
 
 :: ^blend-vector ( mask true false rep -- dst )
     true mask rep ^^and-vector
@@ -112,7 +118,7 @@ IN: compiler.cfg.intrinsics.simd
         [ ^(compare-vector) ]
         [ ^minmax-compare-vector ]
         { unsigned-int-vector-rep [| src1 src2 rep cc |
-            rep sign-bit-mask ^^load-constant :> sign-bits
+            rep sign-bit-mask ^^load-literal :> sign-bits
             src1 sign-bits rep ^^xor-vector
             src2 sign-bits rep ^^xor-vector
             rep signed-rep cc ^(compare-vector)
@@ -152,6 +158,9 @@ IN: compiler.cfg.intrinsics.simd
         ] }
     } v-vector-op ;
 
+PREDICATE: fixnum-vector-rep < int-vector-rep
+    ! Matches an int-vector-rep whose component type has a heap-size strictly
+    ! smaller than cell.
+    ! NOTE(review): cell presumably comes from the layouts vocabulary and is
+    ! the machine word size in bytes -- confirm.
+    rep-component-type heap-size cell < ;
+
 : ^(sum-vector-2) ( src rep -- dst )
     {
         [ dupd ^^horizontal-add-vector ]
@@ -249,7 +258,7 @@ IN: compiler.cfg.intrinsics.simd
 : ^sum-vector ( src rep -- dst )
     {
         { float-vector-rep [ ^(sum-vector) ] }
-        { int-vector-rep [| src rep |
+        { fixnum-vector-rep [| src rep |
             src rep ^unpack-vector-head :> head
             src rep ^unpack-vector-tail :> tail
             rep widen-vector-rep :> wide-rep
@@ -266,6 +275,26 @@ IN: compiler.cfg.intrinsics.simd
         [ [ ^load-immediate-shuffle ] [ ^^shuffle-vector ] bi ]
     } vl-vector-op ;
 
+: ^shuffle-2-vectors-imm ( src1 src2 shuffle rep -- dst )
+    ! Pads shuffle with zeros up to rep-length, then dispatches through
+    ! vvl-vector-op. Only a double-2-rep alternative is listed.
+    [ rep-length 0 pad-tail ] keep {
+        { double-2-rep [| src1 src2 shuffle rep |
+            ! i and j are the first two shuffle indices reduced mod 4; the
+            ! branches below treat 0-1 as lanes of src1 and 2-3 as lanes of src2.
+            shuffle first2 [ 4 mod ] bi@ :> ( i j )
+            {
+                ! Both lanes come from src1: single-source shuffle.
+                ! NOTE(review): this passes the original shuffle, not the
+                ! mod-4 values i and j -- confirm indices are always below 4.
+                { [ i j [ 2 < ] both? ] [
+                    src1 shuffle rep ^shuffle-vector-imm
+                ] }
+                ! Both lanes come from src2: rebase the indices by 2 first.
+                { [ i j [ 2 >= ] both? ] [
+                    src2 shuffle [ 2 - ] map rep ^shuffle-vector-imm
+                ] }
+                ! First lane from src1, second from src2.
+                { [ i 2 < ] [
+                    src1 src2 i j 2 - 2array rep ^^shuffle-vector-halves-imm
+                ] }
+                ! [ j 2 < ]
+                [ src2 src1 i 2 - j 2array rep ^^shuffle-vector-halves-imm ]
+            } cond
+        ] }
+    } vvl-vector-op ;
+
 : ^broadcast-vector ( src n rep -- dst )
+    ! Builds an array holding rep-length copies of n and uses it as the
+    ! immediate shuffle applied to src.
     [ rep-length swap <array> ] keep
     ^shuffle-vector-imm ;
@@ -274,7 +303,10 @@ IN: compiler.cfg.intrinsics.simd
     [ ^^scalar>vector ] keep [ 0 ] dip ^broadcast-vector ;
 
 : ^select-vector ( src n rep -- dst )
+    ! Two alternatives are handed to vl-vector-op: ^^select-vector, or
+    ! broadcasting lane n with ^broadcast-vector and then ^^vector>scalar.
+    ! NOTE(review): presumably the first alternative supported for rep is
+    ! used -- confirm against vl-vector-op.
-    [ ^broadcast-vector ] keep ^^vector>scalar ;
+    {
+        [ ^^select-vector ]
+        [ [ ^broadcast-vector ] keep ^^vector>scalar ]
+    } vl-vector-op ;
 
 ! intrinsic emitters
 
@@ -333,6 +365,16 @@ IN: compiler.cfg.intrinsics.simd
         [ ^^mul-vector ]
     } emit-vv-vector-op ;
 
+: emit-simd-v*high ( node -- )
+    ! Emits ^^mul-high-vector through emit-vv-vector-op; no other alternative
+    ! is listed.
+    {
+        [ ^^mul-high-vector ]
+    } emit-vv-vector-op ;
+
+: emit-simd-v*hs+ ( node -- )
+    ! Emits ^^mul-horizontal-add-vector through emit-vv-vector-op; no other
+    ! alternative is listed.
+    {
+        [ ^^mul-horizontal-add-vector ]
+    } emit-vv-vector-op ;
+
 : emit-simd-v/ ( node -- )
     {
         [ ^^div-vector ]
@@ -356,12 +398,29 @@ IN: compiler.cfg.intrinsics.simd
         ]
     } emit-vv-vector-op ;
 
+: emit-simd-vavg ( node -- )
+    ! Alternatives: ^^avg-vector, or for float-vector-rep the sum of the two
+    ! sources multiplied by the all-0.5 vector from ^load-half-vector.
+    {
+        [ ^^avg-vector ]
+        { float-vector-rep [| src1 src2 rep |
+            src1 src2 rep ^^add-vector
+            rep ^load-half-vector rep ^^mul-vector
+        ] }
+    } emit-vv-vector-op ;
+
 : emit-simd-v. ( node -- )
+    ! Alternatives: ^^dot-vector, or for float-vector-rep an element-wise
+    ! ^^mul-vector followed by ^sum-vector on the same rep.
     {
         [ ^^dot-vector ]
         { float-vector-rep [ [ ^^mul-vector ] [ ^sum-vector ] bi ] }
     } emit-vv-vector-op ;
 
+: emit-simd-vsad ( node -- )
+    ! Runs ^^sad-vector, adds the result to a copy of itself shuffled by
+    ! { 2 3 0 1 } (both as int-4-rep), then extracts a scalar with
+    ! ^^vector>scalar using the widened rep.
+    ! NOTE(review): the shuffle-and-add presumably combines two partial sums
+    ! held in separate halves of the vector -- confirm.
+    {
+        [
+            [ ^^sad-vector dup { 2 3 0 1 } int-4-rep ^^shuffle-vector-imm int-4-rep ^^add-vector ]
+            [ widen-vector-rep ^^vector>scalar ] bi
+        ]
+    } emit-vv-vector-op ;
+
 : emit-simd-vsqrt ( node -- )
     {
         [ ^^sqrt-vector ]
@@ -439,6 +498,11 @@ IN: compiler.cfg.intrinsics.simd
         [ ^shuffle-vector-imm ]
     } [ shuffle? ] emit-vl-vector-op ;
 
+: emit-simd-vshuffle2-elements ( node -- )
+    ! Emits ^shuffle-2-vectors-imm through emit-vvl-vector-op, with shuffle?
+    ! supplied as the predicate quotation.
+    {
+        [ ^shuffle-2-vectors-imm ]
+    } [ shuffle? ] emit-vvl-vector-op ;
+
 : emit-simd-vshuffle-bytes ( node -- )
     {
         [ ^^shuffle-vector ]
@@ -491,6 +555,10 @@ IN: compiler.cfg.intrinsics.simd
     {
         [ vcc-none ^^test-vector ]
     } emit-v-vector-op ;
+: emit-simd-vgetmask ( node -- )
+    ! Emits ^^move-vector-mask through emit-v-vector-op; no other alternative
+    ! is listed.
+    {
+        [ ^^move-vector-mask ]
+    } emit-v-vector-op ;
 
 : emit-simd-v>float ( node -- )
     {
@@ -506,7 +574,12 @@ IN: compiler.cfg.intrinsics.simd
 
 : emit-simd-vpack-signed ( node -- )
+    ! double-2-rep: packs each source with ^^float-pack-vector, then joins the
+    ! two results with ^^shuffle-vector-halves-imm { 0 1 0 1 } as float-4-rep.
+    ! int-vector-rep: ^^signed-pack-vector.
     {
-        [ ^^signed-pack-vector ]
+        { double-2-rep [| src1 src2 rep |
+            src1 double-2-rep ^^float-pack-vector :> dst-head
+            src2 double-2-rep ^^float-pack-vector :> dst-tail
+            dst-head dst-tail { 0 1 0 1 } float-4-rep ^^shuffle-vector-halves-imm
+        ] }
+        { int-vector-rep [ ^^signed-pack-vector ] }
     } emit-vv-vector-op ;
 
 : emit-simd-vpack-unsigned ( node -- )
@@ -526,99 +599,112 @@ IN: compiler.cfg.intrinsics.simd
 
 : emit-simd-with ( node -- )
+    ! ^with-vector is listed only for fixnum-vector-rep and float-vector-rep.
     {
-        [ ^with-vector ]
+        { fixnum-vector-rep [ ^with-vector ] }
+        { float-vector-rep  [ ^with-vector ] }
     } emit-v-vector-op ;
 
 : emit-simd-gather-2 ( node -- )
+    ! fixnum-vector-rep is listed twice, first with ^^gather-int-vector-2 and
+    ! then with ^^gather-vector-2; float-vector-rep uses ^^gather-vector-2.
+    ! NOTE(review): presumably entries are tried in order, making the second
+    ! fixnum entry a fallback -- confirm against emit-vv-vector-op.
     {
-        [ ^^gather-vector-2 ]
+        { fixnum-vector-rep [ ^^gather-int-vector-2 ] }
+        { fixnum-vector-rep [ ^^gather-vector-2 ] }
+        { float-vector-rep  [ ^^gather-vector-2 ] }
     } emit-vv-vector-op ;
 
 : emit-simd-gather-4 ( node -- )
+    ! fixnum-vector-rep is listed twice, first with ^^gather-int-vector-4 and
+    ! then with ^^gather-vector-4; float-vector-rep uses ^^gather-vector-4.
+    ! NOTE(review): presumably entries are tried in order, making the second
+    ! fixnum entry a fallback -- confirm against emit-vvvv-vector-op.
     {
-        [ ^^gather-vector-4 ]
+        { fixnum-vector-rep [ ^^gather-int-vector-4 ] }
+        { fixnum-vector-rep [ ^^gather-vector-4 ] }
+        { float-vector-rep  [ ^^gather-vector-4 ] }
     } emit-vvvv-vector-op ;
 
 : emit-simd-select ( node -- )
+    ! ^select-vector is listed only for fixnum-vector-rep and
+    ! float-vector-rep; integer? is supplied as the predicate quotation.
     {
-        [ ^select-vector ]
+        { fixnum-vector-rep [ ^select-vector ] }
+        { float-vector-rep  [ ^select-vector ] }
     } [ integer? ] emit-vl-vector-op ;
 
 : emit-alien-vector ( node -- )
+    ! Guarded by if-literals-match with a %alien-vector-reps membership test.
+    ! The inner quotation drops the top data-stack entry, prepares the load,
+    ! emits ^^load-memory-imm and pushes the result.
+    ! NOTE(review): the _ slot is presumably filled with the matched literal
+    ! rep, and the f is presumably a c-type argument left unset -- confirm.
     dup [
         '[
-            ds-drop prepare-alien-getter
-            _ ^^alien-vector ds-push
+            ds-drop prepare-load-memory
+            _ f ^^load-memory-imm ds-push
         ]
-        [ inline-alien-getter? ] inline-alien
+        [ inline-load-memory? ] inline-accessor
     ] with { [ %alien-vector-reps member? ] } if-literals-match ;
 
 : emit-set-alien-vector ( node -- )
+    ! Guarded by if-literals-match with a %alien-vector-reps membership test.
+    ! The inner quotation drops the top data-stack entry, prepares the store
+    ! and emits ##store-memory-imm, .
+    ! NOTE(review): the _ slot is presumably filled with the matched literal
+    ! rep, and the f is presumably a c-type argument left unset -- confirm.
     dup [
         '[
-            ds-drop prepare-alien-setter ds-pop
-            _ ##set-alien-vector
+            ds-drop prepare-store-memory
+            _ f ##store-memory-imm,
         ]
-        [ byte-array inline-alien-setter? ]
-        inline-alien
+        [ byte-array inline-store-memory? ]
+        inline-accessor
     ] with { [ %alien-vector-reps member? ] } if-literals-match ;
 
 : enable-simd ( -- )
+    ! Passes a table of { word [ emitter ] } pairs to enable-intrinsics.
+    ! Several words share an emitter, e.g. (simd-vbitand) and (simd-vand)
+    ! both map to emit-simd-vand.
     {
-        { (simd-v+)                [ emit-simd-v+                  ] }
-        { (simd-v-)                [ emit-simd-v-                  ] }
-        { (simd-vneg)              [ emit-simd-vneg                ] }
-        { (simd-v+-)               [ emit-simd-v+-                 ] }
-        { (simd-vs+)               [ emit-simd-vs+                 ] }
-        { (simd-vs-)               [ emit-simd-vs-                 ] }
-        { (simd-vs*)               [ emit-simd-vs*                 ] }
-        { (simd-v*)                [ emit-simd-v*                  ] }
-        { (simd-v/)                [ emit-simd-v/                  ] }
-        { (simd-vmin)              [ emit-simd-vmin                ] }
-        { (simd-vmax)              [ emit-simd-vmax                ] }
-        { (simd-v.)                [ emit-simd-v.                  ] }
-        { (simd-vsqrt)             [ emit-simd-vsqrt               ] }
-        { (simd-sum)               [ emit-simd-sum                 ] }
-        { (simd-vabs)              [ emit-simd-vabs                ] }
-        { (simd-vbitand)           [ emit-simd-vand                ] }
-        { (simd-vbitandn)          [ emit-simd-vandn               ] }
-        { (simd-vbitor)            [ emit-simd-vor                 ] }
-        { (simd-vbitxor)           [ emit-simd-vxor                ] }
-        { (simd-vbitnot)           [ emit-simd-vnot                ] }
-        { (simd-vand)              [ emit-simd-vand                ] }
-        { (simd-vandn)             [ emit-simd-vandn               ] }
-        { (simd-vor)               [ emit-simd-vor                 ] }
-        { (simd-vxor)              [ emit-simd-vxor                ] }
-        { (simd-vnot)              [ emit-simd-vnot                ] }
-        { (simd-vlshift)           [ emit-simd-vlshift             ] }
-        { (simd-vrshift)           [ emit-simd-vrshift             ] }
-        { (simd-hlshift)           [ emit-simd-hlshift             ] }
-        { (simd-hrshift)           [ emit-simd-hrshift             ] }
-        { (simd-vshuffle-elements) [ emit-simd-vshuffle-elements   ] }
-        { (simd-vshuffle-bytes)    [ emit-simd-vshuffle-bytes      ] }
-        { (simd-vmerge-head)       [ emit-simd-vmerge-head         ] }
-        { (simd-vmerge-tail)       [ emit-simd-vmerge-tail         ] }
-        { (simd-v<=)               [ emit-simd-v<=                 ] }
-        { (simd-v<)                [ emit-simd-v<                  ] }
-        { (simd-v=)                [ emit-simd-v=                  ] }
-        { (simd-v>)                [ emit-simd-v>                  ] }
-        { (simd-v>=)               [ emit-simd-v>=                 ] }
-        { (simd-vunordered?)       [ emit-simd-vunordered?         ] }
-        { (simd-vany?)             [ emit-simd-vany?               ] }
-        { (simd-vall?)             [ emit-simd-vall?               ] }
-        { (simd-vnone?)            [ emit-simd-vnone?              ] }
-        { (simd-v>float)           [ emit-simd-v>float             ] }
-        { (simd-v>integer)         [ emit-simd-v>integer           ] }
-        { (simd-vpack-signed)      [ emit-simd-vpack-signed        ] }
-        { (simd-vpack-unsigned)    [ emit-simd-vpack-unsigned      ] }
-        { (simd-vunpack-head)      [ emit-simd-vunpack-head        ] }
-        { (simd-vunpack-tail)      [ emit-simd-vunpack-tail        ] }
-        { (simd-with)              [ emit-simd-with                ] }
-        { (simd-gather-2)          [ emit-simd-gather-2            ] }
-        { (simd-gather-4)          [ emit-simd-gather-4            ] }
-        { (simd-select)            [ emit-simd-select              ] }
-        { alien-vector             [ emit-alien-vector             ] }
-        { set-alien-vector         [ emit-set-alien-vector         ] }
+        { (simd-v+)                 [ emit-simd-v+                  ] }
+        { (simd-v-)                 [ emit-simd-v-                  ] }
+        { (simd-vneg)               [ emit-simd-vneg                ] }
+        { (simd-v+-)                [ emit-simd-v+-                 ] }
+        { (simd-vs+)                [ emit-simd-vs+                 ] }
+        { (simd-vs-)                [ emit-simd-vs-                 ] }
+        { (simd-vs*)                [ emit-simd-vs*                 ] }
+        { (simd-v*)                 [ emit-simd-v*                  ] }
+        { (simd-v*high)             [ emit-simd-v*high              ] }
+        { (simd-v*hs+)              [ emit-simd-v*hs+               ] }
+        { (simd-v/)                 [ emit-simd-v/                  ] }
+        { (simd-vmin)               [ emit-simd-vmin                ] }
+        { (simd-vmax)               [ emit-simd-vmax                ] }
+        { (simd-vavg)               [ emit-simd-vavg                ] }
+        { (simd-v.)                 [ emit-simd-v.                  ] }
+        { (simd-vsad)               [ emit-simd-vsad                ] }
+        { (simd-vsqrt)              [ emit-simd-vsqrt               ] }
+        { (simd-sum)                [ emit-simd-sum                 ] }
+        { (simd-vabs)               [ emit-simd-vabs                ] }
+        { (simd-vbitand)            [ emit-simd-vand                ] }
+        { (simd-vbitandn)           [ emit-simd-vandn               ] }
+        { (simd-vbitor)             [ emit-simd-vor                 ] }
+        { (simd-vbitxor)            [ emit-simd-vxor                ] }
+        { (simd-vbitnot)            [ emit-simd-vnot                ] }
+        { (simd-vand)               [ emit-simd-vand                ] }
+        { (simd-vandn)              [ emit-simd-vandn               ] }
+        { (simd-vor)                [ emit-simd-vor                 ] }
+        { (simd-vxor)               [ emit-simd-vxor                ] }
+        { (simd-vnot)               [ emit-simd-vnot                ] }
+        { (simd-vlshift)            [ emit-simd-vlshift             ] }
+        { (simd-vrshift)            [ emit-simd-vrshift             ] }
+        { (simd-hlshift)            [ emit-simd-hlshift             ] }
+        { (simd-hrshift)            [ emit-simd-hrshift             ] }
+        { (simd-vshuffle-elements)  [ emit-simd-vshuffle-elements   ] }
+        { (simd-vshuffle2-elements) [ emit-simd-vshuffle2-elements  ] }
+        { (simd-vshuffle-bytes)     [ emit-simd-vshuffle-bytes      ] }
+        { (simd-vmerge-head)        [ emit-simd-vmerge-head         ] }
+        { (simd-vmerge-tail)        [ emit-simd-vmerge-tail         ] }
+        { (simd-v<=)                [ emit-simd-v<=                 ] }
+        { (simd-v<)                 [ emit-simd-v<                  ] }
+        { (simd-v=)                 [ emit-simd-v=                  ] }
+        { (simd-v>)                 [ emit-simd-v>                  ] }
+        { (simd-v>=)                [ emit-simd-v>=                 ] }
+        { (simd-vunordered?)        [ emit-simd-vunordered?         ] }
+        { (simd-vany?)              [ emit-simd-vany?               ] }
+        { (simd-vall?)              [ emit-simd-vall?               ] }
+        { (simd-vnone?)             [ emit-simd-vnone?              ] }
+        { (simd-v>float)            [ emit-simd-v>float             ] }
+        { (simd-v>integer)          [ emit-simd-v>integer           ] }
+        { (simd-vpack-signed)       [ emit-simd-vpack-signed        ] }
+        { (simd-vpack-unsigned)     [ emit-simd-vpack-unsigned      ] }
+        { (simd-vunpack-head)       [ emit-simd-vunpack-head        ] }
+        { (simd-vunpack-tail)       [ emit-simd-vunpack-tail        ] }
+        { (simd-with)               [ emit-simd-with                ] }
+        { (simd-gather-2)           [ emit-simd-gather-2            ] }
+        { (simd-gather-4)           [ emit-simd-gather-4            ] }
+        { (simd-select)             [ emit-simd-select              ] }
+        { alien-vector              [ emit-alien-vector             ] }
+        { set-alien-vector          [ emit-set-alien-vector         ] }
+        ! The emitter for assert-positive just drops the node, so this
+        ! entry emits no instructions.
+        { assert-positive           [ drop                          ] }
+        { (simd-vgetmask)           [ emit-simd-vgetmask            ] }
     } enable-intrinsics ;
 
 enable-simd