! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators combinators.short-circuit arrays
fry kernel layouts math namespaces sequences cpu.architecture
-math.bitwise math.order classes
-vectors locals make alien.c-types io.binary grouping
+math.bitwise math.order classes generalizations
+combinators.smart locals make alien.c-types io.binary grouping
math.vectors.simd.intrinsics
compiler.cfg
compiler.cfg.registers
[ 2drop f ]
} cond ;
+! Encode the literal carried by a literal instruction as the little-endian
+! bytes of one component of the vector representation rep.
+! float-4-rep and double-2-rep read the literal from obj>> and store its
+! float/double bit pattern in 4 or 8 bytes; every other rep reads val>> and
+! stores it in as many bytes as the rep's component type occupies.
+:: scalar-value ( literal-insn rep -- byte-array )
+    rep {
+        { float-4-rep [ literal-insn obj>> float>bits 4 >le ] }
+        { double-2-rep [ literal-insn obj>> double>bits 8 >le ] }
+        [ drop literal-insn val>> rep rep-component-type heap-size >le ]
+    } case ;
+
+! Build the folded replacement for a scalar-to-vector broadcast.
+! bytes is the encoding of one component; it is repeated rep-length times
+! (taken from insn's rep>>), concatenated, and loaded into insn's dst>>
+! with a new ##load-reference.
: (fold-scalar>vector) ( insn bytes -- insn' )
[ [ dst>> ] [ rep>> rep-length ] bi ] dip <repetition> concat
\ ##load-reference new-insn ;
+! Fold a ##scalar>vector (outer) whose source is a literal instruction (inner).
+! The literal is encoded for outer's rep by scalar-value, then replicated
+! across the vector by (fold-scalar>vector).
: fold-scalar>vector ( outer inner -- insn' )
- obj>> over rep>> {
- { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
- { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
- [ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
- } case ;
+ over rep>> scalar-value (fold-scalar>vector) ;
+! Rewrite rule for ##scalar>vector, chosen by the instruction that defines src:
+!   - a literal instruction: fold the broadcast into a constant load;
+!   - a ##vector>scalar: replace with a copy of that instruction's src into dst;
+!   - anything else: output f.
M: ##scalar>vector rewrite
dup src>> vreg>insn {
- { [ dup ##load-reference? ] [ fold-scalar>vector ] }
+ { [ dup literal-insn? ] [ fold-scalar>vector ] }
{ [ dup ##vector>scalar? ] [ [ dst>> ] [ src>> ] bi* <copy> ] }
[ 2drop f ]
} cond ;
+! Constant-fold a two-element gather. src1 and src2 are the literal
+! instructions that define the gather's inputs; each is encoded for insn's
+! rep with scalar-value, and the two encodings are appended in source order
+! and loaded into insn's dst>> with a new ##load-reference.
+:: fold-gather-vector-2 ( insn src1 src2 -- insn )
+    insn dst>>
+    src1 insn rep>> scalar-value
+    src2 insn rep>> scalar-value
+    append \ ##load-reference new-insn ;
+
+! Shared rewrite for the two-element gather instructions.
+! Looks up the instructions defining src1 and src2; when both are literal
+! instructions the gather is folded to a constant load, otherwise f is output.
+: rewrite-gather-vector-2 ( insn -- insn/f )
+    dup [ src1>> ] [ src2>> ] bi [ vreg>insn ] bi@
+    2dup [ literal-insn? ] both?
+    [ fold-gather-vector-2 ] [ 3drop f ] if ;
+
+! ##gather-vector-2 delegates to the shared two-element gather rewrite.
+M: ##gather-vector-2 rewrite rewrite-gather-vector-2 ;
+
+! ##gather-int-vector-2 delegates to the shared two-element gather rewrite.
+M: ##gather-int-vector-2 rewrite rewrite-gather-vector-2 ;
+
+! Constant-fold a four-element gather. src1..src4 are the literal
+! instructions that define the gather's inputs; each is encoded for insn's
+! rep with scalar-value, and the four encodings are concatenated in source
+! order and loaded into insn's dst>> with a new ##load-reference.
+:: fold-gather-vector-4 ( insn src1 src2 src3 src4 -- insn )
+    insn dst>>
+    { src1 src2 src3 src4 } [ insn rep>> scalar-value ] map concat
+    \ ##load-reference new-insn ;
+
+! Shared rewrite for the four-element gather instructions.
+! Looks up the instructions defining src1..src4; when all four are literal
+! instructions the gather is folded to a constant load, otherwise f is output.
+: rewrite-gather-vector-4 ( insn -- insn/f )
+    dup { [ src1>> ] [ src2>> ] [ src3>> ] [ src4>> ] } cleave
+    [ vreg>insn ] 4 napply
+    4 ndup [ literal-insn? ] 4 napply and and and
+    [ fold-gather-vector-4 ] [ 5 ndrop f ] if ;
+
+! ##gather-vector-4 delegates to the shared four-element gather rewrite.
+M: ##gather-vector-4 rewrite rewrite-gather-vector-4 ;
+
+! ##gather-int-vector-4 delegates to the shared four-element gather rewrite.
+M: ##gather-int-vector-4 rewrite rewrite-gather-vector-4 ;
+
+! Rewrite rule for ##xor-vector: when diagonal? holds for the instruction,
+! replace it with a ##zero-vector of the same dst and rep; otherwise output f.
+! NOTE(review): diagonal? is defined elsewhere; presumably it tests that both
+! operands are the same value -- confirm.
M: ##xor-vector rewrite
dup diagonal?
[ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
+! An integer literal broadcast with ##scalar>vector (int-4-rep) folds to a
+! ##load-reference of the 4-byte little-endian value repeated four times;
+! the following shuffle in vreg 2 is expected to become the same load.
[
{
- T{ ##load-reference f 0 $[ 55 tag-fixnum ] }
+ T{ ##load-integer f 0 55 }
T{ ##load-reference f 1 B{ 55 0 0 0 55 0 0 0 55 0 0 0 55 0 0 0 } }
T{ ##load-reference f 2 B{ 55 0 0 0 55 0 0 0 55 0 0 0 55 0 0 0 } }
}
] [
{
- T{ ##load-reference f 0 $[ 55 tag-fixnum ] }
+ T{ ##load-integer f 0 55 }
T{ ##scalar>vector f 1 0 int-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
} value-numbering-step
] unit-test
+! A float literal broadcast with ##scalar>vector (float-4-rep) folds to a
+! ##load-reference of its 4-byte little-endian encoding repeated four times
+! (1.25 as a single float is 0x3FA00000, i.e. bytes 0 0 160 63);
+! the following shuffle in vreg 2 is expected to become the same load.
+[
+ {
+ T{ ##load-reference f 0 1.25 }
+ T{ ##load-reference f 1 B{ 0 0 160 63 0 0 160 63 0 0 160 63 0 0 160 63 } }
+ T{ ##load-reference f 2 B{ 0 0 160 63 0 0 160 63 0 0 160 63 0 0 160 63 } }
+ }
+] [
+ {
+ T{ ##load-reference f 0 1.25 }
+ T{ ##scalar>vector f 1 0 float-4-rep }
+ T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
+ } value-numbering-step
+] unit-test
+
+! ##gather-vector-2 over two literal doubles (double-2-rep) folds to a
+! ##load-reference holding both 8-byte little-endian encodings in source order
+! (1.25 is 0x3FF4000000000000, 3.75 is 0x400E000000000000).
+[
+ {
+ T{ ##load-reference f 0 1.25 }
+ T{ ##load-reference f 2 3.75 }
+ T{ ##load-reference f 4 B{ 0 0 0 0 0 0 244 63 0 0 0 0 0 0 14 64 } }
+ }
+] [
+ {
+ T{ ##load-reference f 0 1.25 }
+ T{ ##load-reference f 2 3.75 }
+ T{ ##gather-vector-2 f 4 0 2 double-2-rep }
+ } value-numbering-step
+] unit-test
+
+! A two-element gather of integer literals (longlong-2-rep) folds to a
+! ##load-reference holding both 8-byte little-endian values in source order
+! (375 is 0x177, i.e. bytes 119 1 0 ...).
+! The sources are ##load-integer, so the integer gather instruction is used;
+! this exercises the ##gather-int-vector-2 rewrite method.
+[
+ {
+ T{ ##load-integer f 0 125 }
+ T{ ##load-integer f 2 375 }
+ T{ ##load-reference f 4 B{ 125 0 0 0 0 0 0 0 119 1 0 0 0 0 0 0 } }
+ }
+] [
+ {
+ T{ ##load-integer f 0 125 }
+ T{ ##load-integer f 2 375 }
+ T{ ##gather-int-vector-2 f 4 0 2 longlong-2-rep }
+ } value-numbering-step
+] unit-test
+
+! ##gather-vector-4 over four literal floats (float-4-rep) folds to a
+! ##load-reference holding the four 4-byte little-endian encodings in source
+! order (1.25, 2.5, 3.75, 5.0 are 0x3FA00000, 0x40200000, 0x40700000, 0x40A00000).
+[
+ {
+ T{ ##load-reference f 0 1.25 }
+ T{ ##load-reference f 1 2.50 }
+ T{ ##load-reference f 2 3.75 }
+ T{ ##load-reference f 3 5.00 }
+ T{ ##load-reference f 4 B{ 0 0 160 63 0 0 32 64 0 0 112 64 0 0 160 64 } }
+ }
+] [
+ {
+ T{ ##load-reference f 0 1.25 }
+ T{ ##load-reference f 1 2.50 }
+ T{ ##load-reference f 2 3.75 }
+ T{ ##load-reference f 3 5.00 }
+ T{ ##gather-vector-4 f 4 0 1 2 3 float-4-rep }
+ } value-numbering-step
+] unit-test
+
+! A four-element gather of integer literals (int-4-rep) folds to a
+! ##load-reference holding the four 4-byte little-endian values in source
+! order (375 is 0x177, 500 is 0x1F4).
+! The sources are ##load-integer, so the integer gather instruction is used;
+! this exercises the ##gather-int-vector-4 rewrite method.
+[
+ {
+ T{ ##load-integer f 0 125 }
+ T{ ##load-integer f 1 250 }
+ T{ ##load-integer f 2 375 }
+ T{ ##load-integer f 3 500 }
+ T{ ##load-reference f 4 B{ 125 0 0 0 250 0 0 0 119 1 0 0 244 1 0 0 } }
+ }
+] [
+ {
+ T{ ##load-integer f 0 125 }
+ T{ ##load-integer f 1 250 }
+ T{ ##load-integer f 2 375 }
+ T{ ##load-integer f 3 500 }
+ T{ ##gather-int-vector-4 f 4 0 1 2 3 int-4-rep }
+ } value-numbering-step
+] unit-test
+
[
{
T{ ##zero-vector f 2 float-4-rep }