use: src1 src2
literal: rep ;
+PURE-INSN: ##shl-vector
+def: dst
+use: src1 src2/scalar-rep
+literal: rep ;
+
+PURE-INSN: ##shr-vector
+def: dst
+use: src1 src2/scalar-rep
+literal: rep ;
+
+! Scalar/integer conversion
+PURE-INSN: ##scalar>integer
+def: dst/int-rep
+use: src
+literal: rep ;
+
+PURE-INSN: ##integer>scalar
+def: dst
+use: src/int-rep
+literal: rep ;
+
! Boxing and unboxing aliens
PURE-INSN: ##box-alien
def: dst/int-rep
{ math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-v<<) [ [ ^^shl-vector ] emit-binary-vector-op ] }
+ { math.vectors.simd.intrinsics:(simd-v>>) [ [ ^^shr-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
USING: kernel accessors sequences arrays fry namespaces generic
words sets combinators generalizations cpu.architecture compiler.units
compiler.cfg.utilities compiler.cfg compiler.cfg.rpo
-compiler.cfg.instructions compiler.cfg.instructions.syntax
-compiler.cfg.def-use ;
+compiler.cfg.instructions compiler.cfg.def-use ;
+FROM: compiler.cfg.instructions.syntax => insn-def-slot insn-use-slots insn-temp-slots scalar-rep ;
IN: compiler.cfg.representations.preferred
GENERIC: defs-vreg-rep ( insn -- rep/f )
! Copyright (C) 2009 Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: kernel fry accessors sequences assocs sets namespaces
-arrays combinators make locals deques dlists
+arrays combinators make locals deques dlists layouts
cpu.architecture compiler.utilities
compiler.cfg
compiler.cfg.rpo
GENERIC: emit-box ( dst src rep -- )
GENERIC: emit-unbox ( dst src rep -- )
-M: float-rep emit-box
- drop
- [ double-rep next-vreg-rep dup ] dip ##single>double-float
- int-rep next-vreg-rep ##box-float ;
+M:: float-rep emit-box ( dst src rep -- )
+ double-rep next-vreg-rep :> temp
+ temp src ##single>double-float
+ dst temp int-rep next-vreg-rep ##box-float ;
-M: float-rep emit-unbox
- drop
- [ double-rep next-vreg-rep dup ] dip ##unbox-float
- ##double>single-float ;
+M:: float-rep emit-unbox ( dst src rep -- )
+ double-rep next-vreg-rep :> temp
+ temp src ##unbox-float
+ dst temp ##double>single-float ;
M: double-rep emit-box
- drop
- int-rep next-vreg-rep ##box-float ;
+ drop int-rep next-vreg-rep ##box-float ;
M: double-rep emit-unbox
drop ##unbox-float ;
M: vector-rep emit-unbox
##unbox-vector ;
+M:: scalar-rep emit-box ( dst src rep -- )
+ int-rep next-vreg-rep :> temp
+ temp src rep ##scalar>integer
+ dst temp tag-bits get ##shl-imm ;
+
+M:: scalar-rep emit-unbox ( dst src rep -- )
+ int-rep next-vreg-rep :> temp
+ temp src tag-bits get ##sar-imm
+ dst temp rep ##integer>scalar ;
+
: emit-conversion ( dst src dst-rep src-rep -- )
{
{ [ 2dup eq? ] [ drop ##copy ] }
##max-vector
##and-vector
##or-vector
- ##xor-vector ;
+ ##xor-vector
+ ##shl-vector
+ ##shr-vector ;
GENERIC: convert-two-operand* ( insn -- )
CODEGEN: ##and-vector %and-vector
CODEGEN: ##or-vector %or-vector
CODEGEN: ##xor-vector %xor-vector
+CODEGEN: ##shl-vector %shl-vector
+CODEGEN: ##shr-vector %shr-vector
+CODEGEN: ##integer>scalar %integer>scalar
+CODEGEN: ##scalar>integer %scalar>integer
CODEGEN: ##box-alien %box-alien
CODEGEN: ##box-displaced-alien %box-displaced-alien
CODEGEN: ##unbox-alien %unbox-alien
(simd-vbitand)
(simd-vbitor)
(simd-vbitxor)
+ (simd-v<<)
+ (simd-v>>)
(simd-broadcast)
(simd-gather-2)
(simd-gather-4)
literal>> scalar-rep-of {
{ float-rep [ float ] }
{ double-rep [ float ] }
- { int-rep [ integer ] }
+ [ integer ]
} case
] [ drop real ] if
<class-info>
longlong-2-rep
ulonglong-2-rep ;
+! Scalar values in the high component of a vector register
+SINGLETONS:
+char-scalar-rep
+uchar-scalar-rep
+short-scalar-rep
+ushort-scalar-rep
+int-scalar-rep
+uint-scalar-rep
+longlong-scalar-rep
+ulonglong-scalar-rep ;
+
SINGLETONS:
float-4-rep
double-2-rep ;
longlong-2-rep
ulonglong-2-rep ;
+UNION: scalar-rep
+char-scalar-rep
+uchar-scalar-rep
+short-scalar-rep
+ushort-scalar-rep
+int-scalar-rep
+uint-scalar-rep
+longlong-scalar-rep
+ulonglong-scalar-rep ;
+
UNION: float-vector-rep
float-4-rep
double-2-rep ;
int-rep
float-rep
double-rep
-vector-rep ;
+vector-rep
+scalar-rep ;
! Register classes
SINGLETONS: int-regs float-regs ;
! A pseudo-register class for parameters spilled on the stack
SINGLETON: stack-params
+! On x86, vectors and floats are stored in the same register bank
+! On PowerPC they are distinct
+HOOK: vector-regs cpu ( -- reg-class )
+
GENERIC: reg-class-of ( rep -- reg-class )
M: tagged-rep reg-class-of drop int-regs ;
M: int-rep reg-class-of drop int-regs ;
M: float-rep reg-class-of drop float-regs ;
M: double-rep reg-class-of drop float-regs ;
-M: vector-rep reg-class-of drop float-regs ;
+M: vector-rep reg-class-of drop vector-regs ;
+M: scalar-rep reg-class-of drop vector-regs ;
M: stack-params reg-class-of drop stack-params ;
GENERIC: rep-size ( rep -- n ) foldable
M: float-4-rep scalar-rep-of drop float-rep ;
M: double-2-rep scalar-rep-of drop double-rep ;
-M: int-vector-rep scalar-rep-of drop int-rep ;
+M: char-16-rep scalar-rep-of drop char-scalar-rep ;
+M: uchar-16-rep scalar-rep-of drop uchar-scalar-rep ;
+M: short-8-rep scalar-rep-of drop short-scalar-rep ;
+M: ushort-8-rep scalar-rep-of drop ushort-scalar-rep ;
+M: int-4-rep scalar-rep-of drop int-scalar-rep ;
+M: uint-4-rep scalar-rep-of drop uint-scalar-rep ;
+M: longlong-2-rep scalar-rep-of drop longlong-scalar-rep ;
+M: ulonglong-2-rep scalar-rep-of drop ulonglong-scalar-rep ;
! Mapping from register class to machine registers
HOOK: machine-registers cpu ( -- assoc )
HOOK: %and-vector cpu ( dst src1 src2 rep -- )
HOOK: %or-vector cpu ( dst src1 src2 rep -- )
HOOK: %xor-vector cpu ( dst src1 src2 rep -- )
+HOOK: %shl-vector cpu ( dst src1 src2 rep -- )
+HOOK: %shr-vector cpu ( dst src1 src2 rep -- )
+
+HOOK: %integer>scalar cpu ( dst src rep -- )
+HOOK: %scalar>integer cpu ( dst src rep -- )
HOOK: %broadcast-vector-reps cpu ( -- reps )
HOOK: %gather-vector-2-reps cpu ( -- reps )
HOOK: %and-vector-reps cpu ( -- reps )
HOOK: %or-vector-reps cpu ( -- reps )
HOOK: %xor-vector-reps cpu ( -- reps )
+HOOK: %shl-vector-reps cpu ( -- reps )
+HOOK: %shr-vector-reps cpu ( -- reps )
HOOK: %unbox-alien cpu ( dst src -- )
HOOK: %unbox-any-c-ptr cpu ( dst src temp -- )
M: x86 two-operand? t ;
+M: x86 vector-regs float-regs ;
+
HOOK: stack-reg cpu ( -- reg )
HOOK: reserved-area-size cpu ( -- n )
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
+M: x86 %shl-vector ( dst src1 src2 rep -- )
+ {
+ { short-8-rep [ PSLLW ] }
+ { ushort-8-rep [ PSLLW ] }
+ { int-4-rep [ PSLLD ] }
+ { uint-4-rep [ PSLLD ] }
+ { longlong-2-rep [ PSLLQ ] }
+ { ulonglong-2-rep [ PSLLQ ] }
+ } case drop ;
+
+M: x86 %shl-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %shr-vector ( dst src1 src2 rep -- )
+ {
+ { short-8-rep [ PSRAW ] }
+ { ushort-8-rep [ PSRLW ] }
+ { int-4-rep [ PSRAD ] }
+ { uint-4-rep [ PSRLD ] }
+ { ulonglong-2-rep [ PSRLQ ] }
+ } case drop ;
+
+M: x86 %shr-vector-reps
+ {
+ { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
+ } available-reps ;
+
+M: x86 %integer>scalar drop MOVD ;
+
+M: x86 %scalar>integer drop MOVD ;
+
M: x86 %unbox-alien ( dst src -- )
alien-offset [+] MOV ;
} append
] when ;
-:: simd-vector-words ( class ctor rep vv->v v->v v->n -- )
+:: simd-vector-words ( class ctor rep vv->v vn->v v->v v->n -- )
rep rep-component-type c-type-boxed-class :> elt-class
class
elt-class
{
{ { +vector+ +vector+ -> +vector+ } vv->v }
+ { { +vector+ +scalar+ -> +vector+ } vn->v }
{ { +vector+ -> +vector+ } v->v }
{ { +vector+ -> +scalar+ } v->n }
{ { +vector+ -> +nonnegative+ } v->n }
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
+A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
: A-vv->v-op ( v1 v2 quot -- v3 )
[ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline
+: A-vn->v-op ( v1 v2 quot -- v3 )
+ [ [ underlying>> ] dip A-rep ] dip call \ A boa ; inline
+
: A-v->v-op ( v1 quot -- v2 )
[ underlying>> A-rep ] dip call \ A boa ; inline
: A-v->n-op ( v quot -- n )
[ underlying>> A-rep ] dip call ; inline
-\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
+\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
\ A \ A-rep define-simd-128-type
PRIVATE>
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
+A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op
A-v->v-op DEFINES-PRIVATE ${A}-v->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
[ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi
\ A boa ; inline
+: A-vn->v-op ( v1 v2 quot -- v3 )
+ [ [ [ underlying1>> ] dip A-rep ] dip call ]
+ [ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi
+ \ A boa ; inline
+
: A-v->v-op ( v1 combine-quot -- v2 )
[ [ underlying1>> A-rep ] dip call ]
[ [ underlying2>> A-rep ] dip call ] 2bi
: A-v->n-op ( v1 combine-quot -- v2 )
[ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline
-\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
+\ A \ A-with \ A-rep \ A-vv->v-op \ A-vn->v-op \ A-v->v-op \ A-v->n-op simd-vector-words
\ A \ A-rep define-simd-256-type
;FUNCTOR
SIMD-OP: vbitand
SIMD-OP: vbitor
SIMD-OP: vbitxor
+SIMD-OP: v<<
+SIMD-OP: v>>
: (simd-broadcast) ( x rep -- v ) bad-simd-call ;
: (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
{ \ (simd-vbitand) [ %and-vector-reps ] }
{ \ (simd-vbitor) [ %or-vector-reps ] }
{ \ (simd-vbitxor) [ %xor-vector-reps ] }
+ { \ (simd-v<<) [ %shl-vector-reps ] }
+ { \ (simd-v>>) [ %shr-vector-reps ] }
{ \ (simd-broadcast) [ %broadcast-vector-reps ] }
{ \ (simd-gather-2) [ %gather-vector-2-reps ] }
{ \ (simd-gather-4) [ %gather-vector-4-reps ] }
: remove-float-words ( alist -- alist' )
[ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ;
+: remove-integer-words ( alist -- alist' )
+ [ drop { v<< v>> } member? not ] assoc-filter ;
+
: ops-to-check ( elt-class -- alist )
[ vector-words >alist ] dip
- float = [ remove-float-words ] unless ;
+ float = [ remove-integer-words ] [ remove-float-words ] if ;
: check-vector-ops ( class elt-class compare-quot -- )
[
simd-classes [
{
{ [ dup name>> "float" head? ] [ float [ approx= ] ] }
- { [ dup name>> "double" tail? ] [ float [ = ] ] }
+ { [ dup name>> "double" head? ] [ float [ = ] ] }
[ fixnum [ = ] ]
} cond 3array
] map ;
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: alien.c-types words kernel make sequences effects
-kernel.private accessors combinators math math.intervals
-math.vectors namespaces assocs fry splitting classes.algebra
-generalizations locals compiler.tree.propagation.info ;
+USING: words kernel make sequences effects sets kernel.private
+accessors combinators math math.intervals math.vectors
+namespaces assocs fry splitting classes.algebra generalizations
+locals compiler.tree.propagation.info ;
IN: math.vectors.specialization
SYMBOLS: -> +vector+ +scalar+ +nonnegative+ ;
{ vbitand { +vector+ +vector+ -> +vector+ } }
{ vbitor { +vector+ +vector+ -> +vector+ } }
{ vbitxor { +vector+ +vector+ -> +vector+ } }
+ { v>> { +vector+ +scalar+ -> +vector+ } }
+ { v<< { +vector+ +scalar+ -> +vector+ } }
}
PREDICATE: vector-word < word vector-words key? ;
:: input-signature ( word array-type elt-type -- signature )
array-type elt-type word word-schema inputs signature-for-schema ;
+: vector-words-for-type ( elt-type -- alist )
+ {
+ ! Can't do shifts on floats
+ { [ dup float class<= ] [ vector-words keys { v<< v>> } diff ] }
+ ! Can't divide integers
+ { [ dup integer class<= ] [ vector-words keys { vsqrt n/v v/n v/ normalize } diff ] }
+ [ { } ]
+ } cond nip ;
+
:: specialize-vector-words ( array-type elt-type simd -- )
- elt-type number class<= [
- vector-words keys [
- [ array-type elt-type simd specialize-vector-word ]
- [ array-type elt-type input-signature ]
- [ ]
- tri add-specialization
- ] each
- ] when ;
+ elt-type vector-words-for-type [
+ [ array-type elt-type simd specialize-vector-word ]
+ [ array-type elt-type input-signature ]
+ [ ]
+ tri add-specialization
+ ] each ;
: find-specialization ( classes word -- word/f )
specializations
{ $subsection vbitand }
{ $subsection vbitor }
{ $subsection vbitxor }
+{ $subsection v<< }
+{ $subsection v>> }
"Inner product and norm:"
{ $subsection v. }
{ $subsection norm }
{ $description "Takes the bitwise exclusive or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." }
{ $notes "Unlike " { $link bitxor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ;
+HELP: v<<
+{ $values { "u" "a sequence of integers" } { "n" "a non-negative integer" } { "w" "a sequence of integers" } }
+{ $description "Shifts each element of " { $snippet "u" } " to the left by " { $snippet "n" } " bits." } ;
+
+HELP: v>>
+{ $values { "u" "a sequence of integers" } { "n" "a non-negative integer" } { "w" "a sequence of integers" } }
+{ $description "Shifts each element of " { $snippet "u" } " to the right by " { $snippet "n" } " bits." } ;
+
HELP: norm-sq
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Computes the squared length of a mathematical vector." } ;
: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ;
: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ;
+: v<< ( u n -- w ) '[ _ shift ] map ;
+: v>> ( u n -- w ) neg '[ _ shift ] map ;
+
: vfloor ( u -- v ) [ floor ] map ;
: vceiling ( u -- v ) [ ceiling ] map ;
: vtruncate ( u -- v ) [ truncate ] map ;