: little-endian? ( -- ? ) 1 <int> *char 1 = ; foldable
- GENERIC: heap-size ( name -- size ) foldable
+ GENERIC: heap-size ( name -- size )
M: c-type-name heap-size c-type heap-size ;
M: abstract-c-type heap-size size>> ;
- GENERIC: stack-size ( name -- size ) foldable
+ GENERIC: stack-size ( name -- size )
M: c-type-name stack-size c-type stack-size ;
M: long-long-type box-return ( c-type -- )
f swap box-parameter ;
- : define-deref ( name -- )
- [ CHAR: * prefix "alien.c-types" create ] [ c-getter 0 prefix ] bi
+ : define-deref ( c-type -- )
+ [ name>> CHAR: * prefix "alien.c-types" create ] [ c-getter 0 prefix ] bi
(( c-ptr -- value )) define-inline ;
- : define-out ( name -- )
- [ "alien.c-types" constructor-word ]
+ : define-out ( c-type -- )
+ [ name>> "alien.c-types" constructor-word ]
[ dup c-setter '[ _ heap-size (byte-array) [ 0 @ ] keep ] ] bi
(( value -- c-ptr )) define-inline ;
: define-primitive-type ( c-type name -- )
- [ typedef ]
- [ name>> define-deref ]
- [ name>> define-out ]
- tri ;
+ [ typedef ] [ define-deref ] [ define-out ] tri ;
: if-void ( c-type true false -- )
pick void? [ drop nip call ] [ nip call ] if ; inline
{ [ dup { uchar ushort uint ulong ulonglong } member-eq? ] [ unsigned-interval ] }
} cond ; foldable
-: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline
+: c-type-clamp ( value c-type -- value' )
+ dup { float double } member-eq?
+ [ drop ] [ c-type-interval clamp ] if ; inline
\ cleave [ ] 2sequence
\ output>array [ ] 2sequence ;
-: define-inline-method ( class generic quot -- )
- [ create-method-in ] dip [ define ] [ drop make-inline ] 2bi ;
-
: (define-struct-slot-values-method) ( class -- )
[ \ struct-slot-values ] [ struct-slot-values-quot ] bi
define-inline-method ;
slots empty? [ struct-must-have-slots ] when
class redefine-struct-tuple-class
slots make-slots dup check-struct-slots :> slot-specs
+ slot-specs offsets-quot call :> unaligned-size
slot-specs struct-alignment :> alignment
- slot-specs offsets-quot call alignment align :> size
+ unaligned-size alignment align :> size
class slot-specs size alignment c-type-for-class :> c-type
end-stack-analysis
] with-scope ; inline
+: with-dummy-cfg-builder ( node quot -- )
+ [
+ [ V{ } clone procedures ] 2dip
+ '[ _ t t [ _ call( node -- ) ] with-cfg-builder ] with-variable
+ ] { } make drop ;
+
GENERIC: emit-node ( node -- )
: emit-nodes ( nodes -- )
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators combinators.short-circuit arrays
fry kernel layouts math namespaces sequences cpu.architecture
-math.bitwise math.order math.vectors.simd.intrinsics classes
+math.bitwise math.order classes
vectors locals make alien.c-types io.binary grouping
compiler.cfg
compiler.cfg.registers
] [ drop f ] if ; inline
: general-compare-expr? ( insn -- ? )
+ {
+ [ compare-expr? ]
+ [ compare-imm-expr? ]
+ [ compare-float-unordered-expr? ]
+ [ compare-float-ordered-expr? ]
+ } 1|| ;
+
+: general-or-vector-compare-expr? ( insn -- ? )
{
[ compare-expr? ]
[ compare-imm-expr? ]
: rewrite-boolean-comparison? ( insn -- ? )
dup ##branch-t? [
- src1>> vreg>expr general-compare-expr?
+ src1>> vreg>expr general-or-vector-compare-expr?
] [ drop f ] if ; inline
: >compare-expr< ( expr -- in1 in2 cc )
M: ##alien-signed-4 rewrite rewrite-alien-addressing ;
M: ##alien-float rewrite rewrite-alien-addressing ;
M: ##alien-double rewrite rewrite-alien-addressing ;
-M: ##alien-vector rewrite rewrite-alien-addressing ;
M: ##set-alien-integer-1 rewrite rewrite-alien-addressing ;
M: ##set-alien-integer-2 rewrite rewrite-alien-addressing ;
M: ##set-alien-integer-4 rewrite rewrite-alien-addressing ;
M: ##set-alien-float rewrite rewrite-alien-addressing ;
M: ##set-alien-double rewrite rewrite-alien-addressing ;
-M: ##set-alien-vector rewrite rewrite-alien-addressing ;
-
-! Some lame constant folding for SIMD intrinsics. Eventually this
-! should be redone completely.
-
-: rewrite-shuffle-vector-imm ( insn expr -- insn' )
- 2dup [ rep>> ] bi@ eq? [
- [ [ dst>> ] [ src>> vn>vreg ] bi* ]
- [ [ shuffle>> ] bi@ nths ]
- [ drop rep>> ]
- 2tri \ ##shuffle-vector-imm new-insn
- ] [ 2drop f ] if ;
-
-: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
- 2dup length swap length /i group nths concat ;
-
-: fold-shuffle-vector-imm ( insn expr -- insn' )
- [ [ dst>> ] [ shuffle>> ] bi ] dip value>>
- (fold-shuffle-vector-imm) \ ##load-constant new-insn ;
-
-M: ##shuffle-vector-imm rewrite
- dup src>> vreg>expr {
- { [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
- { [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
- { [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
- [ 2drop f ]
- } cond ;
-
-: (fold-scalar>vector) ( insn bytes -- insn' )
- [ [ dst>> ] [ rep>> rep-components ] bi ] dip <repetition> concat
- \ ##load-constant new-insn ;
-: fold-scalar>vector ( insn expr -- insn' )
- value>> over rep>> {
- { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
- { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
- [ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
- } case ;
-
-M: ##scalar>vector rewrite
- dup src>> vreg>expr dup constant-expr?
- [ fold-scalar>vector ] [ 2drop f ] if ;
-
-M: ##xor-vector rewrite
- dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq?
- [ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ;
-
-: vector-not? ( expr -- ? )
- {
- [ not-vector-expr? ]
- [ {
- [ xor-vector-expr? ]
- [ [ src1>> ] [ src2>> ] bi [ vn>expr fill-vector-expr? ] either? ]
- } 1&& ]
- } 1|| ;
-
-GENERIC: vector-not-src ( expr -- vreg )
-M: not-vector-expr vector-not-src src>> vn>vreg ;
-M: xor-vector-expr vector-not-src
- dup src1>> vn>expr fill-vector-expr? [ src2>> ] [ src1>> ] if vn>vreg ;
-
-M: ##and-vector rewrite
- {
- { [ dup src1>> vreg>expr vector-not? ] [
- {
- [ dst>> ]
- [ src1>> vreg>expr vector-not-src ]
- [ src2>> ]
- [ rep>> ]
- } cleave \ ##andn-vector new-insn
- ] }
- { [ dup src2>> vreg>expr vector-not? ] [
- {
- [ dst>> ]
- [ src2>> vreg>expr vector-not-src ]
- [ src1>> ]
- [ rep>> ]
- } cleave \ ##andn-vector new-insn
- ] }
- [ drop f ]
- } cond ;
-
-M: ##andn-vector rewrite
- dup src1>> vreg>expr vector-not? [
- {
- [ dst>> ]
- [ src1>> vreg>expr vector-not-src ]
- [ src2>> ]
- [ rep>> ]
- } cleave \ ##and-vector new-insn
- ] [ drop f ] if ;
} available-reps ;
! M:: x86 %broadcast-vector ( dst src rep -- )
-! rep unsign-rep {
+! rep signed-rep {
! { float-4-rep [
! dst src float-4-rep %copy
! dst dst { 0 0 0 0 } SHUFPS
! } available-reps ;
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
- rep unsign-rep {
+ rep signed-rep {
{ float-4-rep [
dst src1 float-4-rep %copy
dst src2 UNPCKLPS
} available-reps ;
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
- rep unsign-rep {
+ rep signed-rep {
{ double-2-rep [
dst src1 double-2-rep %copy
dst src2 MOVLHPS
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
dst src rep %copy
- dst shuffle rep unsign-rep {
+ dst shuffle rep signed-rep {
{ double-2-rep [ >float-4-shuffle float-4-shuffle ] }
{ float-4-rep [ float-4-shuffle ] }
{ int-4-rep [ int-4-shuffle ] }
M: x86 %merge-vector-head
[ two-operand ] keep
- unsign-rep {
+ signed-rep {
{ double-2-rep [ MOVLHPS ] }
{ float-4-rep [ UNPCKLPS ] }
{ longlong-2-rep [ PUNPCKLQDQ ] }
M: x86 %merge-vector-tail
[ two-operand ] keep
- unsign-rep {
+ signed-rep {
{ double-2-rep [ UNPCKHPD ] }
{ float-4-rep [ UNPCKHPS ] }
{ longlong-2-rep [ PUNPCKHQDQ ] }
M: x86 %unsigned-pack-vector
[ two-operand ] keep
- unsign-rep {
+ signed-rep {
{ int-4-rep [ PACKUSDW ] }
{ short-8-rep [ PACKUSWB ] }
} case ;
} case ;
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
- rep unsign-rep :> rep'
+ rep signed-rep :> rep'
dst src rep' {
{ longlong-2-rep [ int64 call ] }
{ int-4-rep [ int32 call ] }
M: x86 %dot-vector
[ two-operand ] keep
{
- { float-4-rep [
- sse4.1?
- [ HEX: ff DPPS ]
- [ [ MULPS ] [ drop dup float-4-rep %horizontal-add-vector ] 2bi ]
- if
- ] }
- { double-2-rep [
- sse4.1?
- [ HEX: ff DPPD ]
- [ [ MULPD ] [ drop dup double-2-rep %horizontal-add-vector ] 2bi ]
- if
- ] }
+ { float-4-rep [ HEX: ff DPPS ] }
+ { double-2-rep [ HEX: ff DPPD ] }
} case ;
M: x86 %dot-vector-reps
{
- { sse3? { float-4-rep double-2-rep } }
+ { sse4.1? { float-4-rep double-2-rep } }
} available-reps ;
-M: x86 %horizontal-add-vector ( dst src rep -- )
- {
- { float-4-rep [ [ float-4-rep %copy ] [ HADDPS ] [ HADDPS ] 2tri ] }
- { double-2-rep [ [ double-2-rep %copy ] [ HADDPD ] 2bi ] }
+M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
+ [ two-operand ] keep
+ signed-rep {
+ { float-4-rep [ HADDPS ] }
+ { double-2-rep [ HADDPD ] }
+ { int-4-rep [ PHADDD ] }
+ { short-8-rep [ PHADDW ] }
} case ;
M: x86 %horizontal-add-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
+ { ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
M: x86 %horizontal-shl-vector-imm-reps
{
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
M: x86 %horizontal-shr-vector-imm-reps
{
- { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
+ { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %abs-vector ( dst src rep -- )
M: x86 %integer>scalar drop MOVD ;
+! XXX the longlong versions won't work on x86.32
M:: x86 %scalar>integer ( dst src rep -- )
rep {
+ { longlong-scalar-rep [
+ dst src MOVD
+ ] }
+ { ulonglong-scalar-rep [
+ dst src MOVD
+ ] }
{ int-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 32-bit-version-of
#! set up by the caller.
stack-frame get total-size>> + stack@ ;
-enable-simd
enable-min/max
enable-fixnum-log2
flush
1 exit
] when
- ] "cpu.x86" add-init-hook ;
+ ] "cpu.x86" add-startup-hook ;
: enable-sse2 ( version -- )
20 >= [
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien alien.c-types alien.data alien.parser
assocs byte-arrays classes compiler.units functors kernel lexer
-libc math math.vectors math.vectors.private
-math.vectors.specialization namespaces
+libc math math.vectors math.vectors.private namespaces
parser prettyprint.custom sequences sequences.private strings
summary vocabs vocabs.loader vocabs.parser vocabs.generated
words fry combinators make ;
[ drop \ T bad-byte-array-length ] unless
<direct-A> ; inline
-M: A new-underlying drop byte-array>A ;
-
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
M: A length length>> ; inline
M: A byte-length length \ T heap-size * ; inline
-M: A element-type drop \ T ; inline
-
M: A direct-array-syntax drop \ A@ ;
M: A pprint-delims drop \ A{ \ } ;
INSTANCE: A specialized-array
-A T c-type-boxed-class f specialize-vector-words
-
;FUNCTOR
GENERIC: (underlying-type) ( c-type -- c-type' )
: trim ( seq quot -- newseq )
[ trim-slice ] [ drop ] 2bi like ; inline
-: sum ( seq -- n ) 0 [ + ] binary-reduce ;
+GENERIC: sum ( seq -- n )
+M: object sum 0 [ + ] binary-reduce ; inline
: product ( seq -- n ) 1 [ * ] binary-reduce ;
FROM: alien.c-types => float ;
SPECIALIZED-ARRAY: float
SPECIALIZED-VECTOR: uint
-SIMD: float
IN: gpu.demos.bunny
GLSL-SHADER-FILE: bunny-vertex-shader vertex-shader "bunny.v.glsl"
[ sobel>> framebuffer>> ] [ dim>> ] bi resize-framebuffer ;
M: bunny-world pref-dim* drop { 1024 768 } ;
- M: bunny-world tick-length drop 1000 30 /i ;
+ M: bunny-world tick-length drop 1000000 30 /i ;
M: bunny-world wasd-movement-speed drop 1/160. ;
M: bunny-world wasd-near-plane drop 1/32. ;
M: bunny-world wasd-far-plane drop 256.0 ;
destructors grid-meshes math.vectors.simd ;
QUALIFIED-WITH: alien.c-types c
SPECIALIZED-ARRAY: c:float
-SIMD: c:float
IN: terrain
CONSTANT: FOV $[ 2.0 sqrt 1 + ]
VELOCITY-MODIFIER-NORMAL >>velocity-modifier ;
M: terrain-world tick-length
- drop 1000 30 /i ;
+ drop 1000000 30 /i ;
: frustum ( dim -- -x x -y y near far )
dup first2 min v/n