! (c)2009, 2010 Slava Pestov, Joe Groff bsd license
-USING: accessors alien alien.c-types alien.arrays alien.strings arrays
-byte-arrays cpu.architecture fry io io.encodings.binary
-io.files io.streams.memory kernel libc math sequences words
-byte-vectors ;
+USING: accessors alien alien.c-types alien.arrays alien.strings
+arrays byte-arrays cpu.architecture fry io io.encodings.binary
+io.files io.streams.memory kernel libc math sequences words ;
IN: alien.data
GENERIC: require-c-array ( c-type -- )
swap memory>byte-array
] [ [ + ] change-index drop ] 2bi ;
-M: byte-vector stream-write
- [ dup byte-length tail-slice ]
- [ [ [ byte-length ] bi@ + ] keep lengthen ]
- [ drop byte-length ]
- 2tri
- [ >c-ptr swap >c-ptr ] dip memcpy ;
-
M: value-type c-type-rep drop int-rep ;
M: value-type c-type-getter
unclip [ array-length ] dip [ <c-direct-array> ] 2curry ;
M: array c-type-unboxer-quot drop [ >c-ptr ] ;
-
M: biassoc assoc-size from>> assoc-size ;
-M: biassoc at* from>> at* ;
+M: biassoc at* from>> at* ; inline
-M: biassoc value-at* to>> at* ;
+M: biassoc value-at* to>> at* ; inline
: once-at ( value key assoc -- )
2dup key? [ 3drop ] [ set-at ] if ;
! Copyright (C) 2008, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays combinators hints kernel locals math
-math.order sequences ;
+math.order sequences sequences.private ;
IN: binary-search
<PRIVATE
:: (search) ( seq from to quot: ( elt -- <=> ) -- i elt )
from to + 2/ :> midpoint@
- midpoint@ seq nth :> midpoint
+ midpoint@ seq nth-unsafe :> midpoint
to from - 1 <= [
midpoint@ midpoint
T{ bit-set f ?{ f f t f t f } } intersect
] unit-test
+[ f ] [ T{ bit-set f ?{ t f f f t f } } null? ] unit-test
+[ t ] [ T{ bit-set f ?{ f f f f f f } } null? ] unit-test
+
[ T{ bit-set f ?{ t f t f f f } } ] [
T{ bit-set f ?{ t t t f f f } }
T{ bit-set f ?{ f t f f t t } } diff
"alien.remote-control" require
] unless
-"prettyprint" "alien.prettyprint" require-when
-"debugger" "alien.debugger" require-when
+{ "boostrap.compiler" "prettyprint" } "alien.prettyprint" require-when
+{ "boostrap.compiler" "debugger" } "alien.debugger" require-when
"cpu." cpu name>> append require
[ optimized? not ] filter compile ;
"debug-compiler" get [
-
+
nl
"Compiling..." write flush
curry compose uncurry
- array-nth set-array-nth length>>
+ array-nth set-array-nth
wrap probe
" done" print flush
+ "io.streams.byte-array.fast" require
+
] unless
USING: vocabs.loader vocabs kernel ;\r
IN: bootstrap.handbook\r
\r
-"bootstrap.help" "help.handbook" require-when\r
+{ "boostrap.handbook" "bootstrap.help" } "help.handbook" require-when\r
compiler.utilities namespaces ;
IN: bootstrap.threads
-"debugger" "debugger.threads" require-when
+{ "bootstrap.threads" "debugger" } "debugger.threads" require-when
[ yield ] yield-hook set-global
[ "bootstrap." prepend vocab ] all? [
"ui.tools" require
- "ui.backend.cocoa" "ui.backend.cocoa.tools" require-when
+ { "ui.backend.cocoa" } "ui.backend.cocoa.tools" require-when
"ui.tools.walker" require
] when
USING: vocabs vocabs.loader ;
-"prettyprint" "classes.struct.prettyprint" require-when
+{ "classes.struct" "prettyprint" } "classes.struct.prettyprint" require-when
M: ##compare analyze-aliases*
call-next-method
dup useless-compare? [
- dst>> \ f type-number \ ##load-immediate new-insn
+ dst>> f \ ##load-constant new-insn
analyze-aliases*
] when ;
and ;
: emit-trivial-if ( -- )
- ds-pop \ f type-number cc/= ^^compare-imm ds-push ;
+ ds-pop f cc/= ^^compare-imm ds-push ;
: trivial-not-if? ( #if -- ? )
children>> first2
and ;
: emit-trivial-not-if ( -- )
- ds-pop \ f type-number cc= ^^compare-imm ds-push ;
+ ds-pop f cc= ^^compare-imm ds-push ;
: emit-actual-if ( #if -- )
! Inputs to the final instruction need to be copied because of
! loc>vreg sync
- ds-pop any-rep ^^copy \ f type-number cc/= ##compare-imm-branch emit-if ;
+ ds-pop any-rep ^^copy f cc/= ##compare-imm-branch emit-if ;
M: #if emit-node
{
-! Copyright (C) 2008, 2009 Slava Pestov.
+! Copyright (C) 2008, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays byte-arrays kernel layouts math
-namespaces sequences combinators splitting parser effects
-words cpu.architecture compiler.cfg.registers
+USING: accessors arrays byte-arrays combinators.short-circuit
+kernel layouts math namespaces sequences combinators splitting
+parser effects words cpu.architecture compiler.cfg.registers
compiler.cfg.instructions compiler.cfg.instructions.syntax ;
IN: compiler.cfg.hats
>>
+: immutable? ( obj -- ? )
+ { [ float? ] [ word? ] [ not ] } 1|| ; inline
+
: ^^load-literal ( obj -- dst )
[ next-vreg dup ] dip {
- { [ dup not ] [ drop \ f type-number ##load-immediate ] }
{ [ dup fixnum? ] [ tag-fixnum ##load-immediate ] }
- { [ dup float? ] [ ##load-constant ] }
+ { [ dup immutable? ] [ ##load-constant ] }
[ ##load-reference ]
} cond ;
def: dst/int-rep
constant: obj ;
+INSN: ##load-double
+def: dst/double-rep
+constant: val ;
+
INSN: ##peek
def: dst/int-rep
literal: loc ;
0 cc= ^^compare-imm
ds-push ;
-: tag-literal ( n -- tagged )
- literal>> [ tag-fixnum ] [ \ f type-number ] if* ;
-
: emit-fixnum-op ( insn -- )
[ 2inputs ] dip call ds-push ; inline
{ [ dup 0 [-inf,a] interval-subset? ] [ drop emit-fixnum-right-shift ] }
[ drop emit-fixnum-shift-general ]
} cond ;
-
+
: emit-fixnum-bitnot ( -- )
ds-pop ^^not tag-mask get ^^xor-imm ds-push ;
tri
] with-compilation-unit
-: each-def-rep ( ... insn vreg-quot: ( ... vreg rep -- ... ) -- ... )
+: each-def-rep ( insn vreg-quot: ( vreg rep -- ) -- )
[ [ defs-vreg ] [ defs-vreg-rep ] bi ] dip with when* ; inline
-: each-use-rep ( ... insn vreg-quot: ( ... vreg rep -- ... ) -- ... )
+: each-use-rep ( insn vreg-quot: ( vreg rep -- ) -- )
[ [ uses-vregs ] [ uses-vreg-reps ] bi ] dip 2each ; inline
-: each-temp-rep ( ... insn vreg-quot: ( ... vreg rep -- ... ) -- ... )
+: each-temp-rep ( insn vreg-quot: ( vreg rep -- ) -- )
[ [ temp-vregs ] [ temp-vreg-reps ] bi ] dip 2each ; inline
+: each-rep ( insn vreg-quot: ( vreg rep -- ) -- )
+ [ each-def-rep ] [ each-use-rep ] [ each-temp-rep ] 2tri ; inline
+
: with-vreg-reps ( ..a cfg vreg-quot: ( ..a vreg rep -- ..b ) -- ..b )
'[
[ basic-block set ] [
[
- _
- [ each-def-rep ]
- [ each-use-rep ]
- [ each-temp-rep ] 2tri
+ _ each-rep
] each-non-phi
] bi
] each-basic-block ; inline
-USING: tools.test cpu.architecture
-compiler.cfg.registers compiler.cfg.instructions
-compiler.cfg.representations.preferred ;
+USING: accessors compiler.cfg compiler.cfg.debugger
+compiler.cfg.instructions compiler.cfg.registers
+compiler.cfg.representations.preferred cpu.architecture kernel
+namespaces tools.test sequences arrays system ;
IN: compiler.cfg.representations
[ { double-rep double-rep } ] [
{ dst 5 }
{ src 3 }
} defs-vreg-rep
-] unit-test
\ No newline at end of file
+] unit-test
+
+: test-representations ( -- )
+ cfg new 0 get >>entry dup cfg set select-representations drop ;
+
+! Make sure cost calculation isn't completely wrong
+V{
+ T{ ##prologue }
+ T{ ##branch }
+} 0 test-bb
+
+V{
+ T{ ##peek f 1 D 0 }
+ T{ ##peek f 2 D 1 }
+ T{ ##add-float f 3 1 2 }
+ T{ ##replace f 3 D 0 }
+ T{ ##replace f 3 D 1 }
+ T{ ##replace f 3 D 2 }
+ T{ ##branch }
+} 1 test-bb
+
+V{
+ T{ ##epilogue }
+ T{ ##return }
+} 2 test-bb
+
+0 1 edge
+1 2 edge
+
+[ ] [ test-representations ] unit-test
+
+[ 1 ] [ 1 get instructions>> [ ##allot? ] count ] unit-test
+
+cpu x86.32? [
+
+ ! Make sure load-constant is converted into load-double
+ V{
+ T{ ##prologue }
+ T{ ##branch }
+ } 0 test-bb
+
+ V{
+ T{ ##peek f 1 D 0 }
+ T{ ##load-constant f 2 0.5 }
+ T{ ##add-float f 3 1 2 }
+ T{ ##replace f 3 D 0 }
+ T{ ##branch }
+ } 1 test-bb
+
+ V{
+ T{ ##epilogue }
+ T{ ##return }
+ } 2 test-bb
+
+ 0 1 edge
+ 1 2 edge
+
+ [ ] [ test-representations ] unit-test
+
+ [ t ] [ 1 get instructions>> second ##load-double? ] unit-test
+
+ ! Make sure phi nodes are handled in a sane way
+ V{
+ T{ ##prologue }
+ T{ ##branch }
+ } 0 test-bb
+
+ V{
+ T{ ##peek f 1 D 0 }
+ T{ ##compare-imm-branch f 1 2 }
+ } 1 test-bb
+
+ V{
+ T{ ##load-constant f 2 1.5 }
+ T{ ##branch }
+ } 2 test-bb
+
+ V{
+ T{ ##load-constant f 3 2.5 }
+ T{ ##branch }
+ } 3 test-bb
+
+ V{
+ T{ ##phi f 4 }
+ T{ ##peek f 5 D 0 }
+ T{ ##add-float f 6 4 5 }
+ T{ ##replace f 6 D 0 }
+ } 4 test-bb
+
+ V{
+ T{ ##epilogue }
+ T{ ##return }
+ } 5 test-bb
+
+ test-diamond
+ 4 5 edge
+
+ 2 get 2 2array
+ 3 get 3 2array 2array 4 get instructions>> first (>>inputs)
+
+ [ ] [ test-representations ] unit-test
+
+ [ t ] [ 2 get instructions>> first ##load-double? ] unit-test
+
+ [ t ] [ 3 get instructions>> first ##load-double? ] unit-test
+
+ [ t ] [ 4 get instructions>> first ##phi? ] unit-test
+] when
\ No newline at end of file
-! Copyright (C) 2009 Slava Pestov
+! Copyright (C) 2009, 2010 Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: kernel fry accessors sequences assocs sets namespaces
arrays combinators combinators.short-circuit math make locals
: possible ( vreg -- reps ) possibilities get at ;
: compute-possibilities ( cfg -- )
- H{ } clone [ '[ swap _ conjoin-at ] with-vreg-reps ] keep
- [ keys ] assoc-map possibilities set ;
+ H{ } clone [ '[ swap _ adjoin-at ] with-vreg-reps ] keep
+ [ members ] assoc-map possibilities set ;
! Compute vregs which must remain tagged for their lifetime.
SYMBOL: always-boxed
SYMBOL: costs
: init-costs ( -- )
- possibilities get [ [ 0 ] H{ } map>assoc ] assoc-map costs set ;
+ possibilities get [ drop H{ } clone ] assoc-map costs set ;
+
+: record-possibility ( rep vreg -- )
+ costs get at [ 0 or ] change-at ;
: increase-cost ( rep vreg -- )
! Increase cost of keeping vreg in rep, making a choice of rep less
! likely.
- [ basic-block get loop-nesting-at ] 2dip costs get at at+ ;
+ costs get at [ 0 or basic-block get loop-nesting-at 1 + + ] change-at ;
: maybe-increase-cost ( possible vreg preferred -- )
- pick eq? [ 2drop ] [ increase-cost ] if ;
+ pick eq? [ record-possibility ] [ increase-cost ] if ;
: representation-cost ( vreg preferred -- )
! 'preferred' is a representation that the instruction can accept with no cost.
[ '[ _ _ maybe-increase-cost ] ]
2bi each ;
+GENERIC: compute-insn-costs ( insn -- )
+
+M: ##load-constant compute-insn-costs
+ ! There's no cost to unboxing the result of a ##load-constant
+ drop ;
+
+M: insn compute-insn-costs [ representation-cost ] each-rep ;
+
: compute-costs ( cfg -- costs )
- init-costs [ representation-cost ] with-vreg-reps costs get ;
+ init-costs
+ [
+ [ basic-block set ]
+ [
+ [
+ compute-insn-costs
+ ] each-non-phi
+ ] bi
+ ] each-basic-block
+ costs get ;
! For every vreg, compute preferred representation, that minimizes costs.
: minimize-costs ( costs -- representations )
+ [ nip assoc-empty? not ] assoc-filter
[ >alist alist-min first ] assoc-map ;
: compute-representations ( cfg -- )
bi assoc-union
representations set ;
+! PHI nodes require special treatment
+! If the output of a phi instruction is only used as the input to another
+! phi instruction, then we want to use the same representation for both
+! if possible.
+SYMBOL: phis
+
+: collect-phis ( cfg -- )
+ H{ } clone phis set
+ [
+ phis get
+ '[ [ inputs>> values ] [ dst>> ] bi _ set-at ] each-phi
+ ] each-basic-block ;
+
+SYMBOL: work-list
+
+: add-to-work-list ( vregs -- )
+ work-list get push-all-front ;
+
+: rep-assigned ( vregs -- vregs' )
+ representations get '[ _ key? ] filter ;
+
+: rep-not-assigned ( vregs -- vregs' )
+ representations get '[ _ key? not ] filter ;
+
+: add-ready-phis ( -- )
+ phis get keys rep-assigned add-to-work-list ;
+
+: process-phi ( dst -- )
+ ! If dst = phi(src1,src2,...) and dst's representation has been
+ ! determined, assign that representation to each one of src1,...
+ ! that does not have a representation yet, and process those, too.
+ dup phis get at* [
+ [ rep-of ] [ rep-not-assigned ] bi*
+ [ [ set-rep-of ] with each ] [ add-to-work-list ] bi
+ ] [ 2drop ] if ;
+
+: remaining-phis ( -- )
+ phis get keys rep-not-assigned { } assert-sequence= ;
+
+: process-phis ( -- )
+ <hashed-dlist> work-list set
+ add-ready-phis
+ work-list get [ process-phi ] slurp-deque
+ remaining-phis ;
+
+: compute-phi-representations ( cfg -- )
+ collect-phis process-phis ;
+
! Insert conversions. This introduces new temporaries, so we need
! to rename opearands too.
: record-renaming ( from to -- )
2array renaming-set get push needs-renaming? on ;
-:: (compute-renaming-set) ( ..a vreg required quot: ( ..a vreg preferred required -- ..b ) -- ..b )
+:: (compute-renaming-set) ( vreg required quot: ( vreg preferred required -- new-vreg ) -- )
vreg rep-of :> preferred
preferred required eq?
[ vreg no-renaming ]
GENERIC: conversions-for-insn ( insn -- )
-SYMBOL: phi-mappings
+M: ##phi conversions-for-insn , ;
-! compiler.cfg.cssa inserts conversions which convert phi inputs into
-! the representation of the output. However, we still have to do some
-! processing here, because if the only node that uses the output of
-! the phi instruction is another phi instruction then this phi node's
-! output won't have a representation assigned.
-M: ##phi conversions-for-insn
- [ , ] [ [ inputs>> values ] [ dst>> ] bi phi-mappings get set-at ] bi ;
+! When a float is unboxed, we replace the ##load-constant with a ##load-double
+! if the architecture supports it
+: convert-to-load-double? ( insn -- ? )
+ {
+ [ drop load-double? ]
+ [ dst>> rep-of double-rep? ]
+ [ obj>> float? ]
+ } 1&& ;
! When a literal zeroes/ones vector is unboxed, we replace the ##load-reference
! with a ##zero-vector or ##fill-vector instruction since this is more efficient.
[ dst>> rep-of vector-rep? ]
[ obj>> B{ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 } = ]
} 1&& ;
+
: convert-to-fill-vector? ( insn -- ? )
{
[ dst>> rep-of vector-rep? ]
[ obj>> B{ 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 } = ]
} 1&& ;
+: (convert-to-load-double) ( insn -- dst val )
+ [ dst>> ] [ obj>> ] bi ; inline
+
: (convert-to-zero/fill-vector) ( insn -- dst rep )
dst>> dup rep-of ; inline
: conversions-for-load-insn ( insn -- ?insn )
{
+ {
+ [ dup convert-to-load-double? ]
+ [ (convert-to-load-double) ##load-double f ]
+ }
{
[ dup convert-to-zero-vector? ]
[ (convert-to-zero/fill-vector) ##zero-vector f ]
] change-instructions drop
] if ;
-! If the output of a phi instruction is only used as the input to another
-! phi instruction, then we want to use the same representation for both
-! if possible.
-SYMBOL: work-list
-
-: add-to-work-list ( vregs -- )
- work-list get push-all-front ;
-
-: rep-assigned ( vregs -- vregs' )
- representations get '[ _ key? ] filter ;
-
-: rep-not-assigned ( vregs -- vregs' )
- representations get '[ _ key? not ] filter ;
-
-: add-ready-phis ( -- )
- phi-mappings get keys rep-assigned add-to-work-list ;
-
-: process-phi-mapping ( dst -- )
- ! If dst = phi(src1,src2,...) and dst's representation has been
- ! determined, assign that representation to each one of src1,...
- ! that does not have a representation yet, and process those, too.
- dup phi-mappings get at* [
- [ rep-of ] [ rep-not-assigned ] bi*
- [ [ set-rep-of ] with each ] [ add-to-work-list ] bi
- ] [ 2drop ] if ;
-
-: remaining-phi-mappings ( -- )
- phi-mappings get keys rep-not-assigned
- [ [ int-rep ] dip set-rep-of ] each ;
-
-: process-phi-mappings ( -- )
- <hashed-dlist> work-list set
- add-ready-phis
- work-list get [ process-phi-mapping ] slurp-deque
- remaining-phi-mappings ;
-
: insert-conversions ( cfg -- )
- H{ } clone phi-mappings set
- [ conversions-for-block ] each-basic-block
- process-phi-mappings ;
+ [ conversions-for-block ] each-basic-block ;
PRIVATE>
{
[ compute-possibilities ]
[ compute-representations ]
+ [ compute-phi-representations ]
[ insert-conversions ]
[ ]
} cleave
[ value>> immediate-bitwise? ]
} 1&& ;
+: vreg-immediate-comparand? ( vreg -- ? )
+ vreg>expr {
+ [ constant-expr? ]
+ [ value>> immediate-comparand? ]
+ } 1&& ;
+
! Outputs f to mean no change
GENERIC: rewrite ( insn -- insn/f )
: ##branch-t? ( insn -- ? )
dup ##compare-imm-branch? [
- {
- [ cc>> cc/= eq? ]
- [ src2>> \ f type-number eq? ]
- } 1&&
+ { [ cc>> cc/= eq? ] [ src2>> not ] } 1&&
] [ drop f ] if ; inline
: general-compare-expr? ( insn -- ? )
: rewrite-redundant-comparison? ( insn -- ? )
{
[ src1>> vreg>expr general-compare-expr? ]
- [ src2>> \ f type-number = ]
- [ cc>> { cc= cc/= } member-eq? ]
+ [ src2>> not ]
+ [ cc>> { cc= cc/= } member? ]
} 1&& ; inline
: rewrite-redundant-comparison ( insn -- insn' )
} cond
swap cc= eq? [ [ negate-cc ] change-cc ] when ;
-ERROR: bad-comparison ;
-
: (fold-compare-imm) ( insn -- ? )
- [ [ src1>> vreg>constant ] [ src2>> ] bi ] [ cc>> ] bi
- pick integer?
- [ [ <=> ] dip evaluate-cc ]
- [
- 2nip {
- { cc= [ f ] }
- { cc/= [ t ] }
- [ bad-comparison ]
+ [ src1>> vreg>constant ] [ src2>> ] [ cc>> ] tri
+ 2over [ integer? ] both? [ [ <=> ] dip evaluate-cc ] [
+ {
+ { cc= [ eq? ] }
+ { cc/= [ eq? not ] }
} case
] if ;
M: ##compare-branch rewrite
{
- { [ dup src1>> vreg-immediate-arithmetic? ] [ t >compare-imm-branch ] }
- { [ dup src2>> vreg-immediate-arithmetic? ] [ f >compare-imm-branch ] }
+ { [ dup src1>> vreg-immediate-comparand? ] [ t >compare-imm-branch ] }
+ { [ dup src2>> vreg-immediate-comparand? ] [ f >compare-imm-branch ] }
{ [ dup self-compare? ] [ rewrite-self-compare-branch ] }
[ drop f ]
} cond ;
next-vreg \ ##compare-imm new-insn ; inline
: >boolean-insn ( insn ? -- insn' )
- [ dst>> ] dip
- {
- { t [ t \ ##load-constant new-insn ] }
- { f [ \ f type-number \ ##load-immediate new-insn ] }
- } case ;
+ [ dst>> ] dip \ ##load-constant new-insn ;
: rewrite-self-compare ( insn -- insn' )
dup (rewrite-self-compare) >boolean-insn ;
M: ##compare rewrite
{
- { [ dup src1>> vreg-immediate-arithmetic? ] [ t >compare-imm ] }
- { [ dup src2>> vreg-immediate-arithmetic? ] [ f >compare-imm ] }
+ { [ dup src1>> vreg-immediate-comparand? ] [ t >compare-imm ] }
+ { [ dup src2>> vreg-immediate-comparand? ] [ f >compare-imm ] }
{ [ dup self-compare? ] [ rewrite-self-compare ] }
[ drop f ]
} cond ;
: constant-fold ( insn -- insn' )
[ dst>> ]
- [ [ src1>> vreg>constant ] [ src2>> ] [ ] tri constant-fold* ] bi
+ [
+ [ src1>> vreg>constant \ f type-number or ]
+ [ src2>> ]
+ [ ]
+ tri constant-fold*
+ ] bi
\ ##load-immediate new-insn ; inline
: unary-constant-fold? ( insn -- ? )
[ drop f ]
} cond ;
-: insn>imm-insn ( insn op swap? -- )
+: insn>imm-insn ( insn op swap? -- new-insn )
swap [
[ [ dst>> ] [ src1>> ] [ src2>> ] tri ] dip
[ swap ] when vreg>constant
arithmetic-op?
[ vreg-immediate-arithmetic? ] [ vreg-immediate-bitwise? ] if ;
-: rewrite-arithmetic ( insn op -- ? )
+: rewrite-arithmetic ( insn op -- insn/f )
{
{ [ over src2>> over vreg-immediate? ] [ f insn>imm-insn ] }
[ 2drop f ]
} cond ; inline
-: rewrite-arithmetic-commutative ( insn op -- ? )
+: rewrite-arithmetic-commutative ( insn op -- insn/f )
{
{ [ over src2>> over vreg-immediate? ] [ f insn>imm-insn ] }
{ [ over src1>> over vreg-immediate? ] [ t insn>imm-insn ] }
accessors sequences compiler.cfg.predecessors locals compiler.cfg.dce
compiler.cfg.ssa.destruction compiler.cfg.loop-detection
compiler.cfg.representations compiler.cfg assocs vectors arrays
-layouts literals namespaces alien compiler.cfg.value-numbering.simd ;
+layouts literals namespaces alien compiler.cfg.value-numbering.simd
+system ;
IN: compiler.cfg.value-numbering.tests
: trim-temps ( insns -- insns )
T{ ##load-reference f 1 + }
T{ ##peek f 2 D 0 }
T{ ##compare f 4 2 1 cc> }
- T{ ##compare-imm f 6 4 $[ \ f type-number ] cc/= }
+ T{ ##compare-imm f 6 4 f cc/= }
T{ ##replace f 6 D 0 }
} value-numbering-step trim-temps
] unit-test
T{ ##load-reference f 1 + }
T{ ##peek f 2 D 0 }
T{ ##compare f 4 2 1 cc<= }
- T{ ##compare-imm f 6 4 $[ \ f type-number ] cc= }
+ T{ ##compare-imm f 6 4 f cc= }
T{ ##replace f 6 D 0 }
} value-numbering-step trim-temps
] unit-test
T{ ##peek f 8 D 0 }
T{ ##peek f 9 D -1 }
T{ ##compare-float-unordered f 12 8 9 cc< }
- T{ ##compare-imm f 14 12 $[ \ f type-number ] cc= }
+ T{ ##compare-imm f 14 12 f cc= }
T{ ##replace f 14 D 0 }
} value-numbering-step trim-temps
] unit-test
T{ ##peek f 29 D -1 }
T{ ##peek f 30 D -2 }
T{ ##compare f 33 29 30 cc<= }
- T{ ##compare-imm-branch f 33 $[ \ f type-number ] cc/= }
+ T{ ##compare-imm-branch f 33 f cc/= }
} value-numbering-step trim-temps
] unit-test
{
T{ ##peek f 1 D -1 }
T{ ##test-vector f 2 1 f float-4-rep vcc-any }
- T{ ##compare-imm-branch f 2 $[ \ f type-number ] cc/= }
+ T{ ##compare-imm-branch f 2 f cc/= }
} value-numbering-step trim-temps
] unit-test
} value-numbering-step trim-temps
] unit-test
+cpu x86.32? [
+ [
+ {
+ T{ ##peek f 0 D 0 }
+ T{ ##load-constant f 1 + }
+ T{ ##compare-imm f 2 0 + cc= }
+ }
+ ] [
+ {
+ T{ ##peek f 0 D 0 }
+ T{ ##load-constant f 1 + }
+ T{ ##compare f 2 0 1 cc= }
+ } value-numbering-step trim-temps
+ ] unit-test
+
+ [
+ {
+ T{ ##peek f 0 D 0 }
+ T{ ##load-constant f 1 + }
+ T{ ##compare-imm-branch f 0 + cc= }
+ }
+ ] [
+ {
+ T{ ##peek f 0 D 0 }
+ T{ ##load-constant f 1 + }
+ T{ ##compare-branch f 0 1 cc= }
+ } value-numbering-step trim-temps
+ ] unit-test
+] when
+
[
{
T{ ##peek f 0 D 0 }
} value-numbering-step trim-temps
] unit-test
+[
+ {
+ T{ ##peek f 0 D 0 }
+ T{ ##load-constant f 1 3.5 }
+ T{ ##compare-branch f 0 1 cc= }
+ }
+] [
+ {
+ T{ ##peek f 0 D 0 }
+ T{ ##load-constant f 1 3.5 }
+ T{ ##compare-branch f 0 1 cc= }
+ } value-numbering-step trim-temps
+] unit-test
+
[
{
T{ ##peek f 0 D 0 }
[
{
T{ ##peek f 0 D 0 }
- T{ ##load-constant f 1 3.5 }
- T{ ##compare-branch f 0 1 cc= }
+ T{ ##load-immediate f 1 100 }
+ T{ ##compare-imm-branch f 0 100 cc>= }
}
] [
{
T{ ##peek f 0 D 0 }
- T{ ##load-constant f 1 3.5 }
- T{ ##compare-branch f 0 1 cc= }
+ T{ ##load-immediate f 1 100 }
+ T{ ##compare-branch f 1 0 cc<= }
+ } value-numbering-step trim-temps
+] unit-test
+
+! Branch folding
+[
+ {
+ T{ ##load-immediate f 1 100 }
+ T{ ##load-immediate f 2 200 }
+ T{ ##load-constant f 3 t }
+ }
+] [
+ {
+ T{ ##load-immediate f 1 100 }
+ T{ ##load-immediate f 2 200 }
+ T{ ##compare f 3 1 2 cc<= }
} value-numbering-step trim-temps
] unit-test
[
{
- T{ ##peek f 0 D 0 }
T{ ##load-immediate f 1 100 }
- T{ ##compare-imm-branch f 0 100 cc>= }
+ T{ ##load-immediate f 2 200 }
+ T{ ##load-constant f 3 f }
}
] [
{
- T{ ##peek f 0 D 0 }
T{ ##load-immediate f 1 100 }
- T{ ##compare-branch f 1 0 cc<= }
+ T{ ##load-immediate f 2 200 }
+ T{ ##compare f 3 1 2 cc= }
+ } value-numbering-step trim-temps
+] unit-test
+
+[
+ {
+ T{ ##load-immediate f 1 100 }
+ T{ ##load-constant f 2 f }
+ }
+] [
+ {
+ T{ ##load-immediate f 1 100 }
+ T{ ##compare-imm f 2 1 f cc= }
+ } value-numbering-step trim-temps
+] unit-test
+
+[
+ {
+ T{ ##load-constant f 1 f }
+ T{ ##load-constant f 2 t }
+ }
+] [
+ {
+ T{ ##load-constant f 1 f }
+ T{ ##compare-imm f 2 1 f cc= }
} value-numbering-step trim-temps
] unit-test
} value-numbering-step
] unit-test
+! Stupid constant folding corner case
+[
+ {
+ T{ ##load-constant f 1 f }
+ T{ ##load-immediate f 2 $[ \ f type-number ] }
+ }
+] [
+ {
+ T{ ##load-constant f 1 f }
+ T{ ##and-imm f 2 1 15 }
+ } value-numbering-step
+] unit-test
+
! Displaced alien optimizations
3 vreg-counter set-global
{
T{ ##load-immediate f 1 10 }
T{ ##load-immediate f 2 20 }
- T{ ##load-immediate f 3 $[ \ f type-number ] }
+ T{ ##load-constant f 3 f }
}
] [
{
{
T{ ##load-immediate f 1 10 }
T{ ##load-immediate f 2 20 }
- T{ ##load-immediate f 3 $[ \ f type-number ] }
+ T{ ##load-constant f 3 f }
}
] [
{
[
{
T{ ##peek f 0 D 0 }
- T{ ##load-immediate f 1 $[ \ f type-number ] }
+ T{ ##load-constant f 1 f }
}
] [
{
[
{
T{ ##peek f 0 D 0 }
- T{ ##load-immediate f 1 $[ \ f type-number ] }
+ T{ ##load-constant f 1 f }
}
] [
{
[
{
T{ ##peek f 0 D 0 }
- T{ ##load-immediate f 1 $[ \ f type-number ] }
+ T{ ##load-constant f 1 f }
}
] [
{
{
T{ ##peek f 0 D 0 }
T{ ##compare f 1 0 0 cc<= }
- T{ ##compare-imm-branch f 1 $[ \ f type-number ] cc/= }
+ T{ ##compare-imm-branch f 1 f cc/= }
} test-branch-folding
] unit-test
T{ ##copy { dst 21 } { src 20 } { rep any-rep } }
T{ ##compare-imm-branch
{ src1 21 }
- { src2 $[ \ f type-number ] }
+ { src2 f }
{ cc cc/= }
}
} 1 test-bb
CODEGEN: ##load-immediate %load-immediate
CODEGEN: ##load-reference %load-reference
CODEGEN: ##load-constant %load-reference
+CODEGEN: ##load-double %load-double
CODEGEN: ##peek %peek
CODEGEN: ##replace %replace
CODEGEN: ##inc-d %inc-d
: rel-word-pic-tail ( word class -- )
[ add-literal ] dip rt-entry-point-pic-tail rel-fixup ;
-: rel-immediate ( literal class -- )
+: rel-literal ( literal class -- )
[ add-literal ] dip rt-literal rel-fixup ;
+: rel-float ( literal class -- )
+ [ add-literal ] dip rt-float rel-fixup ;
+
: rel-this ( class -- )
rt-this rel-fixup ;
rt-vm
rt-cards-offset
rt-decks-offset
- rt-exception-handler ;
+ rt-exception-handler
+ rt-float ;
: rc-absolute? ( n -- ? )
${
compile-test-cfg
execute( -- result ) ;
-! loading immediates
+! loading constants
[ f ] [
V{
- T{ ##load-immediate f 0 $[ \ f type-number ] }
+ T{ ##load-constant f 0 f }
} compile-test-bb
] unit-test
integer generalize-counter-interval
] unit-test
-[ T{ interval f { 0 t } { $[ most-positive-fixnum ] t } } ] [
+[ T{ interval f { 0 t } { $[ max-array-capacity ] t } } ] [
T{ interval f { 1 t } { 1 t } }
T{ interval f { 0 t } { 0 t } }
fixnum generalize-counter-interval
-! Copyright (C) 2008, 2009 Slava Pestov.
+! Copyright (C) 2008, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel sequences accessors arrays fry math math.intervals
-layouts combinators namespaces locals
+USING: kernel classes.algebra sequences accessors arrays fry
+math math.intervals layouts combinators namespaces locals
stack-checker.inlining
compiler.tree
compiler.tree.combinators
compiler.tree.propagation.simple
compiler.tree.propagation.branches
compiler.tree.propagation.constraints ;
+FROM: sequences.private => array-capacity ;
IN: compiler.tree.propagation.recursive
: check-fixed-point ( node infos1 infos2 -- )
[ label>> calls>> [ node>> node-input-infos ] map flip ]
[ latest-input-infos ] bi ;
+: counter-class ( interval class -- class' )
+ dup fixnum class<= [
+ swap array-capacity-interval interval-subset?
+ [ drop array-capacity ] when
+ ] [ nip ] if ;
+
:: generalize-counter-interval ( interval initial-interval class -- interval' )
+ interval class counter-class :> class
{
{ [ interval initial-interval interval-subset? ] [ initial-interval ] }
{ [ interval empty-interval eq? ] [ initial-interval ] }
! Copyright (C) 2008, 2010 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: alien.c-types kernel sequences words fry generic accessors
-classes.tuple classes classes.algebra definitions
-stack-checker.dependencies quotations classes.tuple.private math
-math.partial-dispatch math.private math.intervals sets.private
-math.floats.private math.integers.private layouts math.order
-vectors hashtables combinators effects generalizations assocs
-sets combinators.short-circuit sequences.private locals growable
+USING: alien.c-types kernel sequences words fry generic
+generic.single accessors classes.tuple classes classes.algebra
+definitions stack-checker.dependencies quotations
+classes.tuple.private math math.partial-dispatch math.private
+math.intervals sets.private math.floats.private
+math.integers.private layouts math.order vectors hashtables
+combinators effects generalizations assocs sets
+combinators.short-circuit sequences.private locals growable
stack-checker namespaces compiler.tree.propagation.info ;
FROM: math => float ;
FROM: sets => set ;
[ \ push def>> ] [ f ] if
] "custom-inlining" set-word-prop
+! Speeds up fasta benchmark
+\ >fixnum [
+ in-d>> first value-info class>> fixnum \ f class-or class<=
+ [ [ dup [ \ >fixnum no-method ] unless ] ] [ f ] if
+] "custom-inlining" set-word-prop
+
! We want to constant-fold calls to heap-size, and recompile those
! calls when a C type is redefined
\ heap-size [
! Mapping from register class to machine registers
HOOK: machine-registers cpu ( -- assoc )
-HOOK: %load-immediate cpu ( reg obj -- )
+HOOK: %load-immediate cpu ( reg val -- )
HOOK: %load-reference cpu ( reg obj -- )
+HOOK: %load-double cpu ( reg val -- )
HOOK: %peek cpu ( vreg loc -- )
HOOK: %replace cpu ( vreg loc -- )
M: stack-params param-reg 2drop ;
-! Is this integer small enough to be an immediate operand for
-! %add-imm, %sub-imm, and %mul-imm?
+! Does this architecture support %load-double?
+HOOK: load-double? cpu ( -- ? )
+
+M: object load-double? f ;
+
+! Can this value be an immediate operand for %add-imm, %sub-imm,
+! or %mul-imm?
HOOK: immediate-arithmetic? cpu ( n -- ? )
-! Is this integer small enough to be an immediate operand for
-! %and-imm, %or-imm, and %xor-imm?
+! Can this value be an immediate operand for %and-imm, %or-imm,
+! or %xor-imm?
HOOK: immediate-bitwise? cpu ( n -- ? )
-! What c-type describes the implicit struct return pointer for large structs?
+! Can this value be an immediate operand for %compare-imm or
+! %compare-imm-branch?
+HOOK: immediate-comparand? cpu ( n -- ? )
+
+M: object immediate-comparand? ( n -- ? )
+ {
+ { [ dup integer? ] [ immediate-arithmetic? ] }
+ { [ dup not ] [ drop t ] }
+ [ drop f ]
+ } cond ;
+
+! What c-type describes the implicit struct return pointer for
+! large structs?
HOOK: struct-return-pointer-type cpu ( -- c-type )
! Is this structure small enough to be returned in registers?
M: ppc %load-immediate ( reg n -- ) swap LOAD ;
M: ppc %load-reference ( reg obj -- )
- [ 0 swap LOAD32 ] [ rc-absolute-ppc-2/2 rel-immediate ] bi* ;
+ [ 0 swap LOAD32 ] [ rc-absolute-ppc-2/2 rel-literal ] bi* ;
M: ppc %alien-global ( register symbol dll -- )
[ 0 swap LOAD32 ] 2dip rc-absolute-ppc-2/2 rel-dlsym ;
} case ;
: (%compare) ( src1 src2 -- ) [ 0 ] dip CMP ; inline
-: (%compare-imm) ( src1 src2 -- ) [ 0 ] 2dip CMPI ; inline
+: (%compare-imm) ( src1 src2 -- ) [ 0 ] [ ] [ \ f type-number or ] tri* CMPI ; inline
: (%compare-float-unordered) ( src1 src2 -- ) [ 0 ] dip FCMPU ; inline
: (%compare-float-ordered) ( src1 src2 -- ) [ 0 ] dip FCMPO ; inline
! See http://factorcode.org/license.txt for BSD license.
USING: locals alien alien.c-types alien.libraries alien.syntax
arrays kernel fry math namespaces sequences system layouts io
-vocabs.loader accessors init classes.struct combinators command-line
-make compiler compiler.units compiler.constants compiler.alien
-compiler.codegen compiler.codegen.fixup
-compiler.cfg.instructions compiler.cfg.builder
-compiler.cfg.intrinsics compiler.cfg.stack-frame
-cpu.x86.assembler cpu.x86.assembler.operands cpu.x86
-cpu.architecture vm ;
+vocabs.loader accessors init classes.struct combinators
+command-line make words compiler compiler.units
+compiler.constants compiler.alien compiler.codegen
+compiler.codegen.fixup compiler.cfg.instructions
+compiler.cfg.builder compiler.cfg.intrinsics
+compiler.cfg.stack-frame cpu.x86.assembler
+cpu.x86.assembler.operands cpu.x86 cpu.architecture vm ;
FROM: layouts => cell ;
IN: cpu.x86.32
M: x86.32 frame-reg EBP ;
M: x86.32 temp-reg ECX ;
+M: x86.32 immediate-comparand? ( n -- ? )
+ [ call-next-method ] [ word? ] bi or ;
+
+M: x86.32 load-double? ( -- ? ) t ;
+
+M: x86.32 %load-double ( dst val -- )
+ [ 0 [] MOVSD ] dip rc-absolute rel-float ;
+
M: x86.32 %mov-vm-ptr ( reg -- )
0 MOV 0 rc-absolute-cell rel-vm ;
M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
-M: x86 %load-reference swap 0 MOV rc-absolute-cell rel-immediate ;
+M: x86 %load-reference swap 0 MOV rc-absolute-cell rel-literal ;
HOOK: ds-reg cpu ( -- reg )
HOOK: rs-reg cpu ( -- reg )
M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
-:: %boolean ( dst temp word -- )
+:: (%boolean) ( dst temp insn -- )
dst \ f type-number MOV
- temp 0 MOV \ t rc-absolute-cell rel-immediate
- dst temp word execute ; inline
-
-: (%compare) ( src1 src2 cc -- )
- 2over [ { cc= cc/= } member? ] [ register? ] [ 0 = ] tri* and and
- [ drop dup TEST ]
- [ CMP ] if ;
+ temp 0 MOV \ t rc-absolute-cell rel-literal
+ dst temp insn execute ; inline
+
+: %boolean ( dst cc temp -- )
+ swap order-cc {
+ { cc< [ \ CMOVL (%boolean) ] }
+ { cc<= [ \ CMOVLE (%boolean) ] }
+ { cc> [ \ CMOVG (%boolean) ] }
+ { cc>= [ \ CMOVGE (%boolean) ] }
+ { cc= [ \ CMOVE (%boolean) ] }
+ { cc/= [ \ CMOVNE (%boolean) ] }
+ } case ;
M:: x86 %compare ( dst src1 src2 cc temp -- )
- src1 src2 cc (%compare)
- cc order-cc {
- { cc< [ dst temp \ CMOVL %boolean ] }
- { cc<= [ dst temp \ CMOVLE %boolean ] }
- { cc> [ dst temp \ CMOVG %boolean ] }
- { cc>= [ dst temp \ CMOVGE %boolean ] }
- { cc= [ dst temp \ CMOVE %boolean ] }
- { cc/= [ dst temp \ CMOVNE %boolean ] }
- } case ;
+ src1 src2 CMP
+ dst cc temp %boolean ;
-M: x86 %compare-imm ( dst src1 src2 cc temp -- )
- %compare ;
+: use-test? ( src1 src2 cc -- ? )
+ [ register? ] [ 0 = ] [ { cc= cc/= } member? ] tri* and and ;
-M:: x86 %compare-branch ( label src1 src2 cc -- )
- src1 src2 cc (%compare)
- cc order-cc {
- { cc< [ label JL ] }
- { cc<= [ label JLE ] }
- { cc> [ label JG ] }
- { cc>= [ label JGE ] }
- { cc= [ label JE ] }
- { cc/= [ label JNE ] }
+: (%compare-tagged) ( src1 src2 -- )
+ [ HEX: ffffffff CMP ] dip rc-absolute rel-literal ;
+
+: (%compare-imm) ( src1 src2 cc -- )
+ {
+ { [ 3dup use-test? ] [ 2drop dup TEST ] }
+ { [ over integer? ] [ drop CMP ] }
+ { [ over word? ] [ drop (%compare-tagged) ] }
+ { [ over not ] [ 2drop \ f type-number CMP ] }
+ } cond ;
+
+M:: x86 %compare-imm ( dst src1 src2 cc temp -- )
+ src1 src2 cc (%compare-imm)
+ dst cc temp %boolean ;
+
+: %branch ( label cc -- )
+ order-cc {
+ { cc< [ JL ] }
+ { cc<= [ JLE ] }
+ { cc> [ JG ] }
+ { cc>= [ JGE ] }
+ { cc= [ JE ] }
+ { cc/= [ JNE ] }
} case ;
-M: x86 %compare-imm-branch ( label src1 src2 cc -- )
- %compare-branch ;
+M:: x86 %compare-branch ( label src1 src2 cc -- )
+ src1 src2 CMP
+ label cc %branch ;
+
+M:: x86 %compare-imm-branch ( label src1 src2 cc -- )
+ src1 src2 cc (%compare-imm)
+ label cc %branch ;
M: x86 %add-float double-rep two-operand ADDSD ;
M: x86 %sub-float double-rep two-operand SUBSD ;
:: (%compare-float) ( dst src1 src2 cc temp compare -- )
cc {
- { cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
- { cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
- { cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
- { cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
- { cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] }
- { cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] }
- { cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] }
- { cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
- { cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
- { cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
- { cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
- { cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] }
- { cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE %boolean ] }
- { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP %boolean ] }
+ { cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA (%boolean) ] }
+ { cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE (%boolean) ] }
+ { cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA (%boolean) ] }
+ { cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE (%boolean) ] }
+ { cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= (%boolean) ] }
+ { cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE (%boolean) ] }
+ { cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP (%boolean) ] }
+ { cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE (%boolean) ] }
+ { cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB (%boolean) ] }
+ { cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE (%boolean) ] }
+ { cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB (%boolean) ] }
+ { cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= (%boolean) ] }
+ { cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE (%boolean) ] }
+ { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP (%boolean) ] }
} case ; inline
M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
:: %test-vector-mask ( dst temp mask vcc -- )
vcc {
- { vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] }
- { vcc-none [ dst dst TEST dst temp \ CMOVE %boolean ] }
- { vcc-all [ dst mask CMP dst temp \ CMOVE %boolean ] }
- { vcc-notall [ dst mask CMP dst temp \ CMOVNE %boolean ] }
+ { vcc-any [ dst dst TEST dst temp \ CMOVNE (%boolean) ] }
+ { vcc-none [ dst dst TEST dst temp \ CMOVE (%boolean) ] }
+ { vcc-all [ dst mask CMP dst temp \ CMOVE (%boolean) ] }
+ { vcc-notall [ dst mask CMP dst temp \ CMOVNE (%boolean) ] }
} case ;
: %move-vector-mask ( dst src rep -- mask )
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs byte-arrays byte-vectors classes
combinators definitions effects fry generic generic.single
-generic.standard hashtables io.binary io.streams.string kernel
-kernel.private math math.integers.private math.parser
-namespaces parser sbufs sequences splitting splitting.private strings
-vectors words ;
+generic.standard hashtables io.binary io.encodings
+io.streams.string kernel kernel.private math
+math.integers.private math.parser namespaces parser sbufs
+sequences splitting splitting.private strings vectors words ;
IN: hints
GENERIC: specializer-predicate ( spec -- quot )
M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop
\ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop
+
+\ encode-string { string object object } "specializer" set-word-prop
: http-delete ( url -- response data )
<delete-request> http-request ;
-USING: vocabs vocabs.loader ;
+USE: vocabs.loader
-"debugger" "http.client.debugger" require-when
+{ "http.client" "debugger" } "http.client.debugger" require-when
! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: math.parser arrays io.encodings sequences kernel assocs
-hashtables io.encodings.ascii generic parser classes.tuple words
-words.symbol io io.files splitting namespaces math
-compiler.units accessors classes.singleton classes.mixin
-io.encodings.iana fry simple-flat-file lexer ;
+USING: arrays assocs biassocs kernel io.encodings math.parser
+sequences hashtables io.encodings.ascii generic parser
+classes.tuple words words.symbol io io.files splitting
+namespaces math compiler.units accessors classes.singleton
+classes.mixin io.encodings.iana fry simple-flat-file lexer ;
IN: io.encodings.8-bit
<PRIVATE
SYMBOL: 8-bit-encodings
8-bit-encodings [ H{ } clone ] initialize
-TUPLE: 8-bit biassoc ;
+TUPLE: 8-bit { biassoc biassoc read-only } ;
-: encode-8-bit ( char stream assoc -- )
- swapd value-at
- [ swap stream-write1 ] [ encode-error ] if* ; inline
+: 8-bit-encode ( char 8-bit -- byte )
+ biassoc>> value-at [ encode-error ] unless* ; inline
-M: 8-bit encode-char biassoc>> encode-8-bit ;
+M: 8-bit encode-char
+ swap [ 8-bit-encode ] dip stream-write1 ;
-: decode-8-bit ( stream assoc -- char/f )
- swap stream-read1
- [ swap at [ replacement-char ] unless* ]
- [ drop f ] if* ; inline
+M: 8-bit encode-string
+ swap [ '[ _ 8-bit-encode ] B{ } map-as ] dip stream-write ;
-M: 8-bit decode-char biassoc>> decode-8-bit ;
+M: 8-bit decode-char
+ swap stream-read1 dup
+ [ swap biassoc>> at [ replacement-char ] unless* ]
+ [ 2drop f ]
+ if ;
MIXIN: 8-bit-encoding
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: io io.encodings kernel math io.encodings.private ;
+USING: accessors byte-arrays io io.encodings
+io.encodings.private kernel math sequences ;
IN: io.encodings.ascii
-<PRIVATE
-: encode-if< ( char stream encoding max -- )
- nip 1 - pick < [ encode-error ] [ stream-write1 ] if ; inline
-
-: decode-if< ( stream encoding max -- character )
- nip swap stream-read1 dup
- [ [ nip ] [ > ] 2bi [ >fixnum ] [ drop replacement-char ] if ]
- [ 2drop f ] if ; inline
-PRIVATE>
-
SINGLETON: ascii
M: ascii encode-char
- 128 encode-if< ; inline
+ drop
+ over 127 <= [ stream-write1 ] [ encode-error ] if ; inline
+
+M: ascii encode-string
+ drop
+ [
+ dup aux>>
+ [ [ dup 127 <= [ encode-error ] unless ] B{ } map-as ]
+ [ >byte-array ]
+ if
+ ] dip
+ stream-write ;
M: ascii decode-char
- 128 decode-if< ; inline
+ drop
+ stream-read1 dup [
+ dup 127 <= [ >fixnum ] [ drop replacement-char ] if
+ ] when ; inline
: write-in-groups ( byte-array port -- )
[ binary-object <direct-uchar-array> ] dip
- [ buffer>> size>> <groups> ] [ '[ _ stream-write ] ] bi
+ [ buffer>> size>> <sliced-groups> ] [ '[ _ stream-write ] ] bi
each ;
M: output-port stream-write
HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ;
HINTS: decoder-readln { input-port utf8 } { input-port ascii } ;
-
-HINTS: encoder-write { object output-port utf8 } { object output-port ascii } ;
--- /dev/null
+Slava Pestov
--- /dev/null
+! Copyright (C) 2010 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: alien byte-vectors io kernel libc math sequences ;
+IN: io.streams.byte-array.fast
+
+! This is split off from io.streams.byte-array because it uses
+! memcpy, which is a non-core word that only works after the
+! optimizing compiler has been loaded.
+
+M: byte-vector stream-write
+ [ dup byte-length tail-slice ]
+ [ [ [ byte-length ] bi@ + ] keep lengthen ]
+ [ drop byte-length ]
+ 2tri
+ [ >c-ptr swap >c-ptr ] dip memcpy ;
"locals.fry"
} [ require ] each
-"prettyprint" "locals.definitions" require-when
-"prettyprint" "locals.prettyprint" require-when
+{ "locals" "prettyprint" } "locals.definitions" require-when
+{ "locals" "prettyprint" } "locals.prettyprint" require-when
USE: vocabs.loader
-"prettyprint" "math.rectangles.prettyprint" require-when
+{ "math.rectangles" "prettyprint" } "math.rectangles.prettyprint" require-when
M: int-4 v*hs+
int-4-rep [ (simd-v*hs+) ] [ call-next-method ] vv->v-op longlong-2-cast ; inline
-"mirrors" "math.vectors.simd.mirrors" require-when
+{ "math.vectors.simd" "mirrors" } "math.vectors.simd.mirrors" require-when
] append!
] ;
-USING: vocabs vocabs.loader ;
+USE: vocabs.loader
-"debugger" "peg.debugger" require-when
+{ "debugger" "peg" } "peg.debugger" require-when
SYNTAX: R{ CHAR: } parsing-regexp ;
SYNTAX: R| CHAR: | parsing-regexp ;
-USING: vocabs vocabs.loader ;
+USE: vocabs.loader
-"prettyprint" "regexp.prettyprint" require-when
+{ "prettyprint" "regexp" } "regexp.prettyprint" require-when
SYNTAX: SPECIALIZED-ARRAY:
scan-c-type define-array-vocab use-vocab ;
-"prettyprint" "specialized-arrays.prettyprint" require-when
+{ "specialized-arrays" "prettyprint" } "specialized-arrays.prettyprint" require-when
-"mirrors" "specialized-arrays.mirrors" require-when
+{ "specialized-arrays" "mirrors" } "specialized-arrays.mirrors" require-when
ERROR: unbalanced-branches-error < inference-error word quots declareds actuals ;
-"debugger" "stack-checker.errors.prettyprint" require-when
+{ "stack-checker.errors" "debugger" } "stack-checker.errors.prettyprint" require-when
\ both-fixnums? { object object } { object } define-primitive
\ byte-array>bignum { byte-array } { bignum } define-primitive \ byte-array>bignum make-foldable
\ callstack { } { callstack } define-primitive \ callstack make-flushable
+\ callstack-bounds { } { alien alien } define-primitive \ callstack-bounds make-flushable
\ callstack-for { c-ptr } { callstack } define-primitive \ callstack make-flushable
\ callstack>array { callstack } { array } define-primitive \ callstack>array make-flushable
\ check-datastack { array integer integer } { object } define-primitive \ check-datastack make-flushable
SYNTAX: TYPED::
(::) define-typed ;
-USING: vocabs vocabs.loader ;
+USE: vocabs.loader
-"prettyprint" "typed.prettyprint" require-when
+{ "typed" "prettyprint" } "typed.prettyprint" require-when
USE: vocabs.loader
-"prettyprint" "ui.gadgets.prettyprint" require-when
+{ "ui.gadgets" "prettyprint" } "ui.gadgets.prettyprint" require-when
<<
-"debugger" "unix.debugger" require-when
+{ "unix" "debugger" } "unix.debugger" require-when
>>
USE: vocabs.loader
-"prettyprint" "urls.prettyprint" require-when
+{ "urls" "prettyprint" } "urls.prettyprint" require-when
USE: vocabs.loader
-"prettyprint" "windows.com.prettyprint" require-when
+{ "windows.com" "prettyprint" } "windows.com.prettyprint" require-when
: with-x ( display-string quot -- )
[ init-x ] dip [ close-x ] [ ] cleanup ; inline
-"io.backend.unix" "x11.io.unix" require-when
+{ "x11" "io.backend.unix" } "x11.io.unix" require-when
USE: vocabs.loader
-"inverse" "xml.syntax.inverse" require-when
+{ "xml.syntax" "inverse" } "xml.syntax.inverse" require-when
{ "retainstack" "kernel" "primitive_retainstack" (( -- array )) }
{ "(identity-hashcode)" "kernel.private" "primitive_identity_hashcode" (( obj -- code )) }
{ "become" "kernel.private" "primitive_become" (( old new -- )) }
+ { "callstack-bounds" "kernel.private" "primitive_callstack_bounds" (( -- start end )) }
{ "check-datastack" "kernel.private" "primitive_check_datastack" (( array in# out# -- ? )) }
{ "compute-identity-hashcode" "kernel.private" "primitive_compute_identity_hashcode" (( obj -- )) }
{ "context-object" "kernel.private" "primitive_context_object" (( n -- obj )) }
[ f ] [ HS{ 1 2 3 } HS{ 2 3 } set= ] unit-test
[ HS{ 1 2 } HS{ 1 2 3 } ] [ HS{ 1 2 } clone dup clone [ 3 swap adjoin ] keep ] unit-test
+
+[ t ] [ HS{ } null? ] unit-test
+[ f ] [ HS{ 1 } null? ] unit-test
M: hash-set members table>> keys ; inline
M: hash-set set-like drop dup hash-set? [ members <hash-set> ] unless ;
M: hash-set clone table>> clone hash-set boa ;
+M: hash-set null? table>> assoc-empty? ;
M: sequence fast-set <hash-set> ;
M: f fast-set drop H{ } clone hash-set boa ;
-! Copyright (C) 2008 Daniel Ehrenberg.
+! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces growable
strings io classes continuations destructors combinators
GENERIC: encode-char ( char stream encoding -- )
+GENERIC: encode-string ( string stream encoding -- )
+
+M: object encode-string [ encode-char ] 2curry each ; inline
+
GENERIC: <decoder> ( stream encoding -- newstream )
CONSTANT: replacement-char HEX: fffd
M: encoder stream-write1
>encoder< encode-char ;
-GENERIC# encoder-write 2 ( string stream encoding -- )
-
-M: string encoder-write
- [ encode-char ] 2curry each ;
-
M: encoder stream-write
- >encoder< encoder-write ;
+ >encoder< encode-string ;
M: encoder dispose stream>> dispose ;
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: math math.order kernel sequences sbufs vectors growable io
-continuations namespaces io.encodings combinators strings ;
+USING: accessors byte-arrays math math.order kernel sequences
+sbufs vectors growable io continuations namespaces io.encodings
+combinators strings ;
IN: io.encodings.utf8
! Decoding UTF-8
! Encoding UTF-8
: encoded ( stream char -- )
- BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
+ BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
-: char>utf8 ( stream char -- )
- {
+: char>utf8 ( char stream -- )
+ swap {
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
{ [ dup -11 shift zero? ] [
2dup -6 shift BIN: 11000000 bitor swap stream-write1
2dup -6 shift encoded
encoded
]
- } cond ;
+ } cond ; inline
M: utf8 encode-char
- drop swap char>utf8 ;
+ drop char>utf8 ;
+
+M: utf8 encode-string
+ drop
+ over aux>>
+ [ [ char>utf8 ] curry each ]
+ [ [ >byte-array ] dip stream-write ] if ;
PRIVATE>
adjoin
delete
}
+"To test if a set is the empty set:"
+{ $subsections null? }
"Basic mathematical operations, which any type of set may override for efficiency:"
{ $subsections
diff
HELP: without
{ $values { "seq" sequence } { "set" set } { "subseq" sequence } }
{ $description "Returns the subsequence of the given sequence consisting of things that are not members of the set. This may contain duplicates, if the sequence has duplicates." } ;
+
+HELP: null?
+{ $values { "set" set } { "?" "a boolean" } }
+{ $description "Tests whether the given set is empty. This outputs " { $snippet "t" } " when given a null set of any type." } ;
[ f ] [ HS{ 1 2 3 1 2 1 } duplicates ] unit-test
[ H{ { 3 HS{ 1 2 } } } ] [ H{ } clone 1 3 pick adjoin-at 2 3 pick adjoin-at ] unit-test
+
+[ t ] [ f null? ] unit-test
+[ f ] [ { 4 } null? ] unit-test
GENERIC: set= ( set1 set2 -- ? )
GENERIC: duplicates ( set -- seq )
GENERIC: all-unique? ( set -- ? )
+GENERIC: null? ( set -- ? )
! Defaults for some methods.
! Override them for efficiency
+M: set null? members null? ; inline
+
M: set set-like drop ; inline
M: set union
M: sequence members
[ pruned ] keep like ;
+
+M: sequence null?
+ empty? ; inline
: combine ( sets -- set )
[ f ]
! Copyright (C) 2003, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math.private sequences kernel.private
-math sequences.private slots.private byte-arrays
-alien.accessors ;
+math sequences.private slots.private alien.accessors ;
IN: strings
<PRIVATE
{ $notes "To unconditionally reload a vocabulary, use " { $link reload } ". To reload changed source files only, use the words in " { $link "vocabs.refresh" } "." } ;
HELP: require-when
-{ $values { "if" "a vocabulary specifier" } { "then" "a vocabulary specifier" } }
-{ $description "Loads the " { $snippet "then" } " vocabulary if it is not loaded and the " { $snippet "if" } " vocabulary is. If the " { $snippet "if" } " vocabulary is not loaded now, but it is later, then the " { $snippet "then" } " vocabulary will be loaded along with it at that time." }
-{ $notes "This is used to express a joint dependency of vocabularies. If vocabularies " { $snippet "a" } " and " { $snippet "b" } " use code in vocabulary " { $snippet "c" } " to interact, then the following line can be placed in " { $snippet "a" } " in order express the dependency."
-{ $code "\"b\" \"c\" require-when" } } ;
+{ $values { "if" "a sequence of vocabulary specifiers" } { "then" "a vocabulary specifier" } }
+{ $description "Loads the " { $snippet "then" } " vocabulary if it is not loaded and all of the " { $snippet "if" } " vocabulary is. If some of the " { $snippet "if" } " vocabularies are not loaded now, but they are later, then the " { $snippet "then" } " vocabulary will be loaded along with the final one." }
+{ $notes "This is used to express a joint dependency of vocabularies. If vocabularies " { $snippet "a" } " and " { $snippet "b" } " use code in vocabulary " { $snippet "c" } " to interact, then the following line, which can be placed in " { $snippet "a" } " or " { $snippet "b" } ", expresses the dependency."
+{ $code "{ \"a\" \"b\" } \"c\" require-when" } } ;
HELP: run
{ $values { "vocab" "a vocabulary specifier" } }
<PRIVATE
-: load-conditional-requires ( vocab-name -- )
- conditional-requires get
- [ at [ require ] each ]
- [ delete-at ] 2bi ;
+SYMBOL: require-when-vocabs
+require-when-vocabs [ HS{ } clone ] initialize
+
+SYMBOL: require-when-table
+require-when-table [ V{ } clone ] initialize
+
+: load-conditional-requires ( vocab -- )
+ vocab-name require-when-vocabs get in? [
+ require-when-table get [
+ [ [ vocab ] all? ] dip
+ [ require ] curry when
+ ] assoc-each
+ ] when ;
: load-source ( vocab -- )
dup check-vocab-hook get call( vocab -- )
[ +parsing+ >>source-loaded? ] dip
[ % ] [ call( -- ) ] if-bootstrapping
+done+ >>source-loaded?
- vocab-name load-conditional-requires
+ load-conditional-requires
] [ ] [ f >>source-loaded? ] cleanup ;
: load-docs ( vocab -- )
load-vocab drop ;
: require-when ( if then -- )
- over vocab
- [ nip require ]
- [ swap conditional-requires get [ swap suffix ] change-at ]
- if ;
+ over [ vocab ] all? [
+ require drop
+ ] [
+ [ drop [ require-when-vocabs get adjoin ] each ]
+ [ 2array require-when-table get push ] 2bi
+ ] if ;
: reload ( name -- )
dup vocab
USE: vocabs.loader
IN: vocabs.loader.test.m
-"vocabs.loader.test.o" "vocabs.loader.test.n" require-when
+{ "vocabs.loader.test.o" "vocabs.loader.test.m" }
+"vocabs.loader.test.n" require-when
! Copyright (C) 2007, 2009 Eduardo Cavazos, Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs strings kernel sorting namespaces
-sequences definitions sets ;
+sequences definitions sets combinators ;
IN: vocabs
SYMBOL: dictionary
: check-vocab-name ( name -- name )
dup string? [ bad-vocab-name ] unless ;
-SYMBOL: conditional-requires
-conditional-requires [ H{ } clone ] initialize
-
: create-vocab ( name -- vocab )
check-vocab-name
dictionary get [ <vocab> ] cache
n 2 * ALU "Homo sapiens alu" "ONE" write-repeat-fasta
initial-seed
+
n 3 * homo-sapiens-chars homo-sapiens-floats
"IUB ambiguity codes" "TWO" write-random-fasta
+
n 5 * IUB-chars IUB-floats
"Homo sapiens frequency" "THREE" write-random-fasta
+
drop
] with-file-writer
] ;
! Copyright (C) 2010 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors alien alien.c-types alien.data alien.parser
-alien.strings arrays assocs byte-arrays classes.struct
-combinators continuations cuda.ffi destructors fry io
-io.backend io.encodings.string io.encodings.utf8 kernel lexer
-locals math math.parser namespaces opengl.gl.extensions
-prettyprint quotations sequences ;
+USING: accessors alien alien.data alien.parser alien.strings
+alien.syntax arrays assocs byte-arrays classes.struct
+combinators continuations cuda.ffi cuda.memory cuda.utils
+destructors fry io io.backend io.encodings.string
+io.encodings.utf8 kernel lexer locals macros math math.parser
+namespaces nested-comments opengl.gl.extensions parser
+prettyprint quotations sequences words ;
+QUALIFIED-WITH: alien.c-types a
IN: cuda
-SYMBOL: cuda-device
-SYMBOL: cuda-context
-SYMBOL: cuda-module
-SYMBOL: cuda-function
-SYMBOL: cuda-launcher
-SYMBOL: cuda-memory-hashtable
-
-ERROR: throw-cuda-error n ;
-
-: cuda-error ( n -- )
- dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ;
-
-: cuda-version ( -- n )
- int <c-object> [ cuDriverGetVersion cuda-error ] keep *int ;
-
-: init-cuda ( -- )
- 0 cuInit cuda-error ;
-
TUPLE: launcher
{ device integer initial: 0 }
-{ device-flags initial: 0 }
-path block-shape shared-size grid ;
+{ device-flags initial: 0 } ;
-: with-cuda-context ( flags device quot -- )
- [
- [ CUcontext <c-object> ] 2dip
- [ cuCtxCreate cuda-error ] 3keep 2drop *void*
- ] dip
- [ '[ _ @ ] ]
- [ drop '[ _ cuCtxDestroy cuda-error ] ] 2bi
- [ ] cleanup ; inline
+TUPLE: function-launcher
+dim-block dim-grid shared-size stream ;
-: with-cuda-module ( path quot -- )
- [
- normalize-path
- [ CUmodule <c-object> ] dip
- [ cuModuleLoad cuda-error ] 2keep drop *void*
- ] dip
+: with-cuda-context ( flags device quot -- )
+ H{ } clone cuda-modules set-global
+ H{ } clone cuda-functions set
+ [ create-context ] dip
[ '[ _ @ ] ]
- [ drop '[ _ cuModuleUnload cuda-error ] ] 2bi
+ [ drop '[ _ destroy-context ] ] 2bi
[ ] cleanup ; inline
-: with-cuda-program ( flags device path quot -- )
+: with-cuda-program ( flags device quot -- )
[ dup cuda-device set ] 2dip
- '[
- cuda-context set
- _ [
- cuda-module set
- _ call
- ] with-cuda-module
- ] with-cuda-context ; inline
+ '[ cuda-context set _ call ] with-cuda-context ; inline
: with-cuda ( launcher quot -- )
- [
- init-cuda
- H{ } clone cuda-memory-hashtable
- ] 2dip '[
+ init-cuda
+ [ H{ } clone cuda-memory-hashtable ] 2dip '[
_
[ cuda-launcher set ]
- [ [ device>> ] [ device-flags>> ] [ path>> ] tri ] bi
+ [ [ device>> ] [ device-flags>> ] bi ] bi
_ with-cuda-program
] with-variable ; inline
-<PRIVATE
-
-: #cuda-devices ( -- n )
- int <c-object> [ cuDeviceGetCount cuda-error ] keep *int ;
-
-: n>cuda-device ( n -- device )
- [ CUdevice <c-object> ] dip [ cuDeviceGet cuda-error ] 2keep drop *int ;
-
-: enumerate-cuda-devices ( -- devices )
- #cuda-devices iota [ n>cuda-device ] map ;
-
-: cuda-device-properties ( device -- properties )
- [ CUdevprop <c-object> ] dip
- [ cuDeviceGetProperties cuda-error ] 2keep drop
- CUdevprop memory>struct ;
-
-PRIVATE>
-
-: cuda-devices ( -- assoc )
- enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
-
-: cuda-device-name ( n -- string )
- [ 256 [ <byte-array> ] keep ] dip
- [ cuDeviceGetName cuda-error ]
- [ 2drop utf8 alien>string ] 3bi ;
-
-: cuda-device-capability ( n -- pair )
- [ int <c-object> int <c-object> ] dip
- [ cuDeviceComputeCapability cuda-error ]
- [ drop [ *int ] bi@ ] 3bi 2array ;
-
-: cuda-device-memory ( n -- bytes )
- [ uint <c-object> ] dip
- [ cuDeviceTotalMem cuda-error ]
- [ drop *uint ] 2bi ;
-
-: get-cuda-function* ( module string -- function )
- [ CUfunction <c-object> ] 2dip
- [ cuModuleGetFunction cuda-error ] 3keep 2drop *void* ;
-
-: get-cuda-function ( string -- function )
- [ cuda-module get ] dip get-cuda-function* ;
-
-: with-cuda-function ( string quot -- )
- [
- get-cuda-function cuda-function set
- ] dip call ; inline
-
-: launch-function* ( function -- ) cuLaunch cuda-error ;
-
-: launch-function ( -- ) cuda-function get cuLaunch cuda-error ;
-
-: launch-function-grid* ( function width height -- )
- cuLaunchGrid cuda-error ;
-
-: launch-function-grid ( width height -- )
- [ cuda-function get ] 2dip
- cuLaunchGrid cuda-error ;
-
-TUPLE: cuda-memory < disposable ptr length ;
-
-: <cuda-memory> ( ptr length -- obj )
- cuda-memory new-disposable
- swap >>length
- swap >>ptr ;
-
-: add-cuda-memory ( obj -- obj )
- dup dup ptr>> cuda-memory-hashtable get set-at ;
-
-: delete-cuda-memory ( obj -- )
- cuda-memory-hashtable delete-at ;
-
-ERROR: invalid-cuda-memory ptr ;
-
-: cuda-memory-length ( cuda-memory -- n )
- ptr>> cuda-memory-hashtable get ?at [
- length>>
- ] [
- invalid-cuda-memory
- ] if ;
-
-M: cuda-memory byte-length length>> ;
-
-: cuda-malloc ( n -- ptr )
- [ CUdeviceptr <c-object> ] dip
- [ cuMemAlloc cuda-error ] 2keep
- [ *int ] dip <cuda-memory> add-cuda-memory ;
-
-: cuda-free* ( ptr -- )
- cuMemFree cuda-error ;
-
-M: cuda-memory dispose ( ptr -- )
- ptr>> cuda-free* ;
-
-: host>device ( dest-ptr src-ptr -- )
- [ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ;
-
-:: device>host ( ptr -- seq )
- ptr byte-length <byte-array>
- [ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ;
-
-: memcpy-device>device ( dest-ptr src-ptr count -- )
- cuMemcpyDtoD cuda-error ;
-
-: memcpy-device>array ( dest-array dest-index src-ptr count -- )
- cuMemcpyDtoA cuda-error ;
-
-: memcpy-array>device ( dest-ptr src-array src-index count -- )
- cuMemcpyAtoD cuda-error ;
-
-: memcpy-array>host ( dest-ptr src-array src-index count -- )
- cuMemcpyAtoH cuda-error ;
-
-: memcpy-host>array ( dest-array dest-index src-ptr count -- )
- cuMemcpyHtoA cuda-error ;
-
-: memcpy-array>array ( dest-array dest-index src-array src-ptr count -- )
- cuMemcpyAtoA cuda-error ;
-
-: cuda-int* ( function offset value -- )
- cuParamSeti cuda-error ;
-
-: cuda-int ( offset value -- )
- [ cuda-function get ] 2dip cuda-int* ;
-
-: cuda-float* ( function offset value -- )
- cuParamSetf cuda-error ;
-
-: cuda-float ( offset value -- )
- [ cuda-function get ] 2dip cuda-float* ;
-
-: cuda-vector* ( function offset ptr n -- )
- cuParamSetv cuda-error ;
-
-: cuda-vector ( offset ptr n -- )
- [ cuda-function get ] 3dip cuda-vector* ;
-
-: param-size* ( function n -- )
- cuParamSetSize cuda-error ;
-
-: param-size ( n -- )
- [ cuda-function get ] dip param-size* ;
-
-: malloc-device-string ( string -- n )
- utf8 encode
- [ length cuda-malloc ] keep
- [ host>device ] [ drop ] 2bi ;
-
-ERROR: bad-cuda-parameter parameter ;
-
-:: set-parameters ( seq -- )
- cuda-function get :> function
- 0 :> offset!
- seq [
- [ offset ] dip
- {
- { [ dup cuda-memory? ] [ ptr>> cuda-int ] }
- { [ dup float? ] [ cuda-float ] }
- { [ dup integer? ] [ cuda-int ] }
- [ bad-cuda-parameter ]
- } cond
- offset 4 + offset!
- ] each
- offset param-size ;
-
-: cuda-device-attribute ( attribute dev -- n )
- [ int <c-object> ] 2dip
- [ cuDeviceGetAttribute cuda-error ]
- [ 2drop *int ] 3bi ;
-
-: function-block-shape* ( function x y z -- )
- cuFuncSetBlockShape cuda-error ;
-
-: function-block-shape ( x y z -- )
- [ cuda-function get ] 3dip
- cuFuncSetBlockShape cuda-error ;
-
-: function-shared-size* ( function n -- )
- cuFuncSetSharedSize cuda-error ;
-
-: function-shared-size ( n -- )
- [ cuda-function get ] dip
- cuFuncSetSharedSize cuda-error ;
-
-: launch ( -- )
- cuda-launcher get {
- [ block-shape>> first3 function-block-shape ]
- [ shared-size>> function-shared-size ]
+: c-type>cuda-setter ( c-type -- n cuda-type )
+ {
+ { [ dup a:int = ] [ drop 4 [ cuda-int* ] ] }
+ { [ dup a:uint = ] [ drop 4 [ cuda-int* ] ] }
+ { [ dup a:float = ] [ drop 4 [ cuda-float* ] ] }
+ { [ dup a:pointer? ] [ drop 4 [ ptr>> cuda-int* ] ] }
+ { [ dup a:void* = ] [ drop 4 [ ptr>> cuda-int* ] ] }
+ } cond ;
+
+: run-function-launcher ( function-launcher function -- )
+ swap
+ {
+ [ dim-block>> first3 function-block-shape* ]
+ [ shared-size>> function-shared-size* ]
[
- grid>> [
- launch-function
+ dim-grid>> [
+ launch-function*
] [
- first2 launch-function-grid
+ first2 launch-function-grid*
] if-empty
]
- } cleave ;
+ } 2cleave ;
-: cuda-device. ( n -- )
- {
- [ "Device: " write number>string print ]
- [ "Name: " write cuda-device-name print ]
- [ "Memory: " write cuda-device-memory number>string print ]
- [
- "Capability: " write
- cuda-device-capability [ number>string ] map " " join print
- ]
- [ "Properties: " write cuda-device-properties . ]
- [
- "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write
- CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap
- cuda-device-attribute number>string print
- ]
- } cleave ;
+: cuda-argument-setter ( offset c-type -- offset' quot )
+ c-type>cuda-setter
+ [ over [ + ] dip ] dip
+ '[ swap _ swap _ call ] ;
-: cuda. ( -- )
- "CUDA Version: " write cuda-version number>string print nl
- #cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
+MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) )
+ [ 0 ] dip [ cuda-argument-setter ] map reverse
+ swap '[ _ param-size* ] suffix
+ '[ _ cleave ] ;
-
-: test-cuda0 ( -- )
- T{ launcher
- { path "vocab:cuda/hello.ptx" }
- { block-shape { 6 6 6 } }
- { shared-size 2 }
- { grid { 2 6 } }
- } [
- "helloWorld" [
- "Hello World!" [ - ] map-index
- malloc-device-string &dispose
-
- [ 1array set-parameters ]
- [ drop launch ]
- [ device>host utf8 alien>string . ] tri
- ] with-cuda-function
- ] with-cuda ;
+: define-cuda-word ( word module-name function-name arguments -- )
+ [
+ '[
+ _ _ cached-function
+ [ nip _ cuda-arguments ]
+ [ run-function-launcher ] 2bi
+ ]
+ ]
+ [ 2nip \ function-launcher suffix a:void function-effect ]
+ 3bi define-declared ;
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: alien.c-types alien.strings cuda cuda.memory cuda.syntax
+destructors io io.encodings.utf8 kernel locals math sequences ;
+IN: cuda.demos.hello-world
+
+CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx
+
+CUDA-FUNCTION: helloWorld ( char* string-ptr ) ;
+
+:: cuda-hello-world ( -- )
+ T{ launcher { device 0 } } [
+ "Hello World!" [ - ] map-index malloc-device-string
+ &dispose dup :> str
+
+ { 6 1 1 } { 2 1 } 1 3<<< helloWorld
+
+ str device>host utf8 alien>string print
+ ] with-cuda ;
+
+MAIN: cuda-hello-world
--- /dev/null
+/*
+ World using CUDA
+**
+** The string "Hello World!" is mangled then restored using a common CUDA idiom
+**
+** Byron Galbraith
+** 2009-02-18
+*/
+#include <cuda.h>
+#include <stdio.h>
+
+// Prototypes
+extern "C" __global__ void helloWorld(char*);
+
+// Host function
+int
+main(int argc, char** argv)
+{
+ int i;
+
+ // desired output
+ char str[] = "Hello World!";
+
+ // mangle contents of output
+ // the null character is left intact for simplicity
+ for(i = 0; i < 12; i++)
+ str[i] -= i;
+
+ // allocate memory on the device
+ char *d_str;
+ size_t size = sizeof(str);
+ cudaMalloc((void**)&d_str, size);
+
+ // copy the string to the device
+ cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
+
+ // set the grid and block sizes
+ dim3 dimGrid(2); // one block per word
+ dim3 dimBlock(6); // one thread per character
+
+ // invoke the kernel
+ helloWorld<<< dimGrid, dimBlock >>>(d_str);
+
+ // retrieve the results from the device
+ cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
+
+ // free up the allocated memory on the device
+ cudaFree(d_str);
+
+ // everyone's favorite part
+ printf("%s\n", str);
+
+ return 0;
+}
+
+// Device kernel
+__global__ void
+helloWorld(char* str)
+{
+ // determine where in the thread grid we are
+ int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+ // unmangle output
+ str[idx] += idx;
+}
--- /dev/null
+ .version 1.4
+ .target sm_10, map_f64_to_f32
+ // compiled with /usr/local/cuda/bin/../open64/lib//be
+ // nvopencc 3.0 built on 2010-03-11
+
+ //-----------------------------------------------------------
+ // Compiling /tmp/tmpxft_00000eab_00000000-7_hello.cpp3.i (/var/folders/KD/KDnx4D80Eh0fsORqNrFWBE+++TI/-Tmp-/ccBI#.AYqbdQ)
+ //-----------------------------------------------------------
+
+ //-----------------------------------------------------------
+ // Options:
+ //-----------------------------------------------------------
+ // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:32
+ // -O3 (Optimization level)
+ // -g0 (Debug level)
+ // -m2 (Report advisories)
+ //-----------------------------------------------------------
+
+ .file 1 "<command-line>"
+ .file 2 "/tmp/tmpxft_00000eab_00000000-6_hello.cudafe2.gpu"
+ .file 3 "/usr/lib/gcc/i686-apple-darwin10/4.2.1/include/stddef.h"
+ .file 4 "/usr/local/cuda/bin/../include/crt/device_runtime.h"
+ .file 5 "/usr/local/cuda/bin/../include/host_defines.h"
+ .file 6 "/usr/local/cuda/bin/../include/builtin_types.h"
+ .file 7 "/usr/local/cuda/bin/../include/device_types.h"
+ .file 8 "/usr/local/cuda/bin/../include/driver_types.h"
+ .file 9 "/usr/local/cuda/bin/../include/texture_types.h"
+ .file 10 "/usr/local/cuda/bin/../include/vector_types.h"
+ .file 11 "/usr/local/cuda/bin/../include/device_launch_parameters.h"
+ .file 12 "/usr/local/cuda/bin/../include/crt/storage_class.h"
+ .file 13 "/usr/include/i386/_types.h"
+ .file 14 "/usr/include/time.h"
+ .file 15 "/usr/local/cuda/bin/../include/texture_fetch_functions.h"
+ .file 16 "/usr/local/cuda/bin/../include/common_functions.h"
+ .file 17 "/usr/local/cuda/bin/../include/crt/func_macro.h"
+ .file 18 "/usr/local/cuda/bin/../include/math_functions.h"
+ .file 19 "/usr/local/cuda/bin/../include/device_functions.h"
+ .file 20 "/usr/local/cuda/bin/../include/math_constants.h"
+ .file 21 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h"
+ .file 22 "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h"
+ .file 23 "/usr/local/cuda/bin/../include/sm_13_double_functions.h"
+ .file 24 "/usr/local/cuda/bin/../include/common_types.h"
+ .file 25 "/usr/local/cuda/bin/../include/sm_20_atomic_functions.h"
+ .file 26 "/usr/local/cuda/bin/../include/sm_20_intrinsics.h"
+ .file 27 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h"
+ .file 28 "hello.cu"
+
+
+ .entry helloWorld (
+ .param .u32 __cudaparm_helloWorld_str)
+ {
+ .reg .u16 %rh<4>;
+ .reg .u32 %r<9>;
+ .loc 28 58 0
+$LBB1_helloWorld:
+ .loc 28 64 0
+ mov.u16 %rh1, %ctaid.x;
+ mov.u16 %rh2, %ntid.x;
+ mul.wide.u16 %r1, %rh1, %rh2;
+ cvt.u32.u16 %r2, %tid.x;
+ add.u32 %r3, %r2, %r1;
+ ld.param.u32 %r4, [__cudaparm_helloWorld_str];
+ add.u32 %r5, %r4, %r3;
+ ld.global.s8 %r6, [%r5+0];
+ add.s32 %r7, %r6, %r3;
+ st.global.s8 [%r5+0], %r7;
+ .loc 28 65 0
+ exit;
+$LDWend_helloWorld:
+ } // helloWorld
+
--- /dev/null
+Doug Coleman
+Joe Groff
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda_runtime.h>
+
+static const int LOG_BANK_COUNT = 4;
+
+static inline __device__ __host__ unsigned shared_offset(unsigned i)
+{
+ return i + (i >> LOG_BANK_COUNT);
+}
+
+static inline __device__ __host__ unsigned offset_a(unsigned offset, unsigned i)
+{
+ return shared_offset(offset * (2*i + 1) - 1);
+}
+
+static inline __device__ __host__ unsigned offset_b(unsigned offset, unsigned i)
+{
+ return shared_offset(offset * (2*i + 2) - 1);
+}
+
+static inline __device__ __host__ unsigned lpot(unsigned x)
+{
+ --x; x |= x>>1; x|=x>>2; x|=x>>4; x|=x>>8; x|=x>>16; return ++x;
+}
+
+template<typename T>
+__global__ void prefix_sum_block(T *in, T *out, unsigned n)
+{
+ extern __shared__ T temp[];
+
+ int idx = threadIdx.x;
+ int blocksize = blockDim.x;
+
+ temp[shared_offset(idx )] = (idx < n) ? in[idx ] : 0;
+ temp[shared_offset(idx + blocksize)] = (idx + blocksize < n) ? in[idx + blocksize] : 0;
+
+ int offset, d;
+ for (offset = 1, d = blocksize; d > 0; d >>= 1, offset <<= 1) {
+ __syncthreads();
+ if (idx < d) {
+ unsigned a = offset_a(offset, idx), b = offset_b(offset, idx);
+ temp[b] += temp[a];
+ }
+ }
+
+ if (idx == 0) temp[shared_offset(blocksize*2 - 1)] = 0;
+
+ for (d = 1; d <= blocksize; d <<= 1) {
+ offset >>= 1;
+ __syncthreads();
+
+ if (idx < d) {
+ unsigned a = offset_a(offset, idx), b = offset_b(offset, idx);
+ unsigned t = temp[a];
+ temp[a] = temp[b];
+ temp[b] += t;
+ }
+ }
+ __syncthreads();
+
+ if (idx < n) out[idx ] = temp[shared_offset(idx )];
+ if (idx + blocksize < n) out[idx + blocksize] = temp[shared_offset(idx + blocksize)];
+}
+
+template<typename T>
+void prefix_sum(T *in, T *out, unsigned n)
+{
+ char *device_values;
+ unsigned n_lpot = lpot(n);
+ size_t n_pitch;
+
+ cudaError_t error = cudaMallocPitch((void**)&device_values, &n_pitch, sizeof(T)*n, 2);
+ if (error != 0) {
+ printf("error %u allocating width %lu height %u\n", error, sizeof(T)*n, 2);
+ exit(1);
+ }
+
+ cudaMemcpy(device_values, in, sizeof(T)*n, cudaMemcpyHostToDevice);
+
+ prefix_sum_block<<<1, n_lpot/2, shared_offset(n_lpot)*sizeof(T)>>>
+ ((T*)device_values, (T*)(device_values + n_pitch), n);
+
+ cudaMemcpy(out, device_values + n_pitch, sizeof(T)*n, cudaMemcpyDeviceToHost);
+ cudaFree(device_values);
+}
+
+int main()
+{
+ sranddev();
+
+ static unsigned in_values[1024], out_values[1024];
+
+ for (int i = 0; i < 1024; ++i)
+ in_values[i] = rand() >> 21;
+
+ prefix_sum(in_values, out_values, 1024);
+
+ for (int i = 0; i < 1024; ++i)
+ printf("%5d => %5d\n", in_values[i], out_values[i]);
+
+ return 0;
+}
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: alien.c-types cuda cuda.syntax locals ;
+IN: cuda.demos.prefix-sum
+
+CUDA-LIBRARY: prefix-sum vocab:cuda/demos/prefix-sum/prefix-sum.ptx
+
+CUDA-FUNCTION: prefix_sum_block ( uint* in, uint* out, uint n ) ;
+
+:: cuda-prefix-sum ( -- )
+ T{ launcher { device 0 } }
+ [
+ ! { 1 1 1 } { 2 1 } 0 3<<< prefix_sum_block
+ ] with-cuda ;
+
+MAIN: cuda-prefix-sum
--- /dev/null
+ .version 1.4
+ .target sm_10, map_f64_to_f32
+ // compiled with /usr/local/cuda/bin/../open64/lib//be
+ // nvopencc 3.0 built on 2010-03-11
+
+ //-----------------------------------------------------------
+ // Compiling /tmp/tmpxft_00000236_00000000-7_prefix-sum.cpp3.i (/var/folders/K6/K6oI14wZ2RWhSE+BYqTjA++++TI/-Tmp-/ccBI#.0ATpGM)
+ //-----------------------------------------------------------
+
+ //-----------------------------------------------------------
+ // Options:
+ //-----------------------------------------------------------
+ // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:32
+ // -O3 (Optimization level)
+ // -g0 (Debug level)
+ // -m2 (Report advisories)
+ //-----------------------------------------------------------
+
+ .file 1 "<command-line>"
+ .file 2 "/tmp/tmpxft_00000236_00000000-6_prefix-sum.cudafe2.gpu"
+ .file 3 "/usr/lib/gcc/i686-apple-darwin10/4.2.1/include/stddef.h"
+ .file 4 "/usr/local/cuda/bin/../include/crt/device_runtime.h"
+ .file 5 "/usr/local/cuda/bin/../include/host_defines.h"
+ .file 6 "/usr/local/cuda/bin/../include/builtin_types.h"
+ .file 7 "/usr/local/cuda/bin/../include/device_types.h"
+ .file 8 "/usr/local/cuda/bin/../include/driver_types.h"
+ .file 9 "/usr/local/cuda/bin/../include/texture_types.h"
+ .file 10 "/usr/local/cuda/bin/../include/vector_types.h"
+ .file 11 "/usr/local/cuda/bin/../include/device_launch_parameters.h"
+ .file 12 "/usr/local/cuda/bin/../include/crt/storage_class.h"
+ .file 13 "/usr/include/i386/_types.h"
+ .file 14 "/usr/include/time.h"
+ .file 15 "prefix-sum.cu"
+ .file 16 "/usr/local/cuda/bin/../include/common_functions.h"
+ .file 17 "/usr/local/cuda/bin/../include/crt/func_macro.h"
+ .file 18 "/usr/local/cuda/bin/../include/math_functions.h"
+ .file 19 "/usr/local/cuda/bin/../include/device_functions.h"
+ .file 20 "/usr/local/cuda/bin/../include/math_constants.h"
+ .file 21 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h"
+ .file 22 "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h"
+ .file 23 "/usr/local/cuda/bin/../include/sm_13_double_functions.h"
+ .file 24 "/usr/local/cuda/bin/../include/common_types.h"
+ .file 25 "/usr/local/cuda/bin/../include/sm_20_atomic_functions.h"
+ .file 26 "/usr/local/cuda/bin/../include/sm_20_intrinsics.h"
+ .file 27 "/usr/local/cuda/bin/../include/texture_fetch_functions.h"
+ .file 28 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h"
+
+ .extern .shared .align 4 .b8 temp[];
+
+ .entry _Z16prefix_sum_blockIjEvPT_S1_j (
+ .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in,
+ .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out,
+ .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n)
+ {
+ .reg .u32 %r<81>;
+ .reg .pred %p<11>;
+ .loc 15 28 0
+$LBB1__Z16prefix_sum_blockIjEvPT_S1_j:
+ ld.param.u32 %r1, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n];
+ cvt.s32.u16 %r2, %tid.x;
+ setp.lt.u32 %p1, %r2, %r1;
+ @!%p1 bra $Lt_0_7938;
+ .loc 15 35 0
+ ld.param.u32 %r3, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in];
+ mul24.lo.u32 %r4, %r2, 4;
+ add.u32 %r5, %r3, %r4;
+ ld.global.u32 %r6, [%r5+0];
+ bra.uni $Lt_0_7682;
+$Lt_0_7938:
+ mov.u32 %r6, 0;
+$Lt_0_7682:
+ mov.u32 %r7, temp;
+ shr.u32 %r8, %r2, 4;
+ add.u32 %r9, %r2, %r8;
+ mul.lo.u32 %r10, %r9, 4;
+ add.u32 %r11, %r10, %r7;
+ st.shared.u32 [%r11+0], %r6;
+ cvt.s32.u16 %r12, %ntid.x;
+ add.s32 %r13, %r12, %r2;
+ .loc 15 28 0
+ ld.param.u32 %r1, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n];
+ .loc 15 35 0
+ setp.lt.u32 %p2, %r13, %r1;
+ @!%p2 bra $Lt_0_8450;
+ .loc 15 36 0
+ ld.param.u32 %r14, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in];
+ mul.lo.u32 %r15, %r13, 4;
+ add.u32 %r16, %r14, %r15;
+ ld.global.u32 %r17, [%r16+0];
+ bra.uni $Lt_0_8194;
+$Lt_0_8450:
+ mov.u32 %r17, 0;
+$Lt_0_8194:
+ shr.u32 %r18, %r13, 4;
+ add.u32 %r19, %r13, %r18;
+ mul.lo.u32 %r20, %r19, 4;
+ add.u32 %r21, %r20, %r7;
+ st.shared.u32 [%r21+0], %r17;
+ .loc 15 39 0
+ mov.s32 %r22, %r12;
+ mov.u32 %r23, 0;
+ setp.le.s32 %p3, %r12, %r23;
+ mov.s32 %r24, 1;
+ @%p3 bra $Lt_0_13314;
+$Lt_0_9218:
+ //<loop> Loop body line 39, nesting depth: 1, estimated iterations: unknown
+ .loc 15 40 0
+ bar.sync 0;
+ setp.le.s32 %p4, %r22, %r2;
+ @%p4 bra $Lt_0_9474;
+ //<loop> Part of loop body line 39, head labeled $Lt_0_9218
+ .loc 15 43 0
+ mul24.lo.u32 %r25, %r2, 2;
+ add.u32 %r26, %r25, 1;
+ add.u32 %r27, %r25, 2;
+ mul.lo.u32 %r28, %r24, %r26;
+ mul.lo.u32 %r29, %r24, %r27;
+ sub.u32 %r30, %r29, 1;
+ shr.u32 %r31, %r30, 4;
+ add.u32 %r32, %r29, %r31;
+ mul.lo.u32 %r33, %r32, 4;
+ add.u32 %r34, %r33, %r7;
+ ld.shared.u32 %r35, [%r34+-4];
+ sub.u32 %r36, %r28, 1;
+ shr.u32 %r37, %r36, 4;
+ add.u32 %r38, %r28, %r37;
+ mul.lo.u32 %r39, %r38, 4;
+ add.u32 %r40, %r7, %r39;
+ ld.shared.u32 %r41, [%r40+-4];
+ add.u32 %r42, %r35, %r41;
+ st.shared.u32 [%r34+-4], %r42;
+$Lt_0_9474:
+ //<loop> Part of loop body line 39, head labeled $Lt_0_9218
+ .loc 15 39 0
+ shr.s32 %r22, %r22, 1;
+ shl.b32 %r24, %r24, 1;
+ mov.u32 %r43, 0;
+ setp.gt.s32 %p5, %r22, %r43;
+ @%p5 bra $Lt_0_9218;
+ bra.uni $Lt_0_8706;
+$Lt_0_13314:
+$Lt_0_8706:
+ mov.u32 %r44, 0;
+ setp.ne.s32 %p6, %r2, %r44;
+ @%p6 bra $Lt_0_10242;
+ .loc 15 47 0
+ mul24.lo.s32 %r45, %r12, 2;
+ mov.u32 %r46, 0;
+ sub.u32 %r47, %r45, 1;
+ shr.u32 %r48, %r47, 4;
+ add.u32 %r49, %r45, %r48;
+ mul.lo.u32 %r50, %r49, 4;
+ add.u32 %r51, %r7, %r50;
+ st.shared.u32 [%r51+-4], %r46;
+$Lt_0_10242:
+ mov.u32 %r52, 1;
+ setp.lt.s32 %p7, %r12, %r52;
+ @%p7 bra $Lt_0_10754;
+ mov.s32 %r22, 1;
+$Lt_0_11266:
+ //<loop> Loop body line 47, nesting depth: 1, estimated iterations: unknown
+ .loc 15 50 0
+ shr.s32 %r24, %r24, 1;
+ .loc 15 51 0
+ bar.sync 0;
+ setp.le.s32 %p8, %r22, %r2;
+ @%p8 bra $Lt_0_11522;
+ //<loop> Part of loop body line 47, head labeled $Lt_0_11266
+ .loc 15 55 0
+ mul24.lo.u32 %r53, %r2, 2;
+ add.u32 %r54, %r53, 1;
+ mul.lo.u32 %r55, %r24, %r54;
+ sub.u32 %r56, %r55, 1;
+ shr.u32 %r57, %r56, 4;
+ add.u32 %r58, %r55, %r57;
+ mul.lo.u32 %r59, %r58, 4;
+ add.u32 %r60, %r59, %r7;
+ ld.shared.u32 %r61, [%r60+-4];
+ .loc 15 56 0
+ add.u32 %r62, %r53, 2;
+ mul.lo.u32 %r63, %r24, %r62;
+ sub.u32 %r64, %r63, 1;
+ shr.u32 %r65, %r64, 4;
+ add.u32 %r66, %r63, %r65;
+ mul.lo.u32 %r67, %r66, 4;
+ add.u32 %r68, %r67, %r7;
+ ld.shared.u32 %r69, [%r68+-4];
+ st.shared.u32 [%r60+-4], %r69;
+ .loc 15 57 0
+ ld.shared.u32 %r70, [%r68+-4];
+ add.u32 %r71, %r70, %r61;
+ st.shared.u32 [%r68+-4], %r71;
+$Lt_0_11522:
+ //<loop> Part of loop body line 47, head labeled $Lt_0_11266
+ .loc 15 49 0
+ shl.b32 %r22, %r22, 1;
+ setp.le.s32 %p9, %r22, %r12;
+ @%p9 bra $Lt_0_11266;
+$Lt_0_10754:
+ .loc 15 60 0
+ bar.sync 0;
+ @!%p1 bra $Lt_0_12290;
+ .loc 15 62 0
+ ld.shared.u32 %r72, [%r11+0];
+ ld.param.u32 %r73, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out];
+ mul24.lo.u32 %r74, %r2, 4;
+ add.u32 %r75, %r73, %r74;
+ st.global.u32 [%r75+0], %r72;
+$Lt_0_12290:
+ @!%p2 bra $Lt_0_12802;
+ .loc 15 63 0
+ ld.shared.u32 %r76, [%r21+0];
+ ld.param.u32 %r77, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out];
+ mul.lo.u32 %r78, %r13, 4;
+ add.u32 %r79, %r77, %r78;
+ st.global.u32 [%r79+0], %r76;
+$Lt_0_12802:
+ .loc 15 64 0
+ exit;
+$LDWend__Z16prefix_sum_blockIjEvPT_S1_j:
+ } // _Z16prefix_sum_blockIjEvPT_S1_j
+
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: alien.c-types alien.data alien.strings arrays assocs
+byte-arrays classes.struct combinators cuda.ffi cuda.utils io
+io.encodings.utf8 kernel math.parser prettyprint sequences ;
+IN: cuda.devices
+
+: #cuda-devices ( -- n )
+ int <c-object> [ cuDeviceGetCount cuda-error ] keep *int ;
+
+: n>cuda-device ( n -- device )
+ [ CUdevice <c-object> ] dip [ cuDeviceGet cuda-error ] 2keep drop *int ;
+
+: enumerate-cuda-devices ( -- devices )
+ #cuda-devices iota [ n>cuda-device ] map ;
+
+: cuda-device-properties ( device -- properties )
+ [ CUdevprop <c-object> ] dip
+ [ cuDeviceGetProperties cuda-error ] 2keep drop
+ CUdevprop memory>struct ;
+
+: cuda-devices ( -- assoc )
+ enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
+
+: cuda-device-name ( n -- string )
+ [ 256 [ <byte-array> ] keep ] dip
+ [ cuDeviceGetName cuda-error ]
+ [ 2drop utf8 alien>string ] 3bi ;
+
+: cuda-device-capability ( n -- pair )
+ [ int <c-object> int <c-object> ] dip
+ [ cuDeviceComputeCapability cuda-error ]
+ [ drop [ *int ] bi@ ] 3bi 2array ;
+
+: cuda-device-memory ( n -- bytes )
+ [ uint <c-object> ] dip
+ [ cuDeviceTotalMem cuda-error ]
+ [ drop *uint ] 2bi ;
+
+: cuda-device-attribute ( attribute dev -- n )
+ [ int <c-object> ] 2dip
+ [ cuDeviceGetAttribute cuda-error ]
+ [ 2drop *int ] 3bi ;
+
+: cuda-device. ( n -- )
+ {
+ [ "Device: " write number>string print ]
+ [ "Name: " write cuda-device-name print ]
+ [ "Memory: " write cuda-device-memory number>string print ]
+ [
+ "Capability: " write
+ cuda-device-capability [ number>string ] map " " join print
+ ]
+ [ "Properties: " write cuda-device-properties . ]
+ [
+ "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write
+ CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap
+ cuda-device-attribute number>string print
+ ]
+ } cleave ;
+
+: cuda. ( -- )
+ "CUDA Version: " write cuda-version number>string print nl
+ #cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
+
+++ /dev/null
-/*
- World using CUDA
-**
-** The string "Hello World!" is mangled then restored using a common CUDA idiom
-**
-** Byron Galbraith
-** 2009-02-18
-*/
-#include <cuda.h>
-#include <stdio.h>
-
-// Prototypes
-extern "C" __global__ void helloWorld(char*);
-
-// Host function
-int
-main(int argc, char** argv)
-{
- int i;
-
- // desired output
- char str[] = "Hello World!";
-
- // mangle contents of output
- // the null character is left intact for simplicity
- for(i = 0; i < 12; i++)
- str[i] -= i;
-
- // allocate memory on the device
- char *d_str;
- size_t size = sizeof(str);
- cudaMalloc((void**)&d_str, size);
-
- // copy the string to the device
- cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
-
- // set the grid and block sizes
- dim3 dimGrid(2); // one block per word
- dim3 dimBlock(6); // one thread per character
-
- // invoke the kernel
- helloWorld<<< dimGrid, dimBlock >>>(d_str);
-
- // retrieve the results from the device
- cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
-
- // free up the allocated memory on the device
- cudaFree(d_str);
-
- // everyone's favorite part
- printf("%s\n", str);
-
- return 0;
-}
-
-// Device kernel
-__global__ void
-helloWorld(char* str)
-{
- // determine where in the thread grid we are
- int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
- // unmangle output
- str[idx] += idx;
-}
+++ /dev/null
- .version 1.4
- .target sm_10, map_f64_to_f32
- // compiled with /usr/local/cuda/bin/../open64/lib//be
- // nvopencc 3.0 built on 2010-03-11
-
- //-----------------------------------------------------------
- // Compiling /tmp/tmpxft_00000eab_00000000-7_hello.cpp3.i (/var/folders/KD/KDnx4D80Eh0fsORqNrFWBE+++TI/-Tmp-/ccBI#.AYqbdQ)
- //-----------------------------------------------------------
-
- //-----------------------------------------------------------
- // Options:
- //-----------------------------------------------------------
- // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:32
- // -O3 (Optimization level)
- // -g0 (Debug level)
- // -m2 (Report advisories)
- //-----------------------------------------------------------
-
- .file 1 "<command-line>"
- .file 2 "/tmp/tmpxft_00000eab_00000000-6_hello.cudafe2.gpu"
- .file 3 "/usr/lib/gcc/i686-apple-darwin10/4.2.1/include/stddef.h"
- .file 4 "/usr/local/cuda/bin/../include/crt/device_runtime.h"
- .file 5 "/usr/local/cuda/bin/../include/host_defines.h"
- .file 6 "/usr/local/cuda/bin/../include/builtin_types.h"
- .file 7 "/usr/local/cuda/bin/../include/device_types.h"
- .file 8 "/usr/local/cuda/bin/../include/driver_types.h"
- .file 9 "/usr/local/cuda/bin/../include/texture_types.h"
- .file 10 "/usr/local/cuda/bin/../include/vector_types.h"
- .file 11 "/usr/local/cuda/bin/../include/device_launch_parameters.h"
- .file 12 "/usr/local/cuda/bin/../include/crt/storage_class.h"
- .file 13 "/usr/include/i386/_types.h"
- .file 14 "/usr/include/time.h"
- .file 15 "/usr/local/cuda/bin/../include/texture_fetch_functions.h"
- .file 16 "/usr/local/cuda/bin/../include/common_functions.h"
- .file 17 "/usr/local/cuda/bin/../include/crt/func_macro.h"
- .file 18 "/usr/local/cuda/bin/../include/math_functions.h"
- .file 19 "/usr/local/cuda/bin/../include/device_functions.h"
- .file 20 "/usr/local/cuda/bin/../include/math_constants.h"
- .file 21 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h"
- .file 22 "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h"
- .file 23 "/usr/local/cuda/bin/../include/sm_13_double_functions.h"
- .file 24 "/usr/local/cuda/bin/../include/common_types.h"
- .file 25 "/usr/local/cuda/bin/../include/sm_20_atomic_functions.h"
- .file 26 "/usr/local/cuda/bin/../include/sm_20_intrinsics.h"
- .file 27 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h"
- .file 28 "hello.cu"
-
-
- .entry helloWorld (
- .param .u32 __cudaparm_helloWorld_str)
- {
- .reg .u16 %rh<4>;
- .reg .u32 %r<9>;
- .loc 28 58 0
-$LBB1_helloWorld:
- .loc 28 64 0
- mov.u16 %rh1, %ctaid.x;
- mov.u16 %rh2, %ntid.x;
- mul.wide.u16 %r1, %rh1, %rh2;
- cvt.u32.u16 %r2, %tid.x;
- add.u32 %r3, %r2, %r1;
- ld.param.u32 %r4, [__cudaparm_helloWorld_str];
- add.u32 %r5, %r4, %r3;
- ld.global.s8 %r6, [%r5+0];
- add.s32 %r7, %r6, %r3;
- st.global.s8 [%r5+0], %r7;
- .loc 28 65 0
- exit;
-$LDWend_helloWorld:
- } // helloWorld
-
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors alien alien.data assocs byte-arrays cuda.ffi
+cuda.utils destructors io.encodings.string io.encodings.utf8
+kernel locals namespaces sequences ;
+QUALIFIED-WITH: alien.c-types a
+IN: cuda.memory
+
+SYMBOL: cuda-memory-hashtable
+
+TUPLE: cuda-memory < disposable ptr length ;
+
+: <cuda-memory> ( ptr length -- obj )
+ cuda-memory new-disposable
+ swap >>length
+ swap >>ptr ;
+
+: add-cuda-memory ( obj -- obj )
+ dup dup ptr>> cuda-memory-hashtable get set-at ;
+
+: delete-cuda-memory ( obj -- )
+ cuda-memory-hashtable delete-at ;
+
+ERROR: invalid-cuda-memory ptr ;
+
+: cuda-memory-length ( cuda-memory -- n )
+ ptr>> cuda-memory-hashtable get ?at [
+ length>>
+ ] [
+ invalid-cuda-memory
+ ] if ;
+
+M: cuda-memory byte-length length>> ;
+
+: cuda-malloc ( n -- ptr )
+ [ CUdeviceptr <c-object> ] dip
+ [ cuMemAlloc cuda-error ] 2keep
+ [ a:*int ] dip <cuda-memory> add-cuda-memory ;
+
+: cuda-free* ( ptr -- )
+ cuMemFree cuda-error ;
+
+M: cuda-memory dispose ( ptr -- )
+ ptr>> cuda-free* ;
+
+: memcpy-device>device ( dest-ptr src-ptr count -- )
+ cuMemcpyDtoD cuda-error ;
+
+: memcpy-device>array ( dest-array dest-index src-ptr count -- )
+ cuMemcpyDtoA cuda-error ;
+
+: memcpy-array>device ( dest-ptr src-array src-index count -- )
+ cuMemcpyAtoD cuda-error ;
+
+: memcpy-array>host ( dest-ptr src-array src-index count -- )
+ cuMemcpyAtoH cuda-error ;
+
+: memcpy-host>array ( dest-array dest-index src-ptr count -- )
+ cuMemcpyHtoA cuda-error ;
+
+: memcpy-array>array ( dest-array dest-index src-array src-ptr count -- )
+ cuMemcpyAtoA cuda-error ;
+
+: host>device ( dest-ptr src-ptr -- )
+ [ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ;
+
+:: device>host ( ptr -- seq )
+ ptr byte-length <byte-array>
+ [ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ;
+
+: malloc-device-string ( string -- n )
+ utf8 encode
+ [ length cuda-malloc ] keep
+ [ host>device ] [ drop ] 2bi ;
+++ /dev/null
-#include <stdio.h>
-#include <stdlib.h>
-#include <cuda_runtime.h>
-
-static const int LOG_BANK_COUNT = 4;
-
-static inline __device__ __host__ unsigned shared_offset(unsigned i)
-{
- return i + (i >> LOG_BANK_COUNT);
-}
-
-static inline __device__ __host__ unsigned offset_a(unsigned offset, unsigned i)
-{
- return shared_offset(offset * (2*i + 1) - 1);
-}
-
-static inline __device__ __host__ unsigned offset_b(unsigned offset, unsigned i)
-{
- return shared_offset(offset * (2*i + 2) - 1);
-}
-
-static inline __device__ __host__ unsigned lpot(unsigned x)
-{
- --x; x |= x>>1; x|=x>>2; x|=x>>4; x|=x>>8; x|=x>>16; return ++x;
-}
-
-template<typename T>
-__global__ void prefix_sum_block(T *in, T *out, unsigned n)
-{
- extern __shared__ T temp[];
-
- int idx = threadIdx.x;
- int blocksize = blockDim.x;
-
- temp[shared_offset(idx )] = (idx < n) ? in[idx ] : 0;
- temp[shared_offset(idx + blocksize)] = (idx + blocksize < n) ? in[idx + blocksize] : 0;
-
- int offset, d;
- for (offset = 1, d = blocksize; d > 0; d >>= 1, offset <<= 1) {
- __syncthreads();
- if (idx < d) {
- unsigned a = offset_a(offset, idx), b = offset_b(offset, idx);
- temp[b] += temp[a];
- }
- }
-
- if (idx == 0) temp[shared_offset(blocksize*2 - 1)] = 0;
-
- for (d = 1; d <= blocksize; d <<= 1) {
- offset >>= 1;
- __syncthreads();
-
- if (idx < d) {
- unsigned a = offset_a(offset, idx), b = offset_b(offset, idx);
- unsigned t = temp[a];
- temp[a] = temp[b];
- temp[b] += t;
- }
- }
- __syncthreads();
-
- if (idx < n) out[idx ] = temp[shared_offset(idx )];
- if (idx + blocksize < n) out[idx + blocksize] = temp[shared_offset(idx + blocksize)];
-}
-
-template<typename T>
-void prefix_sum(T *in, T *out, unsigned n)
-{
- char *device_values;
- unsigned n_lpot = lpot(n);
- size_t n_pitch;
-
- cudaError_t error = cudaMallocPitch((void**)&device_values, &n_pitch, sizeof(T)*n, 2);
- if (error != 0) {
- printf("error %u allocating width %lu height %u\n", error, sizeof(T)*n, 2);
- exit(1);
- }
-
- cudaMemcpy(device_values, in, sizeof(T)*n, cudaMemcpyHostToDevice);
-
- prefix_sum_block<<<1, n_lpot/2, shared_offset(n_lpot)*sizeof(T)>>>
- ((T*)device_values, (T*)(device_values + n_pitch), n);
-
- cudaMemcpy(out, device_values + n_pitch, sizeof(T)*n, cudaMemcpyDeviceToHost);
- cudaFree(device_values);
-}
-
-int main()
-{
- sranddev();
-
- static unsigned in_values[1024], out_values[1024];
-
- for (int i = 0; i < 1024; ++i)
- in_values[i] = rand() >> 21;
-
- prefix_sum(in_values, out_values, 1024);
-
- for (int i = 0; i < 1024; ++i)
- printf("%5d => %5d\n", in_values[i], out_values[i]);
-
- return 0;
-}
+++ /dev/null
- .version 1.4
- .target sm_10, map_f64_to_f32
- // compiled with /usr/local/cuda/bin/../open64/lib//be
- // nvopencc 3.0 built on 2010-03-11
-
- //-----------------------------------------------------------
- // Compiling /tmp/tmpxft_00000236_00000000-7_prefix-sum.cpp3.i (/var/folders/K6/K6oI14wZ2RWhSE+BYqTjA++++TI/-Tmp-/ccBI#.0ATpGM)
- //-----------------------------------------------------------
-
- //-----------------------------------------------------------
- // Options:
- //-----------------------------------------------------------
- // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:32
- // -O3 (Optimization level)
- // -g0 (Debug level)
- // -m2 (Report advisories)
- //-----------------------------------------------------------
-
- .file 1 "<command-line>"
- .file 2 "/tmp/tmpxft_00000236_00000000-6_prefix-sum.cudafe2.gpu"
- .file 3 "/usr/lib/gcc/i686-apple-darwin10/4.2.1/include/stddef.h"
- .file 4 "/usr/local/cuda/bin/../include/crt/device_runtime.h"
- .file 5 "/usr/local/cuda/bin/../include/host_defines.h"
- .file 6 "/usr/local/cuda/bin/../include/builtin_types.h"
- .file 7 "/usr/local/cuda/bin/../include/device_types.h"
- .file 8 "/usr/local/cuda/bin/../include/driver_types.h"
- .file 9 "/usr/local/cuda/bin/../include/texture_types.h"
- .file 10 "/usr/local/cuda/bin/../include/vector_types.h"
- .file 11 "/usr/local/cuda/bin/../include/device_launch_parameters.h"
- .file 12 "/usr/local/cuda/bin/../include/crt/storage_class.h"
- .file 13 "/usr/include/i386/_types.h"
- .file 14 "/usr/include/time.h"
- .file 15 "prefix-sum.cu"
- .file 16 "/usr/local/cuda/bin/../include/common_functions.h"
- .file 17 "/usr/local/cuda/bin/../include/crt/func_macro.h"
- .file 18 "/usr/local/cuda/bin/../include/math_functions.h"
- .file 19 "/usr/local/cuda/bin/../include/device_functions.h"
- .file 20 "/usr/local/cuda/bin/../include/math_constants.h"
- .file 21 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h"
- .file 22 "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h"
- .file 23 "/usr/local/cuda/bin/../include/sm_13_double_functions.h"
- .file 24 "/usr/local/cuda/bin/../include/common_types.h"
- .file 25 "/usr/local/cuda/bin/../include/sm_20_atomic_functions.h"
- .file 26 "/usr/local/cuda/bin/../include/sm_20_intrinsics.h"
- .file 27 "/usr/local/cuda/bin/../include/texture_fetch_functions.h"
- .file 28 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h"
-
- .extern .shared .align 4 .b8 temp[];
-
- .entry _Z16prefix_sum_blockIjEvPT_S1_j (
- .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in,
- .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out,
- .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n)
- {
- .reg .u32 %r<81>;
- .reg .pred %p<11>;
- .loc 15 28 0
-$LBB1__Z16prefix_sum_blockIjEvPT_S1_j:
- ld.param.u32 %r1, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n];
- cvt.s32.u16 %r2, %tid.x;
- setp.lt.u32 %p1, %r2, %r1;
- @!%p1 bra $Lt_0_7938;
- .loc 15 35 0
- ld.param.u32 %r3, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in];
- mul24.lo.u32 %r4, %r2, 4;
- add.u32 %r5, %r3, %r4;
- ld.global.u32 %r6, [%r5+0];
- bra.uni $Lt_0_7682;
-$Lt_0_7938:
- mov.u32 %r6, 0;
-$Lt_0_7682:
- mov.u32 %r7, temp;
- shr.u32 %r8, %r2, 4;
- add.u32 %r9, %r2, %r8;
- mul.lo.u32 %r10, %r9, 4;
- add.u32 %r11, %r10, %r7;
- st.shared.u32 [%r11+0], %r6;
- cvt.s32.u16 %r12, %ntid.x;
- add.s32 %r13, %r12, %r2;
- .loc 15 28 0
- ld.param.u32 %r1, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n];
- .loc 15 35 0
- setp.lt.u32 %p2, %r13, %r1;
- @!%p2 bra $Lt_0_8450;
- .loc 15 36 0
- ld.param.u32 %r14, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in];
- mul.lo.u32 %r15, %r13, 4;
- add.u32 %r16, %r14, %r15;
- ld.global.u32 %r17, [%r16+0];
- bra.uni $Lt_0_8194;
-$Lt_0_8450:
- mov.u32 %r17, 0;
-$Lt_0_8194:
- shr.u32 %r18, %r13, 4;
- add.u32 %r19, %r13, %r18;
- mul.lo.u32 %r20, %r19, 4;
- add.u32 %r21, %r20, %r7;
- st.shared.u32 [%r21+0], %r17;
- .loc 15 39 0
- mov.s32 %r22, %r12;
- mov.u32 %r23, 0;
- setp.le.s32 %p3, %r12, %r23;
- mov.s32 %r24, 1;
- @%p3 bra $Lt_0_13314;
-$Lt_0_9218:
- //<loop> Loop body line 39, nesting depth: 1, estimated iterations: unknown
- .loc 15 40 0
- bar.sync 0;
- setp.le.s32 %p4, %r22, %r2;
- @%p4 bra $Lt_0_9474;
- //<loop> Part of loop body line 39, head labeled $Lt_0_9218
- .loc 15 43 0
- mul24.lo.u32 %r25, %r2, 2;
- add.u32 %r26, %r25, 1;
- add.u32 %r27, %r25, 2;
- mul.lo.u32 %r28, %r24, %r26;
- mul.lo.u32 %r29, %r24, %r27;
- sub.u32 %r30, %r29, 1;
- shr.u32 %r31, %r30, 4;
- add.u32 %r32, %r29, %r31;
- mul.lo.u32 %r33, %r32, 4;
- add.u32 %r34, %r33, %r7;
- ld.shared.u32 %r35, [%r34+-4];
- sub.u32 %r36, %r28, 1;
- shr.u32 %r37, %r36, 4;
- add.u32 %r38, %r28, %r37;
- mul.lo.u32 %r39, %r38, 4;
- add.u32 %r40, %r7, %r39;
- ld.shared.u32 %r41, [%r40+-4];
- add.u32 %r42, %r35, %r41;
- st.shared.u32 [%r34+-4], %r42;
-$Lt_0_9474:
- //<loop> Part of loop body line 39, head labeled $Lt_0_9218
- .loc 15 39 0
- shr.s32 %r22, %r22, 1;
- shl.b32 %r24, %r24, 1;
- mov.u32 %r43, 0;
- setp.gt.s32 %p5, %r22, %r43;
- @%p5 bra $Lt_0_9218;
- bra.uni $Lt_0_8706;
-$Lt_0_13314:
-$Lt_0_8706:
- mov.u32 %r44, 0;
- setp.ne.s32 %p6, %r2, %r44;
- @%p6 bra $Lt_0_10242;
- .loc 15 47 0
- mul24.lo.s32 %r45, %r12, 2;
- mov.u32 %r46, 0;
- sub.u32 %r47, %r45, 1;
- shr.u32 %r48, %r47, 4;
- add.u32 %r49, %r45, %r48;
- mul.lo.u32 %r50, %r49, 4;
- add.u32 %r51, %r7, %r50;
- st.shared.u32 [%r51+-4], %r46;
-$Lt_0_10242:
- mov.u32 %r52, 1;
- setp.lt.s32 %p7, %r12, %r52;
- @%p7 bra $Lt_0_10754;
- mov.s32 %r22, 1;
-$Lt_0_11266:
- //<loop> Loop body line 47, nesting depth: 1, estimated iterations: unknown
- .loc 15 50 0
- shr.s32 %r24, %r24, 1;
- .loc 15 51 0
- bar.sync 0;
- setp.le.s32 %p8, %r22, %r2;
- @%p8 bra $Lt_0_11522;
- //<loop> Part of loop body line 47, head labeled $Lt_0_11266
- .loc 15 55 0
- mul24.lo.u32 %r53, %r2, 2;
- add.u32 %r54, %r53, 1;
- mul.lo.u32 %r55, %r24, %r54;
- sub.u32 %r56, %r55, 1;
- shr.u32 %r57, %r56, 4;
- add.u32 %r58, %r55, %r57;
- mul.lo.u32 %r59, %r58, 4;
- add.u32 %r60, %r59, %r7;
- ld.shared.u32 %r61, [%r60+-4];
- .loc 15 56 0
- add.u32 %r62, %r53, 2;
- mul.lo.u32 %r63, %r24, %r62;
- sub.u32 %r64, %r63, 1;
- shr.u32 %r65, %r64, 4;
- add.u32 %r66, %r63, %r65;
- mul.lo.u32 %r67, %r66, 4;
- add.u32 %r68, %r67, %r7;
- ld.shared.u32 %r69, [%r68+-4];
- st.shared.u32 [%r60+-4], %r69;
- .loc 15 57 0
- ld.shared.u32 %r70, [%r68+-4];
- add.u32 %r71, %r70, %r61;
- st.shared.u32 [%r68+-4], %r71;
-$Lt_0_11522:
- //<loop> Part of loop body line 47, head labeled $Lt_0_11266
- .loc 15 49 0
- shl.b32 %r22, %r22, 1;
- setp.le.s32 %p9, %r22, %r12;
- @%p9 bra $Lt_0_11266;
-$Lt_0_10754:
- .loc 15 60 0
- bar.sync 0;
- @!%p1 bra $Lt_0_12290;
- .loc 15 62 0
- ld.shared.u32 %r72, [%r11+0];
- ld.param.u32 %r73, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out];
- mul24.lo.u32 %r74, %r2, 4;
- add.u32 %r75, %r73, %r74;
- st.global.u32 [%r75+0], %r72;
-$Lt_0_12290:
- @!%p2 bra $Lt_0_12802;
- .loc 15 63 0
- ld.shared.u32 %r76, [%r21+0];
- ld.param.u32 %r77, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out];
- mul.lo.u32 %r78, %r13, 4;
- add.u32 %r79, %r77, %r78;
- st.global.u32 [%r79+0], %r76;
-$Lt_0_12802:
- .loc 15 64 0
- exit;
-$LDWend__Z16prefix_sum_blockIjEvPT_S1_j:
- } // _Z16prefix_sum_blockIjEvPT_S1_j
-
--- /dev/null
+USING: cuda.ptx tools.test ;
+IN: cuda.ptx.tests
+
+[ """ .version 2.0
+ .target sm_20
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20, .texmode_independent
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } { texmode .texmode_independent } } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_11, map_f64_to_f32
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target
+ { arch sm_11 }
+ { map_f64_to_f32? t }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_11, map_f64_to_f32, .texmode_independent
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target
+ { arch sm_11 }
+ { map_f64_to_f32? t }
+ { texmode .texmode_independent }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ .global .f32 foo[9000];
+ .extern .align 16 .shared .v4.f32 bar[];
+ .func (.reg .f32 sum) zap (.reg .f32 a, .reg .f32 b)
+ {
+ add.rn.f32 sum, a, b;
+ ret;
+ }
+ .func frob (.align 8 .param .u64 in, .align 8 .param .u64 out, .align 8 .param .u64 len)
+ {
+ ret;
+ }
+ .func twib
+ {
+ ret;
+ }
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ ptx-variable
+ { storage-space .global }
+ { type .f32 }
+ { name "foo" }
+ { dim 9000 }
+ }
+ T{ ptx-variable
+ { extern? t }
+ { align 16 }
+ { storage-space .shared }
+ { type T{ .v4 f .f32 } }
+ { name "bar" }
+ { dim 0 }
+ }
+ T{ ptx-func
+ { return T{ ptx-variable { storage-space .reg } { type .f32 } { name "sum" } } }
+ { name "zap" }
+ { params {
+ T{ ptx-variable { storage-space .reg } { type .f32 } { name "a" } }
+ T{ ptx-variable { storage-space .reg } { type .f32 } { name "b" } }
+ } }
+ { body {
+ T{ add { round .rn } { type .f32 } { dest "sum" } { a "a" } { b "b" } }
+ T{ ret }
+ } }
+ }
+ T{ ptx-func
+ { name "frob" }
+ { params {
+ T{ ptx-variable { align 8 } { storage-space .param } { type .u64 } { name "in" } }
+ T{ ptx-variable { align 8 } { storage-space .param } { type .u64 } { name "out" } }
+ T{ ptx-variable { align 8 } { storage-space .param } { type .u64 } { name "len" } }
+ } }
+ { body {
+ T{ ret }
+ } }
+ }
+ T{ ptx-func
+ { name "twib" }
+ { body {
+ T{ ret }
+ } }
+ }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ abs.s32 a, b;
+ @p abs.s32 a, b;
+ @!p abs.s32 a, b;
+foo: abs.s32 a, b;
+ abs.ftz.f32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ abs { type .s32 } { dest "a" } { a "b" } }
+ T{ abs
+ { predicate T{ ptx-predicate { variable "p" } } }
+ { type .s32 } { dest "a" } { a "b" }
+ }
+ T{ abs
+ { predicate T{ ptx-predicate { negated? t } { variable "p" } } }
+ { type .s32 } { dest "a" } { a "b" }
+ }
+ T{ abs
+ { label "foo" }
+ { type .s32 } { dest "a" } { a "b" }
+ }
+ T{ abs { type .f32 } { dest "a" } { a "b" } { ftz? t } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ add.s32 a, b, c;
+ add.cc.s32 a, b, c;
+ add.sat.s32 a, b, c;
+ add.ftz.f32 a, b, c;
+ add.ftz.sat.f32 a, b, c;
+ add.rz.sat.f32 a, b, c;
+ add.rz.ftz.sat.f32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ add { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ add { cc? t } { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ add { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ add { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ add { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ add { round .rz } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ add { round .rz } { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ addc.s32 a, b, c;
+ addc.cc.s32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ addc { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ addc { cc? t } { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ and.b32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ and { type .b32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ atom.and.u32 a, [b], c;
+ atom.global.or.u32 a, [b], c;
+ atom.shared.cas.u32 a, [b], c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ atom { op .and } { type .u32 } { dest "a" } { a "[b]" } { b "c" } }
+ T{ atom { storage-space .global } { op .or } { type .u32 } { dest "a" } { a "[b]" } { b "c" } }
+ T{ atom { storage-space .shared } { op .cas } { type .u32 } { dest "a" } { a "[b]" } { b "c" } { c "d" } }
+
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ bar.arrive a, b;
+ bar.red.popc.u32 a, b, d;
+ bar.red.popc.u32 a, b, !d;
+ bar.red.popc.u32 a, b, c, !d;
+ bar.sync a;
+ bar.sync a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ bar.arrive { a "a" } { b "b" } }
+ T{ bar.red { op .popc } { type .u32 } { dest "a" } { a "b" } { c "d" } }
+ T{ bar.red { op .popc } { type .u32 } { dest "a" } { a "b" } { c "!d" } }
+ T{ bar.red { op .popc } { type .u32 } { dest "a" } { a "b" } { b "c" } { c "!d" } }
+ T{ bar.sync { a "a" } }
+ T{ bar.sync { a "a" } { b "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ bfe.u32 a, b, c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ bfe { type .u32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ bfi.u32 a, b, c, d, e;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ bfi { type .u32 } { dest "a" } { a "b" } { b "c" } { c "d" } { d "e" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ bfind.u32 a, b;
+ bfind.shiftamt.u32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ bfind { type .u32 } { dest "a" } { a "b" } }
+ T{ bfind { type .u32 } { shiftamt? t } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ bra foo;
+ bra.uni bar;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ bra { target "foo" } }
+ T{ bra { uni? t } { target "bar" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ brev.b32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ brev { type .b32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ brkpt;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ brkpt }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ call foo;
+ call.uni foo;
+ call (a), foo;
+ call (a), foo, (b);
+ call (a), foo, (b, c);
+ call (a), foo, (b, c, d);
+ call foo, (b, c, d);
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ call { target "foo" } }
+ T{ call { uni? t } { target "foo" } }
+ T{ call { return "a" } { target "foo" } }
+ T{ call { return "a" } { target "foo" } { params { "b" } } }
+ T{ call { return "a" } { target "foo" } { params { "b" "c" } } }
+ T{ call { return "a" } { target "foo" } { params { "b" "c" "d" } } }
+ T{ call { target "foo" } { params { "b" "c" "d" } } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ clz.b32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ clz { type .b32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ cnot.b32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ cnot { type .b32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ copysign.f64 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ copysign { type .f64 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ cos.approx.f32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ cos { round .approx } { type .f32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ cvt.f32.s32 a, b;
+ cvt.s32.f32 a, b;
+ cvt.rp.f32.f64 a, b;
+ cvt.rpi.s32.f32 a, b;
+ cvt.ftz.f32.f64 a, b;
+ cvt.sat.f32.f64 a, b;
+ cvt.ftz.sat.f32.f64 a, b;
+ cvt.rp.ftz.sat.f32.f64 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ cvt { dest-type .f32 } { type .s32 } { dest "a" } { a "b" } }
+ T{ cvt { dest-type .s32 } { type .f32 } { dest "a" } { a "b" } }
+ T{ cvt { round .rp } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } }
+ T{ cvt { round .rpi } { dest-type .s32 } { type .f32 } { dest "a" } { a "b" } }
+ T{ cvt { ftz? t } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } }
+ T{ cvt { sat? t } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } }
+ T{ cvt { ftz? t } { sat? t } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } }
+ T{ cvt { round .rp } { ftz? t } { sat? t } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ cvta.global.u64 a, b;
+ cvta.shared.u64 a, b;
+ cvta.to.shared.u64 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ cvta { storage-space .global } { type .u64 } { dest "a" } { a "b" } }
+ T{ cvta { storage-space .shared } { type .u64 } { dest "a" } { a "b" } }
+ T{ cvta { to? t } { storage-space .shared } { type .u64 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ div.u32 a, b, c;
+ div.approx.f32 a, b, c;
+ div.approx.ftz.f32 a, b, c;
+ div.full.f32 a, b, c;
+ div.full.ftz.f32 a, b, c;
+ div.f32 a, b, c;
+ div.rz.f32 a, b, c;
+ div.ftz.f32 a, b, c;
+ div.rz.ftz.f32 a, b, c;
+ div.f64 a, b, c;
+ div.rz.f64 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ div { type .u32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { round .approx } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { round .full } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { round .full } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { round .rz } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { round .rz } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { type .f64 } { dest "a" } { a "b" } { b "c" } }
+ T{ div { round .rz } { type .f64 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ ex2.approx.f32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ ex2 { round .approx } { type .f32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ exit;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ exit }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ fma.f32 a, b, c, d;
+ fma.sat.f32 a, b, c, d;
+ fma.ftz.f32 a, b, c, d;
+ fma.ftz.sat.f32 a, b, c, d;
+ fma.rz.sat.f32 a, b, c, d;
+ fma.rz.ftz.sat.f32 a, b, c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ fma { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ fma { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ fma { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ fma { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ fma { round .rz } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ fma { round .rz } { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ isspacep.shared a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ isspacep { storage-space .shared } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ ld.u32 a, [b];
+ ld.v2.u32 a, [b];
+ ld.v4.u32 a, [b];
+ ld.v4.u32 {a, b, c, d}, [e];
+ ld.lu.u32 a, [b];
+ ld.const.lu.u32 a, [b];
+ ld.volatile.const[5].u32 a, [b];
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ ld { type .u32 } { dest "a" } { a "[b]" } }
+ T{ ld { type T{ .v2 { of .u32 } } } { dest "a" } { a "[b]" } }
+ T{ ld { type T{ .v4 { of .u32 } } } { dest "a" } { a "[b]" } }
+ T{ ld { type T{ .v4 { of .u32 } } } { dest "{a, b, c, d}" } { a "[e]" } }
+ T{ ld { cache-op .lu } { type .u32 } { dest "a" } { a "[b]" } }
+ T{ ld { storage-space T{ .const } } { cache-op .lu } { type .u32 } { dest "a" } { a "[b]" } }
+ T{ ld { volatile? t } { storage-space T{ .const { bank 5 } } } { type .u32 } { dest "a" } { a "[b]" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ ldu.u32 a, [b];
+ ldu.v2.u32 a, [b];
+ ldu.v4.u32 a, [b];
+ ldu.v4.u32 {a, b, c, d}, [e];
+ ldu.lu.u32 a, [b];
+ ldu.const.lu.u32 a, [b];
+ ldu.volatile.const[5].u32 a, [b];
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ ldu { type .u32 } { dest "a" } { a "[b]" } }
+ T{ ldu { type T{ .v2 { of .u32 } } } { dest "a" } { a "[b]" } }
+ T{ ldu { type T{ .v4 { of .u32 } } } { dest "a" } { a "[b]" } }
+ T{ ldu { type T{ .v4 { of .u32 } } } { dest "{a, b, c, d}" } { a "[e]" } }
+ T{ ldu { cache-op .lu } { type .u32 } { dest "a" } { a "[b]" } }
+ T{ ldu { storage-space T{ .const } } { cache-op .lu } { type .u32 } { dest "a" } { a "[b]" } }
+ T{ ldu { volatile? t } { storage-space T{ .const { bank 5 } } } { type .u32 } { dest "a" } { a "[b]" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ lg2.approx.f32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ lg2 { round .approx } { type .f32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ mad.s32 a, b, c, d;
+ mad.lo.s32 a, b, c, d;
+ mad.sat.s32 a, b, c, d;
+ mad.hi.sat.s32 a, b, c, d;
+ mad.ftz.f32 a, b, c, d;
+ mad.ftz.sat.f32 a, b, c, d;
+ mad.rz.sat.f32 a, b, c, d;
+ mad.rz.ftz.sat.f32 a, b, c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ mad { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad { mode .lo } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad { mode .hi } { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad { round .rz } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad { round .rz } { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ mad24.s32 a, b, c, d;
+ mad24.lo.s32 a, b, c, d;
+ mad24.sat.s32 a, b, c, d;
+ mad24.hi.sat.s32 a, b, c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ mad24 { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad24 { mode .lo } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad24 { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ mad24 { mode .hi } { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ neg.s32 a, b;
+ neg.f32 a, b;
+ neg.ftz.f32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ neg { type .s32 } { dest "a" } { a "b" } }
+ T{ neg { type .f32 } { dest "a" } { a "b" } }
+ T{ neg { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ not.b32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ not { type .b32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ or.b32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ or { type .b32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ pmevent a;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ pmevent { a "a" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ popc.b64 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ popc { type .b64 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ prefetch.L1 [a];
+ prefetch.local.L2 [a];
+ prefetchu.L1 [a];
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ prefetch { level .L1 } { a "[a]" } }
+ T{ prefetch { storage-space .local } { level .L2 } { a "[a]" } }
+ T{ prefetchu { level .L1 } { a "[a]" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ prmt.b32 a, b, c, d;
+ prmt.b32.f4e a, b, c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ prmt { type .b32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ prmt { type .b32 } { mode .f4e } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ rcp.approx.f32 a, b;
+ rcp.approx.ftz.f32 a, b;
+ rcp.f32 a, b;
+ rcp.rz.f32 a, b;
+ rcp.ftz.f32 a, b;
+ rcp.rz.ftz.f32 a, b;
+ rcp.f64 a, b;
+ rcp.rz.f64 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ rcp { round .approx } { type .f32 } { dest "a" } { a "b" } }
+ T{ rcp { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ T{ rcp { type .f32 } { dest "a" } { a "b" } }
+ T{ rcp { round .rz } { type .f32 } { dest "a" } { a "b" } }
+ T{ rcp { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ T{ rcp { round .rz } { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ T{ rcp { type .f64 } { dest "a" } { a "b" } }
+ T{ rcp { round .rz } { type .f64 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ red.and.u32 [a], b;
+ red.global.and.u32 [a], b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ red { op .and } { type .u32 } { dest "[a]" } { a "b" } }
+ T{ red { storage-space .global } { op .and } { type .u32 } { dest "[a]" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ rsqrt.approx.f32 a, b;
+ rsqrt.approx.ftz.f32 a, b;
+ rsqrt.approx.f64 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ rsqrt { round .approx } { type .f32 } { dest "a" } { a "b" } }
+ T{ rsqrt { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ T{ rsqrt { round .approx } { type .f64 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ rsqrt.approx.f32 a, b;
+ rsqrt.approx.ftz.f32 a, b;
+ rsqrt.approx.f64 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ rsqrt { round .approx } { type .f32 } { dest "a" } { a "b" } }
+ T{ rsqrt { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ T{ rsqrt { round .approx } { type .f64 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ sad.u32 a, b, c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ sad { type .u32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ selp.u32 a, b, c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ selp { type .u32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ set.gt.u32.s32 a, b, c;
+ set.gt.ftz.u32.f32 a, b, c;
+ set.gt.and.ftz.u32.f32 a, b, c, d;
+ set.gt.and.ftz.u32.f32 a, b, c, !d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ set { cmp-op .gt } { dest-type .u32 } { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ set { cmp-op .gt } { ftz? t } { dest-type .u32 } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ set { cmp-op .gt } { bool-op .and } { ftz? t } { dest-type .u32 } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ set { cmp-op .gt } { bool-op .and } { ftz? t } { dest-type .u32 } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "!d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ setp.gt.s32 a, b, c;
+ setp.gt.s32 a|z, b, c;
+ setp.gt.ftz.f32 a, b, c;
+ setp.gt.and.ftz.f32 a, b, c, d;
+ setp.gt.and.ftz.f32 a, b, c, !d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ setp { cmp-op .gt } { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ setp { cmp-op .gt } { type .s32 } { dest "a" } { |dest "z" } { a "b" } { b "c" } }
+ T{ setp { cmp-op .gt } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ setp { cmp-op .gt } { bool-op .and } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ setp { cmp-op .gt } { bool-op .and } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "!d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ shl.b32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ shl { type .b32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ shr.b32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ shr { type .b32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ sin.approx.f32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ sin { round .approx } { type .f32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ slct.f32.s32 a, b, c, d;
+ slct.ftz.f32.s32 a, b, c, d;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ slct { dest-type .f32 } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ T{ slct { ftz? t } { dest-type .f32 } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ sqrt.approx.f32 a, b;
+ sqrt.approx.ftz.f32 a, b;
+ sqrt.f32 a, b;
+ sqrt.rz.f32 a, b;
+ sqrt.ftz.f32 a, b;
+ sqrt.rz.ftz.f32 a, b;
+ sqrt.f64 a, b;
+ sqrt.rz.f64 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ sqrt { round .approx } { type .f32 } { dest "a" } { a "b" } }
+ T{ sqrt { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ T{ sqrt { type .f32 } { dest "a" } { a "b" } }
+ T{ sqrt { round .rz } { type .f32 } { dest "a" } { a "b" } }
+ T{ sqrt { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ T{ sqrt { round .rz } { ftz? t } { type .f32 } { dest "a" } { a "b" } }
+ T{ sqrt { type .f64 } { dest "a" } { a "b" } }
+ T{ sqrt { round .rz } { type .f64 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ st.u32 [a], b;
+ st.v2.u32 [a], b;
+ st.v4.u32 [a], b;
+ st.v4.u32 [a], {b, c, d, e};
+ st.lu.u32 [a], b;
+ st.local.lu.u32 [a], b;
+ st.volatile.local.u32 [a], b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ st { type .u32 } { dest "[a]" } { a "b" } }
+ T{ st { type T{ .v2 { of .u32 } } } { dest "[a]" } { a "b" } }
+ T{ st { type T{ .v4 { of .u32 } } } { dest "[a]" } { a "b" } }
+ T{ st { type T{ .v4 { of .u32 } } } { dest "[a]" } { a "{b, c, d, e}" } }
+ T{ st { cache-op .lu } { type .u32 } { dest "[a]" } { a "b" } }
+ T{ st { storage-space .local } { cache-op .lu } { type .u32 } { dest "[a]" } { a "b" } }
+ T{ st { volatile? t } { storage-space .local } { type .u32 } { dest "[a]" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ sub.s32 a, b, c;
+ sub.cc.s32 a, b, c;
+ sub.sat.s32 a, b, c;
+ sub.ftz.f32 a, b, c;
+ sub.ftz.sat.f32 a, b, c;
+ sub.rz.sat.f32 a, b, c;
+ sub.rz.ftz.sat.f32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ sub { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ sub { cc? t } { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ sub { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ sub { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ sub { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ sub { round .rz } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ T{ sub { round .rz } { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ subc.s32 a, b, c;
+ subc.cc.s32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ subc { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ T{ subc { cc? t } { type .s32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ testp.finite.f32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ testp { op .finite } { type .f32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ trap;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ trap }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ vote.all.pred a, b;
+ vote.all.pred a, !b;
+ vote.ballot.b32 a, b;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ vote { mode .all } { type .pred } { dest "a" } { a "b" } }
+ T{ vote { mode .all } { type .pred } { dest "a" } { a "!b" } }
+ T{ vote { mode .ballot } { type .b32 } { dest "a" } { a "b" } }
+ } }
+ } ptx>string
+] unit-test
+
+[ """ .version 2.0
+ .target sm_20
+ xor.b32 a, b, c;
+""" ] [
+ T{ ptx
+ { version "2.0" }
+ { target T{ ptx-target { arch sm_20 } } }
+ { body {
+ T{ xor { type .b32 } { dest "a" } { a "b" } { b "c" } }
+ } }
+ } ptx>string
+] unit-test
+
! (c)2010 Joe Groff bsd license
-USING: accessors arrays combinators io kernel math math.parser
-roles sequences strings variants words ;
+USING: accessors arrays combinators io io.streams.string kernel
+math math.parser roles sequences strings variants words ;
FROM: roles => TUPLE: ;
IN: cuda.ptx
{ parameter ?integer }
{ dim dim }
{ initializer ?string } ;
+UNION: ?ptx-variable POSTPONE: f ptx-variable ;
TUPLE: ptx-predicate
{ negated? boolean }
body ;
TUPLE: ptx-func < ptx-entry
- { return ptx-variable } ;
+ { return ?ptx-variable } ;
TUPLE: ptx-directive ;
TUPLE: copysign < ptx-3op-instruction ;
TUPLE: cos <{ ptx-2op-instruction ptx-float-env } ;
TUPLE: cvt < ptx-2op-instruction
- { rounding-mode ?ptx-rounding-mode }
+ { round ?ptx-rounding-mode }
{ ftz? boolean }
{ sat? boolean }
{ dest-type ptx-type } ;
TUPLE: exit < ptx-instruction ;
TUPLE: fma <{ ptx-mad-instruction ptx-float-env } ;
TUPLE: isspacep < ptx-instruction
- { storage-space ?ptx-storage-space }
+ { storage-space ptx-storage-space }
{ dest string }
{ a string } ;
TUPLE: ld < ptx-ldst-instruction ;
GENERIC: ptx-element-label ( elt -- label )
M: object ptx-element-label drop f ;
+GENERIC: ptx-semicolon? ( elt -- ? )
+M: object ptx-semicolon? drop t ;
+M: ptx-target ptx-semicolon? drop f ;
+M: ptx-entry ptx-semicolon? drop f ;
+M: ptx-func ptx-semicolon? drop f ;
+M: .file ptx-semicolon? drop f ;
+M: .loc ptx-semicolon? drop f ;
+
GENERIC: (write-ptx-element) ( elt -- )
: write-ptx-element ( elt -- )
dup ptx-element-label [ write ":" write ] when*
- "\t" write (write-ptx-element)
- ";" print ;
+ "\t" write dup (write-ptx-element)
+ ptx-semicolon? [ ";" print ] [ nl ] if ;
: write-ptx ( ptx -- )
- "\t.version " write dup version>> write ";" print
+ "\t.version " write dup version>> print
dup target>> write-ptx-element
body>> [ write-ptx-element ] each ;
"\t}" write ;
: write-entry ( entry -- )
- dup name>> write " " write
- dup params>> [ write-params ] when* nl
- dup directives>> [ (write-ptx-element) ] each nl
+ dup name>> write
+ dup params>> [ " " write write-params ] when* nl
+ dup directives>> [ (write-ptx-element) nl ] each
dup body>> write-body
drop ;
dup b>> [ ", " write write ] when*
", " write c>> write ;
M: bar.sync (write-ptx-element)
- "bar.arrive " write-insn
+ "bar.sync " write-insn
dup a>> write
dup b>> [ ", " write write ] when*
drop ;
write-2op ;
M: bra (write-ptx-element)
"bra" write-insn
- dup write-uni
- " " write target>> write ;
+ dup write-uni " " write
+ target>> write ;
M: brev (write-ptx-element)
"brev" write-insn
write-2op ;
M: brkpt (write-ptx-element)
"brkpt" write-insn drop ;
M: call (write-ptx-element)
- "call" write-insn " " write
+ "call" write-insn
+ dup write-uni " " write
dup return>> [ "(" write write "), " write ] when*
dup target>> write
dup params>> [ ", (" write ", " join write ")" write ] unless-empty
write-2op ;
M: cvt (write-ptx-element)
"cvt" write-insn
- dup rounding-mode>> (write-ptx-element)
+ dup round>> (write-ptx-element)
dup write-ftz
dup write-sat
dup dest-type>> (write-ptx-element)
" " write a>> write ;
M: prmt (write-ptx-element)
"prmt" write-insn
- dup mode>> (write-ptx-element)
- write-4op ;
+ dup type>> (write-ptx-element)
+ dup mode>> (write-ptx-element) " " write
+ dup dest>> write ", " write
+ dup a>> write ", " write
+ dup b>> write ", " write
+ dup c>> write
+ drop ;
M: rcp (write-ptx-element)
"rcp" write-insn
dup write-float-env
- write-3op ;
+ write-2op ;
M: red (write-ptx-element)
"red" write-insn
dup storage-space>> (write-ptx-element)
"testp" write-insn
dup op>> (write-ptx-element)
write-2op ;
+M: trap (write-ptx-element)
+ "trap" write-insn drop ;
M: vote (write-ptx-element)
"vote" write-insn
dup mode>> (write-ptx-element)
write-2op ;
M: xor (write-ptx-element)
- "or" write-insn
+ "xor" write-insn
write-3op ;
+
+: ptx>string ( ptx -- string )
+ [ write-ptx ] with-string-writer ;
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: alien.parser cuda cuda.utils io.backend kernel lexer
+namespaces parser ;
+IN: cuda.syntax
+
+SYNTAX: CUDA-LIBRARY:
+ scan scan normalize-path
+ [ add-cuda-library ]
+ [ drop current-cuda-library set-global ] 2bi ;
+
+SYNTAX: CUDA-FUNCTION:
+ scan [ create-in current-cuda-library get ] [ ] bi
+ ";" scan-c-args drop define-cuda-word ;
+
+: 3<<< ( dim-block dim-grid shared-size -- function-launcher )
+ f function-launcher boa ;
+
+: 4<<< ( dim-block dim-grid shared-size stream -- function-launcher )
+ function-launcher boa ;
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors alien.c-types alien.data alien.strings arrays
+assocs byte-arrays classes.struct combinators cuda.ffi io
+io.backend io.encodings.utf8 kernel math.parser namespaces
+prettyprint sequences ;
+IN: cuda.utils
+
+SYMBOL: cuda-device
+SYMBOL: cuda-context
+SYMBOL: cuda-module
+SYMBOL: cuda-function
+SYMBOL: cuda-launcher
+
+SYMBOL: cuda-modules
+SYMBOL: cuda-functions
+
+ERROR: throw-cuda-error n ;
+
+: cuda-error ( n -- )
+ dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ;
+
+: init-cuda ( -- )
+ 0 cuInit cuda-error ;
+
+: cuda-version ( -- n )
+ int <c-object> [ cuDriverGetVersion cuda-error ] keep *int ;
+
+: get-function-ptr* ( module string -- function )
+ [ CUfunction <c-object> ] 2dip
+ [ cuModuleGetFunction cuda-error ] 3keep 2drop *void* ;
+
+: get-function-ptr ( string -- function )
+ [ cuda-module get ] dip get-function-ptr* ;
+
+: with-cuda-function ( string quot -- )
+ [
+ get-function-ptr* cuda-function set
+ ] dip call ; inline
+
+: create-context ( flags device -- context )
+ [ CUcontext <c-object> ] 2dip
+ [ cuCtxCreate cuda-error ] 3keep 2drop *void* ;
+
+: destroy-context ( context -- ) cuCtxDestroy cuda-error ;
+
+SYMBOL: cuda-libraries
+cuda-libraries [ H{ } clone ] initialize
+
+SYMBOL: current-cuda-library
+
+TUPLE: cuda-library name path handle ;
+
+: <cuda-library> ( name path -- obj )
+ \ cuda-library new
+ swap >>path
+ swap >>name ;
+
+: add-cuda-library ( name path -- )
+ normalize-path <cuda-library>
+ dup name>> cuda-libraries get-global set-at ;
+
+: ?delete-at ( key assoc -- old/key ? )
+ 2dup delete-at* [ 2nip t ] [ 2drop f ] if ; inline
+
+ERROR: no-cuda-library name ;
+
+: load-module ( path -- module )
+ [ CUmodule <c-object> ] dip
+ [ cuModuleLoad cuda-error ] 2keep drop *void* ;
+
+: unload-module ( module -- )
+ cuModuleUnload cuda-error ;
+
+: load-cuda-library ( library -- handle )
+ path>> load-module ;
+
+: lookup-cuda-library ( name -- cuda-library )
+ cuda-libraries get ?at [ no-cuda-library ] unless ;
+
+: remove-cuda-library ( name -- library )
+ cuda-libraries get ?delete-at [ no-cuda-library ] unless ;
+
+: unload-cuda-library ( name -- )
+ remove-cuda-library handle>> unload-module ;
+
+
+: cached-module ( module-name -- alien )
+ lookup-cuda-library
+ cuda-modules get-global [ load-cuda-library ] cache ;
+
+: cached-function ( module-name function-name -- alien )
+ [ cached-module ] dip
+ 2array cuda-functions get [ first2 get-function-ptr* ] cache ;
+
+: launch-function* ( function -- ) cuLaunch cuda-error ;
+
+: launch-function ( -- ) cuda-function get cuLaunch cuda-error ;
+
+: cuda-int* ( function offset value -- )
+ cuParamSeti cuda-error ;
+
+: cuda-int ( offset value -- )
+ [ cuda-function get ] 2dip cuda-int* ;
+
+: cuda-float* ( function offset value -- )
+ cuParamSetf cuda-error ;
+
+: cuda-float ( offset value -- )
+ [ cuda-function get ] 2dip cuda-float* ;
+
+: cuda-vector* ( function offset ptr n -- )
+ cuParamSetv cuda-error ;
+
+: cuda-vector ( offset ptr n -- )
+ [ cuda-function get ] 3dip cuda-vector* ;
+
+: param-size* ( function n -- )
+ cuParamSetSize cuda-error ;
+
+: param-size ( n -- )
+ [ cuda-function get ] dip param-size* ;
+
+: launch-function-grid* ( function width height -- )
+ cuLaunchGrid cuda-error ;
+
+: launch-function-grid ( width height -- )
+ [ cuda-function get ] 2dip
+ cuLaunchGrid cuda-error ;
+
+: function-block-shape* ( function x y z -- )
+ cuFuncSetBlockShape cuda-error ;
+
+: function-block-shape ( x y z -- )
+ [ cuda-function get ] 3dip
+ cuFuncSetBlockShape cuda-error ;
+
+: function-shared-size* ( function n -- )
+ cuFuncSetSharedSize cuda-error ;
+
+: function-shared-size ( n -- )
+ [ cuda-function get ] dip
+ cuFuncSetSharedSize cuda-error ;
M: game-loop dispose
stop-loop ;
-USING: vocabs vocabs.loader ;
+USE: vocabs.loader
-"prettyprint" "game.loop.prettyprint" require-when
+{ "game.loop" "prettyprint" } "game.loop.prettyprint" require-when
[ world>> ] [ program>> instances>> ] [ ] tri ?delete-at
reset-memos ;
-"prettyprint" "gpu.shaders.prettyprint" require-when
+{ "gpu.shaders" "prettyprint" } "gpu.shaders.prettyprint" require-when
--- /dev/null
+Doug Coleman
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: alien.syntax core-foundation core-foundation.strings
+javascriptcore.ffi ;
+IN: javascriptcore.core-foundation
+
+FUNCTION: JSStringRef JSStringCreateWithCFString ( CFStringRef string ) ;
+
+FUNCTION: CFStringRef JSStringCopyCFString ( CFAllocatorRef alloc, JSStringRef string ) ;
+
+
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: alien alien.c-types alien.libraries alien.syntax
+classes.struct combinators io.encodings.utf8 system ;
+IN: javascriptcore.ffi
+
+<<
+"javascriptcore" {
+ { [ os macosx? ] [ "/System/Library/Frameworks/JavaScriptCore.framework/Versions/Current/JavaScriptCore" ] }
+ ! { [ os winnt? ] [ "javascriptcore.dll" ] }
+ ! { [ os unix? ] [ "libsqlite3.so" ] }
+ } cond cdecl add-library
+>>
+
+LIBRARY: javascriptcore
+
+TYPEDEF: void* JSContextGroupRef
+TYPEDEF: void* JSContextRef
+TYPEDEF: void* JSGlobalContextRef
+TYPEDEF: void* JSStringRef
+TYPEDEF: void* JSClassRef
+TYPEDEF: void* JSPropertyNameArrayRef
+TYPEDEF: void* JSPropertyNameAccumulatorRef
+TYPEDEF: void* JSValueRef
+TYPEDEF: void* JSObjectRef
+TYPEDEF: void* JSObjectInitializeCallback
+TYPEDEF: void* JSObjectFinalizeCallback
+TYPEDEF: void* JSObjectHasPropertyCallback
+TYPEDEF: void* JSObjectGetPropertyCallback
+TYPEDEF: void* JSObjectSetPropertyCallback
+TYPEDEF: void* JSObjectDeletePropertyCallback
+TYPEDEF: void* JSObjectGetPropertyNamesCallback
+TYPEDEF: void* JSObjectCallAsFunctionCallback
+TYPEDEF: void* JSObjectCallAsConstructorCallback
+TYPEDEF: void* JSObjectHasInstanceCallback
+TYPEDEF: void* JSObjectConvertToTypeCallback
+TYPEDEF: uint unsigned
+TYPEDEF: ushort JSChar
+! char[utf16n] for strings
+
+C-ENUM: JSPropertyAttributes
+ { kJSPropertyAttributeNone 0 }
+ { kJSPropertyAttributeReadOnly 2 }
+ { kJSPropertyAttributeDontEnum 4 }
+ { kJSPropertyAttributeDontDelete 8 } ;
+
+C-ENUM: JSClassAttributes
+ { kJSClassAttributeNone 0 }
+ { kJSClassAttributeNoAutomaticPrototype 2 } ;
+
+C-ENUM: JSType
+ kJSTypeUndefined,
+ kJSTypeNull,
+ kJSTypeBoolean,
+ kJSTypeNumber,
+ kJSTypeString,
+ kJSTypeObject ;
+
+STRUCT: JSStaticValue
+ { name c-string }
+ { getProperty JSObjectGetPropertyCallback }
+ { setProperty JSObjectSetPropertyCallback }
+ { attributes JSPropertyAttributes } ;
+
+STRUCT: JSStaticFunction
+ { name c-string }
+ { callAsFunction JSObjectCallAsFunctionCallback } ;
+
+STRUCT: JSClassDefinition
+ { version int }
+ { attributes JSClassAttributes }
+ { className c-string }
+ { parentClass JSClassRef }
+ { staticValues JSStaticValue* }
+ { staticFunctions JSStaticFunction* }
+ { initialize JSObjectInitializeCallback }
+ { finalize JSObjectFinalizeCallback }
+ { hasProperty JSObjectHasPropertyCallback }
+ { getProperty JSObjectGetPropertyCallback }
+ { setProperty JSObjectSetPropertyCallback }
+ { deleteProperty JSObjectDeletePropertyCallback }
+ { getPropertyNames JSObjectGetPropertyNamesCallback }
+ { callAsFunction JSObjectCallAsFunctionCallback }
+ { callAsConstructor JSObjectCallAsConstructorCallback }
+ { hasInstance JSObjectHasInstanceCallback }
+ { convertToType JSObjectConvertToTypeCallback } ;
+
+ALIAS: kJSClassDefinitionEmpty JSClassDefinition
+
+FUNCTION: JSValueRef JSEvaluateScript (
+ JSContextRef ctx,
+ JSStringRef script,
+ JSObjectRef thisObject,
+ JSStringRef sourceURL,
+ int startingLineNumber,
+ JSValueRef* exception ) ;
+
+FUNCTION: bool JSCheckScriptSyntax (
+ JSContextRef ctx,
+ JSStringRef script,
+ JSStringRef sourceURL,
+ int startingLineNumber,
+ JSValueRef* exception ) ;
+
+FUNCTION: void JSGarbageCollect
+ ( JSContextRef ctx ) ;
+
+FUNCTION: JSContextGroupRef JSContextGroupCreate
+ ( ) ;
+
+FUNCTION: JSContextGroupRef JSContextGroupRetain
+ ( JSContextGroupRef group ) ;
+
+FUNCTION: void JSContextGroupRelease
+ ( JSContextGroupRef group ) ;
+
+FUNCTION: JSGlobalContextRef JSGlobalContextCreate
+ ( JSClassRef globalObjectClass ) ;
+
+FUNCTION: JSGlobalContextRef JSGlobalContextCreateInGroup (
+ JSContextGroupRef group,
+ JSClassRef globalObjectClass ) ;
+
+FUNCTION: JSGlobalContextRef JSGlobalContextRetain
+ ( JSGlobalContextRef ctx ) ;
+
+FUNCTION: void JSGlobalContextRelease
+ ( JSGlobalContextRef ctx ) ;
+
+FUNCTION: JSObjectRef JSContextGetGlobalObject
+ ( JSContextRef ctx ) ;
+
+FUNCTION: JSContextGroupRef JSContextGetGroup
+ ( JSContextRef ctx ) ;
+
+FUNCTION: JSClassRef JSClassCreate
+ ( JSClassDefinition* definition ) ;
+
+FUNCTION: JSClassRef JSClassRetain
+ ( JSClassRef jsClass ) ;
+
+FUNCTION: void JSClassRelease
+ ( JSClassRef jsClass ) ;
+
+FUNCTION: JSObjectRef JSObjectMake
+ ( JSContextRef ctx,
+ JSClassRef jsClass, void* data ) ;
+
+FUNCTION: JSObjectRef JSObjectMakeFunctionWithCallback ( JSContextRef ctx, JSStringRef name, JSObjectCallAsFunctionCallback callAsFunction ) ;
+
+FUNCTION: JSObjectRef JSObjectMakeConstructor ( JSContextRef ctx, JSClassRef jsClass, JSObjectCallAsConstructorCallback callAsConstructor ) ;
+
+FUNCTION: JSObjectRef JSObjectMakeArray ( JSContextRef ctx, size_t argumentCount, JSValueRef arguments[], JSValueRef* exception ) ;
+
+FUNCTION: JSObjectRef JSObjectMakeDate ( JSContextRef ctx, size_t argumentCount, JSValueRef arguments[], JSValueRef* exception ) ;
+
+FUNCTION: JSObjectRef JSObjectMakeError ( JSContextRef ctx, size_t argumentCount, JSValueRef arguments[], JSValueRef* exception ) ;
+
+FUNCTION: JSObjectRef JSObjectMakeRegExp ( JSContextRef ctx, size_t argumentCount, JSValueRef arguments[], JSValueRef* exception ) ;
+
+FUNCTION: JSObjectRef JSObjectMakeFunction ( JSContextRef ctx, JSStringRef name, unsigned parameterCount, JSStringRef parameterNames[], JSStringRef body, JSStringRef sourceURL, int startingLineNumber, JSValueRef* exception ) ;
+
+FUNCTION: JSValueRef JSObjectGetPrototype ( JSContextRef ctx, JSObjectRef object ) ;
+
+FUNCTION: void JSObjectSetPrototype ( JSContextRef ctx, JSObjectRef object, JSValueRef value ) ;
+
+FUNCTION: bool JSObjectHasProperty ( JSContextRef ctx, JSObjectRef object, JSStringRef propertyName ) ;
+
+FUNCTION: JSValueRef JSObjectGetProperty ( JSContextRef ctx, JSObjectRef object, JSStringRef propertyName, JSValueRef* exception ) ;
+
+FUNCTION: void JSObjectSetProperty ( JSContextRef ctx, JSObjectRef object, JSStringRef propertyName, JSValueRef value, JSPropertyAttributes attributes, JSValueRef* exception ) ;
+
+FUNCTION: bool JSObjectDeleteProperty ( JSContextRef ctx, JSObjectRef object, JSStringRef propertyName, JSValueRef* exception ) ;
+
+FUNCTION: JSValueRef JSObjectGetPropertyAtIndex ( JSContextRef ctx, JSObjectRef object, unsigned propertyIndex, JSValueRef* exception ) ;
+
+FUNCTION: void JSObjectSetPropertyAtIndex ( JSContextRef ctx, JSObjectRef object, unsigned propertyIndex, JSValueRef value, JSValueRef* exception ) ;
+
+FUNCTION: void* JSObjectGetPrivate ( JSObjectRef object ) ;
+
+FUNCTION: bool JSObjectSetPrivate ( JSObjectRef object, void* data ) ;
+
+FUNCTION: bool JSObjectIsFunction ( JSContextRef ctx, JSObjectRef object ) ;
+
+FUNCTION: JSValueRef JSObjectCallAsFunction ( JSContextRef ctx, JSObjectRef object, JSObjectRef thisObject, size_t argumentCount, JSValueRef arguments[], JSValueRef* exception ) ;
+
+FUNCTION: bool JSObjectIsConstructor ( JSContextRef ctx, JSObjectRef object ) ;
+
+FUNCTION: JSObjectRef JSObjectCallAsConstructor ( JSContextRef ctx, JSObjectRef object, size_t argumentCount, JSValueRef arguments[], JSValueRef* exception ) ;
+
+FUNCTION: JSPropertyNameArrayRef JSObjectCopyPropertyNames ( JSContextRef ctx, JSObjectRef object ) ;
+
+FUNCTION: JSPropertyNameArrayRef JSPropertyNameArrayRetain ( JSPropertyNameArrayRef array ) ;
+
+FUNCTION: void JSPropertyNameArrayRelease ( JSPropertyNameArrayRef array ) ;
+
+FUNCTION: size_t JSPropertyNameArrayGetCount ( JSPropertyNameArrayRef array ) ;
+
+FUNCTION: JSStringRef JSPropertyNameArrayGetNameAtIndex ( JSPropertyNameArrayRef array, size_t index ) ;
+
+FUNCTION: void JSPropertyNameAccumulatorAddName ( JSPropertyNameAccumulatorRef accumulator, JSStringRef propertyName ) ;
+
+FUNCTION: JSStringRef JSStringCreateWithCharacters ( JSChar* chars, size_t numChars ) ;
+
+FUNCTION: JSStringRef JSStringCreateWithUTF8CString ( c-string[utf8] string ) ;
+
+FUNCTION: JSStringRef JSStringRetain ( JSStringRef string ) ;
+
+FUNCTION: void JSStringRelease ( JSStringRef string ) ;
+
+FUNCTION: size_t JSStringGetLength ( JSStringRef string ) ;
+
+FUNCTION: JSChar* JSStringGetCharactersPtr ( JSStringRef string ) ;
+
+FUNCTION: size_t JSStringGetMaximumUTF8CStringSize ( JSStringRef string ) ;
+
+FUNCTION: size_t JSStringGetUTF8CString ( JSStringRef string, char* buffer, size_t bufferSize ) ;
+
+FUNCTION: bool JSStringIsEqual ( JSStringRef a, JSStringRef b ) ;
+
+FUNCTION: bool JSStringIsEqualToUTF8CString ( JSStringRef a, char* b ) ;
+
+FUNCTION: JSType JSValueGetType ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: bool JSValueIsUndefined ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: bool JSValueIsNull ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: bool JSValueIsBoolean ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: bool JSValueIsNumber ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: bool JSValueIsString ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: bool JSValueIsObject ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: bool JSValueIsObjectOfClass ( JSContextRef ctx, JSValueRef value, JSClassRef jsClass ) ;
+
+FUNCTION: bool JSValueIsEqual ( JSContextRef ctx, JSValueRef a, JSValueRef b, JSValueRef* exception ) ;
+
+FUNCTION: bool JSValueIsStrictEqual ( JSContextRef ctx, JSValueRef a, JSValueRef b ) ;
+
+FUNCTION: bool JSValueIsInstanceOfConstructor ( JSContextRef ctx, JSValueRef value, JSObjectRef constructor, JSValueRef* exception ) ;
+
+FUNCTION: JSValueRef JSValueMakeUndefined ( JSContextRef ctx ) ;
+
+FUNCTION: JSValueRef JSValueMakeNull ( JSContextRef ctx ) ;
+
+FUNCTION: JSValueRef JSValueMakeBoolean ( JSContextRef ctx, bool boolean ) ;
+
+FUNCTION: JSValueRef JSValueMakeNumber ( JSContextRef ctx, double number ) ;
+
+FUNCTION: JSValueRef JSValueMakeString ( JSContextRef ctx, JSStringRef string ) ;
+
+FUNCTION: bool JSValueToBoolean ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: double JSValueToNumber ( JSContextRef ctx, JSValueRef value, JSValueRef* exception ) ;
+
+FUNCTION: JSStringRef JSValueToStringCopy ( JSContextRef ctx, JSValueRef value, JSValueRef* exception ) ;
+
+FUNCTION: JSObjectRef JSValueToObject ( JSContextRef ctx, JSValueRef value, JSValueRef* exception ) ;
+
+FUNCTION: void JSValueProtect ( JSContextRef ctx, JSValueRef value ) ;
+
+FUNCTION: void JSValueUnprotect ( JSContextRef ctx, JSValueRef value ) ;
+
--- /dev/null
+Doug Coleman
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: alien alien.accessors alien.syntax kernel kernel.private
+math system ;
+IN: javascriptcore.ffi.hack
+
+HOOK: set-callstack-bounds os ( -- )
+
+HOOK: macosx-callstack-start-offset cpu ( -- address )
+HOOK: macosx-callstack-size-offset cpu ( -- address )
+
+M: ppc macosx-callstack-start-offset HEX: 188 ;
+M: ppc macosx-callstack-size-offset HEX: 18c ;
+
+M: x86.32 macosx-callstack-start-offset HEX: c48 ;
+M: x86.32 macosx-callstack-size-offset HEX: c4c ;
+
+M: x86.64 macosx-callstack-start-offset HEX: 1860 ;
+M: x86.64 macosx-callstack-size-offset HEX: 1868 ;
+
+M: object set-callstack-bounds ;
+
+FUNCTION: void* pthread_self ( ) ;
+
+M: macosx set-callstack-bounds
+ callstack-bounds over [ alien-address ] bi@ -
+ pthread_self
+ [ macosx-callstack-size-offset set-alien-unsigned-cell ]
+ [ macosx-callstack-start-offset set-alien-cell ] bi ;
--- /dev/null
+! Copyright (C) 2010 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: javascriptcore.ffi.hack kernel ;
+IN: javascriptcore
+
+: with-javascriptcore ( quot -- )
+ set-callstack-bounds
+ call ; inline
FRAME_RETURN_ADDRESS(inner,this) = (char *)quot->entry_point + offset;
}
+void factor_vm::primitive_callstack_bounds()
+{
+ ctx->push(allot_alien((void*)ctx->callstack_seg->start));
+ ctx->push(allot_alien((void*)ctx->callstack_seg->end));
+}
+
}
case RT_LITERAL:
op.store_value(next_literal());
break;
+ case RT_FLOAT:
+ op.store_float(next_literal());
+ break;
case RT_ENTRY_POINT:
op.store_value(parent->compute_entry_point_address(next_literal()));
break;
case RT_LITERAL:
op.store_value(slot_forwarder.visit_pointer(op.load_value(old_offset)));
break;
+ case RT_FLOAT:
+ op.store_float(slot_forwarder.visit_pointer(op.load_float(old_offset)));
+ break;
case RT_ENTRY_POINT:
case RT_ENTRY_POINT_PIC:
case RT_ENTRY_POINT_PIC_TAIL:
case RT_LITERAL:
op.store_value(data_visitor.visit_pointer(op.load_value(old_offset)));
break;
+ case RT_FLOAT:
+ op.store_float(data_visitor.visit_pointer(op.load_float(old_offset)));
+ break;
case RT_ENTRY_POINT:
case RT_ENTRY_POINT_PIC:
case RT_ENTRY_POINT_PIC_TAIL:
return load_value(pointer);
}
+cell instruction_operand::load_float()
+{
+ return (cell)load_value() - boxed_float_offset;
+}
+
+cell instruction_operand::load_float(cell pointer)
+{
+ return (cell)load_value(pointer) - boxed_float_offset;
+}
+
code_block *instruction_operand::load_code_block(cell relative_to)
{
return ((code_block *)load_value(relative_to) - 1);
}
}
+void instruction_operand::store_float(cell value)
+{
+ store_value((fixnum)value + boxed_float_offset);
+}
+
void instruction_operand::store_code_block(code_block *compiled)
{
store_value((cell)compiled->entry_point());
type since its used in a situation where relocation arguments cannot
be passed in, and so RT_DLSYM is inappropriate (Windows only) */
RT_EXCEPTION_HANDLER,
+ /* pointer to a float's payload */
+ RT_FLOAT,
+
};
enum relocation_class {
case RT_CARDS_OFFSET:
case RT_DECKS_OFFSET:
case RT_EXCEPTION_HANDLER:
+ case RT_FLOAT:
return 0;
default:
critical_error("Bad rel type",rel_type());
fixnum load_value_masked(cell mask, cell bits, cell shift);
fixnum load_value(cell relative_to);
fixnum load_value();
+ cell load_float(cell relative_to);
+ cell load_float();
code_block *load_code_block(cell relative_to);
code_block *load_code_block();
void store_value_2_2(fixnum value);
void store_value_masked(fixnum value, cell mask, cell shift);
void store_value(fixnum value);
+ void store_float(cell value);
void store_code_block(code_block *compiled);
};
cell object;
};
+const fixnum boxed_float_offset = 8 - FLOAT_TYPE;
+
/* Assembly code makes assumptions about the layout of this struct */
struct boxed_float : object {
static const cell type_number = FLOAT_TYPE;
_(byte_array_to_bignum) \
_(callback) \
_(callstack) \
+ _(callstack_bounds) \
_(callstack_for) \
_(callstack_to_array) \
_(check_datastack) \
void operator()(instruction_operand op)
{
- if(op.rel_type() == RT_LITERAL)
+ switch(op.rel_type())
+ {
+ case RT_LITERAL:
op.store_value(visitor->visit_pointer(op.load_value()));
+ break;
+ case RT_FLOAT:
+ op.store_float(visitor->visit_pointer(op.load_float()));
+ break;
+ default:
+ break;
+ }
}
};
void primitive_innermost_stack_frame_executing();
void primitive_innermost_stack_frame_scan();
void primitive_set_innermost_stack_frame_quot();
+ void primitive_callstack_bounds();
template<typename Iterator> void iterate_callstack(context *ctx, Iterator &iterator);
// alien