! Copyright (C) 2010 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types alien.strings byte-arrays cuda
-cuda.contexts cuda.devices cuda.memory cuda.syntax destructors
-io io.encodings.string io.encodings.utf8 kernel locals math
-math.parser namespaces sequences strings ;
+cuda.contexts cuda.devices cuda.libraries cuda.memory cuda.syntax
+destructors io io.encodings.string io.encodings.utf8 kernel locals
+math math.parser namespaces sequences strings ;
IN: cuda.demos.hello-world
CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx
"CUDA device " ": " surround write
"Hello World!" >byte-array [ - ] map-index host>device &cuda-free
- [ { 2 1 } { 6 1 1 } 2<<< helloWorld ]
+ [ { 2 1 } { 6 1 1 } <grid> helloWorld ]
[ 12 device>host >string print ] bi
] with-destructors
] with-each-cuda-device ;
! Entry point of the prefix-sum demo (registered with MAIN: below).
! Calls init-cuda, then runs a quotation under with-cuda-context with
! the two literal 0 arguments.
! NOTE(review): the two zeros are presumably the context flags and the
! device number -- confirm against with-cuda-context.
! The only line inside the quotation is a comment on both sides of this
! change, so no kernel is launched; the change merely updates the
! commented-out call from 3<<< to <grid-shared>.
:: cuda-prefix-sum ( -- )
init-cuda
0 0 [
- ! { 1 1 1 } { 2 1 } 0 3<<< prefix_sum_block
+ ! { 1 1 1 } { 2 1 } 0 <grid-shared> prefix_sum_block
] with-cuda-context ;
MAIN: cuda-prefix-sum
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types alien.data alien.strings arrays
assocs byte-arrays classes.struct combinators cuda
-cuda.contexts cuda.ffi cuda.syntax fry io io.encodings.utf8
+cuda.contexts cuda.ffi cuda.libraries fry io io.encodings.utf8
kernel locals math math.order math.parser namespaces
prettyprint sequences ;
IN: cuda.devices
! Builds a launch configuration for job-count jobs that each need
! per-job-shared units of shared memory.
! Reads sharedMemPerBlock and maxThreadsPerBlock from
! context-device-properties, leaves them on top of the two inputs, and
! calls (distribute-jobs); the result is wrapped by the constructor on
! the last line.
! NOTE(review): (distribute-jobs) presumably leaves dim-grid, dim-block
! and shared-size on the stack, since <grid-shared> consumes three
! values -- confirm against its definition.
! NOTE(review): the stack effect still names the output "launcher"
! although the updated line constructs it with <grid-shared>; consider
! renaming the output to "grid" for consistency.
: distribute-jobs ( job-count per-job-shared -- launcher )
context-device-properties
[ sharedMemPerBlock>> ] [ maxThreadsPerBlock>> ] bi
- (distribute-jobs) 3<<< ; inline
+ (distribute-jobs) <grid-shared> ; inline
[ cuda-function get ] dip
cuFuncSetSharedSize cuda-error ; inline
! Launch configuration tuple, renamed from function-launcher to grid by
! this change; the slot list is unchanged.
! Slots, in boa order: dim-grid dim-block shared-size stream.
! run-grid takes an instance of this tuple together with the function
! to launch and reads its slots (e.g. dim-block>>).
-TUPLE: function-launcher
+TUPLE: grid
dim-grid dim-block shared-size stream ;
! Constructs a grid from the two dimension values only.
! The remaining slots are filled with fixed values: shared-size is 0
! and stream is f.
+: <grid> ( dim-grid dim-block -- grid )
+ 0 f grid boa ; inline
+
! Constructs a grid with an explicit shared-size.
! The stream slot is filled with f.
+: <grid-shared> ( dim-grid dim-block shared-size -- grid )
+ f grid boa ; inline
+
! Constructs a grid with every slot supplied by the caller, in slot
! order: dim-grid dim-block shared-size stream.
+: <grid-shared-stream> ( dim-grid dim-block shared-size stream -- grid )
+ grid boa ; inline
+
: c-type>cuda-setter ( c-type -- n cuda-type )
{
{ [ dup c:int = ] [ drop 4 [ cuda-int* ] ] }
! Fetches the value of the cuda-function variable, passes it to
! cuLaunch, and hands cuLaunch's result to cuda-error.
! NOTE(review): presumably cuda-error throws on a non-success status --
! confirm against its definition.
: launch-function ( -- ) cuda-function get cuLaunch cuda-error ; inline
-: run-function-launcher ( function-launcher function -- )
+: run-grid ( grid function -- )
swap
{
[ dim-block>> block-dim function-block-shape* ]
'[
_ _ cached-function
[ nip _ cuda-arguments ]
- [ run-function-launcher ] 2bi
+ [ run-grid ] 2bi
]
]
- [ 2nip \ function-launcher suffix c:void function-effect ]
+ [ 2nip \ grid suffix c:void function-effect ]
3bi define-declared ;
! Record describing one CUDA library. Slots: name path handle.
! NOTE(review): presumably path is the module file named in a
! CUDA-LIBRARY: declaration and handle is the loaded module -- confirm
! against the code that creates and loads these tuples.
TUPLE: cuda-library name path handle ;
scan [ create-in current-cuda-library get ] [ ] bi
";" scan-c-args drop define-cuda-word ;
! Removed by this change. Each word below has a replacement with the
! same inputs that builds a grid tuple instead of a function-launcher:
!   2<<<  ->  <grid>                 ( dim-grid dim-block )
!   3<<<  ->  <grid-shared>          ( dim-grid dim-block shared-size )
!   4<<<  ->  <grid-shared-stream>   ( dim-grid dim-block shared-size stream )
-: 2<<< ( dim-grid dim-block -- function-launcher )
- 0 f function-launcher boa ; inline
-
-: 3<<< ( dim-grid dim-block shared-size -- function-launcher )
- f function-launcher boa ; inline
-
-: 4<<< ( dim-grid dim-block shared-size stream -- function-launcher )
- function-launcher boa ; inline