swap >>device ; inline
+! Launch parameters read by run-function-launcher. The slot order is also
+! the positional input order of the boa constructors 2<<<, 3<<< and 4<<<,
+! so reordering slots here reorders those words' inputs.
TUPLE: function-launcher
-dim-block dim-grid shared-size stream ;
+dim-grid dim-block shared-size stream ;
: with-cuda-context ( flags device quot -- )
H{ } clone cuda-modules set-global
{ [ dup a:void* = ] [ drop 4 [ cuda-int* ] ] }
} cond ;
+<PRIVATE
+! Expand a block dimension spec into three stack values.
+! A sequence is padded on the right with 1s to at least three elements and
+! its first three elements are returned; any other value is left as x and
+! 1 1 is pushed for y and z.
+: block-dim ( block -- x y z )
+ dup sequence? [ 3 1 pad-tail first3 ] [ 1 1 ] if ; inline
+! Expand a grid dimension spec into two stack values.
+! A sequence is padded on the right with 1s to at least two elements and
+! its first two elements are returned; any other value is left as x and
+! 1 is pushed for y.
+! NOTE(review): the stack-effect input is named "block", but the caller in
+! run-function-launcher passes the dim-grid slot.
+: grid-dim ( block -- x y )
+ dup sequence? [ 2 1 pad-tail first2 ] [ 1 ] if ; inline
+PRIVATE>
+
+! Apply a function-launcher's settings to a function and launch it.
+! After the swap, each quotation below receives the function and the
+! launcher; in order they call function-block-shape*,
+! function-shared-size*, and then launch-function* (empty dim-grid) or
+! launch-function-grid* (non-empty dim-grid).
: run-function-launcher ( function-launcher function -- )
swap
{
- [ dim-block>> first3 function-block-shape* ]
+ ! block-dim turns dim-block into three values, whether it is a sequence
+ ! or a single value.
+ [ dim-block>> block-dim function-block-shape* ]
[ shared-size>> function-shared-size* ]
[
dim-grid>> [
launch-function*
] [
- first2 launch-function-grid*
+ ! NOTE(review): if-empty tests dim-grid before grid-dim runs, so
+ ! dim-grid presumably has to be a sequence here; grid-dim's
+ ! non-sequence branch may be unreachable from this call -- confirm.
+ grid-dim launch-function-grid*
] if-empty
]
} 2cleave ;
"CUDA device " ": " surround write
"Hello World!" >byte-array [ - ] map-index host>device &cuda-free
- [ { 6 1 1 } { 2 1 } 2<<< helloWorld ]
+ [ { 2 1 } { 6 1 1 } 2<<< helloWorld ]
[ 12 device>host >string print ] bi
] with-destructors
] with-each-cuda-device ;
scan [ create-in current-cuda-library get ] [ ] bi
";" scan-c-args drop define-cuda-word ;
-: 2<<< ( dim-block dim-grid -- function-launcher )
+! Build a function-launcher from grid and block dimensions, with
+! shared-size set to 0 and stream set to f.
+: 2<<< ( dim-grid dim-block -- function-launcher )
0 f function-launcher boa ; inline
-: 3<<< ( dim-block dim-grid shared-size -- function-launcher )
+! Build a function-launcher from grid dimensions, block dimensions and a
+! shared size, with stream set to f.
+: 3<<< ( dim-grid dim-block shared-size -- function-launcher )
f function-launcher boa ; inline
-: 4<<< ( dim-block dim-grid shared-size stream -- function-launcher )
+! Build a function-launcher with every slot supplied by the caller, in
+! slot order: dim-grid, dim-block, shared-size, stream.
+: 4<<< ( dim-grid dim-block shared-size stream -- function-launcher )
function-launcher boa ; inline