QUALIFIED-WITH: alien.c-types c
IN: cuda
-TUPLE: launcher
-{ device integer initial: 0 }
-{ device-flags initial: 0 } ;
-
-: <launcher> ( device-id -- launcher )
- launcher new
- swap >>device ; inline
-
TUPLE: function-launcher
dim-grid dim-block shared-size stream ;
-: (set-up-cuda-context) ( flags device create-quot -- )
+: (set-up-cuda-context) ( device flags create-quot -- )
H{ } clone cuda-modules set-global
H{ } clone cuda-functions set
call ; inline
: (with-cuda-context) ( context quot -- )
- [ '[ _ @ ] ]
- [ drop '[ [ sync-context ] ignore-errors _ destroy-context ] ] 2bi
- [ ] cleanup ; inline
+ swap '[ [ sync-context ] ignore-errors _ destroy-context ] [ ] cleanup ; inline
-: with-cuda-context ( flags device quot -- )
+: with-cuda-context ( device flags quot -- )
[ [ create-context ] (set-up-cuda-context) ] dip (with-cuda-context) ; inline
-: with-cuda-program ( flags device quot -- )
- [ dup cuda-device set ] 2dip
- '[ cuda-context set _ call ] with-cuda-context ; inline
-
-: with-cuda ( launcher quot -- )
- init-cuda [
- [ cuda-launcher set ]
- [ [ device>> ] [ device-flags>> ] bi ] bi
- ] [ with-cuda-program ] bi* ; inline
-
: c-type>cuda-setter ( c-type -- n cuda-type )
{
{ [ dup c:int = ] [ drop 4 [ cuda-int* ] ] }
CUDA-FUNCTION: helloWorld ( char* string-ptr ) ;
: cuda-hello-world ( -- )
+ init-cuda
[
[
- cuda-launcher get device>> number>string
+ context-device number>string
"CUDA device " ": " surround write
"Hello World!" >byte-array [ - ] map-index host>device &cuda-free
CUDA-FUNCTION: prefix_sum_block ( uint* in, uint* out, uint n ) ;
:: cuda-prefix-sum ( -- )
- T{ launcher { device 0 } }
- [
+ 0 0 [
! { 1 1 1 } { 2 1 } 0 3<<< prefix_sum_block
- ] with-cuda ;
+ ] with-cuda-context ;
MAIN: cuda-prefix-sum
#cuda-devices iota [ n>cuda-device ] map ;
: with-each-cuda-device ( quot -- )
- [ enumerate-cuda-devices ] dip '[ <launcher> _ with-cuda ] each ; inline
+ [ enumerate-cuda-devices ] dip '[ 0 _ with-cuda-context ] each ; inline
: cuda-device-properties ( n -- properties )
[ CUdevprop <struct> ] dip
grid-size block-size per-block-shared ; inline
: distribute-jobs ( job-count per-job-shared -- launcher )
- cuda-device get cuda-device-properties
+ context-device cuda-device-properties
[ sharedMemPerBlock>> ] [ maxThreadsDim>> first ] bi
(distribute-jobs) 3<<< ; inline
fry gpu.buffers kernel ;
IN: cuda.gl
-: create-gl-cuda-context ( flags device -- context )
+: create-gl-cuda-context ( device flags -- context )
+ swap
[ CUcontext <c-object> ] 2dip
[ cuGLCtxCreate cuda-error ] 3keep 2drop *void* ; inline
-: with-gl-cuda-context ( flags device quot -- )
+: with-gl-cuda-context ( device flags quot -- )
[ [ create-gl-cuda-context ] (set-up-cuda-context) ] dip (with-cuda-context) ; inline
: gl-buffer>resource ( gl-buffer flags -- resource )
prettyprint sequences ;
IN: cuda.utils
-SYMBOL: cuda-device
-SYMBOL: cuda-context
SYMBOL: cuda-module
SYMBOL: cuda-function
-SYMBOL: cuda-launcher
SYMBOL: cuda-modules
SYMBOL: cuda-functions
get-function-ptr* cuda-function set
] dip call ; inline
-: create-context ( flags device -- context )
+: create-context ( device flags -- context )
+ swap
[ CUcontext <c-object> ] 2dip
[ cuCtxCreate cuda-error ] 3keep 2drop *void* ; inline
: sync-context ( -- )
cuCtxSynchronize cuda-error ; inline
+: context-device ( -- n )
+ CUdevice <c-object> [ cuCtxGetDevice cuda-error ] keep *int ; inline
+
: destroy-context ( context -- ) cuCtxDestroy cuda-error ; inline
: launch-function* ( function -- ) cuLaunch cuda-error ; inline
: function-shared-size ( n -- )
[ cuda-function get ] dip
cuFuncSetSharedSize cuda-error ; inline
+