dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ;
: init-cuda ( -- )
- 0 cuInit cuda-error ;
+ 0 cuInit cuda-error ; inline
: cuda-version ( -- n )
int <c-object> [ cuDriverGetVersion cuda-error ] keep *int ;
: create-context ( flags device -- context )
[ CUcontext <c-object> ] 2dip
- [ cuCtxCreate cuda-error ] 3keep 2drop *void* ;
+ [ cuCtxCreate cuda-error ] 3keep 2drop *void* ; inline
-: destroy-context ( context -- ) cuCtxDestroy cuda-error ;
+: sync-context ( -- )
+ cuCtxSynchronize cuda-error ; inline
-: launch-function* ( function -- ) cuLaunch cuda-error ;
+: destroy-context ( context -- ) cuCtxDestroy cuda-error ; inline
-: launch-function ( -- ) cuda-function get cuLaunch cuda-error ;
+: launch-function* ( function -- ) cuLaunch cuda-error ; inline
+
+: launch-function ( -- ) cuda-function get cuLaunch cuda-error ; inline
: cuda-int* ( function offset value -- )
- cuParamSeti cuda-error ;
+ cuParamSeti cuda-error ; inline
: cuda-int ( offset value -- )
- [ cuda-function get ] 2dip cuda-int* ;
+ [ cuda-function get ] 2dip cuda-int* ; inline
: cuda-float* ( function offset value -- )
- cuParamSetf cuda-error ;
+ cuParamSetf cuda-error ; inline
: cuda-float ( offset value -- )
- [ cuda-function get ] 2dip cuda-float* ;
+ [ cuda-function get ] 2dip cuda-float* ; inline
: cuda-vector* ( function offset ptr n -- )
- cuParamSetv cuda-error ;
+ cuParamSetv cuda-error ; inline
: cuda-vector ( offset ptr n -- )
- [ cuda-function get ] 3dip cuda-vector* ;
+ [ cuda-function get ] 3dip cuda-vector* ; inline
: param-size* ( function n -- )
- cuParamSetSize cuda-error ;
+ cuParamSetSize cuda-error ; inline
: param-size ( n -- )
- [ cuda-function get ] dip param-size* ;
+ [ cuda-function get ] dip param-size* ; inline
: launch-function-grid* ( function width height -- )
- cuLaunchGrid cuda-error ;
+ cuLaunchGrid cuda-error ; inline
: launch-function-grid ( width height -- )
[ cuda-function get ] 2dip
- cuLaunchGrid cuda-error ;
+ cuLaunchGrid cuda-error ; inline
: function-block-shape* ( function x y z -- )
- cuFuncSetBlockShape cuda-error ;
+ cuFuncSetBlockShape cuda-error ; inline
: function-block-shape ( x y z -- )
[ cuda-function get ] 3dip
- cuFuncSetBlockShape cuda-error ;
+ cuFuncSetBlockShape cuda-error ; inline
: function-shared-size* ( function n -- )
- cuFuncSetSharedSize cuda-error ;
+ cuFuncSetSharedSize cuda-error ; inline
: function-shared-size ( n -- )
[ cuda-function get ] dip
- cuFuncSetSharedSize cuda-error ;
+ cuFuncSetSharedSize cuda-error ; inline