+++ /dev/null
-IN: cuda.constants
-
-CONSTANT: cuda-shared-size 16384
-CONSTANT: cuda-warp-size 32
io.encodings.utf8 kernel lexer locals macros math math.parser
namespaces nested-comments opengl.gl.extensions parser
prettyprint quotations sequences words cuda.libraries ;
-QUALIFIED-WITH: alien.c-types a
+QUALIFIED-WITH: alien.c-types c
IN: cuda
TUPLE: launcher
! Maps a C type to the pair used when setting a kernel parameter: a size
! (4 in every clause) and a quotation holding the setter word.
! float selects cuda-float*; int, uint, pointer types and void* select
! cuda-int*. There is no fallback clause for other types.
: c-type>cuda-setter ( c-type -- n cuda-type )
{
- { [ dup a:int = ] [ drop 4 [ cuda-int* ] ] }
- { [ dup a:uint = ] [ drop 4 [ cuda-int* ] ] }
- { [ dup a:float = ] [ drop 4 [ cuda-float* ] ] }
- { [ dup a:pointer? ] [ drop 4 [ cuda-int* ] ] }
- { [ dup a:void* = ] [ drop 4 [ cuda-int* ] ] }
+ { [ dup c:int = ] [ drop 4 [ cuda-int* ] ] }
+ { [ dup c:uint = ] [ drop 4 [ cuda-int* ] ] }
+ { [ dup c:float = ] [ drop 4 [ cuda-float* ] ] }
+ { [ dup c:pointer? ] [ drop 4 [ cuda-int* ] ] }
+ { [ dup c:void* = ] [ drop 4 [ cuda-int* ] ] }
} cond ;
<PRIVATE
[ run-function-launcher ] 2bi
]
]
- [ 2nip \ function-launcher suffix a:void function-effect ]
+ [ 2nip \ function-launcher suffix c:void function-effect ]
3bi define-declared ;
--- /dev/null
+! (c)2010 Joe Groff bsd license
+USING: cuda.devices tools.test ;
+IN: cuda.devices.tests
+
+! Each case passes job-count, per-job-shared, max-shared-size and
+! max-block-size to (distribute-jobs) and expects grid-size, block-size
+! and per-block-shared, in that order.
+[ 1 5 100 ] [ 5 20 100 10 (distribute-jobs) ] unit-test
+[ 2 5 100 ] [ 10 20 100 10 (distribute-jobs) ] unit-test
+[ 2 5 100 ] [ 10 20 200 5 (distribute-jobs) ] unit-test
+[ 2 5 100 ] [ 10 20 300 6 (distribute-jobs) ] unit-test
+[ 2 6 120 ] [ 11 20 300 6 (distribute-jobs) ] unit-test
+[ 1 10 200 ] [ 10 20 200 10 (distribute-jobs) ] unit-test
+! With per-job-shared of zero only max-block-size and job-count limit
+! the block size, and per-block-shared is zero.
+[ 1 10 0 ] [ 10 0 200 10 (distribute-jobs) ] unit-test
+[ 2 5 0 ] [ 10 0 200 9 (distribute-jobs) ] unit-test
+
! Copyright (C) 2010 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: alien.c-types alien.data alien.strings arrays assocs
-byte-arrays classes.struct combinators cuda cuda.ffi cuda.utils
-fry io io.encodings.utf8 kernel math.parser prettyprint
-sequences ;
+USING: accessors alien.c-types alien.data alien.strings arrays
+assocs byte-arrays classes.struct combinators cuda cuda.ffi
+cuda.syntax cuda.utils fry io io.encodings.utf8 kernel locals
+math math.order math.parser namespaces prettyprint sequences ;
IN: cuda.devices
! Returns the device count written by cuDeviceGetCount into a freshly
! allocated int; a non-success status is handled by cuda-error.
: #cuda-devices ( -- n )
- init-cuda
int <c-object> [ cuDeviceGetCount cuda-error ] keep *int ;
! Converts the device ordinal n into a device handle: cuDeviceGet fills a
! freshly allocated CUdevice, which is then read back as an int.
: n>cuda-device ( n -- device )
- init-cuda
[ CUdevice <c-object> ] dip [ cuDeviceGet cuda-error ] 2keep drop *int ;
: enumerate-cuda-devices ( -- devices )
[ enumerate-cuda-devices ] dip '[ <launcher> _ with-cuda ] each ; inline
! Returns the CUdevprop structure that cuDeviceGetProperties fills in for
! device n. The added lines allocate the struct directly with <struct>,
! so the separate memory>struct conversion is no longer needed.
: cuda-device-properties ( n -- properties )
- init-cuda
- [ CUdevprop <c-object> ] dip
- [ cuDeviceGetProperties cuda-error ] 2keep drop
- CUdevprop memory>struct ;
+ [ CUdevprop <struct> ] dip
+ [ cuDeviceGetProperties cuda-error ] 2keep drop ;
! Builds an assoc that pairs every element produced by
! enumerate-cuda-devices with its cuda-device-properties.
: cuda-devices ( -- assoc )
enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
! Returns the name of device n as a string. cuDeviceGetName writes into a
! 256-byte buffer (the buffer length is passed alongside it), and the
! buffer is then decoded as a UTF-8 C string.
: cuda-device-name ( n -- string )
- init-cuda
[ 256 [ <byte-array> ] keep ] dip
[ cuDeviceGetName cuda-error ]
[ 2drop utf8 alien>string ] 3bi ;
! Returns a two-element array holding the two ints that
! cuDeviceComputeCapability writes for device n, in argument order.
: cuda-device-capability ( n -- pair )
- init-cuda
[ int <c-object> int <c-object> ] dip
[ cuDeviceComputeCapability cuda-error ]
[ drop [ *int ] bi@ ] 3bi 2array ;
! Returns the value cuDeviceTotalMem reports for device n. The result is
! read through a uint, so it is limited to the range of that type.
: cuda-device-memory ( n -- bytes )
- init-cuda
[ uint <c-object> ] dip
[ cuDeviceTotalMem cuda-error ]
[ drop *uint ] 2bi ;
! Queries one attribute of device n with cuDeviceGetAttribute and returns
! the int it writes.
: cuda-device-attribute ( attribute n -- n )
- init-cuda
[ int <c-object> ] 2dip
[ cuDeviceGetAttribute cuda-error ]
[ 2drop *int ] 3bi ;
: cuda-device. ( n -- )
- init-cuda
{
[ "Device: " write number>string print ]
[ "Name: " write cuda-device-name print ]
"CUDA Version: " write cuda-version number>string print nl
#cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
+! Integer quotient of x by y after adding y - 1 to x, i.e. x / y rounded
+! up when both are positive integers.
+:: up/i ( x y -- z )
+ x y 1 - + y /i ; inline
+
+! Chooses a launch shape for job-count jobs. The block size limit is
+! max-block-size, further capped by job-count and, when per-job-shared is
+! non-zero, by max-shared-size divided by per-job-shared. grid-size is
+! job-count divided by that limit, rounded up; block-size is job-count
+! divided by grid-size, rounded up; per-block-shared is block-size times
+! per-job-shared.
+:: (distribute-jobs) ( job-count per-job-shared max-shared-size max-block-size
+ -- grid-size block-size per-block-shared )
+ per-job-shared zero?
+ [ max-block-size ]
+ [ max-shared-size per-job-shared /i max-block-size min ] if
+ job-count min :> block-limit
+ job-count block-limit up/i :> blocks
+ job-count blocks up/i :> threads
+
+ blocks threads dup per-job-shared * ; inline
+
+! Reads sharedMemPerBlock and the first element of maxThreadsDim from the
+! properties of the device stored in the cuda-device variable, feeds them
+! to (distribute-jobs) as the shared-memory and block-size limits, and
+! passes the three results to 3<<<.
+:: distribute-jobs ( job-count per-job-shared -- launcher )
+ cuda-device get cuda-device-properties :> props
+ job-count per-job-shared
+ props sharedMemPerBlock>>
+ props maxThreadsDim>> first
+ (distribute-jobs) 3<<< ; inline
! Copyright (C) 2010 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien.c-types alien.data alien.strings arrays
-assocs byte-arrays classes.struct combinators cuda.ffi io
-io.backend io.encodings.utf8 kernel math.parser namespaces
+assocs byte-arrays classes.struct combinators cuda.devices cuda.ffi
+io io.backend io.encodings.utf8 kernel math.parser namespaces
prettyprint sequences ;
IN: cuda.utils
dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ;
! Calls cuInit with flags 0 and hands the status to cuda-error.
: init-cuda ( -- )
- 0 cuInit cuda-error ;
+ 0 cuInit cuda-error ; inline
! Returns the int that cuDriverGetVersion writes.
: cuda-version ( -- n )
int <c-object> [ cuDriverGetVersion cuda-error ] keep *int ;
! Creates a context with cuCtxCreate for the given flags and device and
! returns the handle it writes into a freshly allocated CUcontext.
: create-context ( flags device -- context )
[ CUcontext <c-object> ] 2dip
- [ cuCtxCreate cuda-error ] 3keep 2drop *void* ;
+ [ cuCtxCreate cuda-error ] 3keep 2drop *void* ; inline
-: destroy-context ( context -- ) cuCtxDestroy cuda-error ;
+! Calls cuCtxSynchronize and hands the status to cuda-error.
+: sync-context ( -- )
+ cuCtxSynchronize cuda-error ; inline
-: launch-function* ( function -- ) cuLaunch cuda-error ;
+! Passes context to cuCtxDestroy and hands the status to cuda-error.
+: destroy-context ( context -- ) cuCtxDestroy cuda-error ; inline
-: launch-function ( -- ) cuda-function get cuLaunch cuda-error ;
+! Passes function to cuLaunch and hands the status to cuda-error.
+: launch-function* ( function -- ) cuLaunch cuda-error ; inline
+
+! Like launch-function*, but takes the function from the cuda-function
+! variable.
+: launch-function ( -- ) cuda-function get cuLaunch cuda-error ; inline
! Passes function, offset and value to cuParamSeti and hands the status
! to cuda-error.
: cuda-int* ( function offset value -- )
- cuParamSeti cuda-error ;
+ cuParamSeti cuda-error ; inline
! Like cuda-int*, but takes the function from the cuda-function variable.
: cuda-int ( offset value -- )
- [ cuda-function get ] 2dip cuda-int* ;
+ [ cuda-function get ] 2dip cuda-int* ; inline
! Passes function, offset and value to cuParamSetf and hands the status
! to cuda-error.
: cuda-float* ( function offset value -- )
- cuParamSetf cuda-error ;
+ cuParamSetf cuda-error ; inline
! Like cuda-float*, but takes the function from the cuda-function
! variable.
: cuda-float ( offset value -- )
- [ cuda-function get ] 2dip cuda-float* ;
+ [ cuda-function get ] 2dip cuda-float* ; inline
! Passes function, offset, ptr and n to cuParamSetv and hands the status
! to cuda-error.
: cuda-vector* ( function offset ptr n -- )
- cuParamSetv cuda-error ;
+ cuParamSetv cuda-error ; inline
! Like cuda-vector*, but takes the function from the cuda-function
! variable.
: cuda-vector ( offset ptr n -- )
- [ cuda-function get ] 3dip cuda-vector* ;
+ [ cuda-function get ] 3dip cuda-vector* ; inline
! Passes function and n to cuParamSetSize and hands the status to
! cuda-error.
: param-size* ( function n -- )
- cuParamSetSize cuda-error ;
+ cuParamSetSize cuda-error ; inline
! Like param-size*, but takes the function from the cuda-function
! variable.
: param-size ( n -- )
- [ cuda-function get ] dip param-size* ;
+ [ cuda-function get ] dip param-size* ; inline
! Passes function, width and height to cuLaunchGrid and hands the status
! to cuda-error.
: launch-function-grid* ( function width height -- )
- cuLaunchGrid cuda-error ;
+ cuLaunchGrid cuda-error ; inline
! Like launch-function-grid*, but takes the function from the
! cuda-function variable. Delegates to the starred word instead of
! repeating its body, as cuda-int and param-size do.
: launch-function-grid ( width height -- )
[ cuda-function get ] 2dip
- cuLaunchGrid cuda-error ;
+ launch-function-grid* ; inline
! Passes function, x, y and z to cuFuncSetBlockShape and hands the status
! to cuda-error.
: function-block-shape* ( function x y z -- )
- cuFuncSetBlockShape cuda-error ;
+ cuFuncSetBlockShape cuda-error ; inline
! Like function-block-shape*, but takes the function from the
! cuda-function variable. Delegates to the starred word instead of
! repeating its body, as cuda-vector does.
: function-block-shape ( x y z -- )
[ cuda-function get ] 3dip
- cuFuncSetBlockShape cuda-error ;
+ function-block-shape* ; inline
! Passes function and n to cuFuncSetSharedSize and hands the status to
! cuda-error.
: function-shared-size* ( function n -- )
- cuFuncSetSharedSize cuda-error ;
+ cuFuncSetSharedSize cuda-error ; inline
! Like function-shared-size*, but takes the function from the
! cuda-function variable. Delegates to the starred word instead of
! repeating its body, as param-size does.
: function-shared-size ( n -- )
[ cuda-function get ] dip
- cuFuncSetSharedSize cuda-error ;
+ function-shared-size* ; inline