1 ! Copyright (C) 2010 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien.c-types alien.data alien.strings arrays
4 assocs byte-arrays classes.struct combinators cuda
5 cuda.contexts cuda.ffi cuda.libraries fry io io.encodings.utf8
6 kernel locals math math.order math.parser namespaces
7 prettyprint sequences ;
! Returns the device count reported by cuDeviceGetCount.
! An int-sized C object is allocated as the out-parameter; the driver's
! return code is handed to cuda-error before the int is read back.
: #cuda-devices ( -- n )
    int <c-object> dup cuDeviceGetCount cuda-error *int ;
! Looks up a device via cuDeviceGet, passing n as its second argument.
! A CUdevice-sized C object receives the result; the driver's return
! code goes through cuda-error, then the stored value is read as an int.
:: n>cuda-device ( n -- device )
    CUdevice <c-object> :> handle
    handle n cuDeviceGet cuda-error
    handle *int ;
! Collects the result of n>cuda-device for every index in
! 0 .. #cuda-devices - 1, in ascending order.
: enumerate-cuda-devices ( -- devices )
    #cuda-devices iota [ n>cuda-device ] { } map-as ;
! Runs quot once per enumerated device by calling with-cuda-context
! with the device, the literal 0 and quot.
! NOTE(review): the 0 is presumably the context-creation flags argument
! of with-cuda-context -- confirm against cuda.contexts.
:: with-each-cuda-device ( quot -- )
    enumerate-cuda-devices
    [ 0 quot with-cuda-context ] each ; inline
! Fetches the properties of device n.
! A fresh CUdevprop struct is passed to cuDeviceGetProperties together
! with n; the return code is checked by cuda-error and the struct is
! returned.
:: cuda-device-properties ( n -- properties )
    CUdevprop <struct> :> props
    props n cuDeviceGetProperties cuda-error
    props ;
! Builds an association list pairing each enumerated device with the
! struct returned by cuda-device-properties for it.
: cuda-devices ( -- assoc )
    enumerate-cuda-devices
    dup [ cuda-device-properties ] map zip ;
! Returns the name of device n as a string.
! cuDeviceGetName is given a 256-byte buffer, that same size, and n; after
! cuda-error has checked the return code, the buffer is decoded as a
! UTF-8 C string.
:: cuda-device-name ( n -- string )
    256 :> capacity
    capacity <byte-array> :> buffer
    buffer capacity n cuDeviceGetName cuda-error
    buffer utf8 alien>string ;
! Returns a two-element array holding the two ints that
! cuDeviceComputeCapability writes for device n, in argument order.
! NOTE(review): the locals are named major/minor after the CUDA driver
! API's out-parameters -- confirm against cuda.ffi.
:: cuda-device-capability ( n -- pair )
    int <c-object> :> major
    int <c-object> :> minor
    major minor n cuDeviceComputeCapability cuda-error
    major *int minor *int 2array ;
39 : cuda-device-memory ( n -- bytes )
40 [ uint <c-object> ] dip
41 [ cuDeviceTotalMem cuda-error ]
44 : cuda-device-attribute ( attribute n -- n )
45 [ int <c-object> ] 2dip
46 [ cuDeviceGetAttribute cuda-error ]
49 : cuda-device. ( n -- )
51 [ "Device: " write number>string print ]
52 [ "Name: " write cuda-device-name print ]
53 [ "Memory: " write cuda-device-memory number>string print ]
56 cuda-device-capability [ number>string ] map " " join print
58 [ "Properties: " write cuda-device-properties . ]
60 "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write
61 CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap
62 cuda-device-attribute number>string print
68 "CUDA Version: " write cuda-version number>string print nl
69 #cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
72 [ 1 - + ] keep /i ; inline
! Returns the result of cuda-device-properties applied to whatever
! context-device yields (context-device is defined outside this file
! section).
: context-device-properties ( -- props )
    context-device
    cuda-device-properties ; inline
! Derives a launch shape for job-count jobs.
!   job-count        total number of jobs to place
!   per-job-shared   shared-memory amount needed by one job (0 = none)
!   max-shared-size  shared-memory amount available to one block
!   max-block-size   largest permitted block size
! Outputs the grid size, the block size and block-size * per-job-shared.
! NOTE(review): if job-count is 0, or per-job-shared exceeds
! max-shared-size, block-limit below is 0 and up/i is then called with a
! zero divisor -- confirm callers never pass such values.
:: (distribute-jobs) ( job-count per-job-shared max-shared-size max-block-size
                       -- grid-size block-size per-block-shared )
    ! Block-size ceiling: how many jobs fit in the shared-memory budget
    ! (unbounded when jobs use none), capped by max-block-size and by
    ! the number of jobs.
    per-job-shared zero?
    [ max-block-size ]
    [ max-shared-size per-job-shared /i max-block-size min ] if
    job-count min :> block-limit
    ! up/i is called twice: first with the block-size ceiling to obtain
    ! the block count, then with that block count to obtain the block
    ! size.
    job-count block-limit up/i :> blocks
    job-count blocks up/i :> jobs-per-block
    blocks jobs-per-block dup per-job-shared * ; inline
! Builds a launcher for job-count jobs that each need per-job-shared
! shared memory. The limits come from the sharedMemPerBlock and
! maxThreadsPerBlock slots of context-device-properties; the three
! values produced by (distribute-jobs) are passed to <grid-shared>.
:: distribute-jobs ( job-count per-job-shared -- launcher )
    context-device-properties :> props
    job-count per-job-shared
    props sharedMemPerBlock>>
    props maxThreadsPerBlock>>
    (distribute-jobs) <grid-shared> ; inline