1 ! Copyright (C) 2010 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien.c-types alien.data alien.strings arrays
4 assocs byte-arrays classes.struct combinators cuda cuda.ffi
5 cuda.syntax cuda.utils fry io io.encodings.utf8 kernel locals
6 math math.order math.parser namespaces prettyprint sequences ;
! Ask the driver how many devices it reports.
! A fresh int-sized C object is handed to cuDeviceGetCount as the
! out-parameter; the call's result code goes to cuda-error and the
! int is then read back.
:: #cuda-devices ( -- n )
    int <c-object> :> count-ptr
    count-ptr cuDeviceGetCount cuda-error
    count-ptr *int ;
! Turn an ordinal n into a device handle.
! A CUdevice-sized C object receives the handle from cuDeviceGet;
! the result code goes to cuda-error and the handle is read with *int.
: n>cuda-device ( n -- device )
    CUdevice <c-object>
    [ swap cuDeviceGet cuda-error ] keep
    *int ;
! Collect the device handle for every ordinal in 0 .. #cuda-devices - 1,
! in ascending order, as an array.
: enumerate-cuda-devices ( -- devices )
    #cuda-devices iota
    [ n>cuda-device ] { } map-as ;
! Run quot once per enumerated device: each device is wrapped with
! <launcher> and passed, together with quot, to with-cuda.
: with-each-cuda-device ( quot -- )
    enumerate-cuda-devices swap
    '[ <launcher> _ with-cuda ] each ; inline
! Fetch the CUdevprop struct for device n.
! An empty CUdevprop is filled in by cuDeviceGetProperties; the
! result code goes to cuda-error and the filled struct is returned.
:: cuda-device-properties ( n -- properties )
    CUdevprop <struct> :> props
    props n cuDeviceGetProperties cuda-error
    props ;
! Build an association list of { device properties } pairs,
! one pair per enumerated device, in enumeration order.
: cuda-devices ( -- assoc )
    enumerate-cuda-devices
    [ dup cuda-device-properties 2array ] { } map-as ;
! Return the name of device n as a string.
! cuDeviceGetName is given a 256-byte buffer together with that
! length; after cuda-error has seen the result code, the buffer is
! decoded as a UTF-8 C string.
:: cuda-device-name ( n -- string )
    256 :> capacity
    capacity <byte-array> :> name-buffer
    name-buffer capacity n cuDeviceGetName cuda-error
    name-buffer utf8 alien>string ;
! Return the two ints reported by cuDeviceComputeCapability for
! device n, as a 2-element array in the order the call fills them.
:: cuda-device-capability ( n -- pair )
    int <c-object> :> first-out
    int <c-object> :> second-out
    first-out second-out n cuDeviceComputeCapability cuda-error
    first-out *int second-out *int 2array ;
! Return the value cuDeviceTotalMem reports for device n.
! A uint-sized C object is the out-parameter. 2bi first runs the
! driver call (result code to cuda-error) and then reads the uint
! back, discarding the device argument.
! NOTE(review): the out-parameter is a uint here; confirm this matches
! the width the bound cuDeviceTotalMem actually writes.
: cuda-device-memory ( n -- bytes )
    [ uint <c-object> ] dip
    [ cuDeviceTotalMem cuda-error ]
    [ drop *uint ] 2bi ;
! Query a single integer attribute of device n.
! An int-sized C object is placed under ( attribute n ) as the
! out-parameter. 3bi first runs cuDeviceGetAttribute (result code to
! cuda-error) and then reads the int back, discarding the attribute
! and device arguments.
: cuda-device-attribute ( attribute n -- n )
    [ int <c-object> ] 2dip
    [ cuDeviceGetAttribute cuda-error ]
    [ 2drop *int ] 3bi ;
! Print a labelled, line-per-field report for device n to the
! current output stream: ordinal, name, memory, compute capability,
! the properties struct, and the GPU_OVERLAP attribute.
! cleave applies every quotation in the array to the same n.
: cuda-device. ( n -- )
    {
        [ "Device: " write number>string print ]
        [ "Name: " write cuda-device-name print ]
        [ "Memory: " write cuda-device-memory number>string print ]
        [
            "Capability: " write
            cuda-device-capability [ number>string ] map " " join print
        ]
        [ "Properties: " write cuda-device-properties . ]
        [
            "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write
            ! cuda-device-attribute expects ( attribute n ), so the
            ! attribute constant is swapped underneath n.
            CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap
            cuda-device-attribute number>string print
        ]
    } cleave ;
! Print the CUDA version followed by a report for every device
! ordinal, with a blank line between consecutive device reports.
! NOTE(review): presumably the driver must already be initialised
! when this runs - confirm whether an init step belongs here.
: cuda. ( -- )
    "CUDA Version: " write cuda-version number>string print nl
    #cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
! Integer division of x by y, rounded up: ( x + y - 1 ) /i y.
! y is kept on the stack so it serves as both addend and divisor.
: up/i ( x y -- z )
    [ 1 - + ] keep /i ; inline
! Split job-count jobs into a grid of equally sized blocks.
! The per-block thread limit is max-block-size when no shared memory
! is requested per job; otherwise it is additionally capped by how
! many jobs' worth of shared memory fit in max-shared-size. The limit
! never exceeds job-count. The grid size is job-count divided by that
! limit (rounded up), and the block size is job-count divided by the
! grid size (rounded up). per-block-shared is block-size times
! per-job-shared.
:: (distribute-jobs) ( job-count per-job-shared max-shared-size max-block-size
                       -- grid-size block-size per-block-shared )
    per-job-shared zero?
    [ max-block-size ]
    [ max-shared-size per-job-shared /i max-block-size min ] if
    job-count min :> threads-cap

    job-count threads-cap up/i :> grid-size
    job-count grid-size up/i :> block-size

    grid-size
    block-size
    block-size per-job-shared * ; inline
! Build a launcher for job-count jobs on the device held in the
! cuda-device variable. That device's sharedMemPerBlock and the first
! element of its maxThreadsDim are fed to (distribute-jobs), and the
! three resulting values are passed to 3<<<.
: distribute-jobs ( job-count per-job-shared -- launcher )
    cuda-device get cuda-device-properties
    [ sharedMemPerBlock>> ] keep
    maxThreadsDim>> first
    (distribute-jobs) 3<<< ; inline