1 ! Copyright (C) 2022 Doug Coleman.
2 ! See https://factorcode.org/license.txt for BSD license.
3 USING: accessors alien alien.c-types alien.data alien.enums
4 alien.strings ascii assocs byte-arrays classes classes.struct
5 combinators combinators.extras combinators.short-circuit
6 combinators.smart discord io io.backend io.encodings.utf8
7 io.files.info kernel layouts libc libclang.ffi make math
8 math.parser multiline namespaces prettyprint sequences
9 sequences.private sets sorting splitting strings ;
13 : clang-state> ( -- clang-state ) clang-state get-global ;
17 defs-counter c-defs-by-name c-defs-by-order
19 unnamed-counter unnamed-table
21 out-forms-counter out-forms out-forms-by-name
22 out-forms-written out-form-names-written ;
24 : <libclang-state> ( -- state )
27 H{ } clone >>c-defs-by-name
28 H{ } clone >>c-defs-by-order
30 H{ } clone >>child-forms
32 H{ } clone >>unnamed-table
35 H{ } clone >>out-forms
36 H{ } clone >>out-forms-by-name
37 HS{ } clone >>out-forms-written
38 HS{ } clone >>out-form-names-written ;
40 : next-defs-counter ( libclang-state -- n ) [ dup 1 + ] change-defs-counter drop ;
41 : next-unnamed-counter ( libclang-state -- n ) [ dup 1 + ] change-unnamed-counter drop ;
42 : next-out-forms-counter ( libclang-state -- n ) [ dup 1 + ] change-out-forms-counter drop ;
44 GENERIC: def>out-form ( obj -- string )
46 : out-form-written? ( string -- ? )
47 clang-state> out-forms-written>> in? ; inline
49 : out-form-name-written? ( string -- ? )
50 clang-state> out-form-names-written>> in? ; inline
52 : save-out-form ( string def -- )
56 over out-form-written? [
57 ! dup name>> out-form-name-written? [
64 [ next-out-forms-counter ]
65 [ out-forms>> set-at ] bi
67 [ nipd [ name>> ] dip out-form-names-written>> adjoin ]
68 [ nip out-forms-written>> adjoin ]
69 [ [ name>> ] dip out-forms-by-name>> push-at ]
74 ! some forms must be defined out of order, e.g. anonymous unions/structs
75 : def>out-forms ( obj -- )
76 [ def>out-form ] keep save-out-form ;
78 : peek-current-form ( -- n )
79 clang-state> c-forms>> ?last ; inline
84 : push-child-form ( form -- )
85 ! dup order>> c-defs-by-order get-global set-at ; inline
86 dup parent-order>> clang-state> child-forms>> push-at ; inline
88 : with-new-form ( quot -- n )
89 clang-state> [ next-defs-counter ] [ c-forms>> ] bi push
91 clang-state> c-forms>> pop ; inline
93 ERROR: unknown-form name ;
94 GENERIC: print-deferred ( obj -- )
96 ! foo*** -> foo, todo: other cases?
97 : factor-type-name ( type -- type' ) [ CHAR: * = ] trim-tail ;
99 : ?lookup-type ( type -- obj/f )
101 clang-state> c-defs-by-name>> ?at [ drop f ] unless ;
103 : lookup-order ( obj -- order/f ) type>> ?lookup-type [ order>> ] ?call -1 or ;
105 M: object print-deferred
106 type>> ?lookup-type [ def>out-forms ] when* ;
108 : unnamed? ( string -- ? ) "(unnamed" swap subseq? ; inline
109 : unnamed-exists? ( string -- value/key ? ) clang-state> unnamed-table>> ?at ; inline
110 : lookup-unnamed ( type string -- type-name )
114 [ clang-state> next-unnamed-counter number>string append ] dip
116 "RECORDING: " gwrite dup g... gflush
117 [ clang-state> unnamed-table>> set-at ] keepd
120 : ?unnamed ( string type -- string' ? )
122 swap lookup-unnamed t
128 { return-type string }
133 : <c-function> ( return-type name args -- c-function )
138 clang-state> next-defs-counter >>order ;
145 : <c-struct> ( name order -- c-struct )
155 : <c-union> ( name order -- c-union )
166 : <c-enum> ( name order -- c-enum )
175 { parent-order integer }
178 : <c-arg> ( name type -- c-arg )
182 peek-current-form >>parent-order
183 clang-state> next-defs-counter >>order ;
189 { parent-order integer }
192 : <c-field> ( name type -- c-field )
196 peek-current-form >>parent-order
197 clang-state> next-defs-counter >>order ;
205 : <c-typedef> ( type name -- c-typedef )
209 clang-state> next-defs-counter >>order ;
211 M: c-function def>out-form
214 [ drop "FUNCTION: " ]
215 [ return-type>> " " ]
217 [ args>> dup empty? ")\n" " )\n" ? ]
219 ] "" append-outputs-as ;
221 : ignore-typedef? ( typedef -- ? )
222 [ type>> ] [ name>> ] bi
223 { [ = ] [ [ empty? ] either? ] } 2|| ;
225 M: c-typedef def>out-form
226 dup ignore-typedef? [
235 ] "" append-outputs-as
238 ERROR: unknown-child-forms order ;
239 M: c-field def>out-form
246 ] "" append-outputs-as ;
248 : print-defers ( current-order slots -- )
250 tuck lookup-order < [
257 : empty-struct? ( c-struct -- ? )
258 order>> clang-state> child-forms>> key? not ;
260 M: c-struct def>out-form
262 name>> "C-TYPE: " prepend
269 order>> dup clang-state> child-forms>> ?at [ drop { } ] unless
271 [ nip [ def>out-form ] map "\n" join " ;\n" append ] 2bi
274 ] "" append-outputs-as
277 M: c-enum def>out-form
283 order>> dup clang-state> child-forms>> ?at [ drop { } ] unless
285 [ nip [ def>out-form ] map "\n" join " ;\n" append ] 2bi
288 ] "" append-outputs-as ;
290 M: c-union def>out-form
293 [ drop "UNION-STRUCT: " ]
296 order>> dup clang-state> child-forms>> ?at [ drop { } ] unless
298 [ nip [ def>out-form ] map "\n" join " ;\n" append ] 2bi
301 ] "" append-outputs-as ;
303 M: object def>out-form
304 class-of name>> "unknown object: " prepend ;
306 : set-definition ( named -- )
307 [ dup name>> clang-state> c-defs-by-name>> set-at ]
308 [ dup order>> clang-state> c-defs-by-order>> set-at ] bi ;
310 : set-typedef ( typedef -- )
311 dup ignore-typedef? [
314 [ type>> ] [ name>> ] bi clang-state> typedefs>> set-at
317 : clang-get-cstring ( CXString -- string )
318 clang_getCString [ utf8 alien>string ] [ clang_disposeString ] bi ;
320 : trim-blanks ( string -- string' )
321 [ blank? ] trim ; inline
323 : cut-tail ( string quot -- before after ) (trim-tail) cut ; inline
325 : cell-bytes ( -- n )
326 cell-bits 8 /i ; inline
328 : get-tokens ( tokens ntokens -- tokens )
330 _ * swap <displaced-alien>
334 : clang-get-file-max-range ( CXTranslationUnit path -- CXSourceRange )
335 [ dupd clang_getFile 0 clang_getLocationForOffset ]
336 [ dupd [ clang_getFile ] [ nip file-info size>> ] 2bi clang_getLocationForOffset ] 2bi
339 : clang-tokenize ( CXTranslationUnit CXSourceRange -- tokens ntokens )
342 [ clang_tokenize ] 2keep
346 : tokenize-path ( tu path -- tokens ntokens )
347 [ drop ] [ clang-get-file-max-range ] 2bi
350 : tokenize-translation-unit ( CXTranslationUnit -- tokens ntokens )
351 [ ] [ clang_getTranslationUnitCursor clang_getCursorExtent ] bi
354 : tokenize-cursor ( cursor -- tokens ntokens )
355 [ clang_Cursor_getTranslationUnit ] [ clang_getCursorExtent ] bi
358 : dispose-tokens ( cursor tokens ntokens -- )
359 [ clang_Cursor_getTranslationUnit ] 2dip clang_disposeTokens ;
361 :: with-cursor-tokens ( cursor quot: ( tu token -- obj ) -- )
362 cursor clang_Cursor_getTranslationUnit :> tu
363 cursor tokenize-cursor :> ( tokens ntokens )
364 tokens ntokens <iota>
365 cell-bytes :> bytesize
368 [ tu ] 2dip bytesize * swap <displaced-alien> @
370 tu tokens ntokens dispose-tokens ; inline
372 : clang-get-token-spelling ( CXTranslationUnit CXToken -- string )
373 clang_getTokenSpelling clang-get-cstring ;
375 DEFER: cursor>c-struct
376 DEFER: cursor>c-union
378 :: cursor-type ( cursor -- string )
379 cursor clang_getCursorType clang_getTypeSpelling clang-get-cstring
383 [ CHAR: * = ] cut-tail
384 [ [ trim-blanks ] dip append ] when*
388 { [ dup "struct " head? ] [
390 clang-state> unnamed-table>> ?at or
393 ! libclang uses two forms for unnamed union (why!?)
394 ! union (unnamed at /Users/erg/factor/elf2.h:39:3)
395 ! union (unnamed union at /Users/erg/factor/elf2.h:39:3)
396 { [ dup "union " head? ] [
398 clang-state> unnamed-table>> ?at or
400 { [ dup "_Bool" = ] [ drop "bool" ] }
401 { [ "int8_t" ?head ] [ trim-blanks "char" prepend ] }
402 { [ "int16_t" ?head ] [ trim-blanks "short" prepend ] }
403 { [ "int32_t" ?head ] [ trim-blanks "int" prepend ] }
404 { [ "int64_t" ?head ] [ trim-blanks "longlong" prepend ] }
405 { [ "uint8_t" ?head ] [ trim-blanks "uchar" prepend ] }
406 { [ "uint16_t" ?head ] [ trim-blanks "ushort" prepend ] }
407 { [ "uint32_t" ?head ] [ trim-blanks "uint" prepend ] }
408 { [ "uint64_t" ?head ] [ trim-blanks "ulonglong" prepend ] }
409 { [ "signed char" ?head ] [ trim-blanks "char" prepend ] }
410 { [ "signed short" ?head ] [ trim-blanks "short" prepend ] }
411 { [ "signed int" ?head ] [ trim-blanks "int" prepend ] }
412 { [ "signed long" ?head ] [ trim-blanks "long" prepend ] }
413 { [ "unsigned char" ?head ] [ trim-blanks "uchar" prepend ] }
414 { [ "unsigned short" ?head ] [ trim-blanks "ushort" prepend ] }
415 { [ "unsigned int" ?head ] [ trim-blanks "uint" prepend ] }
416 { [ "unsigned long" ?head ] [ trim-blanks "ulong" prepend ] }
417 { [ dup "(*)" swap subseq? ] [ drop "void*" ] }
421 : cursor-name ( cursor -- string )
422 clang_getCursorSpelling clang-get-cstring "" ?unnamed drop ;
424 : ?cursor-name ( cursor unnamed-type -- string )
425 [ clang_getCursorSpelling clang-get-cstring ] dip ?unnamed drop ;
427 : arg-info ( cursor -- string )
428 [ cursor-type ] [ cursor-name [ "dummy" ] when-empty ] bi " " glue ;
430 : cursor>args ( CXCursor -- args/f )
431 dup clang_Cursor_getNumArguments dup -1 = [
435 clang_Cursor_getArgument
439 : cxprimitive-type>factor ( CXType -- string )
441 { CXType_Bool [ "bool" ] }
442 { CXType_Char_S [ "char" ] }
443 { CXType_Char_U [ "uchar" ] }
444 { CXType_SChar [ "char" ] }
445 { CXType_UChar [ "uchar" ] }
446 { CXType_Short [ "short" ] }
447 { CXType_UShort [ "ushort" ] }
448 { CXType_Int [ "int" ] }
449 { CXType_UInt [ "uint" ] }
450 { CXType_Long [ "long" ] }
451 { CXType_ULong [ "ulong" ] }
452 { CXType_LongLong [ "longlong" ] }
453 { CXType_ULongLong [ "ulonglong" ] }
454 { CXType_Float [ "float" ] }
455 { CXType_Double [ "double" ] }
456 { CXType_Void [ "void" ] }
460 : cxreturn-type>factor ( CXType -- string )
462 { [ dup kind>> CXType_Pointer = ] [
463 clang_getPointeeType dup g... gflush cxreturn-type>factor "*" append
465 { [ dup kind>> CXType_Elaborated = ] [
466 clang_getCanonicalType cxreturn-type>factor
468 { [ dup kind>> CXType_Record = ] [
469 clang_getTypeDeclaration cursor-name
471 { [ dup kind>> CXType_FunctionProto = ] [
472 ! inside a CXType_Pointer, so we get `void*` from that case
475 [ kind>> cxprimitive-type>factor ]
478 : cursor>args-info ( CXCursor -- args-info )
479 cursor>args [ arg-info ] map ", " join ;
481 : cursor>c-function ( CXCursor -- )
482 [ clang_getCursorResultType cxreturn-type>factor ]
484 [ cursor>args-info ] tri <c-function> set-definition ;
486 : cursor>c-typedef ( CXCursor -- )
487 [ clang_getTypedefDeclUnderlyingType cxreturn-type>factor ]
488 [ cursor-name ] bi <c-typedef> [ set-definition ] [ set-typedef ] bi ;
490 : cursor>c-field ( CXCursor -- )
491 [ cursor-name ] [ cursor-type ] bi <c-field> dup g... gflush push-child-form ;
493 DEFER: cursor-visitor
495 : cursor>enum ( CXCursor -- )
497 [ cursor-name ] [ cursor-visitor ] bi
498 f clang_visitChildren drop
499 ] with-new-form <c-enum> set-definition ;
501 : cursor>c-union ( CXCursor -- )
503 "cursor>union start" g...
504 peek-current-form g... gflush
506 [ "Union" ?cursor-name "name: " gwrite dup g... gflush ] keep
507 cursor-visitor f clang_visitChildren drop
509 "cursor>union finish" g... gflush
510 peek-current-form g... gflush
512 <c-union> dup g... gflush set-definition ;
514 : cursor>c-struct ( CXCursor -- )
516 "cursor>c-struct start" g...
517 peek-current-form g... gflush
519 [ "Struct" ?cursor-name ] keep
520 cursor-visitor f clang_visitChildren drop
522 "cursor>c-struct finish" g... gflush
523 peek-current-form g... gflush
525 <c-struct> dup g... gflush set-definition ;
527 : cursor-visitor ( -- callback )
530 dup clang_getCursorKind
531 dup "cursor-visitor got: " gwrite g... gflush
533 { CXCursor_Namespace [ drop CXChildVisit_Recurse ] }
534 { CXCursor_FunctionDecl [ cursor>c-function CXChildVisit_Continue ] }
535 { CXCursor_TypedefDecl [ cursor>c-typedef CXChildVisit_Continue ] }
536 { CXCursor_UnionDecl [ cursor>c-union CXChildVisit_Continue ] }
537 { CXCursor_StructDecl [ cursor>c-struct CXChildVisit_Continue ] }
538 { CXCursor_EnumDecl [ cursor>enum CXChildVisit_Continue ] }
539 { CXCursor_VarDecl [ drop CXChildVisit_Continue ] }
541 { CXCursor_FieldDecl [
542 cursor>c-field CXChildVisit_Continue
544 { CXCursor_EnumConstantDecl [
546 [ clang-get-token-spelling ] with-cursor-tokens
549 clang_getEnumConstantDeclUnsignedValue number>string
551 <c-field> push-child-form
552 CXChildVisit_Continue
555 "cursor-visitor unhandled: " gwrite dup g... gflush
556 2drop CXChildVisit_Recurse
562 : with-clang-index ( quot: ( index -- ) -- )
563 [ 0 0 clang_createIndex ] dip keep clang_disposeIndex ; inline
565 : with-clang-translation-unit ( idx source-file command-line-args nargs unsaved-files nunsaved-files options quot: ( tu -- ) -- )
566 [ enum>number clang_parseTranslationUnit ] dip
567 keep clang_disposeTranslationUnit ; inline
569 : with-clang-default-translation-unit ( path quot: ( tu path -- ) -- )
571 _ f 0 f 0 CXTranslationUnit_None [
573 ] with-clang-translation-unit
574 ] with-clang-index ; inline
576 : with-clang-cursor ( path quot: ( tu path cursor -- ) -- )
578 _ f 0 f 0 CXTranslationUnit_None [
579 _ over clang_getTranslationUnitCursor @
580 ] with-clang-translation-unit
581 ] with-clang-index ; inline
583 : parse-c-exports ( path -- )
585 2nip cursor-visitor f clang_visitChildren drop
586 ] with-clang-cursor ;
588 : write-c-defs ( clang-state -- )
592 [ def>out-forms ] each
595 [ members [ length ] inv-sort-by ] assoc-map
596 ] change-out-forms-by-name
598 sort-keys values [ print ] each
601 : parse-include ( path -- libclang-state )
602 <libclang-state> clang-state [
605 ] with-output-global-variable dup write-c-defs ; inline