extra/lint/vocabs/vocabs.factor

   1 ! Copyright (C) 2022 CapitalEx
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: accessors arrays assocs compiler.units formatting
   4 hash-sets hashtables io io.encodings.utf8 io.files
   5 kernel namespaces regexp sequences sequences.deep sets sorting
   6 splitting unicode vocabs vocabs.loader ;
   7 FROM: namespaces => set ;
   8 IN: lint.vocabs
   9
  10 <PRIVATE
  11 SYMBOL: old-dictionary
  12 SYMBOL: LINT-VOCABS-REGEX
  13
  14 ! Cache regular expression to avoid compile time slowdowns
  15 "CHAR:\\s+\\S+\\s+|\"(\\\\\\\\|\\\\[\\\\stnrbvf0e\"]|\\\\x[a-fA-F0-9]{2}|\\\\u[a-fA-F0-9]{6}|[^\\\\\"])*\"|R/ (\\\\/|[^/])*/|\\\\\\s+(USE:|USING:)|POSTPONE:\\s+(USE:|USING:)|(?<!\\S+)! [^\n]*" <regexp>
  16 LINT-VOCABS-REGEX set-global
  17
  18 : save-dictionary ( -- )
  19     dictionary     get clone
  20     old-dictionary set       ;
  21
  22 : restore-dictionary ( -- )
  23     dictionary     get keys >hash-set
  24     old-dictionary get keys >hash-set
  25     diff members [ [ forget-vocab ] each ] with-compilation-unit ;
  26
  27 : vocab-loaded? ( name -- ? )
  28     dictionary get key? ;
  29
  30 : (get-words) ( name -- vocab )
  31     dup load-vocab words>> keys 2array ;
  32
  33 : no-vocab-found ( name -- empty )
  34     { } 2array ;
  35
  36 : nl>space ( string -- string )
  37     "\n" " " replace ;
  38
  39 : find-import-statements ( string -- seq )
  40     "USING: [^;]+ ;|USE: \\S+" <regexp> all-matching-subseqs ;
  41
  42 : clean-up-source ( string -- string )
  43     LINT-VOCABS-REGEX get-global "" re-replace ;
  44
  45 : strip-syntax ( seq -- seq )
  46     [ "USING: | ;|USE: " <regexp> " " re-replace ] map ;
  47
  48 : split-when-blank ( string -- seq )
  49     [ blank? ] split-when ;
  50
  51 : split-words ( line -- words )
  52     [ split-when-blank ] map flatten harvest ;
  53
  54 : get-unique-words ( seq -- hash-set )
  55     harvest split-words >hash-set ;
  56
  57 : [is-used?] ( hash-set  -- quot )
  58     '[ nip [ _ in? ] any? ] ; inline
  59
  60 : reject-unused-vocabs ( assoc hash-set -- seq )
  61     [is-used?] assoc-reject keys ;
  62
  63 : print-unused-vocabs ( name seq -- )
  64     swap "The following vocabs are unused in %s: \n" printf
  65         [ "    - " prepend print ] each ;
  66
  67 : print-no-unused-vocabs ( name _ -- )
  68     drop "No unused vocabs found in %s.\n" printf ;
  69
  70 PRIVATE>
  71
  72 : get-words ( name -- assoc )
  73     dup vocab-exists?
  74         [ (get-words) ]
  75         [ no-vocab-found ] if ;
  76
  77 : get-vocabs ( string -- seq )
  78     nl>space find-import-statements strip-syntax split-words harvest ;
  79
  80 : get-imported-words ( string -- hashtable )
  81     save-dictionary
  82         get-vocabs [ get-words ] map >hashtable
  83     restore-dictionary
  84     ;
  85
  86 : find-unused-in-string ( string -- seq )
  87     clean-up-source
  88     [ get-imported-words ] [ "\n" split get-unique-words ] bi
  89     reject-unused-vocabs natural-sort ; inline
  90
  91 : find-unused-in-file ( path -- seq )
  92     utf8 file-contents find-unused-in-string ;
  93
  94 : find-unused ( name -- seq )
  95     vocab-source-path dup [ find-unused-in-file ] when ;
  96
  97 : find-unused. ( name -- )
  98     dup find-unused dup empty?
  99         [ print-no-unused-vocabs ]
 100            [ print-unused-vocabs ] if ;