extra/lint/vocabs/vocabs.factor

   1 ! Copyright (C) 2022 CapitalEx
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: accessors arrays assocs compiler.units formatting
   4 hash-sets hashtables io io.encodings.utf8 io.files
   5 kernel namespaces regexp sequences sequences.deep sets sorting
   6 splitting unicode vocabs vocabs.loader ;
   7 FROM: namespaces => set ;
   8 IN: lint.vocabs
   9
  10 <PRIVATE
  11 SYMBOL: old-dictionary
  12
  13 : save-dictionary ( -- )
  14     dictionary     get clone
  15     old-dictionary set       ;
  16
  17 : restore-dictionary ( -- )
  18     dictionary     get keys >hash-set
  19     old-dictionary get keys >hash-set
  20     diff members [ [ forget-vocab ] each ] with-compilation-unit ;
  21
  22 : vocab-loaded? ( name -- ? )
  23     dictionary get key? ;
  24
  25 : (get-words) ( name -- vocab )
  26     dup load-vocab words>> keys 2array ;
  27
  28 : no-vocab-found ( name -- empty )
  29     { } 2array ;
  30
  31 : nl>space ( string -- string )
  32     "\n" " " replace ;
  33
  34 : find-import-statements ( string -- seq )
  35     "USING: [^;]+ ;|USE: \\S+" <regexp> all-matching-subseqs ;
  36
  37 : clean-up-source ( string -- string )
  38     "\"(\\\\\"|[^\"])*\"|R/ (\\\\/|[^/])*/|\\\\\\s+(USE:|USING:)|POSTPONE:\\s+(USE:|USING:)|! [^\n]*|CHAR:\\s+\\S+\\s+" <regexp> "" re-replace ;
  39
  40 : strip-syntax ( seq -- seq )
  41     [ "USING: | ;|USE: " <regexp> " " re-replace ] map ;
  42
  43 : split-when-blank ( string -- seq )
  44     [ blank? ] split-when ;
  45
  46 : split-words ( line -- words )
  47     [ split-when-blank ] map flatten harvest ;
  48
  49 : get-unique-words ( seq -- hash-set )
  50     harvest split-words >hash-set ;
  51
  52 : [is-used?] ( hash-set  -- quot )
  53     '[ nip [ _ in? ] any? ] ; inline
  54
  55 : reject-unused-vocabs ( assoc hash-set -- seq )
  56     [is-used?] assoc-reject keys ;
  57
  58 : print-unused-vocabs ( name seq -- )
  59     swap "The following vocabs are unused in %s: \n" printf
  60         [ "    - " prepend print ] each ;
  61
  62 : print-no-unused-vocabs ( name _ -- )
  63     drop "No unused vocabs found in %s.\n" printf ;
  64
  65 PRIVATE>
  66
  67 : get-words ( name -- assoc )
  68     dup vocab-exists?
  69         [ (get-words) ]
  70         [ no-vocab-found ] if ;
  71
  72 : get-vocabs ( string -- seq )
  73     nl>space find-import-statements strip-syntax split-words harvest ;
  74
  75 : get-imported-words ( string -- hashtable )
  76     save-dictionary
  77         get-vocabs [ get-words ] map >hashtable
  78     restore-dictionary
  79     ;
  80
  81 : find-unused-in-string ( string -- seq )
  82     clean-up-source
  83     [ get-imported-words ] [ "\n" split get-unique-words ] bi
  84     reject-unused-vocabs natural-sort ; inline
  85
  86 : find-unused-in-file ( path -- seq )
  87     utf8 file-contents find-unused-in-string ;
  88
  89 : find-unused ( name -- seq )
  90     vocab-source-path dup [ find-unused-in-file ] when ;
  91
  92 : find-unused. ( name -- )
  93     dup find-unused dup empty?
  94         [ print-no-unused-vocabs ]
  95            [ print-unused-vocabs ] if ;