1 ! Copyright (C) 2022 CapitalEx
2 ! See https://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs combinators
4 combinators.short-circuit compiler.units formatting hash-sets
5 hashtables io io.encodings.utf8 io.files io.styles kernel
6 namespaces sequences sequences.parser sets sorting strings
7 unicode vectors vocabs vocabs.loader vocabs.prettyprint
8 vocabs.prettyprint.private ;
9 FROM: namespaces => set ;
13 SYMBOL: old-dictionary
16 ! Words for working with the dictionary.
17 : save-dictionary ( -- )
! Forget every vocab added since the dictionary was last saved,
! restoring the recorded dictionary state inside one compilation unit.
: restore-dictionary ( -- )
    dictionary old-dictionary [ get keys >hash-set ] bi@
    diff members [ [ forget-vocab ] each ] with-compilation-unit ;
26 : vocab-loaded? ( name -- ? )
31 : tokenize ( string -- sequence-parser )
! Advance the parser past the next occurrence of seq, leaving the
! parser itself on the stack.
: skip-after ( sequence-parser seq -- sequence-parser )
    dupd take-until-sequence* drop ;
! Advance the parser past the next occurrence of object, leaving the
! parser itself on the stack.
: skip-after* ( sequence-parser object -- sequence-parser )
    dupd take-until-object drop ;
40 : next-line ( sequence-parser -- sequence-parser )
43 : quotation-mark? ( token -- ? )
46 : comment? ( token -- ? )
49 : string-literal? ( token -- ? )
53 ! Words for parsing tokens
! Discard the current token, skip the remainder of its line, and
! return the next token from the following line.
: reject-token ( sequence-parser token -- string )
    drop next-line next-token ;
59 : accept-token ( sequence-parser token -- string )
! Skip leading whitespace, then take characters up to the next blank
! character as the token.
: get-token ( sequence-parser -- token )
    skip-whitespace [ current blank? ] take-until ;
65 : next-token ( sequence-parser -- string )
66 dup get-token dup comment?
70 : skip-token ( sequence-parser -- sequence-parser )
74 ! Words for removing syntax that should be ignored
75 : ends-with-quote? ( token -- ? )
76 2 tail* [ first CHAR: \ = not ]
77 [ second CHAR: " = ] bi and ;
! A token closes a string literal when it is either a lone quotation
! mark or ends with an unescaped quote.
: end-string? ( token -- ? )
    dup length 1 = [ quotation-mark? ] [ ends-with-quote? ] if ;
! Consume tokens until one that closes the string literal is found.
! The string argument is the most recently consumed token.
: skip-string ( sequence-parser string -- sequence-parser )
    end-string? not [ dup next-token skip-string ] when ;
! If the token opens a string literal, skip the rest of the literal
! and yield f; otherwise pass the token through unchanged.
: ?handle-string ( sequence-parser string -- sequence-parser string/f )
    dup { [ empty? not ] [ string-literal? ] } 1&& [ skip-string f ] when ;
88 : next-word/f ( sequence-parser -- sequence-parser string/f )
90 ! skip over empty tokens
94 { "FROM:" [ ";" skip-after f ] }
95 { "SYMBOLS:" [ ";" skip-after f ] }
96 { "R/" [ "/" skip-after f ] }
97 { "(" [ ")" skip-after f ] }
98 { "IN:" [ skip-token f ] }
99 { "SYMBOL:" [ skip-token f ] }
100 { ":" [ skip-token f ] }
101 { "POSTPONE:" [ skip-token f ] }
102 { "\\" [ skip-token f ] }
103 { "CHAR:" [ skip-token f ] }
106 { "!" [ next-line f ] }
107 { "((" [ "))" skip-after f ] }
108 { "/*" [ "*/" skip-after f ] }
109 { "![[" [ "]]" skip-after f ] }
110 { "![=[" [ "]=]" skip-after f ] }
111 { "![==[" [ "]==]" skip-after f ] }
112 { "![===[" [ "]===]" skip-after f ] }
113 { "![====[" [ "]====]" skip-after f ] }
114 { "![=====[" [ "]=====]" skip-after f ] }
115 { "![======[" [ "]======]" skip-after f ] }
117 ! strings (special case needed for `"`)
118 { "STRING:" [ ";" skip-after f ] }
119 { "[[" [ "]]" skip-after f ] }
120 { "[=[" [ "]=]" skip-after f ] }
121 { "[==[" [ "]==]" skip-after f ] }
122 { "[===[" [ "]===]" skip-after f ] }
123 { "[====[" [ "]====]" skip-after f ] }
124 { "[=====[" [ "]=====]" skip-after f ] }
125 { "[======[" [ "]======]" skip-after f ] }
128 { "EBNF[[" [ "]]" skip-after f ] }
129 { "EBNF[=[" [ "]=]" skip-after f ] }
130 { "EBNF[==[" [ "]==]" skip-after f ] }
131 { "EBNF[===[" [ "]===]" skip-after f ] }
132 { "EBNF[====[" [ "]====]" skip-after f ] }
133 { "EBNF[=====[" [ "]=====]" skip-after f ] }
134 { "EBNF[======[" [ "]======]" skip-after f ] }
137 { "!AUTHOR" [ next-line f ] }
138 { "!BROKEN" [ next-line f ] }
139 { "!BUG" [ next-line f ] }
140 { "!FIXME" [ next-line f ] }
141 { "!LICENSE" [ next-line f ] }
142 { "!LOL" [ next-line f ] }
143 { "!NOTE" [ next-line f ] }
144 { "!REVIEW" [ next-line f ] }
145 { "!TODO" [ next-line f ] }
146 { "!XXX" [ next-line f ] }
150 } case ?handle-string ;
! When the token is not f, push it onto the vector; the vector and
! sequence parser remain on the stack either way.
: ?push ( vector sequence-parser string/? -- vector sequence-parser )
    [ [ swap [ push ] keep ] curry dip ] when* ;
! Apply quot to the vector and parser unless the parser has reached
! the end of its input, in which case the parser is dropped and the
! vector returned as-is.
: ?keep-parsing-with ( vector sequence-parser quot -- vector )
    [ dup sequence-parse-end? not ] dip
    [ call( x x -- x ) ] curry [ drop ] if ;
! Recursively collect each significant token from the parser into the
! vector, then remove any empty entries with harvest.
! (Fixed typo in the stack-effect name: sequence-praser -> sequence-parser.)
: (strip-code) ( vector sequence-parser -- vector )
    skip-whitespace next-word/f ?push
    [ (strip-code) ] ?keep-parsing-with harvest ;
! Tokenize the source string and reduce it to a vector of significant
! tokens, with ignorable syntax (comments, literals) stripped out.
: strip-code ( string -- string )
    tokenize [ V{ } clone ] dip (strip-code) ;
167 ! Words for finding the words used in a program
168 ! and stripping out import statements
169 : skip-imports ( sequence-parser -- sequence-parser string/? )
171 { "USING:" [ ";" skip-after* f ] }
172 { "USE:" [ advance f ] }
176 : take-imports ( sequence-parser -- vector )
178 { "USING:" [ ";" take-until-object ] }
179 { "USE:" [ 1 take-n ] }
! Recursively accumulate every non-import token from the parser into
! the vector.
: (find-used-words) ( vector sequence-parser -- vector )
    skip-imports ?push [ (find-used-words) ] ?keep-parsing-with ;
! Collect every non-import token in the token vector into a fast set.
: find-used-words ( vector -- set )
    <sequence-parser> [ V{ } clone ] dip (find-used-words) fast-set ;
! Recursively collect the vocab names of each import statement,
! prepending each batch onto the accumulator vector.
: (find-imports) ( vector sequence-parser -- vector )
    dup take-imports rot prepend swap [ (find-imports) ] ?keep-parsing-with ;
! Gather every imported vocab name from the token vector and remember
! the result in the cache variable.
: find-imports ( vector -- seq )
    <sequence-parser> [ V{ } clone ] dip (find-imports) dup cache set ;
! Pair a vocab name with the names of the words that vocab defines.
: (get-words) ( name -- vocab )
    [ load-vocab words>> keys ] keep swap 2array ;
198 : no-vocab-found ( name -- empty )
! Keep only the vocab names none of whose words appear in the given
! set of used words.
: reject-unused-vocabs ( assoc hash-set -- seq )
    '[ [ _ in? ] any? ] reject-values keys ;
! Pretty-print a USING: form built from the cached import list with
! the unused vocabs (seq) removed.
:: print-new-header ( seq -- )
    "Use the following header to remove unused imports: " print
    manifest-style [ cache get seq diff pprint-using ] with-nesting ;
208 :: print-unused-vocabs ( name seq -- )
209 name "The following vocabs are unused in %s: \n" printf
210 seq [ " - " prepend print ] each
! Report that no unused vocabs were found in the named vocab; the
! second input is ignored.
: print-no-unused-vocabs ( name _ -- )
    drop "No unused vocabs found in %s.\n" printf ;
219 ! Private details for fetching words and imports
! Map a vocab name to its word-name pair, falling back to
! no-vocab-found when the vocab does not exist.
: get-words ( name -- assoc )
    dup vocab-exists? [ (get-words) ] [ no-vocab-found ] if ;
223 : get-imported-words ( string -- hashtable )
225 find-imports [ get-words ] map >hashtable
! Strip the source, gather its imports and used words, and return the
! sorted names of imported vocabs that contribute no used words.
: find-unused-in-string ( string -- seq )
    strip-code dup get-imported-words swap find-used-words
    reject-unused-vocabs sort ;
! Read the file as UTF-8 and report its unused imported vocabs.
: find-unused-in-file ( path -- seq )
    utf8 file-contents find-unused-in-string ;
! Find the unused vocabs for a named vocab, or f when the vocab has
! no source path on disk.
: find-unused ( name -- seq )
    vocab-source-path [ find-unused-in-file ] [ f ] if* ;
! Print a human-readable report of unused vocabs for the named vocab.
: find-unused. ( name -- )
    dup find-unused dup empty?
    [ print-no-unused-vocabs ]
    [ print-unused-vocabs ] if ;