! Copyright (C) 2022 CapitalEx
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs compiler.units continuations
-formatting hash-sets hashtables io io.encodings.utf8 io.files
+USING: accessors arrays assocs compiler.units formatting
+hash-sets hashtables io io.encodings.utf8 io.files
kernel namespaces regexp sequences sequences.deep sets sorting
splitting unicode vocabs vocabs.loader ;
FROM: namespaces => set ;
<PRIVATE
SYMBOL: old-dictionary
+SYMBOL: LINT-VOCABS-REGEX
+
+! Build the source-cleaning regexp once, at top level, and keep it in a
+! global so that clean-up-source does not have to rebuild it on every call.
+! The alternatives match, in order: CHAR: literals, double-quoted strings
+! (including backslash escapes), R/ .../ regexp literals, a backslash or
+! POSTPONE: followed by USE: or USING:, and "! " comments.
+"CHAR:\\s+\\S+\\s+|\"(\\\\\\\\|\\\\[\\\\stnrbvf0e\"]|\\\\x[a-fA-F0-9]{2}|\\\\u[a-fA-F0-9]{6}|[^\\\\\"])*\"|R/ (\\\\/|[^/])*/|\\\\\\s+(USE:|USING:)|POSTPONE:\\s+(USE:|USING:)|(?<!\\S+)! [^\n]*" <regexp>
+LINT-VOCABS-REGEX set-global
: save-dictionary ( -- )
dictionary get clone
! Regexp matching one import statement: either a whole "USING: ... ;"
! form or a single "USE: vocab" form.
: import-statement-regexp ( -- regexp )
    "USING: [^;]+ ;|USE: \\S+" <regexp> ;

! Collect every import statement found in string, in order of appearance.
: find-import-statements ( string -- seq )
    import-statement-regexp all-matching-subseqs ;
-: clean-up-source ( string -- string )
- "\"(\\\"|[^\"]*)\"|(R/ (\\\\/|[^/])*/)|\\\\\\s+\\S+|POSTPONE: \\S+|! ([^\n])*" <regexp> "" re-replace ;
+! Delete every span of string that matches the regexp stored in the
+! LINT-VOCABS-REGEX global, by replacing each match with the empty string.
+: clean-up-source ( string -- string )
+ LINT-VOCABS-REGEX get-global "" re-replace ;
! Replace every "USING: ", " ;" and "USE: " marker in each string of seq
! with a single space. Returns a new sequence of the rewritten strings.
: strip-syntax ( seq -- seq )
    ! Build the regexp once and curry it into the quotation, instead of
    ! constructing a fresh regexp for every element of the sequence.
    "USING: | ;|USE: " <regexp>
    [ " " re-replace ] curry map ;