1 ! Copyright (C) 2008 Slava Pestov.
2 ! See https://factorcode.org/license.txt for BSD license.
4 USING: accessors ascii assocs combinators
5 combinators.short-circuit formatting kernel make math namespaces
6 regexp regexp.parser sequences splitting strings
7 xmode.marker.state xmode.rules xmode.tokens xmode.utilities ;
11 ! Next two words copied from parser-combinators
12 ! Just like head?, but they optionally ignore case
14 : string= ( str1 str2 ignore-case -- ? )
15 [ [ >upper ] bi@ ] when sequence= ;
17 : string-head? ( str1 str2 ignore-case -- ? )
22 [ length head-slice ] 2bi
26 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
28 : current-keyword ( -- string )
29 last-offset get position get line get subseq ;
31 : keyword-number? ( keyword -- ? )
33 [ current-rule-set highlight-digits?>> ]
34 [ dup [ digit? ] any? ]
37 current-rule-set digit-re>>
38 [ dupd matches? ] [ f ] if*
43 : mark-number ( keyword -- id )
44 keyword-number? DIGIT and ;
46 : mark-keyword ( keyword -- id )
47 current-rule-set keywords>> at ;
49 : add-remaining-token ( -- )
50 current-rule-set default>> prev-token, ;
54 dup mark-number [ ] [ mark-keyword ] ?if
55 [ prev-token, ] when* ;
57 : current-char ( -- char )
58 position get line get nth ;
60 GENERIC: match-position ( rule -- n )
62 M: mark-previous-rule match-position drop last-offset get ;
64 M: rule match-position drop position get ;
66 : can-match-here? ( matcher rule -- ? )
69 [ over at-line-start?>> over zero? implies ]
70 [ over at-whitespace-end?>> over whitespace-end get = implies ]
71 [ over at-word-start?>> over last-offset get = implies ]
74 : rest-of-line ( -- str )
75 line get position get tail-slice ;
77 : match-start ( string regexp -- slice/f )
78 first-match dup [ dup from>> 0 = [ drop f ] unless ] when ;
80 GENERIC: text-matches? ( string text -- match-count/f )
85 M: string-matcher text-matches?
86 [ string>> ] [ ignore-case?>> ] bi
87 [ string-head? ] keepd length and ;
89 M: regexp text-matches?
90 [ >string ] dip match-start dup [ to>> ] when ;
94 ! XXX: Terrible inefficient regexp match group support
96 ! XXX: support named-capturing groups?
98 : group-start ( i raw -- n/f )
99 [ CHAR: ( -rot index-from ] keep 2dup
100 { [ drop ] [ [ 1 + ] dip ?nth CHAR: ? = ] } 2&&
101 [ [ 1 + ] dip group-start ] [ drop ] if ;
103 : nth-group-start ( n raw -- n )
104 [ -1 ] 2dip '[ dup [ 1 + _ group-start ] when ] times ;
106 : matching-paren ( str -- to )
115 : nth-group ( n raw -- before nth )
116 [ nth-group-start ] keep swap cut dup matching-paren 1 + head ;
118 : match-group-regexp ( regexp n -- skip-regexp match-regexp )
119 [ [ options>> options>string ] [ raw>> ] bi ] dip swap
120 nth-group rot '[ _ H{ } [ <optioned-regexp> ] 2cache ] bi@ ;
122 : skip-first-match ( match regexp -- tailseq )
123 [ >string ] dip first-match [ seq>> ] [ to>> ] bi tail ;
125 : nth-match ( match regexp n -- slice/f )
126 match-group-regexp [ skip-first-match ] [ match-start ] bi* ;
128 : update-match-groups ( str match regexp -- str' )
129 pick CHAR: $ swap index [
130 R/ [$]\d/ [ second CHAR: 0 - nth-match ] 2with re-replace-with
133 GENERIC: fixup-end ( match regexp end -- end' )
135 M: string-matcher fixup-end
136 [ string>> -rot update-match-groups ]
137 [ ignore-case?>> ] bi <string-matcher> ;
139 MEMO: <fixup-regexp> ( raw matched options -- regexp )
141 [ parse-tree>> ] [ options>> ] [ dfa>> ] [ next-match>> ]
142 } cleave regexp boa ;
145 [ raw>> [ -rot update-match-groups ] keep swap ]
146 [ options>> options>string ] bi <fixup-regexp> ;
148 : fixup-end? ( text -- ? )
149 { [ regexp? ] [ 0 swap raw>> group-start ] } 1&& ;
151 : fixup-end/text-matches? ( string regexp rule -- match-count/f )
152 [ >string ] 2dip [ [ match-start dup ] keep ] dip pick [
153 end>> [ [ fixup-end ] change-text drop ] [ 2drop ] if*
156 ] if dup [ to>> ] when ;
160 :: rule-start-matches? ( rule -- match-count/f )
161 rule start>> dup rule can-match-here? [
162 rest-of-line swap text>>
164 rule fixup-end/text-matches?
172 : rule-end-matches? ( rule -- match-count/f )
173 dup mark-following-rule? [
174 [ start>> ] keep can-match-here? 0 and
176 [ end>> dup ] keep can-match-here? [
178 swap text>> context get end>> or
187 : get-always-rules ( ruleset -- vector/f )
190 : get-char-rules ( char ruleset -- vector/f )
191 [ ch>upper ] dip rules>> at ;
193 : get-rules ( char ruleset -- seq )
194 [ get-char-rules ] [ get-always-rules ] bi [ append ] when* ;
196 GENERIC: handle-rule-start ( match-count rule -- )
198 GENERIC: handle-rule-end ( match-count rule -- )
200 : find-escape-rule ( -- rule )
202 in-rule-set>> escape-rule>> [ ] [
203 parent>> in-rule-set>>
204 dup [ escape-rule>> ] when
207 : check-escape-rule ( rule -- ? )
208 escape-rule>> [ find-escape-rule ] unless*
210 dup rule-start-matches? [
211 swap handle-rule-start
212 delegate-end-escaped? toggle
219 : check-every-rule ( -- ? )
220 current-char current-rule-set get-rules
221 [ rule-start-matches? ] map-find
222 [ handle-rule-start t ] [ drop f ] if* ;
226 dup rule-end-matches?
227 [ swap handle-rule-end ] [ drop ] if*
230 : rule-match-token* ( rule -- id )
232 { f [ dup body-token>> ] }
233 { t [ current-rule-set default>> ] }
237 M: escape-rule handle-rule-start
240 process-escape? get [
242 position [ + ] change
245 M: seq-rule handle-rule-start
249 [ body-token>> next-token, ] keep
250 delegate>> [ push-context ] when* ;
252 UNION: abstract-span-rule span-rule eol-span-rule ;
254 M: abstract-span-rule handle-rule-start
258 [ rule-match-token* next-token, ] keep
260 dup context get in-rule<<
261 delegate>> push-context ;
263 M: span-rule handle-rule-end
266 M: mark-following-rule handle-rule-start
268 mark-token add-remaining-token
269 [ rule-match-token* next-token, ] keep
271 context get in-rule<< ;
273 M: mark-following-rule handle-rule-end
274 nip rule-match-token* prev-token,
275 f context get in-rule<< ;
277 M: mark-previous-rule handle-rule-start
280 dup body-token>> prev-token,
281 rule-match-token* next-token, ;
289 : check-end-delegate ( -- ? )
290 context get parent>> [
292 dup rule-end-matches? [
298 ] keep context get parent>> in-rule>>
299 rule-match-token* next-token,
301 seen-whitespace-end? on t
302 ] [ check-escape-rule ] if*
306 : handle-no-word-break ( -- )
307 context get parent>> [
309 dup no-word-break?>> [
310 rule-match-token* prev-token,
320 add-remaining-token ;
322 : (check-word-break) ( -- )
325 1 current-rule-set default>> next-token, ;
327 : rule-set-empty? ( ruleset -- ? )
328 [ rules>> ] [ keywords>> ] bi
329 [ assoc-empty? ] both? ;
331 : check-word-break ( -- ? )
332 current-char dup blank? [
335 seen-whitespace-end? get [
336 position get 1 + whitespace-end set
342 ! Micro-optimization with incorrect semantics; we keep
343 ! it here because jEdit mode files depend on it now...
344 current-rule-set rule-set-empty? [
350 current-rule-set rule-set-no-word-sep* member? [
356 seen-whitespace-end? on
359 delegate-end-escaped? off t ;
362 : mark-token-loop ( -- )
363 position get line get length < [
365 [ check-end-delegate ]
374 : mark-remaining ( -- )
375 line get length position set
378 : unwind-no-line-break ( -- )
379 context get parent>> [
388 : tokenize-line ( line-context line rules -- line-context' seq )