1 ! Copyright (C) 2008 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
4 USING: kernel namespaces make xmode.rules xmode.tokens
5 xmode.marker.state xmode.marker.context xmode.utilities
6 xmode.catalog sequences math assocs combinators strings
7 regexp splitting ascii parser-combinators regexp.backend
8 ascii combinators.short-circuit accessors ;
9 ! parser-combinators is for the string-head? word
10 ! regexp.backend is for the regexp class
12 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
! Slice out the part of the current line that runs from the
! value of last-offset up to (not including) the value of
! position. Both variables are read from the dynamic namespace.
: current-keyword ( -- string )
    last-offset position [ get ] bi@
    line get subseq ;
17 : keyword-number? ( keyword -- ? )
19 [ current-rule-set highlight-digits?>> ]
20 [ dup [ digit? ] any? ]
23 current-rule-set digit-re>>
24 dup [ dupd matches? ] [ drop f ] if
! Classify a keyword as a number: yields DIGIT when
! keyword-number? accepts it, and f otherwise.
: mark-number ( keyword -- id )
    keyword-number? [ DIGIT ] [ f ] if ;
! Look the keyword up in the keywords assoc of the current rule
! set. The "found?" flag from at* is discarded, so a missing key
! simply yields f.
: mark-keyword ( keyword -- id )
    current-rule-set keywords>>
    at* drop ;
! Pass the current rule set's default token id to prev-token, .
: add-remaining-token ( -- )
    current-rule-set default>>
    prev-token, ;
40 dup mark-number [ ] [ mark-keyword ] ?if
41 [ prev-token, ] when* ;
! Element of the current line at index `position`.
: current-char ( -- char )
    line get position get
    swap nth ;
! Offset associated with a rule match; dispatches on the rule.
GENERIC: match-position ( rule -- n )

! mark-previous rules report the value of last-offset.
M: mark-previous-rule match-position
    last-offset get nip ;

! Every other rule reports the value of position.
M: rule match-position
    position get nip ;
52 : can-match-here? ( matcher rule -- ? )
55 [ over at-line-start?>> over zero? implies ]
56 [ over at-whitespace-end?>> over whitespace-end get = implies ]
57 [ over at-word-start?>> over last-offset get = implies ]
! Slice of the current line starting at index `position` and
! running to the end of the line.
: rest-of-line ( -- str )
    position get line get
    swap tail-slice ;
! Generic word dispatching on `text` (the top-of-stack input).
! Per the declared stack effect, a method leaves either a match
! count or f.
63 GENERIC: text-matches? ( string text -- match-count/f )
68 M: string-matcher text-matches?
70 [ string>> ] [ ignore-case?>> ] bi string-head?
71 ] keep string>> length and ;
! Regexp case: convert the input to a string first, then hand
! both to match-head with the regexp on top.
M: regexp text-matches?
    swap >string
    swap match-head ;
76 : rule-start-matches? ( rule -- match-count/f )
77 dup start>> tuck swap can-match-here? [
78 rest-of-line swap text>> text-matches?
83 : rule-end-matches? ( rule -- match-count/f )
84 dup mark-following-rule? [
85 dup start>> swap can-match-here? 0 and
87 dup end>> tuck swap can-match-here? [
89 swap text>> context get end>> or
! Fetch the rules stored under the key f in the rule set's
! rules assoc and combine them with the incoming vector via
! ?push-all.
: get-always-rules ( vector/f ruleset -- vector/f )
    rules>> f swap at
    ?push-all ;
! Fetch the rules stored under the upper-cased character in the
! rule set's rules assoc and combine them with the incoming
! vector via ?push-all.
: get-char-rules ( vector/f char ruleset -- vector/f )
    rules>> [ ch>upper ] dip at
    ?push-all ;
! Collect the rules for a character: start from f, add the
! character-specific rules, then add the rules stored under f
! for the same rule set.
: get-rules ( char ruleset -- seq )
    [ f swap ] dip
    [ get-char-rules ] keep
    get-always-rules ;
! Generic hook invoked once a rule's start has matched;
! dispatches on `rule` (top of stack) and leaves nothing behind.
107 GENERIC: handle-rule-start ( match-count rule -- )
! Counterpart hook for a matched rule end; same inputs, no
! outputs.
109 GENERIC: handle-rule-end ( match-count rule -- )
111 : find-escape-rule ( -- rule )
113 in-rule-set>> escape-rule>> [ ] [
114 parent>> in-rule-set>>
115 dup [ escape-rule>> ] when
118 : check-escape-rule ( rule -- ? )
120 find-escape-rule dup [
121 dup rule-start-matches? dup [
122 swap handle-rule-start
123 delegate-end-escaped? [ not ] change
! Try each rule applicable to the current character in turn.
! The first rule whose start matches is passed, together with
! its match count, to handle-rule-start and the word answers t;
! if none matches the answer is f.
: check-every-rule ( -- ? )
    current-char current-rule-set get-rules
    [ rule-start-matches? ] map-find
    ! Stack here: match-count rule/f
    [ handle-rule-start t ] [ drop f ] if* ;
138 dup rule-end-matches?
139 dup [ swap handle-rule-end ] [ 2drop ] if
142 : rule-match-token* ( rule -- id )
144 { f [ dup body-token>> ] }
145 { t [ current-rule-set default>> ] }
149 M: escape-rule handle-rule-start
152 process-escape? get [
153 escaped? [ not ] change
154 position [ + ] change
157 M: seq-rule handle-rule-start
161 tuck body-token>> next-token,
162 delegate>> [ push-context ] when* ;
! Union class covering span-rule and eol-span-rule, so a method
! such as handle-rule-start can be defined once for both.
164 UNION: abstract-span-rule span-rule eol-span-rule ;
166 M: abstract-span-rule handle-rule-start
170 tuck rule-match-token* next-token,
172 dup context get (>>in-rule)
173 delegate>> push-context ;
175 M: span-rule handle-rule-end
178 M: mark-following-rule handle-rule-start
180 mark-token add-remaining-token
181 tuck rule-match-token* next-token,
182 f context get (>>end)
183 context get (>>in-rule) ;
! End of a mark-following rule: the match count is ignored, the
! rule's match token goes to prev-token, , and the in-rule slot
! of the current context is reset to f.
M: mark-following-rule handle-rule-end
    [ drop ] dip
    rule-match-token* prev-token,
    context get f swap (>>in-rule) ;
189 M: mark-previous-rule handle-rule-start
192 dup body-token>> prev-token,
193 rule-match-token* next-token, ;
201 : check-end-delegate ( -- ? )
202 context get parent>> [
204 dup rule-end-matches? dup [
210 ] keep context get parent>> in-rule>>
211 rule-match-token* next-token,
213 seen-whitespace-end? on t
214 ] [ drop check-escape-rule ] if
218 : handle-no-word-break ( -- )
219 context get parent>> [
221 dup no-word-break?>> [
222 rule-match-token* prev-token,
232 add-remaining-token ;
234 : (check-word-break) ( -- )
237 1 current-rule-set default>> next-token, ;
! A rule set counts as empty when both its rules assoc and its
! keywords assoc are empty.
: rule-set-empty? ( ruleset -- ? )
    [ rules>> assoc-empty? ]
    [ keywords>> assoc-empty? ] bi and ;
243 : check-word-break ( -- ? )
244 current-char dup blank? [
247 seen-whitespace-end? get [
248 position get 1+ whitespace-end set
254 ! Micro-optimization with incorrect semantics; we keep
255 ! it here because jEdit mode files depend on it now...
256 current-rule-set rule-set-empty? [
262 current-rule-set rule-set-no-word-sep* member? [
268 seen-whitespace-end? on
271 delegate-end-escaped? off t ;
274 : mark-token-loop ( -- )
275 position get line get length < [
277 [ check-end-delegate ]
286 : mark-remaining ( -- )
287 line get length position set
290 : unwind-no-line-break ( -- )
291 context get parent>> [
300 : tokenize-line ( line-context line rules -- line-context' seq )