1 ! Copyright (C) 2008 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
4 USING: kernel namespaces make xmode.rules xmode.tokens
5 xmode.marker.state xmode.marker.context xmode.utilities
6 xmode.catalog sequences math assocs combinators
7 strings parser-combinators.regexp regexp splitting
8 parser-combinators ascii unicode.case
9 combinators.short-circuit accessors ;
11 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
! Returns the part of the current line that lies between the
! last-offset and position dynamic variables, i.e. the text that
! has been scanned since the last token boundary.
: current-keyword ( -- string )
    line get
    last-offset get position get
    ! subseq wants ( from to seq ), so bring the line back on top.
    rot subseq ;
16 : keyword-number? ( keyword -- ? )
18 [ current-rule-set highlight-digits?>> ]
19 [ dup [ digit? ] contains? ]
22 current-rule-set digit-re>>
23 dup [ dupd matches? ] [ drop f ] if
! Yields the DIGIT token id when keyword-number? accepts the
! keyword, and f otherwise.
: mark-number ( keyword -- id )
    keyword-number? [ DIGIT ] [ f ] if ;
! Looks the keyword up in the current rule set's keyword map.
! The result is the stored token id, or f when the map has no
! entry for it.
: mark-keyword ( keyword -- id )
    current-rule-set keywords>>
    ! at* also reports whether the key was present; only the
    ! value is needed here.
    at* drop ;
! Passes the current rule set's default token id to prev-token, .
! NOTE(review): prev-token, is defined outside this file; presumably
! it emits the not-yet-tokenized text before the current position
! as a token with that id -- confirm against xmode.marker.state.
34 : add-remaining-token ( -- )
35 current-rule-set default>> prev-token, ;
39 dup mark-number [ ] [ mark-keyword ] ?if
40 [ prev-token, ] when* ;
! Returns the element of the current line found at the index held
! in the position dynamic variable.
: current-char ( -- char )
    line get position get
    ! nth wants ( n seq ).
    swap nth ;
! Offset in the line against which a rule's match conditions are
! evaluated. The rule itself is only used for dispatch.
GENERIC: match-position ( rule -- n )

! A mark-previous-rule is measured from last-offset.
M: mark-previous-rule match-position
    last-offset get nip ;

! Every other rule is measured from the current position.
M: rule match-position
    position get nip ;
51 : can-match-here? ( matcher rule -- ? )
54 [ over at-line-start?>> over zero? implies ]
55 [ over at-whitespace-end?>> over whitespace-end get = implies ]
56 [ over at-word-start?>> over last-offset get = implies ]
! Returns a slice of the current line running from the current
! position to the end of the line. No copy is made; the result is
! a tail-slice over the line.
: rest-of-line ( -- str )
    position get line get
    ! tail-slice wants ( seq n ).
    swap tail-slice ;
! Generic matcher dispatching on `text` (the top stack item).
! Methods for string-matcher and regexp appear in this file. Per
! the stack effect the result is a match count, or f when there is
! no match.
! NOTE(review): the match appears to be anchored at the start of
! `string` (callers pass rest-of-line) -- confirm.
62 GENERIC: text-matches? ( string text -- match-count/f )
67 M: string-matcher text-matches?
69 [ string>> ] [ ignore-case?>> ] bi string-head?
70 ] keep string>> length and ;
! Regexp flavour of text-matches?: hands the input to match-head.
! The input is first converted with >string; callers in this file
! pass rest-of-line, which is a slice rather than a string.
M: regexp text-matches?
    swap >string swap match-head ;
75 : rule-start-matches? ( rule -- match-count/f )
76 dup start>> tuck swap can-match-here? [
77 rest-of-line swap text>> text-matches?
82 : rule-end-matches? ( rule -- match-count/f )
83 dup mark-following-rule? [
84 dup start>> swap can-match-here? 0 and
86 dup end>> tuck swap can-match-here? [
88 swap text>> context get end>> or
! Appends to the accumulator the rules that the rule set stores
! under the key f (rules not tied to a particular character).
! The accumulator may itself be f; ?push-all handles that case.
: get-always-rules ( vector/f ruleset -- vector/f )
    rules>> f swap at ?push-all ;
! Appends to the accumulator the rules that the rule set stores
! under the given character. The character is upper-cased before
! the lookup.
: get-char-rules ( vector/f char ruleset -- vector/f )
    swap ch>upper swap
    rules>> at ?push-all ;
! Collects the candidate rules for a character: first the rules
! keyed by the character itself, then the rules keyed by f.
! Starts from an f accumulator, so the result is whatever the two
! ?push-all steps produce.
: get-rules ( char ruleset -- seq )
    ! Keep a copy of the rule set underneath for the second lookup.
    tuck f -rot get-char-rules
    swap get-always-rules ;
! Called once a rule's start has matched. match-count is the
! non-f value that rule-start-matches? produced for that rule.
106 GENERIC: handle-rule-start ( match-count rule -- )
! Called once a rule's end has matched. match-count is the
! non-f value that rule-end-matches? produced for that rule.
108 GENERIC: handle-rule-end ( match-count rule -- )
110 : find-escape-rule ( -- rule )
112 in-rule-set>> escape-rule>> [ ] [
113 parent>> in-rule-set>>
114 dup [ escape-rule>> ] when
117 : check-escape-rule ( rule -- ? )
119 find-escape-rule dup [
120 dup rule-start-matches? dup [
121 swap handle-rule-start
122 delegate-end-escaped? [ not ] change
! Tries each rule applicable to the current character, in order,
! until one reports a start match. That rule's handle-rule-start
! is then run with the match count and t is returned. Returns f
! when no rule matches.
: check-every-rule ( -- ? )
    current-char current-rule-set get-rules
    [ rule-start-matches? ] map-find
    ! map-find leaves ( match-count rule ); rule is f on failure.
    [ handle-rule-start t ] [ drop f ] if* ;
137 dup rule-end-matches?
138 dup [ swap handle-rule-end ] [ 2drop ] if
141 : rule-match-token* ( rule -- id )
143 { f [ dup body-token>> ] }
144 { t [ current-rule-set default>> ] }
148 M: escape-rule handle-rule-start
151 process-escape? get [
152 escaped? [ not ] change
153 position [ + ] change
156 M: seq-rule handle-rule-start
160 tuck body-token>> next-token,
161 delegate>> [ push-context ] when* ;
! Union class of span-rule and eol-span-rule, so that one
! handle-rule-start method can serve both.
163 UNION: abstract-span-rule span-rule eol-span-rule ;
165 M: abstract-span-rule handle-rule-start
169 tuck rule-match-token* next-token,
171 dup context get (>>in-rule)
172 delegate>> push-context ;
174 M: span-rule handle-rule-end
177 M: mark-following-rule handle-rule-start
179 mark-token add-remaining-token
180 tuck rule-match-token* next-token,
181 f context get (>>end)
182 context get (>>in-rule) ;
! End of a mark-following rule: the match count is ignored, the
! rule's token id is passed to prev-token, and the current
! context's in-rule slot is cleared.
M: mark-following-rule handle-rule-end
    nip rule-match-token* prev-token,
    context get f >>in-rule drop ;
188 M: mark-previous-rule handle-rule-start
191 dup body-token>> prev-token,
192 rule-match-token* next-token, ;
200 : check-end-delegate ( -- ? )
201 context get parent>> [
203 dup rule-end-matches? dup [
209 ] keep context get parent>> in-rule>>
210 rule-match-token* next-token,
212 seen-whitespace-end? on t
213 ] [ drop check-escape-rule ] if
217 : handle-no-word-break ( -- )
218 context get parent>> [
220 dup no-word-break?>> [
221 rule-match-token* prev-token,
231 add-remaining-token ;
233 : (check-word-break) ( -- )
236 1 current-rule-set default>> next-token, ;
! True when the rule set has neither rules nor keywords, i.e.
! both its rules and keywords assocs are empty.
: rule-set-empty? ( ruleset -- ? )
    [ rules>> assoc-empty? ]
    [ keywords>> assoc-empty? ] bi
    and ;
242 : check-word-break ( -- ? )
243 current-char dup blank? [
246 seen-whitespace-end? get [
247 position get 1+ whitespace-end set
253 ! Micro-optimization with incorrect semantics; we keep
254 ! it here because jEdit mode files depend on it now...
255 current-rule-set rule-set-empty? [
261 current-rule-set rule-set-no-word-sep* member? [
267 seen-whitespace-end? on
270 delegate-end-escaped? off t ;
273 : mark-token-loop ( -- )
274 position get line get length < [
276 [ check-end-delegate ]
285 : mark-remaining ( -- )
286 line get length position set
289 : unwind-no-line-break ( -- )
290 context get parent>> [
299 : tokenize-line ( line-context line rules -- line-context' seq )