basis/xmode/marker/marker.factor

   1 ! Copyright (C) 2008 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: kernel namespaces make xmode.rules xmode.tokens
   4 xmode.marker.state xmode.marker.context xmode.utilities
   5 xmode.catalog sequences math assocs combinators strings
   6 regexp splitting unicode.case
   7 combinators.short-circuit accessors ;
   8 IN: xmode.marker
   9
  10 ! Next two words copied from parser-combinators
  11 ! Just like head?, but they optionally ignore case
  12
  13 : string= ( str1 str2 ignore-case -- ? )
  14     [ [ >upper ] bi@ ] when sequence= ;
  15
  16 : string-head? ( str1 str2 ignore-case -- ? )
  17     2over shorter?
  18     [ 3drop f ] [
  19         [
  20             [ nip ]
  21             [ length head-slice ] 2bi
  22         ] dip string=
  23     ] if ;
  24
  25 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
  26
  27 : current-keyword ( -- string )
  28     last-offset get position get line get subseq ;
  29
  30 : keyword-number? ( keyword -- ? )
  31     {
  32         [ current-rule-set highlight-digits?>> ]
  33         [ dup [ digit? ] any? ]
  34         [
  35             dup [ digit? ] all? [
  36                 current-rule-set digit-re>>
  37                 dup [ dupd matches? ] [ drop f ] if
  38             ] unless*
  39         ]
  40     } 0&& nip ;
  41
  42 : mark-number ( keyword -- id )
  43     keyword-number? DIGIT and ;
  44
  45 : mark-keyword ( keyword -- id )
  46     current-rule-set keywords>> at ;
  47
  48 : add-remaining-token ( -- )
  49     current-rule-set default>> prev-token, ;
  50
  51 : mark-token ( -- )
  52     current-keyword
  53     dup mark-number [ ] [ mark-keyword ] ?if
  54     [ prev-token, ] when* ;
  55
  56 : current-char ( -- char )
  57     position get line get nth ;
  58
  59 GENERIC: match-position ( rule -- n )
  60
  61 M: mark-previous-rule match-position drop last-offset get ;
  62
  63 M: rule match-position drop position get ;
  64
  65 : can-match-here? ( matcher rule -- ? )
  66     match-position {
  67         [ over ]
  68         [ over at-line-start?>>     over zero?                implies ]
  69         [ over at-whitespace-end?>> over whitespace-end get = implies ]
  70         [ over at-word-start?>>     over last-offset get =    implies ]
  71     } 0&& 2nip ;
  72
  73 : rest-of-line ( -- str )
  74     line get position get tail-slice ;
  75
  76 GENERIC: text-matches? ( string text -- match-count/f )
  77
  78 M: f text-matches?
  79     2drop f ;
  80
  81 M: string-matcher text-matches?
  82     [
  83         [ string>> ] [ ignore-case?>> ] bi string-head?
  84     ] keep string>> length and ;
  85
  86 M: regexp text-matches?
  87     [ >string ] dip match-head ;
  88
  89 : rule-start-matches? ( rule -- match-count/f )
  90     dup start>> tuck swap can-match-here? [
  91         rest-of-line swap text>> text-matches?
  92     ] [
  93         drop f
  94     ] if ;
  95
  96 : rule-end-matches? ( rule -- match-count/f )
  97     dup mark-following-rule? [
  98         dup start>> swap can-match-here? 0 and
  99     ] [
 100         dup end>> tuck swap can-match-here? [
 101             rest-of-line
 102             swap text>> context get end>> or
 103             text-matches?
 104         ] [
 105             drop f
 106         ] if
 107     ] if ;
 108
 109 DEFER: get-rules
 110
 111 : get-always-rules ( vector/f ruleset -- vector/f )
 112     f swap rules>> at ?push-all ;
 113
 114 : get-char-rules ( vector/f char ruleset -- vector/f )
 115     [ ch>upper ] dip rules>> at ?push-all ;
 116
 117 : get-rules ( char ruleset -- seq )
 118     [ f ] 2dip [ get-char-rules ] keep get-always-rules ;
 119
 120 GENERIC: handle-rule-start ( match-count rule -- )
 121
 122 GENERIC: handle-rule-end ( match-count rule -- )
 123
 124 : find-escape-rule ( -- rule )
 125     context get dup
 126     in-rule-set>> escape-rule>> [ ] [
 127         parent>> in-rule-set>>
 128         dup [ escape-rule>> ] when
 129     ] ?if ;
 130
 131 : check-escape-rule ( rule -- ? )
 132     no-escape?>> [ f ] [
 133         find-escape-rule dup [
 134             dup rule-start-matches? dup [
 135                 swap handle-rule-start
 136                 delegate-end-escaped? [ not ] change
 137                 t
 138             ] [
 139                 2drop f
 140             ] if
 141         ] when
 142     ] if ;
 143
 144 : check-every-rule ( -- ? )
 145     current-char current-rule-set get-rules
 146     [ rule-start-matches? ] map-find
 147     dup [ handle-rule-start t ] [ 2drop f ] if ;
 148
 149 : ?end-rule ( -- )
 150     current-rule [
 151         dup rule-end-matches?
 152         dup [ swap handle-rule-end ] [ 2drop ] if
 153     ] when* ;
 154
 155 : rule-match-token* ( rule -- id )
 156     dup match-token>> {
 157         { f [ dup body-token>> ] }
 158         { t [ current-rule-set default>> ] }
 159         [ ]
 160     } case nip ;
 161
 162 M: escape-rule handle-rule-start
 163     drop
 164     ?end-rule
 165     process-escape? get [
 166         escaped? [ not ] change
 167         position [ + ] change
 168     ] [ drop ] if ;
 169
 170 M: seq-rule handle-rule-start
 171     ?end-rule
 172     mark-token
 173     add-remaining-token
 174     tuck body-token>> next-token,
 175     delegate>> [ push-context ] when* ;
 176
 177 UNION: abstract-span-rule span-rule eol-span-rule ;
 178
 179 M: abstract-span-rule handle-rule-start
 180     ?end-rule
 181     mark-token
 182     add-remaining-token
 183     tuck rule-match-token* next-token,
 184     ! ... end subst ...
 185     dup context get (>>in-rule)
 186     delegate>> push-context ;
 187
 188 M: span-rule handle-rule-end
 189     2drop ;
 190
 191 M: mark-following-rule handle-rule-start
 192     ?end-rule
 193     mark-token add-remaining-token
 194     tuck rule-match-token* next-token,
 195     f context get (>>end)
 196     context get (>>in-rule) ;
 197
 198 M: mark-following-rule handle-rule-end
 199     nip rule-match-token* prev-token,
 200     f context get (>>in-rule) ;
 201
 202 M: mark-previous-rule handle-rule-start
 203     ?end-rule
 204     mark-token
 205     dup body-token>> prev-token,
 206     rule-match-token* next-token, ;
 207
 208 : do-escaped ( -- )
 209     escaped? get [
 210         escaped? off
 211         ! ...
 212     ] when ;
 213
 214 : check-end-delegate ( -- ? )
 215     context get parent>> [
 216         in-rule>> [
 217             dup rule-end-matches? dup [
 218                 [
 219                     swap handle-rule-end
 220                     ?end-rule
 221                     mark-token
 222                     add-remaining-token
 223                 ] keep context get parent>> in-rule>>
 224                 rule-match-token* next-token,
 225                 pop-context
 226                 seen-whitespace-end? on t
 227             ] [ drop check-escape-rule ] if
 228         ] [ f ] if*
 229     ] [ f ] if* ;
 230
 231 : handle-no-word-break ( -- )
 232     context get parent>> [
 233         in-rule>> [
 234             dup no-word-break?>> [
 235                 rule-match-token* prev-token,
 236                 pop-context
 237             ] [ drop ] if
 238         ] when*
 239     ] when* ;
 240
 241 : check-rule ( -- )
 242     ?end-rule
 243     handle-no-word-break
 244     mark-token
 245     add-remaining-token ;
 246
 247 : (check-word-break) ( -- )
 248     check-rule
 249
 250     1 current-rule-set default>> next-token, ;
 251
 252 : rule-set-empty? ( ruleset -- ? )
 253     [ rules>> ] [ keywords>> ] bi
 254     [ assoc-empty? ] bi@ and ;
 255
 256 : check-word-break ( -- ? )
 257     current-char dup blank? [
 258         drop
 259
 260         seen-whitespace-end? get [
 261             position get 1+ whitespace-end set
 262         ] unless
 263
 264         (check-word-break)
 265
 266     ] [
 267         ! Micro-optimization with incorrect semantics; we keep
 268         ! it here because jEdit mode files depend on it now...
 269         current-rule-set rule-set-empty? [
 270             drop
 271         ] [
 272             dup alpha? [
 273                 drop
 274             ] [
 275                 current-rule-set rule-set-no-word-sep* member? [
 276                     (check-word-break)
 277                 ] unless
 278             ] if
 279         ] if
 280
 281         seen-whitespace-end? on
 282     ] if
 283     escaped? off
 284     delegate-end-escaped? off t ;
 285
 286
 287 : mark-token-loop ( -- )
 288     position get line get length < [
 289         {
 290             [ check-end-delegate ]
 291             [ check-every-rule ]
 292             [ check-word-break ]
 293         } 0|| drop
 294
 295         position inc
 296         mark-token-loop
 297     ] when ;
 298
 299 : mark-remaining ( -- )
 300     line get length position set
 301     check-rule ;
 302
 303 : unwind-no-line-break ( -- )
 304     context get parent>> [
 305         in-rule>> [
 306             no-line-break?>> [
 307                 pop-context
 308                 unwind-no-line-break
 309             ] when
 310         ] when*
 311     ] when* ;
 312
 313 : tokenize-line ( line-context line rules -- line-context' seq )
 314     [
 315         "MAIN" swap at -rot
 316         init-token-marker
 317         mark-token-loop
 318         mark-remaining
 319         unwind-no-line-break
 320         context get
 321     ] { } make ;