basis/xmode/marker/marker.factor

   1 ! Copyright (C) 2008 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: kernel namespaces make xmode.rules xmode.tokens
   4 xmode.marker.state xmode.marker.context xmode.utilities
   5 xmode.catalog sequences math assocs combinators strings
   6 regexp splitting ascii combinators.short-circuit accessors ;
   7 IN: xmode.marker
   8
   9 ! Next two words copied from parser-combinators
  10 ! Just like head?, but they optionally ignore case
  11
  12 : string= ( str1 str2 ignore-case -- ? )
  13     [ [ >upper ] bi@ ] when sequence= ;
  14
  15 : string-head? ( str1 str2 ignore-case -- ? )
  16     2over shorter?
  17     [ 3drop f ] [
  18         [
  19             [ nip ]
  20             [ length head-slice ] 2bi
  21         ] dip string=
  22     ] if ;
  23
  24 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
  25
  26 : current-keyword ( -- string )
  27     last-offset get position get line get subseq ;
  28
  29 : keyword-number? ( keyword -- ? )
  30     {
  31         [ current-rule-set highlight-digits?>> ]
  32         [ dup [ digit? ] any? ]
  33         [
  34             dup [ digit? ] all? [
  35                 current-rule-set digit-re>>
  36                 dup [ dupd matches? ] [ drop f ] if
  37             ] unless*
  38         ]
  39     } 0&& nip ;
  40
  41 : mark-number ( keyword -- id )
  42     keyword-number? DIGIT and ;
  43
  44 : mark-keyword ( keyword -- id )
  45     current-rule-set keywords>> at ;
  46
  47 : add-remaining-token ( -- )
  48     current-rule-set default>> prev-token, ;
  49
  50 : mark-token ( -- )
  51     current-keyword
  52     dup mark-number [ ] [ mark-keyword ] ?if
  53     [ prev-token, ] when* ;
  54
  55 : current-char ( -- char )
  56     position get line get nth ;
  57
  58 GENERIC: match-position ( rule -- n )
  59
  60 M: mark-previous-rule match-position drop last-offset get ;
  61
  62 M: rule match-position drop position get ;
  63
  64 : can-match-here? ( matcher rule -- ? )
  65     match-position {
  66         [ over ]
  67         [ over at-line-start?>>     over zero?                implies ]
  68         [ over at-whitespace-end?>> over whitespace-end get = implies ]
  69         [ over at-word-start?>>     over last-offset get =    implies ]
  70     } 0&& 2nip ;
  71
  72 : rest-of-line ( -- str )
  73     line get position get tail-slice ;
  74
  75 GENERIC: text-matches? ( string text -- match-count/f )
  76
  77 M: f text-matches?
  78     2drop f ;
  79
  80 M: string-matcher text-matches?
  81     [
  82         [ string>> ] [ ignore-case?>> ] bi string-head?
  83     ] keep string>> length and ;
  84
  85 M: regexp text-matches?
  86     [ >string ] dip first-match dup [ to>> ] when ;
  87
  88 : rule-start-matches? ( rule -- match-count/f )
  89     dup start>> tuck swap can-match-here? [
  90         rest-of-line swap text>> text-matches?
  91     ] [
  92         drop f
  93     ] if ;
  94
  95 : rule-end-matches? ( rule -- match-count/f )
  96     dup mark-following-rule? [
  97         dup start>> swap can-match-here? 0 and
  98     ] [
  99         dup end>> tuck swap can-match-here? [
 100             rest-of-line
 101             swap text>> context get end>> or
 102             text-matches?
 103         ] [
 104             drop f
 105         ] if
 106     ] if ;
 107
 108 DEFER: get-rules
 109
 110 : get-always-rules ( vector/f ruleset -- vector/f )
 111     f swap rules>> at ?push-all ;
 112
 113 : get-char-rules ( vector/f char ruleset -- vector/f )
 114     [ ch>upper ] dip rules>> at ?push-all ;
 115
 116 : get-rules ( char ruleset -- seq )
 117     [ f ] 2dip [ get-char-rules ] keep get-always-rules ;
 118
 119 GENERIC: handle-rule-start ( match-count rule -- )
 120
 121 GENERIC: handle-rule-end ( match-count rule -- )
 122
 123 : find-escape-rule ( -- rule )
 124     context get dup
 125     in-rule-set>> escape-rule>> [ ] [
 126         parent>> in-rule-set>>
 127         dup [ escape-rule>> ] when
 128     ] ?if ;
 129
 130 : check-escape-rule ( rule -- ? )
 131     no-escape?>> [ f ] [
 132         find-escape-rule dup [
 133             dup rule-start-matches? dup [
 134                 swap handle-rule-start
 135                 delegate-end-escaped? [ not ] change
 136                 t
 137             ] [
 138                 2drop f
 139             ] if
 140         ] when
 141     ] if ;
 142
 143 : check-every-rule ( -- ? )
 144     current-char current-rule-set get-rules
 145     [ rule-start-matches? ] map-find
 146     dup [ handle-rule-start t ] [ 2drop f ] if ;
 147
 148 : ?end-rule ( -- )
 149     current-rule [
 150         dup rule-end-matches?
 151         dup [ swap handle-rule-end ] [ 2drop ] if
 152     ] when* ;
 153
 154 : rule-match-token* ( rule -- id )
 155     dup match-token>> {
 156         { f [ dup body-token>> ] }
 157         { t [ current-rule-set default>> ] }
 158         [ ]
 159     } case nip ;
 160
 161 M: escape-rule handle-rule-start
 162     drop
 163     ?end-rule
 164     process-escape? get [
 165         escaped? [ not ] change
 166         position [ + ] change
 167     ] [ drop ] if ;
 168
 169 M: seq-rule handle-rule-start
 170     ?end-rule
 171     mark-token
 172     add-remaining-token
 173     tuck body-token>> next-token,
 174     delegate>> [ push-context ] when* ;
 175
 176 UNION: abstract-span-rule span-rule eol-span-rule ;
 177
 178 M: abstract-span-rule handle-rule-start
 179     ?end-rule
 180     mark-token
 181     add-remaining-token
 182     tuck rule-match-token* next-token,
 183     ! ... end subst ...
 184     dup context get (>>in-rule)
 185     delegate>> push-context ;
 186
 187 M: span-rule handle-rule-end
 188     2drop ;
 189
 190 M: mark-following-rule handle-rule-start
 191     ?end-rule
 192     mark-token add-remaining-token
 193     tuck rule-match-token* next-token,
 194     f context get (>>end)
 195     context get (>>in-rule) ;
 196
 197 M: mark-following-rule handle-rule-end
 198     nip rule-match-token* prev-token,
 199     f context get (>>in-rule) ;
 200
 201 M: mark-previous-rule handle-rule-start
 202     ?end-rule
 203     mark-token
 204     dup body-token>> prev-token,
 205     rule-match-token* next-token, ;
 206
 207 : do-escaped ( -- )
 208     escaped? get [
 209         escaped? off
 210         ! ...
 211     ] when ;
 212
 213 : check-end-delegate ( -- ? )
 214     context get parent>> [
 215         in-rule>> [
 216             dup rule-end-matches? dup [
 217                 [
 218                     swap handle-rule-end
 219                     ?end-rule
 220                     mark-token
 221                     add-remaining-token
 222                 ] keep context get parent>> in-rule>>
 223                 rule-match-token* next-token,
 224                 pop-context
 225                 seen-whitespace-end? on t
 226             ] [ drop check-escape-rule ] if
 227         ] [ f ] if*
 228     ] [ f ] if* ;
 229
 230 : handle-no-word-break ( -- )
 231     context get parent>> [
 232         in-rule>> [
 233             dup no-word-break?>> [
 234                 rule-match-token* prev-token,
 235                 pop-context
 236             ] [ drop ] if
 237         ] when*
 238     ] when* ;
 239
 240 : check-rule ( -- )
 241     ?end-rule
 242     handle-no-word-break
 243     mark-token
 244     add-remaining-token ;
 245
 246 : (check-word-break) ( -- )
 247     check-rule
 248
 249     1 current-rule-set default>> next-token, ;
 250
 251 : rule-set-empty? ( ruleset -- ? )
 252     [ rules>> ] [ keywords>> ] bi
 253     [ assoc-empty? ] bi@ and ;
 254
 255 : check-word-break ( -- ? )
 256     current-char dup blank? [
 257         drop
 258
 259         seen-whitespace-end? get [
 260             position get 1+ whitespace-end set
 261         ] unless
 262
 263         (check-word-break)
 264
 265     ] [
 266         ! Micro-optimization with incorrect semantics; we keep
 267         ! it here because jEdit mode files depend on it now...
 268         current-rule-set rule-set-empty? [
 269             drop
 270         ] [
 271             dup alpha? [
 272                 drop
 273             ] [
 274                 current-rule-set rule-set-no-word-sep* member? [
 275                     (check-word-break)
 276                 ] unless
 277             ] if
 278         ] if
 279
 280         seen-whitespace-end? on
 281     ] if
 282     escaped? off
 283     delegate-end-escaped? off t ;
 284
 285
 286 : mark-token-loop ( -- )
 287     position get line get length < [
 288         {
 289             [ check-end-delegate ]
 290             [ check-every-rule ]
 291             [ check-word-break ]
 292         } 0|| drop
 293
 294         position inc
 295         mark-token-loop
 296     ] when ;
 297
 298 : mark-remaining ( -- )
 299     line get length position set
 300     check-rule ;
 301
 302 : unwind-no-line-break ( -- )
 303     context get parent>> [
 304         in-rule>> [
 305             no-line-break?>> [
 306                 pop-context
 307                 unwind-no-line-break
 308             ] when
 309         ] when*
 310     ] when* ;
 311
 312 : tokenize-line ( line-context line rules -- line-context' seq )
 313     [
 314         "MAIN" swap at -rot
 315         init-token-marker
 316         mark-token-loop
 317         mark-remaining
 318         unwind-no-line-break
 319         context get
 320     ] { } make ;