basis/xmode/marker/marker.factor

   1 ! Copyright (C) 2008 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 IN: xmode.marker
   4 USING: kernel namespaces make xmode.rules xmode.tokens
   5 xmode.marker.state xmode.marker.context xmode.utilities
   6 xmode.catalog sequences math assocs combinators
   7 strings parser-combinators.regexp regexp splitting
   8 parser-combinators ascii unicode.case
   9 combinators.short-circuit accessors ;
  10
  11 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
  12
  13 : current-keyword ( -- string )
  14     last-offset get position get line get subseq ;
  15
  16 : keyword-number? ( keyword -- ? )
  17     {
  18         [ current-rule-set highlight-digits?>> ]
  19         [ dup [ digit? ] contains? ]
  20         [
  21             dup [ digit? ] all? [
  22                 current-rule-set digit-re>>
  23                 dup [ dupd matches? ] [ drop f ] if
  24             ] unless*
  25         ]
  26     } 0&& nip ;
  27
  28 : mark-number ( keyword -- id )
  29     keyword-number? DIGIT and ;
  30
  31 : mark-keyword ( keyword -- id )
  32     current-rule-set keywords>> at ;
  33
  34 : add-remaining-token ( -- )
  35     current-rule-set default>> prev-token, ;
  36
  37 : mark-token ( -- )
  38     current-keyword
  39     dup mark-number [ ] [ mark-keyword ] ?if
  40     [ prev-token, ] when* ;
  41
  42 : current-char ( -- char )
  43     position get line get nth ;
  44
  45 GENERIC: match-position ( rule -- n )
  46
  47 M: mark-previous-rule match-position drop last-offset get ;
  48
  49 M: rule match-position drop position get ;
  50
  51 : can-match-here? ( matcher rule -- ? )
  52     match-position {
  53         [ over ]
  54         [ over at-line-start?>>     over zero?                implies ]
  55         [ over at-whitespace-end?>> over whitespace-end get = implies ]
  56         [ over at-word-start?>>     over last-offset get =    implies ]
  57     } 0&& 2nip ;
  58
  59 : rest-of-line ( -- str )
  60     line get position get tail-slice ;
  61
  62 GENERIC: text-matches? ( string text -- match-count/f )
  63
  64 M: f text-matches?
  65     2drop f ;
  66
  67 M: string-matcher text-matches?
  68     [
  69         [ string>> ] [ ignore-case?>> ] bi string-head?
  70     ] keep string>> length and ;
  71
  72 M: regexp text-matches?
  73     >r >string r> match-head ;
  74
  75 : rule-start-matches? ( rule -- match-count/f )
  76     dup start>> tuck swap can-match-here? [
  77         rest-of-line swap text>> text-matches?
  78     ] [
  79         drop f
  80     ] if ;
  81
  82 : rule-end-matches? ( rule -- match-count/f )
  83     dup mark-following-rule? [
  84         dup start>> swap can-match-here? 0 and
  85     ] [
  86         dup end>> tuck swap can-match-here? [
  87             rest-of-line
  88             swap text>> context get end>> or
  89             text-matches?
  90         ] [
  91             drop f
  92         ] if
  93     ] if ;
  94
  95 DEFER: get-rules
  96
  97 : get-always-rules ( vector/f ruleset -- vector/f )
  98     f swap rules>> at ?push-all ;
  99
 100 : get-char-rules ( vector/f char ruleset -- vector/f )
 101     >r ch>upper r> rules>> at ?push-all ;
 102
 103 : get-rules ( char ruleset -- seq )
 104     f -rot [ get-char-rules ] keep get-always-rules ;
 105
 106 GENERIC: handle-rule-start ( match-count rule -- )
 107
 108 GENERIC: handle-rule-end ( match-count rule -- )
 109
 110 : find-escape-rule ( -- rule )
 111     context get dup
 112     in-rule-set>> escape-rule>> [ ] [
 113         parent>> in-rule-set>>
 114         dup [ escape-rule>> ] when
 115     ] ?if ;
 116
 117 : check-escape-rule ( rule -- ? )
 118     no-escape?>> [ f ] [
 119         find-escape-rule dup [
 120             dup rule-start-matches? dup [
 121                 swap handle-rule-start
 122                 delegate-end-escaped? [ not ] change
 123                 t
 124             ] [
 125                 2drop f
 126             ] if
 127         ] when
 128     ] if ;
 129
 130 : check-every-rule ( -- ? )
 131     current-char current-rule-set get-rules
 132     [ rule-start-matches? ] map-find
 133     dup [ handle-rule-start t ] [ 2drop f ] if ;
 134
 135 : ?end-rule ( -- )
 136     current-rule [
 137         dup rule-end-matches?
 138         dup [ swap handle-rule-end ] [ 2drop ] if
 139     ] when* ;
 140
 141 : rule-match-token* ( rule -- id )
 142     dup match-token>> {
 143         { f [ dup body-token>> ] }
 144         { t [ current-rule-set default>> ] }
 145         [ ]
 146     } case nip ;
 147
 148 M: escape-rule handle-rule-start
 149     drop
 150     ?end-rule
 151     process-escape? get [
 152         escaped? [ not ] change
 153         position [ + ] change
 154     ] [ 2drop ] if ;
 155
 156 M: seq-rule handle-rule-start
 157     ?end-rule
 158     mark-token
 159     add-remaining-token
 160     tuck body-token>> next-token,
 161     delegate>> [ push-context ] when* ;
 162
 163 UNION: abstract-span-rule span-rule eol-span-rule ;
 164
 165 M: abstract-span-rule handle-rule-start
 166     ?end-rule
 167     mark-token
 168     add-remaining-token
 169     tuck rule-match-token* next-token,
 170     ! ... end subst ...
 171     dup context get (>>in-rule)
 172     delegate>> push-context ;
 173
 174 M: span-rule handle-rule-end
 175     2drop ;
 176
 177 M: mark-following-rule handle-rule-start
 178     ?end-rule
 179     mark-token add-remaining-token
 180     tuck rule-match-token* next-token,
 181     f context get (>>end)
 182     context get (>>in-rule) ;
 183
 184 M: mark-following-rule handle-rule-end
 185     nip rule-match-token* prev-token,
 186     f context get (>>in-rule) ;
 187
 188 M: mark-previous-rule handle-rule-start
 189     ?end-rule
 190     mark-token
 191     dup body-token>> prev-token,
 192     rule-match-token* next-token, ;
 193
 194 : do-escaped ( -- )
 195     escaped? get [
 196         escaped? off
 197         ! ...
 198     ] when ;
 199
 200 : check-end-delegate ( -- ? )
 201     context get parent>> [
 202         in-rule>> [
 203             dup rule-end-matches? dup [
 204                 [
 205                     swap handle-rule-end
 206                     ?end-rule
 207                     mark-token
 208                     add-remaining-token
 209                 ] keep context get parent>> in-rule>>
 210                 rule-match-token* next-token,
 211                 pop-context
 212                 seen-whitespace-end? on t
 213             ] [ drop check-escape-rule ] if
 214         ] [ f ] if*
 215     ] [ f ] if* ;
 216
 217 : handle-no-word-break ( -- )
 218     context get parent>> [
 219         in-rule>> [
 220             dup no-word-break?>> [
 221                 rule-match-token* prev-token,
 222                 pop-context
 223             ] [ drop ] if
 224         ] when*
 225     ] when* ;
 226
 227 : check-rule ( -- )
 228     ?end-rule
 229     handle-no-word-break
 230     mark-token
 231     add-remaining-token ;
 232
 233 : (check-word-break) ( -- )
 234     check-rule
 235
 236     1 current-rule-set default>> next-token, ;
 237
 238 : rule-set-empty? ( ruleset -- ? )
 239     [ rules>> ] [ keywords>> ] bi
 240     [ assoc-empty? ] bi@ and ;
 241
 242 : check-word-break ( -- ? )
 243     current-char dup blank? [
 244         drop
 245
 246         seen-whitespace-end? get [
 247             position get 1+ whitespace-end set
 248         ] unless
 249
 250         (check-word-break)
 251
 252     ] [
 253         ! Micro-optimization with incorrect semantics; we keep
 254         ! it here because jEdit mode files depend on it now...
 255         current-rule-set rule-set-empty? [
 256             drop
 257         ] [
 258             dup alpha? [
 259                 drop
 260             ] [
 261                 current-rule-set rule-set-no-word-sep* member? [
 262                     (check-word-break)
 263                 ] unless
 264             ] if
 265         ] if
 266
 267         seen-whitespace-end? on
 268     ] if
 269     escaped? off
 270     delegate-end-escaped? off t ;
 271
 272
 273 : mark-token-loop ( -- )
 274     position get line get length < [
 275         {
 276             [ check-end-delegate ]
 277             [ check-every-rule ]
 278             [ check-word-break ]
 279         } 0|| drop
 280
 281         position inc
 282         mark-token-loop
 283     ] when ;
 284
 285 : mark-remaining ( -- )
 286     line get length position set
 287     check-rule ;
 288
 289 : unwind-no-line-break ( -- )
 290     context get parent>> [
 291         in-rule>> [
 292             no-line-break?>> [
 293                 pop-context
 294                 unwind-no-line-break
 295             ] when
 296         ] when*
 297     ] when* ;
 298
 299 : tokenize-line ( line-context line rules -- line-context' seq )
 300     [
 301         "MAIN" swap at -rot
 302         init-token-marker
 303         mark-token-loop
 304         mark-remaining
 305         unwind-no-line-break
 306         context get
 307     ] { } make ;