basis/xmode/marker/marker.factor

   1 IN: xmode.marker
   2 USING: kernel namespaces xmode.rules xmode.tokens
   3 xmode.marker.state xmode.marker.context xmode.utilities
   4 xmode.catalog sequences math assocs combinators
   5 strings regexp splitting parser-combinators ascii unicode.case
   6 combinators.short-circuit accessors ;
   7
   8 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
   9
  10 : current-keyword ( -- string )
  11     last-offset get position get line get subseq ;
  12
  13 : keyword-number? ( keyword -- ? )
  14     {
  15         [ current-rule-set highlight-digits?>> ]
  16         [ dup [ digit? ] contains? ]
  17         [
  18             dup [ digit? ] all? [
  19                 current-rule-set digit-re>>
  20                 dup [ dupd matches? ] [ drop f ] if
  21             ] unless*
  22         ]
  23     } 0&& nip ;
  24
  25 : mark-number ( keyword -- id )
  26     keyword-number? DIGIT and ;
  27
  28 : mark-keyword ( keyword -- id )
  29     current-rule-set keywords>> at ;
  30
  31 : add-remaining-token ( -- )
  32     current-rule-set default>> prev-token, ;
  33
  34 : mark-token ( -- )
  35     current-keyword
  36     dup mark-number [ ] [ mark-keyword ] ?if
  37     [ prev-token, ] when* ;
  38
  39 : current-char ( -- char )
  40     position get line get nth ;
  41
  42 GENERIC: match-position ( rule -- n )
  43
  44 M: mark-previous-rule match-position drop last-offset get ;
  45
  46 M: rule match-position drop position get ;
  47
  48 : can-match-here? ( matcher rule -- ? )
  49     match-position {
  50         [ over ]
  51         [ over at-line-start?>>     over zero?                implies ]
  52         [ over at-whitespace-end?>> over whitespace-end get = implies ]
  53         [ over at-word-start?>>     over last-offset get =    implies ]
  54     } 0&& 2nip ;
  55
  56 : rest-of-line ( -- str )
  57     line get position get tail-slice ;
  58
  59 GENERIC: text-matches? ( string text -- match-count/f )
  60
  61 M: f text-matches?
  62     2drop f ;
  63
  64 M: string-matcher text-matches?
  65     [
  66         [ string>> ] [ ignore-case?>> ] bi string-head?
  67     ] keep string>> length and ;
  68
  69 M: regexp text-matches?
  70     >r >string r> match-head ;
  71
  72 : rule-start-matches? ( rule -- match-count/f )
  73     dup start>> tuck swap can-match-here? [
  74         rest-of-line swap text>> text-matches?
  75     ] [
  76         drop f
  77     ] if ;
  78
  79 : rule-end-matches? ( rule -- match-count/f )
  80     dup mark-following-rule? [
  81         dup start>> swap can-match-here? 0 and
  82     ] [
  83         dup end>> tuck swap can-match-here? [
  84             rest-of-line
  85             swap text>> context get end>> or
  86             text-matches?
  87         ] [
  88             drop f
  89         ] if
  90     ] if ;
  91
  92 DEFER: get-rules
  93
  94 : get-always-rules ( vector/f ruleset -- vector/f )
  95     f swap rules>> at ?push-all ;
  96
  97 : get-char-rules ( vector/f char ruleset -- vector/f )
  98     >r ch>upper r> rules>> at ?push-all ;
  99
 100 : get-rules ( char ruleset -- seq )
 101     f -rot [ get-char-rules ] keep get-always-rules ;
 102
 103 GENERIC: handle-rule-start ( match-count rule -- )
 104
 105 GENERIC: handle-rule-end ( match-count rule -- )
 106
 107 : find-escape-rule ( -- rule )
 108     context get dup
 109     in-rule-set>> escape-rule>> [ ] [
 110         parent>> in-rule-set>>
 111         dup [ escape-rule>> ] when
 112     ] ?if ;
 113
 114 : check-escape-rule ( rule -- ? )
 115     no-escape?>> [ f ] [
 116         find-escape-rule dup [
 117             dup rule-start-matches? dup [
 118                 swap handle-rule-start
 119                 delegate-end-escaped? [ not ] change
 120                 t
 121             ] [
 122                 2drop f
 123             ] if
 124         ] when
 125     ] if ;
 126
 127 : check-every-rule ( -- ? )
 128     current-char current-rule-set get-rules
 129     [ rule-start-matches? ] map-find
 130     dup [ handle-rule-start t ] [ 2drop f ] if ;
 131
 132 : ?end-rule ( -- )
 133     current-rule [
 134         dup rule-end-matches?
 135         dup [ swap handle-rule-end ] [ 2drop ] if
 136     ] when* ;
 137
 138 : rule-match-token* ( rule -- id )
 139     dup match-token>> {
 140         { f [ dup body-token>> ] }
 141         { t [ current-rule-set default>> ] }
 142         [ ]
 143     } case nip ;
 144
 145 M: escape-rule handle-rule-start
 146     drop
 147     ?end-rule
 148     process-escape? get [
 149         escaped? [ not ] change
 150         position [ + ] change
 151     ] [ 2drop ] if ;
 152
 153 M: seq-rule handle-rule-start
 154     ?end-rule
 155     mark-token
 156     add-remaining-token
 157     tuck body-token>> next-token,
 158     delegate>> [ push-context ] when* ;
 159
 160 UNION: abstract-span-rule span-rule eol-span-rule ;
 161
 162 M: abstract-span-rule handle-rule-start
 163     ?end-rule
 164     mark-token
 165     add-remaining-token
 166     tuck rule-match-token* next-token,
 167     ! ... end subst ...
 168     dup context get (>>in-rule)
 169     delegate>> push-context ;
 170
 171 M: span-rule handle-rule-end
 172     2drop ;
 173
 174 M: mark-following-rule handle-rule-start
 175     ?end-rule
 176     mark-token add-remaining-token
 177     tuck rule-match-token* next-token,
 178     f context get (>>end)
 179     context get (>>in-rule) ;
 180
 181 M: mark-following-rule handle-rule-end
 182     nip rule-match-token* prev-token,
 183     f context get (>>in-rule) ;
 184
 185 M: mark-previous-rule handle-rule-start
 186     ?end-rule
 187     mark-token
 188     dup body-token>> prev-token,
 189     rule-match-token* next-token, ;
 190
 191 : do-escaped ( -- )
 192     escaped? get [
 193         escaped? off
 194         ! ...
 195     ] when ;
 196
 197 : check-end-delegate ( -- ? )
 198     context get parent>> [
 199         in-rule>> [
 200             dup rule-end-matches? dup [
 201                 [
 202                     swap handle-rule-end
 203                     ?end-rule
 204                     mark-token
 205                     add-remaining-token
 206                 ] keep context get parent>> in-rule>>
 207                 rule-match-token* next-token,
 208                 pop-context
 209                 seen-whitespace-end? on t
 210             ] [ drop check-escape-rule ] if
 211         ] [ f ] if*
 212     ] [ f ] if* ;
 213
 214 : handle-no-word-break ( -- )
 215     context get parent>> [
 216         in-rule>> [
 217             dup no-word-break?>> [
 218                 rule-match-token* prev-token,
 219                 pop-context
 220             ] [ drop ] if
 221         ] when*
 222     ] when* ;
 223
 224 : check-rule ( -- )
 225     ?end-rule
 226     handle-no-word-break
 227     mark-token
 228     add-remaining-token ;
 229
 230 : (check-word-break) ( -- )
 231     check-rule
 232
 233     1 current-rule-set default>> next-token, ;
 234
 235 : rule-set-empty? ( ruleset -- ? )
 236     [ rules>> ] [ keywords>> ] bi
 237     [ assoc-empty? ] bi@ and ;
 238
 239 : check-word-break ( -- ? )
 240     current-char dup blank? [
 241         drop
 242
 243         seen-whitespace-end? get [
 244             position get 1+ whitespace-end set
 245         ] unless
 246
 247         (check-word-break)
 248
 249     ] [
 250         ! Micro-optimization with incorrect semantics; we keep
 251         ! it here because jEdit mode files depend on it now...
 252         current-rule-set rule-set-empty? [
 253             drop
 254         ] [
 255             dup alpha? [
 256                 drop
 257             ] [
 258                 current-rule-set rule-set-no-word-sep* member? [
 259                     (check-word-break)
 260                 ] unless
 261             ] if
 262         ] if
 263
 264         seen-whitespace-end? on
 265     ] if
 266     escaped? off
 267     delegate-end-escaped? off t ;
 268
 269
 270 : mark-token-loop ( -- )
 271     position get line get length < [
 272         {
 273             [ check-end-delegate ]
 274             [ check-every-rule ]
 275             [ check-word-break ]
 276         } 0|| drop
 277
 278         position inc
 279         mark-token-loop
 280     ] when ;
 281
 282 : mark-remaining ( -- )
 283     line get length position set
 284     check-rule ;
 285
 286 : unwind-no-line-break ( -- )
 287     context get parent>> [
 288         in-rule>> [
 289             no-line-break?>> [
 290                 pop-context
 291                 unwind-no-line-break
 292             ] when
 293         ] when*
 294     ] when* ;
 295
 296 : tokenize-line ( line-context line rules -- line-context' seq )
 297     [
 298         "MAIN" swap at -rot
 299         init-token-marker
 300         mark-token-loop
 301         mark-remaining
 302         unwind-no-line-break
 303         context get
 304     ] { } make ;