extra/xmode/marker/marker.factor

   1 IN: xmode.marker
   2 USING: kernel namespaces xmode.rules xmode.tokens
   3 xmode.marker.state xmode.marker.context xmode.utilities
   4 xmode.catalog sequences math assocs combinators combinators.lib
   5 strings regexp splitting parser-combinators ascii unicode.case ;
   6
   7 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
   8
   9 : current-keyword ( -- string )
  10     last-offset get position get line get subseq ;
  11
  12 : keyword-number? ( keyword -- ? )
  13     {
  14         [ current-rule-set rule-set-highlight-digits? ]
  15         [ dup [ digit? ] contains? ]
  16         [
  17             dup [ digit? ] all? [
  18                 current-rule-set rule-set-digit-re
  19                 dup [ dupd matches? ] [ drop f ] if
  20             ] unless*
  21         ]
  22     } && nip ;
  23
  24 : mark-number ( keyword -- id )
  25     keyword-number? DIGIT and ;
  26
  27 : mark-keyword ( keyword -- id )
  28     current-rule-set rule-set-keywords at ;
  29
  30 : add-remaining-token ( -- )
  31     current-rule-set rule-set-default prev-token, ;
  32
  33 : mark-token ( -- )
  34     current-keyword
  35     dup mark-number [ ] [ mark-keyword ] ?if
  36     [ prev-token, ] when* ;
  37
  38 : current-char ( -- char )
  39     position get line get nth ;
  40
  41 GENERIC: match-position ( rule -- n )
  42
  43 M: mark-previous-rule match-position drop last-offset get ;
  44
  45 M: rule match-position drop position get ;
  46
  47 : can-match-here? ( matcher rule -- ? )
  48     match-position {
  49         [ over ]
  50         [ over matcher-at-line-start?     over zero?                implies ]
  51         [ over matcher-at-whitespace-end? over whitespace-end get = implies ]
  52         [ over matcher-at-word-start?     over last-offset get =    implies ]
  53     } && 2nip ;
  54
  55 : rest-of-line ( -- str )
  56     line get position get tail-slice ;
  57
  58 GENERIC: text-matches? ( string text -- match-count/f )
  59
  60 M: f text-matches?
  61     2drop f ;
  62
  63 M: string-matcher text-matches?
  64     [
  65         dup string-matcher-string
  66         swap string-matcher-ignore-case?
  67         string-head?
  68     ] keep string-matcher-string length and ;
  69
  70 M: regexp text-matches?
  71     >r >string r> match-head ;
  72
  73 : rule-start-matches? ( rule -- match-count/f )
  74     dup rule-start tuck swap can-match-here? [
  75         rest-of-line swap matcher-text text-matches?
  76     ] [
  77         drop f
  78     ] if ;
  79
  80 : rule-end-matches? ( rule -- match-count/f )
  81     dup mark-following-rule? [
  82         dup rule-start swap can-match-here? 0 and
  83     ] [
  84         dup rule-end tuck swap can-match-here? [
  85             rest-of-line
  86             swap matcher-text context get line-context-end or
  87             text-matches?
  88         ] [
  89             drop f
  90         ] if
  91     ] if ;
  92
  93 DEFER: get-rules
  94
  95 : get-always-rules ( vector/f ruleset -- vector/f )
  96     f swap rule-set-rules at ?push-all ;
  97
  98 : get-char-rules ( vector/f char ruleset -- vector/f )
  99     >r ch>upper r> rule-set-rules at ?push-all ;
 100
 101 : get-rules ( char ruleset -- seq )
 102     f -rot [ get-char-rules ] keep get-always-rules ;
 103
 104 GENERIC: handle-rule-start ( match-count rule -- )
 105
 106 GENERIC: handle-rule-end ( match-count rule -- )
 107
 108 : find-escape-rule ( -- rule )
 109     context get dup
 110     line-context-in-rule-set rule-set-escape-rule [ ] [
 111         line-context-parent line-context-in-rule-set
 112         dup [ rule-set-escape-rule ] when
 113     ] ?if ;
 114
 115 : check-escape-rule ( rule -- ? )
 116     rule-no-escape? [ f ] [
 117         find-escape-rule dup [
 118             dup rule-start-matches? dup [
 119                 swap handle-rule-start
 120                 delegate-end-escaped? [ not ] change
 121                 t
 122             ] [
 123                 2drop f
 124             ] if
 125         ] when
 126     ] if ;
 127
 128 : check-every-rule ( -- ? )
 129     current-char current-rule-set get-rules
 130     [ rule-start-matches? ] map-find
 131     dup [ handle-rule-start t ] [ 2drop f ] if ;
 132
 133 : ?end-rule ( -- )
 134     current-rule [
 135         dup rule-end-matches?
 136         dup [ swap handle-rule-end ] [ 2drop ] if
 137     ] when* ;
 138
 139 : rule-match-token* ( rule -- id )
 140     dup rule-match-token {
 141         { f [ dup rule-body-token ] }
 142         { t [ current-rule-set rule-set-default ] }
 143         [ ]
 144     } case nip ;
 145
 146 M: escape-rule handle-rule-start
 147     drop
 148     ?end-rule
 149     process-escape? get [
 150         escaped? [ not ] change
 151         position [ + ] change
 152     ] [ 2drop ] if ;
 153
 154 M: seq-rule handle-rule-start
 155     ?end-rule
 156     mark-token
 157     add-remaining-token
 158     tuck rule-body-token next-token,
 159     rule-delegate [ push-context ] when* ;
 160
 161 UNION: abstract-span-rule span-rule eol-span-rule ;
 162
 163 M: abstract-span-rule handle-rule-start
 164     ?end-rule
 165     mark-token
 166     add-remaining-token
 167     tuck rule-match-token* next-token,
 168     ! ... end subst ...
 169     dup context get set-line-context-in-rule
 170     rule-delegate push-context ;
 171
 172 M: span-rule handle-rule-end
 173     2drop ;
 174
 175 M: mark-following-rule handle-rule-start
 176     ?end-rule
 177     mark-token add-remaining-token
 178     tuck rule-match-token* next-token,
 179     f context get set-line-context-end
 180     context get set-line-context-in-rule ;
 181
 182 M: mark-following-rule handle-rule-end
 183     nip rule-match-token* prev-token,
 184     f context get set-line-context-in-rule ;
 185
 186 M: mark-previous-rule handle-rule-start
 187     ?end-rule
 188     mark-token
 189     dup rule-body-token prev-token,
 190     rule-match-token* next-token, ;
 191
 192 : do-escaped ( -- )
 193     escaped? get [
 194         escaped? off
 195         ! ...
 196     ] when ;
 197
 198 : check-end-delegate ( -- ? )
 199     context get line-context-parent [
 200         line-context-in-rule [
 201             dup rule-end-matches? dup [
 202                 [
 203                     swap handle-rule-end
 204                     ?end-rule
 205                     mark-token
 206                     add-remaining-token
 207                 ] keep context get line-context-parent line-context-in-rule rule-match-token* next-token,
 208                 pop-context
 209                 seen-whitespace-end? on t
 210             ] [ drop check-escape-rule ] if
 211         ] [ f ] if*
 212     ] [ f ] if* ;
 213
 214 : handle-no-word-break ( -- )
 215     context get line-context-parent [
 216         line-context-in-rule [
 217             dup rule-no-word-break? [
 218                 rule-match-token* prev-token,
 219                 pop-context
 220             ] [ drop ] if
 221         ] when*
 222     ] when* ;
 223
 224 : check-rule ( -- )
 225     ?end-rule
 226     handle-no-word-break
 227     mark-token
 228     add-remaining-token ;
 229
 230 : (check-word-break) ( -- )
 231     check-rule
 232
 233     1 current-rule-set rule-set-default next-token, ;
 234
 235 : rule-set-empty? ( ruleset -- ? )
 236     dup rule-set-rules assoc-empty?
 237     swap rule-set-keywords assoc-empty? and ;
 238
 239 : check-word-break ( -- ? )
 240     current-char dup blank? [
 241         drop
 242
 243         seen-whitespace-end? get [
 244             position get 1+ whitespace-end set
 245         ] unless
 246
 247         (check-word-break)
 248
 249     ] [
 250         ! Micro-optimization with incorrect semantics; we keep
 251         ! it here because jEdit mode files depend on it now...
 252         current-rule-set rule-set-empty? [
 253             drop
 254         ] [
 255             dup alpha? [
 256                 drop
 257             ] [
 258                 current-rule-set rule-set-no-word-sep* member? [
 259                     (check-word-break)
 260                 ] unless
 261             ] if
 262         ] if
 263
 264         seen-whitespace-end? on
 265     ] if
 266     escaped? off
 267     delegate-end-escaped? off t ;
 268
 269
 270 : mark-token-loop ( -- )
 271     position get line get length < [
 272         {
 273             [ check-end-delegate ]
 274             [ check-every-rule ]
 275             [ check-word-break ]
 276         } || drop
 277
 278         position inc
 279         mark-token-loop
 280     ] when ;
 281
 282 : mark-remaining ( -- )
 283     line get length position set
 284     check-rule ;
 285
 286 : unwind-no-line-break ( -- )
 287     context get line-context-parent [
 288         line-context-in-rule [
 289             rule-no-line-break? [
 290                 pop-context
 291                 unwind-no-line-break
 292             ] when
 293         ] when*
 294     ] when* ;
 295
 296 : tokenize-line ( line-context line rules -- line-context' seq )
 297     [
 298         "MAIN" swap at -rot
 299         init-token-marker
 300         mark-token-loop
 301         mark-remaining
 302         unwind-no-line-break
 303         context get
 304     ] { } make ;