basis/xmode/marker/marker.factor

   1 ! Copyright (C) 2008 Slava Pestov.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 IN: xmode.marker
   4 USING: kernel namespaces make xmode.rules xmode.tokens
   5 xmode.marker.state xmode.marker.context xmode.utilities
   6 xmode.catalog sequences math assocs combinators strings
   7 regexp splitting ascii parser-combinators regexp.backend
   8 ascii combinators.short-circuit accessors ;
   9 ! parser-combinators is for the string-head? word
  10 ! regexp.backend is for the regexp class
  11
  12 ! Based on org.gjt.sp.jedit.syntax.TokenMarker
  13
  14 : current-keyword ( -- string )
  15     last-offset get position get line get subseq ;
  16
  17 : keyword-number? ( keyword -- ? )
  18     {
  19         [ current-rule-set highlight-digits?>> ]
  20         [ dup [ digit? ] any? ]
  21         [
  22             dup [ digit? ] all? [
  23                 current-rule-set digit-re>>
  24                 dup [ dupd matches? ] [ drop f ] if
  25             ] unless*
  26         ]
  27     } 0&& nip ;
  28
  29 : mark-number ( keyword -- id )
  30     keyword-number? DIGIT and ;
  31
  32 : mark-keyword ( keyword -- id )
  33     current-rule-set keywords>> at ;
  34
  35 : add-remaining-token ( -- )
  36     current-rule-set default>> prev-token, ;
  37
  38 : mark-token ( -- )
  39     current-keyword
  40     dup mark-number [ ] [ mark-keyword ] ?if
  41     [ prev-token, ] when* ;
  42
  43 : current-char ( -- char )
  44     position get line get nth ;
  45
  46 GENERIC: match-position ( rule -- n )
  47
  48 M: mark-previous-rule match-position drop last-offset get ;
  49
  50 M: rule match-position drop position get ;
  51
  52 : can-match-here? ( matcher rule -- ? )
  53     match-position {
  54         [ over ]
  55         [ over at-line-start?>>     over zero?                implies ]
  56         [ over at-whitespace-end?>> over whitespace-end get = implies ]
  57         [ over at-word-start?>>     over last-offset get =    implies ]
  58     } 0&& 2nip ;
  59
  60 : rest-of-line ( -- str )
  61     line get position get tail-slice ;
  62
  63 GENERIC: text-matches? ( string text -- match-count/f )
  64
  65 M: f text-matches?
  66     2drop f ;
  67
  68 M: string-matcher text-matches?
  69     [
  70         [ string>> ] [ ignore-case?>> ] bi string-head?
  71     ] keep string>> length and ;
  72
  73 M: regexp text-matches?
  74     [ >string ] dip match-head ;
  75
  76 : rule-start-matches? ( rule -- match-count/f )
  77     dup start>> tuck swap can-match-here? [
  78         rest-of-line swap text>> text-matches?
  79     ] [
  80         drop f
  81     ] if ;
  82
  83 : rule-end-matches? ( rule -- match-count/f )
  84     dup mark-following-rule? [
  85         dup start>> swap can-match-here? 0 and
  86     ] [
  87         dup end>> tuck swap can-match-here? [
  88             rest-of-line
  89             swap text>> context get end>> or
  90             text-matches?
  91         ] [
  92             drop f
  93         ] if
  94     ] if ;
  95
  96 DEFER: get-rules
  97
  98 : get-always-rules ( vector/f ruleset -- vector/f )
  99     f swap rules>> at ?push-all ;
 100
 101 : get-char-rules ( vector/f char ruleset -- vector/f )
 102     [ ch>upper ] dip rules>> at ?push-all ;
 103
 104 : get-rules ( char ruleset -- seq )
 105     [ f ] 2dip [ get-char-rules ] keep get-always-rules ;
 106
 107 GENERIC: handle-rule-start ( match-count rule -- )
 108
 109 GENERIC: handle-rule-end ( match-count rule -- )
 110
 111 : find-escape-rule ( -- rule )
 112     context get dup
 113     in-rule-set>> escape-rule>> [ ] [
 114         parent>> in-rule-set>>
 115         dup [ escape-rule>> ] when
 116     ] ?if ;
 117
 118 : check-escape-rule ( rule -- ? )
 119     no-escape?>> [ f ] [
 120         find-escape-rule dup [
 121             dup rule-start-matches? dup [
 122                 swap handle-rule-start
 123                 delegate-end-escaped? [ not ] change
 124                 t
 125             ] [
 126                 2drop f
 127             ] if
 128         ] when
 129     ] if ;
 130
 131 : check-every-rule ( -- ? )
 132     current-char current-rule-set get-rules
 133     [ rule-start-matches? ] map-find
 134     dup [ handle-rule-start t ] [ 2drop f ] if ;
 135
 136 : ?end-rule ( -- )
 137     current-rule [
 138         dup rule-end-matches?
 139         dup [ swap handle-rule-end ] [ 2drop ] if
 140     ] when* ;
 141
 142 : rule-match-token* ( rule -- id )
 143     dup match-token>> {
 144         { f [ dup body-token>> ] }
 145         { t [ current-rule-set default>> ] }
 146         [ ]
 147     } case nip ;
 148
 149 M: escape-rule handle-rule-start
 150     drop
 151     ?end-rule
 152     process-escape? get [
 153         escaped? [ not ] change
 154         position [ + ] change
 155     ] [ drop ] if ;
 156
 157 M: seq-rule handle-rule-start
 158     ?end-rule
 159     mark-token
 160     add-remaining-token
 161     tuck body-token>> next-token,
 162     delegate>> [ push-context ] when* ;
 163
 164 UNION: abstract-span-rule span-rule eol-span-rule ;
 165
 166 M: abstract-span-rule handle-rule-start
 167     ?end-rule
 168     mark-token
 169     add-remaining-token
 170     tuck rule-match-token* next-token,
 171     ! ... end subst ...
 172     dup context get (>>in-rule)
 173     delegate>> push-context ;
 174
 175 M: span-rule handle-rule-end
 176     2drop ;
 177
 178 M: mark-following-rule handle-rule-start
 179     ?end-rule
 180     mark-token add-remaining-token
 181     tuck rule-match-token* next-token,
 182     f context get (>>end)
 183     context get (>>in-rule) ;
 184
 185 M: mark-following-rule handle-rule-end
 186     nip rule-match-token* prev-token,
 187     f context get (>>in-rule) ;
 188
 189 M: mark-previous-rule handle-rule-start
 190     ?end-rule
 191     mark-token
 192     dup body-token>> prev-token,
 193     rule-match-token* next-token, ;
 194
 195 : do-escaped ( -- )
 196     escaped? get [
 197         escaped? off
 198         ! ...
 199     ] when ;
 200
 201 : check-end-delegate ( -- ? )
 202     context get parent>> [
 203         in-rule>> [
 204             dup rule-end-matches? dup [
 205                 [
 206                     swap handle-rule-end
 207                     ?end-rule
 208                     mark-token
 209                     add-remaining-token
 210                 ] keep context get parent>> in-rule>>
 211                 rule-match-token* next-token,
 212                 pop-context
 213                 seen-whitespace-end? on t
 214             ] [ drop check-escape-rule ] if
 215         ] [ f ] if*
 216     ] [ f ] if* ;
 217
 218 : handle-no-word-break ( -- )
 219     context get parent>> [
 220         in-rule>> [
 221             dup no-word-break?>> [
 222                 rule-match-token* prev-token,
 223                 pop-context
 224             ] [ drop ] if
 225         ] when*
 226     ] when* ;
 227
 228 : check-rule ( -- )
 229     ?end-rule
 230     handle-no-word-break
 231     mark-token
 232     add-remaining-token ;
 233
 234 : (check-word-break) ( -- )
 235     check-rule
 236
 237     1 current-rule-set default>> next-token, ;
 238
 239 : rule-set-empty? ( ruleset -- ? )
 240     [ rules>> ] [ keywords>> ] bi
 241     [ assoc-empty? ] bi@ and ;
 242
 243 : check-word-break ( -- ? )
 244     current-char dup blank? [
 245         drop
 246
 247         seen-whitespace-end? get [
 248             position get 1+ whitespace-end set
 249         ] unless
 250
 251         (check-word-break)
 252
 253     ] [
 254         ! Micro-optimization with incorrect semantics; we keep
 255         ! it here because jEdit mode files depend on it now...
 256         current-rule-set rule-set-empty? [
 257             drop
 258         ] [
 259             dup alpha? [
 260                 drop
 261             ] [
 262                 current-rule-set rule-set-no-word-sep* member? [
 263                     (check-word-break)
 264                 ] unless
 265             ] if
 266         ] if
 267
 268         seen-whitespace-end? on
 269     ] if
 270     escaped? off
 271     delegate-end-escaped? off t ;
 272
 273
 274 : mark-token-loop ( -- )
 275     position get line get length < [
 276         {
 277             [ check-end-delegate ]
 278             [ check-every-rule ]
 279             [ check-word-break ]
 280         } 0|| drop
 281
 282         position inc
 283         mark-token-loop
 284     ] when ;
 285
 286 : mark-remaining ( -- )
 287     line get length position set
 288     check-rule ;
 289
 290 : unwind-no-line-break ( -- )
 291     context get parent>> [
 292         in-rule>> [
 293             no-line-break?>> [
 294                 pop-context
 295                 unwind-no-line-break
 296             ] when
 297         ] when*
 298     ] when* ;
 299
 300 : tokenize-line ( line-context line rules -- line-context' seq )
 301     [
 302         "MAIN" swap at -rot
 303         init-token-marker
 304         mark-token-loop
 305         mark-remaining
 306         unwind-no-line-break
 307         context get
 308     ] { } make ;