! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
+
+USING: accessors ascii assocs combinators
+combinators.short-circuit formatting kernel make math namespaces
+regexp regexp.parser sequences splitting strings
+xmode.marker.state xmode.rules xmode.tokens xmode.utilities ;
+
IN: xmode.marker
-USING: kernel namespaces make xmode.rules xmode.tokens
-xmode.marker.state xmode.marker.context xmode.utilities
-xmode.catalog sequences math assocs combinators strings
-parser-combinators.regexp splitting parser-combinators ascii
-ascii combinators.short-circuit accessors ;
+
+! The next two words are copied from parser-combinators.
+! They are just like head?, but optionally ignore case.
+
+: string= ( str1 str2 ignore-case -- ? )
+ [ [ >upper ] bi@ ] when sequence= ;
+
+: string-head? ( str1 str2 ignore-case -- ? )
+ 2over shorter?
+ [ 3drop f ] [
+ [
+ [ nip ]
+ [ length head-slice ] 2bi
+ ] dip string=
+ ] if ;
! Based on org.gjt.sp.jedit.syntax.TokenMarker
[
dup [ digit? ] all? [
current-rule-set digit-re>>
- dup [ dupd matches? ] [ drop f ] if
+ [ dupd matches? ] [ f ] if*
] unless*
]
} 0&& nip ;
: rest-of-line ( -- str )
line get position get tail-slice ;
+: match-start ( string regexp -- slice/f )
+ first-match dup [ dup from>> 0 = [ drop f ] unless ] when ;
+
GENERIC: text-matches? ( string text -- match-count/f )
M: f text-matches?
2drop f ;
M: string-matcher text-matches?
- [
- [ string>> ] [ ignore-case?>> ] bi string-head?
- ] keep string>> length and ;
+ [ string>> ] [ ignore-case?>> ] bi
+ [ string-head? ] keepd length and ;
M: regexp text-matches?
- [ >string ] dip match-head ;
+ [ >string ] dip match-start dup [ to>> ] when ;
+
+<PRIVATE
+
+! XXX: Terribly inefficient regexp match group support
+
+: #match-groups ( regexp -- n/f )
+ raw>> [ CHAR: ( = ] count [ f ] when-zero ;
+
+: nth-index ( n obj seq -- i )
+ [ = dup [ drop 1 - dup 0 < ] when ] with find drop nip ;
+
+: match-group-regexp ( regexp n -- skip-regexp match-regexp )
+ [ [ options>> options>string ] [ raw>> ] bi ] dip
+ CHAR: ( pick nth-index cut CHAR: ) over index 1 + head
+ rot '[ _ H{ } [ <optioned-regexp> ] 2cache ] bi@ ;
+
+: skip-first-match ( match regexp -- tailseq )
+ first-match [ seq>> ] [ to>> ] bi tail ;
+
+: nth-match ( match regexp n -- slice/f )
+ match-group-regexp [ skip-first-match ] [ first-match ] bi* ;
+
+:: update-match-group ( str match regexp n -- str' )
+ n H{ } [ CHAR: 1 + CHAR: $ swap "" 2sequence ] cache :> x
+ x str subseq-range :> ( from to )
+ from [
+ to str snip-slice match regexp n nth-match glue
+ ] [ str ] if* ;
+
+: update-match-groups ( str match regexp -- str' )
+ [ >string ] dip
+ dup #match-groups [ update-match-group ] 2with each-integer ;
-: rule-start-matches? ( rule -- match-count/f )
- dup start>> tuck swap can-match-here? [
- rest-of-line swap text>> text-matches?
+GENERIC: fixup-end ( match regexp end -- end' )
+
+M: string-matcher fixup-end
+ [ string>> -rot update-match-groups ]
+ [ ignore-case?>> ] bi <string-matcher> ;
+
+MEMO: <fixup-regexp> ( raw matched options -- regexp )
+ <optioned-regexp> {
+ [ parse-tree>> ] [ options>> ] [ dfa>> ] [ next-match>> ]
+ } cleave regexp boa ;
+
+M: regexp fixup-end
+ [ raw>> [ -rot update-match-groups ] keep swap ]
+ [ options>> options>string ] bi <fixup-regexp> ;
+
+: fixup-end? ( text -- ? )
+ { [ regexp? ] [ #match-groups ] } 1&& ;
+
+: fixup-end/text-matches? ( string regexp rule -- match-count/f )
+ [ >string ] 2dip [ [ match-start dup ] keep ] dip pick [
+ end>> [ [ fixup-end ] change-text drop ] [ 2drop ] if*
+ ] [
+ 3drop
+ ] if dup [ to>> ] when ;
+
+PRIVATE>
+
+:: rule-start-matches? ( rule -- match-count/f )
+ rule start>> dup rule can-match-here? [
+ rest-of-line swap text>>
+ dup fixup-end? [
+ rule fixup-end/text-matches?
+ ] [
+ text-matches?
+ ] if
] [
drop f
] if ;
: rule-end-matches? ( rule -- match-count/f )
dup mark-following-rule? [
- dup start>> swap can-match-here? 0 and
+ [ start>> ] keep can-match-here? 0 and
] [
- dup end>> tuck swap can-match-here? [
+ [ end>> dup ] keep can-match-here? [
rest-of-line
swap text>> context get end>> or
text-matches?
DEFER: get-rules
-: get-always-rules ( vector/f ruleset -- vector/f )
- f swap rules>> at ?push-all ;
+: get-always-rules ( ruleset -- vector/f )
+ f swap rules>> at ;
-: get-char-rules ( vector/f char ruleset -- vector/f )
- [ ch>upper ] dip rules>> at ?push-all ;
+: get-char-rules ( char ruleset -- vector/f )
+ [ ch>upper ] dip rules>> at ;
: get-rules ( char ruleset -- seq )
- [ f ] 2dip [ get-char-rules ] keep get-always-rules ;
+ [ get-char-rules ] [ get-always-rules ] bi [ append ] when* ;
GENERIC: handle-rule-start ( match-count rule -- )
] ?if ;
: check-escape-rule ( rule -- ? )
- no-escape?>> [ f ] [
- find-escape-rule dup [
- dup rule-start-matches? dup [
- swap handle-rule-start
- delegate-end-escaped? [ not ] change
- t
- ] [
- 2drop f
- ] if
- ] when
- ] if ;
+ escape-rule>> [ find-escape-rule ] unless*
+ dup [
+ dup rule-start-matches? [
+ swap handle-rule-start
+ delegate-end-escaped? toggle
+ t
+ ] [
+ drop f
+ ] if*
+ ] when ;
: check-every-rule ( -- ? )
current-char current-rule-set get-rules
[ rule-start-matches? ] map-find
- dup [ handle-rule-start t ] [ 2drop f ] if ;
+ [ handle-rule-start t ] [ drop f ] if* ;
: ?end-rule ( -- )
current-rule [
dup rule-end-matches?
- dup [ swap handle-rule-end ] [ 2drop ] if
+ [ swap handle-rule-end ] [ drop ] if*
] when* ;
: rule-match-token* ( rule -- id )
drop
?end-rule
process-escape? get [
- escaped? [ not ] change
+ escaped? toggle
position [ + ] change
- ] [ 2drop ] if ;
+ ] [ drop ] if ;
M: seq-rule handle-rule-start
?end-rule
mark-token
add-remaining-token
- tuck body-token>> next-token,
+ [ body-token>> next-token, ] keep
delegate>> [ push-context ] when* ;
UNION: abstract-span-rule span-rule eol-span-rule ;
?end-rule
mark-token
add-remaining-token
- tuck rule-match-token* next-token,
+ [ rule-match-token* next-token, ] keep
! ... end subst ...
- dup context get (>>in-rule)
+ dup context get in-rule<<
delegate>> push-context ;
M: span-rule handle-rule-end
M: mark-following-rule handle-rule-start
?end-rule
mark-token add-remaining-token
- tuck rule-match-token* next-token,
- f context get (>>end)
- context get (>>in-rule) ;
+ [ rule-match-token* next-token, ] keep
+ f context get end<<
+ context get in-rule<< ;
M: mark-following-rule handle-rule-end
nip rule-match-token* prev-token,
- f context get (>>in-rule) ;
+ f context get in-rule<< ;
M: mark-previous-rule handle-rule-start
?end-rule
: check-end-delegate ( -- ? )
context get parent>> [
in-rule>> [
- dup rule-end-matches? dup [
+ dup rule-end-matches? [
[
swap handle-rule-end
?end-rule
rule-match-token* next-token,
pop-context
seen-whitespace-end? on t
- ] [ drop check-escape-rule ] if
+ ] [ check-escape-rule ] if*
] [ f ] if*
] [ f ] if* ;
: (check-word-break) ( -- )
check-rule
-
+
1 current-rule-set default>> next-token, ;
: rule-set-empty? ( ruleset -- ? )
[ rules>> ] [ keywords>> ] bi
- [ assoc-empty? ] bi@ and ;
+ [ assoc-empty? ] both? ;
: check-word-break ( -- ? )
current-char dup blank? [
drop
seen-whitespace-end? get [
- position get 1+ whitespace-end set
+ position get 1 + whitespace-end set
] unless
(check-word-break)
: tokenize-line ( line-context line rules -- line-context' seq )
[
- "MAIN" swap at -rot
+ "MAIN" of -rot
init-token-marker
mark-token-loop
mark-remaining