X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=blobdiff_plain;f=basis%2Fxmode%2Fmarker%2Fmarker.factor;h=4ecfa9b7963cd0fbde4d8e869d8d2e5398f48c73;hp=e106af79526eb2209de8a46fe9cbeb419df22c04;hb=528de2849f6e31f45750b5276bec49123df03f58;hpb=9a015f56ac9ebced309a739334524bef7a346116 diff --git a/basis/xmode/marker/marker.factor b/basis/xmode/marker/marker.factor old mode 100755 new mode 100644 index e106af7952..4ecfa9b796 --- a/basis/xmode/marker/marker.factor +++ b/basis/xmode/marker/marker.factor @@ -1,12 +1,12 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. + +USING: accessors ascii assocs combinators +combinators.short-circuit formatting kernel make math namespaces +regexp regexp.parser sequences splitting strings +xmode.marker.state xmode.rules xmode.tokens xmode.utilities ; + IN: xmode.marker -USING: kernel namespaces make xmode.rules xmode.tokens -xmode.marker.state xmode.marker.context xmode.utilities -xmode.catalog sequences math assocs combinators strings -regexp splitting ascii regexp.backend unicode.case -ascii combinators.short-circuit accessors ; -! regexp.backend is for the regexp class ! Next two words copied from parser-combinators ! Just like head?, but they optionally ignore case @@ -35,7 +35,7 @@ ascii combinators.short-circuit accessors ; [ dup [ digit? ] all? [ current-rule-set digit-re>> - dup [ dupd matches? ] [ drop f ] if + [ dupd matches? ] [ f ] if* ] unless* ] } 0&& nip ; @@ -74,31 +74,93 @@ M: rule match-position drop position get ; : rest-of-line ( -- str ) line get position get tail-slice ; +: match-start ( string regexp -- slice/f ) + first-match dup [ dup from>> 0 = [ drop f ] unless ] when ; + GENERIC: text-matches? ( string text -- match-count/f ) M: f text-matches? 2drop f ; M: string-matcher text-matches? - [ - [ string>> ] [ ignore-case?>> ] bi string-head? - ] keep string>> length and ; + [ string>> ] [ ignore-case?>> ] bi + [ string-head? ] keepd length and ; M: regexp text-matches? - [ >string ] dip match-head ; + [ >string ] dip match-start dup [ to>> ] when ; + +> [ CHAR: ( = ] count [ f ] when-zero ; + +: nth-index ( n obj seq -- i ) + [ = dup [ drop 1 - dup 0 < ] when ] with find drop nip ; + +: match-group-regexp ( regexp n -- skip-regexp match-regexp ) + [ [ options>> options>string ] [ raw>> ] bi ] dip + CHAR: ( pick nth-index cut CHAR: ) over index 1 + head + rot '[ H{ } [ _ ] cache ] bi@ ; + +: skip-first-match ( match regexp -- tailseq ) + first-match [ seq>> ] [ to>> ] bi tail ; + +: nth-match ( match regexp n -- slice/f ) + match-group-regexp [ skip-first-match ] [ first-match ] bi* ; + +: update-match-group ( str match regexp n -- str' ) + [ nth-match ] [ CHAR: 1 + "$%c" sprintf ] bi swap replace ; + +: update-match-groups ( str match regexp -- str' ) + [ >string ] dip + dup #match-groups [ update-match-group ] 2with each-integer ; + +GENERIC: fixup-end ( match regexp end -- end' ) + +M: string-matcher fixup-end + [ string>> -rot update-match-groups ] + [ ignore-case?>> ] bi ; + +MEMO: ( raw matched options -- regexp ) + { + [ parse-tree>> ] [ options>> ] [ dfa>> ] [ next-match>> ] + } cleave regexp boa ; -: rule-start-matches? ( rule -- match-count/f ) - dup start>> tuck swap can-match-here? [ - rest-of-line swap text>> text-matches? +M: regexp fixup-end + [ raw>> [ -rot update-match-groups ] keep swap ] + [ options>> options>string ] bi ; + +: fixup-end? ( text -- ? ) + { [ regexp? ] [ #match-groups ] } 1&& ; + +: fixup-end/text-matches? ( string regexp rule -- match-count/f ) + [ >string ] 2dip [ [ match-start dup ] keep ] dip pick [ + end>> [ [ fixup-end ] change-text drop ] [ 2drop ] if* + ] [ + 3drop + ] if dup [ to>> ] when ; + +PRIVATE> + +:: rule-start-matches? ( rule -- match-count/f ) + rule start>> dup rule can-match-here? [ + rest-of-line swap text>> + dup fixup-end? [ + rule fixup-end/text-matches? + ] [ + text-matches? + ] if ] [ drop f ] if ; : rule-end-matches? ( rule -- match-count/f ) dup mark-following-rule? [ - dup start>> swap can-match-here? 0 and + [ start>> ] keep can-match-here? 0 and ] [ - dup end>> tuck swap can-match-here? [ + [ end>> dup ] keep can-match-here? [ rest-of-line swap text>> context get end>> or text-matches? @@ -130,27 +192,26 @@ GENERIC: handle-rule-end ( match-count rule -- ) ] ?if ; : check-escape-rule ( rule -- ? ) - no-escape?>> [ f ] [ - find-escape-rule dup [ - dup rule-start-matches? dup [ - swap handle-rule-start - delegate-end-escaped? [ not ] change - t - ] [ - 2drop f - ] if - ] when - ] if ; + escape-rule>> [ find-escape-rule ] unless* + dup [ + dup rule-start-matches? [ + swap handle-rule-start + delegate-end-escaped? toggle + t + ] [ + drop f + ] if* + ] when ; : check-every-rule ( -- ? ) current-char current-rule-set get-rules [ rule-start-matches? ] map-find - dup [ handle-rule-start t ] [ 2drop f ] if ; + [ handle-rule-start t ] [ drop f ] if* ; : ?end-rule ( -- ) current-rule [ dup rule-end-matches? - dup [ swap handle-rule-end ] [ 2drop ] if + [ swap handle-rule-end ] [ drop ] if* ] when* ; : rule-match-token* ( rule -- id ) @@ -164,7 +225,7 @@ M: escape-rule handle-rule-start drop ?end-rule process-escape? get [ - escaped? [ not ] change + escaped? toggle position [ + ] change ] [ drop ] if ; @@ -172,7 +233,7 @@ M: seq-rule handle-rule-start ?end-rule mark-token add-remaining-token - tuck body-token>> next-token, + [ body-token>> next-token, ] keep delegate>> [ push-context ] when* ; UNION: abstract-span-rule span-rule eol-span-rule ; @@ -181,9 +242,9 @@ M: abstract-span-rule handle-rule-start ?end-rule mark-token add-remaining-token - tuck rule-match-token* next-token, + [ rule-match-token* next-token, ] keep ! ... end subst ... - dup context get (>>in-rule) + dup context get in-rule<< delegate>> push-context ; M: span-rule handle-rule-end @@ -192,13 +253,13 @@ M: span-rule handle-rule-end M: mark-following-rule handle-rule-start ?end-rule mark-token add-remaining-token - tuck rule-match-token* next-token, - f context get (>>end) - context get (>>in-rule) ; + [ rule-match-token* next-token, ] keep + f context get end<< + context get in-rule<< ; M: mark-following-rule handle-rule-end nip rule-match-token* prev-token, - f context get (>>in-rule) ; + f context get in-rule<< ; M: mark-previous-rule handle-rule-start ?end-rule @@ -215,7 +276,7 @@ M: mark-previous-rule handle-rule-start : check-end-delegate ( -- ? ) context get parent>> [ in-rule>> [ - dup rule-end-matches? dup [ + dup rule-end-matches? [ [ swap handle-rule-end ?end-rule @@ -225,7 +286,7 @@ M: mark-previous-rule handle-rule-start rule-match-token* next-token, pop-context seen-whitespace-end? on t - ] [ drop check-escape-rule ] if + ] [ check-escape-rule ] if* ] [ f ] if* ] [ f ] if* ; @@ -247,19 +308,19 @@ M: mark-previous-rule handle-rule-start : (check-word-break) ( -- ) check-rule - + 1 current-rule-set default>> next-token, ; : rule-set-empty? ( ruleset -- ? ) [ rules>> ] [ keywords>> ] bi - [ assoc-empty? ] bi@ and ; + [ assoc-empty? ] both? ; : check-word-break ( -- ? ) current-char dup blank? [ drop seen-whitespace-end? get [ - position get 1+ whitespace-end set + position get 1 + whitespace-end set ] unless (check-word-break) @@ -313,7 +374,7 @@ M: mark-previous-rule handle-rule-start : tokenize-line ( line-context line rules -- line-context' seq ) [ - "MAIN" swap at -rot + "MAIN" of -rot init-token-marker mark-token-loop mark-remaining