X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=blobdiff_plain;f=basis%2Fregexp%2Fregexp.factor;h=42138d5c66e7792b109f2b1651837fa6e02e496f;hp=ba4aa47e7b87f7dcd26ff157cc5b86d4ff25501c;hb=2f59f72ed58809c266ad1d2a777be7285d246a06;hpb=478b960560eea138e29c8642cc1bff89f3e88597 diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index ba4aa47e7b..42138d5c66 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -1,9 +1,10 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors combinators kernel kernel.private math sequences -sequences.private strings sets assocs make lexer namespaces parser -arrays fry locals regexp.parser splitting sorting regexp.ast -regexp.negation regexp.compiler compiler.units words math.ranges ; +USING: accessors arrays classes compiler.units kernel +kernel.private lexer make math ranges namespaces quotations +regexp.ast regexp.compiler regexp.negation regexp.parser +sequences sequences.private splitting strings vocabs.loader +words ; IN: regexp TUPLE: regexp @@ -16,20 +17,18 @@ TUPLE: reverse-regexp < regexp ; quot ! Returns ( index string -- ? ) - term>> ast>dfa dfa>shortest-word '[ f _ execute ] ; +M: lookahead question>quot + ! Returns ( index string -- ? ) + term>> ast>dfa dfa>shortest-word 1quotation [ f ] prepose ; : ( ast -- reversed ) "r" string>options ; -M: lookbehind question>quot ! Returns ( index string -- ? ) +M: lookbehind question>quot + ! Returns ( index string -- ? ) term>> ast>dfa dfa>reverse-shortest-word - '[ [ 1 - ] dip f _ execute ] ; - -: check-string ( string -- string ) - ! Make this configurable - dup string? [ "String required" throw ] unless ; + 1quotation [ [ 1 - ] dip f ] prepose ; : match-index-from ( i string regexp -- index/f ) ! This word is unsafe. It assumes that i is a fixnum @@ -43,51 +42,67 @@ M: reverse-regexp end/start drop length 1 - -1 swap ; PRIVATE> : matches? ( string regexp -- ? ) - [ check-string ] dip + [ string check-instance ] dip [ end/start ] 2keep match-index-from [ = ] [ drop f ] if* ; > - execute( i string regexp -- i start end ? ) ; inline - -:: (each-match) ( i string regexp quot: ( start end string -- ) -- ) - i string regexp do-next-match [| i' start end | - start end string quot call - i' string regexp quot (each-match) - ] [ 3drop ] if ; inline recursive + execute( i string regexp -- start end ? ) ; inline + +:: (each-match-forward) ( ... i string regexp quot: ( ... start end string -- ... ) -- ... ) + i string length <= [ + i string regexp do-next-match [| start end | + start end string quot call + start end eq? [ end 1 + ] [ end ] if + string regexp quot (each-match-forward) + ] [ 2drop ] if + ] when ; inline recursive + +:: (each-match-backward) ( ... i string regexp quot: ( ... start end string -- ... ) -- ... ) + i -1 >= [ + i string regexp do-next-match [| start end | + start 1 + end 1 + string quot call + start end eq? [ start 1 - ] [ start ] if + string regexp quot (each-match-backward) + ] [ 2drop ] if + ] when ; inline recursive + +: (each-match) ( ... i string regexp quot: ( ... start end string -- ... ) -- ... ) + over reverse-regexp? [ (each-match-backward) ] [ (each-match-forward) ] if ; inline + +GENERIC: match-iterator-start ( string regexp -- start ) +M: regexp match-iterator-start 2drop 0 ; +M: reverse-regexp match-iterator-start drop length ; : prepare-match-iterator ( string regexp -- i string regexp ) - [ check-string ] dip [ end/start nip ] 2keep ; inline + [ string check-instance ] dip [ match-iterator-start ] 2keep ; inline PRIVATE> -: each-match ( string regexp quot: ( start end string -- ) -- ) +: each-match ( ... string regexp quot: ( ... start end string -- ... ) -- ... ) [ prepare-match-iterator ] dip (each-match) ; inline -: map-matches ( string regexp quot: ( start end string -- obj ) -- seq ) - accumulator [ each-match ] dip >array ; inline +: map-matches ( ... string regexp quot: ( ... start end string -- ... obj ) -- ... seq ) + collector [ each-match ] dip >array ; inline : all-matching-slices ( string regexp -- seq ) - [ slice boa ] map-matches ; + [ ] map-matches ; : all-matching-subseqs ( string regexp -- seq ) [ subseq ] map-matches ; @@ -97,7 +112,7 @@ PRIVATE> PRIVATE> -: first-match ( string regexp -- slice/f ) - [ prepare-match-iterator do-next-match ] [ drop ] 2bi - '[ _ slice boa nip ] [ 3drop f ] if ; +:: first-match ( string regexp -- slice/f ) + string regexp prepare-match-iterator do-next-match [ + regexp reverse-regexp? [ [ 1 + ] bi@ ] when + string + ] [ 2drop f ] if ; : re-contains? ( string regexp -- ? ) - prepare-match-iterator do-next-match [ 3drop ] dip >boolean ; + prepare-match-iterator do-next-match 2nip >boolean ; : re-split ( string regexp -- seq ) - [ slice boa ] (re-split) ; + [ ] (re-split) ; : re-replace ( string regexp replacement -- result ) [ [ subseq ] (re-split) ] dip join ; +:: re-replace-with ( string regexp quot: ( slice -- replacement ) -- result ) + [ + 0 string regexp [ + drop [ [ string , ] keep ] dip + [ string quot call( x -- x ) , ] keep + ] each-match string [ length ] [ ] bi , + ] { } make concat ; + dfa dfa>word ] when ] change-dfa ; -M: reverse-regexp compile-regexp ( regexp -- regexp ) +M: reverse-regexp compile-regexp t backwards? [ call-next-method ] with-variable ; DEFER: compile-next-match -: next-initial-word ( i string regexp -- i start end string ) +: next-initial-word ( i string regexp -- start end string ) [ compile-next-match ] with-compilation-unit do-next-match ; : compile-next-match ( regexp -- regexp ) @@ -149,7 +174,7 @@ DEFER: compile-next-match dup \ next-initial-word = [ drop _ [ compile-regexp dfa>> def>> ] [ reverse-regexp? ] bi '[ { array-capacity string regexp } declare _ _ next-match ] - (( i string regexp -- i start end string )) define-temp + ( i string regexp -- start end string ) define-temp ] when ] change-next-match ; @@ -171,53 +196,27 @@ PRIVATE> compile-next-match parsed ; +: parse-regexp ( accum -- accum ) + lexer get [ take-until "\\/" "/" replace ] [ parse-noblank-token ] bi + compile-next-match suffix! ; PRIVATE> -SYNTAX: R! CHAR: ! parsing-regexp ; -SYNTAX: R" CHAR: " parsing-regexp ; -SYNTAX: R# CHAR: # parsing-regexp ; -SYNTAX: R' CHAR: ' parsing-regexp ; -SYNTAX: R( CHAR: ) parsing-regexp ; -SYNTAX: R/ CHAR: / parsing-regexp ; -SYNTAX: R@ CHAR: @ parsing-regexp ; -SYNTAX: R[ CHAR: ] parsing-regexp ; -SYNTAX: R` CHAR: ` parsing-regexp ; -SYNTAX: R{ CHAR: } parsing-regexp ; -SYNTAX: R| CHAR: | parsing-regexp ; - -USING: vocabs vocabs.loader ; - -"prettyprint" vocab [ - "regexp.prettyprint" require -] when +SYNTAX: R/ parse-regexp ; + + +{ "prettyprint" "regexp" } "regexp.prettyprint" require-when