X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=blobdiff_plain;f=basis%2Fregexp%2Fregexp.factor;h=42138d5c66e7792b109f2b1651837fa6e02e496f;hp=63a2f25885b06308da29e7cdc932fef9fe362089;hb=2f59f72ed58809c266ad1d2a777be7285d246a06;hpb=58d997de5c9053ff6d987d3ac2f4f081e747dc74 diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index 63a2f25885..42138d5c66 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -1,10 +1,10 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors combinators kernel kernel.private math sequences -sequences.private strings sets assocs prettyprint.backend -prettyprint.custom make lexer namespaces parser arrays fry locals -regexp.parser splitting sorting regexp.ast regexp.negation -regexp.compiler compiler.units words call call.private math.ranges ; +USING: accessors arrays classes compiler.units kernel +kernel.private lexer make math ranges namespaces quotations +regexp.ast regexp.compiler regexp.negation regexp.parser +sequences sequences.private splitting strings vocabs.loader +words ; IN: regexp TUPLE: regexp @@ -17,88 +17,102 @@ TUPLE: reverse-regexp < regexp ; quot ! Returns ( index string -- ? ) - term>> ast>dfa dfa>shortest-word '[ f _ execute ] ; +M: lookahead question>quot + ! Returns ( index string -- ? ) + term>> ast>dfa dfa>shortest-word 1quotation [ f ] prepose ; : ( ast -- reversed ) "r" string>options ; -M: lookbehind question>quot ! Returns ( index string -- ? ) +M: lookbehind question>quot + ! Returns ( index string -- ? ) term>> ast>dfa dfa>reverse-shortest-word - '[ [ 1- ] dip f _ execute ] ; - -: check-string ( string -- string ) - ! Make this configurable - dup string? [ "String required" throw ] unless ; + 1quotation [ [ 1 - ] dip f ] prepose ; : match-index-from ( i string regexp -- index/f ) ! This word is unsafe. It assumes that i is a fixnum ! and that string is a string. - dup dfa>> execute-unsafe( index string regexp -- i/f ) ; inline + dup dfa>> execute( index string regexp -- i/f ) ; inline GENERIC: end/start ( string regexp -- end start ) M: regexp end/start drop length 0 ; -M: reverse-regexp end/start drop length 1- -1 swap ; +M: reverse-regexp end/start drop length 1 - -1 swap ; PRIVATE> : matches? ( string regexp -- ? ) - [ check-string ] dip + [ string check-instance ] dip [ end/start ] 2keep match-index-from [ = ] [ drop f ] if* ; > - execute-unsafe( i string regexp -- i start end ? ) ; inline - -:: (each-match) ( i string regexp quot: ( start end string -- ) -- ) - i string regexp do-next-match [| i' start end | - start end string quot call - i' string regexp quot (each-match) - ] [ 3drop ] if ; inline recursive + execute( i string regexp -- start end ? ) ; inline + +:: (each-match-forward) ( ... i string regexp quot: ( ... start end string -- ... ) -- ... ) + i string length <= [ + i string regexp do-next-match [| start end | + start end string quot call + start end eq? [ end 1 + ] [ end ] if + string regexp quot (each-match-forward) + ] [ 2drop ] if + ] when ; inline recursive + +:: (each-match-backward) ( ... i string regexp quot: ( ... start end string -- ... ) -- ... ) + i -1 >= [ + i string regexp do-next-match [| start end | + start 1 + end 1 + string quot call + start end eq? [ start 1 - ] [ start ] if + string regexp quot (each-match-backward) + ] [ 2drop ] if + ] when ; inline recursive + +: (each-match) ( ... i string regexp quot: ( ... start end string -- ... ) -- ... ) + over reverse-regexp? [ (each-match-backward) ] [ (each-match-forward) ] if ; inline + +GENERIC: match-iterator-start ( string regexp -- start ) +M: regexp match-iterator-start 2drop 0 ; +M: reverse-regexp match-iterator-start drop length ; : prepare-match-iterator ( string regexp -- i string regexp ) - [ check-string ] dip [ end/start nip ] 2keep ; inline + [ string check-instance ] dip [ match-iterator-start ] 2keep ; inline PRIVATE> -: each-match ( string regexp quot: ( start end string -- ) -- ) +: each-match ( ... string regexp quot: ( ... start end string -- ... ) -- ... ) [ prepare-match-iterator ] dip (each-match) ; inline -: map-matches ( string regexp quot: ( start end string -- obj ) -- seq ) - accumulator [ each-match ] dip >array ; inline +: map-matches ( ... string regexp quot: ( ... start end string -- ... obj ) -- ... seq ) + collector [ each-match ] dip >array ; inline : all-matching-slices ( string regexp -- seq ) - [ slice boa ] map-matches ; + [ ] map-matches ; : all-matching-subseqs ( string regexp -- seq ) [ subseq ] map-matches ; : count-matches ( string regexp -- n ) - [ 0 ] 2dip [ 3drop 1+ ] each-match ; + [ 0 ] 2dip [ 3drop 1 + ] each-match ; PRIVATE> -: first-match ( string regexp -- slice/f ) - [ prepare-match-iterator do-next-match ] [ drop ] 2bi - '[ _ slice boa nip ] [ 3drop f ] if ; +:: first-match ( string regexp -- slice/f ) + string regexp prepare-match-iterator do-next-match [ + regexp reverse-regexp? [ [ 1 + ] bi@ ] when + string + ] [ 2drop f ] if ; : re-contains? ( string regexp -- ? ) - prepare-match-iterator do-next-match [ 3drop ] dip >boolean ; + prepare-match-iterator do-next-match 2nip >boolean ; : re-split ( string regexp -- seq ) - [ slice boa ] (re-split) ; + [ ] (re-split) ; : re-replace ( string regexp replacement -- result ) [ [ subseq ] (re-split) ] dip join ; +:: re-replace-with ( string regexp quot: ( slice -- replacement ) -- result ) + [ + 0 string regexp [ + drop [ [ string , ] keep ] dip + [ string quot call( x -- x ) , ] keep + ] each-match string [ length ] [ ] bi , + ] { } make concat ; + dfa dfa>word ] when ] change-dfa ; -M: reverse-regexp compile-regexp ( regexp -- regexp ) +M: reverse-regexp compile-regexp t backwards? [ call-next-method ] with-variable ; DEFER: compile-next-match -: next-initial-word ( i string regexp -- i start end string ) +: next-initial-word ( i string regexp -- start end string ) [ compile-next-match ] with-compilation-unit do-next-match ; : compile-next-match ( regexp -- regexp ) @@ -150,7 +174,7 @@ DEFER: compile-next-match dup \ next-initial-word = [ drop _ [ compile-regexp dfa>> def>> ] [ reverse-regexp? ] bi '[ { array-capacity string regexp } declare _ _ next-match ] - (( i string regexp -- i start end string )) define-temp + ( i string regexp -- start end string ) define-temp ] when ] change-next-match ; @@ -172,56 +196,27 @@ PRIVATE> compile-next-match parsed ; +: parse-regexp ( accum -- accum ) + lexer get [ take-until "\\/" "/" replace ] [ parse-noblank-token ] bi + compile-next-match suffix! ; PRIVATE> -: R! CHAR: ! parsing-regexp ; parsing -: R" CHAR: " parsing-regexp ; parsing -: R# CHAR: # parsing-regexp ; parsing -: R' CHAR: ' parsing-regexp ; parsing -: R( CHAR: ) parsing-regexp ; parsing -: R/ CHAR: / parsing-regexp ; parsing -: R@ CHAR: @ parsing-regexp ; parsing -: R[ CHAR: ] parsing-regexp ; parsing -: R` CHAR: ` parsing-regexp ; parsing -: R{ CHAR: } parsing-regexp ; parsing -: R| CHAR: | parsing-regexp ; parsing - -M: regexp pprint* - [ - [ - [ raw>> dup find-regexp-syntax swap % swap % % ] - [ options>> options>string % ] bi - ] "" make - ] keep present-text ; +SYNTAX: R/ parse-regexp ; + +{ "prettyprint" "regexp" } "regexp.prettyprint" require-when