! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators kernel math sequences strings sets
assocs prettyprint.backend prettyprint.custom make lexer
namespaces parser arrays fry locals regexp.minimize
regexp.parser regexp.nfa regexp.dfa regexp.traversal
regexp.transition-tables splitting sorting regexp.ast
regexp.negation ;
IN: regexp

! NOTE(review): this file had been collapsed onto two lines and every
! "<...>" span had been stripped out of it (apparently by an HTML tag
! filter). Line breaks are restored so that "!" comments no longer
! swallow code, and the stripped words are put back: <options>,
! <optioned-regexp>, <regexp>, <PRIVATE, <dfa-traverser>, <slice>.
! The bodies of (re-split), find-regexp-syntax and parsing-regexp were
! lost completely and are reconstructed here -- confirm them against
! version control before relying on them.

! A regular expression.
!   raw        - the pattern text (written back out by pprint*)
!   parse-tree - the result of parse-regexp on the pattern
!   options    - the result of string>options on the option string
!   dfa        - f until compile-regexp fills it in via ast>dfa
TUPLE: regexp
    { raw read-only }
    { parse-tree read-only }
    { options read-only }
    dfa ;

! Builds a regexp from a pattern string and an already-parsed tree,
! with an options object made from two f values and no DFA yet.
! NOTE(review): <options> is presumably the two-slot constructor from
! regexp.ast; it is required for the four slots of the tuple to line up.
: make-regexp ( string ast -- regexp )
    f f <options> f regexp boa ; foldable
    ! Foldable because, when the dfa slot is set,
    ! it'll be set to the same thing regardless of who sets it

! Parses the pattern and the option string and builds a regexp whose
! DFA has not been computed yet.
: <optioned-regexp> ( string options -- regexp )
    [ dup parse-regexp ] [ string>options ] bi*
    f regexp boa ;

! Same as <optioned-regexp> with an empty option string.
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;

<PRIVATE

! Fills in the dfa slot from the parse tree and options, unless the
! slot is already set. Returns the same regexp.
: compile-regexp ( regexp -- regexp )
    dup dfa>> [
        dup
        [ parse-tree>> ]
        [ options>> ] bi
        ast>dfa >>dfa
    ] unless ;

! Compiles the regexp if necessary and runs its DFA over the string.
! NOTE(review): <dfa-traverser> is presumably ( string dfa -- traverser )
! from regexp.traversal -- confirm.
: (match) ( string regexp -- dfa-traverser )
    compile-regexp dfa>> <dfa-traverser> do-match ; inline

PRIVATE>

! Matches the regexp at the start of the string; the result is whatever
! return-match yields for the traverser (a slice, or f).
: match ( string regexp -- slice/f )
    (match) return-match ;

! True when there is a match and it is as long as the whole string.
: matches? ( string regexp -- ? )
    dupd match
    [ [ length ] bi@ = ] [ drop f ] if* ;

! Length of the match at the start of the string, or f if none.
: match-head ( string regexp -- end/f )
    match [ length ] [ f ] if* ;

! Tries to match at index m.
!   m past the end of the string -> f t  (nothing left to try)
!   otherwise                    -> match-head of the tail from m, and f
: match-at ( string m regexp -- n/f finished? )
    [
        2dup swap length > [ 2drop f f ] [ tail-slice t ] if
    ] dip swap [ match-head f ] [ 2drop f t ] if ;

! Scans forward from index m, one position at a time, until match-at
! succeeds or reports that it is finished. On success a is the start
! index and b is a plus the match length; otherwise both are f.
: match-range ( string m regexp -- a/f b/f )
    3dup match-at over [
        drop nip rot drop dupd +
    ] [
        [ 3drop drop f f ] [ drop [ 1+ ] dip match-range ] if
    ] if ;

! Slice of the string covering the first match found scanning from
! index 0, or f if match-range finds none.
: first-match ( string regexp -- slice/f )
    dupd 0 swap match-range rot over [ <slice> ] [ 3drop f ] if ;

! Cuts the string around the first match: start is the part before it,
! end is the part after it. With no match, start is the whole input
! (coerced with "" like) and end is f.
: re-cut ( string regexp -- end/f start )
    dupd first-match
    [ split1-slice swap ] [ "" like f swap ] if* ;

<PRIVATE

! Emits (with ,) each piece that re-cut produces, recursing on the
! remainder until re-cut reports no remainder.
! NOTE(review): reconstructed definition -- the original text was lost.
: (re-split) ( string regexp -- )
    [ re-cut , ] keep over [ (re-split) ] [ 2drop ] if ;

PRIVATE>

! Array of the pieces of the string that lie between matches.
: re-split ( string regexp -- seq )
    [ (re-split) ] { } make ;

! Splits on the regexp and joins the pieces with the replacement.
: re-replace ( string regexp replacement -- result )
    [ re-split ] dip join ;

! Finds the first match; returns it together with the text after it,
! or f f when there is no match.
: next-match ( string regexp -- end/f match/f )
    dupd first-match dup
    [ [ split1-slice nip ] keep ] [ 2drop f f ] if ;

! Collects matches by calling next-match repeatedly on the remaining
! text; harvest then drops the empty entries (including the final f).
: all-matches ( string regexp -- seq )
    [ dup ] swap '[ _ next-match ] [ ] produce nip harvest ;

! Number of entries all-matches returns.
: count-matches ( string regexp -- n )
    all-matches length ;

<PRIVATE

! Picks the first literal syntax whose closing delimiter does not occur
! in the pattern, so pprint* can print the pattern unescaped.
! NOTE(review): reconstructed definition -- the original text was lost.
! The prefixes carry a trailing space because pprint* emits
! prefix, raw and suffix back to back.
: find-regexp-syntax ( string -- prefix suffix )
    {
        { "R/ "  "/"  }
        { "R! "  "!"  }
        { "R\" " "\"" }
        { "R# "  "#"  }
        { "R' "  "'"  }
        { "R( "  ")"  }
        { "R@ "  "@"  }
        { "R[ "  "]"  }
        { "R` "  "`"  }
        { "R{ "  "}"  }
        { "R| "  "|"  }
    } swap [ subseq? not nip ] curry assoc-find drop ;

! Shared body of the R/ family: reads the pattern text up to the
! closing delimiter, then any option token that follows it, builds and
! compiles the regexp and adds it to the parse accumulator.
! NOTE(review): everything before "compile-regexp parsed" is
! reconstructed -- the original text was lost.
: parsing-regexp ( accum end -- accum )
    lexer get dup skip-blank
    [ [ index-from dup 1+ swap ] 2keep swapd subseq swap ] change-lexer-column
    lexer get dup still-parsing-line?
    [ (parse-token) ] [ drop f ] if
    <optioned-regexp> compile-regexp parsed ;

PRIVATE>

! Literal regexp syntax. Each word passes its closing delimiter
! character to parsing-regexp.
: R! CHAR: ! parsing-regexp ; parsing
: R" CHAR: " parsing-regexp ; parsing
: R# CHAR: # parsing-regexp ; parsing
: R' CHAR: ' parsing-regexp ; parsing
: R( CHAR: ) parsing-regexp ; parsing
: R/ CHAR: / parsing-regexp ; parsing
: R@ CHAR: @ parsing-regexp ; parsing
: R[ CHAR: ] parsing-regexp ; parsing
: R` CHAR: ` parsing-regexp ; parsing
: R{ CHAR: } parsing-regexp ; parsing
: R| CHAR: | parsing-regexp ; parsing

! Prints a regexp as prefix, raw pattern, suffix, then its options
! rendered by options>string.
M: regexp pprint*
    [
        [
            [ raw>> dup find-regexp-syntax swap % swap % % ]
            [ options>> options>string % ] bi
        ] "" make
    ] keep present-text ;