--- /dev/null
- [ dfa-table>> transitions>> keys ]
+ ! Copyright (C) 2008 Doug Coleman.
+ ! See http://factorcode.org/license.txt for BSD license.
+ USING: accessors arrays assocs combinators fry kernel locals
+ math math.order regexp.nfa regexp.transition-tables sequences
+ sets sorting vectors regexp.utils sequences.deep ;
+ USING: io prettyprint threads ;
+ IN: regexp.dfa
+
+ : find-delta ( states transition regexp -- new-states )
+ nfa-table>> transitions>>
+ rot [ swap at at ] with with gather sift ;
+
+ : (find-epsilon-closure) ( states regexp -- new-states )
+ eps swap find-delta ;
+
+ : find-epsilon-closure ( states regexp -- new-states )
+ '[ dup _ (find-epsilon-closure) union ] [ length ] while-changes
+ natural-sort ;
+
+ : find-closure ( states transition regexp -- new-states )
+ [ find-delta ] 2keep nip find-epsilon-closure ;
+
+ : find-start-state ( regexp -- state )
+ [ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
+
+ : find-transitions ( seq1 regexp -- seq2 )
+ nfa-table>> transitions>>
+ [ at keys ] curry map concat
+ eps swap remove ;
+
+ : add-todo-state ( state regexp -- )
+ 2dup visited-states>> key? [
+ 2drop
+ ] [
+ [ visited-states>> conjoin ]
+ [ new-states>> push ] 2bi
+ ] if ;
+
+ : new-transitions ( regexp -- )
+ dup new-states>> [
+ drop
+ ] [
+ dupd pop dup pick find-transitions rot
+ [
+ [ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
+ >r swapd transition make-transition r> dfa-table>> add-transition
+ ] curry with each
+ new-transitions
+ ] if-empty ;
+
+ : states ( hashtable -- array )
+ [ keys ]
+ [ values [ values concat ] map concat append ] bi ;
+
+ : set-final-states ( regexp -- )
+ dup
+ [ nfa-table>> final-states>> keys ]
+ [ dfa-table>> transitions>> states ] bi
+ [ intersect empty? not ] with filter
+
+ swap dfa-table>> final-states>>
+ [ conjoin ] curry each ;
+
+ : set-initial-state ( regexp -- )
+ dup
+ [ dfa-table>> ] [ find-start-state ] bi
+ [ >>start-state drop ] keep
+ 1vector >>new-states drop ;
+
+ : set-traversal-flags ( regexp -- )
- bi 2drop ;
++ dup
+ [ nfa-traversal-flags>> ]
- [ set-initial-state ]
- [ new-transitions ]
- [ set-final-states ] tri ;
- ! [ set-traversal-flags ] quad ;
++ [ dfa-table>> transitions>> keys ] bi
++ [ tuck [ swap at ] with map concat ] with H{ } map>assoc
++ >>dfa-traversal-flags drop ;
+
+ : construct-dfa ( regexp -- )
++ {
++ [ set-initial-state ]
++ [ new-transitions ]
++ [ set-final-states ]
++ [ set-traversal-flags ]
++ } cleave ;
--- /dev/null
-! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
-! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
+ USING: regexp tools.test kernel sequences regexp.parser
+ regexp.traversal eval ;
+ IN: regexp-tests
+
+ [ f ] [ "b" "a*" <regexp> matches? ] unit-test
+ [ t ] [ "" "a*" <regexp> matches? ] unit-test
+ [ t ] [ "a" "a*" <regexp> matches? ] unit-test
+ [ t ] [ "aaaaaaa" "a*" <regexp> matches? ] unit-test
+ [ f ] [ "ab" "a*" <regexp> matches? ] unit-test
+
+ [ t ] [ "abc" "abc" <regexp> matches? ] unit-test
+ [ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
+ [ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
+ [ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
+ [ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
+
+ [ t ] [ "b" "|b" <regexp> matches? ] unit-test
+ [ t ] [ "b" "b|" <regexp> matches? ] unit-test
+ [ t ] [ "" "b|" <regexp> matches? ] unit-test
+ [ t ] [ "" "b|" <regexp> matches? ] unit-test
+ [ f ] [ "" "|" <regexp> matches? ] unit-test
+ [ f ] [ "" "|||||||" <regexp> matches? ] unit-test
+
+ [ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
+ [ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
+ [ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
+ [ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
+
+ [ f ] [ "" "a+" <regexp> matches? ] unit-test
+ [ t ] [ "a" "a+" <regexp> matches? ] unit-test
+ [ t ] [ "aa" "a+" <regexp> matches? ] unit-test
+
+ [ t ] [ "" "a?" <regexp> matches? ] unit-test
+ [ t ] [ "a" "a?" <regexp> matches? ] unit-test
+ [ f ] [ "aa" "a?" <regexp> matches? ] unit-test
+
+ [ f ] [ "" "." <regexp> matches? ] unit-test
+ [ t ] [ "a" "." <regexp> matches? ] unit-test
+ [ t ] [ "." "." <regexp> matches? ] unit-test
+ ! [ f ] [ "\n" "." <regexp> matches? ] unit-test
+
+ [ f ] [ "" ".+" <regexp> matches? ] unit-test
+ [ t ] [ "a" ".+" <regexp> matches? ] unit-test
+ [ t ] [ "ab" ".+" <regexp> matches? ] unit-test
+
+
+ [ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
+ [ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
+ [ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
+ [ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
+ [ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
+ [ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
+
+ [ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
+ [ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
+ [ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
+
+ [ f ] [ "" "(a)" <regexp> matches? ] unit-test
+ [ t ] [ "a" "(a)" <regexp> matches? ] unit-test
+ [ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
+ [ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
+
+ [ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
+ [ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
+
+ [ f ] [ "" "a{1}" <regexp> matches? ] unit-test
+ [ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
+ [ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
+
+ [ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
+ [ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
+ [ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
+ [ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
+
+ [ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
+ [ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
+ [ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
+ [ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
+ [ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
+ [ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
+
+ [ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
+ [ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
+ [ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
+ [ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
+ [ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
+
+ [ f ] [ "" "[a]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[a]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
+ [ f ] [ "b" "[a]" <regexp> matches? ] unit-test
+ [ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
+ [ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
+ [ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
+
+ [ f ] [ "" "[^a]" <regexp> matches? ] unit-test
+ [ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
+ [ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
+ [ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
+ [ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
+ [ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
+ [ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
+
+ [ t ] [ "]" "[]]" <regexp> matches? ] unit-test
+ [ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
+
+ [ "^" "[^]" <regexp> matches? ] must-fail
+ [ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
+ [ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
+
+ [ t ] [ "[" "[[]" <regexp> matches? ] unit-test
+ [ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
+
+ [ t ] [ "-" "[-]" <regexp> matches? ] unit-test
+ [ f ] [ "a" "[-]" <regexp> matches? ] unit-test
+ [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+
+ [ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
+ [ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
+ [ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
+ [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+
+ [ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
+ [ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
+ [ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
+ [ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
+
+ [ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
+ [ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
+
+ [ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
+ [ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
+ [ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
+
+ [ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
+ [ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
+ [ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
+
+ [ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
+ [ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
+
+ [ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
+ [ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
+
+ [ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
+ [ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
+ [ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
+ [ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
+ !
+ [ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
+ [ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
+
+ [ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
+ [ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
+ [ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
+ [ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
+ [ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
+
+ [ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
+ [ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
+ [ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
+ [ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
+ [ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
+ [ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
+
+ [ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
+ [ f ] [ "b" "a+b" <regexp> matches? ] unit-test
+ [ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
+ [ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
+
+ [ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
+ [ t ] [ "a" "ab*" <regexp> matches? ] unit-test
+ [ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
+
+ [ f ] [ "x" "\\." <regexp> matches? ] unit-test
+ [ t ] [ "." "\\." <regexp> matches? ] unit-test
+
+ [ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
+ [ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
+ [ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
+
+ [ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
+ [ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
+
+ [ t ] [ "aaa" "AAA" <iregexp> matches? ] unit-test
+ [ f ] [ "aax" "AAA" <iregexp> matches? ] unit-test
+ [ t ] [ "aaa" "A*" <iregexp> matches? ] unit-test
+ [ f ] [ "aaba" "A*" <iregexp> matches? ] unit-test
+ [ t ] [ "b" "[AB]" <iregexp> matches? ] unit-test
+ [ f ] [ "c" "[AB]" <iregexp> matches? ] unit-test
+ [ t ] [ "c" "[A-Z]" <iregexp> matches? ] unit-test
+ [ f ] [ "3" "[A-Z]" <iregexp> matches? ] unit-test
+
+ [ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
+ [ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
+ [ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
+ [ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
+
+ [ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
+ [ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
+ [ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
+ [ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
+
+ [ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
+ [ t ] [ "A" "[a-z]" <iregexp> matches? ] unit-test
+
+ [ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
+ [ t ] [ "A" "\\p{Lower}" <iregexp> matches? ] unit-test
+
+ [ t ] [ "abc" <reversed> "abc" <rregexp> matches? ] unit-test
+ [ t ] [ "abc" <reversed> "a[bB][cC]" <rregexp> matches? ] unit-test
+ [ t ] [ "adcbe" "a(?r)(bcd)(?-r)e" <rregexp> matches? ] unit-test
+
+ [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
+ [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
+ [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
+
+ [ t ] [ "abc*" "[^\\*]*\\*" <regexp> matches? ] unit-test
+ [ t ] [ "bca" "[^a]*a" <regexp> matches? ] unit-test
+
+ [ ] [
+ "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
+ <regexp> drop
+ ] unit-test
+
+ [ ] [ "(\\$[\\p{XDigit}]|[\\p{Digit}])" <regexp> drop ] unit-test
+
+ ! Comment
+ [ t ] [ "ac" "a(?#boo)c" <regexp> matches? ] unit-test
+
+
+
+ ! [ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
+
+ ! [ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
+ ! [ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
+ ! [ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
+ ! [ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
+ ! [ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
+ ! [ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
+
+ ! [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
+ ! [ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
+
++[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
++[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
+
+ ! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
+ ! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
+
+ ! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
+ ! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
+ ! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
+ ! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
+ ! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
+ ! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
+
+ ! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
+ ! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
+ ! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
+ ! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
+ ! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
+ ! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
+
+ [ ] [ "USING: regexp kernel ; R' -{3}[+]{1,6}(?:!!)?\\s' drop" eval ] unit-test
+
+ [ ] [ "USING: regexp kernel ; R' (ftp|http|https)://(\\w+:?\\w*@)?(\\S+)(:[0-9]+)?(/|/([\\w#!:.?+=&%@!\\-/]))?' drop" eval ] unit-test
+
+ [ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
+
+ ! Bug in parsing word
+ ! [ t ] [ "a" R' a' matches? ] unit-test
+
+ ! ((A)(B(C)))
+ ! 1. ((A)(B(C)))
+ ! 2. (A)
+ ! 3. (B(C))
+ ! 4. (C)
--- /dev/null
-quotations sequences regexp.parser regexp.classes
-combinators.short-circuit regexp.utils ;
+ ! Copyright (C) 2008 Doug Coleman.
+ ! See http://factorcode.org/license.txt for BSD license.
+ USING: accessors assocs combinators kernel math math.ranges
- { lookahead-counter integer }
++quotations sequences regexp.parser regexp.classes fry
++combinators.short-circuit regexp.utils prettyprint regexp.nfa ;
+ IN: regexp.traversal
+
+ TUPLE: dfa-traverser
+ dfa-table
+ traversal-flags
+ capture-groups
+ { capture-group-index integer }
- V{ } clone >>capture-groups ;
++ lookahead-counters
+ last-state current-state
+ text
+ start-index current-index
+ matches ;
+
+ : <dfa-traverser> ( text regexp -- match )
+ [ dfa-table>> ] [ dfa-traversal-flags>> ] bi
+ dfa-traverser new
+ swap >>traversal-flags
+ swap [ start-state>> >>current-state ] keep
+ >>dfa-table
+ swap >>text
+ 0 >>start-index
+ 0 >>current-index
+ V{ } clone >>matches
-: print-flags ( dfa-traverser -- dfa-traverser )
++ V{ } clone >>capture-groups
++ V{ } clone >>lookahead-counters ;
+
+ : final-state? ( dfa-traverser -- ? )
+ [ current-state>> ] [ dfa-table>> final-states>> ] bi
+ key? ;
+
+ : text-finished? ( dfa-traverser -- ? )
+ [ current-index>> ] [ text>> length ] bi >= ;
+
+ : save-final-state ( dfa-straverser -- )
+ [ current-index>> ] [ matches>> ] bi push ;
+
+ : match-done? ( dfa-traverser -- ? )
+ dup final-state? [
+ dup save-final-state
+ ] when text-finished? ;
+
- ;
++GENERIC: flag-action ( dfa-traverser flag -- )
++
++M: lookahead-on flag-action ( dfa-traverser flag -- )
++ drop
++ lookahead-counters>> 0 swap push ;
++
++M: lookahead-off flag-action ( dfa-traverser flag -- )
++ drop
++ dup lookahead-counters>> pop
++ '[ _ - ] change-current-index drop ;
++
++: process-flags ( dfa-traverser -- )
++ [ [ 1+ ] map ] change-lookahead-counters
+ dup [ current-state>> ] [ traversal-flags>> ] bi
++ at [ dup . flag-action ] with each ;
+
+ : increment-state ( dfa-traverser state -- dfa-traverser )
+ [
+ [ 1+ ] change-current-index dup current-state>> >>last-state
+ ] dip
+ first >>current-state ;
+
+ : match-failed ( dfa-traverser -- dfa-traverser )
+ V{ } clone >>matches ;
+
+ : match-literal ( transition from-state table -- to-state/f )
+ transitions>> at* [ at ] [ 2drop f ] if ;
+
+ : match-class ( transition from-state table -- to-state/f )
+ transitions>> at* [
+ [ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
+ ] [ drop ] if ;
+
+ : match-default ( transition from-state table -- to-state/f )
+ [ nip ] dip transitions>> at*
+ [ t swap at* [ ] [ drop f ] if ] [ drop f ] if ;
+
+ : match-transition ( obj from-state dfa -- to-state/f )
+ { [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
+
+ : setup-match ( match -- obj state dfa-table )
+ {
+ [ current-index>> ] [ text>> ]
+ [ current-state>> ] [ dfa-table>> ]
+ } cleave
+ [ nth ] 2dip ;
+
+ : do-match ( dfa-traverser -- dfa-traverser )
++ dup process-flags
+ dup match-done? [
+ dup setup-match match-transition
+ [ increment-state do-match ] when*
+ ] unless ;
+
+ : return-match ( dfa-traverser -- interval/f )
+ dup matches>>
+ [ drop f ]
+ [ [ start-index>> ] [ peek ] bi* 1 <range> ] if-empty ;