class-transition add-simple-entry ;
M: capture-group nfa-node ( node -- )
- term>> nfa-node ;
+ eps literal-transition add-simple-entry
+ capture-group-on add-traversal-flag
+ term>> nfa-node
+ eps literal-transition add-simple-entry
+ capture-group-off add-traversal-flag
+ 2 [ concatenate-nodes ] times ;
! xyzzy
M: non-capture-group nfa-node ( node -- )
[ { 0 1 } ] [ "ab" "a(?=b)(?=b)" <regexp> first-match ] unit-test
[ { 1 2 } ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match ] unit-test
+[ { 1 2 } ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match ] unit-test
+! capture group 1: "aaaa" 2: ""
+! "aaaa" "(a*)(a*)" <regexp> match*
+! "aaaa" "(a*)(a+)" <regexp> match*
-[ { 1 2 } ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match ] unit-test
+[ { 0 2 } ] [ "ab" "(a|ab)(bc)?" <regexp> first-match ] unit-test
+[ { 0 3 } ] [ "abc" "(a|ab)(bc)?" <regexp> first-match ] unit-test
+
+[ { 0 2 } ] [ "ab" "(ab|a)(bc)?" <regexp> first-match ] unit-test
+[ { 0 3 } ] [ "abc" "(ab|a)(bc)?" <regexp> first-match ] unit-test
+
+[ { 23 24 } ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match ] unit-test
: match ( string regexp -- pair )
<dfa-traverser> do-match return-match ;
+: match* ( string regexp -- pair )
+ <dfa-traverser> do-match [ return-match ] [ captured-groups>> ] bi ;
+
: matches? ( string regexp -- ? )
dupd match
[ [ length ] [ length>> 1- ] bi* = ] [ drop f ] if* ;
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs combinators kernel math math.ranges
-quotations sequences regexp.parser regexp.classes fry
+quotations sequences regexp.parser regexp.classes fry arrays
combinators.short-circuit regexp.utils prettyprint regexp.nfa ;
IN: regexp.traversal
dfa-table
traversal-flags
traverse-forward
- capture-groups
- { capture-group-index integer }
lookahead-counters
lookbehind-counters
+ capture-counters
+ captured-groups
+ capture-group-index
last-state current-state
text
start-index current-index
t >>traverse-forward
0 >>start-index
0 >>current-index
+ 0 >>capture-group-index
V{ } clone >>matches
- V{ } clone >>capture-groups
+ V{ } clone >>capture-counters
V{ } clone >>lookbehind-counters
- V{ } clone >>lookahead-counters ;
+ V{ } clone >>lookahead-counters
+ H{ } clone >>captured-groups ;
: final-state? ( dfa-traverser -- ? )
[ current-state>> ] [ dfa-table>> final-states>> ] bi
dup lookbehind-counters>>
[ drop ] [ pop '[ _ + 2 + ] change-current-index drop ] if-empty ;
+M: capture-group-on flag-action ( dfa-traverser flag -- )
+ drop
+ [ current-index>> 0 2array ]
+ [ capture-counters>> ] bi push ;
+
+M: capture-group-off flag-action ( dfa-traverser flag -- )
+ drop
+ dup capture-counters>> empty? [
+ drop
+ ] [
+ {
+ [ capture-counters>> pop first2 dupd + ]
+ [ text>> <slice> ]
+ [ [ 1+ ] change-capture-group-index capture-group-index>> ]
+ [ captured-groups>> set-at ]
+ } cleave
+ ] if ;
+
: process-flags ( dfa-traverser -- )
[ [ 1+ ] map ] change-lookahead-counters
[ [ 1+ ] map ] change-lookbehind-counters
+ [ [ first2 1+ 2array ] map ] change-capture-counters
! dup current-state>> .
dup [ current-state>> ] [ traversal-flags>> ] bi
at [ dup . flag-action ] with each ;