]> gitweb.factorcode.org Git - factor.git/commitdiff
first stab at capture groups. they work for unambiguous groups (no overlap), working...
authorDoug Coleman <doug.coleman@gmail.com>
Tue, 23 Sep 2008 01:09:42 +0000 (20:09 -0500)
committerDoug Coleman <doug.coleman@gmail.com>
Tue, 23 Sep 2008 01:09:42 +0000 (20:09 -0500)
basis/regexp/nfa/nfa.factor
basis/regexp/regexp-tests.factor
basis/regexp/regexp.factor
basis/regexp/traversal/traversal.factor

index aebfea04e1ee534b68110e941cee5ad572289fea..72d0fe970bdcb3abe5e4def0311de81319b847b1 100644 (file)
@@ -121,7 +121,12 @@ M: character-class-range nfa-node ( node -- )
     class-transition add-simple-entry ;
 
 M: capture-group nfa-node ( node -- )
-    term>> nfa-node ;
+    eps literal-transition add-simple-entry
+    capture-group-on add-traversal-flag
+    term>> nfa-node
+    eps literal-transition add-simple-entry
+    capture-group-off add-traversal-flag
+    2 [ concatenate-nodes ] times ;
 
 ! xyzzy
 M: non-capture-group nfa-node ( node -- )
index 934b635e507a200aa105a2e44602c084178e81f2..46696c8c0ff943edfb7113235769d2e676a58d0c 100644 (file)
@@ -318,7 +318,17 @@ IN: regexp-tests
 
 [ { 0 1 } ] [ "ab" "a(?=b)(?=b)" <regexp> first-match ] unit-test
 [ { 1 2 } ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match ] unit-test
+[ { 1 2 } ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match ] unit-test
 
+! capture group 1: "aaaa"  2: ""
+! "aaaa" "(a*)(a*)" <regexp> match*
+! "aaaa" "(a*)(a+)" <regexp> match*
 
-[ { 1 2 } ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match ] unit-test
+[ { 0 2 } ] [ "ab" "(a|ab)(bc)?" <regexp> first-match ] unit-test
+[ { 0 3 } ] [ "abc" "(a|ab)(bc)?" <regexp> first-match ] unit-test
+
+[ { 0 2 } ] [ "ab" "(ab|a)(bc)?" <regexp> first-match ] unit-test
+[ { 0 3 } ] [ "abc" "(ab|a)(bc)?" <regexp> first-match ] unit-test
+
+[ { 23 24 } ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match ] unit-test
 
index debf94ef33793bf9d92366b2b90cc9233b99752a..73555fe9537be534fe2e25efc73c6df776dda738 100644 (file)
@@ -28,6 +28,9 @@ IN: regexp
 : match ( string regexp -- pair )
     <dfa-traverser> do-match return-match ;
 
+: match* ( string regexp -- pair )
+    <dfa-traverser> do-match [ return-match ] [ captured-groups>> ] bi ;
+
 : matches? ( string regexp -- ? )
     dupd match
     [ [ length ] [ length>> 1- ] bi* = ] [ drop f ] if* ;
index d82e9941a28fdf2224a378496b1b0232f8fb0fa6..f5a235fa7f3a1696c41308f1cc78c6895a15ecdb 100644 (file)
@@ -1,7 +1,7 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors assocs combinators kernel math math.ranges
-quotations sequences regexp.parser regexp.classes fry
+quotations sequences regexp.parser regexp.classes fry arrays
 combinators.short-circuit regexp.utils prettyprint regexp.nfa ;
 IN: regexp.traversal
 
@@ -9,10 +9,11 @@ TUPLE: dfa-traverser
     dfa-table
     traversal-flags
     traverse-forward
-    capture-groups
-    { capture-group-index integer }
     lookahead-counters
     lookbehind-counters
+    capture-counters
+    captured-groups
+    capture-group-index
     last-state current-state
     text
     start-index current-index
@@ -28,10 +29,12 @@ TUPLE: dfa-traverser
         t >>traverse-forward
         0 >>start-index
         0 >>current-index
+        0 >>capture-group-index
         V{ } clone >>matches
-        V{ } clone >>capture-groups
+        V{ } clone >>capture-counters
         V{ } clone >>lookbehind-counters
-        V{ } clone >>lookahead-counters ;
+        V{ } clone >>lookahead-counters
+        H{ } clone >>captured-groups ;
 
 : final-state? ( dfa-traverser -- ? )
     [ current-state>> ] [ dfa-table>> final-states>> ] bi
@@ -75,9 +78,28 @@ M: lookbehind-off flag-action ( dfa-traverser flag -- )
     dup lookbehind-counters>>
     [ drop ] [ pop '[ _ + 2 + ] change-current-index drop ] if-empty ;
 
+M: capture-group-on flag-action ( dfa-traverser flag -- )
+    drop
+    [ current-index>> 0 2array ]
+    [ capture-counters>> ] bi push ;
+
+M: capture-group-off flag-action ( dfa-traverser flag -- )
+    drop
+    dup capture-counters>> empty? [
+        drop
+    ] [
+        {
+            [ capture-counters>> pop first2 dupd + ]
+            [ text>> <slice> ]
+            [ [ 1+ ] change-capture-group-index capture-group-index>> ]
+            [ captured-groups>> set-at ]
+        } cleave
+    ] if ;
+
 : process-flags ( dfa-traverser -- )
     [ [ 1+ ] map ] change-lookahead-counters
     [ [ 1+ ] map ] change-lookbehind-counters
+    [ [ first2 1+ 2array ] map ] change-capture-counters
     ! dup current-state>> .
     dup [ current-state>> ] [ traversal-flags>> ] bi
     at [ dup . flag-action ] with each ;