Fixing help-lint for regexp; adding first-match and re-contains?

author Daniel Ehrenberg <littledan@Macintosh-122.local>

Wed, 11 Mar 2009 00:34:49 +0000 (19:34 -0500)

committer Daniel Ehrenberg <littledan@Macintosh-122.local>

Wed, 11 Mar 2009 00:34:49 +0000 (19:34 -0500)
author Daniel Ehrenberg <littledan@Macintosh-122.local>
Wed, 11 Mar 2009 00:34:49 +0000 (19:34 -0500)
committer Daniel Ehrenberg <littledan@Macintosh-122.local>
Wed, 11 Mar 2009 00:34:49 +0000 (19:34 -0500)
diff --git a/basis/regexp/regexp-docs.factor b/basis/regexp/regexp-docs.factor

index ce4a54df87764f101ea5f426c26bf19286363ab9..1d28e5e92fa03cf3c50d5b473950d7b2e0adc454 100644 (file)
--- a/basis/regexp/regexp-docs.factor
+++ b/basis/regexp/regexp-docs.factor
@@ -39,13 +39,14 @@ ARTICLE: { "regexp" "theory" } "The theory of regular expressions"
  "The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
  
  ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
-{ $subsection all-matches }
  { $subsection matches? }
+{ $subsection re-contains? }
+{ $subsection first-match }
+{ $subsection all-matches }
  { $subsection re-split1 }
  { $subsection re-split }
  { $subsection re-replace }
-{ $subsection count-matches }
-{ $subsection re-replace } ;
+{ $subsection count-matches } ;
  
  HELP: <regexp>
  { $values { "string" string } { "regexp" regexp } }
@@ -63,25 +64,33 @@ HELP: regexp
  { $class-description "The class of regular expressions. To construct these, see " { $link { "regexp" "construction" } } "." } ;
  
  HELP: matches?
-{ $values { "string" string } { "matcher" regexp } { "?" "a boolean" } }
+{ $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
  { $description "Tests if the string as a whole matches the given regular expression." } ;
  
  HELP: re-split1
-{ $values { "string" string } { "matcher" regexp } { "before" string } { "after/f" string } }
+{ $values { "string" string } { "regexp" regexp } { "before" string } { "after/f" string } }
  { $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ;
  
  HELP: all-matches
-{ $values { "string" string } { "matcher" regexp } { "seq" "a sequence of slices of the input" } }
+{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
  { $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ;
  
  HELP: count-matches
-{ $values { "string" string } { "matcher" regexp } { "n" integer } }
+{ $values { "string" string } { "regexp" regexp } { "n" integer } }
  { $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ;
  
  HELP: re-split
-{ $values { "string" string } { "matcher" regexp } { "seq" "a sequence of slices of the input" } }
+{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
  { $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ;
  
  HELP: re-replace
-{ $values { "string" string } { "matcher" regexp } { "replacement" string } { "result" string } }
+{ $values { "string" string } { "regexp" regexp } { "replacement" string } { "result" string } }
  { $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ;
+
+HELP: first-match
+{ $values { "string" string } { "regexp" regexp } { "slice/f" "the match, if one exists" } }
+{ $description "Finds the first match of the regular expression in the string, and returns it as a slice. If there is no match, then " { $link f } " is returned." } ;
+
+HELP: re-contains?
+{ $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
+{ $description "Determines whether the string has a substring which matches the regular expression given." } ;
diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor

index f7d3dae3f3ffaccdd5b04ea33e74d924479d7296..f05416ab9468a400abf8a2be820f7d4a4145c98a 100644 (file)
--- a/basis/regexp/regexp-tests.factor
+++ b/basis/regexp/regexp-tests.factor
@@ -211,8 +211,8 @@ IN: regexp-tests
  [ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
  [ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
  
-[ "aaa" ] [ "aaacb" "a*" <regexp> match-head >string ] unit-test
-[ "aa" ] [ "aaacb" "aa?" <regexp> match-head >string ] unit-test
+[ "aaa" ] [ "aaacb" "a*" <regexp> first-match >string ] unit-test
+[ "aa" ] [ "aaacb" "aa?" <regexp> first-match >string ] unit-test
  
  [ t ] [ "aaa" R/ AAA/i matches? ] unit-test
  [ f ] [ "aax" R/ AAA/i matches? ] unit-test
@@ -268,13 +268,13 @@ IN: regexp-tests
  
  [ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
  
-[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test
-[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test
+[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
+[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
  
-[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test
-[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test
+[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
+[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
  
-[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> match-head >string ] unit-test
+[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
  
  [ { "1" "2" "3" "4" } ]
  [ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
@@ -300,18 +300,18 @@ IN: regexp-tests
    
  [ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
  
-[ "" ] [ "ab" "a(?!b)" <regexp> match-head >string ] unit-test
-[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
+[ "" ] [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
+[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
  [ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
  [ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
  [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
  [ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
-[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
-[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> match-head >string ] unit-test
-[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> match-head >string ] unit-test
+[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
+[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> first-match >string ] unit-test
+[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> first-match >string ] unit-test
  
-[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head length ] unit-test
-[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
+[ 3 ] [ "foobar" "foo(?=bar)" <regexp> first-match length ] unit-test
+[ f ] [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
  
  ! Bug in parsing word
  [ t ] [ "a" R' a' matches? ] unit-test
@@ -424,8 +424,12 @@ IN: regexp-tests
  [ 1 ] [ "a\r" R/ a$/m count-matches ] unit-test
  [ 1 ] [ "a\r\n" R/ a$/m count-matches ] unit-test
  
-[ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
-[ 3 ] [ "foo" "foo\\z" <regexp> match-head length ] unit-test
+[ f ] [ "foobxr" "foo\\z" <regexp> first-match ] unit-test
+[ 3 ] [ "foo" "foo\\z" <regexp> first-match length ] unit-test
+
+[ t ] [ "a foo b" R/ foo/ re-contains? ] unit-test
+[ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
+[ t ] [ "foo" R/ foo/ re-contains? ] unit-test
  
  ! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
  ! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor

index 94bbc2af58cdc44bc824431b370e65392322bc39..90218e05bdaa4d30fb5982ba9a437cfb0ce0b882 100644 (file)
--- a/basis/regexp/regexp.factor
+++ b/basis/regexp/regexp.factor
@@ -89,16 +89,17 @@ PRIVATE>
      slices [ from>> ] map string length suffix
      [ string <slice> ] 2map ;
  
-: match-head ( str regexp -- slice/f )
-    [
-        [ 0 ] [ check-string ] [ dup dfa>> '[ _ _ execute ] ] tri*
-        match-from
-    ] call( str regexp -- slice/f ) ;
-
  PRIVATE>
  
+: first-match ( string regexp -- slice/f )
+    [ 0 ] [ check-string ] [ ] tri*
+    do-next-match nip ;
+
+: re-contains? ( string regexp -- ? )
+    first-match >boolean ;
+
  : re-split1 ( string regexp -- before after/f )
-    dupd match-head [ 1array split-slices first2 ] [ f ] if* ;
+    dupd first-match [ 1array split-slices first2 ] [ f ] if* ;
  
  : re-split ( string regexp -- seq )
      dupd all-matches split-slices ;
author	Daniel Ehrenberg <littledan@Macintosh-122.local>
	Wed, 11 Mar 2009 00:34:49 +0000 (19:34 -0500)
committer	Daniel Ehrenberg <littledan@Macintosh-122.local>
	Wed, 11 Mar 2009 00:34:49 +0000 (19:34 -0500)
basis/regexp/regexp-docs.factor		patch | blob | history
basis/regexp/regexp-tests.factor		patch | blob | history
basis/regexp/regexp.factor		patch | blob | history