]> gitweb.factorcode.org Git - factor.git/commitdiff
Regexp character class intersection and difference syntax
authorDaniel Ehrenberg <littledan@Macintosh-122.local>
Sat, 21 Mar 2009 07:53:36 +0000 (02:53 -0500)
committerDaniel Ehrenberg <littledan@Macintosh-122.local>
Sat, 21 Mar 2009 07:53:36 +0000 (02:53 -0500)
basis/regexp/classes/classes.factor
basis/regexp/parser/parser-tests.factor
basis/regexp/parser/parser.factor
basis/regexp/regexp-tests.factor

index e114dea26013f5be5123d060645bd141f6b2ecf4..a1c4e3ca2a53cc3e01725d62f307a8a45b3e823c 100644 (file)
@@ -230,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
     dup or-class flatten partition-classes
     dup not-integers>> length {
         { 0 [ nip make-or-class ] }
-        { 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
+        { 1 [
+            not-integers>> first
+            [ class>> '[ _ swap class-member? ] any? ] keep or
+        ] }
         [ 3drop t ]
     } case ;
 
@@ -251,6 +254,12 @@ M: or-class <not-class>
 M: t <not-class> drop f ;
 M: f <not-class> drop t ;
 
+: <minus-class> ( a b -- a-b )
+    <not-class> 2array <and-class> ;
+
+: <sym-diff-class> ( a b -- a~b )
+    2array [ <or-class> ] [ <and-class> ] bi <minus-class> ;
+
 M: primitive-class class-member?
     class>> class-member? ;
 
index d606015f617e19e5e3a181174e0425df838593c1..5ea9753fbaf66b9ec2a964a7a8db951f30a0cb9d 100644 (file)
@@ -11,7 +11,7 @@ IN: regexp.parser.tests
     "a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
     "(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
     "[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
-    "[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
+    "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
     "(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
     "[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
     "\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"
index e8de469a9475cb7dcce7026b9f4768378a0e101a..9fcadc40084f78d7e3924d2b5264afcee144cab4 100644 (file)
@@ -148,19 +148,29 @@ Character = EscapeSequence
           | "^" => [[ ^ <tagged-epsilon> ]]
           | . ?[ allowed-char? ]?
 
-AnyRangeCharacter = EscapeSequence | .
+AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
 
 RangeCharacter = !("]") AnyRangeCharacter
 
-Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
+Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
       | RangeCharacter
 
-StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
+StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
            | AnyRangeCharacter
 
 Ranges = StartRange:s Range*:r => [[ r s prefix ]]
 
-CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
+BasicCharClass =  "^"?:n Ranges:e => [[ e n char-class ]]
+
+CharClass = BasicCharClass:b "&&" CharClass:c
+                => [[ b c 2array <and-class> ]]
+          | BasicCharClass:b "||" CharClass:c
+                => [[ b c 2array <or-class> ]]
+          | BasicCharClass:b "~~" CharClass:c
+                => [[ b c <sym-diff-class> ]]
+          | BasicCharClass:b "--" CharClass:c
+                => [[ b c <minus-class> ]]
+          | BasicCharClass
 
 Options = [idmsux]*
 
index 999caeaed63c1a5935ada2db98625b35754f8362..22343868032108956f864c87579bf6f61736c5be 100644 (file)
@@ -508,3 +508,29 @@ IN: regexp-tests
 [ t ] [ " " R/ \P{LL}/ matches? ] unit-test
 [ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
 [ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
+
+! Logical operators
+[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+
+[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+
+[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+
+[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+
+[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test