gitweb.factorcode.org Git - factor.git/commitdiff
regexp: try again to fix the issue with backslashes.
author John Benediktsson <mrjbq7@gmail.com>
Thu, 21 Mar 2019 20:29:15 +0000 (13:29 -0700)
committer John Benediktsson <mrjbq7@gmail.com>
Thu, 21 Mar 2019 20:29:15 +0000 (13:29 -0700)
The new simple rule: inside R/ syntax, every forward slash must be escaped
with a backslash, so that regexp literals are easy to tokenize:

R/ [\/]/

but in a string passed to the <regexp> constructor they should not be escaped:

"[/]" <regexp>

When a regexp is prettyprinted, we make sure to visually escape the
forward slashes in the raw regexp.

basis/globs/globs.factor
basis/regexp/parser/parser.factor
basis/regexp/prettyprint/prettyprint.factor
basis/regexp/regexp-tests.factor
basis/regexp/regexp.factor
basis/validators/validators.factor
extra/metar/metar.factor
extra/xkcd/xkcd.factor

index 3a540e2b23aaf52510b789752864c116c6ae4fd7..e2bbb99aee166b23647785fda508e9fecd9fc872 100644 (file)
@@ -7,10 +7,10 @@ unicode multiline ;
 IN: globs
 
 : not-path-separator ( -- sep )
-    os windows? R/ [^\\/\\]/ R/ [^\\/]/ ? ; foldable
+    os windows? R/ [^\/\\]/ R/ [^\/]/ ? ; foldable
 
 : wild-path-separator ( -- sep )
-    os windows? R/ [^\\/\\][\\/\\]|[^\\/\\]/ R/ [^\\/][\\/]|[^\\/]/ ? ; foldable
+    os windows? R/ [^\/\\][\/\\]|[^\/\\]/ R/ [^\/][\/]|[^\/]/ ? ; foldable
 
 EBNF: <glob> [=[
 
index 931843542c65719860891b2ebcf9c760861dbf75..f28aa61c39c57d394ef4502a9f6141969bd80b83 100644 (file)
@@ -3,7 +3,8 @@
 USING: accessors arrays assocs combinators
 combinators.short-circuit interval-maps kernel locals
 math.parser memoize multiline peg.ebnf regexp.ast regexp.classes
-sequences sets splitting strings unicode unicode.data unicode.script ;
+sequences sets splitting strings unicode unicode.data
+unicode.script ;
 IN: regexp.parser
 
 : allowed-char? ( ch -- ? )
@@ -70,13 +71,14 @@ MEMO: simple-category-table ( -- table )
 
 : lookup-escape ( char -- ast )
     {
-        { CHAR: t [ CHAR: \t ] }
+        { CHAR: a [ CHAR: \a ] }
+        { CHAR: e [ CHAR: \e ] }
+        { CHAR: f [ CHAR: \f ] }
         { CHAR: n [ CHAR: \n ] }
         { CHAR: r [ CHAR: \r ] }
-        { CHAR: f [ 0xc ] }
-        { CHAR: a [ 0x7 ] }
-        { CHAR: e [ 0x1b ] }
-        { CHAR: \\ [ CHAR: \\ ] }
+        { CHAR: t [ CHAR: \t ] }
+        { CHAR: v [ CHAR: \v ] }
+        { CHAR: 0 [ CHAR: \0 ] }
 
         { CHAR: w [ c-identifier-class <primitive-class> ] }
         { CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
index 372ef69062f83f16d68c35d9f9b5b55e92872df5..ae3877c7d78f39797d823c2cea453fe12c708221 100644 (file)
@@ -7,7 +7,7 @@ IN: regexp.prettyprint
 M: regexp pprint*
     [
         [
-            [ raw>> "\\/" "\\\\/" replace "R/ " % % "/" % ]
+            [ raw>> "/" "\\/" replace "R/ " % % "/" % ]
             [ options>> options>string % ] bi
         ] "" make
     ] keep present-text ;
index 30ec30a6d98df7034d5bf1213df834490c9ad115..36e877a03f2354655362c0ee0197656c2153f15a 100644 (file)
@@ -49,6 +49,9 @@ IN: regexp.tests
 { t } [ "a" ".+" <regexp> matches? ] unit-test
 { t } [ "ab" ".+" <regexp> matches? ] unit-test
 
+{ t } [ "\0" "[\\0]" <regexp> matches? ] unit-test
+{ f } [ "0" "[\\0]" <regexp> matches? ] unit-test
+
 { t } [ " " "[\\s]" <regexp> matches? ] unit-test
 { f } [ "a" "[\\s]" <regexp> matches? ] unit-test
 { f } [ " " "[\\S]" <regexp> matches? ] unit-test
@@ -335,6 +338,10 @@ unit-test
 { "XhXXlXlXoX XwXoXrXlXdX" } [ "hello world" R/ e*/ "X" re-replace ] unit-test
 { "-- title --" } [ "== title ==" R/ =/ "-" re-replace ] unit-test
 
+{ "abc" } [ "a/   \\bc" "/.*\\" <regexp> "" re-replace ] unit-test
+{ "ac" } [ "a/   \\bc" R/ \/.*\\./ "" re-replace ] unit-test
+{ "abc" } [ "a/   \\bc" R/ \/.*\\/ "" re-replace ] unit-test
+
 { "" } [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
 { "a" } [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
 { t } [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
index 19d2d8710b9f884fd927b5e2300002b09401364b..c31571c718711f02bec2549ec94bd848a02afc49 100644 (file)
@@ -200,10 +200,11 @@ PRIVATE>
 : take-until ( lexer -- string )
     dup skip-blank [
         dupd [
-            [ CHAR: / -rot index-from ] keep
-            over [ "Unterminated regexp" throw ] unless
-            2dup [ 1 - ] dip nth CHAR: \\ =
-            [ [ [ 1 + ] dip ] when ] keep
+            [ [ "\\/" member? ] find-from ] keep swap [
+                CHAR: \ = [ [ 2 + ] dip t ] [ f ] if
+            ] [
+                "Unterminated regexp" throw
+            ] if*
         ] loop over [ subseq ] dip 1 +
     ] change-lexer-column ;
 
index c3b800ae57d800dc4343e786008a4206400879d0..cb3d2eca31d4d4c972c09addb08c77415bd640c2 100644 (file)
@@ -65,7 +65,7 @@ IN: validators
     v-regexp ;
 
 : v-url ( str -- str )
-    "URL" R/ (?:ftp|http|https):\\/\\/\S+/ v-regexp ;
+    "URL" R/ (?:ftp|http|https):\/\/\S+/ v-regexp ;
 
 : v-captcha ( str -- str )
     dup empty? [ "must remain blank" throw ] unless ;
index e4a98b8e9947675d44ce870e53cd0242a4badd68..f01f53874440a79ea504863dbe845c89ae2216d6 100644 (file)
@@ -283,11 +283,11 @@ CONSTANT: sky H{
 
 CONSTANT: re-timestamp R/ \d{6}Z/
 CONSTANT: re-station R/ \w{4}/
-CONSTANT: re-temperature R/ [M]?\d{2}\\/([M]?\d{2})?/
+CONSTANT: re-temperature R/ [M]?\d{2}\/([M]?\d{2})?/
 CONSTANT: re-wind R/ (VRB|\d{3})\d{2,3}(G\d{2,3})?KT/
 CONSTANT: re-wind-variable R/ \d{3}V\d{3}/
-CONSTANT: re-visibility R/ [MP]?\d+(\\/\d+)?SM/
-CONSTANT: re-rvr R/ R\d{2}[RLC]?\\/\d{4}(V\d{4})?FT/
+CONSTANT: re-visibility R/ [MP]?\d+(\/\d+)?SM/
+CONSTANT: re-rvr R/ R\d{2}[RLC]?\/\d{4}(V\d{4})?FT/
 CONSTANT: re-weather R/ [+-]?(VC)?(\w{2}|\w{4})/
 CONSTANT: re-sky-condition R/ (\w{2,3}\d{3}(\w+)?|\w{3}|CAVOK)/
 CONSTANT: re-altimeter R/ [AQ]\d{4}/
@@ -519,23 +519,23 @@ CONSTANT: re-recent-weather R/ ((\w{2})?[BE]\d{2,4}((\w{2})?[BE]\d{2,4})?)+/
         { [ dup R/ 1\d{4}/ matches? ] [ parse-6hr-max-temp ] }
         { [ dup R/ 2\d{4}/ matches? ] [ parse-6hr-min-temp ] }
         { [ dup R/ 4\d{8}/ matches? ] [ parse-24hr-temp ] }
-        { [ dup R/ 4\\/\d{3}/ matches? ] [ parse-snow-depth ] }
+        { [ dup R/ 4\/\d{3}/ matches? ] [ parse-snow-depth ] }
         { [ dup R/ 5\d{4}/ matches? ] [ parse-1hr-pressure ] }
-        { [ dup R/ 6[\d\\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
+        { [ dup R/ 6[\d\/]{4}/ matches? ] [ parse-6hr-precipitation ] }
         { [ dup R/ 7\d{4}/ matches? ] [ parse-24hr-precipitation ] }
-        { [ dup R/ 8\\/\d{3}/ matches? ] [ parse-cloud-cover ] }
+        { [ dup R/ 8\/\d{3}/ matches? ] [ parse-cloud-cover ] }
         { [ dup R/ 931\d{3}/ matches? ] [ parse-6hr-snowfall ] }
         { [ dup R/ 933\d{3}/ matches? ] [ parse-water-equivalent-snow ] }
         { [ dup R/ 98\d{3}/ matches? ] [ parse-duration-of-sunshine ] }
         { [ dup R/ T\d{4,8}/ matches? ] [ parse-1hr-temp ] }
-        { [ dup R/ \d{3}\d{2,3}\\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
+        { [ dup R/ \d{3}\d{2,3}\/\d{2,4}/ matches? ] [ parse-peak-wind ] }
         { [ dup R/ P\d{4}/ matches? ] [ parse-1hr-precipitation ] }
         { [ dup R/ SLP\d{3}/ matches? ] [ parse-sea-level-pressure ] }
         { [ dup R/ LTG\w+/ matches? ] [ parse-lightning ] }
         { [ dup R/ PROB\d+/ matches? ] [ parse-probability ] }
         { [ dup R/ \d{3}V\d{3}/ matches? ] [ parse-varying ] }
         { [ dup R/ [^-]+(-[^-]+)+/ matches? ] [ parse-from-to ] }
-        { [ dup R/ [^\\/]+(\\/[^\\/]+)+/ matches? ] [ ] }
+        { [ dup R/ [^\/]+(\/[^\/]+)+/ matches? ] [ ] }
         { [ dup R/ \d+.\d+/ matches? ] [ ] }
         { [ dup re-recent-weather matches? ] [ parse-recent-weather ] }
         { [ dup re-weather matches? ] [ parse-weather ] }
index f9a5f06b6937b28b893c811ef365fa289101dd94..8b9546f64aa8150f7045b6a35c25553df3a8f2e7 100644 (file)
@@ -13,7 +13,7 @@ IN: xkcd
 
 : comic-image ( url -- image )
     http-get nip
-    R/ \/\/imgs\.xkcd\.com\\/comics\\/[^\.]+\.(png|jpg)/
+    R/ \/\/imgs\.xkcd\.com\/comics\/[^\.]+\.(png|jpg)/
     first-match >string "http:" prepend load-http-image ;
 
 : comic-image. ( url -- )