gitweb.factorcode.org Git - factor.git/commitdiff
peg.javascript.tokenizer: support hex and unicode escapes.
author John Benediktsson <mrjbq7@gmail.com>
Sun, 12 Jun 2016 13:41:59 +0000 (06:41 -0700)
committer John Benediktsson <mrjbq7@gmail.com>
Sun, 12 Jun 2016 13:41:59 +0000 (06:41 -0700)
extra/peg/javascript/tokenizer/tokenizer-tests.factor
extra/peg/javascript/tokenizer/tokenizer.factor

index 1e9df1cf52b519de68166ee7614c421f1f018d84..5d01d2d15466c3e7b76ea1ee7e5bb3585a21b790 100644 (file)
@@ -31,3 +31,7 @@ IN: peg.javascript.tokenizer.tests
 {
     V{ T{ ast-string { value "\b\f\n\r\t\v'\"\\" } } }
 } [ "\"\\b\\f\\n\\r\\t\\v\\'\\\"\\\\\"" tokenize-javascript ] unit-test
+
+{
+    V{ T{ ast-string { value "abc" } } }
+} [ "\"\\x61\\u0062\\u{63}\"" tokenize-javascript ] unit-test
index 8dcbc70e851a697e9b85bf5c4f6f6570d1f2c1eb..56afa42cad31b41f4a9024bb1d42e9cfb3ec0d37 100644 (file)
@@ -48,15 +48,20 @@ Name              = !(Keyword) iName  => [[ ast-name boa ]]
 Number            =   Digits:ws '.' Digits:fs => [[ ws "." fs 3array "" concat-as string>number ast-number boa ]]
                     | Digits => [[ >string string>number ast-number boa ]]
 
-EscapeChar        =   "\\b"  => [[ CHAR: \b ]]
-                    | "\\f"  => [[ CHAR: \f ]]
-                    | "\\n"  => [[ CHAR: \n ]]
-                    | "\\r"  => [[ CHAR: \r ]]
-                    | "\\t"  => [[ CHAR: \t ]]
-                    | "\\v"  => [[ CHAR: \v ]]
-                    | "\\'"  => [[ CHAR: '  ]]
-                    | "\\\"" => [[ CHAR: "  ]]
-                    | "\\\\" => [[ CHAR: \\ ]]
+SingleEscapeChar  =   "b"  => [[ CHAR: \b ]]
+                    | "f"  => [[ CHAR: \f ]]
+                    | "n"  => [[ CHAR: \n ]]
+                    | "r"  => [[ CHAR: \r ]]
+                    | "t"  => [[ CHAR: \t ]]
+                    | "v"  => [[ CHAR: \v ]]
+                    | "'"  => [[ CHAR: '  ]]
+                    | "\"" => [[ CHAR: "  ]]
+                    | "\\" => [[ CHAR: \\ ]]
+HexDigit          = [0-9a-fA-F]
+HexEscapeChar     = "x" (HexDigit HexDigit):d => [[ d hex> ]]
+UnicodeEscapeChar = "u" (HexDigit HexDigit HexDigit HexDigit):d => [[ d hex> ]]
+                    | "u{" HexDigit+:d "}" => [[ d hex> ]]
+EscapeChar         = "\\" (SingleEscapeChar | HexEscapeChar | UnicodeEscapeChar):c => [[ c ]]
 StringChars1       = (EscapeChar | !('"""') .)* => [[ >string ]]
 StringChars2       = (EscapeChar | !('"') .)* => [[ >string ]]
 StringChars3       = (EscapeChar | !("'") .)* => [[ >string ]]