gitweb.factorcode.org Git - factor.git/commitdiff
io.encodings.utf8: assume streams are largely ascii.
author John Benediktsson <mrjbq7@gmail.com>
Mon, 3 Oct 2011 19:31:46 +0000 (12:31 -0700)
committer John Benediktsson <mrjbq7@gmail.com>
Tue, 4 Oct 2011 04:49:21 +0000 (21:49 -0700)
Results in 30% faster file-contents for test file and 7% faster benchmark.xml.

core/io/encodings/encodings.factor
core/io/encodings/utf8/utf8.factor

index 1b34b6aa8b5edb4d3c6eab25f67ee1be62fe8908..81037ca2dfb791925e988e7f52d4cf7b56615fdc 100644 (file)
@@ -78,10 +78,10 @@ M: decoder stream-read1
     ] keep ; inline
 
 : finish-read ( n/f string -- string/f )
-    {
-        { [ over 0 = ] [ 2drop f ] }
-        { [ over not ] [ nip ] }
-        [ swap head ]
+    swap {
+        { [ dup zero? ] [ 2drop f ] }
+        { [ dup not   ] [ drop ] }
+        [ head ]
     } cond ; inline
 
 M: decoder stream-read
index 09e3dd5f4bb5da6a23803fd9067d6257f1cf0e64..8fbc71e016ae26010edd8d53d4716f75f93b0879 100644 (file)
@@ -39,13 +39,14 @@ SINGLETON: utf8
     HEX: 10FFFF maximum-code-point ; inline
 
 : begin-utf8 ( stream byte -- stream char )
-    {
-        { [ dup -7 shift zero? ] [ ] }
-        { [ dup -5 shift BIN: 110 = ] [ double ] }
-        { [ dup -4 shift BIN: 1110 = ] [ triple ] }
-        { [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
-        [ drop replacement-char ]
-    } cond ; inline
+    dup 127 > [
+        {
+            { [ dup -5 shift BIN: 110 = ] [ double ] }
+            { [ dup -4 shift BIN: 1110 = ] [ triple ] }
+            { [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
+            [ drop replacement-char ]
+        } cond
+    ] when ; inline
 
 : decode-utf8 ( stream -- char/f )
     dup stream-read1 dup [ begin-utf8 ] when nip ; inline
@@ -59,24 +60,25 @@ M: utf8 decode-char
     BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
 
 : char>utf8 ( char stream -- )
-    swap {
-        { [ dup -7 shift zero? ] [ swap stream-write1 ] }
-        { [ dup -11 shift zero? ] [
-            2dup -6 shift BIN: 11000000 bitor swap stream-write1
-            encoded
-        ] }
-        { [ dup -16 shift zero? ] [
-            2dup -12 shift BIN: 11100000 bitor swap stream-write1
-            2dup -6 shift encoded
-            encoded
-        ] }
-        [
-            2dup -18 shift BIN: 11110000 bitor swap stream-write1
-            2dup -12 shift encoded
-            2dup -6 shift encoded
-            encoded
-        ]
-    } cond ; inline
+    over 127 <= [ stream-write1 ] [
+        swap {
+            { [ dup -11 shift zero? ] [
+                2dup -6 shift BIN: 11000000 bitor swap stream-write1
+                encoded
+            ] }
+            { [ dup -16 shift zero? ] [
+                2dup -12 shift BIN: 11100000 bitor swap stream-write1
+                2dup -6 shift encoded
+                encoded
+            ] }
+            [
+                2dup -18 shift BIN: 11110000 bitor swap stream-write1
+                2dup -12 shift encoded
+                2dup -6 shift encoded
+                encoded
+            ]
+        } cond
+    ] if ; inline
 
 M: utf8 encode-char
     drop char>utf8 ;