]> gitweb.factorcode.org Git - factor.git/blobdiff - extra/io/encodings/detect/detect.factor
factor: trim using lists
[factor.git] / extra / io / encodings / detect / detect.factor
index c8b01757f8ddb056f8acc1bd69a81c8707d24eba..a111c2109fa6293be31206beb86e712da92ebe9e 100644 (file)
@@ -1,50 +1,50 @@
-! (c)2010 Joe Groff bsd license\r
-USING: accessors byte-arrays byte-arrays.hex combinators\r
-continuations fry io io.encodings io.encodings.8-bit.latin1\r
-io.encodings.ascii io.encodings.binary io.encodings.iana\r
-io.encodings.string io.encodings.utf16 io.encodings.utf32\r
-io.encodings.utf8 io.files io.streams.string kernel literals\r
-math namespaces sequences strings ;\r
-IN: io.encodings.detect\r
-\r
-SYMBOL: default-8bit-encoding\r
-default-8bit-encoding [ latin1 ] initialize\r
-\r
-<PRIVATE\r
-\r
-: prolog-tag ( bytes -- string )\r
-    CHAR: > over index [ 1 + ] [ dup length ] if* head-slice >string ;\r
-\r
-: prolog-encoding ( string -- iana-encoding )\r
-    '[\r
-        _ "encoding=" over start\r
-        10 + swap [ [ 1 - ] dip nth ] [ index-from ] [ swapd subseq ] 2tri\r
-    ] [ drop "UTF-8" ] recover ;\r
-\r
-: detect-xml-prolog ( bytes -- encoding )\r
-    prolog-tag prolog-encoding name>encoding [ ascii ] unless* ;\r
-\r
-: valid-utf8? ( bytes -- ? )\r
-    utf8 decode 1 head-slice* replacement-char swap member? not ;\r
-\r
-PRIVATE>\r
-\r
-: detect-byte-array ( bytes -- encoding )\r
-    {\r
-        { [ dup HEX{ 0000FEFF } head? ] [ drop utf32be ] }\r
-        { [ dup HEX{ FFFE0000 } head? ] [ drop utf32le ] }\r
-        { [ dup HEX{ FEFF } head? ] [ drop utf16be ] }\r
-        { [ dup HEX{ FFFE } head? ] [ drop utf16le ] }\r
-        { [ dup HEX{ EF BB BF } head? ] [ drop utf8 ] }\r
-        { [ dup $[ "<?xml" >byte-array ] head? ] [ detect-xml-prolog ] }\r
-        { [ 0 over member? ] [ drop binary ] }\r
-        { [ dup empty? ] [ drop utf8 ] }\r
-        { [ dup valid-utf8? ] [ drop utf8 ] }\r
-        [ drop default-8bit-encoding get ]\r
-    } cond ;\r
-\r
-: detect-stream ( stream -- sample encoding )\r
-    256 swap stream-read dup detect-byte-array ;\r
-\r
-: detect-file ( file -- encoding )\r
-    binary [ input-stream get detect-stream nip ] with-file-reader ;\r
+! Copyright (C) 2010 Joe Groff.
+! See http://factorcode.org/license.txt for BSD license.
+USING: byte-arrays combinators continuations io io.encodings
+io.encodings.ascii io.encodings.binary io.encodings.iana
+io.encodings.latin1 io.encodings.string io.encodings.utf16
+io.encodings.utf32 io.encodings.utf8 io.files kernel literals
+math namespaces sequences strings ;
+IN: io.encodings.detect
+
+SYMBOL: default-encoding
+default-encoding [ latin1 ] initialize
+
+<PRIVATE
+
+: prolog-tag ( bytes -- string )
+    CHAR: > over index [ 1 + head-slice ] when* >string ;
+
+: prolog-encoding ( string -- iana-encoding )
+    '[
+        _ "encoding=" over subseq-start
+        10 + swap [ [ 1 - ] dip nth ] [ index-from ] [ swapd subseq ] 2tri
+    ] [ drop "UTF-8" ] recover ;
+
+: detect-xml-prolog ( bytes -- encoding )
+    prolog-tag prolog-encoding name>encoding [ ascii ] unless* ;
+
+: valid-utf8? ( bytes -- ? )
+    utf8 decode but-last-slice replacement-char swap member? not ;
+
+PRIVATE>
+
+: detect-byte-array ( bytes -- encoding )
+    {
+        { [ dup B{ 0x00 0x00 0xFE 0xFF } head? ] [ drop utf32be ] }
+        { [ dup B{ 0xFF 0xFE 0x00 0x00 } head? ] [ drop utf32le ] }
+        { [ dup B{ 0xFE 0xFF } head? ] [ drop utf16be ] }
+        { [ dup B{ 0xFF 0xFE } head? ] [ drop utf16le ] }
+        { [ dup B{ 0xEF 0xBB 0xBF } head? ] [ drop utf8 ] }
+        { [ dup $[ "<?xml" >byte-array ] head? ] [ detect-xml-prolog ] }
+        { [ 0 over member? ] [ drop binary ] }
+        { [ dup empty? ] [ drop utf8 ] }
+        { [ dup valid-utf8? ] [ drop utf8 ] }
+        [ drop default-encoding get ]
+    } cond ;
+
+: detect-stream ( stream -- sample encoding )
+    256 swap stream-read dup detect-byte-array ;
+
+: detect-file ( file -- encoding )
+    binary [ input-stream get detect-stream nip ] with-file-reader ;