1 ! Copyright (C) 2008 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: math.parser arrays io.encodings sequences kernel assocs
4 hashtables io.encodings.ascii generic parser classes.tuple words
5 words.symbol io io.files splitting namespaces math
6 compiler.units accessors classes.singleton classes.mixin
7 io.encodings.iana fry ;
13 ! encoding-name iana-name file-name
14 { "latin1" "ISO_8859-1:1987" "8859-1" }
15 { "latin2" "ISO_8859-2:1987" "8859-2" }
16 { "latin3" "ISO_8859-3:1988" "8859-3" }
17 { "latin4" "ISO_8859-4:1988" "8859-4" }
18 { "latin/cyrillic" "ISO_8859-5:1988" "8859-5" }
19 { "latin/arabic" "ISO_8859-6:1987" "8859-6" }
20 { "latin/greek" "ISO_8859-7:1987" "8859-7" }
21 { "latin/hebrew" "ISO_8859-8:1988" "8859-8" }
22 { "latin5" "ISO_8859-9:1989" "8859-9" }
23 { "latin6" "ISO-8859-10" "8859-10" }
24 { "latin/thai" "TIS-620" "8859-11" }
25 { "latin7" "ISO-8859-13" "8859-13" }
26 { "latin8" "ISO-8859-14" "8859-14" }
27 { "latin9" "ISO-8859-15" "8859-15" }
28 { "latin10" "ISO-8859-16" "8859-16" }
29 { "koi8-r" "KOI8-R" "KOI8-R" }
30 { "windows-1252" "windows-1252" "CP1252" }
31 { "ebcdic" "IBM037" "CP037" }
32 { "mac-roman" "macintosh" "ROMAN" }
35 : encoding-file ( file-name -- stream )
36 "vocab:io/encodings/8-bit/" ".TXT" surround ;
38 : process-contents ( lines -- assoc )
39 [ "#" split1 drop ] map harvest
40 [ "\t" split 2 head [ 2 short tail hex> ] map ] map ;
42 : byte>ch ( assoc -- array )
43 256 replacement-char <array>
44 [ '[ swap _ set-nth ] assoc-each ] keep ;
46 : ch>byte ( assoc -- newassoc )
47 [ swap ] assoc-map >hashtable ;
49 : parse-file ( path -- byte>ch ch>byte )
50 ascii file-lines process-contents
51 [ byte>ch ] [ ch>byte ] bi ;
53 SYMBOL: 8-bit-encodings
55 TUPLE: 8-bit decode encode ;
57 : encode-8-bit ( char stream assoc -- )
59 [ swap stream-write1 ] [ nip encode-error ] if ; inline
61 M: 8-bit encode-char encode>> encode-8-bit ;
63 : decode-8-bit ( stream array -- char/f )
65 [ swap nth [ replacement-char ] unless* ] [ 2drop f ] if ; inline
67 M: 8-bit decode-char decode>> decode-8-bit ;
71 M: 8-bit-encoding <encoder>
72 8-bit-encodings get-global at <encoder> ;
74 M: 8-bit-encoding <decoder>
75 8-bit-encodings get-global at <decoder> ;
77 : create-encoding ( name -- word )
78 "io.encodings.8-bit" create
79 [ define-singleton-class ]
80 [ 8-bit-encoding add-mixin-instance ]
89 [ dupd register-encoding ]
90 [ encoding-file parse-file 8-bit boa ]
93 8-bit-encodings set-global
94 ] with-compilation-unit