1 ! Copyright (C) 2008 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: math.parser arrays io.encodings sequences kernel assocs
4 hashtables io.encodings.ascii generic parser classes.tuple words
5 words.symbol io io.files splitting namespaces math
6 compiler.units accessors ;
16 { "latin/cyrillic" "8859-5" }
17 { "latin/arabic" "8859-6" }
18 { "latin/greek" "8859-7" }
19 { "latin/hebrew" "8859-8" }
21 { "latin6" "8859-10" }
22 { "latin/thai" "8859-11" }
23 { "latin7" "8859-13" }
24 { "latin8" "8859-14" }
25 { "latin9" "8859-15" }
26 { "latin10" "8859-16" }
28 { "windows-1252" "CP1252" }
30 { "mac-roman" "ROMAN" }
33 : encoding-file ( file-name -- stream )
34 "resource:basis/io/encodings/8-bit/" swap ".TXT"
35 3append ascii <file-reader> ;
37 : process-contents ( lines -- assoc )
38 [ "#" split1 drop ] map harvest
39 [ "\t" split 2 head [ 2 short tail hex> ] map ] map ;
41 : byte>ch ( assoc -- array )
42 256 replacement-char <array>
43 [ [ swapd set-nth ] curry assoc-each ] keep ;
45 : ch>byte ( assoc -- newassoc )
46 [ swap ] assoc-map >hashtable ;
48 : parse-file ( stream -- byte>ch ch>byte )
49 lines process-contents
50 [ byte>ch ] [ ch>byte ] bi ;
52 SYMBOL: 8-bit-encodings
54 TUPLE: 8-bit decode encode ;
56 : encode-8-bit ( char stream assoc -- )
58 [ swap stream-write1 ] [ nip encode-error ] if ; inline
60 M: 8-bit encode-char encode>> encode-8-bit ;
62 : decode-8-bit ( stream array -- char/f )
64 [ swap nth [ replacement-char ] unless* ] [ 2drop f ] if ; inline
66 M: 8-bit decode-char decode>> decode-8-bit ;
68 PREDICATE: 8-bit-encoding < word
69 8-bit-encodings get-global key? ;
71 M: 8-bit-encoding <encoder>
72 8-bit-encodings get-global at <encoder> ;
74 M: 8-bit-encoding <decoder>
75 8-bit-encodings get-global at <decoder> ;
81 [ "io.encodings.8-bit" create ]
82 [ encoding-file parse-file 8-bit boa ]
85 [ keys [ define-symbol ] each ]
86 [ 8-bit-encodings set-global ]
88 ] with-compilation-unit