gitweb.factorcode.org Git - factor.git/blob - basis/io/encodings/8-bit/8-bit.factor
factor: trim using lists
[factor.git] / basis / io / encodings / 8-bit / 8-bit.factor
1 ! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs classes.singleton generic
4 hashtables io io.encodings io.encodings.iana kernel lexer parser
5 sequences simple-flat-file words ;
6 IN: io.encodings.8-bit
7
8 <<
9 <PRIVATE
10
! Build the resource path of a code table from its base file name by
! wrapping it in the "vocab:io/encodings/8-bit/" prefix and the ".TXT"
! suffix. The result is a path string, not an open stream; it is handed
! to load-codetable-file by load-encoding.
: encoding-file ( file-name -- path )
    "vocab:io/encodings/8-bit/" ".TXT" surround ;
13
! Lookup tables for one 8-bit encoding.
!   from - array indexed by byte value (used by decode-char)
!   to   - hashtable keyed by character (used by 8-bit-encode)
! Both slots are read-only.
TUPLE: 8-bit
    { from array read-only }
    { to hashtable read-only } ;
15
! Convert a biassoc into an 8-bit tuple.
! The from slot becomes a 256-element array holding, for each index
! 0..255, the value found under that key in the biassoc's from>> table
! (f when the key is absent). The to slot is the biassoc's to>> table,
! used unchanged.
: <8-bit> ( biassoc -- 8-bit )
    [ from>> [ at ] curry 256 <iota> swap map ] keep
    to>> 8-bit boa ;
18
! Look char up in the encoding's to>> table and return the result.
! When the lookup yields f, encode-error is called instead.
: 8-bit-encode ( char 8-bit -- byte )
    to>> at [ ] [ encode-error ] if* ; inline
21
! Encode a single character with 8-bit-encode and write the resulting
! byte to the stream with stream-write1.
! Stack on entry: char stream encoding
M: 8-bit encode-char
    swapd 8-bit-encode swap stream-write1 ;
24
! Encode every element of the string with 8-bit-encode, collect the
! results into a byte array, and write it to the stream in one
! stream-write call.
! Stack on entry: string stream encoding
M: 8-bit encode-string
    swapd [ 8-bit-encode ] curry B{ } map-as swap stream-write ;
27
! Read one element from the stream and translate it through the from>>
! table. Returns f when stream-read1 returns f. A table entry that is f,
! or an index outside the table, yields replacement-char.
! Stack on entry: stream encoding
M: 8-bit decode-char
    swap stream-read1 dup
    [ swap from>> ?nth [ replacement-char ] unless* ]
    [ 2drop f ] if ;
32
! Create a word called name with create-word-in, define it as a
! singleton class, and leave the word on the stack.
: create-encoding ( name -- word )
    create-word-in [ define-singleton-class ] keep ;
35
! Define a complete 8-bit encoding:
!   1. create a singleton class word called name (create-encoding);
!   2. register that word under iana-name (register-encoding);
!   3. load the code table found at file-name's encoding-file path and
!      turn it into an 8-bit tuple;
!   4. define <encoder> and <decoder> methods on the new class that drop
!      the singleton and construct the encoder/decoder from the 8-bit
!      tuple instead.
: load-encoding ( name iana-name file-name -- )
    ! name iana-name file-name -> word table
    [ [ create-encoding dup ] dip register-encoding ] dip
    encoding-file load-codetable-file <8-bit>
    [ '[ drop _ <encoder> ] swap \ <encoder> create-method swap define ]
    [ '[ drop _ <decoder> ] swap \ <decoder> create-method swap define ]
    2bi ;
42
43 PRIVATE>
44
! Parsing word. Usage:
!     8-BIT: name iana-name file-name
! Reads the next three tokens and passes them, in that order, to
! load-encoding.
SYNTAX: 8-BIT:
    3 [ scan-token ] replicate first3 load-encoding ;
46 >>
47
! Encoding declarations. Columns: word name, IANA name, base name of
! the code table file (see the 8-BIT: parsing word).
! NOTE(review): entries that are commented out were already disabled in
! the original; the reason is not visible here.

! Entries registered under IBM* IANA names
8-BIT: cp424          IBM424          CP424
8-BIT: cp437          IBM437          CP437
8-BIT: cp500          IBM500          CP500
8-BIT: cp775          IBM775          CP775
8-BIT: cp850          IBM850          CP850
8-BIT: cp852          IBM852          CP852
8-BIT: cp855          IBM855          CP855
8-BIT: cp857          IBM857          CP857
8-BIT: cp860          IBM860          CP860
8-BIT: cp861          IBM861          CP861
8-BIT: cp862          IBM862          CP862
8-BIT: cp863          IBM863          CP863
8-BIT: cp864          IBM864          CP864
8-BIT: cp865          IBM865          CP865
8-BIT: cp866          IBM866          CP866
8-BIT: cp869          IBM869          CP869
8-BIT: cp1026         IBM1026         CP1026
8-BIT: ebcdic         IBM037          CP037

! KZ-1048 and KOI8 entries
8-BIT: kz1048         KZ-1048         KZ1048
8-BIT: koi8-r         KOI8-R          KOI8-R
8-BIT: koi8-u         KOI8-U          KOI8-U

! Entries backed by 8859-* table files
8-BIT: latin/arabic   ISO_8859-6:1987 8859-6
8-BIT: latin/cyrillic ISO_8859-5:1988 8859-5
8-BIT: latin/greek    ISO_8859-7:1987 8859-7
8-BIT: latin/hebrew   ISO_8859-8:1988 8859-8
8-BIT: latin/thai     TIS-620         8859-11
! 8-BIT: latin1 ISO_8859-1:1987 8859-1
8-BIT: latin2         ISO_8859-2:1987 8859-2
8-BIT: latin3         ISO_8859-3:1988 8859-3
8-BIT: latin4         ISO_8859-4:1988 8859-4
8-BIT: latin5         ISO_8859-9:1989 8859-9
8-BIT: latin6         ISO-8859-10     8859-10
8-BIT: latin7         ISO-8859-13     8859-13
8-BIT: latin8         ISO-8859-14     8859-14
8-BIT: latin9         ISO-8859-15     8859-15
8-BIT: latin10        ISO-8859-16     8859-16

! mac-* entries
8-BIT: mac-roman      macintosh       ROMAN
! 8-BIT: mac-cyrillic mac-cyrillic CYRILLIC
! 8-BIT: mac-greek mac-greek GREEK
! 8-BIT: mac-icelandic mac-icelandic ICELAND
! 8-BIT: mac-latin2 mac-latin2 LATIN2
! 8-BIT: mac-turkish mac-turkish TURKISH

! windows-* entries
8-BIT: windows-1250   windows-1250    CP1250
8-BIT: windows-1251   windows-1251    CP1251
8-BIT: windows-1252   windows-1252    CP1252
8-BIT: windows-1253   windows-1253    CP1253
8-BIT: windows-1254   windows-1254    CP1254
8-BIT: windows-1255   windows-1255    CP1255
8-BIT: windows-1256   windows-1256    CP1256
8-BIT: windows-1257   windows-1257    CP1257
8-BIT: windows-1258   windows-1258    CP1258