! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors assocs biassocs classes.mixin classes.singleton
-fry io io.encodings io.encodings.iana kernel lexer namespaces
-parser sequences simple-flat-file ;
+USING: accessors arrays assocs classes.singleton generic
+hashtables io io.encodings io.encodings.iana kernel lexer parser
+sequences simple-flat-file words ;
IN: io.encodings.8-bit
<<
: encoding-file ( file-name -- stream )
"vocab:io/encodings/8-bit/" ".TXT" surround ;
-SYMBOL: 8-bit-encodings
-8-bit-encodings [ H{ } clone ] initialize
+TUPLE: 8-bit { from array read-only } { to hashtable read-only } ;
-TUPLE: 8-bit { biassoc biassoc read-only } ;
+: <8-bit> ( biassoc -- 8-bit )
+ [ from>> 256 <iota> [ of ] with map ] [ to>> ] bi 8-bit boa ;
: 8-bit-encode ( char 8-bit -- byte )
- biassoc>> value-at [ encode-error ] unless* ; inline
+ to>> at [ encode-error ] unless* ; inline
M: 8-bit encode-char
swap [ 8-bit-encode ] dip stream-write1 ;
swap [ '[ _ 8-bit-encode ] B{ } map-as ] dip stream-write ;
M: 8-bit decode-char
- swap stream-read1 dup
- [ swap biassoc>> at [ replacement-char ] unless* ]
- [ 2drop f ]
- if ;
-
-MIXIN: 8-bit-encoding
-
-M: 8-bit-encoding <encoder>
- 8-bit-encodings get-global at <encoder> ;
-
-M: 8-bit-encoding <decoder>
- 8-bit-encodings get-global at <decoder> ;
+ swap stream-read1 [
+ swap from>> ?nth [ replacement-char ] unless*
+ ] [ drop f ] if* ;
: create-encoding ( name -- word )
- create-word-in
- [ define-singleton-class ]
- [ 8-bit-encoding add-mixin-instance ]
- [ ] tri ;
+ create-word-in dup define-singleton-class ;
: load-encoding ( name iana-name file-name -- )
[ create-encoding dup ]
[ register-encoding ]
- [ encoding-file load-codetable-file 8-bit boa ] tri*
- swap 8-bit-encodings get-global set-at ;
+ [ encoding-file load-codetable-file <8-bit> ] tri*
+ [ [ \ <encoder> create-method ] dip '[ drop _ <encoder> ] define ]
+ [ [ \ <decoder> create-method ] dip '[ drop _ <decoder> ] define ] 2bi ;
PRIVATE>
SYNTAX: 8-BIT: scan-token scan-token scan-token load-encoding ;
>>
+8-BIT: cp424 IBM424 CP424
8-BIT: cp437 IBM437 CP437
8-BIT: cp500 IBM500 CP500
8-BIT: cp775 IBM775 CP775
8-BIT: cp869 IBM869 CP869
8-BIT: cp1026 IBM1026 CP1026
8-BIT: ebcdic IBM037 CP037
+8-BIT: kz1048 KZ-1048 KZ1048
8-BIT: koi8-r KOI8-R KOI8-R
+8-BIT: koi8-u KOI8-U KOI8-U
8-BIT: latin/arabic ISO_8859-6:1987 8859-6
8-BIT: latin/cyrillic ISO_8859-5:1988 8859-5
8-BIT: latin/greek ISO_8859-7:1987 8859-7
8-BIT: latin/hebrew ISO_8859-8:1988 8859-8
8-BIT: latin/thai TIS-620 8859-11
-8-BIT: latin1 ISO_8859-1:1987 8859-1
+! 8-BIT: latin1 ISO_8859-1:1987 8859-1
8-BIT: latin2 ISO_8859-2:1987 8859-2
8-BIT: latin3 ISO_8859-3:1988 8859-3
8-BIT: latin4 ISO_8859-4:1988 8859-4