1 ! Copyright (C) 2009 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: sequences kernel io io.files combinators.short-circuit
4 math.order values assocs io.encodings io.binary fry strings math
5 io.encodings.ascii arrays byte-arrays accessors splitting
6 math.parser biassocs io.encodings.iana ;
7 IN: io.encodings.japanese
! Register the shift-jis encoding descriptor with io.encodings.iana
! under the name "Shift_JIS".
shift-jis "Shift_JIS" register-encoding

! A second encoding descriptor, registered under the name "Windows-31J".
SINGLETON: windows-31j
windows-31j "Windows-31J" register-encoding
! Holds the mapping object used for Shift_JIS. It is assigned further
! down in this file with to: after the mapping file has been parsed.
VALUE: shift-jis-table

! Both stream constructors discard the shift-jis singleton, substitute
! the table value for it, and call the same generic word again so that
! it dispatches on the table instead.
M: shift-jis <encoder>
    shift-jis-table nip <encoder> ;

M: shift-jis <decoder>
    shift-jis-table nip <decoder> ;
! Holds the mapping object used for Windows-31J. It is assigned further
! down in this file with to: after the mapping file has been parsed.
VALUE: windows-31j-table

! Both stream constructors discard the windows-31j singleton, substitute
! the table value for it, and call the same generic word again so that
! it dispatches on the table instead.
M: windows-31j <encoder>
    windows-31j-table nip <encoder> ;

M: windows-31j <decoder>
    windows-31j-table nip <decoder> ;
31 : <jis> ( assoc -- jis )
! Find the JIS code whose entry in the tuple's assoc has ch as its
! value (a value-to-key lookup). Calls encode-error when no entry has
! that value.
: ch>jis ( ch tuple -- jis )
    assoc>> value-at dup [ drop encode-error ] unless ;
! Look up the JIS code as a key in the tuple's assoc and return the
! value stored there. A code with no entry yields replacement-char
! instead of f. The output is a single character value, not a string,
! so the stack effect names it ch (mirroring ch>jis).
: jis>ch ( jis tuple -- ch )
    assoc>> at replacement-char or ;
38 : process-jis ( lines -- assoc )
39 [ "#" split1 drop ] map harvest [
41 [ 2 short tail hex> ] map
! Build a mapping object from the file at filename: the file is read as
! ASCII lines, the lines are converted by process-jis, and the result is
! wrapped by <jis>.
: make-jis ( filename -- jis ) ascii file-lines process-jis <jis> ;
! Top-level code: parse the two mapping files that ship with this
! vocabulary and store the resulting objects in the table values.
"vocab:io/encodings/japanese/CP932.txt" make-jis
to: windows-31j-table

"vocab:io/encodings/japanese/sjis-0208-1997-std.txt" make-jis
to: shift-jis-table
! True when char falls in one of the two ranges handled as a single
! byte: 00-7F (ASCII) or A1-DF (single-byte halfwidth katakana).
: small? ( char -- ? )
    [ 0 HEX: 7F between? ]
    [ HEX: A1 HEX: DF between? ] bi or ;
! Write halfword to stream as two bytes, high-order byte first. Only
! the low 16 bits of halfword are emitted.
: write-halfword ( stream halfword -- )
    2 >be swap stream-write ;
63 [ swap stream-write1 ]
64 [ write-halfword ] if ;
67 swap dup stream-read1 [
68 dup small? [ nip swap jis>ch ] [
70 [ 2array be> swap jis>ch ]
71 [ 2drop replacement-char ] if*