1 ! Copyright (C) 2009 Daniel Ehrenberg
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: arrays ascii assocs biassocs interval-maps
4 io.encodings.utf8 io.files kernel math.parser sequences sets
! Strip comments from a sequence of lines.
! Each line is cut off at the first "#" or "@" character it contains
! (lines containing neither are kept whole); lines that end up empty
! are then removed from the result.
: drop-comments ( seq -- newseq )
    [
        dup [ "#@" member? ] find drop
        [ head ] [ ] if*
    ] map
    [ empty? not ] filter ;
! Break a line into exactly two columns.
! The line is split on spaces and tabs, empty fields are discarded,
! at most the first two fields are kept, and the result is padded
! on the right with f so that it always has two entries.
: split-column ( line -- columns )
    " \t" split
    [ empty? not ] filter
    2 short head
    2 f pad-tail ;
14 : parse-hex ( s -- n )
16 "0x" ?head [ "U+" ?head [ "Missing 0x or U+" throw ] unless ] unless
! Turn one code-table line into a pair of parsed values.
! split-column always yields exactly two entries, so they are taken
! apart, each is run through parse-hex (first column first), and the
! two results are packed into a fresh two-element array.
: parse-line ( line -- code-unicode )
    split-column first2
    [ parse-hex ] bi@
    2array ;
! Convert the raw lines of a code table into a sequence of parsed pairs.
! Comments and empty lines are removed by drop-comments, then every
! remaining line is converted with parse-line.
: process-codetable-lines ( lines -- assoc )
    drop-comments
    [ parse-line ] map ;
! Load a code-table file into a biassoc.
! The file is read as UTF-8 lines, passed through
! process-codetable-lines, and the resulting pairs are converted
! with >biassoc.
: flat-file>biassoc ( filename -- biassoc )
    utf8 file-lines
    process-codetable-lines
    >biassoc ;
! Split a line into its ";"-separated fields, removing blank
! characters from both ends of every field.
: split-; ( line -- array )
    ";" split
    [ [ blank? ] trim ] map ;
! Read a ";"-delimited data file.
! The file is read as UTF-8 lines, comments and empty lines are
! removed by drop-comments, and each remaining line is broken into
! trimmed fields by split-; .
: data ( filename -- data )
    utf8 file-lines
    drop-comments
    [ split-; ] map ;
! Parse a range specification written in hexadecimal.
! The input is split at the first ".."; both sides are passed to hex> .
! When the right-hand result is not f the output is a { from to } pair,
! otherwise it is just the left-hand result on its own.
: expand-range ( range -- range' )
    ".." split1
    [ hex> ] bi@
    dup [ 2array ] [ drop ] if ;
! Build an interval map from an assoc keyed by textual ranges.
! Every key is rewritten with expand-range while its value is left
! untouched; the resulting assoc is handed to <interval-map> .
: expand-ranges ( ranges -- table )
    [ swap expand-range swap ] assoc-map
    <interval-map> ;
41 : intern ( value values -- value' )
! Rewrite every value of an assoc through intern .
! The distinct values of the assoc are collected once (values members)
! and that collection is supplied as the second input of intern for
! each entry; keys pass through unchanged.
! NOTE(review): presumably this makes equal values share one object --
! confirm against the definition of intern .
: intern-values ( assoc -- assoc' )
    [ values members ] keep swap
    [ intern ] curry assoc-map ;
! Load a ";"-delimited file into an interval table.
! Three stages, applied in order: data reads and splits the file,
! intern-values rewrites the values, and expand-ranges parses the
! range keys and builds the interval map.
: load-interval-file ( filename -- table )
    data
    intern-values
    expand-ranges ;