! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
-USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ;
+USING: sequences splitting kernel math.parser io.files io.encodings.ascii
+biassocs ascii ;
IN: simple-flat-file
! Strip trailing comments from every line and discard lines that end up
! empty. The new version cuts each line at the first "#" or "@"; the old
! version cut only at "#".
: drop-comments ( seq -- newseq )
- [ "#" split1 drop ] map harvest ;
+ [ "#@" split first ] map harvest ;
! Split a line on spaces and tabs, drop the empty pieces, keep at most the
! first two columns, and pad with f so the result always has two elements.
: split-column ( line -- columns )
" \t" split harvest 2 short head 2 f pad-tail ;
! Read the file's lines with the ascii encoding, run them through
! process-codetable-lines, and convert the result to a biassoc.
! NOTE(review): process-codetable-lines is defined outside this excerpt;
! its exact output shape is not visible here.
: flat-file>biassoc ( filename -- biassoc )
ascii file-lines process-codetable-lines >biassoc ;
+! Split a line into its ";"-separated fields and trim blank characters
+! from both ends of every field.
+: split-; ( line -- array )
+ ";" split [ [ blank? ] trim ] map ;
+
+! Read a flat data file: load its lines with the ascii encoding, remove
+! comments and empty lines with drop-comments, then split each remaining
+! line into trimmed fields with split-; .
+: data ( filename -- data )
+ ascii file-lines drop-comments [ split-; ] map ;
ascii io assocs strings math namespaces make sorting combinators\r
math.order arrays unicode.normalize unicode.data locals\r
unicode.syntax macros sequences.deep words unicode.breaks\r
-quotations combinators.short-circuit ;\r
+quotations combinators.short-circuit simple-flat-file ;\r
IN: unicode.collation\r
\r
<PRIVATE\r
[ >>primary ] [ >>secondary ] [ >>tertiary ] tri*\r
] map ;\r
\r
-: parse-line ( line -- code-poing weight )\r
- ";" split1 [ [ blank? ] trim ] bi@\r
- [ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;\r
+! Parse a string of space-separated hexadecimal numbers. The parsed\r
+! values are collected into a string (the "" exemplar of map-as).\r
+: parse-keys ( string -- chars )\r
+ " " split [ hex> ] "" map-as ;\r
\r
! Build the collation table from a file. The new version loads the file\r
! with data (comments dropped, lines split on ";"), then maps every key\r
! through parse-keys and every value through parse-weight, collecting\r
! the pairs into a hashtable.\r
: parse-ducet ( file -- ducet )\r
- ascii file-lines filter-comments\r
- [ parse-line ] H{ } map>assoc ;\r
+ data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ;\r
\r
"vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet\r
\r
math.parser hash2 math.order byte-arrays words namespaces words
compiler.units parser io.encodings.ascii values interval-maps
ascii sets combinators locals math.ranges sorting make
-strings.parser io.encodings.utf8 memoize ;
+strings.parser io.encodings.utf8 memoize simple-flat-file ;
IN: unicode.data
<PRIVATE
VALUE: class-map
VALUE: compatibility-map
VALUE: category-map
-VALUE: name-map
VALUE: special-casing
VALUE: properties
PRIVATE>
+VALUE: name-map
+
! Look up char in canonical-map.
: canonical-entry ( char -- seq ) canonical-map at ; inline
! Look up the pair a, b in combine-map with hash2; per the stack effect
! the result is a character or f.
: combine-chars ( a b -- char/f ) combine-map hash2 ; inline
! Look up char in compatibility-map.
: compatibility-entry ( char -- seq ) compatibility-map at ; inline
! Loading data from UnicodeData.txt
-: split-; ( line -- array )
- ";" split [ [ blank? ] trim ] map ;
-
-: data ( filename -- data )
- ascii file-lines [ split-; ] map ;
-
! Load UnicodeData.txt from the unicode/data vocabulary directory via data.
: load-data ( -- data )
"vocab:unicode/data/UnicodeData.txt" data ;
-: filter-comments ( lines -- lines )
- [ "#@" split first ] map harvest ;
-
! For every row of data, pair the row's first field (the key) with the
! field at position index (the value), then parse each key as a
! hexadecimal number. The old version also ran filter-comments on the
! rows first; that call is removed here.
: (process-data) ( index data -- newdata )
- filter-comments
[ [ nth ] keep first swap ] with { } map>assoc
[ [ hex> ] dip ] assoc-map ;
<code-point> swap first set ;
! Extra properties
-: properties-lines ( -- lines )
- "vocab:unicode/data/PropList.txt"
- ascii file-lines ;
-
! Load PropList.txt and turn every key into a two-element array of
! numbers. A key of the form "A..B" is split at ".."; a key without ".."
! is used for both ends. Both halves are parsed as hexadecimal. The
! property name (the value) is left unchanged.
: parse-properties ( -- {{[a,b],prop}} )
- properties-lines filter-comments [
- split-; first2
- [ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip
- ] { } map>assoc ;
+ "vocab:unicode/data/PropList.txt" data [
+ [
+ ".." split1 [ dup ] unless*
+ [ hex> ] bi@ 2array
+ ] dip
+ ] assoc-map ;
: properties>intervals ( properties -- assoc[str,interval] )
dup values prune [ f ] H{ } map>assoc
SYMBOL: interned
-: parse-key-value ( filename -- assoc )
- ! assoc is code point/range => name
- ascii file-lines filter-comments [ split-; ] map ;
-
! Find the first element of the sequence stored in the interned variable
! that is equal to value, pair key with that element, and append the pair
! to the sequence currently being built (via , ). Using the found element
! in place of value means equal values share a single object.
: range, ( value key -- )
swap interned get
[ = ] with find nip 2array , ;
PRIVATE>
! Load a key/value data file: the new version reads it with data and hands
! the rows to process-key-value.
! NOTE(review): process-key-value is defined outside this excerpt.
: load-key-value ( filename -- table )
- parse-key-value process-key-value ;
+ data process-key-value ;