! See http://factorcode.org/license.txt for BSD license.
USING: arrays ascii assocs byte-arrays combinators
combinators.short-circuit grouping hashtables interval-sets
-io.encodings.utf8 io.files kernel locals make math math.bitwise
-math.order math.parser math.ranges memoize namespaces sequences
+io.encodings.utf8 io.files kernel make math math.bitwise
+math.order math.parser ranges namespaces sequences
sets simple-flat-file sorting splitting strings.parser ;
IN: unicode.data
! Memoized lookup table built from `categories`: every element of
! `categories` becomes a key whose value is that element's position
! (0-based) in the sequence. The result is a fresh hashtable, computed
! once and cached by MEMO:.
MEMO: categories-map ( -- hashtable )
    categories dup length <iota> H{ } zip-as ;
! NUM-CHARS is raised from 0x2FA1E to 0x2FA25 by this change.
! NOTE(review): presumably the number of code points the generated
! tables must cover -- confirm the new value against the bundled UCD files.
-CONSTANT: NUM-CHARS 0x2FA1E
+CONSTANT: NUM-CHARS 0x2FA25
PRIVATE>
! Hands 5 and `data` to (process-data), then rewrites every value of the
! resulting assoc: the value is split into words and each word is parsed
! as a hexadecimal number. Keys are passed through untouched.
! NOTE(review): 5 is presumably the index of the decomposition field in
! the parsed data -- confirm against (process-data).
! The changed line swaps `words` for `split-words`; nothing else differs.
: (process-decomposed) ( data -- alist )
5 swap (process-data)
- [ words [ hex> ] map ] assoc-map ;
+ [ split-words [ hex> ] map ] assoc-map ;
! Path (in vocab: notation) of the composition-exclusions data file
! that `exclusions` reads.
: exclusions-file ( -- filename )
    "vocab:unicode/UCD/CompositionExclusions.txt" ;
! Reads exclusions-file as UTF-8 lines and turns each line into a number:
! everything from the first "#" onward is discarded, trailing blanks are
! trimmed, and what remains is parsed as hexadecimal. Entries equal to 0
! are then removed from the result.
! The changed lines qualify `blank?` as `ascii:blank?` and replace
! `[ 0 = ] reject` with `0 swap remove`; both forms drop the 0 entries.
! NOTE(review): for blank or comment-only lines hex> presumably yields f
! rather than 0, in which case those entries survive this filter -- confirm.
: exclusions ( -- set )
exclusions-file utf8 file-lines
- [ "#" split1 drop [ blank? ] trim-tail hex> ] map
- [ 0 = ] reject ;
+ [ "#" split1 drop [ ascii:blank? ] trim-tail hex> ] map
+ 0 swap remove ;
! Builds a fresh hashtable in which every element of `seq` is stored
! as a key mapped to itself; repeated elements collapse into one entry.
: unique ( seq -- assoc )
    dup H{ } zip-as ;
name-map sort-values keys
[ { [ "first>" tail? ] [ "last>" tail? ] } 1|| ] filter
2 group [
- [ name-map at ] bi@ [ [a,b] ] [ table ?nth ] bi
+ [ name-map at ] bi@ [ [a..b] ] [ table ?nth ] bi
[ swap table ?set-nth ] curry each
] assoc-each table ;
] H{ } assoc-map-as ;
! Splits `hexstring` into words, parses each word as a hexadecimal
! number, and drops every f result (words that hex> could not parse)
! via sift.
! The changed line swaps `words` for `split-words`; nothing else differs.
: multihex ( hexstring -- string )
- words [ hex> ] map sift ;
+ split-words [ hex> ] map sift ;
PRIVATE>
PRIVATE>
! Installs the global name>char-hook: a quotation that looks a character
! name up in name-map and signals an error when the name is absent.
! Removed side: `at` followed by `unless*` throws the fixed string
! "Invalid character" on a miss.
! Added side: declares the error class invalid-unicode-character with a
! `name` slot; `?at` leaves the looked-up key on the stack on a miss, so
! the error is thrown carrying the offending name.
+ERROR: invalid-unicode-character name ;
+
[
- name-map at [ "Invalid character" throw ] unless*
+ name-map ?at [ invalid-unicode-character ] unless
] name>char-hook set-global