USING: assocs math kernel sequences io.files hashtables
-quotations splitting arrays math.parser hash2 unicode.syntax.backend
-byte-arrays words namespaces words compiler.units parser io.encodings.ascii ;
+quotations splitting arrays math.parser hash2 math.order
+byte-arrays words namespaces words compiler.units parser
+io.encodings.ascii unicode.syntax.backend ;
IN: unicode.data
! Convenience functions
! For every row in data, build a pair { first-field index-th-field },
! keep only the pairs whose second element is non-empty, and then
! convert each pair's key from a hex string to a number with hex>.
! (The -/+ lines below record the rename of subset to filter.)
: (process-data) ( index data -- newdata )
[ [ nth ] keep first swap 2array ] with map
- [ second empty? not ] subset
+ [ second empty? not ] filter
[ >r hex> r> ] assoc-map ;
! The visible body splits each value on spaces and parses every piece
! with hex>.
! NOTE(review): the declared effect takes two inputs, but the visible
! body only applies assoc-map to a single assoc; lines appear to be
! missing from this excerpt between the header and the body -- confirm
! against the full file.
: process-data ( index data -- hash )
[ " " split [ hex> ] map ] assoc-map ;
! Runs (process-decomposed) and keeps the entries for which first* is
! true. From those, the entries whose value has exactly two elements are
! turned into { a b key } triples and loaded into a hash2 of size 1009;
! keep leaves the first* -filtered list on the stack beneath the hash2.
! NOTE(review): no terminating ; is visible in this excerpt, so the
! remainder of the definition is not shown here.
: process-canonical ( data -- hash2 hash )
- (process-decomposed) [ first* ] subset
+ (process-decomposed) [ first* ] filter
[
- [ second length 2 = ] subset
+ [ second length 2 = ] filter
! using 1009 as the size, the maximum load is 4
[ first2 first2 rot 3array ] map 1009 alist>hash2
] keep
! Runs (process-decomposed); for every entry where first* is false, the
! entry is rebuilt with the first element of its value sequence dropped
! (old spelling: 1 tail, new spelling: rest). Entries where first* is
! true pass through unchanged. The result is converted to a hashtable
! and handed to chain-decomposed.
: process-compat ( data -- hash )
(process-decomposed)
- [ dup first* [ first2 1 tail 2array ] unless ] map
+ [ dup first* [ first2 rest 2array ] unless ] map
>hashtable chain-decomposed ;
! Extracts field 3 of every row via (process-data), parses each value
! with string>number, discards the entries whose value is zero, and
! returns the remaining entries as a hashtable.
: process-combining ( data -- hash )
3 swap (process-data)
[ string>number ] assoc-map
- [ nip zero? not ] assoc-subset
+ [ nip zero? not ] assoc-filter
>hashtable ;
! NOTE(review): the header is followed directly by a closing ] with no
! matching opener, so the body of this word (and probably the start of
! another definition) is missing from this excerpt -- consult the full
! file before relying on it.
: categories ( -- names )
] assoc-map >hashtable ;
! Splits hexstring on single spaces, parses every piece with hex>, and
! keeps only the results that are true under the identity quotation,
! i.e. drops any f produced for pieces that did not parse.
: multihex ( hexstring -- string )
- " " split [ hex> ] map [ ] subset ;
+ " " split [ hex> ] map [ ] filter ;
! Tuple with three slots: lower, title and upper.
TUPLE: code-point lower title upper ;
! Special casing data
! Passes the resource path of extra/unicode/SpecialCasing.txt to data
! (defined outside this excerpt), keeps only the rows of length 5, and
! calls set-code-point on each of them inside make-assoc, which collects
! the result into a fresh H{ } hashtable.
: load-special-casing ( -- special-casing )
"extra/unicode/SpecialCasing.txt" resource-path data
- [ length 5 = ] subset
+ [ length 5 = ] filter
[ [ set-code-point ] each ] H{ } make-assoc ;
! Top-level call to load-data, which is defined outside this excerpt.
load-data