! Copyright (C) 2008 Daniel Ehrenberg.\r
! See http://factorcode.org/license.txt for BSD license.\r
-USING: combinators.short-circuit sequences io.files\r
-io.encodings.ascii kernel values splitting accessors math.parser\r
-ascii io assocs strings math namespaces make sorting combinators\r
-math.order arrays unicode.normalize unicode.data locals\r
-unicode.syntax macros sequences.deep words unicode.breaks\r
-quotations combinators.short-circuit ;\r
+USING: sequences io.files io.encodings.ascii kernel splitting\r
+accessors math.parser ascii io assocs strings math namespaces make\r
+sorting combinators math.order arrays unicode.normalize unicode.data\r
+locals macros sequences.deep words unicode.breaks quotations\r
+combinators.short-circuit simple-flat-file ;\r
IN: unicode.collation\r
\r
<PRIVATE\r
-VALUE: ducet\r
+SYMBOL: ducet\r
\r
TUPLE: weight primary secondary tertiary ignorable? ;\r
\r
[ >>primary ] [ >>secondary ] [ >>tertiary ] tri*\r
] map ;\r
\r
-: parse-line ( line -- code-poing weight )\r
- ";" split1 [ [ blank? ] trim ] bi@\r
- [ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;\r
+: parse-keys ( string -- chars )\r
+ " " split [ hex> ] "" map-as ;\r
\r
-: parse-ducet ( stream -- ducet )\r
- lines filter-comments\r
- [ parse-line ] H{ } map>assoc ;\r
+: parse-ducet ( file -- ducet )\r
+ data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ;\r
\r
-"resource:basis/unicode/collation/allkeys.txt"\r
-ascii <file-reader> parse-ducet to: ducet\r
+"vocab:unicode/collation/allkeys.txt" parse-ducet ducet set-global\r
\r
! Fix up table for long contractions\r
: help-one ( assoc key -- )\r
! Need to be more general? Not for DUCET, apparently\r
2 head 2dup swap key? [ 2drop ] [\r
- [ [ 1string swap at ] with { } map-as concat ]\r
+ [ [ 1string of ] with { } map-as concat ]\r
[ swap set-at ] 2bi\r
] if ;\r
\r
dup keys [ length 3 >= ] filter\r
[ help-one ] with each ;\r
\r
-ducet insert-helpers\r
+ducet get-global insert-helpers\r
\r
: base ( char -- base )\r
{\r
- { [ dup HEX: 3400 HEX: 4DB5 between? ] [ drop HEX: FB80 ] } ! Extension A\r
- { [ dup HEX: 20000 HEX: 2A6D6 between? ] [ drop HEX: FB80 ] } ! Extension B\r
- { [ dup HEX: 4E00 HEX: 9FC3 between? ] [ drop HEX: FB40 ] } ! CJK\r
- [ drop HEX: FBC0 ] ! Other\r
+ { [ dup 0x3400 0x4DB5 between? ] [ drop 0xFB80 ] } ! Extension A\r
+ { [ dup 0x20000 0x2A6D6 between? ] [ drop 0xFB80 ] } ! Extension B\r
+ { [ dup 0x4E00 0x9FC3 between? ] [ drop 0xFB40 ] } ! CJK\r
+ [ drop 0xFBC0 ] ! Other\r
} cond ;\r
\r
: AAAA ( char -- weight )\r
- [ base ] [ -15 shift ] bi + HEX: 20 2 f weight boa ;\r
+ [ base ] [ -15 shift ] bi + 0x20 2 f weight boa ;\r
\r
: BBBB ( char -- weight )\r
- HEX: 7FFF bitand HEX: 8000 bitor 0 0 f weight boa ;\r
+ 0x7FFF bitand 0x8000 bitor 0 0 f weight boa ;\r
\r
: illegal? ( char -- ? )\r
{ [ "Noncharacter_Code_Point" property? ] [ category "Cs" = ] } 1|| ;\r
[ drop { } ]\r
[ [ AAAA ] [ BBBB ] bi 2array ] if ;\r
\r
-: last ( -- char )\r
- building get empty? [ 0 ] [ building get peek peek ] if ;\r
+: building-last ( -- char )\r
+ building get empty? [ 0 ] [ building get last last ] if ;\r
\r
: blocked? ( char -- ? )\r
combining-class dup { 0 f } member?\r
- [ drop last non-starter? ]\r
- [ last combining-class = ] if ;\r
+ [ drop building-last non-starter? ]\r
+ [ building-last combining-class = ] if ;\r
\r
: possible-bases ( -- slice-of-building )\r
building get dup [ first non-starter? not ] find-last\r
drop [ 0 ] unless* tail-slice ;\r
\r
:: ?combine ( char slice i -- ? )\r
- [let | str [ i slice nth char suffix ] |\r
- str ducet key? dup\r
- [ str i slice set-nth ] when\r
- ] ;\r
+ i slice nth char suffix :> str\r
+ str ducet get-global key? dup\r
+ [ str i slice set-nth ] when ;\r
\r
: add ( char -- )\r
dup blocked? [ 1string , ] [\r
- dup possible-bases dup length\r
- [ ?combine ] with with any?\r
+ dup possible-bases dup length iota\r
+ [ ?combine ] 2with any?\r
[ drop ] [ 1string , ] if\r
] if ;\r
\r
: graphemes>weights ( graphemes -- weights )\r
[\r
dup weight? [ 1array ] ! From tailoring\r
- [ dup ducet at [ ] [ derive-weight ] ?if ] if\r
+ [ dup ducet get-global at [ ] [ derive-weight ] ?if ] if\r
] { } map-as concat ;\r
\r
: append-weights ( weights quot -- )\r
- [ [ ignorable?>> not ] filter ] dip\r
- map [ zero? not ] filter % 0 , ; inline\r
+ [ [ ignorable?>> ] reject ] dip\r
+ map [ zero? ] reject % 0 , ; inline\r
\r
: variable-weight ( weight -- )\r
- dup ignorable?>> [ primary>> ] [ drop HEX: FFFF ] if , ;\r
+ dup ignorable?>> [ primary>> ] [ drop 0xFFFF ] if , ;\r
\r
: weights>bytes ( weights -- byte-array )\r
[\r
[\r
[ collation-key ] dip\r
[ [ 0 = not ] trim-tail but-last ] times\r
- ] curry bi@ = ;\r
+ ] curry same? ;\r
PRIVATE>\r
\r
: primary= ( str1 str2 -- ? )\r
: quaternary= ( str1 str2 -- ? )\r
0 insensitive= ;\r
\r
-<PRIVATE\r
: w/collation-key ( str -- {str,key} )\r
[ collation-key ] keep 2array ;\r
-PRIVATE>\r
\r
: sort-strings ( strings -- sorted )\r
[ w/collation-key ] map natural-sort values ;\r