unicode: Minor cleanups.

author Doug Coleman <doug.coleman@gmail.com>
Sun, 28 Jul 2019 20:51:05 +0000 (15:51 -0500)
committer Doug Coleman <doug.coleman@gmail.com>
Tue, 30 Jul 2019 07:12:41 +0000 (02:12 -0500)
diff --git a/basis/unicode/collation/collation.factor b/basis/unicode/collation/collation.factor

index 4c0ad626c19707d4725a40c2474ca3ce587222f0..97bb8f486f85a5b1ee2b5d86644740a4499fa9be 100644 (file)
--- a/basis/unicode/collation/collation.factor
+++ b/basis/unicode/collation/collation.factor
@@ -32,14 +32,7 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
  
  "vocab:unicode/UCA/allkeys.txt" parse-ducet ducet set-global
  
-! Fix up table for long contractions
-: help-one ( assoc key -- )
-    ! Need to be more general? Not for DUCET, apparently
-    2 head 2dup swap key? [ 2drop ] [
-        [ [ 1string of ] with { } map-as concat ]
-        [ swap set-at ] 2bi
-    ] if ;
-
+! https://www.unicode.org/reports/tr10/tr10-41.html#Well_Formed_DUCET
  : fixup-ducet ( -- )
      {
          {
@@ -195,7 +188,8 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
          }
      } ducet get-global '[ swap >string _ set-at ] assoc-each ;
  
-! Add a few missing ducet values
+! Add a few missing ducet values for Tibetan
+! https://www.unicode.org/reports/tr10/tr10-41.html#Well_Formed_DUCET
  fixup-ducet
  
  : tangut-block? ( char -- ? )
@@ -248,6 +242,7 @@ fixup-ducet
  : building-last ( -- char )
      building get [ 0 ] [ last last ] if-empty ;
  
+! https://www.unicode.org/reports/tr10/tr10-41.html#Collation_Graphemes
  : blocked? ( char -- ? )
      combining-class dup { 0 f } member?
      [ drop building-last non-starter? ]
diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor

index bc744a747db21a116f950d5f697a457b8c0ef898..32ac3e94fa7d36a36d838e71d37a2a59daaf342c 100644 (file)
--- a/basis/unicode/data/data.factor
+++ b/basis/unicode/data/data.factor
@@ -53,7 +53,7 @@ CONSTANT: categories {
  MEMO: categories-map ( -- hashtable )
      categories H{ } zip-index-as ;
  
-CONSTANT: num-chars 0x2FA1E
+CONSTANT: NUM-CHARS 0x2FA1E
  
  PRIVATE>
  
@@ -76,7 +76,7 @@ PRIVATE>
  
  ! Loading data from UnicodeData.txt
  
-: load-data ( -- data )
+: load-unicode-data ( -- data )
      "vocab:unicode/UCD/UnicodeData.txt" load-data-file ;
  
  : (process-data) ( index data -- newdata )
@@ -146,7 +146,7 @@ PRIVATE>
      ] assoc-each table ;
  
  :: process-category ( data -- category-listing )
-    num-chars <byte-array> :> table
+    NUM-CHARS <byte-array> :> table
      2 data (process-data) [| char cat |
          cat categories-map at char table ?set-nth
      ] assoc-each table fill-ranges ;
@@ -194,7 +194,7 @@ C: <code-point> code-point
      [ length 5 = ] filter
      [ [ set-code-point ] each ] H{ } make ;
  
-load-data {
+load-unicode-data {
      [ process-names name-map swap assoc-union! drop ]
      [ 13 swap process-data simple-lower swap assoc-union! drop ]
      [ 12 swap process-data simple-upper swap assoc-union! drop ]
author	Doug Coleman <doug.coleman@gmail.com>
	Sun, 28 Jul 2019 20:51:05 +0000 (15:51 -0500)
committer	Doug Coleman <doug.coleman@gmail.com>
	Tue, 30 Jul 2019 07:12:41 +0000 (02:12 -0500)
basis/unicode/collation/collation.factor		patch | blob | history
basis/unicode/data/data.factor		patch | blob | history