! See http://factorcode.org/license.txt for BSD license.
USING: ascii sequences namespaces make unicode.data kernel math arrays
locals sorting.insertion accessors assocs math.order combinators
-unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
+strings sbufs hints combinators.short-circuit vectors ;
IN: unicode.normalize
<PRIVATE
! Conjoining Jamo behavior
-CONSTANT: hangul-base HEX: ac00
-CONSTANT: hangul-end HEX: D7AF
-CONSTANT: initial-base HEX: 1100
-CONSTANT: medial-base HEX: 1161
-CONSTANT: final-base HEX: 11a7
+CONSTANT: hangul-base 0xac00
+CONSTANT: hangul-end 0xD7AF
+CONSTANT: initial-base 0x1100
+CONSTANT: medial-base 0x1161
+CONSTANT: final-base 0x11a7
CONSTANT: initial-count 19
CONSTANT: medial-count 21
pick [ between? ] [ 3drop f ] if ; inline
: hangul? ( ch -- ? ) hangul-base hangul-end ?between? ; inline
-: jamo? ( ch -- ? ) HEX: 1100 HEX: 11FF ?between? ; inline
+: jamo? ( ch -- ? ) 0x1100 0x11FF ?between? ; inline
! These numbers come from UAX 29
: initial? ( ch -- ? )
- dup HEX: 1100 HEX: 1159 ?between? [ ] [ HEX: 115F = ] ?if ; inline
-: medial? ( ch -- ? ) HEX: 1160 HEX: 11A2 ?between? ; inline
-: final? ( ch -- ? ) HEX: 11A8 HEX: 11F9 ?between? ; inline
+ dup 0x1100 0x1159 ?between? [ ] [ 0x115F = ] ?if ; inline
+: medial? ( ch -- ? ) 0x1160 0x11A2 ?between? ; inline
+: final? ( ch -- ? ) 0x11A8 0x11F9 ?between? ; inline
: hangul>jamo ( hangul -- jamo-string )
hangul-base - final-count /mod final-base +
! Normalization -- Composition
: initial-medial? ( str i -- ? )
- { [ swap nth initial? ] [ 1+ swap ?nth medial? ] } 2&& ;
+ { [ swap nth initial? ] [ 1 + swap ?nth medial? ] } 2&& ;
: --final? ( str i -- ? )
2 + swap ?nth final? ;
: compose-jamo ( str i -- str i )
2dup initial-medial? [
2dup --final? [ imf, ] [ im, ] if
- ] [ 2dup swap nth , 1+ ] if ;
+ ] [ 2dup swap nth , 1 + ] if ;
: pass-combining ( str -- str i )
dup [ non-starter? not ] find drop
TUPLE: compose-state i str char after last-class ;
: get-str ( state i -- ch )
- swap [ i>> + ] [ str>> ] bi ?nth ;
-: current ( state -- ch ) 0 get-str ;
-: to ( state -- state ) [ 1+ ] change-i ;
-: push-after ( ch state -- state ) [ ?push ] change-after ;
+ swap [ i>> + ] [ str>> ] bi ?nth ; inline
+: current ( state -- ch ) 0 get-str ; inline
+: to ( state -- state ) [ 1 + ] change-i ; inline
+: push-after ( ch state -- state ) [ ?push ] change-after ; inline
:: try-compose ( state new-char current-class -- state )
state last-class>> current-class =
new-char state push-after
current-class >>last-class
] if*
- ] if ;
+ ] if ; inline
DEFER: compose-iter
-: try-noncombining ( char state -- state )
- tuck char>> swap combine-chars
- [ >>char to f >>last-class compose-iter ] when* ;
+: try-noncombining ( state char -- state )
+ [ drop ] [ [ char>> ] dip combine-chars ] 2bi
+ [ >>char to f >>last-class compose-iter ] when* ; inline
: compose-iter ( state -- state )
dup current [
{ f [ drop ] }
{ 0 [
over last-class>>
- [ drop ] [ swap try-noncombining ] if ] }
+ [ drop ] [ try-noncombining ] if ] }
[ try-compose to compose-iter ]
} case
- ] when* ;
+ ] when* ; inline recursive
: compose-combining ( ch str i -- str i )
compose-state new
swap >>str
swap >>char
compose-iter
- { [ char>> , ] [ after>> % ] [ str>> ] [ i>> ] } cleave ;
+ { [ char>> , ] [ after>> % ] [ str>> ] [ i>> ] } cleave ; inline
:: (compose) ( str i -- )
i str ?nth [
dup jamo? [ drop str i compose-jamo ] [
- i 1+ str ?nth combining-class
- [ str i 1+ compose-combining ] [ , str i 1+ ] if
+ i 1 + str ?nth combining-class
+ [ str i 1 + compose-combining ] [ , str i 1 + ] if
] if (compose)
- ] when* ;
+ ] when* ; inline recursive
: combine ( str -- comp )
[ pass-combining (compose) ] "" make ;
+HINTS: combine string ;
+
PRIVATE>
: nfc ( string -- nfc )