! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
-USING: ascii sequences namespaces make unicode.data kernel math arrays
-locals sorting.insertion accessors assocs math.order combinators
-strings sbufs hints combinators.short-circuit vectors ;
+USING: accessors arrays ascii combinators
+combinators.short-circuit hints kernel make math
+math.order sbufs sequences sorting.insertion strings
+unicode.data vectors ;
IN: unicode.normalize
<PRIVATE
medial-base - + final-count *
] dip final-base - + hangul-base + ;
-! Normalization -- Decomposition
+! Normalization -- Decomposition
: reorder-slice ( string start -- slice done? )
2dup swap [ non-starter? not ] find-from drop
HINTS: (nfkd) string ;
-PRIVATE>
-
-: nfd ( string -- nfd )
- [ (nfd) ] with-string ;
-
-: nfkd ( string -- nfkd )
- [ (nfkd) ] with-string ;
-
-: string-append ( s1 s2 -- string )
- [ append ] keep
- 0 over ?nth non-starter?
- [ length dupd reorder-back ] [ drop ] if ;
-
-HINTS: string-append string string ;
-
-<PRIVATE
-
! Normalization -- Composition
: initial-medial? ( str i -- ? )
! Applies final? to the element of str at index i + 2.
! The element is fetched with ?nth rather than nth (presumably so an index
! past the end yields f instead of an error -- confirm against sequences docs).
: --final? ( str i -- ? )
2 + swap ?nth final? ;
! Takes the three elements of str starting at index i (tail-slice first3),
! passes them to jamo>hangul, and adds the result to the sequence under
! construction with `,`. Leaves str and i + 3 on the stack.
! NOTE(review): the body adds a single element with `,`, not a sequence with
! `%`, under both the old and the new word name.
-: imf, ( str i -- str i )
+: imf% ( str i -- str i )
[ tail-slice first3 jamo>hangul , ]
[ 3 + ] 2bi ;
! Takes the two elements of str starting at index i (tail-slice first2),
! passes them to jamo>hangul with final-base as the third argument, and adds
! the result to the sequence under construction with `,`.
! Leaves str and i + 2 on the stack.
! NOTE(review): final-base as the third argument presumably stands for
! "no final jamo" -- confirm against jamo>hangul.
-: im, ( str i -- str i )
+: im% ( str i -- str i )
[ tail-slice first2 final-base jamo>hangul , ]
[ 2 + ] 2bi ;
! One step over str at index i:
!  - if initial-medial? holds, delegate to the three-element word when
!    --final? also holds, otherwise to the two-element word;
!  - otherwise add the element at i unchanged with `,` and advance i by 1.
! str and the updated index are left on the stack in every branch.
: compose-jamo ( str i -- str i )
2dup initial-medial? [
- 2dup --final? [ imf, ] [ im, ] if
+ 2dup --final? [ imf% ] [ im% ] if
] [ 2dup swap nth , 1 + ] if ;
: pass-combining ( str -- str i )
! Calls combine-chars on the state's char slot and the given char.
! When the result is non-f (when*), it is stored in the state's char slot,
! `to` is applied, last-class is set to f, and compose-iter is called.
! When the result is f, the state is left on the stack unchanged.
! NOTE(review): `to` and compose-iter are defined outside this view; `to`
! presumably advances the state's position -- confirm.
: try-noncombining ( state char -- state )
[ drop ] [ [ char>> ] dip combine-chars ] 2bi
- [ >>char to f >>last-class compose-iter ] when* ; inline
+ [ >>char to f >>last-class compose-iter ] when* ; inline recursive
: compose-iter ( state -- state )
dup current [
HINTS: combine string ;
PRIVATE>
-
-: nfc ( string -- nfc )
- [ (nfd) combine ] with-string ;
-
-: nfkc ( string -- nfkc )
- [ (nfkd) combine ] with-string ;