1 ! Copyright (C) 2020 John Benediktsson
2 ! See http://factorcode.org/license.txt for BSD license
4 USING: ascii byte-arrays combinators kernel literals locals math
5 math.order sbufs sequences sequences.extras sets sorting
20 CONSTANT: DIGITS $[ "abcdefghijklmnopqrstuvwxyz0123456789" >byte-array ]
23 : threshold ( j bias -- T )
24 [ BASE * ] [ - ] bi* TMIN TMAX clamp ;
26 :: adapt ( delta! #chars first? -- bias )
27 delta first? DAMP 2 ? /i delta!
28 delta dup #chars /i + delta!
29 0 [ delta $[ BASE TMIN - TMAX * 2 /i ] > ] [
30 delta $[ BASE TMIN - ] /i delta!
32 ] while BASE delta * delta SKEW + /i + ;
34 : segregate ( str -- base extended )
35 [ N < ] partition members natural-sort ;
37 :: find-pos ( str ch i pos -- i' pos' )
44 ] find-from drop [ drop -1 -1 ] unless* ;
46 :: insertion-unsort ( str extended -- deltas )
53 str [ ch < ] count :> curlen
54 curlen 1 + ch oldch - * :> delta!
56 str ch i pos find-pos pos! i!
60 i oldi - delta + delta!
70 :: encode-delta ( delta! bias -- seq )
71 SBUF" " clone :> accum
81 delta T - BASE T - /mod T + swap delta!
82 ] if DIGITS nth accum push
85 :: encode-deltas ( baselen deltas -- seq )
86 SBUF" " clone :> accum
89 delta bias encode-delta accum push-all
90 delta baselen i + 1 + i 0 = adapt bias!
95 :: >punycode ( str -- punicode )
96 str segregate :> ( base extended )
97 str extended insertion-unsort :> deltas
98 base length deltas encode-deltas
99 base [ "-" rot 3append ] unless-empty "" like ;
103 ERROR: invalid-digit char ;
105 :: decode-digit ( ch -- digit )
107 { [ ch CHAR: A CHAR: Z between? ] [ ch CHAR: A - ] }
108 { [ ch CHAR: 0 CHAR: 9 between? ] [ ch CHAR: 0 26 - - ] }
112 :: decode-delta ( extended extpos! bias -- extpos' delta )
118 j bias threshold :> T
119 extpos extended nth decode-digit :> digit
121 digit w * delta + delta!
124 ] loop extpos delta ;
126 ERROR: invalid-character char ;
128 :: insertion-sort ( base extended -- base )
133 extended length :> extlen
134 [ extpos extlen < ] [
135 extended extpos bias decode-delta :> ( newpos delta )
137 pos base length 1 + /mod pos! ch + ch!
138 ch 0x10FFFF > [ ch invalid-character ] when
139 ch pos base insert-nth!
140 delta base length extpos 0 = adapt bias!
146 : punycode> ( punycode -- str )
147 CHAR: - over last-index [
148 ! FIXME: assert all non-basic code-points
149 [ head >sbuf ] [ 1 + tail ] 2bi >upper
151 SBUF" " clone swap >upper
152 ] if* insertion-sort "" like ;
154 : idna> ( punycode -- str )
156 "xn--" ?head [ punycode> ] when
159 : >idna ( str -- punycode )
162 >punycode "xn--" prepend