1 ! Copyright (C) 2020 John Benediktsson
2 ! See http://factorcode.org/license.txt for BSD license
4 USING: accessors arrays ascii assocs byte-arrays combinators
5 io.encodings.string io.encodings.utf8 kernel lexer linked-assocs
6 literals locals make math math.order math.parser multiline
7 namespaces peg.ebnf present prettyprint.backend
8 prettyprint.custom prettyprint.sections regexp sbufs sequences
9 sequences.extras sequences.generalizations sets sorting
10 splitting strings strings.parser urls urls.encoding
11 urls.encoding.private urls.private ;
25 CONSTANT: DIGITS $[ "abcdefghijklmnopqrstuvwxyz0123456789" >byte-array ]
28 : threshold ( j bias -- T )
29 [ BASE * ] [ - ] bi* TMIN TMAX clamp ;
31 :: adapt ( delta! #chars first? -- bias )
32 delta first? DAMP 2 ? /i delta!
33 delta dup #chars /i + delta!
34 0 [ delta $[ BASE TMIN - TMAX * 2 /i ] > ] [
35 delta $[ BASE TMIN - ] /i delta!
37 ] while BASE delta * delta SKEW + /i + ;
39 : segregate ( str -- base extended )
40 [ N < ] partition members natural-sort ;
42 :: find-pos ( str ch i pos -- i' pos' )
49 ] find-from drop [ drop -1 -1 ] unless* ;
51 :: insertion-unsort ( str extended -- deltas )
58 str [ ch < ] count :> curlen
59 curlen 1 + ch oldch - * :> delta!
61 str ch i pos find-pos pos! i!
65 i oldi - delta + delta!
75 :: encode-delta ( delta! bias -- seq )
76 SBUF" " clone :> accum
86 delta T - BASE T - /mod T + swap delta!
87 ] if DIGITS nth accum push
90 :: encode-deltas ( baselen deltas -- seq )
91 SBUF" " clone :> accum
94 delta bias encode-delta accum push-all
95 delta baselen i + 1 + i 0 = adapt bias!
100 :: >punycode ( str -- punicode )
101 str segregate :> ( base extended )
102 str extended insertion-unsort :> deltas
103 base length deltas encode-deltas
104 base [ "-" rot 3append ] unless-empty "" like ;
108 ERROR: invalid-digit char ;
110 :: decode-digit ( ch -- digit )
112 { [ ch CHAR: A CHAR: Z between? ] [ ch CHAR: A - ] }
113 { [ ch CHAR: 0 CHAR: 9 between? ] [ ch CHAR: 0 26 - - ] }
117 :: decode-delta ( extended extpos! bias -- extpos' delta )
123 j bias threshold :> T
124 extpos extended nth decode-digit :> digit
126 digit w * delta + delta!
129 ] loop extpos delta ;
131 ERROR: invalid-character char ;
133 :: insertion-sort ( base extended -- base )
138 extended length :> extlen
139 [ extpos extlen < ] [
140 extended extpos bias decode-delta :> ( newpos delta )
142 pos base length 1 + /mod pos! ch + ch!
143 ch 0x10FFFF > [ ch invalid-character ] when
144 ch pos base insert-nth!
145 delta base length extpos 0 = adapt bias!
151 : punycode> ( punycode -- str )
152 CHAR: - over last-index [
153 ! FIXME: assert all non-basic code-points
154 [ head >sbuf ] [ 1 + tail ] 2bi >upper
156 SBUF" " clone swap >upper
157 ] if* insertion-sort "" like ;
159 GENERIC: idna> ( punycode -- obj )
163 "xn--" ?head [ punycode> ] when
166 M: url idna> [ idna> ] change-host ;
168 GENERIC: >idna ( obj -- punycode )
173 >punycode "xn--" prepend
177 M: url >idna [ >idna ] change-host ;