From: John Benediktsson Date: Mon, 2 Nov 2020 19:12:32 +0000 (-0800) Subject: punycode: simplify ebnf, and handle iquery. X-Git-Tag: 0.99~3037 X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=commitdiff_plain;h=1acd52e55ee20da98c0898a4843009a9bb89e171 punycode: simplify ebnf, and handle iquery. --- diff --git a/extra/punycode/punycode-tests.factor b/extra/punycode/punycode-tests.factor index fc316b281a..624bc94909 100644 --- a/extra/punycode/punycode-tests.factor +++ b/extra/punycode/punycode-tests.factor @@ -1,5 +1,6 @@ -USING: arrays assocs fry kernel punycode tools.test urls ; +USING: arrays assocs fry kernel linked-assocs punycode +tools.test urls ; { ! Wikipedia Examples @@ -150,3 +151,16 @@ USING: arrays assocs fry kernel punycode tools.test urls ; { IRL" http://例子.卷筒纸" } [ URL" http://xn--fsqu00a.xn--3lr804guic/" >irl ] unit-test { URL" http://xn--fsqu00a.xn--3lr804guic/" } [ IRL" http://例子.卷筒纸" >url ] unit-test + +{ + T{ irl + { protocol "http" } + { username f } + { password f } + { host "März.com" } + { port f } + { path "/päth" } + { query LH{ { "query" "Dürst" } } } + { anchor "☃" } + } +} [ "http://März.com/päth?query=Dürst#☃" >irl ] unit-test diff --git a/extra/punycode/punycode.factor b/extra/punycode/punycode.factor index a7f70e1835..e19c6ab234 100644 --- a/extra/punycode/punycode.factor +++ b/extra/punycode/punycode.factor @@ -2,11 +2,12 @@ ! See http://factorcode.org/license.txt for BSD license USING: accessors arrays ascii assocs byte-arrays combinators -io.encodings.string io.encodings.utf8 kernel lexer literals -locals make math math.order math.parser multiline namespaces -peg.ebnf present prettyprint.backend prettyprint.custom -prettyprint.sections regexp sbufs sequences sequences.extras -sets sorting splitting strings strings.parser urls urls.encoding +io.encodings.string io.encodings.utf8 kernel lexer linked-assocs +literals locals make math math.order math.parser multiline +namespaces peg.ebnf present prettyprint.backend +prettyprint.custom prettyprint.sections regexp sbufs sequences +sequences.extras sequences.generalizations sets sorting +splitting strings strings.parser urls urls.encoding urls.encoding.private urls.private ; IN: punycode @@ -180,28 +181,49 @@ M: f >irl drop ; : irl-decode ( str -- str' ) "" like R/ (%[a-fA-F0-9]{2})+/ [ url-decode ] re-replace-with ; -! RFC 3987 -EBNF: parse-irl [=[ +: iquery-decode ( str -- decoded ) + "+" split "%20" join irl-decode ; -protocol = [a-zA-Z0-9.+-]+ => [[ irl-decode ]] -username = [^/:@#?]+ => [[ irl-decode ]] -password = [^/:@#?]+ => [[ irl-decode ]] -pathname = [^#?]+ => [[ irl-decode ]] -query = [^#]+ => [[ query>assoc ]] -anchor = .+ => [[ irl-decode ]] +: iquery>assoc ( query -- assoc ) + dup [ + "&;" split [ + [ + [ "=" split1 [ dup [ iquery-decode ] when ] bi@ swap ] dip + add-query-param + ] curry each + ] keep + ] when ; -hostname = [^/#?]+ => [[ irl-decode ]] +: assoc>iquery ( assoc -- str ) + [ + [ + [ + dup array? [ 1array ] unless + [ "=" glue , ] with each + ] [ , ] if* + ] assoc-each + ] { } make "&" join ; -hostname-spec = hostname ("/"|!(.)) => [[ first ]] +! RFC 3987 +EBNF: parse-irl [=[ + +protocol = [a-zA-Z0-9.+-]+ => [[ irl-decode ]] +username = [^/:@#?]+ => [[ irl-decode ]] +password = [^/:@#?]+ => [[ irl-decode ]] +path = [^#?]+ => [[ irl-decode ]] +query = [^#]+ => [[ iquery>assoc ]] +anchor = .+ => [[ irl-decode ]] +hostname = [^/#?:]+ => [[ irl-decode ]] +port = [^/#?]+ => [[ url-decode parse-port ]] -auth = (username (":" password => [[ second ]])? "@" - => [[ first2 2array ]])? +auth = username (":"~ password)? "@"~ +host = hostname (":"~ port)? -url = (((protocol "://") => [[ first ]] auth hostname) - | (("//") => [[ f ]] auth hostname))? - (pathname)? - ("?" query => [[ second ]])? - ("#" anchor => [[ second ]])? +url = (protocol ":"~)? + ("//"~ auth? host?)? + path? + ("?"~ query)? + ("#"~ anchor)? ]=] @@ -222,38 +244,26 @@ M: irl present [ unparse-protocol ] [ unparse-iauthority ] [ path>> % ] - [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ] + [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>iquery % ] if ] [ anchor>> [ "#" % present % ] when* ] } cleave ] "" make ; -: parse-ihost ( string -- host/f port/f ) - [ - ":" split1-last [ irl-decode ] [ - [ f ] [ string>number [ malformed-port ] unless* ] if-empty - ] bi* - ] [ f f ] if* ; - PRIVATE> M: string >irl - [ ] dip - parse-irl { + [ ] dip parse-irl 5 firstn { + [ >lower >>protocol ] [ - first [ - [ first >lower >>protocol ] - [ - second - [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ] - [ second parse-ihost [ >>host ] [ >>port ] bi* ] bi - ] bi + [ + [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ] + [ second [ first2 [ >>host ] [ >>port ] bi* ] when* ] bi ] when* ] - [ second >>path ] - [ third >>query ] - [ fourth >>anchor ] - } cleave - dup host>> [ [ "/" or ] change-path ] when ; + [ >>path ] + [ >>query ] + [ >>anchor ] + } spread dup host>> [ [ "/" or ] change-path ] when ; M: irl >url [ ] dip { @@ -263,7 +273,7 @@ M: irl >url [ host>> [ >idna url-encode ] [ f ] if* >>host ] [ port>> >>port ] [ path>> [ url-encode ] [ f ] if* >>path ] - [ query>> [ url-encode ] [ f ] if* >>query ] + [ query>> >>query ] [ anchor>> [ url-encode ] [ f ] if* >>anchor ] } cleave ; @@ -275,7 +285,7 @@ M: url >irl [ host>> [ url-decode idna> ] [ f ] if* >>host ] [ port>> >>port ] [ path>> [ url-decode ] [ f ] if* >>path ] - [ query>> [ url-decode ] [ f ] if* >>query ] + [ query>> >>query ] [ anchor>> [ url-decode ] [ f ] if* >>anchor ] } cleave ;