From 66fef9f202340779fadb51d19f1ba39f48d59d66 Mon Sep 17 00:00:00 2001 From: John Benediktsson Date: Thu, 29 Oct 2020 14:40:04 -0700 Subject: [PATCH] punycode: adding an initial version of an IRL. Probably needs to use irl-decode in query>assoc. --- extra/punycode/punycode-tests.factor | 5 +- extra/punycode/punycode.factor | 128 ++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 4 deletions(-) diff --git a/extra/punycode/punycode-tests.factor b/extra/punycode/punycode-tests.factor index e31550ab7a..fc316b281a 100644 --- a/extra/punycode/punycode-tests.factor +++ b/extra/punycode/punycode-tests.factor @@ -1,5 +1,5 @@ -USING: arrays assocs fry kernel punycode tools.test ; +USING: arrays assocs fry kernel punycode tools.test urls ; { ! Wikipedia Examples @@ -147,3 +147,6 @@ USING: arrays assocs fry kernel punycode tools.test ; [ [ 1array ] dip '[ _ idna> ] unit-test ] [ 1array swap '[ _ >idna ] unit-test ] 2bi ] assoc-each + +{ IRL" http://例子.卷筒纸" } [ URL" http://xn--fsqu00a.xn--3lr804guic/" >irl ] unit-test +{ URL" http://xn--fsqu00a.xn--3lr804guic/" } [ IRL" http://例子.卷筒纸" >url ] unit-test diff --git a/extra/punycode/punycode.factor b/extra/punycode/punycode.factor index f69196a0bd..a7f70e1835 100644 --- a/extra/punycode/punycode.factor +++ b/extra/punycode/punycode.factor @@ -1,9 +1,13 @@ ! Copyright (C) 2020 John Benediktsson ! See http://factorcode.org/license.txt for BSD license -USING: ascii byte-arrays combinators kernel literals locals math -math.order sbufs sequences sequences.extras sets sorting -splitting ; +USING: accessors arrays ascii assocs byte-arrays combinators +io.encodings.string io.encodings.utf8 kernel lexer literals +locals make math math.order math.parser multiline namespaces +peg.ebnf present prettyprint.backend prettyprint.custom +prettyprint.sections regexp sbufs sequences sequences.extras +sets sorting splitting strings strings.parser urls urls.encoding +urls.encoding.private urls.private ; IN: punycode @@ -162,3 +166,121 @@ PRIVATE> >punycode "xn--" prepend ] unless ] map "." join ; + +TUPLE: irl < url ; + +: ( -- irl ) irl new ; + +GENERIC: >irl ( obj -- irl ) + +M: f >irl drop ; + + [[ irl-decode ]] +username = [^/:@#?]+ => [[ irl-decode ]] +password = [^/:@#?]+ => [[ irl-decode ]] +pathname = [^#?]+ => [[ irl-decode ]] +query = [^#]+ => [[ query>assoc ]] +anchor = .+ => [[ irl-decode ]] + +hostname = [^/#?]+ => [[ irl-decode ]] + +hostname-spec = hostname ("/"|!(.)) => [[ first ]] + +auth = (username (":" password => [[ second ]])? "@" + => [[ first2 2array ]])? + +url = (((protocol "://") => [[ first ]] auth hostname) + | (("//") => [[ f ]] auth hostname))? + (pathname)? + ("?" query => [[ second ]])? + ("#" anchor => [[ second ]])? + +]=] + +: unparse-ihost-part ( url -- ) + { + [ unparse-username-password ] + [ host>> % ] + [ url-port [ ":" % # ] when* ] + [ path>> "/" head? [ "/" % ] unless ] + } cleave ; + +: unparse-iauthority ( url -- ) + dup host>> [ "//" % unparse-ihost-part ] [ drop ] if ; + +M: irl present + [ + { + [ unparse-protocol ] + [ unparse-iauthority ] + [ path>> % ] + [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ] + [ anchor>> [ "#" % present % ] when* ] + } cleave + ] "" make ; + +: parse-ihost ( string -- host/f port/f ) + [ + ":" split1-last [ irl-decode ] [ + [ f ] [ string>number [ malformed-port ] unless* ] if-empty + ] bi* + ] [ f f ] if* ; + +PRIVATE> + +M: string >irl + [ ] dip + parse-irl { + [ + first [ + [ first >lower >>protocol ] + [ + second + [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ] + [ second parse-ihost [ >>host ] [ >>port ] bi* ] bi + ] bi + ] when* + ] + [ second >>path ] + [ third >>query ] + [ fourth >>anchor ] + } cleave + dup host>> [ [ "/" or ] change-path ] when ; + +M: irl >url + [ ] dip { + [ protocol>> >>protocol ] + [ username>> >>username ] + [ password>> >>password ] + [ host>> [ >idna url-encode ] [ f ] if* >>host ] + [ port>> >>port ] + [ path>> [ url-encode ] [ f ] if* >>path ] + [ query>> [ url-encode ] [ f ] if* >>query ] + [ anchor>> [ url-encode ] [ f ] if* >>anchor ] + } cleave ; + +M: url >irl + [ ] dip { + [ protocol>> >>protocol ] + [ username>> >>username ] + [ password>> >>password ] + [ host>> [ url-decode idna> ] [ f ] if* >>host ] + [ port>> >>port ] + [ path>> [ url-decode ] [ f ] if* >>path ] + [ query>> [ url-decode ] [ f ] if* >>query ] + [ anchor>> [ url-decode ] [ f ] if* >>anchor ] + } cleave ; + +SYNTAX: IRL" lexer get skip-blank parse-string >irl suffix! ; + +M: irl pprint* + \ IRL" record-vocab + dup present "IRL\" " "\"" pprint-string ; -- 2.34.1