[ { "blah" 123 } ] [ { "blah" xyz } expand-constants ] unit-test
-: foo ( -- n ) &: fdafd [ 123 ] unless* ;
-
-[ 123 ] [ foo ] unit-test
-
[ -1 ] [ -1 <char> *char ] unit-test
[ -1 ] [ -1 <short> *short ] unit-test
[ -1 ] [ -1 <int> *int ] unit-test
alien.arrays alien.strings kernel math namespaces parser
sequences words quotations math.parser splitting grouping
effects assocs combinators lexer strings.parser alien.parser
-fry vocabs.parser ;
+fry vocabs.parser words.constant ;
IN: alien.syntax
: DLL" lexer get skip-blank parse-string dlopen parsed ; parsing
: C-ENUM:
";" parse-tokens
- dup length
- [ [ create-in ] dip 1quotation define ] 2each ;
+ [ [ create-in ] dip define-constant ] each-index ;
parsing
+: address-of ( name library -- value )
+ load-library dlsym [ "No such symbol" throw ] unless* ;
+
: &:
- scan "c-library" get
- '[ _ _ load-library dlsym ] over push-all ; parsing
+ scan "c-library" get '[ _ _ address-of ] over push-all ; parsing
dup method>> {
{ "GET" [ url>> query>> ] }
{ "HEAD" [ url>> query>> ] }
- { "POST" [
- post-data>>
- dup content-type>> "application/x-www-form-urlencoded" =
- [ content>> ] [ drop f ] if
- ] }
+ { "POST" [ post-data>> params>> ] }
} case ;
: referrer ( -- referrer/f )
: random-alist ( n -- alist )
[
- [
- 32 random-bits dup number>string swap set
- ] times
- ] H{ } make-assoc ;
+ drop 32 random-bits dup number>string
+ ] H{ } map>assoc ;
: test-heap-sort ( n -- ? )
random-alist dup >alist sort-keys swap heap-sort = ;
dup header>> >hashtable
over url>> host>> [ over url>> url-host "host" pick set-at ] when
over post-data>> [
- [ raw>> length "content-length" pick set-at ]
+ [ data>> length "content-length" pick set-at ]
[ content-type>> "content-type" pick set-at ]
bi
] when*
GENERIC: >post-data ( object -- post-data )
+M: f >post-data ;
+
M: post-data >post-data ;
-M: string >post-data utf8 encode "application/octet-stream" <post-data> ;
+M: string >post-data
+ utf8 encode
+ "application/octet-stream" <post-data>
+ swap >>data ;
-M: byte-array >post-data "application/octet-stream" <post-data> ;
+M: assoc >post-data
+ "application/x-www-form-urlencoded" <post-data>
+ swap >>params ;
-M: assoc >post-data assoc>query ascii encode "application/x-www-form-urlencoded" <post-data> ;
+M: object >post-data
+ "application/octet-stream" <post-data>
+ swap >>data ;
-M: f >post-data ;
+: normalize-post-data ( request -- request )
+ dup post-data>> [
+ dup params>> [
+ assoc>query ascii encode >>data
+ ] when* drop
+ ] when* ;
: unparse-post-data ( request -- request )
- [ >post-data ] change-post-data ;
+ [ >post-data ] change-post-data
+ normalize-post-data ;
: write-post-data ( request -- request )
- dup method>> [ "POST" = ] [ "PUT" = ] bi or [ dup post-data>> raw>> write ] when ;
+ dup method>> { "POST" "PUT" } member? [
+ dup post-data>> data>> dup sequence?
+ [ write ] [ output-stream get stream-copy ] if
+ ] when ;
: write-request ( request -- )
unparse-post-data
{ method "POST" }
{ version "1.1" }
{ header H{ { "some-header" "1; 2" } { "content-length" "4" } { "content-type" "application/octet-stream" } } }
- { post-data T{ post-data { content "blah" } { raw "blah" } { content-type "application/octet-stream" } } }
+ { post-data T{ post-data { data "blah" } { content-type "application/octet-stream" } } }
{ cookies V{ } }
}
] [
raw-response new
"1.1" >>version ;
-TUPLE: post-data raw content content-type form-variables uploaded-files ;
+TUPLE: post-data data params content-type content-encoding ;
-: <post-data> ( form-variables uploaded-files raw content-type -- post-data )
+: <post-data> ( content-type -- post-data )
post-data new
- swap >>content-type
- swap >>raw
- swap >>uploaded-files
- swap >>form-variables ;
+ swap >>content-type ;
: parse-content-type-attributes ( string -- attributes )
" " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;
request get "accept" header "HTTP_ACCEPT" set\r
\r
post-request? [\r
- request get post-data>> raw>>\r
+ request get post-data>> data>>\r
[ "CONTENT_TYPE" set ]\r
[ length number>string "CONTENT_LENGTH" set ]\r
bi\r
swap '[\r
binary encode-output\r
_ output-stream get swap <cgi-process> binary <process-stream> [\r
- post-request? [ request get post-data>> raw>> write flush ] when\r
+ post-request? [ request get post-data>> data>> write flush ] when\r
input-stream get swap (stream-copy)\r
] with-stream\r
] >>body ;\r
: read-content ( request -- bytes )
"content-length" header string>number read ;
-: parse-content ( request content-type -- form-variables uploaded-files raw )
- {
- { "multipart/form-data" [ read-multipart-data f ] }
- { "application/x-www-form-urlencoded" [ read-content [ f f ] dip ] }
- [ drop read-content [ f f ] dip ]
+: parse-content ( request content-type -- post-data )
+ [ <post-data> swap ] keep {
+ { "multipart/form-data" [ read-multipart-data assoc-union >>params ] }
+ { "application/x-www-form-urlencoded" [ read-content query>assoc >>params ] }
+ [ drop read-content >>data ]
} case ;
: read-post-data ( request -- request )
dup method>> "POST" = [
dup dup "content-type" header
- [ ";" split1 drop parse-content ] keep
- <post-data> >>post-data
+ ";" split1 drop parse-content >>post-data
] when ;
: extract-host ( request -- request )
: <"
"\">" parse-multiline-string parsed ; parsing
+: <'
+ "'>" parse-multiline-string parsed ; parsing
+
+: {'
+ "'}" parse-multiline-string parsed ; parsing
+
+: {"
+ "\"}" parse-multiline-string parsed ; parsing
+
: /* "*/" parse-multiline-string drop ; parsing
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel namespaces xml.name io.encodings.utf8 xml.elements
+io.encodings.utf16 xml.tokenize xml.state math ascii sequences
+io.encodings.string io.encodings combinators ;
+IN: xml.autoencoding
+
+: continue-make-tag ( str -- tag )
+ parse-name-starting middle-tag end-tag ;
+
+: start-utf16le ( -- tag )
+ utf16le decode-input-if
+ CHAR: ? expect
+ 0 expect check instruct ;
+
+: 10xxxxxx? ( ch -- ? )
+ -6 shift 3 bitand 2 = ;
+
+: start<name ( ch -- tag )
+ ascii?
+ [ utf8 decode-input-if next make-tag ] [
+ next
+ [ get-next 10xxxxxx? not ] take-until
+ get-char suffix utf8 decode
+ utf8 decode-input-if next
+ continue-make-tag
+ ] if ;
+
+: start< ( -- tag )
+ get-next {
+ { 0 [ next next start-utf16le ] }
+ { CHAR: ? [ check next next instruct ] } ! XML prolog parsing sets the encoding
+ { CHAR: ! [ check utf8 decode-input next next direct ] }
+ [ check start<name ]
+ } case ;
+
+: skip-utf8-bom ( -- tag )
+ "\u0000bb\u0000bf" expect utf8 decode-input
+ CHAR: < expect check make-tag ;
+
+: decode-expecting ( encoding string -- tag )
+ [ decode-input-if next ] [ expect-string ] bi* check make-tag ;
+
+: start-utf16be ( -- tag )
+ utf16be "<" decode-expecting ;
+
+: skip-utf16le-bom ( -- tag )
+ utf16le "\u0000fe<" decode-expecting ;
+
+: skip-utf16be-bom ( -- tag )
+ utf16be "\u0000ff<" decode-expecting ;
+
+: start-document ( -- tag )
+ get-char {
+ { CHAR: < [ start< ] }
+ { 0 [ start-utf16be ] }
+ { HEX: EF [ skip-utf8-bom ] }
+ { HEX: FF [ skip-utf16le-bom ] }
+ { HEX: FE [ skip-utf16be-bom ] }
+ { f [ "" ] }
+ [ drop utf8 decode-input-if f ]
+ ! Same problem as with <e`>, in the case of XML chunks?
+ } case check ;
+
TUPLE: attlist-decl < directive name att-defs ;
C: <attlist-decl> attlist-decl
-TUPLE: entity-decl < directive name def ;
+TUPLE: entity-decl < directive name def pe? ;
C: <entity-decl> entity-decl
TUPLE: system-id system-literal ;
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg, Slava Pestov
+! See http://factorcode.org/license.txt for BSD license.
+USING: xml.tokenize xml.data xml.state kernel sequences ascii
+fry xml.errors combinators hashtables namespaces xml.entities
+strings ;
+IN: xml.dtd
+
+: take-word ( -- string )
+ [ get-char blank? ] take-until ;
+
+: take-decl-contents ( -- first second )
+ pass-blank take-word pass-blank ">" take-string ;
+
+: take-element-decl ( -- element-decl )
+ take-decl-contents <element-decl> ;
+
+: take-attlist-decl ( -- attlist-decl )
+ take-decl-contents <attlist-decl> ;
+
+: take-notation-decl ( -- notation-decl )
+ take-decl-contents <notation-decl> ;
+
+: take-until-one-of ( seps -- str sep )
+ '[ get-char _ member? ] take-until get-char ;
+
+: take-system-id ( -- system-id )
+ parse-quote <system-id> close ;
+
+: take-public-id ( -- public-id )
+ parse-quote parse-quote <public-id> close ;
+
+UNION: dtd-acceptable
+ directive comment instruction ;
+
+: (take-external-id) ( token -- external-id )
+ pass-blank {
+ { "SYSTEM" [ take-system-id ] }
+ { "PUBLIC" [ take-public-id ] }
+ [ bad-external-id ]
+ } case ;
+
+: take-external-id ( -- external-id )
+ take-word (take-external-id) ;
+
+: only-blanks ( str -- )
+ [ blank? ] all? [ bad-decl ] unless ;
+: take-entity-def ( var -- entity-name entity-def )
+ [
+ take-word pass-blank get-char {
+ { CHAR: ' [ parse-quote ] }
+ { CHAR: " [ parse-quote ] }
+ [ drop take-external-id ]
+ } case
+ ] dip '[ swap _ [ ?set-at ] change ] 2keep ;
+
+: take-entity-decl ( -- entity-decl )
+ pass-blank get-char {
+ { CHAR: % [ next pass-blank pe-table take-entity-def t ] }
+ [ drop extra-entities take-entity-def f ]
+ } case
+ close <entity-decl> ;
--- /dev/null
+Daniel Ehrenberg
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel namespaces xml.tokenize xml.state xml.name
+xml.data accessors arrays make xml.char-classes fry assocs sequences
+math xml.errors sets combinators io.encodings io.encodings.iana
+unicode.case xml.dtd strings ;
+IN: xml.elements
+
+: parse-attr ( -- )
+ parse-name pass-blank CHAR: = expect pass-blank
+ t parse-quote* 2array , ;
+
+: start-tag ( -- name ? )
+ #! Outputs the name and whether this is a closing tag
+ get-char CHAR: / = dup [ next ] when
+ parse-name swap ;
+
+: (middle-tag) ( -- )
+ pass-blank version=1.0? get-char name-start?
+ [ parse-attr (middle-tag) ] when ;
+
+: assure-no-duplicates ( attrs-alist -- attrs-alist )
+ H{ } clone 2dup '[ swap _ push-at ] assoc-each
+ [ nip length 2 >= ] assoc-filter >alist
+ [ first first2 duplicate-attr ] unless-empty ;
+
+: middle-tag ( -- attrs-alist )
+ ! f make will make a vector if it has any elements
+ [ (middle-tag) ] f make pass-blank
+ assure-no-duplicates ;
+
+: end-tag ( name attrs-alist -- tag )
+ tag-ns pass-blank get-char CHAR: / =
+ [ pop-ns <contained> next CHAR: > expect ]
+ [ depth inc <opener> close ] if ;
+
+: take-comment ( -- comment )
+ "--" expect-string
+ "--" take-string
+ <comment>
+ CHAR: > expect ;
+
+: assure-no-extra ( seq -- )
+ [ first ] map {
+ T{ name f "" "version" f }
+ T{ name f "" "encoding" f }
+ T{ name f "" "standalone" f }
+ } diff
+ [ extra-attrs ] unless-empty ;
+
+: good-version ( version -- version )
+ dup { "1.0" "1.1" } member? [ bad-version ] unless ;
+
+: prolog-version ( alist -- version )
+ T{ name f "" "version" f } swap at
+ [ good-version ] [ versionless-prolog ] if* ;
+
+: prolog-encoding ( alist -- encoding )
+ T{ name f "" "encoding" f } swap at "UTF-8" or ;
+
+: yes/no>bool ( string -- t/f )
+ {
+ { "yes" [ t ] }
+ { "no" [ f ] }
+ [ not-yes/no ]
+ } case ;
+
+: prolog-standalone ( alist -- version )
+ T{ name f "" "standalone" f } swap at
+ [ yes/no>bool ] [ f ] if* ;
+
+: prolog-attrs ( alist -- prolog )
+ [ prolog-version ]
+ [ prolog-encoding ]
+ [ prolog-standalone ]
+ tri <prolog> ;
+
+SYMBOL: string-input?
+: decode-input-if ( encoding -- )
+ string-input? get [ drop ] [ decode-input ] if ;
+
+: parse-prolog ( -- prolog )
+ pass-blank middle-tag "?>" expect-string
+ dup assure-no-extra prolog-attrs
+ dup encoding>> dup "UTF-16" =
+ [ drop ] [ name>encoding [ decode-input-if ] when* ] if
+ dup prolog-data set ;
+
+: instruct ( -- instruction )
+ take-name {
+ { [ dup "xml" = ] [ drop parse-prolog ] }
+ { [ dup >lower "xml" = ] [ capitalized-prolog ] }
+ { [ dup valid-name? not ] [ bad-name ] }
+ [ "?>" take-string append <instruction> ]
+ } cond ;
+
+: take-cdata ( -- string )
+ depth get zero? [ bad-cdata ] when
+ "[CDATA[" expect-string "]]>" take-string ;
+
+DEFER: make-tag ! Is this unavoidable?
+
+: expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE
+
+: (take-internal-subset) ( -- )
+ pass-blank get-char {
+ { CHAR: ] [ next ] }
+ { CHAR: % [ expand-pe ] }
+ { CHAR: < [
+ next make-tag dup dtd-acceptable?
+ [ bad-doctype ] unless , (take-internal-subset)
+ ] }
+ [ 1string bad-doctype ]
+ } case ;
+
+: take-internal-subset ( -- seq )
+ [
+ H{ } pe-table set
+ t in-dtd? set
+ (take-internal-subset)
+ ] { } make ;
+
+: nontrivial-doctype ( -- external-id internal-subset )
+ pass-blank get-char CHAR: [ = [
+ next take-internal-subset f swap close
+ ] [
+ " >" take-until-one-of {
+ { CHAR: \s [ (take-external-id) ] }
+ { CHAR: > [ only-blanks f ] }
+ } case f
+ ] if ;
+
+: take-doctype-decl ( -- doctype-decl )
+ pass-blank " >" take-until-one-of {
+ { CHAR: \s [ nontrivial-doctype ] }
+ { CHAR: > [ f f ] }
+ } case <doctype-decl> ;
+
+
+: take-directive ( -- directive )
+ take-name {
+ { "ELEMENT" [ take-element-decl ] }
+ { "ATTLIST" [ take-attlist-decl ] }
+ { "DOCTYPE" [ take-doctype-decl ] }
+ { "ENTITY" [ take-entity-decl ] }
+ { "NOTATION" [ take-notation-decl ] }
+ [ bad-directive ]
+ } case ;
+
+: direct ( -- object )
+ get-char {
+ { CHAR: - [ take-comment ] }
+ { CHAR: [ [ take-cdata ] }
+ [ drop take-directive ]
+ } case ;
+
+: make-tag ( -- tag )
+ {
+ { [ get-char dup CHAR: ! = ] [ drop next direct ] }
+ { [ CHAR: ? = ] [ next instruct ] }
+ [
+ start-tag [ dup add-ns pop-ns <closer> depth dec close ]
+ [ middle-tag end-tag ] if
+ ]
+ } cond ;
T{ bad-cdata f 1 3 } "<![CDATA[]]><x/>" xml-error-test
T{ bad-cdata f 1 7 } "<x/><![CDATA[]]>" xml-error-test
T{ pre/post-content f "&" t } "&32;<x/>" xml-error-test
+T{ bad-doctype f 1 17 "a" } "<!DOCTYPE foo [ a ]><x/>" xml-error-test
+T{ bad-doctype f 1 22 T{ opener { name T{ name f "" "foo" "" } } { attrs T{ attrs } } } } "<!DOCTYPE foo [ <foo> ]><x/>" xml-error-test
! See http://factorcode.org/license.txt for BSD license.
USING: xml.data xml.writer kernel generic io prettyprint math
debugger sequences xml.state accessors summary
-namespaces io.streams.string xml.backend ;
+namespaces io.streams.string xml.backend xml.writer.private ;
IN: xml.errors
TUPLE: parsing-error line column ;
"Not enough characters" print
] with-string-writer ;
+TUPLE: bad-doctype < parsing-error contents ;
+: bad-doctype ( contents -- * )
+ \ bad-doctype parsing-error swap >>contents throw ;
+M: bad-doctype summary
+ call-next-method "\nDTD contains invalid object" append ;
+
UNION: xml-parse-error
multitags notags extra-attrs nonexist-ns bad-decl
not-yes/no unclosed mismatched expected no-entity
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel namespaces accessors xml.tokenize xml.data assocs
+xml.errors xml.char-classes combinators.short-circuit splitting
+fry xml.state sequences ;
+IN: xml.name
+
+! XML namespace processing: ns = namespace
+
+! A stack of hashtables
+SYMBOL: ns-stack
+
+: attrs>ns ( attrs-alist -- hash )
+ ! this should check to make sure URIs are valid
+ [
+ [
+ swap dup space>> "xmlns" =
+ [ main>> set ]
+ [
+ T{ name f "" "xmlns" f } names-match?
+ [ "" set ] [ drop ] if
+ ] if
+ ] assoc-each
+ ] { } make-assoc f like ;
+
+: add-ns ( name -- )
+ dup space>> dup ns-stack get assoc-stack
+ [ nip ] [ nonexist-ns ] if* >>url drop ;
+
+: push-ns ( hash -- )
+ ns-stack get push ;
+
+: pop-ns ( -- )
+ ns-stack get pop* ;
+
+: init-ns-stack ( -- )
+ V{ H{
+ { "xml" "http://www.w3.org/XML/1998/namespace" }
+ { "xmlns" "http://www.w3.org/2000/xmlns" }
+ { "" "" }
+ } } clone
+ ns-stack set ;
+
+: tag-ns ( name attrs-alist -- name attrs )
+ dup attrs>ns push-ns
+ [ dup add-ns ] dip dup [ drop add-ns ] assoc-each <attrs> ;
+
+: valid-name? ( str -- ? )
+ [ f ] [
+ version=1.0? swap {
+ [ first name-start? ]
+ [ rest-slice [ name-char? ] with all? ]
+ } 2&&
+ ] if-empty ;
+
+: prefixed-name ( str -- name/f )
+ ":" split dup length 2 = [
+ [ [ valid-name? ] all? ]
+ [ first2 f <name> ] bi and
+ ] [ drop f ] if ;
+
+: interpret-name ( str -- name )
+ dup prefixed-name [ ] [
+ dup valid-name?
+ [ <simple-name> ] [ bad-name ] if
+ ] ?if ;
+
+: take-name ( -- string )
+ version=1.0? '[ _ get-char name-char? not ] take-until ;
+
+: parse-name ( -- name )
+ take-name interpret-name ;
+
+: parse-name-starting ( string -- name )
+ take-name append interpret-name ;
+
--- /dev/null
+Daniel Ehrenberg
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors kernel namespaces ;
+USING: accessors kernel namespaces io ;
IN: xml.state
TUPLE: spot char line column next check ;
IN: xml.test.state
: string-parse ( str quot -- )
- [ <string-reader> ] dip state-parse ;
+ [ <string-reader> ] dip with-state ;
: take-rest ( -- string )
[ f ] take-until ;
IN: xml.tests
USING: kernel xml tools.test io namespaces make sequences
xml.errors xml.entities.html parser strings xml.data io.files
-xml.writer xml.utilities continuations assocs
+xml.utilities continuations assocs
sequences.deep accessors io.streams.string ;
! This is insufficient
\ read-xml must-infer
+[ [ drop ] sax ] must-infer
+\ string>xml must-infer
SYMBOL: xml-file
[ ] [ "resource:basis/xml/tests/test.xml"
] unit-test
[ V{ "fa&g" } ] [ xml-file get "x" get-id children>> ] unit-test
[ "that" ] [ xml-file get "this" swap at ] unit-test
-[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
- [ "<a b='c'/>" string>xml xml>string ] unit-test
[ "abcd" ] [
"<main>a<sub>bc</sub>d<nothing/></main>" string>xml
[ [ dup string? [ % ] [ drop ] if ] deep-each ] "" make
at swap "z" [ tuck ] dip swap set-at
T{ name f "blah" "z" f } swap at ] unit-test
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
-[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo>bar baz</foo>" ]
-[ "<foo>bar</foo>" string>xml [ " baz" append ] map xml>string ] unit-test
-[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<foo>\n bar\n</foo>" ]
-[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
-[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
-[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
-[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
[ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test
[ "x" "<" ] [ "<x value='<'/>" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test
-! Copyright (C) 2005, 2006 Daniel Ehrenberg
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays ascii assocs combinators locals
-combinators.short-circuit fry io.encodings io.encodings.iana
-io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make
-math math.parser namespaces sequences sets splitting xml.state
-strings xml.char-classes xml.data xml.entities xml.errors hashtables
-circular io sbufs ;
+USING: namespaces xml.state kernel sequences accessors
+xml.char-classes xml.errors math io sbufs fry strings ascii
+circular xml.entities assocs make splitting math.parser
+locals combinators arrays ;
IN: xml.tokenize
-! Originally from state-parser
-
SYMBOL: prolog-data
+SYMBOL: depth
+
: version=1.0? ( -- ? )
prolog-data get [ version>> "1.0" = ] [ t ] if* ;
#! Increment spot.
get-char [ unexpected-end ] unless (next) record ;
+: init-parser ( -- )
+ 0 1 0 f f <spot> spot set
+ read1 set-next next ;
+
+: with-state ( stream quot -- )
+ ! with-input-stream implicitly creates a new scope which we use
+ swap [ init-parser call ] with-input-stream ; inline
+
: skip-until ( quot: ( -- ? ) -- )
get-char [
[ call ] keep swap [ drop ] [
dup [ get-char next ] replicate 2dup =
[ 2drop ] [ expected ] if ;
-: init-parser ( -- )
- 0 1 0 f f <spot> spot set
- read1 set-next next ;
-
-: state-parse ( stream quot -- )
- ! with-input-stream implicitly creates a new scope which we use
- swap [ init-parser call ] with-input-stream ; inline
-
-! XML namespace processing: ns = namespace
-
-! A stack of hashtables
-SYMBOL: ns-stack
-
-SYMBOL: depth
-
-: attrs>ns ( attrs-alist -- hash )
- ! this should check to make sure URIs are valid
- [
- [
- swap dup space>> "xmlns" =
- [ main>> set ]
- [
- T{ name f "" "xmlns" f } names-match?
- [ "" set ] [ drop ] if
- ] if
- ] assoc-each
- ] { } make-assoc f like ;
-
-: add-ns ( name -- )
- dup space>> dup ns-stack get assoc-stack
- [ nip ] [ nonexist-ns ] if* >>url drop ;
-
-: push-ns ( hash -- )
- ns-stack get push ;
-
-: pop-ns ( -- )
- ns-stack get pop* ;
-
-: init-ns-stack ( -- )
- V{ H{
- { "xml" "http://www.w3.org/XML/1998/namespace" }
- { "xmlns" "http://www.w3.org/2000/xmlns" }
- { "" "" }
- } } clone
- ns-stack set ;
-
-: tag-ns ( name attrs-alist -- name attrs )
- dup attrs>ns push-ns
- [ dup add-ns ] dip dup [ drop add-ns ] assoc-each <attrs> ;
-
-! Parsing names
-
-: valid-name? ( str -- ? )
- [ f ] [
- version=1.0? swap {
- [ first name-start? ]
- [ rest-slice [ name-char? ] with all? ]
- } 2&&
- ] if-empty ;
-
-: prefixed-name ( str -- name/f )
- ":" split dup length 2 = [
- [ [ valid-name? ] all? ]
- [ first2 f <name> ] bi and
- ] [ drop f ] if ;
-
-: interpret-name ( str -- name )
- dup prefixed-name [ ] [
- dup valid-name?
- [ <simple-name> ] [ bad-name ] if
- ] ?if ;
-
-: take-name ( -- string )
- version=1.0? '[ _ get-char name-char? not ] take-until ;
-
-: parse-name ( -- name )
- take-name interpret-name ;
-
-: parse-name-starting ( string -- name )
- take-name append interpret-name ;
-
-! -- Parsing strings
-
: parse-named-entity ( string -- )
dup entities at [ , ] [
dup extra-entities get at
char CHAR: < =
] parse-char ;
-! Parsing tags
-
-: start-tag ( -- name ? )
- #! Outputs the name and whether this is a closing tag
- get-char CHAR: / = dup [ next ] when
- parse-name swap ;
+: close ( -- )
+ pass-blank CHAR: > expect ;
: normalize-quote ( str -- str )
[ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
: parse-quote ( -- seq )
f parse-quote* ;
-: parse-attr ( -- )
- parse-name pass-blank CHAR: = expect pass-blank
- t parse-quote* 2array , ;
-
-: (middle-tag) ( -- )
- pass-blank version=1.0? get-char name-start?
- [ parse-attr (middle-tag) ] when ;
-
-: assure-no-duplicates ( attrs-alist -- attrs-alist )
- H{ } clone 2dup '[ swap _ push-at ] assoc-each
- [ nip length 2 >= ] assoc-filter >alist
- [ first first2 duplicate-attr ] unless-empty ;
-
-: middle-tag ( -- attrs-alist )
- ! f make will make a vector if it has any elements
- [ (middle-tag) ] f make pass-blank
- assure-no-duplicates ;
-
-: close ( -- )
- pass-blank CHAR: > expect ;
-
-: end-tag ( name attrs-alist -- tag )
- tag-ns pass-blank get-char CHAR: / =
- [ pop-ns <contained> next CHAR: > expect ]
- [ depth inc <opener> close ] if ;
-
-: take-comment ( -- comment )
- "--" expect-string
- "--" take-string
- <comment>
- CHAR: > expect ;
-
-: take-cdata ( -- string )
- depth get zero? [ bad-cdata ] when
- "[CDATA[" expect-string "]]>" take-string ;
-
-: take-word ( -- string )
- [ get-char blank? ] take-until ;
-
-: take-decl-contents ( -- first second )
- pass-blank take-word pass-blank ">" take-string ;
-
-: take-element-decl ( -- element-decl )
- take-decl-contents <element-decl> ;
-
-: take-attlist-decl ( -- attlist-decl )
- take-decl-contents <attlist-decl> ;
-
-: take-notation-decl ( -- notation-decl )
- take-decl-contents <notation-decl> ;
-
-: take-until-one-of ( seps -- str sep )
- '[ get-char _ member? ] take-until get-char ;
-
-: take-system-id ( -- system-id )
- parse-quote <system-id> close ;
-
-: take-public-id ( -- public-id )
- parse-quote parse-quote <public-id> close ;
-
-DEFER: direct
-
-: (take-internal-subset) ( -- )
- pass-blank get-char {
- { CHAR: ] [ next ] }
- [ drop "<!" expect-string direct , (take-internal-subset) ]
- } case ;
-
-: take-internal-subset ( -- seq )
- [
- H{ } pe-table set
- t in-dtd? set
- (take-internal-subset)
- ] { } make ;
-
-: (take-external-id) ( token -- external-id )
- pass-blank {
- { "SYSTEM" [ take-system-id ] }
- { "PUBLIC" [ take-public-id ] }
- [ bad-external-id ]
- } case ;
-
-: take-external-id ( -- external-id )
- take-word (take-external-id) ;
-
-: only-blanks ( str -- )
- [ blank? ] all? [ bad-decl ] unless ;
-
-: nontrivial-doctype ( -- external-id internal-subset )
- pass-blank get-char CHAR: [ = [
- next take-internal-subset f swap close
- ] [
- " >" take-until-one-of {
- { CHAR: \s [ (take-external-id) ] }
- { CHAR: > [ only-blanks f ] }
- } case f
- ] if ;
-
-: take-doctype-decl ( -- doctype-decl )
- pass-blank " >" take-until-one-of {
- { CHAR: \s [ nontrivial-doctype ] }
- { CHAR: > [ f f ] }
- } case <doctype-decl> ;
-
-: take-entity-def ( var -- entity-name entity-def )
- [
- take-word pass-blank get-char {
- { CHAR: ' [ parse-quote ] }
- { CHAR: " [ parse-quote ] }
- [ drop take-external-id ]
- } case swap
- ] dip [ [ ?set-at ] change ] 2keep swap ;
-
-: take-entity-decl ( -- entity-decl )
- pass-blank get-char {
- { CHAR: % [ next pass-blank pe-table take-entity-def ] }
- [ drop extra-entities take-entity-def ]
- } case
- close <entity-decl> ;
-
-: take-directive ( -- directive )
- take-name {
- { "ELEMENT" [ take-element-decl ] }
- { "ATTLIST" [ take-attlist-decl ] }
- { "DOCTYPE" [ take-doctype-decl ] }
- { "ENTITY" [ take-entity-decl ] }
- { "NOTATION" [ take-notation-decl ] }
- [ bad-directive ]
- } case ;
-
-: direct ( -- object )
- get-char {
- { CHAR: - [ take-comment ] }
- { CHAR: [ [ take-cdata ] }
- [ drop take-directive ]
- } case ;
-
-: assure-no-extra ( seq -- )
- [ first ] map {
- T{ name f "" "version" f }
- T{ name f "" "encoding" f }
- T{ name f "" "standalone" f }
- } diff
- [ extra-attrs ] unless-empty ;
-
-: good-version ( version -- version )
- dup { "1.0" "1.1" } member? [ bad-version ] unless ;
-
-: prolog-version ( alist -- version )
- T{ name f "" "version" f } swap at
- [ good-version ] [ versionless-prolog ] if* ;
-
-: prolog-encoding ( alist -- encoding )
- T{ name f "" "encoding" f } swap at "UTF-8" or ;
-
-: yes/no>bool ( string -- t/f )
- {
- { "yes" [ t ] }
- { "no" [ f ] }
- [ not-yes/no ]
- } case ;
-
-: prolog-standalone ( alist -- version )
- T{ name f "" "standalone" f } swap at
- [ yes/no>bool ] [ f ] if* ;
-
-: prolog-attrs ( alist -- prolog )
- [ prolog-version ]
- [ prolog-encoding ]
- [ prolog-standalone ]
- tri <prolog> ;
-
-SYMBOL: string-input?
-: decode-input-if ( encoding -- )
- string-input? get [ drop ] [ decode-input ] if ;
-
-: parse-prolog ( -- prolog )
- pass-blank middle-tag "?>" expect-string
- dup assure-no-extra prolog-attrs
- dup encoding>> dup "UTF-16" =
- [ drop ] [ name>encoding [ decode-input-if ] when* ] if
- dup prolog-data set ;
-
-: instruct ( -- instruction )
- take-name {
- { [ dup "xml" = ] [ drop parse-prolog ] }
- { [ dup >lower "xml" = ] [ capitalized-prolog ] }
- { [ dup valid-name? not ] [ bad-name ] }
- [ "?>" take-string append <instruction> ]
- } cond ;
-
-: make-tag ( -- tag )
- {
- { [ get-char dup CHAR: ! = ] [ drop next direct ] }
- { [ CHAR: ? = ] [ next instruct ] }
- [
- start-tag [ dup add-ns pop-ns <closer> depth dec close ]
- [ middle-tag end-tag ] if
- ]
- } cond ;
-
-! Autodetecting encodings
-
-: continue-make-tag ( str -- tag )
- parse-name-starting middle-tag end-tag ;
-
-: start-utf16le ( -- tag )
- utf16le decode-input-if
- CHAR: ? expect
- 0 expect check instruct ;
-
-: 10xxxxxx? ( ch -- ? )
- -6 shift 3 bitand 2 = ;
-
-: start<name ( ch -- tag )
- ascii?
- [ utf8 decode-input-if next make-tag ] [
- next
- [ get-next 10xxxxxx? not ] take-until
- get-char suffix utf8 decode
- utf8 decode-input-if next
- continue-make-tag
- ] if ;
-
-: start< ( -- tag )
- get-next {
- { 0 [ next next start-utf16le ] }
- { CHAR: ? [ check next next instruct ] } ! XML prolog parsing sets the encoding
- { CHAR: ! [ check utf8 decode-input next next direct ] }
- [ check start<name ]
- } case ;
-
-: skip-utf8-bom ( -- tag )
- "\u0000bb\u0000bf" expect utf8 decode-input
- CHAR: < expect check make-tag ;
-
-: decode-expecting ( encoding string -- tag )
- [ decode-input-if next ] [ expect-string ] bi* check make-tag ;
-
-: start-utf16be ( -- tag )
- utf16be "<" decode-expecting ;
-
-: skip-utf16le-bom ( -- tag )
- utf16le "\u0000fe<" decode-expecting ;
-
-: skip-utf16be-bom ( -- tag )
- utf16be "\u0000ff<" decode-expecting ;
-
-: start-document ( -- tag )
- get-char {
- { CHAR: < [ start< ] }
- { 0 [ start-utf16be ] }
- { HEX: EF [ skip-utf8-bom ] }
- { HEX: FF [ skip-utf16le-bom ] }
- { HEX: FE [ skip-utf16be-bom ] }
- { f [ "" ] }
- [ drop utf8 decode-input-if f ]
- ! Same problem as with <e`>, in the case of XML chunks?
- } case check ;
"These words are used to print XML normally"
{ $subsection xml>string }
{ $subsection write-xml }
- { $subsection print-xml }
"These words are used to prettyprint XML"
{ $subsection pprint-xml>string }
{ $subsection pprint-xml>string-but }
{ $description "prints the contents of an XML document to " { $link output-stream } "." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
-HELP: print-xml
-{ $values { "xml" "an XML document" } }
-{ $description "prints the contents of an XML document to " { $link output-stream } ", followed by a newline" }
-{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
-
HELP: pprint-xml
{ $values { "xml" "an XML document" } }
{ $description "prints the contents of an XML document to " { $link output-stream } " in a prettyprinted form." }
{ $description "Prettyprints an XML document, returning the result as a string and leaving the whitespace of the tags with names in sensitive-tags intact." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
-{ xml>string print-xml write-xml pprint-xml pprint-xml>string pprint-xml>string-but pprint-xml-but } related-words
+{ xml>string write-xml pprint-xml pprint-xml>string pprint-xml>string-but pprint-xml-but } related-words
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: xml.data xml.writer tools.test fry xml kernel multiline
+xml.writer.private io.streams.string xml.utilities sequences ;
IN: xml.writer.tests
-USING: xml.data xml.writer tools.test ;
+
+\ write-xml must-infer
+\ xml>string must-infer
+\ pprint-xml must-infer
+\ pprint-xml-but must-infer
[ "foo" ] [ T{ name { main "foo" } } name>string ] unit-test
+[ "foo" ] [ T{ name { space "" } { main "foo" } } name>string ] unit-test
[ "ns:foo" ] [ T{ name { space "ns" } { main "foo" } } name>string ] unit-test
+
+: reprints-as ( to from -- )
+ [ '[ _ ] ] [ '[ _ string>xml xml>string ] ] bi* unit-test ;
+
+: pprint-reprints-as ( to from -- )
+ [ '[ _ ] ] [ '[ _ string>xml pprint-xml>string ] ] bi* unit-test ;
+
+: reprints-same ( string -- ) dup reprints-as ;
+
+"<?xml version=\"1.0\" encoding=\"UTF-8\"?><x/>" reprints-same
+
+{" <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE foo [<!ENTITY foo "bar">]>
+<x>bar</x> "}
+{" <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE foo [<!ENTITY foo 'bar'>]>
+<x>&foo;</x> "} reprints-as
+
+{" <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE foo [
+ <!ENTITY foo "bar">
+ <!ELEMENT br EMPTY>
+ <!ATTLIST list type (bullets|ordered|glossary) "ordered">
+ <!NOTATION foo bar>
+ <?baz bing bang bong?>
+ <!--wtf-->
+]>
+<x>
+ bar
+</x>"}
+{" <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE foo [ <!ENTITY foo 'bar'> <!ELEMENT br EMPTY>
+<!ATTLIST list
+ type (bullets|ordered|glossary) "ordered">
+<!NOTATION foo bar> <?baz bing bang bong?>
+ <!--wtf-->
+]>
+<x>&foo;</x>"} pprint-reprints-as
+
+[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test
+[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
+[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
+[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
+ [ "<a b='c'/>" string>xml xml>string ] unit-test
+[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo>bar baz</foo>" ]
+[ "<foo>bar</foo>" string>xml [ " baz" append ] map xml>string ] unit-test
+[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<foo>\n bar\n</foo>" ]
+[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
-! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! Copyright (C) 2005, 2009 Daniel Ehrenberg\r
! See http://factorcode.org/license.txt for BSD license.\r
USING: hashtables kernel math namespaces sequences strings\r
assocs combinators io io.streams.string accessors\r
SYMBOL: indenter\r
" " indenter set-global\r
\r
+<PRIVATE\r
+\r
: sensitive? ( tag -- ? )\r
sensitive-tags get swap '[ _ names-match? ] contains? ;\r
\r
: name>string ( name -- string )\r
[ main>> ] [ space>> ] bi [ ":" rot 3append ] unless-empty ;\r
\r
+PRIVATE>\r
+\r
: print-name ( name -- )\r
name>string write ;\r
\r
+<PRIVATE\r
+\r
: print-attrs ( assoc -- )\r
[\r
" " write\r
"\"" write\r
] assoc-each ;\r
\r
+PRIVATE>\r
+\r
GENERIC: write-xml-chunk ( object -- )\r
\r
+<PRIVATE\r
+\r
M: string write-xml-chunk\r
- escape-string dup empty? not xml-pprint? get and\r
- [ nl 80 indent-string indented-break ] when write ;\r
+ escape-string xml-pprint? get [\r
+ dup [ blank? ] all?\r
+ [ drop "" ]\r
+ [ nl 80 indent-string indented-break ] if\r
+ ] when write ;\r
\r
: write-tag ( tag -- )\r
?indent CHAR: < write1\r
[ att-defs>> write ">" write ]\r
bi ;\r
\r
-M: entity-decl write-xml-chunk\r
- "<!ENTITY " write\r
+M: notation-decl write-xml-chunk\r
+ "<!NOTATION " write\r
[ name>> write " " write ]\r
- [ def>> write-xml-chunk ">" write ]\r
+ [ id>> write ">" write ]\r
bi ;\r
\r
+M: entity-decl write-xml-chunk\r
+ "<!ENTITY " write\r
+ [ pe?>> [ " % " write ] when ]\r
+ [ name>> write " \"" write ] [\r
+ def>> f xml-pprint?\r
+ [ write-xml-chunk ] with-variable\r
+ "\">" write\r
+ ] tri ;\r
+\r
M: system-id write-xml-chunk\r
"SYSTEM '" write system-literal>> write "'" write ;\r
\r
[ pubid-literal>> write "' '" write ]\r
[ system-literal>> write "'" write ] bi ;\r
\r
+: write-internal-subset ( seq -- )\r
+ [\r
+ "[" write indent\r
+ [ ?indent write-xml-chunk ] each\r
+ unindent ?indent "]" write\r
+ ] when* ;\r
+\r
M: doctype-decl write-xml-chunk\r
- "<!DOCTYPE " write\r
+ ?indent "<!DOCTYPE " write\r
[ name>> write " " write ]\r
[ external-id>> [ write-xml-chunk " " write ] when* ]\r
- [\r
- internal-subset>>\r
- [ "[" write [ write-xml-chunk ] each "]" write ] when* ">" write\r
- ] tri ;\r
+ [ internal-subset>> write-internal-subset ">" write ] tri ;\r
\r
M: directive write-xml-chunk\r
- "<!" write text>> write CHAR: > write1 ;\r
+ "<!" write text>> write CHAR: > write1 nl ;\r
\r
M: instruction write-xml-chunk\r
"<?" write text>> write "?>" write ;\r
standalone>> [ "\" standalone=\"yes" write ] when\r
"\"?>" write ;\r
\r
+PRIVATE>\r
+\r
: write-xml ( xml -- )\r
{\r
[ prolog>> write-prolog ]\r
M: xml write-xml-chunk\r
body>> write-xml-chunk ;\r
\r
-: print-xml ( xml -- )\r
- write-xml nl ;\r
-\r
: xml>string ( xml -- string )\r
[ write-xml ] with-string-writer ;\r
\r
: xml-chunk>string ( object -- string )\r
[ write-xml-chunk ] with-string-writer ;\r
\r
-: with-xml-pprint ( sensitive-tags quot -- )\r
+: pprint-xml-but ( xml sensitive-tags -- )\r
[\r
- swap [ assure-name ] map sensitive-tags set\r
+ [ assure-name ] map sensitive-tags set\r
0 indentation set\r
xml-pprint? on\r
- call\r
- ] with-scope ; inline\r
-\r
-: pprint-xml-but ( xml sensitive-tags -- )\r
- [ print-xml ] with-xml-pprint ;\r
+ write-xml\r
+ ] with-scope ;\r
\r
: pprint-xml ( xml -- )\r
f pprint-xml-but ;\r
\r
: pprint-xml>string-but ( xml sensitive-tags -- string )\r
- [ xml>string ] with-xml-pprint ;\r
+ [ pprint-xml-but ] with-string-writer ;\r
\r
: pprint-xml>string ( xml -- string )\r
f pprint-xml>string-but ;\r
-! Copyright (C) 2005, 2006 Daniel Ehrenberg
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays io io.encodings.binary io.files
io.streams.string kernel namespaces sequences strings
-xml.backend xml.data xml.errors xml.tokenize ascii xml.entities
-xml.writer xml.state assocs ;
+xml.backend xml.data xml.errors xml.elements ascii xml.entities
+xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
IN: xml
! -- Overall parser with data tree
reset-prolog init-ns-stack
start-document [ call-under ] when*
sax-loop
- ] state-parse ; inline recursive
+ ] with-state ; inline recursive
: (read-xml) ( -- )
start-document [ process ] when*
done? [ unclosed ] unless
xml-stack get first second
prolog-data get swap
- ] state-parse ;
+ ] with-state ;
: read-xml ( stream -- xml )
0 depth
-! Copyright (C) 2005, 2008 Slava Pestov.
+! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays byte-arrays kernel kernel.private math namespaces
make sequences strings words effects generic generic.standard
classes classes.algebra slots.private combinators accessors
-words sequences.private assocs alien quotations ;
+words sequences.private assocs alien quotations hashtables ;
IN: slots
TUPLE: slot-spec name offset class initial read-only ;
] [ ] make ;
: writer-props ( slot-spec -- assoc )
- [ "writing" set ] H{ } make-assoc ;
+ "writing" associate ;
: define-writer ( class slot-spec -- )
[ name>> writer-word ] [ writer-quot ] [ writer-props ] tri
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays html.parser.utils hashtables io kernel
namespaces make prettyprint quotations sequences splitting
-state-parser strings unicode.categories unicode.case ;
+html.parser.state strings unicode.categories unicode.case ;
IN: html.parser
TUPLE: tag name attributes text closing? ;
[ get-char CHAR: " = ] take-until ;
: read-quote ( -- string )
- get-char next* CHAR: ' =
- [ read-single-quote ] [ read-double-quote ] if next* ;
+ get-char next CHAR: ' =
+ [ read-single-quote ] [ read-double-quote ] if next ;
: read-key ( -- string )
read-whitespace*
: read-= ( -- )
read-whitespace*
- [ get-char CHAR: = = ] take-until drop next* ;
+ [ get-char CHAR: = = ] take-until drop next ;
: read-value ( -- string )
read-whitespace*
[ blank? ] trim ;
: read-comment ( -- )
- "-->" take-string* make-comment-tag push-tag ;
+ "-->" take-string make-comment-tag push-tag ;
: read-dtd ( -- )
- ">" take-string* make-dtd-tag push-tag ;
+ ">" take-string make-dtd-tag push-tag ;
: read-bang ( -- )
- next* get-char CHAR: - = get-next CHAR: - = and [
- next* next*
+ next get-char CHAR: - = get-next CHAR: - = and [
+ next next
read-comment
] [
read-dtd
: read-tag ( -- string )
[ get-char CHAR: > = get-char CHAR: < = or ] take-until
- get-char CHAR: < = [ next* ] unless ;
+ get-char CHAR: < = [ next ] unless ;
: read-< ( -- string )
- next* get-char CHAR: ! = [
+ next get-char CHAR: ! = [
read-bang f
] [
read-tag
--- /dev/null
+USING: tools.test html.parser.state ascii kernel ;
+IN: html.parser.state.tests
+
+: take-rest ( -- string )
+ [ f ] take-until ;
+
+: take-char ( -- string )
+ [ get-char = ] curry take-until ;
+
+[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
+[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
+[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
+! [ "foo " " bar" ] [ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: namespaces math kernel sequences accessors fry circular ;
+IN: html.parser.state
+
+TUPLE: state string i ;
+
+: get-i ( -- i ) state get i>> ;
+
+: get-char ( -- char )
+ state get [ i>> ] [ string>> ] bi ?nth ;
+
+: get-next ( -- char )
+ state get [ i>> 1+ ] [ string>> ] bi ?nth ;
+
+: next ( -- )
+ state get [ 1+ ] change-i drop ;
+
+: string-parse ( string quot -- )
+ [ 0 state boa state ] dip with-variable ;
+
+: short* ( n seq -- n' seq )
+ over [ nip dup length swap ] unless ;
+
+: skip-until ( quot: ( -- ? ) -- )
+ get-char [
+ [ call ] keep swap
+ [ drop ] [ next skip-until ] if
+ ] [ drop ] if ; inline recursive
+
+: take-until ( quot: ( -- ? ) -- )
+ [ get-i ] dip skip-until get-i
+ state get string>> subseq ;
+
+: string-matches? ( string circular -- ? )
+ get-char over push-circular sequence= ;
+
+: take-string ( match -- string )
+ dup length <circular-string>
+ [ 2dup string-matches? ] take-until nip
+ dup length rot length 1- - head next ;
USING: assocs combinators continuations hashtables
hashtables.private io kernel math
namespaces prettyprint quotations sequences splitting
-state-parser strings tools.test ;
+strings tools.test ;
USING: html.parser.utils ;
IN: html.parser.utils.tests
! See http://factorcode.org/license.txt for BSD license.
USING: assocs circular combinators continuations hashtables
hashtables.private io kernel math namespaces prettyprint
-quotations sequences splitting state-parser strings
+quotations sequences splitting html.parser.state strings
combinators.short-circuit ;
IN: html.parser.utils
: string-parse-end? ( -- ? ) get-next not ;
-: take-string* ( match -- string )
- dup length <circular-string>
- [ 2dup string-matches? ] take-until nip
- dup length rot length 1- - head next* ;
-
: trim1 ( seq ch -- newseq )
[ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;
void init_parameters_from_args(F_PARAMETERS *p, int argc, F_CHAR **argv)
{
default_parameters(p);
- const F_CHAR *executable_path = vm_executable_path();
- p->executable_path = executable_path ? executable_path : argv[0];
+ p->executable_path = argv[0];
int i = 0;
/* OS-specific initialization */
early_init();
+ const F_CHAR *executable_path = vm_executable_path();
+
+ if(executable_path)
+ p->executable_path = executable_path;
+
if(p->image_path == NULL)
p->image_path = default_image_path();