+++ /dev/null
-USING: arrays errors generic hashtables io kernel math
-namespaces parser prettyprint sequences strings vectors words ;
-IN: xml
-
-SYMBOL: code #! Source code
-SYMBOL: spot #! Current index of string
-SYMBOL: version
-SYMBOL: line
-SYMBOL: column
-
-! -- Error reporting
-
-TUPLE: xml-error line column ;
-C: xml-error ( -- xml-error )
- [ line get swap set-xml-error-line ] keep
- [ column get swap set-xml-error-column ] keep ;
-
-: xml-error. ( xml-error -- )
- "XML error" print
- "Line: " write dup xml-error-line .
- "Column: " write xml-error-column . ;
-
-TUPLE: expected should-be was ;
-C: expected ( should-be was -- error )
- [ <xml-error> swap set-delegate ] keep
- [ set-expected-was ] keep
- [ set-expected-should-be ] keep ;
-
-M: expected error.
- dup xml-error.
- "Token expected: " write dup expected-should-be print
- "Token present: " write expected-was print ;
-
-TUPLE: no-entity thing ;
-C: no-entity ( string -- entitiy )
- [ <xml-error> swap set-delegate ] keep
- [ set-no-entity-thing ] keep ;
-
-M: no-entity error.
- dup xml-error.
- "Entity does not exist: &" write no-entity-thing write ";" print ;
-
-TUPLE: xml-string-error string ;
-C: xml-string-error ( string -- xml-string-error )
- [ set-xml-string-error-string ] keep
- [ <xml-error> swap set-delegate ] keep ;
-
-M: xml-string-error error.
- dup xml-error.
- xml-string-error-string print ;
-
-! -- Basic utility words
-
-: set-code ( string -- ) ! for debugging
- code set [ spot line column ] [ 0 swap set ] each ;
-
-: more? ( -- ? )
- #! Return t if spot is not at the end of code
- code get length spot get = not ;
-
-: char ( -- char/f )
- more? [ spot get code get nth ] [ f ] if ;
-
-: incr-spot ( -- )
- #! Increment spot.
- spot inc
- char "\n\r" member? [ 0 column set line ] [ column ] if
- inc ;
-
-: skip-until ( quot -- )
- #! quot: ( char -- ? )
- more? [
- char swap [ call ] keep swap [ drop ] [
- incr-spot skip-until
- ] if
- ] [ drop ] if ; inline
-
-: take-until ( quot -- string | quot: char -- ? )
- #! Take the substring of a string starting at spot
- #! from code until the quotation given is true and
- #! advance spot to after the substring.
- spot get >r skip-until r>
- spot get code get subseq ; inline
-
-: pass-blank ( -- )
- #! Advance code past any whitespace, including newlines
- [ blank? not ] skip-until ;
-
-: string-matches? ( string -- ? )
- spot get dup pick length + code get subseq = ;
-
-: (take-until-string) ( string -- n )
- more? [
- dup string-matches? [
- drop spot get
- ] [
- incr-spot (take-until-string)
- ] if
- ] [ "Missing closing token" <xml-string-error> throw ] if ;
-
-: take-until-string ( string -- string )
- [ >r spot get r> (take-until-string) code get subseq ] keep
- length spot [ + ] change ;
-
-! -- Parsing strings
-
-: expect ( ch -- )
- char 2dup = [ 2drop ] [
- >r ch>string r> ch>string <expected> throw
- ] if incr-spot ;
-
-: expect-string ( string -- )
- >r spot get r> t over [ char incr-spot = and ] each [
- 2drop
- ] [
- swap spot get code get subseq <expected> throw
- ] if ;
-
-: entities
- #! We have both directions here as a shortcut.
- H{
- { "lt" CHAR: < }
- { "gt" CHAR: > }
- { "amp" CHAR: & }
- { "apos" CHAR: ' }
- { "quot" CHAR: " }
- { CHAR: < "<" }
- { CHAR: > ">" }
- { CHAR: & "&" }
- { CHAR: ' "'" }
- { CHAR: " """ }
- } ;
-
-: parse-entity ( -- ch )
- incr-spot [ CHAR: ; = ] take-until "#" ?head [
- "x" ?head 16 10 ? base>
- ] [
- dup entities hash [ ] [ <no-entity> throw ] ?if
- ] if ;
-
-: parsed-ch ( buf ch -- buf ) over push incr-spot ;
-
-: (parse-text) ( buf -- buf )
- {
- { [ more? not ] [ ] }
- { [ char CHAR: < = ] [ ] }
- { [ char CHAR: & = ] [ parse-entity parsed-ch (parse-text) ] }
- { [ t ] [ char parsed-ch (parse-text) ] }
- } cond ;
-
-: parse-text ( -- string )
- SBUF" " clone (parse-text) >string ;
-
-! -- Parsing tags
-
-: in-range-seq? ( number seq -- ? )
- #! seq: { { min max } { min max }* }
- [ first2 between? ] contains-with? ;
-
-: name-start-char? ( ch -- ? )
- {
- { CHAR: : CHAR: : }
- { CHAR: _ CHAR: _ }
- { CHAR: A CHAR: Z }
- { CHAR: a CHAR: z }
- { HEX: C0 HEX: D6 }
- { HEX: D8 HEX: F6 }
- { HEX: F8 HEX: 2FF }
- { HEX: 370 HEX: 37D }
- { HEX: 37F HEX: 1FFF }
- { HEX: 200C HEX: 200D }
- { HEX: 2070 HEX: 218F }
- { HEX: 2C00 HEX: 2FEF }
- { HEX: 3001 HEX: D7FF }
- { HEX: F900 HEX: FDCF }
- { HEX: FDF0 HEX: FFFD }
- { HEX: 10000 HEX: EFFFF }
- } in-range-seq? ;
-
-: name-char? ( ch -- ? )
- dup name-start-char? swap {
- { CHAR: - CHAR: - }
- { CHAR: . CHAR: . }
- { CHAR: 0 CHAR: 9 }
- { HEX: b7 HEX: b7 }
- { HEX: 300 HEX: 36F }
- { HEX: 203F HEX: 2040 }
- } in-range-seq? or ;
-
-: parse-name ( -- name )
- char dup name-start-char? [
- incr-spot ch>string [ name-char? not ] take-until append
- ] [
- "Malformed name" <xml-string-error> throw
- ] if ;
-
-TUPLE: opener name props ;
-TUPLE: closer name ;
-TUPLE: contained name props ;
-TUPLE: comment text ;
-TUPLE: directive text ;
-
-: start-tag ( -- string ? )
- #! Outputs the name and whether this is a closing tag
- char CHAR: / = dup [ incr-spot ] when
- parse-name swap ;
-
-: (parse-quot) ( ch buf -- buf )
- {
- { [ more? not ] [ nip ] }
- { [ char pick = ] [ incr-spot nip ] }
- { [ char CHAR: & = ] [ parse-entity parsed-ch (parse-quot) ] }
- { [ t ] [ char parsed-ch (parse-quot) ] }
- } cond ;
-
-: parse-quot ( ch -- str )
- SBUF" " clone (parse-quot) >string ;
-
-: parse-prop-value ( -- str )
- char dup "'\"" member? [
- incr-spot parse-quot
- ] [
- "Attribute lacks quote" <xml-string-error> throw
- ] if ;
-
-: parse-prop ( -- seq )
- parse-name pass-blank CHAR: = expect pass-blank
- parse-prop-value 2array ;
-
-: (middle-tag) ( seq -- seq )
- pass-blank char name-char?
- [ parse-prop over push (middle-tag) ] when ;
-
-: middle-tag ( -- hash )
- V{ } clone (middle-tag) alist>hash pass-blank ;
-
-: end-tag ( string hash -- tag )
- pass-blank char CHAR: / =
- [ <contained> incr-spot ] [ <opener> ] if ;
-
-: skip-comment ( -- comment )
- "--" expect-string
- "--" take-until-string
- <comment>
- CHAR: > expect ;
-
-: cdata ( -- string )
- "[CDATA[" expect-string "]]>" take-until-string ;
-
-: directive ( -- object )
- {
- { [ "--" string-matches? ] [ skip-comment ] }
- { [ "[CDATA[" string-matches? ] [ cdata ] }
- { [ t ] [ ">" take-until-string <directive> ] }
- } cond ;
-
-: make-tag ( -- tag/f )
- CHAR: < expect
- char CHAR: ! = [
- incr-spot directive
- ] [
- start-tag [
- <closer>
- ] [
- middle-tag end-tag
- ] if pass-blank CHAR: > expect
- ] if ;
-
-! -- Overall
-
-: get-version ( -- )
- "<?" string-matches? [
- "<?xml" expect-string
- pass-blank middle-tag "?>" expect-string
- "version" swap hash [ version set ] when*
- ] when ;
-
-! * Data tree
-
-TUPLE: tag name props children ;
-
-! A stack of { tag children } pairs
-SYMBOL: xml-stack
-
-TUPLE: mismatched open close ;
-M: mismatched error.
- "Mismatched tags" print
- "Opening tag: <" write dup mismatched-open write ">" print
- "Closing tag: </" write mismatched-close write ">" print ;
-
-TUPLE: unclosed tags ;
-C: unclosed ( -- unclosed )
- xml-stack get 1 tail-slice [ first opener-name ] map
- swap [ set-unclosed-tags ] keep ;
-M: unclosed error.
- "Unclosed tags" print
- "Tags: " print
- unclosed-tags [ " <" write write ">" print ] each ;
-
-: add-child ( object -- )
- xml-stack get peek second push ;
-
-: push-xml-stack ( object -- )
- V{ } clone 2array xml-stack get push ;
-
-GENERIC: process ( object -- )
-
-M: f process drop ;
-
-M: string process add-child ;
-M: comment process add-child ;
-M: directive process add-child ;
-
-M: contained process
- [ contained-name ] keep contained-props
- V{ } clone <tag> add-child ;
-
-M: opener process
- push-xml-stack ;
-
-M: closer process
- closer-name xml-stack get pop first2 >r [
- opener-name [
- 2dup = [ 2drop ] [ swap <mismatched> throw ] if
- ] keep
- ] keep opener-props r> <tag> add-child ;
-
-: init-xml-stack ( -- )
- V{ } clone xml-stack set f push-xml-stack ;
-
-: init-xml ( string -- )
- code set
- [ spot line column ] [ 0 swap set ] each
- "1.0" version set
- init-xml-stack ;
-
-: (string>xml) ( -- )
- parse-text process
- more? [ make-tag process (string>xml) ] when ; inline
-
-: string>xml ( string -- tag )
- #! Produces a tree of XML nodes
- [
- init-xml
- get-version (string>xml)
- xml-stack get
- dup length 1 = [ <unclosed> throw ] unless
- first second
- ] with-scope ;
-
-! * Printer
-
-: print-props ( hash -- )
- [
- " " % swap % "=\"" % % "\"" %
- ] hash-each ;
-
-GENERIC: (xml>string) ( object -- )
-
-: chars>entities ( str -- str )
- #! Convert <, >, &, ' and " to HTML entities.
- [
- [ dup entities hash [ % ] [ , ] ?if ] each
- ] "" make ;
-
-M: string (xml>string) chars>entities % ;
-
-: print-open/close ( tag -- )
- CHAR: > ,
- dup tag-children [ (xml>string) ] each
- "</" %
- tag-name %
- CHAR: > , ;
-
-M: tag (xml>string)
- CHAR: < ,
- dup tag-name %
- dup tag-props print-props
- dup tag-children [ empty? not ] contains?
- [ print-open/close ] [ drop "/>" % ] if ;
-
-M: comment (xml>string)
- "<!--" % comment-text % "-->" % ;
-
-M: object (xml>string)
- [ (xml>string) ] each ;
-
-: xml-preamble
- "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>" ;
-
-: xml>string ( xml -- string )
- [ xml-preamble % (xml>string) ] "" make ;
-
-: xml-reprint ( string -- string )
- string>xml xml>string ;
-
-! * Easy XML generation for more literal things
-! should this be rewritten?
-
-: text ( string -- )
- chars>entities add-child ;
-
-: tag ( string attr-quot contents-quot -- )
- >r swap >r make-hash r> swap r>
- -rot dupd <opener> process
- slip
- <closer> process ; inline
-
-: text-tag ( content name attr-quot -- ) [ text ] tag ; inline
-
-: comment ( string -- )
- <comment> add-child ;
-
-: make-xml ( quot -- vector )
- #! Produces a tree of XML from a quotation to generate it
- [
- init-xml-stack
- call
- xml-stack get
- first second first
- ] with-scope ; inline
-
-! * System for words specialized on tag names
-
-TUPLE: process-missing process tag ;
-M: process-missing error.
- "Tag <" write
- process-missing-tag tag-name write
- "> not implemented on process " write
- dup process-missing-process word-name print ;
-
-: run-process ( tag word -- )
- 2dup "xtable" word-prop
- >r dup tag-name r> hash* [ 2nip call ] [
- drop <process-missing> throw
- ] if ;
--- /dev/null
+! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! See http://factorcode.org/license.txt for BSD license.\r
+PROVIDE: contrib/xml\r
+{ +files+ {\r
+ "tokenizer.factor"\r
+ "parser.factor"\r
+ "writer.factor"\r
+ "utilities.factor"\r
+ "xml.facts"\r
+} }\r
+{ +tests+ {\r
+ "test.factor"\r
+} } ;\r
--- /dev/null
+! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! See http://factorcode.org/license.txt for BSD license.\r
+IN: xml\r
+USING: errors hashtables io kernel math namespaces prettyprint sequences\r
+ arrays generic strings ;\r
+\r
+TUPLE: opener name props ;\r
+TUPLE: closer name ;\r
+TUPLE: contained name props ;\r
+TUPLE: comment text ;\r
+TUPLE: directive text ;\r
+TUPLE: instruction text ;\r
+\r
+: start-tag ( -- name ? )\r
+ #! Outputs the name and whether this is a closing tag\r
+ char CHAR: / = dup [ incr-spot ] when\r
+ parse-name swap ;\r
+\r
+: (parse-quot) ( ch vector sbuf -- vector )\r
+ {\r
+ { [ more? not ] [ "File ended in quote" <xml-string-error> throw ] }\r
+ { [ char >r pick r> swap = ] [ >string over push nip incr-spot ] }\r
+ { [ char CHAR: & = ] [ parse-entity (parse-quot) ] }\r
+ { [ t ] [ char parsed-ch (parse-quot) ] }\r
+ } cond ;\r
+\r
+: parse-quot ( ch -- array )\r
+ V{ } clone SBUF" " clone (parse-quot) ;\r
+\r
+: parse-prop-value ( -- str )\r
+ char dup "'\"" member? [\r
+ incr-spot parse-quot\r
+ ] [\r
+ "Attribute lacks quote" <xml-string-error> throw\r
+ ] if ;\r
+\r
+: parse-prop ( -- seq )\r
+ parse-name pass-blank CHAR: = expect pass-blank\r
+ parse-prop-value 2array ;\r
+\r
+: (middle-tag) ( seq -- seq )\r
+ pass-blank char name-char?\r
+ [ parse-prop over push (middle-tag) ] when ;\r
+\r
+: middle-tag ( -- hash )\r
+ V{ } clone (middle-tag) alist>hash pass-blank ;\r
+\r
+: end-tag ( string hash -- tag )\r
+ pass-blank char CHAR: / =\r
+ [ <contained> incr-spot ] [ <opener> ] if ;\r
+\r
+: skip-comment ( -- comment )\r
+ "--" expect-string\r
+ "--" take-until-string\r
+ <comment>\r
+ CHAR: > expect ;\r
+\r
+: cdata ( -- string )\r
+ "[CDATA[" expect-string "]]>" take-until-string ;\r
+\r
+: directive ( -- object )\r
+ {\r
+ { [ "--" string-matches? ] [ skip-comment ] }\r
+ { [ "[CDATA[" string-matches? ] [ cdata ] }\r
+ { [ t ] [ ">" take-until-string <directive> ] }\r
+ } cond ;\r
+\r
+: instruction ( -- instruction )\r
+ ! this should make sure the name doesn't include 'xml'\r
+ "?>" take-until-string <instruction> ;\r
+\r
+: make-tag ( -- tag/f )\r
+ CHAR: < expect\r
+ { { [ char dup CHAR: ! = ] [ drop incr-spot directive ] }\r
+ { [ CHAR: ? = ] [ incr-spot instruction ] } \r
+ { [ t ] [\r
+ start-tag [ <closer> ] [\r
+ middle-tag end-tag\r
+ ] if pass-blank CHAR: > expect\r
+ ] } } cond ;\r
+\r
+! -- Overall parser with data tree\r
+\r
+TUPLE: tag name props children ;\r
+\r
+TUPLE: contained-tag ;\r
+C: contained-tag ( name props -- contained-tag )\r
+ [ >r { } <tag> r> set-delegate ] keep ;\r
+\r
+! A stack of { tag children } pairs\r
+SYMBOL: xml-stack\r
+\r
+! A stack of hashtables\r
+SYMBOL: namespace-stack\r
+\r
+TUPLE: mismatched open close ;\r
+: write-name ( name -- )\r
+ dup name-space dup "" = [ drop ] [ write ":" write ] if\r
+ name-tag write ;\r
+M: mismatched error.\r
+ "Mismatched tags" print\r
+ "Opening tag: <" write dup mismatched-open write-name ">" print\r
+ "Closing tag: </" write mismatched-close write-name ">" print ;\r
+\r
+TUPLE: unclosed tags ;\r
+C: unclosed ( -- unclosed )\r
+ xml-stack get 1 tail-slice [ first opener-name ] map\r
+ swap [ set-unclosed-tags ] keep ;\r
+M: unclosed error.\r
+ "Unclosed tags" print\r
+ "Tags: " print\r
+ unclosed-tags [ " <" write write ">" print ] each ;\r
+\r
+: add-child ( object -- )\r
+ xml-stack get peek second push ;\r
+\r
+: push-xml-stack ( object -- )\r
+ V{ } clone 2array xml-stack get push ;\r
+\r
+: process-ns ( hash -- hash )\r
+ ! This should assure all namespaces are URIs by replacing first\r
+ [\r
+ dup [ swap dup name-space "xmlns" =\r
+ [ >r first r> name-tag set ] [ 2drop ] if\r
+ ] hash-each\r
+ T{ name f "" "xmlns" } swap hash [ first "" set ] when*\r
+ ] make-hash ;\r
+\r
+TUPLE: nonexist-ns name ;\r
+M: nonexist-ns error.\r
+ "Namespace " write nonexist-ns-name write " has not been declared" print ;\r
+\r
+: add-ns2name ( name -- )\r
+ dup name-space dup namespace-stack get hash-stack\r
+ [ nip ] [ <nonexist-ns> throw ] if* swap set-name-url ;\r
+\r
+: push-ns-stack ( hash -- )\r
+ dup process-ns namespace-stack get push\r
+ [ drop add-ns2name ] hash-each ;\r
+\r
+: pop-ns-stack ( -- )\r
+ namespace-stack get pop drop ;\r
+\r
+GENERIC: process ( object -- )\r
+\r
+M: f process drop ;\r
+\r
+M: object process add-child ;\r
+\r
+M: contained process\r
+ [ contained-name ] keep contained-props\r
+ dup push-ns-stack >r dup add-ns2name r>\r
+ pop-ns-stack <contained-tag> add-child ;\r
+\r
+M: opener process ! move add-ns2name on name to closer and fix mismatched\r
+ dup opener-props push-ns-stack push-xml-stack ;\r
+\r
+M: closer process\r
+ closer-name xml-stack get pop first2 >r [ \r
+ opener-name [\r
+ 2dup = [ nip add-ns2name ] [ swap <mismatched> throw ] if\r
+ ] keep\r
+ ] keep opener-props r> <tag> add-child pop-ns-stack ;\r
+\r
+: init-ns-stack ( -- )\r
+ V{ H{\r
+ { "xml" "http://www.w3.org/XML/1998/namespace" }\r
+ { "xmlns" "http://www.w3.org/2000/xmlns" }\r
+ { "" "" }\r
+ } } clone\r
+ namespace-stack set ;\r
+\r
+: init-xml-stack ( -- )\r
+ V{ } clone xml-stack set f push-xml-stack ;\r
+\r
+TUPLE: xml-doc prolog before after ;\r
+C: xml-doc ( prolog before main after -- xml-doc )\r
+ [ set-xml-doc-after ] keep\r
+ [ set-delegate ] keep\r
+ [ set-xml-doc-before ] keep\r
+ [ set-xml-doc-prolog ] keep ;\r
+\r
+TUPLE: not-yes/no text ;\r
+M: not-yes/no error.\r
+ "Standalone must be either yes or no, not \"" write\r
+ not-yes/no-text write "\"." print ;\r
+\r
+: yes/no>bool ( string -- t/f )\r
+ dup "yes" = [ drop t ] [\r
+ dup "no" = [ drop f ] [\r
+ <not-yes/no> throw\r
+ ] if\r
+ ] if ;\r
+\r
+TUPLE: extra-attrs attrs ;\r
+M: extra-attrs error.\r
+ "Extra attributes included in xml version declaration:" print\r
+ extra-attrs-attrs . ;\r
+\r
+: assure-no-extra ( hash -- )\r
+ hash-keys {\r
+ T{ name f "" "version" f }\r
+ T{ name f "" "encoding" f }\r
+ T{ name f "" "standalone" f }\r
+ } swap diff dup empty? [ drop ] [ <extra-attrs> throw ] if ; \r
+\r
+: concat-strings ( seq -- string )\r
+ dup [ string? ] all?\r
+ [ "XML prolog attributes contain undefined entities"\r
+ <xml-string-error> throw ] unless\r
+ concat ;\r
+\r
+: prolog-attr ( hash name default -- value )\r
+ >r "" swap <name> swap ?hash concat-strings\r
+ [ r> drop ] [ r> ] if* ; \r
+\r
+: parse-prolog ( -- prolog )\r
+ "<?xml" string-matches? [\r
+ 5 expect-string*\r
+ pass-blank middle-tag "?>" expect-string\r
+ dup assure-no-extra\r
+ ] [ f ] if \r
+ [ "version" "1.0" prolog-attr ] keep\r
+ [ "encoding" "iso-8859-1" prolog-attr ] keep\r
+ "standalone" "no" prolog-attr yes/no>bool\r
+ <prolog> dup prolog-data set ;\r
+\r
+: init-xml ( string -- )\r
+ code set\r
+ [ spot line column ] [ 0 swap set ] each\r
+ init-xml-stack init-ns-stack ;\r
+\r
+UNION: any-tag tag contained-tag ;\r
+\r
+TUPLE: notags ;\r
+M: notags error.\r
+ "XML document lacks a main tag" print ;\r
+\r
+TUPLE: multitags ;\r
+M: multitags error.\r
+ "XML document contains multiple tags" print ;\r
+\r
+: make-xml-doc ( prolog seq -- xml-doc )\r
+ dup [ any-tag? ] find\r
+ >r dup -1 = [ <notags> throw ] when\r
+ swap cut 1 tail\r
+ dup [ any-tag? ] contains? [ <multitags> throw ] when r>\r
+ swap <xml-doc> ;\r
+\r
+: (string>xml) ( -- )\r
+ parse-text process\r
+ more? [ make-tag process (string>xml) ] when ; inline\r
+\r
+: string>xml ( string -- xml-doc )\r
+ #! Produces a tree of XML nodes\r
+ [\r
+ init-xml\r
+ parse-prolog (string>xml)\r
+ xml-stack get\r
+ dup length 1 = [ <unclosed> throw ] unless\r
+ first second\r
+ ] with-scope make-xml-doc ;\r
+\r
+UNION: xml-parse-error multitags notags xml-error extra-attrs nonexist-ns\r
+ not-yes/no unclosed mismatched xml-string-error expected no-entity ;\r
--- /dev/null
+! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! See http://factorcode.org/license.txt for BSD license.\r
+IN: temporary\r
+USING: kernel xml test io namespaces hashtables sequences errors ;\r
+\r
+! This is insufficient\r
+SYMBOL: xml-file\r
+[ ] [ "contrib/xml/test.xml" resource-path <file-reader>\r
+ contents string>xml xml-file set ] unit-test\r
+[ "1.0" ] [ xml-file get xml-doc-prolog prolog-version ] unit-test\r
+[ f ] [ xml-file get xml-doc-prolog prolog-standalone ] unit-test\r
+[ "a" ] [ xml-file get tag-name name-space ] unit-test\r
+[ "http://www.hello.com" ] [ xml-file get tag-name name-url ] unit-test\r
+[ V{ "that" } ] [ T{ name f "" "this" "http://d.de" } xml-file get\r
+ tag-props hash ] unit-test\r
+[ t ] [ xml-file get tag-children second contained-tag? ] unit-test\r
+[ t ] [ [ "<a></b>" string>xml ] catch xml-parse-error? ] unit-test\r
--- /dev/null
+<?xml version='1.0' encoding="UTF-8" standalone="no" ?>\r
+<!--This is where the fun begins!-->\r
+<!DOCTYPE greeting SYSTEM "hello.dtd">\r
+ <!--this is fun, isn't it, guys?-->\r
+<a:b xmlns:a='http://www.hello.com' xmlns='http://d.de'\r
+ this='that' that="this">\r
+ <b xmlns='http://b.nu' feeling='sombre'/>\r
+ Here's a new, undefined &entity;\r
+ <a:c><d mood="happy"></d></a:c>\r
+</a:b>\r
+<!--Well, that went over pretty well-->\r
+<?xsl stylesheet="that-one.xsl"?>\r
--- /dev/null
+! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! See http://factorcode.org/license.txt for BSD license.\r
+IN: xml\r
+USING: errors hashtables io kernel math namespaces prettyprint sequences tools\r
+ generic strings ;\r
+\r
+SYMBOL: code #! Source code\r
+SYMBOL: spot #! Current index of string\r
+SYMBOL: prolog-data\r
+SYMBOL: line\r
+SYMBOL: column\r
+\r
+! -- Error reporting\r
+\r
+TUPLE: xml-error line column ;\r
+C: xml-error ( -- xml-error )\r
+ [ line get swap set-xml-error-line ] keep\r
+ [ column get swap set-xml-error-column ] keep ;\r
+\r
+: xml-error. ( xml-error -- )\r
+ "XML error" print\r
+ "Line: " write dup xml-error-line .\r
+ "Column: " write xml-error-column . ;\r
+\r
+TUPLE: expected should-be was ;\r
+C: expected ( should-be was -- error )\r
+ [ <xml-error> swap set-delegate ] keep\r
+ [ set-expected-was ] keep\r
+ [ set-expected-should-be ] keep ;\r
+\r
+M: expected error.\r
+ dup xml-error.\r
+ "Token expected: " write dup expected-should-be print\r
+ "Token present: " write expected-was print ;\r
+\r
+TUPLE: no-entity thing ;\r
+C: no-entity ( string -- entitiy )\r
+ [ <xml-error> swap set-delegate ] keep\r
+ [ set-no-entity-thing ] keep ;\r
+\r
+M: no-entity error.\r
+ dup xml-error.\r
+ "Entity does not exist: &" write no-entity-thing write ";" print ;\r
+\r
+TUPLE: xml-string-error string ;\r
+C: xml-string-error ( string -- xml-string-error )\r
+ [ set-xml-string-error-string ] keep\r
+ [ <xml-error> swap set-delegate ] keep ;\r
+\r
+M: xml-string-error error.\r
+ dup xml-error.\r
+ xml-string-error-string print ;\r
+\r
+! -- Basic utility words\r
+\r
+: set-code ( string -- ) ! for debugging\r
+ code set [ spot line column ] [ 0 swap set ] each ;\r
+\r
+: more? ( -- ? )\r
+ #! Return t if spot is not at the end of code\r
+ code get length spot get = not ;\r
+\r
+: char ( -- char/f )\r
+ more? [ spot get code get nth ] [ f ] if ;\r
+\r
+: incr-spot ( -- )\r
+ #! Increment spot.\r
+ spot inc\r
+ char "\n\r" member? [ 0 column set line ] [ column ] if\r
+ inc ;\r
+\r
+: skip-until ( quot -- )\r
+ #! quot: ( char -- ? )\r
+ more? [\r
+ char swap [ call ] keep swap [ drop ] [\r
+ incr-spot skip-until\r
+ ] if\r
+ ] [ drop ] if ; inline\r
+\r
+: take-until ( quot -- string | quot: char -- ? )\r
+ #! Take the substring of a string starting at spot\r
+ #! from code until the quotation given is true and\r
+ #! advance spot to after the substring.\r
+ spot get >r skip-until r>\r
+ spot get code get subseq ; inline\r
+\r
+: pass-blank ( -- )\r
+ #! Advance code past any whitespace, including newlines\r
+ [ blank? not ] skip-until ;\r
+\r
+: string-matches? ( string -- ? )\r
+ spot get dup pick length + code get\r
+ 2dup length > [ 3drop drop f ] [ <slice> sequence= ] if ;\r
+\r
+: (take-until-string) ( string -- n )\r
+ more? [\r
+ dup string-matches? [\r
+ drop spot get\r
+ ] [\r
+ incr-spot (take-until-string)\r
+ ] if\r
+ ] [ "Missing closing token" <xml-string-error> throw ] if ;\r
+\r
+: take-until-string ( string -- string )\r
+ [ >r spot get r> (take-until-string) code get subseq ] keep\r
+ length spot [ + ] change ;\r
+\r
+! -- Parsing strings\r
+\r
+: expect ( ch -- )\r
+ char 2dup = [ 2drop ] [\r
+ >r ch>string r> ch>string <expected> throw\r
+ ] if incr-spot ;\r
+\r
+: expect-string* ( num -- )\r
+ #! only skips string\r
+ [ incr-spot ] times ;\r
+\r
+: expect-string ( string -- )\r
+ >r spot get r> t over [ char incr-spot = and ] each [\r
+ 2drop\r
+ ] [\r
+ swap spot get code get subseq <expected> throw\r
+ ] if ;\r
+\r
+TUPLE: prolog version encoding standalone ; ! part of xml-doc, see parser\r
+\r
+: entities\r
+ #! We have both directions here as a shortcut.\r
+ H{\r
+ { "lt" CHAR: < }\r
+ { "gt" CHAR: > }\r
+ { "amp" CHAR: & }\r
+ { "apos" CHAR: ' }\r
+ { "quot" CHAR: " }\r
+ { CHAR: < "<" }\r
+ { CHAR: > ">" }\r
+ { CHAR: & "&" }\r
+ { CHAR: ' "'" }\r
+ { CHAR: " """ }\r
+ } ;\r
+\r
+TUPLE: entity name ;\r
+\r
+: parsed-ch ( sbuf ch -- sbuf ) over push incr-spot ;\r
+\r
+: parse-entity ( vector sbuf -- vector sbuf )\r
+ incr-spot [ CHAR: ; = ] take-until "#" ?head [\r
+ "x" ?head 16 10 ? base> parsed-ch\r
+ ] [\r
+ dup entities hash [ parsed-ch ] [ \r
+ prolog-data get prolog-standalone\r
+ [ <no-entity> throw ] [\r
+ >r >string over push r> <entity> over push incr-spot SBUF" " \r
+ ] if\r
+ ] ?if\r
+ ] if ;\r
+\r
+: (parse-text) ( vector sbuf -- vector )\r
+ {\r
+ { [ more? not ] [ >string over push ] }\r
+ { [ char CHAR: < = ] [ >string over push ] }\r
+ { [ char CHAR: & = ] [ parse-entity (parse-text) ] }\r
+ { [ t ] [ char parsed-ch (parse-text) ] }\r
+ } cond ;\r
+\r
+: parse-text ( -- array )\r
+ V{ } clone SBUF" " clone (parse-text) ;\r
+\r
+! -- Parsing tags\r
+\r
+: in-range-seq? ( number seq -- ? )\r
+ #! seq: { { min max } { min max }* }\r
+ [ first2 between? ] contains-with? ;\r
+\r
+: name-start-char? ( ch -- ? )\r
+ {\r
+ { CHAR: _ CHAR: _ }\r
+ { CHAR: A CHAR: Z }\r
+ { CHAR: a CHAR: z }\r
+ { HEX: C0 HEX: D6 }\r
+ { HEX: D8 HEX: F6 }\r
+ { HEX: F8 HEX: 2FF }\r
+ { HEX: 370 HEX: 37D }\r
+ { HEX: 37F HEX: 1FFF }\r
+ { HEX: 200C HEX: 200D }\r
+ { HEX: 2070 HEX: 218F }\r
+ { HEX: 2C00 HEX: 2FEF }\r
+ { HEX: 3001 HEX: D7FF }\r
+ { HEX: F900 HEX: FDCF }\r
+ { HEX: FDF0 HEX: FFFD }\r
+ { HEX: 10000 HEX: EFFFF }\r
+ } in-range-seq? ;\r
+\r
+: name-char? ( ch -- ? )\r
+ dup name-start-char? swap {\r
+ { CHAR: - CHAR: - }\r
+ { CHAR: . CHAR: . }\r
+ { CHAR: 0 CHAR: 9 }\r
+ { HEX: b7 HEX: b7 }\r
+ { HEX: 300 HEX: 36F }\r
+ { HEX: 203F HEX: 2040 }\r
+ } in-range-seq? or ;\r
+\r
+TUPLE: name space tag url ;\r
+C: name ( space tag -- name )\r
+ [ set-name-tag ] keep\r
+ [ set-name-space ] keep ;\r
+\r
+: (parse-name) ( -- str )\r
+ char dup name-start-char? [\r
+ incr-spot ch>string [ name-char? not ] take-until append\r
+ ] [\r
+ "Malformed name" <xml-string-error> throw\r
+ ] if ;\r
+\r
+: parse-name ( -- str-name )\r
+ (parse-name) char CHAR: : =\r
+ [ incr-spot (parse-name) ] [ "" swap ] if <name> ;\r
--- /dev/null
+! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! See http://factorcode.org/license.txt for BSD license.\r
+IN: xml\r
+USING: hashtables kernel math namespaces sequences strings generic ;\r
+\r
+GENERIC: (xml>string) ( object -- )\r
+\r
+: print-name ( name -- )\r
+ dup name-space [ % CHAR: : , ] when*\r
+ name-tag % ;\r
+\r
+: print-props ( hash -- )\r
+ [\r
+ " " % swap print-name "=\"" % [ (xml>string) ] each "\"" %\r
+ ] hash-each ;\r
+\r
+: chars>entities ( str -- str )\r
+ #! Convert <, >, &, ' and " to HTML entities.\r
+ [\r
+ [ dup entities hash [ % ] [ , ] ?if ] each\r
+ ] "" make ;\r
+\r
+M: string (xml>string) chars>entities % ;\r
+\r
+M: contained-tag (xml>string)\r
+ CHAR: < ,\r
+ dup tag-name print-name\r
+ tag-props print-props\r
+ "/>" % ;\r
+\r
+M: tag (xml>string)\r
+ CHAR: < ,\r
+ dup tag-name print-name\r
+ dup tag-props print-props\r
+ CHAR: > ,\r
+ dup tag-children [ (xml>string) ] each\r
+ "</" % tag-name print-name CHAR: > , ;\r
+\r
+M: comment (xml>string)\r
+ "<!--" % comment-text % "-->" % ;\r
+\r
+M: object (xml>string)\r
+ [ (xml>string) ] each ;\r
+\r
+M: directive (xml>string)\r
+ "<!" % directive-text % CHAR: > , ;\r
+\r
+M: instruction (xml>string)\r
+ "<?" % instruction-text % "?>" % ;\r
+\r
+M: entity (xml>string)\r
+ CHAR: & , entity-name % CHAR: ; , ;\r
+\r
+: xml-preamble ( xml -- )\r
+ "<?xml version=\"" % dup prolog-version %\r
+ "\" encoding=\"" % dup prolog-encoding %\r
+ "\" standalone=\"" % prolog-standalone "yes" "no" ? %\r
+ "\"?>" % ;\r
+\r
+: xml>string ( xml-doc -- string )\r
+ [ \r
+ dup xml-doc-prolog xml-preamble\r
+ dup xml-doc-before (xml>string)\r
+ dup delegate (xml>string)\r
+ xml-doc-after (xml>string) ] "" make ;\r
+\r
+: xml-reprint ( string -- string )\r
+ string>xml xml>string ;\r
+\r
--- /dev/null
+! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
+! See http://factorcode.org/license.txt for BSD license.\r
+USING: help kernel xml ;\r
+\r
+HELP: string>xml\r
+{ $values { "string" "a string" } { "xml-doc" "an xml document" } }\r
+{ $description "converts a string into an " { $snippet "xml-doc" }\r
+ " datatype for further processing" } ;\r
+\r
+HELP: xml>string\r
+{ $values { "xml-doc" "an xml document" } { "string" "a string" } }\r
+{ $description "converts an xml document into a string" }\r
+{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
+\r
+HELP: xml-parse-error\r
+{ $description "the exception class that all parsing errors in XML documents are in." } ;\r
+\r
+HELP: xml-reprint\r
+{ $values { "in" "a string of XML" } { "out" "reprinted XML" } }\r
+{ $description "parses XML and converts it back into a string, for testing purposes" }\r
+{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
+\r
+ARTICLE: { "xml" "intro" } "XML"\r
+ "The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress."\r
+ $terpri\r
+ "The XML module was implemented by Daniel Ehrenberg, with edits by Slava Pestov. Main functions implemented include:"\r
+ { $subsection string>xml }\r
+ { $subsection xml>string } ;\r