! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
! See http://factorcode.org/license.txt for BSD license.\r
-IN: xml\r
USING: errors hashtables io kernel math namespaces prettyprint sequences\r
arrays generic strings vectors char-classes ;\r
+IN: xml\r
\r
! * Parsing tags\r
\r
! this should make sure the name doesn't include 'xml'\r
"?>" take-string <instruction> ;\r
\r
-: make-tag ( -- tag/f )\r
+: make-tag ( -- tag )\r
{ { [ get-char dup CHAR: ! = ] [ drop next directive ] }\r
{ [ CHAR: ? = ] [ next instruction ] } \r
{ [ t ] [\r
dup assure-no-extra prolog-attrs\r
] when ;\r
\r
-: init-xml ( stream -- )\r
+: basic-init ( stream -- )\r
stdio set\r
{ 0 0 0 "" } clone spot set\r
f record set f now-recording? set\r
next\r
- "1.0" "iso-8859-1" f <prolog> prolog-data set\r
- init-xml-stack\r
- init-ns-stack ;\r
+ "1.0" "iso-8859-1" f <prolog> prolog-data set ;\r
+\r
+: init-xml ( stream -- )\r
+ basic-init init-xml-stack init-ns-stack ;\r
\r
: init-xml-string ( string -- ) ! for debugging\r
<string-reader> init-xml ;\r
dup [ tag? ] contains? [ <multitags> throw ] when r>\r
swap <xml-doc> ;\r
\r
+! * Views of XML\r
+\r
+SYMBOL: text-now?\r
+\r
+TUPLE: pull-xml scope ;\r
+C: pull-xml ( stream -- pull-xml )\r
+ [\r
+ swap basic-init parse-prolog\r
+ t text-now? set\r
+ [ namestack pop swap set-pull-xml-scope ] keep\r
+ ] with-scope ;\r
+\r
+: pull-next ( pull -- xml-elem/f )\r
+ pull-xml-scope [\r
+ text-now? get [ parse-text f ] [\r
+ get-char [ make-tag t ] [ f f ] if\r
+ ] if text-now? set \r
+ ] bind ;\r
+\r
+: call-under ( quot object -- quot )\r
+ swap dup slip ; inline\r
+\r
: sax-loop ( quot -- ) ! quot: xml-elem --\r
- parse-text [ swap dup slip ] each\r
- get-char [ make-tag swap dup slip sax-loop ]\r
+ parse-text [ call-under ] each\r
+ get-char [ make-tag call-under sax-loop ]\r
[ drop ] if ; inline\r
\r
: sax ( stream quot -- ) ! quot: xml-elem --\r
swap [\r
- init-xml parse-prolog\r
- prolog-data get swap dup slip\r
+ basic-init parse-prolog\r
+ prolog-data get call-under\r
sax-loop\r
] with-scope ; inline\r
\r
[ write-str-elem ] each "\"" write\r
] hash-each ;\r
\r
-GENERIC: (xml>string) ( object -- )\r
+GENERIC: write-item ( object -- )\r
\r
-M: str-elem (xml>string) ! string element\r
+M: str-elem write-item ! string element\r
write-str-elem ;\r
\r
-M: contained-tag (xml>string)\r
+M: contained-tag write-item\r
CHAR: < write1\r
dup print-name\r
tag-props print-props\r
"/>" write ;\r
\r
-M: open-tag (xml>string)\r
+M: open-tag write-item\r
CHAR: < write1\r
dup print-name\r
dup tag-props print-props\r
CHAR: > write1\r
- dup tag-children [ (xml>string) ] each\r
+ dup tag-children [ write-item ] each\r
"</" write print-name CHAR: > write1 ;\r
\r
-M: comment (xml>string)\r
+M: comment write-item\r
"<!--" write comment-text write "-->" write ;\r
\r
-M: directive (xml>string)\r
+M: directive write-item\r
"<!" write directive-text write CHAR: > write1 ;\r
\r
-M: instruction (xml>string)\r
+M: instruction write-item\r
"<?" write instruction-text write "?>" write ;\r
\r
: xml-preamble ( xml -- )\r
prolog-standalone "yes" "no" ? write\r
"\"?>" write ;\r
\r
+: write-chunk ( seq -- )\r
+ [ write-item ] each ;\r
+\r
: write-xml ( xml-doc -- )\r
dup xml-doc-prolog xml-preamble\r
- dup xml-doc-before [ (xml>string) ] each\r
- dup delegate (xml>string)\r
- xml-doc-after [ (xml>string) ] each ;\r
+ dup xml-doc-before write-chunk\r
+ dup delegate write-item\r
+ xml-doc-after write-chunk ;\r
\r
: print-xml ( xml-doc -- )\r
write-xml terpri ;\r
{ $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and props, containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }\r
{ $see-also opener closer } ;\r
\r
+HELP: parse-text\r
+{ $values { "array" "an array of text elements" } }\r
+{ $description "moves the pointer from the current spot to the beginning of the next tag, parsing the text underneath, returning the text elements it passed. This parses DTD references like %foo; and XML entities like &bar; a and &" }\r
+{ $see-also parse-name } ;\r
+\r
+HELP: parse-name\r
+{ $values { "name" "an XML name" } }\r
+{ $description "parses a " { $link name } " from the input stream. Returns a name with only the name-space and name-tag defined, with name-url=f" }\r
+{ $see-also parse-text } ;\r
+\r
+HELP: make-tag\r
+{ $values { "tag" "an opener, closer or contained" } }\r
+{ $description "assuming the pointer is just past a <, this word parses until the next > and emits a tuple representing the tag parsed" }\r
+{ $see-also opener closer contained } ;\r
+\r
+HELP: pull-xml\r
+{ $class-description "represents the state of a pull-parser for XML. Has one slot, scope, which is a namespace which contains all relevant state information." }\r
+{ $see-also <pull-xml> pull-next } ;\r
+\r
+HELP: <pull-xml>\r
+{ $values { "stream" "an input stream containing XML" } { "pull-xml" "a pull-xml tuple" } }\r
+{ $description "creates an XML pull-based parser, executing all initial XML commands to set up the parser" }\r
+{ $see-also pull-xml pull-next } ;\r
+\r
+HELP: pull-next\r
+{ $values { "pull" "an XML pull parser" } { "xml-elem/f" "an XML tag event, sequence of string elements, or f" } }\r
+{ $description "gets the next xml element from the given XML pull parser. Returns f upon exaustion" }\r
+{ $see-also pull-xml <pull-xml> } ;\r
+\r
+HELP: write-item\r
+{ $values { "object" "an XML element" } }\r
+{ $description "writes an XML element to stdio" }\r
+{ $see-also write-chunk write-xml } ;\r
+\r
+HELP: write-chunk\r
+{ $values { "seq" "an XML document fragment" } }\r
+{ $description "writes an XML document fragment, ie a sequence of XML elements, to stdio" }\r
+{ $see-also write-item write-xml } ;\r
+\r
ARTICLE: { "xml" "intro" } "XML"\r
"The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress. Together with XML-RPC, this is a component of the F2EE framework."\r
$terpri\r
\r
ARTICLE: { "xml" "basic" } "Basic words for XML processing"\r
"These are the most basic words needed for processing an XML document"\r
+ $terpri\r
+ "Parsing XML:"\r
{ $subsection string>xml }\r
+ { $subsection read-xml }\r
+ { $subsection xml-chunk }\r
+ "Printing XML"\r
{ $subsection xml>string }\r
- { $subsection xml-parse-error }\r
- { $subsection xml-reprint }\r
{ $subsection write-xml }\r
- { $subsection read-xml }\r
- { $subsection xml-chunk } ;\r
+ { $subsection write-item }\r
+ { $subsection write-chunk }\r
+ "Other"\r
+ { $subsection xml-parse-error }\r
+ { $subsection xml-reprint } ;\r
\r
ARTICLE: { "xml" "classes" } "XML data classes"\r
"Data types that XML documents are made of:"\r
ARTICLE: { "xml" "utils" } "XML processing utilities"\r
"Utilities for processing XML include..."\r
$terpri\r
- "System for creating words which dispatch on XML tags:"\r
+ "System sfor creating words which dispatch on XML tags:"\r
{ $subsection POSTPONE: PROCESS: }\r
{ $subsection POSTPONE: TAG: }\r
"Combinators for traversing XML trees:"\r
{ $subsection take-char }\r
{ $subsection take-string }\r
{ $subsection next }\r
+ { $subsection parse-text }\r
+ { $subsection make-tag }\r
+ { $subsection parse-name }\r
{ $subsection process } ; ! should I have more? less?\r
\r
ARTICLE: { "xml" "events" } "Event-based XML parsing"\r
{ $subsection sax }\r
{ $subsection opener }\r
{ $subsection closer }\r
- { $subsection contained } ;\r
+ { $subsection contained }\r
+ "There is also pull-based parsing to augment the push-parsing of SAX. This is probably easier to use and more logical. It uses the same parsing objects as the above style of parsing, except string elements are always in arrays, for example { \"\" }. Relevant pull-parsing words are:"\r
+ { $subsection <pull-xml> }\r
+ { $subsection pull-xml }\r
+ { $subsection pull-next } ;\r