basis/xml/xml-docs.factor

   1 ! Copyright (C) 2005, 2006 Daniel Ehrenberg\r
   2 ! See http://factorcode.org/license.txt for BSD license.\r
   3 USING: help.markup help.syntax kernel xml.data xml.errors\r
   4 xml.writer state-parser xml.tokenize xml.utilities xml.entities\r
   5 strings sequences io ;\r
   6 IN: xml\r
   7 \r
   8 HELP: string>xml\r
   9 { $values { "string" "a string" } { "xml" "an xml document" } }\r
  10 { $description "converts a string into an " { $link xml }\r
  11     " datatype for further processing" } ;\r
  12 \r
  13 HELP: read-xml\r
  14 { $values { "stream" "a stream that supports readln" }\r
  15     { "xml" "an XML document" } }\r
  16 { $description "exhausts the given stream, reading an XML document from it" } ;\r
  17 \r
  18 HELP: file>xml\r
  19 { $values { "filename" "a string representing a filename" }\r
  20     { "xml" "an XML document" } }\r
  21 { $description "opens the given file, reads it in as XML, closes the file and returns the corresponding XML tree" } ;\r
  22 \r
  23 { string>xml read-xml file>xml } related-words\r
  24 \r
  25 HELP: xml>string\r
  26 { $values { "xml" "an xml document" } { "string" "a string" } }\r
  27 { $description "converts an xml document (" { $link xml } ") into a string" }\r
  28 { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
  29 \r
  30 HELP: pprint-xml>string\r
  31 { $values { "xml" "an xml document" } { "string" "a string" } }\r
  32 { $description "converts an xml document (" { $link xml } ") into a string in a prettyprinted form." }\r
  33 { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
  34 \r
  35 HELP: xml-parse-error\r
  36 { $class-description "the exception class that all parsing errors in XML documents are in." } ;\r
  37 \r
  38 HELP: xml-reprint\r
  39 { $values { "string" "a string of XML" } }\r
  40 { $description "parses XML and prints it out again, for testing purposes" }\r
  41 { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
  42 \r
  43 HELP: write-xml\r
  44 { $values { "xml" "an XML document" } }\r
  45 { $description "prints the contents of an XML document (" { $link xml } ") to " { $link output-stream } "." }\r
  46 { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
  47 \r
  48 HELP: print-xml\r
  49 { $values { "xml" "an XML document" } }\r
  50 { $description "prints the contents of an XML document (" { $link xml } ") to " { $link output-stream } ", followed by a newline" }\r
  51 { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
  52 \r
  53 HELP: pprint-xml\r
  54 { $values { "xml" "an XML document" } }\r
  55 { $description "prints the contents of an XML document (" { $link xml } ") to " { $link output-stream } " in a prettyprinted form." }\r
  56 { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
  57 \r
  58 HELP: pprint-xml-but\r
  59 { $values { "xml" "an XML document" } { "sensitive-tags" "a sequence of names" } }\r
  60 { $description "Prettyprints an XML document, leaving the whitespace of the tags with names in sensitive-tags intact." }\r
  61 { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
  62 \r
  63 HELP: pprint-xml>string-but\r
  64 { $values { "xml" "an XML document" } { "sensitive-tags" "a sequence of names" } { "string" string } }\r
  65 { $description "Prettyprints an XML document, returning the result as a string and leaving the whitespace of the tags with names in sensitive-tags intact." }\r
  66 { $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;\r
  67 \r
  68 { xml>string print-xml write-xml pprint-xml xml-reprint pprint-xml>string pprint-xml>string-but pprint-xml-but } related-words\r
  69 \r
  70 HELP: PROCESS:\r
  71 { $syntax "PROCESS: word" }\r
  72 { $values { "word" "a new word to define" } }\r
  73 { $description "creates a new word to process XML tags" }\r
  74 { $see-also POSTPONE: TAG: } ;\r
  75 \r
  76 HELP: TAG:\r
  77 { $syntax "TAG: tag word definition... ;" }\r
  78 { $values { "tag" "an xml tag name" } { "word" "an XML process" } }\r
  79 { $description "defines what a process should do when it encounters a specific tag" }\r
  80 { $examples { $code "PROCESS: x ( tag -- )\nTAG: a x drop \"hi\" write ;" } }\r
  81 { $see-also POSTPONE: PROCESS: } ;\r
  82 HELP: build-tag*\r
  83 { $values { "items" "sequence of elements" } { "name" "string" }\r
  84     { "tag" tag } }\r
  85 { $description "builds a " { $link tag } " with the specified name, in the namespace \"\" and URL \"\" containing the children listed in items" }\r
  86 { $see-also build-tag build-xml } ;\r
  87 \r
  88 HELP: build-tag\r
  89 { $values { "item" "an element" } { "name" string } { "tag" tag } }\r
  90 { $description "builds a " { $link tag } " with the specified name containing the single child item" }\r
  91 { $see-also build-tag* build-xml } ;\r
  92 \r
  93 HELP: build-xml\r
  94 { $values { "tag" tag } { "xml" "an XML document" } }\r
  95 { $description "builds an XML document out of a tag" }\r
  96 { $see-also build-tag* build-tag } ;\r
  97 \r
  98 HELP: tag\r
  99 { $class-description "tuple representing an XML tag, delegating to a " { $link\r
 100 name } ", containing the slots attrs (an alist of names to strings) and children (a sequence). Tags implement the sequence protocol by acting like a sequence of their children, and the assoc protocol by acting like their attributes." }\r
 101 { $see-also <tag> name contained-tag xml } ;\r
 102 \r
 103 HELP: <tag>\r
 104 { $values { "name" "an XML tag name" }\r
 105     { "attrs" "an alist of names to strings" }\r
 106     { "children" sequence }\r
 107     { "tag" tag } }\r
 108 { $description "constructs an XML " { $link tag } " with the name (not a string) and tag attributes specified in attrs and children specified" }\r
 109 { $see-also tag <contained-tag> build-tag build-tag* } ;\r
 110 \r
 111 HELP: name\r
 112 { $class-description "represents an XML name, with the fields space (a string representing the namespace, as written in the document), tag (a string of the actual name of the tag) and url (a string of the URL that the namespace points to)" }\r
 113 { $see-also <name> tag } ;\r
 114 \r
 115 HELP: <name> ( space tag url -- name )\r
 116 { $values { "space" "a string" } { "tag" "a string" } { "url" "a string" }\r
 117     { "name" "an XML tag name" } }\r
 118 { $description "creates a name tuple with the name-space space and the tag-name tag and the tag-url url." }\r
 119 { $see-also name <tag> } ;\r
 120 \r
 121 HELP: contained-tag\r
 122 { $class-description "delegates to tag representing a tag like <a/> with no contents. The tag attributes are accessed with tag-attrs" }\r
 123 { $see-also tag <contained-tag> } ;\r
 124 \r
 125 HELP: <contained-tag>\r
 126 { $values { "name" "an XML tag name" }\r
 127     { "attrs" "an alist from names to strings" }\r
 128     { "tag" tag } }\r
 129 { $description "creates an empty tag (like <a/>) with the specified name and tag attributes. This delegates to tag" }\r
 130 { $see-also contained-tag <tag> } ;\r
 131 \r
 132 HELP: xml\r
 133 { $class-description "tuple representing an XML document, delegating to the main tag, containing the fields prolog (the header <?xml...?>), before (whatever comes between the prolog and the main tag) and after (whatever comes after the main tag)" }\r
 134 { $see-also <xml> tag prolog } ;\r
 135 \r
 136 HELP: <xml>\r
 137 { $values { "prolog" "an XML prolog" } { "before" "a sequence of XML elements" }\r
 138 { "main" tag } { "after" "a sequence of XML elements" } { "xml" "an XML document" } }\r
 139 { $description "creates an XML document, delegating to the main tag, with the specified prolog, before, and after" }\r
 140 { $see-also xml <tag> } ;\r
 141 \r
 142 HELP: prolog\r
 143 { $class-description "represents an XML prolog, with the tuple fields version (containing \"1.0\" or \"1.1\"), encoding (a string representing the encoding type), and standalone (t or f, whether the document is standalone without external entities)" }\r
 144 { $see-also <prolog> xml } ;\r
 145 \r
 146 HELP: <prolog> ( version encoding standalone -- prolog )\r
 147 { $values { "version" "a string, 1.0 or 1.1" }\r
 148 { "encoding" "a string" } { "standalone" "a boolean" } { "prolog" "an XML prolog" } }\r
 149 { $description "creates an XML prolog tuple" }\r
 150 { $see-also prolog <xml> } ;\r
 151 \r
 152 HELP: comment\r
 153 { $class-description "represents a comment in XML. Has one slot, text, which contains the string of the comment" }\r
 154 { $see-also <comment> } ;\r
 155 \r
 156 HELP: <comment> ( text -- comment )\r
 157 { $values { "text" "a string" } { "comment" "a comment" } }\r
 158 { $description "creates an XML comment tuple" }\r
 159 { $see-also comment } ;\r
 160 \r
 161 HELP: instruction\r
 162 { $class-description "represents an XML instruction, such as <?xsl stylesheet='foo.xml'?>. Contains one slot, text, which contains the string between the question marks." }\r
 163 { $see-also <instruction> } ;\r
 164 \r
 165 HELP: <instruction> ( text -- instruction )\r
 166 { $values { "text" "a string" } { "instruction" "an XML instruction" } }\r
 167 { $description "creates an XML processing instruction, such as <?xsl stylesheet='foo.xml'?>." }\r
 168 { $see-also instruction } ;\r
 169 \r
 170 HELP: names-match?\r
 171 { $values { "name1" "a name" } { "name2" "a name" } { "?" "t or f" } }\r
 172 { $description "checks to see if the two names match, that is, if all fields are equal, ignoring fields whose value is f in either name." }\r
 173 { $example "USING: prettyprint xml.data ;" "T{ name f \"rpc\" \"methodCall\" f } T{ name f f \"methodCall\" \"http://www.xmlrpc.org/\" } names-match? ." "t" }\r
 174 { $see-also name } ;\r
 175 \r
 176 HELP: xml-chunk\r
 177 { $values { "stream" "an input stream" } { "seq" "a sequence of elements" } }\r
 178 { $description "rather than parse a document, as " { $link read-xml } " does, this word parses and returns a sequence of XML elements (tags, strings, etc), ie a document fragment. This is useful for pieces of XML which may have more than one main tag." }\r
 179 { $see-also write-chunk read-xml } ;\r
 180 \r
 181 HELP: get-id\r
 182 { $values { "tag" "an XML tag or document" } { "id" "a string" } { "elem" "an XML element or f" } }\r
 183 { $description "finds the XML tag with the specified id, ignoring the namespace" }\r
 184 { $see-also tag-named deep-tag-named } ;\r
 185 \r
 186 HELP: process\r
 187 { $values { "object" "an opener, closer, contained or text element" } }\r
 188 { $description  "takes an XML event and, using the XML stack, processes it and adds it to the tree"  } ;\r
 189 \r
 190 HELP: sax\r
 191 { $values { "stream" "an input stream" } { "quot" "a quotation ( xml-elem -- )" } }\r
 192 { $description "parses the XML document, and whenever an event is encountered (a tag piece, comment, processing instruction, directive or string element), the quotation is called with that event on the stack. The quotation has all responsibility to deal with the event properly, and it is advised that generic words be used in dispatching on the event class." }\r
 193 { $notes "It is important to note that this is not SAX, merely an event-based XML view" }\r
 194 { $see-also read-xml } ;\r
 195 \r
 196 HELP: opener\r
 197 { $class-description "describes an opening tag, like <a>. Contains two slots, name and attrs containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }\r
 198 { $see-also closer contained } ;\r
 199 \r
 200 HELP: closer\r
 201 { $class-description "describes a closing tag, like </a>. Contains one slot, name, containing the tag's name. Usually, the name-url will be f." }\r
 202 { $see-also opener contained } ;\r
 203 \r
 204 HELP: contained\r
 205 { $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and attrs containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }\r
 206 { $see-also opener closer } ;\r
 207 \r
 208 HELP: parse-text\r
 209 { $values { "string" "a string" } }\r
 210 { $description "moves the pointer from the current spot to the beginning of the next tag, parsing the text underneath, returning the text element it passed. This parses XML entities like &bar; &#97; and &amp;" }\r
 211 { $see-also parse-name } ;\r
 212 \r
 213 HELP: parse-name\r
 214 { $values { "name" "an XML name" } }\r
 215 { $description "parses a " { $link name } " from the input stream. Returns a name with only the name-space and name-tag defined, with name-url=f" }\r
 216 { $see-also parse-text } ;\r
 217 \r
 218 HELP: make-tag\r
 219 { $values { "tag" "an opener, closer or contained" } }\r
 220 { $description "assuming the pointer is just past a <, this word parses until the next > and emits a tuple representing the tag parsed" }\r
 221 { $see-also opener closer contained } ;\r
 222 \r
 223 HELP: pull-xml\r
 224 { $class-description "represents the state of a pull-parser for XML. Has one slot, scope, which is a namespace which contains all relevant state information." }\r
 225 { $see-also <pull-xml> pull-event pull-elem } ;\r
 226 \r
 227 HELP: <pull-xml>\r
 228 { $values { "pull-xml" "a pull-xml tuple" } }\r
 229 { $description "creates an XML pull-based parser which reads from " { $link input-stream } ", executing all initial XML commands to set up the parser." }\r
 230 { $see-also pull-xml pull-elem pull-event } ;\r
 231 \r
 232 HELP: pull-elem\r
 233 { $values { "pull" "an XML pull parser" } { "xml-elem/f" "an XML tag, string, or f" } }\r
 234 { $description "gets the next XML element from the given XML pull parser. Returns f upon exhaustion." }\r
 235 { $see-also pull-xml <pull-xml> pull-event } ;\r
 236 \r
 237 HELP: pull-event\r
 238 { $values { "pull" "an XML pull parser" } { "xml-event/f" "an XML tag event, string, or f" } }\r
 239 { $description "gets the next XML event from the given XML pull parser. Returns f upon exhaustion." }\r
 240 { $see-also pull-xml <pull-xml> pull-elem } ;\r
 241 \r
 242 HELP: write-item\r
 243 { $values { "object" "an XML element" } }\r
 244 { $description "writes an XML element to " { $link output-stream } "." }\r
 245 { $see-also write-chunk write-xml } ;\r
 246 \r
 247 HELP: write-chunk\r
 248 { $values { "seq" "an XML document fragment" } }\r
 249 { $description "writes an XML document fragment, ie a sequence of XML elements, to " { $link output-stream } "." }\r
 250 { $see-also write-item write-xml } ;\r
 251 \r
 252 HELP: deep-tag-named\r
 253 { $values { "tag" "an XML tag or document" } { "name/string" "an XML name or string representing a name" } { "matching-tag" tag } }\r
 254 { $description "finds an XML tag with a matching name, recursively searching children and children of children" }\r
 255 { $see-also tags-named tag-named deep-tags-named } ;\r
 256 \r
 257 HELP: deep-tags-named\r
 258 { $values { "tag" "an XML tag or document" } { "name/string" "an XML name or string representing a name" } { "tags-seq" "a sequence of tags" } }\r
 259 { $description "returns a sequence of all tags of a matching name, recursively searching children and children of children" }\r
 260 { $see-also tag-named deep-tag-named tags-named } ;\r
 261 \r
 262 HELP: children>string\r
 263 { $values { "tag" "an XML tag or document" } { "string" "a string" } }\r
 264 { $description "concatenates the children of the tag, ignoring everything that's not a string" } ;\r
 265 \r
 266 HELP: children-tags\r
 267 { $values { "tag" "an XML tag or document" } { "sequence" sequence } }\r
 268 { $description "gets the children of the tag that are themselves tags" }\r
 269 { $see-also first-child-tag } ;\r
 270 \r
 271 HELP: first-child-tag\r
 272 { $values { "tag" "an XML tag or document" } { "tag" tag } }\r
 273 { $description "returns the first child of the given tag that is a tag" }\r
 274 { $see-also children-tags } ;\r
 275 \r
 276 HELP: multitags\r
 277 { $class-description "XML parsing error describing the case where there is more than one main tag in a document. Contains no slots" } ;\r
 278 \r
 279 HELP: notags\r
 280 { $class-description "XML parsing error describing the case where an XML document contains no main tag, or no tags at all" } ;\r
 281 \r
 282 HELP: extra-attrs\r
 283 { $class-description "XML parsing error describing the case where the XML prolog (<?xml ...?>) contains attributes other than the three allowed ones, standalone, version and encoding. Contains one slot, attrs, which is a hashtable of all the extra attributes' names. Delegates to " { $link parsing-error } "." } ;\r
 284 \r
 285 HELP: nonexist-ns\r
 286 { $class-description "XML parsing error describing the case where a namespace doesn't exist but it is used in a tag. Contains one slot, name, which contains the name of the undeclared namespace, and delegates to " { $link parsing-error } "." } ;\r
 287 \r
 288 HELP: not-yes/no\r
 289 { $class-description "XML parsing error used to describe the case where standalone is set in the XML prolog to something other than 'yes' or 'no'. Delegates to " { $link parsing-error } " and contains one slot, text, which contains the offending value." } ;\r
 290 \r
 291 HELP: unclosed\r
 292 { $class-description "XML parsing error used to describe the case where the XML document contains tags which are not closed by the end of the document. Contains one slot, tags, a sequence of names." } ;\r
 293 \r
 294 HELP: mismatched\r
 295 { $class-description "XML parsing error describing mismatched tags, eg <a></c>. Contains two slots: open is the name of the opening tag and close is the name of the closing tag. Delegates to " { $link parsing-error } " showing the location of the closing tag" } ;\r
 296 \r
 297 HELP: expected\r
 298 { $class-description "XML parsing error describing when an expected token was not present. Delegates to " { $link parsing-error } ". Contains two slots, should-be, which has the expected string, and was, which has the actual string." } ;\r
 299 \r
 300 HELP: no-entity\r
 301 { $class-description "XML parsing error describing the use of an undefined entity in a case where standalone is marked yes. Delegates to " { $link parsing-error } ". Contains one slot, thing, containing a string representing the entity." } ;\r
 302 \r
 303 HELP: xml-string-error\r
 304 { $class-description "XML parsing error that delegates to " { $link parsing-error } " and represents another, unspecified error, which is represented by the slot string, containing a string describing the error." } ;\r
 305 \r
 306 HELP: open-tag\r
 307 { $class-description "represents a tag that does have children, ie is not a contained tag" }\r
 308 { $notes "the constructor used for this class is simply " { $link <tag> } "." }\r
 309 { $see-also tag contained-tag } ;\r
 310 \r
 311 HELP: tag-named\r
 312 { $values { "tag" "an XML tag or document" }\r
 313     { "name/string" "an XML name or string representing the name" }\r
 314     { "matching-tag" tag } }\r
 315 { $description "finds the first tag with matching name which is the direct child of the given tag" }\r
 316 { $see-also deep-tags-named deep-tag-named tags-named } ;\r
 317 \r
 318 HELP: tags-named\r
 319 { $values { "tag" "an XML tag or document" }\r
 320     { "name/string" "an XML name or string representing the name" }\r
 321     { "tags-seq" "a sequence of tags" } }\r
 322 { $description "finds all tags with matching name that are the direct children of the given tag" }\r
 323 { $see-also deep-tag-named deep-tags-named tag-named } ;\r
 324 \r
 325 HELP: state-parse\r
 326 { $values { "stream" "an input stream" } { "quot" "a quotation ( -- )" } }\r
 327 { $description "takes a stream and runs an imperative parser on it, allowing words like " { $link next } " to be used within the context of the stream." } ;\r
 328 \r
 329 HELP: pre/post-content\r
 330 { $class-description "describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: string contains the offending string, and pre? is t if it occurred before the main tag and f if it occurred after" } ;\r
 331 \r
 332 HELP: entities\r
 333 { $description "a hash table from default XML entity names (like &amp; and &lt;) to the characters they represent. This is automatically included when parsing any XML document." }\r
 334 { $see-also html-entities } ;\r
 335 \r
 336 HELP: html-entities\r
 337 { $description "a hash table from HTML entity names to their character values" }\r
 338 { $see-also entities with-html-entities } ;\r
 339 \r
 340 HELP: with-entities\r
 341 { $values { "entities" "a hash table of strings to chars" }\r
 342     { "quot" "a quotation ( -- )" } }\r
 343 { $description "calls the quotation using the given table of entity values (symbolizing, eg, that &foo; represents CHAR: a) on top of the default XML entities" }\r
 344 { $see-also with-html-entities } ;\r
 345 \r
 346 HELP: with-html-entities\r
 347 { $values { "quot" "a quotation ( -- )" } }\r
 348 { $description "calls the given quotation using HTML entity values" }\r
 349 { $see-also html-entities with-entities } ;\r
 350 \r
 351 ARTICLE: { "xml" "reading" } "Reading XML"\r
 352     "The following words are used to read something into an XML document"\r
 353     { $subsection string>xml }\r
 354     { $subsection read-xml }\r
 355     { $subsection xml-chunk }\r
 356     { $subsection file>xml } ;\r
 357 \r
 358 ARTICLE: { "xml" "writing" } "Writing XML"\r
 359     "These words are used in implementing prettyprint"\r
 360     { $subsection write-item }\r
 361     { $subsection write-chunk }\r
 362     "These words are used to print XML normally"\r
 363     { $subsection xml>string }\r
 364     { $subsection write-xml }\r
 365     { $subsection print-xml }\r
 366     "These words are used to prettyprint XML"\r
 367     { $subsection pprint-xml>string }\r
 368     { $subsection pprint-xml>string-but }\r
 369     { $subsection pprint-xml }\r
 370     { $subsection pprint-xml-but }\r
 371     "This word reads and writes XML"\r
 372     { $subsection xml-reprint } ;\r
 373 \r
 374 ARTICLE: { "xml" "classes" } "XML data classes"\r
 375     "Data types that XML documents are made of:"\r
 376     { $subsection name }\r
 377     { $subsection tag }\r
 378     { $subsection contained-tag }\r
 379     { $subsection open-tag }\r
 380     { $subsection xml }\r
 381     { $subsection prolog }\r
 382     { $subsection comment }\r
 383     { $subsection instruction } ;\r
 384 \r
 385 ARTICLE: { "xml" "construct" } "XML data constructors"\r
 386     "These data types are constructed with:"\r
 387     { $subsection <name> }\r
 388     { $subsection <tag> }\r
 389     { $subsection <contained-tag> }\r
 390     { $subsection <xml> }\r
 391     { $subsection <prolog> }\r
 392     { $subsection <comment> }\r
 393     { $subsection <instruction> } ;\r
 394 \r
 395 ARTICLE: { "xml" "utils" } "XML processing utilities"\r
 396     "Utilities for processing XML include..."\r
 397     $nl\r
 398     "System for creating words which dispatch on XML tags:"\r
 399     { $subsection POSTPONE: PROCESS: }\r
 400     { $subsection POSTPONE: TAG: }\r
 401     "Getting parts of an XML document or tag:"\r
 402     $nl\r
 403     "Note: the difference between deep-tag-named and tag-named is that the former searches recursively among all children and children of children of the tag, while the latter only looks at the direct children, and is therefore more efficient."\r
 404     { $subsection tag-named }\r
 405     { $subsection tags-named }\r
 406     { $subsection deep-tag-named }\r
 407     { $subsection deep-tags-named }\r
 408     { $subsection get-id }\r
 409     "Words for simplified generation of XML:"\r
 410     { $subsection build-tag* }\r
 411     { $subsection build-tag }\r
 412     { $subsection build-xml }\r
 413     "Other relevant words:"\r
 414     { $subsection children>string }\r
 415     { $subsection children-tags }\r
 416     { $subsection first-child-tag }\r
 417     { $subsection names-match? }\r
 418     { $subsection assert-tag } ;\r
 419 \r
 420 ARTICLE: { "xml" "internal" } "Internals of the XML parser"\r
 421     "The XML parser creates its own parsing framework to process XML documents. The parser operates on streams. Important words involved in processing are:"\r
 422     { $subsection parse-text }\r
 423     { $subsection make-tag }\r
 424     { $subsection parse-name }\r
 425     { $subsection process }\r
 426     "The XML parser is implemented using the libs/state-parser module. For more information, see " { $link { "state-parser" "main" } } ;\r
 427 \r
 428 ARTICLE: { "xml" "events" } "Event-based XML parsing"\r
 429     "In addition to DOM-style parsing based around " { $link read-xml } ", the XML module also provides SAX-style event-based parsing. This uses many of the same data structures as normal XML, with the exception of the classes " { $link xml } " and " { $link tag } " and as such, the articles " { $link { "xml" "classes" } } " and " { $link { "xml" "construct" } } " may be useful in learning how to process documents in this way. Other useful words are:"\r
 430     { $subsection sax }\r
 431     { $subsection opener }\r
 432     { $subsection closer }\r
 433     { $subsection contained }\r
 434     "There is also pull-based parsing to augment the push-parsing of SAX. This is probably easier to use and more logical. It uses the same parsing objects as the above style of parsing, except string elements are always in arrays, for example { \"\" }. Relevant pull-parsing words are:"\r
 435     { $subsection <pull-xml> }\r
 436     { $subsection pull-xml }\r
 437     { $subsection pull-event }\r
 438     { $subsection pull-elem } ;\r
 439 \r
 440 ARTICLE: { "xml" "errors" } "XML parsing errors"\r
 441     "The XML module provides a rich and highly inspectable set of parsing errors. All XML errors are described by the union class " { $link xml-parse-error } " but there are many classes contained in that:"\r
 442     { $subsection multitags }\r
 443     { $subsection notags }\r
 444     { $subsection extra-attrs }\r
 445     { $subsection nonexist-ns }\r
 446     { $subsection not-yes/no }\r
 447     { $subsection unclosed }\r
 448     { $subsection mismatched }\r
 449     { $subsection expected }\r
 450     { $subsection no-entity }\r
 451     { $subsection pre/post-content }\r
 452     "Additionally, most of these errors delegate to " { $link parsing-error } " in order to provide more information"\r
 453     $nl\r
 454     "Note that, in parsing an XML document, only the first error is reported." ;\r
 455 \r
 456 ARTICLE: { "xml" "entities" } "XML entities"\r
 457     "When XML is parsed, entities like &foo; are replaced with the characters they represent. A few entities like &amp; and &lt; are defined by default, but more are available, and the set of entities can be customized. Below are some words involved in XML entities, defined in the vocabulary 'xml.entities':"\r
 458     { $subsection entities }\r
 459     { $subsection html-entities }\r
 460     { $subsection with-entities }\r
 461     { $subsection with-html-entities } ;\r
 462 \r
 463 ARTICLE: { "xml" "intro" } "XML"\r
 464     "The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress."\r
 465     $nl\r
 466     "The XML module was implemented by Daniel Ehrenberg, with contributions from the Factor community"\r
 467     { $subsection { "xml" "reading" } }\r
 468     { $subsection { "xml" "writing" } }\r
 469     { $subsection { "xml" "classes" } }\r
 470     { $subsection { "xml" "construct" } }\r
 471     { $subsection { "xml" "utils" } }\r
 472     { $subsection { "xml" "internal" } }\r
 473     { $subsection { "xml" "events" } }\r
 474     { $subsection { "xml" "errors" } }\r
 475     { $subsection { "xml" "entities" } } ;\r
 476 \r
 477 IN: xml\r
 478 \r
 479 ABOUT: { "xml" "intro" }\r