basis/xml/xml-docs.factor

   1 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: help.markup help.syntax xml.data io strings byte-arrays ;
   4 IN: xml
   5
   6 HELP: string>xml
   7 { $values { "string" string } { "xml" xml } }
   8 { $description "Converts a string into an " { $link xml }
   9     " tree for further processing." } ;
  10
  11 HELP: read-xml
  12 { $values { "stream" "an input stream" } { "xml" xml } }
   13 { $description "Exhausts the given stream, reading an XML document from it. A binary stream (one without an encoding) should be used as input; the encoding is automatically detected." } ;
  14
  15 HELP: file>xml
  16 { $values { "filename" string } { "xml" xml } }
  17 { $description "Opens the given file, reads it in as XML, closes the file and returns the corresponding XML tree. The encoding is automatically detected." } ;
  18
  19 HELP: bytes>xml
  20 { $values { "byte-array" byte-array } { "xml" xml } }
  21 { $description "Parses a byte array as an XML document. The encoding is automatically detected." } ;
  22
  23 { string>xml read-xml file>xml bytes>xml } related-words
  24
  25 HELP: read-xml-chunk
  26 { $values { "stream" "an input stream" } { "seq" "a sequence of elements" } }
   27 { $description "Rather than parsing a document, as " { $link read-xml } " does, this word parses and returns a sequence of XML elements (tags, strings, etc.), i.e. a document fragment. This is useful for pieces of XML which may have more than one main tag. The encoding is not automatically detected, so a stream with an encoding (i.e. one which returns strings from " { $link read } ") should be used as input." }
  28 { $see-also read-xml } ;
  29
  30 HELP: each-element
  31 { $values { "stream" "an input stream" } { "quot" "a quotation ( xml-elem -- )" } }
   32 { $description "Parses the XML document, and whenever an event is encountered (a tag piece, comment, processing instruction, directive or string element), the quotation is called with that event on the stack. The quotation is entirely responsible for dealing with the event properly. The encoding of the stream is automatically detected, so a binary input stream should be used." }
  33 { $see-also read-xml } ;
  34
  35 HELP: pull-xml
   36 { $class-description "Represents the state of a pull-parser for XML. Has one slot, " { $snippet "scope" } ", which is a namespace that contains all relevant state information." }
  37 { $see-also <pull-xml> pull-event pull-elem } ;
  38
  39 HELP: <pull-xml>
  40 { $values { "pull-xml" pull-xml } }
  41 { $description "Creates an XML pull-based parser which reads from " { $link input-stream } ", executing all initial XML commands to set up the parser." }
  42 { $see-also pull-xml pull-elem pull-event } ;
  43
  44 HELP: pull-elem
  45 { $values { "pull" "an XML pull parser" } { "xml-elem/f" "an XML tag, string, or f" } }
  46 { $description "Gets the next XML element from the given XML pull parser. Returns f upon exhaustion." }
  47 { $see-also pull-xml <pull-xml> pull-event } ;
  48
  49 HELP: pull-event
  50 { $values { "pull" "an XML pull parser" } { "xml-event/f" "an XML tag event, string, or f" } }
  51 { $description "Gets the next XML event from the given XML pull parser. Returns f upon exhaustion." }
  52 { $see-also pull-xml <pull-xml> pull-elem } ;
  53
  54 HELP: read-dtd
  55 { $values { "stream" "an input stream" } { "dtd" dtd } }
  56 { $description "Exhausts a stream, producing a " { $link dtd } " from the contents." } ;
  57
  58 HELP: file>dtd
  59 { $values { "filename" string } { "dtd" dtd } }
  60 { $description "Reads a file in UTF-8, converting it into an XML " { $link dtd } "." } ;
  61
  62 HELP: string>dtd
  63 { $values { "string" string } { "dtd" dtd } }
  64 { $description "Interprets a string as an XML " { $link dtd } "." } ;
  65
  66 { read-dtd file>dtd string>dtd } related-words
  67
  68 ARTICLE: { "xml" "reading" } "Reading XML"
   69 "The following words are used to read something into an XML document:"
  70 { $subsections
  71     read-xml
  72     read-xml-chunk
  73     string>xml
  74     string>xml-chunk
  75     file>xml
  76     bytes>xml
  77 }
  78 "To read a DTD:"
  79 { $subsections
  80     read-dtd
  81     file>dtd
  82     string>dtd
  83 } ;
  84
  85 ARTICLE: { "xml" "events" } "Event-based XML parsing"
   86     "In addition to DOM-style parsing based around " { $link read-xml } ", the XML module also provides SAX-style event-based parsing. This uses many of the same data structures as normal XML, with the exception of the classes " { $link xml } " and " { $link tag } ", and as such, the article " { $vocab-link "xml.data" } " may be useful in learning how to process documents in this way. Other useful words are:"
  87 { $subsections
  88     each-element
  89     opener
  90     closer
  91     contained
  92 }
  93 "There is also pull-based parsing to augment the push-parsing of SAX. This is probably easier to use and more logical. It uses the same parsing objects as the above style of parsing, except string elements are always in arrays, for example { \"\" }. Relevant pull-parsing words are:"
  94 { $subsections
  95     <pull-xml>
  96     pull-xml
  97     pull-event
  98     pull-elem
  99 } ;
 100
 101 ARTICLE: { "xml" "namespaces" } "Working with XML namespaces"
 102 "The Factor XML parser implements XML namespaces, and provides convenient utilities for working with them. Anywhere in the public API that a name is accepted as an argument, either a string or an XML name is accepted. If a string is used, it is coerced into a name by giving it a null namespace. Names are stored as " { $link name } " tuples, which have slots for the namespace prefix and namespace URL as well as the main part of the tag name." $nl
 103 "To make it easier to create XML names, the parsing word " { $snippet "XML-NS:" } " is provided in the " { $vocab-link "xml.syntax" } " vocabulary." $nl
 104 "When parsing XML, names are automatically augmented with the appropriate namespace URL when the information is available. This does not take into account any XML schema which might allow for such prefixes to be omitted. When generating XML to be written, keep in mind that the XML writer knows only about the literal prefixes and ignores the URLs. It is your job to make sure that they match up correctly, and that there is the appropriate " { $snippet "xmlns" } " declaration." ;
 105
 106 ARTICLE: "xml" "XML parser"
 107 "The " { $vocab-link "xml" } " vocabulary implements the XML 1.0 and 1.1 standards, converting strings of text into XML and vice versa. The parser checks for well-formedness but is not validating. There is only partial support for processing DTDs."
 108 { $subsections
 109     { "xml" "reading" }
 110     { "xml" "events" }
 111     { "xml" "namespaces" }
 112 }
 113 { $vocab-subsection "Writing XML" "xml.writer" }
 114 { $vocab-subsection "XML parsing errors" "xml.errors" }
 115 { $vocab-subsection "XML entities" "xml.entities" }
 116 { $vocab-subsection "XML data types" "xml.data" }
 117 { $vocab-subsection "Utilities for traversing XML" "xml.traversal" }
 118 { $vocab-subsection "Syntax extensions for XML" "xml.syntax" } ;
 119
 120 ABOUT: "xml"