+! Passes utf16le to decode-input-if, consumes the two characters "?" and NUL
+! with a single expect, then runs check and instruct.
: start-utf16le ( -- tag )
utf16le decode-input-if
- CHAR: ? expect
- 0 expect check instruct ;
+ "?\0" expect
+ check instruct ;
+! True when bits 7 and 6 of ch are 1 and 0 respectively (the 10xxxxxx
+! pattern): shift right by six, keep the low two bits, compare with 2.
: 10xxxxxx? ( ch -- ? )
-6 shift 3 bitand 2 = ;
+! Consumes the literal "\u0000bb\u0000bf" with expect, passes utf8 to
+! decode-input, then expects "<" and runs check and make-tag.
+! NOTE(review): presumably the first byte of the BOM was already consumed by
+! the caller -- confirm against the call site.
: skip-utf8-bom ( -- tag )
"\u0000bb\u0000bf" expect utf8 decode-input
- CHAR: < expect check make-tag ;
+ "<" expect check make-tag ;
+! Applies decode-input-if followed by next to the encoding, and expect to the
+! string (via bi*), then runs check and make-tag.
: decode-expecting ( encoding string -- tag )
- [ decode-input-if next ] [ expect-string ] bi* check make-tag ;
+ [ decode-input-if next ] [ expect ] bi* check make-tag ;
+! Calls decode-expecting with the utf16be encoding and the string "<".
: start-utf16be ( -- tag )
utf16be "<" decode-expecting ;
HELP: <simple-name>
{ $values { "string" string } { "name" name } }
{ $description "Converts a string into an XML name with an empty prefix and URL." } ;
+
+HELP: element-decl
+{ $class-description "Describes the class of element declarations, like <!ELEMENT greeting (#PCDATA)>." } ;
+
+HELP: <element-decl>
+{ $values { "name" string } { "content-spec" string } { "element-decl" element-decl } }
+{ $description "Creates an element declaration object, of the class " { $link element-decl } } ;
+
+HELP: attlist-decl
+{ $class-description "Describes the class of attribute list declarations, like <!ATTLIST pre xml:space (preserve) #FIXED 'preserve'>." } ;
+
+HELP: <attlist-decl>
+{ $values { "name" string } { "att-defs" string } { "attlist-decl" attlist-decl } }
+{ $description "Creates an attribute list declaration object, of the class " { $link attlist-decl } } ;
+
+HELP: entity-decl
+{ $class-description "Describes the class of entity declarations, like <!ENTITY foo 'bar'>." } ;
+
+HELP: <entity-decl>
+{ $values { "name" name } { "def" string } { "pe?" "t or f" } { "entity-decl" entity-decl } }
+{ $description "Creates an entity declaration object, of the class " { $link entity-decl } ". The pe? slot should be t if the object is a DTD-internal entity, like <!ENTITY % foo 'bar'> and f if the object is like <!ENTITY foo 'bar'>, that is, it can be used outside of the DTD." } ;
+
+HELP: system-id
+{ $class-description "Describes the class of system identifiers within an XML DTD directive, such as <!DOCTYPE greeting " { $emphasis "SYSTEM 'hello.dtd'" } ">" } ;
+
+HELP: <system-id>
+{ $values { "system-literal" string } { "system-id" system-id } }
+{ $description "Constructs a " { $link system-id } " tuple." } ;
+
+HELP: public-id
+{ $class-description "Describes the class of public identifiers within an XML DTD directive, such as <!DOCTYPE open-hatch " { $emphasis "PUBLIC '-//Textuality//TEXT Standard open-hatch boilerplate//EN' 'http://www.textuality.com/boilerplate/OpenHatch.xml'" } ">" } ;
+
+HELP: <public-id>
+{ $values { "pubid-literal" string } { "system-literal" string } { "public-id" public-id } }
+{ $description "Constructs a " { $link public-id } " tuple." } ;
+
+HELP: notation-decl
+{ $class-description "Describes the class of notation declarations, like <!NOTATION jpg SYSTEM './jpgviewer'>." } ;
+
+HELP: <notation-decl>
+{ $values { "name" string } { "id" string } { "notation-decl" notation-decl } }
+{ $description "Creates a notation declaration object, of the class " { $link notation-decl } "." } ;
+
+HELP: doctype-decl
+{ $class-description "Describes the class of doctype declarations." } ;
+
+HELP: <doctype-decl>
+{ $values { "name" string } { "external-id" id } { "internal-subset" dtd/f } { "doctype-decl" doctype-decl } }
+{ $description "Creates a new doctype declaration object, of the class " { $link doctype-decl } ". The external-id and the internal-subset are each optional: either one, both, or neither may be f." } ;
TUPLE: directive ;
+! Directive holding an element name and its content specification, both
+! strings; <element-decl> takes them in that order.
TUPLE: element-decl < directive
- { name string } { content-spec string } ;
+ { name string }
+ { content-spec string } ;
C: <element-decl> element-decl
+! Directive holding an element name and its attribute definitions, both
+! strings; <attlist-decl> takes them in that order.
TUPLE: attlist-decl < directive
- { name string } { att-defs string } ;
+ { name string }
+ { att-defs string } ;
C: <attlist-decl> attlist-decl
UNION: boolean t POSTPONE: f ;
UNION: id system-id public-id POSTPONE: f ;
+! A DTD: the sequence of directives plus two assocs, one for entities and one
+! for parameter entities. <dtd> takes the slots in declaration order.
+TUPLE: dtd
+ { directives sequence }
+ { entities assoc }
+ { parameter-entities assoc } ;
+C: <dtd> dtd
+
+! Either a dtd or f; used as the type of an optional DTD slot.
+UNION: dtd/f dtd POSTPONE: f ;
+
+! Directive for a DOCTYPE declaration: the name, an external id (or f) and
+! an internal subset held as a dtd (or f).
TUPLE: doctype-decl < directive
{ name string }
{ external-id id }
- { internal-subset sequence } ;
+ { internal-subset dtd/f } ;
C: <doctype-decl> doctype-decl
-TUPLE: notation-decl < directive name id ;
+! Directive for a NOTATION declaration; both slots are declared as strings.
+TUPLE: notation-decl < directive
+ { name string }
+ { id string } ;
C: <notation-decl> notation-decl
TUPLE: instruction { text string } ;
! See http://factorcode.org/license.txt for BSD license.
USING: xml.tokenize xml.data xml.state kernel sequences ascii
fry xml.errors combinators hashtables namespaces xml.entities
-strings ;
+strings xml.name ;
IN: xml.dtd
-: take-word ( -- string )
- [ get-char blank? ] take-until ;
-
+! Skips blanks, reads one word with take-word, skips blanks again, then calls
+! take-string with ">" (presumably reading up to the closing ">" -- confirm
+! against take-string).
: take-decl-contents ( -- first second )
pass-blank take-word pass-blank ">" take-string ;
+! Builds a notation-decl from the two values returned by take-decl-contents.
: take-notation-decl ( -- notation-decl )
take-decl-contents <notation-decl> ;
-: take-until-one-of ( seps -- str sep )
- '[ get-char _ member? ] take-until get-char ;
-
-: take-system-id ( -- system-id )
- parse-quote <system-id> close ;
-
-: take-public-id ( -- public-id )
- parse-quote parse-quote <public-id> close ;
-
UNION: dtd-acceptable
directive comment instruction ;
-: (take-external-id) ( token -- external-id )
- pass-blank {
- { "SYSTEM" [ take-system-id ] }
- { "PUBLIC" [ take-public-id ] }
- [ bad-external-id ]
- } case ;
-
-: take-external-id ( -- external-id )
- take-word (take-external-id) ;
-
-: only-blanks ( str -- )
- [ blank? ] all? [ bad-decl ] unless ;
+! Reads an entity name with take-word, then its definition: a quoted literal
+! (parse-quote) when the next character is ' or ", otherwise an external id
+! followed by close. The definition is then stored under the name, using
+! ?set-at, in the assoc held by the variable var; 2keep leaves both the name
+! and the definition on the stack.
: take-entity-def ( var -- entity-name entity-def )
[
take-word pass-blank get-char {
{ CHAR: ' [ parse-quote ] }
{ CHAR: " [ parse-quote ] }
- [ drop take-external-id ]
+ [ drop take-external-id close ]
} case
] dip '[ swap _ [ ?set-at ] change ] 2keep ;
USING: kernel namespaces xml.tokenize xml.state xml.name
xml.data accessors arrays make xml.char-classes fry assocs sequences
math xml.errors sets combinators io.encodings io.encodings.iana
-unicode.case xml.dtd strings ;
+unicode.case xml.dtd strings xml.entities ;
IN: xml.elements
+! Parses one attribute: a name, optional blanks, "=", optional blanks, then a
+! quoted value read with t parse-quote*. The name/value pair is appended
+! with , to the sequence currently being built.
: parse-attr ( -- )
- parse-name pass-blank CHAR: = expect pass-blank
+ parse-name pass-blank "=" expect pass-blank
t parse-quote* 2array , ;
: start-tag ( -- name ? )
+! Runs tag-ns and skips blanks. If the next character is "/", pops the
+! namespace, builds a <contained> tag, steps past the slash and expects ">".
+! Otherwise increments depth, builds an <opener> and calls close.
: end-tag ( name attrs-alist -- tag )
tag-ns pass-blank get-char CHAR: / =
- [ pop-ns <contained> next CHAR: > expect ]
+ [ pop-ns <contained> next ">" expect ]
[ depth inc <opener> close ] if ;
+! Expects the opening "--", takes the text with "--" take-string, wraps it
+! in <comment>, then expects the closing ">".
: take-comment ( -- comment )
- "--" expect-string
+ "--" expect
"--" take-string
<comment>
- CHAR: > expect ;
+ ">" expect ;
: assure-no-extra ( seq -- )
[ first ] map {
string-input? get [ drop ] [ decode-input ] if ;
: parse-prolog ( -- prolog )
- pass-blank middle-tag "?>" expect-string
+ pass-blank middle-tag "?>" expect
dup assure-no-extra prolog-attrs
dup encoding>> dup "UTF-16" =
[ drop ] [ name>encoding [ decode-input-if ] when* ] if
+! Calls bad-cdata when depth is zero. Then expects "[CDATA[" and returns the
+! result of "]]>" take-string.
: take-cdata ( -- string )
depth get zero? [ bad-cdata ] when
- "[CDATA[" expect-string "]]>" take-string ;
+ "[CDATA[" expect "]]>" take-string ;
DEFER: make-tag ! Is this unavoidable?
: expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE
-: (take-internal-subset) ( -- )
+! Reads DTD content, dispatching on the next non-blank character:
+!   "]"  is consumed and ends the loop;
+!   "%"  calls expand-pe (no recursion follows);
+!   "<"  is skipped, a tag is parsed with make-tag, bad-doctype is called
+!        unless the tag is dtd-acceptable, the tag is appended with , and
+!        the word recurses;
+!   f    (no character available) ends the loop;
+!   anything else is converted with 1string and passed to bad-doctype.
+: dtd-loop ( -- )
pass-blank get-char {
{ CHAR: ] [ next ] }
{ CHAR: % [ expand-pe ] }
{ CHAR: < [
next make-tag dup dtd-acceptable?
- [ bad-doctype ] unless , (take-internal-subset)
+ [ bad-doctype ] unless , dtd-loop
] }
+ { f [ ] }
[ 1string bad-doctype ]
} case ;
-: take-internal-subset ( -- seq )
+! Parses DTD content into a dtd tuple. Inside make, a fresh hashtable is
+! installed as pe-table, in-dtd? is set to t, and dtd-loop collects the
+! directives; the pe-table is left on the stack. The two swaps then arrange
+! directives, the value of extra-entities, and the pe-table in the order
+! <dtd> takes them.
+: take-internal-subset ( -- dtd )
[
- H{ } pe-table set
+ H{ } clone pe-table set
t in-dtd? set
- (take-internal-subset)
- ] { } make ;
-
-: nontrivial-doctype ( -- external-id internal-subset )
- pass-blank get-char CHAR: [ = [
- next take-internal-subset f swap close
- ] [
- " >" take-until-one-of {
- { CHAR: \s [ (take-external-id) ] }
- { CHAR: > [ only-blanks f ] }
- } case f
- ] if ;
+ dtd-loop
+ pe-table get
+ ] { } make swap extra-entities get swap <dtd> ;
+
+! Reads an external id when the current character is "S" or "P" (the first
+! letters of the SYSTEM and PUBLIC keywords); otherwise returns f.
+: take-optional-id ( -- id/f )
+ get-char "SP" member?
+ [ take-external-id ] [ f ] if ;
+
+! When the current character is "[", steps past it and parses the internal
+! subset with take-internal-subset; otherwise returns f.
+: take-internal ( -- dtd/f )
+ get-char CHAR: [ =
+ [ next take-internal-subset ] [ f ] if ;
+! Reads, each preceded by optional blanks, the doctype name, an optional
+! external id and an optional internal subset; builds the doctype-decl and
+! then calls close.
: take-doctype-decl ( -- doctype-decl )
- pass-blank " >" take-until-one-of {
- { CHAR: \s [ nontrivial-doctype ] }
- { CHAR: > [ f f ] }
- } case <doctype-decl> ;
+ pass-blank take-name
+ pass-blank take-optional-id
+ pass-blank take-internal
+ <doctype-decl> close ;
: take-directive ( -- doctype )
take-name dup "DOCTYPE" =
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces make kernel assocs sequences fry values
-io.files io.encodings.binary ;
+io.files io.encodings.binary xml.state ;
IN: xml.entities
: entities-out
{ "quot" CHAR: " }
} ;
-SYMBOL: extra-entities
-
+! Calls quot inside with-scope after setting extra-entities to entities.
: with-entities ( entities quot -- )
[ swap extra-entities set call ] with-scope ; inline
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: assocs io.encodings.binary io.files kernel namespaces sequences
-values xml xml.entities ;
+values xml xml.entities accessors xml.state ;
IN: xml.entities.html
VALUE: html-entities
+! Parses the file with file>dtd and returns the entities slot of the result.
: read-entities-file ( file -- table )
- file>dtd nip ;
+ file>dtd entities>> ;
: get-html ( -- table )
{ "lat1" "special" "symbol" } [
! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces accessors xml.tokenize xml.data assocs
xml.errors xml.char-classes combinators.short-circuit splitting
-fry xml.state sequences ;
+fry xml.state sequences combinators ascii ;
IN: xml.name
! XML namespace processing: ns = namespace
+! Appends the result of take-name to string and passes the combined text to
+! interpret-name.
: parse-name-starting ( string -- name )
take-name append interpret-name ;
+! Reads one quoted literal with parse-quote and wraps it in a system-id.
+: take-system-id ( -- system-id )
+ parse-quote <system-id> ;
+
+! Reads two quoted literals with parse-quote and builds a public-id from
+! them, in the order read.
+: take-public-id ( -- public-id )
+ parse-quote parse-quote <public-id> ;
+
+! Given the keyword token, skips blanks and dispatches on it: "SYSTEM" reads
+! a system id, "PUBLIC" reads a public id, and any other token falls through
+! to bad-external-id.
+: (take-external-id) ( token -- external-id )
+ pass-blank {
+ { "SYSTEM" [ take-system-id ] }
+ { "PUBLIC" [ take-public-id ] }
+ [ bad-external-id ]
+ } case ;
+
+! Calls take-until with a predicate that tests whether get-char is blank.
+: take-word ( -- string )
+ [ get-char blank? ] take-until ;
+
+! Reads the next word with take-word and hands it to (take-external-id) as
+! the keyword token.
+: take-external-id ( -- external-id )
+ take-word (take-external-id) ;
SYMBOL: prolog-data
SYMBOL: depth
+
+SYMBOL: interpolating?
+
+SYMBOL: in-dtd?
+
+SYMBOL: pe-table
+
+SYMBOL: extra-entities
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
-[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd drop second ] unit-test
-[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd drop second ] unit-test
-[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd drop second ] unit-test
-[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd drop second ] unit-test
+[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd directives>> first ] unit-test
+[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd directives>> first ] unit-test
+[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd directives>> first ] unit-test
+[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd directives>> first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
-USING: xml io.encodings.utf8 io.files kernel tools.test ;
+USING: xml xml.data kernel tools.test ;
IN: xml.tests
-[ ] [
- "resource:basis/xmode/xmode.dtd" file>dtd 2drop
+[ t ] [
+ "resource:basis/xmode/xmode.dtd" file>dtd dtd?
] unit-test
dup length rot length 1- - head
get-char [ missing-close ] unless next ;
-: expect ( ch -- )
- get-char 2dup = [ 2drop ] [
- [ 1string ] bi@ expected
- ] if next ;
-
-: expect-string ( string -- )
+! Reads one character (get-char, then next) for each element of string and
+! compares what was read with string. On a mismatch, calls expected with
+! the wanted string and the characters actually read.
+: expect ( string -- )
dup [ get-char next ] replicate 2dup =
[ 2drop ] [ expected ] if ;
"x" ?head 16 10 ? base> ,
] [ parse-named-entity ] if ;
-SYMBOL: pe-table
-SYMBOL: in-dtd?
-
+! Steps past the current character, takes a name with CHAR: ; take-char,
+! steps once more, and looks the name up in pe-table. A hit is appended
+! with %; on a miss no-entity is called with the name.
: parse-pe ( -- )
next CHAR: ; take-char dup next
pe-table get at [ % ] [ no-entity ] ?if ;
] parse-char ;
+! Skips blanks and then expects ">".
: close ( -- )
- pass-blank CHAR: > expect ;
+ pass-blank ">" expect ;
+! Maps every tab, carriage return and newline in str to a space; all other
+! characters are left unchanged.
: normalize-quote ( str -- str )
[ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
[ pubid-literal>> write "' '" write ]\r
[ system-literal>> write "'" write ] bi ;\r
\r
-: write-internal-subset ( seq -- )\r
+! When given a dtd rather than f: writes "[", indents, writes each of its\r
+! directives with ?indent write-xml-chunk, unindents and writes "]".\r
+: write-internal-subset ( dtd -- )\r
[\r
"[" write indent\r
- [ ?indent write-xml-chunk ] each\r
+ directives>> [ ?indent write-xml-chunk ] each\r
unindent ?indent "]" write\r
] when* ;\r
\r
+! Opens filename with a binary file reader and parses it with read-xml.
: file>xml ( filename -- xml )
binary <file-reader> read-xml ;
-: (read-dtd) ( -- dtd )
- ! should filter out blanks, throw error on non-dtd stuff
- V{ } clone dup [ push ] curry sax-loop ;
-
-: read-dtd ( stream -- dtd entities )
+! Parses stream as a DTD inside with-state: resets the prolog, installs a
+! fresh hashtable as extra-entities, and returns the dtd built by
+! take-internal-subset.
+: read-dtd ( stream -- dtd )
[
- t in-dtd? set
reset-prolog
H{ } clone extra-entities set
- (read-dtd)
- extra-entities get
+ take-internal-subset
] with-state ;
-: file>dtd ( filename -- dtd entities )
+! Opens filename with a utf8 file reader and parses it with read-dtd.
+: file>dtd ( filename -- dtd )
utf8 <file-reader> read-dtd ;
-: string>dtd ( string -- dtd entities )
+! Wraps string in a string reader and parses it with read-dtd.
+: string>dtd ( string -- dtd )
<string-reader> read-dtd ;