'[ _ string>xml ] swap '[ _ = ] must-fail-with ;
T{ no-entity f 1 10 "nbsp" } "<x> </x>" xml-error-test
-T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" } }
+T{ mismatched f 1 7 T{ name f "" "x" "" } T{ name f "" "y" "" } }
"<x></y>" xml-error-test
-T{ unclosed f 1 4 V{ T{ name f "" "x" "" } } } "<x>" xml-error-test
+T{ unclosed f 1 3 V{ T{ name f "" "x" "" } } } "<x>" xml-error-test
T{ nonexist-ns f 1 5 "x" } "<x:y/>" xml-error-test
-T{ unopened f 1 5 } "</x>" xml-error-test
+T{ unopened f 1 4 } "</x>" xml-error-test
T{ not-yes/no f 1 41 "maybe" }
"<?xml version='1.0' standalone='maybe'?><x/>" xml-error-test
T{ extra-attrs f 1 32 V{ T{ name f "" "foo" f } }
"<?xml version='5 million'?><x/>" xml-error-test
T{ notags f } "" xml-error-test
T{ multitags } "<x/><y/>" xml-error-test
-T{ bad-prolog f 1 26 T{ prolog f "1.0" "UTF-8" f } }
+T{ bad-prolog f 1 25 T{ prolog f "1.0" "UTF-8" f } }
"<x/><?xml version='1.0'?>" xml-error-test
T{ capitalized-prolog f 1 6 "XmL" } "<?XmL version='1.0'?><x/>"
xml-error-test
T{ pre/post-content f "x" t } "x<y/>" xml-error-test
T{ versionless-prolog f 1 8 } "<?xml?><x/>" xml-error-test
-T{ unclosed-quote f 1 13 } "<x value='/>" xml-error-test
+T{ unclosed-quote f 1 12 } "<x value='/>" xml-error-test
T{ bad-name f 1 3 "-" } "<-/>" xml-error-test
T{ quoteless-attr f 1 12 } "<x value=<->/>" xml-error-test
T{ quoteless-attr f 1 10 } "<x value=3/>" xml-error-test
T{ pre/post-content f "&" t } " <x/>" xml-error-test
T{ bad-doctype f 1 17 "a" } "<!DOCTYPE foo [ a ]><x/>" xml-error-test
T{ bad-doctype f 1 22 T{ opener { name T{ name f "" "foo" "" } } { attrs T{ attrs } } } } "<!DOCTYPE foo [ <foo> ]><x/>" xml-error-test
-T{ disallowed-char f 1 3 1 } "<x>\u000001</x>" xml-error-test
-T{ missing-close f 1 9 } "<!-- foo" xml-error-test
+T{ disallowed-char f 1 4 1 } "<x>\u000001</x>" xml-error-test
+T{ missing-close f 1 8 } "<!-- foo" xml-error-test
T{ misplaced-directive f 1 9 "ENTITY" } "<!ENTITY foo 'bar'><x/>" xml-error-test
USING: namespaces xml.state kernel sequences accessors
xml.char-classes xml.errors math io sbufs fry strings ascii
circular xml.entities assocs make splitting math.parser
-locals combinators arrays ;
+locals combinators arrays hints ;
IN: xml.tokenize
-: assure-good-char ( ch -- ch )
+! * Basic utility words
+
+: assure-good-char ( spot ch -- )
[
- version-1.0? over text? not get-check and
- [ disallowed-char ] when
- ] [ f ] if* ;
+ swap
+ [ version-1.0?>> over text? not ]
+ [ check>> ] bi and [
+ spot get [ 1+ ] change-column drop
+ disallowed-char
+ ] [ drop ] if
+ ] [ drop ] if* ;
+
+HINTS: assure-good-char { spot fixnum } ;
+
+: record ( spot char -- spot )
+ over char>> [
+ CHAR: \n =
+ [ [ 1+ ] change-line -1 ] [ dup column>> 1+ ] if
+ >>column
+ ] [ drop ] if ;
-! * Basic utility words
+HINTS: record { spot fixnum } ;
-: record ( char -- )
- CHAR: \n =
- [ 0 get-line 1+ set-line ] [ get-column 1+ ] if
- set-column ;
+:: (next) ( spot -- spot char )
+ spot next>> :> old-next
+ read1 :> new-next
+ old-next CHAR: \r = [
+ spot CHAR: \n >>char
+ new-next CHAR: \n =
+ [ read1 >>next ]
+ [ new-next >>next ] if
+ ] [ spot old-next >>char new-next >>next ] if
+ spot next>> ; inline
-! (next) normalizes \r\n and \r
-: (next) ( -- char )
- get-next read1
- 2dup swap CHAR: \r = [
- CHAR: \n =
- [ nip read1 ] [ nip CHAR: \n swap ] if
- ] [ drop ] if
- set-next dup set-char assure-good-char ;
+: next* ( spot -- )
+ dup char>> [ unexpected-end ] unless
+ (next) [ record ] keep assure-good-char ;
+
+HINTS: next* { spot } ;
: next ( -- )
- #! Increment spot.
- get-char [ unexpected-end ] unless (next) record ;
+ spot get next* ;
: init-parser ( -- )
- 0 1 0 f f t <spot> spot set
+ 0 1 0 0 f t <spot> spot set
read1 set-next next ;
: with-state ( stream quot -- )
! with-input-stream implicitly creates a new scope which we use
swap [ init-parser call ] with-input-stream ; inline
+:: (skip-until) ( quot: ( -- ? ) spot -- )
+ spot char>> [
+ quot call [
+ spot next* quot spot (skip-until)
+ ] unless
+ ] when ; inline recursive
+
: skip-until ( quot: ( -- ? ) -- )
- get-char [
- [ call ] keep swap [ drop ] [
- next skip-until
- ] if
- ] [ drop ] if ; inline recursive
+ spot get (skip-until) ; inline
: take-until ( quot -- string )
#! Take the substring of a string starting at spot
#! from code until the quotation given is true and
#! advance spot to after the substring.
10 <sbuf> [
- '[ @ [ t ] [ get-char _ push f ] if ] skip-until
+ spot get swap
+ '[ @ [ t ] [ _ char>> _ push f ] if ] skip-until
] keep >string ; inline
: take-to ( seq -- string )
- '[ get-char _ member? ] take-until ;
+ spot get swap '[ _ char>> _ member? ] take-until ;
: pass-blank ( -- )
#! Advance code past any whitespace, including newlines
- [ get-char blank? not ] skip-until ;
+ spot get '[ _ char>> blank? not ] skip-until ;
-: string-matches? ( string circular -- ? )
- get-char over push-circular
- sequence= ;
+: string-matches? ( string circular spot -- ? )
+ char>> over push-circular sequence= ;
: take-string ( match -- string )
dup length <circular-string>
- [ 2dup string-matches? ] take-until nip
+ spot get '[ 2dup _ string-matches? ] take-until nip
dup length rot length 1- - head
get-char [ missing-close ] unless next ;
: expect ( string -- )
- dup [ get-char next ] replicate 2dup =
- [ 2drop ] [ expected ] if ;
+ dup spot get '[ _ [ char>> ] keep next* ] replicate
+ 2dup = [ 2drop ] [ expected ] if ;
! Suddenly XML-specific
-: parse-named-entity ( string -- )
- dup entities at [ , ] [
+: parse-named-entity ( accum string -- )
+ dup entities at [ swap push ] [
dup extra-entities get at
- [ % ] [ no-entity ] ?if
+ [ swap push-all ] [ no-entity ] ?if
] ?if ;
: take-; ( -- string )
next ";" take-to next ;
-: parse-entity ( -- )
+: parse-entity ( accum -- )
take-; "#" ?head [
- "x" ?head 16 10 ? base> ,
+ "x" ?head 16 10 ? base> swap push
] [ parse-named-entity ] if ;
-: parse-pe ( -- )
+: parse-pe ( accum -- )
take-; dup pe-table get at
- [ % ] [ no-entity ] ?if ;
+ [ swap push-all ] [ no-entity ] ?if ;
-:: (parse-char) ( quot: ( ch -- ? ) -- )
+:: (parse-char) ( quot: ( ch -- ? ) accum -- )
get-char :> char
{
{ [ char not ] [ ] }
{ [ char quot call ] [ next ] }
- { [ char CHAR: & = ] [ parse-entity quot (parse-char) ] }
- { [ in-dtd? get char CHAR: % = and ] [ parse-pe quot (parse-char) ] }
- [ char , next quot (parse-char) ]
+ { [ char CHAR: & = ] [ accum parse-entity quot accum (parse-char) ] }
+ { [ in-dtd? get char CHAR: % = and ] [ accum parse-pe quot accum (parse-char) ] }
+ [ char accum push next quot accum (parse-char) ]
} cond ; inline recursive
: parse-char ( quot: ( ch -- ? ) -- seq )
- [ (parse-char) ] "" make ; inline
+ 1024 <sbuf> [ (parse-char) ] keep >string ; inline
: assure-no-]]> ( circular -- )
"]]>" sequence= [ text-w/]]> ] when ;