! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
-USING: namespaces xml.state kernel sequences accessors
-xml.char-classes xml.errors math io sbufs fry strings ascii
-xml.entities assocs splitting math.parser
-locals combinators combinators.short-circuit arrays hints ;
+USING: accessors ascii assocs combinators
+combinators.short-circuit hints io kernel math math.parser
+namespaces sbufs sequences splitting strings xml.char-classes
+xml.entities xml.errors xml.state ;
IN: xml.tokenize
! * Basic utility words
spot get (skip-until) ; inline
: take-until ( ... quot: ( ... char -- ... ? ) -- ... string )
- #! Take the substring of a string starting at spot
- #! from code until the quotation given is true and
- #! advance spot to after the substring.
+ ! Take the substring of a string starting at spot
+ ! from code until the quotation given is true and
+ ! advance spot to after the substring.
10 <sbuf> [
'[ _ keep over [ drop ] [ _ push ] if ] skip-until
- ] keep >string ; inline
+ ] keep "" like ; inline
: take-to ( seq -- string )
- '[ _ member? ] take-until ;
+ '[ _ member? ] take-until ; inline
: pass-blank ( -- )
- #! Advance code past any whitespace, including newlines
+ ! Advance code past any whitespace, including newlines
[ blank? not ] skip-until ;
: next-matching ( pos ch str -- pos' )
- [ over ] dip nth eq? [ 1 + ] [ drop 0 ] if ;
+ overd nth eq? [ 1 + ] [ drop 0 ] if ; inline
: string-matcher ( str -- quot: ( pos char -- pos ? ) )
dup length 1 - '[ _ next-matching dup _ > ] ; inline
+:: (take-string) ( match spot -- sbuf matched? )
+ 10 <sbuf> f [
+ spot char>> [
+ nip over push
+ spot next*
+ dup match tail? dup not
+ ] [ f ] if*
+ ] loop ; inline
+
: take-string ( match -- string )
- [ 0 swap string-matcher take-until nip ] keep
- dupd [ length ] bi@ 1 - - head
- get-char [ missing-close ] unless next ;
+ [ spot get (take-string) [ missing-close ] unless ]
+ [ dupd [ length ] bi@ - over shorten "" like ] bi ;
: expect ( string -- )
dup length spot get '[ _ [ char>> ] keep next* ] "" replicate-as
accum parse-entity
quot accum spot (parse-char)
] }
- { [ char CHAR: % eq? in-dtd? get and ] [
+ { [ char CHAR: % eq? [ in-dtd? get ] [ f ] if ] [
accum parse-pe
quot accum spot (parse-char)
] }
} cond ; inline recursive
: parse-char ( quot: ( ch -- ? ) -- seq )
- 1024 <sbuf> [ spot get (parse-char) ] keep >string ; inline
+ 512 <sbuf> [ spot get (parse-char) ] keep "" like ; inline
: assure-no-]]> ( pos char -- pos' )
- "]]>" next-matching dup 2 > [ text-w/]]> ] when ;
+ "]]>" next-matching dup 2 > [ text-w/]]> ] when ; inline
:: parse-text ( -- string )
- 0 :> pos!
depth get zero? :> no-text
+ 0 :> pos!
[| char |
pos char assure-no-]]> pos!
no-text [
pass-blank ">" expect ;
: normalize-quote ( str -- str )
- [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
+ [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map! ;
: (parse-quote) ( <-disallowed? ch -- string )
swap '[
: parse-quote ( -- seq )
f parse-quote* ;
-