factor: trim using lists

[factor.git] / basis / xml / tokenize / tokenize.factor
diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor

index ebabd2c893ec7f2bf282e648a6b5e20d89d0b07d..77f5f8d917d5d16a53383b81a8325149ceb0c0f9 100644 (file)
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@@ -1,9 +1,9 @@
  ! Copyright (C) 2005, 2009 Daniel Ehrenberg
  ! See http://factorcode.org/license.txt for BSD license.
-USING: namespaces xml.state kernel sequences accessors
-xml.char-classes xml.errors math io sbufs fry strings ascii
-xml.entities assocs splitting math.parser
-locals combinators combinators.short-circuit arrays hints ;
+USING: accessors ascii assocs combinators
+combinators.short-circuit hints io kernel math math.parser
+namespaces sbufs sequences splitting strings xml.char-classes
+xml.entities xml.errors xml.state ;
  IN: xml.tokenize
  
  ! * Basic utility words
@@ -72,30 +72,38 @@ HINTS: next* { spot } ;
      spot get (skip-until) ; inline
  
  : take-until ( ... quot: ( ... char -- ... ? ) -- ... string )
-    #! Take the substring of a string starting at spot
-    #! from code until the quotation given is true and
-    #! advance spot to after the substring.
+    ! Take the substring of a string starting at spot
+    ! from code until the quotation given is true and
+    ! advance spot to after the substring.
     10 <sbuf> [
         '[ _ keep over [ drop ] [ _ push ] if ] skip-until
-   ] keep >string ; inline
+   ] keep "" like ; inline
  
  : take-to ( seq -- string )
-    '[ _ member? ] take-until ;
+    '[ _ member? ] take-until ; inline
  
  : pass-blank ( -- )
-    #! Advance code past any whitespace, including newlines
+    ! Advance code past any whitespace, including newlines
      [ blank? not ] skip-until ;
  
  : next-matching ( pos ch str -- pos' )
-    [ over ] dip nth eq? [ 1 + ] [ drop 0 ] if ;
+    overd nth eq? [ 1 + ] [ drop 0 ] if ; inline
  
  : string-matcher ( str -- quot: ( pos char -- pos ? ) )
      dup length 1 - '[ _ next-matching dup _ > ] ; inline
  
+:: (take-string) ( match spot -- sbuf matched? )
+    10 <sbuf> f [
+        spot char>> [
+            nip over push
+            spot next*
+            dup match tail? dup not
+        ] [ f ] if*
+    ] loop ; inline
+
  : take-string ( match -- string )
-    [ 0 swap string-matcher take-until nip ] keep
-    dupd [ length ] bi@ 1 - - head
-    get-char [ missing-close ] unless next ;
+    [ spot get (take-string) [ missing-close ] unless ]
+    [ dupd [ length ] bi@ - over shorten "" like ] bi ;
  
  : expect ( string -- )
      dup length spot get '[ _ [ char>> ] keep next* ] "" replicate-as
@@ -130,7 +138,7 @@ HINTS: next* { spot } ;
              accum parse-entity
              quot accum spot (parse-char)
          ] }
-        { [ char CHAR: % eq? in-dtd? get and ] [
+        { [ char CHAR: % eq? [ in-dtd? get ] [ f ] if ] [
              accum parse-pe
              quot accum spot (parse-char)
          ] }
@@ -142,14 +150,14 @@ HINTS: next* { spot } ;
      } cond ; inline recursive
  
  : parse-char ( quot: ( ch -- ? ) -- seq )
-    1024 <sbuf> [ spot get (parse-char) ] keep >string ; inline
+    512 <sbuf> [ spot get (parse-char) ] keep "" like ; inline
  
  : assure-no-]]> ( pos char -- pos' )
-    "]]>" next-matching dup 2 > [ text-w/]]> ] when ;
+    "]]>" next-matching dup 2 > [ text-w/]]> ] when ; inline
  
  :: parse-text ( -- string )
-    0 :> pos!
      depth get zero? :> no-text
+    0 :> pos!
      [| char |
          pos char assure-no-]]> pos!
          no-text [
@@ -164,7 +172,7 @@ HINTS: next* { spot } ;
      pass-blank ">" expect ;
  
  : normalize-quote ( str -- str )
-    [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
+    [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map! ;
  
  : (parse-quote) ( <-disallowed? ch -- string )
      swap '[
@@ -179,4 +187,3 @@ HINTS: next* { spot } ;
  
  : parse-quote ( -- seq )
     f parse-quote* ;
-