Delete empty unit tests files, remove 1- and 1+, reorder IN: lines in a lot of places...

[factor.git] / basis / xml / tokenize / tokenize.factor
diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor

index 943f4e7a157e96743c7dfc84b72591e58feae489..b0dbdf22ac83036076b8271eb0dfc3322a9c2fee 100644 (file)
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@@ -2,119 +2,144 @@
  ! See http://factorcode.org/license.txt for BSD license.
  USING: namespaces xml.state kernel sequences accessors
  xml.char-classes xml.errors math io sbufs fry strings ascii
-circular xml.entities assocs make splitting math.parser
-locals combinators arrays ;
+circular xml.entities assocs splitting math.parser
+locals combinators arrays hints ;
  IN: xml.tokenize
  
-: version=1.0? ( -- ? )
-    prolog-data get [ version>> "1.0" = ] [ t ] if* ;
+! * Basic utility words
  
-: assure-good-char ( ch -- ch )
+: assure-good-char ( spot ch -- )
      [
-        version=1.0? over text? not get-check and
-        [ disallowed-char ] when
-    ] [ f ] if* ;
+        swap
+        [ version-1.0?>> over text? not ]
+        [ check>> ] bi and [
+            spot get [ 1 + ] change-column drop
+            disallowed-char
+        ] [ drop ] if
+    ] [ drop ] if* ;
+
+HINTS: assure-good-char { spot fixnum } ;
+
+: record ( spot char -- spot )
+    over char>> [
+        CHAR: \n =
+        [ [ 1 + ] change-line -1 ] [ dup column>> 1 + ] if
+        >>column
+    ] [ drop ] if ;
  
-! * Basic utility words
+HINTS: record { spot fixnum } ;
  
-: record ( char -- )
-    CHAR: \n =
-    [ 0 get-line 1+ set-line ] [ get-column 1+ ] if
-    set-column ;
+:: (next) ( spot -- spot char )
+    spot next>> :> old-next
+    spot stream>> stream-read1 :> new-next
+    old-next CHAR: \r = [
+        spot CHAR: \n >>char
+        new-next CHAR: \n =
+        [ spot stream>> stream-read1 >>next ]
+        [ new-next >>next ] if
+    ] [ spot old-next >>char new-next >>next ] if
+    spot next>> ; inline
  
-! (next) normalizes \r\n and \r
-: (next) ( -- char )
-    get-next read1
-    2dup swap CHAR: \r = [
-        CHAR: \n =
-        [ nip read1 ] [ nip CHAR: \n swap ] if
-    ] [ drop ] if
-    set-next dup set-char assure-good-char ;
+: next* ( spot -- )
+    dup char>> [ unexpected-end ] unless
+    (next) [ record ] keep assure-good-char ;
+
+HINTS: next* { spot } ;
  
  : next ( -- )
-    #! Increment spot.
-    get-char [ unexpected-end ] unless (next) record ;
+    spot get next* ;
  
  : init-parser ( -- )
-    0 1 0 f f <spot> spot set
+    0 1 0 0 f t f <spot>
+        input-stream get >>stream
+    spot set
      read1 set-next next ;
  
  : with-state ( stream quot -- )
      ! with-input-stream implicitly creates a new scope which we use
      swap [ init-parser call ] with-input-stream ; inline
  
+:: (skip-until) ( quot: ( -- ? ) spot -- )
+    spot char>> [
+        quot call [
+            spot next* quot spot (skip-until)
+        ] unless
+    ] when ; inline recursive
+
  : skip-until ( quot: ( -- ? ) -- )
-    get-char [
-        [ call ] keep swap [ drop ] [
-            next skip-until
-        ] if
-    ] [ drop ] if ; inline recursive
+    spot get (skip-until) ; inline
  
  : take-until ( quot -- string )
      #! Take the substring of a string starting at spot
      #! from code until the quotation given is true and
      #! advance spot to after the substring.
      10 <sbuf> [
-        '[ @ [ t ] [ get-char _ push f ] if ] skip-until
+        spot get swap
+        '[ @ [ t ] [ _ char>> _ push f ] if ] skip-until
      ] keep >string ; inline
  
-: take-char ( ch -- string )
-    [ dup get-char = ] take-until nip ;
+: take-to ( seq -- string )
+    spot get swap '[ _ char>> _ member? ] take-until ;
  
  : pass-blank ( -- )
      #! Advance code past any whitespace, including newlines
-    [ get-char blank? not ] skip-until ;
+    spot get '[ _ char>> blank? not ] skip-until ;
  
-: string-matches? ( string circular -- ? )
-    get-char over push-circular
-    sequence= ;
+: string-matches? ( string circular spot -- ? )
+    char>> over push-circular sequence= ;
  
  : take-string ( match -- string )
      dup length <circular-string>
-    [ 2dup string-matches? ] take-until nip
-    dup length rot length 1- - head
+    spot get '[ 2dup _ string-matches? ] take-until nip
+    dup length rot length 1 - - head
      get-char [ missing-close ] unless next ;
  
-: expect ( ch -- )
-    get-char 2dup = [ 2drop ] [
-        [ 1string ] bi@ expected
-    ] if next ;
+: expect ( string -- )
+    dup spot get '[ _ [ char>> ] keep next* ] replicate
+    2dup = [ 2drop ] [ expected ] if ;
  
-: expect-string ( string -- )
-    dup [ get-char next ] replicate 2dup =
-    [ 2drop ] [ expected ] if ;
+! Suddenly XML-specific
  
-: parse-named-entity ( string -- )
-    dup entities at [ , ] [
+: parse-named-entity ( accum string -- )
+    dup entities at [ swap push ] [
          dup extra-entities get at
-        [ % ] [ no-entity ] ?if
+        [ swap push-all ] [ no-entity ] ?if
      ] ?if ;
  
-: parse-entity ( -- )
-    next CHAR: ; take-char next
-    "#" ?head [
-        "x" ?head 16 10 ? base> ,
-    ] [ parse-named-entity ] if ;
+: take-; ( -- string )
+    next ";" take-to next ;
  
-SYMBOL: pe-table
-SYMBOL: in-dtd?
+: parse-entity ( accum -- )
+    take-; "#" ?head [
+        "x" ?head 16 10 ? base> swap push
+    ] [ parse-named-entity ] if ;
  
-: parse-pe ( -- )
-    next CHAR: ; take-char dup next
-    pe-table get at [ % ] [ no-entity ] ?if ;
+: parse-pe ( accum -- )
+    take-; dup pe-table get at
+    [ swap push-all ] [ no-entity ] ?if ;
  
-:: (parse-char) ( quot: ( ch -- ? ) -- )
-    get-char :> char
+:: (parse-char) ( quot: ( ch -- ? ) accum spot -- )
+    spot char>> :> char
      {
          { [ char not ] [ ] }
-        { [ char quot call ] [ next ] }
-        { [ char CHAR: & = ] [ parse-entity quot (parse-char) ] }
-        { [ in-dtd? get char CHAR: % = and ] [ parse-pe quot (parse-char) ] }
-        [ char , next quot (parse-char) ]
+        { [ char quot call ] [ spot next* ] }
+        { [ char CHAR: & = ] [
+            accum parse-entity
+            quot accum spot (parse-char)
+        ] }
+        { [ in-dtd? get char CHAR: % = and ] [
+            accum parse-pe
+            quot accum spot (parse-char)
+        ] }
+        [
+            char accum push
+            spot next*
+            quot accum spot (parse-char)
+        ]
      } cond ; inline recursive
  
  : parse-char ( quot: ( ch -- ? ) -- seq )
-    [ (parse-char) ] "" make ; inline
+    1024 <sbuf> [ spot get (parse-char) ] keep >string ; inline
  
  : assure-no-]]> ( circular -- )
      "]]>" sequence= [ text-w/]]> ] when ;
@@ -131,7 +156,7 @@ SYMBOL: in-dtd?
      ] parse-char ;
  
  : close ( -- )
-    pass-blank CHAR: > expect ;
+    pass-blank ">" expect ;
  
  : normalize-quote ( str -- str )
      [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;