xml: 25% (or more) faster.

author John Benediktsson <mrjbq7@gmail.com>

Fri, 30 Sep 2011 19:47:38 +0000 (12:47 -0700)

committer John Benediktsson <mrjbq7@gmail.com>

Fri, 30 Sep 2011 19:47:38 +0000 (12:47 -0700)
author John Benediktsson <mrjbq7@gmail.com>
Fri, 30 Sep 2011 19:47:38 +0000 (12:47 -0700)
committer John Benediktsson <mrjbq7@gmail.com>
Fri, 30 Sep 2011 19:47:38 +0000 (12:47 -0700)
diff --git a/basis/xml/autoencoding/autoencoding.factor b/basis/xml/autoencoding/autoencoding.factor

index 63482ff706f12097aa6972c82d5ccd1f57476fb4..1057463d4f9b444cfe2b1d6f8a68a8d3f9ecce00 100644 (file)
--- a/basis/xml/autoencoding/autoencoding.factor
+++ b/basis/xml/autoencoding/autoencoding.factor
@@ -19,7 +19,7 @@ IN: xml.autoencoding
  
  : 10xxxxxx? ( ch -- ? )
      -6 shift 3 bitand 2 = ;
-          
+
  : start<name ( ch -- tag )
      ! This is unfortunate, and exists for the corner case
      ! that the first letter of the document is < and second is
@@ -27,7 +27,7 @@ IN: xml.autoencoding
      ascii?
      [ utf8 decode-stream next make-tag ] [
          next
-        [ get-next 10xxxxxx? not ] take-until
+        [ drop get-next 10xxxxxx? not ] take-until
          get-char suffix utf8 decode
          utf8 decode-stream next
          continue-make-tag
diff --git a/basis/xml/char-classes/char-classes.factor b/basis/xml/char-classes/char-classes.factor

index 3deab0a2872189681a76e52d5d4a7bd26474b3be..528198878abd1c93d4c6c18445a6a830badb24cd 100644 (file)
--- a/basis/xml/char-classes/char-classes.factor
+++ b/basis/xml/char-classes/char-classes.factor
@@ -28,16 +28,33 @@ CATEGORY: 1.1name-char
  : name-char? ( 1.0? char -- ? )
      swap [ 1.0name-char? ] [ 1.1name-char? ] if ;
  
-: text? ( 1.0? char -- ? )
+HINTS: name-start? { object fixnum } ;
+HINTS: name-char? { object fixnum } ;
+
+<PRIVATE
+
+: 1.0-text? ( char -- ? )
      ! 1.0:
      ! #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+    {
+        [ HEX: 20 HEX: D7FF between? ]
+        [ "\t\r\n" member? ]
+        [ HEX: E000 HEX: FFFD between? ]
+        [ HEX: 10000 HEX: 10FFFFF between? ]
+    } 1|| ; inline
+
+: 1.1-text? ( char -- ? )
      ! 1.1:
      ! [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
      {
-        { [ dup HEX: 20 < ] [ swap [ "\t\r\n" member? ] [ zero? not ] if ] }
-        { [ nip dup HEX: D800 < ] [ drop t ] }
-        { [ dup HEX: E000 < ] [ drop f ] }
-        [ { HEX: FFFE HEX: FFFF } member? not ]
-    } cond ;
+        [ HEX: 1 HEX: D7FF between? ]
+        [ HEX: E000 HEX: FFFD between? ]
+        [ HEX: 10000 HEX: 10FFFF between? ]
+    } 1|| ; inline
+
+PRIVATE>
+
+: text? ( 1.0? char -- ? )
+    swap [ 1.0-text? ] [ 1.1-text? ] if ;
  
  HINTS: text? { object fixnum } ;
diff --git a/basis/xml/elements/elements.factor b/basis/xml/elements/elements.factor

index 1e59c199091ce90705359a5640c63c23c7a9ce33..8d7f963625bf7008bc8824aae50cf6d620b44f97 100644 (file)
--- a/basis/xml/elements/elements.factor
+++ b/basis/xml/elements/elements.factor
@@ -9,7 +9,7 @@ IN: xml.elements
  
  : take-interpolated ( quot -- interpolated )
      interpolating? get [
-        drop get-char CHAR: > =
+        drop get-char CHAR: > eq?
          [ next f ]
          [ "->" take-string [ blank? ] trim ]
          if <interpolated>
@@ -20,13 +20,13 @@ IN: xml.elements
  
  : parse-attr ( -- )
      parse-name pass-blank "=" expect pass-blank
-    get-char CHAR: < =
+    get-char CHAR: < eq?
      [ "<-" expect interpolate-quote ]
      [ t parse-quote* ] if 2array , ;
  
  : start-tag ( -- name ? )
      #! Outputs the name and whether this is a closing tag
-    get-char CHAR: / = dup [ next ] when
+    get-char CHAR: / eq? dup [ next ] when
      parse-name swap ;
  
  : (middle-tag) ( -- )
@@ -41,10 +41,10 @@ IN: xml.elements
  : middle-tag ( -- attrs-alist )
      ! f make will make a vector if it has any elements
      [ (middle-tag) ] f make pass-blank
-    assure-no-duplicates ;
+    dup length 1 > [ assure-no-duplicates ] when ;
  
  : end-tag ( name attrs-alist -- tag )
-    tag-ns pass-blank get-char CHAR: / =
+    tag-ns pass-blank get-char CHAR: / eq?
      [ pop-ns <contained> next ">" expect ]
      [ depth inc <opener> close ] if ;
  
@@ -136,7 +136,7 @@ DEFER: make-tag ! Is this unavoidable?
      [ take-external-id ] [ f ] if ;
  
  : take-internal ( -- dtd/f )
-    get-char CHAR: [ =
+    get-char CHAR: [ eq?
      [ next take-internal-subset ] [ f ] if ;
  
  : take-doctype-decl ( -- doctype-decl )
@@ -169,9 +169,9 @@ DEFER: make-tag ! Is this unavoidable?
      [ "-" bad-name ] take-interpolated ;
  
  : make-tag ( -- tag )
-    {
-        { [ get-char dup CHAR: ! = ] [ drop next direct ] }
-        { [ dup CHAR: ? = ] [ drop next instruct ] }
-        { [ dup CHAR: - = ] [ drop next interpolate-tag ] }
+    get-char {
+        { CHAR: ! [ next direct ] }
+        { CHAR: ? [ next instruct ] }
+        { CHAR: - [ next interpolate-tag ] }
          [ drop normal-tag ]
-    } cond ;
+    } case ;
diff --git a/basis/xml/name/name.factor b/basis/xml/name/name.factor

index 1907a83a83f606322a2b552b4838e515f5729a9c..7f796223926e6871499d12bcc688307c0a012307 100644 (file)
--- a/basis/xml/name/name.factor
+++ b/basis/xml/name/name.factor
@@ -66,7 +66,7 @@ SYMBOL: ns-stack
      ] ?if ;
  
  : take-name ( -- string )
-    version-1.0? '[ _ get-char name-char? not ] take-until ;
+    version-1.0? '[ _ swap name-char? not ] take-until ;
  
  : parse-name ( -- name )
      take-name interpret-name ;
@@ -88,7 +88,7 @@ SYMBOL: ns-stack
      } case ;
  
  : take-word ( -- string )
-    [ get-char blank? ] take-until ;
+    [ blank? ] take-until ;
  
  : take-external-id ( -- external-id )
      take-word (take-external-id) ;
diff --git a/basis/xml/state/state.factor b/basis/xml/state/state.factor

index cf103f141b0bacb486e6431f9dc6050e8690c82c..03f8c8f3fb7008362f5a72d11fc4805380ec69ed 100644 (file)
--- a/basis/xml/state/state.factor
+++ b/basis/xml/state/state.factor
@@ -3,33 +3,23 @@
  USING: accessors kernel namespaces io math ;
  IN: xml.state
  
-TUPLE: spot
-    char line column next check version-1.0? stream ;
+TUPLE: spot char line column next check version-1.0? stream ;
  
  C: <spot> spot
  
-: get-char ( -- char ) spot get char>> ;
-: set-char ( char -- ) spot get swap >>char drop ;
-: get-line ( -- line ) spot get line>> ;
-: set-line ( line -- ) spot get swap >>line drop ;
-: get-column ( -- column ) spot get column>> ;
-: set-column ( column -- ) spot get swap >>column drop ;
-: get-next ( -- char ) spot get next>> ;
-: set-next ( char -- ) spot get swap >>next drop ;
-: get-check ( -- ? ) spot get check>> ;
-: check ( -- ) spot get t >>check drop ;
-: version-1.0? ( -- ? ) spot get version-1.0?>> ;
+: get-char ( -- char ) spot get char>> ; inline
+: get-line ( -- line ) spot get line>> ; inline
+: get-column ( -- column ) spot get column>> ; inline
+: get-next ( -- char ) spot get next>> ; inline
+: get-check ( -- ? ) spot get check>> ; inline
+: check ( -- ) spot get t >>check drop ; inline
+: version-1.0? ( -- ? ) spot get version-1.0?>> ; inline
  : set-version ( string -- )
-    spot get swap "1.0" = >>version-1.0? drop ;
+    spot get swap "1.0" = >>version-1.0? drop ; inline
  
  SYMBOL: xml-stack
-
  SYMBOL: depth
-
  SYMBOL: interpolating?
-
  SYMBOL: in-dtd?
-
  SYMBOL: pe-table
-
  SYMBOL: extra-entities
diff --git a/basis/xml/tests/state-parser-tests.factor b/basis/xml/tests/state-parser-tests.factor

index 5e214dc4a3f4b1b7fce99ef9f9d19c4a993bfd29..550378fea095a01bd93f7b59c38d8973c2cc2a7d 100644 (file)
--- a/basis/xml/tests/state-parser-tests.factor
+++ b/basis/xml/tests/state-parser-tests.factor
@@ -5,14 +5,14 @@ IN: xml.test.state
      [ <string-reader> ] dip with-state ; inline
  
  : take-rest ( -- string )
-    [ f ] take-until ;
+    [ drop f ] take-until ;
  
  : take-char ( char -- string )
      1string take-to ;
  
  [ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
  [ 2 3 ] [ "12\n123" [ take-rest drop get-line get-column ] string-parse ] unit-test
-[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
+[ "hi" " how are you?" ] [ "hi how are you?" [ [ blank? ] take-until take-rest ] string-parse ] unit-test
  [ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
  [ "foo " " bar" ] [ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test
  [ "baz" ] [ " \n\t baz" [ pass-blank take-rest ] string-parse ] unit-test
diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor

index 8978c660f40c97d334fda41148f68beccce314b7..f1f8bc83fbc4d29472988bd66cc715c1b902e3d0 100644 (file)
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@@ -2,7 +2,7 @@
  ! See http://factorcode.org/license.txt for BSD license.
  USING: namespaces xml.state kernel sequences accessors
  xml.char-classes xml.errors math io sbufs fry strings ascii
-circular xml.entities assocs splitting math.parser
+xml.entities assocs splitting math.parser
  locals combinators arrays hints ;
  IN: xml.tokenize
  
@@ -10,19 +10,20 @@ IN: xml.tokenize
  
  : assure-good-char ( spot ch -- )
      [
-        swap
+        over
          [ version-1.0?>> over text? not ]
-        [ check>> ] bi and [
-            spot get [ 1 + ] change-column drop
+        [ check>> ] bi and
+        [
+            [ [ 1 + ] change-column drop ] dip
              disallowed-char
-        ] [ drop ] if
+        ] [ 2drop ] if
      ] [ drop ] if* ;
  
  HINTS: assure-good-char { spot fixnum } ;
  
  : record ( spot char -- spot )
      over char>> [
-        CHAR: \n =
+        CHAR: \n eq?
          [ [ 1 + ] change-line -1 ] [ dup column>> 1 + ] if
          >>column
      ] [ drop ] if ;
@@ -32,9 +33,9 @@ HINTS: record { spot fixnum } ;
  :: (next) ( spot -- spot char )
      spot next>> :> old-next
      spot stream>> stream-read1 :> new-next
-    old-next CHAR: \r = [
+    old-next CHAR: \r eq? [
          spot CHAR: \n >>char
-        new-next CHAR: \n =
+        new-next CHAR: \n eq?
          [ spot stream>> stream-read1 >>next ]
          [ new-next >>next ] if
      ] [ spot old-next >>char new-next >>next ] if
@@ -52,46 +53,46 @@ HINTS: next* { spot } ;
  : init-parser ( -- )
      0 1 0 0 f t f <spot>
          input-stream get >>stream
-    spot set
-    read1 set-next next ;
+        read1 >>next
+    spot set next ;
  
  : with-state ( stream quot -- )
      ! with-input-stream implicitly creates a new scope which we use
      swap [ init-parser call ] with-input-stream ; inline
  
-:: (skip-until) ( ... quot: ( ... -- ... ? ) spot -- ... )
+:: (skip-until) ( ... quot: ( ... char -- ... ? ) spot -- ... )
      spot char>> [
          quot call [
              spot next* quot spot (skip-until)
          ] unless
-    ] when ; inline recursive
+    ] when* ; inline recursive
  
-: skip-until ( ... quot: ( ... -- ... ? ) -- ... )
+: skip-until ( ... quot: ( ... char -- ... ? ) -- ... )
      spot get (skip-until) ; inline
  
-: take-until ( quot -- string )
+: take-until ( ... quot: ( ... char -- ... ? ) -- ... string )
      #! Take the substring of a string starting at spot
      #! from code until the quotation given is true and
      #! advance spot to after the substring.
-    10 <sbuf> [
-        spot get swap
-        '[ @ [ t ] [ _ char>> _ push f ] if ] skip-until
-    ] keep >string ; inline
+   10 <sbuf> [
+       '[ _ keep over [ drop ] [ _ push ] if ] skip-until
+   ] keep >string ; inline
  
  : take-to ( seq -- string )
-    spot get swap '[ _ char>> _ member? ] take-until ;
+    '[ _ member? ] take-until ;
  
  : pass-blank ( -- )
      #! Advance code past any whitespace, including newlines
-    spot get '[ _ char>> blank? not ] skip-until ;
+    [ blank? not ] skip-until ;
  
-: string-matches? ( string circular spot -- ? )
-    char>> over circular-push sequence= ;
+: string-matcher ( str -- quot: ( pos char -- pos ? ) )
+    dup length 1 - '[
+        over _ nth eq? [ 1 + ] [ drop 0 ] if dup _ >
+    ] ; inline
  
  : take-string ( match -- string )
-    dup length <circular-string>
-    spot get '[ 2dup _ string-matches? ] take-until nip
-    dup length rot length 1 - - head
+    [ 0 swap string-matcher take-until nip ] keep
+    dupd [ length ] bi@ 1 - - head
      get-char [ missing-close ] unless next ;
  
  : expect ( string -- )
@@ -123,11 +124,11 @@ HINTS: next* { spot } ;
      {
          { [ char not ] [ ] }
          { [ char quot call ] [ spot next* ] }
-        { [ char CHAR: & = ] [
+        { [ char CHAR: & eq? ] [
              accum parse-entity
              quot accum spot (parse-char)
          ] }
-        { [ in-dtd? get char CHAR: % = and ] [
+        { [ char CHAR: % eq? in-dtd? get and ] [
              accum parse-pe
              quot accum spot (parse-char)
          ] }
@@ -141,18 +142,21 @@ HINTS: next* { spot } ;
  : parse-char ( quot: ( ch -- ? ) -- seq )
      1024 <sbuf> [ spot get (parse-char) ] keep >string ; inline
  
-: assure-no-]]> ( circular -- )
-    "]]>" sequence= [ text-w/]]> ] when ;
+: assure-no-]]> ( pos char -- pos' )
+    over "]]>" nth eq? [ 1 + ] [ drop 0 ] if
+    dup 2 > [ text-w/]]> ] when ;
  
  :: parse-text ( -- string )
-    3 f <array> <circular> :> circ
-    depth get zero? :> no-text [| char |
-        char circ circular-push
-        circ assure-no-]]>
-        no-text [ char blank? char CHAR: < = or [
-            char 1string t pre/post-content
-        ] unless ] when
-        char CHAR: < =
+    0 :> pos!
+    depth get zero? :> no-text
+    [| char |
+        pos char assure-no-]]> pos!
+        no-text [
+            char blank? char CHAR: < eq? or [
+                char 1string t pre/post-content
+            ] unless
+        ] when
+        char CHAR: < eq?
      ] parse-char ;
  
  : close ( -- )
@@ -163,8 +167,8 @@ HINTS: next* { spot } ;
  
  : (parse-quote) ( <-disallowed? ch -- string )
      swap '[
-        dup _ = [ drop t ]
-        [ CHAR: < = _ and [ attr-w/< ] [ f ] if ] if
+        dup _ eq? [ drop t ]
+        [ CHAR: < eq? _ and [ attr-w/< ] [ f ] if ] if
      ] parse-char normalize-quote get-char
      [ unclosed-quote ] unless ; inline
  
diff --git a/basis/xml/xml.factor b/basis/xml/xml.factor

index ac6fbfcddcf8d23fedb116a4b6bd2f0d8b255f81..7667ff3b21484a5fb495f86335021d3db536e258 100644 (file)
--- a/basis/xml/xml.factor
+++ b/basis/xml/xml.factor
@@ -1,7 +1,7 @@
  ! Copyright (C) 2005, 2009 Daniel Ehrenberg
  ! See http://factorcode.org/license.txt for BSD license.
  USING: accessors arrays io io.encodings.binary io.files
-io.streams.string kernel namespaces sequences strings
+io.streams.string kernel math namespaces sequences strings
  io.encodings.utf8 xml.data xml.errors xml.elements ascii
  xml.entities xml.state xml.autoencoding assocs xml.tokenize
  combinators.short-circuit xml.name splitting
@@ -147,8 +147,8 @@ PRIVATE>
      swap [ call ] keep ; inline
  
  : xml-loop ( quot: ( xml-elem -- ) -- )
-    parse-text call-under
-    get-char [ make-tag call-under xml-loop ]
+    parse-text call-under get-char
+    [ make-tag call-under xml-loop ]
      [ drop ] if ; inline recursive
  
  : read-seq ( stream quot n -- seq )
author	John Benediktsson <mrjbq7@gmail.com>
	Fri, 30 Sep 2011 19:47:38 +0000 (12:47 -0700)
committer	John Benediktsson <mrjbq7@gmail.com>
	Fri, 30 Sep 2011 19:47:38 +0000 (12:47 -0700)
basis/xml/autoencoding/autoencoding.factor		patch \| blob \| history
basis/xml/char-classes/char-classes.factor		patch \| blob \| history
basis/xml/elements/elements.factor		patch \| blob \| history
basis/xml/name/name.factor		patch \| blob \| history
basis/xml/state/state.factor		patch \| blob \| history
basis/xml/tests/state-parser-tests.factor		patch \| blob \| history
basis/xml/tokenize/tokenize.factor		patch \| blob \| history
basis/xml/xml.factor		patch \| blob \| history