make tokenize-line configurable, fix bug in take-quoted-string

author Doug Coleman <erg@jobim.local>

Wed, 1 Apr 2009 20:51:39 +0000 (15:51 -0500)

committer Doug Coleman <erg@jobim.local>

Wed, 1 Apr 2009 20:51:39 +0000 (15:51 -0500)
author Doug Coleman <erg@jobim.local>
Wed, 1 Apr 2009 20:51:39 +0000 (15:51 -0500)
committer Doug Coleman <erg@jobim.local>
Wed, 1 Apr 2009 20:51:39 +0000 (15:51 -0500)
diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor

index b7a929284be5cd4dbab39fcbc749567c8bf3d5ed..e655dbb6997d07c77062d2346f0bdc914700cf7d 100644 (file)
--- a/extra/html/parser/state/state-tests.factor
+++ b/extra/html/parser/state/state-tests.factor
@@ -53,13 +53,18 @@ IN: html.parser.state.tests
  [ "cd" ]
  [ "abcd" <state-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
  
-
  [ f ]
  [
      "\"abc\" asdf" <state-parser>
      [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
  ] unit-test
  
+[ "abc\\\"def" ]
+[
+    "\"abc\\\"def\" asdf" <state-parser>
+    CHAR: \ CHAR: " take-quoted-string
+] unit-test
+
  [ "asdf" ]
  [
      "\"abc\" asdf" <state-parser>
@@ -82,3 +87,6 @@ IN: html.parser.state.tests
  
  [ "c" ]
  [ "c" <state-parser> take-token ] unit-test
+
+[ { "a" "b" "c" "abcd e \\\"f g" } ]
+[ "a b c  \"abcd e \\\"f g\"" CHAR: \ CHAR: " tokenize-line ] unit-test
diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor

index 1b83089c98b2fcbf8cf5d4711b5023f61a2fe52c..6cca9f72a977d254ce36b850c30ba23f814d7a22 100644 (file)
--- a/extra/html/parser/state/state.factor
+++ b/extra/html/parser/state/state.factor
@@ -1,7 +1,8 @@
  ! Copyright (C) 2005, 2009 Daniel Ehrenberg
  ! See http://factorcode.org/license.txt for BSD license.
  USING: namespaces math kernel sequences accessors fry circular
-unicode.case unicode.categories locals combinators.short-circuit ;
+unicode.case unicode.categories locals combinators.short-circuit
+make combinators ;
  
  IN: html.parser.state
  
@@ -87,7 +88,7 @@ TUPLE: state-parser sequence n ;
      state-parser advance
      [
          {
-            [ { [ previous quote-char = ] [ current quote-char = ] } 1&& ]
+            [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
              [ current quote-char = not ]
          } 1||
      ] take-while :> string
@@ -99,3 +100,17 @@ TUPLE: state-parser sequence n ;
  
  : take-token ( state-parser -- string )
      skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
+
+:: (tokenize-line) ( state-parser escape-char quote-char -- )
+    state-parser skip-whitespace
+    dup current {
+        { quote-char [
+            [ escape-char quote-char take-quoted-string , ]
+            [ escape-char quote-char (tokenize-line) ] bi
+        ] }
+        { f [ drop ] }
+        [ drop [ take-token , ] [ escape-char quote-char (tokenize-line) ] bi ]
+    } case ;
+
+: tokenize-line ( line escape-char quote-char -- seq )
+    [ <state-parser> ] 2dip [ (tokenize-line) ] { } make ;
author	Doug Coleman <erg@jobim.local>
	Wed, 1 Apr 2009 20:51:39 +0000 (15:51 -0500)
committer	Doug Coleman <erg@jobim.local>
	Wed, 1 Apr 2009 20:51:39 +0000 (15:51 -0500)
extra/html/parser/state/state-tests.factor		patch | blob | history
extra/html/parser/state/state.factor		patch | blob | history