! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: html.parser.state io io.encodings.utf8 io.files
+USING: sequence-parser io io.encodings.utf8 io.files
io.streams.string kernel combinators accessors io.pathnames
fry sequences arrays locals namespaces io.directories
assocs math splitting make unicode.categories
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays hashtables html.parser.state
+USING: accessors arrays hashtables sequence-parser
html.parser.utils kernel namespaces sequences
unicode.case unicode.categories combinators.short-circuit
quoting fry ;
+++ /dev/null
-USING: tools.test html.parser.state ascii kernel accessors ;
-IN: html.parser.state.tests
-
-[ "hello" ]
-[ "hello" [ take-rest ] state-parse ] unit-test
-
-[ "hi" " how are you?" ]
-[
- "hi how are you?"
- [ [ [ current blank? ] take-until ] [ take-rest ] bi ] state-parse
-] unit-test
-
-[ "foo" ";bar" ]
-[
- "foo;bar" [
- [ CHAR: ; take-until-object ] [ take-rest ] bi
- ] state-parse
-] unit-test
-
-[ "foo " " bar" ]
-[
- "foo and bar" [
- [ "and" take-until-sequence ] [ take-rest ] bi
- ] state-parse
-] unit-test
-
-[ 6 ]
-[
- " foo " [ skip-whitespace n>> ] state-parse
-] unit-test
-
-[ { 1 2 } ]
-[ { 1 2 3 } <state-parser> [ current 3 = ] take-until ] unit-test
-
-[ { 1 2 } ]
-[ { 1 2 3 4 } <state-parser> { 3 4 } take-until-sequence ] unit-test
-
-[ "ab" ]
-[ "abcd" <state-parser> "ab" take-sequence ] unit-test
-
-[ f ]
-[ "abcd" <state-parser> "lol" take-sequence ] unit-test
-
-[ "ab" ]
-[
- "abcd" <state-parser>
- [ "lol" take-sequence drop ] [ "ab" take-sequence ] bi
-] unit-test
-
-[ "" ]
-[ "abcd" <state-parser> "" take-sequence ] unit-test
-
-[ "cd" ]
-[ "abcd" <state-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
-
-[ f ]
-[
- "\"abc\" asdf" <state-parser>
- [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
-] unit-test
-
-[ "abc\\\"def" ]
-[
- "\"abc\\\"def\" asdf" <state-parser>
- CHAR: \ CHAR: " take-quoted-string
-] unit-test
-
-[ "asdf" ]
-[
- "\"abc\" asdf" <state-parser>
- [ CHAR: \ CHAR: " take-quoted-string drop ]
- [ skip-whitespace "asdf" take-sequence ] bi
-] unit-test
-
-[ f ]
-[
- "\"abc asdf" <state-parser>
- CHAR: \ CHAR: " take-quoted-string
-] unit-test
-
-[ "\"abc" ]
-[
- "\"abc asdf" <state-parser>
- [ CHAR: \ CHAR: " take-quoted-string drop ]
- [ "\"abc" take-sequence ] bi
-] unit-test
-
-[ "c" ]
-[ "c" <state-parser> take-token ] unit-test
-
-[ f ]
-[ "" <state-parser> take-token ] unit-test
-
-[ "abcd e \\\"f g" ]
-[ "\"abcd e \\\"f g\"" <state-parser> CHAR: \ CHAR: " take-token* ] unit-test
-
-[ "" ]
-[ "" <state-parser> take-rest ] unit-test
-
-[ "" ]
-[ "abc" <state-parser> dup "abc" take-sequence drop take-rest ] unit-test
-
-[ f ]
-[ "abc" <state-parser> "abcdefg" take-sequence ] unit-test
+++ /dev/null
-! Copyright (C) 2005, 2009 Daniel Ehrenberg
-! See http://factorcode.org/license.txt for BSD license.
-USING: namespaces math kernel sequences accessors fry circular
-unicode.case unicode.categories locals combinators.short-circuit
-make combinators io splitting ;
-
-IN: html.parser.state
-
-TUPLE: state-parser sequence n ;
-
-: <state-parser> ( sequence -- state-parser )
- state-parser new
- swap >>sequence
- 0 >>n ;
-
-: offset ( state-parser offset -- char/f )
- swap
- [ n>> + ] [ sequence>> ?nth ] bi ; inline
-
-: current ( state-parser -- char/f ) 0 offset ; inline
-
-: previous ( state-parser -- char/f ) -1 offset ; inline
-
-: peek-next ( state-parser -- char/f ) 1 offset ; inline
-
-: advance ( state-parser -- state-parser )
- [ 1 + ] change-n ; inline
-
-: advance* ( state-parser -- )
- advance drop ; inline
-
-: get+increment ( state-parser -- char/f )
- [ current ] [ advance drop ] bi ; inline
-
-:: skip-until ( state-parser quot: ( obj -- ? ) -- )
- state-parser current [
- state-parser quot call [ state-parser advance quot skip-until ] unless
- ] when ; inline recursive
-
-: state-parse-end? ( state-parser -- ? ) current not ;
-
-: take-until ( state-parser quot: ( obj -- ? ) -- sequence/f )
- over state-parse-end? [
- 2drop f
- ] [
- [ drop n>> ]
- [ skip-until ]
- [ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
- ] if ; inline
-
-: take-while ( state-parser quot: ( obj -- ? ) -- sequence/f )
- [ not ] compose take-until ; inline
-
-: <safe-slice> ( from to seq -- slice/f )
- 3dup {
- [ 2drop 0 < ]
- [ [ drop ] 2dip length > ]
- [ drop > ]
- } 3|| [ 3drop f ] [ slice boa ] if ; inline
-
-:: take-sequence ( state-parser sequence -- obj/f )
- state-parser [ n>> dup sequence length + ] [ sequence>> ] bi
- <safe-slice> sequence sequence= [
- sequence
- state-parser [ sequence length + ] change-n drop
- ] [
- f
- ] if ;
-
-:: take-until-sequence ( state-parser sequence -- sequence' )
- sequence length <growing-circular> :> growing
- state-parser
- [
- current growing push-growing-circular
- sequence growing sequence=
- ] take-until :> found
- found dup length
- growing length 1- - head
- state-parser advance drop ;
-
-: skip-whitespace ( state-parser -- state-parser )
- [ [ current blank? not ] take-until drop ] keep ;
-
-: take-rest-slice ( state-parser -- sequence/f )
- [ sequence>> ] [ n>> ] bi
- 2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
-
-: take-rest ( state-parser -- sequence )
- [ take-rest-slice ] [ sequence>> like ] bi ;
-
-: take-until-object ( state-parser obj -- sequence )
- '[ current _ = ] take-until ;
-
-: state-parse ( sequence quot -- )
- [ <state-parser> ] dip call ; inline
-
-:: take-quoted-string ( state-parser escape-char quote-char -- string )
- state-parser n>> :> start-n
- state-parser advance
- [
- {
- [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
- [ current quote-char = not ]
- } 1||
- ] take-while :> string
- state-parser current quote-char = [
- state-parser advance* string
- ] [
- start-n state-parser (>>n) f
- ] if ;
-
-: (take-token) ( state-parser -- string )
- skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
-
-:: take-token* ( state-parser escape-char quote-char -- string/f )
- state-parser skip-whitespace
- dup current {
- { quote-char [ escape-char quote-char take-quoted-string ] }
- { f [ drop f ] }
- [ drop (take-token) ]
- } case ;
-
-: take-token ( state-parser -- string/f )
- CHAR: \ CHAR: " take-token* ;
-
-: write-full ( state-parser -- ) sequence>> write ;
-: write-rest ( state-parser -- ) take-rest write ;
! See http://factorcode.org/license.txt for BSD license.
USING: assocs circular combinators continuations hashtables
hashtables.private io kernel math namespaces prettyprint
-quotations sequences splitting html.parser.state strings
-combinators.short-circuit quoting ;
+quotations sequences splitting strings quoting
+combinators.short-circuit ;
IN: html.parser.utils
: trim1 ( seq ch -- newseq )
--- /dev/null
+USING: tools.test sequence-parser ascii kernel accessors ;
+IN: sequence-parser.tests
+
+[ "hello" ]
+[ "hello" [ take-rest ] parse-sequence ] unit-test
+
+[ "hi" " how are you?" ]
+[
+ "hi how are you?"
+ [ [ [ current blank? ] take-until ] [ take-rest ] bi ] parse-sequence
+] unit-test
+
+[ "foo" ";bar" ]
+[
+ "foo;bar" [
+ [ CHAR: ; take-until-object ] [ take-rest ] bi
+ ] parse-sequence
+] unit-test
+
+[ "foo " " bar" ]
+[
+ "foo and bar" [
+ [ "and" take-until-sequence ] [ take-rest ] bi
+ ] parse-sequence
+] unit-test
+
+[ 6 ]
+[
+ " foo " [ skip-whitespace n>> ] parse-sequence
+] unit-test
+
+[ { 1 2 } ]
+[ { 1 2 3 } <sequence-parser> [ current 3 = ] take-until ] unit-test
+
+[ { 1 2 } ]
+[ { 1 2 3 4 } <sequence-parser> { 3 4 } take-until-sequence ] unit-test
+
+[ "ab" ]
+[ "abcd" <sequence-parser> "ab" take-sequence ] unit-test
+
+[ f ]
+[ "abcd" <sequence-parser> "lol" take-sequence ] unit-test
+
+[ "ab" ]
+[
+ "abcd" <sequence-parser>
+ [ "lol" take-sequence drop ] [ "ab" take-sequence ] bi
+] unit-test
+
+[ "" ]
+[ "abcd" <sequence-parser> "" take-sequence ] unit-test
+
+[ "cd" ]
+[ "abcd" <sequence-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
+
+[ f ]
+[
+ "\"abc\" asdf" <sequence-parser>
+ [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
+] unit-test
+
+[ "abc\\\"def" ]
+[
+ "\"abc\\\"def\" asdf" <sequence-parser>
+ CHAR: \ CHAR: " take-quoted-string
+] unit-test
+
+[ "asdf" ]
+[
+ "\"abc\" asdf" <sequence-parser>
+ [ CHAR: \ CHAR: " take-quoted-string drop ]
+ [ skip-whitespace "asdf" take-sequence ] bi
+] unit-test
+
+[ f ]
+[
+ "\"abc asdf" <sequence-parser>
+ CHAR: \ CHAR: " take-quoted-string
+] unit-test
+
+[ "\"abc" ]
+[
+ "\"abc asdf" <sequence-parser>
+ [ CHAR: \ CHAR: " take-quoted-string drop ]
+ [ "\"abc" take-sequence ] bi
+] unit-test
+
+[ "c" ]
+[ "c" <sequence-parser> take-token ] unit-test
+
+[ f ]
+[ "" <sequence-parser> take-token ] unit-test
+
+[ "abcd e \\\"f g" ]
+[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
+
+[ "" ]
+[ "" <sequence-parser> take-rest ] unit-test
+
+[ "" ]
+[ "abc" <sequence-parser> dup "abc" take-sequence drop take-rest ] unit-test
+
+[ f ]
+[ "abc" <sequence-parser> "abcdefg" take-sequence ] unit-test
--- /dev/null
+! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: namespaces math kernel sequences accessors fry circular
+unicode.case unicode.categories locals combinators.short-circuit
+make combinators io splitting ;
+IN: sequence-parser
+
+TUPLE: sequence-parser sequence n ;
+
+: <sequence-parser> ( sequence -- sequence-parser )
+ sequence-parser new
+ swap >>sequence
+ 0 >>n ;
+
+: offset ( sequence-parser offset -- char/f )
+ swap
+ [ n>> + ] [ sequence>> ?nth ] bi ; inline
+
+: current ( sequence-parser -- char/f ) 0 offset ; inline
+
+: previous ( sequence-parser -- char/f ) -1 offset ; inline
+
+: peek-next ( sequence-parser -- char/f ) 1 offset ; inline
+
+: advance ( sequence-parser -- sequence-parser )
+ [ 1 + ] change-n ; inline
+
+: advance* ( sequence-parser -- )
+ advance drop ; inline
+
+: get+increment ( sequence-parser -- char/f )
+ [ current ] [ advance drop ] bi ; inline
+
+:: skip-until ( sequence-parser quot: ( obj -- ? ) -- )
+ sequence-parser current [
+ sequence-parser quot call [ sequence-parser advance quot skip-until ] unless
+ ] when ; inline recursive
+
+: sequence-parse-end? ( sequence-parser -- ? ) current not ;
+
+: take-until ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
+ over sequence-parse-end? [
+ 2drop f
+ ] [
+ [ drop n>> ]
+ [ skip-until ]
+ [ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
+ ] if ; inline
+
+: take-while ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
+ [ not ] compose take-until ; inline
+
+: <safe-slice> ( from to seq -- slice/f )
+ 3dup {
+ [ 2drop 0 < ]
+ [ [ drop ] 2dip length > ]
+ [ drop > ]
+ } 3|| [ 3drop f ] [ slice boa ] if ; inline
+
+:: take-sequence ( sequence-parser sequence -- obj/f )
+ sequence-parser [ n>> dup sequence length + ] [ sequence>> ] bi
+ <safe-slice> sequence sequence= [
+ sequence
+ sequence-parser [ sequence length + ] change-n drop
+ ] [
+ f
+ ] if ;
+
+:: take-until-sequence ( sequence-parser sequence -- sequence' )
+ sequence length <growing-circular> :> growing
+ sequence-parser
+ [
+ current growing push-growing-circular
+ sequence growing sequence=
+ ] take-until :> found
+ found dup length
+ growing length 1- - head
+ sequence-parser advance drop ;
+
+: skip-whitespace ( sequence-parser -- sequence-parser )
+ [ [ current blank? not ] take-until drop ] keep ;
+
+: take-rest-slice ( sequence-parser -- sequence/f )
+ [ sequence>> ] [ n>> ] bi
+ 2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
+
+: take-rest ( sequence-parser -- sequence )
+ [ take-rest-slice ] [ sequence>> like ] bi ;
+
+: take-until-object ( sequence-parser obj -- sequence )
+ '[ current _ = ] take-until ;
+
+: parse-sequence ( sequence quot -- )
+ [ <sequence-parser> ] dip call ; inline
+
+:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
+ sequence-parser n>> :> start-n
+ sequence-parser advance
+ [
+ {
+ [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
+ [ current quote-char = not ]
+ } 1||
+ ] take-while :> string
+ sequence-parser current quote-char = [
+ sequence-parser advance* string
+ ] [
+ start-n sequence-parser (>>n) f
+ ] if ;
+
+: (take-token) ( sequence-parser -- string )
+ skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
+
+:: take-token* ( sequence-parser escape-char quote-char -- string/f )
+ sequence-parser skip-whitespace
+ dup current {
+ { quote-char [ escape-char quote-char take-quoted-string ] }
+ { f [ drop f ] }
+ [ drop (take-token) ]
+ } case ;
+
+: take-token ( sequence-parser -- string/f )
+ CHAR: \ CHAR: " take-token* ;
+
+: write-full ( sequence-parser -- ) sequence>> write ;
+: write-rest ( sequence-parser -- ) take-rest write ;