--- /dev/null
+Doug Coleman
\ No newline at end of file
--- /dev/null
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors c.lexer kernel sequence-parser tools.test ;
+IN: c.lexer.tests
+
+[ 36 ]
+[
+ " //jofiejoe\n //eoieow\n/*asdf*/\n "
+ <sequence-parser> skip-whitespace/comments n>>
+] unit-test
+
+[ f "33asdf" ]
+[ "33asdf" <sequence-parser> [ take-c-identifier ] [ take-rest ] bi ] unit-test
+
+[ "asdf" ]
+[ "asdf" <sequence-parser> take-c-identifier ] unit-test
+
+[ "_asdf" ]
+[ "_asdf" <sequence-parser> take-c-identifier ] unit-test
+
+[ "_asdf400" ]
+[ "_asdf400" <sequence-parser> take-c-identifier ] unit-test
+
+[ "asdfasdf" ] [
+ "/*asdfasdf*/" <sequence-parser> take-c-comment
+] unit-test
+
+[ "k" ] [
+ "/*asdfasdf*/k" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
+] unit-test
+
+[ "omg" ] [
+ "//asdfasdf\nomg" <sequence-parser>
+ [ take-c++-comment drop ] [ take-rest ] bi
+] unit-test
+
+[ "omg" ] [
+ "omg" <sequence-parser>
+ [ take-c++-comment drop ] [ take-rest ] bi
+] unit-test
+
+[ "/*asdfasdf" ] [
+ "/*asdfasdf" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
+] unit-test
+
+[ "asdf" "eoieoei" ] [
+ "//asdf\neoieoei" <sequence-parser>
+ [ take-c++-comment ] [ take-rest ] bi
+] unit-test
+
+[ f ]
+[
+ "\"abc\" asdf" <sequence-parser>
+ [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
+] unit-test
+
+[ "abc\\\"def" ]
+[
+ "\"abc\\\"def\" asdf" <sequence-parser>
+ CHAR: \ CHAR: " take-quoted-string
+] unit-test
+
+[ "asdf" ]
+[
+ "\"abc\" asdf" <sequence-parser>
+ [ CHAR: \ CHAR: " take-quoted-string drop ]
+ [ skip-whitespace "asdf" take-sequence ] bi
+] unit-test
+
+[ f ]
+[
+ "\"abc asdf" <sequence-parser>
+ CHAR: \ CHAR: " take-quoted-string
+] unit-test
+
+[ "\"abc" ]
+[
+ "\"abc asdf" <sequence-parser>
+ [ CHAR: \ CHAR: " take-quoted-string drop ]
+ [ "\"abc" take-sequence ] bi
+] unit-test
+
+[ "c" ]
+[ "c" <sequence-parser> take-token ] unit-test
+
+[ f ]
+[ "" <sequence-parser> take-token ] unit-test
+
+[ "abcd e \\\"f g" ]
+[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
+
+[ "123" ]
+[ "123jjj" <sequence-parser> take-c-integer ] unit-test
+
+[ "123uLL" ]
+[ "123uLL" <sequence-parser> take-c-integer ] unit-test
+
+[ "123ull" ]
+[ "123ull" <sequence-parser> take-c-integer ] unit-test
+
+[ "123u" ]
+[ "123u" <sequence-parser> take-c-integer ] unit-test
+
--- /dev/null
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors combinators combinators.short-circuit
+generalizations kernel locals math.order math.ranges
+sequence-parser sequences sorting.functor sorting.slots
+unicode.categories ;
+IN: c.lexer
+
+: take-c-comment ( sequence-parser -- seq/f )
+ [
+ dup "/*" take-sequence [
+ "*/" take-until-sequence*
+ ] [
+ drop f
+ ] if
+ ] with-sequence-parser ;
+
+: take-c++-comment ( sequence-parser -- seq/f )
+ [
+ dup "//" take-sequence [
+ [
+ [
+ { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
+ ] take-until
+ ] [
+ advance drop
+ ] bi
+ ] [
+ drop f
+ ] if
+ ] with-sequence-parser ;
+
+: skip-whitespace/comments ( sequence-parser -- sequence-parser )
+ skip-whitespace-eol
+ {
+ { [ dup take-c-comment ] [ skip-whitespace/comments ] }
+ { [ dup take-c++-comment ] [ skip-whitespace/comments ] }
+ [ ]
+ } cond ;
+
+: take-define-identifier ( sequence-parser -- string )
+ skip-whitespace/comments
+ [ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
+
+:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
+ sequence-parser n>> :> start-n
+ sequence-parser advance
+ [
+ {
+ [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
+ [ current quote-char = not ]
+ } 1||
+ ] take-while :> string
+ sequence-parser current quote-char = [
+ sequence-parser advance* string
+ ] [
+ start-n sequence-parser (>>n) f
+ ] if ;
+
+: (take-token) ( sequence-parser -- string )
+ skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
+
+:: take-token* ( sequence-parser escape-char quote-char -- string/f )
+ sequence-parser skip-whitespace
+ dup current {
+ { quote-char [ escape-char quote-char take-quoted-string ] }
+ { f [ drop f ] }
+ [ drop (take-token) ]
+ } case ;
+
+: take-token ( sequence-parser -- string/f )
+ CHAR: \ CHAR: " take-token* ;
+
+: c-identifier-begin? ( ch -- ? )
+ CHAR: a CHAR: z [a,b]
+ CHAR: A CHAR: Z [a,b]
+ { CHAR: _ } 3append member? ;
+
+: c-identifier-ch? ( ch -- ? )
+ CHAR: a CHAR: z [a,b]
+ CHAR: A CHAR: Z [a,b]
+ CHAR: 0 CHAR: 9 [a,b]
+ { CHAR: _ } 4 nappend member? ;
+
+: (take-c-identifier) ( sequence-parser -- string/f )
+ dup current c-identifier-begin? [
+ [ current c-identifier-ch? ] take-while
+ ] [
+ drop f
+ ] if ;
+
+: take-c-identifier ( sequence-parser -- string/f )
+ [ (take-c-identifier) ] with-sequence-parser ;
+
+<< "length" [ length ] define-sorting >>
+
+: sort-tokens ( seq -- seq' )
+ { length>=< <=> } sort-by ;
+
+: take-c-integer ( sequence-parser -- string/f )
+ [
+ dup take-integer [
+ swap
+ { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
+ take-longest [ append ] when*
+ ] [
+ drop f
+ ] if*
+ ] with-sequence-parser ;
+
+CONSTANT: c-punctuators
+ {
+ "[" "]" "(" ")" "{" "}" "." "->"
+ "++" "--" "&" "*" "+" "-" "~" "!"
+ "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
+ "?" ":" ";" "..."
+ "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
+ "," "#" "##"
+ "<:" ":>" "<%" "%>" "%:" "%:%:"
+ }
+
+: take-c-punctuator ( sequence-parser -- string/f )
+ c-punctuators take-longest ;
io.streams.string kernel combinators accessors io.pathnames
fry sequences arrays locals namespaces io.directories
assocs math splitting make unicode.categories
-combinators.short-circuit ;
+combinators.short-circuit c.lexer ;
IN: c.preprocessor
: initial-library-paths ( -- seq )
[ "cd" ]
[ "abcd" <sequence-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
-[ f ]
-[
- "\"abc\" asdf" <sequence-parser>
- [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
-] unit-test
-
-[ "abc\\\"def" ]
-[
- "\"abc\\\"def\" asdf" <sequence-parser>
- CHAR: \ CHAR: " take-quoted-string
-] unit-test
-
-[ "asdf" ]
-[
- "\"abc\" asdf" <sequence-parser>
- [ CHAR: \ CHAR: " take-quoted-string drop ]
- [ skip-whitespace "asdf" take-sequence ] bi
-] unit-test
-
-[ f ]
-[
- "\"abc asdf" <sequence-parser>
- CHAR: \ CHAR: " take-quoted-string
-] unit-test
-
-[ "\"abc" ]
-[
- "\"abc asdf" <sequence-parser>
- [ CHAR: \ CHAR: " take-quoted-string drop ]
- [ "\"abc" take-sequence ] bi
-] unit-test
-
-[ "c" ]
-[ "c" <sequence-parser> take-token ] unit-test
-
-[ f ]
-[ "" <sequence-parser> take-token ] unit-test
-
-[ "abcd e \\\"f g" ]
-[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
-
[ f ]
[ "" <sequence-parser> take-rest ] unit-test
[ "abcd" ] [ "abcd" <sequence-parser> 4 take-n ] unit-test
[ "abcd" "efg" ] [ "abcdefg" <sequence-parser> [ 4 take-n ] [ take-rest ] bi ] unit-test
-[ "asdfasdf" ] [
- "/*asdfasdf*/" <sequence-parser> take-c-comment
-] unit-test
-
-[ "k" ] [
- "/*asdfasdf*/k" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
-] unit-test
-
-[ "omg" ] [
- "//asdfasdf\nomg" <sequence-parser>
- [ take-c++-comment drop ] [ take-rest ] bi
-] unit-test
-
-[ "omg" ] [
- "omg" <sequence-parser>
- [ take-c++-comment drop ] [ take-rest ] bi
-] unit-test
-
-[ "/*asdfasdf" ] [
- "/*asdfasdf" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
-] unit-test
-
-[ "asdf" "eoieoei" ] [
- "//asdf\neoieoei" <sequence-parser>
- [ take-c++-comment ] [ take-rest ] bi
-] unit-test
-
-[ f "33asdf" ]
-[ "33asdf" <sequence-parser> [ take-c-identifier ] [ take-rest ] bi ] unit-test
-
-[ "asdf" ]
-[ "asdf" <sequence-parser> take-c-identifier ] unit-test
-
-[ "_asdf" ]
-[ "_asdf" <sequence-parser> take-c-identifier ] unit-test
-
-[ "_asdf400" ]
-[ "_asdf400" <sequence-parser> take-c-identifier ] unit-test
-
-[ "123" ]
-[ "123jjj" <sequence-parser> take-c-integer ] unit-test
-
-[ "123uLL" ]
-[ "123uLL" <sequence-parser> take-c-integer ] unit-test
-
-[ "123ull" ]
-[ "123ull" <sequence-parser> take-c-integer ] unit-test
-
-[ "123u" ]
-[ "123u" <sequence-parser> take-c-integer ] unit-test
-
-[ 36 ]
-[
- " //jofiejoe\n //eoieow\n/*asdf*/\n "
- <sequence-parser> skip-whitespace/comments n>>
-] unit-test
-
[ f ]
[ "\n" <sequence-parser> take-integer ] unit-test
! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
-USING: namespaces math kernel sequences accessors fry circular
-unicode.case unicode.categories locals combinators.short-circuit
-make combinators io splitting math.parser math.ranges
-generalizations sorting.functor math.order sorting.slots ;
+USING: accessors circular combinators.short-circuit fry io
+kernel locals math math.order sequences sorting.functor
+sorting.slots unicode.categories ;
IN: sequence-parser
TUPLE: sequence-parser sequence n ;
: skip-whitespace-eol ( sequence-parser -- sequence-parser )
[ [ current " \t\r" member? not ] take-until drop ] keep ;
-: take-c-comment ( sequence-parser -- seq/f )
- [
- dup "/*" take-sequence [
- "*/" take-until-sequence*
- ] [
- drop f
- ] if
- ] with-sequence-parser ;
-
-: take-c++-comment ( sequence-parser -- seq/f )
- [
- dup "//" take-sequence [
- [
- [
- { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
- ] take-until
- ] [
- advance drop
- ] bi
- ] [
- drop f
- ] if
- ] with-sequence-parser ;
-
-: skip-whitespace/comments ( sequence-parser -- sequence-parser )
- skip-whitespace-eol
- {
- { [ dup take-c-comment ] [ skip-whitespace/comments ] }
- { [ dup take-c++-comment ] [ skip-whitespace/comments ] }
- [ ]
- } cond ;
-
-: take-define-identifier ( sequence-parser -- string )
- skip-whitespace/comments
- [ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
-
: take-rest-slice ( sequence-parser -- sequence/f )
[ sequence>> ] [ n>> ] bi
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
: parse-sequence ( sequence quot -- )
[ <sequence-parser> ] dip call ; inline
-:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
- sequence-parser n>> :> start-n
- sequence-parser advance
- [
- {
- [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
- [ current quote-char = not ]
- } 1||
- ] take-while :> string
- sequence-parser current quote-char = [
- sequence-parser advance* string
- ] [
- start-n sequence-parser (>>n) f
- ] if ;
-
-: (take-token) ( sequence-parser -- string )
- skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
-
-:: take-token* ( sequence-parser escape-char quote-char -- string/f )
- sequence-parser skip-whitespace
- dup current {
- { quote-char [ escape-char quote-char take-quoted-string ] }
- { f [ drop f ] }
- [ drop (take-token) ]
- } case ;
-
-: take-token ( sequence-parser -- string/f )
- CHAR: \ CHAR: " take-token* ;
-
: take-integer ( sequence-parser -- n/f )
[ current digit? ] take-while ;
sequence-parser [ n + ] change-n drop
] if ;
-: c-identifier-begin? ( ch -- ? )
- CHAR: a CHAR: z [a,b]
- CHAR: A CHAR: Z [a,b]
- { CHAR: _ } 3append member? ;
-
-: c-identifier-ch? ( ch -- ? )
- CHAR: a CHAR: z [a,b]
- CHAR: A CHAR: Z [a,b]
- CHAR: 0 CHAR: 9 [a,b]
- { CHAR: _ } 4 nappend member? ;
-
-: (take-c-identifier) ( sequence-parser -- string/f )
- dup current c-identifier-begin? [
- [ current c-identifier-ch? ] take-while
- ] [
- drop f
- ] if ;
-
-: take-c-identifier ( sequence-parser -- string/f )
- [ (take-c-identifier) ] with-sequence-parser ;
-
<< "length" [ length ] define-sorting >>
: sort-tokens ( seq -- seq' )
swap
'[ _ [ swap take-sequence ] with-sequence-parser ] find nip ;
-
: take-longest ( sequence-parser seq -- seq )
sort-tokens take-first-matching ;
-: take-c-integer ( sequence-parser -- string/f )
- [
- dup take-integer [
- swap
- { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
- take-longest [ append ] when*
- ] [
- drop f
- ] if*
- ] with-sequence-parser ;
-
-CONSTANT: c-punctuators
- {
- "[" "]" "(" ")" "{" "}" "." "->"
- "++" "--" "&" "*" "+" "-" "~" "!"
- "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
- "?" ":" ";" "..."
- "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
- "," "#" "##"
- "<:" ":>" "<%" "%>" "%:" "%:%:"
- }
-
-: take-c-punctuator ( sequence-parser -- string/f )
- c-punctuators take-longest ;
-
: write-full ( sequence-parser -- ) sequence>> write ;
: write-rest ( sequence-parser -- ) take-rest write ;