]> gitweb.factorcode.org Git - factor.git/blob - extra/c/lexer/lexer.factor
factor: Move math.ranges => ranges.
[factor.git] / extra / c / lexer / lexer.factor
1 ! Copyright (C) 2009 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors combinators combinators.short-circuit
4 generalizations kernel locals math.order ranges
5 sequences.parser sequences sequences.generalizations
6 sorting.functor sorting.slots unicode ;
7 IN: c.lexer
8
9 : take-c-comment ( sequence-parser -- seq/f )
10     [
11         dup "/*" take-sequence [
12             "*/" take-until-sequence*
13         ] [
14             drop f
15         ] if
16     ] with-sequence-parser ;
17
18 : take-c++-comment ( sequence-parser -- seq/f )
19     [
20         dup "//" take-sequence [
21             [
22                 [
23                     { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
24                 ] take-until
25             ] [
26                 advance drop
27             ] bi
28         ] [
29             drop f
30         ] if
31     ] with-sequence-parser ;
32
33 : skip-whitespace/comments ( sequence-parser -- sequence-parser )
34     skip-whitespace-eol
35     {
36         { [ dup take-c-comment ] [ skip-whitespace/comments ] }
37         { [ dup take-c++-comment ] [ skip-whitespace/comments ] }
38         [ ]
39     } cond ;
40
41 : take-define-identifier ( sequence-parser -- string )
42     skip-whitespace/comments
43     [ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
44
45 :: take-quoted-string ( sequence-parser escape-char quote-char -- string )
46     sequence-parser n>> :> start-n
47     sequence-parser advance
48     [
49         {
50             [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
51             [ current quote-char = not ]
52         } 1||
53     ] take-while :> string
54     sequence-parser current quote-char = [
55         sequence-parser advance* string
56     ] [
57         start-n sequence-parser n<< f
58     ] if ;
59
60 : (take-token) ( sequence-parser -- string )
61     skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
62
63 :: take-token* ( sequence-parser escape-char quote-char -- string/f )
64     sequence-parser skip-whitespace
65     dup current {
66         { quote-char [ escape-char quote-char take-quoted-string ] }
67         { f [ drop f ] }
68         [ drop (take-token) ]
69     } case ;
70
71 : take-token ( sequence-parser -- string/f )
72     CHAR: \ CHAR: \" take-token* ;
73
74 : c-identifier-begin? ( ch -- ? )
75     CHAR: a CHAR: z [a..b]
76     CHAR: A CHAR: Z [a..b]
77     { CHAR: _ } 3append member? ;
78
79 : c-identifier-ch? ( ch -- ? )
80     CHAR: a CHAR: z [a..b]
81     CHAR: A CHAR: Z [a..b]
82     CHAR: 0 CHAR: 9 [a..b]
83     { CHAR: _ } 4 nappend member? ;
84
85 : (take-c-identifier) ( sequence-parser -- string/f )
86     dup current c-identifier-begin? [
87         [ current c-identifier-ch? ] take-while
88     ] [
89         drop f
90     ] if ;
91
92 : take-c-identifier ( sequence-parser -- string/f )
93     [ (take-c-identifier) ] with-sequence-parser ;
94
95 << "length" [ length ] define-sorting >>
96
97 : sort-tokens ( seq -- seq' )
98     { length>=< <=> } sort-by ;
99
100 : take-c-integer ( sequence-parser -- string/f )
101     [
102         dup take-integer [
103             swap
104             { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
105             take-longest [ append ] when*
106         ] [
107             drop f
108         ] if*
109     ] with-sequence-parser ;
110
111 CONSTANT: c-punctuators
112     {
113         "[" "]" "(" ")" "{" "}" "." "->"
114         "++" "--" "&" "*" "+" "-" "~" "!"
115         "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
116         "?" ":" ";" "..."
117         "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
118         "," "#" "##"
119         "<:" ":>" "<%" "%>" "%:" "%:%:"
120     }
121
122 : take-c-punctuator ( sequence-parser -- string/f )
123     c-punctuators take-longest ;