]> gitweb.factorcode.org Git - factor.git/blob - extra/c/lexer/lexer.factor
Switch to https urls
[factor.git] / extra / c / lexer / lexer.factor
1 ! Copyright (C) 2009 Doug Coleman.
2 ! See https://factorcode.org/license.txt for BSD license.
3 USING: accessors combinators combinators.short-circuit kernel
4 math.order ranges sequences sequences.generalizations
5 sequences.parser sorting.functor sorting.slots unicode ;
6 IN: c.lexer
7
8 : take-c-comment ( sequence-parser -- seq/f )
9     [
10         dup "/*" take-sequence [
11             "*/" take-until-sequence*
12         ] [
13             drop f
14         ] if
15     ] with-sequence-parser ;
16
17 : take-c++-comment ( sequence-parser -- seq/f )
18     [
19         dup "//" take-sequence [
20             [
21                 [
22                     { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
23                 ] take-until
24             ] [
25                 advance drop
26             ] bi
27         ] [
28             drop f
29         ] if
30     ] with-sequence-parser ;
31
32 : skip-whitespace/comments ( sequence-parser -- sequence-parser )
33     skip-whitespace-eol
34     {
35         { [ dup take-c-comment ] [ skip-whitespace/comments ] }
36         { [ dup take-c++-comment ] [ skip-whitespace/comments ] }
37         [ ]
38     } cond ;
39
40 : take-define-identifier ( sequence-parser -- string )
41     skip-whitespace/comments
42     [ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
43
44 :: take-quoted-string ( sequence-parser escape-char quote-char -- string )
45     sequence-parser n>> :> start-n
46     sequence-parser advance
47     [
48         {
49             [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
50             [ current quote-char = not ]
51         } 1||
52     ] take-while :> string
53     sequence-parser current quote-char = [
54         sequence-parser advance* string
55     ] [
56         start-n sequence-parser n<< f
57     ] if ;
58
59 : (take-token) ( sequence-parser -- string )
60     skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
61
62 :: take-token* ( sequence-parser escape-char quote-char -- string/f )
63     sequence-parser skip-whitespace
64     dup current {
65         { quote-char [ escape-char quote-char take-quoted-string ] }
66         { f [ drop f ] }
67         [ drop (take-token) ]
68     } case ;
69
70 : take-token ( sequence-parser -- string/f )
71     CHAR: \ CHAR: \" take-token* ;
72
73 : c-identifier-begin? ( ch -- ? )
74     CHAR: a CHAR: z [a..b]
75     CHAR: A CHAR: Z [a..b]
76     { CHAR: _ } 3append member? ;
77
78 : c-identifier-ch? ( ch -- ? )
79     CHAR: a CHAR: z [a..b]
80     CHAR: A CHAR: Z [a..b]
81     CHAR: 0 CHAR: 9 [a..b]
82     { CHAR: _ } 4 nappend member? ;
83
84 : (take-c-identifier) ( sequence-parser -- string/f )
85     dup current c-identifier-begin? [
86         [ current c-identifier-ch? ] take-while
87     ] [
88         drop f
89     ] if ;
90
91 : take-c-identifier ( sequence-parser -- string/f )
92     [ (take-c-identifier) ] with-sequence-parser ;
93
94 << "length" [ length ] define-sorting >>
95
96 : sort-tokens ( seq -- seq' )
97     { length>=< <=> } sort-by ;
98
99 : take-c-integer ( sequence-parser -- string/f )
100     [
101         dup take-integer [
102             swap
103             { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
104             take-longest [ append ] when*
105         ] [
106             drop f
107         ] if*
108     ] with-sequence-parser ;
109
110 CONSTANT: c-punctuators
111     {
112         "[" "]" "(" ")" "{" "}" "." "->"
113         "++" "--" "&" "*" "+" "-" "~" "!"
114         "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
115         "?" ":" ";" "..."
116         "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
117         "," "#" "##"
118         "<:" ":>" "<%" "%>" "%:" "%:%:"
119     }
120
121 : take-c-punctuator ( sequence-parser -- string/f )
122     c-punctuators take-longest ;