1 ! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: namespaces math kernel sequences accessors fry circular
4 unicode.case unicode.categories locals combinators.short-circuit
5 make combinators io splitting math.parser math.ranges
6 generalizations sorting.functor math.order sorting.slots ;
! A cursor over a sequence.
!   sequence - the underlying input being parsed
!   n        - the index used as the current position (see `offset`,
!              which adds to it before indexing into `sequence`)
TUPLE: sequence-parser
    sequence
    n ;
! Constructor word: per its stack effect it takes a sequence and leaves a
! sequence-parser.
! NOTE(review): the body of this definition is not visible in this excerpt.
11 : <sequence-parser> ( sequence -- sequence-parser )
! Saves the parser's index in the local `n`, then calls quot on the parser.
! The visible branch writes the saved index back into the parser and
! leaves f.
! NOTE(review): the lines closing this definition are not visible in this
! excerpt; presumably the branch runs only when quot yields f - confirm.
16 :: with-sequence-parser ( sequence-parser quot -- seq/f )
17 sequence-parser n>> :> n
18 sequence-parser quot call [
19 n sequence-parser (>>n) f
! Returns the element located `offset` positions away from the parser's
! index, or f when `?nth` finds no element at that position.
: offset ( sequence-parser offset -- char/f )
    ! Bring the parser to the top of the stack: `bi` applies both
    ! quotations to the top item, and the offset must stay underneath so
    ! that `+` can add it to the parser's index.
    swap
    [ n>> + ] [ sequence>> ?nth ] bi ; inline
! Element at the parser's index (offset 0), or f if `offset` finds none.
: current ( sequence-parser -- char/f )
    0 offset ; inline
! Element one position before the parser's index (offset -1), or f if
! `offset` finds none.
: previous ( sequence-parser -- char/f )
    -1 offset ; inline
! Element one position after the parser's index (offset 1), or f if
! `offset` finds none. The parser's index is not changed.
: peek-next ( sequence-parser -- char/f )
    1 offset ; inline
! Moves the parser forward by one position (increments slot `n`) and
! leaves the same parser on the stack.
: advance ( sequence-parser -- sequence-parser )
    dup n>> 1 + >>n ; inline
! Per its stack effect, consumes the parser and leaves nothing on the stack.
! NOTE(review): the body is not visible in this excerpt; presumably it is
! `advance` with the result dropped - confirm.
35 : advance* ( sequence-parser -- )
! Reads the element at the parser's index, then advances the parser by
! one. The element read before advancing is the result.
: get+increment ( sequence-parser -- char/f )
    dup current swap advance drop ; inline
! Advances the parser one position at a time until either `current` is f
! or quot, called with the parser, returns a true value.
! The parser is left positioned where the scan stopped.
:: skip-until ( sequence-parser quot: ( obj -- ? ) -- )
    [
        ! Keep going only while there is an element and quot rejects it.
        sequence-parser current
        [ sequence-parser quot call not ] [ f ] if
    ]
    [ sequence-parser advance drop ] while ; inline
! True when there is no element at the parser's index, i.e. `current`
! (offset 0) yields f.
: sequence-parse-end? ( sequence-parser -- ? )
    0 offset not ;
! Per its name and stack effect: takes elements from the parser, governed
! by quot, and leaves a sequence or f.
! The visible code first tests whether the parser is already at its end,
! and later extracts a `subseq` of the underlying sequence, passing the
! result through `f like`.
! NOTE(review): several lines of this definition are not visible in this
! excerpt, so the exact branch structure cannot be confirmed from here.
49 : take-until ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
50 over sequence-parse-end? [
55 [ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq f like
! Same as `take-until`, but with the predicate negated: quot's result is
! passed through `not` before `take-until` sees it.
: take-while ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
    '[ @ not ] take-until ; inline
! Builds a slice with `slice boa` from the three inputs, or drops them and
! leaves f when any of the `3||` guard checks is true.
! The one guard visible here is true when `to` is greater than the length
! of seq.
! NOTE(review): the other guards are not visible in this excerpt.
61 : <safe-slice> ( from to seq -- slice/f )
64 [ [ drop ] 2dip length > ]
66 } 3|| [ 3drop f ] [ slice boa ] if ; inline
! Compares `sequence` against the slice of the parser's input that starts
! at the parser's index and has the same length as `sequence` (the slice is
! built with <safe-slice>). On the visible branch the parser's index is
! moved forward by the length of `sequence`.
! NOTE(review): the remaining lines (including what is left on the stack in
! each branch) are not visible in this excerpt.
68 :: take-sequence ( sequence-parser sequence -- obj/f )
69 sequence-parser [ n>> dup sequence length + ] [ sequence>> ] bi
70 <safe-slice> sequence sequence= [
72 sequence-parser [ sequence length + ] change-n drop
! Per its stack effect, takes a parser and a sequence and leaves nothing.
! NOTE(review): the body is not visible in this excerpt; presumably it is
! `take-sequence` with the result dropped - confirm.
77 : take-sequence* ( sequence-parser sequence -- )
! Scans the parser's input for `sequence`, using a growing circular buffer
! (`growing`) created with the same length as `sequence`.
! Visible steps: the starting index is remembered in `saved`; an element is
! pushed into the buffer and the buffer is compared with `sequence`; when
! they match, the collected result is trimmed with `head` and the parser's
! index is moved back by (buffer length - 1); another branch writes `saved`
! back into the parser's index.
! NOTE(review): several lines of this definition are not visible in this
! excerpt, so loop and branch structure cannot be confirmed from here.
80 :: take-until-sequence ( sequence-parser sequence -- sequence'/f )
81 sequence-parser n>> :> saved
82 sequence length <growing-circular> :> growing
85 current growing push-growing-circular
86 sequence growing sequence=
88 growing sequence sequence= [
90 growing length 1- - head
91 sequence-parser [ growing length - 1 + ] change-n drop
92 ! sequence-parser advance drop
94 saved sequence-parser (>>n)
! Runs `take-until-sequence`, keeping its result in `out`; a visible line
! then moves the parser's index forward by the length of `sequence`.
! NOTE(review): the lines deciding when that advance happens and what is
! returned are not visible in this excerpt; presumably the advance skips
! past the delimiter - confirm.
98 :: take-until-sequence* ( sequence-parser sequence -- sequence'/f )
99 sequence-parser sequence take-until-sequence :> out
101 sequence-parser [ sequence length + ] change-n drop
! Consumes elements with `take-until` until the current one is not
! `blank?`, discards what was consumed, and leaves the parser on the stack.
: skip-whitespace ( sequence-parser -- sequence-parser )
    dup [ current blank? not ] take-until drop ;
! Like skip-whitespace, but only space, tab and carriage return are
! skipped; any other element (a newline included) stops the scan.
! The consumed text is discarded and the parser is left on the stack.
: skip-whitespace-eol ( sequence-parser -- sequence-parser )
    dup [ current " \t\r" member? not ] take-until drop ;
! Runs under `with-sequence-parser`. The visible code tries to take the
! literal "/*" at the parser's position and, on the branch that follows,
! takes input up to "*/" with `take-until-sequence*`.
! NOTE(review): the remaining lines of this definition are not visible in
! this excerpt.
110 : take-c-comment ( sequence-parser -- seq/f )
112 dup "/*" take-sequence [
113 "*/" take-until-sequence*
117 ] with-sequence-parser ;
! Runs under `with-sequence-parser`. The visible code tries to take the
! literal "//" at the parser's position; a visible predicate is true when
! the current element is a newline or the parser is at its end.
! NOTE(review): how that predicate is used, and the rest of the definition,
! are not visible in this excerpt.
119 : take-c++-comment ( sequence-parser -- seq/f )
121 dup "//" take-sequence [
124 { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
132 ] with-sequence-parser ;
! The two visible clauses each pair a test (`dup take-c-comment` /
! `dup take-c++-comment`) with a recursive call to this word, so a taken
! comment leads to another round of skipping.
! NOTE(review): the surrounding combinator and any other clauses are not
! visible in this excerpt.
134 : skip-whitespace/comments ( sequence-parser -- sequence-parser )
137 { [ dup take-c-comment ] [ skip-whitespace/comments ] }
138 { [ dup take-c++-comment ] [ skip-whitespace/comments ] }
! Skips leading whitespace and comments, then takes input with
! `take-until`, stopping at the first element that is either `blank?` or
! an opening parenthesis.
: take-define-identifier ( sequence-parser -- string )
    skip-whitespace/comments
    [ current [ blank? ] [ CHAR: ( = ] bi or ] take-until ;
! Tail slice of the underlying sequence starting at the parser's index.
! Leaves f instead when the index is greater than the sequence's length.
! The parser's index is not changed.
: take-rest-slice ( sequence-parser -- sequence/f )
    [ sequence>> ] [ n>> ] bi
    ! Stack: seq n. Guard against an index past the end.
    2dup swap length >
    [ 2drop f ] [ tail-slice ] if ; inline
! Result of `take-rest-slice`, converted with `like` using the parser's
! underlying sequence as the exemplar.
: take-rest ( sequence-parser -- sequence )
    dup take-rest-slice
    swap sequence>> like ;
! Takes input with `take-until`, stopping at the first position whose
! current element equals obj (compared with `=`).
: take-until-object ( sequence-parser obj -- sequence )
    [ [ current ] dip = ] curry take-until ;
! Wraps `sequence` in a new sequence-parser and calls quot with that
! parser on the stack.
: parse-sequence ( sequence quot -- )
    swap <sequence-parser> swap call ; inline
! Takes a quoted string. Visible steps: the starting index is remembered in
! `start-n`; the parser is advanced once; input is taken with `take-while`
! using a predicate built from two tests (previous element is escape-char
! and current is quote-char / current is not quote-char); afterwards, if the
! current element is quote-char the parser is advanced past it and the taken
! string is the result, otherwise the starting index is restored and the
! result is f.
! NOTE(review): some lines (the combinator joining the two tests and the
! final conditional) are not visible in this excerpt.
159 :: take-quoted-string ( sequence-parser escape-char quote-char -- string )
160 sequence-parser n>> :> start-n
161 sequence-parser advance
164 [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
165 [ current quote-char = not ]
167 ] take-while :> string
168 sequence-parser current quote-char = [
169 sequence-parser advance* string
171 start-n sequence-parser (>>n) f
! Skips leading whitespace, then takes input with `take-until`, stopping
! at the first position whose current element is `blank?` or is f.
: (take-token) ( sequence-parser -- string )
    skip-whitespace
    [ current [ blank? ] [ f = ] bi or ] take-until ;
! Skips whitespace, then dispatches: the visible clause keyed on quote-char
! calls `take-quoted-string` with escape-char and quote-char, and the
! visible default clause drops a value and calls `(take-token)`.
! NOTE(review): the dispatching combinator and the value being dispatched
! on are not visible in this excerpt.
177 :: take-token* ( sequence-parser escape-char quote-char -- string/f )
178 sequence-parser skip-whitespace
180 { quote-char [ escape-char quote-char take-quoted-string ] }
182 [ drop (take-token) ]
! `take-token*` with backslash as the escape character and double quote
! as the quote character.
: take-token ( sequence-parser -- string/f )
    CHAR: \ CHAR: "
    take-token* ;
! Takes the run of elements at the parser's position for which `digit?`
! holds. No numeric conversion is performed in this word: the result is
! whatever `take-until` leaves (a sequence, or f).
: take-integer ( sequence-parser -- n/f )
    [ current digit? not ] take-until ;
! Takes n elements starting at the parser's index. The visible guard is
! true when (index + n) is greater than the length of the underlying
! sequence; the visible lines of the other path extract
! `subseq` from index to index + n and move the index forward by n.
! NOTE(review): the branch bodies' boundaries and the closing conditional
! are not visible in this excerpt.
191 :: take-n ( sequence-parser n -- seq/f )
192 n sequence-parser [ n>> + ] [ sequence>> length ] bi > [
195 sequence-parser n>> dup n + sequence-parser sequence>> subseq
196 sequence-parser [ n + ] change-n drop
! True when ch may start a C identifier: an ASCII letter (a-z, A-Z) or
! an underscore. Membership is tested with `member?`, so any value that is
! not one of these characters (f included) gives f.
: c-identifier-begin? ( ch -- ? )
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" member? ;
! True when ch may appear inside a C identifier: an ASCII letter
! (a-z, A-Z), a decimal digit (0-9) or an underscore. Membership is tested
! with `member?`, so any other value (f included) gives f.
: c-identifier-ch? ( ch -- ? )
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
    member? ;
! Checks that the current element satisfies `c-identifier-begin?`; on the
! visible branch it then takes the run of elements satisfying
! `c-identifier-ch?` with `take-while`.
! NOTE(review): the other branch and the closing conditional are not
! visible in this excerpt.
210 : (take-c-identifier) ( sequence-parser -- string/f )
211 dup current c-identifier-begin? [
212 [ current c-identifier-ch? ] take-while
! Runs `(take-c-identifier)` through `with-sequence-parser`.
: take-c-identifier ( sequence-parser -- string/f )
    [ (take-c-identifier) ]
    with-sequence-parser ;
! Parse-time step: asks `define-sorting` to generate sorting words named
! after "length" that compare by `length`. `sort-tokens` uses the generated
! `length>=<` word.
<<
"length" [ length ] define-sorting
>>
! Sorts seq with `sort-by`, using `length>=<` as the first comparator and
! `<=>` as the second.
! NOTE(review): `length>=<` is presumably the inverted length comparison
! (longest first), which is what `take-longest` appears to rely on - confirm.
: sort-tokens ( seq -- seq' )
    { length>=< <=> }
    sort-by ;
! Tries each element of seq in order against the parser's input and
! returns the first one that `take-sequence` accepts (f if none does).
! Each attempt runs under `with-sequence-parser`.
: take-first-matching ( sequence-parser seq -- seq )
    ! Put the parser on top so fry captures it in the `_` hole, leaving
    ! seq underneath as the sequence that `find` iterates over.
    swap
    ! Inside the quotation the stack is ( candidate parser ); the inner
    ! `swap` reorders it to what `take-sequence` expects.
    '[ _ [ swap take-sequence ] with-sequence-parser ] find nip ;
! Orders the candidates with `sort-tokens`, then returns the first one
! that `take-first-matching` finds at the parser's position.
: take-longest ( sequence-parser seq -- seq )
    sort-tokens
    take-first-matching ;
! Runs under `with-sequence-parser`. The visible lines try to take the
! longest matching entry from a list of C integer-literal suffixes
! (combinations of u/U and l/L/ll/LL) and, when one is found, `append` it
! to a value already on the stack.
! NOTE(review): the lines that take the digits themselves are not visible
! in this excerpt.
233 : take-c-integer ( sequence-parser -- string/f )
237 { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
238 take-longest [ append ] when*
242 ] with-sequence-parser ;
! Table of C punctuator strings (brackets, operators, assignment operators
! and the digraph spellings such as "<:" and "%:"). `take-c-punctuator`
! passes this table to `take-longest`.
! NOTE(review): the opening and closing delimiters of the literal, and
! possibly some entries, are not visible in this excerpt.
244 CONSTANT: c-punctuators
246 "[" "]" "(" ")" "{" "}" "." "->"
247 "++" "--" "&" "*" "+" "-" "~" "!"
248 "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
250 "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
252 "<:" ":>" "<%" "%>" "%:" "%:%:"
! Takes the punctuator at the parser's position by handing the
! `c-punctuators` table to `take-longest`.
: take-c-punctuator ( sequence-parser -- string/f )
    c-punctuators
    take-longest ;
! Writes the parser's entire underlying sequence with `write`,
! regardless of the parser's index.
: write-full ( sequence-parser -- )
    sequence>> write ;
! Writes the result of `take-rest` (the input from the parser's index
! onward) with `write`.
: write-rest ( sequence-parser -- )
    take-rest write ;