1 ! Copyright (C) 2004 Chris Double.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays kernel lists lists.lazy math promises
4 sequences sequences.deep strings unicode ;
7 ! Parser combinator protocol
8 GENERIC: parse ( input parser -- list )
10 M: promise parse ( input parser -- list )
13 TUPLE: parse-result parsed unparsed ;
15 ERROR: cannot-parse input ;
17 : parse-1 ( input parser -- result )
24 C: <parse-result> parse-result
26 : <parse-results> ( parsed unparsed -- list )
27 <parse-result> 1list ;
29 : parse-result-parsed-slice ( parse-result -- slice )
31 unparsed>> 0 0 rot <slice>
34 dup from>> [ rot parsed>> length - ] keep
38 : string= ( str1 str2 ignore-case -- ? )
39 [ [ >upper ] bi@ ] when sequence= ;
41 : string-head? ( str head ignore-case -- ? )
45 [ [ length head-slice ] keep ] dip string=
48 : ?string-head ( str head ignore-case -- newstr ? )
49 [ 2dup ] dip string-head?
50 [ length tail-slice t ] [ drop f ] if ;
52 TUPLE: token-parser string ignore-case? ;
54 C: <token-parser> token-parser
56 : token ( string -- parser ) f <token-parser> ;
58 : case-insensitive-token ( string -- parser ) t <token-parser> ;
60 M:: token-parser parse ( input parser -- list )
61 parser string>> :> str
62 parser ignore-case?>> :> case?
64 str input str case? ?string-head
65 [ <parse-results> ] [ 2drop nil ] if ;
67 : 1token ( n -- parser ) 1string token ;
69 TUPLE: satisfy-parser quot ;
71 C: satisfy satisfy-parser
73 M: satisfy-parser parse ( input parser -- list )
74 ! A parser that succeeds if the predicate,
75 ! when passed the first character in the input, returns
80 quot>> [ unclip-slice dup ] dip call( char -- ? )
81 [ swap <parse-results> ] [ 2drop nil ] if
84 LAZY: any-char-parser ( -- parser )
87 TUPLE: epsilon-parser ;
89 C: epsilon epsilon-parser
91 M: epsilon-parser parse ( input parser -- list )
92 ! A parser that parses the empty string. It
93 ! does not consume any input and always returns
94 ! an empty list as the parse tree with the
96 drop "" swap <parse-results> ;
98 TUPLE: succeed-parser result ;
100 C: succeed succeed-parser
102 M: succeed-parser parse ( input parser -- list )
103 ! A parser that always returns 'result' as a
104 ! successful parse with no input consumed.
105 result>> swap <parse-results> ;
111 M: fail-parser parse ( input parser -- list )
112 ! A parser that always fails and returns
113 ! an empty list of successes.
116 TUPLE: ensure-parser test ;
118 : ensure ( parser -- ensure )
121 M: ensure-parser parse ( input parser -- list )
122 2dup test>> parse nil?
123 [ 2drop nil ] [ drop t swap <parse-results> ] if ;
125 TUPLE: ensure-not-parser test ;
127 : ensure-not ( parser -- ensure )
128 ensure-not-parser boa ;
130 M: ensure-not-parser parse ( input parser -- list )
131 2dup test>> parse nil?
132 [ drop t swap <parse-results> ] [ 2drop nil ] if ;
134 TUPLE: and-parser parsers ;
136 : <&> ( parser1 parser2 -- parser )
138 [ parsers>> ] dip suffix
141 ] if and-parser boa ;
143 : <and-parser> ( parsers -- parser )
144 dup length 1 = [ first ] [ and-parser boa ] if ;
146 : and-parser-parse ( list p1 -- list )
148 dup unparsed>> rot parse
151 [ parsed>> 2array ] keep
152 unparsed>> <parse-result>
154 ] with lmap-lazy lconcat ;
156 M: and-parser parse ( input parser -- list )
157 ! Parse 'input' by sequentially combining the
158 ! two parsers. First parser1 is applied to the
159 ! input then parser2 is applied to the rest of
160 ! the input strings from the first parser.
161 parsers>> unclip swapd parse
162 [ [ and-parser-parse ] reduce ] 2curry <promise> ;
164 TUPLE: or-parser parsers ;
166 : <or-parser> ( parsers -- parser )
167 dup length 1 = [ first ] [ or-parser boa ] if ;
169 : <|> ( parser1 parser2 -- parser )
172 M: or-parser parse ( input parser1 -- list )
173 ! Return the combined list resulting from the parses
174 ! of parser1 and parser2 being applied to the same
175 ! input. This implements the choice parsing operator.
176 parsers>> sequence>list
177 [ parse ] with lmap-lazy lconcat ;
179 : trim-head-slice ( string -- string )
180 ! Return a new string without any leading whitespace
181 ! from the original string.
183 dup first blank? [ rest-slice trim-head-slice ] when
186 TUPLE: sp-parser p1 ;
188 ! Return a parser that first skips all whitespace before
189 ! calling the original parser.
192 M: sp-parser parse ( input parser -- list )
193 ! Skip all leading whitespace from the input then call
194 ! the parser on the remaining input.
195 [ trim-head-slice ] dip p1>> parse ;
197 TUPLE: just-parser p1 ;
201 M: just-parser parse ( input parser -- result )
202 ! Calls the given parser on the input removes
203 ! from the results anything where the remaining
204 ! input to be parsed is not empty. So ensures a
205 ! fully parsed input string.
206 p1>> parse [ unparsed>> empty? ] lfilter ;
208 TUPLE: apply-parser p1 quot ;
212 M: apply-parser parse ( input parser -- result )
213 ! Calls the parser on the input. For each successful
214 ! parse the quot is call with the parse result on the stack.
215 ! The result of that quotation then becomes the new parse result.
216 ! This allows modification of parse tree results (like
217 ! converting strings to integers, etc).
218 [ p1>> ] [ quot>> ] bi
220 [ parsed>> swap call ] keep
221 unparsed>> <parse-result>
224 TUPLE: some-parser p1 ;
228 M: some-parser parse ( input parser -- result )
229 ! Calls the parser on the input, guarantees
230 ! the parse is complete (the remaining input is empty),
231 ! picks the first solution and only returns the parse
232 ! tree since the remaining input is empty.
235 : <& ( parser1 parser2 -- parser )
236 ! Same as <&> except discard the results of the second parser.
239 : &> ( parser1 parser2 -- parser )
240 ! Same as <&> except discard the results of the first parser.
243 : <:&> ( parser1 parser2 -- result )
244 ! Same as <&> except flatten the result.
245 <&> [ first2 suffix ] <@ ;
247 : <&:> ( parser1 parser2 -- result )
248 ! Same as <&> except flatten the result.
249 <&> [ first2 swap prefix ] <@ ;
251 : <:&:> ( parser1 parser2 -- result )
252 ! Same as <&> except flatten the result.
253 <&> [ first2 append ] <@ ;
255 LAZY: <*> ( parser -- parser )
256 dup <*> <&:> { } succeed <|> ;
258 : <+> ( parser -- parser )
259 ! Return a parser that accepts one or more occurrences of the original
263 LAZY: <?> ( parser -- parser )
264 ! Return a parser that optionally uses the parser
265 ! if that parser would be successful.
266 [ 1array ] <@ f succeed <|> ;
268 TUPLE: only-first-parser p1 ;
270 LAZY: only-first ( parser -- parser )
271 only-first-parser boa ;
273 M: only-first-parser parse ( input parser -- list )
274 ! Transform a parser into a parser that only yields
275 ! the first possibility.
276 p1>> parse 1 swap ltake ;
278 LAZY: <!*> ( parser -- parser )
279 ! Like <*> but only return one possible result
280 ! containing all matching parses. Does not return
281 ! partial matches. Useful for efficiency since that's
282 ! usually the effect you want and cuts down on backtracking
286 LAZY: <!+> ( parser -- parser )
287 ! Like <+> but only return one possible result
288 ! containing all matching parses. Does not return
289 ! partial matches. Useful for efficiency since that's
290 ! usually the effect you want and cuts down on backtracking
294 LAZY: <!?> ( parser -- parser )
295 ! Like <?> but only return one possible result
296 ! containing all matching parses. Does not return
297 ! partial matches. Useful for efficiency since that's
298 ! usually the effect you want and cuts down on backtracking
302 LAZY: <(?)> ( parser -- parser )
303 ! Like <?> but take shortest match first.
304 f succeed swap [ 1array ] <@ <|> ;
306 LAZY: <(*)> ( parser -- parser )
307 ! Like <*> but take shortest match first.
308 ! Implementation by Matthew Willis.
309 { } succeed swap dup <(*)> <&:> <|> ;
311 LAZY: <(+)> ( parser -- parser )
312 ! Like <+> but take shortest match first.
313 ! Implementation by Matthew Willis.
316 : pack ( close body open -- parser )
317 ! Parse a construct enclosed by two symbols,
318 ! given a parser for the opening symbol, the
319 ! closing symbol, and the body.
322 : nonempty-list-of ( items separator -- parser )
323 [ over &> <*> <&:> ] keep <?> [ nip ] 2keep pack ;
325 : list-of ( items separator -- parser )
326 ! Given a parser for the separator and for the
327 ! items themselves, return a parser that parses
328 ! lists of those items. The parse tree is an
329 ! array of the parsed items.
330 nonempty-list-of { } succeed <|> ;
332 LAZY: surrounded-by ( parser start end -- parser' )
333 [ token ] bi@ swapd pack ;
335 : exactly-n ( parser n -- parser' )
336 swap <repetition> <and-parser> [ flatten ] <@ ;
338 : at-most-n ( parser n -- parser' )
343 -rot 1 - at-most-n <|>
346 : at-least-n ( parser n -- parser' )
347 dupd exactly-n swap <*> <&> ;
349 : from-m-to-n ( parser m n -- parser' )
350 [ [ exactly-n ] 2keep ] dip swap - at-most-n <:&:> ;