1 ! Copyright (C) 2004 Chris Double.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: lazy-lists promises kernel sequences strings math
4 arrays splitting quotations combinators namespaces
5 unicode.case unicode.categories sequences.deep ;
8 ! Parser combinator protocol
9 GENERIC: parse ( input parser -- list )
11 M: promise parse ( input parser -- list )
14 TUPLE: parse-result parsed unparsed ;
16 : parse-1 ( input parser -- result )
18 "Cannot parse " rot append throw
20 nip car parse-result-parsed
23 C: <parse-result> parse-result
25 : <parse-results> ( parsed unparsed -- list )
26 <parse-result> 1list ;
28 : parse-result-parsed-slice ( parse-result -- slice )
29 dup parse-result-parsed empty? [
30 parse-result-unparsed 0 0 rot <slice>
32 dup parse-result-unparsed
33 dup slice-from [ rot parse-result-parsed length - ] keep
37 : string= ( str1 str2 ignore-case -- ? )
38 [ [ >upper ] bi@ ] when sequence= ;
40 : string-head? ( str head ignore-case -- ? )
44 >r [ length head-slice ] keep r> string=
47 : ?string-head ( str head ignore-case -- newstr ? )
48 >r 2dup r> string-head?
49 [ length tail-slice t ] [ drop f ] if ;
51 TUPLE: token-parser string ignore-case? ;
53 C: <token-parser> token-parser
55 : token ( string -- parser ) f <token-parser> ;
57 : case-insensitive-token ( string -- parser ) t <token-parser> ;
59 M: token-parser parse ( input parser -- list )
60 dup token-parser-string swap token-parser-ignore-case?
61 >r tuck r> ?string-head
62 [ <parse-results> ] [ 2drop nil ] if ;
64 : 1token ( n -- parser ) 1string token ;
66 TUPLE: satisfy-parser quot ;
68 C: satisfy satisfy-parser ( quot -- parser )
70 M: satisfy-parser parse ( input parser -- list )
71 #! A parser that succeeds if the predicate,
72 #! when passed the first character in the input, returns
77 satisfy-parser-quot >r unclip-slice dup r> call
78 [ swap <parse-results> ] [ 2drop nil ] if
81 LAZY: any-char-parser ( -- parser )
84 TUPLE: epsilon-parser ;
86 C: epsilon epsilon-parser ( -- parser )
88 M: epsilon-parser parse ( input parser -- list )
89 #! A parser that parses the empty string. It
90 #! does not consume any input and always returns
91 #! an empty list as the parse tree with the
93 drop "" swap <parse-results> ;
95 TUPLE: succeed-parser result ;
97 C: succeed succeed-parser ( result -- parser )
99 M: succeed-parser parse ( input parser -- list )
100 #! A parser that always returns 'result' as a
101 #! successful parse with no input consumed.
102 succeed-parser-result swap <parse-results> ;
106 C: fail fail-parser ( -- parser )
108 M: fail-parser parse ( input parser -- list )
109 #! A parser that always fails and returns
110 #! an empty list of successes.
113 TUPLE: ensure-parser test ;
115 : ensure ( parser -- ensure )
118 M: ensure-parser parse ( input parser -- list )
119 2dup ensure-parser-test parse nil?
120 [ 2drop nil ] [ drop t swap <parse-results> ] if ;
122 TUPLE: ensure-not-parser test ;
124 : ensure-not ( parser -- ensure )
125 ensure-not-parser boa ;
127 M: ensure-not-parser parse ( input parser -- list )
128 2dup ensure-not-parser-test parse nil?
129 [ drop t swap <parse-results> ] [ 2drop nil ] if ;
131 TUPLE: and-parser parsers ;
133 : <&> ( parser1 parser2 -- parser )
135 >r and-parser-parsers r> suffix
138 ] if and-parser boa ;
140 : <and-parser> ( parsers -- parser )
141 dup length 1 = [ first ] [ and-parser boa ] if ;
143 : and-parser-parse ( list p1 -- list )
145 dup parse-result-unparsed rot parse
147 >r parse-result-parsed r>
148 [ parse-result-parsed 2array ] keep
149 parse-result-unparsed <parse-result>
151 ] lmap-with lconcat ;
153 M: and-parser parse ( input parser -- list )
154 #! Parse 'input' by sequentially combining the
155 #! two parsers. First parser1 is applied to the
156 #! input then parser2 is applied to the rest of
157 #! the input strings from the first parser.
158 and-parser-parsers unclip swapd parse
159 [ [ and-parser-parse ] reduce ] 2curry promise ;
161 TUPLE: or-parser parsers ;
163 : <or-parser> ( parsers -- parser )
164 dup length 1 = [ first ] [ or-parser boa ] if ;
166 : <|> ( parser1 parser2 -- parser )
169 M: or-parser parse ( input parser1 -- list )
170 #! Return the combined list resulting from the parses
171 #! of parser1 and parser2 being applied to the same
172 #! input. This implements the choice parsing operator.
173 or-parser-parsers 0 swap seq>list
174 [ parse ] lmap-with lconcat ;
176 : left-trim-slice ( string -- string )
177 #! Return a new string without any leading whitespace
178 #! from the original string.
180 dup first blank? [ rest-slice left-trim-slice ] when
183 TUPLE: sp-parser p1 ;
185 #! Return a parser that first skips all whitespace before
186 #! calling the original parser.
187 C: sp sp-parser ( p1 -- parser )
189 M: sp-parser parse ( input parser -- list )
190 #! Skip all leading whitespace from the input then call
191 #! the parser on the remaining input.
192 >r left-trim-slice r> sp-parser-p1 parse ;
194 TUPLE: just-parser p1 ;
196 C: just just-parser ( p1 -- parser )
198 M: just-parser parse ( input parser -- result )
199 #! Calls the given parser on the input removes
200 #! from the results anything where the remaining
201 #! input to be parsed is not empty. So ensures a
202 #! fully parsed input string.
203 just-parser-p1 parse [ parse-result-unparsed empty? ] lfilter ;
205 TUPLE: apply-parser p1 quot ;
207 C: <@ apply-parser ( parser quot -- parser )
209 M: apply-parser parse ( input parser -- result )
210 #! Calls the parser on the input. For each successful
211 #! parse the quot is call with the parse result on the stack.
212 #! The result of that quotation then becomes the new parse result.
213 #! This allows modification of parse tree results (like
214 #! converting strings to integers, etc).
215 [ apply-parser-p1 ] keep apply-parser-quot
217 [ parse-result-parsed swap call ] keep
218 parse-result-unparsed <parse-result>
221 TUPLE: some-parser p1 ;
223 C: some some-parser ( p1 -- parser )
225 M: some-parser parse ( input parser -- result )
226 #! Calls the parser on the input, guarantees
227 #! the parse is complete (the remaining input is empty),
228 #! picks the first solution and only returns the parse
229 #! tree since the remaining input is empty.
230 some-parser-p1 just parse-1 ;
232 : <& ( parser1 parser2 -- parser )
233 #! Same as <&> except discard the results of the second parser.
236 : &> ( parser1 parser2 -- parser )
237 #! Same as <&> except discard the results of the first parser.
240 : <:&> ( parser1 parser2 -- result )
241 #! Same as <&> except flatten the result.
242 <&> [ first2 suffix ] <@ ;
244 : <&:> ( parser1 parser2 -- result )
245 #! Same as <&> except flatten the result.
246 <&> [ first2 swap prefix ] <@ ;
248 : <:&:> ( parser1 parser2 -- result )
249 #! Same as <&> except flatten the result.
250 <&> [ first2 append ] <@ ;
252 LAZY: <*> ( parser -- parser )
253 dup <*> <&:> { } succeed <|> ;
255 : <+> ( parser -- parser )
256 #! Return a parser that accepts one or more occurences of the original
260 LAZY: <?> ( parser -- parser )
261 #! Return a parser that optionally uses the parser
262 #! if that parser would be successful.
263 [ 1array ] <@ f succeed <|> ;
265 TUPLE: only-first-parser p1 ;
267 LAZY: only-first ( parser -- parser )
268 only-first-parser boa ;
270 M: only-first-parser parse ( input parser -- list )
271 #! Transform a parser into a parser that only yields
272 #! the first possibility.
273 only-first-parser-p1 parse 1 swap ltake ;
275 LAZY: <!*> ( parser -- parser )
276 #! Like <*> but only return one possible result
277 #! containing all matching parses. Does not return
278 #! partial matches. Useful for efficiency since that's
279 #! usually the effect you want and cuts down on backtracking
283 LAZY: <!+> ( parser -- parser )
284 #! Like <+> but only return one possible result
285 #! containing all matching parses. Does not return
286 #! partial matches. Useful for efficiency since that's
287 #! usually the effect you want and cuts down on backtracking
291 LAZY: <!?> ( parser -- parser )
292 #! Like <?> but only return one possible result
293 #! containing all matching parses. Does not return
294 #! partial matches. Useful for efficiency since that's
295 #! usually the effect you want and cuts down on backtracking
299 LAZY: <(?)> ( parser -- parser )
300 #! Like <?> but take shortest match first.
301 f succeed swap [ 1array ] <@ <|> ;
303 LAZY: <(*)> ( parser -- parser )
304 #! Like <*> but take shortest match first.
305 #! Implementation by Matthew Willis.
306 { } succeed swap dup <(*)> <&:> <|> ;
308 LAZY: <(+)> ( parser -- parser )
309 #! Like <+> but take shortest match first.
310 #! Implementation by Matthew Willis.
313 : pack ( close body open -- parser )
314 #! Parse a construct enclosed by two symbols,
315 #! given a parser for the opening symbol, the
316 #! closing symbol, and the body.
319 : nonempty-list-of ( items separator -- parser )
320 [ over &> <*> <&:> ] keep <?> tuck pack ;
322 : list-of ( items separator -- parser )
323 #! Given a parser for the separator and for the
324 #! items themselves, return a parser that parses
325 #! lists of those items. The parse tree is an
326 #! array of the parsed items.
327 nonempty-list-of { } succeed <|> ;
329 LAZY: surrounded-by ( parser start end -- parser' )
330 [ token ] bi@ swapd pack ;
332 : exactly-n ( parser n -- parser' )
333 swap <repetition> <and-parser> [ flatten ] <@ ;
335 : at-most-n ( parser n -- parser' )
340 -rot 1- at-most-n <|>
343 : at-least-n ( parser n -- parser' )
344 dupd exactly-n swap <*> <&> ;
346 : from-m-to-n ( parser m n -- parser' )
347 >r [ exactly-n ] 2keep r> swap - at-most-n <:&:> ;