1 ! Copyright (C) 2004 Chris Double.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: lists lists.lazy promises kernel sequences strings math
4 arrays splitting quotations combinators namespaces locals
5 unicode.case unicode.categories sequences.deep accessors ;
8 ! Parser combinator protocol
9 GENERIC: parse ( input parser -- list )
11 M: promise parse ( input parser -- list )
14 TUPLE: parse-result parsed unparsed ;
16 ERROR: cannot-parse input ;
18 : parse-1 ( input parser -- result )
25 C: <parse-result> parse-result
27 : <parse-results> ( parsed unparsed -- list )
28 <parse-result> 1list ;
30 : parse-result-parsed-slice ( parse-result -- slice )
32 unparsed>> 0 0 rot <slice>
35 dup from>> [ rot parsed>> length - ] keep
39 : string= ( str1 str2 ignore-case -- ? )
40 [ [ >upper ] bi@ ] when sequence= ;
42 : string-head? ( str head ignore-case -- ? )
46 [ [ length head-slice ] keep ] dip string=
49 : ?string-head ( str head ignore-case -- newstr ? )
50 [ 2dup ] dip string-head?
51 [ length tail-slice t ] [ drop f ] if ;
53 TUPLE: token-parser string ignore-case? ;
55 C: <token-parser> token-parser
57 : token ( string -- parser ) f <token-parser> ;
59 : case-insensitive-token ( string -- parser ) t <token-parser> ;
61 M:: token-parser parse ( input parser -- list )
62 parser string>> :> str
63 parser ignore-case?>> :> case?
65 str input str case? ?string-head
66 [ <parse-results> ] [ 2drop nil ] if ;
68 : 1token ( n -- parser ) 1string token ;
70 TUPLE: satisfy-parser quot ;
72 C: satisfy satisfy-parser
74 M: satisfy-parser parse ( input parser -- list )
75 ! A parser that succeeds if the predicate,
76 ! when passed the first character in the input, returns
81 quot>> [ unclip-slice dup ] dip call( char -- ? )
82 [ swap <parse-results> ] [ 2drop nil ] if
85 LAZY: any-char-parser ( -- parser )
88 TUPLE: epsilon-parser ;
90 C: epsilon epsilon-parser
92 M: epsilon-parser parse ( input parser -- list )
93 ! A parser that parses the empty string. It
94 ! does not consume any input and always returns
95 ! an empty list as the parse tree with the
97 drop "" swap <parse-results> ;
99 TUPLE: succeed-parser result ;
101 C: succeed succeed-parser
103 M: succeed-parser parse ( input parser -- list )
104 ! A parser that always returns 'result' as a
105 ! successful parse with no input consumed.
106 result>> swap <parse-results> ;
112 M: fail-parser parse ( input parser -- list )
113 ! A parser that always fails and returns
114 ! an empty list of successes.
117 TUPLE: ensure-parser test ;
119 : ensure ( parser -- ensure )
122 M: ensure-parser parse ( input parser -- list )
123 2dup test>> parse nil?
124 [ 2drop nil ] [ drop t swap <parse-results> ] if ;
126 TUPLE: ensure-not-parser test ;
128 : ensure-not ( parser -- ensure )
129 ensure-not-parser boa ;
131 M: ensure-not-parser parse ( input parser -- list )
132 2dup test>> parse nil?
133 [ drop t swap <parse-results> ] [ 2drop nil ] if ;
135 TUPLE: and-parser parsers ;
137 : <&> ( parser1 parser2 -- parser )
139 [ parsers>> ] dip suffix
142 ] if and-parser boa ;
144 : <and-parser> ( parsers -- parser )
145 dup length 1 = [ first ] [ and-parser boa ] if ;
147 : and-parser-parse ( list p1 -- list )
149 dup unparsed>> rot parse
152 [ parsed>> 2array ] keep
153 unparsed>> <parse-result>
155 ] with lazy-map lconcat ;
157 M: and-parser parse ( input parser -- list )
158 ! Parse 'input' by sequentially combining the
159 ! two parsers. First parser1 is applied to the
160 ! input then parser2 is applied to the rest of
161 ! the input strings from the first parser.
162 parsers>> unclip swapd parse
163 [ [ and-parser-parse ] reduce ] 2curry <promise> ;
165 TUPLE: or-parser parsers ;
167 : <or-parser> ( parsers -- parser )
168 dup length 1 = [ first ] [ or-parser boa ] if ;
170 : <|> ( parser1 parser2 -- parser )
173 M: or-parser parse ( input parser1 -- list )
174 ! Return the combined list resulting from the parses
175 ! of parser1 and parser2 being applied to the same
176 ! input. This implements the choice parsing operator.
177 parsers>> sequence>list
178 [ parse ] with lazy-map lconcat ;
180 : trim-head-slice ( string -- string )
181 ! Return a new string without any leading whitespace
182 ! from the original string.
184 dup first blank? [ rest-slice trim-head-slice ] when
187 TUPLE: sp-parser p1 ;
189 #! Return a parser that first skips all whitespace before
190 #! calling the original parser.
193 M: sp-parser parse ( input parser -- list )
194 ! Skip all leading whitespace from the input then call
195 ! the parser on the remaining input.
196 [ trim-head-slice ] dip p1>> parse ;
198 TUPLE: just-parser p1 ;
202 M: just-parser parse ( input parser -- result )
203 ! Calls the given parser on the input removes
204 ! from the results anything where the remaining
205 ! input to be parsed is not empty. So ensures a
206 ! fully parsed input string.
207 p1>> parse [ unparsed>> empty? ] lfilter ;
209 TUPLE: apply-parser p1 quot ;
213 M: apply-parser parse ( input parser -- result )
214 ! Calls the parser on the input. For each successful
215 ! parse the quot is call with the parse result on the stack.
216 ! The result of that quotation then becomes the new parse result.
217 ! This allows modification of parse tree results (like
218 ! converting strings to integers, etc).
219 [ p1>> ] [ quot>> ] bi
221 [ parsed>> swap call ] keep
222 unparsed>> <parse-result>
225 TUPLE: some-parser p1 ;
229 M: some-parser parse ( input parser -- result )
230 ! Calls the parser on the input, guarantees
231 ! the parse is complete (the remaining input is empty),
232 ! picks the first solution and only returns the parse
233 ! tree since the remaining input is empty.
236 : <& ( parser1 parser2 -- parser )
237 ! Same as <&> except discard the results of the second parser.
240 : &> ( parser1 parser2 -- parser )
241 ! Same as <&> except discard the results of the first parser.
244 : <:&> ( parser1 parser2 -- result )
245 ! Same as <&> except flatten the result.
246 <&> [ first2 suffix ] <@ ;
248 : <&:> ( parser1 parser2 -- result )
249 ! Same as <&> except flatten the result.
250 <&> [ first2 swap prefix ] <@ ;
252 : <:&:> ( parser1 parser2 -- result )
253 ! Same as <&> except flatten the result.
254 <&> [ first2 append ] <@ ;
256 LAZY: <*> ( parser -- parser )
257 dup <*> <&:> { } succeed <|> ;
259 : <+> ( parser -- parser )
260 ! Return a parser that accepts one or more occurences of the original
264 LAZY: <?> ( parser -- parser )
265 ! Return a parser that optionally uses the parser
266 ! if that parser would be successful.
267 [ 1array ] <@ f succeed <|> ;
269 TUPLE: only-first-parser p1 ;
271 LAZY: only-first ( parser -- parser )
272 only-first-parser boa ;
274 M: only-first-parser parse ( input parser -- list )
275 ! Transform a parser into a parser that only yields
276 ! the first possibility.
277 p1>> parse 1 swap ltake ;
279 LAZY: <!*> ( parser -- parser )
280 ! Like <*> but only return one possible result
281 ! containing all matching parses. Does not return
282 ! partial matches. Useful for efficiency since that's
283 ! usually the effect you want and cuts down on backtracking
287 LAZY: <!+> ( parser -- parser )
288 ! Like <+> but only return one possible result
289 ! containing all matching parses. Does not return
290 ! partial matches. Useful for efficiency since that's
291 ! usually the effect you want and cuts down on backtracking
295 LAZY: <!?> ( parser -- parser )
296 ! Like <?> but only return one possible result
297 ! containing all matching parses. Does not return
298 ! partial matches. Useful for efficiency since that's
299 ! usually the effect you want and cuts down on backtracking
303 LAZY: <(?)> ( parser -- parser )
304 ! Like <?> but take shortest match first.
305 f succeed swap [ 1array ] <@ <|> ;
307 LAZY: <(*)> ( parser -- parser )
308 ! Like <*> but take shortest match first.
309 ! Implementation by Matthew Willis.
310 { } succeed swap dup <(*)> <&:> <|> ;
312 LAZY: <(+)> ( parser -- parser )
313 ! Like <+> but take shortest match first.
314 ! Implementation by Matthew Willis.
317 : pack ( close body open -- parser )
318 ! Parse a construct enclosed by two symbols,
319 ! given a parser for the opening symbol, the
320 ! closing symbol, and the body.
323 : nonempty-list-of ( items separator -- parser )
324 [ over &> <*> <&:> ] keep <?> [ nip ] 2keep pack ;
326 : list-of ( items separator -- parser )
327 ! Given a parser for the separator and for the
328 ! items themselves, return a parser that parses
329 ! lists of those items. The parse tree is an
330 ! array of the parsed items.
331 nonempty-list-of { } succeed <|> ;
333 LAZY: surrounded-by ( parser start end -- parser' )
334 [ token ] bi@ swapd pack ;
336 : exactly-n ( parser n -- parser' )
337 swap <repetition> <and-parser> [ flatten ] <@ ;
339 : at-most-n ( parser n -- parser' )
344 -rot 1 - at-most-n <|>
347 : at-least-n ( parser n -- parser' )
348 dupd exactly-n swap <*> <&> ;
350 : from-m-to-n ( parser m n -- parser' )
351 [ [ exactly-n ] 2keep ] dip swap - at-most-n <:&:> ;