1 ! Copyright (C) 2007 Chris Double.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: kernel sequences strings namespaces math assocs shuffle
4 vectors arrays combinators.lib memoize math.parser match
! Outcome of a successful parse, with two slots: `remaining` and `ast`.
! The pattern words in this file fill `remaining` with a tail slice of the
! input and `ast` with the value produced by the parser.
8 TUPLE: parse-result remaining ast ;
! Generic word dispatching on the parser tuple class. Each `M: ... compile`
! method in this file returns the quotation for one kind of parser.
10 GENERIC: compile ( parser -- quot )
12 : (parse) ( state parser -- result )
22 : not-in-cache? ( result -- ? )
! Constructor for parse-result: takes the two values from the stack
! (remaining, then ast) and stores them in the tuple's slots in that order.
25 : <parse-result> ( remaining ast -- parse-result )
26 parse-result construct-boa ;
! Hand out the next parser id from the global `next-id` variable.
! An unset (f) variable is treated as 0. The value read is returned and
! that value plus one is written back to the global.
30 : get-next-id ( -- number )
31 next-id get-global 0 or dup 1+ next-id set-global ;
! Give a freshly built parser object its identity: take a new id from
! get-next-id, build a `parser` tuple from it (that tuple is declared outside
! this excerpt) and install it as the delegate of the object passed in.
! The original object is left on the stack as the result.
35 : init-parser ( parser -- parser )
36 get-next-id parser construct-boa over set-delegate ;
! Index of the input within its underlying sequence: the slice's `from`
! index when given a slice, 0 for anything else.
38 : from ( slice-or-string -- index )
39 dup slice? [ slice-from ] [ drop 0 ] if ;
41 : get-cached ( input parser -- result )
42 [ from ] dip parser-id packrat-cache get at at* [
46 : put-cached ( result input parser -- )
47 parser-id dup packrat-cache get at [
50 H{ } clone dup >r swap packrat-cache get set-at r>
56 : parse ( input parser -- result )
58 2dup get-cached dup not-in-cache? [
59 ! "cache missed: " write over parser-id number>string write " - " write nl ! pick .
61 #! Protect against left recursion blowing the callstack
62 #! by storing a failed parse in the cache.
63 [ f ] dipd [ put-cached ] 2keep
64 [ (parse) dup ] 2keep put-cached
66 ! "cache hit: " write over parser-id number>string write " - " write nl ! pick .
! Entry point: run `parse` with the `packrat-cache` variable bound to a
! fresh, empty hashtable for the duration of the call.
73 : packrat-parse ( input parser -- result )
74 H{ } clone packrat-cache [ parse ] with-variable ;
! Parser tuple with a single slot, `symbol`. Built by `token` from a string.
78 TUPLE: token-parser symbol ;
82 : token-pattern ( -- quot )
85 dup >r length tail-slice r> <parse-result>
! Compile a token-parser: take the quotation template from token-pattern and
! substitute the parser's `symbol` for the match variable ?token.
91 M: token-parser compile ( parser -- quot )
92 token-parser-symbol \ ?token token-pattern match-replace ;
! Parser tuple with a single slot, `quot`. Built by `satisfy` from a quotation.
94 TUPLE: satisfy-parser quot ;
98 : satisfy-pattern ( -- quot )
103 unclip-slice dup ?quot call [
! Compile a satisfy-parser: take the quotation template from satisfy-pattern
! and substitute the parser's `quot` for the match variable ?quot.
111 M: satisfy-parser compile ( parser -- quot )
112 satisfy-parser-quot \ ?quot satisfy-pattern match-replace ;
! Parser tuple with two slots, `min` and `max`. Built by `range`.
114 TUPLE: range-parser min max ;
! Match variables used when compiling a range-parser; they are bound to the
! tuple's min and max slots and substituted into range-pattern.
116 MATCH-VARS: ?min ?max ;
118 : range-pattern ( -- quot )
125 [ 1 tail-slice ] dip <parse-result>
! Compile a range-parser: match the parser against a tuple-literal pattern to
! bind ?min and ?max, then substitute them into the range-pattern template.
! NOTE(review): the `_` wildcard skips the first slot of the literal,
! presumably the delegate installed by init-parser - confirm.
132 M: range-parser compile ( parser -- quot )
133 T{ range-parser _ ?min ?max } range-pattern match-replace ;
! Parser tuple with a single slot, `parsers`: the sequence passed to `seq`.
135 TUPLE: seq-parser parsers ;
137 : seq-pattern ( -- quot )
140 dup parse-result-remaining ?quot call [
141 [ parse-result-remaining swap set-parse-result-remaining ] 2keep
142 parse-result-ast dup ignore = [
145 swap [ parse-result-ast push ] keep
155 M: seq-parser compile ( parser -- quot )
157 [ V{ } clone <parse-result> ] %
158 seq-parser-parsers [ compile \ ?quot seq-pattern match-replace % ] each
! Parser tuple with a single slot, `parsers`: the sequence passed to `choice`.
161 TUPLE: choice-parser parsers ;
163 : choice-pattern ( -- quot )
172 M: choice-parser compile ( parser -- quot )
175 choice-parser-parsers [ compile \ ?quot choice-pattern match-replace % ] each
! Parser tuple with a single slot, `p1`: the parser passed to `repeat0`.
179 TUPLE: repeat0-parser p1 ;
181 : (repeat0) ( quot result -- result )
182 2dup parse-result-remaining swap call [
183 [ parse-result-remaining swap set-parse-result-remaining ] 2keep
184 parse-result-ast swap [ parse-result-ast push ] keep
190 : repeat0-pattern ( -- quot )
195 M: repeat0-parser compile ( parser -- quot )
197 [ V{ } clone <parse-result> ] %
198 repeat0-parser-p1 compile \ ?quot repeat0-pattern match-replace %
! Parser tuple with a single slot, `p1`: the parser passed to `repeat1`.
201 TUPLE: repeat1-parser p1 ;
203 : repeat1-pattern ( -- quot )
205 ?quot swap (repeat0) [
206 dup parse-result-ast empty? [
214 M: repeat1-parser compile ( parser -- quot )
216 [ V{ } clone <parse-result> ] %
217 repeat1-parser-p1 compile \ ?quot repeat1-pattern match-replace %
! Parser tuple with a single slot, `p1`: the parser passed to `optional`.
220 TUPLE: optional-parser p1 ;
222 : optional-pattern ( -- quot )
224 dup ?quot call swap f <parse-result> or
! Compile an optional-parser: compile the wrapped parser p1 first, then
! substitute the resulting quotation for ?quot in the optional-pattern template.
227 M: optional-parser compile ( parser -- quot )
228 optional-parser-p1 compile \ ?quot optional-pattern match-replace ;
! Parser tuple with a single slot, `p1`: the parser passed to `ensure`.
230 TUPLE: ensure-parser p1 ;
232 : ensure-pattern ( -- quot )
235 ignore <parse-result>
! Compile an ensure-parser: compile the wrapped parser p1 first, then
! substitute the resulting quotation for ?quot in the ensure-pattern template.
241 M: ensure-parser compile ( parser -- quot )
242 ensure-parser-p1 compile \ ?quot ensure-pattern match-replace ;
! Parser tuple with a single slot, `p1`: the parser passed to `ensure-not`.
244 TUPLE: ensure-not-parser p1 ;
246 : ensure-not-pattern ( -- quot )
251 ignore <parse-result>
! Compile an ensure-not-parser: compile the wrapped parser p1 first, then
! substitute the resulting quotation for ?quot in the ensure-not-pattern
! template.
255 M: ensure-not-parser compile ( parser -- quot )
256 ensure-not-parser-p1 compile \ ?quot ensure-not-pattern match-replace ;
! Parser tuple with two slots: `p1`, the wrapped parser, and `quot`, the
! quotation given to `action`.
258 TUPLE: action-parser p1 quot ;
! Match variable that stands for the action quotation inside action-pattern.
260 MATCH-VARS: ?action ;
262 : action-pattern ( -- quot )
265 dup parse-result-ast ?action call
266 swap [ set-parse-result-ast ] keep
! Compile an action-parser. Both slots are fetched; p1 is compiled while the
! action quotation is left as is. The pair { compiled-p1 quot } is then matched
! against { ?quot ?action } and substituted into the action-pattern template.
270 M: action-parser compile ( parser -- quot )
271 { action-parser-p1 action-parser-quot } get-slots [ compile ] dip
272 2array { ?quot ?action } action-pattern match-replace ;
274 : left-trim-slice ( string -- string )
275 #! Return a new string without any leading whitespace
276 #! from the original string.
278 dup first blank? [ 1 tail-slice left-trim-slice ] when
! Parser tuple with a single slot, `p1`: the parser passed to `sp`. Its compile
! method emits a call to left-trim-slice ahead of the compiled p1.
281 TUPLE: sp-parser p1 ;
283 M: sp-parser compile ( parser -- quot )
285 \ left-trim-slice , sp-parser-p1 compile %
! Parser tuple with a single slot, `quot`: the value passed to `delay`. The
! compile method splices this quotation's contents into the generated code,
! followed by `compile` and `call`.
288 TUPLE: delay-parser quot ;
290 M: delay-parser compile ( parser -- quot )
292 delay-parser-quot % \ compile , \ call ,
! Memoized constructor: wrap a string in a token-parser and initialise it
! with init-parser.
297 MEMO: token ( string -- parser )
298 token-parser construct-boa init-parser ;
! Constructor (not memoized): wrap a quotation in a satisfy-parser and
! initialise it with init-parser.
300 : satisfy ( quot -- parser )
301 satisfy-parser construct-boa init-parser ;
! Memoized constructor: store min and max in a range-parser and initialise it
! with init-parser.
303 MEMO: range ( min max -- parser )
304 range-parser construct-boa init-parser ;
! Constructor (not memoized): wrap a sequence of parsers in a seq-parser and
! initialise it with init-parser.
306 : seq ( seq -- parser )
307 seq-parser construct-boa init-parser ;
! Constructor (not memoized): wrap a sequence of parsers in a choice-parser and
! initialise it with init-parser.
309 : choice ( seq -- parser )
310 choice-parser construct-boa init-parser ;
! Memoized constructor: wrap a parser in a repeat0-parser and initialise it
! with init-parser.
312 MEMO: repeat0 ( parser -- parser )
313 repeat0-parser construct-boa init-parser ;
! Memoized constructor: wrap a parser in a repeat1-parser and initialise it
! with init-parser.
315 MEMO: repeat1 ( parser -- parser )
316 repeat1-parser construct-boa init-parser ;
! Memoized constructor: wrap a parser in an optional-parser and initialise it
! with init-parser.
318 MEMO: optional ( parser -- parser )
319 optional-parser construct-boa init-parser ;
! Memoized constructor: wrap a parser in an ensure-parser and initialise it
! with init-parser.
321 MEMO: ensure ( parser -- parser )
322 ensure-parser construct-boa init-parser ;
! Memoized constructor: wrap a parser in an ensure-not-parser and initialise it
! with init-parser.
324 MEMO: ensure-not ( parser -- parser )
325 ensure-not-parser construct-boa init-parser ;
! Constructor (not memoized): pair a parser with an action quotation in an
! action-parser and initialise it with init-parser. In the compiled code the
! quotation is called on the AST of the wrapped parser's result.
327 : action ( parser quot -- parser )
328 action-parser construct-boa init-parser ;
! Memoized constructor: wrap a parser in an sp-parser and initialise it with
! init-parser.
330 MEMO: sp ( parser -- parser )
331 sp-parser construct-boa init-parser ;
! Memoized: wrap a parser in an action that discards the parser's AST and
! yields `ignore` in its place. seq-pattern compares each AST against `ignore`.
333 MEMO: hide ( parser -- parser )
334 [ drop ignore ] action ;
! Memoized constructor: wrap a quotation in a delay-parser and initialise it
! with init-parser. The input is a quotation, not a parser: it is stored in
! the tuple's `quot` slot, and the delay-parser compile method splices its
! contents into the generated code followed by `compile` and `call`.
! The stack effect names the input `quot` accordingly; arity is unchanged.
336 MEMO: delay ( quot -- parser )
337 delay-parser construct-boa init-parser ;
! Memoized: build a parser for `items` followed by any number of
! `separator items` pairs.
!   - the separator is wrapped in `hide`;
!   - { hidden-separator items } seq repeat0 parses the trailing pairs, and
!     the [ concat ] action concatenates the per-pair ASTs into one sequence;
!   - the outer seq pairs the leading item with that sequence, and the final
!     action puts the first item in a one-element vector and appends the
!     trailing items to it.
339 MEMO: list-of ( items separator -- parser )
340 hide over 2array seq repeat0 [ concat ] action 2array seq [ unclip 1vector swap first append ] action ;
! Memoized: parser that accepts one element satisfying digit? and maps its
! AST through digit>.
342 MEMO: 'digit' ( -- parser )
343 [ digit? ] satisfy [ digit> ] action ;
! Memoized: parser built from 'digit' repeat1 whose action passes 10 and the
! sequence of digit ASTs to digits>integer.
345 MEMO: 'integer' ( -- parser )
346 'digit' repeat1 [ 10 swap digits>integer ] action ;
348 MEMO: 'string' ( -- parser )
350 [ CHAR: " = ] satisfy hide ,
351 [ CHAR: " = not ] satisfy repeat0 ,
352 [ CHAR: " = ] satisfy hide ,
353 ] { } make seq [ first >string ] action ;