1 ! Copyright (C) 2008, 2009 Slava Pestov.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: kernel sequences accessors namespaces math words strings
4 io vectors arrays math.parser combinators continuations
8 TUPLE: lexer text line line-text line-length column parsing-words ;
10 TUPLE: lexer-parsing-word word line line-text column ;
12 : next-line ( lexer -- )
13 dup [ line>> ] [ text>> ] bi ?nth >>line-text
14 dup line-text>> length >>line-length
19 : push-parsing-word ( word -- )
20 lexer-parsing-word new
24 [ line-text>> >>line-text ]
25 [ column>> >>column ] tri
26 ] [ parsing-words>> push ] bi ;
28 : pop-parsing-word ( -- )
29 lexer get parsing-words>> pop drop ;
31 : new-lexer ( text class -- lexer )
35 V{ } clone >>parsing-words
36 dup next-line ; inline
38 : <lexer> ( text -- lexer )
41 ERROR: unexpected want got ;
43 : forbid-tab ( c -- c )
44 [ CHAR: \t eq? [ "[space]" "[tab]" unexpected ] when ] keep ; inline
46 : skip ( i seq ? -- n )
48 [ [ swap forbid-tab CHAR: \s eq? xor ] curry find-from drop ] dip or ;
50 : change-lexer-column ( lexer quot -- )
51 [ [ column>> ] [ line-text>> ] bi ] prepose keep
54 GENERIC: skip-blank ( lexer -- )
56 M: lexer skip-blank ( lexer -- )
57 [ t skip ] change-lexer-column ;
59 GENERIC: skip-word ( lexer -- )
61 M: lexer skip-word ( lexer -- )
63 2dup nth CHAR: " eq? [ drop 1 + ] [ f skip ] if
64 ] change-lexer-column ;
66 : still-parsing? ( lexer -- ? )
67 [ line>> ] [ text>> length ] bi <= ;
69 : still-parsing-line? ( lexer -- ? )
70 [ column>> ] [ line-length>> ] bi < ;
72 : (parse-token) ( lexer -- str )
80 : parse-token ( lexer -- str/f )
83 dup still-parsing-line?
84 [ (parse-token) ] [ dup next-line parse-token ] if
87 : scan ( -- str/f ) lexer get parse-token ;
89 PREDICATE: unexpected-eof < unexpected got>> not ;
91 : unexpected-eof ( word -- * ) f unexpected ;
95 [ 2dup = [ 2drop ] [ unexpected ] if ]
99 : each-token ( ... end quot: ( ... token -- ... ) -- ... )
101 { [ 2over = ] [ 3drop ] }
102 { [ pick not ] [ drop unexpected-eof ] }
103 [ [ nip call ] [ each-token ] 2bi ]
104 } cond ; inline recursive
106 : map-tokens ( ... end quot: ( ... token -- ... elt ) -- ... seq )
107 collector [ each-token ] dip { } like ; inline
109 : parse-tokens ( end -- seq )
112 TUPLE: lexer-error line column line-text parsing-words error ;
114 M: lexer-error error-file error>> error-file ;
116 M: lexer-error error-line [ error>> error-line ] [ line>> ] bi or ;
118 : <lexer-error> ( msg -- error )
122 [ column>> >>column ] bi
124 [ line-text>> >>line-text ]
125 [ parsing-words>> clone >>parsing-words ] bi
129 : simple-lexer-dump ( error -- )
130 [ line>> number>string ": " append ]
131 [ line-text>> dup string? [ drop "" ] unless ]
132 [ column>> 0 or ] tri
133 pick length + CHAR: \s <string>
134 [ write ] [ print ] [ write "^" print ] tri* ;
136 : (parsing-word-lexer-dump) ( error parsing-word -- )
139 over line>> number>string length
142 ] [ line-text>> dup string? [ drop "" ] unless print ] bi
145 : parsing-word-lexer-dump ( error parsing-word -- )
146 2dup [ line>> ] bi@ =
147 [ drop simple-lexer-dump ]
148 [ (parsing-word-lexer-dump) ] if ;
150 : lexer-dump ( error -- )
151 dup parsing-words>> [ simple-lexer-dump ] [ last parsing-word-lexer-dump ] if-empty ;
153 : with-lexer ( lexer quot -- newquot )
154 [ lexer set ] dip [ <lexer-error> rethrow ] recover ; inline
156 SYMBOL: lexer-factory
158 [ <lexer> ] lexer-factory set-global