1 ! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors arrays assocs combinators kernel lexer make
4 math math.parser namespaces sequences splitting strings ;
7 ERROR: bad-escape char ;
9 : escape ( escape -- ch )
21 } ?at [ bad-escape ] unless ;
23 SYMBOL: name>char-hook
26 [ "Unicode support not available" throw ]
29 : hex-escape ( str -- ch str' )
30 2 cut-slice [ hex> ] dip ;
32 : unicode-escape ( str -- ch str' )
34 CHAR: } over index cut-slice
35 [ >string name>char-hook get call( name -- char ) ] dip
38 6 cut-slice [ hex> ] dip
41 : next-escape ( str -- ch str' )
43 { CHAR: u [ 1 tail-slice unicode-escape ] }
44 { CHAR: x [ 1 tail-slice hex-escape ] }
45 [ drop unclip-slice escape swap ]
48 : (unescape-string) ( str -- )
49 CHAR: \\ over index dup [
50 cut-slice [ % ] dip rest-slice
57 : unescape-string ( str -- str' )
58 [ (unescape-string) ] "" make ;
60 : (parse-string) ( str -- m )
61 dup [ "\"\\" member? ] find dup [
62 [ cut-slice [ % ] dip rest-slice ] dip
66 next-escape [ , ] dip (parse-string)
69 "Unterminated string" throw
72 : parse-string ( -- str )
74 [ swap tail-slice (parse-string) ] "" make swap
75 ] change-lexer-column ;
79 : lexer-subseq ( i -- before )
83 [ column>> ] [ line-text>> ] bi
89 : rest-of-line ( lexer -- seq )
90 [ line-text>> ] [ column>> ] bi tail-slice ;
92 : current-char ( lexer -- ch/f )
93 [ column>> ] [ line-text>> ] bi ?nth ;
95 : advance-char ( lexer -- )
96 [ 1 + ] change-column drop ;
98 ERROR: escaped-char-expected ;
100 : next-char ( lexer -- ch )
101 dup still-parsing-line? [
102 [ current-char ] [ advance-char ] bi
104 escaped-char-expected
107 : lexer-head? ( string -- ? )
108 [ lexer get rest-of-line ] dip head? ;
110 : advance-lexer ( n -- )
111 [ lexer get ] dip [ + ] curry change-column drop ; inline
113 : find-next-token ( ch -- i elt )
115 [ lexer get [ column>> ] [ line-text>> ] bi ] dip
116 [ member? ] curry find-from ;
118 : next-line% ( lexer -- )
120 [ next-line "\n" % ] bi ;
122 : take-double-quotes ( -- string )
123 lexer get dup current-char CHAR: " = [
124 [ ] [ column>> ] [ line-text>> ] tri
125 [ CHAR: " = not ] find-from drop [
126 swap column>> - CHAR: " <repetition>
132 ] if dup length advance-lexer ;
134 : end-string-parse ( delimiter -- )
136 take-double-quotes 3 tail %
138 lexer get advance-char
141 DEFER: (parse-multiline-string)
143 : parse-found-token ( string i token -- )
144 [ lexer-subseq % ] dip
146 lexer get [ next-char , ] [ next-char , ] bi (parse-multiline-string)
151 lexer get next-char , (parse-multiline-string)
155 ERROR: trailing-characters string ;
157 : (parse-multiline-string) ( string -- )
158 lexer get still-parsing? [
159 dup first find-next-token [
162 drop lexer get next-line%
163 (parse-multiline-string)
171 : parse-multiline-string ( -- string )
172 lexer get rest-of-line "\"\"" head? [
173 lexer get [ 2 + ] change-column drop
177 ] if [ (parse-multiline-string) ] "" make unescape-string ;