1 ! Copyright (C) 2019 John Benediktsson
2 ! See https://factorcode.org/license.txt for BSD license
4 USING: accessors ascii assocs calendar.parser hashtables
5 io.encodings.utf8 io.files kernel make math.parser peg
6 peg.parsers regexp sequences splitting strings.parser ;
8 ! https://github.com/toml-lang/toml/blob/main/toml.abnf
12 ERROR: duplicate-key key ;
14 ERROR: unknown-value value ;
18 TUPLE: table name array? ;
20 TUPLE: entry key value ;
22 : check-no-key ( key assoc -- key assoc )
23 2dup at* nip [ over duplicate-key ] when ;
25 : entries-at ( table keys -- key entries )
28 H{ } clone [ spin check-no-key set-at ] keep
32 GENERIC: update-toml ( root table entry -- root table' )
35 dupd [ key>> entries-at ] [ value>> ] bi
36 -rot check-no-key set-at ;
39 nip dupd [ name>> entries-at ] [ array?>> ] bi
41 swap [ -rot push-at ] [ -rot check-no-key set-at ] if
45 [ " \t" member? ] satisfy repeat0 ;
47 : newline ( -- parser )
48 "\n" token "\r\n" token 2choice ;
50 : boolean-parser ( -- parser )
51 "true" token [ drop t ] action
52 "false" token [ drop f ] action
55 : digits ( parser -- parser )
56 "_" token [ drop f ] action 2choice repeat1 [ sift ] action ;
59 "+" token "-" token 2choice ;
61 : hexdigit ( -- parser )
64 CHAR: A CHAR: F range 3choice ;
66 : hex-parser ( -- parser )
67 sign optional "0x" token hexdigit digits 3seq
68 [ "" concat-as string>number ] action ;
70 : decdigit ( -- parser )
71 CHAR: 0 CHAR: 9 range ;
73 : dec-parser ( -- parser )
74 sign optional decdigit digits 2seq
75 [ "" concat-as string>number ] action ;
77 : octdigit ( -- parser )
78 CHAR: 0 CHAR: 7 range ;
80 : oct-parser ( -- parser )
81 sign optional "0o" token octdigit digits 3seq
82 [ "" concat-as string>number ] action ;
84 : bindigit ( -- parser )
85 CHAR: 0 CHAR: 1 range ;
87 : bin-parser ( -- parser )
88 sign optional "0b" token bindigit digits 3seq
89 [ "" concat-as string>number ] action ;
91 : integer-parser ( -- parser )
92 hex-parser oct-parser bin-parser dec-parser 4choice ;
94 : exponent ( -- parser )
95 "e" token "E" token 2choice sign optional
96 decdigit digits optional 3seq
97 [ "" concat-as ] action ;
99 : normal-float ( -- parser )
100 [ sign optional , decdigit digits , exponent , ] seq*
101 [ sign optional , decdigit digits , "." token , decdigit digits , exponent optional , ] seq*
102 2choice [ "" concat-as string>number ] action ;
105 "+inf" token "inf" token 2choice [ drop 1/0. ] action ;
108 "-inf" token [ drop -1/0. ] action ;
111 sign optional "nan" token 2seq [ drop 0/0. ] action ;
113 : float-parser ( -- parser )
114 normal-float +inf -inf nan 4choice ;
116 : number-parser ( -- parser )
118 [ blank? not ] satisfy repeat1 [ string>number ] action
121 : escaped ( -- parser )
122 "\\" token hide [ "\"\\befnrt" member-eq? ] satisfy 2seq
123 [ first escape ] action ;
125 : unicode ( -- parser )
126 "\\u" token hide hexdigit 4 exactly-n 2seq
127 "\\U" token hide hexdigit 8 exactly-n 2seq
128 2choice [ first hex> ] action ;
130 : hexescape ( -- parser )
131 "\\x" token hide hexdigit 2 exactly-n 2seq
132 "\\X" token hide hexdigit 2 exactly-n 2seq
133 2choice [ first hex> ] action ;
135 : basic-string ( -- parser )
136 escaped unicode hexescape [ "\"\n" member? not ] satisfy
137 4choice repeat0 "\"" dup surrounded-by ;
139 : literal-string ( -- parser )
140 [ "'" member? not ] satisfy repeat0 "'" dup surrounded-by ;
142 : single-string ( -- parser )
143 basic-string literal-string 2choice [ "" like ] action ;
145 : multi-basic-string ( -- parser )
146 escaped unicode [ CHAR: \" = not ] satisfy 3choice repeat0
147 "\"\"\"" dup surrounded-by ;
149 : multi-literal-string ( -- parser )
150 [ CHAR: ' = not ] satisfy repeat0 "'''" dup surrounded-by ;
152 : multi-string ( -- parser )
153 multi-basic-string multi-literal-string 2choice [
154 "" like "\n" ?head drop
155 R/ \\[ \t\r\n]*\n[ \t\r\n]*/m "" re-replace
158 : string-parser ( -- parser )
159 multi-string single-string 2choice ;
161 : non-ascii ( -- parser )
162 0x80 0xd7ff range 0xe000 0x10ffff range 2choice ;
164 : comment-char ( -- parser )
165 0x01 0x09 range 0x0e 0x7f range non-ascii 3choice ;
167 : comment ( -- parser )
168 "#" token comment-char repeat0 2seq hide ;
170 : ws-comment-newline ( -- parser )
171 ws comment optional 2seq newline list-of ;
173 : date-parser ( -- parser )
175 decdigit 4 exactly-n ,
177 decdigit 2 exactly-n ,
179 decdigit 2 exactly-n ,
180 ] seq* [ "" concat-as ] action ;
182 : time-parser ( -- parser )
184 decdigit 2 exactly-n ,
186 decdigit 2 exactly-n ,
188 decdigit 2 exactly-n ,
189 "." token decdigit repeat1 2seq optional [ concat ] action ,
190 ] seq* [ "" concat-as ] action ;
192 : timezone-parser ( -- parser )
194 "+" token "-" token 2choice
195 decdigit 2 exactly-n ":" token
196 decdigit 2 exactly-n 4seq [ "" concat-as ] action
199 : datetime-parser ( -- parser )
202 "T" token "t" token " " token 3choice ,
204 timezone-parser optional ,
205 ] seq* [ "" concat-as rfc3339>timestamp ] action ;
207 : separator ( -- parser )
208 "," token comment optional 2seq ;
212 : array-value-parser ( -- parser )
213 ws-comment-newline hide
214 value-parser optional
215 ws-comment-newline hide 3seq [ first ] action ;
217 : array-parser ( -- parser )
220 array-value-parser separator list-of [ sift ] action ,
222 ] seq* [ first { } like ] action ;
224 DEFER: key-value-parser
226 : inline-table-key-value ( -- parser )
227 ws-comment-newline hide
229 ws-comment-newline hide 3seq [ first ] action ;
231 : inline-table-parser ( -- parser )
234 inline-table-key-value separator list-of ,
235 separator optional hide ,
236 ws-comment-newline hide ,
239 first [ length <hashtable> ] keep [ update-toml ] each
242 : value-parser ( -- parser )
253 inline-table-parser ,
257 : unquoted-key ( -- parser )
259 CHAR: A CHAR: Z range ,
260 CHAR: a CHAR: z range ,
261 CHAR: 0 CHAR: 9 range ,
262 [ "_-\xb2\xb3\xb9\xbc\xbd\xbe" member? ] satisfy ,
267 0x200C 0x200D range ,
268 0x203F 0x2040 range ,
269 0x2070 0x218F range ,
270 0x2460 0x24FF range ,
271 0x2C00 0x2FEF range ,
272 0x3001 0xD7FF range ,
273 0xF900 0xFDCF range ,
274 0xFDF0 0xFFFFD range ,
275 0x10000 0xEFFFF range ,
276 ] choice* repeat1 [ "" like ] action single-string 2choice ;
278 : quoted-key ( -- parser )
279 multi-string single-string 2choice ;
281 : simple-key ( -- parser )
282 unquoted-key quoted-key 2choice ;
284 : key-parser ( -- parser )
285 simple-key ws "." token ws 3seq list-of ;
287 : key-value-parser ( -- parser )
294 ] seq* [ first2 entry boa ] action ;
296 :: table-name-parser ( begin end array? -- parser )
303 ] seq* [ first array? table boa ] action ;
305 : array-table ( -- parser )
306 "[[" "]]" t table-name-parser ;
308 : std-table ( -- parser )
309 "[" "]" f table-name-parser ;
311 : table-parser ( -- parser )
312 array-table std-table 2choice ;
314 PEG: parse-toml ( string -- ast )
315 ws hide key-value-parser ws hide comment optional hide 4seq
316 ws hide table-parser ws hide comment optional hide 4seq
317 ws hide comment optional hide 2seq
318 3choice newline list-of [ { } concat-as ] action ;
322 : toml> ( string -- assoc )
323 [ H{ } clone dup ] dip parse-toml [ update-toml ] each drop ;
325 : path>toml ( path -- assoc )
326 utf8 file-contents toml> ;