-USING: multiline toml tools.test ;
+USING: assocs multiline present toml tools.test ;
+
+! Example document
{
H{
{ "title" "TOML Example" }
- { "hosts" { "alpha" "omega" } }
{
"owner"
H{
"clients"
H{
{ "data" { { "gamma" "delta" } { 1 2 } } }
+ { "hosts" { "alpha" "omega" } }
}
}
{
"[deps]
temp_targets = { case = 72.0 }" toml>
] unit-test
+
+! TESTS FROM 1.0.0 SPEC
+
+! Comments
+{
+ H{ { "key" "value" } { "another" "# This is not a comment" } }
+} [
+ [=[ # This is a full-line comment
+key = "value" # This is a comment at the end of a line
+another = "# This is not a comment"]=] toml>
+] unit-test
+
+! Key/Value Pairs
+
+[ [=[ key = # INVALID]=] toml> ] must-fail
+
+[ [=[ key = 1234abcd ]=] toml> ] must-fail
+
+[ [=[ first = "Tom" last = "Preston-Werner" # INVALID]=] toml> ] must-fail
+
+! Keys
+
+{
+ H{
+ { "character encoding" "value" }
+ { "quoted \"value\"" "value" }
+ { "ʎǝʞ" "value" }
+ { "key2" "value" }
+ { "127.0.0.1" "value" }
+ }
+} [
+ [=[
+"127.0.0.1" = "value"
+"character encoding" = "value"
+"ʎǝʞ" = "value"
+'key2' = "value"
+'quoted "value"' = "value"
+ ]=] toml>
+] unit-test
+
+[ [=[ = "no key name" # INVALID]=] toml> ] must-fail
+{ H{ { "" "blank" } } } [ [=[ "" = "blank" # VALID but discouraged]=] toml> ] unit-test
+{ H{ { "" "blank" } } } [ [=[ '' = "blank" # VALID but discouraged]=] toml> ] unit-test
+
+{
+ H{
+ { "physical" H{ { "color" "orange" } { "shape" "round" } } }
+ { "name" "Orange" }
+ { "site" H{ { "google.com" t } } }
+ }
+} [
+ [=[
+name = "Orange"
+physical.color = "orange"
+physical.shape = "round"
+site."google.com" = true
+]=] toml>
+] unit-test
+
+{
+ H{
+ { "fruit" H{
+ { "name" "banana" }
+ { "color" "yellow" }
+ { "flavor" "banana" }
+ }
+ }
+ }
+} [
+ [=[
+fruit.name = "banana" # this is best practice
+fruit. color = "yellow" # same as fruit.color
+fruit . flavor = "banana" # same as fruit.flavor]=] toml>
+] unit-test
+
+[ [=[
+# DO NOT DO THIS
+name = "Tom"
+name = "Pradyun"
+]=] toml> ] [ duplicate-key? ] must-fail-with
+
+[ [=[ # THE FOLLOWING IS INVALID
+
+# This defines the value of fruit.apple to be an integer.
+fruit.apple = 1
+
+# But then this treats fruit.apple like it's a table.
+# You can't turn an integer into a table.
+fruit.apple.smooth = true]=] toml> ] must-fail
+
+{ H{ { "3" H{ { "14159" "pi" } } } } } [
+ [=[ 3.14159 = "pi" ]=] toml>
+] unit-test
+
+! Strings
+
+{
+ H{
+ {
+ "str"
+ "I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF."
+ }
+ }
+} [
+ [=[ str = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." ]=]
+ toml>
+] unit-test
+
+{ H{ { "str1" "Roses are red\nViolets are blue" } } } [
+ [=[ str1 = """
+Roses are red
+Violets are blue"""]=] toml>
+] unit-test
+
+{
+ H{
+ { "str1" "The quick brown fox jumps over the lazy dog." }
+ { "str2" "The quick brown fox jumps over the lazy dog." }
+ { "str3" "The quick brown fox jumps over the lazy dog." }
+ }
+} [
+ [=[
+# The following strings are byte-for-byte equivalent:
+str1 = "The quick brown fox jumps over the lazy dog."
+
+str2 = """
+The quick brown \
+
+
+ fox jumps over \
+ the lazy dog."""
+
+str3 = """\
+ The quick brown \
+ fox jumps over \
+ the lazy dog.\
+ """
+ ]=] toml>
+] unit-test
+
+{
+ H{
+ { "regex" "<\\i\\c*\\s*>" }
+ { "quoted" "Tom \"Dubs\" Preston-Werner" }
+ { "winpath2" "\\\\ServerX\\admin$\\system32\\" }
+ { "winpath" "C:\\Users\\nodejs\\templates" }
+ }
+} [
+ [=[ # What you see is what you get.
+winpath = 'C:\Users\nodejs\templates'
+winpath2 = '\\ServerX\admin$\system32\'
+quoted = 'Tom "Dubs" Preston-Werner'
+regex = '<\i\c*\s*>' ]=] toml>
+] unit-test
+
+! Integer
+
+{
+ H{
+ { "int1" 99 }
+ { "int2" 42 }
+ { "int3" 0 }
+ { "int4" -17 }
+ { "int5" 1000 }
+ { "int6" 5349221 }
+ { "int7" 5349221 }
+ { "int8" 12345 }
+ { "hex1" 0xdeadbeef }
+ { "hex2" 0xdeadbeef }
+ { "hex3" 0xdeadbeef }
+ { "oct1" 0o01234567 }
+ { "oct2" 0o755 }
+ { "bin1" 0b11010110 }
+ }
+} [
+ [=[
+int1 = +99
+int2 = 42
+int3 = 0
+int4 = -17
+int5 = 1_000
+int6 = 5_349_221
+int7 = 53_49_221 # Indian number system grouping
+int8 = 1_2_3_4_5 # VALID but discouraged
+
+# hexadecimal with prefix `0x`
+hex1 = 0xDEADBEEF
+hex2 = 0xdeadbeef
+hex3 = 0xdead_beef
+
+# octal with prefix `0o`
+oct1 = 0o01234567
+oct2 = 0o755 # useful for Unix file permissions
+
+# binary with prefix `0b`
+bin1 = 0b11010110
+]=] toml>
+] unit-test
+
+[ [=[ key = +0o99 ]=] toml> ] must-fail
+
+! Floats
+
+{
+ H{
+ { "flt1" "1.0" }
+ { "flt2" "3.1415" }
+ { "flt3" "-0.01" }
+ { "flt4" "5.0e+22" }
+ { "flt5" "1000000.0" }
+ { "flt6" "-0.02" }
+ { "flt7" "6.626e-34" }
+ { "flt8" "224617.445991228" }
+ { "sf1" "1/0." }
+ { "sf3" "-1/0." }
+ { "sf2" "1/0." }
+ { "sf5" "0/0." }
+ { "sf4" "0/0." }
+ { "sf6" "0/0." }
+ }
+} [
+ [=[
+# fractional
+flt1 = +1.0
+flt2 = 3.1415
+flt3 = -0.01
+
+# exponent
+flt4 = 5e+22
+flt5 = 1e06
+flt6 = -2E-2
+
+# both
+flt7 = 6.626e-34
+
+flt8 = 224_617.445_991_228
+
+# infinity
+sf1 = inf # positive infinity
+sf2 = +inf # positive infinity
+sf3 = -inf # negative infinity
+
+# not a number
+sf4 = nan # actual sNaN/qNaN encoding is implementation-specific
+sf5 = +nan # same as `nan`
+sf6 = -nan # valid, actual encoding is implementation-specific
+]=] toml> [ present ] assoc-map
+] unit-test
+
+[ [=[ invalid_float_1 = .7]=] toml> ] must-fail
+[ [=[ invalid_float_2 = 7.]=] toml> ] must-fail
+[ [=[ invalid_float_2 = 3.e+20]=] toml> ] must-fail
+
+! Booleans
+
+{ H{ { "bool1" t } { "bool2" f } } } [
+ [=[ bool1 = true
+bool2 = false]=] toml>
+] unit-test
+
+! Offset Date-Time
+
+! XXX:
+
+! Local Date-Time
+
+! XXX:
+
+! Local Date
+
+! XXX:
+
+! Local Time
+
+! XXX:
+
+! Array
+
+{
+ H{
+ { "integers" { 1 2 3 } }
+ { "colors" { "red" "yellow" "green" } }
+ { "nested_arrays_of_ints" { { 1 2 } { 3 4 5 } } }
+ { "nested_mixed_array" { { 1 2 } { "a" "b" "c" } } }
+ { "string_array" { "all" "strings" "are the same" "type" } }
+ { "numbers" { 0.1 0.2 0.5 1 2 5 } }
+ {
+ "contributors"
+ {
+ "Foo Bar <foo@example.com>"
+ H{
+ { "name" "Baz Qux" }
+ { "email" "bazqux@example.com" }
+ { "url" "https://example.com/bazqux" }
+ }
+ }
+ }
+ }
+} [
+ [=[
+integers = [ 1, 2, 3 ]
+colors = [ "red", "yellow", "green" ]
+nested_arrays_of_ints = [ [ 1, 2 ], [3, 4, 5] ]
+nested_mixed_array = [ [ 1, 2 ], ["a", "b", "c"] ]
+string_array = [ "all", 'strings', """are the same""", '''type''' ]
+
+# Mixed-type arrays are allowed
+numbers = [ 0.1, 0.2, 0.5, 1, 2, 5 ]
+contributors = [
+ "Foo Bar <foo@example.com>",
+ { name = "Baz Qux", email = "bazqux@example.com", url = "https://example.com/bazqux" }
+]
+]=] toml>
+] unit-test
+
+{
+ H{
+ { "integers2" { 1 2 3 } }
+ { "integers3" { 1 2 } }
+ }
+} [
+ [=[ integers2 = [
+ 1, 2, 3
+]
+
+integers3 = [
+ 1,
+ 2, # this is ok
+]]=] toml>
+] unit-test
+
+! Table
+
+{
+ H{ { "j" H{ { "ʞ" H{ { "l" H{ { "key1" t } } } } } } } }
+} [
+ [=[
+[ j . "ʞ" . 'l' ]
+key1 = true
+]=] toml>
+] unit-test
+
+! Inline Table
+
+{
+ H{
+ {
+ "name"
+ H{ { "first" "Tom" } { "last" "Preston-Werner" } }
+ }
+ { "point" H{ { "x" 1 } { "y" 2 } } }
+ { "animal" H{ { "type" H{ { "name" "pug" } } } } }
+ }
+} [
+ [=[
+name = { first = "Tom", last = "Preston-Werner" }
+point = { x = 1, y = 2 }
+animal = { type.name = "pug" }]=] toml>
+] unit-test
+
+! Array of Tables
+
+{
+ H{
+ { "points" {
+ H{ { "x" 1 } { "y" 2 } { "z" 3 } }
+ H{ { "x" 7 } { "y" 8 } { "z" 9 } }
+ H{ { "x" 2 } { "y" 4 } { "z" 8 } }
+ }
+ }
+ }
+} [
+ [=[ points = [ { x = 1, y = 2, z = 3 },
+ { x = 7, y = 8, z = 9 },
+ { x = 2, y = 4, z = 8 } ] ]=] toml>
+] unit-test
+
+! unreleased
+
+! Clarify Unicode and UTF-8 references.
+! Relax comment parsing; most control characters are again permitted.
+! Allow newline after key/values in inline tables.
+! Allow trailing comma in inline tables.
+! Clarify where and how dotted keys define tables.
+! Add new \e shorthand for the escape character.
+! Add \x00 notation to basic strings.
+! Seconds in Date-Time and Time values are now optional.
+! Allow non-English scripts in unquoted (bare) keys
+! Clarify newline normalization in multi-line literal strings.
! Copyright (C) 2019 John Benediktsson
! See https://factorcode.org/license.txt for BSD license
-USING: accessors assocs hashtables kernel make math.parser peg
-peg.parsers regexp sequences splitting strings.parser ;
+USING: accessors ascii assocs hashtables kernel make math.parser
+peg peg.parsers regexp sequences splitting strings.parser ;
+
+! https://github.com/toml-lang/toml/blob/main/toml.abnf
+
+! XXX: reload and it doesn't clear some compiled parser
IN: toml
<PRIVATE
-! FIXME: key = 1234abcd # should error!
-
-TUPLE: table name array? entries ;
+TUPLE: table name array? ;
TUPLE: entry key value ;
+: ws ( -- parser )
+ [ " \t" member? ] satisfy repeat0 ;
+
+: newline ( -- parser )
+ "\n" token "\r\n" token 2choice ;
+
: boolean-parser ( -- parser )
"true" token [ drop t ] action
"false" token [ drop f ] action
"+" token "-" token 2choice ;
: hexdigit ( -- parser )
- [
- CHAR: 0 CHAR: 9 range ,
- CHAR: a CHAR: f range ,
- CHAR: A CHAR: F range ,
- ] choice* ;
+ CHAR: 0 CHAR: 9 range
+ CHAR: a CHAR: f range
+ CHAR: A CHAR: F range 3choice ;
-: hex ( -- parser )
- "0x" token hide hexdigit digits 2seq [ first hex> ] action ;
+: hex-parser ( -- parser )
+ sign optional "0x" token hexdigit digits 3seq
+ [ "" concat-as string>number ] action ;
: decdigit ( -- parser )
CHAR: 0 CHAR: 9 range ;
-: dec ( -- parser )
- decdigit digits [ dec> ] action ;
+: dec-parser ( -- parser )
+ sign optional decdigit digits 2seq
+ [ "" concat-as string>number ] action ;
: octdigit ( -- parser )
CHAR: 0 CHAR: 7 range ;
-: oct ( -- parser )
- "0o" token hide octdigit digits 2seq [ first oct> ] action ;
+: oct-parser ( -- parser )
+ sign optional "0o" token octdigit digits 3seq
+ [ "" concat-as string>number ] action ;
: bindigit ( -- parser )
CHAR: 0 CHAR: 1 range ;
-: bin ( -- parser )
- "0b" token hide bindigit digits 2seq [ first bin> ] action ;
+: bin-parser ( -- parser )
+ sign optional "0b" token bindigit digits 3seq
+ [ "" concat-as string>number ] action ;
: integer-parser ( -- parser )
- hex oct bin dec 4choice ;
+ hex-parser oct-parser bin-parser dec-parser 4choice ;
-: float ( -- parser )
- [
- sign optional ,
- decdigit digits optional ,
- "." token ,
- decdigit digits optional ,
- "e" token "E" token 2choice
- sign optional
- decdigit digits optional 3seq optional ,
- ] seq* [ unclip-last append "" concat-as string>number ] action ;
+: exponent ( -- parser )
+ "e" token "E" token 2choice sign optional
+ decdigit digits optional 3seq
+ [ "" concat-as ] action ;
+
+: normal-float ( -- parser )
+ [ sign optional , decdigit digits , exponent , ] seq*
+ [ sign optional , decdigit digits , "." token , decdigit digits , exponent optional , ] seq*
+ 2choice [ "" concat-as string>number ] action ;
: +inf ( -- parser )
- "+" token optional "inf" token 2seq [ drop 1/0. ] action ;
+ "+inf" token "inf" token 2choice [ drop 1/0. ] action ;
: -inf ( -- parser )
"-inf" token [ drop -1/0. ] action ;
: nan ( -- parser )
- sign optional "nan" token 2seq
- [ drop NAN: 8000000000000 ] action ;
+ sign optional "nan" token 2seq [ drop 0/0. ] action ;
: float-parser ( -- parser )
- float +inf -inf nan 4choice ;
+ normal-float +inf -inf nan 4choice ;
+
+: number-parser ( -- parser )
+ +inf -inf nan
+ [ blank? not ] satisfy repeat1 [ string>number ] action
+ 4choice ;
: escaped ( -- parser )
- "\\" token hide [ "btnfr\"\\" member-eq? ] satisfy 2seq
+ "\\" token hide [ "\"\\befnrt" member-eq? ] satisfy 2seq
[ first escape ] action ;
: unicode ( -- parser )
"\\U" token hide hexdigit 8 exactly-n 2seq
2choice [ first hex> ] action ;
+: hexescape ( -- parser )
+ "\\x" token hide hexdigit 2 exactly-n 2seq
+ "\\X" token hide hexdigit 2 exactly-n 2seq
+ 2choice [ first hex> ] action ;
+
: basic-string ( -- parser )
- escaped unicode [ "\"\n" member? not ] satisfy 3choice repeat0
- "\"" dup surrounded-by ;
+ escaped unicode hexescape [ "\"\n" member? not ] satisfy
+ 4choice repeat0 "\"" dup surrounded-by ;
: literal-string ( -- parser )
- [ "'\n" member? not ] satisfy repeat0
- "'" dup surrounded-by ;
+ [ "'" member? not ] satisfy repeat0 "'" dup surrounded-by ;
: single-string ( -- parser )
basic-string literal-string 2choice [ "" like ] action ;
"\"\"\"" dup surrounded-by ;
: multi-literal-string ( -- parser )
- [ CHAR: ' = not ] satisfy repeat0
- "'''" dup surrounded-by ;
+ [ CHAR: ' = not ] satisfy repeat0 "'''" dup surrounded-by ;
: multi-string ( -- parser )
multi-basic-string multi-literal-string 2choice [
timezone-parser optional ,
] seq* [ "" concat-as ] action ;
-: space ( -- parser )
- [ " \t" member? ] satisfy repeat0 ;
+DEFER: comment
-: whitespace ( -- parser )
- [ " \t\r\n" member? ] satisfy repeat0 ;
+: separator ( -- parser )
+ "," token comment optional 2seq ;
+
+DEFER: ws-comment-newline
DEFER: value-parser
+: array-value-parser ( -- parser )
+ ws-comment-newline hide
+ value-parser
+ ws-comment-newline hide 3seq [ first ] action ;
+
: array-parser ( -- parser )
[
"[" token hide ,
- whitespace hide ,
- value-parser
- whitespace "," token whitespace pack list-of ,
- whitespace hide ,
+ array-value-parser separator list-of ,
+ separator optional hide ,
+ ws-comment-newline hide ,
"]" token hide ,
] seq* [ first { } like ] action ;
DEFER: update-toml
+: inline-table-key-value ( -- parser )
+ ws-comment-newline hide
+ key-value-parser
+ ws-comment-newline hide 3seq [ first ] action ;
+
: inline-table-parser ( -- parser )
[
"{" token hide ,
- whitespace hide ,
- key-value-parser
- whitespace "," token whitespace pack list-of ,
- whitespace hide ,
+ inline-table-key-value separator list-of ,
+ separator optional hide ,
+ ws-comment-newline hide ,
"}" token hide ,
] seq* [
first [ length <hashtable> ] keep [ update-toml ] each
] choice*
] delay ;
-: name-parser ( -- parser )
+: unquoted-key ( -- parser )
[
CHAR: A CHAR: Z range ,
CHAR: a CHAR: z range ,
CHAR: 0 CHAR: 9 range ,
- "_" token [ first ] action ,
- "-" token [ first ] action ,
+ [ "_-\xb2\xb3\xb9\xbc\xbd\xbe" member? ] satisfy ,
+ 0xC0 0XD6 range ,
+ 0xD8 0xF6 range ,
+ 0xF8 0x37D range ,
+ 0x37F 0x1FFF range ,
+ 0x200C 0x200D range ,
+ 0x203F 0x2040 range ,
+ 0x2070 0x218F range ,
+ 0x2460 0x24FF range ,
+ 0x2C00 0x2FEF range ,
+ 0x3001 0xD7FF range ,
+ 0xF900 0xFDCF range ,
+ 0xFDF0 0xFFFFD range ,
+ 0x10000 0xEFFFF range ,
] choice* repeat1 [ "" like ] action single-string 2choice ;
-: comment-parser ( -- parser )
- [
- space hide ,
- "#" token ,
- [ CHAR: \n = not ] satisfy repeat0 ,
- ] seq* [ drop f ] action ;
+: quoted-key ( -- parser )
+ multi-string single-string 2choice ;
+
+: non-ascii ( -- parser )
+ 0x80 0xd7ff range 0xe000 0x10ffff range 2choice ;
+
+: comment-char ( -- parser )
+ 0x01 0x09 range 0x0e 0x7f range non-ascii 3choice ;
+
+: comment ( -- parser )
+ "#" token comment-char repeat0 2seq hide ;
+
+: ws-comment-newline ( -- parser )
+ ws comment optional 2seq newline list-of ;
+
+: simple-key ( -- parser )
+ unquoted-key quoted-key 2choice ;
: key-parser ( -- parser )
- name-parser "." token list-of [ { } like ] action ;
+ simple-key ws "." token ws 3seq list-of ;
: key-value-parser ( -- parser )
[
- space hide ,
key-parser ,
- space hide ,
+ ws hide ,
"=" token hide ,
- space hide ,
+ ws hide ,
value-parser ,
- comment-parser optional hide ,
] seq* [ first2 entry boa ] action ;
-: line-parser ( -- parser )
- "\n" token "\r\n" token 2choice ;
+ALIAS: name-parser unquoted-key
-:: table-name-parser ( begin end -- parser )
+:: table-name-parser ( begin end array? -- parser )
[
begin token hide ,
- space hide ,
- name-parser
- space "." token space pack list-of
- [ { } like ] action ,
- space hide ,
+ ws hide ,
+ key-parser ,
+ ws hide ,
end token hide ,
- comment-parser optional hide ,
- ] seq* ;
+ ] seq* [ first array? table boa ] action ;
+
+: array-table ( -- parser )
+ "[[" "]]" t table-name-parser ;
+
+: std-table ( -- parser )
+ "[" "]" f table-name-parser ;
: table-parser ( -- parser )
- [
- space hide ,
- "[[" "]]" table-name-parser [ t suffix! ] action
- "[" "]" table-name-parser [ f suffix! ] action
- 2choice ,
- whitespace hide ,
- key-value-parser line-parser list-of optional ,
- ] seq* [ first2 [ first2 ] dip table boa ] action ;
+ array-table std-table 2choice ;
: toml-parser ( -- parser )
- [
- whitespace hide ,
- [
- comment-parser ,
- table-parser ,
- key-value-parser ,
- ] choice* whitespace list-of ,
- whitespace hide ,
- ] seq* [ first sift { } like ] action ;
+ ws hide key-value-parser ws hide comment optional hide 4seq
+ ws hide table-parser ws hide comment optional hide 4seq
+ ws hide comment optional hide 2seq
+ 3choice newline list-of [ { } concat-as ] action ;
+
+
+! entry entry table entry entry table entry entry
+
+! tables are added to the root
+
+! accum is the root to start
+
+! tables add an accum
+! std-table key=H{}
+! array-table key={H{}}
+
+! then when we see a table, accum is changed
+
: check-no-key ( key assoc -- key assoc )
2dup at* nip [ over duplicate-key ] when ;
-: deep-at ( keys assoc -- value )
- swap [
+: entries-at ( table keys -- key entries )
+ unclip-last -rot [
over ?at [ nip ] [
H{ } clone [ spin check-no-key set-at ] keep
] if
] each ;
-GENERIC: update-toml ( assoc entry -- assoc )
+GENERIC: update-toml ( root table entry -- root table' )
M: entry update-toml
- [ key>> unclip-last [ over deep-at ] dip ] [ value>> ] bi
- spin check-no-key set-at ;
+ dupd [ key>> entries-at ] [ value>> ] bi
+ -rot check-no-key set-at ;
M: table update-toml
- [ name>> unclip-last [ over deep-at ] dip ]
- [ entries>> [ H{ } clone ] dip [ update-toml ] each spin ]
- [ array?>> [ push-at ] [ check-no-key set-at ] if ] tri ;
+ nip dupd [ name>> entries-at ] [ array?>> ] bi
+ H{ } clone [
+ swap [ -rot push-at ] [ -rot check-no-key set-at ] if
+ ] keep ;
PEG: parse-toml ( string -- ast ) toml-parser ;
PRIVATE>
: toml> ( string -- assoc )
- [ H{ } clone ] dip parse-toml [ update-toml ] each ;
+ [ H{ } clone dup ] dip parse-toml [ update-toml ] each drop ;