--- /dev/null
+Phil Dawes
--- /dev/null
+USING: help.syntax help.markup kernel prettyprint sequences ;
+IN: csv
+
+HELP: csv
+{ $values { "stream" "an input stream" }
+ { "rows" "an array of arrays of fields" } }
+{ $description "parses a csv stream into an array of row arrays"
+} ;
+
+HELP: csv-row
+{ $values { "stream" "an input stream" }
+ { "row" "an array of fields" } }
+{ $description "parses a row from a csv stream"
+} ;
+
+HELP: write-csv
+{ $values { "rows" "an sequence of sequences of strings" }
+ { "stream" "an output stream" } }
+{ $description "writes csv to the output stream, escaping where necessary"
+} ;
+
+
+HELP: with-delimiter
+{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
+ { "quot" "a quotation" } }
+{ $description "Sets the field delimiter for csv or csv-row words "
+} ;
+
--- /dev/null
+IN: csv.tests
+USING: io.streams.string csv tools.test shuffle kernel strings ;
+
+! I like to name my unit tests
+: named-unit-test ( name output input -- )
+ nipd unit-test ; inline
+
+! tests nicked from the wikipedia csv article
+! http://en.wikipedia.org/wiki/Comma-separated_values
+
+"Fields are separated by commas"
+[ { { "1997" "Ford" "E350" } } ]
+[ "1997,Ford,E350" <string-reader> csv ] named-unit-test
+
+"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
+[ { { "1997" "Ford" "E350" } } ]
+[ "1997, Ford , E350" <string-reader> csv ] named-unit-test
+
+"keeps spaces in quotes"
+[ { { "1997" "Ford" "E350" "Super, luxurious truck" } } ]
+[ "1997,Ford,E350,\"Super, luxurious truck\"" <string-reader> csv ] named-unit-test
+
+"double quotes mean escaped in quotes"
+[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
+[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\""
+ <string-reader> csv ] named-unit-test
+
+"Fields with embedded line breaks must be delimited by double-quote characters."
+[ { { "1997" "Ford" "E350" "Go get one now\nthey are going fast" } } ]
+[ "1997,Ford,E350,\"Go get one now\nthey are going fast\""
+ <string-reader> csv ] named-unit-test
+
+"Fields with leading or trailing spaces must be delimited by double-quote characters. (See comment about leading and trailing spaces above)"
+[ { { "1997" "Ford" "E350" " Super luxurious truck " } } ]
+[ "1997,Ford,E350,\" Super luxurious truck \""
+ <string-reader> csv ] named-unit-test
+
+"Fields may always be delimited by double-quote characters, whether necessary or not."
+[ { { "1997" "Ford" "E350" } } ]
+[ "\"1997\",\"Ford\",\"E350\"" <string-reader> csv ] named-unit-test
+
+"The first record in a csv file may contain column names in each of the fields."
+[ { { "Year" "Make" "Model" }
+ { "1997" "Ford" "E350" }
+ { "2000" "Mercury" "Cougar" } } ]
+[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
+ <string-reader> csv ] named-unit-test
+
+
+! !!!!!!!! other tests
+
+[ { { "Phil Dawes" } } ]
+[ "\"Phil Dawes\"" <string-reader> csv ] unit-test
+
+[ { { "1" "2" "3" } { "4" "5" "6" } } ]
+[ "1,2,3\n4,5,6\n" <string-reader> csv ] unit-test
+
+"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
+[ { { "foo yeah" "bah" "baz" } } ]
+[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
+
+
+"allows setting of delimiting character"
+[ { { "foo" "bah" "baz" } } ]
+[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test
+
+"Quoted field followed immediately by newline"
+[ { { "foo" "bar" }
+ { "1" "2" } } ]
+[ "foo,\"bar\"\n1,2" <string-reader> csv ] named-unit-test
+
+"can write csv too!"
+[ "foo1,bar1\nfoo2,bar2\n" ]
+[ { { "foo1" "bar1" } { "foo2" "bar2" } } <string-writer> tuck write-csv >string ] named-unit-test
+
+"escapes quotes commas and newlines when writing"
+[ "\"fo\"\"o1\",bar1\n\"fo\no2\",\"b,ar2\"\n" ]
+[ { { "fo\"o1" "bar1" } { "fo\no2" "b,ar2" } } <string-writer> tuck write-csv >string ] named-unit-test ! "
--- /dev/null
+! Copyright (C) 2007, 2008 Phil Dawes
+! See http://factorcode.org/license.txt for BSD license.
+
+! Simple CSV Parser
+! Phil Dawes phil@phildawes.net
+
+USING: kernel sequences io namespaces combinators unicode.categories ;
+IN: csv
+
+SYMBOL: delimiter
+
+CHAR: , delimiter set-global
+
+: delimiter> delimiter get ; inline
+
+DEFER: quoted-field ( -- endchar )
+
+! trims whitespace from either end of string
+: trim-whitespace ( str -- str )
+ [ blank? ] trim ; inline
+
+: skip-to-field-end ( -- endchar )
+ "\n" delimiter> suffix read-until nip ; inline
+
+: not-quoted-field ( -- endchar )
+ "\"\n" delimiter> suffix read-until ! "
+ dup
+ { { CHAR: " [ drop drop quoted-field ] } ! "
+ { delimiter> [ swap trim-whitespace % ] }
+ { CHAR: \n [ swap trim-whitespace % ] }
+ { f [ swap trim-whitespace % ] } ! eof
+ } case ;
+
+: maybe-escaped-quote ( -- endchar )
+ read1 dup
+ { { CHAR: " [ , quoted-field ] } ! " is an escaped quote
+ { delimiter> [ ] } ! end of quoted field
+ { CHAR: \n [ ] }
+ [ 2drop skip-to-field-end ] ! end of quoted field + padding
+ } case ;
+
+: quoted-field ( -- endchar )
+ "\"" read-until ! "
+ drop % maybe-escaped-quote ;
+
+: field ( -- sep string )
+ [ not-quoted-field ] "" make ; ! trim-whitespace
+
+: (row) ( -- sep )
+ field ,
+ dup delimiter get = [ drop (row) ] when ;
+
+: row ( -- eof? array[string] )
+ [ (row) ] { } make ;
+
+: append-if-row-not-empty ( row -- )
+ dup { "" } = [ drop ] [ , ] if ;
+
+: (csv) ( -- )
+ row append-if-row-not-empty
+ [ (csv) ] when ;
+
+: csv-row ( stream -- row )
+ [ row nip ] with-input-stream ;
+
+: csv ( stream -- rows )
+ [ [ (csv) ] { } make ] with-input-stream ;
+
+: with-delimiter ( char quot -- )
+ delimiter swap with-variable ; inline
+
+: needs-escaping? ( cell -- ? )
+ [ [ "\n\"" member? ] [ delimiter get = ] bi or ] contains? ; inline ! "
+
+: escape-quotes ( cell -- cell' )
+ [ [ dup , CHAR: " = [ CHAR: " , ] when ] each ] "" make ; inline
+
+: enclose-in-quotes ( cell -- cell' )
+ CHAR: " [ prefix ] [ suffix ] bi ; inline ! "
+
+: escape-if-required ( cell -- cell' )
+ dup needs-escaping? [ escape-quotes enclose-in-quotes ] when ; inline
+
+: write-row ( row -- )
+ [ delimiter get write1 ] [ escape-if-required write ] interleave nl ; inline
+
+: write-csv ( rows stream -- )
+ [ [ write-row ] each ] with-output-stream ;
--- /dev/null
+CSV parser
+++ /dev/null
-Phil Dawes
+++ /dev/null
-USING: help.syntax help.markup kernel prettyprint sequences ;
-IN: csv
-
-HELP: csv
-{ $values { "stream" "an input stream" }
- { "rows" "an array of arrays of fields" } }
-{ $description "parses a csv stream into an array of row arrays"
-} ;
-
-HELP: csv-row
-{ $values { "stream" "an input stream" }
- { "row" "an array of fields" } }
-{ $description "parses a row from a csv stream"
-} ;
-
-HELP: write-csv
-{ $values { "rows" "an sequence of sequences of strings" }
- { "stream" "an output stream" } }
-{ $description "writes csv to the output stream, escaping where necessary"
-} ;
-
-
-HELP: with-delimiter
-{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
- { "quot" "a quotation" } }
-{ $description "Sets the field delimiter for csv or csv-row words "
-} ;
-
+++ /dev/null
-IN: csv.tests
-USING: io.streams.string csv tools.test shuffle kernel strings ;
-
-! I like to name my unit tests
-: named-unit-test ( name output input -- )
- nipd unit-test ; inline
-
-! tests nicked from the wikipedia csv article
-! http://en.wikipedia.org/wiki/Comma-separated_values
-
-"Fields are separated by commas"
-[ { { "1997" "Ford" "E350" } } ]
-[ "1997,Ford,E350" <string-reader> csv ] named-unit-test
-
-"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
-[ { { "1997" "Ford" "E350" } } ]
-[ "1997, Ford , E350" <string-reader> csv ] named-unit-test
-
-"keeps spaces in quotes"
-[ { { "1997" "Ford" "E350" "Super, luxurious truck" } } ]
-[ "1997,Ford,E350,\"Super, luxurious truck\"" <string-reader> csv ] named-unit-test
-
-"double quotes mean escaped in quotes"
-[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
-[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\""
- <string-reader> csv ] named-unit-test
-
-"Fields with embedded line breaks must be delimited by double-quote characters."
-[ { { "1997" "Ford" "E350" "Go get one now\nthey are going fast" } } ]
-[ "1997,Ford,E350,\"Go get one now\nthey are going fast\""
- <string-reader> csv ] named-unit-test
-
-"Fields with leading or trailing spaces must be delimited by double-quote characters. (See comment about leading and trailing spaces above)"
-[ { { "1997" "Ford" "E350" " Super luxurious truck " } } ]
-[ "1997,Ford,E350,\" Super luxurious truck \""
- <string-reader> csv ] named-unit-test
-
-"Fields may always be delimited by double-quote characters, whether necessary or not."
-[ { { "1997" "Ford" "E350" } } ]
-[ "\"1997\",\"Ford\",\"E350\"" <string-reader> csv ] named-unit-test
-
-"The first record in a csv file may contain column names in each of the fields."
-[ { { "Year" "Make" "Model" }
- { "1997" "Ford" "E350" }
- { "2000" "Mercury" "Cougar" } } ]
-[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
- <string-reader> csv ] named-unit-test
-
-
-! !!!!!!!! other tests
-
-[ { { "Phil Dawes" } } ]
-[ "\"Phil Dawes\"" <string-reader> csv ] unit-test
-
-[ { { "1" "2" "3" } { "4" "5" "6" } } ]
-[ "1,2,3\n4,5,6\n" <string-reader> csv ] unit-test
-
-"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
-[ { { "foo yeah" "bah" "baz" } } ]
-[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
-
-
-"allows setting of delimiting character"
-[ { { "foo" "bah" "baz" } } ]
-[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test
-
-"Quoted field followed immediately by newline"
-[ { { "foo" "bar" }
- { "1" "2" } } ]
-[ "foo,\"bar\"\n1,2" <string-reader> csv ] named-unit-test
-
-"can write csv too!"
-[ "foo1,bar1\nfoo2,bar2\n" ]
-[ { { "foo1" "bar1" } { "foo2" "bar2" } } <string-writer> tuck write-csv >string ] named-unit-test
-
-"escapes quotes commas and newlines when writing"
-[ "\"fo\"\"o1\",bar1\n\"fo\no2\",\"b,ar2\"\n" ]
-[ { { "fo\"o1" "bar1" } { "fo\no2" "b,ar2" } } <string-writer> tuck write-csv >string ] named-unit-test ! "
+++ /dev/null
-! Copyright (C) 2007, 2008 Phil Dawes
-! See http://factorcode.org/license.txt for BSD license.
-
-! Simple CSV Parser
-! Phil Dawes phil@phildawes.net
-
-USING: kernel sequences io namespaces combinators unicode.categories ;
-IN: csv
-
-SYMBOL: delimiter
-
-CHAR: , delimiter set-global
-
-: delimiter> delimiter get ; inline
-
-DEFER: quoted-field ( -- endchar )
-
-! trims whitespace from either end of string
-: trim-whitespace ( str -- str )
- [ blank? ] trim ; inline
-
-: skip-to-field-end ( -- endchar )
- "\n" delimiter> suffix read-until nip ; inline
-
-: not-quoted-field ( -- endchar )
- "\"\n" delimiter> suffix read-until ! "
- dup
- { { CHAR: " [ drop drop quoted-field ] } ! "
- { delimiter> [ swap trim-whitespace % ] }
- { CHAR: \n [ swap trim-whitespace % ] }
- { f [ swap trim-whitespace % ] } ! eof
- } case ;
-
-: maybe-escaped-quote ( -- endchar )
- read1 dup
- { { CHAR: " [ , quoted-field ] } ! " is an escaped quote
- { delimiter> [ ] } ! end of quoted field
- { CHAR: \n [ ] }
- [ 2drop skip-to-field-end ] ! end of quoted field + padding
- } case ;
-
-: quoted-field ( -- endchar )
- "\"" read-until ! "
- drop % maybe-escaped-quote ;
-
-: field ( -- sep string )
- [ not-quoted-field ] "" make ; ! trim-whitespace
-
-: (row) ( -- sep )
- field ,
- dup delimiter get = [ drop (row) ] when ;
-
-: row ( -- eof? array[string] )
- [ (row) ] { } make ;
-
-: append-if-row-not-empty ( row -- )
- dup { "" } = [ drop ] [ , ] if ;
-
-: (csv) ( -- )
- row append-if-row-not-empty
- [ (csv) ] when ;
-
-: csv-row ( stream -- row )
- [ row nip ] with-input-stream ;
-
-: csv ( stream -- rows )
- [ [ (csv) ] { } make ] with-input-stream ;
-
-: with-delimiter ( char quot -- )
- delimiter swap with-variable ; inline
-
-: needs-escaping? ( cell -- ? )
- [ [ "\n\"" member? ] [ delimiter get = ] bi or ] contains? ; inline ! "
-
-: escape-quotes ( cell -- cell' )
- [ [ dup , CHAR: " = [ CHAR: " , ] when ] each ] "" make ; inline
-
-: enclose-in-quotes ( cell -- cell' )
- CHAR: " [ prefix ] [ suffix ] bi ; inline ! "
-
-: escape-if-required ( cell -- cell' )
- dup needs-escaping? [ escape-quotes enclose-in-quotes ] when ; inline
-
-: write-row ( row -- )
- [ delimiter get write1 ] [ escape-if-required write ] interleave nl ; inline
-
-: write-csv ( rows stream -- )
- [ [ write-row ] each ] with-output-stream ;
+++ /dev/null
-CSV parser