Move csv to basis

author Slava Pestov <slava@slava-pestovs-macbook-pro.local>

Thu, 4 Sep 2008 01:19:52 +0000 (20:19 -0500)

committer Slava Pestov <slava@slava-pestovs-macbook-pro.local>

Thu, 4 Sep 2008 01:19:52 +0000 (20:19 -0500)
author Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Thu, 4 Sep 2008 01:19:52 +0000 (20:19 -0500)
committer Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Thu, 4 Sep 2008 01:19:52 +0000 (20:19 -0500)
diff --git a/basis/csv/authors.txt b/basis/csv/authors.txt

new file mode 100644 (file)

index 0000000..0be42b2
--- /dev/null
+++ b/basis/csv/authors.txt
@@ -0,0 +1 @@
+Phil Dawes
diff --git a/basis/csv/csv-docs.factor b/basis/csv/csv-docs.factor

new file mode 100644 (file)

index 0000000..e4741f4
--- /dev/null
+++ b/basis/csv/csv-docs.factor
@@ -0,0 +1,28 @@
+USING: help.syntax help.markup kernel prettyprint sequences ;
+IN: csv
+
+HELP: csv
+{ $values { "stream" "an input stream" }
+          { "rows" "an array of arrays of fields" } } 
+{ $description "parses a csv stream into an array of row arrays"
+} ;
+
+HELP: csv-row
+{ $values { "stream" "an input stream" }
+          { "row" "an array of fields" } } 
+{ $description "parses a row from a csv stream"
+} ;
+
+HELP: write-csv
+{ $values { "rows" "an sequence of sequences of strings" }
+          { "stream" "an output stream" } } 
+{ $description "writes csv to the output stream, escaping where necessary"
+} ;
+
+
+HELP: with-delimiter
+{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
+          { "quot" "a quotation" } }
+{ $description "Sets the field delimiter for csv or csv-row words "
+} ;
+
diff --git a/basis/csv/csv-tests.factor b/basis/csv/csv-tests.factor

new file mode 100644 (file)

index 0000000..8261ae1
--- /dev/null
+++ b/basis/csv/csv-tests.factor
@@ -0,0 +1,78 @@
+IN: csv.tests
+USING: io.streams.string csv tools.test shuffle kernel strings ;
+
+! I like to name my unit tests
+: named-unit-test ( name output input -- ) 
+  nipd unit-test ; inline
+
+! tests nicked from the wikipedia csv article
+! http://en.wikipedia.org/wiki/Comma-separated_values
+
+"Fields are separated by commas"
+[ { { "1997" "Ford" "E350" } } ] 
+[ "1997,Ford,E350" <string-reader> csv ] named-unit-test
+
+"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
+[ { { "1997" "Ford" "E350" } } ]
+[ "1997,   Ford   , E350" <string-reader> csv ] named-unit-test
+
+"keeps spaces in quotes"
+[ { { "1997" "Ford" "E350" "Super, luxurious truck" } } ]
+[ "1997,Ford,E350,\"Super, luxurious truck\"" <string-reader> csv ] named-unit-test
+
+"double quotes mean escaped in quotes"
+[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
+[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\"" 
+  <string-reader> csv ] named-unit-test
+
+"Fields with embedded line breaks must be delimited by double-quote characters."
+[ { { "1997" "Ford" "E350" "Go get one now\nthey are going fast" } } ]
+[ "1997,Ford,E350,\"Go get one now\nthey are going fast\""
+  <string-reader> csv ] named-unit-test
+
+"Fields with leading or trailing spaces must be delimited by double-quote characters. (See comment about leading and trailing spaces above)"
+[ { { "1997" "Ford" "E350" "  Super luxurious truck    " } } ]
+[ "1997,Ford,E350,\"  Super luxurious truck    \""
+  <string-reader> csv ] named-unit-test
+
+"Fields may always be delimited by double-quote characters, whether necessary or not."
+[ { { "1997" "Ford" "E350" } } ]
+[ "\"1997\",\"Ford\",\"E350\"" <string-reader> csv ] named-unit-test
+
+"The first record in a csv file may contain column names in each of the fields."
+[ { { "Year" "Make" "Model" } 
+    { "1997" "Ford" "E350" }
+    { "2000" "Mercury" "Cougar" } } ]
+[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar" 
+   <string-reader> csv ] named-unit-test
+
+
+! !!!!!!!!  other tests
+   
+[ { { "Phil Dawes" } } ] 
+[ "\"Phil Dawes\"" <string-reader> csv ] unit-test
+
+[ { { "1" "2" "3" } { "4" "5" "6" } } ] 
+[ "1,2,3\n4,5,6\n" <string-reader> csv ] unit-test
+
+"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
+[ { { "foo yeah" "bah" "baz" } } ] 
+[ "  foo yeah  , bah ,baz\n" <string-reader> csv ] named-unit-test
+
+
+"allows setting of delimiting character"
+[ { { "foo" "bah" "baz" } } ] 
+[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test
+
+"Quoted field followed immediately by newline"
+[ { { "foo" "bar" }
+    { "1"   "2" } } ]
+[ "foo,\"bar\"\n1,2" <string-reader> csv ] named-unit-test
+
+"can write csv too!"
+[ "foo1,bar1\nfoo2,bar2\n" ]
+[ { { "foo1" "bar1" } { "foo2" "bar2" } } <string-writer> tuck write-csv >string ] named-unit-test
+
+"escapes quotes commas and newlines when writing"
+[ "\"fo\"\"o1\",bar1\n\"fo\no2\",\"b,ar2\"\n" ]
+[ { { "fo\"o1" "bar1" } { "fo\no2" "b,ar2" } } <string-writer> tuck write-csv >string ] named-unit-test ! "
diff --git a/basis/csv/csv.factor b/basis/csv/csv.factor

new file mode 100644 (file)

index 0000000..59a3f21
--- /dev/null
+++ b/basis/csv/csv.factor
@@ -0,0 +1,88 @@
+! Copyright (C) 2007, 2008 Phil Dawes
+! See http://factorcode.org/license.txt for BSD license.
+
+! Simple CSV Parser
+! Phil Dawes phil@phildawes.net
+
+USING: kernel sequences io namespaces combinators unicode.categories ;
+IN: csv
+
+SYMBOL: delimiter
+
+CHAR: , delimiter set-global
+
+: delimiter> delimiter get ; inline
+    
+DEFER: quoted-field ( -- endchar )
+    
+! trims whitespace from either end of string
+: trim-whitespace ( str -- str )
+  [ blank? ] trim ; inline
+
+: skip-to-field-end ( -- endchar )
+  "\n" delimiter> suffix read-until nip ; inline
+  
+: not-quoted-field ( -- endchar )
+  "\"\n" delimiter> suffix read-until   ! "
+  dup
+  { { CHAR: "     [ drop drop quoted-field ] }  ! " 
+    { delimiter> [ swap trim-whitespace % ] } 
+    { CHAR: \n    [ swap trim-whitespace % ] }    
+    { f           [ swap trim-whitespace % ] }       ! eof
+  } case ;
+  
+: maybe-escaped-quote ( -- endchar )
+  read1 dup 
+  { { CHAR: "    [ , quoted-field ] }  ! " is an escaped quote
+    { delimiter> [ ] }                 ! end of quoted field 
+    { CHAR: \n   [ ] }
+    [ 2drop skip-to-field-end ]       ! end of quoted field + padding
+  } case ;
+  
+: quoted-field ( -- endchar )
+  "\"" read-until                                 ! "
+  drop % maybe-escaped-quote ;
+
+: field ( -- sep string )
+  [ not-quoted-field ] "" make  ; ! trim-whitespace
+
+: (row) ( -- sep )
+  field , 
+  dup delimiter get = [ drop (row) ] when ;
+
+: row ( -- eof? array[string] )
+  [ (row) ] { } make ;
+
+: append-if-row-not-empty ( row -- )
+  dup { "" } = [ drop ] [ , ] if ;
+
+: (csv) ( -- )
+  row append-if-row-not-empty
+  [ (csv) ] when ;
+  
+: csv-row ( stream -- row )
+  [ row nip ] with-input-stream ;
+
+: csv ( stream -- rows )
+  [ [ (csv) ] { } make ] with-input-stream ;
+
+: with-delimiter ( char quot -- )
+  delimiter swap with-variable ; inline
+
+: needs-escaping? ( cell -- ? )
+  [ [ "\n\"" member? ] [ delimiter get = ] bi or ] contains? ; inline ! "
+
+: escape-quotes ( cell -- cell' )
+  [ [ dup , CHAR: " = [ CHAR: " , ] when ] each ] "" make ; inline
+
+: enclose-in-quotes ( cell -- cell' )
+  CHAR: " [ prefix ] [ suffix ] bi ; inline ! "
+    
+: escape-if-required ( cell -- cell' )
+  dup needs-escaping? [ escape-quotes enclose-in-quotes ] when ; inline
+    
+: write-row ( row -- )
+  [ delimiter get write1 ] [ escape-if-required write ] interleave nl ; inline
+    
+: write-csv ( rows stream -- )
+  [ [ write-row ] each ] with-output-stream ;
diff --git a/basis/csv/summary.txt b/basis/csv/summary.txt

new file mode 100644 (file)

index 0000000..503cc1b
--- /dev/null
+++ b/basis/csv/summary.txt
@@ -0,0 +1 @@
+CSV parser
diff --git a/extra/csv/authors.txt b/extra/csv/authors.txt

deleted file mode 100644 (file)

index 0be42b2..0000000
--- a/extra/csv/authors.txt
+++ /dev/null
@@ -1 +0,0 @@
-Phil Dawes
diff --git a/extra/csv/csv-docs.factor b/extra/csv/csv-docs.factor

deleted file mode 100644 (file)

index e4741f4..0000000
--- a/extra/csv/csv-docs.factor
+++ /dev/null
@@ -1,28 +0,0 @@
-USING: help.syntax help.markup kernel prettyprint sequences ;
-IN: csv
-
-HELP: csv
-{ $values { "stream" "an input stream" }
-          { "rows" "an array of arrays of fields" } } 
-{ $description "parses a csv stream into an array of row arrays"
-} ;
-
-HELP: csv-row
-{ $values { "stream" "an input stream" }
-          { "row" "an array of fields" } } 
-{ $description "parses a row from a csv stream"
-} ;
-
-HELP: write-csv
-{ $values { "rows" "an sequence of sequences of strings" }
-          { "stream" "an output stream" } } 
-{ $description "writes csv to the output stream, escaping where necessary"
-} ;
-
-
-HELP: with-delimiter
-{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
-          { "quot" "a quotation" } }
-{ $description "Sets the field delimiter for csv or csv-row words "
-} ;
-
diff --git a/extra/csv/csv-tests.factor b/extra/csv/csv-tests.factor

deleted file mode 100644 (file)

index 8261ae1..0000000
--- a/extra/csv/csv-tests.factor
+++ /dev/null
@@ -1,78 +0,0 @@
-IN: csv.tests
-USING: io.streams.string csv tools.test shuffle kernel strings ;
-
-! I like to name my unit tests
-: named-unit-test ( name output input -- ) 
-  nipd unit-test ; inline
-
-! tests nicked from the wikipedia csv article
-! http://en.wikipedia.org/wiki/Comma-separated_values
-
-"Fields are separated by commas"
-[ { { "1997" "Ford" "E350" } } ] 
-[ "1997,Ford,E350" <string-reader> csv ] named-unit-test
-
-"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
-[ { { "1997" "Ford" "E350" } } ]
-[ "1997,   Ford   , E350" <string-reader> csv ] named-unit-test
-
-"keeps spaces in quotes"
-[ { { "1997" "Ford" "E350" "Super, luxurious truck" } } ]
-[ "1997,Ford,E350,\"Super, luxurious truck\"" <string-reader> csv ] named-unit-test
-
-"double quotes mean escaped in quotes"
-[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
-[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\"" 
-  <string-reader> csv ] named-unit-test
-
-"Fields with embedded line breaks must be delimited by double-quote characters."
-[ { { "1997" "Ford" "E350" "Go get one now\nthey are going fast" } } ]
-[ "1997,Ford,E350,\"Go get one now\nthey are going fast\""
-  <string-reader> csv ] named-unit-test
-
-"Fields with leading or trailing spaces must be delimited by double-quote characters. (See comment about leading and trailing spaces above)"
-[ { { "1997" "Ford" "E350" "  Super luxurious truck    " } } ]
-[ "1997,Ford,E350,\"  Super luxurious truck    \""
-  <string-reader> csv ] named-unit-test
-
-"Fields may always be delimited by double-quote characters, whether necessary or not."
-[ { { "1997" "Ford" "E350" } } ]
-[ "\"1997\",\"Ford\",\"E350\"" <string-reader> csv ] named-unit-test
-
-"The first record in a csv file may contain column names in each of the fields."
-[ { { "Year" "Make" "Model" } 
-    { "1997" "Ford" "E350" }
-    { "2000" "Mercury" "Cougar" } } ]
-[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar" 
-   <string-reader> csv ] named-unit-test
-
-
-! !!!!!!!!  other tests
-   
-[ { { "Phil Dawes" } } ] 
-[ "\"Phil Dawes\"" <string-reader> csv ] unit-test
-
-[ { { "1" "2" "3" } { "4" "5" "6" } } ] 
-[ "1,2,3\n4,5,6\n" <string-reader> csv ] unit-test
-
-"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
-[ { { "foo yeah" "bah" "baz" } } ] 
-[ "  foo yeah  , bah ,baz\n" <string-reader> csv ] named-unit-test
-
-
-"allows setting of delimiting character"
-[ { { "foo" "bah" "baz" } } ] 
-[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test
-
-"Quoted field followed immediately by newline"
-[ { { "foo" "bar" }
-    { "1"   "2" } } ]
-[ "foo,\"bar\"\n1,2" <string-reader> csv ] named-unit-test
-
-"can write csv too!"
-[ "foo1,bar1\nfoo2,bar2\n" ]
-[ { { "foo1" "bar1" } { "foo2" "bar2" } } <string-writer> tuck write-csv >string ] named-unit-test
-
-"escapes quotes commas and newlines when writing"
-[ "\"fo\"\"o1\",bar1\n\"fo\no2\",\"b,ar2\"\n" ]
-[ { { "fo\"o1" "bar1" } { "fo\no2" "b,ar2" } } <string-writer> tuck write-csv >string ] named-unit-test ! "
diff --git a/extra/csv/csv.factor b/extra/csv/csv.factor

deleted file mode 100644 (file)

index 59a3f21..0000000
--- a/extra/csv/csv.factor
+++ /dev/null
@@ -1,88 +0,0 @@
-! Copyright (C) 2007, 2008 Phil Dawes
-! See http://factorcode.org/license.txt for BSD license.
-
-! Simple CSV Parser
-! Phil Dawes phil@phildawes.net
-
-USING: kernel sequences io namespaces combinators unicode.categories ;
-IN: csv
-
-SYMBOL: delimiter
-
-CHAR: , delimiter set-global
-
-: delimiter> delimiter get ; inline
-    
-DEFER: quoted-field ( -- endchar )
-    
-! trims whitespace from either end of string
-: trim-whitespace ( str -- str )
-  [ blank? ] trim ; inline
-
-: skip-to-field-end ( -- endchar )
-  "\n" delimiter> suffix read-until nip ; inline
-  
-: not-quoted-field ( -- endchar )
-  "\"\n" delimiter> suffix read-until   ! "
-  dup
-  { { CHAR: "     [ drop drop quoted-field ] }  ! " 
-    { delimiter> [ swap trim-whitespace % ] } 
-    { CHAR: \n    [ swap trim-whitespace % ] }    
-    { f           [ swap trim-whitespace % ] }       ! eof
-  } case ;
-  
-: maybe-escaped-quote ( -- endchar )
-  read1 dup 
-  { { CHAR: "    [ , quoted-field ] }  ! " is an escaped quote
-    { delimiter> [ ] }                 ! end of quoted field 
-    { CHAR: \n   [ ] }
-    [ 2drop skip-to-field-end ]       ! end of quoted field + padding
-  } case ;
-  
-: quoted-field ( -- endchar )
-  "\"" read-until                                 ! "
-  drop % maybe-escaped-quote ;
-
-: field ( -- sep string )
-  [ not-quoted-field ] "" make  ; ! trim-whitespace
-
-: (row) ( -- sep )
-  field , 
-  dup delimiter get = [ drop (row) ] when ;
-
-: row ( -- eof? array[string] )
-  [ (row) ] { } make ;
-
-: append-if-row-not-empty ( row -- )
-  dup { "" } = [ drop ] [ , ] if ;
-
-: (csv) ( -- )
-  row append-if-row-not-empty
-  [ (csv) ] when ;
-  
-: csv-row ( stream -- row )
-  [ row nip ] with-input-stream ;
-
-: csv ( stream -- rows )
-  [ [ (csv) ] { } make ] with-input-stream ;
-
-: with-delimiter ( char quot -- )
-  delimiter swap with-variable ; inline
-
-: needs-escaping? ( cell -- ? )
-  [ [ "\n\"" member? ] [ delimiter get = ] bi or ] contains? ; inline ! "
-
-: escape-quotes ( cell -- cell' )
-  [ [ dup , CHAR: " = [ CHAR: " , ] when ] each ] "" make ; inline
-
-: enclose-in-quotes ( cell -- cell' )
-  CHAR: " [ prefix ] [ suffix ] bi ; inline ! "
-    
-: escape-if-required ( cell -- cell' )
-  dup needs-escaping? [ escape-quotes enclose-in-quotes ] when ; inline
-    
-: write-row ( row -- )
-  [ delimiter get write1 ] [ escape-if-required write ] interleave nl ; inline
-    
-: write-csv ( rows stream -- )
-  [ [ write-row ] each ] with-output-stream ;
diff --git a/extra/csv/summary.txt b/extra/csv/summary.txt

deleted file mode 100644 (file)

index 503cc1b..0000000
--- a/extra/csv/summary.txt
+++ /dev/null
@@ -1 +0,0 @@
-CSV parser
author	Slava Pestov <slava@slava-pestovs-macbook-pro.local>
	Thu, 4 Sep 2008 01:19:52 +0000 (20:19 -0500)
committer	Slava Pestov <slava@slava-pestovs-macbook-pro.local>
	Thu, 4 Sep 2008 01:19:52 +0000 (20:19 -0500)
basis/csv/authors.txt	[new file with mode: 0644]	patch \| blob
basis/csv/csv-docs.factor	[new file with mode: 0644]	patch \| blob
basis/csv/csv-tests.factor	[new file with mode: 0644]	patch \| blob
basis/csv/csv.factor	[new file with mode: 0644]	patch \| blob
basis/csv/summary.txt	[new file with mode: 0644]	patch \| blob
extra/csv/authors.txt	[deleted file]	patch \| blob \| history
extra/csv/csv-docs.factor	[deleted file]	patch \| blob \| history
extra/csv/csv-tests.factor	[deleted file]	patch \| blob \| history
extra/csv/csv.factor	[deleted file]	patch \| blob \| history
extra/csv/summary.txt	[deleted file]	patch \| blob \| history