Moving more parsing code to simple-flat-file

author Daniel Ehrenberg <littledan@Macintosh-122.local>
Thu, 19 Mar 2009 03:49:06 +0000 (22:49 -0500)
committer Daniel Ehrenberg <littledan@Macintosh-122.local>
Thu, 19 Mar 2009 03:49:06 +0000 (22:49 -0500)
diff --git a/basis/simple-flat-file/simple-flat-file.factor b/basis/simple-flat-file/simple-flat-file.factor

index 403fc4d14b82e0a4b8056d0d5120184e867c3dbf..63b2b87c7e009d106f08baa9f9315cd814d5cd63 100644 (file)
--- a/basis/simple-flat-file/simple-flat-file.factor
+++ b/basis/simple-flat-file/simple-flat-file.factor
@@ -1,10 +1,11 @@
  ! Copyright (C) 2009 Daniel Ehrenberg
  ! See http://factorcode.org/license.txt for BSD license.
-USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ;
+USING: sequences splitting kernel math.parser io.files io.encodings.ascii
+biassocs ascii ;
  IN: simple-flat-file
  
  : drop-comments ( seq -- newseq )
-    [ "#" split1 drop ] map harvest ;
+    [ "#@" split first ] map harvest ;
  
  : split-column ( line -- columns )
      " \t" split harvest 2 short head 2 f pad-tail ;
@@ -24,3 +25,8 @@ IN: simple-flat-file
  : flat-file>biassoc ( filename -- biassoc )
      ascii file-lines process-codetable-lines >biassoc ;
  
+: split-; ( line -- array )
+    ";" split [ [ blank? ] trim ] map ;
+
+: data ( filename -- data )
+    ascii file-lines drop-comments [ split-; ] map ;
diff --git a/basis/unicode/collation/collation.factor b/basis/unicode/collation/collation.factor

index 2a94d501bdce30de81e10bc4db287f554760ce80..0c51ea4352efda97386b7528886f4bc2639a288c 100755 (executable)
--- a/basis/unicode/collation/collation.factor
+++ b/basis/unicode/collation/collation.factor
@@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser
  ascii io assocs strings math namespaces make sorting combinators\r
  math.order arrays unicode.normalize unicode.data locals\r
  unicode.syntax macros sequences.deep words unicode.breaks\r
-quotations combinators.short-circuit ;\r
+quotations combinators.short-circuit simple-flat-file ;\r
  IN: unicode.collation\r
  \r
  <PRIVATE\r
@@ -20,13 +20,11 @@ TUPLE: weight primary secondary tertiary ignorable? ;
          [ >>primary ] [ >>secondary ] [ >>tertiary ] tri*\r
      ] map ;\r
  \r
-: parse-line ( line -- code-poing weight )\r
-    ";" split1 [ [ blank? ] trim ] bi@\r
-    [ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;\r
+: parse-keys ( string -- chars )\r
+    " " split [ hex> ] "" map-as ;\r
  \r
  : parse-ducet ( file -- ducet )\r
-    ascii file-lines filter-comments\r
-    [ parse-line ] H{ } map>assoc ;\r
+    data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ;\r
  \r
  "vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet\r
  \r
diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor

index 93df3d5a8c2e3cf02f4d8188c540b5916c381054..e94036a85e6cf4bb6944526ee2b6b97b41e58547 100644 (file)
--- a/basis/unicode/data/data.factor
+++ b/basis/unicode/data/data.factor
@@ -5,7 +5,7 @@ io.files hashtables quotations splitting grouping arrays io
  math.parser hash2 math.order byte-arrays words namespaces words
  compiler.units parser io.encodings.ascii values interval-maps
  ascii sets combinators locals math.ranges sorting make
-strings.parser io.encodings.utf8 memoize ;
+strings.parser io.encodings.utf8 memoize simple-flat-file ;
  IN: unicode.data
  
  <PRIVATE
@@ -18,12 +18,13 @@ VALUE: combine-map
  VALUE: class-map
  VALUE: compatibility-map
  VALUE: category-map
-VALUE: name-map
  VALUE: special-casing
  VALUE: properties
  
  PRIVATE>
  
+VALUE: name-map
+
  : canonical-entry ( char -- seq ) canonical-map at ; inline
  : combine-chars ( a b -- char/f ) combine-map hash2 ; inline
  : compatibility-entry ( char -- seq ) compatibility-map at ; inline
@@ -76,20 +77,10 @@ PRIVATE>
  
  ! Loading data from UnicodeData.txt
  
-: split-; ( line -- array )
-    ";" split [ [ blank? ] trim ] map ;
-
-: data ( filename -- data )
-    ascii file-lines [ split-; ] map ;
-
  : load-data ( -- data )
      "vocab:unicode/data/UnicodeData.txt" data ;
  
-: filter-comments ( lines -- lines )
-    [ "#@" split first ] map harvest ;
-
  : (process-data) ( index data -- newdata )
-    filter-comments
      [ [ nth ] keep first swap ] with { } map>assoc
      [ [ hex> ] dip ] assoc-map ;
  
@@ -182,15 +173,13 @@ C: <code-point> code-point
      <code-point> swap first set ;
  
  ! Extra properties
-: properties-lines ( -- lines )
-    "vocab:unicode/data/PropList.txt"
-    ascii file-lines ;
-
  : parse-properties ( -- {{[a,b],prop}} )
-    properties-lines filter-comments [
-        split-; first2
-        [ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip
-    ] { } map>assoc ;
+    "vocab:unicode/data/PropList.txt" data [
+        [
+            ".." split1 [ dup ] unless*
+            [ hex> ] bi@ 2array
+        ] dip
+    ] assoc-map ;
  
  : properties>intervals ( properties -- assoc[str,interval] )
      dup values prune [ f ] H{ } map>assoc
@@ -233,10 +222,6 @@ name>char-hook set-global
  
  SYMBOL: interned
  
-: parse-key-value ( filename -- assoc )
-    ! assoc is code point/range => name
-    ascii file-lines filter-comments [ split-; ] map ;
-
  : range, ( value key -- )
      swap interned get
      [ = ] with find nip 2array , ;
@@ -257,4 +242,4 @@ SYMBOL: interned
  PRIVATE>
  
  : load-key-value ( filename -- table )
-    parse-key-value process-key-value ;
+    data process-key-value ;
author	Daniel Ehrenberg <littledan@Macintosh-122.local>
	Thu, 19 Mar 2009 03:49:06 +0000 (22:49 -0500)
committer	Daniel Ehrenberg <littledan@Macintosh-122.local>
	Thu, 19 Mar 2009 03:49:06 +0000 (22:49 -0500)
basis/simple-flat-file/simple-flat-file.factor		patch | blob | history
basis/unicode/collation/collation.factor		patch | blob | history
basis/unicode/data/data.factor		patch | blob | history