From 4aa430cfd7033af9e6190a63a72fe82ea57cdc53 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 18 Mar 2009 22:49:06 -0500 Subject: [PATCH] Moving more parsing code to simple-flat-file --- .../simple-flat-file/simple-flat-file.factor | 10 ++++-- basis/unicode/collation/collation.factor | 10 +++--- basis/unicode/data/data.factor | 35 ++++++------------- 3 files changed, 22 insertions(+), 33 deletions(-) diff --git a/basis/simple-flat-file/simple-flat-file.factor b/basis/simple-flat-file/simple-flat-file.factor index 403fc4d14b..63b2b87c7e 100644 --- a/basis/simple-flat-file/simple-flat-file.factor +++ b/basis/simple-flat-file/simple-flat-file.factor @@ -1,10 +1,11 @@ ! Copyright (C) 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ; +USING: sequences splitting kernel math.parser io.files io.encodings.ascii +biassocs ascii ; IN: simple-flat-file : drop-comments ( seq -- newseq ) - [ "#" split1 drop ] map harvest ; + [ "#@" split first ] map harvest ; : split-column ( line -- columns ) " \t" split harvest 2 short head 2 f pad-tail ; @@ -24,3 +25,8 @@ IN: simple-flat-file : flat-file>biassoc ( filename -- biassoc ) ascii file-lines process-codetable-lines >biassoc ; +: split-; ( line -- array ) + ";" split [ [ blank? ] trim ] map ; + +: data ( filename -- data ) + ascii file-lines drop-comments [ split-; ] map ; diff --git a/basis/unicode/collation/collation.factor b/basis/unicode/collation/collation.factor index 2a94d501bd..0c51ea4352 100755 --- a/basis/unicode/collation/collation.factor +++ b/basis/unicode/collation/collation.factor @@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser ascii io assocs strings math namespaces make sorting combinators math.order arrays unicode.normalize unicode.data locals unicode.syntax macros sequences.deep words unicode.breaks -quotations combinators.short-circuit ; +quotations combinators.short-circuit simple-flat-file ; IN: unicode.collation >primary ] [ >>secondary ] [ >>tertiary ] tri* ] map ; -: parse-line ( line -- code-poing weight ) - ";" split1 [ [ blank? ] trim ] bi@ - [ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ; +: parse-keys ( string -- chars ) + " " split [ hex> ] "" map-as ; : parse-ducet ( file -- ducet ) - ascii file-lines filter-comments - [ parse-line ] H{ } map>assoc ; + data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ; "vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor index 93df3d5a8c..e94036a85e 100644 --- a/basis/unicode/data/data.factor +++ b/basis/unicode/data/data.factor @@ -5,7 +5,7 @@ io.files hashtables quotations splitting grouping arrays io math.parser hash2 math.order byte-arrays words namespaces words compiler.units parser io.encodings.ascii values interval-maps ascii sets combinators locals math.ranges sorting make -strings.parser io.encodings.utf8 memoize ; +strings.parser io.encodings.utf8 memoize simple-flat-file ; IN: unicode.data +VALUE: name-map + : canonical-entry ( char -- seq ) canonical-map at ; inline : combine-chars ( a b -- char/f ) combine-map hash2 ; inline : compatibility-entry ( char -- seq ) compatibility-map at ; inline @@ -76,20 +77,10 @@ PRIVATE> ! Loading data from UnicodeData.txt -: split-; ( line -- array ) - ";" split [ [ blank? ] trim ] map ; - -: data ( filename -- data ) - ascii file-lines [ split-; ] map ; - : load-data ( -- data ) "vocab:unicode/data/UnicodeData.txt" data ; -: filter-comments ( lines -- lines ) - [ "#@" split first ] map harvest ; - : (process-data) ( index data -- newdata ) - filter-comments [ [ nth ] keep first swap ] with { } map>assoc [ [ hex> ] dip ] assoc-map ; @@ -182,15 +173,13 @@ C: code-point swap first set ; ! Extra properties -: properties-lines ( -- lines ) - "vocab:unicode/data/PropList.txt" - ascii file-lines ; - : parse-properties ( -- {{[a,b],prop}} ) - properties-lines filter-comments [ - split-; first2 - [ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip - ] { } map>assoc ; + "vocab:unicode/data/PropList.txt" data [ + [ + ".." split1 [ dup ] unless* + [ hex> ] bi@ 2array + ] dip + ] assoc-map ; : properties>intervals ( properties -- assoc[str,interval] ) dup values prune [ f ] H{ } map>assoc @@ -233,10 +222,6 @@ name>char-hook set-global SYMBOL: interned -: parse-key-value ( filename -- assoc ) - ! assoc is code point/range => name - ascii file-lines filter-comments [ split-; ] map ; - : range, ( value key -- ) swap interned get [ = ] with find nip 2array , ; @@ -257,4 +242,4 @@ SYMBOL: interned PRIVATE> : load-key-value ( filename -- table ) - parse-key-value process-key-value ; + data process-key-value ; -- 2.34.1