-USING: kernel io io.files splitting strings io.encodings.ascii
- hashtables sequences assocs math namespaces prettyprint
- math.parser combinators arrays sorting unicode.case ;
-
+! Copyright (C) 2008, 2010 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: ascii assocs formatting grouping io io.encodings.ascii
+io.files kernel math math.statistics sequences ;
IN: benchmark.knucleotide
-: float>string ( float places -- string )
- swap >float number>string
- "." split1 rot
- over length over <
- [ CHAR: 0 pad-right ]
- [ head ] if "." glue ;
+CONSTANT: knucleotide-in "vocab:benchmark/knucleotide/knucleotide-input.txt"
: discard-lines ( -- )
readln
">" read-until drop
CHAR: \n swap remove >upper ;
-: tally ( x exemplar -- b )
- clone tuck
- [
- [ [ 1+ ] [ 1 ] if* ] change-at
- ] curry each ;
-
-: small-groups ( x n -- b )
- swap
- [ length swap - 1+ ] 2keep
- [ [ over + ] dip subseq ] 2curry map ;
-
: handle-table ( inputs n -- )
- small-groups
- [ length ] keep
- H{ } tally >alist
- sort-values reverse
- [
- dup first write bl
- second 100 * over / 3 float>string print
- ] each
- drop ;
+ clump
+ [ sorted-histogram reverse ] [ length ] bi
+ '[
+ [ first write bl ]
+ [ second 100 * _ /f "%.3f" printf nl ] bi
+ ] each ;
-: handle-n ( inputs x -- )
- tuck length
- small-groups H{ } tally
- at [ 0 ] unless*
- number>string 8 CHAR: \s pad-right write ;
+: handle-n ( input x -- )
+ [ nip ] [ length clump histogram ] 2bi at 0 or "%d\t" printf ;
: process-input ( input -- )
- dup 1 handle-table nl
- dup 2 handle-table nl
- { "GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT" }
- [ [ dupd handle-n ] keep print ] each
- drop ;
-
-: knucleotide ( -- )
- "resource:extra/benchmark/knucleotide/knucleotide-input.txt"
+ [ 1 handle-table nl ]
+ [ 2 handle-table nl ]
+ [
+ { "GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT" }
+ [ [ handle-n ] keep print ] with each
+ ]
+ tri ;
+
+: knucleotide-benchmark ( -- )
+ knucleotide-in
ascii [ read-input ] with-file-reader
process-input ;
-MAIN: knucleotide
+MAIN: knucleotide-benchmark