1 ! Based on http://shootout.alioth.debian.org/gp4/benchmark.php?test=fasta&lang=java&id=2
2 USING: assocs benchmark.reverse-complement byte-arrays fry io
3 io.encodings.ascii io.files locals kernel math sequences
4 sequences.private specialized-arrays strings typed ;
5 QUALIFIED-WITH: alien.c-types c
6 SPECIALIZED-ARRAY: c:double
12 CONSTANT: initial-seed 42
13 CONSTANT: line-length 60
15 : random ( seed -- seed n )
16 IA * IC + IM mod dup IM /f ; inline
18 CONSTANT: ALU "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
40 CONSTANT: homo-sapiens
42 { CHAR: a 0.3029549426680 }
43 { CHAR: c 0.1979883004921 }
44 { CHAR: g 0.1975473066391 }
45 { CHAR: t 0.3015094502008 }
48 TYPED: make-cumulative ( freq -- chars: byte-array floats: double-array )
50 [ values >double-array unclip [ + ] accumulate swap suffix ] bi ;
52 :: select-random ( seed chars floats -- seed elt )
53 seed random floats [ <= ] with find drop chars nth-unsafe ; inline
55 TYPED: make-random-fasta ( seed: float len: fixnum chars: byte-array floats: double-array -- seed: float )
56 '[ _ _ select-random ] "" replicate-as print ;
58 : write-description ( desc id -- )
59 ">" write write bl print ;
61 :: split-lines ( n quot -- )
63 [ [ line-length quot call ] times ] dip
64 quot unless-zero ; inline
66 TYPED: write-random-fasta ( seed: float n: fixnum chars: byte-array floats: double-array desc id -- seed: float )
68 '[ _ _ make-random-fasta ] split-lines ;
70 TYPED:: make-repeat-fasta ( k: fixnum len: fixnum alu: string -- k': fixnum )
72 len iota [ k + kn mod alu nth-unsafe ] "" map-as print
75 : write-repeat-fasta ( n alu desc id -- )
80 [| len | k len alu make-repeat-fasta k! ] split-lines
84 homo-sapiens make-cumulative
87 :> ( n out IUB-chars IUB-floats homo-sapiens-chars homo-sapiens-floats )
91 n 2 * ALU "Homo sapiens alu" "ONE" write-repeat-fasta
94 n 3 * homo-sapiens-chars homo-sapiens-floats
95 "IUB ambiguity codes" "TWO" write-random-fasta
96 n 5 * IUB-chars IUB-floats
97 "Homo sapiens frequency" "THREE" write-random-fasta
102 : run-fasta ( -- ) 2500000 reverse-complement-in fasta ;