USING: io io.files io.streams.duplex kernel sequences
sequences.private strings vectors words memoize splitting
-grouping hints unicode.case continuations io.encodings.ascii ;
+grouping hints tr continuations io.encodings.ascii
+unicode.case ;
IN: benchmark.reverse-complement
-MEMO: trans-map ( -- str )
- 256 >string
- "TGCAAKYRMBDHV" "ACGTUMRYKVHDB"
- [ pick set-nth ] 2each ;
-
-: do-trans-map ( str -- )
- [ ch>upper trans-map nth ] change-each ;
-
-HINTS: do-trans-map string ;
+TR: trans-map ch>upper "ACGTUMRYKVHDB" "TGCAAKYRMBDHV" ;
: translate-seq ( seq -- str )
- concat dup reverse-here dup do-trans-map ;
+ concat dup reverse-here dup trans-map-fast ;
: show-seq ( seq -- )
translate-seq 60 <groups> [ print ] each ;
: do-line ( seq line -- seq )
- dup first ">;" memq? [
- over show-seq print dup delete-all
- ] [
- over push
- ] if ;
+ dup first ">;" memq?
+ [ over show-seq print dup delete-all ] [ over push ] if ;
HINTS: do-line vector string ;
--- /dev/null
+Slava Pestov
--- /dev/null
+IN: soundex.tests
+USING: soundex tools.test ;
+
+[ "S162" ] [ "supercalifrag" soundex ] unit-test
--- /dev/null
+! Copyright (C) 2008 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: sequences sequences.lib grouping assocs kernel ascii
+unicode.case tr ;
+IN: soundex
+
+TR: soundex-tr
+ ch>upper
+ "AEHIOUWYBFPVCGJKQSXZDTLMNR"
+ "00000000111122222222334556" ;
+
+: remove-duplicates ( seq -- seq' )
+ #! Remove _consecutive_ duplicates (unlike prune which removes
+ #! all duplicates).
+ [ 2 <clumps> [ = not ] assoc-filter values ] [ first ] bi prefix ;
+
+: first>upper ( seq -- seq' ) 1 head >upper ;
+: trim-first ( seq -- seq' ) dup first [ = ] curry left-trim ;
+: remove-zeroes ( seq -- seq' ) CHAR: 0 swap remove ;
+: remove-non-alpha ( seq -- seq' ) [ alpha? ] filter ;
+: pad-4 ( first seq -- seq' ) "000" 3append 4 head ;
+
+: soundex ( string -- soundex )
+ remove-non-alpha [ f ] [
+ [ first>upper ]
+ [
+ soundex-tr
+ trim-first
+ remove-duplicates
+ remove-zeroes
+ ] bi
+ pad-4
+ ] if-empty ;
--- /dev/null
+Soundex is a phonetic algorithm for indexing names by sound
--- /dev/null
+Slava Pestov
--- /dev/null
+Fast character-to-character translation of ASCII strings
--- /dev/null
+IN: tr.tests
+USING: tr tools.test unicode.case ;
+
+TR: tr-test ch>upper "ABC" "XYZ" ;
+
+[ "XXYY" ] [ "aabb" tr-test ] unit-test
+[ "XXYY" ] [ "AABB" tr-test ] unit-test
--- /dev/null
+! Copyright (C) 2008 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: byte-arrays strings sequences sequences.private
+fry kernel words parser lexer assocs ;
+IN: tr
+
+<PRIVATE
+
+: compute-tr ( quot from to -- mapping )
+ zip [ 256 ] 2dip '[ [ @ , at ] keep or ] B{ } map-as ; inline
+
+: tr-hints ( word -- )
+ { { byte-array } { string } } "specializer" set-word-prop ;
+
+: create-tr ( token -- word )
+ create-in dup tr-hints ;
+
+: define-tr ( word mapping -- )
+ '[ [ , nth ] map ]
+ (( seq -- translated ))
+ define-declared ;
+
+: define-fast-tr ( word mapping -- )
+ '[ [ , nth-unsafe ] change-each ]
+ (( seq -- ))
+ define-declared ;
+
+PRIVATE>
+
+: TR:
+ scan parse-definition
+ unclip-last [ unclip-last ] dip compute-tr
+ [ [ create-tr ] dip define-tr ]
+ [ [ "-fast" append create-tr ] dip define-fast-tr ] 2bi ;
+ parsing