1 USING: arrays ascii assocs combinators combinators.smart fry
2 http.client io.encodings.ascii io.files io.files.temp kernel
3 locals math math.statistics memoize sequences sorting splitting
7 ! http://norvig.com/spell-correct.html
9 CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
11 : splits ( word -- sequence )
12 dup length iota [ cut 2array ] with map ;
14 : deletes ( sequence -- sequence' )
15 [ second length 0 > ] filter [ first2 rest append ] map ;
17 : transposes ( sequence -- sequence' )
18 [ second length 1 > ] filter [
22 [ second second 1string ]
23 [ second first 1string ]
26 ] "" append-outputs-as
29 : replaces ( sequence -- sequence' )
30 [ second length 0 > ] filter [
31 [ ALPHABET ] dip first2
32 '[ 1string _ _ rest surround ] { } map-as
35 : inserts ( sequence -- sequence' )
38 [ [ first2 ] dip 1string glue ] with { } map-as
41 : edits1 ( word -- edits )
51 : edits2 ( word -- edits )
52 edits1 [ edits1 ] map concat ;
54 : filter-known ( words dictionary -- words' )
57 :: corrections ( word dictionary -- words )
58 word 1array dictionary filter-known
59 [ word edits1 dictionary filter-known ] when-empty
60 [ word edits2 dictionary filter-known ] when-empty
61 [ dictionary at 1 or ] sort-with ;
63 : words ( string -- words )
64 >lower [ letter? not ] split-when harvest ;
66 : load-dictionary ( file -- assoc )
67 ascii file-contents words histogram ;
69 MEMO: default-dictionary ( -- counts )
70 "big.txt" temp-file dup exists?
71 [ URL" http://norvig.com/big.txt" over download-to ] unless
74 : (correct) ( word dictionary -- word/f )
75 corrections [ f ] [ first ] if-empty ;
77 : correct ( word -- word/f )
78 default-dictionary (correct) ;