1 ! Copyright (C) 2010 John Benediktsson
2 ! See http://factorcode.org/license.txt for BSD license
3 USING: arrays ascii assocs combinators combinators.smart fry
4 http.client io.encodings.ascii io.files io.files.temp kernel
5 locals math math.ranges math.statistics memoize sequences
6 sequences.private sorting splitting urls ;
9 ! http://norvig.com/spell-correct.html
11 CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
13 : deletes ( word -- edits )
14 [ length <iota> ] keep '[ _ remove-nth ] map ;
16 : transposes ( word -- edits )
17 [ length [1..b) ] keep
18 '[ dup 1 - _ clone [ exchange-unsafe ] keep ] map ;
20 : replace1 ( i word -- words )
21 [ ALPHABET ] 2dip bounds-check
22 '[ _ _ clone [ set-nth-unsafe ] keep ] { } map-as ;
24 : replaces ( word -- edits )
25 [ length <iota> ] keep '[ _ replace1 ] map concat ;
27 : inserts ( word -- edits )
28 [ length [0..b] ] keep
29 '[ CHAR: ? over _ insert-nth replace1 ] map concat ;
31 : edits1 ( word -- edits )
41 : edits2 ( word -- edits )
42 edits1 [ edits1 ] map concat ;
44 : filter-known ( edits dictionary -- words )
47 :: corrections ( word dictionary -- words )
48 word 1array dictionary filter-known
49 [ word edits1 dictionary filter-known ] when-empty
50 [ word edits2 dictionary filter-known ] when-empty
51 [ dictionary at ] sort-with reverse! ;
53 : words ( string -- words )
54 >lower [ letter? not ] split-when harvest ;
56 : load-dictionary ( file -- assoc )
57 ascii file-contents words histogram ;
59 MEMO: default-dictionary ( -- counts )
60 URL" http://norvig.com/big.txt" "big.txt" temp-file
61 [ ?download-to ] [ load-dictionary ] bi ;
63 : (correct) ( word dictionary -- word/f )
66 : correct ( word -- word/f )
67 default-dictionary (correct) ;