USING: arrays ascii assocs combinators combinators.smart fry
http.client io.encodings.ascii io.files io.files.temp kernel
-locals math math.statistics memoize sequences sorting splitting
-strings urls ;
+literals locals math math.ranges math.statistics memoize
+sequences sets sorting splitting strings urls ;
IN: spelling
! http://norvig.com/spell-correct.html
-CONSTANT: ALPHABET "abcdefghijklmnopqrstuvwxyz"
+CONSTANT: ALPHABET $[
+ "abcdefghijklmnopqrstuvwxyz" [ 1string ] { } map-as
+]
-: splits ( word -- sequence )
- dup length iota [ cut 2array ] with map ;
+: splits ( word -- splits )
+ dup length [0,b] [ cut 2array ] with map ;
-: deletes ( sequence -- sequence' )
+: deletes ( splits -- edits )
[ second length 0 > ] filter [ first2 rest append ] map ;
-: transposes ( sequence -- sequence' )
- [ second length 1 > ] filter [
- [
- {
- [ first ]
- [ second second 1string ]
- [ second first 1string ]
- [ second 2 tail ]
- } cleave
- ] "" append-outputs-as
- ] map ;
-
-: replaces ( sequence -- sequence' )
- [ second length 0 > ] filter [
- [ ALPHABET ] dip first2
- '[ 1string _ _ rest surround ] { } map-as
- ] map concat ;
-
-: inserts ( sequence -- sequence' )
- [
- ALPHABET
- [ [ first2 ] dip 1string glue ] with { } map-as
- ] map concat ;
+: transposes ( splits -- edits )
+ [ second length 1 > ] filter
+ [ first2 2 cut swap reverse! glue ] map ;
+
+: replaces ( splits -- edits )
+ [ second length 0 > ] filter ALPHABET
+ [ [ first2 rest ] [ glue ] bi* ] cartesian-map concat ;
+
+: inserts ( splits -- edits )
+ ALPHABET [ [ first2 ] [ glue ] bi* ] cartesian-map concat ;
: edits1 ( word -- edits )
[
: edits2 ( word -- edits )
edits1 [ edits1 ] map concat ;
-: filter-known ( words dictionary -- words' )
+: filter-known ( edits dictionary -- words )
'[ _ key? ] filter ;
:: corrections ( word dictionary -- words )
word 1array dictionary filter-known
[ word edits1 dictionary filter-known ] when-empty
[ word edits2 dictionary filter-known ] when-empty
- [ dictionary at 1 or ] sort-with ;
+ [ dictionary at ] sort-with reverse! ;
: words ( string -- words )
>lower [ letter? not ] split-when harvest ;