: 4seq ( parser1 parser2 parser3 parser4 -- parser )
4array seq ;
-: seq* ( quot -- paser )
+: seq* ( quot -- parser )
{ } make seq ; inline
: choice ( seq -- parser )
: 4choice ( parser1 parser2 parser3 parser4 -- parser )
4array choice ;
-: choice* ( quot -- paser )
+: choice* ( quot -- parser )
{ } make choice ; inline
: repeat0 ( parser -- parser )
HELP: <boyer-moore>
{ $values
- { "pat" sequence } { "bm" boyer-moore }
+ { "pattern" sequence } { "boyer-moore" boyer-moore }
}
{ $description
"Given a pattern performs pattern preprocessing and returns "
"results as an (opaque) object that is reusable across "
- "searches in different sequences via " { $link search-from }
- " generic word."
+ "searches in different sequences via " { $link search-from } "."
+} { $examples
+ { $example
+ "USING: boyer-moore prettyprint ;"
+ "\"abc\" <boyer-moore> ."
+ "T{ boyer-moore
+ { pattern \"abc\" }
+ { bad-char-table H{ { 97 0 } { 98 -1 } { 99 -2 } } }
+ { good-suffix-table { 3 3 1 } }
+}"
+ }
} ;
HELP: search-from
{ "obj" object }
{ "i/f" "the index of first match or " { $link f } }
}
-{ $description "Performs an attempt to find the first "
+{ $contract "Performs an attempt to find the first "
"occurrence of pattern in " { $snippet "seq" }
" starting from " { $snippet "from" } " using "
"Boyer-Moore search algorithm. Output is the index "
- "if the attempt was succeessful and " { $link f }
+ "if the attempt was succeessful, or " { $link f }
" otherwise."
+} { $examples
+ { $example
+ "USING: boyer-moore prettyprint ;"
+ "{ 1 2 7 10 20 2 7 10 } 3 { 2 7 10 } search-from ."
+ "5"
+ }
} ;
HELP: search
}
{ $description "A simpler variant of " { $link search-from }
" that starts searching from the beginning of the sequence."
+} { $examples
+ { $example
+ "USING: boyer-moore prettyprint ;"
+ "\"Source string\" \"ce st\" search ."
+ "4"
+ }
} ;
ARTICLE: "boyer-moore" "The Boyer-Moore algorithm"
{ $heading "Summary" }
"The " { $vocab-link "boyer-moore" } " vocabulary "
-"implements a Boyer-Moore string search algorithm with "
-"so-called 'strong good suffix shift rule'. Since algorithm is "
-"alphabet-independent it is applicable to searching in any "
-"collection that implements " { $links "sequence-protocol" } "."
+"implements a Boyer-Moore string search algorithm with the "
+"so-called 'strong good suffix shift rule'. Since the algorithm is "
+"alphabet-independent, it is applicable to searching in any "
+"collection that implements the " { $links "sequence-protocol" } "."
{ $heading "Complexity" }
-"Let " { $snippet "n" } " and " { $snippet "m" } " be lengths "
+"Let " { $snippet "n" } " and " { $snippet "m" } " be the lengths "
"of the sequences being searched " { $emphasis "in" } " and "
{ $emphasis "for" } " respectively. Then searching runs in "
-{ $snippet "O(n)" } " time in its worst case using additional "
+{ $snippet "O(n)" } " time worst-case, using additional "
{ $snippet "O(m)" } " space. The preprocessing phase runs in "
{ $snippet "O(m)" } " time."
;
[ length dup ] [ <reversed> ] bi
[ (partial-suffixes) ] map-index 2nip ; inline
-: <gs-table> ( seq -- table )
+: <good-suffix-table> ( seq -- table )
z-values [ partial-suffixes ] [ normal-suffixes ] bi
[ [ nip ] when* ] 2map reverse! ; inline
-: insert-bc-shift ( table elt len i -- table )
+: insert-bad-char-shift ( table elt len i -- table )
1 + swap - swap pick 2dup key?
[ 3drop ] [ set-at ] if ; inline
-: <bc-table> ( seq -- table )
+: <bad-char-table> ( seq -- table )
H{ } clone swap [ length ] keep
- [ insert-bc-shift ] with each-index ; inline
+ [ insert-bad-char-shift ] with each-index ; inline
-TUPLE: boyer-moore pattern bc-table gs-table ;
+TUPLE: boyer-moore pattern bad-char-table good-suffix-table ;
-: gs-shift ( i c bm -- s ) nip gs-table>> nth-unsafe ; inline
+: good-suffix-shift ( i c boyer-moore -- s ) nip good-suffix-table>> nth-unsafe ; inline
-: bc-shift ( i c bm -- s ) bc-table>> at dup 1 ? + ; inline
+: bad-char-shift ( i c boyer-moore -- s ) bad-char-table>> at dup 1 ? + ; inline
-: do-shift ( pos i c bm -- newpos )
- [ gs-shift ] [ bc-shift ] bi-curry 2bi max + ; inline
+: do-shift ( pos i c boyer-moore -- newpos )
+ [ good-suffix-shift ] [ bad-char-shift ] bi-curry 2bi max + ; inline
: match? ( i1 s1 i2 s2 -- ? ) [ nth-unsafe ] 2bi@ = ; inline
len 1 - [ [ pos + s1 ] keep s2 match? not ]
find-last-integer ; inline
-:: (search-from) ( seq from bm -- i/f )
- bm pattern>> :> pat
- pat length :> plen
- seq length plen - :> lim
+:: (search-from) ( seq from boyer-moore -- i/f )
+ boyer-moore pattern>> :> pat
+ pat length :> plen
+ seq length plen - :> lim
from
[
dup lim <=
[
seq pat pick plen mismatch?
- [ 2dup + seq nth-unsafe bm do-shift t ] [ f ] if*
+ [ 2dup + seq nth-unsafe boyer-moore do-shift t ] [ f ] if*
] [ drop f f ] if
] loop ; inline
PRIVATE>
-: <boyer-moore> ( pat -- bm )
- dup <reversed> [ <bc-table> ] [ <gs-table> ] bi
+: <boyer-moore> ( pattern -- boyer-moore )
+ dup <reversed> [ <bad-char-table> ] [ <good-suffix-table> ] bi
boyer-moore boa ;
GENERIC: search-from ( seq from obj -- i/f )