From: Cat Stevens Date: Sun, 15 Mar 2020 14:39:47 +0000 (-0400) Subject: fix various typos; cleanup and fully document boyer-moore X-Git-Tag: 0.99~3370 X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=commitdiff_plain;h=426d8f09b2d62d29a5b89a53492890ce2a49158a fix various typos; cleanup and fully document boyer-moore --- diff --git a/basis/peg/peg.factor b/basis/peg/peg.factor index dc47f6a84f..7470592d88 100644 --- a/basis/peg/peg.factor +++ b/basis/peg/peg.factor @@ -514,7 +514,7 @@ PRIVATE> : 4seq ( parser1 parser2 parser3 parser4 -- parser ) 4array seq ; -: seq* ( quot -- paser ) +: seq* ( quot -- parser ) { } make seq ; inline : choice ( seq -- parser ) @@ -529,7 +529,7 @@ PRIVATE> : 4choice ( parser1 parser2 parser3 parser4 -- parser ) 4array choice ; -: choice* ( quot -- paser ) +: choice* ( quot -- parser ) { } make choice ; inline : repeat0 ( parser -- parser ) diff --git a/extra/boyer-moore/boyer-moore-docs.factor b/extra/boyer-moore/boyer-moore-docs.factor index e052d6744b..970bfdf9a8 100644 --- a/extra/boyer-moore/boyer-moore-docs.factor +++ b/extra/boyer-moore/boyer-moore-docs.factor @@ -5,13 +5,22 @@ IN: boyer-moore HELP: { $values - { "pat" sequence } { "bm" boyer-moore } + { "pattern" sequence } { "boyer-moore" boyer-moore } } { $description "Given a pattern performs pattern preprocessing and returns " "results as an (opaque) object that is reusable across " - "searches in different sequences via " { $link search-from } - " generic word." + "searches in different sequences via " { $link search-from } "." +} { $examples + { $example + "USING: boyer-moore prettyprint ;" + "\"abc\" ." + "T{ boyer-moore + { pattern \"abc\" } + { bad-char-table H{ { 97 0 } { 98 -1 } { 99 -2 } } } + { good-suffix-table { 3 3 1 } } +}" + } } ; HELP: search-from @@ -21,12 +30,18 @@ HELP: search-from { "obj" object } { "i/f" "the index of first match or " { $link f } } } -{ $description "Performs an attempt to find the first " +{ $contract "Performs an attempt to find the first " "occurrence of pattern in " { $snippet "seq" } " starting from " { $snippet "from" } " using " "Boyer-Moore search algorithm. Output is the index " - "if the attempt was succeessful and " { $link f } + "if the attempt was succeessful, or " { $link f } " otherwise." +} { $examples + { $example + "USING: boyer-moore prettyprint ;" + "{ 1 2 7 10 20 2 7 10 } 3 { 2 7 10 } search-from ." + "5" + } } ; HELP: search @@ -37,21 +52,27 @@ HELP: search } { $description "A simpler variant of " { $link search-from } " that starts searching from the beginning of the sequence." +} { $examples + { $example + "USING: boyer-moore prettyprint ;" + "\"Source string\" \"ce st\" search ." + "4" + } } ; ARTICLE: "boyer-moore" "The Boyer-Moore algorithm" { $heading "Summary" } "The " { $vocab-link "boyer-moore" } " vocabulary " -"implements a Boyer-Moore string search algorithm with " -"so-called 'strong good suffix shift rule'. Since algorithm is " -"alphabet-independent it is applicable to searching in any " -"collection that implements " { $links "sequence-protocol" } "." +"implements a Boyer-Moore string search algorithm with the " +"so-called 'strong good suffix shift rule'. Since the algorithm is " +"alphabet-independent, it is applicable to searching in any " +"collection that implements the " { $links "sequence-protocol" } "." { $heading "Complexity" } -"Let " { $snippet "n" } " and " { $snippet "m" } " be lengths " +"Let " { $snippet "n" } " and " { $snippet "m" } " be the lengths " "of the sequences being searched " { $emphasis "in" } " and " { $emphasis "for" } " respectively. Then searching runs in " -{ $snippet "O(n)" } " time in its worst case using additional " +{ $snippet "O(n)" } " time worst-case, using additional " { $snippet "O(m)" } " space. The preprocessing phase runs in " { $snippet "O(m)" } " time." ; diff --git a/extra/boyer-moore/boyer-moore.factor b/extra/boyer-moore/boyer-moore.factor index 1775b94c17..57f5d5b446 100644 --- a/extra/boyer-moore/boyer-moore.factor +++ b/extra/boyer-moore/boyer-moore.factor @@ -21,26 +21,26 @@ IN: boyer-moore [ length dup ] [ ] bi [ (partial-suffixes) ] map-index 2nip ; inline -: ( seq -- table ) +: ( seq -- table ) z-values [ partial-suffixes ] [ normal-suffixes ] bi [ [ nip ] when* ] 2map reverse! ; inline -: insert-bc-shift ( table elt len i -- table ) +: insert-bad-char-shift ( table elt len i -- table ) 1 + swap - swap pick 2dup key? [ 3drop ] [ set-at ] if ; inline -: ( seq -- table ) +: ( seq -- table ) H{ } clone swap [ length ] keep - [ insert-bc-shift ] with each-index ; inline + [ insert-bad-char-shift ] with each-index ; inline -TUPLE: boyer-moore pattern bc-table gs-table ; +TUPLE: boyer-moore pattern bad-char-table good-suffix-table ; -: gs-shift ( i c bm -- s ) nip gs-table>> nth-unsafe ; inline +: good-suffix-shift ( i c boyer-moore -- s ) nip good-suffix-table>> nth-unsafe ; inline -: bc-shift ( i c bm -- s ) bc-table>> at dup 1 ? + ; inline +: bad-char-shift ( i c boyer-moore -- s ) bad-char-table>> at dup 1 ? + ; inline -: do-shift ( pos i c bm -- newpos ) - [ gs-shift ] [ bc-shift ] bi-curry 2bi max + ; inline +: do-shift ( pos i c boyer-moore -- newpos ) + [ good-suffix-shift ] [ bad-char-shift ] bi-curry 2bi max + ; inline : match? ( i1 s1 i2 s2 -- ? ) [ nth-unsafe ] 2bi@ = ; inline @@ -48,23 +48,23 @@ TUPLE: boyer-moore pattern bc-table gs-table ; len 1 - [ [ pos + s1 ] keep s2 match? not ] find-last-integer ; inline -:: (search-from) ( seq from bm -- i/f ) - bm pattern>> :> pat - pat length :> plen - seq length plen - :> lim +:: (search-from) ( seq from boyer-moore -- i/f ) + boyer-moore pattern>> :> pat + pat length :> plen + seq length plen - :> lim from [ dup lim <= [ seq pat pick plen mismatch? - [ 2dup + seq nth-unsafe bm do-shift t ] [ f ] if* + [ 2dup + seq nth-unsafe boyer-moore do-shift t ] [ f ] if* ] [ drop f f ] if ] loop ; inline PRIVATE> -: ( pat -- bm ) - dup [ ] [ ] bi +: ( pattern -- boyer-moore ) + dup [ ] [ ] bi boyer-moore boa ; GENERIC: search-from ( seq from obj -- i/f )