From ad87a38ab8e8385c3f5e7024ce0dfd3d156d6fa2 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 15 Jul 2008 17:16:08 -0500 Subject: [PATCH] Refactor binary search --- core/binary-search/binary-search-docs.factor | 43 +++++++++++++++++ core/binary-search/binary-search-tests.factor | 17 +++++++ core/binary-search/binary-search.factor | 46 +++++++++++++++++++ core/sequences/sequences-docs.factor | 3 +- core/sorting/sorting-docs.factor | 29 ++---------- core/sorting/sorting-tests.factor | 10 ---- core/sorting/sorting.factor | 24 +--------- extra/cords/cords.factor | 6 +-- extra/interval-maps/interval-maps.factor | 8 ++-- extra/math/primes/primes.factor | 11 +++-- extra/ui/gadgets/gadgets.factor | 17 ++++--- extra/usa-cities/usa-cities.factor | 2 +- 12 files changed, 136 insertions(+), 80 deletions(-) create mode 100644 core/binary-search/binary-search-docs.factor create mode 100644 core/binary-search/binary-search-tests.factor create mode 100644 core/binary-search/binary-search.factor diff --git a/core/binary-search/binary-search-docs.factor b/core/binary-search/binary-search-docs.factor new file mode 100644 index 0000000000..db442a9ac8 --- /dev/null +++ b/core/binary-search/binary-search-docs.factor @@ -0,0 +1,43 @@ +IN: binary-search +USING: help.markup help.syntax sequences kernel math.order ; + +ARTICLE: "binary-search" "Binary search" +"The " { $emphasis "binary search" } " algorithm allows elements to be located in sorted sequence in " { $snippet "O(log n)" } " time." +{ $subsection search } +"Variants of sequence words optimized for sorted sequences:" +{ $subsection sorted-index } +{ $subsection sorted-member? } +{ $subsection sorted-memq? } +{ $see-also "order-specifiers" "sequences-sorting" } ; + +ABOUT: "binary-search" + +HELP: search +{ $values { "seq" "a sorted sequence" } { "quot" "a quotation with stack effect " { $snippet "( elt -- <=> )" } } { "i" "an index, or " { $link f } } { "elt" "an element, or " { $link f } } } +{ $description "Performs a binary search on a sequence, calling the quotation to decide whether to end the search (" { $link +eq+ } "), search lower (" { $link +lt+ } ") or search higher (" { $link +gt+ } ")." +$nl +"If the sequence is non-empty, outputs the index and value of the closest match, which is either an element for which the quotation output " { $link +eq+ } ", or failing that, least element for which the quotation output " { $link +lt+ } "." +$nl +"If the sequence is empty, outputs " { $link f } " " { $link f } "." } +{ $notes "If the sequence has at least one element, this word always outputs a valid index, because it finds the closest match, not necessarily an exact one. In this respect its behavior differs from " { $link find } "." } ; + +{ find find-from find-last find-last find-last-from search } related-words + +HELP: sorted-index +{ $values { "elt" object } { "seq" "a sorted sequence" } { "i" "an index, or " { $link f } } { "elt" "an element, or " { $link f } } } +{ $description "Outputs the index and value of the element closest to " { $snippet "elt" } " in the sequence. See " { $link search } " for details." } +{ $notes "If the sequence has at least one element, this word always outputs a valid index, because it finds the closest match, not necessarily an exact one. In this respect its behavior differs from " { $link index } "." } ; + +{ index index-from last-index last-index-from sorted-index } related-words + +HELP: sorted-member? +{ $values { "elt" object } { "seq" "a sorted sequence" } { "?" "a boolean" } } +{ $description "Tests if the sorted sequence contains " { $snippet "elt" } ". Equality is tested with " { $link = } "." } ; + +{ member? sorted-member? } related-words + +HELP: sorted-memq? +{ $values { "elt" object } { "seq" "a sorted sequence" } { "?" "a boolean" } } +{ $description "Tests if the sorted sequence contains " { $snippet "elt" } ". Equality is tested with " { $link eq? } "." } ; + +{ memq? sorted-memq? } related-words diff --git a/core/binary-search/binary-search-tests.factor b/core/binary-search/binary-search-tests.factor new file mode 100644 index 0000000000..77b1c16505 --- /dev/null +++ b/core/binary-search/binary-search-tests.factor @@ -0,0 +1,17 @@ +IN: binary-search.tests +USING: binary-search math.order vectors kernel tools.test ; + +\ sorted-member? must-infer + +[ f ] [ 3 { } [ <=> ] with search drop ] unit-test +[ 0 ] [ 3 { 3 } [ <=> ] with search drop ] unit-test +[ 1 ] [ 2 { 1 2 3 } [ <=> ] with search drop ] unit-test +[ 3 ] [ 4 { 1 2 3 4 5 6 } [ <=> ] with search drop ] unit-test +[ 2 ] [ 3.5 { 1 2 3 4 5 6 7 8 } [ <=> ] with search drop ] unit-test +[ 4 ] [ 5.5 { 1 2 3 4 5 6 7 8 } [ <=> ] with search drop ] unit-test +[ 10 ] [ 10 20 >vector [ <=> ] with search drop ] unit-test + +[ t ] [ "hello" { "alligrator" "cat" "fish" "hello" "ikarus" "java" } sorted-member? ] unit-test +[ 3 ] [ "hey" { "alligrator" "cat" "fish" "hello" "ikarus" "java" } sorted-index ] unit-test +[ f ] [ "hello" { "alligrator" "cat" "fish" "ikarus" "java" } sorted-member? ] unit-test +[ f ] [ "zebra" { "alligrator" "cat" "fish" "ikarus" "java" } sorted-member? ] unit-test diff --git a/core/binary-search/binary-search.factor b/core/binary-search/binary-search.factor new file mode 100644 index 0000000000..87a4e0f503 --- /dev/null +++ b/core/binary-search/binary-search.factor @@ -0,0 +1,46 @@ +! Copyright (C) 2008 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel sequences sequences.private accessors math +math.order combinators ; +IN: binary-search + + ) + [ midpoint swap call ] 2keep rot ; inline + +: finish ( quot slice -- i elt ) + [ [ from>> ] [ midpoint@ ] bi + ] [ seq>> ] bi + [ drop ] [ dup ] [ ] tri* nth ; inline + +: (search) ( quot seq -- i elt ) + dup length 1 <= [ + finish + ] [ + decide { + { +eq+ [ finish ] } + { +lt+ [ dup midpoint@ head-slice (search) ] } + { +gt+ [ dup midpoint@ tail-slice (search) ] } + } case + ] if ; inline + +PRIVATE> + +: search ( seq quot -- i elt ) + over empty? [ 2drop f f ] [ swap (search) ] if ; + inline + +: natural-search ( obj seq -- i elt ) + [ <=> ] with search ; + +: sorted-index ( obj seq -- i ) + natural-search drop ; + +: sorted-member? ( obj seq -- ? ) + dupd natural-search nip = ; + +: sorted-memq? ( obj seq -- ? ) + dupd natural-search nip eq? ; diff --git a/core/sequences/sequences-docs.factor b/core/sequences/sequences-docs.factor index 1bb7666447..8434a99b30 100755 --- a/core/sequences/sequences-docs.factor +++ b/core/sequences/sequences-docs.factor @@ -243,6 +243,7 @@ $nl { $subsection "sequences-destructive" } { $subsection "sequences-stacks" } { $subsection "sequences-sorting" } +{ $subsection "binary-search" } { $subsection "sets" } "For inner loops:" { $subsection "sequences-unsafe" } ; @@ -585,8 +586,6 @@ HELP: index { $values { "obj" object } { "seq" sequence } { "n" "an index" } } { $description "Outputs the index of the first element in the sequence equal to " { $snippet "obj" } ". If no element is found, outputs " { $link f } "." } ; -{ index index-from last-index last-index-from member? memq? } related-words - HELP: index-from { $values { "obj" object } { "i" "a start index" } { "seq" sequence } { "n" "an index" } } { $description "Outputs the index of the first element in the sequence equal to " { $snippet "obj" } ", starting the search from the " { $snippet "i" } "th element. If no element is found, outputs " { $link f } "." } ; diff --git a/core/sorting/sorting-docs.factor b/core/sorting/sorting-docs.factor index d52ea5e11f..e55d1eb150 100644 --- a/core/sorting/sorting-docs.factor +++ b/core/sorting/sorting-docs.factor @@ -2,18 +2,15 @@ USING: help.markup help.syntax kernel words math sequences math.order ; IN: sorting -ARTICLE: "sequences-sorting" "Sorting and binary search" -"Sorting and binary search combinators all take comparator quotations with stack effect " { $snippet "( elt1 elt2 -- <=> )" } ", where the output value is one of the three " { $link "order-specifiers" } "." +ARTICLE: "sequences-sorting" "Sorting sequences" +"Sorting combinators all take comparator quotations with stack effect " { $snippet "( elt1 elt2 -- <=> )" } ", where the output value is one of the three " { $link "order-specifiers" } "." $nl "Sorting a sequence with a custom comparator:" { $subsection sort } "Sorting a sequence with common comparators:" { $subsection natural-sort } { $subsection sort-keys } -{ $subsection sort-values } -"Binary search:" -{ $subsection binsearch } -{ $subsection binsearch* } ; +{ $subsection sort-values } ; ABOUT: "sequences-sorting" @@ -41,24 +38,4 @@ HELP: midpoint@ { $values { "seq" "a sequence" } { "n" integer } } { $description "Outputs the index of the midpoint of " { $snippet "seq" } "." } ; -HELP: midpoint -{ $values { "seq" "a sequence" } { "elt" object } } -{ $description "Outputs the element at the midpoint of a sequence." } ; - -HELP: partition -{ $values { "seq" "a sequence" } { "n" integer } { "slice" slice } } -{ $description "Outputs a slice of the first or second half of the sequence, respectively, depending on the integer's sign." } ; - -HELP: binsearch -{ $values { "elt" object } { "seq" "a sorted sequence" } { "quot" "a quotation with stack effect " { $snippet "( obj1 obj2 -- <=> )" } } { "i" "the index of the search result" } } -{ $description "Given a sequence that is sorted with respect to the " { $snippet "quot" } " comparator, searches for an element equal to " { $snippet "elt" } ", or failing that, the greatest element smaller than " { $snippet "elt" } ". Comparison is performed with " { $snippet "quot" } "." -$nl -"Outputs f if the sequence is empty. If the sequence has at least one element, this word always outputs a valid index." } ; - -HELP: binsearch* -{ $values { "elt" object } { "seq" "a sorted sequence" } { "quot" "a quotation with stack effect " { $snippet "( obj1 obj2 -- <=> )" } } { "result" "the search result" } } -{ $description "Variant of " { $link binsearch } " which outputs the found element rather than its index in the sequence." -$nl -"Outputs " { $link f } " if the sequence is empty. If the sequence has at least one element, this word always outputs a sequence element." } ; - { <=> compare natural-sort sort-keys sort-values } related-words diff --git a/core/sorting/sorting-tests.factor b/core/sorting/sorting-tests.factor index 17ec2d7cd1..f79800feae 100755 --- a/core/sorting/sorting-tests.factor +++ b/core/sorting/sorting-tests.factor @@ -16,13 +16,3 @@ unit-test ] unit-test [ ] [ { 1 2 } [ 2drop 1 ] sort drop ] unit-test - -[ 3 ] [ { 1 2 3 4 } midpoint ] unit-test - -[ f ] [ 3 { } [ <=> ] binsearch ] unit-test -[ 0 ] [ 3 { 3 } [ <=> ] binsearch ] unit-test -[ 1 ] [ 2 { 1 2 3 } [ <=> ] binsearch ] unit-test -[ 3 ] [ 4 { 1 2 3 4 5 6 } [ <=> ] binsearch ] unit-test -[ 2 ] [ 3.5 { 1 2 3 4 5 6 7 8 } [ <=> ] binsearch ] unit-test -[ 4 ] [ 5.5 { 1 2 3 4 5 6 7 8 } [ <=> ] binsearch ] unit-test -[ 10 ] [ 10 20 >vector [ <=> ] binsearch ] unit-test diff --git a/core/sorting/sorting.factor b/core/sorting/sorting.factor index 1a2491328c..0bc09089db 100755 --- a/core/sorting/sorting.factor +++ b/core/sorting/sorting.factor @@ -1,4 +1,4 @@ -! Copyright (C) 2005, 2007 Slava Pestov. +! Copyright (C) 2005, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays kernel math sequences vectors math.order sequences sequences.private math.order ; @@ -53,25 +53,3 @@ PRIVATE> : sort-values ( seq -- sortedseq ) [ [ second ] compare ] sort ; : sort-pair ( a b -- c d ) 2dup after? [ swap ] when ; - -: midpoint ( seq -- elt ) - [ midpoint@ ] keep nth-unsafe ; inline - -: partition ( seq n -- slice ) - +gt+ eq? not swap halves ? ; inline - -: (binsearch) ( elt quot seq -- i ) - dup length 1 <= [ - slice-from 2nip - ] [ - [ midpoint swap call ] 3keep roll dup +eq+ eq? - [ drop dup slice-from swap midpoint@ + 2nip ] - [ partition (binsearch) ] if - ] if ; inline - -: binsearch ( elt seq quot -- i ) - swap dup empty? - [ 3drop f ] [ (binsearch) ] if ; inline - -: binsearch* ( elt seq quot -- result ) - over >r binsearch [ r> ?nth ] [ r> drop f ] if* ; inline diff --git a/extra/cords/cords.factor b/extra/cords/cords.factor index a7f4246826..52cb9914b4 100644 --- a/extra/cords/cords.factor +++ b/extra/cords/cords.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors assocs sequences sorting math math.order -arrays combinators kernel ; +USING: accessors assocs sequences sorting binary-search math +math.order arrays combinators kernel ; IN: cords > ; M: multi-cord virtual@ dupd - seqs>> [ first <=> ] binsearch* + seqs>> [ first <=> ] with search nip [ first - ] [ second ] bi ; M: multi-cord virtual-seq diff --git a/extra/interval-maps/interval-maps.factor b/extra/interval-maps/interval-maps.factor index 95e3794e32..a62855d78f 100755 --- a/extra/interval-maps/interval-maps.factor +++ b/extra/interval-maps/interval-maps.factor @@ -1,5 +1,7 @@ -USING: kernel sequences arrays accessors grouping -math.order sorting math assocs locals namespaces ; +! Copyright (C) 2008 Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel sequences arrays accessors grouping math.order +sorting binary-search math assocs locals namespaces ; IN: interval-maps TUPLE: interval-map array ; @@ -7,7 +9,7 @@ TUPLE: interval-map array ; ] binsearch* ; + [ first <=> ] with search nip ; : interval-contains? ( key interval-node -- ? ) first2 between? ; diff --git a/extra/math/primes/primes.factor b/extra/math/primes/primes.factor index 59aebbf0dd..f3a515e72b 100644 --- a/extra/math/primes/primes.factor +++ b/extra/math/primes/primes.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2007 Samuel Tardieu. ! See http://factorcode.org/license.txt for BSD license. USING: combinators kernel lists.lazy math math.functions math.miller-rabin - math.order math.primes.list math.ranges sequences sorting ; + math.order math.primes.list math.ranges sequences sorting + binary-search ; IN: math.primes : next-prime ( n -- p ) dup 999983 < [ - primes-under-million [ [ <=> ] binsearch 1+ ] keep nth + primes-under-million [ natural-search drop 1+ ] keep nth ] [ next-odd find-prime-miller-rabin ] if ; foldable : prime? ( n -- ? ) dup 1000000 < [ - dup primes-under-million [ <=> ] binsearch* = + dup primes-under-million natural-search nip = ] [ miller-rabin ] if ; foldable @@ -37,7 +38,7 @@ PRIVATE> { { [ dup 2 < ] [ drop { } ] } { [ dup 1000003 < ] - [ primes-under-million [ [ <=> ] binsearch 1+ 0 swap ] keep ] } + [ primes-under-million [ natural-search drop 1+ 0 swap ] keep ] } [ primes-under-million 1000003 lprimes-from rot [ <= ] curry lwhile list>array append ] } cond ; foldable @@ -45,6 +46,6 @@ PRIVATE> : primes-between ( low high -- seq ) primes-upto [ 1- next-prime ] dip - [ [ <=> ] binsearch ] keep [ length ] keep ; foldable + [ natural-search drop ] keep [ length ] keep ; foldable : coprime? ( a b -- ? ) gcd nip 1 = ; foldable diff --git a/extra/ui/gadgets/gadgets.factor b/extra/ui/gadgets/gadgets.factor index 19593d2f22..ea51847ba7 100755 --- a/extra/ui/gadgets/gadgets.factor +++ b/extra/ui/gadgets/gadgets.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2005, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays hashtables kernel models math namespaces - sequences quotations math.vectors combinators sorting vectors - dlists dequeues models threads concurrency.flags - math.order math.geometry.rect ; + sequences quotations math.vectors combinators sorting + binary-search vectors dlists dequeues models threads + concurrency.flags math.order math.geometry.rect ; IN: ui.gadgets @@ -70,12 +70,15 @@ GENERIC: children-on ( rect/point gadget -- seq ) M: gadget children-on nip children>> ; -: (fast-children-on) ( dim axis gadgets -- i ) - swapd [ rect-loc v- over v. 0 <=> ] binsearch nip ; +: ((fast-children-on)) ( gadget dim axis -- <=> ) + [ swap loc>> v- ] dip v. 0 <=> ; + +: (fast-children-on) ( dim axis children -- i ) + -rot [ ((fast-children-on)) ] 2curry search drop ; : fast-children-on ( rect axis children -- from to ) - [ >r >r rect-loc r> r> (fast-children-on) 0 or ] - [ >r >r dup rect-loc swap rect-dim v+ r> r> (fast-children-on) ?1+ ] + [ [ rect-loc ] 2dip (fast-children-on) 0 or ] + [ [ rect-bounds v+ ] 2dip (fast-children-on) ?1+ ] 3bi ; : inside? ( bounds gadget -- ? ) diff --git a/extra/usa-cities/usa-cities.factor b/extra/usa-cities/usa-cities.factor index 968bf9d053..a5abb53c62 100644 --- a/extra/usa-cities/usa-cities.factor +++ b/extra/usa-cities/usa-cities.factor @@ -50,4 +50,4 @@ MEMO: cities-named-in ( name state -- cities ) ] with with filter ; : find-zip-code ( code -- city ) - cities [ first-zip>> <=> ] binsearch* ; + cities [ first-zip>> <=> ] with search nip ; -- 2.34.1