basis/bloom-filters/bloom-filters-tests.factor

   1 USING: accessors bit-arrays bloom-filters bloom-filters.private kernel layouts
   2 math random sequences tools.test ;
   3 IN: bloom-filters.tests
   4
   5 { { 200 5 } } [ { 100 7 } { 200 5 } smaller-second ] unit-test
   6 { { 200 5 } } [ { 200 5 } { 100 7 } smaller-second ] unit-test
   7
   8 ! The sizing information was generated using the subroutine
   9 ! calculate_shortest_filter_length from
  10 ! http://www.perl.com/pub/a/2004/04/08/bloom_filters.html.
  11
  12 ! Test bloom-filter creation
  13 { 47965 } [ 7 0.01 5000 bits-to-satisfy-error-rate ] unit-test
  14 { 7 47965 } [ 0.01 5000 size-bloom-filter ] unit-test
  15 { 7 } [ 0.01 5000 <bloom-filter> #hashes>> ] unit-test
  16 { 47965 } [ 0.01 5000 <bloom-filter> bits>> length ] unit-test
  17 { 5000 } [ 0.01 5000 <bloom-filter> capacity>> ] unit-test
  18 { 0 } [ 0.01 5000 <bloom-filter> count>> ] unit-test
  19
  20 ! Should return the fewest hashes to satisfy the bits requested, not the most.
  21 { 32 } [ 4 0.05 5 bits-to-satisfy-error-rate ] unit-test
  22 { 32 } [ 5 0.05 5 bits-to-satisfy-error-rate ] unit-test
  23 { 4 32 } [ 0.05 5 size-bloom-filter ] unit-test
  24
  25 ! This is a lot of bits.
  26 [ 0.00000001 max-array-capacity size-bloom-filter ] [ invalid-size? ]  must-fail-with
  27
  28 ! Other error conditions.
  29 [ 1.0 2000 <bloom-filter> ] [ invalid-error-rate? ] must-fail-with
  30 [ 20 2000 <bloom-filter> ] [ invalid-error-rate? ] must-fail-with
  31 [ 0.0 2000 <bloom-filter> ] [ invalid-error-rate? ] must-fail-with
  32 [ -2 2000 <bloom-filter> ] [ invalid-error-rate? ] must-fail-with
  33 [ 0.5 0 <bloom-filter> ] [ invalid-capacity? ] must-fail-with
  34 [ 0.5 -5 <bloom-filter> ] [ invalid-capacity? ] must-fail-with
  35
  36 ! Should not generate bignum hash codes.  Enhanced double hashing may generate a
  37 ! lot of hash codes, and it's better to do this earlier than later.
  38 { t } [ 10000 <iota> [ double-hashcodes [ fixnum? ] both? ] all? ] unit-test
  39
  40 : empty-bloom-filter ( -- bloom-filter )
  41     0.01 2000 <bloom-filter> ;
  42
  43 { 1 } [ empty-bloom-filter dup increment-count count>> ] unit-test
  44
  45 : basic-insert-test-setup ( -- bloom-filter )
  46     1 empty-bloom-filter [ bloom-filter-insert ] keep ;
  47
  48 ! Basic tests that insert does something
  49 { t } [ basic-insert-test-setup bits>> [ ] any? ] unit-test
  50 { 1 } [ basic-insert-test-setup count>> ] unit-test
  51
  52 : non-empty-bloom-filter ( -- bloom-filter )
  53     1000 <iota>
  54     empty-bloom-filter
  55     [ [ bloom-filter-insert ] curry each ] keep ;
  56
  57 : full-bloom-filter ( -- bloom-filter )
  58     2000 <iota>
  59     empty-bloom-filter
  60     [ [ bloom-filter-insert ] curry each ] keep ;
  61
  62 ! Should find what we put in there.
  63 { t } [ 2000 <iota>
  64         full-bloom-filter
  65         [ bloom-filter-member? ] curry map
  66         [ ] all?
  67 ] unit-test
  68
  69 ! We shouldn't have more than 0.01 false-positive rate.
  70 { t } [ 1000 <iota> [ drop most-positive-fixnum random 1000 + ] map
  71         full-bloom-filter
  72         [ bloom-filter-member? ] curry map
  73         [ ] count
  74         ! TODO: This should be 10, but the false positive rate is currently very
  75         ! high.  300 is large enough not to prevent builds from succeeding.
  76         300 <=
  77 ] unit-test