]> gitweb.factorcode.org Git - factor.git/commitdiff
cuckoo-filters: new vocabulary.
authorJohn Benediktsson <mrjbq7@gmail.com>
Mon, 8 Aug 2016 17:13:11 +0000 (10:13 -0700)
committerJohn Benediktsson <mrjbq7@gmail.com>
Mon, 8 Aug 2016 17:17:00 +0000 (10:17 -0700)
extra/cuckoo-filters/authors.txt [new file with mode: 0644]
extra/cuckoo-filters/cuckoo-filters-tests.factor [new file with mode: 0644]
extra/cuckoo-filters/cuckoo-filters.factor [new file with mode: 0644]
extra/cuckoo-filters/summary.txt [new file with mode: 0644]

diff --git a/extra/cuckoo-filters/authors.txt b/extra/cuckoo-filters/authors.txt
new file mode 100644 (file)
index 0000000..e091bb8
--- /dev/null
@@ -0,0 +1 @@
+John Benediktsson
diff --git a/extra/cuckoo-filters/cuckoo-filters-tests.factor b/extra/cuckoo-filters/cuckoo-filters-tests.factor
new file mode 100644 (file)
index 0000000..3255d2d
--- /dev/null
@@ -0,0 +1,28 @@
+USING: accessors combinators combinators.short-circuit
+cuckoo-filters kernel math.parser sequences tools.test ;
+
+{ t 1 t t f 0 } [
+    "factor" 100 <cuckoo-filter> {
+        [ cuckoo-insert ]
+        [ nip size>> ]
+        [ cuckoo-lookup ]
+        [ cuckoo-delete ]
+        [ cuckoo-lookup ]
+        [ nip size>> ]
+    } 2cleave
+] unit-test
+
+{ 250,000 0 } [
+    250,000 <cuckoo-filter>
+    250,000 [ number>string ] { } map-integers
+    [
+        [
+            {
+                [ over cuckoo-lookup not ]
+                [ over cuckoo-insert ]
+            } 1&&
+        ] count swap
+    ]
+    [ [ over cuckoo-delete drop ] each ] bi
+    size>>
+] unit-test
diff --git a/extra/cuckoo-filters/cuckoo-filters.factor b/extra/cuckoo-filters/cuckoo-filters.factor
new file mode 100644 (file)
index 0000000..8c3db68
--- /dev/null
@@ -0,0 +1,96 @@
+! Copyright (C) 2016 John Benediktsson
+! See http://factorcode.org/license.txt for BSD license
+
+USING: accessors arrays checksums checksums.sha
+combinators.short-circuit io.binary kernel locals math
+math.bitwise random sequences ;
+
+IN: cuckoo-filters
+
+<PRIVATE
+
+! The maximum number of times we kick down items/displace from
+! their buckets
+CONSTANT: max-cuckoo-count 500
+
+! The maximum load factor we allow before growing the capacity
+CONSTANT: max-load-factor 0.96
+
+! The number of tags to store in each bucket
+CONSTANT: bucket-size 4
+
+: #buckets ( capacity -- #buckets )
+    [ bucket-size /i next-power-of-2 ] keep
+    over / bucket-size / max-load-factor > [ 2 * ] when ;
+
+: <cuckoo-buckets> ( capacity -- buckets )
+    #buckets [ bucket-size f <array> ] replicate ;
+
+: tag-index ( hash -- tag index )
+    4 cut 4 head [ be> ] bi@ ;
+
+: alt-index ( tag index -- altindex )
+    [ 0x5bd1e995 w* ] [ bitxor ] bi* ;
+
+: tag-indices ( bytes cuckoo-filter -- tag i1 i2 )
+    checksum>> checksum-bytes tag-index 2dup alt-index ;
+
+: bucket-lookup ( fingerprint bucket -- ? )
+    member? ;
+
+: bucket-insert ( fingerprint bucket -- ? )
+    dup [ not ] find drop [ swap set-nth t ] [ 2drop f ] if* ;
+
+: bucket-delete ( fingerprint bucket -- ? )
+    [ f ] 2dip [ index ] keep over [ set-nth t ] [ 3drop f ] if ;
+
+: bucket-swap ( fingerprint bucket -- fingerprint' )
+    [ length random ] keep [ swap ] change-nth ;
+
+PRIVATE>
+
+TUPLE: cuckoo-filter buckets checksum size ;
+
+: <cuckoo-filter> ( capacity -- cuckoo-filter )
+    <cuckoo-buckets> sha1 0 cuckoo-filter boa ;
+
+:: cuckoo-insert ( obj cuckoo-filter -- ? )
+    obj cuckoo-filter tag-indices :> ( tag! i1 i2 )
+    cuckoo-filter buckets>> :> buckets
+    buckets length :> cuckoo-size
+    {
+        [ tag i1 cuckoo-size mod buckets nth bucket-insert ]
+        [ tag i2 cuckoo-size mod buckets nth bucket-insert ]
+    } 0|| [
+        cuckoo-filter [ 1 + ] change-size drop t
+    ] [
+        cuckoo-filter checksum>> :> checksum
+        { i1 i2 } random :> i!
+        max-cuckoo-count [
+            drop
+            tag i cuckoo-size mod buckets nth bucket-swap tag!
+            tag i alt-index i!
+
+            tag i cuckoo-size mod buckets nth bucket-insert
+            dup [ cuckoo-filter [ 1 + ] change-size drop ] when
+        ] find-integer >boolean
+    ] if ;
+
+:: cuckoo-lookup ( obj cuckoo-filter -- ? )
+    obj cuckoo-filter tag-indices :> ( tag i1 i2 )
+    cuckoo-filter buckets>> :> buckets
+    buckets length :> cuckoo-size
+    {
+        [ tag i1 cuckoo-size mod buckets nth bucket-lookup ]
+        [ tag i2 cuckoo-size mod buckets nth bucket-lookup ]
+    } 0|| ;
+
+:: cuckoo-delete ( obj cuckoo-filter -- ? )
+    obj cuckoo-filter tag-indices :> ( tag i1 i2 )
+    cuckoo-filter buckets>> :> buckets
+    buckets length :> cuckoo-size
+    {
+        [ tag i1 cuckoo-size mod buckets nth bucket-delete ]
+        [ tag i2 cuckoo-size mod buckets nth bucket-delete ]
+    } 0||
+    dup [ cuckoo-filter [ 1 - ] change-size drop ] when ;
diff --git a/extra/cuckoo-filters/summary.txt b/extra/cuckoo-filters/summary.txt
new file mode 100644 (file)
index 0000000..078b781
--- /dev/null
@@ -0,0 +1 @@
+Cuckoo filters