hash-sets: faster implementation based on hashtables.

author John Benediktsson <mrjbq7@gmail.com>

Fri, 8 Mar 2013 05:43:17 +0000 (21:43 -0800)

committer John Benediktsson <mrjbq7@gmail.com>

Fri, 8 Mar 2013 05:43:17 +0000 (21:43 -0800)
author John Benediktsson <mrjbq7@gmail.com>
Fri, 8 Mar 2013 05:43:17 +0000 (21:43 -0800)
committer John Benediktsson <mrjbq7@gmail.com>
Fri, 8 Mar 2013 05:43:17 +0000 (21:43 -0800)
diff --git a/basis/compiler/tree/propagation/transforms/transforms.factor b/basis/compiler/tree/propagation/transforms/transforms.factor

index 735a8fcdabcfdbd2b54da468a3fe746ae052c21f..705156ff2cc9d026522dcb1f0a8e994e473306e3 100644 (file)
--- a/basis/compiler/tree/propagation/transforms/transforms.factor
+++ b/basis/compiler/tree/propagation/transforms/transforms.factor
@@ -9,7 +9,7 @@ math.integers.private layouts math.order vectors hashtables
  combinators effects generalizations sequences.generalizations
  assocs sets combinators.short-circuit sequences.private locals
  growable stack-checker namespaces compiler.tree.propagation.info
-;
+hash-sets ;
  FROM: math => float ;
  FROM: sets => set ;
  IN: compiler.tree.propagation.transforms
@@ -157,7 +157,7 @@ IN: compiler.tree.propagation.transforms
      in-d>> first value-info literal>> {
          { V{ } [ [ drop { } 0 vector boa ] ] }
          { H{ } [ [ drop 0 <hashtable> ] ] }
-        { HS{ } [ [ drop f fast-set ] ] }
+        { HS{ } [ [ drop 0 <hash-set> ] ] }
          [ drop f ]
      } case
  ] "custom-inlining" set-word-prop
diff --git a/basis/random/random.factor b/basis/random/random.factor

index f425bb96e0afe24e4b4080dad1a855be2ab95883..d2dae4335bf7a93875272aaf673474fd6cd379ec 100644 (file)
--- a/basis/random/random.factor
+++ b/basis/random/random.factor
@@ -94,7 +94,12 @@ M: hashtable random
  
  M: sets:set random members random ;
  
-M: hash-set random table>> random first ;
+M: hash-set random
+    dup cardinality [ drop f ] [
+        [ 0 ] [ array>> ] [ random ] tri* 1 + [
+            [ 2dup array-nth tombstone? [ 1 + ] 2dip ] loop
+        ] times [ 1 - ] dip array-nth
+    ] if-zero ;
  
  : randomize-n-last ( seq n -- seq )
      [ dup length dup ] dip - 1 max '[ dup _ > ]
diff --git a/core/hash-sets/hash-sets.factor b/core/hash-sets/hash-sets.factor

index fa88cf60bd967ecfb42992f700ca1b45e4d7c622..80cd5e1c9bd5c91d754434b0916b1647579b5e9b 100644 (file)
--- a/core/hash-sets/hash-sets.factor
+++ b/core/hash-sets/hash-sets.factor
@@ -1,34 +1,149 @@
  ! Copyright (C) 2010 Daniel Ehrenberg
+! Copyright (C) 2005, 2011 John Benediktsson, Slava Pestov.
  ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors assocs hashtables kernel sequences sets
-sets.private ;
+USING: accessors arrays hash-sets hashtables.private kernel
+kernel.private math math.private sequences sequences.private
+sets sets.private slots.private vectors ;
  IN: hash-sets
  
-! In a better implementation, less memory would be used
-TUPLE: hash-set { table hashtable read-only } ;
+TUPLE: hash-set
+{ count array-capacity }
+{ deleted array-capacity }
+{ array array } ;
  
-: <hash-set> ( capacity -- hash-set )
-    <hashtable> hash-set boa ; inline
+<PRIVATE
+
+: hash@ ( key array -- i )
+    [ hashcode >fixnum ] dip wrap ; inline
+
+: probe ( array i probe# -- array i probe# )
+    1 fixnum+fast [ fixnum+fast over wrap ] keep ; inline
+
+: no-key ( key array -- array n ? ) nip f f ; inline
+
+: (key@) ( key array i probe# -- array n ? )
+    [ 3dup swap array-nth ] dip over ((empty)) eq?
+    [ 4drop no-key ] [
+        [ = ] dip swap
+        [ drop rot drop t ]
+        [ probe (key@) ]
+        if
+    ] if ; inline recursive
+
+: key@ ( key hash -- array n ? )
+    array>> dup length>> 0 eq?
+    [ no-key ] [ 2dup hash@ 0 (key@) ] if ; inline
+
+: <hash-array> ( n -- array )
+    1 + next-power-of-2 2 * ((empty)) <array> ; inline
+
+: reset-hash ( n hash -- )
+    swap <hash-array> >>array init-hash ; inline
+
+: (new-key@) ( key array i probe# j -- array i j empty? )
+    [ 2dup swap array-nth ] 2dip pick tombstone?
+    [
+        rot ((empty)) eq?
+        [ nip [ drop ] 3dip t ]
+        [ pick or [ probe ] dip (new-key@) ]
+        if
+    ] [
+        [ [ pick ] dip = ] 2dip rot
+        [ nip [ drop ] 3dip f ]
+        [ [ probe ] dip (new-key@) ]
+        if
+    ] if ; inline recursive
+
+: new-key@ ( key hash -- array n )
+    [ array>> 2dup hash@ 0 f (new-key@) ] keep swap
+    [ over [ hash-deleted- ] [ hash-count+ ] if swap or ] [ 2drop ] if ; inline
+
+: set-nth-item ( key seq n -- )
+    2 fixnum+fast set-slot ; inline
+
+: (rehash) ( hash seq -- )
+    swap [ dupd new-key@ set-nth-item ] curry each ; inline
+
+: hash-large? ( hash -- ? )
+    [ count>> 3 fixnum*fast 1 fixnum+fast ]
+    [ array>> length>> 1 fixnum-shift-fast ] bi fixnum> ; inline
+
+: grow-hash ( hash -- )
+    { hash-set } declare [
+        [ members { array } declare ]
+        [ cardinality 1 + ]
+        [ reset-hash ] tri
+    ] keep swap (rehash) ;
+
+: ?grow-hash ( hash -- )
+    dup hash-large? [ grow-hash ] [ drop ] if ; inline
+
+PRIVATE>
+
+: <hash-set> ( n -- hash )
+    hash-set new [ reset-hash ] keep ; inline
+
+M: hash-set in? ( key hash -- ? )
+     key@ 2nip ;
+
+M: hash-set clear-set ( hash -- )
+    [ init-hash ] [ array>> [ drop ((empty)) ] map! drop ] bi ;
+
+M: hash-set delete ( key hash -- )
+    [ nip ] [ key@ ] 2bi [
+        [ ((tombstone)) ] 2dip set-nth-item
+        hash-deleted+
+    ] [
+        3drop
+    ] if ;
+
+M: hash-set cardinality ( hash -- n )
+    [ count>> ] [ deleted>> ] bi - ; inline
+
+M: hash-set adjoin ( key hash -- )
+    dup ?grow-hash dupd new-key@ set-nth-item ;
+
+<PRIVATE
+
+: push-unsafe ( elt seq -- )
+    [ length ] keep
+    [ underlying>> set-array-nth ]
+    [ [ 1 fixnum+fast { array-capacity } declare ] dip length<< ]
+    2bi ; inline
+
+PRIVATE>
+
+M: hash-set members
+    [ array>> [ length ] keep ] [ cardinality <vector> ] bi [
+        [
+            [ array-nth ] dip over tombstone?
+            [ 2drop ] [ push-unsafe ] if
+        ] 2curry each-integer
+    ] keep { } like ;
+
+M: hash-set clone
+    (clone) [ clone ] change-array ; inline
+
+M: hash-set equal?
+    over hash-set? [ set= ] [ 2drop f ] if ;
  
  : >hash-set ( members -- hash-set )
-    unique hash-set boa ; inline
+    dup length <hash-set> [ [ adjoin ] curry each ] keep ;
+
+M: hash-set set-like
+    drop dup hash-set? [ ?members >hash-set ] unless ; inline
  
  INSTANCE: hash-set set
-M: hash-set in? table>> key? ; inline
-M: hash-set adjoin table>> dupd set-at ; inline
-M: hash-set delete table>> delete-at ; inline
-M: hash-set members table>> keys ; inline
-M: hash-set set-like drop dup hash-set? [ ?members >hash-set ] unless ;
-M: hash-set clone table>> clone hash-set boa ;
-M: hash-set null? table>> assoc-empty? ;
-M: hash-set cardinality table>> assoc-size ;
+
  M: hash-set intersect small/large sequence/tester filter >hash-set ;
+
  M: hash-set union (union) >hash-set ;
+
  M: hash-set diff sequence/tester [ not ] compose filter >hash-set ;
-M: hash-set clear-set table>> clear-assoc ;
+
+M: f fast-set drop 0 <hash-set> ;
  
  M: sequence fast-set >hash-set ;
-M: f fast-set drop H{ } clone hash-set boa ;
  
  M: sequence duplicates
      dup length <hash-set> [ ?adjoin not ] curry filter ;
author	John Benediktsson <mrjbq7@gmail.com>
	Fri, 8 Mar 2013 05:43:17 +0000 (21:43 -0800)
committer	John Benediktsson <mrjbq7@gmail.com>
	Fri, 8 Mar 2013 05:43:17 +0000 (21:43 -0800)
basis/compiler/tree/propagation/transforms/transforms.factor		patch \| blob \| history
basis/random/random.factor		patch \| blob \| history
core/hash-sets/hash-sets.factor		patch \| blob \| history