Contents of /[pcsx2_0.9.7]/trunk/3rdparty/google/sparsehash/densehashtable.h

Revision 8 - Mon Sep 6 11:19:43 2010 UTC by william
File MIME type: text/plain; file size: 42271 byte(s)
Exported ./upsream/trunk @r3730 from http://pcsx2.googlecode.com/svn/trunk/
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Craig Silverstein
//
// A dense hashtable is a particular implementation of
// a hashtable: one that is meant to minimize memory allocation.
// It does this by using an array to store all the data.  We
// steal a value from the key space to indicate "empty" array
// elements (i.e. indices where no item lives) and another to indicate
// "deleted" elements.
//
// (Note it is possible to change the value of the delete key
// on the fly; you can even remove it, though after that point
// the hashtable is insert_only until you set it again.  The empty
// value however can't be changed.)
//
// To minimize allocation and pointer overhead, we use internal
// probing, in which the hashtable is a single table, and collisions
// are resolved by trying to insert again in another bucket.  The
// most cache-efficient internal probing schemes are linear probing
// (which suffers, alas, from clumping) and quadratic probing, which
// is what we implement by default.
//
// Type requirements: value_type is required to be Copy Constructible
// and Default Constructible.  It is not required to be (and commonly
// isn't) Assignable.
//
// You probably shouldn't use this code directly.  Use
// <google/dense_hash_map> or <google/dense_hash_set> instead.
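//
// For orientation, here is a minimal usage sketch through the public
// wrapper (an editor's illustration, not part of the original file; it
// assumes the <google/dense_hash_map> header that accompanies this one):
//
//   #include <google/dense_hash_map>
//
//   google::dense_hash_map<int, int> counts;
//   counts.set_empty_key(-1);     // mandatory before first use
//   counts.set_deleted_key(-2);   // needed only if you plan to erase()
//   counts[42] = 1;               // insert through the map interface
//   counts.erase(42);             // marks the bucket as "deleted"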

// You can change the following below:
// HT_OCCUPANCY_FLT      -- how full before we double size
// HT_EMPTY_FLT          -- how empty before we halve size
// HT_MIN_BUCKETS        -- default smallest bucket size
//
// You can also change enlarge_resize_percent (which defaults to
// HT_OCCUPANCY_FLT), and shrink_resize_percent (which defaults to
// HT_EMPTY_FLT) with set_resizing_parameters().
//
// How to decide what values to use?
// shrink_resize_percent's default of .4 * HT_OCCUPANCY_FLT is probably good.
// HT_MIN_BUCKETS is probably unnecessary since you can specify
// (indirectly) the starting number of buckets at construct-time.
// For enlarge_resize_percent, you can use this chart to trade off
// expected lookup time against the space taken up.  By default, this
// code uses quadratic probing, though you can change it to linear
// via JUMP_ below if you really want to.
//
// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
// NUMBER OF PROBES / LOOKUP        Successful             Unsuccessful
// Quadratic collision resolution   1 - ln(1-L) - L/2      1/(1-L) - L - ln(1-L)
// Linear collision resolution      [1+1/(1-L)]/2          [1+1/(1-L)^2]/2
//
// -- enlarge_resize_percent --     0.10  0.50  0.60  0.75  0.80  0.90  0.99
// QUADRATIC COLLISION RES.
//    probes/successful lookup     1.05  1.44  1.62  2.01  2.21  2.85  5.11
//    probes/unsuccessful lookup   1.11  2.19  2.82  4.64  5.81  11.4  103.6
// LINEAR COLLISION RES.
//    probes/successful lookup     1.06  1.5   1.75  2.5   3.0   5.5   50.5
//    probes/unsuccessful lookup   1.12  2.5   3.6   8.5   13.0  50.0  5000.0

#ifndef _DENSEHASHTABLE_H_
#define _DENSEHASHTABLE_H_

// The probing method
// Linear probing
// #define JUMP_(key, num_probes)    ( 1 )
// Quadratic-ish probing
#define JUMP_(key, num_probes)    ( num_probes )
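
// With the quadratic-ish definition above, successive probes land at
// triangular-number offsets from the home bucket.  A sketch of the update
// rule as find_position() below applies it (an editor's illustration;
// 'hash', 'key', and 'num_buckets' stand in for the class members):
//
//   size_t bucknum = hash(key) & (num_buckets - 1);   // probe 0: offset 0
//   for (size_t num_probes = 1; /* until empty/match */; ++num_probes)
//     bucknum = (bucknum + JUMP_(key, num_probes)) & (num_buckets - 1);
//   // offsets visited: 0, 1, 3, 6, 10, 15, ... (mod num_buckets)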


// Hashtable class, used to implement the hashed associative containers
// hash_set and hash_map.

#include <google/sparsehash/sparseconfig.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>              // for abort()
#include <algorithm>             // For swap(), eg
#include <iostream>              // For cerr
#include <memory>                // For uninitialized_fill, uninitialized_copy
#include <utility>               // for pair<>
#include <iterator>              // for facts about iterator tags
#include <google/type_traits.h>  // for true_type, integral_constant, etc.

_START_GOOGLE_NAMESPACE_

using STL_NAMESPACE::pair;

template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_iterator;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_const_iterator;

// We're just an array, but we need to skip over empty and deleted elements
template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_iterator {
 public:
  typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
  typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef ptrdiff_t difference_type;
  typedef size_t size_type;
  typedef V& reference;                // Value
  typedef V* pointer;

  // "Real" constructor and default constructor
  dense_hashtable_iterator(const dense_hashtable<V,K,HF,ExK,EqK,A> *h,
                           pointer it, pointer it_end, bool advance)
    : ht(h), pos(it), end(it_end) {
    if (advance)  advance_past_empty_and_deleted();
  }
  dense_hashtable_iterator() { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on an empty or marked-deleted array element
  void advance_past_empty_and_deleted() {
    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
      ++pos;
  }
  iterator& operator++() {
    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }


  // The actual data
  const dense_hashtable<V,K,HF,ExK,EqK,A> *ht;
  pointer pos, end;
};


// Now do it all again, but with const-ness!
template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_const_iterator {
 public:
  typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
  typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef ptrdiff_t difference_type;
  typedef size_t size_type;
  typedef const V& reference;                // Value
  typedef const V* pointer;

  // "Real" constructor and default constructor
  dense_hashtable_const_iterator(const dense_hashtable<V,K,HF,ExK,EqK,A> *h,
                                 pointer it, pointer it_end, bool advance)
    : ht(h), pos(it), end(it_end) {
    if (advance)  advance_past_empty_and_deleted();
  }
  dense_hashtable_const_iterator() { }
  // This lets us convert regular iterators to const iterators
  dense_hashtable_const_iterator(const iterator &it)
    : ht(it.ht), pos(it.pos), end(it.end) { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on an empty or marked-deleted array element
  void advance_past_empty_and_deleted() {
    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
      ++pos;
  }
  const_iterator& operator++() {
    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
  }
  const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const const_iterator& it) const { return pos == it.pos; }
  bool operator!=(const const_iterator& it) const { return pos != it.pos; }


  // The actual data
  const dense_hashtable<V,K,HF,ExK,EqK,A> *ht;
  pointer pos, end;
};

template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable {
 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;

  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef value_type* pointer;
  typedef const value_type* const_pointer;
  typedef value_type& reference;
  typedef const value_type& const_reference;
  typedef dense_hashtable_iterator<Value, Key, HashFcn,
                                   ExtractKey, EqualKey, Alloc>
  iterator;

  typedef dense_hashtable_const_iterator<Value, Key, HashFcn,
                                         ExtractKey, EqualKey, Alloc>
  const_iterator;

  // How full we let the table get before we resize.  Knuth says .8 is
  // good -- higher causes us to probe too much, though it saves memory.
  // (This implementation actually defaults to a more conservative .5;
  // see the definition at the bottom of this file.)
  static const float HT_OCCUPANCY_FLT; // = 0.5;

  // How empty we let the table get before we resize lower.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
  static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_t HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size.  Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_t HT_DEFAULT_STARTING_BUCKETS = 32;


  // ITERATOR FUNCTIONS
  iterator begin()             { return iterator(this, table,
                                                 table + num_buckets, true); }
  iterator end()               { return iterator(this, table + num_buckets,
                                                 table + num_buckets, true); }
  const_iterator begin() const { return const_iterator(this, table,
                                                       table+num_buckets,true);}
  const_iterator end() const   { return const_iterator(this, table + num_buckets,
                                                       table+num_buckets,true);}

  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  hasher hash_funct() const { return hash; }
  key_equal key_eq() const  { return equals; }

  // Annoyingly, we can't copy values around, because they might have
  // const components (they're probably pair<const X, Y>).  We use
  // explicit destructor invocation and placement new to get around
  // this.  Arg.
 private:
  void set_value(value_type* dst, const value_type& src) {
    dst->~value_type();
    new(dst) value_type(src);
  }

  void destroy_buckets(size_type first, size_type last) {
    for ( ; first != last; ++first)
      table[first].~value_type();
  }

  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  void squash_deleted() {           // gets rid of any deleted entries we have
    if ( num_deleted ) {            // get rid of deleted before writing
      dense_hashtable tmp(*this);   // copying will get rid of deleted
      swap(tmp);                    // now we are tmp
    }
    assert(num_deleted == 0);
  }

 public:
  void set_deleted_key(const value_type &val) {
    // the empty indicator (if specified) and the deleted indicator
    // must be different
    assert(!use_empty || !equals(get_key(val), get_key(emptyval)));
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    use_deleted = true;
    set_value(&delval, val);
  }
  void clear_deleted_key() {
    squash_deleted();
    use_deleted = false;
  }

  // These are public so the iterators can use them
  // True if the item at position bucknum is the "deleted" marker
  bool test_deleted(size_type bucknum) const {
    // The num_deleted test is crucial for read(): after read(), the ht values
    // are garbage, and we don't want to think some of them are deleted.
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(table[bucknum])));
  }
  bool test_deleted(const iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(*it)));
  }
  bool test_deleted(const const_iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(get_key(delval), get_key(*it)));
  }
  // Set it so test_deleted is true.  Returns true if the object wasn't
  // previously deleted.
  // See below (at erase()) to explain why we allow const_iterators
  bool set_deleted(const_iterator &it) {
    assert(use_deleted);            // bad if set_deleted_key() wasn't called
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type
    set_value(const_cast<value_type*>(&(*it)), delval);
    return retval;
  }
  // Set it so test_deleted is false.  Returns true if the object used
  // to be deleted.
  bool clear_deleted(const_iterator &it) {
    assert(use_deleted);            // bad if set_deleted_key() wasn't called
    // happens automatically when we assign something else in its place
    return test_deleted(it);
  }

  // EMPTY HELPER FUNCTIONS
  // This lets the user describe a key that will indicate empty (unused)
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 public:
  // These are public so the iterators can use them
  // True if the item at position bucknum is the "empty" marker
  bool test_empty(size_type bucknum) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(get_key(emptyval), get_key(table[bucknum]));
  }
  bool test_empty(const iterator &it) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(get_key(emptyval), get_key(*it));
  }
  bool test_empty(const const_iterator &it) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(get_key(emptyval), get_key(*it));
  }

 private:
  // You can either set a range empty or an individual element
  void set_empty(size_type bucknum) {
    assert(use_empty);
    set_value(&table[bucknum], emptyval);
  }
  void fill_range_with_empty(value_type* table_start, value_type* table_end) {
    // Like set_empty(range), but doesn't destroy previous contents
    STL_NAMESPACE::uninitialized_fill(table_start, table_end, emptyval);
  }
  void set_empty(size_type buckstart, size_type buckend) {
    assert(use_empty);
    destroy_buckets(buckstart, buckend);
    fill_range_with_empty(table + buckstart, table + buckend);
  }

 public:
  // TODO(csilvers): change all callers of this to pass in a key instead,
  //                 and take a const key_type instead of const value_type.
  void set_empty_key(const value_type &val) {
    // Once you set the empty key, you can't change it
    assert(!use_empty);
    // The deleted indicator (if specified) and the empty indicator
    // must be different.
    assert(!use_deleted || !equals(get_key(val), get_key(delval)));
    use_empty = true;
    set_value(&emptyval, val);

    assert(!table);                 // must set before first use
    // num_buckets was set in constructor even though table was NULL
    table = (value_type *) malloc(num_buckets * sizeof(*table));
    assert(table);
    fill_range_with_empty(table, table + num_buckets);
  }

  // FUNCTIONS CONCERNING SIZE
 public:
  size_type size() const      { return num_elements - num_deleted; }
  // Buckets are always a power of 2
  size_type max_size() const  { return (size_type(-1) >> 1U) + 1; }
  bool empty() const          { return size() == 0; }
  size_type bucket_count() const      { return num_buckets; }
  size_type max_bucket_count() const  { return max_size(); }
  size_type nonempty_bucket_count() const { return num_elements; }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

 private:
  // This is the smallest size a hashtable can be without being too crowded
  // If you like, you can give a min #buckets as well as a min #elts
  size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
    size_type sz = HT_MIN_BUCKETS;  // min buckets allowed
    while ( sz < min_buckets_wanted || num_elts >= sz * enlarge_resize_percent )
      sz *= 2;
    return sz;
  }
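
  // A worked example (an editor's illustration, not from the original file):
  // with enlarge_resize_percent == 0.5, min_size(100, 0) doubles
  // 4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 256 and returns 256, the first
  // power of two sz satisfying 100 < sz * 0.5.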

  // Used after a string of deletes
  void maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS.  Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    if (shrink_threshold > 0 &&
        (num_elements-num_deleted) < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS ) {
      size_type sz = bucket_count() / 2;    // find how much we should shrink
      while ( sz > HT_DEFAULT_STARTING_BUCKETS &&
              (num_elements - num_deleted) < sz * shrink_resize_percent )
        sz /= 2;                            // stay a power of 2
      dense_hashtable tmp(*this, sz);       // Do the actual resizing
      swap(tmp);                            // now we are tmp
    }
    consider_shrink = false;                // because we just considered it
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  void resize_delta(size_type delta) {
    if ( consider_shrink )                  // see if lots of deletes happened
      maybe_shrink();
    if ( bucket_count() > HT_MIN_BUCKETS &&
         (num_elements + delta) <= enlarge_threshold )
      return;                               // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size = min_size(num_elements + delta, 0);
    if ( needed_size > bucket_count() ) {   // we don't have enough buckets
      const size_type resize_to = min_size(num_elements - num_deleted + delta,
                                           0);
      dense_hashtable tmp(*this, resize_to);
      swap(tmp);                            // now we are tmp
    }
  }

  // Increase number of buckets, assuming value_type has trivial copy
  // constructor and destructor.  (Really, we want it to have "trivial
  // move", because that's what realloc does.  But there's no way to
  // capture that using type_traits, so we pretend that move(x, y) is
  // equivalent to "x.~T(); new(x) T(y);" which is pretty much
  // correct, if a bit conservative.)
  void expand_array(size_t resize_to, true_type) {
    table = (value_type *) realloc(table, resize_to * sizeof(value_type));
    assert(table);
    fill_range_with_empty(table + num_buckets, table + resize_to);
  }

  // Increase number of buckets, without special assumptions about value_type.
  // TODO(austern): make this exception safe.  Handle exceptions from
  // value_type's copy constructor.
  void expand_array(size_t resize_to, false_type) {
    value_type* new_table =
      (value_type *) malloc(resize_to * sizeof(value_type));
    assert(new_table);
    STL_NAMESPACE::uninitialized_copy(table, table + num_buckets, new_table);
    fill_range_with_empty(new_table + num_buckets, new_table + resize_to);
    destroy_buckets(0, num_buckets);
    free(table);
    table = new_table;
  }

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted) {
    clear();            // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {     // we don't have enough buckets
      typedef integral_constant<bool,
          (has_trivial_copy<value_type>::value &&
           has_trivial_destructor<value_type>::value)>
          realloc_ok;   // we pretend mv(x,y) == "x.~T(); new(x) T(y)"
      expand_array(resize_to, realloc_ok());
      num_buckets = resize_to;
      reset_thresholds();
    }

    // We use a normal iterator to get non-deleted buckets from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;             // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           !test_empty(bucknum);            // not empty
           bucknum = (bucknum + JUMP_(get_key(*it), num_probes))
                     & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()); // or else the hashtable is full
      }
      set_value(&table[bucknum], *it);      // copies the value to here
      num_elements++;
    }
  }

  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as req_elements.  As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {     // resize to this or larger
    if ( consider_shrink || req_elements == 0 )
      maybe_shrink();
    if ( req_elements > num_elements )
      return resize_delta(req_elements - num_elements);
  }

  // Change the value of shrink_resize_percent and
  // enlarge_resize_percent.  The description at the beginning of this
  // file explains how to choose the values.  Setting the shrink
  // parameter to 0.0 ensures that the table never shrinks.
  void set_resizing_parameters(float shrink, float grow) {
    assert(shrink >= 0.0);
    assert(grow <= 1.0);
    assert(shrink <= grow/2.0);
    shrink_resize_percent = shrink;
    enlarge_resize_percent = grow;
    reset_thresholds();
  }
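
  // For example (an illustrative call, not from the original file):
  //   ht.set_resizing_parameters(0.0f, 0.8f);
  // disables shrinking entirely and lets the table reach Knuth's 80%
  // occupancy before it doubles in size.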

  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor.  We also define a copy constructor and =.
  // DESTRUCTOR -- needs to free the table
  explicit dense_hashtable(size_type expected_max_items_in_table = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey())
    : hash(hf), equals(eql), get_key(ext), num_deleted(0),
      use_deleted(false), use_empty(false),
      delval(), emptyval(), enlarge_resize_percent(HT_OCCUPANCY_FLT),
      shrink_resize_percent(HT_EMPTY_FLT), table(NULL),
      num_buckets(expected_max_items_in_table == 0
                  ? HT_DEFAULT_STARTING_BUCKETS
                  : min_size(expected_max_items_in_table, 0)),
      num_elements(0) {
    // table is NULL until emptyval is set.  However, we set num_buckets
    // here so we know how much space to allocate once emptyval is set
    reset_thresholds();
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht
  dense_hashtable(const dense_hashtable& ht,
                  size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
    : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_deleted(0),
      use_deleted(ht.use_deleted), use_empty(ht.use_empty),
      delval(ht.delval), emptyval(ht.emptyval),
      enlarge_resize_percent(ht.enlarge_resize_percent),
      shrink_resize_percent(ht.shrink_resize_percent), table(NULL),
      num_buckets(0), num_elements(0) {
    reset_thresholds();
    copy_from(ht, min_buckets_wanted);  // copy_from() ignores deleted entries
  }

  dense_hashtable& operator= (const dense_hashtable& ht) {
    if (&ht == this)  return *this;     // don't copy onto ourselves
    clear();
    hash = ht.hash;
    equals = ht.equals;
    get_key = ht.get_key;
    use_deleted = ht.use_deleted;
    use_empty = ht.use_empty;
    set_value(&delval, ht.delval);
    set_value(&emptyval, ht.emptyval);
    enlarge_resize_percent = ht.enlarge_resize_percent;
    shrink_resize_percent = ht.shrink_resize_percent;
    copy_from(ht, HT_MIN_BUCKETS);      // sets num_deleted to 0 too
    return *this;
  }

  ~dense_hashtable() {
    if (table) {
      destroy_buckets(0, num_buckets);
      free(table);
    }
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(dense_hashtable& ht) {
    STL_NAMESPACE::swap(hash, ht.hash);
    STL_NAMESPACE::swap(equals, ht.equals);
    STL_NAMESPACE::swap(get_key, ht.get_key);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
    STL_NAMESPACE::swap(use_empty, ht.use_empty);
    STL_NAMESPACE::swap(enlarge_resize_percent, ht.enlarge_resize_percent);
    STL_NAMESPACE::swap(shrink_resize_percent, ht.shrink_resize_percent);
    { value_type tmp;     // for annoying reasons, swap() doesn't work
      set_value(&tmp, delval);
      set_value(&delval, ht.delval);
      set_value(&ht.delval, tmp);
    }
    { value_type tmp;     // for annoying reasons, swap() doesn't work
      set_value(&tmp, emptyval);
      set_value(&emptyval, ht.emptyval);
      set_value(&ht.emptyval, tmp);
    }
    STL_NAMESPACE::swap(table, ht.table);
    STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
    STL_NAMESPACE::swap(num_elements, ht.num_elements);
    reset_thresholds();
    ht.reset_thresholds();
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    if (table)
      destroy_buckets(0, num_buckets);
    num_buckets = min_size(0,0);        // our new size
    reset_thresholds();
    table = (value_type *) realloc(table, num_buckets * sizeof(*table));
    assert(table);
    fill_range_with_empty(table, table + num_buckets);
    num_elements = 0;
    num_deleted = 0;
  }

  // Clear the table without resizing it.
  // Mimics the stl_hashtable's behaviour when clear()-ing in that it
  // does not modify the bucket count
  void clear_no_resize() {
    if (table) {
      set_empty(0, num_buckets);
    }
    // don't consider shrinking before another erase()
    reset_thresholds();
    num_elements = 0;
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type &key) const {
    size_type num_probes = 0;              // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
    while ( 1 ) {                          // probe until something happens
      if ( test_empty(bucknum) ) {         // bucket is empty
        if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if ( test_deleted(bucknum) ) { // keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;

      } else if ( equals(key, get_key(table[bucknum])) ) {
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                        // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()); // don't probe too many times!
    }
  }
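
  // To make the returned pair concrete (an editor's illustration, not from
  // the original file): (ILLEGAL_BUCKET, 5) means the key is absent and
  // would be inserted at bucket 5; (3, ILLEGAL_BUCKET) means the key
  // currently lives in bucket 3.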

 public:
  iterator find(const key_type& key) {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
      return end();
    else
      return iterator(this, table + pos.first, table + num_buckets, false);
  }

  const_iterator find(const key_type& key) const {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
      return end();
    else
      return const_iterator(this, table + pos.first, table+num_buckets, false);
  }

  // Counts how many elements have key key.  For maps, it's either 0 or 1.
  size_type count(const key_type &key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
  }

  // Likewise, equal_range doesn't really make sense for us.  Oh well.
  pair<iterator,iterator> equal_range(const key_type& key) {
    const iterator pos = find(key);        // either an iterator or end
    return pair<iterator,iterator>(pos, pos);
  }
  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
    const const_iterator pos = find(key);  // either an iterator or end
    return pair<const_iterator,const_iterator>(pos, pos);
  }


  // INSERTION ROUTINES
 private:
  // If you know *this is big enough to hold obj, use this routine
  pair<iterator, bool> insert_noresize(const value_type& obj) {
    // First, double-check we're not inserting delval or emptyval
    assert(!use_empty || !equals(get_key(obj), get_key(emptyval)));
    assert(!use_deleted || !equals(get_key(obj), get_key(delval)));
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET) {    // object was already there
      return pair<iterator,bool>(iterator(this, table + pos.first,
                                          table + num_buckets, false),
                                 false);   // false: we didn't insert
    } else {                               // pos.second says where to put it
      if ( test_deleted(pos.second) ) {    // just replace if it's been del.
        const_iterator delpos(this, table + pos.second,   // shrug:
                              table + num_buckets, false);// shouldn't need const
        clear_deleted(delpos);
        assert( num_deleted > 0);
        --num_deleted;                     // used to be, now it isn't
      } else {
        ++num_elements;                    // replacing an empty bucket
      }
      set_value(&table[pos.second], obj);
      return pair<iterator,bool>(iterator(this, table + pos.second,
                                          table + num_buckets, false),
                                 true);    // true: we did insert
    }
  }

 public:
  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const value_type& obj) {
    resize_delta(1);                       // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type
    insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_type n = STL_NAMESPACE::distance(f, l);  // TODO(csilvers): standard?
    resize_delta(n);
    for ( ; n > 0; --n, ++f)
      insert_noresize(*f);
  }

  // Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);
  }


  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    // First, double-check we're not trying to erase delval or emptyval
    assert(!use_empty || !equals(key, get_key(emptyval)));
    assert(!use_deleted || !equals(key, get_key(delval)));
    const_iterator pos = find(key);   // shrug: shouldn't need to be const
    if ( pos != end() ) {
      assert(!test_deleted(pos));     // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      consider_shrink = true;         // will think about shrink after next insert
      return 1;                       // because we deleted one thing
    } else {
      return 0;                       // because we deleted nothing
    }
  }

  // This is really evil: really it should be iterator, not const_iterator.
  // But...the only reason keys are const is to allow lookup.
  // Since that's a moot issue for deleted keys, we allow const_iterators
  void erase(const_iterator pos) {
    if ( pos == end() ) return;       // sanity check
    if ( set_deleted(pos) ) {         // true if object has been newly deleted
      ++num_deleted;
      consider_shrink = true;         // will think about shrink after next insert
    }
  }

  void erase(const_iterator f, const_iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )           // should always be true
        ++num_deleted;
    }
    consider_shrink = true;           // will think about shrink after next insert
  }


  // COMPARISON
  bool operator==(const dense_hashtable& ht) const {
    if (size() != ht.size()) {
      return false;
    } else if (this == &ht) {
      return true;
    } else {
      // Iterate through the elements in "this" and see if the
      // corresponding element is in ht
      for ( const_iterator it = begin(); it != end(); ++it ) {
        const_iterator it2 = ht.find(get_key(*it));
        if ((it2 == ht.end()) || (*it != *it2)) {
          return false;
        }
      }
      return true;
    }
  }
  bool operator!=(const dense_hashtable& ht) const {
    return !(*this == ht);
  }


  // I/O
  // We support reading and writing hashtables to disk.  Alas, since
  // I don't know how to write a hasher or key_equal, you have to make
  // sure everything but the table is the same.  We compact before writing.
  //
  // NOTE: These functions are currently TODO.  They have not been implemented.
  bool write_metadata(FILE *fp) {
    squash_deleted();           // so we don't have to worry about delval
    return false;               // TODO
  }

  bool read_metadata(FILE *fp) {
    num_deleted = 0;            // since we got rid of deleted entries before writing
    assert(use_empty);          // have to set this before calling us
    if (table)  free(table);    // we'll make our own
    // TODO: read magic number
    // TODO: read num_buckets
    reset_thresholds();
    table = (value_type *) malloc(num_buckets * sizeof(*table));
    assert(table);
    fill_range_with_empty(table, table + num_buckets);
    // TODO: read num_elements
    for ( size_type i = 0; i < num_elements; ++i ) {
      // TODO: read bucket_num
      // TODO: set with non-empty, non-deleted value
    }
    return false;               // TODO
  }

  // If your keys and values are simple enough, we can write them to
  // disk for you.  "Simple enough" means value_type is a POD type
  // that contains no pointers.  However, we don't try to normalize
  // endianness.
  bool write_nopointer_data(FILE *fp) const {
    for ( const_iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return false;
  }

  // When reading, we have to override the potential const-ness of *it
  bool read_nopointer_data(FILE *fp) {
    for ( iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return false;
  }

 private:
  // The actual data
  hasher hash;                  // required by hashed_associative_container
  key_equal equals;
  ExtractKey get_key;
  size_type num_deleted;        // how many occupied buckets are marked deleted
  bool use_deleted;             // false until delval has been set
  bool use_empty;               // false until emptyval has been set
  value_type delval;            // which key marks deleted entries
  value_type emptyval;          // which key marks unused entries
  float enlarge_resize_percent; // how full before resize
  float shrink_resize_percent;  // how empty before resize
  size_type shrink_threshold;   // num_buckets * shrink_resize_percent
  size_type enlarge_threshold;  // num_buckets * enlarge_resize_percent
  value_type *table;
  size_type num_buckets;
  size_type num_elements;
  bool consider_shrink;         // true if we should try to shrink before next insert

  void reset_thresholds() {
    enlarge_threshold = static_cast<size_type>(num_buckets
                                               * enlarge_resize_percent);
    shrink_threshold = static_cast<size_type>(num_buckets
                                              * shrink_resize_percent);
    consider_shrink = false;    // whatever caused us to reset already considered
  }
};

// We need a global swap as well
template <class V, class K, class HF, class ExK, class EqK, class A>
inline void swap(dense_hashtable<V,K,HF,ExK,EqK,A> &x,
                 dense_hashtable<V,K,HF,ExK,EqK,A> &y) {
  x.swap(y);
}

#undef JUMP_

template <class V, class K, class HF, class ExK, class EqK, class A>
const typename dense_hashtable<V,K,HF,ExK,EqK,A>::size_type
dense_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;

// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though it saves memory --
// but this implementation conservatively defaults to .5.
template <class V, class K, class HF, class ExK, class EqK, class A>
const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.5f;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
template <class V, class K, class HF, class ExK, class EqK, class A>
const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT = 0.4f *
dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;

_END_GOOGLE_NAMESPACE_

#endif /* _DENSEHASHTABLE_H_ */
