1 |
// Copyright (c) 2005, Google Inc. |
2 |
// All rights reserved. |
3 |
// |
4 |
// Redistribution and use in source and binary forms, with or without |
5 |
// modification, are permitted provided that the following conditions are |
6 |
// met: |
7 |
// |
8 |
// * Redistributions of source code must retain the above copyright |
9 |
// notice, this list of conditions and the following disclaimer. |
10 |
// * Redistributions in binary form must reproduce the above |
11 |
// copyright notice, this list of conditions and the following disclaimer |
12 |
// in the documentation and/or other materials provided with the |
13 |
// distribution. |
14 |
// * Neither the name of Google Inc. nor the names of its |
15 |
// contributors may be used to endorse or promote products derived from |
16 |
// this software without specific prior written permission. |
17 |
// |
18 |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 |
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 |
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 |
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 |
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 |
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 |
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 |
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 |
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 |
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 |
|
30 |
// --- |
31 |
// Author: Craig Silverstein |
32 |
// |
33 |
// This is just a very thin wrapper over sparsehashtable.h, just |
34 |
// like sgi stl's stl_hash_set is a very thin wrapper over |
35 |
// stl_hashtable. The major thing we define is operator[], because |
36 |
// we have a concept of a data_type which stl_hashtable doesn't |
37 |
// (it only has a key and a value). |
38 |
// |
39 |
// This is more different from sparse_hash_map than you might think, |
40 |
// because all iterators for sets are const (you obviously can't |
41 |
// change the key, and for sets there is no value). |
42 |
// |
43 |
// We adhere mostly to the STL semantics for hash-set. One important |
44 |
// exception is that insert() invalidates iterators entirely. On the |
45 |
// plus side, though, delete() doesn't invalidate iterators at all, or |
46 |
// even change the ordering of elements. |
47 |
// |
48 |
// Here are a few "power user" tips: |
49 |
// |
50 |
// 1) set_deleted_key(): |
51 |
// Unlike STL's hash_map, if you want to use erase() you |
52 |
// must call set_deleted_key() after construction. |
53 |
// |
54 |
// 2) resize(0): |
55 |
// When an item is deleted, its memory isn't freed right |
56 |
// away. This allows you to iterate over a hashtable, |
57 |
// and call erase(), without invalidating the iterator. |
58 |
// To force the memory to be freed, call resize(0). |
59 |
// |
60 |
// 3) set_resizing_parameters(0.0, 0.8): |
61 |
// Setting the shrink_resize_percent to 0.0 guarantees |
62 |
// that the hash table will never shrink. |
63 |
// |
64 |
// Guide to what kind of hash_set to use: |
65 |
// (1) dense_hash_set: fastest, uses the most memory |
66 |
// (2) sparse_hash_set: slowest, uses the least memory |
67 |
// (3) hash_set (STL): in the middle |
68 |
// Typically I use sparse_hash_set when I care about space and/or when |
69 |
// I need to save the hashtable on disk. I use hash_set otherwise. I |
70 |
// don't personally use dense_hash_set ever; the only use of |
71 |
// dense_hash_set I know of is to work around malloc() bugs in some |
72 |
// systems (dense_hash_set has a particularly simple allocation scheme). |
73 |
// |
74 |
// - dense_hash_set has, typically, a factor of 2 memory overhead (if your |
75 |
// data takes up X bytes, the hash_set uses X more bytes in overhead). |
76 |
// - sparse_hash_set has about 2 bits overhead per entry. |
77 |
// - sparse_hash_map can be 3-7 times slower than the others for lookup and, |
78 |
// especially, inserts. See time_hash_map.cc for details. |
79 |
// |
80 |
// See /usr/(local/)?doc/sparsehash-0.1/sparse_hash_set.html |
81 |
// for information about how to use this class. |
82 |
|
83 |
#ifndef _SPARSE_HASH_SET_H_ |
84 |
#define _SPARSE_HASH_SET_H_ |
85 |
|
86 |
#include <google/sparsehash/sparseconfig.h> |
87 |
#include <stdio.h> // for FILE * in read()/write() |
88 |
#include <algorithm> // for the default template args |
89 |
#include <functional> // for equal_to |
90 |
#include <memory> // for alloc<> |
91 |
#include <utility> // for pair<> |
92 |
#include HASH_FUN_H // defined in config.h |
93 |
#include <google/sparsehash/sparsehashtable.h> |
94 |
|
95 |
_START_GOOGLE_NAMESPACE_ |
96 |
|
97 |
using STL_NAMESPACE::pair; |
98 |
|
99 |
template <class Value, |
100 |
class HashFcn = SPARSEHASH_HASH<Value>, // defined in sparseconfig.h |
101 |
class EqualKey = STL_NAMESPACE::equal_to<Value>, |
102 |
class Alloc = STL_NAMESPACE::allocator<Value> > |
103 |
class sparse_hash_set { |
104 |
private: |
105 |
// Apparently identity is not stl-standard, so we define our own |
106 |
struct Identity { |
107 |
Value& operator()(Value& v) const { return v; } |
108 |
const Value& operator()(const Value& v) const { return v; } |
109 |
}; |
110 |
|
111 |
// The actual data |
112 |
typedef sparse_hashtable<Value, Value, HashFcn, Identity, EqualKey, Alloc> ht; |
113 |
ht rep; |
114 |
|
115 |
public: |
116 |
typedef typename ht::key_type key_type; |
117 |
typedef typename ht::value_type value_type; |
118 |
typedef typename ht::hasher hasher; |
119 |
typedef typename ht::key_equal key_equal; |
120 |
|
121 |
typedef typename ht::size_type size_type; |
122 |
typedef typename ht::difference_type difference_type; |
123 |
typedef typename ht::const_pointer pointer; |
124 |
typedef typename ht::const_pointer const_pointer; |
125 |
typedef typename ht::const_reference reference; |
126 |
typedef typename ht::const_reference const_reference; |
127 |
|
128 |
typedef typename ht::const_iterator iterator; |
129 |
typedef typename ht::const_iterator const_iterator; |
130 |
|
131 |
|
132 |
// Iterator functions -- recall all iterators are const |
133 |
iterator begin() const { return rep.begin(); } |
134 |
iterator end() const { return rep.end(); } |
135 |
|
136 |
|
137 |
// Accessor functions |
138 |
hasher hash_funct() const { return rep.hash_funct(); } |
139 |
key_equal key_eq() const { return rep.key_eq(); } |
140 |
|
141 |
|
142 |
// Constructors |
143 |
explicit sparse_hash_set(size_type expected_max_items_in_table = 0, |
144 |
const hasher& hf = hasher(), |
145 |
const key_equal& eql = key_equal()) |
146 |
: rep(expected_max_items_in_table, hf, eql) { } |
147 |
|
148 |
template <class InputIterator> |
149 |
sparse_hash_set(InputIterator f, InputIterator l, |
150 |
size_type expected_max_items_in_table = 0, |
151 |
const hasher& hf = hasher(), |
152 |
const key_equal& eql = key_equal()) |
153 |
: rep(expected_max_items_in_table, hf, eql) { |
154 |
rep.insert(f, l); |
155 |
} |
156 |
// We use the default copy constructor |
157 |
// We use the default operator=() |
158 |
// We use the default destructor |
159 |
|
160 |
void clear() { rep.clear(); } |
161 |
void swap(sparse_hash_set& hs) { rep.swap(hs.rep); } |
162 |
|
163 |
|
164 |
// Functions concerning size |
165 |
size_type size() const { return rep.size(); } |
166 |
size_type max_size() const { return rep.max_size(); } |
167 |
bool empty() const { return rep.empty(); } |
168 |
size_type bucket_count() const { return rep.bucket_count(); } |
169 |
size_type max_bucket_count() const { return rep.max_bucket_count(); } |
170 |
|
171 |
void resize(size_type hint) { rep.resize(hint); } |
172 |
|
173 |
void set_resizing_parameters(float shrink, float grow) { |
174 |
return rep.set_resizing_parameters(shrink, grow); |
175 |
} |
176 |
|
177 |
// Lookup routines |
178 |
iterator find(const key_type& key) const { return rep.find(key); } |
179 |
|
180 |
size_type count(const key_type& key) const { return rep.count(key); } |
181 |
|
182 |
pair<iterator, iterator> equal_range(const key_type& key) const { |
183 |
return rep.equal_range(key); |
184 |
} |
185 |
|
186 |
// Insertion routines |
187 |
pair<iterator, bool> insert(const value_type& obj) { |
188 |
pair<typename ht::iterator, bool> p = rep.insert(obj); |
189 |
return pair<iterator, bool>(p.first, p.second); // const to non-const |
190 |
} |
191 |
template <class InputIterator> |
192 |
void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } |
193 |
void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } |
194 |
// required for std::insert_iterator; the passed-in iterator is ignored |
195 |
iterator insert(iterator, const value_type& obj) { return insert(obj).first; } |
196 |
|
197 |
|
198 |
// Deletion routines |
199 |
// THESE ARE NON-STANDARD! I make you specify an "impossible" key |
200 |
// value to identify deleted buckets. You can change the key as |
201 |
// time goes on, or get rid of it entirely to be insert-only. |
202 |
void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } |
203 |
void clear_deleted_key() { rep.clear_deleted_key(); } |
204 |
|
205 |
// These are standard |
206 |
size_type erase(const key_type& key) { return rep.erase(key); } |
207 |
void erase(iterator it) { rep.erase(it); } |
208 |
void erase(iterator f, iterator l) { rep.erase(f, l); } |
209 |
|
210 |
|
211 |
// Comparison |
212 |
bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; } |
213 |
bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; } |
214 |
|
215 |
|
216 |
// I/O -- this is an add-on for writing metainformation to disk |
217 |
bool write_metadata(FILE *fp) { return rep.write_metadata(fp); } |
218 |
bool read_metadata(FILE *fp) { return rep.read_metadata(fp); } |
219 |
bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); } |
220 |
bool read_nopointer_data(FILE *fp) { return rep.read_nopointer_data(fp); } |
221 |
}; |
222 |
|
223 |
template <class Val, class HashFcn, class EqualKey, class Alloc> |
224 |
inline void swap(sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1, |
225 |
sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) { |
226 |
hs1.swap(hs2); |
227 |
} |
228 |
|
229 |
_END_GOOGLE_NAMESPACE_ |
230 |
|
231 |
#endif /* _SPARSE_HASH_SET_H_ */ |