1 |
// Copyright (c) 2005, Google Inc. |
2 |
// All rights reserved. |
3 |
// |
4 |
// Redistribution and use in source and binary forms, with or without |
5 |
// modification, are permitted provided that the following conditions are |
6 |
// met: |
7 |
// |
8 |
// * Redistributions of source code must retain the above copyright |
9 |
// notice, this list of conditions and the following disclaimer. |
10 |
// * Redistributions in binary form must reproduce the above |
11 |
// copyright notice, this list of conditions and the following disclaimer |
12 |
// in the documentation and/or other materials provided with the |
13 |
// distribution. |
14 |
// * Neither the name of Google Inc. nor the names of its |
15 |
// contributors may be used to endorse or promote products derived from |
16 |
// this software without specific prior written permission. |
17 |
// |
18 |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 |
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 |
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 |
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 |
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 |
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 |
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 |
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 |
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 |
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 |
|
30 |
// --- |
31 |
// Author: Craig Silverstein |
32 |
// |
33 |
// A sparsetable is a random container that implements a sparse array, |
34 |
// that is, an array that uses very little memory to store unassigned |
35 |
// indices (in this case, between 1-2 bits per unassigned index). For |
36 |
// instance, if you allocate an array of size 5 and assign a[2] = <big |
37 |
// struct>, then a[2] will take up a lot of memory but a[0], a[1], |
38 |
// a[3], and a[4] will not. Array elements that have a value are |
39 |
// called "assigned". Array elements that have no value yet, or have |
40 |
// had their value cleared using erase() or clear(), are called |
41 |
// "unassigned". |
42 |
// |
43 |
// Unassigned values seem to have the default value of T (see below). |
44 |
// Nevertheless, there is a difference between an unassigned index and |
45 |
// one explicitly assigned the value of T(). The latter is considered |
46 |
// assigned. |
47 |
// |
48 |
// Access to an array element is constant time, as is insertion and |
49 |
// deletion. Insertion and deletion may be fairly slow, however: |
50 |
// because of this container's memory economy, each insert and delete |
51 |
// causes a memory reallocation. |
52 |
// |
53 |
// See /usr/(local/)?doc/sparsehash-0.1/sparsetable.html |
54 |
// for information about how to use this class. |
55 |
|
56 |
#ifndef _SPARSETABLE_H_ |
57 |
#define _SPARSETABLE_H_ |
58 |
|
59 |
#include <google/sparsehash/sparseconfig.h> |
60 |
#include <stdlib.h> // for malloc/free |
61 |
#include <stdio.h> // to read/write tables |
62 |
#ifdef HAVE_STDINT_H |
63 |
#include <stdint.h> // the normal place uint16_t is defined |
64 |
#endif |
65 |
#ifdef HAVE_SYS_TYPES_H |
66 |
#include <sys/types.h> // the normal place u_int16_t is defined |
67 |
#endif |
68 |
#ifdef HAVE_INTTYPES_H |
69 |
#include <inttypes.h> // a third place for uint16_t or u_int16_t |
70 |
#endif |
71 |
#include <assert.h> // for bounds checking |
72 |
#include <iterator> // to define reverse_iterator for me |
73 |
#include <algorithm> // equal, lexicographical_compare, swap,... |
74 |
#include <memory> // uninitialized_copy |
75 |
#include <vector> // a sparsetable is a vector of groups |
76 |
#include <google/type_traits.h> // for true_type, integral_constant, etc. |
77 |
|
78 |
#if STDC_HEADERS |
79 |
#include <string.h> // for memcpy |
80 |
#else |
81 |
#if !HAVE_MEMCPY |
82 |
#define memcpy(d, s, n) bcopy ((s), (d), (n)) |
83 |
#endif |
84 |
#endif |
85 |
|
86 |
_START_GOOGLE_NAMESPACE_ |
87 |
|
88 |
#ifndef HAVE_U_INT16_T |
89 |
# if defined HAVE_UINT16_T |
90 |
typedef uint16_t u_int16_t; // true on solaris, possibly other C99 libc's |
91 |
# elif defined HAVE___UINT16 |
92 |
typedef __int16 int16_t; // true on vc++7 |
93 |
typedef unsigned __int16 u_int16_t; |
94 |
# else |
95 |
// Cannot find a 16-bit integer type. Hoping for the best with "short"... |
96 |
typedef short int int16_t; |
97 |
typedef unsigned short int u_int16_t; |
98 |
# endif |
99 |
#endif |
100 |
|
101 |
using STL_NAMESPACE::vector; |
102 |
using STL_NAMESPACE::uninitialized_copy; |
103 |
|
104 |
// The smaller this is, the faster lookup is (because the group bitmap is
// smaller) and the faster insert is, because there's less to move.
// On the other hand, there are more groups.  Since group::size_type is
// a short, this number should be of the form 32*x + 16 to avoid waste.
// This is the default number of buckets per sparsegroup (the "M" in the
// SPARSE-TABLE comment further below).
static const u_int16_t DEFAULT_SPARSEGROUP_SIZE = 48;   // fits in 1.5 words
109 |
|
110 |
|
111 |
// A NOTE ON ASSIGNING: |
112 |
// A sparse table does not actually allocate memory for entries |
113 |
// that are not filled. Because of this, it becomes complicated |
114 |
// to have a non-const iterator: we don't know, if the iterator points |
115 |
// to a not-filled bucket, whether you plan to fill it with something |
116 |
// or whether you plan to read its value (in which case you'll get |
117 |
// the default bucket value). Therefore, while we can define const |
118 |
// operations in a pretty 'normal' way, for non-const operations, we |
119 |
// define something that returns a helper object with operator= and |
120 |
// operator& that allocate a bucket lazily. We use this for table[] |
121 |
// and also for regular table iterators. |
122 |
|
123 |
// Proxy object returned when a non-const table is indexed.  It defers
// the read-vs-write decision (see "A NOTE ON ASSIGNING" above):
// assigning through it calls set() on the table, reading from it
// converts via get(), and taking its address forces bucket allocation
// via mutating_get().
template <class tabletype>
class table_element_adaptor {
 public:
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::size_type size_type;
  typedef typename tabletype::reference reference;
  typedef typename tabletype::pointer pointer;

  // Bind to bucket p of *tbl; nothing is allocated yet.
  table_element_adaptor(tabletype *tbl, size_type p) : table_(tbl), pos_(p) { }

  // Write path: hand the value to the table, which may allocate.
  table_element_adaptor& operator=(const value_type &val) {
    table_->set(pos_, val);
    return *this;
  }

  // Read path: behave like the stored value (or the default value, if
  // the bucket is unassigned).
  operator value_type() { return table_->get(pos_); }

  // Address-of: allocate the bucket if necessary and expose a real pointer.
  pointer operator&() { return &table_->mutating_get(pos_); }

 private:
  tabletype* table_;   // the table we are a window into
  size_type pos_;      // the bucket we stand in for
};
144 |
|
145 |
// Our iterator as simple as iterators can be: basically it's just |
146 |
// the index into our table. Dereference, the only complicated |
147 |
// thing, we punt to the table class. This just goes to show how |
148 |
// much machinery STL requires to do even the most trivial tasks. |
149 |
// |
150 |
// By templatizing over tabletype, we have one iterator type which |
151 |
// we can use for both sparsetables and sparsebins. In fact it |
152 |
// works on any class that allows size() and operator[] (eg vector), |
153 |
// as long as it does the standard STL typedefs too (eg value_type). |
154 |
|
155 |
template <class tabletype> |
156 |
class table_iterator { |
157 |
public: |
158 |
typedef table_iterator iterator; |
159 |
|
160 |
typedef STL_NAMESPACE::random_access_iterator_tag iterator_category; |
161 |
typedef typename tabletype::value_type value_type; |
162 |
typedef typename tabletype::difference_type difference_type; |
163 |
typedef typename tabletype::size_type size_type; |
164 |
typedef table_element_adaptor<tabletype> reference; |
165 |
typedef table_element_adaptor<tabletype>* pointer; |
166 |
|
167 |
// The "real" constructor |
168 |
table_iterator(tabletype *tbl, size_type p) |
169 |
: table(tbl), pos(p) { } |
170 |
// The default constructor, used when I define vars of type table::iterator |
171 |
table_iterator() : table(NULL), pos(0) { } |
172 |
// The copy constructor, for when I say table::iterator foo = tbl.begin() |
173 |
// The default destructor is fine; we don't define one |
174 |
// The default operator= is fine; we don't define one |
175 |
|
176 |
// The main thing our iterator does is dereference. If the table entry |
177 |
// we point to is empty, we return the default value type. |
178 |
// This is the big different function from the const iterator. |
179 |
reference operator*() { |
180 |
return table_element_adaptor<tabletype>(table, pos); |
181 |
} |
182 |
pointer operator->() { return &(operator*()); } |
183 |
|
184 |
// Helper function to assert things are ok; eg pos is still in range |
185 |
void check() const { |
186 |
assert(table); |
187 |
assert(pos <= table->size()); |
188 |
} |
189 |
|
190 |
// Arithmetic: we just do arithmetic on pos. We don't even need to |
191 |
// do bounds checking, since STL doesn't consider that it's job. :-) |
192 |
iterator& operator+=(size_type t) { pos += t; check(); return *this; } |
193 |
iterator& operator-=(size_type t) { pos -= t; check(); return *this; } |
194 |
iterator& operator++() { ++pos; check(); return *this; } |
195 |
iterator& operator--() { --pos; check(); return *this; } |
196 |
iterator operator++(int) { iterator tmp(*this); // for x++ |
197 |
++pos; check(); return tmp; } |
198 |
iterator operator--(int) { iterator tmp(*this); // for x-- |
199 |
--pos; check(); return tmp; } |
200 |
iterator operator+(difference_type i) const { iterator tmp(*this); |
201 |
tmp += i; return tmp; } |
202 |
iterator operator-(difference_type i) const { iterator tmp(*this); |
203 |
tmp -= i; return tmp; } |
204 |
difference_type operator-(iterator it) const { // for "x = it2 - it" |
205 |
assert(table == it.table); |
206 |
return pos - it.pos; |
207 |
} |
208 |
reference operator[](difference_type n) const { |
209 |
return *(*this + n); // simple though not totally efficient |
210 |
} |
211 |
|
212 |
// Comparisons. |
213 |
bool operator==(const iterator& it) const { |
214 |
return table == it.table && pos == it.pos; |
215 |
} |
216 |
bool operator<(const iterator& it) const { |
217 |
assert(table == it.table); // life is bad bad bad otherwise |
218 |
return pos < it.pos; |
219 |
} |
220 |
bool operator!=(const iterator& it) const { return !(*this == it); } |
221 |
bool operator<=(const iterator& it) const { return !(it < *this); } |
222 |
bool operator>(const iterator& it) const { return it < *this; } |
223 |
bool operator>=(const iterator& it) const { return !(*this < it); } |
224 |
|
225 |
// Here's the info we actually need to be an iterator |
226 |
tabletype *table; // so we can dereference and bounds-check |
227 |
size_type pos; // index into the table |
228 |
}; |
229 |
|
230 |
// support for "3 + iterator" has to be defined outside the class, alas |
231 |
template<class T> |
232 |
table_iterator<T> operator+(typename table_iterator<T>::difference_type i, |
233 |
table_iterator<T> it) { |
234 |
return it + i; // so people can say it2 = 3 + it |
235 |
} |
236 |
|
237 |
template <class tabletype> |
238 |
class const_table_iterator { |
239 |
public: |
240 |
typedef table_iterator<tabletype> iterator; |
241 |
typedef const_table_iterator const_iterator; |
242 |
|
243 |
typedef STL_NAMESPACE::random_access_iterator_tag iterator_category; |
244 |
typedef typename tabletype::value_type value_type; |
245 |
typedef typename tabletype::difference_type difference_type; |
246 |
typedef typename tabletype::size_type size_type; |
247 |
typedef typename tabletype::const_reference reference; // we're const-only |
248 |
typedef typename tabletype::const_pointer pointer; |
249 |
|
250 |
// The "real" constructor |
251 |
const_table_iterator(const tabletype *tbl, size_type p) |
252 |
: table(tbl), pos(p) { } |
253 |
// The default constructor, used when I define vars of type table::iterator |
254 |
const_table_iterator() : table(NULL), pos(0) { } |
255 |
// The copy constructor, for when I say table::iterator foo = tbl.begin() |
256 |
// Also converts normal iterators to const iterators |
257 |
const_table_iterator(const iterator &from) |
258 |
: table(from.table), pos(from.pos) { } |
259 |
// The default destructor is fine; we don't define one |
260 |
// The default operator= is fine; we don't define one |
261 |
|
262 |
// The main thing our iterator does is dereference. If the table entry |
263 |
// we point to is empty, we return the default value type. |
264 |
reference operator*() const { return (*table)[pos]; } |
265 |
pointer operator->() const { return &(operator*()); } |
266 |
|
267 |
// Helper function to assert things are ok; eg pos is still in range |
268 |
void check() const { |
269 |
assert(table); |
270 |
assert(pos <= table->size()); |
271 |
} |
272 |
|
273 |
// Arithmetic: we just do arithmetic on pos. We don't even need to |
274 |
// do bounds checking, since STL doesn't consider that it's job. :-) |
275 |
const_iterator& operator+=(size_type t) { pos += t; check(); return *this; } |
276 |
const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; } |
277 |
const_iterator& operator++() { ++pos; check(); return *this; } |
278 |
const_iterator& operator--() { --pos; check(); return *this; } |
279 |
const_iterator operator++(int) { const_iterator tmp(*this); // for x++ |
280 |
++pos; check(); return tmp; } |
281 |
const_iterator operator--(int) { const_iterator tmp(*this); // for x-- |
282 |
--pos; check(); return tmp; } |
283 |
const_iterator operator+(difference_type i) const { const_iterator tmp(*this); |
284 |
tmp += i; return tmp; } |
285 |
const_iterator operator-(difference_type i) const { const_iterator tmp(*this); |
286 |
tmp -= i; return tmp; } |
287 |
difference_type operator-(const_iterator it) const { // for "x = it2 - it" |
288 |
assert(table == it.table); |
289 |
return pos - it.pos; |
290 |
} |
291 |
reference operator[](difference_type n) const { |
292 |
return *(*this + n); // simple though not totally efficient |
293 |
} |
294 |
|
295 |
// Comparisons. |
296 |
bool operator==(const const_iterator& it) const { |
297 |
return table == it.table && pos == it.pos; |
298 |
} |
299 |
bool operator<(const const_iterator& it) const { |
300 |
assert(table == it.table); // life is bad bad bad otherwise |
301 |
return pos < it.pos; |
302 |
} |
303 |
bool operator!=(const const_iterator& it) const { return !(*this == it); } |
304 |
bool operator<=(const const_iterator& it) const { return !(it < *this); } |
305 |
bool operator>(const const_iterator& it) const { return it < *this; } |
306 |
bool operator>=(const const_iterator& it) const { return !(*this < it); } |
307 |
|
308 |
// Here's the info we actually need to be an iterator |
309 |
const tabletype *table; // so we can dereference and bounds-check |
310 |
size_type pos; // index into the table |
311 |
}; |
312 |
|
313 |
// support for "3 + iterator" has to be defined outside the class, alas |
314 |
template<class T> |
315 |
const_table_iterator<T> operator+(typename |
316 |
const_table_iterator<T>::difference_type i, |
317 |
const_table_iterator<T> it) { |
318 |
return it + i; // so people can say it2 = 3 + it |
319 |
} |
320 |
|
321 |
|
322 |
// --------------------------------------------------------------------------- |
323 |
|
324 |
|
325 |
/* |
326 |
// This is a 2-D iterator. You specify a begin and end over a list |
327 |
// of *containers*. We iterate over each container by iterating over |
328 |
// it. It's actually simple: |
329 |
// VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---, |
330 |
// | ________________________________________________/ |
331 |
// | \_> VECTOR[1].begin() --------> VECTOR[1].end() -, |
332 |
// | ___________________________________________________/ |
333 |
// v \_> ...... |
334 |
// VECTOR.end() |
335 |
// |
336 |
// It's impossible to do random access on one of these things in constant |
337 |
// time, so it's just a bidirectional iterator. |
338 |
// |
339 |
// Unfortunately, because we need to use this for a non-empty iterator, |
340 |
// we use nonempty_begin() and nonempty_end() instead of begin() and end() |
341 |
// (though only going across, not down). |
342 |
*/ |
343 |
|
344 |
#define TWOD_BEGIN_ nonempty_begin |
345 |
#define TWOD_END_ nonempty_end |
346 |
#define TWOD_ITER_ nonempty_iterator |
347 |
#define TWOD_CONST_ITER_ const_nonempty_iterator |
348 |
|
349 |
// Bidirectional iterator over the nonempty elements of a container of
// containers (the diagram in the comment above).  row_begin/row_end
// bound the rows; row_current/col_current mark the current element.
// Each row is traversed over its nonempty_begin()..nonempty_end()
// range, and rows with no nonempty elements are skipped entirely.
template <class containertype>
class two_d_iterator {
 public:
  typedef two_d_iterator iterator;

  typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor.  begin and end specify how many rows we have
  // (in the diagram above); we always iterate over each row completely.
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( row_current != row_end ) {
      col_current = row_current->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  // If you want to start at an arbitrary place (row curr, column col),
  // you can, I guess
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr,
                 typename containertype::value_type::TWOD_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                   // in case cur->begin() == cur->end()
  }
  // The default constructor, used when I define vars of type table::iterator
  two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *col_current; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic: we just do arithmetic on pos.  We don't even need to
  // do bounds checking, since STL doesn't consider that its job.  :-)
  // NOTE: this is not amortized constant time!  What do we do about it?
  //
  // Skip forward over any empty rows until col_current points at a real
  // element -- or until we reach row_end, where col_current is left
  // untouched (it is never examined for the end() iterator; see
  // operator== below).
  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) {  // end of current row
      ++row_current;                                // go to beginning of next
      if ( row_current != row_end )                 // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                                      // don't go past row_end
    }
  }

  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  // Decrement: back up over empty rows (and over the end() state, where
  // col_current is meaningless) until we can step back one element.
  iterator& operator--() {
    while ( row_current == row_end ||
            col_current == row_current->TWOD_BEGIN_() ) {
      assert(row_current != row_begin);   // decrementing begin() is an error
      --row_current;
      col_current = row_current->TWOD_END_();             // this is 1 too far
    }
    --col_current;
    return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
  iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }


  // Comparisons.  When both iterators sit at row_end, col_current is
  // ignored, since it was never assigned for the end() iterator.
  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }


  // Here's the info we actually need to be an iterator
  // These need to be public so we convert from iterator to const_iterator
  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;
};
438 |
|
439 |
// The same thing again, but this time const.  :-(
// Read-only twin of two_d_iterator: identical traversal logic, but over
// const_iterators / const_nonempty_iterators, yielding const references.
template <class containertype>
class const_two_d_iterator {
 public:
  typedef const_two_d_iterator iterator;

  typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::const_reference reference;
  typedef typename _tmp_vt::const_pointer pointer;

  // The "real" constructor: iterate the rows [begin, end), starting at curr.
  const_two_d_iterator(typename containertype::const_iterator begin,
                       typename containertype::const_iterator end,
                       typename containertype::const_iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( curr != end ) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary place (row curr, column col).
  const_two_d_iterator(typename containertype::const_iterator begin,
                       typename containertype::const_iterator end,
                       typename containertype::const_iterator curr,
                       typename containertype::value_type::TWOD_CONST_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                   // in case cur->begin() == cur->end()
  }
  // The default constructor, used when declaring vars of this type.
  const_two_d_iterator()
    : row_begin(), row_end(), row_current(), col_current() {
  }
  // Need this explicitly so we can convert normal iterators to const iterators
  const_two_d_iterator(const two_d_iterator<containertype>& it) :
    row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current),
    col_current(it.col_current) { }

  typename containertype::const_iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_CONST_ITER_ col_current;


  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR
  reference operator*() const { return *col_current; }
  pointer operator->() const { return &(operator*()); }

  // Skip forward over empty rows until col_current points at a real
  // element, or until we reach row_end (where col_current is not
  // meaningful; see operator== below).
  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) {  // end of current row
      ++row_current;                                // go to beginning of next
      if ( row_current != row_end )                 // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                                      // don't go past row_end
    }
  }
  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  // Back up over empty rows (and the end() state) to the previous element.
  iterator& operator--() {
    while ( row_current == row_end ||
            col_current == row_current->TWOD_BEGIN_() ) {
      assert(row_current != row_begin);   // decrementing begin() is an error
      --row_current;
      col_current = row_current->TWOD_END_();             // this is 1 too far
    }
    --col_current;
    return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
  iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }

  // When both iterators sit at row_end, col_current is ignored, since it
  // was never assigned for the end() iterator.
  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};
521 |
|
522 |
// We provide yet another version, to be as frugal with memory as
// possible.  This one frees each block of memory as it finishes
// iterating over it.  By the end, the entire table is freed.
// For understandable reasons, you can only iterate over it once,
// which is why it's an input iterator.
template <class containertype>
class destructive_two_d_iterator {
 public:
  typedef destructive_two_d_iterator iterator;

  typedef STL_NAMESPACE::input_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor: iterate the rows [begin, end), starting at curr.
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( curr != end ) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary place (row curr, column col).
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr,
                             typename containertype::value_type::TWOD_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                   // in case cur->begin() == cur->end()
  }
  // The default constructor, used when declaring vars of this type.
  destructive_two_d_iterator()
    : row_begin(), row_end(), row_current(), col_current() {
  }

  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;

  // This is the part that destroys: as soon as we step past the last
  // nonempty element of a row, that row is clear()ed, so memory is
  // released row by row as iteration proceeds.
  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) {  // end of current row
      row_current->clear();                         // the destructive part
      // It would be nice if we could decrement sparsetable->num_buckets here
      ++row_current;                                // go to beginning of next
      if ( row_current != row_end )                 // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                                      // don't go past row_end
    }
  }

  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
  reference operator*() const { return *col_current; }
  pointer operator->() const { return &(operator*()); }

  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // When both iterators sit at row_end, col_current is ignored, since it
  // was never assigned for the end() iterator.
  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};
596 |
|
597 |
#undef TWOD_BEGIN_ |
598 |
#undef TWOD_END_ |
599 |
#undef TWOD_ITER_ |
600 |
#undef TWOD_CONST_ITER_ |
601 |
|
602 |
|
603 |
|
604 |
|
605 |
// SPARSE-TABLE |
606 |
// ------------ |
607 |
// The idea is that a table with (logically) t buckets is divided |
608 |
// into t/M *groups* of M buckets each. (M is a constant set in |
609 |
// GROUP_SIZE for efficiency.) Each group is stored sparsely. |
610 |
// Thus, inserting into the table causes some array to grow, which is |
611 |
// slow but still constant time. Lookup involves doing a |
612 |
// logical-position-to-sparse-position lookup, which is also slow but |
613 |
// constant time. The larger M is, the slower these operations are |
614 |
// but the less overhead (slightly). |
615 |
// |
616 |
// To store the sparse array, we store a bitmap B, where B[i] = 1 iff |
617 |
// bucket i is non-empty. Then to look up bucket i we really look up |
618 |
// array[# of 1s before i in B]. This is constant time for fixed M. |
619 |
// |
620 |
// Terminology: the position of an item in the overall table (from |
621 |
// 1 .. t) is called its "location." The logical position in a group |
622 |
// (from 1 .. M ) is called its "position." The actual location in |
623 |
// the array (from 1 .. # of non-empty buckets in the group) is |
624 |
// called its "offset." |
625 |
|
626 |
// The weird mod in the offset is entirely to quiet compiler warnings
// as is the cast to int after doing the "x mod 256"
//
// PUT_ writes to `fp` the single byte of `take_from` that sits at bit
// position `offset`; GET_ reads one byte from `fp` and ORs it into
// `add_to` at bit position `offset`.  Both expand to statements that
// `return false` on I/O error/EOF, so they may only be used inside a
// function returning bool (or compatible).  They also rely on names at
// the expansion site: a FILE* `fp`, and for GET_ an int `x` and the
// type `size_type`.
#define PUT_(take_from, offset)  do {                                         \
  if (putc(static_cast<int>(((take_from) >> ((offset) % (sizeof(take_from)*8)))\
                            % 256), fp)                                       \
      == EOF)                                                                 \
    return false;                                                             \
} while (0)

#define GET_(add_to, offset)  do {                                            \
  if ((x=getc(fp)) == EOF)                                                    \
    return false;                                                             \
  else                                                                        \
    add_to |= (static_cast<size_type>(x) << ((offset) % (sizeof(add_to)*8))); \
} while (0)
641 |
|
642 |
// A sparsegroup represents a fixed-size logical array of GROUP_SIZE
// buckets, but physically stores only the assigned (non-empty) buckets,
// packed contiguously in 'group'.  A bitmap of GROUP_SIZE bits records
// which logical positions are assigned; the physical offset of logical
// position i is the number of set bits before position i (see
// pos_to_offset()).  Storage is managed with realloc()/free(), so
// elements are constructed via placement new and destroyed by explicit
// destructor calls.
template <class T, u_int16_t GROUP_SIZE>
class sparsegroup {
 public:
  // Basic types
  typedef T value_type;
  typedef value_type* pointer;
  typedef const value_type* const_pointer;
  typedef table_iterator<sparsegroup<T, GROUP_SIZE> > iterator;
  typedef const_table_iterator<sparsegroup<T, GROUP_SIZE> > const_iterator;
  typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE> > element_adaptor;
  typedef value_type &reference;
  typedef const value_type &const_reference;
  typedef u_int16_t size_type;          // max # of buckets
  typedef int16_t difference_type;
  typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;

  // These are our special iterators, that go over non-empty buckets in a
  // group.  These aren't const-only because you can change non-empty bcks.
  typedef pointer nonempty_iterator;
  typedef const_pointer const_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;

  // Iterator functions.  begin()/end() span all GROUP_SIZE logical
  // buckets, empty or not (dereferencing an empty bucket yields
  // default_value()).
  iterator begin() { return iterator(this, 0); }
  const_iterator begin() const { return const_iterator(this, 0); }
  iterator end() { return iterator(this, size()); }
  const_iterator end() const { return const_iterator(this, size()); }
  reverse_iterator rbegin() { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend() { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // We'll have versions for our special non-empty iterator too.  These
  // iterate directly over the packed 'group' array, so they visit only
  // the num_buckets assigned elements.
  nonempty_iterator nonempty_begin() { return group; }
  const_nonempty_iterator nonempty_begin() const { return group; }
  nonempty_iterator nonempty_end() { return group + num_buckets; }
  const_nonempty_iterator nonempty_end() const { return group + num_buckets; }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }


  // This gives us the "default" value to return for an empty bucket.
  // We just use the default constructor on T, the template type
  const_reference default_value() const {
    static value_type defaultval = value_type();
    return defaultval;
  }


 private:
  // We need to do all this bit manipulation, of course.  ick
  // charbit(i): which byte of the bitmap holds bit i.
  // modbit(i):  a mask selecting bit i within that byte.
  static size_type charbit(size_type i) { return i >> 3; }
  static size_type modbit(size_type i) { return 1 << (i&7); }
  int bmtest(size_type i) const { return bitmap[charbit(i)] & modbit(i); }
  void bmset(size_type i) { bitmap[charbit(i)] |= modbit(i); }
  void bmclear(size_type i) { bitmap[charbit(i)] &= ~modbit(i); }

  // realloc() wrapper that exits the process instead of returning NULL,
  // so callers never have to handle allocation failure.
  void* realloc_or_die(void* ptr, size_t num_bytes) {
    void* retval = realloc(ptr, num_bytes);
    if (retval == NULL) {
      // We really should use PRIuS here, but I don't want to have to add
      // a whole new configure option, with concomitant macro namespace
      // pollution, just to print this (unlikely) error message.  So I cast.
      fprintf(stderr, "FATAL ERROR: failed to allocate %lu bytes for ptr %p",
              static_cast<unsigned long>(num_bytes), ptr);
      exit(1);
    }
    return retval;
  }

  // Returns raw (uninitialized) storage for n value_types; the caller is
  // responsible for constructing elements in it (placement new).
  value_type* allocate_group(size_t n) {
    return static_cast<value_type*>(realloc_or_die(NULL,
                                                   n * sizeof(value_type)));
  }

  // Destroys every stored element, then releases the storage.
  void free_group() {
    // Valid even for empty group, because NULL+0 is defined to be NULL
    value_type* end_it = group + num_buckets;
    for (value_type* p = group; p != end_it; ++p)
      p->~value_type();
    free(group);
    group = NULL;
  }

 public:                         // get_iter() in sparsetable needs it
  // We need a small function that tells us how many set bits there are
  // in positions 0..i-1 of the bitmap.  It uses a big table.
  // We make it static so templates don't allocate lots of these tables
  static size_type pos_to_offset(const unsigned char *bm, size_type pos) {
    // We could make these ints.  The tradeoff is size (eg does it overwhelm
    // the cache?) vs efficiency in referencing sub-word-sized array elements
    static const char bits_in[256] = {      // # of bits set in one char
      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
    };
    size_type retval = 0;

    // [Note: condition pos > 8 is an optimization; convince yourself we
    // give exactly the same result as if we had pos >= 8 here instead.]
    for ( ; pos > 8; pos -= 8 )            // bm[0..pos/8-1]
      retval += bits_in[*bm++];            // chars we want *all* bits in
    return retval + bits_in[*bm & ((1 << pos)-1)];  // the char that includes pos
  }

  size_type pos_to_offset(size_type pos) const {  // not static but still const
    return pos_to_offset(bitmap, pos);
  }


 public:
  // Constructors -- default and copy -- and destructor
  sparsegroup() : group(0), num_buckets(0) { memset(bitmap, 0, sizeof(bitmap)); }
  sparsegroup(const sparsegroup& x) : group(0), num_buckets(x.num_buckets) {
    if ( num_buckets ) {
      group = allocate_group(x.num_buckets);
      uninitialized_copy(x.group, x.group + x.num_buckets, group);
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
  }
  ~sparsegroup() { free_group(); }

  // Operator= is just like the copy constructor, I guess
  // TODO(austern): Make this exception safe. Handle exceptions in value_type's
  // copy constructor.
  sparsegroup &operator=(const sparsegroup& x) {
    if ( &x == this ) return *this;        // x = x
    if ( x.num_buckets == 0 ) {
      free_group();
    } else {
      // Copy into fresh storage first, so the old contents are still
      // intact (and destructible via free_group) until the copy is done.
      value_type* p = allocate_group(x.num_buckets);
      uninitialized_copy(x.group, x.group + x.num_buckets, p);
      free_group();
      group = p;
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
    num_buckets = x.num_buckets;
    return *this;
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparsegroup& x) {
    STL_NAMESPACE::swap(group, x.group);
    for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i )
      STL_NAMESPACE::swap(bitmap[i], x.bitmap[i]);  // swap not defined on arrays
    STL_NAMESPACE::swap(num_buckets, x.num_buckets);
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    free_group();
    memset(bitmap, 0, sizeof(bitmap));
    num_buckets = 0;
  }

  // Functions that tell you about size.  Alas, these aren't so useful
  // because our table is always fixed size.
  size_type size() const { return GROUP_SIZE; }
  size_type max_size() const { return GROUP_SIZE; }
  bool empty() const { return false; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const { return num_buckets; }


  // get()/set() are explicitly const/non-const.  You can use [] if
  // you want something that can be either (potentially more expensive).
  const_reference get(size_type i) const {
    if ( bmtest(i) )           // bucket i is occupied
      return group[pos_to_offset(bitmap, i)];
    else
      return default_value();  // return the default reference
  }

  // TODO(csilvers): make protected + friend
  reference mutating_get(size_type i) {    // fills bucket i before getting
    if ( !bmtest(i) )
      set(i, default_value());
    return group[pos_to_offset(bitmap, i)];
  }

  // Syntactic sugar.  It's easy to return a const reference.  To
  // return a non-const reference, we need to use the assigner adaptor.
  const_reference operator[](size_type i) const {
    return get(i);
  }

  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }

 private:
  // Create space at group[offset], assuming value_type has trivial
  // copy constructor and destructor.  (Really, we want it to have
  // "trivial move", because that's what realloc and memmove both do.
  // But there's no way to capture that using type_traits, so we
  // pretend that move(x, y) is equivalent to "x.~T(); new(x) T(y);"
  // which is pretty much correct, if a bit conservative.)
  void set_aux(size_type offset, true_type) {
    group = (value_type *)
            realloc_or_die(group, sizeof(*group) * (num_buckets+1));
    // This is equivalent to memmove(), but faster on my Intel P4,
    // at least with gcc4.1 -O2 / glibc 2.3.6.
    for (size_type i = num_buckets; i > offset; --i)
      memcpy(group + i, group + i-1, sizeof(*group));
  }

  // Create space at group[offset], without special assumptions about value_type
  void set_aux(size_type offset, false_type) {
    // This is valid because 0 <= offset <= num_buckets
    value_type* p = allocate_group(num_buckets + 1);
    uninitialized_copy(group, group + offset, p);
    uninitialized_copy(group + offset, group + num_buckets, p + offset + 1);
    free_group();
    group = p;
  }

 public:
  // This returns a reference to the inserted item (which is a copy of val).
  // TODO(austern): Make this exception safe: handle exceptions from
  // value_type's copy constructor.
  reference set(size_type i, const_reference val) {
    size_type offset = pos_to_offset(bitmap, i);  // where we'll find (or insert)
    if ( bmtest(i) ) {
      // Delete the old value, which we're replacing with the new one
      group[offset].~value_type();
    } else {
      typedef integral_constant<bool,
                                (has_trivial_copy<value_type>::value &&
                                 has_trivial_destructor<value_type>::value)>
          realloc_and_memmove_ok;  // we pretend mv(x,y) == "x.~T(); new(x) T(y)"
      set_aux(offset, realloc_and_memmove_ok());
      ++num_buckets;
      bmset(i);
    }
    // This does the actual inserting.  Since we made the array using
    // malloc, we use "placement new" to just call the constructor.
    new(&group[offset]) value_type(val);
    return group[offset];
  }

  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    return bmtest(i) ? true : false;  // cast an int to a bool
  }
  bool test(iterator pos) const {
    return bmtest(pos.pos) ? true : false;
  }

 private:
  // Shrink the array, assuming value_type has trivial copy
  // constructor and destructor.  (Really, we want it to have "trivial
  // move", because that's what realloc and memmove both do.  But
  // there's no way to capture that using type_traits, so we pretend
  // that move(x, y) is equivalent to "x.~T(); new(x) T(y);"
  // which is pretty much correct, if a bit conservative.)
  void erase_aux(size_type offset, true_type) {
    // This isn't technically necessary, since we know we have a
    // trivial destructor, but is a cheap way to get a bit more safety.
    group[offset].~value_type();
    // This is equivalent to memmove(), but faster on my Intel P4,
    // at least with gcc4.1 -O2 / glibc 2.3.6.
    assert(num_buckets > 0);
    for (size_type i = offset; i < num_buckets-1; ++i)
      memcpy(group + i, group + i+1, sizeof(*group));  // hopefully inlined!
    group = (value_type *)
            realloc_or_die(group, sizeof(*group) * (num_buckets-1));
  }

  // Shrink the array, without any special assumptions about value_type.
  void erase_aux(size_type offset, false_type) {
    // This is valid because 0 <= offset < num_buckets.  Note the inequality.
    value_type* p = allocate_group(num_buckets - 1);
    uninitialized_copy(group, group + offset, p);
    uninitialized_copy(group + offset + 1, group + num_buckets, p + offset);
    free_group();
    group = p;
  }

 public:
  // This takes the specified elements out of the group.  This is
  // "undefining", rather than "clearing".
  // TODO(austern): Make this exception safe: handle exceptions from
  // value_type's copy constructor.
  void erase(size_type i) {
    if ( bmtest(i) ) {                   // trivial to erase empty bucket
      size_type offset = pos_to_offset(bitmap,i);  // where we'll find (or insert)
      if ( num_buckets == 1 ) {
        free_group();
        group = NULL;   // NOTE: free_group() already NULLs group; harmless
      } else {
        typedef integral_constant<bool,
                                  (has_trivial_copy<value_type>::value &&
                                   has_trivial_destructor<value_type>::value)>
            realloc_and_memmove_ok;  // pretend mv(x,y) == "x.~T(); new(x) T(y)"
        erase_aux(offset, realloc_and_memmove_ok());
      }
      --num_buckets;
      bmclear(i);
    }
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start_it, iterator end_it) {
    // This could be more efficient, but to do so we'd need to make
    // bmclear() clear a range of indices.  Doesn't seem worth it.
    for ( ; start_it != end_it; ++start_it )
      erase(start_it);
  }


  // I/O
  // We support reading and writing groups to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the bitmap and size.  Meant to be used with table I/O.
  // Returns true if all was ok
  bool write_metadata(FILE *fp) const {
    assert(sizeof(num_buckets) == 2);    // we explicitly set to u_int16_t
    // num_buckets is written high byte first (big-endian), then the
    // raw bitmap bytes.
    PUT_(num_buckets, 8);
    PUT_(num_buckets, 0);
    if ( !fwrite(bitmap, sizeof(bitmap), 1, fp) ) return false;
    return true;
  }

  // Reading destroys the old group contents!  Returns true if all was ok.
  // Note: 'group' is allocated to num_buckets elements but left as
  // uninitialized raw memory; the caller fills it (e.g. via
  // read_nopointer_data()).
  bool read_metadata(FILE *fp) {
    clear();

    int x;    // the GET_ macro requires an 'int x' to be defined
    GET_(num_buckets, 8);
    GET_(num_buckets, 0);

    if ( !fread(bitmap, sizeof(bitmap), 1, fp) ) return false;

    // We'll allocate the space, but we won't fill it: it will be
    // left as uninitialized raw memory.
    group = allocate_group(num_buckets);
    return true;
  }

  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means POD and no pointers.
  // However, we don't try to normalize endianness
  bool write_nopointer_data(FILE *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false;
    }
    return true;
  }

  // When reading, we have to override the potential const-ness of *it.
  // Again, only meaningful if value_type is a POD.
  bool read_nopointer_data(FILE *fp) {
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return true;
  }

  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).
  bool operator==(const sparsegroup& x) const {
    return ( num_buckets == x.num_buckets &&
             memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
             STL_NAMESPACE::equal(begin(), end(), x.begin()) );  // from algorithm
  }
  bool operator<(const sparsegroup& x) const {  // also from algorithm
    return STL_NAMESPACE::lexicographical_compare(begin(), end(),
                                                  x.begin(), x.end());
  }
  bool operator!=(const sparsegroup& x) const { return !(*this == x); }
  bool operator<=(const sparsegroup& x) const { return !(x < *this); }
  bool operator>(const sparsegroup& x) const { return x < *this; }
  bool operator>=(const sparsegroup& x) const { return !(*this < x); }

 private:
  // The actual data
  value_type *group;                            // (small) array of T's
  unsigned char bitmap[(GROUP_SIZE-1)/8 + 1];   // fancy math is so we round up
  size_type num_buckets;                        // limits GROUP_SIZE to 64K
};
1054 |
|
1055 |
// We need a global swap as well |
1056 |
template <class T, u_int16_t GROUP_SIZE> |
1057 |
inline void swap(sparsegroup<T,GROUP_SIZE> &x, sparsegroup<T,GROUP_SIZE> &y) { |
1058 |
x.swap(y); |
1059 |
} |
1060 |
|
1061 |
// --------------------------------------------------------------------------- |
1062 |
|
1063 |
|
1064 |
// A sparsetable is a logical array of table_size buckets, stored as a
// vector of sparsegroups of GROUP_SIZE buckets each.  Bucket i lives in
// group i / GROUP_SIZE at position i % GROUP_SIZE.  num_buckets tracks
// the total number of assigned (non-empty) buckets across all groups.
template <class T, u_int16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE>
class sparsetable {
 public:
  // Basic types
  typedef T value_type;                        // stolen from stl_vector.h
  typedef value_type* pointer;
  typedef const value_type* const_pointer;
  typedef table_iterator<sparsetable<T, GROUP_SIZE> > iterator;
  typedef const_table_iterator<sparsetable<T, GROUP_SIZE> > const_iterator;
  typedef table_element_adaptor<sparsetable<T, GROUP_SIZE> > element_adaptor;
  typedef value_type &reference;
  typedef const value_type &const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;

  // These are our special iterators, that go over non-empty buckets in a
  // table.  These aren't const only because you can change non-empty bcks.
  typedef two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
      nonempty_iterator;
  typedef const_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
      const_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
  // Another special iterator: it frees memory as it iterates (used to resize)
  typedef destructive_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
      destructive_iterator;

  // Iterator functions
  iterator begin() { return iterator(this, 0); }
  const_iterator begin() const { return const_iterator(this, 0); }
  iterator end() { return iterator(this, size()); }
  const_iterator end() const { return const_iterator(this, size()); }
  reverse_iterator rbegin() { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend() { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // Versions for our special non-empty iterator
  nonempty_iterator nonempty_begin() {
    return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
  }
  const_nonempty_iterator nonempty_begin() const {
    return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin());
  }
  nonempty_iterator nonempty_end() {
    return nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  const_nonempty_iterator nonempty_end() const {
    return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }
  destructive_iterator destructive_begin() {
    return destructive_iterator(groups.begin(), groups.end(), groups.begin());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(groups.begin(), groups.end(), groups.end());
  }

 private:
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::reference
      GroupsReference;
  typedef typename
      vector< sparsegroup<value_type, GROUP_SIZE> >::const_reference
      GroupsConstReference;
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::iterator
      GroupsIterator;
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::const_iterator
      GroupsConstIterator;

  // How to deal with the proper group
  static size_type num_groups(size_type num) {  // how many to hold num buckets
    return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1;
  }

  // Position of bucket i within its group.
  u_int16_t pos_in_group(size_type i) const {
    return static_cast<u_int16_t>(i % GROUP_SIZE);
  }
  // Index of the group holding bucket i.
  size_type group_num(size_type i) const {
    return i / GROUP_SIZE;
  }
  GroupsReference which_group(size_type i) {
    return groups[group_num(i)];
  }
  GroupsConstReference which_group(size_type i) const {
    return groups[group_num(i)];
  }

 public:
  // Constructors -- default, normal (when you specify size), and copy
  sparsetable(size_type sz = 0)
      : groups(num_groups(sz)), table_size(sz), num_buckets(0) { }
  // We can get away with using the default copy constructor,
  // and default destructor, and hence the default operator=.  Huzzah!

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparsetable& x) {
    STL_NAMESPACE::swap(groups, x.groups);
    STL_NAMESPACE::swap(table_size, x.table_size);
    STL_NAMESPACE::swap(num_buckets, x.num_buckets);
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group ) {
      group->clear();
    }
    num_buckets = 0;
  }

  // Functions that tell you about size.
  // NOTE: empty() is non-intuitive!  It does not tell you the number
  // of not-empty buckets (use num_nonempty() for that).  Instead
  // it says whether you've allocated any buckets or not.
  size_type size() const { return table_size; }
  size_type max_size() const { return size_type(-1); }
  bool empty() const { return table_size == 0; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const { return num_buckets; }

  // OK, we'll let you resize one of these puppies
  void resize(size_type new_size) {
    groups.resize(num_groups(new_size));
    if ( new_size < table_size) {   // lower num_buckets, clear last group
      if ( pos_in_group(new_size) > 0 )     // need to clear inside last group
        groups.back().erase(groups.back().begin() + pos_in_group(new_size),
                            groups.back().end());
      // Trailing whole groups were already dropped by groups.resize()
      // above, so recount what's left.
      num_buckets = 0;                    // refigure # of used buckets
      GroupsConstIterator group;
      for ( group = groups.begin(); group != groups.end(); ++group )
        num_buckets += group->num_nonempty();
    }
    table_size = new_size;
  }


  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    return which_group(i).test(pos_in_group(i));
  }
  bool test(iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }
  bool test(const_iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }

  // We only return const_references because it's really hard to
  // return something settable for empty buckets.  Use set() instead.
  const_reference get(size_type i) const {
    assert(i < table_size);
    return which_group(i).get(pos_in_group(i));
  }

  // TODO(csilvers): make protected + friend element_adaptor
  reference mutating_get(size_type i) {    // fills bucket i before getting
    assert(i < table_size);
    // The group may assign a previously-empty bucket; the before/after
    // difference in its count keeps our total in sync.
    size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).mutating_get(pos_in_group(i));
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // Syntactic sugar.  As in sparsegroup, the non-const version is harder
  const_reference operator[](size_type i) const {
    return get(i);
  }

  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }

  // Needed for hashtables, gets as a nonempty_iterator.  Crashes for empty bcks
  const_nonempty_iterator get_iter(size_type i) const {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return const_nonempty_iterator(
        groups.begin(), groups.end(),
        groups.begin() + group_num(i),
        (groups[group_num(i)].nonempty_begin() +
         groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }
  // For nonempty we can return a non-const version
  nonempty_iterator get_iter(size_type i) {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return nonempty_iterator(
        groups.begin(), groups.end(),
        groups.begin() + group_num(i),
        (groups[group_num(i)].nonempty_begin() +
         groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }


  // This returns a reference to the inserted item (which is a copy of val)
  // The trick is to figure out whether we're replacing or inserting anew
  reference set(size_type i, const_reference val) {
    assert(i < table_size);
    size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).set(pos_in_group(i), val);
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // This takes the specified elements out of the table.  This is
  // "undefining", rather than "clearing".
  void erase(size_type i) {
    assert(i < table_size);
    size_type old_numbuckets = which_group(i).num_nonempty();
    which_group(i).erase(pos_in_group(i));
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start_it, iterator end_it) {
    // This could be more efficient, but then we'd need to figure
    // out if we spanned groups or not.  Doesn't seem worth it.
    for ( ; start_it != end_it; ++start_it )
      erase(start_it);
  }


  // We support reading and writing tables to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the groups and sizes.  Returns true if all went ok.

 private:
  // Every time the disk format changes, this should probably change too
  static const unsigned long MAGIC_NUMBER = 0x24687531;

  // Old versions of this code write all data in 32 bits.  We need to
  // support these files as well as having support for 64-bit systems.
  // So we use the following encoding scheme: for values < 2^32-1, we
  // store in 4 bytes in big-endian order.  For values >= 2^32-1, we
  // store 0xFFFFFFFF followed by 8 bytes in big-endian order.  This
  // causes us to mis-read old-version code that stores exactly
  // 0xFFFFFFFF, but I don't think that is likely to have happened for
  // these particular values.
  static bool write_32_or_64(FILE* fp, size_type value) {
    if ( value < 0xFFFFFFFFULL ) {        // fits in 4 bytes
      PUT_(value, 24);
      PUT_(value, 16);
      PUT_(value, 8);
      PUT_(value, 0);
    } else if ( value == 0xFFFFFFFFUL ) {  // special case in 32bit systems
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
      PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); PUT_(0, 0);
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);
    } else {
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
      PUT_(value, 56);
      PUT_(value, 48);
      PUT_(value, 40);
      PUT_(value, 32);
      PUT_(value, 24);
      PUT_(value, 16);
      PUT_(value, 8);
      PUT_(value, 0);
    }
    return true;
  }

  static bool read_32_or_64(FILE* fp, size_type *value) {  // reads into value
    size_type first4 = 0;
    int x;                       // the GET_ macro requires an 'int x'
    GET_(first4, 24);
    GET_(first4, 16);
    GET_(first4, 8);
    GET_(first4, 0);
    if ( first4 < 0xFFFFFFFFULL ) {
      *value = first4;
    } else {
      // Saw the 0xFFFFFFFF marker: the real value follows in 8 bytes.
      GET_(*value, 56);
      GET_(*value, 48);
      GET_(*value, 40);
      GET_(*value, 32);
      GET_(*value, 24);
      GET_(*value, 16);
      GET_(*value, 8);
      GET_(*value, 0);
    }
    return true;
  }

 public:
  // Writes the magic number, the two sizes, and each group's metadata
  // (bitmap + count); element contents are written separately via
  // write_nopointer_data().  Returns true if all was ok.
  bool write_metadata(FILE *fp) const {
    if ( !write_32_or_64(fp, MAGIC_NUMBER) )  return false;
    if ( !write_32_or_64(fp, table_size) )  return false;
    if ( !write_32_or_64(fp, num_buckets) )  return false;

    GroupsConstIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->write_metadata(fp) == false )  return false;
    return true;
  }

  // Reading destroys the old table contents!  Returns true if read ok.
  bool read_metadata(FILE *fp) {
    size_type magic_read = 0;
    if ( !read_32_or_64(fp, &magic_read) )  return false;
    if ( magic_read != MAGIC_NUMBER ) {
      clear();                        // just to be consistent
      return false;
    }

    if ( !read_32_or_64(fp, &table_size) )  return false;
    if ( !read_32_or_64(fp, &num_buckets) )  return false;

    resize(table_size);                            // so the vector's sized ok
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->read_metadata(fp) == false )  return false;
    return true;
  }

  // This code is identical to that for SparseGroup
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  bool write_nopointer_data(FILE *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return true;
  }

  // When reading, we have to override the potential const-ness of *it
  bool read_nopointer_data(FILE *fp) {
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return true;
  }

  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).
  bool operator==(const sparsetable& x) const {
    return ( table_size == x.table_size &&
             num_buckets == x.num_buckets &&
             groups == x.groups );
  }
  bool operator<(const sparsetable& x) const {  // also from algobase.h
    return STL_NAMESPACE::lexicographical_compare(begin(), end(),
                                                  x.begin(), x.end());
  }
  bool operator!=(const sparsetable& x) const { return !(*this == x); }
  bool operator<=(const sparsetable& x) const { return !(x < *this); }
  bool operator>(const sparsetable& x) const { return x < *this; }
  bool operator>=(const sparsetable& x) const { return !(*this < x); }


 private:
  // The actual data
  vector< sparsegroup<value_type, GROUP_SIZE> > groups;  // our list of groups
  size_type table_size;                 // how many buckets they want
  size_type num_buckets;                // number of non-empty buckets
};
1439 |
|
1440 |
// We need a global swap as well |
1441 |
template <class T, u_int16_t GROUP_SIZE> |
1442 |
inline void swap(sparsetable<T,GROUP_SIZE> &x, sparsetable<T,GROUP_SIZE> &y) { |
1443 |
x.swap(y); |
1444 |
} |
1445 |
|
1446 |
#undef GET_ |
1447 |
#undef PUT_ |
1448 |
|
1449 |
_END_GOOGLE_NAMESPACE_ |
1450 |
|
1451 |
#endif |