/[pcsx2_0.9.7]/trunk/3rdparty/google/sparsetable
ViewVC logotype

Contents of /trunk/3rdparty/google/sparsetable

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (show annotations) (download)
Mon Sep 6 11:40:06 2010 UTC (9 years, 4 months ago) by william
File size: 60505 byte(s)
exported r3113 from ./upstream/trunk
1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // ---
31 // Author: Craig Silverstein
32 //
33 // A sparsetable is a random container that implements a sparse array,
34 // that is, an array that uses very little memory to store unassigned
35 // indices (in this case, between 1-2 bits per unassigned index). For
36 // instance, if you allocate an array of size 5 and assign a[2] = <big
37 // struct>, then a[2] will take up a lot of memory but a[0], a[1],
38 // a[3], and a[4] will not. Array elements that have a value are
39 // called "assigned". Array elements that have no value yet, or have
40 // had their value cleared using erase() or clear(), are called
41 // "unassigned".
42 //
43 // Unassigned values seem to have the default value of T (see below).
44 // Nevertheless, there is a difference between an unassigned index and
45 // one explicitly assigned the value of T(). The latter is considered
46 // assigned.
47 //
48 // Access to an array element is constant time, as is insertion and
49 // deletion. Insertion and deletion may be fairly slow, however:
50 // because of this container's memory economy, each insert and delete
51 // causes a memory reallocation.
52 //
53 // See /usr/(local/)?doc/sparsehash-0.1/sparsetable.html
54 // for information about how to use this class.
55
56 #ifndef _SPARSETABLE_H_
57 #define _SPARSETABLE_H_
58
59 #include <google/sparsehash/sparseconfig.h>
60 #include <stdlib.h> // for malloc/free
61 #include <stdio.h> // to read/write tables
62 #ifdef HAVE_STDINT_H
63 #include <stdint.h> // the normal place uint16_t is defined
64 #endif
65 #ifdef HAVE_SYS_TYPES_H
66 #include <sys/types.h> // the normal place u_int16_t is defined
67 #endif
68 #ifdef HAVE_INTTYPES_H
69 #include <inttypes.h> // a third place for uint16_t or u_int16_t
70 #endif
71 #include <assert.h> // for bounds checking
72 #include <iterator> // to define reverse_iterator for me
73 #include <algorithm> // equal, lexicographical_compare, swap,...
74 #include <memory> // uninitialized_copy
75 #include <vector> // a sparsetable is a vector of groups
76 #include <google/type_traits.h> // for true_type, integral_constant, etc.
77
78 #if STDC_HEADERS
79 #include <string.h> // for memcpy
80 #else
81 #if !HAVE_MEMCPY
82 #define memcpy(d, s, n) bcopy ((s), (d), (n))
83 #endif
84 #endif
85
86 _START_GOOGLE_NAMESPACE_
87
88 #ifndef HAVE_U_INT16_T
89 # if defined HAVE_UINT16_T
90 typedef uint16_t u_int16_t; // true on solaris, possibly other C99 libc's
91 # elif defined HAVE___UINT16
92 typedef __int16 int16_t; // true on vc++7
93 typedef unsigned __int16 u_int16_t;
94 # else
95 // Cannot find a 16-bit integer type. Hoping for the best with "short"...
96 typedef short int int16_t;
97 typedef unsigned short int u_int16_t;
98 # endif
99 #endif
100
101 using STL_NAMESPACE::vector;
102 using STL_NAMESPACE::uninitialized_copy;
103
104 // The smaller this is, the faster lookup is (because the group bitmap is
105 // smaller) and the faster insert is, because there's less to move.
106 // On the other hand, there are more groups. Since group::size_type is
107 // a short, this number should be of the form 32*x + 16 to avoid waste.
108 static const u_int16_t DEFAULT_SPARSEGROUP_SIZE = 48; // fits in 1.5 words
109
110
111 // A NOTE ON ASSIGNING:
112 // A sparse table does not actually allocate memory for entries
113 // that are not filled. Because of this, it becomes complicated
114 // to have a non-const iterator: we don't know, if the iterator points
115 // to a not-filled bucket, whether you plan to fill it with something
116 // or whether you plan to read its value (in which case you'll get
117 // the default bucket value). Therefore, while we can define const
118 // operations in a pretty 'normal' way, for non-const operations, we
119 // define something that returns a helper object with operator= and
120 // operator& that allocate a bucket lazily. We use this for table[]
121 // and also for regular table iterators.
122
// A write-capable proxy for one bucket of a sparse table.  Reading it
// (via the value_type conversion) fetches the bucket's current value;
// assigning to it stores into the table; taking its address forces the
// bucket to be allocated (via mutating_get) and points at the result.
template <class tabletype>
class table_element_adaptor {
 public:
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::size_type size_type;
  typedef typename tabletype::reference reference;
  typedef typename tabletype::pointer pointer;

  table_element_adaptor(tabletype *tbl, size_type p)
      : owner_(tbl), index_(p) { }

  // Writing through the proxy assigns into the underlying table.
  table_element_adaptor& operator=(const value_type &val) {
    owner_->set(index_, val);
    return *this;
  }
  // Reading the proxy yields the bucket's value; we look like a value.
  operator value_type() { return owner_->get(index_); }
  // Taking the address lazily allocates the bucket and points at it.
  pointer operator&() { return &owner_->mutating_get(index_); }

 private:
  tabletype* owner_;   // the table we proxy into
  size_type index_;    // which bucket of that table
};
144
145 // Our iterator as simple as iterators can be: basically it's just
146 // the index into our table. Dereference, the only complicated
147 // thing, we punt to the table class. This just goes to show how
148 // much machinery STL requires to do even the most trivial tasks.
149 //
150 // By templatizing over tabletype, we have one iterator type which
151 // we can use for both sparsetables and sparsebins. In fact it
152 // works on any class that allows size() and operator[] (eg vector),
153 // as long as it does the standard STL typedefs too (eg value_type).
154
template <class tabletype>
class table_iterator {
 public:
  typedef table_iterator iterator;

  typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::difference_type difference_type;
  typedef typename tabletype::size_type size_type;
  // NOTE: dereferencing yields a proxy object (table_element_adaptor),
  // not a plain value_type&, so that assignment through the iterator
  // can lazily allocate the bucket (see "A NOTE ON ASSIGNING" above).
  typedef table_element_adaptor<tabletype> reference;
  typedef table_element_adaptor<tabletype>* pointer;

  // The "real" constructor: iterate over *tbl, starting at position p.
  table_iterator(tabletype *tbl, size_type p)
    : table(tbl), pos(p) { }
  // The default constructor, used when I define vars of type table::iterator
  table_iterator() : table(NULL), pos(0) { }
  // The default copy constructor is fine, for table::iterator foo = tbl.begin()
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // The main thing our iterator does is dereference.  If the table entry
  // we point to is empty, we return the default value type.
  // This is the big difference from the const iterator: we hand back a
  // write-capable proxy rather than a const_reference.
  reference operator*() {
    return table_element_adaptor<tabletype>(table, pos);
  }
  // NOTE(review): the '&' below hits the proxy's overloaded operator&,
  // whose return type (value_type*) differs from 'pointer' above; this
  // appears to rely on lazy template instantiation (only compiled if
  // operator-> is actually used) -- confirm before calling it.
  pointer operator->() { return &(operator*()); }

  // Helper function to assert things are ok; eg pos is still in range
  // (pos == size() is allowed: that is the end() position).
  void check() const {
    assert(table);
    assert(pos <= table->size());
  }

  // Arithmetic: we just do arithmetic on pos.  We don't even need to
  // do bounds checking, since STL doesn't consider that its job.  :-)
  iterator& operator+=(size_type t) { pos += t; check(); return *this; }
  iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
  iterator& operator++() { ++pos; check(); return *this; }
  iterator& operator--() { --pos; check(); return *this; }
  iterator operator++(int) { iterator tmp(*this);     // for x++
                             ++pos; check(); return tmp; }
  iterator operator--(int) { iterator tmp(*this);     // for x--
                             --pos; check(); return tmp; }
  iterator operator+(difference_type i) const { iterator tmp(*this);
                                                tmp += i; return tmp; }
  iterator operator-(difference_type i) const { iterator tmp(*this);
                                                tmp -= i; return tmp; }
  difference_type operator-(iterator it) const {      // for "x = it2 - it"
    assert(table == it.table);   // differencing iterators from two
                                 // different tables is meaningless
    return pos - it.pos;
  }
  reference operator[](difference_type n) const {
    return *(*this + n);            // simple though not totally efficient
  }

  // Comparisons.  Two iterators are equal only if they index the same
  // position of the same table.
  bool operator==(const iterator& it) const {
    return table == it.table && pos == it.pos;
  }
  bool operator<(const iterator& it) const {
    assert(table == it.table);              // life is bad bad bad otherwise
    return pos < it.pos;
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
  bool operator<=(const iterator& it) const { return !(it < *this); }
  bool operator>(const iterator& it) const { return it < *this; }
  bool operator>=(const iterator& it) const { return !(*this < it); }

  // Here's the info we actually need to be an iterator
  // (public: the const iterator converts from us by reading these).
  tabletype *table;              // so we can dereference and bounds-check
  size_type pos;                 // index into the table
};
229
230 // support for "3 + iterator" has to be defined outside the class, alas
231 template<class T>
232 table_iterator<T> operator+(typename table_iterator<T>::difference_type i,
233 table_iterator<T> it) {
234 return it + i; // so people can say it2 = 3 + it
235 }
236
// The const version of table_iterator: same position-based design, but
// dereferencing goes through the table's const operator[] and returns
// a const_reference instead of a write-capable proxy.
template <class tabletype>
class const_table_iterator {
 public:
  typedef table_iterator<tabletype> iterator;
  typedef const_table_iterator const_iterator;

  typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::difference_type difference_type;
  typedef typename tabletype::size_type size_type;
  typedef typename tabletype::const_reference reference; // we're const-only
  typedef typename tabletype::const_pointer pointer;

  // The "real" constructor: iterate over *tbl, starting at position p.
  const_table_iterator(const tabletype *tbl, size_type p)
    : table(tbl), pos(p) { }
  // The default constructor, used when I define vars of type table::iterator
  const_table_iterator() : table(NULL), pos(0) { }
  // The copy constructor, for when I say table::iterator foo = tbl.begin()
  // Also converts normal iterators to const iterators
  const_table_iterator(const iterator &from)
    : table(from.table), pos(from.pos) { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // The main thing our iterator does is dereference.  If the table entry
  // we point to is empty, we return the default value type.
  reference operator*() const { return (*table)[pos]; }
  pointer operator->() const { return &(operator*()); }

  // Helper function to assert things are ok; eg pos is still in range
  // (pos == size() is allowed: that is the end() position).
  void check() const {
    assert(table);
    assert(pos <= table->size());
  }

  // Arithmetic: we just do arithmetic on pos.  We don't even need to
  // do bounds checking, since STL doesn't consider that its job.  :-)
  const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
  const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
  const_iterator& operator++() { ++pos; check(); return *this; }
  const_iterator& operator--() { --pos; check(); return *this; }
  const_iterator operator++(int) { const_iterator tmp(*this); // for x++
                                   ++pos; check(); return tmp; }
  const_iterator operator--(int) { const_iterator tmp(*this); // for x--
                                   --pos; check(); return tmp; }
  const_iterator operator+(difference_type i) const { const_iterator tmp(*this);
                                                      tmp += i; return tmp; }
  const_iterator operator-(difference_type i) const { const_iterator tmp(*this);
                                                      tmp -= i; return tmp; }
  difference_type operator-(const_iterator it) const { // for "x = it2 - it"
    assert(table == it.table);   // differencing iterators from two
                                 // different tables is meaningless
    return pos - it.pos;
  }
  reference operator[](difference_type n) const {
    return *(*this + n);            // simple though not totally efficient
  }

  // Comparisons.  Two iterators are equal only if they index the same
  // position of the same table.
  bool operator==(const const_iterator& it) const {
    return table == it.table && pos == it.pos;
  }
  bool operator<(const const_iterator& it) const {
    assert(table == it.table);              // life is bad bad bad otherwise
    return pos < it.pos;
  }
  bool operator!=(const const_iterator& it) const { return !(*this == it); }
  bool operator<=(const const_iterator& it) const { return !(it < *this); }
  bool operator>(const const_iterator& it) const { return it < *this; }
  bool operator>=(const const_iterator& it) const { return !(*this < it); }

  // Here's the info we actually need to be an iterator
  const tabletype *table;        // so we can dereference and bounds-check
  size_type pos;                 // index into the table
};
312
313 // support for "3 + iterator" has to be defined outside the class, alas
314 template<class T>
315 const_table_iterator<T> operator+(typename
316 const_table_iterator<T>::difference_type i,
317 const_table_iterator<T> it) {
318 return it + i; // so people can say it2 = 3 + it
319 }
320
321
322 // ---------------------------------------------------------------------------
323
324
325 /*
326 // This is a 2-D iterator. You specify a begin and end over a list
327 // of *containers*. We iterate over each container by iterating over
328 // it. It's actually simple:
329 // VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---,
330 // | ________________________________________________/
331 // | \_> VECTOR[1].begin() --------> VECTOR[1].end() -,
332 // | ___________________________________________________/
333 // v \_> ......
334 // VECTOR.end()
335 //
336 // It's impossible to do random access on one of these things in constant
337 // time, so it's just a bidirectional iterator.
338 //
339 // Unfortunately, because we need to use this for a non-empty iterator,
340 // we use nonempty_begin() and nonempty_end() instead of begin() and end()
341 // (though only going across, not down).
342 */
343
344 #define TWOD_BEGIN_ nonempty_begin
345 #define TWOD_END_ nonempty_end
346 #define TWOD_ITER_ nonempty_iterator
347 #define TWOD_CONST_ITER_ const_nonempty_iterator
348
// A bidirectional iterator over the non-empty entries of a
// container-of-containers (rows x columns; see the diagram above).
// TWOD_BEGIN_/TWOD_END_/TWOD_ITER_ are the macros #defined just above,
// expanding to the nonempty_* names.
template <class containertype>
class two_d_iterator {
 public:
  typedef two_d_iterator iterator;

  typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor.  begin and end specify how many rows we have
  // (in the diagram above); we always iterate over each row completely.
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( row_current != row_end ) {
      col_current = row_current->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  // If you want to start at an arbitrary place (row *and* column), you
  // can, I guess
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr,
                 typename containertype::value_type::TWOD_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                   // in case cur->begin() == cur->end()
  }
  // The default constructor, used when I define vars of type table::iterator
  two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *col_current; }
  pointer operator->() const { return &(operator*()); }

  // Skip forward over any rows whose nonempty range is exhausted (or
  // empty), stopping at the next nonempty entry or at row_end.
  // NOTE: this is not amortized constant time!  What do we do about it?
  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) { // end of current row
      ++row_current;                      // go to beginning of next
      if ( row_current != row_end )       // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                            // don't go past row_end
    }
  }

  iterator& operator++() {
    assert(row_current != row_end);       // how to ++ from there?
    ++col_current;
    advance_past_end();                   // in case col_current is at end()
    return *this;
  }
  iterator& operator--() {
    // Back up over empty rows (and off end()) until we find a row with
    // a nonempty entry before our position, then step onto it.
    while ( row_current == row_end ||
            col_current == row_current->TWOD_BEGIN_() ) {
      assert(row_current != row_begin);   // can't -- from the beginning
      --row_current;
      col_current = row_current->TWOD_END_();   // this is 1 too far
    }
    --col_current;
    return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
  iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }


  // Comparisons.  At row_end the column iterator is irrelevant, so it
  // is deliberately excluded from the equality test there.
  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }


  // Here's the info we actually need to be an iterator
  // These need to be public so we convert from iterator to const_iterator
  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;
};
438
// The same thing again, but this time const. :-(
// Dereferencing returns const_reference/const_pointer, and rows are
// traversed through containertype::const_iterator.
template <class containertype>
class const_two_d_iterator {
 public:
  typedef const_two_d_iterator iterator;

  typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::const_reference reference;
  typedef typename _tmp_vt::const_pointer pointer;

  // The "real" constructor: rows [begin, end), starting at row curr.
  const_two_d_iterator(typename containertype::const_iterator begin,
                       typename containertype::const_iterator end,
                       typename containertype::const_iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( curr != end ) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary place (row *and* column).
  const_two_d_iterator(typename containertype::const_iterator begin,
                       typename containertype::const_iterator end,
                       typename containertype::const_iterator curr,
                       typename containertype::value_type::TWOD_CONST_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                   // in case cur->begin() == cur->end()
  }
  // The default constructor, used when I define vars of this type
  const_two_d_iterator()
    : row_begin(), row_end(), row_current(), col_current() {
  }
  // Need this explicitly so we can convert normal iterators to const iterators
  const_two_d_iterator(const two_d_iterator<containertype>& it) :
    row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current),
    col_current(it.col_current) { }

  // Public so the converting constructor above (and friends) can read them.
  typename containertype::const_iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_CONST_ITER_ col_current;


  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR

  reference operator*() const { return *col_current; }
  pointer operator->() const { return &(operator*()); }

  // Skip forward over rows whose nonempty range is exhausted (or empty).
  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) { // end of current row
      ++row_current;                      // go to beginning of next
      if ( row_current != row_end )       // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                            // don't go past row_end
    }
  }
  iterator& operator++() {
    assert(row_current != row_end);       // how to ++ from there?
    ++col_current;
    advance_past_end();                   // in case col_current is at end()
    return *this;
  }
  iterator& operator--() {
    // Back up over empty rows (and off end()) to the previous nonempty entry.
    while ( row_current == row_end ||
            col_current == row_current->TWOD_BEGIN_() ) {
      assert(row_current != row_begin);   // can't -- from the beginning
      --row_current;
      col_current = row_current->TWOD_END_();   // this is 1 too far
    }
    --col_current;
    return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
  iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }

  // At row_end the column iterator is irrelevant, hence the last clause.
  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};
521
522 // We provide yet another version, to be as frugal with memory as
523 // possible. This one frees each block of memory as it finishes
524 // iterating over it. By the end, the entire table is freed.
525 // For understandable reasons, you can only iterate over it once,
526 // which is why it's an input iterator
template <class containertype>
class destructive_two_d_iterator {
 public:
  typedef destructive_two_d_iterator iterator;

  // Input iterator only: each row is clear()ed as we finish it, so the
  // sequence can be traversed exactly once (see the comment above).
  typedef STL_NAMESPACE::input_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor: rows [begin, end), starting at row curr.
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( curr != end ) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary place (row *and* column).
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr,
                             typename containertype::value_type::TWOD_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                   // in case cur->begin() == cur->end()
  }
  // The default constructor, used when I define vars of this type
  destructive_two_d_iterator()
    : row_begin(), row_end(), row_current(), col_current() {
  }

  // Public so related iterator types can read them.
  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;

  // This is the part that destroys: every finished row is clear()ed
  // before we move on, so by the time we reach row_end the per-row
  // storage has been released.
  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) { // end of current row
      row_current->clear();               // the destructive part
      // It would be nice if we could decrement sparsetable->num_buckets here
      ++row_current;                      // go to beginning of next
      if ( row_current != row_end )       // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                            // don't go past row_end
    }
  }

  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
  // (minus operator-- and friends: an input iterator only goes forward)

  reference operator*() const { return *col_current; }
  pointer operator->() const { return &(operator*()); }

  iterator& operator++() {
    assert(row_current != row_end);       // how to ++ from there?
    ++col_current;
    advance_past_end();                   // in case col_current is at end()
    return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // At row_end the column iterator is irrelevant, hence the last clause.
  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};
596
597 #undef TWOD_BEGIN_
598 #undef TWOD_END_
599 #undef TWOD_ITER_
600 #undef TWOD_CONST_ITER_
601
602
603
604
605 // SPARSE-TABLE
606 // ------------
607 // The idea is that a table with (logically) t buckets is divided
608 // into t/M *groups* of M buckets each. (M is a constant set in
609 // GROUP_SIZE for efficiency.) Each group is stored sparsely.
610 // Thus, inserting into the table causes some array to grow, which is
611 // slow but still constant time. Lookup involves doing a
612 // logical-position-to-sparse-position lookup, which is also slow but
613 // constant time. The larger M is, the slower these operations are
614 // but the less overhead (slightly).
615 //
616 // To store the sparse array, we store a bitmap B, where B[i] = 1 iff
617 // bucket i is non-empty. Then to look up bucket i we really look up
618 // array[# of 1s before i in B]. This is constant time for fixed M.
619 //
620 // Terminology: the position of an item in the overall table (from
621 // 1 .. t) is called its "location." The logical position in a group
622 // (from 1 .. M ) is called its "position." The actual location in
623 // the array (from 1 .. # of non-empty buckets in the group) is
624 // called its "offset."
625
626 // The weird mod in the offset is entirely to quiet compiler warnings
627 // as is the cast to int after doing the "x mod 256"
628 #define PUT_(take_from, offset) do { \
629 if (putc(static_cast<int>(((take_from) >> ((offset) % (sizeof(take_from)*8)))\
630 % 256), fp) \
631 == EOF) \
632 return false; \
633 } while (0)
634
635 #define GET_(add_to, offset) do { \
636 if ((x=getc(fp)) == EOF) \
637 return false; \
638 else \
639 add_to |= (static_cast<size_type>(x) << ((offset) % (sizeof(add_to)*8))); \
640 } while (0)
641
642 template <class T, u_int16_t GROUP_SIZE>
643 class sparsegroup {
644 public:
645 // Basic types
646 typedef T value_type;
647 typedef value_type* pointer;
648 typedef const value_type* const_pointer;
649 typedef table_iterator<sparsegroup<T, GROUP_SIZE> > iterator;
650 typedef const_table_iterator<sparsegroup<T, GROUP_SIZE> > const_iterator;
651 typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE> > element_adaptor;
652 typedef value_type &reference;
653 typedef const value_type &const_reference;
654 typedef u_int16_t size_type; // max # of buckets
655 typedef int16_t difference_type;
656 typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
657 typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
658
659 // These are our special iterators, that go over non-empty buckets in a
660 // group. These aren't const-only because you can change non-empty bcks.
661 typedef pointer nonempty_iterator;
662 typedef const_pointer const_nonempty_iterator;
663 typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
664 typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
665
666 // Iterator functions
667 iterator begin() { return iterator(this, 0); }
668 const_iterator begin() const { return const_iterator(this, 0); }
669 iterator end() { return iterator(this, size()); }
670 const_iterator end() const { return const_iterator(this, size()); }
671 reverse_iterator rbegin() { return reverse_iterator(end()); }
672 const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
673 reverse_iterator rend() { return reverse_iterator(begin()); }
674 const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
675
676 // We'll have versions for our special non-empty iterator too
677 nonempty_iterator nonempty_begin() { return group; }
678 const_nonempty_iterator nonempty_begin() const { return group; }
679 nonempty_iterator nonempty_end() { return group + num_buckets; }
680 const_nonempty_iterator nonempty_end() const { return group + num_buckets; }
681 reverse_nonempty_iterator nonempty_rbegin() {
682 return reverse_nonempty_iterator(nonempty_end());
683 }
684 const_reverse_nonempty_iterator nonempty_rbegin() const {
685 return const_reverse_nonempty_iterator(nonempty_end());
686 }
687 reverse_nonempty_iterator nonempty_rend() {
688 return reverse_nonempty_iterator(nonempty_begin());
689 }
690 const_reverse_nonempty_iterator nonempty_rend() const {
691 return const_reverse_nonempty_iterator(nonempty_begin());
692 }
693
694
695 // This gives us the "default" value to return for an empty bucket.
696 // We just use the default constructor on T, the template type
697 const_reference default_value() const {
698 static value_type defaultval = value_type();
699 return defaultval;
700 }
701
702
703 private:
704 // We need to do all this bit manipulation, of course. ick
705 static size_type charbit(size_type i) { return i >> 3; }
706 static size_type modbit(size_type i) { return 1 << (i&7); }
707 int bmtest(size_type i) const { return bitmap[charbit(i)] & modbit(i); }
708 void bmset(size_type i) { bitmap[charbit(i)] |= modbit(i); }
709 void bmclear(size_type i) { bitmap[charbit(i)] &= ~modbit(i); }
710
711 void* realloc_or_die(void* ptr, size_t num_bytes) {
712 void* retval = realloc(ptr, num_bytes);
713 if (retval == NULL) {
714 // We really should use PRIuS here, but I don't want to have to add
715 // a whole new configure option, with concomitant macro namespace
716 // pollution, just to print this (unlikely) error message. So I cast.
717 fprintf(stderr, "FATAL ERROR: failed to allocate %lu bytes for ptr %p",
718 static_cast<unsigned long>(num_bytes), ptr);
719 exit(1);
720 }
721 return retval;
722 }
723
724 value_type* allocate_group(size_t n) {
725 return static_cast<value_type*>(realloc_or_die(NULL,
726 n * sizeof(value_type)));
727 }
728
729 void free_group() {
730 // Valid even for empty group, because NULL+0 is defined to be NULL
731 value_type* end_it = group + num_buckets;
732 for (value_type* p = group; p != end_it; ++p)
733 p->~value_type();
734 free(group);
735 group = NULL;
736 }
737
738 public: // get_iter() in sparsetable needs it
739 // We need a small function that tells us how many set bits there are
740 // in positions 0..i-1 of the bitmap. It uses a big table.
741 // We make it static so templates don't allocate lots of these tables
742 static size_type pos_to_offset(const unsigned char *bm, size_type pos) {
743 // We could make these ints. The tradeoff is size (eg does it overwhelm
744 // the cache?) vs efficiency in referencing sub-word-sized array elements
745 static const char bits_in[256] = { // # of bits set in one char
746 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
747 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
748 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
749 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
750 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
751 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
752 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
753 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
754 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
755 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
756 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
757 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
758 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
759 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
760 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
761 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
762 };
763 size_type retval = 0;
764
765 // [Note: condition pos > 8 is an optimization; convince yourself we
766 // give exactly the same result as if we had pos >= 8 here instead.]
767 for ( ; pos > 8; pos -= 8 ) // bm[0..pos/8-1]
768 retval += bits_in[*bm++]; // chars we want *all* bits in
769 return retval + bits_in[*bm & ((1 << pos)-1)]; // the char that includes pos
770 }
771
772 size_type pos_to_offset(size_type pos) const { // not static but still const
773 return pos_to_offset(bitmap, pos);
774 }
775
776
777 public:
778 // Constructors -- default and copy -- and destructor
779 sparsegroup() : group(0), num_buckets(0) { memset(bitmap, 0, sizeof(bitmap)); }
780 sparsegroup(const sparsegroup& x) : group(0), num_buckets(x.num_buckets) {
781 if ( num_buckets ) {
782 group = allocate_group(x.num_buckets);
783 uninitialized_copy(x.group, x.group + x.num_buckets, group);
784 }
785 memcpy(bitmap, x.bitmap, sizeof(bitmap));
786 }
787 ~sparsegroup() { free_group(); }
788
  // Operator= is just like the copy constructor, I guess
  // TODO(austern): Make this exception safe. Handle exceptions in value_type's
  // copy constructor.
  sparsegroup &operator=(const sparsegroup& x) {
    if ( &x == this ) return *this;                  // x = x: nothing to do
    if ( x.num_buckets == 0 ) {
      free_group();                                  // rhs empty: just drop ours
    } else {
      // Copy into fresh storage *before* freeing our own, so ordering at
      // least keeps the old data intact until the new copy is complete.
      value_type* p = allocate_group(x.num_buckets);
      uninitialized_copy(x.group, x.group + x.num_buckets, p);
      free_group();
      group = p;
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
    num_buckets = x.num_buckets;
    return *this;
  }
806
807 // Many STL algorithms use swap instead of copy constructors
808 void swap(sparsegroup& x) {
809 STL_NAMESPACE::swap(group, x.group);
810 for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i )
811 STL_NAMESPACE::swap(bitmap[i], x.bitmap[i]); // swap not defined on arrays
812 STL_NAMESPACE::swap(num_buckets, x.num_buckets);
813 }
814
  // Reset to the all-empty state.  Note this *does* release the heap
  // array (via free_group()), unlike vector::clear(); only the inline
  // bitmap and counter stay allocated.
  void clear() {
    free_group();
    memset(bitmap, 0, sizeof(bitmap));
    num_buckets = 0;
  }
821
  // Functions that tell you about size.  Alas, these aren't so useful
  // because our table is always fixed size: a sparsegroup is conceptually
  // GROUP_SIZE buckets wide no matter how many are occupied.
  size_type size() const { return GROUP_SIZE; }
  size_type max_size() const { return GROUP_SIZE; }
  bool empty() const { return false; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const { return num_buckets; }
829
830
831 // get()/set() are explicitly const/non-const. You can use [] if
832 // you want something that can be either (potentially more expensive).
833 const_reference get(size_type i) const {
834 if ( bmtest(i) ) // bucket i is occupied
835 return group[pos_to_offset(bitmap, i)];
836 else
837 return default_value(); // return the default reference
838 }
839
840 // TODO(csilvers): make protected + friend
841 reference mutating_get(size_type i) { // fills bucket i before getting
842 if ( !bmtest(i) )
843 set(i, default_value());
844 return group[pos_to_offset(bitmap, i)];
845 }
846
847 // Syntactic sugar. It's easy to return a const reference. To
848 // return a non-const reference, we need to use the assigner adaptor.
849 const_reference operator[](size_type i) const {
850 return get(i);
851 }
852
853 element_adaptor operator[](size_type i) {
854 return element_adaptor(this, i);
855 }
856
857 private:
  // Create space at group[offset], assuming value_type has trivial
  // copy constructor and destructor. (Really, we want it to have
  // "trivial move", because that's what realloc and memmove both do.
  // But there's no way to capture that using type_traits, so we
  // pretend that move(x, y) is equivalent to "x.~T(); new(x) T(y);"
  // which is pretty much correct, if a bit conservative.)
  void set_aux(size_type offset, true_type) {
    group = (value_type *)
            realloc_or_die(group, sizeof(*group) * (num_buckets+1));
    // Shift [offset, num_buckets) up one slot, walking back to front so
    // nothing is overwritten before it has been copied.
    // This is equivalent to memmove(), but faster on my Intel P4,
    // at least with gcc4.1 -O2 / glibc 2.3.6.
    for (size_type i = num_buckets; i > offset; --i)
      memcpy(group + i, group + i-1, sizeof(*group));
  }
872
  // Create space at group[offset], without special assumptions about value_type
  // (copy-constructs everything into fresh storage, then frees the old array;
  // the copy must finish before free_group() destroys the sources).
  void set_aux(size_type offset, false_type) {
    // This is valid because 0 <= offset <= num_buckets
    value_type* p = allocate_group(num_buckets + 1);
    uninitialized_copy(group, group + offset, p);
    uninitialized_copy(group + offset, group + num_buckets, p + offset + 1);
    free_group();
    group = p;
  }
882
883 public:
  // This returns a reference to the inserted item (which is a copy of val).
  // TODO(austern): Make this exception safe: handle exceptions from
  // value_type's copy constructor.
  reference set(size_type i, const_reference val) {
    // Compute the compact-array slot *before* touching the bitmap: the
    // offset is the count of occupied buckets below position i.
    size_type offset = pos_to_offset(bitmap, i);  // where we'll find (or insert)
    if ( bmtest(i) ) {
      // Delete the old value, which we're replacing with the new one
      group[offset].~value_type();
    } else {
      // Bucket was empty: open a slot at 'offset'.  Trivially copyable +
      // destructible types take the realloc/memcpy path (true_type);
      // everything else is copy-constructed into fresh storage.
      typedef integral_constant<bool,
                                (has_trivial_copy<value_type>::value &&
                                 has_trivial_destructor<value_type>::value)>
          realloc_and_memmove_ok; // we pretend mv(x,y) == "x.~T(); new(x) T(y)"
      set_aux(offset, realloc_and_memmove_ok());
      ++num_buckets;
      bmset(i);
    }
    // This does the actual inserting.  Since we made the array using
    // malloc, we use "placement new" to just call the constructor.
    new(&group[offset]) value_type(val);
    return group[offset];
  }
906
907 // We let you see if a bucket is non-empty without retrieving it
908 bool test(size_type i) const {
909 return bmtest(i) ? true : false; // cast an int to a bool
910 }
911 bool test(iterator pos) const {
912 return bmtest(pos.pos) ? true : false;
913 }
914
915 private:
  // Shrink the array, assuming value_type has trivial copy
  // constructor and destructor. (Really, we want it to have "trivial
  // move", because that's what realloc and memmove both do. But
  // there's no way to capture that using type_traits, so we pretend
  // that move(x, y) is equivalent to "x.~T(); new(x) T(y);"
  // which is pretty much correct, if a bit conservative.)
  void erase_aux(size_type offset, true_type) {
    // This isn't technically necessary, since we know we have a
    // trivial destructor, but is a cheap way to get a bit more safety.
    group[offset].~value_type();
    // Shift elements above the hole down one slot, front to back.
    // This is equivalent to memmove(), but faster on my Intel P4,
    // at least with gcc4.1 -O2 / glibc 2.3.6.
    assert(num_buckets > 0);
    for (size_type i = offset; i < num_buckets-1; ++i)
      memcpy(group + i, group + i+1, sizeof(*group));  // hopefully inlined!
    group = (value_type *)
            realloc_or_die(group, sizeof(*group) * (num_buckets-1));
  }
934
  // Shrink the array, without any special assumptions about value_type.
  // Copies everything except the erased element into fresh storage, then
  // frees the old array (the copy must precede free_group()).
  void erase_aux(size_type offset, false_type) {
    // This is valid because 0 <= offset < num_buckets. Note the inequality.
    value_type* p = allocate_group(num_buckets - 1);
    uninitialized_copy(group, group + offset, p);
    uninitialized_copy(group + offset + 1, group + num_buckets, p + offset);
    free_group();
    group = p;
  }
944
945 public:
  // This takes the specified elements out of the group. This is
  // "undefining", rather than "clearing".
  // TODO(austern): Make this exception safe: handle exceptions from
  // value_type's copy constructor.
  void erase(size_type i) {
    if ( bmtest(i) ) {  // trivial to erase empty bucket
      // Offset must be computed while bit i is still set in the bitmap.
      size_type offset = pos_to_offset(bitmap,i); // where we'll find (or insert)
      if ( num_buckets == 1 ) {
        // Last element: drop the whole array.  (free_group() already NULLs
        // 'group'; the assignment below is redundant but harmless.)
        free_group();
        group = NULL;
      } else {
        typedef integral_constant<bool,
                                  (has_trivial_copy<value_type>::value &&
                                   has_trivial_destructor<value_type>::value)>
            realloc_and_memmove_ok; // pretend mv(x,y) == "x.~T(); new(x) T(y)"
        erase_aux(offset, realloc_and_memmove_ok());
      }
      --num_buckets;
      bmclear(i);
    }
  }
967
968 void erase(iterator pos) {
969 erase(pos.pos);
970 }
971
972 void erase(iterator start_it, iterator end_it) {
973 // This could be more efficient, but to do so we'd need to make
974 // bmclear() clear a range of indices. Doesn't seem worth it.
975 for ( ; start_it != end_it; ++start_it )
976 erase(start_it);
977 }
978
979
  // I/O
  // We support reading and writing groups to disk. We don't store
  // the actual array contents (which we don't know how to store),
  // just the bitmap and size. Meant to be used with table I/O.
  // Returns true if all was ok
  bool write_metadata(FILE *fp) const {
    // num_buckets goes out big-endian (high byte first) via the file-level
    // PUT_ macro (defined earlier in this file, #undef'd at the end).
    assert(sizeof(num_buckets) == 2);  // we explicitly set to u_int16_t
    PUT_(num_buckets, 8);
    PUT_(num_buckets, 0);
    if ( !fwrite(bitmap, sizeof(bitmap), 1, fp) ) return false;
    return true;
  }
992
  // Reading destroys the old group contents! Returns true if all was ok
  bool read_metadata(FILE *fp) {
    clear();

    int x;  // the GET_ macro requires an 'int x' to be defined
    GET_(num_buckets, 8);   // big-endian: high byte first, matching PUT_ above
    GET_(num_buckets, 0);

    if ( !fread(bitmap, sizeof(bitmap), 1, fp) ) return false;

    // We'll allocate the space, but we won't fill it: it will be
    // left as uninitialized raw memory.  The caller is expected to
    // populate it (e.g. via read_nopointer_data()) before any access.
    group = allocate_group(num_buckets);
    return true;
  }
1008
1009 // If your keys and values are simple enough, we can write them
1010 // to disk for you. "simple enough" means POD and no pointers.
1011 // However, we don't try to normalize endianness
1012 bool write_nopointer_data(FILE *fp) const {
1013 for ( const_nonempty_iterator it = nonempty_begin();
1014 it != nonempty_end(); ++it ) {
1015 if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false;
1016 }
1017 return true;
1018 }
1019
1020 // When reading, we have to override the potential const-ness of *it.
1021 // Again, only meaningful if value_type is a POD.
1022 bool read_nopointer_data(FILE *fp) {
1023 for ( nonempty_iterator it = nonempty_begin();
1024 it != nonempty_end(); ++it ) {
1025 if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
1026 return false;
1027 }
1028 return true;
1029 }
1030
1031 // Comparisons. Note the comparisons are pretty arbitrary: we
1032 // compare values of the first index that isn't equal (using default
1033 // value for empty buckets).
1034 bool operator==(const sparsegroup& x) const {
1035 return ( num_buckets == x.num_buckets &&
1036 memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
1037 STL_NAMESPACE::equal(begin(), end(), x.begin()) ); // from algorithm
1038 }
1039 bool operator<(const sparsegroup& x) const { // also from algorithm
1040 return STL_NAMESPACE::lexicographical_compare(begin(), end(),
1041 x.begin(), x.end());
1042 }
1043 bool operator!=(const sparsegroup& x) const { return !(*this == x); }
1044 bool operator<=(const sparsegroup& x) const { return !(x < *this); }
1045 bool operator>(const sparsegroup& x) const { return x < *this; }
1046 bool operator>=(const sparsegroup& x) const { return !(*this < x); }
1047
 private:
  // The actual data.  'group' holds only the occupied buckets, packed in
  // index order; bit i of 'bitmap' says whether bucket i is occupied.
  value_type *group;                          // (small) array of T's
  unsigned char bitmap[(GROUP_SIZE-1)/8 + 1]; // fancy math is so we round up
  size_type num_buckets;                      // limits GROUP_SIZE to 64K
1053 };
1054
// We need a global swap as well: found by argument-dependent lookup, so
// generic code picks this O(1) member swap over the three-copy default.
template <class T, u_int16_t GROUP_SIZE>
inline void swap(sparsegroup<T,GROUP_SIZE> &x, sparsegroup<T,GROUP_SIZE> &y) {
  x.swap(y);
}
1060
1061 // ---------------------------------------------------------------------------
1062
1063
// A sparsetable acts like an array of table_size buckets of which only the
// buckets you have set() consume per-element storage.  It is built from a
// vector of fixed-width sparsegroups: bucket i lives at position
// (i % GROUP_SIZE) inside group number (i / GROUP_SIZE).
template <class T, u_int16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE>
class sparsetable {
 public:
  // Basic types
  typedef T value_type;                 // stolen from stl_vector.h
  typedef value_type* pointer;
  typedef const value_type* const_pointer;
  typedef table_iterator<sparsetable<T, GROUP_SIZE> > iterator;
  typedef const_table_iterator<sparsetable<T, GROUP_SIZE> > const_iterator;
  typedef table_element_adaptor<sparsetable<T, GROUP_SIZE> > element_adaptor;
  typedef value_type &reference;
  typedef const value_type &const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;

  // These are our special iterators, that go over non-empty buckets in a
  // table.  These aren't const only because you can change non-empty bcks.
  typedef two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     nonempty_iterator;
  typedef const_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     const_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
  // Another special iterator: it frees memory as it iterates (used to resize)
  typedef destructive_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     destructive_iterator;

  // Iterator functions.  Plain iterators visit every bucket, empty or not.
  iterator begin() { return iterator(this, 0); }
  const_iterator begin() const { return const_iterator(this, 0); }
  iterator end() { return iterator(this, size()); }
  const_iterator end() const { return const_iterator(this, size()); }
  reverse_iterator rbegin() { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend() { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // Versions for our special non-empty iterator, which skips empty buckets.
  nonempty_iterator nonempty_begin() {
    return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
  }
  const_nonempty_iterator nonempty_begin() const {
    return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin());
  }
  nonempty_iterator nonempty_end() {
    return nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  const_nonempty_iterator nonempty_end() const {
    return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }
  destructive_iterator destructive_begin() {
    return destructive_iterator(groups.begin(), groups.end(), groups.begin());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(groups.begin(), groups.end(), groups.end());
  }

 private:
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::reference
     GroupsReference;
  typedef typename
     vector< sparsegroup<value_type, GROUP_SIZE> >::const_reference
     GroupsConstReference;
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::iterator
     GroupsIterator;
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::const_iterator
     GroupsConstIterator;

  // How to deal with the proper group
  static size_type num_groups(size_type num) {  // how many to hold num buckets
    return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1;  // i.e. ceil(num/GROUP_SIZE)
  }

  // Bucket i maps to slot (i % GROUP_SIZE) of group number (i / GROUP_SIZE).
  u_int16_t pos_in_group(size_type i) const {
    return static_cast<u_int16_t>(i % GROUP_SIZE);
  }
  size_type group_num(size_type i) const {
    return i / GROUP_SIZE;
  }
  GroupsReference which_group(size_type i) {
    return groups[group_num(i)];
  }
  GroupsConstReference which_group(size_type i) const {
    return groups[group_num(i)];
  }

 public:
  // Constructors -- default, normal (when you specify size), and copy
  sparsetable(size_type sz = 0)
      : groups(num_groups(sz)), table_size(sz), num_buckets(0) { }
  // We can get away with using the default copy constructor,
  // and default destructor, and hence the default operator=.  Huzzah!

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparsetable& x) {
    STL_NAMESPACE::swap(groups, x.groups);
    STL_NAMESPACE::swap(table_size, x.table_size);
    STL_NAMESPACE::swap(num_buckets, x.num_buckets);
  }

  // It's always nice to be able to clear a table without deallocating it
  // (the group vector keeps its length; each group drops its elements).
  void clear() {
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group ) {
      group->clear();
    }
    num_buckets = 0;
  }

  // Functions that tell you about size.
  // NOTE: empty() is non-intuitive!  It does not tell you the number
  // of not-empty buckets (use num_nonempty() for that).  Instead
  // it says whether you've allocated any buckets or not.
  size_type size() const { return table_size; }
  size_type max_size() const { return size_type(-1); }
  bool empty() const { return table_size == 0; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const { return num_buckets; }

  // OK, we'll let you resize one of these puppies
  void resize(size_type new_size) {
    groups.resize(num_groups(new_size));
    if ( new_size < table_size) {  // lower num_buckets, clear last group
      if ( pos_in_group(new_size) > 0 )  // need to clear inside last group
        groups.back().erase(groups.back().begin() + pos_in_group(new_size),
                            groups.back().end());
      // Shrinking may have destroyed elements, so recount from scratch.
      num_buckets = 0;             // refigure # of used buckets
      GroupsConstIterator group;
      for ( group = groups.begin(); group != groups.end(); ++group )
        num_buckets += group->num_nonempty();
    }
    table_size = new_size;
  }


  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    return which_group(i).test(pos_in_group(i));
  }
  bool test(iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }
  bool test(const_iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }

  // We only return const_references because it's really hard to
  // return something settable for empty buckets.  Use set() instead.
  const_reference get(size_type i) const {
    assert(i < table_size);
    return which_group(i).get(pos_in_group(i));
  }

  // TODO(csilvers): make protected + friend element_adaptor
  reference mutating_get(size_type i) {  // fills bucket i before getting
    assert(i < table_size);
    // The group's occupancy may change; mirror the delta into our count.
    size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).mutating_get(pos_in_group(i));
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // Syntactic sugar.  As in sparsegroup, the non-const version is harder
  const_reference operator[](size_type i) const {
    return get(i);
  }

  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }

  // Needed for hashtables, gets as a nonempty_iterator.  Crashes for empty bcks
  const_nonempty_iterator get_iter(size_type i) const {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return const_nonempty_iterator(
        groups.begin(), groups.end(),
        groups.begin() + group_num(i),
        (groups[group_num(i)].nonempty_begin() +
         groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }
  // For nonempty we can return a non-const version
  nonempty_iterator get_iter(size_type i) {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return nonempty_iterator(
        groups.begin(), groups.end(),
        groups.begin() + group_num(i),
        (groups[group_num(i)].nonempty_begin() +
         groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }


  // This returns a reference to the inserted item (which is a copy of val)
  // The trick is to figure out whether we're replacing or inserting anew
  // (the group's before/after occupancy tells us which happened).
  reference set(size_type i, const_reference val) {
    assert(i < table_size);
    size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).set(pos_in_group(i), val);
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // This takes the specified elements out of the table.  This is
  // "undefining", rather than "clearing".
  void erase(size_type i) {
    assert(i < table_size);
    size_type old_numbuckets = which_group(i).num_nonempty();
    which_group(i).erase(pos_in_group(i));
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start_it, iterator end_it) {
    // This could be more efficient, but then we'd need to figure
    // out if we spanned groups or not.  Doesn't seem worth it.
    for ( ; start_it != end_it; ++start_it )
      erase(start_it);
  }


  // We support reading and writing tables to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the groups and sizes.  Returns true if all went ok.

 private:
  // Every time the disk format changes, this should probably change too
  static const unsigned long MAGIC_NUMBER = 0x24687531;

  // Old versions of this code write all data in 32 bits.  We need to
  // support these files as well as having support for 64-bit systems.
  // So we use the following encoding scheme: for values < 2^32-1, we
  // store in 4 bytes in big-endian order.  For values >= 2^32-1, we
  // store 0xFFFFFFFF followed by 8 bytes in big-endian order.  This
  // causes us to mis-read old-version code that stores exactly
  // 0xFFFFFFFF, but I don't think that is likely to have happened for
  // these particular values.
  static bool write_32_or_64(FILE* fp, size_type value) {
    if ( value < 0xFFFFFFFFULL ) {        // fits in 4 bytes
      PUT_(value, 24);
      PUT_(value, 16);
      PUT_(value, 8);
      PUT_(value, 0);
    } else if ( value == 0xFFFFFFFFUL ) {  // special case in 32bit systems
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
      PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); PUT_(0, 0);
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);
    } else {
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
      PUT_(value, 56);
      PUT_(value, 48);
      PUT_(value, 40);
      PUT_(value, 32);
      PUT_(value, 24);
      PUT_(value, 16);
      PUT_(value, 8);
      PUT_(value, 0);
    }
    return true;
  }

  static bool read_32_or_64(FILE* fp, size_type *value) {  // reads into value
    size_type first4 = 0;
    int x;   // the GET_ macro requires an 'int x' to be defined
    GET_(first4, 24);
    GET_(first4, 16);
    GET_(first4, 8);
    GET_(first4, 0);
    if ( first4 < 0xFFFFFFFFULL ) {
      *value = first4;    // small value: those 4 bytes were the whole thing
    } else {
      // Saw the all-ones marker: the next 8 bytes hold the real value.
      GET_(*value, 56);
      GET_(*value, 48);
      GET_(*value, 40);
      GET_(*value, 32);
      GET_(*value, 24);
      GET_(*value, 16);
      GET_(*value, 8);
      GET_(*value, 0);
    }
    return true;
  }

 public:
  // Writes the table shape (magic, sizes, each group's bitmap) to fp.
  // Element values themselves go out via write_nopointer_data().
  bool write_metadata(FILE *fp) const {
    if ( !write_32_or_64(fp, MAGIC_NUMBER) )  return false;
    if ( !write_32_or_64(fp, table_size) )  return false;
    if ( !write_32_or_64(fp, num_buckets) )  return false;

    GroupsConstIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->write_metadata(fp) == false )  return false;
    return true;
  }

  // Reading destroys the old table contents!  Returns true if read ok.
  bool read_metadata(FILE *fp) {
    size_type magic_read = 0;
    if ( !read_32_or_64(fp, &magic_read) )  return false;
    if ( magic_read != MAGIC_NUMBER ) {
      clear();                        // just to be consistent
      return false;
    }

    if ( !read_32_or_64(fp, &table_size) )  return false;
    if ( !read_32_or_64(fp, &num_buckets) )  return false;
    // NOTE(review): if the new table_size is smaller than the current one,
    // resize() below recounts num_buckets from the *old* groups, clobbering
    // the value just read; and each group's read_metadata() leaves element
    // storage uninitialized until read_nopointer_data() (or equivalent)
    // runs -- confirm callers always pair these.

    resize(table_size);               // so the vector's sized ok
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->read_metadata(fp) == false )  return false;
    return true;
  }

  // This code is identical to that for SparseGroup
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  bool write_nopointer_data(FILE *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return true;
  }

  // When reading, we have to override the potential const-ness of *it
  bool read_nopointer_data(FILE *fp) {
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return true;
  }

  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).
  bool operator==(const sparsetable& x) const {
    return ( table_size == x.table_size &&
             num_buckets == x.num_buckets &&
             groups == x.groups );
  }
  bool operator<(const sparsetable& x) const {     // also from algobase.h
    return STL_NAMESPACE::lexicographical_compare(begin(), end(),
                                                  x.begin(), x.end());
  }
  bool operator!=(const sparsetable& x) const { return !(*this == x); }
  bool operator<=(const sparsetable& x) const { return !(x < *this); }
  bool operator>(const sparsetable& x) const { return x < *this; }
  bool operator>=(const sparsetable& x) const { return !(*this < x); }


 private:
  // The actual data: the group vector plus two cached counts.
  vector< sparsegroup<value_type, GROUP_SIZE> > groups;  // our list of groups
  size_type table_size;                  // how many buckets they want
  size_type num_buckets;                 // number of non-empty buckets
};
1439
// We need a global swap as well: found by argument-dependent lookup, so
// generic code picks this O(1) member swap over the three-copy default.
template <class T, u_int16_t GROUP_SIZE>
inline void swap(sparsetable<T,GROUP_SIZE> &x, sparsetable<T,GROUP_SIZE> &y) {
  x.swap(y);
}
1445
1446 #undef GET_
1447 #undef PUT_
1448
1449 _END_GOOGLE_NAMESPACE_
1450
1451 #endif

  ViewVC Help
Powered by ViewVC 1.1.22