1 | /* |
---|
2 | * ====================================================================== |
---|
3 | * Rappture::Lookup<valType> |
---|
4 | * Rappture::Lookup2<keyType,valType> |
---|
5 | * |
---|
6 | * AUTHOR: Michael McLennan, Purdue University |
---|
7 | * Copyright (c) 2004-2012 HUBzero Foundation, LLC |
---|
8 | * ---------------------------------------------------------------------- |
---|
9 | * See the file "license.terms" for information on usage and |
---|
10 | * redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
11 | * ====================================================================== |
---|
12 | * |
---|
13 | * This code is based on the Tcl_HashTable facility included in the |
---|
14 | * Tcl source release, which includes the following copyright: |
---|
15 | * |
---|
16 | * Copyright (c) 1987-1994 The Regents of the University of California. |
---|
17 | * Copyright (c) 1993-1996 Lucent Technologies. |
---|
18 | * Copyright (c) 1994-1998 Sun Microsystems, Inc. |
---|
19 | * Copyright (c) 1998-2000 by Scriptics Corporation. |
---|
20 | * |
---|
21 | * This software is copyrighted by the Regents of the University of |
---|
22 | * California, Sun Microsystems, Inc., Scriptics Corporation, |
---|
23 | * and other parties. The following terms apply to all files associated |
---|
24 | * with the software unless explicitly disclaimed in individual files. |
---|
25 | * |
---|
26 | * The authors hereby grant permission to use, copy, modify, distribute, |
---|
27 | * and license this software and its documentation for any purpose, provided |
---|
28 | * that existing copyright notices are retained in all copies and that this |
---|
29 | * notice is included verbatim in any distributions. No written agreement, |
---|
30 | * license, or royalty fee is required for any of the authorized uses. |
---|
31 | * Modifications to this software may be copyrighted by their authors |
---|
32 | * and need not follow the licensing terms described here, provided that |
---|
33 | * the new terms are clearly indicated on the first page of each file where |
---|
34 | * they apply. |
---|
35 | * |
---|
36 | * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY |
---|
37 | * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
---|
38 | * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY |
---|
39 | * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE |
---|
40 | * POSSIBILITY OF SUCH DAMAGE. |
---|
41 | * |
---|
42 | * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, |
---|
43 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, |
---|
44 | * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE |
---|
45 | * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE |
---|
46 | * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR |
---|
47 | * MODIFICATIONS. |
---|
48 | * |
---|
49 | * GOVERNMENT USE: If you are acquiring this software on behalf of the |
---|
50 | * U.S. government, the Government shall have only "Restricted Rights" |
---|
51 | * in the software and related documentation as defined in the Federal |
---|
52 | * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you |
---|
53 | * are acquiring the software on behalf of the Department of Defense, the |
---|
54 | * software shall be classified as "Commercial Computer Software" and the |
---|
55 | * Government shall have only "Restricted Rights" as defined in Clause |
---|
56 | * 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the |
---|
57 | * authors grant the U.S. Government and others acting in its behalf |
---|
58 | * permission to use and distribute the software in accordance with the |
---|
59 | * terms specified in this license. |
---|
60 | * ====================================================================== |
---|
61 | */ |
---|
62 | #include <sstream> |
---|
63 | #include "Lookup.h" |
---|
64 | |
---|
65 | using namespace Rappture; |
---|
66 | |
---|
67 | /* |
---|
68 | * When there are this many entries per bucket, on average, rebuild |
---|
69 | * the hash table to make it larger. |
---|
70 | */ |
---|
71 | #define REBUILD_MULTIPLIER 3 |
---|
72 | |
---|
73 | |
---|
74 | LookupCore::LookupCore(size_t keySize) |
---|
75 | { |
---|
76 | // convert key size in bytes to size in words |
---|
77 | _keySize = keySize/4 + ((keySize%4 > 0) ? 1 : 0); |
---|
78 | |
---|
79 | _numEntries = 0; |
---|
80 | _numBuckets = RP_DICT_MIN_SIZE; |
---|
81 | _buckets = _staticBuckets; |
---|
82 | for (int i=0; i < RP_DICT_MIN_SIZE; i++) { |
---|
83 | _staticBuckets[i] = NULL; |
---|
84 | } |
---|
85 | _rebuildSize = RP_DICT_MIN_SIZE*REBUILD_MULTIPLIER; |
---|
86 | _downShift = 28; |
---|
87 | _mask = 3; |
---|
88 | } |
---|
89 | |
---|
90 | LookupCore::~LookupCore() |
---|
91 | { |
---|
92 | // free up all entries in the lookup table |
---|
93 | for (int i = 0; i < _numBuckets; i++) { |
---|
94 | LookupEntryCore *entryPtr = _buckets[i]; |
---|
95 | while (entryPtr != NULL) { |
---|
96 | LookupEntryCore *nextPtr = entryPtr->nextPtr; |
---|
97 | delete entryPtr; |
---|
98 | entryPtr = nextPtr; |
---|
99 | } |
---|
100 | } |
---|
101 | |
---|
102 | // free up the bucket array, as long as it's not the built-in one |
---|
103 | if (_buckets != _staticBuckets) { |
---|
104 | delete [] _buckets; |
---|
105 | } |
---|
106 | } |
---|
107 | |
---|
108 | /** |
---|
109 | * Finds a lookup entry with the specified key. Returns |
---|
110 | * the entry, or NULL if there was no matching entry. |
---|
111 | */ |
---|
112 | LookupEntryCore* |
---|
113 | LookupCore::find(void* key) |
---|
114 | { |
---|
115 | unsigned int index = _hashIndex(key); |
---|
116 | |
---|
117 | // |
---|
118 | // Search all of the entries in the appropriate bucket. |
---|
119 | // |
---|
120 | LookupEntryCore *hPtr; |
---|
121 | if (_keySize == 0) { |
---|
122 | for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) { |
---|
123 | char* p1 = (char*)key; |
---|
124 | char* p2 = hPtr->key.string; |
---|
125 | while (1) { |
---|
126 | if (*p1 != *p2) { |
---|
127 | break; |
---|
128 | } |
---|
129 | if (*p1 == '\0') { |
---|
130 | return hPtr; |
---|
131 | } |
---|
132 | p1++, p2++; |
---|
133 | } |
---|
134 | } |
---|
135 | } else { |
---|
136 | for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) { |
---|
137 | int *iPtr1 = (int*)key; |
---|
138 | int *iPtr2 = hPtr->key.words; |
---|
139 | for (int count = _keySize; ; count--, iPtr1++, iPtr2++) { |
---|
140 | if (count == 0) { |
---|
141 | return hPtr; |
---|
142 | } |
---|
143 | if (*iPtr1 != *iPtr2) { |
---|
144 | break; |
---|
145 | } |
---|
146 | } |
---|
147 | } |
---|
148 | } |
---|
149 | return NULL; |
---|
150 | } |
---|
151 | |
---|
152 | /** |
---|
153 | * Finds a lookup entry with the specified key. Returns |
---|
154 | * the entry, or NULL if there was no matching entry. |
---|
155 | */ |
---|
156 | LookupEntryCore* |
---|
157 | LookupCore::get(void* key, int* newEntryPtr) |
---|
158 | { |
---|
159 | if (newEntryPtr) { |
---|
160 | *newEntryPtr = 0; |
---|
161 | } |
---|
162 | |
---|
163 | unsigned int index = _hashIndex(key); |
---|
164 | |
---|
165 | // |
---|
166 | // Search all of the entries in the appropriate bucket. |
---|
167 | // |
---|
168 | LookupEntryCore *hPtr; |
---|
169 | if (_keySize == 0) { |
---|
170 | for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) { |
---|
171 | char* p1 = (char*)key; |
---|
172 | char* p2 = hPtr->key.string; |
---|
173 | while (1) { |
---|
174 | if (*p1 != *p2) { |
---|
175 | break; |
---|
176 | } |
---|
177 | if (*p1 == '\0') { |
---|
178 | return hPtr; |
---|
179 | } |
---|
180 | p1++, p2++; |
---|
181 | } |
---|
182 | } |
---|
183 | } else { |
---|
184 | for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) { |
---|
185 | int *iPtr1 = (int*)key; |
---|
186 | int *iPtr2 = hPtr->key.words; |
---|
187 | for (int count = _keySize; ; count--, iPtr1++, iPtr2++) { |
---|
188 | if (count == 0) { |
---|
189 | return hPtr; |
---|
190 | } |
---|
191 | if (*iPtr1 != *iPtr2) { |
---|
192 | break; |
---|
193 | } |
---|
194 | } |
---|
195 | } |
---|
196 | } |
---|
197 | |
---|
198 | // |
---|
199 | // Entry not found. Add a new one to the bucket. |
---|
200 | // |
---|
201 | if (newEntryPtr) { |
---|
202 | *newEntryPtr = 1; |
---|
203 | } |
---|
204 | if (_keySize == 0) { |
---|
205 | char* mem = new char[sizeof(LookupEntryCore) |
---|
206 | + strlen((char*)key) - sizeof(hPtr->key) + 1]; |
---|
207 | hPtr = new (mem) LookupEntryCore; |
---|
208 | } else { |
---|
209 | char* mem = new char[sizeof(LookupEntryCore) + _keySize-1]; |
---|
210 | hPtr = new (mem) LookupEntryCore; |
---|
211 | } |
---|
212 | hPtr->dictPtr = this; |
---|
213 | hPtr->bucketPtr = &(_buckets[index]); |
---|
214 | hPtr->nextPtr = *hPtr->bucketPtr; |
---|
215 | hPtr->valuePtr = NULL; |
---|
216 | *hPtr->bucketPtr = hPtr; |
---|
217 | _numEntries++; |
---|
218 | |
---|
219 | if (_keySize == 0) { |
---|
220 | strcpy(hPtr->key.string, (char*)key); |
---|
221 | } else { |
---|
222 | memcpy(hPtr->key.words, (int*)key, _keySize*4); |
---|
223 | } |
---|
224 | |
---|
225 | // |
---|
226 | // If the table has exceeded a decent size, rebuild it with many |
---|
227 | // more buckets. |
---|
228 | // |
---|
229 | if (_numEntries >= _rebuildSize) { |
---|
230 | _rebuildBuckets(); |
---|
231 | } |
---|
232 | return hPtr; |
---|
233 | } |
---|
234 | |
---|
235 | /** |
---|
236 | * Removes an entry from the lookup table. The data associated |
---|
237 | * with the entry is freed at the Lookup level before calling |
---|
238 | * this core method to clean up the slot. |
---|
239 | */ |
---|
240 | LookupEntryCore* |
---|
241 | LookupCore::erase(LookupEntryCore* entryPtr) |
---|
242 | { |
---|
243 | LookupEntryCore *prevPtr; |
---|
244 | |
---|
245 | if (*entryPtr->bucketPtr == entryPtr) { |
---|
246 | *entryPtr->bucketPtr = entryPtr->nextPtr; |
---|
247 | } else { |
---|
248 | for (prevPtr = *entryPtr->bucketPtr; ; prevPtr = prevPtr->nextPtr) { |
---|
249 | assert(prevPtr != NULL); |
---|
250 | if (prevPtr->nextPtr == entryPtr) { |
---|
251 | prevPtr->nextPtr = entryPtr->nextPtr; |
---|
252 | break; |
---|
253 | } |
---|
254 | } |
---|
255 | } |
---|
256 | entryPtr->dictPtr->_numEntries--; |
---|
257 | LookupEntryCore *nextPtr = entryPtr->nextPtr; |
---|
258 | delete entryPtr; |
---|
259 | |
---|
260 | return nextPtr; |
---|
261 | } |
---|
262 | |
---|
263 | /** |
---|
264 | * Returns the next entry after the given entry. If then entry |
---|
265 | * is NULL and bucketNum is 0, it returns the very first entry. |
---|
266 | * When there are no more entries, it returns NULL. The bucketNum |
---|
267 | * helps keep track of the next bucket in the table. |
---|
268 | */ |
---|
269 | LookupEntryCore* |
---|
270 | LookupCore::next(LookupEntryCore *entryPtr, int& bucketNum) |
---|
271 | { |
---|
272 | if (entryPtr) { |
---|
273 | entryPtr = entryPtr->nextPtr; |
---|
274 | } |
---|
275 | |
---|
276 | // hit the end of a bucket, or just starting? then go to next bucket |
---|
277 | while (entryPtr == NULL) { |
---|
278 | if (bucketNum >= _numBuckets) { |
---|
279 | return NULL; |
---|
280 | } |
---|
281 | entryPtr = _buckets[bucketNum]; |
---|
282 | bucketNum++; |
---|
283 | } |
---|
284 | return entryPtr; |
---|
285 | } |
---|
286 | |
---|
287 | /** |
---|
288 | * Returns a description of all entries in the lookup table. |
---|
289 | * Useful for debugging. |
---|
290 | */ |
---|
291 | std::string |
---|
292 | LookupCore::stats() |
---|
293 | { |
---|
294 | #define NUM_COUNTERS 10 |
---|
295 | int i, count[NUM_COUNTERS], overflow; |
---|
296 | double average, tmp; |
---|
297 | |
---|
298 | // |
---|
299 | // Compute a histogram of bucket usage. |
---|
300 | // |
---|
301 | for (i=0; i < NUM_COUNTERS; i++) { |
---|
302 | count[i] = 0; |
---|
303 | } |
---|
304 | overflow = 0; |
---|
305 | average = 0.0; |
---|
306 | for (i=0; i < _numBuckets; i++) { |
---|
307 | int j=0; |
---|
308 | for (LookupEntryCore *entryPtr=_buckets[i]; |
---|
309 | entryPtr != NULL; |
---|
310 | entryPtr = entryPtr->nextPtr) { |
---|
311 | j++; |
---|
312 | } |
---|
313 | if (j < NUM_COUNTERS) { |
---|
314 | count[j]++; |
---|
315 | } else { |
---|
316 | overflow++; |
---|
317 | } |
---|
318 | tmp = j; |
---|
319 | average += 0.5*(tmp+1.0)*(tmp/_numEntries); |
---|
320 | } |
---|
321 | |
---|
322 | // |
---|
323 | // Send back the histogram and other stats. |
---|
324 | // |
---|
325 | std::ostringstream result; |
---|
326 | result << _numEntries << " entries in lookup table, "; |
---|
327 | result << _numBuckets << " buckets" << std::endl; |
---|
328 | |
---|
329 | for (i=0; i < NUM_COUNTERS; i++) { |
---|
330 | result << "number of buckets with " << i |
---|
331 | << " entries: " << count[i] << std::endl; |
---|
332 | } |
---|
333 | result << "number of buckets with " << NUM_COUNTERS |
---|
334 | << " or more entries: " << overflow << std::endl; |
---|
335 | result << "average search distance for entry: " << average << std::endl; |
---|
336 | return result.str(); |
---|
337 | } |
---|
338 | |
---|
339 | /** |
---|
340 | * Invoked when the ratio of entries to hash buckets becomes too |
---|
341 | * large. It creates a new layout with a larger bucket array |
---|
342 | * and moves all of the entries into the new table. |
---|
343 | */ |
---|
344 | void |
---|
345 | LookupCore::_rebuildBuckets() |
---|
346 | { |
---|
347 | int oldSize = _numBuckets; |
---|
348 | LookupEntryCore** oldBuckets = _buckets; |
---|
349 | |
---|
350 | // |
---|
351 | // Allocate and initialize the new bucket array, and set up |
---|
352 | // hashing constants for new array size. |
---|
353 | // |
---|
354 | _numBuckets *= 4; |
---|
355 | _buckets = new (LookupEntryCore*)[_numBuckets]; |
---|
356 | LookupEntryCore **newChainPtr = _buckets; |
---|
357 | for (int count=_numBuckets; count > 0; count--, newChainPtr++) { |
---|
358 | *newChainPtr = NULL; |
---|
359 | } |
---|
360 | _rebuildSize *= 4; |
---|
361 | _downShift -= 2; |
---|
362 | _mask = (_mask << 2) + 3; |
---|
363 | |
---|
364 | // |
---|
365 | // Rehash all of the existing entries into the new bucket array. |
---|
366 | // |
---|
367 | for (LookupEntryCore** oldChainPtr=oldBuckets; |
---|
368 | oldSize > 0; oldSize--, oldChainPtr++) { |
---|
369 | |
---|
370 | for (LookupEntryCore* hPtr=*oldChainPtr; |
---|
371 | hPtr != NULL; hPtr=*oldChainPtr) { |
---|
372 | |
---|
373 | *oldChainPtr = hPtr->nextPtr; |
---|
374 | unsigned int index = _hashIndex((void*)hPtr->key.string); |
---|
375 | hPtr->bucketPtr = &(_buckets[index]); |
---|
376 | hPtr->nextPtr = *hPtr->bucketPtr; |
---|
377 | *hPtr->bucketPtr = hPtr; |
---|
378 | } |
---|
379 | } |
---|
380 | |
---|
381 | // |
---|
382 | // Free up the old bucket array, if it was dynamically allocated. |
---|
383 | // |
---|
384 | if (oldBuckets != _staticBuckets) { |
---|
385 | delete [] oldBuckets; |
---|
386 | } |
---|
387 | } |
---|
388 | |
---|
389 | /** |
---|
390 | * Computes the hash value for the specified key. |
---|
391 | */ |
---|
392 | unsigned int |
---|
393 | LookupCore::_hashIndex(void* key) |
---|
394 | { |
---|
395 | unsigned int result = 0; |
---|
396 | if (_keySize == 0) { |
---|
397 | char* string = (char*)key; |
---|
398 | /* |
---|
399 | * I tried a zillion different hash functions and asked many other |
---|
400 | * people for advice. Many people had their own favorite functions, |
---|
401 | * all different, but no-one had much idea why they were good ones. |
---|
402 | * I chose the one below (multiply by 9 and add new character) |
---|
403 | * because of the following reasons: |
---|
404 | * |
---|
405 | * 1. Multiplying by 10 is perfect for keys that are decimal strings, |
---|
406 | * and multiplying by 9 is just about as good. |
---|
407 | * 2. Times-9 is (shift-left-3) plus (old). This means that each |
---|
408 | * character's bits hang around in the low-order bits of the |
---|
409 | * hash value for ever, plus they spread fairly rapidly up to |
---|
410 | * the high-order bits to fill out the hash value. This seems |
---|
411 | * works well both for decimal and non-decimal strings. |
---|
412 | */ |
---|
413 | while (1) { |
---|
414 | int c = *string; |
---|
415 | string++; |
---|
416 | if (c == 0) { |
---|
417 | break; |
---|
418 | } |
---|
419 | result += (result<<3) + c; |
---|
420 | } |
---|
421 | result &= _mask; |
---|
422 | } else { |
---|
423 | int *iPtr = (int*)key; |
---|
424 | for (int count=_keySize; count > 0; count--, iPtr++) { |
---|
425 | result += *iPtr; |
---|
426 | } |
---|
427 | result = (((((long)(result))*1103515245) >> _downShift) & _mask); |
---|
428 | } |
---|
429 | return result; |
---|
430 | } |
---|