source: branches/1.6/src/core2/Lookup.cpp @ 6131

Last change on this file since 6131 was 3177, checked in by mmc, 12 years ago

Updated all of the copyright notices to reference the transfer to
the new HUBzero Foundation, LLC.

File size: 13.2 KB
Line 
1/*
2 * ======================================================================
3 *  Rappture::Lookup<valType>
4 *  Rappture::Lookup2<keyType,valType>
5 *
6 *  AUTHOR:  Michael McLennan, Purdue University
7 *  Copyright (c) 2004-2012  HUBzero Foundation, LLC
8 * ----------------------------------------------------------------------
9 *  See the file "license.terms" for information on usage and
10 *  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 * ======================================================================
12 *
13 *  This code is based on the Tcl_HashTable facility included in the
14 *  Tcl source release, which includes the following copyright:
15 *
16 *  Copyright (c) 1987-1994 The Regents of the University of California.
17 *  Copyright (c) 1993-1996 Lucent Technologies.
18 *  Copyright (c) 1994-1998 Sun Microsystems, Inc.
19 *  Copyright (c) 1998-2000 by Scriptics Corporation.
20 *
21 *  This software is copyrighted by the Regents of the University of
22 *  California, Sun Microsystems, Inc., Scriptics Corporation,
23 *  and other parties.  The following terms apply to all files associated
24 *  with the software unless explicitly disclaimed in individual files.
25 *
26 *  The authors hereby grant permission to use, copy, modify, distribute,
27 *  and license this software and its documentation for any purpose, provided
28 *  that existing copyright notices are retained in all copies and that this
29 *  notice is included verbatim in any distributions. No written agreement,
30 *  license, or royalty fee is required for any of the authorized uses.
31 *  Modifications to this software may be copyrighted by their authors
32 *  and need not follow the licensing terms described here, provided that
33 *  the new terms are clearly indicated on the first page of each file where
34 *  they apply.
35 *
36 *  IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
37 *  FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
38 *  ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
39 *  DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
40 *  POSSIBILITY OF SUCH DAMAGE.
41 *
42 *  THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
43 *  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
44 *  FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
45 *  IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
46 *  NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
47 *  MODIFICATIONS.
48 *
49 *  GOVERNMENT USE: If you are acquiring this software on behalf of the
50 *  U.S. government, the Government shall have only "Restricted Rights"
51 *  in the software and related documentation as defined in the Federal
52 *  Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
53 *  are acquiring the software on behalf of the Department of Defense, the
54 *  software shall be classified as "Commercial Computer Software" and the
55 *  Government shall have only "Restricted Rights" as defined in Clause
56 *  252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
57 *  authors grant the U.S. Government and others acting in its behalf
58 *  permission to use and distribute the software in accordance with the
59 *  terms specified in this license.
60 * ======================================================================
61 */
62#include <sstream>
63#include "Lookup.h"
64
65using namespace Rappture;
66
67/*
68 * When there are this many entries per bucket, on average, rebuild
69 * the hash table to make it larger.
70 */
71#define REBUILD_MULTIPLIER  3
72
73
74LookupCore::LookupCore(size_t keySize)
75{
76    // convert key size in bytes to size in words
77    _keySize = keySize/4 + ((keySize%4 > 0) ? 1 : 0);
78
79    _numEntries = 0;
80    _numBuckets = RP_DICT_MIN_SIZE;
81    _buckets = _staticBuckets;
82    for (int i=0; i < RP_DICT_MIN_SIZE; i++) {
83        _staticBuckets[i] = NULL;
84    }
85    _rebuildSize = RP_DICT_MIN_SIZE*REBUILD_MULTIPLIER;
86    _downShift = 28;
87    _mask = 3;
88}
89
90LookupCore::~LookupCore()
91{
92    // free up all entries in the lookup table
93    for (int i = 0; i < _numBuckets; i++) {
94        LookupEntryCore *entryPtr = _buckets[i];
95        while (entryPtr != NULL) {
96            LookupEntryCore *nextPtr = entryPtr->nextPtr;
97            delete entryPtr;
98            entryPtr = nextPtr;
99        }
100    }
101
102    // free up the bucket array, as long as it's not the built-in one
103    if (_buckets != _staticBuckets) {
104        delete [] _buckets;
105    }
106}
107
108/**
109 * Finds a lookup entry with the specified key.  Returns
110 * the entry, or NULL if there was no matching entry.
111 */
112LookupEntryCore*
113LookupCore::find(void* key)
114{
115    unsigned int index = _hashIndex(key);
116
117    //
118    // Search all of the entries in the appropriate bucket.
119    //
120    LookupEntryCore *hPtr;
121    if (_keySize == 0) {
122        for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) {
123            char* p1 = (char*)key;
124            char* p2 = hPtr->key.string;
125            while (1) {
126                if (*p1 != *p2) {
127                    break;
128                }
129                if (*p1 == '\0') {
130                    return hPtr;
131                }
132                p1++, p2++;
133            }
134        }
135    } else {
136        for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) {
137            int *iPtr1 = (int*)key;
138            int *iPtr2 = hPtr->key.words;
139            for (int count = _keySize; ; count--, iPtr1++, iPtr2++) {
140                if (count == 0) {
141                    return hPtr;
142                }
143                if (*iPtr1 != *iPtr2) {
144                    break;
145                }
146            }
147        }
148    }
149    return NULL;
150}
151
152/**
153 * Finds a lookup entry with the specified key.  Returns
154 * the entry, or NULL if there was no matching entry.
155 */
156LookupEntryCore*
157LookupCore::get(void* key, int* newEntryPtr)
158{
159    if (newEntryPtr) {
160        *newEntryPtr = 0;
161    }
162
163    unsigned int index = _hashIndex(key);
164
165    //
166    // Search all of the entries in the appropriate bucket.
167    //
168    LookupEntryCore *hPtr;
169    if (_keySize == 0) {
170        for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) {
171            char* p1 = (char*)key;
172            char* p2 = hPtr->key.string;
173            while (1) {
174                if (*p1 != *p2) {
175                    break;
176                }
177                if (*p1 == '\0') {
178                    return hPtr;
179                }
180                p1++, p2++;
181            }
182        }
183    } else {
184        for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) {
185            int *iPtr1 = (int*)key;
186            int *iPtr2 = hPtr->key.words;
187            for (int count = _keySize; ; count--, iPtr1++, iPtr2++) {
188                if (count == 0) {
189                    return hPtr;
190                }
191                if (*iPtr1 != *iPtr2) {
192                    break;
193                }
194            }
195        }
196    }
197
198    //
199    // Entry not found.  Add a new one to the bucket.
200    //
201    if (newEntryPtr) {
202        *newEntryPtr = 1;
203    }
204    if (_keySize == 0) {
205        char* mem = new char[sizeof(LookupEntryCore)
206            + strlen((char*)key) - sizeof(hPtr->key) + 1];
207        hPtr = new (mem) LookupEntryCore;
208    } else {
209        char* mem = new char[sizeof(LookupEntryCore) + _keySize-1];
210        hPtr = new (mem) LookupEntryCore;
211    }
212    hPtr->dictPtr = this;
213    hPtr->bucketPtr = &(_buckets[index]);
214    hPtr->nextPtr = *hPtr->bucketPtr;
215    hPtr->valuePtr = NULL;
216    *hPtr->bucketPtr = hPtr;
217    _numEntries++;
218
219    if (_keySize == 0) {
220        strcpy(hPtr->key.string, (char*)key);
221    } else {
222        memcpy(hPtr->key.words, (int*)key, _keySize*4);
223    }
224
225    //
226    // If the table has exceeded a decent size, rebuild it with many
227    // more buckets.
228    //
229    if (_numEntries >= _rebuildSize) {
230        _rebuildBuckets();
231    }
232    return hPtr;
233}
234
235/**
236 * Removes an entry from the lookup table.  The data associated
237 * with the entry is freed at the Lookup level before calling
238 * this core method to clean up the slot.
239 */
240LookupEntryCore*
241LookupCore::erase(LookupEntryCore* entryPtr)
242{
243    LookupEntryCore *prevPtr;
244
245    if (*entryPtr->bucketPtr == entryPtr) {
246        *entryPtr->bucketPtr = entryPtr->nextPtr;
247    } else {
248        for (prevPtr = *entryPtr->bucketPtr; ; prevPtr = prevPtr->nextPtr) {
249            assert(prevPtr != NULL);
250            if (prevPtr->nextPtr == entryPtr) {
251                prevPtr->nextPtr = entryPtr->nextPtr;
252                break;
253            }
254        }
255    }
256    entryPtr->dictPtr->_numEntries--;
257    LookupEntryCore *nextPtr = entryPtr->nextPtr;
258    delete entryPtr;
259
260    return nextPtr;
261}
262
263/**
264 * Returns the next entry after the given entry.  If then entry
265 * is NULL and bucketNum is 0, it returns the very first entry.
266 * When there are no more entries, it returns NULL.  The bucketNum
267 * helps keep track of the next bucket in the table.
268 */
269LookupEntryCore*
270LookupCore::next(LookupEntryCore *entryPtr, int& bucketNum)
271{
272    if (entryPtr) {
273        entryPtr = entryPtr->nextPtr;
274    }
275
276    // hit the end of a bucket, or just starting? then go to next bucket
277    while (entryPtr == NULL) {
278        if (bucketNum >= _numBuckets) {
279            return NULL;
280        }
281        entryPtr = _buckets[bucketNum];
282        bucketNum++;
283    }
284    return entryPtr;
285}
286
287/**
288 * Returns a description of all entries in the lookup table.
289 * Useful for debugging.
290 */
291std::string
292LookupCore::stats()
293{
294#define NUM_COUNTERS 10
295    int i, count[NUM_COUNTERS], overflow;
296    double average, tmp;
297
298    //
299    // Compute a histogram of bucket usage.
300    //
301    for (i=0; i < NUM_COUNTERS; i++) {
302        count[i] = 0;
303    }
304    overflow = 0;
305    average = 0.0;
306    for (i=0; i < _numBuckets; i++) {
307        int j=0;
308        for (LookupEntryCore *entryPtr=_buckets[i];
309             entryPtr != NULL;
310             entryPtr = entryPtr->nextPtr) {
311            j++;
312        }
313        if (j < NUM_COUNTERS) {
314            count[j]++;
315        } else {
316            overflow++;
317        }
318        tmp = j;
319        average += 0.5*(tmp+1.0)*(tmp/_numEntries);
320    }
321
322    //
323    // Send back the histogram and other stats.
324    //
325    std::ostringstream result;
326    result << _numEntries << " entries in lookup table, ";
327    result << _numBuckets << " buckets" << std::endl;
328
329    for (i=0; i < NUM_COUNTERS; i++) {
330        result << "number of buckets with " << i
331               << " entries: " << count[i] << std::endl;
332    }
333    result << "number of buckets with " << NUM_COUNTERS
334               << " or more entries: " << overflow << std::endl;
335    result << "average search distance for entry: " << average << std::endl;
336    return result.str();
337}
338
339/**
340 * Invoked when the ratio of entries to hash buckets becomes too
341 * large.  It creates a new layout with a larger bucket array
342 * and moves all of the entries into the new table.
343 */
344void
345LookupCore::_rebuildBuckets()
346{
347    int oldSize = _numBuckets;
348    LookupEntryCore** oldBuckets = _buckets;
349
350    //
351    // Allocate and initialize the new bucket array, and set up
352    // hashing constants for new array size.
353    //
354    _numBuckets *= 4;
355    _buckets = new (LookupEntryCore*)[_numBuckets];
356    LookupEntryCore **newChainPtr = _buckets;
357    for (int count=_numBuckets; count > 0; count--, newChainPtr++) {
358        *newChainPtr = NULL;
359    }
360    _rebuildSize *= 4;
361    _downShift -= 2;
362    _mask = (_mask << 2) + 3;
363
364    //
365    // Rehash all of the existing entries into the new bucket array.
366    //
367    for (LookupEntryCore** oldChainPtr=oldBuckets;
368          oldSize > 0; oldSize--, oldChainPtr++) {
369
370        for (LookupEntryCore* hPtr=*oldChainPtr;
371              hPtr != NULL; hPtr=*oldChainPtr) {
372
373            *oldChainPtr = hPtr->nextPtr;
374            unsigned int index = _hashIndex((void*)hPtr->key.string);
375            hPtr->bucketPtr = &(_buckets[index]);
376            hPtr->nextPtr = *hPtr->bucketPtr;
377            *hPtr->bucketPtr = hPtr;
378        }
379    }
380
381    //
382    // Free up the old bucket array, if it was dynamically allocated.
383    //
384    if (oldBuckets != _staticBuckets) {
385        delete [] oldBuckets;
386    }
387}
388
389/**
390 * Computes the hash value for the specified key.
391 */
392unsigned int
393LookupCore::_hashIndex(void* key)
394{
395    unsigned int result = 0;
396    if (_keySize == 0) {
397        char* string = (char*)key;
398        /*
399         * I tried a zillion different hash functions and asked many other
400         * people for advice.  Many people had their own favorite functions,
401         * all different, but no-one had much idea why they were good ones.
402         * I chose the one below (multiply by 9 and add new character)
403         * because of the following reasons:
404         *
405         * 1. Multiplying by 10 is perfect for keys that are decimal strings,
406         *    and multiplying by 9 is just about as good.
407         * 2. Times-9 is (shift-left-3) plus (old).  This means that each
408         *    character's bits hang around in the low-order bits of the
409         *    hash value for ever, plus they spread fairly rapidly up to
410         *    the high-order bits to fill out the hash value.  This seems
411         *    works well both for decimal and non-decimal strings.
412         */
413        while (1) {
414            int c = *string;
415            string++;
416            if (c == 0) {
417                break;
418            }
419            result += (result<<3) + c;
420        }
421        result &= _mask;
422    } else {
423        int *iPtr = (int*)key;
424        for (int count=_keySize; count > 0; count--, iPtr++) {
425            result += *iPtr;
426        }
427        result = (((((long)(result))*1103515245) >> _downShift) & _mask);
428    }
429    return result;
430}
Note: See TracBrowser for help on using the repository browser.