Context Navigation

Lookup.cpp @ 6036

Last change on this file since 6036 was 3177, checked in by mmc, 12 years ago
Updated all of the copyright notices to reference the transfer to the new HUBzero Foundation, LLC.
File size: 13.2 KB

Line
1	/*
2	* ======================================================================
3	* Rappture::Lookup<valType>
4	* Rappture::Lookup2<keyType,valType>
5	*
6	* AUTHOR: Michael McLennan, Purdue University
7	* Copyright (c) 2004-2012 HUBzero Foundation, LLC
8	* ----------------------------------------------------------------------
9	* See the file "license.terms" for information on usage and
10	* redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11	* ======================================================================
12	*
13	* This code is based on the Tcl_HashTable facility included in the
14	* Tcl source release, which includes the following copyright:
15	*
16	* Copyright (c) 1987-1994 The Regents of the University of California.
17	* Copyright (c) 1993-1996 Lucent Technologies.
18	* Copyright (c) 1994-1998 Sun Microsystems, Inc.
19	* Copyright (c) 1998-2000 by Scriptics Corporation.
20	*
21	* This software is copyrighted by the Regents of the University of
22	* California, Sun Microsystems, Inc., Scriptics Corporation,
23	* and other parties. The following terms apply to all files associated
24	* with the software unless explicitly disclaimed in individual files.
25	*
26	* The authors hereby grant permission to use, copy, modify, distribute,
27	* and license this software and its documentation for any purpose, provided
28	* that existing copyright notices are retained in all copies and that this
29	* notice is included verbatim in any distributions. No written agreement,
30	* license, or royalty fee is required for any of the authorized uses.
31	* Modifications to this software may be copyrighted by their authors
32	* and need not follow the licensing terms described here, provided that
33	* the new terms are clearly indicated on the first page of each file where
34	* they apply.
35	*
36	* IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
37	* FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
38	* ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
39	* DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
40	* POSSIBILITY OF SUCH DAMAGE.
41	*
42	* THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
43	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
44	* FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
45	* IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
46	* NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
47	* MODIFICATIONS.
48	*
49	* GOVERNMENT USE: If you are acquiring this software on behalf of the
50	* U.S. government, the Government shall have only "Restricted Rights"
51	* in the software and related documentation as defined in the Federal
52	* Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
53	* are acquiring the software on behalf of the Department of Defense, the
54	* software shall be classified as "Commercial Computer Software" and the
55	* Government shall have only "Restricted Rights" as defined in Clause
56	* 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
57	* authors grant the U.S. Government and others acting in its behalf
58	* permission to use and distribute the software in accordance with the
59	* terms specified in this license.
60	* ======================================================================
61	*/
62	#include <sstream>
63	#include "Lookup.h"
64
65	using namespace Rappture;
66
67	/*
68	* When there are this many entries per bucket, on average, rebuild
69	* the hash table to make it larger.
70	*/
71	#define REBUILD_MULTIPLIER 3
72
73
74	LookupCore::LookupCore(size_t keySize)
75	{
76	// convert key size in bytes to size in words
77	_keySize = keySize/4 + ((keySize%4 > 0) ? 1 : 0);
78
79	_numEntries = 0;
80	_numBuckets = RP_DICT_MIN_SIZE;
81	_buckets = _staticBuckets;
82	for (int i=0; i < RP_DICT_MIN_SIZE; i++) {
83	_staticBuckets[i] = NULL;
84	}
85	_rebuildSize = RP_DICT_MIN_SIZE*REBUILD_MULTIPLIER;
86	_downShift = 28;
87	_mask = 3;
88	}
89
90	LookupCore::~LookupCore()
91	{
92	// free up all entries in the lookup table
93	for (int i = 0; i < _numBuckets; i++) {
94	LookupEntryCore *entryPtr = _buckets[i];
95	while (entryPtr != NULL) {
96	LookupEntryCore *nextPtr = entryPtr->nextPtr;
97	delete entryPtr;
98	entryPtr = nextPtr;
99	}
100	}
101
102	// free up the bucket array, as long as it's not the built-in one
103	if (_buckets != _staticBuckets) {
104	delete [] _buckets;
105	}
106	}
107
108	/**
109	* Finds a lookup entry with the specified key. Returns
110	* the entry, or NULL if there was no matching entry.
111	*/
112	LookupEntryCore*
113	LookupCore::find(void* key)
114	{
115	unsigned int index = _hashIndex(key);
116
117	//
118	// Search all of the entries in the appropriate bucket.
119	//
120	LookupEntryCore *hPtr;
121	if (_keySize == 0) {
122	for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) {
123	char* p1 = (char*)key;
124	char* p2 = hPtr->key.string;
125	while (1) {
126	if (p1 != p2) {
127	break;
128	}
129	if (*p1 == '\0') {
130	return hPtr;
131	}
132	p1++, p2++;
133	}
134	}
135	} else {
136	for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) {
137	int iPtr1 = (int)key;
138	int *iPtr2 = hPtr->key.words;
139	for (int count = _keySize; ; count--, iPtr1++, iPtr2++) {
140	if (count == 0) {
141	return hPtr;
142	}
143	if (iPtr1 != iPtr2) {
144	break;
145	}
146	}
147	}
148	}
149	return NULL;
150	}
151
152	/**
153	* Finds a lookup entry with the specified key. Returns
154	* the entry, or NULL if there was no matching entry.
155	*/
156	LookupEntryCore*
157	LookupCore::get(void* key, int* newEntryPtr)
158	{
159	if (newEntryPtr) {
160	*newEntryPtr = 0;
161	}
162
163	unsigned int index = _hashIndex(key);
164
165	//
166	// Search all of the entries in the appropriate bucket.
167	//
168	LookupEntryCore *hPtr;
169	if (_keySize == 0) {
170	for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) {
171	char* p1 = (char*)key;
172	char* p2 = hPtr->key.string;
173	while (1) {
174	if (p1 != p2) {
175	break;
176	}
177	if (*p1 == '\0') {
178	return hPtr;
179	}
180	p1++, p2++;
181	}
182	}
183	} else {
184	for (hPtr=_buckets[index]; hPtr != NULL; hPtr=hPtr->nextPtr) {
185	int iPtr1 = (int)key;
186	int *iPtr2 = hPtr->key.words;
187	for (int count = _keySize; ; count--, iPtr1++, iPtr2++) {
188	if (count == 0) {
189	return hPtr;
190	}
191	if (iPtr1 != iPtr2) {
192	break;
193	}
194	}
195	}
196	}
197
198	//
199	// Entry not found. Add a new one to the bucket.
200	//
201	if (newEntryPtr) {
202	*newEntryPtr = 1;
203	}
204	if (_keySize == 0) {
205	char* mem = new char[sizeof(LookupEntryCore)
206	+ strlen((char*)key) - sizeof(hPtr->key) + 1];
207	hPtr = new (mem) LookupEntryCore;
208	} else {
209	char* mem = new char[sizeof(LookupEntryCore) + _keySize-1];
210	hPtr = new (mem) LookupEntryCore;
211	}
212	hPtr->dictPtr = this;
213	hPtr->bucketPtr = &(_buckets[index]);
214	hPtr->nextPtr = *hPtr->bucketPtr;
215	hPtr->valuePtr = NULL;
216	*hPtr->bucketPtr = hPtr;
217	_numEntries++;
218
219	if (_keySize == 0) {
220	strcpy(hPtr->key.string, (char*)key);
221	} else {
222	memcpy(hPtr->key.words, (int)key, _keySize4);
223	}
224
225	//
226	// If the table has exceeded a decent size, rebuild it with many
227	// more buckets.
228	//
229	if (_numEntries >= _rebuildSize) {
230	_rebuildBuckets();
231	}
232	return hPtr;
233	}
234
235	/**
236	* Removes an entry from the lookup table. The data associated
237	* with the entry is freed at the Lookup level before calling
238	* this core method to clean up the slot.
239	*/
240	LookupEntryCore*
241	LookupCore::erase(LookupEntryCore* entryPtr)
242	{
243	LookupEntryCore *prevPtr;
244
245	if (*entryPtr->bucketPtr == entryPtr) {
246	*entryPtr->bucketPtr = entryPtr->nextPtr;
247	} else {
248	for (prevPtr = *entryPtr->bucketPtr; ; prevPtr = prevPtr->nextPtr) {
249	assert(prevPtr != NULL);
250	if (prevPtr->nextPtr == entryPtr) {
251	prevPtr->nextPtr = entryPtr->nextPtr;
252	break;
253	}
254	}
255	}
256	entryPtr->dictPtr->_numEntries--;
257	LookupEntryCore *nextPtr = entryPtr->nextPtr;
258	delete entryPtr;
259
260	return nextPtr;
261	}
262
263	/**
264	* Returns the next entry after the given entry. If then entry
265	* is NULL and bucketNum is 0, it returns the very first entry.
266	* When there are no more entries, it returns NULL. The bucketNum
267	* helps keep track of the next bucket in the table.
268	*/
269	LookupEntryCore*
270	LookupCore::next(LookupEntryCore *entryPtr, int& bucketNum)
271	{
272	if (entryPtr) {
273	entryPtr = entryPtr->nextPtr;
274	}
275
276	// hit the end of a bucket, or just starting? then go to next bucket
277	while (entryPtr == NULL) {
278	if (bucketNum >= _numBuckets) {
279	return NULL;
280	}
281	entryPtr = _buckets[bucketNum];
282	bucketNum++;
283	}
284	return entryPtr;
285	}
286
287	/**
288	* Returns a description of all entries in the lookup table.
289	* Useful for debugging.
290	*/
291	std::string
292	LookupCore::stats()
293	{
294	#define NUM_COUNTERS 10
295	int i, count[NUM_COUNTERS], overflow;
296	double average, tmp;
297
298	//
299	// Compute a histogram of bucket usage.
300	//
301	for (i=0; i < NUM_COUNTERS; i++) {
302	count[i] = 0;
303	}
304	overflow = 0;
305	average = 0.0;
306	for (i=0; i < _numBuckets; i++) {
307	int j=0;
308	for (LookupEntryCore *entryPtr=_buckets[i];
309	entryPtr != NULL;
310	entryPtr = entryPtr->nextPtr) {
311	j++;
312	}
313	if (j < NUM_COUNTERS) {
314	count[j]++;
315	} else {
316	overflow++;
317	}
318	tmp = j;
319	average += 0.5(tmp+1.0)(tmp/_numEntries);
320	}
321
322	//
323	// Send back the histogram and other stats.
324	//
325	std::ostringstream result;
326	result << _numEntries << " entries in lookup table, ";
327	result << _numBuckets << " buckets" << std::endl;
328
329	for (i=0; i < NUM_COUNTERS; i++) {
330	result << "number of buckets with " << i
331	<< " entries: " << count[i] << std::endl;
332	}
333	result << "number of buckets with " << NUM_COUNTERS
334	<< " or more entries: " << overflow << std::endl;
335	result << "average search distance for entry: " << average << std::endl;
336	return result.str();
337	}
338
339	/**
340	* Invoked when the ratio of entries to hash buckets becomes too
341	* large. It creates a new layout with a larger bucket array
342	* and moves all of the entries into the new table.
343	*/
344	void
345	LookupCore::_rebuildBuckets()
346	{
347	int oldSize = _numBuckets;
348	LookupEntryCore** oldBuckets = _buckets;
349
350	//
351	// Allocate and initialize the new bucket array, and set up
352	// hashing constants for new array size.
353	//
354	_numBuckets *= 4;
355	_buckets = new (LookupEntryCore*)[_numBuckets];
356	LookupEntryCore **newChainPtr = _buckets;
357	for (int count=_numBuckets; count > 0; count--, newChainPtr++) {
358	*newChainPtr = NULL;
359	}
360	_rebuildSize *= 4;
361	_downShift -= 2;
362	_mask = (_mask << 2) + 3;
363
364	//
365	// Rehash all of the existing entries into the new bucket array.
366	//
367	for (LookupEntryCore** oldChainPtr=oldBuckets;
368	oldSize > 0; oldSize--, oldChainPtr++) {
369
370	for (LookupEntryCore* hPtr=*oldChainPtr;
371	hPtr != NULL; hPtr=*oldChainPtr) {
372
373	*oldChainPtr = hPtr->nextPtr;
374	unsigned int index = _hashIndex((void*)hPtr->key.string);
375	hPtr->bucketPtr = &(_buckets[index]);
376	hPtr->nextPtr = *hPtr->bucketPtr;
377	*hPtr->bucketPtr = hPtr;
378	}
379	}
380
381	//
382	// Free up the old bucket array, if it was dynamically allocated.
383	//
384	if (oldBuckets != _staticBuckets) {
385	delete [] oldBuckets;
386	}
387	}
388
389	/**
390	* Computes the hash value for the specified key.
391	*/
392	unsigned int
393	LookupCore::_hashIndex(void* key)
394	{
395	unsigned int result = 0;
396	if (_keySize == 0) {
397	char* string = (char*)key;
398	/*
399	* I tried a zillion different hash functions and asked many other
400	* people for advice. Many people had their own favorite functions,
401	* all different, but no-one had much idea why they were good ones.
402	* I chose the one below (multiply by 9 and add new character)
403	* because of the following reasons:
404	*
405	* 1. Multiplying by 10 is perfect for keys that are decimal strings,
406	* and multiplying by 9 is just about as good.
407	* 2. Times-9 is (shift-left-3) plus (old). This means that each
408	* character's bits hang around in the low-order bits of the
409	* hash value for ever, plus they spread fairly rapidly up to
410	* the high-order bits to fill out the hash value. This seems
411	* works well both for decimal and non-decimal strings.
412	*/
413	while (1) {
414	int c = *string;
415	string++;
416	if (c == 0) {
417	break;
418	}
419	result += (result<<3) + c;
420	}
421	result &= _mask;
422	} else {
423	int iPtr = (int)key;
424	for (int count=_keySize; count > 0; count--, iPtr++) {
425	result += *iPtr;
426	}
427	result = (((((long)(result))*1103515245) >> _downShift) & _mask);
428	}
429	return result;
430	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/1.5/src/core2/Lookup.cpp @ 6036

Download in other formats: