// // Note: This implementation is designed for a hash table without deletions. // // #ifndef __OPENHASHTABLE_H__ #define __OPENHASHTABLE_H__ #include "genlib.h" // (Steve) #include using namespace std; // For mlock -- available on IRIX, Linux, SunOS, FreeBSD, but not HP-UX // Note that mlock only works on Linux if root is calling it #ifndef __OS_HP_UX__ #ifndef WIN32 #include #include #endif #endif #define DOUBLE_HASH // This is for g++ to tell it that if external templates flag set, don't go // generating code for this stuff except in the file that says : // #pragma implementation "openhashtable.h" // // #pragma interface /////////////////////////////////////////////////////////////////////// // The templates have Data before Key because that's how Jordan's version // had them, so that some functions could have key as an optional argument. // I think the reverse is more natural, though. Change it? /////////////////////////////////////////////////////////////////////// template struct DataKeyPair { TKey Key; TData Data; }; /////////////////////////////////////////////////////////////////////// // Hash table entries template class COpenHashEntry { public: inline COpenHashEntry(); inline ~COpenHashEntry(); inline VOID Init(); inline BOOL Valid(); inline TData *Data(); inline TKey Key(); // Did this with ptrs to pass by reference but could use & since this is C++ inline VOID Set(TData*, TKey); inline VOID Reset(TData*, TKey); #ifndef __OS_LINUX__ inline VOID Display(); #endif private: TKey m_Key; TData m_Data; BOOL m_bValid; }; template inline COpenHashEntry::COpenHashEntry() { m_bValid = FALSE; } template inline COpenHashEntry::~COpenHashEntry() { } template inline VOID COpenHashEntry::Init() { m_bValid = FALSE; } template inline VOID COpenHashEntry::Set(TData *pData, TKey key) { ASSERT (!m_bValid); m_Key = key; m_Data = *pData; // TData is required to have an assignment op m_bValid = TRUE; } template inline VOID COpenHashEntry::Reset(TData *pData, TKey key) { ASSERT (m_bValid); m_Key = key; m_Data = *pData; // TData is required to have an assignment op } #ifndef __OS_LINUX__ template inline VOID COpenHashEntry::Display() { if (!m_bValid) cout << "empty" << "\n"; else cout << "Key\t" << m_Key << ";\tData " << m_Data << "\n"; } #endif template inline BOOL COpenHashEntry::Valid() { if (m_bValid) return TRUE; else return FALSE; } template inline TData *COpenHashEntry::Data() { if (m_bValid) return &m_Data; else return NULL; } template inline TKey COpenHashEntry::Key() { ASSERT (m_bValid); return m_Key; } /////////////////////////////////////////////////////////////////////// enum action{IGNORE, WARN, ERROR, REPLACE}; // Couldn't see why destructor & InsertDuplicate were virtual, so now // they aren't to avoid overhead of virtual pointer template class COpenHashTable { public: inline COpenHashTable(); inline ~COpenHashTable(); inline VOID Init(UINT_32 iSize = 127); protected: inline VOID Uninit(); public: //inserts single entry into HashTable inline VOID Insert(TData *pData, TKey key, action dupAction = IGNORE); //reads array & inserts into HashTable inline VOID InsertMulti(DataKeyPair *array, UINT_32 items, action dupAction = IGNORE); inline VOID lock(); inline VOID unlock(); inline TData *Peek(const TKey &key); inline VOID Display(); #ifndef NDEBUG inline COpenHashEntry *GetData() {return m_pData;}; #endif protected: inline VOID InsertDuplicate(COpenHashEntry *pOldData, TData *pData, TKey key, action dupAction = IGNORE); inline UINT_32 FindPos(TKey key); /* #ifndef NDEBUG inline UINT_32 FindPeekPos(TKey key); #endif */ inline VOID Rehash(); protected: COpenHashEntry *m_pData; UINT_32 m_uiNumLocs; UINT_32 m_uiFullLocs; #ifdef PRINTSTATS BOOL m_bPeekOnly; UINT_32 m_uiNumHits; UINT_32 m_uiNumMisses; #endif }; //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // Class Implementations // inline BOOL IsPrime(UINT_32 n) //Requires: int n > 0. //Effect: returns TRUE if c is a prime number. // (could skip every 3rd odd if check for n%3 and n%5 at start) { UINT_32 i; // Check if even if ( !(n & 0x1) ) { return FALSE; } for ( i=3; i*i <= n; i += 2 ) { if ( n % i == 0 ) { return FALSE; } } return TRUE; } inline UINT_32 GetNextPrime(UINT_32 n) //Requires: int n > 0. //Effect: returns the next prime number >= n. { if ( !(n & 0x1) ) { n++; } while ( !IsPrime(n) ) { n+=2; } return n; } ////////////////////////////////////////////////////////////////////////////// // COpenHashTable Class // template inline COpenHashTable::COpenHashTable() { m_uiFullLocs = 0; m_uiNumLocs = 0; m_pData = NULL; } template inline COpenHashTable::~COpenHashTable() { Uninit(); } template inline VOID COpenHashTable::Init(UINT_32 uiNumLocs/* = 127*/) { UINT_32 i; m_uiFullLocs = 0; //Currently, rehash counts on this being zero-ed. m_uiNumLocs = GetNextPrime(uiNumLocs); m_pData = new COpenHashEntry[m_uiNumLocs]; #ifdef PRINTSTATS m_bPeekOnly = FALSE; m_uiNumHits = 0; m_uiNumMisses = 0; #endif // not necessary if init just looks like the constructor for ( i = 0; i < m_uiNumLocs; i++ ) { m_pData[i].Init(); } } template inline VOID COpenHashTable::Uninit() { #ifdef PRINTSTATS printf("Freeing Table %ld of size %ld; %ld Hits and %ld Misses.\n", (UINT_32)this, m_uiNumLocs, m_uiNumHits,m_uiNumMisses); #endif if ( m_pData != NULL ) { delete []m_pData; m_pData = NULL; } } template inline VOID COpenHashTable:: Insert(TData *pData, TKey key, action dupAction) { ASSERT( m_pData != NULL && m_uiNumLocs > 0 ); UINT_32 h; #ifdef PRINTSTATS m_bPeekOnly = FALSE; #endif h = FindPos(key); if (!(m_pData[h].Valid())) { m_pData[h].Set(pData, key); m_uiFullLocs++; if ( (5*m_uiFullLocs) >= (4*m_uiNumLocs) ) { Rehash(); } } // otherwise it was a duplicate else { InsertDuplicate(&(m_pData[h]), pData, key, dupAction); } } template inline VOID COpenHashTable::lock() { #ifndef __OS_HP_UX__ #ifndef WIN32 int iRtnVal; UINT_64 uiSize = (UINT_64)m_uiNumLocs * (UINT_64)sizeof(COpenHashEntry); iRtnVal = mlock ((void *)m_pData, (size_t)uiSize); // (size_t)(m_uiNumLocs*sizeof(COpenHashEntry))); // Note that mlock only works on Linux if root is calling if (iRtnVal != 0) printf("mlock of size %ld failed.\n", (UINT_32)uiSize); #ifndef NDEBUG else printf("mlock of size %ld succeeded.\n", (UINT_32)uiSize); #endif #endif #endif } template inline VOID COpenHashTable::unlock() { #ifndef __OS_HP_UX__ #ifndef WIN32 int iRtnVal; iRtnVal = munlock((void *)m_pData, (size_t)(m_uiNumLocs*sizeof(COpenHashEntry))); if (iRtnVal != 0) printf("munlock failed.\n"); #endif #endif } template inline VOID COpenHashTable:: InsertDuplicate(COpenHashEntry *pOldData, TData *pData, TKey key, action dupAction) { switch (dupAction) { case ERROR: cout <<"Error: duplicate key in hash table; exiting"; exit(1); // exits on error so no break case REPLACE: pOldData->Reset(pData, key); break; case WARN: #ifndef __OS_LINUX__ cout << "Key" << key << " already in hash table; new data value ignored\n"; #else cout << "Key already in hash table; new data value ignored\n"; #endif case IGNORE: ; } } template inline VOID COpenHashTable:: InsertMulti(DataKeyPair *array, UINT_32 items, action dupAction) { UINT_32 i; // This is fast and dirty, with no checking to make sure data is valid. for (i = 0; i < items; i++) { Insert(&(array[i].Data), array[i].Key, dupAction); } } template inline VOID COpenHashTable::Rehash() { COpenHashEntry *pOldData; UINT_32 i, uiOldNumLocs; ASSERT( m_pData != NULL && m_uiNumLocs > 0 ); ASSERT(m_uiNumLocs < (MAX_UINT_32 / 2) ); #ifdef PRINTSTATS printf("Rehashing Table %ld of size %ld; %ld Hits and %ld Misses.\n", (UINT_32)this, m_uiNumLocs, m_uiNumHits, m_uiNumMisses); m_uiNumHits = 0; m_uiNumMisses = 0; #endif pOldData = m_pData; uiOldNumLocs = m_uiNumLocs; Init(2*m_uiNumLocs); for ( i = 0; i < uiOldNumLocs; i++ ) { if (pOldData[i].Valid()) { Insert (pOldData[i].Data(), pOldData[i].Key()); } } delete []pOldData; } // Double hashing template inline UINT_32 COpenHashTable::FindPos(TKey key) { ASSERT( m_pData != NULL && m_uiNumLocs > 0 ); extern UINT_32 Hash(TKey&); // must be defined by user UINT_32 orig_h, h, hash, hash2 = 0; #ifndef DOUBLE_HASH UINT_32 diff = 1; #endif #ifdef PRINTSTATS UINT_32 uiNumMisses = 0; #endif hash = Hash(key); orig_h = h = hash % m_uiNumLocs; do { if ( (!(m_pData[h].Valid())) || (key == m_pData[h].Key())) { #ifdef PRINTSTATS // check if it's an insertion if ((!m_bPeekOnly) && (!(m_pData[h].Valid()))) { m_uiNumHits++; m_uiNumMisses += uiNumMisses; } #endif return h; } #ifdef PRINTSTATS uiNumMisses++; #endif #ifdef DOUBLE_HASH if (hash2 == 0) hash2 = 1 + (hash % (m_uiNumLocs -1)); // h = (h + hash2)% m_uiNumLocs; h += hash2; #else h += diff; diff += 2; //quadratic probing: offset by 1,4,9,16,25, etc from original #endif // take care of "mod-ing" the index if (h >= m_uiNumLocs) { h -= m_uiNumLocs; // usually that's enough; otherwise do real mod if (h >= m_uiNumLocs) { h %= m_uiNumLocs; } } } while (h != orig_h); // if we got here, we cycled back to the starting index without finding a // match or an open slot - expand the table and try again. // We shouldn't get here since we rehash the table when it's 80% full and // our probe sequences should look at every slot. ASSERT(FALSE); Rehash(); return FindPos(key); // this "recursion" should only happen once } /* #ifndef NDEBUG template inline UINT_32 COpenHashTable::FindPeekPos(TKey key) { ASSERT( m_pData != NULL && m_uiNumLocs > 0 ); extern UINT_32 Hash(TKey&); // must be defined by user UINT_32 h, hash, hash2; UINT_32 orig_h; hash = Hash(key); hash2 = (hash % (m_uiNumLocs -1)) + 1; orig_h = h = hash % m_uiNumLocs; do { if ( (!(m_pData[h].Valid())) || (key == m_pData[h].Key())) { return h; } // h = (h + 1 + hash2)% m_uiNumLocs; h += hash2; if (h >= m_uiNumLocs) { h -= m_uiNumLocs; } } while (h != orig_h); // if we got here, we cycled back to the starting index without finding a // match or an open slot - expand the table and try again. Rehash(); return FindPos(key); // this "recursion" should only happen once } #endif */ /* // Quadratic probing // adapted from Model's book "Data Structures, Data Abstraction...Using C++" // template inline UINT_32 COpenHashTable::FindPos(TKey key) { ASSERT( m_pData != NULL && m_uiNumLocs > 0 ); extern UINT_32 Hash(TKey&); // must be defined by user UINT_32 h; UINT_32 orig_h, diff; #ifndef NDEBUG UINT_32 uiNumMisses = 0; #endif orig_h = h = Hash(key) % m_uiNumLocs; diff = 1; do { // if ((!m_pData[h]) || (!m_pData[h].Valid()) || Equal(key, m_pData[h].Key())) if ( (!(m_pData[h].Valid())) || (key == m_pData[h].Key())) { #ifdef PRINTSTATS // if (!m_bPeekOnly) { // check if it's an insertion // if (!(m_pData[h].Valid())) { m_uiNumHits++; m_uiNumMisses += uiNumMisses; // } // } #endif return h; } #ifdef PRINTSTATS // if (!m_bPeekOnly) { uiNumMisses++; // } #endif h += diff; diff += 2; //quadratic probing: offset by 1,4,9,16,25, etc from original // take care of "mod-ing" the index if (h >= m_uiNumLocs) { h -= m_uiNumLocs; // usually that's enough; otherwise do real mod if (h >= m_uiNumLocs) { h %= m_uiNumLocs; } } } while (h != orig_h); // if we got here, we cycled back to the starting index without finding a // match or an open slot - expand the table and try again. // We shouldn't get here since we rehash the table when it's 80% full and // our probe sequences should look at every slot. ASSERT(FALSE); Rehash(); return FindPos(key); // this "recursion" should only happen once } */ /* #ifndef NDEBUG template inline UINT_32 COpenHashTable::FindPeekPos(TKey key) { ASSERT( m_pData != NULL && m_uiNumLocs > 0 ); extern UINT_32 Hash(TKey&); // must be defined by user UINT_32 h; UINT_32 orig_h, diff; orig_h = h = Hash(key) % m_uiNumLocs; diff = 1; do { // if ((!m_pData[h]) || (!m_pData[h].Valid()) || Equal(key, m_pData[h].Key())) if ( (!(m_pData[h].Valid())) || (key == m_pData[h].Key())) { return h; } h += diff; diff += 2; //quadratic probing // take care of "mod-ing" the index if (h >= m_uiNumLocs) { h -= m_uiNumLocs; if (h >= m_uiNumLocs) { h %= m_uiNumLocs; } } } while (h != orig_h); // if we got here, we cycled back to the starting index without finding a // match or an open slot - expand the table and try again. Rehash(); return FindPos(key); // this "recursion" should only happen once } #endif */ template inline TData *COpenHashTable::Peek(const TKey &key) { // ASSERT( m_pData != NULL && m_uiNumLocs > 0 ); if ( (m_pData == NULL) || (m_uiNumLocs == 0) ) return(NULL); TData *pResult; UINT_32 h; #ifdef PRINTSTATS m_bPeekOnly = TRUE; // h = FindPeekPos(key); #endif h = FindPos(key); #ifdef PRINTSTATS m_bPeekOnly = FALSE; #endif pResult = m_pData[h].Data(); return pResult; } template inline VOID COpenHashTable::Display() { ASSERT( m_pData != NULL && m_uiNumLocs > 0 ); UINT_32 i; printf("\n"); for ( i = 0; i < m_uiNumLocs; i++) { cout << "[" << i << "]->"; m_pData[i].Display(); } } #endif // __OPENHASHTABLE_H__