adding hash tables lab

2023-11-04 17:28:11 -04:00
parent 033ef43743
commit 497493f79c
3 changed files with 447 additions and 0 deletions
--- a/labs/11_hash_tables/README.md
+++ b/labs/11_hash_tables/README.md
@@ -0,0 +1,32 @@
+# Lab 11 — Hash Tables
+
+In this lab, you will experiment with our hash table implementation of a set. The key differences between the ds_set class (based on a binary search tree) and the ds_hashset class (based on a hash table) are the performance of insert/find/erase — O(log n) vs. expected O(1) — and the order in which iterators traverse the elements: the set is visited in sorted order, while the hashset is visited in no apparent order.
+
+Provided code: [ds_hashset.h](ds_hashset.h) and [test_ds_hashset.cpp](test_ds_hashset.cpp).
+
+## Checkpoint 1
+
+*estimate: 15-30 minutes*
+
+For the first part of this checkpoint, implement and test the *insert* function for the hashset. The *insert* function must first determine in which bin the new element belongs (using the hash function), and then insert the element into that bin but only if it isn’t there already. The *insert* function returns a pair containing an iterator pointing at the element, and a bool indicating whether it was successfully inserted (true) or already there (false).
+
+For the second part of this checkpoint, experiment with the hash function. In the provided code we include the implementation of a good hash function for strings. Are there any collisions for the small example? Now write some alternative hash functions. First, create a trivial hash function that is guaranteed to have many, many collisions. Then, create a hash function that is not terrible, but will unfortunately always place anagrams (words with the same letters, but rearranged) in the same bin. Test your alternate functions and be prepared to show the results to your TA.
+
+**To complete this checkpoint**: Show a TA your debugged implementation of *insert* and your experimentation with alternative hash functions.
+
+## Checkpoint 2
+
+*estimate: 20-40 minutes*
+
+Next, implement and test the *begin* function, which initializes the iteration through a hashset. Confirm that the elements in the set are visited in the same order they appear with the *print* function (which we have implemented for debugging purposes only).
+
+Finally, implement and test the *resize* function. This function is automatically called from the *insert* function when the set gets “too full”. This function should make a new top-level vector structure of the requested size and copy all the data from the old structure to the new structure. Because an element's bin is its hash value modulo the table size, the elements will likely land in different bins in the new structure than they occupied in the old one.
+
+**To complete this checkpoint**: Show a TA these additions and the test output.
+
+## Checkpoint 3
+
+*estimate: remainder of lab time*
+
+To be added.
+
--- a/labs/11_hash_tables/ds_hashset.h
+++ b/labs/11_hash_tables/ds_hashset.h
@@ -0,0 +1,260 @@
+#ifndef ds_hashset_h_
+#define ds_hashset_h_
+// The set class as a hash table instead of a binary search tree.  The
+// primary external difference between ds_set and ds_hashset is that
+// the iterators do not step through the hashset in any meaningful
+// order.  It is just the order imposed by the hash function.
+#include <iostream>
+#include <list>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+// The ds_hashset is templated over both the type of key and the type
+// of the hash function, a function object.
+template < class KeyType, class HashFunc >
+class ds_hashset {
+private:
+  typedef typename std::list<KeyType>::iterator hash_list_itr;
+
+public:
+  // =================================================================
+  // THE ITERATOR CLASS
+  // Defined as a nested class and thus is not separately templated.
+
+  class iterator {
+  public:
+    friend class ds_hashset;   // the hashset builds iterators and reads their fields
+  private:
+
+    // ITERATOR REPRESENTATION
+    ds_hashset* m_hs;          // the hashset being traversed (0 if default constructed)
+    int m_index;               // current index in the hash table, -1 for the end iterator
+    hash_list_itr m_list_itr;  // current position in the list at m_index (unassigned at the end)
+
+  private:
+    // private constructors for use by the ds_hashset only
+    // An end iterator for hs: index -1, list iterator left unassigned.
+    iterator(ds_hashset * hs) : m_hs(hs), m_index(-1) {}
+    // An iterator at position loc inside the list stored at m_table[index].
+    iterator(ds_hashset* hs, int index, hash_list_itr loc)
+      : m_hs(hs), m_index(index), m_list_itr(loc) {}
+
+  public:
+    // Ordinary constructors & assignment operator
+    iterator() : m_hs(0), m_index(-1)  {}
+    iterator(iterator const& itr)
+      : m_hs(itr.m_hs), m_index(itr.m_index), m_list_itr(itr.m_list_itr) {}
+    iterator&  operator=(const iterator& old) {
+      m_hs = old.m_hs;
+      m_index = old.m_index;
+      m_list_itr = old.m_list_itr;
+      return *this;
+    }
+
+    // The dereference operator need only worry about the current
+    // list iterator, and does not need to check the current index.
+    // Dereferencing an end iterator is not supported.
+    const KeyType& operator*() const { return *m_list_itr; }
+
+    // The comparison operators must account for the list iterators
+    // being unassigned at the end: two iterators on the same hashset
+    // that both have index -1 are equal without looking at m_list_itr.
+    friend bool operator== (const iterator& lft, const iterator& rgt)
+    { return lft.m_hs == rgt.m_hs && lft.m_index == rgt.m_index &&
+        (lft.m_index == -1 || lft.m_list_itr == rgt.m_list_itr); }
+    friend bool operator!= (const iterator& lft, const iterator& rgt)
+    { return !(lft == rgt); }
+
+    // increment and decrement (prefix forms return the moved iterator,
+    // postfix forms return a copy of the position before the move)
+    iterator& operator++() {
+      this->next();
+      return *this;
+    }
+    iterator operator++(int) {
+      iterator temp(*this);
+      this->next();
+      return temp;
+    }
+    iterator & operator--() {
+      this->prev();
+      return *this;
+    }
+    iterator operator--(int) {
+      iterator temp(*this);
+      this->prev();
+      return temp;
+    }
+
+  private:
+    // Find the next entry in the table.  Stepping past the last entry
+    // turns this into the end iterator (index -1).
+    void next() {
+      // Already at the end (or default constructed): there is no list
+      // to index and m_list_itr is unassigned, so stay put.
+      if (m_index < 0)
+        return;
+
+      ++m_list_itr;  // next item in the current list
+      if (m_list_itr != m_hs->m_table[m_index].end())
+        return;
+
+      // The current list is exhausted: find the next non-empty list.
+      const int num_lists = int(m_hs->m_table.size());
+      for (++m_index;
+           m_index < num_lists && m_hs->m_table[m_index].empty();
+           ++m_index) {}
+
+      if (m_index < num_lists)
+        m_list_itr = m_hs->m_table[m_index].begin();
+      else
+        m_index = -1;  // nothing left: become the end iterator
+    }
+
+    // Find the previous entry in the table.  Called on the end
+    // iterator it moves to the last entry; called on the first entry
+    // (or on an empty table) it yields the end iterator instead of
+    // indexing the table at -1.
+    void prev() {
+      if (m_hs == 0)
+        return;  // default constructed: no table to walk
+
+      if (m_index < 0) {
+        // At the end: start the backward scan just past the last list.
+        m_index = int(m_hs->m_table.size());
+      } else if (m_list_itr != m_hs->m_table[m_index].begin()) {
+        // Not at the start of the current list: just step back.
+        --m_list_itr;
+        return;
+      }
+
+      // Back down the table until the previous non-empty list is found.
+      for (--m_index; m_index >= 0 && m_hs->m_table[m_index].empty(); --m_index) {}
+
+      if (m_index < 0) {
+        m_index = -1;  // no earlier entry exists
+        return;
+      }
+
+      // Go to the last entry in that list.  std::list iterators are
+      // bidirectional, so stepping back from end() reaches it directly.
+      m_list_itr = m_hs->m_table[m_index].end();
+      --m_list_itr;
+    }
+  };
+  // end of ITERATOR CLASS
+  // =================================================================
+private:
+  // =================================================================
+  // HASH SET REPRESENTATION
+  std::vector< std::list<KeyType> > m_table;  // actual table
+  HashFunc m_hash;                            // hash function
+  unsigned int m_size;                        // number of keys
+
+public:
+  // =================================================================
+  // HASH SET IMPLEMENTATION
+  
+  // Constructor for the table accepts the number of lists (bins) the
+  // table starts with; the default is 10.  A request for zero bins is
+  // raised to one, because find() reduces the hash value modulo
+  // m_table.size() and must never divide by zero.  The hash function
+  // object is value-initialized and the set starts with no keys.
+  ds_hashset(unsigned int init_size = 10)
+    : m_table(init_size == 0 ? 1 : init_size), m_hash(), m_size(0) {}
+  
+  // Copy constructor just uses the member copy constructors.  The hash
+  // function object is copied as well, so that a copy places keys in
+  // bins exactly as the original does (operator= copies it too).
+  ds_hashset(const ds_hashset<KeyType, HashFunc>& old)
+    : m_table(old.m_table), m_hash(old.m_hash), m_size(old.m_size) {}
+
+  ~ds_hashset() {}  // nothing to release by hand: the vector of lists cleans up after itself
+
+  // Assignment: take over the table, the key count and the hash
+  // function object of old.  Assigning a set to itself changes nothing.
+  ds_hashset& operator=(const ds_hashset<KeyType,HashFunc>& old) {
+    if (this == &old)
+      return *this;
+
+    m_table = old.m_table;
+    m_size = old.m_size;
+    m_hash = old.m_hash;
+    return *this;
+  }
+
+  unsigned int size() const { return m_size; }  // number of keys, as tracked in m_size
+
+
+  // Insert the key if it is not already there.
+  std::pair< iterator, bool > insert(KeyType const& key) {
+    const float LOAD_FRACTION_FOR_RESIZE = 1.25;  // keys-per-list ratio at which the table grows
+    // Grow the table (to 2*size+1 lists) once the load reaches the threshold.
+    if (m_size >= LOAD_FRACTION_FOR_RESIZE * m_table.size())
+      this->resize_table(2*m_table.size()+1);
+
+    // Implement this function for Lab 11, Checkpoint 1
+    //
+    // Required behavior (see the lab handout):
+    //   1. Determine the bin for key with the hash function, as find()
+    //      does: m_hash(key) % m_table.size().
+    //   2. If key is already in that bin, return an iterator to it
+    //      paired with false.
+    //   3. Otherwise add key to the bin, update m_size (the key count
+    //      reported by size()), and return an iterator to it with true.
+    //
+    // NOTE: until a return statement is added here, control falls off
+    // the end of a non-void function, which is undefined behavior.
+
+
+  }
+
+  // Look up key: hash it, reduce the hash value to a table index, and
+  // search the list stored at that index.  Returns an iterator to the
+  // key, or end() when the key is not in the set.
+  iterator find(const KeyType& key) {
+    const unsigned int hashed = m_hash(key);
+    const unsigned int bin = hashed % m_table.size();
+    std::list<KeyType>& chain = m_table[bin];
+
+    const hash_list_itr where = std::find(chain.begin(), chain.end(), key);
+    if (where != chain.end())
+      return iterator(this, bin, where);
+    return this->end();
+  }
+  // Erase the key.  Returns how many keys were removed: 1 when the key
+  // was found (it is erased through the iterator overload), 0 when it
+  // was not in the set.
+  int erase(const KeyType& key) {
+    const iterator where = find(key);
+    if (where != end()) {
+      erase(where);
+      return 1;
+    }
+    return 0;
+  }
+
+  // Erase at the iterator.  p must refer to a key of this set (not
+  // end()).  The key count is decremented along with the removal so
+  // that size() and the resize test in insert() keep matching the
+  // table's actual contents.
+  void erase(iterator p) {
+    m_table[ p.m_index ].erase(p.m_list_itr);
+    --m_size;
+  }
+
+  // Find the first entry in the table and create an associated iterator
+  iterator begin() {
+
+    // Implement this function for Lab 11, Checkpoint 2, Part 1
+    //
+    // Expected result: an iterator on the first key of the first
+    // non-empty list in m_table, so that iteration visits keys in the
+    // order print() shows them; presumably end() when every list is
+    // empty.  iterator(this, index, list_itr) builds such an iterator.
+    // NOTE: until implemented, this non-void function returns nothing,
+    // which is undefined behavior if it is called.
+  }
+
+  // Create an end iterator: tied to this set, with index -1 and no
+  // list position assigned.
+  iterator end() {
+    return iterator(this);
+  }
+  
+  // A public print utility.  Writes one line per table index to ostr:
+  // the index, a colon and a space, then every key stored at that
+  // index, each preceded by a single space.
+  void print(std::ostream & ostr) {
+    unsigned int bin = 0;
+    while (bin < m_table.size()) {
+      ostr << bin << ": ";
+      hash_list_itr key_itr = m_table[bin].begin();
+      while (key_itr != m_table[bin].end()) {
+        ostr << ' ' << *key_itr;
+        ++key_itr;
+      }
+      ostr << std::endl;
+      ++bin;
+    }
+  }
+
+private:
+  // resize the table to new_size lists, keeping the same values
+  void resize_table(unsigned int new_size) {
+
+    // Implement this function for Lab 11, Checkpoint 2, Part 2
+    //
+    // Called by insert() with new_size = 2*m_table.size()+1 when the
+    // table gets too full.  Per the lab handout, build a new top-level
+    // vector of new_size lists and copy every key from the old table
+    // into it.  A key's bin depends on m_table.size() (see find()), so
+    // each key must be re-hashed against new_size rather than kept at
+    // its old index.
+
+
+
+
+  }
+};
+#endif
--- a/labs/11_hash_tables/test_ds_hashset.cpp
+++ b/labs/11_hash_tables/test_ds_hashset.cpp
@@ -0,0 +1,155 @@
+#include <iostream>
+#include <string>
+#include <utility>
+#include <cassert>
+
+#include "ds_hashset.h"
+
+
+// A functor: a class whose operator() lets its objects be called like
+// a function.  (We'll talk about this more in Lecture 21.  You can
+// just ignore this wrapper part for now.)
+class hash_string_obj {
+public:
+
+  // ----------------------------------------------------------
+  // EXPERIMENT WITH THE HASH FUNCTION FOR CHECKPOINT 1, PART 2
+
+  // Maps key to an unsigned hash value.  The empty string hashes to
+  // the seed value.
+  unsigned int operator() ( const std::string& key ) const {
+    //  This implementation comes from
+    //  http://www.partow.net/programming/hashfunctions/
+    //
+    //  This is a general-purpose, very good hash function for strings.
+    unsigned int mixed = 1315423911;
+    std::string::const_iterator ch = key.begin();
+    while (ch != key.end()) {
+      // Fold the next character into the running value.
+      const unsigned int scrambled = (mixed << 5) + *ch + (mixed >> 2);
+      mixed ^= scrambled;
+      ++ch;
+    }
+    return mixed;
+  }
+
+};
+
+
+typedef ds_hashset<std::string, hash_string_obj> ds_hashset_type;
+
+
+int main() {
+
+  // ---------------------------------
+  // CODE TO TEST CHECKPOINT 1, PART 1
+  ds_hashset_type a;  // exercises the default constructor; not used again in this function
+  ds_hashset_type set1;
+  std::pair< ds_hashset_type::iterator, bool > insert_result;
+  // Seven distinct keys: every one of these inserts is expected to succeed.
+  std::string to_insert = std::string("hello");
+  insert_result = set1.insert( to_insert );
+  assert( insert_result.second );
+
+  insert_result = set1.insert( std::string("good-bye") );
+  assert( insert_result.second );
+
+  insert_result = set1.insert( std::string("friend") );
+  assert( insert_result.second );
+
+  insert_result = set1.insert( std::string("abc") );
+  assert( insert_result.second );
+
+  insert_result = set1.insert( std::string("puppy") );
+  assert( insert_result.second );
+
+  insert_result = set1.insert( std::string("zebra") );
+  assert( insert_result.second );
+
+  insert_result = set1.insert( std::string("daddy") );
+  assert( insert_result.second );
+  // "puppy" was inserted above: this insert must fail and return an iterator to the existing key.
+  insert_result = set1.insert( std::string("puppy") );
+  assert( !insert_result.second && * insert_result.first == std::string("puppy") );
+
+  std::cout << "The set size is " << set1.size() << '\n'
+	    << "Here is the table: \n";
+  set1.print( std::cout );
+  // Look up one key that was never inserted ("foo") and two that were.
+  ds_hashset_type::iterator p;
+  p = set1.find( "foo" );
+  if ( p == set1.end() )
+    std::cout << "\"foo\" is not in the set\n";
+  else
+    std::cout << "\"foo\" is in the set\n"
+	      << "The iterator points to " << *p << std::endl;
+
+  p = set1.find("puppy");
+  if ( p == set1.end() )
+    std::cout << "\"puppy\" is not in the set\n";
+  else
+    std::cout << "\"puppy\" is in the set\n"
+	      << "The iterator points to " << *p << std::endl;
+
+  p = set1.find("daddy");
+  if ( p == set1.end() )
+    std::cout << "\"daddy\" is not in the set\n";
+  else
+    std::cout << "\"daddy\" is in the set\n"
+	      << "The iterator points to " << *p << std::endl;
+
+
+  // ---------------------------------
+  // CODE TO TEST CHECKPOINT 2, PART 1 (commented out; it calls begin())
+  /*
+  p = set1.begin();
+  std::cout << "\nHere is the result of iterating: \n";
+  for ( p = set1.begin(); p != set1.end(); ++p )
+    std::cout << *p << '\n';
+  */
+
+
+  // ---------------------------------
+  // CODE TO TEST CHECKPOINT 2, PART 2 (commented out; it relies on begin() and on resizing)
+  /*
+  ds_hashset_type set2( set1 );
+  std::cout << "set1.size() = " << set1.size() << ", set2.size() = " << set2.size() << std::endl;
+
+  //  Now add more stuff to set2.  This should trigger a resize given the default settings.
+  insert_result = set2.insert( std::string("ardvark") );
+  assert( insert_result.second );
+  insert_result = set2.insert( std::string("baseball") );
+  assert( insert_result.second );
+  insert_result = set2.insert( std::string("football") );
+  assert( insert_result.second );
+  insert_result = set2.insert( std::string("gymnastics") );
+  assert( insert_result.second );
+  insert_result = set2.insert( std::string("dance") );
+  assert( insert_result.second );
+  insert_result = set2.insert( std::string("swimming") );
+  assert( insert_result.second );
+  insert_result = set2.insert( std::string("track") );
+  assert( insert_result.second );
+
+  std::cout << "\nAfter seven more inserts:\n"
+	    << "set1.size() = " << set1.size() << ", set2.size() = " << set2.size() << "\n"
+	    << "\nThe contents of set2:" << std::endl;
+  set2.print(std::cout);
+  std::cout << "The results of iterating:\n";
+  for ( p = set2.begin(); p != set2.end(); ++p )
+    std::cout << *p << '\n';
+  */
+
+  // ---------------
+  // OTHER TEST CODE (commented out; it uses set2 from the section above)
+  /*  
+  //  Now test erase
+  int num = set2.erase( std::string("hello") );
+  std::cout << "Tried erase \"hello\" and got num (should be 1) = " << num << std::endl;
+  num = set2.erase( std::string("abc") );
+  std::cout << "Tried erase \"abc\" and got num (should be 1) = " << num << std::endl;
+  num = set2.erase( std::string("hello") );
+  std::cout << "Tried erase \"hello\" and got num (should be 0) = " << num << std::endl;
+  num = set2.erase( std::string("football") );
+  std::cout << "Tried erase \"football\" and got num (should be 1) = " << num << std::endl;
+  num = set2.erase( std::string("friend") );
+  std::cout << "Tried erase \"friend\" and got num (should be 1) = " << num
+	    << "\nHere are the final contents of set2:" << std::endl;
+  set2.print(std::cout);
+  */
+
+  return 0;
+}