Question
Ordered Hashtable Overview For this assignment you will update and complete the implementation of the hashtable data structure presented in class, which exposes an API
Ordered Hashtable
Overview
For this assignment you will update and complete the implementation of the hashtable data structure presented in class, which exposes an API mirroring that of the built-in Python dict. When iterating over its contents (supported by the __iter__, keys, values, and items methods), your updated implementation will also reflect the order in which key/value pairs were originally inserted into the hashtable. This will require that you implement the two-tiered list system described during lecture.
The operations you will implement are listed alongside their descriptions below (h refers to a hashtable):
Operation | Description |
---|---|
h[k] = v | If h does not contain key k, a new k?v mapping is added, else the value for key k is updated to v. |
h[k] | If h contains key k, the corresponding value is returned, else a KeyError is raised. |
del h[k] | If h contains key k, it is removed along with its value, else a KeyError is raised. Note that if k is re-inserted at some later point it is considered a new key (for ordering purposes). |
k in h | Returns True if key k is in h. |
len(h) | Returns the number of keys in h. |
iter(h) | Returns an iterator over all the keys in h, in the order they were added. |
h.keys() | (Same as above) |
h.values() | Returns an iterator over all the values in h, in the order they were added. |
h.items() | Returns an iterator over all the key/value pairs (as tuples) in h, in the order they were added. |
Your hashtable will be provided with the initial number of buckets on creation (i.e., in __init__); your implementation must heed this value, as there may be performance ramifications if it does not.
In [ ]:
#
class OrderedHashtable:
class Node:
"""This class is used to create nodes in the singly linked "chains" in
each hashtable bucket."""
def __init__(self, index, next=None):
# don't rename the following attributes!
self.index = index
self.next = next
def __init__(self, n_buckets=1000):
# the following two variables should be used to implement the "two-tiered"
# ordered hashtable described in class -- don't rename them!
self.indices = [None] * n_buckets
self.entries = []
self.count = 0
def __getitem__(self, key):
bucket_idx = hash(key) % len(self.buckets)
b = self.buckets[bucket_idx]
while b:
if b.key == key:
return b.val
b = b.next
else:
raise KeyError()
def __setitem__(self, key, val):
pass
def __delitem__(self, key):
pass
def __contains__(self, key):
try:
_ = self[key]
return True
except:
return False
def __len__(self):
return self.count
def __iter__(self):
pass
def keys(self):
return iter(self)
def values(self):
pass
def items(self):
pass
def __str__(self):
return '{ ' + ', '.join(str(k) + ': ' + str(v) for k, v in self.items()) + ' }'
def __repr__(self):
return str(self)
#
In [ ]:
# (3 tests) Short tests
?
from unittest import TestCase
import random
?
tc = TestCase()
?
ht = OrderedHashtable(2)
?
for k, v in (('batman', 'bruce wayne'), ('superman', 'clark kent'), ('spiderman', 'peter parker')):
ht[k] = v
tc.assertEqual(len(ht), 3)
?
tc.assertEqual(ht['superman'], 'clark kent')
?
tc.assertTrue('spiderman' in ht)
tc.assertFalse('iron man' in ht)
?
with tc.assertRaises(KeyError):
ht['iron man']
In [ ]:
# (3 points) Basic tests (insertion, fetch, count, chain-lengths)
?
from unittest import TestCase
import random
?
tc = TestCase()
?
class MyInt(int):
def __hash__(self):
"""MyInts hash to themselves already current Python default,
but just to ensure consistency."""
return self
def ll_len(l):
"""Returns the length of a linked list with head `l` (assuming no sentinel)"""
c = 0
while l:
c += 1
l = l.next
return c
ht = OrderedHashtable(10)
for i in range(25):
ht[MyInt(i)] = i*2
?
tc.assertEqual(len(ht), 25)
?
for i in range(5):
tc.assertEqual(ll_len(ht.indices[i]), 3)
for i in range(5, 10):
tc.assertEqual(ll_len(ht.indices[i]), 2)
?
for i in range(25):
tc.assertTrue(MyInt(i) in ht)
tc.assertEqual(ht[MyInt(i)], i*2)
In [ ]:
# (3 points) Update testing
?
from unittest import TestCase
import random
?
tc = TestCase()
?
ht = OrderedHashtable(100)
d = {}
?
for i in range(100):
k, v = str(i), str(i*2)
d[k] = v
ht[k] = v
for j in range(0, 100, 2):
k, v = str(i), str(i*3)
d[k] = v
ht[k] = v
for j in range(0, 100, 4):
k, v = str(i), str(i*4)
d[k] = v
ht[k] = v
for i in range(100):
tc.assertTrue(k in ht)
tc.assertEqual(d[k], ht[k])
In [ ]:
# (3 points) Deletion testing
?
from unittest import TestCase
import random
?
tc = TestCase()
?
ht = OrderedHashtable(100)
d = {}
?
for i in range(100):
k, v = str(i), str(random.randrange(10000000, 99999999))
d[k] = v
ht[k] = v
?
for _ in range(50):
k = str(random.randrange(100))
if k in d:
del d[k]
del ht[k]
?
tc.assertEqual(len(ht), len(d))
?
for k,v in ht.items():
tc.assertEqual(d[k], v)
In [ ]:
# (4 points) Iteration order testing
?
from unittest import TestCase
import random
?
tc = TestCase()
?
ht = OrderedHashtable(1000)
l = [str(i) for i in range(0, 1000)]
random.shuffle(l)
?
for x in l:
ht[x] = x
?
for _ in range(50):
idx_to_del = random.randrange(len(l))
val_to_del = l[idx_to_del]
del ht[val_to_del]
del l[idx_to_del]
if random.randrange(2) == 0:
l.append(val_to_del)
ht[val_to_del] = val_to_del
?
for x, y in zip(l, ht):
tc.assertEqual(x, y)
In [ ]:
# (4 points) Stress testing
?
from unittest import TestCase
from time import time
import random
?
tc = TestCase()
?
ht = OrderedHashtable(100000)
# ht = FakeHT()
d = {}
?
start = time()
?
for _ in range(100000):
k, v = str(random.randrange(100000)), str(random.randrange(10000000, 99999999))
d[k] = v
ht[k] = v
for k,v in d.items():
tc.assertTrue(k in ht)
tc.assertEqual(d[k], ht[k])
end = time()
tc.assertLess(end-start, 2.0, 'Your implementation ran too slow!')
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started