now to profile

So I have my suffix array for the assignment built and tested, but now I need to profile and analyze it's performance... and it looks like I'll need to alter my binary tree to make a significant comparison, which is part of the study drill in the assignment. 

The original search tree only looks for an exact match to a given key, so for it to be comparable to my suffix array, it would need to look for the same things... longest, shortest, all...  which just seems crazy.... build a whole other thing to compare it to, but otherwise they are different beasts.  Now if I put the suffix's in the binary tree... and use my match_loop in the tree.... I'm basically just putting the same code into my tree.... Using the tree to store my suffix's...

What can I do to make this interesting? 
How about.... 

def test_suffix():
for i in range(0, 100):
x = generate_string()
spam = SuffixArray_Binary(x)
spam.find_longest_binary('g')
spam.find_longest_binary(x)
spam.binary_search('b')
spam.binary_search(x)
spam.find_shortest_binary(x)
spam.find_shortest_binary('q')
spam.find_all_matches(x)
spam.find_all_matches('h')

That's my test for my suffix array....
What if I modified my tree to hold all 100 of the arrays i made.... and the tree can search any one of those arrays for the longest.... shortest.... all matches...  It can return something like
KEY = 'tacocat'  shortest = ['cat']
KEY = 'tabicat'  , shortest = ['cat']
KEY = 'catoninetales', shortest = ['catoninetales']

Then I'd add a test like this:

def test_binary_tree():
    # My binary tree has a median for the root_node
    eggs = BinaryTree('hjklmno')
    for i in range(0, 100):
       x=generate_string()
       # boolean to set if this is a suffix array holding tree here (in set_suffix  too the self in __init__?)
       # then a boolean check would have to be set in normal set also
       # Or remove normal set all together... make it a different tree.  Bare bone it... remove everything.
       # Yes.... remove everything. Just scratch it make a different tree. 
       eggs.set_suffix(x)
   
    # test these outside the for loop, to see how it compares looking up stuff when it is 
    # looking through 100 keys first.  It's a tree, so it shouldn't be any slower right?
    eggs.search_suffix_longest('g')
    eggs.search_suffix_shortest('q')
    eggs.search_suffix_all('x')



Which reminds me,  I didn't test it with spaces!!!! 

Here's the code for the SuffixArray_Binary.py   Gotta go run some tests for spaces now.
(( OH ,  and clean up that delete on original tree. Zed's made mine look like a horrific mess.))

class SuffixArray_Binary(object):
def __init__(self, astring):
self.alist = []
if type(astring) != str:
raise TypeError

for i in range(len(astring)):
self.alist.append(astring[i:])


def binary_search(self, astring):
""" find a matching string in the suffix's """
self.alist.sort()
length = len(self.alist) - 1
max_len = len(self.alist)
min_len = 0
count = 0
while min_len <= max_len:
midpoint = (max_len + min_len) // 2
count += 1
if midpoint > length  or midpoint < 0:
break

if self.alist[midpoint] == astring:
x = self.alist[midpoint]
print(count, x)
return x
elif self.alist[midpoint] < astring:
min_len = midpoint + 1
else:
max_len = midpoint - 1
return -1

def find_shortest_binary(self, astring):
""" find the shortest string that starts with given string (astring) in suffix's"""
self.alist.sort()
length = len(self.alist) - 1
max_len = len(self.alist)
min_len = 0
least = []
stop = None
while min_len <= max_len:
midpoint = (min_len + max_len)//2

if midpoint > length or midpoint < 0:
break
else:
akey = self.alist[midpoint]
if akey == astring:
print('found exact match')
return akey, midpoint
elif akey[0] == astring[0]:
if len(akey) >= len(astring):
x = 0
match = True
y = []
for x in range(0, len(astring)):
if akey[x] == astring[x]:
y.append(akey[x])
print(y)
pass

else:
match = False


if match == True:
stop = midpoint

if stop != None:
break

if akey > astring:
max_len = midpoint - 1
continue

else:
min_len = midpoint + 1
continue
else:

if akey > astring:
max_len = midpoint - 1
continue

else:
min_len = midpoint + 1
continue


elif akey > astring:
max_len = midpoint - 1
else:
min_len = midpoint + 1

if stop != None:
result = self.match_loop(self.alist, stop, astring)
#print(result)
result.sort(key=len)
#print(result)
return result[0]

else:
return -1

def match_loop(self, somelist, indexpoint, astring):
i = indexpoint
matchlist = []
x = somelist[i]
while x[0] == astring[0] and i < len(somelist):
akey = somelist[i]
match = True
if len(akey)>= len(astring):
for n in range(0, len(astring)):
if akey[n] == astring[n]:
pass
else:
match = False
if match:
matchlist.append(akey)
i += 1

i = indexpoint - 1
while x[0] == astring[0] and i >= 0:
akey = somelist[i]
match = True
if len(akey)>=len(astring):
for n in range(0, len(astring)):
if akey[n] == astring[n]:
print(akey, astring)
pass
else:
match = False
if match:
matchlist.append(akey)
i -= 1
return matchlist

def find_longest_binary(self, astring):
""" find the longest string that starts with given string (astring) in suffix's"""
self.alist.sort()
length = len(self.alist) - 1
max_len = len(self.alist)
min_len = 0
least = []
stop = None
while min_len <= max_len:
midpoint = (min_len + max_len)//2

if midpoint > length or midpoint < 0:
break
else:
akey = self.alist[midpoint]

if akey[0] == astring[0]:
if len(akey) >= len(astring):
x = 0
match = True
for x in range(0, len(astring)):
if akey[x] == astring[x]:
pass
else:
match = False


if match == True:
stop = midpoint

if stop != None:
break

if akey > astring:
max_len = midpoint - 1
continue

else:
min_len = midpoint + 1
continue
else:

if akey > astring:
max_len = midpoint - 1
continue

else:
min_len = midpoint + 1
continue


elif akey > astring:
max_len = midpoint - 1
else:
min_len = midpoint + 1

if stop != None:
result = self.match_loop(self.alist, stop, astring)
#print(result)
result.sort(key=len, reverse=True)
#print(result)
return result[0]

else:
return -1

def find_all_matches(self, astring):
""" find the longest string that starts with given string (astring) in suffix's"""
self.alist.sort()
length = len(self.alist) - 1
max_len = len(self.alist)
min_len = 0
least = []
stop = None
while min_len <= max_len:
midpoint = (min_len + max_len)//2

if midpoint > length or midpoint < 0:
break
else:
akey = self.alist[midpoint]

if akey[0] == astring[0]:
if len(akey) >= len(astring):
x = 0
match = True
for x in range(0, len(astring)):
if akey[x] == astring[x]:
pass
else:
match = False


if match == True:
stop = midpoint

if stop != None:
break

if akey > astring:
max_len = midpoint - 1
continue

else:
min_len = midpoint + 1
continue
else:

if akey > astring:
max_len = midpoint - 1
continue

else:
min_len = midpoint + 1
continue


elif akey > astring:
max_len = midpoint - 1
else:
min_len = midpoint + 1

if stop != None:
result = self.match_loop(self.alist, stop, astring)
return result

else:
return -1
def dump(self):
print(self.alist)



 

Comments

Popular posts from this blog

JavaScript Ascii animation with while loops and console.log

JavaScript and a Matrix

parenting, learning, and code