### Binary String Tree

I decided to go with using python's hash to get the strings into the binary tree. While I would have loved to get them in alphabetically, it was just a problem that was out of my realm. Hash was right there... already working and ready for use. Maybe I'll come back to it and try to tool with it later.

So I just ran tests on the 'get', 'set' and 'delete' of this. I was using the dump without problems, and the count in the pytest without issue. It's definitely not perfect or anything worth using I'm sure, but learning. That's the goal.

It's basically just the Binary tree with a few tweaks. The Node has a new 'string_value' to hold the hash number we create with the method 'ConvertString', we use that to set, delete and get the nodes.

the key and value don't do anything in those methods, they just get placed, and hold their places.

I also still use my self.median.... though I think if I'm using hash, it may be completely useless. The numbers are all arbitrary and range in a semi-random huge dramatic scale, so.... hence the reason I really really wanted to find a way to make an alphabetical tool to put them into the tree. Whatever the hash picks for my median.. my tree can end up totally defunct... and useless. It might be totally imbalanced and lopsided as a result of what random algo seed it generates for the root_node. If it say picks -28494949478937592 and most my other strings are 38938782392934283 the tree searches are going to be crap.

But, I can come back and try to keep fixing it. I'm not sure why no one else see's this as a problem. Maybe I'm just not finding the right search term.... I don't know why I can't seem to find what I'm looking for on Google to answer any of the questions I have.

Maybe... The point is, binary search tree's for strings in general are pointless? I don't know.... I can't find answers. On to suffix arrays.

Here's my binary string tree.

from TreeGraphical import PrintTree

class

def __init__(self, key, value, left, right):

self.key = key

self.value = value

self.left = left

self.right = right

self.string_value = None

def __repr__(self):

nleft = self.left and self.left.value or None

nright = self.right and self.right.value or None

return f"{nleft} <--- ( {self.key} : {self.value} ) ---> {nright}"

class

def __init__(self, median):

self.median = median

self.root_node = BinaryTreeStringNode(self.median, 'binary_root_node', None, None)

self._size = 0

if type(self.median) == str:

self.median = ConvertString(self.median) + 0.5

else:

print("ERROR Tree accepts strings ONLY.")

raise TypeError

def set(self, akey, value):

if type(akey) == str:

numeric = ConvertString(akey)

if numeric < self.median:

east = self.root_node.left

if not east:

# [None<---- newnode _---> None][<--left-- root --right-->]

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

self.root_node.left = newnode

self._size += 1

else:

while east:

if numeric == east.string_value:

east.value = value

break

elif east.string_value < numeric:

if east.right == None:

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

east.right = newnode

self._size += 1

break

else:

east = east.right

else:

if east.left == None:

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

east.left = newnode

self._size += 1

break

else:

east = east.left

if numeric > self.median:

west = self.root_node.right

if not west:

# [<---- root --right-->][None<---- newnode _---> None]

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

self.root_node.right = newnode

self._size += 1

else:

while west:

########################

if numeric == west.string_value:

west.value = value

break

elif west.string_value < numeric:

if west.right == None:

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

west.right = newnode

self._size += 1

break

else:

west = west.right

else:

if west.left == None:

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

west.left = newnode

self._size += 1

break

else:

west = west.left

else:

print("ERROR: This tree accepts STRINGS only.")

return None

def get(self, akey):

if type(akey) == str:

numeric = ConvertString(akey)

node = self.root_node

if numeric < self.median:

node = node.left

while node:

if numeric == node.string_value:

return node.value

else:

if numeric < node.string_value:

node = node.left

else:

node = node.right

else:

node = node.right

while node:

if numeric == node.string_value:

return node.value

else:

if numeric < node.string_value:

node = node.left

else:

node = node.right

# this return may not be necessary.

return None

else:

return None

def delete(self, akey):

if type(akey) == str:

numeric = ConvertString(akey)

if numeric < self.median:

east = self.root_node.left

if not east:

# do nothing

return None

else:

root = True

temp = None

#print(f" NUMERIC = {akey}")

while east:

left_child = east.left

right_child = east.right

if east.string_value == numeric:

# find the first center child/leaf that is none (left or right, I chose left)

# make that the holder for the oposing child, and the one you didn't use becomes parent node

# center branches are @east.left go right ---> (x) <--@east.right go left

if not left_child and not right_child:

if not root:

temp_left = temp.left

temp_right = temp.right

done = False

while not done:

if temp_right:

if temp_right.string_value == numeric:

temp.right = None

self._size -= 1

done = True

elif temp_left:

if temp_left.string_value == numeric:

temp.left = None

print("temp.left = None")

print(temp)

self._size -= 1

done = True

else:

print("OPPS")

break

else:

self.root_node.left = None

self._size -= 1

break

else:

# it has a child link

left_child = east.left

right_child = east.right

if left_child and right_child:

new_left_child = east.left

# because parent node dictates the greatest value of

# the left's, right branch; x in branch will not be greater then the parent...

# we can be sure that however far down the branch we go, the right_child

# which is greater then the parent_node, can go after the preceeding nodes in the chain. It will be #greater then them. Searching for numbers larger then right.key will still lead to the right_child, but #it may be a longer path to it and it's child nodes

while new_left_child:

if new_left_child.right == None:

new_left_child.right = right_child

east.key = left_child.key

east.value = left_child.value

east.left = left_child.left

east.right = left_child.right

east.string_value = left_child.string_value

self._size -= 1

break

else:

new_left_child = new_left_child.right

break

elif left_child and not right_child:

east.key = left_child.key

east.value = left_child.value

east.left = left_child.left

east.right = left_child.right

east.string_value = left_child.string_value

self._size -= 1

break

elif right_child and not left_child:

east.key = right_child.key

east.value = right_child.value

east.left = right_child.left

east.right = right_child.right

east.string_value = right_child.string_value

self._size -= 1

break

else:

print("error in while loop for :while east: delete(akey)")

break

elif numeric > east.string_value:

temp = east

root = False

east = east.right

else:

temp = east

root = False

east = east.left

else:

west = self.root_node.right

root = True

temp = None

if not west:

return None

else:

while west:

left_child = west.left

right_child = west.right

if numeric == west.string_value:

# we've found the node to remove

if not left_child and not right_child:

if not root:

temp_left = temp.left

temp_right = temp.right

done = False

while not done:

if temp_right:

if temp_right.string_value == numeric:

temp.right = None

self._size -= 1

done = True

break

if temp_left:

print("running check left temp")

if temp_left.string_value == numeric:

temp.left = None

self._size -= 1

done = True

break

else:

print("This should not happen.")

break

else:

self.root_node.right = None

self._size -= 1

break

elif left_child and not right_child:

west.key = left_child.key

west.value = left_child.value

west.left = left_child.left

west.right = left_child.right

west.string_value = left_child.string_value

self._size -= 1

break

elif right_child and not left_child:

west.key = right_child.key

west.value = right_child.value

west.left = right_child.left

west.right = right_child.right

west.string_value = right_child.string_value

self._size -= 1

break

elif right_child and left_child:

new_left_child = west.left

#print(right_child)

while new_left_child:

if new_left_child.right == None:

new_left_child.right = right_child

west.key = left_child.key

west.value = left_child.value

west.left = left_child.left

west.right = left_child.right

west.string_value = left_child.string_value

#print(new_left_child)

self._size -= 1

break

else:

new_left_child = new_left_child.right

break

else:

print("error in while loop for :while west: delete(akey)")

elif numeric > west.string_value:

temp = west

west = west.right

root = False

else:

temp = west

west = west.left

root = False

else:

return None

def dump(self):

west = self.root_node.right

east = self.root_node.left

if east:

print(f"East branch: {east}")

self.recursDump(east)

print("********** END EAST************")

if west:

print(f"West branch: {west}")

self.recursDump(west)

print("********** END WEST************")

def recursDump(self, branch):

if branch == None:

print(" ")

else:

left_child = branch.left

right_child = branch.right

if left_child:

print(left_child)

if right_child:

print(right_child)

self.recursDump(right_child)

self.recursDump(left_child)

return branch

def count(self):

trunk = self.root_node

keyslist = []

result = self.recurs_helper(trunk, keyslist)

#print(result)

count = len(result)

return count

def make_key_list(self):

# same as count different return

trunk = self.root_node

keyslist = []

result = self.recurs_helper(trunk, keyslist)

#print(result)

return result

def make_tuple_list(self):

# same as count different return

trunk = self.root_node

keyslist = []

result = self.recurs_helper_tuple(trunk, keyslist)

#print(result)

return result

def make_node_list(self):

# same as count different return

trunk = self.root_node

keyslist = []

result = self.recurs_helper_node(trunk, keyslist)

#print(result)

return result

def recurs_helper(self, branch, alist):

if branch == None:

pass

else:

left_sprout = branch.left

right_sprout = branch.right

if left_sprout:

self.list_add(left_sprout, alist)

if right_sprout:

self.list_add(right_sprout, alist)

self.recurs_helper(left_sprout, alist)

self.recurs_helper(right_sprout, alist)

return alist

def recurs_helper_tuple(self, branch, alist):

if branch == None:

pass

else:

left_sprout = branch.left

right_sprout = branch.right

if left_sprout:

self.list_add_tuple(left_sprout, alist)

if right_sprout:

self.list_add_tuple(right_sprout, alist)

self.recurs_helper_tuple(left_sprout, alist)

self.recurs_helper_tuple(right_sprout, alist)

return alist

def recurs_helper_node(self, branch, alist):

if branch == None:

pass

else:

left_sprout = branch.left

right_sprout = branch.right

if left_sprout:

self.list_add_node(left_sprout, alist)

if right_sprout:

self.list_add_node(right_sprout, alist)

self.recurs_helper_node(left_sprout, alist)

self.recurs_helper_node(right_sprout, alist)

return alist

def list_add(self, something, alist):

if something:

alist.append(something.key)

def list_add_tuple(self, something, alist):

if something:

x, y = something.key, something.value

alist.append((x, y))

def list_add_node(self, something, alist):

if something:

alist.append(something)

def ConvertString(astring):

astring = astring.lower()

x = hash(astring)

return x

#stringy = BinaryStringTree('HelloUser')

#stringy.set('abc', 'cadabra')

#stringy.set('yellow', 'canary')

#stringy.set('patient0', 'zombie')

#stringy.set('Bacula Scott', 'Quantum Leap')

#stringy.dump()

#stringy.delete('abc')

#stringy.dump()

#stringy.delete('patient0')

#stringy.dump()

So I just ran tests on the 'get', 'set' and 'delete' of this. I was using the dump without problems, and the count in the pytest without issue. It's definitely not perfect or anything worth using I'm sure, but learning. That's the goal.

It's basically just the Binary tree with a few tweaks. The Node has a new 'string_value' to hold the hash number we create with the method 'ConvertString', we use that to set, delete and get the nodes.

the key and value don't do anything in those methods, they just get placed, and hold their places.

I also still use my self.median.... though I think if I'm using hash, it may be completely useless. The numbers are all arbitrary and range in a semi-random huge dramatic scale, so.... hence the reason I really really wanted to find a way to make an alphabetical tool to put them into the tree. Whatever the hash picks for my median.. my tree can end up totally defunct... and useless. It might be totally imbalanced and lopsided as a result of what random algo seed it generates for the root_node. If it say picks -28494949478937592 and most my other strings are 38938782392934283 the tree searches are going to be crap.

But, I can come back and try to keep fixing it. I'm not sure why no one else see's this as a problem. Maybe I'm just not finding the right search term.... I don't know why I can't seem to find what I'm looking for on Google to answer any of the questions I have.

Maybe... The point is, binary search tree's for strings in general are pointless? I don't know.... I can't find answers. On to suffix arrays.

Here's my binary string tree.

from TreeGraphical import PrintTree

class

**BinaryTreeStringNode(**object):def __init__(self, key, value, left, right):

self.key = key

self.value = value

self.left = left

self.right = right

self.string_value = None

def __repr__(self):

nleft = self.left and self.left.value or None

nright = self.right and self.right.value or None

return f"{nleft} <--- ( {self.key} : {self.value} ) ---> {nright}"

class

**BinaryStringTree**(object):def __init__(self, median):

self.median = median

self.root_node = BinaryTreeStringNode(self.median, 'binary_root_node', None, None)

self._size = 0

if type(self.median) == str:

self.median = ConvertString(self.median) + 0.5

else:

print("ERROR Tree accepts strings ONLY.")

raise TypeError

def set(self, akey, value):

if type(akey) == str:

numeric = ConvertString(akey)

if numeric < self.median:

east = self.root_node.left

if not east:

# [None<---- newnode _---> None][<--left-- root --right-->]

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

self.root_node.left = newnode

self._size += 1

else:

while east:

if numeric == east.string_value:

east.value = value

break

elif east.string_value < numeric:

if east.right == None:

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

east.right = newnode

self._size += 1

break

else:

east = east.right

else:

if east.left == None:

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

east.left = newnode

self._size += 1

break

else:

east = east.left

if numeric > self.median:

west = self.root_node.right

if not west:

# [<---- root --right-->][None<---- newnode _---> None]

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

self.root_node.right = newnode

self._size += 1

else:

while west:

########################

if numeric == west.string_value:

west.value = value

break

elif west.string_value < numeric:

if west.right == None:

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

west.right = newnode

self._size += 1

break

else:

west = west.right

else:

if west.left == None:

newnode = BinaryTreeStringNode(akey, value, None, None)

newnode.string_value = numeric

west.left = newnode

self._size += 1

break

else:

west = west.left

else:

print("ERROR: This tree accepts STRINGS only.")

return None

def get(self, akey):

if type(akey) == str:

numeric = ConvertString(akey)

node = self.root_node

if numeric < self.median:

node = node.left

while node:

if numeric == node.string_value:

return node.value

else:

if numeric < node.string_value:

node = node.left

else:

node = node.right

else:

node = node.right

while node:

if numeric == node.string_value:

return node.value

else:

if numeric < node.string_value:

node = node.left

else:

node = node.right

# this return may not be necessary.

return None

else:

return None

def delete(self, akey):

if type(akey) == str:

numeric = ConvertString(akey)

if numeric < self.median:

east = self.root_node.left

if not east:

# do nothing

return None

else:

root = True

temp = None

#print(f" NUMERIC = {akey}")

while east:

left_child = east.left

right_child = east.right

if east.string_value == numeric:

# find the first center child/leaf that is none (left or right, I chose left)

# make that the holder for the oposing child, and the one you didn't use becomes parent node

# center branches are @east.left go right ---> (x) <--@east.right go left

if not left_child and not right_child:

if not root:

temp_left = temp.left

temp_right = temp.right

done = False

while not done:

if temp_right:

if temp_right.string_value == numeric:

temp.right = None

self._size -= 1

done = True

elif temp_left:

if temp_left.string_value == numeric:

temp.left = None

print("temp.left = None")

print(temp)

self._size -= 1

done = True

else:

print("OPPS")

break

else:

self.root_node.left = None

self._size -= 1

break

else:

# it has a child link

left_child = east.left

right_child = east.right

if left_child and right_child:

new_left_child = east.left

# because parent node dictates the greatest value of

# the left's, right branch; x in branch will not be greater then the parent...

# we can be sure that however far down the branch we go, the right_child

# which is greater then the parent_node, can go after the preceeding nodes in the chain. It will be #greater then them. Searching for numbers larger then right.key will still lead to the right_child, but #it may be a longer path to it and it's child nodes

while new_left_child:

if new_left_child.right == None:

new_left_child.right = right_child

east.key = left_child.key

east.value = left_child.value

east.left = left_child.left

east.right = left_child.right

east.string_value = left_child.string_value

self._size -= 1

break

else:

new_left_child = new_left_child.right

break

elif left_child and not right_child:

east.key = left_child.key

east.value = left_child.value

east.left = left_child.left

east.right = left_child.right

east.string_value = left_child.string_value

self._size -= 1

break

elif right_child and not left_child:

east.key = right_child.key

east.value = right_child.value

east.left = right_child.left

east.right = right_child.right

east.string_value = right_child.string_value

self._size -= 1

break

else:

print("error in while loop for :while east: delete(akey)")

break

elif numeric > east.string_value:

temp = east

root = False

east = east.right

else:

temp = east

root = False

east = east.left

else:

west = self.root_node.right

root = True

temp = None

if not west:

return None

else:

while west:

left_child = west.left

right_child = west.right

if numeric == west.string_value:

# we've found the node to remove

if not left_child and not right_child:

if not root:

temp_left = temp.left

temp_right = temp.right

done = False

while not done:

if temp_right:

if temp_right.string_value == numeric:

temp.right = None

self._size -= 1

done = True

break

if temp_left:

print("running check left temp")

if temp_left.string_value == numeric:

temp.left = None

self._size -= 1

done = True

break

else:

print("This should not happen.")

break

else:

self.root_node.right = None

self._size -= 1

break

elif left_child and not right_child:

west.key = left_child.key

west.value = left_child.value

west.left = left_child.left

west.right = left_child.right

west.string_value = left_child.string_value

self._size -= 1

break

elif right_child and not left_child:

west.key = right_child.key

west.value = right_child.value

west.left = right_child.left

west.right = right_child.right

west.string_value = right_child.string_value

self._size -= 1

break

elif right_child and left_child:

new_left_child = west.left

#print(right_child)

while new_left_child:

if new_left_child.right == None:

new_left_child.right = right_child

west.key = left_child.key

west.value = left_child.value

west.left = left_child.left

west.right = left_child.right

west.string_value = left_child.string_value

#print(new_left_child)

self._size -= 1

break

else:

new_left_child = new_left_child.right

break

else:

print("error in while loop for :while west: delete(akey)")

elif numeric > west.string_value:

temp = west

west = west.right

root = False

else:

temp = west

west = west.left

root = False

else:

return None

def dump(self):

west = self.root_node.right

east = self.root_node.left

if east:

print(f"East branch: {east}")

self.recursDump(east)

print("********** END EAST************")

if west:

print(f"West branch: {west}")

self.recursDump(west)

print("********** END WEST************")

def recursDump(self, branch):

if branch == None:

print(" ")

else:

left_child = branch.left

right_child = branch.right

if left_child:

print(left_child)

if right_child:

print(right_child)

self.recursDump(right_child)

self.recursDump(left_child)

return branch

def count(self):

trunk = self.root_node

keyslist = []

result = self.recurs_helper(trunk, keyslist)

#print(result)

count = len(result)

return count

def make_key_list(self):

# same as count different return

trunk = self.root_node

keyslist = []

result = self.recurs_helper(trunk, keyslist)

#print(result)

return result

def make_tuple_list(self):

# same as count different return

trunk = self.root_node

keyslist = []

result = self.recurs_helper_tuple(trunk, keyslist)

#print(result)

return result

def make_node_list(self):

# same as count different return

trunk = self.root_node

keyslist = []

result = self.recurs_helper_node(trunk, keyslist)

#print(result)

return result

def recurs_helper(self, branch, alist):

if branch == None:

pass

else:

left_sprout = branch.left

right_sprout = branch.right

if left_sprout:

self.list_add(left_sprout, alist)

if right_sprout:

self.list_add(right_sprout, alist)

self.recurs_helper(left_sprout, alist)

self.recurs_helper(right_sprout, alist)

return alist

def recurs_helper_tuple(self, branch, alist):

if branch == None:

pass

else:

left_sprout = branch.left

right_sprout = branch.right

if left_sprout:

self.list_add_tuple(left_sprout, alist)

if right_sprout:

self.list_add_tuple(right_sprout, alist)

self.recurs_helper_tuple(left_sprout, alist)

self.recurs_helper_tuple(right_sprout, alist)

return alist

def recurs_helper_node(self, branch, alist):

if branch == None:

pass

else:

left_sprout = branch.left

right_sprout = branch.right

if left_sprout:

self.list_add_node(left_sprout, alist)

if right_sprout:

self.list_add_node(right_sprout, alist)

self.recurs_helper_node(left_sprout, alist)

self.recurs_helper_node(right_sprout, alist)

return alist

def list_add(self, something, alist):

if something:

alist.append(something.key)

def list_add_tuple(self, something, alist):

if something:

x, y = something.key, something.value

alist.append((x, y))

def list_add_node(self, something, alist):

if something:

alist.append(something)

def ConvertString(astring):

astring = astring.lower()

x = hash(astring)

return x

#stringy = BinaryStringTree('HelloUser')

#stringy.set('abc', 'cadabra')

#stringy.set('yellow', 'canary')

#stringy.set('patient0', 'zombie')

#stringy.set('Bacula Scott', 'Quantum Leap')

#stringy.dump()

#stringy.delete('abc')

#stringy.dump()

#stringy.delete('patient0')

#stringy.dump()

## Comments

## Post a Comment