pandas python and probability Part 2
So I may have a really weird way of going about figuring this stuff out.
I pound in code, break it tons of times and then keep googling until I find what I need to fix each break.
I have a very strong feeling the test first crowd would be furious with me....
But, here it goes.
I wanted to create a training set for my machine learning experiment.
This is the order in which I did that:
1) create the pandas database
# Show up to 100 columns when printing a DataFrame.  The fully qualified
# option name is used because the bare 'max_columns' pattern can match more
# than one option in newer pandas releases, which raises OptionError.
pd.set_option('display.max_columns', 100)

# Seed row: run 0 starts with a 50/50 split between odd and even.
# NOTE: the keys must match the names passed to ``columns=`` exactly; a
# mismatched key such as 'run number:' produces a column of NaN.
train_data = {
    'run number': [0],
    'odd probability': [0.50],
    'even probability': [0.50],
}
train_prob_data = pd.DataFrame(
    train_data,
    columns=['run number', 'odd probability', 'even probability'],
)
2) create a list of random numbers
3) make a recursive method to create a tuple list of odd probabilities
and even probabilities (this errored out by returning a NoneType because
I was not 'returning' the recursive call at the end of the else clause. I was just
calling the method again instead)
def get_prob_data(alist, count, even_count, odd_count, max_count, probs_list):
    """
    Build the running (even probability, odd probability) list for alist.

    alist = list of randomly chosen numbers
    count = index of the first item to process (0 to start at the beginning)
    even_count, odd_count = tallies carried in before the first item
    max_count = index at which to stop; alist[max_count] itself is not read
    probs_list = list the tuples are appended to (pass [] to start fresh)

    Returns probs_list, the same list object that was passed in, extended
    with one (even probability, odd probability) tuple per item processed.
    Both values are rounded to 2 decimal places.

    Written as a loop rather than recursion so that long lists cannot hit
    Python's recursion limit.
    """
    while count < max_count:
        item = alist[count]
        # Denominator: tuples recorded so far, plus 2, plus 1 for this item.
        new_total = len(probs_list) + 3
        if item % 2 == 0:
            even_count += 1
            add_even = round(even_count / new_total, 2)
            # The other side is the complement of the one just computed.
            add_odd = round(1.00 - add_even, 2)
        else:
            odd_count += 1
            add_odd = round(odd_count / new_total, 2)
            add_even = round(1.00 - add_odd, 2)
        probs_list.append((add_even, add_odd))
        count += 1
    return probs_list
4) add these probabilities from the list returned from recursion to the pandas
database.
def add_probs(db, probs):
    """
    Append one row per probability pair to a DataFrame and return the result.

    db = database (DataFrame) to be extended; it is not modified in place
    probs = tuple list : [(even probability, odd probability)]

    Returns a new DataFrame holding the rows of db followed by one row per
    entry of probs, where 'run number' is the entry's index in probs.
    When probs is empty, db itself is returned unchanged.
    """
    if not probs:
        return db
    # Build every new row up front and concatenate once.  Per-row
    # DataFrame.append was removed in pandas 2.0 and copies the whole frame
    # on each call.  Every pair is included, up to and including the last.
    # (Do NOT append the even and odd values as separate frames and concat
    # them inside a loop -- that approach froze the run.)
    new_rows = pd.DataFrame({
        'run number': list(range(len(probs))),
        'even probability': [pair[0] for pair in probs],
        'odd probability': [pair[1] for pair in probs],
    })
    return pd.concat([db, new_rows], ignore_index=True)
I overloaded my command prompt at first. I had the odd probabilities and the even probabilities in separate lists, and was trying to add them to the database separately with concat. Not sure why in the world I thought I should keep them separate. It froze on the 23rd loop trying to concat the two databases I was appending. I learn a lot through breaking stuff.
There are some helpful links in the whole code at the bottom.
5) A method to wrap it all together and send back the new database:
def create_panda_set(db):
    """
    Build the probability training set and append it to db.

    Use create_data to get the list of random numbers, use get_prob_data to
    turn that list into (even probability, odd probability) tuples, then
    use add_probs to append those tuples to db.

    Returns the DataFrame returned by add_probs.
    """
    alist = create_data()
    # get_prob_data stops once its index reaches max_count, so passing the
    # full length makes it process every number, including the last one
    # (len(alist) - 1 would leave the final number out).
    max_count = len(alist)
    start_index = 0
    # Both tallies start at 1, not 0.
    even_count = 1
    odd_count = 1
    probs_list = get_prob_data(
        alist, start_index, even_count, odd_count, max_count, []
    )
    return add_probs(db, probs_list)
6) A lot of print statements, logging, trial and error, then ran for success:
def test_it():
    """Print the seed frame, build the full set, then print its head and tail."""
    print(train_prob_data)
    result = create_panda_set(train_prob_data)
    separator = "^*^" * 10
    print(separator)
    # Only the first and last rows are shown; the full frame is long.
    first_rows = result.head()
    print(first_rows)
    last_rows = result.tail()
    print(last_rows)


test_it()
Picture of final product:
###### The whole code ######
# Machine learning experiment Part 2
import pandas as pd
from random import randint
import logging
import sys
############## experiment part 2 ##############
# resources:
# 1) https://stackoverflow.com/questions/16597265/appending-to-an-empty-data-frame-in-pandas
# 2) https://docs.python.org/3/library/logging.html
# 3) https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.append.html
# 4) https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html
# 5) https://stackoverflow.com/questions/15819050/pandas-dataframe-concat-vs-append
# pandas data set up:
# Show up to 100 columns when printing a DataFrame.  The fully qualified
# option name is used because the bare 'max_columns' pattern can match more
# than one option in newer pandas releases, which raises OptionError.
pd.set_option('display.max_columns', 100)

# Seed row: run 0 starts with a 50/50 split between odd and even.
# NOTE: the keys must match the names passed to ``columns=`` exactly; a
# mismatched key such as 'run number:' produces a column of NaN.
train_data = {
    'run number': [0],
    'odd probability': [0.50],
    'even probability': [0.50],
}
train_prob_data = pd.DataFrame(
    train_data,
    columns=['run number', 'odd probability', 'even probability'],
)

# logger set up:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def create_data(size=100, low=1, high=20):
    """
    Return a list of ``size`` random integers, each in [low, high] inclusive.

    Called with no arguments it produces the experiment's data set:
    100 draws from 1..20.
    """
    return [randint(low, high) for _ in range(size)]
def get_prob_data(alist, count, even_count, odd_count, max_count, probs_list):
    """
    Build the running (even probability, odd probability) list for alist.

    alist = list of randomly chosen numbers
    count = index of the first item to process (0 to start at the beginning)
    even_count, odd_count = tallies carried in before the first item
    max_count = index at which to stop; alist[max_count] itself is not read
    probs_list = list the tuples are appended to (pass [] to start fresh)

    Returns probs_list, the same list object that was passed in, extended
    with one (even probability, odd probability) tuple per item processed.
    Both values are rounded to 2 decimal places.

    Written as a loop rather than recursion so that long lists cannot hit
    Python's recursion limit.
    """
    while count < max_count:
        item = alist[count]
        # Denominator: tuples recorded so far, plus 2, plus 1 for this item.
        new_total = len(probs_list) + 3
        if item % 2 == 0:
            even_count += 1
            add_even = round(even_count / new_total, 2)
            # The other side is the complement of the one just computed.
            add_odd = round(1.00 - add_even, 2)
        else:
            odd_count += 1
            add_odd = round(odd_count / new_total, 2)
            add_even = round(1.00 - add_odd, 2)
        probs_list.append((add_even, add_odd))
        count += 1
    return probs_list
############# bad zoot ##############
#event = 'recursion else clause: get_prob_data()\n alist len = %s\n oddlist len = %s\n, evenlist len= %s\n, current# = %s\n, max_count = %s'
#length_alist = len(alist)
#length_odd_prob = len(odd_prob)
#length_even_prob = len(even_prob)
#current_digit = item
#logger.info(event, length_alist, length_odd_prob, length_even_prob, current_digit, max_count)
#get_prob_data(alist, count, max_count, odd_prob, even_prob)
##########################################
def add_probs(db, probs):
    """
    Append one row per probability pair to a DataFrame and return the result.

    db = database (DataFrame) to be extended; it is not modified in place
    probs = tuple list : [(even probability, odd probability)]

    Returns a new DataFrame holding the rows of db followed by one row per
    entry of probs, where 'run number' is the entry's index in probs.
    When probs is empty, db itself is returned unchanged.
    """
    if not probs:
        return db
    # Build every new row up front and concatenate once.  Per-row
    # DataFrame.append was removed in pandas 2.0 and copies the whole frame
    # on each call.  Every pair is included, up to and including the last.
    # (Do NOT append the even and odd values as separate frames and concat
    # them inside a loop -- that approach froze the run.)
    new_rows = pd.DataFrame({
        'run number': list(range(len(probs))),
        'even probability': [pair[0] for pair in probs],
        'odd probability': [pair[1] for pair in probs],
    })
    return pd.concat([db, new_rows], ignore_index=True)
def create_panda_set(db):
    """
    Build the probability training set and append it to db.

    Use create_data to get the list of random numbers, use get_prob_data to
    turn that list into (even probability, odd probability) tuples, then
    use add_probs to append those tuples to db.

    Returns the DataFrame returned by add_probs.
    """
    alist = create_data()
    # get_prob_data stops once its index reaches max_count, so passing the
    # full length makes it process every number, including the last one
    # (len(alist) - 1 would leave the final number out).
    max_count = len(alist)
    start_index = 0
    # Both tallies start at 1, not 0.
    even_count = 1
    odd_count = 1
    probs_list = get_prob_data(
        alist, start_index, even_count, odd_count, max_count, []
    )
    return add_probs(db, probs_list)
def test_it():
    """Print the seed frame, build the full set, then print its head and tail."""
    print(train_prob_data)
    result = create_panda_set(train_prob_data)
    separator = "^*^" * 10
    print(separator)
    # Only the first and last rows are shown; the full frame is long.
    first_rows = result.head()
    print(first_rows)
    last_rows = result.tail()
    print(last_rows)


test_it()
I pound in code, break it tons of times and then keep googling until I find what I need to fix each break.
I have a very strong feeling the test first crowd would be furious with me....
But, here it goes.
I wanted to create a training set for my machine learning experiment.
This is the order in which I did that:
1) create the pandas database
# Show up to 100 columns when printing a DataFrame.  The fully qualified
# option name is used because the bare 'max_columns' pattern can match more
# than one option in newer pandas releases, which raises OptionError.
pd.set_option('display.max_columns', 100)

# Seed row: run 0 starts with a 50/50 split between odd and even.
# NOTE: the keys must match the names passed to ``columns=`` exactly; a
# mismatched key such as 'run number:' produces a column of NaN.
train_data = {
    'run number': [0],
    'odd probability': [0.50],
    'even probability': [0.50],
}
train_prob_data = pd.DataFrame(
    train_data,
    columns=['run number', 'odd probability', 'even probability'],
)
2) create a list of random numbers
def create_data(size=100, low=1, high=20):
    """
    Return a list of ``size`` random integers, each in [low, high] inclusive.

    Called with no arguments it produces the experiment's data set:
    100 draws from 1..20.
    """
    return [randint(low, high) for _ in range(size)]
3) make a recursive method to create a tuple list of odd probabilities
and even probabilities (this errored out by returning a NoneType because
I was not 'returning' the recursive call at the end of the else clause. I was just
calling the method again instead)
def get_prob_data(alist, count, even_count, odd_count, max_count, probs_list):
    """
    Build the running (even probability, odd probability) list for alist.

    alist = list of randomly chosen numbers
    count = index of the first item to process (0 to start at the beginning)
    even_count, odd_count = tallies carried in before the first item
    max_count = index at which to stop; alist[max_count] itself is not read
    probs_list = list the tuples are appended to (pass [] to start fresh)

    Returns probs_list, the same list object that was passed in, extended
    with one (even probability, odd probability) tuple per item processed.
    Both values are rounded to 2 decimal places.

    Written as a loop rather than recursion so that long lists cannot hit
    Python's recursion limit.
    """
    while count < max_count:
        item = alist[count]
        # Denominator: tuples recorded so far, plus 2, plus 1 for this item.
        new_total = len(probs_list) + 3
        if item % 2 == 0:
            even_count += 1
            add_even = round(even_count / new_total, 2)
            # The other side is the complement of the one just computed.
            add_odd = round(1.00 - add_even, 2)
        else:
            odd_count += 1
            add_odd = round(odd_count / new_total, 2)
            add_even = round(1.00 - add_odd, 2)
        probs_list.append((add_even, add_odd))
        count += 1
    return probs_list
4) add these probabilities from the list returned from recursion to the pandas
database.
def add_probs(db, probs):
    """
    Append one row per probability pair to a DataFrame and return the result.

    db = database (DataFrame) to be extended; it is not modified in place
    probs = tuple list : [(even probability, odd probability)]

    Returns a new DataFrame holding the rows of db followed by one row per
    entry of probs, where 'run number' is the entry's index in probs.
    When probs is empty, db itself is returned unchanged.
    """
    if not probs:
        return db
    # Build every new row up front and concatenate once.  Per-row
    # DataFrame.append was removed in pandas 2.0 and copies the whole frame
    # on each call.  Every pair is included, up to and including the last.
    # (Do NOT append the even and odd values as separate frames and concat
    # them inside a loop -- that approach froze the run.)
    new_rows = pd.DataFrame({
        'run number': list(range(len(probs))),
        'even probability': [pair[0] for pair in probs],
        'odd probability': [pair[1] for pair in probs],
    })
    return pd.concat([db, new_rows], ignore_index=True)
I overloaded my command prompt at first. I had the odd probabilities and the even probabilities in separate lists, and was trying to add them to the database separately with concat. Not sure why in the world I thought I should keep them separate. It froze on the 23rd loop trying to concat the two databases I was appending. I learn a lot through breaking stuff.
There are some helpful links in the whole code at the bottom.
5) A method to wrap it all together and send back the new database:
def create_panda_set(db):
    """
    Build the probability training set and append it to db.

    Use create_data to get the list of random numbers, use get_prob_data to
    turn that list into (even probability, odd probability) tuples, then
    use add_probs to append those tuples to db.

    Returns the DataFrame returned by add_probs.
    """
    alist = create_data()
    # get_prob_data stops once its index reaches max_count, so passing the
    # full length makes it process every number, including the last one
    # (len(alist) - 1 would leave the final number out).
    max_count = len(alist)
    start_index = 0
    # Both tallies start at 1, not 0.
    even_count = 1
    odd_count = 1
    probs_list = get_prob_data(
        alist, start_index, even_count, odd_count, max_count, []
    )
    return add_probs(db, probs_list)
6) A lot of print statements, logging, trial and error, then ran for success:
def test_it():
    """Print the seed frame, build the full set, then print its head and tail."""
    print(train_prob_data)
    result = create_panda_set(train_prob_data)
    separator = "^*^" * 10
    print(separator)
    # Only the first and last rows are shown; the full frame is long.
    first_rows = result.head()
    print(first_rows)
    last_rows = result.tail()
    print(last_rows)


test_it()
Picture of final product:
###### The whole code ######
# Machine learning experiment Part 2
import pandas as pd
from random import randint
import logging
import sys
############## experiment part 2 ##############
# resources:
# 1) https://stackoverflow.com/questions/16597265/appending-to-an-empty-data-frame-in-pandas
# 2) https://docs.python.org/3/library/logging.html
# 3) https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.append.html
# 4) https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html
# 5) https://stackoverflow.com/questions/15819050/pandas-dataframe-concat-vs-append
# pandas data set up:
# Show up to 100 columns when printing a DataFrame.  The fully qualified
# option name is used because the bare 'max_columns' pattern can match more
# than one option in newer pandas releases, which raises OptionError.
pd.set_option('display.max_columns', 100)

# Seed row: run 0 starts with a 50/50 split between odd and even.
# NOTE: the keys must match the names passed to ``columns=`` exactly; a
# mismatched key such as 'run number:' produces a column of NaN.
train_data = {
    'run number': [0],
    'odd probability': [0.50],
    'even probability': [0.50],
}
train_prob_data = pd.DataFrame(
    train_data,
    columns=['run number', 'odd probability', 'even probability'],
)

# logger set up:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def create_data(size=100, low=1, high=20):
    """
    Return a list of ``size`` random integers, each in [low, high] inclusive.

    Called with no arguments it produces the experiment's data set:
    100 draws from 1..20.
    """
    return [randint(low, high) for _ in range(size)]
def get_prob_data(alist, count, even_count, odd_count, max_count, probs_list):
    """
    Build the running (even probability, odd probability) list for alist.

    alist = list of randomly chosen numbers
    count = index of the first item to process (0 to start at the beginning)
    even_count, odd_count = tallies carried in before the first item
    max_count = index at which to stop; alist[max_count] itself is not read
    probs_list = list the tuples are appended to (pass [] to start fresh)

    Returns probs_list, the same list object that was passed in, extended
    with one (even probability, odd probability) tuple per item processed.
    Both values are rounded to 2 decimal places.

    Written as a loop rather than recursion so that long lists cannot hit
    Python's recursion limit.
    """
    while count < max_count:
        item = alist[count]
        # Denominator: tuples recorded so far, plus 2, plus 1 for this item.
        new_total = len(probs_list) + 3
        if item % 2 == 0:
            even_count += 1
            add_even = round(even_count / new_total, 2)
            # The other side is the complement of the one just computed.
            add_odd = round(1.00 - add_even, 2)
        else:
            odd_count += 1
            add_odd = round(odd_count / new_total, 2)
            add_even = round(1.00 - add_odd, 2)
        probs_list.append((add_even, add_odd))
        count += 1
    return probs_list
############# bad zoot ##############
#event = 'recursion else clause: get_prob_data()\n alist len = %s\n oddlist len = %s\n, evenlist len= %s\n, current# = %s\n, max_count = %s'
#length_alist = len(alist)
#length_odd_prob = len(odd_prob)
#length_even_prob = len(even_prob)
#current_digit = item
#logger.info(event, length_alist, length_odd_prob, length_even_prob, current_digit, max_count)
#get_prob_data(alist, count, max_count, odd_prob, even_prob)
##########################################
def add_probs(db, probs):
    """
    Append one row per probability pair to a DataFrame and return the result.

    db = database (DataFrame) to be extended; it is not modified in place
    probs = tuple list : [(even probability, odd probability)]

    Returns a new DataFrame holding the rows of db followed by one row per
    entry of probs, where 'run number' is the entry's index in probs.
    When probs is empty, db itself is returned unchanged.
    """
    if not probs:
        return db
    # Build every new row up front and concatenate once.  Per-row
    # DataFrame.append was removed in pandas 2.0 and copies the whole frame
    # on each call.  Every pair is included, up to and including the last.
    # (Do NOT append the even and odd values as separate frames and concat
    # them inside a loop -- that approach froze the run.)
    new_rows = pd.DataFrame({
        'run number': list(range(len(probs))),
        'even probability': [pair[0] for pair in probs],
        'odd probability': [pair[1] for pair in probs],
    })
    return pd.concat([db, new_rows], ignore_index=True)
def create_panda_set(db):
    """
    Build the probability training set and append it to db.

    Use create_data to get the list of random numbers, use get_prob_data to
    turn that list into (even probability, odd probability) tuples, then
    use add_probs to append those tuples to db.

    Returns the DataFrame returned by add_probs.
    """
    alist = create_data()
    # get_prob_data stops once its index reaches max_count, so passing the
    # full length makes it process every number, including the last one
    # (len(alist) - 1 would leave the final number out).
    max_count = len(alist)
    start_index = 0
    # Both tallies start at 1, not 0.
    even_count = 1
    odd_count = 1
    probs_list = get_prob_data(
        alist, start_index, even_count, odd_count, max_count, []
    )
    return add_probs(db, probs_list)
def test_it():
    """Print the seed frame, build the full set, then print its head and tail."""
    print(train_prob_data)
    result = create_panda_set(train_prob_data)
    separator = "^*^" * 10
    print(separator)
    # Only the first and last rows are shown; the full frame is long.
    first_rows = result.head()
    print(first_rows)
    last_rows = result.tail()
    print(last_rows)


test_it()
Comments
Post a Comment