methods to play with nltk so far
Update: I found some new things to do. I'm putting them in the next post, with updated code.
I'm not having much luck doing anything useful with the verbs in the user's input. I can't find anything in the NLTK documentation that suggests a built-in way of distinguishing 'action verbs' from other verbs. Action verbs would be very useful.
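One rough idea (just a sketch of a heuristic, not anything NLTK provides out of the box): every WordNet verb synset carries a lexicographer file name like verb.motion or verb.contact, so I could treat a hand-picked set of those categories as a stand-in for 'action verbs'. Which categories count as 'action' is entirely my own guess here.

from nltk.corpus import wordnet

# Lexicographer categories I'm guessing at as "action-like" -- my assumption,
# not an official NLTK/WordNet distinction.
ACTION_LEXNAMES = {'verb.motion', 'verb.contact', 'verb.creation', 'verb.change'}

def looks_like_action_verb(word):
    """Rough heuristic: does any verb sense of the word land in an 'action' category?"""
    for syn in wordnet.synsets(word, pos=wordnet.VERB):
        if syn.lexname() in ACTION_LEXNAMES:
            return True
    return False

for w in ('run', 'throw', 'seem', 'believe'):
    print(w, looks_like_action_verb(w))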
I'll make some pictures to show it off a bit. The script1.txt and script2.txt files I'm passing as sys.argv[1] are simple text files full of question templates. I'll do a pic of those too.
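To give an idea of the format (these lines are made-up examples, not my actual files), each line in script1.txt is just a question template with a single %s placeholder that the chosen noun gets substituted into by script % noun:

Why do you care about %s ?
What does %s have to do with anything ?
Tell me more about %s .

script2.txt is the same idea, but the templates are written around a verb instead.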
So, here is my tinkering so far, and it is really fun to play with. Maybe I'm odd, but I think it's fun to pretend my computer has my quirky sense of humor.
Pictures:
[image: noun response (method: make_noun_response())]
[image: verb response (method: make_verb_response())]
[image: script1.txt (noun response questions)]
Code:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
from nltk.corpus import wordnet
from random import randint
import nltk

# place the script1 / script2 = sys.argv[1] assignments here
#script1 = sys.argv[1]
#script2 = sys.argv[1]
"""
Requires:
above imports and :
install - nltk
install - python3
In your python3 shell type these to download needed data sets:
>>>import nltk
>>>nltk.download('wordnet')
>>>nltk.download('punkt')
>>>nltk.download('averaged_perceptron_tagger')
make_noun_response() -- requires script1 as sys.argv
make_verb_response() -- requires script2 as sys.argv
"""
def get_script_line(arg):
    """Return a random line from the script file given by arg."""
    count = None
    with open(arg) as f:
        for i, l in enumerate(f):
            count = i              # index of the last line seen
    if count is not None:
        with open(arg) as f:
            lines = f.readlines()
        x = randint(0, count)      # randint is inclusive on both ends
        return lines[x]
def run_synonyms():
    """Suggest a random WordNet synonym for whatever the user types."""
    search_for = input(">>>")
    while search_for not in ['QUIT', 'EXIT']:
        alist = []
        for syn in wordnet.synsets(search_for):
            for l in syn.lemmas():
                alist.append(l.name())
        if len(alist) > 0:
            length = len(alist) - 1
            x = randint(0, length)
            #print(x)
            synx = alist[x]
            print(f"possible matches = {length + 1}")
            print(f"Are you looking for a word similar to : {synx} ?")
        else:
            print("word not found in wordnet")
        search_for = input(">>>")
def find_NN_VV():
    """Print the nouns and verbs found in the user's input."""
    search_for = input("...>>>")
    while search_for not in ['QUIT', 'EXIT']:
        nounlist = []
        verblist = []
        tokens = nltk.word_tokenize(search_for)
        #print(tokens)
        tags = nltk.pos_tag(tokens)
        #print(tags)
        for item in tags:
            # item is a (word, tag) pair; noun tags start with 'N', verb tags with 'V'
            if item[1][0] == 'N':
                nounlist.append(item[0])
            if item[1][0] == 'V':
                verblist.append(item[0])
        print("nouns = ")
        print(nounlist)
        print("verbs = ")
        print(verblist)
        search_for = input("...>>>")
def make_noun_response():
    """ Don't forget to set script1 as sys.argv[1] by typing it in when you run this file:
    $ python3 nltk0_ex.py script1.txt """
    make = input(" user : ")
    while make not in ['EXIT', 'QUIT']:
        nounlist = []
        tokens = nltk.word_tokenize(make)
        tags = nltk.pos_tag(tokens)
        for item in tags:
            x = item[1]
            if x.startswith('NN'):
                nounlist.append(item[0])
        if len(nounlist) > 0:
            # change it to a set to eliminate duplicates
            nounlist = set(nounlist)
            # change it back to a list to easily index a random selection
            nounlist = list(nounlist)
            x = randint(0, len(nounlist) - 1)
            script = get_script_line(script1)
            noun = nounlist[x]
            print("Wiwa: ")
            print(script % noun)
        else:
            print("Wiwa: \nI do not comprehend.")
        make = input(" user : ")
def make_verb_response():
    """ Wiwa's verb scripting is really shoddy. Real conversational verbs aren't direct action verbs
    in most cases, so this script is not very interactive.
    Don't forget to set script2 as sys.argv[1] by typing it in when you run this file:
    $ python3 nltk0_ex.py script2.txt """
    make = input(" user : ")
    while make not in ['EXIT', 'QUIT']:
        verblist = []
        tokens = nltk.word_tokenize(make)
        tags = nltk.pos_tag(tokens)
        for item in tags:
            x = item[1]
            if x.startswith('VB'):
                verblist.append(item[0])
        if len(verblist) > 0:
            # change it to a set to eliminate duplicates
            verblist = set(verblist)
            # change it back to a list to easily index a random element.
            # random.choice / random.sample could do the picking, but I have had
            # issues with random being slow on large data, which I hope Wiwa
            # will have at some point.
            verblist = list(verblist)
            x = randint(0, len(verblist) - 1)
            script = get_script_line(script2)
            verb = verblist[x]
            print("Wiwa: ")
            print(script % verb)
        else:
            print("Wiwa: \nI do not comprehend.")
        make = input(" user : ")
# Uncomment one of these to try it out:
#find_NN_VV()
#run_synonyms()
#make_noun_response()
#make_verb_response()
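One way to wire it up for an end-to-end run (a minimal sketch; the commented-out assignments and calls above do the same job when uncommented, and it assumes the file is saved as nltk0_ex.py):

if __name__ == "__main__":
    # e.g.  $ python3 nltk0_ex.py script1.txt
    script1 = sys.argv[1]
    script2 = sys.argv[1]
    make_noun_response()    # or make_verb_response() when passing script2.txt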