#!/usr/bin/python
#
# Ask Merlin
# ==========
#
# The heart of this program is the second option, the more sophisticated
# Ask Merlin script, which uses the weightGuesser class to call pyGoogle.
# The weightGuesser class takes inputs from the user in the form of a
# question, some options or choices, some criteria, and then a weight for
# each criterion.  Then, weightGuesser calls pyGoogle to measure the number
# of hits, or applicable sites, that come up when it uses each option in
# combination with each criterion and the original question.  Actually, the
# original question is first modified by running it through the NLQ class
# (its `keywords` attribute), which extracts only the important keywords
# from the question, in order not to use up the Google API's limited 10
# search words with frivolous words like "the" and "a" etc.  The analysis
# then uses a weighted average calculation, and also takes a ratio between
# the hits for a question-option-criterion combo and a hit count from just
# the option alone, so as to allow for less common options which nonetheless
# have a high ratio of repeated hits when put in combination with question
# and criteria.  The best way to comprehend all this is to read the code
# itself.
#
# OK, I had to re-write askMerlin from scratch, due to Google's disabling
# of the functionality that the program relied on.  It seems that Merlin
# was violating Google's terms of service, unbeknownst to me, that's for
# sure!  But I do not want to violate anyone's terms of service, so I
# re-wrote askMerlin using Yahoo instead.  I also came up with a completely
# new web-scraping algorithm, using string functions instead of Regular
# Expressions.  Simple is better than complicated!
# NOTE(review): the code in this file still calls pyGoogle / google.com;
# the Yahoo rewrite mentioned above is not part of this file.
#
# AskMerlin is a script I did by putting together two scripts and modifying
# them both and adding input/output routines around them.
#
# First, I utilized the multiChoiceGuesser script that Max M posted on the
# newsgroup comp.lang.python a couple weeks ago.  This uses urllib to go out
# to the web and judge the appropriateness of a given answer by how many
# hits it gets on Google when coupled with the original question in a Google
# search.  My contributions were to enable the program to ask for both an
# original question, and then for options to choose from.  I also set up a
# small routine in order to choose a most appropriate answer, in the case
# that no options are given.  This is done by using the second program, NLQ,
# to create options of its own to choose from: NLQ picks out keywords from
# the page returned by a Google search of the question, by itself.  Then,
# these keywords are used as options or possible answers to the question.
# Then, multiChoiceGuesser is applied to the question along with all of the
# keywords generated by NLQ.  The result can take a long time, but
# eventually it gets there, always. (???)
# Also, I added to multiChoiceGuesser the requirement to do two Google
# searches, one on the original question and each option, and one on the
# option by itself.  Then we calculate a ratio between each option's Google
# hit score and its question/option Google hit score, thus avoiding merely
# choosing the option that has overwhelmingly high hits all by itself.
#
# Surely better algorithms can vastly improve this program!!!
#
# I am hoping someone or some folks come up with improved variations and
# algorithms.
#
# Various algorithms could be tried, and then the results from the various
# algorithms could be averaged in order to produce more accurate results.
#
# Currently, Merlin may have a low IQ, but he has potential for the future.
# Anyway, Merlin can already answer just about any question.
# Someday, perhaps he will even answer correctly, or at least with wisdom,
# most or all of the time.
# ;-)))))))))))))
#
# NLQ:
# a short program called NLQ, or natural language query, which can be found
# online at http://gurno.com/adam/nlq/#download
# NLQ is a class to take an inputted query and output 1. keywords and
# 2. also to categorize the type of question being asked.  I am primarily
# interested in using the keywords extracted from a query by NLQ.
# I shamelessly modified NLQ to add many more IGNORE_WORDS and otherwise
# spruce it up.
#
# NLQ.py is still rather dumb, but hey, he has potential ;-))))).

from urllib import *  # Python 2: supplies urlopen / urlencode (kept from the original)
import re
import string
import sys

try:
    # Python 3 moved these helpers into submodules.
    from urllib.request import urlopen
    from urllib.parse import urlencode
except ImportError:
    # Python 2: the star import above already provided both names.
    pass

try:
    # pyGoogle is only needed for the web-backed options (1 and 2); keeping
    # the import optional lets the offline decision tools run without it.
    import google
except ImportError:
    google = None
else:
    google.LICENSE_KEY = "INSERT YOUR GOOGLE LICENSE KEY HERE!"

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

# stuff
__version__ = "0.1"

# define the question types...
UNKNOWN = 0
KNOWLEDGE = 1
COMPREHENSION = 2
APPLICATION = 3
ANALYSIS = 4
SYNTHESIS = 5
EVALUATION = 6

KNOWLEDGE_WORDS = ["name", "list", "recall", "define", "tell", "match",
                   "who", "what", "when", "describe", "where"]
COMPREHENSION_WORDS = ["retell"]
APPLICATION_WORDS = ["why"]
ANALYSIS_WORDS = ["how", "classify", "outline", "diagram"]
SYNTHESIS_WORDS = []
EVALUATION_WORDS = []

PRONOUNS = ["he", "she", "it", "me", "you", "they", "them", "we", "who",
            "myself", "yourself", "ourself", "I", "me", "my"]
VERBS = ["is", "was", "are", "were", "be", "shall", "am", "isn't", "can't",
         "won't", "shouldn't", "couldn't", "aren't", "do", "don't", ]
OTHER_WORDS = ["if", "to", "too", "there", "will", "the", "a", "let", "I'll",
               "this", "these", "those", "let", "*.", "+*", ".*", "<*", ">*",
               "=*", "*=", "*<", "*>", "*.", "*-", "-*", "*:", ":*", ";*",
               "*;", "*,", ",*", "*.*", "*,*", "*;*", "*:*", "*+*", "*=*",
               "*-*", "*_*", "*<*", "*>*", "*?*", "*/*", "of", "and", "for",
               "very", "not", "in", "on", "up", "has", "from", "which", "and",
               "on", "of", "or", "not", "by", "can", "that", "your", "with",
               "their", "over", "back", "link", "about", "an", "at", "his",
               "enter", "into", "so", "was", "a", "as", "but"]
IGNORE_WORDS = (VERBS + PRONOUNS + OTHER_WORDS + KNOWLEDGE_WORDS +
                COMPREHENSION_WORDS + APPLICATION_WORDS + ANALYSIS_WORDS)

# NLQ lowercases its input before filtering, so the lookup set is lowercased
# too; otherwise entries such as "I" and "I'll" could never match.
_IGNORE_SET = frozenset([entry.lower() for entry in IGNORE_WORDS])

# A word containing any of these characters is discarded outright by NLQ.
_REJECT_CHARS = "~@#$%^&<>:;{}[*()_-+=?"

# Checked in this order by determine_type().
_TYPE_TABLE = (
    (KNOWLEDGE_WORDS, KNOWLEDGE),
    (APPLICATION_WORDS, APPLICATION),
    (ANALYSIS_WORDS, ANALYSIS),
    (SYNTHESIS_WORDS, SYNTHESIS),
    (EVALUATION_WORDS, EVALUATION),
    (COMPREHENSION_WORDS, COMPREHENSION),
)


def _emit(*parts):
    """Write *parts* space-separated on one line.

    Mirrors the Python 2 ``print a, b`` statement so that the script
    produces the same output on Python 2 and Python 3.
    """
    sys.stdout.write(" ".join([str(part) for part in parts]) + "\n")


def _hit_ratio(numerator, denominator):
    """Return numerator / denominator as a float.

    A count of zero is replaced by one on either side so that a search
    with no hits neither divides by zero nor zeroes out the ratio.
    """
    top = float(numerator) or 1.0
    bottom = float(denominator) or 1.0
    return top / bottom


def determine_type(word):
    """Return the question-type constant whose word list contains *word*.

    For right now this only matches the first word of a question; the
    intent is to eventually match against the whole string.  Returns
    UNKNOWN when no list contains the word.
    """
    for words, question_type in _TYPE_TABLE:
        if word in words:
            return question_type
    return UNKNOWN


class NLQ:
    """Natural language query: classify a question and extract its keywords.

    Attributes:
        tuple: the lowercased, whitespace-split words of the input (a list).
        type: question-type constant derived from the first word.
        keywords: the remaining words that survive filtering, in order.
    """

    def __init__(self, a_string):
        self.tuple = a_string.lower().split()
        # An empty query has no first word to classify.
        if self.tuple:
            self.type = determine_type(self.tuple[0])
        else:
            self.type = UNKNOWN
        self.keywords = []
        # The first word is treated as the question word and never kept.
        for word in self.tuple[1:]:
            if any(ch in _REJECT_CHARS for ch in word):
                # NOTE(review): this also drops a final word ending in "?".
                continue
            if word in _IGNORE_SET:
                continue
            if word[0] not in string.ascii_letters:
                continue
            # Strip trailing punctuation ("house," -> "house") and keep the
            # word; word[0] is a letter, so the result is never empty.
            while word[-1] not in string.ascii_letters:
                word = word[:-1]
            if word in _IGNORE_SET:
                continue
            self.keywords.append(word)

    def __repr__(self):
        return "type: %s\nkeywords: %s" % (self.type, self.keywords)


class Decision:
    """Interactive weighted-score decision analysis.

    The user supplies options, criteria, a weight per criterion and a score
    per (option, criterion); each option's result is the weighted sum.
    """

    def get_list(self, heading, prompt):
        """Prompt repeatedly until a blank line; return the entered lines.

        *prompt* must contain one ``%d`` placeholder for the item number.
        """
        _emit(heading)
        _emit()
        _emit("(enter a blank line to end the list)")
        entries = []
        number = 1
        while 1:
            line = _read_line(prompt % number)
            if not line:
                break
            entries.append(line)
            number = number + 1
        _emit()
        return entries

    def get_number(self, prompt):
        """Prompt until the user enters something float() accepts."""
        while 1:
            try:
                return float(_read_line(prompt))
            except ValueError:
                # Not a number: ask again.
                pass

    def getOptions(self):
        self.options = self.get_list("Enter your options", "Option %d: ")

    def getCriteria(self):
        self.criteria = self.get_list("Enter your criteria ...", "Criterion %d: ")

    def getWeights(self):
        """Ask for a weight for every criterion; stores self.weights."""
        self.weights = {}
        for c in self.criteria:
            _emit()
            _emit("Enter a relative importance factor, or weight, for each criteria (higher is more important)")
            _emit()
            self.weights[c] = self.get_number("Criterion %s: " % c)

    def getScores(self):
        """Ask for a score per (option, criterion); stores self.scores."""
        self.scores = {}
        for o in self.options:
            _emit()
            _emit("Scores for option %s" % o)
            _emit()
            for c in self.criteria:
                self.scores[o, c] = self.get_number("Criterion %s: " % c)

    def getResults(self):
        """Compute weighted sums; self.results becomes a list of
        (option, total) tuples ordered from highest to lowest total."""
        totals = {}
        for o in self.options:
            value = 0
            for c in self.criteria:
                _emit(o, c, self.weights[c], self.scores[o, c])
                value = value + self.weights[c] * self.scores[o, c]
            totals[o] = value
        # reverse=True keeps the relative order of equal totals stable.
        self.results = sorted(totals.items(), key=lambda pair: pair[1],
                              reverse=True)

    def printResults(self):
        _emit()
        _emit("Results, in order from highest to lowest score")
        _emit()
        _emit("%5s %s" % ("Score", "Option"))
        for option, result in self.results:
            _emit("%5s %s" % (result, option))

    def turntheCrank(self):
        """Run the full interactive analysis."""
        self.getOptions()
        self.getCriteria()
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


class YesNo(Decision):
    """Decision analysis with the options fixed to Yes and No."""

    def turntheCrank(self):
        self.options = ["Yes", "No"]
        self.getCriteria()
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


class Lunch(Decision):
    """Decision analysis with fixed lunch criteria."""

    def turntheCrank(self):
        _emit()
        _emit("This is a classified, top secret program developed by the CIA. Please keep it confidential")
        _emit()
        _emit("What should we do for lunch? ;-)))")
        _emit()
        self.criteria = ["taste", "convenience", "atmosphere", "value"]
        self.options = self.get_list("Enter your restaurants or other food choices:", "Option %d: ")
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


class Basketball(Decision):
    """Two-team comparison with fixed basketball skill weights."""

    def turntheCrank(self):
        _emit("This is a program to help you decide which team will win a basketball game")
        _emit()
        _emit("When prompted, enter a number ranking each team on the prompted team skill")
        _emit("on a scale from 1 to 100, with 1 being terrible and 100 being the best imaginable")
        _emit()
        team_one = _read_line("What is the name of team one: ")
        team_two = _read_line("What is the name of team two: ")
        self.options = (team_one, team_two)
        self.weights = {"speed": 100, "size": 66, "jumping_ability": 50,
                        "defense": 60, "shooting": 75, "ballhandling": 50,
                        "rebounding": 50}
        self.criteria = list(self.weights)
        self.getScores()
        self.getResults()
        self.printResults()


class Football(Decision):
    """Two-team comparison with fixed football skill weights."""

    def turntheCrank(self):
        _emit("This is a program to help you decide which team will win a football game")
        _emit()
        _emit("When prompted, enter a number ranking each team on the prompted team skill")
        _emit("on a scale from 1 to 100, with 1 being terrible and 100 being the best imaginable")
        _emit()
        team_one = _read_line("What is the name of team one: ")
        team_two = _read_line("What is the name of team two: ")
        self.options = (team_one, team_two)
        self.weights = {"speed and quickness": 7, "size and strength": 5,
                        "rushing offense": 5, "passing offense": 7,
                        "defense": 1, "coaching": 12}
        self.criteria = list(self.weights)
        self.getScores()
        self.getResults()
        self.printResults()


class Election(Decision):
    """Candidate comparison with fixed election criteria."""

    def getOptions(self):
        self.options = self.get_list("Enter the name of each candidate", "Candidate %d: ")

    def turntheCrank(self):
        self.criteria = ["Education", "Social Issues", "Taxing and Spending",
                         "Character", "Intelligence", "Leadership",
                         "Foreign Affairs"]
        self.getOptions()
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


class multiChoiceGuesser:
    """Pick the reply whose hit count rises most when paired with the question."""

    def __init__(self, question='', replys=()):
        self.question = question
        self.replys = replys

    def guessedAnswer(self):
        """Return the index (into self.replys) of the best-scoring reply.

        Each reply is scored as hits(question + reply) / hits(reply), so a
        reply that is merely popular on its own does not win automatically.

        Raises:
            ValueError: if there are no replies to choose from.
        """
        if not self.replys:
            raise ValueError("guessedAnswer() needs at least one reply")
        ratios = []
        for reply in self.replys:
            combined = self._getGoogleHits(self.question + ' ' + reply)
            alone = self._getGoogleHits(reply)
            ratios.append(_hit_ratio(combined, alone))
        return ratios.index(max(ratios))

    def _getGoogleHits(self, query):
        """Return pyGoogle's estimated total result count for *query*.

        Raises:
            ImportError: if the pyGoogle ``google`` module is not installed.
        """
        if google is None:
            raise ImportError("pyGoogle (the 'google' module) is required for web searches")
        data = google.doGoogleSearch(query)
        return data.meta.estimatedTotalResultsCount

    def guess(self, question, choices):
        """Print Merlin's pick among *choices* for *question*."""
        self.mcg = multiChoiceGuesser(question, choices)
        _emit(' The question is: ', question)
        if not choices:
            # Nothing to rank; say so instead of failing inside max().
            _emit(" Merlin needs at least one option to choose from.")
            _emit('')
            return
        _emit(" Please wait for Merlin's answer: ",
              choices[self.mcg.guessedAnswer()], " ",
              "is the answer to your question!")
        _emit('')


class AskMerlina(Decision, multiChoiceGuesser, NLQ):
    """Simple Ask Merlin: the user supplies a question and the options."""

    def turntheCrank(self):
        self.u = _read_line("What is your question? ")
        self.z = NLQ(self.u)
        self.L = self.z.keywords
        self.question = " ".join(self.L)
        self.choices = self.get_list("Enter your options:", "Option %d: ")
        self.guess(self.question, self.choices)


class simple(Decision, multiChoiceGuesser, NLQ):
    """Answer a question with the keywords NLQ finds in a search result page."""

    def turntheCrank(self):
        self.u = _read_line("What is your question? ")
        self.z = NLQ(self.u)
        self.L = self.z.keywords
        self.question = " ".join(self.L)
        # Encode the query properly; raw spaces are not valid in a URL and
        # the search endpoint expects the terms in the "q" parameter.
        self.urlHandle = urlopen('http://www.google.com/search?%s'
                                 % urlencode({'q': self.question}))
        try:
            page = self.urlHandle.read()
        finally:
            self.urlHandle.close()
        if not isinstance(page, str):
            # Python 3 returns bytes; NLQ works on text.
            page = page.decode('utf-8', 'replace')
        self.googlePage = page
        self.med = NLQ(self.googlePage)
        self.L = self.med.keywords
        self.answer = " ".join(self.L)
        _emit("The answer is %s" % self.answer)


class weightGuesser(Decision, multiChoiceGuesser):
    """Sophisticated Ask Merlin: weighted criteria scored by search hits.

    For each (choice, criterion) pair the score is
    hits(choice alone) / hits(choice + criterion + question); an option's
    result is the reciprocal of its weighted score sum, so a larger share
    of combined hits yields a higher result.
    """

    def __init__(self, question='', choices=(), criteria=(), num=None,
                 den=None, scores=None):
        self.question = question
        self.choices = choices
        self.criteria = criteria
        # Fresh dicts per instance; a shared default dict would leak counts
        # from one question into the next.
        if num is None:
            num = {}
        if den is None:
            den = {}
        if scores is None:
            scores = {}
        self.num = num
        self.den = den
        self.scores = scores

    def guessedAnswer(self):
        """Return the index (into self.choices) of the choice owning the
        single best hits(question + choice + criterion) / hits(choice) ratio.

        Raises:
            ValueError: if there are no choices or no criteria.
        """
        if not self.choices or not self.criteria:
            raise ValueError("guessedAnswer() needs at least one choice and one criterion")
        ratios = []
        for choice in self.choices:
            for criterium in self.criteria:
                x = float(self._getGoogleHits(
                    self.question + ' ' + choice + ' ' + criterium))
                y = float(self._getGoogleHits(choice))
                _emit(x, y)
                ratios.append(_hit_ratio(x, y))
        # ratios is laid out choice-major, so integer division by the
        # number of criteria maps the flat position back to its choice.
        return ratios.index(max(ratios)) // len(self.criteria)

    def getScores(self):
        """Fill self.num, self.den and self.scores from search hit counts."""
        self.scores = {}
        for o in self.choices:
            # The option-alone count does not depend on the criterion, so
            # it is fetched once per option.
            alone = self._getGoogleHits(o)
            for c in self.criteria:
                combined = self._getGoogleHits(o + ' ' + c + ' ' + self.question)
                self.num[o, c] = combined
                self.den[o, c] = alone
                self.scores[o, c] = _hit_ratio(alone, combined)

    def getResults(self):
        """Compute self.results as (choice, result) tuples, best first.

        The result is 1 / (weighted score sum); a zero sum (for example all
        weights zero) yields 0.0 rather than a division error.
        """
        totals = {}
        for o in self.choices:
            value = 0
            for c in self.criteria:
                value = value + self.weights[c] * self.scores[o, c]
            if value:
                totals[o] = 1.0 / value
            else:
                totals[o] = 0.0
        self.results = sorted(totals.items(), key=lambda pair: pair[1],
                              reverse=True)

    def turntheCrank(self):
        self.u = _read_line("What is your question? ")
        self.z = NLQ(self.u)
        self.L = self.z.keywords
        self.question = " ".join(self.L)
        self.choices = self.get_list("Enter your options:", "Option %d: ")
        self.criteria = self.get_list("Enter your criteria:", "Criterion %d: ")
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


# Menu number -> program class.  Lookups use the float parsed from the
# user's input; 3.0 finds the key 3 because they compare and hash equal.
_MENU = {
    1: AskMerlina,
    2: weightGuesser,
    3: Decision,
    4: Lunch,
    5: Election,
    6: Basketball,
    7: Football,
    8: YesNo,
    9: simple,
}


def main():
    """Show the menu in a loop and run the chosen decision program."""
    first_pass = True
    while 1:
        if not first_pass:
            againornot = _read_line("Hit 'enter' to ask another question, or type 'quit' to quit this program: ")
            if againornot == "quit":
                break
        first_pass = False
        _emit()
        _emit("Please enter the number for the type of decision you wish to analayze: ")
        _emit("1. Simple Ask Merlin, any question at all, (requires pyGoogle and internet connection)")
        _emit("2. Sophisticated Ask Merlin, he will help you score each option for each criteria (requires pyGoogle and internet connec tion)")
        _emit("3. General Decision Analysis, you choose the options, criteria, weights and scores.")
        _emit("4. What's for Lunch?")
        _emit("5. For whom shall I Vote in the upcoming Election?")
        _emit("6. Which Basketball Team will win the Game?")
        _emit("7. Which Football Team will win the Game?")
        _emit("8. Questions which have a Yes or No answer")
        _emit("9. Simple question")
        _emit("10. Type '10' to quit this program")
        _emit()
        answer = _read_line("Please type in the number of the type of decision-program you wish to run from above and hit enter: ")
        # "quit" must be checked before the numeric conversion, which
        # would otherwise reject it.
        if answer.strip() == "quit":
            break
        try:
            choice = float(answer)
        except ValueError:
            _emit("Invalid operation")
            continue
        _emit(choice)
        if choice == 10:
            break
        program = _MENU.get(choice)
        if program is None:
            _emit("Invalid operation")
            continue
        program().turntheCrank()


if __name__ == "__main__":
    main()