#!/usr/local/bin/python
#######################################################
# runs on the server, reads form input, prints html;
# url=http://server-name/root-dir/Basics/test3.cgi
#######################################################

import cgi
import re
import string
import sys
# Wildcard import kept from the original; dropping it could remove names
# that other parts of the file rely on.
from urllib import *

# Route stderr to stdout so that error output ends up in the CGI response.
sys.stderr = sys.stdout

form = cgi.FieldStorage()                 # parse form data

# A single parenthesised string prints exactly the same text as the Python 2
# print statement did.
print("Content-type: text/html\n\n")      # plus blank line

# Reply template; the single %s receives the chosen answer.
# NOTE(review): the template contains no markup as it stands -- it looks as if
# the HTML tags were lost somewhere; confirm against the deployed copy.
html = """ askmerlina.cgi

Merlin has carefully considered and weighed the evidence, and he chooses


%s


"""

# The third-party search module is imported only after the Content-type
# header has been printed (the original statement order), so an ImportError
# traceback is emitted after the header rather than before it.
import google

# NOTE(review): this licence key is a credential stored in source control;
# consider loading it from configuration instead.
google.LICENSE_KEY = "Wd23pfdQFHIHmc7G5eml/2dZEybvk5S3"

# OK, I had to rewrite askMerlin from scratch, due to Google's disabling of
# the functionality that the program relied on. It seems that Merlin was
# violating Google's terms of service, unbeknownst to me, that's for sure!
# But I do not want to violate anyone's terms of service, so I rewrote
# askMerlin using Yahoo instead. I also came up with a completely new
# web-scraping algorithm, using string functions instead of regular
# expressions. Simple is better than complicated!
#
# NOTE(review): the code in this file still calls the `google` module, not
# Yahoo, so the paragraph above appears to describe a different revision.
#
# AskMerlin is a script I made by putting together two scripts, modifying
# them both, and adding input/output routines around them.
#
# First, I utilized the multiChoiceGuesser script that Max M posted on the
# newsgroup comp.lang.python a couple of weeks ago. This uses urllib to go
# out to the web and judge the appropriateness of a given answer by how many
# hits it gets on Google when coupled with the original question in a Google
# search.
# My contributions were to enable the program to ask for both an original
# question and then for options to choose from. I also set up a small routine
# to choose a most appropriate answer in the case that no options are given.
# This is done by using the second program, NLQ, to create options of its
# own to choose from: NLQ picks out keywords from the page returned by a
# Google search of the question by itself. Then these keywords are used as
# options, or possible answers, to the question. Then multiChoiceGuesser is
# applied to the question along with all of the keywords generated by NLQ.
# The result can take a long time, but eventually it gets there, always. (???)
#
# Also, I added to multiChoiceGuesser the requirement to do two Google
# searches: one on the original question together with each option, and one
# on the option by itself. Then we calculate a ratio between each option's
# Google hit score and its question/option Google hit score, thus avoiding
# merely choosing the option that has overwhelmingly high hits all by itself.
#
# Surely better algorithms can vastly improve this program!!!
#
# I am hoping someone, or some folks, come up with improved variations and
# algorithms.
#
# Various algorithms could be tried, and then the results from the various
# algorithms could be averaged in order to produce more accurate results.
#
# Currently, Merlin may have a low IQ, but he has potential for the future.
# Anyway, Merlin can already answer just about any question.
# Someday, perhaps he will even answer correctly, or at least with wisdom,
# most or all of the time.
# ;-)))))))))))))
#
# NLQ:
# A short program called NLQ, or natural language query, which can be found
# online at http://gurno.com/adam/nlq/#download
# NLQ is a class that takes an inputted query and outputs (1) keywords and
# (2) a categorization of the type of question being asked. I am primarily
# interested in using the keywords extracted from a query by NLQ. I
# shamelessly modified NLQ to add many more IGNORE_WORDS and otherwise
# spruce it up.
#
# NLQ.py is still rather dumb, but hey, he has potential ;-))))).

# stuff
__version__ = "0.1"

# define the question types...
# Numeric codes for the question categories returned by determine_type().
UNKNOWN = 0
KNOWLEDGE = 1
COMPREHENSION = 2
APPLICATION = 3
ANALYSIS = 4
SYNTHESIS = 5
EVALUATION = 6

# Leading words that identify each question category.
KNOWLEDGE_WORDS = ["name", "list", "recall", "define", "tell", "match", "who",
                   "what", "when", "describe", "where"]
COMPREHENSION_WORDS = ["retell"]
APPLICATION_WORDS = ["why"]
ANALYSIS_WORDS = ["how", "classify", "outline", "diagram"]
SYNTHESIS_WORDS = []
EVALUATION_WORDS = []

# Words that are never reported as keywords.
PRONOUNS = ["he", "she", "it", "me", "you", "they", "them", "we", "who",
            "myself", "yourself", "ourself", "I", "me", "my"]
VERBS = ["is", "was", "are", "were", "be", "shall", "am", "isn't", "can't",
         "won't", "shouldn't", "couldn't", "aren't", "do", "don't", ]
OTHER_WORDS = ["if", "to", "too", "there", "will", "the", "a", "let", "I'll",
               "this", "these", "those", "let", "*.", "+*", ".*", "<*", ">*",
               "=*", "*=", "*<", "*>", "*.", "*-", "-*", "*:", ":*", ";*",
               "*;", "*,", ",*", "*.*", "*,*", "*;*", "*:*", "*+*", "*=*",
               "*-*", "*_*", "*<*", "*>*", "*?*", "*/*", "of", "and", "for",
               "very", "not", "in", "on", "up", "has", "from", "which", "and",
               "on", "of", "or", "not", "by", "can", "that", "your", "with",
               "their", "over", "back", "link", "about", "an", "at", "his",
               "enter", "into", "so", "was", "a", "as", "but"]
IGNORE_WORDS = (VERBS + PRONOUNS + OTHER_WORDS + KNOWLEDGE_WORDS +
                COMPREHENSION_WORDS + APPLICATION_WORDS + ANALYSIS_WORDS)

# Lower-cased lookup set: NLQ lower-cases its input, so capitalised entries
# such as "I" and "I'll" could never match when compared verbatim.
_IGNORE_LOOKUP = frozenset([entry.lower() for entry in IGNORE_WORDS])

# A word containing any of these characters is never a keyword.
_REJECT_CHARS = "~@#$%^&<>:;{}[*()_-+=?"

# NOTE(review): credential stored in source; consider moving to configuration.
_GOOGLE_LICENSE_KEY = "Wd23pfdQFHIHmc7G5eml/2dZEybvk5S3"

# Every print call in this section passes one parenthesised string, which
# prints the same text under the Python 2 print statement.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def _has_rejected_char(word):
    """Return True when word contains any character from _REJECT_CHARS."""
    for char in _REJECT_CHARS:
        if char in word:
            return True
    return False


def _hit_count(raw):
    """Convert a search hit count to float, mapping 0 to 1.0.

    Clamping zero keeps the hit ratios well defined (no division by zero).
    """
    count = float(raw)
    if count == 0:
        return 1.0
    return count


def _build_query(*parts):
    """Join the non-empty parts into one space-separated search query."""
    return ' '.join([part for part in parts if part])


def _form_text(name):
    """Return the first submitted value of form field name, or ''."""
    return form.getfirst(name, '')


def _form_list(name):
    """Return the comma-separated items of form field name.

    Items are stripped of surrounding whitespace; empty items are dropped.
    """
    pieces = [piece.strip() for piece in _form_text(name).split(",")]
    return [piece for piece in pieces if piece]


def _escape_html(text):
    """Escape &, <, > and double quotes so text is safe inside HTML."""
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"),
                        (">", "&gt;"), ('"', "&quot;")):
        text = text.replace(raw, entity)
    return text


def determine_type(word):
    """Return the question-type code for word, or UNKNOWN.

    Only a single word (the first word of the question) is classified.
    The categories are checked in a fixed order and the first match wins.
    """
    ordered_checks = (
        (KNOWLEDGE_WORDS, KNOWLEDGE),
        (APPLICATION_WORDS, APPLICATION),
        (ANALYSIS_WORDS, ANALYSIS),
        (SYNTHESIS_WORDS, SYNTHESIS),
        (EVALUATION_WORDS, EVALUATION),
        (COMPREHENSION_WORDS, COMPREHENSION),
    )
    for words, code in ordered_checks:
        if word in words:
            return code
    return UNKNOWN


class NLQ:
    """Natural-language query: classifies a question and extracts keywords.

    Attributes:
        tuple:    the lower-cased, whitespace-split words of the input.
        type:     question-type code of the first word (UNKNOWN if empty).
        keywords: words after the first one that survive the filters.
    """

    def __init__(self, a_string):
        self.tuple = a_string.lower().split()
        # An empty question has no first word to classify.
        if self.tuple:
            self.type = determine_type(self.tuple[0])
        else:
            self.type = UNKNOWN
        self.keywords = []
        for word in self.tuple[1:]:
            if _has_rejected_char(word) or word in _IGNORE_LOOKUP:
                continue
            if word[0] not in string.ascii_letters:
                continue
            # Drop trailing punctuation ("france," -> "france").  The loop
            # terminates because word[0] is known to be a letter.
            while word[-1] not in string.ascii_letters:
                word = word[:-1]
            # The stripped form may itself be an ignore word ("the," -> "the").
            if word in _IGNORE_LOOKUP:
                continue
            self.keywords.append(word)

    def __repr__(self):
        return "type: %s\nkeywords: %s" % (self.type, self.keywords)


class Decision:
    """Interactive weighted-criteria decision helper (console prompts).

    Options are scored against criteria; each option's result is the sum of
    weight * score over all criteria.
    """

    def get_list(self, heading, prompt):
        """Prompt for lines until a blank one is entered; return them.

        prompt must contain one %d placeholder for the running item number.
        """
        print(heading)
        print("")
        print("(enter a blank line to end the list)")
        entries = []
        number = 1
        while 1:
            line = _read_line(prompt % number)
            if not line:
                break
            entries.append(line)
            number = number + 1
        print("")
        return entries

    def get_number(self, prompt):
        """Prompt repeatedly until the reply parses as a float; return it."""
        result = None
        while result is None:
            try:
                result = float(_read_line(prompt))
            except ValueError:
                pass
        return result

    def getOptions(self):
        """Ask for the list of options and store it in self.options."""
        self.options = self.get_list("Enter your options", "Option %d: ")

    def getCriteria(self):
        """Ask for the list of criteria and store it in self.criteria."""
        self.criteria = self.get_list("Enter your criteria ...",
                                      "Criterion %d: ")

    def getWeights(self):
        """Ask for one weight per criterion; store them in self.weights."""
        self.weights = {}
        for c in self.criteria:
            print("")
            print("Enter a relative importance factor, or weight, for each criteria (higher is more important)")
            print("")
            self.weights[c] = self.get_number("Criterion %s: " % c)

    def getScores(self):
        """Ask for a score per (option, criterion); store in self.scores."""
        self.scores = {}
        for o in self.options:
            print("")
            print("Scores for option %s" % o)
            print("")
            for c in self.criteria:
                self.scores[o, c] = self.get_number("Criterion %s: " % c)

    def getResults(self):
        """Compute the weighted total per option.

        self.results becomes a list of (option, total) tuples ordered from
        highest to lowest total.  Each contributing term is echoed.
        """
        totals = {}
        for o in self.options:
            value = 0
            for c in self.criteria:
                print("%s %s %s %s" % (o, c, self.weights[c],
                                       self.scores[o, c]))
                value = value + self.weights[c] * self.scores[o, c]
            totals[o] = value
        # Stable descending sort on the total.
        self.results = sorted(totals.items(),
                              key=lambda pair: pair[1], reverse=True)

    def printResults(self):
        """Print the results table, best option first."""
        print("")
        print("Results, in order from highest to lowest score")
        print("")
        print("%5s %s" % ("Score", "Option"))
        for option, result in self.results:
            print("%5s %s" % (result, option))

    def turntheCrank(self):
        """Run the complete interactive session."""
        self.getOptions()
        self.getCriteria()
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


class YesNo(Decision):
    """Decision with the two fixed options "Yes" and "No"."""

    def turntheCrank(self):
        self.options = ["Yes", "No"]
        self.getCriteria()
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


class Lunch(Decision):
    """Decision with fixed lunch criteria and user-entered food choices."""

    def turntheCrank(self):
        print("")
        print("This is a classified, top secret program developed by the CIA. Please keep it confidential")
        print("")
        print("What should we do for lunch? ;-)))")
        print("")
        self.criteria = ["taste", "convenience", "atmosphere", "value"]
        self.options = self.get_list(
            "Enter your restaurants or other food choices:", "Option %d: ")
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


class Basketball(Decision):
    """Two-team comparison with fixed, built-in basketball weights."""

    def turntheCrank(self):
        print("This is a program to help you decide which team will win a basketball game")
        print("")
        print("When prompted, enter a number ranking each team on the prompted team skill")
        print("on a scale from 1 to 100, with 1 being terrible and 100 being the best imaginable")
        print("")
        team_one = _read_line("What is the name of team one: ")
        team_two = _read_line("What is the name of team two: ")
        self.options = (team_one, team_two)
        self.weights = {"speed": 100, "size": 66, "jumping_ability": 50,
                        "defense": 60, "shooting": 75, "ballhandling": 50,
                        "rebounding": 50}
        self.criteria = list(self.weights.keys())
        self.getScores()
        self.getResults()
        self.printResults()


class Football(Decision):
    """Two-team comparison with fixed, built-in football weights."""

    def turntheCrank(self):
        print("This is a program to help you decide which team will win a football game")
        print("")
        print("When prompted, enter a number ranking each team on the prompted team skill")
        print("on a scale from 1 to 100, with 1 being terrible and 100 being the best imaginable")
        print("")
        team_one = _read_line("What is the name of team one: ")
        team_two = _read_line("What is the name of team two: ")
        self.options = (team_one, team_two)
        self.weights = {"speed and quickness": 7, "size and strength": 5,
                        "rushing offense": 5, "passing offense": 7,
                        "defense": 1, "coaching": 12}
        self.criteria = list(self.weights.keys())
        self.getScores()
        self.getResults()
        self.printResults()


class Election(Decision):
    """Candidate comparison over a fixed list of election criteria."""

    def getOptions(self):
        """Ask for the candidate names and store them in self.options."""
        self.options = self.get_list("Enter the name of each candidate",
                                     "Candidate %d: ")

    def turntheCrank(self):
        self.criteria = ["Education", "Social Issues", "Taxing and Spending",
                         "Character", "Intelligence", "Leadership",
                         "Foreign Affairs"]
        self.getOptions()
        self.getWeights()
        self.getScores()
        self.getResults()
        self.printResults()


class multiChoiceGuesser:
    """Picks the choice whose search-hit ratios score highest.

    For each choice and criterion the score term is

        hits(question + choice + criterion) / hits(choice) * weight

    so a choice that is merely popular on its own is not favoured.
    """

    def __init__(self, question='', choices=None, criteria=None,
                 weights=None):
        # None stands for "empty"; a fresh list per instance avoids the
        # shared-mutable-default trap.
        self.question = question
        self.choices = [] if choices is None else choices
        self.criteria = [] if criteria is None else criteria
        self.weights = [] if weights is None else weights

    def guessedAnswer(self):
        """Return the index (into self.choices) of the best-scoring choice.

        weights are matched to criteria by position and converted with
        float().  When no criteria are given, each choice is scored by the
        single unweighted ratio hits(question + choice) / hits(choice).
        Ties go to the earliest choice.

        Raises:
            ValueError: if there are no choices, or a weight is not numeric.
            IndexError: if there are fewer weights than criteria.
        """
        if not self.choices:
            raise ValueError("no choices to pick from")
        criteria = list(self.criteria)
        weights = list(self.weights)
        if not criteria:
            criteria, weights = [''], [1.0]
        totals = []
        for choice in self.choices:
            # Hits for the choice alone do not depend on the criterion, so
            # they are looked up once per choice.
            alone = _hit_count(self._getGoogleHits(choice))
            total = 0.0
            for place, criterium in enumerate(criteria):
                combined = _hit_count(self._getGoogleHits(
                    _build_query(self.question, choice, criterium)))
                total = total + (combined / alone) * float(weights[place])
            totals.append(total)
        return totals.index(max(totals))

    def _getGoogleHits(self, query):
        """Return the estimated total result count for query."""
        google.LICENSE_KEY = _GOOGLE_LICENSE_KEY
        data = google.doGoogleSearch(query)
        return data.meta.estimatedTotalResultsCount

    def guess(self, question, choices, criteria=None, weights=None):
        """Return the element of choices that scores best for question.

        The scoring is delegated to a new multiChoiceGuesser, which is kept
        in self.mcg.  criteria and weights are optional.
        """
        self.mcg = multiChoiceGuesser(question, choices, criteria, weights)
        return choices[self.mcg.guessedAnswer()]


class AskMerlina(Decision, multiChoiceGuesser):
    """CGI front end: question and choices only, no criteria."""

    def turntheCrank(self):
        """Read the form, pick an answer, store it in self.answer, return it."""
        self.question = _form_text('question')
        self.choices = _form_list('choices')
        self.answer = self.guess(self.question, self.choices)
        return self.answer


class weightGuesser(Decision, multiChoiceGuesser):
    """CGI front end: question, choices, criteria and weights from the form."""

    def __init__(self, question='', choices=(), criteria=(), num=None,
                 den=None, scores=None, weights=None):
        # None stands for "empty"; fresh containers per instance so that
        # getScores() never writes into a dict shared between instances.
        self.question = question
        self.choices = choices
        self.criteria = criteria
        self.num = {} if num is None else num
        self.den = {} if den is None else den
        self.scores = {} if scores is None else scores
        self.weights = [] if weights is None else weights

    def guessedAnswer(self):
        """Return the index of the best choice, ignoring weights.

        Each choice is scored by the sum over all criteria of
        hits(question + choice + criterion) / hits(choice).

        Raises:
            ValueError: if there are no choices.
        """
        if not self.choices:
            raise ValueError("no choices to pick from")
        totals = []
        for choice in self.choices:
            total = 0.0
            if self.criteria:
                alone = _hit_count(self._getGoogleHits(choice))
                for criterium in self.criteria:
                    combined = _hit_count(self._getGoogleHits(
                        _build_query(self.question, choice, criterium)))
                    total = total + combined / alone
            totals.append(total)
        return totals.index(max(totals))

    def getScores(self):
        """Fill self.num, self.den and self.scores from search hit counts.

        For every (choice, criterion) pair:
            num    = hits(choice + criterion + question)
            den    = hits(choice)
            scores = den / num   (zero counts are treated as 1)
        """
        self.scores = {}
        if not self.criteria:
            return
        for o in self.choices:
            alone = self._getGoogleHits(o)      # one search per choice
            for c in self.criteria:
                combined = self._getGoogleHits(
                    _build_query(o, c, self.question))
                self.num[o, c] = combined
                self.den[o, c] = alone
                self.scores[o, c] = _hit_count(alone) / _hit_count(combined)

    def _weight_for(self, place, criterion):
        """Return the weight of a criterion as a float.

        self.weights may be a mapping keyed by criterion or a sequence
        aligned with self.criteria.
        """
        if hasattr(self.weights, 'keys'):
            return float(self.weights[criterion])
        return float(self.weights[place])

    def getResults(self):
        """Rank the choices from self.scores and self.weights.

        Each choice gets 1 / sum(weight * score); a zero sum yields 0.0
        instead of a division error.  self.results becomes a list of
        (choice, value) tuples ordered from highest to lowest value.
        """
        totals = {}
        for o in self.choices:
            value = 0
            for place, c in enumerate(self.criteria):
                value = value + self._weight_for(place, c) * self.scores[o, c]
            totals[o] = 1.0 / value if value else 0.0
        self.results = sorted(totals.items(),
                              key=lambda pair: pair[1], reverse=True)

    def turntheCrank(self):
        """Read the form, pick an answer, store it in self.answer, return it.

        The NLQ analysis of the question is kept in self.z and its keywords
        in self.L.  The question itself is searched unchanged: joining a
        string with string.join() would put a space between every character.
        """
        self.question = _form_text('question')
        self.z = NLQ(self.question)
        self.L = self.z.keywords
        self.choices = _form_list('choices')
        self.criteria = _form_list('criteria')
        self.weights = _form_list('weights')
        self.answer = self.guess(self.question, self.choices,
                                 self.criteria, self.weights)
        return self.answer


if __name__ == "__main__":
    decide = weightGuesser()
    # turntheCrank() already performs the searches; reuse its answer instead
    # of running them all a second time.  The answer is user-supplied text,
    # so it is escaped before being placed into the page.
    print(html % _escape_html(decide.turntheCrank()))