#!/usr/bin/python2 # -*- coding: utf-8 -*- import sys import shlex import optparse from lxml import etree import sqlite3 VERSION = "2.1.4b" class Definition(object): #ID is relative to the word type, eg noun 1, noun 2, verb 1, verb 2, not to the entire list id = 0 word = "" dictionary = "" word_type = "" definition = "" uses = [] synonyms = [] antonyms = [] def __init__(self, word, id, dictionary, word_type, definition, uses, synonyms, antonyms): self.word = word self.id = id self.dictionary = dictionary self.word_type = word_type self.definition = definition self.uses = uses self.synonyms = synonyms self.antonyms = antonyms def get_xml(word, word_dict): api_url = "http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId="+word_dict+"&word="+word try: xml = etree.parse(api_url) except IOError: print "Error: Could not access the Dictionary service." return #Root element tag is WordDefinition, which is the same as the element which contains the definition #That's akward, so let's just for element in xml.iter("{http://services.aonaware.com/webservices/}Definitions"): root = element break; return root def parse_xml(xml): #Only worrying about one definition for now, add multiples later for element in xml.iter("{http://services.aonaware.com/webservices/}WordDefinition"): definition = element.text break; definition_lines = definition.split("\n") items = [] synonyms = [] antonyms = [] id = 0 syn_line = False ant_line = False word = definition_lines[0] for line in definition_lines[1:]: extra_parts = line.split("[") line_parts = extra_parts[0].split(":") #Work out if this is a line that includes identifying information (word type or id) #This is done by checking the indenting - if the first 8 characters are spaces, it's not #It's a bit hacky, maybe change this later id_line = False for char in line_parts[0][0:7]: if char != " ": id_line = True if id_line: if id > 0: #Add a previous line to the array of definitions, if there is one item = Definition(word, id, "wn", word_type, definition, [], synonyms, antonyms) items.append(item) synonyms = [] antonyms = [] syn_line = False ant_line = False id_parts = line_parts[0].strip().split(" ") if id_parts[0] == "[also": #This line comes at the end, don't worry about it for now break; if id_parts[0].isdigit() != True: word_type = id_parts[0] if len(id_parts) < 2: id = 1 else : id = id_parts[1] definition = line_parts[1].strip() + " " else: id = id_parts[0] definition = line_parts[1].strip() + " " for word in line_parts[2:]: definition += word + " " elif syn_line: for part in line_parts: extra_words = part.strip().split(" ") for syn in extra_words: synonyms.append(syn.strip("{},] ")) elif ant_line: for part in line_parts: extra_words = part.strip().split(" ") for ant in extra_words: antonyms.append(syn.strip("{},] ")) else: definition += line_parts[0].strip() + " " if len(line_parts) > 1: for part in line_parts[1:-1]: definition += part + ":" definition += line_parts[-1] if len(extra_parts) > 0: for part in extra_parts: extra_words = part.split(" ") if extra_words[0] == "syn:": for syn in extra_words[1:]: synonyms.append(syn.strip("{},] ")) syn_line = True elif extra_words[0] == "ant:": for ant in extra_words[1:]: antonyms.append(ant.strip("{},] ")) ant_line = True item = Definition(word, id, "wn", word_type, definition, [], synonyms, antonyms) items.append(item) return items def parse_args(): args = sys.argv[1:] if not args: args = shlex.split(sys.stdin.read()) parser = optparse.OptionParser( usage = "!define " ) parser.add_option( "-v", "--version", action = "store_true", help = "Print the version number" ) parser.add_option( "-c", "--channel", action = "store", help = "The IRC channel of the request") options, args = parser.parse_args( args ) types = ["n", "noun", "v", "verb", "adj", "adjective", "adv", "adverb"] dicts = ["wn", "wordnet", "oed", "db"] word = "" word_type = "" word_dict = "" if len(args) > 0: word = args[0] if len(args) > 1: for arg in args[1:]: if arg in types: word_type = arg elif arg in dicts: word_dict = arg return word, word_type, word_dict, options def parse_oed(word): types = ["n.", "—n.", "v.", "—v.", "adj.", "—adj.", "adv.", "—adv."] file = open("dictionaries/oed.txt") word_line = "" for line in file: word_part = line.split(" ")[0] if word_part.lower() == word.lower(): word_line = line break; if word_line == "": return prev_part = "" items = [] id = 0 definition = "" for part in line.split(" ")[1:]: if part in types: if id > 0: item = Definition(word, id, "oed", word_type, definition, [], [], []) items.append(item) definition = "" word_type = part elif part.isdigit() and prev_part is not None and (prev_part[-1] == "." or prev_part[-1] == ")"): item = Definition(word, id, "oed", word_type, definition, [], [], []) items.append(item) definition = "" id = part else: definition += part + " " prev_part = part if id == 0: id = 1 item = Definition(word, id, "oed", word_type, definition, [], []) items.append(item) return items def get_sql(word): items = [] types = ["n", "v", "adj", "adv"] con = sqlite3.connect('dictionaries/wordnet.db'); con.row_factory = sqlite3.Row with con: cur = con.cursor() cur.execute("SELECT * from definitions where word = ? ORDER BY type_id,sub_id;", [word]) rows = cur.fetchall() for row in rows: item = Definition(row['word'], row['sub_id'], "db", types[row['type_id']-1], row['definition'], [], row['synset_id'], []) cur.execute("SELECT * from uses where definition_id = ?", [row['id']]) rows = cur.fetchall() item.uses = rows items.append(item) return items def main(): word, word_type, word_dict, options = parse_args() if options.version: print VERSION sys.exit( 0 ) if word_dict == "oed": items = parse_oed(word) elif word_dict == "wn": xml = get_xml(word, word_dict) if xml is None: return items = parse_xml(xml) else: if word_dict is None or word_dict == "": word_dict = "db" items = get_sql(word) if items is None or len(items) == 0: print "No definition found for "+word return line_length = 0 num_more = 0 suppress_print = False if options.channel: max_length = 510 - 37 - len(options.channel) else: max_length = 460 for item in items: if word_type is "" or word_type == item.word_type: if suppress_print: num_more+=1 else: definition = item.word_type + " " + str(item.id) + ": " + item.definition if len(item.uses) > 0: definition += "; \""+item.uses[0]['quote']+"\"" if line_length + len(definition) > max_length: suppress_print = True num_more+= 1 else: print definition, line_length += len(definition) if suppress_print: print "(" + str(num_more) + " more)" if __name__ == "__main__": main()