From e6a71f72b79d6aa289ee36518224114eb90dd9e5 Mon Sep 17 00:00:00 2001 From: Joe Robinson Date: Thu, 18 Sep 2014 14:36:40 +0100 Subject: Fix synonym/antonym parsing, clean up output --- bladictionary.py | 53 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 11 deletions(-) (limited to 'bladictionary.py') diff --git a/bladictionary.py b/bladictionary.py index e6c32a9..a754ddb 100755 --- a/bladictionary.py +++ b/bladictionary.py @@ -16,6 +16,7 @@ class Definition(object): word_type = "" definition = "" synonyms = [] + antonyms = [] def __init__(self, word, id, dictionary, word_type, definition, synonyms, antonyms): self.word = word @@ -24,12 +25,13 @@ class Definition(object): self.word_type = word_type self.definition = definition self.synonyms = synonyms + self.antonyms = antonyms -def get_xml(): - api_url = "http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId=wn&word=red" +def get_xml(word, word_dict): + api_url = "http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId="+word_dict+"&word="+word try: - xml = etree.parse("http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId=wn&word=red") + xml = etree.parse(api_url) except IOError: print "Error: Could not access the Dictionary service." return @@ -60,6 +62,7 @@ def parse_xml(xml): id = 0 syn_line = False ant_line = False + word = definition_lines[0] for line in definition_lines[1:]: extra_parts = line.split("[") line_parts = extra_parts[0].split(":") @@ -76,7 +79,7 @@ def parse_xml(xml): if id_line: if id > 0: #Add a previous line to the array of definitions, if there is one - item = Definition(id, "bla", "wn", word_type, definition, [], []) + item = Definition(word, id, "wn", word_type, definition, synonyms, antonyms) items.append(item) synonyms = [] antonyms = [] @@ -99,7 +102,16 @@ def parse_xml(xml): for word in line_parts[2:]: definition += word + " " - + elif syn_line: + for part in line_parts: + extra_words = part.strip().split(" ") + for syn in extra_words: + synonyms.append(syn.strip("{},] ")) + elif ant_line: + for part in line_parts: + extra_words = part.strip().split(" ") + for ant in extra_words: + antonyms.append(syn.strip("{},] ")) else: definition += line_parts[0].strip() + " " if len(line_parts) > 1: @@ -112,13 +124,16 @@ def parse_xml(xml): extra_words = part.split(" ") if extra_words[0] == "syn:": for syn in extra_words[1:]: - synonyms.append(syn.strip("{},]")) + synonyms.append(syn.strip("{},] ")) syn_line = True elif extra_words[0] == "ant:": for ant in extra_words[1:]: - antonyms.append(ant.strip("{},]")) + antonyms.append(ant.strip("{},] ")) ant_line = True + item = Definition(id, "bla", "wn", word_type, definition, synonyms, antonyms) + items.append(item) + return items def parse_args(): @@ -157,7 +172,7 @@ def parse_oed(word): return print line - + prev_part = "" items = [] id = 0 @@ -192,15 +207,31 @@ def main(): if word_dict == "oed": items = parse_oed(word) else: - xml = get_xml() + xml = get_xml(word, word_dict) if xml is None: return items = parse_xml(xml) + num_printed = 0 + num_more = 0 + suppress_print = False + for item in items: - print item.definition - print "-" + + if num_printed == 3: + suppress_print = True + + if word_type is "" or word_type == item.word_type: + if suppress_print: + num_more+=1 + else: + print item.word_type + " " + item.id + ": " + item.definition, + num_printed+=1 + + if suppress_print: + print "(" + str(num_more) + " more)" + if __name__ == "__main__": main() \ No newline at end of file -- cgit v1.2.3