Fix synonym/antonym parsing, clean up output

author: Joe Robinson <joe@lc8n.com> 2014-09-18 14:36:40 +0100
committer: Joe Robinson <joe@lc8n.com> 2014-09-18 14:36:40 +0100
commit: e6a71f72b79d6aa289ee36518224114eb90dd9e5 (patch)
tree: e4355e72f45b77242e3e06d7fc716e1f626e53b7 /bladictionary.py
parent: eba1508f45c209ee3c3aac66f219d994f98d02b0 (diff)
1 files changed, 42 insertions, 11 deletions
diff --git a/bladictionary.py b/bladictionary.py
index e6c32a9..a754ddb 100755
--- a/bladictionary.py
+++ b/bladictionary.py
@@ -16,6 +16,7 @@ class Definition(object):
 	word_type = ""
 	definition = ""
 	synonyms = []
+	antonyms = []
 
 	def __init__(self, word, id, dictionary, word_type, definition, synonyms, antonyms):
 		self.word = word
@@ -24,12 +25,13 @@ class Definition(object):
 		self.word_type = word_type
 		self.definition = definition
 		self.synonyms = synonyms
+		self.antonyms = antonyms
 
-def get_xml():
-	api_url = "http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId=wn&word=red"
+def get_xml(word, word_dict):
+	api_url = "http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId="+word_dict+"&word="+word
 
 	try:
-		xml = etree.parse("http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId=wn&word=red")
+		xml = etree.parse(api_url)
 	except IOError:
 		print "Error: Could not access the Dictionary service."
 		return
@@ -60,6 +62,7 @@ def parse_xml(xml):
 	id = 0
 	syn_line = False
 	ant_line = False
+	word = definition_lines[0]
 	for line in definition_lines[1:]:
 		extra_parts = line.split("[")
 		line_parts = extra_parts[0].split(":")
@@ -76,7 +79,7 @@ def parse_xml(xml):
 		if id_line:
 			if id > 0:
 				#Add a previous line to the array of definitions, if there is one
-				item = Definition(id, "bla", "wn", word_type, definition, [], [])
+				item = Definition(word, id, "wn", word_type, definition, synonyms, antonyms)
 				items.append(item)
 				synonyms = []
 				antonyms = []
@@ -99,7 +102,16 @@ def parse_xml(xml):
 
 			for word in line_parts[2:]:
 				definition += word + " "
-
+		elif syn_line:
+			for part in line_parts:
+				extra_words = part.strip().split(" ")
+				for syn in extra_words:
+					synonyms.append(syn.strip("{},] "))
+		elif ant_line:
+			for part in line_parts:
+				extra_words = part.strip().split(" ")
+				for ant in extra_words:
+					antonyms.append(syn.strip("{},] "))	
 		else:
 			definition += line_parts[0].strip() + " "
 			if len(line_parts) > 1:
@@ -112,13 +124,16 @@ def parse_xml(xml):
 				extra_words = part.split(" ")
 				if extra_words[0] == "syn:":
 					for syn in extra_words[1:]:
-						synonyms.append(syn.strip("{},]"))
+						synonyms.append(syn.strip("{},] "))
 						syn_line = True
 				elif extra_words[0] == "ant:":
 					for ant in extra_words[1:]:
-						antonyms.append(ant.strip("{},]"))
+						antonyms.append(ant.strip("{},] "))
 						ant_line = True
 
+	item = Definition(id, "bla", "wn", word_type, definition, synonyms, antonyms)
+	items.append(item)
+	
 	return items
 
 def parse_args():
@@ -157,7 +172,7 @@ def parse_oed(word):
 		return
 
 	print line
-	
+
 	prev_part = ""
 	items = []
 	id = 0
@@ -192,15 +207,31 @@ def main():
 	if word_dict == "oed":
 		items = parse_oed(word)
 	else:
-		xml = get_xml()
+		xml = get_xml(word, word_dict)
 
 		if xml is None:
 			return
 
 		items = parse_xml(xml)
 
+	num_printed = 0
+	num_more = 0
+	suppress_print = False
+
 	for item in items:
-		print item.definition
-		print "-"
+
+		if num_printed == 3:
+			suppress_print = True
+
+		if word_type is "" or word_type == item.word_type:
+			if suppress_print:
+				num_more+=1
+			else:
+				print item.word_type + " " + item.id + ": " + item.definition,
+				num_printed+=1
+	
+	if suppress_print:
+		print "(" + str(num_more) + " more)"
+
 if __name__ == "__main__":
 	main()
 \ No newline at end of file
author	Joe Robinson <joe@lc8n.com>	2014-09-18 14:36:40 +0100
committer	Joe Robinson <joe@lc8n.com>	2014-09-18 14:36:40 +0100
commit	e6a71f72b79d6aa289ee36518224114eb90dd9e5 (patch)
tree	e4355e72f45b77242e3e06d7fc716e1f626e53b7 /bladictionary.py
parent	eba1508f45c209ee3c3aac66f219d994f98d02b0 (diff)