Fixes, cleanup, move Definition class to its own file

author: Joe Robinson <joe@lc8n.com> 2014-10-04 17:58:43 +0100
committer: Joe Robinson <joe@lc8n.com> 2014-10-04 17:58:43 +0100
commit: 7fa9083eb83e96eb53f7b8ca39634bc34d8f38c0 (patch)
tree: 4c048446fe1f26b5565a445539998ab92dab7fbb /bladictionary.py
parent: 05b3990f3d81ea66cbca6e4e6e2bfb8294318931 (diff)
1 files changed, 12 insertions, 139 deletions
diff --git a/bladictionary.py b/bladictionary.py
index 58ddfeb..932d54e 100755
--- a/bladictionary.py
+++ b/bladictionary.py
@@ -7,29 +7,9 @@ import optparse
 from lxml import etree
 import sqlite3
 import requests
+from definition import Definition
 
-VERSION = "2.4.0"
-class Definition(object):
-
-	#ID is relative to the word type, eg noun 1, noun 2, verb 1, verb 2, not to the entire list
-	id = 0
-	word = ""
-	dictionary = ""
-	word_type = ""
-	definition = ""
-	uses = []
-	synonyms = []
-	antonyms = []
-
-	def __init__(self, word, id, dictionary, word_type, definition, uses, synonyms, antonyms):
-		self.word = word
-		self.id = id
-		self.dictionary = dictionary
-		self.word_type = word_type
-		self.definition = definition
-		self.uses = uses
-		self.synonyms = synonyms
-		self.antonyms = antonyms
+VERSION = "2.4.1"
 
 def get_xml(word, word_dict):
 	api_url = "http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId="+word_dict+"&word="+word
@@ -238,7 +218,7 @@ def parse_oed(word):
 		#If we've reached a word type, add the current data to the list
 		if part in types.keys():
 			#Don't add empty definitions
-			if id > 0 and definition is not " " and definition is not "" and definition is not None:
+			if id > 0 and definition is not " " and definition:
 				item = Definition(word, id, "oed", word_type, definition, [], [], [])
 				items.append(item)
 				definition = ""
@@ -246,7 +226,7 @@ def parse_oed(word):
 			#Convert the word type to the simple version
 			word_type = types[part]
 
-		elif part.isdigit() and prev_part is not None:# and (prev_part[-1] == "." or prev_part[-1] == ")"):
+		elif part.isdigit() and prev_part is not None:
 			#Ignore anything before the first definition
 			if part is not "1":
 				item = Definition(word, id, "oed", word_type, definition, [], [], [])
@@ -335,117 +315,8 @@ def get_sql(word, dictionary = "wn"):
 
 	return items
 
-def parse_foldoc(word, refer = None):
-
-	file = open("dictionaries/foldoc.txt")
-	word_line = ""
-	word_len = len(word.split(" "))
-	found = False
-	count_blank = 0
-	items = []
-	multiple = False
-	id = 0
-	skip = False
-	end = False
-	referring = False
-
-	definition = ""
-
-	for line in file:
-		
-		#line = line.strip()
-		word_parts = line.split(" ")
-		word_part = ""
-		if not found:
-			#Read the appropriate number of words depending on how many were specified
-			for part in word_parts[0:word_len]:
-				word_part += part + " "
-			word_part = word_part.rstrip()
-			#Ignore case
-			if word_part.lower() == word.lower():
-				found = True
-
-		#Foldoc definitions are split over multiple lines, so keep reading once we've found it
-		else:
-			line = line.strip()
-			#Line with the specified word is followed by one blank line
-			#Skip the first blank line, and then stop when any further blank lines are found
-			if len(line) == 0:
-				count_blank += 1
-				if count_blank == 1:
-					continue
-				elif multiple:
-					if end:
-						break
-					skip = True
-				else:
-					break
-			
-			subject_parts = line.split(">")
-			if len(subject_parts) > 1:
-				see_also = subject_parts[1].strip(" .")
-			else:
-				see_also = line
-
-			#If the line is just one string enclosed in {}s, then it means "see also", so look up that word
-			if len(line) > 0 and see_also[0] == "{" and see_also[-1] == "}":
-				refer_items = parse_foldoc(see_also.strip("{} "), word)
-				referring = True
-				items += refer_items
-				word_parts = ""
-				
-			#For handling words with multiple definitions
-			if line[0:2] == "1.":
-				multiple = True
 
 
-			if referring:
-				definition = ""
-			if multiple:
-				id_parts = line.split(".")
-
-				#This section is very hacky to deal with various edge cases. Also it was 5am when I was wrote this, and by the time I was done I'd forgotten how it even worked
-				#Maybe one day I'll fix it, but for now I don't want to look at it any more. Removing any one line may break some definitions in various ways.
-				if id_parts[0].isdigit() and len(id_parts) > 1 and id_parts[0] != 0 and definition.strip() is not None and definition.strip(". ") is not word and definition.strip(". ") != refer:
-					definition = definition[3:]
-					if refer is not None:
-						definition = word + ". " + definition
-					if id == 0:
-						id = 1
-					if not referring:
-						if word != definition.strip(". "):
-							item = Definition(word, id, "foldoc", "tech", definition, [], [], [])
-							items.append(item)
-					id = id_parts[0]
-					definition = ""
-					skip = False
-					end = False
-
-					referring = False
-
-				elif referring:
-					end = False
-				elif skip:
-					end = True
-
-			if not skip and not end:
-				for part in word_parts:
-					definition += part.strip().replace("{", "").replace("}", "") + " "
-
-	if not found :
-		return
-	else:
-		if id == 0:
-			id = 1
-		if definition is not None and len(definition) > 1 and definition.strip(". ") != word and definition.strip(". ") != refer and not referring :
-			if definition[0].isdigit() and definition[1] == ".":
-				definition = definition[3:]
-			if refer is not None:
-				definition = word + ". " + definition
-			item = Definition(word, id, "foldoc", "tech", definition, [], [], [])
-			items.append(item)
-		return items
-
 def parse_urban(word):
 
 	word = word.replace(" ", "+")
@@ -460,7 +331,7 @@ def parse_urban(word):
 	items = []
 	id = 1
 	for json_item in json:
-		if json_item['definition'] != "" and json_item['definition'] is not None:
+		if json_item['definition']:
 			item = Definition(word, id, "urban", "urban", json_item['definition'], [json_item['example']], [], [])
 			items.append(item)
 			id += 1
@@ -486,7 +357,7 @@ def main():
 
 		items = parse_xml(xml)
 	elif word_dict == "foldoc" or word_type == "tech":
-		items = get_sql(word, word_dict)
+		items = get_sql(word, "foldoc")
 	elif word_dict == "urban":
 		items = parse_urban(word)
 	else:
@@ -564,15 +435,17 @@ def main():
 			if suppress_print:
 				num_more+=1
 			else:
+				# Ignore anything after a line break as this breaks output
+				item_definition = item.definition.split("\n")[0]
 				if item.id > 0:
-					definition = item.word_type + " " + str(item.id) + ": " + item.definition
+					definition = item.word_type + " " + str(item.id) + ": " + item_definition
 				else:
-					definition = item.word_type + ": " + item.definition
-				if definition[-1] is not "." and definition[-1] is not " " and len(item.uses) == 0 :
+					definition = item.word_type + ": " + item_definition
+				definition = definition.strip()
+				if definition[-1] != "." and definition[-1] != " " and len(item.uses) == 0 :
 					definition += ". "
 				elif definition[-1] == "." :
 					definition += " "
-
 				#Print usage examples if they exist
 				if len(item.uses) > 0:
 					definition = definition.rstrip(". ")
author	Joe Robinson <joe@lc8n.com>	2014-10-04 17:58:43 +0100
committer	Joe Robinson <joe@lc8n.com>	2014-10-04 17:58:43 +0100
commit	7fa9083eb83e96eb53f7b8ca39634bc34d8f38c0 (patch)
tree	4c048446fe1f26b5565a445539998ab92dab7fbb /bladictionary.py
parent	05b3990f3d81ea66cbca6e4e6e2bfb8294318931 (diff)