summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoe Robinson <joe@lc8n.com>2014-10-04 17:58:43 +0100
committerJoe Robinson <joe@lc8n.com>2014-10-04 17:58:43 +0100
commit7fa9083eb83e96eb53f7b8ca39634bc34d8f38c0 (patch)
tree4c048446fe1f26b5565a445539998ab92dab7fbb
parent05b3990f3d81ea66cbca6e4e6e2bfb8294318931 (diff)
Fixes, cleanup, move definition class to own file
-rwxr-xr-xbladictionary.py151
-rwxr-xr-xconvert.py26
-rw-r--r--definition.py24
3 files changed, 37 insertions, 164 deletions
diff --git a/bladictionary.py b/bladictionary.py
index 58ddfeb..932d54e 100755
--- a/bladictionary.py
+++ b/bladictionary.py
@@ -7,29 +7,9 @@ import optparse
from lxml import etree
import sqlite3
import requests
+from definition import Definition
-VERSION = "2.4.0"
-class Definition(object):
-
- #ID is relative to the word type, eg noun 1, noun 2, verb 1, verb 2, not to the entire list
- id = 0
- word = ""
- dictionary = ""
- word_type = ""
- definition = ""
- uses = []
- synonyms = []
- antonyms = []
-
- def __init__(self, word, id, dictionary, word_type, definition, uses, synonyms, antonyms):
- self.word = word
- self.id = id
- self.dictionary = dictionary
- self.word_type = word_type
- self.definition = definition
- self.uses = uses
- self.synonyms = synonyms
- self.antonyms = antonyms
+VERSION = "2.4.1"
def get_xml(word, word_dict):
api_url = "http://services.aonaware.com/DictService/DictService.asmx/DefineInDict?dictId="+word_dict+"&word="+word
@@ -238,7 +218,7 @@ def parse_oed(word):
#If we've reached a word type, add the current data to the list
if part in types.keys():
#Don't add empty definitions
- if id > 0 and definition is not " " and definition is not "" and definition is not None:
+ if id > 0 and definition is not " " and definition:
item = Definition(word, id, "oed", word_type, definition, [], [], [])
items.append(item)
definition = ""
@@ -246,7 +226,7 @@ def parse_oed(word):
#Convert the word type to the simple version
word_type = types[part]
- elif part.isdigit() and prev_part is not None:# and (prev_part[-1] == "." or prev_part[-1] == ")"):
+ elif part.isdigit() and prev_part is not None:
#Ignore anything before the first definition
if part is not "1":
item = Definition(word, id, "oed", word_type, definition, [], [], [])
@@ -335,117 +315,8 @@ def get_sql(word, dictionary = "wn"):
return items
-def parse_foldoc(word, refer = None):
-
- file = open("dictionaries/foldoc.txt")
- word_line = ""
- word_len = len(word.split(" "))
- found = False
- count_blank = 0
- items = []
- multiple = False
- id = 0
- skip = False
- end = False
- referring = False
-
- definition = ""
-
- for line in file:
-
- #line = line.strip()
- word_parts = line.split(" ")
- word_part = ""
- if not found:
- #Read the appropriate number of words depending on how many were specified
- for part in word_parts[0:word_len]:
- word_part += part + " "
- word_part = word_part.rstrip()
- #Ignore case
- if word_part.lower() == word.lower():
- found = True
-
- #Foldoc definitions are split over multiple lines, so keep reading once we've found it
- else:
- line = line.strip()
- #Line with the specified word is followed by one blank line
- #Skip the first blank line, and then stop when any further blank lines are found
- if len(line) == 0:
- count_blank += 1
- if count_blank == 1:
- continue
- elif multiple:
- if end:
- break
- skip = True
- else:
- break
-
- subject_parts = line.split(">")
- if len(subject_parts) > 1:
- see_also = subject_parts[1].strip(" .")
- else:
- see_also = line
-
- #If the line is just one string enclosed in {}s, then it means "see also", so look up that word
- if len(line) > 0 and see_also[0] == "{" and see_also[-1] == "}":
- refer_items = parse_foldoc(see_also.strip("{} "), word)
- referring = True
- items += refer_items
- word_parts = ""
-
- #For handling words with multiple definitions
- if line[0:2] == "1.":
- multiple = True
- if referring:
- definition = ""
- if multiple:
- id_parts = line.split(".")
-
- #This section is very hacky to deal with various edge cases. Also it was 5am when I was wrote this, and by the time I was done I'd forgotten how it even worked
- #Maybe one day I'll fix it, but for now I don't want to look at it any more. Removing any one line may break some definitions in various ways.
- if id_parts[0].isdigit() and len(id_parts) > 1 and id_parts[0] != 0 and definition.strip() is not None and definition.strip(". ") is not word and definition.strip(". ") != refer:
- definition = definition[3:]
- if refer is not None:
- definition = word + ". " + definition
- if id == 0:
- id = 1
- if not referring:
- if word != definition.strip(". "):
- item = Definition(word, id, "foldoc", "tech", definition, [], [], [])
- items.append(item)
- id = id_parts[0]
- definition = ""
- skip = False
- end = False
-
- referring = False
-
- elif referring:
- end = False
- elif skip:
- end = True
-
- if not skip and not end:
- for part in word_parts:
- definition += part.strip().replace("{", "").replace("}", "") + " "
-
- if not found :
- return
- else:
- if id == 0:
- id = 1
- if definition is not None and len(definition) > 1 and definition.strip(". ") != word and definition.strip(". ") != refer and not referring :
- if definition[0].isdigit() and definition[1] == ".":
- definition = definition[3:]
- if refer is not None:
- definition = word + ". " + definition
- item = Definition(word, id, "foldoc", "tech", definition, [], [], [])
- items.append(item)
- return items
-
def parse_urban(word):
word = word.replace(" ", "+")
@@ -460,7 +331,7 @@ def parse_urban(word):
items = []
id = 1
for json_item in json:
- if json_item['definition'] != "" and json_item['definition'] is not None:
+ if json_item['definition']:
item = Definition(word, id, "urban", "urban", json_item['definition'], [json_item['example']], [], [])
items.append(item)
id += 1
@@ -486,7 +357,7 @@ def main():
items = parse_xml(xml)
elif word_dict == "foldoc" or word_type == "tech":
- items = get_sql(word, word_dict)
+ items = get_sql(word, "foldoc")
elif word_dict == "urban":
items = parse_urban(word)
else:
@@ -564,15 +435,17 @@ def main():
if suppress_print:
num_more+=1
else:
+ # Ignore anything after a line break as this breaks output
+ item_definition = item.definition.split("\n")[0]
if item.id > 0:
- definition = item.word_type + " " + str(item.id) + ": " + item.definition
+ definition = item.word_type + " " + str(item.id) + ": " + item_definition
else:
- definition = item.word_type + ": " + item.definition
- if definition[-1] is not "." and definition[-1] is not " " and len(item.uses) == 0 :
+ definition = item.word_type + ": " + item_definition
+ definition = definition.strip()
+ if definition[-1] != "." and definition[-1] != " " and len(item.uses) == 0 :
definition += ". "
elif definition[-1] == "." :
definition += " "
-
#Print usage examples if they exist
if len(item.uses) > 0:
definition = definition.rstrip(". ")
diff --git a/convert.py b/convert.py
index 90f5a96..ab5b35d 100755
--- a/convert.py
+++ b/convert.py
@@ -4,31 +4,7 @@
import MySQLdb as mysql
import sqlite3
import sys
-
-class Definition(object):
-
- #ID is relative to the word type, eg noun 1, noun 2, verb 1, verb 2, not to the entire list
- id = 0
- word = ""
- dictionary = ""
- word_type = ""
- definition = ""
- uses = []
- synonyms = []
- antonyms = []
- categories = []
-
- def __init__(self, word, id, dictionary, word_type, definition, uses = [], synonyms = [], antonyms = [], categories= [], see_also = False):
- self.word = word
- self.id = id
- self.dictionary = dictionary
- self.word_type = word_type
- self.definition = definition
- self.uses = uses
- self.synonyms = synonyms
- self.antonyms = antonyms
- self.categories = categories
- self.see_also = see_also
+from definition import Definition
def create():
try:
diff --git a/definition.py b/definition.py
new file mode 100644
index 0000000..877888d
--- /dev/null
+++ b/definition.py
@@ -0,0 +1,24 @@
+class Definition(object):
+
+ #ID is relative to the word type, eg noun 1, noun 2, verb 1, verb 2, not to the entire list
+ id = 0
+ word = ""
+ dictionary = ""
+ word_type = ""
+ definition = ""
+ uses = []
+ synonyms = []
+ antonyms = []
+ categories = []
+
+ def __init__(self, word, id, dictionary, word_type, definition, uses = [], synonyms = [], antonyms = [], categories= [], see_also = False):
+ self.word = word
+ self.id = id
+ self.dictionary = dictionary
+ self.word_type = word_type
+ self.definition = definition
+ self.uses = uses
+ self.synonyms = synonyms
+ self.antonyms = antonyms
+ self.categories = categories
+ self.see_also = see_also \ No newline at end of file