summary | refs | log | tree | commit | diff
path: root/bladictionary.py
diff options
context:
space:
mode:
author Joe Robinson <joe@lc8n.com> 2014-09-21 01:08:15 +0100
committer Joe Robinson <joe@lc8n.com> 2014-09-21 01:08:15 +0100
commit 7a1af5825afb1ea7f282608bac259507a1a398e3 (patch)
tree bd92f10ce1ffe1233862bc17f0860160f4e702e9 /bladictionary.py
parent 36102ee26a1d6c1b90fa14ea30bb346c66164213 (diff)
Added FOLDOC dictionary parsing for technical words
Diffstat (limited to 'bladictionary.py')
-rwxr-xr-x bladictionary.py 62
1 files changed, 57 insertions, 5 deletions
diff --git a/bladictionary.py b/bladictionary.py
index e7038b7..c3e135a 100755
--- a/bladictionary.py
+++ b/bladictionary.py
@@ -7,7 +7,7 @@ import optparse
from lxml import etree
import sqlite3
-VERSION = "2.1.8b"
+VERSION = "2.1.10b"
class Definition(object):
#ID is relative to the word type, eg noun 1, noun 2, verb 1, verb 2, not to the entire list
@@ -151,8 +151,8 @@ def parse_args():
parser.add_option( "-c", "--channel", action = "store", help = "The IRC channel of the request")
options, args = parser.parse_args( args )
- types = ["n", "noun", "v", "verb", "adj", "adjective", "adv", "adverb"]
- dicts = ["wn", "wordnet", "oed", "db"]
+ types = ["n", "noun", "v", "verb", "adj", "adjective", "adv", "adverb", "tech"]
+ dicts = ["wn", "wordnet", "oed", "db", "foldoc"]
word = ""
word_type = ""
@@ -296,6 +296,55 @@ def get_sql(word):
return items
+def parse_foldoc(word, refer = False, _seen = None):
+    """Look up `word` in dictionaries/foldoc.txt and build its definition.
+
+    The file is scanned line by line. A line matches when its first N
+    space-separated words (N = number of words in `word`) equal `word`,
+    ignoring case. After the match, the first blank line is skipped and
+    every following line is appended to the definition text (with "{" and
+    "}" markers removed) until the next blank line.
+
+    A definition line that consists solely of "{other term}" is treated as
+    a cross-reference: that term is looked up as well and its definitions
+    are returned after the one for `word`.
+
+    word   -- the term to look up
+    refer  -- True when called for a cross-reference; the definition text
+              is then prefixed with the term itself
+    _seen  -- internal: lower-cased terms already visited, used to stop
+              cross-references that point back at each other
+
+    Returns a list of Definition objects, or None if `word` was not found.
+    """
+    if _seen is None:
+        _seen = set()
+    _seen.add(word.lower())
+
+    word_len = len(word.split(" "))
+    wanted = word.lower()
+    found = False
+    count_blank = 0
+    referred = []
+    #If it's been referred from another definition, include the new word
+    if refer:
+        pieces = [word + ". "]
+    else:
+        pieces = []
+
+    # The context manager closes the dictionary file on every exit path
+    with open("dictionaries/foldoc.txt") as foldoc:
+        for line in foldoc:
+            line = line.strip()
+            word_parts = line.split(" ")
+
+            if not found:
+                #Read the appropriate number of words depending on how many were specified
+                headword = " ".join(word_parts[0:word_len]).strip()
+                #Ignore case
+                if headword.lower() == wanted:
+                    found = True
+                continue
+
+            #Foldoc definitions are split over multiple lines, so keep reading once we've found it
+            if len(line) == 0:
+                count_blank += 1
+                if count_blank == 1:
+                    #First blank line after the match: skip it
+                    continue
+                #Second blank line ends the definition
+                break
+
+            if line[0] == "{" and line[-1] == "}":
+                target = line.strip("{} ")
+                #Follow the reference once only, and keep what it returns
+                if target.lower() not in _seen:
+                    extra = parse_foldoc(target, True, _seen)
+                    if extra:
+                        referred.extend(extra)
+
+            for part in word_parts:
+                pieces.append(part.strip("{").replace("}", "") + " ")
+
+    if not found:
+        return
+
+    definition = "".join(pieces)
+    items = [Definition(word, 0, "foldoc", "tech", definition, [], [], [])]
+    items.extend(referred)
+    return items
+
def main():
@@ -312,9 +361,12 @@ def main():
xml = get_xml(word, word_dict)
if xml is None:
- return
+ print "Error finding definitions for " + word
+ sys.exit( 1 )
items = parse_xml(xml)
+ elif word_dict == "foldoc" or word_type == "tech":
+ items = parse_foldoc(word)
else:
if word_dict is None or word_dict == "":
word_dict = "db"
@@ -336,7 +388,7 @@ def main():
else:
max_length = 460
- types = ["n", "v", "adj", "adv"]
+ types = ["n", "v", "adj", "adv", "tech"]
type_id = 0
all_types = word_type is ""