diff options
author | Joe Robinson <joe@lc8n.com> | 2014-09-21 01:08:15 +0100 |
---|---|---|
committer | Joe Robinson <joe@lc8n.com> | 2014-09-21 01:08:15 +0100 |
commit | 7a1af5825afb1ea7f282608bac259507a1a398e3 (patch) | |
tree | bd92f10ce1ffe1233862bc17f0860160f4e702e9 /bladictionary.py | |
parent | 36102ee26a1d6c1b90fa14ea30bb346c66164213 (diff) |
Added FOLDOC dictionary parsing for technical words
Diffstat (limited to 'bladictionary.py')
-rwxr-xr-x | bladictionary.py | 62 |
1 file changed, 57 insertions, 5 deletions
diff --git a/bladictionary.py b/bladictionary.py index e7038b7..c3e135a 100755 --- a/bladictionary.py +++ b/bladictionary.py @@ -7,7 +7,7 @@ import optparse from lxml import etree import sqlite3 -VERSION = "2.1.8b" +VERSION = "2.1.10b" class Definition(object): #ID is relative to the word type, eg noun 1, noun 2, verb 1, verb 2, not to the entire list @@ -151,8 +151,8 @@ def parse_args(): parser.add_option( "-c", "--channel", action = "store", help = "The IRC channel of the request") options, args = parser.parse_args( args ) - types = ["n", "noun", "v", "verb", "adj", "adjective", "adv", "adverb"] - dicts = ["wn", "wordnet", "oed", "db"] + types = ["n", "noun", "v", "verb", "adj", "adjective", "adv", "adverb", "tech"] + dicts = ["wn", "wordnet", "oed", "db", "foldoc"] word = "" word_type = "" @@ -296,6 +296,55 @@ def get_sql(word): return items +def parse_foldoc(word, refer = False): + + file = open("dictionaries/foldoc.txt") + word_line = "" + word_len = len(word.split(" ")) + found = False + count_blank = 0 + items = [] + #If it's been referred from another definition, include the new word + if refer: + definition = word + ". 
" + else: + definition = "" + + for line in file: + line = line.strip() + word_parts = line.split(" ") + word_part = "" + if not found: + #Read the appropriate number of words depending on how many were specified + for part in word_parts[0:word_len]: + word_part += part + " " + word_part = word_part.strip() + + #Ignore case + if word_part.lower() == word.lower(): + found = True + + #Foldoc definitions are split over multiple lines, so keep reading once we've found it + else: + + if len(line) == 0: + count_blank += 1 + if count_blank == 1: + continue + else: + break + if line[0] == "{" and line[-1] == "}": + parse_foldoc(line.strip("{} "), True) + for part in word_parts: + definition += part.strip("{").replace("}", "") + " " + + if not found: + return + else: + item = Definition(word, 0, "foldoc", "tech", definition, [], [], []) + items.append(item) + return items + def main(): @@ -312,9 +361,12 @@ def main(): xml = get_xml(word, word_dict) if xml is None: - return + print "Error finding definitions for " + word + sys.exit( 1 ) items = parse_xml(xml) + elif word_dict == "foldoc" or word_type == "tech": + items = parse_foldoc(word) else: if word_dict is None or word_dict == "": word_dict = "db" @@ -336,7 +388,7 @@ def main(): else: max_length = 460 - types = ["n", "v", "adj", "adv"] + types = ["n", "v", "adj", "adv", "tech"] type_id = 0 all_types = word_type is "" |