summaryrefslogtreecommitdiff
path: root/bladictionary.py
diff options
context:
space:
mode:
authorJoe Robinson <joe@lc8n.com>2014-09-21 07:01:48 +0100
committerJoe Robinson <joe@lc8n.com>2014-09-21 07:01:48 +0100
commit5c9c234cc572220e73b89d3169e27c639c5e0270 (patch)
treef36308b59ca4f5bde81af2d646750ab122f4ba9c /bladictionary.py
parent1792ce90f5e339ab1bf87f05f8b8bd064997fa00 (diff)
Various hacky fixes for FOLDOC
Diffstat (limited to 'bladictionary.py')
-rwxr-xr-xbladictionary.py73
1 files changed, 54 insertions, 19 deletions
diff --git a/bladictionary.py b/bladictionary.py
index 0adb568..6cdaa03 100755
--- a/bladictionary.py
+++ b/bladictionary.py
@@ -7,7 +7,7 @@ import optparse
from lxml import etree
import sqlite3
-VERSION = "2.2.1"
+VERSION = "2.2.2"
class Definition(object):
#ID is relative to the word type, eg noun 1, noun 2, verb 1, verb 2, not to the entire list
@@ -302,7 +302,7 @@ def get_sql(word):
return items
-def parse_foldoc(word, refer = False):
+def parse_foldoc(word, refer = None):
file = open("dictionaries/foldoc.txt")
word_line = ""
@@ -313,15 +313,13 @@ def parse_foldoc(word, refer = False):
multiple = False
id = 0
skip = False
+ end = False
+ referring = False
- #If it's been referred from another definition, include the new word
- if refer:
- definition = word + ". "
- else:
- definition = ""
+ definition = ""
for line in file:
-
+
#line = line.strip()
word_parts = line.split(" ")
word_part = ""
@@ -344,38 +342,75 @@ def parse_foldoc(word, refer = False):
if count_blank == 1:
continue
elif multiple:
+ if end:
+ break
skip = True
else:
break
+
+ subject_parts = line.split(">")
+ if len(subject_parts) > 1:
+ see_also = subject_parts[1].strip(" .")
+ else:
+ see_also = line
#If the line is just one string enclosed in {}s, then it means "see also", so look up that word
-
- if len(line) > 0 and line[0] == "{" and line[-1] == "}":
- item = parse_foldoc(line.strip("{} "), True)[0]
- items.append(item)
- return items
+ if len(line) > 0 and see_also[0] == "{" and see_also[-1] == "}":
+ refer_items = parse_foldoc(see_also.strip("{} "), word)
+ referring = True
+ items += refer_items
+ word_parts = ""
+
+ #For handling words with multiple definitions
if line[0:2] == "1.":
multiple = True
+
+ if referring:
+ definition = ""
if multiple:
id_parts = line.split(".")
- if id_parts[0].isdigit() and definition is not None and definition != "":
+
+ #This section is very hacky to deal with various edge cases. Also, it was 5am when I wrote this, and by the time I was done I'd forgotten how it even worked
+ #Maybe one day I'll fix it, but for now I don't want to look at it any more. Removing any one line may break some definitions in various ways.
+ if id_parts[0].isdigit() and len(id_parts) > 1 and id_parts[0] != 0 and definition.strip() is not None and definition.strip(". ") is not word and definition.strip(". ") != refer:
definition = definition[3:]
- item = Definition(word, id, "foldoc", "tech", definition, [], [], [])
- items.append(item)
+ if refer is not None:
+ definition = word + ". " + definition
+ if id == 0:
+ id = 1
+ if not referring:
+ if word != definition.strip(". "):
+ item = Definition(word, id, "foldoc", "tech", definition, [], [], [])
+ items.append(item)
id = id_parts[0]
definition = ""
skip = False
+ end = False
- if not skip:
+ referring = False
+
+ elif referring:
+ end = False
+ elif skip:
+ end = True
+
+ if not skip and not end:
for part in word_parts:
definition += part.strip().replace("{", "").replace("}", "") + " "
if not found :
return
else:
- item = Definition(word, id, "foldoc", "tech", definition, [], [], [])
- items.append(item)
+ if id == 0:
+ id = 1
+ if definition is not None and len(definition) > 1 and definition.strip(". ") != word and definition.strip(". ") != refer and not referring :
+ if definition[0].isdigit() and definition[1] == ".":
+ definition = definition[3:]
+ if refer is not None:
+ definition = word + ". " + definition
+ item = Definition(word, id, "foldoc", "tech", definition, [], [], [])
+ items.append(item)
return items