summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joe Robinson <joe@lc8n.com> 2014-09-19 01:02:42 +0100
committer: Joe Robinson <joe@lc8n.com> 2014-09-19 01:02:42 +0100
commit: ac2251f199398b813f394bdb875bc2723557781c (patch)
tree: 2ce6695c21bbcc62b6b97c2468ecb6ece2c78cc1
parent: 2dfb9a16bd458f16f216f20b56485d982fd9af0f (diff)
Created script for converting WordNet database into a simpler/faster form
-rw-r--r-- convert.py 74
1 files changed, 74 insertions, 0 deletions
diff --git a/convert.py b/convert.py
new file mode 100644
index 0000000..9e8bba4
--- /dev/null
+++ b/convert.py
@@ -0,0 +1,74 @@
+import sys
+
+import MySQLdb as mysql
+
+def create():
+    """Create the simplified schema: ``types``, ``definitions`` and ``uses``.
+
+    Connects to the local ``wordnet`` database, creates the three tables and
+    seeds ``types`` with the four word classes.  On any ``MySQLdb`` error the
+    error code and message are printed and the process exits with status 1.
+    """
+    # Insertion order matters: insert() hard-codes type ids 1-4, which is
+    # presumably what auto_increment hands out for these rows in a freshly
+    # created table -- confirm if the auto-increment settings are non-default.
+    word_types = (
+        ('noun', 'n'),
+        ('verb', 'v'),
+        ('adjective', 'adj'),
+        ('adverb', 'adv'),
+    )
+
+    try:
+        con = mysql.connect('localhost', 'wordnet', 'words', 'wordnet')
+
+        # NOTE(review): relies on the connection's context-manager behaviour,
+        # which differs between MySQLdb/mysqlclient releases -- confirm for
+        # the installed version.
+        with con:
+            cur = con.cursor(mysql.cursors.DictCursor)
+            cur.execute("CREATE TABLE types (id int not null auto_increment, type text, abbreviation text, primary key(id))")
+            for type_name, abbreviation in word_types:
+                cur.execute("INSERT INTO types (type, abbreviation) VALUES(%s, %s)", (type_name, abbreviation))
+            cur.execute("CREATE TABLE definitions (id bigint not null auto_increment, word text, type_id int, sub_id int, synset_id bigint, definition text, primary key(id))")
+            cur.execute("CREATE TABLE uses (id bigint not null auto_increment, definition_id bigint, quote text, primary key(id))")
+
+    except mysql.Error as e:
+        # ``as`` / print(...) with a single argument work on Python 2.6+ and 3.
+        print("Database Error %d: %s" % (e.args[0], e.args[1]))
+        sys.exit(1)
+
+def select():
+    """Read every row of the source ``dict`` table.
+
+    Prints the number of rows fetched and returns them.  Each row is a
+    dictionary (``DictCursor``) with the keys ``lemma``, ``pos``,
+    ``sensenum``, ``synsetid``, ``definition`` and ``sampleset``.
+
+    On any ``MySQLdb`` error the error code and message are printed and the
+    process exits with status 1, so ``rows`` is always bound when the
+    function returns normally.
+    """
+    try:
+        con = mysql.connect('localhost', 'wordnet', 'words', 'wordnet')
+
+        # NOTE(review): relies on the connection's context-manager behaviour,
+        # which differs between MySQLdb/mysqlclient releases -- confirm for
+        # the installed version.
+        with con:
+            cur = con.cursor(mysql.cursors.DictCursor)
+            cur.execute("SELECT lemma, pos, sensenum, synsetid, definition, sampleset from dict")
+            rows = cur.fetchall()
+            print(len(rows))
+    except mysql.Error as e:
+        # ``as`` / print(...) with a single argument work on Python 2.6+ and 3.
+        print("Database Error %d: %s" % (e.args[0], e.args[1]))
+        sys.exit(1)
+
+    return rows
+
+def insert(rows):
+    """Copy source rows into the ``definitions`` and ``uses`` tables.
+
+    ``rows`` is an iterable of dictionaries with the keys ``lemma``, ``pos``,
+    ``sensenum``, ``synsetid``, ``definition`` and ``sampleset`` (the shape
+    returned by ``select()``).  Each row becomes one ``definitions`` record;
+    its ``sampleset`` is split on ``|`` and every non-empty piece becomes one
+    ``uses`` record pointing at that definition.
+
+    On any ``MySQLdb`` error the error code and message are printed and the
+    process exits with status 1.
+    """
+    # Part-of-speech code -> types.id.  'a' and 's' share the same id.
+    # The ids are presumably the auto_increment values assigned when the
+    # ``types`` table was seeded -- confirm against the table contents.
+    type_ids = {'n': 1, 'v': 2, 'a': 3, 's': 3, 'r': 4}
+
+    try:
+        con = mysql.connect('localhost', 'wordnet', 'words', 'wordnet')
+
+        # NOTE(review): relies on the connection's context-manager behaviour,
+        # which differs between MySQLdb/mysqlclient releases -- confirm for
+        # the installed version.
+        with con:
+            cur = con.cursor(mysql.cursors.DictCursor)
+
+            for row in rows:
+                # An unrecognised pos code yields None (stored as NULL).  The
+                # original if/elif chain left type_id unassigned here, which
+                # raised NameError on the first row or silently reused the
+                # previous row's type on later ones.
+                type_id = type_ids.get(row['pos'])
+
+                cur.execute("INSERT INTO definitions(word, type_id, sub_id, synset_id, definition) values(%s, %s, %s, %s, %s)", [row['lemma'], type_id, row['sensenum'], row['synsetid'], row['definition']])
+
+                row_id = cur.lastrowid
+
+                # Covers both NULL and '' sample sets.
+                if row['sampleset']:
+                    for use in row['sampleset'].split("|"):
+                        if not use:
+                            # Skip empty fragments (e.g. from a trailing '|')
+                            # rather than storing blank quotes.
+                            continue
+                        cur.execute("INSERT INTO uses(definition_id, quote) values(%s, %s)", [row_id, use])
+
+    except mysql.Error as e:
+        # ``as`` / print(...) with a single argument work on Python 2.6+ and 3.
+        print("Database Error %d: %s" % (e.args[0], e.args[1]))
+        sys.exit(1)
+
+def main():
+    """Run the conversion: create the new tables, then copy the rows across."""
+    create()
+    # Read everything from the source table and feed it straight to insert().
+    insert(select())
+
+
+# Only run the conversion when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
+