diff options
authorJoe Robinson <>2014-09-19 01:02:42 +0100
committerJoe Robinson <>2014-09-19 01:02:42 +0100
commitac2251f199398b813f394bdb875bc2723557781c (patch)
parent2dfb9a16bd458f16f216f20b56485d982fd9af0f (diff)
Created script for converting WordNet database into a simpler/faster form
1 files changed, 74 insertions, 0 deletions
diff --git a/ b/
new file mode 100644
index 0000000..9e8bba4
--- /dev/null
+++ b/
@@ -0,0 +1,74 @@
+import sys
+
+import MySQLdb as mysql
+def create():
+ try:
+ con = mysql.connect('localhost', 'wordnet', 'words', 'wordnet');
+ with con:
+ cur = con.cursor(mysql.cursors.DictCursor)
+ cur.execute("CREATE TABLE types (id int not null auto_increment, type text, abbreviation text, primary key(id))")
+ cur.execute("INSERT INTO types (type, abbreviation) VALUES('noun', 'n')")
+ cur.execute("INSERT INTO types (type, abbreviation) VALUES('verb', 'v')")
+ cur.execute("INSERT INTO types (type, abbreviation) VALUES('adjective', 'adj')")
+ cur.execute("INSERT INTO types (type, abbreviation) VALUES('adverb', 'adv')")
+ cur.execute("CREATE TABLE definitions (id bigint not null auto_increment, word text, type_id int, sub_id int, synset_id bigint, definition text, primary key(id))")
+ cur.execute("CREATE TABLE uses (id bigint not null auto_increment, definition_id bigint, quote text, primary key(id))")
+ except mysql.Error, e:
+ print "Database Error %d: %s" % (e.args[0],e.args[1])
+ sys.exit(1)
+def select():
+ try:
+ con = mysql.connect('localhost', 'wordnet', 'words', 'wordnet');
+ with con:
+ cur = con.cursor(mysql.cursors.DictCursor)
+ cur.execute("SELECT lemma, pos, sensenum, synsetid, definition, sampleset from dict")
+ rows = cur.fetchall()
+ print len(rows)
+ except mysql.Error, e:
+ print "Database Error %d: %s" % (e.args[0],e.args[1])
+ sys.exit(1)
+ return rows
+def insert(rows):
+ try:
+ con = mysql.connect('localhost', 'wordnet', 'words', 'wordnet');
+ with con:
+ cur = con.cursor(mysql.cursors.DictCursor)
+ for row in rows:
+ if row['pos'] == 'n':
+ type_id = 1
+ elif row['pos'] == 'v':
+ type_id = 2
+ elif row['pos'] == 'a' or row['pos'] == 's':
+ type_id = 3
+ elif row['pos'] == 'r':
+ type_id = 4
+ cur.execute("INSERT INTO definitions(word, type_id, sub_id, synset_id, definition) values(%s, %s, %s, %s, %s)", [row['lemma'], type_id, row['sensenum'], row['synsetid'], row['definition']])
+ row_id = cur.lastrowid
+ if row['sampleset'] is not None:
+ uses = row['sampleset'].split("|")
+ for use in uses:
+ cur.execute("INSERT INTO uses(definition_id, quote) values(%s, %s)", [row_id, use])
+ except mysql.Error, e:
+ print "Database Error %d: %s" % (e.args[0],e.args[1])
+ sys.exit(1)
+def main():
+ create()
+ items = select()
+ insert(items)
+# Run the conversion only when this file is executed as a script, not
+# when it is imported as a module.
+if __name__ == "__main__":
+    main()