-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathnltk12.py
More file actions
77 lines (56 loc) · 2.09 KB
/
nltk12.py
File metadata and controls
77 lines (56 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import time
import urllib2
from urllib2 import urlopen
import re
import cookielib, urllib2
from cookielib import CookieJar
import datetime
import sqlite3
# Cookie-aware HTTP opener that main() uses to fetch pages; its default
# User-agent header is replaced with a browser-like value.
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
# SQLite connection and cursor shared by main() and the script tail.
conn = sqlite3.connect('knowledgeBase.db')
c = conn.cursor()
# Seed word that the script tail records in doneSyns after main() has run.
startingWord = 'good'
# Value main() stores alongside every synonym it inserts into wordVals.
startingWordVal = -1
# Not referenced anywhere else in the visible code.
synArray = []
def _store_new_synonyms(section, link_text):
    """Insert each synonym linked in one "Synonyms:" section that wordVals lacks.

    ``section`` is the page text following one "Synonyms:" table cell and
    ``link_text`` is the compiled pattern that captures anchor text. Uses the
    module-level cursor ``c`` and connection ``conn``; commits after every
    insert.
    """
    # Only the text up to the end of the synonyms cell is relevant.
    synonyms = link_text.findall(section.split('</span></td>')[0])
    print(synonyms)
    for synonym in synonyms:
        c.execute("SELECT * FROM wordVals WHERE word =?", [synonym])
        if c.fetchone() is None:
            print('not here yet, let us add it')
            c.execute("INSERT INTO wordVals (word, value) VALUES (?,?)",
                      (synonym, startingWordVal))
            conn.commit()
        else:
            print('word already here!')


def main(words=None):
    """Fetch thesaurus.com synonyms for each word and store the new ones.

    For every word the function waits one second, downloads
    ``http://thesaurus.com/browse/<word>?s=t`` through the module-level
    ``opener``, splits the page on the "Synonyms:" table cell, and inserts each
    linked synonym that is not yet in ``wordVals`` with ``startingWordVal`` as
    its value.

    Args:
        words: iterable of words to look up. Defaults to a one-item list
            holding the module-level ``startingWord``.

    Failures are best-effort: exceptions are printed together with the stage
    that failed and processing moves on; nothing is raised to the caller.
    """
    if words is None:
        # The loop used to iterate over ``wordz``, a name never defined in this
        # module, so every call failed with NameError. Fall back to the seed
        # word the rest of the script is built around.
        words = [startingWord]

    # Compiled once; the same pattern is applied to every section of every page.
    link_text = re.compile(r'\">(\w*?)</a>')

    for word in words:
        print('about to do: %s' % (word,))
        # Pause before each request so the site is not hammered.
        time.sleep(1)

        try:
            page = 'http://thesaurus.com/browse/' + word + '?s=t'
            sourceCode = opener.open(page).read()
        except Exception as e:
            print(str(e))
            print('failed in the main loop')
            continue

        try:
            # Element 0 is everything before the first "Synonyms:" cell.
            sections = sourceCode.split('<td valign="top">Synonyms:</td>')[1:]
        except Exception as e:
            print(str(e))
            print('failed 2nd try')
            continue

        for section in sections:
            # One bad section must not stop the remaining ones.
            try:
                _store_new_synonyms(section, link_text)
            except Exception as e:
                print(str(e))
                print('failed in 3rd try')
# Run the scrape, then record the seed word as processed.
main()
# The statement names two columns, so it needs two placeholders and a two-item
# parameter tuple. It previously had a single placeholder and was given
# ``(startingWord)``, which is just the string itself; sqlite3 treats a string
# as a sequence of characters, so the bindings could never match the statement.
# NOTE(review): assumes doneSyns.value should hold startingWordVal, the value
# paired with words elsewhere in this file -- confirm against the table schema.
c.execute("INSERT INTO doneSyns (word, value) VALUES (?,?)",
          (startingWord, startingWordVal))
conn.commit()