-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathnltk1.py
More file actions
33 lines (28 loc) · 783 Bytes
/
nltk1.py
File metadata and controls
33 lines (28 loc) · 783 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import time
import urllib2
from urllib2 import urlopen
import re
import cookielib, urllib2
from cookielib import CookieJar
import datetime
# Cookie store shared by every request made through ``opener``.
cj = CookieJar()

# Build the opener in two steps: a cookie-aware handler first, then the
# opener that routes requests through it.
_cookie_handler = urllib2.HTTPCookieProcessor(cj)
opener = urllib2.build_opener(_cookie_handler)

# Send a browser-like User-agent header with each request made via ``opener``.
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
def main():
    """Fetch the Huffington Post feed and print its titles, then its links.

    The feed is downloaded through the module-level ``opener``. The text of
    every ``<title>`` element is printed one per line, followed by the text
    of every ``<link>`` element.

    This is a best-effort script entry point: any ``Exception`` raised while
    fetching, parsing or printing is caught and its message is printed
    instead of propagating to the caller.
    """
    page = 'http://www.huffingtonpost.com/feeds/index.xml'
    try:
        response = opener.open(page)
        try:
            sourceCode = response.read()
        finally:
            # Release the connection even if the read fails part-way.
            response.close()

        # NOTE(review): an empty pattern matches at every position and only
        # yields empty strings, so the element tags are spelled out here.
        # The tag names are assumed from the variable names and the feed
        # URL -- confirm them against the actual feed markup.
        titles = re.findall(r'<title>(.*?)</title>', sourceCode)
        links = re.findall(r'<link>(.*?)</link>', sourceCode)

        # Single-argument print(...) produces the same output as the
        # Python 2 print statement.
        for title in titles:
            print(title)
        for link in links:
            print(link)
    except Exception as e:
        # Report the failure rather than crash; one handler is enough
        # because fetch and parse errors are treated identically.
        print(str(e))
# Run the scraper immediately. There is no __main__ guard, so importing
# this module triggers the fetch as well as executing it directly.
main()