-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreddit_batch_request.py
More file actions
73 lines (56 loc) · 2.15 KB
/
reddit_batch_request.py
File metadata and controls
73 lines (56 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import requests
# Post authors collected by handle_recursive_req(); a set, so each name is stored once.
retrieved_text = set()
# Relative path prefixes; get_user_comments() appends '_authors.txt' (input)
# and '_comments.txt' (output) to each one.
files = [r'.\askreddit_content', r'.\casualconv_content']
def main():
    """Entry point: run the comment-download stage.

    Only get_user_comments() is active. The author-collection stage is kept
    below as commented-out code and is not executed.
    """
    get_user_comments()

    # Disabled stage (kept for reference): fetch one page of hot posts,
    # collect the authors into retrieved_text, and append them to a file.
    # handle_recursive_req(None, 1)
    # with open(r'.\post_authors.txt', 'a') as txt:
    #     for i in retrieved_text:
    #         txt.write(i + '\n')
    # exit(0)
def get_user_comments():
    """Download comments for every listed author and append them to disk.

    For each path prefix in the module-level ``files`` list, usernames are
    read one per line from ``<prefix>_authors.txt``. Each user's comment
    listing is requested from Reddit's JSON endpoint (``limit=100``) and the
    ``body`` of every returned item is appended, followed by a newline, to
    ``<prefix>_comments.txt``.

    Raises:
        SystemExit: through ``exit()`` as soon as a request returns a non-OK
            status; the status code is printed first.
    """
    for prefix in files:
        with open(prefix + '_authors.txt', 'r') as authors, \
                open(prefix + '_comments.txt', 'a') as out:
            for line in authors:
                # Each line still carries its trailing newline; strip it so
                # it does not end up inside the URL path.
                user = line.strip()
                if not user:
                    # Blank line: there is no username to look up.
                    continue

                url = 'https://www.reddit.com/user/' + user + '/comments/.json?limit=100'
                response = requests.get(url, headers={'User-agent': 'JsonGrab'})
                if not response.ok:
                    print("Error", response.status_code)
                    exit()

                for child in response.json()['data']['children']:
                    try:
                        out.write(child['data']['body'] + '\n')
                    except UnicodeEncodeError:
                        # The output file is opened without an explicit
                        # encoding, so a body containing characters the
                        # default encoding cannot represent is skipped.
                        print("** UEE exception caught")
def handle_recursive_req(paramId, reqCount):
    """Fetch a page of r/askreddit hot posts and record the post authors.

    Returns immediately when ``reqCount`` is greater than 1. Otherwise it
    prints a progress line, requests the listing (adding ``&after=<paramId>``
    when ``paramId`` is not None), adds every post's ``author`` to the
    module-level ``retrieved_text`` set, and calls itself with the listing's
    ``after`` value and ``reqCount + 1``.

    A non-OK response prints the status code and terminates via ``exit()``.
    """
    page_limit_reached = reqCount > 1
    if page_limit_reached:
        return

    print("Parsed page No.", reqCount, " -- Approx.", (reqCount) * 100, " total posts.")

    url = 'https://www.reddit.com/r/askreddit/hot.json?limit=100'
    if paramId is not None:
        url = url + "&after=" + paramId

    response = requests.get(url, headers={'User-agent': 'JsonGrab'})
    if not response.ok:
        print("Error", response.status_code)
        exit()

    next_count = reqCount + 1
    listing = response.json()['data']
    next_cursor = listing['after']
    children = listing['children']
    retrieved_text.update(child['data']['author'] for child in children)

    handle_recursive_req(next_cursor, next_count)
# try:
# outFile.write(title + '\n')
# except UnicodeEncodeError:
# print("** UEE exception caught")
# continue
def sanitize_outfile():
    """Unimplemented stub: takes no arguments, does nothing, returns None."""
    return None
# Run only when executed as a script, so that importing this module does not
# start network requests or file writes.
if __name__ == "__main__":
    main()