Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
358 changes: 190 additions & 168 deletions reddit-analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,186 +5,208 @@
from collections import Counter
import time

# NOTE(review): this span is the OLD side of a pasted GitHub diff; its
# replacement (wrapped in main()) appears further down in this paste.
if len(sys.argv) == 2:
    username = str(sys.argv[1])
else:
    # Wrong argument count: show usage and bail out.
    print('-----')
    print('usage: reddit-analyzer.py USERNAME')
    print('-----')
    sys.exit()

# initialize variables
# username = 'spez'
lastaction = 0  # newest comment/post timestamp, filled in later
headers = {'User-Agent': 'testbot'}  # custom UA; the new code notes Reddit blocks defaults
curts = int(time.time())  # pagination cursor: fetch items "before" this timestamp
commentdata = []
linkdata = []
timelist = []  # created_utc of every comment and post, for the time graphs
hourseconds = 3600
houroffset = -7  # local-time offset in hours (MST per the new code's comment)
offset = hourseconds*houroffset

# let people know that it's working
print(' --- fetching data for user: '+username+' ---')
print(' ')

# fetch profile data
r3 = requests.get('https://www.reddit.com/user/'+username+'/about.json', headers=headers)
userdata = r3.json()['data']  # NOTE(review): no status/error check — raises on bad JSON
def main():
    """Entry point of the rewritten script (NEW side of the pasted diff).

    NOTE(review): the rest of main()'s body continues further down in this
    paste, interleaved with old-version lines by the diff rendering.
    """
    if len(sys.argv) == 2:
        username = str(sys.argv[1])
    else:
        # Wrong argument count: print usage and exit.
        print('-----')
        print('usage: python reddit-analyzer.py USERNAME')
        print('-----')
        sys.exit()

    # --- Configuration ---
    # Reddit requires a unique User-Agent to not block the request
    headers = {'User-Agent': 'python:reddit-analyzer:v2.0 (by /u/unknown)'}

# fetch comments
# NOTE(review): OLD side of the diff — Pushshift pagination, replaced by the
# Reddit JSON API (fetch_reddit_data) in the new version further down.
while True:
    comurl = 'https://api.pushshift.io/reddit/search/comment/?author='+username+'&size=500&before='+str(curts)
    r1 = requests.get(comurl, headers=headers)
    tempdata = r1.json()['data']
    commentdata += tempdata
    try:
        # A full page (500 items) implies more may follow; advance the
        # "before" cursor to the oldest item's timestamp.
        if tempdata[499]:
            curts = tempdata[499]['created_utc']
    except: break  # IndexError on a short page ends pagination (bare except also hides real errors)

# re-establish current time
curts = int(time.time())

# fetch posts/submissions
while True:
    linkurl = 'https://api.pushshift.io/reddit/search/submission/?author='+username+'&size=500&before='+str(curts)
    r2 = requests.get(linkurl, headers=headers)
    postdata = r2.json()['data']
    linkdata += postdata
    try:
        if postdata[499]:
            curts = postdata[499]['created_utc']
    except: break


# set last active time
# NOTE(review): IndexErrors if the user has no comments or no posts.
lastcomment = commentdata[0]['created_utc']  # listings arrive newest-first
lastpost = postdata[0]['created_utc']

if lastcomment > lastpost:
    lastaction = lastcomment
else: lastaction = lastpost

# Timezone offset (hours). -7 is MST. Change this to your local offset.
houroffset = -7
hourseconds = 3600
offset = hourseconds * houroffset

# NOTE(review): the next loop and the two comments above it are OLD-side
# diff lines interleaved into the NEW code by the paste.
# add all subreddits to a list
# add all timed activities to a list
subList = []
for x in commentdata:
    subList.append(x['subreddit'].lower())
    timelist.append(x['created_utc'])
print(f' --- fetching data for user: {username} ---')
print(' ')

# NOTE(review): OLD-side loop over submissions, interleaved here.
for x in postdata:
    subList.append(x['subreddit'].lower())
    timelist.append(x['created_utc'])
# --- Fetch Profile Data ---
try:
    r_profile = requests.get(f'https://www.reddit.com/user/{username}/about.json', headers=headers)
    if r_profile.status_code == 404:
        print("Error: User not found.")
        sys.exit()
    elif r_profile.status_code == 429:
        print("Error: Too many requests. Reddit is rate-limiting you.")
        sys.exit()

    userdata = r_profile.json().get('data')
    if not userdata:
        print("Error: Could not retrieve user data.")
        sys.exit()
# SystemExit is not an Exception subclass, so the sys.exit() calls above
# are not swallowed by this handler.
except Exception as e:
    print(f"Error fetching profile: {e}")
    sys.exit()

# --- Fetch Comments & Posts (Pagination) ---
def fetch_reddit_data(ctype):
    """
    Fetch all of a user's comments or submissions from Reddit's JSON API.

    ctype: 'comments' or 'submitted' (the listing endpoint name).

    Returns a list of the raw 'data' dicts for every item, newest first.
    Pagination follows the listing's 'after' cursor until it is exhausted.
    Failures end pagination early (best effort, possibly partial results)
    but are now reported instead of being silently swallowed.

    Relies on 'username' and 'headers' from the enclosing scope.
    """
    items = []
    after = None
    base_url = f'https://www.reddit.com/user/{username}/{ctype}.json'

    print(f"Downloading {ctype}...")

    while True:
        # requests drops params whose value is None, so the first page
        # is requested without an 'after' cursor.
        params = {'limit': 100, 'after': after}
        try:
            r = requests.get(base_url, headers=headers, params=params)
            if r.status_code != 200:
                # e.g. 403 (private/suspended account) or 429 (rate limit).
                print(f"Warning: stopped fetching {ctype} (HTTP {r.status_code}).")
                break

            data = r.json().get('data', {})
            children = data.get('children', [])

            for child in children:
                items.append(child['data'])

            after = data.get('after')
            if not after:
                break  # no cursor -> last page reached

            time.sleep(1)  # Be nice to the API
        except (requests.RequestException, ValueError, KeyError) as e:
            # RequestException: network/transport failure;
            # ValueError: malformed JSON body; KeyError: malformed child.
            print(f"Warning: error while fetching {ctype}: {e}")
            break
    return items

# NOTE(review): NEW-side code with several OLD-side lines interleaved by
# the paste (the offset adjustment, the set creation, and the unguarded
# open() of all-locations.txt below are from the old version).
commentdata = fetch_reddit_data('comments')
linkdata = fetch_reddit_data('submitted')

if not commentdata and not linkdata:
    print("No activity found for this user (or profile is private/banned).")
    sys.exit()

# --- Calculations ---
last_action_ts = 0

# Safely get last action times
if commentdata:
    lastcomment = commentdata[0]['created_utc']  # listings are newest-first
else:
    lastcomment = 0

# adjust time for offset
timelist = [x + offset for x in timelist]
if linkdata:
    lastpost = linkdata[0]['created_utc']
else:
    lastpost = 0

# and create a set for comparison purposes
sublistset = set(subList)
last_action_ts = max(lastcomment, lastpost)

# load subreddits from file and check them against comments
# NOTE(review): old-side line — file handle never closed; the new version
# further down uses a with-statement and handles FileNotFoundError.
locList = [line.rstrip('\n').lower() for line in open('all-locations.txt')]
loclistset = set(locList)
# Aggregate Subreddits and Timestamps
subList = []
timelist = []

for x in commentdata:
    subList.append(x['subreddit'].lower())
    timelist.append(x['created_utc'])

def getProfile():
    """Print the account summary: identity, karma totals, location hits.

    Reads the module-level userdata, lastaction, commentdata, linkdata,
    sublistset and loclistset (old-version display routine).
    """
    summary = [
        '[+] username : ' + str(userdata['name']),
        '[+] creation date : ' + str(datetime.fromtimestamp(userdata['created_utc'])),
        '[+] last action : ' + str(datetime.fromtimestamp(lastaction)),
        '[+] verified email : ' + str(userdata['has_verified_email']),
        '---',
        '[+] total comments : ' + str(len(commentdata)),
        '[+] comment karma : ' + str(userdata['comment_karma']),
        '---',
        '[+] total links : ' + str(len(linkdata)),
        '[+] link karma : ' + str(userdata['link_karma']),
        '---',
        # Subreddits the user posts in that match the known-locations list.
        '[+] location based reddit(s): ' + str(sublistset.intersection(loclistset)),
    ]
    for row in summary:
        print(row)
# Collect each submission's subreddit (lowercased) and timestamp.
for x in linkdata:
    subList.append(x['subreddit'].lower())
    timelist.append(x['created_utc'])

# Adjust time for offset
timelist = [x + offset for x in timelist]

def getComments():
    # NOTE(review): the next comment and assignment look like NEW-side diff
    # lines that the paste dropped inside this OLD-side function.
    # Create sets
    sublistset = set(subList)

    # draw and print ascii graph
    # Frequency of each subreddit across all comments and posts,
    # rendered as a horizontal ASCII bar chart.
    counter = Counter(subList)
    gdata = counter.most_common()

    graph = Pyasciigraph(
        separator_length=4,
        multivalue=False,
        human_readable='si',
    )
    for line in graph.graph('Comment Activity', gdata):
        print(line)

def timeGraph(timelist):
    """Print an hour-of-day activity histogram (old-version routine).

    NOTE(review): in the original file this function appears to continue
    with the weekday graph that shows up further down in this paste; the
    diff interleaving has split it in two.
    """
    newtl = [] # hour list
    wdlist = [] # weekday list

    # fill newtl with HOURs
    for x in timelist:
        newtl.append(datetime.fromtimestamp(int(x)).hour)

    # create hour name list
    hournames = '00:00 01:00 02:00 03:00 04:00 05:00 06:00 07:00 08:00 09:00 10:00 11:00 12:00 13:00 14:00 15:00 16:00 17:00 18:00 19:00 20:00 21:00 22:00 23:00'.split()

    # deal with HOUR counting
    tgCounter = Counter(newtl)
    tgdata = tgCounter.most_common()
    # sort by HOUR not popularity
    tgdata = sorted(tgdata)

    # NOTE(review): this pairing assumes all 24 hours occur in tgdata; an
    # hour with zero activity would misalign labels or raise IndexError.
    # The new version fills gaps with hour_counter.get(h, 0) instead.
    d = []
    e = 0
    for g in hournames:
        d.append(tuple([g, tgdata[e][1]]))
        e+=1
    tgdata = d

    # draw HOUR graph
    graph = Pyasciigraph(
        separator_length=4,
        multivalue=False,
        human_readable='si',
    )
    for line in graph.graph('Time Activity', tgdata):
        print(line)
# Handle the external file safely
try:
    with open('all-locations.txt', 'r') as f:
        # One lowercased subreddit name per line; intersect with the
        # user's active subreddits to find location-revealing ones.
        locList = [line.rstrip('\n').lower() for line in f]
        loclistset = set(locList)
        location_matches = sublistset.intersection(loclistset)
except FileNotFoundError:
    # Fall back to a descriptive string so the profile printout still works.
    location_matches = "('all-locations.txt' not found)"

# --- Display Functions ---

def print_profile():
    """Print the account overview: identity, karma totals and location hits.

    Reads userdata, last_action_ts, commentdata, linkdata and
    location_matches from the enclosing scope.
    """
    created = datetime.fromtimestamp(userdata.get("created_utc", 0))
    print('[+] username : ' + str(userdata.get("name")))
    print('[+] creation date : ' + str(created))
    # Only shown when the user has at least one comment or post.
    if last_action_ts > 0:
        last_seen = datetime.fromtimestamp(last_action_ts)
        print('[+] last action : ' + str(last_seen))
    print('[+] verified email : ' + str(userdata.get("has_verified_email")))
    print('---')
    print('[+] total comments : ' + str(len(commentdata)))
    print('[+] comment karma : ' + str(userdata.get("comment_karma")))
    print('---')
    print('[+] total links : ' + str(len(linkdata)))
    print('[+] link karma : ' + str(userdata.get("link_karma")))
    print('---')
    print('[+] location subreddits: ' + str(location_matches))

def print_sub_graph():
    """Render an ASCII bar chart of the user's ten most active subreddits.

    Reads subList from the enclosing scope; prints nothing for an
    inactive user.
    """
    if not subList:
        return

    # Limit to top 10 for cleanliness
    top_subs = Counter(subList).most_common(10)

    chart = Pyasciigraph(
        separator_length=4,
        multivalue=False,
        human_readable='si',
    )
    for row in chart.graph('Top Subreddits', top_subs):
        print(row)

def print_time_graph(ts_list):
    """Chart activity by hour of day and by weekday for the given timestamps.

    ts_list: iterable of (offset-adjusted) Unix timestamps. Prints nothing
    when it is empty.
    """
    if not ts_list:
        return

    stamps = [datetime.fromtimestamp(int(t)) for t in ts_list]

    # --- Hour Analysis ---
    # Histogram over 00:00-23:00; quiet hours are shown as zero so every
    # label always appears.
    per_hour = Counter(s.hour for s in stamps)
    hour_rows = [(f"{h:02d}:00", per_hour.get(h, 0)) for h in range(24)]

    chart = Pyasciigraph(
        separator_length=4,
        multivalue=False,
        human_readable='si',
    )

    print(' ')
    for row in chart.graph('Activity by Hour (Local Time)', hour_rows):
        print(row)

    print(' ')

    # --- Weekday Analysis ---
    # datetime.weekday() maps Monday to 0; fill quiet days with zero.
    per_day = Counter(s.weekday() for s in stamps)
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    day_rows = [(name, per_day.get(i, 0)) for i, name in enumerate(day_names)]

    for row in chart.graph('Activity by Day of Week', day_rows):
        print(row)

# --- Execution ---
# Tail of main(): print the profile summary, then the two graphs.
print_profile()
print(' ')
print_sub_graph()
print(' ')
print_time_graph(timelist)

# NOTE(review): OLD-side diff lines — the weekday half of timeGraph() plus
# the old top-level calls — followed by the NEW __main__ guard at the end.
# establish weekday list (0 is Monday in Python-land)
weekdays = 'Monday Tuesday Wednesday Thursday Friday Saturday Sunday'.split()
for x in timelist:
    wdlist.append(datetime.fromtimestamp(int(x)).weekday())

wdCounter = Counter(wdlist)
wddata = wdCounter.most_common()
wddata = sorted(wddata)

# change tuple weekday numbers to weekday names
# NOTE(review): assumes all 7 weekdays occur in wddata; a day with zero
# activity would misalign names or raise IndexError. The new version
# fills gaps with wd_counter.get(i, 0) instead.
y = []
c = 0
for z in weekdays:
    y.append(tuple([z, wddata[c][1]]))
    c+=1
wddata = y

# draw WEEKDAY graph
graph = Pyasciigraph(
    separator_length=4,
    multivalue=False,
    human_readable='si',
)
for line in graph.graph('Day of the Week Activity', wddata):
    print(line)

### PRINT INFO ###

getProfile()
print(' ')
getComments()
print(' ')
timeGraph(timelist)
# NEW-side script entry guard.
if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ascii_graph==1.5.2
certifi==2025.11.12
charset-normalizer==3.4.4
idna==3.11
requests==2.32.5
urllib3==2.5.0