From f9f40ff38d6304022834a553d449769a9a237d3f Mon Sep 17 00:00:00 2001 From: Zakkye on Arch Date: Fri, 5 Dec 2025 11:39:58 +0100 Subject: [PATCH 1/2] fix: replace dead Pushshift API with Reddit JSON and refactor graph logic --- reddit-analyzer.py | 358 ++++++++++++++++++++++++--------------------- 1 file changed, 190 insertions(+), 168 deletions(-) diff --git a/reddit-analyzer.py b/reddit-analyzer.py index d220304..a633512 100644 --- a/reddit-analyzer.py +++ b/reddit-analyzer.py @@ -5,186 +5,208 @@ from collections import Counter import time -if len(sys.argv) == 2: - username = str(sys.argv[1]) -else: - print('-----') - print('usage: reddit-analyzer.py USERNAME') - print('-----') - sys.exit() - -# initialize variables -# username = 'spez' -lastaction = 0 -headers = {'User-Agent': 'testbot'} -curts = int(time.time()) -commentdata = [] -linkdata = [] -timelist = [] -hourseconds = 3600 -houroffset = -7 -offset = hourseconds*houroffset - -# let people know that it's working -print(' --- fetching data for user: '+username+' ---') -print(' ') - -# fetch profile data -r3 = requests.get('https://www.reddit.com/user/'+username+'/about.json', headers=headers) -userdata = r3.json()['data'] +def main(): + if len(sys.argv) == 2: + username = str(sys.argv[1]) + else: + print('-----') + print('usage: python reddit-analyzer.py USERNAME') + print('-----') + sys.exit() + + # --- Configuration --- + # Reddit requires a unique User-Agent to not block the request + headers = {'User-Agent': 'python:reddit-analyzer:v2.0 (by /u/unknown)'} -# fetch comments -while True: - comurl = 'https://api.pushshift.io/reddit/search/comment/?author='+username+'&size=500&before='+str(curts) - r1 = requests.get(comurl, headers=headers) - tempdata = r1.json()['data'] - commentdata += tempdata - try: - if tempdata[499]: - curts = tempdata[499]['created_utc'] - except: break - -# re-establish current time -curts = int(time.time()) - -# fetch posts/submissions -while True: - linkurl = 'https://api.pushshift.io/reddit/search/submission/?author='+username+'&size=500&before='+str(curts) - r2 = requests.get(linkurl, headers=headers) - postdata = r2.json()['data'] - linkdata += postdata - try: - if postdata[499]: - curts = postdata[499]['created_utc'] - except: break - - -# set last active time -lastcomment = commentdata[0]['created_utc'] -lastpost = postdata[0]['created_utc'] - -if lastcomment > lastpost: - lastaction = lastcomment -else: lastaction = lastpost - + # Timezone offset (hours). -7 is MST. Change this to your local offset. + houroffset = -7 + hourseconds = 3600 + offset = hourseconds * houroffset -# add all subreddits to a list -# add all timed activities to a list -subList = [] -for x in commentdata: - subList.append(x['subreddit'].lower()) - timelist.append(x['created_utc']) + print(f' --- fetching data for user: {username} ---') + print(' ') -for x in postdata: - subList.append(x['subreddit'].lower()) - timelist.append(x['created_utc']) + # --- Fetch Profile Data --- + try: + r_profile = requests.get(f'https://www.reddit.com/user/{username}/about.json', headers=headers) + if r_profile.status_code == 404: + print("Error: User not found.") + sys.exit() + elif r_profile.status_code == 429: + print("Error: Too many requests. Reddit is rate-limiting you.") + sys.exit() + + userdata = r_profile.json().get('data') + if not userdata: + print("Error: Could not retrieve user data.") + sys.exit() + except Exception as e: + print(f"Error fetching profile: {e}") + sys.exit() + + # --- Fetch Comments & Posts (Pagination) --- + def fetch_reddit_data(ctype): + """ + Fetches comments or submissions from Reddit JSON API. + ctype: 'comments' or 'submitted' + """ + items = [] + after = None + base_url = f'https://www.reddit.com/user/{username}/{ctype}.json' + + print(f"Downloading {ctype}...") + + while True: + params = {'limit': 100, 'after': after} + try: + r = requests.get(base_url, headers=headers, params=params) + if r.status_code != 200: + break + + data = r.json().get('data', {}) + children = data.get('children', []) + + for child in children: + items.append(child['data']) + + after = data.get('after') + if not after: + break + + time.sleep(1) # Be nice to the API + except Exception: + break + return items + + commentdata = fetch_reddit_data('comments') + linkdata = fetch_reddit_data('submitted') + + if not commentdata and not linkdata: + print("No activity found for this user (or profile is private/banned).") + sys.exit() + + # --- Calculations --- + last_action_ts = 0 + + # Safely get last action times + if commentdata: + lastcomment = commentdata[0]['created_utc'] + else: + lastcomment = 0 -# adjust time for offset -timelist = [x + offset for x in timelist] + if linkdata: + lastpost = linkdata[0]['created_utc'] + else: + lastpost = 0 -# and create a set for comparison purposes -sublistset = set(subList) + last_action_ts = max(lastcomment, lastpost) -# load subreddits from file and check them against comments -locList = [line.rstrip('\n').lower() for line in open('all-locations.txt')] -loclistset = set(locList) + # Aggregate Subreddits and Timestamps + subList = [] + timelist = [] + for x in commentdata: + subList.append(x['subreddit'].lower()) + timelist.append(x['created_utc']) -def getProfile(): - print('[+] username : '+str(userdata['name'])) - print('[+] creation date : '+str(datetime.fromtimestamp(userdata['created_utc']))) - print('[+] last action : '+str(datetime.fromtimestamp(lastaction))) - print('[+] verified email : '+str(userdata['has_verified_email'])) - print('---') - print('[+] total comments : '+str(len(commentdata))) - print('[+] comment karma : '+str(userdata['comment_karma'])) - print('---') - print('[+] total links : '+str(len(linkdata))) - print('[+] link karma : '+str(userdata['link_karma'])) - print('---') - print('[+] location based reddit(s): '+ str(sublistset.intersection(loclistset))) + for x in linkdata: + subList.append(x['subreddit'].lower()) + timelist.append(x['created_utc']) + # Adjust time for offset + timelist = [x + offset for x in timelist] -def getComments(): + # Create sets + sublistset = set(subList) - # draw and print ascii graph - counter = Counter(subList) - gdata = counter.most_common() - - graph = Pyasciigraph( - separator_length=4, - multivalue=False, - human_readable='si', - ) - for line in graph.graph('Comment Activity', gdata): - print(line) - -def timeGraph(timelist): - newtl = [] # hour list - wdlist = [] # weekday list - - # fill newtl with HOURs - for x in timelist: - newtl.append(datetime.fromtimestamp(int(x)).hour) - - # create hour name list - hournames = '00:00 01:00 02:00 03:00 04:00 05:00 06:00 07:00 08:00 09:00 10:00 11:00 12:00 13:00 14:00 15:00 16:00 17:00 18:00 19:00 20:00 21:00 22:00 23:00'.split() - - # deal with HOUR counting - tgCounter = Counter(newtl) - tgdata = tgCounter.most_common() - # sort by HOUR not popularity - tgdata = sorted(tgdata) - - d = [] - e = 0 - for g in hournames: - d.append(tuple([g, tgdata[e][1]])) - e+=1 - tgdata = d - - # draw HOUR graph - graph = Pyasciigraph( - separator_length=4, - multivalue=False, - human_readable='si', - ) - for line in graph.graph('Time Activity', tgdata): - print(line) + # Handle the external file safely + try: + with open('all-locations.txt', 'r') as f: + locList = [line.rstrip('\n').lower() for line in f] + loclistset = set(locList) + location_matches = sublistset.intersection(loclistset) + except FileNotFoundError: + location_matches = "('all-locations.txt' not found)" + + # --- Display Functions --- + + def print_profile(): + print(f'[+] username : {userdata.get("name")}') + print(f'[+] creation date : {datetime.fromtimestamp(userdata.get("created_utc", 0))}') + if last_action_ts > 0: + print(f'[+] last action : {datetime.fromtimestamp(last_action_ts)}') + print(f'[+] verified email : {userdata.get("has_verified_email")}') + print('---') + print(f'[+] total comments : {len(commentdata)}') + print(f'[+] comment karma : {userdata.get("comment_karma")}') + print('---') + print(f'[+] total links : {len(linkdata)}') + print(f'[+] link karma : {userdata.get("link_karma")}') + print('---') + print(f'[+] location subreddits: {location_matches}') + + def print_sub_graph(): + if not subList: + return + counter = Counter(subList) + gdata = counter.most_common(10) # Limit to top 10 for cleanliness + + graph = Pyasciigraph( + separator_length=4, + multivalue=False, + human_readable='si', + ) + for line in graph.graph('Top Subreddits', gdata): + print(line) + + def print_time_graph(ts_list): + if not ts_list: + return + + # --- Hour Analysis --- + hour_list = [datetime.fromtimestamp(int(x)).hour for x in ts_list] + hour_counter = Counter(hour_list) + + # Prepare data for 00:00 to 23:00 (filling gaps with 0) + hour_data = [] + for h in range(24): + time_label = f"{h:02d}:00" + count = hour_counter.get(h, 0) + hour_data.append((time_label, count)) + + graph = Pyasciigraph( + separator_length=4, + multivalue=False, + human_readable='si', + ) + + print(' ') + for line in graph.graph('Activity by Hour (Local Time)', hour_data): + print(line) + + print(' ') + + # --- Weekday Analysis --- + wd_list = [datetime.fromtimestamp(int(x)).weekday() for x in ts_list] + wd_counter = Counter(wd_list) + + weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] + wd_data = [] + + # Map 0-6 to names and fill gaps + for i, day_name in enumerate(weekdays): + count = wd_counter.get(i, 0) + wd_data.append((day_name, count)) + for line in graph.graph('Activity by Day of Week', wd_data): + print(line) + # --- Execution --- + print_profile() print(' ') + print_sub_graph() + print(' ') + print_time_graph(timelist) - # estabish weekday list (0 is Monday in Python-land) - weekdays = 'Monday Tuesday Wednesday Thursday Friday Saturday Sunday'.split() - for x in timelist: - wdlist.append(datetime.fromtimestamp(int(x)).weekday()) - - wdCounter = Counter(wdlist) - wddata = wdCounter.most_common() - wddata = sorted(wddata) - - # change tuple weekday numbers to weekday names - y = [] - c = 0 - for z in weekdays: - y.append(tuple([z, wddata[c][1]])) - c+=1 - wddata = y - - # draw WEEKDAY graph - graph = Pyasciigraph( - separator_length=4, - multivalue=False, - human_readable='si', - ) - for line in graph.graph('Day of the Week Activity', wddata): - print(line) - -### PRINT INFO ### - -getProfile() -print(' ') -getComments() -print(' ') -timeGraph(timelist) \ No newline at end of file +if __name__ == "__main__": + main() From fe4f77f699fa84751ebd3e87f144e35f92509b57 Mon Sep 17 00:00:00 2001 From: Zakkye on Arch Date: Fri, 5 Dec 2025 11:40:34 +0100 Subject: [PATCH 2/2] Added dependencies --- requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c0d0ef2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +ascii_graph==1.5.2 +certifi==2025.11.12 +charset-normalizer==3.4.4 +idna==3.11 +requests==2.32.5 +urllib3==2.5.0