Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
358 changes: 190 additions & 168 deletions reddit-analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,186 +5,208 @@
from collections import Counter
import time

# NOTE(review): this span is the OLD side of a pasted GitHub diff; its
# replacement (wrapped in main()) appears further down in this paste.
if len(sys.argv) == 2:
    username = str(sys.argv[1])
else:
    # Wrong argument count: show usage and bail out.
    print('-----')
    print('usage: reddit-analyzer.py USERNAME')
    print('-----')
    sys.exit()

# initialize variables
# username = 'spez'
lastaction = 0  # newest comment/post timestamp, filled in later
headers = {'User-Agent': 'testbot'}  # custom UA; the new code notes Reddit blocks defaults
curts = int(time.time())  # pagination cursor: fetch items "before" this timestamp
commentdata = []
linkdata = []
timelist = []  # created_utc of every comment and post, for the time graphs
hourseconds = 3600
houroffset = -7  # local-time offset in hours (MST per the new code's comment)
offset = hourseconds*houroffset

# let people know that it's working
print(' --- fetching data for user: '+username+' ---')
print(' ')

# fetch profile data
r3 = requests.get('https://www.reddit.com/user/'+username+'/about.json', headers=headers)
userdata = r3.json()['data']  # NOTE(review): no status/error check — raises on bad JSON
def main():
    """Entry point of the rewritten script (NEW side of the pasted diff).

    NOTE(review): the rest of main()'s body continues further down in this
    paste, interleaved with old-version lines by the diff rendering.
    """
    if len(sys.argv) == 2:
        username = str(sys.argv[1])
    else:
        # Wrong argument count: print usage and exit.
        print('-----')
        print('usage: python reddit-analyzer.py USERNAME')
        print('-----')
        sys.exit()

    # --- Configuration ---
    # Reddit requires a unique User-Agent to not block the request
    headers = {'User-Agent': 'python:reddit-analyzer:v2.0 (by /u/unknown)'}

# fetch comments
# NOTE(review): OLD side of the diff — Pushshift pagination, replaced by the
# Reddit JSON API (fetch_reddit_data) in the new version further down.
while True:
    comurl = 'https://api.pushshift.io/reddit/search/comment/?author='+username+'&size=500&before='+str(curts)
    r1 = requests.get(comurl, headers=headers)
    tempdata = r1.json()['data']
    commentdata += tempdata
    try:
        # A full page (500 items) implies more may follow; advance the
        # "before" cursor to the oldest item's timestamp.
        if tempdata[499]:
            curts = tempdata[499]['created_utc']
    except: break  # IndexError on a short page ends pagination (bare except also hides real errors)

# re-establish current time
curts = int(time.time())

# fetch posts/submissions
while True:
    linkurl = 'https://api.pushshift.io/reddit/search/submission/?author='+username+'&size=500&before='+str(curts)
    r2 = requests.get(linkurl, headers=headers)
    postdata = r2.json()['data']
    linkdata += postdata
    try:
        if postdata[499]:
            curts = postdata[499]['created_utc']
    except: break


# set last active time
# NOTE(review): IndexErrors if the user has no comments or no posts.
lastcomment = commentdata[0]['created_utc']  # listings arrive newest-first
lastpost = postdata[0]['created_utc']

if lastcomment > lastpost:
    lastaction = lastcomment
else: lastaction = lastpost

# Timezone offset (hours). -7 is MST. Change this to your local offset.
houroffset = -7
hourseconds = 3600
offset = hourseconds * houroffset

# NOTE(review): the next loop and the two comments above it are OLD-side
# diff lines interleaved into the NEW code by the paste.
# add all subreddits to a list
# add all timed activities to a list
subList = []
for x in commentdata:
    subList.append(x['subreddit'].lower())
    timelist.append(x['created_utc'])
print(f' --- fetching data for user: {username} ---')
print(' ')

# NOTE(review): OLD-side loop over submissions, interleaved here.
for x in postdata:
    subList.append(x['subreddit'].lower())
    timelist.append(x['created_utc'])
# --- Fetch Profile Data ---
try:
    r_profile = requests.get(f'https://www.reddit.com/user/{username}/about.json', headers=headers)
    if r_profile.status_code == 404:
        print("Error: User not found.")
        sys.exit()
    elif r_profile.status_code == 429:
        print("Error: Too many requests. Reddit is rate-limiting you.")
        sys.exit()

    userdata = r_profile.json().get('data')
    if not userdata:
        print("Error: Could not retrieve user data.")
        sys.exit()
# SystemExit is not an Exception subclass, so the sys.exit() calls above
# are not swallowed by this handler.
except Exception as e:
    print(f"Error fetching profile: {e}")
    sys.exit()

# --- Fetch Comments & Posts (Pagination) ---
def fetch_reddit_data(ctype):
    """
    Fetch all of a user's comments or submissions from Reddit's JSON API.

    ctype: 'comments' or 'submitted' (the listing endpoint name).

    Returns a list of the raw 'data' dicts for every item, newest first.
    Pagination follows the listing's 'after' cursor until it is exhausted.
    Failures end pagination early (best effort, possibly partial results)
    but are now reported instead of being silently swallowed.

    Relies on 'username' and 'headers' from the enclosing scope.
    """
    items = []
    after = None
    base_url = f'https://www.reddit.com/user/{username}/{ctype}.json'

    print(f"Downloading {ctype}...")

    while True:
        # requests drops params whose value is None, so the first page
        # is requested without an 'after' cursor.
        params = {'limit': 100, 'after': after}
        try:
            r = requests.get(base_url, headers=headers, params=params)
            if r.status_code != 200:
                # e.g. 403 (private/suspended account) or 429 (rate limit).
                print(f"Warning: stopped fetching {ctype} (HTTP {r.status_code}).")
                break

            data = r.json().get('data', {})
            children = data.get('children', [])

            for child in children:
                items.append(child['data'])

            after = data.get('after')
            if not after:
                break  # no cursor -> last page reached

            time.sleep(1)  # Be nice to the API
        except (requests.RequestException, ValueError, KeyError) as e:
            # RequestException: network/transport failure;
            # ValueError: malformed JSON body; KeyError: malformed child.
            print(f"Warning: error while fetching {ctype}: {e}")
            break
    return items

# NOTE(review): NEW-side code with several OLD-side lines interleaved by
# the paste (the offset adjustment, the set creation, and the unguarded
# open() of all-locations.txt below are from the old version).
commentdata = fetch_reddit_data('comments')
linkdata = fetch_reddit_data('submitted')

if not commentdata and not linkdata:
    print("No activity found for this user (or profile is private/banned).")
    sys.exit()

# --- Calculations ---
last_action_ts = 0

# Safely get last action times
if commentdata:
    lastcomment = commentdata[0]['created_utc']  # listings are newest-first
else:
    lastcomment = 0

# adjust time for offset
timelist = [x + offset for x in timelist]
if linkdata:
    lastpost = linkdata[0]['created_utc']
else:
    lastpost = 0

# and create a set for comparison purposes
sublistset = set(subList)
last_action_ts = max(lastcomment, lastpost)

# load subreddits from file and check them against comments
# NOTE(review): old-side line — file handle never closed; the new version
# further down uses a with-statement and handles FileNotFoundError.
locList = [line.rstrip('\n').lower() for line in open('all-locations.txt')]
loclistset = set(locList)
# Aggregate Subreddits and Timestamps
subList = []
timelist = []

for x in commentdata:
    subList.append(x['subreddit'].lower())
    timelist.append(x['created_utc'])

def getProfile():
    """Print the account summary: identity, karma totals, location hits.

    Reads the module-level userdata, lastaction, commentdata, linkdata,
    sublistset and loclistset (old-version display routine).
    """
    summary = [
        '[+] username : ' + str(userdata['name']),
        '[+] creation date : ' + str(datetime.fromtimestamp(userdata['created_utc'])),
        '[+] last action : ' + str(datetime.fromtimestamp(lastaction)),
        '[+] verified email : ' + str(userdata['has_verified_email']),
        '---',
        '[+] total comments : ' + str(len(commentdata)),
        '[+] comment karma : ' + str(userdata['comment_karma']),
        '---',
        '[+] total links : ' + str(len(linkdata)),
        '[+] link karma : ' + str(userdata['link_karma']),
        '---',
        # Subreddits the user posts in that match the known-locations list.
        '[+] location based reddit(s): ' + str(sublistset.intersection(loclistset)),
    ]
    for row in summary:
        print(row)
# Collect each submission's subreddit (lowercased) and timestamp.
for x in linkdata:
    subList.append(x['subreddit'].lower())
    timelist.append(x['created_utc'])

# Adjust time for offset
timelist = [x + offset for x in timelist]

def getComments():
    # NOTE(review): the next comment and assignment look like NEW-side diff
    # lines that the paste dropped inside this OLD-side function.
    # Create sets
    sublistset = set(subList)

    # draw and print ascii graph
    # Frequency of each subreddit across all comments and posts,
    # rendered as a horizontal ASCII bar chart.
    counter = Counter(subList)
    gdata = counter.most_common()

    graph = Pyasciigraph(
        separator_length=4,
        multivalue=False,
        human_readable='si',
    )
    for line in graph.graph('Comment Activity', gdata):
        print(line)

def timeGraph(timelist):
    """Print an hour-of-day activity histogram (old-version routine).

    NOTE(review): in the original file this function appears to continue
    with the weekday graph that shows up further down in this paste; the
    diff interleaving has split it in two.
    """
    newtl = [] # hour list
    wdlist = [] # weekday list

    # fill newtl with HOURs
    for x in timelist:
        newtl.append(datetime.fromtimestamp(int(x)).hour)

    # create hour name list
    hournames = '00:00 01:00 02:00 03:00 04:00 05:00 06:00 07:00 08:00 09:00 10:00 11:00 12:00 13:00 14:00 15:00 16:00 17:00 18:00 19:00 20:00 21:00 22:00 23:00'.split()

    # deal with HOUR counting
    tgCounter = Counter(newtl)
    tgdata = tgCounter.most_common()
    # sort by HOUR not popularity
    tgdata = sorted(tgdata)

    # NOTE(review): this pairing assumes all 24 hours occur in tgdata; an
    # hour with zero activity would misalign labels or raise IndexError.
    # The new version fills gaps with hour_counter.get(h, 0) instead.
    d = []
    e = 0
    for g in hournames:
        d.append(tuple([g, tgdata[e][1]]))
        e+=1
    tgdata = d

    # draw HOUR graph
    graph = Pyasciigraph(
        separator_length=4,
        multivalue=False,
        human_readable='si',
    )
    for line in graph.graph('Time Activity', tgdata):
        print(line)
# Handle the external file safely
try:
    with open('all-locations.txt', 'r') as f:
        # One lowercased subreddit name per line; intersect with the
        # user's active subreddits to find location-revealing ones.
        locList = [line.rstrip('\n').lower() for line in f]
        loclistset = set(locList)
        location_matches = sublistset.intersection(loclistset)
except FileNotFoundError:
    # Fall back to a descriptive string so the profile printout still works.
    location_matches = "('all-locations.txt' not found)"

# --- Display Functions ---

def print_profile():
    """Print the account overview: identity, karma totals and location hits.

    Reads userdata, last_action_ts, commentdata, linkdata and
    location_matches from the enclosing scope.
    """
    created = datetime.fromtimestamp(userdata.get("created_utc", 0))
    print('[+] username : ' + str(userdata.get("name")))
    print('[+] creation date : ' + str(created))
    # Only shown when the user has at least one comment or post.
    if last_action_ts > 0:
        last_seen = datetime.fromtimestamp(last_action_ts)
        print('[+] last action : ' + str(last_seen))
    print('[+] verified email : ' + str(userdata.get("has_verified_email")))
    print('---')
    print('[+] total comments : ' + str(len(commentdata)))
    print('[+] comment karma : ' + str(userdata.get("comment_karma")))
    print('---')
    print('[+] total links : ' + str(len(linkdata)))
    print('[+] link karma : ' + str(userdata.get("link_karma")))
    print('---')
    print('[+] location subreddits: ' + str(location_matches))

def print_sub_graph():
    """Render an ASCII bar chart of the user's ten most active subreddits.

    Reads subList from the enclosing scope; prints nothing for an
    inactive user.
    """
    if not subList:
        return

    # Limit to top 10 for cleanliness
    top_subs = Counter(subList).most_common(10)

    chart = Pyasciigraph(
        separator_length=4,
        multivalue=False,
        human_readable='si',
    )
    for row in chart.graph('Top Subreddits', top_subs):
        print(row)

def print_time_graph(ts_list):
    """Chart activity by hour of day and by weekday for the given timestamps.

    ts_list: iterable of (offset-adjusted) Unix timestamps. Prints nothing
    when it is empty.
    """
    if not ts_list:
        return

    stamps = [datetime.fromtimestamp(int(t)) for t in ts_list]

    # --- Hour Analysis ---
    # Histogram over 00:00-23:00; quiet hours are shown as zero so every
    # label always appears.
    per_hour = Counter(s.hour for s in stamps)
    hour_rows = [(f"{h:02d}:00", per_hour.get(h, 0)) for h in range(24)]

    chart = Pyasciigraph(
        separator_length=4,
        multivalue=False,
        human_readable='si',
    )

    print(' ')
    for row in chart.graph('Activity by Hour (Local Time)', hour_rows):
        print(row)

    print(' ')

    # --- Weekday Analysis ---
    # datetime.weekday() maps Monday to 0; fill quiet days with zero.
    per_day = Counter(s.weekday() for s in stamps)
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    day_rows = [(name, per_day.get(i, 0)) for i, name in enumerate(day_names)]

    for row in chart.graph('Activity by Day of Week', day_rows):
        print(row)

# --- Execution ---
# Tail of main(): print the profile summary, then the two graphs.
print_profile()
print(' ')
print_sub_graph()
print(' ')
print_time_graph(timelist)

# NOTE(review): OLD-side diff lines — the weekday half of timeGraph() plus
# the old top-level calls — followed by the NEW __main__ guard at the end.
# establish weekday list (0 is Monday in Python-land)
weekdays = 'Monday Tuesday Wednesday Thursday Friday Saturday Sunday'.split()
for x in timelist:
    wdlist.append(datetime.fromtimestamp(int(x)).weekday())

wdCounter = Counter(wdlist)
wddata = wdCounter.most_common()
wddata = sorted(wddata)

# change tuple weekday numbers to weekday names
# NOTE(review): assumes all 7 weekdays occur in wddata; a day with zero
# activity would misalign names or raise IndexError. The new version
# fills gaps with wd_counter.get(i, 0) instead.
y = []
c = 0
for z in weekdays:
    y.append(tuple([z, wddata[c][1]]))
    c+=1
wddata = y

# draw WEEKDAY graph
graph = Pyasciigraph(
    separator_length=4,
    multivalue=False,
    human_readable='si',
)
for line in graph.graph('Day of the Week Activity', wddata):
    print(line)

### PRINT INFO ###

getProfile()
print(' ')
getComments()
print(' ')
timeGraph(timelist)
# NEW-side script entry guard.
if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ascii_graph==1.5.2
certifi==2025.11.12
charset-normalizer==3.4.4
idna==3.11
requests==2.32.5
urllib3==2.5.0