Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,41 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Build & Run Commands
- Install dependencies: `pip install -r requirements.txt`
- Run application: `python src/main.py <org_names>`
- Run scheduler: `python src/scheduler.py <org_names>`
- Get achievements only: `python src/main.py --achievements-only <org_names>`
- Run tests: `pytest`
- Lint code: `flake8 src/`
- Format code: `black src/`
- Type check: `mypy src/`

## Environment Setup
Required environment variables:
- `GITHUB_TOKEN`: GitHub personal access token
- `GOOGLE_APPLICATION_CREDENTIALS_JSON`: JSON string of Firestore service account credentials
- `COLLECTION_INTERVAL`: daily, hourly, minutely, or weekly (default: daily)
- `COLLECTION_TIME`: time to run collection (default: 00:00)
- `GITHUB_IGNORED_USERS`: comma-separated list of GitHub usernames to ignore (default: gregv)

## Architecture Overview
This is a GitHub organization metrics collector with the following key components:

**Core Classes:**
- `GitHubClient`: Handles GitHub API interactions using aiohttp for async requests
- `FirestoreClient`: Manages Firestore database operations for data persistence
- `MetricsCollector`: Orchestrates data collection and coordinates between GitHub and Firestore
- `AchievementsGenerator`: Generates achievement data based on contributor statistics
- `Scheduler`: Handles periodic execution of metrics collection

**Data Flow:**
1. GitHub API → contributor stats, repository data, pull requests, commits
2. Firestore structure: organizations/{org}/repositories/{repo}/contributors/{user}
3. Achievements are generated from aggregated contributor data and stored separately

**Key Operations:**
- Organization processing: fetches all repos → all contributors → individual stats
- Achievement generation: aggregates data across repositories to create achievements
- Scheduled execution: runs collection at configurable intervals

## Code Style Guidelines
- Use Python 3.11+ features
- Format with black (line length: 88)
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ RUN useradd -m appuser
USER appuser

# Run the scheduler
CMD ["python", "src/scheduler.py", "2024-Arizona-Opportunity-Hack"]
CMD ["python", "src/scheduler.py", "2026-ASU-WiCS-Opportunity-Hack"]
16 changes: 2 additions & 14 deletions fly.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,15 @@ primary_region = 'lax'

[build]

[env]
PORT = '8080'

[[mounts]]
source = 'github_stats_data'
destination = '/data'

[http_service]
internal_port = 8080
force_https = true
auto_stop_machines = 'stop'
auto_start_machines = true
min_machines_running = 1
processes = ['app']
[processes]
app = "python src/scheduler.py 2026-ASU-WiCS-Opportunity-Hack"

[[vm]]
size = 'shared-cpu-1x'
memory = '512mb'
cpu_kind = 'shared'
cpus = 1

[[metrics]]
port = 9091
path = '/metrics'
79 changes: 63 additions & 16 deletions src/achievements_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
logger = logging.getLogger(__name__)

class AchievementsGenerator:
MIN_FILES_FOR_PRODUCTIVE_TEAM = 10

def __init__(self):
    """Initialize the achievements generator (stateless; logs creation)."""
    logger.info("AchievementsGenerator initialized")

Expand Down Expand Up @@ -41,6 +43,9 @@ def generate_achievements(self, org_name: str, repositories: List[Dict[str, Any]
# Generate team achievements
achievements.extend(self._find_most_productive_team(org_name, repos_data, contributors_data))
achievements.extend(self._find_most_collaborative_team(org_name, repos_data, contributors_data))

# Generate mentor opportunity entries for teams that could use support
achievements.extend(self._find_teams_ready_for_boost(org_name, repos_data, contributors_data))

logger.info(f"Generated {len(achievements)} achievements for {org_name}")
return achievements
Expand Down Expand Up @@ -128,7 +133,7 @@ def _find_epic_pr(self, org_name: str, repos_data: Dict[str, Any], contributors_
},
"value": formatted_size,
"icon": "merge",
"description": "Largest pull request merged",
"description": "Largest merged PR by lines added + deleted",
"repo": largest_pr_repo,
"prNumber": str(largest_pr_number) if largest_pr_number else ""
}]
Expand Down Expand Up @@ -173,9 +178,9 @@ def _find_night_owl(self, org_name: str, repos_data: Dict[str, Any], contributor
"team": latest_contributor.get('team', ""),
"githubUsername": latest_contributor['login']
},
"value": latest_total_commits,
"value": f"{latest_total_commits} commits",
"icon": "accessTime",
"description": "Most commits made at night",
"description": "Commits between 10pm–4am MST",
"repo": latest_repo
}]
return []
Expand Down Expand Up @@ -209,7 +214,7 @@ def _find_code_surgeon(self, org_name: str, repos_data: Dict[str, Any], contribu
},
"value": formatted_deletions,
"icon": "delete",
"description": "Most code deleted",
"description": "Most lines of code removed across commits",
"repo": max_deletion_repo
}]
return []
Expand Down Expand Up @@ -239,9 +244,9 @@ def _find_pr_master(self, org_name: str, repos_data: Dict[str, Any], contributor
"team": max_pr_contributor.get('team', ""),
"githubUsername": max_pr_contributor['login']
},
"value": str(max_prs),
"value": f"{max_prs} PRs",
"icon": "pull_request",
"description": "Most PRs created",
"description": "Most pull requests created",
"repo": max_pr_repo
}]
return []
Expand Down Expand Up @@ -271,9 +276,9 @@ def _find_issue_resolver(self, org_name: str, repos_data: Dict[str, Any], contri
"team": max_issue_contributor.get('team', ""),
"githubUsername": max_issue_contributor['login']
},
"value": str(max_issues_closed),
"value": f"{max_issues_closed} issues",
"icon": "task_alt",
"description": "Most issues closed",
"description": "Most GitHub issues closed",
"repo": max_issue_repo
}]
return []
Expand Down Expand Up @@ -304,9 +309,9 @@ def _find_review_champion(self, org_name: str, repos_data: Dict[str, Any], contr
"team": max_review_contributor.get('team', ""),
"githubUsername": max_review_contributor['login']
},
"value": str(max_reviews),
"value": f"{max_reviews} reviews",
"icon": "rate_review",
"description": "Most PR reviews submitted",
"description": "Most pull request reviews submitted",
"repo": max_review_repo
}]
return []
Expand Down Expand Up @@ -336,9 +341,9 @@ def _find_weekend_warrior(self, org_name: str, repos_data: Dict[str, Any], contr
"team": max_weekend_contributor.get('team', ""),
"githubUsername": max_weekend_contributor['login']
},
"value": str(max_weekend_commits),
"value": f"{max_weekend_commits} commits",
"icon": "weekend",
"description": "Most commits on weekends",
"description": "Most commits on Saturday & Sunday",
"repo": max_weekend_repo
}]
return []
Expand Down Expand Up @@ -407,16 +412,19 @@ def _find_most_productive_team(self, org_name: str, repos_data: Dict[str, Any],
tasks_completed = 0
members_count = len(team_members)

files_changed = 0
for member in team_members:
# Count completed tasks (commits + PRs merged + issues closed)
tasks_completed += member.get('commits', 0)
tasks_completed += member.get('pull_requests', {}).get('merged', 0)
tasks_completed += member.get('issues', {}).get('closed', 0)

if tasks_completed > 0 and members_count > 0:
files_changed += member.get('unique_files_changed', 0)

if tasks_completed > 0 and members_count > 0 and files_changed >= self.MIN_FILES_FOR_PRODUCTIVE_TEAM:
team_productivity[team_name] = {
"tasks": tasks_completed,
"members": members_count,
"files_changed": files_changed,
"members_data": team_members
}

Expand All @@ -436,7 +444,8 @@ def _find_most_productive_team(self, org_name: str, repos_data: Dict[str, Any],
"value": f"{team_data['tasks']} tasks",
"icon": "group",
"members": team_data["members"],
"description": "Completed the most tasks during the hackathon",
"filesChanged": team_data["files_changed"],
"description": "Most commits + merged PRs + issues closed",
"teamPage": team_page
}]
except Exception as e:
Expand Down Expand Up @@ -493,9 +502,47 @@ def _find_most_collaborative_team(self, org_name: str, repos_data: Dict[str, Any
"value": f"{team_data['collaboration']} PRs reviewed",
"icon": "merge",
"members": team_data["members"],
"description": "Highest number of PR reviews and comments",
"description": "Highest number of pull request reviews",
"teamPage": team_page
}]
except Exception as e:
logger.error(f"Error finding most collaborative team achievement: {str(e)}")
return []

def _find_teams_ready_for_boost(self, org_name: str, repos_data: Dict[str, Any], contributors_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Find teams with low GitHub activity so mentors can offer support.

    A team qualifies for a "Ready for a Boost" entry when its combined
    unique files changed falls below MIN_FILES_FOR_PRODUCTIVE_TEAM or its
    combined commit count falls below MIN_COMMITS_FOR_BOOST.

    Args:
        org_name: GitHub organization being processed (used for logging).
        repos_data: Repository metadata (unused here; kept for signature
            parity with the other achievement finders).
        contributors_data: Per-contributor stat dicts; each may carry a
            'team' key plus 'commits' and 'unique_files_changed' counts.

    Returns:
        A list of mentor-opportunity entry dicts (possibly empty). Never
        raises: errors are logged and an empty list is returned.
    """
    # Teams with fewer total commits than this are flagged even if they
    # have already touched enough files.
    MIN_COMMITS_FOR_BOOST = 5
    try:
        # Group contributors by team; contributors without a team are skipped.
        teams = defaultdict(list)
        for contributor in contributors_data:
            team = contributor.get('team')
            if team:
                teams[team].append(contributor)

        if not teams:
            return []

        boost_teams = []
        for team_name, team_members in teams.items():
            total_commits = sum(member.get('commits', 0) for member in team_members)
            files_changed = sum(member.get('unique_files_changed', 0) for member in team_members)
            members_count = len(team_members)

            # Low file churn OR low commit volume marks a team that may
            # benefit from mentor attention.
            if files_changed < self.MIN_FILES_FOR_PRODUCTIVE_TEAM or total_commits < MIN_COMMITS_FOR_BOOST:
                # Slug used by the frontend to link to the team's page.
                team_page = team_name.lower().replace(' ', '-')
                boost_teams.append({
                    "title": "Ready for a Boost",
                    "team": team_name,
                    "value": f"{files_changed} files, {total_commits} commits",
                    "icon": "rocket_launch",
                    "members": members_count,
                    "filesChanged": files_changed,
                    "description": "This team might benefit from some mentor guidance to get rolling!",
                    "teamPage": team_page,
                    "type": "mentor_opportunity"
                })

        logger.info(f"Found {len(boost_teams)} teams ready for a boost in {org_name}")
        return boost_teams
    except Exception as e:
        logger.error(f"Error finding teams ready for boost: {str(e)}")
        return []
69 changes: 62 additions & 7 deletions src/github_client.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
import aiohttp
import asyncio
from typing import List, Dict, Any
from typing import List, Dict, Any, Set
import logging
import os

# Filenames and extensions skipped when counting a contributor's
# unique_files_changed: boilerplate/docs rather than substantive code.
EXCLUDED_FILENAMES = {
    ".gitattributes",
    ".gitignore",
    "CHANGELOG.md",
    "CLAUDE.md",
    "CODE_OF_CONDUCT.md",
    "CONTRIBUTING.md",
    "LICENSE",
    "LICENSE.md",
    "README.md",
}
EXCLUDED_EXTENSIONS = {".lock", ".md", ".txt"}

logger = logging.getLogger(__name__)

class GitHubClient:
def __init__(self, token: str, ignored_users: Set[str] | None = None):
    """Create a GitHub API client.

    Args:
        token: GitHub personal access token used for API authentication.
        ignored_users: Logins to drop from contributor listings;
            defaults to an empty set when not provided.
    """
    self.token = token
    self.base_url = "https://api.github.com"
    self.headers = {
        "Authorization": f"token {self.token}",
        "Accept": "application/vnd.github.v3+json"
    }
    # The aiohttp session is created lazily in ensure_session().
    self.session = None
    self.ignored_users = ignored_users or set()
    logger.info(f"GitHubClient initialized with {len(self.ignored_users)} ignored users: {self.ignored_users}")

async def ensure_session(self):
if self.session is None or self.session.closed:
Expand All @@ -31,7 +43,19 @@ async def get_organization_repos(self, org_name: str) -> List[Dict[str, Any]]:
async def get_repo_contributors(self, repo_full_name: str) -> List[Dict[str, Any]]:
    """Return contributors for a repository, excluding ignored users.

    Args:
        repo_full_name: "owner/repo" identifier for the repository.

    Returns:
        Contributor dicts from the GitHub API, with any contributor whose
        login appears in self.ignored_users filtered out.
    """
    await self.ensure_session()
    url = f"{self.base_url}/repos/{repo_full_name}/contributors"
    contributors = await self.get_paginated_data(url)

    # Drop contributors whose login is in self.ignored_users.
    filtered_contributors = [
        contributor for contributor in contributors
        if contributor.get('login') not in self.ignored_users
    ]

    if len(contributors) != len(filtered_contributors):
        ignored_count = len(contributors) - len(filtered_contributors)
        logger.info(f"Filtered out {ignored_count} ignored contributors from {repo_full_name}")

    return filtered_contributors

async def get_contributor_stats(self, org_name: str, repo_name: str, contributor_login: str) -> Dict[str, Any]:
await self.ensure_session()
Expand All @@ -41,6 +65,7 @@ async def get_contributor_stats(self, org_name: str, repo_name: str, contributor
"login": contributor_login,
"org_name": org_name,
"repo_name": repo_name,
"team": repo_name,
"commits": 0,
"additions": 0,
"deletions": 0,
Expand All @@ -66,6 +91,7 @@ async def get_contributor_stats(self, org_name: str, repo_name: str, contributor
"latest_commit_time": None,
"latest_commit_id": None,
"largest_pr": None,
"unique_files_changed": 0,
"avatar_url": None,
"name": None
}
Expand Down Expand Up @@ -164,10 +190,38 @@ async def get_contributor_stats(self, org_name: str, repo_name: str, contributor
stats["latest_commit_id"] = last_commit["sha"]


for commit in commits:
if "stats" in commit:
stats["additions"] += commit["stats"].get("additions", 0)
stats["deletions"] += commit["stats"].get("deletions", 0)
# Fetch individual commit details for accurate stats and file tracking
# The list-commits endpoint doesn't return stats or files
unique_files = set()
semaphore = asyncio.Semaphore(10)

async def fetch_commit_detail(sha: str) -> dict | None:
async with semaphore:
url = f"{self.base_url}/repos/{repo_full_name}/commits/{sha}"
try:
async with self.session.get(url) as response:
if response.status == 200:
return await response.json()
except Exception as e:
logger.warning(f"Error fetching commit detail {sha}: {str(e)}")
return None

commit_details = await asyncio.gather(
*[fetch_commit_detail(c["sha"]) for c in commits]
)

for detail in commit_details:
if detail:
stats["additions"] += detail.get("stats", {}).get("additions", 0)
stats["deletions"] += detail.get("stats", {}).get("deletions", 0)
for f in detail.get("files", []):
filename = f.get("filename", "")
basename = os.path.basename(filename)
_, ext = os.path.splitext(basename)
if basename not in EXCLUDED_FILENAMES and ext not in EXCLUDED_EXTENSIONS:
unique_files.add(filename)

stats["unique_files_changed"] = len(unique_files)

# Fetch PR stats
prs_url = f"{self.base_url}/repos/{repo_full_name}/pulls"
Expand Down Expand Up @@ -198,6 +252,7 @@ async def get_contributor_stats(self, org_name: str, repo_name: str, contributor
"title": pr_data.get("title"),
"additions": pr_data.get("additions", 0),
"deletions": pr_data.get("deletions", 0),
"changed_files": pr_data.get("changed_files", 0),
"merged_at": pr_data.get("merged_at")
}

Expand Down
Loading