Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,41 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Build & Run Commands
- Install dependencies: `pip install -r requirements.txt`
- Run application: `python src/main.py <org_names>`
- Run scheduler: `python src/scheduler.py <org_names>`
- Get achievements only: `python src/main.py --achievements-only <org_names>`
- Run tests: `pytest`
- Lint code: `flake8 src/`
- Format code: `black src/`
- Type check: `mypy src/`

## Environment Setup
Required environment variables:
- `GITHUB_TOKEN`: GitHub personal access token
- `GOOGLE_APPLICATION_CREDENTIALS_JSON`: JSON string of Firestore service account credentials
- `COLLECTION_INTERVAL`: daily, hourly, minutely, or weekly (default: daily)
- `COLLECTION_TIME`: time to run collection (default: 00:00)
- `GITHUB_IGNORED_USERS`: comma-separated list of GitHub usernames to ignore (default: gregv)

## Architecture Overview
This is a GitHub organization metrics collector with the following key components:

**Core Classes:**
- `GitHubClient`: Handles GitHub API interactions using aiohttp for async requests
- `FirestoreClient`: Manages Firestore database operations for data persistence
- `MetricsCollector`: Orchestrates data collection and coordinates between GitHub and Firestore
- `AchievementsGenerator`: Generates achievement data based on contributor statistics
- `Scheduler`: Handles periodic execution of metrics collection

**Data Flow:**
1. GitHub API → contributor stats, repository data, pull requests, commits
2. Firestore structure: organizations/{org}/repositories/{repo}/contributors/{user}
3. Achievements are generated from aggregated contributor data and stored separately

**Key Operations:**
- Organization processing: fetches all repos → all contributors → individual stats
- Achievement generation: aggregates data across repositories to create achievements
- Scheduled execution: runs collection at configurable intervals

## Code Style Guidelines
- Use Python 3.11+ features
- Format with black (line length: 88)
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ RUN useradd -m appuser
USER appuser

# Run the scheduler
CMD ["python", "src/scheduler.py", "2024-Arizona-Opportunity-Hack"]
CMD ["python", "src/scheduler.py", "2026-ASU-WiCS-Opportunity-Hack"]
16 changes: 2 additions & 14 deletions fly.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,15 @@ primary_region = 'lax'

[build]

[env]
PORT = '8080'

[[mounts]]
source = 'github_stats_data'
destination = '/data'

[http_service]
internal_port = 8080
force_https = true
auto_stop_machines = 'stop'
auto_start_machines = true
min_machines_running = 1
processes = ['app']
[processes]
app = "python src/scheduler.py 2026-ASU-WiCS-Opportunity-Hack"

[[vm]]
size = 'shared-cpu-1x'
memory = '512mb'
cpu_kind = 'shared'
cpus = 1

[[metrics]]
port = 9091
path = '/metrics'
79 changes: 63 additions & 16 deletions src/achievements_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
logger = logging.getLogger(__name__)

class AchievementsGenerator:
MIN_FILES_FOR_PRODUCTIVE_TEAM = 10

def __init__(self):
    """Initialize the achievements generator (stateless; logs creation)."""
    logger.info("AchievementsGenerator initialized")

Expand Down Expand Up @@ -41,6 +43,9 @@ def generate_achievements(self, org_name: str, repositories: List[Dict[str, Any]
# Generate team achievements
achievements.extend(self._find_most_productive_team(org_name, repos_data, contributors_data))
achievements.extend(self._find_most_collaborative_team(org_name, repos_data, contributors_data))

# Generate mentor opportunity entries for teams that could use support
achievements.extend(self._find_teams_ready_for_boost(org_name, repos_data, contributors_data))

logger.info(f"Generated {len(achievements)} achievements for {org_name}")
return achievements
Expand Down Expand Up @@ -128,7 +133,7 @@ def _find_epic_pr(self, org_name: str, repos_data: Dict[str, Any], contributors_
},
"value": formatted_size,
"icon": "merge",
"description": "Largest pull request merged",
"description": "Largest merged PR by lines added + deleted",
"repo": largest_pr_repo,
"prNumber": str(largest_pr_number) if largest_pr_number else ""
}]
Expand Down Expand Up @@ -173,9 +178,9 @@ def _find_night_owl(self, org_name: str, repos_data: Dict[str, Any], contributor
"team": latest_contributor.get('team', ""),
"githubUsername": latest_contributor['login']
},
"value": latest_total_commits,
"value": f"{latest_total_commits} commits",
"icon": "accessTime",
"description": "Most commits made at night",
"description": "Commits between 10pm–4am MST",
"repo": latest_repo
}]
return []
Expand Down Expand Up @@ -209,7 +214,7 @@ def _find_code_surgeon(self, org_name: str, repos_data: Dict[str, Any], contribu
},
"value": formatted_deletions,
"icon": "delete",
"description": "Most code deleted",
"description": "Most lines of code removed across commits",
"repo": max_deletion_repo
}]
return []
Expand Down Expand Up @@ -239,9 +244,9 @@ def _find_pr_master(self, org_name: str, repos_data: Dict[str, Any], contributor
"team": max_pr_contributor.get('team', ""),
"githubUsername": max_pr_contributor['login']
},
"value": str(max_prs),
"value": f"{max_prs} PRs",
"icon": "pull_request",
"description": "Most PRs created",
"description": "Most pull requests created",
"repo": max_pr_repo
}]
return []
Expand Down Expand Up @@ -271,9 +276,9 @@ def _find_issue_resolver(self, org_name: str, repos_data: Dict[str, Any], contri
"team": max_issue_contributor.get('team', ""),
"githubUsername": max_issue_contributor['login']
},
"value": str(max_issues_closed),
"value": f"{max_issues_closed} issues",
"icon": "task_alt",
"description": "Most issues closed",
"description": "Most GitHub issues closed",
"repo": max_issue_repo
}]
return []
Expand Down Expand Up @@ -304,9 +309,9 @@ def _find_review_champion(self, org_name: str, repos_data: Dict[str, Any], contr
"team": max_review_contributor.get('team', ""),
"githubUsername": max_review_contributor['login']
},
"value": str(max_reviews),
"value": f"{max_reviews} reviews",
"icon": "rate_review",
"description": "Most PR reviews submitted",
"description": "Most pull request reviews submitted",
"repo": max_review_repo
}]
return []
Expand Down Expand Up @@ -336,9 +341,9 @@ def _find_weekend_warrior(self, org_name: str, repos_data: Dict[str, Any], contr
"team": max_weekend_contributor.get('team', ""),
"githubUsername": max_weekend_contributor['login']
},
"value": str(max_weekend_commits),
"value": f"{max_weekend_commits} commits",
"icon": "weekend",
"description": "Most commits on weekends",
"description": "Most commits on Saturday & Sunday",
"repo": max_weekend_repo
}]
return []
Expand Down Expand Up @@ -407,16 +412,19 @@ def _find_most_productive_team(self, org_name: str, repos_data: Dict[str, Any],
tasks_completed = 0
members_count = len(team_members)

files_changed = 0
for member in team_members:
# Count completed tasks (commits + PRs merged + issues closed)
tasks_completed += member.get('commits', 0)
tasks_completed += member.get('pull_requests', {}).get('merged', 0)
tasks_completed += member.get('issues', {}).get('closed', 0)

if tasks_completed > 0 and members_count > 0:
files_changed += member.get('unique_files_changed', 0)

if tasks_completed > 0 and members_count > 0 and files_changed >= self.MIN_FILES_FOR_PRODUCTIVE_TEAM:
team_productivity[team_name] = {
"tasks": tasks_completed,
"members": members_count,
"files_changed": files_changed,
"members_data": team_members
}

Expand All @@ -436,7 +444,8 @@ def _find_most_productive_team(self, org_name: str, repos_data: Dict[str, Any],
"value": f"{team_data['tasks']} tasks",
"icon": "group",
"members": team_data["members"],
"description": "Completed the most tasks during the hackathon",
"filesChanged": team_data["files_changed"],
"description": "Most commits + merged PRs + issues closed",
"teamPage": team_page
}]
except Exception as e:
Expand Down Expand Up @@ -493,9 +502,47 @@ def _find_most_collaborative_team(self, org_name: str, repos_data: Dict[str, Any
"value": f"{team_data['collaboration']} PRs reviewed",
"icon": "merge",
"members": team_data["members"],
"description": "Highest number of PR reviews and comments",
"description": "Highest number of pull request reviews",
"teamPage": team_page
}]
except Exception as e:
logger.error(f"Error finding most collaborative team achievement: {str(e)}")
return []

def _find_teams_ready_for_boost(self, org_name: str, repos_data: Dict[str, Any], contributors_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Find teams with low GitHub activity so mentors can offer support.

    A team qualifies for a "Ready for a Boost" entry when its combined
    unique files changed falls below MIN_FILES_FOR_PRODUCTIVE_TEAM or its
    combined commit count falls below MIN_COMMITS_FOR_BOOST.

    Args:
        org_name: GitHub organization being processed (used for logging).
        repos_data: Repository metadata (unused here; kept for signature
            parity with the other achievement finders).
        contributors_data: Per-contributor stat dicts; each may carry a
            'team' key plus 'commits' and 'unique_files_changed' counts.

    Returns:
        A list of mentor-opportunity entry dicts (possibly empty). Never
        raises: errors are logged and an empty list is returned.
    """
    # Teams with fewer total commits than this are flagged even if they
    # have already touched enough files.
    MIN_COMMITS_FOR_BOOST = 5
    try:
        # Group contributors by team; contributors without a team are skipped.
        teams = defaultdict(list)
        for contributor in contributors_data:
            team = contributor.get('team')
            if team:
                teams[team].append(contributor)

        if not teams:
            return []

        boost_teams = []
        for team_name, team_members in teams.items():
            total_commits = sum(member.get('commits', 0) for member in team_members)
            files_changed = sum(member.get('unique_files_changed', 0) for member in team_members)
            members_count = len(team_members)

            # Low file churn OR low commit volume marks a team that may
            # benefit from mentor attention.
            if files_changed < self.MIN_FILES_FOR_PRODUCTIVE_TEAM or total_commits < MIN_COMMITS_FOR_BOOST:
                # Slug used by the frontend to link to the team's page.
                team_page = team_name.lower().replace(' ', '-')
                boost_teams.append({
                    "title": "Ready for a Boost",
                    "team": team_name,
                    "value": f"{files_changed} files, {total_commits} commits",
                    "icon": "rocket_launch",
                    "members": members_count,
                    "filesChanged": files_changed,
                    "description": "This team might benefit from some mentor guidance to get rolling!",
                    "teamPage": team_page,
                    "type": "mentor_opportunity"
                })

        logger.info(f"Found {len(boost_teams)} teams ready for a boost in {org_name}")
        return boost_teams
    except Exception as e:
        logger.error(f"Error finding teams ready for boost: {str(e)}")
        return []
69 changes: 62 additions & 7 deletions src/github_client.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
import aiohttp
import asyncio
from typing import List, Dict, Any
from typing import List, Dict, Any, Set
import logging
import os

# Filenames and extensions skipped when counting a contributor's
# unique_files_changed: boilerplate/docs rather than substantive code.
EXCLUDED_FILENAMES = {
    ".gitattributes",
    ".gitignore",
    "CHANGELOG.md",
    "CLAUDE.md",
    "CODE_OF_CONDUCT.md",
    "CONTRIBUTING.md",
    "LICENSE",
    "LICENSE.md",
    "README.md",
}
EXCLUDED_EXTENSIONS = {".lock", ".md", ".txt"}

logger = logging.getLogger(__name__)

class GitHubClient:
def __init__(self, token: str, ignored_users: Set[str] | None = None):
    """Create a GitHub API client.

    Args:
        token: GitHub personal access token used for API authentication.
        ignored_users: Logins to drop from contributor listings;
            defaults to an empty set when not provided.
    """
    self.token = token
    self.base_url = "https://api.github.com"
    self.headers = {
        "Authorization": f"token {self.token}",
        "Accept": "application/vnd.github.v3+json"
    }
    # The aiohttp session is created lazily in ensure_session().
    self.session = None
    self.ignored_users = ignored_users or set()
    logger.info(f"GitHubClient initialized with {len(self.ignored_users)} ignored users: {self.ignored_users}")

async def ensure_session(self):
if self.session is None or self.session.closed:
Expand All @@ -31,7 +43,19 @@ async def get_organization_repos(self, org_name: str) -> List[Dict[str, Any]]:
async def get_repo_contributors(self, repo_full_name: str) -> List[Dict[str, Any]]:
    """Return contributors for a repository, excluding ignored users.

    Args:
        repo_full_name: "owner/repo" identifier for the repository.

    Returns:
        Contributor dicts from the GitHub API, with any contributor whose
        login appears in self.ignored_users filtered out.
    """
    await self.ensure_session()
    url = f"{self.base_url}/repos/{repo_full_name}/contributors"
    contributors = await self.get_paginated_data(url)

    # Drop contributors whose login is in self.ignored_users.
    filtered_contributors = [
        contributor for contributor in contributors
        if contributor.get('login') not in self.ignored_users
    ]

    if len(contributors) != len(filtered_contributors):
        ignored_count = len(contributors) - len(filtered_contributors)
        logger.info(f"Filtered out {ignored_count} ignored contributors from {repo_full_name}")

    return filtered_contributors

async def get_contributor_stats(self, org_name: str, repo_name: str, contributor_login: str) -> Dict[str, Any]:
await self.ensure_session()
Expand All @@ -41,6 +65,7 @@ async def get_contributor_stats(self, org_name: str, repo_name: str, contributor
"login": contributor_login,
"org_name": org_name,
"repo_name": repo_name,
"team": repo_name,
"commits": 0,
"additions": 0,
"deletions": 0,
Expand All @@ -66,6 +91,7 @@ async def get_contributor_stats(self, org_name: str, repo_name: str, contributor
"latest_commit_time": None,
"latest_commit_id": None,
"largest_pr": None,
"unique_files_changed": 0,
"avatar_url": None,
"name": None
}
Expand Down Expand Up @@ -164,10 +190,38 @@ async def get_contributor_stats(self, org_name: str, repo_name: str, contributor
stats["latest_commit_id"] = last_commit["sha"]


for commit in commits:
if "stats" in commit:
stats["additions"] += commit["stats"].get("additions", 0)
stats["deletions"] += commit["stats"].get("deletions", 0)
# Fetch individual commit details for accurate stats and file tracking
# The list-commits endpoint doesn't return stats or files
unique_files = set()
semaphore = asyncio.Semaphore(10)

async def fetch_commit_detail(sha: str) -> dict | None:
async with semaphore:
url = f"{self.base_url}/repos/{repo_full_name}/commits/{sha}"
try:
async with self.session.get(url) as response:
if response.status == 200:
return await response.json()
except Exception as e:
logger.warning(f"Error fetching commit detail {sha}: {str(e)}")
return None

commit_details = await asyncio.gather(
*[fetch_commit_detail(c["sha"]) for c in commits]
)

for detail in commit_details:
if detail:
stats["additions"] += detail.get("stats", {}).get("additions", 0)
stats["deletions"] += detail.get("stats", {}).get("deletions", 0)
for f in detail.get("files", []):
filename = f.get("filename", "")
basename = os.path.basename(filename)
_, ext = os.path.splitext(basename)
if basename not in EXCLUDED_FILENAMES and ext not in EXCLUDED_EXTENSIONS:
unique_files.add(filename)

stats["unique_files_changed"] = len(unique_files)

# Fetch PR stats
prs_url = f"{self.base_url}/repos/{repo_full_name}/pulls"
Expand Down Expand Up @@ -198,6 +252,7 @@ async def get_contributor_stats(self, org_name: str, repo_name: str, contributor
"title": pr_data.get("title"),
"additions": pr_data.get("additions", 0),
"deletions": pr_data.get("deletions", 0),
"changed_files": pr_data.get("changed_files", 0),
"merged_at": pr_data.get("merged_at")
}

Expand Down
Loading