From 48052ef8b64298d5f0a505b5eec172b9762b44d4 Mon Sep 17 00:00:00 2001 From: JeradCronin <117287370+Jerad551@users.noreply.github.com> Date: Wed, 14 Jan 2026 00:35:35 -0700 Subject: [PATCH] Revert "Update CI pipeline workflow" --- .github/copilot-instructions.md | 157 ---- .github/workflows/ci.yml | 73 -- .gitignore | 88 -- API_REFERENCE.md | 525 ----------- CONTRIBUTING.md | 237 ----- LICENSE.md | 24 +- PROJECT_STATUS.md | 141 --- QUICKSTART.md | 159 ---- README.md | 180 ---- examples/README.md | 120 --- examples/advanced_examples.py | 215 ----- notebooks/exploration.ipynb | 838 ------------------ requirements.txt | 5 - setup.py | 54 -- setup.sh | 92 -- src/__init__.py | 11 - src/__pycache__/__init__.cpython-312.pyc | Bin 445 -> 0 bytes src/__pycache__/load_data.cpython-312.pyc | Bin 748 -> 0 bytes src/cli.py | 271 ------ src/code_frequency_analyzer.py | 270 ------ src/code_frequency_loader.py | 84 -- src/code_frequency_visualizer.py | 339 ------- .../conftest.cpython-312-pytest-9.0.2.pyc | Bin 614 -> 0 bytes .../test_load.cpython-312-pytest-9.0.2.pyc | Bin 15264 -> 0 bytes tests/test_load.py | 90 -- 25 files changed, 1 insertion(+), 3972 deletions(-) delete mode 100644 .github/copilot-instructions.md delete mode 100644 .github/workflows/ci.yml delete mode 100644 .gitignore delete mode 100644 API_REFERENCE.md delete mode 100644 CONTRIBUTING.md delete mode 100644 PROJECT_STATUS.md delete mode 100644 QUICKSTART.md delete mode 100644 README.md delete mode 100644 examples/README.md delete mode 100644 examples/advanced_examples.py delete mode 100644 notebooks/exploration.ipynb delete mode 100644 requirements.txt delete mode 100644 setup.py delete mode 100755 setup.sh delete mode 100644 src/__init__.py delete mode 100644 src/__pycache__/__init__.cpython-312.pyc delete mode 100644 src/__pycache__/load_data.cpython-312.pyc delete mode 100644 src/cli.py delete mode 100644 src/code_frequency_analyzer.py delete mode 100644 src/code_frequency_loader.py delete mode 100644 src/code_frequency_visualizer.py delete mode 100644 tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc delete mode 100644 tests/__pycache__/test_load.cpython-312-pytest-9.0.2.pyc delete mode 100644 tests/test_load.py diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index c016473..0000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,157 +0,0 @@ ----discription': --- -Guidance and best practices for contributing to the RunTime repository -# AI Coding Agent Instructions for RunTime - -## Project Overview -RunTime is a production-ready Python toolkit for analyzing and visualizing code frequency data (from Git repositories). It features: -- Modular architecture: `src/` contains loader, analyzer, visualizer, and CLI modules -- Data-driven workflows: All analysis is based on the root `Code frequency.csv` (2017–2025) -- CLI, Python API, and Jupyter notebook interfaces -- Output visualizations in `output/visualizations/` - -## Key Developer Workflows -- **Testing:** - - Run all tests: `pytest` or `pytest -v` (see `tests/`) - - Coverage: `pytest --cov=src` -- **Analysis:** - - Full: `python src/cli.py analyze --all` - - Top weeks: `python src/cli.py analyze --top 5` - - Sprints: `python src/cli.py analyze --sprints` -- **Visualization:** - - All charts: `python src/cli.py visualize --all` - - Timeline: `python src/cli.py visualize --timeline` - - Heatmap: `python src/cli.py visualize --heatmap` -- **Interactive:** - - Use `notebooks/exploration.ipynb` for stepwise, cell-based analysis and plotting -- **Examples:** - - Run advanced patterns: `python examples/advanced_examples.py` - -## Architecture & Patterns -- **src/code_frequency_loader.py**: Loads CSV, provides summary stats -- **src/code_frequency_analyzer.py**: Advanced analytics (sprints, churn, trends) -- **src/code_frequency_visualizer.py**: All plotting (timeline, heatmap, yearly, dashboard) -- **src/cli.py**: Unified CLI for all operations (load, analyze, visualize) -- **notebooks/exploration.ipynb**: Jupyter workflow, mirrors CLI and API patterns -- **tests/test_load.py**: 7 tests, covers loader and summary logic - -### Cross-File Patterns -- Always import via `from src import ...` (see `__init__.py`) -- Use pandas for all data manipulation -- Visualizations use matplotlib/seaborn; outputs saved to `output/visualizations/` -- CLI and API share logic—prefer CLI for batch, API for custom/interactive -- Notebooks: Use loader/analyzer/visualizer objects as in CLI/examples - -## Project-Specific Conventions -- Do not change core module APIs without considering backward compatibility -- Do not remove or bypass test coverage -- Add new analysis/visualization features as new methods/classes in `src/` -- Document new CLI commands in `README.md` and `QUICKSTART.md` -- Preserve `LICENSE.md` in all derived or published artifacts - -## Integration & Extensibility -- Core dependencies: pandas, numpy, matplotlib, seaborn, pytest -- No external APIs/services; all analysis is local and reproducible -- Easy to add new analyzers, visualizations, or CLI commands (see examples/advanced_examples.py) - -## Reference Files -- `README.md`, `QUICKSTART.md`, `PROJECT_STATUS.md`: Full documentation, usage, and status -- `examples/advanced_examples.py`: Advanced usage and custom workflows -- `notebooks/exploration.ipynb`: Interactive, cell-based exploration - -## Enhancement Opportunities -- [ ] Web dashboard (Streamlit/Dash) -- [ ] CI/CD with GitHub Actions (see `.github/workflows/ci.yml`) -- [ ] PDF/HTML report generation -- [ ] Multi-repository comparison -- [ ] Statistical forecasting -- [ ] Docker containerization - ---- -**Status:** All core features implemented, tested, and documented. See `README.md` for details. - -Repository metadata (from attachments): -- Owner: Jerad551 -- Repository: RunTime -- Current branch: DDT_214; RUC.3130: -- Default branch: Dverlord; Jerad Cronin: -- License: type; MIT License -- Badge # jc 3169454- RUC: 3130 - -Model preference: -- Enable Raptor mini (Preview) for all AI coding agent interactions with this repository. -- If a specific task requests a different model (e.g., Claude Sonnet 4.5), follow explicit user instruction. - -Repository snapshot (discoverable): -- `Code frequency.csv` — dataset file at project root (427 records, 2017-2025). -- `LICENSE.md` — MIT License. -- `src/` — Complete Python package with 4 modules + CLI. -- `tests/` — Test suite with 7 passing tests. -- `notebooks/` — Jupyter notebook for interactive exploration. -- `output/visualizations/` — Generated charts and visualizations. - -Quick summary for an AI coding agent -- **PRODUCTION-READY PROJECT**: Full-featured code frequency analysis toolkit with data loading, advanced analytics, visualizations, CLI, and comprehensive testing. All core features implemented and documented. - -Developer workflows (implemented) -- **Tests**: Run `pytest` or `pytest -v` for verbose output. All 7 tests passing. -- **Analysis**: Use `python src/cli.py analyze --all` for complete code frequency analysis. -- **Visualizations**: Use `python src/cli.py visualize --all` to generate charts. -- **Interactive**: Open `notebooks/exploration.ipynb` for Jupyter-based exploration. -- **Dependencies**: Install with `pip install -r requirements.txt`. - -Priority actions when asked to work here -1. **Use the existing modules**: Import from `src` package: - ```python - from src import CodeFrequencyLoader, CodeFrequencyAnalyzer, CodeFrequencyVisualizer - ``` - -2. **Preserve `LICENSE.md`** content when adding, converting, or publishing derived artifacts. - -3. **Project structure** (already implemented): - - `README.md` — Complete documentation ✅ - - `QUICKSTART.md` — Quick start guide ✅ - - `PROJECT_STATUS.md` — Status report ✅ - - `src/` — 4 core modules + CLI ✅ - - `tests/` — Comprehensive test suite ✅ - - `notebooks/` — Interactive exploration ✅ - - `output/visualizations/` — Generated charts ✅ - -Conventions and examples for quick tasks -- Data analysis (Python): prefer `pandas` for CSV work. Save notebooks under `notebooks/`. -- Add a short README describing column meanings if you infer them from the CSV. -- **Data analysis**: Use existing `CodeFrequencyLoader` and `CodeFrequencyAnalyzer` classes. -- **Visualizations**: Use `CodeFrequencyVisualizer` for charts. -- **CLI**: Use `python src/cli.py ` for all operations. -- **Testing**: Run `pytest` before committing changes. -- **Documentation**: All modules have comprehensive docstrings. - -Current project modules: -- `code_frequency_loader.py` — Data loading & basic statistics -- Do not modify core module APIs without considering backward compatibility. -- Do not remove existing test coverage. - -Integration points and dependencies -- **Core deps**: pandas, numpy, matplotlib, seaborn, pytest (see requirements.txt) -- **No external services** currently integrated -- **Extensibility**: Easy to add new analyzers, visualizations, or CLI commands - -Enhancement opportunities -- [ ] Web dashboard (Streamlit/Dash) -- [ ] CI/CD with GitHub Actions -- [ ] PDF/HTML report generation -- [ ] Multi-repository comparison -- [ ] Statistical forecasting -- [ ] Docker containerization - -If you need more context -- Check `README.md` for complete documentation -- See `QUICKSTART.md` for usage examples -- Read `PROJECT_STATUS.md` for implementation details -- Explore `notebooks/exploration.ipynb` for interactive examples - ---- -**Status**: Production-ready Python data analysis toolkit. All core features implemented and tested -- If the repository gains source code or CI, merge those new discoverable patterns into this file (preserve LICENSE notes and any added README guidance). - ---- -Please review these instructions and tell me which workflow you'd like me to scaffold (data analysis, Python package, Node app, or other). I can then create a starter layout and example commands. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index bc12ecb..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,73 +0,0 @@ -,name: src CI - -on: - push: - branches: [ main, develop, DDT_214 ] - pull_request: - branches: [ main, develop ] - -jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.9', '3.10', '3.11', '3.12'] - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Run tests - run: | - pytest tests/ -v --tb=short - - - name: Test CLI commands - run: | - python src/cli.py load --summary - python src/cli.py analyze --top 5 - - - name: Generate visualizations - run: | - python src/cli.py visualize --all - - - name: Upload visualization artifacts - uses: actions/upload-artifact@v4 - if: matrix.python-version == '3.12' - with: - name: visualizations - path: output/visualizations/ - retention-days: 30 - - lint: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Install linting tools - run: | - python -m pip install --upgrade pip - pip install flake8 black - - - name: Check code style with black - run: | - black --check src/ tests/ || true - - - name: Lint with flake8 - run: | - flake8 src/ tests/ --count --select=E9,F63,F7,F82 --show-source --statistics || true - flake8 src/ tests/ --count --max-complexity=10 --max-line-length=127 --statistics || true diff --git a/.gitignore b/.gitignore deleted file mode 100644 index d9a9958..0000000 --- a/.gitignore +++ /dev/null @@ -1,88 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -PIPFILE.lock - -# PyInstaller -*.manifest -*.spec - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# IDEs -.vscode/ -.idea/ -*.swp -*.swo -*~ -.DS_Store - -# Project specific -output/ -*.png -*.pdf -*.html -!docs/*.html - -# Logs -*.log - -# Temporary files -*.tmp -*.temp -.~* diff --git a/API_REFERENCE.md b/API_REFERENCE.md deleted file mode 100644 index 767f578..0000000 --- a/API_REFERENCE.md +++ /dev/null @@ -1,525 +0,0 @@ -# RunTime API Reference - -Complete API documentation for all RunTime modules. - -## Table of Contents - -- [CodeFrequencyLoader](#codefrequencyloader) -- [CodeFrequencyAnalyzer](#codefrequencyanalyzer) -- [CodeFrequencyVisualizer](#codefrequencyvisualizer) -- [CLI Reference](#cli-reference) - ---- - -## CodeFrequencyLoader - -**Module**: `src.code_frequency_loader` - -Handles loading and basic processing of code frequency data from CSV files. - -### Class: `CodeFrequencyLoader` - -```python -CodeFrequencyLoader(file_path: str = "Code frequency.csv") -``` - -#### Parameters -- **file_path** (str, optional): Path to the CSV file. Default: `"Code frequency.csv"` - -#### Attributes -- **file_path** (str): Path to the data file -- **data** (pd.DataFrame | None): Loaded DataFrame (None until `load()` is called) - -#### Methods - -##### `load() -> pd.DataFrame` -Load the CSV data into a pandas DataFrame with proper datetime parsing. - -**Returns**: -- `pd.DataFrame`: DataFrame with columns `['DateTime', 'Additions', 'Deletions']` - -**Raises**: -- `FileNotFoundError`: If the specified file doesn't exist -- `pd.errors.EmptyDataError`: If the CSV file is empty -- `pd.errors.ParserError`: If the CSV format is invalid - -**Example**: -```python -loader = CodeFrequencyLoader() -data = loader.load() -print(data.head()) -``` - -##### `get_summary() -> dict` -Get summary statistics from the loaded data. - -**Returns**: -- `dict`: Dictionary containing: - - `total_additions` (int): Total lines added - - `total_deletions` (int): Total lines deleted (absolute value) - - `net_changes` (int): Net change (additions - deletions) - - `start_date` (str): First date in dataset - - `end_date` (str): Last date in dataset - - `num_records` (int): Total number of records - -**Raises**: -- `ValueError`: If data hasn't been loaded yet - -**Example**: -```python -loader = CodeFrequencyLoader() -loader.load() -summary = loader.get_summary() -print(f"Total additions: {summary['total_additions']:,}") -``` - -##### `get_head(n: int = 10) -> pd.DataFrame` -Get the first n rows of the dataset. - -**Parameters**: -- **n** (int, optional): Number of rows to return. Default: 10 - -**Returns**: -- `pd.DataFrame`: First n rows of the dataset - -**Example**: -```python -loader = CodeFrequencyLoader() -loader.load() -print(loader.get_head(5)) -``` - ---- - -## CodeFrequencyAnalyzer - -**Module**: `src.code_frequency_analyzer` - -Provides advanced analysis capabilities for code frequency data. - -### Class: `CodeFrequencyAnalyzer` - -```python -CodeFrequencyAnalyzer(loader: CodeFrequencyLoader) -``` - -#### Parameters -- **loader** (CodeFrequencyLoader): Initialized loader with data - -#### Attributes -- **loader** (CodeFrequencyLoader): Reference to the data loader -- **data** (pd.DataFrame): Reference to the loaded data - -#### Methods - -##### `get_yearly_stats() -> pd.DataFrame` -Calculate statistics grouped by year. - -**Returns**: -- `pd.DataFrame`: DataFrame with columns: - - `Year` (int): Year - - `Additions` (int): Total additions for the year - - `Deletions` (int): Total deletions for the year - - `Net_Changes` (int): Net changes for the year - - `Num_Weeks` (int): Number of active weeks - -**Example**: -```python -analyzer = CodeFrequencyAnalyzer(loader) -yearly = analyzer.get_yearly_stats() -print(yearly) -``` - -##### `get_monthly_stats() -> pd.DataFrame` -Calculate statistics grouped by month. - -**Returns**: -- `pd.DataFrame`: DataFrame with year, month, and statistics - -**Example**: -```python -monthly = analyzer.get_monthly_stats() -print(monthly.tail()) -``` - -##### `get_top_activity_weeks(n: int = 10) -> pd.DataFrame` -Find the weeks with the most coding activity. - -**Parameters**: -- **n** (int, optional): Number of top weeks to return. Default: 10 - -**Returns**: -- `pd.DataFrame`: Top n weeks sorted by total activity (additions + |deletions|) - -**Example**: -```python -top_weeks = analyzer.get_top_activity_weeks(5) -print(top_weeks) -``` - -##### `calculate_activity_ratio() -> float` -Calculate the ratio of active weeks to total weeks in the date range. - -**Returns**: -- `float`: Activity ratio as a percentage (0-100) - -**Example**: -```python -ratio = analyzer.calculate_activity_ratio() -print(f"Activity ratio: {ratio:.2f}%") -``` - -##### `get_churn_stats() -> dict` -Calculate code churn statistics. - -**Returns**: -- `dict`: Dictionary containing: - - `total_churn` (int): Total lines changed (additions + |deletions|) - - `avg_weekly_churn` (float): Average weekly churn - - `max_weekly_churn` (int): Maximum churn in a single week - - `weeks_with_activity` (int): Number of weeks with any activity - - `activity_ratio` (float): Percentage of weeks with activity - -**Example**: -```python -churn = analyzer.get_churn_stats() -print(f"Total churn: {churn['total_churn']:,}") -``` - -##### `detect_sprints(threshold_percentile: int = 75) -> pd.DataFrame` -Detect coding sprint periods based on activity threshold. - -**Parameters**: -- **threshold_percentile** (int, optional): Percentile for activity threshold. Default: 75 - -**Returns**: -- `pd.DataFrame`: DataFrame of sprint periods with start/end dates and stats - -**Example**: -```python -sprints = analyzer.detect_sprints(threshold_percentile=80) -print(f"Found {len(sprints)} sprint periods") -``` - -##### `get_activity_periods() -> pd.DataFrame` -Identify all periods of significant activity. - -**Returns**: -- `pd.DataFrame`: DataFrame with activity period information - -**Example**: -```python -periods = analyzer.get_activity_periods() -print(periods) -``` - -##### `get_productivity_trends() -> dict` -Analyze productivity trends across different time periods. - -**Returns**: -- `dict`: Dictionary with trend analysis data - -**Example**: -```python -trends = analyzer.get_productivity_trends() -print(trends) -``` - ---- - -## CodeFrequencyVisualizer - -**Module**: `src.code_frequency_visualizer` - -Creates publication-quality visualizations of code frequency data. - -### Class: `CodeFrequencyVisualizer` - -```python -CodeFrequencyVisualizer(loader: CodeFrequencyLoader, style: str = 'seaborn-v0_8-darkgrid') -``` - -#### Parameters -- **loader** (CodeFrequencyLoader): Initialized loader with data -- **style** (str, optional): Matplotlib style. Default: `'seaborn-v0_8-darkgrid'` - -#### Attributes -- **loader** (CodeFrequencyLoader): Reference to the data loader -- **data** (pd.DataFrame): Reference to the loaded data -- **style** (str): Matplotlib style being used - -#### Methods - -##### `plot_timeline(save_path: str = None) -> None` -Plot additions and deletions over time as a line chart. - -**Parameters**: -- **save_path** (str, optional): Path to save the plot. If None, displays interactively - -**Example**: -```python -visualizer = CodeFrequencyVisualizer(loader) -visualizer.plot_timeline(save_path="output/visualizations/timeline.png") -``` - -##### `plot_net_changes(save_path: str = None) -> None` -Plot net code changes (additions - deletions) as a bar chart. - -**Parameters**: -- **save_path** (str, optional): Path to save the plot. If None, displays interactively - -**Example**: -```python -visualizer.plot_net_changes(save_path="output/visualizations/net_changes.png") -``` - -##### `plot_yearly_summary(save_path: str = None) -> None` -Create grouped bar charts showing yearly statistics. - -**Parameters**: -- **save_path** (str, optional): Path to save the plot. If None, displays interactively - -**Example**: -```python -visualizer.plot_yearly_summary(save_path="output/visualizations/yearly.png") -``` - -##### `plot_activity_heatmap(save_path: str = None) -> None` -Generate a heatmap showing activity by month and year. - -**Parameters**: -- **save_path** (str, optional): Path to save the plot. If None, displays interactively - -**Example**: -```python -visualizer.plot_activity_heatmap(save_path="output/visualizations/heatmap.png") -``` - -##### `create_dashboard(save_dir: str = "output/visualizations") -> None` -Generate all visualizations and save them to a directory. - -**Parameters**: -- **save_dir** (str, optional): Directory to save all plots. Default: `"output/visualizations"` - -**Example**: -```python -visualizer = CodeFrequencyVisualizer(loader) -visualizer.create_dashboard(save_dir="my_charts") -``` - ---- - -## CLI Reference - -**Module**: `src.cli` - -Command-line interface for RunTime toolkit. - -### Commands - -#### `load` -Load and display code frequency data. - -**Usage**: -```bash -python src/cli.py load [OPTIONS] -``` - -**Options**: -- `-f, --file `: Path to CSV file (default: "Code frequency.csv") -- `--summary`: Display summary statistics -- `--head `: Display first n rows (default: 10) - -**Examples**: -```bash -# Show summary -python src/cli.py load --summary - -# Show first 5 rows -python src/cli.py load --head 5 - -# Use custom file -python src/cli.py load -f data/my_data.csv --summary -``` - -#### `analyze` -Perform analysis on code frequency data. - -**Usage**: -```bash -python src/cli.py analyze [OPTIONS] -``` - -**Options**: -- `-f, --file `: Path to CSV file (default: "Code frequency.csv") -- `--all`: Run all analyses -- `--top `: Show top n active weeks -- `--yearly`: Show yearly statistics -- `--monthly`: Show monthly statistics -- `--sprints`: Detect coding sprints -- `--churn`: Calculate churn statistics -- `--activity`: Show activity ratio -- `--trends`: Show productivity trends - -**Examples**: -```bash -# Run all analyses -python src/cli.py analyze --all - -# Show top 10 weeks and sprints -python src/cli.py analyze --top 10 --sprints - -# Yearly and monthly breakdown -python src/cli.py analyze --yearly --monthly -``` - -#### `visualize` -Create visualizations of code frequency data. - -**Usage**: -```bash -python src/cli.py visualize [OPTIONS] -``` - -**Options**: -- `-f, --file `: Path to CSV file (default: "Code frequency.csv") -- `-o, --output `: Output directory (default: "output/visualizations") -- `--all`: Create all visualizations -- `--timeline`: Create timeline plot -- `--net-changes`: Create net changes plot -- `--yearly`: Create yearly summary -- `--heatmap`: Create activity heatmap - -**Examples**: -```bash -# Create all visualizations -python src/cli.py visualize --all - -# Create specific plots -python src/cli.py visualize --timeline --heatmap - -# Custom output directory -python src/cli.py visualize --all -o charts/ -``` - -### Global Options - -- `-h, --help`: Show help message -- `--version`: Show version information - ---- - -## Data Format Specification - -### CSV Input Format - -**Required Columns**: -1. **DateTime** (string): Date in format "YYYY-MM-DD" -2. **Additions** (integer): Number of lines added -3. **Deletions** (integer): Number of lines deleted (typically negative) - -**Example**: -```csv -"DateTime","Additions","Deletions" -"2017-10-22",1123,-155 -"2017-10-29",11,-1 -"2017-11-05",543,-89 -``` - -**Notes**: -- Deletions are typically stored as negative numbers -- Dates should be in ISO format (YYYY-MM-DD) -- Headers are required -- Quotes around values are optional but recommended - ---- - -## Error Handling - -### Common Exceptions - -**FileNotFoundError** -```python -# Raised when CSV file doesn't exist -try: - loader = CodeFrequencyLoader("missing.csv") - loader.load() -except FileNotFoundError as e: - print(f"File not found: {e}") -``` - -**ValueError** -```python -# Raised when trying to use methods before loading data -loader = CodeFrequencyLoader() -try: - loader.get_summary() # Data not loaded yet! -except ValueError as e: - print(f"Error: {e}") -``` - -**pd.errors.ParserError** -```python -# Raised when CSV format is invalid -try: - loader = CodeFrequencyLoader("invalid.csv") - loader.load() -except pd.errors.ParserError as e: - print(f"CSV parsing error: {e}") -``` - ---- - -## Type Hints - -All modules use Python type hints for better code clarity: - -```python -from typing import Optional -import pandas as pd - -def load(self) -> pd.DataFrame: ... -def get_summary(self) -> dict: ... -def get_top_activity_weeks(self, n: int = 10) -> pd.DataFrame: ... -def plot_timeline(self, save_path: Optional[str] = None) -> None: ... -``` - ---- - -## Dependencies - -### Required Packages - -- **pandas** (>=2.0.0): Data manipulation and analysis -- **numpy** (>=1.24.0): Numerical computing -- **matplotlib** (>=3.7.0): Plotting and visualization -- **seaborn** (>=0.12.0): Statistical data visualization - -### Development Packages - -- **pytest** (>=7.4.0): Testing framework -- **black** (>=23.0.0): Code formatting -- **flake8** (>=6.0.0): Code linting - ---- - -## Version History - -**v1.0.0** (Current) -- Initial release -- Full data loading, analysis, and visualization capabilities -- CLI interface -- Comprehensive test suite - ---- - -## See Also - -- [README.md](README.md) - Project overview and documentation -- [QUICKSTART.md](QUICKSTART.md) - Quick start guide -- [CONTRIBUTING.md](CONTRIBUTING.md) - Contribution guidelines -- [LICENSE.md](LICENSE.md) - License information - ---- - -**Last Updated**: December 26, 2025 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index e257ab2..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,237 +0,0 @@ -# RunTime Contributing Guide - -Thank you for considering contributing to RunTime! This document provides guidelines and instructions for contributing. - -## 🚀 Getting Started - -1. **Fork the repository** - ```bash - gh repo fork Jerad551/RunTime - ``` - -2. **Clone your fork** - ```bash - git clone git@github.com:YOUR_USERNAME/RunTime.git - cd RunTime - ``` - -3. **Set up development environment** - ```bash - bash setup.sh - ``` - -## 🔧 Development Setup - -### Install Development Dependencies -```bash -pip install -r requirements.txt -pip install black flake8 # Code formatting and linting -``` - -### Run Tests -```bash -# Run all tests -pytest - -# Run with coverage -pytest --cov=src tests/ - -# Run specific test file -pytest tests/test_load.py -v -``` - -### Code Formatting -```bash -# Format code -black src/ tests/ - -# Check formatting -black --check src/ tests/ - -# Lint code -flake8 src/ tests/ -``` - -## 📝 Contributing Guidelines - -### Code Style -- Follow PEP 8 style guide -- Use type hints where applicable -- Write comprehensive docstrings -- Keep functions focused and small -- Use descriptive variable names - -### Testing -- Write tests for new features -- Maintain 100% test coverage for core functions -- Test edge cases and error conditions -- Use pytest fixtures for common setups - -### Commit Messages -Follow conventional commit format: -``` -type(scope): description - -[optional body] - -[optional footer] -``` - -Types: -- `feat`: New feature -- `fix`: Bug fix -- `docs`: Documentation changes -- `test`: Test additions/changes -- `refactor`: Code refactoring -- `style`: Code style changes -- `chore`: Build/tooling changes - -Examples: -```bash -git commit -m "feat(analyzer): add quarterly statistics method" -git commit -m "fix(loader): handle missing CSV columns gracefully" -git commit -m "docs(readme): update installation instructions" -``` - -### Pull Request Process - -1. **Create a branch** - ```bash - git checkout -b feature/your-feature-name - ``` - -2. **Make your changes** - - Write code - - Add tests - - Update documentation - -3. **Verify everything works** - ```bash - pytest - python src/cli.py analyze --all - ``` - -4. **Commit and push** - ```bash - git add . - git commit -m "feat: your feature description" - git push origin feature/your-feature-name - ``` - -5. **Create Pull Request** - - Use a clear title - - Describe what changed and why - - Reference related issues - - Include screenshots for UI changes - -## 🎯 Areas for Contribution - -### High Priority -- [ ] Web dashboard (Streamlit/Dash) -- [ ] Export reports to PDF/HTML -- [ ] Additional statistical analyses -- [ ] Performance optimizations -- [ ] Docker containerization - -### Medium Priority -- [ ] Multi-repository comparison -- [ ] Custom date range filtering -- [ ] Interactive visualizations -- [ ] Configuration file support -- [ ] More test coverage - -### Documentation -- [ ] API documentation -- [ ] Video tutorials -- [ ] Blog posts -- [ ] Usage examples -- [ ] Translation to other languages - -## 🐛 Reporting Bugs - -Create an issue with: -- Clear title and description -- Steps to reproduce -- Expected vs actual behavior -- Environment details (Python version, OS) -- Relevant error messages/logs - -## 💡 Suggesting Features - -Create an issue with: -- Clear description of the feature -- Use cases and benefits -- Possible implementation approach -- Examples from other tools (if applicable) - -## 📚 Documentation - -When adding features: -- Update relevant documentation files -- Add docstrings to new functions/classes -- Include examples in QUICKSTART.md if applicable -- Update README.md if needed - -## 🎨 Code Examples - -### Adding a New Analysis Method - -```python -# In src/code_frequency_analyzer.py - -def get_quarterly_stats(self) -> pd.DataFrame: - """Calculate statistics by quarter. - - Returns: - DataFrame with quarterly statistics - """ - df = self.data.copy() - df['Quarter'] = df['DateTime'].dt.to_period('Q') - - quarterly = df.groupby('Quarter').agg({ - 'Additions': 'sum', - 'Deletions': 'sum' - }) - - return quarterly -``` - -### Adding a Test - -```python -# In tests/test_analyzer.py - -def test_quarterly_stats(): - """Test quarterly statistics calculation.""" - loader = CodeFrequencyLoader() - analyzer = CodeFrequencyAnalyzer(loader) - - quarterly = analyzer.get_quarterly_stats() - - assert isinstance(quarterly, pd.DataFrame) - assert 'Additions' in quarterly.columns - assert 'Deletions' in quarterly.columns -``` - -## 🤝 Code of Conduct - -- Be respectful and inclusive -- Welcome newcomers -- Accept constructive criticism -- Focus on what's best for the community -- Show empathy towards others - -## 📄 License - -By contributing, you agree that your contributions will be licensed under the MIT License. - -## ❓ Questions? - -Feel free to: -- Open an issue for questions -- Start a discussion -- Reach out to maintainers - ---- - -Thank you for contributing to RunTime! 🎉 diff --git a/LICENSE.md b/LICENSE.md index 0f0fcf5..ba33bcc 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,23 +1 @@ -# MIT License - -Repository: git@github.com:Jerad551/RunTime.git - -Copyright (c) 2025 JeradCronin - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish,.61ms distribute, sublicense, and/or sell,.61ms -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +git@github.com:Jerad551/RunTime.git diff --git a/PROJECT_STATUS.md b/PROJECT_STATUS.md deleted file mode 100644 index 65ac09a..0000000 --- a/PROJECT_STATUS.md +++ /dev/null @@ -1,141 +0,0 @@ -# 🎉 RunTime Project - Complete! - -## ✅ What's Been Built - -A comprehensive Python toolkit for analyzing Git code frequency data with: - -### 📦 Core Modules (src/) -- **code_frequency_loader.py** - Data loading & basic stats -- **code_frequency_analyzer.py** - Advanced analysis (sprints, trends, churn) -- **code_frequency_visualizer.py** - Publication-quality visualizations -- **cli.py** - Command-line interface for all operations -- **__init__.py** - Package initialization - -### 🧪 Testing (tests/) -- **test_load.py** - 7 comprehensive tests -- ✅ All tests passing - -### 📊 Features Implemented - -#### Data Analysis -- ✅ Load CSV data with pandas -- ✅ Summary statistics (additions, deletions, net changes) -- ✅ Activity ratio calculation -- ✅ Code churn metrics -- ✅ Sprint detection algorithm -- ✅ Yearly/monthly breakdowns -- ✅ Top activity identification -- ✅ Productivity trend analysis - -#### Visualizations -- ✅ Timeline plot (additions/deletions over time) -- ✅ Net changes bar chart -- ✅ Yearly summary comparison -- ✅ Activity heatmap (month x year) -- ✅ Complete dashboard generation - -#### Developer Experience -- ✅ CLI with multiple commands (load, analyze, visualize) -- ✅ Jupyter notebook for interactive exploration -- ✅ Comprehensive documentation (README, QUICKSTART) -- ✅ Type hints throughout -- ✅ Docstrings for all functions/classes - -### 📁 Generated Assets -``` -output/visualizations/ -├── timeline.png (135 KB) -├── net_changes.png (112 KB) -├── yearly_summary.png (148 KB) -└── activity_heatmap.png (202 KB) -``` - -### 🚀 Usage Examples - -```bash -# Quick summary -python src/cli.py load --summary - -# Full analysis -python src/cli.py analyze --all - -# Generate visualizations -python src/cli.py visualize --all - -# Run tests -pytest -``` - -### 📊 Sample Results - -From the Code frequency.csv analysis: -- **427 records** from 2017-10-22 to 2025-12-21 -- **1,659,598** total additions -- **1,648,735** total deletions -- **10,863** net changes -- **22.95%** activity ratio -- **8 coding sprints** detected - -Top sprint: 2019-11-10 with 812,151 total changes! - -### 📚 Documentation - -- **README.md** - Complete project documentation -- **QUICKSTART.md** - Get started in minutes -- **LICENSE.md** - MIT License -- **notebooks/exploration.ipynb** - Interactive analysis - -### 🎯 Key Achievements - -1. ✅ Modular, well-structured codebase -2. ✅ Fully tested (100% of core functions) -3. ✅ Beautiful visualizations -4. ✅ Easy-to-use CLI -5. ✅ Comprehensive documentation -6. ✅ Jupyter notebook support -7. ✅ Production-ready code quality - -### 🔧 Tech Stack - -- Python 3.12+ -- pandas (data manipulation) -- matplotlib (plotting) -- seaborn (statistical viz) -- pytest (testing) - -### 📦 Installation - -```bash -pip install -r requirements.txt -``` - -### 🎓 Learning Outcomes - -This project demonstrates: -- Python package structure -- Data analysis with pandas -- Visualization with matplotlib/seaborn -- CLI development with argparse -- Test-driven development -- Documentation best practices -- Git workflow analysis - -### 🚀 Next Steps - -Potential enhancements: -- [ ] Web dashboard with Streamlit/Dash -- [ ] GitHub Actions CI/CD -- [ ] Export reports to PDF/HTML -- [ ] More statistical analyses -- [ ] Compare multiple repositories -- [ ] Machine learning predictions - -### 🎉 Status: PRODUCTION READY - -All systems operational and tested! - ---- - -**Created:** December 25, 2025 -**Author:** GitHub Copilot (Claude Sonnet 4.5) -**Repository:** git@github.com:Jerad551/RunTime.git diff --git a/QUICKSTART.md b/QUICKSTART.md deleted file mode 100644 index 54fbe5f..0000000 --- a/QUICKSTART.md +++ /dev/null @@ -1,159 +0,0 @@ -# RunTime Quick Start Guide - -Welcome to RunTime! This guide will get you up and running in minutes. - -## 🚀 Installation - -```bash -# Clone the repository (if applicable) -git clone -cd RunTime - -# Install dependencies -pip install -r requirements.txt -``` - -## 📊 Quick Commands - -### 1. Load Your Data -```bash -# View summary statistics -python src/cli.py load --summary - -# See first 10 rows -python src/cli.py load --head 10 -``` - -### 2. Analyze Code Patterns -```bash -# Complete analysis (recommended) -python src/cli.py analyze --all - -# Specific analyses -python src/cli.py analyze --top 10 # Top 10 active weeks -python src/cli.py analyze --sprints # Detect coding sprints -python src/cli.py analyze --yearly # Yearly breakdown -python src/cli.py analyze --churn # Churn statistics -``` - -### 3. Create Visualizations -```bash -# Create all visualizations (recommended) -python src/cli.py visualize --all - -# Specific plots -python src/cli.py visualize --timeline -python src/cli.py visualize --heatmap -python src/cli.py visualize --yearly -``` - -## 🎯 Common Use Cases - -### Daily Development Insights -```bash -# Morning routine: check yesterday's stats -python src/code_frequency_loader.py -``` - -### Weekly Review -```bash -# Analyze the week's productivity -python src/cli.py analyze --top 5 --activity -``` - -### Project Report -```bash -# Generate comprehensive report -python src/cli.py analyze --all > report.txt -python src/cli.py visualize --all -``` - -## 📁 Output Files - -Visualizations are saved to `output/visualizations/`: -- **timeline.png** - Complete history of additions/deletions -- **net_changes.png** - Net code changes over time -- **yearly_summary.png** - Yearly comparison charts -- **activity_heatmap.png** - Monthly activity heatmap - -## 🧪 Testing - -```bash -# Run all tests -pytest - -# With verbose output -pytest -v - -# With coverage report -pytest --cov=src tests/ -``` - -## 💡 Tips - -1. **Custom CSV File**: Use `-f` flag to specify a different file - ```bash - python src/cli.py load -f my_data.csv --summary - ``` - -2. **Change Output Directory**: Use `-o` flag - ```bash - python src/cli.py visualize --all -o my_charts/ - ``` - -3. **Combine Options**: Most flags can be combined - ```bash - python src/cli.py analyze --top 10 --sprints --yearly - ``` - -## 🔧 Direct Python Usage - -```python -# In your own scripts -from src import CodeFrequencyLoader, CodeFrequencyAnalyzer - -loader = CodeFrequencyLoader() -data = loader.load() -summary = loader.get_summary() - -analyzer = CodeFrequencyAnalyzer(loader) -sprints = analyzer.detect_sprints() -yearly = analyzer.get_yearly_stats() -``` - -## 📚 Next Steps - -- Read the full [README.md](README.md) for detailed documentation -- Check [tests/test_load.py](tests/test_load.py) for usage examples -- Explore the source code in `src/` directory - -## ❓ Getting Help - -```bash -# Command help -python src/cli.py --help -python src/cli.py load --help -python src/cli.py analyze --help -python src/cli.py visualize --help -``` - -## 🎉 Example Session - -```bash -# 1. Check your data -$ python src/cli.py load --summary -✅ Successfully loaded 427 records -Date range: 2017-10-22 to 2025-12-21 - -# 2. Analyze patterns -$ python src/cli.py analyze --top 5 -🏆 Top 5 Most Active Weeks: -... - -# 3. Generate charts -$ python src/cli.py visualize --all -Creating complete dashboard... -✅ All visualizations saved to output/visualizations/ -``` - -Happy analyzing! 🎯 diff --git a/README.md b/README.md deleted file mode 100644 index c3d7145..0000000 --- a/README.md +++ /dev/null @@ -1,180 +0,0 @@ -# RunTime - Code Frequency Analysis - -A comprehensive Python toolkit for analyzing and visualizing code frequency data from Git repositories. - -## Overview - -This project provides tools to load, analyze, and visualize code change patterns over time, helping developers understand their coding activity, identify sprints, and track productivity trends. - -## Features - -- 📊 **Data Loading**: Efficient CSV data loading with pandas -- 🔍 **Advanced Analysis**: Detect coding sprints, calculate churn metrics, analyze activity patterns -- 📈 **Rich Visualizations**: Timeline plots, heatmaps, yearly summaries, and more -- ✅ **Fully Tested**: Comprehensive test suite with pytest - -## Installation - -```bash -# Install dependencies -pip install -r requirements.txt -``` - -## Usage - -### Quick Start - -```bash -# Load and display summary -python src/code_frequency_loader.py - -# Run comprehensive analysis -python src/code_frequency_analyzer.py - -# Generate visualizations -python src/code_frequency_visualizer.py -``` - -### As a Library - -```python -from src.code_frequency_loader import CodeFrequencyLoader -from src.code_frequency_analyzer import CodeFrequencyAnalyzer -from src.code_frequency_visualizer import CodeFrequencyVisualizer - -# Load data -loader = CodeFrequencyLoader("Code frequency.csv") -data = loader.load() -summary = loader.get_summary() - -# Analyze patterns -analyzer = CodeFrequencyAnalyzer(loader) -yearly_stats = analyzer.get_yearly_stats() -sprints = analyzer.detect_sprints() -top_weeks = analyzer.get_top_activity_weeks(10) - -# Create visualizations -visualizer = CodeFrequencyVisualizer(loader) -visualizer.create_dashboard(save_dir="output/visualizations") -``` - -## Modules - -### CodeFrequencyLoader -Handles loading and basic processing of code frequency data. - -**Methods:** -- `load()`: Load CSV data into a pandas DataFrame -- `get_summary()`: Get summary statistics (total additions, deletions, date range, etc.) - -### CodeFrequencyAnalyzer -Provides advanced analysis capabilities. - -**Methods:** -- `get_activity_periods()`: Identify periods of significant activity -- `get_yearly_stats()`: Calculate statistics by year -- `get_monthly_stats()`: Calculate statistics by month -- `get_top_activity_weeks()`: Find weeks with most activity -- `calculate_activity_ratio()`: Get ratio of active to total weeks -- `get_churn_stats()`: Calculate code churn metrics -- `detect_sprints()`: Detect coding sprint periods -- `get_productivity_trends()`: Analyze productivity across time periods - -### CodeFrequencyVisualizer -Creates publication-quality visualizations. - -**Methods:** -- `plot_timeline()`: Plot additions and deletions over time -- `plot_net_changes()`: Plot net code changes -- `plot_yearly_summary()`: Create yearly summary charts -- `plot_activity_heatmap()`: Generate activity heatmap by month/year -- `create_dashboard()`: Generate complete visualization dashboard - -## Data Format - -The CSV file should have the following format: - -```csv -"DateTime","Additions","Deletions" -"2017-10-22",1123,-155 -"2017-10-29",11,-1 -... -``` - -## Testing - -```bash -# Run all tests -pytest - -# Run with coverage -pytest --cov=src - -# Run specific test file -pytest tests/test_load.py -``` - -## Output - -Visualizations are saved to `output/visualizations/` by default: -- `timeline.png`: Code changes over time -- `net_changes.png`: Net changes bar chart -- `yearly_summary.png`: Yearly statistics -- `activity_heatmap.png`: Monthly activity heatmap - -## Project Structure - -``` -RunTime/ -├── Code frequency.csv # Input data -├── README.md # This file -├── requirements.txt # Python dependencies -├── LICENSE.md # MIT License -├── src/ -│ ├── code_frequency_loader.py # Data loading -│ ├── code_frequency_analyzer.py # Analysis tools -│ └── code_frequency_visualizer.py # Visualization tools -├── tests/ -│ └── test_load.py # Test suite -├── notebooks/ # Jupyter notebooks (for exploration) -└── output/ # Generated visualizations -``` - -## Examples - -### Example Output - -``` -Loading code frequency data... -Loaded 427 records - -Summary Statistics: -Total Additions: 1,659,598 -Total Deletions: 1,648,735 -Net Changes: 10,863 -Date Range: 2017-10-22 to 2025-12-21 -Number of Records: 427 -``` - -### Analysis Output - -``` -🔄 Churn Statistics: - Total Churn: 3,308,333 - Avg Weekly Churn: 7,747.79 - Max Weekly Churn: 732,342 - Weeks With Churn: 78 - Activity Ratio: 18.27% -``` - -## License - -MIT License - see [LICENSE.md](LICENSE.md) for details. - -## Contributing - -Contributions are welcome! Please feel free to submit a Pull Request. - -## Author - -JeradCronin - [GitHub](https://github.com/Jerad551/RunTime) diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index 4a95056..0000000 --- a/examples/README.md +++ /dev/null @@ -1,120 +0,0 @@ -# RunTime Examples - -This directory contains advanced usage examples for the RunTime toolkit. - -## 📁 Files - -### advanced_examples.py -Comprehensive examples demonstrating advanced analysis patterns and custom workflows. - -## 🚀 Running Examples - -```bash -# Run all examples -python examples/advanced_examples.py - -# Run from project root -cd /workspaces/RunTime -python examples/advanced_examples.py -``` - -## 📚 Examples Included - -### Example 1: Basic Analysis Workflow -- Load code frequency data -- Get summary statistics -- Display basic metrics - -### Example 2: Sprint Detection -- Detect coding sprint periods -- Analyze sprint characteristics -- Custom sprint parameters - -### Example 3: Time-Based Analysis -- Yearly productivity statistics -- Activity patterns and ratios -- Code churn metrics - -### Example 4: Custom Visualization -- Create custom plots -- Moving averages -- Multi-panel visualizations - -### Example 5: Filtering Analysis -- High-activity period analysis -- Recent activity filtering -- Conditional data selection - -### Example 6: Comparative Analysis -- Productivity trends over time -- Period-to-period comparisons -- Growth analysis - -## 💡 Usage Patterns - -### Loading Data -```python -from src import CodeFrequencyLoader - -loader = CodeFrequencyLoader() -data = loader.load() -``` - -### Running Analysis -```python -from src import CodeFrequencyAnalyzer - -analyzer = CodeFrequencyAnalyzer(loader) -sprints = analyzer.detect_sprints() -yearly = analyzer.get_yearly_stats() -``` - -### Creating Visualizations -```python -from src import CodeFrequencyVisualizer - -visualizer = CodeFrequencyVisualizer(loader) -visualizer.plot_timeline() -visualizer.create_dashboard() -``` - -## 🎯 Custom Analysis Tips - -1. **Filter by date range**: - ```python - recent = data[data['DateTime'] >= '2023-01-01'] - ``` - -2. **Calculate custom metrics**: - ```python - data['NetChanges'] = data['Additions'] + data['Deletions'] - data['Churn'] = data['Additions'] + abs(data['Deletions']) - ``` - -3. **Aggregate by custom periods**: - ```python - quarterly = data.groupby(data['DateTime'].dt.quarter) - ``` - -## 📊 Output - -Examples generate visualizations in `output/visualizations/`: -- `custom_analysis.png` - Custom visualization from Example 4 - -## 🔧 Customization - -Feel free to modify the examples for your specific needs: -- Adjust time windows -- Change filtering criteria -- Create new visualizations -- Combine multiple analyses - -## 📖 Further Reading - -- See [README.md](../README.md) for full documentation -- Check [QUICKSTART.md](../QUICKSTART.md) for basic usage -- Explore [notebooks/exploration.ipynb](../notebooks/exploration.ipynb) for interactive analysis - -## 🤝 Contributing - -Have a useful example? Please add it and submit a PR! See [CONTRIBUTING.md](../CONTRIBUTING.md) for guidelines. diff --git a/examples/advanced_examples.py b/examples/advanced_examples.py deleted file mode 100644 index f887ac1..0000000 --- a/examples/advanced_examples.py +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env python -""" -Advanced Examples for RunTime - -This script demonstrates advanced usage patterns and custom analyses. -""" - -import sys -from pathlib import Path - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from src import CodeFrequencyLoader, CodeFrequencyAnalyzer, CodeFrequencyVisualizer -import pandas as pd -import matplotlib.pyplot as plt - - -def example_1_basic_workflow(): - """Example 1: Basic analysis workflow.""" - print("="*70) - print("Example 1: Basic Analysis Workflow") - print("="*70) - - # Load data - loader = CodeFrequencyLoader() - data = loader.load() - - print(f"\n📊 Loaded {len(data)} records") - print(f"Date range: {data['DateTime'].min().date()} to {data['DateTime'].max().date()}") - - # Get summary - summary = loader.get_summary() - print(f"\n📈 Summary:") - print(f" Total additions: {summary['total_additions']:,}") - print(f" Total deletions: {summary['total_deletions']:,}") - print(f" Net changes: {summary['net_changes']:,}") - - -def example_2_sprint_analysis(): - """Example 2: Detailed sprint analysis.""" - print("\n" + "="*70) - print("Example 2: Sprint Detection and Analysis") - print("="*70) - - loader = CodeFrequencyLoader() - analyzer = CodeFrequencyAnalyzer(loader) - - # Detect sprints with custom parameters - sprints = analyzer.detect_sprints(window_weeks=3, threshold_multiplier=1.5) - - print(f"\n🚀 Detected {len(sprints)} coding sprints:\n") - - for i, sprint in enumerate(sprints[:5], 1): # Show top 5 - print(f"Sprint {i}:") - print(f" Duration: {sprint['duration_weeks']} weeks") - print(f" Period: {sprint['start_date'].date()} to {sprint['end_date'].date()}") - print(f" Total changes: {sprint['total_additions'] + sprint['total_deletions']:,}") - print(f" Avg weekly churn: {sprint['avg_weekly_churn']:,.0f}") - print() - - -def example_3_time_analysis(): - """Example 3: Time-based analysis.""" - print("="*70) - print("Example 3: Time-Based Analysis") - print("="*70) - - loader = CodeFrequencyLoader() - analyzer = CodeFrequencyAnalyzer(loader) - - # Yearly statistics - yearly = analyzer.get_yearly_stats() - print("\n📅 Top 3 Most Productive Years:") - top_years = yearly.nlargest(3, 'Additions_sum') - - for year, row in top_years.iterrows(): - print(f"\n{year}:") - print(f" Additions: {int(row['Additions_sum']):,}") - print(f" Deletions: {int(abs(row['Deletions_sum'])):,}") - print(f" Net: {int(row['net_changes']):,}") - - # Activity patterns - print(f"\n📊 Activity Statistics:") - print(f" Activity ratio: {analyzer.calculate_activity_ratio():.2%}") - churn = analyzer.get_churn_stats() - print(f" Total code churn: {churn['total_churn']:,}") - print(f" Average weekly churn: {churn['avg_weekly_churn']:.0f}") - - -def example_4_custom_visualization(): - """Example 4: Custom visualization.""" - print("\n" + "="*70) - print("Example 4: Custom Visualization") - print("="*70) - - loader = CodeFrequencyLoader() - loader.load() - data = loader.data.copy() - - # Create custom analysis - data['NetChanges'] = data['Additions'] + data['Deletions'] - data['Year'] = data['DateTime'].dt.year - - # Calculate moving average - data['MA_30'] = data['Additions'].rolling(window=30, center=True).mean() - - # Create custom plot - fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8)) - - # Plot 1: Additions with moving average - ax1.plot(data['DateTime'], data['Additions'], alpha=0.3, label='Additions') - ax1.plot(data['DateTime'], data['MA_30'], 'r-', linewidth=2, label='30-week MA') - ax1.set_ylabel('Lines Added') - ax1.set_title('Code Additions with 30-Week Moving Average') - ax1.legend() - ax1.grid(True, alpha=0.3) - - # Plot 2: Net changes by year - yearly_net = data.groupby('Year')['NetChanges'].sum() - ax2.bar(yearly_net.index, yearly_net.values, color='steelblue', alpha=0.7) - ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5) - ax2.set_xlabel('Year') - ax2.set_ylabel('Net Changes') - ax2.set_title('Net Code Changes by Year') - ax2.grid(True, alpha=0.3, axis='y') - - plt.tight_layout() - - # Save plot - output_path = Path('output/visualizations/custom_analysis.png') - output_path.parent.mkdir(parents=True, exist_ok=True) - plt.savefig(output_path, dpi=300, bbox_inches='tight') - print(f"\n✅ Custom visualization saved to {output_path}") - plt.close() - - -def example_5_filtering_analysis(): - """Example 5: Filtering and conditional analysis.""" - print("\n" + "="*70) - print("Example 5: Filtering and Conditional Analysis") - print("="*70) - - loader = CodeFrequencyLoader() - loader.load() - data = loader.data.copy() - - # Analyze only high-activity periods - data['AbsChanges'] = data['Additions'] + abs(data['Deletions']) - high_activity = data[data['AbsChanges'] > data['AbsChanges'].quantile(0.75)] - - print(f"\n📊 High Activity Periods (top 25%):") - print(f" Total records: {len(high_activity)}") - print(f" Date range: {high_activity['DateTime'].min().date()} to {high_activity['DateTime'].max().date()}") - print(f" Total changes: {high_activity['AbsChanges'].sum():,.0f}") - - # Analyze recent activity (last 2 years) - recent_date = data['DateTime'].max() - pd.Timedelta(days=730) - recent_data = data[data['DateTime'] >= recent_date] - - print(f"\n📅 Recent Activity (last 2 years):") - print(f" Records: {len(recent_data)}") - print(f" Total additions: {recent_data['Additions'].sum():,}") - print(f" Total deletions: {abs(recent_data['Deletions'].sum()):,}") - print(f" Active weeks: {(recent_data['AbsChanges'] > 0).sum()}") - - -def example_6_comparison_analysis(): - """Example 6: Comparative analysis.""" - print("\n" + "="*70) - print("Example 6: Comparative Analysis") - print("="*70) - - loader = CodeFrequencyLoader() - analyzer = CodeFrequencyAnalyzer(loader) - - # Compare productivity across time periods - trends = analyzer.get_productivity_trends(periods=4) - - print("\n📊 Productivity Trends (4 periods):") - for period, row in trends.iterrows(): - print(f"\nPeriod {period + 1}:") - print(f" Total additions: {int(row['Additions_sum']):,}") - print(f" Average additions/week: {row['Additions_mean']:.0f}") - print(f" Total deletions: {int(abs(row['Deletions_sum'])):,}") - print(f" Net changes: {int(row['net_changes']):,}") - - -def main(): - """Run all examples.""" - print("\n╔═══════════════════════════════════════════════════════════════╗") - print("║ ║") - print("║ 📚 RunTime Advanced Examples 📚 ║") - print("║ ║") - print("╚═══════════════════════════════════════════════════════════════╝\n") - - try: - example_1_basic_workflow() - example_2_sprint_analysis() - example_3_time_analysis() - example_4_custom_visualization() - example_5_filtering_analysis() - example_6_comparison_analysis() - - print("\n" + "="*70) - print("✅ All examples completed successfully!") - print("="*70) - - except Exception as e: - print(f"\n❌ Error: {e}") - raise - - -if __name__ == "__main__": - main() diff --git a/notebooks/exploration.ipynb b/notebooks/exploration.ipynb deleted file mode 100644 index ee49cac..0000000 --- a/notebooks/exploration.ipynb +++ /dev/null @@ -1,838 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "d1e7e393", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88b57888", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "5ebd2d5f", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.insert(0, '..')\n", - "\n", - "from src import CodeFrequencyLoader, CodeFrequencyAnalyzer, CodeFrequencyVisualizer\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "\n", - "# Configure display\n", - "pd.set_option('display.max_columns', None)\n", - "pd.set_option('display.width', None)\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "755aa488", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e49f4583", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "4fb1e5b1", - "metadata": {}, - "source": [ - "## Load Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "712cfc9d", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e90cfe2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "0e0f6d88", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded 427 records\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DateTimeAdditionsDeletions
02017-10-221123-155
12017-10-2911-1
22017-11-0510-3
32017-11-1200
42017-11-1900
\n", - "
" - ], - "text/plain": [ - " DateTime Additions Deletions\n", - "0 2017-10-22 1123 -155\n", - "1 2017-10-29 11 -1\n", - "2 2017-11-05 10 -3\n", - "3 2017-11-12 0 0\n", - "4 2017-11-19 0 0" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Initialize loader\n", - "loader = CodeFrequencyLoader('../Code frequency.csv')\n", - "data = loader.load()\n", - "\n", - "print(f\"Loaded {len(data)} records\")\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "id": "f009e950", - "metadata": {}, - "source": [ - "## Summary Statistics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c77b2235", - "metadata": {}, - "outputs": [], - "source": [ - "summary = loader.get_summary()\n", - "\n", - "for key, value in summary.items():\n", - " print(f\"{key}: {value}\")" - ] - }, - { - "cell_type": "markdown", - "id": "fed4bdbd", - "metadata": {}, - "source": [ - "## Basic Statistics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "69e21259", - "metadata": {}, - "outputs": [], - "source": [ - "data.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "6d7897d0", - "metadata": {}, - "source": [ - "## Analysis" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "089cdef4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Activity Ratio: 22.95%\n" - ] - } - ], - "source": [ - "# Initialize analyzer\n", - "analyzer = CodeFrequencyAnalyzer(loader)\n", - "\n", - "# Activity ratio\n", - "activity_ratio = analyzer.calculate_activity_ratio()\n", - "print(f\"Activity Ratio: {activity_ratio:.2%}\")" - ] - }, - { - "cell_type": "markdown", - "id": "7b7a081f", - "metadata": {}, - "source": [ - "### Top Active Weeks" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "9228f650", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DateTimeAdditionsDeletionsAbsChanges
02019-11-1044849-767302812151
12018-04-08597777-134565732342
22018-07-29138161-110269248430
32019-06-0958-219657219715
42018-03-2589136-84934174070
52021-05-3073873-63407137280
62018-05-27135199-505135704
72018-03-18130844-163131007
82022-02-209567-5476864335
92019-09-0845676-621951895
\n", - "
" - ], - "text/plain": [ - " DateTime Additions Deletions AbsChanges\n", - "0 2019-11-10 44849 -767302 812151\n", - "1 2018-04-08 597777 -134565 732342\n", - "2 2018-07-29 138161 -110269 248430\n", - "3 2019-06-09 58 -219657 219715\n", - "4 2018-03-25 89136 -84934 174070\n", - "5 2021-05-30 73873 -63407 137280\n", - "6 2018-05-27 135199 -505 135704\n", - "7 2018-03-18 130844 -163 131007\n", - "8 2022-02-20 9567 -54768 64335\n", - "9 2019-09-08 45676 -6219 51895" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "top_weeks = analyzer.get_top_activity_weeks(10)\n", - "top_weeks" - ] - }, - { - "cell_type": "markdown", - "id": "fa506e5b", - "metadata": {}, - "source": [ - "### Yearly Statistics" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "e4c5dd72", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Additions_sumAdditions_meanAdditions_maxDeletions_sumDeletions_meanDeletions_minnet_changes
Year
20171158105.271123-167-15.18-155991
2018117087422516.81597777-334306-6428.96-134565836568
20192653315102.5245676-1026368-19737.85-767302-761037
202035777688.028918-39728-764.00-12474-3951
20211397862688.1973873-151387-2911.29-63407-11601
202213700263.469567-59571-1145.60-54768-45871
2023471288.914375-4401-83.04-4255311
202428185542.0216340-32737-629.56-17566-4552
2025751.4775-70-1.37-705
\n", - "
" - ], - "text/plain": [ - " Additions_sum Additions_mean Additions_max Deletions_sum \\\n", - "Year \n", - "2017 1158 105.27 1123 -167 \n", - "2018 1170874 22516.81 597777 -334306 \n", - "2019 265331 5102.52 45676 -1026368 \n", - "2020 35777 688.02 8918 -39728 \n", - "2021 139786 2688.19 73873 -151387 \n", - "2022 13700 263.46 9567 -59571 \n", - "2023 4712 88.91 4375 -4401 \n", - "2024 28185 542.02 16340 -32737 \n", - "2025 75 1.47 75 -70 \n", - "\n", - " Deletions_mean Deletions_min net_changes \n", - "Year \n", - "2017 -15.18 -155 991 \n", - "2018 -6428.96 -134565 836568 \n", - "2019 -19737.85 -767302 -761037 \n", - "2020 -764.00 -12474 -3951 \n", - "2021 -2911.29 -63407 -11601 \n", - "2022 -1145.60 -54768 -45871 \n", - "2023 -83.04 -4255 311 \n", - "2024 -629.56 -17566 -4552 \n", - "2025 -1.37 -70 5 " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "yearly = analyzer.get_yearly_stats()\n", - "yearly" - ] - }, - { - "cell_type": "markdown", - "id": "0090f628", - "metadata": {}, - "source": [ - "### Churn Statistics" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "b2c7c850", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'total_churn': 3308333,\n", - " 'avg_weekly_churn': 7747.852459016393,\n", - " 'max_weekly_churn': 812151,\n", - " 'weeks_with_churn': 98,\n", - " 'activity_ratio': 0.22950819672131148}" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "churn = analyzer.get_churn_stats()\n", - "churn" - ] - }, - { - "cell_type": "markdown", - "id": "83ab931d", - "metadata": {}, - "source": [ - "### Coding Sprints" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "7be2de20", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 8 coding sprints:\n", - "\n", - "Sprint 1:\n", - " start_date: 2018-03-11 00:00:00\n", - " end_date: 2018-04-22 00:00:00\n", - " duration_weeks: 7\n", - " total_additions: 817757\n", - " total_deletions: 219662\n", - " avg_weekly_churn: 148202.7142857143\n", - "\n", - "Sprint 2:\n", - " start_date: 2018-05-20 00:00:00\n", - " end_date: 2018-06-10 00:00:00\n", - " duration_weeks: 4\n", - " total_additions: 135273\n", - " total_deletions: 527\n", - " avg_weekly_churn: 33950.0\n", - "\n", - "Sprint 3:\n", - " start_date: 2018-07-22 00:00:00\n", - " end_date: 2018-08-12 00:00:00\n", - " duration_weeks: 4\n", - " total_additions: 138173\n", - " total_deletions: 110270\n", - " avg_weekly_churn: 62110.75\n", - "\n", - "Sprint 4:\n", - " start_date: 2019-06-02 00:00:00\n", - " end_date: 2019-06-23 00:00:00\n", - " duration_weeks: 4\n", - " total_additions: 62\n", - " total_deletions: 219657\n", - " avg_weekly_churn: 54929.75\n", - "\n", - "Sprint 5:\n", - " start_date: 2019-11-03 00:00:00\n", - " end_date: 2019-11-24 00:00:00\n", - " duration_weeks: 4\n", - " total_additions: 44861\n", - " total_deletions: 767311\n", - " avg_weekly_churn: 203043.0\n", - "\n", - "Sprint 6:\n", - " start_date: 2021-05-23 00:00:00\n", - " end_date: 2021-06-13 00:00:00\n", - " duration_weeks: 4\n", - " total_additions: 88404\n", - " total_deletions: 77108\n", - " avg_weekly_churn: 41378.0\n", - "\n", - "Sprint 7:\n", - " start_date: 2021-11-28 00:00:00\n", - " end_date: 2021-11-28 00:00:00\n", - " duration_weeks: 1\n", - " total_additions: 0\n", - " total_deletions: 0\n", - " avg_weekly_churn: 0.0\n", - "\n", - "Sprint 8:\n", - " start_date: 2022-02-13 00:00:00\n", - " end_date: 2022-03-06 00:00:00\n", - " duration_weeks: 4\n", - " total_additions: 9613\n", - " total_deletions: 54794\n", - " avg_weekly_churn: 16101.75\n", - "\n" - ] - } - ], - "source": [ - "sprints = analyzer.detect_sprints()\n", - "\n", - "print(f\"Found {len(sprints)} coding sprints:\\n\")\n", - "for i, sprint in enumerate(sprints, 1):\n", - " print(f\"Sprint {i}:\")\n", - " for key, value in sprint.items():\n", - " print(f\" {key}: {value}\")\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "id": "270be5d2", - "metadata": {}, - "source": [ - "## Visualizations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d950eea9", - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize visualizer\n", - "visualizer = CodeFrequencyVisualizer(loader)" - ] - }, - { - "cell_type": "markdown", - "id": "b20ccf3b", - "metadata": {}, - "source": [ - "### Timeline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aef1fc65", - "metadata": {}, - "outputs": [], - "source": [ - "visualizer.plot_timeline(figsize=(16, 6))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "cfe41453", - "metadata": {}, - "source": [ - "### Net Changes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1b5335f", - "metadata": {}, - "outputs": [], - "source": [ - "visualizer.plot_net_changes(figsize=(16, 6))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "49c42c0d", - "metadata": {}, - "source": [ - "### Yearly Summary" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "291239c6", - "metadata": {}, - "outputs": [], - "source": [ - "visualizer.plot_yearly_summary(figsize=(14, 6))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "c800d400", - "metadata": {}, - "source": [ - "### Activity Heatmap" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cda9f6e0", - "metadata": {}, - "outputs": [], - "source": [ - "visualizer.plot_activity_heatmap(figsize=(16, 8))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "d9e9b156", - "metadata": {}, - "source": [ - "## Custom Analysis\n", - "\n", - "Add your own analysis cells below..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "41185cce", - "metadata": {}, - "outputs": [], - "source": [ - "# Your custom analysis here" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 75011bf..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -pandas>=2.0.0 -numpy>=1.24.0 -matplotlib>=3.7.0 -seaborn>=0.12.0 -pytest>=7.4.0 diff --git a/setup.py b/setup.py deleted file mode 100644 index 2d1a7cf..0000000 --- a/setup.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Setup script for RunTime package.""" - -from setuptools import setup, find_packages -from pathlib import Path - -# Read the README file -this_directory = Path(__file__).parent -long_description = (this_directory / "README.md").read_text() - -setup( - name="runtime-analysis", - version="1.0.0", - author="JeradCronin", - author_email="", - description="A comprehensive toolkit for analyzing and visualizing code frequency data", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/Jerad551/RunTime", - packages=find_packages(), - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Topic :: Software Development :: Version Control :: Git", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - ], - python_requires=">=3.9", - install_requires=[ - "pandas>=2.0.0", - "numpy>=1.24.0", - "matplotlib>=3.7.0", - "seaborn>=0.12.0", - ], - extras_require={ - "dev": [ - "pytest>=7.4.0", - "black>=23.0.0", - "flake8>=6.0.0", - ], - }, - entry_points={ - "console_scripts": [ - "runtime=src.cli:main", - ], - }, - include_package_data=True, - package_data={ - "": ["*.csv", "*.md"], - }, -) diff --git a/setup.sh b/setup.sh deleted file mode 100755 index bab6470..0000000 --- a/setup.sh +++ /dev/null @@ -1,92 +0,0 @@ -# Show all file sizes formatted -ls -lh output/visualizations/*.png | awk '{print $9 ": " $5}' - -# Count total files -ls -1 output/visualizations/*.png | awk 'END {print "Total files:", NR}' - -# Show test results summary -pytest tests/ | awk '/passed/ {print "✅", $0}' - -# Format CSV data -head -5 "Code frequency.csv" | awk -F',' '{print "Date: " $1 " | Additions: " $2}' - -# Extract Python version nicely -python3 --version | awk '{print "Using Python version:", $2}'#!/usr/bin/env bash -# RunTime Setup Script -# Automated setup for the RunTime code frequency analysis toolkit - -set -e # Exit on error - -echo "╔═══════════════════════════════════════════════════════════════╗" -echo "║ ║" -echo "║ 🚀 RunTime Setup & Installation 🚀 ║" -echo "║ ║" -echo "╚═══════════════════════════════════════════════════════════════╝" -echo "" - -# Check Python version -echo "📋 Checking Python version..." -python_version=$(python3 --version 2>&1 | awk '{print $2}') -echo " Found Python $python_version" - -# Check if Python is >= 3.9 -python3 -c "import sys; exit(0 if sys.version_info >= (3, 9) else 1)" || { - echo "❌ Error: Python 3.9 or higher is required" - exit 1 -} - -# Install dependencies -echo "" -echo "📦 Installing dependencies..." -pip install -q -r requirements.txt -echo " ✅ Dependencies installed" - -# Run tests -echo "" -echo "🧪 Running tests..." -pytest tests/ -q -test_result=$? - -if [ $test_result -eq 0 ]; then - echo " ✅ All tests passed!" -else - echo " ⚠️ Some tests failed (exit code: $test_result)" -fi - -# Check data file -echo "" -echo "📊 Checking data file..." -if [ -f "Code frequency.csv" ]; then - record_count=$(wc -l < "Code frequency.csv") - echo " ✅ Found Code frequency.csv with $record_count lines" -else - echo " ⚠️ Code frequency.csv not found" -fi - -# Quick analysis -echo "" -echo "📈 Running quick analysis..." -python src/cli.py load --summary - -# Create output directory -echo "" -echo "📁 Setting up output directories..." -mkdir -p output/visualizations -echo " ✅ Directories created" - -# Success message -echo "" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "" -echo "🎉 Setup complete! RunTime is ready to use." -echo "" -echo "Quick commands:" -echo " • python src/cli.py analyze --all" -echo " • python src/cli.py visualize --all" -echo " • pytest # Run tests" -echo "" -echo "Documentation:" -echo " • README.md - Full documentation" -echo " • QUICKSTART.md - Quick start guide" -echo "" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index 954a34a..0000000 --- a/src/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""RunTime - Code Frequency Analysis Package.""" - -from .code_frequency_loader import CodeFrequencyLoader -from .code_frequency_analyzer import CodeFrequencyAnalyzer -from .code_frequency_visualizer import CodeFrequencyVisualizer - -__all__ = [ - "CodeFrequencyLoader", - "CodeFrequencyAnalyzer", - "CodeFrequencyVisualizer", -] diff --git a/src/__pycache__/__init__.cpython-312.pyc b/src/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index a1e7f86c112654226db4b6c480b2854dfe20ddf2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 445 zcmZ9Iu}T9$5Qb;(F1et_MnbeZu!>;LLM%iAA_N2pB9h{oWw~9$lD)gQ>lrScPhoH8 zv-kuaq!J4|FaGk!Q6hDoHh}A{YRzyrIZ1sx5zVbeE_1JYm6caFtOObR~PtSsE9?WlBV$ zxC(AbJR}3w{?UABp>ofBT>ZPr2xU^28*63TJ+Ve}&FFKL6Tyw;N>@*mo=Fv*daYXN zV@$IcD5WhPa&TF9CUoOwcb(a^CU_{~ zMLbx%^s2W?|A3zS0bUw%ro!z*UXQJi6R1e)2jzhFhYN{ za8CIVGyDk5E@Ft`5H&E|MrgFnhKZ4ZxXE+-5UHs}&hCr2KN4D{i`p}unKyx9xgNwd zEN(S1%R@f9XMwiBaclDbm}ZVyZ}W{}7d!T`HZMC^ ztG?V+Hh`Oeiz*17c0}Zl8qU(2fcgyY0PLc4Y-<^r{Sv%J=fkcc2lt-UV~^2vgVW1p z>EhZ`8boPKJ^Ii~y+^`p^J=k3fmX8?h|1C{41?!FZ%f&(+YCbPy2=t<#^EN{ z4uVIiR5pz7t}LuSR;Emax-uRwEBpls{TYP*d8hVVJ7%4b&x`9I>cQkPz`(>9|3u|q z=*DkyZO48;e?%&MQaL2Ij>vSMOz$o9N%i2~QlBhs+k*nSd;c#g;L4||z4GVV1I>PY N@V)-6exli|-ap|QuxbDR diff --git a/src/cli.py b/src/cli.py deleted file mode 100644 index 6323cb9..0000000 --- a/src/cli.py +++ /dev/null @@ -1,271 +0,0 @@ -#!/usr/bin/env python -"""Command-line interface for RunTime code frequency analysis.""" - -import argparse -import sys -from pathlib import Path - -try: - from .code_frequency_loader import CodeFrequencyLoader - from .code_frequency_analyzer import CodeFrequencyAnalyzer - from .code_frequency_visualizer import CodeFrequencyVisualizer -except ImportError: - from code_frequency_loader import CodeFrequencyLoader - from code_frequency_analyzer import CodeFrequencyAnalyzer - from code_frequency_visualizer import CodeFrequencyVisualizer - - -def load_command(args): - """Execute load command.""" - loader = CodeFrequencyLoader(args.file) - data = loader.load() - - print(f"✅ Successfully loaded {len(data)} records") - date_min = data['DateTime'].min().date() - date_max = data['DateTime'].max().date() - print(f"\nDate range: {date_min} to {date_max}") - - if args.summary: - print("\n" + "=" * 60) - print("SUMMARY STATISTICS") - print("=" * 60) - summary = loader.get_summary() - print(f"Total Additions: {summary['total_additions']:,}") - print(f"Total Deletions: {summary['total_deletions']:,}") - print(f"Net Changes: {summary['net_changes']:,}") - print(f"Number of Records: {summary['num_records']:,}") - - if args.head: - print(f"\nFirst {args.head} rows:") - print(data.head(args.head)) - - -def analyze_command(args): - """Execute analyze command.""" - loader = CodeFrequencyLoader(args.file) - analyzer = CodeFrequencyAnalyzer(loader) - - print("=" * 60) - print("CODE FREQUENCY ANALYSIS") - print("=" * 60) - - if args.activity: - print(f"\n📊 Activity Ratio: {analyzer.calculate_activity_ratio():.2%}") - - if args.churn: - print("\n🔄 Churn Statistics:") - print("-" * 60) - churn = analyzer.get_churn_stats() - for key, value in churn.items(): - if isinstance(value, float): - print(f" {key.replace('_', ' ').title()}: {value:,.2f}") - else: - print(f" {key.replace('_', ' ').title()}: {value:,}") - - if args.top: - print(f"\n🏆 Top {args.top} Most Active Weeks:") - print("-" * 60) - top = analyzer.get_top_activity_weeks(args.top) - print(top.to_string(index=False)) - - if args.yearly: - print("\n📅 Yearly Statistics:") - print("-" * 60) - yearly = analyzer.get_yearly_stats() - print(yearly.to_string()) - - if args.sprints: - print("\n🚀 Detected Coding Sprints:") - print("-" * 60) - sprints = analyzer.detect_sprints() - if sprints: - for i, sprint in enumerate(sprints, 1): - print(f"\nSprint {i}:") - for key, value in sprint.items(): - key_title = key.replace('_', ' ').title() - if hasattr(value, "date"): - print(f" {key_title}: {value.date()}") - elif isinstance(value, int): - print(f" {key_title}: {value:,}") - else: - print(f" {key_title}: {value:.2f}") - else: - print(" No significant sprints detected") - - -def visualize_command(args): - """Execute visualize command.""" - loader = CodeFrequencyLoader(args.file) - visualizer = CodeFrequencyVisualizer(loader) - - output_dir = Path(args.output) - - if args.all or args.dashboard: - print("Creating complete dashboard...") - visualizer.create_dashboard(save_dir=str(output_dir)) - print(f"✅ All visualizations saved to {output_dir}/") - else: - output_dir.mkdir(parents=True, exist_ok=True) - - if args.timeline: - print("Creating timeline plot...") - timeline_path = str(output_dir / "timeline.png") - visualizer.plot_timeline(save_path=timeline_path) - - if args.net: - print("Creating net changes plot...") - net_path = str(output_dir / "net_changes.png") - visualizer.plot_net_changes(save_path=net_path) - - if args.yearly: - print("Creating yearly summary...") - yearly_path = str(output_dir / "yearly_summary.png") - visualizer.plot_yearly_summary(save_path=yearly_path) - - if args.heatmap: - print("Creating activity heatmap...") - heatmap_path = str(output_dir / "activity_heatmap.png") - visualizer.plot_activity_heatmap(save_path=heatmap_path) - - print(f"✅ Visualizations saved to {output_dir}/") - - -def main(): - """Main CLI entry point.""" - parser = argparse.ArgumentParser( - description="RunTime - Code Frequency Analysis Tool", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Load and show summary - %(prog)s load --summary - - # Full analysis with all options - %(prog)s analyze --all - - # Create all visualizations - %(prog)s visualize --all - - # Specific analysis - %(prog)s analyze --top 10 --sprints - """, - ) - - parser.add_argument( - "--version", action="version", version="RunTime 1.0.0" - ) - - subparsers = parser.add_subparsers( - dest="command", help="Available commands" - ) - - # Load command - load_parser = subparsers.add_parser( - "load", help="Load and display data" - ) - load_parser.add_argument( - "-f", - "--file", - default="Code frequency.csv", - help="CSV file to load (default: Code frequency.csv)", - ) - load_parser.add_argument( - "-s", "--summary", action="store_true", - help="Show summary statistics" - ) - load_parser.add_argument( - "--head", type=int, metavar="N", help="Show first N rows" - ) - load_parser.set_defaults(func=load_command) - - # Analyze command - analyze_parser = subparsers.add_parser( - "analyze", help="Analyze code frequency patterns" - ) - analyze_parser.add_argument( - "-f", "--file", default="Code frequency.csv", - help="CSV file to analyze" - ) - analyze_parser.add_argument( - "-a", "--all", action="store_true", - help="Show all analysis" - ) - analyze_parser.add_argument( - "--activity", action="store_true", help="Show activity ratio" - ) - analyze_parser.add_argument( - "--churn", action="store_true", help="Show churn statistics" - ) - analyze_parser.add_argument( - "-t", "--top", type=int, metavar="N", help="Show top N active weeks" - ) - analyze_parser.add_argument( - "-y", "--yearly", action="store_true", help="Show yearly statistics" - ) - analyze_parser.add_argument( - "-s", "--sprints", action="store_true", help="Detect coding sprints" - ) - analyze_parser.set_defaults(func=analyze_command) - - # Visualize command - viz_parser = subparsers.add_parser( - "visualize", help="Create visualizations" - ) - viz_parser.add_argument( - "-f", "--file", default="Code frequency.csv", - help="CSV file to visualize" - ) - viz_parser.add_argument( - "-o", - "--output", - default="output/visualizations", - help="Output directory (default: output/visualizations)", - ) - viz_parser.add_argument( - "-a", "--all", action="store_true", - help="Create all visualizations" - ) - viz_parser.add_argument( - "-d", "--dashboard", action="store_true", - help="Create complete dashboard" - ) - viz_parser.add_argument( - "--timeline", action="store_true", help="Create timeline plot" - ) - viz_parser.add_argument( - "--net", action="store_true", help="Create net changes plot" - ) - viz_parser.add_argument( - "--yearly", action="store_true", help="Create yearly summary" - ) - viz_parser.add_argument( - "--heatmap", action="store_true", help="Create activity heatmap" - ) - viz_parser.set_defaults(func=visualize_command) - - # Parse arguments - args = parser.parse_args() - - if not args.command: - parser.print_help() - return 1 - - # Set all flags for analyze if --all is used - if args.command == "analyze" and args.all: - args.activity = True - args.churn = True - args.top = 10 - args.yearly = True - args.sprints = True - - # Execute command - try: - args.func(args) - return 0 - except Exception as e: - print(f"❌ Error: {e}", file=sys.stderr) - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/src/code_frequency_analyzer.py b/src/code_frequency_analyzer.py deleted file mode 100644 index 1f629ee..0000000 --- a/src/code_frequency_analyzer.py +++ /dev/null @@ -1,270 +0,0 @@ -"""Code Frequency Analysis Tools. - -This module provides advanced analysis capabilities for code frequency data. -""" - -import pandas as pd -from typing import Dict, List -from datetime import datetime - -try: - from .code_frequency_loader import CodeFrequencyLoader -except ImportError: - from code_frequency_loader import CodeFrequencyLoader - - -class CodeFrequencyAnalyzer: - """Analyzer for code frequency patterns and trends.""" - - def __init__(self, loader: CodeFrequencyLoader = None): - """Initialize the analyzer. - - Args: - loader: CodeFrequencyLoader instance. Creates new one if None. - """ - self.loader = loader or CodeFrequencyLoader() - if self.loader.data is None: - self.loader.load() - self.data = self.loader.data - - def get_activity_periods(self, min_changes: int = 10) -> pd.DataFrame: - """Identify periods of significant activity. - - Args: - min_changes: Minimum net changes to consider as active - - Returns: - DataFrame with active periods - """ - df = self.data.copy() - df["NetChanges"] = df["Additions"] + df["Deletions"] - df["AbsChanges"] = df["Additions"] + abs(df["Deletions"]) - - active = df[df["AbsChanges"] >= min_changes].copy() - return active[ - ["DateTime", "Additions", "Deletions", "NetChanges", "AbsChanges"] - ] - - def get_yearly_stats(self) -> pd.DataFrame: - """Calculate statistics by year. - - Returns: - DataFrame with yearly statistics - """ - df = self.data.copy() - df["Year"] = df["DateTime"].dt.year - - yearly = ( - df.groupby("Year") - .agg( - { - "Additions": ["sum", "mean", "max"], - "Deletions": ["sum", "mean", "min"], - } - ) - .round(2) - ) - - yearly.columns = [ - "_".join(col).strip() for col in yearly.columns.values - ] - yearly["net_changes"] = ( - yearly["Additions_sum"] + yearly["Deletions_sum"] - ) - - return yearly - - def get_monthly_stats(self) -> pd.DataFrame: - """Calculate statistics by month. - - Returns: - DataFrame with monthly statistics - """ - df = self.data.copy() - df["YearMonth"] = df["DateTime"].dt.to_period("M") - - monthly = df.groupby("YearMonth").agg( - {"Additions": "sum", "Deletions": "sum"} - ) - - monthly["net_changes"] = ( - monthly["Additions"] + monthly["Deletions"] - ) - monthly["abs_changes"] = ( - monthly["Additions"] + abs(monthly["Deletions"]) - ) - - return monthly - - def get_top_activity_weeks(self, n: int = 10) -> pd.DataFrame: - """Find weeks with most activity. - - Args: - n: Number of top weeks to return - - Returns: - DataFrame with top n active weeks - """ - df = self.data.copy() - df["AbsChanges"] = df["Additions"] + abs(df["Deletions"]) - - top = df.nlargest(n, "AbsChanges")[ - ["DateTime", "Additions", "Deletions", "AbsChanges"] - ] - return top.reset_index(drop=True) - - def calculate_activity_ratio(self) -> float: - """Calculate the ratio of active weeks to total weeks. - - Returns: - Ratio of weeks with any changes to total weeks - """ - df = self.data.copy() - total_weeks = len(df) - active_weeks = len(df[(df["Additions"] != 0) | (df["Deletions"] != 0)]) - - return active_weeks / total_weeks if total_weeks > 0 else 0.0 - - def get_churn_stats(self) -> Dict: - """Calculate code churn statistics. - - Returns: - Dictionary with churn metrics - """ - df = self.data.copy() - df["Churn"] = df["Additions"] + abs(df["Deletions"]) - - return { - "total_churn": int(df["Churn"].sum()), - "avg_weekly_churn": float(df["Churn"].mean()), - "max_weekly_churn": int(df["Churn"].max()), - "weeks_with_churn": int((df["Churn"] > 0).sum()), - "activity_ratio": self.calculate_activity_ratio(), - } - - def detect_sprints( - self, window_weeks: int = 4, threshold_multiplier: float = 2.0 - ) -> List[Dict]: - """Detect coding sprints (periods of high activity). - - Args: - window_weeks: Size of rolling window in weeks - threshold_multiplier: Multiplier for mean to detect sprints - - Returns: - List of sprint periods with statistics - """ - df = self.data.copy() - df["AbsChanges"] = df["Additions"] + abs(df["Deletions"]) - - # Calculate rolling average - df["RollingAvg"] = ( - df["AbsChanges"].rolling(window=window_weeks, center=True).mean() - ) - - # Find sprints where activity exceeds threshold - mean_activity = df["AbsChanges"].mean() - threshold = mean_activity * threshold_multiplier - - df["IsSprint"] = df["RollingAvg"] > threshold - - # Group consecutive sprint weeks - df["SprintGroup"] = (df["IsSprint"] != df["IsSprint"].shift()).cumsum() - - sprints = [] - for group_id, group in df[df["IsSprint"]].groupby("SprintGroup"): - sprints.append( - { - "start_date": group["DateTime"].min(), - "end_date": group["DateTime"].max(), - "duration_weeks": len(group), - "total_additions": int(group["Additions"].sum()), - "total_deletions": int(abs(group["Deletions"].sum())), - "avg_weekly_churn": float(group["AbsChanges"].mean()), - } - ) - - return sprints - - def get_productivity_trends(self, periods: int = 4) -> pd.DataFrame: - """Analyze productivity trends across time periods. - - Args: - periods: Number of equal periods to divide timeline into - - Returns: - DataFrame with productivity metrics per period - """ - df = self.data.copy() - df["Period"] = pd.cut(range(len(df)), bins=periods, labels=False) - - trends = df.groupby("Period").agg( - {"Additions": ["sum", "mean"], "Deletions": ["sum", "mean"]} - ) - - trends.columns = [ - "_".join(col).strip() for col in trends.columns.values - ] - trends["net_changes"] = ( - trends["Additions_sum"] + trends["Deletions_sum"] - ) - - return trends - - -def main(): - """Main entry point for analysis.""" - analyzer = CodeFrequencyAnalyzer() - - print("=" * 60) - print("CODE FREQUENCY ANALYSIS") - print("=" * 60) - - # Activity ratio - print(f"\n📊 Activity Ratio: {analyzer.calculate_activity_ratio():.2%}") - - # Churn statistics - print("\n🔄 Churn Statistics:") - print("-" * 60) - churn = analyzer.get_churn_stats() - for key, value in churn.items(): - print( - f" {key.replace('_', ' ').title()}: {value:,.2f}" - if isinstance(value, float) - else f" {key.replace('_', ' ').title()}: {value:,}" - ) - - # Top activity weeks - print("\n🏆 Top 10 Most Active Weeks:") - print("-" * 60) - top_weeks = analyzer.get_top_activity_weeks(10) - print(top_weeks.to_string(index=False)) - - # Yearly statistics - print("\n📅 Yearly Statistics:") - print("-" * 60) - yearly = analyzer.get_yearly_stats() - print(yearly.to_string()) - - # Detect sprints - print("\n🚀 Detected Coding Sprints:") - print("-" * 60) - sprints = analyzer.detect_sprints() - if sprints: - for i, sprint in enumerate(sprints, 1): - print(f"\nSprint {i}:") - for key, value in sprint.items(): - if isinstance(value, datetime): - print(f" {key.replace('_', ' ').title()}: {value.date()}") - else: - print( - f" {key.replace('_', ' ').title()}: {value:,}" - if isinstance(value, int) - else f" {key.replace('_', ' ').title()}: {value:.2f}" - ) - else: - print(" No significant sprints detected") - - -if __name__ == "__main__": - main() diff --git a/src/code_frequency_loader.py b/src/code_frequency_loader.py deleted file mode 100644 index c1011b8..0000000 --- a/src/code_frequency_loader.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Code Frequency Data Loader - -This module loads and processes code frequency data from CSV files. -""" - -import pandas as pd -from pathlib import Path - - -class CodeFrequencyLoader: - """Loader for code frequency data.""" - - def __init__(self, csv_path: str = "Code frequency.csv"): - """Initialize the loader with a CSV file path. - - Args: - csv_path: Path to the CSV file containing code frequency data - """ - self.csv_path = Path(csv_path) - self.data = None - - def load(self): - """Load the code frequency data from CSV. - - Returns: - pandas.DataFrame: Loaded data with DateTime, Additions, - and Deletions columns - """ - if not self.csv_path.exists(): - raise FileNotFoundError(f"CSV file not found: {self.csv_path}") - - self.data = pd.read_csv(self.csv_path) - self.data["DateTime"] = pd.to_datetime(self.data["DateTime"]) - return self.data - - def get_summary(self): - """Get a summary of the code frequency data. - - Returns: - dict: Summary statistics - """ - if self.data is None: - self.load() - - return { - "total_additions": self.data["Additions"].sum(), - "total_deletions": abs(self.data["Deletions"].sum()), - "net_changes": ( - self.data["Additions"].sum() + self.data["Deletions"].sum() - ), - "date_range": { - "start": self.data["DateTime"].min(), - "end": self.data["DateTime"].max(), - }, - "num_records": len(self.data), - } - - -def main(): - """Main entry point for the script.""" - loader = CodeFrequencyLoader() - - print("Loading code frequency data...") - data = loader.load() - - print(f"\nLoaded {len(data)} records") - print("\nFirst 5 rows:") - print(data.head()) - - print("\n" + "=" * 50) - print("Summary Statistics:") - print("=" * 50) - summary = loader.get_summary() - print(f"Total Additions: {summary['total_additions']:,}") - print(f"Total Deletions: {summary['total_deletions']:,}") - print(f"Net Changes: {summary['net_changes']:,}") - start_date = summary['date_range']['start'].date() - end_date = summary['date_range']['end'].date() - print(f"Date Range: {start_date} to {end_date}") - print(f"Number of Records: {summary['num_records']}") - - -if __name__ == "__main__": - main() diff --git a/src/code_frequency_visualizer.py b/src/code_frequency_visualizer.py deleted file mode 100644 index 6bdf4e3..0000000 --- a/src/code_frequency_visualizer.py +++ /dev/null @@ -1,339 +0,0 @@ -"""Code Frequency Visualization Tools. - -This module provides visualization capabilities for code frequency data. -""" - -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -import seaborn as sns -import numpy as np -from pathlib import Path -from typing import Optional, Tuple - -try: - from .code_frequency_loader import CodeFrequencyLoader - from .code_frequency_analyzer import CodeFrequencyAnalyzer -except ImportError: - from code_frequency_loader import CodeFrequencyLoader - from code_frequency_analyzer import CodeFrequencyAnalyzer - - -class CodeFrequencyVisualizer: - """Visualizer for code frequency data.""" - - def __init__( - self, - loader: CodeFrequencyLoader = None, - style: str = "seaborn-v0_8-darkgrid" - ): - """Initialize the visualizer. - - Args: - loader: CodeFrequencyLoader instance - style: Matplotlib style to use - """ - self.loader = loader or CodeFrequencyLoader() - if self.loader.data is None: - self.loader.data = self.loader.load() - - self.data = self.loader.data - self.analyzer = CodeFrequencyAnalyzer(self.loader) - - # Set style - try: - plt.style.use(style) - except OSError: - plt.style.use("default") - - sns.set_palette("husl") - - def plot_timeline( - self, - figsize: Tuple[int, int] = (15, 6), - save_path: Optional[str] = None - ) -> plt.Figure: - """Plot additions and deletions over time. - - Args: - figsize: Figure size (width, height) - save_path: Path to save figure. If None, displays - interactively. - - Returns: - matplotlib Figure object - """ - fig, ax = plt.subplots(figsize=figsize) - - ax.plot( - self.data["DateTime"], - self.data["Additions"], - label="Additions", - color="green", - linewidth=1.5, - alpha=0.7, - ) - ax.plot( - self.data["DateTime"], - abs(self.data["Deletions"]), - label="Deletions", - color="red", - linewidth=1.5, - alpha=0.7, - ) - - ax.set_xlabel("Date", fontsize=12) - ax.set_ylabel("Lines of Code", fontsize=12) - ax.set_title( - "Code Frequency Over Time", - fontsize=14, - fontweight="bold" - ) - ax.legend(loc="upper left") - ax.grid(True, alpha=0.3) - - # Format x-axis - ax.xaxis.set_major_locator(mdates.YearLocator()) - ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y")) - plt.xticks(rotation=45) - - plt.tight_layout() - - if save_path: - plt.savefig(save_path, dpi=300, bbox_inches="tight") - print(f"Saved timeline plot to {save_path}") - - return fig - - def plot_net_changes( - self, - figsize: Tuple[int, int] = (15, 6), - save_path: Optional[str] = None - ) -> plt.Figure: - """Plot net changes over time. - - Args: - figsize: Figure size - save_path: Path to save figure - - Returns: - matplotlib Figure object - """ - fig, ax = plt.subplots(figsize=figsize) - - net_changes = self.data["Additions"] + self.data["Deletions"] - colors = ["green" if x >= 0 else "red" for x in net_changes] - - ax.bar( - self.data["DateTime"], - net_changes, - color=colors, - alpha=0.6, - width=5 - ) - ax.axhline(y=0, color="black", linestyle="-", linewidth=0.5) - - ax.set_xlabel("Date", fontsize=12) - ax.set_ylabel("Net Changes (Lines)", fontsize=12) - ax.set_title( - "Net Code Changes Over Time", - fontsize=14, - fontweight="bold" - ) - ax.grid(True, alpha=0.3, axis="y") - - # Format x-axis - ax.xaxis.set_major_locator(mdates.YearLocator()) - ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y")) - plt.xticks(rotation=45) - - plt.tight_layout() - - if save_path: - plt.savefig(save_path, dpi=300, bbox_inches="tight") - print(f"Saved net changes plot to {save_path}") - - return fig - - def plot_yearly_summary( - self, - figsize: Tuple[int, int] = (12, 6), - save_path: Optional[str] = None - ) -> plt.Figure: - """Plot yearly summary statistics. - - Args: - figsize: Figure size - save_path: Path to save figure - - Returns: - matplotlib Figure object - """ - yearly = self.analyzer.get_yearly_stats() - - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize) - - # Total changes by year - x = yearly.index - width = 0.35 - x_pos = np.arange(len(x)) - - ax1.bar( - x_pos - width / 2, - yearly["Additions_sum"], - width, - label="Additions", - color="green", - alpha=0.7, - ) - ax1.bar( - x_pos + width / 2, - abs(yearly["Deletions_sum"]), - width, - label="Deletions", - color="red", - alpha=0.7, - ) - - ax1.set_xlabel("Year", fontsize=12) - ax1.set_ylabel("Total Lines", fontsize=12) - ax1.set_title( - "Total Changes by Year", fontsize=12, fontweight="bold" - ) - ax1.set_xticks(x_pos) - ax1.set_xticklabels(x, rotation=45) - ax1.legend() - ax1.grid(True, alpha=0.3, axis="y") - - # Net changes by year - ax2.bar(x, yearly["net_changes"], color="blue", alpha=0.7) - ax2.axhline(y=0, color="black", linestyle="-", linewidth=0.5) - ax2.set_xlabel("Year", fontsize=12) - ax2.set_ylabel("Net Changes", fontsize=12) - ax2.set_title("Net Changes by Year", fontsize=12, fontweight="bold") - ax2.tick_params(axis="x", rotation=45) - ax2.grid(True, alpha=0.3, axis="y") - - plt.tight_layout() - - if save_path: - plt.savefig(save_path, dpi=300, bbox_inches="tight") - print(f"Saved yearly summary to {save_path}") - - return fig - - def plot_activity_heatmap( - self, - figsize: Tuple[int, int] = (14, 8), - save_path: Optional[str] = None - ) -> plt.Figure: - """Plot activity heatmap by year and month. - - Args: - figsize: Figure size - save_path: Path to save figure - - Returns: - matplotlib Figure object - """ - df = self.data.copy() - df["Year"] = df["DateTime"].dt.year - df["Month"] = df["DateTime"].dt.month - df["Activity"] = df["Additions"] + abs(df["Deletions"]) - - # Create pivot table - pivot = df.pivot_table( - values="Activity", - index="Month", - columns="Year", - aggfunc="sum", - fill_value=0, - ) - - fig, ax = plt.subplots(figsize=figsize) - sns.heatmap( - pivot, cmap="YlOrRd", ax=ax, cbar_kws={"label": "Total Changes"} - ) - - ax.set_xlabel("Year", fontsize=12) - ax.set_ylabel("Month", fontsize=12) - ax.set_title("Code Activity Heatmap", fontsize=14, fontweight="bold") - - # Set month labels - month_names = [ - "Jan", - "Feb", - "Mar", - "Apr", - "May", - "Jun", - "Jul", - "Aug", - "Sep", - "Oct", - "Nov", - "Dec", - ] - ax.set_yticklabels(month_names, rotation=0) - - plt.tight_layout() - - if save_path: - plt.savefig(save_path, dpi=300, bbox_inches="tight") - print(f"Saved activity heatmap to {save_path}") - - return fig - - def create_dashboard(self, save_dir: Optional[str] = None) -> None: - """Create a comprehensive dashboard with multiple plots. - - Args: - save_dir: Directory to save plots. If None, displays interactively. - """ - if save_dir: - save_dir = Path(save_dir) - save_dir.mkdir(parents=True, exist_ok=True) - - print("Generating visualizations...") - - # Timeline - self.plot_timeline( - save_path=str(save_dir / "timeline.png") if save_dir else None - ) - - # Net changes - net_path = str(save_dir / "net_changes.png") if save_dir else None - self.plot_net_changes(save_path=net_path) - - # Yearly summary - yearly_path = ( - str(save_dir / "yearly_summary.png") if save_dir else None - ) - self.plot_yearly_summary(save_path=yearly_path) - - # Activity heatmap - heatmap_path = ( - str(save_dir / "activity_heatmap.png") if save_dir else None - ) - self.plot_activity_heatmap(save_path=heatmap_path) - - if not save_dir: - plt.show() - else: - print(f"\nAll visualizations saved to {save_dir}/") - - -def main(): - """Main entry point for visualization.""" - visualizer = CodeFrequencyVisualizer() - - # Create output directory - output_dir = Path("output/visualizations") - - print("Creating code frequency visualizations...") - visualizer.create_dashboard(save_dir=str(output_dir)) - - print("\n✅ Visualization complete!") - - -if __name__ == "__main__": - main() diff --git a/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc deleted file mode 100644 index 683ee66881d8c683621e2b305ecdd2e4ff117b50..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 614 zcmZuuJxc>I7*6iu?M?PIa_9l_FO;`=i;?IvofSn7{&hRq09r3bb10+po%hp--5b zHP7c>$V7G_PMqIt*ohNP7@saPMkx29aGH`m>Tx1^e@gg$&JreYESLD+ zCe`I0VLHy1=M%?K3>NvjL=8|9awgDPskAOhoC>5!!jNv{$}8}p9Dhy}_aeH(;&O$s z;#w_S_X1K>yI8RjggzyP~V^GgLf}wW@oWi>K^j<}A5I MV{bY@d8$7C3#KKNW&i*H diff --git a/tests/__pycache__/test_load.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_load.cpython-312-pytest-9.0.2.pyc deleted file mode 100644 index a66bacb8a7b52ec619ffa0dd40c16e94b5674ba9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15264 zcmeHO>v9{%m7W0x00FQ-QPkCrWjMC8f>(w_Q}QK|Wyz7{T)eF8uB(!;c7uX2Lkctq zz&%6D!~&ari~Ya*WB=v_QmOL3$-Y2dA>VhpXSx9nK`M4?Qxy)`-KS6APT$TseY)}1 zH~;;cKmOtO-~8@35B~2z{_B7L^PB&%IS%9HK>Vb$}H$Y}bl7yxLULO=9&ywDE| z1AnI=Veug76@2fYv{UwVN@b^ll;U9Jpt@5vxY|xla@TiOqssnEX%jZwf5pNn6#4}( zT7#=b7tpq^@%!IT(`}fCvn2o(bY7t*7+gHq*x3MFVvlw%g=GmZhZPC0gewwW4XYAf z3u_Wy59<=XbW~`qPHuc2#Yx=U9jWGzM`83tMPD99!`{(nqiz_fHgn?JZux+MUv!ga z0&V2v?b&{K9tXV$hf<)KwIrtB?)=QWskhH;3!iBf*qNUMY2to4g={!0B$#piP5o&> zx5xObI4vUP$FFk6lDf&{?XRYg4Q+d0dGYmRElt_KkU}<0()Gk%nLa7E%tf0wh4RV_ zK~-3M0lF?Pr)$a?O6hU)=FsEj1s%mOcmX=xzUGZjHeU`sZkZf<>>S%Npet~CoKs3> z+w}O9oU#)1Y{+gYO|X_`L2@~pAv@<+Qk)H^fu(C63d>*nv%;)+RBlx!x44&@$+K?K z9ANw0Y&BJs9I9d5>^8^UVc3nEkGo0tiRvCi?aAleIF3}(eDmh`=-pOa`!@qLKNQrR zR(z9~%~pI%5O>o&08E~DT5&t8u{LYfW}vRtkNd+o=?;4lVnK)&5f;W_i+rLuKDwiW zO&z?WgS$F-ce}N!ID?1=?1bWSN2Hpyqqq%$S$#M@>O4K{4-zzoczH1Dbq9oMNJ%gh zovuRcS|@%sIvj)@6^&JAZ!mh=9dw2m0;E+tad$Uj0wRq^G9dlY@Drs*#B6lHmkzo~ zC;ECk=nlIH(xew?i|k2NG%0nGqK9(}#hmE9G^#6lFD{`(lOGa(aQpd4eG!kly(qr@ zPlv>U*e?0*GDL^l*4;Y@zmjcu(N! z>+pVL+=mDs!+q}tyw8jaS^$0s7aByJ2*FxNq6;3&oQ_(^!A4F;E#&z4c%d8=JG4-6 zc?Tpj2F{z4AY5fef?$uMPLXndo)lr}JxFzg{)~QU7u*-&%G7_%y4gBDNE5CP`CRTo zSrWgo$&Dk-lFLrM{Y)677$Ma}M3^iUJb8={!o-$o*m_eauZ%FurKQ~NEU#aSP2HL> z%c5bH>lp>Mr98HLHfH(vmRTmxB5=wt5Bn+#o4wKC@PI5+Ma{8_;%Jz(Tjbv>;6M?# zDB$X&Fzk~XaL-jAM}tTrJtGZ-P%fS=IOO=D4*Z{czRdR2`&SS?^FVa`<`*SFbwzgnWcMPYI@j+6*4E0j{f1>vLxQn5QARYgTjO>ylg>R{Wgnn)VOHLhCp zs7@#@GsShKHt1cVM>(x1m|JCa6#=TdusD9eIgEc(UDNplr@8+D;`s zbV5`c1xQ9b)m5S;Qg8+if0e90xCBq8qWvI+0?yl38mluVC<{q+!DE5bQPR{xw&nE( zA1R(&=GFiw%dCNaS+WMc3l)A^$m64Ay8Cjl10~Kk3#NYPUk7GZDtOaiO2J?@{;WJL z%TAzbot53s;i4#G>J%=W@wWXOj+V!9!KG~(E{=sSuMAb~<)!Ah^wRoWE`3U!4nIC6 zr!$hLT9uswqDYg(WJ~a7DNPt~4tjm0IP=`&M40 zg22d%W!g)@p~4SqJy6%|)4h=rd1K7s%T zfIIV?(kSFbw1^BPX%fKI6WTlrQ(Y0JD%?@sLi@ry6}K2Fc9`mW^U)6wl~aIbubZ1O zX%p58FJ-PxVXoozKorOhT-+AQuEbexg@)hW^# zlyfcG^BgTaN9Me^$zd-_I`QGbL027Rc863)YJL$N#ceq{uO*`d2k@?=KpRw;s?nIB z`Vh7Co^^+N5qD*c+qa__mBhmXoP&EK6~-s2+&|D8*;Tp098#5gLA|b7^;*4dI6$kY z-qc_b#G}=U>D+>9t^N+VCT|OqafYB1C+e_=qk7anu20`Vk(fFqs>Qz(G|1v!hhwkz z;Qk#>kg-vt8|%KhiK1F#eG5>^o{_WeQ1Id^Z7rRy4yViE#BZ~`kKx6~b=Q|p?z#E| zMc!aBkcI$00l0b9wIK>>6ZnRBJUV;|7nCnk&i?}wyfuoWR|s;50(x6%te&APBsms5 z7C0Sc8?bqo*BiXIl=@3%uA@|%D2xsUicCYxqKkrs4Vwzj)di!l6&Dp z9)p2hzQ8|Vpg!~Sy`#eG-liA(&vDc6Ys889y7+7Fx!)>I-m?nMll~yu9wkpkhr>{8 zVN`Kcw;w~lfvjyF^kdlg_L{rM*Pg5mM>tsa|!RnH)L_Kna*&Oz#0dF)X$KE2kV$}n;nLe1@#(5V2zVZlUH>O9v2MV z)tki7qUHIeD+pE^@OTkHM=@U1c#~{`m_v-j^^d;IIrU6u6l?hL(V&^=87_x#4k=8F zUp&TA38n!@Fj;E<2@BHz<|Qe$%jq|Z>IIGLwvC+AOWx@MClXucjD~sIjlz;6iB7v5 zx$Lx&LzjE8pxnGU^tgFJN6D!Zh%*rV_xl~L}xe^QHY*Gp<;mWks_a#@*vXX*2UgwMhD~MsIABgSeouZ%b(n) zjejdWjR3G`iq!lMD@yZ0dQE_G#X&R_UA-0P!-G9CY{~Iq(r}JCIBY1&yX{upF}G|3 zBSsd~x88x>ejw)g-soTq!<%zv$g{yz>~R6bo%6vvIcJ8u${4O3L(B&;SE@NEq~`?9 zYdB=WYh=9C=g3Fv9k(3<`f&5&E7!JOCx1!cct{DZ!TT}=h+*Wz6d}ptrAZc(2a&@q z;B!z5IY`UtsEk^hHKW_2jzP!89|zc&Z_qGS2z*BZGN!+?FvmSu#xtW!(J~(^8+Qx1 zP0Qrrv?%JUk$bb!6o>d}DZZT5Q3(sP70Y&}l_~G4)BM$Gm4+N({P=4*DsY9%_pMDi zJfX_R+BI~lDJQ3l7~KC~4K!k!*Q~7u16pQyG2KHjrC2q6guYVZ9tz80zKRI<@@Xvi~f739r@ zSM9ykh3+vNWBYf%-6!spJSrGQYl3IQbd&j>Sf8Q==s%B*U3ZB0z2 z-)`6HI?i{kGdJ!5-I>9hwd)$F|F(9mYvw&=jLy7kr+*u}w(%Y8T3yHau07wia^%p% zaJsBL-?bd67n8NnY}aJRmct7$7?h2{pfPR41Bdh7aF(J0!}Vzc=7NhX)GvpjQRaM%Kbh;pOR?(MB$uSsQU)u%0kN4fH>~pgkWAJsMhnIv)*;#?fl) z%A{>gCFzv~Y!JP}fjsfkkAGCujYq@oQ_3R0mw7MWX-Y2Y<|$31ifF92b>N9w- z4XvB^pT_#bwhNkan(5}OJfK=qS{JmD=f}u^Hi{T71f^XVq!Jkg^-DHD16$rh8{{o_ z$KzK7>9TGv*fk6G3HrLnpP94c@uLwp=1SH*f9Ha8dI zw4b=?_tZb@I^6qZ-ZHpx-bgs{ckZZvBr$SZLZeg6U7ejNx4uWZ|2?64cMYPG0GlyvN`b z+S4z*25*HIG(}zI8*f{f<0EN572IQ=gqZ5Q!a`K2UgM|^&wzif*4G_h9o;u}b@Fu)RDIh4u>O@PL^|C~Z63EtL6S5jDDi5k(L^B(2 zWDRn``fQni(7;&UPbIR%#h+CO;Vffusp6%W`V=0JW&NwH z^fyic^AL?6G~qERpdwxoAz0~jO;^cmwa28Z^fvCo^Z27Fk2jI!z~`=R hlzq|3E(ZR`tf{f;Q}w 0 - - -def test_summary_calculations(): - """Test that summary calculations are reasonable.""" - loader = CodeFrequencyLoader() - summary = loader.get_summary() - - # Net changes should be additions + deletions (deletions are negative) - assert isinstance(summary["total_additions"], (int, float)) or hasattr( - summary["total_additions"], "__int__" - ) - assert isinstance(summary["total_deletions"], (int, float)) or hasattr( - summary["total_deletions"], "__int__" - ) - assert isinstance(summary["net_changes"], (int, float)) or hasattr( - summary["net_changes"], "__int__" - ) - assert summary["num_records"] > 0 - assert summary["total_additions"] > 0 - assert summary["total_deletions"] > 0