From 1dcbd102d5b532a1a81bbf623b1b3a8ac00713b1 Mon Sep 17 00:00:00 2001 From: ghinks Date: Thu, 30 Apr 2026 12:28:21 -0400 Subject: [PATCH] feat: add --show-after output filter to classify command Adds a --show-after YYYY-MM-DD option to classify. Stats baseline (--start / --end) is computed from the full window as normal; only the displayed outliers are narrowed to those merged after the given date. Supported on the CLI, in TOML config files (all sections), and documented in the README. Closes #46 Co-Authored-By: Claude Sonnet 4.6 --- README.md | 27 +++++++++++++++++++++ src/review_classification/cli/app.py | 31 +++++++++++++++++++++++++ src/review_classification/cli/config.py | 9 +++++++ 3 files changed, 67 insertions(+) diff --git a/README.md b/README.md index d26a8fa..18345d2 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,11 @@ uv run review-classify classify --repo owner/repo --format json > outliers.json # Exclude PRs merged into the primary branch (main/master) uv run review-classify classify --repo owner/repo --exclude-primary-merged + +# Show only outliers merged in the last two weeks (stats baseline unchanged) +uv run review-classify classify --repo owner/repo \ + --start 2024-01-01 --end 2024-11-30 \ + --show-after 2024-12-15 ``` | Option | Description | @@ -127,6 +132,7 @@ uv run review-classify classify --repo owner/repo --exclude-primary-merged | `--start` | Start of the classification window (YYYY-MM-DD). | | `--end` | End of the classification window (YYYY-MM-DD). | | `--exclude-primary-merged` | Exclude PRs whose base branch is `main` or `master`. | +| `--show-after` | Only display outliers merged after this date (YYYY-MM-DD). Stats baseline is unaffected. | | `--verbose` / `-v` | Print progress details. 
| #### Classification window (`--start` / `--end`) @@ -163,6 +169,26 @@ Pass `--exclude-primary-merged` to restrict analysis to PRs that were **not** me uv run review-classify classify --repo owner/repo --exclude-primary-merged ``` +#### Filtering output to recent PRs (`--show-after`) + +`--show-after YYYY-MM-DD` filters the **displayed** outliers to only those merged after the given date. The cutoff is 00:00 UTC at the start of that date, so PRs merged later on the given date itself are also shown. The stats baseline (`--start` / `--end`) is computed from the full window as normal — only the output is narrowed. + +This is the right tool when you have a long history in the database but only want to act on recent outliers. For example, compute stats from a 6-month baseline but display only the last two weeks: + +``` +[--start ──────────────────── --end] >end … [--show-after] … today + ↑ ↑ ↑ + baseline start baseline end only show from here +``` + +```bash +# Compute stats from Jan–Nov 2024; display only outliers merged after 2024-12-15 +uv run review-classify classify --repo owner/repo \ + --start 2024-01-01 \ + --end 2024-11-30 \ + --show-after 2024-12-15 +``` + ### Per-repository analysis Outlier detection is always **scoped to a single repository**. 
When you target multiple repositories (via `--org`, multiple `--repo` flags, or a config file), each repository is analysed independently: @@ -222,6 +248,7 @@ threshold = 2.0 min_samples = 30 start = "2024-01-01" end = "2024-06-30" +show_after = "2024-12-15" # only display outliers merged after this date # Individual repositories ───────────────────────────────────────────────────── diff --git a/src/review_classification/cli/app.py b/src/review_classification/cli/app.py index 0bd738c..0165514 100644 --- a/src/review_classification/cli/app.py +++ b/src/review_classification/cli/app.py @@ -103,6 +103,7 @@ def _detect_single( classify_start: str | None, classify_end: str | None, exclude_primary_merged: bool = False, + show_after: str | None = None, ) -> RepoClassifyResult: """Run outlier detection for a single repository.""" repo = GitHubRepo.from_string(repo_name) @@ -110,6 +111,7 @@ def _detect_single( classify_start_dt: datetime | None = None classify_end_dt: datetime | None = None + show_after_dt: datetime | None = None if classify_start: classify_start_dt = datetime.strptime(classify_start, "%Y-%m-%d").replace( @@ -119,6 +121,8 @@ def _detect_single( classify_end_dt = datetime.strptime(classify_end, "%Y-%m-%d").replace( hour=23, minute=59, second=59, tzinfo=UTC ) + if show_after: + show_after_dt = datetime.strptime(show_after, "%Y-%m-%d").replace(tzinfo=UTC) exclude_branches = _PRIMARY_BRANCHES if exclude_primary_merged else None @@ -130,6 +134,8 @@ def _detect_single( start_label = classify_start or "unbounded" end_label = classify_end or "unbounded" typer.echo(f"Classification window: {start_label} to {end_label}") + if show_after_dt: + typer.echo(f"Showing outliers merged after: {show_after}") if exclude_primary_merged: branches = sorted(_PRIMARY_BRANCHES) typer.echo(f"Excluding PRs merged into primary branches: {branches}") @@ -192,6 +198,11 @@ def _detect_single( save_outlier_scores(session, full_name, results, sample_size) + if show_after_dt: + results = [ 
+ r for r in results if r.merged_at and r.merged_at > show_after_dt + ] + outliers = [r for r in results if r.is_outlier] if verbose: @@ -243,6 +254,7 @@ def _resolve_targets( default_min_samples: int | None = None, default_start: str | None = None, default_end: str | None = None, + default_show_after: str | None = None, ) -> list[RepoConfig]: """Resolve CLI arguments and config file into a concrete list of repositories.""" from ..queries.github_client import get_org_repos @@ -293,6 +305,11 @@ def add_repo(rc: RepoConfig) -> None: org_cfg.start if org_cfg.start is not None else multi.start ), end=(org_cfg.end if org_cfg.end is not None else multi.end), + show_after=( + org_cfg.show_after + if org_cfg.show_after is not None + else multi.show_after + ), ) add_repo(rc) @@ -306,6 +323,7 @@ def add_repo(rc: RepoConfig) -> None: min_samples=default_min_samples, start=default_start, end=default_end, + show_after=default_show_after, ) add_repo(rc) @@ -321,6 +339,7 @@ def add_repo(rc: RepoConfig) -> None: min_samples=default_min_samples, start=default_start, end=default_end, + show_after=default_show_after, ) add_repo(rc) @@ -481,6 +500,16 @@ def classify( help="Exclude PRs merged into the primary branch (main/master).", ), ] = False, + show_after: Annotated[ + str | None, + typer.Option( + "--show-after", + help=( + "Only display outliers merged after this date (YYYY-MM-DD). " + "Stats baseline is unaffected — use this to focus output on recent PRs." + ), + ), + ] = None, ) -> None: """Classify previously fetched PRs using z-score outlier analysis. 
@@ -513,6 +542,7 @@ def classify( default_min_samples=min_samples, default_start=start, default_end=end, + default_show_after=show_after, ) repo_results = [ @@ -524,6 +554,7 @@ def classify( target.start, target.end, exclude_primary_merged=exclude_primary_merged, + show_after=target.show_after, ) for target in targets ] diff --git a/src/review_classification/cli/config.py b/src/review_classification/cli/config.py index 9a20fab..17d8c13 100644 --- a/src/review_classification/cli/config.py +++ b/src/review_classification/cli/config.py @@ -16,6 +16,7 @@ class RepoConfig: min_samples: int | None = None start: str | None = None end: str | None = None + show_after: str | None = None @dataclass @@ -29,6 +30,7 @@ class OrgConfig: min_samples: int | None = None start: str | None = None end: str | None = None + show_after: str | None = None exclude_repos: list[str] = field(default_factory=list) @@ -49,6 +51,7 @@ class MultiRepoConfig: min_samples: int = 30 start: str | None = None end: str | None = None + show_after: str | None = None def resolve(self, repo: RepoConfig) -> RepoConfig: """Return a RepoConfig with global defaults applied for unset fields. 
@@ -77,6 +80,9 @@ def resolve(self, repo: RepoConfig) -> RepoConfig: ), start=repo.start if repo.start is not None else self.start, end=repo.end if repo.end is not None else self.end, + show_after=repo.show_after + if repo.show_after is not None + else self.show_after, ) @@ -136,6 +142,7 @@ def parse_config_file(path: Path) -> MultiRepoConfig: min_samples=int(raw_defaults.get("min_samples", 30)), start=_optional_str(raw_defaults, "start"), end=_optional_str(raw_defaults, "end"), + show_after=_optional_str(raw_defaults, "show_after"), ) except (TypeError, ValueError) as e: raise ValueError(f"Invalid value in [defaults]: {e}") from e @@ -166,6 +173,7 @@ def parse_config_file(path: Path) -> MultiRepoConfig: ), start=_optional_str(raw_repo, "start"), end=_optional_str(raw_repo, "end"), + show_after=_optional_str(raw_repo, "show_after"), ) except (TypeError, ValueError) as e: raise ValueError( @@ -208,6 +216,7 @@ def parse_config_file(path: Path) -> MultiRepoConfig: ), start=_optional_str(raw_org, "start"), end=_optional_str(raw_org, "end"), + show_after=_optional_str(raw_org, "show_after"), exclude_repos=[str(x) for x in exclude], ) except (TypeError, ValueError) as e: