Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ uv run review-classify classify --repo owner/repo --format json > outliers.json

# Exclude PRs merged into the primary branch (main/master)
uv run review-classify classify --repo owner/repo --exclude-primary-merged

# Show only outliers merged on or after 2024-12-15 (stats baseline unchanged)
uv run review-classify classify --repo owner/repo \
--start 2024-01-01 --end 2024-11-30 \
--show-after 2024-12-15
```

| Option | Description |
Expand All @@ -127,6 +132,7 @@ uv run review-classify classify --repo owner/repo --exclude-primary-merged
| `--start` | Start of the classification window (YYYY-MM-DD). |
| `--end` | End of the classification window (YYYY-MM-DD). |
| `--exclude-primary-merged` | Exclude PRs whose base branch is `main` or `master`. |
| `--show-after` | Only display outliers merged after 00:00 UTC on this date (YYYY-MM-DD), so PRs merged on the date itself are included. Stats baseline is unaffected. |
| `--verbose` / `-v` | Print progress details. |

#### Classification window (`--start` / `--end`)
Expand Down Expand Up @@ -163,6 +169,26 @@ Pass `--exclude-primary-merged` to restrict analysis to PRs that were **not** me
uv run review-classify classify --repo owner/repo --exclude-primary-merged
```

#### Filtering output to recent PRs (`--show-after`)

`--show-after YYYY-MM-DD` filters the **displayed** outliers to only those merged after the given date. The cutoff is 00:00 UTC on that date, so PRs merged on the date itself are still shown. The stats baseline (`--start` / `--end`) is computed from the full window as normal — only the output is narrowed.

This is the right tool when you have a long history in the database but only want to act on recent outliers. For example, compute stats from an 11-month baseline (January–November 2024) but display only outliers merged from mid-December onward:

```
[--start ──────────────────── --end] >end … [--show-after] … today
 ↑                            ↑              ↑
 baseline start               baseline end   only show from here
```

```bash
# Compute stats from Jan–Nov 2024; display only outliers merged after 2024-12-15
uv run review-classify classify --repo owner/repo \
--start 2024-01-01 \
--end 2024-11-30 \
--show-after 2024-12-15
```

### Per-repository analysis

Outlier detection is always **scoped to a single repository**. When you target multiple repositories (via `--org`, multiple `--repo` flags, or a config file), each repository is analysed independently:
Expand Down Expand Up @@ -222,6 +248,7 @@ threshold = 2.0
min_samples = 30
start = "2024-01-01"
end = "2024-06-30"
show_after = "2024-12-15" # only display outliers merged after this date

# Individual repositories ─────────────────────────────────────────────────────

Expand Down
31 changes: 31 additions & 0 deletions src/review_classification/cli/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,15 @@ def _detect_single(
classify_start: str | None,
classify_end: str | None,
exclude_primary_merged: bool = False,
show_after: str | None = None,
) -> RepoClassifyResult:
"""Run outlier detection for a single repository."""
repo = GitHubRepo.from_string(repo_name)
full_name = f"{repo.owner}/{repo.name}"

classify_start_dt: datetime | None = None
classify_end_dt: datetime | None = None
show_after_dt: datetime | None = None

if classify_start:
classify_start_dt = datetime.strptime(classify_start, "%Y-%m-%d").replace(
Expand All @@ -119,6 +121,8 @@ def _detect_single(
classify_end_dt = datetime.strptime(classify_end, "%Y-%m-%d").replace(
hour=23, minute=59, second=59, tzinfo=UTC
)
if show_after:
show_after_dt = datetime.strptime(show_after, "%Y-%m-%d").replace(tzinfo=UTC)

exclude_branches = _PRIMARY_BRANCHES if exclude_primary_merged else None

Expand All @@ -130,6 +134,8 @@ def _detect_single(
start_label = classify_start or "unbounded"
end_label = classify_end or "unbounded"
typer.echo(f"Classification window: {start_label} to {end_label}")
if show_after_dt:
typer.echo(f"Showing outliers merged after: {show_after}")
if exclude_primary_merged:
branches = sorted(_PRIMARY_BRANCHES)
typer.echo(f"Excluding PRs merged into primary branches: {branches}")
Expand Down Expand Up @@ -192,6 +198,11 @@ def _detect_single(

save_outlier_scores(session, full_name, results, sample_size)

if show_after_dt:
results = [
r for r in results if r.merged_at and r.merged_at > show_after_dt
]
Comment thread
ghinks marked this conversation as resolved.

outliers = [r for r in results if r.is_outlier]

if verbose:
Expand Down Expand Up @@ -243,6 +254,7 @@ def _resolve_targets(
default_min_samples: int | None = None,
default_start: str | None = None,
default_end: str | None = None,
default_show_after: str | None = None,
) -> list[RepoConfig]:
"""Resolve CLI arguments and config file into a concrete list of repositories."""
from ..queries.github_client import get_org_repos
Expand Down Expand Up @@ -293,6 +305,11 @@ def add_repo(rc: RepoConfig) -> None:
org_cfg.start if org_cfg.start is not None else multi.start
),
end=(org_cfg.end if org_cfg.end is not None else multi.end),
show_after=(
org_cfg.show_after
if org_cfg.show_after is not None
else multi.show_after
),
)
add_repo(rc)

Expand All @@ -306,6 +323,7 @@ def add_repo(rc: RepoConfig) -> None:
min_samples=default_min_samples,
start=default_start,
end=default_end,
show_after=default_show_after,
)
add_repo(rc)

Expand All @@ -321,6 +339,7 @@ def add_repo(rc: RepoConfig) -> None:
min_samples=default_min_samples,
start=default_start,
end=default_end,
show_after=default_show_after,
)
add_repo(rc)

Expand Down Expand Up @@ -481,6 +500,16 @@ def classify(
help="Exclude PRs merged into the primary branch (main/master).",
),
] = False,
show_after: Annotated[
str | None,
typer.Option(
"--show-after",
help=(
"Only display outliers merged after this date (YYYY-MM-DD). "
"Stats baseline is unaffected — use this to focus output on recent PRs."
),
),
] = None,
) -> None:
"""Classify previously fetched PRs using z-score outlier analysis.

Expand Down Expand Up @@ -513,6 +542,7 @@ def classify(
default_min_samples=min_samples,
default_start=start,
default_end=end,
default_show_after=show_after,
)

repo_results = [
Expand All @@ -524,6 +554,7 @@ def classify(
target.start,
target.end,
exclude_primary_merged=exclude_primary_merged,
show_after=target.show_after,
)
for target in targets
]
Expand Down
9 changes: 9 additions & 0 deletions src/review_classification/cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class RepoConfig:
min_samples: int | None = None
start: str | None = None
end: str | None = None
show_after: str | None = None


@dataclass
Expand All @@ -29,6 +30,7 @@ class OrgConfig:
min_samples: int | None = None
start: str | None = None
end: str | None = None
show_after: str | None = None
exclude_repos: list[str] = field(default_factory=list)


Expand All @@ -49,6 +51,7 @@ class MultiRepoConfig:
min_samples: int = 30
start: str | None = None
end: str | None = None
show_after: str | None = None

def resolve(self, repo: RepoConfig) -> RepoConfig:
"""Return a RepoConfig with global defaults applied for unset fields.
Expand Down Expand Up @@ -77,6 +80,9 @@ def resolve(self, repo: RepoConfig) -> RepoConfig:
),
start=repo.start if repo.start is not None else self.start,
end=repo.end if repo.end is not None else self.end,
show_after=repo.show_after
if repo.show_after is not None
else self.show_after,
)


Expand Down Expand Up @@ -136,6 +142,7 @@ def parse_config_file(path: Path) -> MultiRepoConfig:
min_samples=int(raw_defaults.get("min_samples", 30)),
start=_optional_str(raw_defaults, "start"),
end=_optional_str(raw_defaults, "end"),
show_after=_optional_str(raw_defaults, "show_after"),
)
except (TypeError, ValueError) as e:
raise ValueError(f"Invalid value in [defaults]: {e}") from e
Expand Down Expand Up @@ -166,6 +173,7 @@ def parse_config_file(path: Path) -> MultiRepoConfig:
),
start=_optional_str(raw_repo, "start"),
end=_optional_str(raw_repo, "end"),
show_after=_optional_str(raw_repo, "show_after"),
)
except (TypeError, ValueError) as e:
raise ValueError(
Expand Down Expand Up @@ -208,6 +216,7 @@ def parse_config_file(path: Path) -> MultiRepoConfig:
),
start=_optional_str(raw_org, "start"),
end=_optional_str(raw_org, "end"),
show_after=_optional_str(raw_org, "show_after"),
exclude_repos=[str(x) for x in exclude],
)
except (TypeError, ValueError) as e:
Expand Down
Loading