Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ jobs:
run: pip install -e ".[dev]"

- name: Run migrations
run: alembic upgrade head
run: alembic -c alembic/alembic.ini upgrade head

- name: Run unit tests
run: |
Expand Down Expand Up @@ -419,7 +419,7 @@ jobs:
run: pip install -e ".[dev]"

- name: Run migrations
run: alembic upgrade head
run: alembic -c alembic/alembic.ini upgrade head

- name: Run integration tests
env:
Expand Down Expand Up @@ -491,6 +491,7 @@ jobs:
context: .
target: production
push: false
load: true
cache-from: type=gha
cache-to: type=gha,mode=max
tags: forge:prod-${{ github.sha }}
Expand Down
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
git \
nodejs \
npm \
ca-certificates \
gnupg \
&& install -m 0755 -d /etc/apt/keyrings \
&& curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg \
&& chmod a+r /etc/apt/keyrings/docker.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $(. /etc/os-release && echo $VERSION_CODENAME) stable" > /etc/apt/sources.list.d/docker.list \
&& apt-get update && apt-get install -y --no-install-recommends docker-ce-cli \
&& rm -rf /var/lib/apt/lists/*

RUN groupadd --gid 1001 forge && \
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,15 @@ services:
--queues=default,commander,planner,reviewer,qa,release,security,builder,ingestion,skill_drills,ci_fixer
--concurrency=4
--max-tasks-per-child=100
user: root
environment:
PHALANX_WORKER: "1" # forces NullPool — prevents "Future attached to different loop" in forked workers
OPENAI_MODEL_REASONING: "gpt-4.1" # reasoning agents: Commander, Planner, QA, Reviewer, Release
volumes:
- forge-repos:/tmp/forge-repos
- ./configs:/app/configs:ro
- ./skill-registry:/app/skill-registry:ro
- /var/run/docker.sock:/var/run/docker.sock # required: CI fixer spawns sandbox containers
depends_on:
postgres:
condition: service_healthy
Expand Down
15 changes: 7 additions & 8 deletions docker/sandbox/go/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
FROM golang:1.22-alpine

# Install staticcheck and golangci-lint for broader Go lint coverage
RUN go install honnef.co/go/tools/cmd/staticcheck@2024.1.0 && \
go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.59.1
RUN apk add --no-cache git

# Create non-root user
RUN adduser -D -u 1000 phalanx
RUN go install honnef.co/go/tools/cmd/staticcheck@v0.5.1

COPY ../reset.sh /phalanx/reset.sh
RUN chmod +x /phalanx/reset.sh
RUN adduser -D -u 1000 phalanx

RUN mkdir -p /workspace && chown phalanx:phalanx /workspace
COPY reset.sh /phalanx/reset.sh
RUN chmod +x /phalanx/reset.sh && \
mkdir -p /workspace && \
chown phalanx /workspace

WORKDIR /workspace
USER phalanx
13 changes: 6 additions & 7 deletions docker/sandbox/node/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
FROM node:20-slim

# Create non-root user
RUN useradd -m -u 1000 -s /bin/bash phalanx 2>/dev/null || true
RUN usermod -l phalanx -d /home/phalanx -m node && \
groupmod -n phalanx node 2>/dev/null || true

# Install common Node tooling used by the CI fixer.
RUN npm install -g \
eslint@8.57.0 \
typescript@5.4.5 \
jest@29.7.0 \
--no-fund --no-audit

COPY ../reset.sh /phalanx/reset.sh
RUN chmod +x /phalanx/reset.sh

RUN mkdir -p /workspace && chown phalanx:phalanx /workspace
COPY reset.sh /phalanx/reset.sh
RUN chmod +x /phalanx/reset.sh && \
mkdir -p /workspace && \
chown phalanx:phalanx /workspace 2>/dev/null || chown phalanx /workspace

WORKDIR /workspace
USER phalanx
13 changes: 12 additions & 1 deletion phalanx/agents/ci_fixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1460,7 +1460,18 @@ async def _update_fingerprint_on_success(
# ── Auth helpers ────────────────────────────────────────────────────────────

def _decrypt_key(self, encrypted_key: str) -> str:
return encrypted_key # Phase 2: KMS decrypt
if not encrypted_key:
return ""
enc_key = getattr(settings, "encryption_key", None)
if not enc_key:
return encrypted_key
try:
from cryptography.fernet import Fernet, InvalidToken # noqa: PLC0415

f = Fernet(enc_key.encode())
return f.decrypt(encrypted_key.encode()).decode()
except (InvalidToken, Exception):
return encrypted_key

def _get_github_token(self, integration: CIIntegration) -> str:
    """Return the GitHub token for *integration*.

    Prefers the token stored on the integration itself; falls back to the
    globally configured ``settings.github_token`` when the integration has
    none (empty string or None).
    """
    own_token = integration.github_token
    if own_token:
        return own_token
    return settings.github_token
Expand Down
60 changes: 38 additions & 22 deletions phalanx/ci_fixer/analyst.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,9 @@ def is_actionable(self) -> bool:
"corrected_lines" (a JSON array of strings, one per line, each ending with \\n).
3. "corrected_lines" may differ from the original window by at most \
{max_line_delta} lines (adding or removing). Do NOT rewrite the whole file.
4. NEVER modify test files (paths containing /test or test_).
5. For unused imports (F401): delete the import line only.
4. NEVER rewrite test logic — only mechanical lint fixes (unused imports, line length)
in test files are allowed. Do NOT change assertions, test structure, or test data.
5. For unused imports (F401): delete the import line only. Set corrected_lines to [].
6. For line-too-long (E501): wrap or shorten the line only.
7. For future-import order (F404): move the __future__ import to line 1 only.
8. If you cannot produce a high or medium confidence fix, set \
Expand Down Expand Up @@ -205,11 +206,18 @@ def analyze(
root_cause="Could not read any of the failing files from workspace",
)

lint_only = bool(
parsed_log.lint_errors
and not parsed_log.type_errors
and not parsed_log.test_failures
and not parsed_log.build_errors
)

# ── Phase 2: history check ─────────────────────────────────────────────
if fingerprint_hash and self._history_lookup is not None:
cached = self._history_lookup(fingerprint_hash)
if cached:
patches = self._parse_and_validate_patches(cached, windows)
patches = self._parse_and_validate_patches(cached, windows, lint_only=lint_only)
if patches:
log.info(
"ci_analyst.history_hit",
Expand Down Expand Up @@ -258,7 +266,9 @@ def analyze(
log.warning("ci_analyst.json_parse_failed", error=str(exc), raw=raw[:500])
return FixPlan(confidence="low", root_cause="LLM returned non-JSON response")

patches = self._parse_and_validate_patches(data.get("patches", []), windows)
patches = self._parse_and_validate_patches(
data.get("patches", []), windows, lint_only=lint_only
)
confidence = data.get("confidence", "low")

# If patch validation rejected everything, downgrade to low
Expand All @@ -283,10 +293,10 @@ def _read_windows(self, workspace: Path, parsed_log: ParsedLog) -> list[FileWind
"""
# Build map: file_path → list of error line numbers
error_lines_by_file: dict[str, list[int]] = {}
for e in parsed_log.lint_errors:
error_lines_by_file.setdefault(e.file, []).append(e.line)
for e in parsed_log.type_errors:
error_lines_by_file.setdefault(e.file, []).append(e.line)
for le in parsed_log.lint_errors:
error_lines_by_file.setdefault(le.file, []).append(le.line)
for te in parsed_log.type_errors:
error_lines_by_file.setdefault(te.file, []).append(te.line)
# For test failures we have no line number — read top of file
for f in parsed_log.test_failures:
error_lines_by_file.setdefault(f.file, []).append(1)
Expand Down Expand Up @@ -333,7 +343,7 @@ def _read_windows(self, workspace: Path, parsed_log: ParsedLog) -> list[FileWind
# ── Patch validation ───────────────────────────────────────────────────────

def _parse_and_validate_patches(
self, raw_patches: list, windows: list[FileWindow]
self, raw_patches: list, windows: list[FileWindow], lint_only: bool = False
) -> list[FilePatch]:
"""
Parse LLM patch dicts, apply guard rails, return only safe patches.
Expand All @@ -342,7 +352,8 @@ def _parse_and_validate_patches(
- path not in the windows we sent (LLM invented a file)
- start_line / end_line don't match the window we sent (off-by-more-than-2)
- |delta| > _MAX_LINE_DELTA (LLM rewrote too much)
- path looks like a test file
- path looks like a test file (unless lint_only=True — lint fixes in test
files are valid, e.g. removing unused imports)
"""
window_by_path = {w.path: w for w in windows}
safe: list[FilePatch] = []
Expand All @@ -358,14 +369,14 @@ def _parse_and_validate_patches(
log.warning("ci_analyst.patch_unknown_file", path=path)
continue

# Guard: never touch test files
if _is_test_file(path):
# Guard: never touch test files unless it's a lint-only fix
if _is_test_file(path) and not lint_only:
log.warning("ci_analyst.patch_test_file_rejected", path=path)
continue

# Guard: corrected_lines must be a non-empty list of strings
if not isinstance(corrected, list) or not corrected:
log.warning("ci_analyst.patch_empty_corrected_lines", path=path)
# Guard: corrected_lines must be a list (empty = delete the lines)
if not isinstance(corrected, list):
log.warning("ci_analyst.patch_invalid_corrected_lines", path=path)
continue

# Ensure every line ends with \n
Expand All @@ -379,18 +390,23 @@ def _parse_and_validate_patches(
log.warning("ci_analyst.patch_missing_line_range", path=path)
continue

if abs(start - window.start_line) > 2 or abs(end - window.end_line) > 2:
# Accept sub-ranges (LLM targeting a specific line within the window is correct).
# Clamp to window bounds if the patch extends outside.
if start < window.start_line or end > window.end_line:
log.warning(
"ci_analyst.patch_line_range_mismatch",
"ci_analyst.patch_line_range_outside_window",
path=path,
expected_start=window.start_line,
expected_end=window.end_line,
window_start=window.start_line,
window_end=window.end_line,
got_start=start,
got_end=end,
)
# Clamp to the window we actually sent — safer than rejecting
start = window.start_line
end = window.end_line
start = window.start_line if start < window.start_line else start
end = window.end_line if end > window.end_line else end
# If clamping made start > end, fall back to the full window
if start > end:
start = window.start_line
end = window.end_line

original_size = end - start + 1
delta = len(corrected) - original_size
Expand Down
Loading
Loading