From c4f36add907cbf1b0bdcb6b4db036742b516c8a5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Apr 2026 10:40:41 +0000 Subject: [PATCH 1/5] Add server/app.py shim at repo root for openenv multi-mode deployment validation Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/5b9f0f08-3886-4dd3-b647-1a1eb0e56ee8 Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com> --- server/__init__.py | 0 server/app.py | 7 +++++++ 2 files changed, 7 insertions(+) create mode 100644 server/__init__.py create mode 100644 server/app.py diff --git a/server/__init__.py b/server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/app.py b/server/app.py new file mode 100644 index 0000000..65bc94e --- /dev/null +++ b/server/app.py @@ -0,0 +1,7 @@ +"""Re-export the FastAPI app for multi-mode deployment compatibility. + +The openenv validator expects ``server/app.py`` at the repository root. +The canonical implementation lives in ``contract_env.server.app``. +""" + +from contract_env.server.app import app # noqa: F401 From 89c56e5cad63da8b2cf4dba55cab248151b95dc5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Apr 2026 11:04:00 +0000 Subject: [PATCH 2/5] Fix CORS whitespace stripping, sanitise /reset error handler, increase HEALTHCHECK timeout, widen negation window, document API_KEY fallback Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/0a8bea2f-6d2d-41b2-a461-42d97bf13e68 Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com> --- Dockerfile | 2 +- README.md | 2 +- contract_env/env/graders.py | 4 ++-- contract_env/server/app.py | 7 ++++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6b2cce1..657856e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,7 @@ USER appuser EXPOSE 7860 -HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \ +HEALTHCHECK --interval=30s --timeout=15s --start-period=10s --retries=3 \ CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:7860/health', timeout=3)" CMD ["uvicorn", "contract_env.server.app:app", "--host", "0.0.0.0", "--port", "7860"] \ No newline at end of file diff --git a/README.md b/README.md index 7d27ade..82f5ecc 100644 --- a/README.md +++ b/README.md @@ -251,7 +251,7 @@ python inference.py --benchmark --mode api | Variable | Required | Default | Description | |----------|----------|---------|-------------| -| `HF_TOKEN` | Yes | — | HuggingFace / LLM API key | +| `HF_TOKEN` | Yes | — | HuggingFace / LLM API key (falls back to `API_KEY` if unset) | | `API_BASE_URL` | No | `https://router.huggingface.co/v1` | LLM API endpoint | | `MODEL_NAME` | No | `Qwen/Qwen2.5-72B-Instruct` | Model identifier | | `BENCHMARK` | No | `contract_negotiation` | Benchmark name in [START] log line | diff --git a/contract_env/env/graders.py b/contract_env/env/graders.py index 2e3728e..d18b7f5 100644 --- a/contract_env/env/graders.py +++ b/contract_env/env/graders.py @@ -67,8 +67,8 @@ def _is_negated(text_lower: str, keyword_lower: str) -> bool: if pos == -1: break found_any = True - # Check the 60-character window before the match for negation cues - window_start = max(0, pos - 60) + # Check the 150-character window before the match for negation cues + window_start = max(0, pos - 150) preceding = text_lower[window_start:pos] if not any(neg in preceding for neg in _NEGATION_PREFIXES): all_negated = False diff --git a/contract_env/server/app.py b/contract_env/server/app.py index 97e77fa..d468cb3 100644 --- a/contract_env/server/app.py +++ b/contract_env/server/app.py @@ -41,7 +41,7 @@ class EvaluateQualityRequest(BaseModel): app.add_middleware( CORSMiddleware, - allow_origins=os.getenv("CORS_ORIGINS", "*").split(","), + allow_origins=[o.strip() for o in os.getenv("CORS_ORIGINS", "*").split(",")], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], @@ -124,8 +124,9 @@ def reset(body: Optional[ResetRequest] = None): return {"observation": obs.model_dump()} except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + except Exception: + logger.exception("Unexpected error during /reset") + raise HTTPException(status_code=500, detail="Internal server error") # ── STEP ──────────────────────────────────────────────────────────────── From 050d680aa6c7e80ecb0e691a2e1fc6e6c4e9121c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Apr 2026 11:10:46 +0000 Subject: [PATCH 3/5] Fix step-after-done reward violation, add missing requests dep, add root .dockerignore, copy docker-compose.yml to root Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/7afa6eb2-2d45-4385-a8a3-6e84d4609368 Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com> --- .dockerignore | 18 ++++++++++++++++++ contract_env/env/environment.py | 2 +- docker-compose.yml | 10 ++++++++++ pyproject.toml | 3 ++- requirements.txt | 1 + 5 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 .dockerignore create mode 100644 docker-compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d806617 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,18 @@ +.git +.github +.gitignore +.venv +venv +__pycache__ +*.py[cod] +*.egg-info +.pytest_cache +.mypy_cache +.cursor +uv.lock +hf_create.py +verify_graders.py +contract_env/tests +contract_env/scripts +contract_env/.dockerignore +contract_env/docker-compose.yml diff --git a/contract_env/env/environment.py b/contract_env/env/environment.py index 4dfd20f..32cb4f0 100644 --- a/contract_env/env/environment.py +++ b/contract_env/env/environment.py @@ -154,7 +154,7 @@ def step(self, action: Action) -> Tuple[Observation, float, bool, dict[str, Any] info: dict[str, Any] = {} if self.done: - return self._make_observation(), 0.0, True, {"error": "already_done"} + return self._make_observation(), 0.001, True, {"error": "already_done"} err = self._validate_action(action) if err: diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..91b71ba --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,10 @@ +services: + contract-env: + build: . + image: contract-negotiation-env:latest + ports: + - "7860:7860" + environment: + HOST: "0.0.0.0" + PORT: "7860" + HF_TOKEN: "${HF_TOKEN:-}" diff --git a/pyproject.toml b/pyproject.toml index cc655a5..172ffb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,8 @@ dependencies = [ "fastapi>=0.115.0", "uvicorn[standard]>=0.32.0", "openai>=1.50.0", - "openenv>=0.1.13" + "openenv>=0.1.13", + "requests>=2.31.0" ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index 0280db9..76b935d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ fastapi>=0.115.0 uvicorn[standard]>=0.32.0 openai>=1.50.0 openenv>=0.1.13 +requests>=2.31.0 From 304c8205c6ef98bd093c368d7656fad9cc488fa5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Apr 2026 11:20:43 +0000 Subject: [PATCH 4/5] Clean up remaining code quality issues: derive _VALID_ACTIONS from ActionType, remove dead code, add return type annotations, add /step exception handling Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/ed6d217f-adb1-43f0-bad7-d360d5c3a800 Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com> --- contract_env/env/graders.py | 2 +- contract_env/server/app.py | 11 +++++++---- inference.py | 10 +++++----- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/contract_env/env/graders.py b/contract_env/env/graders.py index d18b7f5..872db64 100644 --- a/contract_env/env/graders.py +++ b/contract_env/env/graders.py @@ -239,7 +239,7 @@ def evaluate_action( # Completeness bonus: reward rewrites that include required legal elements # defined on the task (if any). completeness = 0.0 - required_elems: list[str] = getattr(task, "required_elements", []) + required_elems = task.required_elements if action.action_type in ("EDIT_CLAUSE", "PROPOSE_COUNTER") and content and required_elems: completeness = clause_completeness_score(content, required_elems) diff --git a/contract_env/server/app.py b/contract_env/server/app.py index d468cb3..f2d5ed0 100644 --- a/contract_env/server/app.py +++ b/contract_env/server/app.py @@ -2,7 +2,7 @@ import logging import os -from typing import Optional +from typing import Any, Optional from fastapi import FastAPI, HTTPException, Request from fastapi.exceptions import RequestValidationError @@ -112,7 +112,7 @@ def get_state(): # ── RESET ─────────────────────────────────────────────────────────────── @app.post("/reset") -def reset(body: Optional[ResetRequest] = None): +def reset(body: Optional[ResetRequest] = None) -> dict[str, Any]: """Start a new episode. Optionally pass ``{"task_id": "..."}`` to target a specific task; @@ -131,7 +131,7 @@ def reset(body: Optional[ResetRequest] = None): # ── STEP ──────────────────────────────────────────────────────────────── @app.post("/step") -def step(req: StepRequest): +def step(req: StepRequest) -> dict[str, Any]: try: action = Action(action_type=req.action_type, content=req.content) obs, reward, done, info = _env.step(action) @@ -145,6 +145,9 @@ def step(req: StepRequest): except ValidationError as e: raise HTTPException(status_code=422, detail=e.errors()) + except Exception: + logger.exception("Unexpected error during /step") + raise HTTPException(status_code=500, detail="Internal server error") # ── SCHEMA ────────────────────────────────────────────────────────────── @@ -161,7 +164,7 @@ def get_schema(): # ── EVALUATE QUALITY ───────────────────────────────────────────────────── @app.post("/evaluate-quality") -def evaluate_quality(body: EvaluateQualityRequest): +def evaluate_quality(body: EvaluateQualityRequest) -> dict[str, float]: """Score an arbitrary contract text against the current task. Body: {"contract_text": "..."} diff --git a/inference.py b/inference.py index a88b986..a5cdaa2 100644 --- a/inference.py +++ b/inference.py @@ -26,7 +26,7 @@ import os import random import re -from typing import Any, Optional +from typing import Any, Optional, get_args try: from dotenv import load_dotenv @@ -40,9 +40,10 @@ def load_dotenv(*_a: Any, **_kw: Any) -> None: # type: ignore[misc] from contract_env.env.graders import ( effective_risk_high, keyword_match_score, + observation_risk_float, trap_unresolved, ) -from contract_env.env.models import Action +from contract_env.env.models import Action, ActionType from contract_env.env.tasks import TASKS, NegotiationTask log = logging.getLogger(__name__) @@ -139,7 +140,7 @@ def _llm_chat( if attempt == _MAX_RETRIES: raise log.warning("[DEBUG] LLM call attempt %d failed: %s", attempt + 1, exc) - return "" + raise RuntimeError("LLM call failed after all retries") # unreachable; satisfies type-checker def _parse_llm_json(text: str) -> Optional[dict]: @@ -273,7 +274,7 @@ def _build_rewrite_prompt( ] -_VALID_ACTIONS = {"FLAG_RISK", "EDIT_CLAUSE", "ACCEPT", "REJECT", "PROPOSE_COUNTER"} +_VALID_ACTIONS = set(get_args(ActionType)) # Optimal action sequences per intent level for rule-based fallback. # MODERATE tasks front-load PROPOSE_COUNTER since it's the ideal action for @@ -455,7 +456,6 @@ def _choose( # ── 4d. Smart ACCEPT gate: only accept when quality actually improved ─ if action_type == "ACCEPT": - from contract_env.env.graders import observation_risk_float current_risk = observation_risk_float(task, contract_text) original_risk = observation_risk_float(task, task.contract_text) # Block acceptance if the contract hasn't improved meaningfully From 55609fe9ba3dd7171a4af74335625c2826ceca9e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 11 Apr 2026 11:28:03 +0000 Subject: [PATCH 5/5] Update inference.py to sample format: add LOCAL_IMAGE_NAME, fix _HTTPEnvClient.reset() task_id support, add resource cleanup Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/b833f101-d40f-4799-9ba4-ad246091892b Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com> --- README.md | 1 + inference.py | 59 +++++++++++++++++++++++++++++++++------------------- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 82f5ecc..a46d24b 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,7 @@ python inference.py --benchmark --mode api | `HF_TOKEN` | Yes | — | HuggingFace / LLM API key (falls back to `API_KEY` if unset) | | `API_BASE_URL` | No | `https://router.huggingface.co/v1` | LLM API endpoint | | `MODEL_NAME` | No | `Qwen/Qwen2.5-72B-Instruct` | Model identifier | +| `LOCAL_IMAGE_NAME` | No | `contract-negotiation-env` | Docker image name for `from_docker_image()` client usage | | `BENCHMARK` | No | `contract_negotiation` | Benchmark name in [START] log line | | `ENV_SERVER_URL` | No | `http://localhost:7860` | Docker server URL (for `--mode api`) | | `PORT` | No | `7860` | Server port | diff --git a/inference.py b/inference.py index a5cdaa2..8e2eedc 100644 --- a/inference.py +++ b/inference.py @@ -1,13 +1,19 @@ """ -Inference Script — Contract Negotiation Environment -===================================================== -LLM-driven agent that analyses contract clauses, identifies legal risks, -and proposes safer alternatives through multi-turn negotiation. - -MANDATORY environment variables: - API_BASE_URL The API endpoint for the LLM. - MODEL_NAME The model identifier to use for inference. - HF_TOKEN Your Hugging Face / API key. +Inference Script Example +=================================== +MANDATORY +- Before submitting, ensure the following variables are defined in your + environment configuration: + API_BASE_URL The API endpoint for the LLM. + MODEL_NAME The model identifier to use for inference. + HF_TOKEN Your Hugging Face / API key. + LOCAL_IMAGE_NAME The name of the local Docker image to use for the + environment if you are using from_docker_image() method. + +- Defaults are set only for API_BASE_URL and MODEL_NAME + (and should reflect your active inference setup): + API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1") + MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct") STDOUT FORMAT (strictly followed): [START] task= env= model= @@ -54,6 +60,7 @@ def load_dotenv(*_a: Any, **_kw: Any) -> None: # type: ignore[misc] HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY") BENCHMARK = os.getenv("BENCHMARK", "contract_negotiation") ENV_SERVER_URL = os.getenv("ENV_SERVER_URL", "http://localhost:7860") +LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "contract-negotiation-env") MAX_STEPS = 10 SUCCESS_SCORE_THRESHOLD = 0.5 HISTORY_WINDOW = 8 # How many recent history entries to show the LLM @@ -518,20 +525,25 @@ def __enter__(self): def __exit__(self, *exc: Any) -> None: self.close() - def reset(self): - resp = self._session.post(f"{self.base_url}/reset", timeout=self._timeout) + def reset(self, task_id: Optional[str] = None): + body: dict[str, Any] = {} + if task_id is not None: + body["task_id"] = task_id + resp = self._session.post( + f"{self.base_url}/reset", json=body or None, timeout=self._timeout, + ) resp.raise_for_status() data = resp.json() obs = data["observation"] # Map to a NegotiationTask if possible (for _choose() to use) - task_id = None + resolved_task_id = None try: state = self._session.get(f"{self.base_url}/state", timeout=self._timeout).json() - task_id = state.get("task_id") + resolved_task_id = state.get("task_id") except Exception: pass - if task_id: - self.current_task = next((t for t in TASKS if t.id == task_id), None) + if resolved_task_id: + self.current_task = next((t for t in TASKS if t.id == resolved_task_id), None) if self.current_task is None: self.current_task = TASKS[self._task_idx % len(TASKS)] self._task_idx += 1 @@ -568,16 +580,12 @@ def run_episode(env, task_id: Optional[str] = None) -> tuple[float, str]: Args: env: Environment instance (ContractEnv or _HTTPEnvClient). - task_id: If given, reset to this specific task (local mode only). + task_id: If given, reset to this specific task. Returns (mean_episode_score, task_id). """ if task_id is not None: - try: - obs_obj = env.reset(task_id=task_id) - except TypeError: - # env.reset() doesn't accept task_id (e.g., _HTTPEnvClient) - obs_obj = env.reset() + obs_obj = env.reset(task_id=task_id) else: obs_obj = env.reset() task = env.current_task @@ -690,6 +698,15 @@ def main() -> None: env = ContractEnv() print("[CONFIG] mode=local", flush=True) + try: + _run_episodes(env, args) + finally: + if hasattr(env, "close"): + env.close() + + +def _run_episodes(env, args) -> None: + """Execute episode loop, retries, and print summary.""" episodes_to_run = len(TASKS) if args.benchmark else args.episodes total_score = 0.0