From c4f36add907cbf1b0bdcb6b4db036742b516c8a5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Apr 2026 10:40:41 +0000
Subject: [PATCH 1/5] Add server/app.py shim at repo root for openenv
 multi-mode deployment validation

Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/5b9f0f08-3886-4dd3-b647-1a1eb0e56ee8

Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com>
---
 server/__init__.py | 0
 server/app.py      | 7 +++++++
 2 files changed, 7 insertions(+)
 create mode 100644 server/__init__.py
 create mode 100644 server/app.py

diff --git a/server/__init__.py b/server/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/server/app.py b/server/app.py
new file mode 100644
index 0000000..65bc94e
--- /dev/null
+++ b/server/app.py
@@ -0,0 +1,7 @@
+"""Re-export the FastAPI app for multi-mode deployment compatibility.
+
+The openenv validator expects ``server/app.py`` at the repository root.
+The canonical implementation lives in ``contract_env.server.app``.
+"""
+
+from contract_env.server.app import app  # noqa: F401

From 89c56e5cad63da8b2cf4dba55cab248151b95dc5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Apr 2026 11:04:00 +0000
Subject: [PATCH 2/5] Fix CORS whitespace stripping, sanitise /reset error
 handler, increase HEALTHCHECK timeout, widen negation window, document
 API_KEY fallback

Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/0a8bea2f-6d2d-41b2-a461-42d97bf13e68

Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com>
---
 Dockerfile                  | 2 +-
 README.md                   | 2 +-
 contract_env/env/graders.py | 4 ++--
 contract_env/server/app.py  | 7 ++++---
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 6b2cce1..657856e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,7 +19,7 @@ USER appuser
 
 EXPOSE 7860
 
-HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
+HEALTHCHECK --interval=30s --timeout=15s --start-period=10s --retries=3 \
     CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:7860/health', timeout=3)"
 
 CMD ["uvicorn", "contract_env.server.app:app", "--host", "0.0.0.0", "--port", "7860"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 7d27ade..82f5ecc 100644
--- a/README.md
+++ b/README.md
@@ -251,7 +251,7 @@ python inference.py --benchmark --mode api
 
 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
-| `HF_TOKEN` | Yes | — | HuggingFace / LLM API key |
+| `HF_TOKEN` | Yes | — | HuggingFace / LLM API key (falls back to `API_KEY` if unset) |
 | `API_BASE_URL` | No | `https://router.huggingface.co/v1` | LLM API endpoint |
 | `MODEL_NAME` | No | `Qwen/Qwen2.5-72B-Instruct` | Model identifier |
 | `BENCHMARK` | No | `contract_negotiation` | Benchmark name in [START] log line |
diff --git a/contract_env/env/graders.py b/contract_env/env/graders.py
index 2e3728e..d18b7f5 100644
--- a/contract_env/env/graders.py
+++ b/contract_env/env/graders.py
@@ -67,8 +67,8 @@ def _is_negated(text_lower: str, keyword_lower: str) -> bool:
         if pos == -1:
             break
         found_any = True
-        # Check the 60-character window before the match for negation cues
-        window_start = max(0, pos - 60)
+        # Check the 150-character window before the match for negation cues
+        window_start = max(0, pos - 150)
         preceding = text_lower[window_start:pos]
         if not any(neg in preceding for neg in _NEGATION_PREFIXES):
             all_negated = False
diff --git a/contract_env/server/app.py b/contract_env/server/app.py
index 97e77fa..d468cb3 100644
--- a/contract_env/server/app.py
+++ b/contract_env/server/app.py
@@ -41,7 +41,7 @@ class EvaluateQualityRequest(BaseModel):
 
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=os.getenv("CORS_ORIGINS", "*").split(","),
+    allow_origins=[o.strip() for o in os.getenv("CORS_ORIGINS", "*").split(",")],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -124,8 +124,9 @@ def reset(body: Optional[ResetRequest] = None):
         return {"observation": obs.model_dump()}
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+    except Exception:
+        logger.exception("Unexpected error during /reset")
+        raise HTTPException(status_code=500, detail="Internal server error")
 
 
 # ── STEP ────────────────────────────────────────────────────────────────

From 050d680aa6c7e80ecb0e691a2e1fc6e6c4e9121c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Apr 2026 11:10:46 +0000
Subject: [PATCH 3/5] Fix step-after-done reward violation, add missing
 requests dep, add root .dockerignore, copy docker-compose.yml to root

Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/7afa6eb2-2d45-4385-a8a3-6e84d4609368

Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com>
---
 .dockerignore                   | 18 ++++++++++++++++++
 contract_env/env/environment.py |  2 +-
 docker-compose.yml              | 10 ++++++++++
 pyproject.toml                  |  3 ++-
 requirements.txt                |  1 +
 5 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 docker-compose.yml

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..d806617
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,18 @@
+.git
+.github
+.gitignore
+.venv
+venv
+__pycache__
+*.py[cod]
+*.egg-info
+.pytest_cache
+.mypy_cache
+.cursor
+uv.lock
+hf_create.py
+verify_graders.py
+contract_env/tests
+contract_env/scripts
+contract_env/.dockerignore
+contract_env/docker-compose.yml
diff --git a/contract_env/env/environment.py b/contract_env/env/environment.py
index 4dfd20f..32cb4f0 100644
--- a/contract_env/env/environment.py
+++ b/contract_env/env/environment.py
@@ -154,7 +154,7 @@ def step(self, action: Action) -> Tuple[Observation, float, bool, dict[str, Any]
         info: dict[str, Any] = {}
 
         if self.done:
-            return self._make_observation(), 0.0, True, {"error": "already_done"}
+            return self._make_observation(), 0.001, True, {"error": "already_done"}
 
         err = self._validate_action(action)
         if err:
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..91b71ba
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,10 @@
+services:
+  contract-env:
+    build: .
+    image: contract-negotiation-env:latest
+    ports:
+      - "7860:7860"
+    environment:
+      HOST: "0.0.0.0"
+      PORT: "7860"
+      HF_TOKEN: "${HF_TOKEN:-}"
diff --git a/pyproject.toml b/pyproject.toml
index cc655a5..172ffb2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,8 @@ dependencies = [
     "fastapi>=0.115.0",
     "uvicorn[standard]>=0.32.0",
     "openai>=1.50.0",
-    "openenv>=0.1.13"
+    "openenv>=0.1.13",
+    "requests>=2.31.0"
 ]
 
 [project.optional-dependencies]
diff --git a/requirements.txt b/requirements.txt
index 0280db9..76b935d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ fastapi>=0.115.0
 uvicorn[standard]>=0.32.0
 openai>=1.50.0
 openenv>=0.1.13
+requests>=2.31.0

From 304c8205c6ef98bd093c368d7656fad9cc488fa5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Apr 2026 11:20:43 +0000
Subject: [PATCH 4/5] Clean up remaining code quality issues: derive
 _VALID_ACTIONS from ActionType, remove dead code, add return type
 annotations, add /step exception handling

Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/ed6d217f-adb1-43f0-bad7-d360d5c3a800

Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com>
---
 contract_env/env/graders.py |  2 +-
 contract_env/server/app.py  | 11 +++++++----
 inference.py                | 10 +++++-----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/contract_env/env/graders.py b/contract_env/env/graders.py
index d18b7f5..872db64 100644
--- a/contract_env/env/graders.py
+++ b/contract_env/env/graders.py
@@ -239,7 +239,7 @@ def evaluate_action(
     # Completeness bonus: reward rewrites that include required legal elements
     # defined on the task (if any).
     completeness = 0.0
-    required_elems: list[str] = getattr(task, "required_elements", [])
+    required_elems = task.required_elements
     if action.action_type in ("EDIT_CLAUSE", "PROPOSE_COUNTER") and content and required_elems:
         completeness = clause_completeness_score(content, required_elems)
 
diff --git a/contract_env/server/app.py b/contract_env/server/app.py
index d468cb3..f2d5ed0 100644
--- a/contract_env/server/app.py
+++ b/contract_env/server/app.py
@@ -2,7 +2,7 @@
 
 import logging
 import os
-from typing import Optional
+from typing import Any, Optional
 
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.exceptions import RequestValidationError
@@ -112,7 +112,7 @@ def get_state():
 
 # ── RESET ───────────────────────────────────────────────────────────────
 @app.post("/reset")
-def reset(body: Optional[ResetRequest] = None):
+def reset(body: Optional[ResetRequest] = None) -> dict[str, Any]:
     """Start a new episode.
 
     Optionally pass ``{"task_id": "..."}`` to target a specific task;
@@ -131,7 +131,7 @@ def reset(body: Optional[ResetRequest] = None):
 
 # ── STEP ────────────────────────────────────────────────────────────────
 @app.post("/step")
-def step(req: StepRequest):
+def step(req: StepRequest) -> dict[str, Any]:
     try:
         action = Action(action_type=req.action_type, content=req.content)
         obs, reward, done, info = _env.step(action)
@@ -145,6 +145,9 @@ def step(req: StepRequest):
 
     except ValidationError as e:
         raise HTTPException(status_code=422, detail=e.errors())
+    except Exception:
+        logger.exception("Unexpected error during /step")
+        raise HTTPException(status_code=500, detail="Internal server error")
 
 
 # ── SCHEMA ──────────────────────────────────────────────────────────────
@@ -161,7 +164,7 @@ def get_schema():
 
 # ── EVALUATE QUALITY ─────────────────────────────────────────────────────
 @app.post("/evaluate-quality")
-def evaluate_quality(body: EvaluateQualityRequest):
+def evaluate_quality(body: EvaluateQualityRequest) -> dict[str, float]:
     """Score an arbitrary contract text against the current task.
 
     Body: {"contract_text": "..."}
diff --git a/inference.py b/inference.py
index a88b986..a5cdaa2 100644
--- a/inference.py
+++ b/inference.py
@@ -26,7 +26,7 @@
 import os
 import random
 import re
-from typing import Any, Optional
+from typing import Any, Optional, get_args
 
 try:
     from dotenv import load_dotenv
@@ -40,9 +40,10 @@ def load_dotenv(*_a: Any, **_kw: Any) -> None:  # type: ignore[misc]
 from contract_env.env.graders import (
     effective_risk_high,
     keyword_match_score,
+    observation_risk_float,
     trap_unresolved,
 )
-from contract_env.env.models import Action
+from contract_env.env.models import Action, ActionType
 from contract_env.env.tasks import TASKS, NegotiationTask
 
 log = logging.getLogger(__name__)
@@ -139,7 +140,7 @@ def _llm_chat(
             if attempt == _MAX_RETRIES:
                 raise
             log.warning("[DEBUG] LLM call attempt %d failed: %s", attempt + 1, exc)
-    return ""
+    raise RuntimeError("LLM call failed after all retries")  # unreachable; satisfies type-checker
 
 
 def _parse_llm_json(text: str) -> Optional[dict]:
@@ -273,7 +274,7 @@ def _build_rewrite_prompt(
     ]
 
 
-_VALID_ACTIONS = {"FLAG_RISK", "EDIT_CLAUSE", "ACCEPT", "REJECT", "PROPOSE_COUNTER"}
+_VALID_ACTIONS = set(get_args(ActionType))
 
 # Optimal action sequences per intent level for rule-based fallback.
 # MODERATE tasks front-load PROPOSE_COUNTER since it's the ideal action for
@@ -455,7 +456,6 @@ def _choose(
 
     # ── 4d. Smart ACCEPT gate: only accept when quality actually improved ─
     if action_type == "ACCEPT":
-        from contract_env.env.graders import observation_risk_float
         current_risk = observation_risk_float(task, contract_text)
         original_risk = observation_risk_float(task, task.contract_text)
         # Block acceptance if the contract hasn't improved meaningfully

From 55609fe9ba3dd7171a4af74335625c2826ceca9e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Apr 2026 11:28:03 +0000
Subject: [PATCH 5/5] Update inference.py to sample format: add
 LOCAL_IMAGE_NAME, fix _HTTPEnvClient.reset() task_id support, add resource
 cleanup

Agent-Logs-Url: https://github.com/bigturtle679/Contract-Negotiation-Environment/sessions/b833f101-d40f-4799-9ba4-ad246091892b

Co-authored-by: AbeerChaturvedi <171315954+AbeerChaturvedi@users.noreply.github.com>
---
 README.md    |  1 +
 inference.py | 59 +++++++++++++++++++++++++++++++++-------------------
 2 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 82f5ecc..a46d24b 100644
--- a/README.md
+++ b/README.md
@@ -254,6 +254,7 @@ python inference.py --benchmark --mode api
 | `HF_TOKEN` | Yes | — | HuggingFace / LLM API key (falls back to `API_KEY` if unset) |
 | `API_BASE_URL` | No | `https://router.huggingface.co/v1` | LLM API endpoint |
 | `MODEL_NAME` | No | `Qwen/Qwen2.5-72B-Instruct` | Model identifier |
+| `LOCAL_IMAGE_NAME` | No | `contract-negotiation-env` | Docker image name for `from_docker_image()` client usage |
 | `BENCHMARK` | No | `contract_negotiation` | Benchmark name in [START] log line |
 | `ENV_SERVER_URL` | No | `http://localhost:7860` | Docker server URL (for `--mode api`) |
 | `PORT` | No | `7860` | Server port |
diff --git a/inference.py b/inference.py
index a5cdaa2..8e2eedc 100644
--- a/inference.py
+++ b/inference.py
@@ -1,13 +1,19 @@
 """
-Inference Script — Contract Negotiation Environment
-=====================================================
-LLM-driven agent that analyses contract clauses, identifies legal risks,
-and proposes safer alternatives through multi-turn negotiation.
-
-MANDATORY environment variables:
-    API_BASE_URL   The API endpoint for the LLM.
-    MODEL_NAME     The model identifier to use for inference.
-    HF_TOKEN       Your Hugging Face / API key.
+Inference Script Example
+===================================
+MANDATORY
+- Before submitting, ensure the following variables are defined in your
+  environment configuration:
+    API_BASE_URL     The API endpoint for the LLM.
+    MODEL_NAME       The model identifier to use for inference.
+    HF_TOKEN         Your Hugging Face / API key.
+    LOCAL_IMAGE_NAME The name of the local Docker image to use for the
+                     environment if you are using from_docker_image() method.
+
+- Defaults are set only for API_BASE_URL and MODEL_NAME
+  (and should reflect your active inference setup):
+    API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
+    MODEL_NAME   = os.getenv("MODEL_NAME",   "Qwen/Qwen2.5-72B-Instruct")
 
 STDOUT FORMAT (strictly followed):
     [START] task=<task_id> env=<benchmark> model=<model_name>
@@ -54,6 +60,7 @@ def load_dotenv(*_a: Any, **_kw: Any) -> None:  # type: ignore[misc]
 HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
 BENCHMARK = os.getenv("BENCHMARK", "contract_negotiation")
 ENV_SERVER_URL = os.getenv("ENV_SERVER_URL", "http://localhost:7860")
+LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "contract-negotiation-env")
 MAX_STEPS = 10
 SUCCESS_SCORE_THRESHOLD = 0.5
 HISTORY_WINDOW = 8                      # How many recent history entries to show the LLM
@@ -518,20 +525,25 @@ def __enter__(self):
     def __exit__(self, *exc: Any) -> None:
         self.close()
 
-    def reset(self):
-        resp = self._session.post(f"{self.base_url}/reset", timeout=self._timeout)
+    def reset(self, task_id: Optional[str] = None):
+        body: dict[str, Any] = {}
+        if task_id is not None:
+            body["task_id"] = task_id
+        resp = self._session.post(
+            f"{self.base_url}/reset", json=body or None, timeout=self._timeout,
+        )
         resp.raise_for_status()
         data = resp.json()
         obs = data["observation"]
         # Map to a NegotiationTask if possible (for _choose() to use)
-        task_id = None
+        resolved_task_id = None
         try:
             state = self._session.get(f"{self.base_url}/state", timeout=self._timeout).json()
-            task_id = state.get("task_id")
+            resolved_task_id = state.get("task_id")
         except Exception:
             pass
-        if task_id:
-            self.current_task = next((t for t in TASKS if t.id == task_id), None)
+        if resolved_task_id:
+            self.current_task = next((t for t in TASKS if t.id == resolved_task_id), None)
         if self.current_task is None:
             self.current_task = TASKS[self._task_idx % len(TASKS)]
             self._task_idx += 1
@@ -568,16 +580,12 @@ def run_episode(env, task_id: Optional[str] = None) -> tuple[float, str]:
 
     Args:
         env: Environment instance (ContractEnv or _HTTPEnvClient).
-        task_id: If given, reset to this specific task (local mode only).
+        task_id: If given, reset to this specific task.
 
     Returns (mean_episode_score, task_id).
     """
     if task_id is not None:
-        try:
-            obs_obj = env.reset(task_id=task_id)
-        except TypeError:
-            # env.reset() doesn't accept task_id (e.g., _HTTPEnvClient)
-            obs_obj = env.reset()
+        obs_obj = env.reset(task_id=task_id)
     else:
         obs_obj = env.reset()
     task = env.current_task
@@ -690,6 +698,15 @@ def main() -> None:
         env = ContractEnv()
         print("[CONFIG] mode=local", flush=True)
 
+    try:
+        _run_episodes(env, args)
+    finally:
+        if hasattr(env, "close"):
+            env.close()
+
+
+def _run_episodes(env, args) -> None:
+    """Execute episode loop, retries, and print summary."""
     episodes_to_run = len(TASKS) if args.benchmark else args.episodes
 
     total_score = 0.0