From c0f538dd3d5882dbe78b3fe9f341b2c57cdf8d95 Mon Sep 17 00:00:00 2001
From: BIN LI <1457956056@qq.com>
Date: Sun, 19 Apr 2026 16:47:03 +0800
Subject: [PATCH] Fix several sources of instability in tool calling and LLM transport
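
Harden tool calling and transport against flaky models and relays:

- agent_loop: treat the pseudo tool call "thinking" emitted by some
  Anthropic-compatible relays as a no-op instead of failing the turn.
- ga/code_run: reject empty calls with a concrete retry hint, infer the
  language from the fenced code block when no script argument is given,
  and document that the scratch cwd is ./temp while cwd:'../' is the
  project root.
- llmcore: add a non-stream path for OpenAI-compatible backends, route
  requests through trust_env=False sessions so only configured proxies
  apply, and keep the critical tool rules in the cached tool prompt.
- tests: cover the new tool constraints and fix message shapes in the
  MiniMax tests.

For example, a bare call now gets a concrete retry hint instead of a
terse error:

    {"name": "code_run", "arguments": {}}
    -> [Error] code_run requires a non-empty script. Use arguments.script
       or exactly one fenced code block immediately before the tool call.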
---
 agent_loop.py                     |   5 +
 assets/tools_schema.json          |   8 +-
 ga.py                             |  49 +++++++--
 llmcore.py                        | 169 ++++++++++++++++++++++------
 tests/test_minimax.py             |  61 +++++++++--
 tests/test_minimax_integration.py |  25 +++--
 tests/test_tool_constraints.py    |  93 ++++++++++++++++
 7 files changed, 351 insertions(+), 59 deletions(-)
 create mode 100644 tests/test_tool_constraints.py

diff --git a/agent_loop.py b/agent_loop.py
index 6a77a0fd..e31cca3c 100644
--- a/agent_loop.py
+++ b/agent_loop.py
@@ -16,6 +16,11 @@ def tool_before_callback(self, tool_name, args, response): pass
     def tool_after_callback(self, tool_name, args, response, ret): pass
     def turn_end_callback(self, response, tool_calls, tool_results, turn, next_prompt, exit_reason): return next_prompt
     def dispatch(self, tool_name, args, response, index=0):
+        # Some Anthropic-compatible relays/models may emit an internal "thinking"
+        # pseudo-tool call. Treat it as a no-op instead of derailing the turn.
+        if tool_name == 'thinking':
+            yield "[Info] Ignoring pseudo tool call from the compatibility layer: thinking\n"
+            return StepOutcome(None, next_prompt="Ignored the invalid tool 'thinking'; continue by calling tools from the real tool list.", should_exit=False)
         method_name = f"do_{tool_name}"
         if hasattr(self, method_name):
             args['_index'] = index
diff --git a/assets/tools_schema.json b/assets/tools_schema.json
index ec870c25..30a2de60 100644
--- a/assets/tools_schema.json
+++ b/assets/tools_schema.json
@@ -1,12 +1,12 @@
 [
 {"type": "function", "function": {
     "name": "code_run",
-    "description": "Code executor. Prefer python. Multi-call OK, use script param. Reply code block is executed if no script arg; prefer for single call to avoid escaping. No hardcoding bulk data",
+    "description": "Code executor. NEVER call with empty arguments. Provide arguments.script, or place exactly one fenced code block immediately before the tool call. Default runtime cwd is ./temp; use cwd:'../' for the repo root/current project folder. Prefer file_read for inspecting existing files. No hardcoding bulk data",
     "parameters": {"type": "object", "properties": {
-        "script": {"type": "string", "description": "[Mutually exclusive] NEVER use this param when use reply code block."},
+        "script": {"type": "string", "description": "Required unless the reply body contains exactly one fenced code block for this call."},
         "type": {"type": "string", "enum": ["python", "powershell"], "description": "Code type", "default": "python"},
         "timeout": {"type": "integer", "description": "in seconds", "default": 60},
-        "cwd": {"type": "string", "description": "Working directory, defaults to cwd"},
+        "cwd": {"type": "string", "description": "Working directory. Default is runtime scratch cwd ./temp; use ../ for the repo root/current project folder."},
         "inline_eval": {"type": "boolean", "description": "DO NOT USE except explicitly specified."}}}
 }},
 {"type": "function", "function": {
@@ -70,4 +70,4 @@
     "description": "Start distilling long-term memory. Call when discovering info worth remembering (env facts/user prefs/lessons learned). Skip if memory already updated or in autonomous flow. Must call when a task that took 15+ turns is completed",
     "parameters": {"type": "object", "properties": {}}}
 }
-]
\ No newline at end of file
+]
diff --git a/ga.py b/ga.py
index 1ad9836e..db531809 100644
--- a/ga.py
+++ b/ga.py
@@ -270,17 +270,48 @@ def _get_abs_path(self, path):
         if not path: return ""
         return os.path.abspath(os.path.join(self.cwd, path))
 
-    def _extract_code_block(self, response, code_type):
-        matches = re.findall(rf"```{code_type}\n(.*?)\n```", response.content, re.DOTALL)
-        return matches[-1].strip() if matches else None
+    def _extract_code_block(self, response, code_type=None):
+        content = getattr(response, 'content', '') or ''
+        candidates = []
+        if code_type: candidates.append(str(code_type).lower())
+        candidates.extend([t for t in ("python", "powershell", "bash") if t not in candidates])
+        alias_map = {
+            "python": ["py"],
+            "powershell": ["ps1", "pwsh"],
+            "bash": ["sh", "shell"],
+            "javascript": ["js"],
+        }
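+        # Search order: the requested language first, then common fallbacks;
+        # each language also matches its fence aliases (```py, ```ps1, ```sh),
+        # and a bare ``` block is the last resort.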
+        for candidate in candidates:
+            langs = [candidate] + alias_map.get(candidate, [])
+            for lang in langs:
+                matches = re.findall(rf"```{lang}\n(.*?)\n```", content, re.DOTALL | re.IGNORECASE)
+                if matches: return candidate, matches[-1].strip()
+        generic = re.findall(r"```\n(.*?)\n```", content, re.DOTALL)
+        if generic: return (candidates[0] if candidates else "python"), generic[-1].strip()
+        return None, None
+
+    def _code_run_retry_hint(self):
+        project_root = os.path.abspath(os.path.join(self.cwd, '..'))
+        return (
+            "[System] Invalid code_run call. Provide a non-empty arguments.script, or put exactly one fenced "
+            "code block immediately before the tool call. Never call code_run with only type/cwd/inline_eval. "
+            f"Runtime scratch cwd is {self.cwd}. Project root is {project_root}; use cwd:'../' for the current "
+            "project folder/repo root. If you only need to inspect existing files, prefer file_read."
+        )
 
     def do_code_run(self, args, response):
         '''Execute a code snippet. Length-limited: never inline bulk data in the code; read it from files when needed.'''
-        code_type = args.get("type", "python")
+        explicit_type = args.get("type")
+        code_type = str(explicit_type or "python").lower()
         code = args.get("code") or args.get("script")
         if not code:
-            code = self._extract_code_block(response, code_type)
-        if not code: return StepOutcome("[Error] Code missing. Use ```{code_type} block or 'script' arg.", next_prompt="\n")
+            inferred_type, inferred_code = self._extract_code_block(response, code_type if explicit_type else None)
+            code_type, code = inferred_type or code_type, inferred_code
+        if not code:
+            return StepOutcome(
+                "[Error] code_run requires a non-empty script. Use arguments.script or exactly one fenced code block immediately before the tool call.",
+                next_prompt=self._get_anchor_prompt(skip=args.get('_index', 0) > 0) + "\n" + self._code_run_retry_hint()
+            )
         timeout = args.get("timeout", 60)
         raw_path = os.path.join(self.cwd, args.get("cwd", './'))
         cwd = os.path.normpath(os.path.abspath(raw_path))
@@ -323,7 +354,9 @@ def do_web_scan(self, args, response):
 
     def do_web_execute_js(self, args, response):
         '''Preferred tool in web scenarios: execute arbitrary JS for *full* browser control. Results can be saved to a file for later reading and analysis.'''
-        script = args.get("script", "") or self._extract_code_block(response, "javascript")
+        script = args.get("script", "")
+        if not script:
+            _, script = self._extract_code_block(response, "javascript")
         if not script: return StepOutcome("[Error] Script missing. Use ```javascript block or 'script' arg.", next_prompt="\n")
         abs_path = self._get_abs_path(script.strip())
         if os.path.isfile(abs_path):
@@ -551,6 +584,8 @@ def get_global_memory():
     with open(os.path.join(script_dir, 'memory/global_mem_insight.txt'), 'r', encoding='utf-8', errors='replace') as f: insight = f.read()
     with open(os.path.join(script_dir, f'assets/insight_fixed_structure{suffix}.txt'), 'r', encoding='utf-8') as f: structure = f.read()
     prompt += f'cwd = {os.path.join(script_dir, "temp")} (./)\n'
+    prompt += f'project_root = {script_dir} (../)\n'
+    prompt += "Interpret user-facing 'current folder/current project/current repository' as project_root (../), unless the user explicitly asks for the temp/scratch cwd.\n"
     prompt += f"\n[Memory] (../memory)\n"
     prompt += structure + '\n../memory/global_mem_insight.txt:\n'
     prompt += insight + "\n"
diff --git a/llmcore.py b/llmcore.py
index a8887fb4..196ace89 100644
--- a/llmcore.py
+++ b/llmcore.py
@@ -238,6 +238,47 @@ def _parse_openai_sse(resp_lines, api_mode="chat_completions"):
             blocks.append({"type": "tool_use", "id": tc["id"], "name": tc["name"], "input": inp})
     return blocks
 
+def _parse_openai_json(data, api_mode="chat_completions"):
+    """Parse non-stream OpenAI-compatible JSON into content blocks."""
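+    # "responses" mode returns a typed output[] list; chat_completions mode
+    # returns choices[0].message with content plus tool_calls. Both shapes are
+    # normalized to the same content-block list ({"type": "text"|"tool_use"}).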
+    if api_mode == "responses":
+        usage = data.get("usage", {})
+        cached = (usage.get("input_tokens_details") or {}).get("cached_tokens", 0)
+        inp = usage.get("input_tokens", 0)
+        if inp: print(f"[Cache] input={inp} cached={cached}")
+        blocks = []
+        for item in (data.get("output") or []):
+            if item.get("type") == "message":
+                text = ""
+                for part in (item.get("content") or []):
+                    if part.get("type") in ("output_text", "text") and part.get("text"):
+                        text += part["text"]
+                if text: blocks.append({"type": "text", "text": text})
+            elif item.get("type") == "function_call":
+                args = item.get("arguments", "")
+                try: inp = json.loads(args) if args else {}
+                except: inp = {"_raw": args}
+                blocks.append({"type": "tool_use", "id": item.get("call_id", item.get("id", "")), "name": item.get("name", ""), "input": inp})
+        return blocks
+    usage = data.get("usage") or {}
+    cached = (usage.get("prompt_tokens_details") or {}).get("cached_tokens", 0)
+    if usage: print(f"[Cache] input={usage.get('prompt_tokens',0)} cached={cached}")
+    msg = ((data.get("choices") or [{}])[0]).get("message") or {}
+    content = msg.get("content", "")
+    text = ""
+    if isinstance(content, str): text = content
+    elif isinstance(content, list):
+        for part in content:
+            if isinstance(part, dict) and part.get("type") in ("text", "output_text") and part.get("text"):
+                text += part["text"]
+    blocks = [{"type": "text", "text": text}] if text else []
+    for tc in (msg.get("tool_calls") or []):
+        fn = tc.get("function", {})
+        args = fn.get("arguments", "")
+        try: inp = json.loads(args) if args else {}
+        except: inp = {"_raw": args}
+        blocks.append({"type": "tool_use", "id": tc.get("id", ""), "name": fn.get("name", ""), "input": inp})
+    return blocks
+
 def _stamp_oai_cache_markers(messages, model):
     """Add cache_control to last 2 user messages for Anthropic models via OAI-compatible relay."""
     ml = model.lower()
@@ -253,20 +294,22 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completions', *,
                    temperature=0.5, max_tokens=None, tools=None, reasoning_effort=None,
-                   max_retries=0, connect_timeout=10, read_timeout=300, proxies=None):
-    """Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block]."""
+                   max_retries=0, connect_timeout=10, read_timeout=300, proxies=None, stream=True):
+    """Shared OpenAI-compatible request with retry. Yields text chunks, returns list[content_block]."""
     ml = model.lower()
     if 'kimi' in ml or 'moonshot' in ml: temperature = 1
     elif 'minimax' in ml: temperature = max(0.01, min(temperature, 1.0))  # MiniMax requires temp in (0, 1]
-    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    if stream: headers["Accept"] = "text/event-stream"
     if api_mode == "responses":
         url = auto_make_url(api_base, "responses")
-        payload = {"model": model, "input": _to_responses_input(messages), "stream": True, "prompt_cache_key": _RESP_CACHE_KEY}
+        payload = {"model": model, "input": _to_responses_input(messages), "stream": stream, "prompt_cache_key": _RESP_CACHE_KEY}
         if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort}
     else:
         url = auto_make_url(api_base, "chat/completions")
         _stamp_oai_cache_markers(messages, model)
-        payload = {"model": model, "messages": messages, "stream": True, "stream_options": {"include_usage": True}}
+        payload = {"model": model, "messages": messages, "stream": stream}
+        if stream: payload["stream_options"] = {"include_usage": True}
     if temperature != 1: payload["temperature"] = temperature
     if max_tokens: payload["max_tokens"] = max_tokens
     if reasoning_effort: payload["reasoning_effort"] = reasoning_effort
@@ -287,11 +330,15 @@ def _delay(resp, attempt):
         try: ra = float((resp.headers or {}).get("retry-after"))
         except: ra = None
         return max(0.5, ra if ra is not None else min(30.0, 1.5 * (2 ** attempt)))
+    def _post(url, **kwargs):
+        with requests.Session() as sess:
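+            # trust_env=False stops requests from honoring HTTP(S)_PROXY and
+            # .netrc from the environment; only the explicitly passed proxies
+            # (if any) are applied.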
+            sess.trust_env = False
+            return sess.post(url, proxies=proxies, **kwargs)
     for attempt in range(max_retries + 1):
         streamed = False
         try:
-            with requests.post(url, headers=headers, json=payload, stream=True,
-                               timeout=(connect_timeout, read_timeout), proxies=proxies) as r:
+            with _post(url, headers=headers, json=payload, stream=stream,
+                       timeout=(connect_timeout, read_timeout)) as r:
                 if r.status_code >= 400:
                     if r.status_code in RETRYABLE and attempt < max_retries:
                         d = _delay(r, attempt)
@@ -304,11 +351,18 @@
                     try: r.raise_for_status()
                     except requests.HTTPError as e: e._err_body = err_body; raise
-                gen = _parse_openai_sse(r.iter_lines(), api_mode)
-                try:
-                    while True: streamed = True; yield next(gen)
-                except StopIteration as e:
-                    return e.value or []
+                if stream:
+                    gen = _parse_openai_sse(r.iter_lines(), api_mode)
+                    try:
+                        while True: streamed = True; yield next(gen)
+                    except StopIteration as e:
+                        return e.value or []
+                else:
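+                    # Non-stream: parse the complete JSON body, then replay the
+                    # text blocks as yields so callers keep the same
+                    # yield-then-return contract as the streaming path.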
+                    blocks = _parse_openai_json(r.json(), api_mode)
+                    for b in blocks:
+                        if b.get("type") == "text" and b.get("text"):
+                            yield b["text"]
+                    return blocks
         except requests.HTTPError as e:
             resp = getattr(e, "response", None); status = getattr(resp, "status_code", None)
             if status in RETRYABLE and attempt < max_retries and not streamed:
@@ -424,7 +478,7 @@ def __init__(self, cfg):
         self.max_retries = max(0, int(cfg.get('max_retries', 1)))
         self.stream = cfg.get('stream', True)
         default_ct, default_rt = (5, 30) if self.stream else (10, 240)
-        self.connect_timeout = max(1, int(cfg.get('timeout', default_ct)))
+        self.connect_timeout = max(1, int(cfg.get('connect_timeout', cfg.get('timeout', default_ct))))
         self.read_timeout = max(5, int(cfg.get('read_timeout', default_rt)))
         def _enum(key, valid):
             v = cfg.get(key); v = None if v is None else str(v).strip().lower()
@@ -475,9 +529,12 @@ def raw_ask(self, messages):
         self._apply_claude_thinking(payload)
         if self.system: payload["system"] = [{"type": "text", "text": self.system, "cache_control": {"type": "persistent"}}]
         try:
-            with requests.post(auto_make_url(self.api_base, "messages"), headers=headers, json=payload, stream=True, timeout=(self.connect_timeout, self.read_timeout)) as r:
-                if r.status_code != 200: raise Exception(f"HTTP {r.status_code} {r.content.decode('utf-8', errors='replace')[:500]}")
-                return (yield from _parse_claude_sse(r.iter_lines())) or []
+            with requests.Session() as sess:
+                sess.trust_env = False
+                with sess.post(auto_make_url(self.api_base, "messages"), headers=headers, json=payload, stream=True,
+                               timeout=(self.connect_timeout, self.read_timeout), proxies=self.proxies) as r:
+                    if r.status_code != 200: raise Exception(f"HTTP {r.status_code} {r.content.decode('utf-8', errors='replace')[:500]}")
+                    return (yield from _parse_claude_sse(r.iter_lines())) or []
         except Exception as e:
             yield (err := f"Error: {e}")
             return [{"type": "text", "text": err}]
@@ -493,7 +550,8 @@ def raw_ask(self, messages):
         return (yield from _openai_stream(self.api_base, self.api_key, messages, self.model, self.api_mode,
                                           temperature=self.temperature, reasoning_effort=self.reasoning_effort,
                                           max_tokens=self.max_tokens, max_retries=self.max_retries,
-                                          connect_timeout=self.connect_timeout, read_timeout=self.read_timeout, proxies=self.proxies))
+                                          connect_timeout=self.connect_timeout, read_timeout=self.read_timeout,
+                                          proxies=self.proxies, stream=self.stream))
     def make_messages(self, raw_list): return _msgs_claude2oai(raw_list)
 
 def _fix_messages(messages):
@@ -551,17 +609,20 @@ def raw_ask(self, messages):
             messages[idx] = {**messages[idx], "content": list(messages[idx]["content"])}
             messages[idx]["content"][-1] = dict(messages[idx]["content"][-1], cache_control={"type": "ephemeral"})
         try:
-            with requests.post(auto_make_url(self.api_base, "messages")+'?beta=true', headers=headers, json=payload, stream=self.stream, timeout=(self.connect_timeout, self.read_timeout)) as resp:
-                if resp.status_code != 200: raise Exception(f"HTTP {resp.status_code} {resp.content.decode('utf-8', errors='replace')[:500]}")
-                if self.stream: return (yield from _parse_claude_sse(resp.iter_lines())) or []
-                else:
-                    data = resp.json(); content_blocks = data.get("content", [])
-                    usage = data.get("usage", {})
-                    print(f"[Cache] input={usage.get('input_tokens',0)} creation={usage.get('cache_creation_input_tokens',0)} read={usage.get('cache_read_input_tokens',0)}")
-                    for b in content_blocks:
-                        if b.get("type") == "text": yield b.get("text", "")
-                        elif b.get("type") == "thinking": yield ""
-                    return content_blocks
+            with requests.Session() as sess:
+                sess.trust_env = False
+                with sess.post(auto_make_url(self.api_base, "messages")+'?beta=true', headers=headers, json=payload,
+                               stream=self.stream, timeout=(self.connect_timeout, self.read_timeout), proxies=self.proxies) as resp:
+                    if resp.status_code != 200: raise Exception(f"HTTP {resp.status_code} {resp.content.decode('utf-8', errors='replace')[:500]}")
+                    if self.stream: return (yield from _parse_claude_sse(resp.iter_lines())) or []
+                    else:
+                        data = resp.json(); content_blocks = data.get("content", [])
+                        usage = data.get("usage", {})
+                        print(f"[Cache] input={usage.get('input_tokens',0)} creation={usage.get('cache_creation_input_tokens',0)} read={usage.get('cache_read_input_tokens',0)}")
+                        for b in content_blocks:
+                            if b.get("type") == "text": yield b.get("text", "")
+                            elif b.get("type") == "thinking": yield ""
+                        return content_blocks
         except Exception as e:
             yield (err := f"Error: {e}")
             return [{"type": "text", "text": err}]
@@ -603,7 +664,7 @@ def raw_ask(self, messages):
                                           temperature=self.temperature, max_tokens=self.max_tokens,
                                           tools=self.tools, reasoning_effort=self.reasoning_effort,
                                           max_retries=self.max_retries, connect_timeout=self.connect_timeout,
-                                          read_timeout=self.read_timeout, proxies=self.proxies))
+                                          read_timeout=self.read_timeout, proxies=self.proxies, stream=self.stream))
 
 def openai_tools_to_claude(tools):
     """[{type:'function', function:{name,description,parameters}}] → [{name,description,input_schema}]."""
@@ -673,6 +734,13 @@ def _prepare_tool_instruction(self, tools):
         if not tools: return tool_instruction
         tools_json = json.dumps(tools, ensure_ascii=False, separators=(',', ':'))
         _en = os.environ.get('GA_LANG') == 'en'
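+        # These rules ride along with every tool prompt; they target the most
+        # common invalid-call patterns: empty code_run arguments, wrong cwd
+        # assumptions, and code_run used where file_read suffices.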
+        critical_rules = """
+Critical tool rules:
+- code_run: NEVER call with empty arguments. Provide arguments.script, or put exactly one fenced code block immediately before the tool call.
+- code_run defaults to runtime scratch cwd ./temp. For the repo root/current project folder, use cwd:'../'.
+- If you only need to inspect existing file contents, prefer file_read over code_run.
+"""
+        format_instruction = '\nFormat: ```{"name": "tool_name", "arguments": {...}}```\n'
         if _en:
             tool_instruction = f"""
 ### Interaction Protocol (must follow strictly, always in effect)
@@ -696,10 +764,49 @@ def _prepare_tool_instruction(self, tools):
             self.last_tools = tools_json
         return tool_instruction
 
+    def _prepare_tool_instruction_v2(self, tools):
+        tool_instruction = ""
+        if not tools:
+            return tool_instruction
+        tools_json = json.dumps(tools, ensure_ascii=False, separators=(',', ':'))
+        _en = os.environ.get('GA_LANG') == 'en'
+        critical_rules = (
+            "\nCritical tool rules:\n"
+            "- code_run: NEVER call with empty arguments. Provide arguments.script, or put exactly one fenced code block immediately before the tool call.\n"
+            "- code_run defaults to runtime scratch cwd ./temp. For the repo root/current project folder, use cwd:'../'.\n"
+            "- If you only need to inspect existing file contents, prefer file_read over code_run.\n"
+        )
+        format_instruction = '\nFormat: ```{"name": "tool_name", "arguments": {...}}```\n'
+        if _en:
+            tool_instruction = (
+                "\n### Interaction Protocol (must follow strictly, always in effect)\n"
+                "Follow these steps to think and act:\n"
+                "1. **Think**: Analyze the current situation and strategy inside <think> tags.\n"
+                "2. **Summarize**: Output a minimal one-line (<30 words) physical snapshot: new info from last tool result + current tool call intent. This goes into long-term working memory. Must contain real information, no filler.\n"
+                "3. **Act**: If you need to call tools, output one or more tool-call blocks after your reply, then stop.\n"
+            )
+            cached_prefix = "\n### Tools: still active, **ready to call**. Protocol unchanged.\n"
+        else:
+            tool_instruction = (
+                "\n### Interaction Protocol\n"
+                "1. Think inside <think>.\n"
+                "2. Write a short factual summary.\n"
+                "3. If tools are needed, output tool-call blocks and stop.\n"
+            )
+            cached_prefix = "\n### Tools: still active and ready to call.\n"
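+        # With token saving on and an unchanged tool list, send only the short
+        # cached prefix -- but always re-attach the critical rules and the
+        # call-format reminder so they never age out of the context.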
+        if self.auto_save_tokens and self.last_tools == tools_json:
+            tool_instruction = cached_prefix + critical_rules + format_instruction
+        else:
+            self.total_cd_tokens = 0
+            tool_instruction += critical_rules
+            tool_instruction += f'{format_instruction}\n### Tools (mounted, always in effect):\n{tools_json}\n'
+            self.last_tools = tools_json
+        return tool_instruction
+
     def _build_protocol_prompt(self, messages, tools):
         system_content = next((m['content'] for m in messages if m['role'].lower() == 'system'), "")
         history_msgs = [m for m in messages if m['role'].lower() != 'system']
-        tool_instruction = self._prepare_tool_instruction(tools)
+        tool_instruction = self._prepare_tool_instruction_v2(tools)
         system = ""; user = ""
         if system_content: system += f"{system_content}\n"
         system += f"{tool_instruction}"
@@ -917,4 +1024,4 @@ def chat(self, messages, tools=None):
         except StopIteration as e: resp = e.value
         if resp: _write_llm_log('Response', resp.raw)
         if resp and hasattr(resp, 'tool_calls') and resp.tool_calls: self._pending_tool_ids = [tc.id for tc in resp.tool_calls]
-        return resp
\ No newline at end of file
+        return resp
diff --git a/tests/test_minimax.py b/tests/test_minimax.py
index 19de58c0..ba665216 100644
--- a/tests/test_minimax.py
+++ b/tests/test_minimax.py
@@ -28,7 +28,11 @@ def fake_post(url, headers=None, json=None, stream=None, timeout=None, proxies=None):
             resp.__exit__ = MagicMock(return_value=False)
             return resp
 
-        with patch('llmcore.requests.post', side_effect=fake_post):
+        fake_session = MagicMock()
+        fake_session.__enter__.return_value = fake_session
+        fake_session.__exit__.return_value = False
+        fake_session.post.side_effect = fake_post
+        with patch('llmcore.requests.Session', return_value=fake_session):
             gen = _openai_stream(
                 'https://api.minimax.io/v1', 'test-key', [{"role": "user", "content": "hi"}],
                 model, temperature=temperature
@@ -39,6 +43,45 @@
             return captured.get('payload', {})
 
+    def test_non_stream_response_parsed(self):
+        """Non-stream OpenAI-compatible responses should be parsed into text blocks."""
+        from llmcore import _openai_stream
+
+        def fake_post(url, headers=None, json=None, stream=None, timeout=None, proxies=None):
+            resp = MagicMock()
+            resp.status_code = 200
+            resp.json.return_value = {
+                "choices": [{
+                    "message": {
+                        "content": "Here is the answer.",
+                        "tool_calls": []
+                    }
+                }],
+                "usage": {"prompt_tokens": 12}
+            }
+            resp.__enter__ = lambda s: s
+            resp.__exit__ = MagicMock(return_value=False)
+            return resp
+
+        fake_session = MagicMock()
+        fake_session.__enter__.return_value = fake_session
+        fake_session.__exit__.return_value = False
+        fake_session.post.side_effect = fake_post
+        with patch('llmcore.requests.Session', return_value=fake_session):
+            gen = _openai_stream(
+                'https://api.minimax.io/v1', 'test-key', [{"role": "user", "content": "hi"}],
+                'MiniMax-M2.7', stream=False
+            )
+            chunks = []
+            try:
+                while True:
+                    chunks.append(next(gen))
+            except StopIteration as e:
+                blocks = e.value
+
+        self.assertEqual(chunks, ["Here is the answer."])
+        self.assertEqual(blocks, [{"type": "text", "text": "Here is the answer."}])
+
     def test_minimax_temp_zero_clamped(self):
         """MiniMax rejects temperature=0, should be clamped to 0.01."""
         payload = self._make_stream_call('MiniMax-M2.7', 0.0)
@@ -57,12 +100,12 @@
     def test_minimax_temp_one_preserved(self):
-        """Temperature=1.0 should be preserved."""
+        """Temperature=1.0 is the backend default, so the payload omits it."""
         payload = self._make_stream_call('MiniMax-M2.7-highspeed', 1.0)
-        self.assertAlmostEqual(payload['temperature'], 1.0)
+        self.assertNotIn('temperature', payload)
 
     def test_minimax_temp_above_one_clamped(self):
-        """Temperature > 1.0 should be clamped to 1.0."""
+        """Temperature > 1.0 is clamped to 1.0 and thus omitted from the payload."""
         payload = self._make_stream_call('MiniMax-M2.7', 1.5)
-        self.assertAlmostEqual(payload['temperature'], 1.0)
+        self.assertNotIn('temperature', payload)
 
     def test_minimax_case_insensitive(self):
         """Model name matching should be case-insensitive."""
@@ -77,7 +120,7 @@ def test_non_minimax_temp_zero_unchanged(self):
     def test_kimi_temp_still_forced(self):
         """Kimi/Moonshot temp override should still work."""
         payload = self._make_stream_call('kimi-2.0', 0.5)
-        self.assertAlmostEqual(payload['temperature'], 1.0)
+        self.assertNotIn('temperature', payload)
 
 
 class TestMiniMaxThinkTagHandling(unittest.TestCase):
@@ -145,15 +188,15 @@
         long_think = "A" * 2000
         messages = [
-            {"role": "assistant", "prompt": f"<think>{long_think}</think>\nShort answer."},
-            {"role": "user", "prompt": "Follow up"},
-        ] + [{"role": "user", "prompt": f"msg{i}"} for i in range(12)]
+            {"role": "assistant", "content": f"<think>{long_think}</think>\nShort answer."},
+            {"role": "user", "content": "Follow up"},
+        ] + [{"role": "user", "content": f"msg{i}"} for i in range(12)]
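+        # Twelve filler turns push the assistant message outside keep_recent=10,
+        # making it eligible for <think> compression.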
 
         # Force compression (counter divisible by 5)
         compress_history_tags._cd = 4
         result = compress_history_tags(messages, keep_recent=10, max_len=800)
 
         # The first message's content should be truncated
-        first_content = result[0]["prompt"]
+        first_content = result[0]["content"]
         self.assertIn("<think>", first_content)
         self.assertIn("...", first_content)
         self.assertLess(len(first_content), len(f"<think>{long_think}</think>\nShort answer."))
@@ -268,7 +311,7 @@ def test_native_tool_client_think_tag(self):
         def mock_ask(msg, tools=None, model=None):
             text = "<think>Analyzing the request.</think>\n\nResult: success"
             yield text
-            return MockResponse('', text, [], text)
+            return MockResponse('Analyzing the request.', 'Result: success', [], text)
 
         session.ask = mock_ask
diff --git a/tests/test_minimax_integration.py b/tests/test_minimax_integration.py
index ae7d63e5..ad713510 100644
--- a/tests/test_minimax_integration.py
+++ b/tests/test_minimax_integration.py
@@ -64,7 +64,11 @@ def test_full_pipeline_with_think_tag(self):
         mock_resp.__enter__ = lambda s: s
         mock_resp.__exit__ = MagicMock(return_value=False)
 
-        with patch('llmcore.requests.post', return_value=mock_resp):
+        fake_session = MagicMock()
+        fake_session.__enter__.return_value = fake_session
+        fake_session.__exit__.return_value = False
+        fake_session.post.return_value = mock_resp
+        with patch('llmcore.requests.Session', return_value=fake_session):
             messages = [
                 {"role": "system", "content": "You are a helpful assistant."},
                 {"role": "user", "content": "Help me read a file."},
@@ -107,7 +111,11 @@ def test_full_pipeline_with_tool_call(self):
         mock_resp.__enter__ = lambda s: s
         mock_resp.__exit__ = MagicMock(return_value=False)
 
-        with patch('llmcore.requests.post', return_value=mock_resp):
+        fake_session = MagicMock()
+        fake_session.__enter__.return_value = fake_session
+        fake_session.__exit__.return_value = False
+        fake_session.post.return_value = mock_resp
+        with patch('llmcore.requests.Session', return_value=fake_session):
             messages = [{"role": "user", "content": "Read the config file."}]
             gen = client.chat(messages=messages, tools=None)
             try:
@@ -142,13 +150,14 @@ def capture_post(url, headers=None, json=None, stream=None, timeout=None, proxies=None):
             resp.__exit__ = MagicMock(return_value=False)
             return resp
 
-        with patch('llmcore.requests.post', side_effect=capture_post):
+        fake_session = MagicMock()
+        fake_session.__enter__.return_value = fake_session
+        fake_session.__exit__.return_value = False
+        fake_session.post.side_effect = capture_post
+        with patch('llmcore.requests.Session', return_value=fake_session):
             session.raw_msgs = [{"role": "user", "prompt": "test", "image": None}]
-            gen = session.raw_ask(
-                [{"role": "user", "content": "test"}],
-                model='MiniMax-M2.7',
-                temperature=0.0,
-            )
+            session.temperature = 0.0
+            gen = session.raw_ask([{"role": "user", "content": "test"}])
             for _ in gen:
                 pass
diff --git a/tests/test_tool_constraints.py b/tests/test_tool_constraints.py
new file mode 100644
index 00000000..1bd32ff9
--- /dev/null
+++ b/tests/test_tool_constraints.py
@@ -0,0 +1,93 @@
+"""Regression tests for tool constraint handling."""
+import json
+import os
+import sys
+import unittest
+from types import SimpleNamespace
+from unittest.mock import patch
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from agent_loop import exhaust
+from ga import GenericAgentHandler
+from llmcore import ToolClient
+
+
+class TestToolConstraints(unittest.TestCase):
+    def setUp(self):
+        self.repo_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        self.temp_dir = os.path.join(self.repo_dir, "temp")
+        os.makedirs(self.temp_dir, exist_ok=True)
+        self.parent = SimpleNamespace(verbose=False, task_dir=self.temp_dir)
+        self.handler = GenericAgentHandler(self.parent, cwd=self.temp_dir)
+
+    def test_code_run_infers_powershell_from_fenced_block(self):
+        captured = {}
+
+        def fake_code_run(code, code_type="python", timeout=60, cwd=None, code_cwd=None, stop_signal=None):
+            captured.update({
+                "code": code,
+                "code_type": code_type,
+                "cwd": cwd,
+                "code_cwd": code_cwd,
+            })
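+            # "if False: yield" turns this stub into a generator, matching the
+            # real code_run's streaming interface; the dict comes back as the
+            # generator's return value.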
"description": "Code executor", + "parameters": {"type": "object", "properties": {}}, + }, + }] + + first = client._prepare_tool_instruction_v2(tools) + second = client._prepare_tool_instruction_v2(tools) + + self.assertIn("Critical tool rules", first) + self.assertIn("Critical tool rules", second) + self.assertIn("cwd:'../'", second) + self.assertIn("Format: ```", second) + + +if __name__ == "__main__": + unittest.main()