diff --git a/proxy.py b/proxy.py index e9f096c..dd73fac 100644 --- a/proxy.py +++ b/proxy.py @@ -213,12 +213,26 @@ def _extract_call_objects(obj): def parse_tool_calls(content, tools=None): """ Parse blocks from the model's text. + Also handles bare JSON tool calls without tags (fallback). Returns (text_without_tags, list_of_openai_tool_call_dicts). """ pattern = re.compile(r"\s*(.*?)\s*", re.DOTALL) matches = pattern.findall(content) + # Fallback: detect bare JSON like {"name": "...", "arguments": {...}} + # when the model forgets to wrap in tags + bare_match = None + if not matches: + bare_pattern = re.compile( + r'(\{[^{}]*"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*\{.*?\}[^{}]*\})', + re.DOTALL, + ) + bare_matches = bare_pattern.findall(content) + if bare_matches: + matches = bare_matches + bare_match = True + if not matches: return content, [] @@ -263,7 +277,15 @@ def parse_tool_calls(content, tools=None): except (json.JSONDecodeError, KeyError, AttributeError): continue - text = pattern.sub("", content).strip() + if bare_match: + # Remove the matched bare JSON blobs from text + bare_pattern = re.compile( + r'(\{[^{}]*"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*\{.*?\}[^{}]*\})', + re.DOTALL, + ) + text = bare_pattern.sub("", content).strip() + else: + text = pattern.sub("", content).strip() return text, tool_calls @@ -394,7 +416,7 @@ def do_POST(self): for t in openai_req.get("tools", []) if t.get("function", {}).get("name", "").lower() not in FILTERED_TOOLS ] - tool_choice = openai_req.get("tool_choice", "auto") + tool_choice = openai_req.get("tool_choice", "required") last_content = extract_text_content( messages[-1].get("content", "") if messages else "" @@ -468,27 +490,51 @@ def do_POST(self): else: chat_messages.append({"role": role, "content": content}) - # Append tool definitions to the system prompt - full_system_prompt = system_prompt.strip() - if tools: - full_system_prompt += format_tools_for_prompt(tools, tool_choice) + # ----- Build final system prompt (tools first, then base) ----- + # chatjimmy silently returns empty responses above ~8000 chars. + # Strategy: tools are ALWAYS included intact (they must not be truncated), + # and the base system prompt is trimmed to fit within the budget. + MAX_TOTAL_SYSTEM = 80000 + + tools_section = format_tools_for_prompt(tools, tool_choice) if tools else "" + tools_len = len(tools_section) + + base_budget = max(0, MAX_TOTAL_SYSTEM - tools_len) + base_system_prompt = system_prompt.strip() - # ChatJimmy returns empty responses when system prompt exceeds ~30K chars - MAX_SYSTEM_PROMPT = 28000 - if len(full_system_prompt) > MAX_SYSTEM_PROMPT: + if len(base_system_prompt) > base_budget: logfile( - f"WARNING: system prompt is {len(full_system_prompt)} chars, truncating to {MAX_SYSTEM_PROMPT}" + f"WARNING: base system prompt truncated from {len(base_system_prompt)} to {base_budget} chars " + f"(tools use {tools_len} chars)" ) - full_system_prompt = full_system_prompt[:MAX_SYSTEM_PROMPT] + base_system_prompt = base_system_prompt[:base_budget] + + # Tools go LAST so they are never cut off by truncation + full_system_prompt = base_system_prompt + tools_section + + log(f"system_prompt={len(full_system_prompt)} chars (base={len(base_system_prompt)}, tools={tools_len})") + + # Clean messages: drop empty content, keep all valid roles + clean_messages = [] + for m in chat_messages: + msg_content = m.get("content", "") + if not msg_content or not str(msg_content).strip(): + continue + clean_messages.append({ + "role": m.get("role", "user"), + "content": str(msg_content), + }) + + if not clean_messages: + clean_messages = [{"role": "user", "content": "Hello"}] jimmy_payload = { - "messages": chat_messages, + "messages": clean_messages, "chatOptions": { - "selectedModel": MODELS.get(model, model), + "selectedModel": MODELS.get(model, "llama3.1-8B"), "systemPrompt": full_system_prompt, "topK": 8, }, - "attachment": None, } # File: translated payload @@ -526,6 +572,12 @@ def do_POST(self): logfile("--- RAW UPSTREAM RESPONSE ---") logfile(raw_response) + # Warn on empty or suspiciously short responses + if not raw_response.strip(): + log(f"WARNING: upstream returned empty response (system_prompt={len(full_system_prompt)} chars, tools={len(tools)})") + elif len(raw_response.strip()) < 10: + log(f"WARNING: upstream returned very short response: {repr(raw_response)}") + # Strip stats, parse usage content = re.sub( r"<\|stats\|>.*?<\|/stats\|>", "", raw_response, flags=re.DOTALL @@ -726,4 +778,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file