From db37876cc76e36c0312f6136e3e92d79a3cf7746 Mon Sep 17 00:00:00 2001 From: Avinash Mallick Date: Wed, 27 Aug 2025 13:38:05 +0100 Subject: [PATCH 1/6] Reimplementation of Explainer --- .../src/core/agents/__init__.py | 6 +- .../src/core/agents/antipattern_scanner.py | 21 ++- .../src/core/agents/explainer.py | 131 ++++++++++++++++++ .../src/core/graph/create_graph.py | 9 ++ .../src/core/prompt/prompt_manager.py | 26 ++-- .../src/core/utils/__init__.py | 3 + .../src/core/utils/json_utils.py | 27 ++++ .../static/prompt/explainer.yaml | 41 ++++++ 8 files changed, 239 insertions(+), 25 deletions(-) create mode 100644 AntiPattern_Remediator/src/core/agents/explainer.py create mode 100644 AntiPattern_Remediator/src/core/utils/__init__.py create mode 100644 AntiPattern_Remediator/src/core/utils/json_utils.py create mode 100644 AntiPattern_Remediator/static/prompt/explainer.yaml diff --git a/AntiPattern_Remediator/src/core/agents/__init__.py b/AntiPattern_Remediator/src/core/agents/__init__.py index 79c1adc..e2fdd8a 100644 --- a/AntiPattern_Remediator/src/core/agents/__init__.py +++ b/AntiPattern_Remediator/src/core/agents/__init__.py @@ -7,10 +7,12 @@ from .refactor_strategist import RefactorStrategist from .code_transformer import CodeTransformer from .code_reviewer import CodeReviewerAgent +from .explainer import ExplainerAgent __all__ = [ - "AntipatternScanner", + "AntipatternScanner", "RefactorStrategist", "CodeTransformer", - "CodeReviewerAgent" + "CodeReviewerAgent", + "ExplainerAgent" ] diff --git a/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py b/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py index 6873e7f..230bb1e 100644 --- a/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py +++ b/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py @@ -15,7 +15,7 @@ class AntipatternScanner: """Antipattern scanner agent""" - + def __init__(self, tool, model, prompt_manager: PromptManager): self.prompt_manager = prompt_manager self.tool = tool @@ -28,17 +28,17 @@ def retrieve_context(self, state: AgentState): search_query = f"Java antipatterns code analysis: {state['code'][:50]}" # Use retriever_tool to get relevant context context = self.tool.invoke({"query": search_query}) - + # Get current file path from state current_file_path = state['current_file_path'] - + # Extract project key and relative file path from the current file path project_key = None relative_file_path = None - + if current_file_path: path_obj = Path(current_file_path) - + # Find the repository name (project key) by looking for 'clones' directory for i, part in enumerate(path_obj.parts): if part == 'clones' and i + 1 < len(path_obj.parts): @@ -46,7 +46,7 @@ def retrieve_context(self, state: AgentState): # Get the relative path from the repository root relative_file_path = str(Path(*path_obj.parts[i + 2:])) break - + api = SonarQubeAPI() print(Fore.CYAN + f"Using SonarQube project: {project_key}, file: {relative_file_path}" + Style.RESET_ALL) issues = api.get_issues_for_file(project_key=project_key, file_path=relative_file_path) @@ -64,7 +64,7 @@ def analyze_antipatterns(self, state: AgentState): print("Analyzing code for antipatterns...") try: prompt_template = self.prompt_manager.get_prompt(self.prompt_manager.ANTIPATTERN_SCANNER) - + # Get historical messages from state, or use empty list if none exist msgs = state.get('msgs', []) @@ -74,7 +74,7 @@ def analyze_antipatterns(self, state: AgentState): sonarqube_issues=state['context'].get('solutions', ''), msgs=msgs ) - + response = self.llm.invoke(formatted_messages) state["antipatterns_scanner_results"] = response.content if hasattr(response, 'content') else str(response) print(Fore.GREEN + "Analysis completed successfully" + Style.RESET_ALL) @@ -82,12 +82,11 @@ def analyze_antipatterns(self, state: AgentState): print(Fore.RED + f"Error during analysis: {e}" + Style.RESET_ALL) state["antipatterns_scanner_results"] = f"Error occurred during analysis: {e}" return state - - def display_antipatterns_results(self, state: AgentState): + + def display_antipatterns_results(self, state: AgentState): """Display the final analysis results""" print("\nANTIPATTERN ANALYSIS RESULTS") print("=" * 60) print(state.get("antipatterns_scanner_results", "No analysis results available.")) print("=" * 60) return state - diff --git a/AntiPattern_Remediator/src/core/agents/explainer.py b/AntiPattern_Remediator/src/core/agents/explainer.py new file mode 100644 index 0000000..4ab86a6 --- /dev/null +++ b/AntiPattern_Remediator/src/core/agents/explainer.py @@ -0,0 +1,131 @@ +""" +ExplainerAgent — minimal version +- Delegates state handling to create_graph.py +- Uses PromptManager if available; otherwise a tiny inline fallback prompt +- Always passes msgs; always returns a non-empty explanation_json +""" +from __future__ import annotations + +from typing import Dict, Any +import json + +from langchain_core.language_models import BaseLanguageModel +from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder +from ..prompt import PromptManager +from src.core.utils import extract_first_json + +PROMPT_KEY = "explainer" + + +class ExplainerAgent: + def __init__(self, llm: BaseLanguageModel, prompt_manager: PromptManager): + self.llm = llm + self.prompt_manager = prompt_manager + + def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: + """Generate explanation JSON for detected antipatterns and refactor.""" + kwargs = dict( + code=state.get("code", ""), + language=state.get("language", "Java"), + context=state.get("context", ""), + refactored_code=state.get("refactored_code", ""), + refactor_rationale=state.get("refactor_rationale", ""), + antipattern_name=state.get("antipattern_name", "Unknown antipattern"), + antipattern_description=state.get("antipattern_description", ""), + antipatterns_json=json.dumps(state.get("antipatterns_json", []), ensure_ascii=False), + msgs=state.get("msgs", []), # ensure MessagesPlaceholder is satisfied + ) + + messages = self._build_messages(**kwargs) + + try: + response = self.llm.invoke(messages) + raw = getattr(response, "content", None) or str(response) + except Exception as e: + raw = f"LLM error: {e}" + + state["explanation_response_raw"] = raw + + # Robust parse: accept dict, wrap list, or emit a minimal fallback + try: + parsed = extract_first_json(raw) + except Exception: + parsed = None + + if isinstance(parsed, dict): + state["explanation_json"] = parsed + elif isinstance(parsed, list): + state["explanation_json"] = {"items": parsed} + else: + state["explanation_json"] = self._fallback_payload(state) + + return state + + def display_explanation(self, state: Dict[str, Any]) -> Dict[str, Any]: + print("\n=== Explanation (raw) ===\n", state.get("explanation_response_raw", "N/A")) + if state.get("explanation_json"): + print("\n=== Explanation (JSON) ===\n", + json.dumps(state["explanation_json"], indent=2, ensure_ascii=False)) + return state + + def _build_messages(self, **kwargs) -> Any: + # Always ensure msgs exists + if "msgs" not in kwargs or kwargs["msgs"] is None: + kwargs = {**kwargs, "msgs": []} + + # 1) Try preloaded template from PromptManager + prompt = None + getp = getattr(self.prompt_manager, "get_prompt", None) + if callable(getp): + prompt = getp(PROMPT_KEY) + if prompt is not None: + return prompt.format_messages(**kwargs) + + # 2) Minimal inline fallback + schema = { + "items": [{ + "antipattern_name": "", + "antipattern_description": "", + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": "", + "refactored_code": "", + "summary": "" + }], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + } + content = ( + "Given inputs (JSON):\n" + json.dumps({k: v for k, v in kwargs.items() if k != "msgs"}, ensure_ascii=False) + + "\nRespond with STRICT JSON using exactly this schema:\n" + + json.dumps(schema, ensure_ascii=False) + ) + fallback = ChatPromptTemplate.from_messages([ + ("system", "Return STRICT JSON only. No commentary."), + ("user", content), + MessagesPlaceholder("msgs"), + ]) + return fallback.format_messages(**kwargs) + + @staticmethod + def _fallback_payload(state: Dict[str, Any]) -> Dict[str, Any]: + """Tiny fallback so downstream never breaks if parsing fails.""" + return { + "items": [{ + "antipattern_name": state.get("antipattern_name", "Unknown antipattern"), + "antipattern_description": state.get("antipattern_description", ""), + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": state.get("refactor_rationale", ""), + "refactored_code": state.get("refactored_code", ""), + "summary": "Auto-generated minimal explanation (parser fallback)." + }], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + } diff --git a/AntiPattern_Remediator/src/core/graph/create_graph.py b/AntiPattern_Remediator/src/core/graph/create_graph.py index 9c4cf93..ae52ed5 100644 --- a/AntiPattern_Remediator/src/core/graph/create_graph.py +++ b/AntiPattern_Remediator/src/core/graph/create_graph.py @@ -19,6 +19,7 @@ import os from langsmith import Client from langchain.callbacks.tracers import LangChainTracer +from operator import add from colorama import Fore, Style @@ -78,6 +79,7 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll "strategist": RefactorStrategist(self.llm, self.prompt_manager, retriever=self.retriever), "transformer": CodeTransformer(self.llm, self.prompt_manager), "reviewer": CodeReviewerAgent(self.llm, self.prompt_manager), + } # Build the LangGraph workflow @@ -104,6 +106,10 @@ def _build_graph(self): graph.add_node("review_code", self.agents["reviewer"].review_code) graph.add_node("display_code_review_results", self.agents["reviewer"].display_code_review_results) + # Explainer: final storytelling + graph.add_node("explain_antipattern", self.agents["explainer"].explain_antipattern) + graph.add_node("display_explanation", self.agents["explainer"].display_explanation) + # Topology graph.set_entry_point("retrieve_context") graph.add_edge("retrieve_context", "analyze_antipatterns") @@ -113,6 +119,7 @@ def _build_graph(self): graph.add_edge("display_refactoring_results", "transform_code") graph.add_edge("transform_code", "display_transformed_code") graph.add_edge("display_transformed_code", "review_code") + graph.add_edge("review_code", "explain_antipattern") graph.add_conditional_edges( "review_code", @@ -124,5 +131,7 @@ def _build_graph(self): ) graph.add_edge("display_code_review_results", END) + graph.add_edge("explain_antipattern", "display_explanation") + graph.add_edge("display_explanation", END) return graph.compile() diff --git a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py index 0650429..4392fdd 100644 --- a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py +++ b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py @@ -9,16 +9,17 @@ class PromptManager: def __init__(self): # Prompt key constants, **same as YAML filenames** self.ANTIPATTERN_SCANNER = "antipattern_scanner" - self.REFACTOR_STRATEGIST = "refactor_strategist" + self.REFACTOR_STRATEGIST = "refactor_strategist" self.CODE_TRANSFORMER = "code_transformer" self.CODE_REVIEWER = "code_reviewer" + self.EXPLAINER = "explainer" self.prompt_directory = settings.PROMPT_DIR # Initialize storage for prompt templates self._prompt_cache = {} # Load prompts on initialization self._load_all_prompts() - + def _load_all_prompts(self) -> None: """Load all prompt configurations from YAML files.""" try: @@ -28,32 +29,33 @@ def _load_all_prompts(self) -> None: self.REFACTOR_STRATEGIST, self.CODE_TRANSFORMER, self.CODE_REVIEWER, + self.EXPLAINER ] - + for prompt_key in prompt_constants: filename = f"{prompt_key}.yaml" self._load_prompt_from_yaml(filename, prompt_key) - + print(f"Successfully loaded {len(self._prompt_cache)} prompts") print("=" * 60) except Exception as e: print(f"Error loading prompts: {e}") - + def _load_prompt_from_yaml(self, filename: str, prompt_key: str) -> None: """Load a prompt configuration from a YAML file.""" yaml_path = self.prompt_directory / filename - + if not yaml_path.exists(): print(f"Warning: Prompt file {yaml_path} not found") return - + try: with open(yaml_path, 'r', encoding='utf-8') as file: config = yaml.safe_load(file) if prompt_key not in config: print(f"Warning: Section '{prompt_key}' not found in {filename}") return - + prompt_config = config[prompt_key] # Create ChatPromptTemplate self._prompt_cache[prompt_key] = ChatPromptTemplate([ @@ -62,13 +64,13 @@ def _load_prompt_from_yaml(self, filename: str, prompt_key: str) -> None: MessagesPlaceholder("msgs") ]) print(f"Loaded prompt '{prompt_key}' from {filename}") - + except Exception as e: print(f"Error loading prompt from {filename}: {e}") - + def get_prompt(self, prompt_key: str) -> Optional[ChatPromptTemplate]: if prompt_key not in self._prompt_cache: print(f"Warning: Prompt '{prompt_key}' not found in cache") return None - - return self._prompt_cache[prompt_key] \ No newline at end of file + + return self._prompt_cache[prompt_key] diff --git a/AntiPattern_Remediator/src/core/utils/__init__.py b/AntiPattern_Remediator/src/core/utils/__init__.py new file mode 100644 index 0000000..a6371ea --- /dev/null +++ b/AntiPattern_Remediator/src/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .json_utils import extract_first_json + +__all__ = ["extract_first_json"] diff --git a/AntiPattern_Remediator/src/core/utils/json_utils.py b/AntiPattern_Remediator/src/core/utils/json_utils.py new file mode 100644 index 0000000..74ec803 --- /dev/null +++ b/AntiPattern_Remediator/src/core/utils/json_utils.py @@ -0,0 +1,27 @@ +import json + +def extract_first_json(text): + """ + Try to extract the first JSON object from a string. + Works if JSON is inside ```json ... ``` fences or just plain text. + """ + if not isinstance(text, str): + return None + + # 1. If the text has fenced JSON like ```json ... ``` + if "```" in text: + parts = text.split("```") + for part in parts: + # Look for JSON-specific fences + if part.strip().lower().startswith("json"): + json_part = part[len("json"):].strip() + try: + return json.loads(json_part) + except Exception: + pass # Try next part + + # 2. If no fenced JSON worked, try to parse the whole text + try: + return json.loads(text.strip()) + except Exception: + return None diff --git a/AntiPattern_Remediator/static/prompt/explainer.yaml b/AntiPattern_Remediator/static/prompt/explainer.yaml new file mode 100644 index 0000000..be29ab7 --- /dev/null +++ b/AntiPattern_Remediator/static/prompt/explainer.yaml @@ -0,0 +1,41 @@ +explainer: + system: | + You are a senior software reviewer. Output STRICT JSON only — no commentary outside the JSON object. + + user: | + === Inputs === + Language: {language} + Context: {context} + Detected Anti-patterns (JSON): {antipatterns_json} + Code: + ```{code}``` + Refactored Code: + ```{refactored_code}``` + Refactor Rationale: + {refactor_rationale} + + === Return EXACTLY this JSON structure === + {{ + "items": [ + {{ + "antipattern_name": "", + "antipattern_description": "", + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": "", + "refactored_code": "", + "summary": "" + }} + ], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + }} + + Rules: + - Return valid JSON only (no code fences, no prose). + - If multiple antipatterns apply, include multiple entries in "items". + - Keep "refactored_code" concise; truncate if needed but keep it valid. + - Populate all fields; use brief placeholders if unsure. From 28ef50331a09bcddc98f288d2a79361db39dee6b Mon Sep 17 00:00:00 2001 From: Vamsi Date: Wed, 27 Aug 2025 15:40:02 +0100 Subject: [PATCH 2/6] add: Explainer to main workflow, fix: some minor changes to explainer agent --- AntiPattern_Remediator/full_repo_workflow.py | 4 +++- AntiPattern_Remediator/main.py | 14 +++++++++++++- .../src/core/agents/antipattern_scanner.py | 18 +++++++++++------- .../src/core/graph/create_graph.py | 6 ++++-- AntiPattern_Remediator/src/core/state.py | 2 ++ .../workflow/results_manager.py | 9 ++++++++- 6 files changed, 41 insertions(+), 12 deletions(-) diff --git a/AntiPattern_Remediator/full_repo_workflow.py b/AntiPattern_Remediator/full_repo_workflow.py index 69ddb5a..18c480f 100644 --- a/AntiPattern_Remediator/full_repo_workflow.py +++ b/AntiPattern_Remediator/full_repo_workflow.py @@ -108,7 +108,9 @@ def process_java_files_with_workflow(file_paths: list, settings, db_manager, pro "code_review_times": 0, "msgs": [], "answer": None, - "current_file_path": file_path # Track current file being processed + "current_file_path": file_path, # Track current file being processed + "explanation_response_raw": None, + "explanation_json": None } try: diff --git a/AntiPattern_Remediator/main.py b/AntiPattern_Remediator/main.py index a659983..344350d 100644 --- a/AntiPattern_Remediator/main.py +++ b/AntiPattern_Remediator/main.py @@ -9,6 +9,8 @@ from colorama import Fore, Style import os from pathlib import Path +import json + from full_repo_workflow import run_full_repo_workflow @@ -66,7 +68,10 @@ def run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph): "code_review_results": None, "code_review_times": 0, "msgs": [], - "answer": None + "answer": None, + "current_file_path": None, # Track current file being processed + "explanation_response_raw": None, + "explanation_json": None, } final_state = langgraph.invoke(initial_state) @@ -77,6 +82,13 @@ def run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph): print(f"Analysis completed: {'Yes' if final_state.get('antipatterns_scanner_results') else 'No'}") print(f"Refactored code: {'Yes' if final_state.get('refactored_code') else 'No'}") print(f"Code review results: {final_state.get('code_review_times')}") + + # Show explanation from ExplainerAgent + if final_state.get("explanation_json"): + print(Fore.CYAN + "\n=== Explanation (JSON) ===" + Style.RESET_ALL) + print(json.dumps(final_state["explanation_json"], indent=2, ensure_ascii=False)) + else: + print(Fore.RED + "\nNo explanation was generated." + Style.RESET_ALL) def main(): diff --git a/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py b/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py index 230bb1e..01b2147 100644 --- a/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py +++ b/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py @@ -47,14 +47,18 @@ def retrieve_context(self, state: AgentState): relative_file_path = str(Path(*path_obj.parts[i + 2:])) break - api = SonarQubeAPI() - print(Fore.CYAN + f"Using SonarQube project: {project_key}, file: {relative_file_path}" + Style.RESET_ALL) - issues = api.get_issues_for_file(project_key=project_key, file_path=relative_file_path) - solutions = [] - for issue in issues["issues"]: - solutions.append(api.get_rules_and_fix_method(rule_key=issue['rule'])) - state["context"] = {"sonarqube_issues": issues, "search_context": context, "solutions": solutions} + api = SonarQubeAPI() + print(Fore.CYAN + f"Using SonarQube project: {project_key}, file: {relative_file_path}" + Style.RESET_ALL) + issues = api.get_issues_for_file(project_key=project_key, file_path=relative_file_path) + solutions = [] + for issue in issues["issues"]: + solutions.append(api.get_rules_and_fix_method(rule_key=issue['rule'])) + state["context"] = {"sonarqube_issues": issues, "search_context": context, "solutions": solutions} + else: + state["context"] = {"sonarqube_issues": None, "search_context": context, "solutions": []} + print(Fore.GREEN + f"Successfully retrieved relevant context" + Style.RESET_ALL) + except Exception as e: print(Fore.RED + f"Error retrieving context: {e}" + Style.RESET_ALL) state["context"] = "No additional context available due to retrieval error." diff --git a/AntiPattern_Remediator/src/core/graph/create_graph.py b/AntiPattern_Remediator/src/core/graph/create_graph.py index ae52ed5..0100405 100644 --- a/AntiPattern_Remediator/src/core/graph/create_graph.py +++ b/AntiPattern_Remediator/src/core/graph/create_graph.py @@ -14,6 +14,7 @@ from ..agents import RefactorStrategist from ..agents import CodeTransformer from ..agents import CodeReviewerAgent +from ..agents import ExplainerAgent # Imports for LangSmith tracing import os @@ -63,7 +64,7 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll self.prompt_manager = prompt_manager self.conditional_edges = ConditionalEdges() - # ✅ assign the instance attribute before use + # assign the instance attribute before use self.retriever = retriever or self.db_manager.as_retriever() retriever_tool = create_retriever_tool( @@ -79,6 +80,7 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll "strategist": RefactorStrategist(self.llm, self.prompt_manager, retriever=self.retriever), "transformer": CodeTransformer(self.llm, self.prompt_manager), "reviewer": CodeReviewerAgent(self.llm, self.prompt_manager), + "explainer": ExplainerAgent(self.llm, self.prompt_manager) } @@ -130,7 +132,7 @@ def _build_graph(self): }, ) - graph.add_edge("display_code_review_results", END) + graph.add_edge("display_code_review_results", "explain_antipattern") graph.add_edge("explain_antipattern", "display_explanation") graph.add_edge("display_explanation", END) diff --git a/AntiPattern_Remediator/src/core/state.py b/AntiPattern_Remediator/src/core/state.py index fb42b13..1ac64d9 100644 --- a/AntiPattern_Remediator/src/core/state.py +++ b/AntiPattern_Remediator/src/core/state.py @@ -18,3 +18,5 @@ class AgentState(TypedDict): msgs: List[Dict[str, Any]] # Message history for conversation context answer: Optional[str] # Analysis result current_file_path: Optional[str] # Path to the current file being processed + explanation_response_raw: Optional[str] # Raw LLM output from explainer + explanation_json: Optional[Dict[str, Any]] # Parsed JSON explanation \ No newline at end of file diff --git a/AntiPattern_Remediator/workflow/results_manager.py b/AntiPattern_Remediator/workflow/results_manager.py index 3dbe79f..187cc25 100644 --- a/AntiPattern_Remediator/workflow/results_manager.py +++ b/AntiPattern_Remediator/workflow/results_manager.py @@ -91,8 +91,15 @@ def save_intermediate_results(file_path: str, final_state: dict, settings, resul else: markdown_content += "No refactoring strategy generated.\n\n" - markdown_content += f"---\n\n*Generated by AntiPattern Remediator Tool using {settings.LLM_MODEL}*\n" + if final_state.get("explanation_json"): + explanation_results = final_state.get('explanation_json') + markdown_content += "---\n\n## Explanation Results\n\n" + + markdown_content += f"```json\n{json.dumps(explanation_results, indent=2, default=str)}\n```\n\n" + else: + markdown_content += "No explanation generated.\n\n" + markdown_content += f"---\n\n*Generated by AntiPattern Remediator Tool using {settings.LLM_MODEL}*\n" # Save to markdown file with open(results_file_path, 'w', encoding='utf-8') as f: f.write(markdown_content) From 7370085e25f318c716eca1af84580efc9067c5ec Mon Sep 17 00:00:00 2001 From: Avinash Mallick Date: Wed, 27 Aug 2025 17:51:16 +0100 Subject: [PATCH 3/6] Explainer in work --- .../src/core/agents/explainer.py | 68 ++++++++++++------- .../src/core/graph/create_graph.py | 18 ++--- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/AntiPattern_Remediator/src/core/agents/explainer.py b/AntiPattern_Remediator/src/core/agents/explainer.py index 4ab86a6..62ee143 100644 --- a/AntiPattern_Remediator/src/core/agents/explainer.py +++ b/AntiPattern_Remediator/src/core/agents/explainer.py @@ -1,11 +1,9 @@ """ -ExplainerAgent — minimal version -- Delegates state handling to create_graph.py -- Uses PromptManager if available; otherwise a tiny inline fallback prompt -- Always passes msgs; always returns a non-empty explanation_json +ExplainerAgent — full-state returns, collision-safe +- Returns a full state dict but NEVER writes 'code' back to the graph. +- Uses PromptManager if available; otherwise falls back to an inline prompt. """ from __future__ import annotations - from typing import Dict, Any import json @@ -22,6 +20,17 @@ def __init__(self, llm: BaseLanguageModel, prompt_manager: PromptManager): self.llm = llm self.prompt_manager = prompt_manager + # Merge helper: return a FULL state but drop keys we must not rewrite. + @staticmethod + def _merge_return_state(state: Dict[str, Any], updates: Dict[str, Any], drop_keys=("code",)) -> Dict[str, Any]: + merged = dict(state) + # don't write to LastValue 'code' to avoid concurrent updates + for k in drop_keys: + if k in merged: + merged.pop(k) + merged.update(updates or {}) + return merged + def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: """Generate explanation JSON for detected antipatterns and refactor.""" kwargs = dict( @@ -33,7 +42,7 @@ def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: antipattern_name=state.get("antipattern_name", "Unknown antipattern"), antipattern_description=state.get("antipattern_description", ""), antipatterns_json=json.dumps(state.get("antipatterns_json", []), ensure_ascii=False), - msgs=state.get("msgs", []), # ensure MessagesPlaceholder is satisfied + msgs=state.get("msgs", []), ) messages = self._build_messages(**kwargs) @@ -44,44 +53,53 @@ def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: except Exception as e: raw = f"LLM error: {e}" - state["explanation_response_raw"] = raw - - # Robust parse: accept dict, wrap list, or emit a minimal fallback + # Robust parse try: parsed = extract_first_json(raw) except Exception: parsed = None if isinstance(parsed, dict): - state["explanation_json"] = parsed + exp_json = parsed elif isinstance(parsed, list): - state["explanation_json"] = {"items": parsed} + exp_json = {"items": parsed} else: - state["explanation_json"] = self._fallback_payload(state) + exp_json = self._fallback_payload(state) - return state + updates = { + "explanation_response_raw": raw, + "explanation_json": exp_json, + } + # ✅ Return FULL state but with 'code' removed to avoid LastValue collision + return self._merge_return_state(state, updates, drop_keys=("code",)) def display_explanation(self, state: Dict[str, Any]) -> Dict[str, Any]: print("\n=== Explanation (raw) ===\n", state.get("explanation_response_raw", "N/A")) if state.get("explanation_json"): - print("\n=== Explanation (JSON) ===\n", - json.dumps(state["explanation_json"], indent=2, ensure_ascii=False)) - return state + print( + "\n=== Explanation (JSON) ===\n", + json.dumps(state["explanation_json"], indent=2, ensure_ascii=False), + ) + # ✅ Return FULL state but again ensure 'code' isn't echoed back + return self._merge_return_state(state, {}, drop_keys=("code",)) def _build_messages(self, **kwargs) -> Any: - # Always ensure msgs exists if "msgs" not in kwargs or kwargs["msgs"] is None: kwargs = {**kwargs, "msgs": []} - # 1) Try preloaded template from PromptManager prompt = None getp = getattr(self.prompt_manager, "get_prompt", None) if callable(getp): prompt = getp(PROMPT_KEY) if prompt is not None: - return prompt.format_messages(**kwargs) - - # 2) Minimal inline fallback + # Your YAML already has doubled braces for the literal JSON schema. + # If a stray KeyError occurs, fall back to inline prompt. + try: + return prompt.format_messages(**kwargs) + except KeyError: + pass + + # Minimal inline fallback schema = { "items": [{ "antipattern_name": "", @@ -99,9 +117,10 @@ def _build_messages(self, **kwargs) -> Any: "closing_summary": "" } content = ( - "Given inputs (JSON):\n" + json.dumps({k: v for k, v in kwargs.items() if k != "msgs"}, ensure_ascii=False) + - "\nRespond with STRICT JSON using exactly this schema:\n" + - json.dumps(schema, ensure_ascii=False) + "Given inputs (JSON):\n" + + json.dumps({k: v for k, v in kwargs.items() if k != "msgs"}, ensure_ascii=False) + + "\nRespond with STRICT JSON using exactly this schema:\n" + + json.dumps(schema, ensure_ascii=False) ) fallback = ChatPromptTemplate.from_messages([ ("system", "Return STRICT JSON only. No commentary."), @@ -112,7 +131,6 @@ def _build_messages(self, **kwargs) -> Any: @staticmethod def _fallback_payload(state: Dict[str, Any]) -> Dict[str, Any]: - """Tiny fallback so downstream never breaks if parsing fails.""" return { "items": [{ "antipattern_name": state.get("antipattern_name", "Unknown antipattern"), diff --git a/AntiPattern_Remediator/src/core/graph/create_graph.py b/AntiPattern_Remediator/src/core/graph/create_graph.py index 0100405..9a0d47d 100644 --- a/AntiPattern_Remediator/src/core/graph/create_graph.py +++ b/AntiPattern_Remediator/src/core/graph/create_graph.py @@ -20,7 +20,6 @@ import os from langsmith import Client from langchain.callbacks.tracers import LangChainTracer -from operator import add from colorama import Fore, Style @@ -37,7 +36,7 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll **getattr(settings, "parameters", {}) ) - # LangSmith integration + # LangSmith integration (optional) if settings.LLM_PROVIDER in ["ollama", "vllm"] and settings.LANGSMITH_ENABLED: try: os.environ["LANGCHAIN_TRACING_V2"] = "true" @@ -64,7 +63,7 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll self.prompt_manager = prompt_manager self.conditional_edges = ConditionalEdges() - # assign the instance attribute before use + # Assign the instance attribute before use self.retriever = retriever or self.db_manager.as_retriever() retriever_tool = create_retriever_tool( @@ -76,19 +75,17 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll # Agents self.agents = { "scanner": AntipatternScanner(retriever_tool, self.llm, self.prompt_manager), - # If your strategist uses the unified invoke-style retriever: "strategist": RefactorStrategist(self.llm, self.prompt_manager, retriever=self.retriever), "transformer": CodeTransformer(self.llm, self.prompt_manager), "reviewer": CodeReviewerAgent(self.llm, self.prompt_manager), - "explainer": ExplainerAgent(self.llm, self.prompt_manager) - + "explainer": ExplainerAgent(self.llm, self.prompt_manager), } # Build the LangGraph workflow self.workflow = self._build_graph() def _build_graph(self): - """Build LangGraph workflow""" + """Build LangGraph workflow and return the compiled graph.""" graph = StateGraph(AgentState) # Scanner: retrieve + analyze @@ -121,8 +118,8 @@ def _build_graph(self): graph.add_edge("display_refactoring_results", "transform_code") graph.add_edge("transform_code", "display_transformed_code") graph.add_edge("display_transformed_code", "review_code") - graph.add_edge("review_code", "explain_antipattern") + # Conditional: either loop back to transform_code or proceed to display_code_review_results graph.add_conditional_edges( "review_code", self.conditional_edges.code_review_condition, @@ -132,8 +129,11 @@ def _build_graph(self): }, ) + # Only reach explainer after the "pass" path completes graph.add_edge("display_code_review_results", "explain_antipattern") graph.add_edge("explain_antipattern", "display_explanation") graph.add_edge("display_explanation", END) - return graph.compile() + # Compile and return + compiled = graph.compile() + return compiled From 6b6b1f025dede3d1901ace7f47349e8beacf09ac Mon Sep 17 00:00:00 2001 From: Avinash Mallick Date: Wed, 27 Aug 2025 19:26:26 +0100 Subject: [PATCH 4/6] Final Commit --- .../src/core/agents/explainer.py | 50 +++++++++++++------ AntiPattern_Remediator/static/tinydb.json | 2 +- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/AntiPattern_Remediator/src/core/agents/explainer.py b/AntiPattern_Remediator/src/core/agents/explainer.py index 62ee143..c95da4d 100644 --- a/AntiPattern_Remediator/src/core/agents/explainer.py +++ b/AntiPattern_Remediator/src/core/agents/explainer.py @@ -9,6 +9,7 @@ from langchain_core.language_models import BaseLanguageModel from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder +from langchain_core.prompts import PromptTemplate from ..prompt import PromptManager from src.core.utils import extract_first_json @@ -35,12 +36,10 @@ def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: """Generate explanation JSON for detected antipatterns and refactor.""" kwargs = dict( code=state.get("code", ""), - language=state.get("language", "Java"), context=state.get("context", ""), refactored_code=state.get("refactored_code", ""), - refactor_rationale=state.get("refactor_rationale", ""), - antipattern_name=state.get("antipattern_name", "Unknown antipattern"), - antipattern_description=state.get("antipattern_description", ""), + refactoring_strategy=state.get("refactoring_strategy_results", ""), + antipattern_name=state.get("antipatterns_scanner_results", "Unknown antipattern"), antipatterns_json=json.dumps(state.get("antipatterns_json", []), ensure_ascii=False), msgs=state.get("msgs", []), ) @@ -70,7 +69,7 @@ def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: "explanation_response_raw": raw, "explanation_json": exp_json, } - # ✅ Return FULL state but with 'code' removed to avoid LastValue collision + # Return FULL state but with 'code' removed to avoid LastValue collision return self._merge_return_state(state, updates, drop_keys=("code",)) def display_explanation(self, state: Dict[str, Any]) -> Dict[str, Any]: @@ -80,26 +79,36 @@ def display_explanation(self, state: Dict[str, Any]) -> Dict[str, Any]: "\n=== Explanation (JSON) ===\n", json.dumps(state["explanation_json"], indent=2, ensure_ascii=False), ) - # ✅ Return FULL state but again ensure 'code' isn't echoed back + # Return FULL state but again ensure 'code' isn't echoed back return self._merge_return_state(state, {}, drop_keys=("code",)) def _build_messages(self, **kwargs) -> Any: + """ + Build a list of messages for the LLM. + Uses the user‑supplied prompt if available; otherwise falls back to an + inline prompt that safely injects JSON strings via placeholders. + """ if "msgs" not in kwargs or kwargs["msgs"] is None: kwargs = {**kwargs, "msgs": []} + # Try to get a prompt from the PromptManager prompt = None getp = getattr(self.prompt_manager, "get_prompt", None) if callable(getp): prompt = getp(PROMPT_KEY) + if prompt is not None: - # Your YAML already has doubled braces for the literal JSON schema. - # If a stray KeyError occurs, fall back to inline prompt. + # The prompt from PromptManager should already be a PromptTemplate + # or a string that can be formatted safely. try: return prompt.format_messages(**kwargs) except KeyError: + # Fall back if the prompt contains unexpected placeholders pass - # Minimal inline fallback + # ------------------------------------------------------------------ + # Inline fallback – use direct string template instead of nested PromptTemplate + # ------------------------------------------------------------------ schema = { "items": [{ "antipattern_name": "", @@ -116,18 +125,27 @@ def _build_messages(self, **kwargs) -> Any: "trade_offs": [], "closing_summary": "" } - content = ( - "Given inputs (JSON):\n" - + json.dumps({k: v for k, v in kwargs.items() if k != "msgs"}, ensure_ascii=False) - + "\nRespond with STRICT JSON using exactly this schema:\n" - + json.dumps(schema, ensure_ascii=False) + + # Prepare the JSON strings that will be inserted via placeholders + json_input = json.dumps( + {k: v for k, v in kwargs.items() if k != "msgs"}, + ensure_ascii=False ) + json_schema = json.dumps(schema, ensure_ascii=False) + + # Create the ChatPromptTemplate directly with string templates fallback = ChatPromptTemplate.from_messages([ ("system", "Return STRICT JSON only. No commentary."), - ("user", content), + ("user", "Given inputs (JSON):\n{json_input}\nRespond with STRICT JSON using exactly this schema:\n{json_schema}"), MessagesPlaceholder("msgs"), ]) - return fallback.format_messages(**kwargs) + + # Format the messages with the prepared JSON strings + return fallback.format_messages( + json_input=json_input, + json_schema=json_schema, + msgs=kwargs["msgs"] + ) @staticmethod def _fallback_payload(state: Dict[str, Any]) -> Dict[str, Any]: diff --git a/AntiPattern_Remediator/static/tinydb.json b/AntiPattern_Remediator/static/tinydb.json index cd87283..feb76cb 100644 --- a/AntiPattern_Remediator/static/tinydb.json +++ b/AntiPattern_Remediator/static/tinydb.json @@ -1 +1 @@ -{"_default": {"1": {"name": "Deep Nesting", "description": "Deep Nesting occurs when conditional or loop blocks are embedded within one another across multiple levels, creating code with high indentation and complex control flow. While not always increasing cyclomatic complexity linearly, deep nesting significantly raises cognitive complexity (the mental effort required to understand, modify, and debug a method).", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Low readability : The \"arrowhead\" structure makes it hard to trace logic and understand the intended flow.\nHigh cognitive load : Developers must mentally track the conditions that lead to or prevent reaching a particular line of code.\nError-prone maintenance : Adding or modifying logic inside deeply nested blocks increases the risk of missing cases or introducing bugs.\nInhibited reuse and testing : Deep nesting often combines concerns that should be split into smaller, testable methods or units.\nPoor diffs in version control : Even small changes can alter indentation across many lines, making reviews harder.", "remediation": "Guard clauses (early return) : Exit early when preconditions fail, flattening the control flow\nExtract method : Isolate deeply nested blocks into private methods with clear names to separate concerns and reduce depth.\nInvet conditionals : Invert logic to return early or skip unnecessary branches.\nReplace nested loops with streams (Java-specific): Abstract common filtering or mapping logic into declarative operations.\nUse pattern matching : Replace layered `if` chains with clearer structural or type-based matching.\nEncapsulate state checks : Group multiple conditionals into intention-revealing boolean helpers or state objects.", "limitation": "Can conflict with existing code style : Teams unfamiliar with guard clauses may resist early exits or multiple returns.\nRefactoring can obscure logic during transition : Extracted methods must be clearly named to preserve readability and avoid confusion.\nNested logic may be unavoidable in rare edge cases : Complex parsing, state machines, or embedded domain-specific languages may naturally involve deeper control structures.", "type": "antipattern", "source_file": "deep_nesting.json"}, "2": {"name": "Generic Exception handling", "description": "Generic exception handling refers to the use of broad or unspecific catch blocks (catching but ignoring the exceptions). These patterns obscure the true source of errors, suppress the useful debugging information and can unintentionally hide critical failures. This anti-pattern often stems from the desire to keep the code running, but typically leads to weak systems and increased technical debt.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Loss of context : Catching high-level exceptions removes the granularity needed to understand specific failure reasons\nRepeated boilerplate : Developers may re-implement logging, default values, or stream-closing logic instead of using safe utility methods\nViolation of fail-fast principles : Silent or overly generic handling can let critical errors go unnoticed for too long", "remediation": "", "limitation": "Catching too many specific exceptions can bloat the code and reduce readability.\nRefactoring exception handling may require thorough testing to avoid regressions.\nOver-logging exceptions can clutter logs and obscure real issues.\nSecurity-sensitive applications may need specialised exception handling strategies to avoid leaks", "type": "antipattern", "source_file": "generic_exception_handling.json"}, "3": {"name": "Magic Constants", "description": "Magic constants (or magic numbers) are hard-coded literal values (e.g., 3.14, 42, \"admin\") that appear directly in code without context or explanation. These values become problematic when their purpose is unclear, undocumented, or reused inconsistently. While some literals (like 0, 1, or -1) may be self-explanatory in some contexts, others represent thresholds, identifiers, or rules that should be named and documented for clarity.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor readability : Developers must guess the meaning of the value, increasing cognitive load.\nLow maintainability : Changing the value requires updating it everywhere, risking inconsistent updates.\nHarder debugging : Literal values lack descriptive meaning in stack traces, logs, or debuggers.\nDuplication : The same literal used in multiple places leads to repeated logic and tighter coupling.\nViolation of DRY : Embeds implicit meaning multiple times without abstraction.", "remediation": "Define Constants - Move literal values into named constants (e.g., const, final, or static readonly).\nUse Enums - Group related constants (e.g., roles, statuses) as enumerations with meaningful names.", "limitation": "Extracting trivial values like 0 or 1 may clutter code and reduce clarity if overdone.\nIn performance-critical code, indirection through constants or functions may introduce slight overhead.\nOver-abstracting unnamed constants (e.g., MAXCOUNT3) may make code harder to understand.\nIf a value is used only once and is self-explanatory, extracting it may be unnecessary overhead.", "type": "antipattern", "source_file": "magic_constants.json"}, "4": {"name": "SRP Violation", "description": "The Single Responsibility Principle (SRP) is one of the SOLID principles of object-oriented design, stating that a class (or, at a lower level, a method) should have only one reason to change (i.e., it should have only one responsibility or concern). A SRP Violation occurs when a class (method) takes on multiple unrelated responsibilities, making it harder to maintain, test, and understand. These violations often result in bloated classes or methods that mix unrelated concerns such as I/O, business logic, error handling, and configuration.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Changing one responsibility might inadvertently affect others, introducing bugs.\nLow cohesion : Code with unrelated responsibilities lacks a clear purpose, reducing clarity and reusability.\nDifficult testing : Unit testing becomes more complex as setup may require mocking or initializing unrelated dependencies.\nCode duplication and tight coupling : Responsibilities are harder to reuse or share, often leading to repeated logic or tight inter-class dependencies.\nHarder onboarding : New developers struggle to understand the purpose and scope of large, multi-purpose classes.", "remediation": "Extract Method \\- Split complex methods into smaller, single-purpose private methods.\nEarly Return (Guard Clauses) \\- Use early exits to reduce nested logic and clarify separate responsibilities.\nUse Local Functions/Lambdas \\- Encapsulate small inline logic into local functions for clarity.\nEncapsulate Temporary Variables \\- Move logic-heavy expressions into descriptive helper methods.\nGroup Related Logic \\- Cluster related operations into distinct helper methods within the same class.\nSeparate Concerns in Loops \\- Extract filtering, transforming, and aggregating into distinct steps.\nIsolate Logging/Error Handling \\- Move side-effect code like logging into dedicated private methods (unless the logging is a trivial single line and does not obscure business logic).", "limitation": "Fixing certain SRP violations (especially class-level violations) requires changing public method signatures, creating new classes, and/or breaking interfaces.\nMethods often depend on multiple injected services or shared state; untangling responsibilities might require broader architectural changes.\nLogging, error handling, metrics, and security checks are often scattered across responsibilities and difficult to isolate cleanly at the method level without aspect-oriented programming (AOP) or middleware/interceptor patterns.", "type": "antipattern", "source_file": "srp_violation.json"}, "5": {"name": "Unsafe or Vague Exception Handling", "description": "Reliable exception handling, type safety, and controlled flow are essential forr writing maintainable and robust software. Unsafe or Vague exception handling often results in code that is fragile, difficult to test, and challenging to debug. Instead of providing meaningful error handling or clear separation of concerns, these implementations either hide the underlying problem or use language features in a way that breaks maintainability.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Hidden failures : Catching exceptions without logging or meaningful handling hides the source of issues and makes debugging difficult\nPoor diagnosis : Broad exception handling with vague error messages hides intent and makes it harder to trace the root cause\nInconsistent runtime behaviour : using assertions for control logic can lead to unpredictable behaviour depending on the JVM configuration\nUncontrolled termination : Using system.exit() directly in application logic makes code untestable and prevents proper resource cleanup", "remediation": "Replace assertions with proper condition checks and informative exceptions to ensure consistent behaviour across environments\nAvoid silent catch blocks, log exceptions or rethrow them to preserve error context\nHandle specific exception types instead of catching broad categories like Exception or RuntimeException\nRefactor abrupt shutdown calls into controlled exits using exception handling or return code to support recovery", "limitation": "", "type": "antipattern", "source_file": "unsafe_or_vague_exception_handling.json"}}} \ No newline at end of file +{"_default": {"1": {"name": "Deep Nesting", "description": "Deep Nesting occurs when conditional or loop blocks are embedded within one another across multiple levels, creating code with high indentation and complex control flow. While not always increasing cyclomatic complexity linearly, deep nesting significantly raises cognitive complexity (the mental effort required to understand, modify, and debug a method).", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Low readability : The \"arrowhead\" structure makes it hard to trace logic and understand the intended flow.\nHigh cognitive load : Developers must mentally track the conditions that lead to or prevent reaching a particular line of code.\nError-prone maintenance : Adding or modifying logic inside deeply nested blocks increases the risk of missing cases or introducing bugs.\nInhibited reuse and testing : Deep nesting often combines concerns that should be split into smaller, testable methods or units.\nPoor diffs in version control : Even small changes can alter indentation across many lines, making reviews harder.", "remediation": "Guard clauses (early return) : Exit early when preconditions fail, flattening the control flow\nExtract method : Isolate deeply nested blocks into private methods with clear names to separate concerns and reduce depth.\nInvet conditionals : Invert logic to return early or skip unnecessary branches.\nReplace nested loops with streams (Java-specific): Abstract common filtering or mapping logic into declarative operations.\nUse pattern matching : Replace layered `if` chains with clearer structural or type-based matching.\nEncapsulate state checks : Group multiple conditionals into intention-revealing boolean helpers or state objects.", "limitation": "Can conflict with existing code style : Teams unfamiliar with guard clauses may resist early exits or multiple returns.\nRefactoring can obscure logic during transition : Extracted methods must be clearly named to preserve readability and avoid confusion.\nNested logic may be unavoidable in rare edge cases : Complex parsing, state machines, or embedded domain-specific languages may naturally involve deeper control structures.", "type": "antipattern", "source_file": "deep_nesting.json"}, "2": {"name": "Generic Exception handling", "description": "Generic exception handling refers to the use of broad or unspecific catch blocks (catching but ignoring the exceptions). These patterns obscure the true source of errors, suppress the useful debugging information and can unintentionally hide critical failures. This anti-pattern often stems from the desire to keep the code running, but typically leads to weak systems and increased technical debt.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Loss of context : Catching high-level exceptions removes the granularity needed to understand specific failure reasons\nRepeated boilerplate : Developers may re-implement logging, default values, or stream-closing logic instead of using safe utility methods\nViolation of fail-fast principles : Silent or overly generic handling can let critical errors go unnoticed for too long", "remediation": "", "limitation": "Catching too many specific exceptions can bloat the code and reduce readability.\nRefactoring exception handling may require thorough testing to avoid regressions.\nOver-logging exceptions can clutter logs and obscure real issues.\nSecurity-sensitive applications may need specialised exception handling strategies to avoid leaks", "type": "antipattern", "source_file": "generic_exception_handling.json"}, "3": {"name": "Magic Constants", "description": "Magic constants (or magic numbers) are hard-coded literal values (e.g., 3.14, 42, \"admin\") that appear directly in code without context or explanation. These values become problematic when their purpose is unclear, undocumented, or reused inconsistently. While some literals (like 0, 1, or -1) may be self-explanatory in some contexts, others represent thresholds, identifiers, or rules that should be named and documented for clarity.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor readability : Developers must guess the meaning of the value, increasing cognitive load.\nLow maintainability : Changing the value requires updating it everywhere, risking inconsistent updates.\nHarder debugging : Literal values lack descriptive meaning in stack traces, logs, or debuggers.\nDuplication : The same literal used in multiple places leads to repeated logic and tighter coupling.\nViolation of DRY : Embeds implicit meaning multiple times without abstraction.", "remediation": "Define Constants - Move literal values into named constants (e.g., const, final, or static readonly).\nUse Enums - Group related constants (e.g., roles, statuses) as enumerations with meaningful names.", "limitation": "Extracting trivial values like 0 or 1 may clutter code and reduce clarity if overdone.\nIn performance-critical code, indirection through constants or functions may introduce slight overhead.\nOver-abstracting unnamed constants (e.g., MAXCOUNT3) may make code harder to understand.\nIf a value is used only once and is self-explanatory, extracting it may be unnecessary overhead.", "type": "antipattern", "source_file": "magic_constants.json"}, "4": {"name": "Unsafe or Vague Exception Handling", "description": "Reliable exception handling, type safety, and controlled flow are essential forr writing maintainable and robust software. Unsafe or Vague exception handling often results in code that is fragile, difficult to test, and challenging to debug. Instead of providing meaningful error handling or clear separation of concerns, these implementations either hide the underlying problem or use language features in a way that breaks maintainability.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Hidden failures : Catching exceptions without logging or meaningful handling hides the source of issues and makes debugging difficult\nPoor diagnosis : Broad exception handling with vague error messages hides intent and makes it harder to trace the root cause\nInconsistent runtime behaviour : using assertions for control logic can lead to unpredictable behaviour depending on the JVM configuration\nUncontrolled termination : Using system.exit() directly in application logic makes code untestable and prevents proper resource cleanup", "remediation": "Replace assertions with proper condition checks and informative exceptions to ensure consistent behaviour across environments\nAvoid silent catch blocks, log exceptions or rethrow them to preserve error context\nHandle specific exception types instead of catching broad categories like Exception or RuntimeException\nRefactor abrupt shutdown calls into controlled exits using exception handling or return code to support recovery", "limitation": "", "type": "antipattern", "source_file": "unsafe_or_vague_exception_handling.json"}, "5": {"name": "Duplicate Code", "description": "Duplicate Code occurs when identical or very similar code blocks are repeated throughout the codebase. This pattern creates maintenance overhead, increases the likelihood of bugs, and violates the DRY (Don't Repeat Yourself) principle. Common examples include repeated null checks, validation logic, and similar conditional patterns across different methods or classes.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Maintenance overhead : Changes need to be applied in multiple places, increasing the risk of inconsistencies\nBug multiplication : A bug in duplicated code affects multiple locations, making fixes more complex\nCode bloat : Repeated code increases the overall size of the codebase without adding functionality\nViolation of DRY principle : Makes the code harder to understand and reason about", "remediation": "", "limitation": "Over-abstraction can make code harder to understand if the duplication is minimal or contextually different\nPremature extraction of methods may create unnecessary coupling between unrelated parts of the system\nSome duplication might be acceptable if the code serves different business contexts", "type": "antipattern", "source_file": "duplicate_code.json"}, "6": {"name": "God Class", "description": "A God Class anti-pattern refers to a class that centralises too many responsibilities in a single location, becoming overly complex and difficult to maintain. Such classes tend to know too much, do too much, and interact with many different parts of the system. This leads to tightly coupled code, reduces modularity, and makes the system hard to test and extend.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "High coupling: God Classes tend to interact with many other classes and modules, thereby reducing system modularity and increasing the risk of changes.\nPoor maintainability : Large, complex classes are difficult to read, understand, and modify.\nLow reusability : The class becomes so specific and bloated that it is rarely useful outside of its original context.\nHidden dependencies : God Classes often hide dependencies within fields or methods, making the codebase less transparent.", "remediation": "", "limitation": "", "type": "antipattern", "source_file": "god_class.json"}, "7": {"name": "Middle Man", "description": "The Middle Man anti-pattern occurs when a class exists primarily to delegate calls to another class without adding meaningful logic of its own. Essentially, the class acts as a pass-through or proxy, forwarding method calls without adding value. While delegation is sometimes necessary for abstraction, excessive or trivial delegation leads to unnecessary indirection and increases maintenance overhead.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Extra Indirection : Code must go through one more layer, which can complicate understanding the code flow.\nIncreased maintenance : When the delegated class changes, the middle-man class often needs updates for all its pass-through methods.\nLow cohesion : The middle-man class has little real logic, making its purpose unclear\nCode bloat : Many trivial delegation methods clutter the class, making it harder to navigate.\nHarder debugging : Tracing behaviour through multiple layers of delegation adds cognitive load.", "remediation": "", "limitation": "Some delegation is unavoidable, e.g., to implement an interface or provide a stable abstraction layer\nRemoving middleman classes may break existing APIs or require refactoring client code.\nIn some cases, delegation is part of a design pattern (like a proxy), which is intentional and not an anti-pattern", "type": "antipattern", "source_file": "middle_man.json"}, "8": {"name": "Monolithic Method", "description": "A Monolithic Method is a single method that tries to do too much, often combining unrelated responsibilities into one block of code. This is essentially a method-level violation of the Single Responsibility Principle (SRP). Monolithic methods are hard to read, understand, maintain, and test because they mix business logic, I/O, error handling, and other concerns in one place.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Modifying one part of the method risks breaking unrelated functionality.\nLow readability : Long, complex methods are hard to follow and understand.\nDifficult testing : Unit tests become cumbersome because the method does too many things at once.\nCode duplication : Reusing logic is difficult; similar tasks often get reimplemented elsewhere.\nTight coupling : Internal details are intertwined, making refactoring risky.", "remediation": "", "limitation": "Refactoring may require changes to public method signatures, especially if other code depends on it.\nSome logic may rely on shared state or multiple services, making it hard to separate without broader architectural changes.\nIf the method handles cross-cutting concerns (e.g., logging, metrics, validation), isolating responsibilities may require AOP or middleware.", "type": "antipattern", "source_file": "monolithic_method.json"}, "9": {"name": "SRP Violation", "description": "The Single Responsibility Principle (SRP) is one of the SOLID principles of object-oriented design, stating that a class (or, at a lower level, a method) should have only one reason to change (i.e., it should have only one responsibility or concern). A SRP Violation occurs when a class (method) takes on multiple unrelated responsibilities, making it harder to maintain, test, and understand. These violations often result in bloated classes or methods that mix unrelated concerns such as I/O, business logic, error handling, and configuration.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Changing one responsibility might inadvertently affect others, introducing bugs.\nLow cohesion : Code with unrelated responsibilities lacks a clear purpose, reducing clarity and reusability.\nDifficult testing : Unit testing becomes more complex as setup may require mocking or initializing unrelated dependencies.\nCode duplication and tight coupling : Responsibilities are harder to reuse or share, often leading to repeated logic or tight inter-class dependencies.\nHarder onboarding : New developers struggle to understand the purpose and scope of large, multi-purpose classes.", "remediation": "Extract Method \\- Split complex methods into smaller, single-purpose private methods.\nEarly Return (Guard Clauses) \\- Use early exits to reduce nested logic and clarify separate responsibilities.\nUse Local Functions/Lambdas \\- Encapsulate small inline logic into local functions for clarity.\nEncapsulate Temporary Variables \\- Move logic-heavy expressions into descriptive helper methods.\nGroup Related Logic \\- Cluster related operations into distinct helper methods within the same class.\nSeparate Concerns in Loops \\- Extract filtering, transforming, and aggregating into distinct steps.\nIsolate Logging/Error Handling \\- Move side-effect code like logging into dedicated private methods (unless the logging is a trivial single line and does not obscure business logic).", "limitation": "Fixing certain SRP violations (especially class-level violations) requires changing public method signatures, creating new classes, and/or breaking interfaces.\nMethods often depend on multiple injected services or shared state; untangling responsibilities might require broader architectural changes.\nLogging, error handling, metrics, and security checks are often scattered across responsibilities and difficult to isolate cleanly at the method level without aspect-oriented programming (AOP) or middleware/interceptor patterns.", "type": "antipattern", "source_file": "srp_violation.json"}}} \ No newline at end of file From 123a5bc0b45923a4367c10b57f12ca2f4929f8f5 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Thu, 28 Aug 2025 00:25:06 +0100 Subject: [PATCH 5/6] Add : .md file generation for code snippet, Update: intermediate results to include refactored code --- AntiPattern_Remediator/main.py | 10 +- .../src/core/agents/explainer.py | 1 + .../workflow/results_manager.py | 149 ++++++++++++++---- 3 files changed, 127 insertions(+), 33 deletions(-) diff --git a/AntiPattern_Remediator/main.py b/AntiPattern_Remediator/main.py index 344350d..e788480 100644 --- a/AntiPattern_Remediator/main.py +++ b/AntiPattern_Remediator/main.py @@ -12,6 +12,7 @@ import json from full_repo_workflow import run_full_repo_workflow +from workflow.results_manager import save_intermediate_results def run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph): @@ -82,13 +83,20 @@ def run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph): print(f"Analysis completed: {'Yes' if final_state.get('antipatterns_scanner_results') else 'No'}") print(f"Refactored code: {'Yes' if final_state.get('refactored_code') else 'No'}") print(f"Code review results: {final_state.get('code_review_times')}") - + # Show explanation from ExplainerAgent if final_state.get("explanation_json"): print(Fore.CYAN + "\n=== Explanation (JSON) ===" + Style.RESET_ALL) print(json.dumps(final_state["explanation_json"], indent=2, ensure_ascii=False)) else: print(Fore.RED + "\nNo explanation was generated." + Style.RESET_ALL) + + save_intermediate_results( + file_path="java_code_snippet", + final_state=final_state, + settings=settings + ) + def main(): diff --git a/AntiPattern_Remediator/src/core/agents/explainer.py b/AntiPattern_Remediator/src/core/agents/explainer.py index c95da4d..9d38f86 100644 --- a/AntiPattern_Remediator/src/core/agents/explainer.py +++ b/AntiPattern_Remediator/src/core/agents/explainer.py @@ -34,6 +34,7 @@ def _merge_return_state(state: Dict[str, Any], updates: Dict[str, Any], drop_key def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: """Generate explanation JSON for detected antipatterns and refactor.""" + print("Preparing to Explain...") kwargs = dict( code=state.get("code", ""), context=state.get("context", ""), diff --git a/AntiPattern_Remediator/workflow/results_manager.py b/AntiPattern_Remediator/workflow/results_manager.py index 187cc25..cb32535 100644 --- a/AntiPattern_Remediator/workflow/results_manager.py +++ b/AntiPattern_Remediator/workflow/results_manager.py @@ -13,38 +13,42 @@ def save_intermediate_results(file_path: str, final_state: dict, settings, results_dir: str = "../processing_results") -> bool: """Save intermediate results from the agentic workflow for analysis in markdown format.""" try: - # Create results directory if it doesn't exist - results_path = Path(results_dir) - results_path.mkdir(parents=True, exist_ok=True) - - # Create a unique filename based on the original file path - file_path_obj = Path(file_path) - - # Extract the meaningful part of the path starting from the repository name - # Find the 'clones' directory and take everything after it - meaningful_path = None - for i, part in enumerate(file_path_obj.parts): - if part == 'clones' and i + 1 < len(file_path_obj.parts): - # Take from the repo name onwards - meaningful_path = Path(*file_path_obj.parts[i+1:]) - break - - if meaningful_path is None: - # Fallback: use just the filename if 'clones' not found - meaningful_path = file_path_obj.name - - # Create a safe filename by replacing path separators and other problematic characters - safe_filename = str(meaningful_path).replace('/', '_').replace('\\', '_').replace(':', '_') - - # Replace .java extension with .md and add results suffix - if safe_filename.endswith('.java'): - safe_filename = safe_filename[:-5] # Remove .java - results_filename = f"{safe_filename}_results.md" - results_file_path = results_path / results_filename - + if file_path != 'java_code_snippet' and not None: + # Create results directory if it doesn't exist + results_path = Path(results_dir) + results_path.mkdir(parents=True, exist_ok=True) + + # Create a unique filename based on the original file path + file_path_obj = Path(file_path) + + # Extract the meaningful part of the path starting from the repository name + # Find the 'clones' directory and take everything after it + meaningful_path = None + for i, part in enumerate(file_path_obj.parts): + if part == 'clones' and i + 1 < len(file_path_obj.parts): + # Take from the repo name onwards + meaningful_path = Path(*file_path_obj.parts[i+1:]) + break + + if meaningful_path is None: + # Fallback: use just the filename if 'clones' not found + meaningful_path = file_path_obj.name + + # Create a safe filename by replacing path separators and other problematic characters + safe_filename = str(meaningful_path).replace('/', '_').replace('\\', '_').replace(':', '_') + + # Replace .java extension with .md and add results suffix + if safe_filename.endswith('.java'): + safe_filename = safe_filename[:-5] # Remove .java + results_filename = f"{safe_filename}_results.md" + results_file_path = results_path / results_filename + else: + results_filename = f"java_code_snippet_results.md" + results_file_path = results_filename + # Generate markdown content timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - markdown_content = f"""# Processing Results: {file_path_obj.name} + markdown_content = f"""# Processing Results: {file_path_obj.name if file_path != 'java_code_snippet' else 'Java Code Snippet'} ## File Information - **Original File Path**: `{file_path}` @@ -94,11 +98,92 @@ def save_intermediate_results(file_path: str, final_state: dict, settings, resul if final_state.get("explanation_json"): explanation_results = final_state.get('explanation_json') markdown_content += "---\n\n## Explanation Results\n\n" - - markdown_content += f"```json\n{json.dumps(explanation_results, indent=2, default=str)}\n```\n\n" + + # Parse and format the explanation JSON + if isinstance(explanation_results, dict): + # Handle individual anti-patterns + items = explanation_results.get('items', []) + if items: + markdown_content += "### Anti-Patterns Addressed\n\n" + + for i, item in enumerate(items, 1): + antipattern_name = item.get('antipattern_name', 'Unknown Pattern') + markdown_content += f"#### {i}. {antipattern_name}\n\n" + + # Loop through all keys in the item (except refactored_code and antipattern_name) + for key, value in item.items(): + if key not in ['antipattern_name', 'refactored_code'] and value: + # Convert snake_case to Title Case for heading + heading = key.replace('_', ' ').title() + markdown_content += f"**{heading}:** {value}\n\n" + + markdown_content += "---\n\n" + + # Handle all other sections dynamically + for key, value in explanation_results.items(): + if key != 'items' and value: # Skip items (already processed) and empty values + # Convert snake_case to Title Case for section heading + section_title = key.replace('_', ' ').title() + markdown_content += f"### {section_title}\n\n" + + if isinstance(value, list): + # Handle list items + for item in value: + markdown_content += f"- {item}\n" + markdown_content += "\n" + else: + # Handle single values + markdown_content += f"{value}\n\n" + else: + # Fallback to JSON if format is unexpected + markdown_content += f"```json\n{json.dumps(explanation_results, indent=2, default=str)}\n```\n\n" else: markdown_content += "No explanation generated.\n\n" + # Add side-by-side code comparison + markdown_content += "---\n\n## Code Comparison\n\n" + + original_code = final_state.get('code', '') + refactored_code = final_state.get('refactored_code', '') + + if original_code or refactored_code: + markdown_content += "### Original Code vs Refactored Code\n\n" + markdown_content += '
\n\n' + + # Original code column + markdown_content += '
\n\n' + markdown_content += "**Original Code:**\n\n" + if original_code: + markdown_content += f"```java\n{original_code}\n```\n\n" + else: + markdown_content += "*No original code available*\n\n" + markdown_content += '
\n\n' + + # Refactored code column + markdown_content += '
\n\n' + markdown_content += "**Refactored Code:**\n\n" + if refactored_code: + markdown_content += f"```java\n{refactored_code}\n```\n\n" + else: + markdown_content += "*No refactored code generated*\n\n" + markdown_content += '
\n\n' + + markdown_content += '
\n\n' + + # Add code comparison summary + if original_code and refactored_code: + original_lines = len(original_code.splitlines()) + refactored_lines = len(refactored_code.splitlines()) + line_change = refactored_lines - original_lines + line_change_str = f"+{line_change}" if line_change > 0 else str(line_change) + + markdown_content += f"**Code Metrics:**\n" + markdown_content += f"- Original Loc: {original_lines}\n" + markdown_content += f"- Refactored Loc: {refactored_lines}\n" + markdown_content += f"- LoC change: {line_change_str}\n\n" + else: + markdown_content += "No code available for comparison.\n\n" + markdown_content += f"---\n\n*Generated by AntiPattern Remediator Tool using {settings.LLM_MODEL}*\n" # Save to markdown file with open(results_file_path, 'w', encoding='utf-8') as f: From b8b77392e04978145f63112aaee00556506f6aa1 Mon Sep 17 00:00:00 2001 From: Avinash Date: Thu, 28 Aug 2025 10:58:39 +0100 Subject: [PATCH 6/6] CI tests updated --- .../unit_test/prompt/test_prompt_manager.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py index 088af2f..e1ce7e5 100644 --- a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py +++ b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py @@ -44,6 +44,7 @@ def __init__(self): self.REFACTOR_STRATEGIST = "refactor_strategist" self.CODE_TRANSFORMER = "code_transformer" self.CODE_REVIEWER = "code_reviewer" + self.EXPLAINER = "explainer" self.prompt_directory = Path(__file__).parent self._prompt_cache = {} self._load_all_prompts() # Call this to match real behavior @@ -87,7 +88,8 @@ def _load_all_prompts(self): self.ANTIPATTERN_SCANNER, self.REFACTOR_STRATEGIST, self.CODE_TRANSFORMER, - self.CODE_REVIEWER + self.CODE_REVIEWER, + self.EXPLAINER ] for prompt_key in prompt_constants: @@ -155,6 +157,7 @@ def test_initialization_creates_correct_attributes(self): assert hasattr(manager, 'REFACTOR_STRATEGIST') assert hasattr(manager, 'CODE_TRANSFORMER') assert hasattr(manager, 'CODE_REVIEWER') + assert hasattr(manager, 'EXPLAINER') assert hasattr(manager, 'prompt_directory') assert hasattr(manager, '_prompt_cache') assert isinstance(manager._prompt_cache, dict) @@ -176,6 +179,7 @@ def test_prompt_constants_have_correct_values(self): assert manager.REFACTOR_STRATEGIST == "refactor_strategist" assert manager.CODE_TRANSFORMER == "code_transformer" assert manager.CODE_REVIEWER == "code_reviewer" + assert manager.EXPLAINER == "explainer" def test_prompt_directory_is_set_correctly(self): """Test that prompt directory is assigned from settings.""" @@ -505,6 +509,7 @@ def test_initialization_with_missing_directory(self, capsys): manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + manager.EXPLAINER = "explainer" manager.prompt_directory = Path("/non/existent/path") manager._prompt_cache = {} @@ -575,6 +580,12 @@ def temp_prompt_files(self): 'system': 'You are an expert code reviewer.', 'user': 'Review this code for quality and best practices: {code}' } + }, + 'explainer.yaml': { + 'explainer': { + 'system': 'You are a senior software engineer and you primarily explain code', + 'user': 'Explain: {code}\nLang: {language}\nCtx: {context}' + } } } @@ -593,6 +604,7 @@ def test_load_all_prompts_loads_all_available_files(self, temp_prompt_files, cap manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + manager.EXPLAINER = "explainer" manager.prompt_directory = temp_prompt_files manager._prompt_cache = {} @@ -600,7 +612,7 @@ def test_load_all_prompts_loads_all_available_files(self, temp_prompt_files, cap manager._load_all_prompts() # Assert: Verify all prompts were loaded - assert len(manager._prompt_cache) == 4 + assert len(manager._prompt_cache) == 5 assert "antipattern_scanner" in manager._prompt_cache assert "refactor_strategist" in manager._prompt_cache assert "code_transformer" in manager._prompt_cache @@ -608,7 +620,7 @@ def test_load_all_prompts_loads_all_available_files(self, temp_prompt_files, cap # Verify success message captured = capsys.readouterr() - assert "Successfully loaded 4 prompts" in captured.out + assert "Successfully loaded 5 prompts" in captured.out def test_load_all_prompts_handles_partial_failures(self, capsys): """Test that _load_all_prompts continues loading even if some files are missing.""" @@ -633,6 +645,7 @@ def test_load_all_prompts_handles_partial_failures(self, capsys): manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + manager.EXPLAINER = "explainer" manager.prompt_directory = temp_path manager._prompt_cache = {}