From ac485a015af8db0c290be2be2f0cf0f85b70b058 Mon Sep 17 00:00:00 2001 From: maoqin Date: Thu, 21 Aug 2025 14:54:01 +0100 Subject: [PATCH 1/9] add sonarqube api tool --- .../src/core/agents/antipattern_scanner.py | 15 +++++++++++++-- .../static/prompt/antipattern_scanner.yaml | 4 ++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py b/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py index a1312a0..50b92d8 100644 --- a/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py +++ b/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py @@ -2,9 +2,14 @@ Antipattern scanner agent for detecting code smells and antipatterns """ +import sys +import os +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..', '..')) + from ..state import AgentState from colorama import Fore, Style from ..prompt import PromptManager +from sonarqube_tool import SonarQubeAPI class AntipatternScanner: @@ -22,7 +27,12 @@ def retrieve_context(self, state: AgentState): search_query = f"Java antipatterns code analysis: {state['code'][:50]}" # Use retriever_tool to get relevant context context = self.tool.invoke({"query": search_query}) - state["context"] = context + api = SonarQubeAPI() + issues = api.get_issues_for_file(project_key="commons-collections", file_path="src/main/java/org/apache/commons/collections4/collection/SynchronizedCollection.java") + solutions = [] + for issue in issues["issues"]: + solutions.append(api.get_rules_and_fix_method(rule_key=issue['rule'])) + state["context"] = {"sonarqube_issues": issues, "search_context": context, "solutions": solutions} print(Fore.GREEN + f"Successfully retrieved relevant context" + Style.RESET_ALL) except Exception as e: print(Fore.RED + f"Error retrieving context: {e}" + Style.RESET_ALL) @@ -39,7 +49,8 @@ def analyze_antipatterns(self, state: AgentState): formatted_messages = prompt_template.format_messages( code=state['code'], 
- context=state['context'], + context=state['context'].get('search_context', ''), + sonarqube_issues=state['context'].get('solutions', ''), msgs=msgs ) diff --git a/AntiPattern_Remediator/static/prompt/antipattern_scanner.yaml b/AntiPattern_Remediator/static/prompt/antipattern_scanner.yaml index 052d3d3..33587d3 100644 --- a/AntiPattern_Remediator/static/prompt/antipattern_scanner.yaml +++ b/AntiPattern_Remediator/static/prompt/antipattern_scanner.yaml @@ -22,6 +22,10 @@ antipattern_scanner: ```java {code} ``` + Found issues: + ```json + {sonarqube_issues} + ``` Additional context from codebase: {context} From 746052ad8b25cbfbbba60f6fec8732f5e4eb2a99 Mon Sep 17 00:00:00 2001 From: Avinash Date: Fri, 22 Aug 2025 15:51:18 +0100 Subject: [PATCH 2/9] Final changes for explainer agent --- AntiPattern_Remediator/main.py | 31 +++++-- .../src/core/agents/__init__.py | 4 +- .../src/core/agents/explainer.py | 86 +++++++++++++++++++ .../src/core/graph/create_graph.py | 13 ++- .../src/core/prompt/prompt_manager.py | 29 ++++--- AntiPattern_Remediator/src/core/state.py | 2 + .../src/core/utils/__init__.py | 3 + .../src/core/utils/json_utils.py | 27 ++++++ .../static/prompt/explainer.yaml | 42 +++++++++ AntiPattern_Remediator/static/tinydb.json | 2 +- 10 files changed, 213 insertions(+), 26 deletions(-) create mode 100644 AntiPattern_Remediator/src/core/agents/explainer.py create mode 100644 AntiPattern_Remediator/src/core/utils/__init__.py create mode 100644 AntiPattern_Remediator/src/core/utils/json_utils.py create mode 100644 AntiPattern_Remediator/static/prompt/explainer.yaml diff --git a/AntiPattern_Remediator/main.py b/AntiPattern_Remediator/main.py index fc05fdd..cbd3b45 100644 --- a/AntiPattern_Remediator/main.py +++ b/AntiPattern_Remediator/main.py @@ -1,13 +1,12 @@ - """ Main entry point - Legacy Code Migration Tool """ from config.settings import initialize_settings -# from scripts import seed_database from dotenv import load_dotenv load_dotenv() from colorama import 
Fore, Style + def main(): """Main function: Run antipattern analysis""" @@ -18,9 +17,9 @@ def main(): provider_map = {"1": "ollama", "2": "ibm", "3": "vllm"} provider = provider_map.get(choice, "ollama") # default to ollama - #Let us choose which DB to interact with + # Let us choose which DB to interact with print("Choose your trove: 1) ChromaDB (VectorDB) 2) TinyDB (DocumentDB)") - db_choice = input("Choose 1 or 2: ").strip() + db_choice = input("Choose 1 or 2: ").strip() # Initialize global settings with selected provider settings = initialize_settings(provider) @@ -74,6 +73,8 @@ def main(): } } """ + + # Initial workflow state initial_state = { "code": legacy_code, "context": None, @@ -84,24 +85,29 @@ def main(): "code_review_results": None, "code_review_times": 0, "msgs": [], - "answer": None + "answer": None, + + # ExplainerAgent fields + "explanation_response_raw": None, + "explanation_json": None, } - #Setup Database + # Setup Database if db_choice == "2": print("Seeding TinyDB with AntiPattern Dataset") seed_database.main() db_manager = TinyDBManager() - print("Using TinyDB for knowledge retreival") + print("Using TinyDB for knowledge retrieval") else: vector_db = VectorDBManager() db_manager = vector_db.get_db() - print("Using ChromaDB for knowledge retreival") + print("Using ChromaDB for knowledge retrieval") retriever = db_manager.as_retriever() langgraph = CreateGraph(db_manager, prompt_manager, retriever=retriever).workflow final_state = langgraph.invoke(initial_state) + # Final results summary print(Fore.GREEN + f"\nAnalysis Complete!" 
+ Style.RESET_ALL) print(f"Final state keys: {list(final_state.keys())}") print(f"Context retrieved: {'Yes' if final_state.get('context') else 'No'}") @@ -109,5 +115,14 @@ def main(): print(f"Refactored code: {'Yes' if final_state.get('refactored_code') else 'No'}") print(f"Code review results: {final_state.get('code_review_times')}") + # Show explanation from ExplainerAgent + if final_state.get("explanation_json"): + import json + print(Fore.CYAN + "\n=== Explanation (JSON) ===" + Style.RESET_ALL) + print(json.dumps(final_state["explanation_json"], indent=2, ensure_ascii=False)) + else: + print(Fore.RED + "\nNo explanation was generated." + Style.RESET_ALL) + + if __name__ == "__main__": main() diff --git a/AntiPattern_Remediator/src/core/agents/__init__.py b/AntiPattern_Remediator/src/core/agents/__init__.py index 79c1adc..d5eceaa 100644 --- a/AntiPattern_Remediator/src/core/agents/__init__.py +++ b/AntiPattern_Remediator/src/core/agents/__init__.py @@ -7,10 +7,12 @@ from .refactor_strategist import RefactorStrategist from .code_transformer import CodeTransformer from .code_reviewer import CodeReviewerAgent +from .explainer import ExplainerAgent __all__ = [ "AntipatternScanner", "RefactorStrategist", "CodeTransformer", - "CodeReviewerAgent" + "CodeReviewerAgent", + "ExplainerAgent" ] diff --git a/AntiPattern_Remediator/src/core/agents/explainer.py b/AntiPattern_Remediator/src/core/agents/explainer.py new file mode 100644 index 0000000..9344121 --- /dev/null +++ b/AntiPattern_Remediator/src/core/agents/explainer.py @@ -0,0 +1,86 @@ +""" +ExplainerAgent — minimal version +- Delegates state handling to create_graph.py +- Only builds messages and parses JSON response +- Keeps code minimal and focused +""" +from __future__ import annotations + +from typing import Dict, Any +import json + +from langchain_core.language_models import BaseLanguageModel +from ..prompt import PromptManager +from src.core.utils import extract_first_json + +PROMPT_KEY = "explainer" + + +class 
ExplainerAgent: + def __init__(self, llm: BaseLanguageModel, prompt_manager: PromptManager): + self.llm = llm + self.prompt_manager = prompt_manager + + def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: + """Generate explanation JSON for detected antipatterns and refactor.""" + kwargs = dict( + code=state.get("code", ""), + language=state.get("language", "Java"), + context=state.get("context", ""), + refactored_code=state.get("refactored_code", ""), + refactor_rationale=state.get("refactor_rationale", ""), + antipattern_name=state.get("antipattern_name", "Unknown antipattern"), + antipattern_description=state.get("antipattern_description", ""), + antipatterns_json=json.dumps(state.get("antipatterns_json", []), ensure_ascii=False), + ) + + messages = self._build_messages(**kwargs) + response = self.llm.invoke(messages) + raw = getattr(response, "content", None) or str(response) + state["explanation_response_raw"] = raw + + parsed = extract_first_json(raw) + state["explanation_json"] = parsed if isinstance(parsed, dict) else {} + return state + + def display_explanation(self, state: Dict[str, Any]) -> Dict[str, Any]: + print("\n=== Explanation (raw) ===\n", state.get("explanation_response_raw", "N/A")) + if state.get("explanation_json"): + print("\n=== Explanation (JSON) ===\n", json.dumps(state["explanation_json"], indent=2, ensure_ascii=False)) + return state + + def _build_messages(self, **kwargs) -> Any: + try: + getp = getattr(self.prompt_manager, "get_prompt", None) + if callable(getp): + prompt = getp(PROMPT_KEY) + if prompt is not None: + return prompt.format_messages(**kwargs) + except Exception: + pass + + schema = { + "items": [{ + "antipattern_name": "", + "antipattern_description": "", + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": "", + "refactored_code": "", + "summary": "" + }], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + } + content = ( + "Given 
inputs (JSON):\n" + json.dumps(kwargs, ensure_ascii=False) + + "\nRespond with STRICT JSON using exactly this schema:\n" + + json.dumps(schema, ensure_ascii=False) + ) + return [ + {"role": "system", "content": "Return STRICT JSON only. No commentary."}, + {"role": "user", "content": content}, + ] diff --git a/AntiPattern_Remediator/src/core/graph/create_graph.py b/AntiPattern_Remediator/src/core/graph/create_graph.py index 9c4cf93..096d403 100644 --- a/AntiPattern_Remediator/src/core/graph/create_graph.py +++ b/AntiPattern_Remediator/src/core/graph/create_graph.py @@ -14,6 +14,7 @@ from ..agents import RefactorStrategist from ..agents import CodeTransformer from ..agents import CodeReviewerAgent +from ..agents import ExplainerAgent # Imports for LangSmith tracing import os @@ -62,7 +63,7 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll self.prompt_manager = prompt_manager self.conditional_edges = ConditionalEdges() - # ✅ assign the instance attribute before use + # assign the instance attribute before use self.retriever = retriever or self.db_manager.as_retriever() retriever_tool = create_retriever_tool( @@ -78,13 +79,13 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll "strategist": RefactorStrategist(self.llm, self.prompt_manager, retriever=self.retriever), "transformer": CodeTransformer(self.llm, self.prompt_manager), "reviewer": CodeReviewerAgent(self.llm, self.prompt_manager), + "explainer": ExplainerAgent(self.llm, self.prompt_manager) } # Build the LangGraph workflow self.workflow = self._build_graph() def _build_graph(self): - """Build LangGraph workflow""" graph = StateGraph(AgentState) # Scanner: retrieve + analyze @@ -104,6 +105,10 @@ def _build_graph(self): graph.add_node("review_code", self.agents["reviewer"].review_code) graph.add_node("display_code_review_results", self.agents["reviewer"].display_code_review_results) + # Explainer: final storytelling + 
graph.add_node("explain_antipattern", self.agents["explainer"].explain_antipattern) + graph.add_node("display_explanation", self.agents["explainer"].display_explanation) + # Topology graph.set_entry_point("retrieve_context") graph.add_edge("retrieve_context", "analyze_antipatterns") @@ -123,6 +128,8 @@ def _build_graph(self): }, ) - graph.add_edge("display_code_review_results", END) + graph.add_edge("display_code_review_results", "explain_antipattern") + graph.add_edge("explain_antipattern", "display_explanation") + graph.add_edge("display_explanation", END) return graph.compile() diff --git a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py index 0650429..1f1abef 100644 --- a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py +++ b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py @@ -7,27 +7,26 @@ class PromptManager: """Manager for handling prompt templates and configurations.""" def __init__(self): - # Prompt key constants, **same as YAML filenames** + # Prompt key constants, **same as YAML filenames (without .yaml)** self.ANTIPATTERN_SCANNER = "antipattern_scanner" - self.REFACTOR_STRATEGIST = "refactor_strategist" + self.REFACTOR_STRATEGIST = "refactor_strategist" self.CODE_TRANSFORMER = "code_transformer" self.CODE_REVIEWER = "code_reviewer" + self.EXPLAINER_AGENT = "explainer" self.prompt_directory = settings.PROMPT_DIR - # Initialize storage for prompt templates self._prompt_cache = {} - # Load prompts on initialization self._load_all_prompts() def _load_all_prompts(self) -> None: """Load all prompt configurations from YAML files.""" try: - # Get all prompt constants and load corresponding files prompt_constants = [ self.ANTIPATTERN_SCANNER, self.REFACTOR_STRATEGIST, self.CODE_TRANSFORMER, self.CODE_REVIEWER, + self.EXPLAINER_AGENT, ] for prompt_key in prompt_constants: @@ -55,12 +54,17 @@ def _load_prompt_from_yaml(self, filename: str, prompt_key: str) -> None: return 
prompt_config = config[prompt_key] - # Create ChatPromptTemplate - self._prompt_cache[prompt_key] = ChatPromptTemplate([ - ("system", prompt_config.get('system', '')), - ("user", prompt_config.get('user', '')), - MessagesPlaceholder("msgs") - ]) + + # Build messages in (role, content) format + messages = [] + if prompt_config.get("system"): + messages.append(("system", prompt_config["system"])) + if prompt_config.get("user"): + messages.append(("user", prompt_config["user"])) + messages.append(MessagesPlaceholder("msgs")) + + # Use the correct constructor + self._prompt_cache[prompt_key] = ChatPromptTemplate.from_messages(messages) print(f"Loaded prompt '{prompt_key}' from {filename}") except Exception as e: @@ -70,5 +74,4 @@ def get_prompt(self, prompt_key: str) -> Optional[ChatPromptTemplate]: if prompt_key not in self._prompt_cache: print(f"Warning: Prompt '{prompt_key}' not found in cache") return None - - return self._prompt_cache[prompt_key] \ No newline at end of file + return self._prompt_cache[prompt_key] diff --git a/AntiPattern_Remediator/src/core/state.py b/AntiPattern_Remediator/src/core/state.py index c85077f..63a147f 100644 --- a/AntiPattern_Remediator/src/core/state.py +++ b/AntiPattern_Remediator/src/core/state.py @@ -17,3 +17,5 @@ class AgentState(TypedDict): code_review_times: int # Number of times code has been reviewed msgs: List[Dict[str, Any]] # Message history for conversation context answer: Optional[str] # Analysis result + explanation_response_raw: Optional[str] # Raw LLM output from explainer + explanation_json: Optional[Dict[str, Any]] # Parsed JSON explanation diff --git a/AntiPattern_Remediator/src/core/utils/__init__.py b/AntiPattern_Remediator/src/core/utils/__init__.py new file mode 100644 index 0000000..ccba67c --- /dev/null +++ b/AntiPattern_Remediator/src/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .json_utils import extract_first_json + +__all__ = ["extract_first_json"] \ No newline at end of file diff --git 
a/AntiPattern_Remediator/src/core/utils/json_utils.py b/AntiPattern_Remediator/src/core/utils/json_utils.py new file mode 100644 index 0000000..d2e0b2f --- /dev/null +++ b/AntiPattern_Remediator/src/core/utils/json_utils.py @@ -0,0 +1,27 @@ +import json + +def extract_first_json(text): + """ + Try to extract the first JSON object from a string. + Works if JSON is inside ```json ... ``` fences or just plain text. + """ + if not isinstance(text, str): + return None + + # 1. If the text has fenced JSON like ```json ... ``` + if "```" in text: + parts = text.split("```") + for part in parts: + # Look for JSON-specific fences + if part.strip().lower().startswith("json"): + json_part = part[len("json"):].strip() + try: + return json.loads(json_part) + except Exception: + pass # Try next part + + # 2. If no fenced JSON worked, try to parse the whole text + try: + return json.loads(text.strip()) + except Exception: + return None \ No newline at end of file diff --git a/AntiPattern_Remediator/static/prompt/explainer.yaml b/AntiPattern_Remediator/static/prompt/explainer.yaml new file mode 100644 index 0000000..8963f95 --- /dev/null +++ b/AntiPattern_Remediator/static/prompt/explainer.yaml @@ -0,0 +1,42 @@ +explainer: + template: | + You are a senior software reviewer. + Your job is to explain detected anti-patterns and the applied refactor in a clear, structured way. + Output STRICT JSON only — no commentary outside JSON. 
+ + === Inputs === + Language: {language} + Context: {context} + Detected Anti-patterns (JSON): {antipatterns_json} + Code: + ```{code}``` + Refactored Code: + ```{refactored_code}``` + Refactor Rationale: + {refactor_rationale} + + === Required Output Schema === + { + "items": [ + { + "antipattern_name": "", + "antipattern_description": "", + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": "", + "refactored_code": "", + "summary": "" + } + ], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + } + + Notes: + - Always return valid JSON. + - Use multiple entries under "items" if more than one antipattern is relevant. + - Keep `refactored_code` short (or truncated if needed). + - Fill all fields, even if briefly. diff --git a/AntiPattern_Remediator/static/tinydb.json b/AntiPattern_Remediator/static/tinydb.json index cd87283..8f6198a 100644 --- a/AntiPattern_Remediator/static/tinydb.json +++ b/AntiPattern_Remediator/static/tinydb.json @@ -1 +1 @@ -{"_default": {"1": {"name": "Deep Nesting", "description": "Deep Nesting occurs when conditional or loop blocks are embedded within one another across multiple levels, creating code with high indentation and complex control flow. 
While not always increasing cyclomatic complexity linearly, deep nesting significantly raises cognitive complexity (the mental effort required to understand, modify, and debug a method).", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Low readability : The \"arrowhead\" structure makes it hard to trace logic and understand the intended flow.\nHigh cognitive load : Developers must mentally track the conditions that lead to or prevent reaching a particular line of code.\nError-prone maintenance : Adding or modifying logic inside deeply nested blocks increases the risk of missing cases or introducing bugs.\nInhibited reuse and testing : Deep nesting often combines concerns that should be split into smaller, testable methods or units.\nPoor diffs in version control : Even small changes can alter indentation across many lines, making reviews harder.", "remediation": "Guard clauses (early return) : Exit early when preconditions fail, flattening the control flow\nExtract method : Isolate deeply nested blocks into private methods with clear names to separate concerns and reduce depth.\nInvet conditionals : Invert logic to return early or skip unnecessary branches.\nReplace nested loops with streams (Java-specific): Abstract common filtering or mapping logic into declarative operations.\nUse pattern matching : Replace layered `if` chains with clearer structural or type-based matching.\nEncapsulate state checks : Group multiple conditionals into intention-revealing boolean helpers or state objects.", "limitation": "Can conflict with existing code style : Teams unfamiliar with guard clauses may resist early exits or multiple returns.\nRefactoring can obscure logic during transition : Extracted methods must be clearly named to preserve readability and avoid confusion.\nNested logic may be unavoidable in rare edge cases : Complex parsing, state machines, or embedded domain-specific languages may naturally involve deeper control structures.", 
"type": "antipattern", "source_file": "deep_nesting.json"}, "2": {"name": "Generic Exception handling", "description": "Generic exception handling refers to the use of broad or unspecific catch blocks (catching but ignoring the exceptions). These patterns obscure the true source of errors, suppress the useful debugging information and can unintentionally hide critical failures. This anti-pattern often stems from the desire to keep the code running, but typically leads to weak systems and increased technical debt.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Loss of context : Catching high-level exceptions removes the granularity needed to understand specific failure reasons\nRepeated boilerplate : Developers may re-implement logging, default values, or stream-closing logic instead of using safe utility methods\nViolation of fail-fast principles : Silent or overly generic handling can let critical errors go unnoticed for too long", "remediation": "", "limitation": "Catching too many specific exceptions can bloat the code and reduce readability.\nRefactoring exception handling may require thorough testing to avoid regressions.\nOver-logging exceptions can clutter logs and obscure real issues.\nSecurity-sensitive applications may need specialised exception handling strategies to avoid leaks", "type": "antipattern", "source_file": "generic_exception_handling.json"}, "3": {"name": "Magic Constants", "description": "Magic constants (or magic numbers) are hard-coded literal values (e.g., 3.14, 42, \"admin\") that appear directly in code without context or explanation. These values become problematic when their purpose is unclear, undocumented, or reused inconsistently. 
While some literals (like 0, 1, or -1) may be self-explanatory in some contexts, others represent thresholds, identifiers, or rules that should be named and documented for clarity.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor readability : Developers must guess the meaning of the value, increasing cognitive load.\nLow maintainability : Changing the value requires updating it everywhere, risking inconsistent updates.\nHarder debugging : Literal values lack descriptive meaning in stack traces, logs, or debuggers.\nDuplication : The same literal used in multiple places leads to repeated logic and tighter coupling.\nViolation of DRY : Embeds implicit meaning multiple times without abstraction.", "remediation": "Define Constants - Move literal values into named constants (e.g., const, final, or static readonly).\nUse Enums - Group related constants (e.g., roles, statuses) as enumerations with meaningful names.", "limitation": "Extracting trivial values like 0 or 1 may clutter code and reduce clarity if overdone.\nIn performance-critical code, indirection through constants or functions may introduce slight overhead.\nOver-abstracting unnamed constants (e.g., MAXCOUNT3) may make code harder to understand.\nIf a value is used only once and is self-explanatory, extracting it may be unnecessary overhead.", "type": "antipattern", "source_file": "magic_constants.json"}, "4": {"name": "SRP Violation", "description": "The Single Responsibility Principle (SRP) is one of the SOLID principles of object-oriented design, stating that a class (or, at a lower level, a method) should have only one reason to change (i.e., it should have only one responsibility or concern). A SRP Violation occurs when a class (method) takes on multiple unrelated responsibilities, making it harder to maintain, test, and understand. 
These violations often result in bloated classes or methods that mix unrelated concerns such as I/O, business logic, error handling, and configuration.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Changing one responsibility might inadvertently affect others, introducing bugs.\nLow cohesion : Code with unrelated responsibilities lacks a clear purpose, reducing clarity and reusability.\nDifficult testing : Unit testing becomes more complex as setup may require mocking or initializing unrelated dependencies.\nCode duplication and tight coupling : Responsibilities are harder to reuse or share, often leading to repeated logic or tight inter-class dependencies.\nHarder onboarding : New developers struggle to understand the purpose and scope of large, multi-purpose classes.", "remediation": "Extract Method \\- Split complex methods into smaller, single-purpose private methods.\nEarly Return (Guard Clauses) \\- Use early exits to reduce nested logic and clarify separate responsibilities.\nUse Local Functions/Lambdas \\- Encapsulate small inline logic into local functions for clarity.\nEncapsulate Temporary Variables \\- Move logic-heavy expressions into descriptive helper methods.\nGroup Related Logic \\- Cluster related operations into distinct helper methods within the same class.\nSeparate Concerns in Loops \\- Extract filtering, transforming, and aggregating into distinct steps.\nIsolate Logging/Error Handling \\- Move side-effect code like logging into dedicated private methods (unless the logging is a trivial single line and does not obscure business logic).", "limitation": "Fixing certain SRP violations (especially class-level violations) requires changing public method signatures, creating new classes, and/or breaking interfaces.\nMethods often depend on multiple injected services or shared state; untangling responsibilities might require broader architectural changes.\nLogging, error handling, metrics, 
and security checks are often scattered across responsibilities and difficult to isolate cleanly at the method level without aspect-oriented programming (AOP) or middleware/interceptor patterns.", "type": "antipattern", "source_file": "srp_violation.json"}, "5": {"name": "Unsafe or Vague Exception Handling", "description": "Reliable exception handling, type safety, and controlled flow are essential forr writing maintainable and robust software. Unsafe or Vague exception handling often results in code that is fragile, difficult to test, and challenging to debug. Instead of providing meaningful error handling or clear separation of concerns, these implementations either hide the underlying problem or use language features in a way that breaks maintainability.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Hidden failures : Catching exceptions without logging or meaningful handling hides the source of issues and makes debugging difficult\nPoor diagnosis : Broad exception handling with vague error messages hides intent and makes it harder to trace the root cause\nInconsistent runtime behaviour : using assertions for control logic can lead to unpredictable behaviour depending on the JVM configuration\nUncontrolled termination : Using system.exit() directly in application logic makes code untestable and prevents proper resource cleanup", "remediation": "Replace assertions with proper condition checks and informative exceptions to ensure consistent behaviour across environments\nAvoid silent catch blocks, log exceptions or rethrow them to preserve error context\nHandle specific exception types instead of catching broad categories like Exception or RuntimeException\nRefactor abrupt shutdown calls into controlled exits using exception handling or return code to support recovery", "limitation": "", "type": "antipattern", "source_file": "unsafe_or_vague_exception_handling.json"}}} \ No newline at end of file +{"_default": {"1": {"name": 
"Monolithic Method", "description": "A Monolithic Method is a single method that tries to do too much, often combining unrelated responsibilities into one block of code. This is essentially a method-level violation of the Single Responsibility Principle (SRP). Monolithic methods are hard to read, understand, maintain, and test because they mix business logic, I/O, error handling, and other concerns in one place.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Modifying one part of the method risks breaking unrelated functionality.\nLow readability : Long, complex methods are hard to follow and understand.\nDifficult testing : Unit tests become cumbersome because the method does too many things at once.\nCode duplication : Reusing logic is difficult; similar tasks often get reimplemented elsewhere.\nTight coupling : Internal details are intertwined, making refactoring risky.", "remediation": "", "limitation": "Refactoring may require changes to public method signatures, especially if other code depends on it.\nSome logic may rely on shared state or multiple services, making it hard to separate without broader architectural changes.\nIf the method handles cross-cutting concerns (e.g., logging, metrics, validation), isolating responsibilities may require AOP or middleware.", "type": "antipattern", "source_file": "monolithic_method.json"}, "2": {"name": "Unsafe or Vague Exception Handling", "description": "Reliable exception handling, type safety, and controlled flow are essential forr writing maintainable and robust software. Unsafe or Vague exception handling often results in code that is fragile, difficult to test, and challenging to debug. 
Instead of providing meaningful error handling or clear separation of concerns, these implementations either hide the underlying problem or use language features in a way that breaks maintainability.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Hidden failures : Catching exceptions without logging or meaningful handling hides the source of issues and makes debugging difficult\nPoor diagnosis : Broad exception handling with vague error messages hides intent and makes it harder to trace the root cause\nInconsistent runtime behaviour : using assertions for control logic can lead to unpredictable behaviour depending on the JVM configuration\nUncontrolled termination : Using system.exit() directly in application logic makes code untestable and prevents proper resource cleanup", "remediation": "Replace assertions with proper condition checks and informative exceptions to ensure consistent behaviour across environments\nAvoid silent catch blocks, log exceptions or rethrow them to preserve error context\nHandle specific exception types instead of catching broad categories like Exception or RuntimeException\nRefactor abrupt shutdown calls into controlled exits using exception handling or return code to support recovery", "limitation": "", "type": "antipattern", "source_file": "unsafe_or_vague_exception_handling.json"}, "3": {"name": "Deep Nesting", "description": "Deep Nesting occurs when conditional or loop blocks are embedded within one another across multiple levels, creating code with high indentation and complex control flow. 
While not always increasing cyclomatic complexity linearly, deep nesting significantly raises cognitive complexity (the mental effort required to understand, modify, and debug a method).", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Low readability : The \"arrowhead\" structure makes it hard to trace logic and understand the intended flow.\nHigh cognitive load : Developers must mentally track the conditions that lead to or prevent reaching a particular line of code.\nError-prone maintenance : Adding or modifying logic inside deeply nested blocks increases the risk of missing cases or introducing bugs.\nInhibited reuse and testing : Deep nesting often combines concerns that should be split into smaller, testable methods or units.\nPoor diffs in version control : Even small changes can alter indentation across many lines, making reviews harder.", "remediation": "Guard clauses (early return) : Exit early when preconditions fail, flattening the control flow\nExtract method : Isolate deeply nested blocks into private methods with clear names to separate concerns and reduce depth.\nInvet conditionals : Invert logic to return early or skip unnecessary branches.\nReplace nested loops with streams (Java-specific): Abstract common filtering or mapping logic into declarative operations.\nUse pattern matching : Replace layered `if` chains with clearer structural or type-based matching.\nEncapsulate state checks : Group multiple conditionals into intention-revealing boolean helpers or state objects.", "limitation": "Can conflict with existing code style : Teams unfamiliar with guard clauses may resist early exits or multiple returns.\nRefactoring can obscure logic during transition : Extracted methods must be clearly named to preserve readability and avoid confusion.\nNested logic may be unavoidable in rare edge cases : Complex parsing, state machines, or embedded domain-specific languages may naturally involve deeper control structures.", 
"type": "antipattern", "source_file": "deep_nesting.json"}, "4": {"name": "SRP Violation", "description": "The Single Responsibility Principle (SRP) is one of the SOLID principles of object-oriented design, stating that a class (or, at a lower level, a method) should have only one reason to change (i.e., it should have only one responsibility or concern). A SRP Violation occurs when a class (method) takes on multiple unrelated responsibilities, making it harder to maintain, test, and understand. These violations often result in bloated classes or methods that mix unrelated concerns such as I/O, business logic, error handling, and configuration.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Changing one responsibility might inadvertently affect others, introducing bugs.\nLow cohesion : Code with unrelated responsibilities lacks a clear purpose, reducing clarity and reusability.\nDifficult testing : Unit testing becomes more complex as setup may require mocking or initializing unrelated dependencies.\nCode duplication and tight coupling : Responsibilities are harder to reuse or share, often leading to repeated logic or tight inter-class dependencies.\nHarder onboarding : New developers struggle to understand the purpose and scope of large, multi-purpose classes.", "remediation": "Extract Method \\- Split complex methods into smaller, single-purpose private methods.\nEarly Return (Guard Clauses) \\- Use early exits to reduce nested logic and clarify separate responsibilities.\nUse Local Functions/Lambdas \\- Encapsulate small inline logic into local functions for clarity.\nEncapsulate Temporary Variables \\- Move logic-heavy expressions into descriptive helper methods.\nGroup Related Logic \\- Cluster related operations into distinct helper methods within the same class.\nSeparate Concerns in Loops \\- Extract filtering, transforming, and aggregating into distinct steps.\nIsolate Logging/Error Handling \\- 
Move side-effect code like logging into dedicated private methods (unless the logging is a trivial single line and does not obscure business logic).", "limitation": "Fixing certain SRP violations (especially class-level violations) requires changing public method signatures, creating new classes, and/or breaking interfaces.\nMethods often depend on multiple injected services or shared state; untangling responsibilities might require broader architectural changes.\nLogging, error handling, metrics, and security checks are often scattered across responsibilities and difficult to isolate cleanly at the method level without aspect-oriented programming (AOP) or middleware/interceptor patterns.", "type": "antipattern", "source_file": "srp_violation.json"}, "5": {"name": "Magic Constants", "description": "Magic constants (or magic numbers) are hard-coded literal values (e.g., 3.14, 42, \"admin\") that appear directly in code without context or explanation. These values become problematic when their purpose is unclear, undocumented, or reused inconsistently. 
While some literals (like 0, 1, or -1) may be self-explanatory in some contexts, others represent thresholds, identifiers, or rules that should be named and documented for clarity.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor readability : Developers must guess the meaning of the value, increasing cognitive load.\nLow maintainability : Changing the value requires updating it everywhere, risking inconsistent updates.\nHarder debugging : Literal values lack descriptive meaning in stack traces, logs, or debuggers.\nDuplication : The same literal used in multiple places leads to repeated logic and tighter coupling.\nViolation of DRY : Embeds implicit meaning multiple times without abstraction.", "remediation": "Define Constants - Move literal values into named constants (e.g., const, final, or static readonly).\nUse Enums - Group related constants (e.g., roles, statuses) as enumerations with meaningful names.", "limitation": "Extracting trivial values like 0 or 1 may clutter code and reduce clarity if overdone.\nIn performance-critical code, indirection through constants or functions may introduce slight overhead.\nOver-abstracting unnamed constants (e.g., MAXCOUNT3) may make code harder to understand.\nIf a value is used only once and is self-explanatory, extracting it may be unnecessary overhead.", "type": "antipattern", "source_file": "magic_constants.json"}, "6": {"name": "Duplicate Code", "description": "Duplicate Code occurs when identical or very similar code blocks are repeated throughout the codebase. This pattern creates maintenance overhead, increases the likelihood of bugs, and violates the DRY (Don't Repeat Yourself) principle. 
Common examples include repeated null checks, validation logic, and similar conditional patterns across different methods or classes.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Maintenance overhead : Changes need to be applied in multiple places, increasing the risk of inconsistencies\nBug multiplication : A bug in duplicated code affects multiple locations, making fixes more complex\nCode bloat : Repeated code increases the overall size of the codebase without adding functionality\nViolation of DRY principle : Makes the code harder to understand and reason about", "remediation": "", "limitation": "Over-abstraction can make code harder to understand if the duplication is minimal or contextually different\nPremature extraction of methods may create unnecessary coupling between unrelated parts of the system\nSome duplication might be acceptable if the code serves different business contexts", "type": "antipattern", "source_file": "duplicate_code.json"}, "7": {"name": "God Class", "description": "A God Class anti-pattern refers to a class that centralises too many responsibilities in a single location, becoming overly complex and difficult to maintain. Such classes tend to know too much, do too much, and interact with many different parts of the system. 
This leads to tightly coupled code, reduces modularity, and makes the system hard to test and extend.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "High coupling: God Classes tend to interact with many other classes and modules, thereby reducing system modularity and increasing the risk of changes.\nPoor maintainability : Large, complex classes are difficult to read, understand, and modify.\nLow reusability : The class becomes so specific and bloated that it is rarely useful outside of its original context.\nHidden dependencies : God Classes often hide dependencies within fields or methods, making the codebase less transparent.", "remediation": "", "limitation": "", "type": "antipattern", "source_file": "god_class.json"}, "8": {"name": "Middle Man", "description": "The Middle Man anti-pattern occurs when a class exists primarily to delegate calls to another class without adding meaningful logic of its own. Essentially, the class acts as a pass-through or proxy, forwarding method calls without adding value. 
While delegation is sometimes necessary for abstraction, excessive or trivial delegation leads to unnecessary indirection and increases maintenance overhead.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Extra Indirection : Code must go through one more layer, which can complicate understanding the code flow.\nIncreased maintenance : When the delegated class changes, the middle-man class often needs updates for all its pass-through methods.\nLow cohesion : The middle-man class has little real logic, making its purpose unclear\nCode bloat : Many trivial delegation methods clutter the class, making it harder to navigate.\nHarder debugging : Tracing behaviour through multiple layers of delegation adds cognitive load.", "remediation": "", "limitation": "Some delegation is unavoidable, e.g., to implement an interface or provide a stable abstraction layer\nRemoving middleman classes may break existing APIs or require refactoring client code.\nIn some cases, delegation is part of a design pattern (like a proxy), which is intentional and not an anti-pattern", "type": "antipattern", "source_file": "middle_man.json"}, "9": {"name": "Generic Exception handling", "description": "Generic exception handling refers to the use of broad or unspecific catch blocks (catching but ignoring the exceptions). These patterns obscure the true source of errors, suppress the useful debugging information and can unintentionally hide critical failures. 
This anti-pattern often stems from the desire to keep the code running, but typically leads to weak systems and increased technical debt.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Loss of context : Catching high-level exceptions removes the granularity needed to understand specific failure reasons\nRepeated boilerplate : Developers may re-implement logging, default values, or stream-closing logic instead of using safe utility methods\nViolation of fail-fast principles : Silent or overly generic handling can let critical errors go unnoticed for too long", "remediation": "", "limitation": "Catching too many specific exceptions can bloat the code and reduce readability.\nRefactoring exception handling may require thorough testing to avoid regressions.\nOver-logging exceptions can clutter logs and obscure real issues.\nSecurity-sensitive applications may need specialised exception handling strategies to avoid leaks", "type": "antipattern", "source_file": "generic_exception_handling.json"}}} \ No newline at end of file From e921bf9356e513ba6b8a2792da4fe7ae83416140 Mon Sep 17 00:00:00 2001 From: Avinash Date: Sat, 23 Aug 2025 10:36:01 +0100 Subject: [PATCH 3/9] Unit test updated for explainer --- .../src/core/prompt/prompt_manager.py | 31 ++++++++++--------- .../unit_test/prompt/test_prompt_manager.py | 20 ++++++++++-- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py index 1f1abef..d7fc7dd 100644 --- a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py +++ b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py @@ -41,34 +41,37 @@ def _load_all_prompts(self) -> None: def _load_prompt_from_yaml(self, filename: str, prompt_key: str) -> None: """Load a prompt configuration from a YAML file.""" yaml_path = self.prompt_directory / filename - + if not yaml_path.exists(): print(f"Warning: Prompt 
file {yaml_path} not found") return - + try: with open(yaml_path, 'r', encoding='utf-8') as file: config = yaml.safe_load(file) - if prompt_key not in config: + + if not config or prompt_key not in config: print(f"Warning: Section '{prompt_key}' not found in {filename}") return - - prompt_config = config[prompt_key] - # Build messages in (role, content) format - messages = [] - if prompt_config.get("system"): - messages.append(("system", prompt_config["system"])) - if prompt_config.get("user"): - messages.append(("user", prompt_config["user"])) - messages.append(MessagesPlaceholder("msgs")) + prompt_config = config.get(prompt_key) or {} + + # Always include System first (empty string if not provided) to satisfy tests + system_text = str(prompt_config.get("system", "") or "") + user_text = str(prompt_config.get("user", "") or "") + + messages = [ + ("system", system_text), # always present (possibly empty) + ("user", user_text), # always present (possibly empty) + MessagesPlaceholder("msgs"), # conversation history + ] - # Use the correct constructor self._prompt_cache[prompt_key] = ChatPromptTemplate.from_messages(messages) print(f"Loaded prompt '{prompt_key}' from {filename}") - + except Exception as e: print(f"Error loading prompt from {filename}: {e}") + def get_prompt(self, prompt_key: str) -> Optional[ChatPromptTemplate]: if prompt_key not in self._prompt_cache: diff --git a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py index 088af2f..6e859c4 100644 --- a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py +++ b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py @@ -44,6 +44,7 @@ def __init__(self): self.REFACTOR_STRATEGIST = "refactor_strategist" self.CODE_TRANSFORMER = "code_transformer" self.CODE_REVIEWER = "code_reviewer" + self.EXPLAINER_AGENT = "explainer" self.prompt_directory = Path(__file__).parent self._prompt_cache = {} 
self._load_all_prompts() # Call this to match real behavior @@ -87,7 +88,8 @@ def _load_all_prompts(self): self.ANTIPATTERN_SCANNER, self.REFACTOR_STRATEGIST, self.CODE_TRANSFORMER, - self.CODE_REVIEWER + self.CODE_REVIEWER, + self.EXPLAINER_AGENT, ] for prompt_key in prompt_constants: @@ -155,6 +157,7 @@ def test_initialization_creates_correct_attributes(self): assert hasattr(manager, 'REFACTOR_STRATEGIST') assert hasattr(manager, 'CODE_TRANSFORMER') assert hasattr(manager, 'CODE_REVIEWER') + assert hasattr(manager, 'EXPLAINER_AGENT') assert hasattr(manager, 'prompt_directory') assert hasattr(manager, '_prompt_cache') assert isinstance(manager._prompt_cache, dict) @@ -176,6 +179,7 @@ def test_prompt_constants_have_correct_values(self): assert manager.REFACTOR_STRATEGIST == "refactor_strategist" assert manager.CODE_TRANSFORMER == "code_transformer" assert manager.CODE_REVIEWER == "code_reviewer" + assert manager.EXPLAINER_AGENT == "explainer" def test_prompt_directory_is_set_correctly(self): """Test that prompt directory is assigned from settings.""" @@ -505,6 +509,7 @@ def test_initialization_with_missing_directory(self, capsys): manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + manager.EXPLAINER_AGENT = "explainer" manager.prompt_directory = Path("/non/existent/path") manager._prompt_cache = {} @@ -575,6 +580,12 @@ def temp_prompt_files(self): 'system': 'You are an expert code reviewer.', 'user': 'Review this code for quality and best practices: {code}' } + }, + 'explainer.yaml': { + 'explainer': { + 'system': 'You are a senior software reviewer.', + 'user': 'Explain: {code}\nLang: {language}\nCtx: {context}' + } } } @@ -593,6 +604,7 @@ def test_load_all_prompts_loads_all_available_files(self, temp_prompt_files, cap manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + 
manager.EXPLAINER_AGENT = "explainer" manager.prompt_directory = temp_prompt_files manager._prompt_cache = {} @@ -600,15 +612,16 @@ def test_load_all_prompts_loads_all_available_files(self, temp_prompt_files, cap manager._load_all_prompts() # Assert: Verify all prompts were loaded - assert len(manager._prompt_cache) == 4 + assert len(manager._prompt_cache) == 5 assert "antipattern_scanner" in manager._prompt_cache assert "refactor_strategist" in manager._prompt_cache assert "code_transformer" in manager._prompt_cache assert "code_reviewer" in manager._prompt_cache + assert "explainer" in manager._prompt_cache # Verify success message captured = capsys.readouterr() - assert "Successfully loaded 4 prompts" in captured.out + assert "Successfully loaded 5 prompts" in captured.out def test_load_all_prompts_handles_partial_failures(self, capsys): """Test that _load_all_prompts continues loading even if some files are missing.""" @@ -633,6 +646,7 @@ def test_load_all_prompts_handles_partial_failures(self, capsys): manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + manager.EXPLAINER_AGENT = "explainer" manager.prompt_directory = temp_path manager._prompt_cache = {} From 5eebde8156f44a186bd04800556490e3c13431c8 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Mon, 25 Aug 2025 13:27:23 +0100 Subject: [PATCH 4/9] Add Full Repository Run Functionality - Added full repo workflow and required utility functions - Updated Anti-pattern scanner to include SonarQube context - Updated code transformer to strip docstrings from refactored code while using Granite 3.3:8b --- AntiPattern_Remediator/full_repo_workflow.py | 293 ++++++++++++++++++ AntiPattern_Remediator/main.py | 104 ++++--- .../src/core/agents/antipattern_scanner.py | 23 +- .../src/core/agents/code_transformer.py | 26 ++ AntiPattern_Remediator/src/core/state.py | 1 + .../static/prompt/antipattern_scanner.yaml | 2 + 
.../static/prompt/code_transformer.yaml | 5 + AntiPattern_Remediator/workflow/__init__.py | 11 + .../workflow/backup_manager.py | 54 ++++ .../workflow/file_operations.py | 40 +++ .../workflow/results_manager.py | 155 +++++++++ .../workflow/workflow_utils.py | 70 +++++ 12 files changed, 746 insertions(+), 38 deletions(-) create mode 100644 AntiPattern_Remediator/full_repo_workflow.py create mode 100644 AntiPattern_Remediator/workflow/__init__.py create mode 100644 AntiPattern_Remediator/workflow/backup_manager.py create mode 100644 AntiPattern_Remediator/workflow/file_operations.py create mode 100644 AntiPattern_Remediator/workflow/results_manager.py create mode 100644 AntiPattern_Remediator/workflow/workflow_utils.py diff --git a/AntiPattern_Remediator/full_repo_workflow.py b/AntiPattern_Remediator/full_repo_workflow.py new file mode 100644 index 0000000..69ddb5a --- /dev/null +++ b/AntiPattern_Remediator/full_repo_workflow.py @@ -0,0 +1,293 @@ +""" +Full Repository Workflow - Process files with 100% test coverage from JaCoCo results +""" +from colorama import Fore, Style +from pathlib import Path +import os + +# Import workflow utilities +from workflow.workflow_utils import parse_antipattern_results, get_repository_paths_from_files +from workflow.backup_manager import create_repository_backup +from workflow.results_manager import save_intermediate_results, create_processing_summary +from workflow.file_operations import read_java_file, save_refactored_code + + + +def read_jacoco_results(jacoco_results_dir: str = "../jacoco_results") -> list: + """Read the list of files with 100% coverage from JaCoCo results.""" + results_path = Path(jacoco_results_dir) + + if not results_path.exists(): + print(Fore.RED + f"JaCoCo results directory not found: {results_path}" + Style.RESET_ALL) + return [] + + # Find all result files + all_files = list(results_path.glob("*.txt")) + if not all_files: + print(Fore.RED + f"No JaCoCo result files found in: {results_path}" + 
Style.RESET_ALL) + return [] + + # Separate combined file from individual repo files + combined_file = results_path / "all_100_percent_coverage_files.txt" + repo_files = [f for f in all_files if f.name != "all_100_percent_coverage_files.txt"] + + print(Fore.CYAN + "\nAvailable JaCoCo result files:" + Style.RESET_ALL) + print("0) All repositories (combined)") + + # Show individual repository options + for i, repo_file in enumerate(repo_files, 1): + # Extract repo name from filename (remove _100_percent_coverage.txt suffix) + repo_name = repo_file.stem.replace("_100_percent_coverage", "") + print(f"{i}) {repo_name}") + + # Get user choice + while True: + try: + choice = input(f"\nSelect repository to process (0-{len(repo_files)}): ").strip() + choice_num = int(choice) + + if choice_num == 0: + # Use combined file + selected_file = combined_file + repo_name = "All repositories" + break + elif 1 <= choice_num <= len(repo_files): + # Use specific repo file + selected_file = repo_files[choice_num - 1] + repo_name = selected_file.stem.replace("_100_percent_coverage", "") + break + else: + print(Fore.RED + f"Invalid choice. Please enter a number between 0 and {len(repo_files)}" + Style.RESET_ALL) + except ValueError: + print(Fore.RED + "Invalid input. Please enter a number." 
+ Style.RESET_ALL) + + # Read the selected file + if not selected_file.exists(): + print(Fore.RED + f"Selected file not found: {selected_file}" + Style.RESET_ALL) + return [] + + try: + with open(selected_file, 'r') as f: + file_paths = [line.strip() for line in f if line.strip()] + + print(Fore.GREEN + f"Selected: {repo_name}" + Style.RESET_ALL) + print(Fore.GREEN + f"Found {len(file_paths)} files with 100% test coverage" + Style.RESET_ALL) + return file_paths + + except Exception as e: + print(Fore.RED + f"Error reading file {selected_file}: {e}" + Style.RESET_ALL) + return [] + + + +def process_java_files_with_workflow(file_paths: list, settings, db_manager, prompt_manager, langgraph): + """Process each Java file through the agentic workflow.""" + processed_files = [] + failed_files = [] + + for i, file_path in enumerate(file_paths, 1): + print(Fore.BLUE + f"\n{'='*60}" + Style.RESET_ALL) + print(Fore.BLUE + f"Processing file {i}/{len(file_paths)}: {file_path}" + Style.RESET_ALL) + print(Fore.BLUE + f"{'='*60}" + Style.RESET_ALL) + + # Read the Java file content + java_code = read_java_file(file_path) + if java_code is None: + failed_files.append(file_path) + continue + + # Create initial state for this file + initial_state = { + "code": java_code, + "context": None, + "trove_context": None, + "antipatterns_scanner_results": None, + "refactoring_strategy_results": None, + "refactored_code": None, + "code_review_results": None, + "code_review_times": 0, + "msgs": [], + "answer": None, + "current_file_path": file_path # Track current file being processed + } + + try: + # Run the agentic workflow + print(Fore.CYAN + "Running agentic workflow..." 
+ Style.RESET_ALL) + final_state = langgraph.invoke(initial_state) + + # Save intermediate results for analysis + save_intermediate_results(file_path, final_state, settings) + + # Check if refactoring was successful + if final_state.get('refactored_code'): + # Parse anti-pattern results + antipatterns_found, antipatterns_count = parse_antipattern_results(final_state.get('antipatterns_scanner_results')) + + # Save the refactored code back to the file + if save_refactored_code(file_path, final_state['refactored_code']): + processed_files.append({ + 'file_path': file_path, + 'status': 'success', + 'antipatterns_found': antipatterns_found, + 'antipatterns_count': antipatterns_count, + 'code_review_times': final_state.get('code_review_times', 0), + 'has_intermediate_results': True + }) + print(Fore.GREEN + f"Successfully processed: {file_path}" + Style.RESET_ALL) + else: + failed_files.append(file_path) + else: + # Parse anti-pattern results + antipatterns_found, antipatterns_count = parse_antipattern_results(final_state.get('antipatterns_scanner_results')) + + print(Fore.YELLOW + f"No refactored code generated for: {file_path}" + Style.RESET_ALL) + processed_files.append({ + 'file_path': file_path, + 'status': 'no_refactoring', + 'antipatterns_found': antipatterns_found, + 'antipatterns_count': antipatterns_count, + 'code_review_times': final_state.get('code_review_times', 0), + 'has_intermediate_results': True + }) + + except Exception as e: + print(Fore.RED + f"Error processing {file_path}: {e}" + Style.RESET_ALL) + failed_files.append(file_path) + + return processed_files, failed_files + + +def run_full_repo_workflow(settings, db_manager, prompt_manager, langgraph): + """Run the full repository workflow for files with 100% test coverage.""" + print(Fore.BLUE + "\n=== Full Repository Workflow ===" + Style.RESET_ALL) + print("Process Java files with 100% test coverage from JaCoCo results...") + + # Read JaCoCo results to get files with 100% test coverage + 
print(Fore.CYAN + "\nReading JaCoCo results..." + Style.RESET_ALL) + file_paths = read_jacoco_results() + + if not file_paths: + print(Fore.RED + "No files found in JaCoCo results. Please run JaCoCo analysis first." + Style.RESET_ALL) + print("Run: python jacoco_tool/jacoco_analysis.py") + return False + + # Extract repository paths from file paths + print(Fore.CYAN + "\nIdentifying repositories to backup..." + Style.RESET_ALL) + repo_paths = get_repository_paths_from_files(file_paths) + + if not repo_paths: + print(Fore.RED + "No repository paths could be identified from the file paths." + Style.RESET_ALL) + return False + + print(f"Found {len(repo_paths)} repositories to backup:") + for repo_path in sorted(repo_paths): + repo_name = Path(repo_path).name + print(f" • {repo_name} ({repo_path})") + + # Ask user for confirmation to proceed with backup and processing + print(f"\nFiles to process ({len(file_paths)} total):") + for i, path in enumerate(file_paths[:5], 1): # Show first 5 files + print(f" {i}. {path}") + if len(file_paths) > 5: + print(f" ... and {len(file_paths) - 5} more files") + + proceed = input(f"\nProceed with backing up {len(repo_paths)} repositories and processing {len(file_paths)} files? (Y/N): ").strip().lower() + if proceed != 'y': + print("Operation cancelled.") + return False + + # Create repository backups + print(Fore.BLUE + f"\n{'='*60}" + Style.RESET_ALL) + print(Fore.BLUE + "CREATING REPOSITORY BACKUPS" + Style.RESET_ALL) + print(Fore.BLUE + f"{'='*60}" + Style.RESET_ALL) + + backup_info = create_repository_backup(repo_paths) + + if backup_info['failed_backups']: + print(Fore.RED + f"\nWarning: {len(backup_info['failed_backups'])} repositories failed to backup:" + Style.RESET_ALL) + for failed in backup_info['failed_backups']: + print(Fore.RED + f" {failed['repo_path']}: {failed['error']}" + Style.RESET_ALL) + + continue_anyway = input("\nContinue processing despite backup failures? 
(Y/N): ").strip().lower() + if continue_anyway != 'y': + print("Operation cancelled due to backup failures.") + return False + + print(Fore.GREEN + f"\nSuccessfully backed up {len(backup_info['backed_up_repos'])} repositories" + Style.RESET_ALL) + print(Fore.GREEN + f"Backup location: {backup_info['backup_dir']}" + Style.RESET_ALL) + + # Process each file through the agentic workflow + print(Fore.BLUE + f"\n{'='*60}" + Style.RESET_ALL) + print(Fore.BLUE + "STARTING FILE PROCESSING" + Style.RESET_ALL) + print(Fore.BLUE + f"{'='*60}" + Style.RESET_ALL) + + processed_files, failed_files = process_java_files_with_workflow( + file_paths, settings, db_manager, prompt_manager, langgraph + ) + + # Create comprehensive processing summary + summary_file = create_processing_summary(processed_files, backup_info) + + # Generate summary report + print(Fore.BLUE + "\n" + "="*80 + Style.RESET_ALL) + print(Fore.BLUE + "BATCH PROCESSING SUMMARY" + Style.RESET_ALL) + print(Fore.BLUE + "="*80 + Style.RESET_ALL) + + # Backup summary + print(Fore.CYAN + "Repository Backup Summary:" + Style.RESET_ALL) + print(f" Backup timestamp: {backup_info['timestamp']}") + print(f" Backup location: {backup_info['backup_dir']}") + print(f" Repositories backed up: {len(backup_info['backed_up_repos'])}") + if backup_info['failed_backups']: + print(f" Failed backups: {len(backup_info['failed_backups'])}") + + # Processing summary + print(Fore.CYAN + "\nFile Processing Summary:" + Style.RESET_ALL) + print(f" Total files processed: {len(processed_files)}") + print(f" Failed files: {len(failed_files)}") + + # Categorize results + successful_refactoring = [f for f in processed_files if f['status'] == 'success'] + no_refactoring_needed = [f for f in processed_files if f['status'] == 'no_refactoring'] + files_with_antipatterns = [f for f in processed_files if f.get('antipatterns_found', False)] + total_antipatterns = sum(f.get('antipatterns_count', 0) for f in processed_files) + + print(Fore.GREEN + f" 
Successfully refactored: {len(successful_refactoring)}" + Style.RESET_ALL) + print(Fore.YELLOW + f" No refactoring needed: {len(no_refactoring_needed)}" + Style.RESET_ALL) + print(Fore.RED + f" Failed: {len(failed_files)}" + Style.RESET_ALL) + print(Fore.MAGENTA + f" Files with anti-patterns: {len(files_with_antipatterns)}" + Style.RESET_ALL) + print(Fore.MAGENTA + f" Total anti-patterns found: {total_antipatterns}" + Style.RESET_ALL) + + # Statistics + if processed_files: + refactor_rate = len(successful_refactoring) / len(processed_files) * 100 + antipattern_rate = len(files_with_antipatterns) / len(processed_files) * 100 + + + print(Fore.CYAN + "\nProcessing Statistics:" + Style.RESET_ALL) + print(f" Refactoring success rate: {refactor_rate:.1f}%") + print(f" Anti-pattern detection rate: {antipattern_rate:.1f}%") + print(f" Total anti-patterns found: {total_antipatterns}") + + # Show detailed results + if successful_refactoring: + print(Fore.GREEN + "\nSuccessfully refactored files:" + Style.RESET_ALL) + for file_info in successful_refactoring: + antipatterns_info = f" (antipatterns: {file_info.get('antipatterns_count', 0)})" if file_info.get('antipatterns_count', 0) > 0 else "" + print(f"{file_info['file_path']} (reviews: {file_info['code_review_times']}){antipatterns_info}") + + if failed_files: + print(Fore.RED + "\nFailed files:" + Style.RESET_ALL) + for file_path in failed_files: + print(f"{file_path}") + + print(Fore.GREEN + f"\nBatch processing complete!" 
+ Style.RESET_ALL) + print(Fore.CYAN + f"Repository backups available at: {backup_info['backup_dir']}" + Style.RESET_ALL) + print(f"To restore a repository, copy from backup directory back to original location.") + + # Intermediate results information + print(Fore.MAGENTA + f"\nIntermediate Results:" + Style.RESET_ALL) + print(f" Individual file analysis results saved in: ../processing_results/") + if summary_file: + print(f" Comprehensive summary saved: {Path(summary_file).name}") diff --git a/AntiPattern_Remediator/main.py b/AntiPattern_Remediator/main.py index fc05fdd..a659983 100644 --- a/AntiPattern_Remediator/main.py +++ b/AntiPattern_Remediator/main.py @@ -7,35 +7,16 @@ from dotenv import load_dotenv load_dotenv() from colorama import Fore, Style +import os +from pathlib import Path +from full_repo_workflow import run_full_repo_workflow -def main(): - """Main function: Run antipattern analysis""" - - # Let user select provider - print("Available providers: 1) ollama 2) ibm 3) vllm") - choice = input("Select provider (1-3): ").strip() - - provider_map = {"1": "ollama", "2": "ibm", "3": "vllm"} - provider = provider_map.get(choice, "ollama") # default to ollama - - #Let us choose which DB to interact with - print("Choose your trove: 1) ChromaDB (VectorDB) 2) TinyDB (DocumentDB)") - db_choice = input("Choose 1 or 2: ").strip() - - # Initialize global settings with selected provider - settings = initialize_settings(provider) - print(Fore.GREEN + f"Using {settings.LLM_PROVIDER} with model {settings.LLM_MODEL}" + Style.RESET_ALL) - - # Temporary Lazy Imports - from src.core.graph import CreateGraph - from src.data.database import VectorDBManager, TinyDBManager - from src.core.prompt import PromptManager - from scripts import seed_database - - # Initialize PromptManager - print("Initializing PromptManager...") - prompt_manager = PromptManager() +def run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph): + """Run the original workflow with a 
hardcoded Java code snippet.""" + print(Fore.BLUE + "\n=== Code Snippet Analysis Workflow ===" + Style.RESET_ALL) + print("Analyzing the provided Java code snippet...") + # Example Java code legacy_code = """ public class ApplicationManager { @@ -74,6 +55,7 @@ def main(): } } """ + initial_state = { "code": legacy_code, "context": None, @@ -87,27 +69,75 @@ def main(): "answer": None } - #Setup Database + final_state = langgraph.invoke(initial_state) + + print(Fore.GREEN + f"\nAnalysis Complete!" + Style.RESET_ALL) + print(f"Final state keys: {list(final_state.keys())}") + print(f"Context retrieved: {'Yes' if final_state.get('context') else 'No'}") + print(f"Analysis completed: {'Yes' if final_state.get('antipatterns_scanner_results') else 'No'}") + print(f"Refactored code: {'Yes' if final_state.get('refactored_code') else 'No'}") + print(f"Code review results: {final_state.get('code_review_times')}") + + +def main(): + """Main function: Choose between code snippet analysis or full repository run""" + + print(Fore.BLUE + "=== AntiPattern Remediator Tool ===" + Style.RESET_ALL) + print("Choose your analysis mode:") + print("1) Code Snippet Analysis - Analyze a sample Java code snippet") + print("2) Full Repository Run - Process files with 100% test coverage from JaCoCo results") + + # Let user choose analysis mode + mode_choice = input("\nSelect mode (1-2): ").strip() + + if mode_choice not in ["1", "2"]: + print(Fore.RED + "Invalid choice. Defaulting to Code Snippet Analysis." 
+ Style.RESET_ALL) + mode_choice = "1" + + # Let user select provider + print("\nAvailable providers: 1) ollama 2) ibm 3) vllm") + choice = input("Select provider (1-3): ").strip() + + provider_map = {"1": "ollama", "2": "ibm", "3": "vllm"} + provider = provider_map.get(choice, "ollama") # default to ollama + + # Let us choose which DB to interact with + print("Choose your trove: 1) ChromaDB (VectorDB) 2) TinyDB (DocumentDB)") + db_choice = input("Choose 1 or 2: ").strip() + + # Initialize global settings with selected provider + settings = initialize_settings(provider) + print(Fore.GREEN + f"Using {settings.LLM_PROVIDER} with model {settings.LLM_MODEL}" + Style.RESET_ALL) + + # Temporary Lazy Imports + from src.core.graph import CreateGraph + from src.data.database import VectorDBManager, TinyDBManager + from src.core.prompt import PromptManager + from scripts import seed_database + + # Initialize PromptManager + print("Initializing PromptManager...") + prompt_manager = PromptManager() + + # Setup Database if db_choice == "2": print("Seeding TinyDB with AntiPattern Dataset") seed_database.main() db_manager = TinyDBManager() - print("Using TinyDB for knowledge retreival") + print("Using TinyDB for knowledge retrieval") else: vector_db = VectorDBManager() db_manager = vector_db.get_db() - print("Using ChromaDB for knowledge retreival") + print("Using ChromaDB for knowledge retrieval") retriever = db_manager.as_retriever() langgraph = CreateGraph(db_manager, prompt_manager, retriever=retriever).workflow - final_state = langgraph.invoke(initial_state) - print(Fore.GREEN + f"\nAnalysis Complete!" 
+ Style.RESET_ALL) - print(f"Final state keys: {list(final_state.keys())}") - print(f"Context retrieved: {'Yes' if final_state.get('context') else 'No'}") - print(f"Analysis completed: {'Yes' if final_state.get('antipatterns_scanner_results') else 'No'}") - print(f"Refactored code: {'Yes' if final_state.get('refactored_code') else 'No'}") - print(f"Code review results: {final_state.get('code_review_times')}") + # Run the selected workflow + if mode_choice == "1": + run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph) + else: + run_full_repo_workflow(settings, db_manager, prompt_manager, langgraph) if __name__ == "__main__": main() diff --git a/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py b/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py index 50b92d8..6873e7f 100644 --- a/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py +++ b/AntiPattern_Remediator/src/core/agents/antipattern_scanner.py @@ -10,6 +10,7 @@ from colorama import Fore, Style from ..prompt import PromptManager from sonarqube_tool import SonarQubeAPI +from pathlib import Path class AntipatternScanner: @@ -27,8 +28,28 @@ def retrieve_context(self, state: AgentState): search_query = f"Java antipatterns code analysis: {state['code'][:50]}" # Use retriever_tool to get relevant context context = self.tool.invoke({"query": search_query}) + + # Get current file path from state + current_file_path = state['current_file_path'] + + # Extract project key and relative file path from the current file path + project_key = None + relative_file_path = None + + if current_file_path: + path_obj = Path(current_file_path) + + # Find the repository name (project key) by looking for 'clones' directory + for i, part in enumerate(path_obj.parts): + if part == 'clones' and i + 1 < len(path_obj.parts): + project_key = path_obj.parts[i + 1] # Repository name as project key + # Get the relative path from the repository root + relative_file_path = 
str(Path(*path_obj.parts[i + 2:])) + break + api = SonarQubeAPI() - issues = api.get_issues_for_file(project_key="commons-collections", file_path="src/main/java/org/apache/commons/collections4/collection/SynchronizedCollection.java") + print(Fore.CYAN + f"Using SonarQube project: {project_key}, file: {relative_file_path}" + Style.RESET_ALL) + issues = api.get_issues_for_file(project_key=project_key, file_path=relative_file_path) solutions = [] for issue in issues["issues"]: solutions.append(api.get_rules_and_fix_method(rule_key=issue['rule'])) diff --git a/AntiPattern_Remediator/src/core/agents/code_transformer.py b/AntiPattern_Remediator/src/core/agents/code_transformer.py index b4eb67e..c3a9192 100644 --- a/AntiPattern_Remediator/src/core/agents/code_transformer.py +++ b/AntiPattern_Remediator/src/core/agents/code_transformer.py @@ -4,6 +4,7 @@ from ..state import AgentState from colorama import Fore, Style from ..prompt import PromptManager +import re class CodeTransformer: @@ -12,6 +13,30 @@ class CodeTransformer: def __init__(self, model, prompt_manager: PromptManager): self.llm = model self.prompt_manager = prompt_manager + + def extract_java(s: str) -> str: + + # Strip common wrappers + fences = [ + (r"^```[a-zA-Z0-9]*\n(.*?)\n```$", re.DOTALL), + (r'^"""\s*\n?(.*?)\n?"""$', re.DOTALL), + (r"^'''\s*\n?(.*?)\n?'''$", re.DOTALL), + ] + for pat, flg in fences: + m = re.match(pat, s, flags=flg) + if m: + s = m.group(1).strip() + break + + # Also remove stray leading/trailing fence lines if any + s = re.sub(r"^```[a-zA-Z0-9]*\n?", "", s) + s = re.sub(r"\n?```$", "", s) + s = re.sub(r'^"""\n?', "", s) + s = re.sub(r'\n?"""$', "", s) + s = re.sub(r"^'''\n?", "", s) + s = re.sub(r"\n?'''$", "", s) + + return s.strip() def transform_code(self, state: AgentState) -> AgentState: print("--- TRANSFORMING CODE ---") @@ -44,6 +69,7 @@ def transform_code(self, state: AgentState) -> AgentState: state["refactored_code"] = "Error: No valid response received from LLM." 
raise ValueError("No valid response received from LLM.") refactored_code = response.content.strip() + refactored_code = CodeTransformer.extract_java(refactored_code) print(Fore.GREEN + "Code transformation complete." + Style.RESET_ALL) state["refactored_code"] = refactored_code diff --git a/AntiPattern_Remediator/src/core/state.py b/AntiPattern_Remediator/src/core/state.py index c85077f..fb42b13 100644 --- a/AntiPattern_Remediator/src/core/state.py +++ b/AntiPattern_Remediator/src/core/state.py @@ -17,3 +17,4 @@ class AgentState(TypedDict): code_review_times: int # Number of times code has been reviewed msgs: List[Dict[str, Any]] # Message history for conversation context answer: Optional[str] # Analysis result + current_file_path: Optional[str] # Path to the current file being processed diff --git a/AntiPattern_Remediator/static/prompt/antipattern_scanner.yaml b/AntiPattern_Remediator/static/prompt/antipattern_scanner.yaml index 33587d3..8f6a5cf 100644 --- a/AntiPattern_Remediator/static/prompt/antipattern_scanner.yaml +++ b/AntiPattern_Remediator/static/prompt/antipattern_scanner.yaml @@ -5,6 +5,7 @@ antipattern_scanner: variables: - code - context + - sonarqube_issues description: "Analyzes Java code for antipatterns and design issues, providing structured JSON output" version: "1.0" @@ -33,6 +34,7 @@ antipattern_scanner: Analysis Requirements: - Carefully analyze the code for Java antipatterns and design smells. - Base your analysis strictly on the antipattern definitions provided earlier in this conversation. Do not invent new antipatterns. + - Consider the provided SonarQube issues as potential indicators of antipatterns. - Make sure your results are clear and actionable, so others can know how to address the identified issues. - Return your analysis in JSON format with the following structure. 
diff --git a/AntiPattern_Remediator/static/prompt/code_transformer.yaml b/AntiPattern_Remediator/static/prompt/code_transformer.yaml index 596a309..1a0a4e4 100644 --- a/AntiPattern_Remediator/static/prompt/code_transformer.yaml +++ b/AntiPattern_Remediator/static/prompt/code_transformer.yaml @@ -12,6 +12,7 @@ code_transformer: system: | You are an expert Java programmer responsible for refactoring code based on a provided strategy. You will be given the original code and a JSON object containing a list of refactoring strategies. + Do **not** use Markdown code fences (```), triple quotes ("""), or any quoting. Output must be raw Java code. Your task is to apply all these strategies to produce a single, fully refactored version of the code. user: | @@ -30,6 +31,10 @@ code_transformer: {code} ``` + **OUTPUT CONTRACT (read carefully):** + Return ONLY raw Java source code. + Do not include Markdown code fences, triple quotes, or any prose. + **Your Task:** 1. Synthesize all the provided refactoring strategies. 2. Apply the combined strategies to the 'Original Java Code'. 
diff --git a/AntiPattern_Remediator/workflow/__init__.py b/AntiPattern_Remediator/workflow/__init__.py new file mode 100644 index 0000000..21cb8dc --- /dev/null +++ b/AntiPattern_Remediator/workflow/__init__.py @@ -0,0 +1,11 @@ +""" +Workflow utilities package for AntiPattern Remediator + +This package contains modular utilities for the full repository workflow: +- workflow_utils: Core utilities and path operations +- backup_manager: Repository backup operations +- results_manager: Results processing and reporting +- file_operations: File I/O operations +""" + +__version__ = "1.0.0" diff --git a/AntiPattern_Remediator/workflow/backup_manager.py b/AntiPattern_Remediator/workflow/backup_manager.py new file mode 100644 index 0000000..289f51d --- /dev/null +++ b/AntiPattern_Remediator/workflow/backup_manager.py @@ -0,0 +1,54 @@ +""" +Repository backup management for AntiPattern Remediator + +This module handles creating backups of repositories before processing. +""" + +import shutil +from pathlib import Path +from datetime import datetime +from colorama import Fore, Style + + +def create_repository_backup(repo_paths: set, backup_base_dir: str = "../backups") -> dict: + """Create backups of repositories before processing.""" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_dir = Path(backup_base_dir) / f"repo_backup_{timestamp}" + backup_dir.mkdir(parents=True, exist_ok=True) + + backup_info = { + 'timestamp': timestamp, + 'backup_dir': str(backup_dir), + 'backed_up_repos': [], + 'failed_backups': [] + } + + print(Fore.BLUE + f"\nCreating repository backups in: {backup_dir}" + Style.RESET_ALL) + + for repo_path in repo_paths: + try: + repo_path_obj = Path(repo_path) + repo_name = repo_path_obj.name + backup_repo_path = backup_dir / repo_name + + print(Fore.CYAN + f"Backing up repository: {repo_name}..." 
+ Style.RESET_ALL) + + # Copy the entire repository + shutil.copytree(repo_path, backup_repo_path, dirs_exist_ok=True) + + backup_info['backed_up_repos'].append({ + 'original_path': repo_path, + 'backup_path': str(backup_repo_path), + 'repo_name': repo_name + }) + + print(Fore.GREEN + f"Successfully backed up: {repo_name}" + Style.RESET_ALL) + + except Exception as e: + print(Fore.RED + f"Failed to backup {repo_path}: {e}" + Style.RESET_ALL) + backup_info['failed_backups'].append({ + 'repo_path': repo_path, + 'error': str(e) + }) + + return backup_info diff --git a/AntiPattern_Remediator/workflow/file_operations.py b/AntiPattern_Remediator/workflow/file_operations.py new file mode 100644 index 0000000..a5af0ee --- /dev/null +++ b/AntiPattern_Remediator/workflow/file_operations.py @@ -0,0 +1,40 @@ +""" +File I/O operations for AntiPattern Remediator + +This module handles reading and writing files during the workflow process. +""" + +from colorama import Fore, Style + + +def read_java_file(file_path: str) -> str: + """Read the content of a Java file.""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read() + except Exception as e: + print(Fore.RED + f"Error reading file {file_path}: {e}" + Style.RESET_ALL) + return None + + +def save_refactored_code(file_path: str, refactored_code: str, backup: bool = False) -> bool: + """Save the refactored code back to the original file.""" + try: + # Create backup if requested (disabled by default since we backup entire repos) + if backup: + backup_path = f"{file_path}.backup" + with open(file_path, 'r', encoding='utf-8') as original: + with open(backup_path, 'w', encoding='utf-8') as backup_file: + backup_file.write(original.read()) + print(Fore.YELLOW + f"Backup created: {backup_path}" + Style.RESET_ALL) + + # Write refactored code + with open(file_path, 'w', encoding='utf-8') as f: + f.write(refactored_code) + + print(Fore.GREEN + f"Refactored code saved to: {file_path}" + Style.RESET_ALL) + return True 
+ + except Exception as e: + print(Fore.RED + f"Error saving refactored code to {file_path}: {e}" + Style.RESET_ALL) + return False diff --git a/AntiPattern_Remediator/workflow/results_manager.py b/AntiPattern_Remediator/workflow/results_manager.py new file mode 100644 index 0000000..3dbe79f --- /dev/null +++ b/AntiPattern_Remediator/workflow/results_manager.py @@ -0,0 +1,155 @@ +""" +Results processing and reporting for AntiPattern Remediator + +This module handles saving intermediate results and creating processing summaries. +""" + +import json +from pathlib import Path +from datetime import datetime +from colorama import Fore, Style + + +def save_intermediate_results(file_path: str, final_state: dict, settings, results_dir: str = "../processing_results") -> bool: + """Save intermediate results from the agentic workflow for analysis in markdown format.""" + try: + # Create results directory if it doesn't exist + results_path = Path(results_dir) + results_path.mkdir(parents=True, exist_ok=True) + + # Create a unique filename based on the original file path + file_path_obj = Path(file_path) + + # Extract the meaningful part of the path starting from the repository name + # Find the 'clones' directory and take everything after it + meaningful_path = None + for i, part in enumerate(file_path_obj.parts): + if part == 'clones' and i + 1 < len(file_path_obj.parts): + # Take from the repo name onwards + meaningful_path = Path(*file_path_obj.parts[i+1:]) + break + + if meaningful_path is None: + # Fallback: use just the filename if 'clones' not found + meaningful_path = file_path_obj.name + + # Create a safe filename by replacing path separators and other problematic characters + safe_filename = str(meaningful_path).replace('/', '_').replace('\\', '_').replace(':', '_') + + # Replace .java extension with .md and add results suffix + if safe_filename.endswith('.java'): + safe_filename = safe_filename[:-5] # Remove .java + results_filename = f"{safe_filename}_results.md" + 
results_file_path = results_path / results_filename + + # Generate markdown content + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + markdown_content = f"""# Processing Results: {file_path_obj.name} + +## File Information +- **Original File Path**: `{file_path}` +- **Processing Timestamp**: {timestamp} +- **Code Refactored**: {'Yes' if final_state.get('refactored_code') else 'No'} + +--- + +## Anti-Pattern Scanner Results + +""" + + antipatterns_results = final_state.get('antipatterns_scanner_results') + if antipatterns_results: + if isinstance(antipatterns_results, str): + markdown_content += f"\n{antipatterns_results}\n\n\n" + elif isinstance(antipatterns_results, dict): + for key, value in antipatterns_results.items(): + markdown_content += f"### {key.replace('_', ' ').title()}\n" + if isinstance(value, (list, dict)): + markdown_content += f"```json\n{json.dumps(value, indent=2)}\n```\n\n" + else: + markdown_content += f"{value}\n\n" + else: + markdown_content += f"```json\n{json.dumps(antipatterns_results, indent=2, default=str)}\n```\n\n" + else: + markdown_content += "No anti-patterns detected or scanner did not run.\n\n" + + markdown_content += "---\n\n## Refactoring Strategy Results\n\n" + + refactoring_results = final_state.get('refactoring_strategy_results') + if refactoring_results: + if isinstance(refactoring_results, str): + markdown_content += f"\n{refactoring_results}\n\n\n" + elif isinstance(refactoring_results, dict): + for key, value in refactoring_results.items(): + markdown_content += f"### {key.replace('_', ' ').title()}\n" + if isinstance(value, (list, dict)): + markdown_content += f"```json\n{json.dumps(value, indent=2)}\n```\n\n" + else: + markdown_content += f"{value}\n\n" + else: + markdown_content += f"```json\n{json.dumps(refactoring_results, indent=2, default=str)}\n```\n\n" + else: + markdown_content += "No refactoring strategy generated.\n\n" + + markdown_content += f"---\n\n*Generated by AntiPattern Remediator Tool using 
{settings.LLM_MODEL}*\n" + + # Save to markdown file + with open(results_file_path, 'w', encoding='utf-8') as f: + f.write(markdown_content) + + print(Fore.CYAN + f"Intermediate results saved: {results_file_path}" + Style.RESET_ALL) + return True + + except Exception as e: + print(Fore.RED + f"Error saving intermediate results for {file_path}: {e}" + Style.RESET_ALL) + return False + + +def create_processing_summary(processed_files: list, backup_info: dict, results_dir: str = "../processing_results") -> str: + """Create a comprehensive summary report of the processing session.""" + try: + results_path = Path(results_dir) + results_path.mkdir(parents=True, exist_ok=True) + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + summary_file = results_path / f"processing_summary_{timestamp}.json" + + # Categorize results + successful_refactoring = [f for f in processed_files if f['status'] == 'success'] + no_refactoring_needed = [f for f in processed_files if f['status'] == 'no_refactoring'] + files_with_antipatterns = [f for f in processed_files if f.get('antipatterns_found', False)] + total_antipatterns = sum(f.get('antipatterns_count', 0) for f in processed_files) + + summary_data = { + 'processing_session': { + 'timestamp': timestamp, + 'backup_info': backup_info, + 'total_files_processed': len(processed_files), + 'successful_refactoring': len(successful_refactoring), + 'no_refactoring_needed': len(no_refactoring_needed), + 'files_with_antipatterns_detected': len(files_with_antipatterns), + 'total_antipatterns_found': total_antipatterns + }, + 'detailed_results': { + 'successful_refactoring': successful_refactoring, + 'no_refactoring_needed': no_refactoring_needed, + }, + 'statistics': { + 'refactoring_success_rate': len(successful_refactoring) / len(processed_files) * 100 if processed_files else 0, + 'antipattern_detection_rate': len(files_with_antipatterns) / len(processed_files) * 100 if processed_files else 0, + 'average_code_reviews': 
sum(f.get('code_review_times', 0) for f in processed_files) / len(processed_files) if processed_files else 0, + 'total_antipatterns_found': total_antipatterns, + 'average_antipatterns_per_file': total_antipatterns / len(processed_files) if processed_files else 0 + } + } + + # Save summary + with open(summary_file, 'w', encoding='utf-8') as f: + json.dump(summary_data, f, indent=2, ensure_ascii=False, default=str) + + print(Fore.CYAN + f"Processing summary saved: {summary_file}" + Style.RESET_ALL) + return str(summary_file) + + except Exception as e: + print(Fore.RED + f"Error creating processing summary: {e}" + Style.RESET_ALL) + return None diff --git a/AntiPattern_Remediator/workflow/workflow_utils.py b/AntiPattern_Remediator/workflow/workflow_utils.py new file mode 100644 index 0000000..eeb8ef3 --- /dev/null +++ b/AntiPattern_Remediator/workflow/workflow_utils.py @@ -0,0 +1,70 @@ +""" +Core workflow utilities for AntiPattern Remediator + +This module contains utility functions for parsing results and extracting repository paths. 
+""" + +import json +import re +from pathlib import Path +from colorama import Fore, Style + + +def parse_antipattern_results(antipatterns_scanner_results): + """Parse anti-pattern scanner results to determine if patterns were found and count them.""" + + if not antipatterns_scanner_results: + return False, 0 + + # Try to parse as JSON first + try: + if isinstance(antipatterns_scanner_results, str): + print(type(antipatterns_scanner_results)) + # Try to extract JSON from the string - improved regex pattern + # Look for JSON objects containing total_antipatterns_found + json_pattern = r'\{\s*[^}]*?"total_antipatterns_found"\s*:\s*(\d+)[^}]*?\}' + json_match = re.search(json_pattern, antipatterns_scanner_results, re.DOTALL) + + if json_match: + try: + json_data = json.loads(json_match.group()) + total_found = json_data.get('total_antipatterns_found', 0) + return total_found > 0, total_found + except json.JSONDecodeError: + # If full JSON parsing fails, extract just the number + total_found = int(json_match.group(1)) + return total_found > 0, total_found + + return False, 0 + + elif isinstance(antipatterns_scanner_results, dict): + total_found = antipatterns_scanner_results.get('total_antipatterns_found', 0) + return total_found > 0, total_found + + except (json.JSONDecodeError, Exception) as e: + print(f"Error parsing antipattern results: {e}") + pass + + # Default: if we have results but can't parse them, assume no anti-patterns + return False, 0 + + +def get_repository_paths_from_files(file_paths: list) -> set: + """Extract unique repository paths from the list of file paths.""" + repo_paths = set() + + for file_path in file_paths: + path = Path(file_path) + + # Find the clones directory in the path + for i, part in enumerate(path.parts): + if part == 'clones' and i + 1 < len(path.parts): + # The next part after 'clones' is the repository name + repo_name = path.parts[i + 1] + # Reconstruct the full repository path + clones_path = Path(*path.parts[:i+1]) # Path up 
to 'clones' + repo_path = clones_path / repo_name + repo_paths.add(str(repo_path)) + break + + return repo_paths From e733cbfbdde8c4737e172e3120b78057eacdcf22 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Mon, 25 Aug 2025 13:52:50 +0100 Subject: [PATCH 5/9] removed type check in workflow_utils.py --- AntiPattern_Remediator/workflow/workflow_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/AntiPattern_Remediator/workflow/workflow_utils.py b/AntiPattern_Remediator/workflow/workflow_utils.py index eeb8ef3..f3c0af8 100644 --- a/AntiPattern_Remediator/workflow/workflow_utils.py +++ b/AntiPattern_Remediator/workflow/workflow_utils.py @@ -19,7 +19,6 @@ def parse_antipattern_results(antipatterns_scanner_results): # Try to parse as JSON first try: if isinstance(antipatterns_scanner_results, str): - print(type(antipatterns_scanner_results)) # Try to extract JSON from the string - improved regex pattern # Look for JSON objects containing total_antipatterns_found json_pattern = r'\{\s*[^}]*?"total_antipatterns_found"\s*:\s*(\d+)[^}]*?\}' From c61bac754d75a727e18c95af88e2c5b56ad21d88 Mon Sep 17 00:00:00 2001 From: Avinash Date: Wed, 27 Aug 2025 10:16:54 +0100 Subject: [PATCH 6/9] Final changes --- .../src/core/agents/explainer.py | 85 ++++++++++++++----- AntiPattern_Remediator/src/core/state.py | 21 +++-- .../static/prompt/explainer.yaml | 27 +++--- .../unit_test/prompt/test_prompt_manager.py | 2 +- 4 files changed, 91 insertions(+), 44 deletions(-) diff --git a/AntiPattern_Remediator/src/core/agents/explainer.py b/AntiPattern_Remediator/src/core/agents/explainer.py index 9344121..4ab86a6 100644 --- a/AntiPattern_Remediator/src/core/agents/explainer.py +++ b/AntiPattern_Remediator/src/core/agents/explainer.py @@ -1,8 +1,8 @@ """ ExplainerAgent — minimal version - Delegates state handling to create_graph.py -- Only builds messages and parses JSON response -- Keeps code minimal and focused +- Uses PromptManager if available; otherwise a tiny inline fallback prompt +- Always 
passes msgs; always returns a non-empty explanation_json """ from __future__ import annotations @@ -10,6 +10,7 @@ import json from langchain_core.language_models import BaseLanguageModel +from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder from ..prompt import PromptManager from src.core.utils import extract_first_json @@ -32,33 +33,55 @@ def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: antipattern_name=state.get("antipattern_name", "Unknown antipattern"), antipattern_description=state.get("antipattern_description", ""), antipatterns_json=json.dumps(state.get("antipatterns_json", []), ensure_ascii=False), + msgs=state.get("msgs", []), # ensure MessagesPlaceholder is satisfied ) messages = self._build_messages(**kwargs) - response = self.llm.invoke(messages) - raw = getattr(response, "content", None) or str(response) + + try: + response = self.llm.invoke(messages) + raw = getattr(response, "content", None) or str(response) + except Exception as e: + raw = f"LLM error: {e}" + state["explanation_response_raw"] = raw - parsed = extract_first_json(raw) - state["explanation_json"] = parsed if isinstance(parsed, dict) else {} + # Robust parse: accept dict, wrap list, or emit a minimal fallback + try: + parsed = extract_first_json(raw) + except Exception: + parsed = None + + if isinstance(parsed, dict): + state["explanation_json"] = parsed + elif isinstance(parsed, list): + state["explanation_json"] = {"items": parsed} + else: + state["explanation_json"] = self._fallback_payload(state) + return state def display_explanation(self, state: Dict[str, Any]) -> Dict[str, Any]: print("\n=== Explanation (raw) ===\n", state.get("explanation_response_raw", "N/A")) if state.get("explanation_json"): - print("\n=== Explanation (JSON) ===\n", json.dumps(state["explanation_json"], indent=2, ensure_ascii=False)) + print("\n=== Explanation (JSON) ===\n", + json.dumps(state["explanation_json"], indent=2, ensure_ascii=False)) return state def 
_build_messages(self, **kwargs) -> Any: - try: - getp = getattr(self.prompt_manager, "get_prompt", None) - if callable(getp): - prompt = getp(PROMPT_KEY) - if prompt is not None: - return prompt.format_messages(**kwargs) - except Exception: - pass + # Always ensure msgs exists + if "msgs" not in kwargs or kwargs["msgs"] is None: + kwargs = {**kwargs, "msgs": []} + # 1) Try preloaded template from PromptManager + prompt = None + getp = getattr(self.prompt_manager, "get_prompt", None) + if callable(getp): + prompt = getp(PROMPT_KEY) + if prompt is not None: + return prompt.format_messages(**kwargs) + + # 2) Minimal inline fallback schema = { "items": [{ "antipattern_name": "", @@ -76,11 +99,33 @@ def _build_messages(self, **kwargs) -> Any: "closing_summary": "" } content = ( - "Given inputs (JSON):\n" + json.dumps(kwargs, ensure_ascii=False) + + "Given inputs (JSON):\n" + json.dumps({k: v for k, v in kwargs.items() if k != "msgs"}, ensure_ascii=False) + "\nRespond with STRICT JSON using exactly this schema:\n" + json.dumps(schema, ensure_ascii=False) ) - return [ - {"role": "system", "content": "Return STRICT JSON only. No commentary."}, - {"role": "user", "content": content}, - ] + fallback = ChatPromptTemplate.from_messages([ + ("system", "Return STRICT JSON only. No commentary."), + ("user", content), + MessagesPlaceholder("msgs"), + ]) + return fallback.format_messages(**kwargs) + + @staticmethod + def _fallback_payload(state: Dict[str, Any]) -> Dict[str, Any]: + """Tiny fallback so downstream never breaks if parsing fails.""" + return { + "items": [{ + "antipattern_name": state.get("antipattern_name", "Unknown antipattern"), + "antipattern_description": state.get("antipattern_description", ""), + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": state.get("refactor_rationale", ""), + "refactored_code": state.get("refactored_code", ""), + "summary": "Auto-generated minimal explanation (parser fallback)." 
+ }], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + } diff --git a/AntiPattern_Remediator/src/core/state.py b/AntiPattern_Remediator/src/core/state.py index 63a147f..3fa9562 100644 --- a/AntiPattern_Remediator/src/core/state.py +++ b/AntiPattern_Remediator/src/core/state.py @@ -8,14 +8,17 @@ class AgentState(TypedDict): """State definition for passing data through the workflow""" code: str # Code to be analyzed + language: Optional[str] # Language of the code (used by ExplainerAgent) context: Optional[str] # Context retrieved from knowledge base (scanner) trove_context: Optional[str] # Context retrieved from the Anti-Pattern Trove (TinyDB/Chroma) - antipatterns_scanner_results: Optional[str] - refactoring_strategy_results: Optional[str] # Refactoring strategy generated by strategist - refactored_code: Optional[str] # Code after refactoring - code_review_results: Optional[str] # Code review results - code_review_times: int # Number of times code has been reviewed - msgs: List[Dict[str, Any]] # Message history for conversation context - answer: Optional[str] # Analysis result - explanation_response_raw: Optional[str] # Raw LLM output from explainer - explanation_json: Optional[Dict[str, Any]] # Parsed JSON explanation + antipatterns_scanner_results: Optional[Dict[str, Any]] # Scanner output (structured) + antipatterns_json: Optional[List[Dict[str, Any]]] # Normalized list used by ExplainerAgent + refactoring_strategy_results: Optional[Any] # Strategy can be dict/list/str depending on agent + refactored_code: Optional[str] # Code after refactoring + code_review_results: Optional[str] # Code review results + code_review_times: int # Number of times code has been reviewed + msgs: List[Any] # Conversation history (LangChain BaseMessages) + answer: Optional[str] # Final/aggregated analysis result + + explanation_response_raw: Optional[str] # Raw LLM output from explainer + explanation_json: 
Optional[Dict[str, Any]] # Parsed JSON explanation diff --git a/AntiPattern_Remediator/static/prompt/explainer.yaml b/AntiPattern_Remediator/static/prompt/explainer.yaml index 8963f95..be29ab7 100644 --- a/AntiPattern_Remediator/static/prompt/explainer.yaml +++ b/AntiPattern_Remediator/static/prompt/explainer.yaml @@ -1,9 +1,8 @@ explainer: - template: | - You are a senior software reviewer. - Your job is to explain detected anti-patterns and the applied refactor in a clear, structured way. - Output STRICT JSON only — no commentary outside JSON. + system: | + You are a senior software reviewer. Output STRICT JSON only — no commentary outside the JSON object. + user: | === Inputs === Language: {language} Context: {context} @@ -15,10 +14,10 @@ explainer: Refactor Rationale: {refactor_rationale} - === Required Output Schema === - { + === Return EXACTLY this JSON structure === + {{ "items": [ - { + {{ "antipattern_name": "", "antipattern_description": "", "impact": "", @@ -26,17 +25,17 @@ explainer: "how_we_fixed_it": "", "refactored_code": "", "summary": "" - } + }} ], "what_changed": [], "why_better": [], "principles_applied": [], "trade_offs": [], "closing_summary": "" - } + }} - Notes: - - Always return valid JSON. - - Use multiple entries under "items" if more than one antipattern is relevant. - - Keep `refactored_code` short (or truncated if needed). - - Fill all fields, even if briefly. + Rules: + - Return valid JSON only (no code fences, no prose). + - If multiple antipatterns apply, include multiple entries in "items". + - Keep "refactored_code" concise; truncate if needed but keep it valid. + - Populate all fields; use brief placeholders if unsure. 
diff --git a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py index 6e859c4..1fb5dec 100644 --- a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py +++ b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py @@ -583,7 +583,7 @@ def temp_prompt_files(self): }, 'explainer.yaml': { 'explainer': { - 'system': 'You are a senior software reviewer.', + 'system': 'You are a senior software code explainer.', 'user': 'Explain: {code}\nLang: {language}\nCtx: {context}' } } From 710d95e6aaf4ead9aa37e7ede78026125a8c293e Mon Sep 17 00:00:00 2001 From: Avinash Date: Fri, 22 Aug 2025 15:51:18 +0100 Subject: [PATCH 7/9] Final changes for explainer agent --- AntiPattern_Remediator/main.py | 113 ++++++++---------- .../src/core/agents/__init__.py | 4 +- .../src/core/agents/explainer.py | 86 +++++++++++++ .../src/core/graph/create_graph.py | 13 +- .../src/core/prompt/prompt_manager.py | 29 +++-- AntiPattern_Remediator/src/core/state.py | 2 + .../src/core/utils/__init__.py | 3 + .../src/core/utils/json_utils.py | 27 +++++ .../static/prompt/explainer.yaml | 42 +++++++ AntiPattern_Remediator/static/tinydb.json | 2 +- 10 files changed, 240 insertions(+), 81 deletions(-) create mode 100644 AntiPattern_Remediator/src/core/agents/explainer.py create mode 100644 AntiPattern_Remediator/src/core/utils/__init__.py create mode 100644 AntiPattern_Remediator/src/core/utils/json_utils.py create mode 100644 AntiPattern_Remediator/static/prompt/explainer.yaml diff --git a/AntiPattern_Remediator/main.py b/AntiPattern_Remediator/main.py index a659983..1c96cc8 100644 --- a/AntiPattern_Remediator/main.py +++ b/AntiPattern_Remediator/main.py @@ -1,9 +1,7 @@ - """ Main entry point - Legacy Code Migration Tool """ from config.settings import initialize_settings -# from scripts import seed_database from dotenv import load_dotenv load_dotenv() from colorama import Fore, Style @@ -12,11 +10,34 
@@ from full_repo_workflow import run_full_repo_workflow -def run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph): - """Run the original workflow with a hardcoded Java code snippet.""" - print(Fore.BLUE + "\n=== Code Snippet Analysis Workflow ===" + Style.RESET_ALL) - print("Analyzing the provided Java code snippet...") - +def main(): + """Main function: Run antipattern analysis""" + + # Let user select provider + print("Available providers: 1) ollama 2) ibm 3) vllm") + choice = input("Select provider (1-3): ").strip() + + provider_map = {"1": "ollama", "2": "ibm", "3": "vllm"} + provider = provider_map.get(choice, "ollama") # default to ollama + + # Let us choose which DB to interact with + print("Choose your trove: 1) ChromaDB (VectorDB) 2) TinyDB (DocumentDB)") + db_choice = input("Choose 1 or 2: ").strip() + + # Initialize global settings with selected provider + settings = initialize_settings(provider) + print(Fore.GREEN + f"Using {settings.LLM_PROVIDER} with model {settings.LLM_MODEL}" + Style.RESET_ALL) + + # Temporary Lazy Imports + from src.core.graph import CreateGraph + from src.data.database import VectorDBManager, TinyDBManager + from src.core.prompt import PromptManager + from scripts import seed_database + + # Initialize PromptManager + print("Initializing PromptManager...") + prompt_manager = PromptManager() + # Example Java code legacy_code = """ public class ApplicationManager { @@ -55,7 +76,8 @@ def run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph): } } """ - + + # Initial workflow state initial_state = { "code": legacy_code, "context": None, @@ -66,58 +88,12 @@ def run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph): "code_review_results": None, "code_review_times": 0, "msgs": [], - "answer": None - } - - final_state = langgraph.invoke(initial_state) + "answer": None, - print(Fore.GREEN + f"\nAnalysis Complete!" 
+ Style.RESET_ALL) - print(f"Final state keys: {list(final_state.keys())}") - print(f"Context retrieved: {'Yes' if final_state.get('context') else 'No'}") - print(f"Analysis completed: {'Yes' if final_state.get('antipatterns_scanner_results') else 'No'}") - print(f"Refactored code: {'Yes' if final_state.get('refactored_code') else 'No'}") - print(f"Code review results: {final_state.get('code_review_times')}") - - -def main(): - """Main function: Choose between code snippet analysis or full repository run""" - - print(Fore.BLUE + "=== AntiPattern Remediator Tool ===" + Style.RESET_ALL) - print("Choose your analysis mode:") - print("1) Code Snippet Analysis - Analyze a sample Java code snippet") - print("2) Full Repository Run - Process files with 100% test coverage from JaCoCo results") - - # Let user choose analysis mode - mode_choice = input("\nSelect mode (1-2): ").strip() - - if mode_choice not in ["1", "2"]: - print(Fore.RED + "Invalid choice. Defaulting to Code Snippet Analysis." + Style.RESET_ALL) - mode_choice = "1" - - # Let user select provider - print("\nAvailable providers: 1) ollama 2) ibm 3) vllm") - choice = input("Select provider (1-3): ").strip() - - provider_map = {"1": "ollama", "2": "ibm", "3": "vllm"} - provider = provider_map.get(choice, "ollama") # default to ollama - - # Let us choose which DB to interact with - print("Choose your trove: 1) ChromaDB (VectorDB) 2) TinyDB (DocumentDB)") - db_choice = input("Choose 1 or 2: ").strip() - - # Initialize global settings with selected provider - settings = initialize_settings(provider) - print(Fore.GREEN + f"Using {settings.LLM_PROVIDER} with model {settings.LLM_MODEL}" + Style.RESET_ALL) - - # Temporary Lazy Imports - from src.core.graph import CreateGraph - from src.data.database import VectorDBManager, TinyDBManager - from src.core.prompt import PromptManager - from scripts import seed_database - - # Initialize PromptManager - print("Initializing PromptManager...") - prompt_manager = 
PromptManager() + # ExplainerAgent fields + "explanation_response_raw": None, + "explanation_json": None, + } # Setup Database if db_choice == "2": @@ -133,11 +109,22 @@ def main(): retriever = db_manager.as_retriever() langgraph = CreateGraph(db_manager, prompt_manager, retriever=retriever).workflow - # Run the selected workflow - if mode_choice == "1": - run_code_snippet_workflow(settings, db_manager, prompt_manager, langgraph) + # Final results summary + print(Fore.GREEN + f"\nAnalysis Complete!" + Style.RESET_ALL) + print(f"Final state keys: {list(final_state.keys())}") + print(f"Context retrieved: {'Yes' if final_state.get('context') else 'No'}") + print(f"Analysis completed: {'Yes' if final_state.get('antipatterns_scanner_results') else 'No'}") + print(f"Refactored code: {'Yes' if final_state.get('refactored_code') else 'No'}") + print(f"Code review results: {final_state.get('code_review_times')}") + + # Show explanation from ExplainerAgent + if final_state.get("explanation_json"): + import json + print(Fore.CYAN + "\n=== Explanation (JSON) ===" + Style.RESET_ALL) + print(json.dumps(final_state["explanation_json"], indent=2, ensure_ascii=False)) else: - run_full_repo_workflow(settings, db_manager, prompt_manager, langgraph) + print(Fore.RED + "\nNo explanation was generated." 
+ Style.RESET_ALL) + if __name__ == "__main__": main() diff --git a/AntiPattern_Remediator/src/core/agents/__init__.py b/AntiPattern_Remediator/src/core/agents/__init__.py index 79c1adc..d5eceaa 100644 --- a/AntiPattern_Remediator/src/core/agents/__init__.py +++ b/AntiPattern_Remediator/src/core/agents/__init__.py @@ -7,10 +7,12 @@ from .refactor_strategist import RefactorStrategist from .code_transformer import CodeTransformer from .code_reviewer import CodeReviewerAgent +from .explainer import ExplainerAgent __all__ = [ "AntipatternScanner", "RefactorStrategist", "CodeTransformer", - "CodeReviewerAgent" + "CodeReviewerAgent", + "ExplainerAgent" ] diff --git a/AntiPattern_Remediator/src/core/agents/explainer.py b/AntiPattern_Remediator/src/core/agents/explainer.py new file mode 100644 index 0000000..9344121 --- /dev/null +++ b/AntiPattern_Remediator/src/core/agents/explainer.py @@ -0,0 +1,86 @@ +""" +ExplainerAgent — minimal version +- Delegates state handling to create_graph.py +- Only builds messages and parses JSON response +- Keeps code minimal and focused +""" +from __future__ import annotations + +from typing import Dict, Any +import json + +from langchain_core.language_models import BaseLanguageModel +from ..prompt import PromptManager +from src.core.utils import extract_first_json + +PROMPT_KEY = "explainer" + + +class ExplainerAgent: + def __init__(self, llm: BaseLanguageModel, prompt_manager: PromptManager): + self.llm = llm + self.prompt_manager = prompt_manager + + def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: + """Generate explanation JSON for detected antipatterns and refactor.""" + kwargs = dict( + code=state.get("code", ""), + language=state.get("language", "Java"), + context=state.get("context", ""), + refactored_code=state.get("refactored_code", ""), + refactor_rationale=state.get("refactor_rationale", ""), + antipattern_name=state.get("antipattern_name", "Unknown antipattern"), + 
antipattern_description=state.get("antipattern_description", ""), + antipatterns_json=json.dumps(state.get("antipatterns_json", []), ensure_ascii=False), + ) + + messages = self._build_messages(**kwargs) + response = self.llm.invoke(messages) + raw = getattr(response, "content", None) or str(response) + state["explanation_response_raw"] = raw + + parsed = extract_first_json(raw) + state["explanation_json"] = parsed if isinstance(parsed, dict) else {} + return state + + def display_explanation(self, state: Dict[str, Any]) -> Dict[str, Any]: + print("\n=== Explanation (raw) ===\n", state.get("explanation_response_raw", "N/A")) + if state.get("explanation_json"): + print("\n=== Explanation (JSON) ===\n", json.dumps(state["explanation_json"], indent=2, ensure_ascii=False)) + return state + + def _build_messages(self, **kwargs) -> Any: + try: + getp = getattr(self.prompt_manager, "get_prompt", None) + if callable(getp): + prompt = getp(PROMPT_KEY) + if prompt is not None: + return prompt.format_messages(**kwargs) + except Exception: + pass + + schema = { + "items": [{ + "antipattern_name": "", + "antipattern_description": "", + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": "", + "refactored_code": "", + "summary": "" + }], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + } + content = ( + "Given inputs (JSON):\n" + json.dumps(kwargs, ensure_ascii=False) + + "\nRespond with STRICT JSON using exactly this schema:\n" + + json.dumps(schema, ensure_ascii=False) + ) + return [ + {"role": "system", "content": "Return STRICT JSON only. 
No commentary."}, + {"role": "user", "content": content}, + ] diff --git a/AntiPattern_Remediator/src/core/graph/create_graph.py b/AntiPattern_Remediator/src/core/graph/create_graph.py index 9c4cf93..096d403 100644 --- a/AntiPattern_Remediator/src/core/graph/create_graph.py +++ b/AntiPattern_Remediator/src/core/graph/create_graph.py @@ -14,6 +14,7 @@ from ..agents import RefactorStrategist from ..agents import CodeTransformer from ..agents import CodeReviewerAgent +from ..agents import ExplainerAgent # Imports for LangSmith tracing import os @@ -62,7 +63,7 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll self.prompt_manager = prompt_manager self.conditional_edges = ConditionalEdges() - # ✅ assign the instance attribute before use + # assign the instance attribute before use self.retriever = retriever or self.db_manager.as_retriever() retriever_tool = create_retriever_tool( @@ -78,13 +79,13 @@ def __init__(self, db_manager, prompt_manager: PromptManager, retriever=None, ll "strategist": RefactorStrategist(self.llm, self.prompt_manager, retriever=self.retriever), "transformer": CodeTransformer(self.llm, self.prompt_manager), "reviewer": CodeReviewerAgent(self.llm, self.prompt_manager), + "explainer": ExplainerAgent(self.llm, self.prompt_manager) } # Build the LangGraph workflow self.workflow = self._build_graph() def _build_graph(self): - """Build LangGraph workflow""" graph = StateGraph(AgentState) # Scanner: retrieve + analyze @@ -104,6 +105,10 @@ def _build_graph(self): graph.add_node("review_code", self.agents["reviewer"].review_code) graph.add_node("display_code_review_results", self.agents["reviewer"].display_code_review_results) + # Explainer: final storytelling + graph.add_node("explain_antipattern", self.agents["explainer"].explain_antipattern) + graph.add_node("display_explanation", self.agents["explainer"].display_explanation) + # Topology graph.set_entry_point("retrieve_context") graph.add_edge("retrieve_context", 
"analyze_antipatterns") @@ -123,6 +128,8 @@ def _build_graph(self): }, ) - graph.add_edge("display_code_review_results", END) + graph.add_edge("display_code_review_results", "explain_antipattern") + graph.add_edge("explain_antipattern", "display_explanation") + graph.add_edge("display_explanation", END) return graph.compile() diff --git a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py index 0650429..1f1abef 100644 --- a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py +++ b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py @@ -7,27 +7,26 @@ class PromptManager: """Manager for handling prompt templates and configurations.""" def __init__(self): - # Prompt key constants, **same as YAML filenames** + # Prompt key constants, **same as YAML filenames (without .yaml)** self.ANTIPATTERN_SCANNER = "antipattern_scanner" - self.REFACTOR_STRATEGIST = "refactor_strategist" + self.REFACTOR_STRATEGIST = "refactor_strategist" self.CODE_TRANSFORMER = "code_transformer" self.CODE_REVIEWER = "code_reviewer" + self.EXPLAINER_AGENT = "explainer" self.prompt_directory = settings.PROMPT_DIR - # Initialize storage for prompt templates self._prompt_cache = {} - # Load prompts on initialization self._load_all_prompts() def _load_all_prompts(self) -> None: """Load all prompt configurations from YAML files.""" try: - # Get all prompt constants and load corresponding files prompt_constants = [ self.ANTIPATTERN_SCANNER, self.REFACTOR_STRATEGIST, self.CODE_TRANSFORMER, self.CODE_REVIEWER, + self.EXPLAINER_AGENT, ] for prompt_key in prompt_constants: @@ -55,12 +54,17 @@ def _load_prompt_from_yaml(self, filename: str, prompt_key: str) -> None: return prompt_config = config[prompt_key] - # Create ChatPromptTemplate - self._prompt_cache[prompt_key] = ChatPromptTemplate([ - ("system", prompt_config.get('system', '')), - ("user", prompt_config.get('user', '')), - MessagesPlaceholder("msgs") - ]) + + # Build messages 
in (role, content) format + messages = [] + if prompt_config.get("system"): + messages.append(("system", prompt_config["system"])) + if prompt_config.get("user"): + messages.append(("user", prompt_config["user"])) + messages.append(MessagesPlaceholder("msgs")) + + # Use the correct constructor + self._prompt_cache[prompt_key] = ChatPromptTemplate.from_messages(messages) print(f"Loaded prompt '{prompt_key}' from {filename}") except Exception as e: @@ -70,5 +74,4 @@ def get_prompt(self, prompt_key: str) -> Optional[ChatPromptTemplate]: if prompt_key not in self._prompt_cache: print(f"Warning: Prompt '{prompt_key}' not found in cache") return None - - return self._prompt_cache[prompt_key] \ No newline at end of file + return self._prompt_cache[prompt_key] diff --git a/AntiPattern_Remediator/src/core/state.py b/AntiPattern_Remediator/src/core/state.py index fb42b13..a0b34f3 100644 --- a/AntiPattern_Remediator/src/core/state.py +++ b/AntiPattern_Remediator/src/core/state.py @@ -17,4 +17,6 @@ class AgentState(TypedDict): code_review_times: int # Number of times code has been reviewed msgs: List[Dict[str, Any]] # Message history for conversation context answer: Optional[str] # Analysis result + explanation_response_raw: Optional[str] # Raw LLM output from explainer + explanation_json: Optional[Dict[str, Any]] # Parsed JSON explanation current_file_path: Optional[str] # Path to the current file being processed diff --git a/AntiPattern_Remediator/src/core/utils/__init__.py b/AntiPattern_Remediator/src/core/utils/__init__.py new file mode 100644 index 0000000..ccba67c --- /dev/null +++ b/AntiPattern_Remediator/src/core/utils/__init__.py @@ -0,0 +1,3 @@ +from .json_utils import extract_first_json + +__all__ = ["extract_first_json"] \ No newline at end of file diff --git a/AntiPattern_Remediator/src/core/utils/json_utils.py b/AntiPattern_Remediator/src/core/utils/json_utils.py new file mode 100644 index 0000000..d2e0b2f --- /dev/null +++ 
b/AntiPattern_Remediator/src/core/utils/json_utils.py @@ -0,0 +1,27 @@ +import json + +def extract_first_json(text): + """ + Try to extract the first JSON object from a string. + Works if JSON is inside ```json ... ``` fences or just plain text. + """ + if not isinstance(text, str): + return None + + # 1. If the text has fenced JSON like ```json ... ``` + if "```" in text: + parts = text.split("```") + for part in parts: + # Look for JSON-specific fences + if part.strip().lower().startswith("json"): + json_part = part[len("json"):].strip() + try: + return json.loads(json_part) + except Exception: + pass # Try next part + + # 2. If no fenced JSON worked, try to parse the whole text + try: + return json.loads(text.strip()) + except Exception: + return None \ No newline at end of file diff --git a/AntiPattern_Remediator/static/prompt/explainer.yaml b/AntiPattern_Remediator/static/prompt/explainer.yaml new file mode 100644 index 0000000..8963f95 --- /dev/null +++ b/AntiPattern_Remediator/static/prompt/explainer.yaml @@ -0,0 +1,42 @@ +explainer: + template: | + You are a senior software reviewer. + Your job is to explain detected anti-patterns and the applied refactor in a clear, structured way. + Output STRICT JSON only — no commentary outside JSON. + + === Inputs === + Language: {language} + Context: {context} + Detected Anti-patterns (JSON): {antipatterns_json} + Code: + ```{code}``` + Refactored Code: + ```{refactored_code}``` + Refactor Rationale: + {refactor_rationale} + + === Required Output Schema === + { + "items": [ + { + "antipattern_name": "", + "antipattern_description": "", + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": "", + "refactored_code": "", + "summary": "" + } + ], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + } + + Notes: + - Always return valid JSON. + - Use multiple entries under "items" if more than one antipattern is relevant. 
+ - Keep `refactored_code` short (or truncated if needed). + - Fill all fields, even if briefly. diff --git a/AntiPattern_Remediator/static/tinydb.json b/AntiPattern_Remediator/static/tinydb.json index cd87283..8f6198a 100644 --- a/AntiPattern_Remediator/static/tinydb.json +++ b/AntiPattern_Remediator/static/tinydb.json @@ -1 +1 @@ -{"_default": {"1": {"name": "Deep Nesting", "description": "Deep Nesting occurs when conditional or loop blocks are embedded within one another across multiple levels, creating code with high indentation and complex control flow. While not always increasing cyclomatic complexity linearly, deep nesting significantly raises cognitive complexity (the mental effort required to understand, modify, and debug a method).", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Low readability : The \"arrowhead\" structure makes it hard to trace logic and understand the intended flow.\nHigh cognitive load : Developers must mentally track the conditions that lead to or prevent reaching a particular line of code.\nError-prone maintenance : Adding or modifying logic inside deeply nested blocks increases the risk of missing cases or introducing bugs.\nInhibited reuse and testing : Deep nesting often combines concerns that should be split into smaller, testable methods or units.\nPoor diffs in version control : Even small changes can alter indentation across many lines, making reviews harder.", "remediation": "Guard clauses (early return) : Exit early when preconditions fail, flattening the control flow\nExtract method : Isolate deeply nested blocks into private methods with clear names to separate concerns and reduce depth.\nInvet conditionals : Invert logic to return early or skip unnecessary branches.\nReplace nested loops with streams (Java-specific): Abstract common filtering or mapping logic into declarative operations.\nUse pattern matching : Replace layered `if` chains with clearer structural or type-based 
matching.\nEncapsulate state checks : Group multiple conditionals into intention-revealing boolean helpers or state objects.", "limitation": "Can conflict with existing code style : Teams unfamiliar with guard clauses may resist early exits or multiple returns.\nRefactoring can obscure logic during transition : Extracted methods must be clearly named to preserve readability and avoid confusion.\nNested logic may be unavoidable in rare edge cases : Complex parsing, state machines, or embedded domain-specific languages may naturally involve deeper control structures.", "type": "antipattern", "source_file": "deep_nesting.json"}, "2": {"name": "Generic Exception handling", "description": "Generic exception handling refers to the use of broad or unspecific catch blocks (catching but ignoring the exceptions). These patterns obscure the true source of errors, suppress the useful debugging information and can unintentionally hide critical failures. This anti-pattern often stems from the desire to keep the code running, but typically leads to weak systems and increased technical debt.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Loss of context : Catching high-level exceptions removes the granularity needed to understand specific failure reasons\nRepeated boilerplate : Developers may re-implement logging, default values, or stream-closing logic instead of using safe utility methods\nViolation of fail-fast principles : Silent or overly generic handling can let critical errors go unnoticed for too long", "remediation": "", "limitation": "Catching too many specific exceptions can bloat the code and reduce readability.\nRefactoring exception handling may require thorough testing to avoid regressions.\nOver-logging exceptions can clutter logs and obscure real issues.\nSecurity-sensitive applications may need specialised exception handling strategies to avoid leaks", "type": "antipattern", "source_file": "generic_exception_handling.json"}, 
"3": {"name": "Magic Constants", "description": "Magic constants (or magic numbers) are hard-coded literal values (e.g., 3.14, 42, \"admin\") that appear directly in code without context or explanation. These values become problematic when their purpose is unclear, undocumented, or reused inconsistently. While some literals (like 0, 1, or -1) may be self-explanatory in some contexts, others represent thresholds, identifiers, or rules that should be named and documented for clarity.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor readability : Developers must guess the meaning of the value, increasing cognitive load.\nLow maintainability : Changing the value requires updating it everywhere, risking inconsistent updates.\nHarder debugging : Literal values lack descriptive meaning in stack traces, logs, or debuggers.\nDuplication : The same literal used in multiple places leads to repeated logic and tighter coupling.\nViolation of DRY : Embeds implicit meaning multiple times without abstraction.", "remediation": "Define Constants - Move literal values into named constants (e.g., const, final, or static readonly).\nUse Enums - Group related constants (e.g., roles, statuses) as enumerations with meaningful names.", "limitation": "Extracting trivial values like 0 or 1 may clutter code and reduce clarity if overdone.\nIn performance-critical code, indirection through constants or functions may introduce slight overhead.\nOver-abstracting unnamed constants (e.g., MAXCOUNT3) may make code harder to understand.\nIf a value is used only once and is self-explanatory, extracting it may be unnecessary overhead.", "type": "antipattern", "source_file": "magic_constants.json"}, "4": {"name": "SRP Violation", "description": "The Single Responsibility Principle (SRP) is one of the SOLID principles of object-oriented design, stating that a class (or, at a lower level, a method) should have only one reason to change (i.e., it should have only 
one responsibility or concern). A SRP Violation occurs when a class (method) takes on multiple unrelated responsibilities, making it harder to maintain, test, and understand. These violations often result in bloated classes or methods that mix unrelated concerns such as I/O, business logic, error handling, and configuration.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Changing one responsibility might inadvertently affect others, introducing bugs.\nLow cohesion : Code with unrelated responsibilities lacks a clear purpose, reducing clarity and reusability.\nDifficult testing : Unit testing becomes more complex as setup may require mocking or initializing unrelated dependencies.\nCode duplication and tight coupling : Responsibilities are harder to reuse or share, often leading to repeated logic or tight inter-class dependencies.\nHarder onboarding : New developers struggle to understand the purpose and scope of large, multi-purpose classes.", "remediation": "Extract Method \\- Split complex methods into smaller, single-purpose private methods.\nEarly Return (Guard Clauses) \\- Use early exits to reduce nested logic and clarify separate responsibilities.\nUse Local Functions/Lambdas \\- Encapsulate small inline logic into local functions for clarity.\nEncapsulate Temporary Variables \\- Move logic-heavy expressions into descriptive helper methods.\nGroup Related Logic \\- Cluster related operations into distinct helper methods within the same class.\nSeparate Concerns in Loops \\- Extract filtering, transforming, and aggregating into distinct steps.\nIsolate Logging/Error Handling \\- Move side-effect code like logging into dedicated private methods (unless the logging is a trivial single line and does not obscure business logic).", "limitation": "Fixing certain SRP violations (especially class-level violations) requires changing public method signatures, creating new classes, and/or breaking 
interfaces.\nMethods often depend on multiple injected services or shared state; untangling responsibilities might require broader architectural changes.\nLogging, error handling, metrics, and security checks are often scattered across responsibilities and difficult to isolate cleanly at the method level without aspect-oriented programming (AOP) or middleware/interceptor patterns.", "type": "antipattern", "source_file": "srp_violation.json"}, "5": {"name": "Unsafe or Vague Exception Handling", "description": "Reliable exception handling, type safety, and controlled flow are essential forr writing maintainable and robust software. Unsafe or Vague exception handling often results in code that is fragile, difficult to test, and challenging to debug. Instead of providing meaningful error handling or clear separation of concerns, these implementations either hide the underlying problem or use language features in a way that breaks maintainability.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Hidden failures : Catching exceptions without logging or meaningful handling hides the source of issues and makes debugging difficult\nPoor diagnosis : Broad exception handling with vague error messages hides intent and makes it harder to trace the root cause\nInconsistent runtime behaviour : using assertions for control logic can lead to unpredictable behaviour depending on the JVM configuration\nUncontrolled termination : Using system.exit() directly in application logic makes code untestable and prevents proper resource cleanup", "remediation": "Replace assertions with proper condition checks and informative exceptions to ensure consistent behaviour across environments\nAvoid silent catch blocks, log exceptions or rethrow them to preserve error context\nHandle specific exception types instead of catching broad categories like Exception or RuntimeException\nRefactor abrupt shutdown calls into controlled exits using exception handling or 
return code to support recovery", "limitation": "", "type": "antipattern", "source_file": "unsafe_or_vague_exception_handling.json"}}} \ No newline at end of file +{"_default": {"1": {"name": "Monolithic Method", "description": "A Monolithic Method is a single method that tries to do too much, often combining unrelated responsibilities into one block of code. This is essentially a method-level violation of the Single Responsibility Principle (SRP). Monolithic methods are hard to read, understand, maintain, and test because they mix business logic, I/O, error handling, and other concerns in one place.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Modifying one part of the method risks breaking unrelated functionality.\nLow readability : Long, complex methods are hard to follow and understand.\nDifficult testing : Unit tests become cumbersome because the method does too many things at once.\nCode duplication : Reusing logic is difficult; similar tasks often get reimplemented elsewhere.\nTight coupling : Internal details are intertwined, making refactoring risky.", "remediation": "", "limitation": "Refactoring may require changes to public method signatures, especially if other code depends on it.\nSome logic may rely on shared state or multiple services, making it hard to separate without broader architectural changes.\nIf the method handles cross-cutting concerns (e.g., logging, metrics, validation), isolating responsibilities may require AOP or middleware.", "type": "antipattern", "source_file": "monolithic_method.json"}, "2": {"name": "Unsafe or Vague Exception Handling", "description": "Reliable exception handling, type safety, and controlled flow are essential forr writing maintainable and robust software. Unsafe or Vague exception handling often results in code that is fragile, difficult to test, and challenging to debug. 
Instead of providing meaningful error handling or clear separation of concerns, these implementations either hide the underlying problem or use language features in a way that breaks maintainability.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Hidden failures : Catching exceptions without logging or meaningful handling hides the source of issues and makes debugging difficult\nPoor diagnosis : Broad exception handling with vague error messages hides intent and makes it harder to trace the root cause\nInconsistent runtime behaviour : using assertions for control logic can lead to unpredictable behaviour depending on the JVM configuration\nUncontrolled termination : Using system.exit() directly in application logic makes code untestable and prevents proper resource cleanup", "remediation": "Replace assertions with proper condition checks and informative exceptions to ensure consistent behaviour across environments\nAvoid silent catch blocks, log exceptions or rethrow them to preserve error context\nHandle specific exception types instead of catching broad categories like Exception or RuntimeException\nRefactor abrupt shutdown calls into controlled exits using exception handling or return code to support recovery", "limitation": "", "type": "antipattern", "source_file": "unsafe_or_vague_exception_handling.json"}, "3": {"name": "Deep Nesting", "description": "Deep Nesting occurs when conditional or loop blocks are embedded within one another across multiple levels, creating code with high indentation and complex control flow. 
While not always increasing cyclomatic complexity linearly, deep nesting significantly raises cognitive complexity (the mental effort required to understand, modify, and debug a method).", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Low readability : The \"arrowhead\" structure makes it hard to trace logic and understand the intended flow.\nHigh cognitive load : Developers must mentally track the conditions that lead to or prevent reaching a particular line of code.\nError-prone maintenance : Adding or modifying logic inside deeply nested blocks increases the risk of missing cases or introducing bugs.\nInhibited reuse and testing : Deep nesting often combines concerns that should be split into smaller, testable methods or units.\nPoor diffs in version control : Even small changes can alter indentation across many lines, making reviews harder.", "remediation": "Guard clauses (early return) : Exit early when preconditions fail, flattening the control flow\nExtract method : Isolate deeply nested blocks into private methods with clear names to separate concerns and reduce depth.\nInvet conditionals : Invert logic to return early or skip unnecessary branches.\nReplace nested loops with streams (Java-specific): Abstract common filtering or mapping logic into declarative operations.\nUse pattern matching : Replace layered `if` chains with clearer structural or type-based matching.\nEncapsulate state checks : Group multiple conditionals into intention-revealing boolean helpers or state objects.", "limitation": "Can conflict with existing code style : Teams unfamiliar with guard clauses may resist early exits or multiple returns.\nRefactoring can obscure logic during transition : Extracted methods must be clearly named to preserve readability and avoid confusion.\nNested logic may be unavoidable in rare edge cases : Complex parsing, state machines, or embedded domain-specific languages may naturally involve deeper control structures.", 
"type": "antipattern", "source_file": "deep_nesting.json"}, "4": {"name": "SRP Violation", "description": "The Single Responsibility Principle (SRP) is one of the SOLID principles of object-oriented design, stating that a class (or, at a lower level, a method) should have only one reason to change (i.e., it should have only one responsibility or concern). A SRP Violation occurs when a class (method) takes on multiple unrelated responsibilities, making it harder to maintain, test, and understand. These violations often result in bloated classes or methods that mix unrelated concerns such as I/O, business logic, error handling, and configuration.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor maintainability : Changing one responsibility might inadvertently affect others, introducing bugs.\nLow cohesion : Code with unrelated responsibilities lacks a clear purpose, reducing clarity and reusability.\nDifficult testing : Unit testing becomes more complex as setup may require mocking or initializing unrelated dependencies.\nCode duplication and tight coupling : Responsibilities are harder to reuse or share, often leading to repeated logic or tight inter-class dependencies.\nHarder onboarding : New developers struggle to understand the purpose and scope of large, multi-purpose classes.", "remediation": "Extract Method - Split complex methods into smaller, single-purpose private methods.\nEarly Return (Guard Clauses) - Use early exits to reduce nested logic and clarify separate responsibilities.\nUse Local Functions/Lambdas - Encapsulate small inline logic into local functions for clarity.\nEncapsulate Temporary Variables - Move logic-heavy expressions into descriptive helper methods.\nGroup Related Logic - Cluster related operations into distinct helper methods within the same class.\nSeparate Concerns in Loops - Extract filtering, transforming, and aggregating into distinct steps.\nIsolate Logging/Error Handling - 
Move side-effect code like logging into dedicated private methods (unless the logging is a trivial single line and does not obscure business logic).", "limitation": "Fixing certain SRP violations (especially class-level violations) requires changing public method signatures, creating new classes, and/or breaking interfaces.\nMethods often depend on multiple injected services or shared state; untangling responsibilities might require broader architectural changes.\nLogging, error handling, metrics, and security checks are often scattered across responsibilities and difficult to isolate cleanly at the method level without aspect-oriented programming (AOP) or middleware/interceptor patterns.", "type": "antipattern", "source_file": "srp_violation.json"}, "5": {"name": "Magic Constants", "description": "Magic constants (or magic numbers) are hard-coded literal values (e.g., 3.14, 42, \"admin\") that appear directly in code without context or explanation. These values become problematic when their purpose is unclear, undocumented, or reused inconsistently. 
While some literals (like 0, 1, or -1) may be self-explanatory in some contexts, others represent thresholds, identifiers, or rules that should be named and documented for clarity.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Poor readability : Developers must guess the meaning of the value, increasing cognitive load.\nLow maintainability : Changing the value requires updating it everywhere, risking inconsistent updates.\nHarder debugging : Literal values lack descriptive meaning in stack traces, logs, or debuggers.\nDuplication : The same literal used in multiple places leads to repeated logic and tighter coupling.\nViolation of DRY : Embeds implicit meaning multiple times without abstraction.", "remediation": "Define Constants - Move literal values into named constants (e.g., const, final, or static readonly).\nUse Enums - Group related constants (e.g., roles, statuses) as enumerations with meaningful names.", "limitation": "Extracting trivial values like 0 or 1 may clutter code and reduce clarity if overdone.\nIn performance-critical code, indirection through constants or functions may introduce slight overhead.\nOver-abstracting unnamed constants (e.g., MAXCOUNT3) may make code harder to understand.\nIf a value is used only once and is self-explanatory, extracting it may be unnecessary overhead.", "type": "antipattern", "source_file": "magic_constants.json"}, "6": {"name": "Duplicate Code", "description": "Duplicate Code occurs when identical or very similar code blocks are repeated throughout the codebase. This pattern creates maintenance overhead, increases the likelihood of bugs, and violates the DRY (Don't Repeat Yourself) principle. 
Common examples include repeated null checks, validation logic, and similar conditional patterns across different methods or classes.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Maintenance overhead : Changes need to be applied in multiple places, increasing the risk of inconsistencies\nBug multiplication : A bug in duplicated code affects multiple locations, making fixes more complex\nCode bloat : Repeated code increases the overall size of the codebase without adding functionality\nViolation of DRY principle : Makes the code harder to understand and reason about", "remediation": "", "limitation": "Over-abstraction can make code harder to understand if the duplication is minimal or contextually different\nPremature extraction of methods may create unnecessary coupling between unrelated parts of the system\nSome duplication might be acceptable if the code serves different business contexts", "type": "antipattern", "source_file": "duplicate_code.json"}, "7": {"name": "God Class", "description": "A God Class anti-pattern refers to a class that centralises too many responsibilities in a single location, becoming overly complex and difficult to maintain. Such classes tend to know too much, do too much, and interact with many different parts of the system. 
This leads to tightly coupled code, reduces modularity, and makes the system hard to test and extend.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "High coupling: God Classes tend to interact with many other classes and modules, thereby reducing system modularity and increasing the risk of changes.\nPoor maintainability : Large, complex classes are difficult to read, understand, and modify.\nLow reusability : The class becomes so specific and bloated that it is rarely useful outside of its original context.\nHidden dependencies : God Classes often hide dependencies within fields or methods, making the codebase less transparent.", "remediation": "", "limitation": "", "type": "antipattern", "source_file": "god_class.json"}, "8": {"name": "Middle Man", "description": "The Middle Man anti-pattern occurs when a class exists primarily to delegate calls to another class without adding meaningful logic of its own. Essentially, the class acts as a pass-through or proxy, forwarding method calls without adding value. 
While delegation is sometimes necessary for abstraction, excessive or trivial delegation leads to unnecessary indirection and increases maintenance overhead.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Extra Indirection : Code must go through one more layer, which can complicate understanding the code flow.\nIncreased maintenance : When the delegated class changes, the middle-man class often needs updates for all its pass-through methods.\nLow cohesion : The middle-man class has little real logic, making its purpose unclear\nCode bloat : Many trivial delegation methods clutter the class, making it harder to navigate.\nHarder debugging : Tracing behaviour through multiple layers of delegation adds cognitive load.", "remediation": "", "limitation": "Some delegation is unavoidable, e.g., to implement an interface or provide a stable abstraction layer\nRemoving middleman classes may break existing APIs or require refactoring client code.\nIn some cases, delegation is part of a design pattern (like a proxy), which is intentional and not an anti-pattern", "type": "antipattern", "source_file": "middle_man.json"}, "9": {"name": "Generic Exception handling", "description": "Generic exception handling refers to the use of broad or unspecific catch blocks (catching but ignoring the exceptions). These patterns obscure the true source of errors, suppress the useful debugging information and can unintentionally hide critical failures. 
This anti-pattern often stems from the desire to keep the code running, but typically leads to weak systems and increased technical debt.", "category": "Uncategorised", "language": "Any", "severity": "MEDIUM", "problem": "Loss of context : Catching high-level exceptions removes the granularity needed to understand specific failure reasons\nRepeated boilerplate : Developers may re-implement logging, default values, or stream-closing logic instead of using safe utility methods\nViolation of fail-fast principles : Silent or overly generic handling can let critical errors go unnoticed for too long", "remediation": "", "limitation": "Catching too many specific exceptions can bloat the code and reduce readability.\nRefactoring exception handling may require thorough testing to avoid regressions.\nOver-logging exceptions can clutter logs and obscure real issues.\nSecurity-sensitive applications may need specialised exception handling strategies to avoid leaks", "type": "antipattern", "source_file": "generic_exception_handling.json"}}} \ No newline at end of file From 1003a4f0e8b2e541c42d37e79c4318fc43d3b91f Mon Sep 17 00:00:00 2001 From: Avinash Date: Sat, 23 Aug 2025 10:36:01 +0100 Subject: [PATCH 8/9] Unit test updated for explainer --- .../src/core/prompt/prompt_manager.py | 31 ++++++++++--------- .../unit_test/prompt/test_prompt_manager.py | 20 ++++++++++-- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py index 1f1abef..d7fc7dd 100644 --- a/AntiPattern_Remediator/src/core/prompt/prompt_manager.py +++ b/AntiPattern_Remediator/src/core/prompt/prompt_manager.py @@ -41,34 +41,37 @@ def _load_all_prompts(self) -> None: def _load_prompt_from_yaml(self, filename: str, prompt_key: str) -> None: """Load a prompt configuration from a YAML file.""" yaml_path = self.prompt_directory / filename - + if not yaml_path.exists(): print(f"Warning: Prompt 
file {yaml_path} not found") return - + try: with open(yaml_path, 'r', encoding='utf-8') as file: config = yaml.safe_load(file) - if prompt_key not in config: + + if not config or prompt_key not in config: print(f"Warning: Section '{prompt_key}' not found in {filename}") return - - prompt_config = config[prompt_key] - # Build messages in (role, content) format - messages = [] - if prompt_config.get("system"): - messages.append(("system", prompt_config["system"])) - if prompt_config.get("user"): - messages.append(("user", prompt_config["user"])) - messages.append(MessagesPlaceholder("msgs")) + prompt_config = config.get(prompt_key) or {} + + # Always include System first (empty string if not provided) to satisfy tests + system_text = str(prompt_config.get("system", "") or "") + user_text = str(prompt_config.get("user", "") or "") + + messages = [ + ("system", system_text), # always present (possibly empty) + ("user", user_text), # always present (possibly empty) + MessagesPlaceholder("msgs"), # conversation history + ] - # Use the correct constructor self._prompt_cache[prompt_key] = ChatPromptTemplate.from_messages(messages) print(f"Loaded prompt '{prompt_key}' from {filename}") - + except Exception as e: print(f"Error loading prompt from {filename}: {e}") + def get_prompt(self, prompt_key: str) -> Optional[ChatPromptTemplate]: if prompt_key not in self._prompt_cache: diff --git a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py index 088af2f..6e859c4 100644 --- a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py +++ b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py @@ -44,6 +44,7 @@ def __init__(self): self.REFACTOR_STRATEGIST = "refactor_strategist" self.CODE_TRANSFORMER = "code_transformer" self.CODE_REVIEWER = "code_reviewer" + self.EXPLAINER_AGENT = "explainer" self.prompt_directory = Path(__file__).parent self._prompt_cache = {} 
self._load_all_prompts() # Call this to match real behavior @@ -87,7 +88,8 @@ def _load_all_prompts(self): self.ANTIPATTERN_SCANNER, self.REFACTOR_STRATEGIST, self.CODE_TRANSFORMER, - self.CODE_REVIEWER + self.CODE_REVIEWER, + self.EXPLAINER_AGENT, ] for prompt_key in prompt_constants: @@ -155,6 +157,7 @@ def test_initialization_creates_correct_attributes(self): assert hasattr(manager, 'REFACTOR_STRATEGIST') assert hasattr(manager, 'CODE_TRANSFORMER') assert hasattr(manager, 'CODE_REVIEWER') + assert hasattr(manager, 'EXPLAINER_AGENT') assert hasattr(manager, 'prompt_directory') assert hasattr(manager, '_prompt_cache') assert isinstance(manager._prompt_cache, dict) @@ -176,6 +179,7 @@ def test_prompt_constants_have_correct_values(self): assert manager.REFACTOR_STRATEGIST == "refactor_strategist" assert manager.CODE_TRANSFORMER == "code_transformer" assert manager.CODE_REVIEWER == "code_reviewer" + assert manager.EXPLAINER_AGENT == "explainer" def test_prompt_directory_is_set_correctly(self): """Test that prompt directory is assigned from settings.""" @@ -505,6 +509,7 @@ def test_initialization_with_missing_directory(self, capsys): manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + manager.EXPLAINER_AGENT = "explainer" manager.prompt_directory = Path("/non/existent/path") manager._prompt_cache = {} @@ -575,6 +580,12 @@ def temp_prompt_files(self): 'system': 'You are an expert code reviewer.', 'user': 'Review this code for quality and best practices: {code}' } + }, + 'explainer.yaml': { + 'explainer': { + 'system': 'You are a senior software reviewer.', + 'user': 'Explain: {code}\nLang: {language}\nCtx: {context}' + } } } @@ -593,6 +604,7 @@ def test_load_all_prompts_loads_all_available_files(self, temp_prompt_files, cap manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + 
manager.EXPLAINER_AGENT = "explainer" manager.prompt_directory = temp_prompt_files manager._prompt_cache = {} @@ -600,15 +612,16 @@ def test_load_all_prompts_loads_all_available_files(self, temp_prompt_files, cap manager._load_all_prompts() # Assert: Verify all prompts were loaded - assert len(manager._prompt_cache) == 4 + assert len(manager._prompt_cache) == 5 assert "antipattern_scanner" in manager._prompt_cache assert "refactor_strategist" in manager._prompt_cache assert "code_transformer" in manager._prompt_cache assert "code_reviewer" in manager._prompt_cache + assert "explainer" in manager._prompt_cache # Verify success message captured = capsys.readouterr() - assert "Successfully loaded 4 prompts" in captured.out + assert "Successfully loaded 5 prompts" in captured.out def test_load_all_prompts_handles_partial_failures(self, capsys): """Test that _load_all_prompts continues loading even if some files are missing.""" @@ -633,6 +646,7 @@ def test_load_all_prompts_handles_partial_failures(self, capsys): manager.REFACTOR_STRATEGIST = "refactor_strategist" manager.CODE_TRANSFORMER = "code_transformer" manager.CODE_REVIEWER = "code_reviewer" + manager.EXPLAINER_AGENT = "explainer" manager.prompt_directory = temp_path manager._prompt_cache = {} From 69c00d0aae7f10bef8e07d8109237a41a53b0a40 Mon Sep 17 00:00:00 2001 From: Avinash Date: Wed, 27 Aug 2025 10:16:54 +0100 Subject: [PATCH 9/9] Final changes --- .../src/core/agents/explainer.py | 85 ++++++++++++++----- AntiPattern_Remediator/src/core/state.py | 4 + .../static/prompt/explainer.yaml | 27 +++--- .../unit_test/prompt/test_prompt_manager.py | 2 +- 4 files changed, 83 insertions(+), 35 deletions(-) diff --git a/AntiPattern_Remediator/src/core/agents/explainer.py b/AntiPattern_Remediator/src/core/agents/explainer.py index 9344121..4ab86a6 100644 --- a/AntiPattern_Remediator/src/core/agents/explainer.py +++ b/AntiPattern_Remediator/src/core/agents/explainer.py @@ -1,8 +1,8 @@ """ ExplainerAgent — minimal version - 
Delegates state handling to create_graph.py -- Only builds messages and parses JSON response -- Keeps code minimal and focused +- Uses PromptManager if available; otherwise a tiny inline fallback prompt +- Always passes msgs; always returns a non-empty explanation_json """ from __future__ import annotations @@ -10,6 +10,7 @@ import json from langchain_core.language_models import BaseLanguageModel +from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder from ..prompt import PromptManager from src.core.utils import extract_first_json @@ -32,33 +33,55 @@ def explain_antipattern(self, state: Dict[str, Any]) -> Dict[str, Any]: antipattern_name=state.get("antipattern_name", "Unknown antipattern"), antipattern_description=state.get("antipattern_description", ""), antipatterns_json=json.dumps(state.get("antipatterns_json", []), ensure_ascii=False), + msgs=state.get("msgs", []), # ensure MessagesPlaceholder is satisfied ) messages = self._build_messages(**kwargs) - response = self.llm.invoke(messages) - raw = getattr(response, "content", None) or str(response) + + try: + response = self.llm.invoke(messages) + raw = getattr(response, "content", None) or str(response) + except Exception as e: + raw = f"LLM error: {e}" + state["explanation_response_raw"] = raw - parsed = extract_first_json(raw) - state["explanation_json"] = parsed if isinstance(parsed, dict) else {} + # Robust parse: accept dict, wrap list, or emit a minimal fallback + try: + parsed = extract_first_json(raw) + except Exception: + parsed = None + + if isinstance(parsed, dict): + state["explanation_json"] = parsed + elif isinstance(parsed, list): + state["explanation_json"] = {"items": parsed} + else: + state["explanation_json"] = self._fallback_payload(state) + return state def display_explanation(self, state: Dict[str, Any]) -> Dict[str, Any]: print("\n=== Explanation (raw) ===\n", state.get("explanation_response_raw", "N/A")) if state.get("explanation_json"): - print("\n=== Explanation (JSON) 
===\n", json.dumps(state["explanation_json"], indent=2, ensure_ascii=False)) + print("\n=== Explanation (JSON) ===\n", + json.dumps(state["explanation_json"], indent=2, ensure_ascii=False)) return state def _build_messages(self, **kwargs) -> Any: - try: - getp = getattr(self.prompt_manager, "get_prompt", None) - if callable(getp): - prompt = getp(PROMPT_KEY) - if prompt is not None: - return prompt.format_messages(**kwargs) - except Exception: - pass + # Always ensure msgs exists + if "msgs" not in kwargs or kwargs["msgs"] is None: + kwargs = {**kwargs, "msgs": []} + # 1) Try preloaded template from PromptManager + prompt = None + getp = getattr(self.prompt_manager, "get_prompt", None) + if callable(getp): + prompt = getp(PROMPT_KEY) + if prompt is not None: + return prompt.format_messages(**kwargs) + + # 2) Minimal inline fallback schema = { "items": [{ "antipattern_name": "", @@ -76,11 +99,33 @@ def _build_messages(self, **kwargs) -> Any: "closing_summary": "" } content = ( - "Given inputs (JSON):\n" + json.dumps(kwargs, ensure_ascii=False) + + "Given inputs (JSON):\n" + json.dumps({k: v for k, v in kwargs.items() if k != "msgs"}, ensure_ascii=False) + "\nRespond with STRICT JSON using exactly this schema:\n" + json.dumps(schema, ensure_ascii=False) ) - return [ - {"role": "system", "content": "Return STRICT JSON only. No commentary."}, - {"role": "user", "content": content}, - ] + fallback = ChatPromptTemplate.from_messages([ + ("system", "Return STRICT JSON only. 
No commentary."), + ("user", content), + MessagesPlaceholder("msgs"), + ]) + return fallback.format_messages(**kwargs) + + @staticmethod + def _fallback_payload(state: Dict[str, Any]) -> Dict[str, Any]: + """Tiny fallback so downstream never breaks if parsing fails.""" + return { + "items": [{ + "antipattern_name": state.get("antipattern_name", "Unknown antipattern"), + "antipattern_description": state.get("antipattern_description", ""), + "impact": "", + "why_it_is_bad": "", + "how_we_fixed_it": state.get("refactor_rationale", ""), + "refactored_code": state.get("refactored_code", ""), + "summary": "Auto-generated minimal explanation (parser fallback)." + }], + "what_changed": [], + "why_better": [], + "principles_applied": [], + "trade_offs": [], + "closing_summary": "" + } diff --git a/AntiPattern_Remediator/src/core/state.py b/AntiPattern_Remediator/src/core/state.py index a0b34f3..531cfb1 100644 --- a/AntiPattern_Remediator/src/core/state.py +++ b/AntiPattern_Remediator/src/core/state.py @@ -8,6 +8,7 @@ class AgentState(TypedDict): """State definition for passing data through the workflow""" code: str # Code to be analyzed + language: Optional[str] # Language of the code (used by ExplainerAgent) context: Optional[str] # Context retrieved from knowledge base (scanner) trove_context: Optional[str] # Context retrieved from the Anti-Pattern Trove (TinyDB/Chroma) antipatterns_scanner_results: Optional[str] @@ -20,3 +21,6 @@ class AgentState(TypedDict): explanation_response_raw: Optional[str] # Raw LLM output from explainer explanation_json: Optional[Dict[str, Any]] # Parsed JSON explanation current_file_path: Optional[str] # Path to the current file being processed + + explanation_response_raw: Optional[str] # Raw LLM output from explainer + explanation_json: Optional[Dict[str, Any]] # Parsed JSON explanation diff --git a/AntiPattern_Remediator/static/prompt/explainer.yaml b/AntiPattern_Remediator/static/prompt/explainer.yaml index 8963f95..be29ab7 100644 --- 
a/AntiPattern_Remediator/static/prompt/explainer.yaml +++ b/AntiPattern_Remediator/static/prompt/explainer.yaml @@ -1,9 +1,8 @@ explainer: - template: | - You are a senior software reviewer. - Your job is to explain detected anti-patterns and the applied refactor in a clear, structured way. - Output STRICT JSON only — no commentary outside JSON. + system: | + You are a senior software reviewer. Output STRICT JSON only — no commentary outside the JSON object. + user: | === Inputs === Language: {language} Context: {context} @@ -15,10 +14,10 @@ explainer: Refactor Rationale: {refactor_rationale} - === Required Output Schema === - { + === Return EXACTLY this JSON structure === + {{ "items": [ - { + {{ "antipattern_name": "", "antipattern_description": "", "impact": "", @@ -26,17 +25,17 @@ explainer: "how_we_fixed_it": "", "refactored_code": "", "summary": "" - } + }} ], "what_changed": [], "why_better": [], "principles_applied": [], "trade_offs": [], "closing_summary": "" - } + }} - Notes: - - Always return valid JSON. - - Use multiple entries under "items" if more than one antipattern is relevant. - - Keep `refactored_code` short (or truncated if needed). - - Fill all fields, even if briefly. + Rules: + - Return valid JSON only (no code fences, no prose). + - If multiple antipatterns apply, include multiple entries in "items". + - Keep "refactored_code" concise; truncate if needed but keep it valid. + - Populate all fields; use brief placeholders if unsure. 
diff --git a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py index 6e859c4..1fb5dec 100644 --- a/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py +++ b/AntiPattern_Remediator/test/unit_test/prompt/test_prompt_manager.py @@ -583,7 +583,7 @@ def temp_prompt_files(self): }, 'explainer.yaml': { 'explainer': { - 'system': 'You are a senior software reviewer.', + 'system': 'You are a senior software code explainer.', 'user': 'Explain: {code}\nLang: {language}\nCtx: {context}' } }