import logging
from typing import Any, Dict, List

from src.file_manipulator import FileManipulator
from src.timeline_extractor import TimelineExtractor

logger = logging.getLogger(__name__)


class Controller:
    """
    Controller layer for FireForm.

    Orchestrates the processing pipeline: form filling and template creation
    are delegated to ``FileManipulator``; when form filling yields a dict, the
    timeline produced by ``TimelineExtractor`` is attached to it.
    """

    def __init__(self) -> None:
        self.file_manipulator = FileManipulator()
        self.timeline_extractor = TimelineExtractor()

    def fill_form(
        self,
        user_input: str,
        fields: List[str],
        pdf_form_path: str,
    ) -> Any:
        """
        Fill the PDF form and attach the incident timeline to the result.

        Args:
            user_input: Incident narrative; forwarded to the file manipulator
                and, for dict results, to the timeline extractor.
            fields: Form field names, forwarded unchanged.
            pdf_form_path: Path of the PDF form, forwarded unchanged.

        Returns:
            If ``FileManipulator.fill_form`` returns a dict, a shallow copy of
            it with an added ``"timeline"`` key. Any other result is returned
            unchanged.
        """
        # Arguments stay positional so the delegate sees the same call as before.
        result = self.file_manipulator.fill_form(
            user_input,
            fields,
            pdf_form_path,
        )

        if not isinstance(result, dict):
            # NOTE(review): FileManipulator.fill_form's return type is not
            # visible from here. A timeline can only be attached to a dict,
            # so make the drop visible instead of discarding it silently.
            logger.warning(
                "fill_form result is %s, not a dict; timeline not attached.",
                type(result).__name__,
            )
            return result

        # Extract only when the timeline will actually be used.
        timeline = self.timeline_extractor.extract_timeline(user_input)

        # Copy so the object handed back by the file manipulator is not
        # mutated behind its owner's back.
        enriched = dict(result)
        enriched["timeline"] = timeline
        return enriched

    def create_template(self, pdf_path: str) -> Dict[str, Any]:
        """
        Create a form template from the PDF at ``pdf_path``.

        Pure delegation: returns whatever ``FileManipulator.create_template``
        returns.
        """
        return self.file_manipulator.create_template(pdf_path)
from unittest.mock import MagicMock, patch

import pytest

from src.controller import Controller


class TestControllerTimeline:
    """
    Test suite for verifying timeline extraction integration
    within the FireForm controller pipeline.
    """

    @pytest.fixture
    def controller(self):
        """
        Provide a Controller whose FileManipulator is fully mocked.

        FileManipulator is patched while the Controller is constructed so the
        real class is never instantiated; the TimelineExtractor stays real
        because its output is what these tests verify.
        """
        with patch("src.controller.FileManipulator"):
            controller = Controller()

        controller.file_manipulator = MagicMock()

        # Simulate file_manipulator returning a valid result dict.
        controller.file_manipulator.fill_form.return_value = {
            "status": "success",
            "filled_pdf": "output.pdf",
        }

        return controller

    def test_timeline_extraction_integration(self, controller):
        """
        Ensure timeline data is added to controller output.
        """
        incident_text = (
            "Engine 12 arrived at 3:10 PM. "
            "Fire contained at 3:25 PM."
        )

        result = controller.fill_form(
            user_input=incident_text,
            fields=[],
            pdf_form_path="dummy.pdf",
        )

        assert isinstance(result, dict)
        assert "timeline" in result
        assert len(result["timeline"]) == 2
        assert result["timeline"][0]["time"] == "15:10"
        assert result["timeline"][1]["time"] == "15:25"

    def test_no_timeline_when_no_times(self, controller):
        """
        Ensure timeline is empty when no timestamps exist.
        """
        incident_text = "Firefighters responded quickly to the incident."

        result = controller.fill_form(
            user_input=incident_text,
            fields=[],
            pdf_form_path="dummy.pdf",
        )

        assert "timeline" in result
        assert result["timeline"] == []

    def test_controller_pipeline_still_calls_file_manipulator(self, controller):
        """
        Ensure existing pipeline behavior is preserved: the arguments reach
        FileManipulator.fill_form unchanged and its result keys survive.
        """
        incident_text = "Engine arrived at 3:10 PM."

        result = controller.fill_form(
            user_input=incident_text,
            fields=["name", "location"],
            pdf_form_path="incident_form.pdf",
        )

        # The controller forwards its three arguments positionally.
        controller.file_manipulator.fill_form.assert_called_once_with(
            incident_text,
            ["name", "location"],
            "incident_form.pdf",
        )
        assert result["status"] == "success"
        assert result["filled_pdf"] == "output.pdf"

    def test_invalid_input_handling(self, controller):
        """
        Ensure an empty narrative still yields a result with an empty timeline.
        """
        result = controller.fill_form(
            user_input="",
            fields=[],
            pdf_form_path="dummy.pdf",
        )

        assert isinstance(result, dict)
        assert "timeline" in result
        assert result["timeline"] == []

    def test_non_dict_result_is_returned_unchanged(self, controller):
        """
        Ensure a non-dict FileManipulator result passes through untouched.
        """
        controller.file_manipulator.fill_form.return_value = "output.pdf"

        result = controller.fill_form(
            user_input="Engine arrived at 3:10 PM.",
            fields=[],
            pdf_form_path="dummy.pdf",
        )

        assert result == "output.pdf"
"""
Incident timeline extraction for FireForm.

Finds clock times in an incident narrative and returns the sentence text
around each one as chronologically sorted timeline events.

Author: FireForm Contributor
"""

import logging
import re
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import List, Optional


logger = logging.getLogger(__name__)


# Precompiled regex patterns

# A clock time: "3:10", "15:10", "3:10 PM", "3:10pm", "3:10 Pm", "3:10 p.m".
# The meridiem is case-insensitive and may be dotted. Without a meridiem the
# minutes must end on a word boundary, so "3:105" and "3:10abc" do not match.
TIME_PATTERN = re.compile(
    r"\b(\d{1,2}:\d{2}(?:\s?[ap]\.?m\b|\b))",
    re.IGNORECASE,
)

# Runs of sentence terminators. A period is NOT a terminator when it is the
# inner dot of "a.m"/"p.m" (preceded by a stand-alone a/p AND followed by a
# stand-alone m); otherwise "3:10 p.m." would be cut in half and lose its
# meridiem.
SENTENCE_SPLIT_PATTERN = re.compile(
    r"(?:(?<!\b[ap])\.|\.(?!m\b)|[!?\n])+",
    re.IGNORECASE,
)

# Decomposes one time string into hour, minute and optional meridiem letter.
_TIME_PARTS = re.compile(
    r"(\d{1,2}):(\d{1,2})\s*(?:([ap])\.?m\.?)?",
    re.IGNORECASE,
)


@dataclass
class TimelineEvent:
    """
    Data model representing a timeline event.
    """
    event: str  # sentence text with the matched time removed
    time: str   # normalized 24-hour "HH:MM"


class TimelineExtractor:
    """
    Extracts chronological timeline events from incident narratives.
    """

    def __init__(self) -> None:
        self.time_pattern = TIME_PATTERN

    def normalize_time(self, time_str: str) -> Optional[str]:
        """
        Normalize a time string into 24-hour HH:MM format.

        Accepts 24-hour times ("15:10") and 12-hour times with a meridiem in
        any letter case, with or without dots or a separating space
        ("3:10 PM", "3:10pm", "3:10 p.m."). Parsing is done by hand rather
        than with ``strptime("%p")`` so the result does not depend on the
        process locale.

        Returns:
            The "HH:MM" string, or None (after logging a warning) when the
            input is not a valid time, e.g. "25:00" or "13:00 PM".
        """
        parts = _TIME_PARTS.fullmatch(time_str.strip())

        if parts is not None:
            hour = int(parts.group(1))
            minute = int(parts.group(2))
            meridiem = parts.group(3)

            if meridiem is None:
                hour_ok = hour <= 23
            else:
                # 12-hour clock: hours run 1..12; 12 AM is 00, 12 PM is 12.
                hour_ok = 1 <= hour <= 12
                hour = hour % 12 + (12 if meridiem.lower() == "p" else 0)

            if hour_ok and minute <= 59:
                return f"{hour:02d}:{minute:02d}"

        logger.warning("Unable to normalize time: %s", time_str)
        return None

    def split_sentences(self, text: str) -> List[str]:
        """
        Split narrative into sentences.

        Splits on runs of ".", "!", "?" and newlines, keeping dotted
        meridiems ("a.m", "p.m") intact. Empty fragments are dropped and
        each sentence is stripped of surrounding whitespace.
        """
        fragments = (part.strip() for part in SENTENCE_SPLIT_PATTERN.split(text))
        return [fragment for fragment in fragments if fragment]

    def extract_events(self, text: str) -> List[TimelineEvent]:
        """
        Extract timeline events from narrative text.

        Each distinct time found in a sentence yields one event whose text is
        the sentence with that time removed and whitespace collapsed. Times
        that cannot be normalized, and sentences consisting only of a time,
        are skipped. Events are returned in narrative order.
        """
        events: List[TimelineEvent] = []

        for sentence in self.split_sentences(text):
            # dict.fromkeys de-duplicates while preserving order: a time
            # repeated within one sentence would otherwise yield identical
            # events, because replace() strips every occurrence at once.
            raw_times = dict.fromkeys(self.time_pattern.findall(sentence))

            for raw_time in raw_times:
                normalized = self.normalize_time(raw_time)
                if not normalized:
                    continue

                event_text = sentence.replace(raw_time, "").strip()
                event_text = re.sub(r"\s+", " ", event_text)
                if not event_text:
                    continue

                events.append(TimelineEvent(event=event_text, time=normalized))

        return events

    def sort_events(self, events: List[TimelineEvent]) -> List[TimelineEvent]:
        """
        Sort events chronologically.

        The sort is stable, so events sharing a time keep their input order.
        Events whose ``time`` cannot be parsed as "HH:MM" sort first.
        """

        def sort_key(event: TimelineEvent) -> datetime:
            try:
                return datetime.strptime(event.time, "%H:%M")
            except (AttributeError, TypeError, ValueError):
                return datetime.min

        return sorted(events, key=sort_key)

    def extract_timeline(self, text: str) -> List[dict]:
        """
        Main public API.

        Returns structured timeline data: a chronologically sorted list of
        ``{"event": ..., "time": ...}`` dicts. Never raises; a non-string
        input, an empty narrative or an internal failure all yield ``[]``.
        """
        if not isinstance(text, str):
            logger.warning("Invalid input provided to timeline extractor.")
            return []

        # An empty narrative is a normal case, not an error worth a warning.
        if not text.strip():
            return []

        try:
            ordered = self.sort_events(self.extract_events(text))
        except Exception:
            # Deliberate catch-all at the API boundary: a timeline failure
            # must not abort the caller's form-filling pipeline.
            logger.exception("Timeline extraction failed.")
            return []

        return [asdict(event) for event in ordered]