Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ sqlmodel
pytest
httpx
numpy<2
ollama
ollama
dateparser
28 changes: 15 additions & 13 deletions src/llm.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import os
import requests

from src.utils.normalizer import normalize_date

class LLM:
def __init__(self, transcript_text=None, target_fields=None, json=None):
Expand Down Expand Up @@ -46,7 +46,7 @@ def build_prompt(self, current_field):

def main_loop(self):
# self.type_check_all()
for field in self._target_fields.keys():
for field in self._target_fields:
prompt = self.build_prompt(field)
# print(prompt)
# ollama_url = "http://localhost:11434/api/generate"
Expand Down Expand Up @@ -82,27 +82,29 @@ def main_loop(self):
print("--------- extracted data ---------")

return self

def add_response_to_json(self, field, value):
"""
this method adds the following value under the specified field,
or under a new field if the field doesn't exist, to the json dict
"""

def add_response_to_json(self, field, value) -> None:
value = value.strip().replace('"', "")
parsed_value = None
parsed_value = value

if value != "-1":
# Check if the field is a date field (case-insensitive check)
if "date" in field.lower() or "time" in field.lower():
parsed_value = normalize_date(value)
elif value != "-1":
parsed_value = value

if ";" in value:
parsed_value = self.handle_plural_values(value)

if field in self._json.keys():
self._json[field].append(parsed_value)
# If it's a list, append; if not, turn it into one
if isinstance(self._json[field], list):
self._json[field].append(parsed_value)
else:
self._json[field] = [self._json[field], parsed_value]
else:
self._json[field] = parsed_value

return


def handle_plural_values(self, plural_value):
"""
Expand Down
5 changes: 3 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from commonforms import prepare_form
from pypdf import PdfReader
from controller import Controller
from typing import Union

def input_fields(num_fields: int):
fields = []
Expand Down Expand Up @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
if __name__ == "__main__":
file = "./src/inputs/file.pdf"
user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is <Mamañema>, and the date is 01/02/2005"
fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
prepared_pdf = "temp_outfile.pdf"
prepare_form(file, prepared_pdf)

Expand All @@ -80,4 +81,4 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
num_fields = 0

controller = Controller()
controller.fill_form(user_input, fields, file)
controller.fill_form(user_input, descriptive_fields, file)
15 changes: 15 additions & 0 deletions src/utils/normalizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import dateparser
from datetime import datetime

def normalize_date(raw_text: str, output_format: str = "%d-%m-%Y") -> str:
    """Normalize a free-form date string to ``output_format``.

    Args:
        raw_text: Date text to normalize (for example a value returned by
            the LLM). May be empty or the ``"-1"`` sentinel.
        output_format: ``strftime`` pattern used to render the parsed date.

    Returns:
        The date rendered with ``output_format`` when ``dateparser`` can
        parse ``raw_text``. Otherwise ``raw_text`` is returned unchanged;
        this covers empty or blank input, the ``"-1"`` sentinel (with or
        without surrounding whitespace) and text that cannot be parsed.
    """
    # Empty input carries no date; hand it back untouched.
    if not raw_text:
        return raw_text

    # Compare on the stripped text so a whitespace-padded "-1" sentinel (or a
    # blank string) is never handed to the parser.
    stripped = raw_text.strip()
    if not stripped or stripped == "-1":
        return raw_text

    # dateparser handles "yesterday", "2 days ago", "last Friday", etc.
    parsed_date = dateparser.parse(raw_text)
    if parsed_date:
        return parsed_date.strftime(output_format)

    # Fall back to the raw text when parsing fails.
    return raw_text