From 2003a4bebd291b9ce5bf3e35655111e978274a27 Mon Sep 17 00:00:00 2001 From: Cubix33 Date: Mon, 16 Mar 2026 08:44:36 +0000 Subject: [PATCH] #259- decouple prompt and code --- src/llm.py | 26 +++++++++----------------- src/main.py | 5 +++-- src/prompt.txt | 7 +++++++ 3 files changed, 19 insertions(+), 19 deletions(-) create mode 100644 src/prompt.txt diff --git a/src/llm.py b/src/llm.py index 70937f9..e326728 100644 --- a/src/llm.py +++ b/src/llm.py @@ -7,9 +7,13 @@ class LLM: def __init__(self, transcript_text=None, target_fields=None, json=None): if json is None: json = {} - self._transcript_text = transcript_text # str - self._target_fields = target_fields # List, contains the template field. - self._json = json # dictionary + self._transcript_text = transcript_text + self._target_fields = target_fields + self._json = json + + pp = os.path.join(os.path.dirname(__file__), "prompt.txt") + with open(pp, "r", encoding="utf-8") as f: + self.sp = f.read() def type_check_all(self): if type(self._transcript_text) is not str: @@ -24,29 +28,17 @@ def type_check_all(self): ) def build_prompt(self, current_field): - """ - This method is in charge of the prompt engineering. It creates a specific prompt for each target field. - @params: current_field -> represents the current element of the json that is being prompted. - """ - prompt = f""" - SYSTEM PROMPT: - You are an AI assistant designed to help fillout json files with information extracted from transcribed voice recordings. - You will receive the transcription, and the name of the JSON field whose value you have to identify in the context. Return - only a single string containing the identified value for the JSON field. - If the field name is plural, and you identify more than one possible value in the text, return both separated by a ";". - If you don't identify the value in the provided text, return "-1". - --- + prompt = f"""{self.sp} DATA: Target JSON field to find in text: {current_field} TEXT: {self._transcript_text} """ - return prompt def main_loop(self): # self.type_check_all() - for field in self._target_fields.keys(): + for field in self._target_fields: prompt = self.build_prompt(field) # print(prompt) # ollama_url = "http://localhost:11434/api/generate" diff --git a/src/main.py b/src/main.py index 5bb632b..54f6b0a 100644 --- a/src/main.py +++ b/src/main.py @@ -3,6 +3,7 @@ from commonforms import prepare_form from pypdf import PdfReader from controller import Controller +from typing import Union def input_fields(num_fields: int): fields = [] @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio if __name__ == "__main__": file = "./src/inputs/file.pdf" user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is , and the date is 01/02/2005" - fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"] + descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"] prepared_pdf = "temp_outfile.pdf" prepare_form(file, prepared_pdf) @@ -80,4 +81,4 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio num_fields = 0 controller = Controller() - controller.fill_form(user_input, fields, file) + controller.fill_form(user_input, descriptive_fields, file) diff --git a/src/prompt.txt b/src/prompt.txt new file mode 100644 index 0000000..c977fbc --- /dev/null +++ b/src/prompt.txt @@ -0,0 +1,7 @@ +SYSTEM PROMPT: +You are an AI assistant designed to help fillout json files with information extracted from transcribed voice recordings. +You will receive the transcription, and the name of the JSON field whose value you have to identify in the context. Return +only a single string containing the identified value for the JSON field. +If the field name is plural, and you identify more than one possible value in the text, return both separated by a ";". +If you don't identify the value in the provided text, return "-1". +--- \ No newline at end of file