-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocument_processor.py
More file actions
39 lines (32 loc) · 1.41 KB
/
document_processor.py
File metadata and controls
39 lines (32 loc) · 1.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import logging
from typing import Any
from config import CONCERT_KEYWORDS
from llm_integrator import generate_summary
def is_concert_domain(text: str) -> bool:
    """Report whether *text* mentions at least one concert-tour keyword.

    The text is lower-cased once, then every entry of ``CONCERT_KEYWORDS``
    is tested as a plain substring of it. Because only the text is
    lower-cased, the keywords are presumably stored in lower case already
    (confirm in ``config``).

    Args:
        text: The document text to inspect.

    Returns:
        True as soon as any keyword is found in the text, otherwise False.
    """
    haystack = text.lower()
    for term in CONCERT_KEYWORDS:
        if term in haystack:
            return True
    return False
def summarize_document(text: str, llm_client: Any, provider_name: str) -> str:
    """Generate a concise summary of the document text with the given LLM client.

    Args:
        text: The input document text.
        llm_client: The initialized LLM client object (HF dict or Gemini model).
        provider_name: The name of the active provider ('huggingface' or 'gemini').

    Returns:
        The summary string, or a message starting with ``"Error:"`` when the
        text is empty or blank, or when summarization fails.
    """
    logging.info(
        "Attempting to generate summary using LLM provider: %s", provider_name
    )

    # Whitespace-only text carries nothing to summarize either, so reject it
    # here instead of spending an LLM call on it.
    if not text or not text.strip():
        logging.warning("Summarization attempt on empty text.")
        return "Error: Cannot summarize empty document."

    # Delegate to the LLM integration layer, passing the client and provider.
    summary = generate_summary(text, llm_client, provider_name)

    # Keep the "always returns a string" contract even if the integration
    # layer hands back None or another non-text value; without this guard the
    # startswith() call below would raise instead of reporting an error.
    if not isinstance(summary, str):
        logging.error(
            "Summarization failed (%s): provider returned %s instead of text",
            provider_name,
            type(summary).__name__,
        )
        return "Error: LLM provider returned no summary text."

    # A result beginning with "Error:" is treated as a failure report from
    # the integration layer; it is logged and passed through unchanged.
    if summary.startswith("Error:"):
        logging.error("Summarization failed (%s): %s", provider_name, summary)
    else:
        logging.info("Summary generated successfully (%s).", provider_name)
    return summary