#!/usr/bin/env python3
"""Minimal FilePromptForge — OpenAI-only CLI (single-request mode)

Behavior changes:
- Processes exactly one input file per run. Use --input-file to specify the input file
  (path may be absolute or relative to the configured input_dir).
- Loads filepromptforge/default_config.yaml if present and uses it as defaults;
  CLI args override config values.
- Grounding is enabled by default and the client will perform a single provider-side
  request. There is NO fallback logic. If the provider request fails, a .meta.json
  containing error metadata is written next to the expected response file and the
  program exits with a non-zero status.
"""
import os
import sys
import argparse
import logging
import json
import yaml
from datetime import datetime
from typing import List, Tuple, Dict, Any, Optional
from pathlib import Path
from dotenv import load_dotenv

# Load .env from the script directory if present
script_dir = Path(__file__).resolve().parent
dotenv_path = script_dir / ".env"
if dotenv_path.exists():
    load_dotenv(dotenv_path)
else:
    try:
        load_dotenv()
    except Exception:
        pass

try:
    from openai import OpenAI
except Exception as e:
    print("Missing dependency: openai. Install with: pip install -r requirements.txt")
    raise

from grounding.wsg_functions import canonicalize_provider_response, build_error_metadata

LOG = None


def setup_logger(verbose: bool = False):
    global LOG
    LOG = logging.getLogger("fpf_minimal")
    LOG.setLevel(logging.DEBUG if verbose else logging.INFO)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG if verbose else logging.INFO)
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    ch.setFormatter(formatter)
    if not LOG.handlers:
        LOG.addHandler(ch)

class PromptManager:
    def __init__(self, prompts_dir: str):
        self.prompts_dir = prompts_dir

    def load_prompts(self, prompt_files: List[str]) -> str:
        prompts = []
        if not prompt_files:
            try:
                files = sorted(os.listdir(self.prompts_dir))
            except FileNotFoundError:
                raise FileNotFoundError(f"Prompts directory not found: {self.prompts_dir}")
            prompt_files = [f for f in files if os.path.isfile(os.path.join(self.prompts_dir, f))]
        for fname in prompt_files:
            path = os.path.join(self.prompts_dir, fname)
            with open(path, "r", encoding="utf-8") as fh:
                content = fh.read()
            prompts.append(content)
        return "\n".join(prompts)

class FileHandler:
    def __init__(self, input_dir: str, output_dir: str):
        self.input_dir = input_dir
        self.output_dir = output_dir

    def list_input_files(self) -> List[str]:
        # kept for compatibility but not used in single-request mode
        files = []
        for root, _, filenames in os.walk(self.input_dir):
            for fname in filenames:
                full = os.path.join(root, fname)
                rel = os.path.relpath(full, self.input_dir)
                files.append(rel)
        return sorted(files)

    def read_file(self, rel_path: str) -> str:
        full = os.path.join(self.input_dir, rel_path)
        with open(full, "r", encoding="utf-8") as fh:
            return fh.read()

    def write_file(self, rel_path: str, content: str):
        out_rel = os.path.join(os.path.dirname(rel_path), f"response_{os.path.basename(rel_path)}") if os.path.dirname(rel_path) else f"response_{os.path.basename(rel_path)}"
        full_out = os.path.join(self.output_dir, out_rel)
        os.makedirs(os.path.dirname(full_out), exist_ok=True)
        with open(full_out, "w", encoding="utf-8") as fh:
            fh.write(content)

class APIClient:
    def __init__(self, model: str, temperature: float, max_tokens: int, grounding_enabled: bool = True, base_url: Optional[str] = None):
        api_key = os.getenv("OPENAI_API_KEY", "").strip()
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY not set in environment. This tool requires a valid OpenAI API key.")
        if base_url:
            self.client = OpenAI(api_key=api_key, base_url=base_url)
        else:
            self.client = OpenAI(api_key=api_key)
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.grounding_enabled = grounding_enabled

    def send_prompt(self, system_prompt: str, user_prompt: str) -> Tuple[str, Dict[str, Any]]:
        """
        Perform a single provider-side request. No fallback logic is performed.

        Returns: (text, metadata_dict)
        On error, the provider exception propagates to the caller so the caller can write error metadata.
        """
        messages_input = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        if self.grounding_enabled:
            # Grounded calls use responses.create (as explicitly chosen). When requests are
            # routed through a proxy via base_url (e.g. LiteLLM), the proxy is relied upon to
            # map the "web_search_preview" tool if the model supports it.
            resp = self.client.responses.create(
                model=self.model,
                input=messages_input,  # the 'input' parameter accepts a list of messages
                tools=[{"type": "web_search_preview"}],  # explicitly ask for web search
                tool_choice="auto",
                temperature=self.temperature,
                max_output_tokens=self.max_tokens  # the Responses API uses max_output_tokens
            )
        else:
            # Non-grounded calls still use chat.completions
            resp = self.client.chat.completions.create(
                model=self.model,
                messages=messages_input,
                temperature=self.temperature,
                max_tokens=self.max_tokens
            )
        # Canonicalize provider response (best-effort)
        metadata = canonicalize_provider_response(resp, provider="OpenAI", model=self.model)
        text = metadata.get("text", "")
        return text, metadata

def load_config_file(script_dir: Path) -> Dict:
    cfg_path = script_dir / "default_config.yaml"
    if not cfg_path.exists():
        return {}
    try:
        with open(cfg_path, "r", encoding="utf-8") as fh:
            return yaml.safe_load(fh) or {}
    except Exception:
        return {}
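
# Illustrative default_config.yaml layout, inferred from the keys read in main() below.
# The file paths and values shown are assumptions; adapt them to your setup:
#
#   prompts_dir: test/prompts
#   output_dir: test/output
#   input_file: test/input/sample.txt   # optional; --input-file overrides it
#   llm_endpoint_url: null               # optional custom base URL for the OpenAI client
#   openai:
#     model: gpt-4
#     temperature: 0.7
#     max_tokens: 1500
#   grounding:
#     enabled: true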

def _now_iso():
    return datetime.utcnow().isoformat() + "Z"
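
# Example invocation (a minimal sketch; the input path is illustrative, and the flags map
# to the argparse options defined in main() below):
#
#   python minimal_cli.py --input-file test/input/sample.txt --model gpt-4 --verbose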

def main(argv=None):
    parser = argparse.ArgumentParser(description="FilePromptForge - Minimal OpenAI-only CLI (single-request)")
    parser.add_argument("--prompts", nargs="+", help="Ordered list of prompt filenames (from prompts directory). If omitted, all files in prompts_dir are used in sorted order.", default=None)
    parser.add_argument("--prompts-dir", help="Directory containing prompt files.", default=None)
    parser.add_argument("--input-file", help="Path to single input file to process (absolute or relative to script directory).", default=None)
    parser.add_argument("--output-dir", help="Directory for responses.", default=None)
    parser.add_argument("--model", help="OpenAI model id to use.", default=None)
    parser.add_argument("--temperature", type=float, help="Temperature for the model.", default=None)
    parser.add_argument("--max-tokens", type=int, help="Max tokens for completion.", default=None)
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging.")
    args = parser.parse_args(argv)

    setup_logger(args.verbose)
    LOG.info("Starting FilePromptForge (minimal OpenAI-only) [single-request mode]")

    # Load config and treat it as defaults; CLI args override config
    cfg = load_config_file(script_dir)

    prompts_dir = args.prompts_dir or cfg.get("prompts_dir", "test/prompts")
    # resolve relative prompts_dir against the package script dir
    if prompts_dir and not os.path.isabs(prompts_dir):
        prompts_dir = os.path.join(str(script_dir), prompts_dir)

    output_dir = args.output_dir or cfg.get("output_dir", "test/output")
    if output_dir and not os.path.isabs(output_dir):
        output_dir = os.path.join(str(script_dir), output_dir)

    openai_cfg = cfg.get("openai", {}) or {}
    model = args.model or openai_cfg.get("model", "gpt-4")
    temperature = args.temperature if args.temperature is not None else openai_cfg.get("temperature", 0.7)
    max_tokens = args.max_tokens if args.max_tokens is not None else openai_cfg.get("max_tokens", 1500)

    grounding_cfg = cfg.get("grounding", {}) or {}
    grounding_enabled = grounding_cfg.get("enabled", True)
    llm_base_url = cfg.get("llm_endpoint_url")

    pm = PromptManager(prompts_dir)
    system_prompt = pm.load_prompts(args.prompts or [])

    # FileHandler expects an input_dir when reading by relative paths; in single-file mode
    # we interpret relative input_file paths relative to the package script directory.
    fh = FileHandler(str(script_dir), output_dir)

    # Determine input file
    input_file_arg = args.input_file or cfg.get("input_file")
    if not input_file_arg:
        LOG.error("No input file specified. Provide --input-file or set input_file in default_config.yaml")
        return 2
    # Determine full path to input file with normalization (a worked example follows):
    # - Accept absolute paths as-is
    # - If path starts with the package dir name (e.g., "filepromptforge/..."), strip that prefix
    # - Try relative to current working directory; if not found, try relative to the script directory
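    # Worked example (illustrative paths): if the package directory is "filepromptforge" and the
    # CLI is given --input-file filepromptforge/test/input/sample.txt, the leading package name is
    # stripped, the remainder is first resolved against the current working directory, and if no
    # file exists there the path falls back to <script_dir>/test/input/sample.txt.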
    if os.path.isabs(input_file_arg):
        full_input = input_file_arg
    else:
        norm_rel = os.path.normpath(input_file_arg)
        parts = norm_rel.split(os.sep)
        pkg_name = os.path.basename(str(script_dir))
        if parts and parts[0].lower() == pkg_name.lower():
            norm_rel = os.path.join(*parts[1:]) if len(parts) > 1 else ""
        candidate_cwd = os.path.abspath(norm_rel) if norm_rel else None
        candidate_pkg = os.path.join(str(script_dir), norm_rel) if norm_rel else str(script_dir)
        if candidate_cwd and os.path.isfile(candidate_cwd):
            full_input = candidate_cwd
        else:
            full_input = candidate_pkg

    if not os.path.isfile(full_input):
        LOG.error("Input file not found: %s", full_input)
        # write error meta next to expected response location
        rel = os.path.basename(full_input)
        meta_path = os.path.join(output_dir, f"response_{rel}.meta.json")
        os.makedirs(os.path.dirname(meta_path) or ".", exist_ok=True)
        err_meta = {
            "error": {"type": "InputFileNotFound", "message": f"Input file not found: {full_input}"},
            "provider": "local",
            "model": model,
            "method": "provider-tool",
            "timestamp": _now_iso(),
        }
        with open(meta_path, "w", encoding="utf-8") as mh:
            json.dump(err_meta, mh, indent=2)
        return 2
    # Read content
    try:
        with open(full_input, "r", encoding="utf-8") as fh_in:
            user_prompt = fh_in.read()
    except Exception as e:
        LOG.error("Failed to read input file: %s", e)
        rel = os.path.basename(full_input)
        meta_path = os.path.join(output_dir, f"response_{rel}.meta.json")
        os.makedirs(os.path.dirname(meta_path) or ".", exist_ok=True)
        err_meta = build_error_metadata(e, provider="local", model=model)
        with open(meta_path, "w", encoding="utf-8") as mh:
            json.dump(err_meta, mh, indent=2)
        return 2
    client = APIClient(model, temperature, max_tokens, grounding_enabled=grounding_enabled, base_url=llm_base_url)

    # Compute rel path used by FileHandler.write_file
    try:
        # Use the package script directory as the base for relative paths
        abs_input_dir = os.path.abspath(str(script_dir))
        abs_full_input = os.path.abspath(full_input)
        if abs_full_input.startswith(abs_input_dir):
            rel_path = os.path.relpath(abs_full_input, abs_input_dir)
        else:
            rel_path = os.path.basename(abs_full_input)
    except Exception:
        rel_path = os.path.basename(full_input)
    # Call provider (single attempt; no fallback). On error write .meta.json and exit non-zero.
    try:
        response_text, metadata = client.send_prompt(system_prompt, user_prompt)
    except Exception as e:
        LOG.error("Provider call failed: %s", e)
        meta = build_error_metadata(e, provider="OpenAI", model=model)
        # write meta json next to the expected response file
        meta_rel = os.path.join(os.path.dirname(rel_path), f"response_{os.path.basename(rel_path)}.meta.json") if os.path.dirname(rel_path) else f"response_{os.path.basename(rel_path)}.meta.json"
        full_meta_path = os.path.join(output_dir, meta_rel)
        os.makedirs(os.path.dirname(full_meta_path) or ".", exist_ok=True)
        with open(full_meta_path, "w", encoding="utf-8") as mh:
            json.dump(meta, mh, indent=2)
        return 3
    # Write the response and metadata
    try:
        fh.write_file(rel_path, response_text)
        meta_rel = os.path.join(os.path.dirname(rel_path), f"response_{os.path.basename(rel_path)}.meta.json") if os.path.dirname(rel_path) else f"response_{os.path.basename(rel_path)}.meta.json"
        full_meta_path = os.path.join(output_dir, meta_rel)
        os.makedirs(os.path.dirname(full_meta_path) or ".", exist_ok=True)
        # enrich metadata with a timestamp if missing
        if "timestamp" not in metadata:
            metadata["timestamp"] = _now_iso()
        with open(full_meta_path, "w", encoding="utf-8") as mh:
            json.dump(metadata, mh, indent=2)
    except Exception as e:
        LOG.error("Failed to write output files: %s", e)
        return 4

    LOG.info("Wrote response and metadata for %s", rel_path)
    return 0


if __name__ == "__main__":
    sys.exit(main())