Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 27 additions & 6 deletions crates/codegraff-tui/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5578,9 +5578,9 @@ fn parse_image_command(input: &str) -> ImageCommand {
return ImageCommand::Invalid("Usage: /image <path-to-png-jpg-webp>".to_string());
}

if !is_supported_image_path(Path::new(path)) {
if !is_supported_image_path(Path::new(path)) && !is_supported_pdf_path(Path::new(path)) {
return ImageCommand::Invalid(format!(
"Unsupported image type: {path}. Supported: png, jpg, jpeg, webp"
"Unsupported attachment type: {path}. Supported: png, jpg, jpeg, webp, pdf"
));
}

Expand All @@ -5599,6 +5599,15 @@ fn is_supported_image_path(path: &Path) -> bool {
.unwrap_or(false)
}

/// Returns `true` when `path` ends in a `pdf` extension (ASCII
/// case-insensitive).
///
/// PDFs are attached as document references (`@[path]`) and sent to the model
/// as native document blocks, not images. Paths with no extension, or whose
/// extension is not valid UTF-8, are reported as unsupported.
fn is_supported_pdf_path(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.eq_ignore_ascii_case("pdf"),
        None => false,
    }
}

/// Builds the chat prompt for `prompt` plus the attached `images` and wraps
/// the resulting text in a new `Event`.
fn build_chat_event(prompt: &str, images: &[ImageAttachment]) -> Event {
    let chat_prompt = build_chat_prompt(prompt, images);
    Event::new(chat_prompt)
}
Expand All @@ -5612,7 +5621,7 @@ fn build_chat_prompt(prompt: &str, images: &[ImageAttachment]) -> String {

match (prompt.trim().is_empty(), tags.is_empty()) {
(true, true) => String::new(),
(true, false) => format!("Please analyze the attached image(s).\n\n{tags}"),
(true, false) => format!("Please analyze the attached file(s).\n\n{tags}"),
(false, true) => prompt.trim().to_string(),
(false, false) => format!("{}\n\n{tags}", prompt.trim()),
}
Expand Down Expand Up @@ -5730,7 +5739,8 @@ fn unescape_shell_path(text: &str) -> String {
}

fn is_readable_supported_image_path(path: &Path) -> bool {
is_supported_image_path(path) && path.is_file() && image::open(path).is_ok()
(is_supported_image_path(path) && path.is_file() && image::open(path).is_ok())
|| (is_supported_pdf_path(path) && path.is_file())
}

fn normalize_paste_text(text: &str) -> String {
Expand Down Expand Up @@ -6690,12 +6700,23 @@ mod tests {
let fixture = "/image /tmp/archive.zip";
let actual = parse_image_command(fixture);
let expected = ImageCommand::Invalid(
"Unsupported image type: /tmp/archive.zip. Supported: png, jpg, jpeg, webp".to_string(),
"Unsupported attachment type: /tmp/archive.zip. Supported: png, jpg, jpeg, webp, pdf"
.to_string(),
);

assert_eq!(actual, expected);
}

#[test]
fn image_command_accepts_pdf_as_document_attachment() {
    // A `.pdf` path passed to `/image` must yield an attachment rather than
    // the "unsupported attachment type" rejection.
    let fixture = "/image /tmp/report.pdf";
    let expected = ImageCommand::Attach(ImageAttachment::new("/tmp/report.pdf"));

    let actual = parse_image_command(fixture);

    assert_eq!(actual, expected);
}

#[test]
fn chat_prompt_includes_image_tags_for_backend_attachments() {
let fixture = vec![
Expand Down Expand Up @@ -7103,7 +7124,7 @@ mod tests {
fn build_chat_prompt_sends_image_only_prompt_when_text_is_blank() {
let fixture = vec![ImageAttachment::new("/tmp/a.png")];
let actual = build_chat_prompt(" ", &fixture);
let expected = "Please analyze the attached image(s).\n\n@[/tmp/a.png]";
let expected = "Please analyze the attached file(s).\n\n@[/tmp/a.png]";

assert_eq!(actual, expected);
}
Expand Down
29 changes: 29 additions & 0 deletions crates/forge_app/src/dto/anthropic/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ impl Message {
.find_map(|(idx, content)| match content {
Content::Text { .. }
| Content::Image { .. }
| Content::Document { .. }
| Content::ToolUse { .. }
| Content::ToolResult { .. } => Some(idx),
_ => None,
Expand All @@ -320,6 +321,19 @@ impl Default for Message {

impl From<Image> for Content {
fn from(value: Image) -> Self {
// PDFs reuse the image byte-carrier but must be sent as a native
// `document` block, not an `image` block (Anthropic rejects
// application/pdf in an image source).
if value.is_pdf() {
return Content::Document {
source: DocumentSource {
type_: "base64".to_string(),
media_type: "application/pdf".to_string(),
data: value.data().into(),
},
cache_control: None,
};
}
Content::Image {
source: ImageSource {
type_: "base64".to_string(),
Expand All @@ -344,6 +358,14 @@ pub struct ImageSource {
pub url: Option<String>,
}

/// Source payload carried by a `Content::Document` block.
///
/// The `From<Image> for Content` conversion builds this for PDF inputs with
/// `type_ = "base64"` and `media_type = "application/pdf"`.
#[derive(Serialize)]
pub struct DocumentSource {
/// Source kind; serialized under the JSON key `type`.
#[serde(rename = "type")]
pub type_: String,
/// MIME type of the document payload.
pub media_type: String,
/// Document payload; presumably base64-encoded given the `"base64"` source
/// kind used at the construction site — confirm against `Image::data`.
pub data: String,
}

#[derive(Serialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum Content {
Expand All @@ -352,6 +374,11 @@ pub enum Content {
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
Document {
source: DocumentSource,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
Text {
text: String,
#[serde(skip_serializing_if = "Option::is_none")]
Expand Down Expand Up @@ -400,6 +427,7 @@ impl Content {
Content::ToolResult { tool_use_id, content, is_error, cache_control }
}
Content::Image { source, .. } => Content::Image { source, cache_control },
Content::Document { source, .. } => Content::Document { source, cache_control },
// TODO: verify that Thinking variants don't support cache control
Content::Thinking { signature, thinking } => Content::Thinking { signature, thinking },
}
Expand All @@ -411,6 +439,7 @@ impl Content {
Content::ToolUse { cache_control, .. } => cache_control.is_some(),
Content::ToolResult { cache_control, .. } => cache_control.is_some(),
Content::Image { cache_control, .. } => cache_control.is_some(),
Content::Document { cache_control, .. } => cache_control.is_some(),
Content::Thinking { .. } => false,
}
}
Expand Down
1 change: 1 addition & 0 deletions crates/forge_app/src/dto/anthropic/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ impl From<Model> for forge_domain::Model {
vec![
forge_domain::InputModality::Text,
forge_domain::InputModality::Image,
forge_domain::InputModality::Pdf,
]
} else {
vec![forge_domain::InputModality::Text]
Expand Down
40 changes: 36 additions & 4 deletions crates/forge_app/src/dto/openai/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ pub struct ImageUrl {
pub detail: Option<String>,
}

/// A file (e.g. PDF) attachment sent inline via the OpenAI chat completions
/// `file` content part. `file_data` is a `data:<mime>;base64,...` URI.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct FileData {
/// File name sent alongside the payload.
pub filename: String,
/// Inline file contents as a `data:<mime>;base64,...` URI.
pub file_data: String,
}

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Message {
pub role: Role,
Expand Down Expand Up @@ -109,6 +117,11 @@ pub enum ContentPart {
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
File {
file: FileData,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
}

impl ContentPart {
Expand All @@ -120,6 +133,9 @@ impl ContentPart {
ContentPart::ImageUrl { cache_control, .. } => {
*cache_control = None;
}
ContentPart::File { cache_control, .. } => {
*cache_control = None;
}
}
}

Expand All @@ -133,6 +149,9 @@ impl ContentPart {
ContentPart::ImageUrl { cache_control, .. } => {
*cache_control = src_cache_control;
}
ContentPart::File { cache_control, .. } => {
*cache_control = src_cache_control;
}
}
}
}
Expand Down Expand Up @@ -495,10 +514,23 @@ impl From<ContextMessage> for Message {
extra_content: None,
},
ContextMessage::Image(img) => {
let content = vec![ContentPart::ImageUrl {
image_url: ImageUrl { url: img.url().clone(), detail: None },
cache_control: None,
}];
// PDFs reuse the image byte-carrier but must be sent as a `file`
// content part, not `image_url` (OpenAI rejects PDFs as images).
let part = if img.is_pdf() {
ContentPart::File {
file: FileData {
filename: "document.pdf".to_string(),
file_data: img.url().clone(),
},
cache_control: None,
}
} else {
ContentPart::ImageUrl {
image_url: ImageUrl { url: img.url().clone(), detail: None },
cache_control: None,
}
};
let content = vec![part];
Message {
role: Role::User,
content: Some(MessageContent::Parts(content)),
Expand Down
1 change: 1 addition & 0 deletions crates/forge_app/src/tool_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,7 @@ impl<S> ToolRegistry<S> {
.map(|im| match im {
InputModality::Text => "text".to_string(),
InputModality::Image => "image".to_string(),
InputModality::Pdf => "pdf".to_string(),
})
.collect::<Vec<_>>()
.join(", ")
Expand Down
8 changes: 8 additions & 0 deletions crates/forge_domain/src/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,12 @@ impl Image {
let content = format!("data:{mime_type};base64,{base64_encoded}");
Self { url: content, mime_type }
}

/// Reports whether this carrier wraps a PDF document instead of an image.
///
/// PDFs travel through the same byte-carrier as images, but providers
/// expect them as native document blocks (e.g. Anthropic `document`,
/// OpenAI `file`) rather than image blocks, so serializers branch on this.
pub fn is_pdf(&self) -> bool {
    const PDF_MIME_TYPE: &str = "application/pdf";
    self.mime_type == PDF_MIME_TYPE
}
}
3 changes: 3 additions & 0 deletions crates/forge_domain/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ pub enum InputModality {
Text,
/// Image input (vision-capable models)
Image,
/// PDF document input (models that accept native PDF/document attachments,
/// e.g. Anthropic Claude, Google Gemini, OpenAI file-capable models)
Pdf,
}

/// Default input modalities when not specified (text-only)
Expand Down
Loading
Loading