From 5ad6e0257479425446d13356b052ff22ed922da0 Mon Sep 17 00:00:00 2001
From: Clifton King <cliftonk@gmail.com>
Date: Tue, 9 Jun 2026 15:56:22 -0500
Subject: [PATCH] fix: route PDFs per provider format and stop dropping media
 in generate/run

Two related media-handling fixes:

1. PDF attachments were silently encoded as images on 3 of 4 providers.
   build_openai_compatible_message_content (OpenAI + Grok) emitted every
   attachment as an image_url part and build_anthropic_message_content
   emitted every attachment as an image block, regardless of MIME type.
   Now the builders branch on mime_type:
   - OpenAI: inline PDFs become the documented file content part
     ({"type":"file","file":{"filename","file_data"}}); URL-based PDFs
     return a clear error (chat completions does not accept file URLs).
   - Anthropic: PDFs become document blocks with base64 or url sources
     per the PDF-support docs.
   - Grok: xAI chat completions only documents text and image parts, so
     PDFs return a clear "not supported" error instead of a mislabeled
     image_url part.
   - Non-image, non-PDF MIME types error with the offending type named.
   - Gemini already passed MIME types through and is unchanged; image
     handling is byte-for-byte unchanged on all providers.

2. generate and tool run silently dropped attached media. Request::generate
   and Request::run ignored Request::media entirely. This change:
   - adds LLMClient::generate_with_media, whose default implementation
     returns an error rather than dropping media, and implements it for
     all four providers via message-based generate internals;
   - threads media through ToolRunner::run_tool_loop and the three
     tool-loop drivers, using the same content builders;
   - routes Request::generate/run through these media-aware paths.
   MockClient records the new GenerateWithMedia request kind and
   tool-loop media for assertions.

Verified shapes against provider docs (Anthropic PDF support, OpenAI PDF
files guide, xAI image-understanding guide). Unit tests cover the
serialized part/block shapes per provider and mockito tests assert the
request bodies for generate/run now carry media.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 README.md                 |  11 +-
 src/backend/anthropic.rs  | 249 +++++++++++++++------------
 src/backend/any_client.rs |   4 +
 src/backend/client.rs     |  50 ++++++
 src/backend/gemini.rs     | 337 +++++++++++++++++++-----------------
 src/backend/grok.rs       | 202 ++++++++++++----------
 src/backend/media.rs      | 347 +++++++++++++++++++++++++++++++++++---
 src/backend/mock.rs       |  17 +-
 src/backend/mod.rs        |   4 +-
 src/backend/openai.rs     | 231 ++++++++++++++-----------
 src/backend/request.rs    |  37 +++-
 src/backend/tools.rs      |  36 +++-
 tests/http_mock_tests.rs  | 154 +++++++++++++++++
 tests/mock_edge_tests.rs  |  61 +++++++
 14 files changed, 1258 insertions(+), 482 deletions(-)
diff --git a/README.md b/README.md
index 6f08ce9..03dd757 100644
--- a/README.md
+++ b/README.md
@@ -293,7 +293,7 @@ impl SchemaType for SecurityId {
 }
 ```
 
-## Multimodal (Image Input)
+## Multimodal (Image & PDF Input)
 
 Analyze images with structured extraction across all major providers by
 attaching media to a request with `with_media`:
@@ -329,7 +329,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
 `MediaFile::new(uri, mime_type)` is also available for URL/URI-based media input.
 The lower-level `LLMClient::materialize_with_media(prompt, &media)` method does
-the same thing in one call when you do not need the builder.
+the same thing in one call when you do not need the builder. Attached media is
+honored by `materialize`, `generate`, and tool `run` alike.
+
+PDFs are supported too: pass `"application/pdf"` as the MIME type and the
+attachment is routed to each provider's documented document format (OpenAI
+`file` part, Anthropic `document` block, Gemini `inlineData`/`fileData`).
+Combinations a provider does not support — PDFs on Grok, or URL-based PDFs on
+OpenAI chat completions — return a clear error instead of a broken request.
 
 Provider examples:
 - `cargo run --example openai_multimodal_example --features openai`
diff --git a/src/backend/anthropic.rs b/src/backend/anthropic.rs
index a252545..b6b356a 100644
--- a/src/backend/anthropic.rs
+++ b/src/backend/anthropic.rs
@@ -418,6 +418,127 @@ impl AnthropicClient {
         trace!(json = %raw_response, "Parsing structured output response");
         parse_validate_and_create_output(raw_response, usage)
     }
+
+    /// Internal implementation of raw text generation (no structured output).
+    ///
+    /// Accepts chat messages so that callers can attach media (images/PDFs); each
+    /// message is encoded by `build_anthropic_message_content`. Returns the joined
+    /// `text` blocks with optional token usage, or an error if no text came back.
+    async fn generate_internal(&self, messages: &[ChatMessage]) -> Result<GenerateResult> {
+        info!("Generating raw text response with Anthropic");
+
+        // Thinking is only configured for models whose name contains "sonnet-4" or "opus-4"
+        let is_thinking_model = self.config.model.as_str().contains("sonnet-4")
+            || self.config.model.as_str().contains("opus-4");
+        let thinking_config = self.config.thinking_level.and_then(|level| {
+            if is_thinking_model && level.claude_thinking_enabled() {
+                Some(ClaudeThinkingConfig {
+                    thinking_type: "enabled".to_string(),
+                    budget_tokens: level.claude_budget_tokens(),
+                })
+            } else {
+                None
+            }
+        });
+
+        // Claude requires temperature=1 when thinking is enabled
+        let effective_temp = if thinking_config.is_some() {
+            1.0
+        } else {
+            self.config.temperature
+        };
+
+        // Build API messages with any attached media; the first encoding error is returned
+        let api_messages: Vec<AnthropicMessage> = messages
+            .iter()
+            .map(|msg| {
+                Ok(AnthropicMessage {
+                    role: msg.role.as_str().to_string(),
+                    content: build_anthropic_message_content(msg)?,
+                })
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        // Build the request (no output_format for raw text generation)
+        debug!("Building Anthropic API request for text generation");
+        let request = CompletionRequest {
+            model: self.config.model.as_str().to_string(),
+            messages: api_messages,
+            temperature: effective_temp,
+            max_tokens: effective_max_tokens(self.config.max_tokens, thinking_config.as_ref()),
+            thinking: thinking_config,
+            output_format: None, // Raw text generation doesn't use structured outputs
+        };
+
+        // Send the request to Anthropic
+        debug!(
+            model = %self.config.model.as_str(),
+            max_tokens = request.max_tokens,
+            "Sending request to Anthropic API"
+        );
+        let base_url = self
+            .config
+            .base_url
+            .as_deref()
+            .unwrap_or("https://api.anthropic.com/v1");
+        let url = format!("{}/messages", base_url);
+        debug!(url = %url, "Using Anthropic API endpoint");
+        let response = self
+            .client
+            .post(&url)
+            .header("x-api-key", &self.config.api_key)
+            .header("anthropic-version", "2023-06-01")
+            .header("Content-Type", "application/json")
+            .json(&request)
+            .send()
+            .await
+            .map_err(|e| handle_http_error(e, "Anthropic"))?;
+
+        // Parse the response
+        let response = check_response_status(response, "Anthropic").await?;
+
+        debug!("Successfully received response from Anthropic");
+        let completion: CompletionResponse = response.json().await.map_err(|e| {
+            error!(error = %e, "Failed to parse JSON response from Anthropic");
+            e
+        })?;
+
+        // Extract usage info; fall back to the configured model name if the response has none
+        let model_name = completion
+            .model
+            .clone()
+            .unwrap_or_else(|| self.config.model.as_str().to_string());
+        let usage = completion
+            .usage
+            .as_ref()
+            .map(|u| TokenUsage::new(model_name, u.input_tokens, u.output_tokens));
+
+        // Concatenate the text of every "text" block; other block types are ignored
+        debug!("Extracting text content from response blocks");
+        let content: String = completion
+            .content
+            .iter()
+            .filter(|block| block.block_type == "text")
+            .map(|block| block.text.clone())
+            .collect::<Vec<String>>()
+            .join("");
+
+        if content.is_empty() {
+            error!("No text content in Anthropic response");
+            return Err(RStructorError::api_error(
+                "Anthropic",
+                ApiErrorKind::UnexpectedResponse {
+                    details: "No text content in response".to_string(),
+                },
+            ));
+        }
+
+        debug!(
+            content_len = content.len(),
+            "Successfully extracted text content"
+        );
+        Ok(GenerateResult::new(content, usage))
+    }
 }
 
 // Generate builder methods using macro
@@ -565,6 +686,7 @@ impl crate::backend::tools::ToolRunner for AnthropicClient {
         &self,
         system: Option<&str>,
         prompt: &str,
+        media: &[super::MediaFile],
         toolbox: &crate::backend::tools::Toolbox,
         max_iterations: usize,
     ) -> Result<String> {
@@ -584,6 +706,7 @@ impl crate::backend::tools::ToolRunner for AnthropicClient {
                 .unwrap_or(DEFAULT_ANTHROPIC_MAX_TOKENS),
             system,
             prompt,
+            media,
             toolbox,
             max_iterations,
         )
@@ -686,6 +809,26 @@ impl LLMClient for AnthropicClient {
         Ok(result.text)
     }
 
+    #[instrument(
+        name = "anthropic_generate_with_media",
+        skip(self, prompt, media),
+        fields(
+            model = %self.config.model.as_str(),
+            prompt_len = prompt.len(),
+            media_len = media.len()
+        )
+    )]
+    async fn generate_with_media(
+        &self,
+        prompt: &str,
+        media: &[super::MediaFile],
+    ) -> Result<String> {
+        let result = self
+            .generate_internal(&[ChatMessage::user_with_media(prompt, media.to_vec())])
+            .await?;
+        Ok(result.text)
+    }
+
     #[instrument(
         name = "anthropic_generate_with_metadata",
         skip(self, prompt),
@@ -695,111 +838,7 @@ impl LLMClient for AnthropicClient {
         )
     )]
     async fn generate_with_metadata(&self, prompt: &str) -> Result<GenerateResult> {
-        info!("Generating raw text response with Anthropic");
-
-        // Build thinking config for Claude 4.x models
-        let is_thinking_model = self.config.model.as_str().contains("sonnet-4")
-            || self.config.model.as_str().contains("opus-4");
-        let thinking_config = self.config.thinking_level.and_then(|level| {
-            if is_thinking_model && level.claude_thinking_enabled() {
-                Some(ClaudeThinkingConfig {
-                    thinking_type: "enabled".to_string(),
-                    budget_tokens: level.claude_budget_tokens(),
-                })
-            } else {
-                None
-            }
-        });
-
-        // Claude requires temperature=1 when thinking is enabled
-        let effective_temp = if thinking_config.is_some() {
-            1.0
-        } else {
-            self.config.temperature
-        };
-
-        // Build the request (no output_format for raw text generation)
-        debug!("Building Anthropic API request for text generation");
-        let request = CompletionRequest {
-            model: self.config.model.as_str().to_string(),
-            messages: vec![AnthropicMessage {
-                role: "user".to_string(),
-                content: AnthropicMessageContent::Text(prompt.to_string()),
-            }],
-            temperature: effective_temp,
-            max_tokens: effective_max_tokens(self.config.max_tokens, thinking_config.as_ref()),
-            thinking: thinking_config,
-            output_format: None, // Raw text generation doesn't use structured outputs
-        };
-
-        // Send the request to Anthropic
-        debug!(
-            model = %self.config.model.as_str(),
-            max_tokens = request.max_tokens,
-            "Sending request to Anthropic API"
-        );
-        let base_url = self
-            .config
-            .base_url
-            .as_deref()
-            .unwrap_or("https://api.anthropic.com/v1");
-        let url = format!("{}/messages", base_url);
-        debug!(url = %url, "Using Anthropic API endpoint");
-        let response = self
-            .client
-            .post(&url)
-            .header("x-api-key", &self.config.api_key)
-            .header("anthropic-version", "2023-06-01")
-            .header("Content-Type", "application/json")
-            .json(&request)
-            .send()
-            .await
-            .map_err(|e| handle_http_error(e, "Anthropic"))?;
-
-        // Parse the response
-        let response = check_response_status(response, "Anthropic").await?;
-
-        debug!("Successfully received response from Anthropic");
-        let completion: CompletionResponse = response.json().await.map_err(|e| {
-            error!(error = %e, "Failed to parse JSON response from Anthropic");
-            e
-        })?;
-
-        // Extract usage info
-        let model_name = completion
-            .model
-            .clone()
-            .unwrap_or_else(|| self.config.model.as_str().to_string());
-        let usage = completion
-            .usage
-            .as_ref()
-            .map(|u| TokenUsage::new(model_name, u.input_tokens, u.output_tokens));
-
-        // Extract the content
-        debug!("Extracting text content from response blocks");
-        let content: String = completion
-            .content
-            .iter()
-            .filter(|block| block.block_type == "text")
-            .map(|block| block.text.clone())
-            .collect::<Vec<String>>()
-            .join("");
-
-        if content.is_empty() {
-            error!("No text content in Anthropic response");
-            return Err(RStructorError::api_error(
-                "Anthropic",
-                ApiErrorKind::UnexpectedResponse {
-                    details: "No text content in response".to_string(),
-                },
-            ));
-        }
-
-        debug!(
-            content_len = content.len(),
-            "Successfully extracted text content"
-        );
-        Ok(GenerateResult::new(content, usage))
+        self.generate_internal(&[ChatMessage::user(prompt)]).await
     }
 
     #[cfg(feature = "streaming")]
diff --git a/src/backend/any_client.rs b/src/backend/any_client.rs
index 4ecf47a..e886637 100644
--- a/src/backend/any_client.rs
+++ b/src/backend/any_client.rs
@@ -209,6 +209,10 @@ impl LLMClient for AnyClient {
         dispatch!(self, c => c.generate(prompt).await)
     }
 
+    async fn generate_with_media(&self, prompt: &str, media: &[MediaFile]) -> Result<String> {
+        dispatch!(self, c => c.generate_with_media(prompt, media).await)
+    }
+
     async fn generate_with_metadata(&self, prompt: &str) -> Result<GenerateResult> {
         dispatch!(self, c => c.generate_with_metadata(prompt).await)
     }
diff --git a/src/backend/client.rs b/src/backend/client.rs
index 055a5c3..83c2b1a 100644
--- a/src/backend/client.rs
+++ b/src/backend/client.rs
@@ -16,6 +16,13 @@ use crate::model::Instructor;
 ///   Created with [`MediaFile::from_bytes`]. This is useful for public images
 ///   downloaded over HTTPS.
 ///
+/// The `mime_type` decides how each provider encodes the attachment: `image/*`
+/// is sent in the provider's image format, and `application/pdf` is routed to
+/// the provider's document/file format (OpenAI `file` part for inline data,
+/// Anthropic `document` block, Gemini `inlineData`/`fileData`). Combinations a
+/// provider does not document — e.g. any PDF on Grok, or a URL-based PDF on
+/// OpenAI — produce a clear error instead of a silently broken request.
+///
 /// # Examples
 ///
 /// ```no_run
@@ -30,6 +37,10 @@ use crate::model::Instructor;
 /// // Inline data from bytes
 /// let image_bytes = std::fs::read("photo.png").unwrap();
 /// let media = MediaFile::from_bytes(&image_bytes, "image/png");
+///
+/// // Inline PDF (OpenAI, Anthropic, and Gemini)
+/// let pdf_bytes = std::fs::read("report.pdf").unwrap();
+/// let media = MediaFile::from_bytes(&pdf_bytes, "application/pdf");
 /// ```
 #[derive(Debug, Clone)]
 pub struct MediaFile {
@@ -282,6 +293,45 @@ pub trait LLMClient {
     /// ```
     async fn generate(&self, prompt: &str) -> Result<String>;
 
+    /// Raw text completion with media attachments (if supported).
+    ///
+    /// Like [`generate`](Self::generate), but the prompt is sent together with
+    /// `media` (images, or PDFs where the provider supports them), encoded in the
+    /// provider's documented multimodal format.
+    ///
+    /// The default implementation forwards to [`generate`](Self::generate) when
+    /// no media is provided, and otherwise returns
+    /// [`RStructorError::Unsupported`](crate::RStructorError::Unsupported) so that
+    /// media is never silently dropped. Providers with media support override this
+    /// method. All four built-in clients (OpenAI, Anthropic, Grok, Gemini) support
+    /// media here; PDF support varies by provider (Grok, for example, accepts only
+    /// images and returns a clear error for PDFs).
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use rstructor::{LLMClient, OpenAIClient, MediaFile};
+    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// let client = OpenAIClient::from_env()?;
+    /// let pdf_bytes = std::fs::read("report.pdf")?;
+    /// let media = [MediaFile::from_bytes(&pdf_bytes, "application/pdf")];
+    /// let summary = client
+    ///     .generate_with_media("Summarize this report", &media)
+    ///     .await?;
+    /// println!("{summary}");
+    /// # Ok(())
+    /// # }
+    /// ```
+    async fn generate_with_media(&self, prompt: &str, media: &[MediaFile]) -> Result<String> {
+        if media.is_empty() {
+            self.generate(prompt).await
+        } else {
+            Err(crate::error::RStructorError::Unsupported(
+                "this client does not support media inputs".to_string(),
+            ))
+        }
+    }
+
     /// Raw completion with metadata (token usage).
     ///
     /// Like [`generate`](Self::generate), but returns a [`GenerateResult`]
diff --git a/src/backend/gemini.rs b/src/backend/gemini.rs
index b562ce3..012a166 100644
--- a/src/backend/gemini.rs
+++ b/src/backend/gemini.rs
@@ -201,6 +201,50 @@ struct CandidatePart {
     text: Option<String>,
 }
 
+/// Convert provider-agnostic chat messages into Gemini `contents`. Media with inline
+/// data becomes an `inlineData` (base64) part, otherwise a `fileData` (URI) part; the
+/// MIME type is copied through unchanged, so images and PDFs share the same encoding.
+fn chat_messages_to_contents(messages: &[ChatMessage]) -> Vec<Content> {
+    messages
+        .iter()
+        .map(|msg| {
+            // Gemini uses "user" and "model" (not "assistant"); other roles pass through as-is
+            let role = if msg.role.as_str() == "assistant" {
+                "model"
+            } else {
+                msg.role.as_str()
+            };
+            let mut parts = Vec::new();
+            if !msg.content.is_empty() { // no text part is emitted for empty content
+                parts.push(Part::Text {
+                    text: msg.content.clone(),
+                });
+            }
+            for media in &msg.media {
+                if let Some(ref base64_data) = media.data {
+                    parts.push(Part::InlineData {
+                        inline_data: InlineData {
+                            mime_type: media.mime_type.clone(),
+                            data: base64_data.clone(),
+                        },
+                    });
+                } else {
+                    parts.push(Part::FileData {
+                        file_data: FileData {
+                            mime_type: media.mime_type.clone(),
+                            file_uri: media.uri.clone(),
+                        },
+                    });
+                }
+            }
+            Content {
+                role: Some(role.to_string()),
+                parts,
+            }
+        })
+        .collect()
+}
+
 impl GeminiClient {
     /// Create a new Gemini client with the provided API key.
     ///
@@ -321,44 +365,7 @@ impl GeminiClient {
 
         // Build API contents from conversation history
         // With native response_schema, we don't need to include schema instructions in the prompt
-        let contents: Vec<Content> = messages
-            .iter()
-            .map(|msg| {
-                // Gemini uses "user" and "model" (not "assistant")
-                let role = if msg.role.as_str() == "assistant" {
-                    "model"
-                } else {
-                    msg.role.as_str()
-                };
-                let mut parts = Vec::new();
-                if !msg.content.is_empty() {
-                    parts.push(Part::Text {
-                        text: msg.content.clone(),
-                    });
-                }
-                for media in &msg.media {
-                    if let Some(ref base64_data) = media.data {
-                        parts.push(Part::InlineData {
-                            inline_data: InlineData {
-                                mime_type: media.mime_type.clone(),
-                                data: base64_data.clone(),
-                            },
-                        });
-                    } else {
-                        parts.push(Part::FileData {
-                            file_data: FileData {
-                                mime_type: media.mime_type.clone(),
-                                file_uri: media.uri.clone(),
-                            },
-                        });
-                    }
-                }
-                Content {
-                    role: Some(role.to_string()),
-                    parts,
-                }
-            })
-            .collect();
+        let contents = chat_messages_to_contents(messages);
 
         // Build thinking config only for Gemini 3.x models
         let is_gemini3 = self.config.model.as_str().starts_with("gemini-3");
@@ -493,6 +500,123 @@ impl GeminiClient {
             None,
         ))
     }
+
+    /// Internal implementation of raw text generation (no structured output).
+    ///
+    /// Accepts chat messages so that callers can attach media (images/PDFs); they are
+    /// encoded by `chat_messages_to_contents`. Only the first part of the first
+    /// candidate is read, and a response without text there is reported as an error.
+    async fn generate_internal(&self, messages: &[ChatMessage]) -> Result<GenerateResult> {
+        info!("Generating raw text response with Gemini");
+
+        // Build thinking config only for models whose name starts with "gemini-3"
+        let is_gemini3 = self.config.model.as_str().starts_with("gemini-3");
+        let thinking_config = if is_gemini3 {
+            self.config.thinking_level.and_then(|level| {
+                level.gemini_level().map(|l| ThinkingConfig {
+                    thinking_level: l.to_string(),
+                })
+            })
+        } else {
+            None
+        };
+
+        // Build the request, including any attached media parts
+        debug!("Building Gemini API request");
+        let request = GenerateContentRequest {
+            contents: chat_messages_to_contents(messages),
+            generation_config: GenerationConfig {
+                temperature: self.config.temperature,
+                max_output_tokens: self.config.max_tokens,
+                response_mime_type: None,
+                response_schema: None,
+                thinking_config,
+            },
+        };
+
+        // Send the request to Gemini API (the API key is passed as the `key` query parameter)
+        let base_url = self
+            .config
+            .base_url
+            .as_deref()
+            .unwrap_or("https://generativelanguage.googleapis.com/v1beta");
+        let url = format!(
+            "{}/models/{}:generateContent",
+            base_url,
+            self.config.model.as_str()
+        );
+        debug!(
+            url = %url,
+            model = %self.config.model.as_str(),
+            "Sending request to Gemini API"
+        );
+        let response = self
+            .client
+            .post(&url)
+            .query(&[("key", &self.config.api_key)])
+            .header("Content-Type", "application/json")
+            .json(&request)
+            .send()
+            .await
+            .map_err(|e| handle_http_error(e, "Gemini"))?;
+
+        // Parse the response
+        let response = check_response_status(response, "Gemini").await?;
+
+        debug!("Successfully received response from Gemini API");
+        let completion: GenerateContentResponse = response.json().await.map_err(|e| {
+            error!(error = %e, "Failed to parse JSON response from Gemini API");
+            e
+        })?;
+
+        if completion.candidates.is_empty() {
+            error!("Gemini API returned empty candidates array");
+            return Err(RStructorError::api_error(
+                "Gemini",
+                ApiErrorKind::UnexpectedResponse {
+                    details: "No completion candidates returned".to_string(),
+                },
+            ));
+        }
+
+        // Extract usage info; fall back to the configured model name if none is reported
+        let model_name = completion
+            .model_version
+            .clone()
+            .unwrap_or_else(|| self.config.model.as_str().to_string());
+        let usage = completion
+            .usage_metadata
+            .as_ref()
+            .map(|u| TokenUsage::new(model_name, u.prompt_token_count, u.candidates_token_count));
+
+        let candidate = &completion.candidates[0];
+        trace!(finish_reason = %candidate.finish_reason, "Completion finish reason");
+
+        // Extract the text content from the first part only; later parts are not inspected
+        match candidate
+            .content
+            .parts
+            .first()
+            .and_then(|p| p.text.as_ref())
+        {
+            Some(text) => {
+                debug!(
+                    content_len = text.len(),
+                    "Successfully extracted text content from response"
+                );
+                Ok(GenerateResult::new(text.clone(), usage))
+            }
+            None => {
+                error!("No text content in Gemini response");
+                Err(RStructorError::api_error(
+                    "Gemini",
+                    ApiErrorKind::UnexpectedResponse {
+                        details: "No text content in response".to_string(),
+                    },
+                ))
+            }
+        }
+    }
 }
 
 // Generate builder methods using macro
@@ -632,6 +756,7 @@ impl crate::backend::tools::ToolRunner for GeminiClient {
         &self,
         system: Option<&str>,
         prompt: &str,
+        media: &[super::MediaFile],
         toolbox: &crate::backend::tools::Toolbox,
         max_iterations: usize,
     ) -> Result<String> {
@@ -649,6 +774,7 @@ impl crate::backend::tools::ToolRunner for GeminiClient {
             self.config.max_tokens,
             system,
             prompt,
+            media,
             toolbox,
             max_iterations,
         )
@@ -751,6 +877,26 @@ impl LLMClient for GeminiClient {
         Ok(result.text)
     }
 
+    #[instrument(
+        name = "gemini_generate_with_media",
+        skip(self, prompt, media),
+        fields(
+            model = %self.config.model.as_str(),
+            prompt_len = prompt.len(),
+            media_len = media.len()
+        )
+    )]
+    async fn generate_with_media(
+        &self,
+        prompt: &str,
+        media: &[super::MediaFile],
+    ) -> Result<String> {
+        let result = self
+            .generate_internal(&[ChatMessage::user_with_media(prompt, media.to_vec())])
+            .await?;
+        Ok(result.text)
+    }
+
     #[instrument(
         name = "gemini_generate_with_metadata",
         skip(self, prompt),
@@ -760,120 +906,7 @@ impl LLMClient for GeminiClient {
         )
     )]
     async fn generate_with_metadata(&self, prompt: &str) -> Result<GenerateResult> {
-        info!("Generating raw text response with Gemini");
-
-        // Build thinking config only for Gemini 3.x models
-        let is_gemini3 = self.config.model.as_str().starts_with("gemini-3");
-        let thinking_config = if is_gemini3 {
-            self.config.thinking_level.and_then(|level| {
-                level.gemini_level().map(|l| ThinkingConfig {
-                    thinking_level: l.to_string(),
-                })
-            })
-        } else {
-            None
-        };
-
-        // Build the request
-        debug!("Building Gemini API request");
-        let request = GenerateContentRequest {
-            contents: vec![Content {
-                role: Some("user".to_string()),
-                parts: vec![Part::Text {
-                    text: prompt.to_string(),
-                }],
-            }],
-            generation_config: GenerationConfig {
-                temperature: self.config.temperature,
-                max_output_tokens: self.config.max_tokens,
-                response_mime_type: None,
-                response_schema: None,
-                thinking_config,
-            },
-        };
-
-        // Send the request to Gemini API
-        let base_url = self
-            .config
-            .base_url
-            .as_deref()
-            .unwrap_or("https://generativelanguage.googleapis.com/v1beta");
-        let url = format!(
-            "{}/models/{}:generateContent",
-            base_url,
-            self.config.model.as_str()
-        );
-        debug!(
-            url = %url,
-            model = %self.config.model.as_str(),
-            "Sending request to Gemini API"
-        );
-        let response = self
-            .client
-            .post(&url)
-            .query(&[("key", &self.config.api_key)])
-            .header("Content-Type", "application/json")
-            .json(&request)
-            .send()
-            .await
-            .map_err(|e| handle_http_error(e, "Gemini"))?;
-
-        // Parse the response
-        let response = check_response_status(response, "Gemini").await?;
-
-        debug!("Successfully received response from Gemini API");
-        let completion: GenerateContentResponse = response.json().await.map_err(|e| {
-            error!(error = %e, "Failed to parse JSON response from Gemini API");
-            e
-        })?;
-
-        if completion.candidates.is_empty() {
-            error!("Gemini API returned empty candidates array");
-            return Err(RStructorError::api_error(
-                "Gemini",
-                ApiErrorKind::UnexpectedResponse {
-                    details: "No completion candidates returned".to_string(),
-                },
-            ));
-        }
-
-        // Extract usage info
-        let model_name = completion
-            .model_version
-            .clone()
-            .unwrap_or_else(|| self.config.model.as_str().to_string());
-        let usage = completion
-            .usage_metadata
-            .as_ref()
-            .map(|u| TokenUsage::new(model_name, u.prompt_token_count, u.candidates_token_count));
-
-        let candidate = &completion.candidates[0];
-        trace!(finish_reason = %candidate.finish_reason, "Completion finish reason");
-
-        // Extract the text content
-        match candidate
-            .content
-            .parts
-            .first()
-            .and_then(|p| p.text.as_ref())
-        {
-            Some(text) => {
-                debug!(
-                    content_len = text.len(),
-                    "Successfully extracted text content from response"
-                );
-                Ok(GenerateResult::new(text.clone(), usage))
-            }
-            None => {
-                error!("No text content in Gemini response");
-                Err(RStructorError::api_error(
-                    "Gemini",
-                    ApiErrorKind::UnexpectedResponse {
-                        details: "No text content in response".to_string(),
-                    },
-                ))
-            }
-        }
+        self.generate_internal(&[ChatMessage::user(prompt)]).await
     }
 
     #[cfg(feature = "streaming")]
diff --git a/src/backend/grok.rs b/src/backend/grok.rs
index 17dd968..f2bc2ae 100644
--- a/src/backend/grok.rs
+++ b/src/backend/grok.rs
@@ -7,11 +7,12 @@ use crate::backend::model_macro::define_model_enum;
 use crate::backend::{
     ChatMessage, GenerateResult, LLMClient, MaterializeInternalOutput, MaterializeResult,
     ModelInfo, OpenAICompatibleChatCompletionRequest, OpenAICompatibleChatCompletionResponse,
-    OpenAICompatibleChatMessage, OpenAICompatibleMessageContent, ResponseFormat, TokenUsage,
-    ValidationFailureContext, check_response_status, convert_openai_compatible_chat_messages,
-    generate_with_retry_with_history, handle_http_error, materialize_with_media_with_retry,
-    parse_validate_and_create_output, prepare_strict_schema,
+    ResponseFormat, TokenUsage, ValidationFailureContext, check_response_status,
+    convert_openai_compatible_chat_messages, generate_with_retry_with_history, handle_http_error,
+    materialize_with_media_with_retry, parse_validate_and_create_output, prepare_strict_schema,
 };
+#[cfg(feature = "streaming")]
+use crate::backend::{OpenAICompatibleChatMessage, OpenAICompatibleMessageContent};
 use crate::error::{ApiErrorKind, RStructorError, Result};
 use crate::model::Instructor;
 
@@ -294,6 +295,94 @@ impl GrokClient {
             ))
         }
     }
+
+    /// Internal implementation of raw text generation (no structured output).
+    ///
+    /// Accepts chat messages so that callers can attach media (images) to the
+    /// user message; the same content-building path as `materialize_internal` is
+    /// used, so unsupported attachments (e.g. PDFs) fail with a clear error
+    /// instead of being silently mislabeled.
+    async fn generate_internal(&self, messages: &[ChatMessage]) -> Result<GenerateResult> {
+        info!("Generating raw text response with Grok");
+
+        // Build the request without structured outputs
+        debug!("Building Grok API request for text generation");
+        let request = OpenAICompatibleChatCompletionRequest {
+            model: self.config.model.as_str().to_string(),
+            messages: convert_openai_compatible_chat_messages(messages, "Grok")?,
+            response_format: None,
+            temperature: self.config.temperature,
+            max_tokens: self.config.max_tokens,
+            reasoning_effort: None,
+        };
+
+        // Send the request to Grok/xAI API
+        let base_url = self
+            .config
+            .base_url
+            .as_deref()
+            .unwrap_or("https://api.x.ai/v1");
+        let url = format!("{}/chat/completions", base_url);
+        debug!(url = %url, "Sending request to Grok API");
+        let response = self
+            .client
+            .post(&url)
+            .header("Authorization", format!("Bearer {}", self.config.api_key))
+            .header("Content-Type", "application/json")
+            .json(&request)
+            .send()
+            .await
+            .map_err(|e| handle_http_error(e, "Grok"))?;
+
+        // Check the response status, then parse the body
+        let response = check_response_status(response, "Grok").await?;
+
+        debug!("Successfully received response from Grok API");
+        let completion: OpenAICompatibleChatCompletionResponse =
+            response.json().await.map_err(|e| {
+                error!(error = %e, "Failed to parse JSON response from Grok API");
+                e
+            })?;
+
+        if completion.choices.is_empty() {
+            error!("Grok API returned empty choices array");
+            return Err(RStructorError::api_error(
+                "Grok",
+                ApiErrorKind::UnexpectedResponse {
+                    details: "No completion choices returned".to_string(),
+                },
+            ));
+        }
+
+        // Extract usage info
+        let model_name = completion
+            .model
+            .clone()
+            .unwrap_or_else(|| self.config.model.as_str().to_string());
+        let usage = completion
+            .usage
+            .as_ref()
+            .map(|u| TokenUsage::new(model_name, u.prompt_tokens, u.completion_tokens));
+
+        let message = &completion.choices[0].message;
+        trace!(finish_reason = %completion.choices[0].finish_reason, "Completion finish reason");
+
+        if let Some(content) = &message.content {
+            debug!(
+                content_len = content.len(),
+                "Successfully extracted content from response"
+            );
+            Ok(GenerateResult::new(content.clone(), usage))
+        } else {
+            error!("No content in Grok API response");
+            Err(RStructorError::api_error(
+                "Grok",
+                ApiErrorKind::UnexpectedResponse {
+                    details: "No content in response".to_string(),
+                },
+            ))
+        }
+    }
 }
 
 // Generate builder methods using macro
@@ -382,6 +471,7 @@ impl crate::backend::tools::ToolRunner for GrokClient {
         &self,
         system: Option<&str>,
         prompt: &str,
+        media: &[super::MediaFile],
         toolbox: &crate::backend::tools::Toolbox,
         max_iterations: usize,
     ) -> Result<String> {
@@ -403,6 +493,7 @@ impl crate::backend::tools::ToolRunner for GrokClient {
             None,
             system,
             prompt,
+            media,
             toolbox,
             max_iterations,
         )
@@ -505,6 +596,26 @@ impl LLMClient for GrokClient {
         Ok(result.text)
     }
 
+    #[instrument(
+        name = "grok_generate_with_media",
+        skip(self, prompt, media),
+        fields(
+            model = %self.config.model.as_str(),
+            prompt_len = prompt.len(),
+            media_len = media.len()
+        )
+    )]
+    async fn generate_with_media(
+        &self,
+        prompt: &str,
+        media: &[super::MediaFile],
+    ) -> Result<String> {
+        let result = self
+            .generate_internal(&[ChatMessage::user_with_media(prompt, media.to_vec())])
+            .await?;
+        Ok(result.text)
+    }
+
     #[instrument(
         name = "grok_generate_with_metadata",
         skip(self, prompt),
@@ -514,88 +625,7 @@ impl LLMClient for GrokClient {
         )
     )]
     async fn generate_with_metadata(&self, prompt: &str) -> Result<GenerateResult> {
-        info!("Generating raw text response with Grok");
-
-        // Build the request without structured outputs
-        debug!("Building Grok API request for text generation");
-        let request = OpenAICompatibleChatCompletionRequest {
-            model: self.config.model.as_str().to_string(),
-            messages: vec![OpenAICompatibleChatMessage {
-                role: "user".to_string(),
-                content: OpenAICompatibleMessageContent::Text(prompt.to_string()),
-            }],
-            response_format: None,
-            temperature: self.config.temperature,
-            max_tokens: self.config.max_tokens,
-            reasoning_effort: None,
-        };
-
-        // Send the request to Grok/xAI API
-        let base_url = self
-            .config
-            .base_url
-            .as_deref()
-            .unwrap_or("https://api.x.ai/v1");
-        let url = format!("{}/chat/completions", base_url);
-        debug!(url = %url, "Sending request to Grok API");
-        let response = self
-            .client
-            .post(&url)
-            .header("Authorization", format!("Bearer {}", self.config.api_key))
-            .header("Content-Type", "application/json")
-            .json(&request)
-            .send()
-            .await
-            .map_err(|e| handle_http_error(e, "Grok"))?;
-
-        // Parse the response
-        let response = check_response_status(response, "Grok").await?;
-
-        debug!("Successfully received response from Grok API");
-        let completion: OpenAICompatibleChatCompletionResponse =
-            response.json().await.map_err(|e| {
-                error!(error = %e, "Failed to parse JSON response from Grok API");
-                e
-            })?;
-
-        if completion.choices.is_empty() {
-            error!("Grok API returned empty choices array");
-            return Err(RStructorError::api_error(
-                "Grok",
-                ApiErrorKind::UnexpectedResponse {
-                    details: "No completion choices returned".to_string(),
-                },
-            ));
-        }
-
-        // Extract usage info
-        let model_name = completion
-            .model
-            .clone()
-            .unwrap_or_else(|| self.config.model.as_str().to_string());
-        let usage = completion
-            .usage
-            .as_ref()
-            .map(|u| TokenUsage::new(model_name, u.prompt_tokens, u.completion_tokens));
-
-        let message = &completion.choices[0].message;
-        trace!(finish_reason = %completion.choices[0].finish_reason, "Completion finish reason");
-
-        if let Some(content) = &message.content {
-            debug!(
-                content_len = content.len(),
-                "Successfully extracted content from response"
-            );
-            Ok(GenerateResult::new(content.clone(), usage))
-        } else {
-            error!("No content in Grok API response");
-            Err(RStructorError::api_error(
-                "Grok",
-                ApiErrorKind::UnexpectedResponse {
-                    details: "No content in response".to_string(),
-                },
-            ))
-        }
+        self.generate_internal(&[ChatMessage::user(prompt)]).await
     }
 
     #[cfg(feature = "streaming")]
diff --git a/src/backend/media.rs b/src/backend/media.rs
index dba0ad0..2440826 100644
--- a/src/backend/media.rs
+++ b/src/backend/media.rs
@@ -15,6 +15,7 @@ pub(crate) enum OpenAICompatibleMessageContent {
 pub(crate) enum OpenAICompatibleMessagePart {
     Text { text: String },
     ImageUrl { image_url: OpenAICompatibleImageUrl },
+    File { file: OpenAICompatibleFile },
 }
 
 #[derive(Debug, Serialize)]
@@ -24,6 +25,16 @@ pub(crate) struct OpenAICompatibleImageUrl {
     pub(crate) detail: Option<String>,
 }
 
+/// A file content part for OpenAI chat completions (PDF input).
+///
+/// See <https://platform.openai.com/docs/guides/pdf-files>: the part is
+/// `{"type": "file", "file": {"filename": ..., "file_data": "data:application/pdf;base64,..."}}`.
+#[derive(Debug, Serialize)]
+pub(crate) struct OpenAICompatibleFile {
+    pub(crate) filename: String,
+    pub(crate) file_data: String,
+}
+
 #[derive(Debug, Serialize)]
 #[serde(untagged)]
 pub(crate) enum AnthropicMessageContent {
@@ -34,13 +45,25 @@ pub(crate) enum AnthropicMessageContent {
 #[derive(Debug, Serialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub(crate) enum AnthropicContentBlock {
-    Text { text: String },
-    Image { source: AnthropicImageSource },
+    Text {
+        text: String,
+    },
+    Image {
+        source: AnthropicMediaSource,
+    },
+    /// A PDF document block, see
+    /// <https://docs.anthropic.com/en/docs/build-with-claude/pdf-support>:
+    /// `{"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": ...}}`
+    /// or `{"type": "document", "source": {"type": "url", "url": ...}}`.
+    Document {
+        source: AnthropicMediaSource,
+    },
 }
 
+/// Source of an Anthropic `image` or `document` block (both share this shape).
 #[derive(Debug, Serialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
-pub(crate) enum AnthropicImageSource {
+pub(crate) enum AnthropicMediaSource {
     Base64 { media_type: String, data: String },
     Url { url: String },
 }
@@ -61,18 +84,108 @@ pub(crate) fn build_openai_compatible_message_content(
     }
 
     for media in &msg.media {
-        let url = media_to_url(media, provider_name)?;
-        parts.push(OpenAICompatibleMessagePart::ImageUrl {
-            image_url: OpenAICompatibleImageUrl {
-                url,
-                detail: Some("auto".to_string()),
-            },
-        });
+        if media.mime_type.starts_with("image/") {
+            let url = media_to_url(media, provider_name)?;
+            parts.push(OpenAICompatibleMessagePart::ImageUrl {
+                image_url: OpenAICompatibleImageUrl {
+                    url,
+                    detail: Some("auto".to_string()),
+                },
+            });
+        } else if media.mime_type == "application/pdf" {
+            parts.push(openai_compatible_pdf_part(media, provider_name)?);
+        } else {
+            return Err(unsupported_media_type(media, provider_name));
+        }
     }
 
     Ok(OpenAICompatibleMessageContent::Parts(parts))
 }
 
+/// Build the PDF content part for an OpenAI-compatible chat completions request,
+/// or return a clear error for providers/sources with no documented PDF pathway.
+fn openai_compatible_pdf_part(
+    media: &crate::backend::client::MediaFile,
+    provider_name: &str,
+) -> Result<OpenAICompatibleMessagePart> {
+    // xAI's chat completions API only documents text and image content parts
+    // (https://docs.x.ai/docs/guides/image-understanding); there is no file or
+    // document part, so a PDF would be either mislabeled as an image or rejected.
+    if provider_name == "Grok" {
+        return Err(RStructorError::api_error(
+            provider_name,
+            ApiErrorKind::BadRequest {
+                details: "PDF attachments are not supported for Grok: the xAI chat \
+                          completions API only accepts text and image content parts. \
+                          Extract the PDF's text or render its pages to images and \
+                          attach those instead"
+                    .to_string(),
+            },
+        ));
+    }
+
+    if let Some(data) = media.data.as_ref() {
+        if data.is_empty() {
+            return Err(RStructorError::api_error(
+                provider_name,
+                ApiErrorKind::BadRequest {
+                    details: "MediaFile inline data cannot be empty".to_string(),
+                },
+            ));
+        }
+        // Chat completions accept PDFs as a `file` part with base64 `file_data`
+        // (https://platform.openai.com/docs/guides/pdf-files).
+        Ok(OpenAICompatibleMessagePart::File {
+            file: OpenAICompatibleFile {
+                filename: "document.pdf".to_string(),
+                file_data: format!("data:{};base64,{}", media.mime_type, data),
+            },
+        })
+    } else if !media.uri.is_empty() {
+        // Chat completions do not accept remote file URLs (only `file_data` or an
+        // uploaded `file_id`); see https://platform.openai.com/docs/guides/pdf-files.
+        Err(RStructorError::api_error(
+            provider_name,
+            ApiErrorKind::BadRequest {
+                details: format!(
+                    "{provider_name} chat completions does not support URL-based PDF \
+                     attachments; download the file and attach the bytes inline with \
+                     MediaFile::from_bytes(bytes, \"application/pdf\") instead"
+                ),
+            },
+        ))
+    } else {
+        Err(RStructorError::api_error(
+            provider_name,
+            ApiErrorKind::BadRequest {
+                details: "MediaFile must include either inline data or uri".to_string(),
+            },
+        ))
+    }
+}
+
+/// Error for MIME types with no documented attachment pathway on this provider.
+fn unsupported_media_type(
+    media: &crate::backend::client::MediaFile,
+    provider_name: &str,
+) -> RStructorError {
+    let supported = if provider_name == "Grok" {
+        "image/*"
+    } else {
+        "image/* and application/pdf"
+    };
+    RStructorError::api_error(
+        provider_name,
+        ApiErrorKind::BadRequest {
+            details: format!(
+                "unsupported media type {:?} for {provider_name}: only {supported} \
+                 attachments are supported on this provider",
+                media.mime_type,
+            ),
+        },
+    )
+}
+
 pub(crate) fn build_anthropic_message_content(
     msg: &ChatMessage,
 ) -> Result<AnthropicMessageContent> {
@@ -88,7 +201,10 @@ pub(crate) fn build_anthropic_message_content(
     }
 
     for media in &msg.media {
-        if let Some(data) = media.data.as_ref() {
+        let is_image = media.mime_type.starts_with("image/");
+        let is_pdf = media.mime_type == "application/pdf";
+
+        let source = if let Some(data) = media.data.as_ref() {
             if data.is_empty() {
                 return Err(RStructorError::api_error(
                     "Anthropic",
@@ -105,18 +221,14 @@ pub(crate) fn build_anthropic_message_content(
                     },
                 ));
             }
-            blocks.push(AnthropicContentBlock::Image {
-                source: AnthropicImageSource::Base64 {
-                    media_type: media.mime_type.clone(),
-                    data: data.clone(),
-                },
-            });
+            AnthropicMediaSource::Base64 {
+                media_type: media.mime_type.clone(),
+                data: data.clone(),
+            }
         } else if !media.uri.is_empty() {
-            blocks.push(AnthropicContentBlock::Image {
-                source: AnthropicImageSource::Url {
-                    url: media.uri.clone(),
-                },
-            });
+            AnthropicMediaSource::Url {
+                url: media.uri.clone(),
+            }
         } else {
             return Err(RStructorError::api_error(
                 "Anthropic",
@@ -124,6 +236,16 @@ pub(crate) fn build_anthropic_message_content(
                     details: "MediaFile must include either inline data or uri".to_string(),
                 },
             ));
+        };
+
+        if is_image {
+            blocks.push(AnthropicContentBlock::Image { source });
+        } else if is_pdf {
+            // PDFs go in a `document` block, never an `image` block; see
+            // https://docs.anthropic.com/en/docs/build-with-claude/pdf-support.
+            blocks.push(AnthropicContentBlock::Document { source });
+        } else {
+            return Err(unsupported_media_type(media, "Anthropic"));
         }
     }
 
@@ -211,4 +333,185 @@ mod tests {
         assert_eq!(json[1]["source"]["media_type"], "image/png");
         assert_eq!(json[1]["source"]["data"], "YWJj");
     }
+
+    #[test]
+    fn test_anthropic_content_with_url_image() {
+        let msg = ChatMessage::user_with_media(
+            "describe image",
+            vec![MediaFile::new("https://example.com/cat.png", "image/png")],
+        );
+        let content = build_anthropic_message_content(&msg).expect("content should build");
+        let json = serde_json::to_value(&content).expect("content should serialize");
+        assert_eq!(json[1]["type"], "image");
+        assert_eq!(
+            json[1]["source"],
+            serde_json::json!({"type": "url", "url": "https://example.com/cat.png"})
+        );
+    }
+
+    // ---- PDF routing: OpenAI ----
+
+    #[test]
+    fn test_openai_inline_pdf_becomes_file_part() {
+        let msg = ChatMessage::user_with_media(
+            "summarize",
+            vec![MediaFile::from_bytes(b"%PDF", "application/pdf")],
+        );
+        let content =
+            build_openai_compatible_message_content(&msg, "OpenAI").expect("content should build");
+        let json = serde_json::to_value(&content).expect("content should serialize");
+        assert_eq!(json[0]["type"], "text");
+        assert_eq!(
+            json[1],
+            serde_json::json!({
+                "type": "file",
+                "file": {
+                    "filename": "document.pdf",
+                    "file_data": "data:application/pdf;base64,JVBERg==",
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn test_openai_url_pdf_is_a_clear_error() {
+        let msg = ChatMessage::user_with_media(
+            "summarize",
+            vec![MediaFile::new(
+                "https://example.com/report.pdf",
+                "application/pdf",
+            )],
+        );
+        let err = build_openai_compatible_message_content(&msg, "OpenAI")
+            .expect_err("URL-based PDFs are not supported by chat completions");
+        let text = err.to_string();
+        assert!(
+            text.contains("URL-based PDF") && text.contains("MediaFile::from_bytes"),
+            "error should explain the fix, got: {text}"
+        );
+    }
+
+    #[test]
+    fn test_openai_non_image_non_pdf_is_a_clear_error() {
+        let msg = ChatMessage::user_with_media(
+            "transcribe",
+            vec![MediaFile::from_bytes(b"abc", "audio/mpeg")],
+        );
+        let err = build_openai_compatible_message_content(&msg, "OpenAI")
+            .expect_err("audio attachments have no chat-completions pathway");
+        let text = err.to_string();
+        assert!(
+            text.contains("unsupported media type") && text.contains("audio/mpeg"),
+            "error should name the offending MIME type, got: {text}"
+        );
+    }
+
+    // ---- PDF routing: Grok ----
+
+    #[test]
+    fn test_grok_inline_pdf_is_a_clear_error_not_an_image_url() {
+        let msg = ChatMessage::user_with_media(
+            "summarize",
+            vec![MediaFile::from_bytes(b"%PDF", "application/pdf")],
+        );
+        let err = build_openai_compatible_message_content(&msg, "Grok")
+            .expect_err("Grok has no documented PDF pathway");
+        let text = err.to_string();
+        assert!(
+            text.contains("PDF attachments are not supported for Grok"),
+            "error should say PDFs are unsupported on Grok, got: {text}"
+        );
+    }
+
+    #[test]
+    fn test_grok_url_pdf_is_a_clear_error() {
+        let msg = ChatMessage::user_with_media(
+            "summarize",
+            vec![MediaFile::new(
+                "https://example.com/report.pdf",
+                "application/pdf",
+            )],
+        );
+        let err = build_openai_compatible_message_content(&msg, "Grok")
+            .expect_err("Grok has no documented PDF pathway");
+        assert!(
+            err.to_string()
+                .contains("PDF attachments are not supported for Grok")
+        );
+    }
+
+    #[test]
+    fn test_grok_images_still_use_image_url() {
+        let msg = ChatMessage::user_with_media(
+            "describe image",
+            vec![MediaFile::from_bytes(b"abc", "image/jpeg")],
+        );
+        let content =
+            build_openai_compatible_message_content(&msg, "Grok").expect("content should build");
+        let json = serde_json::to_value(&content).expect("content should serialize");
+        assert_eq!(json[1]["type"], "image_url");
+        assert_eq!(json[1]["image_url"]["url"], "data:image/jpeg;base64,YWJj");
+    }
+
+    // ---- PDF routing: Anthropic ----
+
+    #[test]
+    fn test_anthropic_inline_pdf_becomes_document_block() {
+        let msg = ChatMessage::user_with_media(
+            "summarize",
+            vec![MediaFile::from_bytes(b"%PDF", "application/pdf")],
+        );
+        let content = build_anthropic_message_content(&msg).expect("content should build");
+        let json = serde_json::to_value(&content).expect("content should serialize");
+        assert_eq!(json[0]["type"], "text");
+        assert_eq!(
+            json[1],
+            serde_json::json!({
+                "type": "document",
+                "source": {
+                    "type": "base64",
+                    "media_type": "application/pdf",
+                    "data": "JVBERg==",
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn test_anthropic_url_pdf_becomes_url_document_block() {
+        let msg = ChatMessage::user_with_media(
+            "summarize",
+            vec![MediaFile::new(
+                "https://example.com/report.pdf",
+                "application/pdf",
+            )],
+        );
+        let content = build_anthropic_message_content(&msg).expect("content should build");
+        let json = serde_json::to_value(&content).expect("content should serialize");
+        assert_eq!(
+            json[1],
+            serde_json::json!({
+                "type": "document",
+                "source": {
+                    "type": "url",
+                    "url": "https://example.com/report.pdf",
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn test_anthropic_non_image_non_pdf_is_a_clear_error() {
+        let msg = ChatMessage::user_with_media(
+            "transcribe",
+            vec![MediaFile::from_bytes(b"abc", "audio/mpeg")],
+        );
+        let err = build_anthropic_message_content(&msg)
+            .expect_err("audio attachments have no Messages API pathway");
+        let text = err.to_string();
+        assert!(
+            text.contains("unsupported media type") && text.contains("audio/mpeg"),
+            "error should name the offending MIME type, got: {text}"
+        );
+    }
 }
diff --git a/src/backend/mock.rs b/src/backend/mock.rs
index c67b986..4f9573f 100644
--- a/src/backend/mock.rs
+++ b/src/backend/mock.rs
@@ -153,6 +153,8 @@ pub enum RequestKind {
     MaterializeWithMedia,
     /// [`LLMClient::generate`](crate::LLMClient::generate)
     Generate,
+    /// [`LLMClient::generate_with_media`](crate::LLMClient::generate_with_media)
+    GenerateWithMedia,
     /// [`LLMClient::generate_with_metadata`](crate::LLMClient::generate_with_metadata)
     GenerateWithMetadata,
     /// [`LLMClient::list_models`](crate::LLMClient::list_models)
@@ -185,7 +187,8 @@ pub struct RecordedRequest {
     pub schema: Option<Value>,
     /// The schema name of the target type, when known.
     pub schema_name: Option<String>,
-    /// Media attached to the call (for `materialize_with_media`).
+    /// Media attached to the call (for `materialize_with_media`,
+    /// `generate_with_media`, and the tool loop).
     pub media: Vec<MediaFile>,
     /// Tool names offered to the call (for the tool loop; empty otherwise).
     #[cfg(feature = "tools")]
@@ -627,6 +630,16 @@ impl LLMClient for MockClient {
         }
     }
 
+    async fn generate_with_media(&self, prompt: &str, media: &[MediaFile]) -> Result<String> {
+        let mut view = MockRequestView::bare(RequestKind::GenerateWithMedia, prompt);
+        view.media = media;
+        self.record(&view);
+        match self.pick_response(&view) {
+            MockResponse::Text(s) => Ok(s),
+            MockResponse::Error(e) => Err(e),
+        }
+    }
+
     async fn generate_with_metadata(&self, prompt: &str) -> Result<GenerateResult> {
         let view = MockRequestView::bare(RequestKind::GenerateWithMetadata, prompt);
         self.record(&view);
@@ -755,11 +768,13 @@ impl crate::backend::tools::ToolRunner for MockClient {
         &self,
         _system: Option<&str>,
         prompt: &str,
+        media: &[MediaFile],
         toolbox: &crate::backend::tools::Toolbox,
         _max_iterations: usize,
     ) -> Result<String> {
         let tool_names = toolbox.tool_names();
         let mut view = MockRequestView::bare(RequestKind::RunToolLoop, prompt);
+        view.media = media;
         view.tool_names = &tool_names;
         self.record(&view);
 
diff --git a/src/backend/mod.rs b/src/backend/mod.rs
index 326aa71..22fd82c 100644
--- a/src/backend/mod.rs
+++ b/src/backend/mod.rs
@@ -78,10 +78,12 @@ pub(crate) use media::{
     AnthropicMessageContent, OpenAICompatibleMessageContent, build_anthropic_message_content,
     build_openai_compatible_message_content,
 };
+#[cfg(feature = "streaming")]
+pub(crate) use openai_compatible::OpenAICompatibleChatMessage;
 #[cfg(feature = "_client")]
 pub(crate) use openai_compatible::{
     OpenAICompatibleChatCompletionRequest, OpenAICompatibleChatCompletionResponse,
-    OpenAICompatibleChatMessage, convert_openai_compatible_chat_messages,
+    convert_openai_compatible_chat_messages,
 };
 #[cfg(feature = "_client")]
 pub(crate) use utils::{
diff --git a/src/backend/openai.rs b/src/backend/openai.rs
index 44658bb..1913841 100644
--- a/src/backend/openai.rs
+++ b/src/backend/openai.rs
@@ -7,11 +7,12 @@ use crate::backend::model_macro::define_model_enum;
 use crate::backend::{
     ChatMessage, GenerateResult, LLMClient, MaterializeInternalOutput, MaterializeResult,
     ModelInfo, OpenAICompatibleChatCompletionRequest, OpenAICompatibleChatCompletionResponse,
-    OpenAICompatibleChatMessage, OpenAICompatibleMessageContent, ResponseFormat, ThinkingLevel,
-    TokenUsage, ValidationFailureContext, check_response_status,
+    ResponseFormat, ThinkingLevel, TokenUsage, ValidationFailureContext, check_response_status,
     convert_openai_compatible_chat_messages, generate_with_retry_with_history, handle_http_error,
     materialize_with_media_with_retry, parse_validate_and_create_output, prepare_strict_schema,
 };
+#[cfg(feature = "streaming")]
+use crate::backend::{OpenAICompatibleChatMessage, OpenAICompatibleMessageContent};
 use crate::error::{ApiErrorKind, RStructorError, Result};
 use crate::model::Instructor;
 
@@ -449,6 +450,110 @@ impl OpenAIClient {
             ))
         }
     }
+
+    /// Internal implementation of raw text generation (no structured output).
+    ///
+    /// Accepts chat messages so that callers can attach media (images/PDFs) to
+    /// the user message; the same content-building path as `materialize_internal`
+    /// is used, so media is encoded per OpenAI's documented multimodal format.
+    async fn generate_internal(&self, messages: &[ChatMessage]) -> Result<GenerateResult> {
+        info!("Generating raw text response with OpenAI");
+
+        // Build reasoning_effort for GPT-5.x models
+        let is_gpt5 = self.config.model.as_str().starts_with("gpt-5");
+        let reasoning_effort = if is_gpt5 {
+            self.config
+                .thinking_level
+                .and_then(|level| level.openai_reasoning_effort().map(|s| s.to_string()))
+        } else {
+            None
+        };
+
+        // GPT-5.x with reasoning requires temperature=1.0
+        let effective_temp = if reasoning_effort.is_some() {
+            1.0
+        } else {
+            self.config.temperature
+        };
+
+        // Build the request for text generation (no structured output)
+        debug!("Building OpenAI API request for text generation");
+        let request = OpenAICompatibleChatCompletionRequest {
+            model: self.config.model.as_str().to_string(),
+            messages: convert_openai_compatible_chat_messages(messages, "OpenAI")?,
+            response_format: None,
+            temperature: effective_temp,
+            max_tokens: self.config.max_tokens,
+            reasoning_effort,
+        };
+
+        // Send the request to OpenAI
+        let base_url = self
+            .config
+            .base_url
+            .as_deref()
+            .unwrap_or("https://api.openai.com/v1");
+        let url = format!("{}/chat/completions", base_url);
+        debug!(url = %url, "Sending request to OpenAI API");
+        let response = self
+            .client
+            .post(&url)
+            .header("Authorization", format!("Bearer {}", self.config.api_key))
+            .header("Content-Type", "application/json")
+            .json(&request)
+            .send()
+            .await
+            .map_err(|e| handle_http_error(e, "OpenAI"))?;
+
+        // Check the response status, then parse the body
+        let response = check_response_status(response, "OpenAI").await?;
+
+        debug!("Successfully received response from OpenAI");
+        let completion: OpenAICompatibleChatCompletionResponse =
+            response.json().await.map_err(|e| {
+                error!(error = %e, "Failed to parse JSON response from OpenAI");
+                e
+            })?;
+
+        if completion.choices.is_empty() {
+            error!("OpenAI returned empty choices array");
+            return Err(RStructorError::api_error(
+                "OpenAI",
+                ApiErrorKind::UnexpectedResponse {
+                    details: "No completion choices returned".to_string(),
+                },
+            ));
+        }
+
+        // Extract usage info
+        let model_name = completion
+            .model
+            .clone()
+            .unwrap_or_else(|| self.config.model.as_str().to_string());
+        let usage = completion
+            .usage
+            .as_ref()
+            .map(|u| TokenUsage::new(model_name, u.prompt_tokens, u.completion_tokens));
+
+        let message = &completion.choices[0].message;
+        trace!(finish_reason = %completion.choices[0].finish_reason, "Completion finish reason");
+
+        if let Some(content) = &message.content {
+            debug!(
+                content_len = content.len(),
+                "Successfully extracted content from response"
+            );
+            Ok(GenerateResult::new(content.clone(), usage))
+        } else {
+            error!("No content in OpenAI response");
+            Err(RStructorError::api_error(
+                "OpenAI",
+                ApiErrorKind::UnexpectedResponse {
+                    details: "No content in response".to_string(),
+                },
+            ))
+        }
+    }
 }
 
 #[cfg(feature = "streaming")]
@@ -523,6 +628,7 @@ impl crate::backend::tools::ToolRunner for OpenAIClient {
         &self,
         system: Option<&str>,
         prompt: &str,
+        media: &[super::MediaFile],
         toolbox: &crate::backend::tools::Toolbox,
         max_iterations: usize,
     ) -> Result<String> {
@@ -554,6 +660,7 @@ impl crate::backend::tools::ToolRunner for OpenAIClient {
             None,
             system,
             prompt,
+            media,
             toolbox,
             max_iterations,
         )
@@ -656,6 +763,26 @@ impl LLMClient for OpenAIClient {
         Ok(result.text)
     }
 
+    #[instrument(
+        name = "openai_generate_with_media",
+        skip(self, prompt, media),
+        fields(
+            model = %self.config.model.as_str(),
+            prompt_len = prompt.len(),
+            media_len = media.len()
+        )
+    )]
+    async fn generate_with_media(
+        &self,
+        prompt: &str,
+        media: &[super::MediaFile],
+    ) -> Result<String> {
+        // One user turn carries prompt + attachments; only the reply text is returned.
+        let user_turn = ChatMessage::user_with_media(prompt, media.to_vec());
+        let reply = self.generate_internal(&[user_turn]).await;
+        reply.map(|generated| generated.text)
+    }
+
     #[instrument(
         name = "openai_generate_with_metadata",
         skip(self, prompt),
@@ -665,105 +792,7 @@ impl LLMClient for OpenAIClient {
         )
     )]
     async fn generate_with_metadata(&self, prompt: &str) -> Result<GenerateResult> {
-        info!("Generating raw text response with OpenAI");
-
-        // Build reasoning_effort for GPT-5.x models
-        let is_gpt5 = self.config.model.as_str().starts_with("gpt-5");
-        let reasoning_effort = if is_gpt5 {
-            self.config
-                .thinking_level
-                .and_then(|level| level.openai_reasoning_effort().map(|s| s.to_string()))
-        } else {
-            None
-        };
-
-        // GPT-5.x with reasoning requires temperature=1.0
-        let effective_temp = if reasoning_effort.is_some() {
-            1.0
-        } else {
-            self.config.temperature
-        };
-
-        // Build the request for text generation (no structured output)
-        debug!("Building OpenAI API request for text generation");
-        let request = OpenAICompatibleChatCompletionRequest {
-            model: self.config.model.as_str().to_string(),
-            messages: vec![OpenAICompatibleChatMessage {
-                role: "user".to_string(),
-                content: OpenAICompatibleMessageContent::Text(prompt.to_string()),
-            }],
-            response_format: None,
-            temperature: effective_temp,
-            max_tokens: self.config.max_tokens,
-            reasoning_effort,
-        };
-
-        // Send the request to OpenAI
-        let base_url = self
-            .config
-            .base_url
-            .as_deref()
-            .unwrap_or("https://api.openai.com/v1");
-        let url = format!("{}/chat/completions", base_url);
-        debug!(url = %url, "Sending request to OpenAI API");
-        let response = self
-            .client
-            .post(&url)
-            .header("Authorization", format!("Bearer {}", self.config.api_key))
-            .header("Content-Type", "application/json")
-            .json(&request)
-            .send()
-            .await
-            .map_err(|e| handle_http_error(e, "OpenAI"))?;
-
-        // Parse the response
-        let response = check_response_status(response, "OpenAI").await?;
-
-        debug!("Successfully received response from OpenAI");
-        let completion: OpenAICompatibleChatCompletionResponse =
-            response.json().await.map_err(|e| {
-                error!(error = %e, "Failed to parse JSON response from OpenAI");
-                e
-            })?;
-
-        if completion.choices.is_empty() {
-            error!("OpenAI returned empty choices array");
-            return Err(RStructorError::api_error(
-                "OpenAI",
-                ApiErrorKind::UnexpectedResponse {
-                    details: "No completion choices returned".to_string(),
-                },
-            ));
-        }
-
-        // Extract usage info
-        let model_name = completion
-            .model
-            .clone()
-            .unwrap_or_else(|| self.config.model.as_str().to_string());
-        let usage = completion
-            .usage
-            .as_ref()
-            .map(|u| TokenUsage::new(model_name, u.prompt_tokens, u.completion_tokens));
-
-        let message = &completion.choices[0].message;
-        trace!(finish_reason = %completion.choices[0].finish_reason, "Completion finish reason");
-
-        if let Some(content) = &message.content {
-            debug!(
-                content_len = content.len(),
-                "Successfully extracted content from response"
-            );
-            Ok(GenerateResult::new(content.clone(), usage))
-        } else {
-            error!("No content in OpenAI response");
-            Err(RStructorError::api_error(
-                "OpenAI",
-                ApiErrorKind::UnexpectedResponse {
-                    details: "No content in response".to_string(),
-                },
-            ))
-        }
+        self.generate_internal(&[ChatMessage::user(prompt)]).await
     }
 
     #[cfg(feature = "streaming")]
diff --git a/src/backend/request.rs b/src/backend/request.rs
index 90cc674..65d6723 100644
--- a/src/backend/request.rs
+++ b/src/backend/request.rs
@@ -57,7 +57,8 @@ impl<'a, C: ?Sized> Request<'a, C> {
         self
     }
 
-    /// Attach media (images) to the request (used by `materialize`).
+    /// Attach media (images, or PDFs where the provider supports them) to the
+    /// request. Used by `materialize`, `generate`, and `run`.
     #[must_use]
     pub fn media(mut self, media: impl Into<Vec<MediaFile>>) -> Self {
         self.media = media.into();
@@ -106,9 +107,14 @@ impl<C: LLMClient + Sync + ?Sized> Request<'_, C> {
         }
     }
 
-    /// Generate raw text, applying any attached system context.
+    /// Generate raw text, applying any attached system context and media.
     pub async fn generate(self, prompt: &str) -> Result<String> {
-        self.client.generate(&self.combined(prompt)).await
+        let prompt = self.combined(prompt);
+        if self.media.is_empty() {
+            self.client.generate(&prompt).await
+        } else {
+            self.client.generate_with_media(&prompt, &self.media).await
+        }
     }
 }
 
@@ -170,16 +176,30 @@ impl<'a, C: LLMClient + Sync + ?Sized> Request<'a, C> {
 #[cfg(feature = "tools")]
 impl<C: crate::backend::tools::ToolRunner + LLMClient + Sync + ?Sized> Request<'_, C> {
     /// Get a text answer, letting the model call attached tools (if any) in a loop
-    /// until it produces a final response. With no tools attached this is
-    /// equivalent to [`generate`](Self::generate).
+    /// until it produces a final response. Attached media is included in the
+    /// initial user turn. With no tools attached this is equivalent to
+    /// [`generate`](Self::generate).
     pub async fn run(self, prompt: &str) -> Result<String> {
         match self.tools {
             Some(toolbox) => {
                 self.client
-                    .run_tool_loop(self.system.as_deref(), prompt, toolbox, self.max_iterations)
+                    .run_tool_loop(
+                        self.system.as_deref(),
+                        prompt,
+                        &self.media,
+                        toolbox,
+                        self.max_iterations,
+                    )
                     .await
             }
-            None => self.client.generate(&self.combined(prompt)).await,
+            None => {
+                let prompt = self.combined(prompt);
+                if self.media.is_empty() {
+                    self.client.generate(&prompt).await
+                } else {
+                    self.client.generate_with_media(&prompt, &self.media).await
+                }
+            }
         }
     }
 }
@@ -199,7 +219,8 @@ pub trait RequestExt: LLMClient {
         Request::new(self).system(system)
     }
 
-    /// Start a request with attached media (images).
+    /// Start a request with attached media (images, or PDFs where the provider
+    /// supports them).
     fn with_media<'a>(&'a self, media: &'a [MediaFile]) -> Request<'a, Self> {
         Request::new(self).media(media.to_vec())
     }
diff --git a/src/backend/tools.rs b/src/backend/tools.rs
index afdfe74..e640096 100644
--- a/src/backend/tools.rs
+++ b/src/backend/tools.rs
@@ -264,6 +264,7 @@ pub(crate) async fn run_openai_compatible_tools(
     reasoning_effort: Option<String>,
     system: Option<&str>,
     prompt: &str,
+    media: &[crate::backend::MediaFile],
     toolbox: &Toolbox,
     max_iterations: usize,
 ) -> Result<String> {
@@ -276,7 +277,12 @@ pub(crate) async fn run_openai_compatible_tools(
     if let Some(system) = system {
         messages.push(json!({ "role": "system", "content": system }));
     }
-    messages.push(json!({ "role": "user", "content": prompt }));
+    // Encode any attached media with the same content builder as materialize, so
+    // images/PDFs are carried (or rejected with a clear error) per provider rules.
+    let user_msg = crate::backend::ChatMessage::user_with_media(prompt, media.to_vec());
+    let user_content =
+        crate::backend::build_openai_compatible_message_content(&user_msg, provider)?;
+    messages.push(json!({ "role": "user", "content": user_content }));
 
     for iteration in 0..max_iterations {
         let mut body = json!({
@@ -396,6 +402,7 @@ pub(crate) async fn run_anthropic_tools(
     max_tokens: u32,
     system: Option<&str>,
     prompt: &str,
+    media: &[crate::backend::MediaFile],
     toolbox: &Toolbox,
     max_iterations: usize,
 ) -> Result<String> {
@@ -405,7 +412,11 @@ pub(crate) async fn run_anthropic_tools(
 
     let tools_json = toolbox.anthropic_tools_json();
     let url = format!("{base_url}/messages");
-    let mut messages: Vec<Value> = vec![json!({ "role": "user", "content": prompt })];
+    // Encode any attached media with the same content builder as materialize, so
+    // images/PDFs are carried (or rejected with a clear error) per provider rules.
+    let user_msg = crate::backend::ChatMessage::user_with_media(prompt, media.to_vec());
+    let user_content = crate::backend::build_anthropic_message_content(&user_msg)?;
+    let mut messages: Vec<Value> = vec![json!({ "role": "user", "content": user_content })];
 
     for _ in 0..max_iterations {
         let mut body = json!({
@@ -495,6 +506,7 @@ pub(crate) async fn run_gemini_tools(
     max_tokens: Option<u32>,
     system: Option<&str>,
     prompt: &str,
+    media: &[crate::backend::MediaFile],
     toolbox: &Toolbox,
     max_iterations: usize,
 ) -> Result<String> {
@@ -504,7 +516,21 @@ pub(crate) async fn run_gemini_tools(
 
     let tools_json = toolbox.gemini_tools_json();
     let url = format!("{base_url}/models/{model}:generateContent");
-    let mut contents: Vec<Value> = vec![json!({ "role": "user", "parts": [{ "text": prompt }] })];
+    // Attach any media to the initial user turn, mirroring the materialize path:
+    // inline base64 data becomes `inlineData`, URI references become `fileData`.
+    let mut user_parts: Vec<Value> = vec![json!({ "text": prompt })];
+    for m in media {
+        if let Some(data) = m.data.as_ref() {
+            user_parts.push(json!({
+                "inlineData": { "mimeType": m.mime_type, "data": data }
+            }));
+        } else {
+            user_parts.push(json!({
+                "fileData": { "mimeType": m.mime_type, "fileUri": m.uri }
+            }));
+        }
+    }
+    let mut contents: Vec<Value> = vec![json!({ "role": "user", "parts": user_parts })];
 
     for _ in 0..max_iterations {
         let mut generation_config = json!({ "temperature": temperature });
@@ -585,7 +611,8 @@ pub(crate) async fn run_gemini_tools(
 ///
 /// Implemented for each provider client and driven by the fluent
 /// [`Request`](crate::Request) builder (`client.with_tools(..).run(..)`); not
-/// called directly.
+/// called directly. `media` carries any attachments from
+/// [`Request::media`](crate::Request::media), included in the initial user turn.
 #[doc(hidden)]
 #[async_trait]
 pub trait ToolRunner {
@@ -593,6 +620,7 @@ pub trait ToolRunner {
         &self,
         system: Option<&str>,
         prompt: &str,
+        media: &[crate::backend::MediaFile],
         toolbox: &Toolbox,
         max_iterations: usize,
     ) -> Result<String>;
diff --git a/tests/http_mock_tests.rs b/tests/http_mock_tests.rs
index 7050724..4dfc331 100644
--- a/tests/http_mock_tests.rs
+++ b/tests/http_mock_tests.rs
@@ -275,6 +275,115 @@ async fn generate_null_content_is_unexpected_response() {
     m.assert_async().await;
 }
 
+// ---------------------------------------------------------------------------
+// generate / run carry attached media in the request body (offline_mockito)
+// ---------------------------------------------------------------------------
+
+/// Plain-text generation used to discard attachments silently. With an image
+/// attached, `with_media(..).generate(..)` has to serialize it into the request
+/// body as an `image_url` content part alongside the prompt text.
+#[tokio::test]
+async fn generate_request_body_carries_attached_image() {
+    use rstructor::{MediaFile, RequestExt};
+
+    let attachments = [MediaFile::from_bytes(b"abc", "image/png")];
+    let expected_body = json!({
+        "messages": [{
+            "role": "user",
+            "content": [
+                { "type": "text", "text": "describe" },
+                {
+                    "type": "image_url",
+                    "image_url": { "url": "data:image/png;base64,YWJj", "detail": "auto" },
+                },
+            ],
+        }],
+    });
+    let mut server = mockito::Server::new_async().await;
+    let image_mock = server
+        .mock("POST", "/chat/completions")
+        .match_body(mockito::Matcher::PartialJson(expected_body))
+        .with_status(200)
+        .with_body(chat_completion("a red square"))
+        .expect(1)
+        .create_async()
+        .await;
+
+    let reply = client(&server)
+        .with_media(&attachments)
+        .generate("describe")
+        .await;
+    assert_eq!(reply.unwrap(), "a red square");
+    image_mock.assert_async().await;
+}
+
+/// An inline PDF passed to `generate_with_media` must go out as OpenAI's `file`
+/// content part (`filename` plus base64 `file_data`), never as `image_url`.
+#[tokio::test]
+async fn generate_request_body_carries_attached_pdf_as_file_part() {
+    use rstructor::MediaFile;
+
+    let inline_pdf = [MediaFile::from_bytes(b"%PDF", "application/pdf")];
+    let expected_body = json!({
+        "messages": [{
+            "role": "user",
+            "content": [
+                { "type": "text", "text": "summarize" },
+                {
+                    "type": "file",
+                    "file": {
+                        "filename": "document.pdf",
+                        "file_data": "data:application/pdf;base64,JVBERg==",
+                    },
+                },
+            ],
+        }],
+    });
+    let mut server = mockito::Server::new_async().await;
+    let pdf_mock = server
+        .mock("POST", "/chat/completions")
+        .match_body(mockito::Matcher::PartialJson(expected_body))
+        .with_status(200)
+        .with_body(chat_completion("a summary"))
+        .expect(1)
+        .create_async()
+        .await;
+
+    let summary = client(&server)
+        .generate_with_media("summarize", &inline_pdf)
+        .await;
+    assert_eq!(summary.unwrap(), "a summary");
+    pdf_mock.assert_async().await;
+}
+
+/// Chat completions has no way to take a PDF by URL, so `generate_with_media`
+/// has to reject one with a clear error before any HTTP request goes out.
+#[tokio::test]
+async fn generate_with_url_pdf_errors_without_sending_request() {
+    use rstructor::MediaFile;
+
+    let remote_pdf = [MediaFile::new(
+        "https://example.com/report.pdf",
+        "application/pdf",
+    )];
+    let mut server = mockito::Server::new_async().await;
+    let untouched = server
+        .mock("POST", "/chat/completions")
+        .expect(0) // zero hits allowed: the failure must happen client-side
+        .create_async()
+        .await;
+
+    let outcome = client(&server)
+        .generate_with_media("summarize", &remote_pdf)
+        .await;
+    let err = outcome.unwrap_err();
+    assert!(
+        err.to_string().contains("URL-based PDF"),
+        "expected a clear URL-PDF error, got: {err}"
+    );
+    untouched.assert_async().await;
+}
+
 // ---------------------------------------------------------------------------
 // reasoning_effort + temperature override per model (offline_mockito)
 // ---------------------------------------------------------------------------
@@ -788,3 +897,48 @@ async fn tool_loop_exhaustion_errors() {
     );
     always_tool.assert_async().await;
 }
+
+/// `with_tools(..).media(..).run(..)` must put the attached media into the tool
+/// loop's initial user turn (it used to be silently dropped on the `run` path).
+/// The single mocked round trip ends with the answer, so the tool must not run.
+#[cfg(feature = "tools")]
+#[tokio::test]
+async fn tool_run_request_body_carries_attached_media() {
+    use rstructor::{MediaFile, RequestExt, Toolbox};
+    use std::sync::Arc;
+
+    let mut server = mockito::Server::new_async().await;
+    let m = server
+        .mock("POST", "/chat/completions")
+        .match_body(mockito::Matcher::PartialJson(json!({
+            "messages": [{
+                "role": "user",
+                "content": [
+                    { "type": "text", "text": "what is in the image?" },
+                    {
+                        "type": "image_url",
+                        "image_url": { "url": "data:image/png;base64,YWJj", "detail": "auto" },
+                    },
+                ],
+            }],
+        })))
+        .with_status(200)
+        .with_body(chat_completion("a red square"))
+        .expect(1)
+        .create_async()
+        .await;
+
+    let invoked = Arc::new(std::sync::atomic::AtomicBool::new(false));
+    let toolbox = Toolbox::new().with(recording_add_tool(invoked.clone()));
+    let media = [MediaFile::from_bytes(b"abc", "image/png")];
+    let answer = client(&server)
+        .with_tools(&toolbox)
+        .media(media.to_vec())
+        .run("what is in the image?")
+        .await
+        .unwrap();
+
+    assert_eq!(answer, "a red square");
+    assert!(!invoked.load(std::sync::atomic::Ordering::SeqCst));
+    m.assert_async().await;
+}
diff --git a/tests/mock_edge_tests.rs b/tests/mock_edge_tests.rs
index 8dd0501..d27ca44 100644
--- a/tests/mock_edge_tests.rs
+++ b/tests/mock_edge_tests.rs
@@ -347,6 +347,25 @@ mod builder {
         assert_eq!(req.kind, RequestKind::Materialize);
         assert_eq!(req.prompt, "B\n\nhi");
     }
+
+    /// Attaching media before `generate(..)` must dispatch to
+    /// `generate_with_media`, so the attachment is handed over, not dropped.
+    #[tokio::test]
+    async fn with_media_generate_routes_to_generate_with_media() {
+        let attachments = [MediaFile::new("u", "image/png")];
+        let llm = MockClient::new().with_response("a caption");
+        let caption = llm
+            .with_media(&attachments)
+            .generate("describe")
+            .await;
+        assert_eq!(caption.unwrap(), "a caption");
+
+        let recorded = llm.last_request().unwrap();
+        assert_eq!(recorded.kind, RequestKind::GenerateWithMedia);
+        assert_eq!(recorded.prompt, "describe");
+        assert_eq!(recorded.media.len(), 1);
+        assert_eq!(recorded.media[0].mime_type, "image/png");
+    }
 }
 
 // ---------------------------------------------------------------------------
@@ -373,4 +392,46 @@ mod tools {
             "no tools were attached, so no tool loop should have run"
         );
     }
+
+    /// Media attached to a tool-less `run` must not be lost: the call has to
+    /// fall back to `generate_with_media` and hand the attachment over.
+    #[tokio::test]
+    async fn run_with_no_tools_and_media_falls_back_to_generate_with_media() {
+        let attachments = [rstructor::MediaFile::new("u", "image/png")];
+        let llm = MockClient::new().with_response("answer");
+        let reply = llm.with_media(&attachments).run("hi").await;
+        assert_eq!(reply.unwrap(), "answer");
+        let recorded = llm.last_request().unwrap();
+        assert_eq!(recorded.kind, RequestKind::GenerateWithMedia);
+        assert_eq!(recorded.media.len(), 1);
+    }
+
+    /// With tools attached, `run` must pass the media on to the tool loop.
+    #[tokio::test]
+    async fn run_with_tools_carries_media_into_tool_loop() {
+        use rstructor::{FnTool, Instructor, Toolbox};
+        use serde::{Deserialize, Serialize};
+
+        #[derive(Instructor, Serialize, Deserialize)]
+        struct EchoArgs {
+            value: String,
+        }
+
+        let echo_tool = FnTool::new("echo", "Echo", |args: EchoArgs| {
+            std::future::ready(Ok(serde_json::json!(args.value)))
+        });
+        let toolbox = Toolbox::new().with(echo_tool);
+        let attachments = [rstructor::MediaFile::new("u", "image/png")];
+        let llm = MockClient::new().with_response("done");
+        let reply = llm
+            .with_tools(&toolbox)
+            .media(attachments.to_vec())
+            .run("hi")
+            .await;
+        assert_eq!(reply.unwrap(), "done");
+        let recorded = llm.last_request().unwrap();
+        assert_eq!(recorded.kind, RequestKind::RunToolLoop);
+        assert_eq!(recorded.media.len(), 1, "media must reach the tool loop");
+        assert_eq!(recorded.tool_names, vec!["echo"]);
+    }
 }