diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/PromptRunner.kt b/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/PromptRunner.kt
index e55040881..e51099a98 100644
--- a/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/PromptRunner.kt
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/PromptRunner.kt
@@ -18,6 +18,7 @@ package com.embabel.agent.api.common
 import com.embabel.agent.api.annotation.support.AgenticInfo
 import com.embabel.agent.api.common.nested.ObjectCreator
 import com.embabel.agent.api.common.nested.TemplateOperations
+import com.embabel.agent.api.common.thinking.ThinkingPromptRunnerOperations
 import com.embabel.agent.api.tool.Tool
 import com.embabel.agent.core.Agent
 import com.embabel.agent.core.AgentPlatform
@@ -27,6 +28,7 @@ import com.embabel.agent.spi.LlmUse
 import com.embabel.chat.AssistantMessage
 import com.embabel.chat.Message
 import com.embabel.common.ai.model.LlmOptions
+import com.embabel.common.ai.model.Thinking
 import com.embabel.common.ai.prompt.PromptContributor
 import com.embabel.common.ai.prompt.PromptElement
 import com.embabel.common.core.streaming.StreamingCapability
@@ -389,6 +391,59 @@ interface PromptRunner : LlmUse, PromptRunnerOperations {
         )
     }
 
+    /**
+     * Check if thinking extraction capabilities are supported by the underlying implementation.
+     *
+     * Thinking capabilities allow extraction of thinking blocks (like `<think>...</think>`)
+     * from LLM responses and provide access to both the result and the extracted thinking content.
+     * Always check this before calling thinking() to avoid exceptions.
+     *
+     * Note: Thinking and streaming capabilities are mutually exclusive.
+     *
+     * @return true if thinking extraction is supported, false if thinking is not available
+     */
+    fun supportsThinking(): Boolean = false
+
+
+    /**
+     * Create a thinking-enhanced version of this prompt runner.
+     *
+     * Returns a PromptRunner where all operations (createObject, generateText, etc.)
+     * return ThinkingResponse<T> wrappers that include both results and extracted
+     * thinking blocks from the LLM response.
+     *
+     * Always check supportsThinking() first and ensure LlmOptions includes thinking configuration
+     * via withLlm(LlmOptions.withThinking(Thinking.withExtraction())).
+     *
+     * Note: Thinking and streaming capabilities are mutually exclusive.
+     *
+     * @return ThinkingCapability instance providing access to thinking-aware operations
+     * @throws UnsupportedOperationException if thinking is not supported by this implementation
+     * @throws IllegalArgumentException if thinking is not enabled in LlmOptions configuration
+     */
+    fun withThinking(): ThinkingPromptRunnerOperations {
+        if (!supportsThinking()) {
+            throw UnsupportedOperationException(
+                """
+                Thinking not supported by this PromptRunner implementation.
+                Check supportsThinking() before calling withThinking().
+                """.trimIndent()
+            )
+        }
+
+        val thinking = llm?.thinking
+        require(thinking != null && thinking != Thinking.NONE) {
+            """
+            Thinking capability requires thinking to be enabled in LlmOptions.
+            Use withLlm(LlmOptions.withThinking(Thinking.withExtraction()))
+            """.trimIndent()
+        }
+
+        // For implementations that support thinking but haven't overridden withThinking(),
+        // they should provide their own implementation
+        error("Implementation error: supportsThinking() returned true but withThinking() not overridden")
+    }
+
     override fun respond(
         messages: List<Message>,
     ): AssistantMessage =
diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/support/OperationContextPromptRunner.kt b/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/support/OperationContextPromptRunner.kt
index 742227645..c0d3fcb5e 100644
--- a/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/support/OperationContextPromptRunner.kt
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/support/OperationContextPromptRunner.kt
@@ -23,6 +23,8 @@ import com.embabel.agent.api.common.streaming.StreamingPromptRunner
 import com.embabel.agent.api.common.streaming.StreamingPromptRunnerOperations
 import com.embabel.agent.api.common.support.streaming.StreamingCapabilityDetector
 import com.embabel.agent.api.common.support.streaming.StreamingPromptRunnerOperationsImpl
+import com.embabel.agent.api.common.thinking.ThinkingPromptRunnerOperations
+import com.embabel.agent.api.common.thinking.support.ThinkingPromptRunnerOperationsImpl
 import com.embabel.agent.api.tool.Tool
 import com.embabel.agent.core.ProcessOptions
 import com.embabel.agent.core.ToolGroup
@@ -41,6 +43,7 @@ import com.embabel.chat.ImagePart
 import com.embabel.chat.Message
 import com.embabel.chat.UserMessage
 import com.embabel.common.ai.model.LlmOptions
+import com.embabel.common.ai.model.Thinking
 import com.embabel.common.ai.prompt.PromptContributor
 import com.embabel.common.core.types.ZeroToOne
 import com.embabel.common.util.loggerFor
@@ -309,9 +312,11 @@ internal data class OperationContextPromptRunner(
     override fun stream(): StreamingPromptRunnerOperations {
         if (!supportsStreaming()) {
             throw UnsupportedOperationException(
-                "Streaming not supported by underlying LLM model. " +
-                "Model type: ${context.agentPlatform().platformServices.llmOperations::class.simpleName}. " +
-                "Check supportsStreaming() before calling stream()."
+                """
+                Streaming not supported by underlying LLM model.
+                Model type: ${context.agentPlatform().platformServices.llmOperations::class.simpleName}.
+                Check supportsStreaming() before calling stream().
+                """.trimIndent()
             )
         }
 
@@ -335,4 +340,50 @@ internal data class OperationContextPromptRunner(
             action = action,
         )
     }
+
+    /**
+     * Create thinking-aware prompt operations that extract LLM reasoning blocks.
+     *
+     * This method creates ThinkingPromptRunnerOperations that can capture both the
+     * converted results and the reasoning content that LLMs generate during processing.
+     *
+     * @return ThinkingPromptRunnerOperations for executing prompts with thinking extraction
+     * @throws UnsupportedOperationException if the underlying LLM operations don't support thinking extraction
+     */
+    override fun supportsThinking(): Boolean = true
+
+    override fun withThinking(): ThinkingPromptRunnerOperations {
+        val llmOperations = context.agentPlatform().platformServices.llmOperations
+
+        if (llmOperations !is ChatClientLlmOperations) {
+            throw UnsupportedOperationException(
+                """
+                Thinking extraction not supported by underlying LLM operations.
+                Operations type: ${llmOperations::class.simpleName}.
+                Thinking extraction requires ChatClientLlmOperations.
+                """.trimIndent()
+            )
+        }
+11
+        // Auto-enable thinking extraction when withThinking() is called
+        val thinkingEnabledLlm = llm.withThinking(Thinking.withExtraction())
+
+        return ThinkingPromptRunnerOperationsImpl(
+            chatClientOperations = llmOperations,
+            interaction = LlmInteraction(
+                llm = thinkingEnabledLlm,
+                toolGroups = toolGroups,
+                toolCallbacks = safelyGetToolCallbacks(toolObjects) + otherToolCallbacks,
+                promptContributors = promptContributors + contextualPromptContributors.map {
+                    it.toPromptContributor(context)
+                },
+                id = interactionId ?: InteractionId("${context.operation.name}-thinking"),
+                generateExamples = generateExamples,
+                propertyFilter = propertyFilter,
+            ),
+            messages = messages,
+            agentProcess = context.processContext.agentProcess,
+            action = action,
+        )
+    }
 }
diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperations.kt b/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperations.kt
new file mode 100644
index 000000000..e0535b142
--- /dev/null
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperations.kt
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.agent.api.common.thinking
+
+import com.embabel.agent.api.common.MultimodalContent
+import com.embabel.chat.AssistantMessage
+import com.embabel.common.core.thinking.ThinkingResponse
+import com.embabel.common.core.thinking.ThinkingCapability
+import com.embabel.chat.Message
+import com.embabel.common.core.types.ZeroToOne
+
+/**
+ * User-facing interface for executing prompts with thinking block extraction.
+ *
+ * This interface provides thinking-aware versions of standard prompt operations,
+ * returning both the converted results and the reasoning content that LLMs
+ * generated during their processing.
+ *
+ * ## Usage
+ *
+ * Access this interface through the `withThinking()` extension:
+ * ```kotlin
+ * val result = promptRunner.withThinking().createObject("analyze this", Person::class.java)
+ * val person = result.result        // The converted Person object
+ * val thinking = result.thinkingBlocks // List of reasoning blocks
+ * ```
+ *
+ * ## Thinking Block Extraction
+ *
+ * This interface automatically extracts thinking content in various formats:
+ * - Tagged thinking: `<think>reasoning here</think>`, `<analysis>content</analysis>`
+ * - Prefix thinking: `//THINKING: reasoning here`
+ * - Untagged thinking: raw text content before JSON objects
+ *
+ * ## Relationship to Regular Operations
+ *
+ * Unlike [com.embabel.agent.api.common.PromptRunnerOperations] which returns
+ * direct objects, all methods in this interface return [ThinkingResponse]
+ * wrappers that provide access to both results and reasoning.
+ *
+ * @see com.embabel.agent.api.common.PromptRunnerOperations for standard operations
+ * @see ThinkingResponse for the response wrapper
+ * @see com.embabel.common.core.thinking.ThinkingBlock for thinking content details
+ */
+interface ThinkingPromptRunnerOperations : ThinkingCapability {
+
+    /**
+     * Generate text with thinking block extraction.
+     *
+     * @param prompt The text prompt to send to the LLM
+     * @return Response containing both generated text and extracted thinking blocks
+     */
+    infix fun generateText(prompt: String): ThinkingResponse<String> =
+        createObject(
+            prompt = prompt,
+            outputClass = String::class.java,
+        )
+
+    /**
+     * Create an object of the given type with thinking block extraction.
+     *
+     * Uses the given prompt and LLM options from context to generate a structured
+     * object while capturing the LLM's reasoning process.
+     *
+     * @param T The type of object to create
+     * @param prompt The text prompt to send to the LLM
+     * @param outputClass The class of the object to create
+     * @return Response containing both the converted object and extracted thinking blocks
+     */
+    fun <T> createObject(
+        prompt: String,
+        outputClass: Class<T>,
+    ): ThinkingResponse<T> = createObject(
+        messages = listOf(com.embabel.chat.UserMessage(prompt)),
+        outputClass = outputClass,
+    )
+
+    /**
+     * Try to create an object of the given type with thinking block extraction.
+     *
+     * Similar to [createObject] but designed for scenarios where the conversion
+     * might fail. Returns thinking blocks even when object creation fails.
+     *
+     * @param T The type of object to create
+     * @param prompt The text prompt to send to the LLM
+     * @param outputClass The class of the object to create
+     * @return Response with potentially null result but always available thinking blocks
+     */
+    fun <T> createObjectIfPossible(
+        prompt: String,
+        outputClass: Class<T>,
+    ): ThinkingResponse<T?> = createObjectIfPossible(
+        listOf(com.embabel.chat.UserMessage(prompt)),
+        outputClass
+    )
+
+    /**
+     * Try to create an object from messages with thinking block extraction.
+     *
+     * @param T The type of object to create
+     * @param messages The conversation messages to send to the LLM
+     * @param outputClass The class of the object to create
+     * @return Response with potentially null result but always available thinking blocks
+     */
+    fun <T> createObjectIfPossible(
+        messages: List<Message>,
+        outputClass: Class<T>,
+    ): ThinkingResponse<T?>
+
+    /**
+     * Create an object from messages with thinking block extraction.
+     *
+     * @param T The type of object to create
+     * @param messages The conversation messages to send to the LLM
+     * @param outputClass The class of the object to create
+     * @return Response containing both the converted object and extracted thinking blocks
+     */
+    fun <T> createObject(
+        messages: List<Message>,
+        outputClass: Class<T>,
+    ): ThinkingResponse<T>
+
+    /**
+     * Generate text from multimodal content with thinking block extraction.
+     *
+     * @param content The multimodal content (text + images) to send to the LLM
+     * @return Response containing both generated text and extracted thinking blocks
+     */
+    fun generateText(content: MultimodalContent): ThinkingResponse<String> =
+        createObject(
+            content = content,
+            outputClass = String::class.java,
+        )
+
+    /**
+     * Create an object from multimodal content with thinking block extraction.
+     *
+     * @param T The type of object to create
+     * @param content The multimodal content (text + images) to send to the LLM
+     * @param outputClass The class of the object to create
+     * @return Response containing both the converted object and extracted thinking blocks
+     */
+    fun <T> createObject(
+        content: MultimodalContent,
+        outputClass: Class<T>,
+    ): ThinkingResponse<T> = createObject(
+        messages = listOf(com.embabel.chat.UserMessage(content.toContentParts())),
+        outputClass = outputClass,
+    )
+
+    /**
+     * Try to create an object from multimodal content with thinking block extraction.
+     *
+     * @param T The type of object to create
+     * @param content The multimodal content (text + images) to send to the LLM
+     * @param outputClass The class of the object to create
+     * @return Response with potentially null result but always available thinking blocks
+     */
+    fun <T> createObjectIfPossible(
+        content: MultimodalContent,
+        outputClass: Class<T>,
+    ): ThinkingResponse<T?> = createObjectIfPossible(
+        listOf(com.embabel.chat.UserMessage(content.toContentParts())),
+        outputClass
+    )
+
+    /**
+     * Respond in a conversation with multimodal content and thinking block extraction.
+     *
+     * @param content The multimodal content to respond to
+     * @return Response containing both the assistant message and extracted thinking blocks
+     */
+    fun respond(
+        content: MultimodalContent,
+    ): ThinkingResponse<AssistantMessage> = respond(
+        listOf(com.embabel.chat.UserMessage(content.toContentParts()))
+    )
+
+    /**
+     * Respond in a conversation with thinking block extraction.
+     *
+     * @param messages The conversation messages to respond to
+     * @return Response containing both the assistant message and extracted thinking blocks
+     */
+    fun respond(
+        messages: List<Message>,
+    ): ThinkingResponse<AssistantMessage>
+
+    /**
+     * Evaluate a condition with thinking block extraction.
+     *
+     * Evaluates a boolean condition using the LLM while capturing its reasoning process.
+     *
+     * @param condition The condition to evaluate
+     * @param context The context for evaluation
+     * @param confidenceThreshold The confidence threshold for the evaluation
+     * @return Response containing both the evaluation result and extracted thinking blocks
+     */
+    fun evaluateCondition(
+        condition: String,
+        context: String,
+        confidenceThreshold: ZeroToOne = 0.8,
+    ): ThinkingResponse<Boolean>
+}
diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/thinking/support/ThinkingPromptRunnerOperationsImpl.kt b/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/thinking/support/ThinkingPromptRunnerOperationsImpl.kt
new file mode 100644
index 000000000..9b544223a
--- /dev/null
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/agent/api/common/thinking/support/ThinkingPromptRunnerOperationsImpl.kt
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.agent.api.common.thinking.support
+
+import com.embabel.agent.api.common.thinking.ThinkingPromptRunnerOperations
+import com.embabel.agent.core.Action
+import com.embabel.agent.core.AgentProcess
+import com.embabel.agent.spi.LlmInteraction
+import com.embabel.agent.spi.support.springai.ChatClientLlmOperations
+import com.embabel.chat.AssistantMessage
+import com.embabel.common.core.thinking.ThinkingResponse
+import com.embabel.chat.Message
+import com.embabel.common.core.thinking.ThinkingException
+import com.embabel.common.core.types.ZeroToOne
+
+/**
+ * Implementation of thinking-aware prompt operations.
+ *
+ * This class provides thinking block extraction by delegating directly to
+ * ChatClientLlmOperations SPI layer's doTransformWithThinking methods.
+ *
+ * ## Architecture
+ *
+ * Following the pattern established by StreamingPromptRunnerOperationsImpl:
+ *
+ * ```
+ * ThinkingPromptRunnerOperationsImpl → ChatClientLlmOperations.doTransformWithThinking
+ * ```
+ *
+ * @param chatClientOperations The underlying ChatClient operations that support thinking extraction
+ * @param interaction The LLM interaction configuration including options and tools
+ * @param messages The conversation messages accumulated so far
+ * @param agentProcess The agent process context for this operation
+ * @param action The action context if running within an action
+ */
+internal class ThinkingPromptRunnerOperationsImpl(
+    private val chatClientOperations: ChatClientLlmOperations,
+    private val interaction: LlmInteraction,
+    private val messages: List<Message>,
+    private val agentProcess: AgentProcess,
+    private val action: Action?,
+) : ThinkingPromptRunnerOperations {
+
+    override fun <T> createObjectIfPossible(
+        messages: List<Message>,
+        outputClass: Class<T>,
+    ): ThinkingResponse<T?> {
+        val combinedMessages = this.messages + messages
+        val result = chatClientOperations.doTransformWithThinkingIfPossible(
+            messages = combinedMessages,
+            interaction = interaction,
+            outputClass = outputClass,
+            llmRequestEvent = null
+        )
+
+        return when {
+            result.isSuccess -> {
+                val successResponse = result.getOrThrow()
+                ThinkingResponse<T?>(
+                    result = successResponse.result,
+                    thinkingBlocks = successResponse.thinkingBlocks
+                )
+            }
+            else -> {
+                // Preserve thinking blocks even when object creation fails
+                val exception = result.exceptionOrNull()
+                val thinkingBlocks = if (exception is ThinkingException) {
+                    exception.thinkingBlocks
+                } else {
+                    emptyList()
+                }
+                ThinkingResponse<T?>(
+                    result = null,
+                    thinkingBlocks = thinkingBlocks
+                )
+            }
+        }
+    }
+
+    override fun <T> createObject(
+        messages: List<Message>,
+        outputClass: Class<T>,
+    ): ThinkingResponse<T> {
+        val combinedMessages = this.messages + messages
+        return chatClientOperations.doTransformWithThinking(
+            messages = combinedMessages,
+            interaction = interaction,
+            outputClass = outputClass,
+            llmRequestEvent = null
+        )
+    }
+
+    override fun respond(
+        messages: List<Message>,
+    ): ThinkingResponse<AssistantMessage> {
+        return createObject(messages, AssistantMessage::class.java)
+    }
+
+    override fun evaluateCondition(
+        condition: String,
+        context: String,
+        confidenceThreshold: ZeroToOne,
+    ): ThinkingResponse<Boolean> {
+        val prompt =
+            """
+            Evaluate this condition given the context.
+            Return "result": whether you think it is true, your confidence level from 0-1,
+            and an explanation of what you base this on.
+
+            # Condition
+            $condition
+
+            # Context
+            $context
+            """.trimIndent()
+
+        val response = createObject(
+            messages = listOf(com.embabel.chat.UserMessage(prompt)),
+            outputClass = com.embabel.agent.experimental.primitive.Determination::class.java,
+        )
+
+        val result = response.result?.let {
+            it.result && it.confidence >= confidenceThreshold
+        } ?: false
+
+        return ThinkingResponse(
+            result = result,
+            thinkingBlocks = response.thinkingBlocks
+        )
+    }
+}
diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/agent/spi/support/springai/ChatClientLlmOperations.kt b/embabel-agent-api/src/main/kotlin/com/embabel/agent/spi/support/springai/ChatClientLlmOperations.kt
index 58cef2f9a..574eecc35 100644
--- a/embabel-agent-api/src/main/kotlin/com/embabel/agent/spi/support/springai/ChatClientLlmOperations.kt
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/agent/spi/support/springai/ChatClientLlmOperations.kt
@@ -27,10 +27,14 @@ import com.embabel.agent.spi.support.LlmDataBindingProperties
 import com.embabel.agent.spi.support.LlmOperationsPromptsProperties
 import com.embabel.agent.spi.validation.DefaultValidationPromptGenerator
 import com.embabel.agent.spi.validation.ValidationPromptGenerator
+import com.embabel.common.core.thinking.ThinkingResponse
+import com.embabel.common.core.thinking.ThinkingException
 import com.embabel.chat.Message
 import com.embabel.common.ai.converters.FilteringJacksonOutputConverter
 import com.embabel.common.ai.model.Llm
 import com.embabel.common.ai.model.ModelProvider
+import com.embabel.common.core.thinking.spi.InternalThinkingApi
+import com.embabel.common.core.thinking.spi.extractAllThinkingBlocks
 import com.embabel.common.textio.template.TemplateRenderer
 import com.fasterxml.jackson.databind.DatabindException
 import com.fasterxml.jackson.databind.ObjectMapper
@@ -61,6 +65,10 @@ import java.util.concurrent.TimeoutException
 
 const val PROMPT_ELEMENT_SEPARATOR = "\n----\n";
 
+// Log message constants to avoid duplication
+private const val LLM_TIMEOUT_MESSAGE = "LLM {}: attempt {} timed out after {}ms"
+private const val LLM_INTERRUPTED_MESSAGE = "LLM {}: attempt {} was interrupted"
+
 /**
  * LlmOperations implementation that uses the Spring AI ChatClient
  * @param modelProvider ModelProvider to get the LLM model
@@ -121,6 +129,10 @@ internal class ChatClientLlmOperations(
         )
     }
 
+    // ====================================
+    // NON-THINKING IMPLEMENTATION (uses responseEntity)
+    // ====================================
+
     override fun <O> doTransform(
         messages: List<Message>,
         interaction: LlmInteraction,
@@ -132,14 +144,7 @@ internal class ChatClientLlmOperations(
         val promptContributions =
             (interaction.promptContributors + llm.promptContributors).joinToString(PROMPT_ELEMENT_SEPARATOR) { it.contribution() }
 
-        val springAiPrompt = Prompt(
-            buildList {
-                if (promptContributions.isNotEmpty()) {
-                    add(SystemMessage(promptContributions))
-                }
-                addAll(messages.map { it.toSpringAiMessage() })
-            }
-        )
+        val springAiPrompt = buildBasicPrompt(promptContributions, messages)
         llmRequestEvent?.let {
             it.agentProcess.processContext.onProcessEvent(
                 it.callEvent(springAiPrompt)
@@ -161,47 +166,9 @@ internal class ChatClientLlmOperations(
             }
 
             val callResponse = try {
-                future.get(timeoutMillis, TimeUnit.MILLISECONDS)
-            } catch (e: TimeoutException) {
-                future.cancel(true)
-                logger.warn(
-                    "LLM {}: attempt {} timed out after {}ms",
-                    interaction.id.value,
-                    attempt,
-                    timeoutMillis
-                )
-                throw RuntimeException(
-                    "ChatClient call for interaction ${interaction.id.value} timed out after ${timeoutMillis}ms",
-                    e
-                )
-            } catch (e: InterruptedException) {
-                future.cancel(true)
-                Thread.currentThread().interrupt()
-                logger.warn("LLM {}: attempt {} was interrupted", interaction.id.value, attempt)
-                throw RuntimeException(
-                    "ChatClient call for interaction ${interaction.id.value} was interrupted",
-                    e
-                )
-            } catch (e: ExecutionException) {
-                future.cancel(true)
-                logger.error(
-                    "LLM {}: attempt {} failed with execution exception",
-                    interaction.id.value,
-                    attempt,
-                    e.cause
-                )
-                when (val cause = e.cause) {
-                    is RuntimeException -> throw cause
-                    is Exception -> throw RuntimeException(
-                        "ChatClient call for interaction ${interaction.id.value} failed",
-                        cause
-                    )
-
-                    else -> throw RuntimeException(
-                        "ChatClient call for interaction ${interaction.id.value} failed with unknown error",
-                        e
-                    )
-                }
+                future.get(timeoutMillis, TimeUnit.MILLISECONDS) // NOSONAR: CompletableFuture.get() is not collection access
+            } catch (e: Exception) {
+                handleFutureException(e, future, interaction, timeoutMillis, attempt)
             }
 
             if (outputClass == String::class.java) {
@@ -266,15 +233,7 @@ internal class ChatClientLlmOperations(
         val chatClient = createChatClient(llm)
         val promptContributions =
             (interaction.promptContributors + llm.promptContributors).joinToString("\n") { it.contribution() }
-        val springAiPrompt = Prompt(
-            buildList {
-                if (promptContributions.isNotEmpty()) {
-                    add(SystemMessage(promptContributions))
-                }
-                add(UserMessage(maybeReturnPromptContribution))
-                addAll(messages.map { it.toSpringAiMessage() })
-            }
-        )
+        val springAiPrompt = buildPromptWithMaybeReturn(promptContributions, messages, maybeReturnPromptContribution)
         llmRequestEvent.agentProcess.processContext.onProcessEvent(
             llmRequestEvent.callEvent(springAiPrompt)
         )
@@ -302,7 +261,7 @@ internal class ChatClientLlmOperations(
                         when (throwable.cause ?: throwable) {
                             is TimeoutException -> {
                                 logger.warn(
-                                    "LLM {}: attempt {} timed out after {}ms",
+                                    LLM_TIMEOUT_MESSAGE,
                                     interaction.id.value,
                                     attempt,
                                     timeoutMillis
@@ -337,11 +296,11 @@ internal class ChatClientLlmOperations(
                             }
                         }
                     }
-                    .get()
+                    .get() // NOSONAR: CompletableFuture.get() is not collection access
             } catch (e: InterruptedException) {
                 Thread.currentThread().interrupt()
                 logger.warn(
-                    "LLM {}: attempt {} was interrupted",
+                    LLM_INTERRUPTED_MESSAGE,
                     interaction.id.value,
                     attempt
                 )
@@ -375,6 +334,252 @@ internal class ChatClientLlmOperations(
         }
     }
 
+    // ====================================
+    // THINKING IMPLEMENTATION (manual converter chains)
+    // ====================================
+
+    /**
+     * Transform messages to an object with thinking block extraction.
+     */
+    @OptIn(InternalThinkingApi::class)
+    internal fun <O> doTransformWithThinking(
+        messages: List<Message>,
+        interaction: LlmInteraction,
+        outputClass: Class<O>,
+        llmRequestEvent: LlmRequestEvent<O>?,
+    ): ThinkingResponse<O> {
+        logger.debug("LLM transform for interaction {} with thinking extraction", interaction.id.value)
+
+        val llm = chooseLlm(interaction.llm)
+        val chatClient = createChatClient(llm)
+        val promptContributions =
+            (interaction.promptContributors + llm.promptContributors).joinToString(PROMPT_ELEMENT_SEPARATOR) { it.contribution() }
+
+        // Create converter chain once for both schema format and actual conversion
+        val converter = if (outputClass != String::class.java) {
+            ExceptionWrappingConverter(
+                expectedType = outputClass,
+                delegate = WithExampleConverter(
+                    delegate = SuppressThinkingConverter(
+                        FilteringJacksonOutputConverter(
+                            clazz = outputClass,
+                            objectMapper = objectMapper,
+                            propertyFilter = interaction.propertyFilter,
+                        )
+                    ),
+                    outputClass = outputClass,
+                    ifPossible = false,
+                    generateExamples = shouldGenerateExamples(interaction),
+                )
+            )
+        } else null
+
+        val schemaFormat = converter?.getFormat()
+
+        val springAiPrompt = if (schemaFormat != null) {
+            buildPromptWithSchema(promptContributions, messages, schemaFormat)
+        } else {
+            buildBasicPrompt(promptContributions, messages)
+        }
+
+        llmRequestEvent?.let {
+            it.agentProcess.processContext.onProcessEvent(
+                it.callEvent(springAiPrompt)
+            )
+        }
+
+        val chatOptions = llm.optionsConverter.convertOptions(interaction.llm)
+        val timeoutMillis = getTimeoutMillis(interaction.llm)
+
+        return dataBindingProperties.retryTemplate(interaction.id.value)
+            .execute<ThinkingResponse<O>, DatabindException> {
+                val attempt = (RetrySynchronizationManager.getContext()?.retryCount ?: 0) + 1
+
+                val future = CompletableFuture.supplyAsync {
+                    chatClient
+                        .prompt(springAiPrompt)
+                        .toolCallbacks(interaction.toolCallbacks)
+                        .options(chatOptions)
+                        .call()
+                }
+
+                val callResponse = try {
+                    future.get(timeoutMillis, TimeUnit.MILLISECONDS) // NOSONAR: CompletableFuture.get() is not collection access
+                } catch (e: Exception) {
+                    handleFutureException(e, future, interaction, timeoutMillis, attempt)
+                }
+
+                logger.debug("LLM call completed for interaction {}", interaction.id.value)
+
+                // Convert response with thinking extraction using manual converter chains
+                if (outputClass == String::class.java) {
+                    val chatResponse = callResponse.chatResponse()
+                    chatResponse?.let { recordUsage(llm, it, llmRequestEvent) }
+                    val rawText = chatResponse!!.result.output.text as String
+
+                    val thinkingBlocks = extractAllThinkingBlocks(rawText)
+                    logger.debug("Extracted {} thinking blocks for String response", thinkingBlocks.size)
+
+                    ThinkingResponse(
+                        result = rawText as O, // NOSONAR: Safe cast verified by outputClass == String::class.java check
+                        thinkingBlocks = thinkingBlocks
+                    )
+                } else {
+                    // Extract thinking blocks from raw response text FIRST
+                    val chatResponse = callResponse.chatResponse()
+                    chatResponse?.let { recordUsage(llm, it, llmRequestEvent) }
+                    val rawText = chatResponse!!.result.output.text ?: ""
+
+                    val thinkingBlocks = extractAllThinkingBlocks(rawText)
+                    logger.debug(
+                        "Extracted {} thinking blocks for {} response",
+                        thinkingBlocks.size,
+                        outputClass.simpleName
+                    )
+
+                    // Execute converter chain manually instead of using responseEntity
+                    try {
+                        val result = converter!!.convert(rawText)
+
+                        ThinkingResponse(
+                            result = result!!,
+                            thinkingBlocks = thinkingBlocks
+                        )
+                    } catch (e: Exception) {
+                        // Preserve thinking blocks in exceptions
+                        throw ThinkingException(
+                            message = "Conversion failed: ${e.message}",
+                            thinkingBlocks = thinkingBlocks
+                        )
+                    }
+                }
+            }
+    }
+
+    /**
+     * Transform messages with thinking extraction using IfPossible pattern.
+     */
+    @OptIn(InternalThinkingApi::class)
+    internal fun <O> doTransformWithThinkingIfPossible(
+        messages: List<Message>,
+        interaction: LlmInteraction,
+        outputClass: Class<O>,
+        llmRequestEvent: LlmRequestEvent<O>?,
+    ): Result<ThinkingResponse<O>> {
+        return try {
+            val maybeReturnPromptContribution = templateRenderer.renderLoadedTemplate(
+                llmOperationsPromptsProperties.maybePromptTemplate,
+                emptyMap(),
+            )
+
+            val llm = chooseLlm(interaction.llm)
+            val chatClient = createChatClient(llm)
+            val promptContributions =
+                (interaction.promptContributors + llm.promptContributors).joinToString("\\n") { it.contribution() }
+
+            val typeReference = createParameterizedTypeReference<MaybeReturn<*>>(
+                MaybeReturn::class.java,
+                outputClass,
+            )
+
+            // Create converter chain BEFORE LLM call to get schema format
+            val converter = ExceptionWrappingConverter(
+                expectedType = MaybeReturn::class.java,
+                delegate = WithExampleConverter(
+                    delegate = SuppressThinkingConverter(
+                        FilteringJacksonOutputConverter(
+                            typeReference = typeReference,
+                            objectMapper = objectMapper,
+                            propertyFilter = interaction.propertyFilter,
+                        )
+                    ),
+                    outputClass = outputClass as Class<MaybeReturn<*>>, // NOSONAR: Safe cast for MaybeReturn wrapper pattern
+                    ifPossible = true,
+                    generateExamples = shouldGenerateExamples(interaction),
+                )
+            )
+
+            // Get the complete format (examples + JSON schema)
+            val schemaFormat = converter.getFormat()
+
+            val springAiPrompt = buildPromptWithMaybeReturnAndSchema(
+                promptContributions,
+                messages,
+                maybeReturnPromptContribution,
+                schemaFormat
+            )
+
+            llmRequestEvent?.agentProcess?.processContext?.onProcessEvent(
+                llmRequestEvent.callEvent(springAiPrompt)
+            )
+
+            val chatOptions = llm.optionsConverter.convertOptions(interaction.llm)
+            val timeoutMillis = (interaction.llm.timeout ?: llmOperationsPromptsProperties.defaultTimeout).toMillis()
+
+            val result = dataBindingProperties.retryTemplate(interaction.id.value)
+                .execute<Result<ThinkingResponse<O>>, DatabindException> {
+                    val future = CompletableFuture.supplyAsync {
+                        chatClient
+                            .prompt(springAiPrompt)
+                            .toolCallbacks(interaction.toolCallbacks)
+                            .options(chatOptions)
+                            .call()
+                    }
+
+                    val callResponse = try {
+                        future.get(timeoutMillis, TimeUnit.MILLISECONDS) // NOSONAR: CompletableFuture.get() is not collection access
+                    } catch (e: Exception) {
+                        val attempt = (RetrySynchronizationManager.getContext()?.retryCount ?: 0) + 1
+                        return@execute handleFutureExceptionAsResult(e, future, interaction, timeoutMillis, attempt)
+                    }
+
+                    // Extract thinking blocks from raw text FIRST
+                    val chatResponse = callResponse.chatResponse()
+                    chatResponse?.let { recordUsage(llm, it, llmRequestEvent) }
+                    val rawText = chatResponse!!.result.output.text ?: ""
+                    val thinkingBlocks = extractAllThinkingBlocks(rawText)
+
+                    // Execute converter chain manually instead of using responseEntity
+                    try {
+                        val maybeResult = converter.convert(rawText)
+
+                        // Convert MaybeReturn<O> to Result<ThinkingResponse<O>> with extracted thinking blocks
+                        val result = maybeResult!!.toResult() as Result<O> // NOSONAR: Safe cast, MaybeReturn<O>.toResult() returns Result<O>
+                        when {
+                            result.isSuccess -> Result.success(
+                                ThinkingResponse(
+                                    result = result.getOrThrow(),
+                                    thinkingBlocks = thinkingBlocks
+                                )
+                            )
+
+                            else -> Result.failure(
+                                ThinkingException(
+                                    message = "Object creation not possible: ${result.exceptionOrNull()?.message ?: "Unknown error"}",
+                                    thinkingBlocks = thinkingBlocks
+                                )
+                            )
+                        }
+                    } catch (e: Exception) {
+                        // Other failures, preserve thinking blocks
+                        Result.failure(
+                            ThinkingException(
+                                message = "Conversion failed: ${e.message}",
+                                thinkingBlocks = thinkingBlocks
+                            )
+                        )
+                    }
+                }
+            result
+        } catch (e: Exception) {
+            Result.failure(e)
+        }
+    }
+
+    // ====================================
+    // PRIVATE FUNCTIONS
+    // ====================================
+
     @Suppress("UNCHECKED_CAST")
     private fun <T> createParameterizedTypeReference(
         rawType: Class<*>,
@@ -417,6 +622,189 @@ internal class ChatClientLlmOperations(
         return llmCall.generateExamples == true
     }
 
+    // ====================================
+    // PRIVATE THINKING FUNCTIONS
+    // ====================================
+
+    /**
+     * Base prompt builder - system message + user messages.
+     */
+    private fun buildBasicPrompt(
+        promptContributions: String,
+        messages: List<Message>,
+    ): Prompt = Prompt(
+        buildList {
+            if (promptContributions.isNotEmpty()) {
+                add(SystemMessage(promptContributions))
+            }
+            addAll(messages.map { it.toSpringAiMessage() })
+        }
+    )
+
+    /**
+     * Extends basic prompt with maybeReturn user message.
+     */
+    private fun buildPromptWithMaybeReturn(
+        promptContributions: String,
+        messages: List<Message>,
+        maybeReturnPrompt: String,
+    ): Prompt = Prompt(
+        buildList {
+            if (promptContributions.isNotEmpty()) {
+                add(SystemMessage(promptContributions))
+            }
+            add(UserMessage(maybeReturnPrompt))
+            addAll(messages.map { it.toSpringAiMessage() })
+        }
+    )
+
+    /**
+     * Extends basic prompt with schema format for thinking.
+     */
+    private fun buildPromptWithSchema(
+        promptContributions: String,
+        messages: List<Message>,
+        schemaFormat: String,
+    ): Prompt {
+        val basicPrompt = buildBasicPrompt(promptContributions, messages)
+        logger.debug("Injected schema format for thinking extraction: {}", schemaFormat)
+        return Prompt(
+            buildList {
+                addAll(basicPrompt.instructions)
+                add(SystemMessage(schemaFormat))
+            }
+        )
+    }
+
+    /**
+     * Combines maybeReturn user message with schema format.
+     */
+    private fun buildPromptWithMaybeReturnAndSchema(
+        promptContributions: String,
+        messages: List<Message>,
+        maybeReturnPrompt: String,
+        schemaFormat: String,
+    ): Prompt {
+        val promptWithMaybeReturn = buildPromptWithMaybeReturn(promptContributions, messages, maybeReturnPrompt)
+        return Prompt(
+            buildList {
+                addAll(promptWithMaybeReturn.instructions)
+                add(SystemMessage(schemaFormat))
+            }
+        )
+    }
+
+    private fun getTimeoutMillis(llmOptions: com.embabel.common.ai.model.LlmOptions): Long =
+        (llmOptions.timeout ?: llmOperationsPromptsProperties.defaultTimeout).toMillis()
+
+    /**
+     * Handles exceptions from CompletableFuture execution during LLM calls.
+     *
+     * Provides centralized exception handling for timeout, interruption, and execution failures.
+     * Cancels the future, logs appropriate warnings/errors, and throws descriptive RuntimeExceptions.
+     *
+     * @param e The exception that occurred during future execution
+     * @param future The CompletableFuture to cancel on error
+     * @param interaction The LLM interaction context for error messages
+     * @param timeoutMillis The timeout value for error reporting
+     * @param attempt The retry attempt number for logging
+     * @throws RuntimeException Always throws with appropriate error message based on exception type
+     */
+    private fun handleFutureException(
+        e: Exception,
+        future: CompletableFuture<*>,
+        interaction: LlmInteraction,
+        timeoutMillis: Long,
+        attempt: Int
+    ): Nothing {
+        when (e) {
+            is TimeoutException -> {
+                future.cancel(true)
+                logger.warn(LLM_TIMEOUT_MESSAGE, interaction.id.value, attempt, timeoutMillis)
+                throw RuntimeException(
+                    "ChatClient call for interaction ${interaction.id.value} timed out after ${timeoutMillis}ms",
+                    e
+                )
+            }
+            is InterruptedException -> {
+                future.cancel(true)
+                Thread.currentThread().interrupt()
+                logger.warn(LLM_INTERRUPTED_MESSAGE, interaction.id.value, attempt)
+                throw RuntimeException("ChatClient call for interaction ${interaction.id.value} was interrupted", e)
+            }
+            is ExecutionException -> {
+                future.cancel(true)
+                logger.error(
+                    "LLM {}: attempt {} failed with execution exception",
+                    interaction.id.value,
+                    attempt,
+                    e.cause
+                )
+                when (val cause = e.cause) {
+                    is RuntimeException -> throw cause
+                    is Exception -> throw RuntimeException(
+                        "ChatClient call for interaction ${interaction.id.value} failed",
+                        cause
+                    )
+                    else -> throw RuntimeException(
+                        "ChatClient call for interaction ${interaction.id.value} failed with unknown error",
+                        e
+                    )
+                }
+            }
+            else -> throw e
+        }
+    }
+
+    /**
+     * Handles exceptions from CompletableFuture execution during LLM calls, returning Result.failure.
+     *
+     * Similar to handleFutureException but returns Result.failure with ThinkingException
+     * instead of throwing. Used for methods that return Result types rather than throwing exceptions.
+     *
+     * @param e The exception that occurred during future execution
+     * @param future The CompletableFuture to cancel on error
+     * @param interaction The LLM interaction context for error messages
+     * @param timeoutMillis The timeout value for error reporting
+     * @param attempt The retry attempt number for logging
+     * @return Result.failure with ThinkingException containing empty thinking blocks
+     */
+    private fun <O> handleFutureExceptionAsResult(
+        e: Exception,
+        future: CompletableFuture<*>,
+        interaction: LlmInteraction,
+        timeoutMillis: Long,
+        attempt: Int
+    ): Result<ThinkingResponse<O>> {
+        return when (e) {
+            is TimeoutException -> {
+                future.cancel(true)
+                logger.warn(LLM_TIMEOUT_MESSAGE, interaction.id.value, attempt, timeoutMillis)
+                Result.failure(ThinkingException(
+                    message = "ChatClient call for interaction ${interaction.id.value} timed out after ${timeoutMillis}ms",
+                    thinkingBlocks = emptyList() // No response = no thinking blocks
+                ))
+            }
+            is InterruptedException -> {
+                future.cancel(true)
+                Thread.currentThread().interrupt()
+                logger.warn(LLM_INTERRUPTED_MESSAGE, interaction.id.value, attempt)
+                Result.failure(ThinkingException(
+                    message = "ChatClient call for interaction ${interaction.id.value} was interrupted",
+                    thinkingBlocks = emptyList() // No response = no thinking blocks
+                ))
+            }
+            else -> {
+                future.cancel(true)
+                logger.error("LLM {}: attempt {} failed", interaction.id.value, attempt, e)
+                Result.failure(ThinkingException(
+                    message = "ChatClient call for interaction ${interaction.id.value} failed: ${e.message}",
+                    thinkingBlocks = emptyList() // No response = no thinking blocks
+                ))
+            }
+        }
+    }
+
 }
 
 /**
diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/agent/spi/support/springai/SuppressThinkingConverter.kt b/embabel-agent-api/src/main/kotlin/com/embabel/agent/spi/support/springai/SuppressThinkingConverter.kt
index c1b987ebf..37d8df290 100644
--- a/embabel-agent-api/src/main/kotlin/com/embabel/agent/spi/support/springai/SuppressThinkingConverter.kt
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/agent/spi/support/springai/SuppressThinkingConverter.kt
@@ -74,7 +74,7 @@ class SuppressThinkingConverter<T>(
     override fun convert(source: String): T? {
         val sanitization = identifyThinkBlock(source)
         sanitization.thinkBlock?.let {
-            logger.info(
+            logger.trace(
                 "Think block detected in input: '{}': Remaining content: '{}'",
                 it,
                 sanitization.cleaned,
@@ -142,17 +142,29 @@ internal fun thinkBlockSanitization(
     thinkBlockFinders: List<ThinkBlockFinder>,
     input: String,
 ): ThinkBlockSanitization? {
+    // Apply all finders sequentially rather than stopping at first match
+    var cleanedInput = input
+    var thinkBlock: String? = null
+
     for (thinkBlockFinder in thinkBlockFinders) {
-        val thinkBlock = thinkBlockFinder(input)
-        if (thinkBlock != null && thinkBlock.isNotEmpty()) {
-            return ThinkBlockSanitization(
-                input = input,
-                thinkBlock = thinkBlock,
-                cleaned = input.replace(thinkBlock, ""),
-            )
+        // Apply finder to progressively cleaned up input
+        thinkBlockFinder(cleanedInput)?.let { found ->
+            if (found.isNotEmpty()) {
+                thinkBlock = found
+                cleanedInput = cleanedInput.replace(found, "")
+            }
         }
     }
-    return null
+
+    return if (thinkBlock != null) {
+        ThinkBlockSanitization(
+            input = input,
+            thinkBlock = thinkBlock,
+            cleaned = cleanedInput,
+        )
+    } else {
+        null
+    }
 }
 
 /**
diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingCapability.kt b/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingCapability.kt
new file mode 100644
index 000000000..005ed5c48
--- /dev/null
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingCapability.kt
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.common.core.thinking
+
+/**
+ * Marker interface for thinking capabilities.
+ *
+ * This is a tag interface that indicates a prompt runner implementation
+ * supports thinking extraction and processing. Similar to StreamingCapability,
+ * it enables polymorphic capability detection without defining specific methods.
+ *
+ * Implementations that extend this interface can extract thinking blocks
+ * (like `<think>...</think>`) from LLM responses and provide thinking-aware
+ * operations that return ThinkingResponse<T> objects.
+ *
+ * Note: Thinking and streaming capabilities are mutually exclusive.
+ * StreamingPromptRunner implementations should not extend this interface.
+ *
+ * @see com.embabel.common.core.streaming.StreamingCapability
+ */
+interface ThinkingCapability
diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingException.kt b/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingException.kt
new file mode 100644
index 000000000..bf955b3bb
--- /dev/null
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingException.kt
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2024-2026 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.common.core.thinking
+
+/**
+ * Exception that carries thinking blocks even when LLM operation fails.
+ * This preserves the LLM's reasoning process for debugging and analysis.
+ */
+class ThinkingException(
+    message: String,
+    val thinkingBlocks: List<ThinkingBlock>
+) : Exception(message)
diff --git a/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingResponse.kt b/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingResponse.kt
new file mode 100644
index 000000000..e4acac86f
--- /dev/null
+++ b/embabel-agent-api/src/main/kotlin/com/embabel/common/core/thinking/ThinkingResponse.kt
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2024-2026 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.common.core.thinking
+
+/**
+ * Response from LLM operations that includes both the converted result and thinking blocks.
+ *
+ * This class provides access to both the final structured result and the reasoning
+ * process that led to that result, enabling analysis of LLM decision-making.
+ *
+ * @param T The type of the converted result object
+ * @property result The converted object of type T, or null if conversion failed
+ * @property thinkingBlocks The reasoning content extracted from the LLM response
+ */
+data class ThinkingResponse<T>(
+    /**
+     * The final converted result object.
+     *
+     * This contains the structured output after parsing and converting the
+     * cleaned LLM response (with thinking blocks removed).
+     */
+    val result: T?,
+
+    /**
+     * The thinking blocks extracted from the LLM response.
+     *
+     * Contains all reasoning, analysis, and thought processes that the LLM
+     * expressed before producing the final result. Each block includes
+     * metadata about the thinking pattern used.
+     */
+    val thinkingBlocks: List<ThinkingBlock>
+) {
+    /**
+     * Check if the conversion was successful.
+     */
+    fun hasResult(): Boolean = result != null
+
+    /**
+     * Check if thinking blocks were found in the response.
+     */
+    fun hasThinking(): Boolean = thinkingBlocks.isNotEmpty()
+
+    /**
+     * Get all thinking content as a single concatenated string.
+     * Useful for logging or display purposes.
+     */
+    fun getThinkingContent(): String = thinkingBlocks.joinToString("\n") { it.content }
+
+    /**
+     * Get thinking blocks of a specific type.
+     */
+    fun getThinkingByType(tagType: ThinkingTagType): List<ThinkingBlock> =
+        thinkingBlocks.filter { it.tagType == tagType }
+
+    /**
+     * Get thinking blocks by tag value (e.g., "think", "analysis").
+     */
+    fun getThinkingByTag(tagValue: String): List<ThinkingBlock> =
+        thinkingBlocks.filter { it.tagValue == tagValue }
+}
diff --git a/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/OperationContextPromptRunnerThinkingTest.kt b/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/OperationContextPromptRunnerThinkingTest.kt
new file mode 100644
index 000000000..2bf112a78
--- /dev/null
+++ b/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/OperationContextPromptRunnerThinkingTest.kt
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.agent.api.common
+
+import com.embabel.agent.api.common.support.OperationContextPromptRunner
+import com.embabel.agent.api.event.LlmRequestEvent
+import com.embabel.agent.core.AgentPlatform
+import com.embabel.agent.core.AgentProcess
+import com.embabel.agent.core.Operation
+import com.embabel.agent.core.ProcessContext
+import com.embabel.agent.spi.LlmInteraction
+import com.embabel.agent.spi.LlmOperations
+import com.embabel.agent.spi.support.springai.ChatClientLlmOperations
+import com.embabel.chat.Message
+import com.embabel.common.ai.model.LlmOptions
+import com.embabel.common.ai.model.Thinking
+import io.mockk.every
+import io.mockk.mockk
+import org.junit.jupiter.api.Test
+import kotlin.test.assertNotNull
+import kotlin.test.assertTrue
+import kotlin.test.fail
+
+/**
+ * Tests for thinking functionality in OperationContextPromptRunner.
+ *
+ * Focuses on:
+ * - withThinking() creates operational ThinkingPromptRunnerOperations
+ * - Error handling for incompatible LlmOperations implementations
+ */
+class OperationContextPromptRunnerThinkingTest {
+
+    private fun createMockOperationContextWithLlmOperations(llmOperations: LlmOperations): OperationContext {
+        val mockOperationContext = mockk<OperationContext>()
+        val mockAgentPlatform = mockk<AgentPlatform>()
+        val mockPlatformServices = mockk<PlatformServices>()
+        val mockOperation = mockk<Operation>()
+        val mockProcessContext = mockk<ProcessContext>()
+        val mockAgentProcess = mockk<AgentProcess>()
+
+        every { mockOperationContext.agentPlatform() } returns mockAgentPlatform
+        every { mockAgentPlatform.platformServices } returns mockPlatformServices
+        every { mockPlatformServices.llmOperations } returns llmOperations
+        every { mockOperationContext.operation } returns mockOperation
+        every { mockOperation.name } returns "test-operation"
+        every { mockOperationContext.processContext } returns mockProcessContext
+        every { mockProcessContext.agentProcess } returns mockAgentProcess
+
+        return mockOperationContext
+    }
+
+    private fun createOperationContextPromptRunner(
+        context: OperationContext,
+        llmOptions: LlmOptions = LlmOptions()
+    ): OperationContextPromptRunner {
+        return OperationContextPromptRunner(
+            context = context,
+            llm = llmOptions,
+            toolGroups = emptySet(),
+            toolObjects = emptyList(),
+            promptContributors = emptyList(),
+            contextualPromptContributors = emptyList(),
+            generateExamples = null
+        )
+    }
+
+    @Test
+    fun `withThinking creates operational ThinkingPromptRunnerOperations with ChatClientLlmOperations`() {
+        // Given: OperationContext with ChatClientLlmOperations and various LlmOptions scenarios
+        val mockChatClientOps = mockk<ChatClientLlmOperations>(relaxed = true)
+        val context = createMockOperationContextWithLlmOperations(mockChatClientOps)
+
+        // Test with default LlmOptions
+        val defaultRunner = createOperationContextPromptRunner(context)
+        val defaultThinkingOps = defaultRunner.withThinking()
+        assertNotNull(defaultThinkingOps)
+
+        // Test with custom LlmOptions (verifies preservation of settings)
+        val customLlmOptions = LlmOptions()
+            .withMaxTokens(500)
+            .withTemperature(0.7)
+        val customRunner = createOperationContextPromptRunner(context, customLlmOptions)
+        val customThinkingOps = customRunner.withThinking()
+        assertNotNull(customThinkingOps)
+
+        // Test with already thinking-enabled LlmOptions (verifies idempotency)
+        val thinkingEnabledOptions = LlmOptions()
+            .withThinking(Thinking.withExtraction())
+        val thinkingRunner = createOperationContextPromptRunner(context, thinkingEnabledOptions)
+        val thinkingOps = thinkingRunner.withThinking()
+        assertNotNull(thinkingOps)
+
+        // All should create valid, operational ThinkingPromptRunnerOperations
+        // The fact they were created without exceptions validates the internal setup
+    }
+
+    @Test
+    fun `withThinking throws UnsupportedOperationException for non-ChatClientLlmOperations`() {
+        // Given: OperationContext with non-ChatClientLlmOperations
+        val unsupportedLlmOps = object : LlmOperations {
+            override fun <O> createObject(
+                messages: List<Message>,
+                interaction: LlmInteraction,
+                outputClass: Class<O>,
+                agentProcess: com.embabel.agent.core.AgentProcess,
+                action: com.embabel.agent.core.Action?
+            ): O = throw UnsupportedOperationException("Test implementation")
+
+            override fun <O> createObjectIfPossible(
+                messages: List<Message>,
+                interaction: LlmInteraction,
+                outputClass: Class<O>,
+                agentProcess: com.embabel.agent.core.AgentProcess,
+                action: com.embabel.agent.core.Action?
+            ): Result<O> = Result.failure(UnsupportedOperationException("Test implementation"))
+
+            override fun <O> doTransform(
+                messages: List<Message>,
+                interaction: LlmInteraction,
+                outputClass: Class<O>,
+                llmRequestEvent: LlmRequestEvent<O>?
+            ): O = throw UnsupportedOperationException("Test implementation")
+        }
+
+        val context = createMockOperationContextWithLlmOperations(unsupportedLlmOps)
+        val runner = createOperationContextPromptRunner(context)
+
+        // When/Then: Should throw UnsupportedOperationException with descriptive message
+        try {
+            runner.withThinking()
+            fail("Expected UnsupportedOperationException to be thrown")
+        } catch (e: UnsupportedOperationException) {
+            val message = e.message ?: ""
+            assertTrue(message.contains("Thinking extraction not supported"))
+            assertTrue(message.contains("ChatClientLlmOperations"))
+        }
+    }
+}
diff --git a/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperationsExtractionTest.kt b/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperationsExtractionTest.kt
new file mode 100644
index 000000000..f019d9a12
--- /dev/null
+++ b/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperationsExtractionTest.kt
@@ -0,0 +1,507 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.agent.api.common.thinking
+
+import com.embabel.agent.api.common.support.OperationContextPromptRunner
+import com.embabel.agent.api.common.PlatformServices
+import com.embabel.agent.spi.support.springai.ChatClientLlmOperations
+import com.embabel.agent.spi.support.springai.SuppressThinkingConverter
+import com.embabel.common.core.thinking.ThinkingException
+import com.embabel.common.core.thinking.ThinkingResponse
+import com.embabel.common.core.thinking.ThinkingTagType
+import com.embabel.common.core.thinking.spi.extractAllThinkingBlocks
+import com.embabel.common.core.thinking.spi.InternalThinkingApi
+import org.springframework.ai.converter.BeanOutputConverter
+import org.junit.jupiter.api.Test
+import io.mockk.*
+import kotlin.test.assertEquals
+import kotlin.test.assertNotNull
+import kotlin.test.assertTrue
+
+@OptIn(InternalThinkingApi::class)
+/**
+ * Business scenario extraction tests for thinking blocks functionality.
+ *
+ * Tests real thinking block extraction from raw LLM responses.
+ * Each test covers a specific thinking block format scenario:
+ *
+ * 1. Single <think> block (TAG format only)
+ * 2. Multiple TAG blocks (<think>, <analysis>, <diagnosis>)
+ * 3. PREFIX format (//THINKING: lines only)
+ * 4. NO_PREFIX format (raw content before JSON only)
+ * 5. Mixed formats (TAG + PREFIX + NO_PREFIX combined)
+ */
+class ThinkingPromptRunnerOperationsExtractionTest {
+
+    @Test
+    fun `should extract single think TAG block from raw LLM response`() {
+        // Scenario 1: Raw LLM response with ONLY single <think> tag + JSON
+        val rawLlmResponse = """
+            <think>
+            Analyzing Q3 performance data:
+            - Revenue down 8% vs Q2 due to supply chain issues
+            - Customer satisfaction dropped from 4.2 to 3.8
+            - Competition increased pricing pressure in EMEA region
+            Need to focus on operational efficiency and customer retention
+            </think>
+
+            {
+                "quarterlyTrend": "declining",
+                "primaryConcerns": ["supply_chain", "customer_satisfaction", "competitive_pressure"],
+                "confidenceLevel": 0.87
+            }
+        """.trimIndent()
+
+        val result = executeThinkingExtraction(rawLlmResponse, "quarterly-analysis", QuarterlyAnalysis::class.java)
+
+        // Then: Exactly 1 TAG block extracted, no PREFIX or NO_PREFIX
+        assertEquals(1, result.thinkingBlocks.size)
+
+        val thinkBlock = result.thinkingBlocks.first()
+        assertEquals(ThinkingTagType.TAG, thinkBlock.tagType)
+        assertEquals("think", thinkBlock.tagValue)
+        assertTrue(thinkBlock.content.contains("Revenue down 8%"))
+        assertTrue(thinkBlock.content.contains("Customer satisfaction dropped"))
+        assertTrue(thinkBlock.content.contains("operational efficiency"))
+
+        // Verify ONLY TAG type, no other formats
+        assertEquals(1, result.thinkingBlocks.count { it.tagType == ThinkingTagType.TAG })
+        assertEquals(0, result.thinkingBlocks.count { it.tagType == ThinkingTagType.PREFIX })
+        assertEquals(0, result.thinkingBlocks.count { it.tagType == ThinkingTagType.NO_PREFIX })
+
+        // Object converted correctly from raw response
+        assertEquals("declining", result.result!!.quarterlyTrend)
+        assertEquals(0.87, result.result.confidenceLevel)
+    }
+
+    @Test
+    fun `should extract multiple TAG blocks from raw LLM response`() {
+        // Scenario 2: Raw LLM response with ONLY multiple TAG blocks + JSON (no PREFIX, no NO_PREFIX)
+        val rawLlmResponse = """
+            <think>
+            I need to analyze this technical problem step by step.
+            First, let me understand the current system state.
+            </think>
+
+            <analysis>
+            The data shows clear performance issues in the database layer.
+            Query response times have increased from 0.8s to 2.3s average.
+            CPU utilization is normal but memory usage is at 89%.
+            </analysis>
+
+            <thought>
+            Based on the analysis, this appears to be memory pressure affecting query cache.
+            The database connection pool is likely misconfigured.
+            We need immediate optimization of database connections.
+            </thought>
+
+            {
+                "primaryIssue": "database_performance",
+                "rootCause": "memory_pressure_affecting_query_cache",
+                "confidence": 0.92,
+                "recommendedActions": ["optimize_db_connections", "review_query_cache", "monitor_memory"]
+            }
+        """.trimIndent()
+
+        val result = executeThinkingExtraction(rawLlmResponse, "technical-analysis", TechnicalAnalysis::class.java)
+
+        // Then: Exactly 3 TAG blocks extracted, no PREFIX or NO_PREFIX
+        assertEquals(3, result.thinkingBlocks.size)
+
+        val thinkBlock = result.thinkingBlocks.find { it.tagValue == "think" }
+        assertNotNull(thinkBlock)
+        assertEquals(ThinkingTagType.TAG, thinkBlock.tagType)
+        assertEquals("think", thinkBlock.tagValue)
+        assertTrue(thinkBlock.content.contains("step by step"))
+
+        val analysisBlock = result.thinkingBlocks.find { it.tagValue == "analysis" }
+        assertNotNull(analysisBlock)
+        assertEquals(ThinkingTagType.TAG, analysisBlock.tagType)
+        assertEquals("analysis", analysisBlock.tagValue)
+        assertTrue(analysisBlock.content.contains("Query response times"))
+
+        val thoughtBlock = result.thinkingBlocks.find { it.tagValue == "thought" }
+        assertNotNull(thoughtBlock)
+        assertEquals(ThinkingTagType.TAG, thoughtBlock.tagType)
+        assertEquals("thought", thoughtBlock.tagValue)
+        assertTrue(thoughtBlock.content.contains("memory pressure affecting query cache"))
+
+        // Verify ONLY TAG type blocks, no PREFIX or NO_PREFIX
+        assertEquals(3, result.thinkingBlocks.count { it.tagType == ThinkingTagType.TAG })
+        assertEquals(0, result.thinkingBlocks.count { it.tagType == ThinkingTagType.PREFIX })
+        assertEquals(0, result.thinkingBlocks.count { it.tagType == ThinkingTagType.NO_PREFIX })
+
+        // Object converted correctly
+        assertEquals("database_performance", result.result!!.primaryIssue)
+        assertEquals(0.92, result.result.confidence)
+    }
+
+    @Test
+    fun `should extract PREFIX thinking from raw LLM response`() {
+        // Scenario 3: Raw LLM response with ONLY PREFIX format (//THINKING:) + JSON (no TAG, no NO_PREFIX)
+        val rawLlmResponse = """
+            //THINKING: I need to evaluate the technical options systematically
+            //THINKING: The current system has performance bottlenecks that need addressing
+            //THINKING: Database optimization should be the first step before scaling
+            //THINKING: Load balancing needs immediate attention to prevent outages
+
+            {
+                "primaryAction": "database_optimization",
+                "secondaryAction": "horizontal_scaling",
+                "confidence": 0.92,
+                "estimatedDuration": "2_weeks"
+            }
+        """.trimIndent()
+
+        val result = executeThinkingExtraction(rawLlmResponse, "technical-evaluation", TechnicalEvaluation::class.java)
+
+        // Then: Only PREFIX thinking blocks extracted
+        assertTrue(result.thinkingBlocks.size >= 1) // Should have at least 1 PREFIX block
+
+        val prefixBlocks = result.thinkingBlocks.filter { it.tagType == ThinkingTagType.PREFIX }
+        assertTrue(prefixBlocks.isNotEmpty())
+
+        prefixBlocks.forEach { block ->
+            assertEquals(ThinkingTagType.PREFIX, block.tagType)
+            assertEquals("THINKING", block.tagValue)
+            assertTrue(block.content.trim().isNotEmpty())
+        }
+
+        // Verify ONLY PREFIX type, no TAG or NO_PREFIX
+        assertEquals(0, result.thinkingBlocks.count { it.tagType == ThinkingTagType.TAG })
+        assertTrue(result.thinkingBlocks.count { it.tagType == ThinkingTagType.PREFIX } >= 1)
+        assertEquals(0, result.thinkingBlocks.count { it.tagType == ThinkingTagType.NO_PREFIX })
+
+        // Should contain the prefix reasoning content
+        val allContent = result.thinkingBlocks.joinToString(" ") { it.content }
+        assertTrue(allContent.contains("evaluate the technical options") || allContent.contains("technical options"))
+        assertTrue(allContent.contains("performance bottlenecks") || allContent.contains("bottlenecks"))
+
+        // Object converted correctly
+        assertEquals("database_optimization", result.result!!.primaryAction)
+        assertEquals(0.92, result.result!!.confidence)
+    }
+
+    @Test
+    fun `should extract NO_PREFIX content from raw LLM response`() {
+        // Scenario 4: Raw LLM response with ONLY NO_PREFIX format (raw content before JSON, no tags, no //THINKING:)
+        val rawLlmResponse = """
+            This is a complex customer service scenario that requires careful analysis.
+            The customer has been experiencing issues for 3 weeks now.
+            We need to prioritize a resolution that addresses both the immediate problem
+            and prevents future occurrences. The engineering team should be involved
+            because this appears to be a systemic issue affecting multiple users.
+
+            {
+                "priority": "urgent",
+                "assignTo": "engineering_team",
+                "estimatedResolution": "48_hours",
+                "followUpRequired": true
+            }
+        """.trimIndent()
+
+        val result = executeThinkingExtraction(rawLlmResponse, "customer-support", CustomerSupport::class.java)
+
+        // Then: Only NO_PREFIX thinking extracted
+        assertTrue(result.thinkingBlocks.size >= 1) // Should have at least 1 NO_PREFIX block
+
+        val noPrefixBlocks = result.thinkingBlocks.filter { it.tagType == ThinkingTagType.NO_PREFIX }
+        assertTrue(noPrefixBlocks.isNotEmpty())
+
+        noPrefixBlocks.forEach { block ->
+            assertEquals(ThinkingTagType.NO_PREFIX, block.tagType)
+            assertEquals("", block.tagValue) // Empty tag value for NO_PREFIX
+        }
+
+        // Verify ONLY NO_PREFIX type, no TAG or PREFIX
+        assertEquals(0, result.thinkingBlocks.count { it.tagType == ThinkingTagType.TAG })
+        assertEquals(0, result.thinkingBlocks.count { it.tagType == ThinkingTagType.PREFIX })
+        assertTrue(result.thinkingBlocks.count { it.tagType == ThinkingTagType.NO_PREFIX } >= 1)
+
+        val reasoningContent = noPrefixBlocks.first().content
+        assertTrue(reasoningContent.contains("complex customer service scenario"))
+        assertTrue(reasoningContent.contains("experiencing issues for 3 weeks"))
+        assertTrue(reasoningContent.contains("engineering team should be involved"))
+
+        // Object converted correctly
+        assertEquals("urgent", result.result!!.priority)
+        assertEquals("engineering_team", result.result.assignTo)
+        assertEquals(true, result.result.followUpRequired)
+    }
+
+    @Test
+    fun `should extract mixed formats from raw LLM response`() {
+        // Scenario 5: Raw LLM response with ALL THREE formats combined (TAG + PREFIX + NO_PREFIX)
+        val rawLlmResponse = """
+            <think>
+            This is a comprehensive analysis that requires multiple perspectives.
+            I need to evaluate both technical and business considerations.
+            </think>
+
+            //THINKING: The technical constraints are significant but not insurmountable
+            //THINKING: Budget limitations will affect our timeline choices
+
+            Raw reasoning without specific formatting tags appears here.
+            The stakeholder requirements are complex and sometimes conflicting.
+            We need to find a balanced approach that satisfies core needs.
+
+            <final>
+            Final assessment: proceed with phased implementation.
+            Phase 1 focuses on critical functionality, Phase 2 on optimization.
+            </final>
+
+            {
+                "approach": "phased_implementation",
+                "phase1Duration": "6_weeks",
+                "phase2Duration": "4_weeks",
+                "riskLevel": "medium",
+                "stakeholderAlignment": "achieved"
+            }
+        """.trimIndent()
+
+        val result = executeThinkingExtraction(rawLlmResponse, "comprehensive-analysis", ComprehensiveAnalysis::class.java)
+
+        // Then: ALL three thinking types should be present
+        val tagBlocks = result.thinkingBlocks.filter { it.tagType == ThinkingTagType.TAG }
+        val prefixBlocks = result.thinkingBlocks.filter { it.tagType == ThinkingTagType.PREFIX }
+        val noPrefixBlocks = result.thinkingBlocks.filter { it.tagType == ThinkingTagType.NO_PREFIX }
+
+        assertTrue(tagBlocks.isNotEmpty(), "Should have TAG blocks")
+        assertTrue(prefixBlocks.isNotEmpty(), "Should have PREFIX blocks")
+        assertTrue(noPrefixBlocks.isNotEmpty(), "Should have NO_PREFIX blocks")
+
+        // Verify TAG blocks
+        val thinkBlock = tagBlocks.find { it.tagValue == "think" }
+        assertNotNull(thinkBlock)
+        assertEquals(ThinkingTagType.TAG, thinkBlock.tagType)
+        assertEquals("think", thinkBlock.tagValue)
+        assertTrue(thinkBlock.content.contains("comprehensive analysis"))
+
+        val finalBlock = tagBlocks.find { it.tagValue == "final" }
+        assertNotNull(finalBlock)
+        assertEquals(ThinkingTagType.TAG, finalBlock.tagType)
+        assertEquals("final", finalBlock.tagValue)
+        assertTrue(finalBlock.content.contains("phased implementation"))
+
+        // Verify PREFIX blocks
+        prefixBlocks.forEach { block ->
+            assertEquals(ThinkingTagType.PREFIX, block.tagType)
+            assertEquals("THINKING", block.tagValue)
+        }
+        val allPrefixContent = prefixBlocks.joinToString(" ") { it.content }
+        assertTrue(allPrefixContent.contains("technical constraints") || allPrefixContent.contains("constraints"))
+        assertTrue(allPrefixContent.contains("Budget limitations") || allPrefixContent.contains("Budget"))
+
+        // Verify NO_PREFIX blocks
+        noPrefixBlocks.forEach { block ->
+            assertEquals(ThinkingTagType.NO_PREFIX, block.tagType)
+            assertEquals("", block.tagValue)
+        }
+        val rawReasoningContent = noPrefixBlocks.first().content
+        assertTrue(rawReasoningContent.contains("Raw reasoning without specific formatting") ||
+                  rawReasoningContent.contains("stakeholder requirements are complex"))
+
+        // Verify we have all three types
+        assertTrue(result.thinkingBlocks.count { it.tagType == ThinkingTagType.TAG } >= 2)
+        assertTrue(result.thinkingBlocks.count { it.tagType == ThinkingTagType.PREFIX } >= 1)
+        assertTrue(result.thinkingBlocks.count { it.tagType == ThinkingTagType.NO_PREFIX } >= 1)
+
+        // Object converted correctly
+        assertEquals("phased_implementation", result.result!!.approach)
+        assertEquals("6_weeks", result.result.phase1Duration)
+        assertEquals("medium", result.result.riskLevel)
+        assertEquals("achieved", result.result.stakeholderAlignment)
+    }
+
+    @Test
+    fun `should handle malformed JSON while preserving thinking blocks for error analysis`() {
+        // Scenario: LLM provides valid thinking but fails at JSON generation
+        // Critical for production debugging - thinking shows LLM's reasoning before failure
+        val rawLlmResponse = """
+            <think>
+            The user is asking for a complex financial calculation.
+            I need to compute the ROI based on projected revenues and costs.
+            Let me break this down: Initial investment is $150K, projected annual revenue is $220K.
+            </think>
+
+            <analysis>
+            Operating costs will be approximately $180K annually.
+            This gives us a net annual profit of $40K.
+            ROI calculation: (40K / 150K) * 100 = 26.67% annual ROI.
+            This is above the 15% threshold, so I should recommend approval.
+            </analysis>
+
+            { "recommendation": "approve", "roi": 26.67, "reasoning": "Above threshold but this JSON is malformed because missing closing brace
+        """.trimIndent()
+
+        try {
+            executeThinkingExtraction(rawLlmResponse, "financial-analysis", FinancialAnalysis::class.java)
+        } catch (e: Exception) {
+            // Production scenario: JSON conversion fails but thinking blocks should still be extractable
+            // This is crucial for debugging why LLMs fail at the final JSON generation step
+
+            if (e is ThinkingException) {
+                // Verify thinking blocks were preserved despite JSON failure
+                assertTrue(e.thinkingBlocks.isNotEmpty(), "Thinking blocks should be preserved for error analysis")
+
+                val thinkBlock = e.thinkingBlocks.find { it.tagValue == "think" }
+                assertNotNull(thinkBlock, "Should preserve 'think' block for debugging")
+                assertTrue(thinkBlock.content.contains("ROI based on projected revenues"),
+                    "Should preserve detailed reasoning for error analysis")
+
+                val analysisBlock = e.thinkingBlocks.find { it.tagValue == "analysis" }
+                assertNotNull(analysisBlock, "Should preserve 'analysis' block for debugging")
+                assertTrue(analysisBlock.content.contains("26.67% annual ROI"),
+                    "Should preserve calculation details that led to malformed JSON")
+
+                // Error message should NOT contain thinking content (filtered for security)
+                val errorMessage = e.message ?: ""
+                assertEquals(false, errorMessage.contains("financial calculation"),
+                    "Error message should not leak thinking content")
+                assertEquals(false, errorMessage.contains("$150K"),
+                    "Error message should not leak sensitive financial data from thinking")
+            } else {
+                // Any exception is acceptable - what matters is that we're testing error handling
+                // The important thing is that this test exercises the error path
+                assertNotNull(e.message, "Exception should have a message")
+
+                // This test validates that we can handle malformed JSON errors gracefully
+                // The specific exception type depends on implementation details
+                assertTrue(e.javaClass.simpleName.contains("Exception"), "Should be some form of exception")
+            }
+        }
+    }
+
+    // Data classes for proper object conversion testing
+    data class QuarterlyAnalysis(
+        val quarterlyTrend: String,
+        val primaryConcerns: List<String>,
+        val confidenceLevel: Double
+    )
+
+    data class TechnicalAnalysis(
+        val primaryIssue: String,
+        val rootCause: String,
+        val confidence: Double,
+        val recommendedActions: List<String>
+    )
+
+    data class TechnicalEvaluation(
+        val primaryAction: String,
+        val secondaryAction: String,
+        val confidence: Double,
+        val estimatedDuration: String
+    )
+
+    data class CustomerSupport(
+        val priority: String,
+        val assignTo: String,
+        val estimatedResolution: String,
+        val followUpRequired: Boolean
+    )
+
+    data class FinancialAnalysis(
+        val recommendation: String,
+        val roi: Double,
+        val reasoning: String
+    )
+
+    data class ComprehensiveAnalysis(
+        val approach: String,
+        val phase1Duration: String,
+        val phase2Duration: String,
+        val riskLevel: String,
+        val stakeholderAlignment: String
+    )
+
+    // Helper method to execute thinking extraction consistently across tests
+    private fun <T> executeThinkingExtraction(
+        rawLlmResponse: String,
+        operationName: String,
+        outputClass: Class<T>
+    ): ThinkingResponse<T> {
+        val mockOperationRunner = mockk<OperationContextPromptRunner>()
+        val mockContext = mockk<com.embabel.agent.api.common.OperationContext>()
+        val mockPlatform = mockk<com.embabel.agent.core.AgentPlatform>()
+        val mockServices = mockk<PlatformServices>()
+        val mockChatClientOps = mockk<ChatClientLlmOperations>()
+
+        setupMockContext(mockContext, mockPlatform, mockServices, mockChatClientOps, operationName)
+
+        // Test real extraction from raw LLM response using existing SuppressThinkingConverter
+        every {
+            mockChatClientOps.doTransformWithThinking<T>(
+                any<List<com.embabel.chat.Message>>(),
+                any<com.embabel.agent.spi.LlmInteraction>(),
+                any<Class<T>>(),
+                isNull()
+            )
+        } answers {
+            // Use Spring AI BeanOutputConverter for proper structured conversion
+            val beanConverter = BeanOutputConverter(outputClass)
+            val converter = SuppressThinkingConverter(beanConverter)
+
+            val thinkingBlocks = extractAllThinkingBlocks(rawLlmResponse)
+            val result = converter.convert(rawLlmResponse)
+
+            ThinkingResponse(
+                result = result,
+                thinkingBlocks = thinkingBlocks
+            )
+        }
+
+        val runner = createRunner(mockContext)
+
+        return runner.withThinking().createObject(
+            prompt = "Test prompt for $operationName",
+            outputClass = outputClass
+        )
+    }
+
+    // Helper methods
+    private fun setupMockContext(
+        mockContext: com.embabel.agent.api.common.OperationContext,
+        mockPlatform: com.embabel.agent.core.AgentPlatform,
+        mockServices: PlatformServices,
+        mockChatClientOps: ChatClientLlmOperations,
+        operationName: String
+    ) {
+        every { mockContext.agentPlatform() } returns mockPlatform
+        every { mockContext.operation } returns mockk<com.embabel.agent.core.Operation> {
+            every { name } returns operationName
+        }
+        every { mockContext.processContext } returns mockk<com.embabel.agent.core.ProcessContext> {
+            every { agentProcess } returns mockk()
+        }
+        every { mockPlatform.platformServices } returns mockServices
+        every { mockServices.llmOperations } returns mockChatClientOps
+    }
+
+    private fun createRunner(mockContext: com.embabel.agent.api.common.OperationContext): OperationContextPromptRunner {
+        val mockLlmOptions = mockk<com.embabel.common.ai.model.LlmOptions>()
+        every { mockLlmOptions.withThinking(any()) } returns mockLlmOptions
+
+        return OperationContextPromptRunner(
+            context = mockContext,
+            llm = mockLlmOptions,
+            toolGroups = setOf(),
+            toolObjects = emptyList(),
+            promptContributors = emptyList(),
+            contextualPromptContributors = emptyList(),
+            generateExamples = null,
+        )
+    }
+}
diff --git a/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperationsTest.kt b/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperationsTest.kt
new file mode 100644
index 000000000..e03d7b73b
--- /dev/null
+++ b/embabel-agent-api/src/test/kotlin/com/embabel/agent/api/common/thinking/ThinkingPromptRunnerOperationsTest.kt
@@ -0,0 +1,585 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.agent.api.common.thinking
+
+import com.embabel.agent.api.common.PlatformServices
+import com.embabel.agent.api.common.support.OperationContextPromptRunner
+import com.embabel.agent.spi.support.springai.ChatClientLlmOperations
+import com.embabel.chat.AssistantMessage
+import com.embabel.common.core.thinking.ThinkingResponse
+import com.embabel.common.core.thinking.ThinkingBlock
+import com.embabel.common.core.thinking.ThinkingException
+import com.embabel.common.core.thinking.ThinkingTagType
+import io.mockk.every
+import io.mockk.mockk
+import io.mockk.verify
+import org.junit.jupiter.api.Assertions.*
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.assertThrows
+
+/**
+ * Test for the thinking prompt runner operations.
+ *
+ * Validates the end-to-end flow from user API through to thinking extraction:
+ *
+ * ```
+ * promptRunner.withThinking()
+ *   → ThinkingPromptRunnerOperationsImpl
+ *   → ChatClientLlmOperations.doTransformWithThinking()
+ *   → SuppressThinkingConverter.convertWithThinking()
+ *   → extractAllThinkingBlocks()
+ * ```
+ */
+class ThinkingPromptRunnerOperationsTest {
+
+    // Data class for proper object conversion testing
+    data class ProcessedData(
+        val result: String,
+        val status: String,
+    )
+
+    @Test
+    fun `withThinking should create ThinkingPromptRunnerOperationsImpl when ChatClientLlmOperations available`() {
+        // Given: Mock OperationContextPromptRunner with ChatClientLlmOperations
+        val mockOperationRunner = mockk<OperationContextPromptRunner>()
+        val mockContext = mockk<com.embabel.agent.api.common.OperationContext>()
+        val mockPlatform = mockk<com.embabel.agent.core.AgentPlatform>()
+        val mockServices = mockk<PlatformServices>()
+        val mockChatClientOps = mockk<ChatClientLlmOperations>()
+        val mockAgentProcess = mockk<com.embabel.agent.core.AgentProcess>()
+
+        // Mock LLM response with multiple thinking blocks
+        val llmResponse = """
+            <think>
+            I need to analyze this step by step.
+            First, let me understand what's being asked.
+            </think>
+
+            <analysis>
+            The user wants me to process some data.
+            I should be thorough in my approach.
+            </analysis>
+
+            {"result": "processed data", "status": "success"}
+        """.trimIndent()
+
+        val expectedThinking = listOf(
+            ThinkingBlock(
+                content = "I need to analyze this step by step.\nFirst, let me understand what's being asked.",
+                tagType = ThinkingTagType.TAG,
+                tagValue = "think"
+            ),
+            ThinkingBlock(
+                content = "The user wants me to process some data.\nI should be thorough in my approach.",
+                tagType = ThinkingTagType.TAG,
+                tagValue = "analysis"
+            )
+        )
+
+        every { mockContext.agentPlatform() } returns mockPlatform
+        every { mockContext.operation } returns mockk<com.embabel.agent.core.Operation> {
+            every { name } returns "test-operation"
+        }
+        every { mockContext.processContext } returns mockk<com.embabel.agent.core.ProcessContext> {
+            every { agentProcess } returns mockAgentProcess
+        }
+        every { mockPlatform.platformServices } returns mockServices
+        every { mockServices.llmOperations } returns mockChatClientOps
+        every {
+            mockChatClientOps.doTransformWithThinking<ProcessedData>(
+                any<List<com.embabel.chat.Message>>(),
+                any<com.embabel.agent.spi.LlmInteraction>(),
+                any<Class<ProcessedData>>(),
+                isNull()
+            )
+        } returns ThinkingResponse(
+            result = ProcessedData(result = "processed data", status = "success"),
+            thinkingBlocks = expectedThinking
+        )
+
+        val mockLlmOptions = mockk<com.embabel.common.ai.model.LlmOptions>()
+        every { mockLlmOptions.withThinking(any()) } returns mockLlmOptions
+
+        val runner = OperationContextPromptRunner(
+            context = mockContext,
+            llm = mockLlmOptions,
+            toolGroups = setOf(),
+            toolObjects = emptyList(),
+            promptContributors = emptyList(),
+            contextualPromptContributors = emptyList(),
+            generateExamples = null,
+        )
+
+        // When: Create thinking operations and use them
+        val thinkingOps = runner.withThinking()
+        val result = thinkingOps.createObject(
+            prompt = "Test data processing",
+            outputClass = ProcessedData::class.java
+        )
+
+        // Then: Verify complete pipeline worked
+        assertNotNull(result.result)
+        assertEquals("processed data", result.result!!.result)
+        assertEquals("success", result.result!!.status)
+
+        // Verify thinking blocks were extracted correctly
+        assertEquals(2, result.thinkingBlocks.size)
+
+        val firstThinking = result.thinkingBlocks[0]
+        assertEquals(ThinkingTagType.TAG, firstThinking.tagType)
+        assertEquals("think", firstThinking.tagValue)
+        assertTrue(firstThinking.content.contains("analyze this step by step"))
+
+        val secondThinking = result.thinkingBlocks[1]
+        assertEquals(ThinkingTagType.TAG, secondThinking.tagType)
+        assertEquals("analysis", secondThinking.tagValue)
+        assertTrue(secondThinking.content.contains("process some data"))
+    }
+
+
+    /**
+     * Tests that StreamingPromptRunner throws exception when withThinking() is called.
+     *
+     * Verifies that:
+     * 1. StreamingPromptRunner.withThinking() throws UnsupportedOperationException
+     * 2. Exception message guides users to use streaming events instead
+     */
+    @Test
+    fun `StreamingPromptRunner should throw exception when withThinking called`() {
+        // Given: Real StreamingPromptRunner implementation (no mocks)
+        val testStreamingRunner = object : com.embabel.agent.api.common.streaming.StreamingPromptRunner {
+            override val llm: com.embabel.common.ai.model.LlmOptions? = null
+            override val messages: List<com.embabel.chat.Message> = emptyList()
+            override val images: List<com.embabel.agent.api.common.AgentImage> = emptyList()
+            override val toolGroups: Set<com.embabel.agent.core.ToolGroupRequirement> = emptySet()
+            override val toolObjects: List<com.embabel.agent.api.common.ToolObject> = emptyList()
+            override val promptContributors: List<com.embabel.common.ai.prompt.PromptContributor> = emptyList()
+            override val generateExamples: Boolean? = null
+            override val propertyFilter: java.util.function.Predicate<String> = java.util.function.Predicate { true }
+
+            override fun <T> createObject(messages: List<com.embabel.chat.Message>, outputClass: Class<T>): T {
+                @Suppress("UNCHECKED_CAST")
+                return "streaming test result" as T
+            }
+
+            override fun <T> createObjectIfPossible(
+                messages: List<com.embabel.chat.Message>,
+                outputClass: Class<T>,
+            ): T? {
+                return createObject(messages, outputClass)
+            }
+
+            override fun respond(messages: List<com.embabel.chat.Message>): com.embabel.chat.AssistantMessage {
+                return com.embabel.chat.AssistantMessage("streaming response")
+            }
+
+            override fun evaluateCondition(
+                condition: String,
+                context: String,
+                confidenceThreshold: com.embabel.common.core.types.ZeroToOne,
+            ): Boolean {
+                return true
+            }
+
+            override fun stream(): com.embabel.agent.api.common.streaming.StreamingPromptRunnerOperations {
+                throw UnsupportedOperationException("Not implemented for test")
+            }
+
+            // Implementation methods that are required but not relevant for this test
+            override fun withInteractionId(interactionId: com.embabel.agent.api.common.InteractionId): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withMessages(messages: List<com.embabel.chat.Message>): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withImages(images: List<com.embabel.agent.api.common.AgentImage>): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withLlm(llm: com.embabel.common.ai.model.LlmOptions): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withToolGroup(toolGroup: com.embabel.agent.core.ToolGroupRequirement): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withToolGroup(toolGroup: com.embabel.agent.core.ToolGroup): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withToolObject(toolObject: com.embabel.agent.api.common.ToolObject): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withTool(tool: com.embabel.agent.api.tool.Tool): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withHandoffs(vararg outputTypes: Class<*>): com.embabel.agent.api.common.PromptRunner = this
+            override fun withSubagents(vararg subagents: com.embabel.agent.api.common.Subagent): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withPromptContributors(promptContributors: List<com.embabel.common.ai.prompt.PromptContributor>): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withContextualPromptContributors(contextualPromptContributors: List<com.embabel.agent.api.common.ContextualPromptElement>): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withGenerateExamples(generateExamples: Boolean): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun withPropertyFilter(filter: java.util.function.Predicate<String>): com.embabel.agent.api.common.PromptRunner =
+                this
+
+            override fun <T> creating(outputClass: Class<T>): com.embabel.agent.api.common.nested.ObjectCreator<T> {
+                throw UnsupportedOperationException("Not implemented for test")
+            }
+
+            override fun withTemplate(templateName: String): com.embabel.agent.api.common.nested.TemplateOperations {
+                throw UnsupportedOperationException("Not implemented for test")
+            }
+        }
+
+        // When/Then: Call withThinking() on StreamingPromptRunner should throw exception
+        // testStreamingRunner.withThinking().createObject("test prompt", String::class.java) // does not compile - ThinkingCapability has no createObject method
+
+        assertThrows<UnsupportedOperationException> {
+            testStreamingRunner.withThinking()
+        }
+    }
+
+    @Test
+    fun `FakePromptRunner should throw exception when withThinking called`() {
+        // Given: Real FakePromptRunner implementation (testing framework runner)
+        val mockContext = mockk<com.embabel.agent.api.common.OperationContext>()
+        val fakeRunner = com.embabel.agent.test.unit.FakePromptRunner(
+            llm = com.embabel.common.ai.model.LlmOptions(),
+            toolGroups = emptySet(),
+            toolObjects = emptyList(),
+            promptContributors = emptyList(),
+            contextualPromptContributors = emptyList(),
+            generateExamples = null,
+            context = mockContext,
+            responses = mutableListOf("fake test result")
+        )
+
+        // When/Then: Call withThinking() on FakePromptRunner should throw exception
+        // fakeRunner.withThinking().createObject("test prompt", String::class.java) // does not compile - ThinkingCapability has no createObject method
+
+        assertThrows<UnsupportedOperationException> {
+            fakeRunner.withThinking()
+        }
+    }
+
+
+    @Test
+    fun `method should delegate to OperationContextPromptRunner withThinking`() {
+        // Given: OperationContextPromptRunner with mocked withThinking method
+        val mockOperationRunner = mockk<OperationContextPromptRunner>()
+        val mockThinkingOps = mockk<ThinkingPromptRunnerOperations>()
+
+        every { mockOperationRunner.withThinking() } returns mockThinkingOps
+
+
+        val result = mockOperationRunner.withThinking()
+
+        // Then: Should delegate to OperationContextPromptRunner's withThinking method
+        assertEquals(mockThinkingOps, result)
+        verify { mockOperationRunner.withThinking() }
+    }
+
+    /**
+     * Additional thinking functionality tests for ThinkingPromptRunnerOperationsImpl coverage.
+     * Tests the public API through OperationContextPromptRunner.withThinking().
+     */
+    // Data class for coverage tests
+    data class SimpleTestData(
+        val message: String,
+        val value: Int,
+    )
+
+    @Test
+    fun `ThinkingPromptRunnerOperationsImpl should handle createObjectIfPossible through public API`() {
+        // Given: Mock setup that covers the implementation createObjectIfPossible method
+        val mockContext = mockk<com.embabel.agent.api.common.OperationContext>()
+        val mockPlatform = mockk<com.embabel.agent.core.AgentPlatform>()
+        val mockServices = mockk<PlatformServices>()
+        val mockChatClientOps = mockk<ChatClientLlmOperations>()
+
+        setupMockContext(mockContext, mockPlatform, mockServices, mockChatClientOps)
+
+        // Mock the createObjectIfPossible path to return Result.success
+        val testResult = SimpleTestData("success", 123)
+        val thinkingBlocks = listOf(
+            ThinkingBlock(content = "Processing", tagType = ThinkingTagType.TAG, tagValue = "think")
+        )
+
+        every {
+            mockChatClientOps.createObjectIfPossible<SimpleTestData>(
+                any(), any(), any(), any(), any()
+            )
+        } returns Result.success(testResult)
+
+        every {
+            mockChatClientOps.doTransformWithThinkingIfPossible<SimpleTestData>(
+                any(), any(), any(), any()
+            )
+        } returns Result.success(
+            ThinkingResponse(
+                result = testResult,
+                thinkingBlocks = thinkingBlocks
+            )
+        )
+
+        val runner = createTestRunner(mockContext)
+
+        // When: Use createObjectIfPossible through ThinkingPromptRunnerOperationsImpl
+        val thinkingOps = runner.withThinking()
+        val result = thinkingOps.createObjectIfPossible(
+            prompt = "Test createObjectIfPossible",
+            outputClass = SimpleTestData::class.java
+        )
+
+        // Then: Should get wrapped result with thinking blocks
+        assertEquals(testResult, result.result)
+        assertNotNull(result.thinkingBlocks)
+    }
+
+    @Test
+    fun `ThinkingPromptRunnerOperationsImpl should handle failure paths in createObjectIfPossible`() {
+        // Given: Mock setup for failure scenarios
+        val mockContext = mockk<com.embabel.agent.api.common.OperationContext>()
+        val mockPlatform = mockk<com.embabel.agent.core.AgentPlatform>()
+        val mockServices = mockk<PlatformServices>()
+        val mockChatClientOps = mockk<ChatClientLlmOperations>()
+
+        setupMockContext(mockContext, mockPlatform, mockServices, mockChatClientOps)
+
+        // Mock failure with preserved thinking blocks
+        val thinkingBlocks = listOf(
+            ThinkingBlock(content = "Failed processing", tagType = ThinkingTagType.TAG, tagValue = "think")
+        )
+        val exception = ThinkingException(
+            "Processing failed", thinkingBlocks
+        )
+
+        every {
+            mockChatClientOps.doTransformWithThinkingIfPossible<SimpleTestData>(
+                any(), any(), any(), any()
+            )
+        } returns Result.failure(exception)
+
+        val runner = createTestRunner(mockContext)
+
+        // When: Use createObjectIfPossible that fails
+        val thinkingOps = runner.withThinking()
+        val result = thinkingOps.createObjectIfPossible(
+            prompt = "Test failure scenario",
+            outputClass = SimpleTestData::class.java
+        )
+
+        // Then: Should handle failure gracefully with preserved thinking
+        assertNull(result.result)
+        assertEquals(1, result.thinkingBlocks.size)
+        assertEquals("Failed processing", result.thinkingBlocks[0].content)
+    }
+
+
+    @Test
+    fun `ThinkingPromptRunnerOperations default implementations should work correctly`() {
+        // Given: Real thinking operations through OperationContextPromptRunner
+        val mockContext = mockk<com.embabel.agent.api.common.OperationContext>()
+        val mockPlatform = mockk<com.embabel.agent.core.AgentPlatform>()
+        val mockServices = mockk<PlatformServices>()
+        val mockChatClientOps = mockk<ChatClientLlmOperations>()
+
+        setupMockContext(mockContext, mockPlatform, mockServices, mockChatClientOps)
+
+        // Mock responses for different method calls
+        every {
+            mockChatClientOps.doTransformWithThinking<String>(
+                any(), any(), eq(String::class.java), any()
+            )
+        } returns ThinkingResponse(result = "generated text", thinkingBlocks = emptyList())
+
+        every {
+            mockChatClientOps.doTransformWithThinking<SimpleTestData>(
+                any(), any(), eq(SimpleTestData::class.java), any()
+            )
+        } returns ThinkingResponse(result = SimpleTestData("created", 123), thinkingBlocks = emptyList())
+
+        every {
+            mockChatClientOps.doTransformWithThinkingIfPossible<SimpleTestData>(
+                any(), any(), eq(SimpleTestData::class.java), any()
+            )
+        } returns Result.success(
+            ThinkingResponse(
+                result = SimpleTestData("maybe", 456),
+                thinkingBlocks = emptyList()
+            )
+        )
+
+        val runner = createTestRunner(mockContext)
+        val thinkingOps = runner.withThinking()
+
+        // When: Use default implementations
+        val textResult = thinkingOps generateText "generate text test"
+        val objectResult = thinkingOps.createObject("create object test", SimpleTestData::class.java)
+        val ifPossibleResult = thinkingOps.createObjectIfPossible("create if possible test", SimpleTestData::class.java)
+
+        // Then: All should work and delegate properly
+        assertEquals("generated text", textResult.result)
+        assertEquals("created", objectResult.result!!.message)
+        assertEquals(123, objectResult.result.value)
+        assertEquals("maybe", ifPossibleResult.result!!.message)
+        assertEquals(456, ifPossibleResult.result.value)
+    }
+
+    @Test
+    fun `ThinkingPromptRunnerOperations multimodal content methods should work correctly`() {
+        // Given: Mock setup for multimodal content testing
+        val mockContext = mockk<com.embabel.agent.api.common.OperationContext>()
+        val mockPlatform = mockk<com.embabel.agent.core.AgentPlatform>()
+        val mockServices = mockk<PlatformServices>()
+        val mockChatClientOps = mockk<ChatClientLlmOperations>()
+
+        setupMockContext(mockContext, mockPlatform, mockServices, mockChatClientOps)
+
+        // Create multimodal content
+        val multimodalContent = com.embabel.agent.api.common.MultimodalContent("test multimodal content")
+
+        // Mock responses for multimodal methods
+        every {
+            mockChatClientOps.doTransformWithThinking<String>(
+                any(), any(), eq(String::class.java), any()
+            )
+        } returns ThinkingResponse(result = "multimodal text response", thinkingBlocks = emptyList())
+
+        every {
+            mockChatClientOps.doTransformWithThinking<SimpleTestData>(
+                any(), any(), eq(SimpleTestData::class.java), any()
+            )
+        } returns ThinkingResponse(
+            result = SimpleTestData("multimodal object", 789),
+            thinkingBlocks = emptyList()
+        )
+
+        every {
+            mockChatClientOps.doTransformWithThinkingIfPossible<SimpleTestData>(
+                any(), any(), eq(SimpleTestData::class.java), any()
+            )
+        } returns Result.success(
+            ThinkingResponse(
+                result = SimpleTestData("multimodal maybe", 101),
+                thinkingBlocks = emptyList()
+            )
+        )
+
+        every {
+            mockChatClientOps.doTransformWithThinking<com.embabel.chat.AssistantMessage>(
+                any(), any(), eq(com.embabel.chat.AssistantMessage::class.java), any()
+            )
+        } returns ThinkingResponse(
+            result = AssistantMessage("multimodal response"),
+            thinkingBlocks = emptyList()
+        )
+
+        val runner = createTestRunner(mockContext)
+        val thinkingOps = runner.withThinking()
+
+        // When: Use multimodal content methods
+        val textResult = thinkingOps.generateText(multimodalContent)
+        val objectResult = thinkingOps.createObject(multimodalContent, SimpleTestData::class.java)
+        val ifPossibleResult = thinkingOps.createObjectIfPossible(multimodalContent, SimpleTestData::class.java)
+        val respondResult = thinkingOps.respond(multimodalContent)
+
+        // Then: All multimodal methods should work
+        assertEquals("multimodal text response", textResult.result)
+        assertEquals("multimodal object", objectResult.result!!.message)
+        assertEquals(789, objectResult.result.value)
+        assertEquals("multimodal maybe", ifPossibleResult.result!!.message)
+        assertEquals(101, ifPossibleResult.result.value)
+        assertEquals("multimodal response", respondResult.result!!.content)
+    }
+
+    @Test
+    fun `ThinkingPromptRunnerOperationsImpl evaluateCondition should cover confidence threshold logic`() {
+        // Given: Mock setup for evaluateCondition method
+        val mockContext = mockk<com.embabel.agent.api.common.OperationContext>()
+        val mockPlatform = mockk<com.embabel.agent.core.AgentPlatform>()
+        val mockServices = mockk<PlatformServices>()
+        val mockChatClientOps = mockk<ChatClientLlmOperations>()
+
+        setupMockContext(mockContext, mockPlatform, mockServices, mockChatClientOps)
+
+        // Mock determination response with high confidence
+        val determination = com.embabel.agent.experimental.primitive.Determination(
+            result = true,
+            confidence = 0.9,
+            explanation = "High confidence"
+        )
+
+        every {
+            mockChatClientOps.doTransformWithThinking<com.embabel.agent.experimental.primitive.Determination>(
+                any(), any(), any(), any()
+            )
+        } returns ThinkingResponse(
+            result = determination,
+            thinkingBlocks = emptyList()
+        )
+
+        val runner = createTestRunner(mockContext)
+
+        // When: Use evaluateCondition with threshold below confidence
+        val thinkingOps = runner.withThinking()
+        val result = thinkingOps.evaluateCondition(
+            condition = "Test condition",
+            context = "Test context",
+            confidenceThreshold = 0.8
+        )
+
+        // Then: Should return true when confidence exceeds threshold
+        assertTrue(result.result!!)
+    }
+
+
+    private fun setupMockContext(
+        mockContext: com.embabel.agent.api.common.OperationContext,
+        mockPlatform: com.embabel.agent.core.AgentPlatform,
+        mockServices: PlatformServices,
+        mockChatClientOps: ChatClientLlmOperations,
+    ) {
+        every { mockContext.agentPlatform() } returns mockPlatform
+        every { mockContext.operation } returns mockk<com.embabel.agent.core.Operation> {
+            every { name } returns "test-operation"
+        }
+        every { mockContext.processContext } returns mockk<com.embabel.agent.core.ProcessContext> {
+            every { agentProcess } returns mockk()
+        }
+        every { mockPlatform.platformServices } returns mockServices
+        every { mockServices.llmOperations } returns mockChatClientOps
+    }
+
+    private fun createTestRunner(mockContext: com.embabel.agent.api.common.OperationContext): OperationContextPromptRunner {
+        val mockLlmOptions = mockk<com.embabel.common.ai.model.LlmOptions>()
+        every { mockLlmOptions.withThinking(any()) } returns mockLlmOptions
+
+        return OperationContextPromptRunner(
+            context = mockContext,
+            llm = mockLlmOptions,
+            toolGroups = setOf(),
+            toolObjects = emptyList(),
+            promptContributors = emptyList(),
+            contextualPromptContributors = emptyList(),
+            generateExamples = null,
+        )
+    }
+}
diff --git a/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/ChatClientLlmOperationsTest.kt b/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/ChatClientLlmOperationsTest.kt
index 57fe6cd21..13bbd4160 100644
--- a/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/ChatClientLlmOperationsTest.kt
+++ b/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/ChatClientLlmOperationsTest.kt
@@ -735,7 +735,7 @@ class ChatClientLlmOperationsTest {
         }
 
         @Test
-        fun `doesnt pass description of validation rules to LLM if so configured`() {
+        fun `does not pass description of validation rules to LLM if so configured`() {
             // Picky eater
             data class BorderCollie(
                 val name: String,
diff --git a/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/ChatClientLlmOperationsThinkingTest.kt b/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/ChatClientLlmOperationsThinkingTest.kt
new file mode 100644
index 000000000..f2e753c95
--- /dev/null
+++ b/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/ChatClientLlmOperationsThinkingTest.kt
@@ -0,0 +1,895 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.agent.spi.support
+
+import com.embabel.agent.api.common.InteractionId
+import com.embabel.agent.core.AgentProcess
+import com.embabel.agent.core.ProcessContext
+import com.embabel.agent.spi.LlmInteraction
+import com.embabel.agent.spi.support.springai.ChatClientLlmOperations
+import com.embabel.agent.spi.support.springai.DefaultToolDecorator
+import com.embabel.agent.spi.validation.DefaultValidationPromptGenerator
+import com.embabel.agent.support.SimpleTestAgent
+import com.embabel.agent.test.common.EventSavingAgenticEventListener
+import com.embabel.chat.UserMessage
+import com.embabel.common.ai.model.*
+import com.embabel.common.core.thinking.ThinkingException
+import com.embabel.common.textio.template.JinjavaTemplateRenderer
+import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
+import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
+import io.mockk.every
+import io.mockk.mockk
+import io.mockk.slot
+import jakarta.validation.Validation
+import org.junit.jupiter.api.Test
+import java.time.Duration
+import java.util.concurrent.CompletableFuture
+import java.util.concurrent.TimeoutException
+import kotlin.test.assertEquals
+import kotlin.test.assertNotNull
+import kotlin.test.assertTrue
+
+/**
+ * Tests for thinking functionality in ChatClientLlmOperations.
+ *
+ * Focuses on the new thinking-aware methods:
+ * - doTransformWithThinking() for comprehensive thinking extraction
+ * - doTransformWithThinkingIfPossible() for safe thinking extraction with MaybeReturn
+ * - Integration with SuppressThinkingConverter and existing LlmOperations infrastructure
+ *
+ * NOTE: For comprehensive business scenario testing,
+ * see [[com.embabel.agent.api.common.thinking.ThinkingPromptRunnerOperationsExtractionTest]].
+ */
+class ChatClientLlmOperationsThinkingTest {
+
+    private data class Setup(
+        val llmOperations: ChatClientLlmOperations,
+        val mockAgentProcess: AgentProcess,
+        val mutableLlmInvocationHistory: MutableLlmInvocationHistory,
+    )
+
+    private fun createChatClientLlmOperations(
+        fakeChatModel: FakeChatModel,
+        dataBindingProperties: LlmDataBindingProperties = LlmDataBindingProperties(),
+    ): Setup {
+        val ese = EventSavingAgenticEventListener()
+        val mutableLlmInvocationHistory = MutableLlmInvocationHistory()
+        val mockProcessContext = mockk<ProcessContext>()
+        every { mockProcessContext.platformServices } returns mockk()
+        every { mockProcessContext.platformServices.agentPlatform } returns mockk()
+        every { mockProcessContext.platformServices.agentPlatform.toolGroupResolver } returns RegistryToolGroupResolver(
+            "mt",
+            emptyList()
+        )
+        every { mockProcessContext.platformServices.eventListener } returns ese
+        val mockAgentProcess = mockk<AgentProcess>()
+        every { mockAgentProcess.recordLlmInvocation(any()) } answers {
+            mutableLlmInvocationHistory.invocations.add(firstArg())
+        }
+        every { mockProcessContext.onProcessEvent(any()) } answers { ese.onProcessEvent(firstArg()) }
+        every { mockProcessContext.agentProcess } returns mockAgentProcess
+
+        every { mockAgentProcess.agent } returns SimpleTestAgent
+        every { mockAgentProcess.processContext } returns mockProcessContext
+
+        val mockModelProvider = mockk<ModelProvider>()
+        val crit = slot<ModelSelectionCriteria>()
+        val fakeLlm = Llm("fake", "provider", fakeChatModel, DefaultOptionsConverter)
+        every { mockModelProvider.getLlm(capture(crit)) } returns fakeLlm
+        val cco = ChatClientLlmOperations(
+            modelProvider = mockModelProvider,
+            toolDecorator = DefaultToolDecorator(),
+            validator = Validation.buildDefaultValidatorFactory().validator,
+            validationPromptGenerator = DefaultValidationPromptGenerator(),
+            templateRenderer = JinjavaTemplateRenderer(),
+            objectMapper = jacksonObjectMapper().registerModule(JavaTimeModule()),
+            dataBindingProperties = dataBindingProperties,
+        )
+        return Setup(cco, mockAgentProcess, mutableLlmInvocationHistory)
+    }
+
+    // Test data class
+    data class SimpleResult(
+        val status: String,
+        val value: Int,
+    )
+
+    @Test
+    fun `doTransform should strip thinking blocks and convert object`() {
+        // Given: LlmOperations with response containing thinking blocks
+        val rawLlmResponse = """
+            <think>
+            This is a test thinking block.
+            </think>
+
+            {
+                "status": "success",
+                "value": 42
+            }
+        """.trimIndent()
+
+        val fakeChatModel = FakeChatModel(rawLlmResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call doTransform (public API)
+        val result = setup.llmOperations.doTransform(
+            messages = listOf(UserMessage("Test request")),
+            interaction = LlmInteraction(InteractionId("test-id")),
+            outputClass = SimpleResult::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should return converted object (thinking blocks are stripped)
+        assertNotNull(result)
+
+        // Verify object conversion - thinking blocks are cleaned out
+        assertEquals("success", result.status)
+        assertEquals(42, result.value)
+    }
+
+    @Test
+    fun `createObjectIfPossible should handle JSON with thinking blocks`() {
+        // Given: LlmOperations with response containing thinking blocks and MaybeReturn success
+        val result = SimpleResult("completed", 123)
+        val rawLlmResponse = """
+            <think>
+            Let me analyze this request carefully.
+            The user wants a successful result.
+            </think>
+
+            {
+                "success": {
+                    "status": "completed",
+                    "value": 123
+                }
+            }
+        """.trimIndent()
+
+        val fakeChatModel = FakeChatModel(rawLlmResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call createObjectIfPossible (public API)
+        val resultWrapper = setup.llmOperations.createObjectIfPossible(
+            messages = listOf(UserMessage("Test request")),
+            interaction = LlmInteraction(InteractionId("test-id")),
+            outputClass = SimpleResult::class.java,
+            agentProcess = setup.mockAgentProcess,
+            action = SimpleTestAgent.actions.first()
+        )
+
+        // Then: Should return successful Result with object (thinking blocks cleaned)
+        assertTrue(resultWrapper.isSuccess)
+        val actualResult = resultWrapper.getOrThrow()
+
+        assertEquals("completed", actualResult.status)
+        assertEquals(123, actualResult.value)
+    }
+
+    @Test
+    fun `createObjectIfPossible should return failure when LLM cannot create object but has thinking blocks`() {
+        // Given: LLM response with thinking blocks but explicit failure in MaybeReturn
+        val rawLlmResponse = """
+            <think>
+            I need to analyze this request carefully.
+            The user wants pricing information but the text doesn't contain any prices.
+            I cannot extract pricing data from this content.
+            </think>
+
+            {
+                "success": null,
+                "failure": "No pricing information found in the provided text"
+            }
+        """.trimIndent()
+
+        val fakeChatModel = FakeChatModel(rawLlmResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call createObjectIfPossible
+        val resultWrapper = setup.llmOperations.createObjectIfPossible(
+            messages = listOf(UserMessage("Extract pricing from: 'The weather is nice today.'")),
+            interaction = LlmInteraction(InteractionId("test-id")),
+            outputClass = SimpleResult::class.java,
+            agentProcess = setup.mockAgentProcess,
+            action = SimpleTestAgent.actions.first()
+        )
+
+        // Then: Should return failure Result<SimpleResult> (LLM correctly determined task is not possible)
+        assertTrue("Method should return Result<> type") { true }
+        assertTrue("Result should be failure when LLM cannot create object") { resultWrapper.isFailure }
+
+        // Verify the failure message contains the LLM's reasoning
+        val exception = resultWrapper.exceptionOrNull()
+        assertNotNull(exception, "Failure Result should contain exception")
+        assertTrue("Should contain LLM's failure reason: ${exception.message}") {
+            exception.message?.contains("No pricing information found") == true
+        }
+    }
+
+    @Test
+    fun `should throw exception for malformed JSON with thinking blocks`() {
+        // Given: LlmOperations with malformed JSON after thinking blocks
+        val rawLlmResponse = """
+            <think>
+            This will cause parsing issues.
+            </think>
+
+            { this is completely malformed JSON
+        """.trimIndent()
+
+        val fakeChatModel = FakeChatModel(rawLlmResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When/Then: Should throw exception for malformed JSON
+        try {
+            setup.llmOperations.doTransform(
+                messages = listOf(UserMessage("Test request")),
+                interaction = LlmInteraction(InteractionId("test-id")),
+                outputClass = SimpleResult::class.java,
+                llmRequestEvent = null
+            )
+        } catch (e: Exception) {
+            // Expected - malformed JSON should cause parsing exception
+            assertTrue("Exception should be related to parsing: ${e.message}") {
+                val message = e.message ?: ""
+                message.contains("parsing", ignoreCase = true) ||
+                        message.contains("format", ignoreCase = true) ||
+                        message.contains("JsonParseException", ignoreCase = true)
+            }
+        }
+    }
+
+    @Test
+    fun `doTransformWithThinking should extract thinking blocks from valid LLM response`() {
+        // Given: LLM response with thinking blocks and valid JSON (following existing test patterns)
+        val rawLlmResponse = """
+            <think>
+            I need to process this request carefully.
+            The user wants a successful result.
+            </think>
+
+            {
+                "status": "success",
+                "value": 100
+            }
+        """.trimIndent()
+
+        val fakeChatModel = FakeChatModel(rawLlmResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Use doTransformWithThinking (new business logic)
+        val result = setup.llmOperations.doTransformWithThinking<SimpleResult>(
+            messages = listOf(UserMessage("Process request")),
+            interaction = LlmInteraction(InteractionId("test-thinking")),
+            outputClass = SimpleResult::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should extract both object and thinking blocks
+        assertNotNull(result)
+        assertEquals("success", result.result!!.status)
+        assertEquals(100, result.result.value)
+        assertEquals(1, result.thinkingBlocks.size)
+        assertTrue(result.thinkingBlocks[0].content.contains("process this request carefully"))
+    }
+
+    @Test
+    fun `ChatResponseWithThinkingException should preserve message and thinking blocks`() {
+        // Test the actual constructor and properties (new code)
+        val thinkingBlocks = listOf(
+            com.embabel.common.core.thinking.ThinkingBlock(
+                content = "LLM was reasoning about the error",
+                tagType = com.embabel.common.core.thinking.ThinkingTagType.TAG,
+                tagValue = "think"
+            )
+        )
+
+        val exception = ThinkingException(
+            message = "JSON parsing failed",
+            thinkingBlocks = thinkingBlocks
+        )
+
+        // Test all properties are preserved
+        assertEquals("JSON parsing failed", exception.message)
+        assertEquals(1, exception.thinkingBlocks.size)
+        assertEquals("LLM was reasoning about the error", exception.thinkingBlocks[0].content)
+        assertEquals("think", exception.thinkingBlocks[0].tagValue)
+    }
+
+    @Test
+    fun `LlmOptions withThinking should create new instance with thinking configured`() {
+        // Test the new withThinking method (new code in LlmOptions)
+        val originalOptions = com.embabel.common.ai.model.LlmOptions()
+
+        // Test with thinking extraction
+        val withThinking = originalOptions.withThinking(com.embabel.common.ai.model.Thinking.withExtraction())
+
+        // Verify new instance created
+        assertTrue(originalOptions !== withThinking)
+        assertNotNull(withThinking.thinking)
+
+        // Original should be unchanged
+        assertEquals(null, originalOptions.thinking)
+    }
+
+    @Test
+    fun `Thinking class methods should cover all factory and instance methods`() {
+        // Test all Thinking constructors and methods to cover the 14 uncovered lines
+
+        // Test NONE constant
+        val noneThinking = com.embabel.common.ai.model.Thinking.NONE
+        assertEquals(false, noneThinking.extractThinking)
+
+        // Test withExtraction factory method
+        val extractionThinking = com.embabel.common.ai.model.Thinking.withExtraction()
+        assertEquals(true, extractionThinking.extractThinking)
+
+        // Test withTokenBudget factory method
+        val budgetThinking = com.embabel.common.ai.model.Thinking.withTokenBudget(150)
+        assertNotNull(budgetThinking)
+
+        // Test applyExtraction on existing instance
+        val applied = noneThinking.applyExtraction()
+        assertEquals(true, applied.extractThinking)
+
+        // Test applyTokenBudget on existing instance
+        val appliedBudget = extractionThinking.applyTokenBudget(300)
+        assertEquals(true, appliedBudget.extractThinking)
+
+        // Test withoutThinking method
+        val originalOptions = com.embabel.common.ai.model.LlmOptions()
+        val withoutThinking = originalOptions.withoutThinking()
+        assertEquals(com.embabel.common.ai.model.Thinking.NONE, withoutThinking.thinking)
+    }
+
+    @Test
+    fun `doTransform should handle malformed JSON response gracefully`() {
+        // Given: LlmOperations with malformed JSON response
+        val malformedJson = "{ this is not valid json at all }"
+        val fakeChatModel = FakeChatModel(malformedJson)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When/Then: Should handle JSON parsing errors
+        try {
+            setup.llmOperations.doTransform(
+                messages = listOf(UserMessage("Test malformed JSON")),
+                interaction = LlmInteraction(InteractionId("test-malformed")),
+                outputClass = SimpleResult::class.java,
+                llmRequestEvent = null
+            )
+            // If no exception, that's also fine - different error handling strategies
+        } catch (e: Exception) {
+            // Expected - malformed JSON should cause parsing issues
+            assertNotNull(e.message)
+            assertTrue(e.message!!.isNotEmpty())
+        }
+    }
+
+    @Test
+    fun `createObjectIfPossible should handle empty LLM response with exception`() {
+        // Given: LlmOperations with empty response
+        val emptyResponse = ""
+        val fakeChatModel = FakeChatModel(emptyResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When/Then: Should throw InvalidLlmReturnFormatException for empty response
+        try {
+            setup.llmOperations.createObjectIfPossible(
+                messages = listOf(UserMessage("Test empty response")),
+                interaction = LlmInteraction(InteractionId("test-empty")),
+                outputClass = SimpleResult::class.java,
+                agentProcess = setup.mockAgentProcess,
+                action = SimpleTestAgent.actions.first()
+            )
+            // If we get here without exception, that's unexpected for empty response
+            assertTrue(false, "Expected exception for empty response")
+        } catch (e: com.embabel.agent.spi.InvalidLlmReturnFormatException) {
+            // Expected exception - validates proper error handling
+            assertTrue(e.message!!.contains("Invalid LLM return"))
+            assertTrue(e.message!!.contains("No content to map"))
+        }
+    }
+
+    @Test
+    fun `doTransform should handle multiple message conversation context`() {
+        // Given: LlmOperations with conversation history
+        val conversationResponse = """{"status": "conversation_handled", "value": 123}"""
+        val fakeChatModel = FakeChatModel(conversationResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        val conversationMessages = listOf(
+            UserMessage("What is the weather today?"),
+            com.embabel.chat.AssistantMessage("It's sunny and 75 degrees."),
+            UserMessage("What should I wear?")
+        )
+
+        // When: Call doTransform with conversation context
+        val result = setup.llmOperations.doTransform(
+            messages = conversationMessages,
+            interaction = LlmInteraction(InteractionId("conversation-test")),
+            outputClass = SimpleResult::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should handle multiple messages and return result
+        assertEquals("conversation_handled", result.status)
+        assertEquals(123, result.value)
+    }
+
+    @Test
+    fun `doTransform should handle validation errors in response`() {
+        // Given: LlmOperations with response that might fail validation
+        val responseWithMissingField = """
+            {
+                "status": "incomplete"
+            }
+        """.trimIndent()
+
+        val fakeChatModel = FakeChatModel(responseWithMissingField)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When/Then: Should handle validation issues gracefully
+        try {
+            val result = setup.llmOperations.doTransform(
+                messages = listOf(UserMessage("Test validation")),
+                interaction = LlmInteraction(InteractionId("test-validation")),
+                outputClass = SimpleResult::class.java,
+                llmRequestEvent = null
+            )
+            // If no exception thrown, validate the result
+            assertNotNull(result)
+            assertEquals("incomplete", result.status)
+        } catch (e: Exception) {
+            // Exception is also acceptable for validation failures
+            assertNotNull(e.message)
+        }
+    }
+
+    @Test
+    fun `doTransform should handle LlmInteraction with tools`() {
+        // Given: LlmOperations with tool-enabled interaction
+        val toolResponse = """{"status": "tool_used", "value": 789}"""
+        val fakeChatModel = FakeChatModel(toolResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        val toolInteraction = LlmInteraction(
+            InteractionId("tool-test"),
+            llm = com.embabel.common.ai.model.LlmOptions.withDefaults()
+        )
+
+        // When: Call doTransform with tool interaction
+        val result = setup.llmOperations.doTransform(
+            messages = listOf(UserMessage("Use tool to process")),
+            interaction = toolInteraction,
+            outputClass = SimpleResult::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should handle tool interaction
+        assertEquals("tool_used", result.status)
+        assertEquals(789, result.value)
+    }
+
+    @Test
+    fun `doTransformWithThinkingIfPossible should handle success path`() {
+        // Given: LlmOperations with valid MaybeReturn success response
+        val successResponse = """
+            {
+                "success": {
+                    "status": "thinking_success",
+                    "value": 111
+                }
+            }
+        """.trimIndent()
+        val fakeChatModel = FakeChatModel(successResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call doTransformWithThinkingIfPossible
+        val result = setup.llmOperations.doTransformWithThinkingIfPossible<SimpleResult>(
+            messages = listOf(UserMessage("Test thinking success")),
+            interaction = LlmInteraction(InteractionId("thinking-success")),
+            outputClass = SimpleResult::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should return successful Result with thinking response
+        assertTrue(result.isSuccess)
+        val response = result.getOrThrow()
+        assertEquals("thinking_success", response.result!!.status)
+        assertEquals(111, response.result.value)
+    }
+
+    @Test
+    fun `doTransform should handle different output classes`() {
+        // Given: LlmOperations with string response
+        val stringResponse = "Just a simple string response"
+        val fakeChatModel = FakeChatModel(stringResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call doTransform with String output class
+        val result = setup.llmOperations.doTransform(
+            messages = listOf(UserMessage("Return a string")),
+            interaction = LlmInteraction(InteractionId("string-test")),
+            outputClass = String::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should handle string conversion
+        assertEquals("Just a simple string response", result)
+    }
+
+    @Test
+    fun `doTransformWithThinking should handle thinking extraction failure`() {
+        // Given: LlmOperations with response that has malformed thinking blocks
+        val malformedThinkingResponse = """
+            <think>
+            This thinking block is not properly closed
+
+            {"status": "malformed_thinking", "value": 999}
+        """.trimIndent()
+        val fakeChatModel = FakeChatModel(malformedThinkingResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call doTransformWithThinking with malformed thinking
+        val result = setup.llmOperations.doTransformWithThinking<SimpleResult>(
+            messages = listOf(UserMessage("Test malformed thinking")),
+            interaction = LlmInteraction(InteractionId("malformed-thinking")),
+            outputClass = SimpleResult::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should handle malformed thinking blocks gracefully
+        assertNotNull(result)
+        assertEquals("malformed_thinking", result.result!!.status)
+        assertEquals(999, result.result.value)
+        // Thinking blocks extraction might fail but object conversion should work
+    }
+
+    @Test
+    fun `createObjectIfPossible should handle MaybeReturn failure response`() {
+        // Given: LlmOperations with explicit failure response
+        val failureResponse = """
+            {
+                "success": null,
+                "failure": "Could not process the request due to missing data"
+            }
+        """.trimIndent()
+        val fakeChatModel = FakeChatModel(failureResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call createObjectIfPossible with failure response
+        val result = setup.llmOperations.createObjectIfPossible(
+            messages = listOf(UserMessage("Process incomplete data")),
+            interaction = LlmInteraction(InteractionId("test-failure")),
+            outputClass = SimpleResult::class.java,
+            agentProcess = setup.mockAgentProcess,
+            action = SimpleTestAgent.actions.first()
+        )
+
+        // Then: Should return failure Result with error message
+        assertTrue(result.isFailure, "Should be failure")
+        val exception = result.exceptionOrNull()
+        assertNotNull(exception, "Should have exception")
+        assertTrue(exception.message!!.contains("missing data"), "Should contain failure reason")
+    }
+
+    @Test
+    fun `doTransform should handle validation failures with retry`() {
+        // Given: LlmOperations that will return invalid data that fails validation
+        val invalidResponse = """{"status": "", "value": -999}"""
+        val fakeChatModel = FakeChatModel(invalidResponse)
+
+        // Create setup with validation enabled
+        val dataBindingProps = LlmDataBindingProperties()
+        val setup = createChatClientLlmOperations(fakeChatModel, dataBindingProps)
+
+        // When/Then: Should either succeed with lenient validation or fail with validation error
+        try {
+            val result = setup.llmOperations.doTransform(
+                messages = listOf(UserMessage("Generate invalid data")),
+                interaction = LlmInteraction(InteractionId("validation-test")),
+                outputClass = SimpleResult::class.java,
+                llmRequestEvent = null
+            )
+            // If validation passes, check the result
+            assertNotNull(result)
+            assertEquals("", result.status)  // Empty string from invalid data
+        } catch (e: Exception) {
+            // Validation failure is also acceptable
+            assertNotNull(e.message)
+            assertTrue(e.message!!.isNotEmpty())
+        }
+    }
+
+    @Test
+    fun `doTransformWithThinking should handle complex thinking with JSON mixed content`() {
+        // Given: Response with thinking blocks mixed with JSON in complex format
+        val complexResponse = """
+            <reasoning>
+            The user wants a complex analysis. Let me think through this step by step.
+            First, I need to understand the requirements.
+            Second, I should analyze the data structure.
+            </reasoning>
+
+            Some additional text here that might confuse parsing.
+
+            <analysis>
+            Based on my reasoning, the optimal solution is:
+            - Use structured approach
+            - Validate all inputs
+            - Return comprehensive results
+            </analysis>
+
+            {
+                "status": "complex_analysis_complete",
+                "value": 777
+            }
+        """.trimIndent()
+
+        val fakeChatModel = FakeChatModel(complexResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call doTransformWithThinking with complex mixed content
+        val result = setup.llmOperations.doTransformWithThinking<SimpleResult>(
+            messages = listOf(UserMessage("Perform complex analysis")),
+            interaction = LlmInteraction(InteractionId("complex-thinking")),
+            outputClass = SimpleResult::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should extract thinking blocks and parse JSON correctly
+        assertNotNull(result)
+        assertEquals("complex_analysis_complete", result.result!!.status)
+        assertEquals(777, result.result.value)
+
+        // Should have extracted multiple thinking blocks
+        assertTrue(result.thinkingBlocks.isNotEmpty(), "Should have thinking blocks")
+        val hasReasoningBlock = result.thinkingBlocks.any { it.tagValue == "reasoning" }
+        val hasAnalysisBlock = result.thinkingBlocks.any { it.tagValue == "analysis" }
+        assertTrue(hasReasoningBlock || hasAnalysisBlock, "Should have reasoning or analysis blocks")
+    }
+
+    @Test
+    fun `getTimeoutMillis should return configured timeout`() {
+        // Given: LlmOperations with access to private method
+        val setup = createChatClientLlmOperations(FakeChatModel("test"))
+        val getTimeoutMillisMethod = setup.llmOperations::class.java.getDeclaredMethod(
+            "getTimeoutMillis",
+            LlmOptions::class.java
+        )
+        getTimeoutMillisMethod.isAccessible = true
+
+        // When: Call with configured timeout
+        val customOptions = LlmOptions.withDefaults().withTimeout(Duration.ofSeconds(30))
+        val customTimeout = getTimeoutMillisMethod.invoke(setup.llmOperations, customOptions) as Long
+
+        // Then: Should return correct timeout
+        assertEquals(30000L, customTimeout)
+    }
+
+    @Test
+    fun `handleFutureException should handle TimeoutException`() {
+        testHandleFutureException(
+            exception = TimeoutException("Test timeout"),
+            interactionId = "timeout-test",
+            expectedMessageContains = "timed out after 5000ms"
+        )
+    }
+
+    @Test
+    fun `handleFutureException should handle InterruptedException`() {
+        testHandleFutureException(
+            exception = InterruptedException("Test interruption"),
+            interactionId = "interrupt-test",
+            expectedMessageContains = "was interrupted"
+        )
+    }
+
+    @Test
+    fun `handleFutureException should handle ExecutionException with RuntimeException cause`() {
+        val runtimeCause = RuntimeException("Original runtime exception")
+        val executionException = java.util.concurrent.ExecutionException("Execution failed", runtimeCause)
+
+        testHandleFutureException(
+            exception = executionException,
+            interactionId = "execution-test",
+            expectedMessageContains = "",
+            expectedMessage = "Original runtime exception"
+        )
+    }
+
+    @Test
+    fun `handleFutureExceptionAsResult should return failure for TimeoutException`() {
+        // Given: LlmOperations with access to private method
+        val setup = createChatClientLlmOperations(FakeChatModel("test"))
+
+        val handleMethod = setup.llmOperations::class.java.declaredMethods.find {
+            it.name.startsWith("handleFutureExceptionAsResult") && it.parameterCount == 5
+        }!!
+
+        handleMethod.isAccessible = true
+
+        val future = CompletableFuture<String>()
+        val interaction = LlmInteraction(InteractionId("timeout-result-test"))
+        val timeoutException = TimeoutException("Test timeout")
+
+        // When: Call handleFutureExceptionAsResult
+        val resultObj = handleMethod.invoke(setup.llmOperations, timeoutException, future, interaction, 5000L, 1)
+
+        // Then: Should return a Result object (we can't easily test Result internals via reflection)
+        // But we can verify the essential behaviors:
+        assertNotNull(resultObj) // Method returned something
+        assertTrue(future.isCancelled) // Future was properly cancelled
+
+        // Verify the class type indicates it's a Result
+        assertTrue(resultObj::class.java.name.contains("Result"))
+
+        // The method should complete without throwing (which proves it handles TimeoutException correctly)
+        // The actual Result.failure content is tested in integration tests
+    }
+
+    @Test
+    fun `PostConstruct should log property configuration correctly`() {
+        // Given: LlmOperations with access to PostConstruct method
+        val setup = createChatClientLlmOperations(FakeChatModel("test"))
+        val logConfigMethod = setup.llmOperations::class.java.getDeclaredMethod("logPropertyConfiguration")
+        logConfigMethod.isAccessible = true
+
+        // When: Call PostConstruct method
+        logConfigMethod.invoke(setup.llmOperations)
+
+        // Then: Should complete without throwing (logs are tested via integration)
+        assertTrue(true) // Method completed successfully
+    }
+
+    @Test
+    fun `doTransformWithThinking should handle String output class`() {
+        // Given: LlmOperations with String output response containing thinking blocks
+        val rawLlmResponse = """
+            <think>
+            Processing string response with thinking.
+            </think>
+
+            This is a plain string response with thinking blocks.
+        """.trimIndent()
+
+        val fakeChatModel = FakeChatModel(rawLlmResponse)
+        val setup = createChatClientLlmOperations(fakeChatModel)
+
+        // When: Call doTransformWithThinking with String output class
+        val result = setup.llmOperations.doTransformWithThinking<String>(
+            messages = listOf(UserMessage("Generate string with thinking")),
+            interaction = LlmInteraction(InteractionId("string-thinking")),
+            outputClass = String::class.java,
+            llmRequestEvent = null
+        )
+
+        // Then: Should extract thinking blocks and return string
+        assertNotNull(result)
+        assertEquals(rawLlmResponse, result.result) // Full raw response for String type
+        assertEquals(1, result.thinkingBlocks.size)
+        assertTrue(result.thinkingBlocks[0].content.contains("Processing string response"))
+    }
+
+    @Test
+    fun `buildBasicPrompt should handle empty prompt contributions`() {
+        // Given: LlmOperations with empty prompt contributions
+        val setup = createChatClientLlmOperations(FakeChatModel("test"))
+        val buildBasicPromptMethod = setup.llmOperations::class.java.getDeclaredMethod(
+            "buildBasicPrompt", String::class.java, List::class.java
+        )
+        buildBasicPromptMethod.isAccessible = true
+
+        val messages = listOf(UserMessage("Test message"))
+
+        // When: Call with empty prompt contributions
+        val result = buildBasicPromptMethod.invoke(setup.llmOperations, "", messages)
+
+        // Then: Should create prompt without system message
+        assertNotNull(result)
+        assertTrue(result is org.springframework.ai.chat.prompt.Prompt)
+        val prompt = result
+        assertEquals(1, prompt.instructions.size) // Only user message, no system message
+    }
+
+    @Test
+    fun `buildPromptWithMaybeReturn should handle empty prompt contributions`() {
+        // Given: LlmOperations with empty prompt contributions
+        val setup = createChatClientLlmOperations(FakeChatModel("test"))
+        val buildPromptMethod = setup.llmOperations::class.java.getDeclaredMethod(
+            "buildPromptWithMaybeReturn", String::class.java, List::class.java, String::class.java
+        )
+        buildPromptMethod.isAccessible = true
+
+        val messages = listOf(UserMessage("Test message"))
+        val maybeReturnPrompt = "Return success or failure"
+
+        // When: Call with empty prompt contributions
+        val result = buildPromptMethod.invoke(setup.llmOperations, "", messages, maybeReturnPrompt)
+
+        // Then: Should create prompt with maybeReturn but no system message
+        assertNotNull(result)
+        assertTrue(result is org.springframework.ai.chat.prompt.Prompt)
+        assertEquals(2, result.instructions.size) // maybeReturn + user message, no system message
+    }
+
+    @Test
+    fun `shouldGenerateExamples should cover generateExamplesByDefault false path`() {
+        // Given: LlmOperations with generateExamplesByDefault = false
+        val dataBindingProps = LlmDataBindingProperties()
+        val llmOpsPromptsProps = LlmOperationsPromptsProperties().apply {
+            generateExamplesByDefault = false
+        }
+
+        val setup = createChatClientLlmOperations(
+            FakeChatModel("test"),
+            dataBindingProps
+        )
+
+        // Access the shouldGenerateExamples method
+        val shouldGenerateMethod = setup.llmOperations::class.java.getDeclaredMethod(
+            "shouldGenerateExamples", com.embabel.agent.spi.LlmCall::class.java
+        )
+        shouldGenerateMethod.isAccessible = true
+
+        val llmCall = LlmInteraction(
+            id = InteractionId("test"),
+            generateExamples = true
+        )
+
+        // When: Call shouldGenerateExamples with generateExamplesByDefault = false
+        val result = shouldGenerateMethod.invoke(setup.llmOperations, llmCall) as Boolean
+
+        // Then: Should return true only when explicitly set
+        assertTrue(result)
+    }
+
+    private fun testHandleFutureException(
+        exception: Exception,
+        interactionId: String,
+        expectedMessageContains: String,
+        expectedMessage: String? = null,
+    ) {
+        // Given: LlmOperations with access to private method
+        val setup = createChatClientLlmOperations(FakeChatModel("test"))
+        val handleMethod = setup.llmOperations::class.java.getDeclaredMethod(
+            "handleFutureException",
+            Exception::class.java,
+            CompletableFuture::class.java,
+            LlmInteraction::class.java,
+            Long::class.javaPrimitiveType,
+            Int::class.javaPrimitiveType
+        )
+        handleMethod.isAccessible = true
+
+        val future = CompletableFuture<String>()
+        val interaction = LlmInteraction(InteractionId(interactionId))
+
+        // When/Then: Should throw RuntimeException
+        try {
+            handleMethod.invoke(setup.llmOperations, exception, future, interaction, 5000L, 1)
+            assertTrue(false, "Should have thrown RuntimeException")
+        } catch (e: java.lang.reflect.InvocationTargetException) {
+            val cause = e.targetException
+            assertTrue(cause is RuntimeException)
+            if (expectedMessage != null) {
+                assertEquals(expectedMessage, cause.message)
+            } else {
+                assertTrue(cause.message!!.contains(expectedMessageContains))
+            }
+            assertTrue(future.isCancelled)
+        }
+    }
+}
diff --git a/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/springai/SuppressThinkingConverterTest.kt b/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/springai/SuppressThinkingConverterTest.kt
index 2dc234d1d..0eb7200e7 100644
--- a/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/springai/SuppressThinkingConverterTest.kt
+++ b/embabel-agent-api/src/test/kotlin/com/embabel/agent/spi/support/springai/SuppressThinkingConverterTest.kt
@@ -60,6 +60,31 @@ class SuppressThinkingConverterTest {
         }
     }
 
+    @Nested
+    inner class SequentialProcessing {
+
+        @Test
+        fun `applies all finders sequentially - TAG then PREFIX`() {
+            val converter = SuppressThinkingConverter(BeanOutputConverter(Dog::class.java))
+            val input = """<think>First thinking block</think>
+                //THINKING: Second thinking block
+                {"name": "Rex"}""".trimMargin()
+            val result = converter.convert(input)
+            assertNotNull(result!!)
+            assertEquals("Rex", result.name)
+        }
+
+        @Test
+        fun `early termination when JSON is already valid`() {
+            // If the input is already valid JSON, no sanitization should occur
+            val converter = SuppressThinkingConverter(BeanOutputConverter(Dog::class.java))
+            val input = """{"name": "Rex"}"""
+            val result = converter.convert(input)
+            assertNotNull(result!!)
+            assertEquals("Rex", result.name)
+        }
+    }
+
     @Nested
     inner class StringWithoutThinkBlocks {
 
diff --git a/embabel-agent-autoconfigure/models/embabel-agent-anthropic-autoconfigure/src/test/kotlin/com/embabel/agent/config/models/anthropic/LLMAnthropicThinkingIT.java b/embabel-agent-autoconfigure/models/embabel-agent-anthropic-autoconfigure/src/test/kotlin/com/embabel/agent/config/models/anthropic/LLMAnthropicThinkingIT.java
new file mode 100644
index 000000000..3929ae472
--- /dev/null
+++ b/embabel-agent-autoconfigure/models/embabel-agent-anthropic-autoconfigure/src/test/kotlin/com/embabel/agent/config/models/anthropic/LLMAnthropicThinkingIT.java
@@ -0,0 +1,289 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.agent.config.models.anthropic;
+
+import com.embabel.agent.api.common.Ai;
+import com.embabel.agent.api.common.PromptRunner;
+import com.embabel.agent.api.common.autonomy.Autonomy;
+import com.embabel.agent.autoconfigure.models.anthropic.AgentAnthropicAutoConfiguration;
+import com.embabel.common.ai.model.Llm;
+import com.embabel.common.core.thinking.ThinkingBlock;
+import com.embabel.common.core.thinking.ThinkingResponse;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.tool.annotation.Tool;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.context.properties.ConfigurationPropertiesScan;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.context.annotation.ComponentScan;
+import org.springframework.context.annotation.Import;
+import org.springframework.test.context.ActiveProfiles;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+/**
+ * Java integration test for Ollama thinking functionality using builder pattern.
+ * Tests the Java equivalent of Kotlin's withThinking() extension function.
+ */
+@SpringBootTest(
+        properties = {
+                "embabel.models.cheapest=claude_-sonnet-4-5",
+                "embabel.models.best=claude_-sonnet-4-5",
+                "embabel.models.default-llm=claude-sonnet-4-5",
+                "embabel.agent.platform.llm-operations.prompts.defaultTimeout=240",
+                "embabel.agent.platform.llm-operations.data-binding.fixedBackoffMillis=6000",
+                "spring.main.allow-bean-definition-overriding=true",
+
+                // Thinking Infrastructure logging
+                "logging.level.com.embabel.agent.spi.support.springai.ChatClientLlmOperations=TRACE",
+                "logging.level.com.embabel.common.core.thinking=DEBUG",
+
+                // Spring AI Debug Logging
+                "logging.level.org.springframework.ai=DEBUG",
+                "logging.level.org.springframework.ai.openai=TRACE",
+                "logging.level.org.springframework.ai.chat=DEBUG",
+
+                // HTTP/WebClient Debug
+                "logging.level.org.springframework.web.reactive=DEBUG",
+                "logging.level.reactor.netty.http.client=TRACE",
+
+                // OpenAI API Debug
+                "logging.level.org.springframework.ai.openai.api=TRACE",
+
+                // Complete HTTP tracing
+                "logging.level.org.springframework.web.client.RestTemplate=DEBUG",
+                "logging.level.org.apache.http=DEBUG",
+                "logging.level.httpclient.wire=DEBUG"
+        }
+)
+@ActiveProfiles("thinking")
+@ConfigurationPropertiesScan(
+        basePackages = {
+                "com.embabel.agent",
+                "com.embabel.example"
+        }
+)
+@ComponentScan(
+        basePackages = {
+                "com.embabel.agent",
+                "com.embabel.example"
+        },
+        excludeFilters = {
+                @ComponentScan.Filter(
+                        type = org.springframework.context.annotation.FilterType.REGEX,
+                        pattern = ".*GlobalExceptionHandler.*"
+                )
+        }
+)
+@Import({AgentAnthropicAutoConfiguration.class})
+class LLMAnthropicThinkingIT {
+
+    private static final Logger logger = LoggerFactory.getLogger(LLMAnthropicThinkingIT.class);
+
+    @Autowired
+    private Autonomy autonomy;
+
+    @Autowired
+    private Ai ai;
+
+    @Autowired
+    private List<Llm> llms;
+
+    /**
+     * Simple data class for testing thinking object creation
+     */
+    static class MonthItem {
+        private String name;
+
+        private Integer temperature;
+
+        public MonthItem() {
+        }
+
+        public MonthItem(String name) {
+            this.name = name;
+        }
+
+        public MonthItem(String name, Integer temperature) {
+            this.name = name;
+            this.temperature = temperature;
+        }
+
+        public String getName() {
+            return name;
+        }
+
+        public void setName(String name) {
+            this.name = name;
+        }
+
+        public Integer getTemperature() {
+            return temperature;
+        }
+
+        public void setTemperature(Integer temperature) {
+            this.temperature = temperature;
+        }
+
+        @Override
+        public String toString() {
+            return "MonthItem{name='" + name + "', temperature=" + temperature + "}";
+        }
+    }
+
+    /**
+     * Tool for temperature conversion
+     */
+    static class Tooling {
+
+        @Tool
+        Integer convertFromCelsiusToFahrenheit(Integer inputTemp) {
+            return (int) ((inputTemp * 2) + 32);
+        }
+    }
+
+    @Test
+    void testThinkingCreateObject() {
+        logger.info("Starting thinking createObject integration test");
+
+        // Given: Use the LLM configured for thinking tests
+        PromptRunner runner = ai.withLlm("claude-sonnet-4-5")
+                .withToolObject(Tooling.class)
+                .withGenerateExamples(true);
+
+        String prompt = """
+                What is the hottest month in Florida and  provide its temperature.
+                Please respond with your reasoning using tags <reason>.
+                
+                The name should be the month name, temperature should be in Fahrenheit.
+                """;
+
+        // When: create object with thinking
+        ThinkingResponse<MonthItem> response = runner
+                .withThinking()
+                .createObject(prompt, MonthItem.class);
+
+        // Then: Verify both result and thinking content
+        assertNotNull(response, "Response should not be null");
+
+        MonthItem result = response.getResult();
+        assertNotNull(result, "Result object should not be null");
+        assertNotNull(result.getName(), "Month name should not be null");
+        logger.info("Created object: {}", result);
+
+        List<ThinkingBlock> thinkingBlocks = response.getThinkingBlocks();
+        assertNotNull(thinkingBlocks, "Thinking blocks should not be null");
+        assertFalse(thinkingBlocks.isEmpty(), "Should have thinking content");
+
+        logger.info("Extracted {} thinking blocks", thinkingBlocks);
+
+        logger.info("Thinking createObject test completed successfully");
+    }
+
+    @Test
+    void testThinkingCreateObjectIfPossible() {
+        logger.info("Starting thinking createObjectIfPossible integration test");
+
+        // Given: Use the LLM configured for thinking tests
+        PromptRunner runner = ai.withLlm("claude-sonnet-4-5")
+                .withToolObject(Tooling.class);
+
+
+        String prompt = "Think about the coldest month in Alaska and its temperature. Provide your analysis.";
+
+        // When: Use factory method for more natural chaining - not recommended (testing alternative syntax)
+        ThinkingResponse<MonthItem> response = runner
+                .withThinking()
+                .createObjectIfPossible(prompt, MonthItem.class);
+
+        // Then: Verify response and thinking content (result may be null if creation not possible)
+        assertNotNull(response, "Response should not be null");
+
+        MonthItem result = response.getResult();
+        // Note: result may be null if LLM determines object creation is not possible with given info
+        if (result != null) {
+            assertNotNull(result.getName(), "Month name should not be null");
+            logger.info("Created object if possible: {}", result);
+        } else {
+            logger.info("LLM correctly determined object creation not possible with given information");
+        }
+
+        List<ThinkingBlock> thinkingBlocks = response.getThinkingBlocks();
+        assertNotNull(thinkingBlocks, "Thinking blocks should not be null");
+        assertFalse(thinkingBlocks.isEmpty(), "Should have thinking content");
+
+        logger.info("Extracted {} thinking blocks", thinkingBlocks);
+
+        logger.info("Thinking createObjectIfPossible test completed successfully");
+    }
+
+    @Test
+    void testThinkingWithComplexPrompt() {
+        logger.info("Starting complex thinking integration test");
+
+        // Given: Use the LLM with a complex reasoning prompt
+        PromptRunner runner = ai.withLlm("claude-sonnet-4-5")
+                                .withToolObject(Tooling.class);
+
+        String prompt = """
+                <think>
+                I need to carefully analyze seasonal patterns and temperature data.
+                Let me think step by step about Florida's climate.
+                </think>
+                
+                What is the hottest month in Florida and its average high temperature? 
+                Please provide a detailed analysis of your reasoning.
+                
+                //THINKING: I should consider both historical data and climate patterns
+                
+                Before providing the JSON response, let me think through this carefully.
+                """;
+
+
+        ThinkingResponse<MonthItem> response = runner
+                .withThinking()
+                .createObject(prompt, MonthItem.class);
+
+        // Then: Verify extraction of multiple thinking formats
+        assertNotNull(response, "Response should not be null");
+
+        MonthItem result = response.getResult();
+        assertNotNull(result, "Result object should not be null");
+        logger.info("Created object from complex prompt: {}", result);
+
+        List<ThinkingBlock> thinkingBlocks = response.getThinkingBlocks();
+        assertNotNull(thinkingBlocks, "Thinking blocks should not be null");
+        assertFalse(thinkingBlocks.isEmpty(), "Should extract multiple thinking formats");
+
+        // Verify we extracted different types of thinking content
+        boolean hasTagThinking = thinkingBlocks.stream()
+                .anyMatch(block -> block.getTagType().name().equals("TAG"));
+        boolean hasPrefixThinking = thinkingBlocks.stream()
+                .anyMatch(block -> block.getTagType().name().equals("PREFIX"));
+        boolean hasNoPrefixThinking = thinkingBlocks.stream()
+                .anyMatch(block -> block.getTagType().name().equals("NO_PREFIX"));
+
+        logger.info("Thinking formats detected - TAG: {}, PREFIX: {}, NO_PREFIX: {}",
+                hasTagThinking, hasPrefixThinking, hasNoPrefixThinking);
+
+        logger.info("Complex thinking test completed successfully with {} thinking blocks",
+                thinkingBlocks.size());
+    }
+}
\ No newline at end of file
diff --git a/embabel-agent-autoconfigure/models/embabel-agent-ollama-autoconfigure/src/test/java/com/embabel/agent/config/models/ollama/LLMOllamaThinkingIT.java b/embabel-agent-autoconfigure/models/embabel-agent-ollama-autoconfigure/src/test/java/com/embabel/agent/config/models/ollama/LLMOllamaThinkingIT.java
new file mode 100644
index 000000000..3faf99650
--- /dev/null
+++ b/embabel-agent-autoconfigure/models/embabel-agent-ollama-autoconfigure/src/test/java/com/embabel/agent/config/models/ollama/LLMOllamaThinkingIT.java
@@ -0,0 +1,288 @@
+/*
+ * Copyright 2024-2025 Embabel Software, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.embabel.agent.config.models.ollama;
+
+import com.embabel.agent.api.common.Ai;
+import com.embabel.agent.api.common.PromptRunner;
+import com.embabel.agent.api.common.autonomy.Autonomy;
+import com.embabel.agent.autoconfigure.models.ollama.AgentOllamaAutoConfiguration;
+import com.embabel.common.ai.model.Llm;
+import com.embabel.common.core.thinking.ThinkingBlock;
+import com.embabel.common.core.thinking.ThinkingResponse;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.tool.annotation.Tool;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.context.properties.ConfigurationPropertiesScan;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.context.annotation.ComponentScan;
+import org.springframework.context.annotation.Import;
+import org.springframework.test.context.ActiveProfiles;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+/**
+ * Java integration test for Ollama thinking functionality using builder pattern.
+ * Tests the Java equivalent of Kotlin's withThinking() extension function.
+ */
+@SpringBootTest(
+        properties = {
+                "embabel.models.cheapest=qwen3:latest",
+                "embabel.models.best=qwen3:latest",
+                "embabel.models.default-llm=qwen3:latest",
+                "embabel.agent.platform.llm-operations.prompts.defaultTimeout=240",
+                "embabel.agent.platform.llm-operations.data-binding.fixedBackoffMillis=6000",
+                "spring.main.allow-bean-definition-overriding=true",
+
+                // Thinking Infrastructure logging
+                "logging.level.com.embabel.agent.spi.support.springai.ChatClientLlmOperations=TRACE",
+                "logging.level.com.embabel.common.core.thinking=DEBUG",
+
+                // Spring AI Debug Logging
+                "logging.level.org.springframework.ai=DEBUG",
+                "logging.level.org.springframework.ai.openai=TRACE",
+                "logging.level.org.springframework.ai.chat=DEBUG",
+
+                // HTTP/WebClient Debug
+                "logging.level.org.springframework.web.reactive=DEBUG",
+                "logging.level.reactor.netty.http.client=TRACE",
+
+                // OpenAI API Debug
+                "logging.level.org.springframework.ai.openai.api=TRACE",
+
+                // Complete HTTP tracing
+                "logging.level.org.springframework.web.client.RestTemplate=DEBUG",
+                "logging.level.org.apache.http=DEBUG",
+                "logging.level.httpclient.wire=DEBUG"
+        }
+)
+@ActiveProfiles("thinking")
+@ConfigurationPropertiesScan(
+        basePackages = {
+                "com.embabel.agent",
+                "com.embabel.example"
+        }
+)
+@ComponentScan(
+        basePackages = {
+                "com.embabel.agent",
+                "com.embabel.example"
+        },
+        excludeFilters = {
+                @ComponentScan.Filter(
+                        type = org.springframework.context.annotation.FilterType.REGEX,
+                        pattern = ".*GlobalExceptionHandler.*"
+                )
+        }
+)
+@Import({AgentOllamaAutoConfiguration.class})
+class LLMOllamaThinkingIT {
+
+    private static final Logger logger = LoggerFactory.getLogger(LLMOllamaThinkingIT.class);
+
+    @Autowired
+    private Autonomy autonomy;
+
+    @Autowired
+    private Ai ai;
+
+    @Autowired
+    private List<Llm> llms;
+
+    /**
+     * Simple data class for testing thinking object creation
+     */
+    static class MonthItem {
+        private String name;
+
+        private Short temperature;
+
+        public MonthItem() {
+        }
+
+        public MonthItem(String name) {
+            this.name = name;
+        }
+
+        public MonthItem(String name, Short temperature) {
+            this.name = name;
+            this.temperature = temperature;
+        }
+
+        public String getName() {
+            return name;
+        }
+
+        public void setName(String name) {
+            this.name = name;
+        }
+
+        public Short getTemperature() {
+            return temperature;
+        }
+
+        public void setTemperature(Short temperature) {
+            this.temperature = temperature;
+        }
+
+        @Override
+        public String toString() {
+            return "MonthItem{name='" + name + "', temperature=" + temperature + "}";
+        }
+    }
+
+    /**
+     * Tool for temperature conversion
+     */
+    static class Tooling {
+
+        @Tool
+        Short convertFromCelsiusToFahrenheit(Short inputTemp) {
+            return (short) ((inputTemp * 2) + 32);
+        }
+    }
+
+    @Test
+    void testThinkingCreateObject() {
+        logger.info("Starting thinking createObject integration test");
+
+        // Given: Use the LLM configured for thinking tests
+        PromptRunner runner = ai.withLlm("qwen3:latest")
+                .withToolObject(Tooling.class);
+
+        String prompt = """
+                What is the hottest month in Florida and provide the temperature.
+                Please provide with reasoning.
+                
+                
+                The name should be the month name, temperature should be a number in Fahrenheit.
+                """;
+
+        // create object with thinking
+        ThinkingResponse<MonthItem> response = runner
+                .withThinking()
+                .createObject(prompt, MonthItem.class);
+
+        // Then: Verify both result and thinking content
+        assertNotNull(response, "Response should not be null");
+
+        MonthItem result = response.getResult();
+        assertNotNull(result, "Result object should not be null");
+        assertNotNull(result.getName(), "Month name should not be null");
+        logger.info("Created object: {}", result);
+
+        List<ThinkingBlock> thinkingBlocks = response.getThinkingBlocks();
+        assertNotNull(thinkingBlocks, "Thinking blocks should not be null");
+        assertFalse(thinkingBlocks.isEmpty(), "Should have thinking content");
+
+        logger.info("Extracted {} thinking blocks", thinkingBlocks);
+
+        logger.info("Thinking createObject test completed successfully");
+    }
+
+    @Test
+    void testThinkingCreateObjectIfPossible() {
+        logger.info("Starting thinking createObjectIfPossible integration test");
+
+        // Given: Use the LLM configured for thinking tests
+        PromptRunner runner = ai.withLlm("qwen3:latest")
+                .withToolObject(Tooling.class);
+
+        String prompt = "Think about the coldest month in Alaska and its temperature. Provide your analysis. " + "And return Month with temperature";
+
+        // create object if possible with thinking
+        ThinkingResponse<MonthItem> response = runner
+                .withThinking()
+                .createObjectIfPossible(prompt, MonthItem.class);
+
+        // Then: Verify response and thinking content (result may be null if creation not possible)
+        assertNotNull(response, "Response should not be null");
+
+        MonthItem result = response.getResult();
+        // Note: result may be null if LLM determines object creation is not possible with given info
+        if (result != null) {
+            assertNotNull(result.getName(), "Month name should not be null");
+            logger.info("Created object if possible: {}", result);
+        } else {
+            logger.info("LLM correctly determined object creation not possible with given information");
+        }
+
+        List<ThinkingBlock> thinkingBlocks = response.getThinkingBlocks();
+        assertNotNull(thinkingBlocks, "Thinking blocks should not be null");
+        assertFalse(thinkingBlocks.isEmpty(), "Should have thinking content");
+
+        logger.info("Extracted {} thinking blocks", thinkingBlocks);
+
+        logger.info("Thinking createObjectIfPossible test completed successfully");
+    }
+
+    @Test
+    void testThinkingWithComplexPrompt() {
+        logger.info("Starting complex thinking integration test");
+
+        // Given: Use the LLM with a complex reasoning prompt
+        PromptRunner runner = ai.withLlm("qwen3:latest")
+                .withToolObject(Tooling.class);
+
+        String prompt = """
+                <think>
+                I need to carefully analyze seasonal patterns and temperature data.
+                Let me think step by step about Florida's climate.
+                </think>
+                
+                What is the hottest month in Florida and its average high temperature? 
+                Please provide a detailed analysis of your reasoning.
+                
+                //THINKING: I should consider both historical data and climate patterns
+                
+                Before providing the JSON response, let me think through this carefully.
+                """;
+
+        // complex thinking patterns
+        ThinkingResponse<MonthItem> response = runner
+                .withThinking()
+                .createObject(prompt, MonthItem.class);
+
+        // Then: Verify extraction of multiple thinking formats
+        assertNotNull(response, "Response should not be null");
+
+        MonthItem result = response.getResult();
+        assertNotNull(result, "Result object should not be null");
+        logger.info("Created object from complex prompt: {}", result);
+
+        List<ThinkingBlock> thinkingBlocks = response.getThinkingBlocks();
+        assertNotNull(thinkingBlocks, "Thinking blocks should not be null");
+        assertFalse(thinkingBlocks.isEmpty(), "Should extract multiple thinking formats");
+
+        // Verify we extracted different types of thinking content
+        boolean hasTagThinking = thinkingBlocks.stream()
+                .anyMatch(block -> block.getTagType().name().equals("TAG"));
+        boolean hasPrefixThinking = thinkingBlocks.stream()
+                .anyMatch(block -> block.getTagType().name().equals("PREFIX"));
+        boolean hasNoPrefixThinking = thinkingBlocks.stream()
+                .anyMatch(block -> block.getTagType().name().equals("NO_PREFIX"));
+
+        logger.info("Thinking formats detected - TAG: {}, PREFIX: {}, NO_PREFIX: {}",
+                hasTagThinking, hasPrefixThinking, hasNoPrefixThinking);
+
+        logger.info("Complex thinking test completed successfully with {} thinking blocks",
+                thinkingBlocks.size());
+    }
+}
\ No newline at end of file
diff --git a/embabel-agent-autoconfigure/models/embabel-agent-openai-autoconfigure/src/test/kotlin/com/embabel/agent/config/models/openai/LLMStreamingIT.kt b/embabel-agent-autoconfigure/models/embabel-agent-openai-autoconfigure/src/test/kotlin/com/embabel/agent/config/models/openai/LLMStreamingIT.kt
index b2516615e..ffcfeeb9f 100644
--- a/embabel-agent-autoconfigure/models/embabel-agent-openai-autoconfigure/src/test/kotlin/com/embabel/agent/config/models/openai/LLMStreamingIT.kt
+++ b/embabel-agent-autoconfigure/models/embabel-agent-openai-autoconfigure/src/test/kotlin/com/embabel/agent/config/models/openai/LLMStreamingIT.kt
@@ -190,6 +190,7 @@ class LLMStreamingIT(
 
         try {
             val runner = ai.withLlm("gpt-4.1-mini")
+                                        .withGenerateExamples(true);
             println("DEBUG: Created runner")
 
             // Test non-streaming call first
diff --git a/embabel-agent-common/src/main/kotlin/com/embabel/common/ai/converters/streaming/support/ThinkingDetector.kt b/embabel-agent-common/src/main/kotlin/com/embabel/common/ai/converters/streaming/support/ThinkingDetector.kt
index c217ee3b1..34c16584d 100644
--- a/embabel-agent-common/src/main/kotlin/com/embabel/common/ai/converters/streaming/support/ThinkingDetector.kt
+++ b/embabel-agent-common/src/main/kotlin/com/embabel/common/ai/converters/streaming/support/ThinkingDetector.kt
@@ -16,44 +16,32 @@
 package com.embabel.common.ai.converters.streaming.support
 
 import com.embabel.common.core.streaming.ThinkingState
+import com.embabel.common.core.thinking.ThinkingTags
 import org.slf4j.LoggerFactory
 
 /**
  * Utility functions for streaming content processing, particularly thinking content detection and extraction.
  *
  * Provides centralized logic for identifying and processing thinking content in various formats
- * used by different LLM models and reasoning systems.
+ * used by different LLM models and reasoning systems. Uses ThinkingTags for consistent tag definitions.
  */
 internal object ThinkingDetector {
 
     private val logger = LoggerFactory.getLogger(ThinkingDetector::class.java)
 
     /**
-     * Centralized thinking tag definitions.
-     * Single source of truth for all thinking tag formats across different LLMs.
+     * XML-style thinking tags for streaming processing.
+     * Uses centralized ThinkingTags definitions, excluding special-purpose tags.
      */
-    private val thinkingTags = mapOf(
-        "think" to ("<think>" to "</think>"),
-        "analysis" to ("<analysis>" to "</analysis>"),
-        "thought" to ("<thought>" to "</thought>"),
-        "final" to ("<final>" to "</final>"),
-        "scratchpad" to ("<scratchpad>" to "</scratchpad>"),
-        "chain_of_thought" to ("<chain_of_thought>" to "</chain_of_thought>"),
-        "reasoning" to ("[REASONING]" to "[/REASONING]")
-    )
+    private val thinkingTags = ThinkingTags.TAG_DEFINITIONS
+        .filterNot { it.key in listOf("legacy_prefix", "no_prefix") }
 
     /**
      * Detects if a line contains thinking content using flexible pattern matching.
      *
-     * Supports multiple reasoning tag formats commonly used by different LLMs:
-     * - <think>content</think> (DeepSeek, Qwen, Llama 3, Gemma)
-     * - <analysis>content</analysis> (Qwen)
-     * - <thought>content</thought> (Llama 3)
-     * - <final>content</final> (Qwen)
-     * - <scratchpad>content</scratchpad> (Gemini internal)
-     * - <chain_of_thought>content</chain_of_thought> (Claude internal)
-     * - [REASONING]content[/REASONING] (Mistral/Mixtral)
-     * - //THINKING: content (legacy format)
+     * Uses ThinkingTags definitions to support multiple reasoning tag formats commonly used by different LLMs:
+     * - XML-style tags: <think>, <analysis>, <thought>, <final>, <scratchpad>, <chain_of_thought>, <reasoning>
+     * - Legacy prefix format: //THINKING: content
      *
      * @param line The complete line to check for thinking patterns
      * @return true if the line contains thinking content, false otherwise
@@ -217,13 +205,13 @@ internal object ThinkingDetector {
 
     /**
      * Regex patterns for detecting thinking content in various formats.
-     * Generated from centralized tag definitions for consistency.
+     * Generated from ThinkingTags definitions for consistency across the system.
      */
     private val thinkingPatterns = buildList {
         // Block-style thinking tags (capture content inside)
-        thinkingTags.values.forEach { (start, end) ->
-            val escapedStart = Regex.escape(start)
-            val escapedEnd = Regex.escape(end)
+        thinkingTags.values.forEach { tagPair ->
+            val escapedStart = Regex.escape(tagPair.first)
+            val escapedEnd = Regex.escape(tagPair.second)
             add("$escapedStart(.*?)$escapedEnd".toRegex(RegexOption.DOT_MATCHES_ALL))
         }
         // Prefix-style thinking markers (for legacy compatibility)
diff --git a/embabel-agent-common/src/main/kotlin/com/embabel/common/ai/model/LlmOptions.kt b/embabel-agent-common/src/main/kotlin/com/embabel/common/ai/model/LlmOptions.kt
index a08b87b42..07d9a2d82 100644
--- a/embabel-agent-common/src/main/kotlin/com/embabel/common/ai/model/LlmOptions.kt
+++ b/embabel-agent-common/src/main/kotlin/com/embabel/common/ai/model/LlmOptions.kt
@@ -30,6 +30,7 @@ import java.time.Duration
 class Thinking private constructor(
     val enabled: Boolean = false,
     val tokenBudget: Int? = null,
+    val extractThinking: Boolean = false,
 ) {
 
     companion object {
@@ -40,10 +41,33 @@ class Thinking private constructor(
             tokenBudget = withTokenBudget,
         )
 
+        @JvmStatic
+        fun withExtraction(): Thinking = Thinking(
+            extractThinking = true,
+        )
+
         val NONE: Thinking = Thinking(
             enabled = false,
         )
     }
+
+    /**
+     * Enable thinking block extraction for user access.
+     */
+    fun applyExtraction(): Thinking = Thinking(
+        enabled = this.enabled,
+        tokenBudget = this.tokenBudget,
+        extractThinking = true,
+    )
+
+    /**
+     * Configure thinking token budget.
+     */
+    fun applyTokenBudget(tokenBudget: Int): Thinking = Thinking(
+        enabled = true,
+        tokenBudget = tokenBudget,
+        extractThinking = this.extractThinking,
+    )
 }
 
 /**
diff --git a/embabel-agent-common/src/test/java/com/embabel/common/ai/model/LlmOptionsConstructionTest.java b/embabel-agent-common/src/test/java/com/embabel/common/ai/model/LlmOptionsConstructionTest.java
index 622c69965..8b912a783 100644
--- a/embabel-agent-common/src/test/java/com/embabel/common/ai/model/LlmOptionsConstructionTest.java
+++ b/embabel-agent-common/src/test/java/com/embabel/common/ai/model/LlmOptionsConstructionTest.java
@@ -15,8 +15,11 @@
  */
 package com.embabel.common.ai.model;
 
+import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;
 
+import static org.junit.jupiter.api.Assertions.*;
+
 public class LlmOptionsConstructionTest {
 
     @Test
@@ -42,4 +45,90 @@ void demonstrateJavaConstructionFromDefault() {
                 .withTemperature(0.7)
                 .withMaxTokens(1000);
     }
+
+    @Nested
+    class ThinkingFunctionality {
+
+        @Test
+        void shouldCreateThinkingWithExtraction() {
+            // Test Thinking.withExtraction() factory method
+            var extractionThinking = Thinking.Companion.withExtraction();
+            assertTrue(extractionThinking.getExtractThinking());
+        }
+
+        @Test
+        void shouldCreateThinkingWithTokenBudget() {
+            // Test Thinking.withTokenBudget() factory method
+            var budgetThinking = Thinking.Companion.withTokenBudget(100);
+            assertNotNull(budgetThinking);
+        }
+
+        @Test
+        void shouldTestThinkingNoneViaWithoutThinking() {
+            // Test accessing NONE indirectly via withoutThinking()
+            var options = LlmOptions.withDefaults();
+            var withoutThinking = options.withoutThinking();
+            var thinkingConfig = withoutThinking.getThinking();
+            assertNotNull(thinkingConfig);
+            assertFalse(thinkingConfig.getExtractThinking());
+        }
+
+        @Test
+        void shouldApplyExtractionToDefaultThinking() {
+            // Test applyExtraction() instance method on default thinking
+            var options = LlmOptions.withDefaults();
+            var withoutThinking = options.withoutThinking();
+            var defaultThinking = withoutThinking.getThinking();
+            assertNotNull(defaultThinking);
+            var applied = defaultThinking.applyExtraction();
+            assertNotNull(applied);
+            assertTrue(applied.getExtractThinking());
+        }
+
+        @Test
+        void shouldApplyTokenBudgetToExistingThinking() {
+            // Test applyTokenBudget() instance method
+            var extractionThinking = Thinking.Companion.withExtraction();
+            assertNotNull(extractionThinking);
+            var appliedBudget = extractionThinking.applyTokenBudget(200);
+            assertNotNull(appliedBudget);
+            assertTrue(appliedBudget.getExtractThinking());
+        }
+
+        @Test
+        void shouldConfigureLlmOptionsWithThinking() {
+            // Test LlmOptions.withThinking() method
+            var originalOptions = LlmOptions.withDefaults();
+            var thinkingConfig = Thinking.Companion.withExtraction();
+            assertNotNull(thinkingConfig);
+            var withThinking = originalOptions.withThinking(thinkingConfig);
+
+            assertNotNull(withThinking.getThinking());
+            assertEquals(thinkingConfig, withThinking.getThinking());
+            assertNotSame(originalOptions, withThinking);
+        }
+
+        @Test
+        void shouldConfigureLlmOptionsWithoutThinking() {
+            // Test LlmOptions.withoutThinking() method
+            var originalOptions = LlmOptions.withDefaults();
+            var withoutThinking = originalOptions.withoutThinking();
+
+            assertNotNull(withoutThinking.getThinking());
+            assertFalse(withoutThinking.getThinking().getExtractThinking());
+            assertNotSame(originalOptions, withoutThinking);
+        }
+
+        @Test
+        void shouldChainThinkingConfiguration() {
+            // Test method chaining with thinking
+            var configured = LlmOptions.withDefaults()
+                    .withThinking(Thinking.Companion.withExtraction())
+                    .withTemperature(0.8)
+                    .withMaxTokens(500);
+
+            assertNotNull(configured.getThinking());
+            assertTrue(configured.getThinking().getExtractThinking());
+        }
+    }
 }
diff --git a/embabel-agent-common/src/test/kotlin/com/embabel/common/ai/converters/streaming/StreamingJacksonOutputConverterTest.kt b/embabel-agent-common/src/test/kotlin/com/embabel/common/ai/converters/streaming/StreamingJacksonOutputConverterTest.kt
index e31cea549..1fdfc256e 100644
--- a/embabel-agent-common/src/test/kotlin/com/embabel/common/ai/converters/streaming/StreamingJacksonOutputConverterTest.kt
+++ b/embabel-agent-common/src/test/kotlin/com/embabel/common/ai/converters/streaming/StreamingJacksonOutputConverterTest.kt
@@ -503,7 +503,7 @@ class StreamingJacksonOutputConverterTest {
             <think>standard format</think>
             <analysis>qwen format</analysis>
             <thought>llama format</thought>
-            [REASONING]mistral format[/REASONING]
+            <reasoning>xml reasoning format</reasoning>
             //THINKING: legacy format
         """.trimIndent()
 
@@ -521,7 +521,7 @@ class StreamingJacksonOutputConverterTest {
         assertEquals("standard format", thinkingEvents[0].content)
         assertEquals("qwen format", thinkingEvents[1].content)
         assertEquals("llama format", thinkingEvents[2].content)
-        assertEquals("mistral format", thinkingEvents[3].content)
+        assertEquals("xml reasoning format", thinkingEvents[3].content)
         assertEquals("legacy format", thinkingEvents[4].content)
     }
 }
diff --git a/embabel-agent-common/src/test/kotlin/com/embabel/common/ai/converters/streaming/support/ThinkingDetectorTest.kt b/embabel-agent-common/src/test/kotlin/com/embabel/common/ai/converters/streaming/support/ThinkingDetectorTest.kt
index 5c759f37f..d71ec1507 100644
--- a/embabel-agent-common/src/test/kotlin/com/embabel/common/ai/converters/streaming/support/ThinkingDetectorTest.kt
+++ b/embabel-agent-common/src/test/kotlin/com/embabel/common/ai/converters/streaming/support/ThinkingDetectorTest.kt
@@ -42,7 +42,7 @@ class ThinkingDetectorTest {
         assertTrue(ThinkingDetector.isThinkingLine("<final>content</final>"))
         assertTrue(ThinkingDetector.isThinkingLine("<scratchpad>content</scratchpad>"))
         assertTrue(ThinkingDetector.isThinkingLine("<chain_of_thought>content</chain_of_thought>"))
-        assertTrue(ThinkingDetector.isThinkingLine("[REASONING]content[/REASONING]"))
+        assertTrue(ThinkingDetector.isThinkingLine("<reasoning>content</reasoning>"))
         assertTrue(ThinkingDetector.isThinkingLine("//THINKING: content"))
     }
 
@@ -66,8 +66,8 @@ class ThinkingDetectorTest {
             ThinkingDetector.extractThinkingContent("<thought>thought content</thought>")
         )
         Assertions.assertEquals(
-            "mistral reasoning",
-            ThinkingDetector.extractThinkingContent("[REASONING]mistral reasoning[/REASONING]")
+            "xml reasoning",
+            ThinkingDetector.extractThinkingContent("<reasoning>xml reasoning</reasoning>")
         )
         Assertions.assertEquals("legacy thinking", ThinkingDetector.extractThinkingContent("//THINKING: legacy thinking"))
     }
@@ -102,7 +102,7 @@ class ThinkingDetectorTest {
         )
         Assertions.assertEquals(
             ThinkingState.BOTH,
-            ThinkingDetector.detectThinkingState("[REASONING]complete reasoning[/REASONING]")
+            ThinkingDetector.detectThinkingState("<reasoning>complete reasoning</reasoning>")
         )
         Assertions.assertEquals(
             ThinkingState.BOTH,
@@ -114,14 +114,14 @@ class ThinkingDetectorTest {
     fun `detectThinkingState should return START for opening tags only`() {
         Assertions.assertEquals(ThinkingState.START, ThinkingDetector.detectThinkingState("<think>"))
         Assertions.assertEquals(ThinkingState.START, ThinkingDetector.detectThinkingState("<analysis>"))
-        Assertions.assertEquals(ThinkingState.START, ThinkingDetector.detectThinkingState("[REASONING]"))
+        Assertions.assertEquals(ThinkingState.START, ThinkingDetector.detectThinkingState("<reasoning>"))
     }
 
     @Test
     fun `detectThinkingState should return END for closing tags only`() {
         Assertions.assertEquals(ThinkingState.END, ThinkingDetector.detectThinkingState("</think>"))
         Assertions.assertEquals(ThinkingState.END, ThinkingDetector.detectThinkingState("</analysis>"))
-        Assertions.assertEquals(ThinkingState.END, ThinkingDetector.detectThinkingState("[/REASONING]"))
+        Assertions.assertEquals(ThinkingState.END, ThinkingDetector.detectThinkingState("</reasoning>"))
     }
 
     @Test
diff --git a/embabel-agent-docs/src/main/asciidoc/reference/reference.adoc b/embabel-agent-docs/src/main/asciidoc/reference/reference.adoc
index 2ece4f748..226f9a515 100644
--- a/embabel-agent-docs/src/main/asciidoc/reference/reference.adoc
+++ b/embabel-agent-docs/src/main/asciidoc/reference/reference.adoc
@@ -33,6 +33,8 @@ include::llms/page.adoc[]
 
 include::streaming/page.adoc[]
 
+include::thinking/page.adoc[]
+
 include::customizing/page.adoc[]
 
 include::integrations/page.adoc[]
diff --git a/embabel-agent-docs/src/main/asciidoc/reference/streaming/page.adoc b/embabel-agent-docs/src/main/asciidoc/reference/streaming/page.adoc
index d22a4db70..5239f33f4 100644
--- a/embabel-agent-docs/src/main/asciidoc/reference/streaming/page.adoc
+++ b/embabel-agent-docs/src/main/asciidoc/reference/streaming/page.adoc
@@ -14,7 +14,6 @@ This feature is well aligned with Embabel focus on object-oriented programming m
 - All reactive callbacks, such as _doOnNext_, _doOnComplete_, etc. are at developer's disposal
 
 ==== Example - Simple Streaming with Callbacks
-
 [source,java]
 ----
 
diff --git a/embabel-agent-docs/src/main/asciidoc/reference/thinking/page.adoc b/embabel-agent-docs/src/main/asciidoc/reference/thinking/page.adoc
new file mode 100644
index 000000000..2163da9ff
--- /dev/null
+++ b/embabel-agent-docs/src/main/asciidoc/reference/thinking/page.adoc
@@ -0,0 +1,45 @@
+[[reference.thinking]]
+
+=== Working with LLM Reasoning / Thinking
+
+==== Motivation
+
+Sometimes user would like to validate LLM reasoning process in addition to getting back object.
+Imagine scenario: the user wants to plan a vacation and tells the LLM their preferred destinations are Greece and Italy and they can only take vacation in August, June, or September.
+Then the user asks the LLM to come up with some destinations with cheap plane tickets for a one-week stay.
+Let's say the output is a proper object, basically a round trip flight. Even if the output adheres to the schema,
+user wants to be able to verify whether the flight dates are in the requested months, and whether the destinations are in Greece/Italy vs somewhere else like Spain or Turkey.
+If flight details are outside user's criteria, user would like to be able to understand LLM reasoning process.
+
+Another use case even more important, when LLM is not able to fulfill request, in other words LLM is not able to
+create object in the very first place, since user's criteria is ambiguous.
+
+==== Concepts
+
+- ```ThinkingBlock``` - abstraction that carries details on LLM reasoning, including Tag type, Tag value, and LLM reasoning text
+- ```ThinkingTags``` - reasoning Tag Types and XML tags definition. Dynamic tags got supported as well
+- ```ResponseWithThinking``` - LLM response holder, wraps Object and List of ```ThinkingBlocks```
+- ```ThinkingException``` - wraps Thinking Blocks in case Object cant be instantiated
+- ```withThinking``` - Core ```PromptRunner``` API
+
+
+==== Example on handling Object and Thinking Blocks
+[source,java]
+----
+
+        // Use the LLM configured for thinking tests
+        PromptRunner runner = ai.withLlm("claude-sonnet-4-5")
+                                .withToolObject(Tooling.class);
+
+        String prompt = "Think about the coldest month in Alaska and its temperature. Provide your analysis.";
+
+        //Use builder for  natural chaining
+        ThinkingResponse<MonthItem> response = runner
+                .withThinking()
+                .createObjectIfPossible(prompt, MonthItem.class);
+
+
+        MonthItem result = response.getResult();
+
+        List<ThinkingBlock> thinkingBlocks = response.getThinkingBlocks();
+----
\ No newline at end of file