Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package com.embabel.agent.api.common
import com.embabel.agent.api.annotation.support.AgenticInfo
import com.embabel.agent.api.common.nested.ObjectCreator
import com.embabel.agent.api.common.nested.TemplateOperations
import com.embabel.agent.api.common.thinking.ThinkingPromptRunnerOperations
import com.embabel.agent.api.tool.Tool
import com.embabel.agent.core.Agent
import com.embabel.agent.core.AgentPlatform
Expand All @@ -27,6 +28,7 @@ import com.embabel.agent.spi.LlmUse
import com.embabel.chat.AssistantMessage
import com.embabel.chat.Message
import com.embabel.common.ai.model.LlmOptions
import com.embabel.common.ai.model.Thinking
import com.embabel.common.ai.prompt.PromptContributor
import com.embabel.common.ai.prompt.PromptElement
import com.embabel.common.core.streaming.StreamingCapability
Expand Down Expand Up @@ -389,6 +391,59 @@ interface PromptRunner : LlmUse, PromptRunnerOperations {
)
}

/**
 * Indicates whether this implementation can extract thinking blocks
 * (for example `<think>...</think>`) from LLM responses.
 *
 * Always consult this method before invoking withThinking(): implementations
 * without thinking support will throw from that method.
 *
 * Note: Thinking and streaming capabilities are mutually exclusive.
 *
 * @return true if thinking extraction is available; false (the default) if not
 */
fun supportsThinking(): Boolean = false


/**
 * Obtain a thinking-aware view of this prompt runner.
 *
 * The returned operations wrap every result (createObject, generateText, etc.)
 * in a ThinkingResponse<T> pairing the converted value with the thinking
 * blocks extracted from the LLM response.
 *
 * Callers must first confirm supportsThinking() returns true, and must enable
 * thinking on the LLM options, e.g. via
 * withLlm(LlmOptions.withThinking(Thinking.withExtraction())).
 *
 * Note: Thinking and streaming capabilities are mutually exclusive.
 *
 * @return ThinkingCapability instance providing access to thinking-aware operations
 * @throws UnsupportedOperationException if thinking is not supported by this implementation
 * @throws IllegalArgumentException if thinking is not enabled in LlmOptions configuration
 */
fun withThinking(): ThinkingPromptRunnerOperations {
    if (!supportsThinking()) {
        throw UnsupportedOperationException(
            """
            Thinking not supported by this PromptRunner implementation.
            Check supportsThinking() before calling withThinking().
            """.trimIndent()
        )
    }

    val configuredThinking = llm?.thinking
    if (configuredThinking == null || configuredThinking == Thinking.NONE) {
        throw IllegalArgumentException(
            """
            Thinking capability requires thinking to be enabled in LlmOptions.
            Use withLlm(LlmOptions.withThinking(Thinking.withExtraction()))
            """.trimIndent()
        )
    }

    // Implementations that report supportsThinking() == true are expected to
    // override this method; reaching this point is a programming error.
    throw IllegalStateException(
        "Implementation error: supportsThinking() returned true but withThinking() not overridden"
    )
}

override fun respond(
messages: List<Message>,
): AssistantMessage =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import com.embabel.agent.api.common.streaming.StreamingPromptRunner
import com.embabel.agent.api.common.streaming.StreamingPromptRunnerOperations
import com.embabel.agent.api.common.support.streaming.StreamingCapabilityDetector
import com.embabel.agent.api.common.support.streaming.StreamingPromptRunnerOperationsImpl
import com.embabel.agent.api.common.thinking.ThinkingPromptRunnerOperations
import com.embabel.agent.api.common.thinking.support.ThinkingPromptRunnerOperationsImpl
import com.embabel.agent.api.tool.Tool
import com.embabel.agent.core.ProcessOptions
import com.embabel.agent.core.ToolGroup
Expand All @@ -41,6 +43,7 @@ import com.embabel.chat.ImagePart
import com.embabel.chat.Message
import com.embabel.chat.UserMessage
import com.embabel.common.ai.model.LlmOptions
import com.embabel.common.ai.model.Thinking
import com.embabel.common.ai.prompt.PromptContributor
import com.embabel.common.core.types.ZeroToOne
import com.embabel.common.util.loggerFor
Expand Down Expand Up @@ -309,9 +312,11 @@ internal data class OperationContextPromptRunner(
override fun stream(): StreamingPromptRunnerOperations {
if (!supportsStreaming()) {
throw UnsupportedOperationException(
"Streaming not supported by underlying LLM model. " +
"Model type: ${context.agentPlatform().platformServices.llmOperations::class.simpleName}. " +
"Check supportsStreaming() before calling stream()."
"""
Streaming not supported by underlying LLM model.
Model type: ${context.agentPlatform().platformServices.llmOperations::class.simpleName}.
Check supportsStreaming() before calling stream().
""".trimIndent()
)
}

Expand All @@ -335,4 +340,50 @@ internal data class OperationContextPromptRunner(
action = action,
)
}

/**
 * Check whether thinking extraction is available for this prompt runner.
 *
 * Thinking extraction requires the platform's LLM operations to be
 * ChatClientLlmOperations, so report support only in that case. This keeps
 * the PromptRunner contract honest: when this returns true, withThinking()
 * will not throw UnsupportedOperationException.
 *
 * (The previous version returned an unconditional true — and carried KDoc
 * describing withThinking() — which let withThinking() throw even after a
 * supportsThinking() check succeeded.)
 *
 * @return true if the underlying LLM operations support thinking extraction
 */
override fun supportsThinking(): Boolean =
    context.agentPlatform().platformServices.llmOperations is ChatClientLlmOperations

/**
 * Create thinking-aware prompt operations that extract LLM reasoning blocks.
 *
 * Wraps the platform's ChatClientLlmOperations so that operations return
 * ThinkingResponse wrappers carrying both the converted result and the
 * reasoning content the LLM generated. Thinking extraction is enabled
 * automatically on the LLM options when this method is called.
 *
 * @return ThinkingPromptRunnerOperations for executing prompts with thinking extraction
 * @throws UnsupportedOperationException if the underlying LLM operations don't support thinking extraction
 */
override fun withThinking(): ThinkingPromptRunnerOperations {
    val llmOperations = context.agentPlatform().platformServices.llmOperations

    if (llmOperations !is ChatClientLlmOperations) {
        throw UnsupportedOperationException(
            """
            Thinking extraction not supported by underlying LLM operations.
            Operations type: ${llmOperations::class.simpleName}.
            Thinking extraction requires ChatClientLlmOperations.
            """.trimIndent()
        )
    }

    // Auto-enable thinking extraction when withThinking() is called
    // (removed a stray "11" literal that had crept into this method body)
    val thinkingEnabledLlm = llm.withThinking(Thinking.withExtraction())

    return ThinkingPromptRunnerOperationsImpl(
        chatClientOperations = llmOperations,
        interaction = LlmInteraction(
            llm = thinkingEnabledLlm,
            toolGroups = toolGroups,
            toolCallbacks = safelyGetToolCallbacks(toolObjects) + otherToolCallbacks,
            promptContributors = promptContributors + contextualPromptContributors.map {
                it.toPromptContributor(context)
            },
            id = interactionId ?: InteractionId("${context.operation.name}-thinking"),
            generateExamples = generateExamples,
            propertyFilter = propertyFilter,
        ),
        messages = messages,
        agentProcess = context.processContext.agentProcess,
        action = action,
    )
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
/*
* Copyright 2024-2025 Embabel Software, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.embabel.agent.api.common.thinking

import com.embabel.agent.api.common.MultimodalContent
import com.embabel.chat.AssistantMessage
import com.embabel.common.core.thinking.ThinkingResponse
import com.embabel.common.core.thinking.ThinkingCapability
import com.embabel.chat.Message
import com.embabel.common.core.types.ZeroToOne

/**
* User-facing interface for executing prompts with thinking block extraction.
*
* This interface provides thinking-aware versions of standard prompt operations,
* returning both the converted results and the reasoning content that LLMs
* generated during their processing.
*
* ## Usage
*
* Access this interface through the `withThinking()` extension:
* ```kotlin
* val result = promptRunner.withThinking().createObject("analyze this", Person::class.java)
* val person = result.result // The converted Person object
* val thinking = result.thinkingBlocks // List of reasoning blocks
* ```
*
* ## Thinking Block Extraction
*
* This interface automatically extracts thinking content in various formats:
* - Tagged thinking: `<think>reasoning here</think>`, `<analysis>content</analysis>`
* - Prefix thinking: `//THINKING: reasoning here`
* - Untagged thinking: raw text content before JSON objects
*
* ## Relationship to Regular Operations
*
* Unlike [com.embabel.agent.api.common.PromptRunnerOperations] which returns
* direct objects, all methods in this interface return [ThinkingResponse]
* wrappers that provide access to both results and reasoning.
*
* @see com.embabel.agent.api.common.PromptRunnerOperations for standard operations
* @see ThinkingResponse for the response wrapper
* @see com.embabel.common.core.thinking.ThinkingBlock for thinking content details
*/
/**
 * Prompt operations that surface the LLM's reasoning alongside each result.
 *
 * Every method mirrors a standard prompt operation but wraps its outcome in a
 * [ThinkingResponse], giving callers both the converted value and the thinking
 * blocks captured from the model's output.
 *
 * ## Obtaining an instance
 *
 * Reach this interface through the `withThinking()` extension:
 * ```kotlin
 * val result = promptRunner.withThinking().createObject("analyze this", Person::class.java)
 * val person = result.result // The converted Person object
 * val thinking = result.thinkingBlocks // List of reasoning blocks
 * ```
 *
 * ## Recognized thinking formats
 *
 * Thinking content is extracted automatically in several shapes:
 * - Tagged thinking: `<think>reasoning here</think>`, `<analysis>content</analysis>`
 * - Prefix thinking: `//THINKING: reasoning here`
 * - Untagged thinking: raw text content before JSON objects
 *
 * ## Contrast with standard operations
 *
 * Whereas [com.embabel.agent.api.common.PromptRunnerOperations] returns plain
 * objects, every method here returns a [ThinkingResponse] wrapper that exposes
 * both the result and the reasoning.
 *
 * @see com.embabel.agent.api.common.PromptRunnerOperations for standard operations
 * @see ThinkingResponse for the response wrapper
 * @see com.embabel.common.core.thinking.ThinkingBlock for thinking content details
 */
interface ThinkingPromptRunnerOperations : ThinkingCapability {

    /**
     * Generate text while extracting thinking blocks.
     *
     * @param prompt The text prompt to send to the LLM
     * @return Response pairing the generated text with any extracted thinking blocks
     */
    infix fun generateText(prompt: String): ThinkingResponse<String> =
        createObject(prompt, String::class.java)

    /**
     * Create a structured object of the given type, capturing the LLM's reasoning.
     *
     * Uses the given prompt together with LLM options from context.
     *
     * @param T The type of object to create
     * @param prompt The text prompt to send to the LLM
     * @param outputClass The class of the object to create
     * @return Response pairing the converted object with any extracted thinking blocks
     */
    fun <T> createObject(
        prompt: String,
        outputClass: Class<T>,
    ): ThinkingResponse<T> = createObject(
        listOf(com.embabel.chat.UserMessage(prompt)),
        outputClass,
    )

    /**
     * Attempt to create an object of the given type, capturing the LLM's reasoning.
     *
     * Like [createObject], but intended for cases where conversion may fail:
     * thinking blocks are still returned even when no object is produced.
     *
     * @param T The type of object to create
     * @param prompt The text prompt to send to the LLM
     * @param outputClass The class of the object to create
     * @return Response whose result may be null, with thinking blocks always available
     */
    fun <T> createObjectIfPossible(
        prompt: String,
        outputClass: Class<T>,
    ): ThinkingResponse<T?> = createObjectIfPossible(
        messages = listOf(com.embabel.chat.UserMessage(prompt)),
        outputClass = outputClass,
    )

    /**
     * Attempt to create an object from conversation messages, capturing the LLM's reasoning.
     *
     * @param T The type of object to create
     * @param messages The conversation messages to send to the LLM
     * @param outputClass The class of the object to create
     * @return Response whose result may be null, with thinking blocks always available
     */
    fun <T> createObjectIfPossible(
        messages: List<Message>,
        outputClass: Class<T>,
    ): ThinkingResponse<T?>

    /**
     * Create an object from conversation messages, capturing the LLM's reasoning.
     *
     * @param T The type of object to create
     * @param messages The conversation messages to send to the LLM
     * @param outputClass The class of the object to create
     * @return Response pairing the converted object with any extracted thinking blocks
     */
    fun <T> createObject(
        messages: List<Message>,
        outputClass: Class<T>,
    ): ThinkingResponse<T>

    /**
     * Generate text from multimodal content while extracting thinking blocks.
     *
     * @param content The multimodal content (text + images) to send to the LLM
     * @return Response pairing the generated text with any extracted thinking blocks
     */
    fun generateText(content: MultimodalContent): ThinkingResponse<String> =
        createObject(content, String::class.java)

    /**
     * Create an object from multimodal content, capturing the LLM's reasoning.
     *
     * @param T The type of object to create
     * @param content The multimodal content (text + images) to send to the LLM
     * @param outputClass The class of the object to create
     * @return Response pairing the converted object with any extracted thinking blocks
     */
    fun <T> createObject(
        content: MultimodalContent,
        outputClass: Class<T>,
    ): ThinkingResponse<T> = createObject(
        listOf(com.embabel.chat.UserMessage(content.toContentParts())),
        outputClass,
    )

    /**
     * Attempt to create an object from multimodal content, capturing the LLM's reasoning.
     *
     * @param T The type of object to create
     * @param content The multimodal content (text + images) to send to the LLM
     * @param outputClass The class of the object to create
     * @return Response whose result may be null, with thinking blocks always available
     */
    fun <T> createObjectIfPossible(
        content: MultimodalContent,
        outputClass: Class<T>,
    ): ThinkingResponse<T?> = createObjectIfPossible(
        messages = listOf(com.embabel.chat.UserMessage(content.toContentParts())),
        outputClass = outputClass,
    )

    /**
     * Respond to multimodal content in a conversation, capturing the LLM's reasoning.
     *
     * @param content The multimodal content to respond to
     * @return Response pairing the assistant message with any extracted thinking blocks
     */
    fun respond(
        content: MultimodalContent,
    ): ThinkingResponse<AssistantMessage> = respond(
        messages = listOf(com.embabel.chat.UserMessage(content.toContentParts())),
    )

    /**
     * Respond in a conversation, capturing the LLM's reasoning.
     *
     * @param messages The conversation messages to respond to
     * @return Response pairing the assistant message with any extracted thinking blocks
     */
    fun respond(
        messages: List<Message>,
    ): ThinkingResponse<AssistantMessage>

    /**
     * Evaluate a boolean condition using the LLM, capturing its reasoning.
     *
     * @param condition The condition to evaluate
     * @param context The context for evaluation
     * @param confidenceThreshold The confidence threshold for the evaluation
     * @return Response pairing the evaluation result with any extracted thinking blocks
     */
    fun evaluateCondition(
        condition: String,
        context: String,
        confidenceThreshold: ZeroToOne = 0.8,
    ): ThinkingResponse<Boolean>
}
Loading
Loading