diff --git a/Sources/ChatGPTSwift/ChatGPTAPI.swift b/Sources/ChatGPTSwift/ChatGPTAPI.swift index cb10bc7..7aa1bff 100644 --- a/Sources/ChatGPTSwift/ChatGPTAPI.swift +++ b/Sources/ChatGPTSwift/ChatGPTAPI.swift @@ -120,7 +120,7 @@ public class ChatGPTAPI: @unchecked Sendable { responseFormat: Components.Schemas.CreateChatCompletionRequest.response_formatPayload? = nil, stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, - imageData: Data? = nil + images: [ImageContent]? = nil ) async throws -> AsyncMapSequence< AsyncThrowingPrefixWhileSequence< AsyncThrowingMapSequence< @@ -131,7 +131,7 @@ public class ChatGPTAPI: @unchecked Sendable { > >, String > { - try await sendMessageStreamInternal(text: text, model: .init(value1: model, value2: nil), systemText: systemText, temperature: temperature, maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, imageData: imageData) + try await sendMessageStreamInternal(text: text, model: .init(value1: model, value2: nil), systemText: systemText, temperature: temperature, maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, images: images) } public func sendMessageStream( @@ -143,7 +143,7 @@ public class ChatGPTAPI: @unchecked Sendable { responseFormat: Components.Schemas.CreateChatCompletionRequest.response_formatPayload? = nil, stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, - imageData: Data? = nil + images: [ImageContent]? 
= nil ) async throws -> AsyncMapSequence< AsyncThrowingPrefixWhileSequence< AsyncThrowingMapSequence< @@ -154,7 +154,7 @@ public class ChatGPTAPI: @unchecked Sendable { > >, String > { - try await sendMessageStreamInternal(text: text, model: .init(value1: nil, value2: model), systemText: systemText, temperature: temperature, maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, imageData: imageData) + try await sendMessageStreamInternal(text: text, model: .init(value1: nil, value2: model), systemText: systemText, temperature: temperature, maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, images: images) } private func sendMessageStreamInternal( @@ -166,7 +166,7 @@ public class ChatGPTAPI: @unchecked Sendable { responseFormat: Components.Schemas.CreateChatCompletionRequest.response_formatPayload? = nil, stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, - imageData: Data? = nil + images: [ImageContent]? = nil ) async throws -> AsyncMapSequence< AsyncThrowingPrefixWhileSequence< AsyncThrowingMapSequence< @@ -178,8 +178,8 @@ public class ChatGPTAPI: @unchecked Sendable { >, String > { var messages = generateInternalMessages(from: text, systemText: systemText) - if let imageData { - messages.append(createMessage(imageData: imageData)) + if let images = images, !images.isEmpty { + messages.append(createMessage(images: images, text: text)) } let response = try await client.createChatCompletion( @@ -234,9 +234,9 @@ public class ChatGPTAPI: @unchecked Sendable { responseFormat: Components.Schemas.CreateChatCompletionRequest.response_formatPayload? = nil, stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, - imageData: Data? = nil + images: [ImageContent]? 
= nil ) async throws -> String { - try await sendMessageInternal(text: text, model: .init(value1: model, value2: nil), systemText: systemText, temperature: temperature, maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, imageData: imageData) + try await sendMessageInternal(text: text, model: .init(value1: model, value2: nil), systemText: systemText, temperature: temperature, maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, images: images) } public func sendMessage( @@ -248,9 +248,9 @@ public class ChatGPTAPI: @unchecked Sendable { responseFormat: Components.Schemas.CreateChatCompletionRequest.response_formatPayload? = nil, stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, - imageData: Data? = nil + images: [ImageContent]? = nil ) async throws -> String { - try await sendMessageInternal(text: text, model: .init(value1: nil, value2: model), systemText: systemText, temperature: temperature, maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, imageData: imageData) + try await sendMessageInternal(text: text, model: .init(value1: nil, value2: model), systemText: systemText, temperature: temperature, maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, images: images) } private func sendMessageInternal( @@ -262,11 +262,11 @@ public class ChatGPTAPI: @unchecked Sendable { responseFormat: Components.Schemas.CreateChatCompletionRequest.response_formatPayload? = nil, stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, - imageData: Data? = nil + images: [ImageContent]? 
= nil ) async throws -> String { var messages = generateInternalMessages(from: text, systemText: systemText) - if let imageData { - messages.append(createMessage(imageData: imageData)) + if let images = images, !images.isEmpty { + messages.append(createMessage(images: images, text: text)) } let response = try await client.createChatCompletion( @@ -302,9 +302,9 @@ public class ChatGPTAPI: @unchecked Sendable { stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, systemText: String = "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.", - imageData: Data? = nil + images: [ImageContent]? = nil ) async throws -> ChatCompletionResponseMessage { - try await callFunctionInternal(prompt: prompt, tools: tools, model: .init(value1: nil, value2: model), maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, systemText: systemText, imageData: imageData) + try await callFunctionInternal(prompt: prompt, tools: tools, model: .init(value1: nil, value2: model), maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, systemText: systemText, images: images) } public func callFunction( @@ -317,9 +317,9 @@ public class ChatGPTAPI: @unchecked Sendable { stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, systemText: String = "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.", - imageData: Data? = nil + images: [ImageContent]? 
= nil ) async throws -> ChatCompletionResponseMessage { - try await callFunctionInternal(prompt: prompt, tools: tools, model: .init(value1: model, value2: nil), maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, systemText: systemText, imageData: imageData) + try await callFunctionInternal(prompt: prompt, tools: tools, model: .init(value1: model, value2: nil), maxTokens: maxTokens, responseFormat: responseFormat, stop: stop, systemText: systemText, images: images) } private func callFunctionInternal( @@ -332,11 +332,11 @@ public class ChatGPTAPI: @unchecked Sendable { stop: Components.Schemas.CreateChatCompletionRequest.stopPayload? = nil, systemText: String = "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.", - imageData: Data? = nil + images: [ImageContent]? = nil ) async throws -> ChatCompletionResponseMessage { var messages = generateInternalMessages(from: prompt, systemText: systemText) - if let imageData { - messages.append(createMessage(imageData: imageData)) + if let images = images, !images.isEmpty { + messages.append(createMessage(images: images, text: prompt)) } let response = try await client.createChatCompletion( @@ -527,20 +527,51 @@ public class ChatGPTAPI: @unchecked Sendable { return error } - func createMessage(imageData: Data) -> Components.Schemas.ChatCompletionRequestMessage { - .ChatCompletionRequestUserMessage( - .init( - content: .case2([ - .ChatCompletionRequestMessageContentPartImage( - .init( - _type: .image_url, - image_url: - .init( - url: - "data:image/jpeg;base64,\(imageData.base64EncodedString())", - detail: .auto))) - ]), - role: .user)) + func createMessage(images: [ImageContent], text: String) -> Components.Schemas.ChatCompletionRequestMessage { + var contentParts: [Components.Schemas.ChatCompletionRequestMessageContentPart] = [] + + // Add text content if provided + if !text.isEmpty { + 
contentParts.append(.ChatCompletionRequestMessageContentPartText(.init(_type: .text, text: text))) + } + + // Add image content parts + for image in images { + let url: String + let detail: Components.Schemas.ChatCompletionRequestMessageContentPartImage.image_urlPayload.detailPayload + + switch image { + case .url(let imageUrl, let imageDetail): + url = imageUrl + detail = imageDetail.map { detail in + switch detail { + case .auto: return .auto + case .low: return .low + case .high: return .high + } + } ?? .auto + case .base64(let data, let imageDetail): + url = "data:image/jpeg;base64,\(data.base64EncodedString())" + detail = imageDetail.map { detail in + switch detail { + case .auto: return .auto + case .low: return .low + case .high: return .high + } + } ?? .auto + } + + contentParts.append(.ChatCompletionRequestMessageContentPartImage( + .init( + _type: .image_url, + image_url: .init(url: url, detail: detail) + ) + )) + } + + return .ChatCompletionRequestUserMessage( + .init(content: .case2(contentParts), role: .user) + ) } } diff --git a/Sources/ChatGPTSwift/Models.swift b/Sources/ChatGPTSwift/Models.swift index 4e9f271..95b8373 100644 --- a/Sources/ChatGPTSwift/Models.swift +++ b/Sources/ChatGPTSwift/Models.swift @@ -10,13 +10,28 @@ import Foundation public struct Message: Codable { public let role: String public let content: String - + public init(role: String, content: String) { self.role = role self.content = content } } +/// Represents image content given either as a URL or as raw image data (base64-encoded when the request is built) +public enum ImageContent { + /// Image from a URL + case url(String, detail: ImageDetail? = nil) + /// Image from raw, not-yet-encoded image data; it is base64-encoded into a data URL when the request is built + case base64(Data, detail: ImageDetail? 
= nil) + + /// Image detail level for vision models + public enum ImageDetail: String, Codable { + case auto = "auto" + case low = "low" + case high = "high" + } +} + extension Array where Element == Message { var contentCount: Int { map { $0.content }.count } diff --git a/Sources/SampleApp/main.swift b/Sources/SampleApp/main.swift index 787fc49..d76e5c7 100644 --- a/Sources/SampleApp/main.swift +++ b/Sources/SampleApp/main.swift @@ -2,22 +2,87 @@ import ChatGPTSwift import Foundation let api = ChatGPTAPI(apiKey: "apikey") -let prompt = "what is openai?" + +// Example 1: Text-only message +print("=== Example 1: Text-only message ===") +let textPrompt = "What is OpenAI?" Task { do { - let stream = try await api.sendMessageStream(text: prompt) + let stream = try await api.sendMessageStream(text: textPrompt, model: .gpt_hyphen_4o) var responseText = "" for try await line in stream { responseText += line - print(line) + print(line, terminator: "") } - api.appendToHistoryList(userText: prompt, responseText: responseText) - print(responseText) - exit(0) + api.appendToHistoryList(userText: textPrompt, responseText: responseText) + print("\nResponse: \(responseText)") } catch { - print(error.localizedDescription) + print("Error: \(error.localizedDescription)") + } + + // Example 2: Message with base64-encoded image + print("\n=== Example 2: Message with base64-encoded image ===") + + // For demo purposes, decode a small dummy image from its base64 text into raw bytes, because the library base64-encodes the data itself + // In real usage, you would load actual image data + let dummyImageData = Data(base64Encoded: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==") ?? Data() + + do { + let imagePrompt = "What's in this image?" 
+ let images: [ImageContent] = [.base64(dummyImageData, detail: .auto)] + let stream = try await api.sendMessageStream(text: imagePrompt, model: .gpt_hyphen_4o, images: images) + var responseText = "" + for try await line in stream { + responseText += line + print(line, terminator: "") + } + api.appendToHistoryList(userText: imagePrompt, responseText: responseText) + print("\nResponse: \(responseText)") + } catch { + print("Error: \(error.localizedDescription)") } -} + // Example 3: Message with image URL + print("\n=== Example 3: Message with image URL ===") + + do { + let imagePrompt = "Describe this image in detail." + let imageUrl = "https://example.com/image.jpg" // Replace with actual image URL + let images: [ImageContent] = [.url(imageUrl, detail: .high)] + let stream = try await api.sendMessageStream(text: imagePrompt, model: .gpt_hyphen_4o, images: images) + var responseText = "" + for try await line in stream { + responseText += line + print(line, terminator: "") + } + api.appendToHistoryList(userText: imagePrompt, responseText: responseText) + print("\nResponse: \(responseText)") + } catch { + print("Error: \(error.localizedDescription)") + } + + // Example 4: Message with multiple images + print("\n=== Example 4: Message with multiple images ===") + + do { + let multiImagePrompt = "Compare these two images." + let images: [ImageContent] = [ + .url("https://example.com/image1.jpg", detail: .high), + .base64(dummyImageData, detail: .low) + ] + let stream = try await api.sendMessageStream(text: multiImagePrompt, model: .gpt_hyphen_4o, images: images) + var responseText = "" + for try await line in stream { + responseText += line + print(line, terminator: "") + } + api.appendToHistoryList(userText: multiImagePrompt, responseText: responseText) + print("\nResponse: \(responseText)") + } catch { + print("Error: \(error.localizedDescription)") + } + + exit(0) +} RunLoop.main.run(until: .distantFuture) \ No newline at end of file