Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

import org.jspecify.annotations.Nullable;

import ai.docling.serve.api.convert.request.BatchConvertDocumentRequest;
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
import ai.docling.serve.api.task.response.TaskStatusPollResponse;
import ai.docling.serve.api.util.FileUtils;
import ai.docling.serve.api.util.ValidationUtils;

Expand Down Expand Up @@ -107,6 +109,42 @@ default CompletionStage<ConvertDocumentResponse> convertFilesAsync(@Nullable Con
return convertSourceAsync(createRequest(request, files));
}

/**
* Submits a batch conversion request covering multiple document sources.
*
* <p>This method posts the batch request to the server, which returns a task status containing
* a task ID. Only that initial status is returned here; the caller can then use
* {@link DoclingServeTaskApi#pollTaskStatus(ai.docling.serve.api.task.request.TaskStatusPollRequest)}
* and {@link DoclingServeTaskApi#convertTaskResult(ai.docling.serve.api.task.request.TaskResultRequest)}
* to track and retrieve results.
*
* @param request the {@link BatchConvertDocumentRequest} containing the sources, target, conversion options, and optional callbacks.
* @return a {@link TaskStatusPollResponse} containing the task ID and initial status.
* @throws ai.docling.serve.api.validation.ValidationException If request validation fails for any reason.
* @see #convertSourceBatchAsync(BatchConvertDocumentRequest)
*/
TaskStatusPollResponse convertSourceBatch(BatchConvertDocumentRequest request);
Comment thread
edeandrea marked this conversation as resolved.

/**
* Submits a batch conversion request and automatically polls for completion.
*
* <p>This method submits the batch request, polls the task status in the background,
* and completes the returned stage with the conversion result when all documents
* have been processed.
*
* <p>Example usage:
* <pre>{@code
* client.convertSourceBatchAsync(request)
* .thenAccept(response -> System.out.println("Batch complete"))
* .exceptionally(ex -> { ex.printStackTrace(); return null; });
* }</pre>
*
* @param request the {@link BatchConvertDocumentRequest} containing the sources, target, conversion options, and optional callbacks.
* @return a {@link CompletionStage} that completes with the {@link ConvertDocumentResponse}
* when all documents have been processed, or completes exceptionally if the
* batch conversion fails or times out.
* @throws ai.docling.serve.api.validation.ValidationException If request validation fails for any reason.
* @see #convertSourceBatch(BatchConvertDocumentRequest)
*/
CompletionStage<ConvertDocumentResponse> convertSourceBatchAsync(BatchConvertDocumentRequest request);
Comment thread
edeandrea marked this conversation as resolved.

private ConvertDocumentRequest createRequest(@Nullable ConvertDocumentRequest request, Path... files) {
ValidationUtils.ensureNotEmpty(files, "files");

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package ai.docling.serve.api.convert.request;

import java.util.List;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
import com.fasterxml.jackson.annotation.Nulls;

import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions;
import ai.docling.serve.api.convert.request.source.Source;
import ai.docling.serve.api.convert.request.target.Target;

/**
* Represents a request to batch convert document sources. The batch endpoint processes multiple
* documents asynchronously and returns a task ID for tracking progress. Sources can be HTTP URLs
* or S3 buckets, and results are delivered to a presigned URL or S3 target.
*
* <p>This class is serialized into JSON to conform to the API specification using
* {@link JsonProperty} annotations. Fields with {@code null} values or empty collections
* are omitted from the serialized JSON using {@link JsonInclude}.
*
* <p>Instances are created through the Lombok-generated builder; {@code toBuilder()} is
* available for deriving a modified copy from an existing request.
*/
@JsonInclude(JsonInclude.Include.NON_EMPTY)
@tools.jackson.databind.annotation.JsonDeserialize(builder = BatchConvertDocumentRequest.Builder.class)
@lombok.extern.jackson.Jacksonized
@lombok.Builder(toBuilder = true)
@lombok.Getter
@lombok.ToString
public class BatchConvertDocumentRequest {
/**
* List of document sources to be converted.
* Each source can be an HTTP URL or S3 reference.
* A JSON {@code null} for this property is treated as an empty list on deserialization.
*
* @param sources the list of document sources
* @return the list of document sources
*/
@JsonProperty("sources")
@JsonSetter(nulls = Nulls.AS_EMPTY)
@lombok.Singular
private List<Source> sources;

/**
* Target specification for where the converted documents should be delivered.
* Must be either a {@link ai.docling.serve.api.convert.request.target.PresignedUrlTarget}
* or {@link ai.docling.serve.api.convert.request.target.S3Target}.
* This value is required and must not be {@code null}.
*
* @param target the output target
* @return the output target
*/
@JsonProperty("target")
@lombok.NonNull
private Target target;

/**
* Options controlling the document conversion process.
* Includes settings for OCR, output formats, processing pipelines, and more.
* When not set on the builder, defaults to {@code ConvertDocumentOptions.builder().build()};
* must not be {@code null}.
*
* @param options the conversion options
* @return the conversion options
*/
@JsonProperty("options")
@lombok.NonNull
@lombok.Builder.Default
private ConvertDocumentOptions options = ConvertDocumentOptions.builder().build();

/**
* Webhook callbacks for receiving progress notifications during batch processing.
* A JSON {@code null} for this property is treated as an empty list on deserialization.
*
* @param callbacks the list of callback specifications
* @return the list of callback specifications
*/
@JsonProperty("callbacks")
@JsonSetter(nulls = Nulls.AS_EMPTY)
@lombok.Singular
private List<CallbackSpec> callbacks;

/**
* Builder for creating {@link BatchConvertDocumentRequest} instances.
* Generated by Lombok's {@code @Builder} annotation.
*
* <p>Builder methods:
* <ul>
* <li>{@code source(Source)} - Add a single document source</li>
* <li>{@code sources(List<Source>)} - Set the list of document sources</li>
* <li>{@code target(Target)} - Set the output target</li>
* <li>{@code options(ConvertDocumentOptions)} - Set the conversion options</li>
* <li>{@code callback(CallbackSpec)} - Add a single callback specification</li>
* <li>{@code callbacks(List<CallbackSpec>)} - Set the list of callback specifications</li>
* </ul>
*
* <p>The builder is also the Jackson deserialization entry point for this class; it is
* declared with an empty method prefix, so the builder methods are named after the
* properties themselves rather than using a {@code with} prefix.
*/
@tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
public static class Builder { }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package ai.docling.serve.api.convert.request;

import java.net.URI;
import java.util.Map;

import org.jspecify.annotations.Nullable;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
import com.fasterxml.jackson.annotation.Nulls;

/**
* Represents a webhook callback specification for batch conversion progress notifications.
* When configured, the server sends POST requests to the specified URL with progress updates
* as documents are processed.
*
* <p>Fields with {@code null} values or empty collections are omitted from the serialized
* JSON using {@link JsonInclude}.
*/
@JsonInclude(JsonInclude.Include.NON_EMPTY)
@tools.jackson.databind.annotation.JsonDeserialize(builder = CallbackSpec.Builder.class)
@lombok.extern.jackson.Jacksonized
@lombok.Builder(toBuilder = true)
@lombok.Getter
@lombok.ToString
public class CallbackSpec {
/**
* The webhook URL that receives POST progress updates.
* This value is required and must not be {@code null}.
*
* @param url the webhook URL
* @return the webhook URL
*/
@JsonProperty("url")
@lombok.NonNull
private URI url;

/**
* Additional headers sent with callback requests.
* A JSON {@code null} for this property is treated as an empty map on deserialization.
*
* @param headers the additional headers
* @return the additional headers
*/
@JsonProperty("headers")
@JsonSetter(nulls = Nulls.AS_EMPTY)
@lombok.Singular
private Map<String, Object> headers;

/**
* Custom CA certificate (PEM) for endpoint verification.
* Optional; may be {@code null}. Serialized as the {@code ca_cert} JSON property.
*
* @param caCert the CA certificate in PEM format
* @return the CA certificate in PEM format
*/
@JsonProperty("ca_cert")
@Nullable
private String caCert;

/**
* Builder for creating {@link CallbackSpec} instances.
* Generated by Lombok's {@code @Builder} annotation.
*
* <p>Builder methods:
* <ul>
* <li>{@code url(URI)} - Set the webhook URL</li>
* <li>{@code header(String, Object)} - Add a single additional header</li>
* <li>{@code headers(Map<String, Object>)} - Set the additional headers</li>
* <li>{@code caCert(String)} - Set the CA certificate in PEM format</li>
* </ul>
*/
@tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
public static class Builder { }
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,17 @@ public final class S3Source extends Source {
@lombok.Builder.Default
private boolean verifySsl = true;

/**
* Maximum number of elements to read from S3. When set, limits the number of objects processed.
* Useful for batch conversions where the S3 source may contain a large number of objects.
*
* <p>This value is optional ({@code null} when not set) and maps to the
* {@code max_num_elements} JSON property.
*
* @param maxNumElements Maximum number of elements to read from S3, or {@code null} if not set.
* @return Maximum number of elements to read from S3, or {@code null} if not set.
*/
@JsonProperty("max_num_elements")
@org.jspecify.annotations.Nullable
private Integer maxNumElements;

@tools.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
public static class Builder { }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ai.docling.serve.api.serialization;

import java.io.IOException;
import java.util.List;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;

import ai.docling.serve.api.validation.ValidationErrorDetail;

/**
 * Jackson 2 deserializer for a list of {@link ValidationErrorDetail} that tolerates two JSON shapes:
 * <ul>
 *   <li>a plain string, which becomes a single-element list whose detail carries the string as its message;</li>
 *   <li>any other value, which is read as a regular {@code List<ValidationErrorDetail>}.</li>
 * </ul>
 *
 * <p>The deserializer also supplies an empty list as its "empty" value so that properties
 * configured with {@code @JsonSetter(nulls = Nulls.AS_EMPTY)} receive an empty list, not
 * {@code null}, when the JSON value is {@code null}.
 */
public class Jackson2ValidationErrorDetailListDeserializer extends JsonDeserializer<List<ValidationErrorDetail>> {

    /**
     * Reads the current JSON value as a list of validation error details.
     *
     * @param p the parser positioned on the value to read
     * @param ctxt the active deserialization context
     * @return a single-element list when the value is a JSON string, otherwise the list read from the JSON value
     * @throws IOException if the underlying content cannot be read or mapped
     */
    @Override
    public List<ValidationErrorDetail> deserialize(JsonParser p, DeserializationContext ctxt) throws IOException {
        // Some error responses carry a bare string instead of an array of detail objects.
        if (p.currentToken() == JsonToken.VALUE_STRING) {
            return List.of(ValidationErrorDetail.builder().message(p.getText()).build());
        }

        var type = ctxt.getTypeFactory().constructCollectionType(List.class, ValidationErrorDetail.class);
        return ctxt.readValue(p, type);
    }

    /**
     * Supplies the value used when null handling is configured as "as empty".
     *
     * <p>The inherited implementation falls back to the null value, which would defeat
     * {@code Nulls.AS_EMPTY} on properties using this deserializer.
     *
     * @param ctxt the active deserialization context
     * @return an immutable empty list
     */
    @Override
    public List<ValidationErrorDetail> getEmptyValue(DeserializationContext ctxt) {
        return List.of();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ai.docling.serve.api.serialization;

import tools.jackson.core.JacksonException;
import tools.jackson.core.JsonParser;
import tools.jackson.core.JsonToken;
import tools.jackson.databind.DeserializationContext;
import tools.jackson.databind.ValueDeserializer;

import java.util.List;

import ai.docling.serve.api.validation.ValidationErrorDetail;

/**
 * Jackson 3 deserializer for a list of {@link ValidationErrorDetail} that tolerates two JSON shapes:
 * <ul>
 *   <li>a plain string, which becomes a single-element list whose detail carries the string as its message;</li>
 *   <li>any other value, which is read as a regular {@code List<ValidationErrorDetail>}.</li>
 * </ul>
 *
 * <p>The deserializer also supplies an empty list as its "empty" value so that properties
 * configured with {@code @JsonSetter(nulls = Nulls.AS_EMPTY)} receive an empty list, not
 * {@code null}, when the JSON value is {@code null}.
 */
public class Jackson3ValidationErrorDetailListDeserializer extends ValueDeserializer<List<ValidationErrorDetail>> {

    /**
     * Reads the current JSON value as a list of validation error details.
     *
     * @param p the parser positioned on the value to read
     * @param ctxt the active deserialization context
     * @return a single-element list when the value is a JSON string, otherwise the list read from the JSON value
     * @throws JacksonException if the underlying content cannot be read or mapped
     */
    @Override
    public List<ValidationErrorDetail> deserialize(JsonParser p, DeserializationContext ctxt) throws JacksonException {
        // Some error responses carry a bare string instead of an array of detail objects.
        if (p.currentToken() == JsonToken.VALUE_STRING) {
            return List.of(ValidationErrorDetail.builder().message(p.getText()).build());
        }

        var type = ctxt.getTypeFactory().constructCollectionType(List.class, ValidationErrorDetail.class);
        return ctxt.readValue(p, type);
    }

    /**
     * Supplies the value used when null handling is configured as "as empty".
     *
     * <p>The inherited implementation falls back to the null value, which would defeat
     * {@code Nulls.AS_EMPTY} on properties using this deserializer.
     *
     * @param ctxt the active deserialization context
     * @return an immutable empty list
     */
    @Override
    public List<ValidationErrorDetail> getEmptyValue(DeserializationContext ctxt) {
        return List.of();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
import com.fasterxml.jackson.annotation.Nulls;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;

import ai.docling.serve.api.serialization.Jackson2ValidationErrorDetailListDeserializer;
import ai.docling.serve.api.serialization.Jackson3ValidationErrorDetailListDeserializer;

/**
* Represents a validation error with customizable serialization and deserialization behavior.
Expand Down Expand Up @@ -34,6 +38,8 @@ public class ValidationError {
*/
@JsonProperty("detail")
@JsonSetter(nulls = Nulls.AS_EMPTY)
@JsonDeserialize(using = Jackson2ValidationErrorDetailListDeserializer.class)
@tools.jackson.databind.annotation.JsonDeserialize(using = Jackson3ValidationErrorDetailListDeserializer.class)
@lombok.Singular
Comment thread
edeandrea marked this conversation as resolved.
private List<ValidationErrorDetail> errorDetails;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.Optional;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.Flow.Subscriber;
import java.util.stream.Collectors;

import org.jspecify.annotations.Nullable;
import org.slf4j.Logger;
Expand All @@ -38,6 +39,7 @@
import ai.docling.serve.api.clear.request.ClearConvertersRequest;
import ai.docling.serve.api.clear.request.ClearResultsRequest;
import ai.docling.serve.api.clear.response.ClearResponse;
import ai.docling.serve.api.convert.request.BatchConvertDocumentRequest;
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
import ai.docling.serve.api.health.HealthCheckResponse;
Expand All @@ -47,6 +49,7 @@
import ai.docling.serve.api.util.Utils;
import ai.docling.serve.api.util.ValidationUtils;
import ai.docling.serve.api.validation.ValidationError;
import ai.docling.serve.api.validation.ValidationErrorDetail;
import ai.docling.serve.api.validation.ValidationException;
import ai.docling.serve.client.operations.ChunkOperations;
import ai.docling.serve.client.operations.ClearOperations;
Expand Down Expand Up @@ -302,8 +305,8 @@ protected <T> T getResponse(HttpRequest request, HttpResponse<?> response, Class

var statusCode = response.statusCode();

if(statusCode >= 400) {
if(StreamResponse.class.equals(expectedReturnType)) {
if (statusCode >= 400) {
if (StreamResponse.class.equals(expectedReturnType)) {
// typical 4XX & 5XX responses are usually accompanied by JSON response bodies
// hence, reading the stream here.
try (InputStream is = (InputStream) body){
Expand All @@ -313,17 +316,24 @@ protected <T> T getResponse(HttpRequest request, HttpResponse<?> response, Class
}
}

if(statusCode == 422) {
if (statusCode == 422) {
var validationError = readValue(body.toString(), ValidationError.class);
var errorText = validationError.getErrorDetails()
.stream()
.map(ValidationErrorDetail::getMessage)
.filter(Objects::nonNull)
.collect(Collectors.joining("\n"));
Comment thread
edeandrea marked this conversation as resolved.

throw new ValidationException(
readValue(body.toString(), ValidationError.class),
"An error occurred while making %s request to %s".formatted(request.method(), request.uri())
validationError,
"An error occurred while making %s request to %s:\n%s".formatted(request.method(), request.uri(), errorText)
);
} else {
throw new DoclingServeClientException("An error occurred: %s".formatted(body.toString()), statusCode, body.toString());
}
}

if(StreamResponse.class.equals(expectedReturnType)) {
if (StreamResponse.class.equals(expectedReturnType)) {
return (T) StreamResponse
.builder()
.headers(headerName -> response.headers().firstValue(headerName))
Expand Down Expand Up @@ -394,6 +404,16 @@ public CompletionStage<ConvertDocumentResponse> convertSourceAsync(ConvertDocume
return this.convertOps.convertSourceAsync(request);
}

/**
 * {@inheritDoc}
 *
 * <p>This implementation hands the request to the convert operations delegate.
 */
@Override
public TaskStatusPollResponse convertSourceBatch(BatchConvertDocumentRequest request) {
    final TaskStatusPollResponse initialStatus = this.convertOps.convertSourceBatch(request);
    return initialStatus;
}

/**
 * {@inheritDoc}
 *
 * <p>This implementation hands the request to the convert operations delegate.
 */
@Override
public CompletionStage<ConvertDocumentResponse> convertSourceBatchAsync(BatchConvertDocumentRequest request) {
    final CompletionStage<ConvertDocumentResponse> pendingResult = this.convertOps.convertSourceBatchAsync(request);
    return pendingResult;
}

private class LoggingBodyPublisher<T> implements BodyPublisher {
private final BodyPublisher delegate;
private final String stringContent;
Expand Down
Loading