From dd631acc69899bfec373d3ccd9eaf7e3e69a9b65 Mon Sep 17 00:00:00 2001 From: faywang123 Date: Mon, 13 Jan 2025 12:07:51 -0800 Subject: [PATCH 1/7] audio integration --- .../primitives/tasks/extract.py | 6 + .../util/file_processing/extract.py | 5 +- docker-compose.yaml | 26 ++ .../schemas/audio_extractor_schema.py | 131 +++++++++ src/nv_ingest/schemas/ingest_job_schema.py | 3 +- src/nv_ingest/stages/nim/audio_extraction.py | 254 ++++++++++++++++++ src/nv_ingest/util/nim/helpers.py | 51 ++++ .../util/pipeline/pipeline_builders.py | 6 +- src/nv_ingest/util/pipeline/stage_builders.py | 52 ++++ 9 files changed, 531 insertions(+), 3 deletions(-) create mode 100755 src/nv_ingest/schemas/audio_extractor_schema.py create mode 100755 src/nv_ingest/stages/nim/audio_extraction.py diff --git a/client/src/nv_ingest_client/primitives/tasks/extract.py b/client/src/nv_ingest_client/primitives/tasks/extract.py index 6d3722f5..ae2a8bce 100644 --- a/client/src/nv_ingest_client/primitives/tasks/extract.py +++ b/client/src/nv_ingest_client/primitives/tasks/extract.py @@ -45,6 +45,8 @@ "svg": "image", "tiff": "image", "xml": "lxml", + "mp3": "audio", + "wav": "audio", } _Type_Extract_Method_PDF = Literal[ @@ -63,6 +65,8 @@ _Type_Extract_Method_Image = Literal["image"] +_Type_Extract_Method_Audio = Literal["audio"] + _Type_Extract_Method_Map = { "docx": get_args(_Type_Extract_Method_DOCX), "jpeg": get_args(_Type_Extract_Method_Image), @@ -72,6 +76,8 @@ "pptx": get_args(_Type_Extract_Method_PPTX), "svg": get_args(_Type_Extract_Method_Image), "tiff": get_args(_Type_Extract_Method_Image), + "mp3": get_args(_Type_Extract_Method_Audio), + "wav": get_args(_Type_Extract_Method_Audio), } _Type_Extract_Tables_Method_PDF = Literal["yolox", "pdfium"] diff --git a/client/src/nv_ingest_client/util/file_processing/extract.py b/client/src/nv_ingest_client/util/file_processing/extract.py index 97851481..d82ad6c7 100644 --- a/client/src/nv_ingest_client/util/file_processing/extract.py +++ 
b/client/src/nv_ingest_client/util/file_processing/extract.py @@ -32,7 +32,8 @@ class DocumentTypeEnum(str, Enum): svg = "svg" tiff = "tiff" txt = "text" - + mp3 = "mp3" + wav = "wav" # Maps MIME types to DocumentTypeEnum MIME_TO_DOCUMENT_TYPE = { @@ -64,6 +65,8 @@ class DocumentTypeEnum(str, Enum): "svg": DocumentTypeEnum.svg, "tiff": DocumentTypeEnum.tiff, "txt": DocumentTypeEnum.txt, + "mp3": DocumentTypeEnum.mp3, + "wav": DocumentTypeEnum.wav, # Add more as needed } diff --git a/docker-compose.yaml b/docker-compose.yaml index 8d9c307a..3f5fd918 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -125,6 +125,29 @@ services: capabilities: [gpu] runtime: nvidia + audio: + image: ads/audio:latest + shm_size: 2gb + ports: + - "8015:8000" + user: root + environment: + - NIM_HTTP_API_PORT=8000 + - NIM_TRITON_LOG_VERBOSE=1 + - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}} + - CUDA_VISIBLE_DEVICES=0 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["1"] + capabilities: [gpu] + runtime: nvidia + working_dir: /app/audio_retrieval/src + + + nv-ingest-ms-runtime: image: nvcr.io/ohlfw0olaadg/ea-participants/nv-ingest:24.10.1 build: @@ -141,6 +164,9 @@ services: cap_add: - sys_nice environment: + # Self-hosted audio endpoints. + - AUDIO_HTTP_ENDPOINT=http://audio:8000/v1/transcribe + - AUDIO_INFER_PROTOCOL=http # Self-hosted cached endpoints. - CACHED_GRPC_ENDPOINT=cached:8001 - CACHED_HTTP_ENDPOINT=http://cached:8000/v1/infer diff --git a/src/nv_ingest/schemas/audio_extractor_schema.py b/src/nv_ingest/schemas/audio_extractor_schema.py new file mode 100755 index 00000000..49a3dc5d --- /dev/null +++ b/src/nv_ingest/schemas/audio_extractor_schema.py @@ -0,0 +1,131 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + + +import logging +from typing import Optional +from typing import Tuple + +from pydantic import BaseModel +from pydantic import root_validator + +logger = logging.getLogger(__name__) + + +class AudioConfigSchema(BaseModel): + """ + Configuration schema for audio extraction endpoints and options. + + Parameters + ---------- + auth_token : Optional[str], default=None + Authentication token required for secure services. + + audio_endpoints : Tuple[str, str] + A tuple containing the gRPC and HTTP services for the audio_retriever endpoint. + Either the gRPC or HTTP service can be empty, but not both. + + Methods + ------- + validate_endpoints(values) + Validates that at least one of the gRPC or HTTP services is provided for each endpoint. + + Raises + ------ + ValueError + If both gRPC and HTTP services are empty for any endpoint. + + Config + ------ + extra : str + Pydantic config option to forbid extra fields. + """ + + auth_token: Optional[str] = None + audio_endpoints: Tuple[Optional[str], Optional[str]] = (None, None) + audio_infer_protocol: Optional[str] = None + + @root_validator(pre=True) + def validate_endpoints(cls, values): + """ + Validates the gRPC and HTTP services for all endpoints. + + Parameters + ---------- + values : dict + Dictionary containing the values of the attributes for the class. + + Returns + ------- + dict + The validated dictionary of values. + + Raises + ------ + ValueError + If both gRPC and HTTP services are empty for any endpoint. 
+ """ + + def clean_service(service): + """Set service to None if it's an empty string or contains only spaces or quotes.""" + if service is None or not service.strip() or service.strip(" \"'") == "": + return None + return service + + + print ('===> audio extractor schema values:', values) + endpoint_name = "audio_endpoints" + grpc_service, http_service = values.get(endpoint_name) + print ("grpc_service:", grpc_service) + print ("http_service:", http_service) + grpc_service = clean_service(grpc_service) + http_service = clean_service(http_service) + + if not grpc_service and not http_service: + raise ValueError(f"Both gRPC and HTTP services cannot be empty for {endpoint_name}.") + + values[endpoint_name] = (grpc_service, http_service) + + protocol_name = "audio_infer_protocol" + protocol_value = values.get(protocol_name) + + print("protocol_value:", protocol_value) + if not protocol_value: + protocol_value = "http" if http_service else "grpc" if grpc_service else "" + protocol_value = protocol_value.lower() + values[protocol_name] = protocol_value + + return values + + class Config: + extra = "forbid" + + +class AudioExtractorSchema(BaseModel): + """ + Configuration schema for the PDF extractor settings. + + Parameters + ---------- + max_queue_size : int, default=1 + The maximum number of items allowed in the processing queue. + + n_workers : int, default=16 + The number of worker threads to use for processing. + + raise_on_failure : bool, default=False + A flag indicating whether to raise an exception on processing failure. + + audio_extraction_config: Optional[AudioConfigSchema], default=None + Configuration schema for the audio extraction stage. 
+ """ + + max_queue_size: int = 1 + n_workers: int = 16 + raise_on_failure: bool = False + + audio_extraction_config: Optional[AudioConfigSchema] = None + + class Config: + extra = "forbid" diff --git a/src/nv_ingest/schemas/ingest_job_schema.py b/src/nv_ingest/schemas/ingest_job_schema.py index 97ffc539..09975228 100644 --- a/src/nv_ingest/schemas/ingest_job_schema.py +++ b/src/nv_ingest/schemas/ingest_job_schema.py @@ -33,7 +33,8 @@ class DocumentTypeEnum(str, Enum): svg = "svg" tiff = "tiff" txt = "text" - + mp3 = "mp3" + wav = "wav" class TaskTypeEnum(str, Enum): caption = "caption" diff --git a/src/nv_ingest/stages/nim/audio_extraction.py b/src/nv_ingest/stages/nim/audio_extraction.py new file mode 100755 index 00000000..55556936 --- /dev/null +++ b/src/nv_ingest/stages/nim/audio_extraction.py @@ -0,0 +1,254 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import logging +import functools +import pandas as pd +from typing import Any +from typing import Dict +from typing import Optional +from typing import Tuple + +import tritonclient.grpc as grpcclient +from morpheus.config import Config +from nv_ingest.schemas.audio_extractor_schema import AudioExtractorSchema +from nv_ingest.stages.multiprocessing_stage import MultiProcessingBaseStage + +import sys +sys.path.append('../../..') + +from nv_ingest.util.nim.helpers import call_audio_inference_model, create_inference_client +from nv_ingest.util.nim.helpers import get_version + +logger = logging.getLogger(f"morpheus.{__name__}") + + +def _update_metadata(row: pd.Series, audio_client: Any, audio_version: Any, trace_info: Dict) -> Dict: + """ + Modifies the metadata of a row if the conditions for table extraction are met. + + Parameters + ---------- + row : pd.Series + A row from the DataFrame containing metadata for the audio extraction. + + audio_client : Any + The client used to call the audio inference model. 
+ + trace_info : Dict + Trace information used for logging or debugging. + + Returns + ------- + Dict + The modified metadata if conditions are met, otherwise the original metadata. + + Raises + ------ + ValueError + If critical information (such as metadata) is missing from the row. + """ + + + metadata = row.get("metadata") + + if metadata is None: + logger.error("Row does not contain 'metadata'.") + raise ValueError("Row does not contain 'metadata'.") + + content_metadata = metadata.get("content_metadata", {}) + + # Only modify if content type is audio + if content_metadata.get("type") != "audio" : + return metadata + + source_metadata = metadata.get("source_metadata") + audio_id = source_metadata['source_id'] + + content_metadata = metadata.get("content_metadata") + content_metadata = content_metadata['content'] + audio_content = content_metadata['content'] + + + # Modify audio metadata with the result from the inference model + try: + audio_result = call_audio_inference_model(audio_client, audio_content, audio_id, trace_info=trace_info) + print(audio_result) + metadata['audio_metadata'] = {'content': audio_result} + except Exception as e: + logger.error(f"Unhandled error calling audio inference model: {e}", exc_info=True) + raise + + return metadata + + +def _transcribe_audio(df: pd.DataFrame, task_props: Dict[str, Any], + validated_config: Any, trace_info: Optional[Dict] = None) -> Tuple[pd.DataFrame, Dict]: + """ + Extracts audio data from a DataFrame. + + Parameters + ---------- + df : pd.DataFrame + DataFrame containing the content from which audio data is to be extracted. + + task_props : Dict[str, Any] + Dictionary containing task properties and configurations. + + validated_config : Any + The validated configuration object for audio extraction. + + trace_info : Optional[Dict], optional + Optional trace information for debugging or logging. Defaults to None. 
+ + Returns + ------- + Tuple[pd.DataFrame, Dict] + A tuple containing the updated DataFrame and the trace information. + + Raises + ------ + Exception + If any error occurs during the audio data extraction process. + """ + + #port = 32783 + #audio_client = create_inference_client( + # (None, f'http://0.0.0.0:{port}/v1/transcribe'), + # None, + # "http" + #) + + + audio_client = create_inference_client( + validated_config.stage_config.audio_endpoints, + None, + "http" + ) + + if trace_info is None: + trace_info = {} + logger.debug("No trace_info provided. Initialized empty trace_info dictionary.") + + try: + # Apply the _update_metadata function to each row in the DataFrame + #audio_version = get_version(validated_config.stage_config.audio_endpoints[1]) + audio_version = get_version(f'http://audio:{port}') + df["metadata"] = df.apply(_update_metadata, axis=1, args=(audio_client, audio_version, trace_info)) + + return df, trace_info + + except Exception as e: + logger.error("Error occurred while extracting audio data.", exc_info=True) + raise + + +def generate_audio_extractor_stage( + c: Config, + stage_config: Dict[str, Any], + task: str = "audio_data_extract", + task_desc: str = "audio_data_extraction", + pe_count: int = 1, +): + """ + Generates a multiprocessing stage to perform audio data extraction. + + Parameters + ---------- + c : Config + Morpheus global configuration object. + + stage_config : Dict[str, Any] + Configuration parameters for the audio content extractor, passed as a dictionary + validated against the `AudioExtractorSchema`. + + task : str, optional + The task name for the stage worker function, defining the specific audio extraction process. + Default is "audio_data_extract". + + task_desc : str, optional + A descriptor used for latency tracing and logging during audio extraction. + Default is "audio_data_extraction". + + pe_count : int, optional + The number of process engines to use for audio data extraction. 
This value controls + how many worker processes will run concurrently. Default is 1. + + Returns + ------- + MultiProcessingBaseStage + A configured Morpheus stage with an applied worker function that handles audio data extraction + from PDF content. + """ + + validated_config = AudioExtractorSchema(**stage_config) + _wrapped_process_fn = functools.partial(_transcribe_audio, validated_config=validated_config) + + return MultiProcessingBaseStage( + c=c, + pe_count=pe_count, + task=task, + task_desc=task_desc, + process_fn=_wrapped_process_fn, + document_type="regex:^(mp3|wav)$", + ) + + + +if __name__ == "__main__": + metadata = { + "source_metadata": { + "access_level": 1, + "collection_id": "", + "date_created": "2024-11-04T12:29:08", + "last_modified": "2024-11-04T12:29:08", + "partition_id": -1, + "source_id": "https://audio.listennotes.com/e/p/3946bc3aba1f425f8b2e146f0b3f72fc/", + "source_location": "", + "source_type": "wav", + "summary": "" + }, + + "content_metadata": { + "description": "Audio wav file", + "type": "audio", + "content": '' + } + } + + + metadata = { + "source_metadata": { + "access_level": 1, + "collection_id": "", + "date_created": "2024-11-04T12:29:08", + "last_modified": "2024-11-04T12:29:08", + "partition_id": -1, + "source_id": "test.mp3", + "source_location": "", + "source_type": "mp3", + "summary": "" + }, + + "content_metadata": { + "description": "Audio wav file", + "type": "audio", + "content": 'some base64 string' + } + } + + + + data = [{"metadata": metadata}] + df = pd.DataFrame(data) + + df.to_csv('test.csv', index=False) + + df_result, _ = _transcribe_audio(df) + + df_result.to_csv('result.csv', index=False) + + + + print("Done!") diff --git a/src/nv_ingest/util/nim/helpers.py b/src/nv_ingest/util/nim/helpers.py index db7e0fdd..61a41634 100644 --- a/src/nv_ingest/util/nim/helpers.py +++ b/src/nv_ingest/util/nim/helpers.py @@ -593,3 +593,54 @@ def get_version(http_endpoint: str, metadata_endpoint: str = "/v1/metadata", ver # 
Don't let anything squeeze by logger.warning(f"Exception: {ex}") return "" + + +def call_audio_inference_model(client, audio_content: str, audio_id: str, trace_info: dict): + """ + Calls an audio inference model using the provided client. + + If the client is a gRPC client, the inference is performed using gRPC. Otherwise, it is performed using HTTP. + + Parameters + ---------- + client : grpcclient.InferenceServerClient or dict + The inference client, which is an HTTP client. + audio_source : str + The audio source to transcribe. + + Returns + ------- + str or None + The result of the inference as a string if successful, otherwise `None`. + + Raises + ------ + RuntimeError + If the HTTP request fails or if the response format is not as expected. + """ + + try: + url = client["endpoint_url"] + headers = client["headers"] + + payload = {"audio_content": audio_content, "audio_id": audio_id} + response = requests.post(url, json=payload, headers=headers) + + response.raise_for_status() # Raise an exception for HTTP errors + + # Parse the JSON response + json_response = response.json() + + except requests.exceptions.RequestException as e: + raise RuntimeError(f"HTTP request failed: {e}") + except KeyError as e: + raise RuntimeError(f"Missing expected key in response: {e}") + except Exception as e: + raise RuntimeError(f"An error occurred during inference: {e}") + + return json_response + + + + + diff --git a/src/nv_ingest/util/pipeline/pipeline_builders.py b/src/nv_ingest/util/pipeline/pipeline_builders.py index 842682f0..5a1c25cb 100644 --- a/src/nv_ingest/util/pipeline/pipeline_builders.py +++ b/src/nv_ingest/util/pipeline/pipeline_builders.py @@ -32,6 +32,8 @@ def setup_ingestion_pipeline( image_extractor_stage = add_image_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count) docx_extractor_stage = add_docx_extractor_stage(pipe, morpheus_pipeline_config, default_cpu_count) pptx_extractor_stage = add_pptx_extractor_stage(pipe, 
morpheus_pipeline_config, default_cpu_count) + ## audio extraction + audio_extractor_stage = add_audio_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count) ######################################################################################################## ######################################################################################################## @@ -76,7 +78,9 @@ def setup_ingestion_pipeline( pipe.add_edge(pdf_extractor_stage, image_extractor_stage) pipe.add_edge(image_extractor_stage, docx_extractor_stage) pipe.add_edge(docx_extractor_stage, pptx_extractor_stage) - pipe.add_edge(pptx_extractor_stage, image_dedup_stage) + pipe.add_edge(pptx_extractor_stage, audio_extractor_stage) + pipe.add_edge(audio_extractor_stage, image_dedup_stage) + pipe.add_edge(image_dedup_stage, image_filter_stage) pipe.add_edge(image_filter_stage, table_extraction_stage) pipe.add_edge(table_extraction_stage, chart_extraction_stage) diff --git a/src/nv_ingest/util/pipeline/stage_builders.py b/src/nv_ingest/util/pipeline/stage_builders.py index 352ed006..b5153cbb 100644 --- a/src/nv_ingest/util/pipeline/stage_builders.py +++ b/src/nv_ingest/util/pipeline/stage_builders.py @@ -28,6 +28,7 @@ from nv_ingest.stages.filters import generate_image_filter_stage from nv_ingest.stages.nim.chart_extraction import generate_chart_extractor_stage from nv_ingest.stages.nim.table_extraction import generate_table_extractor_stage +from nv_ingest.stages.nim.audio_extraction import generate_audio_extractor_stage from nv_ingest.stages.pdf_extractor_stage import generate_pdf_extractor_stage from nv_ingest.stages.pptx_extractor_stage import generate_pptx_extractor_stage from nv_ingest.stages.storages.embedding_storage_stage import generate_embedding_storage_stage @@ -303,6 +304,57 @@ def add_pptx_extractor_stage(pipe, morpheus_pipeline_config, default_cpu_count): return pptx_extractor_stage +def get_audio_retrieval_service(env_var_prefix): + prefix = 
env_var_prefix.upper() + grpc_endpoint = os.environ.get( + "AUDIO_GRPC_ENDPOINT", + "", + ) + http_endpoint = os.environ.get( + "AUDIO_HTTP_ENDPOINT", + "", + ) + auth_token = os.environ.get( + "NVIDIA_BUILD_API_KEY", + "", + ) or os.environ.get( + "NGC_API_KEY", + "", + ) + infer_protocol = os.environ.get( + "AUDIO_INFER_PROTOCOL", + "http" if http_endpoint else "grpc" if grpc_endpoint else "", + ) + + logger.info(f"{prefix}_GRPC_TRITON: {grpc_endpoint}") + logger.info(f"{prefix}_HTTP_TRITON: {http_endpoint}") + logger.info(f"{prefix}_INFER_PROTOCOL: {infer_protocol}") + + return grpc_endpoint, http_endpoint, auth_token, infer_protocol + +def add_audio_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count): + audio_grpc, audio_http, audio_auth, audio_infer_protocol = get_audio_retrieval_service("audio") + audio_extractor_config = ingest_config.get("audio_extraction_module", + { + "audio_extraction_config": { + "audio_endpoints": (audio_grpc, audio_http), + "audio_infer_protocol": audio_infer_protocol, + "auth_token": audio_auth, + # All auth tokens are the same for the moment + } + }) + audio_extractor_stage = pipe.add_stage( + generate_audio_extractor_stage( + morpheus_pipeline_config, + stage_config=audio_extractor_config, + pe_count=8, + task="extract", + task_desc="audio_content_extractor", + ) + ) + return audio_extractor_stage + + def add_image_dedup_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count): image_dedup_config = ingest_config.get("dedup_module", {}) image_dedup_stage = pipe.add_stage( From 48fcfdd32b1f149444c2b7e884b00525139c76b0 Mon Sep 17 00:00:00 2001 From: faywang123 Date: Mon, 13 Jan 2025 18:35:06 -0800 Subject: [PATCH 2/7] audio image --- docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 3f5fd918..11fc8236 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -126,7 +126,7 @@ services: runtime: nvidia 
audio: - image: ads/audio:latest + image: nvcr.io/nvidian/audio_retrieval:latest shm_size: 2gb ports: - "8015:8000" From 8dc7bf67faac4c4ba18d6d3e1319349182fa86ac Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Fri, 17 Jan 2025 09:29:58 -0700 Subject: [PATCH 3/7] Merge upstream/main --- .../src/nv_ingest_client/client/interface.py | 17 +- client/src/nv_ingest_client/nv_ingest_cli.py | 4 +- .../primitives/tasks/caption.py | 1 + .../primitives/tasks/embed.py | 37 +- client/src/nv_ingest_client/util/milvus.py | 172 +++++- .../developer-guide/kubernetes-dev.md | 94 ++-- src/nv_ingest/api/v1/ingest.py | 171 +++++- .../extraction_workflows/docx/docx_helper.py | 21 +- .../extraction_workflows/docx/docxreader.py | 526 ++++++++++++++---- .../image/image_handlers.py | 193 +++---- .../extraction_workflows/pdf/pdfium_helper.py | 87 +-- .../extraction_workflows/pptx/pptx_helper.py | 301 ++++++---- .../modules/transforms/embed_extractions.py | 211 +++---- .../schemas/docx_extractor_schema.py | 124 +++++ src/nv_ingest/schemas/ingest_job_schema.py | 3 +- .../schemas/ingest_pipeline_config_schema.py | 4 +- src/nv_ingest/schemas/metadata_schema.py | 8 +- .../schemas/pptx_extractor_schema.py | 120 +++- .../schemas/processing_job_schema.py | 31 ++ .../impl/ingest/redis_ingest_service.py | 25 + .../meta/ingest/ingest_service_meta.py | 10 + src/nv_ingest/stages/docx_extractor_stage.py | 107 ++-- .../extractors/image_extractor_stage.py | 2 - src/nv_ingest/stages/nim/chart_extraction.py | 8 +- src/nv_ingest/stages/nim/table_extraction.py | 4 +- src/nv_ingest/stages/pptx_extractor_stage.py | 110 ++-- src/nv_ingest/util/converters/formats.py | 70 +++ src/nv_ingest/util/nim/cached.py | 2 +- src/nv_ingest/util/nim/deplot.py | 2 +- src/nv_ingest/util/nim/helpers.py | 13 +- src/nv_ingest/util/nim/yolox.py | 238 +++----- .../util/pdf/metadata_aggregators.py | 1 - .../util/pipeline/pipeline_builders.py | 6 +- src/nv_ingest/util/pipeline/stage_builders.py | 51 +- src/util/image_viewer.py | 27 +- 
.../docx/test_docx_helper.py | 17 +- .../image/test_image_handlers.py | 137 ----- .../pptx/test_pptx_helper.py | 20 +- .../test_message_broker_task_source.py | 2 +- .../schemas/test_ingest_job_schema.py | 10 +- .../multimodal_test_raw_results.json | 1 + .../nv_ingest/util/converters/test_formats.py | 101 ++++ tests/nv_ingest/util/nim/test_cached.py | 2 +- tests/nv_ingest/util/nim/test_yolox.py | 154 +++-- .../nv_ingest_client/client/test_interface.py | 25 +- .../nv_ingest_client/util/test_milvus_util.py | 67 +++ 46 files changed, 2152 insertions(+), 1185 deletions(-) create mode 100644 src/nv_ingest/schemas/docx_extractor_schema.py create mode 100644 src/nv_ingest/schemas/processing_job_schema.py create mode 100644 src/nv_ingest/util/converters/formats.py create mode 100644 tests/nv_ingest/util/converters/multimodal_test_raw_results.json create mode 100644 tests/nv_ingest/util/converters/test_formats.py create mode 100644 tests/nv_ingest_client/util/test_milvus_util.py diff --git a/client/src/nv_ingest_client/client/interface.py b/client/src/nv_ingest_client/client/interface.py index e5746651..0d4e3b0d 100644 --- a/client/src/nv_ingest_client/client/interface.py +++ b/client/src/nv_ingest_client/client/interface.py @@ -27,8 +27,8 @@ from nv_ingest_client.primitives.tasks import SplitTask from nv_ingest_client.primitives.tasks import StoreEmbedTask from nv_ingest_client.primitives.tasks import StoreTask -from nv_ingest_client.primitives.tasks import VdbUploadTask from nv_ingest_client.util.util import filter_function_kwargs +from nv_ingest_client.util.milvus import MilvusOperator DEFAULT_JOB_QUEUE_ID = "morpheus_task_queue" @@ -74,6 +74,7 @@ def __init__( self._documents = documents or [] self._client = client self._job_queue_id = job_queue_id + self._vdb_bulk_upload = None if self._client is None: client_kwargs = filter_function_kwargs(NvIngestClient, **kwargs) @@ -223,7 +224,10 @@ def ingest(self, **kwargs: Any) -> List[Dict[str, Any]]: fetch_kwargs = 
filter_function_kwargs(self._client.fetch_job_result, **kwargs) result = self._client.fetch_job_result(self._job_ids, **fetch_kwargs) - + if self._vdb_bulk_upload: + self._vdb_bulk_upload.run(result) + # only upload as part of jobs user specified this action + self._vdb_bulk_upload = None return result def ingest_async(self, **kwargs: Any) -> Future: @@ -271,6 +275,11 @@ def _done_callback(future): for future in future_to_job_id: future.add_done_callback(_done_callback) + if self._vdb_bulk_upload: + self._vdb_bulk_upload.run(combined_future) + # only upload as part of jobs user specified this action + self._vdb_bulk_upload = None + return combined_future @ensure_job_specs @@ -454,7 +463,6 @@ def store_embed(self, **kwargs: Any) -> "Ingestor": return self - @ensure_job_specs def vdb_upload(self, **kwargs: Any) -> "Ingestor": """ Adds a VdbUploadTask to the batch job specification. @@ -469,8 +477,7 @@ def vdb_upload(self, **kwargs: Any) -> "Ingestor": Ingestor Returns self for chaining. """ - vdb_upload_task = VdbUploadTask(**kwargs) - self._job_specs.add_task(vdb_upload_task) + self._vdb_bulk_upload = MilvusOperator(**kwargs) return self diff --git a/client/src/nv_ingest_client/nv_ingest_cli.py b/client/src/nv_ingest_client/nv_ingest_cli.py index fc6b96f4..f3cb0b9a 100644 --- a/client/src/nv_ingest_client/nv_ingest_cli.py +++ b/client/src/nv_ingest_client/nv_ingest_cli.py @@ -120,7 +120,7 @@ --task 'extract:{"document_type":"pdf", "extract_method":"unstructured_io"}' --task 'extract:{"document_type":"docx", "extract_text":true, "extract_images":true}' --task 'store:{"content_type":"image", "store_method":"minio", "endpoint":"minio:9000"}' - --task 'embed:{"text":true, "tables":true}' + --task 'embed' --task 'vdb_upload' --task 'caption:{}' @@ -143,8 +143,6 @@ - embed: Computes embeddings on multimodal extractions. Options: - filter_errors (bool): Flag to filter embedding errors. Optional. - - tables (bool): Flag to create embeddings for table extractions. Optional. 
- - text (bool): Flag to create embeddings for text extractions. Optional. \b - extract: Extracts content from documents, customizable per document type. Can be specified multiple times for different 'document_type' values. diff --git a/client/src/nv_ingest_client/primitives/tasks/caption.py b/client/src/nv_ingest_client/primitives/tasks/caption.py index 0f7297fe..adb5b922 100644 --- a/client/src/nv_ingest_client/primitives/tasks/caption.py +++ b/client/src/nv_ingest_client/primitives/tasks/caption.py @@ -24,6 +24,7 @@ class CaptionTaskSchema(BaseModel): model_name: Optional[str] = None model_config = ConfigDict(extra="forbid") + model_config["protected_namespaces"] = () class CaptionTask(Task): diff --git a/client/src/nv_ingest_client/primitives/tasks/embed.py b/client/src/nv_ingest_client/primitives/tasks/embed.py index 2949bc68..6bd51049 100644 --- a/client/src/nv_ingest_client/primitives/tasks/embed.py +++ b/client/src/nv_ingest_client/primitives/tasks/embed.py @@ -9,7 +9,7 @@ import logging from typing import Dict -from pydantic import BaseModel +from pydantic import BaseModel, root_validator from .task_base import Task @@ -17,10 +17,22 @@ class EmbedTaskSchema(BaseModel): - text: bool = True - tables: bool = True filter_errors: bool = False + @root_validator(pre=True) + def handle_deprecated_fields(cls, values): + if "text" in values: + logger.warning( + "'text' parameter is deprecated and will be ignored. Future versions will remove this argument." + ) + values.pop("text") + if "tables" in values: + logger.warning( + "'tables' parameter is deprecated and will be ignored. Future versions will remove this argument." 
+ ) + values.pop("tables") + return values + class Config: extra = "forbid" @@ -30,13 +42,22 @@ class EmbedTask(Task): Object for document embedding task """ - def __init__(self, text: bool = True, tables: bool = True, filter_errors: bool = False) -> None: + def __init__(self, text: bool = None, tables: bool = None, filter_errors: bool = False) -> None: """ Setup Embed Task Config """ super().__init__() - self._text = text - self._tables = tables + + if text is not None: + logger.warning( + "'text' parameter is deprecated and will be ignored. Future versions will remove this argument." + ) + + if tables is not None: + logger.warning( + "'tables' parameter is deprecated and will be ignored. Future versions will remove this argument." + ) + self._filter_errors = filter_errors def __str__(self) -> str: @@ -45,8 +66,6 @@ def __str__(self) -> str: """ info = "" info += "Embed Task:\n" - info += f" text: {self._text}\n" - info += f" tables: {self._tables}\n" info += f" filter_errors: {self._filter_errors}\n" return info @@ -56,8 +75,6 @@ def to_dict(self) -> Dict: """ task_properties = { - "text": self._text, - "tables": self._tables, "filter_errors": False, } diff --git a/client/src/nv_ingest_client/util/milvus.py b/client/src/nv_ingest_client/util/milvus.py index 9e6e3230..8c8a9b6f 100644 --- a/client/src/nv_ingest_client/util/milvus.py +++ b/client/src/nv_ingest_client/util/milvus.py @@ -18,6 +18,87 @@ from typing import List import time from urllib.parse import urlparse +from typing import Union, Dict + + +def _dict_to_params(collections_dict: dict, write_params: dict): + params_tuple_list = [] + for coll_name, data_type in collections_dict.items(): + cp_write_params = write_params.copy() + enabled_dtypes = { + "enable_text": False, + "enable_charts": False, + "enable_tables": False, + } + if not isinstance(data_type, list): + data_type = [data_type] + for d_type in data_type: + enabled_dtypes[f"enable_{d_type}"] = True + cp_write_params.update(enabled_dtypes) + 
params_tuple_list.append((coll_name, cp_write_params)) + return params_tuple_list + + +class MilvusOperator: + def __init__( + self, + collection_name: Union[str, Dict] = "nv_ingest_collection", + milvus_uri: str = "http://localhost:19530", + sparse: bool = True, + recreate: bool = True, + gpu_index: bool = True, + gpu_search: bool = False, + dense_dim: int = 1024, + minio_endpoint: str = "localhost:9000", + enable_text: bool = True, + enable_charts: bool = True, + enable_tables: bool = True, + bm25_save_path: str = "bm25_model.json", + compute_bm25_stats: bool = True, + access_key: str = "minioadmin", + secret_key: str = "minioadmin", + bucket_name: str = "a-bucket", + **kwargs, + ): + self.milvus_kwargs = locals() + self.milvus_kwargs.pop("self") + self.collection_name = self.milvus_kwargs.pop("collection_name") + self.milvus_kwargs.pop("kwargs", None) + + def get_connection_params(self): + conn_dict = { + "milvus_uri": self.milvus_kwargs["milvus_uri"], + "sparse": self.milvus_kwargs["sparse"], + "recreate": self.milvus_kwargs["recreate"], + "gpu_index": self.milvus_kwargs["gpu_index"], + "gpu_search": self.milvus_kwargs["gpu_search"], + "dense_dim": self.milvus_kwargs["dense_dim"], + } + return (self.collection_name, conn_dict) + + def get_write_params(self): + write_params = self.milvus_kwargs.copy() + del write_params["recreate"] + del write_params["gpu_index"] + del write_params["gpu_search"] + del write_params["dense_dim"] + + return (self.collection_name, write_params) + + def run(self, records): + collection_name, create_params = self.get_connection_params() + _, write_params = self.get_write_params() + if isinstance(collection_name, str): + create_nvingest_collection(collection_name, **create_params) + write_to_nvingest_collection(records, collection_name, **write_params) + elif isinstance(collection_name, dict): + split_params_list = _dict_to_params(collection_name, write_params) + for sub_params in split_params_list: + coll_name, sub_write_params = 
sub_params + create_nvingest_collection(coll_name, **create_params) + write_to_nvingest_collection(records, coll_name, **sub_write_params) + else: + raise ValueError(f"Unsupported type for collection_name detected: {type(collection_name)}") def create_nvingest_schema(dense_dim: int = 1024, sparse: bool = False) -> CollectionSchema: @@ -54,7 +135,9 @@ def create_nvingest_schema(dense_dim: int = 1024, sparse: bool = False) -> Colle return schema -def create_nvingest_index_params(sparse: bool = False, gpu_index: bool = True, gpu_search: bool = False) -> IndexParams: +def create_nvingest_index_params( + sparse: bool = False, gpu_index: bool = True, gpu_search: bool = False, local_index: bool = True +) -> IndexParams: """ Creates index params necessary to create an index for a collection. At a minimum, this function will create a dense embedding index but can also create a sparse @@ -78,27 +161,35 @@ def create_nvingest_index_params(sparse: bool = False, gpu_index: bool = True, g embedding index. 
""" index_params = MilvusClient.prepare_index_params() - if gpu_index: + if local_index: index_params.add_index( field_name="vector", index_name="dense_index", - index_type="GPU_CAGRA", + index_type="FLAT", metric_type="L2", - params={ - "intermediate_graph_degree": 128, - "graph_degree": 64, - "build_algo": "NN_DESCENT", - "adapt_for_cpu": "false" if gpu_search else "true", - }, ) else: - index_params.add_index( - field_name="vector", - index_name="dense_index", - index_type="HNSW", - metric_type="L2", - params={"M": 64, "efConstruction": 512}, - ) + if gpu_index: + index_params.add_index( + field_name="vector", + index_name="dense_index", + index_type="GPU_CAGRA", + metric_type="L2", + params={ + "intermediate_graph_degree": 128, + "graph_degree": 64, + "build_algo": "NN_DESCENT", + "adapt_for_cpu": "false" if gpu_search else "true", + }, + ) + else: + index_params.add_index( + field_name="vector", + index_name="dense_index", + index_type="HNSW", + metric_type="L2", + params={"M": 64, "efConstruction": 512}, + ) if sparse: index_params.add_index( field_name="sparse", @@ -178,6 +269,7 @@ def create_nvingest_collection( Returns a milvus collection schema, that represents the fields in the created collection. 
""" + local_index = False if urlparse(milvus_uri).scheme: connections.connect(uri=milvus_uri) server_version = utility.get_server_version() @@ -185,9 +277,14 @@ def create_nvingest_collection( gpu_index = False else: gpu_index = False + if milvus_uri.endswith(".db"): + local_index = True + client = MilvusClient(milvus_uri) schema = create_nvingest_schema(dense_dim=dense_dim, sparse=sparse) - index_params = create_nvingest_index_params(sparse=sparse, gpu_index=gpu_index, gpu_search=gpu_search) + index_params = create_nvingest_index_params( + sparse=sparse, gpu_index=gpu_index, gpu_search=gpu_search, local_index=local_index + ) create_collection(client, collection_name, schema, index_params, recreate=recreate) @@ -398,11 +495,12 @@ def write_to_nvingest_collection( collection_name: str, milvus_uri: str = "http://localhost:19530", minio_endpoint: str = "localhost:9000", - sparse: bool = False, + sparse: bool = True, enable_text: bool = True, enable_charts: bool = True, enable_tables: bool = True, bm25_save_path: str = "bm25_model.json", + compute_bm25_stats: bool = True, access_key: str = "minioadmin", secret_key: str = "minioadmin", bucket_name: str = "a-bucket", @@ -449,11 +547,14 @@ def write_to_nvingest_collection( else: stream = True bm25_ef = None - if sparse: + if sparse and compute_bm25_stats: bm25_ef = create_bm25_model( records, enable_text=enable_text, enable_charts=enable_charts, enable_tables=enable_tables ) bm25_ef.save(bm25_save_path) + elif sparse and not compute_bm25_stats: + bm25_ef = BM25EmbeddingFunction(build_default_analyzer(language="en")) + bm25_ef.load(bm25_save_path) client = MilvusClient(milvus_uri) schema = Collection(collection_name).schema if stream: @@ -535,7 +636,6 @@ def dense_retrieval( collection_name=collection_name, data=dense_embeddings, anns_field=dense_field, - param={"metric_type": "L2"}, limit=top_k, output_fields=output_fields, ) @@ -552,6 +652,8 @@ def hybrid_retrieval( dense_field: str = "vector", sparse_field: str = 
"sparse", output_fields: List[str] = ["text"], + gpu_search: bool = False, + local_index: bool = False, ): """ This function takes the input queries and conducts a hybrid @@ -591,22 +693,27 @@ def hybrid_retrieval( dense_embeddings.append(dense_model.get_query_embedding(query)) sparse_embeddings.append(_format_sparse_embedding(sparse_model.encode_queries([query]))) + s_param_1 = { + "metric_type": "L2", + } + if not gpu_search and not local_index: + s_param_1["params"] = {"ef": top_k * 2} + # Create search requests for both vector types search_param_1 = { "data": dense_embeddings, "anns_field": dense_field, - "param": { - "metric_type": "L2", - }, - "limit": top_k, + "param": s_param_1, + "limit": top_k * 2, } + dense_req = AnnSearchRequest(**search_param_1) search_param_2 = { "data": sparse_embeddings, "anns_field": sparse_field, "param": {"metric_type": "IP", "params": {"drop_ratio_build": 0.2}}, - "limit": top_k, + "limit": top_k * 2, } sparse_req = AnnSearchRequest(**search_param_2) @@ -628,6 +735,7 @@ def nvingest_retrieval( sparse_model_filepath: str = "bm25_model.json", model_name: str = "nvidia/nv-embedqa-e5-v5", output_fields: List[str] = ["text", "source", "content_metadata"], + gpu_search: bool = False, ): """ This function takes the input queries and conducts a hybrid/dense @@ -665,14 +773,24 @@ def nvingest_retrieval( List Nested list of top_k results per query. 
""" + local_index = False embed_model = NVIDIAEmbedding(base_url=embedding_endpoint, model=model_name) client = MilvusClient(milvus_uri) - + if milvus_uri.endswith(".db"): + local_index = True if hybrid: bm25_ef = BM25EmbeddingFunction(build_default_analyzer(language="en")) bm25_ef.load(sparse_model_filepath) results = hybrid_retrieval( - queries, collection_name, client, embed_model, bm25_ef, top_k, output_fields=output_fields + queries, + collection_name, + client, + embed_model, + bm25_ef, + top_k, + output_fields=output_fields, + gpu_search=gpu_search, + local_index=local_index, ) else: results = dense_retrieval(queries, collection_name, client, embed_model, top_k, output_fields=output_fields) diff --git a/docs/docs/user-guide/developer-guide/kubernetes-dev.md b/docs/docs/user-guide/developer-guide/kubernetes-dev.md index 0254d385..8051ffd4 100644 --- a/docs/docs/user-guide/developer-guide/kubernetes-dev.md +++ b/docs/docs/user-guide/developer-guide/kubernetes-dev.md @@ -8,16 +8,16 @@ This page describes how to use Kubernetes generally, and how to deploy nv-ingest ## Kubernetes Cluster -To start you need a Kubernetes cluster. We recommend using `kind` that creates a single Docker container with a Kubernetes cluster inside it. +To start you need a Kubernetes cluster. We recommend that you use `kind`, which creates a single Docker container with a Kubernetes cluster inside it. -Also, because this the `kind` cluster needs access to the GPUs on your system you need to install `kind-with-gpus`. The easiest way to do this is following the instructions laid out in this GitHub repo https://github.com/klueska/kind-with-gpus-examples/tree/master +Because the `kind` cluster needs access to the GPUs on your system, you need to install `nvkind`. +For details, see [Running kind clusters with GPUs using nvkind](https://github.com/NVIDIA/nvkind/tree/main). 
+`nvkind` provides the following benefits: -Benefits of this: +- Multiple developers on the same system can have isolated Kubernetes clusters +- Easy to create and delete clusters -- Allows many developers on the same system to have isolated Kubernetes clusters -- Enables easy creation and deletion of clusters - -Run the following **from the root of the repo** to create a configuration file for your cluster. +From the root of the repo, run the following code to create a configuration file for your cluster. ```yaml mkdir -p ./.tmp @@ -80,10 +80,10 @@ docker ps | grep kind # aaf5216a3cc8 kindest/node:v1.27.11 "/usr/local/bin/entr…" 44 seconds ago Up 42 seconds 127.0.0.1:45099->6443/tcp jdyer-control-plane ``` -`kind create cluster` will do the following: +`kind create cluster` does the following: -- add a context for this cluster to `${HOME}/.kube/config`, the default config file used by tools like `kubectl` -- change the default context to that one +- Add a context for the cluster to `${HOME}/.kube/config`, the default config file used by tools like `kubectl` +- Change the default context to the context of the new cluster You should be able to use `kubectl` immediately, and it should be pointed at that cluster you just created. @@ -100,22 +100,23 @@ NAME STATUS ROLES AGE VERSION jdyer-control-plane Ready control-plane 63s v1.27.11 ``` -Note: All of the containers created inside your Kubernetes cluster will not show up when you run `docker ps` as they are nested within a separate containerd namespace. +Note: The containers created inside your Kubernetes cluster do not appear when you run `docker ps` +because they are nested within a separate containerd namespace. + +For help with issues that arise, see [Troubleshooting](#troubleshooting). -Refer to "debugging tools" in the "Troubleshooting" section. ## Skaffold -Now that you have a Kubernetes cluster, you can use `skaffold` to build and deploy your development environment. 
+Now that you have a Kubernetes cluster, you can use [Skaffold](https://skaffold.dev/) to build and deploy your development environment. -Skaffold does a few things for you in a single command: +In a single command, Skaffold does the following: -- Build containers from the current directory (via `docker build`). -- Install the retriever-ingest helm charts (via `helm install`). -- Apply additional Kubernetes manifests (via `kustomize`). -- Hot reloading - skaffold watches your local directory for changes and syncs them into the Kubernetes container. - - _for details on this, see "Hot reloading" below ([link](#hot-reloading))_ -- Port forwards the -ingest service to the host. +- Build containers from the current directory (via `docker build`) +- Install the retriever-ingest helm charts (via `helm install`) +- Apply additional Kubernetes manifests (via `kustomize`) +- Hot reloading - Skaffold watches your local directory for changes and syncs them into the Kubernetes container +- Port forwards the ingest service to the host ### Directory Structure @@ -140,7 +141,9 @@ Skaffold does a few things for you in a single command: The retriever-ingest service's deployment requires pulling in configurations for other services from third-party sources, for example, Elasticsearch, OpenTelemetry, and Postgres. -The first time you try to deploy this project to a local Kubernetes, you may need to tell your local version of `Helm` (a package manager for Kubernetes configurations) where to find those third-party things, by running something like the following. +The first time you deploy this project to a local Kubernetes, +you might need to tell your local version of `Helm` (a package manager for Kubernetes configurations) +where to find third-party services by running code similar to the following. 
```shell helm repo add \ @@ -164,11 +167,12 @@ helm repo add \ https://charts.bitnami.com/bitnami ``` -For the full list of repositories, refer to the `dependencies` section in [this project's Chart.yaml](../../helm/Chart.yaml). +For the full list of repositories, refer to the `dependencies` section in the [Chart.yaml](../../../../helm/Chart.yaml) file of this project. #### NVIDIA GPU Support -In order for the deployed Kubernetes pods to access the NVIDIA GPU resources, the [Nvidia k8s-device-plugin](https://github.com/NVIDIA/k8s-device-plugin) must be installed. There are a multitude of configurations for this plugin but for a straight forward route to start development you can simply run. +For the Kubernetes pods to access the NVIDIA GPU resources, you must install the [NVIDIA device plugin for Kubernetes](https://github.com/NVIDIA/k8s-device-plugin). +There are many configurations for this plugin, but to start development simply run the following code. ```shell kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.15.0/deployments/static/nvidia-device-plugin.yml @@ -197,8 +201,9 @@ data: EOF ``` -An NGC personal API key is needed to access models and images hosted on NGC. -Make sure that you have followed the steps of _[Ensure you have access to NGC](./index.md#ensure-you-have-access-to-ngc)_. Next, store the key in an environment variable: +You need an NGC personal API key to access models and images that are hosted on NGC. +First, [Generate an API key](ngc-api-key.md#generate-an-api-key). +Next, store the key in an environment variable by running the following code. ```shell export NGC_API_KEY="" @@ -253,9 +258,10 @@ Deployments stabilized in 23.08 seconds Watching for changes... ``` -When you run this command, `skaffold dev` finds a random open port on the system and exposes the retriever-ingest service on that port ([skaffold docs](https://skaffold.dev/docs/port-forwarding/)). 
+When you run this command, `skaffold dev` finds a random open port on the system and exposes the retriever-ingest service on that port. +For more information, see [Port Forwarding](https://skaffold.dev/docs/port-forwarding/). -You can find that port in `skaffold`'s logs, in a statement like this: +You can find that port in `skaffold`'s logs, in a statement like the following. ```bash Port forwarding Service/nv-ingest in namespace , remote port http -> http://0.0.0.0:4503 @@ -283,7 +289,9 @@ curl \ "${API_HOST}/health" ``` -Additionally, running `skaffold verify` in a new terminal will run verification tests against the service ([integration tests](https://skaffold.dev/docs/verify/)). These are very lightweight health checks, and should not be confused with actual integration tests. +When you run `skaffold verify` in a new terminal, Skaffold runs verification tests against the service. +These are very lightweight health checks, and should not be confused with integration tests. +For more information, see [Verify](https://skaffold.dev/docs/verify/). ## Clean Up @@ -320,28 +328,25 @@ kubectl exec \ -- sh ``` -For an interactive, live-updating experience, try `k9s`. +For an interactive, live-updating experience, try [k9s](https://k9scli.io/). To launch it, run `k9s`. ```shell k9s ``` -You should see something like the following. - -![k9s example](./media/k9s-example.png){width=80%} - -For details on how to use it, refer to https://k9scli.io/topics/commands/. ### Installing Helm Repositories -You could encounter an error like this: +You could encounter an error like the following. +This indicates that your local installation of `Helm` (a package manager for Kubernetes configurations) +doesn't know how to access a remote repository containing Kubernetes configurations. -> _Error: no repository definition for https://helm.dask.org. 
Please add the missing repos via 'helm repo add'_ - -This indicates that your local installation of `Helm` (sort of a package manager for Kubernetes configurations) doesn't know how to access a remote repository containing Kubernetes configurations. +```shell +Error: no repository definition for https://helm.dask.org. Please add the missing repos via 'helm repo add' +``` -As that error message says, run `help repo add` with that URL and an informative name. +To resolve this issue, run `helm repo add` with the URL and an informative name. ```shell helm repo add \ @@ -363,12 +368,11 @@ Cleaning up... building helm dependencies: exit status 1 ``` -Seeing only "building helm dependencies" likely means you ran `skaffold dev` or `skaffold run` in a fairly quiet mode. - -Rerun those commands with something like `-v info` or `-v debug` to get more information about what specifically failed. +If you only see `building helm dependencies`, you probably ran `skaffold dev` or `skaffold run` in quiet mode. +Rerun the commands with `-v info` or `-v debug` to get more information about what failed. 
## References -- Helm quickstart: https://helm.sh/docs/intro/quickstart/ -- `kind` docs: https://kind.sigs.k8s.io/ -- `skaffold` docs: https://skaffold.dev/docs/ +- [Helm Quickstart](https://helm.sh/docs/intro/quickstart/) +- [Kind Quickstart](https://kind.sigs.k8s.io/) +- [Skaffold Quickstart](https://skaffold.dev/docs/quickstart) diff --git a/src/nv_ingest/api/v1/ingest.py b/src/nv_ingest/api/v1/ingest.py index 7618f078..628f3f79 100644 --- a/src/nv_ingest/api/v1/ingest.py +++ b/src/nv_ingest/api/v1/ingest.py @@ -10,27 +10,33 @@ # pylint: skip-file +from io import BytesIO +from typing import Annotated, Dict, List import base64 import json import logging import time import traceback -from io import BytesIO -from typing import Annotated +import uuid from fastapi import APIRouter, Request, Response from fastapi import Depends -from fastapi import File +from fastapi import File, UploadFile, Form from fastapi import HTTPException -from fastapi import UploadFile +from fastapi.responses import JSONResponse from nv_ingest_client.primitives.jobs.job_spec import JobSpec from nv_ingest_client.primitives.tasks.extract import ExtractTask from opentelemetry import trace from redis import RedisError +from nv_ingest.util.converters.formats import ingest_json_results_to_blob + from nv_ingest.schemas.message_wrapper_schema import MessageWrapper +from nv_ingest.schemas.processing_job_schema import ConversionStatus, ProcessingJob from nv_ingest.service.impl.ingest.redis_ingest_service import RedisIngestService from nv_ingest.service.meta.ingest.ingest_service_meta import IngestServiceMeta +from nv_ingest_client.primitives.tasks.table_extraction import TableExtractionTask +from nv_ingest_client.primitives.tasks.chart_extraction import ChartExtractionTask logger = logging.getLogger("uvicorn") tracer = trace.get_tracer(__name__) @@ -184,3 +190,160 @@ async def fetch_job(job_id: str, ingest_service: INGEST_SERVICE_T): # Catch-all for other exceptions, returning a 500 Internal Server 
Error traceback.print_exc() raise HTTPException(status_code=500, detail=f"Nv-Ingest Internal Server Error: {str(ex)}") + + +@router.post("/convert") +async def convert_pdf( + ingest_service: INGEST_SERVICE_T, + files: List[UploadFile] = File(...), + job_id: str = Form(...), + extract_text: bool = Form(True), + extract_images: bool = Form(True), + extract_tables: bool = Form(True), + extract_charts: bool = Form(False), +) -> Dict[str, str]: + try: + + if job_id is None: + job_id = str(uuid.uuid4()) + logger.debug(f"JobId is None, Created JobId: {job_id}") + + submitted_jobs: List[ProcessingJob] = [] + for file in files: + file_stream = BytesIO(file.file.read()) + doc_content = base64.b64encode(file_stream.read()).decode("utf-8") + + try: + content_type = file.content_type.split("/")[1] + except Exception: + err_message = f"Unsupported content_type: {file.content_type}" + logger.error(err_message) + raise HTTPException(status_code=500, detail=err_message) + + job_spec = JobSpec( + document_type=content_type, + payload=doc_content, + source_id=file.filename, + source_name=file.filename, + extended_options={ + "tracing_options": { + "trace": True, + "ts_send": time.time_ns(), + } + }, + ) + + extract_task = ExtractTask( + document_type=content_type, + extract_text=extract_text, + extract_images=extract_images, + extract_tables=extract_tables, + extract_charts=extract_charts, + ) + + job_spec.add_task(extract_task) + + # Conditionally add tasks as needed. 
+ if extract_tables: + table_data_extract = TableExtractionTask() + job_spec.add_task(table_data_extract) + + if extract_charts: + chart_data_extract = ChartExtractionTask() + job_spec.add_task(chart_data_extract) + + submitted_job_id = await ingest_service.submit_job( + MessageWrapper(payload=json.dumps(job_spec.to_dict())), job_id + ) + + processing_job = ProcessingJob( + submitted_job_id=submitted_job_id, + filename=file.filename, + status=ConversionStatus.IN_PROGRESS, + ) + + submitted_jobs.append(processing_job) + + await ingest_service.set_processing_cache(job_id, submitted_jobs) + + logger.debug(f"Submitted: {len(submitted_jobs)} documents of type: '{content_type}' for processing") + + return { + "task_id": job_id, + "status": "processing", + "status_url": f"/status/{job_id}", + } + + except Exception as e: + logger.error(f"Error starting conversion: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/status/{job_id}") +async def get_status(ingest_service: INGEST_SERVICE_T, job_id: str): + t_start = time.time() + try: + processing_jobs = await ingest_service.get_processing_cache(job_id) + except Exception as e: + logger.error(f"Error getting status: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + + updated_cache: List[ProcessingJob] = [] + num_ready_docs = 0 + + for processing_job in processing_jobs: + logger.debug(f"submitted_job_id: {processing_job.submitted_job_id} - Status: {processing_job.status}") + + if processing_job.status == ConversionStatus.IN_PROGRESS: + # Attempt to fetch the job from the ingest service + try: + job_response = await ingest_service.fetch_job(processing_job.submitted_job_id) + + job_response = json.dumps(job_response) + + # Convert JSON into pseudo markdown format + blob_response = ingest_json_results_to_blob(job_response) + + processing_job.raw_result = job_response + processing_job.content = blob_response + processing_job.status = ConversionStatus.SUCCESS + num_ready_docs = 
num_ready_docs + 1 + updated_cache.append(processing_job) + + except TimeoutError: + logger.error(f"TimeoutError getting result for job_id: {processing_job.submitted_job_id}") + updated_cache.append(processing_job) + continue + except RedisError: + logger.error(f"RedisError getting result for job_id: {processing_job.submitted_job_id}") + updated_cache.append(processing_job) + continue + else: + logger.debug(f"{processing_job.submitted_job_id} has already finished successfully ....") + num_ready_docs = num_ready_docs + 1 + updated_cache.append(processing_job) + + await ingest_service.set_processing_cache(job_id, updated_cache) + + logger.debug(f"{num_ready_docs}/{len(updated_cache)} complete") + if num_ready_docs == len(updated_cache): + results = [] + raw_results = [] + for result in updated_cache: + results.append( + { + "filename": result.filename, + "status": "success", + "content": result.content, + } + ) + raw_results.append(result.raw_result) + + return JSONResponse( + content={"status": "completed", "result": results}, + status_code=200, + ) + else: + # Not yet ready ... + logger.debug(f"/status/{job_id} endpoint execution time: {time.time() - t_start}") + raise HTTPException(status_code=202, detail="Job is not ready yet. 
Retry later.") diff --git a/src/nv_ingest/extraction_workflows/docx/docx_helper.py b/src/nv_ingest/extraction_workflows/docx/docx_helper.py index 6bfa12b6..44a946ad 100644 --- a/src/nv_ingest/extraction_workflows/docx/docx_helper.py +++ b/src/nv_ingest/extraction_workflows/docx/docx_helper.py @@ -36,7 +36,14 @@ logger = logging.getLogger(__name__) -def python_docx(docx: Union[str, Path, IO], extract_text: bool, extract_images: bool, extract_tables: bool, **kwargs): +def python_docx( + docx: Union[str, Path, IO], + extract_text: bool, + extract_images: bool, + extract_tables: bool, + extract_charts: bool, + **kwargs +): """ Helper function that use python-docx to extract text from a bytestream document @@ -57,6 +64,8 @@ def python_docx(docx: Union[str, Path, IO], extract_text: bool, extract_images: Specifies whether to extract images. extract_tables : bool Specifies whether to extract tables. + extract_charts : bool + Specifies whether to extract charts. **kwargs The keyword arguments are used for additional extraction parameters. 
@@ -73,10 +82,12 @@ def python_docx(docx: Union[str, Path, IO], extract_text: bool, extract_images: source_id = row_data["source_id"] # get text_depth text_depth = kwargs.get("text_depth", "document") - text_depth = TextTypeEnum[text_depth.upper()] + text_depth = TextTypeEnum(text_depth) # get base metadata metadata_col = kwargs.get("metadata_column", "metadata") + docx_extractor_config = kwargs.get("docx_extraction_config", {}) + base_unified_metadata = row_data[metadata_col] if metadata_col in row_data.index else {} # get base source_metadata @@ -103,7 +114,9 @@ def python_docx(docx: Union[str, Path, IO], extract_text: bool, extract_images: } # Extract data from the document using python-docx - doc = DocxReader(docx, source_metadata) - extracted_data = doc.extract_data(base_unified_metadata, text_depth, extract_text, extract_tables, extract_images) + doc = DocxReader(docx, source_metadata, extraction_config=docx_extractor_config) + extracted_data = doc.extract_data( + base_unified_metadata, text_depth, extract_text, extract_charts, extract_tables, extract_images + ) return extracted_data diff --git a/src/nv_ingest/extraction_workflows/docx/docxreader.py b/src/nv_ingest/extraction_workflows/docx/docxreader.py index b550d936..b2920203 100644 --- a/src/nv_ingest/extraction_workflows/docx/docxreader.py +++ b/src/nv_ingest/extraction_workflows/docx/docxreader.py @@ -23,14 +23,16 @@ """ Parse document content and properties using python-docx """ - +import io import logging import re import uuid -from typing import Dict +from typing import Dict, Optional, Union from typing import List from typing import Tuple +from collections import defaultdict + import pandas as pd from docx import Document from docx.image.constants import MIME_TYPE @@ -42,7 +44,11 @@ from docx.text.hyperlink import Hyperlink from docx.text.paragraph import Paragraph from docx.text.run import Run +from pandas import DataFrame +from build.lib.nv_ingest.extraction_workflows.image.image_handlers import 
load_and_preprocess_image +from nv_ingest.extraction_workflows.image.image_handlers import extract_tables_and_charts_from_images +from nv_ingest.schemas.image_extractor_schema import ImageConfigSchema from nv_ingest.schemas.metadata_schema import ContentTypeEnum from nv_ingest.schemas.metadata_schema import ImageTypeEnum from nv_ingest.schemas.metadata_schema import StdContentDescEnum @@ -50,6 +56,7 @@ from nv_ingest.schemas.metadata_schema import validate_metadata from nv_ingest.util.converters import bytetools from nv_ingest.util.detectors.language import detect_language +from nv_ingest.util.pdf.metadata_aggregators import construct_table_and_chart_metadata, CroppedImageWithContent PARAGRAPH_FORMATS = ["text", "markdown"] TABLE_FORMATS = ["markdown", "markdown_light", "csv", "tag"] @@ -92,7 +99,7 @@ def __str__(self): def _update_source_meta_data(self): """ - Update the source meta data with the document's core properties + Update the source metadata with the document's core properties """ self.source_metadata.update( { @@ -132,9 +139,11 @@ def __init__( handle_text_styles: bool = True, image_tag="", table_tag="", + extraction_config: Dict = None, ): if paragraph_format not in PARAGRAPH_FORMATS: raise ValueError(f"Unknown paragraph format {paragraph_format}. Supported formats are: {PARAGRAPH_FORMATS}") + if table_format not in TABLE_FORMATS: raise ValueError(f"Unknown table format {table_format}. Supported formats are: {TABLE_FORMATS}") @@ -161,18 +170,47 @@ def __init__( # placeholders for metadata extraction self._accumulated_text = [] self._extracted_data = [] - self._prev_para_images = [] + self._extraction_config = extraction_config if extraction_config else {} + self._pending_images = [] self._prev_para_image_idx = 0 + self._prev_para_images = [] def is_text_empty(self, text: str) -> bool: """ - Check if text is available + Check if the given text is empty or matches the empty text pattern. + + Parameters + ---------- + text : str + The text to check. 
+ + Returns + ------- + bool + True if the text is empty or matches the empty text pattern, False otherwise. """ + return self.empty_text_pattern.match(text) is not None - def format_text(self, text, bold: bool, italic: bool, underline: bool) -> str: + def format_text(self, text: str, bold: bool, italic: bool, underline: bool) -> str: """ - Apply markdown style to text (bold, italic, underline). + Apply markdown styling (bold, italic, underline) to the given text. + + Parameters + ---------- + text : str + The text to format. + bold : bool + Whether to apply bold styling. + italic : bool + Whether to apply italic styling. + underline : bool + Whether to apply underline styling. + + Returns + ------- + str + The formatted text with the applied styles. """ if self.is_text_empty(text): @@ -198,9 +236,20 @@ def format_text(self, text, bold: bool, italic: bool, underline: bool) -> str: return text - def format_paragraph(self, paragraph: Paragraph) -> Tuple[str, List[Image]]: - f""" - Format a paragraph into text. Supported formats are: {PARAGRAPH_FORMATS} + def format_paragraph(self, paragraph: "Paragraph") -> Tuple[str, List["Image"]]: + """ + Format a paragraph into styled text and extract associated images. + + Parameters + ---------- + paragraph : Paragraph + The paragraph to format. This includes text and potentially embedded images. + + Returns + ------- + tuple of (str, list of Image) + - The formatted paragraph text with markdown styling applied. + - A list of extracted images from the paragraph. """ paragraph_images = [] @@ -257,10 +306,22 @@ def format_paragraph(self, paragraph: Paragraph) -> Tuple[str, List[Image]]: paragraph_text = paragraph_text.strip() return paragraph_text, paragraph_images - def format_cell(self, cell: _Cell) -> Tuple[str, List[Image]]: + def format_cell(self, cell: "_Cell") -> Tuple[str, List["Image"]]: """ - Format a table cell into markdown text + Format a table cell into Markdown text and extract associated images. 
+ + Parameters + ---------- + cell : _Cell + The table cell to format. + + Returns + ------- + tuple of (str, list of Image) + - The formatted text of the cell with markdown styling applied. + - A list of images extracted from the cell. """ + if self.paragraph_format == "markdown": newline = "
" else: @@ -268,10 +329,23 @@ def format_cell(self, cell: _Cell) -> Tuple[str, List[Image]]: paragraph_texts, paragraph_images = zip(*[self.format_paragraph(p) for p in cell.paragraphs]) return newline.join(paragraph_texts), paragraph_images - def format_table(self, table: Table) -> Tuple[str, List[Image]]: - f""" - Format a table into text. Supported formats are: {TABLE_FORMATS} + def format_table(self, table: "Table") -> Tuple[Optional[str], List["Image"], DataFrame]: + """ + Format a table into text, extract images, and represent it as a DataFrame. + + Parameters + ---------- + table : Table + The table to format. + + Returns + ------- + tuple of (str or None, list of Image, DataFrame) + - The formatted table as text, using the specified format (e.g., markdown, CSV). + - A list of images extracted from the table. + - A DataFrame representation of the table's content. """ + rows = [[self.format_cell(cell) for cell in row.cells] for row in table.rows] texts = [[text for text, _ in row] for row in rows] table_images = [image for row in rows for _, images in row for image in images] @@ -295,9 +369,24 @@ def format_table(self, table: Table) -> Tuple[str, List[Image]]: @staticmethod def apply_text_style(style: str, text: str, level: int = 0) -> str: """ - Apply style on a paragraph (heading, list, title, subtitle). - Not recommended if the document has been converted from pdf. + Apply a specific text style (e.g., heading, list, title, subtitle) to the given text. + + Parameters + ---------- + style : str + The style to apply. Supported styles include headings ("Heading 1" to "Heading 9"), + list items ("List"), and document structures ("Title", "Subtitle"). + text : str + The text to style. + level : int, optional + The indentation level for the styled text. Default is 0. + + Returns + ------- + str + The text with the specified style and indentation applied. 
""" + if re.match(r"^Heading [1-9]$", style): n = int(style.split(" ")[-1]) text = f"{'#' * n} {text}" @@ -313,43 +402,62 @@ def apply_text_style(style: str, text: str, level: int = 0) -> str: return text @staticmethod - def docx_content_type_to_image_type(content_type: MIME_TYPE) -> str: + def docx_content_type_to_image_type(content_type: "MIME_TYPE") -> str: """ - python-docx stores the content type in the image header as a string of format - "image/jpeg" etc. This is converted into one of ImageTypeEnum. - Reference: src/docx/image/jpeg.py + Convert a DOCX content type string to an image type. + + Parameters + ---------- + content_type : MIME_TYPE + The content type string from the image header, e.g., "image/jpeg". + + Returns + ------- + str + The image type extracted from the content type string. """ + return content_type.split("/")[1] - def _construct_image_metadata(self, image, para_idx, caption, base_unified_metadata): + def _construct_image_metadata( + self, para_idx: int, caption: str, base_unified_metadata: Dict, base64_img: str + ) -> List[Union[str, dict]]: """ - Fill the metadata for the extracted image + Build metadata for an image in a DOCX file. + + Parameters + ---------- + para_idx : int + The paragraph index containing the image. + caption : str + The caption associated with the image. + base_unified_metadata : dict + The base metadata to build upon. + base64_img : str + The image content encoded as a base64 string. + + Returns + ------- + list + A list containing the content type, validated metadata, and a unique identifier. """ - image_type = self.docx_content_type_to_image_type(image.content_type) - if ImageTypeEnum.has_value(image_type): - image_type = ImageTypeEnum[image_type.upper()] - - base64_img = bytetools.base64frombytes(image.blob) - # For docx there is no bounding box. The paragraph that follows the image is typically - # the caption. Add that para to the page nearby for now. 
fixme bbox = (0, 0, 0, 0) + caption_len = len(caption.splitlines()) + + page_idx = 0 # docx => single page + page_count = 1 + page_nearby_blocks = { "text": {"content": [], "bbox": []}, "images": {"content": [], "bbox": []}, "structured": {"content": [], "bbox": []}, } - caption_len = len(caption.splitlines()) + if caption_len: page_nearby_blocks["text"]["content"].append(caption) page_nearby_blocks["text"]["bbox"] = [[-1, -1, -1, -1]] * caption_len - page_block = para_idx - - # python-docx treats the entire document as a single page - page_count = 1 - page_idx = 0 - content_metadata = { "type": ContentTypeEnum.IMAGE, "description": StdContentDescEnum.DOCX_IMAGE, @@ -357,16 +465,15 @@ def _construct_image_metadata(self, image, para_idx, caption, base_unified_metad "hierarchy": { "page_count": page_count, "page": page_idx, - "block": page_block, + "block": para_idx, "line": -1, "span": -1, "nearby_objects": page_nearby_blocks, }, } - # bbox is not available in docx. the para following the image is typically the caption. 
image_metadata = { - "image_type": image_type, + "image_type": ImageTypeEnum.image_type_1, "structured_image_type": ImageTypeEnum.image_type_1, "caption": caption, "text": "", @@ -374,7 +481,6 @@ def _construct_image_metadata(self, image, para_idx, caption, base_unified_metad } unified_metadata = base_unified_metadata.copy() - unified_metadata.update( { "content": base64_img, @@ -386,24 +492,64 @@ def _construct_image_metadata(self, image, para_idx, caption, base_unified_metad validated_unified_metadata = validate_metadata(unified_metadata) - # Work around until https://github.com/apache/arrow/pull/40412 is resolved - return [ContentTypeEnum.IMAGE.value, validated_unified_metadata.model_dump(), str(uuid.uuid4())] + return [ + ContentTypeEnum.IMAGE.value, + validated_unified_metadata.model_dump(), + str(uuid.uuid4()), + ] - def _extract_para_images(self, images, para_idx, caption, base_unified_metadata, extracted_data): + def _extract_para_images( + self, images: List["Image"], para_idx: int, caption: str, base_unified_metadata: Dict + ) -> None: """ - Extract all images in a paragraph. These images share the same metadata. + Collect images from a paragraph and store them for metadata construction. + + Parameters + ---------- + images : list of Image + The images found in the paragraph. + para_idx : int + The index of the paragraph containing the images. + caption : str + The caption associated with the images. + base_unified_metadata : dict + The base metadata to associate with the images. 
+ + Returns + ------- + None """ + for image in images: logger.debug("image content_type %s para_idx %d", image.content_type, para_idx) logger.debug("image caption %s", caption) - extracted_image = self._construct_image_metadata(image, para_idx, caption, base_unified_metadata) - extracted_data.append(extracted_image) - def _construct_text_metadata(self, accumulated_text, para_idx, text_depth, base_unified_metadata): + # Simply append a tuple so we can build the final metadata in _finalize_images + self._pending_images.append((image, para_idx, caption, base_unified_metadata)) + + def _construct_text_metadata( + self, accumulated_text: List[str], para_idx: int, text_depth: "TextTypeEnum", base_unified_metadata: Dict + ) -> List[Union[str, dict]]: """ - Store the text with associated metadata. Docx uses the same scheme as - PDF. + Build metadata for text content in a DOCX file. + + Parameters + ---------- + accumulated_text : list of str + The accumulated text to include in the metadata. + para_idx : int + The paragraph index containing the text. + text_depth : TextTypeEnum + The depth of the text content (e.g., page-level, paragraph-level). + base_unified_metadata : dict + The base metadata to build upon. + + Returns + ------- + list + A list containing the content type, validated metadata, and a unique identifier. """ + if len(accumulated_text) < 1: return [] @@ -447,36 +593,37 @@ def _construct_text_metadata(self, accumulated_text, para_idx, text_depth, base_ return [ContentTypeEnum.TEXT.value, validated_unified_metadata.model_dump(), str(uuid.uuid4())] - def _extract_para_data( - self, child, base_unified_metadata, text_depth: TextTypeEnum, extract_images: bool, para_idx: int - ): + def _extract_para_text( + self, + paragraph, + paragraph_text, + base_unified_metadata: Dict, + text_depth: "TextTypeEnum", + para_idx: int, + ) -> None: """ - Process the text and images in a docx paragraph + Process the text, images, and styles in a DOCX paragraph. 
+ + Parameters + ---------- + paragraph: Paragraph + The paragraph to process. + paragraph_text: str + The text content of the paragraph. + base_unified_metadata : dict + The base metadata to associate with extracted data. + text_depth : TextTypeEnum + The depth of text extraction (e.g., block-level, document-level). + para_idx : int + The index of the paragraph being processed. + + Returns + ------- + None """ - # Paragraph - paragraph = Paragraph(child, self.document) - paragraph_text, paragraph_images = self.format_paragraph(paragraph) - - if self._prev_para_images: - # build image metadata with image from previous paragraph and text from current - self._extract_para_images( - self._prev_para_images, - self._prev_para_image_idx, - paragraph_text, - base_unified_metadata, - self._extracted_data, - ) - self._prev_para_images = [] - - if extract_images and paragraph_images: - # cache the images till the next paragraph is read - self._prev_para_images = paragraph_images - self._prev_para_image_idx = para_idx - - self.images += paragraph_images + # Handle text styles if desired if self.handle_text_styles: - # Get the level of the paragraph (especially for lists) try: numPr = paragraph._element.xpath("./w:pPr/w:numPr")[0] level = int(numPr.xpath("./w:ilvl/@w:val")[0]) @@ -486,6 +633,7 @@ def _extract_para_data( self._accumulated_text.append(paragraph_text + "\n") + # If text_depth is BLOCK, we flush after each paragraph if text_depth == TextTypeEnum.BLOCK: text_extraction = self._construct_text_metadata( self._accumulated_text, para_idx, text_depth, base_unified_metadata @@ -493,77 +641,233 @@ def _extract_para_data( self._extracted_data.append(text_extraction) self._accumulated_text = [] - def _extract_table_data(self, child, base_unified_metadata, text_depth: TextTypeEnum, para_idx: int): + def _finalize_images(self, extract_tables: bool, extract_charts: bool, **kwargs) -> None: + """ + Build and append final metadata for each pending image in batches. 
+ + Parameters + ---------- + extract_tables : bool + Whether to attempt table detection in images. + extract_charts : bool + Whether to attempt chart detection in images. + **kwargs + Additional configuration for image processing. + + Returns + ------- + None + """ + if not self._pending_images: + return + + # 1) Convert all pending images into numpy arrays (and also store base64 + context), + # so we can run detection on them in one go. + all_image_arrays = [] + image_info = [] # parallel list to hold (para_idx, caption, base_unified_metadata, base64_img) + + for docx_image, para_idx, caption, base_unified_metadata in self._pending_images: + # Convert docx image blob to BytesIO, then to numpy array + image_bytes = docx_image.blob + image_stream = io.BytesIO(image_bytes) + image_array = load_and_preprocess_image(image_stream) + base64_img = str(bytetools.base64frombytes(image_bytes)) + + all_image_arrays.append(image_array) + + # Keep track of all needed metadata so we can rebuild final entries + image_info.append((para_idx, caption, base_unified_metadata, base64_img)) + + # 2) If the user wants to detect tables/charts, do it in one pass for all images. 
+ detection_map = defaultdict(list) # maps image_index -> list of CroppedImageWithContent + + if extract_tables or extract_charts: + try: + # Perform the batched detection on all images + detection_results = extract_tables_and_charts_from_images( + images=all_image_arrays, + config=ImageConfigSchema(**self._extraction_config.model_dump()), + trace_info=kwargs.get("trace_info"), + ) + # detection_results is typically List[Tuple[int, CroppedImageWithContent]] + # Group by image_index + for image_idx, cropped_item in detection_results: + detection_map[image_idx].append(cropped_item) + + except Exception as e: + logger.error(f"Error extracting tables/charts in batch: {e}") + # If something goes wrong, we can fall back to empty detection map + # so that all images are treated normally + detection_map = {} + + # 3) For each pending image, decide if we found tables/charts or not. + for i, _ in enumerate(self._pending_images): + para_idx_i, caption_i, base_unified_metadata_i, base64_img_i = image_info[i] + + # If detection_map[i] is non-empty, we have found table(s)/chart(s). 
+ if i in detection_map and detection_map[i]: + for table_chart_data in detection_map[i]: + # Build structured metadata for each table or chart + structured_entry = construct_table_and_chart_metadata( + structured_image=table_chart_data, # A CroppedImageWithContent + page_idx=0, # docx => single page + page_count=1, + source_metadata=self.properties.source_metadata, + base_unified_metadata=base_unified_metadata_i, + ) + self._extracted_data.append(structured_entry) + else: + # Either detection was not requested, or no table/chart was found + image_entry = self._construct_image_metadata( + para_idx_i, + caption_i, + base_unified_metadata_i, + base64_img_i, + ) + self._extracted_data.append(image_entry) + + # 4) Clear out the pending images after finalizing + self._pending_images = [] + + def _extract_table_data( + self, + child, + base_unified_metadata: Dict, + ) -> None: """ - Process the text in a docx paragraph + Process the text and images in a DOCX table. + + Parameters + ---------- + child : element + The table element to process. + base_unified_metadata : dict + The base metadata to associate with extracted data. + text_depth : TextTypeEnum + The depth of text extraction (e.g., block-level, document-level). + para_idx : int + The index of the table being processed. 
+ + Returns + ------- + None """ + # Table table = Table(child, self.document) table_text, table_images, table_dataframe = self.format_table(table) + self.images += table_images self.tables.append(table_dataframe) - self._accumulated_text.append(table_text + "\n") - if text_depth == TextTypeEnum.BLOCK: - text_extraction = self._construct_text_metadata( - self._accumulated_text, para_idx, text_depth, base_unified_metadata + cropped_image_with_content = CroppedImageWithContent( + content=table_text, + image="", # no image content + bbox=(0, 0, 0, 0), + max_width=0, + max_height=0, + type_string="table", + ) + + self._extracted_data.append( + construct_table_and_chart_metadata( + structured_image=cropped_image_with_content, + page_idx=0, # docx => single page + page_count=1, + source_metadata=self.properties.source_metadata, + base_unified_metadata=base_unified_metadata, ) - if len(text_extraction) > 0: - self._extracted_data.append(text_extraction) - self._accumulated_text = [] + ) def extract_data( self, - base_unified_metadata, - text_depth: TextTypeEnum, + base_unified_metadata: Dict, + text_depth: "TextTypeEnum", extract_text: bool, + extract_charts: bool, extract_tables: bool, extract_images: bool, - ) -> Dict: + ) -> list[list[str | dict]]: """ - Iterate over paragraphs and tables + Iterate over paragraphs and tables in a DOCX document to extract data. + + Parameters + ---------- + base_unified_metadata : dict + The base metadata to associate with all extracted content. + text_depth : TextTypeEnum + The depth of text extraction (e.g., block-level, document-level). + extract_text : bool + Whether to extract text from the document. + extract_charts : bool + Whether to extract charts from the document. + extract_tables : bool + Whether to extract tables from the document. + extract_images : bool + Whether to extract images from the document. + + Returns + ------- + list of list + The extracted entries (text, table/chart and image metadata) collected from the document.
""" + self._accumulated_text = [] self._extracted_data = [] - - para_idx = 0 + self._pending_images = [] self._prev_para_images = [] self._prev_para_image_idx = 0 + para_idx = 0 + for child in self.document.element.body.iterchildren(): if isinstance(child, CT_P): - if not extract_text: - continue - self._extract_para_data(child, base_unified_metadata, text_depth, extract_images, para_idx) - - if isinstance(child, CT_Tbl): - if not extract_tables: - continue - self._extract_table_data(child, base_unified_metadata, text_depth, para_idx) + paragraph = Paragraph(child, self.document) + paragraph_text, paragraph_images = self.format_paragraph(paragraph) + + if extract_text: + self._extract_para_text( + paragraph, + paragraph_text, + base_unified_metadata, + text_depth, + para_idx, + ) + + if (extract_charts or extract_images or extract_tables) and paragraph_images: + self._prev_para_images = paragraph_images + self._prev_para_image_idx = para_idx + self._pending_images += [(image, para_idx, "", base_unified_metadata) for image in paragraph_images] + self.images += paragraph_images + + elif isinstance(child, CT_Tbl): + if extract_tables or extract_charts: + self._extract_table_data(child, base_unified_metadata) para_idx += 1 - # We treat the document as a single page + # If there's leftover text at the doc’s end if ( extract_text and text_depth in (TextTypeEnum.DOCUMENT, TextTypeEnum.PAGE) and len(self._accumulated_text) > 0 ): text_extraction = self._construct_text_metadata( - self._accumulated_text, -1, text_depth, base_unified_metadata + self._accumulated_text, + -1, + text_depth, + base_unified_metadata, ) - if len(text_extraction) > 0: + + if text_extraction: self._extracted_data.append(text_extraction) - if self._prev_para_images: - # if we got here it means that image was at the end of the document and there - # was no caption for the image - self._extract_para_images( - self._prev_para_images, - self._prev_para_image_idx, - "", - base_unified_metadata, - 
self._extracted_data, + # Final pass: Decide if images are just images or contain tables/charts + if extract_images or extract_tables or extract_charts: + self._finalize_images( + extract_tables=extract_tables, + extract_charts=extract_charts, + trace_info=None, ) return self._extracted_data diff --git a/src/nv_ingest/extraction_workflows/image/image_handlers.py b/src/nv_ingest/extraction_workflows/image/image_handlers.py index 6181d78c..f7b12982 100644 --- a/src/nv_ingest/extraction_workflows/image/image_handlers.py +++ b/src/nv_ingest/extraction_workflows/image/image_handlers.py @@ -27,11 +27,12 @@ import numpy as np from PIL import Image +from math import log from wand.image import Image as WandImage import nv_ingest.util.nim.yolox as yolox_utils from nv_ingest.extraction_workflows.pdf.doughnut_utils import crop_image -from nv_ingest.schemas.image_extractor_schema import ImageExtractorSchema +from nv_ingest.schemas.image_extractor_schema import ImageConfigSchema from nv_ingest.schemas.metadata_schema import AccessLevelEnum from nv_ingest.util.image_processing.transforms import numpy_to_base64 from nv_ingest.util.nim.helpers import create_inference_client @@ -107,79 +108,6 @@ def convert_svg_to_bitmap(image_stream: io.BytesIO) -> np.ndarray: return image_array -# TODO(Devin): Move to common file -def process_inference_results( - output_array: np.ndarray, - original_image_shapes: List[Tuple[int, int]], - num_classes: int, - conf_thresh: float, - iou_thresh: float, - min_score: float, - final_thresh: float, -): - """ - Process the model output to generate detection results and expand bounding boxes. - - Parameters - ---------- - output_array : np.ndarray - The raw output from the model inference. - original_image_shapes : List[Tuple[int, int]] - The shapes of the original images before resizing, used for scaling bounding boxes. - num_classes : int - The number of classes the model can detect. - conf_thresh : float - The confidence threshold for detecting objects. 
- iou_thresh : float - The Intersection Over Union (IoU) threshold for non-maximum suppression. - min_score : float - The minimum score for keeping a detection. - final_thresh: float - Threshold for keeping a bounding box applied after postprocessing. - - - Returns - ------- - List[dict] - A list of dictionaries, each containing processed detection results including expanded bounding boxes. - - Notes - ----- - This function applies non-maximum suppression to the model's output and scales the bounding boxes back to the - original image size. - - Examples - -------- - >>> output_array = np.random.rand(2, 100, 85) - >>> original_image_shapes = [(1536, 1536), (1536, 1536)] - >>> results = process_inference_results(output_array, original_image_shapes, 80, 0.5, 0.5, 0.1) - >>> len(results) - 2 - """ - pred = yolox_utils.postprocess_model_prediction( - output_array, num_classes, conf_thresh, iou_thresh, class_agnostic=True - ) - results = yolox_utils.postprocess_results(pred, original_image_shapes, min_score=min_score) - logger.debug(f"Number of results: {len(results)}") - logger.debug(f"Results: {results}") - - annotation_dicts = [yolox_utils.expand_chart_bboxes(annotation_dict) for annotation_dict in results] - inference_results = [] - - # Filter out bounding boxes below the final threshold - for annotation_dict in annotation_dicts: - new_dict = {} - if "table" in annotation_dict: - new_dict["table"] = [bb for bb in annotation_dict["table"] if bb[4] >= final_thresh] - if "chart" in annotation_dict: - new_dict["chart"] = [bb for bb in annotation_dict["chart"] if bb[4] >= final_thresh] - if "title" in annotation_dict: - new_dict["title"] = annotation_dict["title"] - inference_results.append(new_dict) - - return inference_results - - def extract_table_and_chart_images( annotation_dict: Dict[str, List[List[float]]], original_image: np.ndarray, @@ -246,79 +174,85 @@ def extract_table_and_chart_images( tables_and_charts.append((page_idx, table_data)) -def 
extract_tables_and_charts_from_image( - image: np.ndarray, - config: ImageExtractorSchema, - num_classes: int = YOLOX_NUM_CLASSES, - conf_thresh: float = YOLOX_CONF_THRESHOLD, - iou_thresh: float = YOLOX_IOU_THRESHOLD, - min_score: float = YOLOX_MIN_SCORE, - final_thresh: float = YOLOX_FINAL_SCORE, +def extract_tables_and_charts_from_images( + images: List[np.ndarray], + config: ImageConfigSchema, trace_info: Optional[List] = None, -) -> List[CroppedImageWithContent]: +) -> List[Tuple[int, object]]: """ - Extract tables and charts from a single image using an ensemble of image-based models. - - This function processes a single image to detect and extract tables and charts. - It uses a sequence of models hosted on different inference servers to achieve this. + Detect and extract tables/charts from a list of NumPy images using YOLOX. Parameters ---------- - image : np.ndarray - A preprocessed image array for table and chart detection. - config : ImageExtractorSchema - Configuration for the inference client, including endpoint URLs and authentication. - num_classes : int, optional - The number of classes the model is trained to detect (default is 3). - conf_thresh : float, optional - The confidence threshold for detection (default is 0.01). - iou_thresh : float, optional - The Intersection Over Union (IoU) threshold for non-maximum suppression (default is 0.5). - min_score : float, optional - The minimum score threshold for considering a detection valid (default is 0.1). - final_thresh: float, optional - Threshold for keeping a bounding box applied after postprocessing (default is 0.48). + images : List[np.ndarray] + List of images in NumPy array format. + config : ImageConfigSchema + Configuration object containing YOLOX endpoints, auth token, etc. trace_info : Optional[List], optional - Tracing information for logging or debugging purposes. + Optional tracing data for debugging/performance profiling.
Returns ------- - List[CroppedImageWithContent] - A list of `CroppedImageWithContent` objects representing detected tables or charts, - each containing metadata about the detected region. + List[Tuple[int, object]] + A list of (image_index, CroppedImageWithContent) + representing extracted table/chart data from each image. """ tables_and_charts = [] - yolox_client = None + try: - model_interface = yolox_utils.YoloxModelInterface() + model_interface = yolox_utils.YoloxPageElementsModelInterface() yolox_client = create_inference_client( - config.yolox_endpoints, model_interface, config.auth_token, config.yolox_infer_protocol + config.yolox_endpoints, + model_interface, + config.auth_token, + config.yolox_infer_protocol, ) - data = {"images": [image]} + max_batch_size = YOLOX_MAX_BATCH_SIZE + batches = [] + i = 0 + while i < len(images): + batch_size = min(2 ** int(log(len(images) - i, 2)), max_batch_size) + batches.append(images[i : i + batch_size]) # noqa: E203 + i += batch_size + + img_index = 0 + for batch in batches: + data = {"images": batch} + + # NimClient inference + inference_results = yolox_client.infer( + data, + model_name="yolox", + num_classes=YOLOX_NUM_CLASSES, + conf_thresh=YOLOX_CONF_THRESHOLD, + iou_thresh=YOLOX_IOU_THRESHOLD, + min_score=YOLOX_MIN_SCORE, + final_thresh=YOLOX_FINAL_SCORE, + trace_info=trace_info, # traceable_func arg + stage_name="pdf_content_extractor", # traceable_func arg + ) - inference_results = yolox_client.infer( - data, - model_name="yolox", - num_classes=YOLOX_NUM_CLASSES, - conf_thresh=YOLOX_CONF_THRESHOLD, - iou_thresh=YOLOX_IOU_THRESHOLD, - min_score=YOLOX_MIN_SCORE, - final_thresh=YOLOX_FINAL_SCORE, - ) + # 5) Extract table/chart info from each image's annotations + for annotation_dict, original_image in zip(inference_results, batch): + extract_table_and_chart_images( + annotation_dict, + original_image, + img_index, + tables_and_charts, + ) + img_index += 1 - extract_table_and_chart_images( - inference_results, - 
image, - page_idx=0, # Single image treated as one page - tables_and_charts=tables_and_charts, - ) + except TimeoutError: + logger.error("Timeout error during table/chart extraction.") + raise except Exception as e: - logger.error(f"Error during table/chart extraction from image: {str(e)}") + logger.error(f"Unhandled error during table/chart extraction: {str(e)}") traceback.print_exc() raise e + finally: if yolox_client: yolox_client.close() @@ -355,6 +289,8 @@ def image_data_extractor( Specifies whether to extract tables. extract_charts : bool Specifies whether to extract charts. + trace_info : dict, optional + Tracing information for logging or debugging purposes. **kwargs Additional extraction parameters. @@ -425,13 +361,13 @@ def image_data_extractor( # Table and chart extraction if extract_tables or extract_charts: try: - tables_and_charts = extract_tables_and_charts_from_image( - image_array, + tables_and_charts = extract_tables_and_charts_from_images( + [image_array], config=kwargs.get("image_extraction_config"), trace_info=trace_info, ) logger.debug("Extracted table/chart data from image") - for _, table_chart_data in tables_and_charts: + for _, table_chart_data in tables_and_charts[0]: extracted_data.append( construct_table_and_chart_metadata( table_chart_data, @@ -443,6 +379,7 @@ def image_data_extractor( ) except Exception as e: logger.error(f"Error extracting tables/charts from image: {e}") + raise logger.debug(f"Extracted {len(extracted_data)} items from the image.") diff --git a/src/nv_ingest/extraction_workflows/pdf/pdfium_helper.py b/src/nv_ingest/extraction_workflows/pdf/pdfium_helper.py index 1f4dbff7..ad4de2d6 100644 --- a/src/nv_ingest/extraction_workflows/pdf/pdfium_helper.py +++ b/src/nv_ingest/extraction_workflows/pdf/pdfium_helper.py @@ -34,7 +34,6 @@ from nv_ingest.util.image_processing.transforms import crop_image from nv_ingest.util.image_processing.transforms import numpy_to_base64 from nv_ingest.util.nim.helpers import 
create_inference_client -from nv_ingest.util.nim.helpers import get_version from nv_ingest.util.pdf.metadata_aggregators import Base64Image from nv_ingest.util.pdf.metadata_aggregators import CroppedImageWithContent from nv_ingest.util.pdf.metadata_aggregators import construct_image_metadata_from_pdf_image @@ -64,22 +63,8 @@ def extract_tables_and_charts_using_image_ensemble( ) -> List[Tuple[int, object]]: # List[Tuple[int, CroppedImageWithContent]] tables_and_charts = [] - # Obtain yolox_version - # Assuming that the grpc endpoint is at index 0 - yolox_http_endpoint = config.yolox_endpoints[1] try: - yolox_version = get_version(yolox_http_endpoint) - if not yolox_version: - logger.warning( - "Failed to obtain yolox-page-elements version from the endpoint. Falling back to the latest version." - ) - yolox_version = None # Default to the latest version - except Exception: - logger.waring("Failed to get yolox-page-elements version after 30 seconds. Falling back to the latest version.") - yolox_version = None # Default to the latest version - - try: - model_interface = yolox_utils.YoloxPageElementsModelInterface(yolox_version=yolox_version) + model_interface = yolox_utils.YoloxPageElementsModelInterface() yolox_client = create_inference_client( config.yolox_endpoints, model_interface, config.auth_token, config.yolox_infer_protocol ) @@ -142,76 +127,6 @@ def extract_tables_and_charts_using_image_ensemble( return tables_and_charts -def process_inference_results( - output_array: np.ndarray, - original_image_shapes: List[Tuple[int, int]], - num_classes: int, - conf_thresh: float, - iou_thresh: float, - min_score: float, - final_thresh: float, -): - """ - Process the model output to generate detection results and expand bounding boxes. - - Parameters - ---------- - output_array : np.ndarray - The raw output from the model inference. - original_image_shapes : List[Tuple[int, int]] - The shapes of the original images before resizing, used for scaling bounding boxes. 
- num_classes : int - The number of classes the model can detect. - conf_thresh : float - The confidence threshold for detecting objects. - iou_thresh : float - The Intersection Over Union (IoU) threshold for non-maximum suppression. - min_score : float - The minimum score for keeping a detection. - final_thresh: float - Threshold for keeping a bounding box applied after postprocessing. - - - Returns - ------- - List[dict] - A list of dictionaries, each containing processed detection results including expanded bounding boxes. - - Notes - ----- - This function applies non-maximum suppression to the model's output and scales the bounding boxes back to the - original image size. - - Examples - -------- - >>> output_array = np.random.rand(2, 100, 85) - >>> original_image_shapes = [(1536, 1536), (1536, 1536)] - >>> results = process_inference_results(output_array, original_image_shapes, 80, 0.5, 0.5, 0.1) - >>> len(results) - 2 - """ - pred = yolox_utils.postprocess_model_prediction( - output_array, num_classes, conf_thresh, iou_thresh, class_agnostic=True - ) - results = yolox_utils.postprocess_results(pred, original_image_shapes, min_score=min_score) - - annotation_dicts = [yolox_utils.expand_chart_bboxes(annotation_dict) for annotation_dict in results] - inference_results = [] - - # Filter out bounding boxes below the final threshold - for annotation_dict in annotation_dicts: - new_dict = {} - if "table" in annotation_dict: - new_dict["table"] = [bb for bb in annotation_dict["table"] if bb[4] >= final_thresh] - if "chart" in annotation_dict: - new_dict["chart"] = [bb for bb in annotation_dict["chart"] if bb[4] >= final_thresh] - if "title" in annotation_dict: - new_dict["title"] = annotation_dict["title"] - inference_results.append(new_dict) - - return inference_results - - # Handle individual table/chart extraction and model inference def extract_table_and_chart_images( annotation_dict, diff --git a/src/nv_ingest/extraction_workflows/pptx/pptx_helper.py 
b/src/nv_ingest/extraction_workflows/pptx/pptx_helper.py index 7e6b6d89..df930f1c 100644 --- a/src/nv_ingest/extraction_workflows/pptx/pptx_helper.py +++ b/src/nv_ingest/extraction_workflows/pptx/pptx_helper.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 - +import io # Copyright (c) 2024, NVIDIA CORPORATION. # @@ -21,8 +21,9 @@ import operator import re import uuid +from collections import defaultdict from datetime import datetime -from typing import Dict +from typing import Dict, List, Tuple from typing import Optional import pandas as pd @@ -31,8 +32,14 @@ from pptx.enum.dml import MSO_THEME_COLOR from pptx.enum.shapes import MSO_SHAPE_TYPE from pptx.enum.shapes import PP_PLACEHOLDER +from pptx.shapes.autoshape import Shape from pptx.slide import Slide +from nv_ingest.extraction_workflows.image.image_handlers import ( + load_and_preprocess_image, + extract_tables_and_charts_from_images, +) +from nv_ingest.schemas.image_extractor_schema import ImageConfigSchema from nv_ingest.schemas.metadata_schema import AccessLevelEnum from nv_ingest.schemas.metadata_schema import ContentTypeEnum from nv_ingest.schemas.metadata_schema import ImageTypeEnum @@ -41,70 +48,144 @@ from nv_ingest.schemas.metadata_schema import TableFormatEnum from nv_ingest.schemas.metadata_schema import TextTypeEnum from nv_ingest.schemas.metadata_schema import validate_metadata +from nv_ingest.schemas.pptx_extractor_schema import PPTXConfigSchema from nv_ingest.util.converters import bytetools from nv_ingest.util.detectors.language import detect_language +from nv_ingest.util.pdf.metadata_aggregators import construct_table_and_chart_metadata logger = logging.getLogger(__name__) -# Define a helper function to use python-pptx to extract text from a base64 -# encoded bytestram PPTX -def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_tables: bool, **kwargs): +def 
_finalize_images( + pending_images: List[Tuple[Shape, int, int, int, dict, dict, dict]], + extracted_data: List, + pptx_extraction_config: PPTXConfigSchema, + extract_tables: bool = False, + extract_charts: bool = False, + trace_info: Optional[Dict] = None, +): + """ + Post-process all pending images. + - Convert shape image -> NumPy or base64 + - If `extract_tables` or `extract_charts`, do detection (table/chart) + - Build the appropriate metadata, either table/chart or image. + + This mimics the docx approach, but adapted for python-pptx shapes. + """ + if not pending_images: + return + + # Convert each shape to image data (base64 or ndarray). + # We'll store them for a single call to your model if you'd like (batching). + image_arrays = [] + image_contexts = [] + for ( + shape, + shape_idx, + slide_idx, + slide_count, + page_nearby_blocks, + source_metadata, + base_unified_metadata, + ) in pending_images: + try: + image_bytes = shape.image.blob + image_array = load_and_preprocess_image(io.BytesIO(image_bytes)) + base64_img = bytetools.base64frombytes(image_bytes) + + image_arrays.append(image_array) + image_contexts.append( + ( + shape_idx, + slide_idx, + slide_count, + page_nearby_blocks, + source_metadata, + base_unified_metadata, + base64_img, + ) + ) + except Exception as e: + logger.warning(f"Unable to process shape image: {e}") + + # If you want table/chart detection for these images, do it now + # (similar to docx approach). This might use your YOLO or other method: + detection_map = defaultdict(list) # image_idx -> list of CroppedImageWithContent + if extract_tables or extract_charts: + try: + # For example, a call to your function that checks for tables/charts + detection_results = extract_tables_and_charts_from_images( + images=image_arrays, + config=ImageConfigSchema(**(pptx_extraction_config.model_dump())), + trace_info=trace_info, + ) + # detection_results is something like [(image_idx, CroppedImageWithContent), ...] 
+ for img_idx, cropped_obj in detection_results: + detection_map[img_idx].append(cropped_obj) + except Exception as e: + logger.error(f"Error while running table/chart detection on PPTX images: {e}") + detection_map = {} + + # Now build the final metadata objects + for i, context in enumerate(image_contexts): + (shape_idx, slide_idx, slide_count, page_nearby_blocks, source_metadata, base_unified_metadata, base64_img) = ( + context + ) + + # If there's a detection result for this image, handle it + if i in detection_map and detection_map[i]: + # We found table(s)/chart(s) in the image + for cropped_item in detection_map[i]: + structured_entry = construct_table_and_chart_metadata( + structured_image=cropped_item, + page_idx=slide_idx, + page_count=slide_count, + source_metadata=source_metadata, + base_unified_metadata=base_unified_metadata, + ) + extracted_data.append(structured_entry) + else: + # No table detected => build normal image metadata + image_entry = _construct_image_metadata( + shape_idx=shape_idx, + slide_idx=slide_idx, + slide_count=slide_count, + page_nearby_blocks=page_nearby_blocks, + base64_img=base64_img, + source_metadata=source_metadata, + base_unified_metadata=base_unified_metadata, + ) + extracted_data.append(image_entry) + + +def python_pptx( + pptx_stream, extract_text: bool, extract_images: bool, extract_tables: bool, extract_charts: bool, **kwargs +): """ - Helper function to use python-pptx to extract text from a bytestream PPTX. - - A document has five levels - presentation, slides, shapes, paragraphs, and runs. - To align with the pdf extraction, we map the levels as follows: - - Document -> Presention - - Pages -> Slides - - Blocks -> Shapes - - Lines -> Paragraphs - - Spans -> Runs - - Parameters - ---------- - pptx_stream : io.BytesIO - A bytestream PPTX. - extract_text : bool - Specifies whether to extract text. - extract_images : bool - Specifies whether to extract images. 
- extract_tables : bool - Specifies whether to extract tables. - **kwargs - The keyword arguments are used for additional extraction parameters. - - Returns - ------- - str - A string of extracted text. + Helper function to use python-pptx to extract text from a bytestream PPTX, + while deferring image classification into tables/charts if requested. """ logger.debug("Extracting PPTX with python-pptx backend.") row_data = kwargs.get("row_data") - # get source_id source_id = row_data["source_id"] - # get text_depth + text_depth = kwargs.get("text_depth", "page") text_depth = TextTypeEnum[text_depth.upper()] - # Not configurable anywhere at the moment paragraph_format = kwargs.get("paragraph_format", "markdown") identify_nearby_objects = kwargs.get("identify_nearby_objects", True) - # get base metadata metadata_col = kwargs.get("metadata_column", "metadata") + pptx_extractor_config = kwargs.get("pptx_extraction_config", {}) + trace_info = kwargs.get("trace_info", {}) + base_unified_metadata = row_data[metadata_col] if metadata_col in row_data.index else {} - # get base source_metadata base_source_metadata = base_unified_metadata.get("source_metadata", {}) - # get source_location source_location = base_source_metadata.get("source_location", "") - # get collection_id (assuming coming in from source_metadata...) collection_id = base_source_metadata.get("collection_id", "") - # get partition_id (assuming coming in from source_metadata...) partition_id = base_source_metadata.get("partition_id", -1) - # get access_level (assuming coming in from source_metadata...) 
access_level = base_source_metadata.get("access_level", AccessLevelEnum.LEVEL_1) presentation = Presentation(pptx_stream) @@ -140,6 +221,10 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t accumulated_text = [] extracted_data = [] + # Hold images here for final classification + # Each item is (shape, shape_idx, slide_idx, page_nearby_blocks, base_unified_metadata) + pending_images = [] + for slide_idx, slide in enumerate(presentation.slides): shapes = sorted(ungroup_shapes(slide.shapes), key=operator.attrgetter("top", "left")) @@ -153,6 +238,9 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t block_text = [] added_title = added_subtitle = False + # --------------------------------------------- + # 1) Text Extraction + # --------------------------------------------- if extract_text and shape.has_text_frame: for paragraph_idx, paragraph in enumerate(shape.text_frame.paragraphs): if not paragraph.text.strip(): @@ -162,21 +250,22 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t text = run.text if not text: continue + text = escape_text(text) if paragraph_format == "markdown": - # For titles/subtitles, process them on the block/shape level, and - # skip formatting. 
if is_title(shape): - if added_title: + if not added_title: + text = process_title(shape) # format a heading or something + added_title = True + else: continue - text = process_title(shape) - added_title = True elif is_subtitle(shape): - if added_subtitle: + if not added_subtitle: + text = process_subtitle(shape) + added_subtitle = True + else: continue - text = process_subtitle(shape) - added_subtitle = True else: if run.hyperlink.address: text = get_hyperlink(text, run.hyperlink.address) @@ -193,9 +282,11 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t accumulated_text.append(text) + # For "nearby objects", store block text if extract_images and identify_nearby_objects: block_text.append(text) + # If we only want text at SPAN level, flush after each run if text_depth == TextTypeEnum.SPAN: text_extraction = _construct_text_metadata( presentation, @@ -211,17 +302,15 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t source_metadata, base_unified_metadata, ) - if len(text_extraction) > 0: extracted_data.append(text_extraction) - accumulated_text = [] - # Avoid excessive newline characters and add them only at - # the line/paragraph level or higher. 
+ # Add newlines for separation at line/paragraph level if accumulated_text and not accumulated_text[-1].endswith("\n\n"): accumulated_text.append("\n\n") + # If text_depth is LINE, flush after each paragraph if text_depth == TextTypeEnum.LINE: text_extraction = _construct_text_metadata( presentation, @@ -237,12 +326,11 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t source_metadata, base_unified_metadata, ) - if len(text_extraction) > 0: extracted_data.append(text_extraction) - accumulated_text = [] + # If text_depth is BLOCK, flush after we've read the entire shape if text_depth == TextTypeEnum.BLOCK: text_extraction = _construct_text_metadata( presentation, @@ -258,54 +346,60 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t source_metadata, base_unified_metadata, ) - if len(text_extraction) > 0: extracted_data.append(text_extraction) - accumulated_text = [] - if extract_images and identify_nearby_objects and (len(block_text) > 0): + # If we have text in this shape and the user wants "nearby objects" references: + if extract_images and identify_nearby_objects and block_text: page_nearby_blocks["text"]["content"].append("".join(block_text)) page_nearby_blocks["text"]["bbox"].append(get_bbox(shape_object=shape)) + # --------------------------------------------- + # 2) Image Handling (DEFERRED) + # --------------------------------------------- + # If shape is a picture (or a placeholder that is an embedded image) + # Instead of building metadata now, we'll store it in pending_images. 
if extract_images and ( shape.shape_type == MSO_SHAPE_TYPE.PICTURE or ( shape.is_placeholder and shape.placeholder_format.type == PP_PLACEHOLDER.OBJECT and hasattr(shape, "image") - and getattr(shape, "image") ) ): try: - image_extraction = _construct_image_metadata( - shape, - shape_idx, - slide_idx, - slide_count, - source_metadata, - base_unified_metadata, - page_nearby_blocks, + # Just accumulate the shape + context; don't build the final item yet. + pending_images.append( + ( + shape, # so we can later pull shape.image.blob + shape_idx, + slide_idx, + slide_count, + page_nearby_blocks, + source_metadata, + base_unified_metadata, + ) ) - extracted_data.append(image_extraction) except ValueError as e: - # Handle the specific case where no embedded image is found logger.warning(f"No embedded image found for shape {shape_idx} on slide {slide_idx}: {e}") except Exception as e: - # Handle any other exceptions that might occur - logger.warning(f"An error occurred while processing shape {shape_idx} on slide {slide_idx}: {e}") + logger.warning(f"Error processing shape {shape_idx} on slide {slide_idx}: {e}") + # --------------------------------------------- + # 3) Table Handling + # --------------------------------------------- if extract_tables and shape.has_table: table_extraction = _construct_table_metadata( shape, slide_idx, slide_count, source_metadata, base_unified_metadata ) extracted_data.append(table_extraction) - # Extract text - slide (b) + # If text_depth is PAGE, flush once per slide if (extract_text) and (text_depth == TextTypeEnum.PAGE) and (len(accumulated_text) > 0): text_extraction = _construct_text_metadata( presentation, - shape, + shape, # might pass None if you prefer accumulated_text, keywords, slide_idx, @@ -317,17 +411,15 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t source_metadata, base_unified_metadata, ) - if len(text_extraction) > 0: extracted_data.append(text_extraction) - accumulated_text = [] - # 
Extract text - presentation (c) + # If text_depth is DOCUMENT, flush once at the end if (extract_text) and (text_depth == TextTypeEnum.DOCUMENT) and (len(accumulated_text) > 0): text_extraction = _construct_text_metadata( presentation, - shape, + shape, # might pass None accumulated_text, keywords, -1, @@ -339,12 +431,23 @@ def python_pptx(pptx_stream, extract_text: bool, extract_images: bool, extract_t source_metadata, base_unified_metadata, ) - if len(text_extraction) > 0: extracted_data.append(text_extraction) - accumulated_text = [] + # --------------------------------------------- + # FINAL STEP: Finalize images + # --------------------------------------------- + if extract_images or extract_tables or extract_charts: + _finalize_images( + pending_images, + extracted_data, + pptx_extractor_config, + extract_tables=extract_tables, + extract_charts=extract_charts, + trace_info=trace_info, + ) + return extracted_data @@ -410,17 +513,19 @@ def _construct_text_metadata( # need to add block text to hierarchy/nearby_objects, including bbox def _construct_image_metadata( - shape, shape_idx, slide_idx, slide_count, source_metadata, base_unified_metadata, page_nearby_blocks + shape_idx: int, + slide_idx: int, + slide_count: int, + page_nearby_blocks: Dict, + base64_img: str, + source_metadata: Dict, + base_unified_metadata: Dict, ): - image_type = shape.image.ext - if ImageTypeEnum.has_value(image_type): - image_type = ImageTypeEnum[image_type.upper()] - - base64_img = bytetools.base64frombytes(shape.image.blob) - - bbox = get_bbox(shape_object=shape) - width = shape.width - height = shape.height + """ + Build standard PPTX image metadata. 
+ """ + # Example bounding box + bbox = (0, 0, 0, 0) # or extract from shape.left, shape.top, shape.width, shape.height if desired content_metadata = { "type": ContentTypeEnum.IMAGE, @@ -437,17 +542,14 @@ def _construct_image_metadata( } image_metadata = { - "image_type": image_type, + "image_type": ImageTypeEnum.image_type_1, "structured_image_type": ImageTypeEnum.image_type_1, - "caption": "", + "caption": "", # could attempt to guess a caption from nearby text "text": "", "image_location": bbox, - "width": width, - "height": height, } - unified_metadata = base_unified_metadata.copy() - + unified_metadata = base_unified_metadata.copy() if base_unified_metadata else {} unified_metadata.update( { "content": base64_img, @@ -459,7 +561,11 @@ def _construct_image_metadata( validated_unified_metadata = validate_metadata(unified_metadata) - return [ContentTypeEnum.IMAGE, validated_unified_metadata.model_dump(), str(uuid.uuid4())] + return [ + ContentTypeEnum.IMAGE.value, + validated_unified_metadata.model_dump(), + str(uuid.uuid4()), + ] def _construct_table_metadata( @@ -492,12 +598,13 @@ def _construct_table_metadata( "caption": "", "table_format": TableFormatEnum.MARKDOWN, "table_location": bbox, + "table_content": df.to_markdown(index=False), } ext_unified_metadata = base_unified_metadata.copy() ext_unified_metadata.update( { - "content": df.to_markdown(index=False), + "content": "", "source_metadata": source_metadata, "content_metadata": content_metadata, "table_metadata": table_metadata, diff --git a/src/nv_ingest/modules/transforms/embed_extractions.py b/src/nv_ingest/modules/transforms/embed_extractions.py index 2b80c3be..cc6dae72 100644 --- a/src/nv_ingest/modules/transforms/embed_extractions.py +++ b/src/nv_ingest/modules/transforms/embed_extractions.py @@ -281,7 +281,7 @@ def _add_embeddings(row, embeddings, info_msgs): return row -def _get_text_content(row): +def _get_pandas_text_content(row): """ A pandas UDF used to select extracted text content to be used 
to create embeddings. """ @@ -289,7 +289,7 @@ def _get_text_content(row): return row["content"] -def _get_table_content(row): +def _get_pandas_table_content(row): """ A pandas UDF used to select extracted table/chart content to be used to create embeddings. """ @@ -297,6 +297,38 @@ def _get_table_content(row): return row["table_metadata"]["table_content"] +def _get_pandas_image_content(row): + """ + A pandas UDF used to select extracted image captions to be used to create embeddings. + """ + + return row["image_metadata"]["caption"] + + +def _get_cudf_text_content(df: cudf.DataFrame): + """ + A cuDF UDF used to select extracted text content to be used to create embeddings. + """ + + return df.struct.field("content") + + +def _get_cudf_table_content(df: cudf.DataFrame): + """ + A cuDF UDF used to select extracted table/chart content to be used to create embeddings. + """ + + return df.struct.field("table_metadata").struct.field("table_content") + + +def _get_cudf_image_content(df: cudf.DataFrame): + """ + A cuDF UDF used to select extracted image captions to be used to create embeddings. + """ + + return df.struct.field("image_metadata").struct.field("caption") + + def _batch_generator(iterable: Iterable, batch_size=10): """ A generator to yield batches of size `batch_size` from an interable. @@ -349,7 +381,6 @@ def _generate_batches(prompts: List[str], batch_size: int = 100): def _generate_embeddings( ctrl_msg: ControlMessage, - content_type: ContentTypeEnum, event_loop: asyncio.SelectorEventLoop, batch_size: int, api_key: str, @@ -361,8 +392,10 @@ def _generate_embeddings( filter_errors: bool, ): """ - A function to generate embeddings for the supplied `ContentTypeEnum`. The `ContentTypeEnum` will - drive filtering criteria used to select rows of data to enrich with embeddings. + A function to generate text embeddings for supported content types (TEXT, STRUCTURED, IMAGE). 
+ + This function dynamically selects the appropriate metadata field based on content type and + calculates embeddings using the NIM embedding service. AUDIO and VIDEO types are stubbed and skipped. Parameters ---------- @@ -403,53 +436,71 @@ def _generate_embeddings( content_mask : cudf.Series A boolean mask representing rows filtered to calculate embeddings. """ + cudf_content_extractor = { + ContentTypeEnum.TEXT: _get_cudf_text_content, + ContentTypeEnum.STRUCTURED: _get_cudf_table_content, + ContentTypeEnum.IMAGE: _get_cudf_image_content, + ContentTypeEnum.AUDIO: lambda _: None, # Not supported yet. + ContentTypeEnum.VIDEO: lambda _: None, # Not supported yet. + } + pandas_content_extractor = { + ContentTypeEnum.TEXT: _get_pandas_text_content, + ContentTypeEnum.STRUCTURED: _get_pandas_table_content, + ContentTypeEnum.IMAGE: _get_pandas_image_content, + ContentTypeEnum.AUDIO: lambda _: None, # Not supported yet. + ContentTypeEnum.VIDEO: lambda _: None, # Not supported yet. + } + + logger.debug("Generating text embeddings for supported content types: TEXT, STRUCTURED, IMAGE.") + + embedding_dataframes = [] + content_masks = [] with ctrl_msg.payload().mutable_dataframe() as mdf: if mdf.empty: - return None, None - - # generate table text mask - if content_type == ContentTypeEnum.TEXT: - content_mask = (mdf["document_type"] == content_type.value) & ( - mdf["metadata"].struct.field("content") != "" - ).fillna(False) - content_getter = _get_text_content - elif content_type == ContentTypeEnum.STRUCTURED: - table_mask = mdf["document_type"] == content_type.value - if not table_mask.any(): - return None, None - content_mask = table_mask & ( - mdf["metadata"].struct.field("table_metadata").struct.field("table_content") != "" - ).fillna(False) - content_getter = _get_table_content - - # exit if matches found - if not content_mask.any(): - return None, None - - df_text = mdf.loc[content_mask].to_pandas().reset_index(drop=True) - # get text list - filtered_text = 
df_text["metadata"].apply(content_getter) - # calculate embeddings - filtered_text_batches = _generate_batches(filtered_text.tolist(), batch_size) - text_embeddings = _async_runner( - filtered_text_batches, - api_key, - embedding_nim_endpoint, - embedding_model, - encoding_format, - input_type, - truncate, - event_loop, - filter_errors, - ) - # update embeddings in metadata - df_text[["metadata", "document_type", "_contains_embeddings"]] = df_text.apply( - _add_embeddings, **text_embeddings, axis=1 - )[["metadata", "document_type", "_contains_embeddings"]] - df_text["_content"] = filtered_text + return ctrl_msg + + for content_type, content_getter in pandas_content_extractor.items(): + if not content_getter: + logger.debug(f"Skipping unsupported content type: {content_type}") + continue + + content_mask = mdf["document_type"] == content_type.value + if not content_mask.any(): + continue + + cudf_content_getter = cudf_content_extractor[content_type] + content_mask = (content_mask & (cudf_content_getter(mdf["metadata"]) != "")).fillna(False) + if not content_mask.any(): + continue + + df_content = mdf.loc[content_mask].to_pandas().reset_index(drop=True) + filtered_content = df_content["metadata"].apply(content_getter) + # calculate embeddings + filtered_content_batches = _generate_batches(filtered_content.tolist(), batch_size) + content_embeddings = _async_runner( + filtered_content_batches, + api_key, + embedding_nim_endpoint, + embedding_model, + encoding_format, + input_type, + truncate, + event_loop, + filter_errors, + ) + # update embeddings in metadata + df_content[["metadata", "document_type", "_contains_embeddings"]] = df_content.apply( + _add_embeddings, **content_embeddings, axis=1 + )[["metadata", "document_type", "_contains_embeddings"]] + df_content["_content"] = filtered_content + + embedding_dataframes.append(df_content) + content_masks.append(content_mask) + + message = _concatenate_extractions(ctrl_msg, embedding_dataframes, content_masks) - return 
df_text, content_mask + return message def _concatenate_extractions(ctrl_msg: ControlMessage, dataframes: List[pd.DataFrame], masks: List[cudf.Series]): @@ -493,8 +544,8 @@ def _concatenate_extractions(ctrl_msg: ControlMessage, dataframes: List[pd.DataF @register_module(MODULE_NAME, MODULE_NAMESPACE) def _embed_extractions(builder: mrc.Builder): """ - A pipeline module that receives incoming messages in ControlMessage format and calculates embeddings for - supported document types. + A pipeline module that receives incoming messages in ControlMessage format + and calculates text embeddings for all supported content types. Parameters ---------- @@ -519,56 +570,20 @@ def embed_extractions_fn(message: ControlMessage): try: task_props = message.remove_task("embed") model_dump = task_props.model_dump() - embed_text = model_dump.get("text") - embed_tables = model_dump.get("tables") filter_errors = model_dump.get("filter_errors", False) - logger.debug(f"Generating embeddings: text={embed_text}, tables={embed_tables}") - embedding_dataframes = [] - content_masks = [] - - if embed_text: - df_text, content_mask = _generate_embeddings( - message, - ContentTypeEnum.TEXT, - event_loop, - validated_config.batch_size, - validated_config.api_key, - validated_config.embedding_nim_endpoint, - validated_config.embedding_model, - validated_config.encoding_format, - validated_config.input_type, - validated_config.truncate, - filter_errors, - ) - if df_text is not None: - embedding_dataframes.append(df_text) - content_masks.append(content_mask) - - if embed_tables: - df_tables, table_mask = _generate_embeddings( - message, - ContentTypeEnum.STRUCTURED, - event_loop, - validated_config.batch_size, - validated_config.api_key, - validated_config.embedding_nim_endpoint, - validated_config.embedding_model, - validated_config.encoding_format, - validated_config.input_type, - validated_config.truncate, - filter_errors, - ) - if df_tables is not None: - embedding_dataframes.append(df_tables) - 
content_masks.append(table_mask) - - if len(content_masks) == 0: - return message - - message = _concatenate_extractions(message, embedding_dataframes, content_masks) - - return message + return _generate_embeddings( + message, + event_loop, + validated_config.batch_size, + validated_config.api_key, + validated_config.embedding_nim_endpoint, + validated_config.embedding_model, + validated_config.encoding_format, + validated_config.input_type, + validated_config.truncate, + filter_errors, + ) except Exception as e: traceback.print_exc() diff --git a/src/nv_ingest/schemas/docx_extractor_schema.py b/src/nv_ingest/schemas/docx_extractor_schema.py new file mode 100644 index 00000000..5204674e --- /dev/null +++ b/src/nv_ingest/schemas/docx_extractor_schema.py @@ -0,0 +1,124 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + + +import logging +from typing import Optional +from typing import Tuple + +from pydantic import model_validator, ConfigDict, BaseModel + +logger = logging.getLogger(__name__) + + +class DocxConfigSchema(BaseModel): + """ + Configuration schema for docx extraction endpoints and options. + + Parameters + ---------- + auth_token : Optional[str], default=None + Authentication token required for secure services. + + yolox_endpoints : Tuple[str, str] + A tuple containing the gRPC and HTTP services for the yolox endpoint. + Either the gRPC or HTTP service can be empty, but not both. + + Methods + ------- + validate_endpoints(values) + Validates that at least one of the gRPC or HTTP services is provided for each endpoint. + + Raises + ------ + ValueError + If both gRPC and HTTP services are empty for any endpoint. + + Config + ------ + extra : str + Pydantic config option to forbid extra fields. 
+ """ + + auth_token: Optional[str] = None + + yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None) + yolox_infer_protocol: str = "" + + @model_validator(mode="before") + @classmethod + def validate_endpoints(cls, values): + """ + Validates the gRPC and HTTP services for all endpoints. + + Parameters + ---------- + values : dict + Dictionary containing the values of the attributes for the class. + + Returns + ------- + dict + The validated dictionary of values. + + Raises + ------ + ValueError + If both gRPC and HTTP services are empty for any endpoint. + """ + + def clean_service(service): + """Set service to None if it's an empty string or contains only spaces or quotes.""" + if service is None or not service.strip() or service.strip(" \"'") == "": + return None + return service + + for model_name in ["yolox"]: + endpoint_name = f"{model_name}_endpoints" + grpc_service, http_service = values.get(endpoint_name) + grpc_service = clean_service(grpc_service) + http_service = clean_service(http_service) + + if not grpc_service and not http_service: + raise ValueError(f"Both gRPC and HTTP services cannot be empty for {endpoint_name}.") + + values[endpoint_name] = (grpc_service, http_service) + + protocol_name = f"{model_name}_infer_protocol" + protocol_value = values.get(protocol_name) + if not protocol_value: + protocol_value = "http" if http_service else "grpc" if grpc_service else "" + protocol_value = protocol_value.lower() + values[protocol_name] = protocol_value + + return values + + model_config = ConfigDict(extra="forbid") + + +class DocxExtractorSchema(BaseModel): + """ + Configuration schema for the PDF extractor settings. + + Parameters + ---------- + max_queue_size : int, default=1 + The maximum number of items allowed in the processing queue. + + n_workers : int, default=16 + The number of worker threads to use for processing. + + raise_on_failure : bool, default=False + A flag indicating whether to raise an exception on processing failure. 
+ + image_extraction_config: Optional[ImageConfigSchema], default=None + Configuration schema for the image extraction stage. + """ + + max_queue_size: int = 1 + n_workers: int = 16 + raise_on_failure: bool = False + + docx_extraction_config: Optional[DocxConfigSchema] = None + model_config = ConfigDict(extra="forbid") diff --git a/src/nv_ingest/schemas/ingest_job_schema.py b/src/nv_ingest/schemas/ingest_job_schema.py index 09975228..7672fec7 100644 --- a/src/nv_ingest/schemas/ingest_job_schema.py +++ b/src/nv_ingest/schemas/ingest_job_schema.py @@ -36,6 +36,7 @@ class DocumentTypeEnum(str, Enum): mp3 = "mp3" wav = "wav" + class TaskTypeEnum(str, Enum): caption = "caption" dedup = "dedup" @@ -131,8 +132,6 @@ class IngestTaskDedupSchema(BaseModelNoExt): class IngestTaskEmbedSchema(BaseModelNoExt): - text: bool = True - tables: bool = True filter_errors: bool = False diff --git a/src/nv_ingest/schemas/ingest_pipeline_config_schema.py b/src/nv_ingest/schemas/ingest_pipeline_config_schema.py index 1471a338..fe5debd6 100644 --- a/src/nv_ingest/schemas/ingest_pipeline_config_schema.py +++ b/src/nv_ingest/schemas/ingest_pipeline_config_schema.py @@ -22,7 +22,7 @@ from nv_ingest.schemas.otel_meter_schema import OpenTelemetryMeterSchema from nv_ingest.schemas.otel_tracer_schema import OpenTelemetryTracerSchema from nv_ingest.schemas.pdf_extractor_schema import PDFExtractorSchema -from nv_ingest.schemas.pptx_extractor_schema import PPTXExctractorSchema +from nv_ingest.schemas.pptx_extractor_schema import PPTXExtractorSchema from nv_ingest.schemas.table_extractor_schema import TableExtractorSchema logger = logging.getLogger(__name__) @@ -42,7 +42,7 @@ class PipelineConfigSchema(BaseModel): otel_meter_module: OpenTelemetryMeterSchema = OpenTelemetryMeterSchema() otel_tracer_module: OpenTelemetryTracerSchema = OpenTelemetryTracerSchema() pdf_extractor_module: PDFExtractorSchema = PDFExtractorSchema() - pptx_extractor_module: PPTXExctractorSchema = PPTXExctractorSchema() + 
pptx_extractor_module: PPTXExtractorSchema = PPTXExtractorSchema() redis_task_sink: MessageBrokerTaskSinkSchema = MessageBrokerTaskSinkSchema() redis_task_source: MessageBrokerTaskSourceSchema = MessageBrokerTaskSourceSchema() table_extractor_module: TableExtractorSchema = TableExtractorSchema() diff --git a/src/nv_ingest/schemas/metadata_schema.py b/src/nv_ingest/schemas/metadata_schema.py index e5e1a459..9de9aba9 100644 --- a/src/nv_ingest/schemas/metadata_schema.py +++ b/src/nv_ingest/schemas/metadata_schema.py @@ -36,12 +36,14 @@ class AccessLevelEnum(int, Enum): class ContentTypeEnum(str, Enum): - TEXT = "text" + AUDIO = "audio" + EMBEDDING = "embedding" IMAGE = "image" + INFO_MSG = "info_message" STRUCTURED = "structured" + TEXT = "text" UNSTRUCTURED = "unstructured" - INFO_MSG = "info_message" - EMBEDDING = "embedding" + VIDEO = "video" class StdContentDescEnum(str, Enum): diff --git a/src/nv_ingest/schemas/pptx_extractor_schema.py b/src/nv_ingest/schemas/pptx_extractor_schema.py index 987ac671..d3897075 100644 --- a/src/nv_ingest/schemas/pptx_extractor_schema.py +++ b/src/nv_ingest/schemas/pptx_extractor_schema.py @@ -3,8 +3,122 @@ # SPDX-License-Identifier: Apache-2.0 -from nv_ingest.schemas.pdf_extractor_schema import PDFExtractorSchema +import logging +from typing import Optional +from typing import Tuple +from pydantic import model_validator, ConfigDict, BaseModel -class PPTXExctractorSchema(PDFExtractorSchema): - pass +logger = logging.getLogger(__name__) + + +class PPTXConfigSchema(BaseModel): + """ + Configuration schema for docx extraction endpoints and options. + + Parameters + ---------- + auth_token : Optional[str], default=None + Authentication token required for secure services. + + yolox_endpoints : Tuple[str, str] + A tuple containing the gRPC and HTTP services for the yolox endpoint. + Either the gRPC or HTTP service can be empty, but not both. 
+ + Methods + ------- + validate_endpoints(values) + Validates that at least one of the gRPC or HTTP services is provided for each endpoint. + + Raises + ------ + ValueError + If both gRPC and HTTP services are empty for any endpoint. + + Config + ------ + extra : str + Pydantic config option to forbid extra fields. + """ + + auth_token: Optional[str] = None + + yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None) + yolox_infer_protocol: str = "" + + @model_validator(mode="before") + @classmethod + def validate_endpoints(cls, values): + """ + Validates the gRPC and HTTP services for all endpoints. + + Parameters + ---------- + values : dict + Dictionary containing the values of the attributes for the class. + + Returns + ------- + dict + The validated dictionary of values. + + Raises + ------ + ValueError + If both gRPC and HTTP services are empty for any endpoint. + """ + + def clean_service(service): + """Set service to None if it's an empty string or contains only spaces or quotes.""" + if service is None or not service.strip() or service.strip(" \"'") == "": + return None + return service + + for model_name in ["yolox"]: + endpoint_name = f"{model_name}_endpoints" + grpc_service, http_service = values.get(endpoint_name) + grpc_service = clean_service(grpc_service) + http_service = clean_service(http_service) + + if not grpc_service and not http_service: + raise ValueError(f"Both gRPC and HTTP services cannot be empty for {endpoint_name}.") + + values[endpoint_name] = (grpc_service, http_service) + + protocol_name = f"{model_name}_infer_protocol" + protocol_value = values.get(protocol_name) + if not protocol_value: + protocol_value = "http" if http_service else "grpc" if grpc_service else "" + protocol_value = protocol_value.lower() + values[protocol_name] = protocol_value + + return values + + model_config = ConfigDict(extra="forbid") + + +class PPTXExtractorSchema(BaseModel): + """ + Configuration schema for the PDF extractor settings. 
+ + Parameters + ---------- + max_queue_size : int, default=1 + The maximum number of items allowed in the processing queue. + + n_workers : int, default=16 + The number of worker threads to use for processing. + + raise_on_failure : bool, default=False + A flag indicating whether to raise an exception on processing failure. + + image_extraction_config: Optional[ImageConfigSchema], default=None + Configuration schema for the image extraction stage. + """ + + max_queue_size: int = 1 + n_workers: int = 16 + raise_on_failure: bool = False + + pptx_extraction_config: Optional[PPTXConfigSchema] = None + model_config = ConfigDict(extra="forbid") diff --git a/src/nv_ingest/schemas/processing_job_schema.py b/src/nv_ingest/schemas/processing_job_schema.py new file mode 100644 index 00000000..731ec986 --- /dev/null +++ b/src/nv_ingest/schemas/processing_job_schema.py @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. 
+ +from pydantic import BaseModel, ConfigDict +from enum import Enum + + +class ConversionStatus(str, Enum): + IN_PROGRESS = "in_progress" + SUCCESS = "success" + FAILED = "failed" + + model_config = ConfigDict(extra="forbid") + + +class ProcessingJob(BaseModel): + submitted_job_id: str + filename: str + raw_result: str = "" + content: str = "" + status: ConversionStatus + error: str | None = None + + model_config = ConfigDict(extra="forbid") diff --git a/src/nv_ingest/service/impl/ingest/redis_ingest_service.py b/src/nv_ingest/service/impl/ingest/redis_ingest_service.py index 2fb9b6c0..737231f5 100644 --- a/src/nv_ingest/service/impl/ingest/redis_ingest_service.py +++ b/src/nv_ingest/service/impl/ingest/redis_ingest_service.py @@ -14,10 +14,12 @@ from json import JSONDecodeError from typing import Any +from typing import List from nv_ingest.schemas import validate_ingest_job from nv_ingest.schemas.message_wrapper_schema import MessageWrapper from nv_ingest.service.meta.ingest.ingest_service_meta import IngestServiceMeta from nv_ingest.util.message_brokers.redis.redis_client import RedisClient +from nv_ingest.schemas.processing_job_schema import ProcessingJob logger = logging.getLogger("uvicorn") @@ -46,6 +48,8 @@ def __init__(self, redis_hostname: str, redis_port: int, redis_task_queue: str): self._redis_hostname = redis_hostname self._redis_port = redis_port self._redis_task_queue = redis_task_queue + self._cache_prefix = "processing_cache:" + self._bulk_vdb_cache_prefix = "vdb_bulk_upload_cache:" self._ingest_client = RedisClient( host=self._redis_hostname, port=self._redis_port, max_pool_size=self._concurrency_level @@ -89,3 +93,24 @@ async def fetch_job(self, job_id: str) -> Any: raise TimeoutError() return message + + async def set_processing_cache(self, job_id: str, jobs_data: List[ProcessingJob]) -> None: + """Store processing jobs data using simple key-value""" + cache_key = f"{self._cache_prefix}{job_id}" + try: + 
self._ingest_client.get_client().set(cache_key, json.dumps([job.dict() for job in jobs_data]), ex=3600) + except Exception as err: + logger.error(f"Error setting cache for {cache_key}: {err}") + raise + + async def get_processing_cache(self, job_id: str) -> List[ProcessingJob]: + """Retrieve processing jobs data using simple key-value""" + cache_key = f"{self._cache_prefix}{job_id}" + try: + data = self._ingest_client.get_client().get(cache_key) + if data is None: + return [] + return [ProcessingJob(**job) for job in json.loads(data)] + except Exception as err: + logger.error(f"Error getting cache for {cache_key}: {err}") + raise diff --git a/src/nv_ingest/service/meta/ingest/ingest_service_meta.py b/src/nv_ingest/service/meta/ingest/ingest_service_meta.py index 3bc5b7a6..b94f739a 100644 --- a/src/nv_ingest/service/meta/ingest/ingest_service_meta.py +++ b/src/nv_ingest/service/meta/ingest/ingest_service_meta.py @@ -10,8 +10,10 @@ from abc import ABC from abc import abstractmethod +from typing import List from nv_ingest.schemas.message_wrapper_schema import MessageWrapper +from nv_ingest.schemas.processing_job_schema import ProcessingJob class IngestServiceMeta(ABC): @@ -22,3 +24,11 @@ async def submit_job(self, job_spec: MessageWrapper, trace_id: str) -> str: @abstractmethod async def fetch_job(self, job_id: str): """Abstract method for fetching job from ingestion service based on job_id""" + + @abstractmethod + async def set_processing_cache(self, job_id: str, jobs_data: List[ProcessingJob]) -> None: + """Abstract method for setting processing cache""" + + @abstractmethod + async def get_processing_cache(self, job_id: str) -> List[ProcessingJob]: + """Abstract method for getting processing cache""" diff --git a/src/nv_ingest/stages/docx_extractor_stage.py b/src/nv_ingest/stages/docx_extractor_stage.py index 7fcc434c..953eefc1 100644 --- a/src/nv_ingest/stages/docx_extractor_stage.py +++ b/src/nv_ingest/stages/docx_extractor_stage.py @@ -8,19 +8,70 @@ import io 
import logging import traceback +from typing import Optional, Dict, Any import pandas as pd from pydantic import BaseModel from morpheus.config import Config from nv_ingest.extraction_workflows import docx +from nv_ingest.schemas.docx_extractor_schema import DocxExtractorSchema from nv_ingest.stages.multiprocessing_stage import MultiProcessingBaseStage from nv_ingest.util.exception_handlers.pdf import create_exception_tag logger = logging.getLogger(f"morpheus.{__name__}") -def _process_docx_bytes(df, task_props): +def decode_and_extract(base64_row, task_props, validated_config: Any, trace_info: Dict, default="python_docx"): + if isinstance(task_props, BaseModel): + task_props = task_props.model_dump() + + # Base64 content to extract + base64_content = base64_row["content"] + # Row data to include in extraction + bool_index = base64_row.index.isin(("content",)) + row_data = base64_row[~bool_index] + task_props["params"]["row_data"] = row_data + # Get source_id + source_id = base64_row["source_id"] if "source_id" in base64_row.index else None + # Decode the base64 content + doc_bytes = base64.b64decode(base64_content) + + # Load the document + doc_stream = io.BytesIO(doc_bytes) + + # Type of extraction method to use + extract_method = task_props.get("method", "python_docx") + extract_params = task_props.get("params", {}) + try: + if validated_config.docx_extraction_config is not None: + extract_params["docx_extraction_config"] = validated_config.docx_extraction_config + + if trace_info is not None: + extract_params["trace_info"] = trace_info + + if not hasattr(docx, extract_method): + extract_method = default + + func = getattr(docx, extract_method, default) + logger.debug("Running extraction method: %s", extract_method) + extracted_data = func(doc_stream, **extract_params) + + return extracted_data + + except Exception as error: + traceback.print_exc() + log_error_message = f"Error loading extractor:{error}" + logger.error(log_error_message) + logger.error(f"Failed 
on file:{source_id}") + + # Propagate error back and tag message as failed. + exception_tag = create_exception_tag(error_message=log_error_message, source_id=source_id) + + return exception_tag + + +def _process_docx_bytes(df, task_props, validated_config: Any, trace_info: Optional[Dict[str, Any]] = None): """ Processes a cuDF DataFrame containing docx files in base64 encoding. Each document's content is replaced with its extracted text. @@ -33,51 +84,11 @@ def _process_docx_bytes(df, task_props): - A pandas DataFrame with the docx content replaced by the extracted text. """ - def decode_and_extract(base64_row, task_props, default="python_docx"): - if isinstance(task_props, BaseModel): - task_props = task_props.model_dump() - - # Base64 content to extract - base64_content = base64_row["content"] - # Row data to include in extraction - bool_index = base64_row.index.isin(("content",)) - row_data = base64_row[~bool_index] - task_props["params"]["row_data"] = row_data - # Get source_id - source_id = base64_row["source_id"] if "source_id" in base64_row.index else None - # Decode the base64 content - doc_bytes = base64.b64decode(base64_content) - - # Load the document - doc_stream = io.BytesIO(doc_bytes) - - # Type of extraction method to use - extract_method = task_props.get("method", "python_docx") - extract_params = task_props.get("params", {}) - if not hasattr(docx, extract_method): - extract_method = default - try: - func = getattr(docx, extract_method, default) - logger.debug("Running extraction method: %s", extract_method) - extracted_data = func(doc_stream, **extract_params) - - return extracted_data - - except Exception as e: - traceback.print_exc() - log_error_message = f"Error loading extractor:{e}" - logger.error(log_error_message) - logger.error(f"Failed on file:{source_id}") - - # Propagate error back and tag message as failed. 
- exception_tag = create_exception_tag(error_message=log_error_message, source_id=source_id) - - return exception_tag - try: # Apply the helper function to each row in the 'content' column - _decode_and_extract = functools.partial(decode_and_extract, task_props=task_props) - logger.debug(f"processing ({task_props.get('method', None)})") + _decode_and_extract = functools.partial( + decode_and_extract, task_props=task_props, validated_config=validated_config, trace_info=trace_info + ) sr_extraction = df.apply(_decode_and_extract, axis=1) sr_extraction = sr_extraction.explode().dropna() @@ -92,12 +103,14 @@ def decode_and_extract(base64_row, task_props, default="python_docx"): except Exception as e: traceback.print_exc() logger.error(f"Failed to extract text from document: {e}") + raise return df def generate_docx_extractor_stage( c: Config, + extractor_config: dict, task: str = "docx-extract", task_desc: str = "docx_content_extractor", pe_count: int = 24, @@ -109,6 +122,8 @@ def generate_docx_extractor_stage( ---------- c : Config Morpheus global configuration object + extractor_config : dict + Configuration parameters for document content extractor. task : str The task name to match for the stage worker function. task_desc : str @@ -121,7 +136,9 @@ def generate_docx_extractor_stage( MultiProcessingBaseStage A Morpheus stage with applied worker function. 
""" + validated_config = DocxExtractorSchema(**extractor_config) + _wrapped_process_fn = functools.partial(_process_docx_bytes, validated_config=validated_config) return MultiProcessingBaseStage( - c=c, pe_count=pe_count, task=task, task_desc=task_desc, process_fn=_process_docx_bytes, document_type="docx" + c=c, pe_count=pe_count, task=task, task_desc=task_desc, process_fn=_wrapped_process_fn, document_type="docx" ) diff --git a/src/nv_ingest/stages/extractors/image_extractor_stage.py b/src/nv_ingest/stages/extractors/image_extractor_stage.py index 9bf97029..c0e90c28 100644 --- a/src/nv_ingest/stages/extractors/image_extractor_stage.py +++ b/src/nv_ingest/stages/extractors/image_extractor_stage.py @@ -81,8 +81,6 @@ def decode_and_extract( source_id = base64_row["source_id"] if "source_id" in base64_row.index else None # Decode the base64 content image_bytes = base64.b64decode(base64_content) - - # Load the PDF image_stream = io.BytesIO(image_bytes) # Type of extraction method to use diff --git a/src/nv_ingest/stages/nim/chart_extraction.py b/src/nv_ingest/stages/nim/chart_extraction.py index 11d6ffa4..3890c62e 100644 --- a/src/nv_ingest/stages/nim/chart_extraction.py +++ b/src/nv_ingest/stages/nim/chart_extraction.py @@ -10,7 +10,6 @@ from typing import Tuple import pandas as pd -import tritonclient.grpc as grpcclient from morpheus.config import Config from nv_ingest.schemas.chart_extractor_schema import ChartExtractorSchema @@ -67,6 +66,7 @@ def _update_metadata(row: pd.Series, cached_client: NimClient, deplot_client: Ni (content_metadata.get("type") != "structured") or (content_metadata.get("subtype") != "chart") or (chart_metadata is None) + or (base64_image in [None, ""]) ): return metadata @@ -190,10 +190,8 @@ def _extract_chart_data( logger.error("Error occurred while extracting chart data.", exc_info=True) raise finally: - if isinstance(cached_client, grpcclient.InferenceServerClient): - cached_client.close() - if isinstance(deplot_client, 
grpcclient.InferenceServerClient): - deplot_client.close() + cached_client.close() + deplot_client.close() def generate_chart_extractor_stage( diff --git a/src/nv_ingest/stages/nim/table_extraction.py b/src/nv_ingest/stages/nim/table_extraction.py index 95bcf9cd..dd803af1 100644 --- a/src/nv_ingest/stages/nim/table_extraction.py +++ b/src/nv_ingest/stages/nim/table_extraction.py @@ -67,6 +67,7 @@ def _update_metadata(row: pd.Series, paddle_client: NimClient, trace_info: Dict) (content_metadata.get("type") != "structured") or (content_metadata.get("subtype") != "table") or (table_metadata is None) + or (base64_image in [None, ""]) ): return metadata @@ -172,8 +173,7 @@ def _extract_table_data( logger.error("Error occurred while extracting table data.", exc_info=True) raise finally: - if isinstance(paddle_client, NimClient): - paddle_client.close() + paddle_client.close() def generate_table_extractor_stage( diff --git a/src/nv_ingest/stages/pptx_extractor_stage.py b/src/nv_ingest/stages/pptx_extractor_stage.py index 9512a2f4..efbf848b 100644 --- a/src/nv_ingest/stages/pptx_extractor_stage.py +++ b/src/nv_ingest/stages/pptx_extractor_stage.py @@ -8,6 +8,7 @@ import io import logging import traceback +from typing import Any, Optional, Dict import pandas as pd from pydantic import BaseModel @@ -15,12 +16,61 @@ from nv_ingest.extraction_workflows import pptx from nv_ingest.stages.multiprocessing_stage import MultiProcessingBaseStage +from nv_ingest.schemas.pptx_extractor_schema import PPTXExtractorSchema from nv_ingest.util.exception_handlers.pdf import create_exception_tag logger = logging.getLogger(f"morpheus.{__name__}") -def _process_pptx_bytes(df, task_props): +def decode_and_extract(base64_row, task_props, validated_config: Any, trace_info: Dict, default="python_pptx"): + if isinstance(task_props, BaseModel): + task_props = task_props.model_dump() + + # Base64 content to extract + base64_content = base64_row["content"] + # Row data to include in extraction + 
bool_index = base64_row.index.isin(("content",)) + row_data = base64_row[~bool_index] + task_props["params"]["row_data"] = row_data + # Get source_id + source_id = base64_row["source_id"] if "source_id" in base64_row.index else None + # Decode the base64 content + pptx_bytes = base64.b64decode(base64_content) + + # Load the PPTX + pptx_stream = io.BytesIO(pptx_bytes) + + # Type of extraction method to use + extract_method = task_props.get("method", "python_pptx") + extract_params = task_props.get("params", {}) + if not hasattr(pptx, extract_method): + extract_method = default + try: + if validated_config.pptx_extraction_config is not None: + extract_params["pptx_extraction_config"] = validated_config.pptx_extraction_config + + if trace_info is not None: + extract_params["trace_info"] = trace_info + + func = getattr(pptx, extract_method, default) + logger.debug("Running extraction method: %s", extract_method) + extracted_data = func(pptx_stream, **extract_params) + + return extracted_data + + except Exception as e: + traceback.print_exc() + log_error_message = f"Error loading extractor:{e}" + logger.error(log_error_message) + logger.error(f"Failed on file:{source_id}") + + # Propagate error back and tag message as failed. + exception_tag = create_exception_tag(error_message=log_error_message, source_id=source_id) + + return exception_tag + + +def _process_pptx_bytes(df, task_props: dict, validated_config: Any, trace_info: Optional[Dict[str, Any]] = None): """ Processes a cuDF DataFrame containing PPTX files in base64 encoding. Each PPTX's content is replaced with its extracted text. @@ -32,52 +82,13 @@ def _process_pptx_bytes(df, task_props): Returns: - A pandas DataFrame with the PPTX content replaced by the extracted text. 
""" - - def decode_and_extract(base64_row, task_props, default="python_pptx"): - if isinstance(task_props, BaseModel): - task_props = task_props.model_dump() - - # Base64 content to extract - base64_content = base64_row["content"] - # Row data to include in extraction - bool_index = base64_row.index.isin(("content",)) - row_data = base64_row[~bool_index] - task_props["params"]["row_data"] = row_data - # Get source_id - source_id = base64_row["source_id"] if "source_id" in base64_row.index else None - # Decode the base64 content - pptx_bytes = base64.b64decode(base64_content) - - # Load the PPTX - pptx_stream = io.BytesIO(pptx_bytes) - - # Type of extraction method to use - extract_method = task_props.get("method", "python_pptx") - extract_params = task_props.get("params", {}) - if not hasattr(pptx, extract_method): - extract_method = default - try: - func = getattr(pptx, extract_method, default) - logger.debug("Running extraction method: %s", extract_method) - extracted_data = func(pptx_stream, **extract_params) - - return extracted_data - - except Exception as e: - traceback.print_exc() - log_error_message = f"Error loading extractor:{e}" - logger.error(log_error_message) - logger.error(f"Failed on file:{source_id}") - - # Propagate error back and tag message as failed. 
- exception_tag = create_exception_tag(error_message=log_error_message, source_id=source_id) - - return exception_tag - try: # Apply the helper function to each row in the 'content' column - _decode_and_extract = functools.partial(decode_and_extract, task_props=task_props) - logger.debug(f"processing ({task_props.get('method', None)})") + _decode_and_extract = functools.partial( + decode_and_extract, task_props=task_props, validated_config=validated_config, trace_info=trace_info + ) + + # logger.debug(f"processing ({task_props.get('method', None)})") sr_extraction = df.apply(_decode_and_extract, axis=1) sr_extraction = sr_extraction.explode().dropna() @@ -91,12 +102,14 @@ def decode_and_extract(base64_row, task_props, default="python_pptx"): except Exception as e: traceback.print_exc() logger.error(f"Failed to extract text from PPTX: {e}") + raise return df def generate_pptx_extractor_stage( c: Config, + extractor_config: dict, task: str = "pptx-extract", task_desc: str = "pptx_content_extractor", pe_count: int = 24, @@ -108,6 +121,8 @@ def generate_pptx_extractor_stage( ---------- c : Config Morpheus global configuration object + extractor_config : dict + Configuration parameters for document content extractor. task : str The task name to match for the stage worker function. task_desc : str @@ -121,6 +136,9 @@ def generate_pptx_extractor_stage( A Morpheus stage with applied worker function. 
""" + validated_config = PPTXExtractorSchema(**extractor_config) + _wrapped_process_fn = functools.partial(_process_pptx_bytes, validated_config=validated_config) + return MultiProcessingBaseStage( - c=c, pe_count=pe_count, task=task, task_desc=task_desc, process_fn=_process_pptx_bytes, document_type="pptx" + c=c, pe_count=pe_count, task=task, task_desc=task_desc, process_fn=_wrapped_process_fn, document_type="pptx" ) diff --git a/src/nv_ingest/util/converters/formats.py b/src/nv_ingest/util/converters/formats.py new file mode 100644 index 00000000..cfbe5dd8 --- /dev/null +++ b/src/nv_ingest/util/converters/formats.py @@ -0,0 +1,70 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. + +# pylint: skip-file + +import json + + +def ingest_json_results_to_blob(result_content): + """ + Parse a JSON string or BytesIO object, combine and sort entries, and create a blob string. + + Returns: + str: The generated blob string. 
+ """ + try: + # Load the JSON data + data = json.loads(result_content) if isinstance(result_content, str) else json.loads(result_content) + data = data["data"] + + # Smarter sorting: by page, then structured objects by x0, y0 + def sorting_key(entry): + page = entry["metadata"]["content_metadata"]["page_number"] + if entry["document_type"] == "structured": + # Use table location's x0 and y0 as secondary keys + x0 = entry["metadata"]["table_metadata"]["table_location"][0] + y0 = entry["metadata"]["table_metadata"]["table_location"][1] + else: + # Non-structured objects are sorted after structured ones + x0 = float("inf") + y0 = float("inf") + return page, x0, y0 + + data.sort(key=sorting_key) + + # Initialize the blob string + blob = [] + + for entry in data: + document_type = entry.get("document_type", "") + + if document_type == "structured": + # Add table content to the blob + blob.append(entry["metadata"]["table_metadata"]["table_content"]) + blob.append("\n") + + elif document_type == "text": + # Add content to the blob + blob.append(entry["metadata"]["content"]) + blob.append("\n") + + elif document_type == "image": + # Add image caption to the blob + caption = entry["metadata"]["image_metadata"].get("caption", "") + blob.append(f"image_caption:[{caption}]") + blob.append("\n") + + # Join all parts of the blob into a single string + return "".join(blob) + + except Exception as e: + print(f"[ERROR] An error occurred while processing JSON content: {e}") + return "" diff --git a/src/nv_ingest/util/nim/cached.py b/src/nv_ingest/util/nim/cached.py index 56513d08..1a7bf0c9 100644 --- a/src/nv_ingest/util/nim/cached.py +++ b/src/nv_ingest/util/nim/cached.py @@ -119,7 +119,7 @@ def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, An else: raise ValueError("Invalid protocol specified. 
Must be 'grpc' or 'http'.") - def process_inference_results(self, output: Any, **kwargs) -> Any: + def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Any: """ Process inference results for the Cached model. diff --git a/src/nv_ingest/util/nim/deplot.py b/src/nv_ingest/util/nim/deplot.py index 9cf6175d..63f16a3b 100644 --- a/src/nv_ingest/util/nim/deplot.py +++ b/src/nv_ingest/util/nim/deplot.py @@ -133,7 +133,7 @@ def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, An else: raise ValueError("Invalid protocol specified. Must be 'grpc' or 'http'.") - def process_inference_results(self, output: Any, **kwargs) -> Any: + def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Any: """ Process inference results for the Deplot model. diff --git a/src/nv_ingest/util/nim/helpers.py b/src/nv_ingest/util/nim/helpers.py index 61a41634..a692265f 100644 --- a/src/nv_ingest/util/nim/helpers.py +++ b/src/nv_ingest/util/nim/helpers.py @@ -75,7 +75,7 @@ def prepare_data_for_inference(self, data: dict): """ raise NotImplementedError("Subclasses should implement this method") - def process_inference_results(self, output_array, **kwargs): + def process_inference_results(self, output_array, protocol: str, **kwargs): """ Process the inference results from the model. 
@@ -206,7 +206,7 @@ def infer(self, data: dict, model_name: str, **kwargs) -> Any: response, protocol=self.protocol, data=prepared_data, **kwargs ) results = self.model_interface.process_inference_results( - parsed_output, original_image_shapes=data.get("original_image_shapes"), **kwargs + parsed_output, original_image_shapes=data.get("original_image_shapes"), protocol=self.protocol, **kwargs ) return results @@ -624,8 +624,8 @@ def call_audio_inference_model(client, audio_content: str, audio_id: str, trace_ headers = client["headers"] payload = {"audio_content": audio_content, "audio_id": audio_id} - response = requests.post(url, json=payload, headers=headers) - + response = requests.post(url, json=payload, headers=headers) + response.raise_for_status() # Raise an exception for HTTP errors # Parse the JSON response @@ -639,8 +639,3 @@ def call_audio_inference_model(client, audio_content: str, audio_id: str, trace_ raise RuntimeError(f"An error occurred during inference: {e}") return json_response - - - - - diff --git a/src/nv_ingest/util/nim/yolox.py b/src/nv_ingest/util/nim/yolox.py index d07f184e..831c4e62 100644 --- a/src/nv_ingest/util/nim/yolox.py +++ b/src/nv_ingest/util/nim/yolox.py @@ -16,7 +16,6 @@ import numpy as np import torch import torchvision -from packaging import version as pkgversion from PIL import Image from nv_ingest.util.image_processing.transforms import scale_image_to_encoding_size @@ -44,20 +43,6 @@ class YoloxPageElementsModelInterface(ModelInterface): An interface for handling inference with a Yolox object detection model, supporting both gRPC and HTTP protocols. """ - def __init__( - self, - yolox_version: Optional[str] = None, - ): - """ - Initialize the YOLOX model interface. - - Parameters - ---------- - yolox_version : str, optional - The version of the YOLOX model (default: None). - """ - self.yolox_version = yolox_version - def name( self, ) -> str: @@ -70,7 +55,7 @@ def name( The name of the model interface. 
""" - return f"yolox-page-elements (version {self.yolox_version})" + return "yolox-page-elements" def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]: """ @@ -86,16 +71,16 @@ def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]: dict The updated data dictionary with resized images and original image shapes. """ + if (not isinstance(data, dict)) or ("images" not in data): + raise KeyError("Input data must be a dictionary containing an 'images' key with a list of images.") + + if not all(isinstance(x, np.ndarray) for x in data["images"]): + raise ValueError("All elements in the 'images' list must be numpy.ndarray objects.") original_images = data["images"] - # Our yolox model expects images to be resized to 1024x1024 - resized_images = [ - resize_image(image, (YOLOX_IMAGE_PREPROC_WIDTH, YOLOX_IMAGE_PREPROC_HEIGHT)) for image in original_images - ] data["original_image_shapes"] = [image.shape for image in original_images] - data["resized_images"] = resized_images - return data # Return data with added 'resized_images' key + return data def format_input(self, data: Dict[str, Any], protocol: str) -> Any: """ @@ -121,16 +106,18 @@ def format_input(self, data: Dict[str, Any], protocol: str) -> Any: if protocol == "grpc": logger.debug("Formatting input for gRPC Yolox model") + # Our yolox-page-elements model (grPC) expects images to be resized to 1024x1024 + resized_images = [ + resize_image(image, (YOLOX_IMAGE_PREPROC_WIDTH, YOLOX_IMAGE_PREPROC_HEIGHT)) for image in data["images"] + ] # Reorder axes to match model input (batch, channels, height, width) - input_array = np.einsum("bijk->bkij", data["resized_images"]).astype(np.float32) + input_array = np.einsum("bijk->bkij", resized_images).astype(np.float32) return input_array elif protocol == "http": logger.debug("Formatting input for HTTP Yolox model") - # Additional lists to keep track of scaling factors and new sizes - scaling_factors = [] content_list = [] - for image 
in data["resized_images"]: + for image in data["images"]: # Convert numpy array to PIL Image image_pil = Image.fromarray((image * 255).astype(np.uint8)) original_size = image_pil.size # Should be (1024, 1024) @@ -148,26 +135,12 @@ def format_input(self, data: Dict[str, Any], protocol: str) -> Any: if new_size != original_size: logger.warning(f"Image was scaled from {original_size} to {new_size} to meet size constraints.") - # Compute scaling factor - scaling_factor_x = new_size[0] / YOLOX_IMAGE_PREPROC_WIDTH - scaling_factor_y = new_size[1] / YOLOX_IMAGE_PREPROC_HEIGHT - scaling_factors.append((scaling_factor_x, scaling_factor_y)) - # Add to content_list - if self._is_version_early_access_legacy_api(): - content = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{scaled_image_b64}"}} - else: - content = {"type": "image_url", "url": f"data:image/png;base64,{scaled_image_b64}"} + content = {"type": "image_url", "url": f"data:image/png;base64,{scaled_image_b64}"} content_list.append(content) - # Store scaling factors in data - data["scaling_factors"] = scaling_factors - - if self._is_version_early_access_legacy_api(): - payload = {"messages": [{"content": content_list}]} - else: - payload = {"input": content_list} + payload = {"input": content_list} return payload else: @@ -203,108 +176,30 @@ def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, An elif protocol == "http": logger.debug("Parsing output from HTTP Yolox model") - is_legacy_version = self._is_version_early_access_legacy_api() - - # Convert JSON response to numpy array similar to gRPC response - if is_legacy_version: - # Convert response data to GA API format. 
- response_data = response.get("data", []) - batch_results = [] - for idx, detections in enumerate(response_data): - curr_batch = {"index": idx, "bounding_boxes": {}} - for obj in detections: - obj_type = obj.get("type", "") - bboxes = obj.get("bboxes", []) - if not obj_type: - continue - if obj_type not in curr_batch: - curr_batch["bounding_boxes"][obj_type] = [] - curr_batch["bounding_boxes"][obj_type].extend(bboxes) - batch_results.append(curr_batch) - else: - batch_results = response.get("data", []) - - batch_size = len(batch_results) processed_outputs = [] - scaling_factors = data.get("scaling_factors", [(1.0, 1.0)] * batch_size) - - x_min_label = "xmin" if is_legacy_version else "x_min" - y_min_label = "ymin" if is_legacy_version else "y_min" - x_max_label = "xmax" if is_legacy_version else "x_max" - y_max_label = "ymax" if is_legacy_version else "y_max" - confidence_label = "confidence" - + batch_results = response.get("data", []) for detections in batch_results: - idx = int(detections["index"]) - scale_factor_x, scale_factor_y = scaling_factors[idx] - image_width = YOLOX_IMAGE_PREPROC_WIDTH - image_height = YOLOX_IMAGE_PREPROC_HEIGHT + new_bounding_boxes = {"table": [], "chart": [], "title": []} - # Initialize an empty tensor for detections - max_detections = 100 - detection_tensor = np.zeros((max_detections, 85), dtype=np.float32) - - index = 0 bounding_boxes = detections.get("bounding_boxes", []) for obj_type, bboxes in bounding_boxes.items(): for bbox in bboxes: - if index >= max_detections: - break - xmin_norm = bbox[x_min_label] - ymin_norm = bbox[y_min_label] - xmax_norm = bbox[x_max_label] - ymax_norm = bbox[y_max_label] - confidence = bbox[confidence_label] - - # Convert normalized coordinates to absolute pixel values in scaled image - xmin_scaled = xmin_norm * image_width * scale_factor_x - ymin_scaled = ymin_norm * image_height * scale_factor_y - xmax_scaled = xmax_norm * image_width * scale_factor_x - ymax_scaled = ymax_norm * image_height * 
scale_factor_y - - # Adjust coordinates back to 1024x1024 image space - xmin = xmin_scaled / scale_factor_x - ymin = ymin_scaled / scale_factor_y - xmax = xmax_scaled / scale_factor_x - ymax = ymax_scaled / scale_factor_y - - # YOLOX expects bbox format: center_x, center_y, width, height - center_x = (xmin + xmax) / 2 - center_y = (ymin + ymax) / 2 - width = xmax - xmin - height = ymax - ymin - - # Set the bbox coordinates - detection_tensor[index, 0] = center_x - detection_tensor[index, 1] = center_y - detection_tensor[index, 2] = width - detection_tensor[index, 3] = height - - # Objectness score - detection_tensor[index, 4] = confidence - - class_index = {"table": 0, "chart": 1, "title": 2}.get(obj_type, -1) - if class_index >= 0: - detection_tensor[index, 5 + class_index] = 1.0 - - index += 1 - - # Trim the detection tensor to the actual number of detections - detection_tensor = detection_tensor[:index, :] - processed_outputs.append(detection_tensor) - - # Pad batch if necessary - max_detections_in_batch = max([output.shape[0] for output in processed_outputs]) if processed_outputs else 0 - batch_output_array = np.zeros((batch_size, max_detections_in_batch, 85), dtype=np.float32) - for i, output in enumerate(processed_outputs): - batch_output_array[i, : output.shape[0], :] = output - - return batch_output_array + xmin = bbox["x_min"] + ymin = bbox["y_min"] + xmax = bbox["x_max"] + ymax = bbox["y_max"] + confidence = bbox["confidence"] + + new_bounding_boxes[obj_type].append([xmin, ymin, xmax, ymax, confidence]) + + processed_outputs.append(new_bounding_boxes) + + return processed_outputs else: raise ValueError("Invalid protocol specified. Must be 'grpc' or 'http'.") - def process_inference_results(self, output_array: np.ndarray, **kwargs) -> List[Dict[str, Any]]: + def process_inference_results(self, output: Any, protocol: str, **kwargs) -> List[Dict[str, Any]]: """ Process the results of the Yolox model inference and return the final annotations. 
@@ -320,7 +215,6 @@ def process_inference_results(self, output_array: np.ndarray, **kwargs) -> List[ list[dict] A list of annotation dictionaries for each image in the batch. """ - original_image_shapes = kwargs.get("original_image_shapes", []) num_classes = kwargs.get("num_classes", YOLOX_NUM_CLASSES) conf_thresh = kwargs.get("conf_thresh", YOLOX_CONF_THRESHOLD) @@ -328,14 +222,22 @@ def process_inference_results(self, output_array: np.ndarray, **kwargs) -> List[ min_score = kwargs.get("min_score", YOLOX_MIN_SCORE) final_thresh = kwargs.get("final_thresh", YOLOX_FINAL_SCORE) - pred = postprocess_model_prediction(output_array, num_classes, conf_thresh, iou_thresh, class_agnostic=True) + if protocol == "http": + # For http, the output already has postprocessing applied. Skip to table/chart expansion. + results = output - results = postprocess_results(pred, original_image_shapes, min_score=min_score) + elif protocol == "grpc": + # For grpc, apply the same NIM postprocessing. + pred = postprocess_model_prediction(output, num_classes, conf_thresh, iou_thresh, class_agnostic=True) + results = postprocess_results(pred, original_image_shapes, min_score=min_score) - annotation_dicts = [expand_chart_bboxes(annotation_dict) for annotation_dict in results] + # Table/chart expansion is "business logic" specific to nv-ingest + annotation_dicts = [expand_table_bboxes(annotation_dict) for annotation_dict in results] + annotation_dicts = [expand_chart_bboxes(annotation_dict) for annotation_dict in annotation_dicts] inference_results = [] # Filter out bounding boxes below the final threshold + # This final thresholding is "business logic" specific to nv-ingest for annotation_dict in annotation_dicts: new_dict = {} if "table" in annotation_dict: @@ -348,9 +250,6 @@ def process_inference_results(self, output_array: np.ndarray, **kwargs) -> List[ return inference_results - def _is_version_early_access_legacy_api(self): - return self.yolox_version and 
(pkgversion.parse(self.yolox_version) < pkgversion.parse("1.0.0-rc0")) - def postprocess_model_prediction(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): # Convert numpy array to torch tensor @@ -423,12 +322,14 @@ def postprocess_results(results, original_image_shapes, min_score=0.0): Keep only bboxes with high enough confidence. """ - labels = ["table", "chart", "title"] + class_labels = ["table", "chart", "title"] out = [] for original_image_shape, result in zip(original_image_shapes, results): + annotation_dict = {label: [] for label in class_labels} + if result is None: - out.append({}) + out.append(annotation_dict) continue try: @@ -447,29 +348,17 @@ def postprocess_results(results, original_image_shapes, min_score=0.0): bboxes[:, [1, 3]] /= original_image_shape[0] bboxes = np.clip(bboxes, 0.0, 1.0) - label_idxs = result[:, 6] + labels = result[:, 6] scores = scores[scores > min_score] except Exception as e: raise ValueError(f"Error in postprocessing {result.shape} and {original_image_shape}: {e}") - annotation_dict = {label: [] for label in labels} - - # bboxes are in format [x_min, y_min, x_max, y_max] - for j in range(len(bboxes)): - label = labels[int(label_idxs[j])] - bbox = bboxes[j] - score = scores[j] - - # additional preprocessing for tables: extend the upper bounds to capture titles if any. 
- if label == "table": - height = bbox[3] - bbox[1] - bbox[1] = (bbox[1] - height * 0.2).clip(0.0, 1.0) - - annotation_dict[label].append([round(float(x), 4) for x in np.concatenate((bbox, [score]))]) + for box, score, label in zip(bboxes, scores, labels): + class_name = class_labels[int(label)] + annotation_dict[class_name].append([round(float(x), 4) for x in np.concatenate((box, [score]))]) out.append(annotation_dict) - # {label: [[x1, y1, x2, y2, confidence], ...], ...} return out @@ -493,6 +382,37 @@ def resize_image(image, target_img_size): return image +def expand_table_bboxes(annotation_dict, labels=None): + """ + Additional preprocessing for tables: extend the upper bounds to capture titles if any. + Args: + annotation_dict: output of postprocess_results, a dictionary with keys "table", "figure", "title" + + Returns: + annotation_dict: same as input, with expanded bboxes for charts + + """ + if not labels: + labels = ["table", "chart", "title"] + + if not annotation_dict or len(annotation_dict["table"]) == 0: + return annotation_dict + + new_annotation_dict = {label: [] for label in labels} + + for label, bboxes in annotation_dict.items(): + for bbox_and_score in bboxes: + bbox, score = bbox_and_score[:4], bbox_and_score[4] + + if label == "table": + height = bbox[3] - bbox[1] + bbox[1] = max(0.0, min(1.0, bbox[1] - height * 0.2)) + + new_annotation_dict[label].append([round(float(x), 4) for x in bbox + [score]]) + + return new_annotation_dict + + def expand_chart_bboxes(annotation_dict, labels=None): """ Expand bounding boxes of charts and titles based on the bounding boxes of the other class. 
diff --git a/src/nv_ingest/util/pdf/metadata_aggregators.py b/src/nv_ingest/util/pdf/metadata_aggregators.py index 8c6237f7..3fac696e 100644 --- a/src/nv_ingest/util/pdf/metadata_aggregators.py +++ b/src/nv_ingest/util/pdf/metadata_aggregators.py @@ -29,7 +29,6 @@ from nv_ingest.util.exception_handlers.pdf import pdfium_exception_handler -# TODO(Devin): Shift to this, since there is no difference between ImageTable and ImageChart @dataclass class CroppedImageWithContent: content: str diff --git a/src/nv_ingest/util/pipeline/pipeline_builders.py b/src/nv_ingest/util/pipeline/pipeline_builders.py index 5a1c25cb..4d2d519a 100644 --- a/src/nv_ingest/util/pipeline/pipeline_builders.py +++ b/src/nv_ingest/util/pipeline/pipeline_builders.py @@ -30,9 +30,8 @@ def setup_ingestion_pipeline( ######################################################################################################## pdf_extractor_stage = add_pdf_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count) image_extractor_stage = add_image_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count) - docx_extractor_stage = add_docx_extractor_stage(pipe, morpheus_pipeline_config, default_cpu_count) - pptx_extractor_stage = add_pptx_extractor_stage(pipe, morpheus_pipeline_config, default_cpu_count) - ## audio extraction + docx_extractor_stage = add_docx_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count) + pptx_extractor_stage = add_pptx_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count) audio_extractor_stage = add_audio_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count) ######################################################################################################## @@ -80,7 +79,6 @@ def setup_ingestion_pipeline( pipe.add_edge(docx_extractor_stage, pptx_extractor_stage) pipe.add_edge(pptx_extractor_stage, audio_extractor_stage) 
pipe.add_edge(audio_extractor_stage, image_dedup_stage) - pipe.add_edge(image_dedup_stage, image_filter_stage) pipe.add_edge(image_filter_stage, table_extraction_stage) pipe.add_edge(table_extraction_stage, chart_extraction_stage) diff --git a/src/nv_ingest/util/pipeline/stage_builders.py b/src/nv_ingest/util/pipeline/stage_builders.py index b5153cbb..8780e757 100644 --- a/src/nv_ingest/util/pipeline/stage_builders.py +++ b/src/nv_ingest/util/pipeline/stage_builders.py @@ -274,16 +274,28 @@ def add_image_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, def extractor_config=image_extractor_config, pe_count=8, task="extract", - task_desc="docx_content_extractor", + task_desc="image_content_extractor", ) ) return image_extractor_stage -def add_docx_extractor_stage(pipe, morpheus_pipeline_config, default_cpu_count): +def add_docx_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count): + yolox_grpc, yolox_http, yolox_auth, yolox_protocol = get_table_detection_service("yolox") + docx_extractor_config = ingest_config.get( + "docx_extraction_module", + { + "docx_extraction_config": { + "yolox_endpoints": (yolox_grpc, yolox_http), + "yolox_infer_protocol": yolox_protocol, + "auth_token": yolox_auth, + } + }, + ) docx_extractor_stage = pipe.add_stage( generate_docx_extractor_stage( morpheus_pipeline_config, + extractor_config=docx_extractor_config, pe_count=1, task="extract", task_desc="docx_content_extractor", @@ -292,10 +304,22 @@ def add_docx_extractor_stage(pipe, morpheus_pipeline_config, default_cpu_count): return docx_extractor_stage -def add_pptx_extractor_stage(pipe, morpheus_pipeline_config, default_cpu_count): +def add_pptx_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count): + yolox_grpc, yolox_http, yolox_auth, yolox_protocol = get_table_detection_service("yolox") + pptx_extractor_config = ingest_config.get( + "pptx_extraction_module", + { + "pptx_extraction_config": { + "yolox_endpoints": 
(yolox_grpc, yolox_http), + "yolox_infer_protocol": yolox_protocol, + "auth_token": yolox_auth, + } + }, + ) pptx_extractor_stage = pipe.add_stage( generate_pptx_extractor_stage( morpheus_pipeline_config, + extractor_config=pptx_extractor_config, pe_count=1, task="extract", task_desc="pptx_content_extractor", @@ -332,17 +356,20 @@ def get_audio_retrieval_service(env_var_prefix): return grpc_endpoint, http_endpoint, auth_token, infer_protocol + def add_audio_extractor_stage(pipe, morpheus_pipeline_config, ingest_config, default_cpu_count): audio_grpc, audio_http, audio_auth, audio_infer_protocol = get_audio_retrieval_service("audio") - audio_extractor_config = ingest_config.get("audio_extraction_module", - { - "audio_extraction_config": { - "audio_endpoints": (audio_grpc, audio_http), - "audio_infer_protocol": audio_infer_protocol, - "auth_token": audio_auth, - # All auth tokens are the same for the moment - } - }) + audio_extractor_config = ingest_config.get( + "audio_extraction_module", + { + "audio_extraction_config": { + "audio_endpoints": (audio_grpc, audio_http), + "audio_infer_protocol": audio_infer_protocol, + "auth_token": audio_auth, + # All auth tokens are the same for the moment + } + }, + ) audio_extractor_stage = pipe.add_stage( generate_audio_extractor_stage( morpheus_pipeline_config, diff --git a/src/util/image_viewer.py b/src/util/image_viewer.py index b47ccbdd..cebac902 100644 --- a/src/util/image_viewer.py +++ b/src/util/image_viewer.py @@ -31,12 +31,33 @@ def load_images_from_json(json_file_path): with open(json_file_path, "r") as file: data = json.load(file) + def create_default_image(): + """Create a solid black 300×300 image.""" + width, height = 300, 300 + default_img = Image.new("RGB", (width, height), color="black") + return default_img + images = [] for item in data: # Assuming the JSON is a list of objects if item["document_type"] in ("image", "structured"): - image_data = base64.b64decode(item["metadata"]["content"]) - image = 
Image.open(BytesIO(image_data)) - images.append(image) + content = item.get("metadata", {}).get("content", "") + # Check if content is missing or empty + if not content: + images.append(create_default_image()) + continue + + # Attempt to decode and open the image + try: + image_data = base64.b64decode(content) + temp_image = Image.open(BytesIO(image_data)) + # Verify & re-open to ensure no corruption or errors + temp_image.verify() + temp_image = Image.open(BytesIO(image_data)) + images.append(temp_image) + except Exception: + # If there's any error decoding/reading the image, use the default + images.append(create_default_image()) + return images diff --git a/tests/nv_ingest/extraction_workflows/docx/test_docx_helper.py b/tests/nv_ingest/extraction_workflows/docx/test_docx_helper.py index e56d003d..341ea68c 100644 --- a/tests/nv_ingest/extraction_workflows/docx/test_docx_helper.py +++ b/tests/nv_ingest/extraction_workflows/docx/test_docx_helper.py @@ -9,6 +9,7 @@ import pytest from nv_ingest.extraction_workflows.docx.docx_helper import python_docx +from nv_ingest.schemas.metadata_schema import ImageTypeEnum @pytest.fixture @@ -37,6 +38,7 @@ def test_docx_all_text(doc_stream, document_df): extract_text=True, extract_images=False, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], ) @@ -64,6 +66,7 @@ def test_docx_all_text(doc_stream, document_df): assert extracted_data[0][1]["source_metadata"]["source_id"] == "woods_frost" +@pytest.mark.xfail(reason="Table extract requires yolox, disabling for now") def test_docx_table(doc_stream, document_df): """ Validate text and table extraction. Table content is converted into markdown text. 
@@ -73,6 +76,7 @@ def test_docx_table(doc_stream, document_df): extract_text=True, extract_images=False, extract_tables=True, + extract_charts=False, row_data=document_df.iloc[0], ) @@ -108,11 +112,11 @@ def test_docx_image(doc_stream, document_df): doc_stream, extract_text=True, extract_images=True, - extract_tables=True, + extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], ) - expected_captions = ["*Figure 1: Snowy Woods*", "*Figure 2: Robert Frost*"] expected_text_cnt = 1 expected_image_cnt = 2 expected_entry_cnt = expected_image_cnt + expected_text_cnt @@ -133,11 +137,4 @@ def test_docx_image(doc_stream, document_df): assert extracted_data[idx][0] == "image" # validate image type - assert extracted_data[idx][1]["image_metadata"]["image_type"] == "jpeg" - - # validate captions - expected_caption = expected_captions[idx] - extracted_caption = extracted_data[idx][1]["image_metadata"]["caption"] - assert extracted_caption == expected_caption - - assert image_cnt == expected_image_cnt + assert extracted_data[idx][1]["image_metadata"]["image_type"] == ImageTypeEnum.image_type_1 diff --git a/tests/nv_ingest/extraction_workflows/image/test_image_handlers.py b/tests/nv_ingest/extraction_workflows/image/test_image_handlers.py index e4f41c5d..da0c358e 100644 --- a/tests/nv_ingest/extraction_workflows/image/test_image_handlers.py +++ b/tests/nv_ingest/extraction_workflows/image/test_image_handlers.py @@ -7,7 +7,6 @@ from nv_ingest.extraction_workflows.image.image_handlers import convert_svg_to_bitmap from nv_ingest.extraction_workflows.image.image_handlers import extract_table_and_chart_images from nv_ingest.extraction_workflows.image.image_handlers import load_and_preprocess_image -from nv_ingest.extraction_workflows.image.image_handlers import process_inference_results from nv_ingest.util.pdf.metadata_aggregators import CroppedImageWithContent @@ -119,142 +118,6 @@ def test_convert_svg_to_bitmap_large_svg(): assert np.all(result[:, :, 2] == 255) 
# Blue channel fully on -def test_process_inference_results_basic_case(): - """Test process_inference_results with a typical valid input.""" - - # Simulated model output array for a single image with several detections. - # Array format is (batch_size, num_detections, 85) - 80 classes + 5 box coordinates - # For simplicity, use random values for the boxes and class predictions. - output_array = np.zeros((1, 3, 85), dtype=np.float32) - - # Mock bounding box coordinates - output_array[0, 0, :4] = [0.5, 0.5, 0.2, 0.2] # x_center, y_center, width, height - output_array[0, 1, :4] = [0.6, 0.6, 0.2, 0.2] - output_array[0, 2, :4] = [0.7, 0.7, 0.2, 0.2] - - # Mock object confidence scores - output_array[0, :, 4] = [0.8, 0.9, 0.85] - - # Mock class scores (set class 1 with highest confidence for simplicity) - output_array[0, 0, 5 + 1] = 0.7 - output_array[0, 1, 5 + 1] = 0.75 - output_array[0, 2, 5 + 1] = 0.72 - - original_image_shapes = [(640, 640)] # Original shape of the image before resizing - - # Process inference results with thresholds that should retain all mock detections - results = process_inference_results( - output_array, - original_image_shapes, - num_classes=80, - conf_thresh=0.5, - iou_thresh=0.5, - min_score=0.1, - final_thresh=0.3, - ) - - # Check output structure - assert isinstance(results, list) - assert len(results) == 1 - assert isinstance(results[0], dict) - - # Validate bounding box scaling and structure - assert "chart" in results[0] or "table" in results[0] - if "chart" in results[0]: - assert isinstance(results[0]["chart"], list) - assert len(results[0]["chart"]) > 0 - # Check bounding box format for each detected "chart" item (5 values per box) - for bbox in results[0]["chart"]: - assert len(bbox) == 5 # [x1, y1, x2, y2, score] - assert bbox[4] >= 0.3 # score meets final threshold - - print("Processed inference results:", results) - - -def test_process_inference_results_multiple_images(): - """Test with multiple images to verify batch 
processing.""" - # Simulate model output with 2 images and 3 detections each - output_array = np.zeros((2, 3, 85), dtype=np.float32) - # Set bounding boxes and confidence for the mock detections - output_array[0, 0, :5] = [0.5, 0.5, 0.2, 0.2, 0.8] - output_array[0, 1, :5] = [0.6, 0.6, 0.2, 0.2, 0.7] - output_array[1, 0, :5] = [0.4, 0.4, 0.1, 0.1, 0.9] - # Assign class confidences for classes 0 and 1 - output_array[0, 0, 5 + 1] = 0.75 - output_array[0, 1, 5 + 1] = 0.65 - output_array[1, 0, 5 + 0] = 0.8 - - original_image_shapes = [(640, 640), (800, 800)] - - results = process_inference_results( - output_array, - original_image_shapes, - num_classes=80, - conf_thresh=0.5, - iou_thresh=0.5, - min_score=0.1, - final_thresh=0.3, - ) - - assert isinstance(results, list) - assert len(results) == 2 - for result in results: - assert isinstance(result, dict) - if "chart" in result: - assert all(len(bbox) == 5 and bbox[4] >= 0.3 for bbox in result["chart"]) - - -def test_process_inference_results_high_confidence_threshold(): - """Test with a high confidence threshold to verify filtering.""" - output_array = np.zeros((1, 5, 85), dtype=np.float32) - # Set low confidence scores below the threshold - output_array[0, :, 4] = [0.2, 0.3, 0.4, 0.4, 0.2] - output_array[0, :, 5] = [0.5] * 5 # Class confidence - - original_image_shapes = [(640, 640)] - - results = process_inference_results( - output_array, - original_image_shapes, - num_classes=80, - conf_thresh=0.9, # High confidence threshold - iou_thresh=0.5, - min_score=0.1, - final_thresh=0.3, - ) - - assert isinstance(results, list) - assert len(results) == 1 - assert results[0] == {} # No detections should pass the high confidence threshold - - -def test_process_inference_results_varied_num_classes(): - """Test compatibility with different model class counts.""" - output_array = np.zeros((1, 3, 25), dtype=np.float32) # 20 classes + 5 box coords - # Assign box, object confidence, and class scores - output_array[0, 0, :5] = [0.5, 
0.5, 0.2, 0.2, 0.8] - output_array[0, 1, :5] = [0.6, 0.6, 0.3, 0.3, 0.7] - output_array[0, 0, 5 + 1] = 0.9 # Assign highest confidence to class 1 - - original_image_shapes = [(640, 640)] - - results = process_inference_results( - output_array, - original_image_shapes, - num_classes=20, # Different class count - conf_thresh=0.5, - iou_thresh=0.5, - min_score=0.1, - final_thresh=0.3, - ) - - assert isinstance(results, list) - assert len(results) == 1 - assert isinstance(results[0], dict) - assert "chart" in results[0] - assert len(results[0]["chart"]) > 0 # Verify detections processed correctly with 20 classes - - def crop_image(image: np.ndarray, bbox: Tuple[int, int, int, int]) -> np.ndarray: """Mock function to simulate cropping an image.""" h1, w1, h2, w2 = bbox diff --git a/tests/nv_ingest/extraction_workflows/pptx/test_pptx_helper.py b/tests/nv_ingest/extraction_workflows/pptx/test_pptx_helper.py index 1a85c95c..43e799d9 100644 --- a/tests/nv_ingest/extraction_workflows/pptx/test_pptx_helper.py +++ b/tests/nv_ingest/extraction_workflows/pptx/test_pptx_helper.py @@ -1,8 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 - - +import json from io import BytesIO from textwrap import dedent @@ -220,6 +219,7 @@ def test_pptx(pptx_stream_with_text, document_df): extract_text=True, extract_images=False, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], ) @@ -267,6 +267,7 @@ def test_pptx_with_multiple_runs_in_title(pptx_stream_with_multiple_runs_in_titl extract_text=True, extract_images=False, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], ) @@ -299,6 +300,7 @@ def test_pptx_text_depth_presentation(pptx_stream_with_text, document_df): extract_text=True, extract_images=False, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], text_depth="document", ) @@ -343,6 +345,7 @@ def test_pptx_text_depth_shape(pptx_stream_with_text, document_df): extract_text=True, extract_images=False, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], text_depth="block", ) @@ -397,6 +400,7 @@ def test_pptx_text_depth_para_run(pptx_stream_with_text, document_df, text_depth extract_text=True, extract_images=False, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], text_depth=text_depth, ) @@ -441,6 +445,7 @@ def test_pptx_bullet(pptx_stream_with_bullet, document_df): extract_text=True, extract_images=False, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], ) @@ -473,6 +478,7 @@ def test_pptx_group(pptx_stream_with_group, document_df): extract_text=True, extract_images=False, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], ) @@ -502,6 +508,7 @@ def test_pptx_table(pptx_stream_with_table, document_df): extract_text=True, extract_images=False, extract_tables=True, + extract_charts=False, row_data=document_df.iloc[0], ) @@ -524,7 +531,7 @@ def test_pptx_table(pptx_stream_with_table, document_df): | Baz | Qux | """ ) - assert extracted_data[0][1]["content"].rstrip() == expected_content.rstrip() + 
assert extracted_data[0][1]["table_metadata"]["table_content"].rstrip() == expected_content.rstrip() def test_pptx_image(pptx_stream_with_image, document_df): @@ -533,14 +540,17 @@ def test_pptx_image(pptx_stream_with_image, document_df): extract_text=True, extract_images=True, extract_tables=False, + extract_charts=False, row_data=document_df.iloc[0], ) assert isinstance(extracted_data, list) assert len(extracted_data) == 2 assert len(extracted_data[0]) == 3 - assert extracted_data[0][0] == "image" + + assert extracted_data[0][0] == "text" assert extracted_data[0][1]["source_metadata"]["source_id"] == "source1" assert isinstance(extracted_data[0][2], str) - assert extracted_data[0][1]["content"][:10] == "iVBORw0KGg" # PNG format header + assert extracted_data[1][0] == "image" + assert extracted_data[1][1]["content"][:10] == "iVBORw0KGg" # PNG format header diff --git a/tests/nv_ingest/modules/sources/test_message_broker_task_source.py b/tests/nv_ingest/modules/sources/test_message_broker_task_source.py index a824f60d..cdb389e2 100644 --- a/tests/nv_ingest/modules/sources/test_message_broker_task_source.py +++ b/tests/nv_ingest/modules/sources/test_message_broker_task_source.py @@ -52,7 +52,7 @@ def job_payload(): "params": {}, }, }, - {"type": "embed", "task_properties": {"text": True, "tables": True}}, + {"type": "embed", "task_properties": {}}, ], } ) diff --git a/tests/nv_ingest/schemas/test_ingest_job_schema.py b/tests/nv_ingest/schemas/test_ingest_job_schema.py index 97045fbb..d0338666 100644 --- a/tests/nv_ingest/schemas/test_ingest_job_schema.py +++ b/tests/nv_ingest/schemas/test_ingest_job_schema.py @@ -37,10 +37,7 @@ def valid_task_properties(task_type): elif task_type == TaskTypeEnum.store: return {"images": True, "structured": True, "method": "minio", "params": {"endpoint": "minio:9000"}} elif task_type == TaskTypeEnum.embed: - return { - "text": True, - "tables": True, - } + return {} elif task_type == TaskTypeEnum.filter: return { "content_type": 
"image", @@ -179,10 +176,7 @@ def test_multiple_task_types(): }, { "type": "embed", - "task_properties": { - "text": True, - "tables": True, - }, + "task_properties": {}, }, { "type": "filter", diff --git a/tests/nv_ingest/util/converters/multimodal_test_raw_results.json b/tests/nv_ingest/util/converters/multimodal_test_raw_results.json new file mode 100644 index 00000000..ff3b09ac --- /dev/null +++ b/tests/nv_ingest/util/converters/multimodal_test_raw_results.json @@ -0,0 +1 @@ +{"status": "success", "description": "Successfully processed the message.", "data": [{"document_type": "text", "metadata": {"chart_metadata": null, "content": "TestingDocument\r\nA sample document with headings and placeholder text\r\nIntroduction\r\nThis is a placeholder document that can be used for any purpose. It contains some \r\nheadings and some placeholder text to fill the space. The text is not important and contains \r\nno real value, but it is useful for testing. Below, we will have some simple tables and charts \r\nthat we can use to confirm Ingest is working as expected.\r\nTable 1\r\nThis table describes some animals, and some activities they might be doing in specific \r\nlocations.\r\nAnimal Activity Place\r\nGira@e Driving a car At the beach\r\nLion Putting on sunscreen At the park\r\nCat Jumping onto a laptop In a home o@ice\r\nDog Chasing a squirrel In the front yard\r\nChart 1\r\nThis chart shows some gadgets, and some very fictitious costs. Section One\r\nThis is the first section of the document. It has some more placeholder text to show how \r\nthe document looks like. The text is not meant to be meaningful or informative, but rather to \r\ndemonstrate the layout and formatting of the document.\r\n\u2022 This is the first bullet point\r\n\u2022 This is the second bullet point\r\n\u2022 This is the third bullet point\r\nSection Two\r\nThis is the second section of the document. It is more of the same as we\u2019ve seen in the rest \r\nof the document. 
The content is meaningless, but the intent is to create a very simple \r\nsmoke test to ensure extraction is working as intended. This will be used in CI as time goes \r\non to ensure that changes we make to the library do not negatively impact our accuracy.\r\nTable 2\r\nThis table shows some popular colors that cars might come in.\r\nCar Color1 Color2 Color3\r\nCoupe White Silver Flat Gray\r\nSedan White Metallic Gray Matte Gray\r\nMinivan Gray Beige Black\r\nTruck Dark Gray Titanium Gray Charcoal\r\nConvertible Light Gray Graphite Slate Gray\r\nPicture\r\nBelow, is a high-quality picture of some shapes. Chart 2\r\nThis chart shows some average frequency ranges for speaker drivers.\r\nConclusion\r\nThis is the conclusion of the document. It has some more placeholder text, but the most \r\nimportant thing is that this is the conclusion. As we end this document, we should have \r\nbeen able to extract 2 tables, 2 charts, and some text including 3 bullet points.", "content_metadata": {"description": "Unstructured text from PDF document.", "hierarchy": {"block": -1, "line": -1, "nearby_objects": {"images": {"bbox": [], "content": []}, "structured": {"bbox": [], "content": []}, "text": {"bbox": [], "content": []}}, "page": -1, "page_count": 3, "span": -1}, "page_number": -1, "subtype": "", "type": "text"}, "content_url": "", "debug_metadata": null, "embedding": null, "error_metadata": null, "image_metadata": null, "info_message_metadata": null, "raise_on_failure": false, "source_metadata": {"access_level": 1, "collection_id": "", "date_created": "2025-01-16T21:56:47.531787", "last_modified": "2025-01-16T21:56:47.531632", "partition_id": -1, "source_id": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_location": "", "source_name": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_type": "PDF", "summary": ""}, "table_metadata": null, "text_metadata": {"keywords": "", "language": "en", "summary": "", "text_location": [-1, -1, 
-1, -1], "text_type": "document"}}}, {"document_type": "structured", "metadata": {"chart_metadata": null, "content": "", "content_metadata": {"description": "Structured table extracted from PDF document.", "hierarchy": {"block": -1, "line": -1, "nearby_objects": {"images": {"bbox": [], "content": []}, "structured": {"bbox": [], "content": []}, "text": {"bbox": [], "content": []}}, "page": 0, "page_count": 3, "span": -1}, "page_number": 0, "subtype": "table", "type": "structured"}, "content_url": "", "debug_metadata": null, "embedding": null, "error_metadata": null, "image_metadata": null, "info_message_metadata": null, "raise_on_failure": false, "source_metadata": {"access_level": 1, "collection_id": "", "date_created": "2025-01-16T21:56:47.531787", "last_modified": "2025-01-16T21:56:47.531632", "partition_id": -1, "source_id": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_location": "", "source_name": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_type": "PDF", "summary": ""}, "table_metadata": {"caption": "", "table_content": "| locations. |\n| Activity | Place |\n| Animal |\n| Giraffe | Driving a car. 
| At the beach |\n| Lion | Putting on sunscreen | At the park |\n| Cat |\n| Jumping onto a laptop | In a home office |\n| Dog |\n| Chasing a squirrel | In the front yard |\n", "table_content_format": "pseudo_markdown", "table_format": "image", "table_location": [533.2992, 134.96189999999999, 717.7728, 1051.4446], "table_location_max_dimensions": [1536, 1187], "uploaded_image_uri": ""}, "text_metadata": null}}, {"document_type": "structured", "metadata": {"chart_metadata": null, "content": "iVBORw0KGgoAAAANSUhEUgAAA8MAAAIUCAIAAACxW2lCAADy1UlEQVR4nOzdd2BV5fk48Pecc/fKzc3ek2wyCGGEETYBAVFRwN2q1Y6vVltbx7daW1utrf22Vq3WWrTWBbJkEzaBkATIIAtIQiB733tzc/c57++Ppzm/axJijKzg8/lDwz3rPe8Z97nved/nMJRS8u1QShmGaW5uNplMSUlJ8M9vuc5vRBAElmUvVzZCyDUuD0LXBVx6XV1dzc3NaWlp1/5KvL5G3t8R7hIIXXFwNpaVlYWEhPj6+n7XLsYbB17433Fw6VVVVXl5eYWEhFylK1FyBdcF5+u1v1+McJ3gzQt9d8DZznHc9boSr6+R9xe/TdG1BGcjy7Icx5Hv3sV448AL/ztOvBKv6lauWCTNsmxjY6PRaLyOP74Hta/jzQt9p8Cl53a7nU7niRMnvrPNYDfsfeA7e0S+g+BYW61Wk8kkkUjw0F9L4h0A6xzBpdfX15eSknL1tnLFImmNRhMSEoK//xC6vhiGYRhGEITrXRCEvusMBgOl9Nt3oUQIfRs6nU6j0Vy99TN4kSOEEEIIITQGV7KfNAblCCGEEELohnJVu/pgmzRCCCGEEEJjgd2aEUIIIYQQGguMpBFCCCGEEBoLjKQRQgghhBAaC4ykEUIIIYQQGguMpBFCCCGEEBoLjKQRQgghhBAaC4ykEUIIIYQQGguMpBFCCCGEEBoLjKQRQgghhBAaC4ykEUIIIYQQGguMpBFCCCGEEBoLjKQRQgghhBAaC4ykEUIIIYQQGguMpBFCCCGEEBoLjKQRQgghhBAaC4ykEUIIIYQQGguMpBFCCCGEEBoLjKQRQgghhBAaC8lVXTulVKCUUEIYwjAMHfibZRiGYYbOzwsCoYRhGXa4qTcySgkhlAy7VwghhBBC6GbEUEqvxnopJQIVOPaybd68MNLU8YVSiiE0QgghhNB3zVWJpMXIsq3HeLikqri6vrmrx+F0KeWy2NDAyQnRs9OTvNRKSv/biEsJYQhxuNz/3La/19yfk5E4Ky1RoPRatkwLAqWEMoRh2W+2USiny+222p1ymVQhk16lEiKEEEIIoRvKle/dAWG03el67ZMvP9h5+EJrB7HaiUAJxxBeIBwn0ajiwoKeXL304WXzIJgmlBKGsTucv/7Xxq7Glmd/fM+stMT/TrpWWJYh5JttjlLq5gWphDvb2PrQ799p7eq5c372q4+t5XmB426S5naEEEIIIXQ5VziShjC6t6//7pf+tvtECcOwgQZ99uy45KgwnVrZbeorr7t4vOJ8dUPzo6/9s/Rcw1+eeIBlWMIQhhCGYfQaVa9Wo5TJrmypRqOivrHL1Oen1yVHhdKvi6mh/zfHslIJV1xd9+Dv3q5tbnfa7M2dPdeouAghhBBC6Hq7kpE0JVSglHfzD/zu7d0nSpQK+aLJqb/9weqJ0WGesx0pq/n5mx9VXmh864vd/gavFx68gxcEaH7mBYEXBMGjw4nY+
WTkjsgw4E+MgIedeWBMoOc/B/5NydNvf7z74IllC2Z8+YeneUHgRhz0yDAMxzCdRvO/dhz8y/pd7T0mHy9tj8DLpFd3BCdCCCGEELpxXMlOCIJAOZb9x5f7tx07pZDL5qQnrf/tTydGhwkCdfMCzwtuXhAonZ2WsPXVp6ODA5RKxTub8y51dA079BCiamYALwjDdummlPK8wDCEYRhWnJkfZmaGiGEz4XkBWsGZgc9ZlmFYlmX//yfDgtXWt3T84LV/5PzkpWff+bSts2dqUqxWqRBc/FUavokQQgghhG5AVyySppRyLNvXb3t7016ZVOqr07711PdlUomb51mWkXAsx7ESjmUZxulyB/noH1+Va3M4W7t6Nx8uHro2QRBkEgnHsg6Xy2SxOt1ujmX/m0fPczZKGYbhOJYXhC5TX2u3saPX7HS5OW6YmelAORlCoB9zt6nP4XITQii0UDODZx6mYJQSQqovNL334ebq+iYvtfrJtcs++fX/yKQcwQweCCGEEELfJVesN4IgUI5j9hSV1be287xw17zpUcH+PC9IOG7wJiUcpXTRlLTkqNBus6W+uX3QDAxhWJY9Wlb90Z6jFReaLFa7VqXInhj3zL0rfXQaMTEIpZRlmPYe01837Dp25uz5plZjn1WrUsSEBM7JSHxy9S1+eh0E0wzDNLR1/uTP/zL1217/yb1TEmPf2rRna/6p5s6eexfPOn7mrNPlqr7YotRpSs81LH36VUKI2WL7xT3LV8ycPChbH0TKep162szJudPSbp05OX1CZG+fxenmr+X4SIQQQgghdN1d4X69p85dcLjcSrlsaXY6pXTYgXvQATk8wPfwmy/ygiCXSj3zMTOEchz77tZ9j73yDnG6iEpJCCGUHi+pPllTv/0Pv1Ap5JRSSgnLMuV1l1a/+Jea8xcJyyZEhyWEh7R2GwvKawpKq/YUlW/7wy+CfPQQ4vfbHYdLqy1mS2N796f7jv/lvfVELuNUigstHdvzTxGXS6XTKGTSTqN515FiIuFIb9+FedOIR0ft/xaeZQkh05InFLz7W/hEEKjLzWNrNEIIIYTQd80Vi6QhxKy+0MQwjK+XLiowYPjXGA5gGOLjpYW/xWiV5wWlSvXv3UfOXWxelJ354NLZcWFB/TbHXzbszCs+c+h01cbDRffnznbzAscybl74xdsf19Q3RkWEvPrY2kVT0vQaldFi3XDgxP/+8/PTlbV//GTb/z1+P6UCIYRlGLVCrpBKX/3P1pPVtXNyptySPSk62D8hIvihZXMFQfjF3z8pKKmamZnypx/f63S5BIHGhgUSQobtw82xLCVEEARCCfQkuVLViBBCCCGExosrE0lTQhiGuNz8hbZOQohBpw7y1ZNR5JIb+iHLMg2tnS89vPqF790hfpgUGTL90RfqLC37T1XcnzubUoFhJGcvNR0/c5aw7P+sWnzXvOmwQr1G9ciKeQdOV3y+N/9gSaXT5f5vPg1KBEqdbnd53aXXfnLfU6tvGRQie2vVvNNl8NJMToj2/PxyUTID8TQOMUQIIYQQ+q66ork7qOBwuiklcqlUKZeRr0tdJ+blED/hWKbfartv8awXvneHQCnPC4IguHneV69LjQ2nbndzZw8vCBzLEUKau4wqhTwswHdRVqrT5XY4XbxAnW43LwgzJsZTQnpMlh6zRVy5hOP6+m0/vWvp02uXM4Rx8wIvCDylkCTEzQuEYdxugRcEWAlGyQghhBBCaARXtp/0f6NiSikdRSKLgVDVYzaGoQKNCPSllArCf0crUl6ghOjUSkKIy83zvCCTSiilcycl1XzyZ5ZhdGqVx1pZQgjE8ZRQF88PrJkQSiil8eHBAqW8wEslEvLf1nSGDKS9Y5j/ducYtlMHQgghhBBCoisZSUs5Vq9VMwzps9l7zBYfL+3I8bQ4aVDjr9PlHtRWzfx3nOJXEttJOE6vURNCKuobS2svnr3U0t5j7DL2WR3O5q4epUrBC8LQjTpcLpZhB
OzZjBBCCCGEvp0rE0kzhAgCZVk2KTL0ROX5LqO5pav3ayNpp8vNCwLLsjLJVzLljWYAHyWEJeTAqYo/frrtcEm1zdRHZFK9l9ZPr9WqlHaHiyHDJ4XG0YEIIYQQQuiKuHL5pKnAEi4xPJhhGKPFWnK+YWJM+LAdjSG8Nvb1r/rf/2vp6pmVnvju0498o21BJumDpytvffZPFnN/Ymz4T358T/bE+EAfb1+dRiLh3t9+8OHf/d2g01yRXUMIIYQQQmioK5cFj2EIIXMnp3iplP1256f7jt+fO5tSQodk8BAEyrCkvO7SkbJql80+f/LEb7QheNUKLwh//my7xdKfEhe547Vfhgf6/nfllPK84HC5viZvCEIIIYQQQt/OFRtXx7IspTQzLionI4kSerik6t+7Dks41j2QBwNA32WWYT7YeZhhWJVWc+e8ad9oQ/C6b6Olv7a5gyHMkmnp4YG+DpdbEAR4SQrHsRzLDu58/XUYwhCGUE/faHmEEEIIIfQdcyUzVED0+eL3VqkVcoZhfvnOJ7sLy6QSCceyYsI7jmU5jn136/4vDhfygjAlKWZKYqzwzaNWWIJS4h4IzSkhLMvIpZKmzu73vtwvVyoE4etXyxACW9eplQzDdBnNDMNIOG7k18oghBBCCCF0JSNplmWpIGTERf71iQdcbndvn/Xe3/ztib9+sLeorKPX1NvX39Fr2ltU9uDv/v70W/+xOZw+Os2ff3K/QiYV23+HJJj+/xgPhBBvjTrM34fhmF0FJdUNzVIJx7Gsw+XKKz6T85OXyusblXIZ/eqQw8utWxAEQkhcWCCVcOcb2/72xe72HmNrt9HqcI5+34fmxkYIIYQQQje3K5tPmrAsKwjCA0ty5FLJz9/6uLmj+41Ptr27db+/t04pl9kczvZuk9NmJxIuJTb8X888lhEXKQgCOxCAutxu6nLz/DDZ69w8T11ul5tnCKGUchz72MoF+09XnGtsy/35K7PTEqUcV1rbUFJaPXfW5B/dtvjnf/1A5ucthtKUDqx8SEM1y7KUkHsXz3p/x6Gmls4n/vrhS+u+6O2zvvP0Q48sn+/meUhrPTJYuVtMX40QQgghhG52VziSJoSwLMsLwpoFM6Ylx/3jy337TlY0tHU2dfRQnmc4NsjXOzIx+pbpk36wYr6vXisIAjvwzm2GYfy9vew2h1alHLpavUbl6+ft66UhDGEYRhDo7TlT1j372Kv/2VrX3PafzXuJXBYREvCLR1a/9NCdp89e8PX2CjToxResSDg2wOBFKFUr5YMLzDCU0tiQwG2vPv27f28+39gGrdFyqXS0u8ww/t5eNqtdr1F9/dwIIYQQQuimwFylkXWCQFmWIYQ4XO665vZuc5/d4VLIpMG+3pFBfhDgivMASmlHr9nN81qVEt5o6Mlo6bfYHAqp1EevhWUESlmGMffbqhqarHaHXqOOCQ3wUqsIITaHs7evn2UZP70OtuXm+S5THy8Ieo1arRgcTJOB3HyEkH6bvc9qFyjVa1Sq4eYcdmc7jWY3z6uVcnhZDEIIIYQQuuldrUiaEAIJO4Z97TYvCOyV6FUMTdpXas2CQAmhLL4nHCGEEEIIjcJVjKQBpZDT46tjCi87M4X800NnudwkSqmY+sMzhhYoHTTzf9fwdVE2VMho5hxlyRFCCCGE0E3pqkfSCCGEEEII3ZSwJwNCCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmOBkTRCCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmOBkTRCCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmOBkTRCCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmOBkTRCCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmOBkTRCCCGEEEJjcSUjaToiz9lGsyqe569g2UbY0DXYyrc3XsqJLgeP4CBYIdeGZz0LgiAIwpVa7Whu0TAbHmuE0E2MwXscQgjdfHieZ1mWYZjrX
RCEELqZXa3eHdD4IYbpYkMIpdTlco0QvsOktra2o0ePXu1m6SvYQjPCJtxu97f8uUIp/fYrQdfR1572Y+Z2u6/N05sr7hpcfd9llFKO48QwGs696urq06dPf9NV8Tzvef+BP9rb248ePep2u0eerbOz88iRIy6X69vtzbg0qEIQQjerKxBJw9dhWVlZTExMZGRkVFRUZGRkYmJicnIyfBITExMWFvbcc88RQt54443Y2NiNGzcSQoZGAHDT4Xl+zZo1s2fP/vvf/z7sbFfK2rVrk5KSLl68SDxi/SuLZVmJRDLmZiHY9z179sTGxr766qvkqpUTXc63DPjgCG7cuDE2NvaNN94gI57PlFLP35+jIZFIOI4bc/GuPdi77u7uadOmLV68GEIxjDauLEopwzDbtm1bsmRJSEhIZmam3W5vamrKysrKzMwsKioi3+ROwnHcoJuYIAhwi3777bfJwCk9dDZK6f3335+Tk/OXv/yFXM07+Y1paIUghG5Kkiu1IoZhJBIJIYRlWUppbW2tIAiBgYFqtRqiSZZlCSFtbW2XLl3q6ekhHv2qGYbxvN2wLDtx4sT6+vrIyEhYszhp2PlHNnQR+Cch5Pz582fPnnU4HGQgiIGtj35Vg6bC5xAWwD8LCgry8/OXLVuWmJhIKf3alcP+Dlq/0Wi8ePFic3MzuXyljXJVYiEHfUi+Ws/DfvhNK380839tlYplGHbO0WwCDus3PWcYhoGVD3vIRrNOmEcQBI7juru7L1261NraerltwZzDPou/3I4TQiwWy7///W8vL6+1a9dCOYdWDhnxlP5aI5xL5Kun0zc6LV0uV3l5uY+Pz8gx9MhbH+U8noUc9sCNsuRfewoNe3GRy19K5DKHZlCBxb+/du8AdOo4ffr07bff7na7c3NzY2JiGIbRaDQZGRlGo9HPz2/oei5XM4SQnTt31tTUrF27NigoSKyHiRMn1tbWRkdHi7uzZ8+eioqK1atXh4aGirOlpKScOXMGZht6hxnN7XTkmhzNqob17e9OI8wD5fzyyy/r6uruuecef3//QXeS0ZzYCKFxg14ds2bNYln2wIED4icw7uTFF19kWfb999/ned7pdIpT4SmYp0GfiE/KgNhrYgQ8z8NGB/0Tmv0opdOmTWNZ9ty5c263W/xQLOqgwnjOMMLWxc/hj6eeeooQ8o9//GPY1V5ubeLm4PMNGzawLPvkk08OrTTPUo28qm/jcjU5yvmHra6h67xclQ5aHPZo0Ic8zw/dzW9aFbBOmEcsW2NjY2VlJXw4dEeGrQrPwrhcLp7n//nPf7Is+7//+7/0q2fIoLPFbreXlZV1dXUNWyeeW4eNXrp0iRASGRnpeVYMXXBoVYzGsCsZtgKHPUAjz9Da2urt7R0TEwMlH7ra0ZzJ3+hsH3SY4J9DT8KhSw06BCOf+UONpsyXK/DQZUezRVjhn//8Z0IInHKjmd/zn+I+whaXLFlCCDl06BD9ahWJC8Jst912GyFk9+7dl5vNc9e+0f1kBGNY1aCTxHN/v9Fqv3aenJwcQkhhYSH1OHZX6RaNELqOrnA/aTrQHAuxDjzOg/96ToJWE6lU2t3d3dTU5HQ6OY6jHq1T8C3i2buaZVmO46xWa1NTU2dnJ8MwgxYZBFr4WJY1Go1NTU0mkwn+6flMkw60Q8Pz8dbWVmg1hGZ1z9mgx2FPT09jY2N3d/fQrfM8D30BOY6z2Wzt7e0wValUchwnk8nI5R+nwtoIIW1tbc3NzS6XCzYnrn9QpZlMpsbGRovF4tkPctCq2tvbW1pa3G6356pcLpfT6RxaAIhmBn3I87zD4fjvWeJRk0ajcWhNehLnt1gsTU1Nw1aXeHT6+vqampp6e3vh+A46Oi6Xy+VyweI2mw2OI+wRfMjzfHNzc1dX17CtuWJVDFurQ4sN64R5oAHpRz/6UVhY2PLly+EcFqu3o6OjqanJbDbDXgzdNYZhOjs7W1paxLoa1G0DTiqxYwbHcfv37
4+NjU1LS9u9ezchxO12e57zXV1dsPVBVaRWq3U6nef6xYvFbrc3Nzd3dHSIVTF6g664trY2+tV+tzCP0+l0u91QsP7+fs8DNLRiLRaLeN5KJBK4xi9XgNFcFJ7ztLS08Dw/9BALguB0OuGgkIHrghDCsixcUCzLwl3I5XINeyjFXYMz+XJnvtPpHPbiggZ48cEXlNnhcDQ1NYmHZtCJAbUKZ1F7e3tfX594IYx+u0ajkWEYaAwWZxB/vHnOKZ7Szc3NDoeD4zixHuC/arVaPFfFffe8RXvOBk8mh53Ns1ZZljWbzZe7nwiC4HA4htaz2+32HG8g3mpMJtNobk3E42be2dnZ1NRks9k893f0JRx0VxS/X2A9cDFqNBpYuWeFDDppxVv05QqMEBoHrmRYPvDLWxCEGTNmEELy8vLoQJsE/PeFF14ghHz66acnT56cOnUqBJoJCQnvvvsu9Wjyef755xMTE48cOUIHhmqdO3funnvu8fPzk0qlSqVy0qRJH330Eb1MOw2s5MCBA0uXLtXpdCzL6vX65cuXD2pWmTJlCsuyDQ0N7777bkREBMdxSqUyJyfnxIkT4myw/k2bNs2aNUuj0UilUrVaPXXq1A0bNngW+KmnnkpOTq6vr3/zzTdDQkKmT5/+3nvvhYWFeXl5EUIMBkNYWFhMTMy5c+c8CwDfMXa7/ZVXXomNjZXL5VKpNDQ09Mknn+zu7qaUQoy7fv16Qshzzz136dKlZcuWabVahmEiIiJ+9atfQZQgltPpdP7lL39JSEiQSqUSiSQsLOzpp5/u7OyEza1bty4pKemvf/0r9Whse//995OTk5cvX97f3y+WrampKSMjY82aNTBPXl7e/PnzdTqdVCr18vLKzc09evTosJUPn5w+ffq2224zGAxQXTNmzNiyZYs4FTZRUlJy5513ent7cxyn0WjmzZu3c+dO6tG0c/HixeTk5EcffbS3t/eRRx4xGAwMwwQHBz/xxBOCILhcrueffz4oKIhlWYPBsHbtWoj2xG9uSul7772XnJwsl8slEklISMjTTz/d29s7QrE7Ozufe+655OTkiIiIqVOn5ubm+vj4PPXUUxs3boTVQvUmJycrFAqpVOrt7b18+fKioiLPM59SunPnzjlz5sC5HR0dvW3bto8//pgQ8vzzz1OPJqht27YtXrw4Ojo6JiZmxYoVAQEBixcvfv311y9cuAAzXLp06eGHH/bz85NIJEqlcvr06WI1Wq3WuXPnBgYGchwnlUrDw8ODg4Ofe+452J1Lly499NBDAQEBUqlUoVCkpKS8++67YrV8LZitoqLi7rvvhitOLpfHxsb+5je/sdlsYqPayZMnk5KSXn755Y6Ojrvvvluv1xNCgoODf/rTn1qtVs8KaWtre+yxx4KCghiG8fb2fvzxxxsaGnx9fcXWdM+CiRfFq6++OuxFIe6I3W5//fXX4+Li4GwPDw9/5plnenp6PC/Ml156KSkpqaysbPfu3cnJybAv2dnZBw8epJQeOXJk9uzZcKQSExM9bymw+Pnz5++9914fHx+JRKJSqXJycvbs2UOHa3N9++23ExMT//a3v1GPBm9KaXFxcUpKyo9+9COYubOz8/HHHw8KCpJIJHK5PCMj4z//+Y/n9XjmzJmkpKTf/OY3Fy5cWLx4sUKheP/995944onk5OTjx4+L+wX/feeddxITE//9739Tj0ccn3zySWhoKBwOb2/vsLCwzMxMo9HI8/zixYuzsrLgd77Y5v3Pf/4zIyNDJpOxLBscHPzYY49dvHgRSvub3/wmNDRUo9EQQgICAkJDQ6dNmwYX0f/+7/8mJibu37+fUvr73/8+NDRUq9USQvz9/UNDQzMzM+G285vf/CYxMXHv3r2eZ35RUdFtt92m1+s5jtNqtYsWLYJvCrES1q9fn5iY+MYbb3gu5XQ6V65cmZqa2tTUJNbAwYMHFy5cCLcmnU63aNEiOLKXe3hCKf3Pf/4zZcoUhULBsmxgY
OD3vve9s2fPDrqECwsLL1dCcZ69e/fOmzdPvCsuWbIkPz8fNvTcc8+FhISo1WpCSGBgYGho6OzZs+EGa7PZfv/738OJLZPJQkNDn3rqKfGkHc3liRC60VzrSPp///d/CSHz5s2Db5Ef/OAHq1atgpgeuoLAN+vdd99NCPnyyy9hbc3NzdBn+t57733jjTeef/75wMBAQsj7779PL/Pc9rPPPoMf+nfdddcvf/nL22+/nRAik8nEcI1SmpWVJZPJZs+ezXHckiVLfvjDH0Kxo6Oju7q6IFyjlP773/8mhKjV6ieffPKdd9556qmnlEolIWTr1q1igZcvX04IWbRoESEkMTHxZz/72ebNm5cvXz5hwgRCSFpa2vLly2+99dZLly5RjzumGIUTQtLT019++eXXX3994cKFhJCFCxfa7XaotE8//ZQQMnPmTF9f35iYmIceeujBBx9UqVSEkLfffpsOPKB0Op333nsvIWTChAk/+9nPnn32WdidtLS0xsZGSumxY8cIIVlZWeKXsSAIsDlCyPHjxyml0A792WefEUKefPJJSumhQ4ekUqlKpXryySf/9re//fjHP4YvLYgghz4cr6mp8fX1JYQ88sgjf/vb337xi1/o9XqGYTyr68CBA/C9e9ddd73wwgsPP/wwVOmbb75JB347VVdXE0Jg9Kq/v/+DDz748MMPQ3zwP//zP7NmzVIqlXfddddjjz0GDW9r1qyBOAOK8cwzzxBC4uPjn3/++d///vewmzNmzDCbzYNiSqiHlpaW1NRUQsjq1atfeuklOGEmT5489EhFRES88MILf//73++77z5CiI+PT3V1tVjsTZs2wYm3cuXKn//857m5uQqFYvbs2YSQZ555RqwBGD+akJDw3HPP/exnP4PzGQJlOEMuXLgQGxsLVfSHP/zh5z//OXRvfe+99yil/f39jz322Ny5cyUSiVarXbp06ZIlSyCM6+7uTklJIYSsWrXqL3/5ywsvvBAVFUUI+fOf/zz0YhkKZqiuroYi3XnnnW+++ebLL78MZ/JPf/pTcU/3798P521ERERwcPD3vve9Rx55xMfHhxDy0ksvwWyCIHR2dqalpRFCUlJSnnzyyR/84Ac+Pj7p6ene3t5RUVFDI+mvvSjgKNvt9rvuugvq8Oc///kzzzwzbdo0OGTt7e3ixfvAAw8QQqCiZs+e/dhjj82ZM4cQEhUV9fzzzxNCMjIyHn300ZUrVxJCOI6Dsxp+oFZXV4eGhhJC7rnnntdee+2nP/0p/DD+7LPPPGsS/igsLCSEJCcnwxUk/t742c9+Jl6nbW1tGRkZhJDly5e/8sorzzzzTHh4OCHk97//vVirBQUFhJDMzMzg4GD4YV9QUPDOO+8QQn74wx9SjwjY5XJBxVZUVFCPn6C7d+9evnx5XFwcISQ1NXX58uX33HNPX1+f0+kMDg7mOA7uBhChPv7443AOP/bYYz//+c+zs7MJIbGxsRcuXKCUvvvuu7fccktwcDAhZPr06cuWLXvwwQfNZjOl9J577iGEbNq0iVL6/vvvL1u2DOpq6tSpy5Ytu/feeyE6fPjhhwkhn3/+uXhv2bVrl0KhIISsXbv2hRde+N73vgeP7OBmDufDW2+9RQj5xS9+QT0iaYfDAVdEXV0d1HBBQYFCoVAoFE888cSbb775+OOPy+VytVp97Nixoac6/BO+gLRa7cMPP/z000/DyRAaGlpdXS2eMzt37oQS3n333UNLCPMcPnwYmgmeeuqpN998E+6KXl5ecP688cYbS5cuDQgIgPv2smXLHn74YavVSimFCs/IyPjd7373pz/9acGCBYSQ3NxcaIPHYBqh8ehaR9K/+tWvICr94IMPxKV+85vfMAzz6KOP0oE76fe+9z2WZSHqpZR+8MEHhJBf/vKX4iLnz58nhPj5+cEPffEGBH80NTX5+vqqVKp9+/aJi2zYsAHCXLvdTinleR4KGRcXd/r0aXHxuXPnit8QgiDYbLbIy
EiGYaC7G9iyZQt8r9OBGzQM+QoICNixY4dnN7iXXnpJ/CIZtq66u7sNBkNkZCTcoAHE5WJc9cknn0Cw++KLL4pfD9B5eubMmWJDDtTSokWL4JYNIGXKAw88QCl1OBxpaWlyufz8+fMwtbGx0WAwxMTEsCwL0Q/U//e//32GYaCJBb4LIQ4G0Ea+YsUKOlzj3GuvvUYI+b//+z/xczHIgNDKbDbHx8dzHAe/lEB1dXVQUJBcLj9z5gx8UlNTA4HLmjVr4MubUrpnzx74kpszZw4EBJTSixcv+vv7q1SqhoYG+ATCkblz51osFnETv/zlLwkhr776Kv1q303PiGfdunXi5xDsvv766zDD2bNnIbIX2/gppS+++CIZaGzmed5kMsHoLmhoBJs3b4Zv4meffRY+OXfunEQimTp1qth/pq2tLSAgICIiwmQyQZXCjyJobgQNDQ3wlAPa5CilRqNRqVSmpqZSD1u3biWE/OAHPxA/aWlpUalUMpkMemCP/G0NO/vDH/6QEPKHP/xB/LytrS0kJESpVIrVfvDgQXhy/eMf/xiuKUrp4cOHZTJZfHy82CwNscsdd9whdubu6OjIysqCi1GMO2GS50URERHh2f97xYoVnhfFu+++CyGpuGlK6c9//nNCyGOPPUYHIp4f/vCHLMv6+Pjs2rVLnA06/srlcs/D/Ytf/EKM3uBEhcsQbgXg7NmzPj4+/v7+g2oS/pg7dy7DMPCLFLZusViioqIMBgM8MIFz7PXXXxdX2NnZmZqaKpFIKisr4ZMTJ05IpVK4ZqHxmFLa2tpqMBgCAwNhu7DyU6dOsSx7yy23DHtMf/e73xFCPvnkE/ETu90eFxen0+nE82fHjh2EkIkTJ4qfUEqffPJJQojYiE4pXb16NSGkpKTEc2cfeughlmW3b98uzvbggw8SQgoKCjxn+8lPfsKy7MaNGymlPM/39PRERkbKZDJo2gdlZWW+vr4ajQbahiml//jHPzzHFYiRdGpqqlQqra+vh9kgKoXHgwBO/sWLF9Phbk1HjhyBH1EQiwO4hO+++24os1hCaEcfWkIoDNwVPXcfWh9WrlwpfgJnbE1NjfhJZ2enl5dXVFSU5/3nlltuIYRs27aNXr7TPELoRnat3xYOX70vvvjiAw884Ha74Yc43Pjq6+vFGehX04HRgSTT0N2QEBIbG/viiy+uWbNmUKZS6I62ffv2rq6uNWvWzJ8/H7oY8jx/++23r169OiwsrK+vjxDCMAx0fv3ss88yMjKgLyPDMAsXLmQYpq6uDubp6+vz9/e/7bbbpkyZIm5l0aJFWq22oaHBbrd7Fvitt95aunQp9KiDh61Wq5UQYrFY3G73sN2RKaUMw1itVgjUwBNPPPHQQw95e3vDP6Ff3b333vvrX/+aZVnoRpmTk6PRaC5evAgdTwVB+OCDDxiGee6555RKpcPhgJp55plnQkND169fX19fL5PJFi9e7HA48vPzYc0FBQU9PT3PPvtseHg4fCtIpdL+/v68vLyIiIj09HQyMFIejo64+4888gj8DhmafAAOQWNjIz+Q9Gry5MlPPfXUsmXLnE4nwzCHDh06e/bs0qVLly9fDrXkdDoTEhIef/xxh8Pxn//8B5aCyk9ISPjkk0+0Wq3D4XC73TNmzICg+aOPPgoNDXU6nS6XKzw8fNKkSVartbGxEZaFJxJ/+MMf4AEreOmll3Q63aZNm4SBXrNkoN8kpfTQoUNarRa6RNvtdp7nV6xYwTDMsWPHoP57e3vj4+Pvv/9+aHEHy5cvZxgGftexLFtQUFBXVzdr1qx77rkHfuG4XK6VK1dClCZutLi42O12r1q1SiaTwZEKCAjIycm5ePHihQsXWJZtaWnZuHFjdnY2NHuDiIiIJ5980mQyHTp0iBDC8zw8Z3e73TabDR5KkIGLpaOjw2KxwIJBQUFwxfGjSEMmFjIlJQVCKBAQEDB58mSbzQY5ZGBOQRDmz5//5ptvyuVyOC2nTp0aE
hJy6dKljo4OhmFsNtvmzZs5jnvuueekUqnD4XA4HH5+fuvWrZPL5dDHemgZKKUsyw57UcBDCZ7nxbNdLpc7HA7Y9+eee87f3//jjz9ubm6GDruEEEEQ/u///i83NxduOISQ3NxchmGeeOKJBx98EMYDwF2IYZja2lpCiEQiuXDhwq5du5YsWQID6UBcXNyPf/zjjo6O48ePE4/Or/DH2rVr6UAwB58cP378woULy5cvDwgIMJvNn376aVxcHDS3A19f32effZbneegcTwb6VU+dOvWDDz7w9/eHsygwMPC2225ra2vLy8sjHnc5QRDg8Z1nF96hdx7xtgmtnuKc8LTt6aefDgkJEevwJz/5yaxZsyCaFy9PQojJZPK8iXneouHahK2YzWaYTRjoQi3OxrLsvn37GhoaVq5cuWjRInHlqampP/rRjywWCwSjZMj9XyS22oqzEULgXg3mzZv36KOPwiMgz1sTnGbQyeqpp56Kjo6G/aWUPvbYYzk5OSqViud5hmHy8vKghAsXLhy2hJ5nLJwtYPHixQ8//DA06sPzAag3o9EoVs4IJza0GmCHaYTGoyuWBW+U4N5nMBjghiiVSlmWlcvlhBDxdj90kQULFoSHh3/44YenT59etWrVnDlz0tLSfv3rX4vzDEoUVVJSwjDM7NmzhYExQ7AeuFPTgfRDdGCgjCAI4rgTtVpNKYWbIM/zPj4+0KTqcDi6urouXbrU1dUFw5vowIBFcZ0+Pj7wuUQigRAN7uYsy8IQK88bJcMwgiD4+Pjcc889b7zxxpQpU9auXbtw4cLp06fPnz9//vz5UABxPIqPjw98kUilUoZhoOOmON6ou7u7srLS29s7KSmJUiqTyWD9Wq02Kytr8+bNFRUV0dHRubm5r7322p49e6ABCZ5jrlq16vDhw59++ml9fX1MTExZWVljY+NPfvITqIo1a9asW7fuySef/PLLL1esWJGTkzNx4sR//OMfQ48UlHP58uV/+MMf/vznP+fn599+++0w/+uvv04Gxp6ePn2aYZhZs2YJgiAOHBQEYfr06QzDnDx5knokhoM+muKpwvM8dGuBqoBoiVKq0WggBIF/lpWVUUo///xzeCoCa4MBl/X19Uaj0WAw0K+m2YK4UPwQ/ikODxIEISsrq6amhhDS19fX1dV18eJFk8kEDXVihFpTU8MwzIwZM+B8kEgk0KIGT6U9t0UIcXu81UL8J2y9pqbGZrM5HI5XXnkFigFncklJCSEE+r2Ig8CgkOJgu5kzZyYlJW3ZsiUzM/Ouu+6aO3duRkYGhPKDLpZhwVTIE0wI6ejoaG1tbW5utlgszc3N4u9PEdSkeFpSSnU6XWNjIxyLtra2+vr64ODgCRMmwGkJByggIECpVA6qAfLVi+Kvf/3r1KlT165du2DBguzs7Hnz5s2bN08sVVVVlb+/f3x8vHi2U0q9vb0zMzN37dpVXV0dEhJCBi5MLy8vuPrgwoQTWyqVwhGHHJ0KhYJSKt6FKisr3W63yWT6/e9/D5UP/4U3m9TU1ECLtWelLVu2TKfTffHFFy+++CI8OYHnYPCDpL6+vqWlZcKECZ7HlOM4GDsBx1RcFXSSgTGRcJO5995733///U8++WTNmjVSqdTpdK5fv95gMCxevHjQMYXzwfPOM6iSoU7sdnt5eTnHcVlZWXD44AYYHR0NbbewuDiWV7yJDT1nWI+s+SPMRgg5deoUwzAzZ86EnRIHnmZnZ8O1P+xSQ08S+OPOO+/8xz/+8cwzz+zatUu8NUFPmEH7C7tWWlrKMMz06dPh1gGXT2BgoPjTlAzcnb62hHBX/OlPf/rll18uX74cNv3ee++JFSKmtxOzSguC4Ofnd/fdd7/11ltZWVlwt8/Ozl64cKHYxW5owwRC6MZ3rSNpACPTBY/hzCPMzPN8aGjo7t27X3755c2bN8PDuODg4HvvvfeXv/zl0JCIEAIjkwwGA8Re4lbg70F3K8+vq0HrgWi4sbHxj
3/846ZNm5qbmxUKhUajYVkWBn0PajWBgdijafkTiyQIwu9+97vAwMB//OMf77///vvvv08IueWWW37xi1/Mnj3bc/1QaUPbaUBfX5/ZbA4LC1OpVINSHPj7+zMMYzQaCSGTJ0+OiIg4cOBAX1+fXC7fs2fP1KlTvby85s+f/9FHHx06dCg6Onrfvn2EkGXLlkHlzJs3b/v27a+99tqBAwcOHjxICElOTn7kkUd++MMfQvAklgGKl5SUtHfv3t/97nfbt2+Hd0BERUU98MADP//5z6EzNBwdX19f8ehASgRvb29KaU9Pj8vlgpCLDIyCZ4dkU4EPPY+aOIPT6YSdhS828XOWZbVabUhIiOeq4KzgOG7x4sXFxcX/+c9/fvrTn0IY9K9//UvsRw5fvUVFRX/605/y8vKMRqNarYbfYJ6bMBqNlFIfH59B1TIou8K0adOUSuWnn3766KOPwpOHioqK3bt3JyYmQswNiRcqKytra2s9f4DJZDJokr/c6UQI8fHx2bZt229/+9uNGze+/PLLL7/8sp+f39q1a5955hkxH/Cwi4t1yzCM0+l8++23161bd+bMGRgSqlAoxPGanvNDu7LnT1mxzZIQYrVabTabj4+PQqEQJzEMMzSDhOdewEUREBDw3nvveV4UTz/9NGQWM5vNfX19oaGhnmc7/BfOdshY71lI8cIUiwoXvliMQdUCaygpKamsrPRcUC6X+/v7w49/EZyKQUFBt95660cffXT8+PH58+d3d3dv2bIlOjp61qxZhBCTycQwTENDw6uvvip4ZImWSqX+/v4wbMCzwHCGw6VBCJkxY0ZaWtrevXvPnTsHHdIqKysffvhh+IE9hgjMarWaTCaNRgNjsj3bF6B4VyOqg94pcO2LW4TuN5RSmDrKdlme52fNmrVz585XX301Ly/v8OHDhJDExMSHH374Jz/5iXgDETkcDqPRyHEc7K9nI4jn4YC+W54lJITAsGZKaXd3Nywyf/787du3//GPfzxw4MCBAwcIISkpKXBXhF9rg/YCrhFBEF599dWgoCDxxGYYBu72s2bNGv2+I4RuKNfnF/A3ul/AzImJiR9//PG5c+c2b978gx/8wOFwvPbaa2vWrLHb7WTIO9KgGWbQtzUdyDs29B53uU2zLHvx4sU5c+b87W9/mz9/fl5eXllZ2dmzZ48dO6bRaIbmpfqm90H4FtFoNM8++2xVVdWxY8defvnlyZMn79ixIzc39/jx457Jy0ZeOewa9O8ctAloHYTHtVqtdsmSJR0dHZWVlefOnWttbYXuzjNnzpRIJLt372YYZvfu3T4+PjB+C2py8eLF+/fvr6qq+uCDD+68887q6uqf/vSnL7zwAnw3DK2ByZMnb968+dy5c5999tmDDz7Y2dn561//+uGHHxabeQghQ9+eDfGK+BKfkXd8hNqA1iaGYQ4ePNjY2Hjx4sXGxkb4o6WlpaSkBBr8xDXAV+bzzz+fnZ395JNPzpkz5wc/+EFmZuaf//znFStW3HfffTzPSySSw4cP5+TkbNiw4bHHHjt69GhZWdm5c+fg+bhnVESGZBUctJsulysyMvLtt98uLy+Pi4t78MEH16xZM2nSJIZh/va3v0GUrFQqKaV33HFHY2PjpUuXGgfALjz33HPCZfJ8QUmio6PXrVt37ty5bdu2/fjHP5ZIJG+88cZtt91mMpnIQOgAjeXDrsTtdj/wwANPPvmkQqH4+OOPT506VVVVdf78eWgSHpRQ72tPS5Zlh56WlwujycBFoVarxYvid7/7HVwUkB6BDDS7Xu5sh2bmry3kyCVXKpUMwzz44INQ7U1NTZ6HAHroelYF/LqDcXjQFH3gwIHOzs61a9fCcxU4pnPmzBFX5bnCP//5z577MujHCc/zUqn0gQcecDgc0Htk06ZNhBDY3AiVOQLxjuH+6msmPZu0rzi4QAZlsoNPYOoIB2XQsYYSzps3b+/evTU1Nf/+979Xr159/vz5n/3sZzDae
Oj8EolEGMjNKhq0v2K3lqGXLRm4d0Ehc3NzxbviqlWrKisrn3jiiRdffPFyuyDe7Z9//vnq6ur8/PyXX3550qRJ27dvz83NLSwshNspHRhB/nV1iRC6UYyDZ0kMw5SXl+fl5UEr1MqVK999993S0tLMzMy8vLyTJ096xnNw+4uIiIDmH/HeBK0Of/rTnx555BFIKHu5MMITy7LQw/jpp5/+8MMPFyxYEBcXZzAY/P39R2geHtbQ7LmweHt7+549e+rr65VKZXZ29vPPP19YWPirX/3KZrN9+OGHownNxZbIwMDAjo6O7u5u3uOdDoQQGF8YFhYG88MAlyNHjkDbM/QLj46OzsrKys/Pr6ioKCkpWbx4sZeXF6XU5XIdO3asoKDA7XYnJiY+8MAD69evP3LkiF6vf//997u7u4fWw8mTJw8ePGiz2aKiolavXr1u3bqTJ0/GxsauX78eHmFDvoL6+nrx6ECBGxsbGYaJiIgY+jx6lMTHqZDNw2Kx6HQ6b29vrwGHDh3asWPH0H5EEHb39PTccsstUqm0uLjY29v7nXfe+eyzz8TGrbfeestut3/yySevvPLKzJkzY2JivLy8PPtME0KCgoIIIRcuXGAGXpEILcrQyA0gT+2BAwcyMjKys7MrKirq6+t/8IMfnDhxYv78+RDWhIWFwQms1Wo9y2+xWLZt21ZZWTnoab4wgBBSVVW1d+9eo9EYGBi4bNmyN998s6ysbPbs2YWFhfn5+cxAR5Ghv1jIwBv1ysvLP/vss8zMzPz8/LVr16ampgYFBUGz9Dc9FgaDwcfHB5Lyih2ixC46Q9GBTt579uypq6tTKBTZ2dnPPfdcYWHhCy+8ABcFIcTX19ff37+lpaWnp8cz7yEdGIssnu1jFh4eTimFQ2AwGMRD0Nvbu23btnPnzpGvhrDQfjx79uyoqKjNmzc7HI4tW7awLHvnnXfCDMHBwTAoVqVS6fV6cYUul2v79u2nTp362sq84447lErl+vXrofd5bGws9ModW9Sr0WhCQkJsNltbW5tYh9D5/kc/+tFvfvMbz5Za4vGWgK9d8wizQQomuEDEO7Pnte+5y+KdAQoGeevEOne5XMePHz927JjL5YqPj7/vvvs+++yzY8eO+fr6/utf/2pvb/d8ZgVdgOB5VHNzs3i98Dzf39//5JNPPvfcc/DgaOQSwlQYauJ5V9ywYcORI0e8vLz++c9/9vT0sF99naEYvnve7WfMmAF3++eff95qtcKJDc3SYs8ThNC4cENH0nRgnMqLL764aNGiyspKQgg0ooSGhsLwfxg+OMi8efMopTCmG9o5IHz57W9/u27dum90k2pvb+c4DsbeiaMb8/LyzGbz0AeIQymVSrErCLR2iOAuX1xcnJub+/vf/54QAoNUWJa99dZbOY6DFsRR0mg0c+bMsdlsO3fuFJveOY47c+bMyZMno6KiIGEWISQ7O9vHx2f9+vUff/xxXFxcYmIifGUuXbq0ra3tD3/4g91uX758OdzT3W73nXfemZuba7PZyEBj0owZM0JDQy0WC3woPluHPx5//PF58+bBuDRoNYyPj09OThYEob+/nxCSk5PDsuy2bducTiccHeijCccL8iqQbzf4Jjc3lw6kLxRL2NbWtnDhwvvuu8+zEY4OvBj8Rz/6UX9///bt2/Py8kpKSvbt2/foo4/C4YOSdHV1iWeC2MF327ZtxCOUmTx5MsdxcHpA50i5XE4p3bhxo7hFjuM2btz40UcfPf/881u3bj158mRRUdGbb74Jqevg0XBcXFxaWlphYWFxcTHxeIXQ3//+99tuu23Lli1QJEgX3dfXB8mA4cR+/fXXFy9eDNlLYLitn5/frFmzGIYxm81Q2vr6+rfeegt+2AxtPu/q6iKEQJ5mCH8JIZcuXYJe2qOJpcjA4fPz88vIyDCZTAcOHICWPzg5t27dCsUe9DMMVl5UVJSbm/vKK6+QIRcFXO8Gg2HOnDkWi2X37t2wQljzq
VOnSktLExISkpOTx9ZSK9bDxIkTY2Jijhw5ItYSrPCPf/zjbbfdtnfv3kFVAS3HSqXynnvu6ejoWLdu3b59+yZPngyZVQRBCAkJmTlz5vnz52FZ8Zh+/vnnK1euXLdu3QgBMQSF4eHhy5cvP3369BtvvFFbW3vPPffIZLJBoy9GQ/zBOX/+fErp+vXroS8v1GFJScnf//73/fv3i5EoXAXQu2yEZmOYDQ7W5WaDzEhffvml2LEerv3169dTSnNzc2E2+BlfX18Pt004xAcPHqyvr1coFFAqQRDuueeehQsXwshauDVNmTIlMjLSYrHAgEsRLLJgwQJK6WeffSb+kuQ4rqam5i9/+cuOHTvg/gwl3Lp166ASwt0J8pw6HI4777xzyZIl8JscvhdmzpzpeVckhEDoL9YbIaSgoABGqhBCYCwjx3Ged3uWZTs7O9966y149vJtzmGE0LVDryjxR3xOTo5UKoUkdJ5Z8H79619LpVLIiQspNSil5eXlUqkUwl+Y7aGHHpJKpWIWPPi9vnDhwtraWmghKCwsDAoKUqvV8BIBMeERTHU6nRCQPfjgg7W1tf39/VVVVXfccQch5IknnhDnnzFjhlQqhSxFYi65v/3tb1Kp9JVXXoEVQmq5nJwcGETV2dn5wgsvKJVKpVKZnJwsJlm7++67pVIpvKfAc3/hOe+MGTNOnDhRXV1ts9nEuoIydHR0QIPfl19+CQ/cOzs7IQMavJiAUvrFF19IpVIouZgTqqenJzQ0NCQkRHxdRVlZmUaj0Wq169evt1qtdru9oKBg4sSJZCC5m/he3Lvvvhu+nH72s5/Rgcx3BQUF8BKTwMDA5uZmOpBs60c/+hHUG+TYdrvdkF4DkrgJHum3YJf/9Kc/EULuuuuuxsZGaIzJy8vTarXBwcHd3d0w//e//31CyN13311XV2e1Wtvb26H7e2Zmptls5gdeiiGVSqdMmSK2W1NK7XZ7SkqKTqeDzNxiDt3Vq1dLpVLxzTtGoxGSQ7/66qtGo9HpdF66dAkGfr344ot0SBpsnudffPFFjUYTHBwcGBg4ceLEyZMn5+bm/upXvxKTf/34xz8mhPzoRz8ym81Op/P8+fP33XcfvDfkzjvvpAPtZ5DqYfXq1bW1tRaLpaioaN68efB9DG9OgSMVHx+v0+mCg4OjoqIyMzNnzpz50EMPffbZZ2IyxM8//5wQkpSUVFRUBONKd+7c6evr6+XlBQ8Z4DyHyP7tt9+urKyEOoGn/9OmTYMUuYIglJeXR0dHS6VSSDxst9shEQ0kPPFs0IVqqa2tlclkvr6+R48ehUwmX3zxBXTBl8lk4usnjhw5Mmjf4ZyZNGmSUqkUE43Br4jw8PDdu3dbrdbm5uaXXnqJ4ziZTJaUlDQoC96gi2Lr1q2DLgp4rxCl9OTJk0ql0tvbe9OmTXC2Hzt2LDExkQykfoOzGhL9bt68mXq8u+Rf//qXVCoVX5QDHxYUFEilUujsBEfhn//8J9RSSUkJHIKNGzdqtVp/f3/Pl4MMOv9LS0tVKhVk44b86OIm8vLyWJYNDw8/cOAApJ05evRoZGSkXC4vLi6GlRQXF0ul0qVLl3pWi7hyyFunVCpVKhXkixyaIBzmfPHFF6VS6b/+9S9xd+x2e0JCgl6vh8ILgnDx4kUo5+uvv97V1WW1Wo8cOZKcnEwI+eKLL+hABuiXX36ZELJmzZrTp0+fO3cOKvaRRx6RSqWQ8Admg1HFt99++6lTp2pqaiA74f/8z/9IpVLIJAi/qyHZyIMPPtjQ0GC1WltaWqAzRnZ2dn9/P+zO2bNn5XK5RqP5xz/+cf78+VOnTj3//PMKhQKOeG1tLeypmLCvs7MTbk2ff/45wzDp6emQd1ysQPijo6MjJiaGEPLb3/62vb3darWeOHEiMzOTECJWlCAIa9euJYR873vfG1TCGTNm9Pf3Q/U+9thjhBB4WxBs+qOPPoLrDpLY0IEMpA8++GBJScnZs2d5nodkl
zqdbtu2bXBiw1uNxFOFDuQc1Gq14mVOEUI3tqsVSUMDG4TCnpElJHyFr0MxkoYnm2lpaeJsEPWK339Op/MnP/kJhP7wOjdCiEqlgqTUg75LoAytra0wqp0QIg4PuuOOO8THwZTS+Ph44vFeA9g0ZBEWv2XNZjMM0mcYBl4pFxwc/Omnn3p5eWk0GshmTSldunQpIQRy1sJ64D5uNpvFRlZCCCSuHvROh507d8L3mcFgiI6Ohqap1atX9/X1wargHv3ggw9Sj0i6u7tbJpNJpVIYpgNzfvnll1A5MpkMhvexLAvvfYDyeK5QLDB80VqtVhjuBg3SEAQLgtDe3g4tMRzHxcTEGAwGQkhMTAwk2PasfNhEf38/BD2EkMjISHg9ga+vL2SkhvIbjUb4uiKEQC9S+CqF70goJLQFRkdHe0bSNpsNVghvjhAjaTjWkAIWvulramomTZpECJFIJPBOE0LI2rVr+/r6PL9ixTWfPn1ar9erVKqMjIzMzMyJEyfCnqpUKjgPz58/D4GaSqXy9/cnhGRlZcGPCs8fgZcuXYLvZjLQq3LGjBmQZ+bxxx8XZ4PfEjExMVOmTElPT4eXp0AgAintKKWvvfYaNOx5e3vrdDo4Q6BtTNzxzz//XKxAyInrdrvhi58QEhoaCu/L4DgOfphB0jfIYBgYGNjS0kK/+m0Nq4X2YEIIDHDkOO6VV16BlygdPnwY5oTeQdDO5xlJQ+8dyA0MOytmDoEKkclkf/7zn9VqtVarvdybWUa4KMTTeOPGjXAyyOVyONulUukf//hH6vFiFDgV4V0qYkT75ptveh4Ot0eyYRj4JVYv/MCDYsCgwMDAwB07dgy97YiXAM/zkIVNqVSKv/fE/7733nvQs8XLywsy+ikUCnjDK1yGkF9v+vTpg6oF/u7v7588eTIhJDc3d9BpLHJ75Ef3fNURDP0khECp4MP8/Hy4DcJxgT9eeOEFcV8opU1NTeITLb1eD1muodcKJIqGg9ja2gplg2sEtgJpHD/++GM68L6b7u5ueO2R57Wfk5MDyeDFmoe34YoYhvn73/8OXysQYvI839XVBX3VWJaNjo6GvYuMjIQfe4MOkHiZw29sz/392c9+BvclOI0vV0JotYE54QEXGehLBveK2NhYeDML1G19fT3cMeA6ggG727Ztg5PW88SG+xIcuPvvv58Q4uPjA2mzMZJG6Mb3/5M8XFlbtmxpa2uDF1/RgQfoDMOcPHny5MmTOTk5YqcChmG6u7s3bNgQHBwMDULQaRVSDoeHhwsDI9OLior2799fV1cnkUhSUlJyc3NjY2PpcOOdxQ/37t175MgReKlETk4ODJkSp27evLm9vX3NmjV6vZ4ODCQ/c+bMsWPHpkyZMmnSJNi02+3euHFjfn6+2+2eNGnS6tWrdTodpCZdvXo1BAf79u2rra1dvnw5dMVjBrJJQAeJ/fv3Q6ftO+64A0api2WGv1tbW3fv3l1SUtLX1xcRETF79mwoKhSgrq4uLy8vJSVl5syZ4rIOhwOaLVevXg0/FWDm9vb27du3l5SUuFyuhISEJUuWJCQkDCpSb2/vhg0bJBLJXXfdpdFoxH0/cODA+fPnp02bBr9qxKNGCNmzZ09+fn5TU5OXl1dmZuYtt9wybNYU8ZMjR44cPnz4woULSqUyLS1tyZIlYWFhg9Z55MiR/fv3NzU1+fv7z5gxY8mSJWI6FIZhTCbTp59+6ufnBz+rYCme57/44ou+vr41a9Z4ljwvL6+urm7FihXBwcHihzabbceOHQUFBb29vWFhYXPnzoVXmnkWGyqtsbFx0qRJkZGRW7ZsgexphBCn0/nvf//7kUcemTNnDiQt6erqWr9+fUlJiVqtnjVr1h133OFwONatWxcdHb1o0SJxuxaLZdOmTcXFxQzDTJ8+Hd5kvmXLlkmTJmVlZTEM8/TTT//pT3/6+OOP165dK5bk3Llz9913X1FR0cGDB+fMmeN2u+GFHbt27Tp79qxEI
klNTV22bJlYjeKO1NTUFBcXWyyWpKSknJwc+BA6qJw7d45l2YSEhNzc3MTERLjmWZbt7u6eO3cuPKuB/ieDesQyDHPkyJFdu3ZBM97KlSuTkpKOHj1aVVW1cuVKCAVaW1u3bt0q7jszkLhw48aNvb29a9as0el0Yp0cOXIkLy+vra0tJibmrrvuio6O/s9//sMwDLzSaNjrt62tbdeuXaWlpWaz2fOigKlw4FpbW7dv315aWsrzfGJi4tKlSyHdnniaHT16tLKycvHixVFRUWJhampqDh06NGnSpClTpogftre3b968OSIiQvzpC2soLS3dvXt3bW2tXC5PT09ftmxZUFCQcJl0GWLlFxcXh4SEQJwnEi/nHTt2VFRUMAyTlJQklhmK0dHRsWnTprCwsKHLwrjDhx9++P333//www/vv/9+SDszbBmKi4tPnTo1Z86chIQE2C7P8/C0as2aNZAHEE4Gk8m0Y8eO4uLi/v7+CRMmLF68GHqkeJ5jZrP54MGDra2tarV61apVSqXy0KFDNTU1S5YsiYiI8DzzDxw40NLSolQqV61apVarof4XLVoUHR0tzkYIOXjw4MGDB5ubmwMCAmbNmgUZvgfdH3bt2nX48GGTyRQdHX377bfHxMRs3bq1ra3t7rvv1mq14iHIy8vLz89vbGzUarWZmZlLly719fUd9nsBFrFarTt27Dhx4oTZbI6JiVm4cKH409fz5iCWMDAwcNasWZBu3LOElNI9e/YcO3asqalJr9dPnjx5yZIl4l1RvNMePHiwo6NDp9OtWrUKuou0trbCiW2xWODEhi4lUDyj0bhp06bU1FR4weq36eSGELo2rlYkfcWNcGcc/fyet/KrtN3RLD6atY2hqJdb1ehLO6zL7cIIwQQZruTCV9+H8rXzfEujrFWIRT766KP777//nXfeefTRR6HjKaWU47j+/v6AgIC4uDh4bDKaw/G1u+B0OuGlkhcvXoQCMAMZx37xi1/88Y9/3LZt27Jlyzw/H2H9oz8/4UOY//jx43PmzPn1r3/93HPPjRCNfaP9GsEY1jaaw3c1zvbRrO3bXP7fcoXt7e3QpFpZWXm5ePEbGU15RrmV0c9GRnHtj6bqvumt6XKThm76a0s4mk2P/toc290eIXSDuFr5pOFR2tCUc/BEkmXZQXcuiBvEb/Shi0N7pGdLyaCVDMIMJEb1/HBQxDBsIQeVcNB66MD7VmDMmZhl4nL7Ky5OB0abDb1dQnwjjhyCfRyUYAty3w7a30FlGHZVw9YSHXiZyKDywF4MWmRoTVKPl84MBSsc+WCJ8wyqh5HPCs+9Hrbknh8OqgowNGSEqRMmTCCEvPPOO9OnTxcf/jY1Nf3mN7/p7+9fuXIlnH7QsOe5CTIQCotrHnoIYJIYGUPr8s6dO//6178+9NBD8PjY4XDk5eW9/fbbwcHBMJQWRjrC2TjC0RTnIQP5vMjAALWhC8KJXV5ePn/+/LVr18KbsYc9jozHG1jExQdV8ugP0LBrG3r2evrai2LYeYbWz2iucbFgQ3dnaE2OJkOcuMjQ8+1yh2bkYhBCamtr29vbX3vttY6OjmeffdbX13fYn0Aj7+PY7hiDbmKw+LC36FHORr7u2iejOwOHvdWMfIBGub9fW8LR3BWZgfQj8E+okDHf7RFCN6xx0yaN0FUCl8Crr776q1/9iuf5sLAwuVzucrlaWlpcLtcjjzzyxhtvQP+Zb99oBMFNRUXFPffcU15ertfr4TUuRqOxq6srOjr6gw8+gLc/Xr3v0d7e3pKSkkE9ndANC47RrFmzIJ/DzJkzN2/ebDAYGIbBY4cQQtcdRtII/dfZs2d3795dVVVltVqVSmVcXNyiRYvEJuorBQIjm822d+/e/Pz8jo4OQkhAQMCMGTMWLFgAHVivQYSED5THCzgftmzZcvHixdjY2Pnz58OLzfHYIYTQjQAjaYQI+ebdvq/GtkaedKXAU2l89cP4hWE0QgjdODCSRui/x
A7HYqRy9XorUo/8VmTg7dDswFtgEBpE7A2MJwlCCN1QMJJGCCGEEEJoLHB0MEIIIYQQQmOBkTRCCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmOBkTRCCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmOBkTRCCCGEEEJjgZE0QgghhBBCY4GRNELoerqCr1l1uVz9/f0ul+tKrfDGRCm12WxWqxVebj9KTqezv7/f7XZ/m+2OeVmEELpZSa53ARBC3y2UUoZhXC5XR0fHhQsX2traFApFeHh4WFiYXq9nGGbMay4uLj527NjSpUuTk5NhK9++qISQb7+eKwV2qre398svv+Q47s4771QoFKNc8NChQ2fPnl2yZElsbOwYKqevr6+oqEitVk+aNEkmk42p+FeSGNbfOEcHIfTdhJE0Quiaglhw586dFRUVFovF6XQyDKNUKgMCAubMmTNt2rQxr7m9vb2srCw7O3vYqV8bPnrOAH+bTKZDhw7J5fL58+dLpdJvE7TBCr9RGYZOgj/sdvvZs2dlMtmwjcRut7u0tPT8+fPTp0+PiIgghMB2Gxsbq6qqZs6cOfJWLldUs9m8b9++gICAlJQU2PSguvra3ScjRr2jKZLnJ0aj8eDBgzqdLjs7W6lUjrxyhBC6ejCSRghdU319fXl5eYcOHQoLC1uyZInBYLDb7dXV1SdOnNi9e7fBYIiLixvbmiUSiVKp5Dhu2KlfG2kNncFut586dUqtVs+ZM+dbNsTCysdQhqGTGIaRyWQjlKehoeHEiRNxcXGRkZHih1KpVC6XQ+Ww7Ej9+oYtqkajmT9/vkajkUqlg6aOJoT9Njs+7FS73X7y5MnAwMCsrCyVSvW1BUAIoasEI2mE0LXD83xVVdXhw4djYmLuv//+4OBg+DwxMdFgMBw8eLCurm7ChAli5MTzvNPpdDgcUqlUoVCwLOsZVAmC4HK5xKmUUkEQBrXUCoLgcDhcLpdcLocoUBAEjuPE9QiC4Ha7HQ4Hx3EymQwmwapcLpcgCDzPOxwOiUTCMIw4lVJqt9thtTKZbITYFFYFMysUCqlUKsb6lFKe52Gn3G631WqVSCRyuVwikQxdg8PhcLvdarWaDhi2el0ul9vtppS6XC6Xy0Up9SwzrKq/v59SCoUZFKRCbdhsNqgusagajSY7O5tlWahDnudhzbALDMMMKvOgY+R0OlmWVSgUg37nwOJWqxWeS3geF3HHbTYbpRSqhWVZWAQOjXhwCSFwgC53FBBC6CrBSBohdC3AA3q73V5bW8txXE5ODoTREBGq1erZs2fHx8d7eXlBPEQpbWpqKiwsvHjxosViYRgmLCwsJycnKiqKEMIwTE9PT3FxcWVlpcViUalUKSkp3d3dEDLCFgVBaGlpKSwsrKurs9vt3t7e8fHxlNK2trbc3NyAgABCiMViKSwsrK6u7unpkUgkYWFhkydPTkhIMBqN+/fvP3/+fGtrq0wme//99wkhQUFBs2fPDgkJuXjx4smTJ6FgGo0mOjo6KysrJCRkaCQnCEJ1dfWpU6daWlrsdrtEIklMTJw1a5afnx/DMH19fTt37lSr1Tqd7uzZs62trZTS8PDwnJycyMhICDrdbndtbW1BQUFLS4sgCOHh4QEBAW63e2ibtNPpPHXq1LFjx1paWsxm886dOwsLCwkhM2bMmDx5MlRLfX19TU1NbW2tw+Hw9vbOzs5OS0uDztaU0s7OzsLCwnPnzvX39ysUioiIiFmzZgUFBUFRt2zZ4uPjs2jRIo7jDh061NzcPGXKlLq6urNnzxoMhnvvvVcqlYrdMOCPrq6uwsLC2tpao9Eol8tjYmKysrIiIiJgHrvdXlZWdvr06e7ubkKIn5/fpEmTUlJSlEolpdTtdldWVpaXl7e0tDidToPBkJiYmJmZKQjCjh07Lly40NXV1dfX98EHH8hkMh8fn+XLl+v1+ivSPx4hhEYPI2mE0LVjt9ubm5u1Wm1cXBwEPWLco9Vqt
VotGQjCOjs7d+3aVV1dHRQU5O/v39TUlJ+f39fXd/vtt4eGhkIXkSNHjnh5eYWGhjocjsOHDxuNRofDIa6ws7Pzyy+/PHPmTHBwcGBgICxit9sZhpk9ezYhpL+/f//+/fv27ZNIJOHh4TabraCg4NKlS3fddZfBYIAWaGhshmZj6D3S0tKydevWCxcuhIeHBwcH9/T0wKZXrFjh4+MzaH/PnTu3bdu2rq6usLAwjUZTV1e3Z88el8u1ZMkSLy8vp9NZUVHR29urUqn0er3BYGhoaMjPz+/u7r733nuDgoJ4nq+trf3ss886Ojqio6PVanVjY2NpaWlXV9fl+sBAsy4UHoj9Mbq7u6FvcUBAQHd395kzZ5qbmzmOy8zMJIT09vZu3769qKgIqrSrq+vw4cOdnZ233357cHCwzWYrLy8PCQmZP38+pfTixYvFxcXNzc19fX1SqVTccc8wuru7e+fOncXFxb6+vgEBASaT6ejRo11dXbfffntAQIDT6Tx+/Pj27ds5jouMjKSU1tbW1tXV9ff3z5gxQyqVVlRUbNmyxW63h4eHcxzX3t6+c+dOl8uVkpIC+yXupkQigecVV/psRQihr4eRNELo2rHb7R0dHTqdTqvVQtTlcrl6e3sdDgchBB7i6/V6uVzO87xSqZwzZ86MGTNkMpnZbN68eXNRUVFaWlpoaGhVVVVBQUFUVNSKFSsCAgJcLldJScnWrVvFSFoQhKKiorKysoyMjKVLl3p7e/f39x86dGjPnj1i1NXY2AiR5X333RcUFORyuU6ePLlp06ajR4+uXbs2Nze3sbHxww8/VCqVd955J/Q90Ov1J06cOHXq1IwZM+666y6pVGq1WgsLC51O5+W6W/j6+mZnZ6empjIM097evm7dumPHjmVlZXl5ecEu9/b2Tpo0af78+d7e3l1dXRs3bqyvrz979mxAQIDNZjt8+HB7e/uKFSsmT54sl8s7Ojp2797d1NQ0dFsSiSQ1NTU6Onr37t1FRUWzZ89OT0+nlKpUKkEQWJa1Wq1xcXHLli2Ljo52Op0HDhzIy8srLy+PjY3V6/WnT58+efJkampqbm6ur6+vxWLZs2fP6dOnq6qq/P39mQGwLZZlHQ5HX1/f8uXLIyMjlUqlZ+8OhmGgMf748eMpKSkrV67UaDQul+v48eOtra39/f2EkIsXLx44cECtVt9xxx0QSdfX12/ZsuXgwYOhoaGxsbFlZWUXLly45557YAhpd3f3sWPHpFKpl5fXLbfc0tzc/MEHH/j5+d12223e3t4cx2k0GoLjDhFC1xxG0giha4fnebvd7uPjI7YgdnV1bdmyBUJDh8MRFRV12223BQcH+/r6Llu2jOM4lUrldrt9fHwCAwOtVmtvby8hpLGx0WazTZkyRWyazcnJqaioaGtrg1iK5/m6ujq32z1t2rTw8HBCiFarzc7OLigogGZpQRAuXrxot9unTJkSHh4ukUjUanV8fLxOp7t06ZLT6fTy8vLx8YGBer6+vmK+OYlEIpVKoS+yRCIxGAzz5s3jeV6tVg/d39jY2KCgIOiRzPN8QECAwWBobGyEaBJoNJo5c+bA6ECdTjdx4sTGxsbW1lae5202W11dXUhIyMyZM6HBXqfTzZo1q7i4eGjgzrKsWq1WqVQqlYrjOC8vL29vb7HaCSEymSwzM3PSpEnwYUZGRmlpaUdHR39/v16vr6+v7+/vT09PDw8Ph58xKSkpZ86cqa+vnzp1qme3GUKIIAgKhSIrK2vy5MnDjn20Wq319fXQaSc0NBQ+XLRoUX9/P4S8ra2t7e3tt9xyS3p6OkydNGlSQ0NDXl5eW1tbbGws9OF2u93QOTs0NHT58uVwPkA3IehT7uvrq9PpvvbEQwihqwQjaYTQtcNxnFwudzgcPM9DK6ZcLo+KioI249LS0paWFnEAmcPhKCsru3Tpkslk6u/v7+3tZRgGgsLe3l6FQmEwGMSxdzKZLCAgQKlUwvtKrFZrX1+fr
6+vn58fDFxjWdbHxycgIKCpqUlMaO1yuWpqatatWwcNrjabra+vD5rAdTqd2+2GIYwQN1NKWZYNDg5OTU0tLy9vbW318vLy9fVNSEhITk4etncBx3G9vb0wc19fn9VqhVgf3pACvSACAgK8vLxg/QzDhIaGwr4LgmCxWKxWa0xMjDiekmVZg8EAnU+GrWEoKhkYFAhlhjhYp9P5+/vDJJZl/fz8vL29bTYbDOs0mUwOh+PIkSPl5eVQyVar1WazmUwmGC8obgJ+hygUiqioKOgbTTwag2EvrFZre3u7TqcLDAwUQ3ClUgkZ6wghPT09LMtCb3U4ZCzLQuM3/FhKSUmprq7euXPnyZMnvby8AgIC0tLSYmJiYF/EI+J5aL7lyYkQQmOAkTRC6NqRyWR6vd5qtZpMJn9/f0qpXq+fO3cupbS7u7uvr89oNMKc9fX1GzZsaGpqMhgMQUFB4eHh7e3tHR0dYpgodmIWYzjI7SBua9jBZ2JURymFriAajUbsmaDRaNLS0nx9feVyuWd/Bs+ODYGBgWvXri0pKamoqOjp6WlqaiouLk5LS1uxYkVQUNCgjRYVFe3YsaOvry8gIMDf3z8mJobjuLq6OnEGhmGgkVXcEGQC8YyGYeih5zzQ9XlYnsnyBuXBgKZxMtDtGxKVwIbcbrfb7VYqlWKmPIZhdDpdRkZGcHCwTCazWq2DNiQ2Dw9bDMiqIeb0GFQS+C3BMAzsqdi7WkxjQghJSkr6/ve/X1xcXFdX19raWldXd+LEiXnz5i1YsECtVl9uNxFC6BrDSBohdO3I5fKQkJBTp06VlZUtWLAAwiC5XE4IgVzFENhRSs+ePVtfX7948eKcnBy5XK5QKLZv326z2SBs0ul0dru9v7/fM4rq6emx2WwQTCuVSp1Od/Hixa6urpCQEIgOu7u7Ozs7ocEVetYqlcrs7GyxgwEhBBrLoUiDiAGfwWCYP39+dna2y+Uym83bt28/ffp0UlJSUFCQ5/zQpt7X13fXXXclJydDiNzQ0OBwOAY1oA4b8UOUL5fLjUYjtNpCddnt9p6eHugjMSzPnhieax4h6FQoFHK5XKPRLFq0KDo6GsJcaOuFcZaQPmXQUiM0A8tkMi8vr9bWVpPJZDAYxJ0SBAGKodfreZ43m82Mx1tgzGazIAh6vR7mDw8PDw0NtdvtTqezpaXl888/P3r0aEZGBqQCJB4/ljCYRghdL/g4DCF0LUDApNVqIVPEwYMHq6qqbDYbJBu22Ww9PT0WiwWSEzudzs7OTo7jAgMDfX19tVpte3t7fX09xGGEEGj9rays7O3thezCNTU1Fy9eFCNIyAjBcVxhYWF7e7vD4TCZTIWFhb29vdAOCjnveJ4/e/YsxKxqtdpqtVZWVvb19UHkDePbzGZzc3Ozy+XieR5Ss23YsOH8+fNqtdpgMERGRsbGxtpsNkjl5slkMvX29mo0mqCgIG9vb6VSWV9fL/bkFqtlaBQofqhWqwMDAy9dunT27Fmn0ykIgslkKi8vHzauJYTAfmk0mv7+/ra2Nkgp7RmFX25DLMsGBQVZrdbW1lapVKrValUqldForKyshG7lg9YwQlAOn6tUqrCwsK6urpMnT0ImbJfLVVlZuW/fvs7OTkJIQECAWq0+c+ZMe3s7VGxLS0tVVZVarfb39xcE4dChQ1u2bOnu7tbpdL6+vvHx8cHBwd3d3RaLBWpGKpW2t7ebzWae56Hx/mtOQYQQugqwTRohdI1AjBUTEzNv3rx9+/a9//77EyZMCAgIEAShqamptbW1q6srISFBKpXKZDI/Pz+73X7w4EGz2SyVSisrKxsaGuRyOTz6T0hISExMPH78uNlsjouLs1qtZ86c6ezslEgkYpPz9OnTL126VFJS0tnZ6efn19fX19raCgnUICKPj4+PiYk5efKky+WCaLimpqarq2vhwoV+fn4cx+l0uqSkpK1bt27cuHHChAn+/v5JSUkul+vUqVOVlZXp6el6vb6vr
+/UqVN6vR66/HrSaDQ+Pj61tbU7d+5MSkpyOBynT582m81iIclAtwrPpaDvL7wRRqVSzZgx47PPPvvoo4+ysrJgNGRVVRX0VL5cPYeEhHh7ex8+fLivr0+n08XFxcXGxkK0OijchK3DqrKyss6dO7d58+bW1tbQ0FCj0VhWVgbvgoH+6J5FHXZtnrugUChSUlJKS0sPHz7c29sbEREB/cU1Gk1kZGRAQEBkZOT06dMPHDiwbt26iRMnUkrPnDnT2Ng4d+7c8PBwSqnFYjl48GBDQ0NCQoJCoWhrazt//nxYWBiMvFSr1TExMcePH9+wYUNMTIy3t3dmZuYI7fQIIXSVYCSNELqm1Gr14sWLAwICDh8+DCmE4a11SqUyJydnxowZ8NaSjIwMo9FYXV194MABSN0wd+7cM2fOwKuhAwICbrnlFrlcXldX19TUBAmh4+LiqqurxVQS/v7+t99+e2Fh4fnz5xsbG319fefNm1dUVAQvcCGEeHt733777Xv27Kmtra2vryeEKJXKKVOmTJo0CToQy2SySZMmtbW1wbtRwsLCgoKCkpKSbrnllvz8/JMnT0K8K5VKFy5cmJKSQr7azUClUs2ePRuyLzc1Ncnl8ujo6NjY2KqqKrG/so+Pz6BcyDKZDPJRMAzDcVx6errT6Tx8+HBRURHLsl5eXlOmTOnq6hq2ZwVsPTIycvHixcePHz916hTLskqlcsKECVqt1tfX1zPPBsMw3t7eMpkMChMZGXn77bfv3r27rKysoqJCEASdTpeTkxMdHU0IkUgkvr6+er0eWqOhnfhy3bXFYtx555179+6Fd8EQQnx8fGbOnAlZSjQazYIFCyQSyalTpw4fPgw7vnDhwtmzZ0NAnJOT43K5zpw5k5+fD/VsMBgWL14cGBgI5Zk5c6bL5YK35wQGBiYnJ2s0GnwzC0LoGmPwiRhC6Npzu90Wi8VisfT29gqC4O3trVarIbcDM5AQ2uFwGI3G3t5eaKKWyWQ2m00mk0GkJQgCJMUzmUzQ+stxnM1mU6vV4kv77HY7BGHwpm6z2fyXv/zF4XA8++yzAQEBcPeDYY5GoxFelafVasWEd+JW+vr6YNM+Pj5KpdLlcvX19ZnNZqPRCInwoPPGsLtps9l6e3v7+vo0Go2vry/DMDabDTpAix2FdTqdGBlD32vYTWYg5bbZbO7u7na73b6+vhqNBhqSxfdBDkIpdTqdZrO5v79fEASDwQBt506nE7Yr7prZbIYuNzAukOd5i8XS3d1tNps1Go23t7eXlxdMcrvdJpNJIpFAyrm+vj6Xy6XT6UYY+yhuwmQymUwmtVrt4+MDvTLEGex2O+wawzAGg0Gn03lWvs1mgzOkv79fpVLBjoiLw2vGzWaz0+mUSCSBgYEjFwYhhK4GjKQRQjcnePtgUFBQcnKyXC632WyFhYXQT+Phhx++Im+WxhbQawPrGSF0w8LeHQih68MzD/GwuSbEGQb94BfnGXYGcarZbD579uzhw4cjIiIMBkNTU1NjY6NGo5k9ezb0tfXc9NC/BxXjcpu+3FIj78Ww6xm2ZsRPBlXUCFscVOyv3bUR6nzYSV+7y8Nu4msLMMIWxRlGc2gQQuhawjZphNDNyel0nj9//sSJE42NjXa7nVIaEBAwa9as1NTUYZPcIYQQQt8URtIIoZsWpRSy7NntdkgJIpfL8WV4CCGErhSMpBFCCCGEEBoL7CeNELqZXa6PNUIIIfTtYZs0QgghhBBCY4H9BRFCCCGEEBoLjKQRQgghhBAaC4ykEUIIIYQQGotxPOKQUgoprjiO4zhu9Bn7YZKY5H/oVM8XBFzuZbxkyFsDRrl+hBBCCCF0cxivbdIQp/I8X1NT09XVNTRaZTwMO4ll2ctNZVn2clPFTXvOdrmpl1sDQgghhBC6CYzX3B2U0p6entra2kOHDk2YMGH69Om+vr5SqVScajKZOjo6CCE6nS4gIMDz3bwmk6m9vd3hcMjlcj8/P71eL76pw
el0dnV19fT0UErVanVQUJBSqRy6davV2tnZaTabWZb18vIKCAgQN00IMZvNnZ2d/f39UqnUYDD4+fnhmyAQQgghhG4+4zKSdrvdNTU1Bw4cuHjxoslkUqvVYWFhWVlZU6dOVSgUhBCe50+fPr1r166urq5JkyY98MADhBCGYQRBaGho2L9/f11dHXQICQkJmTNnTlxcHMuyNputuLg4Pz/faDRyHMeybEZGxrx583x8fDy33tvbe+jQoZKSErvdzrKsUqmcOXNmdna2UqmklLa2tu7fv7+mpsblcrEsq9fr58+fn5GRIZGM4440CCGEEEJoqHEW3kHfie7u7s2bNzudzszMzIaGBj8/v76+vp07d+p0urS0NGgADg0NnTp16r59+4xGo7h4T0/P7t276+vr58yZExYW1tbWVlRUVFlZGRkZqVQqS0tLt23b5u/vv3LlSrlcXl5efvDgQYlEsmTJEoVCIfYnOXjw4KFDhyZOnJiWluZyuYqKirZu3apWq6dOnWq1Wnfs2FFZWTl16tQJEyaYzeb8/PwvvvhCr9dPmDDhutUaQgghhBC6CsZZJA26urpMJtP8+fPT0tJ27NiRmpoaGhp67NgxsX2dZdmgoCCZTFZeXu45/u/ChQsNDQ1z5sxZtmwZISQtLS0mJkYQBIlEYrPZKioqZDLZbbfdNmHCBEppQkKCxWIpKyubNGlSREQErNloNJaVlUVERNx2222+vr6EkPDw8Pfee6+4uDgtLa29vb2mpiY9Pf3WW29VqVSUUl9f3w8//PDkyZMxMTHYxwMhhBBC6GYyziJp6O4MIanb7RYEATppBAcH33HHHTDOb4TFOzs7ZTJZeHh4a2ur2WxWKBSBgYFqtZphmI6Ojs7OzpiYmJCQEJhZo9HExsYeOHDAaDRCJA2dN/r7+6dPn67X62E2X1/f8PDwCxcu9Pf3t7S0EEImTpwotmEHBwcHBAQ0NjYKgoCRNEIIIYTQzWScRdIQngYGBgYGBh4/fryrq6u1tRV6d+h0uhEWhE7S/f39PM+XlpZ2dHRYLBaWZePi4mbMmBEWFtbf32+1WvV6PbQlQ8ju7e1NKbVareJ6TCaT2+328/OTSCQwG8dxBoPh7NmzNpvNZDKxLOvr68uyLLSFy+VyvV7f0tLidruxqzRCCCGE0M1knMV2EOBqtdrc3NzDhw9XVFSYTCaj0djV1ZWZmRkbG6tSqS63oNPpFAShqamJEBITExMVFdXS0nLs2DGr1XrHHXe4XC632y2Xyz2XksvlHMc5nU7xE4fDIQiC52wMw8jlckqp3W632WzwT3Eqx3FyudztdrtcLhgNiRBCCCGEbg7jLJIGHMclJSWFh4eXl5fv379fIpHU1dXV1dWtWrUqLS1thBTOlFKWZadOnbp8+XKO4/r7+z/55JPKysrJkycrFAoYUOg5P8/zsIjnpgkhgiAMmg0mcRxHKfVcCfxzzImlXS6X3W53u91jWBYhhBBC6LtJIpGoVCoI267uhq72Bq4GCEx1Ol18fHxNTc3EiRNVKtXGjRvLyspSUlKG7URBKYXEdkFBQRMmTGBZlud5tVo9YcKEysrK3t7emJgYmUxms9l4nhf7ZthsNkoptCXDJ0qlUiKR9Pf3C4LAMAwEyjabjeM4pVKpVqsJIf39/eKLEl0ul81mk8lkMplsDHsqCILD4XC5XPiGF4QQQgih0aCUymQy6Flwtbc1/iJpQRBaW1tbW1uTk5MhlpVIJOHh4Vqttq2tbYT02BKJRK1WQ3OyODYRAmJ4wYpGo2lpaenr6xNHE7a1tbEsq9FoyMAwR39/f47jLl68mJmZCcExlEetVqtUKn9/f5fL1djYGBcXB7Gv3W7v6Ojw9fUd27GUy+WDOpwghBBCCKEbxLjMJtHW1rZhw4a9e/e2tra6XC6TyVRRUdHW1ub5LkMwqFtFcHCwIAjHjh1rb2+3WCwNDQ1FRUVKpTIwMFCj0URGRl64cOH48eNGo9FisZSXl
58+fTokJMTPz08QhOrq6urqar1eHxwcfPr06bKysr6+PqPReOLEibq6uvj4eLVaHRwcrNVqCwoKampq+vv7u7q68vPzu7q6kpOTMXEHQgghhNBNZvy1SbMsGxMTM3HixMLCwrKyMqPR2NraCgk9srOzPZt+KaUwylD8JDY2dsaMGfn5+f/85z8NBkNra6vD4Zg7d25ISAjDMFOnTm1ra8vLy6upqVEqlfX19Uqlcs6cOT4+Pna7fevWrW63+7HHHlu0aNGGDRu++OKLsLAwt9vd0NAQFRU1Y8YMiURiMBgWLFiwZ8+e//znP8HBwRaLpampKTMzMzMzE7tnIIQQQgjdZMbl28IppT09PVVVVWfPnq2srPT19U1LS0tMTIyIiIBO0pCfzmq1FhUVMQwze/Zs6AfCMExvb29paWldXZ3T6dRqtfHx8cnJyWq1GqZeunSptLS0paWFUqrX69PT02NjY6VSqcvlOnLkiNvtnjFjhkajqaysrKio6O7u5jguICAgMzMzJCQEWp2dTmd5eXlNTY3JZJLJZGFhYZMnT4Z3uCCEEEIIoZvJuIykyUCs3NPTs23btsTExClTpnyjxQVBcDqdcrl82KZil8vF8/zlktaJ2abtdjvLspcbSmiz2aRSKeaQRgghhBC6WY3XSJoQAimcL1265O3tDS9DGXYeMpCF2vND+OQbTfX856C/h65hhKkIIYQQQujmMI4jaYQQQgghhK6jcZ9QAn8JIIQQQgih62LcR9LYdwIhhBBCCF0X4z6SRgghhBBC6LrASBohhBBCCKGxwEgaIYQQQgihscBIGiGEEEIIobHASBohhBBCCKGxwEgaIYQQQgihscBIGiGEEEIIobGQXO8CIIQQQgjdtPANcsNgyE3zNhCMpBFCCCGErhZ8g9zNDSNphBBCCKErjFLKMMy5lq4/bj3icLtZhsHGaUIIwxC3QH01yl/eNifIW0vpuP+lgZE0QgghhNAVBjFip7n/i4Iz/U6XhGUEjKQJYRnG4eYjfPU/zJ0e5K2lhI73jh4YSSOEEEIIXRUSltGp5BIJy7HYJk0IIQzDON28Riljx3tb9ACMpBFCCCGErgpKCC9QXqAEhx4SQghhGMILVLiJ2ucxCx5CCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmOBkTRCCCGEEEJjgZE0QgghhBBCY4GRNEIIIYQQQmMxvrPgUUrhHUKEEGZIYkJK6bCfD5pn0AzUI0vNCMuOPNsoV4IQQgghhMav8d0m7Xa7GxsbjUbjsNEqwzBfG8UOnYHxMPKCI8w2ypUghBBCCKHxa7y2SVNK3W53a2vroUOHJkyYkJGRIZfLWfb//zBwuVwOh4MQIpFIFArFsCtxOp1ut1smk0kk/60HQRCcTqfT6aSUSqVSuVzOcdzQBXmedzgcLpeLYRipVKpQKDwjZrfb7XA43G43y7IymUwul1/JPUcIIYQQQjeGcRlJU0o7OjoKCgqqqqouXrx47ty5S5cuZWRkxMTEQEwsCEJNTc2hQ4esVmtycvItt9wytG3Y6XQeOXKkoaFh3rx50dHRlFJBEM6fP19UVNTU1EQI8fLymjx5cmpqqlKp9FzQ4XCUlZWdPn26u7ub4zh/f//p06dPmDABNm2xWIqLiysqKsxms1QqDQ0NnTlzZlhYGDZOI4QQQgjdZMZZ7w7of2wymTZv3lxQUCCTyQwGg1qtrq6u3rRpU2NjI8xAKVUoFFqttrGxsa6ubtiVlJeX79y5s7q62mQyEUIYhjl37tymTZtqamrCw8Pj4uKMRuP69etPnjzpdrvFpQRBKCoq2rx5c1dXV2xsbFhYWH19/aefflpfX08IcTgcBw4c2Llzp9PpjI+P9/PzKykp+fzzzzs6Oq5tPSGEEEIIoatuXLZJd3R0nD9/fvLkyZmZmQcOHEhKStJoNAcOHGhubo6IiIDeyVFRUUqlsrW1VRAEz2VhiOGlS5fy8/ONRmNAQACEyC6X6/Tp0
52dnatWrcrMzCSEZGVlffjhhwUFBRMmTAgMDITFrVbr8ePHpVLpnXfeGRERIQhCYmLixx9/fPz48cjIyJ6enoKCgqCgoNWrV/v6+rpcrhMnTmzduvXkyZNLlizx7HyCEEIIIYTGu3EW20EfCbvdzjBMYGCgl5eXTCZTKpVpaWnf//73MzIyIFplGEYikajVaqlUOnQNVqv10KFDDodj+vTpYifmnp6epqammJiYiRMnKhQKhUIRERGRkpLS2dnZ1dUF81BKW1tbu7u709PTo6OjFQqFSqVKSEiIioq6cOGCxWJpamqy2+1ZWVnBwcFyuVyj0aSmpvr7+58/f35QQI8QQgghhMa7cdYmDS3K/v7+KpWqsLCQUmqz2axWKyHEYDAMmpnnec9sdGQga15RUVFVVdWKFSvcbjd0iSaE9PX19ff3R0VFaTQaMX2en5+fIAiwftDT0+NyuUJCQiQSCcwGXaUvXrxot9uh53RwcDAhRBAEhmHkcrnBYOju7sZIGiGEEPruuFyKXgRumvoZZ5E01Li3t3dOTs6+ffs2btzocrlMJhOlNDU11cvLyzPVxtBE0dAZ+siRIwkJCVOmTDl69CgZCK/tdrvD4VAqlSzLikmmVSoVy7J2u11cidVq5XlepVIxDAOzsSyrUqkgpu/v74d/iucHNI23tbU5nU6ZTPZN91cQhKG/BxBCCCF0gxMEKpWwLpfzehfkRkQpdTqdbpfLLQjs1QmmIQa7BpH6OIukgUwmmzFjRlRU1MmTJ4uKijo7O7/88svTp0+vXLkyKipq2FqDqLe3tzc/P1+r1ebm5kokEolEwrKsVCr9prmfL7eJy80PwfooV+7J6XQajUan03kT/GhDCCGEvjsEStVyaXd3L6WUIYRgm9gAhhBBELq7u7qlgt3lvhqRNGSe8PLyugaZiMdlJM0wjEKhiI6OlsvlPT09MTExhJADBw6UlpZGREQMmwGaEOJyuQoKCs6cOTNz5ky3293S0tLV1WW329va2qKioqRSqVQqdTgcYvYPQojdbhcEAQ4DfKJQKDiOs9ls4moFQYB+20qlUqlUQvM2TIKk13a7XSKRjKFBmhAil8t9fX2xTRohhBAaXwSByqQSn277f9vCGAym/z+WZX19/fz8fV08f/XapC8XEF5Z4y+SppT29/dbLBY/Pz+5XC6RSAwGQ2RkZHl5eUNDgyAIw1YcNEg3Nzc7HI7i4uLy8nKGYUwmk8lk2rFjh0QiSUhIUKlUXV1dVqtVrVaLbdgsy0I+aRjL6O3tzXFce3s7z/OwIUEQenp6lEqlQqEwGAw8z3d0dERGRsKV43K5ent7tVqt+PKXbwSeTXyr+kIIIYTQNUcpYRgilUoIhtBfRQmBF9uxHCfjuPH+zH38RWmU0nPnzuXl5c2fP9/Pzw9eN9jd3Q0p7QbN7NlnQ6VSTZkyJSYmBvpaSKXSqqqqs2fPpqSkxMTE+Pr6BgQEVFdX19TUpKamEkK6u7srKyv1ej2MZezu7oYxiHq9vqysLCUlJTQ0VBCECxcu1NXVxcTEqNXq4OBgjuNOnToVHR1tMBhcLte5c+fa2tpycnKwewZCCCH03QFNcvhU+XLELgDjPUAaf5E0y7L+/v4cx+3YsSM4OLilpcXlcjmdTkLI5MmTPRukKaUul0s8idVq9cSJEyGlBqWU4ziXy9XU1JSWlhYREUEImTRpUkNDw5dfftnY2KhUKisqKtra2pYtW+bv7+9wODZu3Mjz/F133TV9+vSdO3d+8cUXSUlJbrf79OnTHMdlZ2crFAo/P7/JkycXFRVt2LAhJibGbDafPHkyICAgKysLk0kjhBBCCN1kxl8kTQgJCgpauXLl8ePH6+vr29raLBZLVFRUbm5uWlqaZ8AKL+sWY2ton/acAbqF6HQ6QgilNCUlxe12Hz169OTJk5DAbunSpVOmTJHJZNDXmVLKsuzs2bN5nj916tTx48cZhtHr9XPmzImPjyeEKJXKxYsXS6XSy
srKlpYWhmHCw8MXLFgQEhJybWsIIYQQQghddeP1uQOllOf55ubmL7/8MiEhYebMmXK5fFC7Lwz4I4QMfT8LPE3geR66O4vRtiAILpfLaDTyPO/l5QVJ8WASNHtDog/oUmI0GiUSiU6nk8vlns8meJ63Wq0mk0mhUOh0Oljk6lUFQgghhG40gkBZljlx7tKqP31sdbo4lhmfAdcVxjCM082H+Xht/sV9E4J8BEqv0ojDa2ZctkkDiUTi4+OTkZEREhICgwIHgf7swy4Loa1nDA1YlpXL5UP7WxNCPJNvcBynUqlUKtWwK+c4TqvVarXa0e8LQgghhBAad8ZrJA2hsFqtzs7OFl+SMob1DF1Q/GTQJPHFhyPP5jl10CIIIYQQQuhmMl4jafCNXqdyuTVc7pNBky73z2ELcNO8AxMhhBBCCF0OJpRACCGEEEJoLDCSRgghhBBCaCwwkkYIIYQQQmgsMJJGCCGEEEJoLDCSRgghhBBCaCwwkkYIIYQQQmgsMJJGCCGEEEJoLDCSRgghhBBCaCwwkkYIIYQQQmgsMJJGCCGEEEJoLDCSRgghhBBCaCwwkkYIIYQQQmgsMJJGCCGEEEJoLDCSRgghhBBCaCwwkkYIIYQQQmgsMJJGCCGEEEJoLDCSRgghhBBCaCwwkkYIIYQQQmgsMJJGCCGEEEJoLDCSRgghhBBCaCwk17sAY0cp5Xm+r69PLpcrlUqGYYbOQAgZ+rk4adip4qTLLTua2Ua5EoQQQgghNH6N1zZpSinDMHa7vaioqLGxcdholWGYy0WxzIARJo0cAY882yhXghBCCCGExq/x2ibtdDorKysrKipKS0vDwsJ6e3vj4uK8vb0hcqWUNjQ0lJWVud3uiIiIyZMnixGt2Ww+f/78xYsXLRaLVqtNTk6Ojo6WSP5bD93d3ZWVlY2NjTzP+/r6Tpw4MSQkhGUH/95obGysqqpqb2/nOC44ODgtLc1gMMAknufr6+tramp6e3tlMllERMTEiRM1Gs21qhiEEEIIIXSNjLNIGpqirVbrvn37Tpw4IQiC0+lsbm7eunXrhAkTVqxY4ePjwzCMIAgmk+ns2bN1dXWpqamTJ0+GBfv7+w8dOlRYWKjX61mWraysPH369MqVKzMzMymlbW1tu3btqqqq0uv1CoWivLy8tLT0jjvuiI+P9yzAuXPntm/f3tbW5uvrKwjCqVOnqqurV61a5e/v73a7S0pKdu/ebbVaDQYDNJlfuHDh1ltvVavV17HeEEIIIYTQFTfOImnQ2dmZn58fEhKSlZV18uTJqKgot9t94sSJ6urqGTNmQPPzhAkTJBLJhg0bXC6XuGBbW9vZs2ejo6Nzc3NlMtm5c+c2btx46tSphIQEtVp98uTJ06dPz5o1a9asWRzHnT9/ftOmTYcOHQoICNDr9RCLu1yuAwcONDY2rlixIiUlxeVyFRcX5+XlhYaG3nLLLX19fXv37rXb7bfffntERITVaj148GBBQUF0dPS0adOuX4UhhBBCCKErb5xF0hAlm0wmQRAyMjLi4uKqq6uDgoKSkpJiYmLE3h0sy6rV6tDQUK1W67m4r6/v8uXL/fz8fHx8CCEajaa0tNRoNFosFp7na2trQ0ND586d6+/vTwjx9/e/cOFCZWVle3u7Xq+HNbS3t1+8eDE9PT07O1uhUMBKzp8/X1FRMWfOnObm5s7OzoULF2ZmZkKfkAULFtTW1paWlk6ZMmVoLxGEEEIIITR+jbNIGhqGtVotx3G1tbUGg4FS6nK5lEplamrqoJndbvegHB1eXl5eXl6EEEEQWJbt7Ozs6OgIDg7WarWdnZ1mszk2Ntbb25tSSillWTYkJKS0tNRisYhb7+zsdLlc0dHRMplMEASGYeRyeVBQUEVFhc1ma29vl0gkUVFR0MOEYRiNRuPn59fd3c3zPEbS6FqiXz/LdxSOAkYIIXSljLNIGvj7+0+cOPHUqVMNDQ1Wq5VhGH9///DwcIlE4pkrY+T8d
3V1dTt27Ojp6cnJyVGpVP39/Xa7Xa1WS6VSCKMJIRqNhmVZm80mLt7X1+d2u728vFiWhbCeZVmdTud2u61Wa19fH8MwOp0ONs0wjFQq1Wq13d3dLpdLKpVe3XpByAPGiwghhNDVNs4iaYhQlUrlokWLgoKCSkpKOjs7T58+fenSpdTU1NmzZ/v6+o6cBJphmI6OjsLCwsLCQpfLlZubO2nSJEKIIAiCIIhJPACE5jzPi5/wPE8p5TjOs0jwT7fb7Xa7GYbxXAnLshKJBFJfj22XPVNTIzR69Cv/Q4QQQhhCKMHUlAiha4BSim0aI4Dwhl61KOeapSEeZ5E0YFnW399/7ty5YWFhO3bs8PPzs9lshYWF3t7ec+bMuVzdQRh96dKlXbt21dTUxMTEzJw5Mz4+XqlUEkKkUinHcZ7DEwkhLpdLEATPtmSpVMowjNPp9FwtLCWTyWQymfhPwPO80+mEeHoMe+pwOEwmk8vlwrzUaJQopXIJd6HL/M6hchcvMAzG0v/FEMJT6qWUPzI7JdhL4+J5vKwQQlePQKlaLuvq6qKUMgTbNf4/hhCe5zs7O3wkvM3lZq/CrZhSKpfLdTqdTCa74isfZFxG0oIg8DwvlUoNBoNOp0tKSgoICPjoo4+qq6tnzZo1bHdkCKO7urry8vJqa2sXLlw4ffp0b29vcZJKpVIqlWaz2eFwQEDMMIzZbGYYBkJtQgh00ZZIJEajUez3LAiC2WyWyWRKpdLLy0sQhN7e3tDQUFjE5XJZLBaVSjW2rh0cx6nVakEQxlhT6LtHoFQu4RwdfRsLK+0uN8cy+FQDMAzjdPMh3prv56RrtVonz1+N2zdCCAGBUrlEolKp8EHYUCzDqNVqlVojuWq3Yo7jPHsQXD3jL5IWBKGqqqq0tHTu3LkSiYTneYiqBUGwWq0jL9va2nr+/Pm0tLTFixdzHAfhMrRLeXt7e3t7X7hwoa2tLSIiApqWL1y4IJPJdDodGQi4AwMD5XJ5TU3NpEmTIKFHX19fQ0ODwWBQqVRBQUGCINTU1MTHx8vlckJIT09PS0tLUlLS2IYbSiSSsTVmo+84nU6rVcklThYjaRFE0hqVwkunU6rVyutdHoTQd4FGo2GgPZrBZun/ooQwLKvV3iS34vEXpUFH5IqKiq6uroSEBAhkz58/39XVNW/evEEBK2ThIANxcE9PT19fH8uytbW10OmCUqpWqwMCAtRqdWJi4pdffrl9+/Y5c+YoFIqSkpKysrIZM2YEBga6XK7du3dTSmfNmpWcnFxQULB79+5Jkya53e6jR4+2t7ffeeedKpUqMDAwIiKisLBQo9HEx8ebTKZ9+/YJgpCVlYXPkdG1IVDKMowgCIJABYEyhGAkDRiGQJ3AQx6oqOtdKITQTUsQKMsy+FT5cm6aW/G4jKQjIyOXLVt29OjRvLw8s9nc0NCg1+unTp06bdq0QS35g9r2od26oKCgtLQUPuF5fsKECUuXLg0PD8/KyrJYLPn5+evWrZNKpVarNSMjY+7cuWq12mazVVZWulyu6dOn5+bm2my2oqKi06dPwxoWLFgwefJklmW1Wu3y5cu3bdu2d+/eI0eOQL6OZcuWxcfHYySNEEIIIXSTGX+RNCFEoVBMmTIlJiamrq7u4MGDkZGR06dP9/f3F9/DAmGrTqdbvXo1fAJt1RkZGYGBgZCmA+ahlELKZ5h/wYIFKSkp7e3tPM8bDIaQkBDIPy2Tye6++25BELy8vGQy2apVq1pbW7u6uiQSia+vb0hIiEqlgu1GR0ffc889ra2tRqNRLpcHBAQEBQVh/juEEEIIoZvPuIykCSEymSwoKEij0fT09MTGxsbGxg6dRyqViiP/gF6vF99WOCy1Wh0TExMTEzPoc47jwsPD4W9K6QjrYRjGz88PQnOEEEIIIXQTG6+RNCGEUqpQKGbNmiWXy6GNeWgPCugkLX5+uayFnguKiwy7LGzlcvOMvAaEEEIIIXQzGceRNLxBEDLZj
TDPCP8ceZERlr3cPKOZihBCCCGEbg5jSc12Q8FXACKEEEIIoeti3EfS2O6LEEIIIYSui3EfSSOEEEIIIXRdYCSNEEIIIYTQWGAkjRBCCCGE0FhgJI0QQgghhNBYYCSNEEIIIYTQWGAkjRBCCCGE0FhgJI0QQgghhNBYYCSNEEIIIYTQWGAkjRBCCCGE0FhgJI0QQgghhNBYYCSNEEIIIYTQWGAkjRBCCCGE0FhgJI0QQgghhNBYYCSNEEIIIYTQWGAkjRBCCCGE0FhgJI0QQgghhNBYYCSNEEIIIYTQWGAkjRBCCCGE0FhIrncBxo5SKgiC2+1mWVYikTAMM2gqpZQQwjDM5SaNPBU+HzRVnEf877BrGGEqQgghhBC6OYzXNmlKKcMwgiBUVVV1dXUNjVYZhmFZlmXZESaNPPVyQTBseuT1jzAVIYQQQgjdHMZrmzSltKOj4/z580eOHImNjc3Ozvbz85PL5eLU3t7e1tZWSqm3t3dISIi4oNvt7ujo6OzsdLlcKpUqODhYr9eLUx0OR3t7e3d3tyAIWq02ODhYo9EM2jTDMBaLpa2tzWQysSzr7e0dHBwsk8nEGYxGY1tbm8VikUqlPj4+QUFBHMddxbpACCGEEELXw7iMpF0uV0VFxeHDh5ubm41GY2dnZ0NDw6RJk7Kzs1UqFSFEEIQLFy7s2rWrs7MzPT39wQcfJIQwDGO328vKyo4cOdLT0yORSNxud1RU1KJFiyIjIwkhfX19hYWFJ06c6O/v5ziOUpqcnLxgwQJ/f3/PrXd1dR08ePDMmTNut5thGJlMNnXq1FmzZqnVakEQmpqa9u/fX1dXB7071Gr1nDlzsrKypFLpta8ohBBCCCF09YyzSBp6VnR3d2/btk0QhKysrLq6uoCAAIvFkpeXZzAYMjIyoENFaGjotGnT9u7dazKZxMXr6+u3bdsmk8lyc3P1en19ff3hw4cppXfddZe3t3dpaenu3buDg4MXLFggl8srKiqOHz8ul8uXLl2qVCph0zzPHzp0KD8/Pz09PTU11e12FxUV7dq1S6fTTZ8+3Wq17tq169y5c9OnT4+NjTWbzfn5+Vu3bvXx8YmPj79+1YYQQgghhK68cRZJg+7ubrPZvHDhwtTUVLPZnJycHB4efuLECWhIhj7KAQEBEomkpKREHFzodrvPnj1rs9lWrFgxZcoUaHJ2Op3FxcUXLlxQq9WVlZVyuXzFihWxsbGU0ri4OJPJVF5ePmnSpKioKFhJb2/vmTNnIiMjb731VoPBQAgJCQl57733iouL09PT29vbz507l56evnTpUmgd9/b2/uCDD06ePDlhwgSWHa+90hFCCCGE0FDjLLaD9maO46Crhsvlgr8DAwNXrlyZmpp6uWiVYZi+vr6WlpbAwMCYmBhCCKVUIpFER0cTQrq6uqCXSExMjNipWq1Wx8TE9Pf3i63alNLW1laLxZKcnKzT6SBG9/HxiYiI6Ojo6O/vb2lpIYQkJydDGzYhJDg4OCAgoKmpSRCEq147CCGEEELoGhpnbdLQ5BwQEBAcHHzs2LGOjo729nYfHx+z2ezl5TXysna7vb+/X6vV6nQ6MhCU63Q6qVRqtVotFovNZtPr9WJHDkKIt7c3IcRms4krMZvNPM/7+PhIJBKIlTmO0+v1TqfTbrfDGERfX1+GYWCqTCbT6/XNzc1ut1siGWe1jRBCCCGERjDOYjsIcLVabW5u7pEjR2pqanp7e3t6etra2iZPnhwfH69Wq4cuJXZxdjqdUqlUKpWKXT5kMplUKnW73Tabjed5zxQcMJVlWafTKX7icDgEQRCThECR5HI5pdRut9vtdhiDKE7lOE4ul/M873K5FArFla0NhIZiGYYQgl2JRgCVw2KGSoTQ1cSyeDceyU1zKx5nkTTgOC4hISEiIqK8vDwvL08qlTY1NV24cGHVqlUZGRkjdPCAFNQQWEMwzfM8z/MMw8C7XXie91yE53lBEDxXCCmiPWejlMI/J
RIJdNT2nCoIAqx/bNeSy+WyWq2DSoXQCARKFVKJ0WiklDKEEHq9C3QjYQgRBKG3t9esZJ1u/ia4gyOEblj//25M8G78FQwhgsD39vaYFczVuxVLJBKVSnUNugOMy0gaWqZVKlVMTExlZWVqaqpGo1m/fv2ZM2fS0tKGxqwwv1QqValUDofDZrMplUpCCDQk8zyvUCg0Go1MJoOWaZZlIc62Wq2EEGhLFrPacRzX398vvsiQUmq1WjmOUyqVGo2GUgpTYQa32221WuVy+diy4AmC4HK5XC4XvuEFjZJAKUepy+W63gW5UVHqcjldLpfT5cZIGt04KCXwtcFgxDUAqoIhhBmf16pAqYRQt9t9vQtyg4Lw5irdij3fNn21jb9ImlLa1NTU3NyckpICbcwMwwQFBanV6vb29hFqTavVGgyGurq69vb2qKgoaJnu6OhwOp06nc7Ly0un0zU1NZlMJkjKQQhpbW3lOE6r1ZKBxxD+/v4SiaShoSEzMxP6eLhcrtbWVo1Go1Qq/fz83G73xYsXxUwdNputo6MjICBgbC9nkcvlnj1JEBolX4uLYRhK8Gv5KyghLMf5+fn7+PrANzRCCF0tlBCG+PTYGIJ346+ghLAs3IoNN8GteFxG0l1dXVu2bGlubo6JiXE6nb29vWVlZe3t7ampqYPabsU3flNKlUplVFRUSUnJkSNHFAqFWq1ubW09duyYt7d3eHi4Wq2OjIw8cuRIfn5+dna2VCqtra0tKSkJCwvz8/MTBKGiooIQEh4eHhYWVlJSEhkZmZSU5Ha7T506VV9fn5OTo1arQ0JC9Hr9iRMngoKCwsLCrFbr4cOHe3t758+fjz2l0LUhUMoOdF5CwxLbKvBRD7oRwKnY2mPeXFzF8wKGWyKGIbxANQrZHdNS9GrluLtm8W48spvmVjz+ImmWZaOjo9PT00tKSioqKoxGI7Qch4SEZGdnewaslFKXy+V5EicnJzc2Np4+fbq5udnLy6u5uVkQhOXLlwcHBzMMM3Xq1I6OjkOHDp09e1apVF68eFGn0+Xk5BgMBrvdvmvXLpfL9eijjy5cuHDjxo1bt24tKirieb6pqSkuLi47O1sikXh7ey9YsGDPnj2ffvppYGCgxWJpb2/PysoS3xeDEEIIeYI2uYtdxhc+y7M5XRzLYOgFGIZxut2hPvq5ydF6tfImaLxEN6XxF0kTQnQ63eLFiyMiIs6dO1deXq7RaNLT0+Pj48PDwz0jaZVKNWvWLPgEAlkfH5/c3NygoKDGxkan0zlx4sTExMSEhAToxBwWFnbrrbeWl5e3trYKgjB16tTU1NTo6GgYjzht2jS3261UKhMSEu64447q6uquri4Y+5ienh4UFEQIkUgkU6ZM0Wg0Z8+eNZvNOp1uypQp6enpkHcPIYQQGhbLMAqplBKCkbSIYRiWZeRSCTZFoRvZuIykGYbx9vaePn16QkICy7IJCQlZWVmDZiCEqFSqmTNnDlrWx8dn3rx5MJJvaBfkkJCQkJAQt9stCIJnMjupVJqTkwN/U0oTEhISEhKcTifLsoOGhcpksvT09PT0dIfDIZVKsVMHQgih0aD/RTCSFonD9xG6YY3LSJoMdK9RKBTTp0/X6/Uw7nDoz1aYbdDnlFKWZSEJ9KCp8AkEx4Omiv/0fOvKsJuATyBMvwk6ACGEEEIIoWGN10gawlOlUhkbG/u1s13uw6FTPT8ZOnhxNLN97VSEEEIIIXRzGPd9D/C5D0IIIYQQui7GfSSNjb4IIYQQQui6GPeRNEIIIYQQQtcFRtIIIYQQQgiNBUbSCCGEEEIIjQVG0gghhBBCCI3F9cyCJwiC2WyWy+VKpbKnp6e2ttZut8fFxfn7++MLTRBCCCGE0A3uekbSJ0+eLC0tnTdvXlBQ0NatWysrK91ud3x8/KpVq/z8/PCdJgghhBBC6EZ23SJpq9VaUFBAKdXpdJWVlVVVVVOnTlWpVAcOHGhra/Pz87teBUMIIYQQQmg0r
lsnira2tr6+vkmTJvn7+9fX13t7e8+aNWvx4sVarba1tRXe/n29yoYQQgghhNDXum6RNKWUUgr9oXt6erRarUwms1qthBCO4zCMRgghhBBCN7jr1rsjMDBQo9GcOHHCbDZfunRp2rRpGo3m+PHjdrs9KCiIYRjsJ40QQgghhG5k161NWq1WZ2RkdHd379mzx9fXNzU11eFwFBcXh4eHBwUFXa9SIYQQQgghNErXrU2aUpqenh4QEOBwOHx8fAIDAymlc+bM8ff39/LyIoRggzRCCCGEELqRXbdIuqOjY9++fRMnTkxPTxc/zMzMxEzSCCGEEEJoXLhukbREImloaHC5XDExMWq1Gj7EMBohhBBCCI0X1y1y9fb2joiIqK+vLysr6+/vd3kQBOF6lQohhBBCCKFRup5vZlEoFF1dXZ988smBAwd8fHxkMpkgCBKJZN68eREREderYAghhBBCCI3GdYukXS4XISQ4ONjtdrvd7p6eHpZlKaUSicThcFyvUiGEEEIIITRK1y2S1ul08+bNmz17Nsdx8AmllBDCMIxWq71epUIIIYQQQmiUrlskzXGcwWAghPA8D+87hLR3lFIxtkYIIYQQQuiGdd0iaUKI2WwuLy+vra11OBwQTBNC5HL5/Pnzv30/aXGFDMMMTU0Nk8CgqSNMGuVKRr8GhBBCCCE0fl3PftL79u3Lz89nGKavrw96dBiNxqioqCuSu4NhmJ6ens7OzoSEhGGnjrDg6DfxLdeAEEIIIYTGr+v5ZpaqqqrIyMhFixZt2bIlLCxs0qRJhw8fNhqNEomEECL29xgDt9vtdDpLSkoqKioCAwPVarVUKhWnulwum83mdrsppTKZTKlUwhY9JzEMA5Mul+La4XDY7Xae5zmOk8vlCoVCnGSz2RwOhyAIHMcpFAq5XD62vUAIIYQQQjey6xZJm81mt9udkZGRkJDg5+cnkUiioqJkMtkHH3zQ2toaFhY25jXbbLYzZ84UFxfX1tZaLJZ///vfSUlJmZmZ3t7ehBCr1Xr69Oni4mKTyUQp9fb2njx5ckZGhlqt7u/vLy4uLisrM5lMHMcFBgbOnDkzNjZ2aL/trq6u/Pz82tra/v5+pVIZGRk5e/ZseOF5U1PT0aNHGxsbbTabRqNJSEiYOXOmXq//NnWFEEIIIYRuQNftzSwajYZlWavVKgiCl5dXV1eX0Wj08fEhhPT09PA8P7YGaUEQysvL169fb7VaAwMDtVoty7J79uzZt28fJNerrq7esWOH2WxOSUlJSUkxGo2bNm0qKyujlB4+fHjz5s1OpzMtLS0mJqa2tvazzz6rr68nX+36bLPZtmzZcuDAAY1Gk5GRYTAYCgoKNmzY0NfXZzKZPvvss1OnTgUEBGRkZCgUin379m3bts3pdF6hakMIIYQQQjeK69YmHRAQEBAQUFhYGB8fHxcXV1FRkZ+fTym1WCyBgYEcx33T3h0wv8ViqayslMvl9957b1VVVVFR0dq1aw8ePNjT02M2mzUaTUVFBaV05cqViYmJhJDo6OhPPvnkwoULQUFBpaWlwcHBa9asCQoKEgQhLCwMguywsDCFQiGW59y5c9XV1VlZWbfccotOp7PZbHv27Dl69GhlZaXdbm9qalq4cOGcOXMUCoXRaNy8efOZM2emTp0aFxd3taoSIYQQQghdD9chkob2XalUumDBghMnTjidzvj4+IiICBh9mJSUFBoaOuaV8zxvt9uVSqWfn59CoeA4zsfHZ/ny5Q6HQ61WQw9mhULh5+cnk8kIIX5+fiqVyuVyNTY29vT0LFiwQOxYMmnSpMLCwoaGBpPJJHaDppTW1tYSQrKysiCLn1arnTp16qlTpyorK202m06nmzx5skajIYT4+vpOnTq1qqqqrq5uwoQJOBIRIYQQQuhmch0iaTGgjImJiYiI4DiOYZi777571qxZUqk0ICAAwtZvGnfC/Gq1OioqqqamZteuXXa73eVyWSwWjUYDw/7UanV4e
HhlZeXx48enTp1KCDlx4oTZbA4PD3e73SzLQl9qnudZllUqld7e3k1NTXa7XdwKpdRoNGo0Gkg2AnNqtVqdTtfd3e1wOPR6vUqlgkkcx+n1eqVSaTQav80ASoQQQgghdAO61pG0zWazWq1it2N4QzhEmTAsz2q1siwLDcZjIJPJJk6c2NDQcPjw4f7+foZhtm7dmp6eHhERoVarGYZJSEgoKSnZvHlzQUEBIaS1tTUtLS0pKen06dOQhYN4pKBWKpU8z8OLzcVQ2OFwyGQyGIYIc7Isq1Kpent7HQ6Hr68vpPuAmWGdNpvt21YcQqPDMgzBVIwjgsphsYrQjQGv2a/FjM8qwiM7spvmVnztImmIRCsrK48dO3a5EXiUUolEsmLFitjY2DFvKCgo6K677mpoaIC+yzU1NaWlpbNmzcrNzeU4rrm5uaenJzAwcMKECf+vvft6juPK80R/0pT3DkAVbMERlgBI0HtPUZRtqaVRT4/Znbj7sE/37f4Vu7ERuw8zsd3T3dEtzUhsSSRFSvTegSS8IQAChAcKBaC8z8q8D0esrgZIiiyRMNXfTygURGVVVlaePCd/efJ3TtJvnJubGx8fTyaTr/RDMt68VyKKYiKReC0TbMPfCFGS5DwXjcYkSVrz7dPrxhAiSVI0Go3HYolkMgtacMgCtM7GolGJSD/97r85TKrOCplORbBSRElS8Hw0GiUErfFfedoUR95oU8yyrEwme95cxq/R8kXStALI5XKDwUB7eZeikXRqdufM0Nxoi8Xi9XpDodBnn3323XffdXZ2NjY2ajSatrY2rVb7d3/3d0VFRYSQJ0+efPHFF+3t7SaTSZKkRSF+JBJhWTa1PbQrXalUzs7O0oec09dFUQyHw3Ty6UgkIopi+iKamZ3Zb4nH4z6fLx6Pr63mA1aQKEkahdzr8xJ6EOLUnI4hoih6vR6fio0kBETSsBqIkqRWyLx+/49/o86mYQgRRXHB4zHLSSwhrK1TIW2NfX7/jxEBSjaFIaKY9Hi8PhUbib+RpliSJIVCYTAYluGZHsud3VFXV1dTU/Pi9/ycC4hEIrGwsMCyLB1TSKepbmpqOnny5NTUVG5u7uzsbFFRUWomjYqKioKCgpmZGb1eTwjxer3kadBPo1i1Wk3j4NRWGQyGcDhMU0foK+FwOBAIOJ3OeDw+OzsbjUbp2gghfr8/FosZDIbM6r9CobBarRnvDfgbJEoSz7JWf5xhWYkkCYPm+y8kiXAsa7XarDk2QRQRScNqQOusxRNjCEMIQZ1NJxGJ49gcm82aY0mutToripKMZy0LEYZhJIKS/QtJIhzH2Ww2q836RpviZeiQJssfSUej0WAw+IJ0BYZhDAZDBp24NOkiEAicPXs2HA7/6le/SiQSyWQyEAhMTU2JokgfN6hQKGZnZ8fGxux2OyFkfHx8dnZWo9GUlZUNDQ11dnbW1NTY7XZBENra2iYmJjZt2qTX66PRqM/nU6lUOp2urKzs9u3bDx8+tNlser0+FAq1tLSEw+Hq6upYLDYwMPDw4cNdu3apVCqPx3P//n2WZZ1OZ2aRNMMwS58LA/ACLCEMIT/zxk42Yxie5xmW5Vl2LZ2TIXv9WGfR1D8Xw/M8y7LMWquzLEsIg5J9nuxpipc7T7qrq+v69euxWIxGlulPPKF/ymSyDz/8MIPZl+kKtVptUVHRDz/88Kc//SkWi83NzX311Vf9/f01NTXl5eU8z9fW1l64cOHzzz+vq6uTJKm7u9vtdr/33nsbNmyYnZ39/vvvv/jii9ra2lAo9PDhQ5VK1dTUpFare3t7T5482dDQcOjQoerq6qqqqtu3b/v9/sLCQpfL1dHRUVJS0tDQIAjCw4cPz50753a7rVbr6OhoX1/fxo0bf07aN8AroRVtUc2CdHTnYDodWCVQZ3/SGq2zKNkXW6PFutRyd1xptVqHwyEIQuoVOvcFwzCiKNKZ4zLOKiaEyOXyLVu28Dzf2dk5Pj7u8/mmp
6e3bt26adMmOsfzrl27JEnq6uq6c+eOJEkqlergwYNNTU0sy+7Zs0cmk7W0tNy6dYthmPz8/AMHDtAgWCaT6XQ6umFKpfKDDz7Q6/X9/f1jY2Mymaypqenw4cN0XrxPP/30woULQ0ND/f39CoVi9+7dBw4cyHgqEgAAAABYtZZ7xGFNTQ19uGDqlVgsFgqFaPYFnajuZ36RVqvdtWvX7t27L126dP/+/f/+3/87fTI5XWqxWN5///2jR4/6fD6aSaJQKFJzUR88eHD37t1+v18mk2m12tQt8vLy8tLSUhr0E0JsNtsvf/nLSCQSCoXUarVarU6tv6io6J/+6Z9CoVA0GtVqtUqlcnnSdAAAAABgmS13n3RqqmZCSCKRGBwc7O3tnZqaisfjFoulsrKyvr4+NVzvZ34LzU7W6XSLonOGYVQqlUqleuYHFQqFzWZb+vqifGWWZTUajUajWboSjuP0ev3P/xUAAAAAsJqt5LCkjo6OkydPhkIhu93OcdyTJ09aW1tnZmaOHj2q0Wh+TuoM/aDT6XQ6na91kwEAVpiItMvnWFsTOwBAdlixSDocDt+8eZPjuM8++6yqqkomk83MzJw/f/7WrVvpGSA/B9IqACD7IF4EAFg9ViySdrlcXq9348aNTU1NNHGiqKho7969w8PDU1NT69atQxwMAJAiEcIQ4glFHjyeFMQkoul0okSUMn5rRaFaieHdALCsViySliRJkiSO41L5xwzDyGQysmRqPAAAoAlvg9Nz/8+/fh2IxDiWQUNJMQyTEJIOk/7U//cPpUqzKEnotgeAZbNikXRubq7FYmltbbXZbE6nk2VZr9d75cqVRCJRUFDAsmwWTDEIAPDaJSUpmZQIQ6TnPuHqbwvDkGRSSqILBgBWwopF0hqNZvPmzadOnfrDH/5QWFgol8unpqYikciuXbscDsdKbRUAwCrHEsIyhCVEQlcDIYQQhvlxhwAALL+ViaRpf/PWrVuNRmNHR4fb7Y7H43TO5i1bttBHnKBDGgBgKSntP6CwNwBgpaxMJM0wTDQanZ2dFQShsrKypKREpVKVlJTQOZiR1wEAAAAAq99yR9I0Sh4fH799+3Z/f38wGBRFkY41LCgo2Lp1a11dHZ6tDQAAAACr3wo849Dlcn399dejo6OVlZUNDQ0ajSYej8/Pz/f393/55ZeEkIaGhkUPFAQAAAAAWG2WNZKmHdItLS3j4+OHDh3at2+fUqlMLRoYGPjiiy9u3LhRVFRktVqR4wEAAAAAq9lyD3dOJBJDQ0MFBQXbtm1TKpV06mgaNK9bt27z5s1TU1Mej2eZtwoAAAAA4FUtayTNMIzP5wsGgwUFBXq9PtXrTP8vSVJRUREhJBAILOdWAQAAAABkYLnzpAVBEARBoVAQQuhYw/SlKpWK47hEIrHMWwUAAAAA8KpWZhY8mUzGss/oDlcoFM98HQAAAABgtVmBWfAIIXfv3g0EAoIgLFrq9Xrn5uYw0BAAAAAAVr/ljqTlcnlZWVlPT09bWxuNqhdxOBwGg4HgGYcAAAAAsLotdySt1+v379+/bdu2Z2Zx0Ng6Nzd3mbcKAAAAAOBVLXckLZPJ8vPzl/lLAQAAAABeuxUYcfjMpI50yOsAAAAAgNVvBSJpBMoAAAAAkAUw5RwAAAAAQCZWZj7pN40mkITD4XA4bLFYlo5uTM8wWdRH/oJFz3vbone+YBEAAAAAZI0s7JOmDyFnGObJkycXL1585iQhTJr0Dy5d9LysbuavvWDRT+aFAwAAAMBalIV90pIkTUxMdHZ2Pnz40OVyKZXK2tpap9Mpk8noG7xeb0dHx5MnT5LJpM1mq6+vLykpoYuGhoa6urrm5uZ4ni8qKmpqajKZTDQ0T/+KRCLR3d09MDDg9/s1Gk1ZWVlTU5NcLieEhMNhuvJQKGQ0GtetW1dbW8tx3PLuAwAAAAB447Iwkn78+PE333zj8/kEQZAkqaurq7Ozc//+/Tt27GBZ1u12f//99
z09PVarVa1WP3jwoK+v7913362urm5tbT116lQ8Hs/JyYnFYt3d3Y8fP37//fdzcnLSg+lkMnnhwoVr166p1WqDweByudrb26empt5++21RFE+ePNna2mo0GjUazcTERHt7+4EDB/bu3YunoAMAAABkmeyJpGmwGw6H29raXC7Xp59+Oj093dHR8cEHH1y8eLGtra2mpsZisfT29ra3t2/dunXPnj1yuXxgYKCvr08QBK/Xe/PmzWg0+t5771VWVsZisRs3bty6dcvpdO7fvz+9U3l8fPzmzZsWi+X48eN5eXkLCwvff//9zZs3q6urY7HYvXv3qqurDx48aDKZJicnT58+fe3aterqarvdvoI7BwAAAABeu+yJpKloNDo3N2ez2Zqbm2/fvk1TOywWi9frValUoVDo8ePHVqt17969OTk5hJCNGzdWVlYqlcqBgYHJyclNmzZt2bKF9h8fPHjw8ePH/f39GzZssFgsNFKXJKmvry8aje7bt6+mpoYQYjabDxw48Nvf/rajoyMSiSiVyoMHD5aVldFFfr//xIkTAwMDeXl5GHoIAAAAkE2yJ5KmcapSqbRarU+ePOnu7o5EIpIkJZNJh8PhcDgIIS6Xa2FhwWQyBYPBBw8exGIxq9VaVVWlUqnm5+eTyaTdbmdZNplMsixrNptzcnJcLhedACT1RW63W6VS0Uea03fm5eUZDIbJyclYLGY0Gq1WqyRJoihyHOdwOORy+ezs7NJkawAAAABY07InkqbUanV9fX1vb+8XX3whimIymbx//351dbVer+c4LhaLxePxoaGhr776KhAIEEICgUBlZeW7774bj8d5nler1eTp5BuEEI1GIwhCPB4nT7NHJEkKhUIqlYqOL6Tv5Hleq9UGg8FYLGa32zmOS61BLpcrlcpQKLSSOwUAAAAA3oBsi6QJIWVlZZ9++mlnZ2dHR4fL5fr2229v3bq1a9eurVu3JpNJSZL8fv/69evfeecdhmFaW1vv3Lljt9tp7/KicYEcx9He5fQXRVFkWTa9g5lhGI7jBEEQBIGG0YsWJZPJjH8OJtGDVyJJEsHciy9Ed4601vYRSvYnoWSz1RouWcKssY1eRstQrMuTC5CFkbRCoaiuri4rK9PpdHfu3Nm3b9+VK1du3bpVWVnJsqwoikVFRYcOHaJDAHNzc6empoaGhmj+RiKRSF9VPB5nWXbRHHY8zwuCkB4ci6IYi8UUCgX9SHrkLYpiIpGgHdgZiEajfr+fdooDvAxRkjQKudvtlkSRIYSgEU/DECImk7OzLjMvROPC2kq4+rFkZ92SKJG1tOHLgWFIMpl0uVx6JhGNJ9ZcyaoVcvfcnESrK+psGoYwyWRyZtalJfFYYu3VWa1S7p5zS0RCa5yOISSZTM7OukxcIhIX2DdTrAqFwmAwZByAvbxsi6QlSaK9yHK5XKfT6fX6/fv3i6J48eLFJ0+e5Ofny2Qys9lss9lovGsymXJzc4eHh+kr4XA4fVXBYFAmk9EQmXZXMwyj0+mePHmSHt0KghAMBq1Wq1wu9/v9giCkFsVisUgkotVqM/s5MplMp9P9nC5t+FsjSpKc53S6IMMwaLcXkQhhWFan02t1BmUyuebOynKe0+l0CKOXkghhGUav12t1+rVaslotQdE+g8SyrEGn1+kNqjVYsoofSxZR9F+RCGGfNsWKZPINRdIcxy3P0zyyJ5Kmecxer/fq1ascx73zzjt0uKEkSSzLxmKxWCym1WpNJpPb7Xa5XPn5+YQQt9s9Ozur0+mcTufQ0NDw8HBzczPNlh4ZGZmeni4qKqJxME3/IIQ4HI779+8/efKkoKCA5/lkMvn48eNAINDc3ByNRu/duzc6OmowGFiWjcfjAwMDoig6HI7M5pPmOE6lUr3W/QR/EzQaDaFtE4Mm/K8wDKPValVq9UpvSIY0Wg3DMCjTxSTCsOzaLlmN5sdoAnU2jZQFdVajYQha48VSxZoFIU72RNKpEX7xePzu3
bscx3m93mAwePXq1WvXrtnt9tLSUo1GU1NTc/LkyW+//ZY+qKWlpWV0dPTAgQMbNmwYGRlpaWlRqVQNDQ3hcPjq1as+n6++vl6v14+Ojt66dau0tHTjxo319fU3b9784YcfBEEoKiqamZm5ePGiSqXauHGjIAj379//9ttvQ6GQ1WodHh6+dOlSXl5eVVXVSu8e+FshShLLMIuS+yEd3Tl0R630trwClOxPQslmq7VasqLEsijZ51qjxbpU9kTSlEaj2blzZzwef/Dgwfz8fDAYvHDhQl5e3q5du3JychiGaWxs9Pl8t2/f/uKLL+iEdzt27NixY4dOpzt48GA8Hm9paens7KRjB48dO1ZXV0cI8fv9jx49UqvVkiRZLJb333//zJkz586dUygUiUTCYDAcPXo0Ly9PkqQPP/zw3LlzJ0+e5Hk+kUjk5+cfO3bMYDCs9I4BAAAAgNcs2yJpQojD4XjnnXf8fv/169d7e3s//fRTu91uNBppfoVerz9w4EBdXZ3b7SaEWCyWnJwcnU5HCMnPz//4449nZmY8Ho9MJrNarXl5eUqlkhBSWlr6X//rf9VqtTKZjBBSU1Njs9lmZ2cDgYBWq7XZbDk5OXT9zc3NhYWFbrc7HA4bDIacnByr1bqSuwMAAAAA3owsjKQZhjEajUajsbGxkeO49evXL3qDWq12Op1Op3PpZ+kHl76u0Wg0Gk3qT5Zlc3Nz6cNZFuF5Pj8/nyZhAwAAAEAWy8JImjydpLCsrCw/P5/+e9GA39SLi5Y+7/X0RS//zmd+NQAAAABkh+yMpGnwqlKpnjfxRSq6XRTmPu/1zN6JGBoAAAAgi2UyNdsastaeiAQAAAAAa0aWR9LoFQYAAACANyTLI2kAAAAAgDcEkTQAAAAAQCYQSQMAAAAAZAKRNAAAAABAJhBJAwAAAABkApE0AAAAAEAmEEkDAAAAAGQCkTQAAAAAQCYQSQMAAAAAZAKRNAAAAABAJhBJAwAAAABkApE0AAAAAEAmEEkDAAAAAGQCkTQAAAAAQCYQSQMAAAAAZAKRNAAAAABAJhBJAwAAAABkApE0AAAAAEAmEEkDAAAAAGSCX+kNeFMkSUokEslkUqFQMAzDMMzz3kYISS2lf6Y871MvfufLrwQAAAAA1q6s7ZNmGGZ6evrevXssy74glk0PsiVJYv7ai9f/vHcuWrQosAYAAACA7JCdfdJ+v39qaurmzZtDQ0NGo7GgoMBsNrPs4suGWCw2MTHBsmx+fr5cLmcYZmFhYWJiwu/38zxvsVgKCwuVSuXS9UuSND09PT09HYlElEplbm5ufn4+XX8ymZyYmJidnY3FYhqNxm635+XlLcdvBgAAAIDllYWRtMvlunTpUn9/v8fjiUQiJ0+edDgce/bsqaysTL2Hdj8PDAycOHGiuLj4/fffN5vNY2Nj58+fHxkZ4ThOFEWe57ds2bJz506tVpu+fkmSOjs7L1265PF4aJezTqfbt2/fxo0bJUm6e/fu9evXI5EIfafNZjt06FB1dfVy7wUAAAAAeMOyLZKOx+MPHz68c+fO7t27w+HwwMDApk2bbt26JQhCXl6eXq8nT8Po2dnZK1eu9PT05OTkiKIYjUavXLnS2dm5c+fO2traSCRy48aNc+fOmc3mTZs2kbR05/n5+TNnzvj9/v379xcUFLhcritXrpw6dcrhcAiCcPr0abVaffDgQavVOjIycuXKlTNnzjgcDoPBsJL7BQAAAABet+yJpGl8HAqFRkZG8vLy3nvvvbt3787MzBw6dMhut8/OzoqiSJ4OB4zFYpcvX56fn3c6nYIgsCw7NjY2ODjY2Nh4/PhxtVpNCLFYLL/97W+7u7urqqr0ej1dvyRJvb29brf77bff3rt3L8/z1dXVCoXiq6++6ujoiEQigiAcP368qamJYZiKigpRFC9fvjw4ONjc3LzCOwgAAAAAXqtsG3HIcZxSqYxEIj6fj4415Hm+qanpyJEjRqMx9baHD
x92dXVt27Zt3bp1yWSSZdnZ2dlIJFJWVqZWq0VRlCSpqKgoLy/P5XIFAoH0r5iYmJDL5WVlZTzPi6JII2adTjc0NPTkyRODweB0OhmGEUVRJpOtW7eOZdnx8XEaxwMAAABA1sieSJpmX2i12pqammAw+Lvf/a6lpSUQCIyNjSWTSfoe2q88Pj5+9erV4uLi7du302GCtDOb5/lUSjTDMBzH6fX6eDwej8dT3yJJUjAY1Gg06SMRFQqFXq/3+/2BQMBgMPD8X3r6VSqVWq0OBoPLsAcAAAAAYDllTyRNsSxbU1Nz7NixZDI5NDQ0OTn5hz/84ZtvvhkZGSGEMAzj9/svXryYTCYPHjyo0+lSKR/xeJxl2fQgmBAil8tFURQEgaTNEh2Px3meT58JhGEYuVwej8djsZhcLk9fRNeZHosDvFEswxBCls5UAyl057BrbaJ3lOxPQslmq7VasixK9kXWaLEulT150il6vX7fvn0bNmy4cOFCS0tLXl7ezZs3PR7Pp59+qlKpWlpaJiYm3nrrrdLSUkIIjYkVCoVcLpckKdV7TSWTSYZhFlUDOrNHeraGJEmCIPA8n0wmk8lk+gTSdJ0cx2X2WxKJRCgUoqE8wMsQJUkl4xcWPJIkrfn26XVjCBFFcX5h3qYgMSG5tlpwUZKUMt6z4CGSRNbShi8LhoiiOD8/b+LFtVqyHo9E8PCBxRjC0JI1cGIimVxbTzoTJUkl5z0eDyGEIQTFm0Kb4oWFeatcenMVViaTqdVqmUz2JlaeLgsjaUKITCazWq35+fl2u/3Xv/716dOnOzs7JyYmFArFtWvXAoHA48ePp6enRVEcGhqKRCIXLlwIBAKSJEWjUbqG1PhFmUwml8tTLxJCNBrNxMREenSbTCZDoZBWq+V5PhgM0nCcvlkQhGg0SocwZkCSJBq1r63mA1YQPfAk6emVHtrudAwhhEgi3UVr7JlJ9NJIWnPbvSwYQnfNj/cY19YekiSJEElMbfXa2vplIUrSGixYIkkSkWidXWNbviwk8Q03xcu227MnkqaxbzAY7O3tlclkTU1NhBBRFJVKpd1uv3PnjsfjsVqtNptNrVaPjIzQXbywsBCPx4eGhhwOB8/zMzMzoijSTuiFhYW5uTmDwUDj4FRGtdVq7ejocLvdhYWF9MXZ2dlAIFBeXh6NRgcGBubn5w0GA8dxkiRNTU0lEgmbzZZZKCyXy81m82vcS5D1JEIYQszmKMMwEiGEwYn5LySJsCxrsVgMJrOerLGOXVqyJnOIYRmU6SKSRFiOs1isBpNpjZas2RRm6IajzqaRiMSyrNViMZpN0porWYkwDDGZgmiNF5EIYVlujTbFS2VPJE0JgtDd3T08PMzzvM/nSyQSg4ODra2tGo0mNzfXbrd/+OGHgiDQuFYUxVOnTgUCgSNHjphMJpfLde/ePYfDsW7dulgsdu3atampqXfeecdgMMzPzw8MDOTk5JSUlNTU1Ny4cePSpUsqlSovL29+fv78+fOCIDQ2Nsbj8fb29gsXLhw+fNhoNE5OTl6+fFmr1VZWVqJTGZZHarrGld6Q1etpt720tmolSvYnoWSzFUo2K63RYl0qeyJpWhJ6vX7z5s1ut/vEiRPRaNTr9X7++eeiKO7YsaOoqIgmzaR/SqPRJBKJvLw8s9m8Z8+eb7755sSJEw6HIxqNzs7Orl+/vqmpief5iYmJkydPNjc3FxYWFhUV7du37/z583/84x8tFovP5wsGg7t37y4rK5MkaceOHXfu3JmZmdFqtfPz86IoHj161OFwrNBeAQAAAIA3JXsiaYpl2aqqKo1G8+TJk4cPH0Yikbq6usrKypKSEpp1vujqcPPmzfF4XKVSSZJUV1en0Wj6+voWFhZ4nt+2bdv69estFgshxG63Hz161G63cxzHsuyePXtsNtvw8HAgECgsLCwpKamtraXp1G+//XZhYeHY2FgkEnE6neXl5dXV1Wv9egsAAAAAlsq2SJoQwvO80
+l0Op1arfb+/fu/+MUv0pcuCmrXr1+f+jfHceXl5eXl5alU6ZScnJycnJzUnwqFoqmpqampaem8HBqNZtu2bdu2bfs5U3YAAAAAwOqXhZE0+XF4u+R0Ok0mE32E4fN6hWkXdWop/SDLsvQfDMPQRYv+TH2QDiskfx2gv2ARAAAAAGSN7Iykachrs9lsNttPvnPpB9P/sej1pR9cGii/YBEAAAAAZI1sfvQOJnEEAAAAgDcnO/ukKXQJAwAAAMCbk8190gAAAAAAbw4iaQAAAACATCCSBgAAAADIBCJpAAAAAIBMIJIGAAAAAMgEImkAAAAAgEwgkgYAAAAAyAQiaQAAAACATCCSBgAAAADIBCJpAAAAAIBMIJIGAAAAAMgEImkAAAAAgEwgkgYAAAAAyAQiaQAAAACATCCSBgAAAADIBCJpAAAAAIBMIJIGAAAAAMgEImkAAAAAgEzwK70Bb4r0FMuyDMMsXZr696KlL1j0vLcteudLrgEAAAAA1rSs7ZNmGGZ+fn5gYOCZsSyT5uUXPe9ti975kmsAAAAAgDUtO/ukBUGIx+Otra1dXV25ubk6nU4mk6UvjUQi8XhckiS5XK7RaDiOo4sSiUQoFEokEgzDKJVKjUbzvGg4Go1GIhFBEDiOU6lUKpUqtSgcDkej0WQyKZPJVCqVQqF4oz8WAAAAAFZEFkbS4XC4vb393r17w8PD4XD43//93+vq6pqbmy0WCyEkHo+3t7c/ePBgbm4uHo/bbLYdO3Y0NDTwPB8Khe7du9fa2urz+ViWdTgcu3fvXrduXSrOTnG73VevXh0cHAyFQiqVyul07tu3z+FwSJI0Pj5+5cqVsbGxaDSq1Wqrq6t3795tNptXYk8AAAAAwBuUbdkdoii2t7d/+eWXyWSyuLjYYDCo1epz586dP38+Go0SQrq7u0+dOhUIBJqbm5ubm+fm5v785z/39vYSQq5cufL1118zDLN9+/b6+vrR0dHPP/98aGiI/HXqcyQS+fOf/3z9+nWbzbZr166CgoKWlpb/+I//8Pv9Ho/nT3/6U3t7e0lJyc6dO00m06VLl06ePBmPx1dqhwAAAADAG5I9fdKSJDEMEwwG+/r6dDrdr3/9656enkQi8emnn167dm1hYSEQCCgUikgkUlFRsWvXrqKiIoZhLBbLV199NTk5mZOT09HR4XQ6P/vss9zcXFEUCwsLT5w40d7eXlRUpFQq6foJIY8ePRocHNy2bduxY8f0en04HDYajVevXu3q6orFYi6X68iRI7t371apVF6v9+TJk729vcPDw1VVVSu9hwAAAADgdcrCPul4PC6Xy41Go0Kh4DjOZDK99dZbH3/8MU2x2LRp0yeffOJ0OjmO4zjObDYrFIpkMjkxMeHxeOrr6x0OB8uyMpmsoaHB4XCMjY35fL7U+iVJGhoaYhhmw4YNRqORYRitVtvc3KxSqXp7e3t7e3U63YYNG2iCtdlsbm5uTiaTw8PDi+b6AAAAAIC1Lnv6pGmHsUajKS0tffTo0ZkzZ2KxWDwe9/l8BoMhNexPLpenPhIIBNrb25PJZF5eXjAY5DjOaDSSp93bKpXKbDaPj49HIpHURyRJ8vl8Go1Gp9Ol3qnVavV6/cLCQjweN5lMdPQhXWQwGJRKpdfrTXVpryqSRCSCEP/ZMPcKAAAAvFj2RNKUTCarr68fGxu7e/duIBAghHz77bcNDQ2lpaV6vZ48DXBjsdjExMTVq1c7OjoaGxvLysru3LnDcdyieTaUSqUgCIlEIvVBQkgsFlMoFDz/l13HcZxarfZ4PLFYzGq1po9Q5HleoVDQFO1ViGEIQxAuZhWWYQgmMn8hunPYtbaLULI/CSWbrdZqybIo2RdZo8W6VLZF0oSQvLy8jz/+eHR09Pr1611dXcPDw52dnTt37nz77bflcjnDMD6f79atW7dv347FYrt3796zZ4/ZbBZFkTzniH/mo1vSszXoI2DoBNL036/rtySTyUQiQbftd
WNYhoTjwthCUJREgng6jUQIzzLFFp2S50SJvrBmiJKklPGRaFSSJBTqIgwhkiRFIpF4LBoXkmurBf+xZCNRIqG+LsH8WLKJmCq2BktWIeOj0SjuED4L87TOqhJCcm1FpbTO0q60tbTdb97yNMUcx8lkMpZ942nMWRhJsyxrNBqNRuPCwkIwGPz7v//706dPd3d3NzY2Op1On8937ty5O3fulJaW7t27t6KigvZDK5VKURRjsVjq4YgMw0QiEZ7nU3NR0xBZqVS6XK5kMpl6pyiKkUhEpVKxLBuNRkVRTC0SBCEWi6XPNv1KEomEz+eLx+OvvfkQJUkl49vGXP/v51eC0TjHMUjkphiGCEnJqFL86z8dLss1xRLCmmu7NXKZ3+cjtERRrOkYIoqi1+f1qrloQlhz8ZZaLvP7fZL0Ji6t1zaGEFEUvV6PV8FE1mbJ/mVADupsGlqyHq/XoyDxxNqLpDUKud/vl9AaLyGKos/n9arYN9QUS5KkUCiMRmN6Tu8bkm2RdDwen5ub4zguNzeX4ziWZQsLCxsbG7/55puJiQmn09nd3X39+vWtW7e+9957NNeZ0uv1kiR5vV6GYegDxmOxmNfrValUSqWSEEIvaxiGMRqNjx49CgaDqXeGQqFAIFBaWhqLxWZnZ6PRqF6vpzkefr8/FovRsYkZ/ByFQmGz2V7TvvkroiRxLGsOCoFIzBuO8hyLMZEUwzBCMsmxjNWWY8s1J0VpzZ2VOZa1BgWGZSWSJAya77+QJMJxXI4tx5ZrXasl64sxLIsyXeTHks3Jta7dOuuN/ZhrhzqbRiISx3G5OTk5a7FkRYnnWMtChGEYiaBk/0IihOM4my3Hlmt5o8W6PJde2RNJ017kQCBw9uzZYDD4j//4j4IgJJNJv98/NjYmiqJGoxFFcXJyUqPRNDY2qlSqVPqyTCZzOBwGg6G9vb26utrhcAiC8ODBg/Hx8a1bt+r1+mg06vF41Gq1Xq8vKyu7devW/fv3bTabwWAIBoN3794NhUI1NTXRaLS/v7+lpWXPnj1qtXphYeHu3bssyzqdzsyK8809cpwhhCGE4zie43iO4TkOkTRFdzjPcRzHMQzLcWvsrhwt2WW4n7V2sSxKNjtxHIuSzUprtWQZQhiU7HOt0aZ4qeyJpGkApNfry8vLz5w58/vf/14QBLfb/cUXXzx+/Li+vr6qqioYDM7Pz3s8ntu3b3d1ddHgO5lM1tbW1tfXb9q06dSpU3/84x9rampCoVBbWxud0k6lUnV3d3/zzTeNjY1Hjhyprq6ura29c+eOz+crKCiYnZ3t6uoqLy9vaGgQBKG1tfX8+fMul8tqtY6Ojg4MDGzZsqW0tHSld89i9Lc/TUIhrze9e61LJeeQtJGma8Wa2+CVgJLNTj/eQl9rO2rNbfDyQ8lmqTXZFC+VPZE0JZPJmpubeZ7v6ekZHBz0+/1+v3/Pnj1NTU1qtToYDBYVFc3Nzc3Pz8/PzxN6K18Q6IO+d+7cKZPJHjx40NbWxrJseXn5nj17nE6nJEkqlSo3N1ev1zMMI5fL33//faPR2N/f73a7ZTLZ1q1b9+/fr9FoJEn69NNPL1++PDY2Nj4+rlQqDx06tGfPnmVI0wEAAACAZZZtkTQhRKvVbt++fceOHZcvX75///5/+2//TavV0iserVb71ltvHTlyZNFHWJalT2PZu3fv9u3bA4GATCZTq9U8z9MPlpaWlpSU0MRoQojFYvnggw8ikUg4HFapVCqVimZFMwxTUFDwq1/9KhwOR6NRrVarUChwZwcAAAAgK2VhJE2epheXlpZyHKfVatNjWRo0v+CDCoVi0azS9PX0WaLpejQajUajWboSjuN0Ol36cEYAAAAAyD5ZG0kTQkpKSoqLi3+ySzgLcnQAAAAAYPllZyRNveTEFwijAQAAACADSOEFAAAAAMgEImkAAAAAgEwgkgYAAAAAyAQiaQAAAACATCCSBgAAAADIBCJpAAAAAIBMIJIGA
AAAAMgEImkAAAAAgEwgkgYAAAAAyAQiaQAAAACATCCSBgAAAADIBCJpAAAAAIBMIJIGAAAAAMgEImkAAAAAgEwgkgYAAAAAyAQiaQAAAACATCCSBgAAAADIBCJpAAAAAIBMIJIGAAAAAMgEImkAAAAAgEwgkgYAAAAAyAQi6VcgSdL09HQ4HF7pDQEAAACAlYdI+hUIgnD27NnR0VFCiCRJK705AAAAALCS+JXegLXB7/d3dHQ8evSopaXF4/HMzMzU1tZarVaWxaUIAAAAwN8oBII/LRgMnj179syZM263m+d5v99/9erVr7/+enZ2dqU3DQAAAABWDCLpnzY+Pt7e3l5ZWfnJJ59YrdYdO3bs27dvcnLyyZMnyWRypbcOAAAAAFYGsjt+2sLCQiKRqKmpKSsr43k+Jydn/fr1DofDaDQyDLPSW/dzMU//AyprdkjW/JDXJWt2SNb8kNcla/YGSnaRrNkhWfNDXovs2xWIpH+a1WpVKpWdnZ35+fmSJCUSCY7jKisrV3q7Xg8p7T+gsmNvoGSXErNib6Bkl8qOXYGSXSo79gZKdpEs2w+IpH9aUVHR+vXrb9269eTJk7m5uY6ODovFkp+fr1Ao6BtS83gwDPPae6nf0CQhkiQRhiGSxDIM/S/LjuyMMT/uEEIru7TWZmn5sWQJQckuwjAMzzIsQ4i0lktWQskuxjAMmwUlSwjLEJRsulRrLK3d1pg8bY1ZlOyP/lJh33yxLk/iALPWjswVIEmS1+vt6enp7u5ubW1lWdZoNFZWVr733ns6ne7KlSuTk5MMw4iiaDKZ3nrrLZVKJUnSaym/aDTq9XoTicSbCNBZlvGHY32Ts8mkSNZ+msrrwhAiSpKMY+uK8tRymSitsX1DSzYQjvVMzIqilG130X4GhhBJkuQ8V1OQo1bI12jJ+sKxRxOzSRF19i9onVXwXG1hrmrN1ll/ONY74RLFrLvz/TOkSramMHeNtsYcy3rDkb4Jt4hY66m/FGtBrlrxpopVkiSFQmE0GuVy+etf+19Dn/RPYxjGZDLt2LGjurp6cnJyw4YNiUTi9u3bFRUVTU1NHo9namqKZdnkU6/xq2UymcFgEEXxNa4zndnErCspXFNN0xtH+xAkQqIJYe1eZ5pNTCVK9q+lSjaWENbuWc1sZqpQsn8ta+osWuNFJEJvna7tkjWZTVUlRSjZlGVrijmO4zjuza0/BZH0TxMEIZFIKJVKg8Egk8lKS0vLy8sHBgYePXpUX19/5MiRXbt20T5jmUymUqnI67uhwHEcXeEbJaETJA2t4Swhatkbv5B901Cy6VIlq0LJZhfU2WyFks1K2dQUU4ikf1pnZ2dra+uuXbucTqcoiolEQhCEeDwej8cZhjEajSaTaaW38WdB9V4ka3ZI1vyQ1yVrdkjW/JDXJWt2SNb8kNcla3ZI1vyQ1yLL9gYi6Z+m0WgmJiZOnjzZ3NwcDof7+/t7e3uDweChQ4e0Wu2iW05ZMC8eAAAAALwMjDj8afF4/OHDh3fv3nW73dPT03q93m6319bW7t6922AwrPTWAQAAAMDKQCT9UhKJhN/vd7lcv//977ds2bJjxw69Xq9UKtEDDQAAAPA3C9kdL0Umk1ksFrPZXF9fv379+tzc3JXeIgAAAABYYeiTflmSJEmSFAqFFAqFTCYjSIkGAAAA+NuGSBoAAAAAIBPsSm/AGoMLDwAAAACgEEm/GmR0AABAFkOHESyPrDnSkN0Br4YeMEuvKJ73OqxmS6s/SnB5PLPhxc6HF3hxG7toqSRJGRxOP/mpzFabNVakwczKfZ5lPwqRNACsLpIkCYIgSZJMJvvJ87ogCIQQnucJIYlEgmVZjuPWehstSdJq/i2rcPMSiQQhhOf5VbI9a5cgCIlEguf5V92ZyWRSEASZTMayuNe9VqWqNm1RXxV9CDTP8xzHvcx3xWIxhmHkcvlar7aYBQ9eTTAYDIfDdDrt1IvxeNzr9SqVSr1ev4LbBq8kH
A57PB5RFGn3AMdxSqVSp9PRqWlWUDwe7+npEUWxoaFh6caIojgzMyOKYm5uriRJHR0dcrm8qqqKENLa2qrT6datW7fiP+EnBYPBQCCQSCTozud5Xq1Wa7VaGpvGYrHOzk6VSlVVVZXxbxEEIZlM/vzIhp5c6UbSE95r2bzXKJFIdHZ2chxXU1Mjl8tXenPeiEQi4fF4WJY1Go1Lo5xoNJpqgVmWdbvdoVAoJydHrVY/b4WiKE5OThJC7HY7z/M0rJmamhoeHo5Go5WVleXl5Yvim3A47HK5tFqtzWZLf50ew2NjY6Ojo/X19RaL5fX97lUkHo97PJ5YLEb/pA2mVqtVKBRv7kuDwaDb7TYYDGaz+c19C3laiH6/v6+vz2g00hb1Vbnd7t7e3srKyvz8/Bf0Ooui6Ha7nzx54na7zWbzxo0b08OJtQiRNLwsWjHu379/69at9957r76+PnU/cXJy8j//8z8bGhreeust+rb0ex3p1emZd8ee+eZnPob9eat95qLU5iHzZBFRFFmW7e7u/vzzz3mep8EHwzA2m23r1q0NDQ0qlYq+8wVF8zJ/vuAwWFoudKJJhmHi8fiDBw/i8Xhtba1MJltUjrFY7Pz588lk8sMPP5TJZHfv3lWr1aWlpQzDXLt2rbCwsLS0dDWEdy8gSdL9+/d/+OGHZDKpUChEUZTJZA6Ho6GhYf369RqNJhqN3r5922g0lpWV0SgnFQ0vPZifd+QPDw9PTk42NDSYTCbyEjXoeauKRqMPHjwwmUzr1q2jwXQsFrt586bFYkltHsMwL846WPqNL75R/kqfisfjd+/e5Xm+rKxMLpen3vPyjQlZxe0D3b0ul+tPf/qTKIqffvqp0+lctLS1tfXEiRNbtmx59913VSrVvXv3BgYG3n//fafTuSigofWIYRhBEH744QdJkn75y18aDAZBEDo6Ok6ePKlQKEpKSmKxGK2P6d/idru/+uqrurq6Y8eOpV4nT9uToaGhK1euOBwOi8Xyk8lLLz4OV6fp6enf//738/PzGo2GVkm9Xt/U1LRt2zadTveC083LnODIc2ro5OTkN998s3nz5v379z9vzS84nl9yP6fK2uPxXL9+3el0piLpl6mJqdenp6d/+OEHtVqdiqSfeXbwer1/+MMf5ubmioqKWJYVRfF52/+CHbWqIJKGVxONRv1+P72XmpJIJHw+X+pinTz/WH/m6z/zxectSr2yCiveahCNRhOJRGNjY01NTSQSmZ2dbW9v//LLL3meb25upmfHlymFF/z54sNg6QfpKyzLxuPxeDy+aCX0HzKZrL6+XhRFpVJJ70SnjsZ4PL7oyFy1wuFwOBzes2eP0+mMRCJut7ujo6O/v1+lUjU2NtIOQvpbFgWpLzjOF70yODh4586dsrKy9N6sF9eFpVenDMOIonj16tWqqqqamprU6ZnuanqEvNI6M9uSF2wh/Uc8Hqd3V575tldtTFahRCIRCoVGRkZ6enpKSkpI2vZHIpGenp7BwUF65BBCysvLjUaj0Wh8ZumkalljYyMhhD6sV5Kkvr4+tVr9X/7Lf6GPHku/QU8/YjAYtm7dmv5gMvo6fWcymYzFYjQqIpmW72omCEIwGCwvL9+wYQMhxO129/X1nTx5kmGYvXv3pq7eX6aGPu/Fpa+bzeatW7cWFhb+5Jp/zkGefpwsakVfqf6KohiLxWjG3TM7sOifIyMjc3Nzx48f37JlC8dxtJsgs4ZilUAkDa+GYRh63C/qsUhFXfTUGwgE/H4/DXdMJhPt9RQEwe/3K5XKaDQaCATkcrnBYFCr1cFg0Ov1SpKk1Wpp6x+NRsPhsEqlCgQCkUhErVbTe5oejycUCrEsazAYtFptaqsSiYTX641EIhzHGY1Gek/T7/ezLCuXy/1+P8MwBoNhlXdVLjOGYTQazbp16zZv3pxMJgkhBQUFv/vd7x4/frxhwwaWZWOxmN/vj0QiPM/TkqJF7PP5RFHU6XS0P9Lv90uSp
NPpOI5L/UnvMvv9/kAgIAiCVqs1GAz0rnQkEolEIlqtNhwOx2IxrVar0WiSyaTX6w2FQjzPpyfgiqIYDAYlSaIHA/2isrIyURTlcrkgCOnngJ+M6lYPhmGUSmVtbW11dXUymRRFsaSkhO782tpaWptYlhUEwePxyGQyjUZDwx260/R6Pd2ZkiTRRBFBEDQajU6nk8vlNEyfn58Ph8PT09MymUylUun1evpmv99PCNFoNAaDIT1aotUzGAyyLKvT6Wimls/nm5mZCQaDHo9nbGxMqVSazeZUfff7/QsLC/F4XKfTGY3GpZde9Bo7HA6n6ix9QyQSicViCoUiHA6HQiGZTGYwGFQqFV0qCEIgEAgGgwzD6PV6rVab6pIPhUL0ol2hUBiNxtTNk0VFv7CwkEgkjEajQqGgOy0QCMRiMbVardPplEplPB4Ph8NyuVwURfq8LaPR+CYL/DWgJTswMLBlyxaaX0EvdQYGBh4/fqxQKFJ7yeFw2Gw2rVZLqwzP84FAIBqN0p9J76SzLFteXk4I4TguFApNTU2NjY3p9fpYLLawsKDRaORyOW2lCSG0Ymq12pqamlQyA62bwWAwkUhYrdb0zu9kMklLkBBCb4nQRiA1niEQCNCWXKfT6XS6tVJteZ4vLS3dvHmzTCZLJpMbN2783//7fz98+JC+EovFNBpNKBSKx+P02CNp5yaZTKbT6ehpK5lM0sObZVmPx0NPTzqdLhqN0kOXVk+ZTKbX62tra+kZLRKJhEIhrVabyoXwer2EEJ1Ol0gk6BmTpl+qVCqTyUTPmIFAgNYvjUbzzB8Vi8V8Pl8kEqF3w9JrMb0l5fP54vG4QqEwGAz0qxOJxKK2InXspT6eOoPwPK/X6+nZQRCE+fn53t5e2l75/X56yiaEhEIhv99Pj1Ja6+mOCoVCkiTJ5XKPx5NIJNRqtV6vf6MZNRlAJA2vjGEYOhQsVd/SR4YlEom+vr6bN29OTEzQgGn//v1btmxRq9Xz8/MnTpywWq3z8/OPHz+WyWTNzc0NDQ0tLS19fX2BQKCsrOzdd98tKSl5/PjxhQsXiouLBwcHZ2ZmDAbDnj17cnNzL168ODExkUgkNm3adPjwYavVSgiJRqN37969efMmrZZVVVV79+7Ny8s7d+5cKBQym80dHR2FhYVHjx612+1ZNmT450vlVLAsW1RUZLfbvV4v7Vu6cePGvXv3fD6fXC6vrq7ev39/QUEBIeTKlStTU1Pvvfdefn5+IpE4deqU3+//5JNPrFZrKBT69ttvdTrd4cOH5+bmvv/++9HRUUEQcnNzd+7cuWHDBoVCMTAwcOXKlaqqqkePHsVisYMHD9bW1ra1tV26dMnj8Wg0mpKSkoWFBXrySCaT165dm56ettvtnZ2dFovl8OHDra2tsVjsnXfeYVl27Y6ZpvWIEMJxHMdx9GyUTCZT/b70LHv27NmKiop9+/ZxHJdMJh88eNDT0/PBBx/Y7XZRFMfHxy9cuDA8PByPx61W69atW7dv3z4xMXHhwoXBwUG/3//dd9/JZLKNGzfu3buX3n8fHx8nhOTk5Ozfv7+hoYEGNzSj5ubNmy6Xi+f5oqKiI0eOFBUV3bx5s7W1dW5uLhKJTE9PazSajz/+mAbZLpfr22+/nZqamp+fz83NfeuttxobG9ND82Aw+PDhw/v378/Pz7MsW1xcvGvXroqKCp7nHz16dP/+fYPBMDMzMzU1JYpifX39/v376RHV1tZ248YNl8vFcVxRUdHBgwfLysoYhpmamrp8+fLjx4/pKXzz5s0HDx6kkX36jh0cHPzuu++MRuPbb7+dk5MzOzt78eLFR48ehcNhg8GwcePGAwcOTE9PX7p0yWw20yuETZs2HT9+fJU3DgzDWK1Wt9vd09Ozd+9e+qsFQeju7o5EIjabjfYFEkLu3bs3Ojp69OjRnp6eyclJpVI5NjY2MzOjUCj27Nmze/durVYrCML333/Psuzhw4f7+vquX78+NjYmk
8l+//vfa7XaQ4cO2e32M2fO6HQ62hdeU1OzadOmH374Yd26dfv27RNFcWho6Pz58yMjIwzD1NfXe71euveSyWR3dzettjzPV1dXx2IxnuePHj2ak5MTj8fv379/+/bt2dlZnudLSkoOHz5cXFy8JsYpSmk4jrPZbA6Hw+VyRaPR9vb2np6e4uLivr4+QshHH31UUlISDAbv3Llz9+5dv98vk8mqq6sPHjxot9t9Pt/58+d9Pp9Op+vu7o5Go3V1dVu2bBkbG7t79+7CwkJOTs7x48cbGhomJyfPnDnT1NS0c+fOvr6+a9euHT58uLa2lm7D999/L4riW2+9NTo6ev369by8vOnp6ZGREZPJtHfvXoPBcOvWreHhYVEUN2/efOzYMYPBsOgX+f3+69ev3717NxwO2+12g8EQiURSS+fm5q5du9bR0RGNRlUq1aZNm3bv3q1Sqe7fv3/37t35+XmGYfLz83fu3FldXU1bs1QXWyQSuX79eldX18aNG7dt20Yveufm5r7++uu+vj56plCpVE6n8/jx4+Fw+Ny5c8PDwz6fj+O4pqYmen4Ph8OXL1+emprKy8vr7u52u90mk+ngwYNbt25dVSMiEEnDK4vH49PT02azmZ71GYaZnJyMRqO0GQ2FQrSO7dy5k+aDnj17Ni8vr7q6OpFIjI2Ntbe3b9y4cf/+/Y8ePbp48WJPT49Wq920aZPb7W5vbz9//vw///M/JxKJnp6eiYmJhoaGioqKtra2EydO0Hq+Z8+e3t7eO3fuGI3GY8eOSZLU1tb23Xffmc3m3bt3+/3+Bw8eJBKJ48ePe73e27dv2+32vLy80tLSVPcVpCy6KJqdnfX5fOXl5SzL3rhx48KFC1ardffu3fPz8y0tLdFo9OOPPzabzWq1emRkZGZmJj8/f35+vqury+VyHTp0yGq1zs7ODgwM7Ny50+/3f/PNNxMTExs2bDAYDB0dHadOndLpdDU1NbFYrKenZ2RkxGKxVFdXW63W/v7+06dPsyy7ZcsWpVLZ1dXV2dm5Y8cOQogkSYFA4M6dOzk5OTk5OeXl5Wq1enZ2lt7NXxNn3+cRBGFqakqtVicSiWAweOvWLZlMVl5ezvM8vUXOMEwymZyamqIdfoQQ2uU/MzNDM6kWFha+++67gYGBzZs3m0wmWn30en1hYeH69etDoVAikVi3bp3JZCorK5uYmDh9+rTH42lubmZZtr29/dtvv1UqlXV1dZIkdXV1fffddzzP79q1KxAIdHR0fP3115988onT6YzH47Ozs7m5ufX19Wq12mQy0Xu4Q0NDhJCNGzf6/f6WlpZTp04VFhbm5OSQp32lw8PDZ8+edTgcR44cCQQC7e3tDx8+LCws1Gq1NPLgeb6hoWHv3r0jIyP3798nhHzwwQcjIyPfffcdwzC7d++ORqMtLS1nz5799NNPc3Nz79+/Pzw8XFtba7Vau7q6rl69arPZ9uzZw6SNtRgcHPzTn/7EcdyhQ4dsNlswGDx//vzDhw/r6uoKCwt7enouXbpkMBisVuvjx49DoZDRaHQ6ncXFxSt1GLy8ZDJZUVFBCOnt7W1ubqa3KcbGxgYHB9etW+f3+1OXYV6vd2pqivY13rx5Mycnp6mpqaqqqr29/YcffsjJydm4caMkSS6Xi95CLCgoqKmpmZ2d1ev1jY2NWq02Ly9PFMWRkZGFhQWtVltUVFReXp5IJCYnJ2kRu93us2fPjo6O1tXV2e32kZGRjo4Om83GcdzY2Njp06fj8fiOHTsUCkVPT09HR0d1dTXN1+ru7j5z5oxSqdy9e3coFKIjIv7u7/5uUa/2qpUaWEII8fl8c3NzOp2OZVmv19vd3f348ePc3Nza2lqdTicIQltb2+nTp4uLizds2OByuTo6OpLJ5GeffSYIgsvlamtrq62t3b59++joaEdHR19fn9lspvXxxo0bly5dcjgcsVhscnKyrKyMEBIOh6empkKhUGpj5ubmkskk7bgdGBgYH
h7esmVLYWFhW1vbN998o9Pp8vPz9+3bR6+U8vLydu7cmX5jNpFIPHjw4Pvvvy8uLt6yZUsoFGpra5udna2uriaERCKRa9euXb16df369QUFBfQ6XKFQ5Ofn//DDDzqd7siRI5FIpLW1tbW1NT8/n440pdcYiUTiwoULt27dqqmpWb9+vUqlojVUq9XW1tYGAoHx8fGamhqLxULLnf789evX5+bmdnd337t3T6vVHj9+PJlMzs3NtbS0OJ3OhoaGZDLZ0tJy7tw5q9VaU1OznOX+Yoik4dXQfrKLFy/ev38/lQgVCoXcbjcNaxQKxZYtWywWS0FBQSKRkMvlp0+fnpqaWrduHY3YcnNz33nnHbvd3tDQ8D/+x/9wuVwffPBBXV2dz+fz+/1DQ0PRaJR+UUVFxfvvv6/RaAoKCv7X//pfNpvto48+ys3Nrays/M1vfjMyMkLvVnd3d2u12l/96lfFxcWSJMlksvb29omJCdo9tn///l27dqXm5Vn9LfVyisfjk5OTVqs1Ho/7/f7Lly8TQurq6sLhcGdnp9Vq/eyzzwoKCkRRVCgUd+7c2bJli9lsLisru3nz5sjISG1t7fj4OO09HRsbKywsHB4eJoSUlJRMTU2Nj48fPXp03759MpmsqqrqN7/5DR3WTXtei4uLf/WrX5nN5kQice/ePY7jfvWrX9Eg3m63P3nyZFHO69atWw8fPqxUKlPzXazgfnstotHouXPn7t69G4vFZmZmPB5PY2Ojw+F48afSf/jU1FRfX9/evXs//PBDlmXLysr++Mc/9vT0NDY27tq1y+12z8/Pb968uaysjOO4kydPzszMfPzxx/QSpbS09PPPP+/t7V23bh3Lsq2trfF4/Ne//nVVVZUgCHl5eWfOnHny5Mnu3btzc3NbW1uLi4sPHjyoUCg4jqPpPSaT6b333quqqqLZHRcvXnzy5InVak1d3tDEj6ampu3bt/M839jYSBMqyNNhguvXr//lL3+pVqsFQfjzn//c19c3MTHx6NGjUCj0z//8z/X19YQQrVZ76dKlycnJvLy8mpqaurq64uJi2pf5b//2b4ODg5s2bUrl6fb29l66dInjuF/84hf0dy0sLHR1ddXX13/22WcqlaqhoeH//t//293dvWvXrlgsZjab//7v/76kpGT1Nw70ssput5tMpmvXrvX392/cuFEUxZ6enkQiUVdX19ra+sw08UQisXfv3n379tGd9vnnnz9+/Liuri5194DjuOLiYo1G09vba7PZDhw4oFarOY6bm5uj9/Q/+eSTqqoqhUJBazc1MTExMjJy+PDhAwcO8Dw/MTHh8Xi8Xi/HcQMDA1NTU7/85S/37NlDCMnNzZ2amkpF+R0dHYIgfPTRRzU1NclkUqPRXL58eWZmht5gXOXo5cfAwAANndvb2z0ez759++gxTAhpbm4+evQoTb7yeDz37983m83/9E//ZLFYQqHQyZMn+/v7XS6XSqWiKTFHjhypq6ubmZn56quvOjs79+7de/jwYY7jRFG8ffu2y+V6+UktJEnasGHD8ePHVSqVxWL58ssvOY47fvx4fn5+Q0PD//yf/5PmBaWP4fb7/W1tbUVFRf/yL/9CR50aDIavvvqKvmFmZobG+v/4j/+oUCjcbvef/vSnwcHBSCQSDod37ty5a9culmVp50gq2ZJhmEAgcP369Zs3b9bV1b399tu0ZGnp63S67du3Lyws+Hy+7du3FxYW0uENlZWVeXl5dC6g4uLif/u3fxsaGkomkxzH0bygAwcObN68md4H+PLLL4eHhxFJwxomSZJCoSgsLMzPz6c9ZyzLzs3NeTweWv2USmVJSYnL5bp3797CwsLw8DAdK0MHMXAcV15ebjabWZY1mUw2my0Wi+Xn53Mcp1arCwoKpqeno9FoMplUq9UVFRW0TbdYLDabzW632+12QojJZMrJyaFDsvx+v9vtTiaTo6Oj4+PjtP3y+/1zc3OEkOLi4urq6rU+w84bQ
pu8GzdudHZ2xmKxeDzO8/z+/furqqpGR0e9Xu/69etp0dCcmQcPHrjdbvI0C3N0dHRhYWF0dLSoqMhgMIyPj3u93rGxMavVajAYHj9+HIvFAoHA/fv3GYbxer30VgbNCdbpdI2NjTabjbbmLpfLarXS7lhCSGFhIb0ootuZTCbz8/Nra2tpnl/qFvZaRyMYh8MRj8cLCwvHxsYmJyc7OztzcnJ+MqSjvbBzc3O0ct25c4dhGJ/PF41GZ2dnacojjWhp8cViMbfbnZOT43Q6aQi1bt06u90+NTVF7zvPz8+XlJTY7XY6tKCmpub69es0k4retaBTzNLP0tRbh8NRWFjIcZxKpSotLaX95ekbmZubq1Qq79y5Q3MPrFZrYWFhKr2bpvHodDp6neZ0Oh89ejQxMbGwsCBJ0tjYGM2PX1hYCAQCc3NzkiQVFha63e6uri6PxzM3N0fTwQVBoCmYk5OTX375pc/n++Uvf1lZWUmT+Ofn52lO9sOHDzmOi0Qi8Xh8bm6OZgCXlZUtnettNaMz/d27d6+3t3f9+vXz8/OPHj2iHcb37t1b+kMkSTKbzU6nk+aVFhcX63Q6r9crCMKiqfTorCzPLOV169bRJjQ1EkaSpNnZWY1GU1FRQS+NHA5HWVnZw4cPw+Hw7OyszWYrKSmhR2BJSUl+fj4drBKJRDweDyFkZGSEjo3xer2p5nqVo/33LS0tT548EUUxEonQUZsbN25UKBQ08XfTpk2pdKNoNEqD5p6eHjrwIxAIeDwel8tVUlKSTCZtNltZWRnLslar1WazWSyW4uJi2mecl5cnk8loxvPLbBvtzqiqqqKZV/n5+TqdrqSkxGw20yRsmimRGg9KhcNh2s7TQQ60RUoNUPZ6vR6Px2aztba2JpNJOt4gGo2WlpaazeaWlhZRFOmWOxwOehjQPo7r168HAgGVSrVz586ltxroSBh6sKWGXdFhrF1dXV6vd3p6mmaE061NJpMmkyk1wWVubq5er6cp+KsHIml4NXQw2bZt2zZs2JC6Bz0wMDAyMkL/DAaDV69epb2MKpWKXnGmd5ak5kNlGEan02k0mtSf9H4lXQ8dj8g8HRtuNBpTk1XTyXfD4TCd4iAWi83Nzd24cYN2e4iimJeXp9FoaMSmVqufOYgY6EVRWVkZDYNUKlVhYWFBQQEdB0b7J2imAcMwJpNJpVLR1o1eLLW0tExNTU1MTDidTqfT2dXVNTU15XK5qqqq6CjSWCzW3t7e19dHDwCtVmu1WulpmBYu3YxEIhGNRukdUlpScrlcq9WmSk2SJDrX8tpNiX4mhUKxY8eO6upqQRDorZ5//dd/bWlp2bp169LnGqSyO+jYUIZhaAwdj8f7+vpGR0fp4S2Xy/Py8tIvNujOp91IRqORTuBFB25qtdqFhQV6ERWPxy0Wi1KppF9kMpmUSiUdTJZaVWrqcfK0Sqa2kwbui87ThYWFhw4dun///vXr1+PxuFar3bdv35YtW+goQKVSSesmRWcDpMOtaLpIKubOy8ujY6paW1uvXLkSDofpJL50V5Cn/bVzc3OFhYWSJPX29jY1NeXk5NDRhIlEYnh4eH5+nr6ZXpjRmQfX0Fg3ShCEgoKCioqKwcHB8fFxmqS+bds2g8HwzNpB29vU8Cy1Wi2TydInOUmR0oaPp/5Bx4rRipk+rpeONZTJZPSAoe+kAxtodabDiOkitVqtVqt9Ph/DMOFwmD58oKWlJRWv5+fn0+Z69Sdr0XT/6upqOpl0bm5ucXGxVqtNPRiINmvS03kGQ6EQPSGmhtIWFxenenaUSiXdSzKZTKFQqFSq1KBAhUJBa8SiDUg/n4qiSANcuogOz6X/pgVNx/nRRVqtNlVfUhKJRCwWo8E6beflcnlqG+LxeCQSGR8fDwQCqffn5+c7nU6agX3z5k06yHL37t1btmxJdUu73W673b6wsDA4OFhYWLgooXnpsScIQldX1/nz5wOBAJ1TP/13pc4XdK/KZDK1W
r10z6wsRNLwyhiGoZeVqYYv/WlYg4OD9+7dKysro2MEBwcHv/jii/TjftEA/+dNvJC6Zn3xO2mHWXl5+WeffZaaTIf2w/X09KQuf7MsCHtdNBpNY2Pjtm3bUnPe0R1FL28WFhbo/TVCyNzcXCgUSsW7FRUVd+/epeMRnU6n0Wi8ffs2HbZSVlamVqvpSP/33nuP9r7QIqOnB+nps0joNsjlcpVKRW9E0P4YOrRfp9OltpMebFlWjql6RM80NpstLy+vp6cnHo+nd0TRFKbUWC6aWUEI4XleoVBoNJrDhw/TaQEJISzLKpXKRdOBMwxDT9ITExPBYJB2U9FkKjqRBc/zSqVydnY2dVqlXbl6vZ7OSECeBgfpdTC9ei6KR+mfKpVq+/btzc3N09PTbrf7xo0b169fLywsdDqdDMNEo1E6uIKuh+Z0GY3GhYWFgoKCTz75JCcnh15jsCyr1Wq9Xu+NGzeUSuXbb7/tdDoFQfg//+f/pM+55nA4PvvsM5qke/PmzWPHjtHjTalUbt++nQ7boG/W6/VPnjzJ+EFuK4Ue/7Rbure399atW8Fg0Gq1VlVVLQ2SUl5QTC/zjbRtT/8g3QadTheLxSKRCF0qCALt2qCzxAwODno8Hnp3ZW5ubmFhgbYbSqWSdnx++OGHdIgk7QpNzQu0mtEqUFNTc+jQoUVj7il66kn9SStjfn7+p59+Ss9N9GDW6XT07s2iM1qqg5Y8p6ToZD70uYDk6SRICoUi9eb0j6dvzNJNpei1EB2/S1+hbS/9N20WmpubDx8+TJ/NxHEcbXbKyspqa2tnZ2dnZ2fv3r1748YNu91O0y1EUdy1a9fevXtPnDhBJ/inrz+vfGlX/eXLl1mW/fDDDysqKqLR6G9+85v0/ZBq/1/wW1bWar8EhNWGHsepHrLUP1LHN+1Vqq2tpVMsdXd309G46R9ftLb0P1PR8NIweumfoijq9frc3FyaXmKxWCwWC719tvQjsAjdyXQ/p2ado/+n9+IHBgb6+/vpuKWOjg5JkuhgI0JIYWGh2Wxua2tTKBQ5OTkOh0On07W3t6vV6tzcXLlc7nA4JEmamJhQKBS5ubkMw4yNjcXj8VTLmCodjUZjt9tnZmY6OjpoD+LAwMDo6GgqynnmGeuZl1VrqMTpdtIpvYPBoM/ne/z48cjIiNFopM8WoW+gE9gNDw+73W46U0d/fz+td/SmsCRJU1NTKpWK7uTJyclQKESrm0qlopkMgiAoFIq8vDw67QPtn25ra5uamiooKKCd0zRdp7+/Px6P+3y+hw8fBgKBoqIiep5WKBTz8/OBQCD1DIWlu3pRhaVzO1y7di0Wi1VUVOzcubOhocHr9dIEIYZhQqHQo0eP6LwHCwsLvb29PM8XFxcbjUav1+vz+cxms81mCwQCMzMzyWSSzqhYVlZWX19P75jTKUHI095Tu93ucDh27txZVVV18+bNvr4+hmFsNhvP8zMzMxzH5ebmKhQKmtBCZ7FdK0cLleq8oHeBbt269fjx45qaGqvVmkwmF0XMzws7XlBfXvzm1DbQNj83NzcajXZ2dtJckeHh4f7+fo7j5HJ5QUFBOBxub2+fn5/3er2tra20OouiqNFozGbzwsICvaKzWq0+n296elpaIyMfaBE88+BZuq9UKhWdOkYQBJvNRq8S6XwmS9//zD8XFSK9STgwMEAnguzs7HS73alHIy3q0X/xCZTSaDR0wPfo6Gg8Hg8Gg48ePaKz8hFCjEajTqebnp7mOC4nJ0ej0UxNTXk8nsnJyWvXrvn9/oqKih07dtTV1dHZMFNfVFpamp+fv3fvXoZhrl69urCwsHRfpW9tLBabn58vKCior6/X6/Xj4+P0Fjfz9ClCP7mrV9xauiKH1YBeEy+6t0LH8tM7XEVFRRqN5urVq3NzczMzM+Pj46mZ3qW0502Qp893SM/9SCQSkUiE3sKORCKpjpZnfpDO3mAwGJqbm4eGhv7jP/6DDlHq6uoqKSl56
6234vE4fVLXsuyYtUcQhGg0mppFP71tMhqNGzZsOHv27JdffllRUeHxeAYGBjZt2pSfn0+e9nEWFxd3dnbSCVXovGn9/f30XichZN26dU6n8+rVq263Oy8vb3h42OPxfPTRRxaLJb1w6Z272trarq6u//zP/6ypqVGpVCMjI/S2I92YpeUYj8cFQaCvxOPx1BgamqiwfHvwZxAEwev1Xrhwobu7m85tPD09HY/HDx06pNPp6ASudOapurq677///vPPPy8uLqZ381OJLoWFhfX19ffv3w+FQvn5+UNDQ16v99ixY3l5eYSQoqIivV5/4cKF0dHRdevWlZWVOZ3OS5cuTU9PMwzT29trsVjq6uro3tu4ceP4+PjJkycHBweDwWB/f38qh1gul1dUVLS0tJw4caKgoICO+1m0q2naaHpWiSiKdAqt3t5eOv83nduBToRMc4QeP378xz/+kaZrj42N7dixo6CgQBCE/v7+U6dOjY6OKpXKjo4OeoVst9tzc3MfPnxIWxs68V8qUYG2JKIoqlSqt9566ze/+c2pU6foBeGGDRtaWlr++Mc/lpeXT05OPnny5Pjx40ajkV7GLHfBZ0oUxdQGG43G6upqOjFRatxVeonQ6iOKYuofqfXQt6V2GklLHFpUpota3dQ20FIuKCiorKy8fv06naFyenp6cnLSYDDE4/GqqqrGxsaHDx/SAXP01lZqJU1NTaOjoydPnhweHuZ5vrOzMy8vLzc3l6b6rLYIKV2qCJ6ZWkB3dXomoV6v37Vr14kTJ373u9/V1tZGIhFarSorK+m+Te/ATv84eXqeFQQh/VknNBmdTk5lMpmGhoZ8Ph897JPJZDQaTW2YJEnph7f09FFK6akghBCdTrdhw4avv/763//936uqqqLR6MDAgEwmS33djh07Ll269Nvf/ra8vNztdg8PD+/Zs8dkMl2+fPnhw4d0Mr729naDwZCq13RrJUmi05KcPXv28uXLb7/9dnr50hSg1MZotdqSkpLe3t7Tp08LgjAyMkJ731NHafpD35YelqsBIml4NXTODTqVeqrV0+l0dHQaIaS4uPjAgQMPHjxoa2szGAxHjx4dHh6mI5lUKhV9G/0gz/MVFRU0S4wQwrKsw+FoampSqVRms7mxsdFkMqW6Levq6uhkxoQQuVxeXl4ejUbpxf369esFQbh7925fXx/P85WVlTt27LBarWVlZYIgpOYKWP59tWqlOp4bGxvTB1anv2Hbtm0ymaylpWV4eFipVO7bt2/Xrl10MA3tEqivr/f5fI2NjbSJbGhoiEajDQ0NNF3SbDZ/+OGHdPZiOmfioUOHKisrGYaxWCxNTU00oZB+b0VFxccff3z9+vXJyUme5+vr62tra2liCcuyJSUler2eZh3QzoyKigqaCsLzPB0ORQ+huro6q9W6NMl4FXI4HI2NjcFgcGpqinYwl5SU1NTUNDY2sixLp57VarU6nW7Xrl2RSGRoaOjRo0dOp/PDDz90u910J1ut1uPHj2u12tHRUZfLpdPp9u3bt379evoVJSUlR48effDgQX9/v1KpPHjw4Pvvv3/z5s3JyUman7Nnzx5aARmGoZ+6d+/eyMgIx3EbN27ct28fHeCrUCi2b99OCBkaGgoGg3SUfXV1dSoLkxCi0Wg2btxII3iKTtZBCGltbe3o6OA4zuFwbNq0ic43J0kSfRa6KIr0qSKHDh3aunWrVqutqqp69913b9++TWdIcDgce/fupT3ue/fupXPPq1Sq5ubm9evXKxQKmUzGcRwdYkgPg8LCwmPHjnV1dc3Pz9vt9sOHD9NZzB88eKBWq3fv3t3c3Ox2u+vq6miTtSbQNjA1tUt5efnhw4ctFgt9+h0dJJp6hE1BQYEkSVqtli5NZb7SmRZoEi3DMLQ+0p1G15D+sB6FQlFbW0ubbvqKVqutr6+n67TZbO+++65OpxsfH+/r66OXaj6fT6PRWCyWX/ziFw8ePBgaGpIkadu2bXK5n
D42ixBSX1+fTCbv3btH+7ALCwsPHDhA47BV3krrdLqGhgaHw7E0n5thmOLiYvq4qNSLPM83NTXFYrEHDx50d3fLZLK6urrt27fL5XK6b9MnpCssLBQEgdZrQojFYlm/fr3VaqWDs2m5m0ymw4cPy2SyyclJv99fX19fVVWl0WgUCoXNZmtubk7lSatUqvr6+vz8fLqpPM+Xl5fTWzckbT/TmeYlSXr48OHAwIDJZNq+fXsikaCDDlUq1e7du+lVd3t7Ox1B2NzcTFOW7927197ezjBMTk7Opk2b6HM3zWbzxo0b6XgYjuN27doVDAbpTbDUT2MYpqCggE6Nl9ra/fv33759u7+/X6FQVFVVbdiwgaa00RY+fXyhRqOpra1Nf9DmapBVeYewetBLaoVCsWyZiPSpUTTpdnm+8W+BKIrhcJgOiMlsDXTwYvpj6l7wXdFolOO41fb8qhUnSVI4HCZpIVHqdXpSpDtZo9EsvYoQRTGVbUyLgK6KnthSa0j9IxgM0vxIsuROBe3TetW8CLrxDMOkf+Pt27fPnj379ttvb9u2jY5dW1TodEwbx3FLf3IwGExdO708mtSbeswevC50hEN6MYVCodnZWaPRSJ9u+OTJk9/85jd2u/0f/uEfUoMfaPmmxilmN5pELpfLX8ssUnTXva61UYIg0GcJP7Mbgg7rV6lUiypdKBRK1eufj1ZthUKxqh658pLQpsAbwfP8Mp+xOI5LHze9nF+dxehgr4w/Tkfuv/x3/S2cVjPzzCf9po7zF+y31IhGKlUiS0cQ0hdpcT8zPTF9YPHLYxiGbnx6xaSJB/SmP52xZ+lm0w62pdWZRmOvVM0lSaKjD1914+HF6CDC9D8ZhvH7/T/88EM8Ht+wYQN9Oonf79+/f79Op0uVWqp8/xbQJ5i+rrW9oGpkjA4hfeaiF9SdpfX652AY5nnbsPohkoZsgzB69UBZvBavcTcuCp0zW/ozv5oQYjKZqqqqUjPXvuSnfnILX3Il8Lo8s3RMJlNdXd3du3evXLkiiiLP84cOHWpqalqhbVx5b+jwexPNQgbfgspFkN0BAADLLBaL0dRJ9BNnJVEU/X7//Py8IAhGo9FsNr9qNg7AGoJIGgAAAAAgE5hPGgAAAAAgE4ikAQAAAAAygUgaAAAAACATiKQBAAAAADKBSBoAAAAAIBOIpAEAAAAAMoFIGgAAAAAgE4ikAQAAAAAygUgaAAAAACAT/EpvAAAA/GjpQ2cZhlmRLQEAgJeBPmkAgFVBkiSGYRiGofF0+r8BAGB1Qp80AMCbIj0/EGb/urNZkiRBELq6uoaGhtxuN8dxDoejvLy8srKS47gMvpdhGI/HEwqFrFarUqnMaPMBAOAnoMMDAGDlJZPJq1evnj9/XqvVWiyWWCw2Pz8vk8mOHj26adMmlmXJX+d+MEsC8fRFoigyDHP9+vVHjx4dP37c4XAs/QgAAPx86JMGAHhTvrzVeat/VMFz4tNXGEJEicg49l8ObirPs9DOY1EUu7u7z58/X1xcfPjwYbvdnkgkBgYGvvzyywsXLlRVVRkMBvLCUHjRIpoZsrCwMDw8LAgCYmgAgDcEkTQAwOsnSYRhyOWe4X89f1ejUIhP+4wZhiRFUSWXv9W0rjzPIkoSxzCCIHR2djIMc+TIkbKyMvrOpqYmhmGi0ahcLieExOPxoaGhR48eeb1es9lcVVVVWVnJMEwymZyenu7r65uampLL5eXl5bW1tV6vt6urq6enJxgMXrx40WAwlJaW1tXV0VUBAMDrgkgaAOBNUch4jUqhVcj/OpKWVHIZz/6lnziZTLpcrry8PKvVKkkS7ajmOK65uZl2JyeTyc7OztOnT4fDYa1W293d3d3d/dZbb23YsGFmZubPf/6zx+MxGAzRaLSvr29hYaGwsHBiYsLn88VisZmZGa/Xa7FY0DMNAPDaIZIGAHhTJEkSRUkUpfRImr6SPkIlHo97PJ6KigqZTEbj3dTEHaIosizr8
/kuXLggCMJHH31kt9uHh4cvXLhw7dq1qqqq4eHh4eHhI0eObNiwIRKJdHR0EELsdvs777zDsmxPT8/+/fvz8/N1Oh3Po8EHAHjN0LACAKywRCIhCIJMJqMjC1NSvchzc3NTU1N79uzZvHkzwzB5eXkzMzOtra0ul0smkxFCBEHgOK6wsNBut8fjca1Wy7Ks2WxWKBT5+flFRUUr8KsAAP4GIJIGAFhhcrmc5/lIJJJMJhctoj3TbrdboVA4nU7aRS2TyUpKSu7duzc7O+t0OktLS69evdrR0aHT6UpLS5ubm7VaLe3PppPrpSaoXokfBwCQzfBkFgCAFSaXyzUazdzcXDweT70oSVIymUxNb0cj41Q0TP8tiqLNZvv444/fffddp9MpCMKVK1fOnz/v9/vT42bE0AAAbwj6pAEA3iCGIQwhqUiWIYv/JITwPF9aWnrv3r3R0VGTyUQD32g0evnyZa/X+/777+fl5SUSiaGhoaamJp7nw+Hw8PCwQqHIy8sLhUI8z+/duzcWi0UikR9++KG9vX18fNxoNLIsm0wmBUFY/l8NAPA3ApE0AMCbIhFChxv+ZcQhIel/EoYhhPA8v23btra2tq+++srj8RQXF8disb6+vps3b9bV1clkMovFkp+f39rampubW1xc3NfX19bWVlRUZLPZWltbb9++vX379nXr1kmSFI1GCSGiKBJCLBZLIpHo6OhQKpUGg0Gn0y3KwwYAgJ8JkTQAwJvCMYyM53iek8S/zN3BMpKM4xZlXBQWFn7yySfnzp07c+aMQqGQJInjuNra2mPHjtEs6nfffff06dNnz56Vy+WRSKSgoODYsWMajaagoECn033//feXLl0ihCQSiY0bN9JJqcvLy2tqalpaWtrb2zdv3nzgwAGtVrv8OwEAIIvhaeEAAG/Ko8nZyQU/x7KpdpYhRJIkjmXqi+0mjSr9zaIoulwul8vl8XjkcrnVarVarRaLhS6lc05PT08HAgG9Xu9wOHJzc+l4xPn5+ampqYWFBY7jbDZbXl6e0WikH1lYWJiamgoGgzk5OcXFxXgyCwDA64VIGgAAAAAgE8juAAB4U6Tn91Uwz5pQIzVdnfSXJ7kwz1uaWrT0S5YuwvQdAABvAvqkAQAAAAAygXHcAAAAAACZQCQNAAAAAJAJRNIAAAAAAJlAJA0AAAAAkAlE0gAAAAAAmUAkDQAAAACQCUTSAAAAAACZQCQNAAAAAJAJRNIAAAAAAJlAJA0AAAAAkAlE0gAAAAAAmUAkDQAAAACQCUTSAAAAAACZQCQNAAAAAJAJRNIAAAAAAJlAJA0AAAAAkAlE0gAAAAAAmUAkDQAAAACQCUTSAAAAAACZQCQNAAAAAJAJRNIAAAAAAJlAJA0AAAAAkAlE0gAAAAAAmUAkDQAAAACQCUTSAAAAAACZ+P8Bxzq8WFIxG+gAAAAASUVORK5CYII=", "content_metadata": {"description": "Structured chart extracted from PDF document.", "hierarchy": {"block": -1, "line": -1, "nearby_objects": {"images": {"bbox": [], "content": []}, "structured": {"bbox": [], "content": []}, "text": {"bbox": [], "content": []}}, "page": 0, "page_count": 3, "span": -1}, "page_number": 0, "subtype": "chart", "type": "structured"}, "content_url": "", "debug_metadata": null, "embedding": null, "error_metadata": null, "image_metadata": null, "info_message_metadata": null, "raise_on_failure": false, 
"source_metadata": {"access_level": 1, "collection_id": "", "date_created": "2025-01-16T21:56:47.531787", "last_modified": "2025-01-16T21:56:47.531632", "partition_id": -1, "source_id": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_location": "", "source_name": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_type": "PDF", "summary": ""}, "table_metadata": {"caption": "", "table_content": "This chart shows some gadgets, and some very fictitious costs. TITLE | Chart 1 \n Cost | Gadgets and their cost \n Hammer | 20 \n Powerdrill | 120 \n Bluetooth speaker | 75 \n Minifridge | 100 \n Premium desk fan | 150 Hammer - Powerdrill - Bluetooth speaker - Minifridge - Premium desk fan Dollars $- - $20.00 - $40.00 - $60.00 - $80.00 - $100.00 - $120.00 - $140.00 - $160.00 Cost Chart 1 - Gadgets and their cost", "table_content_format": "", "table_format": "image", "table_location": [713.1033325195312, 115.5099316984415, 1244.9893798828125, 1077.069027364254], "table_location_max_dimensions": [1536, 1187], "uploaded_image_uri": ""}, "text_metadata": null}}, {"document_type": "structured", "metadata": {"chart_metadata": null, "content": "", "content_metadata": {"description": "Structured table extracted from PDF document.", "hierarchy": {"block": -1, "line": -1, "nearby_objects": {"images": {"bbox": [], "content": []}, "structured": {"bbox": [], "content": []}, "text": {"bbox": [], "content": []}}, "page": 1, "page_count": 3, "span": -1}, "page_number": 1, "subtype": "table", "type": "structured"}, "content_url": "", "debug_metadata": null, "embedding": null, "error_metadata": null, "image_metadata": null, "info_message_metadata": null, "raise_on_failure": false, "source_metadata": {"access_level": 1, "collection_id": "", "date_created": "2025-01-16T21:56:47.531787", "last_modified": "2025-01-16T21:56:47.531632", "partition_id": -1, "source_id": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_location": "", 
"source_name": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_type": "PDF", "summary": ""}, "table_metadata": {"caption": "", "table_content": "| This table shows some popular colors that cars might come in. |\n| Car | Color1 | Color2 | Color3 |\n| Coupe | White | Silver | Flat Gray |\n| Sedan | White | Metallic Gray | Matte Gray |\n| Minivan | Gray | Beige | Black |\n| Truck | Dark Gray | Titanium Gray | Charcoal |\n| Convertible | Light Gray | Graphite | Slate Gray |\n", "table_content_format": "pseudo_markdown", "table_format": "image", "table_location": [640.512, 134.96189999999999, 870.6048, 1051.4446], "table_location_max_dimensions": [1536, 1187], "uploaded_image_uri": ""}, "text_metadata": null}}, {"document_type": "structured", "metadata": {"chart_metadata": null, "content": "", "content_metadata": {"description": "Structured chart extracted from PDF document.", "hierarchy": {"block": -1, "line": -1, "nearby_objects": {"images": {"bbox": [], "content": []}, "structured": {"bbox": [], "content": []}, "text": {"bbox": [], "content": []}}, "page": 2, "page_count": 3, "span": -1}, "page_number": 2, "subtype": "chart", "type": "structured"}, "content_url": "", "debug_metadata": null, "embedding": null, "error_metadata": null, "image_metadata": null, "info_message_metadata": null, "raise_on_failure": false, "source_metadata": {"access_level": 1, "collection_id": "", "date_created": "2025-01-16T21:56:47.531787", "last_modified": "2025-01-16T21:56:47.531632", "partition_id": -1, "source_id": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_location": "", "source_name": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", "source_type": "PDF", "summary": ""}, "table_metadata": {"caption": "", "table_content": "This chart shows some average frequency ranges for speaker drivers TITLE | Chart 2 \n Frequency Range Start (Hz) | Frequency Range Start (Hz) | Frequency Range End (Hz) \n Tweeter | 13110 | 13110 \n 
Midrange | 1375 | 13110 \n Midwoofer | 1710 | 13110 \n Subwoofer | 9 | 130 Tweeter - Midrange - Midwoofer - Subwoofer Hertz (log scale) 10 - 100 - 1000 - 10000 - 100000 Frequency Range Start (Hz) - Frequency Range End (Hz) This chart shows some average frequency ranges for speaker drivers - Frequency Ranges of Speaker Drivers", "table_content_format": "", "table_format": "image", "table_location": [119.01695251464844, 114.26061560213566, 693.9878540039062, 1083.422370672226], "table_location_max_dimensions": [1536, 1187], "uploaded_image_uri": ""}, "text_metadata": null}}], "fragment": 0, "fragment_count": 1, "trace": {"trace::entry::broker_source_network_in": 1.737064606939735e+18, "trace::entry::caption_ext": 1.737064609844455e+18, "trace::entry::caption_ext_channel_in": 1.7370646098369871e+18, "trace::entry::chart_data_extraction": 1.737064608686222e+18, "trace::entry::chart_data_extraction::cached_0": 1.737064609051628e+18, "trace::entry::chart_data_extraction::cached_1": 1.7370646095401439e+18, "trace::entry::chart_data_extraction::deplot_0": 1.737064608866291e+18, "trace::entry::chart_data_extraction::deplot_1": 1.73706460928608e+18, "trace::entry::chart_data_extraction_channel_in": 1.737064608680754e+18, "trace::entry::dedup_images": 1.7370646083958408e+18, "trace::entry::dedup_images_channel_in": 1.737064608394093e+18, "trace::entry::docx_content_extractor": 1.737064608391644e+18, "trace::entry::docx_content_extractor_channel_in": 1.737064608389036e+18, "trace::entry::filter_images": 1.7370646083990769e+18, "trace::entry::filter_images_channel_in": 1.737064608397959e+18, "trace::entry::job_counter": 1.73706460735933e+18, "trace::entry::job_counter_channel_in": 1.737064607357017e+18, "trace::entry::message_broker_task_sink": 1.737064609848043e+18, "trace::entry::message_broker_task_sink_channel_in": 1.737064609846622e+18, "trace::entry::message_broker_task_source": 1.737064607345441e+18, "trace::entry::metadata_injection": 1.7370646073686262e+18, 
"trace::entry::metadata_injection_channel_in": 1.737064607368335e+18, "trace::entry::pdf_content_extractor": 1.737064607398e+18, "trace::entry::pdf_content_extractor::pdfium_pages_to_numpy_0": 1.7370646075532639e+18, "trace::entry::pdf_content_extractor::pdfium_pages_to_numpy_1": 1.7370646080538499e+18, "trace::entry::pdf_content_extractor::yolox_0": 1.7370646078989688e+18, "trace::entry::pdf_content_extractor::yolox_1": 1.737064608250277e+18, "trace::entry::pdf_content_extractor_channel_in": 1.7370646073974162e+18, "trace::entry::pptx_content_extractor": 1.737064608393168e+18, "trace::entry::pptx_content_extractor_channel_in": 1.737064608392411e+18, "trace::entry::store_embedding_minio": 1.737064609846136e+18, "trace::entry::store_embedding_minio_channel_in": 1.737064609845022e+18, "trace::entry::table_data_extraction": 1.737064608401692e+18, "trace::entry::table_data_extraction::paddle_0": 1.737064608469864e+18, "trace::entry::table_data_extraction::paddle_1": 1.737064608544877e+18, "trace::entry::table_data_extraction_channel_in": 1.737064608399796e+18, "trace::exit::broker_source_network_in": 1.737064607344929e+18, "trace::exit::caption_ext": 1.737064609845022e+18, "trace::exit::caption_ext_channel_in": 1.737064609844396e+18, "trace::exit::chart_data_extraction": 1.7370646098369871e+18, "trace::exit::chart_data_extraction::cached_0": 1.737064609285778e+18, "trace::exit::chart_data_extraction::cached_1": 1.737064609770478e+18, "trace::exit::chart_data_extraction::deplot_0": 1.737064609051506e+18, "trace::exit::chart_data_extraction::deplot_1": 1.73706460954002e+18, "trace::exit::chart_data_extraction_channel_in": 1.737064608686173e+18, "trace::exit::dedup_images": 1.737064608397959e+18, "trace::exit::dedup_images_channel_in": 1.737064608395814e+18, "trace::exit::docx_content_extractor": 1.737064608392411e+18, "trace::exit::docx_content_extractor_channel_in": 1.737064608391604e+18, "trace::exit::filter_images": 1.737064608399796e+18, 
"trace::exit::filter_images_channel_in": 1.737064608399043e+18, "trace::exit::job_counter": 1.737064607368335e+18, "trace::exit::job_counter_channel_in": 1.737064607359222e+18, "trace::exit::message_broker_task_sink_channel_in": 1.73706460984782e+18, "trace::exit::message_broker_task_source": 1.737064607357001e+18, "trace::exit::metadata_injection": 1.7370646073974162e+18, "trace::exit::metadata_injection_channel_in": 1.737064607368457e+18, "trace::exit::pdf_content_extractor": 1.7370646083872138e+18, "trace::exit::pdf_content_extractor::pdfium_pages_to_numpy_0": 1.737064607898837e+18, "trace::exit::pdf_content_extractor::pdfium_pages_to_numpy_1": 1.737064608250124e+18, "trace::exit::pdf_content_extractor::yolox_0": 1.737064607991429e+18, "trace::exit::pdf_content_extractor::yolox_1": 1.7370646082900168e+18, "trace::exit::pdf_content_extractor_channel_in": 1.7370646073979581e+18, "trace::exit::pptx_content_extractor": 1.737064608394093e+18, "trace::exit::pptx_content_extractor_channel_in": 1.737064608393143e+18, "trace::exit::store_embedding_minio": 1.737064609846622e+18, "trace::exit::store_embedding_minio_channel_in": 1.737064609846076e+18, "trace::exit::table_data_extraction": 1.737064608680754e+18, "trace::exit::table_data_extraction::paddle_0": 1.737064608540777e+18, "trace::exit::table_data_extraction::paddle_1": 1.737064608619194e+18, "trace::exit::table_data_extraction_channel_in": 1.737064608401629e+18}, "annotations": {"annotation::0850e5fb-3149-43f9-bdf3-3a8d80f912ae": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "metadata_injection", "task_result": "SUCCESS"}, "annotation::20cbf1f6-9b0c-4ec8-a148-8789cce2bf9b": {"message": "Created", "source_id": "nv_ingest.modules.sources.message_broker_task_source"}, "annotation::28011457-406e-4750-b1fb-c91acc9c9ad5": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "docx_content_extractor", "task_result": "SUCCESS"}, 
"annotation::35daf894-ee54-4180-bde5-1944ae215a7f": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "table_data_extraction", "task_result": "SUCCESS"}, "annotation::3e6eafec-db0a-4c8c-9b2f-2bfc603d6b49": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "chart_data_extraction", "task_result": "SUCCESS"}, "annotation::4bebcba2-54f2-4c4f-8e09-2ac1c2ab60a2": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "caption_ext", "task_result": "SUCCESS"}, "annotation::50228775-183c-4faf-9e34-478adf1a44b6": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "docx_content_extractor", "task_result": "SUCCESS"}, "annotation::55d2a8df-82dc-4904-a44e-f8cb07e95bf8": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "pptx_content_extractor", "task_result": "SUCCESS"}, "annotation::5a1be681-0e21-4595-b948-a2a928fe2bef": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "chart_data_extraction", "task_result": "SUCCESS"}, "annotation::5cd0c948-2382-458a-bbca-dfaec0e2837c": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "dedup_images", "task_result": "SUCCESS"}, "annotation::6ec6ac21-c35c-4a02-ac70-72a1fb7d6659": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "filter_images", "task_result": "SUCCESS"}, "annotation::7b253246-c4c1-47bf-867e-90164e715ca4": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "caption_ext", "task_result": "SUCCESS"}, "annotation::7da4625f-a90b-427d-9243-b762050a0cba": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "table_data_extraction", "task_result": "SUCCESS"}, "annotation::7e004440-31ce-4d13-8c38-331d6e1acaab": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "docx_content_extractor", "task_result": "SUCCESS"}, "annotation::96866059-2f15-4d4e-abfc-0443f3349aa9": {"source_id": 
"nv_ingest.util.exception_handlers.decorators", "task_id": "store_embedding_minio", "task_result": "SUCCESS"}, "annotation::9ff9511a-2dc0-4854-9c17-436338708643": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "pptx_content_extractor", "task_result": "SUCCESS"}, "annotation::aa859318-9372-4366-bf7d-2577036ca959": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "filter_images", "task_result": "SUCCESS"}, "annotation::c0218a87-de79-4678-a03f-9c6e50ca2735": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "docx_content_extractor", "task_result": "SUCCESS"}, "annotation::c2920300-2256-416b-9233-bf699fa2547e": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "pdf_content_extractor", "task_result": "SUCCESS"}, "annotation::c703abcc-a807-4437-8092-1de9b5667225": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "table_data_extraction", "task_result": "SUCCESS"}, "annotation::c7a88380-bb36-4ad7-8859-9ef995892393": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "job_counter", "task_result": "SUCCESS"}, "annotation::d57ae40c-75bb-408f-b2af-ad750eb84387": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "chart_data_extraction", "task_result": "SUCCESS"}, "annotation::d79b48dd-13fa-4a83-bad3-87e70d755d86": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "store_embedding_minio", "task_result": "SUCCESS"}, "annotation::f7963fc9-7366-4d3c-846c-6621374b7390": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "dedup_images", "task_result": "SUCCESS"}, "annotation::f801768a-b0b5-4cd7-96b1-ff2b6429177b": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": "pdf_content_extractor", "task_result": "SUCCESS"}, "annotation::fdd56e54-9306-43ee-982f-03a357f5684f": {"source_id": "nv_ingest.util.exception_handlers.decorators", "task_id": 
"pdf_content_extractor", "task_result": "SUCCESS"}}} diff --git a/tests/nv_ingest/util/converters/test_formats.py b/tests/nv_ingest/util/converters/test_formats.py new file mode 100644 index 00000000..17c2a0d6 --- /dev/null +++ b/tests/nv_ingest/util/converters/test_formats.py @@ -0,0 +1,101 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import json +import os + +from nv_ingest.util.converters.formats import ingest_json_results_to_blob + + +sample_result_text_json = """ +[ + { + "document_type": "text", + "metadata": { + "chart_metadata": null, + "content": "TestingDocument\r\nA sample document with headings and placeholder text\r\nIntroduction\r\nThis is a placeholder document that can be used for any purpose. It contains some \r\nheadings and some placeholder text to fill the space. The text is not important and contains \r\nno real value, but it is useful for testing. Below, we will have some simple tables and charts \r\nthat we can use to confirm Ingest is working as expected.\r\nTable 1\r\nThis table describes some animals, and some activities they might be doing in specific \r\nlocations.\r\nAnimal Activity Place\r\nGira@e Driving a car At the beach\r\nLion Putting on sunscreen At the park\r\nCat Jumping onto a laptop In a home o@ice\r\nDog Chasing a squirrel In the front yard\r\nChart 1\r\nThis chart shows some gadgets, and some very fictitious costs. Section One\r\nThis is the first section of the document. It has some more placeholder text to show how \r\nthe document looks like. The text is not meant to be meaningful or informative, but rather to \r\ndemonstrate the layout and formatting of the document.\r\n\u2022 This is the first bullet point\r\n\u2022 This is the second bullet point\r\n\u2022 This is the third bullet point\r\nSection Two\r\nThis is the second section of the document. 
It is more of the same as we\u2019ve seen in the rest \r\nof the document. The content is meaningless, but the intent is to create a very simple \r\nsmoke test to ensure extraction is working as intended. This will be used in CI as time goes \r\non to ensure that changes we make to the library do not negatively impact our accuracy.\r\nTable 2\r\nThis table shows some popular colors that cars might come in.\r\nCar Color1 Color2 Color3\r\nCoupe White Silver Flat Gray\r\nSedan White Metallic Gray Matte Gray\r\nMinivan Gray Beige Black\r\nTruck Dark Gray Titanium Gray Charcoal\r\nConvertible Light Gray Graphite Slate Gray\r\nPicture\r\nBelow, is a high-quality picture of some shapes. Chart 2\r\nThis chart shows some average frequency ranges for speaker drivers.\r\nConclusion\r\nThis is the conclusion of the document. It has some more placeholder text, but the most \r\nimportant thing is that this is the conclusion. As we end this document, we should have \r\nbeen able to extract 2 tables, 2 charts, and some text including 3 bullet points.", + "content_metadata": { + "description": "Unstructured text from PDF document.", + "hierarchy": { + "block": -1, + "line": -1, + "nearby_objects": { + "images": { + "bbox": [], + "content": [] + }, + "structured": { + "bbox": [], + "content": [] + }, + "text": { + "bbox": [], + "content": [] + } + }, + "page": -1, + "page_count": 3, + "span": -1 + }, + "page_number": -1, + "subtype": "", + "type": "text" + }, + "content_url": "", + "debug_metadata": null, + "embedding": null, + "error_metadata": null, + "image_metadata": null, + "info_message_metadata": null, + "raise_on_failure": false, + "source_metadata": { + "access_level": 1, + "collection_id": "", + "date_created": "2025-01-16T21:31:28.929797", + "last_modified": "2025-01-16T21:31:28.929648", + "partition_id": -1, + "source_id": "/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", + "source_location": "", + "source_name": 
"/home/jeremy/Development/nv-ingest/data/multimodal_test.pdf", + "source_type": "PDF", + "summary": "" + }, + "table_metadata": null, + "text_metadata": { + "keywords": "", + "language": "en", + "summary": "", + "text_location": [ + -1, + -1, + -1, + -1 + ], + "text_type": "document" + } + } + } +] +""" # noqa: E501 + + +def test_json_results_to_blob_text_failure(): + # there must be a "data" element in the json otherwise empty is returned + blob_response = ingest_json_results_to_blob(sample_result_text_json) + assert blob_response == "" + + +def test_json_results_to_blob(): + current_directory = os.path.dirname(__file__) + + # Construct the full path to the target file + file_name = "multimodal_test_raw_results.json" + file_path = os.path.join(current_directory, file_name) + + with open(file_path, "r") as file: + json_result_raw_data = json.load(file) + blob_response = ingest_json_results_to_blob(json.dumps(json_result_raw_data)) + + # The actual output is quite large. So we just check for key pieces being present + assert "Tweeter - Midrange - Midwoofer - Subwoofer Hertz" in blob_response diff --git a/tests/nv_ingest/util/nim/test_cached.py b/tests/nv_ingest/util/nim/test_cached.py index 8463f25d..c3871926 100644 --- a/tests/nv_ingest/util/nim/test_cached.py +++ b/tests/nv_ingest/util/nim/test_cached.py @@ -216,7 +216,7 @@ def test_process_inference_results(model_interface): """ output = "Processed Output" - result = model_interface.process_inference_results(output) + result = model_interface.process_inference_results(output, "http") assert result == output diff --git a/tests/nv_ingest/util/nim/test_yolox.py b/tests/nv_ingest/util/nim/test_yolox.py index f42bf778..b3a84fd3 100644 --- a/tests/nv_ingest/util/nim/test_yolox.py +++ b/tests/nv_ingest/util/nim/test_yolox.py @@ -1,25 +1,17 @@ -import pytest -import numpy as np -from io import BytesIO import base64 +import random +from io import BytesIO + +import numpy as np +import pytest from PIL import Image from 
nv_ingest.util.nim.yolox import YoloxPageElementsModelInterface -@pytest.fixture(params=["0.2.0", "1.0.0"]) -def model_interface(request): - return YoloxPageElementsModelInterface(yolox_version=request.param) - - -@pytest.fixture -def legacy_model_interface(): - return YoloxPageElementsModelInterface(yolox_version="0.2.0") - - @pytest.fixture -def ga_model_interface(): - return YoloxPageElementsModelInterface(yolox_version="1.0.0") +def model_interface(): + return YoloxPageElementsModelInterface() def create_test_image(width=800, height=600, color=(255, 0, 0)): @@ -68,25 +60,18 @@ def create_base64_image(width=1024, height=1024, color=(255, 0, 0)): return base64.b64encode(buffer.getvalue()).decode("utf-8") -def test_name_returns_yolox_legacy(legacy_model_interface): - assert legacy_model_interface.name() == "yolox-page-elements (version 0.2.0)" - - -def test_name_returns_yolox(ga_model_interface): - ga_model_interface = YoloxPageElementsModelInterface(yolox_version="1.0.0") - assert ga_model_interface.name() == "yolox-page-elements (version 1.0.0)" +def test_name_returns_yolox(model_interface): + model_interface = YoloxPageElementsModelInterface() + assert model_interface.name() == "yolox-page-elements" def test_prepare_data_for_inference_valid(model_interface): images = [create_test_image(), create_test_image(width=640, height=480)] input_data = {"images": images} result = model_interface.prepare_data_for_inference(input_data) - assert "resized_images" in result assert "original_image_shapes" in result - assert len(result["resized_images"]) == len(images) assert len(result["original_image_shapes"]) == len(images) - for original_shape, resized_image, image in zip(result["original_image_shapes"], result["resized_images"], images): - assert resized_image.shape == (1024, 1024, 3) + for original_shape, image in zip(result["original_image_shapes"], images): assert original_shape[:2] == image.shape[:2] @@ -118,28 +103,11 @@ def test_format_input_grpc(model_interface): 
assert formatted_input.shape[1:] == (3, 1024, 1024) -def test_format_input_legacy(legacy_model_interface): - images = [create_test_image(), create_test_image()] - input_data = {"images": images} - prepared_data = legacy_model_interface.prepare_data_for_inference(input_data) - formatted_input = legacy_model_interface.format_input(prepared_data, "http") - assert "messages" in formatted_input - assert isinstance(formatted_input["messages"], list) - for message in formatted_input["messages"]: - assert "content" in message - for content in message["content"]: - assert "type" in content - assert content["type"] == "image_url" - assert "image_url" in content - assert "url" in content["image_url"] - assert content["image_url"]["url"].startswith("data:image/png;base64,") - - -def test_format_input(ga_model_interface): +def test_format_input_http(model_interface): images = [create_test_image(), create_test_image()] input_data = {"images": images} - prepared_data = ga_model_interface.prepare_data_for_inference(input_data) - formatted_input = ga_model_interface.format_input(prepared_data, "http") + prepared_data = model_interface.prepare_data_for_inference(input_data) + formatted_input = model_interface.format_input(prepared_data, "http") assert "input" in formatted_input assert isinstance(formatted_input["input"], list) for content in formatted_input["input"]: @@ -165,45 +133,7 @@ def test_parse_output_grpc(model_interface): assert parsed_output.dtype == np.float32 -def test_parse_output_http_valid_legacy(legacy_model_interface): - response = { - "data": [ - [ - { - "type": "table", - "bboxes": [{"xmin": 0.1, "ymin": 0.1, "xmax": 0.2, "ymax": 0.2, "confidence": 0.9}], - }, - { - "type": "chart", - "bboxes": [{"xmin": 0.3, "ymin": 0.3, "xmax": 0.4, "ymax": 0.4, "confidence": 0.8}], - }, - {"type": "title", "bboxes": [{"xmin": 0.5, "ymin": 0.5, "xmax": 0.6, "ymax": 0.6, "confidence": 0.95}]}, - ], - [ - { - "type": "table", - "bboxes": [{"xmin": 0.15, "ymin": 0.15, "xmax": 
0.25, "ymax": 0.25, "confidence": 0.85}], - }, - { - "type": "chart", - "bboxes": [{"xmin": 0.35, "ymin": 0.35, "xmax": 0.45, "ymax": 0.45, "confidence": 0.75}], - }, - { - "type": "title", - "bboxes": [{"xmin": 0.55, "ymin": 0.55, "xmax": 0.65, "ymax": 0.65, "confidence": 0.92}], - }, - ], - ] - } - scaling_factors = [(1.0, 1.0), (1.0, 1.0)] - data = {"scaling_factors": scaling_factors} - parsed_output = legacy_model_interface.parse_output(response, "http", data) - assert isinstance(parsed_output, np.ndarray) - assert parsed_output.shape == (2, 3, 85) - assert parsed_output.dtype == np.float32 - - -def test_parse_output_http_valid(ga_model_interface): +def test_parse_output_http_valid(model_interface): response = { "data": [ { @@ -224,12 +154,19 @@ def test_parse_output_http_valid(ga_model_interface): }, ] } - scaling_factors = [(1.0, 1.0), (1.0, 1.0)] - data = {"scaling_factors": scaling_factors} - parsed_output = ga_model_interface.parse_output(response, "http", data) - assert isinstance(parsed_output, np.ndarray) - assert parsed_output.shape == (2, 3, 85) - assert parsed_output.dtype == np.float32 + parsed_output = model_interface.parse_output(response, "http") + assert parsed_output == [ + { + "table": [[0.1, 0.1, 0.2, 0.2, 0.9]], + "chart": [[0.3, 0.3, 0.4, 0.4, 0.8]], + "title": [[0.5, 0.5, 0.6, 0.6, 0.95]], + }, + { + "table": [[0.15, 0.15, 0.25, 0.25, 0.85]], + "chart": [[0.35, 0.35, 0.45, 0.45, 0.75]], + "title": [[0.55, 0.55, 0.65, 0.65, 0.92]], + }, + ] def test_parse_output_invalid_protocol(model_interface): @@ -238,11 +175,12 @@ def test_parse_output_invalid_protocol(model_interface): model_interface.parse_output(response, "invalid_protocol") -def test_process_inference_results(model_interface): +def test_process_inference_results_grpc(model_interface): output_array = np.random.rand(2, 100, 85).astype(np.float32) original_image_shapes = [(800, 600, 3), (640, 480, 3)] inference_results = model_interface.process_inference_results( output_array, + 
"grpc", original_image_shapes=original_image_shapes, num_classes=3, conf_thresh=0.5, @@ -262,3 +200,35 @@ def test_process_inference_results(model_interface): assert bbox[4] >= 0.6 if "title" in result: assert isinstance(result["title"], list) + + +def test_process_inference_results_http(model_interface): + output = [ + { + "table": [[random.random() for _ in range(5)] for _ in range(10)], + "chart": [[random.random() for _ in range(5)] for _ in range(10)], + "title": [[random.random() for _ in range(5)] for _ in range(10)], + } + for _ in range(10) + ] + inference_results = model_interface.process_inference_results( + output, + "http", + num_classes=3, + conf_thresh=0.5, + iou_thresh=0.4, + min_score=0.3, + final_thresh=0.6, + ) + assert isinstance(inference_results, list) + assert len(inference_results) == 10 + for result in inference_results: + assert isinstance(result, dict) + if "table" in result: + for bbox in result["table"]: + assert bbox[4] >= 0.6 + if "chart" in result: + for bbox in result["chart"]: + assert bbox[4] >= 0.6 + if "title" in result: + assert isinstance(result["title"], list) diff --git a/tests/nv_ingest_client/client/test_interface.py b/tests/nv_ingest_client/client/test_interface.py index 41c72bc2..83a556b8 100644 --- a/tests/nv_ingest_client/client/test_interface.py +++ b/tests/nv_ingest_client/client/test_interface.py @@ -4,6 +4,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+import logging import tempfile from concurrent.futures import Future from unittest.mock import MagicMock @@ -24,7 +25,7 @@ from nv_ingest_client.primitives.tasks import StoreEmbedTask from nv_ingest_client.primitives.tasks import StoreTask from nv_ingest_client.primitives.tasks import TableExtractionTask -from nv_ingest_client.primitives.tasks import VdbUploadTask +from nv_ingest_client.util.milvus import MilvusOperator MODULE_UNDER_TEST = "nv_ingest_client.client.interface" @@ -72,13 +73,13 @@ def test_embed_task_no_args(ingestor): assert isinstance(ingestor._job_specs.job_specs["pdf"][0]._tasks[0], EmbedTask) -def test_embed_task_some_args(ingestor): - ingestor.embed(text=False, tables=False) +def test_embed_task_some_args(ingestor, caplog): + # `text` and `table` arguments were deprecated before GA. + with caplog.at_level(logging.WARNING): + ingestor.embed(text=False, tables=False) - task = ingestor._job_specs.job_specs["pdf"][0]._tasks[0] - assert isinstance(task, EmbedTask) - assert task._text is False - assert task._tables is False + assert "'text' parameter is deprecated" in caplog.records[0].message + assert "'tables' parameter is deprecated" in caplog.records[1].message def test_extract_task_no_args(ingestor): @@ -193,15 +194,13 @@ def test_store_task_some_args_extra_param(ingestor): def test_vdb_upload_task_no_args(ingestor): ingestor.vdb_upload() - assert isinstance(ingestor._job_specs.job_specs["pdf"][0]._tasks[0], VdbUploadTask) + assert isinstance(ingestor._vdb_bulk_upload, MilvusOperator) def test_vdb_upload_task_some_args(ingestor): ingestor.vdb_upload(filter_errors=True) - task = ingestor._job_specs.job_specs["pdf"][0]._tasks[0] - assert isinstance(task, VdbUploadTask) - assert task._filter_errors is True + assert isinstance(ingestor._vdb_bulk_upload, MilvusOperator) def test_caption_task_no_args(ingestor): @@ -228,8 +227,8 @@ def test_chain(ingestor): assert isinstance(ingestor._job_specs.job_specs["pdf"][0]._tasks[5], FilterTask) assert 
isinstance(ingestor._job_specs.job_specs["pdf"][0]._tasks[6], SplitTask) assert isinstance(ingestor._job_specs.job_specs["pdf"][0]._tasks[7], StoreTask) - assert isinstance(ingestor._job_specs.job_specs["pdf"][0]._tasks[8], VdbUploadTask) - assert len(ingestor._job_specs.job_specs["pdf"][0]._tasks) == 9 + assert isinstance(ingestor._vdb_bulk_upload, MilvusOperator) + assert len(ingestor._job_specs.job_specs["pdf"][0]._tasks) == 8 def test_ingest(ingestor, mock_client): diff --git a/tests/nv_ingest_client/util/test_milvus_util.py b/tests/nv_ingest_client/util/test_milvus_util.py new file mode 100644 index 00000000..525ca288 --- /dev/null +++ b/tests/nv_ingest_client/util/test_milvus_util.py @@ -0,0 +1,67 @@ +import pytest +from nv_ingest_client.util.milvus import MilvusOperator, _dict_to_params + + +@pytest.fixture +def milvus_test_dict(): + mil_op = MilvusOperator() + kwargs = mil_op.milvus_kwargs + kwargs["collection_name"] = mil_op.collection_name + return kwargs + + +def test_extra_kwargs(milvus_test_dict): + mil_op = MilvusOperator(filter_errors=True) + milvus_test_dict.pop("collection_name") + assert mil_op.milvus_kwargs == milvus_test_dict + + +@pytest.mark.parametrize("collection_name", [None, "name"]) +def test_op_collection_name(collection_name): + if collection_name: + mo = MilvusOperator(collection_name=collection_name) + else: + # default + collection_name = "nv_ingest_collection" + mo = MilvusOperator() + cr_collection_name, conn_params = mo.get_connection_params() + wr_collection_name, write_params = mo.get_write_params() + assert cr_collection_name == wr_collection_name == collection_name + + +def test_op_connection_params(milvus_test_dict): + mo = MilvusOperator() + cr_collection_name, conn_params = mo.get_connection_params() + assert cr_collection_name == milvus_test_dict["collection_name"] + for k, v in conn_params.items(): + assert milvus_test_dict[k] == v + + +def test_op_write_params(milvus_test_dict): + mo = MilvusOperator() + collection_name, 
wr_params = mo.get_write_params() + assert collection_name == milvus_test_dict["collection_name"] + for k, v in wr_params.items(): + assert milvus_test_dict[k] == v + + +@pytest.mark.parametrize( + "collection_name, expected_results", + [ + ({"text": ["text", "charts", "tables"]}, {"enable_text": True, "enable_charts": True, "enable_tables": True}), + ({"text": ["text", "tables"]}, {"enable_text": True, "enable_charts": False, "enable_tables": True}), + ({"text": ["text", "charts"]}, {"enable_text": True, "enable_charts": True, "enable_tables": False}), + ({"text": ["text"]}, {"enable_text": True, "enable_charts": False, "enable_tables": False}), + ], +) +def test_op_dict_to_params(collection_name, expected_results): + mo = MilvusOperator() + _, wr_params = mo.get_write_params() + response = _dict_to_params(collection_name, wr_params) + if isinstance(collection_name, str): + collection_name = {collection_name: None} + for res in response: + coll_name, write_params = res + for k, v in expected_results.items(): + assert write_params[k] == v + coll_name in collection_name.keys() From a60ffdf94f86f79309dafaf3b079c91c4554fcaf Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Fri, 17 Jan 2025 13:24:34 -0700 Subject: [PATCH 4/7] Workflow is functioning but failing on transcription call --- .../modules/injectors/metadata_injector.py | 11 +- .../schemas/audio_extractor_schema.py | 6 +- src/nv_ingest/schemas/metadata_schema.py | 12 +- src/nv_ingest/stages/nim/audio_extraction.py | 109 +++++++----------- .../util/converters/type_mappings.py | 2 + 5 files changed, 67 insertions(+), 73 deletions(-) diff --git a/src/nv_ingest/modules/injectors/metadata_injector.py b/src/nv_ingest/modules/injectors/metadata_injector.py index 41ffadc6..72133b0a 100644 --- a/src/nv_ingest/modules/injectors/metadata_injector.py +++ b/src/nv_ingest/modules/injectors/metadata_injector.py @@ -4,6 +4,7 @@ import logging +import traceback import mrc import pandas as pd @@ -46,6 +47,9 @@ def 
on_data(message: ControlMessage): "type": content_type.name.lower(), }, "error_metadata": None, + "audio_metadata": ( + None if content_type != ContentTypeEnum.AUDIO else {"audio_type": row["document_type"]} + ), "image_metadata": ( None if content_type != ContentTypeEnum.IMAGE else {"image_type": row["document_type"]} ), @@ -78,7 +82,12 @@ def _metadata_injection(builder: mrc.Builder): raise_on_failure=validated_config.raise_on_failure, ) def _on_data(message: ControlMessage): - return on_data(message) + try: + return on_data(message) + except Exception as e: + logger.error(f"Unhandled exception in metadata_injector: {e}") + traceback.print_exc() + raise node = builder.make_node("metadata_injector", _on_data) diff --git a/src/nv_ingest/schemas/audio_extractor_schema.py b/src/nv_ingest/schemas/audio_extractor_schema.py index 49a3dc5d..6b00b4e2 100755 --- a/src/nv_ingest/schemas/audio_extractor_schema.py +++ b/src/nv_ingest/schemas/audio_extractor_schema.py @@ -73,12 +73,8 @@ def clean_service(service): return None return service - - print ('===> audio extractor schema values:', values) endpoint_name = "audio_endpoints" grpc_service, http_service = values.get(endpoint_name) - print ("grpc_service:", grpc_service) - print ("http_service:", http_service) grpc_service = clean_service(grpc_service) http_service = clean_service(http_service) @@ -90,9 +86,9 @@ def clean_service(service): protocol_name = "audio_infer_protocol" protocol_value = values.get(protocol_name) - print("protocol_value:", protocol_value) if not protocol_value: protocol_value = "http" if http_service else "grpc" if grpc_service else "" + protocol_value = protocol_value.lower() values[protocol_name] = protocol_value diff --git a/src/nv_ingest/schemas/metadata_schema.py b/src/nv_ingest/schemas/metadata_schema.py index 9de9aba9..3183b57d 100644 --- a/src/nv_ingest/schemas/metadata_schema.py +++ b/src/nv_ingest/schemas/metadata_schema.py @@ -299,6 +299,11 @@ class ChartMetadataSchema(BaseModelNoExt): 
uploaded_image_uri: str = "" +class AudioMetadataSchema(BaseModelNoExt): + audio_transcript: str = "" + audio_type: str = "" + + # TODO consider deprecating this in favor of info msg... class ErrorMetadataSchema(BaseModelNoExt): task: TaskTypeEnum @@ -321,6 +326,7 @@ class MetadataSchema(BaseModelNoExt): embedding: Optional[List[float]] = None source_metadata: Optional[SourceMetadataSchema] = None content_metadata: Optional[ContentMetadataSchema] = None + audio_metadata: Optional[AudioMetadataSchema] = None text_metadata: Optional[TextMetadataSchema] = None image_metadata: Optional[ImageMetadataSchema] = None table_metadata: Optional[TableMetadataSchema] = None @@ -334,10 +340,12 @@ class MetadataSchema(BaseModelNoExt): @classmethod def check_metadata_type(cls, values): content_type = values.get("content_metadata", {}).get("type", None) - if content_type != ContentTypeEnum.TEXT: - values["text_metadata"] = None + if content_type != ContentTypeEnum.AUDIO: + values["audio_metadata"] = None if content_type != ContentTypeEnum.IMAGE: values["image_metadata"] = None + if content_type != ContentTypeEnum.TEXT: + values["text_metadata"] = None if content_type != ContentTypeEnum.STRUCTURED: values["table_metadata"] = None return values diff --git a/src/nv_ingest/stages/nim/audio_extraction.py b/src/nv_ingest/stages/nim/audio_extraction.py index 55556936..d554eb3e 100755 --- a/src/nv_ingest/stages/nim/audio_extraction.py +++ b/src/nv_ingest/stages/nim/audio_extraction.py @@ -4,27 +4,24 @@ import logging import functools +import traceback + import pandas as pd from typing import Any from typing import Dict from typing import Optional from typing import Tuple -import tritonclient.grpc as grpcclient from morpheus.config import Config from nv_ingest.schemas.audio_extractor_schema import AudioExtractorSchema from nv_ingest.stages.multiprocessing_stage import MultiProcessingBaseStage -import sys -sys.path.append('../../..') - from nv_ingest.util.nim.helpers import 
call_audio_inference_model, create_inference_client -from nv_ingest.util.nim.helpers import get_version logger = logging.getLogger(f"morpheus.{__name__}") -def _update_metadata(row: pd.Series, audio_client: Any, audio_version: Any, trace_info: Dict) -> Dict: +def _update_metadata(row: pd.Series, audio_client: Any, trace_info: Dict) -> Dict: """ Modifies the metadata of a row if the conditions for table extraction are met. @@ -50,9 +47,8 @@ def _update_metadata(row: pd.Series, audio_client: Any, audio_version: Any, trac If critical information (such as metadata) is missing from the row. """ - metadata = row.get("metadata") - + if metadata is None: logger.error("Row does not contain 'metadata'.") raise ValueError("Row does not contain 'metadata'.") @@ -60,31 +56,30 @@ def _update_metadata(row: pd.Series, audio_client: Any, audio_version: Any, trac content_metadata = metadata.get("content_metadata", {}) # Only modify if content type is audio - if content_metadata.get("type") != "audio" : + # TODO(Devin): Double check dtypes (metadata_schema.py:39) + if content_metadata.get("type") != "audio": return metadata source_metadata = metadata.get("source_metadata") - audio_id = source_metadata['source_id'] - - content_metadata = metadata.get("content_metadata") - content_metadata = content_metadata['content'] - audio_content = content_metadata['content'] - + audio_id = source_metadata["source_id"] + + audio_content = metadata.get("content") # Modify audio metadata with the result from the inference model try: audio_result = call_audio_inference_model(audio_client, audio_content, audio_id, trace_info=trace_info) print(audio_result) - metadata['audio_metadata'] = {'content': audio_result} + metadata["audio_metadata"] = {"audio_transcript": audio_result} except Exception as e: logger.error(f"Unhandled error calling audio inference model: {e}", exc_info=True) raise - + return metadata -def _transcribe_audio(df: pd.DataFrame, task_props: Dict[str, Any], - validated_config: Any, 
trace_info: Optional[Dict] = None) -> Tuple[pd.DataFrame, Dict]: +def _transcribe_audio( + df: pd.DataFrame, task_props: Dict[str, Any], validated_config: Any, trace_info: Optional[Dict] = None +) -> Tuple[pd.DataFrame, Dict]: """ Extracts audio data from a DataFrame. @@ -113,19 +108,18 @@ def _transcribe_audio(df: pd.DataFrame, task_props: Dict[str, Any], If any error occurs during the audio data extraction process. """ - #port = 32783 - #audio_client = create_inference_client( + # port = 32783 + # audio_client = create_inference_client( # (None, f'http://0.0.0.0:{port}/v1/transcribe'), # None, # "http" - #) + # ) + + logger.debug(f"Entering audio extraction stage with {len(df)} rows.") + + _ = task_props - - audio_client = create_inference_client( - validated_config.stage_config.audio_endpoints, - None, - "http" - ) + audio_client = create_inference_client(validated_config.stage_config.audio_endpoints, None, "http") if trace_info is None: trace_info = {} @@ -133,23 +127,24 @@ def _transcribe_audio(df: pd.DataFrame, task_props: Dict[str, Any], try: # Apply the _update_metadata function to each row in the DataFrame - #audio_version = get_version(validated_config.stage_config.audio_endpoints[1]) - audio_version = get_version(f'http://audio:{port}') - df["metadata"] = df.apply(_update_metadata, axis=1, args=(audio_client, audio_version, trace_info)) - + # audio_version = get_version(validated_config.stage_config.audio_endpoints[1]) + # audio_version = get_version(f'http://audio:{port}') + df["metadata"] = df.apply(_update_metadata, axis=1, args=(audio_client, trace_info)) + return df, trace_info except Exception as e: - logger.error("Error occurred while extracting audio data.", exc_info=True) + traceback.print_exc() + logger.error(f"Error occurred while extracting audio data: {e}", exc_info=True) raise def generate_audio_extractor_stage( - c: Config, - stage_config: Dict[str, Any], - task: str = "audio_data_extract", - task_desc: str = "audio_data_extraction", - 
pe_count: int = 1, + c: Config, + stage_config: Dict[str, Any], + task: str = "audio_data_extract", + task_desc: str = "audio_data_extraction", + pe_count: int = 1, ): """ Generates a multiprocessing stage to perform audio data extraction. @@ -186,16 +181,15 @@ def generate_audio_extractor_stage( _wrapped_process_fn = functools.partial(_transcribe_audio, validated_config=validated_config) return MultiProcessingBaseStage( - c=c, - pe_count=pe_count, - task=task, - task_desc=task_desc, + c=c, + pe_count=pe_count, + task=task, + task_desc=task_desc, process_fn=_wrapped_process_fn, document_type="regex:^(mp3|wav)$", ) - if __name__ == "__main__": metadata = { "source_metadata": { @@ -207,17 +201,11 @@ def generate_audio_extractor_stage( "source_id": "https://audio.listennotes.com/e/p/3946bc3aba1f425f8b2e146f0b3f72fc/", "source_location": "", "source_type": "wav", - "summary": "" + "summary": "", }, - - "content_metadata": { - "description": "Audio wav file", - "type": "audio", - "content": '' - } + "content_metadata": {"description": "Audio wav file", "type": "audio", "content": ""}, } - metadata = { "source_metadata": { "access_level": 1, @@ -228,27 +216,18 @@ def generate_audio_extractor_stage( "source_id": "test.mp3", "source_location": "", "source_type": "mp3", - "summary": "" + "summary": "", }, - - "content_metadata": { - "description": "Audio wav file", - "type": "audio", - "content": 'some base64 string' - } + "content_metadata": {"description": "Audio wav file", "type": "audio", "content": "some base64 string"}, } - - data = [{"metadata": metadata}] df = pd.DataFrame(data) - df.to_csv('test.csv', index=False) - - df_result, _ = _transcribe_audio(df) + df.to_csv("test.csv", index=False) - df_result.to_csv('result.csv', index=False) + df_result, _ = _transcribe_audio(df) + df_result.to_csv("result.csv", index=False) - print("Done!") diff --git a/src/nv_ingest/util/converters/type_mappings.py b/src/nv_ingest/util/converters/type_mappings.py index 
4fbfb0a9..2a8a8626 100644 --- a/src/nv_ingest/util/converters/type_mappings.py +++ b/src/nv_ingest/util/converters/type_mappings.py @@ -11,12 +11,14 @@ DocumentTypeEnum.docx: ContentTypeEnum.STRUCTURED, DocumentTypeEnum.html: ContentTypeEnum.STRUCTURED, DocumentTypeEnum.jpeg: ContentTypeEnum.IMAGE, + DocumentTypeEnum.mp3: ContentTypeEnum.AUDIO, DocumentTypeEnum.pdf: ContentTypeEnum.STRUCTURED, DocumentTypeEnum.png: ContentTypeEnum.IMAGE, DocumentTypeEnum.pptx: ContentTypeEnum.STRUCTURED, DocumentTypeEnum.svg: ContentTypeEnum.IMAGE, DocumentTypeEnum.tiff: ContentTypeEnum.IMAGE, DocumentTypeEnum.txt: ContentTypeEnum.TEXT, + DocumentTypeEnum.wav: ContentTypeEnum.AUDIO, } From 7a3c3402994a09bf658b1b3ba1f24ba0ec28fbb6 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Fri, 17 Jan 2025 15:20:39 -0700 Subject: [PATCH 5/7] Add ModelInferenceInterface for Parakeet --- .../schemas/ingest_pipeline_config_schema.py | 1 + src/nv_ingest/stages/nim/audio_extraction.py | 105 +++++++------- .../util/flow_control/filter_by_task.py | 112 +++++++++----- src/nv_ingest/util/nim/helpers.py | 26 ++-- src/nv_ingest/util/nim/parakeet.py | 137 ++++++++++++++++++ 5 files changed, 281 insertions(+), 100 deletions(-) create mode 100644 src/nv_ingest/util/nim/parakeet.py diff --git a/src/nv_ingest/schemas/ingest_pipeline_config_schema.py b/src/nv_ingest/schemas/ingest_pipeline_config_schema.py index fe5debd6..60d15a07 100644 --- a/src/nv_ingest/schemas/ingest_pipeline_config_schema.py +++ b/src/nv_ingest/schemas/ingest_pipeline_config_schema.py @@ -29,6 +29,7 @@ class PipelineConfigSchema(BaseModel): + # TODO(Devin): Audio chart_extractor_module: ChartExtractorSchema = ChartExtractorSchema() document_splitter_module: DocumentSplitterSchema = DocumentSplitterSchema() embedding_storage_module: EmbeddingStorageModuleSchema = EmbeddingStorageModuleSchema() diff --git a/src/nv_ingest/stages/nim/audio_extraction.py b/src/nv_ingest/stages/nim/audio_extraction.py index d554eb3e..3b33ee7b 100755 --- 
a/src/nv_ingest/stages/nim/audio_extraction.py +++ b/src/nv_ingest/stages/nim/audio_extraction.py @@ -17,6 +17,7 @@ from nv_ingest.stages.multiprocessing_stage import MultiProcessingBaseStage from nv_ingest.util.nim.helpers import call_audio_inference_model, create_inference_client +from nv_ingest.util.nim.parakeet import ParakeetModelInterface logger = logging.getLogger(f"morpheus.{__name__}") @@ -53,25 +54,23 @@ def _update_metadata(row: pd.Series, audio_client: Any, trace_info: Dict) -> Dic logger.error("Row does not contain 'metadata'.") raise ValueError("Row does not contain 'metadata'.") + base64_audio = metadata.get("content") content_metadata = metadata.get("content_metadata", {}) # Only modify if content type is audio - # TODO(Devin): Double check dtypes (metadata_schema.py:39) if content_metadata.get("type") != "audio": return metadata source_metadata = metadata.get("source_metadata") audio_id = source_metadata["source_id"] - audio_content = metadata.get("content") - # Modify audio metadata with the result from the inference model try: - audio_result = call_audio_inference_model(audio_client, audio_content, audio_id, trace_info=trace_info) - print(audio_result) + audio_result = call_audio_inference_model(audio_client, base64_audio, audio_id, trace_info=trace_info) metadata["audio_metadata"] = {"audio_transcript": audio_result} except Exception as e: logger.error(f"Unhandled error calling audio inference model: {e}", exc_info=True) + traceback.print_exc() raise return metadata @@ -119,7 +118,13 @@ def _transcribe_audio( _ = task_props - audio_client = create_inference_client(validated_config.stage_config.audio_endpoints, None, "http") + parakeet_model_interface = ParakeetModelInterface() + parakeet_client = create_inference_client( + validated_config.audio_extraction_config.audio_endpoints, + parakeet_model_interface, + auth_token=validated_config.audio_extraction_config.auth_token, + 
infer_protocol=validated_config.audio_extraction_config.audio_infer_protocol, + ) if trace_info is None: trace_info = {} @@ -129,7 +134,7 @@ def _transcribe_audio( # Apply the _update_metadata function to each row in the DataFrame # audio_version = get_version(validated_config.stage_config.audio_endpoints[1]) # audio_version = get_version(f'http://audio:{port}') - df["metadata"] = df.apply(_update_metadata, axis=1, args=(audio_client, trace_info)) + df["metadata"] = df.apply(_update_metadata, axis=1, args=(parakeet_client, trace_info)) return df, trace_info @@ -186,48 +191,50 @@ def generate_audio_extractor_stage( task=task, task_desc=task_desc, process_fn=_wrapped_process_fn, - document_type="regex:^(mp3|wav)$", + # document_type="regex:^(mp3|wav)$", + document_type="wav", ) -if __name__ == "__main__": - metadata = { - "source_metadata": { - "access_level": 1, - "collection_id": "", - "date_created": "2024-11-04T12:29:08", - "last_modified": "2024-11-04T12:29:08", - "partition_id": -1, - "source_id": "https://audio.listennotes.com/e/p/3946bc3aba1f425f8b2e146f0b3f72fc/", - "source_location": "", - "source_type": "wav", - "summary": "", - }, - "content_metadata": {"description": "Audio wav file", "type": "audio", "content": ""}, - } - - metadata = { - "source_metadata": { - "access_level": 1, - "collection_id": "", - "date_created": "2024-11-04T12:29:08", - "last_modified": "2024-11-04T12:29:08", - "partition_id": -1, - "source_id": "test.mp3", - "source_location": "", - "source_type": "mp3", - "summary": "", - }, - "content_metadata": {"description": "Audio wav file", "type": "audio", "content": "some base64 string"}, - } - - data = [{"metadata": metadata}] - df = pd.DataFrame(data) - - df.to_csv("test.csv", index=False) - - df_result, _ = _transcribe_audio(df) - - df_result.to_csv("result.csv", index=False) - - print("Done!") +# if __name__ == "__main__": +# metadata = { +# "source_metadata": { +# "access_level": 1, +# "collection_id": "", +# "date_created": 
"2024-11-04T12:29:08", +# "last_modified": "2024-11-04T12:29:08", +# "partition_id": -1, +# "source_id": "https://audio.listennotes.com/e/p/3946bc3aba1f425f8b2e146f0b3f72fc/", +# "source_location": "", +# "source_type": "wav", +# "summary": "", +# }, +# "content_metadata": {"description": "Audio wav file", "type": "audio", "content": ""}, +# } +# +# metadata = { +# "source_metadata": { +# "access_level": 1, +# "collection_id": "", +# "date_created": "2024-11-04T12:29:08", +# "last_modified": "2024-11-04T12:29:08", +# "partition_id": -1, +# "source_id": "test.mp3", +# "source_location": "", +# "source_type": "mp3", +# "summary": "", +# }, +# "content_metadata": {"description": "Audio wav file", "type": "audio", "content": "some base64 string"}, +# } +# +# data = [{"metadata": metadata}] +# df = pd.DataFrame(data) +# +# df.to_csv("test.csv", index=False) +# +# df_result, _ = _transcribe_audio(df) +# +# df_result.to_csv("result.csv", index=False) +# +# print("Done!") +# diff --git a/src/nv_ingest/util/flow_control/filter_by_task.py b/src/nv_ingest/util/flow_control/filter_by_task.py index c5be609c..586c4b16 100644 --- a/src/nv_ingest/util/flow_control/filter_by_task.py +++ b/src/nv_ingest/util/flow_control/filter_by_task.py @@ -3,13 +3,11 @@ # SPDX-License-Identifier: Apache-2.0 -import logging import re import typing from functools import wraps - from pydantic import BaseModel -from morpheus.messages import ControlMessage +import logging logger = logging.getLogger(__name__) @@ -25,10 +23,10 @@ def filter_by_task(required_tasks, forward_func=None): Parameters ---------- required_tasks : list - A list of task keys to check for in the ControlMessage. + A list of task keys (string or tuple/list of [task_name, task_property_dict(s)]) to check for in the + ControlMessage. forward_func : callable, optional - A function to be called with the ControlMessage if no required task is found. Defaults to - None. 
+ A function to be called with the ControlMessage if no required task is found. Defaults to None. Returns ------- @@ -39,39 +37,66 @@ def filter_by_task(required_tasks, forward_func=None): def decorator(func): @wraps(func) def wrapper(*args, **kwargs): - if args and hasattr(args[0], "get_tasks"): - message = args[0] - tasks = message.get_tasks() - for required_task in required_tasks: - if isinstance(required_task, str) and (required_task in tasks): - return func(*args, **kwargs) + if not args or not hasattr(args[0], "get_tasks"): + raise ValueError("The first argument must be a ControlMessage object with task handling capabilities.") + + message = args[0] + tasks = message.get_tasks() + logger.debug(f"Tasks in message: {list(tasks.keys())}") + logger.debug(f"Required tasks: {required_tasks}") - if isinstance(required_task, tuple) or isinstance(required_task, list): - required_task_name, *required_task_props_list = required_task - if required_task_name not in tasks: - continue - - task_props_list = tasks.get(required_task_name, []) - logger.debug(f"Checking task properties for: {required_task_name}") - logger.debug(f"Required task properties: {required_task_props_list}") - for task_props in task_props_list: - if isinstance(task_props, BaseModel): - task_props = task_props.model_dump() - - if all( - _is_subset(task_props, required_task_props) - for required_task_props in required_task_props_list - ): - return func(*args, **kwargs) - - if forward_func: - # If a forward function is provided, call it with the ControlMessage - return forward_func(message) - else: - # If no forward function is provided, return the message directly - return message + for required_task in required_tasks: + # 1) If the required task is a string (simple check for existence) + if isinstance(required_task, str): + if required_task in tasks: + logger.debug(f"Found required task '{required_task}'. 
Executing function.") + return func(*args, **kwargs) + else: + logger.debug(f"Task '{required_task}' not found in ControlMessage. Skipping.") + + # 2) If the required task is a tuple/list: (task_name, {prop_key: prop_val}, ...) + elif isinstance(required_task, (tuple, list)): + required_task_name, *required_task_props_list = required_task + if required_task_name not in tasks: + logger.debug(f"Task '{required_task_name}' not found in ControlMessage. Skipping.") + continue + + # We have at least one task of this type. Check the properties: + task_props_list = tasks.get(required_task_name, []) + logger.debug(f"Checking task properties for '{required_task_name}': {task_props_list}") + logger.debug(f"Required task properties: {required_task_props_list}") + + # Check each set of task_props against the required subset(s) + for task_props in task_props_list: + if isinstance(task_props, BaseModel): + task_props = task_props.model_dump() + + # We need to match *all* required_task_props in `required_task_props_list` + # with the current `task_props`. + if all( + _is_subset(task_props, required_task_props) + for required_task_props in required_task_props_list + ): + logger.debug( + f"Task '{required_task_name}' with properties {task_props} " + f"matches all required properties. Executing function." + ) + return func(*args, **kwargs) + else: + logger.debug( + f"Task '{required_task_name}' with properties {task_props} " + f"does not match all required properties {required_task_props_list}. Skipping." + ) + + # If we got here, it means none of the required tasks or properties matched + logger.debug("No required tasks matched. 
Forwarding or returning message as configured.") + + if forward_func: + # If a forward function is provided, call it with the ControlMessage + return forward_func(message) else: - raise ValueError("The first argument must be a ControlMessage object with task handling capabilities.") + # If no forward function is provided, return the message directly + return message return wrapper @@ -81,8 +106,10 @@ def wrapper(*args, **kwargs): def _is_subset(superset, subset): if subset == "*": return True + if isinstance(superset, dict) and isinstance(subset, dict): return all(key in superset and _is_subset(superset[key], val) for key, val in subset.items()) + if isinstance(subset, str) and subset.startswith("regex:"): # The subset is a regex pattern pattern = subset[len("regex:") :] @@ -90,15 +117,19 @@ def _is_subset(superset, subset): return any(re.match(pattern, str(sup_item)) for sup_item in superset) else: return re.match(pattern, str(superset)) is not None + if isinstance(superset, list) and not isinstance(subset, list): # Check if the subset value matches any item in the superset return any(_is_subset(sup_item, subset) for sup_item in superset) - if isinstance(superset, list) or isinstance(superset, set): + + if isinstance(superset, (list, set)) and isinstance(subset, (list, set)): + # Check if each sub_item in `subset` is in `superset` (by subset matching) return all(any(_is_subset(sup_item, sub_item) for sup_item in superset) for sub_item in subset) + return superset == subset -def remove_task_subset(ctrl_msg: ControlMessage, task_type: typing.List, subset: typing.Dict): +def remove_task_subset(ctrl_msg: typing.Any, task_type: typing.List, subset: typing.Dict): """ A helper function to extract a task based on subset matching when the task might be out of order with respect to the Morpheus pipeline. 
For example, if a deduplication filter occurs before scale filtering in the pipeline, but @@ -127,6 +158,9 @@ def remove_task_subset(ctrl_msg: ControlMessage, task_type: typing.List, subset: for _ in ctrl_msg_tasks[task_type]: task_props = ctrl_msg.remove_task(task_type) if _is_subset(task_props, subset): + logger.debug( + f"Removed task '{task_type}' with properties {task_props} " f"matching subset {subset}." + ) break filter_tasks.append(task_props) break diff --git a/src/nv_ingest/util/nim/helpers.py b/src/nv_ingest/util/nim/helpers.py index a692265f..96a1b04e 100644 --- a/src/nv_ingest/util/nim/helpers.py +++ b/src/nv_ingest/util/nim/helpers.py @@ -603,10 +603,14 @@ def call_audio_inference_model(client, audio_content: str, audio_id: str, trace_ Parameters ---------- - client : grpcclient.InferenceServerClient or dict + client : The inference client, which is an HTTP client. - audio_source : str + audio_content: str The audio source to transcribe. + audio_id: str + The unique identifier for the audio content. + trace_info: dict + Trace information for debugging or logging. 
Returns ------- @@ -620,16 +624,16 @@ def call_audio_inference_model(client, audio_content: str, audio_id: str, trace_ """ try: - url = client["endpoint_url"] - headers = client["headers"] + data = {"base64_audio": audio_content, "audio_id": audio_id} - payload = {"audio_content": audio_content, "audio_id": audio_id} - response = requests.post(url, json=payload, headers=headers) - - response.raise_for_status() # Raise an exception for HTTP errors + parakeet_result = client.infer( + data, + model_name="parakeet", + trace_info=trace_info, # traceable_func arg + stage_name="audio_extraction", + ) - # Parse the JSON response - json_response = response.json() + return parakeet_result except requests.exceptions.RequestException as e: raise RuntimeError(f"HTTP request failed: {e}") @@ -637,5 +641,3 @@ def call_audio_inference_model(client, audio_content: str, audio_id: str, trace_ raise RuntimeError(f"Missing expected key in response: {e}") except Exception as e: raise RuntimeError(f"An error occurred during inference: {e}") - - return json_response diff --git a/src/nv_ingest/util/nim/parakeet.py b/src/nv_ingest/util/nim/parakeet.py new file mode 100644 index 00000000..19c5daab --- /dev/null +++ b/src/nv_ingest/util/nim/parakeet.py @@ -0,0 +1,137 @@ +import logging +import requests +from typing import Any, Dict, Optional + +from nv_ingest.util.nim.helpers import ModelInterface + +logger = logging.getLogger(__name__) + + +class ParakeetModelInterface(ModelInterface): + """ + A simple interface for handling inference with a Parakeet model (e.g., speech, audio-related). + """ + + def name(self) -> str: + """ + Get the name of the model interface. + + Returns + ------- + str + The name of the model interface ("Parakeet"). + """ + return "Parakeet" + + def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]: + """ + Prepare input data for inference. This can be as simple or complex as needed. 
+ Here, we assume 'audio_content' and 'audio_id' are already in the right format. + + Parameters + ---------- + data : dict + The input data containing an audio payload. + + Returns + ------- + dict + The updated data dictionary (possibly identical if no special processing is required). + """ + + return data + + def format_input(self, data: Dict[str, Any], protocol: str, **kwargs) -> Any: + """ + Format input data for the specified protocol (e.g., HTTP). + Here, we assume a simple JSON payload containing 'audio_content' and 'audio_id'. + + Parameters + ---------- + data : dict + The input data to format. + protocol : str + The protocol to use ("http"). + **kwargs : dict + Additional parameters for HTTP payload formatting if needed. + + Returns + ------- + Any + The formatted input data. + + Raises + ------ + ValueError + If an invalid protocol is specified. + """ + if protocol == "http": + logger.debug("Formatting input for HTTP Parakeet model") + # For HTTP, we just build a simple JSON payload + payload = {"audio_content": data["base64_audio"], "audio_id": data["audio_id"]} + return payload + else: + raise ValueError("Invalid protocol specified. Must be 'http' for Parakeet.") + + def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, Any]] = None, **kwargs) -> Any: + """ + Parse the output from the model's inference response. + + Parameters + ---------- + response : requests.Response + The response from the model inference (for HTTP). + protocol : str + The protocol used ("http"). + data : dict, optional + Additional input data passed to the function (not used in this simple example). + + Returns + ------- + dict + The JSON-parsed output from the Parakeet model. + + Raises + ------ + ValueError + If an invalid protocol is specified. + RuntimeError + For any HTTP-related or unexpected errors (e.g., missing keys). 
+ """ + if protocol == "http": + logger.debug("Parsing output from HTTP Parakeet model") + try: + response.raise_for_status() # Raise an exception for HTTP errors + json_response = response.json() + except requests.exceptions.RequestException as e: + raise RuntimeError(f"HTTP request failed: {e}") + except KeyError as e: + raise RuntimeError(f"Missing expected key in response: {e}") + except Exception as e: + raise RuntimeError(f"An error occurred during inference: {e}") + + return json_response + else: + raise ValueError("Invalid protocol specified. Must be 'http' for Parakeet.") + + def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Any: + """ + Process inference results for the Parakeet model. In this simple case, + we simply return the output as-is. + + Parameters + ---------- + output : Any + The raw output from the model. + protocol : str + The protocol used ("http"). + **kwargs : dict + Additional parameters as needed. + + Returns + ------- + Any + The processed inference results. 
+ """ + logger.debug("Processing Parakeet inference results (pass-through).") + return output From 8954924cd8e7545caf86c77993cb64e79bb2a650 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Fri, 17 Jan 2025 16:43:11 -0700 Subject: [PATCH 6/7] Update parakeet interface to generate audio_id --- src/nv_ingest/util/nim/parakeet.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/nv_ingest/util/nim/parakeet.py b/src/nv_ingest/util/nim/parakeet.py index 19c5daab..e8110856 100644 --- a/src/nv_ingest/util/nim/parakeet.py +++ b/src/nv_ingest/util/nim/parakeet.py @@ -1,4 +1,6 @@ import logging +import uuid + import requests from typing import Any, Dict, Optional @@ -68,7 +70,8 @@ def format_input(self, data: Dict[str, Any], protocol: str, **kwargs) -> Any: if protocol == "http": logger.debug("Formatting input for HTTP Parakeet model") # For HTTP, we just build a simple JSON payload - payload = {"audio_content": data["base64_audio"], "audio_id": data["audio_id"]} + # audio_id just needs to be a unique identifier + payload = {"audio_content": data["base64_audio"], "audio_id": f"{str(uuid.uuid4())}.wav"} return payload else: raise ValueError("Invalid protocol specified. 
Must be 'http' for Parakeet.") From 4e8ede8ebf6512b04fca17406c7769bbe2c934c9 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Tue, 21 Jan 2025 12:47:04 -0700 Subject: [PATCH 7/7] Update parakeep NimHandler --- src/nv_ingest/util/nim/parakeet.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/nv_ingest/util/nim/parakeet.py b/src/nv_ingest/util/nim/parakeet.py index e8110856..f9183828 100644 --- a/src/nv_ingest/util/nim/parakeet.py +++ b/src/nv_ingest/util/nim/parakeet.py @@ -1,7 +1,6 @@ import logging import uuid -import requests from typing import Any, Dict, Optional from nv_ingest.util.nim.helpers import ModelInterface @@ -102,20 +101,7 @@ def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, An For any HTTP-related or unexpected errors (e.g., missing keys). """ if protocol == "http": - logger.debug("Parsing output from HTTP Parakeet model") - try: - response.raise_for_status() # Raise an exception for HTTP errors - json_response = response.json() - except requests.exceptions.RequestException as e: - raise RuntimeError(f"HTTP request failed: {e}") - except KeyError as e: - raise RuntimeError(f"Missing expected key in response: {e}") - except Exception as e: - raise RuntimeError(f"An error occurred during inference: {e}") - - return json_response - else: - raise ValueError("Invalid protocol specified. Must be 'http' for Parakeet.") + return response def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Any: """