Skip to content

Commit 6dce7f8

Browse files
GWeale and copybara-github
authored and committed
fix: Add schema type sanitization to OpenAPI spec parser
This change introduces a `_sanitize_schema_types` method to the OpenAPI spec parser. This method recursively removes or filters out non-standard schema types (e.g., "Any", "Unknown") from the OpenAPI specification.

Close #3704
Close #3108

Co-authored-by: George Weale <[email protected]>
PiperOrigin-RevId: 852986491
1 parent ce64787 commit 6dce7f8

File tree

2 files changed

+271
-0
lines changed

2 files changed

+271
-0
lines changed

src/google/adk/tools/openapi_tool/openapi_spec_parser/openapi_spec_parser.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from typing import Dict
from typing import FrozenSet
from typing import List
from typing import Optional
from typing import Set
2223

2324
from fastapi.openapi.models import Operation
2425
from pydantic import BaseModel
@@ -29,6 +30,21 @@
2930
from ..common.common import ApiParameter
3031
from .operation_parser import OperationParser
3132

33+
# Valid JSON Schema types as per OpenAPI 3.0/3.1 specification.
#
# These are the only types accepted by Pydantic 2.11+ for Schema.type.
# The collection is immutable, so it is annotated as FrozenSet (a frozenset
# is not a `set`, and annotating it as Set[str] fails static type checks).
_VALID_SCHEMA_TYPES: FrozenSet[str] = frozenset({
    "array",
    "boolean",
    "integer",
    "null",
    "number",
    "object",
    "string",
})

# Dictionary keys that mark the start of schema content while walking a spec:
# everything nested below one of these keys is treated as schema data.
_SCHEMA_CONTAINER_KEYS: FrozenSet[str] = frozenset({"schema", "schemas"})
47+
3248

3349
class OperationEndpoint(BaseModel):
3450
base_url: str
@@ -70,9 +86,81 @@ def parse(self, openapi_spec_dict: Dict[str, Any]) -> List[ParsedOperation]:
7086
"""
7187

7288
openapi_spec_dict = self._resolve_references(openapi_spec_dict)
89+
openapi_spec_dict = self._sanitize_schema_types(openapi_spec_dict)
7390
operations = self._collect_operations(openapi_spec_dict)
7491
return operations
7592

93+
def _sanitize_schema_types(
    self, openapi_spec: Dict[str, Any]
) -> Dict[str, Any]:
  """Recursively sanitizes schema types in an OpenAPI specification.

  Pydantic 2.11+ strictly validates that schema types are one of:
  'array', 'boolean', 'integer', 'null', 'number', 'object', 'string'.

  External APIs (like Google Integration Connectors) may return schemas
  with non-standard types like 'Any'. Inside schema content (anything nested
  under a 'schema' or 'schemas' key) this method:

  * lower-cases a string `type` and drops it if it is still not valid;
  * filters a list `type` down to its valid, de-duplicated, lower-cased
    entries and drops the field if nothing valid remains;
  * leaves a `type` of any other kind untouched.

  Literal payloads stored under the schema keywords 'const', 'default',
  'enum', 'example' and 'examples' are user data, not sub-schemas, so they
  are passed through unchanged even when they contain a "type" key.

  Args:
    openapi_spec: A dictionary representing the OpenAPI specification. It is
      deep-copied first, so the caller's dictionary is never modified.

  Returns:
    A new dictionary with invalid schema types removed or sanitized.
  """
  # Schema keywords whose values are literal data rather than sub-schemas.
  literal_value_keys = frozenset(
      {"const", "default", "enum", "example", "examples"}
  )
  # Keywords whose values map arbitrary names to sub-schemas. Inside such a
  # map the keys are user-chosen names (a property may legitimately be called
  # "default" or "example"), so they must not be read as schema keywords.
  schema_map_keys = frozenset({
      "$defs",
      "definitions",
      "dependentSchemas",
      "patternProperties",
      "properties",
      "schemas",
  })

  openapi_spec = copy.deepcopy(openapi_spec)

  def sanitize_type_field(schema_dict: Dict[str, Any]) -> None:
    """Normalizes or removes the `type` entry of one dict, in place."""
    if "type" not in schema_dict:
      return

    type_value = schema_dict["type"]
    if isinstance(type_value, str):
      normalized_type = type_value.lower()
      if normalized_type in _VALID_SCHEMA_TYPES:
        schema_dict["type"] = normalized_type
      else:
        del schema_dict["type"]
      return

    if isinstance(type_value, list):
      # dict.fromkeys de-duplicates while keeping first-seen order.
      valid_types = list(
          dict.fromkeys(
              entry.lower()
              for entry in type_value
              if isinstance(entry, str)
              and entry.lower() in _VALID_SCHEMA_TYPES
          )
      )
      if valid_types:
        schema_dict["type"] = valid_types
      else:
        del schema_dict["type"]
    # Any other kind of value (e.g. a dict, when "type" is the name of a
    # property inside a `properties` map) is deliberately left alone.

  def sanitize_recursive(
      obj: Any, *, in_schema: bool, is_name_map: bool = False
  ) -> Any:
    """Walks `obj`, sanitizing every dict reached inside schema content."""
    if isinstance(obj, dict):
      if in_schema:
        sanitize_type_field(obj)

      # Only values of existing keys are reassigned, so the dict's size is
      # stable and iterating over items() here is safe.
      for key, value in obj.items():
        if in_schema and not is_name_map and key in literal_value_keys:
          # Literal payload of a schema object: keep it exactly as given.
          continue
        obj[key] = sanitize_recursive(
            value,
            in_schema=in_schema or key in _SCHEMA_CONTAINER_KEYS,
            # Keys of a name map are names, not keywords, so the values of
            # a name map are always plain schemas.
            is_name_map=not is_name_map and key in schema_map_keys,
        )
      return obj
    if isinstance(obj, list):
      return [sanitize_recursive(item, in_schema=in_schema) for item in obj]
    return obj

  return sanitize_recursive(openapi_spec, in_schema=False)
163+
76164
def _collect_operations(
77165
self, openapi_spec: Dict[str, Any]
78166
) -> List[ParsedOperation]:

tests/unittests/tools/openapi_tool/openapi_spec_parser/test_openapi_spec_parser.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,3 +681,186 @@ def test_parse_spec_with_path_level_parameters(openapi_spec_generator):
681681
assert local_param is not None
682682
assert local_param.param_location == "header"
683683
assert local_param.type_value is int
684+
685+
686+
def test_parse_spec_with_invalid_type_any(openapi_spec_generator):
  """Checks that a schema declaring type 'Any' does not break parsing.

  External APIs like Google Integration Connectors may return schemas with
  non-standard types like 'Any'. The parser is expected to sanitize such
  types so that parsing succeeds under Pydantic 2.11+.
  """
  success_response = {
      "description": "Success",
      "content": {"application/json": {"schema": {"type": "Any"}}},
  }
  get_operation = {
      "operationId": "testAnyType",
      "responses": {"200": success_response},
  }
  spec = {
      "openapi": "3.1.0",
      "info": {"title": "API with Any type", "version": "1.0.0"},
      "paths": {"/test": {"get": get_operation}},
  }

  # Parsing must complete without raising a ValidationError.
  operations = openapi_spec_generator.parse(spec)

  assert len(operations) == 1
  assert operations[0].name == "test_any_type"
718+
719+
720+
def test_parse_spec_with_nested_invalid_types(openapi_spec_generator):
  """Checks that invalid types nested inside object schemas are sanitized."""
  nested_object_schema = {
      "type": "object",
      "properties": {"deeply_invalid": {"type": "CustomType"}},
  }
  body_schema = {
      "type": "object",
      "properties": {
          "valid_prop": {"type": "string"},
          "invalid_prop": {"type": "Unknown"},
          "nested_obj": nested_object_schema,
      },
  }
  post_operation = {
      "operationId": "testNestedInvalid",
      "requestBody": {
          "content": {"application/json": {"schema": body_schema}}
      },
      "responses": {"200": {"description": "OK"}},
  }
  spec = {
      "openapi": "3.1.0",
      "info": {"title": "Nested Invalid Types API", "version": "1.0.0"},
      "paths": {"/test": {"post": post_operation}},
  }

  # Parsing must complete without raising a ValidationError.
  operations = openapi_spec_generator.parse(spec)

  assert len(operations) == 1
  operation = operations[0]
  # Every top-level body property, valid or not, must still become a
  # parameter.
  original_names = [param.original_name for param in operation.parameters]
  for expected_name in ("valid_prop", "invalid_prop", "nested_obj"):
    assert expected_name in original_names
766+
767+
768+
def test_parse_spec_with_type_list_containing_invalid(openapi_spec_generator):
  """Checks that a type list mixing valid and invalid entries still parses."""
  mixed_type_schema = {"type": ["string", "Any", "null"]}
  success_response = {
      "description": "Success",
      "content": {"application/json": {"schema": mixed_type_schema}},
  }
  get_operation = {
      "operationId": "testTypeList",
      "responses": {"200": success_response},
  }
  spec = {
      "openapi": "3.1.0",
      "info": {"title": "Type List API", "version": "1.0.0"},
      "paths": {"/test": {"get": get_operation}},
  }

  # Parsing must complete without raising a ValidationError.
  operations = openapi_spec_generator.parse(spec)

  assert len(operations) == 1
796+
797+
798+
def test_sanitize_schema_types_removes_invalid_types(openapi_spec_generator):
  """Checks that an invalid type is dropped while a valid one is kept."""
  component_schemas = {
      "InvalidSchema": {"type": "Any", "description": "Invalid type"},
      "ValidSchema": {"type": "string", "description": "Valid type"},
  }
  spec = {"components": {"schemas": component_schemas}}

  result = openapi_spec_generator._sanitize_schema_types(spec)
  cleaned_schemas = result["components"]["schemas"]

  # The invalid type is removed, but sibling fields survive.
  assert "type" not in cleaned_schemas["InvalidSchema"]
  assert cleaned_schemas["InvalidSchema"]["description"] == "Invalid type"

  # The valid type is left in place.
  assert cleaned_schemas["ValidSchema"]["type"] == "string"
820+
821+
822+
def test_sanitize_schema_types_does_not_touch_security_schemes(
    openapi_spec_generator,
):
  """Checks that sanitization leaves security scheme `type` values alone."""
  api_key_scheme = {
      "type": "apiKey",
      "in": "header",
      "name": "X-API-Key",
  }
  spec = {
      "components": {
          "schemas": {"InvalidSchema": {"type": "Any"}},
          "securitySchemes": {"api_key": api_key_scheme},
      }
  }

  components = openapi_spec_generator._sanitize_schema_types(spec)[
      "components"
  ]

  assert "type" not in components["schemas"]["InvalidSchema"]
  assert components["securitySchemes"]["api_key"]["type"] == "apiKey"
845+
846+
847+
def test_sanitize_schema_types_filters_type_lists(openapi_spec_generator):
  """Checks that invalid entries are filtered out of a type list."""
  mixed_types = ["string", "Any", "null", "Unknown"]

  result = openapi_spec_generator._sanitize_schema_types(
      {"schema": {"type": mixed_types}}
  )

  # Only the valid entries remain, in their original order.
  assert result["schema"]["type"] == ["string", "null"]
855+
856+
857+
def test_sanitize_schema_types_removes_all_invalid_list(openapi_spec_generator):
  """Checks that a type list with no valid entries is removed altogether."""
  only_invalid_types = ["Any", "Unknown", "Custom"]
  spec = {"schema": {"type": only_invalid_types}}

  result = openapi_spec_generator._sanitize_schema_types(spec)

  # With nothing valid left, the whole `type` field must be gone.
  assert "type" not in result["schema"]

0 commit comments

Comments
 (0)