Open
Description
To prevent errors from creeping in we should add a package index validation script.
I fed our package description to an LLM, and it gave me the following as a starting point:
from jsonschema import validate, ValidationError
import yaml
# JSON Schema describing the package index: a list of package entries.

# "owner/repo" slug: exactly one slash with non-empty text on both sides.
_REPO_SLUG_PATTERN = r"^[^/]+/[^/]+$"

# Closed set of category names an entry may use.
_ALLOWED_CATEGORIES = [
    "numerical", "libraries", "scientific", "examples",
    "programming", "graphics", "data-types", "io",
    "interfaces", "strings",
]

# Per-field constraints for a single package entry.
_PACKAGE_PROPERTIES = {
    "name": {"type": "string"},
    "github": {"type": "string", "pattern": _REPO_SLUG_PATTERN},
    "gitlab": {"type": "string", "pattern": _REPO_SLUG_PATTERN},
    "url": {"type": "string", "format": "uri"},
    "description": {"type": "string", "maxLength": 200},
    # Categories are a non-empty list drawn from the allowed names ...
    "categories": {
        "type": "array",
        "items": {"type": "string", "enum": _ALLOWED_CATEGORIES},
        "minItems": 1,
    },
    # ... whereas tags are declared as one plain string, not a list.
    "tags": {"type": "string"},
    "license": {"type": "string"},
    "version": {"type": "string"},
}

# Exactly one source variant must match: a GitHub slug, a GitLab slug,
# or a plain URL accompanied by an explicit license.
_SOURCE_VARIANTS = [
    {"required": ["github"]},
    {"required": ["gitlab"]},
    {"required": ["url", "license"]},
]

schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": _PACKAGE_PROPERTIES,
        "required": ["name", "description", "categories"],
        "oneOf": _SOURCE_VARIANTS,
        # Unknown keys are rejected so typos in field names are caught.
        "additionalProperties": False,
    },
}
# Validate data/package_index.yml against `schema` and, on failure, report
# the validation message plus the line of the offending entry's `name:` key.

# Read the raw text once: it is needed both for parsing and, on failure,
# for locating the offending entry by line number.
with open("data/package_index.yml", "r", encoding="utf-8") as f:
    yaml_content = f.read()
data = yaml.safe_load(yaml_content)

try:
    validate(instance=data, schema=schema)
except ValidationError as e:
    print("Validation error:", e.message)
    # e.path is the path to the invalid element inside the instance; when
    # its first component is an int it is used as the index of the failing
    # entry in the top-level list.
    path = list(e.path)
    index = path[0] if path and isinstance(path[0], int) else None
    entry = data[index] if index is not None else None
    # The entry may be the very thing that is malformed (not a mapping, or
    # missing "name"), so look the name up defensively instead of indexing.
    name = entry.get("name") if isinstance(entry, dict) else None
    if name is not None:
        needle = f"name: {name}"
        # Best-effort location: first line that mentions this entry's name.
        for i, line in enumerate(yaml_content.splitlines()):
            if needle in line:
                print(f"Error at line {i+1}: {line.strip()}")
                break
else:
    print("YAML is valid!")
I think our handling of categories and tags as arrays is not consistent.
Is there a better way of doing this?
Metadata
Metadata
Assignees
Labels
No labels