Wall Syntactic Validator: Difference between revisions

From Open Source Ecology
Jump to navigation Jump to search
(Created page with "#!/usr/bin/env python3 import sys from pathlib import Path from typing import Any import yaml def load_yaml(path: Path) -> Any: with path.open("r", encoding="utf-8") as f: return yaml.safe_load(f) def is_number(value: Any) -> bool: return isinstance(value, (int, float)) and not isinstance(value, bool) def validate_enum(instance_name: str, field_name: str, value: Any, allowed: list[Any], errors: list[str]) -> None: if value not in allowed:...")
 
(Redirected page to Validate syntax.py)
Tag: New redirect
 
Line 1: Line 1:
#!/usr/bin/env python3
#redirect [[validate_syntax.py]]
 
import sys
from pathlib import Path
from typing import Any
 
import yaml
 
 
def load_yaml(path: Path) -> Any:
    """Read the file at *path* as UTF-8 and parse it with ``yaml.safe_load``.

    Returns whatever object ``yaml.safe_load`` produces for the document.
    Errors from opening the file or from the YAML parser propagate to the
    caller.
    """
    with path.open("r", encoding="utf-8") as stream:
        document = yaml.safe_load(stream)
    return document
 
 
def is_number(value: Any) -> bool:
    """Return True when *value* is an ``int`` or ``float`` but not a ``bool``.

    ``bool`` is a subclass of ``int`` in Python, so it is ruled out first.
    """
    if isinstance(value, bool):
        return False
    return isinstance(value, (int, float))
 
 
def validate_enum(instance_name: str, field_name: str, value: Any, allowed: list[Any], errors: list[str]) -> None:
    """Record an error when *value* is not one of the *allowed* values.

    Args:
        instance_name: Prefix used to identify the instance in the message.
        field_name: Name of the field being checked.
        value: The value to look up in *allowed*.
        allowed: The permitted values.
        errors: List that receives the error message; mutated in place.
    """
    if value in allowed:
        return

    message = f"{instance_name}: '{field_name}' = {value!r} is invalid; allowed values are {allowed}"
    errors.append(message)
 
 
def validate_numeric_range(
    instance_name: str,
    field_name: str,
    value: Any,
    min_value: float | None,
    max_value: float | None,
    errors: list[str],
) -> None:
    """Check that *value* is a real number inside the optional inclusive bounds.

    Args:
        instance_name: Prefix used to identify the instance in messages.
        field_name: Name of the field being checked.
        value: The candidate value.
        min_value: Inclusive lower bound, or ``None`` for no lower bound.
        max_value: Inclusive upper bound, or ``None`` for no upper bound.
        errors: List that receives error messages; mutated in place.

    A non-numeric value or a NaN produces a single error and skips the
    range checks. Otherwise one error is appended per violated bound.
    """
    import math  # function-scope import keeps the file's import block untouched

    if not is_number(value):
        errors.append(f"{instance_name}: '{field_name}' must be a number, got {type(value).__name__}")
        return

    # NaN compares False against every bound, so without this guard it would
    # slip through both range checks below and be reported as valid.
    # The isinstance test avoids converting very large ints to float.
    if isinstance(value, float) and math.isnan(value):
        errors.append(f"{instance_name}: '{field_name}' must be a number, got NaN")
        return

    if min_value is not None and value < min_value:
        errors.append(
            f"{instance_name}: '{field_name}' = {value} is below minimum {min_value}"
        )
    if max_value is not None and value > max_value:
        errors.append(
            f"{instance_name}: '{field_name}' = {value} is above maximum {max_value}"
        )
 
 
def validate_required_keys(
    obj: dict[str, Any],
    required_keys: list[str],
    context: str,
    errors: list[str],
) -> None:
    """Append one error to *errors* for every required key absent from *obj*.

    Args:
        obj: Mapping whose keys are inspected.
        required_keys: Keys that must be present, reported in this order.
        context: Prefix placed at the start of each error message.
        errors: List that receives the messages; mutated in place.
    """
    errors.extend(
        f"{context}: missing required key '{name}'"
        for name in required_keys
        if name not in obj
    )
 
 
def build_parameter_rules(schema: dict[str, Any]) -> dict[str, dict[str, Any]]:
    """Return the schema's top-level ``parameters`` mapping.

    Raises:
        ValueError: If ``parameters`` is absent or is not a dict.
    """
    has_mapping = "parameters" in schema and isinstance(schema["parameters"], dict)
    if has_mapping:
        return schema["parameters"]
    raise ValueError("Schema must contain a top-level 'parameters' mapping")
 
 
def validate_instance(
    instance: dict[str, Any],
    schema: dict[str, Any],
    parameter_rules: dict[str, dict[str, Any]],
) -> list[str]:
    """Validate one instance mapping against the schema and return its errors.

    Args:
        instance: The instance to check; must provide ``id``, ``family`` and
            ``parameters``.
        schema: The schema mapping; only its ``family`` entry is read here.
        parameter_rules: Per-parameter rules keyed by parameter name. Every
            parameter named here is treated as required.

    Returns:
        A list of error messages, empty when the instance is valid. If any of
        the three top-level keys is missing, or ``parameters`` is not a
        mapping, the remaining checks are skipped.
    """
    problems: list[str] = []

    validate_required_keys(instance, ["id", "family", "parameters"], "instance", problems)
    if problems:
        return problems

    # The raw id doubles as the message prefix, even when it is not a string.
    label = instance["id"]
    family = instance["family"]
    params = instance["parameters"]

    if not isinstance(label, str):
        problems.append(f"{label}: 'id' must be a string")

    if isinstance(family, str):
        expected_family = schema.get("family")
        if family != expected_family:
            problems.append(
                f"{label}: family {family!r} does not match schema family {expected_family!r}"
            )
    else:
        problems.append(f"{label}: 'family' must be a string")

    if not isinstance(params, dict):
        problems.append(f"{label}: 'parameters' must be a mapping")
        return problems

    # Every parameter declared in the schema must be supplied.
    problems.extend(
        f"{label}: missing required parameter '{name}'"
        for name in parameter_rules
        if name not in params
    )

    # Parameters the schema does not declare are rejected.
    problems.extend(
        f"{label}: unknown parameter '{name}'"
        for name in params
        if name not in parameter_rules
    )

    # Check each supplied value against its schema rule.
    for name, rule in parameter_rules.items():
        if name not in params:
            continue

        value = params[name]
        kind = rule.get("type")

        if kind == "float":
            validate_numeric_range(
                label,
                name,
                value,
                rule.get("min"),
                rule.get("max"),
                problems,
            )
            continue

        if kind != "enum":
            problems.append(
                f"{label}: unsupported schema type {kind!r} for parameter '{name}'"
            )
            continue

        allowed = rule.get("allowed")
        if isinstance(allowed, list):
            validate_enum(label, name, value, allowed, problems)
        else:
            problems.append(
                f"{label}: schema for '{name}' must define 'allowed' as a list"
            )

    return problems
 
 
def validate_schema_structure(schema: dict[str, Any]) -> list[str]:
    """Check the shape of a loaded schema and return a list of error messages.

    The schema root must be a mapping with a string ``family`` and a
    ``parameters`` mapping. Each parameter rule must be a mapping with a
    ``type`` of ``"float"`` (optional numeric ``min``/``max``) or ``"enum"``
    (required list ``allowed``). An empty list means the schema is well formed.
    """
    if not isinstance(schema, dict):
        return ["Schema root must be a mapping"]

    problems: list[str] = []

    family_present = "family" in schema
    if not family_present:
        problems.append("Schema missing top-level 'family'")
    elif not isinstance(schema["family"], str):
        problems.append("Schema 'family' must be a string")

    if "parameters" not in schema:
        problems.append("Schema missing top-level 'parameters'")
        return problems

    parameters = schema["parameters"]
    if not isinstance(parameters, dict):
        problems.append("Schema 'parameters' must be a mapping")
        return problems

    for name, rule in parameters.items():
        if not isinstance(rule, dict):
            problems.append(f"Schema parameter '{name}' must be a mapping")
            continue

        if "type" not in rule:
            problems.append(f"Schema parameter '{name}' missing 'type'")
            continue

        kind = rule["type"]

        if kind == "float":
            # Bounds are optional, but when given they must be numeric.
            bad_min = "min" in rule and not is_number(rule["min"])
            if bad_min:
                problems.append(f"Schema parameter '{name}' has non-numeric 'min'")
            bad_max = "max" in rule and not is_number(rule["max"])
            if bad_max:
                problems.append(f"Schema parameter '{name}' has non-numeric 'max'")
            continue

        if kind == "enum":
            if "allowed" not in rule:
                problems.append(f"Schema parameter '{name}' missing 'allowed'")
            elif not isinstance(rule["allowed"], list):
                problems.append(f"Schema parameter '{name}' 'allowed' must be a list")
            continue

        problems.append(
            f"Schema parameter '{name}' has unsupported type {kind!r}"
        )

    return problems
 
 
def main() -> int:
    """Run the validator from the command line and return a process exit code.

    Expects exactly two arguments in ``sys.argv``: the schema YAML path and
    the instances YAML path.

    Returns:
        0 when every instance validates, 1 when the schema or the instances
        fail validation, 2 for usage errors, missing files, or load/parse
        failures.
    """

    def report(header: str, messages: list[str]) -> None:
        # Print a heading followed by one bullet line per message.
        print(header)
        for message in messages:
            print(f"  - {message}")

    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python validate_syntax.py <schema.yaml> <instances.yaml>")
        return 2

    schema_path = Path(cli_args[0])
    instances_path = Path(cli_args[1])

    if not schema_path.exists():
        print(f"Error: schema file not found: {schema_path}")
        return 2

    if not instances_path.exists():
        print(f"Error: instances file not found: {instances_path}")
        return 2

    try:
        schema = load_yaml(schema_path)
        instances_doc = load_yaml(instances_path)
    except yaml.YAMLError as parse_error:
        print(f"YAML parse error: {parse_error}")
        return 2
    except Exception as load_error:
        print(f"File load error: {load_error}")
        return 2

    schema_problems = validate_schema_structure(schema)
    if schema_problems:
        report("Schema syntax validation failed:", schema_problems)
        return 1

    if not isinstance(instances_doc, dict):
        print("Instances file root must be a mapping")
        return 1

    if "instances" not in instances_doc:
        print("Instances file must contain top-level key 'instances'")
        return 1

    instances = instances_doc["instances"]
    if not isinstance(instances, list):
        print("Instances file 'instances' must be a list")
        return 1

    parameter_rules = build_parameter_rules(schema)

    findings: list[str] = []
    seen_ids: set[str] = set()

    for position, entry in enumerate(instances):
        if not isinstance(entry, dict):
            findings.append(f"instances[{position}] must be a mapping")
            continue

        # Only string ids take part in duplicate detection; a non-string id
        # is reported by validate_instance instead.
        candidate_id = entry.get("id")
        if isinstance(candidate_id, str) and candidate_id in seen_ids:
            findings.append(f"{candidate_id}: duplicate instance id")
        elif isinstance(candidate_id, str):
            seen_ids.add(candidate_id)

        findings.extend(validate_instance(entry, schema, parameter_rules))

    if findings:
        report("Instance syntax validation failed:", findings)
        return 1

    print(f"Syntax validation passed: {len(instances)} instance(s) validated successfully.")
    return 0
 
 
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())

Latest revision as of 02:25, 11 March 2026

Redirect to: