Validate syntax.py

From Open Source Ecology
(Redirected from Wall Syntactic Validator)
Jump to navigation Jump to search
#!/usr/bin/env python3

import math
import sys
from pathlib import Path
from typing import Any

import yaml


def load_yaml(path: Path) -> Any:
    """Parse the YAML file at ``path`` and return the loaded object.

    The file is opened as UTF-8 text and handed to ``yaml.safe_load``;
    whatever that call returns is passed back to the caller unchanged.
    """
    with path.open(mode="r", encoding="utf-8") as stream:
        document = yaml.safe_load(stream)
    return document


def is_number(value: Any) -> bool:
    """Return True when ``value`` is a usable int or float.

    Rejected values:
      * ``bool`` -- it is a subclass of ``int`` but is not a numeric
        measurement.
      * NaN -- every ordering comparison against NaN is false, so a NaN
        value would pass any ``<`` / ``>`` range check and a NaN bound
        would never reject anything.

    Positive and negative infinity are still accepted; they compare
    normally and can express an open-ended bound.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    # Only floats can be NaN; guarding on the type also keeps math.isnan
    # from trying to convert arbitrarily large ints to float.
    if isinstance(value, float) and math.isnan(value):
        return False
    return True


def validate_enum(instance_name: str, field_name: str, value: Any, allowed: list[Any], errors: list[str]) -> None:
    """Record an error in ``errors`` when ``value`` is not one of ``allowed``.

    Nothing is returned; a single message naming the instance, the field,
    the offending value and the permitted values is appended on failure.
    """
    if value in allowed:
        return
    message = f"{instance_name}: '{field_name}' = {value!r} is invalid; allowed values are {allowed}"
    errors.append(message)


def validate_numeric_range(
    instance_name: str,
    field_name: str,
    value: Any,
    min_value: float | None,
    max_value: float | None,
    errors: list[str],
) -> None:
    """Check that ``value`` is numeric and lies within the optional bounds.

    A bound of ``None`` is not enforced. Problems are appended to
    ``errors``: one message if ``value`` is not a number (as decided by
    ``is_number``), otherwise one message per violated bound.
    """
    if is_number(value):
        # Lower bound first, then upper bound, so messages keep that order.
        if min_value is not None:
            if value < min_value:
                errors.append(
                    f"{instance_name}: '{field_name}' = {value} is below minimum {min_value}"
                )
        if max_value is not None:
            if value > max_value:
                errors.append(
                    f"{instance_name}: '{field_name}' = {value} is above maximum {max_value}"
                )
    else:
        # Range checks are meaningless for a non-number; report the type only.
        errors.append(f"{instance_name}: '{field_name}' must be a number, got {type(value).__name__}")


def validate_required_keys(
    obj: dict[str, Any],
    required_keys: list[str],
    context: str,
    errors: list[str],
) -> None:
    """Append one error to ``errors`` for every required key absent from ``obj``.

    ``context`` is used as the message prefix. Messages are emitted in the
    order the keys appear in ``required_keys``.
    """
    errors.extend(
        f"{context}: missing required key '{name}'"
        for name in required_keys
        if name not in obj
    )


def build_parameter_rules(schema: dict[str, Any]) -> dict[str, dict[str, Any]]:
    """Return the schema's top-level ``parameters`` mapping.

    Raises:
        ValueError: if ``schema`` has no ``parameters`` key or the value
            stored under it is not a dict.
    """
    has_rule_mapping = "parameters" in schema and isinstance(schema["parameters"], dict)
    if has_rule_mapping:
        return schema["parameters"]
    raise ValueError("Schema must contain a top-level 'parameters' mapping")


def validate_instance(
    instance: dict[str, Any],
    schema: dict[str, Any],
    parameter_rules: dict[str, dict[str, Any]],
) -> list[str]:
    """Validate a single instance mapping against the schema.

    Checks, in order:
      1. the instance has the keys ``id``, ``family`` and ``parameters``
         (validation stops here if any is missing);
      2. ``id`` and ``family`` are strings and ``family`` equals the
         schema's ``family``;
      3. ``parameters`` is a mapping (validation stops here if it is not);
      4. every parameter named in ``parameter_rules`` is present;
      5. no parameter outside ``parameter_rules`` is present;
      6. each present parameter satisfies its rule (``float`` range or
         ``enum`` membership; any other rule type is reported as
         unsupported).

    Args:
        instance: the instance mapping to check.
        schema: the schema mapping; only its ``family`` entry is read here.
        parameter_rules: parameter name -> rule mapping.

    Returns:
        A list of human-readable error messages; empty when the instance
        is valid.
    """
    errors: list[str] = []

    # Label missing-key errors with the instance id whenever a usable one is
    # present, so the user can tell which entry is broken. Fall back to the
    # generic "instance" label only when there is no non-empty string id.
    raw_id = instance.get("id")
    context = raw_id if isinstance(raw_id, str) and raw_id else "instance"
    validate_required_keys(instance, ["id", "family", "parameters"], context, errors)
    if errors:
        return errors

    instance_name = instance["id"]
    if not isinstance(instance_name, str):
        errors.append(f"{instance_name}: 'id' must be a string")

    family = instance["family"]
    expected_family = schema.get("family")
    if not isinstance(family, str):
        errors.append(f"{instance_name}: 'family' must be a string")
    elif family != expected_family:
        errors.append(
            f"{instance_name}: family {family!r} does not match schema family {expected_family!r}"
        )

    params = instance["parameters"]
    if not isinstance(params, dict):
        errors.append(f"{instance_name}: 'parameters' must be a mapping")
        return errors

    # Every parameter declared in the schema is required.
    for param_name in parameter_rules:
        if param_name not in params:
            errors.append(f"{instance_name}: missing required parameter '{param_name}'")

    # Parameters not declared in the schema are rejected.
    for param_name in params:
        if param_name not in parameter_rules:
            errors.append(f"{instance_name}: unknown parameter '{param_name}'")

    # Validate the values of the parameters that are present.
    for param_name, rule in parameter_rules.items():
        if param_name not in params:
            continue  # already reported as missing above
        value = params[param_name]
        rule_type = rule.get("type")
        if rule_type == "float":
            validate_numeric_range(
                instance_name,
                param_name,
                value,
                rule.get("min"),
                rule.get("max"),
                errors,
            )
        elif rule_type == "enum":
            allowed = rule.get("allowed")
            if not isinstance(allowed, list):
                errors.append(
                    f"{instance_name}: schema for '{param_name}' must define 'allowed' as a list"
                )
            else:
                validate_enum(instance_name, param_name, value, allowed, errors)
        else:
            errors.append(
                f"{instance_name}: unsupported schema type {rule_type!r} for parameter '{param_name}'"
            )
    return errors


def validate_schema_structure(schema: dict[str, Any]) -> list[str]:
    """Check the shape of a loaded schema and return any problems found.

    The schema root must be a mapping with a string ``family`` and a
    ``parameters`` mapping. Each parameter rule must itself be a mapping
    with a ``type`` of ``"float"`` (optional numeric ``min`` / ``max``) or
    ``"enum"`` (mandatory list ``allowed``). An empty list means the
    schema is structurally valid.
    """
    if not isinstance(schema, dict):
        return ["Schema root must be a mapping"]

    problems: list[str] = []

    if "family" not in schema:
        problems.append("Schema missing top-level 'family'")
    elif not isinstance(schema["family"], str):
        problems.append("Schema 'family' must be a string")

    # Without a usable 'parameters' mapping there are no rules to inspect.
    if "parameters" not in schema:
        problems.append("Schema missing top-level 'parameters'")
        return problems
    parameters = schema["parameters"]
    if not isinstance(parameters, dict):
        problems.append("Schema 'parameters' must be a mapping")
        return problems

    for name, spec in parameters.items():
        if not isinstance(spec, dict):
            problems.append(f"Schema parameter '{name}' must be a mapping")
            continue
        if "type" not in spec:
            problems.append(f"Schema parameter '{name}' missing 'type'")
            continue

        kind = spec["type"]
        if kind == "float":
            # Bounds are optional, but when given they must be numeric.
            if "min" in spec:
                if not is_number(spec["min"]):
                    problems.append(f"Schema parameter '{name}' has non-numeric 'min'")
            if "max" in spec:
                if not is_number(spec["max"]):
                    problems.append(f"Schema parameter '{name}' has non-numeric 'max'")
        elif kind == "enum":
            if "allowed" not in spec:
                problems.append(f"Schema parameter '{name}' missing 'allowed'")
            elif not isinstance(spec["allowed"], list):
                problems.append(f"Schema parameter '{name}' 'allowed' must be a list")
        else:
            problems.append(
                f"Schema parameter '{name}' has unsupported type {kind!r}"
            )
    return problems


def main() -> int:
    """Command-line entry point: validate an instances file against a schema.

    Expects exactly two arguments in ``sys.argv``: the schema YAML path and
    the instances YAML path. All output goes through ``print``.

    Returns:
        0 when every instance validates, 1 when the schema or any instance
        fails validation, 2 for usage errors, missing files or load/parse
        failures.
    """

    def report(heading: str, messages: list[str]) -> None:
        # Print a heading followed by one bullet line per message.
        print(heading)
        for message in messages:
            print(f"  - {message}")

    argv = sys.argv
    if len(argv) != 3:
        print("Usage: python validate_syntax.py <schema.yaml> <instances.yaml>")
        return 2

    schema_path, instances_path = Path(argv[1]), Path(argv[2])
    if not schema_path.exists():
        print(f"Error: schema file not found: {schema_path}")
        return 2
    if not instances_path.exists():
        print(f"Error: instances file not found: {instances_path}")
        return 2

    try:
        schema = load_yaml(schema_path)
        instances_doc = load_yaml(instances_path)
    except yaml.YAMLError as exc:
        print(f"YAML parse error: {exc}")
        return 2
    except Exception as exc:
        print(f"File load error: {exc}")
        return 2

    # The schema must be well-formed before instances are checked against it.
    schema_problems = validate_schema_structure(schema)
    if schema_problems:
        report("Schema syntax validation failed:", schema_problems)
        return 1

    if not isinstance(instances_doc, dict):
        print("Instances file root must be a mapping")
        return 1
    if "instances" not in instances_doc:
        print("Instances file must contain top-level key 'instances'")
        return 1
    instances = instances_doc["instances"]
    if not isinstance(instances, list):
        print("Instances file 'instances' must be a list")
        return 1

    rules = build_parameter_rules(schema)
    collected: list[str] = []
    known_ids: set[str] = set()
    for position, entry in enumerate(instances):
        if isinstance(entry, dict):
            entry_id = entry.get("id")
            # Only string ids take part in duplicate detection.
            if isinstance(entry_id, str):
                if entry_id in known_ids:
                    collected.append(f"{entry_id}: duplicate instance id")
                known_ids.add(entry_id)
            collected += validate_instance(entry, schema, rules)
        else:
            collected.append(f"instances[{position}] must be a mapping")

    if collected:
        report("Instance syntax validation failed:", collected)
        return 1

    print(f"Syntax validation passed: {len(instances)} instance(s) validated successfully.")
    return 0


if __name__ == "__main__":
    # Run the validator and hand its return code to the interpreter as the
    # process exit status.
    exit_status = main()
    raise SystemExit(exit_status)