From d3d99445813401b13654a7cd21e755ba259ef520 Mon Sep 17 00:00:00 2001
From: Mark Zhang <46978338+MarkintoshZ@users.noreply.github.com>
Date: Mon, 12 Feb 2024 21:42:45 -0600
Subject: [PATCH] Add cli schema_match that annotates CRD yaml file (#317)

* Add cli schema_match that annotates CRD yaml file

* Fix cli docs

* Override python version comment in requirement files

* Re-generate requirements-dev.txt using python3.10
---
 acto/cli/schema_match.py | 99 ++++++++++++++++++++++++++++++++++++++++
 pyproject.toml           |  1 +
 requirements-dev.txt     |  4 ++
 requirements.txt         |  4 ++
 4 files changed, 108 insertions(+)
 create mode 100644 acto/cli/schema_match.py

diff --git a/acto/cli/schema_match.py b/acto/cli/schema_match.py
new file mode 100644
index 0000000000..b9f83687af
--- /dev/null
+++ b/acto/cli/schema_match.py
@@ -0,0 +1,99 @@
+import argparse
+import sys
+
+import pandas as pd
+from ruamel.yaml import YAML
+
+from acto.input.k8s_schemas import K8sSchemaMatcher
+from acto.schema.schema import extract_schema
+
+
+def main():
+    """Annotate a CRD YAML file with the Kubernetes schemas it matches.
+
+    Loads the CRD given by ``--crd``, matches the ``openAPIV3Schema`` of its
+    last listed version against the Kubernetes schemas for ``--k8s-version``,
+    prints how often each schema matched, and dumps the CRD with the matched
+    schema names as end-of-line comments to ``--output`` (default: stdout).
+    """
+
+    parser = argparse.ArgumentParser(
+        description="Given a CRD file, annotate it with the matched Kubernetes schema information."
+    )
+    parser.add_argument(
+        "--crd",
+        required=True,
+        help="Path to the YAML CRD file",
+    )
+    parser.add_argument(
+        "--k8s-version",
+        required=False,
+        default="1.29",
+        help="Kubernetes version to match the schema with",
+    )
+    parser.add_argument(
+        "--output",
+        required=False,
+        help="Path to dump the annotated YAML file to",
+    )
+    args = parser.parse_args()
+
+    # read the CRD file; the loaded nodes must accept comments later on
+    yaml = YAML()
+    with open(args.crd, "r", encoding="utf-8") as f:
+        crd = yaml.load(f)
+
+    # extract the schema of the last version listed in the CRD
+    schema_yaml = crd["spec"]["versions"][-1]["schema"]["openAPIV3Schema"]
+    root = extract_schema([], schema_yaml)
+
+    # match the schema with Kubernetes resource schemas
+    schema_matcher = K8sSchemaMatcher.from_version(args.k8s_version)
+    matches = schema_matcher.find_matched_schemas(root)
+
+    # output the breakdown of the matched schema information; skipped when
+    # nothing matched, because a frame built from no rows has no
+    # "k8s_schema_name" column and indexing it would raise KeyError
+    if matches:
+        df = pd.DataFrame(
+            [
+                {
+                    "k8s_schema_name": k8s_schema.k8s_schema_name,
+                    "schema_path": "/".join(schema.path),
+                }
+                for schema, k8s_schema in matches
+            ]
+        )
+        print(df["k8s_schema_name"].value_counts().to_string())
+    print(f"{len(matches)} schemas matched in total")
+
+    # annotate the yaml file with the matched schema information
+    for schema, k8s_schema in matches:
+        if not schema.path:  # root match: no parent key to hang a comment on
+            continue
+        comment = k8s_schema.k8s_schema_name
+        # walk down to the parent of the matched node: "ITEM" steps into
+        # the "items" entry, any other segment into that named property
+        curr = schema_yaml
+        for segment in schema.path[:-1]:
+            if segment == "ITEM":
+                curr = curr["items"]
+            else:
+                curr = curr["properties"][segment]
+        # attach the comment to the key under which the matched node lives
+        if schema.path[-1] != "ITEM":
+            curr["properties"].yaml_add_eol_comment(comment, schema.path[-1])
+        else:
+            curr.yaml_add_eol_comment(comment, "items")
+
+    # output the annotated yaml file
+    if args.output is None:
+        yaml.dump(crd, sys.stdout)
+    else:
+        with open(args.output, "w", encoding="utf-8") as f:
+            yaml.dump(crd, f)
+        print("Annotated CRD file dumped to", args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index eaacaf9f47..c42312ecde 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,6 +13,7 @@ dependencies = [
     "jsonpatch~=1.33",
     "pandas~=2.0.2",
     "PyYAML~=6.0",
+    "ruamel.yaml~=0.18",
     "requests~=2.31.0",
     "pydantic~=2.5.2",
 ]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 42ab49808f..98e76ec246 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -184,6 +184,10 @@ resolvelib==0.8.1
     # via ansible-core
 rsa==4.9
     # via google-auth
+ruamel-yaml==0.18.5
+    # via acto (pyproject.toml)
+ruamel-yaml-clib==0.2.8
+    # via ruamel-yaml
 six==1.16.0
     # via
     #   kubernetes
diff --git a/requirements.txt b/requirements.txt
index 1552199db5..443aff7680 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -71,6 +71,10 @@ requests-oauthlib==1.3.1
     # via kubernetes
 rsa==4.9
     # via google-auth
+ruamel-yaml==0.18.5
+    # via acto (pyproject.toml)
+ruamel-yaml-clib==0.2.8
+    # via ruamel-yaml
 six==1.16.0
     # via
     #   kubernetes