From d3d99445813401b13654a7cd21e755ba259ef520 Mon Sep 17 00:00:00 2001 From: Mark Zhang <46978338+MarkintoshZ@users.noreply.github.com> Date: Mon, 12 Feb 2024 21:42:45 -0600 Subject: [PATCH] Add cli schema_match that annotates CRD yaml file (#317) * Add cli schema_match that annotates CRD yaml file * Fix cli docs * Override python version comment in requirement files * Re-generate requirements-dev.txt using python3.10 --- acto/cli/schema_match.py | 86 ++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 1 + requirements-dev.txt | 4 ++ requirements.txt | 4 ++ 4 files changed, 95 insertions(+) create mode 100644 acto/cli/schema_match.py diff --git a/acto/cli/schema_match.py b/acto/cli/schema_match.py new file mode 100644 index 0000000000..b9f83687af --- /dev/null +++ b/acto/cli/schema_match.py @@ -0,0 +1,86 @@ +import argparse +import sys + +import pandas as pd +from ruamel.yaml import YAML + +from acto.input.k8s_schemas import K8sSchemaMatcher +from acto.schema.schema import extract_schema + + +def main(): + """Main function""" + + parser = argparse.ArgumentParser( + description="Given a CRD file, annotate it with the matched Kubernetes schema information."
+ ) + parser.add_argument( + "--crd", + required=True, + help="Path to the YAML CRD file", + ) + parser.add_argument( + "--k8s-version", + required=False, + default="1.29", + help="Kubernetes version to match the schema with", + ) + parser.add_argument( + "--output", + required=False, + help="Path to dump the annotated YAML file to", + ) + args = parser.parse_args() + + # read the CRD file + yaml = YAML() + with open(args.crd, "r", encoding="utf-8") as f: + crd = yaml.load(f) + + # extract the schema + schema_yaml = crd["spec"]["versions"][-1]["schema"]["openAPIV3Schema"] + root = extract_schema([], schema_yaml) + + # match the schema with Kubernetes resource schemas + schema_matcher = K8sSchemaMatcher.from_version(args.k8s_version) + matches = schema_matcher.find_matched_schemas(root) + + # output the breakdown of the matched schema information + df = pd.DataFrame( + [ + { + "k8s_schema_name": k8s_schema.k8s_schema_name, + "schema_path": "/".join(schema.path), + } + for schema, k8s_schema in matches + ] + ) + + print(df["k8s_schema_name"].value_counts().to_string()) + print(f"{len(matches)} schemas matched in total") + + # annotate the yaml file with the matched schema information + for schema, k8s_schema in matches: + comment = k8s_schema.k8s_schema_name + curr = schema_yaml + for segment in schema.path[:-1]: + if segment == "ITEM": + curr = curr["items"] + else: + curr = curr["properties"][segment] + if schema.path[-1] != "ITEM": + curr["properties"].yaml_add_eol_comment(comment, schema.path[-1]) + else: + curr.yaml_add_eol_comment(comment, "items") + + # output the annotated yaml file + if args.output is None: + yaml.dump(crd, sys.stdout) + else: + with open(args.output, "w", encoding="utf-8") as f: + yaml.dump(crd, f) + print("Annotated CRD file dumped to", args.output) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index eaacaf9f47..c42312ecde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ 
dependencies = [ "jsonpatch~=1.33", "pandas~=2.0.2", "PyYAML~=6.0", + "ruamel.yaml~=0.18", "requests~=2.31.0", "pydantic~=2.5.2", ] diff --git a/requirements-dev.txt b/requirements-dev.txt index 42ab49808f..98e76ec246 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -184,6 +184,10 @@ resolvelib==0.8.1 # via ansible-core rsa==4.9 # via google-auth +ruamel-yaml==0.18.5 + # via acto (pyproject.toml) +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml six==1.16.0 # via # kubernetes diff --git a/requirements.txt b/requirements.txt index 1552199db5..443aff7680 100644 --- a/requirements.txt +++ b/requirements.txt @@ -71,6 +71,10 @@ requests-oauthlib==1.3.1 # via kubernetes rsa==4.9 # via google-auth +ruamel-yaml==0.18.5 + # via acto (pyproject.toml) +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml six==1.16.0 # via # kubernetes