From a37508f79b6b9cf6b8314d48b1c18069b6b2ada0 Mon Sep 17 00:00:00 2001
From: mgunnala <manugunnala@microsoft.com>
Date: Mon, 11 Nov 2024 12:38:33 -0500
Subject: [PATCH] Fix e2e test failures

---
 .../ext_policy_with_dependencies.yml           |  5 ++++-
 .../ext_policy/ext_policy_with_dependencies.py | 18 ++++++++++--------
 ...ent_ext_workflow-check_data_in_agent_log.py |  7 +++++--
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/tests_e2e/test_suites/ext_policy_with_dependencies.yml b/tests_e2e/test_suites/ext_policy_with_dependencies.yml
index 3a460c531..15522cd1d 100644
--- a/tests_e2e/test_suites/ext_policy_with_dependencies.yml
+++ b/tests_e2e/test_suites/ext_policy_with_dependencies.yml
@@ -5,4 +5,7 @@ name: "ExtPolicyWithDependencies"
 tests:
   - "ext_policy/ext_policy_with_dependencies.py"
 images: "random(endorsed)"
-executes_on_scale_set: true
\ No newline at end of file
+executes_on_scale_set: true
+# This test should run on its own VMSS, because other tests may leave behind extensions
+# that are disallowed by policy and affect results.
+owns_vm: true
\ No newline at end of file
diff --git a/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py
index ae8cf1e96..7882f31df 100644
--- a/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py
+++ b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py
@@ -76,18 +76,19 @@ def _create_policy_file(ssh_client, policy):
             ssh_client.run_command(f"mv {remote_path} {policy_file_final_dest}", use_sudo=True)
 
     def run(self):
+
+        # Set up the test run
         instances_ip_address: List[VmssInstanceIpAddress] = self._context.vmss.get_instances_ip_address()
         ssh_clients: Dict[str, SshClient] = {}
         for instance in instances_ip_address:
             ssh_clients[instance.instance_name] = SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file)
-
         for ssh_client in ssh_clients.values():
             ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=y", use_sudo=True)
 
         if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(next(iter(ssh_clients.values())).run_command("get_distro.py").rstrip()):
             raise TestSkipped("Currently AzureMonitorLinuxAgent is not supported on this distro")
 
-        # This is the base ARM template that's used for deploying extensions for this scenario
+        # This is the base ARM template that's used for deploying extensions for this scenario.
         base_extension_template = {
             "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json",
             "contentVersion": "1.0.0.0",
@@ -139,16 +140,17 @@ def run(self):
                 dependency_list = "-" if not depends_on else ' and '.join(depends_on)
                 log.info("{0} depends on {1}".format(ext['name'], dependency_list))
 
-            # Copy policy file to each instance
+            # Copy policy file to each VM instance
             log.info("Updating policy file with new policy: {0}".format(policy))
-            for instance_name, ssh_client in ssh_clients.items():
+            for ssh_client in ssh_clients.values():
                 self._create_policy_file(ssh_client, policy)
 
-            # Deploy updated extension template to the scale set.
             log.info("Deploying extensions to the scale set...")
             rg_client = ResourceGroupClient(self._context.vmss.cloud, self._context.vmss.subscription,
                                             self._context.vmss.resource_group, self._context.vmss.location)
 
+            # Deploy updated extension template to the scale set.
+            # If the test case is expected to fail, assert that the operation fails with the expected error messages.
             try:
                 rg_client.deploy_template(template=ext_template)
                 if expected_errors is not None and len(expected_errors) != 0:
@@ -176,9 +178,9 @@ def run(self):
                 log.info("")
 
             # After each test, clean up failed extensions to leave VMSS in a good state for the next test.
-            # If there are leftover failed extensions, CRP will attempt to uninstall them in the next test, but they
+            # If there are leftover failed extensions, CRP will attempt to uninstall them in the next test, but uninstall
             # will be disallowed by policy. Since CRP waits for a 90 minute timeout for uninstall, the operation will
-            # timeout and fail, and subsequently, the whole test case will fail.
+            # time out and fail without an appropriate error message (known issue), and the whole test case will fail.
             # To clean up, we first update the policy to allow all, then remove the extensions.
             log.info("Starting cleanup for test case...")
             for ssh_client in ssh_clients.values():
@@ -246,7 +248,7 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]:
             # We intentionally fail to test that dependent extensions are skipped
             #
             {
-                'message': r"Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Dependent Extension .* did not succeed. Status was error, duration=0"
+                'message': r"message=Dependent Extension .* did not succeed. Status was error, duration=0"
             },
             #
             # 2023-10-31T17:47:07.689083Z WARNING ExtHandler ExtHandler [PERIODIC] This status is being reported by the Guest Agent since no status file was reported by extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent: [ExtensionStatusError] Status file /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11/status/6.status does not exist
diff --git a/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py b/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py
index 5bab40b8d..03155cb44 100755
--- a/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py
+++ b/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py
@@ -36,8 +36,11 @@ def main():
     found = False
 
     try:
-        after_timestamp = (datetime.strptime(args.after_timestamp, '%Y-%m-%d %H:%M:%S') if args.after_timestamp else None)
-        found = AgentLog(Path('/var/log/waagent.log')).agent_log_contains(args.data, after_timestamp)
+        if args.after_timestamp is not None:
+            after_datetime = datetime.strptime(args.after_timestamp, '%Y-%m-%d %H:%M:%S')
+            found = AgentLog(Path('/var/log/waagent.log')).agent_log_contains(args.data, after_datetime)
+        else:
+            found = AgentLog(Path('/var/log/waagent.log')).agent_log_contains(args.data)
         if found:
             print("Found data: {0} in agent log".format(args.data))
         else: