Azure · mgunnala · Nov 8, 2024 · Nov 8, 2024 · Nov 8, 2024 · Nov 11, 2024
@@ -39,6 +39,7 @@
 from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \
     SupportedFeatureNames, get_supported_feature_by_name, get_agent_supported_features_list_for_crp
 from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
+from azurelinuxagent.ga.policy.policy_engine import ExtensionPolicyEngine, ExtensionPolicyError
 from azurelinuxagent.common.datacontract import get_properties, set_properties
 from azurelinuxagent.common.errorstate import ErrorState
 from azurelinuxagent.common.event import add_event, elapsed_milliseconds, WALAEventOperation, \
@@ -482,10 +483,50 @@ def handle_ext_handlers(self, goal_state_id):
         depends_on_err_msg = None
         extensions_enabled = conf.get_extensions_enabled()
 
+        # Instantiate policy engine, and use same engine to handle all extension handlers.
+        # If an error is thrown during policy engine initialization, we block all extensions and report the error via handler/extension status for
+        # each extension.
+        policy_error = None
+        try:
+            policy_engine = ExtensionPolicyEngine()
+        except Exception as ex:
+            policy_error = ex
+
         for extension, ext_handler in all_extensions:
 
             handler_i = ExtHandlerInstance(ext_handler, self.protocol, extension=extension)
 
+            # Invoke policy engine to determine if extension is allowed. If not, block extension and report error on
+            # behalf of the extension.
+            policy_err_map = {
+                ExtensionRequestedState.Enabled: ('enable', ExtensionErrorCodes.PluginEnableProcessingFailed),
+                # Note: currently, when uninstall is requested for an extension, CRP polls until the agent does not
+                # report status for that extension, or until timeout is reached. In the case of a policy error, the
+                # agent reports failed status on behalf of the extension, which will cause CRP to poll for the full
+                # timeout, instead of failing fast.
+                #
+                # TODO: CRP does not currently have a terminal error code for uninstall. Once this code is added, use
+                # it instead of PluginDisableProcessingFailed below.
+                ExtensionRequestedState.Uninstall: ('uninstall', ExtensionErrorCodes.PluginDisableProcessingFailed),
+                ExtensionRequestedState.Disabled: ('disable', ExtensionErrorCodes.PluginDisableProcessingFailed),
+            }
+            policy_op, policy_err_code = policy_err_map.get(ext_handler.state)
+            if policy_error is not None:
+                err = ExtensionPolicyError(msg="", inner=policy_error, code=policy_err_code)
+                self.__handle_and_report_policy_error(handler_i, err, report_op=handler_i.operation, message=ustr(err),
+                                                      extension=extension, report=True)
+                continue
+
+            extension_allowed = policy_engine.should_allow_extension(ext_handler.name)
+            if not extension_allowed:
+                msg = "failed to {0} extension '{1}' because extension is not specified in allowlist. To {0}, " \
-                msg = "failed to {0} extension '{1}' because extension is not specified in allowlist. To {0}, " \
+                msg = "failed to {0} extension '{1}' because it is not specified in allowlist. To {0}, " \
-                msg = "failed to {0} extension '{1}' because extension is not specified in allowlist. To {0}, " \
+                msg = "failed to {0} extension '{1}' because it is not specified in allowlist. To {0}, " \
+                      "add extension to the allowed list in the policy file ('{2}').".format(policy_op,
-                      "add extension to the allowed list in the policy file ('{2}').".format(policy_op,
+                      "add the extension to the allowed list in the policy file ('{2}').".format(policy_op,
-                      "add extension to the allowed list in the policy file ('{2}').".format(policy_op,
+                      "add the extension to the allowed list in the policy file ('{2}').".format(policy_op,
+                                                                                             ext_handler.name,
+                                                                                             conf.get_policy_file_path())
+                err = ExtensionPolicyError(msg, code=policy_err_code)
+                self.__handle_and_report_policy_error(handler_i, err, report_op=handler_i.operation, message=ustr(err),
+                                                      extension=extension, report=True)
+
             # In case of extensions disabled, we skip processing extensions. But CRP is still waiting for some status
             # back for the skipped extensions. In order to propagate the status back to CRP, we will report status back
             # here with an error message.
@@ -527,7 +568,11 @@ def handle_ext_handlers(self, goal_state_id):
                 continue
 
             # Process extensions and get if it was successfully executed or not
-            extension_success = self.handle_ext_handler(handler_i, extension, goal_state_id)
+            # If extension was blocked by policy, treat the extension as failed and do not process the handler.
+            if not extension_allowed:
+                extension_success = False
+            else:
+                extension_success = self.handle_ext_handler(handler_i, extension, goal_state_id)
 
             dep_level = self.__get_dependency_level((extension, ext_handler))
             if 0 <= dep_level < max_dep_level:
@@ -692,6 +737,32 @@ def __handle_and_report_ext_handler_errors(ext_handler_i, error, report_op, mess
             add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True,
                       message=message)
 
+    @staticmethod
+    def __handle_and_report_policy_error(ext_handler_i, error, report_op, message, report=True, extension=None):
-    def __handle_and_report_policy_error(ext_handler_i, error, report_op, message, report=True, extension=None):
+    def _report_policy_error(ext_handler_i, error, report_op, message, report=True, extension=None):
-    def __handle_and_report_policy_error(ext_handler_i, error, report_op, message, report=True, extension=None):
+    def _report_policy_error(ext_handler_i, error, report_op, message, report=True, extension=None):
+        # TODO: Consider merging this function with __handle_and_report_ext_handler_errors() above, after investigating
+        # the impact of this change.
+        #
+        # If extension status is present, CRP will ignore handler status and report extension status. In the case of policy errors,
+        # extensions are not processed, so collect_ext_status() reports transitioning status on behalf of the extension.
+        # However, extensions blocked by policy should fail fast, so agent should write a .status file for policy failures.
+        # Note that __handle_and_report_ext_handler_errors() does not create the file for single-config extensions, but changing
+        # it will require additional testing/investigation. As a temporary workaround, this separate function was created
+        # to write a status file for single-config extensions.
+
+        # Set handler status for all extensions (with and without settings)
+        ext_handler_i.set_handler_status(message=message, code=error.code)
+
+        # Create status file for extensions with settings (single and multi config).
+        if extension is not None:
+            ext_handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, code=error.code,
+                                                          operation=report_op, message=message)
+
+        if report:
+            name = ext_handler_i.get_extension_full_name(extension)
+            handler_version = ext_handler_i.ext_handler.version
+            add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True,
+                      message=message)
+
     def handle_enable(self, ext_handler_i, extension):
         """
              1- Ensure the handler is installed
@@ -990,7 +1061,10 @@ def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed):
         # extension even if HandlerState == NotInstalled (Sample scenario: ExtensionsGoalStateError, DecideVersionError, etc)
         # We also need to report extension status for an uninstalled handler if extensions are disabled because CRP
         # waits for extension runtime status before failing the extension operation.
-        if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled():
+        # In the case of policy failures, we want to report extension status with a terminal code so CRP fails fast. If
+        # extension status is not present, collect_ext_status() will set a default transitioning status, and CRP will
+        # wait for timeout.
+        if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled() or ExtensionPolicyEngine.get_policy_enforcement_enabled():
 
             # Since we require reading the Manifest for reading the heartbeat, this would fail if HandlerManifest not found.
             # Only try to read heartbeat if HandlerState != NotInstalled.

@@ -22,7 +22,7 @@
 from azurelinuxagent.common import logger
 from azurelinuxagent.common.event import WALAEventOperation, add_event
 from azurelinuxagent.common import conf
-from azurelinuxagent.common.exception import AgentError
+from azurelinuxagent.common.exception import AgentError, ExtensionError
 from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import _CaseFoldedDict
 from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
 
@@ -36,12 +36,6 @@
 _MAX_SUPPORTED_POLICY_VERSION = "0.1.0"
 
 
-class PolicyError(AgentError):
-    """
-    Error raised during agent policy enforcement.
-    """
-
-
 class InvalidPolicyError(AgentError):
     """
     Error raised if user-provided policy is invalid.
@@ -51,13 +45,22 @@ def __init__(self, msg, inner=None):
         super(InvalidPolicyError, self).__init__(msg, inner)
 
 
+class ExtensionPolicyError(ExtensionError):
+    """
+    Error raised during agent extension policy enforcement.
+    """
+    def __init__(self, msg, code, inner=None):
+        msg = "Extension will not be processed: {0}".format(msg)
+        super(ExtensionPolicyError, self).__init__(msg, inner, code)
+
+
 class _PolicyEngine(object):
     """
     Implements base policy engine API.
     """
     def __init__(self):
         # Set defaults for policy
-        self._policy_enforcement_enabled = self.__get_policy_enforcement_enabled()
+        self._policy_enforcement_enabled = self.get_policy_enforcement_enabled()
         if not self.policy_enforcement_enabled:
             return
 
@@ -76,7 +79,7 @@ def _log_policy_event(msg, is_success=True, op=WALAEventOperation.Policy, send_e
             add_event(op=op, message=msg, is_success=is_success, log_event=False)
 
     @staticmethod
-    def __get_policy_enforcement_enabled():
+    def get_policy_enforcement_enabled():
         """
         Policy will be enabled if (1) policy file exists at the expected location and (2) the conf flag "Debug.EnableExtensionPolicy" is true.
         """