Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean history files #3282

Merged
merged 5 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
from azurelinuxagent.common import logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import ExtensionsConfigError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.future import ustr, urlparse
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, GoalStateSource
from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentFamily, ExtensionState, InVMGoalStateMetaData
from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, \
format_exception, is_str_none_or_whitespace, is_str_empty, hasattrib
format_exception, is_str_none_or_whitespace, is_str_empty, hasattrib, gettextxml


class ExtensionsGoalStateFromExtensionsConfig(ExtensionsGoalState):
Expand All @@ -38,6 +38,8 @@ def __init__(self, incarnation, xml_text, wire_client):
self._text = xml_text
self._status_upload_blob = None
self._status_upload_blob_type = None
self._status_upload_blob_xml_node = None
self._artifacts_profile_blob_xml_node = None
self._required_features = []
self._on_hold = False
self._activity_id = None
Expand Down Expand Up @@ -81,21 +83,21 @@ def _parse_extensions_config(self, xml_text, wire_client):
if required_features_list is not None:
self._parse_required_features(required_features_list)

self._status_upload_blob = findtext(xml_doc, "StatusUploadBlob")

status_upload_node = find(xml_doc, "StatusUploadBlob")
self._status_upload_blob_type = getattrib(status_upload_node, "statusBlobType")
self._status_upload_blob_xml_node = find(xml_doc, "StatusUploadBlob")
self._status_upload_blob = gettext(self._status_upload_blob_xml_node)
self._status_upload_blob_type = getattrib(self._status_upload_blob_xml_node, "statusBlobType")
logger.verbose("Extension config shows status blob type as [{0}]", self._status_upload_blob_type)

self._on_hold = ExtensionsGoalStateFromExtensionsConfig._fetch_extensions_on_hold(xml_doc, wire_client)
self._artifacts_profile_blob_xml_node = find(xml_doc, "InVMArtifactsProfileBlob")
self._on_hold = ExtensionsGoalStateFromExtensionsConfig._fetch_extensions_on_hold(self._artifacts_profile_blob_xml_node, wire_client)

in_vm_gs_metadata = InVMGoalStateMetaData(find(xml_doc, "InVMGoalStateMetaData"))
self._activity_id = self._string_to_id(in_vm_gs_metadata.activity_id)
self._correlation_id = self._string_to_id(in_vm_gs_metadata.correlation_id)
self._created_on_timestamp = self._ticks_to_utc_timestamp(in_vm_gs_metadata.created_on_ticks)

@staticmethod
def _fetch_extensions_on_hold(xml_doc, wire_client):
def _fetch_extensions_on_hold(artifacts_profile_blob_xml_node, wire_client):
def log_info(message):
logger.info(message)
add_event(op=WALAEventOperation.ArtifactsProfileBlob, message=message, is_success=True, log_event=False)
Expand All @@ -104,7 +106,7 @@ def log_warning(message):
logger.warn(message)
add_event(op=WALAEventOperation.ArtifactsProfileBlob, message=message, is_success=False, log_event=False)

artifacts_profile_blob = findtext(xml_doc, "InVMArtifactsProfileBlob")
artifacts_profile_blob = gettext(artifacts_profile_blob_xml_node)
if is_str_none_or_whitespace(artifacts_profile_blob):
log_info("ExtensionsConfig does not include a InVMArtifactsProfileBlob; will assume the VM is not on hold")
return False
Expand Down Expand Up @@ -187,12 +189,30 @@ def extensions(self):
return self._extensions

def get_redacted_text(self):
    """
    Returns the text of the ExtensionsConfig with its secrets replaced by "***REDACTED***".

    The values redacted are the query strings of the StatusUploadBlob and InVMArtifactsProfileBlob URLs
    and the protectedSettings of each extension. When one of those values cannot be found in the text, an
    error message describing the failure is returned instead of the text.
    """
    def redact_url(unredacted, xml_node, name):
        """
        Replaces the query string of the URL held by 'xml_node' within 'unredacted'.
        Raises an Exception mentioning 'name' if the query string is not found in 'unredacted'.
        """
        if xml_node is None:  # the document does not include this URL
            return unredacted
        text_xml = gettextxml(xml_node)  # Note that we need to redact the raw XML text (which may contain escape sequences)
        if text_xml is None:
            return unredacted
        query = urlparse(text_xml).query
        # str.replace() with an empty pattern inserts the replacement between every character of the text;
        # a URL without a query string has nothing to redact, so leave the text untouched.
        if query == "":
            return unredacted
        redacted = unredacted.replace(query, "***REDACTED***")
        if redacted == unredacted:
            raise Exception('Could not redact {0}'.format(name))
        return redacted

    try:
        text = self._text
        text = redact_url(text, self._status_upload_blob_xml_node, "StatusUploadBlob")
        text = redact_url(text, self._artifacts_profile_blob_xml_node, "InVMArtifactsProfileBlob")

        # str.replace() redacts every occurrence at once, so a value shared by several extensions must be
        # processed only once; a second pass would find nothing and be reported as a failure.
        already_redacted = set()
        for ext_handler in self._extensions:
            for extension in ext_handler.settings:
                secret = extension.protectedSettings
                # None means there are no protected settings; the empty string must also be skipped because
                # replacing an empty pattern would corrupt the whole text.
                if not secret or secret in already_redacted:
                    continue
                original = text
                text = text.replace(secret, "***REDACTED***")
                if text == original:
                    return 'Could not redact protectedSettings for {0}'.format(extension.name)
                already_redacted.add(secret)
        return text
    except Exception as e:
        return "Error redacting text: {0}".format(e)

def _parse_required_features(self, required_features_list):
for required_feature in findall(required_features_list, "RequiredFeature"):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@
# Requires Python 2.6+ and Openssl 1.0+
import datetime
import json
import re
import sys

from azurelinuxagent.common import logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.future import ustr, urlparse
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, VmSettingsParseError
from azurelinuxagent.common.protocol.restapi import VMAgentFamily, Extension, ExtensionRequestedState, ExtensionSettings
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
Expand Down Expand Up @@ -143,7 +142,27 @@ def extensions(self):
return self._extensions

def get_redacted_text(self):
    """
    Returns the text of the vmSettings with its secrets replaced by "***REDACTED***".

    The values redacted are the query string of the status upload blob URL and the protectedSettings of
    each extension. When one of those values cannot be found in the text, an error message describing the
    failure is returned instead of the text.
    """
    try:
        text = self._text

        status_upload_blob = self.status_upload_blob
        if status_upload_blob is not None:
            query = urlparse(status_upload_blob).query
            # str.replace() with an empty pattern inserts the replacement between every character of the
            # text; a URL without a query string has nothing to redact, so leave the text untouched.
            if query != "":
                original = text
                text = text.replace(query, "***REDACTED***")
                if text == original:
                    raise Exception('Could not redact the status upload blob')

        # str.replace() redacts every occurrence at once, so a value shared by several extensions must be
        # processed only once; a second pass would find nothing and be reported as a failure.
        already_redacted = set()
        for ext_handler in self._extensions:
            for extension in ext_handler.settings:
                secret = extension.protectedSettings
                # None means there are no protected settings; the empty string must also be skipped because
                # replacing an empty pattern would corrupt the whole text.
                if not secret or secret in already_redacted:
                    continue
                original = text
                text = text.replace(secret, "***REDACTED***")
                if text == original:
                    return 'Could not redact protectedSettings for {0}'.format(extension.name)
                already_redacted.add(secret)

        return text
    except Exception as e:
        return "Error redacting text: {0}".format(e)

def _parse_vm_settings(self, json_text):
vm_settings = _CaseFoldedDict.from_dict(json.loads(json_text))
Expand Down
13 changes: 13 additions & 0 deletions azurelinuxagent/common/utils/textutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,19 @@ def gettext(node):
return None


def gettextxml(node):
    """
    Return the raw XML (as produced by toxml()) of the first text child of 'node'.
    Returns None when 'node' is None or none of its children is a text node.
    """
    if node is None:
        return None

    text_children = (child for child in node.childNodes if child.nodeType == child.TEXT_NODE)
    first_text = next(text_children, None)
    return None if first_text is None else first_text.toxml()


def findtext(root, tag, namespace=None):
"""
Get text of node by tag and namespace under Node root.
Expand Down
101 changes: 72 additions & 29 deletions tests/common/protocol/test_goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
import glob
import os
import re
import shutil
import time

from azurelinuxagent.common import conf
from azurelinuxagent.common.future import httpclient
from azurelinuxagent.common.future import httpclient, urlparse
from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource, GoalStateChannel
from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig
from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings
Expand Down Expand Up @@ -162,40 +163,82 @@ def http_get_handler(url, *_, **__):
self._assert_directory_contents(
self._find_history_subdirectory("234-987"), ["VmSettings.json"])

def test_it_should_redact_the_protected_settings_when_saving_to_the_history_directory(self):
with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol:
protocol.mock_wire_data.set_incarnation(888)
protocol.mock_wire_data.set_etag(888)
def test_it_should_redact_extensions_config(self):
data_file = wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE.copy()
data_file["ext_conf"] = "wire/ext_conf_redact.xml"
with mock_wire_protocol(data_file, detect_protocol=False) as protocol:
protocol.mock_wire_data.set_incarnation(888) # set the incarnation to a known value that we can use to find the history directory

goal_state = GoalState(protocol.client, save_to_history=True)

extensions_goal_state = goal_state.extensions_goal_state
protected_settings = []
for ext_handler in extensions_goal_state.extensions:
for extension in ext_handler.settings:
if extension.protectedSettings is not None:
protected_settings.append(extension.protectedSettings)
if goal_state.extensions_goal_state.source != GoalStateSource.Fabric:
raise Exception("The test goal state should be Fabric (it is {0})".format(goal_state.extensions_goal_state.source))

protected_settings = [s.protectedSettings for s in [e.settings[0] for e in goal_state.extensions_goal_state.extensions]]
if len(protected_settings) == 0:
raise Exception("The test goal state does not include any protected settings")

history_directory = self._find_history_subdirectory("888-888")
extensions_config_file = os.path.join(history_directory, "ExtensionsConfig.xml")
vm_settings_file = os.path.join(history_directory, "VmSettings.json")
for file_name in extensions_config_file, vm_settings_file:
with open(file_name, "r") as stream:
file_contents = stream.read()

for settings in protected_settings:
self.assertNotIn(
settings,
file_contents,
"The protectedSettings should not have been saved to {0}".format(file_name))

matches = re.findall(r'"protectedSettings"\s*:\s*"\*\*\* REDACTED \*\*\*"', file_contents)
self.assertEqual(
len(matches),
len(protected_settings),
"Could not find the expected number of redacted settings in {0}.\nExpected {1}.\n{2}".format(file_name, len(protected_settings), file_contents))
history_directory = self._find_history_subdirectory("888")
extensions_config = os.path.join(history_directory, "ExtensionsConfig.xml")
with open(extensions_config, "r") as f:
history_contents = f.read()

vmap_blob = re.sub(r'(?s)(.*<InVMArtifactsProfileBlob.*>)(.*)(</InVMArtifactsProfileBlob>.*)', r'\2', goal_state.extensions_goal_state._text)
query = urlparse(vmap_blob).query
redacted = vmap_blob.replace(query, "***REDACTED***")
self.assertNotIn(query, history_contents, "The VMAP query string was not redacted from the history")
self.assertNotIn(vmap_blob, history_contents, "The VMAP URL was not redacted in the history")
self.assertIn(redacted, history_contents, "Could not find the redacted VMAP URL in the history")

status_blob = re.sub(r'(?s)(.*<StatusUploadBlob.*>)(.*)(</StatusUploadBlob>.*)', r'\2', goal_state.extensions_goal_state._text)
query = urlparse(status_blob).query
redacted = status_blob.replace(query, "***REDACTED***")
self.assertNotIn(query, history_contents, "The Status query string was not redacted from the history")
self.assertNotIn(status_blob, history_contents, "The Status URL was not redacted in the history")
self.assertIn(redacted, history_contents, "Could not find the redacted Status URL in the history")

for s in protected_settings:
self.assertNotIn(s, history_contents, "The protected settings were not redacted from the history")
matches = re.findall(r'"protectedSettings"\s*:\s*"\*\*\*REDACTED\*\*\*"', history_contents)
self.assertEqual(len(matches), len(protected_settings),
"Could not find the expected number of redacted settings in {0}.\nExpected {1}.\n{2}".format(extensions_config, len(protected_settings), history_contents))

def test_it_should_redact_vm_settings(self):
    """
    Saves a FastTrack goal state to the history directory and checks that neither the query string of the
    status blob URL nor the protected settings appear in the saved VmSettings.json.
    """
    redacted_settings_pattern = r'"protectedSettings"\s*:\s*"\*\*\*REDACTED\*\*\*"'

    # NOTE: vm_settings-redact_formatted.json is the same as vm_settings-redact.json, but formatted for easier reading
    test_files = ["hostgaplugin/vm_settings-redact.json", "hostgaplugin/vm_settings-redact_formatted.json"]

    for test_file in test_files:
        data_file = wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE.copy()
        data_file["vm_settings"] = test_file
        data_file["ETag"] = "123"

        with mock_wire_protocol(data_file, detect_protocol=False) as protocol:
            goal_state = GoalState(protocol.client, save_to_history=True)

            # Sanity checks on the test data itself
            source = goal_state.extensions_goal_state.source
            if source != GoalStateSource.FastTrack:
                raise Exception("The test goal state should be FastTrack (it is {0}) [test: {1}]".format(source, test_file))

            protected_settings = [handler.settings[0].protectedSettings for handler in goal_state.extensions_goal_state.extensions]
            if not protected_settings:
                raise Exception("The test goal state does not include any protected settings [test: {0}]".format(test_file))

            # Read back what was saved to the history
            history_directory = self._find_history_subdirectory("*-123")
            vm_settings = os.path.join(history_directory, "VmSettings.json")
            with open(vm_settings, "r") as history_file:
                history_contents = history_file.read()

            # The URL must be present, but with its query string replaced
            status_blob = goal_state.extensions_goal_state.status_upload_blob
            query = urlparse(status_blob).query
            redacted = status_blob.replace(query, "***REDACTED***")
            self.assertNotIn(query, history_contents, "The Status query string was not redacted from the history [test: {0}]".format(test_file))
            self.assertNotIn(status_blob, history_contents, "The Status URL was not redacted in the history [test: {0}]".format(test_file))
            self.assertIn(redacted, history_contents, "Could not find the redacted Status URL in the history [test: {0}]".format(test_file))

            for setting in protected_settings:
                self.assertNotIn(setting, history_contents, "The protected settings were not redacted from the history [test: {0}]".format(test_file))

            redacted_count = len(re.findall(redacted_settings_pattern, history_contents))
            self.assertEqual(redacted_count, len(protected_settings),
                "Could not find the expected number of redacted settings in {0} [test {1}].\nExpected {2}.\n{3}".format(vm_settings, test_file, len(protected_settings), history_contents))

            shutil.rmtree(history_directory)  # clean up the history directory in-between test cases to avoid stale history files

def test_it_should_save_vm_settings_on_parse_errors(self):
with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol:
Expand Down
Loading
Loading