Skip to content

Commit

Permalink
Cgroup rewrite: uses systemctl for expressing desired configuration i…
Browse files Browse the repository at this point in the history
…nstead of drop-in files (#3269)

* cgroup rewrite

* new test

* mock cleanup

* logging

* address comments

* removed script

* new comment changes

* pylint

* e2e test changes
  • Loading branch information
nagworld9 authored Dec 17, 2024
1 parent 50fe8ca commit d7fae2d
Show file tree
Hide file tree
Showing 26 changed files with 743 additions and 638 deletions.
7 changes: 5 additions & 2 deletions azurelinuxagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroupcontroller import AGENT_LOG_COLLECTOR
from azurelinuxagent.ga.cpucontroller import _CpuController
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.cgroupapi import create_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.firewall_manager import FirewallManager

import azurelinuxagent.common.conf as conf
Expand Down Expand Up @@ -216,7 +216,8 @@ def collect_logs(self, is_full_mode):
tracked_controllers = []
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
try:
cgroup_api = get_cgroup_api()
cgroup_api = create_cgroup_api()
logger.info("Using cgroup {0} for resource enforcement and monitoring".format(cgroup_api.get_cgroup_version()))
except InvalidCgroupMountpointException as e:
event.warn(WALAEventOperation.LogCollection, "The agent does not support cgroups if the default systemd mountpoint is not being used: {0}", ustr(e))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
Expand All @@ -226,6 +227,8 @@ def collect_logs(self, is_full_mode):

log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
tracked_controllers = log_collector_cgroup.get_controllers()
for controller in tracked_controllers:
logger.info("{0} controller for cgroup: {1}".format(controller.get_controller_type(), controller))

if len(tracked_controllers) != len(log_collector_cgroup.get_supported_controller_names()):
event.warn(WALAEventOperation.LogCollection, "At least one required controller is missing. The following controllers are required for the log collector to run: {0}", log_collector_cgroup.get_supported_controller_names())
Expand Down
49 changes: 48 additions & 1 deletion azurelinuxagent/common/osutil/systemd.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ def get_version():
# systemd 245 (245.4-4ubuntu3)
# +PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP etc
#
return shellutil.run_command(['systemctl', '--version'])
# return only the first line, e.g. "systemd 245 (245.4-4ubuntu3)"
try:
output = shellutil.run_command(['systemctl', '--version'])
version = output.split('\n')[0]
return version
except Exception:
return "unknown"


def get_unit_file_install_path():
Expand Down Expand Up @@ -84,3 +90,44 @@ def get_unit_property(unit_name, property_name):
raise ValueError("Can't find property {0} of {1}".format(property_name, unit_name))
return match.group('value')


def set_unit_run_time_property(unit_name, property_name, value):
    """
    Apply a single property to a unit using 'systemctl set-property ... --runtime'.
    Note: --runtime settings only apply until the next reboot

    Raises ValueError if the systemctl command fails with shellutil.CommandError.
    """
    # Ex: systemctl set-property foobar.service CPUWeight=200 --runtime
    assignment = "{0}={1}".format(property_name, value)
    command = ["systemctl", "set-property", unit_name, assignment, "--runtime"]
    try:
        shellutil.run_command(command)
    except shellutil.CommandError as error:
        # Callers get a ValueError that carries the underlying command failure
        raise ValueError("Can't set property {0} of {1}: {2}".format(property_name, unit_name, error))


def set_unit_run_time_properties(unit_name, property_names, values):
    """
    Apply several properties to a unit with one 'systemctl set-property ... --runtime' invocation.
    Note: --runtime settings only apply until the next reboot

    property_names and values are paired by position and must have the same length.
    Raises ValueError if the lengths differ or if the systemctl command fails with shellutil.CommandError.
    """
    if len(property_names) != len(values):
        raise ValueError("The number of property names:{0} and values:{1} must be the same".format(property_names, values))

    # Build the NAME=VALUE arguments in the order given by the caller
    properties = []
    for name, value in zip(property_names, values):
        properties.append("{0}={1}".format(name, value))

    # Ex: systemctl set-property foobar.service CPUWeight=200 MemoryMax=2G IPAccounting=yes --runtime
    command = ["systemctl", "set-property", unit_name]
    command.extend(properties)
    command.append("--runtime")

    try:
        shellutil.run_command(command)
    except shellutil.CommandError as error:
        raise ValueError("Can't set properties {0} of {1}: {2}".format(properties, unit_name, error))


def is_unit_loaded(unit_name):
    """
    Determine if a unit is loaded

    Returns True only when the unit's LoadState property equals "loaded" (case-insensitive).
    Returns False when the property cannot be retrieved: either the underlying command fails
    (shellutil.CommandError) or the property is not found in its output (get_unit_property
    raises ValueError in that case).
    """
    try:
        load_state = get_unit_property(unit_name, "LoadState")
    except (shellutil.CommandError, ValueError):
        # This is a yes/no query; any failure to read LoadState is reported as "not loaded"
        return False
    return load_state.lower() == "loaded"
28 changes: 20 additions & 8 deletions azurelinuxagent/ga/cgroupapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class CGroupUtil(object):
Cgroup utility methods which are independent of systemd cgroup api.
"""
@staticmethod
def cgroups_supported():
def distro_supported():
distro_info = get_distro()
distro_name = distro_info[0]
try:
Expand Down Expand Up @@ -149,7 +149,7 @@ def __init__(self, msg=None):
super(InvalidCgroupMountpointException, self).__init__(msg)


def get_cgroup_api():
def create_cgroup_api():
"""
Determines which version of Cgroup should be used for resource enforcement and monitoring by the Agent and returns
the corresponding Api.
Expand All @@ -172,7 +172,6 @@ def get_cgroup_api():
root_hierarchy_mode = shellutil.run_command(["stat", "-f", "--format=%T", CGROUP_FILE_SYSTEM_ROOT]).rstrip()

if root_hierarchy_mode == "cgroup2fs":
log_cgroup_info("Using cgroup v2 for resource enforcement and monitoring")
return SystemdCgroupApiv2()

elif root_hierarchy_mode == "tmpfs":
Expand All @@ -192,7 +191,6 @@ def get_cgroup_api():
# mounted in a location other than the systemd default, raise Exception.
if not cgroup_api_v1.are_mountpoints_systemd_created():
raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api_v1.get_controller_mountpoints())))
log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring")
return cgroup_api_v1

raise CGroupsException("{0} has an unexpected file type: {1}".format(CGROUP_FILE_SYSTEM_ROOT, root_hierarchy_mode))
Expand All @@ -206,6 +204,12 @@ def __init__(self):
self._systemd_run_commands = []
self._systemd_run_commands_lock = threading.RLock()

def get_cgroup_version(self):
    """
    Returns the version of the cgroup hierarchy in use.

    This base implementation has no version to report; concrete cgroup API implementations are
    expected to override it (the v1 and v2 implementations return "v1" and "v2" respectively).

    Raises NotImplementedError when called on the base implementation.
    """
    # The exception must be raised, not returned: returning the instance would hand callers an
    # exception object where they expect a version string (e.g. when formatting a log message).
    raise NotImplementedError()

def get_systemd_run_commands(self):
"""
Returns a list of the systemd-run commands currently running (given as PIDs)
Expand Down Expand Up @@ -297,6 +301,12 @@ def _get_controller_mountpoints():
mount_points[controller] = path
return mount_points

def get_cgroup_version(self):
    """
    Reports which cgroup hierarchy version this API works with; always the string "v1".
    """
    version = "v1"
    return version

def get_controller_mountpoints(self):
"""
Returns a dictionary of controller-mountpoint mappings.
Expand Down Expand Up @@ -479,6 +489,12 @@ def _get_root_cgroup_path():
return root_cgroup_path
return ""

def get_cgroup_version(self):
    """
    Reports which cgroup hierarchy version this API works with; always the string "v2".
    """
    version = "v2"
    return version

def get_root_cgroup_path(self):
"""
Returns the unified cgroup mountpoint.
Expand Down Expand Up @@ -650,8 +666,6 @@ def get_controllers(self, expected_relative_path=None):
controller = MemoryControllerV1(self._cgroup_name, controller_path)

if controller is not None:
msg = "{0} controller for cgroup: {1}".format(supported_controller_name, controller)
log_cgroup_info(msg)
controllers.append(controller)

return controllers
Expand Down Expand Up @@ -729,8 +743,6 @@ def get_controllers(self, expected_relative_path=None):
controller = MemoryControllerV2(self._cgroup_name, self._cgroup_path)

if controller is not None:
msg = "{0} controller for cgroup: {1}".format(supported_controller_name, controller)
log_cgroup_info(msg)
controllers.append(controller)

return controllers
Expand Down
Loading

0 comments on commit d7fae2d

Please sign in to comment.