Skip to content

Commit

Permalink
filesystem: revamp udev handling
Browse files Browse the repository at this point in the history
In LP: #2009141, we are hitting kernel limits and pyudev buffer limits.
We care less about the specific events than about getting one event,
waiting for things to calm down, and then reprobing.

Outright disable the event monitor, and re-enable it later.  Testing has
shown that, during a storm of events, merely stopping the listener is not
enough.
  • Loading branch information
dbungert committed Sep 27, 2023
1 parent aaaf873 commit a3d45f0
Showing 1 changed file with 35 additions and 19 deletions.
54 changes: 35 additions & 19 deletions subiquity/server/controllers/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import logging
import os
import pathlib
import select
import time
from typing import Any, Callable, Dict, List, Optional, Union

Expand Down Expand Up @@ -273,6 +272,7 @@ def __init__(self, app):
self._system_mounter: Optional[Mounter] = None
self._role_to_device: Dict[str, _Device] = {}
self._device_to_structure: Dict[_Device, snapdapi.OnVolume] = {}
self._pyudev_context: Optional[pyudev.Context] = None
self.use_tpm: bool = False
self.locked_probe_data: bool = False
# If probe data come in while we are doing partitioning, store it in
Expand All @@ -299,7 +299,7 @@ async def configured(self):
):
self.app.base_model.source.search_drivers = not self.is_core_boot_classic()
await super().configured()
self.stop_listening_udev()
self.stop_monitor()

async def _mount_systems_dir(self, variation_name):
self._source_handler = self.app.controllers.Source.get_handler(variation_name)
Expand Down Expand Up @@ -1293,6 +1293,13 @@ async def _probe(self, *, context=None):
finally:
elapsed = time.time() - start
log.debug(f"{short_label} probing took {elapsed:.1f} seconds")
# In the past, the equivalent of this start_monitor() call happened
# much sooner. However, we don't actually need the information it
# provides until a probe has finished, so start_monitor() is delayed
# to here. start_monitor() is still allowed after a failed probe, in
# case of a hotplug event, perhaps one that removes a problematic
# device.
self.start_monitor()
break

async def run_autoinstall_guided(self, layout):
Expand Down Expand Up @@ -1456,21 +1463,31 @@ def start(self):
self._start_task = schedule_task(self._start())

async def _start(self):
context = pyudev.Context()
self._monitor = pyudev.Monitor.from_netlink(context)
self._monitor.filter_by(subsystem="block")
self._monitor.enable_receiving()
self.start_listening_udev()
await self._probe_task.start()

def start_listening_udev(self):
def start_monitor(self):
if self._configured:
return

log.debug("start_monitor")
if self._pyudev_context is None:
self._pyudev_context = pyudev.Context()
self._monitor = pyudev.Monitor.from_netlink(self._pyudev_context)
self._monitor.filter_by(subsystem="block")
self._monitor.start()
loop = asyncio.get_running_loop()
loop.add_reader(self._monitor.fileno(), self._udev_event)

def stop_listening_udev(self):
def stop_monitor(self):
if self._monitor is None:
return

log.debug("stop_monitor")
loop = asyncio.get_running_loop()
loop.remove_reader(self._monitor.fileno())

self._monitor = None

def ensure_probing(self):
try:
self._probe_task.start_sync()
Expand All @@ -1480,21 +1497,20 @@ def ensure_probing(self):
log.debug("Triggered Probert run on udev event")

def _udev_event(self):
# We outright stop monitoring because we're not super concerned about
# the specifics of the udev event, only that one happened and that when
# the events settle, we want to reprobe. This is significantly faster
# than keeping a monitor around and draining the event queue.
# LP: #2009141
self.stop_monitor()

cp = run_command(["udevadm", "settle", "-t", "0"])

if cp.returncode != 0:
log.debug("waiting 0.1 to let udev event queue settle")
self.stop_listening_udev()
loop = asyncio.get_running_loop()
loop.call_later(0.1, self.start_listening_udev)
loop.call_later(0.1, self._udev_event)
return
# Drain the udev events in the queue -- if we stopped listening to
# allow udev to settle, it's a good bet there is more than one event to
# process and we don't want to kick off a full block probe for each
# one. It's a touch unfortunate that pyudev doesn't have a
# non-blocking read so we resort to select().
while select.select([self._monitor.fileno()], [], [], 0)[0]:
action, dev = self._monitor.receive_device()
log.debug("_udev_event %s %s", action, dev)
self.ensure_probing()

def make_autoinstall(self):
Expand Down

0 comments on commit a3d45f0

Please sign in to comment.