Skip to content

Commit

Permalink
filesystem: revamp udev handling
Browse files Browse the repository at this point in the history
In LP: #2009141, we are hitting kernel limits and pyudev buffer limits.
We don't care about specific events, so much as getting one event,
waiting for things to calm down, then reprobing.

Outright disable the event monitor, and re-enable later.  If there is a
storm of events, testing has shown that stopping the listener is not
enough.
  • Loading branch information
dbungert committed Sep 27, 2023
1 parent aaaf873 commit c5a5713
Showing 1 changed file with 35 additions and 19 deletions.
54 changes: 35 additions & 19 deletions subiquity/server/controllers/filesystem.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,6 @@
import logging
import os
import pathlib
import select
import time
from typing import Any, Callable, Dict, List, Optional, Union

@@ -273,6 +272,7 @@ def __init__(self, app):
self._system_mounter: Optional[Mounter] = None
self._role_to_device: Dict[str, _Device] = {}
self._device_to_structure: Dict[_Device, snapdapi.OnVolume] = {}
self._pyudev_context: Optional[pyudev.Context] = None
self.use_tpm: bool = False
self.locked_probe_data: bool = False
# If probe data come in while we are doing partitioning, store it in
@@ -299,7 +299,7 @@ async def configured(self):
):
self.app.base_model.source.search_drivers = not self.is_core_boot_classic()
await super().configured()
self.stop_listening_udev()
self.stop_monitor()

async def _mount_systems_dir(self, variation_name):
self._source_handler = self.app.controllers.Source.get_handler(variation_name)
@@ -1294,6 +1294,13 @@ async def _probe(self, *, context=None):
elapsed = time.time() - start
log.debug(f"{short_label} probing took {elapsed:.1f} seconds")
break
# The start_monitor call is down here, purposefully after the probe is
# complete and not intended to be run after a probing timeout. The
# reason for the monitor is to decide when to re-probe, so there is no
# need to monitor for events before the first probe has even happened,
# and in the current implementation we have no plan forward if a probe
# timeout has happened, so doing it again is unlikely to help.
self.start_monitor()

async def run_autoinstall_guided(self, layout):
name = layout["name"]
@@ -1456,21 +1463,31 @@ def start(self):
self._start_task = schedule_task(self._start())

async def _start(self):
context = pyudev.Context()
self._monitor = pyudev.Monitor.from_netlink(context)
self._monitor.filter_by(subsystem="block")
self._monitor.enable_receiving()
self.start_listening_udev()
await self._probe_task.start()

def start_listening_udev(self):
def start_monitor(self):
if self._configured:
return

log.debug("start_monitor")
if self._pyudev_context is None:
self._pyudev_context = pyudev.Context()
self._monitor = pyudev.Monitor.from_netlink(self._pyudev_context)
self._monitor.filter_by(subsystem="block")
self._monitor.start()
loop = asyncio.get_running_loop()
loop.add_reader(self._monitor.fileno(), self._udev_event)

def stop_listening_udev(self):
def stop_monitor(self):
if self._monitor is None:
return

log.debug("stop_monitor")
loop = asyncio.get_running_loop()
loop.remove_reader(self._monitor.fileno())

self._monitor = None

def ensure_probing(self):
try:
self._probe_task.start_sync()
@@ -1480,21 +1497,20 @@ def ensure_probing(self):
log.debug("Triggered Probert run on udev event")

def _udev_event(self):
# We outright stop monitoring because we're not super concerned about
# the specifics of the udev event, only that one happened and that when
# the events settle, we want to reprobe. This is significantly faster
# than keeping a monitor around and draining the event queue.
# LP: #2009141
self.stop_monitor()

cp = run_command(["udevadm", "settle", "-t", "0"])

if cp.returncode != 0:
log.debug("waiting 0.1 to let udev event queue settle")
self.stop_listening_udev()
loop = asyncio.get_running_loop()
loop.call_later(0.1, self.start_listening_udev)
loop.call_later(0.1, self._udev_event)
return
# Drain the udev events in the queue -- if we stopped listening to
# allow udev to settle, it's good bet there is more than one event to
# process and we don't want to kick off a full block probe for each
# one. It's a touch unfortunate that pyudev doesn't have a
# non-blocking read so we resort to select().
while select.select([self._monitor.fileno()], [], [], 0)[0]:
action, dev = self._monitor.receive_device()
log.debug("_udev_event %s %s", action, dev)
self.ensure_probing()

def make_autoinstall(self):

0 comments on commit c5a5713

Please sign in to comment.