From 090e104c347da2e82da75a090c54293c75523c8f Mon Sep 17 00:00:00 2001
From: Chris Peterson <chris.peterson@canonical.com>
Date: Wed, 20 Sep 2023 15:10:08 -0700
Subject: [PATCH 1/9] autoinstall: allow for specifying autoinstall path on
 kernel command line

(cherry picked from commit 76b520afa9d36560080dd47de79c81a3ba209deb)
---
 subiquity/server/server.py            | 14 ++++++----
 subiquity/server/tests/test_server.py | 38 ++++++++++++++++++++-------
 2 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/subiquity/server/server.py b/subiquity/server/server.py
index 999d5feda..1cadce595 100644
--- a/subiquity/server/server.py
+++ b/subiquity/server/server.py
@@ -574,10 +574,11 @@ async def wait_for_cloudinit(self):
 
     def select_autoinstall(self):
         # precedence
-        # 1. autoinstall at root of drive
-        # 2. command line argument autoinstall
-        # 3. autoinstall supplied by cloud config
-        # 4. autoinstall baked into the iso, found at /cdrom/autoinstall.yaml
+        # 1. command line argument autoinstall
+        # 2. kernel command line argument subiquity.autoinstallpath
+        # 3. autoinstall at root of drive
+        # 4. autoinstall supplied by cloud config
+        # 5. autoinstall baked into the iso, found at /cdrom/autoinstall.yaml
 
         # if opts.autoinstall is set and empty, that means
         # autoinstall has been explicitly disabled.
@@ -588,9 +589,12 @@ def select_autoinstall(self):
         ):
             raise Exception(f"Autoinstall argument {self.opts.autoinstall} not found")
 
+        kernel_install_path = self.kernel_cmdline.get("subiquity.autoinstallpath", None)
+
         locations = (
-            self.base_relative(root_autoinstall_path),
             self.opts.autoinstall,
+            kernel_install_path,
+            self.base_relative(root_autoinstall_path),
             self.base_relative(cloud_autoinstall_path),
             self.base_relative(iso_autoinstall_path),
         )
diff --git a/subiquity/server/tests/test_server.py b/subiquity/server/tests/test_server.py
index 363e506ff..60c40280d 100644
--- a/subiquity/server/tests/test_server.py
+++ b/subiquity/server/tests/test_server.py
@@ -54,31 +54,46 @@ def create(self, path, contents):
         return path
 
     def test_autoinstall_disabled(self):
+        self.server.opts.autoinstall = ""
+        self.server.kernel_cmdline = {"subiquity.autoinstallpath": "kernel"}
         self.create(root_autoinstall_path, "root")
         self.create(cloud_autoinstall_path, "cloud")
         self.create(iso_autoinstall_path, "iso")
-        self.server.opts.autoinstall = ""
         self.assertIsNone(self.server.select_autoinstall())
 
-    def test_root_wins(self):
+    def test_arg_wins(self):
+        arg = self.create(self.path("arg.autoinstall.yaml"), "arg")
+        self.server.opts.autoinstall = arg
+        kernel = self.create(self.path("kernel.autoinstall.yaml"), "kernel")
+        self.server.kernel_cmdline = {"subiquity.autoinstallpath": kernel}
         root = self.create(root_autoinstall_path, "root")
-        autoinstall = self.create(self.path("arg.autoinstall.yaml"), "arg")
-        self.server.opts.autoinstall = autoinstall
         self.create(cloud_autoinstall_path, "cloud")
         self.create(iso_autoinstall_path, "iso")
         self.assertEqual(root, self.server.select_autoinstall())
-        self.assert_contents(root, "root")
+        self.assert_contents(root, "arg")
 
-    def test_arg_wins(self):
-        root = self.path(root_autoinstall_path)
-        arg = self.create(self.path("arg.autoinstall.yaml"), "arg")
-        self.server.opts.autoinstall = arg
+    def test_kernel_wins(self):
+        self.server.opts.autoinstall = None
+        kernel = self.create(self.path("kernel.autoinstall.yaml"), "kernel")
+        self.server.kernel_cmdline = {"subiquity.autoinstallpath": kernel}
+        root = self.create(root_autoinstall_path, "root")
         self.create(cloud_autoinstall_path, "cloud")
         self.create(iso_autoinstall_path, "iso")
         self.assertEqual(root, self.server.select_autoinstall())
-        self.assert_contents(root, "arg")
+        self.assert_contents(root, "kernel")
+
+    def test_root_wins(self):
+        self.server.opts.autoinstall = None
+        self.server.kernel_cmdline = {}
+        root = self.create(root_autoinstall_path, "root")
+        self.create(cloud_autoinstall_path, "cloud")
+        self.create(iso_autoinstall_path, "iso")
+        self.assertEqual(root, self.server.select_autoinstall())
+        self.assert_contents(root, "root")
 
     def test_cloud_wins(self):
+        self.server.opts.autoinstall = None
+        self.server.kernel_cmdline = {}
         root = self.path(root_autoinstall_path)
         self.create(cloud_autoinstall_path, "cloud")
         self.create(iso_autoinstall_path, "iso")
@@ -86,7 +101,10 @@ def test_cloud_wins(self):
         self.assert_contents(root, "cloud")
 
     def test_iso_wins(self):
+        self.server.opts.autoinstall = None
+        self.server.kernel_cmdline = {}
         root = self.path(root_autoinstall_path)
+        # No cloud config file
         self.create(iso_autoinstall_path, "iso")
         self.assertEqual(root, self.server.select_autoinstall())
         self.assert_contents(root, "iso")

From 03a3dd821a9f575307c874fcff445e0a485390c2 Mon Sep 17 00:00:00 2001
From: Chris Peterson <chris.peterson@canonical.com>
Date: Thu, 21 Sep 2023 11:52:56 -0700
Subject: [PATCH 2/9] docs: reflect autoinstall options and precedence

(cherry picked from commit 28dd55f9ddb5598d8d0cd0d0285b3414a9befe8e)
---
 doc/intro-to-autoinstall.rst | 55 +++++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/doc/intro-to-autoinstall.rst b/doc/intro-to-autoinstall.rst
index 79f5881e0..ffb03a9c3 100644
--- a/doc/intro-to-autoinstall.rst
+++ b/doc/intro-to-autoinstall.rst
@@ -78,24 +78,47 @@ Autoinstall on the install media
 Another option for supplying autoinstall to the Ubuntu installer is to place a
 file named :code:`autoinstall.yaml` on the install media itself.
 
-There are two potential locations for the :code:`autoinstall.yaml` file:
- * At the root of the "CD-ROM". When you write the installation ISO to a USB
-   Flash Drive, this can be done by copying the :code:`autoinstall.yaml` to the
-   partition containing the contents of the ISO - i.e.,
-   in the directory containing the ``casper`` sub-directory.
- * On the rootfs of the installation system - this option will typically
-   require modifying the installation ISO and is not suggested, but is
-   supported.
-
-Directly specifying autoinstall as a :code:`autoinstall.yaml` file does not
-require a :code:`#cloud-config` header, and does not use a top level
-``autoinstall:`` key. The autoinstall directives are placed at the top
-level. For example:
+There are two potential locations that subiquity will check for the
+:code:`autoinstall.yaml` file:
 
-.. code-block:: yaml
+* At the root of the "CD-ROM". When you write the installation ISO to a USB
+  Flash Drive, this can be done by copying the :code:`autoinstall.yaml` to the
+  partition containing the contents of the ISO - i.e.,
+  in the directory containing the ``casper`` sub-directory.
+* On the rootfs of the installation system - this option will typically
+  require modifying the installation ISO and is not suggested, but is
+  supported.
+
+Alternatively, you can pass the location of the autoinstall file on the kernel
+command line via the :code:`subiquity.autoinstallpath` parameter, where the
+path is relative to the rootfs of the installation system. For example:
+
+* :code:`subiquity.autoinstallpath=path/to/autoinstall.yaml`
+
+.. note::
+
+    Directly specifying autoinstall as a :code:`autoinstall.yaml` file does not
+    require a :code:`#cloud-config` header, and does not use a top level
+    ``autoinstall:`` key. The autoinstall directives are placed at the top
+    level. For example:
+
+    .. code-block:: yaml
+
+        version: 1
+        ....
+
+
+Order precedence of the autoinstall locations
+======================================
+
+Since there are many ways to specify the autoinstall file, it may happen that
+multiple locations are specified at once. Subiquity will look for the
+autoinstall file in the following order and pick the first existing one:
 
-    version: 1
-    ....
+1. Kernel command line
+2. Root of the installation system
+3. Cloud Config
+4. Root of the CD-ROM (ISO)
 
 
 Cloud-init and autoinstall interaction

From cfb9cce8e9656c703bfc0ab6ee7b6690801a108b Mon Sep 17 00:00:00 2001
From: Olivier Gayot <olivier.gayot@canonical.com>
Date: Fri, 22 Sep 2023 09:34:22 +0200
Subject: [PATCH 3/9] Bump curtin rev for recovery key on systems using zkey

Signed-off-by: Olivier Gayot <olivier.gayot@canonical.com>
(cherry picked from commit 22b6d1258bda1220c4ec6f29e36d9e6190739e4e)
---
 snapcraft.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/snapcraft.yaml b/snapcraft.yaml
index bdaf9b23d..d60d9bfcc 100644
--- a/snapcraft.yaml
+++ b/snapcraft.yaml
@@ -70,7 +70,7 @@ parts:
 
     source: https://git.launchpad.net/curtin
     source-type: git
-    source-commit: a7640fdcac396f9f09044dc7ca7553043ce4231c
+    source-commit: 64ea5fbe827aa98ddc63ea87de2de45689180c82
 
     override-pull: |
       craftctl default

From ad27603b12435228d64b61e4013c8e6e2a5a7166 Mon Sep 17 00:00:00 2001
From: Dan Bungert <daniel.bungert@canonical.com>
Date: Thu, 21 Sep 2023 10:41:57 -0600
Subject: [PATCH 4/9] snapd api: wait longer

While these changes are not supposed to take nearly this long,
per LP: #2034715 we know that they are, and that some systems will
correctly perform the finish_install() step if just given more time.

(cherry picked from commit 5a573f2cef87828e964193966664d5df99caf928)
---
 subiquitycore/snapd.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/subiquitycore/snapd.py b/subiquitycore/snapd.py
index afcd89f49..108e8d374 100644
--- a/subiquitycore/snapd.py
+++ b/subiquitycore/snapd.py
@@ -33,6 +33,11 @@
 
 
 class SnapdConnection:
+    # In LP: #2034715, we found that while some requests should be
+    # non-blocking, they are actually blocking and exceeding one minute.
+    # Extending the timeout helps.
+    default_timeout_seconds = 600
+
     def __init__(self, root, sock):
         self.root = root
         self.url_base = "http+unix://{}/".format(quote_plus(sock))
@@ -41,13 +46,19 @@ def get(self, path, **args):
         if args:
             path += "?" + urlencode(args)
         with requests_unixsocket.Session() as session:
-            return session.get(self.url_base + path, timeout=60)
+            return session.get(
+                self.url_base + path, timeout=self.default_timeout_seconds
+            )
 
     def post(self, path, body, **args):
         if args:
             path += "?" + urlencode(args)
         with requests_unixsocket.Session() as session:
-            return session.post(self.url_base + path, data=json.dumps(body), timeout=60)
+            return session.post(
+                self.url_base + path,
+                data=json.dumps(body),
+                timeout=self.default_timeout_seconds,
+            )
 
     def configure_proxy(self, proxy):
         log.debug("restarting snapd to pick up proxy config")

From 0ba5f775b0f236272103fded13d31d845a4dd738 Mon Sep 17 00:00:00 2001
From: Olivier Gayot <olivier.gayot@canonical.com>
Date: Thu, 21 Sep 2023 11:10:20 +0200
Subject: [PATCH 5/9] ui: have a distinct state file for rich mode over serial

We recently made sure that after doing a snap refresh, the rich mode
(i.e., either rich or basic) is preserved. This was implemented by
storing the rich mode in a state file. When the client starts, it loads
the rich mode from said state file if it exists.

Unfortunately, on s390x, it causes installs to default to basic mode.
This happens because on this architecture, a subiquity install consists
of:

 * a first client (over serial) showing the SSH password
 * a second client (logging over SSH) actually going through the
   installation UI.

Since the first client uses a serial connection, the state file is
created with rich-mode set to basic. Upon connecting using SSH, the
state file is read and the rich-mode is set to basic as well.

Fixed by storing the rich-mode in two separate files, one for clients
over serial and one for other clients.

LP: #2036096

Signed-off-by: Olivier Gayot <olivier.gayot@canonical.com>
(cherry picked from commit c95261e0def019f7492dd071cae27f5fcfb76b2e)
---
 subiquitycore/tests/test_tui.py | 76 +++++++++++++++++++++++++++++++++
 subiquitycore/tui.py            | 46 ++++++++++++++++----
 2 files changed, 113 insertions(+), 9 deletions(-)
 create mode 100644 subiquitycore/tests/test_tui.py

diff --git a/subiquitycore/tests/test_tui.py b/subiquitycore/tests/test_tui.py
new file mode 100644
index 000000000..4409082e9
--- /dev/null
+++ b/subiquitycore/tests/test_tui.py
@@ -0,0 +1,76 @@
+# Copyright 2023 Canonical, Ltd.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import json
+import pathlib
+from unittest.mock import Mock, patch
+
+from subiquitycore.tests import SubiTestCase
+from subiquitycore.tui import TuiApplication
+
+
+class TestTuiApplication(SubiTestCase):
+    def setUp(self):
+        with patch("subiquitycore.tui.Application.__init__", return_value=None):
+            opts = Mock()
+            opts.answers = None
+            opts.dry_run = True
+            self.tui = TuiApplication(opts)
+            # Usually, the below are assigned by Application.__init__()
+            self.tui.opts = opts
+            self.tui.state_dir = self.tmp_dir()
+
+    def test_get_initial_rich_mode_normal(self):
+        self.tui.opts.run_on_serial = False
+        self.assertTrue(self.tui.get_initial_rich_mode())
+
+        # With a state file.
+        with (pathlib.Path(self.tui.state_dir) / "rich-mode-tty").open("w") as fh:
+            fh.write(json.dumps(True))
+        self.assertTrue(self.tui.get_initial_rich_mode())
+        with (pathlib.Path(self.tui.state_dir) / "rich-mode-tty").open("w") as fh:
+            fh.write(json.dumps(False))
+        self.assertFalse(self.tui.get_initial_rich_mode())
+
+    def test_get_initial_rich_mode_serial(self):
+        self.tui.opts.run_on_serial = True
+        self.assertFalse(self.tui.get_initial_rich_mode())
+
+        # With a state file.
+        with (pathlib.Path(self.tui.state_dir) / "rich-mode-serial").open("w") as fh:
+            fh.write(json.dumps(True))
+        self.assertTrue(self.tui.get_initial_rich_mode())
+        with (pathlib.Path(self.tui.state_dir) / "rich-mode-serial").open("w") as fh:
+            fh.write(json.dumps(False))
+        self.assertFalse(self.tui.get_initial_rich_mode())
+
+    def test_get_initial_rich_mode_legacy_state_file(self):
+        # Make sure if an old rich-mode state file is present, it is honored -
+        # but the new format takes precedence.
+        self.tui.opts.run_on_serial = True
+        with (pathlib.Path(self.tui.state_dir) / "rich-mode").open("w") as fh:
+            fh.write(json.dumps(True))
+        self.assertTrue(self.tui.get_initial_rich_mode())
+        with (pathlib.Path(self.tui.state_dir) / "rich-mode-serial").open("w") as fh:
+            fh.write(json.dumps(False))
+        self.assertFalse(self.tui.get_initial_rich_mode())
+
+        self.tui.opts.run_on_serial = False
+        with (pathlib.Path(self.tui.state_dir) / "rich-mode").open("w") as fh:
+            fh.write(json.dumps(False))
+        self.assertFalse(self.tui.get_initial_rich_mode())
+        with (pathlib.Path(self.tui.state_dir) / "rich-mode-tty").open("w") as fh:
+            fh.write(json.dumps(True))
+        self.assertTrue(self.tui.get_initial_rich_mode())
diff --git a/subiquitycore/tui.py b/subiquitycore/tui.py
index f51f2507d..0ce9b3be8 100644
--- a/subiquitycore/tui.py
+++ b/subiquitycore/tui.py
@@ -263,7 +263,11 @@ def set_rich(self, rich):
             urwid.util.set_encoding("ascii")
             new_palette = PALETTE_MONO
             self.rich_mode = False
-        with open(self.state_path("rich-mode"), "w") as fp:
+        if self.opts.run_on_serial:
+            rich_mode_file = "rich-mode-serial"
+        else:
+            rich_mode_file = "rich-mode-tty"
+        with open(self.state_path(rich_mode_file), "w") as fp:
             json.dump(self.rich_mode, fp)
         urwid.CanvasCache.clear()
         self.urwid_loop.screen.register_palette(new_palette)
@@ -286,6 +290,37 @@ def extra_urwid_loop_args(self):
     def make_screen(self, inputf=None, outputf=None):
         return make_screen(self.opts.ascii, inputf, outputf)
 
+    def get_initial_rich_mode(self) -> bool:
+        """Return the initial value for rich-mode, either loaded from an
+        exising state file or automatically determined. True means rich mode
+        and False means basic mode."""
+        if self.opts.run_on_serial:
+            rich_mode_file = "rich-mode-serial"
+        else:
+            rich_mode_file = "rich-mode-tty"
+        try:
+            fp = open(self.state_path(rich_mode_file))
+        except FileNotFoundError:
+            pass
+        else:
+            with fp:
+                return json.load(fp)
+
+        try:
+            # During the 23.10 development cycle, there was only one rich-mode
+            # state file. Let's handle the scenario where we just snap
+            # refresh-ed from a pre 23.10 release.
+            # Once mantic is EOL, let's remove this code.
+            fp = open(self.state_path("rich-mode"))
+        except FileNotFoundError:
+            pass
+        else:
+            with fp:
+                return json.load(fp)
+
+        # By default, basic on serial - rich otherwise.
+        return not self.opts.run_on_serial
+
     def start_urwid(self, input=None, output=None):
         # This stops the tcsetpgrp call in run_command_in_foreground from
         # suspending us. See the rant there for more details.
@@ -302,14 +337,7 @@ def start_urwid(self, input=None, output=None):
             **self.extra_urwid_loop_args(),
         )
         extend_dec_special_charmap()
-        try:
-            fp = open(self.state_path("rich-mode"))
-        except FileNotFoundError:
-            initial_rich_mode = not self.opts.run_on_serial
-        else:
-            with fp:
-                initial_rich_mode = json.load(fp)
-        self.set_rich(initial_rich_mode)
+        self.set_rich(self.get_initial_rich_mode())
         self.urwid_loop.start()
         self.select_initial_screen()
 

From 8c76944e6be314e87a11509098f6cc6b61694788 Mon Sep 17 00:00:00 2001
From: Chris Peterson <chris.peterson@canonical.com>
Date: Mon, 25 Sep 2023 15:59:07 -0700
Subject: [PATCH 6/9] docs: locale must be specified to be interactive

(cherry picked from commit 4ba59a503bfd4942255d7c206264c40dea8e5d59)
---
 doc/reference/autoinstall-reference.rst | 4 ++--
 documentation/autoinstall-reference.md  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/reference/autoinstall-reference.rst b/doc/reference/autoinstall-reference.rst
index d7afa8577..5965d8f5b 100644
--- a/doc/reference/autoinstall-reference.rst
+++ b/doc/reference/autoinstall-reference.rst
@@ -96,7 +96,7 @@ locale
 
 * **type:** string
 * **default:** ``en_US.UTF-8``
-* **can be interactive:** yes, always interactive if any section is
+* **can be interactive:** yes
 
 The locale to configure for the installed system.
 
@@ -779,7 +779,7 @@ install
 * **type:** boolean or string (special value ``auto``)
 * **default:**: ``auto``
 
-Whether to install the available OEM meta-packages. The special value ``auto`` 
+Whether to install the available OEM meta-packages. The special value ``auto``
 -- which is the default -- enables the installation on ubuntu-desktop but not
 on ubuntu-server. This option has no effect on core boot classic.
 
diff --git a/documentation/autoinstall-reference.md b/documentation/autoinstall-reference.md
index 42959641c..545477e1d 100644
--- a/documentation/autoinstall-reference.md
+++ b/documentation/autoinstall-reference.md
@@ -65,7 +65,7 @@ A list of shell commands to invoke as soon as the installer starts, in particula
 
 **type:** string
 **default:** `en_US.UTF-8`
-**can be interactive:** yes, always interactive if any section is
+**can be interactive:** yes
 
 The locale to configure for the installed system.
 

From 0caf03c0372ce2c1e4e7c8f21762df862e31e2ad Mon Sep 17 00:00:00 2001
From: Olivier Gayot <olivier.gayot@canonical.com>
Date: Tue, 26 Sep 2023 15:49:59 +0200
Subject: [PATCH 7/9] filesystem: don't crash if v2/orig_config is called early

When v2/orig_config is called too early, the load_probe_data function
will fail because probe_data is None:

  Traceback (most recent call last):
    File "subiquity/common/api/server.py", line 164, in handler
      result = await implementation(**args)
    File "subiquity/server/controllers/filesystem.py", line 1029, in v2_orig_config_GET
      model = self.model.get_orig_model()
    File "subiquity/models/filesystem.py", line 1428, in get_orig_model
      orig_model.load_probe_data(self._probe_data)
    File "subiquity/models/filesystem.py", line 1894, in load_probe_data
      for devname, devdata in probe_data["blockdev"].items():
  TypeError: 'NoneType' object is not subscriptable

Make sure we don't dereference model._probe_data if it is None.

Signed-off-by: Olivier Gayot <olivier.gayot@canonical.com>
(cherry picked from commit 7de6f0538bdb168008cb45b48bf7d6743a8e0768)
---
 subiquity/models/filesystem.py            | 3 ++-
 subiquity/models/tests/test_filesystem.py | 9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/subiquity/models/filesystem.py b/subiquity/models/filesystem.py
index 2de5eefda..6e532eddc 100644
--- a/subiquity/models/filesystem.py
+++ b/subiquity/models/filesystem.py
@@ -1435,7 +1435,8 @@ def get_orig_model(self):
         # use on the V2 storage API.
         orig_model = FilesystemModel(self.bootloader, root=self.root)
         orig_model.target = self.target
-        orig_model.load_probe_data(self._probe_data)
+        if self._probe_data is not None:
+            orig_model.load_probe_data(self._probe_data)
         return orig_model
 
     def process_probe_data(self):
diff --git a/subiquity/models/tests/test_filesystem.py b/subiquity/models/tests/test_filesystem.py
index f197cba42..ea11ab387 100644
--- a/subiquity/models/tests/test_filesystem.py
+++ b/subiquity/models/tests/test_filesystem.py
@@ -328,6 +328,15 @@ def test_lv_ok_for_xxx(self):
         self.assertFalse(lv.ok_for_raid)
         self.assertFalse(lv.ok_for_lvm_vg)
 
+    def test_get_orig_model_no_probe_data(self):
+        # When v2/get_orig_data gets called early, model._probe_data is still
+        # None. Ensure get_orig_model() does not fail.
+        model = make_model()
+
+        model._probe_data = None
+        orig_model = model.get_orig_model()
+        self.assertIsNone(orig_model._probe_data)
+
 
 def fake_up_blockdata_disk(disk, **kw):
     model = disk._m

From 7fa97fec2ab6237ed2bba149aadaf214a4b8ff7f Mon Sep 17 00:00:00 2001
From: Olivier Gayot <olivier.gayot@canonical.com>
Date: Wed, 27 Sep 2023 11:03:47 +0200
Subject: [PATCH 8/9] oem: make sure storage is configured before using
 is_core_boot_classic

Before using fs_controller.is_core_boot_classic(), we wait for the call
to /meta/confirmation?tty=xxx. That said, in semi-automated desktop
installs, sometimes the call to /meta/confirmation happens before
marking storage configured. This leads to the following error:

  File "subiquity/server/controllers/oem.py", line 209, in apply_autoinstall_config
    await self.load_metapkgs_task
  File "subiquity/server/controllers/oem.py", line 81, in list_and_mark_configured
    await self.load_metapackages_list()
  File "subiquitycore/context.py", line 149, in decorated_async
    return await meth(self, **kw)
  File "subiquity/server/controllers/oem.py", line 136, in load_metapackages_list
    if fs_controller.is_core_boot_classic():
  File "subiquity/server/controllers/filesystem.py", line 284, in is_core_boot_classic
    return self._info.is_core_boot_classic()
AttributeError: 'NoneType' object has no attribute 'is_core_boot_classic'

Receiving the confirmation before getting the storage configured is
arguably wrong - but let's be prepared for it just in case.

Signed-off-by: Olivier Gayot <olivier.gayot@canonical.com>
(cherry picked from commit 59849f7f45bfca599cb80dfb299fec29d37877ff)
---
 subiquity/server/controllers/oem.py | 10 +++++++++
 subiquity/tests/api/test_api.py     | 32 +++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/subiquity/server/controllers/oem.py b/subiquity/server/controllers/oem.py
index be21d4f9b..2610f4837 100644
--- a/subiquity/server/controllers/oem.py
+++ b/subiquity/server/controllers/oem.py
@@ -65,6 +65,7 @@ def __init__(self, app) -> None:
 
         self.load_metapkgs_task: Optional[asyncio.Task] = None
         self.kernel_configured_event = asyncio.Event()
+        self.fs_configured_event = asyncio.Event()
 
     def start(self) -> None:
         self._wait_confirmation = asyncio.Event()
@@ -76,6 +77,9 @@ def start(self) -> None:
         self.app.hub.subscribe(
             (InstallerChannels.CONFIGURED, "kernel"), self.kernel_configured_event.set
         )
+        self.app.hub.subscribe(
+            (InstallerChannels.CONFIGURED, "filesystem"), self.fs_configured_event.set
+        )
 
         async def list_and_mark_configured() -> None:
             await self.load_metapackages_list()
@@ -128,6 +132,12 @@ async def wants_oem_kernel(self, pkgname: str, *, context, overlay) -> bool:
     async def load_metapackages_list(self, context) -> None:
         with context.child("wait_confirmation"):
             await self._wait_confirmation.wait()
+        # In normal scenarios, the confirmation event comes after the
+        # storage/filesystem is configured. However, in semi automated desktop
+        # installs (especially in CI), it is possible that the events come in
+        # the reverse order. Let's be prepared for it by also waiting for the
+        # storage configured event.
+        await self.fs_configured_event.wait()
 
         # Only look for OEM meta-packages on supported variants and if we are
         # not running core boot.
diff --git a/subiquity/tests/api/test_api.py b/subiquity/tests/api/test_api.py
index a8d3814c7..c4206e0b9 100644
--- a/subiquity/tests/api/test_api.py
+++ b/subiquity/tests/api/test_api.py
@@ -1820,6 +1820,38 @@ async def test_listing_certified_ubuntu_desktop(self):
             source_id="ubuntu-desktop", expected=["oem-somerville-tentacool-meta"]
         )
 
+    async def test_confirmation_before_storage_configured(self):
+        # On ubuntu-desktop, the confirmation event sometimes comes before the
+        # storage configured event. This was known to cause OEM to fail with
+        # the following error:
+        #   File "server/controllers/oem.py", in load_metapackages_list
+        #     if fs_controller.is_core_boot_classic():
+        #   File "server/controllers/filesystem.py", in is_core_boot_classic
+        #     return self._info.is_core_boot_classic()
+        # AttributeError: 'NoneType' object has no attribute
+        # 'is_core_boot_classic'
+        with patch.dict(os.environ, {"SUBIQUITY_DEBUG": "has-drivers"}):
+            config = "examples/machines/simple.json"
+            args = ["--source-catalog", "examples/sources/mixed.yaml"]
+            async with start_server(config, extra_args=args) as inst:
+                await inst.post("/source", source_id="ubuntu-desktop")
+                names = [
+                    "locale",
+                    "keyboard",
+                    "source",
+                    "network",
+                    "proxy",
+                    "mirror",
+                    "storage",
+                ]
+                await inst.post("/meta/confirm", tty="/dev/tty1")
+                await inst.post("/meta/mark_configured", endpoint_names=names)
+
+                resp = await inst.get("/oem", wait=True)
+                self.assertEqual(
+                    ["oem-somerville-tentacool-meta"], resp["metapackages"]
+                )
+
 
 class TestSource(TestAPI):
     @timeout()

From 8276f61d9a6acacf64c2ce856e96fb5d20f47f3f Mon Sep 17 00:00:00 2001
From: Dan Bungert <daniel.bungert@canonical.com>
Date: Mon, 25 Sep 2023 16:41:30 -0600
Subject: [PATCH 9/9] filesystem: revamp udev handling

In LP: #2009141, we are hitting kernel limits and pyudev buffer limits.
We don't care about specific events, so much as getting one event,
waiting for things to calm down, then reprobing.

Outright disable the event monitor, and re-enable later.  If there is a
storm of events, testing has shown that stopping the listener is not
enough.

(cherry picked from commit b11726d3988534d1ed49567adaa909fa65cd0b26)
---
 subiquity/server/controllers/filesystem.py | 54 ++++++++++++++--------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/subiquity/server/controllers/filesystem.py b/subiquity/server/controllers/filesystem.py
index eb40edede..b9f091ddd 100644
--- a/subiquity/server/controllers/filesystem.py
+++ b/subiquity/server/controllers/filesystem.py
@@ -20,7 +20,6 @@
 import logging
 import os
 import pathlib
-import select
 import time
 from typing import Any, Callable, Dict, List, Optional, Union
 
@@ -273,6 +272,7 @@ def __init__(self, app):
         self._system_mounter: Optional[Mounter] = None
         self._role_to_device: Dict[str, _Device] = {}
         self._device_to_structure: Dict[_Device, snapdapi.OnVolume] = {}
+        self._pyudev_context: Optional[pyudev.Context] = None
         self.use_tpm: bool = False
         self.locked_probe_data: bool = False
         # If probe data come in while we are doing partitioning, store it in
@@ -299,7 +299,7 @@ async def configured(self):
         ):
             self.app.base_model.source.search_drivers = not self.is_core_boot_classic()
         await super().configured()
-        self.stop_listening_udev()
+        self.stop_monitor()
 
     async def _mount_systems_dir(self, variation_name):
         self._source_handler = self.app.controllers.Source.get_handler(variation_name)
@@ -1293,6 +1293,13 @@ async def _probe(self, *, context=None):
             finally:
                 elapsed = time.time() - start
                 log.debug(f"{short_label} probing took {elapsed:.1f} seconds")
+                # In the past, this start_monitor() equivalent was much sooner.
+                # We don't actually need the information it provides though
+                # until a probe has finished, so the start_monitor() is delayed
+                # to here. start_monitor() is allowed after a failed probe, in
+                # case of a hotplug event, perhaps to remove a problematic
+                # device.
+                self.start_monitor()
             break
 
     async def run_autoinstall_guided(self, layout):
@@ -1456,21 +1463,31 @@ def start(self):
         self._start_task = schedule_task(self._start())
 
     async def _start(self):
-        context = pyudev.Context()
-        self._monitor = pyudev.Monitor.from_netlink(context)
-        self._monitor.filter_by(subsystem="block")
-        self._monitor.enable_receiving()
-        self.start_listening_udev()
         await self._probe_task.start()
 
-    def start_listening_udev(self):
+    def start_monitor(self):
+        if self._configured:
+            return
+
+        log.debug("start_monitor")
+        if self._pyudev_context is None:
+            self._pyudev_context = pyudev.Context()
+        self._monitor = pyudev.Monitor.from_netlink(self._pyudev_context)
+        self._monitor.filter_by(subsystem="block")
+        self._monitor.start()
         loop = asyncio.get_running_loop()
         loop.add_reader(self._monitor.fileno(), self._udev_event)
 
-    def stop_listening_udev(self):
+    def stop_monitor(self):
+        if self._monitor is None:
+            return
+
+        log.debug("stop_monitor")
         loop = asyncio.get_running_loop()
         loop.remove_reader(self._monitor.fileno())
 
+        self._monitor = None
+
     def ensure_probing(self):
         try:
             self._probe_task.start_sync()
@@ -1480,21 +1497,20 @@ def ensure_probing(self):
             log.debug("Triggered Probert run on udev event")
 
     def _udev_event(self):
+        # We outright stop monitoring because we're not super concerned about
+        # the specifics of the udev event, only that one happened and that when
+        # the events settle, we want to reprobe.  This is significantly faster
+        # than keeping a monitor around and draining the event queue.
+        # LP: #2009141
+        self.stop_monitor()
+
         cp = run_command(["udevadm", "settle", "-t", "0"])
+
         if cp.returncode != 0:
             log.debug("waiting 0.1 to let udev event queue settle")
-            self.stop_listening_udev()
             loop = asyncio.get_running_loop()
-            loop.call_later(0.1, self.start_listening_udev)
+            loop.call_later(0.1, self._udev_event)
             return
-        # Drain the udev events in the queue -- if we stopped listening to
-        # allow udev to settle, it's good bet there is more than one event to
-        # process and we don't want to kick off a full block probe for each
-        # one.  It's a touch unfortunate that pyudev doesn't have a
-        # non-blocking read so we resort to select().
-        while select.select([self._monitor.fileno()], [], [], 0)[0]:
-            action, dev = self._monitor.receive_device()
-            log.debug("_udev_event %s %s", action, dev)
         self.ensure_probing()
 
     def make_autoinstall(self):