diff --git a/README.md b/README.md
index b737f10..1555d70 100644
--- a/README.md
+++ b/README.md
@@ -142,6 +142,29 @@ with chembl_downloader.supplier() as suppl:
 This example was adapted from Greg Landrum's RDKit blog post
 on [generalized substructure search](https://greglandrum.github.io/rdkit-blog/tutorial/substructure/2021/08/03/generalized-substructure-search.html).
 
+### Get an RDKit substructure library
+
+Building on the `supplier()` function, the `get_substructure_library()` function
+makes the preparation of a [substructure library](https://www.rdkit.org/docs/cppapi/classRDKit_1_1SubstructLibrary.html)
+automated and reproducible. Additionally, it caches the results of the build, so
+the build, which takes on the order of tens of minutes, only has to be done once;
+future loading from a pickle object takes on the order of seconds.
+
+The implementation was inspired by Greg Landrum's RDKit blog post,
+[Some new features in the SubstructLibrary](https://greglandrum.github.io/rdkit-blog/tutorial/substructure/2021/12/20/substructlibrary-search-order.html).
+The following example shows how it can be used to accomplish some of the first
+tasks presented in the post:
+
+```python
+from rdkit import Chem
+
+import chembl_downloader
+
+library = chembl_downloader.get_substructure_library()
+query = Chem.MolFromSmarts('[O,N]=C-c:1:c:c:n:c:c:1')
+matches = library.GetMatches(query)
+```
+
 ### Store in a Different Place
 
 If you want to store the data elsewhere using `pystow` (e.g., in [`pyobo`](https://github.com/pyobo/pyobo)
diff --git a/src/chembl_downloader/__init__.py b/src/chembl_downloader/__init__.py
index 4a96086..18d4d49 100644
--- a/src/chembl_downloader/__init__.py
+++ b/src/chembl_downloader/__init__.py
@@ -10,5 +10,6 @@
     download_sqlite,
     latest,
     query,
+    get_substructure_library,
     supplier,
 )
diff --git a/src/chembl_downloader/api.py b/src/chembl_downloader/api.py
index a2da9cd..fe112aa 100644
--- a/src/chembl_downloader/api.py
+++ b/src/chembl_downloader/api.py
@@ -5,6 +5,7 @@
 import gzip
 import logging
 import os
+import pickle
 import sqlite3
 import tarfile
 from contextlib import closing, contextmanager
@@ -12,6 +13,7 @@
 from typing import Optional, Sequence, TYPE_CHECKING, Tuple
 
 import pystow
+from tqdm import tqdm
 
 if TYPE_CHECKING:
     import pandas
@@ -25,6 +27,7 @@
     "cursor",
     "query",
     "supplier",
+    "get_substructure_library",
 ]
 
 logger = logging.getLogger(__name__)
@@ -233,3 +236,59 @@ def supplier(
     _, path = download_sdf(version=version, prefix=prefix)
     with gzip.open(path) as file:
         yield Chem.ForwardSDMolSupplier(file, **kwargs)
+
+
+def get_substructure_library(
+    version: Optional[str] = None,
+    prefix: Optional[Sequence[str]] = None,
+    max_heavy: int = 75,
+    **kwargs,
+):
+    """Get the ChEMBL substructure library, building and caching it as a pickle on first use.
+
+    :param version: The version number of ChEMBL to get. If none specified, uses
+        :func:`bioversions.get_version` to look up the latest.
+    :param prefix: The directory inside :mod:`pystow` to use
+    :param max_heavy: The largest number of heavy atoms a molecule may have before it is skipped during the build.
+    :param kwargs: keyword arguments passed through :func:`supplier` to :class:`rdkit.Chem.ForwardSDMolSupplier`,
+        such as ``sanitize`` and ``removeHs``. Like ``max_heavy``, ignored when a cached pickle is loaded.
+    :returns: A substructure library object, loaded from the cached pickle when one exists
+    :rtype: rdkit.Chem.rdSubstructLibrary.SubstructLibrary
+
+    .. seealso::
+
+        https://greglandrum.github.io/rdkit-blog/tutorial/substructure/2021/12/20/substructlibrary-search-order.html
+    """
+    # Imported here rather than at module level; requires minimum RDKit version of v2021.09
+    from rdkit.Chem.rdSubstructLibrary import (
+        CachedTrustedSmilesMolHolder,
+        TautomerPatternHolder,
+        KeyFromPropHolder,
+        SubstructLibrary,
+    )
+
+    if version is None:
+        version = latest()
+
+    path = pystow.join(*(prefix or PYSTOW_PARTS), version, name="ssslib.pkl")
+    if path.is_file():  # a previous build was cached; reuse it instead of rebuilding
+        logger.info("loading substructure library from pickle: %s", path)
+        with path.open("rb") as file:
+            return pickle.load(file)
+
+    molecule_holder = CachedTrustedSmilesMolHolder()
+    tautomer_pattern_holder = TautomerPatternHolder()
+    key_from_prop_holder = KeyFromPropHolder()
+    library = SubstructLibrary(molecule_holder, tautomer_pattern_holder, key_from_prop_holder)
+    with supplier(version=version, prefix=prefix, **kwargs) as suppl:
+        for mol in tqdm(
+            suppl, unit="molecule", unit_scale=True, desc="Building substructure library"
+        ):
+            if mol is None:  # the supplier yielded no molecule for this record
+                continue
+            if mol.GetNumHeavyAtoms() > max_heavy:  # skip huge molecules
+                continue
+            library.AddMol(mol)
+    with path.open("wb") as file:  # cache the finished build so later calls can load it
+        pickle.dump(library, file, protocol=pickle.HIGHEST_PROTOCOL)
+    return library
diff --git a/src/chembl_downloader/cli.py b/src/chembl_downloader/cli.py
index 58e5b23..1f1ebd4 100644
--- a/src/chembl_downloader/cli.py
+++ b/src/chembl_downloader/cli.py
@@ -7,7 +7,7 @@
 import click
 from more_click import verbose_option
 
-from .api import download_extract_sqlite, query
+from .api import download_extract_sqlite, get_substructure_library, query
 from .queries import ACTIVITIES_QUERY, ID_NAME_QUERY
 
 __all__ = [
@@ -44,5 +44,13 @@ def test(version: Optional[str]):
     click.echo(df.to_markdown(index=False))
 
 
+@main.command()
+@version_option
+@verbose_option
+def substructure(version: Optional[str]) -> None:
+    """Build a substructure library."""
+    get_substructure_library(version=version)  # result discarded; called so the library is built and pickled if not cached
+
+
 if __name__ == "__main__":
     main()