diff --git a/mokapot/__init__.py b/mokapot/__init__.py
index 1c747103..4c12e895 100644
--- a/mokapot/__init__.py
+++ b/mokapot/__init__.py
@@ -2,6 +2,7 @@
 from .confidence import PsmConfidence
 from .dataset import PsmDataset
 from .parsers.fasta import digest, make_decoys, read_fasta
+from .parsers.pin import percolator_to_df, read_pin
 from .schema import PsmSchema
 from .version import _get_version
 from .writers import to_csv, to_flashlfq, to_parquet, to_txt
diff --git a/mokapot/parsers/pin.py b/mokapot/parsers/pin.py
index 11b3d0ff..c34ab523 100644
--- a/mokapot/parsers/pin.py
+++ b/mokapot/parsers/pin.py
@@ -1,183 +1,149 @@
-"""
-This module contains the parsers for reading in PSMs
-"""
+"""Parsers for reading in PSMs."""
+import gzip
 import logging
+from io import StringIO
+from os import PathLike
+from typing import IO, Iterable
 
-import pandas as pd
+import numpy as np
 import polars as pl
 
 from .. import utils
-from ..dataset import LinearPsmDataset
+from ..dataset import PsmDataset
+from ..proteins import Proteins
+from ..schema import PsmSchema
 
 LOGGER = logging.getLogger(__name__)
 
 
 def read_pin(
-    pin_files: str | tuple[str] | pl.DataFrame | pd.DataFrame,
-    group_column: str = None,
-    filename_column: str = None,
-    calcmass_column: str = None,
-    expmass_column: str = None,
-    rt_column: str = None,
-    charge_column: str = None,
-    to_df: bool = False,
-) -> LinearPsmDataset | pl.LazyFrame:
+    pin_files: PathLike | pl.DataFrame | Iterable[PathLike | pl.DataFrame],
+    group: str = None,
+    file: str = None,
+    calcmass: str = None,
+    expmass: str = None,
+    ret_time: str = None,
+    charge: str = None,
+    proteins: Proteins | None = None,
+    eval_fdr: float = 0.01,
+    subset: int | None = None,
+    rng: int | np.random.Generator | None = None,
+    strict_parsing: bool = False,
+) -> PsmDataset:
     """Read Percolator input (PIN) tab-delimited files.
 
     Read PSMs from one or more Percolator input (PIN) tab-delimited files,
     aggregating them into a single
-    :py:class:`~mokapot.dataset.LinearPsmDataset`. For more details about the
+    :py:class:`~mokapot.dataset.PsmDataset`. For more details about the
     PIN file format, see the `Percolator documentation
     <https://github.com/percolator/percolator/wiki/Interface>`_.
+    This function also works on Parquet versions of PIN files.
 
-    Specifically, mokapot requires specific columns in the tab-delimited
-    files: `specid`, `scannr`, `peptide`, and `label`. Note that these
-    column names are case insensitive. In addition to these special columns
-    defined for the PIN format, mokapot also looks for additional columns that
-    specify the MS data file names, theoretical monoisotopic peptide masses,
-    the measured mass, retention times, and charge states, which are necessary
-    to create specific output formats for downstream tools, such as FlashLFQ.
+    Specifically, mokapot requires the following columns in the PIN files:
+    `specid`, `scannr`, `peptide`, and `label`. Note that these column names
+    are case-insensitive. In addition to these special columns that are
+    defined for the PIN format, mokapot also looks for additional columns
+    that specify the MS data file names, theoretical monoisotopic peptide
+    masses, the measured mass, retention times, and charge states, which are
+    necessary to create specific output formats for downstream tools, such
+    as FlashLFQ.
 
     In addition to PIN tab-delimited files, the `pin_files` argument can be a
-    :py:class:`pl.DataFrame` :py:class:`pandas.DataFrame` containing the above
-    columns.
+    :py:class:`polars.DataFrame` or :py:class:`pandas.DataFrame` containing
+    the above columns.
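+
+    For reference, a PIN header might look like the following, where the
+    columns are tab-separated and ``feature1`` through ``featureN`` stand in
+    for arbitrary, search-engine-specific feature columns::
+
+        SpecId  Label  ScanNr  feature1  ...  featureN  Peptide  Proteins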
 
     Finally, mokapot does not currently support specifying a default
     direction or feature weights in the PIN file itself. If these are
     present, they will be ignored.
 
+    Warning
+    -------
+    The final column in the PIN format is a tab-delimited list of proteins.
+    For efficiency, and because mokapot does not use this column for protein
+    inference, the default behavior is to truncate this column to the first
+    protein in each row. If this is not desired, use the `strict_parsing`
+    parameter. Note that parsing data in this manner will not allow for the
+    memory-efficient data streaming that is normally used.
+
     Parameters
     ----------
-    pin_files : str, tuple of str, polars.DataFrame, or pandas.DataFrame
+    pin_files : PathLike, polars.DataFrame, or an iterable of either
         One or more PIN files or data frames to be read. Multiple files can
         be specified using globs, such as ``"psms_*.pin"``, or separately.
-    group_column : str, optional
+    group : str, optional
         A factor by which to group PSMs for grouped confidence estimation.
-    filename_column : str, optional
+    file : str, optional
         The column specifying the MS data file. If :code:`None`, mokapot will
         look for a column called "filename" (case insensitive). This is
         required for some output formats, such as FlashLFQ.
-    calcmass_column : str, optional
-        The column specifying the theoretical monoisotopic mass of the peptide
-        including modifications. If :code:`None`, mokapot will look for a
-        column called "calcmass" (case insensitive). This is required for some
-        output formats, such as FlashLFQ.
-    expmass_column : str, optional
-        The column specifying the measured neutral precursor mass. If
-        :code:`None`, mokapot will look for a column call "expmass" (case
-        insensitive). This is required for some output formats.
-    rt_column : str, optional
+    calcmass : str, optional
+        The column specifying the theoretical monoisotopic mass of the
+        peptide, including modifications. If :code:`None`, mokapot will look
+        for a column called "calcmass" (case insensitive). This is required
+        for some output formats, such as FlashLFQ.
+    expmass : str, optional
+        The column specifying the measured neutral precursor mass. If
+        :code:`None`, mokapot will look for a column called "expmass" (case
+        insensitive). This is required for some output formats.
+    ret_time : str, optional
         The column specifying the retention time in seconds. If :code:`None`,
         mokapot will look for a column called "ret_time" (case insensitive).
         This is required for some output formats, such as FlashLFQ.
-    charge_column : str, optional
+    charge : str, optional
         The column specifying the charge state of each peptide. If
         :code:`None`, mokapot will look for a column called "charge" (case
         insensitive). This is required for some output formats, such as
         FlashLFQ.
-    to_df : bool, optional
-        Return a :py:class:`pandas.DataFrame` instead of a
-        :py:class:`~mokapot.dataset.LinearPsmDataset`.
+    proteins : mokapot.Proteins, optional
+        The proteins to use for protein-level confidence estimation. This
+        may be created with :py:func:`mokapot.read_fasta()`.
+    eval_fdr : float, optional
+        The false discovery rate threshold for choosing the best feature and
+        creating positive labels during the training procedure.
+    subset : int, optional
+        The maximum number of examples to use for training.
+    rng : int or np.random.Generator, optional
+        A seed or generator used for cross-validation split creation and to
+        break ties, or :code:`None` to use the default random number
+        generator state.
+    strict_parsing : bool, optional
+        If `True`, the fast, memory-efficient parser is replaced by a
+        slower, less efficient parser that correctly captures the protein
+        column of the PIN file. This is generally not recommended.
 
     Returns
     -------
-    LinearPsmDataset or polars.LazyFrame
-        A :py:class:`~mokapot.dataset.LinearPsmDataset` object containing the
-        PSMs from all of the PIN files.
+    PsmDataset
+        The PSMs from all of the PIN files.
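+
+    Examples
+    --------
+    A minimal sketch of typical usage; the file names and the group column
+    here are hypothetical:
+
+    >>> import mokapot
+    >>> psms = mokapot.read_pin("psms.pin")  # doctest: +SKIP
+    >>> grouped = mokapot.read_pin(
+    ...     ["run1.pin", "run2.pin"], group="batch"
+    ... )  # doctest: +SKIP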
     """
     logging.info("Parsing PSMs...")
-
-    # Figure out the type of the input...
-    if isinstance(pin_files, pd.DataFrame):
-        pin_df = pl.from_pandas(pin_files).lazy()
-    elif isinstance(pin_files, pl.DataFrame):
-        pin_df = pin_files.lazy()
-    elif isinstance(pin_files, pl.LazyFrame):
-        pin_df = pin_files
-    else:
-        [pl.scan_csv(f, sep="\t") for f in utils.tuplize(pin_files)]
-        pin_df = pl.concat(pin_files, how="diagonal")
-
-    # Find all of the necessary columns, case-insensitive:
-    specid = [c for c in pin_df.columns if c.lower() == "specid"]
-    peptides = [c for c in pin_df.columns if c.lower() == "peptide"][0]
-    labels = [c for c in pin_df.columns if c.lower() == "label"][0]
-    scan = [c for c in pin_df.columns if c.lower() == "scannr"][0]
-
-    # Optional columns
-    filename = _check_column(filename_column, pin_df, "filename")
-    calcmass = _check_column(calcmass_column, pin_df, "calcmass")
-    expmass = _check_column(expmass_column, pin_df, "expmass")
-    ret_time = _check_column(rt_column, pin_df, "ret_time")
-    charge = _check_column(charge_column, pin_df, "charge_column")
-    spectra = [c for c in [filename, scan, ret_time, expmass] if c is not None]
-
-    try:
-        proteins = [c for c in pin_df.columns if c.lower() == "proteins"][0]
-    except IndexError:
-        proteins = None
-
-    nonfeat = [*specid, scan, peptides, proteins, labels]
-
-    # Only add charge to features if there aren't other charge columns:
-    alt_charge = [c for c in pin_df.columns if c.lower().startswith("charge")]
-    if charge is not None and len(alt_charge) > 1:
-        nonfeat.append(charge)
-
-    # Add the grouping column
-    if group_column is not None:
-        nonfeat += [group_column]
-        if group_column not in pin_df.columns:
-            raise ValueError(f"The '{group_column} column was not found.")
-
-    for col in [filename, calcmass, expmass, ret_time]:
-        if col is not None:
-            nonfeat.append(col)
-
-    features = [c for c in pin_df.columns if c not in nonfeat]
-
-    # Check for errors:
-    if not all([specid, peptides, labels, spectra]):
-        raise ValueError(
-            "This PIN format is incompatible with mokapot. Please"
-            " verify that the required columns are present."
-        )
-
-    # Convert labels to the correct format.
-    pin_df = pin_df.with_columns(
-        pl.when(pl.col("Label") == 1)
-        .then(True)
-        .otherwise(False)
-        .alias("Label")
+    data = build_df(pin_files)
+    prot_col = find_column(None, data, "proteins", False)
+
+    schema = PsmSchema(
+        target=find_column(None, data, "label", True),
+        spectrum=[
+            find_column(None, data, "specid", True),
+            find_column(None, data, "scannr", True),
+        ],
+        peptide=find_column(None, data, "peptide", True),
+        file=find_column(file, data, "filename", False),
+        expmass=find_column(expmass, data, "expmass", False),
+        calcmass=find_column(calcmass, data, "calcmass", False),
+        ret_time=find_column(ret_time, data, "ret_time", False),
+        charge=find_column(charge, data, "charge", False),
+        group=find_column(group, data, None, False),
+        metadata=prot_col,
     )
 
-    if to_df:
-        return pin_df
-
-    return LinearPsmDataset(
-        psms=pin_df,
-        target_column=labels,
-        spectrum_columns=spectra,
-        peptide_column=peptides,
-        protein_column=proteins,
-        group_column=group_column,
-        feature_columns=features,
-        filename_column=filename,
-        scan_column=scan,
-        calcmass_column=calcmass,
-        expmass_column=expmass,
-        rt_column=ret_time,
-        charge_column=charge,
-        copy_data=False,
+    if schema.expmass is not None:
+        schema.spectrum.append(schema.expmass)
+
+    return PsmDataset(
+        data=data,
+        schema=schema,
+        eval_fdr=eval_fdr,
+        proteins=proteins,
+        subset=subset,
+        rng=rng,
     )
 
 
-def read_percolator(perc_file):
-    """
-    Read a Percolator tab-delimited file.
+def percolator_to_df(perc_file: PathLike) -> pl.DataFrame:
+    """Read a Percolator tab-delimited file.
 
     Percolator input format (PIN) files and the Percolator result files
     are tab-delimited, but also have a tab-delimited protein list as the
@@ -185,7 +151,7 @@ def read_percolator(perc_file):
 
     Parameters
     ----------
-    perc_file : str
+    perc_file : PathLike
         The file to parse.
 
     Returns
@@ -206,14 +172,16 @@ def read_percolator(perc_file):
         perc.seek(0)
         _ = perc.readline()
 
-    psms = pd.concat((c for c in _parse_in_chunks(perc, cols)), copy=False)
+    psms = pl.concat(_parse_in_chunks(perc, cols), how="vertical")
 
     return psms
 
 
-def _parse_in_chunks(file_obj, columns, chunk_size=int(1e8)):
+def _parse_in_chunks(
+    file_obj: IO, columns: list[str], chunk_size: int = int(1e8)
+) -> Iterable[pl.DataFrame]:
     """
-    Parse a file in chunks
+    Parse a file in chunks.
 
     Parameters
     ----------
@@ -234,20 +202,88 @@ def _parse_in_chunks(file_obj, columns, chunk_size=int(1e8)):
         if not psms:
            break
 
-        psms = [p.rstrip().split("\t", len(columns) - 1) for p in psms]
-        psms = pd.DataFrame.from_records(psms, columns=columns)
-        yield psms.apply(pd.to_numeric, errors="ignore")
+        psms = [
+            ",".join(p.rstrip().split("\t", len(columns) - 1)) for p in psms
+        ]
+        psms = [",".join(columns)] + psms
 
-def _check_column(col, df, default):
-    """Check that a column exists in the dataframe."""
+        yield pl.read_csv(StringIO("\n".join(psms)))
+
+
+def find_column(
+    col: str | None,
+    df: pl.LazyFrame,
+    default: str | None,
+    required: bool,
+) -> str | None:
+    """Check that a column exists in the dataframe.
+
+    Parameters
+    ----------
+    col : str or None
+        The specified column that should be in the dataframe.
+    df : polars.LazyFrame
+        The dataframe.
+    default : str or None
+        A case-insensitive fall-back option for the column.
+    required : bool
+        Is this column required?
+
+    Returns
+    -------
+    str or None
+        The resolved column name, or :code:`None` if an optional column
+        could not be found.
+    """
     if col is None:
         try:
             return [c for c in df.columns if c.lower() == default][0]
-        except IndexError:
-            return None
+        except (IndexError, TypeError):
+            if not required:
+                return None
+            raise ValueError(f"The '{default}' column was not found.")
 
-    if col not in df.columns:
+    if col not in df.columns and required:
         raise ValueError(f"The '{col}' column was not found.")
 
     return col
+
+
+def build_df(
+    pin_files: str | pl.DataFrame | Iterable[str | pl.DataFrame],
+) -> pl.LazyFrame:
+    """Build the PIN DataFrame.
+
+    Parameters
+    ----------
+    pin_files : str, polars.DataFrame, or iterable of either
+        One or more PIN files or data frames to be read.
+
+    Returns
+    -------
+    polars.LazyFrame
+        The parsed data.
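+
+    Examples
+    --------
+    A sketch of the accepted inputs; the file names here are hypothetical:
+
+    >>> lazy = build_df(["run1.pin", "run2.pin"])  # doctest: +SKIP
+    >>> lazy = build_df(pl.DataFrame({"SpecId": ["a"]}))  # doctest: +SKIP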
+ """ + dfs = [] + for pin_file in utils.listify(pin_files): + # Figure out the type of the input + try: + pin_file = pl.from_pandas(pin_file) + except TypeError: + pass + + try: + dfs.append(pin_file.lazy()) + except AttributeError: + try: + dfs.append(pl.scan_parquet(pin_file)) + except pl.ArrowError: + dfs.append( + pl.scan_csv( + pin_file, + separator="\t", + truncate_ragged_lines=True, + ) + ) + + # Verify columns are identical: + first_cols = set(dfs[0].columns) + for df in dfs: + if set(df.columns) != first_cols: + raise ValueError("All pin_files must have the same columns") + + return pl.concat(dfs, how="vertical") diff --git a/mokapot/schema.py b/mokapot/schema.py index ce1793d2..dc38dfca 100644 --- a/mokapot/schema.py +++ b/mokapot/schema.py @@ -26,7 +26,7 @@ class _SchemaValidatorMixin: def __init__( self, required: list[str], single: list[str], variadic: list[str] ) -> None: - """Initialize the ColumnValidator""" + """Initialize the ColumnValidator.""" self._required = required self._single = single self._variadic = variadic @@ -209,7 +209,7 @@ class PsmSchema(_SchemaValidatorMixin): The column to use as the default score for ranking PSMs. desc : bool, optional Indicates that higher scores in the score column are better. This is - omly relevant if a score column is specified. + only relevant if a score column is specified. """ target: str @@ -227,7 +227,7 @@ class PsmSchema(_SchemaValidatorMixin): score: str | None = None desc: bool = True - def __post_init__(self): + def __post_init__(self) -> None: """Perform initial validation.""" super().__init__( required=["target", "spectrum", "peptide"], diff --git a/mokapot/utils.py b/mokapot/utils.py index 5cbdc4b3..5101c6fc 100644 --- a/mokapot/utils.py +++ b/mokapot/utils.py @@ -1,54 +1,29 @@ """Utility functions.""" -import itertools +from typing import Any -import numpy as np import polars as pl -def flatten(split): - """Get the indices from split.""" - return list(itertools.chain.from_iterable(split)) - - -def safe_divide(numerator, denominator, ones=False): - """Divide ignoring div by zero warnings.""" - if isinstance(numerator, pl.Series): - numerator = numerator.values - - if isinstance(denominator, pl.Series): - denominator = denominator.values - - numerator = numerator.astype(float) - denominator = denominator.astype(float) - if ones: - out = np.ones_like(numerator) - else: - out = np.zeros_like(numerator) - - return np.divide(numerator, denominator, out=out, where=(denominator != 0)) - - -def tuplize(obj): - """Convert obj to a tuple, without splitting strings.""" - try: - _ = iter(obj) - except TypeError: - obj = (obj,) - else: - if isinstance(obj, str): - obj = (obj,) - - return tuple(obj) +def listify(obj: Any) -> list[Any]: + """Convert obj to a list, without splitting strings or dataframes. + Parameters + ---------- + obj : anything + The object to turn into a list. -def listify(obj): - """Convert obj to a list, without splitting strings.""" + Returns + ------- + list + The list representation of th object. + """ try: _ = iter(obj) except TypeError: obj = [obj] else: - if isinstance(obj, str): + # Don't listify strings or DataFrames. 
+    """
     try:
         _ = iter(obj)
     except TypeError:
         obj = [obj]
     else:
-        if isinstance(obj, str):
+        # Don't listify strings or DataFrames.
+        if isinstance(obj, str) or hasattr(obj, "columns"):
             obj = [obj]
 
     return list(obj)
diff --git a/pyproject.toml b/pyproject.toml
index 956b145f..ecdaaa7f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,7 +72,8 @@ select = ["E", "F", "W", "C", "I", "D", "UP", "N", "ANN", "T20", "I001"]
 # D100 Missing docstring in public module
 # ANN102 Missing type annotation for `cls` in classmethod
 # D401 First line of docstring should be in imperative mood
-ignore = ["D213", "ANN101", "D203", "D100", "ANN102", "D401"]
+# ANN401 Dynamically typed expressions (typing.Any) are disallowed
+ignore = ["D213", "ANN101", "D203", "D100", "ANN102", "D401", "ANN401"]
 fix = true
 line-length = 79
diff --git a/tests/unit_tests/test_parser_pin.py b/tests/unit_tests/test_parser_pin.py
index c8010bcd..0d508033 100644
--- a/tests/unit_tests/test_parser_pin.py
+++ b/tests/unit_tests/test_parser_pin.py
@@ -1,12 +1,27 @@
-"""Test that parsing Percolator input files works correctly"""
+"""Test that parsing Percolator input files works correctly."""
 import pytest
+
 import mokapot
-import pandas as pd
 
 
 @pytest.fixture
 def std_pin(tmp_path):
-    """Create a standard PIN file."""
+    """Create a standard PIN file."""
+    out_file = tmp_path / "std_pin"
+    with open(str(out_file), "w+") as pin:
+        dat = (
+            "sPeCid\tLaBel\tgroup\tpepTide\tsCore\tscanNR\tpRoteins\n"
+            "a\t1\t1\tABC\t5\t2\tprotein1\tprotein2\n"
+            "b\t-1\t1\tCBA\t10\t3\tdecoy_protein1\tdecoy_protein2"
+        )
+        pin.write(dat)
+
+    return out_file
+
+
+@pytest.fixture
+def dir_pin(tmp_path):
+    """Create a standard PIN file."""
     out_file = tmp_path / "std_pin"
     with open(str(out_file), "w+") as pin:
         dat = (
@@ -20,18 +35,23 @@ def std_pin(tmp_path):
     return out_file
 
 
-def test_pin_parsing(std_pin):
-    """Test PIN parsing"""
-    df = mokapot.read_pin(std_pin, to_df=True)
-    assert df["LaBel"].dtype == "bool"
+def test_perc_parsing(dir_pin):
+    """Test PIN parsing."""
+    df = mokapot.percolator_to_df(dir_pin)
     assert len(df) == 2
-    assert len(df[df["LaBel"]]) == 1
-    assert len(df[df["LaBel"]]) == 1
 
-    dat = mokapot.read_pin(std_pin)
-    pd.testing.assert_frame_equal(df.loc[:, ("sCore",)], dat.features)
+    assert df["pRoteins"][0] == "protein1\tprotein2"
+
+
+def test_std_pin(std_pin):
+    """Test a PIN file without a DefaultDirection line."""
+    dset = mokapot.read_pin(std_pin, group="group")
+    assert len(dset) == 2
+    assert dset.schema.features == ["sCore"]
 
+    df = mokapot.percolator_to_df(std_pin)
+    assert len(df) == 2
+    assert df["pRoteins"][0] == "protein1\tprotein2"
 
-def test_pin_wo_dir():
-    """Test a PIN file without a DefaultDirection line"""
-    mokapot.read_pin("data/scope2_FP97AA.pin")
+    dset = mokapot.read_pin(dset.data.collect(), group="group")
+    assert len(dset) == 2
+    assert dset.schema.features == ["sCore"]
diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py
index eeec9add..53b72d91 100644
--- a/tests/unit_tests/test_utils.py
+++ b/tests/unit_tests/test_utils.py
@@ -1,103 +1,36 @@
-"""Test the utility functions"""
-import numpy as np
-import pandas as pd
+"""Test the utility functions."""
+import polars as pl
 import pytest
+from polars.testing import assert_frame_equal
 
 from mokapot import utils
 
-
-@pytest.fixture
-def df():
-    """Create a simple dataframe."""
-    data = {
-        "val1": [1, 2, 2, 1, 1, 1, 3, 2, 1],
-        "val2": [1] * 9,
-        "group": ["A"] * 3 + ["B"] * 3 + ["C"] * 3,
-    }
-
-    max_res = {"val1": [2, 1, 3], "val2": [1, 1, 1], "group": ["A", "B", "C"]}
-
-    return pd.DataFrame(data), pd.DataFrame(max_res)
-
-
-def test_groupby_max(df):
-    """Test that the groupby_max() function works"""
-    in_df, val1_max = df
-
-    # Verify that the basic idea works:
-    idx = utils.groupby_max(in_df, "group", "val1", 42)
-    out_df = in_df.loc[idx, :].sort_values("group").reset_index(drop=True)
-    pd.testing.assert_frame_equal(val1_max, out_df)
-
-    idx = utils.groupby_max(in_df, ["group"], "val1", 42)
-    out_df = in_df.loc[idx, :].sort_values("group").reset_index(drop=True)
-    pd.testing.assert_frame_equal(val1_max, out_df)
-
-    # Verify that the shuffling bit works:
-    idx1 = set(utils.groupby_max(in_df, "group", "val2", 2))
-    idx2 = set(utils.groupby_max(in_df, "group", "val2", 1))
-    assert idx1 != idx2
-
-
-def test_flatten():
-    """Test that the flatten() function works"""
-    nested_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
-    flat_list = [1, 2, 3, 4, 5, 6, 7, 8, 9]
-    assert utils.flatten(nested_list) == flat_list
-
-    nested_array = np.array(nested_list)
-    flat_array = np.array(flat_list)
-    np.testing.assert_array_equal(
-        np.array(utils.flatten(nested_array)), flat_array
-    )
-
-
-def test_safe_divide():
-    """Test that our safe division function works"""
-    np_num = np.array([1, 2, 3, 4])
-    np_den = np.array([1, 2, 3, 0.0])
-    np_out = np.array([1, 1, 1, 0])
-    np.testing.assert_array_equal(utils.safe_divide(np_num, np_den), np_out)
-
-    np_out_ones = np.array([1, 1, 1, 1])
-    np.testing.assert_array_equal(
-        utils.safe_divide(np_num, np_den, ones=True), np_out_ones
-    )
-
-    # Test pandas:
-    df = pd.DataFrame({"num": np_num, "den": np_den})
-    np.testing.assert_array_equal(utils.safe_divide(df.num, df.den), np_out)
-
-    np.testing.assert_array_equal(
-        utils.safe_divide(df.num, df.den, ones=True), np_out_ones
-    )
-
-
-def test_tuplize():
-    """Test that we can turn things into tuples"""
-    list_in = [1, 2, 3]
-    list_out = (1, 2, 3)
-    assert utils.tuplize(list_in) == list_out
-
-    str_in = "blah"
-    str_out = ("blah",)
-    assert utils.tuplize(str_in) == str_out
-
-    tuple_in = ("blah", 1, "x")
-    tuple_out = ("blah", 1, "x")
-    assert utils.tuplize(tuple_in) == tuple_out
-
-
-def test_listify():
-    """Test that we can turn things into lists"""
-    list_in = [1, 2, 3]
-    list_out = [1, 2, 3]
-    assert utils.listify(list_in) == list_out
-
-    str_in = "blah"
-    str_out = ["blah"]
-    assert utils.listify(str_in) == str_out
-
-    tuple_in = ("blah", 1, "x")
-    tuple_out = ["blah", 1, "x"]
-    assert utils.listify(tuple_in) == tuple_out
+DF = pl.DataFrame({"a": [1.0, 2], "b": [3.0, 4]})
+
+
+@pytest.mark.parametrize(
+    ("obj", "expected"),
+    [
+        ([1, 2, 3], [1, 2, 3]),
+        ("blah", ["blah"]),
+        (DF, [DF]),
+    ],
+)
+def test_listify(obj, expected):
+    """Test that we can turn things into lists."""
+    assert utils.listify(obj) == expected
+
+
+@pytest.mark.parametrize(
+    ("obj", "expected"),
+    [
+        (DF, DF.lazy()),
+        (DF.lazy(), DF.lazy()),
+        ({"a": [1.0, 2], "b": [3.0, 4]}, DF.lazy()),
+    ],
+)
+def test_make_lazy(obj, expected):
+    """Test that we can turn things into lazy frames."""
+    out = utils.make_lazy(obj)
+    assert isinstance(out, pl.LazyFrame)
+    assert_frame_equal(out, expected)