Skip to content

Commit

Permalink
Allow using system feedparser with READER_USE_SYSTEM_FEEDPARSER. #350
Browse files Browse the repository at this point in the history
  • Loading branch information
lemon24 committed Oct 12, 2024
1 parent a07cc7a commit 3d5b589
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 3 deletions.
9 changes: 9 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ Unreleased
:attr:`~.RetrievedFeed.caching_info`).
* The ``HTTPAcceptParserType`` was renamed to :class:`.AcceptParserType`.

* Allow :ref:`using the system-installed feedparser <use-system-feedparser>`
instead of the one vendored by *reader*.
This is useful for working around issues in the vendored feedparser,
like the libxml2 <=2.13.3 incompatibility reported in :issue:`350`;
thanks to `Maks Verver`_ for reporting,
root causing, and following up with both dependencies.

* Fix a number of brittle / broken tests.
Thanks to `Maks Verver`_ for the issues and fixes.
(:issue:`348`, :issue:`349`, :issue:`355`)
Expand Down Expand Up @@ -670,6 +677,8 @@ Released 2022-03-12
Fix broken error flashing.
.. _version 2.9:
Version 2.9
-----------
Expand Down
14 changes: 14 additions & 0 deletions docs/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,20 @@ These packages will be installed automatically when installing *reader*:
(at least SQLite 3.18 with the `JSON1`_ and `FTS5`_ extensions).


.. _use-system-feedparser:

.. note::

Because `feedparser`_ publishes PyPI releases less often
than changes land on its `develop`_ branch,
*reader* has used a vendored copy of that branch
by default since :ref:`version 2.9`.
To opt out of this behavior, and make *reader* use
the system-installed ``feedparser`` package,
set the ``READER_USE_SYSTEM_FEEDPARSER`` environment variable to ``1``.

.. _develop: https://github.com/kurtmckee/feedparser


.. _optional dependencies:

Optional dependencies
Expand Down
10 changes: 8 additions & 2 deletions src/reader/_parser/feedparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import calendar
import logging
import os
import time
import warnings
from datetime import datetime
Expand All @@ -12,14 +13,19 @@

from .._types import EntryData
from .._types import FeedData
from .._vendor import feedparser
from ..exceptions import ParseError
from ..types import Content
from ..types import Enclosure
from ._http_utils import parse_accept_header
from ._http_utils import unparse_accept_header


# Pick the feedparser implementation once, at import time.
# Any value of READER_USE_SYSTEM_FEEDPARSER other than unset/empty or '0'
# opts in to the system-installed package; otherwise the vendored copy is used.
# The system import stays in the first branch on purpose: its "type: ignore"
# is what type checkers see first for this name.
if os.getenv('READER_USE_SYSTEM_FEEDPARSER', '') not in {'', '0'}:
    import feedparser  # type: ignore
else:
    from .._vendor import feedparser


if TYPE_CHECKING: # pragma: no cover
from . import FeedAndEntries
from .requests import Headers
Expand Down Expand Up @@ -55,7 +61,7 @@ def __call__(
resource,
resolve_relative_uris=True,
sanitize_html=True,
response_headers=headers or {}, # type: ignore[arg-type]
response_headers=headers or {},
)
return _process_feed(url, result)

Expand Down
23 changes: 22 additions & 1 deletion tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
from reader._parser import Parser
from reader._parser import RetrievedFeed
from reader._parser import RetrieveError
from reader._parser.feedparser import feedparser
from reader._parser.feedparser import FeedparserParser
from reader._parser.file import FileRetriever
from reader._parser.jsonfeed import JSONFeedParser
from reader._parser.requests import SessionWrapper
from reader._types import FeedData
from reader._vendor import feedparser
from reader.exceptions import ParseError
from utils import make_url_base

Expand Down Expand Up @@ -1164,3 +1164,24 @@ def retrieve(*_, **__):
assert exc_info.value.__cause__ is cause

assert parse.last_result.http_info == HTTPInfo(200, {})


def test_reader_use_system_feedparser(monkeypatch, reload_module):
    """Check that READER_USE_SYSTEM_FEEDPARSER picks the feedparser module.

    The variable is read when reader._parser.feedparser is imported,
    so the module is reloaded after every change to the environment.
    Unset and '0' must select the vendored copy; '1' must select
    the system-installed package.
    """
    import feedparser as system_feedparser

    import reader._parser.feedparser
    import reader._vendor.feedparser

    env_var = 'READER_USE_SYSTEM_FEEDPARSER'

    def selected_module():
        # Look the module up through the package after each reload,
        # rather than holding on to a reference taken beforehand.
        reload_module(reader._parser.feedparser)
        return reader._parser.feedparser.feedparser

    # Variable not set at all -> vendored feedparser.
    monkeypatch.delenv(env_var, raising=False)
    assert selected_module() is reader._vendor.feedparser

    # Explicitly disabled -> vendored feedparser.
    monkeypatch.setenv(env_var, '0')
    assert selected_module() is reader._vendor.feedparser

    # Enabled -> system-installed feedparser.
    monkeypatch.setenv(env_var, '1')
    assert selected_module() is system_feedparser

0 comments on commit 3d5b589

Please sign in to comment.