diff --git a/CHANGES.rst b/CHANGES.rst index 1c1c0a9e..518ce729 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -19,6 +19,13 @@ Unreleased :attr:`~.RetrievedFeed.caching_info`). * The ``HTTPAcceptParserType`` was renamed to :class:`.AcceptParserType`. +* Allow :ref:`using the system-installed feedparser <use-system-feedparser>` + instead of the one vendored by *reader*. + This is useful for working around issues in the vendored feedparser, + like the libxml2 <=2.13.3 incompatibility reported in :issue:`350`; + thanks to `Maks Verver`_ for reporting, + root causing, and following up with both dependencies. + * Fix a number of brittle / broken tests. Thanks to `Maks Verver`_ for the issues and fixes. (:issue:`348`, :issue:`349`, :issue:`355`) @@ -670,6 +677,8 @@ Released 2022-03-12 Fix broken error flashing. +.. _version 2.9: + Version 2.9 ----------- diff --git a/docs/install.rst b/docs/install.rst index d1a53b1f..49bcec3c 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -27,6 +27,20 @@ These packages will be installed automatically when installing *reader*: (at least SQLite 3.18 with the `JSON1`_ and `FTS5`_ extensions). +.. _use-system-feedparser: + +.. note:: + + Because `feedparser`_ makes PyPI releases at a lower cadence, + *reader* uses a vendored version of feedparser's `develop`_ branch + by default since :ref:`version 2.9`. + To opt out of this behavior, and make *reader* use + the system-installed ``feedparser`` package, + set the ``READER_USE_SYSTEM_FEEDPARSER`` environment variable to ``1``. + +.. _develop: https://github.com/kurtmckee/feedparser + + .. 
_optional dependencies: Optional dependencies diff --git a/src/reader/_parser/feedparser.py b/src/reader/_parser/feedparser.py index c6c9c23f..7ea5f383 100644 --- a/src/reader/_parser/feedparser.py +++ b/src/reader/_parser/feedparser.py @@ -2,6 +2,7 @@ import calendar import logging +import os import time import warnings from datetime import datetime @@ -12,7 +13,6 @@ from .._types import EntryData from .._types import FeedData -from .._vendor import feedparser from ..exceptions import ParseError from ..types import Content from ..types import Enclosure @@ -20,6 +20,12 @@ from ._http_utils import unparse_accept_header +if os.environ.get('READER_USE_SYSTEM_FEEDPARSER', '') not in ('', '0'): + import feedparser # type: ignore +else: + from .._vendor import feedparser + + if TYPE_CHECKING: # pragma: no cover from . import FeedAndEntries from .requests import Headers @@ -55,7 +61,7 @@ def __call__( resource, resolve_relative_uris=True, sanitize_html=True, - response_headers=headers or {}, # type: ignore[arg-type] + response_headers=headers or {}, ) return _process_feed(url, result) diff --git a/tests/test_parser.py b/tests/test_parser.py index 41dc6289..072807bc 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -14,12 +14,12 @@ from reader._parser import Parser from reader._parser import RetrievedFeed from reader._parser import RetrieveError +from reader._parser.feedparser import feedparser from reader._parser.feedparser import FeedparserParser from reader._parser.file import FileRetriever from reader._parser.jsonfeed import JSONFeedParser from reader._parser.requests import SessionWrapper from reader._types import FeedData -from reader._vendor import feedparser from reader.exceptions import ParseError from utils import make_url_base @@ -1164,3 +1164,24 @@ def retrieve(*_, **__): assert exc_info.value.__cause__ is cause assert parse.last_result.http_info == HTTPInfo(200, {}) + + +def test_reader_use_system_feedparser(monkeypatch, reload_module): + import 
feedparser + + import reader._parser.feedparser + import reader._vendor.feedparser + + name = 'READER_USE_SYSTEM_FEEDPARSER' + + monkeypatch.delenv(name, raising=False) + reload_module(reader._parser.feedparser) + assert reader._parser.feedparser.feedparser is reader._vendor.feedparser + + monkeypatch.setenv(name, '0') + reload_module(reader._parser.feedparser) + assert reader._parser.feedparser.feedparser is reader._vendor.feedparser + + monkeypatch.setenv(name, '1') + reload_module(reader._parser.feedparser) + assert reader._parser.feedparser.feedparser is feedparser