Skip to content

Commit

Permalink
Clean up reader._parser.http exception handling. #307
Browse files: browse the repository at this point in the history
  • Loading branch information
lemon24 committed Aug 20, 2024
1 parent 2a31988 commit 0e50a13
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 60 deletions.
19 changes: 12 additions & 7 deletions src/reader/_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,16 +414,21 @@ def process_entry_pairs(


@contextmanager
def wrap_exceptions(
url: str, when: str, cls: type[ParseError] = ParseError, **kwargs: Any
) -> Iterator[None]:
def wrap_exceptions(url: str | ParseError, message: str = '') -> Iterator[None]:
try:
yield

except ParseError:
# reader exceptions are pass-through
raise
except OSError as e:
# requests.RequestException is also a subclass of OSError
raise cls(url, message=f"error {when}", **kwargs) from e

except Exception as e:
raise cls(url, message=f"unexpected error {when}", **kwargs) from e
exc = ParseError(url, message=message) if isinstance(url, str) else url

if isinstance(e, OSError):
# expected exception raised for various I/O errors;
# requests.RequestException is a subclass of OSError
raise exc from e

exc._message = f"unexpected error {exc._message}".rstrip()
raise exc from e
90 changes: 40 additions & 50 deletions src/reader/_parser/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,57 +54,47 @@ def __call__(
if http_accept:
request_headers['Accept'] = http_accept

with self.get_session() as session:
with wrap_exceptions(url, "while getting feed"):
response, http_etag, http_last_modified = session.caching_get(
url,
http_etag,
http_last_modified,
headers=request_headers,
stream=True,
)

response_headers = response.headers.copy()
http_info = HTTPInfo(response.status_code, response_headers)

try:
error = RetrieveError(url)

with self.get_session() as session, wrap_exceptions(error):
error._message = "while getting feed"
response, http_etag, http_last_modified = session.caching_get(
url,
http_etag,
http_last_modified,
headers=request_headers,
stream=True,
)

with response:
http_info = HTTPInfo(response.status_code, response.headers)
error.http_info = http_info

if response.status_code == 304:
raise NotModified(url, http_info=http_info)

error._message = "bad HTTP status code"
response.raise_for_status()
except Exception as e:
response.close()
raise RetrieveError(
url,
message="bad HTTP status code",
http_info=http_info,
) from e

if response.status_code == 304:
response.close()
raise NotModified(url, http_info=http_info)

response_headers.setdefault('content-location', response.url)

# https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding
# Content-Encoding is the counterpart of Accept-Encoding;
# it is about binary transformations (mainly compression),
# not text encoding (Content-Type charset does that).
# We let Requests/urllib3 take care of it and remove the header,
# so parsers (like feedparser) don't do it a second time.
response_headers.pop('content-encoding', None)
response.raw.decode_content = True

content_type = response_headers.get('content-type')
mime_type: str | None
if content_type:
mime_type, _ = parse_options_header(content_type)
else:
mime_type = None

with (
wrap_exceptions(
url, "while reading feed", RetrieveError, http_info=http_info
),
response,
):

response.headers.setdefault('content-location', response.url)

# https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding
# Content-Encoding is the counterpart of Accept-Encoding;
# it is about binary transformations (mainly compression),
# not text encoding (Content-Type charset does that).
# We let Requests/urllib3 take care of it and remove the header,
# so parsers (like feedparser) don't do it a second time.
response.headers.pop('content-encoding', None)
response.raw.decode_content = True

content_type = response.headers.get('content-type')
mime_type: str | None
if content_type:
mime_type, _ = parse_options_header(content_type)
else:
mime_type = None

error._message = "while reading feed"
yield RetrievedFeed(
response.raw,
mime_type,
Expand Down
5 changes: 3 additions & 2 deletions src/reader/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class _FancyExceptionBase(Exception):
_default_message: str = ''

def __init__(self, message: str = ''):
self._message = message
self._message = message or self._default_message

@property
def _str(self) -> str:
Expand All @@ -44,7 +44,8 @@ def message(self) -> str:
Became read-only.
"""
return self._message or self._default_message
# read-only for compatibility with ExceptionGroup
return self._message

@cached_property
def _cause_name(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ def feedparser_parse(*args, **kwargs):
parse(feed_url)
assert excinfo.value.__cause__ is exc
assert excinfo.value.url == feed_url
assert 'error during parser' in excinfo.value.message
assert 'during parser' in excinfo.value.message

assert feedparser_parse.kwargs['resolve_relative_uris'] == True
assert feedparser_parse.kwargs['sanitize_html'] == True
Expand Down

0 comments on commit 0e50a13

Please sign in to comment.