Enable brotli decompression if it is available #620

Open · wants to merge 1 commit into base: master
17 changes: 17 additions & 0 deletions tests/integration/test_filter.py
@@ -4,6 +4,7 @@
from urllib.parse import urlencode
from urllib.error import HTTPError
import vcr
from vcr.filters import brotli
import json
from assertions import assert_cassette_has_one_response, assert_is_json

@@ -118,6 +119,22 @@ def test_decompress_deflate(tmpdir, httpbin):
assert_is_json(decoded_response)


def test_decompress_brotli(tmpdir, httpbin):
if brotli is None:
# XXX: this is never true, because brotlipy is installed with "httpbin"
pytest.skip("Brotli is not installed")

url = httpbin.url + "/brotli"
request = Request(url, headers={"Accept-Encoding": ["gzip, deflate, br"]})
cass_file = str(tmpdir.join("brotli_response.yaml"))
with vcr.use_cassette(cass_file, decode_compressed_response=True):
urlopen(request)
with vcr.use_cassette(cass_file) as cass:
decoded_response = urlopen(url).read()
assert_cassette_has_one_response(cass)
assert_is_json(decoded_response)


def test_decompress_regular(tmpdir, httpbin):
"""Test that it doesn't try to decompress content that isn't compressed"""
url = httpbin.url + "/get"
7 changes: 5 additions & 2 deletions tox.ini
@@ -3,8 +3,8 @@ skip_missing_interpreters=true
envlist =
cov-clean,
lint,
{py37,py38,py39,py310}-{requests,httplib2,urllib3,tornado4,boto3,aiohttp,httpx},
{pypy3}-{requests,httplib2,urllib3,tornado4,boto3},
{py37,py38,py39,py310}-{requests,httplib2,urllib3,tornado4,boto3,aiohttp,httpx,brotli,brotlipy,brotlicffi},
{pypy3}-{requests,httplib2,urllib3,tornado4,boto3,brotli,brotlipy,brotlicffi},
{py310}-httpx019,
cov-report

@@ -93,6 +93,9 @@ deps =
# httpx==0.19 is the latest version that supports allow_redirects, newer versions use follow_redirects
httpx019: httpx==0.19
{py37,py38,py39,py310}-{httpx}: pytest-asyncio
brotli: brotli
brotlipy: brotlipy
brotlicffi: brotlicffi
depends =
lint,{py37,py38,py39,py310,pypy3}-{requests,httplib2,urllib3,tornado4,boto3},{py37,py38,py39,py310}-{aiohttp},{py37,py38,py39,py310}-{httpx}: cov-clean
cov-report: lint,{py37,py38,py39,py310,pypy3}-{requests,httplib2,urllib3,tornado4,boto3},{py37,py38,py39,py310}-{aiohttp}
27 changes: 22 additions & 5 deletions vcr/filters.py
@@ -6,6 +6,20 @@

from .util import CaseInsensitiveDict

try:
# This supports both brotli & brotlipy packages
import brotli
except ImportError:
try:
import brotlicffi as brotli
except ImportError:
brotli = None


AVAILABLE_DECOMPRESSORS = {"gzip", "deflate"}
if brotli is not None:
AVAILABLE_DECOMPRESSORS.add("br")

@CharString (Contributor) commented on Oct 4, 2022:
It is easier to extend with future or custom decompressors if AVAILABLE_DECOMPRESSORS is a Dict[str, Callable[[bytes], bytes]]:

AVAILABLE_DECOMPRESSORS = {
    "br": brotli.decompress,
    "deflate": zlib.decompress,
    "gzip": lambda body: zlib.decompress(body, zlib.MAX_WBITS | 16),
}

Then decompress_body can just be

def decompress_body(body, encoding):
    return AVAILABLE_DECOMPRESSORS[encoding](body)

Adding a new scheme would then be as easy as adding a function to the dict (see the sketch below).
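
As an editor's sketch of that registry approach, here is what it could look like end to end. The keys follow the Content-Encoding tokens the PR matches against ("br" rather than "brotli"), and the zstandard entry is purely hypothetical, included only to illustrate how a future codec would be registered; it is not part of this change.

import zlib

try:
    import brotli  # brotlipy installs under the same module name
except ImportError:
    try:
        import brotlicffi as brotli
    except ImportError:
        brotli = None

try:
    import zstandard  # hypothetical future codec -- not part of this PR
except ImportError:
    zstandard = None

# Content-Encoding token -> decompress callable
AVAILABLE_DECOMPRESSORS = {
    "deflate": zlib.decompress,
    "gzip": lambda body: zlib.decompress(body, zlib.MAX_WBITS | 16),
}
if brotli is not None:
    AVAILABLE_DECOMPRESSORS["br"] = brotli.decompress
if zstandard is not None:
    AVAILABLE_DECOMPRESSORS["zstd"] = zstandard.ZstdDecompressor().decompress


def decompress_body(body, encoding):
    """Dispatch to whichever decompressor is registered for the encoding."""
    return AVAILABLE_DECOMPRESSORS[encoding](body)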


def replace_headers(request, replacements):
"""Replace headers in request according to replacements.
@@ -136,30 +150,33 @@ def remove_post_data_parameters(request, post_data_parameters_to_remove):

def decode_response(response):
"""
If the response is compressed with gzip or deflate:
If the response is compressed with any supported compression (gzip,
deflate, br if available):
1. decompress the response body
2. delete the content-encoding header
3. update content-length header to decompressed length
"""

def is_compressed(headers):
def is_decompressable(headers):
encoding = headers.get("content-encoding", [])
return encoding and encoding[0] in ("gzip", "deflate")
return encoding and encoding[0] in AVAILABLE_DECOMPRESSORS

def decompress_body(body, encoding):
"""Returns decompressed body according to encoding using zlib.
to (de-)compress gzip format, use wbits = zlib.MAX_WBITS | 16
"""
if encoding == "gzip":
return zlib.decompress(body, zlib.MAX_WBITS | 16)
else: # encoding == 'deflate'
elif encoding == "deflate":
return zlib.decompress(body)
else: # encoding == 'br'
return brotli.decompress(body)

# Deepcopy here in case `headers` contain objects that could
# be mutated by a shallow copy and corrupt the real response.
response = copy.deepcopy(response)
headers = CaseInsensitiveDict(response["headers"])
if is_compressed(headers):
if is_decompressable(headers):
encoding = headers["content-encoding"][0]
headers["content-encoding"].remove(encoding)
if not headers["content-encoding"]:
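
For context, here is a minimal sketch of what decode_response does to a recorded interaction, assuming the cassette dict layout visible in this diff (list-valued headers, the body stored under response["body"]["string"]). Gzip is used because it needs no extra dependency; a br-encoded body would take the same path once brotli is importable. The response dict below is hypothetical and only illustrative.

import gzip

from vcr.filters import decode_response

compressed = gzip.compress(b'{"hello": "world"}')
# Hypothetical recorded response in the assumed cassette layout.
response = {
    "status": {"code": 200, "message": "OK"},
    "headers": {
        "content-encoding": ["gzip"],
        "content-length": [str(len(compressed))],
    },
    "body": {"string": compressed},
}

decoded = decode_response(response)
assert decoded["body"]["string"] == b'{"hello": "world"}'
# Per the docstring above, the content-encoding header is dropped and
# content-length is updated to the decompressed length.
assert "content-encoding" not in decoded["headers"]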