Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(multipart): do not require CRLF after the closing delimiter #2366

Merged
merged 2 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changes/4.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,13 @@ Fixed
:class:`~falcon.media.multipart.MultipartParseOptions`. To that end, a proper
:meth:`~falcon.media.Handlers.copy` method has been implemented for the media
:class:`~falcon.media.Handlers` class. (`#2293 <https://github.com/falconry/falcon/issues/2293>`__)
- Falcon's multipart form parser no longer requires a CRLF (:python:`'\\r\\n'`)
after the closing ``--`` delimiter. Although it is a common convention
(followed by the absolute majority of HTTP clients and web browsers) to
include a trailing CRLF, the popular Undici client
(used as Node's default ``fetch`` implementation) omits it at the time of
this writing. (The next version of Undici will adhere to the convention.)
(`#2364 <https://github.com/falconry/falcon/issues/2364>`__)


Misc
Expand Down
19 changes: 13 additions & 6 deletions falcon/asgi/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,20 @@ async def _iterate_parts(self) -> AsyncIterator[BodyPart]:
delimiter = _CRLF + delimiter
prologue = False

separator = await stream.read_until(_CRLF, 2, consume_delimiter=True)
if separator == b'--':
# NOTE(vytas): boundary delimiter + '--\r\n' signals the
# end of a multipart form.
# NOTE(vytas): Interpretations of RFC 2046, Appendix A, vary
# as to whether the closing `--` must be followed by CRLF.
# While the absolute majority of HTTP clients and browsers
# do append it as a common convention, it seems that this is
# not mandated by the RFC, so we do not require it either.
# NOTE(vytas): Certain versions of the Undici client
# (Node's fetch implementation) do not follow the convention.
if await stream.peek(2) == b'--':
# NOTE(vytas): boundary delimiter + '--' signals the end of
# a multipart form.
await stream.read(2)
break
elif separator:
raise MultipartParseError(description='unexpected form structure')

await stream.read_until(_CRLF, 0, consume_delimiter=True)

except DelimiterError as err:
raise MultipartParseError(
Expand Down
19 changes: 13 additions & 6 deletions falcon/media/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,13 +383,20 @@ def __iter__(self) -> Iterator[BodyPart]:
delimiter = _CRLF + delimiter
prologue = False

separator = stream.read_until(_CRLF, 2, consume_delimiter=True)
if separator == b'--':
# NOTE(vytas): boundary delimiter + '--\r\n' signals the
# end of a multipart form.
# NOTE(vytas): Interpretations of RFC 2046, Appendix A, vary
# as to whether the closing `--` must be followed by CRLF.
# While the absolute majority of HTTP clients and browsers
# do append it as a common convention, it seems that this is
# not mandated by the RFC, so we do not require it either.
# NOTE(vytas): Certain versions of the Undici client
# (Node's fetch implementation) do not follow the convention.
if stream.peek(2) == b'--':
# NOTE(vytas): boundary delimiter + '--' signals the end of
# a multipart form.
stream.read(2)
break
elif separator:
raise MultipartParseError(description='unexpected form structure')

stream.read_until(_CRLF, 0, consume_delimiter=True)

except errors.DelimiterError as err:
raise MultipartParseError(
Expand Down
31 changes: 26 additions & 5 deletions tests/test_media_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,25 @@ def test_upload_multipart(client):
]


@pytest.mark.parametrize('epilogue', ['', '--', '\n', '\n\n', ' <-- no CRLF', '💥'])
def test_epilogue(client, epilogue):
# NOTE(vytas): According to RFC 2046, actually including an epilogue might
# require a trailing CRLF first, but we do not mandate it either.
form_body = EXAMPLE1[:-2] + epilogue.encode()

resp = client.simulate_post(
'/submit',
headers={
'Content-Type': 'multipart/form-data; '
'boundary=5b11af82ab65407ba8cdccf37d2a9c4f',
},
body=form_body,
)

assert resp.status_code == 200
assert [part['name'] for part in resp.json] == ['hello', 'document', 'file1']


@pytest.mark.parametrize('truncated_by', [1, 2, 3, 4])
def test_truncated_form(client, truncated_by):
resp = client.simulate_post(
Expand All @@ -495,7 +514,8 @@ def test_truncated_form(client, truncated_by):
'Content-Type': 'multipart/form-data; '
'boundary=5b11af82ab65407ba8cdccf37d2a9c4f',
},
body=EXAMPLE1[:-truncated_by],
# NOTE(vytas): The trailing \r\n is not mandatory, hence +2.
body=EXAMPLE1[: -(truncated_by + 2)],
)

assert resp.status_code == 400
Expand All @@ -505,14 +525,14 @@ def test_truncated_form(client, truncated_by):
}


def test_unexected_form_structure(client):
def test_unexpected_form_structure(client):
resp1 = client.simulate_post(
'/submit',
headers={
'Content-Type': 'multipart/form-data; '
'boundary=5b11af82ab65407ba8cdccf37d2a9c4f',
},
body=EXAMPLE1[:-2] + b'--\r\n',
body=EXAMPLE1[:-4] + b'__\r\n',
)

assert resp1.status_code == 400
Expand Down Expand Up @@ -577,7 +597,8 @@ def test_too_many_body_parts(custom_client, max_body_part_count):


@pytest.mark.skipif(not msgpack, reason='msgpack not installed')
def test_random_form(client):
@pytest.mark.parametrize('close_delimiter', ['--', '--\r\n'])
def test_random_form(client, close_delimiter):
part_data = [os.urandom(random.randint(0, 2**18)) for _ in range(64)]
form_data = (
b''.join(
Expand All @@ -588,7 +609,7 @@ def test_random_form(client):
+ b'\r\n'
for i in range(64)
)
+ '--{}--\r\n'.format(HASH_BOUNDARY).encode()
+ '--{}{}'.format(HASH_BOUNDARY, close_delimiter).encode()
)

handler = media.MultipartFormHandler()
Expand Down