Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions httpie/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,21 @@ def start(
"""
assert not self.status.time_started

# FIXME: some servers still might sent Content-Encoding: gzip
# <https://github.com/httpie/cli/issues/423>
try:
total_size = int(final_response.headers['Content-Length'])
except (KeyError, ValueError, TypeError):
# If the server applied a content coding (e.g. ``gzip``), then
# ``requests`` auto-decompresses the body in ``iter_content`` while
# ``Content-Length`` still reflects the *encoded* size per
# RFC 9110 §8.6. Comparing those two numbers would always mark the
# download as incomplete, so skip the size tracking in that case.
# See <https://github.com/httpie/cli/issues/423>.
content_encoding = (

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fix assumes that whenever Content-Encoding is non-identity, requests has decompressed the body in iter_content(). That's true for gzip/deflate (built into urllib3), but br (Brotli) only gets decoded if the optional brotli/brotlicffi package is installed. If it isn't, requests passes the body through unmodified, Content-Length does match the byte count, and this change now silently disables truncation detection for a case where the old (broken) comparison would've actually been correct. Could you check whether decompression actually happened — e.g. by comparing len(response.raw._fp.read()) behavior, or checking response.raw.headers after the fact, rather than inferring it purely from the request header? At minimum, worth a code comment noting the assumption and which encodings it holds for in this codebase's supported environment

final_response.headers.get('Content-Encoding') or 'identity'
).strip().lower()
if content_encoding == 'identity':
try:
total_size = int(final_response.headers['Content-Length'])
except (KeyError, ValueError, TypeError):
total_size = None
else:
total_size = None

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The tests confirm a complete compressed download is no longer flagged as incomplete. Is there a test (or manual check) confirming a genuinely truncated compressed download still gets caught somehow, e.g. via a decompression error bubbling up from requests/urllib3 mid-stream — now that size tracking is fully disabled for these responses? Worth one test for that case so we know truncation detection isn't silently lost altogether for compressed responses.


if not self._output_file:
Expand Down
47 changes: 47 additions & 0 deletions tests/test_downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,53 @@ def test_download_with_Content_Length(self, mock_env, httpbin_both):
downloader.finish()
assert not downloader.interrupted

def test_download_with_Content_Encoding_skips_size_check(self, mock_env, httpbin_both):
# When the server applies a content coding, ``requests`` auto-decompresses
# the body but Content-Length still reflects the encoded size per
# RFC 9110 §8.6. The downloader must skip the size comparison in that
# case so a fully-received, encoded payload isn't reported as an
# "Incomplete download".
# <https://github.com/httpie/cli/issues/423>
with open(os.devnull, 'w') as devnull:
for content_encoding in ('gzip', 'br', 'deflate'):
downloader = Downloader(mock_env, output_file=devnull)
downloader.start(
initial_url='/',
final_response=Response(
url=httpbin_both.url + '/',
headers={
'Content-Length': 10,
'Content-Encoding': content_encoding,
},
),
)
# Decompressed stream ends up much larger than the encoded size.
downloader.chunk_downloaded(b'1234567890' * 1000)
downloader.finish()
assert not downloader.interrupted, (
f'Content-Encoding={content_encoding!r} should bypass '
f'the size comparison; got interrupted=True.'
)

def test_download_with_Content_Encoding_uppercase_or_padded(self, mock_env, httpbin_both):
# Header values come back with arbitrary casing and surrounding whitespace;
# the downloader must recognise those as content codings too.
with open(os.devnull, 'w') as devnull:
downloader = Downloader(mock_env, output_file=devnull)
downloader.start(
initial_url='/',
final_response=Response(
url=httpbin_both.url + '/',
headers={
'Content-Length': 10,
'Content-Encoding': ' GZIP ',
},
),
)
downloader.chunk_downloaded(b'1234567890' * 1000)
downloader.finish()
assert not downloader.interrupted

def test_download_no_Content_Length(self, mock_env, httpbin_both):
with open(os.devnull, 'w') as devnull:
downloader = Downloader(mock_env, output_file=devnull)
Expand Down
Loading