fsspec · martindurant · Sep 24, 2024 · Jun 19, 2024 · Jun 20, 2024 · Jul 29, 2024
diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py
@@ -358,9 +358,10 @@ def _open(
  kw = self.kwargs.copy()
  kw["asynchronous"] = self.asynchronous
  kw.update(kwargs)
- size = size or self.info(path, **kwargs)["size"]
+ info = self.info(path, **kwargs)
+ size = size or info["size"]
  session = sync(self.loop, self.set_session)
- if block_size and size:
+ if block_size and size and info.get("partial", True):
  return HTTPFile(
  self,
  path,
@@ -834,10 +835,6 @@ async def _file_info(url, session, size_policy="head", **kwargs):
  async with r:
  r.raise_for_status()
 
- # TODO:
- # recognise lack of 'Accept-Ranges',
- # or 'Accept-Ranges': 'none' (not 'bytes')
- # to mean streaming only, no random access => return None
  if "Content-Length" in r.headers:
  # Some servers may choose to ignore Accept-Encoding and return
  # compressed content, in which case the returned size is unreliable.
@@ -852,6 +849,11 @@ async def _file_info(url, session, size_policy="head", **kwargs):
  if "Content-Type" in r.headers:
  info["mimetype"] = r.headers["Content-Type"].partition(";")[0]
 
+ if r.headers.get("Accept-Ranges") == "none":
+ # Some servers may explicitly discourage partial content requests, but
+ # the lack of "Accept-Ranges" does not always indicate they would fail
+ info["partial"] = False
+
  info["url"] = str(r.url)
 
  for checksum_field in ["ETag", "Content-MD5", "Digest"]:

diff --git a/fsspec/implementations/tests/test_http.py b/fsspec/implementations/tests/test_http.py
@@ -237,18 +237,22 @@ def test_random_access(server, headers):
 @pytest.mark.parametrize(
  "headers",
  [
- {"ignore_range": "true", "head_ok": "true", "head_give_length": "true"},
+ # HTTPFile seeks; response headers lack size, so no range support is assumed
+ {"head_ok": "true", "head_give_length": "true"},
+ # HTTPFile seeks, response is not a range
  {"ignore_range": "true", "give_length": "true"},
  {"ignore_range": "true", "give_range": "true"},
+ # HTTPStreamFile does not seek (past 0)
+ {"accept_range": "none", "head_ok": "true", "give_length": "true"},
  ],
 )
 def test_no_range_support(server, headers):
  h = fsspec.filesystem("http", headers=headers)
  url = server + "/index/realfile"
  with h.open(url, "rb") as f:
  # Random access is not possible if the server doesn't respect Range
- f.seek(5)
  with pytest.raises(ValueError):
+ f.seek(5)
  f.read(10)
 
  # Reading from the beginning should still work

diff --git a/fsspec/tests/conftest.py b/fsspec/tests/conftest.py
@@ -135,10 +135,10 @@ def read_chunks(self):
  self.rfile.readline()
 
  def do_HEAD(self):
+ r_headers = {}
  if "head_not_auth" in self.headers:
- return self._respond(
- 403, {"Content-Length": 123}, b"not authorized for HEAD request"
- )
+ r_headers["Content-Length"] = 123
+ return self._respond(403, r_headers, b"not authorized for HEAD request")
  elif "head_ok" not in self.headers:
  return self._respond(405)
 
@@ -148,23 +148,23 @@ def do_HEAD(self):
  return self._respond(404)
 
  if ("give_length" in self.headers) or ("head_give_length" in self.headers):
- response_headers = {"Content-Length": len(file_data)}
  if "zero_length" in self.headers:
- response_headers["Content-Length"] = 0
+ r_headers["Content-Length"] = 0
  elif "gzip_encoding" in self.headers:
  file_data = gzip.compress(file_data)
- response_headers["Content-Encoding"] = "gzip"
- response_headers["Content-Length"] = len(file_data)
-
- self._respond(200, response_headers)
+ r_headers["Content-Encoding"] = "gzip"
+ r_headers["Content-Length"] = len(file_data)
+ else:
+  r_headers["Content-Length"] = len(file_data)
  elif "give_range" in self.headers:
- self._respond(
- 200, {"Content-Range": f"0-{len(file_data) - 1}/{len(file_data)}"}
- )
+ r_headers["Content-Range"] = f"0-{len(file_data) - 1}/{len(file_data)}"
  elif "give_etag" in self.headers:
- self._respond(200, {"ETag": "xxx"})
- else:
- self._respond(200) # OK response, but no useful info
+ r_headers["ETag"] = "xxx"
+
+ if self.headers.get("accept_range") == "none":
+ r_headers["Accept-Ranges"] = "none"
+
+ self._respond(200, r_headers)
 
 
 @contextlib.contextmanager