Merge pull request #559 from pyinat/max-filesize
Add maximum filesize check
JWCook authored Jun 14, 2024
2 parents 52dfd56 + 20c851e commit 75f8f0b
Showing 8 changed files with 720 additions and 402 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
@@ -6,6 +6,7 @@
 * Increase default request timeout from 10 to 20 seconds
 * Add `validate_token()` function to manually check if an access token is valid
 * Support rate limits less than one request per second (example: `ClientSession(per_second=0.5)`)
+* Add error handling for file uploads over 20MB (not accepted by API)
 * Allow setting lockfile path used for multiprocess rate limiting (example: `ClientSession(lock_path='/tmp/pyinat.lock')`)

 ## 0.19.0 (2023-12-12)
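The `validate_token()` entry above can be exercised roughly as follows; a minimal sketch, assuming the helper is exported at the package root like the other auth functions and returns a boolean (the token value is a placeholder):

```python
from pyinaturalist import validate_token  # assumed top-level export

token = 'your-access-token'  # placeholder
if not validate_token(token):  # assumed to return True if the token is still valid
    print('Access token is expired or invalid')
```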
1 change: 1 addition & 0 deletions README.md
@@ -146,6 +146,7 @@ response = create_observation(
     positional_accuracy=50,  # GPS accuracy in meters
     access_token=token,
     photos=['~/observations/wasp1.jpg', '~/observations/wasp2.jpg'],
+    sounds=['~/observations/recording.mp3'],
 )

 # Save the new observation ID
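With the new size check, an oversized attachment fails fast with a `ValueError` instead of a rejected API call. A hedged sketch (the file path and token are placeholders, and raising before any request is sent is an assumption about where the check runs):

```python
from pyinaturalist import create_observation

token = 'your-access-token'  # placeholder
try:
    response = create_observation(
        species_guess='Vespula vulgaris',
        access_token=token,
        photos=['~/observations/huge_scan.tiff'],  # hypothetical file over 20MB
    )
except ValueError as err:
    # Raised client-side when an attached file exceeds MAX_FILESIZE (20MB)
    print(f'Upload rejected: {err}')
```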
1,070 changes: 681 additions & 389 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyinaturalist/constants.py
@@ -31,6 +31,7 @@
 # Rate-limiting and retry settings
 CONNECT_TIMEOUT = 5
 MAX_DELAY = 60  # Maximum time to wait for rate-limiting before aborting
+MAX_FILESIZE = 20000000  # 20MB maximum file size for uploads
 REQUEST_BURST_RATE = 5
 REQUESTS_PER_SECOND = 1
 REQUESTS_PER_MINUTE = 60
@@ -275,7 +276,7 @@
 DateOrDatetime = Union[date, datetime]
 DateRange = Tuple[DateOrDatetime, DateOrDatetime]
 Dimensions = Tuple[int, int]
-FileOrPath = Union[BinaryIO, str]
+FileOrPath = Union[BinaryIO, Path, str]
 GeoJson = Dict[str, Any]
 HistogramResponse = Dict[DateOrInt, int]
 IntOrStr = Union[int, str]
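For reference, the new constant and the widened `FileOrPath` type can be checked directly; a small sketch (the example path is hypothetical):

```python
from pathlib import Path

from pyinaturalist.constants import MAX_FILESIZE

# The limit is expressed in decimal bytes: 20,000,000 == 20MB
assert MAX_FILESIZE == 20_000_000

# FileOrPath now includes pathlib.Path, so Path objects are accepted
# anywhere a filename string or open file object was accepted before
photo = Path('~/observations/wasp1.jpg').expanduser()  # hypothetical path
```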
28 changes: 19 additions & 9 deletions pyinaturalist/converters.py
@@ -4,7 +4,7 @@
 from datetime import date, datetime
 from io import BytesIO
 from logging import getLogger
-from os.path import abspath, expanduser
+from os import SEEK_END
 from pathlib import Path
 from typing import IO, Any, Dict, List, Mapping, MutableSequence, Optional, Union
 from warnings import catch_warnings, simplefilter
@@ -15,6 +15,7 @@
 from requests import Session

 from pyinaturalist.constants import (
+    MAX_FILESIZE,
     AnyFile,
     Coordinates,
     Dimensions,
@@ -186,17 +187,26 @@ def ensure_file_obj(value: AnyFile, session: Optional[Session] = None) -> IO:
     # Load from URL
     if isinstance(value, str) and URL_PATTERN.match(value):
         session = session or Session()
-        return session.get(value).raw
-
+        file_obj = session.get(value).raw
     # Load from local file path
-    if isinstance(value, (str, Path)):
-        file_path = abspath(expanduser(value))
+    elif isinstance(value, (str, Path)):
+        file_path = Path(value).expanduser().resolve()
         logger.info(f'Reading from file: {file_path}')
-        with open(file_path, 'rb') as f:
-            return BytesIO(f.read())
-
+        file_obj = BytesIO(file_path.read_bytes())
     # Otherwise, assume it's already a file or file-like object
-    return value
+    elif hasattr(value, 'read'):
+        file_obj = value
+    else:
+        file_obj = BytesIO(value)  # type: ignore
+
+    # Verify maximum file size
+    file_obj.seek(0, SEEK_END)
+    file_size = file_obj.tell()
+    file_obj.seek(0)
+    if file_size > MAX_FILESIZE:
+        raise ValueError(f'File size exceeds maximum allowed ({MAX_FILESIZE} bytes): {file_size}')
+
+    return file_obj


 def ensure_list(
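A quick usage sketch of the updated `ensure_file_obj()`, mirroring the new tests further down (the payload sizes are illustrative):

```python
from pyinaturalist.constants import MAX_FILESIZE
from pyinaturalist.converters import ensure_file_obj

# Bytes and file-like objects are returned as a seekable file object
file_obj = ensure_file_obj(b'test content')
print(file_obj.read())  # b'test content'

# Anything larger than MAX_FILESIZE is rejected before any request is made
try:
    ensure_file_obj(b'1' * (MAX_FILESIZE + 1))
except ValueError as err:
    print(err)
```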
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -47,11 +47,12 @@ platformdirs = ">=2.6"
 python-dateutil = ">=2.0"
 python-forge = ">=18.6"
 requests = ">=2.25"
-requests-cache = ">=1.1"
+requests-cache = ">=1.2"
 requests-ratelimiter = ">=0.5.1"
 rich = ">=10.9"

 # Optional dependencies
+filelock = {optional=true, version=">2.0"}
 ujson = {optional=true, version=">5.0"}

 # Documentation dependencies needed for Readthedocs builds
@@ -84,7 +85,7 @@ requests-mock = "^1.8"
 sphinx-autobuild = ">=2021.3"

 [tool.poetry.extras]
-all = ["ujson"]
+all = ["filelock", "ujson"]
 docs = [
     "furo",
     "ipython",
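The new `filelock` optional dependency (included in the `all` extra) backs the lockfile-based multiprocess rate limiting noted in HISTORY.md. A hedged sketch, assuming `ClientSession` is exported at the package root and that these keyword arguments can be combined as in the changelog examples:

```python
from pyinaturalist import ClientSession  # assumed top-level export

# lock_path and per_second values taken from the HISTORY.md examples above;
# the lockfile feature requires the optional filelock dependency ('all' extra)
session = ClientSession(lock_path='/tmp/pyinat.lock', per_second=0.5)
```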
12 changes: 12 additions & 0 deletions test/test_converters.py
@@ -7,6 +7,7 @@
 import pytest
 from dateutil.tz import tzoffset

+from pyinaturalist.constants import MAX_FILESIZE
 from pyinaturalist.converters import (
     convert_lat_long,
     convert_observation_histogram,
@@ -62,6 +63,17 @@ def test_ensure_file_obj__url():
     assert file_obj.read() == b'test content'


+def test_ensure_file_obj__bytes():
+    file_obj = ensure_file_obj(b'test content')
+    assert file_obj.read() == b'test content'
+
+
+def test_ensure_file_obj__exceeds_max_filesize():
+    contents = b'1' * (MAX_FILESIZE + 1)
+    with pytest.raises(ValueError):
+        ensure_file_obj(value=contents)
+
+
 @pytest.mark.parametrize(
     'input, expected_output',
     [
2 changes: 1 addition & 1 deletion test/v1/test_observations.py
Expand Up @@ -388,7 +388,7 @@ def test_upload(requests_mock):
status_code=200,
)

response = upload(1234, BytesIO(), BytesIO(), access_token='token')
response = upload(1234, photos=BytesIO(b'1234'), sounds=BytesIO(b'1234'), access_token='token')
assert response[0]['id'] == 1234
assert response[0]['created_at'] == '2020-09-24T21:06:16.964-05:00'
assert response[0]['photo']['native_username'] == 'username'
