Skip to content

Commit

Permalink
chore: Use twisted.python.filepath to secure paths, instead of realpa…
Browse files Browse the repository at this point in the history
…th and commonprefix
  • Loading branch information
jpmckinney committed Jul 25, 2024
1 parent c56cf0b commit d498ae3
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 21 deletions.
12 changes: 6 additions & 6 deletions scrapyd/eggstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from glob import escape, glob

from packaging.version import InvalidVersion, Version
from twisted.python import filepath
from zope.interface import implementer

from scrapyd.exceptions import DirectoryTraversalError, EggNotFoundError, ProjectNotFoundError
Expand Down Expand Up @@ -72,10 +73,9 @@ def _egg_path(self, project, version):
return self._get_path(project, f"{sanitized_version}.egg")

def _get_path(self, project, *trusted):
resolvedir = os.path.realpath(self.basedir)
projectdir = os.path.realpath(os.path.join(resolvedir, project))

if os.path.commonprefix((projectdir, resolvedir)) != resolvedir:
raise DirectoryTraversalError(project)
try:
file = filepath.FilePath(self.basedir).child(project)
except filepath.InsecurePath as e:
raise DirectoryTraversalError(project) from e

return os.path.join(projectdir, *trusted)
return os.path.join(file.path, *trusted)
25 changes: 11 additions & 14 deletions scrapyd/environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from posixpath import join as urljoin
from urllib.parse import urlsplit

from twisted.python import filepath
from w3lib.url import path_to_file_uri
from zope.interface import implementer

Expand Down Expand Up @@ -57,31 +58,27 @@ def _get_feeds(self, message, extension):
return parsed._replace(path=path).geturl()

def _get_file(self, message, directory, extension):
resolvedir = os.path.realpath(directory)
project = message["_project"]
spider = message["_spider"]
job = message["_job"]
projectdir = os.path.realpath(os.path.join(resolvedir, project))
spiderdir = os.path.realpath(os.path.join(projectdir, spider))
jobfile = os.path.realpath(os.path.join(spiderdir, f"{job}.{extension}"))

if (
os.path.commonprefix((projectdir, resolvedir)) != resolvedir
or os.path.commonprefix((spiderdir, projectdir)) != projectdir
or os.path.commonprefix((jobfile, spiderdir)) != spiderdir
):
raise DirectoryTraversalError(os.path.join(project, spider, f"{job}.{extension}"))
# https://docs.twisted.org/en/stable/api/twisted.python.filepath.FilePath.html
try:
file = filepath.FilePath(directory).child(project).child(spider).child(f"{job}.{extension}")
except filepath.InsecurePath as e:
raise DirectoryTraversalError(os.path.join(project, spider, f"{job}.{extension}")) from e

if not os.path.exists(spiderdir):
os.makedirs(spiderdir)
parent = file.dirname() # returns a str
if not os.path.exists(parent):
os.makedirs(parent)

to_delete = sorted(
(os.path.join(spiderdir, name) for name in os.listdir(spiderdir)),
(os.path.join(parent, name) for name in os.listdir(parent)),
key=os.path.getmtime,
)[: -self.jobs_to_keep]

for path in to_delete:
with suppress(OSError):
os.remove(path)

return jobfile
return file.path
26 changes: 25 additions & 1 deletion tests/test_environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,28 @@ def test_get_settings_secure(values, key, value):
)


def test_jobs_to_keep(chdir):
config = Config(values={"jobs_to_keep": "2"})
environ = Environment(config, initenv={})
directory = chdir / "logs" / "p1" / "s1"

assert not directory.exists()

environ.get_settings({"_project": "p1", "_spider": "s1", "_job": "j1"})

assert directory.exists()

(directory / "j1.a").touch()
(directory / "j2.b").touch()
(directory / "j3.c").touch()
(directory / "j4.d").touch()

environ.get_settings({"_project": "p1", "_spider": "s1", "_job": "j1"})

assert not (directory / "j1.a").exists()
assert not (directory / "j2.b").exists()


@pytest.mark.parametrize(
("message", "run_only_if_has_settings"),
[
Expand All @@ -91,12 +113,14 @@ def test_get_settings_secure(values, key, value):
({"_project": "localproject"}, True),
],
)
def test_get_environment(environ, message, run_only_if_has_settings):
def test_get_environment(monkeypatch, environ, message, run_only_if_has_settings):
if run_only_if_has_settings and not has_settings():
pytest.skip("[settings] section is not set")

monkeypatch.setenv("CUSTOM", "value")
env = environ.get_environment(message, 3)

assert env["CUSTOM"] == "value"
assert env["SCRAPY_PROJECT"] == message["_project"]

if "_version" in message:
Expand Down

0 comments on commit d498ae3

Please sign in to comment.