Skip to content

Commit

Permalink
Merge pull request from GHSA-hrw6-wg82-cm62
Browse files Browse the repository at this point in the history
* filefind: avoid handling absolute paths

we don't need or want absolute path support,
which we inherited from generic ipython_genutils

only supporting relative paths lets us avoid attempting to accessing files we know we won't accept

* Apply suggestions from code review

Co-authored-by: M Bussonnier <bussonniermatthias@gmail.com>

* filefind: only accept Sequence[str]

we only call it one place, might as well be simple about it

* version_info gate for is_relative_to

* clarify docstring

Co-authored-by: Carol Willing <carolcode@willingconsulting.com>

---------

Co-authored-by: M Bussonnier <bussonniermatthias@gmail.com>
Co-authored-by: Carol Willing <carolcode@willingconsulting.com>
  • Loading branch information
3 people authored May 31, 2024
1 parent f137916 commit 79fbf80
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 57 deletions.
99 changes: 42 additions & 57 deletions jupyter_server/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import sys
import warnings
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Generator, NewType, Sequence
from urllib.parse import (
SplitResult,
Expand Down Expand Up @@ -338,81 +339,65 @@ def is_namespace_package(namespace: str) -> bool | None:
return isinstance(spec.submodule_search_locations, _NamespacePath)


def filefind(filename: str, path_dirs: Sequence[str] | str | None = None) -> str:
def filefind(filename: str, path_dirs: Sequence[str]) -> str:
"""Find a file by looking through a sequence of paths.
This iterates through a sequence of paths looking for a file and returns
the full, absolute path of the first occurrence of the file. If no set of
path dirs is given, the filename is tested as is, after running through
:func:`expandvars` and :func:`expanduser`. Thus a simple call::
filefind("myfile.txt")
For use in FileFindHandler.
will find the file in the current working dir, but::
Iterates through a sequence of paths looking for a file and returns
the full, absolute path of the first occurrence of the file.
filefind("~/myfile.txt")
Absolute paths are not accepted for inputs.
Will find the file in the users home directory. This function does not
automatically try any paths, such as the cwd or the user's home directory.
This function does not automatically try any paths,
such as the cwd or the user's home directory.
Parameters
----------
filename : str
The filename to look for.
path_dirs : str, None or sequence of str
The sequence of paths to look for the file in. If None, the filename
need to be absolute or be in the cwd. If a string, the string is
put into a sequence and the searched. If a sequence, walk through
each element and join with ``filename``, calling :func:`expandvars`
and :func:`expanduser` before testing for existence.
The filename to look for. Must be a relative path.
path_dirs : sequence of str
The sequence of paths to look in for the file.
Walk through each element and join with ``filename``.
Only after ensuring the path resolves within the directory is it checked for existence.
Returns
-------
Raises :exc:`IOError` or returns absolute path to file.
Raises :exc:`OSError` or returns absolute path to file.
"""

# If paths are quoted, abspath gets confused, strip them...
filename = filename.strip('"').strip("'")
# If the input is an absolute path, just check it exists
if os.path.isabs(filename) and os.path.isfile(filename):
return filename

if path_dirs is None:
path_dirs = ("",)
elif isinstance(path_dirs, str):
path_dirs = (path_dirs,)

for path in path_dirs:
if path == ".":
path = os.getcwd() # noqa: PLW2901
testname = expand_path(os.path.join(path, filename))
if os.path.isfile(testname):
return os.path.abspath(testname)
file_path = Path(filename)

# If the input is an absolute path, reject it
if file_path.is_absolute():
msg = f"{filename} is absolute, filefind only accepts relative paths."
raise OSError(msg)

for path_str in path_dirs:
path = Path(path_str).absolute()
test_path = path / file_path
# os.path.abspath resolves '..', but Path.absolute() doesn't
# Path.resolve() does, but traverses symlinks, which we don't want
test_path = Path(os.path.abspath(test_path))
if sys.version_info >= (3, 9):
if not test_path.is_relative_to(path):
# points outside root, e.g. via `filename='../foo'`
continue
else:
# is_relative_to is new in 3.9
try:
test_path.relative_to(path)
except ValueError:
# points outside root, e.g. via `filename='../foo'`
continue
# make sure we don't call is_file before we know it's a file within a prefix
# GHSA-hrw6-wg82-cm62 - can leak password hash on windows.
if test_path.is_file():
return os.path.abspath(test_path)

msg = f"File {filename!r} does not exist in any of the search paths: {path_dirs!r}"
raise OSError(msg)


def expand_path(s: str) -> str:
"""Expand $VARS and ~names in a string, like a shell
:Examples:
In [2]: os.environ['FOO']='test'
In [3]: expand_path('variable FOO is $FOO')
Out[3]: 'variable FOO is test'
"""
# This is a pretty subtle hack. When expand user is given a UNC path
# on Windows (\\server\share$\%username%), os.path.expandvars, removes
# the $ to get (\\server\share\%username%). I think it considered $
# alone an empty var. But, we need the $ to remains there (it indicates
# a hidden share).
if os.name == "nt":
s = s.replace("$\\", "IPYTHON_TEMP")
s = os.path.expandvars(os.path.expanduser(s))
if os.name == "nt":
s = s.replace("IPYTHON_TEMP", "$\\")
return s


def import_item(name: str) -> Any:
"""Import and return ``bar`` given the string ``foo.bar``.
Calling ``bar = import_item("foo.bar")`` is the functional equivalent of
Expand Down
39 changes: 39 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from jupyter_server.utils import (
check_pid,
check_version,
filefind,
is_namespace_package,
path2url,
run_sync_in_loop,
Expand Down Expand Up @@ -125,3 +126,41 @@ def test_unix_socket_in_use(tmp_path):
sock.listen(0)
assert unix_socket_in_use(server_address)
sock.close()


@pytest.mark.parametrize(
"filename, result",
[
("/foo", OSError),
("../c/in-c", OSError),
("in-a", "a/in-a"),
("in-b", "b/in-b"),
("in-both", "a/in-both"),
(r"\in-a", OSError),
("not-found", OSError),
],
)
def test_filefind(tmp_path, filename, result):
a = tmp_path / "a"
a.mkdir()
b = tmp_path / "b"
b.mkdir()
c = tmp_path / "c"
c.mkdir()
for parent in (a, b):
with parent.joinpath("in-both").open("w"):
pass
with a.joinpath("in-a").open("w"):
pass
with b.joinpath("in-b").open("w"):
pass
with c.joinpath("in-c").open("w"):
pass

if isinstance(result, str):
found = filefind(filename, [str(a), str(b)])
found_relative = Path(found).relative_to(tmp_path)
assert str(found_relative) == result
else:
with pytest.raises(result):
filefind(filename, [str(a), str(b)])

0 comments on commit 79fbf80

Please sign in to comment.