Skip to content

Commit

Permalink
Refactor LocationPath.from_uri()
Browse files Browse the repository at this point in the history
  - Add get_url_path() and is_windows_drive_path() helpers
  • Loading branch information
brunato committed Apr 26, 2024
1 parent b0aebfb commit c2ccb17
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 40 deletions.
23 changes: 17 additions & 6 deletions tests/test_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,22 @@ def test_path_from_uri(self):
self.assertIsInstance(path, LocationPosixPath)
self.assertEqual(str(path), '/home/foo/names')

path = LocationPosixPath.from_uri('file:///home\\foo\\names#foo')
path = LocationPath.from_uri('file:///home\\foo\\names#foo')
self.assertIsInstance(path, LocationWindowsPath)
self.assertTrue(path.as_posix().endswith('/home/foo/names'))

path = LocationPosixPath.from_uri('file:///c:/home/foo/names/')
self.assertIsInstance(path, LocationPosixPath)

path = LocationPath.from_uri('file:///c:/home/foo/names/')
self.assertIsInstance(path, LocationWindowsPath)
self.assertEqual(str(path), r'c:\home\foo\names')
self.assertEqual(path.as_uri(), 'file:///c:/home/foo/names')

path = LocationPosixPath.from_uri('file:c:/home/foo/names/')
self.assertIsInstance(path, LocationPosixPath)

path = LocationPath.from_uri('file:c:/home/foo/names/')
self.assertIsInstance(path, LocationWindowsPath)
self.assertEqual(str(path), r'c:\home\foo\names')
self.assertEqual(path.as_uri(), 'file:///c:/home/foo/names')
Expand Down Expand Up @@ -174,8 +180,9 @@ def test_normalize_url_posix(self):
self.check_url(normalize_url('dummy path.xsd'), cwd_url + 'dummy%20path.xsd')
self.check_url(normalize_url('dummy path.xsd', 'http://site/base'),
'http://site/base/dummy%20path.xsd')
self.check_url(normalize_url('dummy path.xsd', 'file://host/home/'),
PurePath('//host/home/dummy path.xsd').as_uri())

self.assertEqual(normalize_url('dummy path.xsd', 'file://host/home/'),
'file:////host/home/dummy%20path.xsd')

url = "file:///c:/Downloads/file.xsd"
self.check_url(normalize_url(url, base_url="file:///d:/Temp/"), url)
Expand Down Expand Up @@ -288,6 +295,7 @@ def test_normalize_url_slashes(self):
# Issue #116
url = '//anaconda/envs/testenv/lib/python3.6/site-packages/xmlschema/validators/schemas/'
if os.name == 'posix':
normalize_url(url)
self.assertEqual(normalize_url(url), pathlib.PurePath(url).as_uri())
else:
# On Windows // is interpreted as a network share UNC path
Expand All @@ -299,9 +307,9 @@ def test_normalize_url_slashes(self):
f'file://{DRIVE_REGEX}/root/dir1/schema.xsd')

self.assertRegex(normalize_url('////root/dir1/schema.xsd'),
f'file://{DRIVE_REGEX}/root/dir1/schema.xsd')
f'file://{DRIVE_REGEX}//root/dir1/schema.xsd')
self.assertRegex(normalize_url('dir2/schema.xsd', '////root/dir1'),
f'file://{DRIVE_REGEX}/root/dir1/dir2/schema.xsd')
f'file://{DRIVE_REGEX}//root/dir1/dir2/schema.xsd')

self.assertEqual(normalize_url('//root/dir1/schema.xsd'),
'file:////root/dir1/schema.xsd')
Expand All @@ -312,12 +320,15 @@ def test_normalize_url_slashes(self):

def test_normalize_url_hash_character(self):
url = normalize_url('issue #000.xml', 'file:///dir1/dir2/')
self.assertRegex(url, f'file://{DRIVE_REGEX}/dir1/dir2/issue%20')

url = normalize_url('issue%20%23000.xml', 'file:///dir1/dir2/')
self.assertRegex(url, f'file://{DRIVE_REGEX}/dir1/dir2/issue%20%23000.xml')

url = normalize_url('data.xml', 'file:///dir1/dir2/issue%20001')
self.assertRegex(url, f'file://{DRIVE_REGEX}/dir1/dir2/issue%20001/data.xml')

url = normalize_url('data.xml', '/dir1/dir2/issue #002')
url = normalize_url('data.xml', '/dir1/dir2/issue%20%23002')
self.assertRegex(url, f'{DRIVE_REGEX}/dir1/dir2/issue%20%23002/data.xml')

def test_is_url_function(self):
Expand Down
71 changes: 37 additions & 34 deletions xmlschema/locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,42 +50,25 @@ def from_uri(cls, uri: str) -> 'LocationPath':
if not uri:
raise XMLSchemaValueError("Empty URI provided!")

if uri.startswith(r'\\'):
return LocationWindowsPath(uri) # UNC path
elif uri.startswith('/'):
return cls(uri)

parts = urlsplit(uri)
if not parts.scheme:
return cls(parts.path)
elif parts.scheme in DRIVE_LETTERS and len(parts.scheme) == 1:
return LocationWindowsPath(uri) # Eg. k:/Python/lib/....
elif parts.scheme != 'file':
return LocationPosixPath(unquote(parts.path))

# Get file URI path because urlsplit does not parse it well
start = 7 if uri.startswith('file:///') else 5
if parts.query:
path = uri[start:uri.index('?')]
elif parts.fragment:
path = uri[start:uri.index('#')]
else:
path = uri[start:]

if ':' in path:
# Windows path with a drive
pos = path.index(':')
if pos == 2 and path[0] == '/' and path[1] in DRIVE_LETTERS:
path = get_url_path(uri)
if cls is not LocationPath:
return cls(unquote(path))

if ':' not in path:
pass
elif uri.startswith('file:///'):
if path.startswith('/') and is_windows_drive_path(path[1:]):
return LocationWindowsPath(unquote(path[1:]))
if path.startswith(('//', '/\\')) and is_windows_drive_path(path[2:]):
raise XMLSchemaValueError(f"Invalid URI {uri!r}")
elif uri.startswith('file://'):
if path.startswith('//') and is_windows_drive_path(path[2:]):
raise XMLSchemaValueError(f"Invalid URI {uri!r}")

obj = LocationWindowsPath(unquote(path))
if len(obj.drive) != 2 or obj.drive[1] != ':':
raise XMLSchemaValueError("Invalid URI %r" % uri)
return obj

if '\\' in path:
if is_windows_drive_path(path) or path.startswith(r'\\') or '\\' in path:
return LocationWindowsPath(unquote(path))
return cls(unquote(path))
else:
return cls(unquote(path))

def as_uri(self) -> str:
if not self.is_absolute():
Expand Down Expand Up @@ -139,7 +122,7 @@ def normalize_url(url: str, base_url: Optional[str] = None,
accept a simple pathname.
:return: a normalized URL string.
"""
url_parts = urlsplit(url)
url_parts = urlsplit(encode_url(url))
if not is_local_scheme(url_parts.scheme):
return url_parts.geturl()

Expand Down Expand Up @@ -283,6 +266,26 @@ def decode_url(url: str, method: str = 'xml') -> str:
))


def get_url_path(url: str) -> str:
    """
    Return the path portion of *url*, discarding any query and fragment.

    :param url: a URL or a plain pathname.
    :return: for a schemeless or `file` URL, the network location (if any) \
    joined with the path; for a one-character scheme found in `DRIVE_LETTERS` \
    (e.g. `k:/Python/lib/file`), the scheme rejoined with the rest so the \
    drive prefix is kept; for any other scheme, only the parsed path component.
    """
    scheme, netloc, path = urlsplit(url)[:3]

    if scheme in ('', 'file'):
        # Local resource: recompose without the scheme, keeping the host part.
        return urlunsplit(('', netloc, path, '', ''))

    if scheme in DRIVE_LETTERS and len(scheme) == 1:
        # urlsplit() parsed a Windows drive letter as the scheme: put it back.
        return urlunsplit((scheme, netloc, path, '', ''))

    return path


def is_windows_drive_path(path: str) -> bool:
    """
    Tell whether *path* begins with a Windows drive specifier such as `c:`.

    :param path: a pathname, using either slashes or backslashes as separators.
    :return: `True` if the drive part parsed by `ntpath` is exactly one \
    character of `DRIVE_LETTERS` followed by a colon, `False` otherwise \
    (e.g. paths without a colon, paths with an empty drive, UNC shares).
    """
    if ':' not in path:
        return False  # cheap exit: a drive specifier always contains a colon

    # ntpath.splitdrive() is used in place of ntpath.splitroot() because the
    # latter is available only from Python 3.12, while splitdrive() returns
    # the same drive component on every supported interpreter.
    drive = ntpath.splitdrive(path)[0]
    return len(drive) == 2 and drive[1] == ':' and drive[0] in DRIVE_LETTERS


def normalize_locations(locations: LocationsType,
base_url: Optional[str] = None,
keep_relative: bool = False) -> NormalizedLocationsType:
Expand Down

0 comments on commit c2ccb17

Please sign in to comment.