Skip to content

Commit

Permalink
Merge pull request #29 from ENCODE-DCC/dev
Browse files Browse the repository at this point in the history
v0.4.0
  • Loading branch information
leepc12 authored Nov 6, 2022
2 parents 2c231e0 + ea308e8 commit bee800b
Show file tree
Hide file tree
Showing 18 changed files with 158 additions and 249 deletions.
6 changes: 4 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ install_py3_packages: &install_py3_packages
python3 -m pip install --upgrade pip
pip3 install PyYAML --ignore-installed
pip3 install pre-commit pytest requests dateparser filelock six ntplib
pip3 install filelock --upgrade
install_gcs_lib: &install_gcs_lib
Expand Down Expand Up @@ -84,7 +85,7 @@ jobs:
- run: *install_aws_lib
- run: *make_root_only_dir
- run:
no_output_timeout: 60m
no_output_timeout: 90m
command: |
cd tests/
echo ${GCLOUD_SERVICE_ACCOUNT_SECRET_JSON} > tmp_key.json
Expand All @@ -99,7 +100,8 @@ jobs:
--gcp-private-key-file tmp_key.json \
--s3-root ${S3_ROOT} \
--gcs-root ${GCS_ROOT} \
--gcs-root-url ${GCS_ROOT_URL}
--gcs-root-url ${GCS_ROOT_URL} \
-vv -s
# to use gsutil
export BOTO_CONFIG=/dev/null
Expand Down
13 changes: 7 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
repos:
- repo: https://github.com/psf/black
rev: 19.3b0
rev: 22.3.0
hooks:
- id: black
language_version: python3
Expand Down Expand Up @@ -31,8 +32,8 @@
- id: debug-statements
- id: check-yaml

- repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
rev: 0.0.10
hooks:
- id: yamlfmt
args: [--mapping, '2', --sequence, '4', --offset, '2']
# - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
# rev: 0.0.10
# hooks:
# - id: yamlfmt
# args: [--mapping, '2', --sequence, '4', --offset, '2']
2 changes: 1 addition & 1 deletion autouri/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
from .s3uri import S3URI

__all__ = ["AbsPath", "AutoURI", "URIBase", "GCSURI", "HTTPURL", "S3URI"]
__version__ = "0.3.0"
__version__ = "0.4.0"
41 changes: 33 additions & 8 deletions autouri/abspath.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
"""Important update to allow relative path for some file extensions:
AbsPath, as its name implies, was originally designed to hold an absolute path only,
but it now also allows a relative path to a file if that file exists in the CWD and has
an allowed extension (.json, .csv, .tsv). Such a relative path is automatically converted
to an absolute path.
"""

import errno
import glob
import hashlib
Expand All @@ -14,6 +20,26 @@

logger = logging.getLogger(__name__)

# File extensions for which a relative path may be promoted to an absolute one.
EXTS_ALLOWED_FOR_RELPATH_TO_ABSPATH_CONVERSION = (".json", ".csv", ".tsv")


def convert_relpath_to_abspath_if_valid(
    rel_path,
    base_dir=None,
    allowed_exts=EXTS_ALLOWED_FOR_RELPATH_TO_ABSPATH_CONVERSION,
):
    """Convert a relative path to an absolute path if it is valid.

    "Valid" means that ``rel_path`` joined onto ``base_dir`` is an existing
    file whose name ends with one of the extensions in ``allowed_exts``.

    Args:
        rel_path: Path to examine. An already-absolute path is returned
            unchanged.
        base_dir: Directory that ``rel_path`` is joined onto. ``None``
            (the default) means the current working directory at the time
            of the call.
        allowed_exts: A single extension string or an iterable of extension
            strings (including the leading dot).

    Returns:
        ``os.path.join(base_dir, rel_path)`` if the path is valid, otherwise
        ``rel_path`` exactly as it was passed in.
    """
    if os.path.isabs(rel_path):
        return rel_path

    if base_dir is None:
        # Looked up on every call. Writing os.getcwd() as the default value in
        # the signature would freeze the directory the process was in when
        # this module was imported and ignore any later os.chdir().
        base_dir = os.getcwd()

    if not isinstance(allowed_exts, str):
        # str.endswith() only accepts a str or a tuple of str.
        allowed_exts = tuple(allowed_exts)

    abs_path = os.path.join(base_dir, rel_path)
    # isfile() is already False for a missing path, so a separate exists()
    # check is not needed.
    if os.path.isfile(abs_path) and abs_path.endswith(allowed_exts):
        return abs_path
    return rel_path


class AbsPath(URIBase):
"""
Expand All @@ -36,8 +62,11 @@ class AbsPath(URIBase):
def __init__(self, uri, thread_id=-1):
    """Initialize from ``uri``, expanding ``~`` and resolving allowed relative paths.

    A string ``uri`` is passed through os.path.expanduser() first; any other
    value is forwarded to the base initializer untouched. Once the base
    initializer has run, ``self._uri`` is replaced by the result of
    convert_relpath_to_abspath_if_valid().
    """
    expanded_uri = os.path.expanduser(uri) if isinstance(uri, str) else uri
    super().__init__(expanded_uri, thread_id=thread_id)

    # The conversion reads self._uri after the base initializer has run,
    # not the raw constructor argument.
    self._uri = convert_relpath_to_abspath_if_valid(self._uri)

@property
def is_valid(self):
return os.path.isabs(self._uri)
Expand Down Expand Up @@ -73,8 +102,7 @@ def _get_lock(self, timeout=None, poll_interval=None):
return SoftFileLock(u_lock._uri, timeout=timeout)

def get_metadata(self, skip_md5=False, make_md5_file=False):
"""If md5 file doesn't exist then use hashlib.md5() to calculate md5 hash
"""
"""If md5 file doesn't exist then use hashlib.md5() to calculate md5 hash"""
exists = os.path.exists(self._uri)
mt, sz, md5 = None, None, None
if exists:
Expand Down Expand Up @@ -119,8 +147,7 @@ def _rm(self):
return os.remove(self._uri)

def _cp(self, dest_uri):
"""Copy from AbsPath to other classes
"""
"""Copy from AbsPath to other classes"""
dest_uri = AutoURI(dest_uri)

if isinstance(dest_uri, AbsPath):
Expand Down Expand Up @@ -159,8 +186,7 @@ def get_mapped_url(self, map_path_to_url=None) -> Optional[str]:
return None

def mkdir_dirname(self):
"""Create a directory but raise if no write permission on it
"""
"""Create a directory but raise if no write permission on it"""
os.makedirs(self.dirname, exist_ok=True)
if not os.access(self.dirname, os.W_OK):
raise PermissionError(
Expand Down Expand Up @@ -194,8 +220,7 @@ def soft_link(self, target, force=False):
raise e

def __calc_md5sum(self):
"""Expensive md5 calculation
"""
"""Expensive md5 calculation"""
logger.debug(
"calculating md5sum hash of local file: {file}".format(file=self._uri)
)
Expand Down
42 changes: 14 additions & 28 deletions autouri/autouri.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ def short_uuid(self):

@property
def uri(self) -> Any:
"""Can store any type of variable.
"""
"""Can store any type of variable."""
return self._uri

@property
Expand All @@ -147,14 +146,12 @@ def is_valid(self) -> bool:

@property
def dirname(self) -> str:
"""Dirname with a scheme (gs://, s3://, http://, /, ...).
"""
"""Dirname with a scheme (gs://, s3://, http://, /, ...)."""
return os.path.dirname(str(self._uri))

@property
def dirname_wo_scheme(self) -> str:
"""Dirname without a scheme (gs://, s3://, http://, /, ...).
"""
"""Dirname without a scheme (gs://, s3://, http://, /, ...)."""
return os.path.dirname(self.uri_wo_scheme)

@property
Expand All @@ -169,20 +166,17 @@ def loc_dirname(self) -> str:

@property
def basename(self) -> str:
"""Basename.
"""
"""Basename."""
return os.path.basename(str(self._uri))

@property
def basename_wo_ext(self) -> str:
"""Basename without extension.
"""
"""Basename without extension."""
return os.path.splitext(self.basename)[0]

@property
def ext(self) -> str:
"""File extension.
"""
"""File extension."""
return os.path.splitext(self.basename)[1]

@property
Expand All @@ -191,20 +185,17 @@ def exists(self) -> bool:

@property
def mtime(self) -> float:
"""Seconds since the epoch.
"""
"""Seconds since the epoch."""
return self.get_metadata(skip_md5=True).mtime

@property
def size(self) -> int:
"""Size in bytes.
"""
"""Size in bytes."""
return self.get_metadata(skip_md5=True).size

@property
def md5(self) -> str:
"""Md5 hash hexadecimal digest string.
"""
"""Md5 hash hexadecimal digest string."""
return self.get_metadata().md5

@property
Expand Down Expand Up @@ -239,8 +230,7 @@ def md5_from_file(self) -> str:

@property
def md5_file_uri(self) -> "AutoURI":
"""Get md5 file URI. Not guaranteed to exist
"""
"""Get md5 file URI. Not guaranteed to exist"""
return AutoURI(str(self._uri) + AutoURI.MD5_FILE_EXT)

def cp(
Expand Down Expand Up @@ -353,15 +343,13 @@ def cp(
return (d._uri, 0) if return_flag else d._uri

def write(self, s, no_lock=False):
"""Write string/bytes to file. It is protected by a locking mechanism.
"""
"""Write string/bytes to file. It is protected by a locking mechanism."""
with self.get_lock(no_lock=no_lock):
self._write(s)
return

def rm(self, no_lock=False, silent=False):
"""Remove a URI from its storage. It is protected by by a locking mechanism.
"""
"""Remove a URI from its storage. It is protected by by a locking mechanism."""
with self.get_lock(no_lock=no_lock):
self._rm()
if not silent:
Expand Down Expand Up @@ -455,8 +443,7 @@ def get_metadata(self, skip_md5=False, make_md5_file=False) -> URIMetadata:

@abstractmethod
def read(self, byte=False) -> Union[str, bytes]:
"""Reads string/byte from a URI.
"""
"""Reads string/byte from a URI."""
raise NotImplementedError

@abstractmethod
Expand Down Expand Up @@ -504,8 +491,7 @@ def _cp_from(self, src_uri: Union[str, "AutoURI"]) -> bool:

@classmethod
def get_path_sep(cls) -> str:
"""Separator for directory.
"""
"""Separator for directory."""
return cls._PATH_SEP

@classmethod
Expand Down
Loading

0 comments on commit bee800b

Please sign in to comment.