Skip to content

Commit

Permalink
Merge pull request #40 from Sage-Bionetworks-Workflows/bwmac/orca-234…
Browse files Browse the repository at this point in the history
…/improved_file_mounting

[ORCA-234] Improve file path handling
  • Loading branch information
BWMac authored Jun 23, 2023
2 parents d12968e + 3d86c9e commit 0f2aa7b
Show file tree
Hide file tree
Showing 8 changed files with 8 additions and 43 deletions.
6 changes: 3 additions & 3 deletions src/dcqc/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ class File(SerializableMixin):
current work directory (default).
"""

tmp_dir: ClassVar[str] = "dcqc-staged-"

_serialized_properties = ["name", "local_path"]

url: str
Expand Down Expand Up @@ -336,9 +338,7 @@ def stage(
if self._local_path is not None:
return self._local_path
else:
# TODO: This prefix is used by nf-dcqc to easily find the staged file.
# It might be worth using a DCQCTMPDIR to avoid hard-coding this.
destination_str = mkdtemp(prefix="dcqc-staged-")
destination_str = mkdtemp(prefix=self.tmp_dir)
destination = Path(destination_str)

# By this point, destination is defined (not None)
Expand Down
15 changes: 0 additions & 15 deletions src/dcqc/tests/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,21 +202,6 @@ def _interpret_process_outputs(self, outputs: dict[str, Path]) -> TestStatus:
status = TestStatus.FAIL
return status

# TODO: Make changes to this package or the Nextflow
# workflow so that file mounting is handled more cleanly
@staticmethod
def _short_string_path(path: Path, substring: str) -> str:
    """Return a single-quoted path that starts at the matching folder.

    Args:
        path: Path to shorten.
        substring: Text identifying the path component to start from.

    Returns:
        The portion of ``path`` from the first component containing
        ``substring`` onwards, wrapped in single quotes.

    Raises:
        ValueError: If no individual path component contains ``substring``.
    """
    # Match against individual components rather than the joined string: a
    # substring that only matches across a separator (e.g. "to/be") has no
    # component to start from and would otherwise leak a StopIteration.
    index = next(
        (i for i, part in enumerate(path.parts) if substring in part),
        None,
    )
    if index is None:
        raise ValueError(f"{substring} not in {path}")
    # Shorten the path so that it starts at the matching component.
    short_path = Path(*path.parts[index:])
    # Wrap the path string in single quotes.
    return f"'{short_path}'"

# TODO: Include process in serialized test dictionary
# def to_dict(self):
# dictionary = super(ExternalTestMixin, self).to_dict()
Expand Down
3 changes: 1 addition & 2 deletions src/dcqc/tests/bioformats_info_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@ class BioFormatsInfoTest(ExternalBaseTest):

def generate_process(self) -> Process:
path = self.target.file.stage()
string_path = self._short_string_path(path, "dcqc-staged-")

command_args = [
"/opt/bftools/showinf",
"-nopix",
"-novalid",
"-nocore",
string_path,
f"'{path.name}'",
]
process = Process(
container="quay.io/sagebionetworks/bftools:latest",
Expand Down
3 changes: 1 addition & 2 deletions src/dcqc/tests/grep_date_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ class GrepDateTest(ExternalBaseTest):

def generate_process(self) -> Process:
path = self.target.file.stage()
string_path = self._short_string_path(path, "dcqc-staged-")

command_args = [
"grep",
Expand All @@ -18,7 +17,7 @@ def generate_process(self) -> Process:
"-a", # treat input as text
"-q", # suppress output
"'date|time'", # match date or time
string_path,
f"'{path.name}'",
]
process = Process(
container="quay.io/biocontainers/coreutils:8.30--h14c3975_1000",
Expand Down
3 changes: 1 addition & 2 deletions src/dcqc/tests/libtiff_info_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@ class LibTiffInfoTest(ExternalBaseTest):

def generate_process(self) -> Process:
path = self.target.file.stage()
string_path = self._short_string_path(path, "dcqc-staged-")

command_args = [
"tiffinfo",
string_path,
f"'{path.name}'",
]
process = Process(
container="quay.io/sagebionetworks/libtiff:2.0",
Expand Down
3 changes: 1 addition & 2 deletions src/dcqc/tests/ome_xml_schema_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@ class OmeXmlSchemaTest(ExternalBaseTest):

def generate_process(self) -> Process:
path = self.target.file.stage()
string_path = self._short_string_path(path, "dcqc-staged-")

command_args = [
"/opt/bftools/xmlvalid",
string_path,
f"'{path.name}'",
]
process = Process(
container="quay.io/sagebionetworks/bftools:latest",
Expand Down
3 changes: 1 addition & 2 deletions src/dcqc/tests/tiff_tag_306_date_time_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@ class TiffTag306DateTimeTest(ExternalBaseTest):

def generate_process(self) -> Process:
path = self.target.file.stage()
string_path = self._short_string_path(path, "dcqc-staged-")

command_args = [
"tifftools",
"dump",
string_path,
f"'{path.name}'",
"|",
"grep", # pipe the output
"-a", # treat input as text
Expand Down
15 changes: 0 additions & 15 deletions tests/test_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,18 +324,3 @@ def test_that_paired_fastq_parity_test_correctly_handles_compressed_fastq_files(
test = tests.PairedFastqParityTest(target)
test_status = test.get_status()
assert test_status == TestStatus.PASS


def test_that_short_string_path_correctly_shortens_file_paths():
    """Check that the path is cut at the marker folder and single-quoted."""
    marker = "test-substring"
    full_path = Path(f"path/needs/to/be/shortened/{marker}/file.txt")
    result = ExternalTestMixin._short_string_path(full_path, marker)
    assert result == f"'{marker}/file.txt'"


def test_that_short_string_path_raises_valueerror_if_substring_not_in_path():
    """Check that a path lacking the marker is rejected with ValueError."""
    marker = "test-substring"
    unrelated_path = Path("path/needs/to/be/shortened/fail/file.txt")
    with pytest.raises(ValueError):
        ExternalTestMixin._short_string_path(unrelated_path, marker)

0 comments on commit 0f2aa7b

Please sign in to comment.