Skip to content

Commit

Permalink
chris edit, fix name collision
Browse files Browse the repository at this point in the history
  • Loading branch information
takutosato committed Jul 22, 2024
1 parent a242268 commit 31cae75
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 116 deletions.
33 changes: 29 additions & 4 deletions src/main/java/picard/nio/PicardBucketUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@
import htsjdk.io.IOPath;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.utils.ValidationUtils;
import picard.PicardException;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
Expand Down Expand Up @@ -67,11 +65,16 @@ public static PicardHtsPath getTempFilePath(final String directory, String prefi
}
}

/**
 * Overload of getTempFilePath that accepts the directory as a PicardHtsPath rather than a String.
 * The directory's URI string is forwarded unchanged to the String-based overload.
 */
public static PicardHtsPath getTempFilePath(final PicardHtsPath directory, String prefix, final String extension){
    final String directoryUri = directory.getURIString();
    return getTempFilePath(directoryUri, prefix, extension);
}

/**
 * Calls getTempFilePath with an empty prefix.
 * For a local temp file, directory should be null.
 */
public static PicardHtsPath getTempFilePath(String directory, String extension){
    final String emptyPrefix = "";
    return getTempFilePath(directory, emptyPrefix, extension);
}
Expand All @@ -80,14 +83,36 @@ public static PicardHtsPath getLocalTempFilePath(final String prefix, final Stri
return getTempFilePath((String) null, prefix, extension);
}

// Move to BucketUtils
// "directory"
// Signaled by the trailing "/"
// Shouldn't be used for other FileSystems

/**
 * Creates a path to a "directory" on a Google Cloud Storage (GCS) filesystem with a randomly generated name.
 * Since the notion of directories does not exist in GCS, it creates a path to a URI ending in "/".
 * Calling this method will not create a directory/file on GCS. It merely returns a path to a
 * "directory" that does not exist yet.
 *
 * See: https://stackoverflow.com/questions/51892343/google-gsutil-create-folder
 *
 * @param parentDir The root path where the new "directory" is to live e.g. "gs://hellbender-test-logs/staging/picard/test/RevertSam/".
 *                  Must be a GCS path whose URI string ends in "/".
 * @return A PicardHtsPath object pointing to e.g. "gs://hellbender-test-logs/staging/picard/test/RevertSam/{randomly-generated-string}/"
 * @throws IllegalArgumentException if parentDir is not a GCS path or does not end in "/" (raised by ValidationUtils.validateArg)
 */
public static PicardHtsPath getGCSTempDirectory(final PicardHtsPath parentDir){
    final String parentUri = parentDir.getURIString();
    // Constant-first comparison so that a missing scheme fails validation instead of throwing a NullPointerException.
    ValidationUtils.validateArg(PicardBucketUtils.GOOGLE_CLOUD_STORAGE_FILESYSTEM_SCHEME.equals(parentDir.getScheme()), "This method is supported only for a GCS path: " + parentUri);
    ValidationUtils.validateArg(parentUri.endsWith("/"), "parentDir must end in a forward slash '/': " + parentUri);
    // An empty prefix and a "/" suffix yield "{parentDir}{random-uuid}/", i.e. a GCS "directory" path.
    return PicardHtsPath.fromPath(PicardBucketUtils.randomRemotePath(parentUri, "", "/"));
}

/**
* Picks a random name, by putting some random letters between "prefix" and "suffix".
*
* @param stagingLocation The folder where you want the file to be. Must start with "gs://" or "hdfs://"
* @param prefix The beginning of the file name
* @param suffix The end of the file name, e.g. ".tmp"
*/
private static Path randomRemotePath(String stagingLocation, String prefix, String suffix) {
public static Path randomRemotePath(String stagingLocation, String prefix, String suffix) {
if (isGcsUrl(stagingLocation)) {
return getPathOnGcs(stagingLocation).resolve(prefix + UUID.randomUUID() + suffix);
} else if (isHadoopUrl(stagingLocation)) {
Expand Down
51 changes: 14 additions & 37 deletions src/main/java/picard/sam/RevertSam.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import htsjdk.samtools.SAMRecordQueryNameComparator;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
Expand Down Expand Up @@ -241,8 +242,8 @@ public String getHelpDoc() {
"same library name.", shortName = StandardOptionDefinitions.LIBRARY_NAME_SHORT_NAME, optional = true)
public String LIBRARY_NAME;

@Argument(doc = "The prefix to be prepended to the output files, when OUTPUT_BY_READ_GROUP is true but the OUTPUT_MAP was not provided", optional = true)
public String PREFIX = null;
// @Argument(doc = "The prefix to be prepended to the output files, when OUTPUT_BY_READ_GROUP is true but the OUTPUT_MAP was not provided", optional = true)
// public String PREFIX = null;

private final static Log log = Log.getInstance(RevertSam.class);

Expand Down Expand Up @@ -271,7 +272,7 @@ protected int doWork() {
}

final boolean sanitizing = SANITIZE;
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).validationStringency(VALIDATION_STRINGENCY).open(INPUT.toPath());
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()).validationStringency(VALIDATION_STRINGENCY).open(SamInputResource.of(INPUT.toPath())); // tsato: confirm this won't break piped
final SAMFileHeader inHeader = in.getFileHeader();
ValidationUtil.validateHeaderOverrides(inHeader, SAMPLE_ALIAS, LIBRARY_NAME);

Expand All @@ -298,11 +299,12 @@ protected int doWork() {
defaultExtension = "." + OUTPUT_BY_READGROUP_FILE_FORMAT.toString();
}

outputMap = createOutputMap(OUTPUT_MAP == null ? null : OUTPUT_MAP.toPath(),
OUTPUT == null ? null : OUTPUT.toPath(),
defaultExtension,
inHeader.getReadGroups(),
PREFIX);
if (OUTPUT_MAP != null){
outputMap = readOutputMap(OUTPUT_MAP.toPath());
} else {
outputMap = createOutputMapFromReadGroups(inHeader.getReadGroups(), OUTPUT.toPath(), defaultExtension);
}

ValidationUtil.assertAllReadGroupsMapped(outputMap, inHeader.getReadGroups());
headerMap = createHeaderMap(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
} else {
Expand Down Expand Up @@ -572,32 +574,7 @@ private void overwriteLibrary(final List<SAMReadGroupRecord> readGroups, final S
readGroups.forEach(rg -> rg.setLibrary(libraryName));
}

/**
 * Builds the map from read group ID to output path, either by reading it from an output map file
 * or, when no such file is given, by deriving one path per read group inside an output directory.
 *
 * @param outputMapFile If non-null, the map is read from this file and the remaining arguments are not used. May be null.
 * @param outputDir     The output map will contain paths to files in this directory if outputMapFile is null. May be null when outputMapFile is provided.
 * @param extension     The extension appended to each generated file name when outputMapFile is null.
 * @param readGroups    The read groups for which output paths are generated when outputMapFile is null.
 * @param prefix        The prefix to be prepended to output files when OUTPUT is a directory and OUTPUT_BY_READ_GROUP = true. May be null
 * @return The output map produced by whichever of the two strategies was selected.
 */
static Map<String, Path> createOutputMap(
        final Path outputMapFile,
        final Path outputDir,
        final String extension,
        final List<SAMReadGroupRecord> readGroups,
        final String prefix) {

    // No explicit map file: derive the output paths from the read groups and the output directory.
    if (outputMapFile == null) {
        return createOutputMapFromDirectory(readGroups, outputDir, extension, prefix);
    }
    return createOutputMapFromFile(outputMapFile);
}

private static Map<String, Path> createOutputMapFromFile(final Path outputMapFile) {
public static Map<String, Path> readOutputMap(final Path outputMapFile) {
final Map<String, Path> outputMap = new HashMap<>();

try (final TabbedInputParser intermediateParser = new TabbedInputParser(false, Files.newInputStream(outputMapFile));
Expand All @@ -616,12 +593,12 @@ private static Map<String, Path> createOutputMapFromFile(final Path outputMapFil
}

// Create an output map file to be written to a specified directory
private static Map<String, Path> createOutputMapFromDirectory(final List<SAMReadGroupRecord> readGroups, final Path outputDir, final String extension,
final String prefix) {
// tsato: remove prefix?
public static Map<String, Path> createOutputMapFromReadGroups(final List<SAMReadGroupRecord> readGroups, final Path outputDir, final String extension) {
final Map<String, Path> outputMap = new HashMap<>();
for (final SAMReadGroupRecord readGroup : readGroups) {
final String id = readGroup.getId();
final String fileName = prefix == null ? id + extension : prefix + "_" + id + extension;
final String fileName = id + extension;
final Path outputPath = outputDir.resolve(fileName);
outputMap.put(id, outputPath);
}
Expand Down
5 changes: 0 additions & 5 deletions src/main/java/picard/util/TabbedTextFileWithHeaderParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,7 @@
import htsjdk.samtools.util.CloseableIterator;
import picard.PicardException;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.ConcurrentModificationException;
Expand Down
8 changes: 4 additions & 4 deletions src/test/java/picard/cmdline/CommandLineProgramTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ public abstract class CommandLineProgramTest {
public static final PicardHtsPath HG19_CHR2021_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/references/human_g1k_v37.20.21.fasta");
public static final PicardHtsPath HG19_CHR2021 = new PicardHtsPath("testdata/picard/reference/human_g1k_v37.20.21.fasta.gz");

public static final PicardHtsPath NA12878_MINI = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.bam");
public static final PicardHtsPath NA12878_MINI_CRAM = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram");
public static final PicardHtsPath NA12878_MEDIUM = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.bam");
public static final PicardHtsPath NA12878_MEDIUM_CRAM = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram");
public static final PicardHtsPath NA12878_MINI_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.bam");
public static final PicardHtsPath NA12878_MINI_CRAM_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram");
public static final PicardHtsPath NA12878_MEDIUM_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.bam");
public static final PicardHtsPath NA12878_MEDIUM_CRAM_GCLOUD = new PicardHtsPath(GCloudTestUtils.getTestInputPath() + "picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram");

// A per-test-class directory that will be deleted after the tests are complete.
private File tempOutputDir;
Expand Down
6 changes: 3 additions & 3 deletions src/test/java/picard/sam/CreateSequenceDictionaryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -286,10 +286,10 @@ final Path makeTempDictionary(final Path inputFasta, final String dictNamePrefix
// This is a copy of gs://hellbender/test/resources/hg19mini.fasta. Using the original file in the original location is
// undesirable because an accompanying dictionary already exists in the same directory. So we copied it to picard/references
// where the dictionary does not exist.
final PicardHtsPath HG19_MINI = PicardHtsPath.resolve(GCloudTestUtils.TEST_INPUTS_DEFAULT, "picard/references/hg19mini.fasta");
final PicardHtsPath HG19_MINI = PicardHtsPath.resolve(GCloudTestUtils.TEST_INPUTS_DEFAULT_GCLOUD, "picard/references/hg19mini.fasta");
final PicardHtsPath HG19_MINI_LOCAL = new PicardHtsPath("testdata/picard/reference/hg19mini.fasta");

final PicardHtsPath CLOUD_OUTPUT_DIR = PicardHtsPath.resolve(GCloudTestUtils.TEST_STAGING_DEFAULT, "picard/");
final PicardHtsPath CLOUD_OUTPUT_DIR = PicardHtsPath.resolve(GCloudTestUtils.TEST_STAGING_DEFAULT_GCLOUD, "picard/");

@DataProvider
public Object[][] cloudTestData() {
Expand All @@ -310,7 +310,7 @@ public void testCloud(final PicardHtsPath inputReference) {
};

// This is the "original" dictionary that lives in gs://hellbender/test/resources/
final PicardHtsPath expectedOutputPath = PicardHtsPath.resolve(GCloudTestUtils.TEST_INPUTS_DEFAULT, "hg19mini.dict");
final PicardHtsPath expectedOutputPath = PicardHtsPath.resolve(GCloudTestUtils.TEST_INPUTS_DEFAULT_GCLOUD, "hg19mini.dict");
Assert.assertEquals(runPicardCommandLine(argv), 0);
final SAMSequenceDictionary expectedDictionary = SAMSequenceDictionaryExtractor.extractDictionary(expectedOutputPath.toPath());
final SAMSequenceDictionary actualDictionary = SAMSequenceDictionaryExtractor.extractDictionary(output.toPath());
Expand Down
16 changes: 8 additions & 8 deletions src/test/java/picard/sam/DownsampleSamTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -252,27 +252,27 @@ public void testRepeatedDownsampling(List<Strategy> strategies, List<Integer> se
@DataProvider(name="testCloudBamDataProvider")
public Object[][] testCloudBamDataProvider() {
return new Object[][] {
{NA12878_MINI, true, true},
{NA12878_MINI, true, false},
{NA12878_MINI, false, true},
{NA12878_MINI, false, false},
{NA12878_MINI_GCLOUD, true, true},
{NA12878_MINI_GCLOUD, true, false},
{NA12878_MINI_GCLOUD, false, true},
{NA12878_MINI_GCLOUD, false, false},
};
}

@DataProvider(name="testCloudCramDataProvider")
public Object[][] testCloudCramDataProvider() {
return new Object[][] {
{NA12878_MINI_CRAM, true, HG19_CHR2021_GCLOUD}
{NA12878_MINI_CRAM_GCLOUD, true, HG19_CHR2021_GCLOUD}
};
}

@Test(groups = "cloud", dataProvider = "testCloudBamDataProvider")
public void testCloudBam(final PicardHtsPath inputSAM, final boolean outputInCloud, final boolean createMetrics) throws IOException {
final Optional<PicardHtsPath> output = outputInCloud ?
Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT + "downsample", ".bam")) :
Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD + "downsample", ".bam")) :
Optional.empty();
final Optional<PicardHtsPath> metricsFile = createMetrics ?
Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT + "metrics", ".txt")) :
Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD + "metrics", ".txt")) :
Optional.empty();
testDownsampleWorker(inputSAM, 0.5, ConstantMemory.toString(), DEFAULT_RANDOM_SEED, output, metricsFile, Optional.empty());
}
Expand All @@ -281,7 +281,7 @@ public void testCloudBam(final PicardHtsPath inputSAM, final boolean outputInClo
@Test(groups = "cloud", dataProvider = "testCloudCramDataProvider")
public void testCloudCram(final PicardHtsPath inputCRAM, final boolean outputInCloud, final PicardHtsPath reference) throws IOException {
final Optional<PicardHtsPath> output = outputInCloud ?
Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT + "downsample", ".cram")) :
Optional.of(PicardBucketUtils.getTempFilePath(GCloudTestUtils.TEST_OUTPUT_DEFAULT_GCLOUD + "downsample", ".cram")) :
Optional.empty();
testDownsampleWorker(inputCRAM, 0.5, ConstantMemory.toString(), DEFAULT_RANDOM_SEED, output, Optional.empty(), Optional.of(reference));
}
Expand Down
Loading

0 comments on commit 31cae75

Please sign in to comment.