Skip to content

Commit

Permalink
Merge pull request #20 from dufeiyu/error_checking
Browse files Browse the repository at this point in the history
Add more samplesheet error-checking
  • Loading branch information
dufeiyu authored Dec 21, 2023
2 parents fe23b74 + 8851e01 commit 690cc2d
Showing 1 changed file with 58 additions and 4 deletions.
62 changes: 58 additions & 4 deletions scripts/launcher.pl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

# Workflow files are looked up directly under $git_dir.
my $conf = File::Spec->join($git_dir, 'application.conf');
my $wdl = File::Spec->join($git_dir, 'Soma.wdl');
# Soma.json serves as the template for the workflow inputs JSON.
my $json_template = File::Spec->join($git_dir, 'Soma.json');

# NOTE(review): presumably the job group used when submitting the workflow -- confirm.
my $group = '/cle/wdl/tcp';
Expand All @@ -52,22 +51,73 @@

#parse sample spreadsheet
my $data = Spreadsheet::Read->new($sample_sheet);
# Spreadsheet::Read->new returns nothing when the file cannot be read, so
# stop here with a clear message instead of failing on the first method call.
die "Failed to parse spreadsheet $sample_sheet" unless $data;

#error-checking
# Validate the 'QC Metrics' sheet (looked up by name, not by position):
#   * a header row with ACCESSION NUMBER in column 1 and SAMPLE ID in column 3
#     must be present;
#   * every other row needs an accession that starts with "G-" or looks like
#     G<digits>-<digits>;
#   * every other row needs a non-empty, unique SAMPLE ID.
# The sample IDs are collected in %qc_samples for later cross-checking.
my $qc = 'QC Metrics';
my $sheet = $data->sheet($qc);
die "$qc is not a valid sheet in $sample_sheet" unless $sheet;

my $flag = 0;      # becomes 1 once the column-header row has been seen
my %qc_samples;    # whitespace-stripped SAMPLE ID => 1
for my $row ($sheet->rows()) {
    # Empty cells come back undefined; map them to '' so the checks below
    # report a proper error instead of "uninitialized value" warnings.
    my $acc    = defined $row->[0] ? $row->[0] : '';
    my $sample = defined $row->[2] ? $row->[2] : '';

    # Header row: recognised by "<space>NUMBER" in the first column.
    if ($acc =~ /\sNUMBER/) {
        unless ($acc =~ /^ACCESSION\sNUMBER/ and $sample =~ /^SAMPLE\sID/) {
            die "No ACCESSION NUMBER and or SAMPLE ID as the first and third column header for $qc sheet";
        }
        $flag = 1;
        next;
    }

    # Data row: check the accession number.
    $acc =~ s/\s+//g;
    die "$acc not starting with G" unless $acc =~ /^G/;
    unless ($acc =~ /^G\-/ or $acc =~ /^G\d+\-\d+$/) {
        die "$acc is not a valid accession number in $qc sheet";
    }

    # Data row: the sample ID must be present and must not repeat.
    $sample =~ s/\s+//g;
    die "Missing SAMPLE ID for $acc in $qc sheet" unless length $sample;
    die "There are multiple $sample in $qc sheet" if $qc_samples{$sample};
    $qc_samples{$sample} = 1;
}
die "There is no row for valid column headers in $qc sheet" unless $flag;

# Switch $sheet over to the 'Samplesheet' sheet; abort if the workbook lacks it.
my $ss = 'Samplesheet';
$sheet = $data->sheet($ss)
    or die "$ss is not a valid sheet in $sample_sheet";

# NOTE(review): $ds_str / $si_str look like text accumulators filled further
# down in the script -- confirm against the code outside this section.
my ($ds_str, $si_str);

# Running id, incremented once per Samplesheet row.
my $seq_id = 2900000000;

# NOTE(review): presumably collects cases flagged for exclusion -- confirm.
my @cases_excluded;

# Sample names already seen in the Samplesheet sheet (duplicate detection).
my %samples;
for my $row ($sheet->rows()) {
next if $row->[0] =~ /Run|Lane/i;
# Rows that are not Run/Lane headers must have a digit in the first (lane)
# column. The message names the sheet via $ss, like the other checks here.
unless ($row->[0] =~ /\d+/) {
    die "Lane number is expected, Check $ss sheet";
}
my ($lane, $flowcell, $name, $index, $exception) = @$row;

# Strip all whitespace from the sample name, then cross-check it:
#   1. it must not already have appeared in this sheet;
#   2. it must have been listed in the QC sheet.
# A matched name is removed from %qc_samples, so whatever remains there after
# the loop is a QC-sheet sample with no Samplesheet row.
$name =~ s/\s+//g;
die "There are multiple $name in $ss sheet" if $samples{$name};
die "$name in $ss sheet does not exist in $qc sheet" unless $qc_samples{$name};
delete $qc_samples{$name};
$samples{$name} = 1;

# Library name: the sample name itself when it already contains "lib",
# otherwise the sample name with a '-lib1' suffix.
my $lib = $name =~ /lib/ ? $name : $name . '-lib1';

Expand All @@ -82,6 +132,10 @@

$seq_id++;
}
# Any sample still in %qc_samples was listed in the QC sheet but never matched
# by a Samplesheet row; report them all (sorted) and abort.
if (my @leftover = sort keys %qc_samples) {
    my $sample_str = join ",", @leftover;
    die "$sample_str in $qc sheet but not in $ss sheet";
}

## DRAGEN sample sheet
my $dragen_ss = File::Spec->join($out_dir, 'demux_sample_sheet.csv');
Expand Down

0 comments on commit 690cc2d

Please sign in to comment.