Skip to content

Commit

Permalink
update results for 2019-10-24
Browse files Browse the repository at this point in the history
  • Loading branch information
nickp60 committed Oct 24, 2019
1 parent 630ae96 commit bc8a42f
Show file tree
Hide file tree
Showing 14 changed files with 235,459 additions and 54 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added plots/2019-10-24-Complete-results-totals.pdf
Binary file not shown.
Binary file added plots/2019-10-24-Draft-results-byyear.pdf
Binary file not shown.
Binary file added plots/2019-10-24-Draft-results-totals.pdf
Binary file not shown.
Binary file added plots/2019-10-24-results-byyear.pdf
Binary file not shown.
Binary file added plots/2019-10-24-results-totals.pdf
Binary file not shown.
Binary file modified results-byyear.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed results/2017-10-16-results-byyear.jpeg
Binary file not shown.
Binary file removed results/2017-10-16-results-byyear.png
Binary file not shown.
2 changes: 1 addition & 1 deletion scripts/parse_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ print(paste("writing", nrow(bad_hits), "dodgy rows to parsing_errors.txt"))
# Write the bad_hits rows to a tab-separated file (header row, no row names)
# inside results_path.
write.table(
  bad_hits,
  file = file.path(results_path, "parsing_errors.txt"),
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# Keep only records whose BioSample accession starts with "SAM".
db <- db[grepl("^SAM.*", db$BioSample.Accession), ]

# Left join on the BioSample accession: every remaining db record is kept and
# the fixedhits columns are filled in where a matching biosample exists.
all_biosamples <- merge(
  db[, c("BioSample.Accession", "Assembly.Accession", "Status",
         "nuccore_first_chrom", "WGS", "Release.Date", "Modify.Date")],
  fixedhits,
  by.x = "BioSample.Accession",
  by.y = "biosample",
  all.x = TRUE
)
Expand Down
103 changes: 50 additions & 53 deletions scripts/plot_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@ library(ggplot2)
library(dplyr)
###########################

ggplot2::theme_set(ggplot2::theme_minimal() + ggplot2::theme(
rect = element_rect(fill = "transparent"),
#plot.background = element_rect(fill = "#FAFAFA", color=NA),
plot.background = element_rect(fill = "transparent", color=NA),
#axis.text = element_text(size=12),
#axis.title = element_text(size=16),
panel.grid.minor.x = element_blank(),
#title = element_text(size=20),
# legend.text = element_text(size=12),
#plot.subtitle = element_text(size=12, colour = "grey60")
plot.subtitle = element_text(colour = "grey60")
)
# Global ggplot2 theme for every plot in this script: theme_minimal() with
# transparent rect elements and plot background, no minor vertical grid lines,
# and grey subtitles. The font-size overrides that were commented out
# (axis.text 12, axis.title 16, title 20, legend.text 12, subtitle 12) and the
# "#FAFAFA" background alternative remain disabled.
ggplot2::theme_set(
  ggplot2::theme_minimal() +
    ggplot2::theme(
      rect = ggplot2::element_rect(fill = "transparent"),
      plot.background = ggplot2::element_rect(fill = "transparent", colour = NA),
      panel.grid.minor.x = ggplot2::element_blank(),
      plot.subtitle = ggplot2::element_text(colour = "grey60")
    )
)


Expand All @@ -23,33 +24,27 @@ ggplot2::theme_set(ggplot2::theme_minimal() + ggplot2::theme(
# Command-line arguments supplied after the script name. trailingOnly is
# spelled out (rather than a positional `T`, which can be reassigned).
args <- commandArgs(trailingOnly = TRUE)

# test args
#args=c("sraFind.tab", "./tmp_results/", "All")
# args=c("sraFind.tab", "./tmp_results/")
# setwd("~/GitHub/sraFind")

# Remind the reader how assembly levels are grouped in the plots.
print("Note that 'Complete Genome' and 'Chromosome' level assemblies includes results for any with at least 1 chromosomal replicon, as these end up being used interchangably for microbes")
print("Note that 'Contig' and 'Scaffold' are grouped together as 'Draft'")

if( length(args) != 3 ){
stop('USAGE: Rscript plot_results.R sraFind-CompleteGenome-biosample-with-SRA-hits.txt ./plots/ <All, Complete, Draft>')
# Exactly two positional arguments are accepted; anything else aborts with
# the usage message.
n_args <- length(args)
if (n_args != 2) stop("USAGE: Rscript plot_results.R sraFind.tab ./plots/ ")
# First positional argument: path of the tab-separated hits table read below.
parsed_hits_file <- args[1]
# Second positional argument: directory the dated PDF plots are written to.
plot_dir <- args[2]
# NOTE(review): a third argument is read here, but one of the usage checks
# shown nearby accepts exactly two arguments (args[3] would then be NA).
# Confirm whether this assignment is still needed.
level <- args[3]
# Create the output directory when it does not exist yet.
if (!dir.exists(plot_dir)) dir.create(plot_dir)

# Assembly levels that `level` is allowed to take.
all_statuses <- c("Complete", "Draft", "All")

# level <- gsub("(.*)sraFind-(.*?)-biosample.*", "\\2", parsed_hits_file)
# Abort early when the requested level is not one of the recognised values.
level_is_known <- level %in% all_statuses
if (!level_is_known) {
  stop("Level must be one of Complete, Draft, All")
}
print("reading parsed hits")
# Tab-separated table with a header row; strings stay character vectors.
results <- read.csv(parsed_hits_file, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Collapse assembly status into two levels: "Scaffold"/"Contig" become
# "Draft", every other status becomes "Complete".
results$lev <- ifelse(results$Status %in% c("Scaffold", "Contig"), "Draft", "Complete")
# "Yes" when the row has a run_SRAs value, "No" when it is NA.
results$exist <- ifelse(is.na(results$run_SRAs), "No", "Yes")
# Tally of rows with and without SRA runs.
table(results$exist)

results$createDate <- as.Date(strftime(results[, "Release.Date"]))
results$updateDate <- as.Date(strftime(results[, "Modify.Date"]))
# A lone "-" presumably marks a missing date (TODO confirm against the input
# table); turn only those placeholders into NA before parsing.
# The pattern is anchored because gsub() with an NA replacement sets every
# element that matches to NA, so an unanchored "-" would also blank out
# hyphenated dates such as "2019-10-24".
results$createDate <- as.Date(strftime(gsub("^\\s*-\\s*$", NA, results$Release.Date)))
results$updateDate <- as.Date(strftime(gsub("^\\s*-\\s*$", NA, results$Modify.Date)))
results <- results %>%
mutate(month=format(createDate, "%Y-%m"),
year=format(createDate, "%Y")) %>%
Expand All @@ -62,34 +57,36 @@ results <- results %>%
nmonth_open=sum(exist=="Yes")) %>%
as.data.frame()

str(results)

ptitle <- paste0("'", level,"'-status prokaryotic genomes from NCBI as of ", Sys.Date())
psubtitle <-paste0("From the ", nrow(results), " ", level, "-level assemblies with nuccore entries")
if (level == "Complete"){
psubtitle <- paste0(psubtitle, "\nNote: ", level, " includes both 'Complete Genome' and 'Chromosome' levels")
#str(results)
# For each assembly level, select the rows to plot and build the plot labels.
# "All" uses every row; "Draft" and "Complete" keep only rows whose `lev`
# matches the level.
for (level in c("Draft", "Complete", "All")){
  if (level == "All"){
    thisdf <- results
  } else{
    thisdf <- results %>% filter(lev == level)
  }
  ptitle <- paste0("'", level,"'-status prokaryotic genomes from NCBI as of ", Sys.Date())
  # Count the rows of the subset for this level rather than of the full
  # table, so the number in the subtitle matches the data being described.
  psubtitle <- paste0("From the ", nrow(thisdf), " ", level, "-level assemblies with nuccore entries")
# NOTE(review): this brace closes the for loop directly after the labels are
# built; confirm the plotting code is meant to run inside the loop.
}

p_bars <- ggplot(results, aes(exist)) + geom_bar(width = .5) + coord_flip() +
scale_y_continuous(expand=c(0, 0)) +
labs(title=ptitle,
subtitle=psubtitle,
x="SRA Accession Found",
y="Count")
ggsave(p_bars, file=file.path(plot_dir, paste0(Sys.Date(), "-results-totals.pdf")), width = 9, height = 5)

b_byyear <- ggplot(results, aes(x=year, fill=exist)) +
geom_bar(position="dodge") +# coord_flip() +

# Horizontal bar chart counting rows of thisdf by `exist`, saved as a dated,
# level-tagged PDF in plot_dir.
p_bars <- ggplot(thisdf, aes(exist)) +
  geom_bar(width = 0.5) +
  coord_flip() +
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    title = ptitle,
    subtitle = psubtitle,
    x = "SRA Accession Found",
    y = "Count"
  )
ggsave(
  filename = file.path(plot_dir, paste0(Sys.Date(), "-", level, "-results-totals.pdf")),
  plot = p_bars,
  width = 9,
  height = 5
)

b_byyear <- ggplot(thisdf, aes(x=year, fill=exist)) +
geom_bar(position="dodge") +# coord_flip() +
scale_fill_manual(values=c("grey60", "darkgreen"))+
scale_y_continuous(expand=c(0, 0)) +
theme(axis.text.x = element_text(angle=65, hjust = 1))+
labs(title=ptitle,
subtitle=psubtitle,
x="",
y="Number of genomes",
fill="Reads available?")
ggsave(b_byyear, file=file.path(args[2], paste0(Sys.Date(), "-results-byyear.pdf")), width = 9, height = 5)
ggsave(b_byyear, file=file.path("results-byyear.png"), width = 7, height = 5, units = "in", res = 300)


scale_y_continuous(expand=c(0, 0)) +
theme(axis.text.x = element_text(angle=65, hjust = 1))+
labs(title=ptitle,
subtitle=psubtitle,
x="",
y="Number of genomes",
fill="Reads available?")
ggsave(b_byyear, file=file.path(args[2], paste0(Sys.Date(), "-", level, "-results-byyear.pdf")), width = 9, height = 5)
}
ggsave(b_byyear, file=file.path("results-byyear.png"), width = 7, height = 5, units = "in", dpi = 300)
Loading

0 comments on commit bc8a42f

Please sign in to comment.