forked from hiweller/spiderfish
-
Notifications
You must be signed in to change notification settings - Fork 0
/
speciesNames.R
64 lines (51 loc) · 2.86 KB
/
speciesNames.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Summarise which species of a family have downloaded pictures.
#
# Usage: Rscript speciesNames.R <Family> <parent_dir>
#   <Family>      family name; also the name of the sub-directory of
#                 <parent_dir> holding <Family>.json and the All/, Pass/ and
#                 Fail/ picture folders.
#   <parent_dir>  directory containing the <Family> directory (absolute or
#                 relative to the directory the script is launched from).
#
# Writes into <parent_dir>/<Family>/:
#   <Family>_missingPics.csv  species in the family with no picture in the
#                             JSON file (only written if there are any)
#   <Family>_speciesURLs.csv  image file name -> species, for images in All/
#   <Family>_failOnly.csv     species whose only pictures are in Fail/
#                             (only written if there are any)
library(rfishbase)
library(jsonlite)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("Usage: Rscript speciesNames.R <Family> <parent_dir>", call. = FALSE)
}
family <- as.character(args[1])

# Resolve the family directory once, as an absolute path, and build every
# other path from it. The script previously changed the working directory and
# then re-used the raw (possibly relative) <parent_dir>, which pointed at the
# wrong location whenever <parent_dir> was not absolute.
family_dir <- normalizePath(
  file.path(args[2], family),
  winslash = "/",
  mustWork = TRUE
)

# List the .jpg files in a sub-directory of the family directory.
# `pattern` is a regular expression, so the extension is escaped and anchored.
list_jpgs <- function(subdir) {
  dir(file.path(family_dir, subdir), pattern = "\\.jpg$")
}

# Build the path of an output CSV: <family_dir>/<Family><suffix>.
output_csv <- function(suffix) {
  file.path(family_dir, paste0(family, suffix))
}

# get list of species in family and species pictures downloaded
species <- species_list(Family = family) # all species in family
pictures <- fromJSON(file.path(family_dir, paste0(family, ".json")))
json_species <- unique(pictures$species) # unique species with pictures

# species in the species list but not the pictures list had no image available
missing_species <- species[!(species %in% json_species)]
missing_species_df <- data.frame(Species = missing_species)

# correlate picture names with species names
# picture naming convention is :
# first 2 letters of genus +
# first 3 letters of species +
# _ + (u, f, m, j as applicable for unidentified/female/male/juvenile) +
# number + .jpg
pic_species <- pictures$species # species names from JSON file
pic_url <- unlist(pictures$image) # URLs from JSON file (some duplicates)
# Keep only the trailing 12 characters of each URL, i.e. a file name with a
# single-digit picture number under the convention above.
# NOTE(review): names with a multi-digit number are longer than 12 characters
# and would be truncated here, so they would never match a file on disk --
# confirm whether such files occur.
url_length <- nchar(pic_url)
pic_url <- substr(pic_url, url_length - 11, url_length)

# only keep image urls that got used as permanent URLs in saving
all_path <- list_jpgs("All")
species_url_df <- data.frame(Image = pic_url, Species = pic_species)
species_url_df <- species_url_df[pic_url %in% all_path, ]
species_url_df <- species_url_df[order(species_url_df$Image), ]

# of species that have a photograph, some probably only have a crummy photo
# (sorted into 'Fail'), so list the species that didn't get a 'Pass' photo;
# you'll have to decide whether you want the one(s) in 'Fail' or not
pass_species <- species_url_df[species_url_df$Image %in% list_jpgs("Pass"), ]
fail_species <- species_url_df[species_url_df$Image %in% list_jpgs("Fail"), ]
failed <- unique(fail_species$Species)
passed <- unique(pass_species$Species)
fail_only_df <- data.frame(
  Species = as.character(failed[!(failed %in% passed)])
)

# write the reports; the two "problem" lists are skipped when empty
if (length(missing_species) > 0) {
  write.csv(
    x = missing_species_df,
    row.names = FALSE,
    file = output_csv("_missingPics.csv")
  )
}
write.csv(
  x = species_url_df,
  row.names = FALSE,
  file = output_csv("_speciesURLs.csv")
)
if (nrow(fail_only_df) > 0) {
  write.csv(
    x = fail_only_df,
    row.names = FALSE,
    file = output_csv("_failOnly.csv")
  )
}