Skip to content

Commit

Permalink
update alternative names
Browse files Browse the repository at this point in the history
  • Loading branch information
zoometh committed Dec 8, 2024
1 parent 0de8c7e commit 1430f08
Show file tree
Hide file tree
Showing 12 changed files with 249 additions and 91 deletions.
13 changes: 12 additions & 1 deletion R/neo_calib.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' @name neo_calib
#'
#' @description Calibrate radiocarbon dates, calculate tpq / taq and the weighted median
#' @description Calibrate radiocarbon dates, calculate tpq / taq and the weighted median (wmedian)
#'
#' @param df.c14 A dataframe. The original XLSX with neonet columns (SiteName, Period, etc.) with with checked values (see: neo_subset)
#' @param intCal calibration curve
Expand All @@ -27,13 +27,24 @@ neo_calib <- function(df.c14 = NA,
ref.period = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/periods.tsv",
verbose = TRUE,
verbose.freq = 50){
# df.c14 <- c14_3rdpart
if(verbose){
print(paste0("Calibration"))
}
`%>%` <- dplyr::`%>%`
if(inherits(df.c14, "sf")){
if(verbose){
print(paste0("Reads a 'sf' dataframe"))
}
df.c14 <- sf::st_set_geometry(df.c14, NULL)
}
df.c14$C14Age <- as.numeric(df.c14$C14Age)
df.c14$C14SD <- as.numeric(df.c14$C14SD)
if(verbose){
print(paste0("Data in entry: ", nrow(df.c14)))
df.c14 <- na.omit(df.c14)
print(paste0("Data in entry after removing NA: ", nrow(df.c14)))
}
if(stat.mean){
if(verbose){
print(paste0("Create a column 'mean' to be filled with dates weighted means"))
Expand Down
105 changes: 105 additions & 0 deletions R/neo_dbs_3rdpart_parse.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#' @name neo_dbs_3rdpart_parse
#'
#' @description Collect radiocarbon dates from an external dataset
#'
#' @param l.dbs A vector of radiocarbon datasets listed in c14bazAAR.
#' @param col.c14baz A vector of field names to collect from the `l.dbs` datasets.
#' @param present A date for the present, to calibrate from BP (1950). Default: 1950.
#' @param chr.interval.uncalBC A vector of two BC dates of chronological bounds to subset the radiocarbon dates selection.
#' @param roi A `sf` polygon to subset the radiocarbon dates selection.
#' @param verbose if TRUE (default) then display different messages.
#'
#' @return A dataframe of standardized radiocarbon dates.
#'
#' @examples
#'
#'
#' @export
neo_dbs_3rdpart_parse <- function(file.path = "C:/Rprojects/neonet/doc/references/brami15/db_data/12520_2014_193_MOESM1_ESM.xlsx",
sourcedb = "brami15",
db_period = "EN",
Period = "EN",
db_culture = NA,
colors = NA,
drop.sitenames.equiv.coords = FALSE,
sitenames.equiv = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_corrected_sitenames.geojson",
col.neonet = c("SiteName", "LabCode", "C14Age", "C14SD"),
col.3rdpart = c("Site", "Lab.no.", "Date.BP", "Interval.BP"),
text.to.rm = c('\\(1\\)|\\(2\\)|\\(3\\)'),
present = 1950,
chr.interval.uncalBC = NA,
roi = NA,
verbose = TRUE){
`%>%` <- dplyr::`%>%`
if (length(col.3rdpart) != length(col.neonet)){
stop("'col.neonet' and 'col.3rdpart' should have the same length for the mapping")
}
c14_3rdpart <- openxlsx::read.xlsx(file.path)
# rename columns
for(i in seq(1, length(colnames(c14_3rdpart)))){
if(colnames(c14_3rdpart)[i] %in% col.3rdpart){
idx <- match(colnames(c14_3rdpart)[i], col.3rdpart)
colnames(c14_3rdpart)[i] <- col.neonet[idx]
}
}
# drop all non used columns
c14_3rdpart <- c14_3rdpart[ , col.neonet]
# remove bad text pattern in the whole dataset
c14_3rdpart <- c14_3rdpart.clean %>%
dplyr::mutate(across(everything(), ~ gsub(text.to.rm, "", .)))
c14_3rdpart <- na.omit(c14_3rdpart)
head(c14_3rdpart)
source("R/neo_calib.R")
df.c14 <- neo_calib(c14_3rdpart,
stat.mean = TRUE)
# Add supp data
df.c14$sourcedb <- sourcedb
df.c14$db_period <- db_period
df.c14$db_culture <- db_culture
df.c14$Period <- Period
df.c14$colors <- colors
df.c14$mean <- NULL

sitenames <- sf::st_read(sitenames.equiv)
sitenames$AlternativeNames <- paste0(sitenames$AlternativeNames, " | ", sitenames$SiteName)
sitenames <- sitenames %>%
tidyr::separate_rows(AlternativeNames, sep = "\\|") # Use double escape for the pipe character
sitenames$AlternativeNames <- trimws(sitenames$AlternativeNames)
coordinates <- as.data.frame(sf::st_coordinates(sitenames))
colnames(coordinates) <- c('lon', 'lat')
sitenames <- sf::st_drop_geometry(sitenames)
sitenames <- cbind(sitenames, coordinates)
sitenames <- sitenames[!is.na(sitenames$AlternativeNames), ]
# Perform a left join to replace SiteName in df.c14 with SiteName from sitenames when there's a match on AlternativeNames
df <- df.c14 %>%
dplyr::left_join(sitenames, by = c("SiteName" = "AlternativeNames")) %>%
dplyr::mutate(SiteName = dplyr::coalesce(SiteName.y, SiteName)) %>%
dplyr::select(-SiteName.y)

#
# source("R/neo_dbs_sitename_dates.R")
# df.c14.coords <- neo_dbs_sitename_dates(df.c14,
# drop.sitenames.equiv.coords = drop.sitenames.equiv.coords)
#
# site.coordinates <- sf::st_read("https://raw.githubusercontent.com/zoometh/neonet/refs/heads/main/inst/extdata/c14_corrected_sitenames.geojson")
#

if(verbose){
print("Reorder the columns")
}
# df.c14 <- df.c14 %>%
# dplyr::select(everything(), tpq, taq)
df.c14.reodered <- df %>%
dplyr::relocate(sourcedb, .before = SiteName) %>%
dplyr::relocate(db_period, .after = C14SD) %>%
dplyr::relocate(db_culture, .after = db_period) %>%
dplyr::relocate(Period, .after = db_culture) %>%
dplyr::relocate(lon, .after = Period) %>%
dplyr::relocate(lat, .after = lon) %>%
dplyr::relocate(colors, .after = lat) %>%
dplyr::relocate(median, .after = colors)
# head(df.c14.reodered)
# head(samp_df)

return(df.c14.reodered)
}
2 changes: 1 addition & 1 deletion R/neo_dbs_parse.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @name neo_parse_dbs
#' @name neo_dbs_parse
#'
#' @description Collect radiocarbon dates form a list of dbs parsed by the c14bazAAR package, creates missing columns (period or culture), can filter on chronology (time interval) and spatial location (roi)
#'
Expand Down
12 changes: 7 additions & 5 deletions R/neo_dbs_sitename_dates.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' @description Avoid site names mispelling or false duplicates (ex: Franchthi from one database, and Franchthi Cave from another database) by performing a left join to replace SiteName in df.c14 with SiteName from sitenames when there's a match on AlternativeNames
#'
#' @param df.c14 a dataset of dates
#' @param sitenames.equiv A TSV file listing the equivalences between site names.
#' @param sitenames.equiv A GeoJSON file listing the equivalences between site names.
#' @param verbose if TRUE (default) then display different messages.
#'
#' @return A dataframe of dates
Expand All @@ -15,12 +15,16 @@
#'
#' @export
neo_dbs_sitename_dates <- function(df.c14 = NA,
# sitenames.equiv = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_corrected_sitenames.tsv",
sitenames.equiv = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_corrected_sitenames.geojson",
drop.sitenames.equiv.coords = TRUE,
verbose = TRUE){
`%>%` <- dplyr::`%>%`
# sitenames <- read.csv2(sitenames.equiv, sep = "\t")
sitenames <- sf::st_read(sitenames.equiv)
sitenames <- sf::st_drop_geometry(sitenames)
if(drop.sitenames.equiv.coords){
if(verbose){print("Drop coordinates")}
sitenames <- sf::st_drop_geometry(sitenames)
}
#
# df <- data.frame(
# id = 1:3,
Expand All @@ -30,8 +34,6 @@ neo_dbs_sitename_dates <- function(df.c14 = NA,
tidyr::separate_rows(AlternativeNames, sep = "\\|") # Use double escape for the pipe character
sitenames$AlternativeNames <- trimws(sitenames$AlternativeNames)
sitenames <- sitenames[!is.na(sitenames$AlternativeNames), ]

`%>%` <- dplyr::`%>%`
# Perform a left join to replace SiteName in df.c14 with SiteName from sitenames when there's a match on AlternativeNames
df <- df.c14 %>%
dplyr::left_join(sitenames, by = c("SiteName" = "AlternativeNames")) %>%
Expand Down
26 changes: 26 additions & 0 deletions R/neo_sitename_create_geojson.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#' @name neo_sitename_create_geojson
#'
#' @description Creates an sf object of all unique sites from a NeoNet compliant layout
#'
#' @param df.c14 a dataset of dates
#' @param verbose if TRUE (default) then display different messages.
#'
#' @return An sf object that can be exported into a GeoJSON file
#'
#' @examples
#'
#' df <- neo_sitename_create_geojson(df.c14)
#' sf::st_write(df, "C:/Rprojects/neonet/inst/extdata/c14_corrected_sitenames.geojson", driver = "GeoJSON")
#'
#'
#' @export
neo_sitename_create_geojson <- function(df.c14 = NA,
verbose = TRUE){
`%>%` <- dplyr::`%>%`
df <- df.c14 %>%
dplyr::group_by(SiteName) %>%
dplyr::filter(dplyr::row_number() == 1) %>%
dplyr::ungroup()
sf::st_crs(df) <- 4326
return(df)
}
Binary file modified doc/projet_neonet.qgz
Binary file not shown.
Binary file added doc/references/brami15/brami15.tif
Binary file not shown.
29 changes: 29 additions & 0 deletions doc/references/brami15/brami15.tif.aux.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<PAMDataset>
<PAMRasterBand band="1">
<Metadata>
<MDI key="STATISTICS_MINIMUM">0</MDI>
<MDI key="STATISTICS_MAXIMUM">255</MDI>
<MDI key="STATISTICS_MEAN">225.99282031667</MDI>
<MDI key="STATISTICS_STDDEV">27.511417600012</MDI>
<MDI key="STATISTICS_VALID_PERCENT">100</MDI>
</Metadata>
</PAMRasterBand>
<PAMRasterBand band="2">
<Metadata>
<MDI key="STATISTICS_MINIMUM">0</MDI>
<MDI key="STATISTICS_MAXIMUM">255</MDI>
<MDI key="STATISTICS_MEAN">226.05375864258</MDI>
<MDI key="STATISTICS_STDDEV">27.01047309827</MDI>
<MDI key="STATISTICS_VALID_PERCENT">100</MDI>
</Metadata>
</PAMRasterBand>
<PAMRasterBand band="3">
<Metadata>
<MDI key="STATISTICS_MINIMUM">0</MDI>
<MDI key="STATISTICS_MAXIMUM">255</MDI>
<MDI key="STATISTICS_MEAN">225.69710661152</MDI>
<MDI key="STATISTICS_STDDEV">27.897104448097</MDI>
<MDI key="STATISTICS_VALID_PERCENT">100</MDI>
</Metadata>
</PAMRasterBand>
</PAMDataset>
17 changes: 17 additions & 0 deletions doc/references/brami15/brami15.tif.points
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#CRS: GEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble",MEMBER["World Geodetic System 1984 (Transit)"],MEMBER["World Geodetic System 1984 (G730)"],MEMBER["World Geodetic System 1984 (G873)"],MEMBER["World Geodetic System 1984 (G1150)"],MEMBER["World Geodetic System 1984 (G1674)"],MEMBER["World Geodetic System 1984 (G1762)"],MEMBER["World Geodetic System 1984 (G2139)"],MEMBER["World Geodetic System 1984 (G2296)"],ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],ENSEMBLEACCURACY[2.0]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],AXIS["geodetic latitude (Lat)",north,ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],AXIS["geodetic longitude (Lon)",east,ORDER[2],ANGLEUNIT["degree",0.0174532925199433]],USAGE[SCOPE["Horizontal component of 3D system."],AREA["World."],BBOX[-90,-180,90,180]],ID["EPSG",4326]]
mapX,mapY,sourceX,sourceY,enable,dX,dY,residual
29.08225622080595585,41.21042819160548021,374.56686046511629229,-118.98619186046511231,1,-0.27006456774927301,-0.12743638389811451,0.29862167150866542
26.20995921583869759,40.02780875972160857,247.21947674418612451,-207.48619186046511231,1,1.36105985313491828,2.08892874845051324,2.49321223322818497
23.96089646666622031,39.95514805388312141,142.51162790697679839,-225.49491279069764005,1,0.3235035501665493,-1.02708693152072783,1.07682965777834383
23.33766221087143578,39.17179822699182523,118.58575581395353993,-273.60392441860466306,1,-1.35666161430715704,0.9580407422541839,1.66083497059564866
25.85769637560686718,39.19280229682427574,237.70058139534887687,-264.08502906976741542,1,-0.30114808141800609,-1.62815371687088373,1.65577012072999441
24.00154217900066556,37.66452977235186239,155.88953488372098377,-363.64752906976752911,1,1.24068625842792812,-0.04513903049729606,1.24150711795226032
26.71125633463015348,38.41147990367166898,283.75145348837213533,-305.24781976744196754,1,0.08796884521837001,0.2859641657063321,0.29918893996465196
23.17507936153367254,36.43184065377946723,123.98837209302328688,-441.34229651162797836,1,-0.82018280565709745,-0.42676272595542741,0.92456814727766568
27.78159342610380378,35.87391867658387667,355.78633720930235995,-452.40479651162797836,1,0.59090317927194747,0.63179498606848483,0.86506154214308495
23.54089077254365137,35.26780444103967227,147.65697674418609608,-509.77543604651162923,1,0.06913466223278419,-0.20547151134235264,0.21679055213581541
32.27971892444875834,35.09062093019117157,590.67151162790696617,-466.29723837209303383,1,0.3651545982488642,0.50557554590761811,0.62365416156879405
34.57620167134475508,35.68706413739089101,699.75290697674427065,-408.92659883720932612,1,0.2667138451001847,-0.1282801984942239,0.29595959942844025
30.4100161570644083,36.24630785857106474,485.96366279069775374,-413.30014534883713395,1,-1.5738603944149645,-0.89659430895471814,1.81133042152943924
21.12924517403340374,37.92682865166749195,15.93604651162797836,-355.67223837209297699,1,0.44351132232068835,0.55575192611951252,0.71102918112563684
22.9786250852505276,40.54456340676352255,94.27398255813966443,-192.56468023255814614,1,-0.42671865057603497,-0.54113130697217571,0.68913851882973953
Binary file added doc/references/brami15/brami15_modified.tif
Binary file not shown.
29 changes: 6 additions & 23 deletions doc/talks/2025-caa/run_caa25.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ col.c14baz <- c("sourcedb", "site", "labnr", "c14age", "c14std", "period", "cult
samp_df <- read.csv("https://raw.githubusercontent.com/zoometh/neonet/main/doc/talks/2024-simep/df14_simep_4.csv")
df.c14 <- samp_df

source("R/neo_dbs_3rdpart_parse.R")
df.c14 <- neo_dbs_3rdpart_parse() # Brami15 by default

# correct sitenames
source("R/neo_dbs_sitename_dates.R")
df.c14 <- neo_dbs_sitename_dates(df.c14)
Expand All @@ -28,29 +31,9 @@ source("R/neo_dbs_rm_date.R")
df_filtered <- neo_dbs_rm_date(df.c14 = df.c14,
c14.to.remove = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_aberrant_dates.tsv")
# to sf
df_filtered <- sf::st_as_sf(df_filtered, coords = c("lon", "lat"), crs = 4326)

# OK sites
first_occurrence <- df_filtered %>%
dplyr::group_by(SiteName) %>%
dplyr::filter(dplyr::row_number() == 1) %>%
dplyr::ungroup()
first_occurrence <- first_occurrence[, "SiteName"]

first_occurrence <- first_occurrence[first_occurrence$SiteName == 'Balma Margineda', ]

sitenames.equiv = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_corrected_sitenames.tsv"
sitenames <- read.csv2(sitenames.equiv, sep = "\t")
`%>%` <- dplyr::`%>%`
# Perform a left join to replace SiteName in df.c14 with SiteName from sitenames when there's a match on AlternativeNames
df <- first_occurrence %>%
dplyr::left_join(sitenames, by = c("SiteName" = "SiteName")) %>%
# dplyr::rename(
# AlternativeNames = SiteName.y,
# )
sf::st_crs(df) <- 4326
sf::st_write(df, "C:/Rprojects/neonet/inst/extdata/c14_corrected_sitenames.geojson", driver = "GeoJSON")

# df_filtered <- sf::st_as_sf(df_filtered, coords = c("lon", "lat"), crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
df_filtered <- sf::st_as_sf(df_filtered, coords = c("lon", "lat"), crs = 4326) # when GDAL/proj.db will be reinstalled



# isochrones
Expand Down
Loading

0 comments on commit 1430f08

Please sign in to comment.