update alternative names

zoometh · Dec 8, 2024 · 1430f08 · 1430f08
1 parent 0de8c7e
commit 1430f08
Show file tree

Hide file tree

Showing 12 changed files with 249 additions and 91 deletions.
diff --git a/R/neo_calib.R b/R/neo_calib.R
@@ -1,6 +1,6 @@
 #' @name neo_calib
 #'
-#' @description Calibrate radiocarbon dates, calculate tpq / taq and the weighted median
+#' @description Calibrate radiocarbon dates, calculate tpq / taq and the weighted median (wmedian)
 #'
 #' @param df.c14 A dataframe. The original XLSX with neonet columns (SiteName, Period, etc.) with with checked values (see: neo_subset)
 #' @param intCal calibration curve
@@ -27,13 +27,24 @@ neo_calib <- function(df.c14 = NA,
                       ref.period = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/periods.tsv",
                       verbose = TRUE,
                       verbose.freq = 50){
+  # df.c14 <- c14_3rdpart
+  if(verbose){
+    print(paste0("Calibration"))
+  }
   `%>%` <- dplyr::`%>%`
   if(inherits(df.c14, "sf")){
     if(verbose){
       print(paste0("Reads a 'sf' dataframe"))
     }
     df.c14 <- sf::st_set_geometry(df.c14, NULL)
   }
+  df.c14$C14Age <- as.numeric(df.c14$C14Age)
+  df.c14$C14SD  <- as.numeric(df.c14$C14SD)
+  if(verbose){
+    print(paste0("Data in entry: ", nrow(df.c14)))
+    df.c14 <- na.omit(df.c14)
+    print(paste0("Data in entry after removing NA: ", nrow(df.c14)))
+  }
   if(stat.mean){
     if(verbose){
       print(paste0("Create a column 'mean' to be filled with dates weighted means"))

diff --git a/R/neo_dbs_3rdpart_parse.R b/R/neo_dbs_3rdpart_parse.R
@@ -0,0 +1,105 @@
+#' @name neo_dbs_3rdpart_parse
+#'
+#' @description Collect radiocarbon dates from an external dataset
+#'
+#' @param l.dbs A vector of radiocarbon datasets listed in c14bazAAR.
+#' @param col.c14baz A vector of field names to collect from the `l.dbs` datasets.
+#' @param present A date for the present, to calibrate from BP (1950). Default: 1950.
+#' @param chr.interval.uncalBC A vector of two BC dates of chronological bounds to subset the radiocarbon dates selection.
+#' @param roi A `sf` polygon to subset the radiocarbon dates selection.
+#' @param verbose if TRUE (default) then display different messages.
+#'
+#' @return A dataframe of standardized radiocarbon dates.
+#'
+#' @examples
+#'
+#'                    
+#' @export
+neo_dbs_3rdpart_parse <- function(file.path = "C:/Rprojects/neonet/doc/references/brami15/db_data/12520_2014_193_MOESM1_ESM.xlsx",
+                                  sourcedb = "brami15",
+                                  db_period = "EN",
+                                  Period = "EN",
+                                  db_culture = NA,
+                                  colors = NA,
+                                  drop.sitenames.equiv.coords = FALSE,
+                                  sitenames.equiv = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_corrected_sitenames.geojson",
+                                  col.neonet = c("SiteName", "LabCode", "C14Age", "C14SD"),
+                                  col.3rdpart = c("Site", "Lab.no.", "Date.BP", "Interval.BP"),
+                                  text.to.rm = c('\\(1\\)|\\(2\\)|\\(3\\)'),
+                                  present = 1950, 
+                                  chr.interval.uncalBC = NA, 
+                                  roi = NA,
+                                  verbose = TRUE){
+  `%>%` <- dplyr::`%>%`
+  if (length(col.3rdpart) != length(col.neonet)){
+    stop("'col.neonet' and 'col.3rdpart' should have the same length for the mapping")
+  }
+  c14_3rdpart <- openxlsx::read.xlsx(file.path)
+  # rename columns
+  for(i in seq(1, length(colnames(c14_3rdpart)))){
+    if(colnames(c14_3rdpart)[i] %in% col.3rdpart){
+      idx <- match(colnames(c14_3rdpart)[i], col.3rdpart)
+      colnames(c14_3rdpart)[i] <- col.neonet[idx]
+    }
+  }
+  # drop all non used columns
+  c14_3rdpart <- c14_3rdpart[ , col.neonet]
+  # remove bad text pattern in the whole dataset
+  c14_3rdpart <- c14_3rdpart.clean %>%
+    dplyr::mutate(across(everything(), ~ gsub(text.to.rm, "", .)))
+  c14_3rdpart <- na.omit(c14_3rdpart)
+  head(c14_3rdpart)
+  source("R/neo_calib.R")
+  df.c14 <- neo_calib(c14_3rdpart,
+                      stat.mean = TRUE)
+  # Add supp data
+  df.c14$sourcedb <- sourcedb
+  df.c14$db_period <- db_period
+  df.c14$db_culture <- db_culture
+  df.c14$Period <- Period
+  df.c14$colors <- colors
+  df.c14$mean <- NULL
+
+  sitenames <- sf::st_read(sitenames.equiv)
+  sitenames$AlternativeNames <- paste0(sitenames$AlternativeNames, " | ", sitenames$SiteName)
+  sitenames <- sitenames %>%
+    tidyr::separate_rows(AlternativeNames, sep = "\\|")  # Use double escape for the pipe character
+  sitenames$AlternativeNames <- trimws(sitenames$AlternativeNames)
+  coordinates <- as.data.frame(sf::st_coordinates(sitenames))
+  colnames(coordinates) <- c('lon', 'lat')
+  sitenames <- sf::st_drop_geometry(sitenames)
+  sitenames <- cbind(sitenames, coordinates)
+  sitenames <- sitenames[!is.na(sitenames$AlternativeNames), ]
+  # Perform a left join to replace SiteName in df.c14 with SiteName from sitenames when there's a match on AlternativeNames
+  df <- df.c14 %>%
+    dplyr::left_join(sitenames, by = c("SiteName" = "AlternativeNames")) %>%
+    dplyr::mutate(SiteName = dplyr::coalesce(SiteName.y, SiteName)) %>%
+    dplyr::select(-SiteName.y)
+
+  # 
+  # source("R/neo_dbs_sitename_dates.R")
+  # df.c14.coords <- neo_dbs_sitename_dates(df.c14,
+  #                                         drop.sitenames.equiv.coords = drop.sitenames.equiv.coords)
+  # 
+  # site.coordinates <- sf::st_read("https://raw.githubusercontent.com/zoometh/neonet/refs/heads/main/inst/extdata/c14_corrected_sitenames.geojson")
+  # 
+
+  if(verbose){
+    print("Reorder the columns")
+  }
+  # df.c14 <- df.c14 %>%
+  #   dplyr::select(everything(), tpq, taq)
+  df.c14.reodered <- df %>% 
+    dplyr::relocate(sourcedb, .before = SiteName) %>%
+    dplyr::relocate(db_period, .after = C14SD) %>%
+    dplyr::relocate(db_culture, .after = db_period) %>%
+    dplyr::relocate(Period, .after = db_culture) %>%
+    dplyr::relocate(lon, .after = Period) %>%
+    dplyr::relocate(lat, .after = lon) %>%
+    dplyr::relocate(colors, .after = lat) %>%
+    dplyr::relocate(median, .after = colors)
+  # head(df.c14.reodered)
+  # head(samp_df)
+
+  return(df.c14.reodered)
+}
diff --git a/R/neo_dbs_parse.R b/R/neo_dbs_parse.R
@@ -1,4 +1,4 @@
-#' @name neo_parse_dbs
+#' @name neo_dbs_parse
 #'
 #' @description Collect radiocarbon dates form a list of dbs parsed by the c14bazAAR package, creates missing columns (period or culture), can filter on chronology (time interval) and spatial location (roi)
 #'

diff --git a/R/neo_dbs_sitename_dates.R b/R/neo_dbs_sitename_dates.R
@@ -3,7 +3,7 @@
 #' @description Avoid site names mispelling or false duplicates (ex: Franchthi from one database, and Franchthi Cave from another database) by performing a left join to replace SiteName in df.c14 with SiteName from sitenames when there's a match on AlternativeNames
 #'
 #' @param df.c14 a dataset of dates
-#' @param sitenames.equiv A TSV file listing the equivalences between site names.
+#' @param sitenames.equiv A GeoJSON file listing the equivalences between site names.
 #' @param verbose if TRUE (default) then display different messages.
 #'
 #' @return A dataframe of dates
@@ -15,12 +15,16 @@
 #'
 #' @export
 neo_dbs_sitename_dates <- function(df.c14 = NA,
-                                   # sitenames.equiv = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_corrected_sitenames.tsv",
                                    sitenames.equiv = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_corrected_sitenames.geojson",
+                                   drop.sitenames.equiv.coords = TRUE,
                                    verbose = TRUE){
+  `%>%` <- dplyr::`%>%`
   # sitenames <- read.csv2(sitenames.equiv, sep = "\t")
   sitenames <- sf::st_read(sitenames.equiv)
-  sitenames <- sf::st_drop_geometry(sitenames)
+  if(drop.sitenames.equiv.coords){
+    if(verbose){print("Drop coordinates")}
+    sitenames <- sf::st_drop_geometry(sitenames)
+  }
   # 
   # df <- data.frame(
   #   id = 1:3,
@@ -30,8 +34,6 @@ neo_dbs_sitename_dates <- function(df.c14 = NA,
     tidyr::separate_rows(AlternativeNames, sep = "\\|")  # Use double escape for the pipe character
   sitenames$AlternativeNames <- trimws(sitenames$AlternativeNames)
   sitenames <- sitenames[!is.na(sitenames$AlternativeNames), ]
-
-  `%>%` <- dplyr::`%>%`
   # Perform a left join to replace SiteName in df.c14 with SiteName from sitenames when there's a match on AlternativeNames
   df <- df.c14 %>%
     dplyr::left_join(sitenames, by = c("SiteName" = "AlternativeNames")) %>%

diff --git a/R/neo_sitename_create_geojson.R b/R/neo_sitename_create_geojson.R
@@ -0,0 +1,26 @@
+#' @name neo_sitename_create_geojson
+#'
+#' @description Creates an sf object of all unique sites from a NeoNet compliant layout
+#'
+#' @param df.c14 a dataset of dates
+#' @param verbose if TRUE (default) then display different messages.
+#'
+#' @return An sf object that can be exported into a GeoJSON file
+#'
+#' @examples
+#' 
+#' df <- neo_sitename_create_geojson(df.c14)
+#' sf::st_write(df, "C:/Rprojects/neonet/inst/extdata/c14_corrected_sitenames.geojson", driver = "GeoJSON")
+#'
+#'
+#' @export
+neo_sitename_create_geojson <- function(df.c14 = NA,
+                                   verbose = TRUE){
+  `%>%` <- dplyr::`%>%`
+  df <- df.c14 %>% 
+    dplyr::group_by(SiteName) %>% 
+    dplyr::filter(dplyr::row_number() == 1) %>%
+    dplyr::ungroup()
+  sf::st_crs(df) <- 4326
+  return(df)
+}
diff --git a/doc/projet_neonet.qgz b/doc/projet_neonet.qgz
diff --git a/doc/references/brami15/brami15.tif b/doc/references/brami15/brami15.tif
diff --git a/doc/references/brami15/brami15.tif.aux.xml b/doc/references/brami15/brami15.tif.aux.xml
@@ -0,0 +1,29 @@
+<PAMDataset>
+  <PAMRasterBand band="1">
+    <Metadata>
+      <MDI key="STATISTICS_MINIMUM">0</MDI>
+      <MDI key="STATISTICS_MAXIMUM">255</MDI>
+      <MDI key="STATISTICS_MEAN">225.99282031667</MDI>
+      <MDI key="STATISTICS_STDDEV">27.511417600012</MDI>
+      <MDI key="STATISTICS_VALID_PERCENT">100</MDI>
+    </Metadata>
+  </PAMRasterBand>
+  <PAMRasterBand band="2">
+    <Metadata>
+      <MDI key="STATISTICS_MINIMUM">0</MDI>
+      <MDI key="STATISTICS_MAXIMUM">255</MDI>
+      <MDI key="STATISTICS_MEAN">226.05375864258</MDI>
+      <MDI key="STATISTICS_STDDEV">27.01047309827</MDI>
+      <MDI key="STATISTICS_VALID_PERCENT">100</MDI>
+    </Metadata>
+  </PAMRasterBand>
+  <PAMRasterBand band="3">
+    <Metadata>
+      <MDI key="STATISTICS_MINIMUM">0</MDI>
+      <MDI key="STATISTICS_MAXIMUM">255</MDI>
+      <MDI key="STATISTICS_MEAN">225.69710661152</MDI>
+      <MDI key="STATISTICS_STDDEV">27.897104448097</MDI>
+      <MDI key="STATISTICS_VALID_PERCENT">100</MDI>
+    </Metadata>
+  </PAMRasterBand>
+</PAMDataset>
diff --git a/doc/references/brami15/brami15.tif.points b/doc/references/brami15/brami15.tif.points
@@ -0,0 +1,17 @@
+#CRS: GEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble",MEMBER["World Geodetic System 1984 (Transit)"],MEMBER["World Geodetic System 1984 (G730)"],MEMBER["World Geodetic System 1984 (G873)"],MEMBER["World Geodetic System 1984 (G1150)"],MEMBER["World Geodetic System 1984 (G1674)"],MEMBER["World Geodetic System 1984 (G1762)"],MEMBER["World Geodetic System 1984 (G2139)"],MEMBER["World Geodetic System 1984 (G2296)"],ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],ENSEMBLEACCURACY[2.0]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],AXIS["geodetic latitude (Lat)",north,ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],AXIS["geodetic longitude (Lon)",east,ORDER[2],ANGLEUNIT["degree",0.0174532925199433]],USAGE[SCOPE["Horizontal component of 3D system."],AREA["World."],BBOX[-90,-180,90,180]],ID["EPSG",4326]]
+mapX,mapY,sourceX,sourceY,enable,dX,dY,residual
+29.08225622080595585,41.21042819160548021,374.56686046511629229,-118.98619186046511231,1,-0.27006456774927301,-0.12743638389811451,0.29862167150866542
+26.20995921583869759,40.02780875972160857,247.21947674418612451,-207.48619186046511231,1,1.36105985313491828,2.08892874845051324,2.49321223322818497
+23.96089646666622031,39.95514805388312141,142.51162790697679839,-225.49491279069764005,1,0.3235035501665493,-1.02708693152072783,1.07682965777834383
+23.33766221087143578,39.17179822699182523,118.58575581395353993,-273.60392441860466306,1,-1.35666161430715704,0.9580407422541839,1.66083497059564866
+25.85769637560686718,39.19280229682427574,237.70058139534887687,-264.08502906976741542,1,-0.30114808141800609,-1.62815371687088373,1.65577012072999441
+24.00154217900066556,37.66452977235186239,155.88953488372098377,-363.64752906976752911,1,1.24068625842792812,-0.04513903049729606,1.24150711795226032
+26.71125633463015348,38.41147990367166898,283.75145348837213533,-305.24781976744196754,1,0.08796884521837001,0.2859641657063321,0.29918893996465196
+23.17507936153367254,36.43184065377946723,123.98837209302328688,-441.34229651162797836,1,-0.82018280565709745,-0.42676272595542741,0.92456814727766568
+27.78159342610380378,35.87391867658387667,355.78633720930235995,-452.40479651162797836,1,0.59090317927194747,0.63179498606848483,0.86506154214308495
+23.54089077254365137,35.26780444103967227,147.65697674418609608,-509.77543604651162923,1,0.06913466223278419,-0.20547151134235264,0.21679055213581541
+32.27971892444875834,35.09062093019117157,590.67151162790696617,-466.29723837209303383,1,0.3651545982488642,0.50557554590761811,0.62365416156879405
+34.57620167134475508,35.68706413739089101,699.75290697674427065,-408.92659883720932612,1,0.2667138451001847,-0.1282801984942239,0.29595959942844025
+30.4100161570644083,36.24630785857106474,485.96366279069775374,-413.30014534883713395,1,-1.5738603944149645,-0.89659430895471814,1.81133042152943924
+21.12924517403340374,37.92682865166749195,15.93604651162797836,-355.67223837209297699,1,0.44351132232068835,0.55575192611951252,0.71102918112563684
+22.9786250852505276,40.54456340676352255,94.27398255813966443,-192.56468023255814614,1,-0.42671865057603497,-0.54113130697217571,0.68913851882973953
diff --git a/doc/references/brami15/brami15_modified.tif b/doc/references/brami15/brami15_modified.tif
diff --git a/doc/talks/2025-caa/run_caa25.R b/doc/talks/2025-caa/run_caa25.R
@@ -11,6 +11,9 @@ col.c14baz <- c("sourcedb", "site", "labnr", "c14age", "c14std", "period", "cult
 samp_df <- read.csv("https://raw.githubusercontent.com/zoometh/neonet/main/doc/talks/2024-simep/df14_simep_4.csv")
 df.c14 <- samp_df
 
+source("R/neo_dbs_3rdpart_parse.R")
+df.c14 <- neo_dbs_3rdpart_parse() # Brami15 by default
+
 # correct sitenames
 source("R/neo_dbs_sitename_dates.R")
 df.c14 <- neo_dbs_sitename_dates(df.c14)
@@ -28,29 +31,9 @@ source("R/neo_dbs_rm_date.R")
 df_filtered <- neo_dbs_rm_date(df.c14 = df.c14,
                                c14.to.remove = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_aberrant_dates.tsv")
 # to sf
-df_filtered <- sf::st_as_sf(df_filtered, coords = c("lon", "lat"), crs = 4326)
-
-# OK sites
-first_occurrence <- df_filtered %>% 
-  dplyr::group_by(SiteName) %>% 
-  dplyr::filter(dplyr::row_number() == 1) %>%
-  dplyr::ungroup()
-first_occurrence <- first_occurrence[, "SiteName"]
-
-first_occurrence <- first_occurrence[first_occurrence$SiteName == 'Balma Margineda', ]
-
-sitenames.equiv = "https://raw.githubusercontent.com/zoometh/neonet/main/inst/extdata/c14_corrected_sitenames.tsv"
-sitenames <- read.csv2(sitenames.equiv, sep = "\t")
-`%>%` <- dplyr::`%>%`
-# Perform a left join to replace SiteName in df.c14 with SiteName from sitenames when there's a match on AlternativeNames
-df <- first_occurrence %>%
-  dplyr::left_join(sitenames, by = c("SiteName" = "SiteName")) %>%
-  # dplyr::rename(
-  #   AlternativeNames = SiteName.y,
-  # )
-sf::st_crs(df) <- 4326
-sf::st_write(df, "C:/Rprojects/neonet/inst/extdata/c14_corrected_sitenames.geojson", driver = "GeoJSON")
-
+# df_filtered <- sf::st_as_sf(df_filtered, coords = c("lon", "lat"), crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
+df_filtered <- sf::st_as_sf(df_filtered, coords = c("lon", "lat"), crs = 4326) # when GDAL/proj.db will be reinstalled
+
 
 
  # isochrones