Skip to content

Commit

Permalink
set script
Browse files Browse the repository at this point in the history
  • Loading branch information
florianthiery committed Aug 14, 2023
1 parent ed57b3a commit a751973
Show file tree
Hide file tree
Showing 12 changed files with 1,168 additions and 21 deletions.
File renamed without changes.
File renamed without changes.
Binary file added csv/_private/cifindspots.ods
Binary file not shown.
Binary file removed csv/cifindspots.ods
Binary file not shown.
18 changes: 18 additions & 0 deletions csv/cifindspots_part.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"id","label","desc","literature","wkt","certainty","certaintyinfo","relatedto","relatedtohow","source","sourcetype","spatialtype","methodtype","agent","methoddesc"
5,"Castelcivita Cave","CI findspot related from literature","Fedele et al., 2008;Giaccio et al., 2008","POINT(15.2092 40.4956)","fsl:high","TODO","http://wikidata.org/entity/Q3777120;http://openstreetmap.org/node/337519639","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using OSM Node 337519639"
6,"Copecchia","CI findspot related from literature","Rosi et al., 1999","POINT(14.7662 40.7198)","fsl:medium","TODO","http://wikidata.org/entity/Q3690987;http://openstreetmap.org/node/4371325110","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:UnknownCategory","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using OSM Node 4371325110"
10,"Marina di Cassano (Naples)","CI findspot related from literature","Civetta et al., 1997","POINT(14.3998 40.6385)","fsl:medium","TODO","http://sws.geonames.org/11961630;http://openstreetmap.org/node/566221732","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Bight ","fsl:Georeferencing","http://orcid.org/0009-0008-2877-3204","set a representative point based on scientific papers using Google Maps"
19,"Paglicci Cave","CI findspot related from literature","Giaccio et al., 2008","POINT(15.6150 41.6541)","fsl:medium","TODO","http://wikidata.org/entity/Q3777010;http://openstreetmap.org/node/2293681037","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave;fsl:ArchaeologicalSite","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using OSM Node 2293681037"
27,"Sant‘ Agata dei due Golfi","CI findspot related from literature","Civetta et al., 1997","POINT(14.3733 40.6070)","fsl:medium","TODO","http://wikidata.org/entity/Q1860399;http://openstreetmap.org/node/705576753","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:InhabitedPlace","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using OSM Node 705576753"
28,"Sant‘ Angelo a Scala","CI findspot related from literature","Rosi et al., 1999","POINT(14.7402 40.9746)","fsl:medium","TODO","http://wikidata.org/entity/Q55100;http://openstreetmap.org/node/68567290","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:InhabitedPlace","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using OSM Node 68567290"
42,"Kozarnika Cave (Bulgaria)","CI findspot related from literature","Lowe et al., 2012","POINT(22.7024 43.6519)","fsl:high","TODO","http://wikidata.org/entity/Q2037533;http://openstreetmap.org/node/8814442373","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave","fsl:Georeferencing","http://orcid.org/0009-0008-2877-3204","set a representative point based on scientific papers using Google Maps"
43,"Temnata Cave (Bulgaria)","CI findspot related from literature","Fedele et al., 2008, 2003;Fitzsimmons et al., 2013;Giaccio et al., 2008","POINT(23.3848 43.0892)","fsl:high","TODO","http://sws.geonames.org/726486;http://openstreetmap.org/node/369821847;http://wikidata.org/entity/Q12296153","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave","fsl:Georeferencing","http://orcid.org/0009-0008-2877-3204","set a representative point based on scientific papers using Google Maps"
44,"Toplitsa Cave (Bulgaria)","CI findspot related from literature","Tsanova et al., 2021","POINT(24.0053 43.1907)","fsl:high","TODO","http://wikidata.org/entity/Q61788595;http://openstreetmap.org/node/11109379095","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave;fsl:ArchaeologicalSite","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using Tsanova et al., 2021, p.2"
45,"Franchthi Cave (Greece)","CI findspot related from literature","Fedele et al., 2003","POINT(23.1311 37.4226)","fsl:high","TODO","http://wikidata.org/entity/Q1441331;http://openstreetmap.org/node/1221172611","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave;fsl:ArchaeologicalSite","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using OSM Node 1221172611"
48,"Susak Island (Croatia)","CI findspot related from literature","Wacha, 2011","POINT(14.2921 44.5104)","fsl:medium","TODO","http://sws.geonames.org/3189468;http://openstreetmap.org/relation/9854999","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Island","fsl:Georeferencing","http://orcid.org/0009-0008-2877-3204","set a representative point based on scientific papers using Google Maps"
50,"Golema Pesht Cave near Zdunje (Macedonia)","CI findspot related from literature","Lowe et al., 2012","POINT(21.1617 41.8117)","fsl:dubious","TODO","http://wikidata.org/entity/Q20565511;http://openstreetmap.org/node/11107939919","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave;fsl:ArchaeologicalSite","fsl:Georeferencing","http://orcid.org/0009-0008-2877-3204","set a representative point based on scientific papers using Google Maps"
51,"Crvena Stijena (Montenegro)","CI findspot related from literature","Morley & Woodward, 2011","POINT(18.4815 42.7790)","fsl:high","TODO","http://openstreetmap.org/node/10879170567;http://wikidata.org/entity/Q121418883","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave;fsl:ArchaeologicalSite","fsl:Georeferencing","http://orcid.org/0009-0008-2877-3204","set a representative point based on scientific papers using Google Maps"
52,"Urluia (Romania)","CI findspot related from literature","Fitzsimmons et al., 2014, 2013;Obreht et al., 2017;Pötter et al., 2021","POINT(27.9021 44.0947)","fsl:medium","TODO","http://sws.geonames.org/664132","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:InhabitedPlace","fsl:Georeferencing","http://orcid.org/0009-0008-2877-3204","set a representative point based on scientific papers using Google Maps"
54,"Vlasca (Romania)","CI findspot related from literature","Obreht et al., 2017","POINT(27.8491 44.3921)","fsl:medium","TODO","http://sws.geonames.org/662562","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:UnknownCategory","fsl:Georeferencing","http://orcid.org/0009-0008-2877-3204","set a representative point based on scientific papers using Google Maps"
55,"Lower Danube Basin (Romania)","CI findspot related from literature","Obreht et al., 2017","POINT(25.2920 43.6550)","fsl:low","TODO","http://sws.geonames.org/791630","fsl:spatialCloseMatch","fsl:PaperDesc","fsl:Paper","fsl:Plateau","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using Google Maps and Obreht et al. (2017), Fig. 1"
65,"Haua-Fteah (Libya)","CI findspot related from literature","Lowe et al., 2012","POINT(22.0516 32.9002)","fsl:high","TODO","http://wikidata.org/entity/Q25226810;http://openstreetmap.org/node/7778324735","skos:closeMatch","fsl:PaperDesc","fsl:Paper","fsl:Cave;fsl:ArchaeologicalSite","fsl:Georeferencing","http://orcid.org/0000-0002-3246-3531","set a representative point based on scientific papers using OSM Node 7778324735"
Binary file added csv/cifindspots_vortrag.ods
Binary file not shown.
1 change: 0 additions & 1 deletion data/README.md

This file was deleted.

2 changes: 0 additions & 2 deletions data/italy.csv

This file was deleted.

12 changes: 0 additions & 12 deletions ontology/README.md

This file was deleted.

253 changes: 253 additions & 0 deletions py/CI.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
# CI.py -- reads the findspot table "cifindspots_part.csv" and serialises each
# row as RDF triples (Turtle syntax) into "ci.ttl".
#
# Module metadata. These dunder names are informational only; nothing in this
# script reads them at runtime.
__author__ = "Florian Thiery"
__copyright__ = "MIT Licence 2023, Florian Thiery"
__credits__ = ["Florian Thiery"]
__license__ = "MIT"
__version__ = "beta"
__maintainer__ = "Florian Thiery"
__email__ = "mail@fthiery.de"
__status__ = "beta"
# Date of the last change to this script (ISO 8601, YYYY-MM-DD).
__update__ = "2023-08-14"

# import dependencies
import uuid
import requests
import io
import pandas as pd
import os
import codecs
import datetime
import importlib
import sys
import hashlib

# NOTE: the former "importlib.reload(sys)" call was a Python 2 idiom that
# preceded sys.setdefaultencoding(); Python 3 strings are Unicode already, so
# the call had no useful effect and has been dropped.

# Layout of every prov:startedAtTime / prov:endedAtTime literal. The trailing
# "Z" designates UTC, therefore timestamps must be taken in UTC as well.
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

# IRIs that are identical for every site.
REPOSITORY_IRI = "https://github.com/Research-Squirrel-Engineers/campanian-ignimbrite-geo"
SCRIPT_IRI = REPOSITORY_IRI + "/blob/main/py/CI.py"
LICENSE_IRI = "https://creativecommons.org/licenses/by/4.0/"
CREATOR_IRIS = (
    "https://orcid.org/0000-0002-3246-3531",
    "https://orcid.org/0000-0003-1100-6494",
)
WKT_CRS_IRI = "<http://www.opengis.net/def/crs/EPSG/0/4326>"


def utc_timestamp():
    """Return the current UTC time formatted with TIMESTAMP_FORMAT."""
    return datetime.datetime.now(datetime.timezone.utc).strftime(TIMESTAMP_FORMAT)


def cell_values(cell):
    """Split a ';'-separated CSV cell into a list of stripped, non-empty values.

    A missing cell (NaN/None) yields an empty list, so callers never emit the
    placeholder string 'nan' as data.
    """
    if pd.isna(cell):
        return []
    return [part.strip() for part in str(cell).split(";") if part.strip()]


def cell_text(cell):
    """Return the stripped text of a CSV cell, or None if it is missing/blank."""
    if pd.isna(cell):
        return None
    text = str(cell).strip()
    return text or None


def turtle_string(text, quote="'"):
    """Return *text* as a quoted Turtle string with the required escapes.

    Backslashes, the active quote character and line breaks are escaped so that
    arbitrary CSV text cannot terminate the literal early.
    """
    escaped = text.replace("\\", "\\\\").replace(quote, "\\" + quote)
    escaped = escaped.replace("\n", "\\n").replace("\r", "\\r")
    return quote + escaped + quote


def site_triples(row, starttime, endtime):
    """Build the Turtle lines describing one findspot.

    Parameters:
        row: mapping (dict or pandas row) with the keys 'id', 'label', 'desc',
            'literature', 'wkt', 'certainty', 'certaintyinfo', 'relatedto',
            'relatedtohow', 'source', 'sourcetype', 'spatialtype',
            'methodtype', 'agent' and 'methoddesc'.
        starttime: preformatted timestamp used for prov:startedAtTime.
        endtime: preformatted timestamp used for prov:endedAtTime.

    Returns:
        A list of strings, one Turtle statement per entry, terminated by an
        empty string that separates this site from the next one in the output.

    Cells holding several ';'-separated values produce one triple per value;
    missing cells produce no triple at all.
    """
    site = "fsld:cisite_" + str(row['id'])
    geom = site + "_geom"
    activity = site + "_activity"
    pyscript = site + "_pyscript"

    agents = cell_values(row['agent'])
    related = cell_values(row['relatedto'])
    references = cell_values(row['literature'])
    certainty_levels = cell_values(row['certainty'])
    certainty_info = cell_text(row['certaintyinfo'])

    out = []

    def add_certainty(subject):
        # the same certainty statements are attached to site, geometry and activity
        for level in certainty_levels:
            out.append(subject + " fsl:certaintyLevel " + level + ".")
        if certainty_info is not None:
            out.append(subject + " fsl:certaintyDesc " +
                       turtle_string(certainty_info) + "@en.")

    # agent
    for agent in agents:
        # the local name is the last path segment, for http:// and https:// alike
        node = "fsld:agent_" + agent.rsplit("/", 1)[-1]
        out.append(node + " rdf:type foaf:Person .")
        out.append(node + " rdf:type prov:Agent .")
        out.append(node + " skos:exactMatch <" + agent + ">.")

    # entity (site): typing
    out.append(site + " rdf:type fsl:Site .")
    out.append(site + " rdf:type prov:Entity .")
    out.append(site + " rdf:type pleiades:Place .")
    out.append(site + " fsl:partOf fsl:CampanianIgnimbriteProject .")
    out.append(site + " fsl:siteType fsl:ArchaeologicalSite .")

    # entity (site): metadata
    label = cell_text(row['label'])
    if label is not None:
        out.append(site + " rdfs:label " + turtle_string(label) + "@en.")
        out.append(site + " skos:prefLabel " + turtle_string(label) + "@en.")
    desc = cell_text(row['desc'])
    if desc is not None:
        out.append(site + " skos:scopeNote " + turtle_string(desc) + "@en.")
        out.append(site + " rdfs:comment " + turtle_string(desc) + "@en.")
    out.append(site + " prov:wasDerivedFrom <" + REPOSITORY_IRI + "> .")

    # certainty
    add_certainty(site)

    # relations
    relation = cell_text(row['relatedtohow'])
    if relation is not None:
        for target in related:
            out.append(site + " " + relation + " <" + target + ">.")
    for spatial_type in cell_values(row['spatialtype']):
        out.append(site + " fsl:spatialType " + spatial_type + ".")

    # literature
    for reference in references:
        out.append(site + " fsl:hasReference " + turtle_string(reference) + ".")

    # site geometry
    wkt = cell_text(row['wkt'])
    if wkt is not None:
        wkt_literal = turtle_string(WKT_CRS_IRI + " " + wkt, '"') + \
            "^^geosparql:wktLiteral"
        out.append(site + " geosparql:hasGeometry " + geom + " .")
        out.append(geom + " rdf:type sf:Point .")
        out.append(geom + " geosparql:asWKT " + wkt_literal + ".")
        add_certainty(geom)

    # activity: metadata
    out.append(activity + " rdf:type prov:Activity .")
    for method_type in cell_values(row['methodtype']):
        out.append(activity + " rdf:type " + method_type + ".")
    out.append(activity + " prov:startedAtTime '" + starttime + "'^^xsd:dateTime .")
    out.append(activity + " prov:endedAtTime '" + endtime + "'^^xsd:dateTime .")

    # activity: data
    for source in cell_values(row['source']):
        out.append(activity + " fsl:hasSource " + source + ".")
    for source_type in cell_values(row['sourcetype']):
        out.append(activity + " fsl:hasSourceType " + source_type + ".")
    for reference in references:
        out.append(activity + " fsl:hasReference " + turtle_string(reference) + ".")
    method_desc = cell_text(row['methoddesc'])
    if method_desc is not None:
        out.append(activity + " fsl:activityDesc " +
                   turtle_string(method_desc) + "@en.")
    add_certainty(activity)

    # image? (related resources that look like PNG/JPG links)
    for target in related:
        if "png" in target or "jpg" in target:
            out.append(activity + " fsl:image <" + target + ">.")

    # prov-o model
    out.append(site + " prov:wasGeneratedBy " + activity + ".")
    out.append(activity + " prov:used " + site + ".")
    for agent in agents:
        out.append(site + " prov:wasAttributedTo <" + agent + ">.")
        out.append(activity + " prov:wasAssociatedWith <" + agent + ">.")

    # license
    out.append(site + " dct:license <" + LICENSE_IRI + "> .")
    for creator in CREATOR_IRIS:
        out.append(site + " dct:creator <" + creator + "> .")
    for holder in CREATOR_IRIS:
        out.append(site + " dct:rightsHolder <" + holder + "> .")

    # prov-o for script
    out.append(site + " prov:wasAttributedTo <" + SCRIPT_IRI + "> .")
    out.append(site + " prov:wasDerivedFrom <" + REPOSITORY_IRI + "> .")
    out.append(site + " prov:wasGeneratedBy " + pyscript + " .")
    out.append(pyscript + " rdf:type <http://www.w3.org/ns/prov#Activity> .")
    out.append(pyscript + " prov:startedAtTime '" + starttime + "'^^xsd:dateTime .")
    out.append(pyscript + " prov:endedAtTime '" + endtime + "'^^xsd:dateTime .")
    out.append(pyscript + " prov:wasAssociatedWith <" + SCRIPT_IRI + "> .")

    # blank separator line between sites
    out.append("")
    return out


# set starttime
starttime = utc_timestamp()
lines = []

# set paths I
# The csv directory is a sibling of this script's directory; os.path keeps
# the lookup independent of the platform's path separator.
file_name = "cifindspots_part.csv"
dir_path = os.path.dirname(os.path.realpath(__file__))
file_in = os.path.join(os.path.dirname(dir_path), "csv", file_name)

# Guarded so the helpers above can be imported without touching the file
# system; running the file as a script behaves as before.
if __name__ == "__main__":
    # read csv file
    data = pd.read_csv(
        file_in,
        encoding='utf-8',
        sep=',',
        usecols=['id', 'label', 'desc', 'certainty', 'certaintyinfo', 'relatedto',
                 'relatedtohow', 'source', 'sourcetype', 'spatialtype',
                 'methodtype', 'agent', 'methoddesc', 'literature', 'wkt'],
        na_values=['.', '??', 'NULL']  # take any '.', '??' or 'NULL' values as NA
    )
    print("*****************************************")
    data.info()  # info() prints its report itself and returns None

    # create triples from dataframe
    for _, row in data.iterrows():
        lines.extend(site_triples(row, starttime, utc_timestamp()))

# maximum number of output lines per Turtle file
step = 100000

# Namespace declarations written at the top of every output file.
# The OpenStreetMap namespaces previously read "openopenstreetmap.org"; they
# now match the http://openstreetmap.org/... IRIs used in the CSV data.
PREFIX_DECLARATIONS = (
    ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
    ("owl", "http://www.w3.org/2002/07/owl#"),
    ("xsd", "http://www.w3.org/2001/XMLSchema#"),
    ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
    ("geosparql", "http://www.opengis.net/ont/geosparql#"),
    ("dc", "http://purl.org/dc/elements/1.1/"),
    ("dct", "http://purl.org/dc/terms/"),
    ("sf", "http://www.opengis.net/ont/sf#"),
    ("prov", "http://www.w3.org/ns/prov#"),
    ("foaf", "http://xmlns.com/foaf/0.1/"),
    ("skos", "http://www.w3.org/2004/02/skos/core#"),
    ("pleiades", "https://pleiades.stoa.org/places/vocab#"),
    ("wikidata", "http://wikidata.org/entity/"),
    ("osmn", "http://openstreetmap.org/node/"),
    ("osmr", "http://openstreetmap.org/relation/"),
    ("osmw", "http://openstreetmap.org/way/"),
    ("fsl", "http://archaeoinformatics.link/ontology#"),
    ("fsld", "http://fuzzy-sl.squirrel.link/data/"),
)
prefixes = "".join(
    "@prefix " + name + ": <" + iri + "> .\r\n" for name, iri in PREFIX_DECLARATIONS
) + "\r\n"


def write_turtle_files(triple_lines, out_dir, step=100000, basename="ci"):
    """Write *triple_lines* as one or more Turtle files inside *out_dir*.

    Parameters:
        triple_lines: list of strings, one output line each (no line ending).
        out_dir: target directory; it is created if it does not exist.
        step: maximum number of lines per file.
        basename: file name stem.

    Returns:
        The list of file paths written, in order.

    When everything fits into one file it is named "<basename>.ttl". Otherwise
    the chunks are named "<basename>_1.ttl", "<basename>_2.ttl", ... so that a
    later chunk can no longer overwrite an earlier one. Each file carries its
    own header and prefix block; at least one file is always written.
    """
    os.makedirs(out_dir, exist_ok=True)
    # ceiling division: an exact multiple of *step* must not create an empty file
    file_count = max(1, -(-len(triple_lines) // step))
    written = []
    for number in range(1, file_count + 1):
        if file_count == 1:
            name = basename + ".ttl"
        else:
            name = basename + "_" + str(number) + ".ttl"
        filename = os.path.join(out_dir, name)
        chunk = triple_lines[(number - 1) * step:number * step]
        # newline="" keeps the explicit "\r\n" endings exactly as written
        with open(filename, "w", encoding="utf-8", newline="") as file:
            file.write(
                "# create triples from https://github.com/Research-Squirrel-Engineers/campanian-ignimbrite-geo \r\n")
            file.write(
                "# on " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M") + "\r\n\r\n")
            file.write(prefixes)
            file.writelines(line + "\r\n" for line in chunk)
        print(" > " + name)
        written.append(filename)
    return written


# Guarded so that write_turtle_files can be imported without side effects;
# running the file as a script behaves as before.
if __name__ == "__main__":
    files = max(1, -(-len(lines) // step))
    # blank separator lines are not triples, so leave them out of the count
    print("triples", sum(1 for line in lines if line), "files", files)

    # write output files into the "rdf" directory next to this script's directory
    write_turtle_files(lines, os.path.join(os.path.dirname(dir_path), "rdf"), step)

    print("*****************************************")
    print("SUCCESS: closing script")
    print("*****************************************")
6 changes: 0 additions & 6 deletions py/README.md

This file was deleted.

Loading

0 comments on commit a751973

Please sign in to comment.