Merge branch 'main' of https://github.com/LinkedEarth/PyleoTutorials

LinkedEarth · Jul 3, 2024 · 6245d7d · 6245d7d
2 parents dd62de7 + fb4490e
commit 6245d7d
Show file tree

Hide file tree

Showing 4 changed files with 366 additions and 35 deletions.
diff --git a/notebooks/.virtual_documents/L0_working_with_geoseries.ipynb b/notebooks/.virtual_documents/L0_working_with_geoseries.ipynb
@@ -46,6 +46,7 @@ metadata_dict={'time': df['time_values'].iloc[0],
               'elevation': df['geo_meanElev'].iloc[0],    
                'archiveType': df['archiveType'].iloc[0],
                'observationType': df['paleoData_proxy'].iloc[0],
+               'auto_time_params':False
               }
 
 ts = pyleo.GeoSeries(**metadata_dict)
@@ -158,7 +159,7 @@ for _, row in df.iterrows():
 
 
 pages2k = pyleo.MultipleGeoSeries(ts_list,time_unit='year CE')    
-pages2k.stackplot(ylabel_fontsize=0)
+pages2k.stackplot(ylabel_fontsize=2, xlim=[0,2000])
 
 
 
@@ -170,27 +171,27 @@ pages2k.map()
 
 
 
-NAm_coord = {'central_latitude':30, 'central_longitude':-60}
-pages2k.map(projection='Orthographic',proj_default=NAm_coord) 
+NA_coord = {'central_latitude':40, 'central_longitude':-50}
+pages2k.map(projection='Orthographic',proj_default=NA_coord) 
 
 
 
 
 
-pages2k.map(projection='Orthographic', size='elevation', proj_default=NAm_coord) 
+pages2k.map(projection='Orthographic', size='elevation', proj_default=NA_coord) 
 
 
 
 
 
-pages2k.map(projection='Orthographic', hue = 'observationType', proj_default=NAm_coord) 
+pages2k.map(projection='Orthographic', hue = 'observationType', proj_default=NA_coord) 
 
 
 
 
 
 pages2k.map(projection='Orthographic',hue='observationType',
-                       size='elevation', proj_default=NAm_coord, figsize=[18, 8]) 
+                       size='elevation', proj_default=NA_coord, figsize=[18, 8]) 
 
 
 
@@ -209,10 +210,3 @@ ts.map_neighbors(pages2k, radius=5000)
 
 
 %watermark -n -u -v -iv -w
-
-
-ts = pyleo.utils.datasets.load_dataset('EDC-dD')
-fig, ax = ts.map(edgecolor='black')
-
-
-
diff --git a/notebooks/.virtual_documents/L1_working_with_age_ensembles.ipynb b/notebooks/.virtual_documents/L1_working_with_age_ensembles.ipynb
@@ -0,0 +1,280 @@
+
+
+
+%load_ext watermark
+
+import ast
+
+import pyleoclim as pyleo
+import numpy as np
+import pandas as pd
+
+from pylipd.lipd import LiPD
+
+
+
+
+
+
+
+
+#Create a path to the data
+filename = '../data/Crystal.McCabe-Glynn.2013.lpd'
+
+#Initialize the lipd object
+D = LiPD()
+
+#Load the data
+D.load(filename)
+
+
+
+
+
+#Pull the ensemble tables into a dataframe
+ensemble_df = D.get_ensemble_tables()
+ensemble_df
+
+
+
+
+
+#Pull the paleo data into a dataframe. We take the first dataset name because our file only contains one dataset
+df = D.get_timeseries_essentials(D.get_all_dataset_names()[0], mode='paleo')
+df
+
+
+
+
+
+paleoDepth = df['depth_values'].iloc[0]
+paleoValues = df['paleoData_values'].iloc[0]
+paleoTime = df['time_values'].iloc[0]
+
+#It's wise to make sure our units all make sense so we'll pull these as well
+paleo_depth_units = df['depth_units'].iloc[0]
+
+#The stored value name and value unit are horrendously formatted, so we'll hard code them using info from the dataframe
+value_name = 'd18O'
+value_unit = 'permil VPDB'
+
+#We can access the row of interest in our ensemble table via indexing by 0 (because there's just the one row anyway)
+chronDepth = ensemble_df.iloc[0]['ensembleDepthValues']
+chronValues = ensemble_df.iloc[0]['ensembleVariableValues']
+
+#Getting the depth units from our ensemble table
+ensemble_depth_units = ensemble_df.iloc[0]['ensembleDepthUnits']
+
+#The way the time name and units are stored in our ensemble dataframe is a bit wonky, so we'll do some organization of our own
+time_name = 'Time'
+time_unit = f'{ensemble_df.iloc[0]["ensembleVariableName"]} {ensemble_df.iloc[0]["ensembleVariableUnits"]}'
+
+
+print(f'Num rows in chronValues: {chronValues.shape[0]}, Length of chronDepth: {len(chronDepth)}')
+
+
+
+
+
+ts = pyleo.Series(time = paleoTime, value = paleoValues, time_name = time_name,
+                 value_name = value_name, time_unit = time_unit,
+                 value_unit = value_unit, label = df['dataSetName'].iloc[0], verbose=False)
+
+
+ensemble = pyleo.EnsembleSeries.from_AgeEnsembleArray(series = ts, age_array = chronValues, value_depth = paleoDepth,
+                                                     age_depth = chronDepth, verbose = False)
+
+
+
+
+
+ensemble.common_time().plot_envelope(figsize=(16,10))
+
+
+
+
+
+vals, headers = ensemble.to_array(axis='time')
+vals
+
+
+
+
+
+
+
+
+#Loading the ensemble tables. Note that column 0 corresponds to depth
+
+ensemble_url = 'https://wiki.linked.earth/wiki/images/7/79/MD982181.Khider.2014.chron1model1ensemble.csv'
+
+ensemble_df = pd.read_csv(ensemble_url,header=None)
+
+ensemble_df
+
+
+
+
+
+url = 'https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/khider2014/khider2014-sst.txt'
+
+
+df = pd.read_csv(url, sep = '\t', skiprows=137)
+
+df
+
+
+
+
+
+df.replace(-999.99, np.NaN, inplace=True)
+
+
+df
+
+
+
+
+
+df.columns
+
+
+
+
+
+paleoValues = df['SST '].to_numpy().flatten()
+paleoDepth = df['depth_cm'].to_numpy().flatten()
+age = df['age_calyrBP'].to_numpy().flatten() 
+
+value_name = 'SST'
+value_unit = 'deg C'
+
+time_name = 'Age'
+time_unit = 'Year BP'
+
+
+
+
+
+ensemble_df
+
+
+chronDepth = ensemble_df[0].to_numpy() #The depth values in this case are stored in column 0
+
+chronValues = ensemble_df[ensemble_df.columns.values[1:]].to_numpy() 
+
+
+
+
+
+print(f'Num rows in chronValues: {chronValues.shape[0]}, Length of chronDepth: {len(chronDepth)}')
+
+
+
+
+
+ts = pyleo.GeoSeries(time = age, value = paleoValues, value_name = value_name,
+                  time_name = time_name, value_unit = value_unit, time_unit = time_unit, depth = paleoDepth, lat = 6.45, lon = 125.83)
+
+ensemble = pyleo.EnsembleGeoSeries.from_AgeEnsembleArray(geo_series = ts, age_array = chronValues,
+                                                     age_depth = chronDepth, verbose = False)
+
+
+
+
+
+ensemble.common_time().plot_envelope(figsize=(16,10))
+
+
+
+
+
+D = LiPD()
+
+D.load(['../data/MD982181.Khider.2014.lpd'])
+
+
+query = """
+    PREFIX le: <http://linked.earth/ontology#>
+    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+    SELECT ?datasetName ?ensembleTable ?ensembleVariableName ?ensembleVariableValues ?ensembleVariableUnits ?ensembleDepthName ?ensembleDepthValues ?ensembleDepthUnits ?notes ?methodobj ?methods
+    WHERE {
+        ?ds a le:Dataset .
+        ?ds le:hasName ?datasetName .
+            FILTER regex(?datasetName, "[dsname].*", "i").
+
+        ?ds le:hasPaleoData ?paleo .
+        ?paleo le:modeledBy ?model .
+        ?model le:hasEnsembleTable ?ensembleTable .
+            OPTIONAL{?ensembleTable le:notes ?notes .}
+
+        ?ensembleTable le:hasVariable ?ensvar .
+        ?ensvar le:hasName ?ensembleVariableName .
+            FILTER regex(lcase(?ensembleVariableName), "[ensembleVarName].*", "i").
+        ?ensvar le:hasValues ?ensembleVariableValues .
+        OPTIONAL{?ensvar le:hasUnits ?ensembleVariableUnitsObj .
+                ?ensembleVariableUnitsObj rdfs:label ?ensembleVariableUnits .
+                VALUES ?ensembleVariableUnits {"deg C"} .}
+
+        ?ensembleTable le:hasVariable ?ensdepthvar .
+        ?ensdepthvar le:hasName ?ensembleDepthName .
+            FILTER regex(lcase(?ensembleDepthName), "[ensembleDepthVarName].*", "i").
+            FILTER (?ensembleDepthName = 'depth')
+        ?ensdepthvar le:hasValues ?ensembleDepthValues .
+            OPTIONAL{?ensdepthvar le:hasUnits ?ensembleDepthUnitsObj .
+                    ?ensembleDepthUnitsObj rdfs:label ?ensembleDepthUnits.}
+
+       }
+"""
+
+_,ens_df = D.query(query)
+
+ens_df
+
+
+
+
+
+ens_df['ensembleVariableValues'] = ens_df['ensembleVariableValues'].apply(lambda row : np.array(ast.literal_eval(row)))
+ens_df['ensembleDepthValues'] = ens_df['ensembleDepthValues'].apply(lambda row : np.array(ast.literal_eval(row)))
+
+
+paleo_df = D.get_timeseries_essentials()
+paleo_row = paleo_df[paleo_df['paleoData_variableName']=='sst']
+paleo_row
+
+
+paleoValues = ens_df['ensembleVariableValues'][1] #Drop the column that contains depth
+paleoDepth = ens_df['ensembleDepthValues'][1]
+
+value_name = "SST"
+value_unit = "deg C"
+
+chronValues = paleo_row['time_values'].to_numpy()[0]
+chronDepth = paleo_row['depth_values'].to_numpy()[0]
+
+time_name = 'Time'
+time_unit = 'Years BP'
+
+
+
+
+
+ts = pyleo.GeoSeries(time = chronValues, value = paleo_row['paleoData_values'].iloc[0], 
+                     time_name =  time_name, time_unit = time_unit,
+                     value_name = value_name, value_unit = value_unit,
+                     label = 'MD98-2181', archiveType = 'Marine sediment',
+                     depth = chronDepth, depth_name = 'Depth', depth_unit = 'cm', lat = paleo_df['geo_meanLat'].iloc[0],
+                    lon = paleo_df['geo_meanLon'].iloc[0])
+
+ensemble = pyleo.EnsembleGeoSeries.from_PaleoEnsembleArray(geo_series = ts, paleo_array = paleoValues, age_depth = ts.depth, paleo_depth = paleoDepth)
+
+
+ensemble.common_time().spectral().plot_envelope(figsize=(16,10))
+
+
+%watermark -n -u -v -iv -wpy
+
+
+
diff --git a/notebooks/L0_basic_MSES_manipulation.ipynb b/notebooks/L0_basic_MSES_manipulation.ipynb
@@ -135,7 +135,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████████████████████████████████████| 31/31 [00:00<00:00, 126.76it/s]\n"
+      "100%|██████████████████████████████████████████| 31/31 [00:00<00:00, 114.54it/s]\n"
      ]
     },
     {
@@ -187,7 +187,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 3,
    "metadata": {
     "collapsed": false,
     "jupyter": {
@@ -213,7 +213,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/var/folders/xj/p7h9764x7cx0by8547l04rrr0000gn/T/ipykernel_19688/3019538340.py:21: UserWarning: auto_time_params is not specified. Currently default behavior sets this to True, which might modify your supplied time metadata.  Please set to False if you want a different behavior.\n",
+      "/var/folders/xj/p7h9764x7cx0by8547l04rrr0000gn/T/ipykernel_11358/3019538340.py:21: UserWarning: auto_time_params is not specified. Currently default behavior sets this to True, which might modify your supplied time metadata.  Please set to False if you want a different behavior.\n",
       "  series_tmp = pyleo.Series(\n"
      ]
     }