Skip to content

Commit

Permalink
feat: add Solar Alabama dataset;
Browse files Browse the repository at this point in the history
  • Loading branch information
WenjieDu committed Jun 25, 2024
1 parent 4f8a338 commit 8c87e6d
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 1 deletion.
5 changes: 5 additions & 0 deletions dataset_profiles/solar_al/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Solar Alabama

## Citing this dataset 🤗

`https://www.nrel.gov/grid/solar-power-data.html`
3 changes: 3 additions & 0 deletions tsdb/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
load_ais,
load_italy_air_quality,
load_pems_traffic,
load_solar_alabama,
)
from .utils.downloading import download_and_extract
from .utils.file import purge_path, pickle_load, pickle_dump, determine_data_home
Expand Down Expand Up @@ -108,6 +109,8 @@ def load(dataset_name: str, use_cache: bool = True) -> dict:
result = load_ais(dataset_saving_path)
elif dataset_name == "pems_traffic":
result = load_pems_traffic(dataset_saving_path)
elif dataset_name == "solar_alabama":
result = load_solar_alabama(dataset_saving_path)
elif "ucr_uea_" in dataset_name:
actual_dataset_name = dataset_name.replace(
"ucr_uea_", ""
Expand Down
5 changes: 4 additions & 1 deletion tsdb/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,12 @@
"https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh1.csv",
"https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh2.csv",
],
# https://pems.dot.ca.gov
# https://pems.dot.ca.gov, https://github.com/laiguokun/multivariate-time-series-data
"pems_traffic": "https://raw.githubusercontent.com/laiguokun/multivariate-time-series-data/master/"
"traffic/traffic.txt.gz",
# https://www.nrel.gov/grid/solar-power-data.html, https://github.com/laiguokun/multivariate-time-series-data
"solar_alabama": "https://raw.githubusercontent.com/laiguokun/multivariate-time-series-data/master/"
"solar-energy/solar_AL.txt.gz",
}


Expand Down
2 changes: 2 additions & 0 deletions tsdb/loading_funcs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .ucr_uea_datasets import load_ucr_uea_dataset
from .vessel_ais import load_ais
from .pems_traffic import load_pems_traffic
from .solar_alabama import load_solar_alabama

__all__ = [
"load_beijing_air_quality",
Expand All @@ -25,4 +26,5 @@
"load_ett",
"load_italy_air_quality",
"load_pems_traffic",
"load_solar_alabama",
]
50 changes: 50 additions & 0 deletions tsdb/loading_funcs/solar_alabama.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Scripts related to the Solar Alabama dataset. It contains solar power production records from the year 2006,
sampled every 10 minutes from 137 PV plants in the state of Alabama.
https://www.nrel.gov/grid/solar-power-data.html
For more information please refer to:
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/solar_al
"""

# Created by Wenjie Du <wenjay.du@gmail.com>
# License: BSD-3-Clause

import os

import pandas as pd


def load_solar_alabama(local_path):
    """Read the raw Solar Alabama file and return it as a timestamped table.

    Parameters
    ----------
    local_path : str,
        Directory that holds the raw file ``solar_AL.txt``.

    Returns
    -------
    data : dict
        A dictionary with a single key:

        X : pandas.DataFrame
            The time-series data of Solar Alabama. The first column is
            ``date``; it is followed by 137 columns named ``"0"`` to ``"136"``.

    Notes
    -----
    The raw file has no header and no timestamps. The ``date`` column is
    generated here as a fixed 10-minute grid from 2006-01-01 00:00:00 to
    2006-12-31 23:50:00, so the file is expected to hold exactly one row per
    grid point; pandas rejects the column assignment if the lengths differ.
    """
    # One column label per series in the file: "0", "1", ..., "136".
    plant_columns = list(map(str, range(137)))

    raw_file = os.path.join(local_path, "solar_AL.txt")
    frame = pd.read_csv(raw_file, index_col=None, names=plant_columns)

    # Attach the synthetic 10-minute timestamps covering the whole of 2006.
    frame["date"] = pd.date_range(
        start="2006-01-01 00:00:00",
        end="2006-12-31 23:50:00",
        freq="10min",
    )

    # Put the timestamp column first, ahead of the per-plant readings.
    ordered_columns = ["date", *plant_columns]
    return {"X": frame[ordered_columns]}

0 comments on commit 8c87e6d

Please sign in to comment.