Skip to content

Commit

Permalink
Merge pull request #452 from OpenEnergyPlatform/feature-449-existing-…
Browse files Browse the repository at this point in the history
…date-parameter

Feature 449 existing date parameter
  • Loading branch information
FlorianK13 authored Aug 31, 2023
2 parents 366b444 + dc30b47 commit 9e58a89
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/
## [v0.1X.X] current - 2023-XX-XX
### Added
- User-defined output path for csv, xml, database [#402](https://github.com/OpenEnergyPlatform/open-MaStR/pull/402)
- Add date=existing parameter to Mastr.download [#452](https://github.com/OpenEnergyPlatform/open-MaStR/pull/452)
### Changed
- Using sphinx version <7 to build documentation [#454](https://github.com/OpenEnergyPlatform/open-MaStR/pull/454)
### Removed
Expand Down
10 changes: 3 additions & 7 deletions open_mastr/mastr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
get_data_version_dir,
get_project_home_dir,
get_output_dir,
setup_logger
setup_logger,
)
import open_mastr.utils.orm as orm

Expand Down Expand Up @@ -65,7 +65,6 @@ class Mastr:
"""

def __init__(self, engine="sqlite") -> None:

validate_parameter_format_for_mastr_init(engine)
self.output_dir = get_output_dir()
self.home_directory = get_project_home_dir()
Expand Down Expand Up @@ -143,7 +142,7 @@ def download(
Either "today" or None if the newest data dump should be downloaded
from the MaStR website. If an already downloaded dump should be used,
state the date of the download in the format
"yyyymmdd". Defaults to None.
"yyyymmdd" or use the string "existing". Defaults to None.
For API method:
Expand Down Expand Up @@ -215,10 +214,9 @@ def download(
method, data, api_data_types, api_location_types, **kwargs
)

date = transform_date_parameter(method, date, **kwargs)
date = transform_date_parameter(self, method, date, **kwargs)

if method == "bulk":

# Find the name of the zipped xml folder
bulk_download_date = parse_date_string(date)
xml_folder_path = os.path.join(self.output_dir, "data", "xml_download")
Expand Down Expand Up @@ -349,15 +347,13 @@ def to_csv(

# Export technologies to csv
for tech in technologies_to_export:

db_query_to_csv(
db_query=create_db_query(tech=tech, limit=limit, engine=self.engine),
data_table=tech,
chunksize=chunksize,
)
# Export additional tables to csv
for addit_table in additional_tables_to_export:

db_query_to_csv(
db_query=create_db_query(
additional_table=addit_table, limit=limit, engine=self.engine
Expand Down
50 changes: 28 additions & 22 deletions open_mastr/utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,38 @@
import os
import json
import os
import sys
from contextlib import contextmanager
from datetime import date, datetime
from warnings import warn

import dateutil
import pandas as pd
import sqlalchemy
from sqlalchemy.sql import insert, literal_column
from dateutil.parser import parse
from sqlalchemy import create_engine
from sqlalchemy.orm import Query, sessionmaker

import pandas as pd
from sqlalchemy.sql import insert, literal_column
from tqdm import tqdm

from open_mastr.soap_api.download import MaStRAPI, log
from open_mastr.soap_api.metadata.create import create_datapackage_meta_json
from open_mastr.utils import orm
from open_mastr.utils.config import (
get_filenames,
get_data_version_dir,
column_renaming,
get_data_version_dir,
get_filenames,
)

from open_mastr.soap_api.download import MaStRAPI, log
from open_mastr.utils.constants import (
BULK_DATA,
TECHNOLOGIES,
ADDITIONAL_TABLES,
API_DATA,
API_DATA_TYPES,
API_LOCATION_TYPES,
BULK_INCLUDE_TABLES_MAP,
BULK_ADDITIONAL_TABLES_CSV_EXPORT_MAP,
BULK_DATA,
BULK_INCLUDE_TABLES_MAP,
ORM_MAP,
TECHNOLOGIES,
UNIT_TYPE_MAP,
ADDITIONAL_TABLES,
)


Expand Down Expand Up @@ -88,7 +87,6 @@ def validate_parameter_format_for_download_method(
api_location_types,
**kwargs,
) -> None:

if "technology" in kwargs:
data = kwargs["technology"]
warn("'technology' parameter is deprecated. Use 'data' instead")
Expand Down Expand Up @@ -173,7 +171,7 @@ def validate_parameter_date(method, date) -> None:
if date is None: # default
return
if method == "bulk":
if date != "today":
if date not in ["today", "existing"]:
try:
_ = parse(date)
except (dateutil.parser._parser.ParserError, TypeError) as e:
Expand Down Expand Up @@ -297,11 +295,25 @@ def transform_data_parameter(
return data, api_data_types, api_location_types, harmonisation_log


def transform_date_parameter(method, date, **kwargs):

def transform_date_parameter(self, method, date, **kwargs):
    """Resolve the effective ``date`` value for a download call.

    Parameters
    ----------
    self
        Object exposing a ``home_directory`` attribute. It is only read when
        ``method == "bulk"`` and the date resolves to ``"existing"``.
    method : str
        ``"bulk"`` or ``"API"``; any other value leaves ``date`` untouched.
    date
        The user supplied date. For the bulk method ``None`` becomes
        ``"today"`` and ``"existing"`` is replaced by the date string taken
        from the file name of an already downloaded xml archive.
    **kwargs
        ``bulk_date`` (bulk method) or ``api_date`` (API method) take
        precedence over ``date`` when present.

    Returns
    -------
    The transformed date value.
    """
    if method == "bulk":
        date = kwargs.get("bulk_date", date)
        date = "today" if date is None else date
        if date == "existing":
            # NOTE(review): Mastr.download builds its xml folder path from
            # `output_dir`; confirm `home_directory` is the intended root here.
            xml_download_dir = os.path.join(
                self.home_directory, "data", "xml_download"
            )
            try:
                existing_files_list = os.listdir(xml_download_dir)
            except FileNotFoundError:
                # A missing folder means that nothing was downloaded yet.
                existing_files_list = []

            if existing_files_list:
                # we assume that there is only one file in the folder which is
                # the zipped xml folder; its name carries the date between the
                # first underscore and the following dot
                date = existing_files_list[0].split("_")[1].split(".")[0]
            else:
                # Fall back to a fresh download instead of indexing into an
                # empty listing.
                date = "today"
                print(
                    "By choosing `date`='existing' you want to use an existing "
                    "xml download. "
                    "However no xml_files were downloaded yet. The parameter "
                    "`date` is therefore set to 'today'."
                )
    elif method == "API":
        date = kwargs.get("api_date", date)

    # The caller assigns the result back to its own `date` variable.
    return date

Expand Down Expand Up @@ -333,7 +345,6 @@ def print_api_settings(
api_processes,
api_location_types,
):

print(
f"Downloading with soap_API.\n\n -- API settings -- \nunits after date: "
f"{date}\nunit download limit per data: "
Expand Down Expand Up @@ -467,9 +478,7 @@ def create_db_query(
unit_type_map_reversed = reverse_unit_type_map()

with session_scope(engine=engine) as session:

if tech:

# Select orm tables for specified additional_data.
orm_tables = {
f"{dat}": getattr(orm, ORM_MAP[tech].get(dat, "KeyNotAvailable"), None)
Expand Down Expand Up @@ -540,7 +549,6 @@ def create_db_query(
return query_tech

if additional_table:

orm_table = getattr(orm, ORM_MAP[additional_table], None)

query_additional_tables = Query(orm_table, session=session)
Expand Down Expand Up @@ -573,7 +581,6 @@ def save_metadata(data: list = None, engine=None) -> None:
unit_type_map_reversed = reverse_unit_type_map()

with session_scope(engine=engine) as session:

# check for latest db entry for exported technologies
mastr_technologies = [unit_type_map_reversed[tech] for tech in data]
newest_date = (
Expand Down Expand Up @@ -729,7 +736,6 @@ def db_query_to_csv(db_query, data_table: str, chunksize: int) -> None:
chunk_df[col] = chunk_df[col].str.replace("\r", "")

if not chunk_df.empty:

if chunk_number == 0:
chunk_df.to_csv(
csv_file,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def parameter_dict_working_list():
None,
["wind", "solar"],
],
"date": ["today", "20200108"],
"date": ["today", "20200108", "existing"],
"bulk_cleansing": [True, False],
"api_processes": [None],
"api_limit": [50],
Expand Down

0 comments on commit 9e58a89

Please sign in to comment.