Commit

Merge pull request #464 from NREL/skip_dask_metadata
rajeee authored Aug 13, 2024
2 parents bcb331a + ce0781e commit fe1ba37
Showing 4 changed files with 7 additions and 19 deletions.
13 changes: 0 additions & 13 deletions buildstockbatch/postprocessing.py
@@ -379,19 +379,6 @@ def write_metadata_files(fs, parquet_root_dir, partition_columns):
     parquet.write_metadata(sch, f"{parquet_root_dir}/_common_metadata", filesystem=fs)
     logger.info(f"Written _common_metadata to {parquet_root_dir}")
-
-    if partition_columns:
-        partition_glob = "/".join([f"{c}*" for c in partition_columns])
-        glob_str = f"{parquet_root_dir}/up*/{partition_glob}/*.parquet"
-    else:
-        glob_str = f"{parquet_root_dir}/up*/*.parquet"
-
-    logger.info(f"Gathering all the parquet files in {glob_str}")
-    concat_files = fs.glob(glob_str)
-    logger.info(f"Gathered {len(concat_files)} files. Now writing _metadata")
-    parquet_root_dir = Path(parquet_root_dir).as_posix()
-    create_metadata_file(concat_files, root_dir=parquet_root_dir, engine="pyarrow", fs=fs)
-    logger.info(f"_metadata file written to {parquet_root_dir}")
 
 
 def combine_results(fs, results_dir, cfg, do_timeseries=True):
     """Combine the results of the batch simulations.
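The block removed above is what collected every per-upgrade parquet file and wrote a consolidated dask _metadata file; after this commit only the schema-level _common_metadata file is still written. As a hedged illustration (not part of this commit), a consolidated _metadata file could still be rebuilt with pyarrow alone if a downstream workflow requires one; the dataset root and glob pattern below are placeholders:

    # Hypothetical sketch, not part of this commit: rebuild a consolidated
    # _metadata file with pyarrow if a downstream workflow still needs one.
    # parquet_root_dir and the glob pattern are placeholders for a local dataset.
    import glob

    import pyarrow.parquet as pq

    parquet_root_dir = "/path/to/timeseries"  # placeholder dataset root
    files = sorted(glob.glob(f"{parquet_root_dir}/up*/*.parquet"))

    merged = None
    for path in files:
        md = pq.read_metadata(path)
        # _metadata stores row-group file paths relative to the dataset root
        md.set_file_path(path[len(parquet_root_dir) + 1:])
        if merged is None:
            merged = md
        else:
            merged.append_row_groups(md)

    if merged is not None:
        merged.write_metadata_file(f"{parquet_root_dir}/_metadata")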
5 changes: 0 additions & 5 deletions buildstockbatch/test/test_base.py
@@ -218,11 +218,6 @@ def test_upload_files(mocker, basic_residential_project_file):
     assert (source_file_path, s3_file_path) in files_uploaded
     files_uploaded.remove((source_file_path, s3_file_path))
-
-    s3_file_path = s3_path + "timeseries/_metadata"
-    source_file_path = os.path.join(source_path, "timeseries", "_metadata")
-    assert (source_file_path, s3_file_path) in files_uploaded
-    files_uploaded.remove((source_file_path, s3_file_path))
 
     s3_file_path = s3_path + "buildstock_csv/buildstock.csv"
     source_file_path = str(buildstock_csv_path)
     assert (source_file_path, s3_file_path) in files_uploaded
1 change: 0 additions & 1 deletion buildstockbatch/test/test_local.py
@@ -89,7 +89,6 @@ def test_resstock_local_batch(project_filename):
         assert (upg["completed_status"] == "Success").all()
         assert upg.shape[0] == n_datapoints
     assert (ts_pq_path / "_common_metadata").exists()
-    assert (ts_pq_path / "_metadata").exists()
 
     shutil.rmtree(out_path)
 
7 changes: 7 additions & 0 deletions docs/changelog/changelog_dev.rst
@@ -102,3 +102,10 @@ Development Changelog
         exposes optional ``include_annual_bills`` (defaults to true) and
         ``include_monthly_bills`` (defaults to false) arguments for reporting annual
         and monthly utility bill outputs, respectively.
+
+    .. change::
+        :tags: general, bugfix
+        :pullreq: 464
+
+        Stop creating dask _metadata files for the timeseries parquet files since it crashes the
+        postprocessing.

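As a usage note rather than part of the diff: the timeseries parquet dataset remains readable without a consolidated _metadata file, since readers discover the files directly and take the schema from _common_metadata or from the files themselves. A minimal sketch, assuming a local placeholder path:

    # Hypothetical usage sketch: reading the partitioned timeseries output after
    # this change, with no consolidated _metadata file present. The path is a
    # placeholder.
    import dask.dataframe as dd

    ts = dd.read_parquet("/path/to/timeseries")  # placeholder dataset root
    print(ts.columns)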