From 4962bc683b4e96175d5892d44b5697ce3a186618 Mon Sep 17 00:00:00 2001 From: Yingli Lou Date: Tue, 18 Apr 2023 20:51:52 -0600 Subject: [PATCH 01/14] upload-buildstockcsv-to-s3 --- buildstockbatch/postprocessing.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index 5dff645c..d00a4f4b 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -595,6 +595,7 @@ def remove_intermediate_files(fs, results_dir, keep_individual_timeseries=False) def upload_results(aws_conf, output_dir, results_dir): logger.info("Uploading the parquet files to s3") + buildstock_dir = Path(results_dir).parent.joinpath('housing_characteristics') output_folder_name = Path(output_dir).name parquet_dir = Path(results_dir).joinpath('parquet') ts_dir = parquet_dir / 'timeseries' @@ -607,6 +608,9 @@ def upload_results(aws_conf, output_dir, results_dir): all_files.append(file.relative_to(parquet_dir)) for file in [*ts_dir.glob('_common_metadata'), *ts_dir.glob('_metadata')]: all_files.append(file.relative_to(parquet_dir)) + buildstock_csv = [] + for file in buildstock_dir.glob('buildstock.csv'): + buildstock_csv.append(file.relative_to(buildstock_dir)) s3_prefix = aws_conf.get('s3', {}).get('prefix', '').rstrip('/') s3_bucket = aws_conf.get('s3', {}).get('bucket', None) @@ -628,8 +632,16 @@ def upload_file(filepath): bucket = s3.Bucket(s3_bucket) s3key = Path(s3_prefix_output).joinpath(filepath).as_posix() bucket.upload_file(str(full_path), str(s3key)) + + def upload_buildstock_csv(filepath): + full_path = buildstock_dir.joinpath(filepath) + s3 = boto3.resource('s3') + bucket = s3.Bucket(s3_bucket) + s3key = Path(s3_prefix_output).joinpath(filepath).as_posix() + bucket.upload_file(str(full_path), str(s3key)) dask.compute(map(dask.delayed(upload_file), all_files)) + dask.compute(map(dask.delayed(upload_buildstock_csv), buildstock_csv)) logger.info(f"Upload to S3 completed. 
The files are uploaded to: {s3_bucket}/{s3_prefix_output}") return s3_bucket, s3_prefix_output From 3b1d264f1dd16be862563cd210afc99dab9a152c Mon Sep 17 00:00:00 2001 From: Yingli Lou Date: Wed, 3 May 2023 09:57:25 -0600 Subject: [PATCH 02/14] add buildstock_csv folder --- buildstockbatch/postprocessing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index d00a4f4b..20a7f060 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -637,7 +637,8 @@ def upload_buildstock_csv(filepath): full_path = buildstock_dir.joinpath(filepath) s3 = boto3.resource('s3') bucket = s3.Bucket(s3_bucket) - s3key = Path(s3_prefix_output).joinpath(filepath).as_posix() + s3_prefix_output_new = s3_prefix_output+ '/' + 'buildstock_csv' + '/' + s3key = Path(s3_prefix_output_new).joinpath(filepath).as_posix() bucket.upload_file(str(full_path), str(s3key)) dask.compute(map(dask.delayed(upload_file), all_files)) From 236d609221fc26b85329bbcc7ec0bdf28c2afe3f Mon Sep 17 00:00:00 2001 From: Yingli Lou Date: Wed, 3 May 2023 10:12:54 -0600 Subject: [PATCH 03/14] add to the changelog_dev.rst file --- docs/changelog/changelog_dev.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst index 4227699c..12e0a0f5 100644 --- a/docs/changelog/changelog_dev.rst +++ b/docs/changelog/changelog_dev.rst @@ -40,3 +40,9 @@ Development Changelog :pullreq: 362 Enforce Athena database name and table name to follow strict alphanumeric only naming convention. + + .. change:: + :tags: postprocessing, feature + :pullreq: 365 + + Upload buildstock.csv to S3 during postprocessing From c92259f44d3fbcb06f086bb593ddca5d76a1ba64 Mon Sep 17 00:00:00 2001 From: Yingli Lou Date: Thu, 20 Jul 2023 15:28:48 -0600 Subject: [PATCH 04/14] improvement and test --- buildstockbatch/postprocessing.py | 8 ++++---- buildstockbatch/test/conftest.py | 8 ++++++-- buildstockbatch/test/test_base.py | 6 ++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index 20a7f060..1ff836ae 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -608,9 +608,8 @@ def upload_results(aws_conf, output_dir, results_dir): all_files.append(file.relative_to(parquet_dir)) for file in [*ts_dir.glob('_common_metadata'), *ts_dir.glob('_metadata')]: all_files.append(file.relative_to(parquet_dir)) - buildstock_csv = [] for file in buildstock_dir.glob('buildstock.csv'): - buildstock_csv.append(file.relative_to(buildstock_dir)) + buildstock_csv = file.relative_to(buildstock_dir) s3_prefix = aws_conf.get('s3', {}).get('prefix', '').rstrip('/') s3_bucket = aws_conf.get('s3', {}).get('bucket', None) @@ -641,8 +640,9 @@ def upload_buildstock_csv(filepath): s3key = Path(s3_prefix_output_new).joinpath(filepath).as_posix() bucket.upload_file(str(full_path), str(s3key)) - dask.compute(map(dask.delayed(upload_file), all_files)) - dask.compute(map(dask.delayed(upload_buildstock_csv), buildstock_csv)) + tasks = list(map(dask.delayed(upload_file), all_files)) + tasks.append(dask.delayed(upload_buildstock_csv)(buildstock_csv)) + dask.compute(tasks) logger.info(f"Upload to S3 completed. 
The files are uploaded to: {s3_bucket}/{s3_prefix_output}") return s3_bucket, s3_prefix_output diff --git a/buildstockbatch/test/conftest.py b/buildstockbatch/test/conftest.py index 290372da..ed9eda19 100644 --- a/buildstockbatch/test/conftest.py +++ b/buildstockbatch/test/conftest.py @@ -30,8 +30,12 @@ def _basic_residential_project_file(update_args={}, raw=False): shutil.move(os.path.join(output_directory, 'simulation_output', 'job0.json'), os.path.join(output_directory, 'simulation_output', '..', '..', 'job0.json')) - os.mkdir(os.path.join(output_directory, 'housing_characteristics')) - os.mkdir(os.path.join(buildstock_directory, project_directory, 'housing_characteristics')) + #os.mkdir(os.path.join(output_directory, 'housing_characteristics')) + #os.mkdir(os.path.join(buildstock_directory, project_directory, 'housing_characteristics')) + shutil.copytree( + os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'housing_characteristics'), + os.path.join(output_directory, 'housing_characteristics') + ) cfg = { 'buildstock_directory': buildstock_directory, 'project_directory': project_directory, diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 6fe0c7ce..a5434187 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -301,6 +301,7 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): # check if all the files are properly uploaded source_path = os.path.join(results_dir, 'parquet') + buildstock_dir = os.path.join(results_dir, 'housing_characteristics') s3_path = s3_prefix + '/' + OUTPUT_FOLDER_NAME + '/' s3_file_path = s3_path + 'baseline/results_up00.parquet' @@ -333,6 +334,11 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) + s3_file_path = s3_path + 'buildstock_csv/buildstock.csv' + source_file_path_tst = os.path.join(buildstock_dir, 'buildstock.csv') + assert (source_file_path_tst, s3_file_path) in files_uploaded + files_uploaded.remove((source_file_path_tst, s3_file_path)) + assert len(files_uploaded) == 0, f"These files shouldn't have been uploaded: {files_uploaded}" From d280288d5bf76c6a7876e1cef84d7238eb1981a9 Mon Sep 17 00:00:00 2001 From: Yingli Lou Date: Thu, 20 Jul 2023 15:29:36 -0600 Subject: [PATCH 05/14] improvement and test --- .../test_results/housing_characteristics/buildstock.csv | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 buildstockbatch/test/test_results/housing_characteristics/buildstock.csv diff --git a/buildstockbatch/test/test_results/housing_characteristics/buildstock.csv b/buildstockbatch/test/test_results/housing_characteristics/buildstock.csv new file mode 100644 index 00000000..c17c173b --- /dev/null +++ b/buildstockbatch/test/test_results/housing_characteristics/buildstock.csv @@ -0,0 +1,6 @@ +Building,Usage Level,Orientation,Eaves,Door Area,Overhangs,Interior Shading,Doors,Hot Water Fixtures,Hot Water Distribution,Solar Hot Water,HVAC Secondary Heating Type And Fuel,HVAC Secondary Heating Efficiency,HVAC System Is Faulted,Dehumidifier,Refrigerator Usage Level,Cooking Range Usage Level,Dishwasher Usage Level,Clothes Washer Usage Level,Clothes Dryer Usage Level,Lighting Interior Use,Lighting Other Use,Holiday Lighting,Plug Load Diversity,Electric Vehicle,Misc Gas Fireplace,Misc Gas Grill,Misc Gas Lighting,Misc Well Pump,Natural Ventilation,Mechanical Ventilation,Bathroom Spot Vent Hour,Range Spot 
Vent Hour,ASHRAE IECC Climate Zone 2004,County and PUMA,CEC Climate Zone,County,AHS Region,ASHRAE IECC Climate Zone 2004 - 2A Split,State,Location Region,Census Division,Census Division RECS,Census Region,ISO RTO Region,Building America Climate Zone,PUMA,Geometry Building Type ACS,Vintage,Vintage ACS,Geometry Building Type RECS,Heating Fuel,Geometry Building Number Units SFA,Geometry Building Horizontal Location SFA,Geometry Foundation Type,Corridor,Vacancy Status,Tenure,Income,Income RECS2015,Income RECS2020,Neighbors,Radiant Barrier,Roof Material,Insulation Slab,Insulation Floor,Insulation Foundation Wall,Insulation Rim Joist,Water Heater In Unit,Water Heater Fuel,Water Heater Efficiency,HVAC Heating Type,HVAC Cooling Type,HVAC Has Shared System,HVAC Shared Efficiencies,HVAC Heating Efficiency,HVAC Has Zonal Electric Heating,HVAC Heating Type And Fuel,HVAC Cooling Efficiency,HVAC System Single Speed AC Airflow,HVAC System Single Speed ASHP Airflow,HVAC System Single Speed AC Charge,HVAC System Single Speed ASHP Charge,Heating Setpoint,Cooling Setpoint,Heating Setpoint Has Offset,Cooling Setpoint Has Offset,Heating Setpoint Offset Magnitude,Cooling Setpoint Offset Magnitude,Heating Setpoint Offset Period,Cooling Setpoint Offset Period,Ceiling Fan,Lighting,Plug Loads,HVAC Has Ducts,Ducts,Has PV,PV System Size,PV Orientation,Battery,PUMA Metro Status,Geometry Floor Area,Geometry Floor Area Bin,Geometry Stories,Geometry Building Type Height,Geometry Building Level MF,Geometry Building Number Units MF,Geometry Building Horizontal Location MF,Geometry Stories Low Rise,Geometry Attic Type,Geometry Story Bin,Geometry Wall Type,Geometry Wall Exterior Finish,Geometry Garage,Bedrooms,Occupants,Federal Poverty Level,Area Median Income,Window Areas,Insulation Ceiling,Insulation Roof,Insulation Wall,Windows,HVAC Cooling Partial Space Conditioning,Refrigerator,Cooking Range,Dishwasher,Clothes Washer Presence,Clothes Washer,Clothes Dryer,Misc Extra Refrigerator,Misc Freezer,Misc Hot Tub Spa,Misc Pool,Misc Pool Heater,Misc Pool Pump,Infiltration,REEDS Balancing Area,Generation And Emissions Assessment Region,City,AIANNH Area,Household Has Tribal Persons +1,High,South,2 ft,20 ft^2,None,"Summer = 0.7, Winter = 0.85",Fiberglass,200% Usage,Uninsulated,None,None,None,No,None,105% Usage,120% Usage,120% Usage,120% Usage,120% Usage,100% Usage,100% Usage,No Exterior Use,200%,None,None,None,None,None,"Cooling Season, 7 days/wk",None,Hour5,Hour19,3A,"G4500510, G45001102",None,"SC, Horry County",Non-CBSA South Atlantic,3A,SC,CR09,South Atlantic,South Atlantic,South,None,Hot-Humid,"SC, 01102",Single-Family Detached,2000s,2000-09,Single-Family Detached,Electricity,None,None,Slab,Not Applicable,Occupied,Owner,80000-99999,80000-99999,60000-99999,Left/Right at 15ft,No,Composition Shingles,Uninsulated,None,None,None,Yes,Electricity,Electric Standard,Ducted Heat Pump,Ducted Heat Pump,None,None,"ASHP, SEER 13, 7.7 HSPF",No,Electricity ASHP,Ducted Heat Pump,None,None,None,None,68F,72F,No,No,0F,0F,None,None,Standard Efficiency,100% Incandescent,106%,Yes,"20% Leakage, R-8",No,None,None,None,"In metro area, not/partially in principal city",1500-1999,1500-2499,1,Single-Family Detached,None,None,None,1,Vented Attic,<8,Wood Frame,"Vinyl, Light",2 Car,3,2,400%+,150%+,F12 B12 L12 R12,R-30,"Unfinished, Uninsulated","Wood Stud, R-11","Double, Clear, Metal, Air",100% Conditioned,EF 19.9,Electric Resistance,318 Rated kWh,Yes,EnergyStar,Electric,None,None,None,None,None,None,15 ACH50,96,SRVCc,Not in a census Place,No,No +2,Low,North,2 
ft,20 ft^2,None,"Summer = 0.7, Winter = 0.85",Fiberglass,50% Usage,Uninsulated,None,None,None,No,None,95% Usage,80% Usage,80% Usage,80% Usage,80% Usage,100% Usage,100% Usage,No Exterior Use,50%,None,None,None,None,None,"Cooling Season, 7 days/wk",None,Hour22,Hour14,5A,"G1700310, G17003525",None,"IL, Cook County","CBSA Chicago-Naperville-Elgin, IL-IN-WI",5A,IL,CR04,East North Central,East North Central,Midwest,PJM,Cold,"IL, 03525",50 or more Unit,2000s,2000-09,Multi-Family with 5+ Units,Electricity,None,None,Slab,Double-Loaded Interior,Occupied,Renter,200000+,140000+,150000+,27,None,"Asphalt Shingles, Medium","2ft R10 Perimeter, Vertical",None,None,None,No,Electricity,Electric Standard,Non-Ducted Heating,Room AC,None,None,"Electric Baseboard, 100% Efficiency",Yes,Electricity Baseboard,"Room AC, EER 10.7",None,None,None,None,70F,70F,No,No,0F,0F,None,None,Standard Efficiency,100% Incandescent,89%,No,None,No,None,None,None,"In metro area, principal city",1000-1499,0-1499,4,"Multifamily with 5+ units, 4-7 stories",Middle,67,Middle,4+,None,<8,Wood Frame,"Brick, Medium/Dark",None,2,2,400%+,150%+,F30 B30 L30 R30,None,"Finished, R-38","Wood Stud, R-11","Double, Clear, Metal, Air",40% Conditioned,EF 17.6,Electric Resistance,318 Rated kWh,Yes,Standard,Electric,None,None,None,None,None,None,15 ACH50,80,RFCWc,"IL, Chicago",No,No +3,Medium,East,2 ft,20 ft^2,None,"Summer = 0.7, Winter = 0.85",Fiberglass,100% Usage,Uninsulated,None,None,None,No,None,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,No Exterior Use,100%,None,None,None,None,None,"Cooling Season, 7 days/wk",None,Hour21,Hour17,2A,"G0100030, G01002600",None,"AL, Baldwin County",Non-CBSA East South Central,"2A - FL, GA, AL, MS",AL,CR09,East South Central,East South Central,South,None,Hot-Humid,"AL, 02600",Single-Family Detached,2000s,2000-09,Single-Family Detached,Electricity,None,None,Slab,Not Applicable,Occupied,Owner,80000-99999,80000-99999,60000-99999,Left/Right at 15ft,No,Composition Shingles,Uninsulated,None,None,None,Yes,Electricity,Electric Standard,Ducted Heat Pump,Ducted Heat Pump,None,None,"ASHP, SEER 15, 8.5 HSPF",No,Electricity ASHP,Ducted Heat Pump,None,None,None,None,70F,76F,No,No,0F,0F,None,None,Standard Efficiency,100% Incandescent,110%,Yes,"30% Leakage, R-8",No,None,None,None,"In metro area, not/partially in principal city",2000-2499,1500-2499,2,Single-Family Detached,None,None,None,2,Vented Attic,<8,Wood Frame,"Brick, Medium/Dark",2 Car,3,2,400%+,120-150%,F9 B9 L9 R9,R-30,"Unfinished, Uninsulated","Wood Stud, R-11","Double, Clear, Metal, Air",100% Conditioned,EF 19.9,Electric Resistance,318 Rated kWh,Yes,Standard,Electric,None,None,None,None,None,None,15 ACH50,90,SRSOc,In another census Place,No,No +4,Medium,Southeast,2 ft,20 ft^2,None,"Summer = 0.7, Winter = 0.85",Fiberglass,100% Usage,Uninsulated,None,None,None,No,None,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,No Exterior Use,100%,None,None,None,None,Typical Efficiency,"Cooling Season, 7 days/wk",None,Hour23,Hour18,4A,"G3600610, G36003805",None,"NY, New York County","CBSA New York-Newark-Jersey City, NY-NJ-PA",4A,NY,CR07,Middle Atlantic,Middle Atlantic,Northeast,NYISO,Mixed-Humid,"NY, 03805",50 or more Unit,1960s,1960-79,Multi-Family with 5+ Units,Natural Gas,None,None,Vented Crawlspace,Double-Loaded Interior,Occupied,Renter,200000+,140000+,150000+,12,None,"Asphalt Shingles, Medium",None,Uninsulated,Uninsulated,Uninsulated,No,Natural Gas,Natural Gas Standard,Non-Ducted Heating,Room AC,Heating Only,"Boiler 
Baseboards Heating Only, Natural Gas",Shared Heating,No,Natural Gas Shared Heating,"Room AC, EER 10.7",None,None,None,None,72F,70F,No,No,0F,0F,None,None,None,100% Incandescent,89%,No,None,No,None,None,None,"In metro area, principal city",500-749,0-1499,21,"Multifamily with 5+ units, 8+ stories",Middle,326,Middle,4+,None,8+,Steel Frame,"Aluminum, Light",None,1,2,400%+,150%+,F30 B30 L30 R30,None,"Finished, R-30","Wood Stud, Uninsulated","Double, Clear, Metal, Air",40% Conditioned,EF 17.6,Gas,318 Rated kWh,None,None,None,None,None,None,None,None,None,20 ACH50,127,NYSTc,"NY, New York",No,No +5,Medium,West,2 ft,20 ft^2,None,"Summer = 0.7, Winter = 0.85",Fiberglass,100% Usage,Uninsulated,None,None,None,No,None,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,100% Usage,No Exterior Use,100%,None,None,None,None,None,"Cooling Season, 7 days/wk",None,Hour6,Hour16,3B,"G0400150, G04000600",None,"AZ, Mohave County",Non-CBSA Mountain,3B,AZ,CR10,Mountain,Mountain South,West,None,Hot-Dry,"AZ, 00600",Single-Family Detached,2000s,2000-09,Single-Family Detached,Electricity,None,None,Slab,Not Applicable,Occupied,Owner,80000-99999,80000-99999,60000-99999,Left/Right at 15ft,No,"Tile, Clay or Ceramic",Uninsulated,None,None,None,Yes,Electricity,Electric Standard,Ducted Heating,Central AC,None,None,"Electric Furnace, 100% AFUE",No,Electricity Electric Furnace,"AC, SEER 13",None,None,None,None,68F,78F,Yes,No,3F,0F,Night +5h,None,Standard Efficiency,100% CFL,84%,Yes,"10% Leakage, R-8",No,None,None,None,Not/partially in metro area,2000-2499,1500-2499,1,Single-Family Detached,None,None,None,1,Vented Attic,<8,Wood Frame,"Wood, Medium/Dark",2 Car,4,2,400%+,150%+,F15 B15 L15 R15,R-30,"Unfinished, Uninsulated","Wood Stud, R-11","Double, Low-E, Non-metal, Air, M-Gain",100% Conditioned,EF 19.9,Electric Resistance,318 Rated kWh,Yes,Standard,Electric,None,None,None,None,None,None,5 ACH50,27,AZNMc,In another census Place,No,No From 18e2b9db1d861f026d2ab1a0439ebb10b488daae Mon Sep 17 00:00:00 2001 From: Yingli Lou Date: Fri, 21 Jul 2023 14:03:03 -0600 Subject: [PATCH 06/14] bug fix --- buildstockbatch/test/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildstockbatch/test/conftest.py b/buildstockbatch/test/conftest.py index ed9eda19..289449ee 100644 --- a/buildstockbatch/test/conftest.py +++ b/buildstockbatch/test/conftest.py @@ -31,7 +31,7 @@ def _basic_residential_project_file(update_args={}, raw=False): os.path.join(output_directory, 'simulation_output', '..', '..', 'job0.json')) #os.mkdir(os.path.join(output_directory, 'housing_characteristics')) - #os.mkdir(os.path.join(buildstock_directory, project_directory, 'housing_characteristics')) + os.mkdir(os.path.join(buildstock_directory, project_directory, 'housing_characteristics')) shutil.copytree( os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'housing_characteristics'), os.path.join(output_directory, 'housing_characteristics') From d095b9a5c2b9c32d968a4120ab9f40cbfed59c92 Mon Sep 17 00:00:00 2001 From: Yingli Lou Date: Fri, 21 Jul 2023 15:00:33 -0600 Subject: [PATCH 07/14] bug fix --- buildstockbatch/postprocessing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index 1ff836ae..8962b8b0 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -608,6 +608,7 @@ def upload_results(aws_conf, output_dir, results_dir): all_files.append(file.relative_to(parquet_dir)) for file in 
[*ts_dir.glob('_common_metadata'), *ts_dir.glob('_metadata')]:
         all_files.append(file.relative_to(parquet_dir))
+    buildstock_csv = ''
     for file in buildstock_dir.glob('buildstock.csv'):
         buildstock_csv = file.relative_to(buildstock_dir)

From 172a8467283e48d33cafc423eff6bd50b04a01d7 Mon Sep 17 00:00:00 2001
From: Yingli Lou
Date: Fri, 21 Jul 2023 16:33:36 -0600
Subject: [PATCH 08/14] modify unit test

---
 buildstockbatch/test/conftest.py  | 3 ++-
 buildstockbatch/test/test_base.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/buildstockbatch/test/conftest.py b/buildstockbatch/test/conftest.py
index 289449ee..b42cca05 100644
--- a/buildstockbatch/test/conftest.py
+++ b/buildstockbatch/test/conftest.py
@@ -3,6 +3,7 @@
 import shutil
 import tempfile
 import yaml
+from pathlib import Path

 OUTPUT_FOLDER_NAME = 'output'

@@ -34,7 +35,7 @@ def _basic_residential_project_file(update_args={}, raw=False):
     os.mkdir(os.path.join(buildstock_directory, project_directory, 'housing_characteristics'))
     shutil.copytree(
         os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'housing_characteristics'),
-        os.path.join(output_directory, 'housing_characteristics')
+        os.path.join(Path(output_directory).parent.joinpath('housing_characteristics'))
     )
     cfg = {
         'buildstock_directory': buildstock_directory,
         'project_directory': project_directory,
diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py
index 26772027..03ffda14 100644
--- a/buildstockbatch/test/test_base.py
+++ b/buildstockbatch/test/test_base.py
@@ -14,6 +14,7 @@
 import tempfile
 from unittest.mock import patch, MagicMock, PropertyMock
 import yaml
+from pathlib import Path

 import buildstockbatch
 from buildstockbatch.base import BuildStockBatchBase
@@ -302,7 +303,7 @@ def test_upload_files(mocked_boto3, basic_residential_project_file):
     # check if all the files are properly uploaded
     source_path = os.path.join(results_dir, 'parquet')
-    buildstock_dir = os.path.join(results_dir, 'housing_characteristics')
+    buildstock_dir = Path(results_dir).parent.joinpath('housing_characteristics')
     s3_path = s3_prefix + '/' + OUTPUT_FOLDER_NAME + '/'

     s3_file_path = s3_path + 'baseline/results_up00.parquet'

From 4a75b8507fa1e81d3a02ffb53474414c19d0e404 Mon Sep 17 00:00:00 2001
From: Yingli
Date: Tue, 25 Jul 2023 15:37:06 -0600
Subject: [PATCH 09/14] fix testing error

---
 buildstockbatch/test/test_base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py
index 03ffda14..17bfd9d7 100644
--- a/buildstockbatch/test/test_base.py
+++ b/buildstockbatch/test/test_base.py
@@ -105,7 +105,7 @@ def simplify_columns(colname):
     reference_pq = pd.read_parquet(os.path.join(reference_path, 'baseline', 'results_up00.parquet')).\
         rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
     mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq)))
-    pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols])
+    pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_dtype=False)

     test_pq = pd.read_parquet(os.path.join(test_path, 'upgrades', 'upgrade=1', 'results_up01.parquet')).\
         rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index'])
@@ -209,7 +209,7 @@ def test_combine_files(basic_residential_project_file):
         .reset_index().drop(columns=['index'])
     reference_pq = pd.read_parquet(os.path.join(reference_path, 'baseline', 'results_up00.parquet'))\
.sort_values('building_id').reset_index().drop(columns=['index']) - pd.testing.assert_frame_equal(test_pq, reference_pq) + pd.testing.assert_frame_equal(test_pq, reference_pq, check_dtype=False) test_pq = pd.read_parquet(os.path.join(test_path, 'upgrades', 'upgrade=1', 'results_up01.parquet'))\ .sort_values('building_id').reset_index().drop(columns=['index']) From d17cb792edaed3b4adc9a54886c763b037e97f35 Mon Sep 17 00:00:00 2001 From: Yingli Date: Tue, 25 Jul 2023 16:00:03 -0600 Subject: [PATCH 10/14] fix testing error --- buildstockbatch/test/test_base.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 17bfd9d7..47e531b3 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -86,14 +86,14 @@ def simplify_columns(colname): reference_csv = read_csv(os.path.join(reference_path, 'results_up00.csv.gz')).rename(columns=simplify_columns).\ sort_values('buildingid').reset_index().drop(columns=['index']) mutul_cols = list(set(test_csv.columns).intersection(set(reference_csv))) - pd.testing.assert_frame_equal(test_csv[mutul_cols], reference_csv[mutul_cols]) + pd.testing.assert_frame_equal(test_csv[mutul_cols], reference_csv[mutul_cols], check_dtype=False) test_csv = read_csv(os.path.join(test_path, 'results_up01.csv.gz')).rename(columns=simplify_columns).\ sort_values('buildingid').reset_index().drop(columns=['index']) reference_csv = read_csv(os.path.join(reference_path, 'results_up01.csv.gz')).rename(columns=simplify_columns).\ sort_values('buildingid').reset_index().drop(columns=['index']) mutul_cols = list(set(test_csv.columns).intersection(set(reference_csv))) - pd.testing.assert_frame_equal(test_csv[mutul_cols], reference_csv[mutul_cols]) + pd.testing.assert_frame_equal(test_csv[mutul_cols], reference_csv[mutul_cols], check_dtype=False) # test parquet files reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'parquet') @@ -112,7 +112,7 @@ def simplify_columns(colname): reference_pq = pd.read_parquet(os.path.join(reference_path, 'upgrades', 'upgrade=1', 'results_up01.parquet')).\ rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index']) mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq))) - pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) + pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_dtype=False) # timeseries parquet test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\ @@ -120,14 +120,14 @@ def simplify_columns(colname): reference_pq = dd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\ .compute().reset_index() mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq))) - pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) + pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_dtype=False) test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=1'), engine='pyarrow')\ .compute().reset_index() reference_pq = dd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=1'), engine='pyarrow')\ .compute().reset_index() mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq))) - pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) + 
pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_dtype=False) def test_downselect_integer_options(basic_residential_project_file, mocker): @@ -192,13 +192,13 @@ def test_combine_files(basic_residential_project_file): .drop(columns=['index']) reference_csv = read_csv(os.path.join(reference_path, 'results_up00.csv.gz')).sort_values('building_id')\ .reset_index().drop(columns=['index']) - pd.testing.assert_frame_equal(test_csv, reference_csv) + pd.testing.assert_frame_equal(test_csv, reference_csv, check_dtype=False) test_csv = read_csv(os.path.join(test_path, 'results_up01.csv.gz')).sort_values('building_id').reset_index()\ .drop(columns=['index']) reference_csv = read_csv(os.path.join(reference_path, 'results_up01.csv.gz')).sort_values('building_id')\ .reset_index().drop(columns=['index']) - pd.testing.assert_frame_equal(test_csv, reference_csv) + pd.testing.assert_frame_equal(test_csv, reference_csv, check_dtype=False) # test parquet files reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'parquet') @@ -215,7 +215,7 @@ def test_combine_files(basic_residential_project_file): .sort_values('building_id').reset_index().drop(columns=['index']) reference_pq = pd.read_parquet(os.path.join(reference_path, 'upgrades', 'upgrade=1', 'results_up01.parquet'))\ .sort_values('building_id').reset_index().drop(columns=['index']) - pd.testing.assert_frame_equal(test_pq, reference_pq) + pd.testing.assert_frame_equal(test_pq, reference_pq, check_dtype=False) # timeseries parquet test_pq_all = dd.read_parquet(os.path.join(test_path, 'timeseries'), engine='pyarrow')\ @@ -225,13 +225,13 @@ def test_combine_files(basic_residential_project_file): reference_pq = dd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\ .compute().reset_index() reference_pq['upgrade'] = test_pq['upgrade'] = 0 - pd.testing.assert_frame_equal(test_pq, reference_pq) + pd.testing.assert_frame_equal(test_pq, reference_pq, check_dtype=False) test_pq = test_pq_all[test_pq_all['upgrade'] == 1].copy().reset_index() reference_pq = dd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=1'), engine='pyarrow')\ .compute().reset_index() reference_pq['upgrade'] = test_pq['upgrade'] = 1 - pd.testing.assert_frame_equal(test_pq, reference_pq) + pd.testing.assert_frame_equal(test_pq, reference_pq, check_dtype=False) @patch('buildstockbatch.postprocessing.boto3') @@ -421,7 +421,7 @@ def test_provide_buildstock_csv(basic_residential_project_file, mocker): bsb = LocalBatch(project_filename) sampling_output_csv = bsb.sampler.run_sampling() df2 = read_csv(sampling_output_csv, dtype=str) - pd.testing.assert_frame_equal(df, df2) + pd.testing.assert_frame_equal(df, df2, check_dtype=False) assert (df['Geometry Shared Walls'] == "None").all() # Verify None is being read properly # Test file missing with open(project_filename, 'r') as f: From 3e06ec852ca69f7e4c6ea37741ce97bf85d0464f Mon Sep 17 00:00:00 2001 From: Yingli Date: Tue, 25 Jul 2023 16:28:01 -0600 Subject: [PATCH 11/14] fix testing error --- buildstockbatch/test/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 47e531b3..e6c3e2a8 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -112,7 +112,7 @@ def simplify_columns(colname): reference_pq = pd.read_parquet(os.path.join(reference_path, 'upgrades', 'upgrade=1', 'results_up01.parquet')).\ 
rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index']) mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq))) - pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_dtype=False) + pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) # timeseries parquet test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\ @@ -215,7 +215,7 @@ def test_combine_files(basic_residential_project_file): .sort_values('building_id').reset_index().drop(columns=['index']) reference_pq = pd.read_parquet(os.path.join(reference_path, 'upgrades', 'upgrade=1', 'results_up01.parquet'))\ .sort_values('building_id').reset_index().drop(columns=['index']) - pd.testing.assert_frame_equal(test_pq, reference_pq, check_dtype=False) + pd.testing.assert_frame_equal(test_pq, reference_pq) # timeseries parquet test_pq_all = dd.read_parquet(os.path.join(test_path, 'timeseries'), engine='pyarrow')\ From 7b1c38e094c4791f5431bd347f9732aa38ae24fe Mon Sep 17 00:00:00 2001 From: Yingli Date: Tue, 25 Jul 2023 16:57:08 -0600 Subject: [PATCH 12/14] fix testing error --- buildstockbatch/test/test_base.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index e6c3e2a8..193610df 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -86,14 +86,14 @@ def simplify_columns(colname): reference_csv = read_csv(os.path.join(reference_path, 'results_up00.csv.gz')).rename(columns=simplify_columns).\ sort_values('buildingid').reset_index().drop(columns=['index']) mutul_cols = list(set(test_csv.columns).intersection(set(reference_csv))) - pd.testing.assert_frame_equal(test_csv[mutul_cols], reference_csv[mutul_cols], check_dtype=False) + pd.testing.assert_frame_equal(test_csv[mutul_cols], reference_csv[mutul_cols]) test_csv = read_csv(os.path.join(test_path, 'results_up01.csv.gz')).rename(columns=simplify_columns).\ sort_values('buildingid').reset_index().drop(columns=['index']) reference_csv = read_csv(os.path.join(reference_path, 'results_up01.csv.gz')).rename(columns=simplify_columns).\ sort_values('buildingid').reset_index().drop(columns=['index']) mutul_cols = list(set(test_csv.columns).intersection(set(reference_csv))) - pd.testing.assert_frame_equal(test_csv[mutul_cols], reference_csv[mutul_cols], check_dtype=False) + pd.testing.assert_frame_equal(test_csv[mutul_cols], reference_csv[mutul_cols]) # test parquet files reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'parquet') @@ -105,14 +105,14 @@ def simplify_columns(colname): reference_pq = pd.read_parquet(os.path.join(reference_path, 'baseline', 'results_up00.parquet')).\ rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index']) mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq))) - pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_dtype=False) + pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) test_pq = pd.read_parquet(os.path.join(test_path, 'upgrades', 'upgrade=1', 'results_up01.parquet')).\ rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index']) reference_pq = pd.read_parquet(os.path.join(reference_path, 'upgrades', 'upgrade=1', 'results_up01.parquet')).\ 
rename(columns=simplify_columns).sort_values('buildingid').reset_index().drop(columns=['index']) mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq))) - pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) + pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_like=True) # timeseries parquet test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\ @@ -120,14 +120,14 @@ def simplify_columns(colname): reference_pq = dd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\ .compute().reset_index() mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq))) - pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_dtype=False) + pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) test_pq = dd.read_parquet(os.path.join(test_path, 'timeseries', 'upgrade=1'), engine='pyarrow')\ .compute().reset_index() reference_pq = dd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=1'), engine='pyarrow')\ .compute().reset_index() mutul_cols = list(set(test_pq.columns).intersection(set(reference_pq))) - pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols], check_dtype=False) + pd.testing.assert_frame_equal(test_pq[mutul_cols], reference_pq[mutul_cols]) def test_downselect_integer_options(basic_residential_project_file, mocker): @@ -192,13 +192,13 @@ def test_combine_files(basic_residential_project_file): .drop(columns=['index']) reference_csv = read_csv(os.path.join(reference_path, 'results_up00.csv.gz')).sort_values('building_id')\ .reset_index().drop(columns=['index']) - pd.testing.assert_frame_equal(test_csv, reference_csv, check_dtype=False) + pd.testing.assert_frame_equal(test_csv, reference_csv) test_csv = read_csv(os.path.join(test_path, 'results_up01.csv.gz')).sort_values('building_id').reset_index()\ .drop(columns=['index']) reference_csv = read_csv(os.path.join(reference_path, 'results_up01.csv.gz')).sort_values('building_id')\ .reset_index().drop(columns=['index']) - pd.testing.assert_frame_equal(test_csv, reference_csv, check_dtype=False) + pd.testing.assert_frame_equal(test_csv, reference_csv) # test parquet files reference_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'parquet') @@ -209,7 +209,7 @@ def test_combine_files(basic_residential_project_file): .reset_index().drop(columns=['index']) reference_pq = pd.read_parquet(os.path.join(reference_path, 'baseline', 'results_up00.parquet'))\ .sort_values('building_id').reset_index().drop(columns=['index']) - pd.testing.assert_frame_equal(test_pq, reference_pq, check_dtype=False) + pd.testing.assert_frame_equal(test_pq, reference_pq) test_pq = pd.read_parquet(os.path.join(test_path, 'upgrades', 'upgrade=1', 'results_up01.parquet'))\ .sort_values('building_id').reset_index().drop(columns=['index']) @@ -225,13 +225,13 @@ def test_combine_files(basic_residential_project_file): reference_pq = dd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=0'), engine='pyarrow')\ .compute().reset_index() reference_pq['upgrade'] = test_pq['upgrade'] = 0 - pd.testing.assert_frame_equal(test_pq, reference_pq, check_dtype=False) + pd.testing.assert_frame_equal(test_pq, reference_pq) test_pq = test_pq_all[test_pq_all['upgrade'] == 1].copy().reset_index() reference_pq = dd.read_parquet(os.path.join(reference_path, 'timeseries', 'upgrade=1'), engine='pyarrow')\ 
.compute().reset_index() reference_pq['upgrade'] = test_pq['upgrade'] = 1 - pd.testing.assert_frame_equal(test_pq, reference_pq, check_dtype=False) + pd.testing.assert_frame_equal(test_pq, reference_pq) @patch('buildstockbatch.postprocessing.boto3') @@ -421,7 +421,7 @@ def test_provide_buildstock_csv(basic_residential_project_file, mocker): bsb = LocalBatch(project_filename) sampling_output_csv = bsb.sampler.run_sampling() df2 = read_csv(sampling_output_csv, dtype=str) - pd.testing.assert_frame_equal(df, df2, check_dtype=False) + pd.testing.assert_frame_equal(df, df2) assert (df['Geometry Shared Walls'] == "None").all() # Verify None is being read properly # Test file missing with open(project_filename, 'r') as f: From b911d9ccd1742ef41992aa06bba27dff7c7aa32a Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Thu, 5 Oct 2023 14:57:38 -0600 Subject: [PATCH 13/14] cleaning up changelog --- docs/changelog/changelog_dev.rst | 42 ++++---------------------------- 1 file changed, 5 insertions(+), 37 deletions(-) diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst index 1550d745..532fadee 100644 --- a/docs/changelog/changelog_dev.rst +++ b/docs/changelog/changelog_dev.rst @@ -15,13 +15,6 @@ Development Changelog directory. ``pullreq`` should be set to the appropriate pull request number and ``tickets`` to any related github issues. These will be automatically linked in the documentation. - .. change:: - :tags: general, feature - :pullreq: 349 - :tickets: 300 - - Remove docker dependency for local runs. - .. change:: :tags: general, bugfix :pullreq: 387 @@ -30,40 +23,15 @@ Development Changelog Removing broken postprocessing tests. .. change:: - :tags: general, bugfix - :pullreq: 355 - :tickets: 352 - - Fix an issue with schedules datatype that was causing the crash of postporcessing at the final step. - - .. change:: - :tags: workflow, feature - :pullreq: 353 - - Avoid unnecessarily validating the HPXML file twice after having slightly changed the ``residential_hpxml`` workflow. - - .. change:: - :tags: validation, feature - :pullreq: 362 - - Enforce Athena database name and table name to follow strict alphanumeric only naming convention. - - .. change:: - :tags: validation, feature - :pullreq: 366 + :tags: bugfix + :pullreq: 386 + :tickets: 256 - Add a references section in the yaml schema to allow defining the anchors at a single place. + No longer automatically downloads the appropriate singularity image from + S3. Also added validation to ensure the image is in the correct location. .. change:: :tags: postprocessing, feature :pullreq: 365 Upload buildstock.csv to S3 during postprocessing - - .. change:: - :tags: bugfix - :pullreq: 386 - :tickets: 256 - - No longer automatically downloads the appropriate singularity image from - S3. Also added validation to ensure the image is in the correct location. 
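
Note: PATCHES 09 through 12 above go back and forth on passing check_dtype=False to
pd.testing.assert_frame_equal while the tests were being stabilized, and PATCHES 11 and 12
ultimately back most of those relaxations out again. A minimal, self-contained sketch of what
that flag does; the frames here are hypothetical stand-ins for the test and reference data:

    import pandas as pd

    test_df = pd.DataFrame({'building_id': pd.Series([1, 2, 3], dtype='int64')})
    reference_df = pd.DataFrame({'building_id': pd.Series([1, 2, 3], dtype='int32')})

    # Same values, different integer widths: the strict comparison raises an
    # AssertionError, while check_dtype=False compares values only and passes.
    mutual_cols = sorted(set(test_df.columns) & set(reference_df.columns))
    pd.testing.assert_frame_equal(test_df[mutual_cols], reference_df[mutual_cols], check_dtype=False)

Restricting the comparison to the columns both frames share mirrors the tests' own
mutul_cols pattern; value mismatches in those columns still fail loudly.
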
From aa4828f602cb49ba96dad32dd7973f819984aeae Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 9 Oct 2023 17:15:43 -0600
Subject: [PATCH 14/14] using csv_path from the sampler to locate the buildstock.csv file

---
 buildstockbatch/base.py           |  4 +++-
 buildstockbatch/postprocessing.py | 31 ++++++++++++++-----------------
 buildstockbatch/test/conftest.py  |  6 +-----
 buildstockbatch/test/test_base.py | 17 +++++++++++------
 4 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py
index b646267d..1cf31033 100644
--- a/buildstockbatch/base.py
+++ b/buildstockbatch/base.py
@@ -871,7 +871,9 @@ def process_results(self, skip_combine=False, force_upload=False):

         aws_conf = self.cfg.get('postprocessing', {}).get('aws', {})
         if 's3' in aws_conf or force_upload:
-            s3_bucket, s3_prefix = postprocessing.upload_results(aws_conf, self.output_dir, self.results_dir)
+            s3_bucket, s3_prefix = postprocessing.upload_results(
+                aws_conf, self.output_dir, self.results_dir, self.sampler.csv_path
+            )
             if 'athena' in aws_conf:
                 postprocessing.create_athena_tables(aws_conf, os.path.basename(self.output_dir), s3_bucket, s3_prefix)

diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py
index 8962b8b0..9d5e02f0 100644
--- a/buildstockbatch/postprocessing.py
+++ b/buildstockbatch/postprocessing.py
@@ -592,10 +592,9 @@ def remove_intermediate_files(fs, results_dir, keep_individual_timeseries=False)
         fs.rm(ts_in_dir, recursive=True)


-def upload_results(aws_conf, output_dir, results_dir):
+def upload_results(aws_conf, output_dir, results_dir, buildstock_csv_filename):
     logger.info("Uploading the parquet files to s3")

-    buildstock_dir = Path(results_dir).parent.joinpath('housing_characteristics')
     output_folder_name = Path(output_dir).name
     parquet_dir = Path(results_dir).joinpath('parquet')
     ts_dir = parquet_dir / 'timeseries'
@@ -608,9 +607,6 @@ def upload_results(aws_conf, output_dir, results_dir):
         all_files.append(file.relative_to(parquet_dir))
     for file in [*ts_dir.glob('_common_metadata'), *ts_dir.glob('_metadata')]:
         all_files.append(file.relative_to(parquet_dir))
-    buildstock_csv = ''
-    for file in buildstock_dir.glob('buildstock.csv'):
-        buildstock_csv = file.relative_to(buildstock_dir)

     s3_prefix = aws_conf.get('s3', {}).get('prefix', '').rstrip('/')
     s3_bucket = aws_conf.get('s3', {}).get('bucket', None)
@@ -626,23 +622,24 @@ def upload_results(aws_conf, output_dir, results_dir):
         logger.error(f"There are already {n_existing_files} files in the s3 folder {s3_bucket}/{s3_prefix_output}.")
         raise FileExistsError(f"s3://{s3_bucket}/{s3_prefix_output}")

-    def upload_file(filepath):
-        full_path = parquet_dir.joinpath(filepath)
+    def upload_file(filepath, s3key=None):
+        full_path = filepath if filepath.is_absolute() else parquet_dir.joinpath(filepath)
         s3 = boto3.resource('s3')
         bucket = s3.Bucket(s3_bucket)
-        s3key = Path(s3_prefix_output).joinpath(filepath).as_posix()
-        bucket.upload_file(str(full_path), str(s3key))
-
-    def upload_buildstock_csv(filepath):
-        full_path = buildstock_dir.joinpath(filepath)
-        s3 = boto3.resource('s3')
-        bucket = s3.Bucket(s3_bucket)
-        s3_prefix_output_new = s3_prefix_output+ '/' + 'buildstock_csv' + '/'
-        s3key = Path(s3_prefix_output_new).joinpath(filepath).as_posix()
+        if s3key is None:
+            s3key = Path(s3_prefix_output).joinpath(filepath).as_posix()
         bucket.upload_file(str(full_path), str(s3key))

     tasks = list(map(dask.delayed(upload_file), all_files))
-    tasks.append(dask.delayed(upload_buildstock_csv)(buildstock_csv))
+    if 
buildstock_csv_filename is not None: + buildstock_csv_filepath = Path(buildstock_csv_filename) + if buildstock_csv_filepath.exists(): + tasks.append(dask.delayed(upload_file)( + buildstock_csv_filepath, + f"{s3_prefix_output}buildstock_csv/{buildstock_csv_filepath.name}" + )) + else: + logger.warning(f"{buildstock_csv_filename} doesn't exist, can't upload.") dask.compute(tasks) logger.info(f"Upload to S3 completed. The files are uploaded to: {s3_bucket}/{s3_prefix_output}") return s3_bucket, s3_prefix_output diff --git a/buildstockbatch/test/conftest.py b/buildstockbatch/test/conftest.py index b42cca05..5a1240bd 100644 --- a/buildstockbatch/test/conftest.py +++ b/buildstockbatch/test/conftest.py @@ -31,12 +31,8 @@ def _basic_residential_project_file(update_args={}, raw=False): shutil.move(os.path.join(output_directory, 'simulation_output', 'job0.json'), os.path.join(output_directory, 'simulation_output', '..', '..', 'job0.json')) - #os.mkdir(os.path.join(output_directory, 'housing_characteristics')) + os.mkdir(os.path.join(output_directory, 'housing_characteristics')) os.mkdir(os.path.join(buildstock_directory, project_directory, 'housing_characteristics')) - shutil.copytree( - os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', 'housing_characteristics'), - os.path.join(Path(output_directory).parent.joinpath('housing_characteristics')) - ) cfg = { 'buildstock_directory': buildstock_directory, 'project_directory': project_directory, diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 8658db8a..5c9c0dfe 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -20,7 +20,7 @@ from buildstockbatch.local import LocalBatch from buildstockbatch.exc import ValidationError from buildstockbatch.postprocessing import write_dataframe_as_parquet -from buildstockbatch.utils import read_csv +from buildstockbatch.utils import read_csv, ContainerRuntime dask.config.set(scheduler='synchronous') here = os.path.dirname(os.path.abspath(__file__)) @@ -129,10 +129,16 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): mocked_boto3.client = MagicMock(return_value=mocked_glueclient) mocked_boto3.resource().Bucket().objects.filter.side_effect = [[], ['a', 'b', 'c']] project_filename, results_dir = basic_residential_project_file(upload_config) + buildstock_csv_path = Path(results_dir).parent / 'openstudio_buildstock' / 'project_resstock_national' / 'housing_characteristics' / 'buildstock.csv' # noqa: E501 + shutil.copy2( + Path(__file__).parent / 'test_results' / 'housing_characteristics' / 'buildstock.csv', + buildstock_csv_path + ) with patch.object(BuildStockBatchBase, 'weather_dir', None), \ patch.object(BuildStockBatchBase, 'output_dir', results_dir), \ patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \ - patch.object(BuildStockBatchBase, 'results_dir', results_dir): + patch.object(BuildStockBatchBase, 'results_dir', results_dir), \ + patch.object(BuildStockBatchBase, 'CONTAINER_RUNTIME', ContainerRuntime.LOCAL_OPENSTUDIO): bsb = BuildStockBatchBase(project_filename) bsb.process_results() get_dask_client_mock.assert_called_once() @@ -169,7 +175,6 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): # check if all the files are properly uploaded source_path = os.path.join(results_dir, 'parquet') - buildstock_dir = Path(results_dir).parent.joinpath('housing_characteristics') s3_path = s3_prefix + '/' + OUTPUT_FOLDER_NAME + '/' s3_file_path = s3_path + 
'baseline/results_up00.parquet' @@ -203,9 +208,9 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): files_uploaded.remove((source_file_path, s3_file_path)) s3_file_path = s3_path + 'buildstock_csv/buildstock.csv' - source_file_path_tst = os.path.join(buildstock_dir, 'buildstock.csv') - assert (source_file_path_tst, s3_file_path) in files_uploaded - files_uploaded.remove((source_file_path_tst, s3_file_path)) + source_file_path = str(buildstock_csv_path) + assert (source_file_path, s3_file_path) in files_uploaded + files_uploaded.remove((source_file_path, s3_file_path)) assert len(files_uploaded) == 0, f"These files shouldn't have been uploaded: {files_uploaded}"
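
Note: after PATCH 14 the buildstock.csv upload no longer needs a separate helper; the
generalized upload_file handles both the parquet files (relative paths) and the sampler's
buildstock.csv (an absolute path with an explicit S3 key). A condensed sketch of the resulting
flow, with hypothetical bucket, prefix, and local paths standing in for the real configuration:

    from pathlib import Path

    import boto3
    import dask

    s3_bucket = 'my-bucket'                # hypothetical
    s3_prefix_output = 'prefix/output/'    # hypothetical; note the trailing slash
    parquet_dir = Path('results/parquet')  # hypothetical local layout

    def upload_file(filepath, s3key=None):
        # Relative paths are resolved against parquet_dir; absolute paths
        # (like buildstock.csv) are uploaded from wherever they live.
        full_path = filepath if filepath.is_absolute() else parquet_dir.joinpath(filepath)
        bucket = boto3.resource('s3').Bucket(s3_bucket)
        if s3key is None:
            s3key = Path(s3_prefix_output).joinpath(filepath).as_posix()
        bucket.upload_file(str(full_path), str(s3key))

    all_files = [p.relative_to(parquet_dir) for p in parquet_dir.rglob('*.parquet')]
    tasks = list(map(dask.delayed(upload_file), all_files))

    # Stands in for sampler.csv_path, which PATCH 14 passes into upload_results.
    buildstock_csv = Path('housing_characteristics/buildstock.csv').absolute()
    if buildstock_csv.exists():
        tasks.append(dask.delayed(upload_file)(
            buildstock_csv, f'{s3_prefix_output}buildstock_csv/{buildstock_csv.name}'
        ))
    dask.compute(tasks)

This mirrors the final upload_results: every upload is a dask.delayed task collected into one
list and executed with a single dask.compute call, and a missing buildstock.csv is logged and
skipped rather than failing the run.
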