-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from onefact/chrisgebert-add_dbt_sources
chrisgebert: Add dbt config files and Social Determinants of Health models
- Loading branch information
Showing
7 changed files
with
644 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import os | ||
import requests | ||
from bs4 import BeautifulSoup | ||
from urllib.parse import urljoin | ||
import pandas as pd | ||
|
||
def model(dbt, session):
    """Download every SDOH Excel workbook linked from the AHRQ data page.

    Fetches the AHRQ SDOH download page, collects each anchor whose ``href``
    ends in ``.xlsx``, and saves the linked file into ``../data/SDOH``
    (created if missing) under the last path segment of its URL.

    Args:
        dbt: Context object supplied to dbt Python models (not used here).
        session: Session object supplied to dbt Python models (not used here).

    Returns:
        A one-row ``pandas.DataFrame`` with a single ``status`` column.

    Raises:
        requests.HTTPError: If the index page or any workbook request returns
            a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    base_url = "https://www.ahrq.gov"
    url = f"{base_url}/sdoh/data-analytics/sdoh-data.html#download"
    # (connect, read) timeouts in seconds so a stalled server cannot hang
    # the dbt run indefinitely.
    timeout = (10, 300)

    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parsing an error page that contains no links.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    download_dir = "../data/SDOH"
    os.makedirs(download_dir, exist_ok=True)

    for link in soup.select('a[href$=".xlsx"]'):
        file_url = urljoin(base_url, link['href'])
        file_name = file_url.split('/')[-1]
        file_path = os.path.join(download_dir, file_name)

        # Fetch and validate before opening the destination, so a failed
        # request never leaves an empty file or an HTML error page saved
        # with an .xlsx name.
        download = requests.get(file_url, timeout=timeout)
        download.raise_for_status()

        with open(file_path, 'wb') as file:
            file.write(download.content)

    return pd.DataFrame({"status": ["SDOH data downloaded successfully."]})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
{{
    config(
        materialized = 'external',
        location = '../data/sdoh_county.parquet'
    )
}}

{#
    County-level SDOH model.
    Each listed year has its own source table named sdoh_county_<year>.
    Every table is unpivoted into (survey_variable_name, survey_score) rows
    and the per-year results are stacked with UNION ALL.
#}
{% set survey_years = [
    '2009', '2010', '2011', '2012', '2013', '2014',
    '2015', '2016', '2017', '2018', '2019', '2020'
] %}

with union_unpivot as (

    {% for survey_year in survey_years %}
    {#- Separator goes before every relation except the first. #}
    {% if not loop.first %} union all {% endif %}
    unpivot {{ source('social_determinants_of_health', 'sdoh_county_' ~ survey_year) }}
    -- Identifier columns stay as-is; every other column becomes a name/value row.
    on columns(* exclude (year, countyfips, statefips, state, county, region, territory))
    into
        name survey_variable_name
        value survey_score
    {% endfor %}

)

select
    -- Hash of the year/county identifier columns.
    {{ dbt_utils.generate_surrogate_key(['year', 'countyfips', 'county']) }} as sdoh_county_key,
    *
from union_unpivot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{{ config(
    materialized = 'external',
    location = '../data/sdoh_tract.parquet') }}

{#
    Tract-level SDOH model.
    Each listed year has its own source table named sdoh_tract_<year>.
    Every table is unpivoted into (survey_variable_name, survey_score) rows
    and the per-year results are stacked with UNION ALL.
#}
{% set sdoh_tract_years = [
    '2009', '2010', '2011', '2012', '2013', '2014',
    '2015', '2016', '2017', '2018', '2019', '2020'] %}

{% set relations = [] %}
{% for sdoh_tract_year in sdoh_tract_years -%}
    {% do relations.append(source('social_determinants_of_health', 'sdoh_tract_' ~ sdoh_tract_year)) %}
{% endfor %}

with union_unpivot as (

    {% for relation in relations %}
    unpivot {{ relation }}
    -- Identifier columns stay as-is; every other column becomes a name/value row.
    on columns(* exclude (year, tractfips, countyfips, statefips, state, county, region, territory))
    into
        name survey_variable_name
        value survey_score

    {% if not loop.last %} union all {% endif -%}
    {% endfor %}
)

select
    -- Named sdoh_tract_key (previously mislabelled sdoh_county_key) so the
    -- key column identifies this model's grain, following the
    -- sdoh_<level>_key pattern of the county and zipcode models.
    {{ dbt_utils.generate_surrogate_key(
        ['year', 'tractfips', 'countyfips', 'county']
    ) }} as sdoh_tract_key,
    *
from union_unpivot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
{{
    config(
        materialized = 'external',
        location = '../data/sdoh_zipcode.parquet'
    )
}}

{#
    ZIP-code-level SDOH model.
    Each listed year has its own source table named sdoh_zipcode_<year>.
    Every table is unpivoted into (survey_variable_name, survey_score) rows
    and the per-year results are stacked with UNION ALL.
#}
{% set survey_years = [
    '2011', '2012', '2013', '2014', '2015',
    '2016', '2017', '2018', '2019', '2020'
] %}

with union_unpivot as (

    {% for survey_year in survey_years %}
    {#- Separator goes before every relation except the first. #}
    {% if not loop.first %} union all {% endif %}
    unpivot {{ source('social_determinants_of_health', 'sdoh_zipcode_' ~ survey_year) }}
    -- Identifier columns stay as-is; every other column becomes a name/value row.
    on columns(* exclude (year, statefips, zipcode, zcta, state, region, territory, point_zip))
    into
        name survey_variable_name
        value survey_score
    {% endfor %}

)

select
    -- Hash of the year/ZCTA/ZIP identifier columns.
    {{ dbt_utils.generate_surrogate_key(['year', 'zcta', 'zipcode']) }} as sdoh_zipcode_key,
    *
from union_unpivot
Oops, something went wrong.