Skip to content

Commit

Permalink
Merge pull request #10 from onefact/chrisgebert-add_dbt_sources
Browse files Browse the repository at this point in the history
chrisgebert: Add dbt config files and Social Determinants of Health models
  • Loading branch information
jaanli authored Jun 2, 2024
2 parents ff180ff + 3bd11dc commit 2f4f5b7
Show file tree
Hide file tree
Showing 7 changed files with 644 additions and 0 deletions.
24 changes: 24 additions & 0 deletions data_processing/models/ahrq.gov/sdoh/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd

def model(dbt, session):
    """Download every SDOH Excel workbook linked from the AHRQ data page.

    dbt Python model entry point. Fetches the AHRQ SDOH download page,
    collects every anchor whose ``href`` ends in ``.xlsx`` and saves each
    file under ``../data/SDOH`` (relative to the current working directory),
    named after the last path segment of its URL.

    Args:
        dbt: Context object supplied by dbt; not used by this model.
        session: Session object supplied by dbt; not used by this model.

    Returns:
        A one-row pandas DataFrame with a single ``status`` column.

    Raises:
        requests.HTTPError: If the index page or any workbook responds with
            an HTTP error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Applies to the connect phase and to each socket read, not to the whole
    # transfer, so large workbooks still download as long as data keeps flowing.
    timeout_seconds = 60

    base_url = "https://www.ahrq.gov"
    url = f"{base_url}/sdoh/data-analytics/sdoh-data.html#download"
    response = requests.get(url, timeout=timeout_seconds)
    # Fail loudly instead of scraping an error page that contains no links.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    download_dir = "../data/SDOH"
    os.makedirs(download_dir, exist_ok=True)

    for link in soup.select('a[href$=".xlsx"]'):
        file_url = urljoin(base_url, link['href'])
        file_name = file_url.split('/')[-1]
        file_path = os.path.join(download_dir, file_name)

        # Download and validate BEFORE opening the target file, so a failed
        # request neither truncates an existing workbook nor stores an HTML
        # error body under an .xlsx name.
        file_response = requests.get(file_url, timeout=timeout_seconds)
        file_response.raise_for_status()

        with open(file_path, 'wb') as file:
            file.write(file_response.content)

    return pd.DataFrame({"status": ["SDOH data downloaded successfully."]})
34 changes: 34 additions & 0 deletions data_processing/models/ahrq.gov/sdoh/sdoh_county.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{#
    County-level AHRQ SDOH data in long format.

    For each yearly source table, every column other than the identifier
    columns (year, countyfips, statefips, state, county, region, territory)
    is unpivoted into a (survey_variable_name, survey_score) pair; the yearly
    results are then stacked with UNION ALL. The model is materialized as an
    external file at ../data/sdoh_county.parquet.

    NOTE(review): sdoh_county_key is built from year/countyfips/county only,
    so it presumably repeats once per unpivoted variable - confirm that this
    is the intended grain of the key.
#}
{{
    config(
        materialized = 'external',
        location = '../data/sdoh_county.parquet'
    )
}}

{% set years = [
    '2009', '2010', '2011', '2012', '2013', '2014',
    '2015', '2016', '2017', '2018', '2019', '2020'
] %}

with union_unpivot as (

    {% for yr in years %}
    unpivot {{ source('social_determinants_of_health', 'sdoh_county_' ~ yr) }}
    on columns(* exclude (year, countyfips, statefips, state, county, region, territory))
    into
        name survey_variable_name
        value survey_score
    {% if not loop.last %}
    union all
    {% endif %}
    {% endfor %}

)

select
    {{ dbt_utils.generate_surrogate_key(['year', 'countyfips', 'county']) }} as sdoh_county_key,
    *
from union_unpivot
32 changes: 32 additions & 0 deletions data_processing/models/ahrq.gov/sdoh/sdoh_tract.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{#
    Census-tract-level AHRQ SDOH data in long format.

    For each yearly source table, every column other than the identifier
    columns (year, tractfips, countyfips, statefips, state, county, region,
    territory) is unpivoted into a (survey_variable_name, survey_score) pair;
    the yearly results are then stacked with UNION ALL. The model is
    materialized as an external file at ../data/sdoh_tract.parquet.

    The surrogate key column is named sdoh_tract_key. It was previously
    aliased sdoh_county_key, which did not match this model's tract grain or
    the <model>_key naming used by the county and zipcode models; consumers
    reading the old column name from this model must be updated.

    NOTE(review): the key is built from year/tractfips/countyfips/county only,
    so it presumably repeats once per unpivoted variable - confirm that this
    is the intended grain of the key.
#}
{{
    config(
        materialized = 'external',
        location = '../data/sdoh_tract.parquet'
    )
}}

{% set sdoh_tract_years = [
    '2009', '2010', '2011', '2012', '2013', '2014',
    '2015', '2016', '2017', '2018', '2019', '2020'
] %}

{% set relations = [] %}
{% for sdoh_tract_year in sdoh_tract_years -%}
    {% do relations.append(source('social_determinants_of_health', 'sdoh_tract_' ~ sdoh_tract_year)) %}
{% endfor %}

with union_unpivot as (

    {% for relation in relations %}
    unpivot {{ relation }}
    on columns(* exclude (year, tractfips, countyfips, statefips, state, county, region, territory))
    into
        name survey_variable_name
        value survey_score

    {% if not loop.last %} union all {% endif -%}
    {% endfor %}
)

select
    {{ dbt_utils.generate_surrogate_key(
        ['year', 'tractfips', 'countyfips', 'county']
    ) }} as sdoh_tract_key,
    *
from union_unpivot
34 changes: 34 additions & 0 deletions data_processing/models/ahrq.gov/sdoh/sdoh_zipcode.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{#
    ZIP-code-level AHRQ SDOH data in long format.

    For each yearly source table (2011 through 2020), every column other than
    the identifier columns (year, statefips, zipcode, zcta, state, region,
    territory, point_zip) is unpivoted into a
    (survey_variable_name, survey_score) pair; the yearly results are then
    stacked with UNION ALL. The model is materialized as an external file at
    ../data/sdoh_zipcode.parquet.

    NOTE(review): sdoh_zipcode_key is built from year/zcta/zipcode only, so it
    presumably repeats once per unpivoted variable - confirm that this is the
    intended grain of the key.
#}
{{
    config(
        materialized = 'external',
        location = '../data/sdoh_zipcode.parquet'
    )
}}

{% set years = [
    '2011', '2012', '2013', '2014', '2015',
    '2016', '2017', '2018', '2019', '2020'
] %}

with union_unpivot as (

    {% for yr in years %}
    unpivot {{ source('social_determinants_of_health', 'sdoh_zipcode_' ~ yr) }}
    on columns(* exclude (year, statefips, zipcode, zcta, state, region, territory, point_zip))
    into
        name survey_variable_name
        value survey_score
    {% if not loop.last %}
    union all
    {% endif %}
    {% endfor %}

)

select
    {{ dbt_utils.generate_surrogate_key(['year', 'zcta', 'zipcode']) }} as sdoh_zipcode_key,
    *
from union_unpivot
Loading

0 comments on commit 2f4f5b7

Please sign in to comment.