Skip to content

Commit

Permalink
Close #4, #5, #6 (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
aspeddro authored Aug 13, 2024
1 parent 610ff9e commit 9a72037
Show file tree
Hide file tree
Showing 21 changed files with 298 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
run: poetry install

- name: Test
run: poetry run pytest
run: poetry run pytest -s

- name: Ruff check
run: poetry run ruff check .
Expand Down
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@ Pacote para uso da equipe de dados na Base dos Dados.

- [CONTRIBUTING.md](./CONTRIBUTING.md)

## Instalação

```sh
poetry add git+https://github.com/basedosdados/databasers-utils.git
```

## Setup

### Credenciais
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@ build-backend = "poetry.core.masonry.api"

[tool.ruff]
line-length = 80

[tool.pytest.ini_options]
pythonpath = "src"
addopts = [
"--import-mode=importlib",
]
19 changes: 15 additions & 4 deletions src/databasers_utils/copy_models_from_dev_to_prod.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,30 @@ def search_model_directory_recursive(directory: str) -> Optional[str]:
return search_model_directory_recursive(parent_directory)


def copy_models_from_dev_to_prod(datasets: list[str]) -> None:
dev_models_path = search_model_directory_recursive(os.getcwd())
def copy_models_from_dev_to_prod(
datasets: list[str], dir: str = os.getcwd()
) -> None:
dev_models_path = search_model_directory_recursive(dir)

if dev_models_path is None:
raise Exception("Failed to find model directory")
raise Exception(f"Failed to find model directory at {dir}")

prod_models_dir = dev_models_path.replace(
"queries-basedosdados-dev", "queries-basedosdados"
)

if not os.path.exists(prod_models_dir):
raise Exception(
f"Prod models directory not exists at {prod_models_dir}"
)

# Go to root of queries-basedosdados
root_prod = os.path.dirname(prod_models_dir)

for dataset_id in datasets:
prod_models_dataset_dir = f"{prod_models_dir}/{dataset_id}"
copy_tree(f"{dev_models_path}/{dataset_id}", prod_models_dataset_dir)
update_dbt_project(dataset_id, prod_models_dir)
update_dbt_project(dataset_id, dir=root_prod)
[
change_origin_from_dev_to_staging(file, prod_models_dataset_dir)
for file in os.listdir(prod_models_dataset_dir)
Expand Down
1 change: 1 addition & 0 deletions src/databasers_utils/create_yaml_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ def create_yaml_file(
schema_path = f"{output_path}/schema.yml"

yaml_obj = yaml.YAML(typ="rt")
yaml_obj.explicit_start = True
yaml_obj.indent(mapping=4, sequence=4, offset=2)

if os.path.exists(schema_path):
Expand Down
9 changes: 6 additions & 3 deletions src/databasers_utils/table_architecture.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,16 @@ def create_sql_files(

output_path = f"{models_dir}/{self.dataset_id}"

if not os.path.exists(output_path):
os.makedirs(output_path)

for table_id, url in self.__tables.items():
architecture_df = read_architecture_table(url)

if preprocessed_staging_column_names:
architecture_df["original_name"] = architecture_df["name"]

header = f'{{{{ config(alias="{table_id}", schema="{self.dataset_id}") }}}}'
header = f'{{{{ config(alias="{table_id}", schema="{self.dataset_id}", materialized="table") }}}}'

with open(
f"{output_path}/{self.dataset_id}__{table_id}.sql", "w"
Expand All @@ -80,8 +83,8 @@ def create_sql_files(
print("SQL files created!")
return None

def update_dbt_project(self) -> None:
return update_dbt_project(self.dataset_id, dir=os.getcwd())
def update_dbt_project(self, dir: str = os.getcwd()) -> None:
return update_dbt_project(self.dataset_id, dir=dir)

def upload_columns(
self,
Expand Down
9 changes: 7 additions & 2 deletions src/databasers_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,13 @@ def get_model_directory(dir: str) -> str:


def update_dbt_project(dataset_id: str, dir: str) -> None:
if ["dbt_project.yml", "dbt_project.yaml"] not in os.listdir(dir):
raise Exception("Failed to find root directory with dbt_project file")
if not any(
file in os.listdir(dir)
for file in ["dbt_project.yml", "dbt_project.yaml"]
):
raise Exception(
f"Failed to find root directory at {dir} with dbt_project file"
)

dbt_project_yaml = f"{dir}/dbt_project.yml"

Expand Down
Empty file.
7 changes: 7 additions & 0 deletions tests/queries-basedosdados-dev/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: dummy

models:
basedosdados:
dummy:
+materialized: table
+schema: dummy
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{ config(alias="gini", schema="br_ibge_pib") }}
{{ config(alias="gini", schema="br_ibge_pib", materialized="table") }}
select
safe_cast(id_uf as string) id_uf,
safe_cast(ano as int64) ano,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{ config(alias="uf", schema="br_ibge_pib") }}
{{ config(alias="uf", schema="br_ibge_pib", materialized="table") }}
select
safe_cast(ano as int64) ano,
safe_cast(id_uf as string) id_uf,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
---
version: 2

models:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import os
import pytest
from databasers_utils import copy_models_from_dev_to_prod

# Path to the dev fixture project used by the tests in this module. It is built
# from the current working directory, so it only resolves correctly when pytest
# is launched from the directory that contains "tests/".
TESTS_DEV_DIR = os.path.join(os.getcwd(), "tests", "queries-basedosdados-dev")


# NOTE(review): the dependency names below point at tests in another module.
# pytest-dependency's default scope is "module", so these names may never
# resolve and the test could always be skipped — confirm whether
# scope="session" with full node ids is needed.
@pytest.mark.dependency(
    depends=[
        f"test_table_architecture.{prerequisite}"
        for prerequisite in (
            "test_create_yaml_file",
            "test_create_sql_files",
            "test_update_dbt_project",
        )
    ]
)
def test_copy_models():
    """Copy the br_ibge_pib models from the dev fixture tree to prod.

    The test passes when copy_models_from_dev_to_prod completes without
    raising; it makes no further assertions on the copied files.
    """
    copy_models_from_dev_to_prod(dir=TESTS_DEV_DIR, datasets=["br_ibge_pib"])
113 changes: 113 additions & 0 deletions tests/queries-basedosdados-dev/test_table_architecture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import os
import pytest
import pandas as pd
import ruamel.yaml as yaml
import sys
from databasers_utils import TableArchitecture

# Directory that holds this test module, which is also the dev fixture project
# (its dbt_project.yml lives alongside this file). Derived from __file__ rather
# than os.getcwd() so the tests locate the fixtures no matter which directory
# pytest is launched from.
TESTS_DEV_DIR = os.path.dirname(os.path.abspath(__file__))


def test_architecture():
    """Check that TableArchitecture.tables() loads one DataFrame per table.

    Builds a TableArchitecture for the br_ibge_pib dataset from two Google
    Sheets architecture URLs and verifies that exactly the "uf" and "gini"
    entries come back, each as a pandas DataFrame.
    """
    table_arch = TableArchitecture(
        dataset_id="br_ibge_pib",
        tables={
            "uf": "https://docs.google.com/spreadsheets/d/12F5NzhOYlN_bi9flLBEdXDWpa5iVakSP4EKm9UoyWuo/edit?usp=drive_link",
            "gini": "https://docs.google.com/spreadsheets/d/1K1svie4Gyqe6NnRjBgJbapU5sTsLqXWTQUmTRVIRwQc/edit?usp=drive_link",
        },
    )

    tables = table_arch.tables()

    # Each key is checked on its own: `"uf" and "gini" in tables.keys()` would
    # only test "gini", because the non-empty string "uf" is always truthy.
    assert "uf" in tables.keys()
    assert "gini" in tables.keys()
    assert isinstance(tables["uf"], pd.DataFrame)
    assert isinstance(tables["gini"], pd.DataFrame)
    assert len(tables) == 2


@pytest.mark.dependency()
def test_create_yaml_file():
    """Generate schema.yml for br_ibge_pib inside the dev fixture project.

    Passes when models/br_ibge_pib/schema.yml exists under TESTS_DEV_DIR
    after TableArchitecture.create_yaml_file has run.
    """
    architecture_urls = {
        "uf": "https://docs.google.com/spreadsheets/d/12F5NzhOYlN_bi9flLBEdXDWpa5iVakSP4EKm9UoyWuo/edit?usp=drive_link",
        "gini": "https://docs.google.com/spreadsheets/d/1K1svie4Gyqe6NnRjBgJbapU5sTsLqXWTQUmTRVIRwQc/edit?usp=drive_link",
    }
    architecture = TableArchitecture(
        dataset_id="br_ibge_pib", tables=architecture_urls
    )

    # Make sure the fixture project has a models/ folder before generating.
    models_dir = os.path.join(TESTS_DEV_DIR, "models")
    os.makedirs(models_dir, exist_ok=True)

    architecture.create_yaml_file(dir=TESTS_DEV_DIR)

    schema_path = os.path.join(models_dir, "br_ibge_pib", "schema.yml")
    assert os.path.exists(schema_path)


@pytest.mark.dependency()
def test_create_sql_files():
    """Generate the br_ibge_pib SQL models inside the dev fixture project.

    Passes when both br_ibge_pib__gini.sql and br_ibge_pib__uf.sql exist
    under models/br_ibge_pib after TableArchitecture.create_sql_files has run.
    """
    architecture_urls = {
        "uf": "https://docs.google.com/spreadsheets/d/12F5NzhOYlN_bi9flLBEdXDWpa5iVakSP4EKm9UoyWuo/edit?usp=drive_link",
        "gini": "https://docs.google.com/spreadsheets/d/1K1svie4Gyqe6NnRjBgJbapU5sTsLqXWTQUmTRVIRwQc/edit?usp=drive_link",
    }
    architecture = TableArchitecture(
        dataset_id="br_ibge_pib", tables=architecture_urls
    )

    # Make sure the fixture project has a models/ folder before generating.
    models_dir = os.path.join(TESTS_DEV_DIR, "models")
    os.makedirs(models_dir, exist_ok=True)

    architecture.create_sql_files(dir=TESTS_DEV_DIR)

    # One SQL file is expected per table, named "<dataset>__<table>.sql".
    dataset_dir = os.path.join(models_dir, "br_ibge_pib")
    for table_id in ("gini", "uf"):
        sql_path = os.path.join(dataset_dir, f"br_ibge_pib__{table_id}.sql")
        assert os.path.exists(sql_path)


@pytest.mark.dependency()
def test_update_dbt_project():
    """Check that update_dbt_project registers br_ibge_pib in dbt_project.yml.

    The fixture dbt_project.yml must not contain a br_ibge_pib entry before
    the call. After the call the entry must exist with "+materialized: table"
    and "+schema: br_ibge_pib". The fixture file is always restored to its
    original bytes, even when an assertion fails or the call raises, so one
    failed run cannot break the precondition of the next.
    """
    table_arch = TableArchitecture(
        dataset_id="br_ibge_pib",
        tables={
            "uf": "https://docs.google.com/spreadsheets/d/12F5NzhOYlN_bi9flLBEdXDWpa5iVakSP4EKm9UoyWuo/edit?usp=drive_link",
            "gini": "https://docs.google.com/spreadsheets/d/1K1svie4Gyqe6NnRjBgJbapU5sTsLqXWTQUmTRVIRwQc/edit?usp=drive_link",
        },
    )

    yaml_obj = yaml.YAML(typ="rt")
    yaml_obj.indent(mapping=2, sequence=4, offset=2)

    DBT_PROJECT_PATH = os.path.join(TESTS_DEV_DIR, "dbt_project.yml")

    # Keep the raw text so the fixture can be restored byte-for-byte instead
    # of being re-serialized (which could reformat the committed file).
    with open(DBT_PROJECT_PATH, "r") as file:
        original_text = file.read()

    content = yaml_obj.load(original_text)

    assert content["models"]["basedosdados"].get("br_ibge_pib") is None

    try:
        table_arch.update_dbt_project(dir=TESTS_DEV_DIR)

        yaml_obj_2 = yaml.YAML(typ="rt")
        yaml_obj_2.indent(mapping=2, sequence=4, offset=2)

        with open(DBT_PROJECT_PATH, "r") as file:
            content_updated = yaml_obj_2.load(file)

        # Inspect changes
        yaml_obj_2.dump(content_updated, sys.stdout)

        added = content_updated["models"]["basedosdados"]["br_ibge_pib"]
        assert added["+materialized"] == "table"
        assert added["+schema"] == "br_ibge_pib"
    finally:
        # Restore changes, whatever happened above.
        with open(DBT_PROJECT_PATH, "w") as file:
            file.write(original_text)
Empty file.
11 changes: 11 additions & 0 deletions tests/queries-basedosdados/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
name: dummy

models:
basedosdados:
br_ibge_pib:
+materialized: table
+schema: br_ibge_pib
dummy:
+materialized: table
+schema: dummy
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{{ config(alias="gini", schema="br_ibge_pib", materialized="table") }}
select
safe_cast(id_uf as string) id_uf,
safe_cast(ano as int64) ano,
safe_cast(gini_pib as int64) gini_pib,
safe_cast(gini_va as int64) gini_va,
safe_cast(gini_va_agro as int64) gini_va_agro,
safe_cast(gini_va_industria as int64) gini_va_industria,
safe_cast(gini_va_servicos as int64) gini_va_servicos,
safe_cast(gini_va_adespss as int64) gini_va_adespss,
from `basedosdados-staging.br_ibge_pib_staging.gini` as t
13 changes: 13 additions & 0 deletions tests/queries-basedosdados/models/br_ibge_pib/br_ibge_pib__uf.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{{ config(alias="uf", schema="br_ibge_pib", materialized="table") }}
select
safe_cast(ano as int64) ano,
safe_cast(id_uf as string) id_uf,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(pib as int64) pib,
safe_cast(impostos_liquidos as int64) impostos_liquidos,
safe_cast(va as int64) va,
safe_cast(va_agropecuaria as int64) va_agropecuaria,
safe_cast(va_industria as int64) va_industria,
safe_cast(va_servicos as int64) va_servicos,
safe_cast(va_adespss as int64) va_adespss,
from `basedosdados-staging.br_ibge_pib_staging.uf` as t
Loading

0 comments on commit 9a72037

Please sign in to comment.