
Commit

Merge pull request #29 from SalatielBairros/api-adjustments
Api adjustments
SalatielBairros authored Aug 11, 2022
2 parents 87996cd + fd33df4 commit 73dcd0b
Showing 4 changed files with 114 additions and 9 deletions.
89 changes: 84 additions & 5 deletions app/api/controllers/ingestion/data_ingestion_controller.py
@@ -17,18 +17,97 @@ def evaluate_model(response: Response):
         return {"message": "Data ingestion failed", "error": str(e)}
 
 @router.get("/ingested/processed-data")
-def get_processed_data(response: Response):
-    try:
-        return _repository.get_processed_dataset().to_dict(orient='records')
+def get_processed_data(columns:str, response: Response):
+    try:
+        processed_dataset = _repository.get_processed_dataset()
+        columns_list = __get_columns__(columns)
+        if(columns_list is not None):
+            processed_dataset = processed_dataset[columns_list]
+        return processed_dataset.to_dict(orient='records')
     except Exception as e:
         response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
         return {"message": "Error to get processed data", "error": str(e)}
 
 @router.get("/ingested/pandemic-data")
 def get_pandemic_data(response: Response):
     try:
-        repository = LocalStorageRepository()
-        return repository.get_pandemic_dataset().to_dict(orient='records')
+        return _repository.get_pandemic_dataset().to_dict(orient='records')
     except Exception as e:
         response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
         return {"message": "Pandemic data ingestion failed", "error": str(e)}
+
+@router.get("/ingested/processed-data/grouped")
+def get_grouped_data(group_by:str, columns:str, response: Response, country:str=None, region:str=None):
+    try:
+        processed_dataset = _repository.get_processed_dataset()
+        if(region is not None):
+            processed_dataset = processed_dataset[processed_dataset["region"] == region]
+        if(country is not None):
+            processed_dataset = processed_dataset[processed_dataset["country"] == country]
+
+        grouped_data = processed_dataset.groupby(by=group_by).mean().reset_index().sort_values(by=group_by, ascending=True)
+        columns_list = __get_columns__(columns)
+        if(columns_list is not None):
+            grouped_data = grouped_data[columns_list]
+        return grouped_data.to_dict(orient='records')
+    except Exception as e:
+        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
+        return {"message": "Error to get grouped data", "error": str(e)}
+
+@router.get("/ingested/processed-data/countries")
+def get_countries(response: Response):
+    try:
+        countries = _repository.get_processed_dataset()['country'].unique().tolist()
+        return {
+            'data': countries
+        }
+    except Exception as e:
+        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
+        return {"message": "Error to get countries", "error": str(e)}
+
+@router.get("/ingested/processed-data/regions")
+def get_regions(response: Response):
+    try:
+        regions = _repository.get_processed_dataset()['region'].unique().tolist()
+        return {
+            'data': regions
+        }
+    except Exception as e:
+        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
+        return {"message": "Error to get regions", "error": str(e)}
+
+@router.get("/ingested/processed-data/correlations")
+def get_correlations(columns:str, response: Response):
+    try:
+        processed_dataset = _repository.get_processed_dataset()
+        columns_list = __get_columns__(columns)
+        if(columns_list is not None):
+            processed_dataset = processed_dataset[columns_list]
+        correlations = processed_dataset.corr()
+        return correlations.to_dict(orient='list')
+    except Exception as e:
+        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
+        return {"message": "Error to get correlations", "error": str(e)}
+
+@router.get("/ingested/processed-data/pandemic/grouped")
+def get_pandemic_grouped_data(group_by:str, columns:str, response: Response, country:str=None, region:str=None):
+    try:
+        processed_dataset = _repository.get_pandemic_dataset()
+        if(region is not None):
+            processed_dataset = processed_dataset[processed_dataset["region"] == region]
+        if(country is not None):
+            processed_dataset = processed_dataset[processed_dataset["country"] == country]
+
+        grouped_data = processed_dataset.groupby(by=group_by).mean().reset_index().sort_values(by=group_by, ascending=True)
+        columns_list = __get_columns__(columns)
+        if(columns_list is not None):
+            grouped_data = grouped_data[columns_list]
+        return grouped_data.to_dict(orient='records')
+    except Exception as e:
+        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
+        return {"message": "Error to get pandemic grouped data", "error": str(e)}
+
+def __get_columns__(columns:str) -> list[str]:
+    if(columns is not None and len(columns.strip()) > 0):
+        return columns.split(",")
+    return None
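
Note: the new endpoints share a comma-separated `columns` query parameter (parsed by __get_columns__), and the grouped endpoints add a `group_by` column plus optional `country`/`region` filters. A minimal usage sketch, assuming the app is served locally on uvicorn's default port and the router is mounted without an extra prefix (host, port, and the specific column values are illustrative, not part of this commit):

import requests

BASE_URL = "http://localhost:8000"  # hypothetical host/port

# Restrict the processed dataset to selected columns.
resp = requests.get(f"{BASE_URL}/ingested/processed-data",
                    params={"columns": "country,region"})

# Group by a column and average the numeric columns, filtered to one region.
resp = requests.get(f"{BASE_URL}/ingested/processed-data/grouped",
                    params={"group_by": "year", "columns": "year", "region": "some-region"})
print(resp.json())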
6 changes: 6 additions & 0 deletions main.py
@@ -7,6 +7,7 @@
 from app.api.controllers.region_classification.rf_region_controller import router as rf_region_router
 from app.api.controllers.region_classification.knn_region_controller import router as knn_region_router
 from fastapi.openapi.utils import get_openapi
+from fastapi.middleware.cors import CORSMiddleware
 
 prepare_environment()
 app = FastAPI()
@@ -26,4 +27,9 @@
         "name": "Salatiel Bairros",
         "email": "salatiel.costabairros@gmail.com"
     }
 )
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+)
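
Note: as committed, only allow_origins is set, so CORSMiddleware keeps its other defaults — in particular allow_methods defaults to ["GET"], which happens to match this read-only API. A sketch of the more common, fuller configuration (not what this commit does):

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],   # any origin; convenient for demos, too permissive for production
    allow_methods=["*"],   # defaults to ["GET"] when omitted
    allow_headers=["*"],   # defaults to [] when omitted
)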
16 changes: 14 additions & 2 deletions services/classification_model_evaluation_service.py
@@ -11,6 +11,8 @@
 from eli5.sklearn import PermutationImportance
 import numpy as np
 
+classification_evaluation_cache = {}
+
 class ClassificationModelEvaluationService:
     def __init__(self, model: BaseLearningModel) -> None:
         self.model = model
@@ -19,6 +21,10 @@ def __init__(self, model: BaseLearningModel) -> None:
         self.columns = []
 
     def evaluate(self, train_data: pd.DataFrame = None) -> ClassificationModelEvaluationData:
+        model_name = self.model.get_model_name()
+        if(model_name in classification_evaluation_cache):
+            return classification_evaluation_cache[model_name]
+
         if(train_data is None):
             train_data = self.repository.get_processed_dataset().drop(columns=self.drop_from_processed_dataset)
 
@@ -32,9 +38,14 @@ def __init__(self, model: BaseLearningModel) -> None:
         model.fit(x_train, y_train)
         y_pred = model.predict(x_test)
 
-        return self.__get_metrics__(y_test, y_pred, x_test, model)
+        classification_evaluation_cache[model_name] = self.__get_metrics__(y_test, y_pred, x_test, model)
+        return classification_evaluation_cache[model_name]
 
     def evaluate_augmentaded_data(self, balanced_dataset: pd.DataFrame = None) -> ClassificationModelEvaluationResponse:
+        model_name = f'balanced_{self.model.get_model_name()}'
+        if(model_name in classification_evaluation_cache):
+            return classification_evaluation_cache[model_name]
+
         if(balanced_dataset is None):
             balanced_dataset = self.repository.get_augmented_dataset()
 
@@ -57,9 +68,10 @@ def evaluate_augmentaded_data(self, balanced_dataset: pd.DataFrame = None) -> ClassificationModelEvaluationResponse:
         test_metrics = self.__get_metrics__(y_test, y_test_pred, x_test, test_model)
         validation_metrics = self.__get_metrics__(y_validation, y_validation_pred, x_validation, validation_model)
 
-        return ClassificationModelEvaluationResponse(
+        classification_evaluation_cache[model_name] = ClassificationModelEvaluationResponse(
             test_data_evaluation=test_metrics,
             validation_data_evaluation=validation_metrics)
+        return classification_evaluation_cache[model_name]
 
     def __get_metrics__(self, y_real, y_pred, x_test, model) -> ClassificationModelEvaluationData:
         accuracy = metrics.accuracy_score(y_real, y_pred)
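
Note: the cache added here is a module-level dict keyed by model name (prefixed with 'balanced_' for the augmented path). The same pattern in isolation, with illustrative names:

# Process-local memoization: computed once per process, never evicted.
_evaluation_cache: dict[str, dict] = {}

def evaluate_once(model_name: str) -> dict:
    if model_name in _evaluation_cache:
        return _evaluation_cache[model_name]
    result = {"accuracy": 0.0}  # stand-in for the real train/evaluate step
    _evaluation_cache[model_name] = result
    return result

Entries live until the process restarts and are never invalidated, so re-ingesting data will keep serving stale metrics until the API restarts.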
12 changes: 10 additions & 2 deletions services/regression_model_evaluation_service.py
@@ -3,23 +3,31 @@
 from sklearn import metrics
 import numpy as np
 from environment.constants import EnvironmentVariables
+from lib.memo_cache import memo
 from models.base_learning_model import BaseLearningModel
 from repository.local_storage_repository import LocalStorageRepository
 from eli5.sklearn import PermutationImportance
 from environment.constants import DatasetConstants
 
+regression_evaluation_cache = {}
+
 class RegressionModelEvaluationService:
     def __init__(self, model: BaseLearningModel) -> None:
         self.columns_to_drop_x = DatasetConstants.COLUMNS_TO_DROP_TO_TRAINING
         self.model = model
         self.repository = LocalStorageRepository()
 
     def evaluate(self) -> list[RegressionModelEvaluationResponse]:
+        model_name = self.model.get_model_name()
+        if(model_name in regression_evaluation_cache):
+            return regression_evaluation_cache[model_name]
+
         dataset = self.repository.get_processed_dataset()
         if(dataset is None):
             raise Exception('Dataset is empty')
 
-        return self.__year_cross_validation__(dataset)
+        regression_evaluation_cache[model_name] = self.__year_cross_validation__(dataset)
+        return regression_evaluation_cache[model_name]
 
     def __get_metrics__(self, y_real, y_pred, x_test, model, year) -> RegressionModelEvaluationResponse:
         r2 = metrics.r2_score(y_real, y_pred)
@@ -51,7 +59,7 @@ def __year_cross_validation__(self, dataset: pd.DataFrame):
             to_test = dataset.query(f'year == {year}')
             X = to_train.drop(self.columns_to_drop_x, axis=1).drop(columns=[self.model.target_column])
             y = to_train[self.model.target_column]
-            X_test = to_test.drop(self.columns_to_drop_x, axis=1)
+            X_test = to_test.drop(self.columns_to_drop_x, axis=1).drop(columns=[self.model.target_column])
             y_test = to_test[self.model.target_column]
 
             model = self.model.get_model()
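
Note: the last hunk fixes a feature/label mismatch — X_test previously kept the model's target column even though the training features dropped it, so the test matrix had an extra column and leaked the label. A small sketch of the per-year split the method appears to perform (column names and the to_train selection are illustrative assumptions):

import pandas as pd

df = pd.DataFrame({
    "year":    [2019, 2019, 2020, 2020],
    "feature": [1.0, 2.0, 3.0, 4.0],
    "target":  [0.5, 0.7, 0.6, 0.9],
})

year = 2020
to_train = df.query(f"year != {year}")  # assumed complement of the held-out year
to_test = df.query(f"year == {year}")
X, y = to_train.drop(columns=["target"]), to_train["target"]
# The fix: drop the target from the test features too, so X_test has exactly
# the columns the model was trained on.
X_test, y_test = to_test.drop(columns=["target"]), to_test["target"]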
