-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1f406ee
commit 9152031
Showing
19 changed files
with
618 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# CI workflow: on pushes and pull requests targeting main, build the Docker
# image from the repository root and push it to Amazon ECR.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

name: MLOps AWS

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Install kubectl
        uses: azure/setup-kubectl@v2.0
        with:
          version: 'v1.24.0' # default is latest stable
        id: install

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-2

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v1

      - name: Build, tag, and push the image to Amazon ECR
        id: build-image
        env:
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          ECR_REPOSITORY: ${{ secrets.REPO_NAME }}
          IMAGE_TAG: latest
        run: |
          # Build a docker container and push it to ECR
          docker build -t "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" .
          echo "Pushing image to ECR..."
          docker push "$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
          # The `::set-output` workflow command is deprecated; step outputs
          # are published by appending name=value to the $GITHUB_OUTPUT file.
          echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> "$GITHUB_OUTPUT"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Image for the loan-prediction API: installs dependencies, pulls the
# DVC-tracked data, trains the model, runs the prediction tests at build
# time, and starts main.py.
FROM python:3.10-slim-buster

RUN pip install --upgrade pip

WORKDIR /app

COPY . /app

# Set permissions (single layer instead of one layer per chmod)
RUN chmod +x /app/tests /app/prediction_model \
    && chmod +w /app/tests \
                /app/prediction_model/trained_models \
                /app/prediction_model/datasets

# key=value form; the space-separated `ENV key value` form is legacy syntax.
# NOTE(review): if the base image leaves PYTHONPATH unset this expands to a
# value with a leading empty entry -- presumably relied on to resolve imports
# from the working directory; confirm before changing the value.
ENV PYTHONPATH="${PYTHONPATH}:/app/prediction_model"

RUN pip install --no-cache-dir -r requirements.txt

# Quoted so the shell cannot glob-expand the [s3] extra
RUN pip install --no-cache-dir "dvc[s3]"

RUN dvc pull

RUN python /app/prediction_model/training_pipeline.py

# One verbose test run that also writes the JUnit report (previously the same
# test file was executed twice, once per flag)
RUN pytest -v --junitxml=/app/tests/test-results.xml /app/tests/test_prediction.py

EXPOSE 8005

ENTRYPOINT ["python"]

CMD ["main.py"]
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
from fastapi import FastAPI , File, UploadFile | ||
from fastapi.responses import StreamingResponse | ||
from pydantic import BaseModel | ||
import uvicorn | ||
import numpy as np | ||
import pandas as pd | ||
from fastapi.middleware.cors import CORSMiddleware | ||
from prediction_model.predict import generate_predictions,generate_predictions_batch | ||
from prediction_model.config import config | ||
import mlflow | ||
import io | ||
import boto3 | ||
from datetime import datetime | ||
|
||
|
||
def upload_to_s3(file_content, filename):
    """Store *file_content* in the configured S3 bucket and return its key.

    The object key has the form
    ``{config.FOLDER}/{YYYY-MM-DD}/{name}_{YYYY-MM-DD_HH:MM:SS}.csv`` where
    ``name`` is *filename* with a trailing ``.csv`` removed.

    Args:
        file_content: Bytes to upload as the object body.
        filename: Original name of the uploaded file; a missing/empty name
            falls back to ``"upload"``.

    Returns:
        The S3 key the object was written to.
    """
    s3 = boto3.client('s3')

    # Read the clock once so the date folder and the timestamp in the file
    # name can never disagree (e.g. when the call straddles midnight).
    now = datetime.now()
    current_date = now.strftime("%Y-%m-%d")
    current_datetime = now.strftime("%Y-%m-%d_%H:%M:%S")

    # An upload may arrive without a file name; avoid calling str methods on None.
    stem = filename or "upload"
    if stem.endswith('.csv'):
        stem = stem[:-4]

    s3_key = f"{config.FOLDER}/{current_date}/{stem}_{current_datetime}.csv"

    s3.put_object(Bucket=config.S3_BUCKET, Key=s3_key, Body=file_content)

    return s3_key
|
||
# mlflow.set_tracking_uri("http://localhost:5000") | ||
|
||
# Point the MLflow client at the tracking server named in the package config.
mlflow.set_tracking_uri(config.TRACKING_URI)

app = FastAPI(
    title="Loan Prediction App using FastAPI - MLOps",
    description="MLOps Demo",
    version='1.0',
)

# Any origin may call the API.
origins = ["*"]

# Wildcard origins/methods/headers must not be combined with
# allow_credentials=True: credentialed CORS requires every allowed value to be
# listed explicitly. The API is open to all origins, so credentials are
# disabled rather than narrowing the origin list.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||
class LoanPrediction(BaseModel):
    """Request body of the ``/prediction_api`` endpoint: one loan application.

    Field order is kept as declared because the dumped model is turned into a
    single-row DataFrame for the prediction call.
    """

    # String-valued applicant attributes
    Gender: str
    Married: str
    Dependents: str
    Education: str
    Self_Employed: str

    # Numeric attributes
    ApplicantIncome: float
    CoapplicantIncome: float
    LoanAmount: float
    Loan_Amount_Term: float
    Credit_History: float

    Property_Area: str
|
||
|
||
@app.get("/")
def index():
    """Return the static welcome message served at the API root."""
    welcome = {"message": "Welcome to Loan Prediction App using API - CI CD Jenkins"}
    return welcome
|
||
@app.post("/prediction_api")
def predict(loan_details: LoanPrediction):
    """Predict the outcome for one loan application sent as a JSON body.

    Returns ``{"status": "Approved"}`` when the model's label is ``"Y"`` and
    ``{"status": "Rejected"}`` otherwise.
    """
    record = loan_details.model_dump()
    label = generate_predictions([record])["prediction"][0]
    return {"status": "Approved" if label == "Y" else "Rejected"}
|
||
@app.post("/prediction_ui")
def predict_gui(Gender: str,
                Married: str,
                Dependents: str,
                Education: str,
                Self_Employed: str,
                ApplicantIncome: float,
                CoapplicantIncome: float,
                LoanAmount: float,
                Loan_Amount_Term: float,
                Credit_History: float,
                Property_Area: str):
    """Predict the outcome for one loan application given as individual parameters.

    Returns ``{"status": "Approved"}`` when the model's label is ``"Y"`` and
    ``{"status": "Rejected"}`` otherwise.
    """
    # Single record keyed by feature name, in the same order as the parameters.
    record = {
        'Gender': Gender,
        'Married': Married,
        'Dependents': Dependents,
        'Education': Education,
        'Self_Employed': Self_Employed,
        'ApplicantIncome': ApplicantIncome,
        'CoapplicantIncome': CoapplicantIncome,
        'LoanAmount': LoanAmount,
        'Loan_Amount_Term': Loan_Amount_Term,
        'Credit_History': Credit_History,
        'Property_Area': Property_Area,
    }

    label = generate_predictions([record])["prediction"][0]

    status = "Rejected"
    if label == "Y":
        status = "Approved"
    return {"status": status}
|
||
|
||
@app.post("/batch_prediction")
async def batch_predict(file: UploadFile = File(...)):
    """Score every row of an uploaded CSV and return it with a ``Prediction`` column.

    The scored CSV is also uploaded to S3 via ``upload_to_s3``.

    Args:
        file: Uploaded CSV; it must contain every column in ``config.FEATURES``.

    Returns:
        A ``text/csv`` attachment named ``predictions.csv`` on success, or a
        ``{"error": ...}`` dict when the upload cannot be used.
    """
    content = await file.read()

    # An empty or malformed upload is reported the same way as a bad schema
    # instead of surfacing as an unhandled server error.
    try:
        df = pd.read_csv(io.BytesIO(content), index_col=False)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        return {"error": "Uploaded file could not be parsed as CSV."}

    # Ensure the CSV file contains the required features
    required_columns = config.FEATURES
    if not all(column in df.columns for column in required_columns):
        return {"error": "CSV file does not contain the required columns."}

    # A header-only file has nothing to score.
    if df.empty:
        return {"error": "CSV file does not contain any rows."}

    df['Prediction'] = generate_predictions_batch(df)["prediction"]

    # Encode once and reuse the bytes for both the S3 copy and the response.
    payload = df.to_csv(index=False).encode('utf-8')

    upload_to_s3(payload, file.filename)

    return StreamingResponse(
        io.BytesIO(payload),
        media_type="text/csv",
        headers={"Content-Disposition":"attachment; filename=predictions.csv"},
    )
|
||
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 8005.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8005,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1.0.0 |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import pathlib | ||
import os | ||
|
||
|
||
# Directory that contains this config module.
current_directory = os.path.dirname(os.path.realpath(__file__))

# Parent of that directory; datasets are resolved relative to it.
PACKAGE_ROOT = os.path.dirname(current_directory)

DATAPATH = os.path.join(PACKAGE_ROOT, "datasets")

TRAIN_FILE = 'train.csv'
TEST_FILE = 'test.csv'

TARGET = 'Loan_Status'

# Final features used in the model
FEATURES = ['Gender', 'Married', 'Dependents', 'Education',
            'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
            'Loan_Amount_Term', 'Credit_History', 'Property_Area']

NUM_FEATURES = ['ApplicantIncome', 'LoanAmount', 'Loan_Amount_Term']

CAT_FEATURES = ['Gender',
                'Married',
                'Dependents',
                'Education',
                'Self_Employed',
                'Credit_History',
                'Property_Area']

# In our case the features to encode are exactly the categorical features;
# derive the list (as a separate copy) so the two cannot drift apart.
FEATURES_TO_ENCODE = list(CAT_FEATURES)

FEATURE_TO_MODIFY = ['ApplicantIncome']
FEATURE_TO_ADD = 'CoapplicantIncome'

DROP_FEATURES = ['CoapplicantIncome']

LOG_FEATURES = ['ApplicantIncome', 'LoanAmount']  # taking log of numerical columns

S3_BUCKET = "loanprediction"

FOLDER = "datadrift"

# The tracking server host is deployment-specific, so allow it to be
# overridden through the standard MLflow environment variable; the previous
# hard-coded address remains the default.
TRACKING_URI = os.environ.get(
    "MLFLOW_TRACKING_URI",
    "http://ec2-3-19-244-223.us-east-2.compute.amazonaws.com:5000/",
)

EXPERIMENT_NAME = "loan_prediction_model"

# Appended to "runs:/<run_id>" to form the model URI, hence the leading slash.
MODEL_NAME = "/Loanprediction-model"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from sklearn.pipeline import Pipeline | ||
from prediction_model.config import config | ||
import prediction_model.processing.preprocessing as pp | ||
from sklearn.preprocessing import MinMaxScaler | ||
|
||
|
||
|
||
|
||
|
||
# Preprocessing steps, applied in the order listed; each custom transformer
# is configured from the feature lists in the package config.
preprocessing_pipeline = Pipeline(
    steps=[
        (
            'DomainProcessing',
            pp.DomainProcessing(
                variable_to_modify=config.FEATURE_TO_MODIFY,
                variable_to_add=config.FEATURE_TO_ADD,
            ),
        ),
        ('MeanImputation', pp.MeanImputer(variables=config.NUM_FEATURES)),
        ('ModeImputation', pp.ModeImputer(variables=config.CAT_FEATURES)),
        ('DropFeatures', pp.DropColumns(variables_to_drop=config.DROP_FEATURES)),
        ('LabelEncoder', pp.CustomLabelEncoder(variables=config.FEATURES_TO_ENCODE)),
        ('LogTransform', pp.LogTransforms(variables=config.LOG_FEATURES)),
        ('MinMaxScale', MinMaxScaler()),
    ]
)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import pandas as pd | ||
import numpy as np | ||
from prediction_model.config import config | ||
import mlflow | ||
|
||
|
||
|
||
def _load_best_model():
    """Load the model logged by the run with the highest ``f1_score`` metric.

    Raises:
        RuntimeError: If the configured experiment does not exist or has no runs.
    """
    experiment_name = config.EXPERIMENT_NAME
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        raise RuntimeError(f"MLflow experiment {experiment_name!r} was not found.")

    # Only the top-ranked run is needed, so do not fetch the whole history.
    runs_df = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=['metrics.f1_score DESC'],
        max_results=1,
    )
    if runs_df.empty:
        raise RuntimeError(f"MLflow experiment {experiment_name!r} has no runs.")

    best_run_id = runs_df.iloc[0]['run_id']
    return mlflow.sklearn.load_model('runs:/' + best_run_id + config.MODEL_NAME)


def _predict_labels(frame):
    """Run the best model on *frame* and map class 1 to 'Y', anything else to 'N'."""
    prediction = _load_best_model().predict(frame)
    return {"prediction": np.where(prediction == 1, 'Y', 'N')}


def generate_predictions(data_input):
    """Predict loan approval for records given as a list of dicts.

    Args:
        data_input: Data accepted by ``pd.DataFrame`` (e.g. a list of
            feature-name -> value dicts).

    Returns:
        ``{"prediction": array}`` with one ``'Y'``/``'N'`` label per record.

    Raises:
        RuntimeError: If no trained model can be located in MLflow.
    """
    return _predict_labels(pd.DataFrame(data_input))


def generate_predictions_batch(data_input):
    """Predict loan approval for records already held in a DataFrame.

    Args:
        data_input: Input passed unchanged to the model's ``predict``.

    Returns:
        ``{"prediction": array}`` with one ``'Y'``/``'N'`` label per row.

    Raises:
        RuntimeError: If no trained model can be located in MLflow.
    """
    return _predict_labels(data_input)
|
||
|
||
|
||
|
||
|
||
if __name__ == '__main__':
    # Smoke test. The previous call passed no argument to
    # generate_predictions(), which always raised TypeError.
    # NOTE(review): assumes the bundled test file contains every column in
    # config.FEATURES -- confirm against the dataset.
    from prediction_model.processing.data_handling import load_dataset

    sample = load_dataset(config.TEST_FILE)
    print(generate_predictions_batch(sample[config.FEATURES]))
Binary file added
BIN
+1.65 KB
prediction_model/processing/__pycache__/data_handling.cpython-311.pyc
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+6.27 KB
prediction_model/processing/__pycache__/preprocessing.cpython-311.pyc
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import os | ||
import pandas as pd | ||
from prediction_model.config import config | ||
|
||
#Load the dataset | ||
def load_dataset(file_name):
    """Read the CSV *file_name* from ``config.DATAPATH`` into a DataFrame."""
    return pd.read_csv(os.path.join(config.DATAPATH, file_name))
|
Oops, something went wrong.