diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index bc5322b..3c62baa 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -2,14 +2,6 @@ name: "Manually Deploy Docker Files to Artifact Registry"
on:
workflow_dispatch:
- # push:
- # branches:
- # - 'main'
- # paths:
- # -'backend/'
- # -'frontend/'
- # -'machine-learning/Dockerfile'
-
jobs:
deploy:
@@ -28,15 +20,19 @@ jobs:
with:
credentials_json: ${{ secrets.OWNER_SA_KEY }}
+ - name: Decode credentials.json
+ run: echo "${{ secrets.B64_ENCODED_KEY }}" | base64 --decode > backend/credentials.json
+
- name: Install GCloud CLI
uses: google-github-actions/setup-gcloud@v0
- name: Build and Push Backend Docker Image
env:
GOOGLE_PROJECT: ${{ secrets.PROJECT_ID }}
run: |
gcloud auth configure-docker us-central1-docker.pkg.dev
- docker build -t us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest ./backend
+ docker build --build-arg CREDENTIALS_JSON=credentials.json -t us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest ./backend
docker push us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest
- name: Build and Push Frontend Docker Image
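
The new decode step materializes the service-account key from the B64_ENCODED_KEY secret inside the build context, so the Docker build can copy it into the image. For reference, a minimal Python equivalent of that shell pipeline, assuming the secret is exposed to the script as an environment variable of the same name:

import base64
import os

# Equivalent of the workflow step:
#   echo "$B64_ENCODED_KEY" | base64 --decode > backend/credentials.json
encoded = os.environ["B64_ENCODED_KEY"]  # base64-encoded service-account JSON
with open("backend/credentials.json", "wb") as f:
    f.write(base64.b64decode(encoded))
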
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 0507747..cd96ecd 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,2 +1,6 @@
# Use the official Python 3.11 image from Docker Hub
FROM python:3.11.6-slim
+
+# Path of the credentials file, passed with --build-arg and resolved relative to
+# the build context; declared after FROM so it is in scope for the COPY below
+ARG CREDENTIALS_JSON
@@ -22,10 +25,11 @@ COPY pyproject.toml poetry.lock ./
RUN poetry config virtualenvs.create false && poetry install --no-dev
# Copy the creds file and compute dir into the container
-COPY credentials.json ./
COPY compute ./compute
+COPY ${CREDENTIALS_JSON} ./
# Copy the FastAPI application into the container
+COPY big_query.py ./
COPY main.py ./
# Specify the command to run the FastAPI application using uvicorn
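
Inside the image the key must land at ./credentials.json, because the backend constructs its Google Cloud clients from that relative path (see the removed lines in main.py below). A minimal sketch of that client setup, assuming the google-cloud-bigquery and google-cloud-storage packages are installed via Poetry:

from google.cloud import bigquery, storage

# Clients are authorized with the service-account key baked into the image
bq_client = bigquery.Client.from_service_account_json("./credentials.json")
storage_client = storage.Client.from_service_account_json("./credentials.json")
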
diff --git a/backend/main.py b/backend/main.py
index de861cc..9083ee5 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1,6 +1,5 @@
from http.client import HTTPException
from google.cloud import storage
-from google.cloud import bigquery
from starlette.responses import FileResponse
from io import BytesIO, StringIO
import pandas as pd
@@ -8,15 +7,18 @@
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
+
+# custom functions for EDA and AutoML
from compute.autoEDA import generate_eda
from compute.autoML import generate_model
+from big_query import bq_ops
+
import csv
app = FastAPI()
DATA_BUCKET = "automate-ml-datasets"
-BQ_DATASET = "automl_dataset_1"
GRAPH_BUCKET = "automate_ml_graphs"
origins = ["*"]
@@ -197,69 +199,11 @@ async def getModel():
return FileResponse(path=model_path, filename=model_path.split("/")[-1], media_type='application/octet-stream')
-# get file from bucket, load it to big query as a table & display the rows
+# big query operations
@app.get("/api/bq")
async def bq(fileName, query=None):
-
- # construct client objects (authorized with the service account json file)
- bq_client = bigquery.Client.from_service_account_json("./credentials.json")
- storage_client = storage.Client.from_service_account_json("./credentials.json")
-
- # check if the file name has .csv extension, if not, add it
- # if not fileName.endswith('.csv'):
- # fileName += '.csv'
-
- uri = f"gs://{DATA_BUCKET}/{fileName}"
-
- # if file does not exist in the bucket, return an error
- blob = storage_client.get_bucket(DATA_BUCKET).blob(fileName)
- if not blob.exists():
- return {"error": f"File {fileName} does not exist in the bucket."}
-
- fileName = fileName.replace('.csv', '')
- table_id = f"{BQ_DATASET}.{fileName}_table"
-
- # if table does not exist, load it
- # try:
- # bq_client.get_table(table_id)
- # except:
- job_config = bigquery.LoadJobConfig(
- autodetect=True, # Automatically infer the schema.
- source_format=bigquery.SourceFormat.CSV,
- skip_leading_rows=1, # column headers
- write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, # Overwrite the table
- )
- # Make an API request
- load_job = bq_client.load_table_from_uri(
- uri, table_id, job_config=job_config
- )
- # Waits for the job to complete.
- load_job.result()
-
- #------------------------------------------ Query ops ----------------------------------------#
-
- query = query.upper() if query else None
-
- # List of potentially harmful operations
- harmful_ops = ['DROP', 'DELETE', 'INSERT', 'UPDATE']
-
- # Check if the query contains any harmful operations
- if query and any(op in query.upper() for op in harmful_ops):
- print("\nQuery contains harmful operations!\nusing default query.\n")
- final_query = f"SELECT * FROM `{table_id}`"
- else:
- print("\nQuery is safe to be passed.\n")
- # remove everything before the `SELECT` keyword from the received query
- query = query[query.find("SELECT"):] if query else None
- final_query = query.replace("FROM TABLE", f"FROM `{table_id}`") if query else f"SELECT * FROM `{table_id}`"
- print("Final Query:\n", final_query, "\n")
-
- query_job = bq_client.query(final_query)
- rows = query_job.result()
-
- # display the rows
- data = []
- for row in rows:
- data.append(dict(row))
-
- return {"message": f"Loaded {table_id} with {rows.total_rows} rows.", "data": data}
+ try:
+ result = bq_ops(fileName, query)
+ return result
+ except Exception as e:
+ return {"error": f"An error occurred: {str(e)}"}
diff --git a/cloud-infra/k8s/frontend-deployment.yaml b/cloud-infra/k8s/frontend-deployment.yaml
index 0981472..1b3c4ee 100644
--- a/cloud-infra/k8s/frontend-deployment.yaml
+++ b/cloud-infra/k8s/frontend-deployment.yaml
@@ -25,6 +25,6 @@ spec:
imagePullPolicy: Always
ports:
- containerPort: 3000
- env:
- - name: APP_ENV
- value: "production"
+ # env:
+ # - name: APP_ENV
+ # value: "production"
diff --git a/docker-compose.yml b/docker-compose.yml
index 2c0a39d..3dc922e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,11 +2,14 @@ version: '3'
services:
frontend-service:
build: ./frontend
- environment:
- - APP_ENV=development
+ # environment:
+ # - APP_ENV=development
ports:
- '3000:3000'
backend-service:
- build: ./backend
+ build:
+ context: ./backend
+ args:
+ - CREDENTIALS_JSON=credentials.json # resolved against the build context (./backend)
ports:
- '8000:8000'
\ No newline at end of file
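
With docker-compose up, the backend is reachable on port 8000, so the refactored endpoint can be smoke-tested from the host. A hypothetical check using the requests library (the dataset name is a placeholder; "FROM TABLE" is the token the server rewrites to the generated BigQuery table name):

import requests

# fileName must name a CSV that already exists in the GCS data bucket
resp = requests.get(
    "http://localhost:8000/api/bq",
    params={"fileName": "my_dataset.csv", "query": "SELECT * FROM TABLE"},
)
print(resp.json())
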
diff --git a/frontend/src/pages/query.js b/frontend/src/pages/query.js
index b726c00..13bee22 100644
--- a/frontend/src/pages/query.js
+++ b/frontend/src/pages/query.js
@@ -1,10 +1,26 @@
import React, { useState, useEffect } from 'react';
import Editor from '@monaco-editor/react';
-import { Table, TableBody, TableCell, TableContainer, TableHead, TableRow,
- Paper, Button, ListItemText, ListItemButton, Box, Container, Typography
- } from "@mui/material"
+import {
+ Table,
+ TableBody,
+ TableCell,
+ TableContainer,
+ TableHead,
+ TableRow,
+ Paper,
+ Button,
+ ListItemText,
+ ListItemButton,
+ Box,
+ Container,
+ Typography,
+} from '@mui/material';
-import { PlayCircleOutline, CloudUpload, CloudDownload } from '@mui/icons-material';
+import {
+ PlayCircleOutline,
+ CloudUpload,
+ CloudDownload,
+} from '@mui/icons-material';
import CircularProgress from '@mui/material/CircularProgress';
import theme from '@/themes/theme';
@@ -17,15 +33,15 @@ const DataSetListComponent = ({ onSelectDataSet, uploadTrigger }) => {
useEffect(() => {
// Fetch datasets from /api/datasets and update state
const fetchData = async () => {
- try {
- const res = await fetch("/api/datasets");
- const data = await res.json();
- setDataSets(data.names);
- } catch {
- console.error("API Endpoint Not Working");
- }
- };
- fetchData();
+ try {
+ const res = await fetch('/api/datasets');
+ const data = await res.json();
+ setDataSets(data.names);
+ } catch {
+ console.error('API Endpoint Not Working');
+ }
+ };
+ fetchData();
}, [uploadTrigger]);
const handleSelectDataSet = (dataSet) => {
@@ -33,35 +49,37 @@ const DataSetListComponent = ({ onSelectDataSet, uploadTrigger }) => {
onSelectDataSet(dataSet); // This will pass the selected dataset to the parent component
};
- return (
- //render the list of selectable datasets
-