From a4c0330c8b298752dc595506e7fe9b78d0eeb239 Mon Sep 17 00:00:00 2001
From: rawanmahdi
Date: Sat, 24 Feb 2024 10:43:53 -0500
Subject: [PATCH 1/4] Altered credentials path to be taken in as build
 argument

---
 backend/Dockerfile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/Dockerfile b/backend/Dockerfile
index 0507747..448ae9d 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,3 +1,5 @@
+ARG CREDENTIALS_JSON
+
 # Use the official Python 3.11 image from Docker Hub
 FROM python:3.11.6-slim
 
@@ -22,7 +24,8 @@ COPY pyproject.toml poetry.lock ./
 RUN poetry config virtualenvs.create false && poetry install --no-dev
 
 # Copy the creds file and compute dir into the container
-COPY credentials.json ./
+# COPY credentials.json ./
+COPY ${CREDENTIALS_JSON} ./credentials.json
 COPY compute ./compute
 
 # Copy the FastAPI application into the container

From 5b9207b887f738a5e7a2790daf05a1af071c37bc Mon Sep 17 00:00:00 2001
From: rawanmahdi
Date: Sat, 24 Feb 2024 10:44:58 -0500
Subject: [PATCH 2/4] Updated workflow to push credentials as build arg to
 dockerfile

---
 .github/workflows/docker.yml | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index bc5322b..e80ffae 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -2,13 +2,6 @@ name: "Manually Deploy Docker Files to Artifact Registry"
 
 on:
   workflow_dispatch:
-  # push:
-  #   branches:
-  #     - 'main'
-  #   paths:
-  #     -'backend/'
-  #     -'frontend/'
-  #     -'machine-learning/Dockerfile'
 
 
 jobs:
@@ -28,15 +21,19 @@ jobs:
       with:
         credentials_json: ${{ secrets.OWNER_SA_KEY }}
 
+    - name: Decode credentials.json
+      run: echo "${{ secrets.B64_ENCODED_KEY }}" | base64 --decode > docker_credentials.json
+
     - name: Install GCloud CLI
       uses: google-github-actions/setup-gcloud@v0
 
     - name: Build and Push Backend Docker Image
       env:
         GOOGLE_PROJECT: ${{ secrets.PROJECT_ID }}
+        CREDENTIALS_JSON: ${{ secrets.B64_ENCODED_KEY }}
       run: |
         gcloud auth configure-docker us-central1-docker.pkg.dev
-        docker build -t us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest ./backend
+        docker build --build-arg CREDENTIALS_JSON=./docker_credentials.json -t us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest ./backend
         docker push us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest
 
     - name: Build and Push Frontend Docker Image

From ff717eacc8959945b66190deac084ae8a322b68b Mon Sep 17 00:00:00 2001
From: rawanmahdi
Date: Sat, 24 Feb 2024 10:45:36 -0500
Subject: [PATCH 3/4] Updated compose to pass credentials as build argument

---
 docker-compose.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 2c0a39d..1c844eb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,6 +7,9 @@ services:
     ports:
       - '3000:3000'
   backend-service:
-    build: ./backend
+    build:
+      context: ./backend
+      args:
+        - CREDENTIALS_JSON=./backend/credentials.json
     ports:
       - '8000:8000'
\ No newline at end of file
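
Taken together, patches 1-3 move the service-account key out of the image definition: the key is stored base64-encoded in the `B64_ENCODED_KEY` secret, decoded to a file at build time, and the file's path is handed to `docker build` through the `CREDENTIALS_JSON` build argument. A rough local equivalent of the flow (a sketch; secret handling differs in CI) looks like:

```bash
# One-time: base64-encode the service-account key and store the output
# as the B64_ENCODED_KEY secret (-w 0 disables line wrapping on GNU base64)
base64 -w 0 credentials.json

# Build time: decode the secret into the build context, then pass the
# context-relative path to the Dockerfile as a build argument
echo "$B64_ENCODED_KEY" | base64 --decode > backend/credentials.json
docker build --build-arg CREDENTIALS_JSON=credentials.json -t backend:latest ./backend
```

Note that `COPY ${CREDENTIALS_JSON}` resolves its source path relative to the build context (`./backend` above), so the decoded file has to live inside that context; a file decoded into the repository root, as the workflow in patch 2 does with `docker_credentials.json`, is invisible to the build.
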
From 55e5b8427380635dd817c4c3a57b66d98d44e773 Mon Sep 17 00:00:00 2001
From: LaeekAhmed
Date: Sat, 2 Mar 2024 18:29:52 -0500
Subject: [PATCH 4/4] code cleanup + formatting

---
 .github/workflows/docker.yml             |   5 +-
 backend/Dockerfile                       |   5 +-
 backend/main.py                          |  76 +-----
 cloud-infra/k8s/frontend-deployment.yaml |   6 +-
 docker-compose.yml                       |   4 +-
 frontend/src/pages/query.js              | 313 ++++++++++++++---------
 6 files changed, 206 insertions(+), 203 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index e80ffae..3c62baa 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -3,7 +3,6 @@ name: "Manually Deploy Docker Files to Artifact Registry"
 on:
   workflow_dispatch:
 
-
 jobs:
   deploy:
     env:
@@ -22,7 +21,7 @@ jobs:
         credentials_json: ${{ secrets.OWNER_SA_KEY }}
 
     - name: Decode credentials.json
-      run: echo "${{ secrets.B64_ENCODED_KEY }}" | base64 --decode > docker_credentials.json
+      run: echo "${{ secrets.B64_ENCODED_KEY }}" | base64 --decode > credentials.json
 
     - name: Install GCloud CLI
       uses: google-github-actions/setup-gcloud@v0
@@ -33,7 +32,7 @@ jobs:
         CREDENTIALS_JSON: ${{ secrets.B64_ENCODED_KEY }}
       run: |
         gcloud auth configure-docker us-central1-docker.pkg.dev
-        docker build --build-arg CREDENTIALS_JSON=./docker_credentials.json -t us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest ./backend
+        docker build --build-arg CREDENTIALS_JSON=./credentials.json -t us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest ./backend
         docker push us-central1-docker.pkg.dev/automate-gdsc/backend-images/backend:latest
 
     - name: Build and Push Frontend Docker Image

diff --git a/backend/Dockerfile b/backend/Dockerfile
index 448ae9d..cd96ecd 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,3 +1,4 @@
+# argument to be passed while running the docker build command
 ARG CREDENTIALS_JSON
 
 # Use the official Python 3.11 image from Docker Hub
@@ -24,11 +25,11 @@ COPY pyproject.toml poetry.lock ./
 RUN poetry config virtualenvs.create false && poetry install --no-dev
 
 # Copy the creds file and compute dir into the container
-# COPY credentials.json ./
-COPY ${CREDENTIALS_JSON} ./credentials.json
 COPY compute ./compute
+COPY ${CREDENTIALS_JSON} ./
 
 # Copy the FastAPI application into the container
+COPY big_query.py ./
 COPY main.py ./
 
 # Specify the command to run the FastAPI application using uvicorn
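
One caveat that both versions of this Dockerfile share: an `ARG` declared before the first `FROM` is scoped to `FROM` instructions only, so `${CREDENTIALS_JSON}` expands to an empty string by the time the `COPY` runs — and a `COPY` whose source expands to nothing fails the build. A sketch of the fix is simply to re-declare the argument inside the build stage:

```dockerfile
# Declared before FROM: usable only in FROM lines
ARG CREDENTIALS_JSON

FROM python:3.11.6-slim

# Re-declare after FROM to bring the build argument into this stage's scope
ARG CREDENTIALS_JSON

# ...rest of the Dockerfile as in the patch...
COPY ${CREDENTIALS_JSON} ./credentials.json
```
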
diff --git a/backend/main.py b/backend/main.py
index de861cc..9083ee5 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1,6 +1,5 @@
 from http.client import HTTPException
 from google.cloud import storage
-from google.cloud import bigquery
 from starlette.responses import FileResponse
 from io import BytesIO, StringIO
 import pandas as pd
@@ -8,15 +7,18 @@ from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
+
+# custom functions for EDA and AutoML
 from compute.autoEDA import generate_eda
 from compute.autoML import generate_model
+
+from big_query import bq_ops
+
 import csv
 
 app = FastAPI()
 
 DATA_BUCKET = "automate-ml-datasets"
-BQ_DATASET = "automl_dataset_1"
 GRAPH_BUCKET = "automate_ml_graphs"
 
 origins = ["*"]
@@ -197,69 +199,11 @@ async def getModel():
     return FileResponse(path=model_path, filename=model_path.split("/")[-1], media_type='application/octet-stream')
 
-# get file from bucket, load it to big query as a table & display the rows
+# big query operations
 @app.get("/api/bq")
 async def bq(fileName, query=None):
-
-    # construct client objects (authorized with the service account json file)
-    bq_client = bigquery.Client.from_service_account_json("./credentials.json")
-    storage_client = storage.Client.from_service_account_json("./credentials.json")
-
-    # check if the file name has .csv extension, if not, add it
-    # if not fileName.endswith('.csv'):
-    #     fileName += '.csv'
-
-    uri = f"gs://{DATA_BUCKET}/{fileName}"
-
-    # if file does not exist in the bucket, return an error
-    blob = storage_client.get_bucket(DATA_BUCKET).blob(fileName)
-    if not blob.exists():
-        return {"error": f"File {fileName} does not exist in the bucket."}
-
-    fileName = fileName.replace('.csv', '')
-    table_id = f"{BQ_DATASET}.{fileName}_table"
-
-    # if table does not exist, load it
-    # try:
-    #     bq_client.get_table(table_id)
-    # except:
-    job_config = bigquery.LoadJobConfig(
-        autodetect=True,  # Automatically infer the schema.
-        source_format=bigquery.SourceFormat.CSV,
-        skip_leading_rows=1,  # column headers
-        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,  # Overwrite the table
-    )
-    # Make an API request
-    load_job = bq_client.load_table_from_uri(
-        uri, table_id, job_config=job_config
-    )
-    # Waits for the job to complete.
-    load_job.result()
-
-    #------------------------------------------ Query ops ----------------------------------------#
-
-    query = query.upper() if query else None
-
-    # List of potentially harmful operations
-    harmful_ops = ['DROP', 'DELETE', 'INSERT', 'UPDATE']
-
-    # Check if the query contains any harmful operations
-    if query and any(op in query.upper() for op in harmful_ops):
-        print("\nQuery contains harmful operations!\nusing default query.\n")
-        final_query = f"SELECT * FROM `{table_id}`"
-    else:
-        print("\nQuery is safe to be passed.\n")
-        # remove everything before the `SELECT` keyword from the received query
-        query = query[query.find("SELECT"):] if query else None
-        final_query = query.replace("FROM TABLE", f"FROM `{table_id}`") if query else f"SELECT * FROM `{table_id}`"
-        print("Final Query:\n", final_query, "\n")
-
-    query_job = bq_client.query(final_query)
-    rows = query_job.result()
-
-    # display the rows
-    data = []
-    for row in rows:
-        data.append(dict(row))
-
-    return {"message": f"Loaded {table_id} with {rows.total_rows} rows.", "data": data}
+    try:
+        result = bq_ops(fileName, query)
+        return result
+    except Exception as e:
+        return {"error": f"An error occurred: {str(e)}"}

diff --git a/cloud-infra/k8s/frontend-deployment.yaml b/cloud-infra/k8s/frontend-deployment.yaml
index 0981472..1b3c4ee 100644
--- a/cloud-infra/k8s/frontend-deployment.yaml
+++ b/cloud-infra/k8s/frontend-deployment.yaml
@@ -25,6 +25,6 @@ spec:
         imagePullPolicy: Always
         ports:
         - containerPort: 3000
-        env:
-        - name: APP_ENV
-          value: "production"
+        # env:
+        #   - name: APP_ENV
+        #     value: "production"

diff --git a/docker-compose.yml b/docker-compose.yml
index 1c844eb..3dc922e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,8 +2,8 @@ version: '3'
 services:
   frontend-service:
     build: ./frontend
-    environment:
-      - APP_ENV=development
+    # environment:
+    #   - APP_ENV=development
     ports:
       - '3000:3000'
   backend-service:
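
The new `big_query.py` module that `main.py` now imports is not included in the patch. Judging from the code removed from `main.py` above, `bq_ops` presumably carries the same load-and-query logic; a sketch of what it would contain (reconstructed from the removed code, not the actual file):

```python
# big_query.py -- sketch reconstructed from the code removed from main.py;
# the real file is not part of this patch series
from google.cloud import bigquery, storage

DATA_BUCKET = "automate-ml-datasets"
BQ_DATASET = "automl_dataset_1"


def bq_ops(fileName, query=None):
    """Load a CSV from the bucket into a BigQuery table and run a query on it."""
    bq_client = bigquery.Client.from_service_account_json("./credentials.json")
    storage_client = storage.Client.from_service_account_json("./credentials.json")

    # bail out early if the file is not in the bucket
    blob = storage_client.get_bucket(DATA_BUCKET).blob(fileName)
    if not blob.exists():
        raise ValueError(f"File {fileName} does not exist in the bucket.")

    uri = f"gs://{DATA_BUCKET}/{fileName}"
    table_id = f"{BQ_DATASET}.{fileName.replace('.csv', '')}_table"

    # load the CSV into BigQuery, overwriting any existing table
    job_config = bigquery.LoadJobConfig(
        autodetect=True,  # infer the schema from the CSV
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,  # column headers
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )
    bq_client.load_table_from_uri(uri, table_id, job_config=job_config).result()

    # fall back to SELECT * when the query is missing or contains harmful operations
    query = query.upper() if query else None
    harmful_ops = ['DROP', 'DELETE', 'INSERT', 'UPDATE']
    if query and not any(op in query for op in harmful_ops):
        # keep everything from SELECT onwards and point it at the real table
        query = query[query.find("SELECT"):]
        final_query = query.replace("FROM TABLE", f"FROM `{table_id}`")
    else:
        final_query = f"SELECT * FROM `{table_id}`"

    rows = bq_client.query(final_query).result()
    data = [dict(row) for row in rows]
    return {"message": f"Loaded {table_id} with {rows.total_rows} rows.", "data": data}
```

Raising an exception for the missing-file case (rather than returning an `{"error": ...}` dict) fits the new `try/except` wrapper in `main.py`, which converts any exception into an error response for the frontend.
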
diff --git a/frontend/src/pages/query.js b/frontend/src/pages/query.js
index b726c00..13bee22 100644
--- a/frontend/src/pages/query.js
+++ b/frontend/src/pages/query.js
@@ -1,10 +1,26 @@
 import React, { useState, useEffect } from 'react';
 import Editor from '@monaco-editor/react';
-import { Table, TableBody, TableCell, TableContainer, TableHead, TableRow,
-  Paper, Button, ListItemText, ListItemButton, Box, Container, Typography
-  } from "@mui/material"
+import {
+  Table,
+  TableBody,
+  TableCell,
+  TableContainer,
+  TableHead,
+  TableRow,
+  Paper,
+  Button,
+  ListItemText,
+  ListItemButton,
+  Box,
+  Container,
+  Typography,
+} from '@mui/material';
 
-import { PlayCircleOutline, CloudUpload, CloudDownload } from '@mui/icons-material';
+import {
+  PlayCircleOutline,
+  CloudUpload,
+  CloudDownload,
+} from '@mui/icons-material';
 import CircularProgress from '@mui/material/CircularProgress';
 import theme from '@/themes/theme';
 
@@ -17,15 +33,15 @@ const DataSetListComponent = ({ onSelectDataSet, uploadTrigger }) => {
   useEffect(() => {
     // Fetch datasets from /api/datasets and update state
     const fetchData = async () => {
-        try {
-          const res = await fetch("/api/datasets");
-          const data = await res.json();
-          setDataSets(data.names);
-        } catch {
-          console.error("API Endpoint Not Working");
-        }
-      };
-      fetchData();
+      try {
+        const res = await fetch('/api/datasets');
+        const data = await res.json();
+        setDataSets(data.names);
+      } catch {
+        console.error('API Endpoint Not Working');
+      }
+    };
+    fetchData();
   }, [uploadTrigger]);
 
   const handleSelectDataSet = (dataSet) => {
@@ -33,35 +49,37 @@ const DataSetListComponent = ({ onSelectDataSet, uploadTrigger }) => {
     setSelectedDataSet(dataSet);
     onSelectDataSet(dataSet); // This will pass the selected dataset to the parent component
   };
 
-  return (
-    //render the list of selectable datasets
-      <Box>
-        <Paper>
-          {dataSets.map((dataSet, idx) => (
-            <ListItemButton
-              key={idx}
-              onClick={() => handleSelectDataSet(dataSet)}
-            >
-              <ListItemText primary={dataSet} />
-            </ListItemButton>
-          ))}
-        </Paper>
-      </Box>
+  return (
+    //render the list of selectable datasets
+    <Box>
+      <Paper>
+        {dataSets.map((dataSet, idx) => (
+          <ListItemButton
+            key={idx}
+            onClick={() => handleSelectDataSet(dataSet)}
+          >
+            <ListItemText primary={dataSet} />
+          </ListItemButton>
+        ))}
+      </Paper>
+    </Box>
   );
 };
 
 // Component to display the selected dataset
 const DataSetDisplayComponent = ({ selectedDataSet }) => {
   const [data, setData] = useState([{}]);
-  const [csvString, setCsvString] = useState("");
+  const [csvString, setCsvString] = useState('');
 
   useEffect(() => {
     // Simulate fetching data
-    console.log("FETCHING DATA FOR", selectedDataSet)
+    console.log('FETCHING DATA FOR', selectedDataSet);
     const fetchData = async () => {
       try {
-        const res = await fetch(`/api/data?fileName=${encodeURIComponent(selectedDataSet)}`)
+        const res = await fetch(
+          `/api/data?fileName=${encodeURIComponent(selectedDataSet)}`
+        );
         if (!res.ok) {
           throw new Error(`Error: ${res.status}`);
         }
@@ -69,16 +87,15 @@ const DataSetDisplayComponent = ({ selectedDataSet }) => {
         const data = await res.json();
         const jsonObject = data.json;
 
-        setCsvString(data.data)
+        setCsvString(data.data);
         setData(jsonObject);
-
       } catch (error) {
         console.error('Failed to fetch data:', error);
         return null;
       }
     };
     fetchData();
-}, [selectedDataSet]);
+  }, [selectedDataSet]);
 
   const handleDownload = () => {
     const blob = new Blob([csvString], { type: 'text/csv' });
@@ -91,95 +108,123 @@ const DataSetDisplayComponent = ({ selectedDataSet }) => {
     const url = URL.createObjectURL(blob);
     const link = document.createElement('a');
     link.href = url;
     link.download = selectedDataSet;
     document.body.appendChild(link);
     link.click(); // Programmatically click the link to trigger the download
     URL.revokeObjectURL(url); // Free up memory by releasing the object URL
     link.remove(); // Remove the link from the document
-  }
-
-  const [query, setQuery] = useState("-- use `table` for table name, eg:\nSELECT * FROM table LIMIT 2");
+  };
+
+  const [query, setQuery] = useState(
+    '-- use `table` for table name, eg:\nSELECT * FROM table LIMIT 2'
+  );
   const [loading, setLoading] = useState(false);
   const [error, setError] = useState(null);
 
   // calls the `/api/bq` endpoint with the `fileName` and `query` parameters
   const runQuery = async (query) => {
     setLoading(true); // for the loading spinner
     setError(null); // clear any previous errors
 
     // List of potentially harmful operations
     const harmfulOps = ['DROP', 'DELETE', 'INSERT', 'UPDATE'];
 
     // Check if the query contains any harmful operations
-    if (harmfulOps.some(op => query.toUpperCase().includes(op))) {
+    if (harmfulOps.some((op) => query.toUpperCase().includes(op))) {
       setError('Harmful operations detected');
       setLoading(false);
       return;
     }
 
     try {
-      const res = await fetch(`/api/bq?fileName=${encodeURIComponent(selectedDataSet)}&query=${query}`);
+      const res = await fetch(
+        `/api/bq?fileName=${encodeURIComponent(selectedDataSet)}&query=${query}`
+      );
       if (!res.ok) {
         throw new Error(`Error: ${res.status}`);
       }
 
       const response = await res.json();
+      // if response contains error key:
+      if (response.error) {
+        setError("invalid query");
+        return;
+      }
       const data = response.data;
       setData(data);
-
     } catch (error) {
       console.error('Failed to fetch data:', error);
-      setError("something went wrong, make sure your query is valid"); // inform the user of the error
-
+      setError('something went wrong, make sure your query is valid'); // inform the user of the error
     } finally {
       setLoading(false); // remove the loading spinner
     }
   };
 
   const headers = data[0] ? Object.keys(data[0]) : [];
 
   return (
     <Container>
       <TableContainer component={Paper}>
         <Table>
           <TableHead>
             <TableRow>
               {headers.map((header, index) => (
                 <TableCell key={index}>{header}</TableCell>
               ))}
             </TableRow>
           </TableHead>
           <TableBody>
             {data.slice(0, 10).map((item, rowIndex) => (
               <TableRow key={rowIndex}>
                 {Object.values(item).map((value, colIndex) => (
                   <TableCell key={colIndex}>{value}</TableCell>
                 ))}
               </TableRow>
             ))}
           </TableBody>
         </Table>
       </TableContainer>
 
       <Typography>Run Queries on the Dataset:</Typography>
       <Box>
         <Editor
           defaultLanguage="sql"
           value={query}
           onChange={(value) => setQuery(value)}
         />
         <Button
           startIcon={<PlayCircleOutline />}
           onClick={() => runQuery(query)}
         >
           Run Query
         </Button>
         {loading && <CircularProgress />}
         {error &&
           <Typography color="error">{error}</Typography>
         }
         <Button startIcon={<CloudDownload />} onClick={handleDownload}>
           Download Dataset
         </Button>
       </Box>
     </Container>
   );
 };
 
@@ -190,48 +235,45 @@ const MainComponent = () => {
   const [selectedDataSet, setSelectedDataSet] = useState(null);
   const [uploadTrigger, setUploadTrigger] = useState(0);
 
-
   const handleUpload = async (event) => {
     const file = event.target.files[0];
     if (!file) {
-        console.error("No file selected.");
-        return;
+      console.error('No file selected.');
+      return;
     }
 
     // Prepare FormData
     const formData = new FormData();
-    formData.append("file", file);
-    formData.append("fileName", file.name); // Adjust according to how you want to name files on the backend
+    formData.append('file', file);
+    formData.append('fileName', file.name); // Adjust according to how you want to name files on the backend
 
     // Log FormData contents for debugging
     for (let [key, value] of formData.entries()) {
-        console.log(`${key}:`, value);
+      console.log(`${key}:`, value);
     }
 
     try {
-        // Make an asynchronous PUT request to your backend
-        const response = await fetch("/api/upload", {
-          method: "PUT",
-          body: formData, // FormData will be correctly interpreted by your backend
-        });
-
-        // Assuming your backend responds with JSON
-        const data = await response.json();
-
-        // Handle response
-        if (response.ok) {
-          console.log("Upload successful:", data.message);
-          setUploadTrigger(trigger => trigger + 1);
-
-        } else {
-          console.error("Upload failed:", data.error);
-        }
+      // Make an asynchronous PUT request to your backend
+      const response = await fetch('/api/upload', {
+        method: 'PUT',
+        body: formData, // FormData will be correctly interpreted by your backend
+      });
+
+      // Assuming your backend responds with JSON
+      const data = await response.json();
+
+      // Handle response
+      if (response.ok) {
+        console.log('Upload successful:', data.message);
+        setUploadTrigger((trigger) => trigger + 1);
+      } else {
+        console.error('Upload failed:', data.error);
+      }
     } catch (error) {
-        console.error("Error during upload:", error);
+      console.error('Error during upload:', error);
    }
-};
+  };
 
   const handleSelectDataSet = (dataSet) => {
     setSelectedDataSet(dataSet);
   };
@@ -242,41 +284,58 @@ const MainComponent = () => {
 
   return (
     <Container>
       <Typography>Query Datasets</Typography>
       <Box>
         <Button component="label" startIcon={<CloudUpload />}>
           <input type="file" hidden onChange={handleUpload} />
         </Button>
         <DataSetListComponent
           onSelectDataSet={handleSelectDataSet}
           uploadTrigger={uploadTrigger}
         />
       </Box>
       {selectedDataSet && (
         <DataSetDisplayComponent selectedDataSet={selectedDataSet} />
       )}
     </Container>
   );
 };
 
 export default MainComponent;
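
With the refactor, `/api/bq` is a thin wrapper around `bq_ops`, and failures now surface as an `error` key that `query.js` checks before reading `data`. A quick smoke test against a locally running backend (assuming a `sample.csv` already uploaded to the bucket; the file name is hypothetical) might look like:

```bash
# default query: SELECT * on the table generated from the CSV
curl "http://localhost:8000/api/bq?fileName=sample.csv"

# custom query; the literal word `table` stands in for the real table name
curl "http://localhost:8000/api/bq?fileName=sample.csv&query=SELECT%20*%20FROM%20table%20LIMIT%202"
```

Note the deliberate double filtering of harmful keywords: the frontend rejects them before sending, and the backend applies the same check again — the server-side copy being the one that actually enforces the policy.
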