From 512024e8804df8d6323de3d6818b625f087d42f0 Mon Sep 17 00:00:00 2001 From: andrewpeng02 Date: Tue, 9 Apr 2024 15:16:39 -0400 Subject: [PATCH] upload train results to s3, add endpoint to get train results, frontend request from endpoint --- dlp-terraform/ecs/s3.tf | 15 + dlp-terraform/ecs/sqs.tf | 9 + .../src/features/Train/redux/trainspaceApi.ts | 8 + .../src/features/Train/types/trainTypes.ts | 49 ++- frontend/src/pages/train/[train_space_id].tsx | 400 +++++++----------- training/poetry.lock | 285 ++++++++++++- training/pyproject.toml | 1 + training/tests/test_imports.py | 10 +- training/tests/test_loss_function.py | 2 +- training/tests/test_model.py | 2 +- ...est_sk_learn_default_dataset_train_test.py | 2 +- training/training/celery_app.py | 15 + training/training/celeryconfig.py | 11 + training/training/core/authenticator.py | 6 +- training/training/core/celery/__init__.py | 0 .../training/core/{ => celery}/criterion.py | 0 .../training/core/{ => celery}/dataset.py | 0 .../training/core/{ => celery}/dl_model.py | 0 .../training/core/{ => celery}/optimizer.py | 0 .../training/core/{ => celery}/trainer.py | 2 +- training/training/core/celery/worker.py | 226 ++++++++++ .../routes/datasets/default/columns.py | 3 +- training/training/routes/image/image.py | 45 +- training/training/routes/tabular/tabular.py | 67 +-- .../routes/training/results/__init__.py | 0 .../routes/training/results/results.py | 41 ++ .../routes/training/results/schemas.py | 67 +++ training/training/urls.py | 2 + 28 files changed, 912 insertions(+), 356 deletions(-) create mode 100644 dlp-terraform/ecs/s3.tf create mode 100644 dlp-terraform/ecs/sqs.tf create mode 100644 training/training/celery_app.py create mode 100644 training/training/celeryconfig.py create mode 100644 training/training/core/celery/__init__.py rename training/training/core/{ => celery}/criterion.py (100%) rename training/training/core/{ => celery}/dataset.py (100%) rename training/training/core/{ => celery}/dl_model.py (100%) rename training/training/core/{ => celery}/optimizer.py (100%) rename training/training/core/{ => celery}/trainer.py (99%) create mode 100644 training/training/core/celery/worker.py create mode 100644 training/training/routes/training/results/__init__.py create mode 100644 training/training/routes/training/results/results.py create mode 100644 training/training/routes/training/results/schemas.py diff --git a/dlp-terraform/ecs/s3.tf b/dlp-terraform/ecs/s3.tf new file mode 100644 index 000000000..2631fc1d5 --- /dev/null +++ b/dlp-terraform/ecs/s3.tf @@ -0,0 +1,15 @@ +resource "aws_s3_bucket" "s3bucket_executions" { + bucket = "dlp-executions" + + tags = { + Name = "Execution data" + } +} +resource "aws_s3_bucket_public_access_block" "access_block_uploads" { + bucket = aws_s3_bucket.s3bucket_executions.id + + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} diff --git a/dlp-terraform/ecs/sqs.tf b/dlp-terraform/ecs/sqs.tf new file mode 100644 index 000000000..0e64d39f0 --- /dev/null +++ b/dlp-terraform/ecs/sqs.tf @@ -0,0 +1,9 @@ +resource "aws_sqs_queue" "training_queue" { + name = "training-queue.fifo" + fifo_queue = true + message_retention_seconds = 60*24 +} + +output "sqs_queue_url" { + value = aws_sqs_queue.training_queue.url +} \ No newline at end of file diff --git a/frontend/src/features/Train/redux/trainspaceApi.ts b/frontend/src/features/Train/redux/trainspaceApi.ts index e9dad9959..d502e2fab 100644 --- a/frontend/src/features/Train/redux/trainspaceApi.ts +++ b/frontend/src/features/Train/redux/trainspaceApi.ts @@ -2,6 +2,7 @@ import { backendApi } from "@/common/redux/backendApi"; import { DATA_SOURCE, DatasetData, + DetailedTrainResultsData, FileUploadData, } from "@/features/Train/types/trainTypes"; import { fetchBaseQuery } from "@reduxjs/toolkit/dist/query"; @@ -10,6 +11,12 @@ const trainspaceApi = backendApi .enhanceEndpoints({ addTagTypes: ["UserDatasetFilesData"] }) .injectEndpoints({ endpoints: (builder) => ({ + getTrainResultsData: builder.query({ + query: ( {trainspaceId}) => ({ + url: `/api/training/training/results/${trainspaceId}` + }) + } + ), getDatasetFilesData: builder.query< FileUploadData[], { dataSource: DATA_SOURCE } @@ -95,6 +102,7 @@ const trainspaceApi = backendApi }); export const { + useGetTrainResultsDataQuery, useGetDatasetFilesDataQuery, useUploadDatasetFileMutation, useLazyGetColumnsFromDatasetQuery, diff --git a/frontend/src/features/Train/types/trainTypes.ts b/frontend/src/features/Train/types/trainTypes.ts index 886c796db..20f8e0bcf 100644 --- a/frontend/src/features/Train/types/trainTypes.ts +++ b/frontend/src/features/Train/types/trainTypes.ts @@ -1,5 +1,6 @@ import { DATA_SOURCE_ARR } from "../constants/trainConstants"; +// keep in sync with worker.py export type DATA_SOURCE = typeof DATA_SOURCE_ARR[number]; export type TRAIN_STATUS = @@ -16,9 +17,10 @@ export interface BaseTrainspaceData { step: number; } +// basic information, used on dashboard export interface TrainResultsData { name: string; - trainspaceId: number; + trainspaceId: string; dataSource: DATA_SOURCE; status: TRAIN_STATUS; created: Date; @@ -26,6 +28,51 @@ export interface TrainResultsData { uid: string; } +export type CHART_TYPE = "LINE" | "AUC/ROC" | "CONFUSION_MATRIX" + +export type Chart = TimeSeriesChart | AucRocChart | ConfusionMatrixChart + +export interface TimeSeriesMetric { + x_name: string; + y_name: string; + + x_values: number[]; + y_values: number[]; +} + +export interface TimeSeriesChart { + name: string; + + time_series: TimeSeriesMetric[] + chart_type: "LINE" + graph_index: number; +} + +export interface AucRocChart { + name: string; + + values: [number[], number[], number][]; + + chart_type: "AUC/ROC" + graph_index: number; +} + +export interface ConfusionMatrixChart { + name: string; + + values: number[][]; + + chart_type: "CONFUSION_MATRIX" + graph_index: number; +} + +// more detailed information, used when viewing a run +export interface DetailedTrainResultsData { + basicInfo: TrainResultsData + + allMetrics: Chart[] +} + export interface FileUploadData { name: string; lastModified: string; diff --git a/frontend/src/pages/train/[train_space_id].tsx b/frontend/src/pages/train/[train_space_id].tsx index e228cb8b4..43375763a 100644 --- a/frontend/src/pages/train/[train_space_id].tsx +++ b/frontend/src/pages/train/[train_space_id].tsx @@ -2,6 +2,8 @@ import Footer from "@/common/components/Footer"; import NavbarMain from "@/common/components/NavBarMain"; import { useAppSelector } from "@/common/redux/hooks"; import { isSignedIn } from "@/common/redux/userLogin"; +import { useGetTrainResultsDataQuery } from "@/features/Train/redux/trainspaceApi"; +import { DetailedTrainResultsData } from "@/features/Train/types/trainTypes"; import Container from "@mui/material/Container"; import Grid from "@mui/material/Grid"; import Paper from "@mui/material/Paper"; @@ -11,88 +13,149 @@ import { Data, XAxisName, YAxisName } from "plotly.js"; import React, { useEffect } from "react"; const Plot = dynamic(() => import("react-plotly.js"), { ssr: false }); +const LINE_CHART_COLORS = ["red", "blue", "green"]; + +const mapTrainResultsDataToCharts = ( + detailedTrainResultsData: DetailedTrainResultsData +) => { + // sort by graph_index asc and ignore negative graph indices + const sortedData = detailedTrainResultsData.allMetrics + .filter((metric) => metric.graph_index >= 0) + .sort((a, b) => a.graph_index - b.graph_index); + const charts = []; + let i = 0; + while (i < sortedData.length) { + const metric = sortedData[i]; + if (metric.chart_type === "LINE") { + const data = []; + for (let i = 0; i < metric.time_series.length; i++) { + const time_series = metric.time_series[i]; + data.push({ + name: time_series.y_name, + x: time_series.x_values, + y: time_series.y_values, + type: "scatter", + mode: "markers", + marker: { color: LINE_CHART_COLORS[i], size: 10 }, + }); + } + charts.push( + + ); + } else if (metric.chart_type === "AUC/ROC") { + charts.push( + ({ + name: `(AUC: ${x[2]})`, + x: x[0] as number[], + y: x[1] as number[], + type: "scatter", + })) as Data[]), + ]} + layout={{ + height: 350, + width: 525, + xaxis: { title: "False Positive Rate" }, + yaxis: { title: "True Positive Rate" }, + title: "AUC/ROC Curves for your Deep Learning Model", + showlegend: true, + paper_bgcolor: "rgba(0,0,0,0)", + plot_bgcolor: "rgba(0,0,0,0)", + }} + config={{ responsive: true }} + /> + ); + } else if (metric.chart_type === "CONFUSION_MATRIX") { + charts.push( + + row.map((_, j) => ({ + xref: "x1" as XAxisName, + yref: "y1" as YAxisName, + x: j, + y: (i + metric.values.length - 1) % metric.values.length, + text: metric.values[ + (i + metric.values.length - 1) % metric.values.length + ][j].toString(), + font: { + color: + metric.values[ + (i + metric.values.length - 1) % metric.values.length + ][j] > 0 + ? "white" + : "black", + }, + showarrow: false, + })) + ) + .flat(), + paper_bgcolor: "rgba(0,0,0,0)", + plot_bgcolor: "rgba(0,0,0,0)", + }} + /> + ); + } else { + throw Error("Undefined chart type received"); + } + i += 1; + } + + return charts; +}; + const TrainSpace = () => { const { train_space_id } = useRouter().query; - const data = { - success: true, - message: "Dataset trained and results outputted successfully", - dl_results: [ - { - epoch: 1, - train_time: 0.029964923858642578, - train_loss: 1.1126993695894878, - test_loss: 1.1082043647766113, - train_acc: 0.3333333333333333, - "val/test acc": 0.3, - }, - { - epoch: 2, - train_time: 0.0221712589263916, - train_loss: 1.1002190907796223, - test_loss: 1.100191593170166, - train_acc: 0.3333333333333333, - "val/test acc": 0.3, - }, - { - epoch: 3, - train_time: 0.0680840015411377, - train_loss: 1.0896958708763123, - test_loss: 1.0933666229248047, - train_acc: 0.3333333333333333, - "val/test acc": 0.3, - }, - { - epoch: 4, - train_time: 0.007375478744506836, - train_loss: 1.0802951455116272, - test_loss: 1.0868618488311768, - train_acc: 0.3333333333333333, - "val/test acc": 0.3, - }, - { - epoch: 5, - train_time: 0.008754491806030273, - train_loss: 1.071365197499593, - test_loss: 1.080164909362793, - train_acc: 0.3333333333333333, - "val/test acc": 0.3, - }, - ], - auxiliary_outputs: { - confusion_matrix: [ - [0, 0, 6], - [0, 0, 8], - [0, 0, 6], - ], - AUC_ROC_curve_data: [ - [ - [0.0, 0.0, 0.0, 0.07142857142857142, 0.07142857142857142, 1.0], - [ - 0.0, 0.16666666666666666, 0.8333333333333334, 0.8333333333333334, - 1.0, 1.0, - ], - 0.9880952380952381, - ], - [ - [ - 0.0, 0.08333333333333333, 0.5, 0.5, 0.5833333333333334, - 0.5833333333333334, 0.6666666666666666, 0.6666666666666666, 1.0, - ], - [0.0, 0.0, 0.0, 0.75, 0.75, 0.875, 0.875, 1.0, 1.0], - 0.46875, - ], - [ - [0.0, 0.0, 0.0, 0.07142857142857142, 0.07142857142857142, 1.0], - [ - 0.0, 0.16666666666666666, 0.8333333333333334, 0.8333333333333334, - 1.0, 1.0, - ], - 0.9880952380952381, - ], - ], - }, - status: 200, - }; + const { data, isLoading } = useGetTrainResultsDataQuery({ + trainspaceId: train_space_id, + }); + const user = useAppSelector((state) => state.currentUser.user); const router = useRouter(); useEffect(() => { @@ -100,183 +163,22 @@ const TrainSpace = () => { router.replace({ pathname: "/login" }); } }, [user, router.isReady]); - if (!isSignedIn(user)) { + if (!isSignedIn(user) || !data || isLoading) { return <>; } + + const charts = mapTrainResultsDataToCharts(data); return (

{train_space_id}

- - - x.epoch), - y: data.dl_results.map((x) => x["train_acc"]), - type: "scatter", - mode: "markers", - marker: { color: "red", size: 10 }, - }, - { - name: "Test accuracy", - x: data.dl_results.map((x) => x.epoch), - y: data.dl_results.map((x) => x["val/test acc"]), - type: "scatter", - mode: "markers", - marker: { color: "blue", size: 10 }, - }, - ]} - layout={{ - height: 350, - width: 525, - xaxis: { title: "Epoch Number" }, - yaxis: { title: "Accuracy" }, - title: "Train vs. Test Accuracy for your Deep Learning Model", - showlegend: true, - paper_bgcolor: "rgba(0,0,0,0)", - plot_bgcolor: "rgba(0,0,0,0)", - }} - config={{ responsive: true }} - /> - - - - - x.epoch), - y: data.dl_results.map((x) => x.train_loss), - type: "scatter", - mode: "markers", - marker: { color: "red", size: 10 }, - }, - { - name: "Test loss", - x: data.dl_results.map((x) => x.epoch), - y: data.dl_results.map((x) => x.test_loss), - type: "scatter", - mode: "markers", - marker: { color: "blue", size: 10 }, - }, - ]} - layout={{ - height: 350, - width: 525, - xaxis: { title: "Epoch Number" }, - yaxis: { title: "Loss" }, - title: "Train vs. Test Loss for your Deep Learning Model", - showlegend: true, - paper_bgcolor: "rgba(0,0,0,0)", - plot_bgcolor: "rgba(0,0,0,0)", - }} - config={{ responsive: true }} - /> - - - - - ({ - name: `(AUC: ${x[2]})`, - x: x[0] as number[], - y: x[1] as number[], - type: "scatter", - })) as Data[]), - ]} - layout={{ - height: 350, - width: 525, - xaxis: { title: "False Positive Rate" }, - yaxis: { title: "True Positive Rate" }, - title: "AUC/ROC Curves for your Deep Learning Model", - showlegend: true, - paper_bgcolor: "rgba(0,0,0,0)", - plot_bgcolor: "rgba(0,0,0,0)", - }} - config={{ responsive: true }} - /> - - - - - - row.map((_, j) => ({ - xref: "x1" as XAxisName, - yref: "y1" as YAxisName, - x: j, - y: - (i + - data.auxiliary_outputs.confusion_matrix.length - - 1) % - data.auxiliary_outputs.confusion_matrix.length, - text: data.auxiliary_outputs.confusion_matrix[ - (i + - data.auxiliary_outputs.confusion_matrix.length - - 1) % - data.auxiliary_outputs.confusion_matrix.length - ][j].toString(), - font: { - color: - data.auxiliary_outputs.confusion_matrix[ - (i + - data.auxiliary_outputs.confusion_matrix.length - - 1) % - data.auxiliary_outputs.confusion_matrix.length - ][j] > 0 - ? "white" - : "black", - }, - showarrow: false, - })) - ) - .flat(), - paper_bgcolor: "rgba(0,0,0,0)", - plot_bgcolor: "rgba(0,0,0,0)", - }} - /> - - + {charts.map((chart) => ( + + {chart} + + ))}