Merge pull request #65 from Racix/readme-and-scripts

Readme and scripts
Racix · Dec 14, 2023 · fb9ca7c · fb9ca7c
2 parents e15ce0d + 4818624
commit fb9ca7c
Show file tree

Hide file tree

Showing 5 changed files with 162 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -1,3 +1,54 @@
 # DoRiS - Diarization of Recordings in Speech-to-text
 
 Today, there is a pressing need for speech transcription and translation to increase the accessibility of information for everyone in society. This need comes in various forms such as live meetings, recorded videos, or phone calls. Therefore, we are developing a service that, with the help of AI, can automate these processes to efficiently use time and resources.
+
+We are a group of 8 students from Luleå University of Technology in Sweden who developed this work during a project course in the Master Programme in Computer Science and Engineering, with specialisation in Information and Communication Technology. Our work combines some more or less well-known AI tools for Transcription, Diarization, Translation and text summarization.
+
+## Models Used
+
+- [Whisper](https://github.com/openai/whisper): Handles transcription.
+- [NeMo](https://github.com/NVIDIA/NeMo): Handles diarization.
+- [Mistral7b OpenHermes 2.5](https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF) with [LLama-index](https://github.com/run-llama/llama_index): Utilized for summarization, requires GPU.
+- [Argos-translate](https://github.com/argosopentech/argos-translate): Provides translation functionality.
+
+## Installation
+
+### Prerequisites
+
+- **Docker Engine**: Install Docker Desktop or another Docker version compatible with your system.
+
+### Installation Steps
+
+#### Backend and Frontend Setup
+
+
+- Run the following script after installing Docker Engine. It builds and starts both the backend and the frontend without using a GPU.
+    ```
+    ./start.sh
+    ```
+    to run with GPU:
+    ```
+    ./start.sh gpu
+    ```
+- Alternatively, navigate to the respective backend and frontend folders and build and deploy them separately:
+    ```
+    ./backend.sh build 
+    ./backend.sh run
+    ``` 
+    with GPU:
+    ```
+    ./backend.sh gpu build 
+    ./backend.sh gpu run
+    ``` 
+
+
+    ```
+    ./frontend.sh build
+    ./frontend.sh run
+    ``` 
+
+- Running with Kubernetes and Helm
+ Use the build scripts from above to create the images, then use Helm to deploy them.
+
+
+ - Running live-transcription: 
diff --git a/backend/backend.sh b/backend/backend.sh
@@ -2,6 +2,11 @@
 
 COMPOSE_FILE="compose.yaml"
 
+if [[ "$1" == "gpu" ]]; then
+    COMPOSE_FILE="compose-gpu.yaml"
+    shift
+fi
+
 build_services() {
     if [ $# -eq 0 ]; then
         echo "Building all docker services"
@@ -11,13 +16,13 @@ build_services() {
 
     for service in "$@"; do
         echo "Building Docker service: $service"
-        docker-compose -f $COMPOSE_FILE build $service
+        docker-compose -f $COMPOSE_FILE build "$service"
     done
 }
 
 run_services() {
     echo "Running Docker services..."
-    docker-compose -f $COMPOSE_FILE --compatibility up -d 
+    docker-compose -f $COMPOSE_FILE --compatibility up -d
 }
 
 case "$1" in
@@ -29,7 +34,7 @@ case "$1" in
         run_services
         ;;
     *)
-        echo "Usage: $0 {build|run}"
+        echo "Usage: $0 {gpu} build|run"
         exit 1
         ;;
 esac

diff --git a/frontend/src/App.js b/frontend/src/App.js
@@ -22,15 +22,48 @@ function App() {
   return (
     <Router>
       <div className="App">
-        <Navbar /> {/* Navbar added here will be on top */}
+        <Navbar /> 
         <Routes>
           <Route path="/" element={
             <>
-              <h1>Welcome to DoRiS</h1>
-              <div>DoRiS is a tool for transcribing and Diarizing your recordings</div>
-              <img src={logo} alt="Logo" className="home-page-logo" />
-            </>
-          } />
+            <h1>Welcome to DoRiS</h1>
+            <div className='dorisField'>A tool for transcribing and diarizing your recordings</div>
+            <div className='app-body'>
+            <div className='textField'>
+              Today, there is a pressing need for speech transcription and translation to increase the accessibility of information for everyone in society.
+              This need comes in various forms such as live meetings, recorded videos, or phone calls. Therefore, we are developing a service that, with the help of AI,
+              can automate these processes to efficiently use time and resources.
+            </div>
+            <div className='textField'>
+              We are a group of 8 students from Luleå University of Technology in Sweden that has developed this work during a project course on the Master Programme in Computer
+              Science and Engineering, with specialisation in Information and Communication Technology. Our work combines some more or less well-known AI tools for Transcription,
+              Diarization, Translation, and text summarization.
+            </div>
+            <div className='textField'> 
+              <strong>Models Used:</strong>
+              <ul>
+                <li><a href="https://github.com/openai/whisper">Whisper</a>: Handles transcription.</li>
+                <li><a href="https://github.com/NVIDIA/NeMo">NeMo</a>: Handles diarization.</li>
+                <li><a href="https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF">Mistral7b OpenHermes 2.5</a> with <a href="https://github.com/run-llama/llama_index">LLama-index</a>: Utilized for summarization, requires GPU.</li>
+                <li><a href="https://github.com/argosopentech/argos-translate">Argos-translate</a>: Provides translation functionality.</li>
+              </ul>
+            </div>
+            <div className='textField'>
+              <strong>Usage:</strong>
+              <br></br>
+              Navigate to Upload & processes to get your files analysed and view the content.  
+
+              <br></br>
+              <br></br>
+              To start live transcription, one participant in the call must run the recorder on their own machine. To do this, navigate to the sound_driver directory and run the sound_driver.exe file. When this is done, connect to the channel on the Live transcription page here on the website.
+              <br></br>
+              <br></br>
+              <a href='https://github.com/Racix/Project-AI-Translation/tree/main'> Read more in the github repository</a>
+            </div>
+            </div>
+          </>
+        } />
+
           <Route path="/display/:id" element={<TranscriptionDisplay />} />
           <Route path="/upload" element={<Upload />}/>
           <Route path="/live" element={<LiveTranscription />}/>

diff --git a/frontend/src/styles/App.css b/frontend/src/styles/App.css
@@ -124,3 +124,53 @@ p {
 .file-list li:hover {
   background-color: #f5f5f5;
 }
+
+
+.textField {
+  margin-bottom: 15px;
+}
+
+ul {
+  list-style: none;
+  padding: 0;
+}
+
+a {
+  color: #0066cc;
+  text-decoration: none;
+}
+
+a:hover {
+  text-decoration: underline;
+}
+
+pre {
+  background-color: #f4f4f4;
+  padding: 10px;
+  border-radius: 5px;
+  overflow-x: auto;
+}
+
+code {
+  font-family: 'Courier New', Courier, monospace;
+}
+
+strong {
+  font-size: 25px;
+  font-weight: bold;
+}
+
+.head-line{
+  font-size: 17px;
+  font-weight: bold;
+}
+
+.app-body{
+  font-size: 20px;
+}
+
+.dorisField{
+  margin-left: 29%;
+  font-size: 22px;
+  margin-bottom: 15px;
+}
diff --git a/start.sh b/start.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Build and start the backend, then the frontend. Pass "gpu" as $1 to forward it to backend.sh.
+set -e  # stop at the first failure so a failed cd or build is never followed by a run
+cd "$(dirname "$0")/backend"  # resolve relative to this script, not the caller's cwd
+if [[ "$1" == "gpu" ]]; then
+    ./backend.sh gpu build
+    ./backend.sh gpu run
+else
+    ./backend.sh build
+    ./backend.sh run
+fi
+cd ../frontend
+./frontend.sh build
+./frontend.sh run